diff --git a/configure.ac b/configure.ac
index 74fede99f4fbf578af4e703cedaa42f2c278b037..84548743a97423946c38b99e4811afff74bac45a 100644
--- a/configure.ac
+++ b/configure.ac
@@ -842,10 +842,10 @@ esac
 #  Gravity multipole order
 AC_ARG_WITH([multipole-order],
    [AS_HELP_STRING([--with-multipole-order=<order>],
-      [order of the multipole and gravitational field expansion @<:@ default: 4@:>@]
+      [order of the multipole and gravitational field expansion @<:@ default: 5@:>@]
    )],
    [with_multipole_order="$withval"],
-   [with_multipole_order="4"]
+   [with_multipole_order="5"]
 )
 AC_DEFINE_UNQUOTED([SELF_GRAVITY_MULTIPOLE_ORDER], [$with_multipole_order], [Multipole order])
 
diff --git a/examples/EAGLE_6/eagle_6.yml b/examples/EAGLE_6/eagle_6.yml
index f55ecc856953d4cb60a86e3461625318a1757693..346d2c0627ce2fdc1147d0d34fd4faab25b76559 100644
--- a/examples/EAGLE_6/eagle_6.yml
+++ b/examples/EAGLE_6/eagle_6.yml
@@ -30,7 +30,7 @@ Statistics:
 Gravity:
   eta:                   0.025    # Constant dimensionless multiplier for time integration.
   theta:                 0.7      # Opening angle (Multipole acceptance criterion)
-  epsilon:               0.0001   # Softening length (in internal units).
+  epsilon:               0.001    # Softening length (in internal units).
   
 # Parameters for the hydrodynamics scheme
 SPH:
diff --git a/src/cell.c b/src/cell.c
index 4502f5d265dc68540e16ed0e51e681cf5733f842..4b344d475482549c1168a32b5740b86d3a8cfad4 100644
--- a/src/cell.c
+++ b/src/cell.c
@@ -1290,45 +1290,18 @@ void cell_clean(struct cell *c) {
     if (c->progeny[k]) cell_clean(c->progeny[k]);
 }
 
-/**
- * @brief Checks whether a given cell needs drifting or not.
- *
- * @param c the #cell.
- * @param e The #engine (holding current time information).
- *
- * @return 1 If the cell needs drifting, 0 otherwise.
- */
-int cell_is_drift_needed(struct cell *c, const struct engine *e) {
-
-  /* Do we have at least one active particle in the cell ?*/
-  if (cell_is_active(c, e)) return 1;
-
-  /* Loop over the pair tasks that involve this cell */
-  for (struct link *l = c->density; l != NULL; l = l->next) {
-
-    if (l->t->type != task_type_pair && l->t->type != task_type_sub_pair)
-      continue;
-
-    /* Is the other cell in the pair active ? */
-    if ((l->t->ci == c && cell_is_active(l->t->cj, e)) ||
-        (l->t->cj == c && cell_is_active(l->t->ci, e)))
-      return 1;
-  }
-
-  /* No neighbouring cell has active particles. Drift not necessary */
-  return 0;
-}
-
 /**
  * @brief Clear the drift flags on the given cell.
  */
 void cell_clear_drift_flags(struct cell *c, void *data) {
   c->do_drift = 0;
   c->do_sub_drift = 0;
+  c->do_grav_drift = 0;
+  c->do_grav_sub_drift = 0;
 }
 
 /**
- * @brief Activate the drifts on the given cell.
+ * @brief Activate the #part drifts on the given cell.
  */
 void cell_activate_drift_part(struct cell *c, struct scheduler *s) {
 
@@ -1355,9 +1328,36 @@ void cell_activate_drift_part(struct cell *c, struct scheduler *s) {
 }
 
 /**
- * @brief Activate the sorts up a cell hierarchy.
+ * @brief Activate the #gpart drifts on the given cell.
  */
+void cell_activate_drift_gpart(struct cell *c, struct scheduler *s) {
+
+  /* If this cell is already marked for drift, quit early. */
+  if (c->do_grav_drift) return;
 
+  /* Mark this cell for drifting. */
+  c->do_grav_drift = 1;
+
+  /* Set the do_grav_sub_drifts all the way up and activate the super drift
+     if this has not yet been done. */
+  if (c == c->super) {
+    scheduler_activate(s, c->drift_gpart);
+  } else {
+    for (struct cell *parent = c->parent;
+         parent != NULL && !parent->do_grav_sub_drift;
+         parent = parent->parent) {
+      parent->do_grav_sub_drift = 1;
+      if (parent == c->super) {
+        scheduler_activate(s, parent->drift_gpart);
+        break;
+      }
+    }
+  }
+}
+
+/**
+ * @brief Activate the sorts up a cell hierarchy.
+ */
 void cell_activate_sorts_up(struct cell *c, struct scheduler *s) {
   if (c == c->super) {
     scheduler_activate(s, c->sorts);
@@ -1401,7 +1401,13 @@ void cell_activate_sorts(struct cell *c, int sid, struct scheduler *s) {
 }
 
 /**
- * @brief Traverse a sub-cell task and activate the sort tasks along the way.
+ * @brief Traverse a sub-cell task and activate the hydro drift tasks that are
+ * required
+ * by a hydro task
+ *
+ * @param ci The first #cell we recurse in.
+ * @param cj The second #cell we recurse in.
+ * @param s The task #scheduler.
  */
 void cell_activate_subcell_tasks(struct cell *ci, struct cell *cj,
                                  struct scheduler *s) {
@@ -1668,6 +1674,172 @@ void cell_activate_subcell_tasks(struct cell *ci, struct cell *cj,
   }
 }
 
+/**
+ * @brief Traverse a sub-cell task and activate the gravity drift tasks that are
+ * required
+ * by a self gravity task.
+ *
+ * @param ci The first #cell we recurse in.
+ * @param cj The second #cell we recurse in.
+ * @param s The task #scheduler.
+ */
+void cell_activate_subcell_grav_tasks(struct cell *ci, struct cell *cj,
+                                      struct scheduler *s) {
+  /* Some constants */
+  const struct space *sp = s->space;
+  const struct engine *e = sp->e;
+  const int periodic = sp->periodic;
+  const double dim[3] = {sp->dim[0], sp->dim[1], sp->dim[2]};
+  const double theta_crit2 = e->gravity_properties->theta_crit2;
+
+  /* Self interaction? */
+  if (cj == NULL) {
+
+    /* Do anything? */
+    if (!cell_is_active(ci, e)) return;
+
+    /* Recurse? */
+    if (ci->split) {
+
+      /* Loop over all progenies and pairs of progenies */
+      for (int j = 0; j < 8; j++) {
+        if (ci->progeny[j] != NULL) {
+          cell_activate_subcell_grav_tasks(ci->progeny[j], NULL, s);
+          for (int k = j + 1; k < 8; k++)
+            if (ci->progeny[k] != NULL)
+              cell_activate_subcell_grav_tasks(ci->progeny[j], ci->progeny[k],
+                                               s);
+        }
+      }
+    } else {
+
+      /* We have reached the bottom of the tree: activate gpart drift */
+      cell_activate_drift_gpart(ci, s);
+    }
+  }
+
+  /* Pair interaction */
+  else {
+
+    /* Anything to do here? */
+    if (!cell_is_active(ci, e) && !cell_is_active(cj, e)) return;
+
+    /* Recover the multipole information */
+    struct gravity_tensors *const multi_i = ci->multipole;
+    struct gravity_tensors *const multi_j = cj->multipole;
+    const double ri_max = multi_i->r_max;
+    const double rj_max = multi_j->r_max;
+
+    /* Get the distance between the CoMs */
+    double dx = multi_i->CoM[0] - multi_j->CoM[0];
+    double dy = multi_i->CoM[1] - multi_j->CoM[1];
+    double dz = multi_i->CoM[2] - multi_j->CoM[2];
+
+    /* Apply BC */
+    if (periodic) {
+      dx = nearest(dx, dim[0]);
+      dy = nearest(dy, dim[1]);
+      dz = nearest(dz, dim[2]);
+    }
+    const double r2 = dx * dx + dy * dy + dz * dz;
+
+    /* Can we use multipoles ? */
+    if (gravity_M2L_accept(multi_i->r_max, multi_j->r_max, theta_crit2, r2)) {
+
+      /* Ok, no need to drift anything */
+      return;
+    }
+    /* Otherwise, activate the gpart drifts if we are at the bottom. */
+    else if (!ci->split && !cj->split) {
+
+      /* Activate the drifts if the cells are local. */
+      if (cell_is_active(ci, e) || cell_is_active(cj, e)) {
+        if (ci->nodeID == engine_rank) cell_activate_drift_gpart(ci, s);
+        if (cj->nodeID == engine_rank) cell_activate_drift_gpart(cj, s);
+      }
+    }
+    /* Ok, we can still recurse */
+    else {
+
+      if (ri_max > rj_max) {
+        if (ci->split) {
+
+          /* Loop over ci's children */
+          for (int k = 0; k < 8; k++) {
+            if (ci->progeny[k] != NULL)
+              cell_activate_subcell_grav_tasks(ci->progeny[k], cj, s);
+          }
+
+        } else if (cj->split) {
+
+          /* Loop over cj's children */
+          for (int k = 0; k < 8; k++) {
+            if (cj->progeny[k] != NULL)
+              cell_activate_subcell_grav_tasks(ci, cj->progeny[k], s);
+          }
+
+        } else {
+          error("Fundamental error in the logic");
+        }
+      } else if (rj_max >= ri_max) {
+        if (cj->split) {
+
+          /* Loop over cj's children */
+          for (int k = 0; k < 8; k++) {
+            if (cj->progeny[k] != NULL)
+              cell_activate_subcell_grav_tasks(ci, cj->progeny[k], s);
+          }
+
+        } else if (ci->split) {
+
+          /* Loop over ci's children */
+          for (int k = 0; k < 8; k++) {
+            if (ci->progeny[k] != NULL)
+              cell_activate_subcell_grav_tasks(ci->progeny[k], cj, s);
+          }
+
+        } else {
+          error("Fundamental error in the logic");
+        }
+      }
+    }
+  }
+}
+
+/**
+ * @brief Traverse a sub-cell task and activate the gravity drift tasks that are
+ * required
+ * by an external gravity task.
+ *
+ * @param ci The #cell we recurse in.
+ * @param s The task #scheduler.
+ */
+void cell_activate_subcell_external_grav_tasks(struct cell *ci,
+                                               struct scheduler *s) {
+
+  /* Some constants */
+  const struct space *sp = s->space;
+  const struct engine *e = sp->e;
+
+  /* Do anything? */
+  if (!cell_is_active(ci, e)) return;
+
+  /* Recurse? */
+  if (ci->split) {
+
+    /* Loop over all progenies (no need for pairs for external gravity) */
+    for (int j = 0; j < 8; j++) {
+      if (ci->progeny[j] != NULL) {
+        cell_activate_subcell_external_grav_tasks(ci->progeny[j], s);
+      }
+    }
+  } else {
+
+    /* We have reached the bottom of the tree: activate gpart drift */
+    cell_activate_drift_gpart(ci, s);
+  }
+}
+
 /**
  * @brief Un-skips all the tasks associated with a given cell and checks
  * if the space needs to be rebuilt.
@@ -1693,8 +1865,13 @@ int cell_unskip_tasks(struct cell *c, struct scheduler *s) {
         (cj != NULL && cell_is_active(cj, e) && cj->nodeID == engine_rank)) {
       scheduler_activate(s, t);
 
-      /* Set the correct sorting flags */
-      if (t->type == task_type_pair) {
+      /* Activate hydro drift */
+      if (t->type == task_type_self) {
+        if (ci->nodeID == engine_rank) cell_activate_drift_part(ci, s);
+      }
+
+      /* Set the correct sorting flags and activate hydro drifts */
+      else if (t->type == task_type_pair) {
         /* Store some values. */
         atomic_or(&ci->requires_sorts, 1 << t->flags);
         atomic_or(&cj->requires_sorts, 1 << t->flags);
@@ -1843,6 +2020,29 @@ int cell_unskip_tasks(struct cell *c, struct scheduler *s) {
     }
   }
 
+  /* Un-skip the gravity tasks involved with this cell. */
+  for (struct link *l = c->grav; l != NULL; l = l->next) {
+    struct task *t = l->t;
+    struct cell *ci = t->ci;
+    struct cell *cj = t->cj;
+
+    /* Only activate tasks that involve a local active cell. */
+    if ((cell_is_active(ci, e) && ci->nodeID == engine_rank) ||
+        (cj != NULL && cell_is_active(cj, e) && cj->nodeID == engine_rank)) {
+      scheduler_activate(s, t);
+
+      /* Set the drifting flags */
+      if (t->type == task_type_self &&
+          t->subtype == task_subtype_external_grav) {
+        cell_activate_subcell_external_grav_tasks(t->ci, s);
+      } else if (t->type == task_type_self && t->subtype == task_subtype_grav) {
+        cell_activate_subcell_grav_tasks(t->ci, NULL, s);
+      } else if (t->type == task_type_pair) {
+        cell_activate_subcell_grav_tasks(t->ci, t->cj, s);
+      }
+    }
+  }
+
   /* Unskip all the other task types. */
   if (c->nodeID == engine_rank && cell_is_active(c, e)) {
 
@@ -1850,15 +2050,12 @@ int cell_unskip_tasks(struct cell *c, struct scheduler *s) {
       scheduler_activate(s, l->t);
     for (struct link *l = c->force; l != NULL; l = l->next)
       scheduler_activate(s, l->t);
-    for (struct link *l = c->grav; l != NULL; l = l->next)
-      scheduler_activate(s, l->t);
 
     if (c->extra_ghost != NULL) scheduler_activate(s, c->extra_ghost);
     if (c->ghost_in != NULL) scheduler_activate(s, c->ghost_in);
     if (c->ghost_out != NULL) scheduler_activate(s, c->ghost_out);
     if (c->ghost != NULL) scheduler_activate(s, c->ghost);
     if (c->init_grav != NULL) scheduler_activate(s, c->init_grav);
-    if (c->drift_gpart != NULL) scheduler_activate(s, c->drift_gpart);
     if (c->kick1 != NULL) scheduler_activate(s, c->kick1);
     if (c->kick2 != NULL) scheduler_activate(s, c->kick2);
     if (c->timestep != NULL) scheduler_activate(s, c->timestep);
@@ -1931,7 +2128,7 @@ void cell_drift_part(struct cell *c, const struct engine *e, int force) {
 
   /* Check that we are actually going to move forward. */
   if (ti_current < ti_old_part) error("Attempt to drift to the past");
-#endif  // SWIFT_DEBUG_CHECKS
+#endif
 
   /* Are we not in a leaf ? */
   if (c->split && (force || c->do_sub_drift)) {
@@ -2016,8 +2213,9 @@ void cell_drift_part(struct cell *c, const struct engine *e, int force) {
  *
  * @param c The #cell.
  * @param e The #engine (to get ti_current).
+ * @param force Drift the particles irrespective of the #cell flags.
  */
-void cell_drift_gpart(struct cell *c, const struct engine *e) {
+void cell_drift_gpart(struct cell *c, const struct engine *e, int force) {
 
   const double timeBase = e->timeBase;
   const integertime_t ti_old_gpart = c->ti_old_gpart;
@@ -2029,11 +2227,19 @@ void cell_drift_gpart(struct cell *c, const struct engine *e) {
   const double dt = (ti_current - ti_old_gpart) * timeBase;
   float dx_max = 0.f, dx2_max = 0.f;
 
+  /* Drift irrespective of cell flags? */
+  force |= c->do_grav_drift;
+
+#ifdef SWIFT_DEBUG_CHECKS
+  /* Check that we only drift local cells. */
+  if (c->nodeID != engine_rank) error("Drifting a foreign cell is nope.");
+
   /* Check that we are actually going to move forward. */
   if (ti_current < ti_old_gpart) error("Attempt to drift to the past");
+#endif
 
   /* Are we not in a leaf ? */
-  if (c->split) {
+  if (c->split && (force || c->do_grav_sub_drift)) {
 
     /* Loop over the progeny and collect their data. */
     for (int k = 0; k < 8; k++)
@@ -2041,13 +2247,19 @@ void cell_drift_gpart(struct cell *c, const struct engine *e) {
         struct cell *cp = c->progeny[k];
 
         /* Recurse */
-        cell_drift_gpart(cp, e);
+        cell_drift_gpart(cp, e, force);
 
         /* Update */
         dx_max = max(dx_max, cp->dx_max_gpart);
       }
 
-  } else if (ti_current > ti_old_gpart) {
+    /* Store the values */
+    c->dx_max_gpart = dx_max;
+
+    /* Update the time of the last drift */
+    c->ti_old_gpart = ti_current;
+
+  } else if (!c->split && force && ti_current > ti_old_gpart) {
 
     /* Loop over all the g-particles in the cell */
     const size_t nr_gparts = c->gcount;
@@ -2087,16 +2299,16 @@ void cell_drift_gpart(struct cell *c, const struct engine *e) {
     /* Now, get the maximal particle motion from its square */
     dx_max = sqrtf(dx2_max);
 
-  } else {
+    /* Store the values */
+    c->dx_max_gpart = dx_max;
 
-    dx_max = c->dx_max_gpart;
+    /* Update the time of the last drift */
+    c->ti_old_gpart = ti_current;
   }
 
-  /* Store the values */
-  c->dx_max_gpart = dx_max;
-
-  /* Update the time of the last drift */
-  c->ti_old_gpart = ti_current;
+  /* Clear the drift flags. */
+  c->do_grav_drift = 0;
+  c->do_grav_sub_drift = 0;
 }
 
 /**
@@ -2118,7 +2330,8 @@ void cell_drift_all_multipoles(struct cell *c, const struct engine *e) {
   if (ti_current < ti_old_multipole) error("Attempt to drift to the past");
 
   /* Drift the multipole */
-  if (ti_current > ti_old_multipole) gravity_drift(c->multipole, dt);
+  if (ti_current > ti_old_multipole)
+    gravity_drift(c->multipole, dt, c->dx_max_gpart);
 
   /* Are we not in a leaf ? */
   if (c->split) {
@@ -2153,7 +2366,8 @@ void cell_drift_multipole(struct cell *c, const struct engine *e) {
   /* Check that we are actually going to move forward. */
   if (ti_current < ti_old_multipole) error("Attempt to drift to the past");
 
-  if (ti_current > ti_old_multipole) gravity_drift(c->multipole, dt);
+  if (ti_current > ti_old_multipole)
+    gravity_drift(c->multipole, dt, c->dx_max_gpart);
 
   /* Update the time of the last drift */
   c->ti_old_multipole = ti_current;
diff --git a/src/cell.h b/src/cell.h
index e97400623dbb7a66aee981d21883fe4d8f73406a..9c6bfa3431bba5f84bbdd16c4e6a1842f924523e 100644
--- a/src/cell.h
+++ b/src/cell.h
@@ -152,9 +152,13 @@ struct cell {
   /*! The multipole initialistation task */
   struct task *init_grav;
 
-  /*! The ghost tasks */
+  /*! Implicit dependency task before the ghost (in->ghost->out) */
   struct task *ghost_in;
+
+  /*! Implicit dependency task after the ghost (in->ghost->out) */
   struct task *ghost_out;
+
+  /*! The ghost task itself */
   struct task *ghost;
 
   /*! The extra ghost task for complex hydro schemes */
@@ -311,6 +315,21 @@ struct cell {
   /*! Is the #spart data of this cell being used in a sub-cell? */
   int shold;
 
+  /*! Values of dx_max before the drifts, used for sub-cell tasks. */
+  float dx_max_old;
+
+  /*! Values of h_max before the drifts, used for sub-cell tasks. */
+  float h_max_old;
+
+  /*! Values of dx_max_sort before the drifts, used for sub-cell tasks. */
+  float dx_max_sort_old;
+
+  /*! Bit mask of sort directions that will be needed in the next timestep. */
+  unsigned int requires_sorts;
+
+  /*! Bit mask of sorts that need to be computed for this cell. */
+  unsigned int do_sort;
+
   /*! Number of tasks that are associated with this cell. */
   short int nr_tasks;
 
@@ -323,22 +342,17 @@ struct cell {
   /*! The maximal depth of this cell and its progenies */
   char maxdepth;
 
-  /*! Values of dx_max and h_max before the drifts, used for sub-cell tasks. */
-  float dx_max_old;
-  float h_max_old;
-  float dx_max_sort_old;
-
-  /* Bit mask of sort directions that will be needed in the next timestep. */
-  unsigned int requires_sorts;
-
-  /*! Does this cell need to be drifted? */
+  /*! Does this cell need to be drifted (hydro)? */
   char do_drift;
 
-  /*! Do any of this cell's sub-cells need to be drifted? */
+  /*! Do any of this cell's sub-cells need to be drifted (hydro)? */
   char do_sub_drift;
 
-  /*! Bit mask of sorts that need to be computed for this cell. */
-  unsigned int do_sort;
+  /*! Does this cell need to be drifted (gravity)? */
+  char do_grav_drift;
+
+  /*! Do any of this cell's sub-cells need to be drifted (gravity)? */
+  char do_grav_sub_drift;
 
   /*! Do any of this cell's sub-cells need to be sorted? */
   char do_sub_sort;
@@ -390,18 +404,22 @@ void cell_check_part_drift_point(struct cell *c, void *data);
 void cell_check_gpart_drift_point(struct cell *c, void *data);
 void cell_check_multipole_drift_point(struct cell *c, void *data);
 void cell_reset_task_counters(struct cell *c);
-int cell_is_drift_needed(struct cell *c, const struct engine *e);
 int cell_unskip_tasks(struct cell *c, struct scheduler *s);
 void cell_set_super(struct cell *c, struct cell *super);
 void cell_drift_part(struct cell *c, const struct engine *e, int force);
-void cell_drift_gpart(struct cell *c, const struct engine *e);
+void cell_drift_gpart(struct cell *c, const struct engine *e, int force);
 void cell_drift_multipole(struct cell *c, const struct engine *e);
 void cell_drift_all_multipoles(struct cell *c, const struct engine *e);
 void cell_check_timesteps(struct cell *c);
 void cell_store_pre_drift_values(struct cell *c);
 void cell_activate_subcell_tasks(struct cell *ci, struct cell *cj,
                                  struct scheduler *s);
+void cell_activate_subcell_grav_tasks(struct cell *ci, struct cell *cj,
+                                      struct scheduler *s);
+void cell_activate_subcell_external_grav_tasks(struct cell *ci,
+                                               struct scheduler *s);
 void cell_activate_drift_part(struct cell *c, struct scheduler *s);
+void cell_activate_drift_gpart(struct cell *c, struct scheduler *s);
 void cell_activate_sorts(struct cell *c, int sid, struct scheduler *s);
 void cell_clear_drift_flags(struct cell *c, void *data);
 void cell_set_super_mapper(void *map_data, int num_elements, void *extra_data);
diff --git a/src/engine.c b/src/engine.c
index 21ea5130e869072113661b2ef237fb66dc8c7977..93c430d611cf573c643b4cf94325fb97333381a7 100644
--- a/src/engine.c
+++ b/src/engine.c
@@ -156,6 +156,7 @@ void engine_make_hierarchical_tasks(struct engine *e, struct cell *c) {
   const int periodic = e->s->periodic;
   const int is_with_hydro = (e->policy & engine_policy_hydro);
   const int is_self_gravity = (e->policy & engine_policy_self_gravity);
+  const int is_external_gravity = (e->policy & engine_policy_external_gravity);
   const int is_with_cooling = (e->policy & engine_policy_cooling);
   const int is_with_sourceterms = (e->policy & engine_policy_sourceterms);
 
@@ -171,11 +172,15 @@ void engine_make_hierarchical_tasks(struct engine *e, struct cell *c) {
     /* Local tasks only... */
     if (c->nodeID == e->nodeID) {
 
-      /* Add the drift task. */
+      /* Add the drift tasks corresponding to the policy. */
       if (is_with_hydro) {
         c->drift_part = scheduler_addtask(s, task_type_drift_part,
                                           task_subtype_none, 0, 0, c, NULL);
       }
+      if (is_self_gravity || is_external_gravity) {
+        c->drift_gpart = scheduler_addtask(s, task_type_drift_gpart,
+                                           task_subtype_none, 0, 0, c, NULL);
+      }
 
       /* Add the two half kicks */
       c->kick1 = scheduler_addtask(s, task_type_kick1, task_subtype_none, 0, 0,
@@ -191,6 +196,7 @@ void engine_make_hierarchical_tasks(struct engine *e, struct cell *c) {
       scheduler_addunlock(s, c->kick2, c->timestep);
       scheduler_addunlock(s, c->timestep, c->kick1);
 
+      /* Add the self-gravity tasks */
       if (is_self_gravity) {
 
         /* Initialisation of the multipoles */
@@ -211,8 +217,10 @@ void engine_make_hierarchical_tasks(struct engine *e, struct cell *c) {
         scheduler_addunlock(s, c->grav_down, c->kick2);
       }
 
-      /* Generate the ghost tasks. */
+      /* Add the hydrodynamics tasks */
       if (is_with_hydro) {
+
+        /* Generate the ghost tasks. */
         c->ghost_in =
             scheduler_addtask(s, task_type_ghost, task_subtype_none, 0,
                               /* implicit = */ 1, c, NULL);
@@ -1720,7 +1728,7 @@ void engine_make_self_gravity_tasks_mapper(void *map_data, int num_elements,
   const int cdim[3] = {s->cdim[0], s->cdim[1], s->cdim[2]};
   const int cdim_ghost[3] = {s->cdim[0] / 4 + 1, s->cdim[1] / 4 + 1,
                              s->cdim[2] / 4 + 1};
-  const double theta_crit_inv = e->gravity_properties->theta_crit_inv;
+  const double theta_crit2 = e->gravity_properties->theta_crit2;
   struct cell *cells = s->cells_top;
   const int n_ghosts = cdim_ghost[0] * cdim_ghost[1] * cdim_ghost[2] * 2;
 
@@ -1776,7 +1784,7 @@ void engine_make_self_gravity_tasks_mapper(void *map_data, int num_elements,
           if (cj->nodeID != nodeID) continue;  // MATTHIEU
 
           /* Recover the multipole information */
-          struct gravity_tensors *const multi_j = cj->multipole;
+          const struct gravity_tensors *const multi_j = cj->multipole;
 
           /* Get the distance between the CoMs */
           double dx = CoM_i[0] - multi_j->CoM[0];
@@ -1792,8 +1800,8 @@ void engine_make_self_gravity_tasks_mapper(void *map_data, int num_elements,
           const double r2 = dx * dx + dy * dy + dz * dz;
 
           /* Are the cells too close for a MM interaction ? */
-          if (!gravity_multipole_accept_rebuild(multi_i, multi_j,
-                                                theta_crit_inv, r2)) {
+          if (!gravity_M2L_accept(multi_i->r_max_rebuild,
+                                  multi_j->r_max_rebuild, theta_crit2, r2)) {
 
             /* Ok, we need to add a direct pair calculation */
             scheduler_addtask(sched, task_type_pair, task_subtype_grav, 0, 0,
@@ -1839,11 +1847,9 @@ void engine_make_self_gravity_tasks(struct engine *e) {
     /* Make the ghosts implicit and add the dependencies */
     for (int n = 0; n < n_ghosts / 2; ++n) {
       ghosts[2 * n + 0] = scheduler_addtask(
-          sched, task_type_grav_ghost, task_subtype_none, 0, 0, NULL, NULL);
+          sched, task_type_grav_ghost, task_subtype_none, 0, 1, NULL, NULL);
       ghosts[2 * n + 1] = scheduler_addtask(
-          sched, task_type_grav_ghost, task_subtype_none, 0, 0, NULL, NULL);
-      ghosts[2 * n + 0]->implicit = 1;
-      ghosts[2 * n + 1]->implicit = 1;
+          sched, task_type_grav_ghost, task_subtype_none, 0, 1, NULL, NULL);
       scheduler_addunlock(sched, ghosts[2 * n + 0], s->grav_top_level);
       scheduler_addunlock(sched, s->grav_top_level, ghosts[2 * n + 1]);
     }
@@ -2063,6 +2069,7 @@ static inline void engine_make_self_gravity_dependencies(
     struct scheduler *sched, struct task *gravity, struct cell *c) {
 
   /* init --> gravity --> grav_down --> kick */
+  scheduler_addunlock(sched, c->super->drift_gpart, gravity);
   scheduler_addunlock(sched, c->super->init_grav, gravity);
   scheduler_addunlock(sched, gravity, c->super->grav_down);
 }
@@ -2648,16 +2655,32 @@ void engine_marktasks_mapper(void *map_data, int num_elements,
     struct task *t = &tasks[ind];
 
     /* Single-cell task? */
-    if (t->type == task_type_self || t->type == task_type_ghost ||
-        t->type == task_type_extra_ghost || t->type == task_type_cooling ||
-        t->type == task_type_sourceterms || t->type == task_type_sub_self) {
+    if (t->type == task_type_self || t->type == task_type_sub_self) {
+
+      /* Local pointer. */
+      struct cell *ci = t->ci;
+
+      if (ci->nodeID != engine_rank) error("Non-local self task found");
 
       /* Set this task's skip. */
-      if (cell_is_active(t->ci, e)) scheduler_activate(s, t);
+      if (cell_is_active(ci, e)) scheduler_activate(s, t);
 
+      /* Activate the hydro drift */
+      if (t->type == task_type_self && t->subtype == task_subtype_density) {
+        cell_activate_drift_part(ci, s);
+      }
+      /* Activate the gravity drift */
+      else if (t->type == task_type_self && t->subtype == task_subtype_grav) {
+        cell_activate_subcell_grav_tasks(t->ci, NULL, s);
+      }
       /* Store current values of dx_max and h_max. */
-      if (t->type == task_type_sub_self && t->subtype == task_subtype_density) {
-        cell_activate_subcell_tasks(t->ci, NULL, s);
+      else if (t->type == task_type_sub_self &&
+               t->subtype == task_subtype_density) {
+        cell_activate_subcell_tasks(ci, NULL, s);
+
+      } else if (t->type == task_type_sub_self &&
+                 t->subtype == task_subtype_grav) {
+        error("Invalid task sub-type encountered");
       }
     }
 
@@ -2668,34 +2691,42 @@ void engine_marktasks_mapper(void *map_data, int num_elements,
       struct cell *ci = t->ci;
       struct cell *cj = t->cj;
 
-      /* If this task does not involve any active cells, skip it. */
-      if (!cell_is_active(t->ci, e) && !cell_is_active(t->cj, e)) continue;
-
       /* Only activate tasks that involve a local active cell. */
       if ((cell_is_active(ci, e) && ci->nodeID == engine_rank) ||
-          (cj != NULL && cell_is_active(cj, e) && cj->nodeID == engine_rank)) {
+          (cell_is_active(cj, e) && cj->nodeID == engine_rank)) {
         scheduler_activate(s, t);
 
         /* Set the correct sorting flags */
         if (t->type == task_type_pair && t->subtype == task_subtype_density) {
+
           /* Store some values. */
           atomic_or(&ci->requires_sorts, 1 << t->flags);
           atomic_or(&cj->requires_sorts, 1 << t->flags);
           ci->dx_max_sort_old = ci->dx_max_sort;
           cj->dx_max_sort_old = cj->dx_max_sort;
 
-          /* Activate the drift tasks. */
+          /* Activate the hydro drift tasks. */
           if (ci->nodeID == engine_rank) cell_activate_drift_part(ci, s);
           if (cj->nodeID == engine_rank) cell_activate_drift_part(cj, s);
 
           /* Check the sorts and activate them if needed. */
           cell_activate_sorts(ci, t->flags, s);
           cell_activate_sorts(cj, t->flags, s);
+
+        } else if (t->type == task_type_pair &&
+                   t->subtype == task_subtype_grav) {
+          /* Activate the gravity drift */
+          cell_activate_subcell_grav_tasks(t->ci, t->cj, s);
         }
+
         /* Store current values of dx_max and h_max. */
         else if (t->type == task_type_sub_pair &&
                  t->subtype == task_subtype_density) {
           cell_activate_subcell_tasks(t->ci, t->cj, s);
+
+        } else if (t->type == task_type_sub_pair &&
+                   t->subtype == task_subtype_grav) {
+          error("Invalid task sub-type encountered");
         }
       }
 
@@ -2828,19 +2859,24 @@ void engine_marktasks_mapper(void *map_data, int num_elements,
       }
     }
 
-    /* Kick/Drift/init ? */
-    if (t->type == task_type_kick1 || t->type == task_type_kick2 ||
-        t->type == task_type_drift_gpart || t->type == task_type_init_grav) {
+    /* Kick/init ? */
+    else if (t->type == task_type_kick1 || t->type == task_type_kick2 ||
+             t->type == task_type_init_grav) {
+      if (cell_is_active(t->ci, e)) scheduler_activate(s, t);
+    }
+
+    /* Hydro ghost tasks ? */
+    else if (t->type == task_type_ghost || t->type == task_type_extra_ghost) {
       if (cell_is_active(t->ci, e)) scheduler_activate(s, t);
     }
 
-    /* Gravity ? */
+    /* Gravity (grav_down / long-range) tasks ? */
     else if (t->type == task_type_grav_down ||
              t->type == task_type_grav_long_range) {
       if (cell_is_active(t->ci, e)) scheduler_activate(s, t);
     }
 
-    /* Periodic gravity ? */
+    /* Periodic gravity tasks (note: these are not linked to a cell) ? */
     else if (t->type == task_type_grav_top_level ||
              t->type == task_type_grav_ghost) {
       scheduler_activate(s, t);
@@ -2853,6 +2889,11 @@ void engine_marktasks_mapper(void *map_data, int num_elements,
       t->ci->s_updated = 0;
       if (cell_is_active(t->ci, e)) scheduler_activate(s, t);
     }
+
+    /* Subgrid tasks */
+    else if (t->type == task_type_cooling || t->type == task_type_sourceterms) {
+      if (cell_is_active(t->ci, e)) scheduler_activate(s, t);
+    }
   }
 }
 
@@ -3393,8 +3434,9 @@ void engine_skip_drift(struct engine *e) {
 
     struct task *t = &tasks[i];
 
-    /* Skip everything that updates the particles */
-    if (t->type == task_type_drift_part) t->skip = 1;
+    /* Skip everything that moves the particles */
+    if (t->type == task_type_drift_part || t->type == task_type_drift_gpart)
+      t->skip = 1;
   }
 
   /* Run through the cells and clear some flags. */
@@ -3832,7 +3874,7 @@ void engine_do_drift_all_mapper(void *map_data, int num_elements,
       cell_drift_part(c, e, 1);
 
       /* Drift all the g-particles */
-      cell_drift_gpart(c, e);
+      cell_drift_gpart(c, e, 1);
 
       /* Drift the multipoles */
       if (e->policy & engine_policy_self_gravity)
diff --git a/src/gravity.c b/src/gravity.c
index f58bc1b7456bc5dfc95b4c976ebda8e1999ff3e0..05f4f3724414287e5aeaa6e932ff4df7810914d9 100644
--- a/src/gravity.c
+++ b/src/gravity.c
@@ -307,7 +307,10 @@ int gravity_exact_force_file_exits(const struct engine *e) {
 
   /* File name */
   char file_name[100];
-  sprintf(file_name, "gravity_checks_exact_step%d.dat", e->step);
+  if (e->s->periodic)
+    sprintf(file_name, "gravity_checks_exact_periodic_step%d.dat", e->step);
+  else
+    sprintf(file_name, "gravity_checks_exact_step%d.dat", e->step);
 
   /* Does the file exist ? */
   if (access(file_name, R_OK | W_OK) == 0) {
@@ -552,14 +555,20 @@ void gravity_exact_force_check(struct space *s, const struct engine *e,
   if (!gravity_exact_force_file_exits(e)) {
 
     char file_name_exact[100];
-    sprintf(file_name_exact, "gravity_checks_exact_step%d.dat", e->step);
+    if (s->periodic)
+      sprintf(file_name_exact, "gravity_checks_exact_periodic_step%d.dat",
+              e->step);
+    else
+      sprintf(file_name_exact, "gravity_checks_exact_step%d.dat", e->step);
 
     FILE *file_exact = fopen(file_name_exact, "w");
     fprintf(file_exact, "# Gravity accuracy test - EXACT FORCES\n");
     fprintf(file_exact, "# G= %16.8e\n", e->physical_constants->const_newton_G);
     fprintf(file_exact, "# N= %d\n", SWIFT_GRAVITY_FORCE_CHECKS);
     fprintf(file_exact, "# epsilon=%16.8e\n", e->gravity_properties->epsilon);
-    fprintf(file_exact, "# theta=%16.8e\n", e->gravity_properties->theta_crit);
+    fprintf(file_exact, "# periodic= %d\n", s->periodic);
+    fprintf(file_exact, "# Git Branch: %s\n", git_branch());
+    fprintf(file_exact, "# Git Revision: %s\n", git_revision());
     fprintf(file_exact, "# %16s %16s %16s %16s %16s %16s %16s\n", "id",
             "pos[0]", "pos[1]", "pos[2]", "a_exact[0]", "a_exact[1]",
             "a_exact[2]");
diff --git a/src/gravity/Default/gravity_iact.h b/src/gravity/Default/gravity_iact.h
index d4a95540de17631ad445075d672d03a1236e34e3..811d6fc8f902530840bcce4cf378c72ce25d0f4f 100644
--- a/src/gravity/Default/gravity_iact.h
+++ b/src/gravity/Default/gravity_iact.h
@@ -21,232 +21,150 @@
 #define SWIFT_DEFAULT_GRAVITY_IACT_H
 
 /* Includes. */
-#include "const.h"
 #include "kernel_gravity.h"
 #include "kernel_long_gravity.h"
 #include "multipole.h"
-#include "vector.h"
 
 /**
- * @brief Gravity forces between particles truncated by the long-range kernel
+ * @brief Computes the intensity of the force at a point generated by a
+ * point-mass.
+ *
+ * The returned quantity needs to be multiplied by the distance vector to obtain
+ * the force vector.
+ *
+ * @param r2 Square of the distance to the point-mass.
+ * @param h2 Square of the softening length.
+ * @param h_inv Inverse of the softening length.
+ * @param h_inv3 Cube of the inverse of the softening length.
+ * @param mass Mass of the point-mass.
+ * @param f_ij (return) The force intensity.
  */
-__attribute__((always_inline)) INLINE static void runner_iact_grav_pp_truncated(
-    float r2, const float *dx, struct gpart *gpi, struct gpart *gpj,
-    float rlr_inv) {
-
-  /* Apply the gravitational acceleration. */
-  const float r = sqrtf(r2);
-  const float ir = 1.f / r;
-  const float mi = gpi->mass;
-  const float mj = gpj->mass;
-  const float hi = gpi->epsilon;
-  const float hj = gpj->epsilon;
-  const float u_lr = r * rlr_inv;
-  float f_lr, fi, fj, W;
-
-#ifdef SWIFT_DEBUG_CHECKS
-  if (r == 0.f) error("Interacting particles with 0 distance");
-#endif
-
-  /* Get long-range correction */
-  kernel_long_grav_eval(u_lr, &f_lr);
+__attribute__((always_inline)) INLINE static void runner_iact_grav_pp_full(
+    float r2, float h2, float h_inv, float h_inv3, float mass, float *f_ij) {
 
-  if (r >= hi) {
-
-    /* Get Newtonian gravity */
-    fi = mj * ir * ir * ir * f_lr;
-
-  } else {
-
-    const float hi_inv = 1.f / hi;
-    const float hi_inv3 = hi_inv * hi_inv * hi_inv;
-    const float ui = r * hi_inv;
-
-    kernel_grav_eval(ui, &W);
-
-    /* Get softened gravity */
-    fi = mj * hi_inv3 * W * f_lr;
-  }
+  /* Get the inverse distance */
+  const float r_inv = 1.f / sqrtf(r2);
 
-  if (r >= hj) {
+  /* Should we soften ? */
+  if (r2 >= h2) {
 
     /* Get Newtonian gravity */
-    fj = mi * ir * ir * ir * f_lr;
+    *f_ij = mass * r_inv * r_inv * r_inv;
 
   } else {
 
-    const float hj_inv = 1.f / hj;
-    const float hj_inv3 = hj_inv * hj_inv * hj_inv;
-    const float uj = r * hj_inv;
+    const float r = r2 * r_inv;
+    const float ui = r * h_inv;
+    float W_ij;
 
-    kernel_grav_eval(uj, &W);
+    kernel_grav_eval(ui, &W_ij);
 
     /* Get softened gravity */
-    fj = mi * hj_inv3 * W * f_lr;
+    *f_ij = mass * h_inv3 * W_ij;
   }
-
-  const float fidx[3] = {fi * dx[0], fi * dx[1], fi * dx[2]};
-  gpi->a_grav[0] -= fidx[0];
-  gpi->a_grav[1] -= fidx[1];
-  gpi->a_grav[2] -= fidx[2];
-
-  const float fjdx[3] = {fj * dx[0], fj * dx[1], fj * dx[2]};
-  gpj->a_grav[0] += fjdx[0];
-  gpj->a_grav[1] += fjdx[1];
-  gpj->a_grav[2] += fjdx[2];
 }
 
 /**
- * @brief Gravity forces between particles
+ * @brief Computes the intensity of the force at a point generated by a
+ * point-mass truncated for long-distance periodicity.
+ *
+ * The returned quantity needs to be multiplied by the distance vector to obtain
+ * the force vector.
+ *
+ * @param r2 Square of the distance to the point-mass.
+ * @param h2 Square of the softening length.
+ * @param h_inv Inverse of the softening length.
+ * @param h_inv3 Cube of the inverse of the softening length.
+ * @param mass Mass of the point-mass.
+ * @param rlr_inv Inverse of the mesh smoothing scale.
+ * @param f_ij (return) The force intensity.
  */
-__attribute__((always_inline)) INLINE static void runner_iact_grav_pp(
-    float r2, const float *dx, struct gpart *gpi, struct gpart *gpj) {
-
-  /* Apply the gravitational acceleration. */
-  const float r = sqrtf(r2);
-  const float ir = 1.f / r;
-  const float mi = gpi->mass;
-  const float mj = gpj->mass;
-  const float hi = gpi->epsilon;
-  const float hj = gpj->epsilon;
-  float fi, fj, W;
-
-#ifdef SWIFT_DEBUG_CHECKS
-  if (r == 0.f) error("Interacting particles with 0 distance");
-#endif
-
-  if (r >= hi) {
-
-    /* Get Newtonian gravity */
-    fi = mj * ir * ir * ir;
-
-  } else {
-
-    const float hi_inv = 1.f / hi;
-    const float hi_inv3 = hi_inv * hi_inv * hi_inv;
-    const float ui = r * hi_inv;
-
-    kernel_grav_eval(ui, &W);
+__attribute__((always_inline)) INLINE static void runner_iact_grav_pp_truncated(
+    float r2, float h2, float h_inv, float h_inv3, float mass, float rlr_inv,
+    float *f_ij) {
 
-    /* Get softened gravity */
-    fi = mj * hi_inv3 * W;
-  }
+  /* Get the inverse distance */
+  const float r_inv = 1.f / sqrtf(r2);
+  const float r = r2 * r_inv;
 
-  if (r >= hj) {
+  /* Should we soften ? */
+  if (r2 >= h2) {
 
     /* Get Newtonian gravity */
-    fj = mi * ir * ir * ir;
+    *f_ij = mass * r_inv * r_inv * r_inv;
 
   } else {
 
-    const float hj_inv = 1.f / hj;
-    const float hj_inv3 = hj_inv * hj_inv * hj_inv;
-    const float uj = r * hj_inv;
+    const float ui = r * h_inv;
+    float W_ij;
 
-    kernel_grav_eval(uj, &W);
+    kernel_grav_eval(ui, &W_ij);
 
     /* Get softened gravity */
-    fj = mi * hj_inv3 * W;
+    *f_ij = mass * h_inv3 * W_ij;
   }
 
-  const float fidx[3] = {fi * dx[0], fi * dx[1], fi * dx[2]};
-  gpi->a_grav[0] -= fidx[0];
-  gpi->a_grav[1] -= fidx[1];
-  gpi->a_grav[2] -= fidx[2];
-
-  const float fjdx[3] = {fj * dx[0], fj * dx[1], fj * dx[2]};
-  gpj->a_grav[0] += fjdx[0];
-  gpj->a_grav[1] += fjdx[1];
-  gpj->a_grav[2] += fjdx[2];
-}
-
-/**
- * @brief Gravity forces between particles truncated by the long-range kernel
- * (non-symmetric version)
- */
-__attribute__((always_inline)) INLINE static void
-runner_iact_grav_pp_truncated_nonsym(float r2, const float *dx,
-                                     struct gpart *gpi, const struct gpart *gpj,
-                                     float rlr_inv) {
-
-  /* Apply the gravitational acceleration. */
-  const float r = sqrtf(r2);
-  const float ir = 1.f / r;
-  const float mj = gpj->mass;
-  const float hi = gpi->epsilon;
-  const float u_lr = r * rlr_inv;
-  float f_lr, f, W;
-
-#ifdef SWIFT_DEBUG_CHECKS
-  if (r == 0.f) error("Interacting particles with 0 distance");
-#endif
-
   /* Get long-range correction */
-  kernel_long_grav_eval(u_lr, &f_lr);
-
-  if (r >= hi) {
-
-    /* Get Newtonian gravity */
-    f = mj * ir * ir * ir * f_lr;
-
-  } else {
-
-    const float hi_inv = 1.f / hi;
-    const float hi_inv3 = hi_inv * hi_inv * hi_inv;
-    const float ui = r * hi_inv;
-
-    kernel_grav_eval(ui, &W);
-
-    /* Get softened gravity */
-    f = mj * hi_inv3 * W * f_lr;
-  }
-
-  const float fdx[3] = {f * dx[0], f * dx[1], f * dx[2]};
-
-  gpi->a_grav[0] -= fdx[0];
-  gpi->a_grav[1] -= fdx[1];
-  gpi->a_grav[2] -= fdx[2];
+  const float u_lr = r * rlr_inv;
+  float corr_lr;
+  kernel_long_grav_eval(u_lr, &corr_lr);
+  *f_ij *= corr_lr;
 }
 
 /**
- * @brief Gravity forces between particles (non-symmetric version)
+ * @brief Computes the force at a point generated by a multipole.
+ *
+ * This uses the monopole and quadrupole terms and defaults to the monopole if
+ * the code is compiled with low-order gravity only.
+ *
+ * @param r_x x-component of the distance vector to the multipole.
+ * @param r_y y-component of the distance vector to the multipole.
+ * @param r_z z-component of the distance vector to the multipole.
+ * @param r2 Square of the distance vector to the multipole.
+ * @param h The softening length.
+ * @param h_inv Inverse of the softening length.
+ * @param m The multipole.
+ * @param f_x (return) The x-component of the acceleration.
+ * @param f_y (return) The y-component of the acceleration.
+ * @param f_z (return) The z-component of the acceleration.
  */
-__attribute__((always_inline)) INLINE static void runner_iact_grav_pp_nonsym(
-    float r2, const float *dx, struct gpart *gpi, const struct gpart *gpj) {
-
-  /* Apply the gravitational acceleration. */
-  const float r = sqrtf(r2);
-  const float ir = 1.f / r;
-  const float mj = gpj->mass;
-  const float hi = gpi->epsilon;
-  float f, W;
-
-#ifdef SWIFT_DEBUG_CHECKS
-  if (r == 0.f) error("Interacting particles with 0 distance");
-#endif
-
-  if (r >= hi) {
-
-    /* Get Newtonian gravity */
-    f = mj * ir * ir * ir;
+__attribute__((always_inline)) INLINE static void runner_iact_grav_pm(
+    float r_x, float r_y, float r_z, float r2, float h, float h_inv,
+    const struct multipole *m, float *f_x, float *f_y, float *f_z) {
 
-  } else {
+#if SELF_GRAVITY_MULTIPOLE_ORDER < 3
+  /* Monopole only: get the (softened) point-mass force intensity */
+  float f_ij;
+  runner_iact_grav_pp_full(r2, h * h, h_inv, h_inv * h_inv * h_inv, m->M_000,
+                           &f_ij);
+
+  /* Same sign as the M_000 * D_100 term below, assuming D_100 = -r_x / r^3.
+   * NOTE(review): confirm against compute_potential_derivatives_M2P(). */
+  *f_x = -f_ij * r_x;
+  *f_y = -f_ij * r_y;
+  *f_z = -f_ij * r_z;
+#else
 
-    const float hi_inv = 1.f / hi;
-    const float hi_inv3 = hi_inv * hi_inv * hi_inv;
-    const float ui = r * hi_inv;
+  /* Get the inverse distance */
+  const float r_inv = 1.f / sqrtf(r2);
 
-    kernel_grav_eval(ui, &W);
+  struct potential_derivatives_M2P pot;
+  compute_potential_derivatives_M2P(r_x, r_y, r_z, r2, r_inv, h, h_inv, &pot);
 
-    /* Get softened gravity */
-    f = mj * hi_inv3 * W;
-  }
+  /* 1st order terms (monopole) */
+  *f_x = m->M_000 * pot.D_100;
+  *f_y = m->M_000 * pot.D_010;
+  *f_z = m->M_000 * pot.D_001;
 
-  const float fdx[3] = {f * dx[0], f * dx[1], f * dx[2]};
+  /* 3rd order terms (quadrupole) */
+  *f_x += m->M_200 * pot.D_300 + m->M_020 * pot.D_120 + m->M_002 * pot.D_102;
+  *f_y += m->M_200 * pot.D_210 + m->M_020 * pot.D_030 + m->M_002 * pot.D_012;
+  *f_z += m->M_200 * pot.D_201 + m->M_020 * pot.D_021 + m->M_002 * pot.D_003;
+  *f_x += m->M_110 * pot.D_210 + m->M_101 * pot.D_201 + m->M_011 * pot.D_111;
+  *f_y += m->M_110 * pot.D_120 + m->M_101 * pot.D_111 + m->M_011 * pot.D_021;
+  *f_z += m->M_110 * pot.D_111 + m->M_101 * pot.D_102 + m->M_011 * pot.D_012;
 
-  gpi->a_grav[0] -= fdx[0];
-  gpi->a_grav[1] -= fdx[1];
-  gpi->a_grav[2] -= fdx[2];
+#endif
 }
 
 #endif /* SWIFT_DEFAULT_GRAVITY_IACT_H */
diff --git a/src/gravity_cache.h b/src/gravity_cache.h
index fd87be64315c2746bba566916a132d13dfac07ef..fdc89605765b460b355b3958e34287991be5ff1b 100644
--- a/src/gravity_cache.h
+++ b/src/gravity_cache.h
@@ -59,6 +59,12 @@ struct gravity_cache {
   /*! #gpart z acceleration. */
   float *restrict a_z SWIFT_CACHE_ALIGN;
 
+  /*! Is this #gpart active ? */
+  int *restrict active SWIFT_CACHE_ALIGN;
+
+  /*! Can this #gpart use a M2P interaction ? */
+  int *restrict use_mpole SWIFT_CACHE_ALIGN;
+
   /*! Cache size */
   int count;
 };
@@ -79,6 +85,8 @@ static INLINE void gravity_cache_clean(struct gravity_cache *c) {
     free(c->a_x);
     free(c->a_y);
     free(c->a_z);
+    free(c->active);
+    free(c->use_mpole);
   }
   c->count = 0;
 }
@@ -97,24 +105,26 @@ static INLINE void gravity_cache_init(struct gravity_cache *c, int count) {
 
   /* Size of the gravity cache */
   const int padded_count = count - (count % VEC_SIZE) + VEC_SIZE;
-  const size_t sizeBytes = padded_count * sizeof(float);
+  const size_t sizeBytesF = padded_count * sizeof(float);
+  const size_t sizeBytesI = padded_count * sizeof(int);
 
   /* Delete old stuff if any */
   gravity_cache_clean(c);
 
-  int error = 0;
-  error += posix_memalign((void **)&c->x, SWIFT_CACHE_ALIGNMENT, sizeBytes);
-  error += posix_memalign((void **)&c->y, SWIFT_CACHE_ALIGNMENT, sizeBytes);
-  error += posix_memalign((void **)&c->z, SWIFT_CACHE_ALIGNMENT, sizeBytes);
-  error +=
-      posix_memalign((void **)&c->epsilon, SWIFT_CACHE_ALIGNMENT, sizeBytes);
-  error += posix_memalign((void **)&c->m, SWIFT_CACHE_ALIGNMENT, sizeBytes);
-  error += posix_memalign((void **)&c->a_x, SWIFT_CACHE_ALIGNMENT, sizeBytes);
-  error += posix_memalign((void **)&c->a_y, SWIFT_CACHE_ALIGNMENT, sizeBytes);
-  error += posix_memalign((void **)&c->a_z, SWIFT_CACHE_ALIGNMENT, sizeBytes);
-
-  if (error != 0)
-    error("Couldn't allocate gravity cache, size: %d", padded_count);
+  int e = 0;
+  e += posix_memalign((void **)&c->x, SWIFT_CACHE_ALIGNMENT, sizeBytesF);
+  e += posix_memalign((void **)&c->y, SWIFT_CACHE_ALIGNMENT, sizeBytesF);
+  e += posix_memalign((void **)&c->z, SWIFT_CACHE_ALIGNMENT, sizeBytesF);
+  e += posix_memalign((void **)&c->epsilon, SWIFT_CACHE_ALIGNMENT, sizeBytesF);
+  e += posix_memalign((void **)&c->m, SWIFT_CACHE_ALIGNMENT, sizeBytesF);
+  e += posix_memalign((void **)&c->a_x, SWIFT_CACHE_ALIGNMENT, sizeBytesF);
+  e += posix_memalign((void **)&c->a_y, SWIFT_CACHE_ALIGNMENT, sizeBytesF);
+  e += posix_memalign((void **)&c->a_z, SWIFT_CACHE_ALIGNMENT, sizeBytesF);
+  e += posix_memalign((void **)&c->active, SWIFT_CACHE_ALIGNMENT, sizeBytesI);
+  e +=
+      posix_memalign((void **)&c->use_mpole, SWIFT_CACHE_ALIGNMENT, sizeBytesI);
+
+  if (e != 0) error("Couldn't allocate gravity cache, size: %d", padded_count);
 
   c->count = padded_count;
 }
@@ -122,29 +132,36 @@ static INLINE void gravity_cache_init(struct gravity_cache *c, int count) {
 /**
  * @brief Fills a #gravity_cache structure with some #gpart and shift them.
  *
+ * Also checks whether the #gpart can use a M2P interaction instead of the
+ * more expensive P2P.
+ *
+ * @param max_active_bin The largest active bin in the current time-step.
  * @param c The #gravity_cache to fill.
  * @param gparts The #gpart array to read from.
  * @param gcount The number of particles to read.
  * @param gcount_padded The number of particle to read padded to the next
  * multiple of the vector length.
  * @param shift A shift to apply to all the particles.
- * @param cell The cell the #gpart are in.
+ * @param CoM The position of the multipole.
+ * @param r_max2 The square of the multipole radius.
+ * @param theta_crit2 The square of the opening angle.
+ * @param cell The cell we play with (to get reasonable padding positions).
  */
-__attribute__((always_inline)) INLINE void gravity_cache_populate(
-    struct gravity_cache *c, const struct gpart *restrict gparts, int gcount,
-    int gcount_padded, const double shift[3], const struct cell *cell) {
+__attribute__((always_inline)) INLINE static void gravity_cache_populate(
+    timebin_t max_active_bin, struct gravity_cache *c,
+    const struct gpart *restrict gparts, int gcount, int gcount_padded,
+    const double shift[3], const float CoM[3], float r_max2, float theta_crit2,
+    const struct cell *cell) {
 
   /* Make the compiler understand we are in happy vectorization land */
-  float *restrict x = c->x;
-  float *restrict y = c->y;
-  float *restrict z = c->z;
-  float *restrict m = c->m;
-  float *restrict epsilon = c->epsilon;
-  swift_align_information(x, SWIFT_CACHE_ALIGNMENT);
-  swift_align_information(y, SWIFT_CACHE_ALIGNMENT);
-  swift_align_information(z, SWIFT_CACHE_ALIGNMENT);
-  swift_align_information(epsilon, SWIFT_CACHE_ALIGNMENT);
-  swift_align_information(m, SWIFT_CACHE_ALIGNMENT);
+  swift_declare_aligned_ptr(float, x, c->x, SWIFT_CACHE_ALIGNMENT);
+  swift_declare_aligned_ptr(float, y, c->y, SWIFT_CACHE_ALIGNMENT);
+  swift_declare_aligned_ptr(float, z, c->z, SWIFT_CACHE_ALIGNMENT);
+  swift_declare_aligned_ptr(float, epsilon, c->epsilon, SWIFT_CACHE_ALIGNMENT);
+  swift_declare_aligned_ptr(float, m, c->m, SWIFT_CACHE_ALIGNMENT);
+  swift_declare_aligned_ptr(int, active, c->active, SWIFT_CACHE_ALIGNMENT);
+  swift_declare_aligned_ptr(int, use_mpole, c->use_mpole,
+                            SWIFT_CACHE_ALIGNMENT);
   swift_assume_size(gcount_padded, VEC_SIZE);
 
   /* Fill the input caches */
@@ -154,68 +171,91 @@ __attribute__((always_inline)) INLINE void gravity_cache_populate(
     z[i] = (float)(gparts[i].x[2] - shift[2]);
     epsilon[i] = gparts[i].epsilon;
     m[i] = gparts[i].mass;
+    active[i] = (int)(gparts[i].time_bin <= max_active_bin);
+
+    /* Check whether we can use the multipole instead of P-P */
+    const float dx = x[i] - CoM[0];
+    const float dy = y[i] - CoM[1];
+    const float dz = z[i] - CoM[2];
+    const float r2 = dx * dx + dy * dy + dz * dz;
+    use_mpole[i] = gravity_M2P_accept(r_max2, theta_crit2, r2);
   }
 
 #ifdef SWIFT_DEBUG_CHECKS
   if (gcount_padded < gcount) error("Padded counter smaller than counter");
 #endif
 
+  /* Particles used for padding should get impossible positions
+   * that have a reasonable magnitude. We use the cell width for this */
+  const float pos_padded[3] = {-2. * cell->width[0], -2. * cell->width[1],
+                               -2. * cell->width[2]};
+
   /* Pad the caches */
   for (int i = gcount; i < gcount_padded; ++i) {
-    x[i] = -3.f * cell->width[0];
-    y[i] = -3.f * cell->width[0];
-    z[i] = -3.f * cell->width[0];
+    x[i] = pos_padded[0];
+    y[i] = pos_padded[1];
+    z[i] = pos_padded[2];
     epsilon[i] = 0.f;
     m[i] = 0.f;
+    active[i] = 0;
+    use_mpole[i] = 0;
   }
 }
 
 /**
- * @brief Fills a #gravity_cache structure with some #gpart.
+ * @brief Fills a #gravity_cache with shifted #gpart, skipping the M2P check.
  *
+ * @param max_active_bin The largest active bin in the current time-step.
  * @param c The #gravity_cache to fill.
  * @param gparts The #gpart array to read from.
  * @param gcount The number of particles to read.
  * @param gcount_padded The number of particle to read padded to the next
  * multiple of the vector length.
+ * @param shift A shift to apply to all the particles.
+ * @param cell The cell we play with (to get reasonable padding positions).
  */
-__attribute__((always_inline)) INLINE void gravity_cache_populate_no_shift(
-    struct gravity_cache *c, const struct gpart *restrict gparts, int gcount,
-    int gcount_padded) {
+__attribute__((always_inline)) INLINE static void
+gravity_cache_populate_no_mpole(timebin_t max_active_bin,
+                                struct gravity_cache *c,
+                                const struct gpart *restrict gparts, int gcount,
+                                int gcount_padded, const double shift[3],
+                                const struct cell *cell) {
 
   /* Make the compiler understand we are in happy vectorization land */
-  float *restrict x = c->x;
-  float *restrict y = c->y;
-  float *restrict z = c->z;
-  float *restrict m = c->m;
-  float *restrict epsilon = c->epsilon;
-  swift_align_information(x, SWIFT_CACHE_ALIGNMENT);
-  swift_align_information(y, SWIFT_CACHE_ALIGNMENT);
-  swift_align_information(z, SWIFT_CACHE_ALIGNMENT);
-  swift_align_information(epsilon, SWIFT_CACHE_ALIGNMENT);
-  swift_align_information(m, SWIFT_CACHE_ALIGNMENT);
+  swift_declare_aligned_ptr(float, x, c->x, SWIFT_CACHE_ALIGNMENT);
+  swift_declare_aligned_ptr(float, y, c->y, SWIFT_CACHE_ALIGNMENT);
+  swift_declare_aligned_ptr(float, z, c->z, SWIFT_CACHE_ALIGNMENT);
+  swift_declare_aligned_ptr(float, epsilon, c->epsilon, SWIFT_CACHE_ALIGNMENT);
+  swift_declare_aligned_ptr(float, m, c->m, SWIFT_CACHE_ALIGNMENT);
+  swift_declare_aligned_ptr(int, active, c->active, SWIFT_CACHE_ALIGNMENT);
   swift_assume_size(gcount_padded, VEC_SIZE);
 
   /* Fill the input caches */
   for (int i = 0; i < gcount; ++i) {
-    x[i] = (float)(gparts[i].x[0]);
-    y[i] = (float)(gparts[i].x[1]);
-    z[i] = (float)(gparts[i].x[2]);
+    x[i] = (float)(gparts[i].x[0] - shift[0]);
+    y[i] = (float)(gparts[i].x[1] - shift[1]);
+    z[i] = (float)(gparts[i].x[2] - shift[2]);
     epsilon[i] = gparts[i].epsilon;
     m[i] = gparts[i].mass;
+    active[i] = (int)(gparts[i].time_bin <= max_active_bin);
   }
 
 #ifdef SWIFT_DEBUG_CHECKS
   if (gcount_padded < gcount) error("Padded counter smaller than counter");
 #endif
 
+  /* Particles used for padding should get impossible positions
+   * that have a reasonable magnitude. We use the cell width for this */
+  const float pos_padded[3] = {-2. * cell->width[0], -2. * cell->width[1],
+                               -2. * cell->width[2]};
   /* Pad the caches */
   for (int i = gcount; i < gcount_padded; ++i) {
-    x[i] = 0.f;
-    y[i] = 0.f;
-    z[i] = 0.f;
+    x[i] = pos_padded[0];
+    y[i] = pos_padded[1];
+    z[i] = pos_padded[2];
     epsilon[i] = 0.f;
     m[i] = 0.f;
+    active[i] = 0;
   }
 }
 
@@ -230,18 +270,18 @@ __attribute__((always_inline)) INLINE void gravity_cache_write_back(
     const struct gravity_cache *c, struct gpart *restrict gparts, int gcount) {
 
   /* Make the compiler understand we are in happy vectorization land */
-  float *restrict a_x = c->a_x;
-  float *restrict a_y = c->a_y;
-  float *restrict a_z = c->a_z;
-  swift_align_information(a_x, SWIFT_CACHE_ALIGNMENT);
-  swift_align_information(a_y, SWIFT_CACHE_ALIGNMENT);
-  swift_align_information(a_z, SWIFT_CACHE_ALIGNMENT);
+  swift_declare_aligned_ptr(float, a_x, c->a_x, SWIFT_CACHE_ALIGNMENT);
+  swift_declare_aligned_ptr(float, a_y, c->a_y, SWIFT_CACHE_ALIGNMENT);
+  swift_declare_aligned_ptr(float, a_z, c->a_z, SWIFT_CACHE_ALIGNMENT);
+  swift_declare_aligned_ptr(int, active, c->active, SWIFT_CACHE_ALIGNMENT);
 
   /* Write stuff back to the particles */
   for (int i = 0; i < gcount; ++i) {
-    gparts[i].a_grav[0] += a_x[i];
-    gparts[i].a_grav[1] += a_y[i];
-    gparts[i].a_grav[2] += a_z[i];
+    if (active[i]) {
+      gparts[i].a_grav[0] += a_x[i];
+      gparts[i].a_grav[1] += a_y[i];
+      gparts[i].a_grav[2] += a_z[i];
+    }
   }
 }
 
diff --git a/src/gravity_derivatives.h b/src/gravity_derivatives.h
index 8c8379f74f5fc67d3671f0154b2aeacbc35ea9f1..cf8aa54338b2e87e8bf5f2cc453ad7417eea5804 100644
--- a/src/gravity_derivatives.h
+++ b/src/gravity_derivatives.h
@@ -32,1056 +32,358 @@
 
 /* Local headers. */
 #include "inline.h"
-
-/*************************/
-/* 0th order derivatives */
-/*************************/
-
-/**
- * @brief \f$ \phi(r_x, r_y, r_z) \f$.
- *
- * @param r_x x-coordinate of the distance vector (\f$ r_x \f$).
- * @param r_y y-coordinate of the distance vector (\f$ r_y \f$).
- * @param r_z z-coordinate of the distance vector (\f$ r_z \f$).
- * @param r_inv Inverse of the norm of the distance vector (\f$ |r|^{-1} \f$)
- */
-__attribute__((always_inline)) INLINE static double D_000(double r_x,
-                                                          double r_y,
-                                                          double r_z,
-                                                          double r_inv) {
-
-  return r_inv;
-}
-
-/*************************/
-/* 1st order derivatives */
-/*************************/
-
-/**
- * @brief \f$ \frac{\partial\phi(r_x, r_y, r_z)}{\partial r_x} \f$.
- *
- * @param r_x x-coordinate of the distance vector (\f$ r_x \f$).
- * @param r_y y-coordinate of the distance vector (\f$ r_y \f$).
- * @param r_z z-coordinate of the distance vector (\f$ r_z \f$).
- * @param r_inv Inverse of the norm of the distance vector (\f$ |r|^{-1} \f$)
- */
-__attribute__((always_inline)) INLINE static double D_100(double r_x,
-                                                          double r_y,
-                                                          double r_z,
-                                                          double r_inv) {
-
-  return -r_x * r_inv * r_inv * r_inv;
-}
-
-/**
- * @brief \f$ \frac{\partial\phi(r_x, r_y, r_z)}{\partial r_x} \f$.
- *
- * @param r_x x-coordinate of the distance vector (\f$ r_x \f$).
- * @param r_y y-coordinate of the distance vector (\f$ r_y \f$).
- * @param r_z z-coordinate of the distance vector (\f$ r_z \f$).
- * @param r_inv Inverse of the norm of the distance vector (\f$ |r|^{-1} \f$)
- */
-__attribute__((always_inline)) INLINE static double D_010(double r_x,
-                                                          double r_y,
-                                                          double r_z,
-                                                          double r_inv) {
-
-  return -r_y * r_inv * r_inv * r_inv;
-}
-
-/**
- * @brief \f$ \frac{\partial\phi(r_x, r_y, r_z)}{\partial r_x} \f$.
- *
- * @param r_x x-coordinate of the distance vector (\f$ r_x \f$).
- * @param r_y y-coordinate of the distance vector (\f$ r_y \f$).
- * @param r_z z-coordinate of the distance vector (\f$ r_z \f$).
- * @param r_inv Inverse of the norm of the distance vector (\f$ |r|^{-1} \f$)
- */
-__attribute__((always_inline)) INLINE static double D_001(double r_x,
-                                                          double r_y,
-                                                          double r_z,
-                                                          double r_inv) {
-
-  return -r_z * r_inv * r_inv * r_inv;
-}
-
-/*************************/
-/* 2nd order derivatives */
-/*************************/
-
-/**
- * @brief \f$ \frac{\partial^2\phi(r_x, r_y, r_z)}{\partial r_x^2} \f$.
- *
- * @param r_x x-coordinate of the distance vector (\f$ r_x \f$).
- * @param r_y y-coordinate of the distance vector (\f$ r_y \f$).
- * @param r_z z-coordinate of the distance vector (\f$ r_z \f$).
- * @param r_inv Inverse of the norm of the distance vector (\f$ |r|^{-1} \f$)
- */
-__attribute__((always_inline)) INLINE static double D_200(double r_x,
-                                                          double r_y,
-                                                          double r_z,
-                                                          double r_inv) {
-  const double r_inv2 = r_inv * r_inv;
-  const double r_inv3 = r_inv * r_inv2;
-  const double r_inv5 = r_inv3 * r_inv2;
-  return 3. * r_x * r_x * r_inv5 - r_inv3;
-}
-
-/**
- * @brief \f$ \frac{\partial^2\phi(r_x, r_y, r_z)}{\partial r_y^2} \f$.
- *
- * @param r_x x-coordinate of the distance vector (\f$ r_x \f$).
- * @param r_y y-coordinate of the distance vector (\f$ r_y \f$).
- * @param r_z z-coordinate of the distance vector (\f$ r_z \f$).
- * @param r_inv Inverse of the norm of the distance vector (\f$ |r|^{-1} \f$)
- */
-__attribute__((always_inline)) INLINE static double D_020(double r_x,
-                                                          double r_y,
-                                                          double r_z,
-                                                          double r_inv) {
-  const double r_inv2 = r_inv * r_inv;
-  const double r_inv3 = r_inv * r_inv2;
-  const double r_inv5 = r_inv3 * r_inv2;
-  return 3. * r_y * r_y * r_inv5 - r_inv3;
-}
-
-/**
- * @brief \f$ \frac{\partial^2\phi(r_x, r_y, r_z)}{\partial r_z^2} \f$.
- *
- * @param r_x x-coordinate of the distance vector (\f$ r_x \f$).
- * @param r_y y-coordinate of the distance vector (\f$ r_y \f$).
- * @param r_z z-coordinate of the distance vector (\f$ r_z \f$).
- * @param r_inv Inverse of the norm of the distance vector (\f$ |r|^{-1} \f$)
- */
-__attribute__((always_inline)) INLINE static double D_002(double r_x,
-                                                          double r_y,
-                                                          double r_z,
-                                                          double r_inv) {
-  const double r_inv2 = r_inv * r_inv;
-  const double r_inv3 = r_inv * r_inv2;
-  const double r_inv5 = r_inv3 * r_inv2;
-  return 3. * r_z * r_z * r_inv5 - r_inv3;
-}
-
-/**
- * @brief \f$ \frac{\partial^2\phi(r_x, r_y, r_z)}{\partial r_x\partial r_y}
- * \f$.
- *
- * @param r_x x-coordinate of the distance vector (\f$ r_x \f$).
- * @param r_y y-coordinate of the distance vector (\f$ r_y \f$).
- * @param r_z z-coordinate of the distance vector (\f$ r_z \f$).
- * @param r_inv Inverse of the norm of the distance vector (\f$ |r|^{-1} \f$)
- */
-__attribute__((always_inline)) INLINE static double D_110(double r_x,
-                                                          double r_y,
-                                                          double r_z,
-                                                          double r_inv) {
-  const double r_inv2 = r_inv * r_inv;
-  const double r_inv5 = r_inv2 * r_inv2 * r_inv;
-  return 3. * r_x * r_y * r_inv5;
-}
-
-/**
- * @brief \f$ \frac{\partial^2\phi(r_x, r_y, r_z)}{\partial r_x\partial r_z}
- * \f$.
- *
- * @param r_x x-coordinate of the distance vector (\f$ r_x \f$).
- * @param r_y y-coordinate of the distance vector (\f$ r_y \f$).
- * @param r_z z-coordinate of the distance vector (\f$ r_z \f$).
- * @param r_inv Inverse of the norm of the distance vector (\f$ |r|^{-1} \f$)
- */
-__attribute__((always_inline)) INLINE static double D_101(double r_x,
-                                                          double r_y,
-                                                          double r_z,
-                                                          double r_inv) {
-  const double r_inv2 = r_inv * r_inv;
-  const double r_inv5 = r_inv2 * r_inv2 * r_inv;
-  return 3. * r_x * r_z * r_inv5;
-}
-
-/**
- * @brief \f$ \frac{\partial^2\phi(r_x, r_y, r_z)}{\partial r_y\partial r_z}
- * \f$.
- *
- * @param r_x x-coordinate of the distance vector (\f$ r_x \f$).
- * @param r_y y-coordinate of the distance vector (\f$ r_y \f$).
- * @param r_z z-coordinate of the distance vector (\f$ r_z \f$).
- * @param r_inv Inverse of the norm of the distance vector (\f$ |r|^{-1} \f$)
- */
-__attribute__((always_inline)) INLINE static double D_011(double r_x,
-                                                          double r_y,
-                                                          double r_z,
-                                                          double r_inv) {
-  const double r_inv2 = r_inv * r_inv;
-  const double r_inv5 = r_inv2 * r_inv2 * r_inv;
-  return 3. * r_y * r_z * r_inv5;
-}
-
-/*************************/
-/* 3rd order derivatives */
-/*************************/
-
-/**
- * @brief \f$ \frac{\partial^3\phi(r_x, r_y, r_z)}{\partial r_x^3} \f$.
- *
- * @param r_x x-coordinate of the distance vector (\f$ r_x \f$).
- * @param r_y y-coordinate of the distance vector (\f$ r_y \f$).
- * @param r_z z-coordinate of the distance vector (\f$ r_z \f$).
- * @param r_inv Inverse of the norm of the distance vector (\f$ |r|^{-1} \f$)
- */
-__attribute__((always_inline)) INLINE static double D_300(double r_x,
-                                                          double r_y,
-                                                          double r_z,
-                                                          double r_inv) {
-  const double r_inv2 = r_inv * r_inv;
-  const double r_inv5 = r_inv2 * r_inv2 * r_inv;
-  const double r_inv7 = r_inv5 * r_inv2;
-  return -15. * r_x * r_x * r_x * r_inv7 + 9. * r_x * r_inv5;
-}
-
-/**
- * @brief \f$ \frac{\partial^3\phi(r_x, r_y, r_z)}{\partial r_y^3} \f$.
- *
- * @param r_x x-coordinate of the distance vector (\f$ r_x \f$).
- * @param r_y y-coordinate of the distance vector (\f$ r_y \f$).
- * @param r_z z-coordinate of the distance vector (\f$ r_z \f$).
- * @param r_inv Inverse of the norm of the distance vector (\f$ |r|^{-1} \f$)
- */
-__attribute__((always_inline)) INLINE static double D_030(double r_x,
-                                                          double r_y,
-                                                          double r_z,
-                                                          double r_inv) {
-  const double r_inv2 = r_inv * r_inv;
-  const double r_inv5 = r_inv2 * r_inv2 * r_inv;
-  const double r_inv7 = r_inv5 * r_inv2;
-  return -15. * r_y * r_y * r_y * r_inv7 + 9. * r_y * r_inv5;
-}
-
-/**
- * @brief \f$ \frac{\partial^3\phi(r_x, r_y, r_z)}{\partial r_z^3} \f$.
- *
- * @param r_x x-coordinate of the distance vector (\f$ r_x \f$).
- * @param r_y y-coordinate of the distance vector (\f$ r_y \f$).
- * @param r_z z-coordinate of the distance vector (\f$ r_z \f$).
- * @param r_inv Inverse of the norm of the distance vector (\f$ |r|^{-1} \f$)
- */
-__attribute__((always_inline)) INLINE static double D_003(double r_x,
-                                                          double r_y,
-                                                          double r_z,
-                                                          double r_inv) {
-  const double r_inv2 = r_inv * r_inv;
-  const double r_inv5 = r_inv2 * r_inv2 * r_inv;
-  const double r_inv7 = r_inv5 * r_inv2;
-  return -15. * r_z * r_z * r_z * r_inv7 + 9. * r_z * r_inv5;
-}
-
-/**
- * @brief \f$ \frac{\partial^3\phi(r_x, r_y, r_z)}{\partial r_x^2\partial r_y}
- * \f$.
- *
- * @param r_x x-coordinate of the distance vector (\f$ r_x \f$).
- * @param r_y y-coordinate of the distance vector (\f$ r_y \f$).
- * @param r_z z-coordinate of the distance vector (\f$ r_z \f$).
- * @param r_inv Inverse of the norm of the distance vector (\f$ |r|^{-1} \f$)
- */
-__attribute__((always_inline)) INLINE static double D_210(double r_x,
-                                                          double r_y,
-                                                          double r_z,
-                                                          double r_inv) {
-  const double r_inv2 = r_inv * r_inv;
-  const double r_inv5 = r_inv2 * r_inv2 * r_inv;
-  const double r_inv7 = r_inv5 * r_inv2;
-  return -15. * r_x * r_x * r_y * r_inv7 + 3. * r_y * r_inv5;
-}
-
-/**
- * @brief \f$ \frac{\partial^3\phi(r_x, r_y, r_z)}{\partial r_x^2\partial r_z}
- * \f$.
- *
- * @param r_x x-coordinate of the distance vector (\f$ r_x \f$).
- * @param r_y y-coordinate of the distance vector (\f$ r_y \f$).
- * @param r_z z-coordinate of the distance vector (\f$ r_z \f$).
- * @param r_inv Inverse of the norm of the distance vector (\f$ |r|^{-1} \f$)
- */
-__attribute__((always_inline)) INLINE static double D_201(double r_x,
-                                                          double r_y,
-                                                          double r_z,
-                                                          double r_inv) {
-  const double r_inv2 = r_inv * r_inv;
-  const double r_inv5 = r_inv2 * r_inv2 * r_inv;
-  const double r_inv7 = r_inv5 * r_inv2;
-  return -15. * r_x * r_x * r_z * r_inv7 + 3. * r_z * r_inv5;
-}
-
-/**
- * @brief \f$ \frac{\partial^3\phi(r_x, r_y, r_z)}{\partial r_x\partial r_y^2}
- * \f$.
- *
- * @param r_x x-coordinate of the distance vector (\f$ r_x \f$).
- * @param r_y y-coordinate of the distance vector (\f$ r_y \f$).
- * @param r_z z-coordinate of the distance vector (\f$ r_z \f$).
- * @param r_inv Inverse of the norm of the distance vector (\f$ |r|^{-1} \f$)
- */
-__attribute__((always_inline)) INLINE static double D_120(double r_x,
-                                                          double r_y,
-                                                          double r_z,
-                                                          double r_inv) {
-  const double r_inv2 = r_inv * r_inv;
-  const double r_inv5 = r_inv2 * r_inv2 * r_inv;
-  const double r_inv7 = r_inv5 * r_inv2;
-  return -15. * r_x * r_y * r_y * r_inv7 + 3. * r_x * r_inv5;
-}
-
-/**
- * @brief \f$ \frac{\partial^3\phi(r_x, r_y, r_z)}{\partial r_y^2\partial r_z}
- * \f$.
- *
- * @param r_x x-coordinate of the distance vector (\f$ r_x \f$).
- * @param r_y y-coordinate of the distance vector (\f$ r_y \f$).
- * @param r_z z-coordinate of the distance vector (\f$ r_z \f$).
- * @param r_inv Inverse of the norm of the distance vector (\f$ |r|^{-1} \f$)
- */
-__attribute__((always_inline)) INLINE static double D_021(double r_x,
-                                                          double r_y,
-                                                          double r_z,
-                                                          double r_inv) {
-  const double r_inv2 = r_inv * r_inv;
-  const double r_inv5 = r_inv2 * r_inv2 * r_inv;
-  const double r_inv7 = r_inv5 * r_inv2;
-  return -15. * r_z * r_y * r_y * r_inv7 + 3. * r_z * r_inv5;
-}
-
-/**
- * @brief \f$ \frac{\partial^3\phi(r_x, r_y, r_z)}{\partial r_x\partial r_z^2}
- * \f$.
- *
- * @param r_x x-coordinate of the distance vector (\f$ r_x \f$).
- * @param r_y y-coordinate of the distance vector (\f$ r_y \f$).
- * @param r_z z-coordinate of the distance vector (\f$ r_z \f$).
- * @param r_inv Inverse of the norm of the distance vector (\f$ |r|^{-1} \f$)
- */
-__attribute__((always_inline)) INLINE static double D_102(double r_x,
-                                                          double r_y,
-                                                          double r_z,
-                                                          double r_inv) {
-  const double r_inv2 = r_inv * r_inv;
-  const double r_inv5 = r_inv2 * r_inv2 * r_inv;
-  const double r_inv7 = r_inv5 * r_inv2;
-  return -15. * r_x * r_z * r_z * r_inv7 + 3. * r_x * r_inv5;
-}
-
-/**
- * @brief \f$ \frac{\partial^3\phi(r_x, r_y, r_z)}{\partial r_y\partial r_z^2}
- * \f$.
- *
- * @param r_x x-coordinate of the distance vector (\f$ r_x \f$).
- * @param r_y y-coordinate of the distance vector (\f$ r_y \f$).
- * @param r_z z-coordinate of the distance vector (\f$ r_z \f$).
- * @param r_inv Inverse of the norm of the distance vector (\f$ |r|^{-1} \f$)
- */
-__attribute__((always_inline)) INLINE static double D_012(double r_x,
-                                                          double r_y,
-                                                          double r_z,
-                                                          double r_inv) {
-  const double r_inv2 = r_inv * r_inv;
-  const double r_inv5 = r_inv2 * r_inv2 * r_inv;
-  const double r_inv7 = r_inv5 * r_inv2;
-  return -15. * r_y * r_z * r_z * r_inv7 + 3. * r_y * r_inv5;
-}
-
-/**
- * @brief \f$ \frac{\partial^3\phi(r_x, r_y, r_z)}{\partial r_z\partial
- * r_y\partial r_z} \f$.
- *
- * @param r_x x-coordinate of the distance vector (\f$ r_x \f$).
- * @param r_y y-coordinate of the distance vector (\f$ r_y \f$).
- * @param r_z z-coordinate of the distance vector (\f$ r_z \f$).
- * @param r_inv Inverse of the norm of the distance vector (\f$ |r|^{-1} \f$)
- */
-__attribute__((always_inline)) INLINE static double D_111(double r_x,
-                                                          double r_y,
-                                                          double r_z,
-                                                          double r_inv) {
-  const double r_inv3 = r_inv * r_inv * r_inv;
-  const double r_inv7 = r_inv3 * r_inv3 * r_inv;
-  return -15. * r_x * r_y * r_z * r_inv7;
-}
-
-/*********************************/
-/* 4th order gravity derivatives */
-/*********************************/
-
-/**
- * @brief Compute \f$ \frac{\partial^4}{ \partial_z^4 }\phi(x, y, z} \f$.
- *
- * Note that r_inv = 1./sqrt(r_x^2 + r_y^2 + r_z^2)
- */
-__attribute__((always_inline)) INLINE static double D_004(double r_x,
-                                                          double r_y,
-                                                          double r_z,
-                                                          double r_inv) {
-  return +105. * r_inv * r_inv * r_inv * r_inv * r_inv * r_inv * r_inv * r_inv *
-             r_inv * (r_z * r_z * r_z * r_z) -
-         15. * r_inv * r_inv * r_inv * r_inv * r_inv * r_inv * r_inv * 6.0 *
-             (r_z * r_z) +
-         3. * r_inv * r_inv * r_inv * r_inv * r_inv * 3.0;
-  /* 5 zero-valued terms not written out */
-}
-
-/**
- * @brief Compute \f$ \frac{\partial^4}{ \partial_y^1 \partial_z^3 }\phi(x, y,
- * z} \f$.
- *
- * Note that r_inv = 1./sqrt(r_x^2 + r_y^2 + r_z^2)
- */
-__attribute__((always_inline)) INLINE static double D_013(double r_x,
-                                                          double r_y,
-                                                          double r_z,
-                                                          double r_inv) {
-  return +105. * r_inv * r_inv * r_inv * r_inv * r_inv * r_inv * r_inv * r_inv *
-             r_inv * (r_y * r_z * r_z * r_z) -
-         15. * r_inv * r_inv * r_inv * r_inv * r_inv * r_inv * r_inv * 3.0 *
-             (r_y * r_z);
-  /* 11 zero-valued terms not written out */
-}
-
-/**
- * @brief Compute \f$ \frac{\partial^4}{ \partial_y^2 \partial_z^2 }\phi(x, y,
- * z} \f$.
- *
- * Note that r_inv = 1./sqrt(r_x^2 + r_y^2 + r_z^2)
- */
-__attribute__((always_inline)) INLINE static double D_022(double r_x,
-                                                          double r_y,
-                                                          double r_z,
-                                                          double r_inv) {
-  return +105. * r_inv * r_inv * r_inv * r_inv * r_inv * r_inv * r_inv * r_inv *
-             r_inv * (r_y * r_y * r_z * r_z) -
-         15. * r_inv * r_inv * r_inv * r_inv * r_inv * r_inv * r_inv *
-             (r_y * r_y) -
-         15. * r_inv * r_inv * r_inv * r_inv * r_inv * r_inv * r_inv *
-             (r_z * r_z) +
-         3. * r_inv * r_inv * r_inv * r_inv * r_inv;
-  /* 11 zero-valued terms not written out */
-}
-
-/**
- * @brief Compute \f$ \frac{\partial^4}{ \partial_y^3 \partial_z^1 }\phi(x, y,
- * z} \f$.
- *
- * Note that r_inv = 1./sqrt(r_x^2 + r_y^2 + r_z^2)
- */
-__attribute__((always_inline)) INLINE static double D_031(double r_x,
-                                                          double r_y,
-                                                          double r_z,
-                                                          double r_inv) {
-  return +105. * r_inv * r_inv * r_inv * r_inv * r_inv * r_inv * r_inv * r_inv *
-             r_inv * (r_y * r_y * r_y * r_z) -
-         15. * r_inv * r_inv * r_inv * r_inv * r_inv * r_inv * r_inv * 3.0 *
-             (r_y * r_z);
-  /* 11 zero-valued terms not written out */
-}
-
-/**
- * @brief Compute \f$ \frac{\partial^4}{ \partial_y^4 }\phi(x, y, z} \f$.
- *
- * Note that r_inv = 1./sqrt(r_x^2 + r_y^2 + r_z^2)
- */
-__attribute__((always_inline)) INLINE static double D_040(double r_x,
-                                                          double r_y,
-                                                          double r_z,
-                                                          double r_inv) {
-  return +105. * r_inv * r_inv * r_inv * r_inv * r_inv * r_inv * r_inv * r_inv *
-             r_inv * (r_y * r_y * r_y * r_y) -
-         15. * r_inv * r_inv * r_inv * r_inv * r_inv * r_inv * r_inv * 6.0 *
-             (r_y * r_y) +
-         3. * r_inv * r_inv * r_inv * r_inv * r_inv * 3.0;
-  /* 5 zero-valued terms not written out */
-}
-
-/**
- * @brief Compute \f$ \frac{\partial^4}{ \partial_x^1 \partial_z^3 }\phi(x, y,
- * z} \f$.
- *
- * Note that r_inv = 1./sqrt(r_x^2 + r_y^2 + r_z^2)
- */
-__attribute__((always_inline)) INLINE static double D_103(double r_x,
-                                                          double r_y,
-                                                          double r_z,
-                                                          double r_inv) {
-  return +105. * r_inv * r_inv * r_inv * r_inv * r_inv * r_inv * r_inv * r_inv *
-             r_inv * (r_x * r_z * r_z * r_z) -
-         15. * r_inv * r_inv * r_inv * r_inv * r_inv * r_inv * r_inv * 3.0 *
-             (r_x * r_z);
-  /* 11 zero-valued terms not written out */
-}
-
-/**
- * @brief Compute \f$ \frac{\partial^4}{ \partial_x^1 \partial_y^1 \partial_z^2
- * }\phi(x, y, z} \f$.
- *
- * Note that r_inv = 1./sqrt(r_x^2 + r_y^2 + r_z^2)
- */
-__attribute__((always_inline)) INLINE static double D_112(double r_x,
-                                                          double r_y,
-                                                          double r_z,
-                                                          double r_inv) {
-  return +105. * r_inv * r_inv * r_inv * r_inv * r_inv * r_inv * r_inv * r_inv *
-             r_inv * (r_x * r_y * r_z * r_z) -
-         15. * r_inv * r_inv * r_inv * r_inv * r_inv * r_inv * r_inv *
-             (r_x * r_y);
-  /* 13 zero-valued terms not written out */
-}
-
-/**
- * @brief Compute \f$ \frac{\partial^4}{ \partial_x^1 \partial_y^2 \partial_z^1
- * }\phi(x, y, z} \f$.
- *
- * Note that r_inv = 1./sqrt(r_x^2 + r_y^2 + r_z^2)
- */
-__attribute__((always_inline)) INLINE static double D_121(double r_x,
-                                                          double r_y,
-                                                          double r_z,
-                                                          double r_inv) {
-  return +105. * r_inv * r_inv * r_inv * r_inv * r_inv * r_inv * r_inv * r_inv *
-             r_inv * (r_x * r_y * r_y * r_z) -
-         15. * r_inv * r_inv * r_inv * r_inv * r_inv * r_inv * r_inv *
-             (r_x * r_z);
-  /* 13 zero-valued terms not written out */
-}
-
-/**
- * @brief Compute \f$ \frac{\partial^4}{ \partial_x^1 \partial_y^3 }\phi(x, y,
- * z} \f$.
- *
- * Note that r_inv = 1./sqrt(r_x^2 + r_y^2 + r_z^2)
- */
-__attribute__((always_inline)) INLINE static double D_130(double r_x,
-                                                          double r_y,
-                                                          double r_z,
-                                                          double r_inv) {
-  return +105. * r_inv * r_inv * r_inv * r_inv * r_inv * r_inv * r_inv * r_inv *
-             r_inv * (r_x * r_y * r_y * r_y) -
-         15. * r_inv * r_inv * r_inv * r_inv * r_inv * r_inv * r_inv * 3.0 *
-             (r_x * r_y);
-  /* 11 zero-valued terms not written out */
-}
-
-/**
- * @brief Compute \f$ \frac{\partial^4}{ \partial_x^2 \partial_z^2 }\phi(x, y,
- * z} \f$.
- *
- * Note that r_inv = 1./sqrt(r_x^2 + r_y^2 + r_z^2)
- */
-__attribute__((always_inline)) INLINE static double D_202(double r_x,
-                                                          double r_y,
-                                                          double r_z,
-                                                          double r_inv) {
-  return +105. * r_inv * r_inv * r_inv * r_inv * r_inv * r_inv * r_inv * r_inv *
-             r_inv * (r_x * r_x * r_z * r_z) -
-         15. * r_inv * r_inv * r_inv * r_inv * r_inv * r_inv * r_inv *
-             (r_x * r_x) -
-         15. * r_inv * r_inv * r_inv * r_inv * r_inv * r_inv * r_inv *
-             (r_z * r_z) +
-         3. * r_inv * r_inv * r_inv * r_inv * r_inv;
-  /* 11 zero-valued terms not written out */
-}
-
-/**
- * @brief Compute \f$ \frac{\partial^4}{ \partial_x^2 \partial_y^1 \partial_z^1
- * }\phi(x, y, z} \f$.
- *
- * Note that r_inv = 1./sqrt(r_x^2 + r_y^2 + r_z^2)
- */
-__attribute__((always_inline)) INLINE static double D_211(double r_x,
-                                                          double r_y,
-                                                          double r_z,
-                                                          double r_inv) {
-  return +105. * r_inv * r_inv * r_inv * r_inv * r_inv * r_inv * r_inv * r_inv *
-             r_inv * (r_x * r_x * r_y * r_z) -
-         15. * r_inv * r_inv * r_inv * r_inv * r_inv * r_inv * r_inv *
-             (r_y * r_z);
-  /* 13 zero-valued terms not written out */
-}
-
-/**
- * @brief Compute \f$ \frac{\partial^4}{ \partial_x^2 \partial_y^2 }\phi(x, y,
- * z} \f$.
- *
- * Note that r_inv = 1./sqrt(r_x^2 + r_y^2 + r_z^2)
- */
-__attribute__((always_inline)) INLINE static double D_220(double r_x,
-                                                          double r_y,
-                                                          double r_z,
-                                                          double r_inv) {
-  return +105. * r_inv * r_inv * r_inv * r_inv * r_inv * r_inv * r_inv * r_inv *
-             r_inv * (r_x * r_x * r_y * r_y) -
-         15. * r_inv * r_inv * r_inv * r_inv * r_inv * r_inv * r_inv *
-             (r_x * r_x) -
-         15. * r_inv * r_inv * r_inv * r_inv * r_inv * r_inv * r_inv *
-             (r_y * r_y) +
-         3. * r_inv * r_inv * r_inv * r_inv * r_inv;
-  /* 11 zero-valued terms not written out */
-}
-
-/**
- * @brief Compute \f$ \frac{\partial^4}{ \partial_x^3 \partial_z^1 }\phi(x, y,
- * z} \f$.
- *
- * Note that r_inv = 1./sqrt(r_x^2 + r_y^2 + r_z^2)
- */
-__attribute__((always_inline)) INLINE static double D_301(double r_x,
-                                                          double r_y,
-                                                          double r_z,
-                                                          double r_inv) {
-  return +105. * r_inv * r_inv * r_inv * r_inv * r_inv * r_inv * r_inv * r_inv *
-             r_inv * (r_x * r_x * r_x * r_z) -
-         15. * r_inv * r_inv * r_inv * r_inv * r_inv * r_inv * r_inv * 3.0 *
-             (r_x * r_z);
-  /* 11 zero-valued terms not written out */
-}
-
-/**
- * @brief Compute \f$ \frac{\partial^4}{ \partial_x^3 \partial_y^1 }\phi(x, y,
- * z} \f$.
- *
- * Note that r_inv = 1./sqrt(r_x^2 + r_y^2 + r_z^2)
- */
-__attribute__((always_inline)) INLINE static double D_310(double r_x,
-                                                          double r_y,
-                                                          double r_z,
-                                                          double r_inv) {
-  return +105. * r_inv * r_inv * r_inv * r_inv * r_inv * r_inv * r_inv * r_inv *
-             r_inv * (r_x * r_x * r_x * r_y) -
-         15. * r_inv * r_inv * r_inv * r_inv * r_inv * r_inv * r_inv * 3.0 *
-             (r_x * r_y);
-  /* 11 zero-valued terms not written out */
-}
-
-/**
- * @brief Compute \f$ \frac{\partial^4}{ \partial_x^4 }\phi(x, y, z} \f$.
- *
- * Note that r_inv = 1./sqrt(r_x^2 + r_y^2 + r_z^2)
- */
-__attribute__((always_inline)) INLINE static double D_400(double r_x,
-                                                          double r_y,
-                                                          double r_z,
-                                                          double r_inv) {
-  return +105. * r_inv * r_inv * r_inv * r_inv * r_inv * r_inv * r_inv * r_inv *
-             r_inv * (r_x * r_x * r_x * r_x) -
-         15. * r_inv * r_inv * r_inv * r_inv * r_inv * r_inv * r_inv * 6.0 *
-             (r_x * r_x) +
-         3. * r_inv * r_inv * r_inv * r_inv * r_inv * 3.0;
-  /* 5 zero-valued terms not written out */
-}
-
-/*********************************/
-/* 5th order gravity derivatives */
-/*********************************/
-
-/**
- * @brief Compute \f$ \frac{\partial^5}{ \partial_z^5 }\phi(x, y, z} \f$.
- *
- * Note that r_inv = 1./sqrt(r_x^2 + r_y^2 + r_z^2)
- */
-__attribute__((always_inline)) INLINE static double D_005(double r_x,
-                                                          double r_y,
-                                                          double r_z,
-                                                          double r_inv) {
-  return -945. * r_inv * r_inv * r_inv * r_inv * r_inv * r_inv * r_inv * r_inv *
-             r_inv * r_inv * r_inv * (r_z * r_z * r_z * r_z * r_z) +
-         105. * r_inv * r_inv * r_inv * r_inv * r_inv * r_inv * r_inv * r_inv *
-             r_inv * 10.0 * (r_z * r_z * r_z) -
-         15. * r_inv * r_inv * r_inv * r_inv * r_inv * r_inv * r_inv * 15.0 *
-             (r_z);
-  /* 26 zero-valued terms not written out */
-}
-
-/**
- * @brief Compute \f$ \frac{\partial^5}{ \partial_y^1 \partial_z^4 }\phi(x, y,
- * z} \f$.
- *
- * Note that r_inv = 1./sqrt(r_x^2 + r_y^2 + r_z^2)
- */
-__attribute__((always_inline)) INLINE static double D_014(double r_x,
-                                                          double r_y,
-                                                          double r_z,
-                                                          double r_inv) {
-  return -945. * r_inv * r_inv * r_inv * r_inv * r_inv * r_inv * r_inv * r_inv *
-             r_inv * r_inv * r_inv * (r_y * r_z * r_z * r_z * r_z) +
-         105. * r_inv * r_inv * r_inv * r_inv * r_inv * r_inv * r_inv * r_inv *
-             r_inv * 6.0 * (r_y * r_z * r_z) -
-         15. * r_inv * r_inv * r_inv * r_inv * r_inv * r_inv * r_inv * 3.0 *
-             (r_y);
-  /* 42 zero-valued terms not written out */
-}
-
-/**
- * @brief Compute \f$ \frac{\partial^5}{ \partial_y^2 \partial_z^3 }\phi(x, y,
- * z} \f$.
- *
- * Note that r_inv = 1./sqrt(r_x^2 + r_y^2 + r_z^2)
- */
-__attribute__((always_inline)) INLINE static double D_023(double r_x,
-                                                          double r_y,
-                                                          double r_z,
-                                                          double r_inv) {
-  return -945. * r_inv * r_inv * r_inv * r_inv * r_inv * r_inv * r_inv * r_inv *
-             r_inv * r_inv * r_inv * (r_y * r_y * r_z * r_z * r_z) +
-         105. * r_inv * r_inv * r_inv * r_inv * r_inv * r_inv * r_inv * r_inv *
-             r_inv * 3.0 * (r_y * r_y * r_z) +
-         105. * r_inv * r_inv * r_inv * r_inv * r_inv * r_inv * r_inv * r_inv *
-             r_inv * (r_z * r_z * r_z) -
-         15. * r_inv * r_inv * r_inv * r_inv * r_inv * r_inv * r_inv * 3.0 *
-             (r_z);
-  /* 44 zero-valued terms not written out */
-}
-
-/**
- * @brief Compute \f$ \frac{\partial^5}{ \partial_y^3 \partial_z^2 }\phi(x, y,
- * z} \f$.
- *
- * Note that r_inv = 1./sqrt(r_x^2 + r_y^2 + r_z^2)
- */
-__attribute__((always_inline)) INLINE static double D_032(double r_x,
-                                                          double r_y,
-                                                          double r_z,
-                                                          double r_inv) {
-  return -945. * r_inv * r_inv * r_inv * r_inv * r_inv * r_inv * r_inv * r_inv *
-             r_inv * r_inv * r_inv * (r_y * r_y * r_y * r_z * r_z) +
-         105. * r_inv * r_inv * r_inv * r_inv * r_inv * r_inv * r_inv * r_inv *
-             r_inv * (r_y * r_y * r_y) +
-         105. * r_inv * r_inv * r_inv * r_inv * r_inv * r_inv * r_inv * r_inv *
-             r_inv * 3.0 * (r_y * r_z * r_z) -
-         15. * r_inv * r_inv * r_inv * r_inv * r_inv * r_inv * r_inv * 3.0 *
-             (r_y);
-  /* 44 zero-valued terms not written out */
-}
-
-/**
- * @brief Compute \f$ \frac{\partial^5}{ \partial_y^4 \partial_z^1 }\phi(x, y,
- * z} \f$.
- *
- * Note that r_inv = 1./sqrt(r_x^2 + r_y^2 + r_z^2)
- */
-__attribute__((always_inline)) INLINE static double D_041(double r_x,
-                                                          double r_y,
-                                                          double r_z,
-                                                          double r_inv) {
-  return -945. * r_inv * r_inv * r_inv * r_inv * r_inv * r_inv * r_inv * r_inv *
-             r_inv * r_inv * r_inv * (r_y * r_y * r_y * r_y * r_z) +
-         105. * r_inv * r_inv * r_inv * r_inv * r_inv * r_inv * r_inv * r_inv *
-             r_inv * 6.0 * (r_y * r_y * r_z) -
-         15. * r_inv * r_inv * r_inv * r_inv * r_inv * r_inv * r_inv * 3.0 *
-             (r_z);
-  /* 42 zero-valued terms not written out */
-}
-
-/**
- * @brief Compute \f$ \frac{\partial^5}{ \partial_y^5 }\phi(x, y, z} \f$.
- *
- * Note that r_inv = 1./sqrt(r_x^2 + r_y^2 + r_z^2)
- */
-__attribute__((always_inline)) INLINE static double D_050(double r_x,
-                                                          double r_y,
-                                                          double r_z,
-                                                          double r_inv) {
-  return -945. * r_inv * r_inv * r_inv * r_inv * r_inv * r_inv * r_inv * r_inv *
-             r_inv * r_inv * r_inv * (r_y * r_y * r_y * r_y * r_y) +
-         105. * r_inv * r_inv * r_inv * r_inv * r_inv * r_inv * r_inv * r_inv *
-             r_inv * 10.0 * (r_y * r_y * r_y) -
-         15. * r_inv * r_inv * r_inv * r_inv * r_inv * r_inv * r_inv * 15.0 *
-             (r_y);
-  /* 26 zero-valued terms not written out */
-}
-
-/**
- * @brief Compute \f$ \frac{\partial^5}{ \partial_x^1 \partial_z^4 }\phi(x, y,
- * z} \f$.
- *
- * Note that r_inv = 1./sqrt(r_x^2 + r_y^2 + r_z^2)
- */
-__attribute__((always_inline)) INLINE static double D_104(double r_x,
-                                                          double r_y,
-                                                          double r_z,
-                                                          double r_inv) {
-  return -945. * r_inv * r_inv * r_inv * r_inv * r_inv * r_inv * r_inv * r_inv *
-             r_inv * r_inv * r_inv * (r_x * r_z * r_z * r_z * r_z) +
-         105. * r_inv * r_inv * r_inv * r_inv * r_inv * r_inv * r_inv * r_inv *
-             r_inv * 6.0 * (r_x * r_z * r_z) -
-         15. * r_inv * r_inv * r_inv * r_inv * r_inv * r_inv * r_inv * 3.0 *
-             (r_x);
-  /* 42 zero-valued terms not written out */
-}
-
-/**
- * @brief Compute \f$ \frac{\partial^5}{ \partial_x^1 \partial_y^1 \partial_z^3
- * }\phi(x, y, z} \f$.
- *
- * Note that r_inv = 1./sqrt(r_x^2 + r_y^2 + r_z^2)
- */
-__attribute__((always_inline)) INLINE static double D_113(double r_x,
-                                                          double r_y,
-                                                          double r_z,
-                                                          double r_inv) {
-  return -945. * r_inv * r_inv * r_inv * r_inv * r_inv * r_inv * r_inv * r_inv *
-             r_inv * r_inv * r_inv * (r_x * r_y * r_z * r_z * r_z) +
-         105. * r_inv * r_inv * r_inv * r_inv * r_inv * r_inv * r_inv * r_inv *
-             r_inv * 3.0 * (r_x * r_y * r_z);
-  /* 48 zero-valued terms not written out */
-}
-
-/**
- * @brief Compute \f$ \frac{\partial^5}{ \partial_x^1 \partial_y^2 \partial_z^2
- * }\phi(x, y, z} \f$.
- *
- * Note that r_inv = 1./sqrt(r_x^2 + r_y^2 + r_z^2)
- */
-__attribute__((always_inline)) INLINE static double D_122(double r_x,
-                                                          double r_y,
-                                                          double r_z,
-                                                          double r_inv) {
-  return -945. * r_inv * r_inv * r_inv * r_inv * r_inv * r_inv * r_inv * r_inv *
-             r_inv * r_inv * r_inv * (r_x * r_y * r_y * r_z * r_z) +
-         105. * r_inv * r_inv * r_inv * r_inv * r_inv * r_inv * r_inv * r_inv *
-             r_inv * (r_x * r_y * r_y) +
-         105. * r_inv * r_inv * r_inv * r_inv * r_inv * r_inv * r_inv * r_inv *
-             r_inv * (r_x * r_z * r_z) -
-         15. * r_inv * r_inv * r_inv * r_inv * r_inv * r_inv * r_inv * (r_x);
-  /* 48 zero-valued terms not written out */
-}
-
-/**
- * @brief Compute \f$ \frac{\partial^5}{ \partial_x^1 \partial_y^3 \partial_z^1
- * }\phi(x, y, z} \f$.
- *
- * Note that r_inv = 1./sqrt(r_x^2 + r_y^2 + r_z^2)
- */
-__attribute__((always_inline)) INLINE static double D_131(double r_x,
-                                                          double r_y,
-                                                          double r_z,
-                                                          double r_inv) {
-  return -945. * r_inv * r_inv * r_inv * r_inv * r_inv * r_inv * r_inv * r_inv *
-             r_inv * r_inv * r_inv * (r_x * r_y * r_y * r_y * r_z) +
-         105. * r_inv * r_inv * r_inv * r_inv * r_inv * r_inv * r_inv * r_inv *
-             r_inv * 3.0 * (r_x * r_y * r_z);
-  /* 48 zero-valued terms not written out */
-}
-
-/**
- * @brief Compute \f$ \frac{\partial^5}{ \partial_x^1 \partial_y^4 }\phi(x, y,
- * z} \f$.
- *
- * Note that r_inv = 1./sqrt(r_x^2 + r_y^2 + r_z^2)
- */
-__attribute__((always_inline)) INLINE static double D_140(double r_x,
-                                                          double r_y,
-                                                          double r_z,
-                                                          double r_inv) {
-  return -945. * r_inv * r_inv * r_inv * r_inv * r_inv * r_inv * r_inv * r_inv *
-             r_inv * r_inv * r_inv * (r_x * r_y * r_y * r_y * r_y) +
-         105. * r_inv * r_inv * r_inv * r_inv * r_inv * r_inv * r_inv * r_inv *
-             r_inv * 6.0 * (r_x * r_y * r_y) -
-         15. * r_inv * r_inv * r_inv * r_inv * r_inv * r_inv * r_inv * 3.0 *
-             (r_x);
-  /* 42 zero-valued terms not written out */
-}
-
-/**
- * @brief Compute \f$ \frac{\partial^5}{ \partial_x^2 \partial_z^3 }\phi(x, y,
- * z} \f$.
- *
- * Note that r_inv = 1./sqrt(r_x^2 + r_y^2 + r_z^2)
- */
-__attribute__((always_inline)) INLINE static double D_203(double r_x,
-                                                          double r_y,
-                                                          double r_z,
-                                                          double r_inv) {
-  return -945. * r_inv * r_inv * r_inv * r_inv * r_inv * r_inv * r_inv * r_inv *
-             r_inv * r_inv * r_inv * (r_x * r_x * r_z * r_z * r_z) +
-         105. * r_inv * r_inv * r_inv * r_inv * r_inv * r_inv * r_inv * r_inv *
-             r_inv * 3.0 * (r_x * r_x * r_z) +
-         105. * r_inv * r_inv * r_inv * r_inv * r_inv * r_inv * r_inv * r_inv *
-             r_inv * (r_z * r_z * r_z) -
-         15. * r_inv * r_inv * r_inv * r_inv * r_inv * r_inv * r_inv * 3.0 *
-             (r_z);
-  /* 44 zero-valued terms not written out */
-}
-
-/**
- * @brief Compute \f$ \frac{\partial^5}{ \partial_x^2 \partial_y^1 \partial_z^2
- * }\phi(x, y, z} \f$.
- *
- * Note that r_inv = 1./sqrt(r_x^2 + r_y^2 + r_z^2)
- */
-__attribute__((always_inline)) INLINE static double D_212(double r_x,
-                                                          double r_y,
-                                                          double r_z,
-                                                          double r_inv) {
-  return -945. * r_inv * r_inv * r_inv * r_inv * r_inv * r_inv * r_inv * r_inv *
-             r_inv * r_inv * r_inv * (r_x * r_x * r_y * r_z * r_z) +
-         105. * r_inv * r_inv * r_inv * r_inv * r_inv * r_inv * r_inv * r_inv *
-             r_inv * (r_x * r_x * r_y) +
-         105. * r_inv * r_inv * r_inv * r_inv * r_inv * r_inv * r_inv * r_inv *
-             r_inv * (r_y * r_z * r_z) -
-         15. * r_inv * r_inv * r_inv * r_inv * r_inv * r_inv * r_inv * (r_y);
-  /* 48 zero-valued terms not written out */
-}
-
-/**
- * @brief Compute \f$ \frac{\partial^5}{ \partial_x^2 \partial_y^2 \partial_z^1
- * }\phi(x, y, z} \f$.
- *
- * Note that r_inv = 1./sqrt(r_x^2 + r_y^2 + r_z^2)
- */
-__attribute__((always_inline)) INLINE static double D_221(double r_x,
-                                                          double r_y,
-                                                          double r_z,
-                                                          double r_inv) {
-  return -945. * r_inv * r_inv * r_inv * r_inv * r_inv * r_inv * r_inv * r_inv *
-             r_inv * r_inv * r_inv * (r_x * r_x * r_y * r_y * r_z) +
-         105. * r_inv * r_inv * r_inv * r_inv * r_inv * r_inv * r_inv * r_inv *
-             r_inv * (r_x * r_x * r_z) +
-         105. * r_inv * r_inv * r_inv * r_inv * r_inv * r_inv * r_inv * r_inv *
-             r_inv * (r_y * r_y * r_z) -
-         15. * r_inv * r_inv * r_inv * r_inv * r_inv * r_inv * r_inv * (r_z);
-  /* 48 zero-valued terms not written out */
-}
-
-/**
- * @brief Compute \f$ \frac{\partial^5}{ \partial_x^2 \partial_y^3 }\phi(x, y,
- * z} \f$.
- *
- * Note that r_inv = 1./sqrt(r_x^2 + r_y^2 + r_z^2)
- */
-__attribute__((always_inline)) INLINE static double D_230(double r_x,
-                                                          double r_y,
-                                                          double r_z,
-                                                          double r_inv) {
-  return -945. * r_inv * r_inv * r_inv * r_inv * r_inv * r_inv * r_inv * r_inv *
-             r_inv * r_inv * r_inv * (r_x * r_x * r_y * r_y * r_y) +
-         105. * r_inv * r_inv * r_inv * r_inv * r_inv * r_inv * r_inv * r_inv *
-             r_inv * 3.0 * (r_x * r_x * r_y) +
-         105. * r_inv * r_inv * r_inv * r_inv * r_inv * r_inv * r_inv * r_inv *
-             r_inv * (r_y * r_y * r_y) -
-         15. * r_inv * r_inv * r_inv * r_inv * r_inv * r_inv * r_inv * 3.0 *
-             (r_y);
-  /* 44 zero-valued terms not written out */
-}
-
-/**
- * @brief Compute \f$ \frac{\partial^5}{ \partial_x^3 \partial_z^2 }\phi(x, y,
- * z} \f$.
- *
- * Note that r_inv = 1./sqrt(r_x^2 + r_y^2 + r_z^2)
- */
-__attribute__((always_inline)) INLINE static double D_302(double r_x,
-                                                          double r_y,
-                                                          double r_z,
-                                                          double r_inv) {
-  return -945. * r_inv * r_inv * r_inv * r_inv * r_inv * r_inv * r_inv * r_inv *
-             r_inv * r_inv * r_inv * (r_x * r_x * r_x * r_z * r_z) +
-         105. * r_inv * r_inv * r_inv * r_inv * r_inv * r_inv * r_inv * r_inv *
-             r_inv * (r_x * r_x * r_x) +
-         105. * r_inv * r_inv * r_inv * r_inv * r_inv * r_inv * r_inv * r_inv *
-             r_inv * 3.0 * (r_x * r_z * r_z) -
-         15. * r_inv * r_inv * r_inv * r_inv * r_inv * r_inv * r_inv * 3.0 *
-             (r_x);
-  /* 44 zero-valued terms not written out */
-}
-
-/**
- * @brief Compute \f$ \frac{\partial^5}{ \partial_x^3 \partial_y^1 \partial_z^1
- * }\phi(x, y, z} \f$.
- *
- * Note that r_inv = 1./sqrt(r_x^2 + r_y^2 + r_z^2)
- */
-__attribute__((always_inline)) INLINE static double D_311(double r_x,
-                                                          double r_y,
-                                                          double r_z,
-                                                          double r_inv) {
-  return -945. * r_inv * r_inv * r_inv * r_inv * r_inv * r_inv * r_inv * r_inv *
-             r_inv * r_inv * r_inv * (r_x * r_x * r_x * r_y * r_z) +
-         105. * r_inv * r_inv * r_inv * r_inv * r_inv * r_inv * r_inv * r_inv *
-             r_inv * 3.0 * (r_x * r_y * r_z);
-  /* 48 zero-valued terms not written out */
-}
-
-/**
- * @brief Compute \f$ \frac{\partial^5}{ \partial_x^3 \partial_y^2 }\phi(x, y,
- * z} \f$.
- *
- * Note that r_inv = 1./sqrt(r_x^2 + r_y^2 + r_z^2)
- */
-__attribute__((always_inline)) INLINE static double D_320(double r_x,
-                                                          double r_y,
-                                                          double r_z,
-                                                          double r_inv) {
-  return -945. * r_inv * r_inv * r_inv * r_inv * r_inv * r_inv * r_inv * r_inv *
-             r_inv * r_inv * r_inv * (r_x * r_x * r_x * r_y * r_y) +
-         105. * r_inv * r_inv * r_inv * r_inv * r_inv * r_inv * r_inv * r_inv *
-             r_inv * (r_x * r_x * r_x) +
-         105. * r_inv * r_inv * r_inv * r_inv * r_inv * r_inv * r_inv * r_inv *
-             r_inv * 3.0 * (r_x * r_y * r_y) -
-         15. * r_inv * r_inv * r_inv * r_inv * r_inv * r_inv * r_inv * 3.0 *
-             (r_x);
-  /* 44 zero-valued terms not written out */
-}
-
-/**
- * @brief Compute \f$ \frac{\partial^5}{ \partial_x^4 \partial_z^1 }\phi(x, y,
- * z} \f$.
- *
- * Note that r_inv = 1./sqrt(r_x^2 + r_y^2 + r_z^2)
- */
-__attribute__((always_inline)) INLINE static double D_401(double r_x,
-                                                          double r_y,
-                                                          double r_z,
-                                                          double r_inv) {
-  return -945. * r_inv * r_inv * r_inv * r_inv * r_inv * r_inv * r_inv * r_inv *
-             r_inv * r_inv * r_inv * (r_x * r_x * r_x * r_x * r_z) +
-         105. * r_inv * r_inv * r_inv * r_inv * r_inv * r_inv * r_inv * r_inv *
-             r_inv * 6.0 * (r_x * r_x * r_z) -
-         15. * r_inv * r_inv * r_inv * r_inv * r_inv * r_inv * r_inv * 3.0 *
-             (r_z);
-  /* 42 zero-valued terms not written out */
-}
-
-/**
- * @brief Compute \f$ \frac{\partial^5}{ \partial_x^4 \partial_y^1 }\phi(x, y,
- * z} \f$.
- *
- * Note that r_inv = 1./sqrt(r_x^2 + r_y^2 + r_z^2)
- */
-__attribute__((always_inline)) INLINE static double D_410(double r_x,
-                                                          double r_y,
-                                                          double r_z,
-                                                          double r_inv) {
-  return -945. * r_inv * r_inv * r_inv * r_inv * r_inv * r_inv * r_inv * r_inv *
-             r_inv * r_inv * r_inv * (r_x * r_x * r_x * r_x * r_y) +
-         105. * r_inv * r_inv * r_inv * r_inv * r_inv * r_inv * r_inv * r_inv *
-             r_inv * 6.0 * (r_x * r_x * r_y) -
-         15. * r_inv * r_inv * r_inv * r_inv * r_inv * r_inv * r_inv * 3.0 *
-             (r_y);
-  /* 42 zero-valued terms not written out */
-}
-
-/**
- * @brief Compute \f$ \frac{\partial^5}{ \partial_x^5 }\phi(x, y, z} \f$.
- *
- * Note that r_inv = 1./sqrt(r_x^2 + r_y^2 + r_z^2)
- */
-__attribute__((always_inline)) INLINE static double D_500(double r_x,
-                                                          double r_y,
-                                                          double r_z,
-                                                          double r_inv) {
-  return -945. * r_inv * r_inv * r_inv * r_inv * r_inv * r_inv * r_inv * r_inv *
-             r_inv * r_inv * r_inv * (r_x * r_x * r_x * r_x * r_x) +
-         105. * r_inv * r_inv * r_inv * r_inv * r_inv * r_inv * r_inv * r_inv *
-             r_inv * 10.0 * (r_x * r_x * r_x) -
-         15. * r_inv * r_inv * r_inv * r_inv * r_inv * r_inv * r_inv * 15.0 *
-             (r_x);
-  /* 26 zero-valued terms not written out */
+#include "kernel_gravity.h"
+
+/**
+ * @brief All the derivatives of the potential field required for the M2L
+ * kernel. D_ijk is differentiated i, j and k times along x, y and z.
+ */
+struct potential_derivatives_M2L {
+
+  /* 0th order term (no derivative) */
+  float D_000;
+
+#if SELF_GRAVITY_MULTIPOLE_ORDER > 0
+
+  /* 1st order terms (only present for multipole order >= 1) */
+  float D_100, D_010, D_001;
+#endif
+#if SELF_GRAVITY_MULTIPOLE_ORDER > 1
+
+  /* 2nd order terms (only present for multipole order >= 2) */
+  float D_200, D_020, D_002;
+  float D_110, D_101, D_011;
+#endif
+#if SELF_GRAVITY_MULTIPOLE_ORDER > 2
+
+  /* 3rd order terms (only present for multipole order >= 3) */
+  float D_300, D_030, D_003;
+  float D_210, D_201;
+  float D_120, D_021;
+  float D_102, D_012;
+  float D_111;
+#endif
+#if SELF_GRAVITY_MULTIPOLE_ORDER > 3
+
+  /* 4th order terms (only present for multipole order >= 4) */
+  float D_400, D_040, D_004;
+  float D_310, D_301;
+  float D_130, D_031;
+  float D_103, D_013;
+  float D_220, D_202, D_022;
+  float D_211, D_121, D_112;
+#endif
+#if SELF_GRAVITY_MULTIPOLE_ORDER > 4
+
+  /* 5th order terms (only present for multipole order >= 5) */
+  float D_005, D_014, D_023;
+  float D_032, D_041, D_050;
+  float D_104, D_113, D_122;
+  float D_131, D_140, D_203;
+  float D_212, D_221, D_230;
+  float D_302, D_311, D_320;
+  float D_401, D_410, D_500;
+#endif
+#if SELF_GRAVITY_MULTIPOLE_ORDER > 5
+#error "Missing implementation for order >5"
+#endif
+};
+
+/**
+ * @brief Derivatives of the potential field required for the M2P kernel.
+ * Only 1st and 3rd order terms are stored, whatever the multipole order.
+ */
+struct potential_derivatives_M2P {
+
+  /* 1st order terms (one derivative along x, y or z respectively) */
+  float D_100, D_010, D_001;
+
+  /* 3rd order terms (D_ijk: i, j, k derivatives along x, y, z) */
+  float D_300, D_030, D_003;
+  float D_210, D_201;
+  float D_120, D_021;
+  float D_102, D_012;
+  float D_111;
+};
+
+/**
+ * @brief Compute all the relevant derivatives of the softened and truncated
+ * gravitational potential for the M2L kernel.
+ *
+ * @param r_x x-component of distance vector
+ * @param r_y y-component of distance vector
+ * @param r_z z-component of distance vector
+ * @param r2 Square norm of distance vector
+ * @param r_inv Inverse norm of distance vector
+ * @param eps Softening length.
+ * @param eps_inv Inverse of softening length.
+ * @param pot (return) The structure containing all the derivatives.
+ */
+__attribute__((always_inline)) INLINE static void
+compute_potential_derivatives_M2L(float r_x, float r_y, float r_z, float r2,
+                                  float r_inv, float eps, float eps_inv,
+                                  struct potential_derivatives_M2L *pot) {
+
+  float Dt_1;
+#if SELF_GRAVITY_MULTIPOLE_ORDER > 0
+  float Dt_3;
+#endif
+#if SELF_GRAVITY_MULTIPOLE_ORDER > 1
+  float Dt_5;
+#endif
+#if SELF_GRAVITY_MULTIPOLE_ORDER > 2
+  float Dt_7;
+#endif
+#if SELF_GRAVITY_MULTIPOLE_ORDER > 3
+  float Dt_9;
+#endif
+#if SELF_GRAVITY_MULTIPOLE_ORDER > 4
+  float Dt_11;
+#endif
+
+  /* Un-softened case */
+  if (r2 > eps * eps) {
+
+    Dt_1 = r_inv;
+#if SELF_GRAVITY_MULTIPOLE_ORDER > 0
+    const float r_inv2 = r_inv * r_inv;
+    Dt_3 = -1.f * Dt_1 * r_inv2; /* -1 / r^3 */
+#endif
+#if SELF_GRAVITY_MULTIPOLE_ORDER > 1
+    Dt_5 = -3.f * Dt_3 * r_inv2; /* 3 / r^5 */
+#endif
+#if SELF_GRAVITY_MULTIPOLE_ORDER > 2
+    Dt_7 = -5.f * Dt_5 * r_inv2; /* -15 / r^7 */
+#endif
+#if SELF_GRAVITY_MULTIPOLE_ORDER > 3
+    Dt_9 = -7.f * Dt_7 * r_inv2; /* 105 / r^9 */
+#endif
+#if SELF_GRAVITY_MULTIPOLE_ORDER > 4
+    Dt_11 = -9.f * Dt_9 * r_inv2; /* -945 / r^11 */
+#endif
+#if SELF_GRAVITY_MULTIPOLE_ORDER > 5
+#error "Missing implementation for order >5"
+#endif
+
+  } else {
+    const float r = r2 * r_inv;
+    const float u = r * eps_inv;
+    const float u_inv = r_inv * eps;
+
+    Dt_1 = eps_inv * D_soft_1(u, u_inv);
+#if SELF_GRAVITY_MULTIPOLE_ORDER > 0
+    const float eps_inv2 = eps_inv * eps_inv;
+    const float eps_inv3 = eps_inv * eps_inv2;
+    Dt_3 = -eps_inv3 * D_soft_3(u, u_inv);
+#endif
+#if SELF_GRAVITY_MULTIPOLE_ORDER > 1
+    const float eps_inv5 = eps_inv3 * eps_inv2;
+    Dt_5 = eps_inv5 * D_soft_5(u, u_inv);
+#endif
+#if SELF_GRAVITY_MULTIPOLE_ORDER > 2
+    const float eps_inv7 = eps_inv5 * eps_inv2;
+    Dt_7 = -eps_inv7 * D_soft_7(u, u_inv);
+#endif
+#if SELF_GRAVITY_MULTIPOLE_ORDER > 3
+    const float eps_inv9 = eps_inv7 * eps_inv2;
+    Dt_9 = eps_inv9 * D_soft_9(u, u_inv);
+#endif
+#if SELF_GRAVITY_MULTIPOLE_ORDER > 4
+    const float eps_inv11 = eps_inv9 * eps_inv2;
+    Dt_11 = -eps_inv11 * D_soft_11(u, u_inv);
+#endif
+#if SELF_GRAVITY_MULTIPOLE_ORDER > 5
+#error "Missing implementation for order >5"
+#endif
+  }
+
+/* Alright, let's get the full terms */
+
+/* Compute some powers of r_x, r_y and r_z */
+#if SELF_GRAVITY_MULTIPOLE_ORDER > 1
+  const float r_x2 = r_x * r_x;
+  const float r_y2 = r_y * r_y;
+  const float r_z2 = r_z * r_z;
+#endif
+#if SELF_GRAVITY_MULTIPOLE_ORDER > 2
+  const float r_x3 = r_x2 * r_x;
+  const float r_y3 = r_y2 * r_y;
+  const float r_z3 = r_z2 * r_z;
+#endif
+#if SELF_GRAVITY_MULTIPOLE_ORDER > 3
+  const float r_x4 = r_x3 * r_x;
+  const float r_y4 = r_y3 * r_y;
+  const float r_z4 = r_z3 * r_z;
+#endif
+#if SELF_GRAVITY_MULTIPOLE_ORDER > 4
+  const float r_x5 = r_x4 * r_x;
+  const float r_y5 = r_y4 * r_y;
+  const float r_z5 = r_z4 * r_z;
+#endif
+#if SELF_GRAVITY_MULTIPOLE_ORDER > 5
+#error "Missing implementation for order >5"
+#endif
+
+  /* Get the 0th order term */
+  pot->D_000 = Dt_1;
+
+#if SELF_GRAVITY_MULTIPOLE_ORDER > 0
+  /* 1st order derivatives */
+  pot->D_100 = r_x * Dt_3;
+  pot->D_010 = r_y * Dt_3;
+  pot->D_001 = r_z * Dt_3;
+#endif
+#if SELF_GRAVITY_MULTIPOLE_ORDER > 1
+  /* 2nd order derivatives */
+  pot->D_200 = r_x2 * Dt_5 + Dt_3;
+  pot->D_020 = r_y2 * Dt_5 + Dt_3;
+  pot->D_002 = r_z2 * Dt_5 + Dt_3;
+  pot->D_110 = r_x * r_y * Dt_5;
+  pot->D_101 = r_x * r_z * Dt_5;
+  pot->D_011 = r_y * r_z * Dt_5;
+#endif
+#if SELF_GRAVITY_MULTIPOLE_ORDER > 2
+  /* 3rd order derivatives */
+  pot->D_300 = r_x3 * Dt_7 + 3.f * r_x * Dt_5;
+  pot->D_030 = r_y3 * Dt_7 + 3.f * r_y * Dt_5;
+  pot->D_003 = r_z3 * Dt_7 + 3.f * r_z * Dt_5;
+  pot->D_210 = r_x2 * r_y * Dt_7 + r_y * Dt_5;
+  pot->D_201 = r_x2 * r_z * Dt_7 + r_z * Dt_5;
+  pot->D_120 = r_y2 * r_x * Dt_7 + r_x * Dt_5;
+  pot->D_021 = r_y2 * r_z * Dt_7 + r_z * Dt_5;
+  pot->D_102 = r_z2 * r_x * Dt_7 + r_x * Dt_5;
+  pot->D_012 = r_z2 * r_y * Dt_7 + r_y * Dt_5;
+  pot->D_111 = r_x * r_y * r_z * Dt_7;
+#endif
+#if SELF_GRAVITY_MULTIPOLE_ORDER > 3
+  /* 4th order derivatives */
+  pot->D_400 = r_x4 * Dt_9 + 6.f * r_x2 * Dt_7 + 3.f * Dt_5;
+  pot->D_040 = r_y4 * Dt_9 + 6.f * r_y2 * Dt_7 + 3.f * Dt_5;
+  pot->D_004 = r_z4 * Dt_9 + 6.f * r_z2 * Dt_7 + 3.f * Dt_5;
+  pot->D_310 = r_x3 * r_y * Dt_9 + 3.f * r_x * r_y * Dt_7;
+  pot->D_301 = r_x3 * r_z * Dt_9 + 3.f * r_x * r_z * Dt_7;
+  pot->D_130 = r_y3 * r_x * Dt_9 + 3.f * r_y * r_x * Dt_7;
+  pot->D_031 = r_y3 * r_z * Dt_9 + 3.f * r_y * r_z * Dt_7;
+  pot->D_103 = r_z3 * r_x * Dt_9 + 3.f * r_z * r_x * Dt_7;
+  pot->D_013 = r_z3 * r_y * Dt_9 + 3.f * r_z * r_y * Dt_7;
+  pot->D_220 = r_x2 * r_y2 * Dt_9 + r_x2 * Dt_7 + r_y2 * Dt_7 + Dt_5;
+  pot->D_202 = r_x2 * r_z2 * Dt_9 + r_x2 * Dt_7 + r_z2 * Dt_7 + Dt_5;
+  pot->D_022 = r_y2 * r_z2 * Dt_9 + r_y2 * Dt_7 + r_z2 * Dt_7 + Dt_5;
+  pot->D_211 = r_x2 * r_y * r_z * Dt_9 + r_y * r_z * Dt_7;
+  pot->D_121 = r_y2 * r_x * r_z * Dt_9 + r_x * r_z * Dt_7;
+  pot->D_112 = r_z2 * r_x * r_y * Dt_9 + r_x * r_y * Dt_7;
+#endif
+#if SELF_GRAVITY_MULTIPOLE_ORDER > 4
+  /* 5th order derivatives */
+  pot->D_500 = r_x5 * Dt_11 + 10.f * r_x3 * Dt_9 + 15.f * r_x * Dt_7;
+  pot->D_050 = r_y5 * Dt_11 + 10.f * r_y3 * Dt_9 + 15.f * r_y * Dt_7;
+  pot->D_005 = r_z5 * Dt_11 + 10.f * r_z3 * Dt_9 + 15.f * r_z * Dt_7;
+  pot->D_410 = r_x4 * r_y * Dt_11 + 6.f * r_x2 * r_y * Dt_9 + 3.f * r_y * Dt_7;
+  pot->D_401 = r_x4 * r_z * Dt_11 + 6.f * r_x2 * r_z * Dt_9 + 3.f * r_z * Dt_7;
+  pot->D_140 = r_y4 * r_x * Dt_11 + 6.f * r_y2 * r_x * Dt_9 + 3.f * r_x * Dt_7;
+  pot->D_041 = r_y4 * r_z * Dt_11 + 6.f * r_y2 * r_z * Dt_9 + 3.f * r_z * Dt_7;
+  pot->D_104 = r_z4 * r_x * Dt_11 + 6.f * r_z2 * r_x * Dt_9 + 3.f * r_x * Dt_7;
+  pot->D_014 = r_z4 * r_y * Dt_11 + 6.f * r_z2 * r_y * Dt_9 + 3.f * r_y * Dt_7;
+  pot->D_320 = r_x3 * r_y2 * Dt_11 + r_x3 * Dt_9 + 3.f * r_x * r_y2 * Dt_9 +
+               3.f * r_x * Dt_7;
+  pot->D_302 = r_x3 * r_z2 * Dt_11 + r_x3 * Dt_9 + 3.f * r_x * r_z2 * Dt_9 +
+               3.f * r_x * Dt_7;
+  pot->D_230 = r_y3 * r_x2 * Dt_11 + r_y3 * Dt_9 + 3.f * r_y * r_x2 * Dt_9 +
+               3.f * r_y * Dt_7;
+  pot->D_032 = r_y3 * r_z2 * Dt_11 + r_y3 * Dt_9 + 3.f * r_y * r_z2 * Dt_9 +
+               3.f * r_y * Dt_7;
+  pot->D_203 = r_z3 * r_x2 * Dt_11 + r_z3 * Dt_9 + 3.f * r_z * r_x2 * Dt_9 +
+               3.f * r_z * Dt_7;
+  pot->D_023 = r_z3 * r_y2 * Dt_11 + r_z3 * Dt_9 + 3.f * r_z * r_y2 * Dt_9 +
+               3.f * r_z * Dt_7;
+  pot->D_311 = r_x3 * r_y * r_z * Dt_11 + 3.f * r_x * r_y * r_z * Dt_9;
+  pot->D_131 = r_y3 * r_x * r_z * Dt_11 + 3.f * r_x * r_y * r_z * Dt_9;
+  pot->D_113 = r_z3 * r_x * r_y * Dt_11 + 3.f * r_x * r_y * r_z * Dt_9;
+  pot->D_122 = r_x * r_y2 * r_z2 * Dt_11 + r_x * r_y2 * Dt_9 +
+               r_x * r_z2 * Dt_9 + r_x * Dt_7;
+  pot->D_212 = r_y * r_x2 * r_z2 * Dt_11 + r_y * r_x2 * Dt_9 +
+               r_y * r_z2 * Dt_9 + r_y * Dt_7;
+  pot->D_221 = r_z * r_x2 * r_y2 * Dt_11 + r_z * r_x2 * Dt_9 +
+               r_z * r_y2 * Dt_9 + r_z * Dt_7;
+#endif
+#if SELF_GRAVITY_MULTIPOLE_ORDER > 5
+#error "Missing implementation for orders >5"
+#endif
+}
+
+/**
+ * @brief Compute all the relevant derivatives of the softened and truncated
+ * gravitational potential for the M2P kernel.
+ *
+ * @param r_x x-component of distance vector
+ * @param r_y y-component of distance vector
+ * @param r_z z-component of distance vector
+ * @param r2 Square norm of distance vector
+ * @param r_inv Inverse norm of distance vector
+ * @param eps Softening length.
+ * @param eps_inv Inverse of softening length.
+ * @param pot (return) The structure containing all the derivatives.
+ */
+__attribute__((always_inline)) INLINE static void
+compute_potential_derivatives_M2P(float r_x, float r_y, float r_z, float r2,
+                                  float r_inv, float eps, float eps_inv,
+                                  struct potential_derivatives_M2P *pot) {
+
+  float Dt_1;
+  float Dt_3;
+  float Dt_5;
+  float Dt_7;
+
+  /* Un-softened case */
+  if (r2 > eps * eps) {
+
+    const float r_inv2 = r_inv * r_inv;
+
+    Dt_1 = r_inv;
+    Dt_3 = -1.f * Dt_1 * r_inv2; /* -1 / r^3 */
+    Dt_5 = -3.f * Dt_3 * r_inv2; /* 3 / r^5 */
+    Dt_7 = -5.f * Dt_5 * r_inv2; /* -15 / r^7 */
+
+  } else {
+
+    const float r = r2 * r_inv;
+    const float u = r * eps_inv;
+    const float u_inv = r_inv * eps;
+    const float eps_inv2 = eps_inv * eps_inv;
+    const float eps_inv3 = eps_inv * eps_inv2;
+    const float eps_inv5 = eps_inv3 * eps_inv2;
+    const float eps_inv7 = eps_inv5 * eps_inv2;
+
+    Dt_1 = eps_inv * D_soft_1(u, u_inv);
+    Dt_3 = -eps_inv3 * D_soft_3(u, u_inv);
+    Dt_5 = eps_inv5 * D_soft_5(u, u_inv);
+    Dt_7 = -eps_inv7 * D_soft_7(u, u_inv);
+  }
+
+  /* Compute some powers of r_x, r_y and r_z */
+  const float r_x2 = r_x * r_x;
+  const float r_y2 = r_y * r_y;
+  const float r_z2 = r_z * r_z;
+  const float r_x3 = r_x2 * r_x;
+  const float r_y3 = r_y2 * r_y;
+  const float r_z3 = r_z2 * r_z;
+
+  /* 1st order derivatives */
+  pot->D_100 = r_x * Dt_3;
+  pot->D_010 = r_y * Dt_3;
+  pot->D_001 = r_z * Dt_3;
+
+  /* 3rd order derivatives */
+  pot->D_300 = r_x3 * Dt_7 + 3.f * r_x * Dt_5;
+  pot->D_030 = r_y3 * Dt_7 + 3.f * r_y * Dt_5;
+  pot->D_003 = r_z3 * Dt_7 + 3.f * r_z * Dt_5;
+  pot->D_210 = r_x2 * r_y * Dt_7 + r_y * Dt_5;
+  pot->D_201 = r_x2 * r_z * Dt_7 + r_z * Dt_5;
+  pot->D_120 = r_y2 * r_x * Dt_7 + r_x * Dt_5;
+  pot->D_021 = r_y2 * r_z * Dt_7 + r_z * Dt_5;
+  pot->D_102 = r_z2 * r_x * Dt_7 + r_x * Dt_5;
+  pot->D_012 = r_z2 * r_y * Dt_7 + r_y * Dt_5;
+  pot->D_111 = r_x * r_y * r_z * Dt_7;
 }
 
 #endif /* SWIFT_GRAVITY_DERIVATIVE_H */
diff --git a/src/gravity_properties.c b/src/gravity_properties.c
index 18cf044434f7840a5a76f483540bb924a2365e26..27a5de0a4102cae4ca787c10c60cf3bbc3a983ee 100644
--- a/src/gravity_properties.c
+++ b/src/gravity_properties.c
@@ -52,12 +52,15 @@ void gravity_props_init(struct gravity_props *p,
 
   /* Opening angle */
   p->theta_crit = parser_get_param_double(params, "Gravity:theta");
+  if (p->theta_crit >= 1.) error("Theta too large. FMM won't converge.");
+  p->theta_crit2 = p->theta_crit * p->theta_crit;
   p->theta_crit_inv = 1. / p->theta_crit;
 
   /* Softening lengths */
   p->epsilon = 3. * parser_get_param_double(params, "Gravity:epsilon");
   p->epsilon2 = p->epsilon * p->epsilon;
-  p->epsilon_inv = 1. / p->epsilon;
+  p->epsilon_inv = 1.f / p->epsilon;
+  p->epsilon_inv3 = p->epsilon_inv * p->epsilon_inv * p->epsilon_inv;
 }
 
 void gravity_props_print(const struct gravity_props *p) {
diff --git a/src/gravity_properties.h b/src/gravity_properties.h
index 2a5e4cb1e07ea591e2e3821704ec55abe7980360..f7b9950052b302a003e5d128191c9dbe68fe875f 100644
--- a/src/gravity_properties.h
+++ b/src/gravity_properties.h
@@ -51,17 +51,23 @@ struct gravity_props {
   /*! Tree opening angle (Multipole acceptance criterion) */
   double theta_crit;
 
+  /*! Square of opening angle */
+  double theta_crit2;
+
   /*! Inverse of opening angle */
   double theta_crit_inv;
 
   /*! Softening length */
-  double epsilon;
+  float epsilon;
 
   /*! Square of softening length */
-  double epsilon2;
+  float epsilon2;
 
   /*! Inverse of softening length */
-  double epsilon_inv;
+  float epsilon_inv;
+
+  /*! Cube of the inverse of softening length */
+  float epsilon_inv3;
 };
 
 void gravity_props_print(const struct gravity_props *p);
diff --git a/src/gravity_softened_derivatives.h b/src/gravity_softened_derivatives.h
index 3f92476dab5940765b112708a867d940d4d5e6e9..6ef9a0b455a572d8ea6254f9f91941978e7729ac 100644
--- a/src/gravity_softened_derivatives.h
+++ b/src/gravity_softened_derivatives.h
@@ -34,6 +34,8 @@
 #include "inline.h"
 #include "kernel_gravity.h"
 
+#if 0
+
 /*************************/
 /* 0th order derivatives */
 /*************************/
@@ -440,4 +442,6 @@ __attribute__((always_inline)) INLINE static double D_soft_111(
   return -r_x * r_y * r_z * eps_inv7 * D_soft_3(u);
 }
 
+#endif
+
 #endif /* SWIFT_GRAVITY_SOFTENED_DERIVATIVE_H */
diff --git a/src/kernel_gravity.h b/src/kernel_gravity.h
index 5a9e839b63422a3f18c80caf9d891dd6f8be5da6..799bda85b0c69dd2757f47fb0225006adb6d1432 100644
--- a/src/kernel_gravity.h
+++ b/src/kernel_gravity.h
@@ -71,46 +71,74 @@ __attribute__((always_inline)) INLINE static void kernel_grav_eval_double(
 /* Derivatives of softening kernel used for FMM */
 /************************************************/
 
-__attribute__((always_inline)) INLINE static double D_soft_0(double u) {
+__attribute__((always_inline)) INLINE static float D_soft_1(float u,
+                                                            float u_inv) {
 
   /* phi(u) = -3u^7 + 15u^6 - 28u^5 + 21u^4 - 7u^2 + 3 */
-  double phi = -3. * u + 15.;
-  phi = phi * u - 28.;
-  phi = phi * u + 21.;
+  float phi = -3.f * u + 15.f;
+  phi = phi * u - 28.f;
+  phi = phi * u + 21.f;
   phi = phi * u;
-  phi = phi * u - 7.;
+  phi = phi * u - 7.f;
   phi = phi * u;
-  phi = phi * u + 3.;
+  phi = phi * u + 3.f;
 
   return phi;
 }
 
-__attribute__((always_inline)) INLINE static double D_soft_1(double u) {
+__attribute__((always_inline)) INLINE static float D_soft_3(float u,
+                                                            float u_inv) {
 
   /* phi'(u)/u = 21u^5 - 90u^4 + 140u^3 - 84u^2 + 14 */
-  double phi = 21. * u - 90.;
-  phi = phi * u + 140.;
-  phi = phi * u - 84.;
+  float phi = 21.f * u - 90.f;
+  phi = phi * u + 140.f;
+  phi = phi * u - 84.f;
   phi = phi * u;
-  phi = phi * u + 14.;
+  phi = phi * u + 14.f;
 
   return phi;
 }
 
-__attribute__((always_inline)) INLINE static double D_soft_2(double u) {
+__attribute__((always_inline)) INLINE static float D_soft_5(float u,
+                                                            float u_inv) {
 
   /* (phi'(u)/u)'/u = -105u^3 + 360u^2 - 420u + 168 */
-  double phi = -105. * u + 360.;
-  phi = phi * u - 420.;
-  phi = phi * u + 168.;
+  float phi = -105.f * u + 360.f;
+  phi = phi * u - 420.f;
+  phi = phi * u + 168.f;
 
   return phi;
 }
 
-__attribute__((always_inline)) INLINE static double D_soft_3(double u) {
+__attribute__((always_inline)) INLINE static float D_soft_7(float u,
+                                                            float u_inv) {
 
-  /* ((phi'(u)/u)'/u)'/u = 315u - 720 + 420/u */
-  return 315. * u - 720. + 420. / u;
+  /* ((phi'(u)/u)'/u)'/u = 315u - 720 + 420u^-1 */
+  return 315.f * u - 720.f + 420.f * u_inv;
+}
+
+__attribute__((always_inline)) INLINE static float D_soft_9(float u,
+                                                            float u_inv) {
+  /* (((phi'(u)/u)'/u)'/u)'/u = -315u^-1 + 420u^-3 */
+  /* Evaluated in Horner form in powers of u_inv; u itself is not used. */
+  float phi = 420.f * u_inv;
+  phi = phi * u_inv - 315.f;
+  phi = phi * u_inv;
+
+  return phi;
+}
+
+__attribute__((always_inline)) INLINE static float D_soft_11(float u,
+                                                             float u_inv) {
+  /* -(D_soft_9)'(u)/u = 1260u^-5 - 315u^-3, which is +945 at u = 1.
+   * With the caller's -eps_inv11 prefactor this matches -945 / r^11. */
+  float phi = 1260.f * u_inv;
+  phi = phi * u_inv - 315.f;
+  phi = phi * u_inv;
+  phi = phi * u_inv;
+  phi = phi * u_inv;
+
+  return phi;
 }
 
 #endif /* SWIFT_KERNEL_GRAVITY_H */
diff --git a/src/multipole.h b/src/multipole.h
index 004757924cccb6bc2f450c19f1ccd600f50e1990..e408e5b6e0b38f724648e3a9bbade30b76e09db0 100644
--- a/src/multipole.h
+++ b/src/multipole.h
@@ -45,48 +45,48 @@
 struct grav_tensor {
 
   /* 0th order terms */
-  double F_000;
+  float F_000;
 
 #if SELF_GRAVITY_MULTIPOLE_ORDER > 0
 
   /* 1st order terms */
-  double F_100, F_010, F_001;
+  float F_100, F_010, F_001;
 #endif
 #if SELF_GRAVITY_MULTIPOLE_ORDER > 1
 
   /* 2nd order terms */
-  double F_200, F_020, F_002;
-  double F_110, F_101, F_011;
+  float F_200, F_020, F_002;
+  float F_110, F_101, F_011;
 #endif
 #if SELF_GRAVITY_MULTIPOLE_ORDER > 2
 
   /* 3rd order terms */
-  double F_300, F_030, F_003;
-  double F_210, F_201;
-  double F_120, F_021;
-  double F_102, F_012;
-  double F_111;
+  float F_300, F_030, F_003;
+  float F_210, F_201;
+  float F_120, F_021;
+  float F_102, F_012;
+  float F_111;
 #endif
 #if SELF_GRAVITY_MULTIPOLE_ORDER > 3
 
   /* 4th order terms */
-  double F_400, F_040, F_004;
-  double F_310, F_301;
-  double F_130, F_031;
-  double F_103, F_013;
-  double F_220, F_202, F_022;
-  double F_211, F_121, F_112;
+  float F_400, F_040, F_004;
+  float F_310, F_301;
+  float F_130, F_031;
+  float F_103, F_013;
+  float F_220, F_202, F_022;
+  float F_211, F_121, F_112;
 #endif
 #if SELF_GRAVITY_MULTIPOLE_ORDER > 4
 
   /* 5th order terms */
-  double F_005, F_014, F_023;
-  double F_032, F_041, F_050;
-  double F_104, F_113, F_122;
-  double F_131, F_140, F_203;
-  double F_212, F_221, F_230;
-  double F_302, F_311, F_320;
-  double F_401, F_410, F_500;
+  float F_005, F_014, F_023;
+  float F_032, F_041, F_050;
+  float F_104, F_113, F_122;
+  float F_131, F_140, F_203;
+  float F_212, F_221, F_230;
+  float F_302, F_311, F_320;
+  float F_401, F_410, F_500;
 #endif
 #if SELF_GRAVITY_MULTIPOLE_ORDER > 5
 #error "Missing implementation for order >5"
@@ -96,7 +96,13 @@ struct grav_tensor {
   /* Total number of gpart this field tensor interacted with */
   long long num_interacted;
 
+  /* Last time this tensor was zeroed */
+  integertime_t ti_init;
+
 #endif
+
+  /* Has this tensor received any contribution? */
+  char interacted;
 };
 
 struct multipole {
@@ -173,6 +179,12 @@ struct gravity_tensors {
     /*! The actual content */
     struct {
 
+      /*! Multipole mass */
+      struct multipole m_pole;
+
+      /*! Field tensor for the potential */
+      struct grav_tensor pot;
+
       /*! Centre of mass of the matter dsitribution */
       double CoM[3];
 
@@ -184,12 +196,6 @@ struct gravity_tensors {
 
       /*! Upper limit of the CoM<->gpart distance at the last rebuild */
       double r_max_rebuild;
-
-      /*! Multipole mass */
-      struct multipole m_pole;
-
-      /*! Field tensor for the potential */
-      struct grav_tensor pot;
     };
   };
 } SWIFT_STRUCT_ALIGN;
@@ -210,8 +216,11 @@ INLINE static void gravity_reset(struct gravity_tensors *m) {
  *
  * @param m The #multipole.
  * @param dt The drift time-step.
+ * @param x_diff The maximal distance moved by any particle since the last
+ * rebuild.
  */
-INLINE static void gravity_drift(struct gravity_tensors *m, double dt) {
+INLINE static void gravity_drift(struct gravity_tensors *m, double dt,
+                                 float x_diff) {
 
   const double dx = m->m_pole.vel[0] * dt;
   const double dy = m->m_pole.vel[1] * dt;
@@ -223,22 +232,27 @@ INLINE static void gravity_drift(struct gravity_tensors *m, double dt) {
   m->CoM[2] += dz;
 
   /* Conservative change in maximal radius containing all gpart */
-  /* MATTHIEU: Use gpart->x_diff here ? */
-  m->r_max += sqrt(dx * dx + dy * dy + dz * dz);
+  m->r_max = m->r_max_rebuild + 2. * x_diff;
 }
 
 /**
  * @brief Zeroes all the fields of a field tensor
  *
  * @param l The field tensor.
+ * @param ti_current The current (integer) time (for debugging only).
  */
-INLINE static void gravity_field_tensors_init(struct grav_tensor *l) {
+INLINE static void gravity_field_tensors_init(struct grav_tensor *l,
+                                              integertime_t ti_current) {
 
   bzero(l, sizeof(struct grav_tensor));
+
+#ifdef SWIFT_DEBUG_CHECKS
+  l->ti_init = ti_current;
+#endif
 }
 
 /**
- * @brief Adds field tensrs to other ones (i.e. does la += lb).
+ * @brief Adds a field tensor to another one (i.e. does la += lb).
  *
  * @param la The gravity tensors to add to.
  * @param lb The gravity tensors to add.
@@ -250,6 +264,8 @@ INLINE static void gravity_field_tensors_add(struct grav_tensor *la,
   la->num_interacted += lb->num_interacted;
 #endif
 
+  la->interacted = 1;
+
   /* Add 0th order terms */
   la->F_000 += lb->F_000;
 
@@ -338,6 +354,7 @@ INLINE static void gravity_field_tensors_add(struct grav_tensor *la,
 INLINE static void gravity_field_tensors_print(const struct grav_tensor *l) {
 
   printf("-------------------------\n");
+  printf("Interacted: %d\n", l->interacted);
   printf("F_000= %12.5e\n", l->F_000);
 #if SELF_GRAVITY_MULTIPOLE_ORDER > 0
   printf("-------------------------\n");
@@ -1507,12 +1524,13 @@ INLINE static void gravity_M2L(struct grav_tensor *l_b,
                                const double dim[3]) {
 
   /* Recover some constants */
-  const double eps2 = props->epsilon2;
+  const float eps = props->epsilon;
+  const float eps_inv = props->epsilon_inv;
 
   /* Compute distance vector */
-  double dx = pos_b[0] - pos_a[0];
-  double dy = pos_b[1] - pos_a[1];
-  double dz = pos_b[2] - pos_a[2];
+  float dx = (float)(pos_b[0] - pos_a[0]);
+  float dy = (float)(pos_b[1] - pos_a[1]);
+  float dz = (float)(pos_b[2] - pos_a[2]);
 
   /* Apply BC */
   if (periodic) {
@@ -1522,652 +1540,350 @@ INLINE static void gravity_M2L(struct grav_tensor *l_b,
   }
 
   /* Compute distance */
-  const double r2 = dx * dx + dy * dy + dz * dz;
-  const double r_inv = 1. / sqrt(r2);
+  const float r2 = dx * dx + dy * dy + dz * dz;
+  const float r_inv = 1. / sqrtf(r2);
+
+  /* Compute all the potential derivatives (pot.D_xxx) used by the terms below */
+  struct potential_derivatives_M2L pot;
+  compute_potential_derivatives_M2L(dx, dy, dz, r2, r_inv, eps, eps_inv, &pot);
 
 #ifdef SWIFT_DEBUG_CHECKS
   /* Count interactions */
   l_b->num_interacted += m_a->num_gpart;
 #endif
 
-  /* Un-softened case */
-  if (r2 > eps2) {
+  /* Record that this tensor has received contributions */
+  l_b->interacted = 1;
 
-    /*  0th order term */
-    l_b->F_000 += m_a->M_000 * D_000(dx, dy, dz, r_inv);
+  /*  0th order term */
+  l_b->F_000 += m_a->M_000 * pot.D_000;
 
 #if SELF_GRAVITY_MULTIPOLE_ORDER > 0
-    /*  1st order multipole term (addition to rank 0)*/
-    l_b->F_000 += m_a->M_100 * D_100(dx, dy, dz, r_inv) +
-                  m_a->M_010 * D_010(dx, dy, dz, r_inv) +
-                  m_a->M_001 * D_001(dx, dy, dz, r_inv);
+  /*  1st order multipole term (addition to rank 0)*/
+  l_b->F_000 +=
+      m_a->M_100 * pot.D_100 + m_a->M_010 * pot.D_010 + m_a->M_001 * pot.D_001;
 
-    /*  1st order multipole term (addition to rank 1)*/
-    l_b->F_100 += m_a->M_000 * D_100(dx, dy, dz, r_inv);
-    l_b->F_010 += m_a->M_000 * D_010(dx, dy, dz, r_inv);
-    l_b->F_001 += m_a->M_000 * D_001(dx, dy, dz, r_inv);
+  /*  1st order multipole term (addition to rank 1)*/
+  l_b->F_100 += m_a->M_000 * pot.D_100;
+  l_b->F_010 += m_a->M_000 * pot.D_010;
+  l_b->F_001 += m_a->M_000 * pot.D_001;
 #endif
 #if SELF_GRAVITY_MULTIPOLE_ORDER > 1
 
-    /*  2nd order multipole term (addition to rank 0)*/
-    l_b->F_000 += m_a->M_200 * D_200(dx, dy, dz, r_inv) +
-                  m_a->M_020 * D_020(dx, dy, dz, r_inv) +
-                  m_a->M_002 * D_002(dx, dy, dz, r_inv);
-    l_b->F_000 += m_a->M_110 * D_110(dx, dy, dz, r_inv) +
-                  m_a->M_101 * D_101(dx, dy, dz, r_inv) +
-                  m_a->M_011 * D_011(dx, dy, dz, r_inv);
-
-    /*  2nd order multipole term (addition to rank 1)*/
-    l_b->F_100 += m_a->M_100 * D_200(dx, dy, dz, r_inv) +
-                  m_a->M_010 * D_110(dx, dy, dz, r_inv) +
-                  m_a->M_001 * D_101(dx, dy, dz, r_inv);
-    l_b->F_010 += m_a->M_100 * D_110(dx, dy, dz, r_inv) +
-                  m_a->M_010 * D_020(dx, dy, dz, r_inv) +
-                  m_a->M_001 * D_011(dx, dy, dz, r_inv);
-    l_b->F_001 += m_a->M_100 * D_101(dx, dy, dz, r_inv) +
-                  m_a->M_010 * D_011(dx, dy, dz, r_inv) +
-                  m_a->M_001 * D_002(dx, dy, dz, r_inv);
-
-    /*  2nd order multipole term (addition to rank 2)*/
-    l_b->F_200 += m_a->M_000 * D_200(dx, dy, dz, r_inv);
-    l_b->F_020 += m_a->M_000 * D_020(dx, dy, dz, r_inv);
-    l_b->F_002 += m_a->M_000 * D_002(dx, dy, dz, r_inv);
-    l_b->F_110 += m_a->M_000 * D_110(dx, dy, dz, r_inv);
-    l_b->F_101 += m_a->M_000 * D_101(dx, dy, dz, r_inv);
-    l_b->F_011 += m_a->M_000 * D_011(dx, dy, dz, r_inv);
+  /*  2nd order multipole term (addition to rank 0)*/
+  l_b->F_000 +=
+      m_a->M_200 * pot.D_200 + m_a->M_020 * pot.D_020 + m_a->M_002 * pot.D_002;
+  l_b->F_000 +=
+      m_a->M_110 * pot.D_110 + m_a->M_101 * pot.D_101 + m_a->M_011 * pot.D_011;
+
+  /*  2nd order multipole term (addition to rank 1)*/
+  l_b->F_100 +=
+      m_a->M_100 * pot.D_200 + m_a->M_010 * pot.D_110 + m_a->M_001 * pot.D_101;
+  l_b->F_010 +=
+      m_a->M_100 * pot.D_110 + m_a->M_010 * pot.D_020 + m_a->M_001 * pot.D_011;
+  l_b->F_001 +=
+      m_a->M_100 * pot.D_101 + m_a->M_010 * pot.D_011 + m_a->M_001 * pot.D_002;
+
+  /*  2nd order multipole term (addition to rank 2)*/
+  l_b->F_200 += m_a->M_000 * pot.D_200;
+  l_b->F_020 += m_a->M_000 * pot.D_020;
+  l_b->F_002 += m_a->M_000 * pot.D_002;
+  l_b->F_110 += m_a->M_000 * pot.D_110;
+  l_b->F_101 += m_a->M_000 * pot.D_101;
+  l_b->F_011 += m_a->M_000 * pot.D_011;
 #endif
 #if SELF_GRAVITY_MULTIPOLE_ORDER > 2
 
-    /*  3rd order multipole term (addition to rank 0)*/
-    l_b->F_000 += m_a->M_300 * D_300(dx, dy, dz, r_inv) +
-                  m_a->M_030 * D_030(dx, dy, dz, r_inv) +
-                  m_a->M_003 * D_003(dx, dy, dz, r_inv);
-    l_b->F_000 += m_a->M_210 * D_210(dx, dy, dz, r_inv) +
-                  m_a->M_201 * D_201(dx, dy, dz, r_inv) +
-                  m_a->M_120 * D_120(dx, dy, dz, r_inv);
-    l_b->F_000 += m_a->M_021 * D_021(dx, dy, dz, r_inv) +
-                  m_a->M_102 * D_102(dx, dy, dz, r_inv) +
-                  m_a->M_012 * D_012(dx, dy, dz, r_inv);
-    l_b->F_000 += m_a->M_111 * D_111(dx, dy, dz, r_inv);
-
-    /*  3rd order multipole term (addition to rank 1)*/
-    l_b->F_100 += m_a->M_200 * D_300(dx, dy, dz, r_inv) +
-                  m_a->M_020 * D_120(dx, dy, dz, r_inv) +
-                  m_a->M_002 * D_102(dx, dy, dz, r_inv);
-    l_b->F_100 += m_a->M_110 * D_210(dx, dy, dz, r_inv) +
-                  m_a->M_101 * D_201(dx, dy, dz, r_inv) +
-                  m_a->M_011 * D_111(dx, dy, dz, r_inv);
-    l_b->F_010 += m_a->M_200 * D_210(dx, dy, dz, r_inv) +
-                  m_a->M_020 * D_030(dx, dy, dz, r_inv) +
-                  m_a->M_002 * D_012(dx, dy, dz, r_inv);
-    l_b->F_010 += m_a->M_110 * D_120(dx, dy, dz, r_inv) +
-                  m_a->M_101 * D_111(dx, dy, dz, r_inv) +
-                  m_a->M_011 * D_021(dx, dy, dz, r_inv);
-    l_b->F_001 += m_a->M_200 * D_201(dx, dy, dz, r_inv) +
-                  m_a->M_020 * D_021(dx, dy, dz, r_inv) +
-                  m_a->M_002 * D_003(dx, dy, dz, r_inv);
-    l_b->F_001 += m_a->M_110 * D_111(dx, dy, dz, r_inv) +
-                  m_a->M_101 * D_102(dx, dy, dz, r_inv) +
-                  m_a->M_011 * D_012(dx, dy, dz, r_inv);
-
-    /*  3rd order multipole term (addition to rank 2)*/
-    l_b->F_200 += m_a->M_100 * D_300(dx, dy, dz, r_inv) +
-                  m_a->M_010 * D_210(dx, dy, dz, r_inv) +
-                  m_a->M_001 * D_201(dx, dy, dz, r_inv);
-    l_b->F_020 += m_a->M_100 * D_120(dx, dy, dz, r_inv) +
-                  m_a->M_010 * D_030(dx, dy, dz, r_inv) +
-                  m_a->M_001 * D_021(dx, dy, dz, r_inv);
-    l_b->F_002 += m_a->M_100 * D_102(dx, dy, dz, r_inv) +
-                  m_a->M_010 * D_012(dx, dy, dz, r_inv) +
-                  m_a->M_001 * D_003(dx, dy, dz, r_inv);
-    l_b->F_110 += m_a->M_100 * D_210(dx, dy, dz, r_inv) +
-                  m_a->M_010 * D_120(dx, dy, dz, r_inv) +
-                  m_a->M_001 * D_111(dx, dy, dz, r_inv);
-    l_b->F_101 += m_a->M_100 * D_201(dx, dy, dz, r_inv) +
-                  m_a->M_010 * D_111(dx, dy, dz, r_inv) +
-                  m_a->M_001 * D_102(dx, dy, dz, r_inv);
-    l_b->F_011 += m_a->M_100 * D_111(dx, dy, dz, r_inv) +
-                  m_a->M_010 * D_021(dx, dy, dz, r_inv) +
-                  m_a->M_001 * D_012(dx, dy, dz, r_inv);
-
-    /*  3rd order multipole term (addition to rank 3)*/
-    l_b->F_300 += m_a->M_000 * D_300(dx, dy, dz, r_inv);
-    l_b->F_030 += m_a->M_000 * D_030(dx, dy, dz, r_inv);
-    l_b->F_003 += m_a->M_000 * D_003(dx, dy, dz, r_inv);
-    l_b->F_210 += m_a->M_000 * D_210(dx, dy, dz, r_inv);
-    l_b->F_201 += m_a->M_000 * D_201(dx, dy, dz, r_inv);
-    l_b->F_120 += m_a->M_000 * D_120(dx, dy, dz, r_inv);
-    l_b->F_021 += m_a->M_000 * D_021(dx, dy, dz, r_inv);
-    l_b->F_102 += m_a->M_000 * D_102(dx, dy, dz, r_inv);
-    l_b->F_012 += m_a->M_000 * D_012(dx, dy, dz, r_inv);
-    l_b->F_111 += m_a->M_000 * D_111(dx, dy, dz, r_inv);
+  /*  3rd order multipole term (addition to rank 0)*/
+  l_b->F_000 +=
+      m_a->M_300 * pot.D_300 + m_a->M_030 * pot.D_030 + m_a->M_003 * pot.D_003;
+  l_b->F_000 +=
+      m_a->M_210 * pot.D_210 + m_a->M_201 * pot.D_201 + m_a->M_120 * pot.D_120;
+  l_b->F_000 +=
+      m_a->M_021 * pot.D_021 + m_a->M_102 * pot.D_102 + m_a->M_012 * pot.D_012;
+  l_b->F_000 += m_a->M_111 * pot.D_111;
+
+  /*  3rd order multipole term (addition to rank 1)*/
+  l_b->F_100 +=
+      m_a->M_200 * pot.D_300 + m_a->M_020 * pot.D_120 + m_a->M_002 * pot.D_102;
+  l_b->F_100 +=
+      m_a->M_110 * pot.D_210 + m_a->M_101 * pot.D_201 + m_a->M_011 * pot.D_111;
+  l_b->F_010 +=
+      m_a->M_200 * pot.D_210 + m_a->M_020 * pot.D_030 + m_a->M_002 * pot.D_012;
+  l_b->F_010 +=
+      m_a->M_110 * pot.D_120 + m_a->M_101 * pot.D_111 + m_a->M_011 * pot.D_021;
+  l_b->F_001 +=
+      m_a->M_200 * pot.D_201 + m_a->M_020 * pot.D_021 + m_a->M_002 * pot.D_003;
+  l_b->F_001 +=
+      m_a->M_110 * pot.D_111 + m_a->M_101 * pot.D_102 + m_a->M_011 * pot.D_012;
+
+  /*  3rd order multipole term (addition to rank 2)*/
+  l_b->F_200 +=
+      m_a->M_100 * pot.D_300 + m_a->M_010 * pot.D_210 + m_a->M_001 * pot.D_201;
+  l_b->F_020 +=
+      m_a->M_100 * pot.D_120 + m_a->M_010 * pot.D_030 + m_a->M_001 * pot.D_021;
+  l_b->F_002 +=
+      m_a->M_100 * pot.D_102 + m_a->M_010 * pot.D_012 + m_a->M_001 * pot.D_003;
+  l_b->F_110 +=
+      m_a->M_100 * pot.D_210 + m_a->M_010 * pot.D_120 + m_a->M_001 * pot.D_111;
+  l_b->F_101 +=
+      m_a->M_100 * pot.D_201 + m_a->M_010 * pot.D_111 + m_a->M_001 * pot.D_102;
+  l_b->F_011 +=
+      m_a->M_100 * pot.D_111 + m_a->M_010 * pot.D_021 + m_a->M_001 * pot.D_012;
+
+  /*  3rd order multipole term (addition to rank 3)*/
+  l_b->F_300 += m_a->M_000 * pot.D_300;
+  l_b->F_030 += m_a->M_000 * pot.D_030;
+  l_b->F_003 += m_a->M_000 * pot.D_003;
+  l_b->F_210 += m_a->M_000 * pot.D_210;
+  l_b->F_201 += m_a->M_000 * pot.D_201;
+  l_b->F_120 += m_a->M_000 * pot.D_120;
+  l_b->F_021 += m_a->M_000 * pot.D_021;
+  l_b->F_102 += m_a->M_000 * pot.D_102;
+  l_b->F_012 += m_a->M_000 * pot.D_012;
+  l_b->F_111 += m_a->M_000 * pot.D_111;
 #endif
 #if SELF_GRAVITY_MULTIPOLE_ORDER > 3
-    /* Compute 4th order field tensor terms (addition to rank 0) */
-    l_b->F_000 += m_a->M_004 * D_004(dx, dy, dz, r_inv) +
-                  m_a->M_013 * D_013(dx, dy, dz, r_inv) +
-                  m_a->M_022 * D_022(dx, dy, dz, r_inv) +
-                  m_a->M_031 * D_031(dx, dy, dz, r_inv) +
-                  m_a->M_040 * D_040(dx, dy, dz, r_inv) +
-                  m_a->M_103 * D_103(dx, dy, dz, r_inv) +
-                  m_a->M_112 * D_112(dx, dy, dz, r_inv) +
-                  m_a->M_121 * D_121(dx, dy, dz, r_inv) +
-                  m_a->M_130 * D_130(dx, dy, dz, r_inv) +
-                  m_a->M_202 * D_202(dx, dy, dz, r_inv) +
-                  m_a->M_211 * D_211(dx, dy, dz, r_inv) +
-                  m_a->M_220 * D_220(dx, dy, dz, r_inv) +
-                  m_a->M_301 * D_301(dx, dy, dz, r_inv) +
-                  m_a->M_310 * D_310(dx, dy, dz, r_inv) +
-                  m_a->M_400 * D_400(dx, dy, dz, r_inv);
-
-    /* Compute 4th order field tensor terms (addition to rank 1) */
-    l_b->F_001 += m_a->M_003 * D_004(dx, dy, dz, r_inv) +
-                  m_a->M_012 * D_013(dx, dy, dz, r_inv) +
-                  m_a->M_021 * D_022(dx, dy, dz, r_inv) +
-                  m_a->M_030 * D_031(dx, dy, dz, r_inv) +
-                  m_a->M_102 * D_103(dx, dy, dz, r_inv) +
-                  m_a->M_111 * D_112(dx, dy, dz, r_inv) +
-                  m_a->M_120 * D_121(dx, dy, dz, r_inv) +
-                  m_a->M_201 * D_202(dx, dy, dz, r_inv) +
-                  m_a->M_210 * D_211(dx, dy, dz, r_inv) +
-                  m_a->M_300 * D_301(dx, dy, dz, r_inv);
-    l_b->F_010 += m_a->M_003 * D_013(dx, dy, dz, r_inv) +
-                  m_a->M_012 * D_022(dx, dy, dz, r_inv) +
-                  m_a->M_021 * D_031(dx, dy, dz, r_inv) +
-                  m_a->M_030 * D_040(dx, dy, dz, r_inv) +
-                  m_a->M_102 * D_112(dx, dy, dz, r_inv) +
-                  m_a->M_111 * D_121(dx, dy, dz, r_inv) +
-                  m_a->M_120 * D_130(dx, dy, dz, r_inv) +
-                  m_a->M_201 * D_211(dx, dy, dz, r_inv) +
-                  m_a->M_210 * D_220(dx, dy, dz, r_inv) +
-                  m_a->M_300 * D_310(dx, dy, dz, r_inv);
-    l_b->F_100 += m_a->M_003 * D_103(dx, dy, dz, r_inv) +
-                  m_a->M_012 * D_112(dx, dy, dz, r_inv) +
-                  m_a->M_021 * D_121(dx, dy, dz, r_inv) +
-                  m_a->M_030 * D_130(dx, dy, dz, r_inv) +
-                  m_a->M_102 * D_202(dx, dy, dz, r_inv) +
-                  m_a->M_111 * D_211(dx, dy, dz, r_inv) +
-                  m_a->M_120 * D_220(dx, dy, dz, r_inv) +
-                  m_a->M_201 * D_301(dx, dy, dz, r_inv) +
-                  m_a->M_210 * D_310(dx, dy, dz, r_inv) +
-                  m_a->M_300 * D_400(dx, dy, dz, r_inv);
-
-    /* Compute 4th order field tensor terms (addition to rank 2) */
-    l_b->F_002 += m_a->M_002 * D_004(dx, dy, dz, r_inv) +
-                  m_a->M_011 * D_013(dx, dy, dz, r_inv) +
-                  m_a->M_020 * D_022(dx, dy, dz, r_inv) +
-                  m_a->M_101 * D_103(dx, dy, dz, r_inv) +
-                  m_a->M_110 * D_112(dx, dy, dz, r_inv) +
-                  m_a->M_200 * D_202(dx, dy, dz, r_inv);
-    l_b->F_011 += m_a->M_002 * D_013(dx, dy, dz, r_inv) +
-                  m_a->M_011 * D_022(dx, dy, dz, r_inv) +
-                  m_a->M_020 * D_031(dx, dy, dz, r_inv) +
-                  m_a->M_101 * D_112(dx, dy, dz, r_inv) +
-                  m_a->M_110 * D_121(dx, dy, dz, r_inv) +
-                  m_a->M_200 * D_211(dx, dy, dz, r_inv);
-    l_b->F_020 += m_a->M_002 * D_022(dx, dy, dz, r_inv) +
-                  m_a->M_011 * D_031(dx, dy, dz, r_inv) +
-                  m_a->M_020 * D_040(dx, dy, dz, r_inv) +
-                  m_a->M_101 * D_121(dx, dy, dz, r_inv) +
-                  m_a->M_110 * D_130(dx, dy, dz, r_inv) +
-                  m_a->M_200 * D_220(dx, dy, dz, r_inv);
-    l_b->F_101 += m_a->M_002 * D_103(dx, dy, dz, r_inv) +
-                  m_a->M_011 * D_112(dx, dy, dz, r_inv) +
-                  m_a->M_020 * D_121(dx, dy, dz, r_inv) +
-                  m_a->M_101 * D_202(dx, dy, dz, r_inv) +
-                  m_a->M_110 * D_211(dx, dy, dz, r_inv) +
-                  m_a->M_200 * D_301(dx, dy, dz, r_inv);
-    l_b->F_110 += m_a->M_002 * D_112(dx, dy, dz, r_inv) +
-                  m_a->M_011 * D_121(dx, dy, dz, r_inv) +
-                  m_a->M_020 * D_130(dx, dy, dz, r_inv) +
-                  m_a->M_101 * D_211(dx, dy, dz, r_inv) +
-                  m_a->M_110 * D_220(dx, dy, dz, r_inv) +
-                  m_a->M_200 * D_310(dx, dy, dz, r_inv);
-    l_b->F_200 += m_a->M_002 * D_202(dx, dy, dz, r_inv) +
-                  m_a->M_011 * D_211(dx, dy, dz, r_inv) +
-                  m_a->M_020 * D_220(dx, dy, dz, r_inv) +
-                  m_a->M_101 * D_301(dx, dy, dz, r_inv) +
-                  m_a->M_110 * D_310(dx, dy, dz, r_inv) +
-                  m_a->M_200 * D_400(dx, dy, dz, r_inv);
-
-    /* Compute 4th order field tensor terms (addition to rank 3) */
-    l_b->F_003 += m_a->M_001 * D_004(dx, dy, dz, r_inv) +
-                  m_a->M_010 * D_013(dx, dy, dz, r_inv) +
-                  m_a->M_100 * D_103(dx, dy, dz, r_inv);
-    l_b->F_012 += m_a->M_001 * D_013(dx, dy, dz, r_inv) +
-                  m_a->M_010 * D_022(dx, dy, dz, r_inv) +
-                  m_a->M_100 * D_112(dx, dy, dz, r_inv);
-    l_b->F_021 += m_a->M_001 * D_022(dx, dy, dz, r_inv) +
-                  m_a->M_010 * D_031(dx, dy, dz, r_inv) +
-                  m_a->M_100 * D_121(dx, dy, dz, r_inv);
-    l_b->F_030 += m_a->M_001 * D_031(dx, dy, dz, r_inv) +
-                  m_a->M_010 * D_040(dx, dy, dz, r_inv) +
-                  m_a->M_100 * D_130(dx, dy, dz, r_inv);
-    l_b->F_102 += m_a->M_001 * D_103(dx, dy, dz, r_inv) +
-                  m_a->M_010 * D_112(dx, dy, dz, r_inv) +
-                  m_a->M_100 * D_202(dx, dy, dz, r_inv);
-    l_b->F_111 += m_a->M_001 * D_112(dx, dy, dz, r_inv) +
-                  m_a->M_010 * D_121(dx, dy, dz, r_inv) +
-                  m_a->M_100 * D_211(dx, dy, dz, r_inv);
-    l_b->F_120 += m_a->M_001 * D_121(dx, dy, dz, r_inv) +
-                  m_a->M_010 * D_130(dx, dy, dz, r_inv) +
-                  m_a->M_100 * D_220(dx, dy, dz, r_inv);
-    l_b->F_201 += m_a->M_001 * D_202(dx, dy, dz, r_inv) +
-                  m_a->M_010 * D_211(dx, dy, dz, r_inv) +
-                  m_a->M_100 * D_301(dx, dy, dz, r_inv);
-    l_b->F_210 += m_a->M_001 * D_211(dx, dy, dz, r_inv) +
-                  m_a->M_010 * D_220(dx, dy, dz, r_inv) +
-                  m_a->M_100 * D_310(dx, dy, dz, r_inv);
-    l_b->F_300 += m_a->M_001 * D_301(dx, dy, dz, r_inv) +
-                  m_a->M_010 * D_310(dx, dy, dz, r_inv) +
-                  m_a->M_100 * D_400(dx, dy, dz, r_inv);
-
-    /* Compute 4th order field tensor terms (addition to rank 4) */
-    l_b->F_004 += m_a->M_000 * D_004(dx, dy, dz, r_inv);
-    l_b->F_013 += m_a->M_000 * D_013(dx, dy, dz, r_inv);
-    l_b->F_022 += m_a->M_000 * D_022(dx, dy, dz, r_inv);
-    l_b->F_031 += m_a->M_000 * D_031(dx, dy, dz, r_inv);
-    l_b->F_040 += m_a->M_000 * D_040(dx, dy, dz, r_inv);
-    l_b->F_103 += m_a->M_000 * D_103(dx, dy, dz, r_inv);
-    l_b->F_112 += m_a->M_000 * D_112(dx, dy, dz, r_inv);
-    l_b->F_121 += m_a->M_000 * D_121(dx, dy, dz, r_inv);
-    l_b->F_130 += m_a->M_000 * D_130(dx, dy, dz, r_inv);
-    l_b->F_202 += m_a->M_000 * D_202(dx, dy, dz, r_inv);
-    l_b->F_211 += m_a->M_000 * D_211(dx, dy, dz, r_inv);
-    l_b->F_220 += m_a->M_000 * D_220(dx, dy, dz, r_inv);
-    l_b->F_301 += m_a->M_000 * D_301(dx, dy, dz, r_inv);
-    l_b->F_310 += m_a->M_000 * D_310(dx, dy, dz, r_inv);
-    l_b->F_400 += m_a->M_000 * D_400(dx, dy, dz, r_inv);
+  /* Compute 4th order field tensor terms (addition to rank 0) */
+  l_b->F_000 +=
+      m_a->M_004 * pot.D_004 + m_a->M_013 * pot.D_013 + m_a->M_022 * pot.D_022 +
+      m_a->M_031 * pot.D_031 + m_a->M_040 * pot.D_040 + m_a->M_103 * pot.D_103 +
+      m_a->M_112 * pot.D_112 + m_a->M_121 * pot.D_121 + m_a->M_130 * pot.D_130 +
+      m_a->M_202 * pot.D_202 + m_a->M_211 * pot.D_211 + m_a->M_220 * pot.D_220 +
+      m_a->M_301 * pot.D_301 + m_a->M_310 * pot.D_310 + m_a->M_400 * pot.D_400;
+
+  /* Compute 4th order field tensor terms (addition to rank 1) */
+  l_b->F_001 += m_a->M_003 * pot.D_004 + m_a->M_012 * pot.D_013 +
+                m_a->M_021 * pot.D_022 + m_a->M_030 * pot.D_031 +
+                m_a->M_102 * pot.D_103 + m_a->M_111 * pot.D_112 +
+                m_a->M_120 * pot.D_121 + m_a->M_201 * pot.D_202 +
+                m_a->M_210 * pot.D_211 + m_a->M_300 * pot.D_301;
+  l_b->F_010 += m_a->M_003 * pot.D_013 + m_a->M_012 * pot.D_022 +
+                m_a->M_021 * pot.D_031 + m_a->M_030 * pot.D_040 +
+                m_a->M_102 * pot.D_112 + m_a->M_111 * pot.D_121 +
+                m_a->M_120 * pot.D_130 + m_a->M_201 * pot.D_211 +
+                m_a->M_210 * pot.D_220 + m_a->M_300 * pot.D_310;
+  l_b->F_100 += m_a->M_003 * pot.D_103 + m_a->M_012 * pot.D_112 +
+                m_a->M_021 * pot.D_121 + m_a->M_030 * pot.D_130 +
+                m_a->M_102 * pot.D_202 + m_a->M_111 * pot.D_211 +
+                m_a->M_120 * pot.D_220 + m_a->M_201 * pot.D_301 +
+                m_a->M_210 * pot.D_310 + m_a->M_300 * pot.D_400;
+
+  /* Compute 4th order field tensor terms (addition to rank 2) */
+  l_b->F_002 += m_a->M_002 * pot.D_004 + m_a->M_011 * pot.D_013 +
+                m_a->M_020 * pot.D_022 + m_a->M_101 * pot.D_103 +
+                m_a->M_110 * pot.D_112 + m_a->M_200 * pot.D_202;
+  l_b->F_011 += m_a->M_002 * pot.D_013 + m_a->M_011 * pot.D_022 +
+                m_a->M_020 * pot.D_031 + m_a->M_101 * pot.D_112 +
+                m_a->M_110 * pot.D_121 + m_a->M_200 * pot.D_211;
+  l_b->F_020 += m_a->M_002 * pot.D_022 + m_a->M_011 * pot.D_031 +
+                m_a->M_020 * pot.D_040 + m_a->M_101 * pot.D_121 +
+                m_a->M_110 * pot.D_130 + m_a->M_200 * pot.D_220;
+  l_b->F_101 += m_a->M_002 * pot.D_103 + m_a->M_011 * pot.D_112 +
+                m_a->M_020 * pot.D_121 + m_a->M_101 * pot.D_202 +
+                m_a->M_110 * pot.D_211 + m_a->M_200 * pot.D_301;
+  l_b->F_110 += m_a->M_002 * pot.D_112 + m_a->M_011 * pot.D_121 +
+                m_a->M_020 * pot.D_130 + m_a->M_101 * pot.D_211 +
+                m_a->M_110 * pot.D_220 + m_a->M_200 * pot.D_310;
+  l_b->F_200 += m_a->M_002 * pot.D_202 + m_a->M_011 * pot.D_211 +
+                m_a->M_020 * pot.D_220 + m_a->M_101 * pot.D_301 +
+                m_a->M_110 * pot.D_310 + m_a->M_200 * pot.D_400;
+
+  /* Compute 4th order field tensor terms (addition to rank 3) */
+  l_b->F_003 +=
+      m_a->M_001 * pot.D_004 + m_a->M_010 * pot.D_013 + m_a->M_100 * pot.D_103;
+  l_b->F_012 +=
+      m_a->M_001 * pot.D_013 + m_a->M_010 * pot.D_022 + m_a->M_100 * pot.D_112;
+  l_b->F_021 +=
+      m_a->M_001 * pot.D_022 + m_a->M_010 * pot.D_031 + m_a->M_100 * pot.D_121;
+  l_b->F_030 +=
+      m_a->M_001 * pot.D_031 + m_a->M_010 * pot.D_040 + m_a->M_100 * pot.D_130;
+  l_b->F_102 +=
+      m_a->M_001 * pot.D_103 + m_a->M_010 * pot.D_112 + m_a->M_100 * pot.D_202;
+  l_b->F_111 +=
+      m_a->M_001 * pot.D_112 + m_a->M_010 * pot.D_121 + m_a->M_100 * pot.D_211;
+  l_b->F_120 +=
+      m_a->M_001 * pot.D_121 + m_a->M_010 * pot.D_130 + m_a->M_100 * pot.D_220;
+  l_b->F_201 +=
+      m_a->M_001 * pot.D_202 + m_a->M_010 * pot.D_211 + m_a->M_100 * pot.D_301;
+  l_b->F_210 +=
+      m_a->M_001 * pot.D_211 + m_a->M_010 * pot.D_220 + m_a->M_100 * pot.D_310;
+  l_b->F_300 +=
+      m_a->M_001 * pot.D_301 + m_a->M_010 * pot.D_310 + m_a->M_100 * pot.D_400;
+
+  /* Compute 4th order field tensor terms (addition to rank 4) */
+  l_b->F_004 += m_a->M_000 * pot.D_004;
+  l_b->F_013 += m_a->M_000 * pot.D_013;
+  l_b->F_022 += m_a->M_000 * pot.D_022;
+  l_b->F_031 += m_a->M_000 * pot.D_031;
+  l_b->F_040 += m_a->M_000 * pot.D_040;
+  l_b->F_103 += m_a->M_000 * pot.D_103;
+  l_b->F_112 += m_a->M_000 * pot.D_112;
+  l_b->F_121 += m_a->M_000 * pot.D_121;
+  l_b->F_130 += m_a->M_000 * pot.D_130;
+  l_b->F_202 += m_a->M_000 * pot.D_202;
+  l_b->F_211 += m_a->M_000 * pot.D_211;
+  l_b->F_220 += m_a->M_000 * pot.D_220;
+  l_b->F_301 += m_a->M_000 * pot.D_301;
+  l_b->F_310 += m_a->M_000 * pot.D_310;
+  l_b->F_400 += m_a->M_000 * pot.D_400;
 
 #endif
 #if SELF_GRAVITY_MULTIPOLE_ORDER > 4
 
-    /* Compute 5th order field tensor terms (addition to rank 0) */
-    l_b->F_000 += m_a->M_005 * D_005(dx, dy, dz, r_inv) +
-                  m_a->M_014 * D_014(dx, dy, dz, r_inv) +
-                  m_a->M_023 * D_023(dx, dy, dz, r_inv) +
-                  m_a->M_032 * D_032(dx, dy, dz, r_inv) +
-                  m_a->M_041 * D_041(dx, dy, dz, r_inv) +
-                  m_a->M_050 * D_050(dx, dy, dz, r_inv) +
-                  m_a->M_104 * D_104(dx, dy, dz, r_inv) +
-                  m_a->M_113 * D_113(dx, dy, dz, r_inv) +
-                  m_a->M_122 * D_122(dx, dy, dz, r_inv) +
-                  m_a->M_131 * D_131(dx, dy, dz, r_inv) +
-                  m_a->M_140 * D_140(dx, dy, dz, r_inv) +
-                  m_a->M_203 * D_203(dx, dy, dz, r_inv) +
-                  m_a->M_212 * D_212(dx, dy, dz, r_inv) +
-                  m_a->M_221 * D_221(dx, dy, dz, r_inv) +
-                  m_a->M_230 * D_230(dx, dy, dz, r_inv) +
-                  m_a->M_302 * D_302(dx, dy, dz, r_inv) +
-                  m_a->M_311 * D_311(dx, dy, dz, r_inv) +
-                  m_a->M_320 * D_320(dx, dy, dz, r_inv) +
-                  m_a->M_401 * D_401(dx, dy, dz, r_inv) +
-                  m_a->M_410 * D_410(dx, dy, dz, r_inv) +
-                  m_a->M_500 * D_500(dx, dy, dz, r_inv);
-
-    /* Compute 5th order field tensor terms (addition to rank 1) */
-    l_b->F_001 += m_a->M_004 * D_005(dx, dy, dz, r_inv) +
-                  m_a->M_013 * D_014(dx, dy, dz, r_inv) +
-                  m_a->M_022 * D_023(dx, dy, dz, r_inv) +
-                  m_a->M_031 * D_032(dx, dy, dz, r_inv) +
-                  m_a->M_040 * D_041(dx, dy, dz, r_inv) +
-                  m_a->M_103 * D_104(dx, dy, dz, r_inv) +
-                  m_a->M_112 * D_113(dx, dy, dz, r_inv) +
-                  m_a->M_121 * D_122(dx, dy, dz, r_inv) +
-                  m_a->M_130 * D_131(dx, dy, dz, r_inv) +
-                  m_a->M_202 * D_203(dx, dy, dz, r_inv) +
-                  m_a->M_211 * D_212(dx, dy, dz, r_inv) +
-                  m_a->M_220 * D_221(dx, dy, dz, r_inv) +
-                  m_a->M_301 * D_302(dx, dy, dz, r_inv) +
-                  m_a->M_310 * D_311(dx, dy, dz, r_inv) +
-                  m_a->M_400 * D_401(dx, dy, dz, r_inv);
-    l_b->F_010 += m_a->M_004 * D_014(dx, dy, dz, r_inv) +
-                  m_a->M_013 * D_023(dx, dy, dz, r_inv) +
-                  m_a->M_022 * D_032(dx, dy, dz, r_inv) +
-                  m_a->M_031 * D_041(dx, dy, dz, r_inv) +
-                  m_a->M_040 * D_050(dx, dy, dz, r_inv) +
-                  m_a->M_103 * D_113(dx, dy, dz, r_inv) +
-                  m_a->M_112 * D_122(dx, dy, dz, r_inv) +
-                  m_a->M_121 * D_131(dx, dy, dz, r_inv) +
-                  m_a->M_130 * D_140(dx, dy, dz, r_inv) +
-                  m_a->M_202 * D_212(dx, dy, dz, r_inv) +
-                  m_a->M_211 * D_221(dx, dy, dz, r_inv) +
-                  m_a->M_220 * D_230(dx, dy, dz, r_inv) +
-                  m_a->M_301 * D_311(dx, dy, dz, r_inv) +
-                  m_a->M_310 * D_320(dx, dy, dz, r_inv) +
-                  m_a->M_400 * D_410(dx, dy, dz, r_inv);
-    l_b->F_100 += m_a->M_004 * D_104(dx, dy, dz, r_inv) +
-                  m_a->M_013 * D_113(dx, dy, dz, r_inv) +
-                  m_a->M_022 * D_122(dx, dy, dz, r_inv) +
-                  m_a->M_031 * D_131(dx, dy, dz, r_inv) +
-                  m_a->M_040 * D_140(dx, dy, dz, r_inv) +
-                  m_a->M_103 * D_203(dx, dy, dz, r_inv) +
-                  m_a->M_112 * D_212(dx, dy, dz, r_inv) +
-                  m_a->M_121 * D_221(dx, dy, dz, r_inv) +
-                  m_a->M_130 * D_230(dx, dy, dz, r_inv) +
-                  m_a->M_202 * D_302(dx, dy, dz, r_inv) +
-                  m_a->M_211 * D_311(dx, dy, dz, r_inv) +
-                  m_a->M_220 * D_320(dx, dy, dz, r_inv) +
-                  m_a->M_301 * D_401(dx, dy, dz, r_inv) +
-                  m_a->M_310 * D_410(dx, dy, dz, r_inv) +
-                  m_a->M_400 * D_500(dx, dy, dz, r_inv);
-
-    /* Compute 5th order field tensor terms (addition to rank 2) */
-    l_b->F_002 += m_a->M_003 * D_005(dx, dy, dz, r_inv) +
-                  m_a->M_012 * D_014(dx, dy, dz, r_inv) +
-                  m_a->M_021 * D_023(dx, dy, dz, r_inv) +
-                  m_a->M_030 * D_032(dx, dy, dz, r_inv) +
-                  m_a->M_102 * D_104(dx, dy, dz, r_inv) +
-                  m_a->M_111 * D_113(dx, dy, dz, r_inv) +
-                  m_a->M_120 * D_122(dx, dy, dz, r_inv) +
-                  m_a->M_201 * D_203(dx, dy, dz, r_inv) +
-                  m_a->M_210 * D_212(dx, dy, dz, r_inv) +
-                  m_a->M_300 * D_302(dx, dy, dz, r_inv);
-    l_b->F_011 += m_a->M_003 * D_014(dx, dy, dz, r_inv) +
-                  m_a->M_012 * D_023(dx, dy, dz, r_inv) +
-                  m_a->M_021 * D_032(dx, dy, dz, r_inv) +
-                  m_a->M_030 * D_041(dx, dy, dz, r_inv) +
-                  m_a->M_102 * D_113(dx, dy, dz, r_inv) +
-                  m_a->M_111 * D_122(dx, dy, dz, r_inv) +
-                  m_a->M_120 * D_131(dx, dy, dz, r_inv) +
-                  m_a->M_201 * D_212(dx, dy, dz, r_inv) +
-                  m_a->M_210 * D_221(dx, dy, dz, r_inv) +
-                  m_a->M_300 * D_311(dx, dy, dz, r_inv);
-    l_b->F_020 += m_a->M_003 * D_023(dx, dy, dz, r_inv) +
-                  m_a->M_012 * D_032(dx, dy, dz, r_inv) +
-                  m_a->M_021 * D_041(dx, dy, dz, r_inv) +
-                  m_a->M_030 * D_050(dx, dy, dz, r_inv) +
-                  m_a->M_102 * D_122(dx, dy, dz, r_inv) +
-                  m_a->M_111 * D_131(dx, dy, dz, r_inv) +
-                  m_a->M_120 * D_140(dx, dy, dz, r_inv) +
-                  m_a->M_201 * D_221(dx, dy, dz, r_inv) +
-                  m_a->M_210 * D_230(dx, dy, dz, r_inv) +
-                  m_a->M_300 * D_320(dx, dy, dz, r_inv);
-    l_b->F_101 += m_a->M_003 * D_104(dx, dy, dz, r_inv) +
-                  m_a->M_012 * D_113(dx, dy, dz, r_inv) +
-                  m_a->M_021 * D_122(dx, dy, dz, r_inv) +
-                  m_a->M_030 * D_131(dx, dy, dz, r_inv) +
-                  m_a->M_102 * D_203(dx, dy, dz, r_inv) +
-                  m_a->M_111 * D_212(dx, dy, dz, r_inv) +
-                  m_a->M_120 * D_221(dx, dy, dz, r_inv) +
-                  m_a->M_201 * D_302(dx, dy, dz, r_inv) +
-                  m_a->M_210 * D_311(dx, dy, dz, r_inv) +
-                  m_a->M_300 * D_401(dx, dy, dz, r_inv);
-    l_b->F_110 += m_a->M_003 * D_113(dx, dy, dz, r_inv) +
-                  m_a->M_012 * D_122(dx, dy, dz, r_inv) +
-                  m_a->M_021 * D_131(dx, dy, dz, r_inv) +
-                  m_a->M_030 * D_140(dx, dy, dz, r_inv) +
-                  m_a->M_102 * D_212(dx, dy, dz, r_inv) +
-                  m_a->M_111 * D_221(dx, dy, dz, r_inv) +
-                  m_a->M_120 * D_230(dx, dy, dz, r_inv) +
-                  m_a->M_201 * D_311(dx, dy, dz, r_inv) +
-                  m_a->M_210 * D_320(dx, dy, dz, r_inv) +
-                  m_a->M_300 * D_410(dx, dy, dz, r_inv);
-    l_b->F_200 += m_a->M_003 * D_203(dx, dy, dz, r_inv) +
-                  m_a->M_012 * D_212(dx, dy, dz, r_inv) +
-                  m_a->M_021 * D_221(dx, dy, dz, r_inv) +
-                  m_a->M_030 * D_230(dx, dy, dz, r_inv) +
-                  m_a->M_102 * D_302(dx, dy, dz, r_inv) +
-                  m_a->M_111 * D_311(dx, dy, dz, r_inv) +
-                  m_a->M_120 * D_320(dx, dy, dz, r_inv) +
-                  m_a->M_201 * D_401(dx, dy, dz, r_inv) +
-                  m_a->M_210 * D_410(dx, dy, dz, r_inv) +
-                  m_a->M_300 * D_500(dx, dy, dz, r_inv);
-
-    /* Compute 5th order field tensor terms (addition to rank 3) */
-    l_b->F_003 += m_a->M_002 * D_005(dx, dy, dz, r_inv) +
-                  m_a->M_011 * D_014(dx, dy, dz, r_inv) +
-                  m_a->M_020 * D_023(dx, dy, dz, r_inv) +
-                  m_a->M_101 * D_104(dx, dy, dz, r_inv) +
-                  m_a->M_110 * D_113(dx, dy, dz, r_inv) +
-                  m_a->M_200 * D_203(dx, dy, dz, r_inv);
-    l_b->F_012 += m_a->M_002 * D_014(dx, dy, dz, r_inv) +
-                  m_a->M_011 * D_023(dx, dy, dz, r_inv) +
-                  m_a->M_020 * D_032(dx, dy, dz, r_inv) +
-                  m_a->M_101 * D_113(dx, dy, dz, r_inv) +
-                  m_a->M_110 * D_122(dx, dy, dz, r_inv) +
-                  m_a->M_200 * D_212(dx, dy, dz, r_inv);
-    l_b->F_021 += m_a->M_002 * D_023(dx, dy, dz, r_inv) +
-                  m_a->M_011 * D_032(dx, dy, dz, r_inv) +
-                  m_a->M_020 * D_041(dx, dy, dz, r_inv) +
-                  m_a->M_101 * D_122(dx, dy, dz, r_inv) +
-                  m_a->M_110 * D_131(dx, dy, dz, r_inv) +
-                  m_a->M_200 * D_221(dx, dy, dz, r_inv);
-    l_b->F_030 += m_a->M_002 * D_032(dx, dy, dz, r_inv) +
-                  m_a->M_011 * D_041(dx, dy, dz, r_inv) +
-                  m_a->M_020 * D_050(dx, dy, dz, r_inv) +
-                  m_a->M_101 * D_131(dx, dy, dz, r_inv) +
-                  m_a->M_110 * D_140(dx, dy, dz, r_inv) +
-                  m_a->M_200 * D_230(dx, dy, dz, r_inv);
-    l_b->F_102 += m_a->M_002 * D_104(dx, dy, dz, r_inv) +
-                  m_a->M_011 * D_113(dx, dy, dz, r_inv) +
-                  m_a->M_020 * D_122(dx, dy, dz, r_inv) +
-                  m_a->M_101 * D_203(dx, dy, dz, r_inv) +
-                  m_a->M_110 * D_212(dx, dy, dz, r_inv) +
-                  m_a->M_200 * D_302(dx, dy, dz, r_inv);
-    l_b->F_111 += m_a->M_002 * D_113(dx, dy, dz, r_inv) +
-                  m_a->M_011 * D_122(dx, dy, dz, r_inv) +
-                  m_a->M_020 * D_131(dx, dy, dz, r_inv) +
-                  m_a->M_101 * D_212(dx, dy, dz, r_inv) +
-                  m_a->M_110 * D_221(dx, dy, dz, r_inv) +
-                  m_a->M_200 * D_311(dx, dy, dz, r_inv);
-    l_b->F_120 += m_a->M_002 * D_122(dx, dy, dz, r_inv) +
-                  m_a->M_011 * D_131(dx, dy, dz, r_inv) +
-                  m_a->M_020 * D_140(dx, dy, dz, r_inv) +
-                  m_a->M_101 * D_221(dx, dy, dz, r_inv) +
-                  m_a->M_110 * D_230(dx, dy, dz, r_inv) +
-                  m_a->M_200 * D_320(dx, dy, dz, r_inv);
-    l_b->F_201 += m_a->M_002 * D_203(dx, dy, dz, r_inv) +
-                  m_a->M_011 * D_212(dx, dy, dz, r_inv) +
-                  m_a->M_020 * D_221(dx, dy, dz, r_inv) +
-                  m_a->M_101 * D_302(dx, dy, dz, r_inv) +
-                  m_a->M_110 * D_311(dx, dy, dz, r_inv) +
-                  m_a->M_200 * D_401(dx, dy, dz, r_inv);
-    l_b->F_210 += m_a->M_002 * D_212(dx, dy, dz, r_inv) +
-                  m_a->M_011 * D_221(dx, dy, dz, r_inv) +
-                  m_a->M_020 * D_230(dx, dy, dz, r_inv) +
-                  m_a->M_101 * D_311(dx, dy, dz, r_inv) +
-                  m_a->M_110 * D_320(dx, dy, dz, r_inv) +
-                  m_a->M_200 * D_410(dx, dy, dz, r_inv);
-    l_b->F_300 += m_a->M_002 * D_302(dx, dy, dz, r_inv) +
-                  m_a->M_011 * D_311(dx, dy, dz, r_inv) +
-                  m_a->M_020 * D_320(dx, dy, dz, r_inv) +
-                  m_a->M_101 * D_401(dx, dy, dz, r_inv) +
-                  m_a->M_110 * D_410(dx, dy, dz, r_inv) +
-                  m_a->M_200 * D_500(dx, dy, dz, r_inv);
-
-    /* Compute 5th order field tensor terms (addition to rank 4) */
-    l_b->F_004 += m_a->M_001 * D_005(dx, dy, dz, r_inv) +
-                  m_a->M_010 * D_014(dx, dy, dz, r_inv) +
-                  m_a->M_100 * D_104(dx, dy, dz, r_inv);
-    l_b->F_013 += m_a->M_001 * D_014(dx, dy, dz, r_inv) +
-                  m_a->M_010 * D_023(dx, dy, dz, r_inv) +
-                  m_a->M_100 * D_113(dx, dy, dz, r_inv);
-    l_b->F_022 += m_a->M_001 * D_023(dx, dy, dz, r_inv) +
-                  m_a->M_010 * D_032(dx, dy, dz, r_inv) +
-                  m_a->M_100 * D_122(dx, dy, dz, r_inv);
-    l_b->F_031 += m_a->M_001 * D_032(dx, dy, dz, r_inv) +
-                  m_a->M_010 * D_041(dx, dy, dz, r_inv) +
-                  m_a->M_100 * D_131(dx, dy, dz, r_inv);
-    l_b->F_040 += m_a->M_001 * D_041(dx, dy, dz, r_inv) +
-                  m_a->M_010 * D_050(dx, dy, dz, r_inv) +
-                  m_a->M_100 * D_140(dx, dy, dz, r_inv);
-    l_b->F_103 += m_a->M_001 * D_104(dx, dy, dz, r_inv) +
-                  m_a->M_010 * D_113(dx, dy, dz, r_inv) +
-                  m_a->M_100 * D_203(dx, dy, dz, r_inv);
-    l_b->F_112 += m_a->M_001 * D_113(dx, dy, dz, r_inv) +
-                  m_a->M_010 * D_122(dx, dy, dz, r_inv) +
-                  m_a->M_100 * D_212(dx, dy, dz, r_inv);
-    l_b->F_121 += m_a->M_001 * D_122(dx, dy, dz, r_inv) +
-                  m_a->M_010 * D_131(dx, dy, dz, r_inv) +
-                  m_a->M_100 * D_221(dx, dy, dz, r_inv);
-    l_b->F_130 += m_a->M_001 * D_131(dx, dy, dz, r_inv) +
-                  m_a->M_010 * D_140(dx, dy, dz, r_inv) +
-                  m_a->M_100 * D_230(dx, dy, dz, r_inv);
-    l_b->F_202 += m_a->M_001 * D_203(dx, dy, dz, r_inv) +
-                  m_a->M_010 * D_212(dx, dy, dz, r_inv) +
-                  m_a->M_100 * D_302(dx, dy, dz, r_inv);
-    l_b->F_211 += m_a->M_001 * D_212(dx, dy, dz, r_inv) +
-                  m_a->M_010 * D_221(dx, dy, dz, r_inv) +
-                  m_a->M_100 * D_311(dx, dy, dz, r_inv);
-    l_b->F_220 += m_a->M_001 * D_221(dx, dy, dz, r_inv) +
-                  m_a->M_010 * D_230(dx, dy, dz, r_inv) +
-                  m_a->M_100 * D_320(dx, dy, dz, r_inv);
-    l_b->F_301 += m_a->M_001 * D_302(dx, dy, dz, r_inv) +
-                  m_a->M_010 * D_311(dx, dy, dz, r_inv) +
-                  m_a->M_100 * D_401(dx, dy, dz, r_inv);
-    l_b->F_310 += m_a->M_001 * D_311(dx, dy, dz, r_inv) +
-                  m_a->M_010 * D_320(dx, dy, dz, r_inv) +
-                  m_a->M_100 * D_410(dx, dy, dz, r_inv);
-    l_b->F_400 += m_a->M_001 * D_401(dx, dy, dz, r_inv) +
-                  m_a->M_010 * D_410(dx, dy, dz, r_inv) +
-                  m_a->M_100 * D_500(dx, dy, dz, r_inv);
-
-    /* Compute 5th order field tensor terms (addition to rank 5) */
-    l_b->F_005 += m_a->M_000 * D_005(dx, dy, dz, r_inv);
-    l_b->F_014 += m_a->M_000 * D_014(dx, dy, dz, r_inv);
-    l_b->F_023 += m_a->M_000 * D_023(dx, dy, dz, r_inv);
-    l_b->F_032 += m_a->M_000 * D_032(dx, dy, dz, r_inv);
-    l_b->F_041 += m_a->M_000 * D_041(dx, dy, dz, r_inv);
-    l_b->F_050 += m_a->M_000 * D_050(dx, dy, dz, r_inv);
-    l_b->F_104 += m_a->M_000 * D_104(dx, dy, dz, r_inv);
-    l_b->F_113 += m_a->M_000 * D_113(dx, dy, dz, r_inv);
-    l_b->F_122 += m_a->M_000 * D_122(dx, dy, dz, r_inv);
-    l_b->F_131 += m_a->M_000 * D_131(dx, dy, dz, r_inv);
-    l_b->F_140 += m_a->M_000 * D_140(dx, dy, dz, r_inv);
-    l_b->F_203 += m_a->M_000 * D_203(dx, dy, dz, r_inv);
-    l_b->F_212 += m_a->M_000 * D_212(dx, dy, dz, r_inv);
-    l_b->F_221 += m_a->M_000 * D_221(dx, dy, dz, r_inv);
-    l_b->F_230 += m_a->M_000 * D_230(dx, dy, dz, r_inv);
-    l_b->F_302 += m_a->M_000 * D_302(dx, dy, dz, r_inv);
-    l_b->F_311 += m_a->M_000 * D_311(dx, dy, dz, r_inv);
-    l_b->F_320 += m_a->M_000 * D_320(dx, dy, dz, r_inv);
-    l_b->F_401 += m_a->M_000 * D_401(dx, dy, dz, r_inv);
-    l_b->F_410 += m_a->M_000 * D_410(dx, dy, dz, r_inv);
-    l_b->F_500 += m_a->M_000 * D_500(dx, dy, dz, r_inv);
+  /* Compute 5th order field tensor terms (addition to rank 0) */
+  l_b->F_000 +=
+      m_a->M_005 * pot.D_005 + m_a->M_014 * pot.D_014 + m_a->M_023 * pot.D_023 +
+      m_a->M_032 * pot.D_032 + m_a->M_041 * pot.D_041 + m_a->M_050 * pot.D_050 +
+      m_a->M_104 * pot.D_104 + m_a->M_113 * pot.D_113 + m_a->M_122 * pot.D_122 +
+      m_a->M_131 * pot.D_131 + m_a->M_140 * pot.D_140 + m_a->M_203 * pot.D_203 +
+      m_a->M_212 * pot.D_212 + m_a->M_221 * pot.D_221 + m_a->M_230 * pot.D_230 +
+      m_a->M_302 * pot.D_302 + m_a->M_311 * pot.D_311 + m_a->M_320 * pot.D_320 +
+      m_a->M_401 * pot.D_401 + m_a->M_410 * pot.D_410 + m_a->M_500 * pot.D_500;
+
+  /* Compute 5th order field tensor terms (addition to rank 1) */
+  l_b->F_001 +=
+      m_a->M_004 * pot.D_005 + m_a->M_013 * pot.D_014 + m_a->M_022 * pot.D_023 +
+      m_a->M_031 * pot.D_032 + m_a->M_040 * pot.D_041 + m_a->M_103 * pot.D_104 +
+      m_a->M_112 * pot.D_113 + m_a->M_121 * pot.D_122 + m_a->M_130 * pot.D_131 +
+      m_a->M_202 * pot.D_203 + m_a->M_211 * pot.D_212 + m_a->M_220 * pot.D_221 +
+      m_a->M_301 * pot.D_302 + m_a->M_310 * pot.D_311 + m_a->M_400 * pot.D_401;
+  l_b->F_010 +=
+      m_a->M_004 * pot.D_014 + m_a->M_013 * pot.D_023 + m_a->M_022 * pot.D_032 +
+      m_a->M_031 * pot.D_041 + m_a->M_040 * pot.D_050 + m_a->M_103 * pot.D_113 +
+      m_a->M_112 * pot.D_122 + m_a->M_121 * pot.D_131 + m_a->M_130 * pot.D_140 +
+      m_a->M_202 * pot.D_212 + m_a->M_211 * pot.D_221 + m_a->M_220 * pot.D_230 +
+      m_a->M_301 * pot.D_311 + m_a->M_310 * pot.D_320 + m_a->M_400 * pot.D_410;
+  l_b->F_100 +=
+      m_a->M_004 * pot.D_104 + m_a->M_013 * pot.D_113 + m_a->M_022 * pot.D_122 +
+      m_a->M_031 * pot.D_131 + m_a->M_040 * pot.D_140 + m_a->M_103 * pot.D_203 +
+      m_a->M_112 * pot.D_212 + m_a->M_121 * pot.D_221 + m_a->M_130 * pot.D_230 +
+      m_a->M_202 * pot.D_302 + m_a->M_211 * pot.D_311 + m_a->M_220 * pot.D_320 +
+      m_a->M_301 * pot.D_401 + m_a->M_310 * pot.D_410 + m_a->M_400 * pot.D_500;
+
+  /* Compute 5th order field tensor terms (addition to rank 2) */
+  l_b->F_002 += m_a->M_003 * pot.D_005 + m_a->M_012 * pot.D_014 +
+                m_a->M_021 * pot.D_023 + m_a->M_030 * pot.D_032 +
+                m_a->M_102 * pot.D_104 + m_a->M_111 * pot.D_113 +
+                m_a->M_120 * pot.D_122 + m_a->M_201 * pot.D_203 +
+                m_a->M_210 * pot.D_212 + m_a->M_300 * pot.D_302;
+  l_b->F_011 += m_a->M_003 * pot.D_014 + m_a->M_012 * pot.D_023 +
+                m_a->M_021 * pot.D_032 + m_a->M_030 * pot.D_041 +
+                m_a->M_102 * pot.D_113 + m_a->M_111 * pot.D_122 +
+                m_a->M_120 * pot.D_131 + m_a->M_201 * pot.D_212 +
+                m_a->M_210 * pot.D_221 + m_a->M_300 * pot.D_311;
+  l_b->F_020 += m_a->M_003 * pot.D_023 + m_a->M_012 * pot.D_032 +
+                m_a->M_021 * pot.D_041 + m_a->M_030 * pot.D_050 +
+                m_a->M_102 * pot.D_122 + m_a->M_111 * pot.D_131 +
+                m_a->M_120 * pot.D_140 + m_a->M_201 * pot.D_221 +
+                m_a->M_210 * pot.D_230 + m_a->M_300 * pot.D_320;
+  l_b->F_101 += m_a->M_003 * pot.D_104 + m_a->M_012 * pot.D_113 +
+                m_a->M_021 * pot.D_122 + m_a->M_030 * pot.D_131 +
+                m_a->M_102 * pot.D_203 + m_a->M_111 * pot.D_212 +
+                m_a->M_120 * pot.D_221 + m_a->M_201 * pot.D_302 +
+                m_a->M_210 * pot.D_311 + m_a->M_300 * pot.D_401;
+  l_b->F_110 += m_a->M_003 * pot.D_113 + m_a->M_012 * pot.D_122 +
+                m_a->M_021 * pot.D_131 + m_a->M_030 * pot.D_140 +
+                m_a->M_102 * pot.D_212 + m_a->M_111 * pot.D_221 +
+                m_a->M_120 * pot.D_230 + m_a->M_201 * pot.D_311 +
+                m_a->M_210 * pot.D_320 + m_a->M_300 * pot.D_410;
+  l_b->F_200 += m_a->M_003 * pot.D_203 + m_a->M_012 * pot.D_212 +
+                m_a->M_021 * pot.D_221 + m_a->M_030 * pot.D_230 +
+                m_a->M_102 * pot.D_302 + m_a->M_111 * pot.D_311 +
+                m_a->M_120 * pot.D_320 + m_a->M_201 * pot.D_401 +
+                m_a->M_210 * pot.D_410 + m_a->M_300 * pot.D_500;
+
+  /* Compute 5th order field tensor terms (addition to rank 3) */
+  l_b->F_003 += m_a->M_002 * pot.D_005 + m_a->M_011 * pot.D_014 +
+                m_a->M_020 * pot.D_023 + m_a->M_101 * pot.D_104 +
+                m_a->M_110 * pot.D_113 + m_a->M_200 * pot.D_203;
+  l_b->F_012 += m_a->M_002 * pot.D_014 + m_a->M_011 * pot.D_023 +
+                m_a->M_020 * pot.D_032 + m_a->M_101 * pot.D_113 +
+                m_a->M_110 * pot.D_122 + m_a->M_200 * pot.D_212;
+  l_b->F_021 += m_a->M_002 * pot.D_023 + m_a->M_011 * pot.D_032 +
+                m_a->M_020 * pot.D_041 + m_a->M_101 * pot.D_122 +
+                m_a->M_110 * pot.D_131 + m_a->M_200 * pot.D_221;
+  l_b->F_030 += m_a->M_002 * pot.D_032 + m_a->M_011 * pot.D_041 +
+                m_a->M_020 * pot.D_050 + m_a->M_101 * pot.D_131 +
+                m_a->M_110 * pot.D_140 + m_a->M_200 * pot.D_230;
+  l_b->F_102 += m_a->M_002 * pot.D_104 + m_a->M_011 * pot.D_113 +
+                m_a->M_020 * pot.D_122 + m_a->M_101 * pot.D_203 +
+                m_a->M_110 * pot.D_212 + m_a->M_200 * pot.D_302;
+  l_b->F_111 += m_a->M_002 * pot.D_113 + m_a->M_011 * pot.D_122 +
+                m_a->M_020 * pot.D_131 + m_a->M_101 * pot.D_212 +
+                m_a->M_110 * pot.D_221 + m_a->M_200 * pot.D_311;
+  l_b->F_120 += m_a->M_002 * pot.D_122 + m_a->M_011 * pot.D_131 +
+                m_a->M_020 * pot.D_140 + m_a->M_101 * pot.D_221 +
+                m_a->M_110 * pot.D_230 + m_a->M_200 * pot.D_320;
+  l_b->F_201 += m_a->M_002 * pot.D_203 + m_a->M_011 * pot.D_212 +
+                m_a->M_020 * pot.D_221 + m_a->M_101 * pot.D_302 +
+                m_a->M_110 * pot.D_311 + m_a->M_200 * pot.D_401;
+  l_b->F_210 += m_a->M_002 * pot.D_212 + m_a->M_011 * pot.D_221 +
+                m_a->M_020 * pot.D_230 + m_a->M_101 * pot.D_311 +
+                m_a->M_110 * pot.D_320 + m_a->M_200 * pot.D_410;
+  l_b->F_300 += m_a->M_002 * pot.D_302 + m_a->M_011 * pot.D_311 +
+                m_a->M_020 * pot.D_320 + m_a->M_101 * pot.D_401 +
+                m_a->M_110 * pot.D_410 + m_a->M_200 * pot.D_500;
+
+  /* Compute 5th order field tensor terms (addition to rank 4) */
+  l_b->F_004 +=
+      m_a->M_001 * pot.D_005 + m_a->M_010 * pot.D_014 + m_a->M_100 * pot.D_104;
+  l_b->F_013 +=
+      m_a->M_001 * pot.D_014 + m_a->M_010 * pot.D_023 + m_a->M_100 * pot.D_113;
+  l_b->F_022 +=
+      m_a->M_001 * pot.D_023 + m_a->M_010 * pot.D_032 + m_a->M_100 * pot.D_122;
+  l_b->F_031 +=
+      m_a->M_001 * pot.D_032 + m_a->M_010 * pot.D_041 + m_a->M_100 * pot.D_131;
+  l_b->F_040 +=
+      m_a->M_001 * pot.D_041 + m_a->M_010 * pot.D_050 + m_a->M_100 * pot.D_140;
+  l_b->F_103 +=
+      m_a->M_001 * pot.D_104 + m_a->M_010 * pot.D_113 + m_a->M_100 * pot.D_203;
+  l_b->F_112 +=
+      m_a->M_001 * pot.D_113 + m_a->M_010 * pot.D_122 + m_a->M_100 * pot.D_212;
+  l_b->F_121 +=
+      m_a->M_001 * pot.D_122 + m_a->M_010 * pot.D_131 + m_a->M_100 * pot.D_221;
+  l_b->F_130 +=
+      m_a->M_001 * pot.D_131 + m_a->M_010 * pot.D_140 + m_a->M_100 * pot.D_230;
+  l_b->F_202 +=
+      m_a->M_001 * pot.D_203 + m_a->M_010 * pot.D_212 + m_a->M_100 * pot.D_302;
+  l_b->F_211 +=
+      m_a->M_001 * pot.D_212 + m_a->M_010 * pot.D_221 + m_a->M_100 * pot.D_311;
+  l_b->F_220 +=
+      m_a->M_001 * pot.D_221 + m_a->M_010 * pot.D_230 + m_a->M_100 * pot.D_320;
+  l_b->F_301 +=
+      m_a->M_001 * pot.D_302 + m_a->M_010 * pot.D_311 + m_a->M_100 * pot.D_401;
+  l_b->F_310 +=
+      m_a->M_001 * pot.D_311 + m_a->M_010 * pot.D_320 + m_a->M_100 * pot.D_410;
+  l_b->F_400 +=
+      m_a->M_001 * pot.D_401 + m_a->M_010 * pot.D_410 + m_a->M_100 * pot.D_500;
+
+  /* Compute 5th order field tensor terms (addition to rank 5) */
+  l_b->F_005 += m_a->M_000 * pot.D_005;
+  l_b->F_014 += m_a->M_000 * pot.D_014;
+  l_b->F_023 += m_a->M_000 * pot.D_023;
+  l_b->F_032 += m_a->M_000 * pot.D_032;
+  l_b->F_041 += m_a->M_000 * pot.D_041;
+  l_b->F_050 += m_a->M_000 * pot.D_050;
+  l_b->F_104 += m_a->M_000 * pot.D_104;
+  l_b->F_113 += m_a->M_000 * pot.D_113;
+  l_b->F_122 += m_a->M_000 * pot.D_122;
+  l_b->F_131 += m_a->M_000 * pot.D_131;
+  l_b->F_140 += m_a->M_000 * pot.D_140;
+  l_b->F_203 += m_a->M_000 * pot.D_203;
+  l_b->F_212 += m_a->M_000 * pot.D_212;
+  l_b->F_221 += m_a->M_000 * pot.D_221;
+  l_b->F_230 += m_a->M_000 * pot.D_230;
+  l_b->F_302 += m_a->M_000 * pot.D_302;
+  l_b->F_311 += m_a->M_000 * pot.D_311;
+  l_b->F_320 += m_a->M_000 * pot.D_320;
+  l_b->F_401 += m_a->M_000 * pot.D_401;
+  l_b->F_410 += m_a->M_000 * pot.D_410;
+  l_b->F_500 += m_a->M_000 * pot.D_500;
 
 #endif
 #if SELF_GRAVITY_MULTIPOLE_ORDER > 5
 #error "Missing implementation for order >5"
 #endif
-
-    /* Softened case */
-  } else {
-
-    const double eps_inv = props->epsilon_inv;
-    const double r = r2 * r_inv;
-
-    /*  0th order term */
-    l_b->F_000 += m_a->M_000 * D_soft_000(dx, dy, dz, r, eps_inv);
-
-#if SELF_GRAVITY_MULTIPOLE_ORDER > 0
-
-    /*  1st order multipole term (addition to rank 0)*/
-    l_b->F_000 += m_a->M_100 * D_soft_100(dx, dy, dz, r, eps_inv) +
-                  m_a->M_010 * D_soft_010(dx, dy, dz, r, eps_inv) +
-                  m_a->M_001 * D_soft_001(dx, dy, dz, r, eps_inv);
-
-    /*  1st order multipole term (addition to rank 1)*/
-    l_b->F_100 += m_a->M_000 * D_soft_100(dx, dy, dz, r, eps_inv);
-    l_b->F_010 += m_a->M_000 * D_soft_010(dx, dy, dz, r, eps_inv);
-    l_b->F_001 += m_a->M_000 * D_soft_001(dx, dy, dz, r, eps_inv);
-#endif
-#if SELF_GRAVITY_MULTIPOLE_ORDER > 1
-
-    /*  2nd order multipole term (addition to rank 0)*/
-    l_b->F_000 += m_a->M_200 * D_soft_200(dx, dy, dz, r, eps_inv) +
-                  m_a->M_020 * D_soft_020(dx, dy, dz, r, eps_inv) +
-                  m_a->M_002 * D_soft_002(dx, dy, dz, r, eps_inv);
-    l_b->F_000 += m_a->M_110 * D_soft_110(dx, dy, dz, r, eps_inv) +
-                  m_a->M_101 * D_soft_101(dx, dy, dz, r, eps_inv) +
-                  m_a->M_011 * D_soft_011(dx, dy, dz, r, eps_inv);
-
-    /*  2nd order multipole term (addition to rank 1)*/
-    l_b->F_100 += m_a->M_100 * D_soft_200(dx, dy, dz, r, eps_inv) +
-                  m_a->M_010 * D_soft_110(dx, dy, dz, r, eps_inv) +
-                  m_a->M_001 * D_soft_101(dx, dy, dz, r, eps_inv);
-    l_b->F_010 += m_a->M_100 * D_soft_110(dx, dy, dz, r, eps_inv) +
-                  m_a->M_010 * D_soft_020(dx, dy, dz, r, eps_inv) +
-                  m_a->M_001 * D_soft_011(dx, dy, dz, r, eps_inv);
-    l_b->F_001 += m_a->M_100 * D_soft_101(dx, dy, dz, r, eps_inv) +
-                  m_a->M_010 * D_soft_011(dx, dy, dz, r, eps_inv) +
-                  m_a->M_001 * D_soft_002(dx, dy, dz, r, eps_inv);
-
-    /*  2nd order multipole term (addition to rank 2)*/
-    l_b->F_200 += m_a->M_000 * D_soft_200(dx, dy, dz, r, eps_inv);
-    l_b->F_020 += m_a->M_000 * D_soft_020(dx, dy, dz, r, eps_inv);
-    l_b->F_002 += m_a->M_000 * D_soft_002(dx, dy, dz, r, eps_inv);
-    l_b->F_110 += m_a->M_000 * D_soft_110(dx, dy, dz, r, eps_inv);
-    l_b->F_101 += m_a->M_000 * D_soft_101(dx, dy, dz, r, eps_inv);
-    l_b->F_011 += m_a->M_000 * D_soft_011(dx, dy, dz, r, eps_inv);
-#endif
-#if SELF_GRAVITY_MULTIPOLE_ORDER > 2
-
-    /*  3rd order multipole term (addition to rank 0)*/
-    l_b->F_000 += m_a->M_300 * D_soft_300(dx, dy, dz, r, eps_inv) +
-                  m_a->M_030 * D_soft_030(dx, dy, dz, r, eps_inv) +
-                  m_a->M_003 * D_soft_003(dx, dy, dz, r, eps_inv);
-    l_b->F_000 += m_a->M_210 * D_soft_210(dx, dy, dz, r, eps_inv) +
-                  m_a->M_201 * D_soft_201(dx, dy, dz, r, eps_inv) +
-                  m_a->M_120 * D_soft_120(dx, dy, dz, r, eps_inv);
-    l_b->F_000 += m_a->M_021 * D_soft_021(dx, dy, dz, r, eps_inv) +
-                  m_a->M_102 * D_soft_102(dx, dy, dz, r, eps_inv) +
-                  m_a->M_012 * D_soft_012(dx, dy, dz, r, eps_inv);
-    l_b->F_000 += m_a->M_111 * D_soft_111(dx, dy, dz, r, eps_inv);
-
-    /*  3rd order multipole term (addition to rank 1)*/
-    l_b->F_100 += m_a->M_200 * D_soft_300(dx, dy, dz, r, eps_inv) +
-                  m_a->M_020 * D_soft_120(dx, dy, dz, r, eps_inv) +
-                  m_a->M_002 * D_soft_102(dx, dy, dz, r, eps_inv);
-    l_b->F_100 += m_a->M_110 * D_soft_210(dx, dy, dz, r, eps_inv) +
-                  m_a->M_101 * D_soft_201(dx, dy, dz, r, eps_inv) +
-                  m_a->M_011 * D_soft_111(dx, dy, dz, r, eps_inv);
-    l_b->F_010 += m_a->M_200 * D_soft_210(dx, dy, dz, r, eps_inv) +
-                  m_a->M_020 * D_soft_030(dx, dy, dz, r, eps_inv) +
-                  m_a->M_002 * D_soft_012(dx, dy, dz, r, eps_inv);
-    l_b->F_010 += m_a->M_110 * D_soft_120(dx, dy, dz, r, eps_inv) +
-                  m_a->M_101 * D_soft_111(dx, dy, dz, r, eps_inv) +
-                  m_a->M_011 * D_soft_021(dx, dy, dz, r, eps_inv);
-    l_b->F_001 += m_a->M_200 * D_soft_201(dx, dy, dz, r, eps_inv) +
-                  m_a->M_020 * D_soft_021(dx, dy, dz, r, eps_inv) +
-                  m_a->M_002 * D_soft_003(dx, dy, dz, r, eps_inv);
-    l_b->F_001 += m_a->M_110 * D_soft_111(dx, dy, dz, r, eps_inv) +
-                  m_a->M_101 * D_soft_102(dx, dy, dz, r, eps_inv) +
-                  m_a->M_011 * D_soft_012(dx, dy, dz, r, eps_inv);
-
-    /*  3rd order multipole term (addition to rank 2)*/
-    l_b->F_200 += m_a->M_100 * D_soft_300(dx, dy, dz, r, eps_inv) +
-                  m_a->M_010 * D_soft_210(dx, dy, dz, r, eps_inv) +
-                  m_a->M_001 * D_soft_201(dx, dy, dz, r, eps_inv);
-    l_b->F_020 += m_a->M_100 * D_soft_120(dx, dy, dz, r, eps_inv) +
-                  m_a->M_010 * D_soft_030(dx, dy, dz, r, eps_inv) +
-                  m_a->M_001 * D_soft_021(dx, dy, dz, r, eps_inv);
-    l_b->F_002 += m_a->M_100 * D_soft_102(dx, dy, dz, r, eps_inv) +
-                  m_a->M_010 * D_soft_012(dx, dy, dz, r, eps_inv) +
-                  m_a->M_001 * D_soft_003(dx, dy, dz, r, eps_inv);
-    l_b->F_110 += m_a->M_100 * D_soft_210(dx, dy, dz, r, eps_inv) +
-                  m_a->M_010 * D_soft_120(dx, dy, dz, r, eps_inv) +
-                  m_a->M_001 * D_soft_111(dx, dy, dz, r, eps_inv);
-    l_b->F_101 += m_a->M_100 * D_soft_201(dx, dy, dz, r, eps_inv) +
-                  m_a->M_010 * D_soft_111(dx, dy, dz, r, eps_inv) +
-                  m_a->M_001 * D_soft_102(dx, dy, dz, r, eps_inv);
-    l_b->F_011 += m_a->M_100 * D_soft_111(dx, dy, dz, r, eps_inv) +
-                  m_a->M_010 * D_soft_021(dx, dy, dz, r, eps_inv) +
-                  m_a->M_001 * D_soft_012(dx, dy, dz, r, eps_inv);
-
-    /*  3rd order multipole term (addition to rank 3)*/
-    l_b->F_300 += m_a->M_000 * D_soft_300(dx, dy, dz, r, eps_inv);
-    l_b->F_030 += m_a->M_000 * D_soft_030(dx, dy, dz, r, eps_inv);
-    l_b->F_003 += m_a->M_000 * D_soft_003(dx, dy, dz, r, eps_inv);
-    l_b->F_210 += m_a->M_000 * D_soft_210(dx, dy, dz, r, eps_inv);
-    l_b->F_201 += m_a->M_000 * D_soft_201(dx, dy, dz, r, eps_inv);
-    l_b->F_120 += m_a->M_000 * D_soft_120(dx, dy, dz, r, eps_inv);
-    l_b->F_021 += m_a->M_000 * D_soft_021(dx, dy, dz, r, eps_inv);
-    l_b->F_102 += m_a->M_000 * D_soft_102(dx, dy, dz, r, eps_inv);
-    l_b->F_012 += m_a->M_000 * D_soft_012(dx, dy, dz, r, eps_inv);
-    l_b->F_111 += m_a->M_000 * D_soft_111(dx, dy, dz, r, eps_inv);
-#endif
-  }
 }
 
 /**
@@ -2185,7 +1901,7 @@ INLINE static void gravity_L2L(struct grav_tensor *la,
                                const double pos_a[3], const double pos_b[3]) {
 
   /* Initialise everything to zero */
-  gravity_field_tensors_init(la);
+  gravity_field_tensors_init(la, 0);
 
 #ifdef SWIFT_DEBUG_CHECKS
   if (lb->num_interacted == 0) error("Shifting tensors that did not interact");
@@ -2637,57 +2353,118 @@ INLINE static void gravity_L2P(const struct grav_tensor *lb,
   gp->a_grav[2] += a_grav[2];
 }
 
+/**
+ * @brief Applies the monopole term of a #multipole to a #gpart (M2P kernel).
+ *
+ * Only M_000 is used: the multipole acts as a point mass located at loc.
+ * The interaction is Newtonian when the squared distance is at least
+ * props->epsilon2; otherwise the weight returned by kernel_grav_eval() is
+ * used to soften it. The result is accumulated into gp->a_grav.
+ *
+ * NOTE(review): the body is compiled out when SELF_GRAVITY_MULTIPOLE_ORDER
+ * is 0, turning the call into a no-op -- confirm this is intended.
+ *
+ * @param ma The #multipole acting on the particle.
+ * @param props The #gravity_props supplying the softening constants.
+ * @param loc The position assigned to the multipole (presumably its centre of
+ * mass -- TODO confirm against the callers).
+ * @param gp The #gpart receiving the acceleration.
+ */
+INLINE static void gravity_M2P(const struct multipole *ma,
+                               const struct gravity_props *props,
+                               const double loc[3], struct gpart *gp) {
+
+#if SELF_GRAVITY_MULTIPOLE_ORDER > 0
+
+  const float eps2 = props->epsilon2;
+  const float eps_inv = props->epsilon_inv;
+  const float eps_inv3 = props->epsilon_inv3;
+
+  /* Separation vector (particle minus multipole position), and its square */
+  const float dx = gp->x[0] - loc[0];
+  const float dy = gp->x[1] - loc[1];
+  const float dz = gp->x[2] - loc[2];
+  const float r2 = dx * dx + dy * dy + dz * dz;
+
+  /* Get the inverse distance */
+  /* NOTE(review): for r2 == 0 this is inf under IEEE arithmetic and the
+   * products below turn into NaN; presumably callers never pass a particle
+   * located exactly at loc -- confirm. */
+  const float r_inv = 1.f / sqrtf(r2);
+
+  /* f multiplies the separation vector; W receives the kernel value */
+  float f, W;
+
+  if (r2 >= eps2) {
+
+    /* Get Newtonian gravity */
+    f = ma->M_000 * r_inv * r_inv * r_inv;
+
+  } else {
+
+    /* r = r2 / sqrt(r2); u rescales it by props->epsilon_inv */
+    const float r = r2 * r_inv;
+    const float u = r * eps_inv;
+
+    kernel_grav_eval(u, &W);
+
+    /* Get softened gravity */
+    f = ma->M_000 * eps_inv3 * W;
+  }
+
+  /* Accumulate -f * (dx, dy, dz) into the particle's acceleration */
+  gp->a_grav[0] -= f * dx;
+  gp->a_grav[1] -= f * dy;
+  gp->a_grav[2] -= f * dz;
+
+#endif /* SELF_GRAVITY_MULTIPOLE_ORDER > 0 */
+}
+
 /**
  * @brief Checks whether a cell-cell interaction can be appromixated by a M-M
- * interaction using the CoM and cell radius at rebuild.
+ * interaction using the distance between them and the sum of their sizes.
  *
  * We use the multipole acceptance criterion of Dehnen, 2002, JCoPh, Volume 179,
  * Issue 1, pp.27-42, equation 10.
  *
- * @param ma The #multipole of the first #cell.
- * @param mb The #multipole of the second #cell.
- * @param theta_crit_inv The inverse of the critical opening angle.
+ * @param r_crit_a The size of the multipole A.
+ * @param r_crit_b The size of the multipole B.
+ * @param theta_crit2 The square of the critical opening angle.
  * @param r2 Square of the distance (periodically wrapped) between the
  * multipoles.
  */
-__attribute__((always_inline)) INLINE static int
-gravity_multipole_accept_rebuild(const struct gravity_tensors *const ma,
-                                 const struct gravity_tensors *const mb,
-                                 double theta_crit_inv, double r2) {
+__attribute__((always_inline)) INLINE static int gravity_M2L_accept(
+    double r_crit_a, double r_crit_b, double theta_crit2, double r2) {
 
-  const double r_crit_a = ma->r_max_rebuild * theta_crit_inv;
-  const double r_crit_b = mb->r_max_rebuild * theta_crit_inv;
+  const double size = r_crit_a + r_crit_b;
+  const double size2 = size * size; /* compared with r2 * theta_crit2 */
 
   // MATTHIEU: Make this mass-dependent ?
 
   /* Multipole acceptance criterion (Dehnen 2002, eq.10) */
-  return (r2 > (r_crit_a + r_crit_b) * (r_crit_a + r_crit_b));
+  return (r2 * theta_crit2 > size2); /* 1: M-M accepted, 0: rejected */
 }
 
 /**
- * @brief Checks whether a cell-cell interaction can be appromixated by a M-M
- * interaction using the CoM and cell radius at the current time.
+ * @brief Checks whether a particle-cell interaction can be approximated by
+ * an M2P interaction using the particle-multipole distance and the size of
+ * the multipole.
  *
  * We use the multipole acceptance criterion of Dehnen, 2002, JCoPh, Volume 179,
  * Issue 1, pp.27-42, equation 10.
  *
- * @param ma The #multipole of the first #cell.
- * @param mb The #multipole of the second #cell.
- * @param theta_crit_inv The inverse of the critical opening angle.
+ * @param r_max2 The square of the size of the multipole.
+ * @param theta_crit2 The square of the critical opening angle.
  * @param r2 Square of the distance (periodically wrapped) between the
  * multipoles.
  */
-__attribute__((always_inline)) INLINE static int gravity_multipole_accept(
-    const struct gravity_tensors *const ma,
-    const struct gravity_tensors *const mb, double theta_crit_inv, double r2) {
-
-  const double r_crit_a = ma->r_max * theta_crit_inv;
-  const double r_crit_b = mb->r_max * theta_crit_inv;
+__attribute__((always_inline)) INLINE static int gravity_M2P_accept(
+    float r_max2, float theta_crit2, float r2) {
 
   // MATTHIEU: Make this mass-dependent ?
 
   /* Multipole acceptance criterion (Dehnen 2002, eq.10) */
-  return (r2 > (r_crit_a + r_crit_b) * (r_crit_a + r_crit_b));
+  return (r2 * theta_crit2 > r_max2); /* 1: M2P accepted, 0: rejected */
 }
 
 #endif /* SWIFT_MULTIPOLE_H */
diff --git a/src/runner.c b/src/runner.c
index dd3f3c8e4e59af15485ece16665ffdae85703117..69c1512479e07da0aacad0a9e28bcaa6aafce104 100644
--- a/src/runner.c
+++ b/src/runner.c
@@ -557,7 +557,7 @@ void runner_do_init_grav(struct runner *r, struct cell *c, int timer) {
   cell_drift_multipole(c, e);
 
   /* Reset the gravity acceleration tensors */
-  gravity_field_tensors_init(&c->multipole->pot);
+  gravity_field_tensors_init(&c->multipole->pot, e->ti_current);
 
   /* Recurse? */
   if (c->split) {
@@ -903,7 +903,7 @@ void runner_do_drift_gpart(struct runner *r, struct cell *c, int timer) {
 
   TIMER_TIC;
 
-  cell_drift_gpart(c, r->e);
+  cell_drift_gpart(c, r->e, 0);
 
   if (timer) TIMER_TOC(timer_drift_gpart);
 }
@@ -1472,17 +1472,19 @@ void runner_do_end_force(struct runner *r, struct cell *c, int timer) {
 #ifdef SWIFT_DEBUG_CHECKS
         if (e->policy & engine_policy_self_gravity) {
 
+          /* Let's add a self interaction to simplify the count */
+          gp->num_interacted++;
+
           /* Check that this gpart has interacted with all the other
            * particles (via direct or multipoles) in the box */
-          gp->num_interacted++;
           if (gp->num_interacted != (long long)e->s->nr_gparts)
             error(
-                "g-particle (id=%lld, type=%d) did not interact "
+                "g-particle (id=%lld, type=%s) did not interact "
                 "gravitationally "
                 "with all other gparts gp->num_interacted=%lld, "
                 "total_gparts=%zd",
-                gp->id_or_neg_offset, gp->type, gp->num_interacted,
-                e->s->nr_gparts);
+                gp->id_or_neg_offset, part_type_names[gp->type],
+                gp->num_interacted, e->s->nr_gparts);
         }
 #endif
       }
@@ -1900,10 +1902,6 @@ void *runner_main(void *data) {
 #endif
           else if (t->subtype == task_subtype_force)
             runner_dosub_self2_force(r, ci, 1);
-          else if (t->subtype == task_subtype_grav)
-            runner_dosub_grav(r, ci, cj, 1);
-          else if (t->subtype == task_subtype_external_grav)
-            runner_do_grav_external(r, ci, 1);
           else
             error("Unknown/invalid task subtype (%d).", t->subtype);
           break;
@@ -1917,8 +1915,6 @@ void *runner_main(void *data) {
 #endif
           else if (t->subtype == task_subtype_force)
             runner_dosub_pair2_force(r, ci, cj, t->flags, 1);
-          else if (t->subtype == task_subtype_grav)
-            runner_dosub_grav(r, ci, cj, 1);
           else
             error("Unknown/invalid task subtype (%d).", t->subtype);
           break;
diff --git a/src/runner_doiact_grav.h b/src/runner_doiact_grav.h
index 69f821f0a991cb797a6ae6b2002ed83986759d86..dbf2311839f62ec25ebba95bc092d9c2306b4dea 100644
--- a/src/runner_doiact_grav.h
+++ b/src/runner_doiact_grav.h
@@ -47,6 +47,8 @@ void runner_do_grav_down(struct runner *r, struct cell *c, int timer) {
 
 #ifdef SWIFT_DEBUG_CHECKS
   if (c->ti_old_multipole != e->ti_current) error("c->multipole not drifted.");
+  if (c->multipole->pot.ti_init != e->ti_current)
+    error("c->field tensor not initialised");
 #endif
 
   if (c->split) { /* Node case */
@@ -61,15 +63,21 @@ void runner_do_grav_down(struct runner *r, struct cell *c, int timer) {
 #ifdef SWIFT_DEBUG_CHECKS
         if (cp->ti_old_multipole != e->ti_current)
           error("cp->multipole not drifted.");
+        if (cp->multipole->pot.ti_init != e->ti_current)
+          error("cp->field tensor not initialised");
 #endif
         struct grav_tensor shifted_tensor;
 
-        /* Shift the field tensor */
-        gravity_L2L(&shifted_tensor, &c->multipole->pot, cp->multipole->CoM,
-                    c->multipole->CoM);
+        /* If the tensor received any contribution, push it down */
+        if (c->multipole->pot.interacted) {
 
-        /* Add it to this level's tensor */
-        gravity_field_tensors_add(&cp->multipole->pot, &shifted_tensor);
+          /* Shift the field tensor */
+          gravity_L2L(&shifted_tensor, &c->multipole->pot, cp->multipole->CoM,
+                      c->multipole->CoM);
+
+          /* Add it to this level's tensor */
+          gravity_field_tensors_add(&cp->multipole->pot, &shifted_tensor);
+        }
 
         /* Recurse */
         runner_do_grav_down(r, cp, 0);
@@ -78,6 +86,11 @@ void runner_do_grav_down(struct runner *r, struct cell *c, int timer) {
 
   } else { /* Leaf case */
 
+    /* We can abort early if no interactions via multipole happened */
+    if (!c->multipole->pot.interacted) return;
+
+    if (!cell_are_gpart_drifted(c, e)) error("Un-drifted gparts");
+
     /* Apply accelerations to the particles */
     for (int i = 0; i < gcount; ++i) {
 
@@ -91,6 +104,8 @@ void runner_do_grav_down(struct runner *r, struct cell *c, int timer) {
         /* Check that particles have been drifted to the current time */
         if (gp->ti_drift != e->ti_current)
           error("gpart not drifted to current time");
+        if (c->multipole->pot.ti_init != e->ti_current)
+          error("c->field tensor not initialised");
 #endif
 
         /* Apply the kernel */
@@ -135,8 +150,8 @@ void runner_dopair_grav_mm(const struct runner *r, struct cell *restrict ci,
 
   if (multi_j->M_000 == 0.f) error("Multipole does not seem to have been set.");
 
-  if (ci->ti_old_multipole != e->ti_current)
-    error("ci->multipole not drifted.");
+  if (ci->multipole->pot.ti_init != e->ti_current)
+    error("ci->grav tensor not initialised.");
 #endif
 
   /* Do we need to drift the multipole ? */
@@ -149,763 +164,460 @@ void runner_dopair_grav_mm(const struct runner *r, struct cell *restrict ci,
   TIMER_TOC(timer_dopair_grav_mm);
 }
 
-/**
- * @brief Computes the interaction of all the particles in a cell with all the
- * particles of another cell using the full Newtonian potential
- *
- * @param r The #runner.
- * @param ci The first #cell.
- * @param cj The other #cell.
- * @param shift The distance vector (periodically wrapped) between the cell
- * centres.
- */
-void runner_dopair_grav_pp_full(struct runner *r, struct cell *ci,
-                                struct cell *cj, double shift[3]) {
-
-  /* Some constants */
-  const struct engine *const e = r->e;
-  struct gravity_cache *const ci_cache = &r->ci_gravity_cache;
-  struct gravity_cache *const cj_cache = &r->cj_gravity_cache;
+static INLINE void runner_dopair_grav_pp_full(const struct engine *e,
+                                              struct gravity_cache *ci_cache,
+                                              struct gravity_cache *cj_cache,
+                                              int gcount_i, int gcount_j,
+                                              int gcount_padded_j,
+                                              struct gpart *restrict gparts_i,
+                                              struct gpart *restrict gparts_j) {
 
-  /* Cell properties */
-  const int gcount_i = ci->gcount;
-  const int gcount_j = cj->gcount;
-  struct gpart *restrict gparts_i = ci->gparts;
-  struct gpart *restrict gparts_j = cj->gparts;
-  const int ci_active = cell_is_active(ci, e);
-  const int cj_active = cell_is_active(cj, e);
-  const double loc_i[3] = {ci->loc[0], ci->loc[1], ci->loc[2]};
-  const double loc_j[3] = {cj->loc[0], cj->loc[1], cj->loc[2]};
-  const double loc_mean[3] = {0.5 * (loc_i[0] + loc_j[0]),
-                              0.5 * (loc_i[1] + loc_j[1]),
-                              0.5 * (loc_i[2] + loc_j[2])};
+  TIMER_TIC;
 
-  /* Anything to do here ?*/
-  if (!ci_active && !cj_active) return;
+  /* Loop over all particles in ci... */
+  for (int pid = 0; pid < gcount_i; pid++) {
 
-  /* Check that we fit in cache */
-  if (gcount_i > ci_cache->count || gcount_j > cj_cache->count)
-    error("Not enough space in the caches! gcount_i=%d gcount_j=%d", gcount_i,
-          gcount_j);
+    /* Skip inactive particles */
+    if (!ci_cache->active[pid]) continue;
 
-  /* Computed the padded counts */
-  const int gcount_padded_i = gcount_i - (gcount_i % VEC_SIZE) + VEC_SIZE;
-  const int gcount_padded_j = gcount_j - (gcount_j % VEC_SIZE) + VEC_SIZE;
+    /* Skip particles that can use the multipole */
+    if (ci_cache->use_mpole[pid]) continue;
 
-  /* Fill the caches */
-  gravity_cache_populate(ci_cache, gparts_i, gcount_i, gcount_padded_i,
-                         loc_mean, ci);
-  gravity_cache_populate(cj_cache, gparts_j, gcount_j, gcount_padded_j,
-                         loc_mean, cj);
+#ifdef SWIFT_DEBUG_CHECKS
+    if (!gpart_is_active(&gparts_i[pid], e))
+      error("Active particle went through the cache");
+#endif
 
-  /* Ok... Here we go ! */
+    const float x_i = ci_cache->x[pid];
+    const float y_i = ci_cache->y[pid];
+    const float z_i = ci_cache->z[pid];
 
-  if (ci_active) {
+    /* Some powers of the softening length */
+    const float h_i = ci_cache->epsilon[pid];
+    const float h2_i = h_i * h_i;
+    const float h_inv_i = 1.f / h_i;
+    const float h_inv3_i = h_inv_i * h_inv_i * h_inv_i;
 
-    /* Loop over all particles in ci... */
-    for (int pid = 0; pid < gcount_i; pid++) {
+    /* Local accumulators for the acceleration */
+    float a_x = 0.f, a_y = 0.f, a_z = 0.f;
 
-      /* Skip inactive particles */
-      if (!gpart_is_active(&gparts_i[pid], e)) continue;
+    /* Make the compiler understand we are in happy vectorization land */
+    swift_align_information(cj_cache->x, SWIFT_CACHE_ALIGNMENT);
+    swift_align_information(cj_cache->y, SWIFT_CACHE_ALIGNMENT);
+    swift_align_information(cj_cache->z, SWIFT_CACHE_ALIGNMENT);
+    swift_align_information(cj_cache->m, SWIFT_CACHE_ALIGNMENT);
+    swift_assume_size(gcount_padded_j, VEC_SIZE);
 
-      const float x_i = ci_cache->x[pid];
-      const float y_i = ci_cache->y[pid];
-      const float z_i = ci_cache->z[pid];
+    /* Loop over every particle in the other cell. */
+    for (int pjd = 0; pjd < gcount_padded_j; pjd++) {
 
-      /* Some powers of the softening length */
-      const float h_i = ci_cache->epsilon[pid];
-      const float h2_i = h_i * h_i;
-      const float h_inv_i = 1.f / h_i;
-      const float h_inv3_i = h_inv_i * h_inv_i * h_inv_i;
+      /* Get info about j */
+      const float x_j = cj_cache->x[pjd];
+      const float y_j = cj_cache->y[pjd];
+      const float z_j = cj_cache->z[pjd];
+      const float mass_j = cj_cache->m[pjd];
 
-      /* Local accumulators for the acceleration */
-      float a_x = 0.f, a_y = 0.f, a_z = 0.f;
+      /* Compute the pairwise (square) distance. */
+      const float dx = x_i - x_j;
+      const float dy = y_i - y_j;
+      const float dz = z_i - z_j;
+      const float r2 = dx * dx + dy * dy + dz * dz;
 
-      /* Make the compiler understand we are in happy vectorization land */
-      swift_align_information(cj_cache->x, SWIFT_CACHE_ALIGNMENT);
-      swift_align_information(cj_cache->y, SWIFT_CACHE_ALIGNMENT);
-      swift_align_information(cj_cache->z, SWIFT_CACHE_ALIGNMENT);
-      swift_align_information(cj_cache->m, SWIFT_CACHE_ALIGNMENT);
-      swift_assume_size(gcount_padded_j, VEC_SIZE);
+#ifdef SWIFT_DEBUG_CHECKS
+      if (r2 == 0.f) error("Interacting particles with 0 distance");
 
-      /* Loop over every particle in the other cell. */
-      for (int pjd = 0; pjd < gcount_padded_j; pjd++) {
+      /* Check that particles have been drifted to the current time */
+      if (gparts_i[pid].ti_drift != e->ti_current)
+        error("gpi not drifted to current time");
+      if (pjd < gcount_j && gparts_j[pjd].ti_drift != e->ti_current)
+        error("gpj not drifted to current time");
+#endif
 
-        /* Get info about j */
-        const float x_j = cj_cache->x[pjd];
-        const float y_j = cj_cache->y[pjd];
-        const float z_j = cj_cache->z[pjd];
-        const float mass_j = cj_cache->m[pjd];
+      /* Interact! */
+      float f_ij;
+      runner_iact_grav_pp_full(r2, h2_i, h_inv_i, h_inv3_i, mass_j, &f_ij);
 
-        /* Compute the pairwise (square) distance. */
-        const float dx = x_i - x_j;
-        const float dy = y_i - y_j;
-        const float dz = z_i - z_j;
-        const float r2 = dx * dx + dy * dy + dz * dz;
+      /* Store it back */
+      a_x -= f_ij * dx;
+      a_y -= f_ij * dy;
+      a_z -= f_ij * dz;
 
 #ifdef SWIFT_DEBUG_CHECKS
-        if (r2 == 0.f) error("Interacting particles with 0 distance");
-
-        /* Check that particles have been drifted to the current time */
-        if (gparts_i[pid].ti_drift != e->ti_current)
-          error("gpi not drifted to current time");
-        if (pjd < gcount_j && gparts_j[pjd].ti_drift != e->ti_current)
-          error("gpj not drifted to current time");
+      /* Update the interaction counter if it's not a padded gpart */
+      if (pjd < gcount_j) gparts_i[pid].num_interacted++;
 #endif
+    }
 
-        /* Get the inverse distance */
-        const float r_inv = 1.f / sqrtf(r2);
-
-        float f_ij, W_ij;
-
-        if (r2 >= h2_i) {
+    /* Store everything back in cache */
+    ci_cache->a_x[pid] = a_x;
+    ci_cache->a_y[pid] = a_y;
+    ci_cache->a_z[pid] = a_z;
+  }
 
-          /* Get Newtonian gravity */
-          f_ij = mass_j * r_inv * r_inv * r_inv;
+  TIMER_TOC(timer_dopair_grav_pp);
+}
 
-        } else {
+static INLINE void runner_dopair_grav_pp_truncated(
+    const struct engine *e, const float rlr_inv, struct gravity_cache *ci_cache,
+    struct gravity_cache *cj_cache, int gcount_i, int gcount_j,
+    int gcount_padded_j, struct gpart *restrict gparts_i,
+    struct gpart *restrict gparts_j) {
 
-          const float r = r2 * r_inv;
-          const float ui = r * h_inv_i;
+  TIMER_TIC;
 
-          kernel_grav_eval(ui, &W_ij);
+  /* Loop over all particles in ci... */
+  for (int pid = 0; pid < gcount_i; pid++) {
 
-          /* Get softened gravity */
-          f_ij = mass_j * h_inv3_i * W_ij;
-        }
+    /* Skip inactive particles */
+    if (!ci_cache->active[pid]) continue;
 
-        /* Store it back */
-        a_x -= f_ij * dx;
-        a_y -= f_ij * dy;
-        a_z -= f_ij * dz;
+    /* Skip particles that can use the multipole */
+    if (ci_cache->use_mpole[pid]) continue;
 
 #ifdef SWIFT_DEBUG_CHECKS
-        /* Update the interaction counter if it's not a padded gpart */
-        if (pjd < gcount_j) gparts_i[pid].num_interacted++;
+    if (!gpart_is_active(&gparts_i[pid], e))
+      error("Active particle went through the cache");
 #endif
-      }
 
-      /* Store everything back in cache */
-      ci_cache->a_x[pid] = a_x;
-      ci_cache->a_y[pid] = a_y;
-      ci_cache->a_z[pid] = a_z;
-    }
-  }
+    const float x_i = ci_cache->x[pid];
+    const float y_i = ci_cache->y[pid];
+    const float z_i = ci_cache->z[pid];
 
-  /* Now do the opposite loop */
-  if (cj_active) {
+    /* Some powers of the softening length */
+    const float h_i = ci_cache->epsilon[pid];
+    const float h2_i = h_i * h_i;
+    const float h_inv_i = 1.f / h_i;
+    const float h_inv3_i = h_inv_i * h_inv_i * h_inv_i;
 
-    /* Loop over all particles in ci... */
-    for (int pjd = 0; pjd < gcount_j; pjd++) {
+    /* Local accumulators for the acceleration */
+    float a_x = 0.f, a_y = 0.f, a_z = 0.f;
 
-      /* Skip inactive particles */
-      if (!gpart_is_active(&gparts_j[pjd], e)) continue;
+    /* Make the compiler understand we are in happy vectorization land */
+    swift_align_information(cj_cache->x, SWIFT_CACHE_ALIGNMENT);
+    swift_align_information(cj_cache->y, SWIFT_CACHE_ALIGNMENT);
+    swift_align_information(cj_cache->z, SWIFT_CACHE_ALIGNMENT);
+    swift_align_information(cj_cache->m, SWIFT_CACHE_ALIGNMENT);
+    swift_assume_size(gcount_padded_j, VEC_SIZE);
 
+    /* Loop over every particle in the other cell. */
+    for (int pjd = 0; pjd < gcount_padded_j; pjd++) {
+
+      /* Get info about j */
       const float x_j = cj_cache->x[pjd];
       const float y_j = cj_cache->y[pjd];
       const float z_j = cj_cache->z[pjd];
+      const float mass_j = cj_cache->m[pjd];
 
-      /* Some powers of the softening length */
-      const float h_j = cj_cache->epsilon[pjd];
-      const float h2_j = h_j * h_j;
-      const float h_inv_j = 1.f / h_j;
-      const float h_inv3_j = h_inv_j * h_inv_j * h_inv_j;
-
-      /* Local accumulators for the acceleration */
-      float a_x = 0.f, a_y = 0.f, a_z = 0.f;
-
-      /* Make the compiler understand we are in happy vectorization land */
-      swift_align_information(ci_cache->x, SWIFT_CACHE_ALIGNMENT);
-      swift_align_information(ci_cache->y, SWIFT_CACHE_ALIGNMENT);
-      swift_align_information(ci_cache->z, SWIFT_CACHE_ALIGNMENT);
-      swift_align_information(ci_cache->m, SWIFT_CACHE_ALIGNMENT);
-      swift_assume_size(gcount_padded_i, VEC_SIZE);
-
-      /* Loop over every particle in the other cell. */
-      for (int pid = 0; pid < gcount_padded_i; pid++) {
-
-        /* Get info about j */
-        const float x_i = ci_cache->x[pid];
-        const float y_i = ci_cache->y[pid];
-        const float z_i = ci_cache->z[pid];
-        const float mass_i = ci_cache->m[pid];
-
-        /* Compute the pairwise (square) distance. */
-        const float dx = x_j - x_i;
-        const float dy = y_j - y_i;
-        const float dz = z_j - z_i;
-        const float r2 = dx * dx + dy * dy + dz * dz;
+      /* Compute the pairwise (square) distance. */
+      const float dx = x_i - x_j;
+      const float dy = y_i - y_j;
+      const float dz = z_i - z_j;
+      const float r2 = dx * dx + dy * dy + dz * dz;
 
 #ifdef SWIFT_DEBUG_CHECKS
-        if (r2 == 0.f) error("Interacting particles with 0 distance");
+      if (r2 == 0.f) error("Interacting particles with 0 distance");
 
-        /* Check that particles have been drifted to the current time */
-        if (gparts_j[pjd].ti_drift != e->ti_current)
-          error("gpj not drifted to current time");
-        if (pid < gcount_i && gparts_i[pid].ti_drift != e->ti_current)
-          error("gpi not drifted to current time");
+      /* Check that particles have been drifted to the current time */
+      if (gparts_i[pid].ti_drift != e->ti_current)
+        error("gpi not drifted to current time");
+      if (pjd < gcount_j && gparts_j[pjd].ti_drift != e->ti_current)
+        error("gpj not drifted to current time");
 #endif
 
-        /* Get the inverse distance */
-        const float r_inv = 1.f / sqrtf(r2);
-
-        float f_ji, W_ji;
-
-        if (r2 >= h2_j) {
-
-          /* Get Newtonian gravity */
-          f_ji = mass_i * r_inv * r_inv * r_inv;
-
-        } else {
-
-          const float r = r2 * r_inv;
-          const float uj = r * h_inv_j;
+      /* Interact! */
+      float f_ij;
+      runner_iact_grav_pp_truncated(r2, h2_i, h_inv_i, h_inv3_i, mass_j,
+                                    rlr_inv, &f_ij);
 
-          kernel_grav_eval(uj, &W_ji);
-
-          /* Get softened gravity */
-          f_ji = mass_i * h_inv3_j * W_ji;
-        }
-
-        /* Store it back */
-        a_x -= f_ji * dx;
-        a_y -= f_ji * dy;
-        a_z -= f_ji * dz;
+      /* Store it back */
+      a_x -= f_ij * dx;
+      a_y -= f_ij * dy;
+      a_z -= f_ij * dz;
 
 #ifdef SWIFT_DEBUG_CHECKS
-        /* Update the interaction counter if it's not a padded gpart */
-        if (pid < gcount_i) gparts_j[pjd].num_interacted++;
+      /* Update the interaction counter if it's not a padded gpart */
+      if (pjd < gcount_j) gparts_i[pid].num_interacted++;
 #endif
-      }
-
-      /* Store everything back in cache */
-      cj_cache->a_x[pjd] = a_x;
-      cj_cache->a_y[pjd] = a_y;
-      cj_cache->a_z[pjd] = a_z;
     }
-  }
 
-  /* Write back to the particles */
-  if (ci_active) gravity_cache_write_back(ci_cache, gparts_i, gcount_i);
-  if (cj_active) gravity_cache_write_back(cj_cache, gparts_j, gcount_j);
+    /* Store everything back in cache */
+    ci_cache->a_x[pid] = a_x;
+    ci_cache->a_y[pid] = a_y;
+    ci_cache->a_z[pid] = a_z;
+  }
 
-#ifdef MATTHIEU_OLD_STUFF
+  TIMER_TOC(timer_dopair_grav_pp);
+}
 
-  /* Some constants */
-  const struct engine *const e = r->e;
+static INLINE void runner_dopair_grav_pm(
+    const struct engine *restrict e, struct gravity_cache *ci_cache,
+    int gcount_i, int gcount_padded_i, struct gpart *restrict gparts_i,
+    const float CoM_j[3], const struct multipole *restrict multi_j,
+    struct cell *restrict cj) {
 
-  /* Cell properties */
-  const int gcount_i = ci->gcount;
-  const int gcount_j = cj->gcount;
-  struct gpart *restrict gparts_i = ci->gparts;
-  struct gpart *restrict gparts_j = cj->gparts;
+  TIMER_TIC;
 
-  /* MATTHIEU: Should we use local DP accumulators ? */
+  /* Make the compiler understand we are in happy vectorization land */
+  swift_declare_aligned_ptr(float, x, ci_cache->x, SWIFT_CACHE_ALIGNMENT);
+  swift_declare_aligned_ptr(float, y, ci_cache->y, SWIFT_CACHE_ALIGNMENT);
+  swift_declare_aligned_ptr(float, z, ci_cache->z, SWIFT_CACHE_ALIGNMENT);
+  swift_declare_aligned_ptr(float, epsilon, ci_cache->epsilon,
+                            SWIFT_CACHE_ALIGNMENT);
+  swift_declare_aligned_ptr(float, a_x, ci_cache->a_x, SWIFT_CACHE_ALIGNMENT);
+  swift_declare_aligned_ptr(float, a_y, ci_cache->a_y, SWIFT_CACHE_ALIGNMENT);
+  swift_declare_aligned_ptr(float, a_z, ci_cache->a_z, SWIFT_CACHE_ALIGNMENT);
+  swift_declare_aligned_ptr(int, active, ci_cache->active,
+                            SWIFT_CACHE_ALIGNMENT);
+  swift_declare_aligned_ptr(int, use_mpole, ci_cache->use_mpole,
+                            SWIFT_CACHE_ALIGNMENT);
+  swift_assume_size(gcount_padded_i, VEC_SIZE);
 
   /* Loop over all particles in ci... */
-  if (cell_is_active(ci, e)) {
-    for (int pid = 0; pid < gcount_i; pid++) {
-
-      /* Get a hold of the ith part in ci. */
-      struct gpart *restrict gpi = &gparts_i[pid];
-
-      if (!gpart_is_active(gpi, e)) continue;
-
-      /* Apply boundary condition */
-      const double pix[3] = {gpi->x[0] - shift[0], gpi->x[1] - shift[1],
-                             gpi->x[2] - shift[2]};
-
-      /* Loop over every particle in the other cell. */
-      for (int pjd = 0; pjd < gcount_j; pjd++) {
-
-        /* Get a hold of the jth part in cj. */
-        const struct gpart *restrict gpj = &gparts_j[pjd];
+  for (int pid = 0; pid < gcount_padded_i; pid++) {
 
-        /* Compute the pairwise distance. */
-        const float dx[3] = {pix[0] - gpj->x[0],   // x
-                             pix[1] - gpj->x[1],   // y
-                             pix[2] - gpj->x[2]};  // z
-        const float r2 = dx[0] * dx[0] + dx[1] * dx[1] + dx[2] * dx[2];
-
-#ifdef SWIFT_DEBUG_CHECKS
-        /* Check that particles have been drifted to the current time */
-        if (gpi->ti_drift != e->ti_current)
-          error("gpi not drifted to current time");
-        if (gpj->ti_drift != e->ti_current)
-          error("gpj not drifted to current time");
-#endif
+    /* Skip inactive particles */
+    if (!active[pid]) continue;
 
-        /* Interact ! */
-        runner_iact_grav_pp_nonsym(r2, dx, gpi, gpj);
+    /* Skip particles that cannot use the multipole */
+    if (!use_mpole[pid]) continue;
 
 #ifdef SWIFT_DEBUG_CHECKS
-        gpi->num_interacted++;
+    if (pid < gcount_i && !gpart_is_active(&gparts_i[pid], e))
+      error("Active particle went through the cache");
 #endif
-      }
-    }
-  }
 
-  /* Loop over all particles in cj... */
-  if (cell_is_active(cj, e)) {
-    for (int pjd = 0; pjd < gcount_j; pjd++) {
+    const float x_i = x[pid];
+    const float y_i = y[pid];
+    const float z_i = z[pid];
 
-      /* Get a hold of the ith part in ci. */
-      struct gpart *restrict gpj = &gparts_j[pjd];
-
-      if (!gpart_is_active(gpj, e)) continue;
-
-      /* Apply boundary condition */
-      const double pjx[3] = {gpj->x[0] + shift[0], gpj->x[1] + shift[1],
-                             gpj->x[2] + shift[2]};
+    /* Some powers of the softening length */
+    const float h_i = epsilon[pid];
+    const float h_inv_i = 1.f / h_i;
 
-      /* Loop over every particle in the other cell. */
-      for (int pid = 0; pid < gcount_i; pid++) {
+    /* Distance to the Multipole */
+    const float dx = x_i - CoM_j[0];
+    const float dy = y_i - CoM_j[1];
+    const float dz = z_i - CoM_j[2];
+    const float r2 = dx * dx + dy * dy + dz * dz;
 
-        /* Get a hold of the ith part in ci. */
-        const struct gpart *restrict gpi = &gparts_i[pid];
+    /* Interact! */
+    float f_x, f_y, f_z;
+    runner_iact_grav_pm(dx, dy, dz, r2, h_i, h_inv_i, multi_j, &f_x, &f_y,
+                        &f_z);
 
-        /* Compute the pairwise distance. */
-        const float dx[3] = {pjx[0] - gpi->x[0],   // x
-                             pjx[1] - gpi->x[1],   // y
-                             pjx[2] - gpi->x[2]};  // z
-        const float r2 = dx[0] * dx[0] + dx[1] * dx[1] + dx[2] * dx[2];
+    /* Store it back */
+    a_x[pid] = f_x;
+    a_y[pid] = f_y;
+    a_z[pid] = f_z;
 
 #ifdef SWIFT_DEBUG_CHECKS
-        /* Check that particles have been drifted to the current time */
-        if (gpi->ti_drift != e->ti_current)
-          error("gpi not drifted to current time");
-        if (gpj->ti_drift != e->ti_current)
-          error("gpj not drifted to current time");
+    /* Update the interaction counter */
+    if (pid < gcount_i)
+      gparts_i[pid].num_interacted += cj->multipole->m_pole.num_gpart;
 #endif
-
-        /* Interact ! */
-        runner_iact_grav_pp_nonsym(r2, dx, gpj, gpi);
-
-#ifdef SWIFT_DEBUG_CHECKS
-        gpj->num_interacted++;
-#endif
-      }
-    }
   }
-#endif
+
+  TIMER_TOC(timer_dopair_grav_pm);
 }
 
 /**
  * @brief Computes the interaction of all the particles in a cell with all the
- * particles of another cell using the truncated Newtonian potential
+ * particles of another cell (switching function between full and truncated).
  *
  * @param r The #runner.
  * @param ci The first #cell.
  * @param cj The other #cell.
- * @param shift The distance vector (periodically wrapped) between the cell
- * centres.
  */
-void runner_dopair_grav_pp_truncated(struct runner *r, struct cell *ci,
-                                     struct cell *cj, double shift[3]) {
+void runner_dopair_grav_pp(struct runner *r, struct cell *ci, struct cell *cj) {
 
-  /* Some constants */
-  const struct engine *const e = r->e;
-  const struct space *s = e->s;
+  const struct engine *e = r->e;
+
+  TIMER_TIC;
+
+  /* Anything to do here? */
+  if (!cell_is_active(ci, e) && !cell_is_active(cj, e)) return;
+
+  /* Check that we are not doing something stupid */
+  if (ci->split || cj->split) error("Running P-P on splitable cells");
+
+  /* Check that the gparts of both cells have been drifted */
+  if (!cell_are_gpart_drifted(ci, e)) error("Un-drifted gparts");
+  if (!cell_are_gpart_drifted(cj, e)) error("Un-drifted gparts");
+
+  /* Recover some useful constants */
+  struct space *s = e->s;
+  const int periodic = s->periodic;
   const double cell_width = s->width[0];
+  const float theta_crit2 = e->gravity_properties->theta_crit2;
   const double a_smooth = e->gravity_properties->a_smooth;
+  const double r_cut_min = e->gravity_properties->r_cut_min;
   const double rlr = cell_width * a_smooth;
+  const double min_trunc = rlr * r_cut_min;
   const float rlr_inv = 1. / rlr;
 
   /* Caches to play with */
   struct gravity_cache *const ci_cache = &r->ci_gravity_cache;
   struct gravity_cache *const cj_cache = &r->cj_gravity_cache;
 
-  /* Cell properties */
-  const int gcount_i = ci->gcount;
-  const int gcount_j = cj->gcount;
-  struct gpart *restrict gparts_i = ci->gparts;
-  struct gpart *restrict gparts_j = cj->gparts;
+  /* Get the distance vector between the pairs, wrapping. */
+  double cell_shift[3];
+  space_getsid(s, &ci, &cj, cell_shift);
+
+  /* Record activity status */
   const int ci_active = cell_is_active(ci, e);
   const int cj_active = cell_is_active(cj, e);
-  const double loc_i[3] = {ci->loc[0], ci->loc[1], ci->loc[2]};
-  const double loc_j[3] = {cj->loc[0], cj->loc[1], cj->loc[2]};
-  const double loc_mean[3] = {0.5 * (loc_i[0] + loc_j[0]),
-                              0.5 * (loc_i[1] + loc_j[1]),
-                              0.5 * (loc_i[2] + loc_j[2])};
 
-  /* Anything to do here ?*/
-  if (!ci_active && !cj_active) return;
+  /* Do we need to drift the multipoles ? */
+  if (cj_active && ci->ti_old_multipole != e->ti_current)
+    cell_drift_multipole(ci, e);
+  if (ci_active && cj->ti_old_multipole != e->ti_current)
+    cell_drift_multipole(cj, e);
+
+  /* Reference position for the pair (currently just the location of ci) */
+  const double loc[3] = {ci->loc[0],   // + 0. * ci->width[0],
+                         ci->loc[1],   // + 0. * ci->width[1],
+                         ci->loc[2]};  // + 0. * ci->width[2]};
+
+  /* Shift to apply to the particles in each cell */
+  const double shift_i[3] = {loc[0] + cell_shift[0], loc[1] + cell_shift[1],
+                             loc[2] + cell_shift[2]};
+  const double shift_j[3] = {loc[0], loc[1], loc[2]};
+
+  /* Recover the multipole info and shift the CoM locations */
+  const float rmax_i = ci->multipole->r_max;
+  const float rmax_j = cj->multipole->r_max;
+  const float rmax2_i = rmax_i * rmax_i;
+  const float rmax2_j = rmax_j * rmax_j;
+  const struct multipole *multi_i = &ci->multipole->m_pole;
+  const struct multipole *multi_j = &cj->multipole->m_pole;
+  const float CoM_i[3] = {ci->multipole->CoM[0] - shift_i[0],
+                          ci->multipole->CoM[1] - shift_i[1],
+                          ci->multipole->CoM[2] - shift_i[2]};
+  const float CoM_j[3] = {cj->multipole->CoM[0] - shift_j[0],
+                          cj->multipole->CoM[1] - shift_j[1],
+                          cj->multipole->CoM[2] - shift_j[2]};
 
-  /* Check that we fit in cache */
-  if (gcount_i > ci_cache->count || gcount_j > cj_cache->count)
-    error("Not enough space in the caches! gcount_i=%d gcount_j=%d", gcount_i,
-          gcount_j);
+  /* Start by constructing particle caches */
 
   /* Computed the padded counts */
+  const int gcount_i = ci->gcount;
+  const int gcount_j = cj->gcount;
   const int gcount_padded_i = gcount_i - (gcount_i % VEC_SIZE) + VEC_SIZE;
   const int gcount_padded_j = gcount_j - (gcount_j % VEC_SIZE) + VEC_SIZE;
 
-  /* Fill the caches */
-  gravity_cache_populate(ci_cache, gparts_i, gcount_i, gcount_padded_i,
-                         loc_mean, ci);
-  gravity_cache_populate(cj_cache, gparts_j, gcount_j, gcount_padded_j,
-                         loc_mean, cj);
-
-  /* Ok... Here we go ! */
-
-  if (ci_active) {
-
-    /* Loop over all particles in ci... */
-    for (int pid = 0; pid < gcount_i; pid++) {
-
-      /* Skip inactive particles */
-      if (!gpart_is_active(&gparts_i[pid], e)) continue;
-
-      const float x_i = ci_cache->x[pid];
-      const float y_i = ci_cache->y[pid];
-      const float z_i = ci_cache->z[pid];
-
-      /* Some powers of the softening length */
-      const float h_i = ci_cache->epsilon[pid];
-      const float h2_i = h_i * h_i;
-      const float h_inv_i = 1.f / h_i;
-      const float h_inv3_i = h_inv_i * h_inv_i * h_inv_i;
-
-      /* Local accumulators for the acceleration */
-      float a_x = 0.f, a_y = 0.f, a_z = 0.f;
-
-      /* Make the compiler understand we are in happy vectorization land */
-      swift_align_information(cj_cache->x, SWIFT_CACHE_ALIGNMENT);
-      swift_align_information(cj_cache->y, SWIFT_CACHE_ALIGNMENT);
-      swift_align_information(cj_cache->z, SWIFT_CACHE_ALIGNMENT);
-      swift_align_information(cj_cache->m, SWIFT_CACHE_ALIGNMENT);
-      swift_assume_size(gcount_padded_j, VEC_SIZE);
-
-      /* Loop over every particle in the other cell. */
-      for (int pjd = 0; pjd < gcount_padded_j; pjd++) {
-
-        /* Get info about j */
-        const float x_j = cj_cache->x[pjd];
-        const float y_j = cj_cache->y[pjd];
-        const float z_j = cj_cache->z[pjd];
-        const float mass_j = cj_cache->m[pjd];
-
-        /* Compute the pairwise (square) distance. */
-        const float dx = x_i - x_j;
-        const float dy = y_i - y_j;
-        const float dz = z_i - z_j;
-        const float r2 = dx * dx + dy * dy + dz * dz;
-
 #ifdef SWIFT_DEBUG_CHECKS
-        if (r2 == 0.f) error("Interacting particles with 0 distance");
-
-        /* Check that particles have been drifted to the current time */
-        if (gparts_i[pid].ti_drift != e->ti_current)
-          error("gpi not drifted to current time");
-        if (pjd < gcount_j && gparts_j[pjd].ti_drift != e->ti_current)
-          error("gpj not drifted to current time");
+  /* Check that we fit in cache */
+  if (gcount_i > ci_cache->count || gcount_j > cj_cache->count)
+    error("Not enough space in the caches! gcount_i=%d gcount_j=%d", gcount_i,
+          gcount_j);
 #endif
 
-        /* Get the inverse distance */
-        const float r_inv = 1.f / sqrtf(r2);
-        const float r = r2 * r_inv;
-
-        float f_ij, W_ij, corr_lr;
-
-        if (r2 >= h2_i) {
-
-          /* Get Newtonian gravity */
-          f_ij = mass_j * r_inv * r_inv * r_inv;
-
-        } else {
-
-          const float ui = r * h_inv_i;
-
-          kernel_grav_eval(ui, &W_ij);
+  /* Fill the caches */
+  gravity_cache_populate(e->max_active_bin, ci_cache, ci->gparts, gcount_i,
+                         gcount_padded_i, shift_i, CoM_j, rmax2_j, theta_crit2,
+                         ci);
+  gravity_cache_populate(e->max_active_bin, cj_cache, cj->gparts, gcount_j,
+                         gcount_padded_j, shift_j, CoM_i, rmax2_i, theta_crit2,
+                         cj);
 
-          /* Get softened gravity */
-          f_ij = mass_j * h_inv3_i * W_ij;
-        }
+  /* Can we use the Newtonian version or do we need the truncated one ? */
+  if (!periodic) {
 
-        /* Get long-range correction */
-        const float u_lr = r * rlr_inv;
-        kernel_long_grav_eval(u_lr, &corr_lr);
-        f_ij *= corr_lr;
+    /* Not periodic -> Can always use Newtonian potential */
 
-        /* Store it back */
-        a_x -= f_ij * dx;
-        a_y -= f_ij * dy;
-        a_z -= f_ij * dz;
+    /* Let's update the active cell(s) only */
+    if (ci_active) {
 
-#ifdef SWIFT_DEBUG_CHECKS
-        /* Update the interaction counter if it's not a padded gpart */
-        if (pjd < gcount_j) gparts_i[pid].num_interacted++;
-#endif
-      }
+      /* First the P2P */
+      runner_dopair_grav_pp_full(e, ci_cache, cj_cache, gcount_i, gcount_j,
+                                 gcount_padded_j, ci->gparts, cj->gparts);
 
-      /* Store everything back in cache */
-      ci_cache->a_x[pid] = a_x;
-      ci_cache->a_y[pid] = a_y;
-      ci_cache->a_z[pid] = a_z;
+      /* Then the M2P */
+      runner_dopair_grav_pm(e, ci_cache, gcount_i, gcount_padded_i, ci->gparts,
+                            CoM_j, multi_j, cj);
     }
-  }
-
-  /* Now do the opposite loop */
-  if (cj_active) {
-
-    /* Loop over all particles in ci... */
-    for (int pjd = 0; pjd < gcount_j; pjd++) {
-
-      /* Skip inactive particles */
-      if (!gpart_is_active(&gparts_j[pjd], e)) continue;
-
-      const float x_j = cj_cache->x[pjd];
-      const float y_j = cj_cache->y[pjd];
-      const float z_j = cj_cache->z[pjd];
-
-      /* Some powers of the softening length */
-      const float h_j = cj_cache->epsilon[pjd];
-      const float h2_j = h_j * h_j;
-      const float h_inv_j = 1.f / h_j;
-      const float h_inv3_j = h_inv_j * h_inv_j * h_inv_j;
-
-      /* Local accumulators for the acceleration */
-      float a_x = 0.f, a_y = 0.f, a_z = 0.f;
-
-      /* Make the compiler understand we are in happy vectorization land */
-      swift_align_information(ci_cache->x, SWIFT_CACHE_ALIGNMENT);
-      swift_align_information(ci_cache->y, SWIFT_CACHE_ALIGNMENT);
-      swift_align_information(ci_cache->z, SWIFT_CACHE_ALIGNMENT);
-      swift_align_information(ci_cache->m, SWIFT_CACHE_ALIGNMENT);
-      swift_assume_size(gcount_padded_i, VEC_SIZE);
-
-      /* Loop over every particle in the other cell. */
-      for (int pid = 0; pid < gcount_padded_i; pid++) {
-
-        /* Get info about j */
-        const float x_i = ci_cache->x[pid];
-        const float y_i = ci_cache->y[pid];
-        const float z_i = ci_cache->z[pid];
-        const float mass_i = ci_cache->m[pid];
-
-        /* Compute the pairwise (square) distance. */
-        const float dx = x_j - x_i;
-        const float dy = y_j - y_i;
-        const float dz = z_j - z_i;
-        const float r2 = dx * dx + dy * dy + dz * dz;
-
-#ifdef SWIFT_DEBUG_CHECKS
-        if (r2 == 0.f) error("Interacting particles with 0 distance");
-
-        /* Check that particles have been drifted to the current time */
-        if (gparts_j[pjd].ti_drift != e->ti_current)
-          error("gpj not drifted to current time");
-        if (pid < gcount_i && gparts_i[pid].ti_drift != e->ti_current)
-          error("gpi not drifted to current time");
-#endif
-
-        /* Get the inverse distance */
-        const float r_inv = 1.f / sqrtf(r2);
-        const float r = r2 * r_inv;
-
-        float f_ji, W_ji, corr_lr;
-
-        if (r2 >= h2_j) {
-
-          /* Get Newtonian gravity */
-          f_ji = mass_i * r_inv * r_inv * r_inv;
-
-        } else {
-
-          const float uj = r * h_inv_j;
-
-          kernel_grav_eval(uj, &W_ji);
-
-          /* Get softened gravity */
-          f_ji = mass_i * h_inv3_j * W_ji;
-        }
-
-        /* Get long-range correction */
-        const float u_lr = r * rlr_inv;
-        kernel_long_grav_eval(u_lr, &corr_lr);
-        f_ji *= corr_lr;
-
-        /* Store it back */
-        a_x -= f_ji * dx;
-        a_y -= f_ji * dy;
-        a_z -= f_ji * dz;
-
-#ifdef SWIFT_DEBUG_CHECKS
-        /* Update the interaction counter if it's not a padded gpart */
-        if (pid < gcount_i) gparts_j[pjd].num_interacted++;
-#endif
-      }
-
-      /* Store everything back in cache */
-      cj_cache->a_x[pjd] = a_x;
-      cj_cache->a_y[pjd] = a_y;
-      cj_cache->a_z[pjd] = a_z;
+    if (cj_active) {
+
+      /* First the P2P */
+      runner_dopair_grav_pp_full(e, cj_cache, ci_cache, gcount_j, gcount_i,
+                                 gcount_padded_i, cj->gparts, ci->gparts);
+      /* Then the M2P */
+      runner_dopair_grav_pm(e, cj_cache, gcount_j, gcount_padded_j, cj->gparts,
+                            CoM_i, multi_i, ci);
     }
-  }
 
-  /* Write back to the particles */
-  if (ci_active) gravity_cache_write_back(ci_cache, gparts_i, gcount_i);
-  if (cj_active) gravity_cache_write_back(cj_cache, gparts_j, gcount_j);
+  } else { /* Periodic BC */
 
-#ifdef MATTHIEU_OLD_STUFF
-  /* Some constants */
-  const struct engine *const e = r->e;
-  const struct space *s = e->s;
-  const double cell_width = s->width[0];
-  const double a_smooth = e->gravity_properties->a_smooth;
-  const double rlr = cell_width * a_smooth;
-  const float rlr_inv = 1. / rlr;
-
-  /* Cell properties */
-  const int gcount_i = ci->gcount;
-  const int gcount_j = cj->gcount;
-  struct gpart *restrict gparts_i = ci->gparts;
-  struct gpart *restrict gparts_j = cj->gparts;
-
-  /* MATTHIEU: Should we use local DP accumulators ? */
-
-  /* Loop over all particles in ci... */
-  if (cell_is_active(ci, e)) {
-    for (int pid = 0; pid < gcount_i; pid++) {
-
-      /* Get a hold of the ith part in ci. */
-      struct gpart *restrict gpi = &gparts_i[pid];
-
-      if (!gpart_is_active(gpi, e)) continue;
-
-      /* Apply boundary condition */
-      const double pix[3] = {gpi->x[0] - shift[0], gpi->x[1] - shift[1],
-                             gpi->x[2] - shift[2]};
+    /* Get the relative distance between the CoMs */
+    const double dx[3] = {CoM_j[0] - CoM_i[0], CoM_j[1] - CoM_i[1],
+                          CoM_j[2] - CoM_i[2]};
+    const double r2 = dx[0] * dx[0] + dx[1] * dx[1] + dx[2] * dx[2];
 
-      /* Loop over every particle in the other cell. */
-      for (int pjd = 0; pjd < gcount_j; pjd++) {
+    /* Get the maximal distance between any two particles */
+    const double max_r = sqrt(r2) + rmax_i + rmax_j;
 
-        /* Get a hold of the jth part in cj. */
-        const struct gpart *restrict gpj = &gparts_j[pjd];
+    /* Do we need to use the truncated interactions ? */
+    if (max_r > min_trunc) {
 
-        /* Compute the pairwise distance. */
-        const float dx[3] = {pix[0] - gpj->x[0],   // x
-                             pix[1] - gpj->x[1],   // y
-                             pix[2] - gpj->x[2]};  // z
-        const float r2 = dx[0] * dx[0] + dx[1] * dx[1] + dx[2] * dx[2];
+      /* Periodic but far-away cells must use the truncated potential */
 
-#ifdef SWIFT_DEBUG_CHECKS
-        /* Check that particles have been drifted to the current time */
-        if (gpi->ti_drift != e->ti_current)
-          error("gpi not drifted to current time");
-        if (gpj->ti_drift != e->ti_current)
-          error("gpj not drifted to current time");
-#endif
+      /* Let's update the active cell(s) only */
+      if (ci_active) {
 
-        /* Interact ! */
-        runner_iact_grav_pp_truncated_nonsym(r2, dx, gpi, gpj, rlr_inv);
+        /* First the (truncated) P2P */
+        runner_dopair_grav_pp_truncated(e, rlr_inv, ci_cache, cj_cache,
+                                        gcount_i, gcount_j, gcount_padded_j,
+                                        ci->gparts, cj->gparts);
 
-#ifdef SWIFT_DEBUG_CHECKS
-        gpi->num_interacted++;
-#endif
+        /* Then the M2P */
+        runner_dopair_grav_pm(e, ci_cache, gcount_i, gcount_padded_i,
+                              ci->gparts, CoM_j, multi_j, cj);
       }
-    }
-  }
+      if (cj_active) {
 
-  /* Loop over all particles in cj... */
-  if (cell_is_active(cj, e)) {
-    for (int pjd = 0; pjd < gcount_j; pjd++) {
+        /* First the (truncated) P2P */
+        runner_dopair_grav_pp_truncated(e, rlr_inv, cj_cache, ci_cache,
+                                        gcount_j, gcount_i, gcount_padded_i,
+                                        cj->gparts, ci->gparts);
 
-      /* Get a hold of the ith part in ci. */
-      struct gpart *restrict gpj = &gparts_j[pjd];
-
-      if (!gpart_is_active(gpj, e)) continue;
+        /* Then the M2P */
+        runner_dopair_grav_pm(e, cj_cache, gcount_j, gcount_padded_j,
+                              cj->gparts, CoM_i, multi_i, ci);
+      }
 
-      /* Apply boundary condition */
-      const double pjx[3] = {gpj->x[0] + shift[0], gpj->x[1] + shift[1],
-                             gpj->x[2] + shift[2]};
+    } else {
 
-      /* Loop over every particle in the other cell. */
-      for (int pid = 0; pid < gcount_i; pid++) {
+      /* Periodic but close-by cells can use the full Newtonian potential */
 
-        /* Get a hold of the ith part in ci. */
-        const struct gpart *restrict gpi = &gparts_i[pid];
+      /* Let's update the active cell(s) only */
+      if (ci_active) {
 
-        /* Compute the pairwise distance. */
-        const float dx[3] = {pjx[0] - gpi->x[0],   // x
-                             pjx[1] - gpi->x[1],   // y
-                             pjx[2] - gpi->x[2]};  // z
-        const float r2 = dx[0] * dx[0] + dx[1] * dx[1] + dx[2] * dx[2];
+        /* First the (Newtonian) P2P */
+        runner_dopair_grav_pp_full(e, ci_cache, cj_cache, gcount_i, gcount_j,
+                                   gcount_padded_j, ci->gparts, cj->gparts);
 
-#ifdef SWIFT_DEBUG_CHECKS
-        /* Check that particles have been drifted to the current time */
-        if (gpi->ti_drift != e->ti_current)
-          error("gpi not drifted to current time");
-        if (gpj->ti_drift != e->ti_current)
-          error("gpj not drifted to current time");
-#endif
+        /* Then the M2P */
+        runner_dopair_grav_pm(e, ci_cache, gcount_i, gcount_padded_i,
+                              ci->gparts, CoM_j, multi_j, cj);
+      }
+      if (cj_active) {
 
-        /* Interact ! */
-        runner_iact_grav_pp_truncated_nonsym(r2, dx, gpj, gpi, rlr_inv);
+        /* First the (Newtonian) P2P */
+        runner_dopair_grav_pp_full(e, cj_cache, ci_cache, gcount_j, gcount_i,
+                                   gcount_padded_i, cj->gparts, ci->gparts);
 
-#ifdef SWIFT_DEBUG_CHECKS
-        gpj->num_interacted++;
-#endif
+        /* Then the M2P */
+        runner_dopair_grav_pm(e, cj_cache, gcount_j, gcount_padded_j,
+                              cj->gparts, CoM_i, multi_i, ci);
       }
     }
   }
 
-#endif
-}
-
-/**
- * @brief Computes the interaction of all the particles in a cell with all the
- * particles of another cell (switching function between full and truncated).
- *
- * @param r The #runner.
- * @param ci The first #cell.
- * @param cj The other #cell.
- */
-void runner_dopair_grav_pp(struct runner *r, struct cell *ci, struct cell *cj) {
-
-  /* Some properties of the space */
-  const struct engine *e = r->e;
-  const struct space *s = e->s;
-  const int periodic = s->periodic;
-  const double cell_width = s->width[0];
-  const double dim[3] = {s->dim[0], s->dim[1], s->dim[2]};
-  const double a_smooth = e->gravity_properties->a_smooth;
-  const double r_cut_min = e->gravity_properties->r_cut_min;
-  const double min_trunc = cell_width * r_cut_min * a_smooth;
-  double shift[3] = {0.0, 0.0, 0.0};
-
-  TIMER_TIC;
-
-  /* Anything to do here? */
-  if (!cell_is_active(ci, e) && !cell_is_active(cj, e)) return;
-
-  /* Let's start by drifting things */
-  if (!cell_are_gpart_drifted(ci, e)) cell_drift_gpart(ci, e);
-  if (!cell_are_gpart_drifted(cj, e)) cell_drift_gpart(cj, e);
-
-  /* Can we use the Newtonian version or do we need the truncated one ? */
-  if (!periodic) {
-    runner_dopair_grav_pp_full(r, ci, cj, shift);
-  } else {
-
-    /* Get the relative distance between the pairs, wrapping. */
-    shift[0] = nearest(cj->loc[0] - ci->loc[0], dim[0]);
-    shift[1] = nearest(cj->loc[1] - ci->loc[1], dim[1]);
-    shift[2] = nearest(cj->loc[2] - ci->loc[2], dim[2]);
-    const double r2 =
-        shift[0] * shift[0] + shift[1] * shift[1] + shift[2] * shift[2];
-
-    /* Get the maximal distance between any two particles */
-    const double max_r = sqrt(r2) + ci->multipole->r_max + cj->multipole->r_max;
-
-    /* Do we need to use the truncated interactions ? */
-    if (max_r > min_trunc)
-      runner_dopair_grav_pp_truncated(r, ci, cj, shift);
-    else
-      runner_dopair_grav_pp_full(r, ci, cj, shift);
-  }
+  /* Write back to the particles */
+  if (ci_active) gravity_cache_write_back(ci_cache, ci->gparts, gcount_i);
+  if (cj_active) gravity_cache_write_back(cj_cache, cj->gparts, gcount_j);
 
-  TIMER_TOC(timer_dopair_grav_pp);
+  TIMER_TOC(timer_dopair_grav_branch);
 }
 
 /**
@@ -934,14 +646,17 @@ void runner_doself_grav_pp_full(struct runner *r, struct cell *c) {
   /* Anything to do here ?*/
   if (!c_active) return;
 
+#ifdef SWIFT_DEBUG_CHECKS
   /* Check that we fit in cache */
   if (gcount > ci_cache->count)
     error("Not enough space in the cache! gcount=%d", gcount);
+#endif
 
   /* Computed the padded counts */
   const int gcount_padded = gcount - (gcount % VEC_SIZE) + VEC_SIZE;
 
-  gravity_cache_populate(ci_cache, gparts, gcount, gcount_padded, loc, c);
+  gravity_cache_populate_no_mpole(e->max_active_bin, ci_cache, gparts, gcount,
+                                  gcount_padded, loc, c);
 
   /* Ok... Here we go ! */
 
@@ -949,7 +664,7 @@ void runner_doself_grav_pp_full(struct runner *r, struct cell *c) {
   for (int pid = 0; pid < gcount; pid++) {
 
     /* Skip inactive particles */
-    if (!gpart_is_active(&gparts[pid], e)) continue;
+    if (!ci_cache->active[pid]) continue;
 
     const float x_i = ci_cache->x[pid];
     const float y_i = ci_cache->y[pid];
@@ -999,26 +714,9 @@ void runner_doself_grav_pp_full(struct runner *r, struct cell *c) {
         error("gpj not drifted to current time");
 #endif
 
-      /* Get the inverse distance */
-      const float r_inv = 1.f / sqrtf(r2);
-
-      float f_ij, W_ij;
-
-      if (r2 >= h2_i) {
-
-        /* Get Newtonian gravity */
-        f_ij = mass_j * r_inv * r_inv * r_inv;
-
-      } else {
-
-        const float r = r2 * r_inv;
-        const float ui = r * h_inv_i;
-
-        kernel_grav_eval(ui, &W_ij);
-
-        /* Get softened gravity */
-        f_ij = mass_j * h_inv3_i * W_ij;
-      }
+      /* Interact! */
+      float f_ij;
+      runner_iact_grav_pp_full(r2, h2_i, h_inv_i, h_inv3_i, mass_j, &f_ij);
 
       /* Store it back */
       a_x -= f_ij * dx;
@@ -1039,80 +737,6 @@ void runner_doself_grav_pp_full(struct runner *r, struct cell *c) {
 
   /* Write back to the particles */
   gravity_cache_write_back(ci_cache, gparts, gcount);
-
-#ifdef MATTHIEU_OLD_STUFF
-
-  /* Some constants */
-  const struct engine *const e = r->e;
-
-  /* Cell properties */
-  const int gcount = c->gcount;
-  struct gpart *restrict gparts = c->gparts;
-
-  /* MATTHIEU: Should we use local DP accumulators ? */
-
-  /* Loop over all particles in ci... */
-  for (int pid = 0; pid < gcount; pid++) {
-
-    /* Get a hold of the ith part in ci. */
-    struct gpart *restrict gpi = &gparts[pid];
-
-    /* Loop over every particle in the other cell. */
-    for (int pjd = pid + 1; pjd < gcount; pjd++) {
-
-      /* Get a hold of the jth part in ci. */
-      struct gpart *restrict gpj = &gparts[pjd];
-
-      /* Compute the pairwise distance. */
-      float dx[3] = {gpi->x[0] - gpj->x[0],   // x
-                     gpi->x[1] - gpj->x[1],   // y
-                     gpi->x[2] - gpj->x[2]};  // z
-      const float r2 = dx[0] * dx[0] + dx[1] * dx[1] + dx[2] * dx[2];
-
-#ifdef SWIFT_DEBUG_CHECKS
-      /* Check that particles have been drifted to the current time */
-      if (gpi->ti_drift != e->ti_current)
-        error("gpi not drifted to current time");
-      if (gpj->ti_drift != e->ti_current)
-        error("gpj not drifted to current time");
-#endif
-
-      /* Interact ! */
-      if (gpart_is_active(gpi, e) && gpart_is_active(gpj, e)) {
-
-        runner_iact_grav_pp(r2, dx, gpi, gpj);
-
-#ifdef SWIFT_DEBUG_CHECKS
-        gpi->num_interacted++;
-        gpj->num_interacted++;
-#endif
-
-      } else {
-
-        if (gpart_is_active(gpi, e)) {
-
-          runner_iact_grav_pp_nonsym(r2, dx, gpi, gpj);
-
-#ifdef SWIFT_DEBUG_CHECKS
-          gpi->num_interacted++;
-#endif
-
-        } else if (gpart_is_active(gpj, e)) {
-
-          dx[0] = -dx[0];
-          dx[1] = -dx[1];
-          dx[2] = -dx[2];
-          runner_iact_grav_pp_nonsym(r2, dx, gpj, gpi);
-
-#ifdef SWIFT_DEBUG_CHECKS
-          gpj->num_interacted++;
-#endif
-        }
-      }
-    }
-  }
-
-#endif
 }
 
 /**
@@ -1148,14 +772,17 @@ void runner_doself_grav_pp_truncated(struct runner *r, struct cell *c) {
   /* Anything to do here ?*/
   if (!c_active) return;
 
+#ifdef SWIFT_DEBUG_CHECKS
   /* Check that we fit in cache */
   if (gcount > ci_cache->count)
     error("Not enough space in the caches! gcount=%d", gcount);
+#endif
 
   /* Computed the padded counts */
   const int gcount_padded = gcount - (gcount % VEC_SIZE) + VEC_SIZE;
 
-  gravity_cache_populate(ci_cache, gparts, gcount, gcount_padded, loc, c);
+  gravity_cache_populate_no_mpole(e->max_active_bin, ci_cache, gparts, gcount,
+                                  gcount_padded, loc, c);
 
   /* Ok... Here we go ! */
 
@@ -1163,7 +790,7 @@ void runner_doself_grav_pp_truncated(struct runner *r, struct cell *c) {
   for (int pid = 0; pid < gcount; pid++) {
 
     /* Skip inactive particles */
-    if (!gpart_is_active(&gparts[pid], e)) continue;
+    if (!ci_cache->active[pid]) continue;
 
     const float x_i = ci_cache->x[pid];
     const float y_i = ci_cache->y[pid];
@@ -1213,31 +840,10 @@ void runner_doself_grav_pp_truncated(struct runner *r, struct cell *c) {
         error("gpj not drifted to current time");
 #endif
 
-      /* Get the inverse distance */
-      const float r_inv = 1.f / sqrtf(r2);
-      const float r = r2 * r_inv;
-
-      float f_ij, W_ij, corr_lr;
-
-      if (r2 >= h2_i) {
-
-        /* Get Newtonian gravity */
-        f_ij = mass_j * r_inv * r_inv * r_inv;
-
-      } else {
-
-        const float ui = r * h_inv_i;
-
-        kernel_grav_eval(ui, &W_ij);
-
-        /* Get softened gravity */
-        f_ij = mass_j * h_inv3_i * W_ij;
-      }
-
-      /* Get long-range correction */
-      const float u_lr = r * rlr_inv;
-      kernel_long_grav_eval(u_lr, &corr_lr);
-      f_ij *= corr_lr;
+      /* Interact! */
+      float f_ij;
+      runner_iact_grav_pp_truncated(r2, h2_i, h_inv_i, h_inv3_i, mass_j,
+                                    rlr_inv, &f_ij);
 
       /* Store it back */
       a_x -= f_ij * dx;
@@ -1258,83 +864,6 @@ void runner_doself_grav_pp_truncated(struct runner *r, struct cell *c) {
 
   /* Write back to the particles */
   gravity_cache_write_back(ci_cache, gparts, gcount);
-
-#ifdef MATTHIEU_OLD_STUFF
-  /* Some constants */
-  const struct engine *const e = r->e;
-  const struct space *s = e->s;
-  const double cell_width = s->width[0];
-  const double a_smooth = e->gravity_properties->a_smooth;
-  const double rlr = cell_width * a_smooth;
-  const float rlr_inv = 1. / rlr;
-
-  /* Cell properties */
-  const int gcount = c->gcount;
-  struct gpart *restrict gparts = c->gparts;
-
-  /* MATTHIEU: Should we use local DP accumulators ? */
-
-  /* Loop over all particles in ci... */
-  for (int pid = 0; pid < gcount; pid++) {
-
-    /* Get a hold of the ith part in ci. */
-    struct gpart *restrict gpi = &gparts[pid];
-
-    /* Loop over every particle in the other cell. */
-    for (int pjd = pid + 1; pjd < gcount; pjd++) {
-
-      /* Get a hold of the jth part in ci. */
-      struct gpart *restrict gpj = &gparts[pjd];
-
-      /* Compute the pairwise distance. */
-      float dx[3] = {gpi->x[0] - gpj->x[0],   // x
-                     gpi->x[1] - gpj->x[1],   // y
-                     gpi->x[2] - gpj->x[2]};  // z
-      const float r2 = dx[0] * dx[0] + dx[1] * dx[1] + dx[2] * dx[2];
-
-#ifdef SWIFT_DEBUG_CHECKS
-      /* Check that particles have been drifted to the current time */
-      if (gpi->ti_drift != e->ti_current)
-        error("gpi not drifted to current time");
-      if (gpj->ti_drift != e->ti_current)
-        error("gpj not drifted to current time");
-#endif
-
-      /* Interact ! */
-      if (gpart_is_active(gpi, e) && gpart_is_active(gpj, e)) {
-
-        runner_iact_grav_pp_truncated(r2, dx, gpi, gpj, rlr_inv);
-
-#ifdef SWIFT_DEBUG_CHECKS
-        gpi->num_interacted++;
-        gpj->num_interacted++;
-#endif
-
-      } else {
-
-        if (gpart_is_active(gpi, e)) {
-
-          runner_iact_grav_pp_truncated_nonsym(r2, dx, gpi, gpj, rlr_inv);
-
-#ifdef SWIFT_DEBUG_CHECKS
-          gpi->num_interacted++;
-#endif
-
-        } else if (gpart_is_active(gpj, e)) {
-
-          dx[0] = -dx[0];
-          dx[1] = -dx[1];
-          dx[2] = -dx[2];
-          runner_iact_grav_pp_truncated_nonsym(r2, dx, gpj, gpi, rlr_inv);
-
-#ifdef SWIFT_DEBUG_CHECKS
-          gpj->num_interacted++;
-#endif
-        }
-      }
-    }
-  }
-#endif
 }
 
 /**
@@ -1364,8 +893,11 @@ void runner_doself_grav_pp(struct runner *r, struct cell *c) {
   /* Anything to do here? */
   if (!cell_is_active(c, e)) return;
 
+  /* Check that we are not doing something stupid */
+  if (c->split) error("Running P-P on a splitable cell");
+
   /* Do we need to start by drifting things ? */
-  if (!cell_are_gpart_drifted(c, e)) cell_drift_gpart(c, e);
+  if (!cell_are_gpart_drifted(c, e)) error("Un-drifted gparts");
 
   /* Can we use the Newtonian version or do we need the truncated one ? */
   if (!periodic) {
@@ -1373,7 +905,7 @@ void runner_doself_grav_pp(struct runner *r, struct cell *c) {
   } else {
 
     /* Get the maximal distance between any two particles */
-    const double max_r = 2 * c->multipole->r_max;
+    const double max_r = 2. * c->multipole->r_max;
 
     /* Do we need to use the truncated interactions ? */
     if (max_r > min_trunc)
@@ -1406,7 +938,7 @@ void runner_dopair_grav(struct runner *r, struct cell *ci, struct cell *cj,
   const double cell_width = s->width[0];
   const double dim[3] = {s->dim[0], s->dim[1], s->dim[2]};
   const struct gravity_props *props = e->gravity_properties;
-  const double theta_crit_inv = props->theta_crit_inv;
+  const double theta_crit2 = props->theta_crit2;
   const double max_distance = props->a_smooth * props->r_cut_max * cell_width;
   const double max_distance2 = max_distance * max_distance;
 
@@ -1467,7 +999,7 @@ void runner_dopair_grav(struct runner *r, struct cell *ci, struct cell *cj,
    * option... */
 
   /* Can we use M-M interactions ? */
-  if (gravity_multipole_accept(multi_i, multi_j, theta_crit_inv, r2)) {
+  if (gravity_M2L_accept(multi_i->r_max, multi_j->r_max, theta_crit2, r2)) {
 
     /* MATTHIEU: make a symmetric M-M interaction function ! */
     runner_dopair_grav_mm(r, ci, cj);
@@ -1588,20 +1120,6 @@ void runner_doself_grav(struct runner *r, struct cell *c, int gettimer) {
   if (gettimer) TIMER_TOC(timer_dosub_self_grav);
 }
 
-void runner_dosub_grav(struct runner *r, struct cell *ci, struct cell *cj,
-                       int timer) {
-
-  /* Is this a single cell? */
-  if (cj == NULL) {
-
-    runner_doself_grav(r, ci, 1);
-
-  } else {
-
-    runner_dopair_grav(r, ci, cj, 1);
-  }
-}
-
 /**
  * @brief Performs all M-M interactions between a given top-level cell and all
  * the other top-levels that are far enough.
@@ -1632,7 +1150,7 @@ void runner_do_grav_long_range(struct runner *r, struct cell *ci, int timer) {
   const int periodic = s->periodic;
   const double cell_width = s->width[0];
   const double dim[3] = {s->dim[0], s->dim[1], s->dim[2]};
-  const double theta_crit_inv = props->theta_crit_inv;
+  const double theta_crit2 = props->theta_crit2;
   const double max_distance = props->a_smooth * props->r_cut_max * cell_width;
   const double max_distance2 = max_distance * max_distance;
 
@@ -1691,7 +1209,7 @@ void runner_do_grav_long_range(struct runner *r, struct cell *ci, int timer) {
     }
 
     /* Check the multipole acceptance criterion */
-    if (gravity_multipole_accept(multi_i, multi_j, theta_crit_inv, r2)) {
+    if (gravity_M2L_accept(multi_i->r_max, multi_j->r_max, theta_crit2, r2)) {
 
       /* Go for a (non-symmetric) M-M calculation */
       runner_dopair_grav_mm(r, ci, cj);
@@ -1714,8 +1232,8 @@ void runner_do_grav_long_range(struct runner *r, struct cell *ci, int timer) {
       const double r2_rebuild = dx * dx + dy * dy + dz * dz;
 
       /* Is the criterion violated now but was OK at the last rebuild ? */
-      if (gravity_multipole_accept_rebuild(multi_i, multi_j, theta_crit_inv,
-                                           r2_rebuild)) {
+      if (gravity_M2L_accept(multi_i->r_max_rebuild, multi_j->r_max_rebuild,
+                             theta_crit2, r2_rebuild)) {
 
         /* Alright, we have to take charge of that pair in a different way. */
         // MATTHIEU: We should actually open the tree-node here and recurse.
diff --git a/src/scheduler.c b/src/scheduler.c
index d0eeb8cb726cf53321d1b4e6a028f2914246cbf2..b1cc1a572d3344e7b1e2338c7594da0edff58919 100644
--- a/src/scheduler.c
+++ b/src/scheduler.c
@@ -649,18 +649,8 @@ static void scheduler_splittask_gravity(struct task *t, struct scheduler *s) {
                                         ci->progeny[k]),
                       s);
         }
-      }
-
-      /* Otherwise, make sure the self task has a drift task */
-      else {
-
-        lock_lock(&ci->lock);
+      } /* Cell is split */
 
-        if (ci->drift_gpart == NULL)
-          ci->drift_gpart = scheduler_addtask(
-              s, task_type_drift_gpart, task_subtype_none, 0, 0, ci, NULL);
-        lock_unlock_blind(&ci->lock);
-      }
     } /* Self interaction */
 
     /* Pair interaction? */
@@ -675,28 +665,6 @@ static void scheduler_splittask_gravity(struct task *t, struct scheduler *s) {
         t->skip = 1;
         break;
       }
-
-      /* Should this task be split-up? */
-      if (0 && ci->split && cj->split) {
-
-        // MATTHIEU: nothing here for now
-
-      } else {
-
-        /* Create the drift for ci. */
-        lock_lock(&ci->lock);
-        if (ci->drift_gpart == NULL && ci->nodeID == engine_rank)
-          ci->drift_gpart = scheduler_addtask(
-              s, task_type_drift_gpart, task_subtype_none, 0, 0, ci, NULL);
-        lock_unlock_blind(&ci->lock);
-
-        /* Create the drift for cj. */
-        lock_lock(&cj->lock);
-        if (cj->drift_gpart == NULL && cj->nodeID == engine_rank)
-          cj->drift_gpart = scheduler_addtask(
-              s, task_type_drift_gpart, task_subtype_none, 0, 0, cj, NULL);
-        lock_unlock_blind(&cj->lock);
-      }
     } /* pair interaction? */
   }   /* iterate over the current task. */
 }
@@ -727,7 +695,7 @@ void scheduler_splittasks_mapper(void *map_data, int num_elements,
       scheduler_splittask_gravity(t, s);
     } else if (t->type == task_type_grav_top_level ||
                t->type == task_type_grav_ghost) {
-      // MATTHIEU: for the future
+      /* For future use */
     } else {
       error("Unexpected task sub-type");
     }
diff --git a/src/timers.c b/src/timers.c
index 62eac20596a082e411ced61a86f32bef9edcb636..fec111dd939528bd0648609d8a1f5f83e595ec02 100644
--- a/src/timers.c
+++ b/src/timers.c
@@ -54,8 +54,9 @@ const char* timers_names[timer_count] = {
     "dopair_density",
     "dopair_gradient",
     "dopair_force",
-    "dopair_grav_pm",
+    "dopair_grav_branch",
     "dopair_grav_mm",
+    "dopair_grav_pm",
     "dopair_grav_pp",
     "dograv_external",
     "dograv_down",
@@ -119,8 +120,9 @@ void timers_reset_all() { timers_reset(timers_mask_all); }
 void timers_print(int step) {
   fprintf(timers_file, "%d\t", step);
   for (int k = 0; k < timer_count; k++)
-    fprintf(timers_file, "%.3f\t", clocks_from_ticks(timers[k]));
+    fprintf(timers_file, "%18.3f ", clocks_from_ticks(timers[k]));
   fprintf(timers_file, "\n");
+  fflush(timers_file);
 }
 
 /**
@@ -136,7 +138,7 @@ void timers_open_file(int rank) {
 
   fprintf(timers_file, "# timers: \n# step | ");
   for (int k = 0; k < timer_count; k++)
-    fprintf(timers_file, "%s\t", timers_names[k]);
+    fprintf(timers_file, "%18s ", timers_names[k]);
   fprintf(timers_file, "\n");
 }
 
diff --git a/src/timers.h b/src/timers.h
index 9248be4f3048e468deed476f822947eed3c4ce56..38ede8251eb5d640282e728e17d9330956a1cba8 100644
--- a/src/timers.h
+++ b/src/timers.h
@@ -55,8 +55,9 @@ enum {
   timer_dopair_density,
   timer_dopair_gradient,
   timer_dopair_force,
-  timer_dopair_grav_pm,
+  timer_dopair_grav_branch,
   timer_dopair_grav_mm,
+  timer_dopair_grav_pm,
   timer_dopair_grav_pp,
   timer_dograv_external,
   timer_dograv_down,
diff --git a/src/tools.c b/src/tools.c
index 7d69ebc6c476312081d8a8c34c76c6592da5cab0..3ee55db3d5f5348699372d2620b6d15af38b23d0 100644
--- a/src/tools.c
+++ b/src/tools.c
@@ -400,64 +400,6 @@ void self_all_force(struct runner *r, struct cell *ci) {
   }
 }
 
-void pairs_single_grav(double *dim, long long int pid,
-                       struct gpart *restrict gparts, const struct part *parts,
-                       int N, int periodic) {
-
-  int i, k;
-  // int mj, mk;
-  // double maxratio = 1.0;
-  double r2, dx[3];
-  float fdx[3], a[3] = {0.0, 0.0, 0.0}, aabs[3] = {0.0, 0.0, 0.0};
-  struct gpart pi, pj;
-  // double ih = 12.0/6.25;
-
-  /* Find "our" part. */
-  for (k = 0; k < N; k++)
-    if ((gparts[k].id_or_neg_offset < 0 &&
-         parts[-gparts[k].id_or_neg_offset].id == pid) ||
-        gparts[k].id_or_neg_offset == pid)
-      break;
-  if (k == N) error("Part not found.");
-  pi = gparts[k];
-  pi.a_grav[0] = 0.0f;
-  pi.a_grav[1] = 0.0f;
-  pi.a_grav[2] = 0.0f;
-
-  /* Loop over all particle pairs. */
-  for (k = 0; k < N; k++) {
-    if (gparts[k].id_or_neg_offset == pi.id_or_neg_offset) continue;
-    pj = gparts[k];
-    for (i = 0; i < 3; i++) {
-      dx[i] = pi.x[i] - pj.x[i];
-      if (periodic) {
-        if (dx[i] < -dim[i] / 2)
-          dx[i] += dim[i];
-        else if (dx[i] > dim[i] / 2)
-          dx[i] -= dim[i];
-      }
-      fdx[i] = dx[i];
-    }
-    r2 = fdx[0] * fdx[0] + fdx[1] * fdx[1] + fdx[2] * fdx[2];
-    runner_iact_grav_pp(r2, fdx, &pi, &pj);
-    a[0] += pi.a_grav[0];
-    a[1] += pi.a_grav[1];
-    a[2] += pi.a_grav[2];
-    aabs[0] += fabsf(pi.a_grav[0]);
-    aabs[1] += fabsf(pi.a_grav[1]);
-    aabs[2] += fabsf(pi.a_grav[2]);
-    pi.a_grav[0] = 0.0f;
-    pi.a_grav[1] = 0.0f;
-    pi.a_grav[2] = 0.0f;
-  }
-
-  /* Dump the result. */
-  message(
-      "acceleration on gpart %lli is a=[ %e %e %e ], |a|=[ %.2e %.2e %.2e ].\n",
-      parts[-pi.id_or_neg_offset].id, a[0], a[1], a[2], aabs[0], aabs[1],
-      aabs[2]);
-}
-
 /**
  * @brief Compute the force on a single particle brute-force.
  */
@@ -747,69 +689,3 @@ int compare_particles(struct part a, struct part b, double threshold) {
 
 #endif
 }
-
-/**
- * @brief Computes the forces between all g-particles using the N^2 algorithm
- *
- * Overwrites the accelerations of the gparts with the values.
- * Do not use for actual runs.
- *
- * @brief gparts The array of particles.
- * @brief gcount The number of particles.
- * @brief constants Physical constants in internal units.
- * @brief gravity_properties Constants governing the gravity scheme.
- */
-void gravity_n2(struct gpart *gparts, const int gcount,
-                const struct phys_const *constants,
-                const struct gravity_props *gravity_properties, float rlr) {
-
-  const float rlr_inv = 1. / rlr;
-  const float r_cut = gravity_properties->r_cut_max;
-  const float max_d = r_cut * rlr;
-  const float max_d2 = max_d * max_d;
-
-  message("rlr_inv= %f", rlr_inv);
-  message("max_d: %f", max_d);
-
-  /* Reset everything */
-  for (int pid = 0; pid < gcount; pid++) {
-    struct gpart *restrict gpi = &gparts[pid];
-    gpi->a_grav[0] = 0.f;
-    gpi->a_grav[1] = 0.f;
-    gpi->a_grav[2] = 0.f;
-  }
-
-  /* Loop over all particles in ci... */
-  for (int pid = 0; pid < gcount; pid++) {
-
-    /* Get a hold of the ith part in ci. */
-    struct gpart *restrict gpi = &gparts[pid];
-
-    for (int pjd = pid + 1; pjd < gcount; pjd++) {
-
-      /* Get a hold of the jth part in ci. */
-      struct gpart *restrict gpj = &gparts[pjd];
-
-      /* Compute the pairwise distance. */
-      const float dx[3] = {gpi->x[0] - gpj->x[0],   // x
-                           gpi->x[1] - gpj->x[1],   // y
-                           gpi->x[2] - gpj->x[2]};  // z
-      const float r2 = dx[0] * dx[0] + dx[1] * dx[1] + dx[2] * dx[2];
-
-      if (r2 < max_d2 || 1) {
-
-        /* Apply the gravitational acceleration. */
-        runner_iact_grav_pp(r2, dx, gpi, gpj);
-      }
-    }
-  }
-
-  /* Multiply by Newton's constant */
-  const double const_G = constants->const_newton_G;
-  for (int pid = 0; pid < gcount; pid++) {
-    struct gpart *restrict gpi = &gparts[pid];
-    gpi->a_grav[0] *= const_G;
-    gpi->a_grav[1] *= const_G;
-    gpi->a_grav[2] *= const_G;
-  }
-}
diff --git a/tests/Makefile.am b/tests/Makefile.am
index 27e6ecf4fad565a28825afb7890833fce0f57318..553980a93e907e83b65bb4539ca49c8bc1b7207b 100644
--- a/tests/Makefile.am
+++ b/tests/Makefile.am
@@ -25,7 +25,7 @@ TESTS = testGreetings testMaths testReading.sh testSingle testKernel testSymmetr
         testParser.sh testSPHStep test125cells.sh test125cellsPerturbed.sh testFFT \
         testAdiabaticIndex testRiemannExact testRiemannTRRS testRiemannHLLC \
         testMatrixInversion testThreadpool testDump testLogger testInteractions.sh \
-        testVoronoi1D testVoronoi2D testVoronoi3D \
+        testVoronoi1D testVoronoi2D testVoronoi3D testGravityDerivatives \
 	testPeriodicBC.sh testPeriodicBCPerturbed.sh
 
 # List of test programs to compile
@@ -35,7 +35,8 @@ check_PROGRAMS = testGreetings testReading testSingle testTimeIntegration \
                  testSymmetry testThreadpool \
                  testAdiabaticIndex testRiemannExact testRiemannTRRS \
                  testRiemannHLLC testMatrixInversion testDump testLogger \
-		 testVoronoi1D testVoronoi2D testVoronoi3D testPeriodicBC
+		 testVoronoi1D testVoronoi2D testVoronoi3D testPeriodicBC \
+		 testGravityDerivatives
 
 # Rebuild tests when SWIFT is updated.
 $(check_PROGRAMS): ../src/.libs/libswiftsim.a
@@ -93,6 +94,8 @@ testDump_SOURCES = testDump.c
 
 testLogger_SOURCES = testLogger.c
 
+testGravityDerivatives_SOURCES = testGravityDerivatives.c
+
 # Files necessary for distribution
 EXTRA_DIST = testReading.sh makeInput.py testActivePair.sh \
 	     test27cells.sh test27cellsPerturbed.sh testParser.sh testPeriodicBC.sh \
diff --git a/tests/testGravityDerivatives.c b/tests/testGravityDerivatives.c
new file mode 100644
index 0000000000000000000000000000000000000000..0a811cbda491c40f2f1db7bac5b1f3e2f7508b59
--- /dev/null
+++ b/tests/testGravityDerivatives.c
@@ -0,0 +1,1048 @@
+/*******************************************************************************
+ * This file is part of SWIFT.
+ * Copyright (C) 2016 Matthieu Schaller (matthieu.schaller@durham.ac.uk)
+ *
+ * This program is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU Lesser General Public License as published
+ * by the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public License
+ * along with this program.  If not, see <http://www.gnu.org/licenses/>.
+ *
+ ******************************************************************************/
+#include "../config.h"
+
+/* Some standard headers. */
+#include <fenv.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <unistd.h>
+
+/* Local headers. */
+#include "swift.h"
+
+/*************************/
+/* 0th order derivatives */
+/*************************/
+
+/**
+ * @brief Returns the potential \f$ \phi(r_x, r_y, r_z) = |r|^{-1} \f$.
+ *
+ * @param r_x x-coordinate of the distance vector (\f$ r_x \f$).
+ * @param r_y y-coordinate of the distance vector (\f$ r_y \f$).
+ * @param r_z z-coordinate of the distance vector (\f$ r_z \f$).
+ * @param r_inv Inverse of the norm of the distance vector (\f$ |r|^{-1} \f$)
+ */
+INLINE static double D_000(double r_x, double r_y, double r_z, double r_inv) {
+  /* The 0th-order term is r_inv itself; r_x, r_y and r_z are not read. */
+  return r_inv;
+}
+
+/*************************/
+/* 1st order derivatives */
+/*************************/
+
+/**
+ * @brief \f$ \frac{\partial\phi(r_x, r_y, r_z)}{\partial r_x} \f$.
+ *
+ * @param r_x x-coordinate of the distance vector (\f$ r_x \f$).
+ * @param r_y y-coordinate of the distance vector (\f$ r_y \f$).
+ * @param r_z z-coordinate of the distance vector (\f$ r_z \f$).
+ * @param r_inv Inverse of the norm of the distance vector (\f$ |r|^{-1} \f$)
+ */
+INLINE static double D_100(double r_x, double r_y, double r_z, double r_inv) {
+  /* d(1/|r|)/dr_x = -r_x / |r|^3 */
+  return -r_x * r_inv * r_inv * r_inv;
+}
+
+/**
+ * @brief \f$ \frac{\partial\phi(r_x, r_y, r_z)}{\partial r_y} \f$.
+ *
+ * @param r_x x-coordinate of the distance vector (\f$ r_x \f$).
+ * @param r_y y-coordinate of the distance vector (\f$ r_y \f$).
+ * @param r_z z-coordinate of the distance vector (\f$ r_z \f$).
+ * @param r_inv Inverse of the norm of the distance vector (\f$ |r|^{-1} \f$)
+ */
+INLINE static double D_010(double r_x, double r_y, double r_z, double r_inv) {
+  /* d(1/|r|)/dr_y = -r_y / |r|^3 */
+  return -r_y * r_inv * r_inv * r_inv;
+}
+
+/**
+ * @brief \f$ \frac{\partial\phi(r_x, r_y, r_z)}{\partial r_z} \f$.
+ *
+ * @param r_x x-coordinate of the distance vector (\f$ r_x \f$).
+ * @param r_y y-coordinate of the distance vector (\f$ r_y \f$).
+ * @param r_z z-coordinate of the distance vector (\f$ r_z \f$).
+ * @param r_inv Inverse of the norm of the distance vector (\f$ |r|^{-1} \f$)
+ */
+INLINE static double D_001(double r_x, double r_y, double r_z, double r_inv) {
+  /* d(1/|r|)/dr_z = -r_z / |r|^3 */
+  return -r_z * r_inv * r_inv * r_inv;
+}
+
+/*************************/
+/* 2nd order derivatives */
+/*************************/
+
+/**
+ * @brief \f$ \frac{\partial^2\phi(r_x, r_y, r_z)}{\partial r_x^2} \f$.
+ *
+ * @param r_x x-coordinate of the distance vector (\f$ r_x \f$).
+ * @param r_y y-coordinate of the distance vector (\f$ r_y \f$).
+ * @param r_z z-coordinate of the distance vector (\f$ r_z \f$).
+ * @param r_inv Inverse of the norm of the distance vector (\f$ |r|^{-1} \f$)
+ */
+INLINE static double D_200(double r_x, double r_y, double r_z, double r_inv) {
+  const double r_inv2 = r_inv * r_inv;   /* |r|^-2 */
+  const double r_inv3 = r_inv * r_inv2;  /* |r|^-3 */
+  const double r_inv5 = r_inv3 * r_inv2; /* |r|^-5 */
+  return 3. * r_x * r_x * r_inv5 - r_inv3;
+}
+
+/**
+ * @brief \f$ \frac{\partial^2\phi(r_x, r_y, r_z)}{\partial r_y^2} \f$.
+ *
+ * @param r_x x-coordinate of the distance vector (\f$ r_x \f$).
+ * @param r_y y-coordinate of the distance vector (\f$ r_y \f$).
+ * @param r_z z-coordinate of the distance vector (\f$ r_z \f$).
+ * @param r_inv Inverse of the norm of the distance vector (\f$ |r|^{-1} \f$)
+ */
+INLINE static double D_020(double r_x, double r_y, double r_z, double r_inv) {
+  const double r_inv2 = r_inv * r_inv;   /* |r|^-2 */
+  const double r_inv3 = r_inv * r_inv2;  /* |r|^-3 */
+  const double r_inv5 = r_inv3 * r_inv2; /* |r|^-5 */
+  return 3. * r_y * r_y * r_inv5 - r_inv3;
+}
+
+/**
+ * @brief \f$ \frac{\partial^2\phi(r_x, r_y, r_z)}{\partial r_z^2} \f$.
+ *
+ * @param r_x x-coordinate of the distance vector (\f$ r_x \f$).
+ * @param r_y y-coordinate of the distance vector (\f$ r_y \f$).
+ * @param r_z z-coordinate of the distance vector (\f$ r_z \f$).
+ * @param r_inv Inverse of the norm of the distance vector (\f$ |r|^{-1} \f$)
+ */
+INLINE static double D_002(double r_x, double r_y, double r_z, double r_inv) {
+  const double r_inv2 = r_inv * r_inv;   /* |r|^-2 */
+  const double r_inv3 = r_inv * r_inv2;  /* |r|^-3 */
+  const double r_inv5 = r_inv3 * r_inv2; /* |r|^-5 */
+  return 3. * r_z * r_z * r_inv5 - r_inv3;
+}
+
+/**
+ * @brief \f$ \frac{\partial^2\phi(r_x, r_y, r_z)}{\partial r_x\partial r_y}
+ * \f$.
+ *
+ * @param r_x x-coordinate of the distance vector (\f$ r_x \f$).
+ * @param r_y y-coordinate of the distance vector (\f$ r_y \f$).
+ * @param r_z z-coordinate of the distance vector (\f$ r_z \f$).
+ * @param r_inv Inverse of the norm of the distance vector (\f$ |r|^{-1} \f$)
+ */
+INLINE static double D_110(double r_x, double r_y, double r_z, double r_inv) {
+  const double r_inv2 = r_inv * r_inv;           /* |r|^-2 */
+  const double r_inv5 = r_inv2 * r_inv2 * r_inv; /* |r|^-5 */
+  return 3. * r_x * r_y * r_inv5;
+}
+
+/**
+ * @brief \f$ \frac{\partial^2\phi(r_x, r_y, r_z)}{\partial r_x\partial r_z}
+ * \f$.
+ *
+ * @param r_x x-coordinate of the distance vector (\f$ r_x \f$).
+ * @param r_y y-coordinate of the distance vector (\f$ r_y \f$).
+ * @param r_z z-coordinate of the distance vector (\f$ r_z \f$).
+ * @param r_inv Inverse of the norm of the distance vector (\f$ |r|^{-1} \f$)
+ */
+INLINE static double D_101(double r_x, double r_y, double r_z, double r_inv) {
+  const double r_inv2 = r_inv * r_inv;           /* |r|^-2 */
+  const double r_inv5 = r_inv2 * r_inv2 * r_inv; /* |r|^-5 */
+  return 3. * r_x * r_z * r_inv5;
+}
+
+/**
+ * @brief \f$ \frac{\partial^2\phi(r_x, r_y, r_z)}{\partial r_y\partial r_z}
+ * \f$.
+ *
+ * @param r_x x-coordinate of the distance vector (\f$ r_x \f$).
+ * @param r_y y-coordinate of the distance vector (\f$ r_y \f$).
+ * @param r_z z-coordinate of the distance vector (\f$ r_z \f$).
+ * @param r_inv Inverse of the norm of the distance vector (\f$ |r|^{-1} \f$)
+ */
+INLINE static double D_011(double r_x, double r_y, double r_z, double r_inv) {
+  const double r_inv2 = r_inv * r_inv;           /* |r|^-2 */
+  const double r_inv5 = r_inv2 * r_inv2 * r_inv; /* |r|^-5 */
+  return 3. * r_y * r_z * r_inv5;
+}
+
+/*************************/
+/* 3rd order derivatives */
+/*************************/
+
+/**
+ * @brief \f$ \frac{\partial^3\phi(r_x, r_y, r_z)}{\partial r_x^3} \f$.
+ *
+ * @param r_x x-coordinate of the distance vector (\f$ r_x \f$).
+ * @param r_y y-coordinate of the distance vector (\f$ r_y \f$).
+ * @param r_z z-coordinate of the distance vector (\f$ r_z \f$).
+ * @param r_inv Inverse of the norm of the distance vector (\f$ |r|^{-1} \f$)
+ */
+INLINE static double D_300(double r_x, double r_y, double r_z, double r_inv) {
+  const double r_inv2 = r_inv * r_inv;           /* |r|^-2 */
+  const double r_inv5 = r_inv2 * r_inv2 * r_inv; /* |r|^-5 */
+  const double r_inv7 = r_inv5 * r_inv2;         /* |r|^-7 */
+  return -15. * r_x * r_x * r_x * r_inv7 + 9. * r_x * r_inv5;
+}
+
+/**
+ * @brief \f$ \frac{\partial^3\phi(r_x, r_y, r_z)}{\partial r_y^3} \f$.
+ *
+ * @param r_x x-coordinate of the distance vector (\f$ r_x \f$).
+ * @param r_y y-coordinate of the distance vector (\f$ r_y \f$).
+ * @param r_z z-coordinate of the distance vector (\f$ r_z \f$).
+ * @param r_inv Inverse of the norm of the distance vector (\f$ |r|^{-1} \f$)
+ */
+INLINE static double D_030(double r_x, double r_y, double r_z, double r_inv) {
+  const double r_inv2 = r_inv * r_inv;           /* |r|^-2 */
+  const double r_inv5 = r_inv2 * r_inv2 * r_inv; /* |r|^-5 */
+  const double r_inv7 = r_inv5 * r_inv2;         /* |r|^-7 */
+  return -15. * r_y * r_y * r_y * r_inv7 + 9. * r_y * r_inv5;
+}
+
+/**
+ * @brief \f$ \frac{\partial^3\phi(r_x, r_y, r_z)}{\partial r_z^3} \f$.
+ *
+ * @param r_x x-coordinate of the distance vector (\f$ r_x \f$).
+ * @param r_y y-coordinate of the distance vector (\f$ r_y \f$).
+ * @param r_z z-coordinate of the distance vector (\f$ r_z \f$).
+ * @param r_inv Inverse of the norm of the distance vector (\f$ |r|^{-1} \f$)
+ */
+INLINE static double D_003(double r_x, double r_y, double r_z, double r_inv) {
+  const double r_inv2 = r_inv * r_inv;           /* |r|^-2 */
+  const double r_inv5 = r_inv2 * r_inv2 * r_inv; /* |r|^-5 */
+  const double r_inv7 = r_inv5 * r_inv2;         /* |r|^-7 */
+  return -15. * r_z * r_z * r_z * r_inv7 + 9. * r_z * r_inv5;
+}
+
+/**
+ * @brief \f$ \frac{\partial^3\phi(r_x, r_y, r_z)}{\partial r_x^2\partial r_y}
+ * \f$.
+ *
+ * @param r_x x-coordinate of the distance vector (\f$ r_x \f$).
+ * @param r_y y-coordinate of the distance vector (\f$ r_y \f$).
+ * @param r_z z-coordinate of the distance vector (\f$ r_z \f$).
+ * @param r_inv Inverse of the norm of the distance vector (\f$ |r|^{-1} \f$)
+ */
+INLINE static double D_210(double r_x, double r_y, double r_z, double r_inv) {
+  const double r_inv2 = r_inv * r_inv;           /* |r|^-2 */
+  const double r_inv5 = r_inv2 * r_inv2 * r_inv; /* |r|^-5 */
+  const double r_inv7 = r_inv5 * r_inv2;         /* |r|^-7 */
+  return -15. * r_x * r_x * r_y * r_inv7 + 3. * r_y * r_inv5;
+}
+
+/**
+ * @brief \f$ \frac{\partial^3\phi(r_x, r_y, r_z)}{\partial r_x^2\partial r_z}
+ * \f$.
+ *
+ * @param r_x x-coordinate of the distance vector (\f$ r_x \f$).
+ * @param r_y y-coordinate of the distance vector (\f$ r_y \f$).
+ * @param r_z z-coordinate of the distance vector (\f$ r_z \f$).
+ * @param r_inv Inverse of the norm of the distance vector (\f$ |r|^{-1} \f$)
+ */
+INLINE static double D_201(double r_x, double r_y, double r_z, double r_inv) {
+  const double r_inv2 = r_inv * r_inv;           /* |r|^-2 */
+  const double r_inv5 = r_inv2 * r_inv2 * r_inv; /* |r|^-5 */
+  const double r_inv7 = r_inv5 * r_inv2;         /* |r|^-7 */
+  return -15. * r_x * r_x * r_z * r_inv7 + 3. * r_z * r_inv5;
+}
+
+/**
+ * @brief \f$ \frac{\partial^3\phi(r_x, r_y, r_z)}{\partial r_x\partial r_y^2}
+ * \f$.
+ *
+ * @param r_x x-coordinate of the distance vector (\f$ r_x \f$).
+ * @param r_y y-coordinate of the distance vector (\f$ r_y \f$).
+ * @param r_z z-coordinate of the distance vector (\f$ r_z \f$).
+ * @param r_inv Inverse of the norm of the distance vector (\f$ |r|^{-1} \f$)
+ */
+INLINE static double D_120(double r_x, double r_y, double r_z, double r_inv) {
+  const double r_inv2 = r_inv * r_inv;           /* |r|^-2 */
+  const double r_inv5 = r_inv2 * r_inv2 * r_inv; /* |r|^-5 */
+  const double r_inv7 = r_inv5 * r_inv2;         /* |r|^-7 */
+  return -15. * r_x * r_y * r_y * r_inv7 + 3. * r_x * r_inv5;
+}
+
+/**
+ * @brief \f$ \frac{\partial^3\phi(r_x, r_y, r_z)}{\partial r_y^2\partial r_z}
+ * \f$.
+ *
+ * @param r_x x-coordinate of the distance vector (\f$ r_x \f$).
+ * @param r_y y-coordinate of the distance vector (\f$ r_y \f$).
+ * @param r_z z-coordinate of the distance vector (\f$ r_z \f$).
+ * @param r_inv Inverse of the norm of the distance vector (\f$ |r|^{-1} \f$)
+ */
+INLINE static double D_021(double r_x, double r_y, double r_z, double r_inv) {
+  const double r_inv2 = r_inv * r_inv;           /* |r|^-2 */
+  const double r_inv5 = r_inv2 * r_inv2 * r_inv; /* |r|^-5 */
+  const double r_inv7 = r_inv5 * r_inv2;         /* |r|^-7 */
+  return -15. * r_z * r_y * r_y * r_inv7 + 3. * r_z * r_inv5;
+}
+
+/**
+ * @brief \f$ \frac{\partial^3\phi(r_x, r_y, r_z)}{\partial r_x\partial r_z^2}
+ * \f$.
+ *
+ * @param r_x x-coordinate of the distance vector (\f$ r_x \f$).
+ * @param r_y y-coordinate of the distance vector (\f$ r_y \f$).
+ * @param r_z z-coordinate of the distance vector (\f$ r_z \f$).
+ * @param r_inv Inverse of the norm of the distance vector (\f$ |r|^{-1} \f$)
+ */
+INLINE static double D_102(double r_x, double r_y, double r_z, double r_inv) {
+  const double r_inv2 = r_inv * r_inv;           /* |r|^-2 */
+  const double r_inv5 = r_inv2 * r_inv2 * r_inv; /* |r|^-5 */
+  const double r_inv7 = r_inv5 * r_inv2;         /* |r|^-7 */
+  return -15. * r_x * r_z * r_z * r_inv7 + 3. * r_x * r_inv5;
+}
+
+/**
+ * @brief \f$ \frac{\partial^3\phi(r_x, r_y, r_z)}{\partial r_y\partial r_z^2}
+ * \f$.
+ *
+ * @param r_x x-coordinate of the distance vector (\f$ r_x \f$).
+ * @param r_y y-coordinate of the distance vector (\f$ r_y \f$).
+ * @param r_z z-coordinate of the distance vector (\f$ r_z \f$).
+ * @param r_inv Inverse of the norm of the distance vector (\f$ |r|^{-1} \f$)
+ */
+INLINE static double D_012(double r_x, double r_y, double r_z, double r_inv) {
+  const double r_inv2 = r_inv * r_inv;           /* |r|^-2 */
+  const double r_inv5 = r_inv2 * r_inv2 * r_inv; /* |r|^-5 */
+  const double r_inv7 = r_inv5 * r_inv2;         /* |r|^-7 */
+  return -15. * r_y * r_z * r_z * r_inv7 + 3. * r_y * r_inv5;
+}
+
+/**
+ * @brief \f$ \frac{\partial^3\phi(r_x, r_y, r_z)}{\partial r_x\partial
+ * r_y\partial r_z} \f$.
+ *
+ * @param r_x x-coordinate of the distance vector (\f$ r_x \f$).
+ * @param r_y y-coordinate of the distance vector (\f$ r_y \f$).
+ * @param r_z z-coordinate of the distance vector (\f$ r_z \f$).
+ * @param r_inv Inverse of the norm of the distance vector (\f$ |r|^{-1} \f$)
+ */
+INLINE static double D_111(double r_x, double r_y, double r_z, double r_inv) {
+  const double r_inv3 = r_inv * r_inv * r_inv;   /* |r|^-3 */
+  const double r_inv7 = r_inv3 * r_inv3 * r_inv; /* |r|^-7 */
+  return -15. * r_x * r_y * r_z * r_inv7;
+}
+
+/*********************************/
+/* 4th order gravity derivatives */
+/*********************************/
+
+/**
+ * @brief Compute \f$ \frac{\partial^4}{ \partial_z^4 }\phi(x, y, z) \f$.
+ *
+ * Note that r_inv = 1./sqrt(r_x^2 + r_y^2 + r_z^2) is passed in, not computed.
+ */
+INLINE static double D_004(double r_x, double r_y, double r_z, double r_inv) {
+  return +105. * r_inv * r_inv * r_inv * r_inv * r_inv * r_inv * r_inv * r_inv *
+             r_inv * (r_z * r_z * r_z * r_z) -
+         15. * r_inv * r_inv * r_inv * r_inv * r_inv * r_inv * r_inv * 6.0 *
+             (r_z * r_z) +
+         3. * r_inv * r_inv * r_inv * r_inv * r_inv * 3.0;
+  /* 5 zero-valued terms not written out */
+}
+
+/**
+ * @brief Compute \f$ \frac{\partial^4}{ \partial_y^1 \partial_z^3 }\phi(x, y,
+ * z) \f$.
+ *
+ * Note that r_inv = 1./sqrt(r_x^2 + r_y^2 + r_z^2) is passed in, not computed.
+ */
+INLINE static double D_013(double r_x, double r_y, double r_z, double r_inv) {
+  return +105. * r_inv * r_inv * r_inv * r_inv * r_inv * r_inv * r_inv * r_inv *
+             r_inv * (r_y * r_z * r_z * r_z) -
+         15. * r_inv * r_inv * r_inv * r_inv * r_inv * r_inv * r_inv * 3.0 *
+             (r_y * r_z);
+  /* 11 zero-valued terms not written out */
+}
+
+/**
+ * @brief Compute \f$ \frac{\partial^4}{ \partial_y^2 \partial_z^2 }\phi(x, y,
+ * z) \f$.
+ *
+ * Note that r_inv = 1./sqrt(r_x^2 + r_y^2 + r_z^2) is passed in, not computed.
+ */
+INLINE static double D_022(double r_x, double r_y, double r_z, double r_inv) {
+  return +105. * r_inv * r_inv * r_inv * r_inv * r_inv * r_inv * r_inv * r_inv *
+             r_inv * (r_y * r_y * r_z * r_z) -
+         15. * r_inv * r_inv * r_inv * r_inv * r_inv * r_inv * r_inv *
+             (r_y * r_y) -
+         15. * r_inv * r_inv * r_inv * r_inv * r_inv * r_inv * r_inv *
+             (r_z * r_z) +
+         3. * r_inv * r_inv * r_inv * r_inv * r_inv;
+  /* 11 zero-valued terms not written out */
+}
+
+/**
+ * @brief Compute \f$ \frac{\partial^4}{ \partial_y^3 \partial_z^1 }\phi(x, y,
+ * z) \f$.
+ *
+ * Note that r_inv = 1./sqrt(r_x^2 + r_y^2 + r_z^2) is passed in, not computed.
+ */
+INLINE static double D_031(double r_x, double r_y, double r_z, double r_inv) {
+  return +105. * r_inv * r_inv * r_inv * r_inv * r_inv * r_inv * r_inv * r_inv *
+             r_inv * (r_y * r_y * r_y * r_z) -
+         15. * r_inv * r_inv * r_inv * r_inv * r_inv * r_inv * r_inv * 3.0 *
+             (r_y * r_z);
+  /* 11 zero-valued terms not written out */
+}
+
+/**
+ * @brief Compute \f$ \frac{\partial^4}{ \partial_y^4 }\phi(x, y, z) \f$.
+ *
+ * Note that r_inv = 1./sqrt(r_x^2 + r_y^2 + r_z^2) is passed in, not computed.
+ */
+INLINE static double D_040(double r_x, double r_y, double r_z, double r_inv) {
+  return +105. * r_inv * r_inv * r_inv * r_inv * r_inv * r_inv * r_inv * r_inv *
+             r_inv * (r_y * r_y * r_y * r_y) -
+         15. * r_inv * r_inv * r_inv * r_inv * r_inv * r_inv * r_inv * 6.0 *
+             (r_y * r_y) +
+         3. * r_inv * r_inv * r_inv * r_inv * r_inv * 3.0;
+  /* 5 zero-valued terms not written out */
+}
+
+/**
+ * @brief Compute \f$ \frac{\partial^4}{ \partial_x^1 \partial_z^3 }\phi(x, y,
+ * z) \f$.
+ *
+ * Note that r_inv = 1./sqrt(r_x^2 + r_y^2 + r_z^2) is passed in, not computed.
+ */
+INLINE static double D_103(double r_x, double r_y, double r_z, double r_inv) {
+  return +105. * r_inv * r_inv * r_inv * r_inv * r_inv * r_inv * r_inv * r_inv *
+             r_inv * (r_x * r_z * r_z * r_z) -
+         15. * r_inv * r_inv * r_inv * r_inv * r_inv * r_inv * r_inv * 3.0 *
+             (r_x * r_z);
+  /* 11 zero-valued terms not written out */
+}
+
+/**
+ * @brief Compute \f$ \frac{\partial^4}{ \partial_x^1 \partial_y^1 \partial_z^2
+ * }\phi(x, y, z) \f$.
+ *
+ * Note that r_inv = 1./sqrt(r_x^2 + r_y^2 + r_z^2) is passed in, not computed.
+ */
+INLINE static double D_112(double r_x, double r_y, double r_z, double r_inv) {
+  return +105. * r_inv * r_inv * r_inv * r_inv * r_inv * r_inv * r_inv * r_inv *
+             r_inv * (r_x * r_y * r_z * r_z) -
+         15. * r_inv * r_inv * r_inv * r_inv * r_inv * r_inv * r_inv *
+             (r_x * r_y);
+  /* 13 zero-valued terms not written out */
+}
+
+/**
+ * @brief Compute \f$ \frac{\partial^4}{ \partial_x^1 \partial_y^2 \partial_z^1
+ * }\phi(x, y, z) \f$.
+ *
+ * Note that r_inv = 1./sqrt(r_x^2 + r_y^2 + r_z^2) is passed in, not computed.
+ */
+INLINE static double D_121(double r_x, double r_y, double r_z, double r_inv) {
+  return +105. * r_inv * r_inv * r_inv * r_inv * r_inv * r_inv * r_inv * r_inv *
+             r_inv * (r_x * r_y * r_y * r_z) -
+         15. * r_inv * r_inv * r_inv * r_inv * r_inv * r_inv * r_inv *
+             (r_x * r_z);
+  /* 13 zero-valued terms not written out */
+}
+
+/**
+ * @brief Compute \f$ \frac{\partial^4}{ \partial_x^1 \partial_y^3 }\phi(x, y,
+ * z) \f$.
+ *
+ * Note that r_inv = 1./sqrt(r_x^2 + r_y^2 + r_z^2) is passed in, not computed.
+ */
+INLINE static double D_130(double r_x, double r_y, double r_z, double r_inv) {
+  return +105. * r_inv * r_inv * r_inv * r_inv * r_inv * r_inv * r_inv * r_inv *
+             r_inv * (r_x * r_y * r_y * r_y) -
+         15. * r_inv * r_inv * r_inv * r_inv * r_inv * r_inv * r_inv * 3.0 *
+             (r_x * r_y);
+  /* 11 zero-valued terms not written out */
+}
+
+/**
+ * @brief Compute \f$ \frac{\partial^4}{ \partial_x^2 \partial_z^2 }\phi(x, y,
+ * z) \f$.
+ *
+ * Note that r_inv = 1./sqrt(r_x^2 + r_y^2 + r_z^2) is passed in, not computed.
+ */
+INLINE static double D_202(double r_x, double r_y, double r_z, double r_inv) {
+  return +105. * r_inv * r_inv * r_inv * r_inv * r_inv * r_inv * r_inv * r_inv *
+             r_inv * (r_x * r_x * r_z * r_z) -
+         15. * r_inv * r_inv * r_inv * r_inv * r_inv * r_inv * r_inv *
+             (r_x * r_x) -
+         15. * r_inv * r_inv * r_inv * r_inv * r_inv * r_inv * r_inv *
+             (r_z * r_z) +
+         3. * r_inv * r_inv * r_inv * r_inv * r_inv;
+  /* 11 zero-valued terms not written out */
+}
+
+/**
+ * @brief Compute \f$ \frac{\partial^4}{ \partial_x^2 \partial_y^1 \partial_z^1
+ * }\phi(x, y, z) \f$.
+ *
+ * Note that r_inv = 1./sqrt(r_x^2 + r_y^2 + r_z^2) is passed in, not computed.
+ */
+INLINE static double D_211(double r_x, double r_y, double r_z, double r_inv) {
+  return +105. * r_inv * r_inv * r_inv * r_inv * r_inv * r_inv * r_inv * r_inv *
+             r_inv * (r_x * r_x * r_y * r_z) -
+         15. * r_inv * r_inv * r_inv * r_inv * r_inv * r_inv * r_inv *
+             (r_y * r_z);
+  /* 13 zero-valued terms not written out */
+}
+
+/**
+ * @brief Compute \f$ \frac{\partial^4}{ \partial_x^2 \partial_y^2 }\phi(x, y,
+ * z) \f$.
+ *
+ * Note that r_inv = 1./sqrt(r_x^2 + r_y^2 + r_z^2) is passed in, not computed.
+ */
+INLINE static double D_220(double r_x, double r_y, double r_z, double r_inv) {
+  return +105. * r_inv * r_inv * r_inv * r_inv * r_inv * r_inv * r_inv * r_inv *
+             r_inv * (r_x * r_x * r_y * r_y) -
+         15. * r_inv * r_inv * r_inv * r_inv * r_inv * r_inv * r_inv *
+             (r_x * r_x) -
+         15. * r_inv * r_inv * r_inv * r_inv * r_inv * r_inv * r_inv *
+             (r_y * r_y) +
+         3. * r_inv * r_inv * r_inv * r_inv * r_inv;
+  /* 11 zero-valued terms not written out */
+}
+
+/**
+ * @brief Compute \f$ \frac{\partial^4}{ \partial_x^3 \partial_z^1 }\phi(x, y,
+ * z) \f$.
+ *
+ * Note that r_inv = 1./sqrt(r_x^2 + r_y^2 + r_z^2) is passed in, not computed.
+ */
+INLINE static double D_301(double r_x, double r_y, double r_z, double r_inv) {
+  return +105. * r_inv * r_inv * r_inv * r_inv * r_inv * r_inv * r_inv * r_inv *
+             r_inv * (r_x * r_x * r_x * r_z) -
+         15. * r_inv * r_inv * r_inv * r_inv * r_inv * r_inv * r_inv * 3.0 *
+             (r_x * r_z);
+  /* 11 zero-valued terms not written out */
+}
+
+/**
+ * @brief Compute \f$ \frac{\partial^4}{ \partial_x^3 \partial_y^1 }\phi(x, y,
+ * z) \f$.
+ *
+ * Note that r_inv = 1./sqrt(r_x^2 + r_y^2 + r_z^2) is passed in, not computed.
+ */
+INLINE static double D_310(double r_x, double r_y, double r_z, double r_inv) {
+  return +105. * r_inv * r_inv * r_inv * r_inv * r_inv * r_inv * r_inv * r_inv *
+             r_inv * (r_x * r_x * r_x * r_y) -
+         15. * r_inv * r_inv * r_inv * r_inv * r_inv * r_inv * r_inv * 3.0 *
+             (r_x * r_y);
+  /* 11 zero-valued terms not written out */
+}
+
+/**
+ * @brief Compute \f$ \frac{\partial^4}{ \partial_x^4 }\phi(x, y, z) \f$.
+ *
+ * Note that r_inv = 1./sqrt(r_x^2 + r_y^2 + r_z^2) is passed in, not computed.
+ */
+INLINE static double D_400(double r_x, double r_y, double r_z, double r_inv) {
+  return +105. * r_inv * r_inv * r_inv * r_inv * r_inv * r_inv * r_inv * r_inv *
+             r_inv * (r_x * r_x * r_x * r_x) -
+         15. * r_inv * r_inv * r_inv * r_inv * r_inv * r_inv * r_inv * 6.0 *
+             (r_x * r_x) +
+         3. * r_inv * r_inv * r_inv * r_inv * r_inv * 3.0;
+  /* 5 zero-valued terms not written out */
+}
+
+/*********************************/
+/* 5th order gravity derivatives */
+/*********************************/
+
+/**
+ * @brief Compute \f$ \frac{\partial^5}{ \partial_z^5 }\phi(x, y, z) \f$.
+ *
+ * Note that r_inv = 1./sqrt(r_x^2 + r_y^2 + r_z^2) is passed in, not computed.
+ */
+INLINE static double D_005(double r_x, double r_y, double r_z, double r_inv) {
+  return -945. * r_inv * r_inv * r_inv * r_inv * r_inv * r_inv * r_inv * r_inv *
+             r_inv * r_inv * r_inv * (r_z * r_z * r_z * r_z * r_z) +
+         105. * r_inv * r_inv * r_inv * r_inv * r_inv * r_inv * r_inv * r_inv *
+             r_inv * 10.0 * (r_z * r_z * r_z) -
+         15. * r_inv * r_inv * r_inv * r_inv * r_inv * r_inv * r_inv * 15.0 *
+             (r_z);
+  /* 26 zero-valued terms not written out */
+}
+
+/**
+ * @brief Compute \f$ \frac{\partial^5}{ \partial_y^1 \partial_z^4 }\phi(x, y,
+ * z) \f$.
+ *
+ * Note that r_inv = 1./sqrt(r_x^2 + r_y^2 + r_z^2) is passed in, not computed.
+ */
+INLINE static double D_014(double r_x, double r_y, double r_z, double r_inv) {
+  return -945. * r_inv * r_inv * r_inv * r_inv * r_inv * r_inv * r_inv * r_inv *
+             r_inv * r_inv * r_inv * (r_y * r_z * r_z * r_z * r_z) +
+         105. * r_inv * r_inv * r_inv * r_inv * r_inv * r_inv * r_inv * r_inv *
+             r_inv * 6.0 * (r_y * r_z * r_z) -
+         15. * r_inv * r_inv * r_inv * r_inv * r_inv * r_inv * r_inv * 3.0 *
+             (r_y);
+  /* 42 zero-valued terms not written out */
+}
+
+/**
+ * @brief Compute \f$ \frac{\partial^5}{ \partial_y^2 \partial_z^3 }\phi(x, y,
+ * z) \f$.
+ *
+ * Note that r_inv = 1./sqrt(r_x^2 + r_y^2 + r_z^2) is passed in, not computed.
+ */
+INLINE static double D_023(double r_x, double r_y, double r_z, double r_inv) {
+  return -945. * r_inv * r_inv * r_inv * r_inv * r_inv * r_inv * r_inv * r_inv *
+             r_inv * r_inv * r_inv * (r_y * r_y * r_z * r_z * r_z) +
+         105. * r_inv * r_inv * r_inv * r_inv * r_inv * r_inv * r_inv * r_inv *
+             r_inv * 3.0 * (r_y * r_y * r_z) +
+         105. * r_inv * r_inv * r_inv * r_inv * r_inv * r_inv * r_inv * r_inv *
+             r_inv * (r_z * r_z * r_z) -
+         15. * r_inv * r_inv * r_inv * r_inv * r_inv * r_inv * r_inv * 3.0 *
+             (r_z);
+  /* 44 zero-valued terms not written out */
+}
+
+/**
+ * @brief Compute \f$ \frac{\partial^5}{ \partial_y^3 \partial_z^2 }\phi(x, y,
+ * z) \f$.
+ *
+ * Note that r_inv = 1./sqrt(r_x^2 + r_y^2 + r_z^2) is passed in, not computed.
+ */
+INLINE static double D_032(double r_x, double r_y, double r_z, double r_inv) {
+  return -945. * r_inv * r_inv * r_inv * r_inv * r_inv * r_inv * r_inv * r_inv *
+             r_inv * r_inv * r_inv * (r_y * r_y * r_y * r_z * r_z) +
+         105. * r_inv * r_inv * r_inv * r_inv * r_inv * r_inv * r_inv * r_inv *
+             r_inv * (r_y * r_y * r_y) +
+         105. * r_inv * r_inv * r_inv * r_inv * r_inv * r_inv * r_inv * r_inv *
+             r_inv * 3.0 * (r_y * r_z * r_z) -
+         15. * r_inv * r_inv * r_inv * r_inv * r_inv * r_inv * r_inv * 3.0 *
+             (r_y);
+  /* 44 zero-valued terms not written out */
+}
+
+/**
+ * @brief Compute \f$ \frac{\partial^5}{ \partial_y^4 \partial_z^1 }\phi(x, y,
+ * z) \f$.
+ *
+ * Note that r_inv = 1./sqrt(r_x^2 + r_y^2 + r_z^2) is passed in, not computed.
+ */
+INLINE static double D_041(double r_x, double r_y, double r_z, double r_inv) {
+  return -945. * r_inv * r_inv * r_inv * r_inv * r_inv * r_inv * r_inv * r_inv *
+             r_inv * r_inv * r_inv * (r_y * r_y * r_y * r_y * r_z) +
+         105. * r_inv * r_inv * r_inv * r_inv * r_inv * r_inv * r_inv * r_inv *
+             r_inv * 6.0 * (r_y * r_y * r_z) -
+         15. * r_inv * r_inv * r_inv * r_inv * r_inv * r_inv * r_inv * 3.0 *
+             (r_z);
+  /* 42 zero-valued terms not written out */
+}
+
+/**
+ * @brief Compute \f$ \frac{\partial^5}{ \partial_y^5 }\phi(x, y, z) \f$.
+ *
+ * Note that r_inv = 1./sqrt(r_x^2 + r_y^2 + r_z^2) is passed in, not computed.
+ */
+INLINE static double D_050(double r_x, double r_y, double r_z, double r_inv) {
+  return -945. * r_inv * r_inv * r_inv * r_inv * r_inv * r_inv * r_inv * r_inv *
+             r_inv * r_inv * r_inv * (r_y * r_y * r_y * r_y * r_y) +
+         105. * r_inv * r_inv * r_inv * r_inv * r_inv * r_inv * r_inv * r_inv *
+             r_inv * 10.0 * (r_y * r_y * r_y) -
+         15. * r_inv * r_inv * r_inv * r_inv * r_inv * r_inv * r_inv * 15.0 *
+             (r_y);
+  /* 26 zero-valued terms not written out */
+}
+
+/**
+ * @brief Compute \f$ \frac{\partial^5}{ \partial_x^1 \partial_z^4 }\phi(x, y,
+ * z) \f$.
+ *
+ * Note that r_inv = 1./sqrt(r_x^2 + r_y^2 + r_z^2) is passed in, not computed.
+ */
+INLINE static double D_104(double r_x, double r_y, double r_z, double r_inv) {
+  return -945. * r_inv * r_inv * r_inv * r_inv * r_inv * r_inv * r_inv * r_inv *
+             r_inv * r_inv * r_inv * (r_x * r_z * r_z * r_z * r_z) +
+         105. * r_inv * r_inv * r_inv * r_inv * r_inv * r_inv * r_inv * r_inv *
+             r_inv * 6.0 * (r_x * r_z * r_z) -
+         15. * r_inv * r_inv * r_inv * r_inv * r_inv * r_inv * r_inv * 3.0 *
+             (r_x);
+  /* 42 zero-valued terms not written out */
+}
+
+/**
+ * @brief Compute \f$ \frac{\partial^5}{ \partial_x^1 \partial_y^1 \partial_z^3
+ * }\phi(x, y, z) \f$.
+ *
+ * Note that r_inv = 1./sqrt(r_x^2 + r_y^2 + r_z^2) is passed in, not computed.
+ */
+INLINE static double D_113(double r_x, double r_y, double r_z, double r_inv) {
+  return -945. * r_inv * r_inv * r_inv * r_inv * r_inv * r_inv * r_inv * r_inv *
+             r_inv * r_inv * r_inv * (r_x * r_y * r_z * r_z * r_z) +
+         105. * r_inv * r_inv * r_inv * r_inv * r_inv * r_inv * r_inv * r_inv *
+             r_inv * 3.0 * (r_x * r_y * r_z);
+  /* 48 zero-valued terms not written out */
+}
+
+/**
+ * @brief Compute \f$ \frac{\partial^5}{ \partial_x^1 \partial_y^2 \partial_z^2
+ * }\phi(x, y, z) \f$.
+ *
+ * Note that r_inv = 1./sqrt(r_x^2 + r_y^2 + r_z^2) is passed in, not computed.
+ */
+INLINE static double D_122(double r_x, double r_y, double r_z, double r_inv) {
+  return -945. * r_inv * r_inv * r_inv * r_inv * r_inv * r_inv * r_inv * r_inv *
+             r_inv * r_inv * r_inv * (r_x * r_y * r_y * r_z * r_z) +
+         105. * r_inv * r_inv * r_inv * r_inv * r_inv * r_inv * r_inv * r_inv *
+             r_inv * (r_x * r_y * r_y) +
+         105. * r_inv * r_inv * r_inv * r_inv * r_inv * r_inv * r_inv * r_inv *
+             r_inv * (r_x * r_z * r_z) -
+         15. * r_inv * r_inv * r_inv * r_inv * r_inv * r_inv * r_inv * (r_x);
+  /* 48 zero-valued terms not written out */
+}
+
+/**
+ * @brief Compute \f$ \frac{\partial^5}{ \partial_x^1 \partial_y^3 \partial_z^1
+ * }\phi(x, y, z) \f$.
+ *
+ * Note that r_inv = 1./sqrt(r_x^2 + r_y^2 + r_z^2) is passed in, not computed.
+ */
+INLINE static double D_131(double r_x, double r_y, double r_z, double r_inv) {
+  return -945. * r_inv * r_inv * r_inv * r_inv * r_inv * r_inv * r_inv * r_inv *
+             r_inv * r_inv * r_inv * (r_x * r_y * r_y * r_y * r_z) +
+         105. * r_inv * r_inv * r_inv * r_inv * r_inv * r_inv * r_inv * r_inv *
+             r_inv * 3.0 * (r_x * r_y * r_z);
+  /* 48 zero-valued terms not written out */
+}
+
+/**
+ * @brief Compute \f$ \frac{\partial^5}{ \partial_x^1 \partial_y^4 }\phi(x, y,
+ * z) \f$.
+ *
+ * Note that r_inv = 1./sqrt(r_x^2 + r_y^2 + r_z^2) is passed in, not computed.
+ */
+INLINE static double D_140(double r_x, double r_y, double r_z, double r_inv) {
+  return -945. * r_inv * r_inv * r_inv * r_inv * r_inv * r_inv * r_inv * r_inv *
+             r_inv * r_inv * r_inv * (r_x * r_y * r_y * r_y * r_y) +
+         105. * r_inv * r_inv * r_inv * r_inv * r_inv * r_inv * r_inv * r_inv *
+             r_inv * 6.0 * (r_x * r_y * r_y) -
+         15. * r_inv * r_inv * r_inv * r_inv * r_inv * r_inv * r_inv * 3.0 *
+             (r_x);
+  /* 42 zero-valued terms not written out */
+}
+
+/**
+ * @brief Compute \f$ \frac{\partial^5}{ \partial_x^2 \partial_z^3 }\phi(x, y,
+ * z) \f$.
+ *
+ * Note that r_inv = 1./sqrt(r_x^2 + r_y^2 + r_z^2) is passed in, not computed.
+ */
+INLINE static double D_203(double r_x, double r_y, double r_z, double r_inv) {
+  return -945. * r_inv * r_inv * r_inv * r_inv * r_inv * r_inv * r_inv * r_inv *
+             r_inv * r_inv * r_inv * (r_x * r_x * r_z * r_z * r_z) +
+         105. * r_inv * r_inv * r_inv * r_inv * r_inv * r_inv * r_inv * r_inv *
+             r_inv * 3.0 * (r_x * r_x * r_z) +
+         105. * r_inv * r_inv * r_inv * r_inv * r_inv * r_inv * r_inv * r_inv *
+             r_inv * (r_z * r_z * r_z) -
+         15. * r_inv * r_inv * r_inv * r_inv * r_inv * r_inv * r_inv * 3.0 *
+             (r_z);
+  /* 44 zero-valued terms not written out */
+}
+
+/**
+ * @brief Compute \f$ \frac{\partial^5}{ \partial_x^2 \partial_y^1 \partial_z^2
+ * }\phi(x, y, z) \f$.
+ *
+ * Note that r_inv = 1./sqrt(r_x^2 + r_y^2 + r_z^2) is passed in, not computed.
+ */
+INLINE static double D_212(double r_x, double r_y, double r_z, double r_inv) {
+  return -945. * r_inv * r_inv * r_inv * r_inv * r_inv * r_inv * r_inv * r_inv *
+             r_inv * r_inv * r_inv * (r_x * r_x * r_y * r_z * r_z) +
+         105. * r_inv * r_inv * r_inv * r_inv * r_inv * r_inv * r_inv * r_inv *
+             r_inv * (r_x * r_x * r_y) +
+         105. * r_inv * r_inv * r_inv * r_inv * r_inv * r_inv * r_inv * r_inv *
+             r_inv * (r_y * r_z * r_z) -
+         15. * r_inv * r_inv * r_inv * r_inv * r_inv * r_inv * r_inv * (r_y);
+  /* 48 zero-valued terms not written out */
+}
+
+/**
+ * @brief Compute \f$ \frac{\partial^5}{ \partial_x^2 \partial_y^2 \partial_z^1
+ * }\phi(x, y, z) \f$.
+ *
+ * Note that r_inv = 1./sqrt(r_x^2 + r_y^2 + r_z^2) is passed in, not computed.
+ */
+INLINE static double D_221(double r_x, double r_y, double r_z, double r_inv) {
+  return -945. * r_inv * r_inv * r_inv * r_inv * r_inv * r_inv * r_inv * r_inv *
+             r_inv * r_inv * r_inv * (r_x * r_x * r_y * r_y * r_z) +
+         105. * r_inv * r_inv * r_inv * r_inv * r_inv * r_inv * r_inv * r_inv *
+             r_inv * (r_x * r_x * r_z) +
+         105. * r_inv * r_inv * r_inv * r_inv * r_inv * r_inv * r_inv * r_inv *
+             r_inv * (r_y * r_y * r_z) -
+         15. * r_inv * r_inv * r_inv * r_inv * r_inv * r_inv * r_inv * (r_z);
+  /* 48 zero-valued terms not written out */
+}
+
+/**
+ * @brief Compute \f$ \frac{\partial^5}{ \partial_x^2 \partial_y^3 }\phi(x, y,
+ * z) \f$.
+ *
+ * Note that r_inv = 1./sqrt(r_x^2 + r_y^2 + r_z^2) is passed in, not computed.
+ */
+INLINE static double D_230(double r_x, double r_y, double r_z, double r_inv) {
+  return -945. * r_inv * r_inv * r_inv * r_inv * r_inv * r_inv * r_inv * r_inv *
+             r_inv * r_inv * r_inv * (r_x * r_x * r_y * r_y * r_y) +
+         105. * r_inv * r_inv * r_inv * r_inv * r_inv * r_inv * r_inv * r_inv *
+             r_inv * 3.0 * (r_x * r_x * r_y) +
+         105. * r_inv * r_inv * r_inv * r_inv * r_inv * r_inv * r_inv * r_inv *
+             r_inv * (r_y * r_y * r_y) -
+         15. * r_inv * r_inv * r_inv * r_inv * r_inv * r_inv * r_inv * 3.0 *
+             (r_y);
+  /* r_inv^11, ^9 and ^7 terms kept; 44 zero-valued terms not written out */
+}
+
+/**
+ * @brief Compute \f$ \frac{\partial^5}{ \partial_x^3 \partial_z^2 }\phi(x, y,
+ * z) \f$.
+ *
+ * Note that r_inv = 1./sqrt(r_x^2 + r_y^2 + r_z^2) is passed in, not computed.
+ */
+INLINE static double D_302(double r_x, double r_y, double r_z, double r_inv) {
+  return -945. * r_inv * r_inv * r_inv * r_inv * r_inv * r_inv * r_inv * r_inv *
+             r_inv * r_inv * r_inv * (r_x * r_x * r_x * r_z * r_z) +
+         105. * r_inv * r_inv * r_inv * r_inv * r_inv * r_inv * r_inv * r_inv *
+             r_inv * (r_x * r_x * r_x) +
+         105. * r_inv * r_inv * r_inv * r_inv * r_inv * r_inv * r_inv * r_inv *
+             r_inv * 3.0 * (r_x * r_z * r_z) -
+         15. * r_inv * r_inv * r_inv * r_inv * r_inv * r_inv * r_inv * 3.0 *
+             (r_x);
+  /* r_inv^11, ^9 and ^7 terms kept; 44 zero-valued terms not written out */
+}
+
+/**
+ * @brief Compute \f$ \frac{\partial^5}{ \partial_x^3 \partial_y^1 \partial_z^1
+ * }\phi(x, y, z) \f$.
+ *
+ * Note that r_inv = 1./sqrt(r_x^2 + r_y^2 + r_z^2) is passed in, not computed.
+ */
+INLINE static double D_311(double r_x, double r_y, double r_z, double r_inv) {
+  return -945. * r_inv * r_inv * r_inv * r_inv * r_inv * r_inv * r_inv * r_inv *
+             r_inv * r_inv * r_inv * (r_x * r_x * r_x * r_y * r_z) +
+         105. * r_inv * r_inv * r_inv * r_inv * r_inv * r_inv * r_inv * r_inv *
+             r_inv * 3.0 * (r_x * r_y * r_z);
+  /* r_inv^11 and r_inv^9 terms kept; 48 zero-valued terms not written out */
+}
+
+/**
+ * @brief Compute \f$ \frac{\partial^5}{ \partial_x^3 \partial_y^2 }\phi(x, y,
+ * z) \f$.
+ *
+ * Note that r_inv = 1./sqrt(r_x^2 + r_y^2 + r_z^2) is passed in, not computed.
+ */
+INLINE static double D_320(double r_x, double r_y, double r_z, double r_inv) {
+  return -945. * r_inv * r_inv * r_inv * r_inv * r_inv * r_inv * r_inv * r_inv *
+             r_inv * r_inv * r_inv * (r_x * r_x * r_x * r_y * r_y) +
+         105. * r_inv * r_inv * r_inv * r_inv * r_inv * r_inv * r_inv * r_inv *
+             r_inv * (r_x * r_x * r_x) +
+         105. * r_inv * r_inv * r_inv * r_inv * r_inv * r_inv * r_inv * r_inv *
+             r_inv * 3.0 * (r_x * r_y * r_y) -
+         15. * r_inv * r_inv * r_inv * r_inv * r_inv * r_inv * r_inv * 3.0 *
+             (r_x);
+  /* r_inv^11, ^9 and ^7 terms kept; 44 zero-valued terms not written out */
+}
+
+/**
+ * @brief Compute \f$ \frac{\partial^5}{ \partial_x^4 \partial_z^1 }\phi(x, y,
+ * z) \f$.
+ *
+ * Note that r_inv = 1./sqrt(r_x^2 + r_y^2 + r_z^2) is passed in, not computed.
+ */
+INLINE static double D_401(double r_x, double r_y, double r_z, double r_inv) {
+  return -945. * r_inv * r_inv * r_inv * r_inv * r_inv * r_inv * r_inv * r_inv *
+             r_inv * r_inv * r_inv * (r_x * r_x * r_x * r_x * r_z) +
+         105. * r_inv * r_inv * r_inv * r_inv * r_inv * r_inv * r_inv * r_inv *
+             r_inv * 6.0 * (r_x * r_x * r_z) -
+         15. * r_inv * r_inv * r_inv * r_inv * r_inv * r_inv * r_inv * 3.0 *
+             (r_z);
+  /* r_inv^11, ^9 and ^7 terms kept; 42 zero-valued terms not written out */
+}
+
+/**
+ * @brief Compute \f$ \frac{\partial^5}{ \partial_x^4 \partial_y^1 }\phi(x, y,
+ * z) \f$.
+ *
+ * Note that r_inv = 1./sqrt(r_x^2 + r_y^2 + r_z^2) is passed in, not computed.
+ */
+INLINE static double D_410(double r_x, double r_y, double r_z, double r_inv) {
+  return -945. * r_inv * r_inv * r_inv * r_inv * r_inv * r_inv * r_inv * r_inv *
+             r_inv * r_inv * r_inv * (r_x * r_x * r_x * r_x * r_y) +
+         105. * r_inv * r_inv * r_inv * r_inv * r_inv * r_inv * r_inv * r_inv *
+             r_inv * 6.0 * (r_x * r_x * r_y) -
+         15. * r_inv * r_inv * r_inv * r_inv * r_inv * r_inv * r_inv * 3.0 *
+             (r_y);
+  /* r_inv^11, ^9 and ^7 terms kept; 42 zero-valued terms not written out */
+}
+
+/**
+ * @brief Compute \f$ \frac{\partial^5}{ \partial_x^5 }\phi(x, y, z) \f$.
+ *
+ * Note that r_inv = 1./sqrt(r_x^2 + r_y^2 + r_z^2) is passed in, not computed.
+ */
+INLINE static double D_500(double r_x, double r_y, double r_z, double r_inv) {
+  return -945. * r_inv * r_inv * r_inv * r_inv * r_inv * r_inv * r_inv * r_inv *
+             r_inv * r_inv * r_inv * (r_x * r_x * r_x * r_x * r_x) +
+         105. * r_inv * r_inv * r_inv * r_inv * r_inv * r_inv * r_inv * r_inv *
+             r_inv * 10.0 * (r_x * r_x * r_x) -
+         15. * r_inv * r_inv * r_inv * r_inv * r_inv * r_inv * r_inv * 15.0 *
+             (r_x);
+  /* r_inv^11, ^9 and ^7 terms kept; 26 zero-valued terms not written out */
+}
+
+void test(double x, double y, double tol, double min, const char* name) {
+
+  double diff = fabs(x - y);
+  double norm = 0.5 * fabs(x + y);
+  if (diff > norm * tol && norm > min)
+    error(
+        "Relative difference (%e) for '%s' (swift=%e) and (exact=%e) exceeds "
+        "tolerance (%e)",
+        diff / norm, name, x, y, tol);
+  /* else */
+  /*   message("'%s' (%e -- %e) OK!", name, x, y); */
+}
+
+int main() {
+  /* Check SWIFT's M2L derivatives against the exact D_xxx at 100 points. */
+  /* Initialize CPU frequency, this also starts time. */
+  unsigned long long cpufreq = 0;
+  clocks_set_cpufreq(cpufreq);
+
+  /* Trap on division by zero, invalid operations and overflow */
+  feenableexcept(FE_DIVBYZERO | FE_INVALID | FE_OVERFLOW);
+
+  /* Relative tolerance handed to every test() call */
+  const double tol = 1e-4;
+
+  /* Seed rand() from the current time and report the seed */
+  const int seed = time(NULL);
+  message("Seed = %d", seed);
+  srand(seed);
+
+  for (int i = 0; i < 100; ++i) {
+    /* Random separation vector, each component drawn from [0, 100] */
+    const double dx = 100. * ((double)rand() / (RAND_MAX));
+    const double dy = 100. * ((double)rand() / (RAND_MAX));
+    const double dz = 100. * ((double)rand() / (RAND_MAX));
+
+    message("Testing gravity for r=(%e %e %e)", dx, dy, dz);
+
+    /* Squared distance and its inverse square root */
+    const double r2 = dx * dx + dy * dy + dz * dz;
+    const double r_inv = 1. / sqrt(r2);
+
+    /* Compute all derivatives with the SWIFT routine being tested */
+    struct potential_derivatives_M2L pot;
+    compute_potential_derivatives_M2L(dx, dy, dz, r2, r_inv, 0., FLT_MAX, &pot);
+
+    /* Pairs whose norm is not above this value are not compared by test() */
+    const double min = 1e-9;
+
+    /* Now check every term up to the compiled-in multipole order */
+
+    /* 0th order terms */
+    test(pot.D_000, D_000(dx, dy, dz, r_inv), tol, min, "D_000");
+
+#if SELF_GRAVITY_MULTIPOLE_ORDER > 0
+
+    /* 1st order terms */
+    test(pot.D_100, D_100(dx, dy, dz, r_inv), tol, min, "D_100");
+    test(pot.D_010, D_010(dx, dy, dz, r_inv), tol, min, "D_010");
+    test(pot.D_001, D_001(dx, dy, dz, r_inv), tol, min, "D_001");
+#endif
+#if SELF_GRAVITY_MULTIPOLE_ORDER > 1
+
+    /* 2nd order terms */
+    test(pot.D_200, D_200(dx, dy, dz, r_inv), tol, min, "D_200");
+    test(pot.D_020, D_020(dx, dy, dz, r_inv), tol, min, "D_020");
+    test(pot.D_002, D_002(dx, dy, dz, r_inv), tol, min, "D_002");
+    test(pot.D_110, D_110(dx, dy, dz, r_inv), tol, min, "D_110");
+    test(pot.D_101, D_101(dx, dy, dz, r_inv), tol, min, "D_101");
+    test(pot.D_011, D_011(dx, dy, dz, r_inv), tol, min, "D_011");
+#endif
+#if SELF_GRAVITY_MULTIPOLE_ORDER > 2
+
+    /* 3rd order terms */
+    test(pot.D_300, D_300(dx, dy, dz, r_inv), tol, min, "D_300");
+    test(pot.D_030, D_030(dx, dy, dz, r_inv), tol, min, "D_030");
+    test(pot.D_003, D_003(dx, dy, dz, r_inv), tol, min, "D_003");
+    test(pot.D_210, D_210(dx, dy, dz, r_inv), tol, min, "D_210");
+    test(pot.D_201, D_201(dx, dy, dz, r_inv), tol, min, "D_201");
+    test(pot.D_120, D_120(dx, dy, dz, r_inv), tol, min, "D_120");
+    test(pot.D_021, D_021(dx, dy, dz, r_inv), tol, min, "D_021");
+    test(pot.D_102, D_102(dx, dy, dz, r_inv), tol, min, "D_102");
+    test(pot.D_012, D_012(dx, dy, dz, r_inv), tol, min, "D_012");
+    test(pot.D_111, D_111(dx, dy, dz, r_inv), tol, min, "D_111");
+#endif
+#if SELF_GRAVITY_MULTIPOLE_ORDER > 3
+
+    /* 4th order terms */
+    test(pot.D_400, D_400(dx, dy, dz, r_inv), tol, min, "D_400");
+    test(pot.D_040, D_040(dx, dy, dz, r_inv), tol, min, "D_040");
+    test(pot.D_004, D_004(dx, dy, dz, r_inv), tol, min, "D_004");
+    test(pot.D_310, D_310(dx, dy, dz, r_inv), tol, min, "D_310");
+    test(pot.D_301, D_301(dx, dy, dz, r_inv), tol, min, "D_301");
+    test(pot.D_130, D_130(dx, dy, dz, r_inv), tol, min, "D_130");
+    test(pot.D_031, D_031(dx, dy, dz, r_inv), tol, min, "D_031");
+    test(pot.D_103, D_103(dx, dy, dz, r_inv), tol, min, "D_103");
+    test(pot.D_013, D_013(dx, dy, dz, r_inv), tol, min, "D_013");
+    test(pot.D_220, D_220(dx, dy, dz, r_inv), tol, min, "D_220");
+    test(pot.D_202, D_202(dx, dy, dz, r_inv), tol, min, "D_202");
+    test(pot.D_022, D_022(dx, dy, dz, r_inv), tol, min, "D_022");
+    test(pot.D_211, D_211(dx, dy, dz, r_inv), tol, min, "D_211");
+    test(pot.D_121, D_121(dx, dy, dz, r_inv), tol, min, "D_121");
+    test(pot.D_112, D_112(dx, dy, dz, r_inv), tol, min, "D_112");
+#endif
+
+#if SELF_GRAVITY_MULTIPOLE_ORDER > 4
+
+    /* 5th order terms */
+    test(pot.D_500, D_500(dx, dy, dz, r_inv), tol, min, "D_500");
+    test(pot.D_050, D_050(dx, dy, dz, r_inv), tol, min, "D_050");
+    test(pot.D_005, D_005(dx, dy, dz, r_inv), tol, min, "D_005");
+    test(pot.D_410, D_410(dx, dy, dz, r_inv), tol, min, "D_410");
+    test(pot.D_401, D_401(dx, dy, dz, r_inv), tol, min, "D_401");
+    test(pot.D_140, D_140(dx, dy, dz, r_inv), tol, min, "D_140");
+    test(pot.D_041, D_041(dx, dy, dz, r_inv), tol, min, "D_041");
+    test(pot.D_104, D_104(dx, dy, dz, r_inv), tol, min, "D_104");
+    test(pot.D_014, D_014(dx, dy, dz, r_inv), tol, min, "D_014");
+    test(pot.D_320, D_320(dx, dy, dz, r_inv), tol, min, "D_320");
+    test(pot.D_302, D_302(dx, dy, dz, r_inv), tol, min, "D_302");
+    test(pot.D_230, D_230(dx, dy, dz, r_inv), tol, min, "D_230");
+    test(pot.D_032, D_032(dx, dy, dz, r_inv), tol, min, "D_032");
+    test(pot.D_203, D_203(dx, dy, dz, r_inv), tol, min, "D_203");
+    test(pot.D_023, D_023(dx, dy, dz, r_inv), tol, min, "D_023");
+    test(pot.D_311, D_311(dx, dy, dz, r_inv), tol, min, "D_311");
+    test(pot.D_131, D_131(dx, dy, dz, r_inv), tol, min, "D_131");
+    test(pot.D_113, D_113(dx, dy, dz, r_inv), tol, min, "D_113");
+    test(pot.D_122, D_122(dx, dy, dz, r_inv), tol, min, "D_122");
+    test(pot.D_212, D_212(dx, dy, dz, r_inv), tol, min, "D_212");
+    test(pot.D_221, D_221(dx, dy, dz, r_inv), tol, min, "D_221");
+
+#endif
+    message("All good!");
+  }
+  return 0;
+}
diff --git a/theory/Multipoles/fmm_standalone.tex b/theory/Multipoles/fmm_standalone.tex
index dc4266a23110873ff38ccbec4d71345e2780d6b2..d3030dc52c53eca421521023649d09522b39b7bf 100644
--- a/theory/Multipoles/fmm_standalone.tex
+++ b/theory/Multipoles/fmm_standalone.tex
@@ -2,6 +2,7 @@
 \usepackage{graphicx}
 \usepackage{amsmath,paralist,xcolor,xspace,amssymb}
 \usepackage{times}
+\usepackage{comment}
 
 \newcommand{\swift}{{\sc Swift}\xspace}
 \newcommand{\nbody}{$N$-body\xspace}
diff --git a/theory/Multipoles/potential_derivatives.tex b/theory/Multipoles/potential_derivatives.tex
index 5c7b1e6566d7d51b5d27ea3c24d785571e1ad692..d1dba978663e966f2132a65133b3c2fec5e707b6 100644
--- a/theory/Multipoles/potential_derivatives.tex
+++ b/theory/Multipoles/potential_derivatives.tex
@@ -4,19 +4,139 @@
 For completeness, we give here the full expression for the first few
 derivatives of the potential that are used in our FMM scheme. We use
 the notation $\mathbf{r}=(r_x, r_y, r_z)$, $r = |\mathbf{r}|$ and
-$u=r/H$. Starting from the potential (Eq. \ref{eq:fmm:potential},
-reproduced here for clarity), 
+$u=r/H$. We can construct the higher order derivatives by successively
+applying the ``chain rule''. We show representative examples of the
+first few relevant ones here split by order. We start by constructing
+common quantities that appear in derivatives of multiple orders.
+
 \begin{align}
-\mathsf{D}_{000}(\mathbf{r}) = \varphi (\mathbf{r},H) = 
-\left\lbrace\begin{array}{rcl}
-\frac{1}{H} \left(-3u^7 + 15u^6 - 28u^5 + 21u^4 - 7u^2 + 3\right) & \mbox{if} & u < 1,\\
-\frac{1}{r} & \mbox{if} & u \geq 1, 
-\end{array}
-\right.\nonumber
+  \mathsf{\tilde{D}}_{1}(r, u, H) =
+  \left\lbrace\begin{array}{rcl}
+  \left(-3u^7 + 15u^6 - 28u^5 + 21u^4 - 7u^2 + 3\right)\times  H^{-1} & \mbox{if} & u < 1,\\
+  r^{-1} & \mbox{if} & u \geq 1, 
+  \end{array}
+  \right.\nonumber
+\end{align}
+%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
+\begin{align}
+  \mathsf{\tilde{D}}_{3}(r, u, H) =
+  \left\lbrace\begin{array}{rcl}
+  -\left(21u^5 - 90u^4 + 140u^3 -84u^2 +14\right)\times  H^{-3}& \mbox{if} & u < 1,\\
+  -1 \times r^{-3} & \mbox{if} & u \geq 1, 
+  \end{array}
+  \right.\nonumber
+\end{align}
+%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
+\begin{align}
+  \mathsf{\tilde{D}}_{5}(r, u, H) =
+  \left\lbrace\begin{array}{rcl}
+  \left(-105u^3 + 360u^2 - 420u + 168\right)\times  H^{-5}& \mbox{if} & u < 1,\\
+  3\times r^{-5} & \mbox{if} & u \geq 1, 
+  \end{array}
+  \right.\nonumber
+\end{align}
+%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
+\begin{align}
+  \mathsf{\tilde{D}}_{7}(r, u, H) =
+  \left\lbrace\begin{array}{rcl}
+  -\left(315u - 720 + 420u^{-1}\right)\times  H^{-7} & \mbox{if} & u < 1,\\
+  -15\times r^{-7} & \mbox{if} & u \geq 1, 
+  \end{array}
+  \right.\nonumber
+\end{align}
+%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
+\begin{align}
+  \mathsf{\tilde{D}}_{9}(r, u, H) =
+  \left\lbrace\begin{array}{rcl}
+  \left(-315u^{-1} + 420u^{-3}\right)\times  H^{-9}& \mbox{if} & u < 1,\\
+  105\times r^{-9} & \mbox{if} & u \geq 1.
+  \end{array}
+  \right.\nonumber
+\end{align}
+Starting from the potential (Eq. \ref{eq:fmm:potential},
+reproduced here for completeness), we can now build all the relevant derivatives:
+\begin{align}
+  \mathsf{D}_{000}(\mathbf{r}) = \varphi (\mathbf{r},H) =
+    \mathsf{\tilde{D}}_{1}(r, u, H) \nonumber
+\end{align}
+
+\noindent\rule{6cm}{0.4pt}
+\begin{align}
+  \mathsf{D}_{100}(\mathbf{r}) = \frac{\partial}{\partial r_x} \varphi (\mathbf{r},H) =
+    r_x \mathsf{\tilde{D}}_{3}(r, u, H) \nonumber
+\end{align}
+
+\noindent\rule{6cm}{0.4pt}
+\begin{align}
+\mathsf{D}_{200}(\mathbf{r}) = \frac{\partial^2}{\partial r_x^2} \varphi (\mathbf{r},H) = 
+r_x^2 \mathsf{\tilde{D}}_{5}(r, u, H) +
+\mathsf{\tilde{D}}_{3}(r, u, H)\nonumber
+\end{align}
+
+\begin{align}
+\mathsf{D}_{110}(\mathbf{r}) = \frac{\partial^2}{\partial r_x\partial r_y} \varphi (\mathbf{r},H) = 
+   r_x r_y \mathsf{\tilde{D}}_{5}(r, u, H) \nonumber
+\end{align}
+
+\noindent\rule{6cm}{0.4pt}
+\begin{align}
+\mathsf{D}_{300}(\mathbf{r}) = \frac{\partial^3}{\partial r_x^3} \varphi (\mathbf{r},H) = 
+  r_x^3 \mathsf{\tilde{D}}_{7}(r, u, H)
+  + 3 r_x \mathsf{\tilde{D}}_{5}(r, u, H) \nonumber
+\end{align}
+
+\begin{align}
+\mathsf{D}_{210}(\mathbf{r}) = \frac{\partial^3}{\partial r_x^2 \partial r_y} \varphi (\mathbf{r},H) = 
+r_x^2 r_y \mathsf{\tilde{D}}_{7}(r, u, H) +
+r_y \mathsf{\tilde{D}}_{5}(r, u, H) \nonumber
+\end{align}
+
+\begin{align}
+\mathsf{D}_{111}(\mathbf{r}) = \frac{\partial^3}{\partial r_x\partial r_y\partial r_z} \varphi (\mathbf{r},H) = 
+  r_x r_y r_z \mathsf{\tilde{D}}_{7}(r, u, H) \nonumber
+\end{align}
+
+\noindent\rule{6cm}{0.4pt}
+\begin{align}
+  \mathsf{D}_{400}(\mathbf{r}) &= \frac{\partial^4}{\partial r_x^4}
+  \varphi (\mathbf{r},H) =
+  r_x^4 \mathsf{\tilde{D}}_{9}(r, u, H)+
+  6r_x^2 \mathsf{\tilde{D}}_{7}(r, u, H) +
+  3 \mathsf{\tilde{D}}_{5}(r, u, H)
+  \nonumber
 \end{align}
-we can construct the higher order terms by successively applying the
-"chain rule". We show representative examples of the first few
-relevant ones here split by order.
+
+\begin{align}
+  \mathsf{D}_{310}(\mathbf{r}) &= \frac{\partial^4}{\partial r_x^3
+    \partial r_y} \varphi (\mathbf{r},H) =
+  r_x^3 r_y \mathsf{\tilde{D}}_{9}(r, u, H) +
+  3 r_x r_y \mathsf{\tilde{D}}_{7}(r, u, H)
+  \nonumber
+\end{align}
+
+\begin{align}
+  \mathsf{D}_{220}(\mathbf{r}) &= \frac{\partial^4}{\partial r_x^2
+    \partial r_y^2} \varphi (\mathbf{r},H) =
+    r_x^2 r_y^2 \mathsf{\tilde{D}}_{9}(r, u, H) +
+    r_x^2 \mathsf{\tilde{D}}_{7}(r, u, H) +
+    r_y^2 \mathsf{\tilde{D}}_{7}(r, u, H) +
+    \mathsf{\tilde{D}}_{5}(r, u, H)
+  \nonumber
+\end{align}
+
+\begin{align}
+  \mathsf{D}_{211}(\mathbf{r}) &= \frac{\partial^4}{\partial r_x^2
+    \partial r_y   \partial r_z} \varphi (\mathbf{r},H) =
+    r_x^2 r_y r_z \mathsf{\tilde{D}}_{9}(r, u, H) +
+    r_y r_z \mathsf{\tilde{D}}_{7}(r, u, H)
+  \nonumber
+\end{align}
+
+
+
+\begin{comment}
+
+\noindent\rule{6cm}{0.4pt}
 
 \begin{align}
 \mathsf{D}_{100}(\mathbf{r}) = \frac{\partial}{\partial r_x} \varphi (\mathbf{r},H) = 
@@ -101,3 +221,5 @@ relevant ones here split by order.
   \mathsf{D}_{211}(\mathbf{r}) &=
   \nonumber
 \end{align}
+
+\end{comment}