diff --git a/examples/PMillennium-384/p-mill-384.yml b/examples/PMillennium-384/p-mill-384.yml
new file mode 100644
index 0000000000000000000000000000000000000000..b7fc6f762205a0a493b13392ff3001c653445d8d
--- /dev/null
+++ b/examples/PMillennium-384/p-mill-384.yml
@@ -0,0 +1,50 @@
+# Define the system of units to use internally. 
+InternalUnitSystem:
+  UnitMass_in_cgs:     1.98848e43    # 10^10 M_sun
+  UnitLength_in_cgs:   3.08567758e24 # 1 Mpc
+  UnitVelocity_in_cgs: 1e5           # 1 km/s
+  UnitCurrent_in_cgs:  1             # Amperes
+  UnitTemp_in_cgs:     1             # Kelvin
+
+# Planck-13 cosmology
+Cosmology:
+  h:              0.6777
+  a_begin:        0.02     # z_ini = 49
+  a_end:          1.0      # z_end = 0
+  Omega_m:        0.307
+  Omega_lambda:   0.693
+  Omega_b:        0.0455
+
+# Parameters governing the time integration
+TimeIntegration:
+  dt_min:     1e-6 
+  dt_max:     1e-2 
+
+Scheduler:
+  max_top_level_cells: 16
+  cell_split_size:     100
+
+# Parameters governing the snapshots
+Snapshots:
+  basename:            PMill
+  delta_time:          1.02
+  scale_factor_first:  0.02
+  
+# Parameters governing the conserved quantities statistics
+Statistics:
+  delta_time:          1.02
+  scale_factor_first:  0.02
+  
+# Parameters for the self-gravity scheme
+Gravity:
+  eta:                    0.025         
+  theta:                  0.5          
+  comoving_softening:     0.08333  # 83.333 kpc = 1/25 mean inter-particle separation
+  max_physical_softening: 0.08333  # 83.333 kpc = 1/25 mean inter-particle separation
+  mesh_side_length:       128
+  
+# Parameters related to the initial conditions
+InitialConditions:
+  file_name:                   ics.hdf5
+  cleanup_h_factors:           1    
+  cleanup_velocity_factors:    1  
diff --git a/examples/PMillennium-768/p-mill-768.yml b/examples/PMillennium-768/p-mill-768.yml
new file mode 100644
index 0000000000000000000000000000000000000000..8b93a7752d09545c08b98af87db1b1d46eb15189
--- /dev/null
+++ b/examples/PMillennium-768/p-mill-768.yml
@@ -0,0 +1,50 @@
+# Define the system of units to use internally. 
+InternalUnitSystem:
+  UnitMass_in_cgs:     1.98848e43    # 10^10 M_sun
+  UnitLength_in_cgs:   3.08567758e24 # 1 Mpc
+  UnitVelocity_in_cgs: 1e5           # 1 km/s
+  UnitCurrent_in_cgs:  1             # Amperes
+  UnitTemp_in_cgs:     1             # Kelvin
+
+# Planck-13 cosmology
+Cosmology:
+  h:              0.6777
+  a_begin:        0.02     # z_ini = 49
+  a_end:          1.0      # z_end = 0
+  Omega_m:        0.307
+  Omega_lambda:   0.693
+  Omega_b:        0.0455
+
+# Parameters governing the time integration
+TimeIntegration:
+  dt_min:     1e-6 
+  dt_max:     1e-2 
+
+Scheduler:
+  max_top_level_cells: 16
+  cell_split_size:     100
+
+# Parameters governing the snapshots
+Snapshots:
+  basename:            PMill
+  delta_time:          1.02
+  scale_factor_first:  0.02
+  
+# Parameters governing the conserved quantities statistics
+Statistics:
+  delta_time:          1.02
+  scale_factor_first:  0.02
+  
+# Parameters for the self-gravity scheme
+Gravity:
+  eta:                    0.025         
+  theta:                  0.5          
+  comoving_softening:     0.041666  # 41.6666 kpc = 1/25 mean inter-particle separation
+  max_physical_softening: 0.041666  # 41.6666 kpc = 1/25 mean inter-particle separation
+  mesh_side_length:       256
+  
+# Parameters related to the initial conditions
+InitialConditions:
+  file_name:                   ics.hdf5
+  cleanup_h_factors:           1    
+  cleanup_velocity_factors:    1  
diff --git a/src/approx_math.h b/src/approx_math.h
index 90ea4eb997c71311e0c1ce854bbdd0a0ba7396ce..f347bab44790d1e3120675bcbd6e7a457ca09821 100644
--- a/src/approx_math.h
+++ b/src/approx_math.h
@@ -21,6 +21,31 @@
 
 #include "inline.h"
 
+/**
+ * @brief Approximate version of the complementay error function erfcf(x).
+ *
+ * This is based on eq. 7.1.27 of Abramowitz & Stegun, 1972.
+ * The absolute error is < 4.7*10^-4 over the range 0 < x < infinity.
+ *
+ * Returns garbage for x < 0.
+ * @param x The number to compute erfc for.
+ */
+__attribute__((always_inline, const)) INLINE static float approx_erfcf(
+    float x) {
+
+  /* 1 + 0.278393*x + 0.230389*x^2 + 0.000972*x^3 + 0.078108*x^4 */
+  float arg = 0.078108f;
+  arg = x * arg + 0.000972f;
+  arg = x * arg + 0.230389f;
+  arg = x * arg + 0.278393f;
+  arg = x * arg + 1.f;
+
+  /* 1 / arg^4 */
+  const float arg2 = arg * arg;
+  const float arg4 = arg2 * arg2;
+  return 1.f / arg4;
+}
+
 /**
  * @brief Approximate version of expf(x) using a 4th order Taylor expansion
  *
diff --git a/src/atomic.h b/src/atomic.h
index 69df59e9fba965422eaf9a3b3de9d28ab9f09dad..10548c6a20249b4b0c362c5e6ab78ea5d85b2091 100644
--- a/src/atomic.h
+++ b/src/atomic.h
@@ -127,4 +127,36 @@ __attribute__((always_inline)) INLINE static void atomic_add_f(
   } while (test_val.as_int != old_val.as_int);
 }
 
+/**
+ * @brief Atomic add operation on doubles.
+ *
+ * This is a text-book implementation based on an atomic CAS.
+ *
+ * We create a temporary union to cope with the int-only atomic CAS
+ * and the floating-point add that we want.
+ *
+ * @param address The address to update.
+ * @param y The value to update the address with.
+ */
+__attribute__((always_inline)) INLINE static void atomic_add_d(
+    volatile double *const address, const double y) {
+
+  long long *const long_long_ptr = (long long *)address;
+
+  typedef union {
+    double as_double;
+    long long as_long_long;
+  } cast_type;
+
+  cast_type test_val, old_val, new_val;
+  old_val.as_double = *address;
+
+  do {
+    test_val.as_long_long = old_val.as_long_long;
+    new_val.as_double = old_val.as_double + y;
+    old_val.as_long_long =
+        atomic_cas(long_long_ptr, test_val.as_long_long, new_val.as_long_long);
+  } while (test_val.as_long_long != old_val.as_long_long);
+}
+
 #endif /* SWIFT_ATOMIC_H */
diff --git a/src/engine.c b/src/engine.c
index 8b8a107df60938bc598f13b24a53338e105dc77b..ba0192b40f20bc6e05a70266d7f750b05b2911b0 100644
--- a/src/engine.c
+++ b/src/engine.c
@@ -3943,9 +3943,12 @@ void engine_rebuild(struct engine *e, int clean_smoothing_length_values) {
   /* Re-build the space. */
   space_rebuild(e->s, e->verbose);
 
+  /* Construct the list of purely local cells */
+  space_list_local_cells(e->s);
+
   /* Re-compute the mesh forces */
   if ((e->policy & engine_policy_self_gravity) && e->s->periodic)
-    pm_mesh_compute_potential(e->mesh, e->s, e->verbose);
+    pm_mesh_compute_potential(e->mesh, e->s, &e->threadpool, e->verbose);
 
   /* Re-compute the maximal RMS displacement constraint */
   if (e->policy & engine_policy_cosmology)
@@ -4262,7 +4265,8 @@ void engine_collect_end_of_step(struct engine *e, int apply) {
 
   /* Collect information from the local top-level cells */
   threadpool_map(&e->threadpool, engine_collect_end_of_step_mapper,
-                 s->local_cells_top, s->nr_local_cells, sizeof(int), 0, &data);
+                 s->local_cells_with_tasks_top, s->nr_local_cells_with_tasks,
+                 sizeof(int), 0, &data);
 
   /* Store these in the temporary collection group. */
   collectgroup1_init(&e->collect_group1, data.updates, data.g_updates,
@@ -5132,14 +5136,17 @@ void engine_unskip(struct engine *e) {
 #endif  // WITH_PROFILER
 
   /* Move the active local cells to the top of the list. */
-  int *local_cells = e->s->local_cells_top;
+  int *local_cells = e->s->local_cells_with_tasks_top;
   int num_active_cells = 0;
-  for (int k = 0; k < s->nr_local_cells; k++) {
+  for (int k = 0; k < s->nr_local_cells_with_tasks; k++) {
     struct cell *c = &s->cells_top[local_cells[k]];
+
     if ((e->policy & engine_policy_hydro && cell_is_active_hydro(c, e)) ||
-        (e->policy &
-             (engine_policy_self_gravity | engine_policy_external_gravity) &&
+        (e->policy & engine_policy_self_gravity &&
+         cell_is_active_gravity(c, e)) ||
+        (e->policy & engine_policy_external_gravity &&
          cell_is_active_gravity(c, e))) {
+
       if (num_active_cells != k)
         memswap(&local_cells[k], &local_cells[num_active_cells], sizeof(int));
       num_active_cells += 1;
diff --git a/src/gravity/Default/gravity_iact.h b/src/gravity/Default/gravity_iact.h
index 71e5007a49bda25a8b65d4a5d3733d0027aa2682..6fce3ddd512018e9ea4be21111c75904c77cb925 100644
--- a/src/gravity/Default/gravity_iact.h
+++ b/src/gravity/Default/gravity_iact.h
@@ -166,7 +166,7 @@ __attribute__((always_inline)) INLINE static void runner_iact_grav_pm_full(
 
   /* Compute the derivatives of the potential */
   struct potential_derivatives_M2P d;
-  compute_potential_derivatives_M2P(r_x, r_y, r_z, r2, r_inv, h, h_inv, 0, 0.f,
+  potential_derivatives_compute_M2P(r_x, r_y, r_z, r2, r_inv, h, h_inv, 0, 0.f,
                                     &d);
 
   /* 0th order contributions */
@@ -271,7 +271,7 @@ __attribute__((always_inline)) INLINE static void runner_iact_grav_pm_truncated(
 
   /* Compute the derivatives of the potential */
   struct potential_derivatives_M2P d;
-  compute_potential_derivatives_M2P(r_x, r_y, r_z, r2, r_inv, h, h_inv, 1,
+  potential_derivatives_compute_M2P(r_x, r_y, r_z, r2, r_inv, h, h_inv, 1,
                                     r_s_inv, &d);
 
   /* 0th order contributions */
diff --git a/src/gravity/Potential/gravity_iact.h b/src/gravity/Potential/gravity_iact.h
index fdc8c17da1576b85026c3e551dd70d27bc186612..f2094f6ecd5b31b94ebfe7a64f42fbd289a0c81c 100644
--- a/src/gravity/Potential/gravity_iact.h
+++ b/src/gravity/Potential/gravity_iact.h
@@ -169,7 +169,7 @@ __attribute__((always_inline)) INLINE static void runner_iact_grav_pm_full(
 
   /* Compute the derivatives of the potential */
   struct potential_derivatives_M2P d;
-  compute_potential_derivatives_M2P(r_x, r_y, r_z, r2, r_inv, h, h_inv, 0, 0.f,
+  potential_derivatives_compute_M2P(r_x, r_y, r_z, r2, r_inv, h, h_inv, 0, 0.f,
                                     &d);
 
   /* 0th order contributions */
@@ -281,7 +281,7 @@ __attribute__((always_inline)) INLINE static void runner_iact_grav_pm_truncated(
 
   /* Compute the derivatives of the potential */
   struct potential_derivatives_M2P d;
-  compute_potential_derivatives_M2P(r_x, r_y, r_z, r2, r_inv, h, h_inv, 1,
+  potential_derivatives_compute_M2P(r_x, r_y, r_z, r2, r_inv, h, h_inv, 1,
                                     r_s_inv, &d);
 
   /* 0th order contributions */
diff --git a/src/gravity_derivatives.h b/src/gravity_derivatives.h
index 756fb7af66d4cb695ba014452e424843b1c7c25b..3dcffe1cc04c5e10d3b2353b7e21c532747c1475 100644
--- a/src/gravity_derivatives.h
+++ b/src/gravity_derivatives.h
@@ -125,6 +125,65 @@ struct potential_derivatives_M2P {
 #endif
 };
 
+/**
+ * @brief Converts the derivatives from a distance vector to its opposite.
+ *
+ * From a series of tensors D_xxx(r), compute D_xxx(-r).
+ * This can be computed efficiently by flipping the sign of all the odd
+ * derivative terms.
+ *
+ * @param pot The derivatives of the potential.
+ */
+__attribute__((always_inline)) INLINE static void
+potential_derivatives_flip_signs(struct potential_derivatives_M2L *pot) {
+
+#if SELF_GRAVITY_MULTIPOLE_ORDER > 0
+  /* 1st order terms */
+  pot->D_100 = -pot->D_100;
+  pot->D_010 = -pot->D_010;
+  pot->D_001 = -pot->D_001;
+#endif
+
+#if SELF_GRAVITY_MULTIPOLE_ORDER > 2
+  /* 3rd order terms */
+  pot->D_300 = -pot->D_300;
+  pot->D_030 = -pot->D_030;
+  pot->D_003 = -pot->D_003;
+  pot->D_210 = -pot->D_210;
+  pot->D_201 = -pot->D_201;
+  pot->D_021 = -pot->D_021;
+  pot->D_120 = -pot->D_120;
+  pot->D_012 = -pot->D_012;
+  pot->D_102 = -pot->D_102;
+  pot->D_111 = -pot->D_111;
+#endif
+
+#if SELF_GRAVITY_MULTIPOLE_ORDER > 4
+  /* 5th order terms */
+  pot->D_500 = -pot->D_500;
+  pot->D_050 = -pot->D_050;
+  pot->D_005 = -pot->D_005;
+  pot->D_410 = -pot->D_410;
+  pot->D_401 = -pot->D_401;
+  pot->D_041 = -pot->D_041;
+  pot->D_140 = -pot->D_140;
+  pot->D_014 = -pot->D_014;
+  pot->D_104 = -pot->D_104;
+  pot->D_320 = -pot->D_320;
+  pot->D_302 = -pot->D_302;
+  pot->D_032 = -pot->D_032;
+  pot->D_230 = -pot->D_230;
+  pot->D_023 = -pot->D_023;
+  pot->D_203 = -pot->D_203;
+  pot->D_311 = -pot->D_311;
+  pot->D_131 = -pot->D_131;
+  pot->D_113 = -pot->D_113;
+  pot->D_122 = -pot->D_122;
+  pot->D_212 = -pot->D_212;
+  pot->D_221 = -pot->D_221;
+#endif
+}
+
 /**
  * @brief Compute all the relevent derivatives of the softened and truncated
  * gravitational potential for the M2L kernel.
@@ -141,7 +200,7 @@ struct potential_derivatives_M2P {
  * @param pot (return) The structure containing all the derivatives.
  */
 __attribute__((always_inline)) INLINE static void
-compute_potential_derivatives_M2L(const float r_x, const float r_y,
+potential_derivatives_compute_M2L(const float r_x, const float r_y,
                                   const float r_z, const float r2,
                                   const float r_inv, const float eps,
                                   const float eps_inv, const int periodic,
@@ -397,7 +456,7 @@ compute_potential_derivatives_M2L(const float r_x, const float r_y,
  * @param pot (return) The structure containing all the derivatives.
  */
 __attribute__((always_inline)) INLINE static void
-compute_potential_derivatives_M2P(const float r_x, const float r_y,
+potential_derivatives_compute_M2P(const float r_x, const float r_y,
                                   const float r_z, const float r2,
                                   const float r_inv, const float eps,
                                   const float eps_inv, const int periodic,
diff --git a/src/gravity_properties.c b/src/gravity_properties.c
index fc1ce1d62e02c32d44667d602448fc4eb3a65344..40cd7cecb10eab7ebbe7b59d34fb15dccac8b620 100644
--- a/src/gravity_properties.c
+++ b/src/gravity_properties.c
@@ -58,12 +58,17 @@ void gravity_props_init(struct gravity_props *p, struct swift_params *params,
   p->r_cut_min_ratio = parser_get_opt_param_float(
       params, "Gravity:r_cut_min", gravity_props_default_r_cut_min);
 
+  /* Some basic checks */
   if (p->mesh_size % 2 != 0)
     error("The mesh side-length must be an even number.");
 
   if (p->a_smooth <= 0.)
     error("The mesh smoothing scale 'a_smooth' must be > 0.");
 
+  if (2. * p->a_smooth * p->r_cut_max_ratio > p->mesh_size)
+    error("Mesh too small given r_cut_max. Should be at least %d cells wide.",
+          (int)(2. * p->a_smooth * p->r_cut_max_ratio) + 1);
+
   /* Time integration */
   p->eta = parser_get_param_float(params, "Gravity:eta");
 
diff --git a/src/kernel_long_gravity.h b/src/kernel_long_gravity.h
index 1744f2cd046a90499563a182ca68212e43f4a252..f6580f8f72b9eb6a2b49a4d2d54a0e4d0593fcbf 100644
--- a/src/kernel_long_gravity.h
+++ b/src/kernel_long_gravity.h
@@ -90,7 +90,7 @@ __attribute__((always_inline)) INLINE static void kernel_long_grav_derivatives(
   const float r_s_inv5 = r_s_inv4 * r_s_inv;
 
   /* Derivatives of \chi */
-  derivs->chi_0 = erfcf(u);
+  derivs->chi_0 = approx_erfcf(u);
   derivs->chi_1 = -r_s_inv;
   derivs->chi_2 = r_s_inv2 * u;
   derivs->chi_3 = -r_s_inv3 * (u2 - 0.5f);
@@ -158,7 +158,7 @@ __attribute__((always_inline)) INLINE static void kernel_long_grav_pot_eval(
 #ifdef GADGET2_LONG_RANGE_CORRECTION
 
   const float arg1 = u * 0.5f;
-  const float term1 = erfcf(arg1);
+  const float term1 = approx_erfcf(arg1);
 
   *W = term1;
 #else
@@ -190,7 +190,7 @@ __attribute__((always_inline)) INLINE static void kernel_long_grav_force_eval(
   const float arg1 = u * 0.5f;
   const float arg2 = -arg1 * arg1;
 
-  const float term1 = erfcf(arg1);
+  const float term1 = approx_erfcf(arg1);
   const float term2 = u * one_over_sqrt_pi * expf(arg2);
 
   *W = term1 + term2;
diff --git a/src/mesh_gravity.c b/src/mesh_gravity.c
index d193f19082338d6cd793272df80fdd80780ad144..8cad7b621a58cd211a7e84d083894c73eb8c641a 100644
--- a/src/mesh_gravity.c
+++ b/src/mesh_gravity.c
@@ -110,14 +110,22 @@ __attribute__((always_inline)) INLINE static void CIC_set(
     double dx, double dy, double dz, double value) {
 
   /* Classic CIC interpolation */
-  mesh[row_major_id_periodic(i + 0, j + 0, k + 0, N)] += value * tx * ty * tz;
-  mesh[row_major_id_periodic(i + 0, j + 0, k + 1, N)] += value * tx * ty * dz;
-  mesh[row_major_id_periodic(i + 0, j + 1, k + 0, N)] += value * tx * dy * tz;
-  mesh[row_major_id_periodic(i + 0, j + 1, k + 1, N)] += value * tx * dy * dz;
-  mesh[row_major_id_periodic(i + 1, j + 0, k + 0, N)] += value * dx * ty * tz;
-  mesh[row_major_id_periodic(i + 1, j + 0, k + 1, N)] += value * dx * ty * dz;
-  mesh[row_major_id_periodic(i + 1, j + 1, k + 0, N)] += value * dx * dy * tz;
-  mesh[row_major_id_periodic(i + 1, j + 1, k + 1, N)] += value * dx * dy * dz;
+  atomic_add_d(&mesh[row_major_id_periodic(i + 0, j + 0, k + 0, N)],
+               value * tx * ty * tz);
+  atomic_add_d(&mesh[row_major_id_periodic(i + 0, j + 0, k + 1, N)],
+               value * tx * ty * dz);
+  atomic_add_d(&mesh[row_major_id_periodic(i + 0, j + 1, k + 0, N)],
+               value * tx * dy * tz);
+  atomic_add_d(&mesh[row_major_id_periodic(i + 0, j + 1, k + 1, N)],
+               value * tx * dy * dz);
+  atomic_add_d(&mesh[row_major_id_periodic(i + 1, j + 0, k + 0, N)],
+               value * dx * ty * tz);
+  atomic_add_d(&mesh[row_major_id_periodic(i + 1, j + 0, k + 1, N)],
+               value * dx * ty * dz);
+  atomic_add_d(&mesh[row_major_id_periodic(i + 1, j + 1, k + 0, N)],
+               value * dx * dy * tz);
+  atomic_add_d(&mesh[row_major_id_periodic(i + 1, j + 1, k + 1, N)],
+               value * dx * dy * dz);
 }
 
 /**
@@ -165,6 +173,74 @@ INLINE static void gpart_to_mesh_CIC(const struct gpart* gp, double* rho, int N,
   CIC_set(rho, N, i, j, k, tx, ty, tz, dx, dy, dz, mass);
 }
 
+/**
+ * @brief Assigns all the #gpart of a #cell to a density mesh using the CIC
+ * method.
+ *
+ * @param c The #cell.
+ * @param rho The density mesh.
+ * @param N the size of the mesh along one axis.
+ * @param fac The width of a mesh cell.
+ * @param dim The dimensions of the simulation box.
+ */
+void cell_gpart_to_mesh_CIC(const struct cell* c, double* rho, int N,
+                            double fac, const double dim[3]) {
+  const int gcount = c->gcount;
+  const struct gpart* gparts = c->gparts;
+
+  /* Assign all the gpart of that cell to the mesh */
+  for (int i = 0; i < gcount; ++i)
+    gpart_to_mesh_CIC(&gparts[i], rho, N, fac, dim);
+}
+
+/**
+ * @brief Shared information about the mesh to be used by all the threads in the
+ * pool.
+ */
+struct cic_mapper_data {
+  const struct cell* cells;
+  double* rho;
+  int N;
+  double fac;
+  double dim[3];
+};
+
+/**
+ * @brief Threadpool mapper function for the mesh CIC assignment of a cell.
+ *
+ * @param map_data A chunk of the list of local cells.
+ * @param num The number of cells in the chunk.
+ * @param extra The information about the mesh and cells.
+ */
+void cell_gpart_to_mesh_CIC_mapper(void* map_data, int num, void* extra) {
+
+  /* Unpack the shared information */
+  const struct cic_mapper_data* data = (struct cic_mapper_data*)extra;
+  const struct cell* cells = data->cells;
+  double* rho = data->rho;
+  const int N = data->N;
+  const double fac = data->fac;
+  const double dim[3] = {data->dim[0], data->dim[1], data->dim[2]};
+
+  /* Pointer to the chunk to be processed */
+  int* local_cells = (int*)map_data;
+
+  // MATTHIEU: This could in principle be improved by creating a local mesh
+  //           with just the extent required for the cell. Assignment can
+  //           then be done without atomics. That local mesh is then added
+  //           atomically to the global one.
+
+  /* Loop over the elements assigned to this thread */
+  for (int i = 0; i < num; ++i) {
+
+    /* Pointer to local cell */
+    const struct cell* c = &cells[local_cells[i]];
+
+    /* Assign this cell's content to the mesh */
+    cell_gpart_to_mesh_CIC(c, rho, N, fac, dim);
+  }
+}
+
 /**
  * @brief Computes the potential on a gpart from a given mesh using the CIC
  * method.
@@ -279,16 +355,19 @@ void mesh_to_gparts_CIC(struct gpart* gp, const double* pot, int N, double fac,
  *
  * @param mesh The #pm_mesh used to store the potential.
  * @param s The #space containing the particles.
+ * @param tp The #threadpool object used for parallelisation.
  * @param verbose Are we talkative?
  */
 void pm_mesh_compute_potential(struct pm_mesh* mesh, const struct space* s,
-                               int verbose) {
+                               struct threadpool* tp, int verbose) {
 
 #ifdef HAVE_FFTW
 
   const double r_s = mesh->r_s;
   const double box_size = s->dim[0];
   const double dim[3] = {s->dim[0], s->dim[1], s->dim[2]};
+  const int* local_cells = s->local_cells_top;
+  const int nr_local_cells = s->nr_local_cells;
 
   if (r_s <= 0.) error("Invalid value of a_smooth");
   if (mesh->dim[0] != dim[0] || mesh->dim[1] != dim[1] ||
@@ -322,9 +401,20 @@ void pm_mesh_compute_potential(struct pm_mesh* mesh, const struct space* s,
   /* Zero everything */
   bzero(rho, N * N * N * sizeof(double));
 
-  /* Do a CIC mesh assignment of the gparts */
-  for (size_t i = 0; i < s->nr_gparts; ++i)
-    gpart_to_mesh_CIC(&s->gparts[i], rho, N, cell_fac, dim);
+  /* Gather the mesh shared information to be used by the threads */
+  struct cic_mapper_data data;
+  data.cells = s->cells_top;
+  data.rho = rho;
+  data.N = N;
+  data.fac = cell_fac;
+  data.dim[0] = dim[0];
+  data.dim[1] = dim[1];
+  data.dim[2] = dim[2];
+
+  /* Do a parallel CIC mesh assignment of the gparts but only using
+     the local top-level cells */
+  threadpool_map(tp, cell_gpart_to_mesh_CIC_mapper, (void*)local_cells,
+                 nr_local_cells, sizeof(int), 0, (void*)&data);
 
   if (verbose)
     message("Gpart assignment took %.3f %s.",
@@ -499,7 +589,7 @@ void pm_mesh_init(struct pm_mesh* mesh, const struct gravity_props* props,
 
 #ifdef HAVE_FFTW
 
-  if (dim[0] != dim[1] || dim[0] != dim[1])
+  if (dim[0] != dim[1] || dim[0] != dim[2])
     error("Doing mesh-gravity on a non-cubic domain");
 
   const int N = props->mesh_size;
@@ -516,6 +606,9 @@ void pm_mesh_init(struct pm_mesh* mesh, const struct gravity_props* props,
   mesh->r_cut_max = mesh->r_s * props->r_cut_max_ratio;
   mesh->r_cut_min = mesh->r_s * props->r_cut_min_ratio;
 
+  if (2. * mesh->r_cut_max > box_size)
+    error("Mesh too small or r_cut_max too big for this box size");
+
   /* Allocate the memory for the combined density and potential array */
   mesh->potential = (double*)fftw_malloc(sizeof(double) * N * N * N);
   if (mesh->potential == NULL)
diff --git a/src/mesh_gravity.h b/src/mesh_gravity.h
index c512a53ca349816caf4c666c6f504dd4b717bcb7..abff77afbd9c29547a781cdfbf6fed745e1ca37f 100644
--- a/src/mesh_gravity.h
+++ b/src/mesh_gravity.h
@@ -29,6 +29,7 @@
 /* Forward declarations */
 struct space;
 struct gpart;
+struct threadpool;
 
 /**
  * @brief Data structure for the long-range periodic forces using a mesh
@@ -67,7 +68,7 @@ void pm_mesh_init(struct pm_mesh *mesh, const struct gravity_props *props,
                   double dim[3]);
 void pm_mesh_init_no_mesh(struct pm_mesh *mesh, double dim[3]);
 void pm_mesh_compute_potential(struct pm_mesh *mesh, const struct space *s,
-                               int verbose);
+                               struct threadpool *tp, int verbose);
 void pm_mesh_interpolate_forces(const struct pm_mesh *mesh,
                                 const struct engine *e, struct gpart *gparts,
                                 int gcount);
diff --git a/src/multipole.h b/src/multipole.h
index 2ada835e32565dc4075dd1352ba3959b2ba4766b..1525ee57a9b3373c892cc603c793c44a40675a8f 100644
--- a/src/multipole.h
+++ b/src/multipole.h
@@ -293,8 +293,8 @@ INLINE static void gravity_field_tensors_init(struct grav_tensor *l,
  * @param la The gravity tensors to add to.
  * @param lb The gravity tensors to add.
  */
-INLINE static void gravity_field_tensors_add(struct grav_tensor *la,
-                                             const struct grav_tensor *lb) {
+INLINE static void gravity_field_tensors_add(
+    struct grav_tensor *restrict la, const struct grav_tensor *restrict lb) {
 #ifdef SWIFT_DEBUG_CHECKS
   if (lb->num_interacted == 0) error("Adding tensors that did not interact");
   la->num_interacted += lb->num_interacted;
@@ -502,8 +502,8 @@ INLINE static void gravity_multipole_print(const struct multipole *m) {
  * @param ma The multipole to add to.
  * @param mb The multipole to add.
  */
-INLINE static void gravity_multipole_add(struct multipole *ma,
-                                         const struct multipole *mb) {
+INLINE static void gravity_multipole_add(struct multipole *restrict ma,
+                                         const struct multipole *restrict mb) {
 
   /* Add 0th order term */
   ma->M_000 += mb->M_000;
@@ -1307,8 +1307,8 @@ INLINE static void gravity_P2M(struct gravity_tensors *multi,
  * @param pos_a The position to which m_b will be shifted.
  * @param pos_b The current postion of the multipole to shift.
  */
-INLINE static void gravity_M2M(struct multipole *m_a,
-                               const struct multipole *m_b,
+INLINE static void gravity_M2M(struct multipole *restrict m_a,
+                               const struct multipole *restrict m_b,
                                const double pos_a[3], const double pos_b[3]) {
 
   /* Shift 0th order term */
@@ -1558,43 +1558,11 @@ INLINE static void gravity_M2M(struct multipole *m_a,
  *
  * @param l_b The field tensor to compute.
  * @param m_a The multipole creating the field.
- * @param pos_b The position of the field tensor.
- * @param pos_a The position of the multipole.
- * @param props The #gravity_props of this calculation.
- * @param periodic Is the calculation periodic ?
- * @param dim The size of the simulation box.
- * @param rs_inv The inverse of the gravity mesh-smoothing scale.
+ * @param pot The derivatives of the potential.
  */
-INLINE static void gravity_M2L(struct grav_tensor *l_b,
-                               const struct multipole *m_a,
-                               const double pos_b[3], const double pos_a[3],
-                               const struct gravity_props *props, int periodic,
-                               const double dim[3], float rs_inv) {
-
-  /* Recover some constants */
-  const float eps = props->epsilon_cur;
-  const float eps_inv = props->epsilon_cur_inv;
-
-  /* Compute distance vector */
-  float dx = (float)(pos_b[0] - pos_a[0]);
-  float dy = (float)(pos_b[1] - pos_a[1]);
-  float dz = (float)(pos_b[2] - pos_a[2]);
-
-  /* Apply BC */
-  if (periodic) {
-    dx = nearest(dx, dim[0]);
-    dy = nearest(dy, dim[1]);
-    dz = nearest(dz, dim[2]);
-  }
-
-  /* Compute distance */
-  const float r2 = dx * dx + dy * dy + dz * dz;
-  const float r_inv = 1. / sqrtf(r2);
-
-  /* Compute all derivatives */
-  struct potential_derivatives_M2L pot;
-  compute_potential_derivatives_M2L(dx, dy, dz, r2, r_inv, eps, eps_inv,
-                                    periodic, rs_inv, &pot);
+INLINE static void gravity_M2L_apply(
+    struct grav_tensor *restrict l_b, const struct multipole *restrict m_a,
+    const struct potential_derivatives_M2L *pot) {
 
 #ifdef SWIFT_DEBUG_CHECKS
   /* Count interactions */
@@ -1604,330 +1572,382 @@ INLINE static void gravity_M2L(struct grav_tensor *l_b,
   /* Record that this tensor has received contributions */
   l_b->interacted = 1;
 
+  const float M_000 = m_a->M_000;
+  const float D_000 = pot->D_000;
+
   /*  0th order term */
-  l_b->F_000 += m_a->M_000 * pot.D_000;
+  l_b->F_000 += M_000 * D_000;
 
 #if SELF_GRAVITY_MULTIPOLE_ORDER > 0
+
+  /* The dipole term is zero when using the CoM */
+  /* The compiler will optimize out the terms in the equations */
+  /* below. We keep them written to maintain the logical structure. */
+  const float M_100 = 0.f;
+  const float M_010 = 0.f;
+  const float M_001 = 0.f;
+
+  const float D_100 = pot->D_100;
+  const float D_010 = pot->D_010;
+  const float D_001 = pot->D_001;
+
   /*  1st order multipole term (addition to rank 0)*/
-  l_b->F_000 +=
-      m_a->M_100 * pot.D_100 + m_a->M_010 * pot.D_010 + m_a->M_001 * pot.D_001;
+  l_b->F_000 += M_100 * D_100 + M_010 * D_010 + M_001 * D_001;
 
   /*  1st order multipole term (addition to rank 1)*/
-  l_b->F_100 += m_a->M_000 * pot.D_100;
-  l_b->F_010 += m_a->M_000 * pot.D_010;
-  l_b->F_001 += m_a->M_000 * pot.D_001;
+  l_b->F_100 += M_000 * D_100;
+  l_b->F_010 += M_000 * D_010;
+  l_b->F_001 += M_000 * D_001;
 #endif
 #if SELF_GRAVITY_MULTIPOLE_ORDER > 1
 
+  const float M_200 = m_a->M_200;
+  const float M_020 = m_a->M_020;
+  const float M_002 = m_a->M_002;
+  const float M_110 = m_a->M_110;
+  const float M_101 = m_a->M_101;
+  const float M_011 = m_a->M_011;
+
+  const float D_200 = pot->D_200;
+  const float D_020 = pot->D_020;
+  const float D_002 = pot->D_002;
+  const float D_110 = pot->D_110;
+  const float D_101 = pot->D_101;
+  const float D_011 = pot->D_011;
+
   /*  2nd order multipole term (addition to rank 0)*/
-  l_b->F_000 +=
-      m_a->M_200 * pot.D_200 + m_a->M_020 * pot.D_020 + m_a->M_002 * pot.D_002;
-  l_b->F_000 +=
-      m_a->M_110 * pot.D_110 + m_a->M_101 * pot.D_101 + m_a->M_011 * pot.D_011;
+  l_b->F_000 += M_200 * D_200 + M_020 * D_020 + M_002 * D_002;
+  l_b->F_000 += M_110 * D_110 + M_101 * D_101 + M_011 * D_011;
 
   /*  2nd order multipole term (addition to rank 1)*/
-  l_b->F_100 +=
-      m_a->M_100 * pot.D_200 + m_a->M_010 * pot.D_110 + m_a->M_001 * pot.D_101;
-  l_b->F_010 +=
-      m_a->M_100 * pot.D_110 + m_a->M_010 * pot.D_020 + m_a->M_001 * pot.D_011;
-  l_b->F_001 +=
-      m_a->M_100 * pot.D_101 + m_a->M_010 * pot.D_011 + m_a->M_001 * pot.D_002;
+  l_b->F_100 += M_100 * D_200 + M_010 * D_110 + M_001 * D_101;
+  l_b->F_010 += M_100 * D_110 + M_010 * D_020 + M_001 * D_011;
+  l_b->F_001 += M_100 * D_101 + M_010 * D_011 + M_001 * D_002;
 
   /*  2nd order multipole term (addition to rank 2)*/
-  l_b->F_200 += m_a->M_000 * pot.D_200;
-  l_b->F_020 += m_a->M_000 * pot.D_020;
-  l_b->F_002 += m_a->M_000 * pot.D_002;
-  l_b->F_110 += m_a->M_000 * pot.D_110;
-  l_b->F_101 += m_a->M_000 * pot.D_101;
-  l_b->F_011 += m_a->M_000 * pot.D_011;
+  l_b->F_200 += M_000 * D_200;
+  l_b->F_020 += M_000 * D_020;
+  l_b->F_002 += M_000 * D_002;
+  l_b->F_110 += M_000 * D_110;
+  l_b->F_101 += M_000 * D_101;
+  l_b->F_011 += M_000 * D_011;
 #endif
 #if SELF_GRAVITY_MULTIPOLE_ORDER > 2
 
+  const float M_300 = m_a->M_300;
+  const float M_030 = m_a->M_030;
+  const float M_003 = m_a->M_003;
+  const float M_210 = m_a->M_210;
+  const float M_201 = m_a->M_201;
+  const float M_021 = m_a->M_021;
+  const float M_120 = m_a->M_120;
+  const float M_012 = m_a->M_012;
+  const float M_102 = m_a->M_102;
+  const float M_111 = m_a->M_111;
+
+  const float D_300 = pot->D_300;
+  const float D_030 = pot->D_030;
+  const float D_003 = pot->D_003;
+  const float D_210 = pot->D_210;
+  const float D_201 = pot->D_201;
+  const float D_021 = pot->D_021;
+  const float D_120 = pot->D_120;
+  const float D_012 = pot->D_012;
+  const float D_102 = pot->D_102;
+  const float D_111 = pot->D_111;
+
   /*  3rd order multipole term (addition to rank 0)*/
-  l_b->F_000 +=
-      m_a->M_300 * pot.D_300 + m_a->M_030 * pot.D_030 + m_a->M_003 * pot.D_003;
-  l_b->F_000 +=
-      m_a->M_210 * pot.D_210 + m_a->M_201 * pot.D_201 + m_a->M_120 * pot.D_120;
-  l_b->F_000 +=
-      m_a->M_021 * pot.D_021 + m_a->M_102 * pot.D_102 + m_a->M_012 * pot.D_012;
-  l_b->F_000 += m_a->M_111 * pot.D_111;
+  l_b->F_000 += M_300 * D_300 + M_030 * D_030 + M_003 * D_003;
+  l_b->F_000 += M_210 * D_210 + M_201 * D_201 + M_120 * D_120;
+  l_b->F_000 += M_021 * D_021 + M_102 * D_102 + M_012 * D_012;
+  l_b->F_000 += M_111 * D_111;
 
   /*  3rd order multipole term (addition to rank 1)*/
-  l_b->F_100 +=
-      m_a->M_200 * pot.D_300 + m_a->M_020 * pot.D_120 + m_a->M_002 * pot.D_102;
-  l_b->F_100 +=
-      m_a->M_110 * pot.D_210 + m_a->M_101 * pot.D_201 + m_a->M_011 * pot.D_111;
-  l_b->F_010 +=
-      m_a->M_200 * pot.D_210 + m_a->M_020 * pot.D_030 + m_a->M_002 * pot.D_012;
-  l_b->F_010 +=
-      m_a->M_110 * pot.D_120 + m_a->M_101 * pot.D_111 + m_a->M_011 * pot.D_021;
-  l_b->F_001 +=
-      m_a->M_200 * pot.D_201 + m_a->M_020 * pot.D_021 + m_a->M_002 * pot.D_003;
-  l_b->F_001 +=
-      m_a->M_110 * pot.D_111 + m_a->M_101 * pot.D_102 + m_a->M_011 * pot.D_012;
+  l_b->F_100 += M_200 * D_300 + M_020 * D_120 + M_002 * D_102;
+  l_b->F_100 += M_110 * D_210 + M_101 * D_201 + M_011 * D_111;
+  l_b->F_010 += M_200 * D_210 + M_020 * D_030 + M_002 * D_012;
+  l_b->F_010 += M_110 * D_120 + M_101 * D_111 + M_011 * D_021;
+  l_b->F_001 += M_200 * D_201 + M_020 * D_021 + M_002 * D_003;
+  l_b->F_001 += M_110 * D_111 + M_101 * D_102 + M_011 * D_012;
 
   /*  3rd order multipole term (addition to rank 2)*/
-  l_b->F_200 +=
-      m_a->M_100 * pot.D_300 + m_a->M_010 * pot.D_210 + m_a->M_001 * pot.D_201;
-  l_b->F_020 +=
-      m_a->M_100 * pot.D_120 + m_a->M_010 * pot.D_030 + m_a->M_001 * pot.D_021;
-  l_b->F_002 +=
-      m_a->M_100 * pot.D_102 + m_a->M_010 * pot.D_012 + m_a->M_001 * pot.D_003;
-  l_b->F_110 +=
-      m_a->M_100 * pot.D_210 + m_a->M_010 * pot.D_120 + m_a->M_001 * pot.D_111;
-  l_b->F_101 +=
-      m_a->M_100 * pot.D_201 + m_a->M_010 * pot.D_111 + m_a->M_001 * pot.D_102;
-  l_b->F_011 +=
-      m_a->M_100 * pot.D_111 + m_a->M_010 * pot.D_021 + m_a->M_001 * pot.D_012;
+  l_b->F_200 += M_100 * D_300 + M_010 * D_210 + M_001 * D_201;
+  l_b->F_020 += M_100 * D_120 + M_010 * D_030 + M_001 * D_021;
+  l_b->F_002 += M_100 * D_102 + M_010 * D_012 + M_001 * D_003;
+  l_b->F_110 += M_100 * D_210 + M_010 * D_120 + M_001 * D_111;
+  l_b->F_101 += M_100 * D_201 + M_010 * D_111 + M_001 * D_102;
+  l_b->F_011 += M_100 * D_111 + M_010 * D_021 + M_001 * D_012;
 
   /*  3rd order multipole term (addition to rank 3)*/
-  l_b->F_300 += m_a->M_000 * pot.D_300;
-  l_b->F_030 += m_a->M_000 * pot.D_030;
-  l_b->F_003 += m_a->M_000 * pot.D_003;
-  l_b->F_210 += m_a->M_000 * pot.D_210;
-  l_b->F_201 += m_a->M_000 * pot.D_201;
-  l_b->F_120 += m_a->M_000 * pot.D_120;
-  l_b->F_021 += m_a->M_000 * pot.D_021;
-  l_b->F_102 += m_a->M_000 * pot.D_102;
-  l_b->F_012 += m_a->M_000 * pot.D_012;
-  l_b->F_111 += m_a->M_000 * pot.D_111;
+  l_b->F_300 += M_000 * D_300;
+  l_b->F_030 += M_000 * D_030;
+  l_b->F_003 += M_000 * D_003;
+  l_b->F_210 += M_000 * D_210;
+  l_b->F_201 += M_000 * D_201;
+  l_b->F_120 += M_000 * D_120;
+  l_b->F_021 += M_000 * D_021;
+  l_b->F_102 += M_000 * D_102;
+  l_b->F_012 += M_000 * D_012;
+  l_b->F_111 += M_000 * D_111;
 #endif
 #if SELF_GRAVITY_MULTIPOLE_ORDER > 3
+
+  const float M_400 = m_a->M_400;
+  const float M_040 = m_a->M_040;
+  const float M_004 = m_a->M_004;
+  const float M_310 = m_a->M_310;
+  const float M_301 = m_a->M_301;
+  const float M_031 = m_a->M_031;
+  const float M_130 = m_a->M_130;
+  const float M_013 = m_a->M_013;
+  const float M_103 = m_a->M_103;
+  const float M_220 = m_a->M_220;
+  const float M_202 = m_a->M_202;
+  const float M_022 = m_a->M_022;
+  const float M_211 = m_a->M_211;
+  const float M_121 = m_a->M_121;
+  const float M_112 = m_a->M_112;
+
+  const float D_400 = pot->D_400;
+  const float D_040 = pot->D_040;
+  const float D_004 = pot->D_004;
+  const float D_310 = pot->D_310;
+  const float D_301 = pot->D_301;
+  const float D_031 = pot->D_031;
+  const float D_130 = pot->D_130;
+  const float D_013 = pot->D_013;
+  const float D_103 = pot->D_103;
+  const float D_220 = pot->D_220;
+  const float D_202 = pot->D_202;
+  const float D_022 = pot->D_022;
+  const float D_211 = pot->D_211;
+  const float D_121 = pot->D_121;
+  const float D_112 = pot->D_112;
+
   /* Compute 4th order field tensor terms (addition to rank 0) */
-  l_b->F_000 +=
-      m_a->M_004 * pot.D_004 + m_a->M_013 * pot.D_013 + m_a->M_022 * pot.D_022 +
-      m_a->M_031 * pot.D_031 + m_a->M_040 * pot.D_040 + m_a->M_103 * pot.D_103 +
-      m_a->M_112 * pot.D_112 + m_a->M_121 * pot.D_121 + m_a->M_130 * pot.D_130 +
-      m_a->M_202 * pot.D_202 + m_a->M_211 * pot.D_211 + m_a->M_220 * pot.D_220 +
-      m_a->M_301 * pot.D_301 + m_a->M_310 * pot.D_310 + m_a->M_400 * pot.D_400;
+  l_b->F_000 += M_004 * D_004 + M_013 * D_013 + M_022 * D_022 +
+                M_031 * D_031 + M_040 * D_040 + M_103 * D_103 +
+                M_112 * D_112 + M_121 * D_121 + M_130 * D_130 +
+                M_202 * D_202 + M_211 * D_211 + M_220 * D_220 +
+                M_301 * D_301 + M_310 * D_310 + M_400 * D_400;
 
   /* Compute 4th order field tensor terms (addition to rank 1) */
-  l_b->F_001 += m_a->M_003 * pot.D_004 + m_a->M_012 * pot.D_013 +
-                m_a->M_021 * pot.D_022 + m_a->M_030 * pot.D_031 +
-                m_a->M_102 * pot.D_103 + m_a->M_111 * pot.D_112 +
-                m_a->M_120 * pot.D_121 + m_a->M_201 * pot.D_202 +
-                m_a->M_210 * pot.D_211 + m_a->M_300 * pot.D_301;
-  l_b->F_010 += m_a->M_003 * pot.D_013 + m_a->M_012 * pot.D_022 +
-                m_a->M_021 * pot.D_031 + m_a->M_030 * pot.D_040 +
-                m_a->M_102 * pot.D_112 + m_a->M_111 * pot.D_121 +
-                m_a->M_120 * pot.D_130 + m_a->M_201 * pot.D_211 +
-                m_a->M_210 * pot.D_220 + m_a->M_300 * pot.D_310;
-  l_b->F_100 += m_a->M_003 * pot.D_103 + m_a->M_012 * pot.D_112 +
-                m_a->M_021 * pot.D_121 + m_a->M_030 * pot.D_130 +
-                m_a->M_102 * pot.D_202 + m_a->M_111 * pot.D_211 +
-                m_a->M_120 * pot.D_220 + m_a->M_201 * pot.D_301 +
-                m_a->M_210 * pot.D_310 + m_a->M_300 * pot.D_400;
+  l_b->F_001 += M_003 * D_004 + M_012 * D_013 + M_021 * D_022 +
+                M_030 * D_031 + M_102 * D_103 + M_111 * D_112 +
+                M_120 * D_121 + M_201 * D_202 + M_210 * D_211 +
+                M_300 * D_301;
+  l_b->F_010 += M_003 * D_013 + M_012 * D_022 + M_021 * D_031 +
+                M_030 * D_040 + M_102 * D_112 + M_111 * D_121 +
+                M_120 * D_130 + M_201 * D_211 + M_210 * D_220 +
+                M_300 * D_310;
+  l_b->F_100 += M_003 * D_103 + M_012 * D_112 + M_021 * D_121 +
+                M_030 * D_130 + M_102 * D_202 + M_111 * D_211 +
+                M_120 * D_220 + M_201 * D_301 + M_210 * D_310 +
+                M_300 * D_400;
 
   /* Compute 4th order field tensor terms (addition to rank 2) */
-  l_b->F_002 += m_a->M_002 * pot.D_004 + m_a->M_011 * pot.D_013 +
-                m_a->M_020 * pot.D_022 + m_a->M_101 * pot.D_103 +
-                m_a->M_110 * pot.D_112 + m_a->M_200 * pot.D_202;
-  l_b->F_011 += m_a->M_002 * pot.D_013 + m_a->M_011 * pot.D_022 +
-                m_a->M_020 * pot.D_031 + m_a->M_101 * pot.D_112 +
-                m_a->M_110 * pot.D_121 + m_a->M_200 * pot.D_211;
-  l_b->F_020 += m_a->M_002 * pot.D_022 + m_a->M_011 * pot.D_031 +
-                m_a->M_020 * pot.D_040 + m_a->M_101 * pot.D_121 +
-                m_a->M_110 * pot.D_130 + m_a->M_200 * pot.D_220;
-  l_b->F_101 += m_a->M_002 * pot.D_103 + m_a->M_011 * pot.D_112 +
-                m_a->M_020 * pot.D_121 + m_a->M_101 * pot.D_202 +
-                m_a->M_110 * pot.D_211 + m_a->M_200 * pot.D_301;
-  l_b->F_110 += m_a->M_002 * pot.D_112 + m_a->M_011 * pot.D_121 +
-                m_a->M_020 * pot.D_130 + m_a->M_101 * pot.D_211 +
-                m_a->M_110 * pot.D_220 + m_a->M_200 * pot.D_310;
-  l_b->F_200 += m_a->M_002 * pot.D_202 + m_a->M_011 * pot.D_211 +
-                m_a->M_020 * pot.D_220 + m_a->M_101 * pot.D_301 +
-                m_a->M_110 * pot.D_310 + m_a->M_200 * pot.D_400;
+  l_b->F_002 += M_002 * D_004 + M_011 * D_013 + M_020 * D_022 +
+                M_101 * D_103 + M_110 * D_112 + M_200 * D_202;
+  l_b->F_011 += M_002 * D_013 + M_011 * D_022 + M_020 * D_031 +
+                M_101 * D_112 + M_110 * D_121 + M_200 * D_211;
+  l_b->F_020 += M_002 * D_022 + M_011 * D_031 + M_020 * D_040 +
+                M_101 * D_121 + M_110 * D_130 + M_200 * D_220;
+  l_b->F_101 += M_002 * D_103 + M_011 * D_112 + M_020 * D_121 +
+                M_101 * D_202 + M_110 * D_211 + M_200 * D_301;
+  l_b->F_110 += M_002 * D_112 + M_011 * D_121 + M_020 * D_130 +
+                M_101 * D_211 + M_110 * D_220 + M_200 * D_310;
+  l_b->F_200 += M_002 * D_202 + M_011 * D_211 + M_020 * D_220 +
+                M_101 * D_301 + M_110 * D_310 + M_200 * D_400;
 
   /* Compute 4th order field tensor terms (addition to rank 3) */
-  l_b->F_003 +=
-      m_a->M_001 * pot.D_004 + m_a->M_010 * pot.D_013 + m_a->M_100 * pot.D_103;
-  l_b->F_012 +=
-      m_a->M_001 * pot.D_013 + m_a->M_010 * pot.D_022 + m_a->M_100 * pot.D_112;
-  l_b->F_021 +=
-      m_a->M_001 * pot.D_022 + m_a->M_010 * pot.D_031 + m_a->M_100 * pot.D_121;
-  l_b->F_030 +=
-      m_a->M_001 * pot.D_031 + m_a->M_010 * pot.D_040 + m_a->M_100 * pot.D_130;
-  l_b->F_102 +=
-      m_a->M_001 * pot.D_103 + m_a->M_010 * pot.D_112 + m_a->M_100 * pot.D_202;
-  l_b->F_111 +=
-      m_a->M_001 * pot.D_112 + m_a->M_010 * pot.D_121 + m_a->M_100 * pot.D_211;
-  l_b->F_120 +=
-      m_a->M_001 * pot.D_121 + m_a->M_010 * pot.D_130 + m_a->M_100 * pot.D_220;
-  l_b->F_201 +=
-      m_a->M_001 * pot.D_202 + m_a->M_010 * pot.D_211 + m_a->M_100 * pot.D_301;
-  l_b->F_210 +=
-      m_a->M_001 * pot.D_211 + m_a->M_010 * pot.D_220 + m_a->M_100 * pot.D_310;
-  l_b->F_300 +=
-      m_a->M_001 * pot.D_301 + m_a->M_010 * pot.D_310 + m_a->M_100 * pot.D_400;
+  l_b->F_003 += M_001 * D_004 + M_010 * D_013 + M_100 * D_103;
+  l_b->F_012 += M_001 * D_013 + M_010 * D_022 + M_100 * D_112;
+  l_b->F_021 += M_001 * D_022 + M_010 * D_031 + M_100 * D_121;
+  l_b->F_030 += M_001 * D_031 + M_010 * D_040 + M_100 * D_130;
+  l_b->F_102 += M_001 * D_103 + M_010 * D_112 + M_100 * D_202;
+  l_b->F_111 += M_001 * D_112 + M_010 * D_121 + M_100 * D_211;
+  l_b->F_120 += M_001 * D_121 + M_010 * D_130 + M_100 * D_220;
+  l_b->F_201 += M_001 * D_202 + M_010 * D_211 + M_100 * D_301;
+  l_b->F_210 += M_001 * D_211 + M_010 * D_220 + M_100 * D_310;
+  l_b->F_300 += M_001 * D_301 + M_010 * D_310 + M_100 * D_400;
 
   /* Compute 4th order field tensor terms (addition to rank 4) */
-  l_b->F_004 += m_a->M_000 * pot.D_004;
-  l_b->F_013 += m_a->M_000 * pot.D_013;
-  l_b->F_022 += m_a->M_000 * pot.D_022;
-  l_b->F_031 += m_a->M_000 * pot.D_031;
-  l_b->F_040 += m_a->M_000 * pot.D_040;
-  l_b->F_103 += m_a->M_000 * pot.D_103;
-  l_b->F_112 += m_a->M_000 * pot.D_112;
-  l_b->F_121 += m_a->M_000 * pot.D_121;
-  l_b->F_130 += m_a->M_000 * pot.D_130;
-  l_b->F_202 += m_a->M_000 * pot.D_202;
-  l_b->F_211 += m_a->M_000 * pot.D_211;
-  l_b->F_220 += m_a->M_000 * pot.D_220;
-  l_b->F_301 += m_a->M_000 * pot.D_301;
-  l_b->F_310 += m_a->M_000 * pot.D_310;
-  l_b->F_400 += m_a->M_000 * pot.D_400;
+  l_b->F_004 += M_000 * D_004;
+  l_b->F_013 += M_000 * D_013;
+  l_b->F_022 += M_000 * D_022;
+  l_b->F_031 += M_000 * D_031;
+  l_b->F_040 += M_000 * D_040;
+  l_b->F_103 += M_000 * D_103;
+  l_b->F_112 += M_000 * D_112;
+  l_b->F_121 += M_000 * D_121;
+  l_b->F_130 += M_000 * D_130;
+  l_b->F_202 += M_000 * D_202;
+  l_b->F_211 += M_000 * D_211;
+  l_b->F_220 += M_000 * D_220;
+  l_b->F_301 += M_000 * D_301;
+  l_b->F_310 += M_000 * D_310;
+  l_b->F_400 += M_000 * D_400;
 
 #endif
 #if SELF_GRAVITY_MULTIPOLE_ORDER > 4
 
+  const float M_500 = m_a->M_500;
+  const float M_050 = m_a->M_050;
+  const float M_005 = m_a->M_005;
+  const float M_410 = m_a->M_410;
+  const float M_401 = m_a->M_401;
+  const float M_041 = m_a->M_041;
+  const float M_140 = m_a->M_140;
+  const float M_014 = m_a->M_014;
+  const float M_104 = m_a->M_104;
+  const float M_320 = m_a->M_320;
+  const float M_302 = m_a->M_302;
+  const float M_230 = m_a->M_230;
+  const float M_032 = m_a->M_032;
+  const float M_203 = m_a->M_203;
+  const float M_023 = m_a->M_023;
+  const float M_122 = m_a->M_122;
+  const float M_212 = m_a->M_212;
+  const float M_221 = m_a->M_221;
+  const float M_311 = m_a->M_311;
+  const float M_131 = m_a->M_131;
+  const float M_113 = m_a->M_113;
+
+  const float D_500 = pot->D_500;
+  const float D_050 = pot->D_050;
+  const float D_005 = pot->D_005;
+  const float D_410 = pot->D_410;
+  const float D_401 = pot->D_401;
+  const float D_041 = pot->D_041;
+  const float D_140 = pot->D_140;
+  const float D_014 = pot->D_014;
+  const float D_104 = pot->D_104;
+  const float D_320 = pot->D_320;
+  const float D_302 = pot->D_302;
+  const float D_230 = pot->D_230;
+  const float D_032 = pot->D_032;
+  const float D_203 = pot->D_203;
+  const float D_023 = pot->D_023;
+  const float D_122 = pot->D_122;
+  const float D_212 = pot->D_212;
+  const float D_221 = pot->D_221;
+  const float D_311 = pot->D_311;
+  const float D_131 = pot->D_131;
+  const float D_113 = pot->D_113;
+
   /* Compute 5th order field tensor terms (addition to rank 0) */
-  l_b->F_000 +=
-      m_a->M_005 * pot.D_005 + m_a->M_014 * pot.D_014 + m_a->M_023 * pot.D_023 +
-      m_a->M_032 * pot.D_032 + m_a->M_041 * pot.D_041 + m_a->M_050 * pot.D_050 +
-      m_a->M_104 * pot.D_104 + m_a->M_113 * pot.D_113 + m_a->M_122 * pot.D_122 +
-      m_a->M_131 * pot.D_131 + m_a->M_140 * pot.D_140 + m_a->M_203 * pot.D_203 +
-      m_a->M_212 * pot.D_212 + m_a->M_221 * pot.D_221 + m_a->M_230 * pot.D_230 +
-      m_a->M_302 * pot.D_302 + m_a->M_311 * pot.D_311 + m_a->M_320 * pot.D_320 +
-      m_a->M_401 * pot.D_401 + m_a->M_410 * pot.D_410 + m_a->M_500 * pot.D_500;
+  l_b->F_000 += M_005 * D_005 + M_014 * D_014 + M_023 * D_023 +
+                M_032 * D_032 + M_041 * D_041 + M_050 * D_050 +
+                M_104 * D_104 + M_113 * D_113 + M_122 * D_122 +
+                M_131 * D_131 + M_140 * D_140 + M_203 * D_203 +
+                M_212 * D_212 + M_221 * D_221 + M_230 * D_230 +
+                M_302 * D_302 + M_311 * D_311 + M_320 * D_320 +
+                M_401 * D_401 + M_410 * D_410 + M_500 * D_500;
 
   /* Compute 5th order field tensor terms (addition to rank 1) */
-  l_b->F_001 +=
-      m_a->M_004 * pot.D_005 + m_a->M_013 * pot.D_014 + m_a->M_022 * pot.D_023 +
-      m_a->M_031 * pot.D_032 + m_a->M_040 * pot.D_041 + m_a->M_103 * pot.D_104 +
-      m_a->M_112 * pot.D_113 + m_a->M_121 * pot.D_122 + m_a->M_130 * pot.D_131 +
-      m_a->M_202 * pot.D_203 + m_a->M_211 * pot.D_212 + m_a->M_220 * pot.D_221 +
-      m_a->M_301 * pot.D_302 + m_a->M_310 * pot.D_311 + m_a->M_400 * pot.D_401;
-  l_b->F_010 +=
-      m_a->M_004 * pot.D_014 + m_a->M_013 * pot.D_023 + m_a->M_022 * pot.D_032 +
-      m_a->M_031 * pot.D_041 + m_a->M_040 * pot.D_050 + m_a->M_103 * pot.D_113 +
-      m_a->M_112 * pot.D_122 + m_a->M_121 * pot.D_131 + m_a->M_130 * pot.D_140 +
-      m_a->M_202 * pot.D_212 + m_a->M_211 * pot.D_221 + m_a->M_220 * pot.D_230 +
-      m_a->M_301 * pot.D_311 + m_a->M_310 * pot.D_320 + m_a->M_400 * pot.D_410;
-  l_b->F_100 +=
-      m_a->M_004 * pot.D_104 + m_a->M_013 * pot.D_113 + m_a->M_022 * pot.D_122 +
-      m_a->M_031 * pot.D_131 + m_a->M_040 * pot.D_140 + m_a->M_103 * pot.D_203 +
-      m_a->M_112 * pot.D_212 + m_a->M_121 * pot.D_221 + m_a->M_130 * pot.D_230 +
-      m_a->M_202 * pot.D_302 + m_a->M_211 * pot.D_311 + m_a->M_220 * pot.D_320 +
-      m_a->M_301 * pot.D_401 + m_a->M_310 * pot.D_410 + m_a->M_400 * pot.D_500;
+  l_b->F_001 += M_004 * D_005 + M_013 * D_014 + M_022 * D_023 +
+                M_031 * D_032 + M_040 * D_041 + M_103 * D_104 +
+                M_112 * D_113 + M_121 * D_122 + M_130 * D_131 +
+                M_202 * D_203 + M_211 * D_212 + M_220 * D_221 +
+                M_301 * D_302 + M_310 * D_311 + M_400 * D_401;
+  l_b->F_010 += M_004 * D_014 + M_013 * D_023 + M_022 * D_032 +
+                M_031 * D_041 + M_040 * D_050 + M_103 * D_113 +
+                M_112 * D_122 + M_121 * D_131 + M_130 * D_140 +
+                M_202 * D_212 + M_211 * D_221 + M_220 * D_230 +
+                M_301 * D_311 + M_310 * D_320 + M_400 * D_410;
+  l_b->F_100 += M_004 * D_104 + M_013 * D_113 + M_022 * D_122 +
+                M_031 * D_131 + M_040 * D_140 + M_103 * D_203 +
+                M_112 * D_212 + M_121 * D_221 + M_130 * D_230 +
+                M_202 * D_302 + M_211 * D_311 + M_220 * D_320 +
+                M_301 * D_401 + M_310 * D_410 + M_400 * D_500;
 
   /* Compute 5th order field tensor terms (addition to rank 2) */
-  l_b->F_002 += m_a->M_003 * pot.D_005 + m_a->M_012 * pot.D_014 +
-                m_a->M_021 * pot.D_023 + m_a->M_030 * pot.D_032 +
-                m_a->M_102 * pot.D_104 + m_a->M_111 * pot.D_113 +
-                m_a->M_120 * pot.D_122 + m_a->M_201 * pot.D_203 +
-                m_a->M_210 * pot.D_212 + m_a->M_300 * pot.D_302;
-  l_b->F_011 += m_a->M_003 * pot.D_014 + m_a->M_012 * pot.D_023 +
-                m_a->M_021 * pot.D_032 + m_a->M_030 * pot.D_041 +
-                m_a->M_102 * pot.D_113 + m_a->M_111 * pot.D_122 +
-                m_a->M_120 * pot.D_131 + m_a->M_201 * pot.D_212 +
-                m_a->M_210 * pot.D_221 + m_a->M_300 * pot.D_311;
-  l_b->F_020 += m_a->M_003 * pot.D_023 + m_a->M_012 * pot.D_032 +
-                m_a->M_021 * pot.D_041 + m_a->M_030 * pot.D_050 +
-                m_a->M_102 * pot.D_122 + m_a->M_111 * pot.D_131 +
-                m_a->M_120 * pot.D_140 + m_a->M_201 * pot.D_221 +
-                m_a->M_210 * pot.D_230 + m_a->M_300 * pot.D_320;
-  l_b->F_101 += m_a->M_003 * pot.D_104 + m_a->M_012 * pot.D_113 +
-                m_a->M_021 * pot.D_122 + m_a->M_030 * pot.D_131 +
-                m_a->M_102 * pot.D_203 + m_a->M_111 * pot.D_212 +
-                m_a->M_120 * pot.D_221 + m_a->M_201 * pot.D_302 +
-                m_a->M_210 * pot.D_311 + m_a->M_300 * pot.D_401;
-  l_b->F_110 += m_a->M_003 * pot.D_113 + m_a->M_012 * pot.D_122 +
-                m_a->M_021 * pot.D_131 + m_a->M_030 * pot.D_140 +
-                m_a->M_102 * pot.D_212 + m_a->M_111 * pot.D_221 +
-                m_a->M_120 * pot.D_230 + m_a->M_201 * pot.D_311 +
-                m_a->M_210 * pot.D_320 + m_a->M_300 * pot.D_410;
-  l_b->F_200 += m_a->M_003 * pot.D_203 + m_a->M_012 * pot.D_212 +
-                m_a->M_021 * pot.D_221 + m_a->M_030 * pot.D_230 +
-                m_a->M_102 * pot.D_302 + m_a->M_111 * pot.D_311 +
-                m_a->M_120 * pot.D_320 + m_a->M_201 * pot.D_401 +
-                m_a->M_210 * pot.D_410 + m_a->M_300 * pot.D_500;
+  l_b->F_002 += M_003 * D_005 + M_012 * D_014 + M_021 * D_023 +
+                M_030 * D_032 + M_102 * D_104 + M_111 * D_113 +
+                M_120 * D_122 + M_201 * D_203 + M_210 * D_212 +
+                M_300 * D_302;
+  l_b->F_011 += M_003 * D_014 + M_012 * D_023 + M_021 * D_032 +
+                M_030 * D_041 + M_102 * D_113 + M_111 * D_122 +
+                M_120 * D_131 + M_201 * D_212 + M_210 * D_221 +
+                M_300 * D_311;
+  l_b->F_020 += M_003 * D_023 + M_012 * D_032 + M_021 * D_041 +
+                M_030 * D_050 + M_102 * D_122 + M_111 * D_131 +
+                M_120 * D_140 + M_201 * D_221 + M_210 * D_230 +
+                M_300 * D_320;
+  l_b->F_101 += M_003 * D_104 + M_012 * D_113 + M_021 * D_122 +
+                M_030 * D_131 + M_102 * D_203 + M_111 * D_212 +
+                M_120 * D_221 + M_201 * D_302 + M_210 * D_311 +
+                M_300 * D_401;
+  l_b->F_110 += M_003 * D_113 + M_012 * D_122 + M_021 * D_131 +
+                M_030 * D_140 + M_102 * D_212 + M_111 * D_221 +
+                M_120 * D_230 + M_201 * D_311 + M_210 * D_320 +
+                M_300 * D_410;
+  l_b->F_200 += M_003 * D_203 + M_012 * D_212 + M_021 * D_221 +
+                M_030 * D_230 + M_102 * D_302 + M_111 * D_311 +
+                M_120 * D_320 + M_201 * D_401 + M_210 * D_410 +
+                M_300 * D_500;
 
   /* Compute 5th order field tensor terms (addition to rank 3) */
-  l_b->F_003 += m_a->M_002 * pot.D_005 + m_a->M_011 * pot.D_014 +
-                m_a->M_020 * pot.D_023 + m_a->M_101 * pot.D_104 +
-                m_a->M_110 * pot.D_113 + m_a->M_200 * pot.D_203;
-  l_b->F_012 += m_a->M_002 * pot.D_014 + m_a->M_011 * pot.D_023 +
-                m_a->M_020 * pot.D_032 + m_a->M_101 * pot.D_113 +
-                m_a->M_110 * pot.D_122 + m_a->M_200 * pot.D_212;
-  l_b->F_021 += m_a->M_002 * pot.D_023 + m_a->M_011 * pot.D_032 +
-                m_a->M_020 * pot.D_041 + m_a->M_101 * pot.D_122 +
-                m_a->M_110 * pot.D_131 + m_a->M_200 * pot.D_221;
-  l_b->F_030 += m_a->M_002 * pot.D_032 + m_a->M_011 * pot.D_041 +
-                m_a->M_020 * pot.D_050 + m_a->M_101 * pot.D_131 +
-                m_a->M_110 * pot.D_140 + m_a->M_200 * pot.D_230;
-  l_b->F_102 += m_a->M_002 * pot.D_104 + m_a->M_011 * pot.D_113 +
-                m_a->M_020 * pot.D_122 + m_a->M_101 * pot.D_203 +
-                m_a->M_110 * pot.D_212 + m_a->M_200 * pot.D_302;
-  l_b->F_111 += m_a->M_002 * pot.D_113 + m_a->M_011 * pot.D_122 +
-                m_a->M_020 * pot.D_131 + m_a->M_101 * pot.D_212 +
-                m_a->M_110 * pot.D_221 + m_a->M_200 * pot.D_311;
-  l_b->F_120 += m_a->M_002 * pot.D_122 + m_a->M_011 * pot.D_131 +
-                m_a->M_020 * pot.D_140 + m_a->M_101 * pot.D_221 +
-                m_a->M_110 * pot.D_230 + m_a->M_200 * pot.D_320;
-  l_b->F_201 += m_a->M_002 * pot.D_203 + m_a->M_011 * pot.D_212 +
-                m_a->M_020 * pot.D_221 + m_a->M_101 * pot.D_302 +
-                m_a->M_110 * pot.D_311 + m_a->M_200 * pot.D_401;
-  l_b->F_210 += m_a->M_002 * pot.D_212 + m_a->M_011 * pot.D_221 +
-                m_a->M_020 * pot.D_230 + m_a->M_101 * pot.D_311 +
-                m_a->M_110 * pot.D_320 + m_a->M_200 * pot.D_410;
-  l_b->F_300 += m_a->M_002 * pot.D_302 + m_a->M_011 * pot.D_311 +
-                m_a->M_020 * pot.D_320 + m_a->M_101 * pot.D_401 +
-                m_a->M_110 * pot.D_410 + m_a->M_200 * pot.D_500;
+  l_b->F_003 += M_002 * D_005 + M_011 * D_014 + M_020 * D_023 +
+                M_101 * D_104 + M_110 * D_113 + M_200 * D_203;
+  l_b->F_012 += M_002 * D_014 + M_011 * D_023 + M_020 * D_032 +
+                M_101 * D_113 + M_110 * D_122 + M_200 * D_212;
+  l_b->F_021 += M_002 * D_023 + M_011 * D_032 + M_020 * D_041 +
+                M_101 * D_122 + M_110 * D_131 + M_200 * D_221;
+  l_b->F_030 += M_002 * D_032 + M_011 * D_041 + M_020 * D_050 +
+                M_101 * D_131 + M_110 * D_140 + M_200 * D_230;
+  l_b->F_102 += M_002 * D_104 + M_011 * D_113 + M_020 * D_122 +
+                M_101 * D_203 + M_110 * D_212 + M_200 * D_302;
+  l_b->F_111 += M_002 * D_113 + M_011 * D_122 + M_020 * D_131 +
+                M_101 * D_212 + M_110 * D_221 + M_200 * D_311;
+  l_b->F_120 += M_002 * D_122 + M_011 * D_131 + M_020 * D_140 +
+                M_101 * D_221 + M_110 * D_230 + M_200 * D_320;
+  l_b->F_201 += M_002 * D_203 + M_011 * D_212 + M_020 * D_221 +
+                M_101 * D_302 + M_110 * D_311 + M_200 * D_401;
+  l_b->F_210 += M_002 * D_212 + M_011 * D_221 + M_020 * D_230 +
+                M_101 * D_311 + M_110 * D_320 + M_200 * D_410;
+  l_b->F_300 += M_002 * D_302 + M_011 * D_311 + M_020 * D_320 +
+                M_101 * D_401 + M_110 * D_410 + M_200 * D_500;
 
   /* Compute 5th order field tensor terms (addition to rank 4) */
-  l_b->F_004 +=
-      m_a->M_001 * pot.D_005 + m_a->M_010 * pot.D_014 + m_a->M_100 * pot.D_104;
-  l_b->F_013 +=
-      m_a->M_001 * pot.D_014 + m_a->M_010 * pot.D_023 + m_a->M_100 * pot.D_113;
-  l_b->F_022 +=
-      m_a->M_001 * pot.D_023 + m_a->M_010 * pot.D_032 + m_a->M_100 * pot.D_122;
-  l_b->F_031 +=
-      m_a->M_001 * pot.D_032 + m_a->M_010 * pot.D_041 + m_a->M_100 * pot.D_131;
-  l_b->F_040 +=
-      m_a->M_001 * pot.D_041 + m_a->M_010 * pot.D_050 + m_a->M_100 * pot.D_140;
-  l_b->F_103 +=
-      m_a->M_001 * pot.D_104 + m_a->M_010 * pot.D_113 + m_a->M_100 * pot.D_203;
-  l_b->F_112 +=
-      m_a->M_001 * pot.D_113 + m_a->M_010 * pot.D_122 + m_a->M_100 * pot.D_212;
-  l_b->F_121 +=
-      m_a->M_001 * pot.D_122 + m_a->M_010 * pot.D_131 + m_a->M_100 * pot.D_221;
-  l_b->F_130 +=
-      m_a->M_001 * pot.D_131 + m_a->M_010 * pot.D_140 + m_a->M_100 * pot.D_230;
-  l_b->F_202 +=
-      m_a->M_001 * pot.D_203 + m_a->M_010 * pot.D_212 + m_a->M_100 * pot.D_302;
-  l_b->F_211 +=
-      m_a->M_001 * pot.D_212 + m_a->M_010 * pot.D_221 + m_a->M_100 * pot.D_311;
-  l_b->F_220 +=
-      m_a->M_001 * pot.D_221 + m_a->M_010 * pot.D_230 + m_a->M_100 * pot.D_320;
-  l_b->F_301 +=
-      m_a->M_001 * pot.D_302 + m_a->M_010 * pot.D_311 + m_a->M_100 * pot.D_401;
-  l_b->F_310 +=
-      m_a->M_001 * pot.D_311 + m_a->M_010 * pot.D_320 + m_a->M_100 * pot.D_410;
-  l_b->F_400 +=
-      m_a->M_001 * pot.D_401 + m_a->M_010 * pot.D_410 + m_a->M_100 * pot.D_500;
+  l_b->F_004 += M_001 * D_005 + M_010 * D_014 + M_100 * D_104;
+  l_b->F_013 += M_001 * D_014 + M_010 * D_023 + M_100 * D_113;
+  l_b->F_022 += M_001 * D_023 + M_010 * D_032 + M_100 * D_122;
+  l_b->F_031 += M_001 * D_032 + M_010 * D_041 + M_100 * D_131;
+  l_b->F_040 += M_001 * D_041 + M_010 * D_050 + M_100 * D_140;
+  l_b->F_103 += M_001 * D_104 + M_010 * D_113 + M_100 * D_203;
+  l_b->F_112 += M_001 * D_113 + M_010 * D_122 + M_100 * D_212;
+  l_b->F_121 += M_001 * D_122 + M_010 * D_131 + M_100 * D_221;
+  l_b->F_130 += M_001 * D_131 + M_010 * D_140 + M_100 * D_230;
+  l_b->F_202 += M_001 * D_203 + M_010 * D_212 + M_100 * D_302;
+  l_b->F_211 += M_001 * D_212 + M_010 * D_221 + M_100 * D_311;
+  l_b->F_220 += M_001 * D_221 + M_010 * D_230 + M_100 * D_320;
+  l_b->F_301 += M_001 * D_302 + M_010 * D_311 + M_100 * D_401;
+  l_b->F_310 += M_001 * D_311 + M_010 * D_320 + M_100 * D_410;
+  l_b->F_400 += M_001 * D_401 + M_010 * D_410 + M_100 * D_500;
 
   /* Compute 5th order field tensor terms (addition to rank 5) */
-  l_b->F_005 += m_a->M_000 * pot.D_005;
-  l_b->F_014 += m_a->M_000 * pot.D_014;
-  l_b->F_023 += m_a->M_000 * pot.D_023;
-  l_b->F_032 += m_a->M_000 * pot.D_032;
-  l_b->F_041 += m_a->M_000 * pot.D_041;
-  l_b->F_050 += m_a->M_000 * pot.D_050;
-  l_b->F_104 += m_a->M_000 * pot.D_104;
-  l_b->F_113 += m_a->M_000 * pot.D_113;
-  l_b->F_122 += m_a->M_000 * pot.D_122;
-  l_b->F_131 += m_a->M_000 * pot.D_131;
-  l_b->F_140 += m_a->M_000 * pot.D_140;
-  l_b->F_203 += m_a->M_000 * pot.D_203;
-  l_b->F_212 += m_a->M_000 * pot.D_212;
-  l_b->F_221 += m_a->M_000 * pot.D_221;
-  l_b->F_230 += m_a->M_000 * pot.D_230;
-  l_b->F_302 += m_a->M_000 * pot.D_302;
-  l_b->F_311 += m_a->M_000 * pot.D_311;
-  l_b->F_320 += m_a->M_000 * pot.D_320;
-  l_b->F_401 += m_a->M_000 * pot.D_401;
-  l_b->F_410 += m_a->M_000 * pot.D_410;
-  l_b->F_500 += m_a->M_000 * pot.D_500;
+  l_b->F_005 += M_000 * D_005;
+  l_b->F_014 += M_000 * D_014;
+  l_b->F_023 += M_000 * D_023;
+  l_b->F_032 += M_000 * D_032;
+  l_b->F_041 += M_000 * D_041;
+  l_b->F_050 += M_000 * D_050;
+  l_b->F_104 += M_000 * D_104;
+  l_b->F_113 += M_000 * D_113;
+  l_b->F_122 += M_000 * D_122;
+  l_b->F_131 += M_000 * D_131;
+  l_b->F_140 += M_000 * D_140;
+  l_b->F_203 += M_000 * D_203;
+  l_b->F_212 += M_000 * D_212;
+  l_b->F_221 += M_000 * D_221;
+  l_b->F_230 += M_000 * D_230;
+  l_b->F_302 += M_000 * D_302;
+  l_b->F_311 += M_000 * D_311;
+  l_b->F_320 += M_000 * D_320;
+  l_b->F_401 += M_000 * D_401;
+  l_b->F_410 += M_000 * D_410;
+  l_b->F_500 += M_000 * D_500;
 
 #endif
 #if SELF_GRAVITY_MULTIPOLE_ORDER > 5
@@ -1935,6 +1955,109 @@ INLINE static void gravity_M2L(struct grav_tensor *l_b,
 #endif
 }
 
+/**
+ * @brief Compute the field tensor due to a multipole.
+ *
+ * @param l_b The field tensor to compute.
+ * @param m_a The multipole.
+ * @param pos_b The position of the field tensor.
+ * @param pos_a The position of the multipole.
+ * @param props The #gravity_props of this calculation.
+ * @param periodic Is the calculation periodic ?
+ * @param dim The size of the simulation box.
+ * @param rs_inv The inverse of the gravity mesh-smoothing scale.
+ */
+INLINE static void gravity_M2L_nonsym(
+    struct grav_tensor *l_b, const struct multipole *m_a, const double pos_b[3],
+    const double pos_a[3], const struct gravity_props *props,
+    const int periodic, const double dim[3], const float rs_inv) {
+
+  /* Recover some constants */
+  const float eps = props->epsilon_cur;
+  const float eps_inv = props->epsilon_cur_inv;
+
+  /* Compute distance vector */
+  float dx = (float)(pos_b[0] - pos_a[0]);
+  float dy = (float)(pos_b[1] - pos_a[1]);
+  float dz = (float)(pos_b[2] - pos_a[2]);
+
+  /* Apply BC */
+  if (periodic) {
+    dx = nearest(dx, dim[0]);
+    dy = nearest(dy, dim[1]);
+    dz = nearest(dz, dim[2]);
+  }
+
+  /* Compute distance */
+  const float r2 = dx * dx + dy * dy + dz * dz;
+  const float r_inv = 1. / sqrtf(r2);
+
+  /* Compute all derivatives */
+  struct potential_derivatives_M2L pot;
+  potential_derivatives_compute_M2L(dx, dy, dz, r2, r_inv, eps, eps_inv,
+                                    periodic, rs_inv, &pot);
+
+  /* Do the M2L tensor multiplication */
+  gravity_M2L_apply(l_b, m_a, &pot);
+}
+
+/**
+ * @brief Compute the field tensor due to a multipole and the symmetric
+ * equivalent.
+ *
+ * @param l_a The first field tensor to compute.
+ * @param l_b The second field tensor to compute.
+ * @param m_a The first multipole.
+ * @param m_b The second multipole.
+ * @param pos_a The position of the first m-pole and field tensor.
+ * @param pos_b The position of the second m-pole and field tensor.
+ * @param props The #gravity_props of this calculation.
+ * @param periodic Is the calculation periodic ?
+ * @param dim The size of the simulation box.
+ * @param rs_inv The inverse of the gravity mesh-smoothing scale.
+ */
+INLINE static void gravity_M2L_symmetric(
+    struct grav_tensor *restrict l_a, struct grav_tensor *restrict l_b,
+    const struct multipole *restrict m_a, const struct multipole *restrict m_b,
+    const double pos_a[3], const double pos_b[3],
+    const struct gravity_props *props, const int periodic, const double dim[3],
+    const float rs_inv) {
+
+  /* Recover some constants */
+  const float eps = props->epsilon_cur;
+  const float eps_inv = props->epsilon_cur_inv;
+
+  /* Compute distance vector */
+  float dx = (float)(pos_b[0] - pos_a[0]);
+  float dy = (float)(pos_b[1] - pos_a[1]);
+  float dz = (float)(pos_b[2] - pos_a[2]);
+
+  /* Apply BC */
+  if (periodic) {
+    dx = nearest(dx, dim[0]);
+    dy = nearest(dy, dim[1]);
+    dz = nearest(dz, dim[2]);
+  }
+
+  /* Compute distance */
+  const float r2 = dx * dx + dy * dy + dz * dz;
+  const float r_inv = 1. / sqrtf(r2);
+
+  /* Compute all derivatives */
+  struct potential_derivatives_M2L pot;
+  potential_derivatives_compute_M2L(dx, dy, dz, r2, r_inv, eps, eps_inv,
+                                    periodic, rs_inv, &pot);
+
+  /* Do the first M2L tensor multiplication */
+  gravity_M2L_apply(l_b, m_a, &pot);
+
+  /* Flip the signs of odd derivatives */
+  potential_derivatives_flip_signs(&pot);
+
+  /* Do the second M2L tensor multiplication */
+  gravity_M2L_apply(l_a, m_b, &pot);
+}
+
 /**
  * @brief Creates a copy of #grav_tensor shifted to a new location.
  *
@@ -1945,8 +2068,8 @@ INLINE static void gravity_M2L(struct grav_tensor *l_b,
  * @param pos_a The position to which m_b will be shifted.
  * @param pos_b The current postion of the multipole to shift.
  */
-INLINE static void gravity_L2L(struct grav_tensor *la,
-                               const struct grav_tensor *lb,
+INLINE static void gravity_L2L(struct grav_tensor *restrict la,
+                               const struct grav_tensor *restrict lb,
                                const double pos_a[3], const double pos_b[3]) {
 
   /* Initialise everything to zero */
diff --git a/src/runner.c b/src/runner.c
index 56b9a85f639f4a45aa4255f6cd3087e8e7e4fdf9..0be6288cbcc0b77fbe9ba3128d820b82a8943480 100644
--- a/src/runner.c
+++ b/src/runner.c
@@ -2273,7 +2273,7 @@ void *runner_main(void *data) {
           runner_do_grav_long_range(r, t->ci, 1);
           break;
         case task_type_grav_mm:
-          runner_dopair_grav_mm_symmetric(r, t->ci, t->cj);
+          runner_dopair_grav_mm(r, t->ci, t->cj);
           break;
         case task_type_cooling:
           runner_do_cooling(r, t->ci, 1);
diff --git a/src/runner_doiact_grav.h b/src/runner_doiact_grav.h
index 961e64006f6d732e501473caaaf1b6c2f75aab31..f9d1f833e65f7e6dde6085696dfbcd0719ae2a87 100644
--- a/src/runner_doiact_grav.h
+++ b/src/runner_doiact_grav.h
@@ -670,8 +670,10 @@ static INLINE void runner_dopair_grav_pp(struct runner *r, struct cell *ci,
   TIMER_TIC;
 
   /* Record activity status */
-  const int ci_active = cell_is_active_gravity(ci, e);
-  const int cj_active = cell_is_active_gravity(cj, e);
+  const int ci_active =
+      cell_is_active_gravity(ci, e) && (ci->nodeID == e->nodeID);
+  const int cj_active =
+      cell_is_active_gravity(cj, e) && (cj->nodeID == e->nodeID);
 
   /* Anything to do here? */
   if (!ci_active && !cj_active) return;
@@ -1158,6 +1160,72 @@ static INLINE void runner_doself_grav_pp(struct runner *r, struct cell *c) {
   TIMER_TOC(timer_doself_grav_pp);
 }
 
+/**
+ * @brief Computes the interaction of the field tensor and multipole
+ * of two cells symmetrically.
+ *
+ * @param r The #runner.
+ * @param ci The first #cell.
+ * @param cj The second #cell.
+ */
+static INLINE void runner_dopair_grav_mm_symmetric(struct runner *r,
+                                                   struct cell *restrict ci,
+                                                   struct cell *restrict cj) {
+
+  /* Some constants */
+  const struct engine *e = r->e;
+  const struct gravity_props *props = e->gravity_properties;
+  const int periodic = e->mesh->periodic;
+  const double dim[3] = {e->mesh->dim[0], e->mesh->dim[1], e->mesh->dim[2]};
+  const float r_s_inv = e->mesh->r_s_inv;
+
+  TIMER_TIC;
+
+  /* Anything to do here? */
+  if ((!cell_is_active_gravity_mm(ci, e) || ci->nodeID != engine_rank) ||
+      (!cell_is_active_gravity_mm(cj, e) || cj->nodeID != engine_rank))
+    error("Invalid state in symmetric M-M calculation!");
+
+  /* Short-cut to the multipole */
+  const struct multipole *multi_i = &ci->multipole->m_pole;
+  const struct multipole *multi_j = &cj->multipole->m_pole;
+
+#ifdef SWIFT_DEBUG_CHECKS
+  if (ci == cj) error("Interacting a cell with itself using M2L");
+
+  if (multi_i->num_gpart == 0)
+    error("Multipole i does not seem to have been set.");
+
+  if (multi_j->num_gpart == 0)
+    error("Multipole j does not seem to have been set.");
+
+  if (ci->multipole->pot.ti_init != e->ti_current)
+    error("ci->grav tensor not initialised.");
+
+  if (ci->multipole->pot.ti_init != e->ti_current)
+    error("cj->grav tensor not initialised.");
+
+  if (ci->ti_old_multipole != e->ti_current)
+    error(
+        "Undrifted multipole ci->ti_old_multipole=%lld ci->nodeID=%d "
+        "cj->nodeID=%d e->ti_current=%lld",
+        ci->ti_old_multipole, ci->nodeID, cj->nodeID, e->ti_current);
+
+  if (cj->ti_old_multipole != e->ti_current)
+    error(
+        "Undrifted multipole cj->ti_old_multipole=%lld cj->nodeID=%d "
+        "ci->nodeID=%d e->ti_current=%lld",
+        cj->ti_old_multipole, cj->nodeID, ci->nodeID, e->ti_current);
+#endif
+
+  /* Let's interact at this level */
+  gravity_M2L_symmetric(&ci->multipole->pot, &cj->multipole->pot, multi_i,
+                        multi_j, ci->multipole->CoM, cj->multipole->CoM, props,
+                        periodic, dim, r_s_inv);
+
+  TIMER_TOC(timer_dopair_grav_mm);
+}
+
 /**
  * @brief Computes the interaction of the field tensor in a cell with the
  * multipole of another cell.
@@ -1166,9 +1234,9 @@ static INLINE void runner_doself_grav_pp(struct runner *r, struct cell *c) {
  * @param ci The #cell with field tensor to interact.
  * @param cj The #cell with the multipole.
  */
-static INLINE void runner_dopair_grav_mm(struct runner *r,
-                                         struct cell *restrict ci,
-                                         struct cell *restrict cj) {
+static INLINE void runner_dopair_grav_mm_nonsym(
+    struct runner *r, struct cell *restrict ci,
+    const struct cell *restrict cj) {
 
   /* Some constants */
   const struct engine *e = r->e;
@@ -1193,22 +1261,49 @@ static INLINE void runner_dopair_grav_mm(struct runner *r,
 
   if (ci->multipole->pot.ti_init != e->ti_current)
     error("ci->grav tensor not initialised.");
-#endif
 
-  /* Do we need to drift the multipole ? */
   if (cj->ti_old_multipole != e->ti_current)
     error(
         "Undrifted multipole cj->ti_old_multipole=%lld cj->nodeID=%d "
         "ci->nodeID=%d e->ti_current=%lld",
         cj->ti_old_multipole, cj->nodeID, ci->nodeID, e->ti_current);
+#endif
 
   /* Let's interact at this level */
-  gravity_M2L(&ci->multipole->pot, multi_j, ci->multipole->CoM,
-              cj->multipole->CoM, props, periodic, dim, r_s_inv);
+  gravity_M2L_nonsym(&ci->multipole->pot, multi_j, ci->multipole->CoM,
+                     cj->multipole->CoM, props, periodic, dim, r_s_inv);
 
   TIMER_TOC(timer_dopair_grav_mm);
 }
 
+/**
+ * @brief Call the M-M calculation on two cells if active.
+ *
+ * @param r The #runner object.
+ * @param ci The first #cell.
+ * @param cj The second #cell.
+ */
+static INLINE void runner_dopair_grav_mm(struct runner *r,
+                                         struct cell *restrict ci,
+                                         struct cell *restrict cj) {
+
+  const struct engine *e = r->e;
+
+  const int do_i =
+      cell_is_active_gravity_mm(ci, e) && (ci->nodeID == e->nodeID);
+  const int do_j =
+      cell_is_active_gravity_mm(cj, e) && (cj->nodeID == e->nodeID);
+
+  if (do_i && do_j)
+    runner_dopair_grav_mm_symmetric(r, ci, cj);
+  else if (do_i)
+    runner_dopair_grav_mm_nonsym(r, ci, cj);
+  else if (do_j)
+    runner_dopair_grav_mm_nonsym(r, cj, ci);
+  else
+    error("Running M-M task with two inactive cells.");
+}
+
 static INLINE void runner_dopair_recursive_grav_pm(struct runner *r,
                                                    struct cell *ci,
                                                    const struct cell *cj) {
@@ -1381,9 +1476,8 @@ static INLINE void runner_dopair_recursive_grav(struct runner *r,
   /* Can we use M-M interactions ? */
   if (gravity_M2L_accept(multi_i->r_max, multi_j->r_max, theta_crit2, r2)) {
 
-    /* MATTHIEU: make a symmetric M-M interaction function ! */
+    /* Go M-M */
     runner_dopair_grav_mm(r, ci, cj);
-    runner_dopair_grav_mm(r, cj, ci);
 
   } else if (!ci->split && !cj->split) {
 
@@ -1503,28 +1597,6 @@ static INLINE void runner_doself_recursive_grav(struct runner *r,
   if (gettimer) TIMER_TOC(timer_dosub_self_grav);
 }
 
-/**
- * @brief Call the non-symmetric M-M calculation on two cells if active.
- *
- * @param r The #runner object.
- * @param ci The first #cell.
- * @param cj The second #cell.
- */
-static INLINE void runner_dopair_grav_mm_symmetric(struct runner *r,
-                                                   struct cell *ci,
-                                                   struct cell *cj) {
-
-  const struct engine *e = r->e;
-
-#ifdef SWIFT_DEBUG_CHECKS
-  if (!cell_is_active_gravity(ci, e) && !cell_is_active_gravity(cj, e))
-    error("Running M-M task with two inactive cells.");
-#endif
-
-  if (cell_is_active_gravity_mm(ci, e)) runner_dopair_grav_mm(r, ci, cj);
-  if (cell_is_active_gravity_mm(cj, e)) runner_dopair_grav_mm(r, cj, ci);
-}
-
 /**
  * @brief Performs all M-M interactions between a given top-level cell and all
  * the other top-levels that are far enough.
@@ -1622,7 +1694,7 @@ static INLINE void runner_do_grav_long_range(struct runner *r, struct cell *ci,
                            theta_crit2, r2_rebuild)) {
 
       /* Call the PM interaction fucntion on the active sub-cells of ci */
-      runner_dopair_grav_mm(r, ci, cj);
+      runner_dopair_grav_mm_nonsym(r, ci, cj);
       // runner_dopair_recursive_grav_pm(r, ci, cj);
 
       /* Record that this multipole received a contribution */
diff --git a/src/space.c b/src/space.c
index 11fc5c9422103895d5f0cbf7799864a981547090..281c452a9d75b1ca7d214f117364d96fc4515df7 100644
--- a/src/space.c
+++ b/src/space.c
@@ -360,6 +360,7 @@ void space_regrid(struct space *s, int verbose) {
     /* Free the old cells, if they were allocated. */
     if (s->cells_top != NULL) {
       space_free_cells(s);
+      free(s->local_cells_with_tasks_top);
       free(s->local_cells_top);
       free(s->cells_top);
       free(s->multipoles_top);
@@ -398,6 +399,12 @@ void space_regrid(struct space *s, int verbose) {
       error("Failed to allocate indices of local top-level cells.");
     bzero(s->local_cells_top, s->nr_cells * sizeof(int));
 
+    /* Allocate the indices of local cells with tasks */
+    if (posix_memalign((void **)&s->local_cells_with_tasks_top,
+                       SWIFT_STRUCT_ALIGNMENT, s->nr_cells * sizeof(int)) != 0)
+      error("Failed to allocate indices of local top-level cells.");
+    bzero(s->local_cells_with_tasks_top, s->nr_cells * sizeof(int));
+
     /* Set the cells' locks */
     for (int k = 0; k < s->nr_cells; k++) {
       if (lock_init(&s->cells_top[k].lock) != 0)
@@ -2337,7 +2344,7 @@ void space_free_buff_sort_indices(struct space *s) {
 
 /**
  * @brief Construct the list of top-level cells that have any tasks in
- * their hierarchy.
+ * their hierarchy on this MPI rank.
  *
  * This assumes the list has been pre-allocated at a regrid.
  *
@@ -2345,15 +2352,38 @@ void space_free_buff_sort_indices(struct space *s) {
  */
 void space_list_cells_with_tasks(struct space *s) {
 
-  /* Let's rebuild the list of local top-level cells */
-  s->nr_local_cells = 0;
+  s->nr_local_cells_with_tasks = 0;
+
   for (int i = 0; i < s->nr_cells; ++i)
     if (cell_has_tasks(&s->cells_top[i])) {
+      s->local_cells_with_tasks_top[s->nr_local_cells_with_tasks] = i;
+      s->nr_local_cells_with_tasks++;
+    }
+  if (s->e->verbose)
+    message("Have %d local top-level cells with tasks (total=%d)",
+            s->nr_local_cells_with_tasks, s->nr_cells);
+}
+
+/**
+ * @brief Construct the list of local top-level cells.
+ *
+ * This assumes the list has been pre-allocated at a regrid.
+ *
+ * @param s The #space.
+ */
+void space_list_local_cells(struct space *s) {
+
+  s->nr_local_cells = 0;
+
+  for (int i = 0; i < s->nr_cells; ++i)
+    if (s->cells_top[i].nodeID == engine_rank) {
       s->local_cells_top[s->nr_local_cells] = i;
       s->nr_local_cells++;
     }
+
   if (s->e->verbose)
-    message("Have %d local cells (total=%d)", s->nr_local_cells, s->nr_cells);
+    message("Have %d local top-level cells (total=%d)", s->nr_local_cells,
+            s->nr_cells);
 }
 
 void space_synchronize_particle_positions_mapper(void *map_data, int nr_gparts,
@@ -3287,6 +3317,7 @@ void space_clean(struct space *s) {
   free(s->cells_top);
   free(s->multipoles_top);
   free(s->local_cells_top);
+  free(s->local_cells_with_tasks_top);
   free(s->parts);
   free(s->xparts);
   free(s->gparts);
@@ -3338,6 +3369,7 @@ void space_struct_restore(struct space *s, FILE *stream) {
   s->multipoles_top = NULL;
   s->multipoles_sub = NULL;
   s->local_cells_top = NULL;
+  s->local_cells_with_tasks_top = NULL;
   s->grav_top_level = NULL;
 #ifdef WITH_MPI
   s->parts_foreign = NULL;
diff --git a/src/space.h b/src/space.h
index e3173ece1e2749a3afb8072b179150587a100a82..af6d58c6c75033662f8aedbda8d9086a4e2d0950 100644
--- a/src/space.h
+++ b/src/space.h
@@ -106,9 +106,12 @@ struct space {
   /*! Total number of cells (top- and sub-) */
   int tot_cells;
 
-  /*! Number of *local* top-level cells with tasks */
+  /*! Number of *local* top-level cells */
   int nr_local_cells;
 
+  /*! Number of *local* top-level cells with tasks */
+  int nr_local_cells_with_tasks;
+
   /*! The (level 0) cells themselves. */
   struct cell *cells_top;
 
@@ -121,9 +124,12 @@ struct space {
   /*! Buffer of unused multipoles for the sub-cells. */
   struct gravity_tensors *multipoles_sub;
 
-  /*! The indices of the *local* top-level cells with tasks */
+  /*! The indices of the *local* top-level cells */
   int *local_cells_top;
 
+  /*! The indices of the *local* top-level cells with tasks */
+  int *local_cells_with_tasks_top;
+
   /*! The total number of parts in the space. */
   size_t nr_parts, size_parts;
 
@@ -228,6 +234,7 @@ void space_recycle_list(struct space *s, struct cell *cell_list_begin,
 void space_split(struct space *s, struct cell *cells, int nr_cells,
                  int verbose);
 void space_split_mapper(void *map_data, int num_elements, void *extra_data);
+void space_list_local_cells(struct space *s);
 void space_list_cells_with_tasks(struct space *s);
 void space_parts_get_cell_index(struct space *s, int *ind, int *cell_counts,
                                 struct cell *cells, int verbose);
diff --git a/tests/testGravityDerivatives.c b/tests/testGravityDerivatives.c
index 184d66db623f34963dc91915c12fc58fbaa4ec4d..125cbc70815ae14056604e79a5616ac20393b650 100644
--- a/tests/testGravityDerivatives.c
+++ b/tests/testGravityDerivatives.c
@@ -943,6 +943,13 @@ int main(int argc, char* argv[]) {
 
     message("Testing gravity for r=(%e %e %e)", dx, dy, dz);
 
+    const double r_s = 100. * ((double)rand() / (RAND_MAX));
+    const double r_s_inv = 1. / r_s;
+
+    const int periodic = 0;
+
+    message("Mesh scale r_s=%e periodic=%d", r_s, periodic);
+
     /* Compute distance */
     const double r2 = dx * dx + dy * dy + dz * dz;
     const double r_inv = 1. / sqrt(r2);
@@ -953,7 +960,7 @@ int main(int argc, char* argv[]) {
     /* Compute all derivatives */
     struct potential_derivatives_M2L pot;
     compute_potential_derivatives_M2L(dx, dy, dz, r2, r_inv, eps, eps_inv,
-                                      /*periodic*/ 0, /* 1/r_s */ 0., &pot);
+                                      periodic, r_s_inv, &pot);
 
     /* Minimal value we care about */
     const double min = 1e-9;