diff --git a/.gitignore b/.gitignore
index 99aa018c9d8f826076cb26ae7cdf57fc314b8852..f2e5dac6918d93a9dbd2ce2e017d18a58185a872 100644
--- a/.gitignore
+++ b/.gitignore
@@ -114,8 +114,11 @@ tests/testReading
 tests/testSingle
 tests/testTimeIntegration
 tests/testSPHStep
+tests/testExp
+tests/testErfc
 tests/testKernel
 tests/testKernelGrav
+tests/testKernelLongGrav
 tests/testFFT
 tests/testInteractions
 tests/testInteractions.sh
@@ -154,6 +157,7 @@ tests/testDump
 tests/testLogger
 tests/benchmarkInteractions
 tests/testGravityDerivatives
+tests/testGravitySpeed
 tests/testPotentialSelf
 tests/testPotentialPair
 tests/testEOS
@@ -184,6 +188,7 @@ theory/Multipoles/potential.pdf
 theory/Multipoles/potential_long.pdf
 theory/Multipoles/potential_short.pdf
 theory/Multipoles/force_short.pdf
+theory/Multipoles/mac_potential.pdf
 theory/Cosmology/cosmology.pdf
 theory/Cooling/eagle_cooling.pdf
 theory/Gizmo/gizmo-implementation-details/gizmo-implementation-details.pdf
diff --git a/configure.ac b/configure.ac
index e044a46b42e4b54cc39182882527735a558e6fde..46d9fb2a8a09fa3c010899b3e9496095c00a7a9a 100644
--- a/configure.ac
+++ b/configure.ac
@@ -53,6 +53,7 @@ AM_CONFIG_HEADER(config.h)
 AX_CHECK_ENABLE_DEBUG
 AC_PROG_CC
 AM_PROG_CC_C_O
+AC_OPENMP
 
 # If debug is selected then we also define SWIFT_DEVELOP_MODE to control
 # any developer code options.
@@ -2319,7 +2320,7 @@ AC_MSG_RESULT([
    Compiler             : $CC
     - vendor            : $ax_cv_c_compiler_vendor
     - version           : $ax_cv_c_compiler_version
-    - flags             : $CFLAGS
+    - flags             : $CFLAGS $OPENMP_CFLAGS
    MPI enabled          : $enable_mpi
    HDF5 enabled         : $with_hdf5
     - parallel          : $have_parallel_hdf5
diff --git a/examples/Cooling/CoolingRates/Makefile.am b/examples/Cooling/CoolingRates/Makefile.am
index 8bb0afa44436c5059f93b585ab6f8893752ce294..7fa7d5f6cad1f3a8c5512722d5afe3c994e1619f 100644
--- a/examples/Cooling/CoolingRates/Makefile.am
+++ b/examples/Cooling/CoolingRates/Makefile.am
@@ -15,7 +15,7 @@
 # along with this program.  If not, see <http://www.gnu.org/licenses/>.
 
 # Add the source directory and the non-standard paths to the included library headers to CFLAGS
-AM_CFLAGS = -I$(top_srcdir)/src -I$(top_builddir)/examples $(HDF5_CPPFLAGS) $(GSL_INCS) $(FFTW_INCS) $(NUMA_INCS)
+AM_CFLAGS = -I$(top_srcdir)/src -I$(top_builddir)/examples $(HDF5_CPPFLAGS) $(GSL_INCS) $(FFTW_INCS) $(NUMA_INCS) $(OPENMP_CFLAGS)
 
 AM_LDFLAGS = $(HDF5_LDFLAGS) $(HDF5_LIBS) $(FFTW_LIBS) $(NUMA_LIBS) $(TCMALLOC_LIBS) $(JEMALLOC_LIBS) $(TBBMALLOC_LIBS) $(GRACKLE_LIBS) $(GSL_LIBS) $(PROFILER_LIBS)
 
diff --git a/examples/Cosmology/ConstantCosmoVolume/constant_volume.yml b/examples/Cosmology/ConstantCosmoVolume/constant_volume.yml
index a6ff72555ef68964508493856127d4cc739b7722..84764d333c416d5ef43fbe6896c9b5479c35b805 100644
--- a/examples/Cosmology/ConstantCosmoVolume/constant_volume.yml
+++ b/examples/Cosmology/ConstantCosmoVolume/constant_volume.yml
@@ -47,8 +47,10 @@ Scheduler:
   
 Gravity:
   mesh_side_length:   32
-  eta: 0.025
-  theta: 0.3
+  eta:                0.025
+  MAC:                adaptive
+  theta_cr:           0.5
+  epsilon_fmm:        0.0001
   comoving_DM_softening: 0.08	# 80 kpc = 1/25 of mean inter-particle separation
   max_physical_DM_softening: 0.08  # 80 kpc = 1/25 of mean inter-particle separation
   comoving_baryon_softening: 0.08	# 80 kpc = 1/25 of mean inter-particle separation
diff --git a/examples/Cosmology/ZeldovichPancake_3D/zeldovichPancake.yml b/examples/Cosmology/ZeldovichPancake_3D/zeldovichPancake.yml
index d43c78972b0bc8d1f250b95190dafef305abca3f..5e904fbe07adeeb3709d1e3a21d23a108470f475 100644
--- a/examples/Cosmology/ZeldovichPancake_3D/zeldovichPancake.yml
+++ b/examples/Cosmology/ZeldovichPancake_3D/zeldovichPancake.yml
@@ -48,9 +48,10 @@ Scheduler:
   
 Gravity:
   mesh_side_length:   32
-  eta: 0.025
-  theta: 0.3
-  r_cut_max: 5.
+  eta:                0.025
+  MAC:                adaptive
+  theta_cr:           0.5
+  epsilon_fmm:        0.0001
   comoving_DM_softening: 0.001
   max_physical_DM_softening: 0.001
   comoving_baryon_softening: 0.001
diff --git a/examples/EAGLE_DMO_low_z/EAGLE_DMO_100/eagle_100.yml b/examples/EAGLE_DMO_low_z/EAGLE_DMO_100/eagle_100.yml
index f4c5316aa0faac7de843b21792af6a2b635131d6..ef5c27b8b95578e680bbf5cbed91e77e5c71d83b 100644
--- a/examples/EAGLE_DMO_low_z/EAGLE_DMO_100/eagle_100.yml
+++ b/examples/EAGLE_DMO_low_z/EAGLE_DMO_100/eagle_100.yml
@@ -41,9 +41,11 @@ Statistics:
 
 # Parameters for the self-gravity scheme
 Gravity:
-  eta:                    0.025     # Constant dimensionless multiplier for time integration.
-  theta:                  0.85      # Opening angle (Multipole acceptance criterion)
-  mesh_side_length:       512
+  eta:                      0.025     # Constant dimensionless multiplier for time integration.
+  MAC:                      geometric 
+  theta_cr:                 0.7       # Opening angle (Multipole acceptance criterion)
+  use_tree_below_softening: 1
+  mesh_side_length:         256
   comoving_DM_softening:         0.0026994 # Comoving DM softening length (in internal units).
   max_physical_DM_softening:     0.0007    # Max physical DM softening length (in internal units).
 
diff --git a/examples/EAGLE_DMO_low_z/EAGLE_DMO_12/eagle_12.yml b/examples/EAGLE_DMO_low_z/EAGLE_DMO_12/eagle_12.yml
index f5a7c57458e1cea04701eb44ac6a0dda41419334..c127384d004985f4954e989db70b3c77ee637b08 100644
--- a/examples/EAGLE_DMO_low_z/EAGLE_DMO_12/eagle_12.yml
+++ b/examples/EAGLE_DMO_low_z/EAGLE_DMO_12/eagle_12.yml
@@ -41,9 +41,11 @@ Statistics:
 
 # Parameters for the self-gravity scheme
 Gravity:
-  eta:                    0.025     # Constant dimensionless multiplier for time integration.
-  theta:                  0.7       # Opening angle (Multipole acceptance criterion)
-  mesh_side_length:       32
+  eta:                      0.025     # Constant dimensionless multiplier for time integration.
+  MAC:                      geometric 
+  theta_cr:                 0.7       # Opening angle (Multipole acceptance criterion)
+  use_tree_below_softening: 1
+  mesh_side_length:         32
   comoving_DM_softening:         0.0026994 # Comoving DM softening length (in internal units).
   max_physical_DM_softening:     0.0007    # Max physical DM softening length (in internal units).
 
diff --git a/examples/EAGLE_DMO_low_z/EAGLE_DMO_25/eagle_25.yml b/examples/EAGLE_DMO_low_z/EAGLE_DMO_25/eagle_25.yml
index 15df02b2a0c4b735d9ce6b1e252a237b67354cb7..75743434bfb879a402b216ca0e7fd52918f28808 100644
--- a/examples/EAGLE_DMO_low_z/EAGLE_DMO_25/eagle_25.yml
+++ b/examples/EAGLE_DMO_low_z/EAGLE_DMO_25/eagle_25.yml
@@ -41,9 +41,11 @@ Statistics:
 
 # Parameters for the self-gravity scheme
 Gravity:
-  eta:                    0.025     # Constant dimensionless multiplier for time integration.
-  theta:                  0.7       # Opening angle (Multipole acceptance criterion)
-  mesh_side_length:       64
+  eta:                      0.025     # Constant dimensionless multiplier for time integration.
+  MAC:                      geometric 
+  theta_cr:                 0.7       # Opening angle (Multipole acceptance criterion)
+  use_tree_below_softening: 1
+  mesh_side_length:         64
   comoving_DM_softening:         0.0026994 # Comoving DM softening length (in internal units).
   max_physical_DM_softening:     0.0007    # Max physical DM softening length (in internal units).
 
diff --git a/examples/EAGLE_DMO_low_z/EAGLE_DMO_50/eagle_50.yml b/examples/EAGLE_DMO_low_z/EAGLE_DMO_50/eagle_50.yml
index b9c7237b819802f7fa12dd975bb53c8f698c20ff..25a9e255598471096b7a3f181958418982a25132 100644
--- a/examples/EAGLE_DMO_low_z/EAGLE_DMO_50/eagle_50.yml
+++ b/examples/EAGLE_DMO_low_z/EAGLE_DMO_50/eagle_50.yml
@@ -40,9 +40,11 @@ Statistics:
 
 # Parameters for the self-gravity scheme
 Gravity:
-  eta:                    0.025     # Constant dimensionless multiplier for time integration.
-  theta:                  0.7       # Opening angle (Multipole acceptance criterion)
-  mesh_side_length:       128
+  eta:                      0.025     # Constant dimensionless multiplier for time integration.
+  MAC:                      geometric 
+  theta_cr:                 0.7       # Opening angle (Multipole acceptance criterion)
+  use_tree_below_softening: 1
+  mesh_side_length:         128
   comoving_DM_softening:         0.0026994 # Comoving DM softening length (in internal units).
   max_physical_DM_softening:     0.0007    # Max physical DM softening length (in internal units).
 
diff --git a/examples/EAGLE_ICs/EAGLE_12/eagle_12.yml b/examples/EAGLE_ICs/EAGLE_12/eagle_12.yml
index ca539e3dcf262fc025d8e182e45c541c22a8c7d7..0a80d4abfade02a8d01ab46cd343f5de007416a0 100644
--- a/examples/EAGLE_ICs/EAGLE_12/eagle_12.yml
+++ b/examples/EAGLE_ICs/EAGLE_12/eagle_12.yml
@@ -37,14 +37,15 @@ Statistics:
 
 # Parameters for the self-gravity scheme
 Gravity:
-  eta:                    0.025     # Constant dimensionless multiplier for time integration.
-  theta:                  0.7       # Opening angle (Multipole acceptance criterion)
-  mesh_side_length:       64
+  eta:                         0.025     # Constant dimensionless multiplier for time integration.
+  MAC:                         geometric # Use the geometric opening angle condition
+  theta_cr:                    0.7       # Opening angle (Multipole acceptance criterion)
+  use_tree_below_softening:    1
+  mesh_side_length:            64
   comoving_DM_softening:         0.003320 # Comoving softening for DM (3.32 ckpc)
   max_physical_DM_softening:     0.001300 # Physical softening for DM (1.30 pkpc)
   comoving_baryon_softening:     0.001790 # Comoving softening for baryons (1.79 ckpc)
   max_physical_baryon_softening: 0.000700 # Physical softening for baryons (0.70 pkpc)
-  dithering:             0
   
 # Parameters for the hydrodynamics scheme
 SPH:
diff --git a/examples/EAGLE_ICs/EAGLE_25/eagle_25.yml b/examples/EAGLE_ICs/EAGLE_25/eagle_25.yml
index e598e96436f0fc3391e46e6bd0fcd0226fb2838e..bff0d1caaed588feacaab80ab1ecd84167ebe430 100644
--- a/examples/EAGLE_ICs/EAGLE_25/eagle_25.yml
+++ b/examples/EAGLE_ICs/EAGLE_25/eagle_25.yml
@@ -37,14 +37,15 @@ Statistics:
 
 # Parameters for the self-gravity scheme
 Gravity:
-  eta:                    0.025     # Constant dimensionless multiplier for time integration.
-  theta:                  0.7       # Opening angle (Multipole acceptance criterion)
-  mesh_side_length:       128
+  eta:                         0.025     # Constant dimensionless multiplier for time integration.
+  MAC:                         geometric # Use the geometric opening angle condition
+  theta_cr:                    0.7       # Opening angle (Multipole acceptance criterion)
+  use_tree_below_softening:    1
+  mesh_side_length:            128
   comoving_DM_softening:         0.003320 # Comoving softening for DM (3.32 ckpc)
   max_physical_DM_softening:     0.001300 # Physical softening for DM (1.30 pkpc)
   comoving_baryon_softening:     0.001790 # Comoving softening for baryons (1.79 ckpc)
   max_physical_baryon_softening: 0.000700 # Physical softening for baryons (0.70 pkpc)
-  dithering:             0
   
 # Parameters for the hydrodynamics scheme
 SPH:
diff --git a/examples/EAGLE_ICs/EAGLE_50/eagle_50.yml b/examples/EAGLE_ICs/EAGLE_50/eagle_50.yml
index 825669a8e2d6b065dc8ae3da5869be577f62bf2c..7a89423aefe6b18a2c18310364fe07a0af8e7e89 100644
--- a/examples/EAGLE_ICs/EAGLE_50/eagle_50.yml
+++ b/examples/EAGLE_ICs/EAGLE_50/eagle_50.yml
@@ -37,14 +37,15 @@ Statistics:
 
 # Parameters for the self-gravity scheme
 Gravity:
-  eta:                    0.025     # Constant dimensionless multiplier for time integration.
-  theta:                  0.7       # Opening angle (Multipole acceptance criterion)
-  mesh_side_length:       256
+  eta:                         0.025     # Constant dimensionless multiplier for time integration.
+  MAC:                         geometric # Use the geometric opening angle condition
+  theta_cr:                    0.7       # Opening angle (Multipole acceptance criterion)
+  use_tree_below_softening:    1
+  mesh_side_length:            256
   comoving_DM_softening:         0.003320 # Comoving softening for DM (3.32 ckpc)
   max_physical_DM_softening:     0.001300 # Physical softening for DM (1.30 pkpc)
   comoving_baryon_softening:     0.001790 # Comoving softening for baryons (1.79 ckpc)
   max_physical_baryon_softening: 0.000700 # Physical softening for baryons (0.70 pkpc)
-  dithering:             0
 
 # Parameters for the hydrodynamics scheme
 SPH:
diff --git a/examples/EAGLE_low_z/EAGLE_100/eagle_100.yml b/examples/EAGLE_low_z/EAGLE_100/eagle_100.yml
index 12919bf5d598fe4e4fece1bf1f7539ac377244cd..5ef3d21716cf54f638b5e8ee89a5e3e71bab298c 100644
--- a/examples/EAGLE_low_z/EAGLE_100/eagle_100.yml
+++ b/examples/EAGLE_low_z/EAGLE_100/eagle_100.yml
@@ -43,10 +43,11 @@ Statistics:
 
 # Parameters for the self-gravity scheme
 Gravity:
-  eta:                    0.025    # Constant dimensionless multiplier for time integration. 
-  theta:                  0.85      # Opening angle (Multipole acceptance criterion)
-  mesh_side_length:       256
-  dithering:              0
+  eta:                      0.025    # Constant dimensionless multiplier for time integration.
+  MAC:                      geometric
+  theta_cr:                 0.7      # Opening angle (Multipole acceptance criterion)
+  use_tree_below_softening: 1
+  mesh_side_length:         256
   comoving_DM_softening:         0.0026994 # Comoving DM softening length (in internal units).
   max_physical_DM_softening:     0.0007    # Max physical DM softening length (in internal units).
   comoving_baryon_softening:     0.0026994 # Comoving DM softening length (in internal units).
diff --git a/examples/EAGLE_low_z/EAGLE_12/eagle_12.yml b/examples/EAGLE_low_z/EAGLE_12/eagle_12.yml
index 68a599cb6bd25ec2e4ea459414e8690e6c52d154..a07a08d0fcf1e273c3e77a80965843516f22607b 100644
--- a/examples/EAGLE_low_z/EAGLE_12/eagle_12.yml
+++ b/examples/EAGLE_low_z/EAGLE_12/eagle_12.yml
@@ -44,15 +44,15 @@ Statistics:
 
 # Parameters for the self-gravity scheme
 Gravity:
-  eta:                    0.025     # Constant dimensionless multiplier for time integration.
-  theta:                  0.7       # Opening angle (Multipole acceptance criterion)
-  mesh_side_length:       32
-  dithering:              0
+  eta:                      0.025     # Constant dimensionless multiplier for time integration.
+  MAC:                      geometric 
+  theta_cr:                 0.7       # Opening angle (Multipole acceptance criterion)
+  use_tree_below_softening: 1
+  mesh_side_length:         32
   comoving_DM_softening:         0.0026994 # Comoving DM softening length (in internal units).
   max_physical_DM_softening:     0.0007    # Max physical DM softening length (in internal units).
   comoving_baryon_softening:     0.0026994 # Comoving DM softening length (in internal units).
   max_physical_baryon_softening: 0.0007    # Max physical DM softening length (in internal units).
-
   
 # Parameters for the hydrodynamics scheme
 SPH:
diff --git a/examples/EAGLE_low_z/EAGLE_25/eagle_25.yml b/examples/EAGLE_low_z/EAGLE_25/eagle_25.yml
index 93981aac4264d75180d19e81223ab9a6c619686c..c84352abc28c5403ae06923c0fd9627bc39297d4 100644
--- a/examples/EAGLE_low_z/EAGLE_25/eagle_25.yml
+++ b/examples/EAGLE_low_z/EAGLE_25/eagle_25.yml
@@ -51,10 +51,11 @@ Statistics:
 
 # Parameters for the self-gravity scheme
 Gravity:
-  eta:                    0.025    # Constant dimensionless multiplier for time integration. 
-  theta:                  0.7      # Opening angle (Multipole acceptance criterion)
-  mesh_side_length:       64
-  dithering:              0
+  eta:                      0.025    # Constant dimensionless multiplier for time integration.
+  MAC:                      geometric
+  theta_cr:                 0.7      # Opening angle (Multipole acceptance criterion)
+  use_tree_below_softening: 1
+  mesh_side_length:         64
   comoving_DM_softening:         0.0026994 # Comoving DM softening length (in internal units).
   max_physical_DM_softening:     0.0007    # Max physical DM softening length (in internal units).
   comoving_baryon_softening:     0.0026994 # Comoving DM softening length (in internal units).
diff --git a/examples/EAGLE_low_z/EAGLE_50/eagle_50.yml b/examples/EAGLE_low_z/EAGLE_50/eagle_50.yml
index 607fef8950b1583416bf87673072ffbbc715ade6..2db295f14d95e47d6acf0dbfbdd6600d8a856a64 100644
--- a/examples/EAGLE_low_z/EAGLE_50/eagle_50.yml
+++ b/examples/EAGLE_low_z/EAGLE_50/eagle_50.yml
@@ -43,10 +43,11 @@ Statistics:
 
 # Parameters for the self-gravity scheme
 Gravity:
-  eta:                    0.025    # Constant dimensionless multiplier for time integration.
-  theta:                  0.7      # Opening angle (Multipole acceptance criterion)
-  mesh_side_length:       128
-  dithering:              0
+  eta:                      0.025    # Constant dimensionless multiplier for time integration.
+  MAC:                      geometric
+  theta_cr:                 0.7      # Opening angle (Multipole acceptance criterion)
+  use_tree_below_softening: 1
+  mesh_side_length:         128
   comoving_DM_softening:         0.0026994 # Comoving DM softening length (in internal units).
   max_physical_DM_softening:     0.0007    # Max physical DM softening length (in internal units).
   comoving_baryon_softening:     0.0026994 # Comoving DM softening length (in internal units).
diff --git a/examples/EAGLE_low_z/EAGLE_6/eagle_6.yml b/examples/EAGLE_low_z/EAGLE_6/eagle_6.yml
index dba6d0d57ccafd86f1c56c274536aeead8fe0be7..192a76113f1f88f475496c2aae2f37db50575a0b 100644
--- a/examples/EAGLE_low_z/EAGLE_6/eagle_6.yml
+++ b/examples/EAGLE_low_z/EAGLE_6/eagle_6.yml
@@ -54,10 +54,11 @@ Statistics:
 
 # Parameters for the self-gravity scheme
 Gravity:
-  eta:                    0.025    # Constant dimensionless multiplier for time integration.
-  theta:                  0.7      # Opening angle (Multipole acceptance criterion)
-  mesh_side_length:       16
-  dithering:              0
+  eta:                      0.025    # Constant dimensionless multiplier for time integration.
+  MAC:                      geometric
+  theta_cr:                 0.7      # Opening angle (Multipole acceptance criterion)
+  use_tree_below_softening: 1
+  mesh_side_length:         16
   comoving_DM_softening:         0.0026994 # Comoving DM softening length (in internal units).
   max_physical_DM_softening:     0.0007    # Max physical DM softening length (in internal units).
   comoving_baryon_softening:     0.0026994 # Comoving DM softening length (in internal units).
diff --git a/examples/GEAR/AgoraDisk/agora_disk.yml b/examples/GEAR/AgoraDisk/agora_disk.yml
index 43371536035eb66dd1e677e1951c7408963fd8f3..3804b454b7c18eda4e397bb21b7ffc493944ecb1 100644
--- a/examples/GEAR/AgoraDisk/agora_disk.yml
+++ b/examples/GEAR/AgoraDisk/agora_disk.yml
@@ -39,8 +39,10 @@ Statistics:
 
 # Parameters for the self-gravity scheme
 Gravity:
-  eta:                    0.05    # Constant dimensionless multiplier for time integration.
-  theta:                  0.7     # Opening angle (Multipole acceptance criterion)
+  eta:                       0.05    # Constant dimensionless multiplier for time integration.
+  MAC:                       geometric
+  theta_cr:                  0.7     
+  use_tree_below_softening:  1
   comoving_DM_softening:     0.08 # Comoving softening length (in internal units).
   max_physical_DM_softening: 0.08    # Physical softening length (in internal units).
   comoving_baryon_softening:     0.08 # Comoving softening length (in internal units).
diff --git a/examples/GEAR/ZoomIn/zoom_in.yml b/examples/GEAR/ZoomIn/zoom_in.yml
index 99ade12d7f457a2a0ff41cbf604255e1a2fb5bc4..08bada9a1fbb30d07310041a198762e00d0c758c 100644
--- a/examples/GEAR/ZoomIn/zoom_in.yml
+++ b/examples/GEAR/ZoomIn/zoom_in.yml
@@ -40,8 +40,11 @@ Statistics:
   delta_time:          1.05 # Time between statistics output
 
 Gravity:
-  eta:                    0.002    # Constant dimensionless multiplier for time integration.
-  theta:                  0.6     # Opening angle (Multipole acceptance criterion)
+  eta:                       0.002    # Constant dimensionless multiplier for time integration.
+  MAC:                       adaptive
+  theta_cr:                  0.7
+  epsilon_fmm:               0.001
+  use_tree_below_softening:  1
   comoving_DM_softening:     0.1278 # Comoving softening length (in internal units).
   max_physical_DM_softening: 0.03365    # Physical softening length (in internal units).
   comoving_baryon_softening:     0.03365 # Comoving softening length (in internal units).
diff --git a/examples/HydroTests/EvrardCollapse_3D/evrard.yml b/examples/HydroTests/EvrardCollapse_3D/evrard.yml
index ab438646383cd510dae4727abaf9a7f1fab5681b..7ca3c8ac216ab3338bcb56253e55eb8a181318ed 100644
--- a/examples/HydroTests/EvrardCollapse_3D/evrard.yml
+++ b/examples/HydroTests/EvrardCollapse_3D/evrard.yml
@@ -31,8 +31,10 @@ SPH:
 
 # Parameters for the self-gravity scheme
 Gravity:
-  eta:                           0.025    # Constant dimensionless multiplier for time integration.
-  theta:                         0.7
+  eta:                0.025
+  MAC:                adaptive
+  theta_cr:           0.7
+  epsilon_fmm:        0.001
   max_physical_baryon_softening: 0.001    # Physical softening length (in internal units).
 
 # Parameters related to the initial conditions
diff --git a/examples/IsolatedGalaxy/IsolatedGalaxy_NFW_MN/isolated_galaxy.yml b/examples/IsolatedGalaxy/IsolatedGalaxy_NFW_MN/isolated_galaxy.yml
index d2ca7aae80862e1bc1e89e645c6a1bb3adfcba1b..a66446dd3245e1fcdbaa7eabdb39b2567ee9c89e 100644
--- a/examples/IsolatedGalaxy/IsolatedGalaxy_NFW_MN/isolated_galaxy.yml
+++ b/examples/IsolatedGalaxy/IsolatedGalaxy_NFW_MN/isolated_galaxy.yml
@@ -9,7 +9,9 @@ InternalUnitSystem:
 # Parameters for the self-gravity scheme
 Gravity:
   eta:          0.025                   # Constant dimensionless multiplier for time integration.
-  theta:        0.7                     # Opening angle (Multipole acceptance criterion).
+  MAC:          geometric
+  theta_cr:     0.7                     # Opening angle (Multipole acceptance criterion).
+  use_tree_below_softening:  1
   max_physical_baryon_softening: 0.100  # Physical softening length (in internal units).
 
 # Parameters governing the time integration (Set dt_min and dt_max to the same value for a fixed time-step run.)
diff --git a/examples/IsolatedGalaxy/IsolatedGalaxy_dmparticles/isolated_galaxy.yml b/examples/IsolatedGalaxy/IsolatedGalaxy_dmparticles/isolated_galaxy.yml
index 27ab01d984319fea68d4d1ae8fb435a6adf895ce..aaa2f8bbacfa9500b4a99cf7c24308aff1fe3767 100644
--- a/examples/IsolatedGalaxy/IsolatedGalaxy_dmparticles/isolated_galaxy.yml
+++ b/examples/IsolatedGalaxy/IsolatedGalaxy_dmparticles/isolated_galaxy.yml
@@ -9,7 +9,9 @@ InternalUnitSystem:
 # Parameters for the self-gravity scheme
 Gravity:
   eta:          0.025               # Constant dimensionless multiplier for time integration.
-  theta:        0.7                 # Opening angle (Multipole acceptance criterion).
+  MAC:          geometric
+  theta_cr:     0.7                 # Opening angle (Multipole acceptance criterion).
+  use_tree_below_softening:  1
   max_physical_DM_softening: 0.7    # Physical softening length (in internal units).
 
 # Parameters governing the time integration (Set dt_min and dt_max to the same value for a fixed time-step run.)
diff --git a/examples/IsolatedGalaxy/IsolatedGalaxy_feedback/isolated_galaxy.yml b/examples/IsolatedGalaxy/IsolatedGalaxy_feedback/isolated_galaxy.yml
index 0642e9312f0926cc0d005f9ec4b6d3f5215ea7d4..48b659c453b28e4a89778d601793976df39b3926 100644
--- a/examples/IsolatedGalaxy/IsolatedGalaxy_feedback/isolated_galaxy.yml
+++ b/examples/IsolatedGalaxy/IsolatedGalaxy_feedback/isolated_galaxy.yml
@@ -9,7 +9,9 @@ InternalUnitSystem:
 # Parameters for the self-gravity scheme
 Gravity:
   eta:          0.025                 # Constant dimensionless multiplier for time integration.
-  theta:        0.7                   # Opening angle (Multipole acceptance criterion).
+  MAC:          geometric
+  theta_cr:     0.7                   # Opening angle (Multipole acceptance criterion).
+  use_tree_below_softening:  1
   max_physical_DM_softening:     0.35 # Physical softening length (in internal units).
   max_physical_baryon_softening: 0.35 # Physical softening length (in internal units).
 
diff --git a/examples/IsolatedGalaxy/IsolatedGalaxy_potential/isolated_galaxy.yml b/examples/IsolatedGalaxy/IsolatedGalaxy_potential/isolated_galaxy.yml
index 3bd743c2ec329057c7f63f987e4809744a07f7ba..d82c3544d6bb966d0ca7fb0db72883eff4a829ca 100644
--- a/examples/IsolatedGalaxy/IsolatedGalaxy_potential/isolated_galaxy.yml
+++ b/examples/IsolatedGalaxy/IsolatedGalaxy_potential/isolated_galaxy.yml
@@ -9,7 +9,9 @@ InternalUnitSystem:
 # Parameters for the self-gravity scheme
 Gravity:
   eta:          0.025               # Constant dimensionless multiplier for time integration.
-  theta:        0.7                 # Opening angle (Multipole acceptance criterion).
+  MAC:          geometric
+  theta_cr:     0.7                 # Opening angle (Multipole acceptance criterion).
+  use_tree_below_softening:  1
   max_physical_baryon_softening: 0.100  # Physical softening length (in internal units).
 
 # Parameters governing the time integration (Set dt_min and dt_max to the same value for a fixed time-step run.)
diff --git a/examples/IsolatedGalaxy/IsolatedGalaxy_starformation/isolated_galaxy.yml b/examples/IsolatedGalaxy/IsolatedGalaxy_starformation/isolated_galaxy.yml
index 1403e13afd27122568fac083eb5cecef96ed330d..c2bbcac6aa7ed474646314d02f15e63ef0b93f0e 100644
--- a/examples/IsolatedGalaxy/IsolatedGalaxy_starformation/isolated_galaxy.yml
+++ b/examples/IsolatedGalaxy/IsolatedGalaxy_starformation/isolated_galaxy.yml
@@ -9,7 +9,9 @@ InternalUnitSystem:
 # Parameters for the self-gravity scheme
 Gravity:
   eta:          0.025                 # Constant dimensionless multiplier for time integration.
-  theta:        0.7                   # Opening angle (Multipole acceptance criterion).
+  MAC:          geometric
+  theta_cr:     0.7                 # Opening angle (Multipole acceptance criterion).
+  use_tree_below_softening:  1
   max_physical_DM_softening:     0.2  # Physical softening length (in internal units).
   max_physical_baryon_softening: 0.2  # Physical softening length (in internal units).
 
diff --git a/examples/Makefile.am b/examples/Makefile.am
index 3de490970d27ef91eda85ccc1d2c57aba2fff0fe..64735be2e177d2839c69d718303edb8192b3e72f 100644
--- a/examples/Makefile.am
+++ b/examples/Makefile.am
@@ -20,7 +20,7 @@ MYFLAGS =
 
 # Add the source directory and the non-standard paths to the included library headers to CFLAGS
 AM_CFLAGS = -I$(top_srcdir)/src -I$(top_srcdir)/argparse $(HDF5_CPPFLAGS) \
-	$(GSL_INCS) $(FFTW_INCS) $(NUMA_INCS) $(GRACKLE_INCS)
+	$(GSL_INCS) $(FFTW_INCS) $(NUMA_INCS) $(GRACKLE_INCS) $(OPENMP_CFLAGS)
 
 AM_LDFLAGS = $(HDF5_LDFLAGS)
 
diff --git a/examples/PMillennium/PMillennium-1536/p-mill-1536.yml b/examples/PMillennium/PMillennium-1536/p-mill-1536.yml
index f343650452a24f620edef88e42666e09213dbd64..e2e8439a71dac106b6f4fe0128805a96ff48f796 100644
--- a/examples/PMillennium/PMillennium-1536/p-mill-1536.yml
+++ b/examples/PMillennium/PMillennium-1536/p-mill-1536.yml
@@ -42,7 +42,9 @@ Statistics:
 # Parameters for the self-gravity scheme
 Gravity:
   eta:                    0.025         
-  theta:                  0.5          
+  MAC:                    adaptive
+  theta_cr:               0.7
+  epsilon_fmm:            0.001
   comoving_DM_softening:     0.0208333  # 20.8333 kpc = 1/25 mean inter-particle separation
   max_physical_DM_softening: 0.0208333  # 20.8333 kpc = 1/25 mean inter-particle separation
   mesh_side_length:       512
diff --git a/examples/PMillennium/PMillennium-384/p-mill-384.yml b/examples/PMillennium/PMillennium-384/p-mill-384.yml
index 0e68969d0b590cec8058805e4644b2763c543be1..0e058b5b83f81de967be34c3237d1cf7060e1e4a 100644
--- a/examples/PMillennium/PMillennium-384/p-mill-384.yml
+++ b/examples/PMillennium/PMillennium-384/p-mill-384.yml
@@ -41,8 +41,10 @@ Statistics:
   
 # Parameters for the self-gravity scheme
 Gravity:
-  eta:                    0.025         
-  theta:                  0.5          
+  eta:                    0.025
+  MAC:                    adaptive
+  theta_cr:               0.7
+  epsilon_fmm:            0.001
   comoving_DM_softening:     0.08333  # 83.333 kpc = 1/25 mean inter-particle separation
   max_physical_DM_softening: 0.08333  # 83.333 kpc = 1/25 mean inter-particle separation
   mesh_side_length:       128
diff --git a/examples/PMillennium/PMillennium-768/p-mill-768.yml b/examples/PMillennium/PMillennium-768/p-mill-768.yml
index 1cd9e63b1f03ac65baf72701de276b8ff43c9575..3a058c2db9f2a0e171c8f7a56dec0b03e593c081 100644
--- a/examples/PMillennium/PMillennium-768/p-mill-768.yml
+++ b/examples/PMillennium/PMillennium-768/p-mill-768.yml
@@ -42,7 +42,9 @@ Statistics:
 # Parameters for the self-gravity scheme
 Gravity:
   eta:                    0.025         
-  theta:                  0.5          
+  MAC:                    adaptive
+  theta_cr:               0.7
+  epsilon_fmm:            0.001
   comoving_DM_softening:     0.041666  # 41.6666 kpc = 1/25 mean inter-particle separation
   max_physical_DM_softening: 0.041666  # 41.6666 kpc = 1/25 mean inter-particle separation
   mesh_side_length:       256
diff --git a/examples/Planetary/EarthImpact/earth_impact.yml b/examples/Planetary/EarthImpact/earth_impact.yml
index 336a0eb82d570a1544c1344ab2261e35dbf3d59e..9f147b0135ffab70da2c3f17f1bda1e111a803aa 100644
--- a/examples/Planetary/EarthImpact/earth_impact.yml
+++ b/examples/Planetary/EarthImpact/earth_impact.yml
@@ -45,7 +45,10 @@ SPH:
 # Parameters for the self-gravity scheme
 Gravity:
     eta:                            0.025   # Constant dimensionless multiplier for time integration.
-    theta:                          0.7     # Opening angle (Multipole acceptance criterion)
+    MAC:                            adaptive
+    theta_cr:                       0.7     
+    epsilon_fmm:                    0.001
+    use_tree_below_softening:       1
     max_physical_baryon_softening:  0.003   # Physical softening length (in internal units).
 
 # Parameters for the task scheduling
diff --git a/examples/QuickLymanAlpha/L050N0752/qla_50.yml b/examples/QuickLymanAlpha/L050N0752/qla_50.yml
index 9eeac77e732eeeb32c5440dd5586199c401fedf1..0748fa112b10c5d8e93502f69984bbbda54edca1 100644
--- a/examples/QuickLymanAlpha/L050N0752/qla_50.yml
+++ b/examples/QuickLymanAlpha/L050N0752/qla_50.yml
@@ -37,14 +37,16 @@ Statistics:
 
 # Parameters for the self-gravity scheme
 Gravity:
-  eta:                    0.025    # Constant dimensionless multiplier for time integration.
-  theta:                  0.7      # Opening angle (Multipole acceptance criterion)
-  mesh_side_length:       256
+  eta:                         0.025    # Constant dimensionless multiplier for time integration.
+  MAC:                         adaptive
+  epsilon_fmm:                 0.001
+  theta_cr:                    0.7       # Opening angle (Multipole acceptance criterion)
+  use_tree_below_softening:    1
+  mesh_side_length:            256
   comoving_DM_softening:         0.003320 # Comoving softening for DM (3.32 ckpc)
   max_physical_DM_softening:     0.001300 # Physical softening for DM (1.30 pkpc)
   comoving_baryon_softening:     0.001790 # Comoving softening for baryons (1.79 ckpc)
   max_physical_baryon_softening: 0.000700 # Physical softening for baryons (0.70 pkpc)
-  dithering:              0
 
 # Parameters for the hydrodynamics scheme
 SPH:
diff --git a/examples/SantaBarbara/SantaBarbara-128/santa_barbara.yml b/examples/SantaBarbara/SantaBarbara-128/santa_barbara.yml
index 5ee17b5a5877c60eebab5075aebb1bec53429c60..d71394d1899de182a6df527ddd2b0c69007b2f68 100644
--- a/examples/SantaBarbara/SantaBarbara-128/santa_barbara.yml
+++ b/examples/SantaBarbara/SantaBarbara-128/santa_barbara.yml
@@ -41,8 +41,11 @@ Statistics:
 
 # Parameters for the self-gravity scheme
 Gravity:
-  eta:                    0.025  
-  theta:                  0.5
+  eta:                      0.025
+  MAC:                      adaptive
+  theta_cr:                 0.7
+  epsilon_fmm:              0.001
+  use_tree_below_softening: 1
   comoving_DM_softening:         0.02    # 20 kpc = 1/25 mean inter-particle separation
   max_physical_DM_softening:     0.00526 # 20 ckpc = 5.26 pkpc at z=2.8 (EAGLE-like evolution of softening).
   comoving_baryon_softening:     0.02    # 20 kpc = 1/25 mean inter-particle separation
diff --git a/examples/SantaBarbara/SantaBarbara-256/santa_barbara.yml b/examples/SantaBarbara/SantaBarbara-256/santa_barbara.yml
index e83a136c21404f3dcb4a6a0c9921653d71080fdd..e9a8549cee72f5219182fd8c41c3477b1f02322a 100644
--- a/examples/SantaBarbara/SantaBarbara-256/santa_barbara.yml
+++ b/examples/SantaBarbara/SantaBarbara-256/santa_barbara.yml
@@ -41,8 +41,11 @@ Statistics:
 
 # Parameters for the self-gravity scheme
 Gravity:
-  eta:                    0.025  
-  theta:                  0.5
+  eta:                      0.025
+  MAC:                      adaptive
+  theta_cr:                 0.7
+  epsilon_fmm:              0.001
+  use_tree_below_softening: 1
   comoving_DM_softening:         0.01    # 10 kpc = 1/25 mean inter-particle separation
   max_physical_DM_softening:     0.00263 # 10 ckpc = 2.63 pkpc at z=2.8 (EAGLE-like evolution of softening).
   comoving_baryon_softening:     0.01    # 10 kpc = 1/25 mean inter-particle separation
diff --git a/examples/SmallCosmoVolume/SmallCosmoVolume_DM/small_cosmo_volume_dm.yml b/examples/SmallCosmoVolume/SmallCosmoVolume_DM/small_cosmo_volume_dm.yml
index 95991e9f1d7e385826e33f6ae0fa343173966084..85abb33b38ad87b716021e094fd483813e8a51ce 100644
--- a/examples/SmallCosmoVolume/SmallCosmoVolume_DM/small_cosmo_volume_dm.yml
+++ b/examples/SmallCosmoVolume/SmallCosmoVolume_DM/small_cosmo_volume_dm.yml
@@ -21,8 +21,10 @@ TimeIntegration:
 
 # Parameters for the self-gravity scheme
 Gravity:
-  eta:          0.025         
-  theta:        0.5           
+  eta:          0.025
+  MAC:          adaptive
+  theta_cr:     0.7
+  epsilon_fmm:  0.001
   comoving_DM_softening:     0.0889     # 1/25th of the mean inter-particle separation: 88.9 kpc
   max_physical_DM_softening: 0.0889     # 1/25th of the mean inter-particle separation: 88.9 kpc
   mesh_side_length:       64
diff --git a/examples/SmallCosmoVolume/SmallCosmoVolume_Snipshots/small_cosmo_volume.yml b/examples/SmallCosmoVolume/SmallCosmoVolume_Snipshots/small_cosmo_volume.yml
index dc554e3dd8182b717803a902d1fb8b9f698d2f8e..5d297a3dc88674a41414311286d2b02032e83f17 100644
--- a/examples/SmallCosmoVolume/SmallCosmoVolume_Snipshots/small_cosmo_volume.yml
+++ b/examples/SmallCosmoVolume/SmallCosmoVolume_Snipshots/small_cosmo_volume.yml
@@ -21,8 +21,10 @@ TimeIntegration:
 
 # Parameters for the self-gravity scheme
 Gravity:
-  eta:          0.025         
-  theta:        0.5           
+  eta:          0.025
+  MAC:          adaptive
+  theta_cr:     0.7
+  epsilon_fmm:  0.001
   comoving_DM_softening:         0.0889     # 1/25th of the mean inter-particle separation: 88.9 kpc
   max_physical_DM_softening:     0.0889     # 1/25th of the mean inter-particle separation: 88.9 kpc
   comoving_baryon_softening:     0.0889     # 1/25th of the mean inter-particle separation: 88.9 kpc
diff --git a/examples/SmallCosmoVolume/SmallCosmoVolume_VELOCIraptor/small_cosmo_volume.yml b/examples/SmallCosmoVolume/SmallCosmoVolume_VELOCIraptor/small_cosmo_volume.yml
index 0142e806db0c1e2e6261825a2d531f1937defced..2cd7e3efd85e70c41e85bc1a66101245845a9d1c 100644
--- a/examples/SmallCosmoVolume/SmallCosmoVolume_VELOCIraptor/small_cosmo_volume.yml
+++ b/examples/SmallCosmoVolume/SmallCosmoVolume_VELOCIraptor/small_cosmo_volume.yml
@@ -21,8 +21,10 @@ TimeIntegration:
 
 # Parameters for the self-gravity scheme
 Gravity:
-  eta:          0.025         
-  theta:        0.5           
+  eta:          0.025
+  MAC:          adaptive
+  theta_cr:     0.7
+  epsilon_fmm:  0.001
   comoving_DM_softening:         0.0889     # 1/25th of the mean inter-particle separation: 88.9 kpc
   max_physical_DM_softening:     0.0889     # 1/25th of the mean inter-particle separation: 88.9 kpc
   comoving_baryon_softening:     0.0889     # 1/25th of the mean inter-particle separation: 88.9 kpc
diff --git a/examples/SmallCosmoVolume/SmallCosmoVolume_cooling/small_cosmo_volume.yml b/examples/SmallCosmoVolume/SmallCosmoVolume_cooling/small_cosmo_volume.yml
index 0d93406eebc903c880a53ffa15f874e21902675b..2528a81eeacfe4bebf3dd304ccf22411a22d0956 100644
--- a/examples/SmallCosmoVolume/SmallCosmoVolume_cooling/small_cosmo_volume.yml
+++ b/examples/SmallCosmoVolume/SmallCosmoVolume_cooling/small_cosmo_volume.yml
@@ -21,8 +21,10 @@ TimeIntegration:
 
 # Parameters for the self-gravity scheme
 Gravity:
-  eta:          0.025         
-  theta:        0.3           
+  eta:          0.025
+  MAC:          adaptive
+  theta_cr:     0.7
+  epsilon_fmm:  0.001
   comoving_DM_softening:         0.0889     # 1/25th of the mean inter-particle separation: 88.9 kpc
   max_physical_DM_softening:     0.0889     # 1/25th of the mean inter-particle separation: 88.9 kpc
   comoving_baryon_softening:     0.0889     # 1/25th of the mean inter-particle separation: 88.9 kpc
diff --git a/examples/SmallCosmoVolume/SmallCosmoVolume_hydro/small_cosmo_volume.yml b/examples/SmallCosmoVolume/SmallCosmoVolume_hydro/small_cosmo_volume.yml
index 250fdc653bdad771c09f8e76819a8a475393d504..da08070bbcd78086a9f53d1bb808d8d48a7c655a 100644
--- a/examples/SmallCosmoVolume/SmallCosmoVolume_hydro/small_cosmo_volume.yml
+++ b/examples/SmallCosmoVolume/SmallCosmoVolume_hydro/small_cosmo_volume.yml
@@ -21,8 +21,10 @@ TimeIntegration:
 
 # Parameters for the self-gravity scheme
 Gravity:
-  eta:          0.025         
-  theta:        0.5           
+  eta:          0.025
+  MAC:          adaptive
+  theta_cr:     0.7
+  epsilon_fmm:  0.001
   comoving_DM_softening:         0.0889     # 1/25th of the mean inter-particle separation: 88.9 kpc
   max_physical_DM_softening:     0.0889     # 1/25th of the mean inter-particle separation: 88.9 kpc
   comoving_baryon_softening:     0.0889     # 1/25th of the mean inter-particle separation: 88.9 kpc
diff --git a/examples/SubgridTests/PressureFloor/pressureFloor.yml b/examples/SubgridTests/PressureFloor/pressureFloor.yml
index f9df74dcefb14204264a4ffc9a7326d3453f718d..b20eb3a3026972ad7756a155865fefd88c5a798b 100644
--- a/examples/SubgridTests/PressureFloor/pressureFloor.yml
+++ b/examples/SubgridTests/PressureFloor/pressureFloor.yml
@@ -45,9 +45,10 @@ GEARPressureFloor:
 
   # Parameters for the self-gravity scheme
 Gravity:
-  mesh_side_length:              12       # Number of cells along each axis for the periodic gravity mesh.
-  eta:                           2.5     # Constant dimensionless multiplier for time integration.
-  theta:                         0.7       # Opening angle (Multipole acceptance criterion).
-  max_physical_DM_softening:     0.5    # Maximal Plummer-equivalent softening length in physical coordinates for DM particles (in internal units).
-  max_physical_baryon_softening: 0.5    # Maximal Plummer-equivalent softening length in physical coordinates for baryon particles (in internal units).
-  softening_ratio_background:    0.04      # Fraction of the mean inter-particle separation to use as Plummer-equivalent softening for the background DM particles.
+  mesh_side_length:              16       # Number of cells along each axis for the periodic gravity mesh.
+  eta:                           0.025    # Constant dimensionless multiplier for time integration.
+  MAC:                           geometric
+  theta_cr:                      0.7      # Opening angle (Multipole acceptance criterion).
+  max_physical_DM_softening:     0.5      # Maximal Plummer-equivalent softening length in physical coordinates for DM particles (in internal units).
+  max_physical_baryon_softening: 0.5      # Maximal Plummer-equivalent softening length in physical coordinates for baryon particles (in internal units).
+  softening_ratio_background:    0.04     # Fraction of the mean inter-particle separation to use as Plummer-equivalent softening for the background DM particles.
diff --git a/examples/main.c b/examples/main.c
index c01c5494644eeb7c5603e99e63a09e0060887b92..e785edf837445ee5622c1b10474762401bb0d286 100644
--- a/examples/main.c
+++ b/examples/main.c
@@ -1117,7 +1117,7 @@ int main(int argc, char *argv[]) {
       gravity_props_init(&gravity_properties, params, &prog_const, &cosmo,
                          with_cosmology, with_external_gravity,
                          with_baryon_particles, with_DM_particles,
-                         with_DM_background_particles, periodic);
+                         with_DM_background_particles, periodic, s.dim);
 
     /* Initialise the external potential properties */
     bzero(&potential, sizeof(struct external_potential));
diff --git a/examples/main_fof.c b/examples/main_fof.c
index ee683ebb5b8258b81bfdc877eba1421a19918860..ff029cc2eb5fdee29d4acd5b4b0c1dc1e8b971bf 100644
--- a/examples/main_fof.c
+++ b/examples/main_fof.c
@@ -534,7 +534,7 @@ int main(int argc, char *argv[]) {
   gravity_props_init(&gravity_properties, params, &prog_const, &cosmo,
                      /*with_cosmology=*/1, /*with_external_gravity=*/0,
                      with_baryon_particles, with_DM_particles,
-                     with_DM_background_particles, periodic);
+                     with_DM_background_particles, periodic, s.dim);
 
   /* Initialise the long-range gravity mesh */
   if (periodic) {
diff --git a/examples/nIFTyCluster/Baryonic/nifty.yml b/examples/nIFTyCluster/Baryonic/nifty.yml
index 7b58c80009e9401f4d6ee5b9656940fa89dd2409..501e9578bbc6face9efc7fb1ab9fa0120ae897e5 100644
--- a/examples/nIFTyCluster/Baryonic/nifty.yml
+++ b/examples/nIFTyCluster/Baryonic/nifty.yml
@@ -40,8 +40,11 @@ Statistics:
 
 # Parameters for the self-gravity scheme
 Gravity:
-  eta:                           0.025  # Constant dimensionless multiplier for time integration. 
-  theta:                         0.5    # Opening angle (Multipole acceptance criterion)
+  eta:                           0.025    # Constant dimensionless multiplier for time integration.
+  MAC:                           adaptive 
+  epsilon_fmm:                   0.001 
+  theta_cr:                      0.7      
+  use_tree_below_softening:      1
   comoving_DM_softening:         0.02   # Comoving softening length (in internal units).
   comoving_baryon_softening:     0.02   # Comoving softening length (in internal units).
   max_physical_DM_softening:     0.005  # Max physical softening length (in internal units).
diff --git a/examples/parameter_example.yml b/examples/parameter_example.yml
index 1bc907c17f36c62c822ceee7b6163be012991f03..86f3980d7288e419cac570ffb9b2ab708338cf8b 100644
--- a/examples/parameter_example.yml
+++ b/examples/parameter_example.yml
@@ -64,7 +64,11 @@ Stars:
 Gravity:
   mesh_side_length:              128       # Number of cells along each axis for the periodic gravity mesh.
   eta:                           0.025     # Constant dimensionless multiplier for time integration.
-  theta:                         0.7       # Opening angle (Multipole acceptance criterion).
+  MAC:                           adaptive  # Choice of multipole acceptance criterion: 'adaptive' OR 'geometric'.
+  epsilon_fmm:                   0.001     # Tolerance parameter for the adaptive multipole acceptance criterion.
+  theta_cr:                      0.7       # Opening angle for the purely geometric criterion.
+  use_tree_below_softening:      0         # (Optional) Can the gravity code use the multipole interactions below the softening scale?
+  allow_truncation_in_MAC:       0         # (Optional) Can the Multipole acceptance criterion use the truncated force estimator?
   comoving_DM_softening:         0.0026994 # Comoving Plummer-equivalent softening length for DM particles (in internal units).
   max_physical_DM_softening:     0.0007    # Maximal Plummer-equivalent softening length in physical coordinates for DM particles (in internal units).
   comoving_baryon_softening:     0.0026994 # Comoving Plummer-equivalent softening length for baryon particles (in internal units).
@@ -74,7 +78,7 @@ Gravity:
   a_smooth:                      1.25      # (Optional) Smoothing scale in top-level cell sizes to smooth the long-range forces over (this is the default value).
   r_cut_max:                     4.5       # (Optional) Cut-off in number of top-level cells beyond which no FMM forces are computed (this is the default value).
   r_cut_min:                     0.1       # (Optional) Cut-off in number of top-level cells below which no truncation of FMM forces are performed (this is the default value).
-  dithering:                     1         # (Optional) Activate the dithering of the gravity mesh at every rebuild (this is the default value).
+  dithering:                     0         # (Optional) Activate the dithering of the gravity mesh at every rebuild (this is the default value).
   dithering_ratio:               1.0       # (Optional) Magnitude of each component of the dithering vector in units of the top-level cell sizes (this is the default value).
 
 # Parameters when running with SWIFT_GRAVITY_FORCE_CHECKS 
diff --git a/src/Makefile.am b/src/Makefile.am
index 4369a1a37a6969b37d2d38d01d775472729e4481..12b5273ffaa007eb332df18e9bce174c13bc03b6 100644
--- a/src/Makefile.am
+++ b/src/Makefile.am
@@ -16,7 +16,7 @@
 # along with this program.  If not, see <http://www.gnu.org/licenses/>.
 
 # Add the non-standard paths to the included library headers
-AM_CFLAGS = $(HDF5_CPPFLAGS) $(GSL_INCS) $(FFTW_INCS) $(NUMA_INCS) $(GRACKLE_INCS)
+AM_CFLAGS = $(HDF5_CPPFLAGS) $(GSL_INCS) $(FFTW_INCS) $(NUMA_INCS) $(GRACKLE_INCS) $(OPENMP_CFLAGS)
 
 # Assign a "safe" version number
 AM_LDFLAGS = $(HDF5_LDFLAGS) $(FFTW_LIBS)
@@ -52,7 +52,7 @@ include_HEADERS = space.h runner.h queue.h task.h lock.h cell.h part.h const.h \
     chemistry.h chemistry_io.h chemistry_struct.h cosmology.h restart.h space_getsid.h utilities.h \
     mesh_gravity.h cbrt.h exp10.h velociraptor_interface.h swift_velociraptor_part.h output_list.h \
     logger_io.h tracers_io.h tracers.h tracers_struct.h star_formation_io.h fof.h fof_struct.h fof_io.h \
-    multipole.h multipole_struct.h sincos.h \
+    multipole.h multipole_accept.h multipole_struct.h binomial.h integer_power.h sincos.h \
     star_formation_struct.h star_formation.h star_formation_iact.h \
     star_formation_logger.h star_formation_logger_struct.h \
     pressure_floor.h pressure_floor_struct.h pressure_floor_iact.h \
diff --git a/src/approx_math.h b/src/approx_math.h
index 4015e6040b102839234c452783838dadddb86cf6..70cb3e203cefa7e3c8ce2e4d721d9e23588641ef 100644
--- a/src/approx_math.h
+++ b/src/approx_math.h
@@ -21,31 +21,6 @@
 
 #include "inline.h"
 
-/**
- * @brief Approximate version of the complementay error function erfcf(x).
- *
- * This is based on eq. 7.1.27 of Abramowitz & Stegun, 1972.
- * The absolute error is < 4.7*10^-4 over the range 0 < x < infinity.
- *
- * Returns garbage for x < 0.
- * @param x The number to compute erfc for.
- */
-__attribute__((always_inline, const)) INLINE static float approx_erfcf(
-    float x) {
-
-  /* 1 + 0.278393*x + 0.230389*x^2 + 0.000972*x^3 + 0.078108*x^4 */
-  float arg = 0.078108f;
-  arg = x * arg + 0.000972f;
-  arg = x * arg + 0.230389f;
-  arg = x * arg + 0.278393f;
-  arg = x * arg + 1.f;
-
-  /* 1 / arg^4 */
-  const float arg2 = arg * arg;
-  const float arg4 = arg2 * arg2;
-  return 1.f / arg4;
-}
-
 /**
  * @brief Approximate version of expf(x) using a 4th order Taylor expansion
  *
diff --git a/src/binomial.h b/src/binomial.h
new file mode 100644
index 0000000000000000000000000000000000000000..b1caf1258a75b47b0f47c26b2e16ac01d76f6454
--- /dev/null
+++ b/src/binomial.h
@@ -0,0 +1,56 @@
+/*******************************************************************************
+ * This file is part of SWIFT.
+ * Copyright (c) 2020  Matthieu Schaller (schaller@strw.leidenuniv.nl)
+ *
+ * This program is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU Lesser General Public License as published
+ * by the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public License
+ * along with this program.  If not, see <http://www.gnu.org/licenses/>.
+ *
+ ******************************************************************************/
+#ifndef SWIFT_BINOMIAL_H
+#define SWIFT_BINOMIAL_H
+
+/* Config parameters. */
+#include "../config.h"
+
+/* Local headers */
+#include "error.h"
+#include "inline.h"
+
+/**
+ * @brief Look up the binomial coefficient (n, k) in a pre-computed table.
+ *
+ * Valid for 0 <= n <= 8, 0 <= k <= n (asserted only with SWIFT_DEBUG_CHECKS).
+ */
+__attribute__((const)) INLINE static int binomial(const int n, const int k) {
+
+#ifdef SWIFT_DEBUG_CHECKS
+  assert(n >= 0);
+  assert(k >= 0);
+  assert(n <= 8);
+  assert(k <= n);
+#endif
+
+  /* Pascal's triangle: row n, column k; entries with k > n are zero */
+  static const int coeffs[9][9] = {
+      {1, 0, 0, 0, 0, 0, 0, 0, 0},     {1, 1, 0, 0, 0, 0, 0, 0, 0},
+      {1, 2, 1, 0, 0, 0, 0, 0, 0},     {1, 3, 3, 1, 0, 0, 0, 0, 0},
+      {1, 4, 6, 4, 1, 0, 0, 0, 0},     {1, 5, 10, 10, 5, 1, 0, 0, 0},
+      {1, 6, 15, 20, 15, 6, 1, 0, 0},  {1, 7, 21, 35, 35, 21, 7, 1, 0},
+      {1, 8, 28, 56, 70, 56, 28, 8, 1}
+
+  };
+
+  return coeffs[n][k];
+}
+
+#endif /* SWIFT_BINOMIAL_H */
diff --git a/src/black_holes/EAGLE/black_holes_iact.h b/src/black_holes/EAGLE/black_holes_iact.h
index 30186f892e1eaf9b34e07ddcc5a419312696a78b..a08c4e6293a5612ac6cf823ec6d1c4a17abbf5e2 100644
--- a/src/black_holes/EAGLE/black_holes_iact.h
+++ b/src/black_holes/EAGLE/black_holes_iact.h
@@ -417,8 +417,8 @@ runner_iact_nonsym_bh_bh_swallow(const float r2, const float *dx,
           (r_12 < grav_props->epsilon_baryon_cur)) {
 
         /* If BHs are within softening range, take this into account */
-        float w_grav;
-        kernel_grav_pot_eval(r_12 / grav_props->epsilon_baryon_cur, &w_grav);
+        const float w_grav =
+            kernel_grav_pot_eval(r_12 / grav_props->epsilon_baryon_cur);
         const float r_mod = w_grav / grav_props->epsilon_baryon_cur;
         v2_threshold = 2.f * G_Newton * M / (r_mod);
 
diff --git a/src/cell.c b/src/cell.c
index 88c8b84e00e536ca9b91783d5f0bc009ae9186c9..8124e41d418d733460648db24db5c63062b2fa1b 100644
--- a/src/cell.c
+++ b/src/cell.c
@@ -61,6 +61,7 @@
 #include "hydro_properties.h"
 #include "memswap.h"
 #include "minmax.h"
+#include "multipole.h"
 #include "pressure_floor.h"
 #include "scheduler.h"
 #include "space.h"
@@ -2241,6 +2242,7 @@ void cell_make_multipoles(struct cell *c, integertime_t ti_current,
   gravity_reset(c->grav.multipole);
 
   if (c->split) {
+
     /* Start by recursing */
     for (int k = 0; k < 8; ++k) {
       if (c->progeny[k] != NULL)
@@ -2303,9 +2305,13 @@ void cell_make_multipoles(struct cell *c, integertime_t ti_current,
     /* Take minimum of both limits */
     c->grav.multipole->r_max = min(r_max, sqrt(dx * dx + dy * dy + dz * dz));
 
+    /* Compute the multipole power */
+    gravity_multipole_compute_power(&c->grav.multipole->m_pole);
+
   } else {
     if (c->grav.count > 0) {
       gravity_P2M(c->grav.multipole, c->grav.parts, c->grav.count, grav_props);
+      gravity_multipole_compute_power(&c->grav.multipole->m_pole);
       const double dx =
           c->grav.multipole->CoM[0] > c->loc[0] + c->width[0] * 0.5
               ? c->grav.multipole->CoM[0] - c->loc[0]
@@ -2399,6 +2405,7 @@ void cell_check_multipole(struct cell *c,
   if (c->grav.count > 0) {
     /* Brute-force calculation */
     gravity_P2M(&ma, c->grav.parts, c->grav.count, grav_props);
+    gravity_multipole_compute_power(&ma.m_pole);
 
     /* Now  compare the multipole expansion */
     if (!gravity_multipole_equal(&ma, c->grav.multipole, tolerance)) {
@@ -3343,7 +3350,9 @@ void cell_activate_subcell_grav_tasks(struct cell *ci, struct cell *cj,
     if (lock_unlock(&cj->grav.mlock) != 0) error("Impossible to unlock m-pole");
 
     /* Can we use multipoles ? */
-    if (cell_can_use_pair_mm(ci, cj, e, sp)) {
+    if (cell_can_use_pair_mm(ci, cj, e, sp, /*use_rebuild_data=*/0,
+                             /*is_tree_walk=*/1)) {
+
       /* Ok, no need to drift anything */
       return;
     }
@@ -4732,6 +4741,7 @@ void cell_drift_gpart(struct cell *c, const struct engine *e, int force) {
   const integertime_t ti_old_gpart = c->grav.ti_old_part;
   const integertime_t ti_current = e->ti_current;
   struct gpart *const gparts = c->grav.parts;
+  const struct gravity_props *grav_props = e->gravity_properties;
 
   /* Drift irrespective of cell flags? */
   force = (force || cell_get_flag(c, cell_flag_do_grav_drift));
@@ -4793,7 +4803,7 @@ void cell_drift_gpart(struct cell *c, const struct engine *e, int force) {
       if (gpart_is_inhibited(gp, e)) continue;
 
       /* Drift... */
-      drift_gpart(gp, dt_drift, ti_old_gpart, ti_current);
+      drift_gpart(gp, dt_drift, ti_old_gpart, ti_current, grav_props);
 
 #ifdef SWIFT_DEBUG_CHECKS
       /* Make sure the particle does not drift by more than a box length. */
@@ -6399,91 +6409,46 @@ void cell_reorder_extra_gparts(struct cell *c, struct part *parts,
 /**
  * @brief Can we use the MM interactions fo a given pair of cells?
  *
- * @param ci The first #cell.
- * @param cj The second #cell.
- * @param e The #engine.
- * @param s The #space.
- */
-int cell_can_use_pair_mm(const struct cell *ci, const struct cell *cj,
-                         const struct engine *e, const struct space *s) {
-  const double theta_crit2 = e->gravity_properties->theta_crit2;
-  const int periodic = s->periodic;
-  const double dim[3] = {s->dim[0], s->dim[1], s->dim[2]};
-
-  /* Recover the multipole information */
-  const struct gravity_tensors *const multi_i = ci->grav.multipole;
-  const struct gravity_tensors *const multi_j = cj->grav.multipole;
-
-  /* Get the distance between the CoMs */
-  double dx = multi_i->CoM[0] - multi_j->CoM[0];
-  double dy = multi_i->CoM[1] - multi_j->CoM[1];
-  double dz = multi_i->CoM[2] - multi_j->CoM[2];
-
-  /* Apply BC */
-  if (periodic) {
-    dx = nearest(dx, dim[0]);
-    dy = nearest(dy, dim[1]);
-    dz = nearest(dz, dim[2]);
-  }
-  const double r2 = dx * dx + dy * dy + dz * dz;
-
-  const double epsilon_i = multi_i->m_pole.max_softening;
-  const double epsilon_j = multi_j->m_pole.max_softening;
-
-  return gravity_M2L_accept(multi_i->r_max, multi_j->r_max, theta_crit2, r2,
-                            epsilon_i, epsilon_j);
-}
-
-/**
- * @brief Can we use the MM interactions fo a given pair of cells?
- *
- * This function uses the information gathered in the multipole at rebuild
- * time and not the current position and radius of the multipole.
+ * The two cells have to be different!
  *
  * @param ci The first #cell.
  * @param cj The second #cell.
  * @param e The #engine.
  * @param s The #space.
+ * @param use_rebuild_data Are we considering the data at the last tree-build
+ * (1) or current data (0)?
+ * @param is_tree_walk Are we calling this in the tree walk (1) or for the
+ * top-level task construction (0)?
  */
-int cell_can_use_pair_mm_rebuild(const struct cell *ci, const struct cell *cj,
-                                 const struct engine *e,
-                                 const struct space *s) {
-  const double theta_crit2 = e->gravity_properties->theta_crit2;
+int cell_can_use_pair_mm(const struct cell *restrict ci,
+                         const struct cell *restrict cj, const struct engine *e,
+                         const struct space *s, const int use_rebuild_data,
+                         const int is_tree_walk) {
+
+  const struct gravity_props *props = e->gravity_properties;
   const int periodic = s->periodic;
   const double dim[3] = {s->dim[0], s->dim[1], s->dim[2]};
 
-  /* Recover the multipole information */
-  const struct gravity_tensors *const multi_i = ci->grav.multipole;
-  const struct gravity_tensors *const multi_j = cj->grav.multipole;
+  /* Check for trivial cases */
+  if (is_tree_walk && ci->grav.count <= 1) return 0;
+  if (is_tree_walk && cj->grav.count <= 1) return 0;
 
-#ifdef SWIFT_DEBUG_CHECKS
+  /* Recover the multipole information */
+  const struct gravity_tensors *restrict multi_i = ci->grav.multipole;
+  const struct gravity_tensors *restrict multi_j = cj->grav.multipole;
 
-  if (multi_i->CoM_rebuild[0] < ci->loc[0] ||
-      multi_i->CoM_rebuild[0] > ci->loc[0] + ci->width[0])
-    error("Invalid multipole position ci");
-  if (multi_i->CoM_rebuild[1] < ci->loc[1] ||
-      multi_i->CoM_rebuild[1] > ci->loc[1] + ci->width[1])
-    error("Invalid multipole position ci");
-  if (multi_i->CoM_rebuild[2] < ci->loc[2] ||
-      multi_i->CoM_rebuild[2] > ci->loc[2] + ci->width[2])
-    error("Invalid multipole position ci");
-
-  if (multi_j->CoM_rebuild[0] < cj->loc[0] ||
-      multi_j->CoM_rebuild[0] > cj->loc[0] + cj->width[0])
-    error("Invalid multipole position cj");
-  if (multi_j->CoM_rebuild[1] < cj->loc[1] ||
-      multi_j->CoM_rebuild[1] > cj->loc[1] + cj->width[1])
-    error("Invalid multipole position cj");
-  if (multi_j->CoM_rebuild[2] < cj->loc[2] ||
-      multi_j->CoM_rebuild[2] > cj->loc[2] + cj->width[2])
-    error("Invalid multipole position cj");
-
-#endif
+  double dx, dy, dz;
 
   /* Get the distance between the CoMs */
-  double dx = multi_i->CoM_rebuild[0] - multi_j->CoM_rebuild[0];
-  double dy = multi_i->CoM_rebuild[1] - multi_j->CoM_rebuild[1];
-  double dz = multi_i->CoM_rebuild[2] - multi_j->CoM_rebuild[2];
+  if (use_rebuild_data) {
+    dx = multi_i->CoM_rebuild[0] - multi_j->CoM_rebuild[0];
+    dy = multi_i->CoM_rebuild[1] - multi_j->CoM_rebuild[1];
+    dz = multi_i->CoM_rebuild[2] - multi_j->CoM_rebuild[2];
+  } else {
+    dx = multi_i->CoM[0] - multi_j->CoM[0];
+    dy = multi_i->CoM[1] - multi_j->CoM[1];
+    dz = multi_i->CoM[2] - multi_j->CoM[2];
+  }
 
   /* Apply BC */
   if (periodic) {
@@ -6493,9 +6458,6 @@ int cell_can_use_pair_mm_rebuild(const struct cell *ci, const struct cell *cj,
   }
   const double r2 = dx * dx + dy * dy + dz * dz;
 
-  const double epsilon_i = multi_i->m_pole.max_softening;
-  const double epsilon_j = multi_j->m_pole.max_softening;
-
-  return gravity_M2L_accept(multi_i->r_max_rebuild, multi_j->r_max_rebuild,
-                            theta_crit2, r2, epsilon_i, epsilon_j);
+  return gravity_M2L_accept_symmetric(props, multi_i, multi_j, r2,
+                                      use_rebuild_data, periodic);
 }
diff --git a/src/cell.h b/src/cell.h
index e84e00bcd77972be5e2a89d89b7d180a96bd637a..9666d44d1fba32e9543ebf44151ebf69d3a84b52 100644
--- a/src/cell.h
+++ b/src/cell.h
@@ -973,9 +973,8 @@ void cell_reorder_extra_gparts(struct cell *c, struct part *parts,
                                struct spart *sparts);
 void cell_reorder_extra_sparts(struct cell *c, const ptrdiff_t sparts_offset);
 int cell_can_use_pair_mm(const struct cell *ci, const struct cell *cj,
-                         const struct engine *e, const struct space *s);
-int cell_can_use_pair_mm_rebuild(const struct cell *ci, const struct cell *cj,
-                                 const struct engine *e, const struct space *s);
+                         const struct engine *e, const struct space *s,
+                         const int use_rebuild_data, const int is_tree_walk);
 
 /**
  * @brief Compute the square of the minimal distance between any two points in
diff --git a/src/drift.h b/src/drift.h
index 74ac46346f8b2038f65793cb172f5138bbfa1545..2d1ee00378c51ac165c699d4ce0c47f24b64f49a 100644
--- a/src/drift.h
+++ b/src/drift.h
@@ -43,7 +43,7 @@
  */
 __attribute__((always_inline)) INLINE static void drift_gpart(
     struct gpart *restrict gp, double dt_drift, integertime_t ti_old,
-    integertime_t ti_current) {
+    integertime_t ti_current, const struct gravity_props *grav_props) {
 
 #ifdef SWIFT_DEBUG_CHECKS
   if (gp->ti_drift != ti_old)
@@ -60,6 +60,8 @@ __attribute__((always_inline)) INLINE static void drift_gpart(
   gp->x[0] += gp->v_full[0] * dt_drift;
   gp->x[1] += gp->v_full[1] * dt_drift;
   gp->x[2] += gp->v_full[2] * dt_drift;
+
+  gravity_predict_extra(gp, grav_props);
 }
 
 /**
diff --git a/src/engine.c b/src/engine.c
index e7fe2968a9b964c0526258e0f46d0d7364847c4f..15bc1b8ba7ea20c84220b69bc94da2fe0295c0c6 100644
--- a/src/engine.c
+++ b/src/engine.c
@@ -77,6 +77,7 @@
 #include "memuse.h"
 #include "minmax.h"
 #include "mpiuse.h"
+#include "multipole_struct.h"
 #include "output_list.h"
 #include "output_options.h"
 #include "parallel_io.h"
@@ -2044,19 +2045,18 @@ void engine_skip_force_and_kick(struct engine *e) {
         t->type == task_type_timestep ||
         t->type == task_type_timestep_limiter ||
         t->type == task_type_timestep_sync ||
-        t->subtype == task_subtype_force ||
-        t->subtype == task_subtype_limiter || t->subtype == task_subtype_grav ||
-        t->type == task_type_end_hydro_force ||
-        t->type == task_type_end_grav_force ||
-        t->type == task_type_grav_long_range || t->type == task_type_grav_mm ||
-        t->type == task_type_grav_down || t->type == task_type_grav_down_in ||
-        t->type == task_type_drift_gpart_out || t->type == task_type_cooling ||
+        t->type == task_type_end_hydro_force || t->type == task_type_cooling ||
         t->type == task_type_stars_in || t->type == task_type_stars_out ||
         t->type == task_type_star_formation ||
         t->type == task_type_stars_resort || t->type == task_type_extra_ghost ||
+        t->type == task_type_stars_ghost ||
+        t->type == task_type_stars_ghost_in ||
+        t->type == task_type_stars_ghost_out ||
         t->type == task_type_bh_swallow_ghost1 ||
         t->type == task_type_bh_swallow_ghost2 ||
-        t->type == task_type_bh_swallow_ghost3 ||
+        t->type == task_type_bh_swallow_ghost3 || t->type == task_type_bh_in ||
+        t->type == task_type_bh_out || t->subtype == task_subtype_force ||
+        t->subtype == task_subtype_limiter ||
         t->subtype == task_subtype_gradient ||
         t->subtype == task_subtype_stars_feedback ||
         t->subtype == task_subtype_bh_feedback ||
@@ -2072,8 +2072,7 @@ void engine_skip_force_and_kick(struct engine *e) {
         t->subtype == task_subtype_tend_gpart ||
         t->subtype == task_subtype_tend_spart ||
         t->subtype == task_subtype_tend_bpart ||
-        t->subtype == task_subtype_rho || t->subtype == task_subtype_gpart ||
-        t->subtype == task_subtype_sf_counts)
+        t->subtype == task_subtype_rho || t->subtype == task_subtype_sf_counts)
       t->skip = 1;
   }
 
@@ -2196,7 +2195,8 @@ void engine_init_particles(struct engine *e, int flag_entropy_ICs,
   if (e->nodeID == 0) message("Setting particles to a valid state...");
   engine_first_init_particles(e);
 
-  if (e->nodeID == 0) message("Computing initial gas densities.");
+  if (e->nodeID == 0)
+    message("Computing initial gas densities and approximate gravity.");
 
   /* Construct all cells and tasks to start everything */
   engine_rebuild(e, 0, clean_h_values);
@@ -2268,6 +2268,10 @@ void engine_init_particles(struct engine *e, int flag_entropy_ICs,
   /* Now time to get ready for the first time-step */
   if (e->nodeID == 0) message("Running initial fake time-step.");
 
+  /* Update the MAC strategy if necessary */
+  if (e->policy & engine_policy_self_gravity)
+    gravity_props_update_MAC_choice(e->gravity_properties);
+
   /* Construct all cells again for a new round (need to update h_max) */
   engine_rebuild(e, 0, 0);
 
@@ -3131,8 +3135,8 @@ void engine_makeproxies(struct engine *e) {
   /* Get some info about the physics */
   const int with_hydro = (e->policy & engine_policy_hydro);
   const int with_gravity = (e->policy & engine_policy_self_gravity);
-  const double theta_crit_inv = e->gravity_properties->theta_crit_inv;
-  const double theta_crit2 = e->gravity_properties->theta_crit2;
+  const double theta_crit = e->gravity_properties->theta_crit;
+  const double theta_crit_inv = 1. / e->gravity_properties->theta_crit;
   const double max_mesh_dist = e->mesh->r_cut_max;
   const double max_mesh_dist2 = max_mesh_dist * max_mesh_dist;
 
@@ -3276,26 +3280,21 @@ void engine_makeproxies(struct engine *e) {
                       sqrt(min_dist_centres2) - 2. * delta_CoM;
                   const double min_dist_CoM2 = min_dist_CoM * min_dist_CoM;
 
-                  /* We also assume that the softening is negligible compared
-                     to the cell size */
-                  const double epsilon_i = 0.;
-                  const double epsilon_j = 0.;
-
                   /* Are we beyond the distance where the truncated forces are 0
                    * but not too far such that M2L can be used? */
+                  /* The acceptance test compares squared lengths:
+                   * (2 r_max)^2 against (theta_crit * min_dist_CoM)^2 */
                   if (periodic) {
 
                     if ((min_dist_CoM2 < max_mesh_dist2) &&
-                        (!gravity_M2L_accept(r_max, r_max, theta_crit2,
-                                             min_dist_CoM2, epsilon_i,
-                                             epsilon_j)))
+                        !(4. * r_max * r_max <
+                          theta_crit * theta_crit * min_dist_CoM2))
                       proxy_type |= (int)proxy_cell_type_gravity;
 
                   } else {
 
-                    if (!gravity_M2L_accept(r_max, r_max, theta_crit2,
-                                            min_dist_CoM2, epsilon_i,
-                                            epsilon_j))
+                    if (!(4. * r_max * r_max <
+                          theta_crit * theta_crit * min_dist_CoM2))
                       proxy_type |= (int)proxy_cell_type_gravity;
                   }
                 }
diff --git a/src/engine_maketasks.c b/src/engine_maketasks.c
index 9e48e5826fc13256647235ffa1c5dad00adb76bb..b73aa475c008cbb77f53e981d49f61c43ab845fb 100644
--- a/src/engine_maketasks.c
+++ b/src/engine_maketasks.c
@@ -1381,7 +1381,8 @@ void engine_make_self_gravity_tasks_mapper(void *map_data, int num_elements,
           if (periodic && min_radius2 > max_distance2) continue;
 
           /* Are the cells too close for a MM interaction ? */
-          if (!cell_can_use_pair_mm_rebuild(ci, cj, e, s)) {
+          if (!cell_can_use_pair_mm(ci, cj, e, s, /*use_rebuild_data=*/1,
+                                    /*is_tree_walk=*/0)) {
 
             /* Ok, we need to add a direct pair calculation */
             scheduler_addtask(sched, task_type_pair, task_subtype_grav, 0, 0,
diff --git a/src/exp.h b/src/exp.h
new file mode 100644
index 0000000000000000000000000000000000000000..e3820a56c7e7fa6ca565374bc984514798c03faa
--- /dev/null
+++ b/src/exp.h
@@ -0,0 +1,72 @@
+/*******************************************************************************
+ * This file is part of SWIFT.
+ * Copyright (c) 2020 Matthieu Schaller (schaller@strw.leidenuniv.nl)
+ *
+ * This program is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU Lesser General Public License as published
+ * by the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public License
+ * along with this program.  If not, see <http://www.gnu.org/licenses/>.
+ *
+ ******************************************************************************/
+#ifndef SWIFT_OPTIMIZED_EXP_H
+#define SWIFT_OPTIMIZED_EXP_H
+
+/* Config parameters. */
+#include "../config.h"
+
+/* Local headers. */
+#include "inline.h"
+
+/* Standard headers */
+#include <math.h>
+
+/**
+ * @brief Compute the exponential of a number.
+ *
+ * Relative accuracy of 1.618e-6 over the input range [-32., 32.].
+ *
+ * @param x The number to take the exponential of.
+ * @return The approximation of e^x.
+ */
+__attribute__((always_inline, const)) INLINE static float optimized_expf(
+    const float x) {
+
+  /* Let's first express e^x as 2^i * e^f with
+   * f in the range [-ln(2)/2, ln(2)/2] */
+  const float i = rintf(x * ((float)M_LOG2E));
+  const float f = x - ((float)M_LN2) * i;
+
+  /* We can now compute exp(f) using a polynomial
+   * approximation valid over the range [-ln(2)/2, ln(2)/2].
+   * The coefficients come from the Cephes library and
+   * have been obtained using a minmax algorithm */
+  float exp_f = 0.041944388f;
+  exp_f = exp_f * f + 0.168006673f;
+  exp_f = exp_f * f + 0.499999940f;
+  exp_f = exp_f * f + 0.999956906f;
+  exp_f = exp_f * f + 0.999999642f;
+
+  union {
+    int i;
+    float f;
+  } e;
+
+  /* Build the result by adding i to the exponent field of exp_f
+   * (assumes a 32-bit int and a 32-bit IEEE-754 float). We multiply
+   * by 2^23 instead of left-shifting because shifting a negative
+   * signed integer is undefined behaviour in C. */
+  e.f = exp_f;
+  e.i += ((int)i) * (1 << 23);
+
+  return e.f;
+}
+
+#endif /* SWIFT_OPTIMIZED_EXP_H */
diff --git a/src/gravity.c b/src/gravity.c
index 6285acc01f5379dbb9d6e40c10b8189923d4645f..8c013139aa6771342e598bd543558bfa0ffb98fa 100644
--- a/src/gravity.c
+++ b/src/gravity.c
@@ -36,6 +36,8 @@
 /* Local headers. */
 #include "active.h"
 #include "error.h"
+#include "kernel_gravity.h"
+#include "kernel_long_gravity.h"
 #include "threadpool.h"
 #include "version.h"
 
diff --git a/src/gravity/Default/gravity.h b/src/gravity/Default/gravity.h
index ca91265c54262badcb3ebde5b32bd7270be1080a..f258e8604f18c45df2dac8a8966c7be276c3d24d 100644
--- a/src/gravity/Default/gravity.h
+++ b/src/gravity/Default/gravity.h
@@ -212,6 +212,17 @@ __attribute__((always_inline)) INLINE static void gravity_end_force(
 #endif
 }
 
+/**
+ * @brief Update the #gpart after a drift step.
+ *
+ * Typically used to update the softening lengths; a no-op in this scheme.
+ *
+ * @param gp The particle to act upon (not read or written here).
+ * @param grav_props The global properties of the gravity calculation (unused).
+ */
+__attribute__((always_inline)) INLINE static void gravity_predict_extra(
+    struct gpart* gp, const struct gravity_props* grav_props) {}
+
 /**
  * @brief Kick the additional variables
  *
diff --git a/src/gravity/Default/gravity_iact.h b/src/gravity/Default/gravity_iact.h
index 7201db6864f77b989b2e28d1e275713a52480f62..85f2236bc5610294faae6a868fc2282d3c7a0ad4 100644
--- a/src/gravity/Default/gravity_iact.h
+++ b/src/gravity/Default/gravity_iact.h
@@ -61,9 +61,7 @@ runner_iact_grav_pp_full(const float r2, const float h2, const float h_inv,
 
     const float r = r2 * r_inv;
     const float ui = r * h_inv;
-
-    float W_f_ij;
-    kernel_grav_force_eval(ui, &W_f_ij);
+    const float W_f_ij = kernel_grav_force_eval(ui);
 
     /* Get softened gravity */
     *f_ij = mass * h_inv3 * W_f_ij;
@@ -108,9 +106,7 @@ runner_iact_grav_pp_truncated(const float r2, const float h2, const float h_inv,
   } else {
 
     const float ui = r * h_inv;
-    float W_f_ij;
-
-    kernel_grav_force_eval(ui, &W_f_ij);
+    const float W_f_ij = kernel_grav_force_eval(ui);
 
     /* Get softened gravity */
     *f_ij = mass * h_inv3 * W_f_ij;
@@ -118,8 +114,8 @@ runner_iact_grav_pp_truncated(const float r2, const float h2, const float h_inv,
 
   /* Get long-range correction */
   const float u_lr = r * r_s_inv;
-  float corr_f_lr;
-  kernel_long_grav_force_eval(u_lr, &corr_f_lr);
+  float corr_f_lr, dummy;
+  kernel_long_grav_eval(u_lr, &corr_f_lr, &dummy);
   *f_ij *= corr_f_lr;
 
   /* No potential calculation */
@@ -149,7 +145,12 @@ runner_iact_grav_pm_full(const float r_x, const float r_y, const float r_z,
                          float *restrict pot) {
 
   /* Use the M2P kernel */
-  struct reduced_grav_tensor l = {0.f, 0.f, 0.f, 0.f};
+  struct reduced_grav_tensor l;
+  l.F_000 = 0.f;
+  l.F_100 = 0.f;
+  l.F_010 = 0.f;
+  l.F_001 = 0.f;
+
   gravity_M2P(m, r_x, r_y, r_z, r2, h, /*periodic=*/0, /*rs_inv=*/0.f, &l);
 
   /* Write back */
@@ -186,7 +187,12 @@ runner_iact_grav_pm_truncated(const float r_x, const float r_y, const float r_z,
                               float *restrict f_z, float *restrict pot) {
 
   /* Use the M2P kernel */
-  struct reduced_grav_tensor l = {0.f, 0.f, 0.f, 0.f};
+  struct reduced_grav_tensor l;
+  l.F_000 = 0.f;
+  l.F_100 = 0.f;
+  l.F_010 = 0.f;
+  l.F_001 = 0.f;
+
   gravity_M2P(m, r_x, r_y, r_z, r2, h, /*periodic=*/1, r_s_inv, &l);
 
   /* Write back */
diff --git a/src/gravity/MultiSoftening/gravity.h b/src/gravity/MultiSoftening/gravity.h
index d7716cf712254054e3cf364a807f3cb1c12355f3..e436cc3c144571645f32879d2ce950d64d080825 100644
--- a/src/gravity/MultiSoftening/gravity.h
+++ b/src/gravity/MultiSoftening/gravity.h
@@ -23,7 +23,6 @@
 #include <float.h>
 
 /* Local includes. */
-#include "accumulate.h"
 #include "cosmology.h"
 #include "error.h"
 #include "gravity_properties.h"
@@ -50,23 +49,7 @@ __attribute__((always_inline)) INLINE static float gravity_get_mass(
 __attribute__((always_inline)) INLINE static float gravity_get_softening(
     const struct gpart* gp, const struct gravity_props* restrict grav_props) {
 
-  switch (gp->type) {
-    case swift_type_dark_matter:
-      return grav_props->epsilon_DM_cur;
-    case swift_type_stars:
-      return grav_props->epsilon_baryon_cur;
-    case swift_type_gas:
-      return grav_props->epsilon_baryon_cur;
-    case swift_type_black_hole:
-      return grav_props->epsilon_baryon_cur;
-    case swift_type_dark_matter_background:
-      return grav_props->epsilon_background_fac * cbrtf(gp->mass);
-    default:
-#ifdef SWIFT_DEBUG_CHECKS
-      error("Invalid gpart type!");
-#endif
-      return 0.f;
-  }
+  return gp->epsilon;
 }
 
 /**
@@ -78,7 +61,7 @@ __attribute__((always_inline)) INLINE static float gravity_get_softening(
 __attribute__((always_inline)) INLINE static void
 gravity_add_comoving_potential(struct gpart* restrict gp, float pot) {
 
-  accumulate_add_f(&gp->potential, pot);
+  gp->potential += pot;
 }
 
 /**
@@ -206,6 +189,18 @@ __attribute__((always_inline)) INLINE static void gravity_end_force(
   /* Apply the periodic correction to the peculiar potential */
   if (periodic) gp->potential += potential_normalisation;
 
+  /* Record the norm of the acceleration for the adaptive opening criteria.
+   * Will always be an (active) timestep behind. */
+  gp->old_a_grav_norm = gp->a_grav[0] * gp->a_grav[0] +
+                        gp->a_grav[1] * gp->a_grav[1] +
+                        gp->a_grav[2] * gp->a_grav[2];
+
+  gp->old_a_grav_norm = sqrtf(gp->old_a_grav_norm);
+
+#ifdef SWIFT_DEBUG_CHECKS
+  if (gp->old_a_grav_norm == 0.f) error("Old acceleration is 0!");
+#endif
+
   /* Let's get physical... */
   gp->a_grav[0] *= const_G;
   gp->a_grav[1] *= const_G;
@@ -227,6 +222,41 @@ __attribute__((always_inline)) INLINE static void gravity_end_force(
 #endif
 }
 
+/**
+ * @brief Refresh the per-particle softening length of a #gpart.
+ *
+ * Called after a drift step: copies the current softening length that
+ * matches the particle's type from the gravity properties into the particle.
+ *
+ * @param gp The particle to act upon
+ * @param grav_props The global properties of the gravity calculation.
+ */
+__attribute__((always_inline)) INLINE static void gravity_predict_extra(
+    struct gpart* gp, const struct gravity_props* grav_props) {
+
+  switch (gp->type) {
+    case swift_type_dark_matter:
+      gp->epsilon = grav_props->epsilon_DM_cur;
+      break;
+    /* Stars, gas and black holes all share the baryon softening */
+    case swift_type_stars:
+    case swift_type_gas:
+    case swift_type_black_hole:
+      gp->epsilon = grav_props->epsilon_baryon_cur;
+      break;
+    /* Background particles: softening scales with the cube root of mass */
+    case swift_type_dark_matter_background:
+      gp->epsilon = grav_props->epsilon_background_fac * cbrtf(gp->mass);
+      break;
+    /* Any other type: epsilon is left untouched (error() in debug builds) */
+    default:
+#ifdef SWIFT_DEBUG_CHECKS
+      error("Invalid gpart type!");
+#endif
+      break;
+  }
+}
+
 /**
  * @brief Kick the additional variables
  *
@@ -258,6 +288,30 @@ __attribute__((always_inline)) INLINE static void gravity_first_init_gpart(
     struct gpart* gp, const struct gravity_props* grav_props) {
 
   gp->time_bin = 0;
+  gp->old_a_grav_norm = 0.f;
+
+  switch (gp->type) {
+    case swift_type_dark_matter:
+      gp->epsilon = grav_props->epsilon_DM_cur;
+      break;
+    case swift_type_stars:
+      gp->epsilon = grav_props->epsilon_baryon_cur;
+      break;
+    case swift_type_gas:
+      gp->epsilon = grav_props->epsilon_baryon_cur;
+      break;
+    case swift_type_black_hole:
+      gp->epsilon = grav_props->epsilon_baryon_cur;
+      break;
+    case swift_type_dark_matter_background:
+      gp->epsilon = grav_props->epsilon_background_fac * cbrtf(gp->mass);
+      break;
+    default:
+#ifdef SWIFT_DEBUG_CHECKS
+      error("Invalid gpart type!");
+#endif
+      break;
+  }
 
   gravity_init_gpart(gp);
 }
diff --git a/src/gravity/MultiSoftening/gravity_iact.h b/src/gravity/MultiSoftening/gravity_iact.h
index 42ac2bebea16ed46b4cac74b7029e21f310afc1d..2b51e1e7ff31dcb2874be89e4f17f435ab7e5a05 100644
--- a/src/gravity/MultiSoftening/gravity_iact.h
+++ b/src/gravity/MultiSoftening/gravity_iact.h
@@ -62,10 +62,8 @@ runner_iact_grav_pp_full(const float r2, const float h2, const float h_inv,
 
     const float r = r2 * r_inv;
     const float ui = r * h_inv;
-
-    float W_f_ij, W_pot_ij;
-    kernel_grav_force_eval(ui, &W_f_ij);
-    kernel_grav_pot_eval(ui, &W_pot_ij);
+    const float W_f_ij = kernel_grav_force_eval(ui);
+    const float W_pot_ij = kernel_grav_pot_eval(ui);
 
     /* Get softened gravity */
     *f_ij = mass * h_inv3 * W_f_ij;
@@ -109,10 +107,8 @@ runner_iact_grav_pp_truncated(const float r2, const float h2, const float h_inv,
   } else {
 
     const float ui = r * h_inv;
-    float W_f_ij, W_pot_ij;
-
-    kernel_grav_force_eval(ui, &W_f_ij);
-    kernel_grav_pot_eval(ui, &W_pot_ij);
+    const float W_f_ij = kernel_grav_force_eval(ui);
+    const float W_pot_ij = kernel_grav_pot_eval(ui);
 
     /* Get softened gravity */
     *f_ij = mass * h_inv3 * W_f_ij;
@@ -122,8 +118,7 @@ runner_iact_grav_pp_truncated(const float r2, const float h2, const float h_inv,
   /* Get long-range correction */
   const float u_lr = r * r_s_inv;
   float corr_f_lr, corr_pot_lr;
-  kernel_long_grav_force_eval(u_lr, &corr_f_lr);
-  kernel_long_grav_pot_eval(u_lr, &corr_pot_lr);
+  kernel_long_grav_eval(u_lr, &corr_f_lr, &corr_pot_lr);
   *f_ij *= corr_f_lr;
   *pot_ij *= corr_pot_lr;
 }
@@ -151,7 +146,12 @@ runner_iact_grav_pm_full(const float r_x, const float r_y, const float r_z,
                          float *restrict pot) {
 
   /* Use the M2P kernel */
-  struct reduced_grav_tensor l = {0.f, 0.f, 0.f, 0.f};
+  struct reduced_grav_tensor l;
+  l.F_000 = 0.f;
+  l.F_100 = 0.f;
+  l.F_010 = 0.f;
+  l.F_001 = 0.f;
+
   gravity_M2P(m, r_x, r_y, r_z, r2, h, /*periodic=*/0, /*rs_inv=*/0.f, &l);
 
   /* Write back */
@@ -186,7 +186,12 @@ runner_iact_grav_pm_truncated(const float r_x, const float r_y, const float r_z,
                               float *restrict f_z, float *restrict pot) {
 
   /* Use the M2P kernel */
-  struct reduced_grav_tensor l = {0.f, 0.f, 0.f, 0.f};
+  struct reduced_grav_tensor l;
+  l.F_000 = 0.f;
+  l.F_100 = 0.f;
+  l.F_010 = 0.f;
+  l.F_001 = 0.f;
+
   gravity_M2P(m, r_x, r_y, r_z, r2, h, /*periodic=*/1, r_s_inv, &l);
 
   /* Write back */
diff --git a/src/gravity/MultiSoftening/gravity_part.h b/src/gravity/MultiSoftening/gravity_part.h
index 2d6d0d9bfbb18040faa282b63a0aa573b751a182..49594f342606734951a099e621d9fab64ed64bbd 100644
--- a/src/gravity/MultiSoftening/gravity_part.h
+++ b/src/gravity/MultiSoftening/gravity_part.h
@@ -43,6 +43,12 @@ struct gpart {
   /*! Particle mass. */
   float mass;
 
+  /*! Norm of the acceleration at the previous step. */
+  float old_a_grav_norm;
+
+  /*! Current co-moving spline softening of the particle */
+  float epsilon;
+
   /*! Particle FoF properties (group ID, group size, ...) */
   struct fof_gpart_data fof_data;
 
diff --git a/src/gravity/Potential/gravity.h b/src/gravity/Potential/gravity.h
index f9a9502a528c161fbc82b3028f303b7f9cad49f8..4ce1e73ef3cef94065aca3ece3fa344cc5231d70 100644
--- a/src/gravity/Potential/gravity.h
+++ b/src/gravity/Potential/gravity.h
@@ -23,7 +23,6 @@
 #include <float.h>
 
 /* Local includes. */
-#include "accumulate.h"
 #include "cosmology.h"
 #include "gravity_properties.h"
 #include "kernel_gravity.h"
@@ -64,7 +63,7 @@ __attribute__((always_inline)) INLINE static float gravity_get_softening(
 __attribute__((always_inline)) INLINE static void
 gravity_add_comoving_potential(struct gpart* restrict gp, float pot) {
 
-  accumulate_add_f(&gp->potential, pot);
+  gp->potential += pot;
 }
 
 /**
@@ -195,6 +194,17 @@ __attribute__((always_inline)) INLINE static void gravity_end_force(
 #endif
 }
 
+/**
+ * @brief Update the #gpart after a drift step.
+ *
+ * Typically used to update the softening lengths; a no-op in this scheme.
+ *
+ * @param gp The particle to act upon (not read or written here).
+ * @param grav_props The global properties of the gravity calculation (unused).
+ */
+__attribute__((always_inline)) INLINE static void gravity_predict_extra(
+    struct gpart* gp, const struct gravity_props* grav_props) {}
+
 /**
  * @brief Kick the additional variables
  *
diff --git a/src/gravity/Potential/gravity_iact.h b/src/gravity/Potential/gravity_iact.h
index 66e971eae861d5205fb1289a6fee8a09bd941031..feb0abecdb3ecd43a4b9f2d9152ddb0028939e8a 100644
--- a/src/gravity/Potential/gravity_iact.h
+++ b/src/gravity/Potential/gravity_iact.h
@@ -62,10 +62,8 @@ runner_iact_grav_pp_full(const float r2, const float h2, const float h_inv,
 
     const float r = r2 * r_inv;
     const float ui = r * h_inv;
-
-    float W_f_ij, W_pot_ij;
-    kernel_grav_force_eval(ui, &W_f_ij);
-    kernel_grav_pot_eval(ui, &W_pot_ij);
+    const float W_f_ij = kernel_grav_force_eval(ui);
+    const float W_pot_ij = kernel_grav_pot_eval(ui);
 
     /* Get softened gravity */
     *f_ij = mass * h_inv3 * W_f_ij;
@@ -109,10 +107,8 @@ runner_iact_grav_pp_truncated(const float r2, const float h2, const float h_inv,
   } else {
 
     const float ui = r * h_inv;
-    float W_f_ij, W_pot_ij;
-
-    kernel_grav_force_eval(ui, &W_f_ij);
-    kernel_grav_pot_eval(ui, &W_pot_ij);
+    const float W_f_ij = kernel_grav_force_eval(ui);
+    const float W_pot_ij = kernel_grav_pot_eval(ui);
 
     /* Get softened gravity */
     *f_ij = mass * h_inv3 * W_f_ij;
@@ -122,8 +118,7 @@ runner_iact_grav_pp_truncated(const float r2, const float h2, const float h_inv,
   /* Get long-range correction */
   const float u_lr = r * r_s_inv;
   float corr_f_lr, corr_pot_lr;
-  kernel_long_grav_force_eval(u_lr, &corr_f_lr);
-  kernel_long_grav_pot_eval(u_lr, &corr_pot_lr);
+  kernel_long_grav_eval(u_lr, &corr_f_lr, &corr_pot_lr);
   *f_ij *= corr_f_lr;
   *pot_ij *= corr_pot_lr;
 }
@@ -151,7 +146,12 @@ runner_iact_grav_pm_full(const float r_x, const float r_y, const float r_z,
                          float *restrict pot) {
 
   /* Use the M2P kernel */
-  struct reduced_grav_tensor l = {0.f, 0.f, 0.f, 0.f};
+  struct reduced_grav_tensor l;
+  l.F_000 = 0.f;
+  l.F_100 = 0.f;
+  l.F_010 = 0.f;
+  l.F_001 = 0.f;
+
   gravity_M2P(m, r_x, r_y, r_z, r2, h, /*periodic=*/0, /*rs_inv=*/0.f, &l);
 
   /* Write back */
@@ -186,7 +186,12 @@ runner_iact_grav_pm_truncated(const float r_x, const float r_y, const float r_z,
                               float *restrict f_z, float *restrict pot) {
 
   /* Use the M2P kernel */
-  struct reduced_grav_tensor l = {0.f, 0.f, 0.f, 0.f};
+  struct reduced_grav_tensor l;
+  l.F_000 = 0.f;
+  l.F_100 = 0.f;
+  l.F_010 = 0.f;
+  l.F_001 = 0.f;
+
   gravity_M2P(m, r_x, r_y, r_z, r2, h, /*periodic=*/1, r_s_inv, &l);
 
   /* Write back */
diff --git a/src/gravity_cache.h b/src/gravity_cache.h
index e96f1ada2109eb4fffea531de16ab4258faaa1de..75fb640457f21f26e2bd81b216692fb6fd52790e 100644
--- a/src/gravity_cache.h
+++ b/src/gravity_cache.h
@@ -27,7 +27,7 @@
 #include "align.h"
 #include "error.h"
 #include "gravity.h"
-#include "multipole.h"
+#include "multipole_accept.h"
 #include "vector.h"
 
 /**
@@ -154,8 +154,8 @@ static INLINE void gravity_cache_init(struct gravity_cache *c,
  * @param c The #gravity_cache to zero.
  * @param gcount_padded The padded size of the cache arrays.
  */
-__attribute__((always_inline)) INLINE static void gravity_cache_zero_output(
-    struct gravity_cache *c, const int gcount_padded) {
+INLINE static void gravity_cache_zero_output(struct gravity_cache *c,
+                                             const int gcount_padded) {
 
 #ifdef SWIFT_DEBUG_CHECKS
   if (gcount_padded % VEC_SIZE != 0)
@@ -193,20 +193,18 @@ __attribute__((always_inline)) INLINE static void gravity_cache_zero_output(
  * multiple of the vector length.
  * @param shift A shift to apply to all the particles.
  * @param CoM The position of the multipole.
- * @param r_max2 The square of the multipole radius.
+ * @param multipole The multipole to check for.
  * @param cell The cell we play with (to get reasonable padding positions).
  * @param grav_props The global gravity properties.
  */
-__attribute__((always_inline)) INLINE static void gravity_cache_populate(
+INLINE static void gravity_cache_populate(
     const timebin_t max_active_bin, const int allow_mpole, const int periodic,
     const float dim[3], struct gravity_cache *c,
     const struct gpart *restrict gparts, const int gcount,
     const int gcount_padded, const double shift[3], const float CoM[3],
-    const float r_max2, const struct cell *cell,
+    const struct gravity_tensors *multipole, const struct cell *cell,
     const struct gravity_props *grav_props) {
 
-  const float theta_crit2 = grav_props->theta_crit2;
-
 #ifdef SWIFT_DEBUG_CHECKS
   if (gcount_padded < gcount) error("Invalid padded cache size. Too small.");
   if (gcount_padded % VEC_SIZE != 0)
@@ -227,6 +225,9 @@ __attribute__((always_inline)) INLINE static void gravity_cache_populate(
   swift_assume_size(gcount_padded, VEC_SIZE);
 
   /* Fill the input caches */
+#ifndef SWIFT_DEBUG_CHECKS
+#pragma omp simd
+#endif
   for (int i = 0; i < gcount; ++i) {
 
     x[i] = (float)(gparts[i].x[0] - shift[0]);
@@ -257,8 +258,8 @@ __attribute__((always_inline)) INLINE static void gravity_cache_populate(
     const float r2 = dx * dx + dy * dy + dz * dz;
 
     /* Check whether we can use the multipole instead of P-P */
-    use_mpole[i] =
-        allow_mpole && gravity_M2P_accept(r_max2, theta_crit2, r2, epsilon[i]);
+    use_mpole[i] = allow_mpole && gravity_M2P_accept(grav_props, &gparts[i],
+                                                     multipole, r2, periodic);
   }
 
 #ifdef SWIFT_DEBUG_CHECKS
@@ -300,13 +301,11 @@ __attribute__((always_inline)) INLINE static void gravity_cache_populate(
  * @param cell The cell we play with (to get reasonable padding positions).
  * @param grav_props The global gravity properties.
  */
-__attribute__((always_inline)) INLINE static void
-gravity_cache_populate_no_mpole(const timebin_t max_active_bin,
-                                struct gravity_cache *c,
-                                const struct gpart *restrict gparts,
-                                const int gcount, const int gcount_padded,
-                                const double shift[3], const struct cell *cell,
-                                const struct gravity_props *grav_props) {
+INLINE static void gravity_cache_populate_no_mpole(
+    const timebin_t max_active_bin, struct gravity_cache *c,
+    const struct gpart *restrict gparts, const int gcount,
+    const int gcount_padded, const double shift[3], const struct cell *cell,
+    const struct gravity_props *grav_props) {
 
 #ifdef SWIFT_DEBUG_CHECKS
   if (gcount_padded < gcount) error("Invalid padded cache size. Too small.");
@@ -381,18 +380,15 @@ gravity_cache_populate_no_mpole(const timebin_t max_active_bin,
  * multiple of the vector length.
  * @param cell The cell we play with (to get reasonable padding positions).
  * @param CoM The position of the multipole.
- * @param r_max2 The square of the multipole radius.
+ * @param multipole The multipole to check for.
  * @param grav_props The global gravity properties.
  */
-__attribute__((always_inline)) INLINE static void
-gravity_cache_populate_all_mpole(const timebin_t max_active_bin,
-                                 const int periodic, const float dim[3],
-                                 struct gravity_cache *c,
-                                 const struct gpart *restrict gparts,
-                                 const int gcount, const int gcount_padded,
-                                 const struct cell *cell, const float CoM[3],
-                                 const float r_max2,
-                                 const struct gravity_props *grav_props) {
+INLINE static void gravity_cache_populate_all_mpole(
+    const timebin_t max_active_bin, const int periodic, const float dim[3],
+    struct gravity_cache *c, const struct gpart *restrict gparts,
+    const int gcount, const int gcount_padded, const struct cell *cell,
+    const float CoM[3], const struct gravity_tensors *multipole,
+    const struct gravity_props *grav_props) {
 
 #ifdef SWIFT_DEBUG_CHECKS
   if (gcount_padded < gcount) error("Invalid padded cache size. Too small.");
@@ -400,8 +396,6 @@ gravity_cache_populate_all_mpole(const timebin_t max_active_bin,
     error("Padded gravity cache size invalid. Not a multiple of SIMD length.");
   if (c->count < gcount_padded)
     error("Size of the gravity cache is not large enough.");
-
-  const float theta_crit2 = grav_props->theta_crit2;
 #endif
 
   /* Make the compiler understand we are in happy vectorization land */
@@ -439,7 +433,7 @@ gravity_cache_populate_all_mpole(const timebin_t max_active_bin,
     }
     const float r2 = dx * dx + dy * dy + dz * dz;
 
-    if (!gravity_M2P_accept(r_max2, theta_crit2, r2, epsilon[i]))
+    if (!gravity_M2P_accept(grav_props, &gparts[i], multipole, r2, periodic))
       error("Using m-pole where the test fails");
 #endif
   }
@@ -479,9 +473,9 @@ gravity_cache_populate_all_mpole(const timebin_t max_active_bin,
  * @param gparts The #gpart array to write to.
  * @param gcount The number of particles to write.
  */
-__attribute__((always_inline)) INLINE static void gravity_cache_write_back(
-    const struct gravity_cache *c, struct gpart *restrict gparts,
-    const int gcount) {
+INLINE static void gravity_cache_write_back(const struct gravity_cache *c,
+                                            struct gpart *restrict gparts,
+                                            const int gcount) {
 
   /* Make the compiler understand we are in happy vectorization land */
   swift_declare_aligned_ptr(float, a_x, c->a_x, SWIFT_CACHE_ALIGNMENT);
@@ -491,11 +485,14 @@ __attribute__((always_inline)) INLINE static void gravity_cache_write_back(
   swift_declare_aligned_ptr(int, active, c->active, SWIFT_CACHE_ALIGNMENT);
 
   /* Write stuff back to the particles */
+#ifndef SWIFT_DEBUG_CHECKS
+#pragma omp simd
+#endif
   for (int i = 0; i < gcount; ++i) {
     if (active[i]) {
-      accumulate_add_f(&gparts[i].a_grav[0], a_x[i]);
-      accumulate_add_f(&gparts[i].a_grav[1], a_y[i]);
-      accumulate_add_f(&gparts[i].a_grav[2], a_z[i]);
+      gparts[i].a_grav[0] += a_x[i];
+      gparts[i].a_grav[1] += a_y[i];
+      gparts[i].a_grav[2] += a_z[i];
       gravity_add_comoving_potential(&gparts[i], pot[i]);
     }
   }
diff --git a/src/gravity_derivatives.h b/src/gravity_derivatives.h
index 86b4a6c3de4db3c63c64d49a708a4d8ac19a5d93..b14ecc4e738b8c3331f7ad0039e23cb2623a2d2e 100644
--- a/src/gravity_derivatives.h
+++ b/src/gravity_derivatives.h
@@ -220,53 +220,79 @@ potential_derivatives_compute_M2L(const float r_x, const float r_y,
                                   const int periodic, const float r_s_inv,
                                   struct potential_derivatives_M2L *pot) {
 
-#ifdef SWIFT_DEBUG_CHECKS
-  if (r2 < 0.99f * eps * eps)
-    error("Computing M2L derivatives below softening length");
+  float Dt_1;
+#if SELF_GRAVITY_MULTIPOLE_ORDER > 0
+  float Dt_2;
+#endif
+#if SELF_GRAVITY_MULTIPOLE_ORDER > 1
+  float Dt_3;
 #endif
+#if SELF_GRAVITY_MULTIPOLE_ORDER > 2
+  float Dt_4;
+#endif
+#if SELF_GRAVITY_MULTIPOLE_ORDER > 3
+  float Dt_5;
+#endif
+#if SELF_GRAVITY_MULTIPOLE_ORDER > 4
+  float Dt_6;
+#endif
+
+  /* Softened case */
+  if (r2 < eps * eps) {
 
-  float Dt_1 = 0.f;
+    const float eps_inv = 1.f / eps;
+    const float r = r2 * r_inv;
+    const float u = r * eps_inv;
+
+    Dt_1 = eps_inv * D_soft_1(u);
 #if SELF_GRAVITY_MULTIPOLE_ORDER > 0
-  float Dt_3 = 0.f;
+    const float eps_inv2 = eps_inv * eps_inv;
+    Dt_2 = eps_inv2 * D_soft_2(u);
 #endif
 #if SELF_GRAVITY_MULTIPOLE_ORDER > 1
-  float Dt_5 = 0.f;
+    const float eps_inv3 = eps_inv2 * eps_inv;
+    Dt_3 = eps_inv3 * D_soft_3(u);
 #endif
 #if SELF_GRAVITY_MULTIPOLE_ORDER > 2
-  float Dt_7 = 0.f;
+    const float eps_inv4 = eps_inv3 * eps_inv;
+    Dt_4 = eps_inv4 * D_soft_4(u);
 #endif
 #if SELF_GRAVITY_MULTIPOLE_ORDER > 3
-  float Dt_9 = 0.f;
+    const float eps_inv5 = eps_inv4 * eps_inv;
+    Dt_5 = eps_inv5 * D_soft_5(u);
 #endif
 #if SELF_GRAVITY_MULTIPOLE_ORDER > 4
-  float Dt_11 = 0.f;
+    const float eps_inv6 = eps_inv5 * eps_inv;
+    Dt_6 = eps_inv6 * D_soft_6(u);
+#endif
+#if SELF_GRAVITY_MULTIPOLE_ORDER > 5
+#error "Missing implementation for order >5"
 #endif
 
-  /* Un-truncated case (Newtonian potential) */
-  if (!periodic) {
+    /* Un-truncated un-softened case (Newtonian potential) */
+  } else if (!periodic) {
 
-    Dt_1 = r_inv;
-#if SELF_GRAVITY_MULTIPOLE_ORDER > 0
-    const float r_inv2 = r_inv * r_inv;
-    Dt_3 = -1.f * Dt_1 * r_inv2; /* -1 / r^3 */
+    Dt_1 = r_inv; /* 1 / r */
+#if SELF_GRAVITY_MULTIPOLE_ORDER > 0
+    Dt_2 = -1.f * Dt_1 * r_inv; /* -1 / r^2 (needed from order 1 upwards) */
 #endif
 #if SELF_GRAVITY_MULTIPOLE_ORDER > 1
-    Dt_5 = -3.f * Dt_3 * r_inv2; /* 3 / r^5 */
+    Dt_3 = -3.f * Dt_2 * r_inv; /* 3 / r^3 */
 #endif
 #if SELF_GRAVITY_MULTIPOLE_ORDER > 2
-    Dt_7 = -5.f * Dt_5 * r_inv2; /* -15 / r^7 */
+    Dt_4 = -5.f * Dt_3 * r_inv; /* -15 / r^4 */
 #endif
 #if SELF_GRAVITY_MULTIPOLE_ORDER > 3
-    Dt_9 = -7.f * Dt_7 * r_inv2; /* 105 / r^9 */
+    Dt_5 = -7.f * Dt_4 * r_inv; /* 105 / r^5 */
 #endif
 #if SELF_GRAVITY_MULTIPOLE_ORDER > 4
-    Dt_11 = -9.f * Dt_9 * r_inv2; /* -945 / r^11 */
+    Dt_6 = -9.f * Dt_5 * r_inv; /* -945 / r^6 */
 #endif
 #if SELF_GRAVITY_MULTIPOLE_ORDER > 5
 #error "Missing implementation for order >5"
 #endif
 
-    /* Truncated case */
+    /* Truncated case (long-range) */
   } else {
 
     /* Get the derivatives of the truncated potential */
@@ -275,37 +301,57 @@ potential_derivatives_compute_M2L(const float r_x, const float r_y,
     kernel_long_grav_derivatives(r, r_s_inv, &derivs);
 
     Dt_1 = derivs.chi_0 * r_inv;
+
 #if SELF_GRAVITY_MULTIPOLE_ORDER > 0
-    const float r_inv2 = r_inv * r_inv;
-    const float r_inv3 = r_inv2 * r_inv;
-    Dt_3 = (r * derivs.chi_1 - derivs.chi_0) * r_inv3;
+
+    /* -chi^0 r_i^2 + chi^1 r_i^1 */
+    Dt_2 = derivs.chi_1 - derivs.chi_0 * r_inv;
+    Dt_2 = Dt_2 * r_inv;
+
 #endif
 #if SELF_GRAVITY_MULTIPOLE_ORDER > 1
-    const float r_inv5 = r_inv2 * r_inv3;
-    Dt_5 =
-        (r * r * derivs.chi_2 - 3.f * r * derivs.chi_1 + 3.f * derivs.chi_0) *
-        r_inv5;
+
+    /* 3chi^0 r_i^3 - 3 chi^1 r_i^2 + chi^2 r_i^1 */
+    Dt_3 = derivs.chi_0 * r_inv - derivs.chi_1;
+    Dt_3 = Dt_3 * 3.f;
+    Dt_3 = Dt_3 * r_inv + derivs.chi_2;
+    Dt_3 = Dt_3 * r_inv;
+
 #endif
 #if SELF_GRAVITY_MULTIPOLE_ORDER > 2
-    const float r_inv7 = r_inv2 * r_inv5;
-    Dt_7 = (r * r * r * derivs.chi_3 - 6.f * r * r * derivs.chi_2 +
-            15.f * r * derivs.chi_1 - 15.f * derivs.chi_0) *
-           r_inv7;
+
+    /* -15chi^0 r_i^4 + 15 chi^1 r_i^3 - 6 chi^2 r_i^2  + chi^3 r_i^1 */
+    Dt_4 = -derivs.chi_0 * r_inv + derivs.chi_1;
+    Dt_4 = Dt_4 * 15.f;
+    Dt_4 = Dt_4 * r_inv - 6.f * derivs.chi_2;
+    Dt_4 = Dt_4 * r_inv + derivs.chi_3;
+    Dt_4 = Dt_4 * r_inv;
+
 #endif
 #if SELF_GRAVITY_MULTIPOLE_ORDER > 3
-    const float r_inv9 = r_inv2 * r_inv7;
-    Dt_9 = (r * r * r * r * derivs.chi_4 - 10.f * r * r * r * derivs.chi_3 +
-            45.f * r * r * derivs.chi_2 - 105.f * r * derivs.chi_1 +
-            105.f * derivs.chi_0) *
-           r_inv9;
+
+    /* 105chi^0 r_i^5 - 105 chi^1 r_i^4 + 45 chi^2 r_i^3 - 10 chi^3 r_i^2 +
+     * chi^4 r_i^1 */
+    Dt_5 = derivs.chi_0 * r_inv - derivs.chi_1;
+    Dt_5 = Dt_5 * 105.f;
+    Dt_5 = Dt_5 * r_inv + 45.f * derivs.chi_2;
+    Dt_5 = Dt_5 * r_inv - 10.f * derivs.chi_3;
+    Dt_5 = Dt_5 * r_inv + derivs.chi_4;
+    Dt_5 = Dt_5 * r_inv;
+
 #endif
 #if SELF_GRAVITY_MULTIPOLE_ORDER > 4
-    const float r_inv11 = r_inv2 * r_inv9;
-    Dt_11 = (r * r * r * r * r * derivs.chi_5 -
-             15.f * r * r * r * r * derivs.chi_4 +
-             105.f * r * r * r * derivs.chi_3 - 420.f * r * r * derivs.chi_2 +
-             945.f * r * derivs.chi_1 - 945.f * derivs.chi_0) *
-            r_inv11;
+
+    /* -945chi^0 r_i^6 + 945 chi^1 r_i^5 - 420 chi^2 r_i^4 + 105 chi^3 r_i^3 -
+     * 15 chi^4 r_i^2 + chi^5 r_i^1 */
+    Dt_6 = -derivs.chi_0 * r_inv + derivs.chi_1;
+    Dt_6 = Dt_6 * 945.f;
+    Dt_6 = Dt_6 * r_inv - 420.f * derivs.chi_2;
+    Dt_6 = Dt_6 * r_inv + 105.f * derivs.chi_3;
+    Dt_6 = Dt_6 * r_inv - 15.f * derivs.chi_4;
+    Dt_6 = Dt_6 * r_inv + derivs.chi_5;
+    Dt_6 = Dt_6 * r_inv;
+
 #endif
 #if SELF_GRAVITY_MULTIPOLE_ORDER > 5
 #error "Missing implementation for order >5"
@@ -314,112 +360,135 @@ potential_derivatives_compute_M2L(const float r_x, const float r_y,
 
 /* Alright, let's get the full terms */
 
-/* Compute some powers of r_x, r_y and r_z */
+    /* Compute some powers of (r_x / r), (r_y / r) and (r_z / r) */
+#if SELF_GRAVITY_MULTIPOLE_ORDER > 0
+  const float rx_r = r_x * r_inv;
+  const float ry_r = r_y * r_inv;
+  const float rz_r = r_z * r_inv;
+#endif
 #if SELF_GRAVITY_MULTIPOLE_ORDER > 1
-  const float r_x2 = r_x * r_x;
-  const float r_y2 = r_y * r_y;
-  const float r_z2 = r_z * r_z;
+  const float rx_r2 = rx_r * rx_r;
+  const float ry_r2 = ry_r * ry_r;
+  const float rz_r2 = rz_r * rz_r;
 #endif
 #if SELF_GRAVITY_MULTIPOLE_ORDER > 2
-  const float r_x3 = r_x2 * r_x;
-  const float r_y3 = r_y2 * r_y;
-  const float r_z3 = r_z2 * r_z;
+  const float rx_r3 = rx_r2 * rx_r;
+  const float ry_r3 = ry_r2 * ry_r;
+  const float rz_r3 = rz_r2 * rz_r;
 #endif
 #if SELF_GRAVITY_MULTIPOLE_ORDER > 3
-  const float r_x4 = r_x3 * r_x;
-  const float r_y4 = r_y3 * r_y;
-  const float r_z4 = r_z3 * r_z;
+  const float rx_r4 = rx_r3 * rx_r;
+  const float ry_r4 = ry_r3 * ry_r;
+  const float rz_r4 = rz_r3 * rz_r;
 #endif
 #if SELF_GRAVITY_MULTIPOLE_ORDER > 4
-  const float r_x5 = r_x4 * r_x;
-  const float r_y5 = r_y4 * r_y;
-  const float r_z5 = r_z4 * r_z;
-#endif
-#if SELF_GRAVITY_MULTIPOLE_ORDER > 5
-#error "Missing implementation for order >5"
+  const float rx_r5 = rx_r4 * rx_r;
+  const float ry_r5 = ry_r4 * ry_r;
+  const float rz_r5 = rz_r4 * rz_r;
 #endif
 
   /* Get the 0th order term */
   pot->D_000 = Dt_1;
 
-#if SELF_GRAVITY_MULTIPOLE_ORDER > 0
+#if SELF_GRAVITY_MULTIPOLE_ORDER > 1
   /* 1st order derivatives */
-  pot->D_100 = r_x * Dt_3;
-  pot->D_010 = r_y * Dt_3;
-  pot->D_001 = r_z * Dt_3;
+  pot->D_100 = rx_r * Dt_2;
+  pot->D_010 = ry_r * Dt_2;
+  pot->D_001 = rz_r * Dt_2;
 #endif
+
 #if SELF_GRAVITY_MULTIPOLE_ORDER > 1
+
+  Dt_2 *= r_inv;
+
   /* 2nd order derivatives */
-  pot->D_200 = r_x2 * Dt_5 + Dt_3;
-  pot->D_020 = r_y2 * Dt_5 + Dt_3;
-  pot->D_002 = r_z2 * Dt_5 + Dt_3;
-  pot->D_110 = r_x * r_y * Dt_5;
-  pot->D_101 = r_x * r_z * Dt_5;
-  pot->D_011 = r_y * r_z * Dt_5;
+  pot->D_200 = rx_r2 * Dt_3 + Dt_2;
+  pot->D_020 = ry_r2 * Dt_3 + Dt_2;
+  pot->D_002 = rz_r2 * Dt_3 + Dt_2;
+  pot->D_110 = rx_r * ry_r * Dt_3;
+  pot->D_101 = rx_r * rz_r * Dt_3;
+  pot->D_011 = ry_r * rz_r * Dt_3;
 #endif
 #if SELF_GRAVITY_MULTIPOLE_ORDER > 2
+
+  Dt_3 *= r_inv;
+
   /* 3rd order derivatives */
-  pot->D_300 = r_x3 * Dt_7 + 3.f * r_x * Dt_5;
-  pot->D_030 = r_y3 * Dt_7 + 3.f * r_y * Dt_5;
-  pot->D_003 = r_z3 * Dt_7 + 3.f * r_z * Dt_5;
-  pot->D_210 = r_x2 * r_y * Dt_7 + r_y * Dt_5;
-  pot->D_201 = r_x2 * r_z * Dt_7 + r_z * Dt_5;
-  pot->D_120 = r_y2 * r_x * Dt_7 + r_x * Dt_5;
-  pot->D_021 = r_y2 * r_z * Dt_7 + r_z * Dt_5;
-  pot->D_102 = r_z2 * r_x * Dt_7 + r_x * Dt_5;
-  pot->D_012 = r_z2 * r_y * Dt_7 + r_y * Dt_5;
-  pot->D_111 = r_x * r_y * r_z * Dt_7;
+  pot->D_300 = rx_r3 * Dt_4 + 3.f * rx_r * Dt_3;
+  pot->D_030 = ry_r3 * Dt_4 + 3.f * ry_r * Dt_3;
+  pot->D_003 = rz_r3 * Dt_4 + 3.f * rz_r * Dt_3;
+  pot->D_210 = rx_r2 * ry_r * Dt_4 + ry_r * Dt_3;
+  pot->D_201 = rx_r2 * rz_r * Dt_4 + rz_r * Dt_3;
+  pot->D_120 = ry_r2 * rx_r * Dt_4 + rx_r * Dt_3;
+  pot->D_021 = ry_r2 * rz_r * Dt_4 + rz_r * Dt_3;
+  pot->D_102 = rz_r2 * rx_r * Dt_4 + rx_r * Dt_3;
+  pot->D_012 = rz_r2 * ry_r * Dt_4 + ry_r * Dt_3;
+  pot->D_111 = rx_r * ry_r * rz_r * Dt_4;
 #endif
 #if SELF_GRAVITY_MULTIPOLE_ORDER > 3
+
+  Dt_3 *= r_inv;
+  Dt_4 *= r_inv;
+
   /* 4th order derivatives */
-  pot->D_400 = r_x4 * Dt_9 + 6.f * r_x2 * Dt_7 + 3.f * Dt_5;
-  pot->D_040 = r_y4 * Dt_9 + 6.f * r_y2 * Dt_7 + 3.f * Dt_5;
-  pot->D_004 = r_z4 * Dt_9 + 6.f * r_z2 * Dt_7 + 3.f * Dt_5;
-  pot->D_310 = r_x3 * r_y * Dt_9 + 3.f * r_x * r_y * Dt_7;
-  pot->D_301 = r_x3 * r_z * Dt_9 + 3.f * r_x * r_z * Dt_7;
-  pot->D_130 = r_y3 * r_x * Dt_9 + 3.f * r_y * r_x * Dt_7;
-  pot->D_031 = r_y3 * r_z * Dt_9 + 3.f * r_y * r_z * Dt_7;
-  pot->D_103 = r_z3 * r_x * Dt_9 + 3.f * r_z * r_x * Dt_7;
-  pot->D_013 = r_z3 * r_y * Dt_9 + 3.f * r_z * r_y * Dt_7;
-  pot->D_220 = r_x2 * r_y2 * Dt_9 + r_x2 * Dt_7 + r_y2 * Dt_7 + Dt_5;
-  pot->D_202 = r_x2 * r_z2 * Dt_9 + r_x2 * Dt_7 + r_z2 * Dt_7 + Dt_5;
-  pot->D_022 = r_y2 * r_z2 * Dt_9 + r_y2 * Dt_7 + r_z2 * Dt_7 + Dt_5;
-  pot->D_211 = r_x2 * r_y * r_z * Dt_9 + r_y * r_z * Dt_7;
-  pot->D_121 = r_y2 * r_x * r_z * Dt_9 + r_x * r_z * Dt_7;
-  pot->D_112 = r_z2 * r_x * r_y * Dt_9 + r_x * r_y * Dt_7;
+  pot->D_400 = rx_r4 * Dt_5 + 6.f * rx_r2 * Dt_4 + 3.f * Dt_3;
+  pot->D_040 = ry_r4 * Dt_5 + 6.f * ry_r2 * Dt_4 + 3.f * Dt_3;
+  pot->D_004 = rz_r4 * Dt_5 + 6.f * rz_r2 * Dt_4 + 3.f * Dt_3;
+  pot->D_310 = rx_r3 * ry_r * Dt_5 + 3.f * rx_r * ry_r * Dt_4;
+  pot->D_301 = rx_r3 * rz_r * Dt_5 + 3.f * rx_r * rz_r * Dt_4;
+  pot->D_130 = ry_r3 * rx_r * Dt_5 + 3.f * ry_r * rx_r * Dt_4;
+  pot->D_031 = ry_r3 * rz_r * Dt_5 + 3.f * ry_r * rz_r * Dt_4;
+  pot->D_103 = rz_r3 * rx_r * Dt_5 + 3.f * rz_r * rx_r * Dt_4;
+  pot->D_013 = rz_r3 * ry_r * Dt_5 + 3.f * rz_r * ry_r * Dt_4;
+  pot->D_220 = rx_r2 * ry_r2 * Dt_5 + rx_r2 * Dt_4 + ry_r2 * Dt_4 + Dt_3;
+  pot->D_202 = rx_r2 * rz_r2 * Dt_5 + rx_r2 * Dt_4 + rz_r2 * Dt_4 + Dt_3;
+  pot->D_022 = ry_r2 * rz_r2 * Dt_5 + ry_r2 * Dt_4 + rz_r2 * Dt_4 + Dt_3;
+  pot->D_211 = rx_r2 * ry_r * rz_r * Dt_5 + ry_r * rz_r * Dt_4;
+  pot->D_121 = ry_r2 * rx_r * rz_r * Dt_5 + rx_r * rz_r * Dt_4;
+  pot->D_112 = rz_r2 * rx_r * ry_r * Dt_5 + rx_r * ry_r * Dt_4;
 #endif
 #if SELF_GRAVITY_MULTIPOLE_ORDER > 4
+
+  Dt_4 *= r_inv;
+  Dt_5 *= r_inv;
+
   /* 5th order derivatives */
-  pot->D_500 = r_x5 * Dt_11 + 10.f * r_x3 * Dt_9 + 15.f * r_x * Dt_7;
-  pot->D_050 = r_y5 * Dt_11 + 10.f * r_y3 * Dt_9 + 15.f * r_y * Dt_7;
-  pot->D_005 = r_z5 * Dt_11 + 10.f * r_z3 * Dt_9 + 15.f * r_z * Dt_7;
-  pot->D_410 = r_x4 * r_y * Dt_11 + 6.f * r_x2 * r_y * Dt_9 + 3.f * r_y * Dt_7;
-  pot->D_401 = r_x4 * r_z * Dt_11 + 6.f * r_x2 * r_z * Dt_9 + 3.f * r_z * Dt_7;
-  pot->D_140 = r_y4 * r_x * Dt_11 + 6.f * r_y2 * r_x * Dt_9 + 3.f * r_x * Dt_7;
-  pot->D_041 = r_y4 * r_z * Dt_11 + 6.f * r_y2 * r_z * Dt_9 + 3.f * r_z * Dt_7;
-  pot->D_104 = r_z4 * r_x * Dt_11 + 6.f * r_z2 * r_x * Dt_9 + 3.f * r_x * Dt_7;
-  pot->D_014 = r_z4 * r_y * Dt_11 + 6.f * r_z2 * r_y * Dt_9 + 3.f * r_y * Dt_7;
-  pot->D_320 = r_x3 * r_y2 * Dt_11 + r_x3 * Dt_9 + 3.f * r_x * r_y2 * Dt_9 +
-               3.f * r_x * Dt_7;
-  pot->D_302 = r_x3 * r_z2 * Dt_11 + r_x3 * Dt_9 + 3.f * r_x * r_z2 * Dt_9 +
-               3.f * r_x * Dt_7;
-  pot->D_230 = r_y3 * r_x2 * Dt_11 + r_y3 * Dt_9 + 3.f * r_y * r_x2 * Dt_9 +
-               3.f * r_y * Dt_7;
-  pot->D_032 = r_y3 * r_z2 * Dt_11 + r_y3 * Dt_9 + 3.f * r_y * r_z2 * Dt_9 +
-               3.f * r_y * Dt_7;
-  pot->D_203 = r_z3 * r_x2 * Dt_11 + r_z3 * Dt_9 + 3.f * r_z * r_x2 * Dt_9 +
-               3.f * r_z * Dt_7;
-  pot->D_023 = r_z3 * r_y2 * Dt_11 + r_z3 * Dt_9 + 3.f * r_z * r_y2 * Dt_9 +
-               3.f * r_z * Dt_7;
-  pot->D_311 = r_x3 * r_y * r_z * Dt_11 + 3.f * r_x * r_y * r_z * Dt_9;
-  pot->D_131 = r_y3 * r_x * r_z * Dt_11 + 3.f * r_x * r_y * r_z * Dt_9;
-  pot->D_113 = r_z3 * r_x * r_y * Dt_11 + 3.f * r_x * r_y * r_z * Dt_9;
-  pot->D_122 = r_x * r_y2 * r_z2 * Dt_11 + r_x * r_y2 * Dt_9 +
-               r_x * r_z2 * Dt_9 + r_x * Dt_7;
-  pot->D_212 = r_y * r_x2 * r_z2 * Dt_11 + r_y * r_x2 * Dt_9 +
-               r_y * r_z2 * Dt_9 + r_y * Dt_7;
-  pot->D_221 = r_z * r_x2 * r_y2 * Dt_11 + r_z * r_x2 * Dt_9 +
-               r_z * r_y2 * Dt_9 + r_z * Dt_7;
+  pot->D_500 = rx_r5 * Dt_6 + 10.f * rx_r3 * Dt_5 + 15.f * rx_r * Dt_4;
+  pot->D_050 = ry_r5 * Dt_6 + 10.f * ry_r3 * Dt_5 + 15.f * ry_r * Dt_4;
+  pot->D_005 = rz_r5 * Dt_6 + 10.f * rz_r3 * Dt_5 + 15.f * rz_r * Dt_4;
+  pot->D_410 =
+      rx_r4 * ry_r * Dt_6 + 6.f * rx_r2 * ry_r * Dt_5 + 3.f * ry_r * Dt_4;
+  pot->D_401 =
+      rx_r4 * rz_r * Dt_6 + 6.f * rx_r2 * rz_r * Dt_5 + 3.f * rz_r * Dt_4;
+  pot->D_140 =
+      ry_r4 * rx_r * Dt_6 + 6.f * ry_r2 * rx_r * Dt_5 + 3.f * rx_r * Dt_4;
+  pot->D_041 =
+      ry_r4 * rz_r * Dt_6 + 6.f * ry_r2 * rz_r * Dt_5 + 3.f * rz_r * Dt_4;
+  pot->D_104 =
+      rz_r4 * rx_r * Dt_6 + 6.f * rz_r2 * rx_r * Dt_5 + 3.f * rx_r * Dt_4;
+  pot->D_014 =
+      rz_r4 * ry_r * Dt_6 + 6.f * rz_r2 * ry_r * Dt_5 + 3.f * ry_r * Dt_4;
+  pot->D_320 = rx_r3 * ry_r2 * Dt_6 + rx_r3 * Dt_5 + 3.f * rx_r * ry_r2 * Dt_5 +
+               3.f * rx_r * Dt_4;
+  pot->D_302 = rx_r3 * rz_r2 * Dt_6 + rx_r3 * Dt_5 + 3.f * rx_r * rz_r2 * Dt_5 +
+               3.f * rx_r * Dt_4;
+  pot->D_230 = ry_r3 * rx_r2 * Dt_6 + ry_r3 * Dt_5 + 3.f * ry_r * rx_r2 * Dt_5 +
+               3.f * ry_r * Dt_4;
+  pot->D_032 = ry_r3 * rz_r2 * Dt_6 + ry_r3 * Dt_5 + 3.f * ry_r * rz_r2 * Dt_5 +
+               3.f * ry_r * Dt_4;
+  pot->D_203 = rz_r3 * rx_r2 * Dt_6 + rz_r3 * Dt_5 + 3.f * rz_r * rx_r2 * Dt_5 +
+               3.f * rz_r * Dt_4;
+  pot->D_023 = rz_r3 * ry_r2 * Dt_6 + rz_r3 * Dt_5 + 3.f * rz_r * ry_r2 * Dt_5 +
+               3.f * rz_r * Dt_4;
+  pot->D_311 = rx_r3 * ry_r * rz_r * Dt_6 + 3.f * rx_r * ry_r * rz_r * Dt_5;
+  pot->D_131 = ry_r3 * rx_r * rz_r * Dt_6 + 3.f * rx_r * ry_r * rz_r * Dt_5;
+  pot->D_113 = rz_r3 * rx_r * ry_r * Dt_6 + 3.f * rx_r * ry_r * rz_r * Dt_5;
+  pot->D_122 = rx_r * ry_r2 * rz_r2 * Dt_6 + rx_r * ry_r2 * Dt_5 +
+               rx_r * rz_r2 * Dt_5 + rx_r * Dt_4;
+  pot->D_212 = ry_r * rx_r2 * rz_r2 * Dt_6 + ry_r * rx_r2 * Dt_5 +
+               ry_r * rz_r2 * Dt_5 + ry_r * Dt_4;
+  pot->D_221 = rz_r * rx_r2 * ry_r2 * Dt_6 + rz_r * rx_r2 * Dt_5 +
+               rz_r * ry_r2 * Dt_5 + rz_r * Dt_4;
 #endif
 #if SELF_GRAVITY_MULTIPOLE_ORDER > 5
 #error "Missing implementation for orders >5"
@@ -450,46 +519,68 @@ potential_derivatives_compute_M2P(const float r_x, const float r_y,
                                   const int periodic, const float r_s_inv,
                                   struct potential_derivatives_M2P *pot) {
 
-#ifdef SWIFT_DEBUG_CHECKS
-  if (r2 < 0.99f * eps * eps)
-    error("Computing M2P derivatives below softening length");
-#endif
-
   float Dt_1;
-  float Dt_3;
+  float Dt_2;
 #if SELF_GRAVITY_MULTIPOLE_ORDER > 0
+  float Dt_3;
+#endif
+#if SELF_GRAVITY_MULTIPOLE_ORDER > 1
+  float Dt_4;
+#endif
+#if SELF_GRAVITY_MULTIPOLE_ORDER > 2
   float Dt_5;
 #endif
+#if SELF_GRAVITY_MULTIPOLE_ORDER > 3
+  float Dt_6;
+#endif
+
+  /* Softened case */
+  if (r2 < eps * eps) {
+
+    const float eps_inv = 1.f / eps;
+    const float r = r2 * r_inv;
+    const float u = r * eps_inv;
+
+    Dt_1 = eps_inv * D_soft_1(u);
+
+    const float eps_inv2 = eps_inv * eps_inv;
+    Dt_2 = eps_inv2 * D_soft_2(u);
+#if SELF_GRAVITY_MULTIPOLE_ORDER > 0
+    const float eps_inv3 = eps_inv2 * eps_inv;
+    Dt_3 = eps_inv3 * D_soft_3(u);
+#endif
 #if SELF_GRAVITY_MULTIPOLE_ORDER > 1
-  float Dt_7;
+    const float eps_inv4 = eps_inv3 * eps_inv;
+    Dt_4 = eps_inv4 * D_soft_4(u);
 #endif
 #if SELF_GRAVITY_MULTIPOLE_ORDER > 2
-  float Dt_9;
+    const float eps_inv5 = eps_inv4 * eps_inv;
+    Dt_5 = eps_inv5 * D_soft_5(u);
 #endif
 #if SELF_GRAVITY_MULTIPOLE_ORDER > 3
-  float Dt_11;
+    const float eps_inv6 = eps_inv5 * eps_inv;
+    Dt_6 = eps_inv6 * D_soft_6(u);
 #endif
 
-  /* Un-truncated case (Newtonian potential) */
-  if (!periodic) {
+    /* Un-truncated un-softened case (Newtonian potential) */
+  } else if (!periodic) {
 
-    const float r_inv2 = r_inv * r_inv;
-    Dt_1 = r_inv;
-    Dt_3 = -1.f * Dt_1 * r_inv2; /* -1 / r^3 */
+    Dt_1 = r_inv;               /* 1 / r */
+    Dt_2 = -1.f * Dt_1 * r_inv; /* -1 / r^2 */
 #if SELF_GRAVITY_MULTIPOLE_ORDER > 0
-    Dt_5 = -3.f * Dt_3 * r_inv2; /* 3 / r^5 */
+    Dt_3 = -3.f * Dt_2 * r_inv; /* 3 / r^3 */
 #endif
 #if SELF_GRAVITY_MULTIPOLE_ORDER > 1
-    Dt_7 = -5.f * Dt_5 * r_inv2; /* -15 / r^7 */
+    Dt_4 = -5.f * Dt_3 * r_inv; /* -15 / r^4 */
 #endif
 #if SELF_GRAVITY_MULTIPOLE_ORDER > 2
-    Dt_9 = -7.f * Dt_7 * r_inv2; /* 105 / r^9 */
+    Dt_5 = -7.f * Dt_4 * r_inv; /* 105 / r^5 */
 #endif
 #if SELF_GRAVITY_MULTIPOLE_ORDER > 3
-    Dt_11 = -9.f * Dt_9 * r_inv2; /* -945 / r^11 */
+    Dt_6 = -9.f * Dt_5 * r_inv; /* -945 / r^6 */
 #endif
 
-    /* Truncated case */
+    /* Truncated case (long-range) */
   } else {
 
     /* Get the derivatives of the truncated potential */
@@ -497,143 +588,186 @@ potential_derivatives_compute_M2P(const float r_x, const float r_y,
     struct chi_derivatives derivs;
     kernel_long_grav_derivatives(r, r_s_inv, &derivs);
 
-    const float r_inv2 = r_inv * r_inv;
-    const float r_inv3 = r_inv2 * r_inv;
     Dt_1 = derivs.chi_0 * r_inv;
-    Dt_3 = (r * derivs.chi_1 - derivs.chi_0) * r_inv3;
+
+    /* -chi^0 r_i^2 + chi^1 r_i^1 */
+    Dt_2 = derivs.chi_1 - derivs.chi_0 * r_inv;
+    Dt_2 = Dt_2 * r_inv;
+
 #if SELF_GRAVITY_MULTIPOLE_ORDER > 0
-    const float r_inv5 = r_inv2 * r_inv3;
-    Dt_5 =
-        (r * r * derivs.chi_2 - 3.f * r * derivs.chi_1 + 3.f * derivs.chi_0) *
-        r_inv5;
+
+    /* 3chi^0 r_i^3 - 3 chi^1 r_i^2 + chi^2 r_i^1 */
+    Dt_3 = derivs.chi_0 * r_inv - derivs.chi_1;
+    Dt_3 = Dt_3 * 3.f;
+    Dt_3 = Dt_3 * r_inv + derivs.chi_2;
+    Dt_3 = Dt_3 * r_inv;
+
 #endif
 #if SELF_GRAVITY_MULTIPOLE_ORDER > 1
-    const float r_inv7 = r_inv2 * r_inv5;
-    Dt_7 = (r * r * r * derivs.chi_3 - 6.f * r * r * derivs.chi_2 +
-            15.f * r * derivs.chi_1 - 15.f * derivs.chi_0) *
-           r_inv7;
+
+    /* -15chi^0 r_i^4 + 15 chi^1 r_i^3 - 6 chi^2 r_i^2  + chi^3 r_i^1 */
+    Dt_4 = -derivs.chi_0 * r_inv + derivs.chi_1;
+    Dt_4 = Dt_4 * 15.f;
+    Dt_4 = Dt_4 * r_inv - 6.f * derivs.chi_2;
+    Dt_4 = Dt_4 * r_inv + derivs.chi_3;
+    Dt_4 = Dt_4 * r_inv;
+
 #endif
 #if SELF_GRAVITY_MULTIPOLE_ORDER > 2
-    const float r_inv9 = r_inv2 * r_inv7;
-    Dt_9 = (r * r * r * r * derivs.chi_4 - 10.f * r * r * r * derivs.chi_3 +
-            45.f * r * r * derivs.chi_2 - 105.f * r * derivs.chi_1 +
-            105.f * derivs.chi_0) *
-           r_inv9;
+
+    /* 105chi^0 r_i^5 - 105 chi^1 r_i^4 + 45 chi^2 r_i^3 - 10 chi^3 r_i^2 +
+     * chi^4 r_i^1 */
+    Dt_5 = derivs.chi_0 * r_inv - derivs.chi_1;
+    Dt_5 = Dt_5 * 105.f;
+    Dt_5 = Dt_5 * r_inv + 45.f * derivs.chi_2;
+    Dt_5 = Dt_5 * r_inv - 10.f * derivs.chi_3;
+    Dt_5 = Dt_5 * r_inv + derivs.chi_4;
+    Dt_5 = Dt_5 * r_inv;
+
 #endif
 #if SELF_GRAVITY_MULTIPOLE_ORDER > 3
-    const float r_inv11 = r_inv2 * r_inv9;
-    Dt_11 = (r * r * r * r * r * derivs.chi_5 -
-             15.f * r * r * r * r * derivs.chi_4 +
-             105.f * r * r * r * derivs.chi_3 - 420.f * r * r * derivs.chi_2 +
-             945.f * r * derivs.chi_1 - 945.f * derivs.chi_0) *
-            r_inv11;
+
+    /* -945chi^0 r_i^6 + 945 chi^1 r_i^5 - 420 chi^2 r_i^4 + 105 chi^3 r_i^3 -
+     * 15 chi^4 r_i^2 + chi^5 r_i^1 */
+    Dt_6 = -derivs.chi_0 * r_inv + derivs.chi_1;
+    Dt_6 = Dt_6 * 945.f;
+    Dt_6 = Dt_6 * r_inv - 420.f * derivs.chi_2;
+    Dt_6 = Dt_6 * r_inv + 105.f * derivs.chi_3;
+    Dt_6 = Dt_6 * r_inv - 15.f * derivs.chi_4;
+    Dt_6 = Dt_6 * r_inv + derivs.chi_5;
+    Dt_6 = Dt_6 * r_inv;
+
 #endif
   }
 
-/* Alright, let's get the full terms */
+  /* Alright, let's get the full terms */
+
+  /* Compute some powers of (r_x / r), (r_y / r) and (r_z / r) */
+  const float rx_r = r_x * r_inv;
+  const float ry_r = r_y * r_inv;
+  const float rz_r = r_z * r_inv;
 
-/* Compute some powers of r_x, r_y and r_z */
 #if SELF_GRAVITY_MULTIPOLE_ORDER > 0
-  const float r_x2 = r_x * r_x;
-  const float r_y2 = r_y * r_y;
-  const float r_z2 = r_z * r_z;
+  const float rx_r2 = rx_r * rx_r;
+  const float ry_r2 = ry_r * ry_r;
+  const float rz_r2 = rz_r * rz_r;
 #endif
 #if SELF_GRAVITY_MULTIPOLE_ORDER > 1
-  const float r_x3 = r_x2 * r_x;
-  const float r_y3 = r_y2 * r_y;
-  const float r_z3 = r_z2 * r_z;
+  const float rx_r3 = rx_r2 * rx_r;
+  const float ry_r3 = ry_r2 * ry_r;
+  const float rz_r3 = rz_r2 * rz_r;
 #endif
 #if SELF_GRAVITY_MULTIPOLE_ORDER > 2
-  const float r_x4 = r_x3 * r_x;
-  const float r_y4 = r_y3 * r_y;
-  const float r_z4 = r_z3 * r_z;
+  const float rx_r4 = rx_r3 * rx_r;
+  const float ry_r4 = ry_r3 * ry_r;
+  const float rz_r4 = rz_r3 * rz_r;
 #endif
 #if SELF_GRAVITY_MULTIPOLE_ORDER > 3
-  const float r_x5 = r_x4 * r_x;
-  const float r_y5 = r_y4 * r_y;
-  const float r_z5 = r_z4 * r_z;
+  const float rx_r5 = rx_r4 * rx_r;
+  const float ry_r5 = ry_r4 * ry_r;
+  const float rz_r5 = rz_r4 * rz_r;
 #endif
 
   /* Get the 0th order term */
   pot->D_000 = Dt_1;
 
   /* 1st order derivatives */
-  pot->D_100 = r_x * Dt_3;
-  pot->D_010 = r_y * Dt_3;
-  pot->D_001 = r_z * Dt_3;
+  pot->D_100 = rx_r * Dt_2;
+  pot->D_010 = ry_r * Dt_2;
+  pot->D_001 = rz_r * Dt_2;
 
 #if SELF_GRAVITY_MULTIPOLE_ORDER > 0
+
+  Dt_2 *= r_inv;
+
   /* 2nd order derivatives */
-  pot->D_200 = r_x2 * Dt_5 + Dt_3;
-  pot->D_020 = r_y2 * Dt_5 + Dt_3;
-  pot->D_002 = r_z2 * Dt_5 + Dt_3;
-  pot->D_110 = r_x * r_y * Dt_5;
-  pot->D_101 = r_x * r_z * Dt_5;
-  pot->D_011 = r_y * r_z * Dt_5;
+  pot->D_200 = rx_r2 * Dt_3 + Dt_2;
+  pot->D_020 = ry_r2 * Dt_3 + Dt_2;
+  pot->D_002 = rz_r2 * Dt_3 + Dt_2;
+  pot->D_110 = rx_r * ry_r * Dt_3;
+  pot->D_101 = rx_r * rz_r * Dt_3;
+  pot->D_011 = ry_r * rz_r * Dt_3;
 #endif
 #if SELF_GRAVITY_MULTIPOLE_ORDER > 1
+
+  Dt_3 *= r_inv;
+
   /* 3rd order derivatives */
-  pot->D_300 = r_x3 * Dt_7 + 3.f * r_x * Dt_5;
-  pot->D_030 = r_y3 * Dt_7 + 3.f * r_y * Dt_5;
-  pot->D_003 = r_z3 * Dt_7 + 3.f * r_z * Dt_5;
-  pot->D_210 = r_x2 * r_y * Dt_7 + r_y * Dt_5;
-  pot->D_201 = r_x2 * r_z * Dt_7 + r_z * Dt_5;
-  pot->D_120 = r_y2 * r_x * Dt_7 + r_x * Dt_5;
-  pot->D_021 = r_y2 * r_z * Dt_7 + r_z * Dt_5;
-  pot->D_102 = r_z2 * r_x * Dt_7 + r_x * Dt_5;
-  pot->D_012 = r_z2 * r_y * Dt_7 + r_y * Dt_5;
-  pot->D_111 = r_x * r_y * r_z * Dt_7;
+  pot->D_300 = rx_r3 * Dt_4 + 3.f * rx_r * Dt_3;
+  pot->D_030 = ry_r3 * Dt_4 + 3.f * ry_r * Dt_3;
+  pot->D_003 = rz_r3 * Dt_4 + 3.f * rz_r * Dt_3;
+  pot->D_210 = rx_r2 * ry_r * Dt_4 + ry_r * Dt_3;
+  pot->D_201 = rx_r2 * rz_r * Dt_4 + rz_r * Dt_3;
+  pot->D_120 = ry_r2 * rx_r * Dt_4 + rx_r * Dt_3;
+  pot->D_021 = ry_r2 * rz_r * Dt_4 + rz_r * Dt_3;
+  pot->D_102 = rz_r2 * rx_r * Dt_4 + rx_r * Dt_3;
+  pot->D_012 = rz_r2 * ry_r * Dt_4 + ry_r * Dt_3;
+  pot->D_111 = rx_r * ry_r * rz_r * Dt_4;
 #endif
 #if SELF_GRAVITY_MULTIPOLE_ORDER > 2
+
+  Dt_3 *= r_inv;
+  Dt_4 *= r_inv;
+
   /* 4th order derivatives */
-  pot->D_400 = r_x4 * Dt_9 + 6.f * r_x2 * Dt_7 + 3.f * Dt_5;
-  pot->D_040 = r_y4 * Dt_9 + 6.f * r_y2 * Dt_7 + 3.f * Dt_5;
-  pot->D_004 = r_z4 * Dt_9 + 6.f * r_z2 * Dt_7 + 3.f * Dt_5;
-  pot->D_310 = r_x3 * r_y * Dt_9 + 3.f * r_x * r_y * Dt_7;
-  pot->D_301 = r_x3 * r_z * Dt_9 + 3.f * r_x * r_z * Dt_7;
-  pot->D_130 = r_y3 * r_x * Dt_9 + 3.f * r_y * r_x * Dt_7;
-  pot->D_031 = r_y3 * r_z * Dt_9 + 3.f * r_y * r_z * Dt_7;
-  pot->D_103 = r_z3 * r_x * Dt_9 + 3.f * r_z * r_x * Dt_7;
-  pot->D_013 = r_z3 * r_y * Dt_9 + 3.f * r_z * r_y * Dt_7;
-  pot->D_220 = r_x2 * r_y2 * Dt_9 + r_x2 * Dt_7 + r_y2 * Dt_7 + Dt_5;
-  pot->D_202 = r_x2 * r_z2 * Dt_9 + r_x2 * Dt_7 + r_z2 * Dt_7 + Dt_5;
-  pot->D_022 = r_y2 * r_z2 * Dt_9 + r_y2 * Dt_7 + r_z2 * Dt_7 + Dt_5;
-  pot->D_211 = r_x2 * r_y * r_z * Dt_9 + r_y * r_z * Dt_7;
-  pot->D_121 = r_y2 * r_x * r_z * Dt_9 + r_x * r_z * Dt_7;
-  pot->D_112 = r_z2 * r_x * r_y * Dt_9 + r_x * r_y * Dt_7;
+  pot->D_400 = rx_r4 * Dt_5 + 6.f * rx_r2 * Dt_4 + 3.f * Dt_3;
+  pot->D_040 = ry_r4 * Dt_5 + 6.f * ry_r2 * Dt_4 + 3.f * Dt_3;
+  pot->D_004 = rz_r4 * Dt_5 + 6.f * rz_r2 * Dt_4 + 3.f * Dt_3;
+  pot->D_310 = rx_r3 * ry_r * Dt_5 + 3.f * rx_r * ry_r * Dt_4;
+  pot->D_301 = rx_r3 * rz_r * Dt_5 + 3.f * rx_r * rz_r * Dt_4;
+  pot->D_130 = ry_r3 * rx_r * Dt_5 + 3.f * ry_r * rx_r * Dt_4;
+  pot->D_031 = ry_r3 * rz_r * Dt_5 + 3.f * ry_r * rz_r * Dt_4;
+  pot->D_103 = rz_r3 * rx_r * Dt_5 + 3.f * rz_r * rx_r * Dt_4;
+  pot->D_013 = rz_r3 * ry_r * Dt_5 + 3.f * rz_r * ry_r * Dt_4;
+  pot->D_220 = rx_r2 * ry_r2 * Dt_5 + rx_r2 * Dt_4 + ry_r2 * Dt_4 + Dt_3;
+  pot->D_202 = rx_r2 * rz_r2 * Dt_5 + rx_r2 * Dt_4 + rz_r2 * Dt_4 + Dt_3;
+  pot->D_022 = ry_r2 * rz_r2 * Dt_5 + ry_r2 * Dt_4 + rz_r2 * Dt_4 + Dt_3;
+  pot->D_211 = rx_r2 * ry_r * rz_r * Dt_5 + ry_r * rz_r * Dt_4;
+  pot->D_121 = ry_r2 * rx_r * rz_r * Dt_5 + rx_r * rz_r * Dt_4;
+  pot->D_112 = rz_r2 * rx_r * ry_r * Dt_5 + rx_r * ry_r * Dt_4;
 #endif
 #if SELF_GRAVITY_MULTIPOLE_ORDER > 3
+
+  Dt_4 *= r_inv;
+  Dt_5 *= r_inv;
+
   /* 5th order derivatives */
-  pot->D_500 = r_x5 * Dt_11 + 10.f * r_x3 * Dt_9 + 15.f * r_x * Dt_7;
-  pot->D_050 = r_y5 * Dt_11 + 10.f * r_y3 * Dt_9 + 15.f * r_y * Dt_7;
-  pot->D_005 = r_z5 * Dt_11 + 10.f * r_z3 * Dt_9 + 15.f * r_z * Dt_7;
-  pot->D_410 = r_x4 * r_y * Dt_11 + 6.f * r_x2 * r_y * Dt_9 + 3.f * r_y * Dt_7;
-  pot->D_401 = r_x4 * r_z * Dt_11 + 6.f * r_x2 * r_z * Dt_9 + 3.f * r_z * Dt_7;
-  pot->D_140 = r_y4 * r_x * Dt_11 + 6.f * r_y2 * r_x * Dt_9 + 3.f * r_x * Dt_7;
-  pot->D_041 = r_y4 * r_z * Dt_11 + 6.f * r_y2 * r_z * Dt_9 + 3.f * r_z * Dt_7;
-  pot->D_104 = r_z4 * r_x * Dt_11 + 6.f * r_z2 * r_x * Dt_9 + 3.f * r_x * Dt_7;
-  pot->D_014 = r_z4 * r_y * Dt_11 + 6.f * r_z2 * r_y * Dt_9 + 3.f * r_y * Dt_7;
-  pot->D_320 = r_x3 * r_y2 * Dt_11 + r_x3 * Dt_9 + 3.f * r_x * r_y2 * Dt_9 +
-               3.f * r_x * Dt_7;
-  pot->D_302 = r_x3 * r_z2 * Dt_11 + r_x3 * Dt_9 + 3.f * r_x * r_z2 * Dt_9 +
-               3.f * r_x * Dt_7;
-  pot->D_230 = r_y3 * r_x2 * Dt_11 + r_y3 * Dt_9 + 3.f * r_y * r_x2 * Dt_9 +
-               3.f * r_y * Dt_7;
-  pot->D_032 = r_y3 * r_z2 * Dt_11 + r_y3 * Dt_9 + 3.f * r_y * r_z2 * Dt_9 +
-               3.f * r_y * Dt_7;
-  pot->D_203 = r_z3 * r_x2 * Dt_11 + r_z3 * Dt_9 + 3.f * r_z * r_x2 * Dt_9 +
-               3.f * r_z * Dt_7;
-  pot->D_023 = r_z3 * r_y2 * Dt_11 + r_z3 * Dt_9 + 3.f * r_z * r_y2 * Dt_9 +
-               3.f * r_z * Dt_7;
-  pot->D_311 = r_x3 * r_y * r_z * Dt_11 + 3.f * r_x * r_y * r_z * Dt_9;
-  pot->D_131 = r_y3 * r_x * r_z * Dt_11 + 3.f * r_x * r_y * r_z * Dt_9;
-  pot->D_113 = r_z3 * r_x * r_y * Dt_11 + 3.f * r_x * r_y * r_z * Dt_9;
-  pot->D_122 = r_x * r_y2 * r_z2 * Dt_11 + r_x * r_y2 * Dt_9 +
-               r_x * r_z2 * Dt_9 + r_x * Dt_7;
-  pot->D_212 = r_y * r_x2 * r_z2 * Dt_11 + r_y * r_x2 * Dt_9 +
-               r_y * r_z2 * Dt_9 + r_y * Dt_7;
-  pot->D_221 = r_z * r_x2 * r_y2 * Dt_11 + r_z * r_x2 * Dt_9 +
-               r_z * r_y2 * Dt_9 + r_z * Dt_7;
+  pot->D_500 = rx_r5 * Dt_6 + 10.f * rx_r3 * Dt_5 + 15.f * rx_r * Dt_4;
+  pot->D_050 = ry_r5 * Dt_6 + 10.f * ry_r3 * Dt_5 + 15.f * ry_r * Dt_4;
+  pot->D_005 = rz_r5 * Dt_6 + 10.f * rz_r3 * Dt_5 + 15.f * rz_r * Dt_4;
+  pot->D_410 =
+      rx_r4 * ry_r * Dt_6 + 6.f * rx_r2 * ry_r * Dt_5 + 3.f * ry_r * Dt_4;
+  pot->D_401 =
+      rx_r4 * rz_r * Dt_6 + 6.f * rx_r2 * rz_r * Dt_5 + 3.f * rz_r * Dt_4;
+  pot->D_140 =
+      ry_r4 * rx_r * Dt_6 + 6.f * ry_r2 * rx_r * Dt_5 + 3.f * rx_r * Dt_4;
+  pot->D_041 =
+      ry_r4 * rz_r * Dt_6 + 6.f * ry_r2 * rz_r * Dt_5 + 3.f * rz_r * Dt_4;
+  pot->D_104 =
+      rz_r4 * rx_r * Dt_6 + 6.f * rz_r2 * rx_r * Dt_5 + 3.f * rx_r * Dt_4;
+  pot->D_014 =
+      rz_r4 * ry_r * Dt_6 + 6.f * rz_r2 * ry_r * Dt_5 + 3.f * ry_r * Dt_4;
+  pot->D_320 = rx_r3 * ry_r2 * Dt_6 + rx_r3 * Dt_5 + 3.f * rx_r * ry_r2 * Dt_5 +
+               3.f * rx_r * Dt_4;
+  pot->D_302 = rx_r3 * rz_r2 * Dt_6 + rx_r3 * Dt_5 + 3.f * rx_r * rz_r2 * Dt_5 +
+               3.f * rx_r * Dt_4;
+  pot->D_230 = ry_r3 * rx_r2 * Dt_6 + ry_r3 * Dt_5 + 3.f * ry_r * rx_r2 * Dt_5 +
+               3.f * ry_r * Dt_4;
+  pot->D_032 = ry_r3 * rz_r2 * Dt_6 + ry_r3 * Dt_5 + 3.f * ry_r * rz_r2 * Dt_5 +
+               3.f * ry_r * Dt_4;
+  pot->D_203 = rz_r3 * rx_r2 * Dt_6 + rz_r3 * Dt_5 + 3.f * rz_r * rx_r2 * Dt_5 +
+               3.f * rz_r * Dt_4;
+  pot->D_023 = rz_r3 * ry_r2 * Dt_6 + rz_r3 * Dt_5 + 3.f * rz_r * ry_r2 * Dt_5 +
+               3.f * rz_r * Dt_4;
+  pot->D_311 = rx_r3 * ry_r * rz_r * Dt_6 + 3.f * rx_r * ry_r * rz_r * Dt_5;
+  pot->D_131 = ry_r3 * rx_r * rz_r * Dt_6 + 3.f * rx_r * ry_r * rz_r * Dt_5;
+  pot->D_113 = rz_r3 * rx_r * ry_r * Dt_6 + 3.f * rx_r * ry_r * rz_r * Dt_5;
+  pot->D_122 = rx_r * ry_r2 * rz_r2 * Dt_6 + rx_r * ry_r2 * Dt_5 +
+               rx_r * rz_r2 * Dt_5 + rx_r * Dt_4;
+  pot->D_212 = ry_r * rx_r2 * rz_r2 * Dt_6 + ry_r * rx_r2 * Dt_5 +
+               ry_r * rz_r2 * Dt_5 + ry_r * Dt_4;
+  pot->D_221 = rz_r * rx_r2 * ry_r2 * Dt_6 + rz_r * rx_r2 * Dt_5 +
+               rz_r * ry_r2 * Dt_5 + rz_r * Dt_4;
 #endif
 }
 
diff --git a/src/gravity_properties.c b/src/gravity_properties.c
index cd7a8909170810997684c67a54552c9fe1f0618e..a58e822ab808d08aab48b746912c601f20fcaa54 100644
--- a/src/gravity_properties.c
+++ b/src/gravity_properties.c
@@ -23,6 +23,7 @@
 /* Standard headers */
 #include <float.h>
 #include <math.h>
+#include <string.h>
 
 /* Local headers. */
 #include "adiabatic_index.h"
@@ -32,6 +33,7 @@
 #include "gravity.h"
 #include "kernel_gravity.h"
 #include "kernel_long_gravity.h"
+#include "restart.h"
 
 #define gravity_props_default_a_smooth 1.25f
 #define gravity_props_default_r_cut_max 4.5f
@@ -43,7 +45,8 @@ void gravity_props_init(struct gravity_props *p, struct swift_params *params,
                         const struct cosmology *cosmo, const int with_cosmology,
                         const int with_external_potential,
                         const int has_baryons, const int has_DM,
-                        const int is_zoom_simulation, const int periodic) {
+                        const int is_zoom_simulation, const int periodic,
+                        const double dim[3]) {
 
   /* Tree updates */
   p->rebuild_frequency =
@@ -63,6 +66,9 @@ void gravity_props_init(struct gravity_props *p, struct swift_params *params,
     p->r_cut_min_ratio = parser_get_opt_param_float(
         params, "Gravity:r_cut_min", gravity_props_default_r_cut_min);
 
+    p->r_s = p->a_smooth * dim[0] / p->mesh_size;
+    p->r_s_inv = 1. / p->r_s;
+
     /* Some basic checks of what we read */
     if (p->mesh_size % 2 != 0)
       error("The mesh side-length must be an even number.");
@@ -76,6 +82,8 @@ void gravity_props_init(struct gravity_props *p, struct swift_params *params,
   } else {
     p->mesh_size = 0;
     p->a_smooth = 0.f;
+    p->r_s = FLT_MAX;
+    p->r_s_inv = 0.f;
     p->r_cut_min_ratio = 0.f;
     p->r_cut_max_ratio = 0.f;
   }
@@ -83,16 +91,53 @@ void gravity_props_init(struct gravity_props *p, struct swift_params *params,
   /* Time integration */
   p->eta = parser_get_param_float(params, "Gravity:eta");
 
-  /* Opening angle */
-  p->theta_crit = parser_get_param_double(params, "Gravity:theta");
+  /* Read the choice of multipole acceptance criterion */
+  char buffer[32] = {0};
+  parser_get_param_string(params, "Gravity:MAC", buffer);
+
+  if (strcmp(buffer, "adaptive") == 0) {
+    p->use_adaptive_tolerance = 1;
+  } else if (strcmp(buffer, "geometric") == 0) {
+    p->use_adaptive_tolerance = 0;
+  } else {
+    error(
+        "Invalid choice of multipole acceptance criterion: '%s'. Should be "
+        "'adaptive' or 'geometric'",
+        buffer);
+  }
+
+  /* We always start with the geometric MAC */
+  p->use_advanced_MAC = 0;
+
+  /* Geometric opening angle */
+  p->theta_crit = parser_get_param_double(params, "Gravity:theta_cr");
   if (p->theta_crit >= 1.) error("Theta too large. FMM won't converge.");
-  p->theta_crit2 = p->theta_crit * p->theta_crit;
-  p->theta_crit_inv = 1. / p->theta_crit;
+
+  /* Adaptive opening angle tolerance */
+  if (p->use_adaptive_tolerance)
+    p->adaptive_tolerance =
+        parser_get_param_float(params, "Gravity:epsilon_fmm");
+
+  /* Consider truncated forces in the MAC? */
+  if (p->use_adaptive_tolerance)
+    p->consider_truncation_in_MAC =
+        parser_get_opt_param_int(params, "Gravity:allow_truncation_in_MAC", 0);
+
+  /* Are we allowing tree use below softening? */
+  p->use_tree_below_softening =
+      parser_get_opt_param_int(params, "Gravity:use_tree_below_softening", 0);
+
+#ifdef GADGET2_SOFTENING_CORRECTION
+  if (p->use_tree_below_softening)
+    error(
+        "Cannot solve gravity via the tree below softening with the "
+        "Gadget2-type softening kernel");
+#endif
 
   /* Mesh dithering */
   if (periodic && !with_external_potential) {
     p->with_dithering =
-        parser_get_opt_param_int(params, "Gravity:dithering", 1);
+        parser_get_opt_param_int(params, "Gravity:dithering", 0);
     if (p->with_dithering) {
       p->dithering_ratio =
           parser_get_opt_param_double(params, "Gravity:dithering_ratio", 1.0);
@@ -168,6 +213,13 @@ void gravity_props_init(struct gravity_props *p, struct swift_params *params,
   gravity_props_update(p, cosmo);
 }
 
+void gravity_props_update_MAC_choice(struct gravity_props *p) {
+  /* Runs configured with the geometric MAC keep it for good */
+  if (!p->use_adaptive_tolerance) return;
+  /* Initial accelerations have been run: switch to the better MAC */
+  p->use_advanced_MAC = 1;
+}
+
 void gravity_props_update(struct gravity_props *p,
                           const struct cosmology *cosmo) {
 
diff --git a/src/gravity_properties.h b/src/gravity_properties.h
index 8a4abe4bd372c52cccdb880839353d0dcc3d1e25..51ac0cc11e3535a32bb837586851a256d35bf207 100644
--- a/src/gravity_properties.h
+++ b/src/gravity_properties.h
@@ -26,10 +26,6 @@
 #include <hdf5.h>
 #endif
 
-/* Local includes. */
-#include "kernel_gravity.h"
-#include "restart.h"
-
 /* Forward declarations */
 struct cosmology;
 struct phys_const;
@@ -58,14 +54,23 @@ struct gravity_props {
 
   /* -------------- Properties of the FFM gravity ---------------------- */
 
+  /*! What MAC are we currently using? */
+  int use_advanced_MAC;
+
+  /*! Are we using the adaptive opening angle? (as read from param file) */
+  int use_adaptive_tolerance;
+
+  /*! Accuracy parameter of the advanced MAC */
+  float adaptive_tolerance;
+
   /*! Tree opening angle (Multipole acceptance criterion) */
   double theta_crit;
 
-  /*! Square of opening angle */
-  double theta_crit2;
+  /*! Are we allowing tree gravity below softening? */
+  int use_tree_below_softening;
 
-  /*! Inverse of opening angle */
-  double theta_crit_inv;
+  /*! Are we applying long-range truncation to the forces in the MAC? */
+  int consider_truncation_in_MAC;
 
   /* ------------- Properties of the softened gravity ------------------ */
 
@@ -111,12 +116,20 @@ struct gravity_props {
    * a_smooth */
   float r_cut_max_ratio;
 
+  /*! Long-range gravity mesh scale. */
+  float r_s;
+
+  /*! Inverse of the long-range gravity mesh scale. */
+  float r_s_inv;
+
   /*! Are we dithering the particles at every rebuild? */
   int with_dithering;
 
   /*! Fraction of the top-level cell size used to normalize the dithering */
   double dithering_ratio;
 
+  /* ------------- Physical constants ---------------------------------- */
+
   /*! Gravitational constant (in internal units, copied from the physical
    * constants) */
   float G_Newton;
@@ -128,10 +141,11 @@ void gravity_props_init(struct gravity_props *p, struct swift_params *params,
                         const struct cosmology *cosmo, const int with_cosmology,
                         const int with_external_potential,
                         const int has_baryons, const int has_DM,
-                        const int is_zoom_simulation, const int periodic);
+                        const int is_zoom_simulation, const int periodic,
+                        const double dim[3]);
 void gravity_props_update(struct gravity_props *p,
                           const struct cosmology *cosmo);
-
+void gravity_props_update_MAC_choice(struct gravity_props *p);
 #if defined(HAVE_HDF5)
 void gravity_props_print_snapshot(hid_t h_grpsph,
                                   const struct gravity_props *p);
diff --git a/src/integer_power.h b/src/integer_power.h
new file mode 100644
index 0000000000000000000000000000000000000000..3aec75fd4f828aedd800512060edccf8d18cb8da
--- /dev/null
+++ b/src/integer_power.h
@@ -0,0 +1,118 @@
+/*******************************************************************************
+ * This file is part of SWIFT.
+ * Copyright (c) 2020  Matthieu Schaller (schaller@strw.leidenuniv.nl)
+ *
+ * This program is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU Lesser General Public License as published
+ * by the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public License
+ * along with this program.  If not, see <http://www.gnu.org/licenses/>.
+ *
+ ******************************************************************************/
+#ifndef SWIFT_INTEGER_POWER_H
+#define SWIFT_INTEGER_POWER_H
+
+/* Config parameters. */
+#include "../config.h"
+
+/* Local headers */
+#include "error.h"
+#include "inline.h"
+
+/* Standard headers */
+#include <math.h>
+
+/**
+ * @brief Computes x to the power n for a (small) non-negative integer n.
+ *
+ * Explicit multiplications are used for 0 <= n <= 8; larger n call pow().
+ */
+__attribute__((const)) INLINE static double integer_pow(const double x,
+                                                        const unsigned int n) {
+
+  switch (n) {
+    case 0:
+      return 1.;
+    case 1:
+      return x;
+    case 2:
+      return x * x;
+    case 3:
+      return x * x * x;
+    case 4: {
+      const double y = x * x;
+      return y * y;
+    }
+    case 5: {
+      const double y = x * x;
+      return x * y * y;
+    }
+    case 6: {
+      const double y = x * x;
+      return y * y * y;
+    }
+    case 7: {
+      const double y = x * x;
+      return x * y * y * y;
+    }
+    case 8: {
+      const double y = x * x;
+      const double z = y * y;
+      return z * z;
+    }
+    default:
+      return pow(x, (double)n);
+  }
+}
+
+/**
+ * @brief Computes x to the power n for a (small) non-negative integer n.
+ *
+ * Explicit multiplications are used for 0 <= n <= 8; larger n call powf().
+ */
+__attribute__((const)) INLINE static float integer_powf(const float x,
+                                                        const unsigned int n) {
+
+  switch (n) {
+    case 0:
+      return 1.f;
+    case 1:
+      return x;
+    case 2:
+      return x * x;
+    case 3:
+      return x * x * x;
+    case 4: {
+      const float y = x * x;
+      return y * y;
+    }
+    case 5: {
+      const float y = x * x;
+      return x * y * y;
+    }
+    case 6: {
+      const float y = x * x;
+      return y * y * y;
+    }
+    case 7: {
+      const float y = x * x;
+      return x * y * y * y;
+    }
+    case 8: {
+      const float y = x * x;
+      const float z = y * y;
+      return z * z;
+    }
+    default:
+      return powf(x, (float)n);
+  }
+}
+
+#endif /* SWIFT_INTEGER_POWER_H */
diff --git a/src/kernel_gravity.h b/src/kernel_gravity.h
index 7f7d2453f7720458e24794db088c96e6ff180944..4f6385ca39d823ed2762b4cedf87cbfbe85ee67b 100644
--- a/src/kernel_gravity.h
+++ b/src/kernel_gravity.h
@@ -39,63 +39,63 @@
 #endif /* GADGET2_SOFTENING_CORRECTION */
 
 /**
- * @brief Computes the gravity softening function for potential.
+ * @brief Computes the gravity softening kernel for the potential.
  *
  * This functions assumes 0 < u < 1.
  *
- * @param u The ratio of the distance to the softening length $u = x/h$.
- * @param W (return) The value of the kernel function $W(x,h)$.
+ * @param u The ratio of the distance to the spline softening length $u = x/H$.
  */
-__attribute__((always_inline, nonnull)) INLINE static void kernel_grav_pot_eval(
-    const float u, float *const W) {
+__attribute__((const)) INLINE static float kernel_grav_pot_eval(const float u) {
 
+  float W;
 #ifdef GADGET2_SOFTENING_CORRECTION
   if (u < 0.5f)
-    *W = -2.8f + u * u * (5.333333333333f + u * u * (6.4f * u - 9.6f));
+    W = -2.8f + u * u * (5.333333333333f + u * u * (6.4f * u - 9.6f));
   else
-    *W =
-        -3.2f + 0.066666666667f / u +
+    W = -3.2f + 0.066666666667f / u +
         u * u *
             (10.666666666667f + u * (-16.f + u * (9.6f - 2.133333333333f * u)));
 #else
 
   /* W(u) = 3u^7 - 15u^6 + 28u^5 - 21u^4 + 7u^2 - 3 */
-  *W = 3.f * u - 15.f;
-  *W = *W * u + 28.f;
-  *W = *W * u - 21.f;
-  *W = *W * u;
-  *W = *W * u + 7.f;
-  *W = *W * u;
-  *W = *W * u - 3.f;
+  W = 3.f * u - 15.f;
+  W = W * u + 28.f;
+  W = W * u - 21.f;
+  W = W * u;
+  W = W * u + 7.f;
+  W = W * u;
+  W = W * u - 3.f;
 #endif
+  return W;
 }
 
 /**
- * @brief Computes the gravity softening function for forces.
+ * @brief Computes the gravity softening kernel for the forces.
  *
  * This functions assumes 0 < u < 1.
  *
- * @param u The ratio of the distance to the softening length $u = x/h$.
- * @param W (return) The value of the kernel function $W(x,h)$.
+ * @param u The ratio of the distance to the spline softening length $u = x/H$.
  */
-__attribute__((always_inline, nonnull)) INLINE static void
-kernel_grav_force_eval(const float u, float *const W) {
+__attribute__((const)) INLINE static float kernel_grav_force_eval(
+    const float u) {
 
+  float W;
 #ifdef GADGET2_SOFTENING_CORRECTION
   if (u < 0.5f)
-    *W = 10.6666667f + u * u * (32.f * u - 38.4f);
+    W = 10.6666667f + u * u * (32.f * u - 38.4f);
   else
-    *W = 21.3333333f - 48.f * u + 38.4f * u * u - 10.6666667f * u * u * u -
-         0.06666667f / (u * u * u);
+    W = 21.3333333f - 48.f * u + 38.4f * u * u - 10.6666667f * u * u * u -
+        0.06666667f / (u * u * u);
 #else
 
   /* W(u) = 21u^5 - 90u^4 + 140u^3 - 84u^2 + 14 */
-  *W = 21.f * u - 90.f;
-  *W = *W * u + 140.f;
-  *W = *W * u - 84.f;
-  *W = *W * u;
-  *W = *W * u + 14.f;
+  W = 21.f * u - 90.f;
+  W = W * u + 140.f;
+  W = W * u - 84.f;
+  W = W * u;
+  W = W * u + 14.f;
 #endif
+  return W;
 }
 
 #ifdef SWIFT_GRAVITY_FORCE_CHECKS
@@ -162,64 +162,101 @@ __attribute__((always_inline)) INLINE static void kernel_grav_eval_force_double(
 }
 #endif /* SWIFT_GRAVITY_FORCE_CHECKS */
 
-#undef GADGET2_SOFTENING_CORRECTION
-
 /************************************************/
 /* Derivatives of softening kernel used for FMM */
 /************************************************/
 
-__attribute__((always_inline)) INLINE static float D_soft_1(float u,
-                                                            float u_inv) {
+__attribute__((const)) INLINE static float D_soft_1(const float u) {
+
+#ifdef GADGET2_SOFTENING_CORRECTION
+  error("Invalid choice of softening kernel shape");
+#endif
 
-  /* phi(u) = 3u^7 - 15u^6 + 28u^5 - 21u^4 + 7u^2 - 3 */
-  float phi = 3.f * u - 15.f;
-  phi = phi * u + 28.f;
-  phi = phi * u - 21.f;
+  /* -3u^7 + 15u^6 - 28u^5 + 21u^4 - 7u^2 + 3 */
+  float phi = -3.f * u + 15.f;
+  phi = phi * u - 28.f;
+  phi = phi * u + 21.f;
   phi = phi * u;
-  phi = phi * u + 7.f;
+  phi = phi * u - 7.f;
   phi = phi * u;
-  phi = phi * u - 3.f;
+  phi = phi * u + 3.f;
 
   return phi;
 }
 
-__attribute__((always_inline)) INLINE static float D_soft_3(float u,
-                                                            float u_inv) {
+__attribute__((const)) INLINE static float D_soft_2(const float u) {
 
-  /* phi'(u)/u = 21u^5 - 90u^4 + 140u^3 - 84u^2 + 14 */
-  float phi = 21.f * u - 90.f;
-  phi = phi * u + 140.f;
-  phi = phi * u - 84.f;
+#ifdef GADGET2_SOFTENING_CORRECTION
+  error("Invalid choice of softening kernel shape");
+#endif
+
+  /* -21u^6 + 90u^5 - 140u^4 + 84u^3 - 14u */
+  float phi = -21.f * u + 90.f;
+  phi = phi * u - 140.f;
+  phi = phi * u + 84.f;
+  phi = phi * u;
+  phi = phi * u - 14.f;
   phi = phi * u;
-  phi = phi * u + 14.f;
 
   return phi;
 }
 
-__attribute__((always_inline)) INLINE static float D_soft_5(float u,
-                                                            float u_inv) {
+__attribute__((const)) INLINE static float D_soft_3(const float u) {
 
-  /* (phi'(u)/u)'/u = 105u^3 - 360u^2 + 420u - 168 */
-  float phi = 105.f * u - 360.f;
-  phi = phi * u + 420.f;
-  phi = phi * u - 168.f;
+#ifdef GADGET2_SOFTENING_CORRECTION
+  error("Invalid choice of softening kernel shape");
+#endif
+
+  /* -105u^5 + 360u^4 - 420u^3 + 168u^2 */
+  float phi = -105.f * u + 360.f;
+  phi = phi * u - 420.f;
+  phi = phi * u + 168.f;
+  phi = phi * u;
+  phi = phi * u;
 
   return phi;
 }
 
-__attribute__((always_inline)) INLINE static float D_soft_7(float u,
-                                                            float u_inv) {
-  return 0.f;
+__attribute__((const)) INLINE static float D_soft_4(const float u) {
+
+#ifdef GADGET2_SOFTENING_CORRECTION
+  error("Invalid choice of softening kernel shape");
+#endif
+
+  /* -315u^4 + 720u^3 - 420u^2 */
+  float phi = -315.f * u + 720.f;
+  phi = phi * u - 420.f;
+  phi = phi * u;
+  phi = phi * u;
+
+  return phi;
 }
 
-__attribute__((always_inline)) INLINE static float D_soft_9(float u,
-                                                            float u_inv) {
-  return 0.f;
+__attribute__((const)) INLINE static float D_soft_5(const float u) {
+
+#ifdef GADGET2_SOFTENING_CORRECTION
+  error("Invalid choice of softening kernel shape");
+#endif
+
+  /* -315u^3 + 420u */
+  float phi = -315.f * u;
+  phi = phi * u + 420.f;
+  phi = phi * u;
+
+  return phi;
 }
 
-__attribute__((always_inline)) INLINE static float D_soft_11(float u,
-                                                             float u_inv) {
-  return 0.f;
+__attribute__((const)) INLINE static float D_soft_6(const float u) {
+
+#ifdef GADGET2_SOFTENING_CORRECTION
+  error("Invalid choice of softening kernel shape");
+#endif
+
+  /* Polynomial 315u^2 - 1260, evaluated in Horner form */
+  float phi = 315.f * u;
+  phi = phi * u - 1260.f;
+
+  return phi;
 }
 
 #endif /* SWIFT_KERNEL_GRAVITY_H */
diff --git a/src/kernel_long_gravity.h b/src/kernel_long_gravity.h
index af4a0f42b101b07163a74e42498f10e5fb664697..a565df4f59874bf633c10e7ac8809ce9bddfd0c0 100644
--- a/src/kernel_long_gravity.h
+++ b/src/kernel_long_gravity.h
@@ -23,8 +23,8 @@
 #include "../config.h"
 
 /* Local headers. */
-#include "approx_math.h"
 #include "const.h"
+#include "exp.h"
 #include "inline.h"
 
 /* Standard headers */
@@ -78,35 +78,75 @@ kernel_long_grav_derivatives(const float r, const float r_s_inv,
 
 #ifdef GADGET2_LONG_RANGE_CORRECTION
 
-  /* Powers of u=r/2r_s */
+  /* Powers of u = (1/2) * (r / r_s) */
   const float u = 0.5f * r * r_s_inv;
   const float u2 = u * u;
-  const float u3 = u2 * u;
-  const float u4 = u3 * u;
+  const float u4 = u2 * u2;
 
-  /* Powers of (1/r_s) */
-  const float r_s_inv2 = r_s_inv * r_s_inv;
-  const float r_s_inv3 = r_s_inv2 * r_s_inv;
-  const float r_s_inv4 = r_s_inv3 * r_s_inv;
-  const float r_s_inv5 = r_s_inv4 * r_s_inv;
+  const float exp_u2 = expf(-u2);
 
-  /* Derivatives of \chi */
-  derivs->chi_0 = approx_erfcf(u);
-  derivs->chi_1 = -r_s_inv;
-  derivs->chi_2 = r_s_inv2 * u;
-  derivs->chi_3 = -r_s_inv3 * (u2 - 0.5f);
-  derivs->chi_4 = r_s_inv4 * (u3 - 1.5f * u);
-  derivs->chi_5 = -r_s_inv5 * (u4 - 3.f * u2 + 0.75f);
+  /* Compute erfcf(u) using eq. 7.1.26 of
+   * Abramowitz & Stegun, 1972.
+   *
+   * This has a *relative* error of less than 4e-3 over
+   * the range of interest (0 < u < 5)
+   *
+   * This is a good approximation to use since we already
+   * need exp(-u2) */
+
+  const float t = 1.f / (1.f + 0.3275911f * u);
+
+  const float a1 = 0.254829592f;
+  const float a2 = -0.284496736f;
+  const float a3 = 1.421413741f;
+  const float a4 = -1.453152027f;
+  const float a5 = 1.061405429f;
 
+  /* a1 * t + a2 * t^2 + a3 * t^3 + a4 * t^4 + a5 * t^5 */
+  float a = a5 * t + a4;
+  a = a * t + a3;
+  a = a * t + a2;
+  a = a * t + a1;
+  a = a * t;
+
+  const float erfc_u = a * exp_u2;
+
+  /* C = (1/sqrt(pi)) * expf(-u^2) */
   const float one_over_sqrt_pi = ((float)(M_2_SQRTPI * 0.5));
-  const float common_factor = one_over_sqrt_pi * expf(-u2);
+  const float common_factor = one_over_sqrt_pi * exp_u2;
+
+  /* (1/r_s)^n * C */
+  const float r_s_inv_times_C = r_s_inv * common_factor;
+  const float r_s_inv2_times_C = r_s_inv_times_C * r_s_inv;
+  const float r_s_inv3_times_C = r_s_inv2_times_C * r_s_inv;
+  const float r_s_inv4_times_C = r_s_inv3_times_C * r_s_inv;
+  const float r_s_inv5_times_C = r_s_inv4_times_C * r_s_inv;
 
-  /* Multiply in the common factors */
-  derivs->chi_1 *= common_factor;
-  derivs->chi_2 *= common_factor;
-  derivs->chi_3 *= common_factor;
-  derivs->chi_4 *= common_factor;
-  derivs->chi_5 *= common_factor;
+  /* Now, compute the derivatives of \chi */
+#ifdef GRAVITY_USE_EXACT_LONG_RANGE_MATH
+
+  /* erfc(u) */
+  derivs->chi_0 = erfcf(u);
+#else
+
+  /* erfc(u) */
+  derivs->chi_0 = erfc_u;
+#endif
+
+  /* (-1/r_s) * (1/sqrt(pi)) * expf(-u^2) */
+  derivs->chi_1 = -r_s_inv_times_C;
+
+  /* (1/r_s)^2 * u * (1/sqrt(pi)) * expf(-u^2) */
+  derivs->chi_2 = r_s_inv2_times_C * u;
+
+  /* (1/r_s)^3 * (1/2 - u^2) * (1/sqrt(pi)) * expf(-u^2) */
+  derivs->chi_3 = r_s_inv3_times_C * (0.5f - u2);
+
+  /* (1/r_s)^4 * (u^3 - 3/2 u) * (1/sqrt(pi)) * expf(-u^2) */
+  derivs->chi_4 = r_s_inv4_times_C * (u2 - 1.5f) * u;
+
+  /* -(1/r_s)^5 * (u^4 - 3u^2 + 3/4) * (1/sqrt(pi)) * expf(-u^2) */
+  derivs->chi_5 = -r_s_inv5_times_C * (u4 - 3.f * u2 + 0.75f);
 
 #else
 
@@ -147,65 +187,75 @@ kernel_long_grav_derivatives(const float r, const float r_s_inv,
 }
 
 /**
- * @brief Computes the long-range correction term for the potential calculation
- * coming from FFT.
+ * @brief Computes the long-range correction terms for the potential and
+ * force calculations due to the mesh truncation.
+ *
+ * We use an approximation to the erfc() that gives a *relative* accuracy
+ * for the potential term of 3.4e-3 and 2.4e-4 for the force term over the
+ * range [0, 5] of r_over_r_s.
+ * The accuracy is much better in the range [0, 2] (6e-5 and 2e-5 respectively).
  *
  * @param u The ratio of the distance to the FFT cell scale \f$u = r/r_s\f$.
- * @param W (return) The value of the kernel function.
  */
 __attribute__((always_inline, nonnull)) INLINE static void
-kernel_long_grav_pot_eval(const float u, float *const W) {
+kernel_long_grav_eval(const float r_over_r_s, float *restrict corr_f,
+                      float *restrict corr_pot) {
 
 #ifdef GADGET2_LONG_RANGE_CORRECTION
 
-  const float arg1 = u * 0.5f;
-  const float term1 = approx_erfcf(arg1);
-
-  *W = term1;
-#else
+  const float two_over_sqrt_pi = ((float)M_2_SQRTPI);
 
-  const float x = 2.f * u;
-  const float exp_x = expf(x);  // good_approx_expf(x);
-  const float alpha = 1.f / (1.f + exp_x);
+  const float u = 0.5f * r_over_r_s;
+  const float u2 = u * u;
+  const float exp_u2 = expf(-u2);
 
-  /* We want 2 - 2 exp(x) * alpha */
-  *W = 1.f - alpha * exp_x;
-  *W *= 2.f;
-#endif
-}
+  /* Compute erfcf(u) using eq. 7.1.26 of
+   * Abramowitz & Stegun, 1972.
+   *
+   * This has a *relative* error of less than 4e-3 over
+   * the range of interest (0 < u < 5)
+   *
+   * This is a good approximation to use since we already
+   * need exp(-u2) */
 
-/**
- * @brief Computes the long-range correction term for the force calculation
- * coming from FFT.
- *
- * @param u The ratio of the distance to the FFT cell scale \f$u = r/r_s\f$.
- * @param W (return) The value of the kernel function.
- */
-__attribute__((always_inline, nonnull)) INLINE static void
-kernel_long_grav_force_eval(const float u, float *const W) {
+  const float t = 1.f / (1.f + 0.3275911f * u);
 
-#ifdef GADGET2_LONG_RANGE_CORRECTION
+  const float a1 = 0.254829592f;
+  const float a2 = -0.284496736f;
+  const float a3 = 1.421413741f;
+  const float a4 = -1.453152027f;
+  const float a5 = 1.061405429f;
 
-  const float one_over_sqrt_pi = ((float)(M_2_SQRTPI * 0.5));
+  /* a1 * t + a2 * t^2 + a3 * t^3 + a4 * t^4 + a5 * t^5 */
+  float a = a5 * t + a4;
+  a = a * t + a3;
+  a = a * t + a2;
+  a = a * t + a1;
+  a = a * t;
 
-  const float arg1 = u * 0.5f;
-  const float arg2 = -arg1 * arg1;
+  const float erfc_u = a * exp_u2;
 
-  const float term1 = approx_erfcf(arg1);
-  const float term2 = u * one_over_sqrt_pi * expf(arg2);
+  *corr_pot = erfc_u;
+  *corr_f = erfc_u + two_over_sqrt_pi * u * exp_u2;
 
-  *W = term1 + term2;
 #else
-
-  const float x = 2.f * u;
+  const float x = 2.f * r_over_r_s;
   const float exp_x = expf(x);  // good_approx_expf(x);
   const float alpha = 1.f / (1.f + exp_x);
 
+  /* We want 2 - 2 exp(x) * alpha */
+  float W = 1.f - alpha * exp_x;
+  W = W * 2.f;
+
+  *corr_pot = W;
+
   /* We want 2*(x*alpha - x*alpha^2 - exp(x)*alpha + 1) */
-  *W = 1.f - alpha;
-  *W = *W * x - exp_x;
-  *W = *W * alpha + 1.f;
-  *W *= 2.f;
+  W = 1.f - alpha;
+  W = W * x - exp_x;
+  W = W * alpha + 1.f;
+  W = W * 2.f;
+
+  *corr_f = W;
 #endif
 }
 
diff --git a/src/mesh_gravity.c b/src/mesh_gravity.c
index bbd4496112114277f650582432799b5743422a14..fd81a658ad4ffe784c20c1d69897f91a07e02bec 100644
--- a/src/mesh_gravity.c
+++ b/src/mesh_gravity.c
@@ -28,7 +28,6 @@
 #include "mesh_gravity.h"
 
 /* Local includes. */
-#include "accumulate.h"
 #include "active.h"
 #include "debug.h"
 #include "engine.h"
@@ -336,9 +335,9 @@ void mesh_to_gparts_CIC(struct gpart* gp, const double* pot, const int N,
   /* ---- */
 
   /* Store things back */
-  accumulate_add_f(&gp->a_grav[0], fac * a[0]);
-  accumulate_add_f(&gp->a_grav[1], fac * a[1]);
-  accumulate_add_f(&gp->a_grav[2], fac * a[2]);
+  gp->a_grav[0] += fac * a[0];
+  gp->a_grav[1] += fac * a[1];
+  gp->a_grav[2] += fac * a[2];
   gravity_add_comoving_potential(gp, p);
 #ifdef SWIFT_GRAVITY_FORCE_CHECKS
   gp->potential_PM = p;
@@ -726,7 +725,7 @@ void pm_mesh_free(struct pm_mesh* mesh) {
  * @param nr_threads The number of threads on this MPI rank.
  */
 void pm_mesh_init(struct pm_mesh* mesh, const struct gravity_props* props,
-                  double dim[3], int nr_threads) {
+                  const double dim[3], int nr_threads) {
 
 #ifdef HAVE_FFTW
 
diff --git a/src/mesh_gravity.h b/src/mesh_gravity.h
index e9c07a0de0327984686d65bb9738cde643a7cab8..79c4d1b619cb3f73bc8aa39e97f1c5b2f6386386 100644
--- a/src/mesh_gravity.h
+++ b/src/mesh_gravity.h
@@ -26,6 +26,7 @@
 #include "gravity_properties.h"
 
 /* Forward declarations */
+struct engine;
 struct space;
 struct gpart;
 struct threadpool;
@@ -67,7 +68,7 @@ struct pm_mesh {
 };
 
 void pm_mesh_init(struct pm_mesh *mesh, const struct gravity_props *props,
-                  double dim[3], int nr_threads);
+                  const double dim[3], int nr_threads);
 void pm_mesh_init_no_mesh(struct pm_mesh *mesh, double dim[3]);
 void pm_mesh_compute_potential(struct pm_mesh *mesh, const struct space *s,
                                struct threadpool *tp, int verbose);
diff --git a/src/multipole.h b/src/multipole.h
index 1ac413111fe7a75bc929bc2d95a4ac79a1533d03..8169bff2291b00310074a8aaa9f9974c51b773b9 100644
--- a/src/multipole.h
+++ b/src/multipole.h
@@ -43,14 +43,6 @@
 #include "periodic.h"
 #include "vector_power.h"
 
-#ifdef WITH_MPI
-/* MPI datatypes for transfers */
-extern MPI_Datatype multipole_mpi_type;
-extern MPI_Op multipole_mpi_reduce_op;
-void multipole_create_mpi_types(void);
-void multipole_free_mpi_types(void);
-#endif
-
 /**
  * @brief Reset the data of a #multipole.
  *
@@ -59,8 +51,8 @@ void multipole_free_mpi_types(void);
 __attribute__((nonnull)) INLINE static void gravity_reset(
     struct gravity_tensors *m) {
 
-  /* Just bzero the struct. */
   bzero(m, sizeof(struct gravity_tensors));
+  m->m_pole.min_old_a_grav_norm = FLT_MAX;
 }
 
 /**
@@ -294,6 +286,7 @@ __attribute__((nonnull)) INLINE static void gravity_multipole_init(
     struct multipole *m) {
 
   bzero(m, sizeof(struct multipole));
+  m->min_old_a_grav_norm = FLT_MAX;
 }
 
 /**
@@ -312,8 +305,7 @@ __attribute__((nonnull)) INLINE static void gravity_multipole_print(
   printf("M_000= %12.5e\n", m->M_000);
 #if SELF_GRAVITY_MULTIPOLE_ORDER > 0
   printf("-------------------------\n");
-  printf("M_100= %12.5e M_010= %12.5e M_001= %12.5e\n", m->M_100, m->M_010,
-         m->M_001);
+  printf("M_100= %12.5e M_010= %12.5e M_001= %12.5e\n", 0., 0., 0.);
 #endif
 #if SELF_GRAVITY_MULTIPOLE_ORDER > 1
   printf("-------------------------\n");
@@ -363,14 +355,18 @@ __attribute__((nonnull)) INLINE static void gravity_multipole_add(
   /* Maximum of both softenings */
   ma->max_softening = max(ma->max_softening, mb->max_softening);
 
+  /* Minimum of both old accelerations */
+  ma->min_old_a_grav_norm =
+      min(ma->min_old_a_grav_norm, mb->min_old_a_grav_norm);
+
   /* Add 0th order term */
   ma->M_000 += mb->M_000;
 
 #if SELF_GRAVITY_MULTIPOLE_ORDER > 0
-  /* Add 1st order terms */
-  ma->M_100 += mb->M_100;
-  ma->M_010 += mb->M_010;
-  ma->M_001 += mb->M_001;
+  /* Add 1st order terms (all 0 since we expand around CoM) */
+  /* ma->M_100 += mb->M_100; */
+  /* ma->M_010 += mb->M_010; */
+  /* ma->M_001 += mb->M_001; */
 #endif
 #if SELF_GRAVITY_MULTIPOLE_ORDER > 1
   /* Add 2nd order terms */
@@ -489,6 +485,14 @@ __attribute__((nonnull)) INLINE static int gravity_multipole_equal(
     return 0;
   }
 
+  /* Check minimal old acceleration norm */
+  if (fabsf(ma->min_old_a_grav_norm - mb->min_old_a_grav_norm) /
+          fabsf(ma->min_old_a_grav_norm + mb->min_old_a_grav_norm + FLT_MIN) >
+      tolerance) {
+    message("min old_a_grav_norm different!");
+    return 0;
+  }
+
   /* Check bulk velocity (if non-zero and component > 1% of norm)*/
   if (fabsf(ma->vel[0] + mb->vel[0]) > 1e-10 &&
       (ma->vel[0] * ma->vel[0]) > 0.0001 * v2 &&
@@ -522,27 +526,9 @@ __attribute__((nonnull)) INLINE static int gravity_multipole_equal(
     return 0;
   }
 #if SELF_GRAVITY_MULTIPOLE_ORDER > 0
-  /* Manhattan Norm of 1st order terms */
-  const float order1_norm = fabsf(ma->M_001) + fabsf(mb->M_001) +
-                            fabsf(ma->M_010) + fabsf(mb->M_010) +
-                            fabsf(ma->M_100) + fabsf(mb->M_100);
-
-  /* Compare 1st order terms above 1% of norm */
-  if (fabsf(ma->M_001 + mb->M_001) > 0.01f * order1_norm &&
-      fabsf(ma->M_001 - mb->M_001) / fabsf(ma->M_001 + mb->M_001) > tolerance) {
-    message("M_001 term different");
-    return 0;
-  }
-  if (fabsf(ma->M_010 + mb->M_010) > 0.01f * order1_norm &&
-      fabsf(ma->M_010 - mb->M_010) / fabsf(ma->M_010 + mb->M_010) > tolerance) {
-    message("M_010 term different");
-    return 0;
-  }
-  if (fabsf(ma->M_100 + mb->M_100) > 0.01f * order1_norm &&
-      fabsf(ma->M_100 - mb->M_100) / fabsf(ma->M_100 + mb->M_100) > tolerance) {
-    message("M_100 term different");
-    return 0;
-  }
+    /* Manhattan Norm of 1st order terms */
+    /* Nothing to do here: all the 1st order terms are 0 since we expand
+     * around the CoM */
 #endif
 #if SELF_GRAVITY_MULTIPOLE_ORDER > 1
   /* Manhattan Norm of 2nd order terms */
@@ -868,10 +854,121 @@ __attribute__((nonnull)) INLINE static int gravity_multipole_equal(
 #error "Missing implementation for order >5"
 #endif
 
+  /* Compare the multipole power */
+  for (int i = 0; i < SELF_GRAVITY_MULTIPOLE_ORDER + 1; ++i) {
+    /* Ignore the order 1 power to avoid FPE since it's always 0 */
+    if (i == 1 || (ma->power[i] + mb->power[i] == 0.)) continue;
+    if (fabsf(ma->power[i] - mb->power[i]) /
+            fabsf(ma->power[i] + mb->power[i]) >
+        tolerance) {
+      message("Power of order %d different", i);
+      return 0;
+    }
+  }
+
   /* All is good */
   return 1;
 }
 
+/**
+ * @brief Compute the power of each order of a #multipole into m->power[].
+ *
+ * @param m The #multipole (its M_* terms are read, power[] is overwritten).
+ */
+__attribute__((nonnull)) INLINE static void gravity_multipole_compute_power(
+    struct multipole *m) {
+
+  double power[SELF_GRAVITY_MULTIPOLE_ORDER + 1] = {0.};
+
+  /* 0th order term: M_000 is stored directly (no squaring, no root) */
+  m->power[0] = m->M_000;
+
+#if SELF_GRAVITY_MULTIPOLE_ORDER > 0
+  /* 1st order terms (all 0 since we expand around CoM) */
+  // power[1] += m->M_001 * m->M_001;
+  // power[1] += m->M_010 * m->M_010;
+  // power[1] += m->M_100 * m->M_100;
+
+  // m->power[1] = sqrt(power[1]);
+  m->power[1] = 0.;
+#endif
+#if SELF_GRAVITY_MULTIPOLE_ORDER > 1
+  /* 2nd order terms: square root of the weighted sum of squares */
+  power[2] += m->M_002 * m->M_002;
+  power[2] += 5.000000000000000e-01 * m->M_011 * m->M_011;
+  power[2] += m->M_020 * m->M_020;
+  power[2] += 5.000000000000000e-01 * m->M_101 * m->M_101;
+  power[2] += 5.000000000000000e-01 * m->M_110 * m->M_110;
+  power[2] += m->M_200 * m->M_200;
+
+  m->power[2] = sqrt(power[2]);
+#endif
+#if SELF_GRAVITY_MULTIPOLE_ORDER > 2
+  /* 3rd order terms: square root of the weighted sum of squares */
+  power[3] += m->M_003 * m->M_003;
+  power[3] += 3.333333333333333e-01 * m->M_012 * m->M_012;
+  power[3] += 3.333333333333333e-01 * m->M_021 * m->M_021;
+  power[3] += m->M_030 * m->M_030;
+  power[3] += 3.333333333333333e-01 * m->M_102 * m->M_102;
+  power[3] += 1.666666666666667e-01 * m->M_111 * m->M_111;
+  power[3] += 3.333333333333333e-01 * m->M_120 * m->M_120;
+  power[3] += 3.333333333333333e-01 * m->M_201 * m->M_201;
+  power[3] += 3.333333333333333e-01 * m->M_210 * m->M_210;
+  power[3] += m->M_300 * m->M_300;
+
+  m->power[3] = sqrt(power[3]);
+#endif
+#if SELF_GRAVITY_MULTIPOLE_ORDER > 3
+  /* 4th order terms: square root of the weighted sum of squares */
+  power[4] += m->M_004 * m->M_004;
+  power[4] += 2.500000000000000e-01 * m->M_013 * m->M_013;
+  power[4] += 1.666666666666667e-01 * m->M_022 * m->M_022;
+  power[4] += 2.500000000000000e-01 * m->M_031 * m->M_031;
+  power[4] += m->M_040 * m->M_040;
+  power[4] += 2.500000000000000e-01 * m->M_103 * m->M_103;
+  power[4] += 8.333333333333333e-02 * m->M_112 * m->M_112;
+  power[4] += 8.333333333333333e-02 * m->M_121 * m->M_121;
+  power[4] += 2.500000000000000e-01 * m->M_130 * m->M_130;
+  power[4] += 1.666666666666667e-01 * m->M_202 * m->M_202;
+  power[4] += 8.333333333333333e-02 * m->M_211 * m->M_211;
+  power[4] += 1.666666666666667e-01 * m->M_220 * m->M_220;
+  power[4] += 2.500000000000000e-01 * m->M_301 * m->M_301;
+  power[4] += 2.500000000000000e-01 * m->M_310 * m->M_310;
+  power[4] += m->M_400 * m->M_400;
+
+  m->power[4] = sqrt(power[4]);
+#endif
+#if SELF_GRAVITY_MULTIPOLE_ORDER > 4
+  /* 5th order terms: square root of the weighted sum of squares */
+  power[5] += m->M_005 * m->M_005;
+  power[5] += 2.000000000000000e-01 * m->M_014 * m->M_014;
+  power[5] += 1.000000000000000e-01 * m->M_023 * m->M_023;
+  power[5] += 1.000000000000000e-01 * m->M_032 * m->M_032;
+  power[5] += 2.000000000000000e-01 * m->M_041 * m->M_041;
+  power[5] += m->M_050 * m->M_050;
+  power[5] += 2.000000000000000e-01 * m->M_104 * m->M_104;
+  power[5] += 5.000000000000000e-02 * m->M_113 * m->M_113;
+  power[5] += 3.333333333333333e-02 * m->M_122 * m->M_122;
+  power[5] += 5.000000000000000e-02 * m->M_131 * m->M_131;
+  power[5] += 2.000000000000000e-01 * m->M_140 * m->M_140;
+  power[5] += 1.000000000000000e-01 * m->M_203 * m->M_203;
+  power[5] += 3.333333333333333e-02 * m->M_212 * m->M_212;
+  power[5] += 3.333333333333333e-02 * m->M_221 * m->M_221;
+  power[5] += 1.000000000000000e-01 * m->M_230 * m->M_230;
+  power[5] += 1.000000000000000e-01 * m->M_302 * m->M_302;
+  power[5] += 5.000000000000000e-02 * m->M_311 * m->M_311;
+  power[5] += 1.000000000000000e-01 * m->M_320 * m->M_320;
+  power[5] += 2.000000000000000e-01 * m->M_401 * m->M_401;
+  power[5] += 2.000000000000000e-01 * m->M_410 * m->M_410;
+  power[5] += m->M_500 * m->M_500;
+
+  m->power[5] = sqrt(power[5]);
+#endif
+#if SELF_GRAVITY_MULTIPOLE_ORDER > 5
+#error "Missing implementation for order >5"
+#endif
+}
+
 /**
  * @brief Constructs the #multipole of a bunch of particles around their
  * centre of mass.
@@ -889,6 +986,7 @@ __attribute__((nonnull)) INLINE static void gravity_P2M(
 
   /* Temporary variables */
   float epsilon_max = 0.f;
+  float min_old_a_grav_norm = FLT_MAX;
   double mass = 0.0;
   double com[3] = {0.0, 0.0, 0.0};
   double vel[3] = {0.f, 0.f, 0.f};
@@ -904,6 +1002,7 @@ __attribute__((nonnull)) INLINE static void gravity_P2M(
 #endif
 
     epsilon_max = max(epsilon_max, epsilon);
+    min_old_a_grav_norm = min(min_old_a_grav_norm, gparts[k].old_a_grav_norm);
     mass += m;
     com[0] += gparts[k].x[0] * m;
     com[1] += gparts[k].x[1] * m;
@@ -1060,19 +1159,13 @@ __attribute__((nonnull)) INLINE static void gravity_P2M(
 #endif
   }
 
-#if SELF_GRAVITY_MULTIPOLE_ORDER > 0
-
-  /* We know the first-order multipole (dipole) is 0. */
-  M_100 = M_010 = M_001 = 0.f;
-#endif
-
   /* Store the data on the multipole. */
-  multi->m_pole.max_softening = epsilon_max;
-  multi->m_pole.M_000 = mass;
   multi->r_max = sqrt(r_max2);
   multi->CoM[0] = com[0];
   multi->CoM[1] = com[1];
   multi->CoM[2] = com[2];
+  multi->m_pole.max_softening = epsilon_max;
+  multi->m_pole.min_old_a_grav_norm = min_old_a_grav_norm;
   multi->m_pole.vel[0] = vel[0];
   multi->m_pole.vel[1] = vel[1];
   multi->m_pole.vel[2] = vel[2];
@@ -1082,13 +1175,14 @@ __attribute__((nonnull)) INLINE static void gravity_P2M(
   multi->m_pole.min_delta_vel[0] = min_delta_vel[0];
   multi->m_pole.min_delta_vel[1] = min_delta_vel[1];
   multi->m_pole.min_delta_vel[2] = min_delta_vel[2];
+  multi->m_pole.M_000 = mass;
 
 #if SELF_GRAVITY_MULTIPOLE_ORDER > 0
 
-  /* 1st order terms */
-  multi->m_pole.M_100 = M_100;
-  multi->m_pole.M_010 = M_010;
-  multi->m_pole.M_001 = M_001;
+  /* 1st order terms (all 0 since we expand around CoM) */
+  // multi->m_pole.M_100 = M_100;
+  // multi->m_pole.M_010 = M_010;
+  // multi->m_pole.M_001 = M_001;
 #endif
 #if SELF_GRAVITY_MULTIPOLE_ORDER > 1
 
@@ -1184,6 +1278,9 @@ __attribute__((nonnull)) INLINE static void gravity_M2M(
   /* "shift" the softening */
   m_a->max_softening = m_b->max_softening;
 
+  /* "shift" the minimal acceleration */
+  m_a->min_old_a_grav_norm = m_b->min_old_a_grav_norm;
+
   /* Shift 0th order term */
   m_a->M_000 = m_b->M_000;
 
@@ -1191,228 +1288,291 @@ __attribute__((nonnull)) INLINE static void gravity_M2M(
   const double dx[3] = {pos_a[0] - pos_b[0], pos_a[1] - pos_b[1],
                         pos_a[2] - pos_b[2]};
 
-  /* Shift 1st order term */
-  m_a->M_100 = m_b->M_100 + X_100(dx) * m_b->M_000;
-  m_a->M_010 = m_b->M_010 + X_010(dx) * m_b->M_000;
-  m_a->M_001 = m_b->M_001 + X_001(dx) * m_b->M_000;
+  /* Shift 1st order term (all 0 (after add) since we expand around CoM) */
+  // m_a->M_100 = m_b->M_100 + X_100(dx) * m_b->M_000;
+  // m_a->M_010 = m_b->M_010 + X_010(dx) * m_b->M_000;
+  // m_a->M_001 = m_b->M_001 + X_001(dx) * m_b->M_000;
 #endif
 #if SELF_GRAVITY_MULTIPOLE_ORDER > 1
 
-  /* Shift 2nd order term */
-  m_a->M_200 = m_b->M_200 + X_100(dx) * m_b->M_100 + X_200(dx) * m_b->M_000;
-  m_a->M_020 = m_b->M_020 + X_010(dx) * m_b->M_010 + X_020(dx) * m_b->M_000;
-  m_a->M_002 = m_b->M_002 + X_001(dx) * m_b->M_001 + X_002(dx) * m_b->M_000;
-  m_a->M_110 = m_b->M_110 + X_100(dx) * m_b->M_010 + X_010(dx) * m_b->M_100 +
-               X_110(dx) * m_b->M_000;
-  m_a->M_101 = m_b->M_101 + X_100(dx) * m_b->M_001 + X_001(dx) * m_b->M_100 +
-               X_101(dx) * m_b->M_000;
-  m_a->M_011 = m_b->M_011 + X_010(dx) * m_b->M_001 + X_001(dx) * m_b->M_010 +
-               X_011(dx) * m_b->M_000;
+  /* Shift 2nd order terms (1st order mpole (all 0) commented out) */
+  m_a->M_002 =
+      m_b->M_002 /* + X_001(dx) * m_b->M_001 */ + X_002(dx) * m_b->M_000;
+  m_a->M_011 =
+      m_b->M_011 /* + X_001(dx) * m_b->M_010 */ /* + X_010(dx) * m_b->M_001 */ +
+      X_011(dx) * m_b->M_000;
+  m_a->M_020 =
+      m_b->M_020 /* + X_010(dx) * m_b->M_010 */ + X_020(dx) * m_b->M_000;
+  m_a->M_101 =
+      m_b->M_101 /* + X_001(dx) * m_b->M_100 */ /* + X_100(dx) * m_b->M_001 */ +
+      X_101(dx) * m_b->M_000;
+  m_a->M_110 =
+      m_b->M_110 /* + X_010(dx) * m_b->M_100 */ /* + X_100(dx) * m_b->M_010 */ +
+      X_110(dx) * m_b->M_000;
+  m_a->M_200 =
+      m_b->M_200 /* + X_100(dx) * m_b->M_100 */ + X_200(dx) * m_b->M_000;
 #endif
+
 #if SELF_GRAVITY_MULTIPOLE_ORDER > 2
 
-  /* Shift 3rd order term */
-  m_a->M_300 = m_b->M_300 + X_100(dx) * m_b->M_200 + X_200(dx) * m_b->M_100 +
-               X_300(dx) * m_b->M_000;
-  m_a->M_030 = m_b->M_030 + X_010(dx) * m_b->M_020 + X_020(dx) * m_b->M_010 +
-               X_030(dx) * m_b->M_000;
-  m_a->M_003 = m_b->M_003 + X_001(dx) * m_b->M_002 + X_002(dx) * m_b->M_001 +
+  /* Shift 3rd order terms (1st order mpole (all 0) commented out) */
+  m_a->M_003 = m_b->M_003 +
+               X_001(dx) * m_b->M_002 /* + X_002(dx) * m_b->M_001 */ +
                X_003(dx) * m_b->M_000;
-  m_a->M_210 = m_b->M_210 + X_100(dx) * m_b->M_110 + X_010(dx) * m_b->M_200 +
-               X_200(dx) * m_b->M_010 + X_110(dx) * m_b->M_100 +
-               X_210(dx) * m_b->M_000;
-  m_a->M_201 = m_b->M_201 + X_100(dx) * m_b->M_101 + X_001(dx) * m_b->M_200 +
-               X_200(dx) * m_b->M_001 + X_101(dx) * m_b->M_100 +
-               X_201(dx) * m_b->M_000;
-  m_a->M_120 = m_b->M_120 + X_010(dx) * m_b->M_110 + X_100(dx) * m_b->M_020 +
-               X_020(dx) * m_b->M_100 + X_110(dx) * m_b->M_010 +
-               X_120(dx) * m_b->M_000;
-  m_a->M_021 = m_b->M_021 + X_010(dx) * m_b->M_011 + X_001(dx) * m_b->M_020 +
-               X_020(dx) * m_b->M_001 + X_011(dx) * m_b->M_010 +
-               X_021(dx) * m_b->M_000;
-  m_a->M_102 = m_b->M_102 + X_001(dx) * m_b->M_101 + X_100(dx) * m_b->M_002 +
-               X_002(dx) * m_b->M_100 + X_101(dx) * m_b->M_001 +
-               X_102(dx) * m_b->M_000;
-  m_a->M_012 = m_b->M_012 + X_001(dx) * m_b->M_011 + X_010(dx) * m_b->M_002 +
-               X_002(dx) * m_b->M_010 + X_011(dx) * m_b->M_001 +
+  m_a->M_012 = m_b->M_012 +
+               X_001(dx) * m_b->M_011 /* + X_002(dx) * m_b->M_010 */ +
+               X_010(dx) * m_b->M_002 /* + X_011(dx) * m_b->M_001 */ +
                X_012(dx) * m_b->M_000;
-  m_a->M_111 = m_b->M_111 + X_100(dx) * m_b->M_011 + X_010(dx) * m_b->M_101 +
-               X_001(dx) * m_b->M_110 + X_110(dx) * m_b->M_001 +
-               X_101(dx) * m_b->M_010 + X_011(dx) * m_b->M_100 +
-               X_111(dx) * m_b->M_000;
+  m_a->M_021 = m_b->M_021 + X_001(dx) * m_b->M_020 +
+               X_010(dx) * m_b->M_011 /* + X_011(dx) * m_b->M_010 */
+                                      /* + X_020(dx) * m_b->M_001 */
+               + X_021(dx) * m_b->M_000;
+  m_a->M_030 = m_b->M_030 +
+               X_010(dx) * m_b->M_020 /* + X_020(dx) * m_b->M_010 */ +
+               X_030(dx) * m_b->M_000;
+  m_a->M_102 = m_b->M_102 +
+               X_001(dx) * m_b->M_101 /* + X_002(dx) * m_b->M_100 */ +
+               X_100(dx) * m_b->M_002 /* + X_101(dx) * m_b->M_001 */ +
+               X_102(dx) * m_b->M_000;
+  m_a->M_111 = m_b->M_111 + X_001(dx) * m_b->M_110 +
+               X_010(dx) * m_b->M_101 /* + X_011(dx) * m_b->M_100 */ +
+               X_100(dx) * m_b->M_011 /* + X_101(dx) * m_b->M_010 */
+                                      /* + X_110(dx) * m_b->M_001 */
+               + X_111(dx) * m_b->M_000;
+  m_a->M_120 = m_b->M_120 +
+               X_010(dx) * m_b->M_110 /* + X_020(dx) * m_b->M_100 */ +
+               X_100(dx) * m_b->M_020 /* + X_110(dx) * m_b->M_010 */ +
+               X_120(dx) * m_b->M_000;
+  m_a->M_201 = m_b->M_201 + X_001(dx) * m_b->M_200 +
+               X_100(dx) * m_b->M_101 /* + X_101(dx) * m_b->M_100 */
+                                      /* + X_200(dx) * m_b->M_001 */
+               + X_201(dx) * m_b->M_000;
+  m_a->M_210 = m_b->M_210 + X_010(dx) * m_b->M_200 +
+               X_100(dx) * m_b->M_110 /* + X_110(dx) * m_b->M_100 */
+                                      /* + X_200(dx) * m_b->M_010 */
+               + X_210(dx) * m_b->M_000;
+  m_a->M_300 = m_b->M_300 +
+               X_100(dx) * m_b->M_200 /* + X_200(dx) * m_b->M_100 */ +
+               X_300(dx) * m_b->M_000;
 #endif
 #if SELF_GRAVITY_MULTIPOLE_ORDER > 3
-  /* Shift 4th order terms */
-  m_a->M_004 = m_b->M_004 + X_001(dx) * m_b->M_003 + X_002(dx) * m_b->M_002 +
-               X_003(dx) * m_b->M_001 + X_004(dx) * m_b->M_000;
-  m_a->M_013 = m_b->M_013 + X_001(dx) * m_b->M_012 + X_002(dx) * m_b->M_011 +
-               X_003(dx) * m_b->M_010 + X_010(dx) * m_b->M_003 +
-               X_011(dx) * m_b->M_002 + X_012(dx) * m_b->M_001 +
+
+  /* Shift 4th order terms (1st order mpole (all 0) commented out) */
+  m_a->M_004 = m_b->M_004 + X_001(dx) * m_b->M_003 +
+               X_002(dx) * m_b->M_002 /* + X_003(dx) * m_b->M_001 */ +
+               X_004(dx) * m_b->M_000;
+  m_a->M_013 = m_b->M_013 + X_001(dx) * m_b->M_012 +
+               X_002(dx) * m_b->M_011 /* + X_003(dx) * m_b->M_010 */ +
+               X_010(dx) * m_b->M_003 +
+               X_011(dx) * m_b->M_002 /* + X_012(dx) * m_b->M_001 */ +
                X_013(dx) * m_b->M_000;
   m_a->M_022 = m_b->M_022 + X_001(dx) * m_b->M_021 + X_002(dx) * m_b->M_020 +
-               X_010(dx) * m_b->M_012 + X_011(dx) * m_b->M_011 +
-               X_012(dx) * m_b->M_010 + X_020(dx) * m_b->M_002 +
-               X_021(dx) * m_b->M_001 + X_022(dx) * m_b->M_000;
+               X_010(dx) * m_b->M_012 +
+               X_011(dx) * m_b->M_011 /* + X_012(dx) * m_b->M_010 */ +
+               X_020(dx) * m_b->M_002 /* + X_021(dx) * m_b->M_001 */ +
+               X_022(dx) * m_b->M_000;
   m_a->M_031 = m_b->M_031 + X_001(dx) * m_b->M_030 + X_010(dx) * m_b->M_021 +
-               X_011(dx) * m_b->M_020 + X_020(dx) * m_b->M_011 +
-               X_021(dx) * m_b->M_010 + X_030(dx) * m_b->M_001 +
-               X_031(dx) * m_b->M_000;
-  m_a->M_040 = m_b->M_040 + X_010(dx) * m_b->M_030 + X_020(dx) * m_b->M_020 +
-               X_030(dx) * m_b->M_010 + X_040(dx) * m_b->M_000;
-  m_a->M_103 = m_b->M_103 + X_001(dx) * m_b->M_102 + X_002(dx) * m_b->M_101 +
-               X_003(dx) * m_b->M_100 + X_100(dx) * m_b->M_003 +
-               X_101(dx) * m_b->M_002 + X_102(dx) * m_b->M_001 +
+               X_011(dx) * m_b->M_020 +
+               X_020(dx) * m_b->M_011 /* + X_021(dx) * m_b->M_010 */
+                                      /* + X_030(dx) * m_b->M_001 */
+               + X_031(dx) * m_b->M_000;
+  m_a->M_040 = m_b->M_040 + X_010(dx) * m_b->M_030 +
+               X_020(dx) * m_b->M_020 /* + X_030(dx) * m_b->M_010 */ +
+               X_040(dx) * m_b->M_000;
+  m_a->M_103 = m_b->M_103 + X_001(dx) * m_b->M_102 +
+               X_002(dx) * m_b->M_101 /* + X_003(dx) * m_b->M_100 */ +
+               X_100(dx) * m_b->M_003 +
+               X_101(dx) * m_b->M_002 /* + X_102(dx) * m_b->M_001 */ +
                X_103(dx) * m_b->M_000;
-  m_a->M_112 =
-      m_b->M_112 + X_001(dx) * m_b->M_111 + X_002(dx) * m_b->M_110 +
-      X_010(dx) * m_b->M_102 + X_011(dx) * m_b->M_101 + X_012(dx) * m_b->M_100 +
-      X_100(dx) * m_b->M_012 + X_101(dx) * m_b->M_011 + X_102(dx) * m_b->M_010 +
-      X_110(dx) * m_b->M_002 + X_111(dx) * m_b->M_001 + X_112(dx) * m_b->M_000;
-  m_a->M_121 =
-      m_b->M_121 + X_001(dx) * m_b->M_120 + X_010(dx) * m_b->M_111 +
-      X_011(dx) * m_b->M_110 + X_020(dx) * m_b->M_101 + X_021(dx) * m_b->M_100 +
-      X_100(dx) * m_b->M_021 + X_101(dx) * m_b->M_020 + X_110(dx) * m_b->M_011 +
-      X_111(dx) * m_b->M_010 + X_120(dx) * m_b->M_001 + X_121(dx) * m_b->M_000;
-  m_a->M_130 = m_b->M_130 + X_010(dx) * m_b->M_120 + X_020(dx) * m_b->M_110 +
-               X_030(dx) * m_b->M_100 + X_100(dx) * m_b->M_030 +
-               X_110(dx) * m_b->M_020 + X_120(dx) * m_b->M_010 +
+  m_a->M_112 = m_b->M_112 + X_001(dx) * m_b->M_111 + X_002(dx) * m_b->M_110 +
+               X_010(dx) * m_b->M_102 +
+               X_011(dx) * m_b->M_101 /* + X_012(dx) * m_b->M_100 */ +
+               X_100(dx) * m_b->M_012 +
+               X_101(dx) * m_b->M_011 /* + X_102(dx) * m_b->M_010 */ +
+               X_110(dx) * m_b->M_002 /* + X_111(dx) * m_b->M_001 */ +
+               X_112(dx) * m_b->M_000;
+  m_a->M_121 = m_b->M_121 + X_001(dx) * m_b->M_120 + X_010(dx) * m_b->M_111 +
+               X_011(dx) * m_b->M_110 +
+               X_020(dx) * m_b->M_101 /* + X_021(dx) * m_b->M_100 */ +
+               X_100(dx) * m_b->M_021 + X_101(dx) * m_b->M_020 +
+               X_110(dx) * m_b->M_011 /* + X_111(dx) * m_b->M_010 */
+                                      /* + X_120(dx) * m_b->M_001 */
+               + X_121(dx) * m_b->M_000;
+  m_a->M_130 = m_b->M_130 + X_010(dx) * m_b->M_120 +
+               X_020(dx) * m_b->M_110 /* + X_030(dx) * m_b->M_100 */ +
+               X_100(dx) * m_b->M_030 +
+               X_110(dx) * m_b->M_020 /* + X_120(dx) * m_b->M_010 */ +
                X_130(dx) * m_b->M_000;
   m_a->M_202 = m_b->M_202 + X_001(dx) * m_b->M_201 + X_002(dx) * m_b->M_200 +
-               X_100(dx) * m_b->M_102 + X_101(dx) * m_b->M_101 +
-               X_102(dx) * m_b->M_100 + X_200(dx) * m_b->M_002 +
-               X_201(dx) * m_b->M_001 + X_202(dx) * m_b->M_000;
-  m_a->M_211 =
-      m_b->M_211 + X_001(dx) * m_b->M_210 + X_010(dx) * m_b->M_201 +
-      X_011(dx) * m_b->M_200 + X_100(dx) * m_b->M_111 + X_101(dx) * m_b->M_110 +
-      X_110(dx) * m_b->M_101 + X_111(dx) * m_b->M_100 + X_200(dx) * m_b->M_011 +
-      X_201(dx) * m_b->M_010 + X_210(dx) * m_b->M_001 + X_211(dx) * m_b->M_000;
+               X_100(dx) * m_b->M_102 +
+               X_101(dx) * m_b->M_101 /* + X_102(dx) * m_b->M_100 */ +
+               X_200(dx) * m_b->M_002 /* + X_201(dx) * m_b->M_001 */ +
+               X_202(dx) * m_b->M_000;
+  m_a->M_211 = m_b->M_211 + X_001(dx) * m_b->M_210 + X_010(dx) * m_b->M_201 +
+               X_011(dx) * m_b->M_200 + X_100(dx) * m_b->M_111 +
+               X_101(dx) * m_b->M_110 +
+               X_110(dx) * m_b->M_101 /* + X_111(dx) * m_b->M_100 */ +
+               X_200(dx) * m_b->M_011 /* + X_201(dx) * m_b->M_010 */
+                                      /* + X_210(dx) * m_b->M_001 */
+               + X_211(dx) * m_b->M_000;
   m_a->M_220 = m_b->M_220 + X_010(dx) * m_b->M_210 + X_020(dx) * m_b->M_200 +
-               X_100(dx) * m_b->M_120 + X_110(dx) * m_b->M_110 +
-               X_120(dx) * m_b->M_100 + X_200(dx) * m_b->M_020 +
-               X_210(dx) * m_b->M_010 + X_220(dx) * m_b->M_000;
+               X_100(dx) * m_b->M_120 +
+               X_110(dx) * m_b->M_110 /* + X_120(dx) * m_b->M_100 */ +
+               X_200(dx) * m_b->M_020 /* + X_210(dx) * m_b->M_010 */ +
+               X_220(dx) * m_b->M_000;
   m_a->M_301 = m_b->M_301 + X_001(dx) * m_b->M_300 + X_100(dx) * m_b->M_201 +
-               X_101(dx) * m_b->M_200 + X_200(dx) * m_b->M_101 +
-               X_201(dx) * m_b->M_100 + X_300(dx) * m_b->M_001 +
-               X_301(dx) * m_b->M_000;
+               X_101(dx) * m_b->M_200 +
+               X_200(dx) * m_b->M_101 /* + X_201(dx) * m_b->M_100 */
+                                      /* + X_300(dx) * m_b->M_001 */
+               + X_301(dx) * m_b->M_000;
   m_a->M_310 = m_b->M_310 + X_010(dx) * m_b->M_300 + X_100(dx) * m_b->M_210 +
-               X_110(dx) * m_b->M_200 + X_200(dx) * m_b->M_110 +
-               X_210(dx) * m_b->M_100 + X_300(dx) * m_b->M_010 +
-               X_310(dx) * m_b->M_000;
-  m_a->M_400 = m_b->M_400 + X_100(dx) * m_b->M_300 + X_200(dx) * m_b->M_200 +
-               X_300(dx) * m_b->M_100 + X_400(dx) * m_b->M_000;
+               X_110(dx) * m_b->M_200 +
+               X_200(dx) * m_b->M_110 /* + X_210(dx) * m_b->M_100 */
+                                      /* + X_300(dx) * m_b->M_010 */
+               + X_310(dx) * m_b->M_000;
+  m_a->M_400 = m_b->M_400 + X_100(dx) * m_b->M_300 +
+               X_200(dx) * m_b->M_200 /* + X_300(dx) * m_b->M_100 */ +
+               X_400(dx) * m_b->M_000;
 #endif
 #if SELF_GRAVITY_MULTIPOLE_ORDER > 4
-  /* Shift 5th order terms */
+
+  /* Shift 5th order terms (1st order mpole (all 0) commented out) */
   m_a->M_005 = m_b->M_005 + X_001(dx) * m_b->M_004 + X_002(dx) * m_b->M_003 +
-               X_003(dx) * m_b->M_002 + X_004(dx) * m_b->M_001 +
+               X_003(dx) * m_b->M_002 /* + X_004(dx) * m_b->M_001 */ +
                X_005(dx) * m_b->M_000;
   m_a->M_014 = m_b->M_014 + X_001(dx) * m_b->M_013 + X_002(dx) * m_b->M_012 +
-               X_003(dx) * m_b->M_011 + X_004(dx) * m_b->M_010 +
+               X_003(dx) * m_b->M_011 /* + X_004(dx) * m_b->M_010 */ +
                X_010(dx) * m_b->M_004 + X_011(dx) * m_b->M_003 +
-               X_012(dx) * m_b->M_002 + X_013(dx) * m_b->M_001 +
+               X_012(dx) * m_b->M_002 /* + X_013(dx) * m_b->M_001 */ +
                X_014(dx) * m_b->M_000;
-  m_a->M_023 =
-      m_b->M_023 + X_001(dx) * m_b->M_022 + X_002(dx) * m_b->M_021 +
-      X_003(dx) * m_b->M_020 + X_010(dx) * m_b->M_013 + X_011(dx) * m_b->M_012 +
-      X_012(dx) * m_b->M_011 + X_013(dx) * m_b->M_010 + X_020(dx) * m_b->M_003 +
-      X_021(dx) * m_b->M_002 + X_022(dx) * m_b->M_001 + X_023(dx) * m_b->M_000;
-  m_a->M_032 =
-      m_b->M_032 + X_001(dx) * m_b->M_031 + X_002(dx) * m_b->M_030 +
-      X_010(dx) * m_b->M_022 + X_011(dx) * m_b->M_021 + X_012(dx) * m_b->M_020 +
-      X_020(dx) * m_b->M_012 + X_021(dx) * m_b->M_011 + X_022(dx) * m_b->M_010 +
-      X_030(dx) * m_b->M_002 + X_031(dx) * m_b->M_001 + X_032(dx) * m_b->M_000;
+  m_a->M_023 = m_b->M_023 + X_001(dx) * m_b->M_022 + X_002(dx) * m_b->M_021 +
+               X_003(dx) * m_b->M_020 + X_010(dx) * m_b->M_013 +
+               X_011(dx) * m_b->M_012 +
+               X_012(dx) * m_b->M_011 /* + X_013(dx) * m_b->M_010 */ +
+               X_020(dx) * m_b->M_003 +
+               X_021(dx) * m_b->M_002 /* + X_022(dx) * m_b->M_001 */ +
+               X_023(dx) * m_b->M_000;
+  m_a->M_032 = m_b->M_032 + X_001(dx) * m_b->M_031 + X_002(dx) * m_b->M_030 +
+               X_010(dx) * m_b->M_022 + X_011(dx) * m_b->M_021 +
+               X_012(dx) * m_b->M_020 + X_020(dx) * m_b->M_012 +
+               X_021(dx) * m_b->M_011 /* + X_022(dx) * m_b->M_010 */ +
+               X_030(dx) * m_b->M_002 /* + X_031(dx) * m_b->M_001 */ +
+               X_032(dx) * m_b->M_000;
   m_a->M_041 = m_b->M_041 + X_001(dx) * m_b->M_040 + X_010(dx) * m_b->M_031 +
                X_011(dx) * m_b->M_030 + X_020(dx) * m_b->M_021 +
-               X_021(dx) * m_b->M_020 + X_030(dx) * m_b->M_011 +
-               X_031(dx) * m_b->M_010 + X_040(dx) * m_b->M_001 +
-               X_041(dx) * m_b->M_000;
+               X_021(dx) * m_b->M_020 +
+               X_030(dx) * m_b->M_011 /* + X_031(dx) * m_b->M_010 */
+                                      /* + X_040(dx) * m_b->M_001 */
+               + X_041(dx) * m_b->M_000;
   m_a->M_050 = m_b->M_050 + X_010(dx) * m_b->M_040 + X_020(dx) * m_b->M_030 +
-               X_030(dx) * m_b->M_020 + X_040(dx) * m_b->M_010 +
+               X_030(dx) * m_b->M_020 /* + X_040(dx) * m_b->M_010 */ +
                X_050(dx) * m_b->M_000;
   m_a->M_104 = m_b->M_104 + X_001(dx) * m_b->M_103 + X_002(dx) * m_b->M_102 +
-               X_003(dx) * m_b->M_101 + X_004(dx) * m_b->M_100 +
+               X_003(dx) * m_b->M_101 /* + X_004(dx) * m_b->M_100 */ +
                X_100(dx) * m_b->M_004 + X_101(dx) * m_b->M_003 +
-               X_102(dx) * m_b->M_002 + X_103(dx) * m_b->M_001 +
+               X_102(dx) * m_b->M_002 /* + X_103(dx) * m_b->M_001 */ +
                X_104(dx) * m_b->M_000;
-  m_a->M_113 =
-      m_b->M_113 + X_001(dx) * m_b->M_112 + X_002(dx) * m_b->M_111 +
-      X_003(dx) * m_b->M_110 + X_010(dx) * m_b->M_103 + X_011(dx) * m_b->M_102 +
-      X_012(dx) * m_b->M_101 + X_013(dx) * m_b->M_100 + X_100(dx) * m_b->M_013 +
-      X_101(dx) * m_b->M_012 + X_102(dx) * m_b->M_011 + X_103(dx) * m_b->M_010 +
-      X_110(dx) * m_b->M_003 + X_111(dx) * m_b->M_002 + X_112(dx) * m_b->M_001 +
-      X_113(dx) * m_b->M_000;
-  m_a->M_122 =
-      m_b->M_122 + X_001(dx) * m_b->M_121 + X_002(dx) * m_b->M_120 +
-      X_010(dx) * m_b->M_112 + X_011(dx) * m_b->M_111 + X_012(dx) * m_b->M_110 +
-      X_020(dx) * m_b->M_102 + X_021(dx) * m_b->M_101 + X_022(dx) * m_b->M_100 +
-      X_100(dx) * m_b->M_022 + X_101(dx) * m_b->M_021 + X_102(dx) * m_b->M_020 +
-      X_110(dx) * m_b->M_012 + X_111(dx) * m_b->M_011 + X_112(dx) * m_b->M_010 +
-      X_120(dx) * m_b->M_002 + X_121(dx) * m_b->M_001 + X_122(dx) * m_b->M_000;
-  m_a->M_131 =
-      m_b->M_131 + X_001(dx) * m_b->M_130 + X_010(dx) * m_b->M_121 +
-      X_011(dx) * m_b->M_120 + X_020(dx) * m_b->M_111 + X_021(dx) * m_b->M_110 +
-      X_030(dx) * m_b->M_101 + X_031(dx) * m_b->M_100 + X_100(dx) * m_b->M_031 +
-      X_101(dx) * m_b->M_030 + X_110(dx) * m_b->M_021 + X_111(dx) * m_b->M_020 +
-      X_120(dx) * m_b->M_011 + X_121(dx) * m_b->M_010 + X_130(dx) * m_b->M_001 +
-      X_131(dx) * m_b->M_000;
+  m_a->M_113 = m_b->M_113 + X_001(dx) * m_b->M_112 + X_002(dx) * m_b->M_111 +
+               X_003(dx) * m_b->M_110 + X_010(dx) * m_b->M_103 +
+               X_011(dx) * m_b->M_102 +
+               X_012(dx) * m_b->M_101 /* + X_013(dx) * m_b->M_100 */ +
+               X_100(dx) * m_b->M_013 + X_101(dx) * m_b->M_012 +
+               X_102(dx) * m_b->M_011 /* + X_103(dx) * m_b->M_010 */ +
+               X_110(dx) * m_b->M_003 +
+               X_111(dx) * m_b->M_002 /* + X_112(dx) * m_b->M_001 */ +
+               X_113(dx) * m_b->M_000;
+  m_a->M_122 = m_b->M_122 + X_001(dx) * m_b->M_121 + X_002(dx) * m_b->M_120 +
+               X_010(dx) * m_b->M_112 + X_011(dx) * m_b->M_111 +
+               X_012(dx) * m_b->M_110 + X_020(dx) * m_b->M_102 +
+               X_021(dx) * m_b->M_101 /* + X_022(dx) * m_b->M_100 */ +
+               X_100(dx) * m_b->M_022 + X_101(dx) * m_b->M_021 +
+               X_102(dx) * m_b->M_020 + X_110(dx) * m_b->M_012 +
+               X_111(dx) * m_b->M_011 /* + X_112(dx) * m_b->M_010 */ +
+               X_120(dx) * m_b->M_002 /* + X_121(dx) * m_b->M_001 */ +
+               X_122(dx) * m_b->M_000;
+  m_a->M_131 = m_b->M_131 + X_001(dx) * m_b->M_130 + X_010(dx) * m_b->M_121 +
+               X_011(dx) * m_b->M_120 + X_020(dx) * m_b->M_111 +
+               X_021(dx) * m_b->M_110 +
+               X_030(dx) * m_b->M_101 /* + X_031(dx) * m_b->M_100 */ +
+               X_100(dx) * m_b->M_031 + X_101(dx) * m_b->M_030 +
+               X_110(dx) * m_b->M_021 + X_111(dx) * m_b->M_020 +
+               X_120(dx) * m_b->M_011 /* + X_121(dx) * m_b->M_010 */
+                                      /* + X_130(dx) * m_b->M_001 */
+               + X_131(dx) * m_b->M_000;
   m_a->M_140 = m_b->M_140 + X_010(dx) * m_b->M_130 + X_020(dx) * m_b->M_120 +
-               X_030(dx) * m_b->M_110 + X_040(dx) * m_b->M_100 +
+               X_030(dx) * m_b->M_110 /* + X_040(dx) * m_b->M_100 */ +
                X_100(dx) * m_b->M_040 + X_110(dx) * m_b->M_030 +
-               X_120(dx) * m_b->M_020 + X_130(dx) * m_b->M_010 +
+               X_120(dx) * m_b->M_020 /* + X_130(dx) * m_b->M_010 */ +
                X_140(dx) * m_b->M_000;
-  m_a->M_203 =
-      m_b->M_203 + X_001(dx) * m_b->M_202 + X_002(dx) * m_b->M_201 +
-      X_003(dx) * m_b->M_200 + X_100(dx) * m_b->M_103 + X_101(dx) * m_b->M_102 +
-      X_102(dx) * m_b->M_101 + X_103(dx) * m_b->M_100 + X_200(dx) * m_b->M_003 +
-      X_201(dx) * m_b->M_002 + X_202(dx) * m_b->M_001 + X_203(dx) * m_b->M_000;
-  m_a->M_212 =
-      m_b->M_212 + X_001(dx) * m_b->M_211 + X_002(dx) * m_b->M_210 +
-      X_010(dx) * m_b->M_202 + X_011(dx) * m_b->M_201 + X_012(dx) * m_b->M_200 +
-      X_100(dx) * m_b->M_112 + X_101(dx) * m_b->M_111 + X_102(dx) * m_b->M_110 +
-      X_110(dx) * m_b->M_102 + X_111(dx) * m_b->M_101 + X_112(dx) * m_b->M_100 +
-      X_200(dx) * m_b->M_012 + X_201(dx) * m_b->M_011 + X_202(dx) * m_b->M_010 +
-      X_210(dx) * m_b->M_002 + X_211(dx) * m_b->M_001 + X_212(dx) * m_b->M_000;
-  m_a->M_221 =
-      m_b->M_221 + X_001(dx) * m_b->M_220 + X_010(dx) * m_b->M_211 +
-      X_011(dx) * m_b->M_210 + X_020(dx) * m_b->M_201 + X_021(dx) * m_b->M_200 +
-      X_100(dx) * m_b->M_121 + X_101(dx) * m_b->M_120 + X_110(dx) * m_b->M_111 +
-      X_111(dx) * m_b->M_110 + X_120(dx) * m_b->M_101 + X_121(dx) * m_b->M_100 +
-      X_200(dx) * m_b->M_021 + X_201(dx) * m_b->M_020 + X_210(dx) * m_b->M_011 +
-      X_211(dx) * m_b->M_010 + X_220(dx) * m_b->M_001 + X_221(dx) * m_b->M_000;
-  m_a->M_230 =
-      m_b->M_230 + X_010(dx) * m_b->M_220 + X_020(dx) * m_b->M_210 +
-      X_030(dx) * m_b->M_200 + X_100(dx) * m_b->M_130 + X_110(dx) * m_b->M_120 +
-      X_120(dx) * m_b->M_110 + X_130(dx) * m_b->M_100 + X_200(dx) * m_b->M_030 +
-      X_210(dx) * m_b->M_020 + X_220(dx) * m_b->M_010 + X_230(dx) * m_b->M_000;
-  m_a->M_302 =
-      m_b->M_302 + X_001(dx) * m_b->M_301 + X_002(dx) * m_b->M_300 +
-      X_100(dx) * m_b->M_202 + X_101(dx) * m_b->M_201 + X_102(dx) * m_b->M_200 +
-      X_200(dx) * m_b->M_102 + X_201(dx) * m_b->M_101 + X_202(dx) * m_b->M_100 +
-      X_300(dx) * m_b->M_002 + X_301(dx) * m_b->M_001 + X_302(dx) * m_b->M_000;
-  m_a->M_311 =
-      m_b->M_311 + X_001(dx) * m_b->M_310 + X_010(dx) * m_b->M_301 +
-      X_011(dx) * m_b->M_300 + X_100(dx) * m_b->M_211 + X_101(dx) * m_b->M_210 +
-      X_110(dx) * m_b->M_201 + X_111(dx) * m_b->M_200 + X_200(dx) * m_b->M_111 +
-      X_201(dx) * m_b->M_110 + X_210(dx) * m_b->M_101 + X_211(dx) * m_b->M_100 +
-      X_300(dx) * m_b->M_011 + X_301(dx) * m_b->M_010 + X_310(dx) * m_b->M_001 +
-      X_311(dx) * m_b->M_000;
-  m_a->M_320 =
-      m_b->M_320 + X_010(dx) * m_b->M_310 + X_020(dx) * m_b->M_300 +
-      X_100(dx) * m_b->M_220 + X_110(dx) * m_b->M_210 + X_120(dx) * m_b->M_200 +
-      X_200(dx) * m_b->M_120 + X_210(dx) * m_b->M_110 + X_220(dx) * m_b->M_100 +
-      X_300(dx) * m_b->M_020 + X_310(dx) * m_b->M_010 + X_320(dx) * m_b->M_000;
+  m_a->M_203 = m_b->M_203 + X_001(dx) * m_b->M_202 + X_002(dx) * m_b->M_201 +
+               X_003(dx) * m_b->M_200 + X_100(dx) * m_b->M_103 +
+               X_101(dx) * m_b->M_102 +
+               X_102(dx) * m_b->M_101 /* + X_103(dx) * m_b->M_100 */ +
+               X_200(dx) * m_b->M_003 +
+               X_201(dx) * m_b->M_002 /* + X_202(dx) * m_b->M_001 */ +
+               X_203(dx) * m_b->M_000;
+  m_a->M_212 = m_b->M_212 + X_001(dx) * m_b->M_211 + X_002(dx) * m_b->M_210 +
+               X_010(dx) * m_b->M_202 + X_011(dx) * m_b->M_201 +
+               X_012(dx) * m_b->M_200 + X_100(dx) * m_b->M_112 +
+               X_101(dx) * m_b->M_111 + X_102(dx) * m_b->M_110 +
+               X_110(dx) * m_b->M_102 +
+               X_111(dx) * m_b->M_101 /* + X_112(dx) * m_b->M_100 */ +
+               X_200(dx) * m_b->M_012 +
+               X_201(dx) * m_b->M_011 /* + X_202(dx) * m_b->M_010 */ +
+               X_210(dx) * m_b->M_002 /* + X_211(dx) * m_b->M_001 */ +
+               X_212(dx) * m_b->M_000;
+  m_a->M_221 = m_b->M_221 + X_001(dx) * m_b->M_220 + X_010(dx) * m_b->M_211 +
+               X_011(dx) * m_b->M_210 + X_020(dx) * m_b->M_201 +
+               X_021(dx) * m_b->M_200 + X_100(dx) * m_b->M_121 +
+               X_101(dx) * m_b->M_120 + X_110(dx) * m_b->M_111 +
+               X_111(dx) * m_b->M_110 +
+               X_120(dx) * m_b->M_101 /* + X_121(dx) * m_b->M_100 */ +
+               X_200(dx) * m_b->M_021 + X_201(dx) * m_b->M_020 +
+               X_210(dx) * m_b->M_011 /* + X_211(dx) * m_b->M_010 */
+                                      /* + X_220(dx) * m_b->M_001 */
+               + X_221(dx) * m_b->M_000;
+  m_a->M_230 = m_b->M_230 + X_010(dx) * m_b->M_220 + X_020(dx) * m_b->M_210 +
+               X_030(dx) * m_b->M_200 + X_100(dx) * m_b->M_130 +
+               X_110(dx) * m_b->M_120 +
+               X_120(dx) * m_b->M_110 /* + X_130(dx) * m_b->M_100 */ +
+               X_200(dx) * m_b->M_030 +
+               X_210(dx) * m_b->M_020 /* + X_220(dx) * m_b->M_010 */ +
+               X_230(dx) * m_b->M_000;
+  m_a->M_302 = m_b->M_302 + X_001(dx) * m_b->M_301 + X_002(dx) * m_b->M_300 +
+               X_100(dx) * m_b->M_202 + X_101(dx) * m_b->M_201 +
+               X_102(dx) * m_b->M_200 + X_200(dx) * m_b->M_102 +
+               X_201(dx) * m_b->M_101 /* + X_202(dx) * m_b->M_100 */ +
+               X_300(dx) * m_b->M_002 /* + X_301(dx) * m_b->M_001 */ +
+               X_302(dx) * m_b->M_000;
+  m_a->M_311 = m_b->M_311 + X_001(dx) * m_b->M_310 + X_010(dx) * m_b->M_301 +
+               X_011(dx) * m_b->M_300 + X_100(dx) * m_b->M_211 +
+               X_101(dx) * m_b->M_210 + X_110(dx) * m_b->M_201 +
+               X_111(dx) * m_b->M_200 + X_200(dx) * m_b->M_111 +
+               X_201(dx) * m_b->M_110 +
+               X_210(dx) * m_b->M_101 /* + X_211(dx) * m_b->M_100 */ +
+               X_300(dx) * m_b->M_011 /* + X_301(dx) * m_b->M_010 */
+                                      /* + X_310(dx) * m_b->M_001 */
+               + X_311(dx) * m_b->M_000;
+  m_a->M_320 = m_b->M_320 + X_010(dx) * m_b->M_310 + X_020(dx) * m_b->M_300 +
+               X_100(dx) * m_b->M_220 + X_110(dx) * m_b->M_210 +
+               X_120(dx) * m_b->M_200 + X_200(dx) * m_b->M_120 +
+               X_210(dx) * m_b->M_110 /* + X_220(dx) * m_b->M_100 */ +
+               X_300(dx) * m_b->M_020 /* + X_310(dx) * m_b->M_010 */ +
+               X_320(dx) * m_b->M_000;
   m_a->M_401 = m_b->M_401 + X_001(dx) * m_b->M_400 + X_100(dx) * m_b->M_301 +
                X_101(dx) * m_b->M_300 + X_200(dx) * m_b->M_201 +
-               X_201(dx) * m_b->M_200 + X_300(dx) * m_b->M_101 +
-               X_301(dx) * m_b->M_100 + X_400(dx) * m_b->M_001 +
-               X_401(dx) * m_b->M_000;
+               X_201(dx) * m_b->M_200 +
+               X_300(dx) * m_b->M_101 /* + X_301(dx) * m_b->M_100 */
+                                      /* + X_400(dx) * m_b->M_001 */
+               + X_401(dx) * m_b->M_000;
   m_a->M_410 = m_b->M_410 + X_010(dx) * m_b->M_400 + X_100(dx) * m_b->M_310 +
                X_110(dx) * m_b->M_300 + X_200(dx) * m_b->M_210 +
-               X_210(dx) * m_b->M_200 + X_300(dx) * m_b->M_110 +
-               X_310(dx) * m_b->M_100 + X_400(dx) * m_b->M_010 +
-               X_410(dx) * m_b->M_000;
+               X_210(dx) * m_b->M_200 +
+               X_300(dx) * m_b->M_110 /* + X_310(dx) * m_b->M_100 */
+                                      /* + X_400(dx) * m_b->M_010 */
+               + X_410(dx) * m_b->M_000;
   m_a->M_500 = m_b->M_500 + X_100(dx) * m_b->M_400 + X_200(dx) * m_b->M_300 +
-               X_300(dx) * m_b->M_200 + X_400(dx) * m_b->M_100 +
+               X_300(dx) * m_b->M_200 /* + X_400(dx) * m_b->M_100 */ +
                X_500(dx) * m_b->M_000;
 #endif
 #if SELF_GRAVITY_MULTIPOLE_ORDER > 5
@@ -1926,6 +2086,150 @@ __attribute__((nonnull)) INLINE static void gravity_M2L_symmetric(
   gravity_M2L_apply(l_a, m_b, &pot);
 }
 
+/**
+ * @brief Accumulate into a field tensor the contribution of a single
+ * #gpart (P2L kernel).
+ *
+ * @param l_b The field tensor to update (contributions are added to it).
+ * @param ga The #gpart sourcing the field.
+ * @param pos_b The position of field tensor b.
+ * @param props The #gravity_props of this calculation.
+ * @param periodic Is the calculation periodic ?
+ * @param dim The size of the simulation box.
+ * @param rs_inv The inverse of the gravity mesh-smoothing scale.
+ */
+__attribute__((nonnull)) INLINE static void gravity_P2L(
+    struct grav_tensor *l_b, const struct gpart *ga, const double pos_b[3],
+    const struct gravity_props *props, const int periodic, const double dim[3],
+    const float rs_inv) {
+
+#ifdef SWIFT_DEBUG_CHECKS
+  /* Count all interactions
+   * Note that despite being in a section of the code protected by locks,
+   * we must use atomics here as the long-range task may update this
+   * counter in a lock-free section of code. */
+  accumulate_inc_ll(&l_b->num_interacted);
+#endif
+
+#ifdef SWIFT_GRAVITY_FORCE_CHECKS
+  /* Count tree interactions
+   * Note that despite being in a section of the code protected by locks,
+   * we must use atomics here as the long-range task may update this
+   * counter in a lock-free section of code. */
+  accumulate_inc_ll(&l_b->num_interacted_tree);
+#endif
+
+  /* Record that this tensor has received contributions */
+  l_b->interacted = 1;
+
+  /* Recover the softening and mass of the source particle */
+  const float eps = gravity_get_softening(ga, props);
+  const float mass = ga->mass;
+
+  /* Compute distance vector (tensor position minus particle position) */
+  float dx = (float)(pos_b[0] - ga->x[0]);
+  float dy = (float)(pos_b[1] - ga->x[1]);
+  float dz = (float)(pos_b[2] - ga->x[2]);
+
+  /* Apply periodic BC (only if the calculation is periodic) */
+  if (periodic) {
+    dx = nearest(dx, dim[0]);
+    dy = nearest(dy, dim[1]);
+    dz = nearest(dz, dim[2]);
+  }
+
+  /* Compute squared distance and inverse distance */
+  const float r2 = dx * dx + dy * dy + dz * dz;
+  const float r_inv = 1. / sqrtf(r2);
+
+  /* Compute all derivatives of the potential */
+  struct potential_derivatives_M2L pot;
+  potential_derivatives_compute_M2L(dx, dy, dz, r2, r_inv, eps, periodic,
+                                    rs_inv, &pot);
+
+  /* 0th order contributions */
+  l_b->F_000 += mass * pot.D_000;
+
+#if SELF_GRAVITY_MULTIPOLE_ORDER > 0
+
+  /* 1st order contributions */
+  l_b->F_001 += mass * pot.D_001;
+  l_b->F_010 += mass * pot.D_010;
+  l_b->F_100 += mass * pot.D_100;
+#endif
+#if SELF_GRAVITY_MULTIPOLE_ORDER > 1
+
+  /* 2nd order contributions */
+  l_b->F_002 += mass * pot.D_002;
+  l_b->F_011 += mass * pot.D_011;
+  l_b->F_020 += mass * pot.D_020;
+  l_b->F_101 += mass * pot.D_101;
+  l_b->F_110 += mass * pot.D_110;
+  l_b->F_200 += mass * pot.D_200;
+#endif
+#if SELF_GRAVITY_MULTIPOLE_ORDER > 2
+
+  /* 3rd order contributions */
+  l_b->F_003 += mass * pot.D_003;
+  l_b->F_012 += mass * pot.D_012;
+  l_b->F_021 += mass * pot.D_021;
+  l_b->F_030 += mass * pot.D_030;
+  l_b->F_102 += mass * pot.D_102;
+  l_b->F_111 += mass * pot.D_111;
+  l_b->F_120 += mass * pot.D_120;
+  l_b->F_201 += mass * pot.D_201;
+  l_b->F_210 += mass * pot.D_210;
+  l_b->F_300 += mass * pot.D_300;
+#endif
+#if SELF_GRAVITY_MULTIPOLE_ORDER > 3
+
+  /* 4th order contributions */
+  l_b->F_004 += mass * pot.D_004;
+  l_b->F_013 += mass * pot.D_013;
+  l_b->F_022 += mass * pot.D_022;
+  l_b->F_031 += mass * pot.D_031;
+  l_b->F_040 += mass * pot.D_040;
+  l_b->F_103 += mass * pot.D_103;
+  l_b->F_112 += mass * pot.D_112;
+  l_b->F_121 += mass * pot.D_121;
+  l_b->F_130 += mass * pot.D_130;
+  l_b->F_202 += mass * pot.D_202;
+  l_b->F_211 += mass * pot.D_211;
+  l_b->F_220 += mass * pot.D_220;
+  l_b->F_301 += mass * pot.D_301;
+  l_b->F_310 += mass * pot.D_310;
+  l_b->F_400 += mass * pot.D_400;
+#endif
+#if SELF_GRAVITY_MULTIPOLE_ORDER > 4
+
+  /* 5th order contributions */
+  l_b->F_005 += mass * pot.D_005;
+  l_b->F_014 += mass * pot.D_014;
+  l_b->F_023 += mass * pot.D_023;
+  l_b->F_032 += mass * pot.D_032;
+  l_b->F_041 += mass * pot.D_041;
+  l_b->F_050 += mass * pot.D_050;
+  l_b->F_104 += mass * pot.D_104;
+  l_b->F_113 += mass * pot.D_113;
+  l_b->F_122 += mass * pot.D_122;
+  l_b->F_131 += mass * pot.D_131;
+  l_b->F_140 += mass * pot.D_140;
+  l_b->F_203 += mass * pot.D_203;
+  l_b->F_212 += mass * pot.D_212;
+  l_b->F_221 += mass * pot.D_221;
+  l_b->F_230 += mass * pot.D_230;
+  l_b->F_302 += mass * pot.D_302;
+  l_b->F_311 += mass * pot.D_311;
+  l_b->F_320 += mass * pot.D_320;
+  l_b->F_401 += mass * pot.D_401;
+  l_b->F_410 += mass * pot.D_410;
+  l_b->F_500 += mass * pot.D_500;
+#endif
+#if SELF_GRAVITY_MULTIPOLE_ORDER > 5
+#error "Missing implementation for order >5"
+#endif
+}
+
 /**
  * @brief Compute the reduced field tensor due to a multipole
  *
@@ -1942,7 +2246,7 @@ __attribute__((nonnull)) INLINE static void gravity_M2L_symmetric(
  * @param rs_inv The inverse of the gravity mesh-smoothing scale.
  * @param l (return) The #reduced_grav_tensor to compute.
  */
-__attribute__((nonnull)) INLINE static void gravity_M2P(
+__attribute__((always_inline, nonnull)) INLINE static void gravity_M2P(
     const struct multipole *const m, const float r_x, const float r_y,
     const float r_z, const float r2, const float eps, const int periodic,
     const float rs_inv, struct reduced_grav_tensor *const l) {
@@ -2689,77 +2993,16 @@ __attribute__((nonnull)) INLINE static void gravity_L2P(
 #endif
 
   /* Update the particle */
-  accumulate_add_f(&gp->a_grav[0], a_grav[0]);
-  accumulate_add_f(&gp->a_grav[1], a_grav[1]);
-  accumulate_add_f(&gp->a_grav[2], a_grav[2]);
+  gp->a_grav[0] += a_grav[0];
+  gp->a_grav[1] += a_grav[1];
+  gp->a_grav[2] += a_grav[2];
   gravity_add_comoving_potential(gp, pot);
 
 #ifdef SWIFT_GRAVITY_FORCE_CHECKS
-  accumulate_add_f(&gp->a_grav_m2l[0], a_grav[0]);
-  accumulate_add_f(&gp->a_grav_m2l[1], a_grav[1]);
-  accumulate_add_f(&gp->a_grav_m2l[2], a_grav[2]);
+  gp->a_grav_m2l[0] += a_grav[0];
+  gp->a_grav_m2l[1] += a_grav[1];
+  gp->a_grav_m2l[2] += a_grav[2];
 #endif
 }
 
-/**
- * @brief Checks whether a cell-cell interaction can be appromixated by a M-M
- * interaction using the distance and cell radius.
- *
- * We use the multipole acceptance criterion of Dehnen, 2002, JCoPh, Volume 179,
- * Issue 1, pp.27-42, equation 10.
- *
- * We also additionally check that the distance between the multipoles
- * is larger than the softening lengths (here the distance at which
- * the gravity becomes Newtonian again, not the Plummer-equivalent quantity).
- *
- * @param r_crit_a The size of the multipole A.
- * @param r_crit_b The size of the multipole B.
- * @param theta_crit2 The square of the critical opening angle.
- * @param r2 Square of the distance (periodically wrapped) between the
- * multipoles.
- * @param epsilon_a The maximal softening length of any particle in A.
- * @param epsilon_b The maximal softening length of any particle in B.
- */
-__attribute__((always_inline, const)) INLINE static int gravity_M2L_accept(
-    const double r_crit_a, const double r_crit_b, const double theta_crit2,
-    const double r2, const double epsilon_a, const double epsilon_b) {
-
-  const double size = r_crit_a + r_crit_b;
-  const double size2 = size * size;
-  const double epsilon_a2 = epsilon_a * epsilon_a;
-  const double epsilon_b2 = epsilon_b * epsilon_b;
-
-  // MATTHIEU: Make this mass-dependent ?
-
-  /* Multipole acceptance criterion (Dehnen 2002, eq.10) */
-  return (r2 * theta_crit2 > size2) && (r2 > epsilon_a2) && (r2 > epsilon_b2);
-}
-
-/**
- * @brief Checks whether a particle-cell interaction can be appromixated by a
- * M2P interaction using the distance and cell radius.
- *
- * We use the multipole acceptance criterion of Dehnen, 2002, JCoPh, Volume 179,
- * Issue 1, pp.27-42, equation 10.
- *
- * We also additionally check that the distance between the particle and the
- * multipole is larger than the softening length (here the distance at which
- * the gravity becomes Newtonian again, not the Plummer-equivalent quantity).
- *
- * @param r_max2 The square of the size of the multipole.
- * @param theta_crit2 The square of the critical opening angle.
- * @param r2 Square of the distance (periodically wrapped) between the
- * particle and the multipole.
- * @param epsilon The softening length of the particle.
- */
-__attribute__((always_inline, const)) INLINE static int gravity_M2P_accept(
-    const float r_max2, const float theta_crit2, const float r2,
-    const float epsilon) {
-
-  // MATTHIEU: Make this mass-dependent ?
-
-  /* Multipole acceptance criterion (Dehnen 2002, eq.10) */
-  return (r2 * theta_crit2 > r_max2) && (r2 > epsilon * epsilon);
-}
-
 #endif /* SWIFT_MULTIPOLE_H */
diff --git a/src/multipole_accept.h b/src/multipole_accept.h
new file mode 100644
index 0000000000000000000000000000000000000000..ead2139571f4a037376ac703aa8997e7bfa05282
--- /dev/null
+++ b/src/multipole_accept.h
@@ -0,0 +1,285 @@
+/*******************************************************************************
+ * This file is part of SWIFT.
+ * Copyright (c) 2016 Matthieu Schaller (schaller@strw.leidenuniv.nl)
+ *
+ * This program is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU Lesser General Public License as published
+ * by the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public License
+ * along with this program.  If not, see <http://www.gnu.org/licenses/>.
+ *
+ ******************************************************************************/
+#ifndef SWIFT_MULTIPOLE_ACCEPT_H
+#define SWIFT_MULTIPOLE_ACCEPT_H
+
+/* Config parameters. */
+#include "../config.h"
+
+/* Local includes */
+#include "binomial.h"
+#include "gravity_properties.h"
+#include "integer_power.h"
+#include "kernel_long_gravity.h"
+#include "minmax.h"
+#include "multipole_struct.h"
+
+/**
+ * @brief Inverse of the force estimator used by the multipole acceptance
+ * criterion (MAC).
+ *
+ * Without softening (H == 0) the first test can never succeed; without a
+ * mesh truncation (r_s_inv == 0) the second test can never succeed.
+ *
+ * @param H The spline softening length.
+ * @param r_s_inv The inverse of the scale of the gravity mesh.
+ * @param r2 The square of the distance between the multipoles.
+ * @return r2 in the Newtonian regime, otherwise the softened/truncated value.
+ */
+__attribute__((const)) INLINE static float gravity_f_MAC_inverse(
+    const float H, const float r_s_inv, const float r2) {
+
+  /* Squared radius below which the softened estimate applies */
+  const float r2_soft = (25.f / 81.f) * H * H;
+
+  /* Below softening radius */
+  if (r2 < r2_soft) return r2_soft;
+
+  /* Squared distance expressed in units of the squared mesh scale */
+  const float u2 = r_s_inv * r_s_inv * r2;
+
+  /* Above truncation radius */
+  if (u2 > (25.f / 9.f)) return (9.f / 25.f) * r_s_inv * r_s_inv * r2 * r2;
+
+  /* Normal Newtonian case */
+  return r2;
+}
+
+/**
+ * @brief Checks whether the multipole in B can be used to update the field
+ * tensor in A.
+ *
+ * We use the MAC of Dehnen 2014 eq. 16. Note: this is *not* symmetric in
+ * A<->B unless the purely geometric criterion is used.
+ *
+ * @param props The properties of the gravity scheme.
+ * @param A The gravity tensors that we want to update (sink).
+ * @param B The gravity tensors that act as a source.
+ * @param r2 The square of the distance between the centres of mass of A and B.
+ * @param use_rebuild_sizes Are we considering the sizes at the last tree-build
+ * (1) or current sizes (0)?
+ * @param periodic Are we using periodic BCs? (Currently not read by this
+ * function.)
+ * @return 1 if all the acceptance conditions hold, 0 otherwise.
+ */
+__attribute__((nonnull, pure)) INLINE static int gravity_M2L_accept(
+    const struct gravity_props *props, const struct gravity_tensors *restrict A,
+    const struct gravity_tensors *restrict B, const float r2,
+    const int use_rebuild_sizes, const int periodic) {
+
+  /* Order of the expansion */
+  const int p = SELF_GRAVITY_MULTIPOLE_ORDER;
+
+  /* Sizes of the multipoles (at the last rebuild or current ones) */
+  const float rho_A = use_rebuild_sizes ? A->r_max_rebuild : A->r_max;
+  const float rho_B = use_rebuild_sizes ? B->r_max_rebuild : B->r_max;
+
+  /* Get the larger of the two maximal softening lengths */
+  const float max_softening =
+      max(A->m_pole.max_softening, B->m_pole.max_softening);
+
+  /* Compute the error estimator (without the 1/M_B term that cancels out) */
+  float E_BA_term = 0.f;
+  for (int n = 0; n <= p; ++n) {
+    E_BA_term +=
+        binomial(p, n) * B->m_pole.power[n] * integer_powf(rho_A, p - n);
+  }
+  E_BA_term *= 8.f;
+  if (rho_A + rho_B > 0.f) {
+    E_BA_term *= max(rho_A, rho_B);
+    E_BA_term /= (rho_A + rho_B);
+  }
+
+  /* Compute r^p (the sqrt is only needed when p is odd) */
+#if SELF_GRAVITY_MULTIPOLE_ORDER % 2 == 1
+  const float r_to_p = integer_powf(sqrtf(r2), p);
+#else
+  const float r_to_p = integer_powf(r2, (p / 2));
+#endif
+
+  float f_MAC_inv;
+  if (props->consider_truncation_in_MAC) {
+    f_MAC_inv = gravity_f_MAC_inverse(max_softening, props->r_s_inv, r2);
+  } else {
+    f_MAC_inv = r2;
+  }
+
+  /* Get the minimal acceleration in A */
+  const float min_a_grav = A->m_pole.min_old_a_grav_norm;
+
+  /* Get the relative tolerance */
+  const float eps = props->adaptive_tolerance;
+
+  /* Get the basic geometric critical angle */
+  const float theta_crit = props->theta_crit;
+  const float theta_crit2 = theta_crit * theta_crit;
+
+  /* Get the sum of the multipole sizes */
+  const float rho_sum = rho_A + rho_B;
+
+  if (props->use_advanced_MAC) {
+
+#ifdef SWIFT_DEBUG_CHECKS
+    if (min_a_grav == 0.) error("Acceleration is 0");
+#endif
+
+    /* Test the different conditions */
+
+    /* Condition 1: We are in the converging part of the Taylor expansion */
+    const int cond_1 = rho_sum * rho_sum < r2;
+
+    /* Condition 2: We are not below softening */
+    const int cond_2 =
+        props->use_tree_below_softening || max_softening * max_softening < r2;
+
+    /* Condition 3: The contribution is accurate enough
+     * (E_BA * (1 / r^(p)) * ((1 / r^2) * W) < eps * a_min) */
+    const int cond_3 = E_BA_term < eps * min_a_grav * r_to_p * f_MAC_inv;
+
+    return cond_1 && cond_2 && cond_3;
+
+  } else {
+
+    /* Condition 1: We are obeying the purely geometric criterion */
+    const int cond_1 = rho_sum * rho_sum < theta_crit2 * r2;
+
+    /* Condition 2: We are not below softening */
+    const int cond_2 =
+        props->use_tree_below_softening || max_softening * max_softening < r2;
+
+    return cond_1 && cond_2;
+  }
+}
+
+/**
+ * @brief Checks whether the multipoles in A and B can each be used to update
+ * the field tensor of the other.
+ *
+ * Applies the one-sided test (MAC of Dehnen 2014 eq. 16) in both directions.
+ *
+ * @param props The properties of the gravity scheme.
+ * @param A The first set of multipole and gravity tensors.
+ * @param B The second set of multipole and gravity tensors.
+ * @param r2 The square of the distance between the centres of mass of A and B.
+ * @param use_rebuild_sizes Use the tree-build sizes (1) or current ones (0)?
+ * @param periodic Are we using periodic BCs?
+ */
+__attribute__((nonnull, pure)) INLINE static int gravity_M2L_accept_symmetric(
+    const struct gravity_props *props, const struct gravity_tensors *restrict A,
+    const struct gravity_tensors *restrict B, const float r2,
+    const int use_rebuild_sizes, const int periodic) {
+
+  /* B -> A first; the A -> B direction is only tested if that one passes */
+  if (!gravity_M2L_accept(props, A, B, r2, use_rebuild_sizes, periodic))
+    return 0;
+  return gravity_M2L_accept(props, B, A, r2, use_rebuild_sizes, periodic);
+}
+
+/**
+ * @brief Checks whether the multipole in B can be used to update the particle
+ * pa.
+ *
+ * We use the MAC of Dehnen 2014 eq. 16.
+ *
+ * @param props The properties of the gravity scheme.
+ * @param pa The particle we want to compute forces for (sink).
+ * @param B The gravity tensors that act as a source.
+ * @param r2 The square of the distance between pa and the centre of mass of B.
+ * @param periodic Are we using periodic BCs? (Currently not read here.)
+ */
+__attribute__((nonnull, pure)) INLINE static int gravity_M2P_accept(
+    const struct gravity_props *props, const struct gpart *pa,
+    const struct gravity_tensors *B, const float r2, const int periodic) {
+
+  /* Order of the expansion */
+  const int p = SELF_GRAVITY_MULTIPOLE_ORDER;
+
+  /* Size of the source multipole */
+  const float rho_B = B->r_max;
+
+  /* Get the maximal softening (of the multipole and of the particle) */
+  const float max_softening =
+      max(B->m_pole.max_softening, gravity_get_softening(pa, props));
+
+#ifdef SWIFT_DEBUG_CHECKS
+  if (rho_B == 0.) error("Size of multipole B is 0!");
+#endif
+
+  /* Compute the error estimator (without the 1/M_B term that cancels out) */
+  const float E_BA_term = 8.f * B->m_pole.power[p];
+
+  /* Compute r^p (the sqrt is only needed when p is odd) */
+#if SELF_GRAVITY_MULTIPOLE_ORDER % 2 == 1
+  const float r_to_p = integer_powf(sqrtf(r2), p);
+#else
+  const float r_to_p = integer_powf(r2, (p / 2));
+#endif
+
+  float f_MAC_inv;
+  if (props->consider_truncation_in_MAC) {
+    f_MAC_inv = gravity_f_MAC_inverse(max_softening, props->r_s_inv, r2);
+  } else {
+    f_MAC_inv = r2;
+  }
+
+  /* Get the estimate of the acceleration */
+  const float old_a_grav = pa->old_a_grav_norm;
+
+  /* Get the relative tolerance */
+  const float eps = props->adaptive_tolerance;
+
+  /* Get the basic geometric critical angle */
+  const float theta_crit = props->theta_crit;
+  const float theta_crit2 = theta_crit * theta_crit;
+
+  if (props->use_advanced_MAC) {
+
+#ifdef SWIFT_DEBUG_CHECKS
+    if (old_a_grav == 0.) error("Acceleration is 0");
+#endif
+
+    /* Test the different conditions */
+
+    /* Condition 1: We are in the converging part of the Taylor expansion */
+    const int cond_1 = rho_B * rho_B < r2;
+
+    /* Condition 2: We are not below softening */
+    const int cond_2 =
+        props->use_tree_below_softening || max_softening * max_softening < r2;
+
+    /* Condition 3: The contribution is accurate enough
+     * (E_BA * (1 / r^(p)) * ((1 / r^2) * W) < eps * a_old) */
+    const int cond_3 = E_BA_term < eps * old_a_grav * r_to_p * f_MAC_inv;
+
+    return cond_1 && cond_2 && cond_3;
+
+  } else {
+
+    /* Condition 1: We are obeying the purely geometric criterion */
+    const int cond_1 = rho_B * rho_B < theta_crit2 * r2;
+
+    /* Condition 2: We are not below softening */
+    const int cond_2 =
+        props->use_tree_below_softening || max_softening * max_softening < r2;
+
+    return cond_1 && cond_2;
+  }
+}
+
+#endif /* SWIFT_MULTIPOLE_ACCEPT_H */
diff --git a/src/multipole_struct.h b/src/multipole_struct.h
index ee5e525e286434d385cd2e16d7ed62668702a2ff..ffd615cd798aa3963491ad31fb86f6af8f9e5d89 100644
--- a/src/multipole_struct.h
+++ b/src/multipole_struct.h
@@ -121,19 +121,27 @@ struct multipole {
   /*! Maximal co-moving softening of all the #gpart in the mulipole */
   float max_softening;
 
+  /*! Minimal acceleration norm of all the #gpart in the multipole */
+  float min_old_a_grav_norm;
+
+  /*! Multipole power for the different orders */
+  float power[SELF_GRAVITY_MULTIPOLE_ORDER + 1];
+
   /* 0th order term */
   float M_000;
 
 #if SELF_GRAVITY_MULTIPOLE_ORDER > 0
 
-  /* 1st order terms */
-  float M_100, M_010, M_001;
+  /* 1st order terms (all 0 since we expand around CoM) */
+  // float M_100, M_010, M_001;
+
 #endif
 #if SELF_GRAVITY_MULTIPOLE_ORDER > 1
 
   /* 2nd order terms */
   float M_200, M_020, M_002;
   float M_110, M_101, M_011;
+
 #endif
 #if SELF_GRAVITY_MULTIPOLE_ORDER > 2
 
@@ -225,4 +233,13 @@ struct reduced_grav_tensor {
   float F_001;
 };
 
+#ifdef WITH_MPI
+/* MPI datatypes for transfers */
+extern MPI_Datatype multipole_mpi_type;
+extern MPI_Op multipole_mpi_reduce_op;
+
+void multipole_create_mpi_types(void);
+void multipole_free_mpi_types(void);
+#endif
+
 #endif /* SWIFT_MULTIPOLE_STRUCT_H */
diff --git a/src/runner_doiact_grav.c b/src/runner_doiact_grav.c
index 91abe9e91060b19fa6df1bbbb29e0de6e6442cad..c1d24b68505ee7d0fd7804ce6f26ec3fc8224772 100644
--- a/src/runner_doiact_grav.c
+++ b/src/runner_doiact_grav.c
@@ -99,6 +99,11 @@ void runner_do_grav_down(struct runner *r, struct cell *c, int timer) {
 
     if (!cell_are_gpart_drifted(c, e)) error("Un-drifted gparts");
 
+#ifndef SWIFT_TASKS_WITHOUT_ATOMICS
+    /* Lock the cell for the particle updates */
+    lock_lock(&c->grav.plock);
+#endif
+
     /* Cell properties */
     struct gpart *gparts = c->grav.parts;
     const int gcount = c->grav.count;
@@ -133,11 +138,397 @@ void runner_do_grav_down(struct runner *r, struct cell *c, int timer) {
         gravity_L2P(pot, CoM, gp);
       }
     }
+
+#ifndef SWIFT_TASKS_WITHOUT_ATOMICS
+    /* All done -> unlock the cell */
+    if (lock_unlock(&c->grav.plock) != 0) error("Error unlocking cell");
+#endif
   }
 
   if (timer) TIMER_TOC(timer_dograv_down);
 }
 
+/**
+ * @brief Compute the fully Newtonian gravitational forces from the particles
+ * in one array onto the particles in another array.
+ *
+ * This function *must* be called at the leaf level for particles i.
+ *
+ * @param gparts_i The particles receiving forces (at leaf level).
+ * @param gcount_i The number of particles receiving forces.
+ * @param gparts_j The particles giving forces (at any level).
+ * @param gcount_j The number of particles giving forces.
+ * @param e The #engine structure.
+ * @param grav_props The properties of the gravity scheme.
+ * @param cache_i The gravity cache to use to store the results in i.
+ * @param ci The (leaf-)cell containing the particles i.
+ * @param multi_j The multipole in cell j.
+ */
+static INLINE void runner_dopair_grav_pp_full_no_cache(
+    struct gpart *restrict gparts_i, const int gcount_i,
+    const struct gpart *restrict gparts_j, const int gcount_j,
+    const struct engine *e, const struct gravity_props *grav_props,
+    struct gravity_cache *cache_i, struct cell *ci,
+    const struct gravity_tensors *multi_j) {
+
+  /* Prepare the i cache */
+  const int gcount_padded_i = gcount_i - (gcount_i % VEC_SIZE) + VEC_SIZE;
+  gravity_cache_zero_output(cache_i, gcount_padded_i);
+
+#ifdef SWIFT_DEBUG_CHECKS
+  if (ci->split) error("Using function above leaf level!");
+#endif
+
+  /* Loop over sink particles */
+  for (int i = 0; i < gcount_i; ++i) {
+
+    struct gpart *gpi = &gparts_i[i];
+
+    /* Ignore inactive particles */
+    if (!gpart_is_active(gpi, e)) continue;
+
+#ifdef SWIFT_DEBUG_CHECKS
+    /* Check that particles have been drifted to the current time */
+    if (gpi->ti_drift != e->ti_current)
+      error("gpi not drifted to current time");
+
+    /* Check that the particle was initialised */
+    if (gpi->initialised == 0)
+      error("Adding forces to an un-initialised gpart.");
+#endif
+
+    const float x_i = gpi->x[0];
+    const float y_i = gpi->x[1];
+    const float z_i = gpi->x[2];
+    const float h_i = gravity_get_softening(gpi, grav_props);
+
+    /* Local accumulators for the acceleration and potential */
+    float a_x = 0.f, a_y = 0.f, a_z = 0.f, pot = 0.f;
+
+    /* Now, we can start the interactions for that particle */
+
+    /* Distance to the Multipole */
+    const float CoM_j[3] = {multi_j->CoM[0], multi_j->CoM[1], multi_j->CoM[2]};
+    const float dx_multi = CoM_j[0] - x_i;
+    const float dy_multi = CoM_j[1] - y_i;
+    const float dz_multi = CoM_j[2] - z_i;
+
+    const float r2_multi =
+        dx_multi * dx_multi + dy_multi * dy_multi + dz_multi * dz_multi;
+
+    /* Can we use the multipole here? */
+    if (gcount_j > 1 && gravity_M2P_accept(grav_props, gpi, multi_j, r2_multi,
+                                           /*periodic=*/1)) {
+
+      const float h_inv_i = 1.f / h_i;
+
+      /* Interact! */
+      float f_x, f_y, f_z, pot_ij;
+      runner_iact_grav_pm_full(dx_multi, dy_multi, dz_multi, r2_multi, h_i,
+                               h_inv_i, &multi_j->m_pole, &f_x, &f_y, &f_z,
+                               &pot_ij);
+
+      /* Store it back */
+      a_x += f_x;
+      a_y += f_y;
+      a_z += f_z;
+      pot += pot_ij;
+
+#ifdef SWIFT_DEBUG_CHECKS
+      /* Update the interaction counter */
+      accumulate_add_ll(&gparts_i[i].num_interacted, multi_j->m_pole.num_gpart);
+#endif
+
+#ifdef SWIFT_GRAVITY_FORCE_CHECKS
+      /* Update the M2P interaction counter and forces. */
+      accumulate_add_ll(&gparts_i[i].num_interacted_m2p,
+                        multi_j->m_pole.num_gpart);
+      gparts_i[i].a_grav_m2p[0] += f_x;
+      gparts_i[i].a_grav_m2p[1] += f_y;
+      gparts_i[i].a_grav_m2p[2] += f_z;
+#endif
+
+    } else {
+
+      /* Loop over source particles */
+      for (int j = 0; j < gcount_j; ++j) {
+
+        const struct gpart *gpj = &gparts_j[j];
+
+        /* Ignore inhibited particles */
+        if (gpart_is_inhibited(gpj, e)) continue;
+
+        /* Get info about j */
+        const float x_j = gpj->x[0];
+        const float y_j = gpj->x[1];
+        const float z_j = gpj->x[2];
+        const float mass_j = gpj->mass;
+        const float h_j = gravity_get_softening(gpj, grav_props);
+
+        /* Compute the pairwise distance.
+           Note: no need for box wrap here! This is non-periodic */
+        const float dx = x_j - x_i;
+        const float dy = y_j - y_i;
+        const float dz = z_j - z_i;
+
+        const float r2 = dx * dx + dy * dy + dz * dz;
+
+        /* Pick the maximal softening length of i and j */
+        const float h = max(h_i, h_j);
+        const float h2 = h * h;
+        const float h_inv = 1.f / h;
+        const float h_inv_3 = h_inv * h_inv * h_inv;
+
+#ifdef SWIFT_DEBUG_CHECKS
+        if (r2 == 0.f && h2 == 0.)
+          error("Interacting particles with 0 distance and 0 softening.");
+
+        /* Check that particles have been drifted to the current time */
+        if (gpj->ti_drift != e->ti_current)
+          error("gpj not drifted to current time");
+#endif
+
+        /* Interact! */
+        float f_ij, pot_ij;
+        runner_iact_grav_pp_full(r2, h2, h_inv, h_inv_3, mass_j, &f_ij,
+                                 &pot_ij);
+
+        /* Store it back */
+        a_x += f_ij * dx;
+        a_y += f_ij * dy;
+        a_z += f_ij * dz;
+        pot += pot_ij;
+
+#ifdef SWIFT_DEBUG_CHECKS
+        /* Update the interaction counter */
+        accumulate_inc_ll(&gparts_i[i].num_interacted);
+#endif
+
+#ifdef SWIFT_GRAVITY_FORCE_CHECKS
+        /* Count the pair; add its own force (a_x, a_y, a_z are running sums) */
+        accumulate_inc_ll(&gparts_i[i].num_interacted_p2p);
+        gparts_i[i].a_grav_p2p[0] += f_ij * dx;
+        gparts_i[i].a_grav_p2p[1] += f_ij * dy;
+        gparts_i[i].a_grav_p2p[2] += f_ij * dz;
+#endif
+      }
+    }
+    /* Store everything back in cache */
+    cache_i->a_x[i] += a_x;
+    cache_i->a_y[i] += a_y;
+    cache_i->a_z[i] += a_z;
+    cache_i->pot[i] += pot;
+  }
+
+  /* Write back to the particle data (under the cell's particle lock) */
+#ifndef SWIFT_TASKS_WITHOUT_ATOMICS
+  lock_lock(&ci->grav.plock);
+#endif
+  gravity_cache_write_back(cache_i, ci->grav.parts, gcount_i);
+#ifndef SWIFT_TASKS_WITHOUT_ATOMICS
+  if (lock_unlock(&ci->grav.plock) != 0) error("Error unlocking cell");
+#endif
+}
+
+/**
+ * @brief Compute the long-range truncated gravitational forces from the
+ * particles in one array onto the particles in another array.
+ *
+ * This function *must* be called at the leaf level for particles i.
+ *
+ * @param gparts_i The particles receiving forces (at leaf level).
+ * @param gcount_i The number of particles receiving forces.
+ * @param gparts_j The particles giving forces (at any level).
+ * @param gcount_j The number of particles giving forces.
+ * @param dim The size of the computational domain.
+ * @param e The #engine structure.
+ * @param grav_props The properties of the gravity scheme.
+ * @param cache_i The gravity cache to use to store the results in i.
+ * @param ci The (leaf-)cell containing the particles i.
+ * @param multi_j The multipole in cell j.
+ */
+static INLINE void runner_dopair_grav_pp_truncated_no_cache(
+    struct gpart *restrict gparts_i, const int gcount_i,
+    const struct gpart *restrict gparts_j, const int gcount_j,
+    const float dim[3], const struct engine *e,
+    const struct gravity_props *grav_props, struct gravity_cache *cache_i,
+    struct cell *ci, const struct gravity_tensors *multi_j) {
+
+#ifdef SWIFT_DEBUG_CHECKS
+  if (!e->s->periodic)
+    error("Calling truncated PP function in non-periodic setup.");
+
+  if (ci->split) error("Using function above leaf level!");
+#endif
+
+  const float r_s_inv = grav_props->r_s_inv;
+
+  /* Prepare the i cache */
+  const int gcount_padded_i = gcount_i - (gcount_i % VEC_SIZE) + VEC_SIZE;
+  gravity_cache_zero_output(cache_i, gcount_padded_i);
+
+  /* Loop over sink particles */
+  for (int i = 0; i < gcount_i; ++i) {
+
+    struct gpart *gpi = &gparts_i[i];
+
+    /* Ignore inactive particles */
+    if (!gpart_is_active(gpi, e)) continue;
+
+#ifdef SWIFT_DEBUG_CHECKS
+    /* Check that particles have been drifted to the current time */
+    if (gpi->ti_drift != e->ti_current)
+      error("gpi not drifted to current time");
+
+    /* Check that the particle was initialised */
+    if (gpi->initialised == 0)
+      error("Adding forces to an un-initialised gpart.");
+#endif
+
+    const float x_i = gpi->x[0];
+    const float y_i = gpi->x[1];
+    const float z_i = gpi->x[2];
+    const float h_i = gravity_get_softening(gpi, grav_props);
+
+    /* Local accumulators for the acceleration and potential */
+    float a_x = 0.f, a_y = 0.f, a_z = 0.f, pot = 0.f;
+
+    /* Now, we can start the interactions for that particle */
+
+    /* Distance to the Multipole */
+    const float CoM_j[3] = {multi_j->CoM[0], multi_j->CoM[1], multi_j->CoM[2]};
+    float dx_multi = CoM_j[0] - x_i;
+    float dy_multi = CoM_j[1] - y_i;
+    float dz_multi = CoM_j[2] - z_i;
+
+    /* Apply periodic BCs */
+    dx_multi = nearestf(dx_multi, dim[0]);
+    dy_multi = nearestf(dy_multi, dim[1]);
+    dz_multi = nearestf(dz_multi, dim[2]);
+
+    const float r2_multi =
+        dx_multi * dx_multi + dy_multi * dy_multi + dz_multi * dz_multi;
+
+    /* Can we use the multipole here? */
+    if (gcount_j > 1 && gravity_M2P_accept(grav_props, gpi, multi_j, r2_multi,
+                                           /*periodic=*/1)) {
+
+      const float h_inv_i = 1.f / h_i;
+
+      /* Interact! */
+      float f_x, f_y, f_z, pot_ij;
+      runner_iact_grav_pm_truncated(dx_multi, dy_multi, dz_multi, r2_multi, h_i,
+                                    h_inv_i, r_s_inv, &multi_j->m_pole, &f_x,
+                                    &f_y, &f_z, &pot_ij);
+
+      /* Store it back */
+      a_x += f_x;
+      a_y += f_y;
+      a_z += f_z;
+      pot += pot_ij;
+
+#ifdef SWIFT_DEBUG_CHECKS
+      /* Update the interaction counter */
+      accumulate_add_ll(&gparts_i[i].num_interacted, multi_j->m_pole.num_gpart);
+#endif
+
+#ifdef SWIFT_GRAVITY_FORCE_CHECKS
+      /* Update the M2P interaction counter and forces. */
+      accumulate_add_ll(&gparts_i[i].num_interacted_m2p,
+                        multi_j->m_pole.num_gpart);
+      gparts_i[i].a_grav_m2p[0] += f_x;
+      gparts_i[i].a_grav_m2p[1] += f_y;
+      gparts_i[i].a_grav_m2p[2] += f_z;
+#endif
+
+    } else {
+
+      /* Loop over source particles */
+      for (int j = 0; j < gcount_j; ++j) {
+
+        const struct gpart *gpj = &gparts_j[j];
+
+        /* Ignore inhibited particles */
+        if (gpart_is_inhibited(gpj, e)) continue;
+
+        /* Get info about j */
+        const float x_j = gpj->x[0];
+        const float y_j = gpj->x[1];
+        const float z_j = gpj->x[2];
+        const float mass_j = gpj->mass;
+        const float h_j = gravity_get_softening(gpj, grav_props);
+
+        /* Compute the pairwise distance.
+           Note: this is the raw separation; it is box-wrapped just below */
+        float dx = x_j - x_i;
+        float dy = y_j - y_i;
+        float dz = z_j - z_i;
+
+        /* Correct for periodic BCs */
+        dx = nearestf(dx, dim[0]);
+        dy = nearestf(dy, dim[1]);
+        dz = nearestf(dz, dim[2]);
+
+        const float r2 = dx * dx + dy * dy + dz * dz;
+
+        /* Pick the maximal softening length of i and j */
+        const float h = max(h_i, h_j);
+        const float h2 = h * h;
+        const float h_inv = 1.f / h;
+        const float h_inv_3 = h_inv * h_inv * h_inv;
+
+#ifdef SWIFT_DEBUG_CHECKS
+        if (r2 == 0.f && h2 == 0.)
+          error("Interacting particles with 0 distance and 0 softening.");
+
+        /* Check that particles have been drifted to the current time */
+        if (gpj->ti_drift != e->ti_current)
+          error("gpj not drifted to current time");
+#endif
+
+        /* Interact! */
+        float f_ij, pot_ij;
+        runner_iact_grav_pp_truncated(r2, h2, h_inv, h_inv_3, mass_j, r_s_inv,
+                                      &f_ij, &pot_ij);
+
+        /* Store it back */
+        a_x += f_ij * dx;
+        a_y += f_ij * dy;
+        a_z += f_ij * dz;
+        pot += pot_ij;
+
+#ifdef SWIFT_DEBUG_CHECKS
+        /* Update the interaction counter */
+        accumulate_inc_ll(&gparts_i[i].num_interacted);
+#endif
+
+#ifdef SWIFT_GRAVITY_FORCE_CHECKS
+        /* Count the pair; add its own force (a_x, a_y, a_z are running sums) */
+        accumulate_inc_ll(&gparts_i[i].num_interacted_p2p);
+        gparts_i[i].a_grav_p2p[0] += f_ij * dx;
+        gparts_i[i].a_grav_p2p[1] += f_ij * dy;
+        gparts_i[i].a_grav_p2p[2] += f_ij * dz;
+#endif
+      }
+    }
+
+    /* Store everything back in cache */
+    cache_i->a_x[i] += a_x;
+    cache_i->a_y[i] += a_y;
+    cache_i->a_z[i] += a_z;
+    cache_i->pot[i] += pot;
+  }
+
+  /* Write back to the particle data (under the cell's particle lock) */
+#ifndef SWIFT_TASKS_WITHOUT_ATOMICS
+  lock_lock(&ci->grav.plock);
+#endif
+  gravity_cache_write_back(cache_i, ci->grav.parts, gcount_i);
+#ifndef SWIFT_TASKS_WITHOUT_ATOMICS
+  if (lock_unlock(&ci->grav.plock) != 0) error("Error unlocking cell");
+#endif
+}
+
 /**
  * @brief Compute the non-truncated gravity interactions between all particles
  * of a cell and the particles of the other cell.
@@ -281,9 +672,9 @@ static INLINE void runner_dopair_grav_pp_full(
     ci_cache->pot[pid] += pot;
 
 #ifdef SWIFT_GRAVITY_FORCE_CHECKS
-    accumulate_add_f(&gparts_i[pid].a_grav_p2p[0], a_x);
-    accumulate_add_f(&gparts_i[pid].a_grav_p2p[1], a_y);
-    accumulate_add_f(&gparts_i[pid].a_grav_p2p[2], a_z);
+    gparts_i[pid].a_grav_p2p[0] += a_x;
+    gparts_i[pid].a_grav_p2p[1] += a_y;
+    gparts_i[pid].a_grav_p2p[2] += a_z;
 #endif
   }
 }
@@ -437,9 +828,9 @@ static INLINE void runner_dopair_grav_pp_truncated(
     ci_cache->pot[pid] += pot;
 
 #ifdef SWIFT_GRAVITY_FORCE_CHECKS
-    accumulate_add_f(&gparts_i[pid].a_grav_p2p[0], a_x);
-    accumulate_add_f(&gparts_i[pid].a_grav_p2p[1], a_y);
-    accumulate_add_f(&gparts_i[pid].a_grav_p2p[2], a_z);
+    gparts_i[pid].a_grav_p2p[0] += a_x;
+    gparts_i[pid].a_grav_p2p[1] += a_y;
+    gparts_i[pid].a_grav_p2p[2] += a_z;
 #endif
   }
 }
@@ -488,7 +879,12 @@ static INLINE void runner_dopair_grav_pm_full(
                             SWIFT_CACHE_ALIGNMENT);
   swift_assume_size(gcount_padded_i, VEC_SIZE);
 
+  const float multi_epsilon = multi_j->max_softening;
+
   /* Loop over all particles in ci... */
+#ifndef SWIFT_DEBUG_CHECKS
+#pragma omp simd
+#endif
   for (int pid = 0; pid < gcount_padded_i; pid++) {
 
     /* Skip inactive particles */
@@ -521,7 +917,7 @@ static INLINE void runner_dopair_grav_pm_full(
     const float z_i = z[pid];
 
     /* Some powers of the softening length */
-    const float h_i = epsilon[pid];
+    const float h_i = max(epsilon[pid], multi_epsilon);
     const float h_inv_i = 1.f / h_i;
 
     /* Distance to the Multipole */
@@ -539,16 +935,9 @@ static INLINE void runner_dopair_grav_pm_full(
     const float r2 = dx * dx + dy * dy + dz * dz;
 
 #ifdef SWIFT_DEBUG_CHECKS
-    const float r_max_j = cj->grav.multipole->r_max;
-    const float r_max2 = r_max_j * r_max_j;
-    const float theta_crit2 = e->gravity_properties->theta_crit2;
-
-    /* Note: 0.99 and 1.1 to avoid FP rounding false-positives */
-    if (!gravity_M2P_accept(r_max2, theta_crit2 * 1.1, r2, 0.99 * h_i))
-      error(
-          "use_mpole[i] set when M2P accept fails CoM=[%e %e %e] pos=[%e %e "
-          "%e], rmax=%e r=%e epsilon=%e",
-          CoM_j[0], CoM_j[1], CoM_j[2], x_i, y_i, z_i, r_max_j, sqrtf(r2), h_i);
+    if (!gravity_M2P_accept(e->gravity_properties, &gparts_i[pid],
+                            cj->grav.multipole, r2 * 1.01, periodic))
+      error("use_mpole[i] set when M2P accept fails");
 #endif
 
     /* Interact! */
@@ -574,9 +963,9 @@ static INLINE void runner_dopair_grav_pm_full(
     if (pid < gcount_i) {
       accumulate_add_ll(&gparts_i[pid].num_interacted_m2p,
                         cj->grav.multipole->m_pole.num_gpart);
-      accumulate_add_f(&gparts_i[pid].a_grav_m2p[0], f_x);
-      accumulate_add_f(&gparts_i[pid].a_grav_m2p[1], f_y);
-      accumulate_add_f(&gparts_i[pid].a_grav_m2p[2], f_z);
+      gparts_i[pid].a_grav_m2p[0] += f_x;
+      gparts_i[pid].a_grav_m2p[1] += f_y;
+      gparts_i[pid].a_grav_m2p[2] += f_z;
     }
 #endif
   }
@@ -633,7 +1022,12 @@ static INLINE void runner_dopair_grav_pm_truncated(
                             SWIFT_CACHE_ALIGNMENT);
   swift_assume_size(gcount_padded_i, VEC_SIZE);
 
+  const float multi_epsilon = multi_j->max_softening;
+
   /* Loop over all particles in ci... */
+#ifndef SWIFT_DEBUG_CHECKS
+#pragma omp simd
+#endif
   for (int pid = 0; pid < gcount_padded_i; pid++) {
 
     /* Skip inactive particles */
@@ -666,7 +1060,7 @@ static INLINE void runner_dopair_grav_pm_truncated(
     const float z_i = z[pid];
 
     /* Some powers of the softening length */
-    const float h_i = epsilon[pid];
+    const float h_i = max(epsilon[pid], multi_epsilon);
     const float h_inv_i = 1.f / h_i;
 
     /* Distance to the Multipole */
@@ -682,16 +1076,9 @@ static INLINE void runner_dopair_grav_pm_truncated(
     const float r2 = dx * dx + dy * dy + dz * dz;
 
 #ifdef SWIFT_DEBUG_CHECKS
-    const float r_max_j = cj->grav.multipole->r_max;
-    const float r_max2 = r_max_j * r_max_j;
-    const float theta_crit2 = e->gravity_properties->theta_crit2;
-
-    /* 0.99 and 1.1 to avoid FP rounding false-positives */
-    if (!gravity_M2P_accept(r_max2, theta_crit2 * 1.1, r2, 0.99 * h_i))
-      error(
-          "use_mpole[i] set when M2P accept fails CoM=[%e %e %e] pos=[%e %e "
-          "%e], rmax=%e",
-          CoM_j[0], CoM_j[1], CoM_j[2], x_i, y_i, z_i, r_max_j);
+    if (!gravity_M2P_accept(e->gravity_properties, &gparts_i[pid],
+                            cj->grav.multipole, r2 * 1.01, /*periodic=*/1))
+      error("use_mpole[i] set when M2P accept fails");
 #endif
 
     /* Interact! */
@@ -717,9 +1104,9 @@ static INLINE void runner_dopair_grav_pm_truncated(
     if (pid < gcount_i) {
       accumulate_add_ll(&gparts_i[pid].num_interacted_m2p,
                         cj->grav.multipole->m_pole.num_gpart);
-      accumulate_add_f(&gparts_i[pid].a_grav_m2p[0], f_x);
-      accumulate_add_f(&gparts_i[pid].a_grav_m2p[1], f_y);
-      accumulate_add_f(&gparts_i[pid].a_grav_m2p[2], f_z);
+      gparts_i[pid].a_grav_m2p[0] += f_x;
+      gparts_i[pid].a_grav_m2p[1] += f_y;
+      gparts_i[pid].a_grav_m2p[2] += f_z;
     }
 #endif
   }
@@ -742,7 +1129,7 @@ static INLINE void runner_dopair_grav_pm_truncated(
  * @param ci The first #cell.
  * @param cj The other #cell.
  * @param symmetric Are we updating both cells (1) or just ci (0) ?
- * @param allow_mpole Are we allowing the use of P2M interactions ?
+ * @param allow_mpole Are we allowing the use of M2P interactions ?
  */
 void runner_dopair_grav_pp(struct runner *r, struct cell *ci, struct cell *cj,
                            const int symmetric, const int allow_mpole) {
@@ -767,6 +1154,7 @@ void runner_dopair_grav_pp(struct runner *r, struct cell *ci, struct cell *cj,
   if (!ci_active && !cj_active) return;
   if (!ci_active && !symmetric) return;
 
+#ifdef SWIFT_DEBUG_CHECKS
   /* Check that we are not doing something stupid */
   if (ci->split || cj->split) error("Running P-P on splitable cells");
 
@@ -777,6 +1165,7 @@ void runner_dopair_grav_pp(struct runner *r, struct cell *ci, struct cell *cj,
     error("Un-drifted multipole");
   if (ci_active && cj->grav.ti_old_multipole != e->ti_current)
     error("Un-drifted multipole");
+#endif
 
   /* Caches to play with */
   struct gravity_cache *const ci_cache = &r->ci_gravity_cache;
@@ -789,8 +1178,6 @@ void runner_dopair_grav_pp(struct runner *r, struct cell *ci, struct cell *cj,
   /* Recover the multipole info and shift the CoM locations */
   const float rmax_i = ci->grav.multipole->r_max;
   const float rmax_j = cj->grav.multipole->r_max;
-  const float rmax2_i = rmax_i * rmax_i;
-  const float rmax2_j = rmax_j * rmax_j;
   const struct multipole *multi_i = &ci->grav.multipole->m_pole;
   const struct multipole *multi_j = &cj->grav.multipole->m_pole;
   const float CoM_i[3] = {(float)(ci->grav.multipole->CoM[0] - shift_i[0]),
@@ -815,13 +1202,18 @@ void runner_dopair_grav_pp(struct runner *r, struct cell *ci, struct cell *cj,
           gcount_j);
 #endif
 
+  const int allow_multipole_i = allow_mpole && ci->grav.count > 1;
+  const int allow_multipole_j = allow_mpole && cj->grav.count > 1;
+
   /* Fill the caches */
-  gravity_cache_populate(e->max_active_bin, allow_mpole, periodic, dim,
+  gravity_cache_populate(e->max_active_bin, allow_multipole_j, periodic, dim,
                          ci_cache, ci->grav.parts, gcount_i, gcount_padded_i,
-                         shift_i, CoM_j, rmax2_j, ci, e->gravity_properties);
-  gravity_cache_populate(e->max_active_bin, allow_mpole, periodic, dim,
+                         shift_i, CoM_j, cj->grav.multipole, ci,
+                         e->gravity_properties);
+  gravity_cache_populate(e->max_active_bin, allow_multipole_i, periodic, dim,
                          cj_cache, cj->grav.parts, gcount_j, gcount_padded_j,
-                         shift_j, CoM_i, rmax2_i, cj, e->gravity_properties);
+                         shift_j, CoM_i, ci->grav.multipole, cj,
+                         e->gravity_properties);
 
   /* Can we use the Newtonian version or do we need the truncated one ? */
   if (!periodic) {
@@ -837,7 +1229,7 @@ void runner_dopair_grav_pp(struct runner *r, struct cell *ci, struct cell *cj,
                                  ci->grav.parts, cj->grav.parts);
 
       /* Then the M2P */
-      if (allow_mpole)
+      if (allow_multipole_j)
         runner_dopair_grav_pm_full(ci_cache, gcount_padded_i, CoM_j, multi_j,
                                    periodic, dim, e, ci->grav.parts, gcount_i,
                                    cj);
@@ -850,7 +1242,7 @@ void runner_dopair_grav_pp(struct runner *r, struct cell *ci, struct cell *cj,
                                  cj->grav.parts, ci->grav.parts);
 
       /* Then the M2P */
-      if (allow_mpole)
+      if (allow_multipole_i)
         runner_dopair_grav_pm_full(cj_cache, gcount_padded_j, CoM_i, multi_i,
                                    periodic, dim, e, cj->grav.parts, gcount_j,
                                    ci);
@@ -859,8 +1251,14 @@ void runner_dopair_grav_pp(struct runner *r, struct cell *ci, struct cell *cj,
   } else { /* Periodic BC */
 
     /* Get the relative distance between the CoMs */
-    const double dx[3] = {CoM_j[0] - CoM_i[0], CoM_j[1] - CoM_i[1],
-                          CoM_j[2] - CoM_i[2]};
+    double dx[3] = {CoM_j[0] - CoM_i[0], CoM_j[1] - CoM_i[1],
+                    CoM_j[2] - CoM_i[2]};
+
+    /* Correct for periodic BCs */
+    dx[0] = nearestf(dx[0], dim[0]);
+    dx[1] = nearestf(dx[1], dim[1]);
+    dx[2] = nearestf(dx[2], dim[2]);
+
     const double r2 = dx[0] * dx[0] + dx[1] * dx[1] + dx[2] * dx[2];
 
     /* Get the maximal distance between any two particles */
@@ -880,7 +1278,7 @@ void runner_dopair_grav_pp(struct runner *r, struct cell *ci, struct cell *cj,
                                         ci->grav.parts, cj->grav.parts);
 
         /* Then the M2P */
-        if (allow_mpole)
+        if (allow_multipole_j)
           runner_dopair_grav_pm_truncated(ci_cache, gcount_padded_i, CoM_j,
                                           multi_j, dim, r_s_inv, e,
                                           ci->grav.parts, gcount_i, cj);
@@ -893,7 +1291,7 @@ void runner_dopair_grav_pp(struct runner *r, struct cell *ci, struct cell *cj,
                                         cj->grav.parts, ci->grav.parts);
 
         /* Then the M2P */
-        if (allow_mpole)
+        if (allow_multipole_i)
           runner_dopair_grav_pm_truncated(cj_cache, gcount_padded_j, CoM_i,
                                           multi_i, dim, r_s_inv, e,
                                           cj->grav.parts, gcount_j, ci);
@@ -912,7 +1310,7 @@ void runner_dopair_grav_pp(struct runner *r, struct cell *ci, struct cell *cj,
                                    ci->grav.parts, cj->grav.parts);
 
         /* Then the M2P */
-        if (allow_mpole)
+        if (allow_multipole_j)
           runner_dopair_grav_pm_full(ci_cache, gcount_padded_i, CoM_j, multi_j,
                                      periodic, dim, e, ci->grav.parts, gcount_i,
                                      cj);
@@ -925,7 +1323,7 @@ void runner_dopair_grav_pp(struct runner *r, struct cell *ci, struct cell *cj,
                                    cj->grav.parts, ci->grav.parts);
 
         /* Then the M2P */
-        if (allow_mpole)
+        if (allow_multipole_i)
           runner_dopair_grav_pm_full(cj_cache, gcount_padded_j, CoM_i, multi_i,
                                      periodic, dim, e, cj->grav.parts, gcount_j,
                                      ci);
@@ -933,14 +1331,89 @@ void runner_dopair_grav_pp(struct runner *r, struct cell *ci, struct cell *cj,
     }
   }
 
-  /* Write back to the particles */
-  if (ci_active) gravity_cache_write_back(ci_cache, ci->grav.parts, gcount_i);
-  if (cj_active && symmetric)
+  /* Write back to the particles in ci */
+  if (ci_active) {
+#ifndef SWIFT_TASKS_WITHOUT_ATOMICS
+    lock_lock(&ci->grav.plock);
+#endif
+    gravity_cache_write_back(ci_cache, ci->grav.parts, gcount_i);
+#ifndef SWIFT_TASKS_WITHOUT_ATOMICS
+    if (lock_unlock(&ci->grav.plock) != 0) error("Error unlocking cell");
+#endif
+  }
+
+  /* Write back to the particles in cj */
+  if (cj_active && symmetric) {
+#ifndef SWIFT_TASKS_WITHOUT_ATOMICS
+    lock_lock(&cj->grav.plock);
+#endif
     gravity_cache_write_back(cj_cache, cj->grav.parts, gcount_j);
+#ifndef SWIFT_TASKS_WITHOUT_ATOMICS
+    if (lock_unlock(&cj->grav.plock) != 0) error("Error unlocking cell");
+#endif
+  }
 
   TIMER_TOC(timer_dopair_grav_pp);
 }
 
+/**
+ * @brief Compute the gravitational forces from particles in #cell cj onto
+ * particles in #cell ci without using a cache for cj.
+ *
+ * This function does not update the particles in cj and does not make use of
+ * the field tensors in ci. Nothing is done if ci is inactive, not local to
+ * this node, or if either cell is empty. The function recurses to the leaf
+ * level in ci (not cj!) and then either uses M2P or P2P when too close.
+ *
+ * @param r The #runner object.
+ * @param ci The cell containing particles to update.
+ * @param cj The cell containing the particles sourcing the gravity.
+ */
+void runner_dopair_grav_pp_no_cache(struct runner *r, struct cell *restrict ci,
+                                    const struct cell *restrict cj) {
+
+  /* Recover some useful constants */
+  const struct engine *e = r->e;
+  const int periodic = e->mesh->periodic;
+  const float dim[3] = {(float)e->mesh->dim[0], (float)e->mesh->dim[1],
+                        (float)e->mesh->dim[2]};
+
+  /* Record activity status (ci must be active and owned by this node) */
+  const int ci_active =
+      cell_is_active_gravity(ci, e) && (ci->nodeID == e->nodeID);
+
+  /* Anything to do here? */
+  if (!ci_active) return;
+  if (ci->grav.count == 0 || cj->grav.count == 0) return;
+
+  /* Recurse? Only ci is opened; cj is always passed down whole */
+  if (ci->split) {
+
+    for (int k = 0; k < 8; ++k) {
+      if (ci->progeny[k] != NULL) {
+        runner_dopair_grav_pp_no_cache(r, ci->progeny[k], cj);
+      }
+    }
+
+  } else {
+
+    /* Can we use the Newtonian version or do we need the truncated one ? */
+    if (!periodic) {
+
+      runner_dopair_grav_pp_full_no_cache(
+          ci->grav.parts, ci->grav.count, cj->grav.parts, cj->grav.count, e,
+          e->gravity_properties, &r->ci_gravity_cache, ci, cj->grav.multipole);
+
+    } else {
+
+      runner_dopair_grav_pp_truncated_no_cache(
+          ci->grav.parts, ci->grav.count, cj->grav.parts, cj->grav.count, dim,
+          e, e->gravity_properties, &r->ci_gravity_cache, ci,
+          cj->grav.multipole);
+    }
+  }
+}
+
 /**
  * @brief Compute the non-truncated gravity interactions between all particles
  * of a cell and the particles of the other cell.
@@ -1062,9 +1535,9 @@ static INLINE void runner_doself_grav_pp_full(
     ci_cache->pot[pid] += pot;
 
 #ifdef SWIFT_GRAVITY_FORCE_CHECKS
-    accumulate_add_f(&gparts[pid].a_grav_p2p[0], a_x);
-    accumulate_add_f(&gparts[pid].a_grav_p2p[1], a_y);
-    accumulate_add_f(&gparts[pid].a_grav_p2p[2], a_z);
+    gparts[pid].a_grav_p2p[0] += a_x;
+    gparts[pid].a_grav_p2p[1] += a_y;
+    gparts[pid].a_grav_p2p[2] += a_z;
 #endif
   }
 }
@@ -1201,9 +1674,9 @@ static INLINE void runner_doself_grav_pp_truncated(
     ci_cache->pot[pid] += pot;
 
 #ifdef SWIFT_GRAVITY_FORCE_CHECKS
-    accumulate_add_f(&gparts[pid].a_grav_p2p[0], a_x);
-    accumulate_add_f(&gparts[pid].a_grav_p2p[1], a_y);
-    accumulate_add_f(&gparts[pid].a_grav_p2p[2], a_z);
+    gparts[pid].a_grav_p2p[0] += a_x;
+    gparts[pid].a_grav_p2p[1] += a_y;
+    gparts[pid].a_grav_p2p[2] += a_z;
 #endif
   }
 }
@@ -1295,8 +1768,14 @@ void runner_doself_grav_pp(struct runner *r, struct cell *c) {
     }
   }
 
-  /* Write back to the particles */
+    /* Write back to the particles */
+#ifndef SWIFT_TASKS_WITHOUT_ATOMICS
+  lock_lock(&c->grav.plock);
+#endif
   gravity_cache_write_back(ci_cache, c->grav.parts, gcount);
+#ifndef SWIFT_TASKS_WITHOUT_ATOMICS
+  if (lock_unlock(&c->grav.plock) != 0) error("Error unlocking cell");
+#endif
 
   TIMER_TOC(timer_doself_grav_pp);
 }
@@ -1571,15 +2050,19 @@ void runner_dopair_recursive_grav_pm(struct runner *r, struct cell *ci,
 
     /* Recover the multipole info and the CoM locations */
     const struct multipole *multi_j = &cj->grav.multipole->m_pole;
-    const float r_max = cj->grav.multipole->r_max;
     const float CoM_j[3] = {(float)(cj->grav.multipole->CoM[0]),
                             (float)(cj->grav.multipole->CoM[1]),
                             (float)(cj->grav.multipole->CoM[2])};
 
+#ifdef SWIFT_DEBUG_CHECKS
+    if (cj->grav.count == 1)
+      error("Constructing cache for M2P interaction with multipole of size 0!");
+#endif
+
     /* Fill the cache */
     gravity_cache_populate_all_mpole(
         e->max_active_bin, periodic, dim, ci_cache, ci->grav.parts, gcount_i,
-        gcount_padded_i, ci, CoM_j, r_max * r_max, e->gravity_properties);
+        gcount_padded_i, ci, CoM_j, cj->grav.multipole, e->gravity_properties);
 
     /* Can we use the Newtonian version or do we need the truncated one ? */
     if (!periodic) {
@@ -1595,8 +2078,14 @@ void runner_dopair_recursive_grav_pm(struct runner *r, struct cell *ci,
                                       cj);
     }
 
-    /* Write back to the particles */
+      /* Write back to the particles */
+#ifndef SWIFT_TASKS_WITHOUT_ATOMICS
+    lock_lock(&ci->grav.plock);
+#endif
     gravity_cache_write_back(ci_cache, ci->grav.parts, gcount_i);
+#ifndef SWIFT_TASKS_WITHOUT_ATOMICS
+    if (lock_unlock(&ci->grav.plock) != 0) error("Error unlocking cell");
+#endif
   }
 }
 
@@ -1616,14 +2105,13 @@ void runner_dopair_recursive_grav_pm(struct runner *r, struct cell *ci,
  * @param gettimer Are we timing this ?
  */
 void runner_dopair_recursive_grav(struct runner *r, struct cell *ci,
-                                  struct cell *cj, int gettimer) {
+                                  struct cell *cj, const int gettimer) {
 
   /* Some constants */
   const struct engine *e = r->e;
   const int nodeID = e->nodeID;
   const int periodic = e->mesh->periodic;
   const double dim[3] = {e->mesh->dim[0], e->mesh->dim[1], e->mesh->dim[2]};
-  const double theta_crit2 = e->gravity_properties->theta_crit2;
   const double max_distance = e->mesh->r_cut_max;
 
   /* Anything to do here? */
@@ -1700,18 +2188,25 @@ void runner_dopair_recursive_grav(struct runner *r, struct cell *ci,
   /* OK, we actually need to compute this pair. Let's find the cheapest
    * option... */
 
-  /* Can we use M-M interactions ? */
-  if (gravity_M2L_accept(multi_i->r_max, multi_j->r_max, theta_crit2, r2,
-                         multi_i->m_pole.max_softening,
-                         multi_j->m_pole.max_softening)) {
+  if (ci->grav.count <= 1 || cj->grav.count <= 1) {
+
+    /* We have two cheap cells. Go P-P. */
+    runner_dopair_grav_pp_no_cache(r, ci, cj);
+    runner_dopair_grav_pp_no_cache(r, cj, ci);
+
+    /* Can we use M-M interactions ? */
+  } else if (gravity_M2L_accept_symmetric(e->gravity_properties, multi_i,
+                                          multi_j, r2,
+                                          /* use_rebuild_sizes=*/0, periodic)) {
 
     /* Go M-M */
     runner_dopair_grav_mm(r, ci, cj);
 
+    /* Did we reach the bottom? */
   } else if (!ci->split && !cj->split) {
 
     /* We have two leaves. Go P-P. */
-    runner_dopair_grav_pp(r, ci, cj, /*symmetric*/ 1, /*allow_mpoles*/ 1);
+    runner_dopair_grav_pp(r, ci, cj, /*symmetric*/ 1, /*allow_mpoles=*/1);
 
   } else {
 
@@ -1783,7 +2278,7 @@ void runner_dopair_recursive_grav(struct runner *r, struct cell *ci,
  * @param gettimer Are we timing this ?
  */
 void runner_doself_recursive_grav(struct runner *r, struct cell *c,
-                                  int gettimer) {
+                                  const int gettimer) {
 
   /* Some constants */
   const struct engine *e = r->e;
@@ -1834,13 +2329,13 @@ void runner_doself_recursive_grav(struct runner *r, struct cell *c,
  * @param ci The #cell of interest.
  * @param timer Are we timing this ?
  */
-void runner_do_grav_long_range(struct runner *r, struct cell *ci, int timer) {
+void runner_do_grav_long_range(struct runner *r, struct cell *ci,
+                               const int timer) {
 
   /* Some constants */
   const struct engine *e = r->e;
   const int periodic = e->mesh->periodic;
   const double dim[3] = {e->mesh->dim[0], e->mesh->dim[1], e->mesh->dim[2]};
-  const double theta_crit2 = e->gravity_properties->theta_crit2;
   const double max_distance2 = e->mesh->r_cut_max * e->mesh->r_cut_max;
 
   TIMER_TIC;
@@ -1866,12 +2361,6 @@ void runner_do_grav_long_range(struct runner *r, struct cell *ci, int timer) {
   struct cell *top = ci;
   while (top->parent != NULL) top = top->parent;
 
-  /* Recover the top-level multipole (for distance checks) */
-  struct gravity_tensors *const multi_top = top->grav.multipole;
-  const double CoM_rebuild_top[3] = {multi_top->CoM_rebuild[0],
-                                     multi_top->CoM_rebuild[1],
-                                     multi_top->CoM_rebuild[2]};
-
   /* Loop over all the top-level cells and go for a M-M interaction if
    * well-separated */
   for (int n = 0; n < nr_cells_with_particles; ++n) {
@@ -1916,24 +2405,8 @@ void runner_do_grav_long_range(struct runner *r, struct cell *ci, int timer) {
       }
     }
 
-    /* Get the distance between the CoMs at the last rebuild*/
-    double dx_r = CoM_rebuild_top[0] - multi_j->CoM_rebuild[0];
-    double dy_r = CoM_rebuild_top[1] - multi_j->CoM_rebuild[1];
-    double dz_r = CoM_rebuild_top[2] - multi_j->CoM_rebuild[2];
-
-    /* Apply BC */
-    if (periodic) {
-      dx_r = nearest(dx_r, dim[0]);
-      dy_r = nearest(dy_r, dim[1]);
-      dz_r = nearest(dz_r, dim[2]);
-    }
-    const double r2_rebuild = dx_r * dx_r + dy_r * dy_r + dz_r * dz_r;
-
-    /* Are we in charge of this cell pair? */
-    if (gravity_M2L_accept(multi_top->r_max_rebuild, multi_j->r_max_rebuild,
-                           theta_crit2, r2_rebuild,
-                           multi_top->m_pole.max_softening,
-                           multi_j->m_pole.max_softening)) {
+    if (cell_can_use_pair_mm(top, cj, e, e->s, /*use_rebuild_data=*/1,
+                             /*is_tree_walk=*/0)) {
 
       /* Call the PM interaction fucntion on the active sub-cells of ci */
       runner_dopair_grav_mm_nonsym(r, ci, cj);
diff --git a/src/runner_doiact_grav.h b/src/runner_doiact_grav.h
index 34f3e9ec147574357620cc8f485889b87880f06e..1afa2e0f5df900aea498a859b9672625bde773e7 100644
--- a/src/runner_doiact_grav.h
+++ b/src/runner_doiact_grav.h
@@ -27,6 +27,9 @@ struct cell;
 
 void runner_do_grav_down(struct runner *r, struct cell *c, int timer);
 
+void runner_dopair_grav_pp(struct runner *r, struct cell *ci, struct cell *cj,
+                           const int symmetric, const int allow_mpole);
+
 void runner_doself_recursive_grav(struct runner *r, struct cell *c,
                                   int gettimer);
 
diff --git a/src/runner_others.c b/src/runner_others.c
index 4db77738670e66c5e072bdfbf247e7cfbfadfc53..6f0c4ac9e8c078285714fe89f9dce7deb6b7a001 100644
--- a/src/runner_others.c
+++ b/src/runner_others.c
@@ -130,8 +130,14 @@ void runner_do_grav_mesh(struct runner *r, struct cell *c, int timer) {
       if (c->progeny[k] != NULL) runner_do_grav_mesh(r, c->progeny[k], 0);
   } else {
 
-    /* Get the forces from the gravity mesh */
+  /* Get the forces from the gravity mesh */
+#ifndef SWIFT_TASKS_WITHOUT_ATOMICS
+    lock_lock(&c->grav.plock);
+#endif
     pm_mesh_interpolate_forces(e->mesh, e, gparts, gcount);
+#ifndef SWIFT_TASKS_WITHOUT_ATOMICS
+    if (lock_unlock(&c->grav.plock) != 0) error("Error unlocking cell");
+#endif
   }
 
   if (timer) TIMER_TOC(timer_dograv_mesh);
diff --git a/src/runner_time_integration.c b/src/runner_time_integration.c
index edb307442cfae0d3fc466706fc3c2f3ab024521f..d966800fc989e6553de3f0fb8b7c4760a1c98bb8 100644
--- a/src/runner_time_integration.c
+++ b/src/runner_time_integration.c
@@ -32,6 +32,7 @@
 #include "engine.h"
 #include "feedback.h"
 #include "kick.h"
+#include "multipole.h"
 #include "timers.h"
 #include "timestep.h"
 #include "timestep_limiter.h"
diff --git a/src/scheduler.c b/src/scheduler.c
index df6f908a3f620d9d6abb3c9e07d179d2d30b1f96..bb380e5a51f478fb53762e805ef1bbbdb0fceec7 100644
--- a/src/scheduler.c
+++ b/src/scheduler.c
@@ -872,8 +872,10 @@ static void scheduler_splittask_gravity(struct task *t, struct scheduler *s) {
               for (int j = 0; j < 8; j++) {
                 if (cj->progeny[j] != NULL) {
                   /* Can we use a M-M interaction here? */
-                  if (cell_can_use_pair_mm_rebuild(ci->progeny[i],
-                                                   cj->progeny[j], e, sp)) {
+                  if (cell_can_use_pair_mm(ci->progeny[i], cj->progeny[j], e,
+                                           sp, /*use_rebuild_data=*/1,
+                                           /*is_tree_walk=*/1)) {
+
                     /* Flag this pair as being treated by the M-M task.
                      * We use the 64 bits in the task->flags field to store
                      * this information. The corresponding taks will unpack
diff --git a/src/space.c b/src/space.c
index 5bdf075f40bb9c7e3a18da3330e7ec095b9be212..156d34372ffed29e0608b30bd16513771d915087 100644
--- a/src/space.c
+++ b/src/space.c
@@ -3661,10 +3661,8 @@ void space_split_recursive(struct space *s, struct cell *c,
       c->grav.multipole->CoM_rebuild[1] = c->grav.multipole->CoM[1];
       c->grav.multipole->CoM_rebuild[2] = c->grav.multipole->CoM[2];
 
-      /* We know the first-order multipole (dipole) is 0. */
-      c->grav.multipole->m_pole.M_100 = 0.f;
-      c->grav.multipole->m_pole.M_010 = 0.f;
-      c->grav.multipole->m_pole.M_001 = 0.f;
+      /* Compute the multipole power */
+      gravity_multipole_compute_power(&c->grav.multipole->m_pole);
 
     } /* Deal with gravity */
   }   /* Split or let it be? */
@@ -3803,6 +3801,9 @@ void space_split_recursive(struct space *s, struct cell *c,
         gravity_P2M(c->grav.multipole, c->grav.parts, c->grav.count,
                     e->gravity_properties);
 
+        /* Compute the multipole power */
+        gravity_multipole_compute_power(&c->grav.multipole->m_pole);
+
       } else {
 
         /* No gparts in that leaf cell */
diff --git a/src/task.c b/src/task.c
index 7844be685292463eb0e240bd58b1fa8ba061edbe..70741af01bbf869ec22b1f5fd8ac5c076784db2a 100644
--- a/src/task.c
+++ b/src/task.c
@@ -443,7 +443,6 @@ void task_unlock(struct task *t) {
       break;
 
     case task_type_drift_gpart:
-    case task_type_grav_mesh:
     case task_type_end_grav_force:
       cell_gunlocktree(ci);
       break;
@@ -538,6 +537,12 @@ void task_unlock(struct task *t) {
 #endif
       break;
 
+    case task_type_grav_mesh:
+#ifdef SWIFT_TASKS_WITHOUT_ATOMICS
+      cell_gunlocktree(ci);
+#endif
+      break;
+
     case task_type_star_formation:
       cell_unlocktree(ci);
       cell_sunlocktree(ci);
@@ -623,7 +628,6 @@ int task_lock(struct task *t) {
 
     case task_type_drift_gpart:
     case task_type_end_grav_force:
-    case task_type_grav_mesh:
       if (ci->grav.phold) return 0;
       if (cell_glocktree(ci) != 0) return 0;
       break;
@@ -800,6 +804,14 @@ int task_lock(struct task *t) {
 #endif
       break;
 
+    case task_type_grav_mesh:
+#ifdef SWIFT_TASKS_WITHOUT_ATOMICS
+      /* Lock the gparts */
+      if (ci->grav.phold) return 0;
+      if (cell_glocktree(ci) != 0) return 0;
+#endif
+      break;
+
     case task_type_star_formation:
       /* Lock the gas, gravity and star particles */
       if (ci->hydro.hold || ci->stars.hold || ci->grav.phold) return 0;
diff --git a/tests/Makefile.am b/tests/Makefile.am
index e24a2a69b07d9ebf8ff10f367f7eaf0724817f7c..347de263b05836f2579739f3666833736b64968f 100644
--- a/tests/Makefile.am
+++ b/tests/Makefile.am
@@ -15,35 +15,35 @@
 # along with this program.  If not, see <http://www.gnu.org/licenses/>.
 
 # Add the source directory and the non-standard paths to the included library headers to CFLAGS
-AM_CFLAGS = -I$(top_srcdir)/src $(HDF5_CPPFLAGS) $(GSL_INCS) $(FFTW_INCS) $(NUMA_INCS)
+AM_CFLAGS = -I$(top_srcdir)/src $(HDF5_CPPFLAGS) $(GSL_INCS) $(FFTW_INCS) $(NUMA_INCS) $(OPENMP_CFLAGS)
 
 AM_LDFLAGS = ../src/.libs/libswiftsim.a $(HDF5_LDFLAGS) $(HDF5_LIBS) $(FFTW_LIBS) $(NUMA_LIBS) $(TCMALLOC_LIBS) $(JEMALLOC_LIBS) $(TBBMALLOC_LIBS) $(GRACKLE_LIBS) $(GSL_LIBS) $(PROFILER_LIBS)
 
 # List of programs and scripts to run in the test suite
-TESTS = testGreetings testMaths testReading.sh testKernel \
-        testActivePair.sh test27cells.sh test27cellsPerturbed.sh  \
+TESTS = testGreetings testMaths testReading.sh testKernel testKernelLongGrav \
+        testActivePair.sh test27cells.sh test27cellsPerturbed.sh testExp \
         testParser.sh test125cells.sh test125cellsPerturbed.sh testFFT \
-        testAdiabaticIndex testRandom testRandomSpacing \
+        testAdiabaticIndex testRandom testRandomSpacing testErfc \
         testMatrixInversion testThreadpool testDump testLogger testInteractions.sh \
         testVoronoi1D testVoronoi2D testVoronoi3D testGravityDerivatives \
 	testPeriodicBC.sh testPeriodicBCPerturbed.sh testPotentialSelf \
 	testPotentialPair testEOS testUtilities testSelectOutput.sh \
 	testCbrt testCosmology testOutputList testFormat.sh \
 	test27cellsStars.sh test27cellsStarsPerturbed.sh testHydroMPIrules \
-        testAtomic
+        testAtomic testGravitySpeed
 
 # List of test programs to compile
-check_PROGRAMS = testGreetings testReading testTimeIntegration \
+check_PROGRAMS = testGreetings testReading testTimeIntegration testKernelLongGrav \
 		 testActivePair test27cells test27cells_subset test125cells testParser \
-                 testKernel testFFT testInteractions testMaths testRandom \
-                 testSymmetry testThreadpool testRandomSpacing \
+                 testKernel testFFT testInteractions testMaths testRandom testExp \
+                 testSymmetry testThreadpool testRandomSpacing testErfc \
                  testAdiabaticIndex testRiemannExact testRiemannTRRS \
                  testRiemannHLLC testMatrixInversion testDump testLogger \
 		 testVoronoi1D testVoronoi2D testVoronoi3D testPeriodicBC \
 		 testGravityDerivatives testPotentialSelf testPotentialPair testEOS testUtilities \
 		 testSelectOutput testCbrt testCosmology testOutputList test27cellsStars \
 		 test27cellsStars_subset testCooling testComovingCooling testFeedback testHashmap \
-                 testAtomic testHydroMPIrules
+                 testAtomic testHydroMPIrules testGravitySpeed
 
 # Rebuild tests when SWIFT is updated.
 $(check_PROGRAMS): ../src/.libs/libswiftsim.a
@@ -96,6 +96,8 @@ testParser_SOURCES = testParser.c
 
 testKernel_SOURCES = testKernel.c
 
+testKernelLongGrav_SOURCES = testKernelLongGrav.c
+
 testFFT_SOURCES = testFFT.c
 
 testInteractions_SOURCES = testInteractions.c
@@ -122,8 +124,14 @@ testDump_SOURCES = testDump.c
 
 testLogger_SOURCES = testLogger.c
 
+testExp_SOURCES = testExp.c
+
+testErfc_SOURCES = testErfc.c
+
 testGravityDerivatives_SOURCES = testGravityDerivatives.c
 
+testGravitySpeed_SOURCES = testGravitySpeed.c
+
 testPotentialSelf_SOURCES = testPotentialSelf.c
 
 testPotentialPair_SOURCES = testPotentialPair.c
diff --git a/tests/testErfc.c b/tests/testErfc.c
new file mode 100644
index 0000000000000000000000000000000000000000..052b7ec0bea4a32cc15816b8c1558043e0383341
--- /dev/null
+++ b/tests/testErfc.c
@@ -0,0 +1,97 @@
+/*******************************************************************************
+ * This file is part of SWIFT.
+ * Copyright (C) 2020 Matthieu Schaller (schaller@strw.leidenuniv.nl)
+ *
+ * This program is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU Lesser General Public License as published
+ * by the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public License
+ * along with this program.  If not, see <http://www.gnu.org/licenses/>.
+ *
+ ******************************************************************************/
+
+#include "../config.h"
+
+#include "swift.h"
+
+/* Standard includes */
+#include <fenv.h>
+#include <math.h>
+
+/**
+ * @brief Compute erfcf(x) using eq. 7.1.26 of Abramowitz & Stegun, 1972.
+ *
+ * This has a *relative* error of less than 4e-3 over the range of
+ * interest (0 < x < 5).
+ * @param x The value at which to evaluate the approximation.
+ */
+float optimized_erfcf(const float x) {
+
+  const float x2 = x * x;
+  const float exp_x2 = expf(-x2);
+
+  const float t = 1.f / (1.f + 0.3275911f * x);
+
+  const float a1 = 0.254829592f;
+  const float a2 = -0.284496736f;
+  const float a3 = 1.421413741f;
+  const float a4 = -1.453152027f;
+  const float a5 = 1.061405429f;
+
+  /* a1 * t + a2 * t^2 + a3 * t^3 + a4 * t^4 + a5 * t^5 (Horner's scheme) */
+  float a = a5 * t + a4;
+  a = a * t + a3;
+  a = a * t + a2;
+  a = a * t + a1;
+  a = a * t;
+
+  return a * exp_x2;
+}
+
+/**
+ * @brief Check that a and b agree within relative and absolute tolerances.
+ * @param a,b The two values to compare
+ * @param rel_tol Maximal accepted value of |a - b| / |a + b|
+ * @param abs_tol Maximal accepted value of |a - b|
+ * @param x Argument the values were computed for (only used in messages)
+ */
+void check_value(const double a, const double b, const double rel_tol,
+                 const double abs_tol, const double x) {
+
+  if (fabs(a - b) / fabs(a + b) > rel_tol)
+    error("Values are inconsistent: %12.15e %12.15e rel=%e (for x=%e).", a, b,
+          fabs(a - b) / fabs(a + b), x);
+  if (fabs(a - b) > abs_tol)
+    error("Values are inconsistent: %12.15e %12.15e abs=%e (for x=%e).", a, b,
+          fabs(a - b), x);
+}
+
+int main(int argc, char* argv[]) {
+
+  /* Initialize CPU frequency, this also starts time. */
+  unsigned long long cpufreq = 0;
+  clocks_set_cpufreq(cpufreq);
+
+/* Choke on FPEs (division by zero, invalid operation, overflow) */
+#ifdef HAVE_FE_ENABLE_EXCEPT
+  feenableexcept(FE_DIVBYZERO | FE_INVALID | FE_OVERFLOW);
+#endif
+
+  /* Compare with the C library erfc() for x in [0, 5), nominal step 1e-6 */
+  for (float x = 0.f; x < 5.f; x += 0.000001f) {
+
+    const double exact = erfc(x);
+    const double swift_erfcf = optimized_erfcf(x);
+
+    check_value(exact, swift_erfcf, 3.358e-3, 6e-7, x);
+  }
+
+  return 0;
+}
diff --git a/tests/testExp.c b/tests/testExp.c
new file mode 100644
index 0000000000000000000000000000000000000000..01a65b8ccc677e5be9627ba40fe71c0bfbc6bb70
--- /dev/null
+++ b/tests/testExp.c
@@ -0,0 +1,70 @@
+/*******************************************************************************
+ * This file is part of SWIFT.
+ * Copyright (C) 2020 Matthieu Schaller (schaller@strw.leidenuniv.nl)
+ *
+ * This program is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU Lesser General Public License as published
+ * by the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public License
+ * along with this program.  If not, see <http://www.gnu.org/licenses/>.
+ *
+ ******************************************************************************/
+
+#include "../config.h"
+
+#include "swift.h"
+
+/* Standard includes */
+#include <fenv.h>
+#include <math.h>
+
+/**
+ * @brief Raise an error() if the relative difference of a and b exceeds tol.
+ * @param a,b The two values to compare.
+ * @param tol Largest accepted value of |a - b| / |a + b|.
+ * @param x Point at which a and b were evaluated (only used in the message).
+ */
+void check_value(double a, double b, const double tol, const double x) {
+  const double rel = fabs(a - b) / fabs(a + b);
+  if (rel > tol)
+    error("Values are inconsistent: %12.15e %12.15e rel=%e (for x=%e).", a, b,
+          rel, x);
+}
+
+int main(int argc, char* argv[]) {
+
+  /* Set up the CPU frequency (this also starts the clock). */
+  const unsigned long long cpufreq = 0;
+  clocks_set_cpufreq(cpufreq);
+
+/* Trap floating-point exceptions, where supported */
+#ifdef HAVE_FE_ENABLE_EXCEPT
+  feenableexcept(FE_DIVBYZERO | FE_INVALID | FE_OVERFLOW);
+#endif
+
+  /* Seed the random number generator (and report the seed) */
+  const int seed = time(NULL);
+  message("Seed = %d", seed);
+  srand(seed);
+
+  /* Scan 0 <= x < 32 with a nominal step of 1e-6, testing both +x and -x */
+  float x = 0.;
+  while (x < 32.) {
+    const double ref_pos = exp(x);
+    const double ref_neg = exp(-x);
+    const double fast_pos = optimized_expf(x);
+    const double fast_neg = optimized_expf(-x);
+    check_value(ref_pos, fast_pos, 1.618e-6, x);
+    check_value(ref_neg, fast_neg, 1.618e-6, x);
+    x += 0.000001;
+  }
+
+  return 0;
+}
diff --git a/tests/testGravitySpeed.c b/tests/testGravitySpeed.c
new file mode 100644
index 0000000000000000000000000000000000000000..dc5e8bc2530fbad5c4616f4a006bed3627614139
--- /dev/null
+++ b/tests/testGravitySpeed.c
@@ -0,0 +1,326 @@
+/*******************************************************************************
+ * This file is part of SWIFT.
+ * Copyright (C) 2020 Matthieu Schaller (schaller@strw.leidenuniv.nl).
+ *
+ * This program is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU Lesser General Public License as published
+ * by the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public License
+ * along with this program.  If not, see <http://www.gnu.org/licenses/>.
+ *
+ ******************************************************************************/
+#include "../config.h"
+
+/* Some standard headers. */
+#include <fenv.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <unistd.h>
+
+/* Local headers. */
+#include "runner_doiact_grav.h"
+#include "swift.h"
+
+const int num_M2L_runs = 1 << 23; /* Timed M2L calls; size of tensor arrays */
+const int num_M2P_runs = 1 << 23; /* Timed M2P calls (<= num_M2L_runs) */
+const int num_PP_runs = 1; /* Timed dopair calls (optionally "1 << 8") */
+
+/**
+ * @brief Fill a cubic cell with N random unit-mass gparts and its multipole.
+ * @param c The #cell to set up (zeroed first; its arrays are malloc'ed here).
+ * @param N Number of gparts to create.
+ * @param loc,width Lower corner and side-length of the cell.
+ * @param id_base ID of the first gpart; the following ones count up from it.
+ * @param grav_props The #gravity_props handed to gravity_P2M().
+ */
+void make_cell(struct cell *c, int N, const double loc[3], double width,
+               int id_base, const struct gravity_props *grav_props) {
+  /* Start from a zeroed cell, then set its geometry and locks */
+  bzero(c, sizeof(struct cell));
+  for (int k = 0; k < 3; ++k) c->loc[k] = loc[k];
+  for (int k = 0; k < 3; ++k) c->width[k] = width;
+  lock_init(&c->grav.plock);
+  lock_init(&c->grav.mlock);
+
+  /* Set the time bins */
+  c->grav.ti_end_min = 1;
+  c->grav.ti_end_max = 1;
+  c->grav.ti_beg_max = 1;
+  c->grav.ti_old_part = 1;
+  c->grav.ti_old_multipole = 1;
+
+  /* Create the particles */
+  c->grav.count = N;
+  c->grav.count_total = N;
+  c->grav.parts = malloc(N * sizeof(struct gpart));
+  if (c->grav.parts == NULL) error("Failed to allocate the gparts.");
+  bzero(c->grav.parts, N * sizeof(struct gpart));
+  for (int i = 0; i < N; ++i) {
+    c->grav.parts[i].id_or_neg_offset = id_base + i;
+    for (int k = 0; k < 3; ++k) /* uniform in [loc, loc + width] */
+      c->grav.parts[i].x[k] = loc[k] + width * rand() / ((double)RAND_MAX);
+    c->grav.parts[i].mass = 1.;
+    c->grav.parts[i].type = swift_type_dark_matter;
+    c->grav.parts[i].time_bin = 1;
+  }
+
+  /* Create the multipoles */
+  c->grav.multipole = malloc(sizeof(struct gravity_tensors));
+  if (c->grav.multipole == NULL) error("Failed to allocate the multipole.");
+  gravity_reset(c->grav.multipole);
+  gravity_P2M(c->grav.multipole, c->grav.parts, N, grav_props);
+  gravity_multipole_compute_power(&c->grav.multipole->m_pole);
+}
+
+int main(int argc, char *argv[]) {
+
+  /* Initialize CPU frequency, this also starts time. */
+  unsigned long long cpufreq = 0;
+  clocks_set_cpufreq(cpufreq);
+
+  /* Choke on FPEs */
+#ifdef HAVE_FE_ENABLE_EXCEPT
+  feenableexcept(FE_DIVBYZERO | FE_INVALID | FE_OVERFLOW);
+#endif
+
+  /* Get some randomness going */
+  const int seed = time(NULL);
+  message("Seed = %d", seed);
+  srand(seed);
+
+  /* Construct gravity properties */
+  struct gravity_props grav_props;
+  bzero(&grav_props, sizeof(struct gravity_props));
+  grav_props.use_advanced_MAC = 1;
+  grav_props.use_adaptive_tolerance = 1;
+  grav_props.adaptive_tolerance = 1e-4;
+  grav_props.theta_crit = 0.5;
+  grav_props.G_Newton = 1.;
+  grav_props.mesh_size = 64;
+  grav_props.a_smooth = 1.25;
+
+  /* Space properties */
+  const double dim[3] = {100., 100., 100.};
+  const double r_s = grav_props.a_smooth * dim[0] / grav_props.mesh_size;
+  const double r_s_inv = 1. / r_s;
+
+  /* Mesh structure (zeroed so that no field is left indeterminate) */
+  struct pm_mesh mesh;
+  bzero(&mesh, sizeof(struct pm_mesh));
+  mesh.periodic = 0;
+  mesh.dim[0] = dim[0];
+  mesh.dim[1] = dim[1];
+  mesh.dim[2] = dim[2];
+
+  /* Construct an engine (zeroed first) that knows the mesh and the props */
+  struct engine e;
+  bzero(&e, sizeof(struct engine));
+  e.mesh = &mesh;
+  e.max_active_bin = 56;
+  e.gravity_properties = &grav_props;
+
+  /* Construct a runner (zeroed first) */
+  struct runner r;
+  bzero(&r, sizeof(struct runner));
+  r.e = &e;
+
+  /* Construct two cells */
+  struct cell ci;
+  struct cell cj;
+  const double loc_i[3] = {0., 0., 0.};
+  const double loc_j[3] = {1., 1., 1.};
+  const int num_particles = 8;
+  make_cell(&ci, num_particles, loc_i, 1., 0, &grav_props);
+  make_cell(&cj, num_particles, loc_j, 1., num_particles, &grav_props);
+
+  message("Number of runs: %d", num_M2L_runs);
+
+  /* Construct arrays of multipoles to prevent too much optimization */
+  struct gravity_tensors *tensors_i =
+      malloc(num_M2L_runs * sizeof(struct gravity_tensors));
+  struct gravity_tensors *tensors_j =
+      malloc(num_M2L_runs * sizeof(struct gravity_tensors));
+  if (tensors_i == NULL || tensors_j == NULL)
+    error("Failed to allocate the arrays of multipoles.");
+  for (int n = 0; n < num_M2L_runs; ++n) {
+    memcpy(&tensors_i[n], ci.grav.multipole, sizeof(struct gravity_tensors));
+    memcpy(&tensors_j[n], cj.grav.multipole, sizeof(struct gravity_tensors));
+
+    /* Move the values a bit to prevent optimization in the actual loops */
+    tensors_i[n].CoM[0] += rand() / ((double)RAND_MAX);
+    tensors_i[n].CoM[1] += rand() / ((double)RAND_MAX);
+    tensors_i[n].CoM[2] += rand() / ((double)RAND_MAX);
+
+    tensors_j[n].CoM[0] += rand() / ((double)RAND_MAX);
+    tensors_j[n].CoM[1] += rand() / ((double)RAND_MAX);
+    tensors_j[n].CoM[2] += rand() / ((double)RAND_MAX);
+
+    tensors_i[n].m_pole.M_000 += rand() / ((double)RAND_MAX);
+    tensors_j[n].m_pole.M_000 += rand() / ((double)RAND_MAX);
+
+#if SELF_GRAVITY_MULTIPOLE_ORDER > 1
+    tensors_i[n].m_pole.M_200 += rand() / ((double)RAND_MAX);
+    tensors_i[n].m_pole.M_020 += rand() / ((double)RAND_MAX);
+    tensors_i[n].m_pole.M_002 += rand() / ((double)RAND_MAX);
+
+    tensors_j[n].m_pole.M_200 += rand() / ((double)RAND_MAX);
+    tensors_j[n].m_pole.M_020 += rand() / ((double)RAND_MAX);
+    tensors_j[n].m_pole.M_002 += rand() / ((double)RAND_MAX);
+#endif
+  }
+
+  /* Now run a series of M2L kernels, reporting the mean time per call */
+  /********
+   * Symmetric non-periodic M2L
+   ********/
+  ticks tic = getticks();
+  for (int n = 0; n < num_M2L_runs; ++n) {
+    gravity_M2L_symmetric(&tensors_i[n].pot,     //
+                          &tensors_j[n].pot,     //
+                          &tensors_i[n].m_pole,  //
+                          &tensors_j[n].m_pole,  //
+                          tensors_i[n].CoM,      //
+                          tensors_j[n].CoM,      //
+                          &grav_props, /* periodic=*/0, dim, r_s_inv);
+  }
+  ticks toc = getticks();
+  message("%30s at order %d took %4d %s.", "Symmetric non-periodic M2L",
+          SELF_GRAVITY_MULTIPOLE_ORDER,
+          (int)(1e6 * clocks_from_ticks(toc - tic) / num_M2L_runs), "ns");
+
+  /********
+   * Symmetric periodic M2L
+   ********/
+  tic = getticks();
+  for (int n = 0; n < num_M2L_runs; ++n) {
+    gravity_M2L_symmetric(&tensors_i[n].pot,     //
+                          &tensors_j[n].pot,     //
+                          &tensors_i[n].m_pole,  //
+                          &tensors_j[n].m_pole,  //
+                          tensors_i[n].CoM,      //
+                          tensors_j[n].CoM,      //
+                          &grav_props, /* periodic=*/1, dim, r_s_inv);
+  }
+  toc = getticks();
+  message("%30s at order %d took %4d %s.", "Symmetric periodic M2L",
+          SELF_GRAVITY_MULTIPOLE_ORDER,
+          (int)(1e6 * clocks_from_ticks(toc - tic) / num_M2L_runs), "ns");
+
+  /********
+   * Non-symmetric non-periodic M2L
+   ********/
+  tic = getticks();
+  for (int n = 0; n < num_M2L_runs; ++n) {
+    gravity_M2L_nonsym(&tensors_i[n].pot,     //
+                       &tensors_j[n].m_pole,  //
+                       tensors_i[n].CoM,      //
+                       tensors_j[n].CoM,      //
+                       &grav_props, /* periodic=*/0, dim, r_s_inv);
+  }
+  toc = getticks();
+  message("%30s at order %d took %4d %s.", "Non-symmetric non-periodic M2L",
+          SELF_GRAVITY_MULTIPOLE_ORDER,
+          (int)(1e6 * clocks_from_ticks(toc - tic) / num_M2L_runs), "ns");
+
+  /********
+   * Non-symmetric periodic M2L
+   ********/
+  tic = getticks();
+  for (int n = 0; n < num_M2L_runs; ++n) {
+    gravity_M2L_nonsym(&tensors_i[n].pot,     //
+                       &tensors_j[n].m_pole,  //
+                       tensors_i[n].CoM,      //
+                       tensors_j[n].CoM,      //
+                       &grav_props, /* periodic=*/1, dim, r_s_inv);
+  }
+  toc = getticks();
+  message("%30s at order %d took %4d %s.", "Non-symmetric periodic M2L",
+          SELF_GRAVITY_MULTIPOLE_ORDER,
+          (int)(1e6 * clocks_from_ticks(toc - tic) / num_M2L_runs), "ns");
+
+  /* Now run a series of M2P kernels (needs num_M2P_runs <= num_M2L_runs) */
+
+  /********
+   * Non-periodic M2P
+   ********/
+  tic = getticks();
+  for (int n = 0; n < num_M2P_runs; ++n) {
+    const int index = n % num_particles;
+
+    const float r_x = tensors_j[n].CoM[0] - ci.grav.parts[index].x[0];
+    const float r_y = tensors_j[n].CoM[1] - ci.grav.parts[index].x[1];
+    const float r_z = tensors_j[n].CoM[2] - ci.grav.parts[index].x[2];
+    const float r2 = r_x * r_x + r_y * r_y + r_z * r_z;
+    const float eps = gravity_get_softening(&ci.grav.parts[index], &grav_props);
+
+    struct reduced_grav_tensor l = {0.f, 0.f, 0.f, 0.f};
+    gravity_M2P(&tensors_j[n].m_pole, r_x, r_y, r_z, r2, eps,
+                /*periodic=*/0, r_s_inv, &l);
+
+    ci.grav.parts[index].a_grav[0] += l.F_100;
+    ci.grav.parts[index].a_grav[1] += l.F_010;
+    ci.grav.parts[index].a_grav[2] += l.F_001;
+  }
+  toc = getticks();
+  message("%30s at order %d took %4d %s.", "Non-periodic M2P",
+          SELF_GRAVITY_MULTIPOLE_ORDER,
+          (int)(1e6 * clocks_from_ticks(toc - tic) / num_M2P_runs), "ns");
+
+  /********
+   * Periodic M2P
+   ********/
+  tic = getticks();
+  for (int n = 0; n < num_M2P_runs; ++n) {
+    const int index = n % num_particles;
+
+    const float r_x = tensors_j[n].CoM[0] - ci.grav.parts[index].x[0];
+    const float r_y = tensors_j[n].CoM[1] - ci.grav.parts[index].x[1];
+    const float r_z = tensors_j[n].CoM[2] - ci.grav.parts[index].x[2];
+    const float r2 = r_x * r_x + r_y * r_y + r_z * r_z;
+    const float eps = gravity_get_softening(&ci.grav.parts[index], &grav_props);
+
+    struct reduced_grav_tensor l = {0.f, 0.f, 0.f, 0.f};
+    gravity_M2P(&tensors_j[n].m_pole, r_x, r_y, r_z, r2, eps,
+                /*periodic=*/1, r_s_inv, &l);
+
+    ci.grav.parts[index].a_grav[0] += l.F_100;
+    ci.grav.parts[index].a_grav[1] += l.F_010;
+    ci.grav.parts[index].a_grav[2] += l.F_001;
+  }
+  toc = getticks();
+  message("%30s at order %d took %4d %s.", "Periodic M2P",
+          SELF_GRAVITY_MULTIPOLE_ORDER,
+          (int)(1e6 * clocks_from_ticks(toc - tic) / num_M2P_runs), "ns");
+
+  /* Now time the pair interactions between the two cells. */
+  /* NOTE(review): the mesh and runner are only partially set up here; */
+  /* confirm that runner_dopair_grav_pp() needs nothing else initialised. */
+
+  tic = getticks();
+  for (int n = 0; n < num_PP_runs; ++n) {
+    runner_dopair_grav_pp(&r, &ci, &cj, 1, 0);
+  }
+  toc = getticks();
+  message("%30s at order %d took %4d %s.", "dopair_grav (no mpole)",
+          SELF_GRAVITY_MULTIPOLE_ORDER,
+          (int)(1e6 * clocks_from_ticks(toc - tic) / num_PP_runs), "ns");
+
+  tic = getticks();
+  for (int n = 0; n < num_PP_runs; ++n) {
+    runner_dopair_grav_pp(&r, &ci, &cj, 1, 1);
+  }
+  toc = getticks();
+  message("%30s at order %d took %4d %s.", "dopair_grav (mpole)",
+          SELF_GRAVITY_MULTIPOLE_ORDER,
+          (int)(1e6 * clocks_from_ticks(toc - tic) / num_PP_runs), "ns");
+
+  return 0;
+}
diff --git a/tests/testKernelLongGrav.c b/tests/testKernelLongGrav.c
new file mode 100644
index 0000000000000000000000000000000000000000..cda2df9c2a5f7c670fb033120495dd37634e662d
--- /dev/null
+++ b/tests/testKernelLongGrav.c
@@ -0,0 +1,118 @@
+/*******************************************************************************
+ * This file is part of SWIFT.
+ * Copyright (C) 2020 Matthieu Schaller (schaller@strw.leidenuniv.nl)
+ *
+ * This program is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU Lesser General Public License as published
+ * by the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public License
+ * along with this program.  If not, see <http://www.gnu.org/licenses/>.
+ *
+ ******************************************************************************/
+
+#include "../config.h"
+
+#include "swift.h"
+
+/* Standard includes */
+#include <fenv.h>
+#include <math.h>
+
+const int num_tests = 1 << 10; /* Number of random r_s values tested */
+
+/**
+ * @brief Raise an error() if the relative difference of a and b exceeds tol.
+ *
+ * @param a,b The two values to compare.
+ * @param s String used to identify this check in messages.
+ * @param tol Largest accepted value of |a - b| / |a + b|.
+ * @param r,r_s Radius and scale of the check (only used in the message).
+ */
+void check_value(double a, double b, const char* s, const double tol,
+                 const double r, const double r_s) {
+  const double rel = fabs(a - b) / fabs(a + b);
+  if (rel > tol)
+    error("Values are inconsistent: %12.15e %12.15e rel=%e (%s for r_s=%e "
+          "r/r_s=%e)!", a, b, rel, s, r_s, r / r_s);
+}
+
+int main(int argc, char* argv[]) {
+
+  /* Set up the CPU frequency (this also starts the clock). */
+  const unsigned long long cpufreq = 0;
+  clocks_set_cpufreq(cpufreq);
+
+/* Trap floating-point exceptions, where supported */
+#ifdef HAVE_FE_ENABLE_EXCEPT
+  feenableexcept(FE_DIVBYZERO | FE_INVALID | FE_OVERFLOW);
+#endif
+
+  /* Seed the random number generator (and report the seed) */
+  const int seed = time(NULL);
+  message("Seed = %d", seed);
+  srand(seed);
+
+  for (int n = 0; n < num_tests; ++n) {
+    /* Draw a random scale r_s, log-uniform in [1e-2, 1e2] */
+    const double r_s = exp10(4. * rand() / ((double)RAND_MAX) - 2.);
+    const double r_s_inv = 1. / r_s;
+
+    /* Scan log10(r / r_s) upwards from -4 in steps of 1e-3 */
+    double log_ratio = -4;
+    while (log_ratio < 1) {
+
+      /* Radius corresponding to this ratio */
+      const double r = exp10(log_ratio) * r_s;
+
+      /* Nothing to test beyond 5 r_s */
+      if (r > 5. * r_s) break;
+
+      /* SWIFT's version of chi and its derivatives */
+      struct chi_derivatives chi;
+      kernel_long_grav_derivatives((float)r, (float)r_s_inv, &chi);
+
+      /* Reference values from the analytic expressions */
+      const double inv_sqrt_pi = M_2_SQRTPI * 0.5;
+      const double u = 0.5 * r / r_s;
+      const double K = inv_sqrt_pi * exp(-u * u);
+
+      const double ref_0 = erfc(u);
+      const double ref_1 = -K / r_s;
+      const double ref_2 = K * 0.5 * r * pow(r_s, -3.);
+      const double ref_3 = K * 0.25 * (2. * r_s * r_s - r * r) * pow(r_s, -5.);
+      const double ref_4 =
+          K * 0.125 * (r * r * r - 6. * r_s * r_s * r) * pow(r_s, -7.);
+      const double ref_5 =
+          K * 0.0625 *
+          (12. * pow(r_s, 4.) - 12. * r_s * r_s * r * r + pow(r, 4.)) *
+          pow(r_s, -9.);
+
+      check_value(chi.chi_0, ref_0, "chi_0", 3.4e-3, r, r_s);
+      check_value(chi.chi_1, ref_1, "chi_1", 1e-5, r, r_s);
+      check_value(chi.chi_2, ref_2, "chi_2", 1e-5, r, r_s);
+      check_value(chi.chi_3, ref_3, "chi_3", 1e-4, r, r_s);
+      check_value(chi.chi_4, ref_4, "chi_4", 4e-4, r, r_s);
+      check_value(chi.chi_5, ref_5, "chi_5", 4e-4, r, r_s);
+
+      /* SWIFT's per-particle force and potential corrections */
+      float f_lr, pot_lr;
+      kernel_long_grav_eval(r / r_s, &f_lr, &pot_lr);
+
+      /* And their reference counterparts */
+      const double ref_pot = erfc(u);
+      const double ref_f = erfc(u) + M_2_SQRTPI * u * exp(-u * u);
+
+      check_value(pot_lr, ref_pot, "corr_pot", 3.4e-3, r, r_s);
+      check_value(f_lr, ref_f, "corr_f", 2.4e-4, r, r_s);
+      log_ratio += 0.001;
+    }
+  }
+
+  return 0;
+}
diff --git a/tests/testPotentialPair.c b/tests/testPotentialPair.c
index d5fbda36a9ef79352f79627b5cef908030401da2..866d248c7ffe8ddbb735017977fffe8aa1ae40ee 100644
--- a/tests/testPotentialPair.c
+++ b/tests/testPotentialPair.c
@@ -124,7 +124,7 @@ int main(int argc, char *argv[]) {
   e.mesh = &mesh;
 
   struct gravity_props props;
-  props.theta_crit2 = 0.;
+  props.theta_crit = 0.;
   props.epsilon_DM_cur = eps;
   props.epsilon_baryon_cur = eps;
   e.gravity_properties = &props;
@@ -261,7 +261,7 @@ int main(int argc, char *argv[]) {
   /**********************************/
 
   /* Set an opening angle that allows P-M interactions */
-  props.theta_crit2 = 1.;
+  props.theta_crit = 1.;
 
   ci.grav.parts[0].mass = 0.;
   ci.grav.multipole->CoM[0] = 0.;
diff --git a/theory/Multipoles/fmm_mac.tex b/theory/Multipoles/fmm_mac.tex
new file mode 100644
index 0000000000000000000000000000000000000000..0396246fa9911af1eba4a9642c1b140d96b131e8
--- /dev/null
+++ b/theory/Multipoles/fmm_mac.tex
@@ -0,0 +1,213 @@
+\subsection{The multipole acceptance criterion}
+
+The main remaining question is to decide when two cells are far enough from
+each other that the truncated Taylor expansion used as an approximation for
+the potential (eq. \ref{eq:fmm:expansion}) is accurate enough. The
+criterion used to make that decision is called the \emph{multipole
+  acceptance criterion} (MAC). \\
+We know that (\ref{eq:fmm:expansion}) is converging towards the correct
+answer provided $1>|\mathbf{r}_a + \mathbf{r}_b| / |\mathbf{R}|$. This is
+hence the most basic (and always necessary) MAC that can be designed. If
+this ratio is lower, the accuracy (at a fixed expansion order) is improved
+and it is hence common practice to define a critical \emph{opening angle}
+$\theta_{\rm cr}$ and allow the use of the multipole approximation between
+two cells if
+
+\begin{equation}
+  \theta_{\rm cr} > \frac{\rho_A + \rho_B} {|\mathbf{R}|}.
+  \label{eq:fmm:angle}
+\end{equation}
+This lets users have a second handle on the accuracy of the gravity
+calculation besides the much more involved change in the expansion order
+$p$ of the FMM method. Typical values for the opening angle are in the
+range $[0.3, 0.7]$, with the cost of the simulation growing as $\theta_{\rm
+  cr}$ decreases. \\
+This method has the drawback of using a uniform criterion across the entire
+simulation volume and time evolution, which means that the chosen value of
+$\theta_{\rm cr}$ could be too small in some regions (leading to too many
+operations for the expected accuracy) and too large in some other
+ones (leading to a lower level of accuracy than expected). \swift instead
+uses a more adaptive criterion to decide when the multipole approximation
+can be used. This is based on the error analysis of FMM by
+\cite{Dehnen2014} and is summarised below for completeness. The key idea is
+to exploit the additional information about the distribution of particles
+that is encoded in the higher-order multipole terms.\\
+We start by defining the scalar quantity $P_{A,n}$, the
+\emph{power} of the multipole of order $n$ of the particles in cell $A$,
+via
+\begin{equation}
+  P_{A,n}^2 = \sum_{|\mathbf{m}|=n} \frac{\mathbf{m}!}{|\mathbf{m}|!}\mathsf{M}_{A,\mathbf{m}}^2,
+\end{equation}
+where the sum runs over all the multipole terms of order $n$ in the
+cell\footnote{Note that $P_{0} \equiv \mathsf{M}_{(0,0,0)}$ is
+  just the mass of the cell and since \swift uses the centre of mass as the
+  centre of expansion of the multipoles, $P_{1} = 0$.}. This
+quantity is a simple upper bound for the amplitude of the multipole
+($\mathsf{M}_{A, \mathbf{m}} < P_{A,|\mathbf{m}|}/|\mathbf{m}|!$)
+and can hence be used to estimate the importance of the terms of a given
+order in the Taylor series of the potential. Following \cite{Dehnen2014} we
+then consider a sink cell $A$ and a source cell $B$ (figure \ref{fig:fmm:cells}) for which we evaluate
+at order $p$ the scalar
+\begin{equation}
+  E_{BA,p} = \frac{1}{M_B|\mathbf{R}|^p} \sum_{n=0}^p \binom{p}{n} P_{B,n}
+  \rho_A^{p-n},
+  \label{eq:fmm:e_ab}
+\end{equation}
+with $M_B \equiv \mathsf{M}_{B,(0,0,0)}$, the sum of the mass of the
+particles in cell $B$. Note that since $P_{B,n} \leq M_B
+\rho_B^n$, we have $E_{BA, p} \leq \left((\rho_A +
+\rho_B)/|\mathbf{R}|\right)^p$, where the right-hand side is the
+expression used in the basic opening angle condition
+(\ref{eq:fmm:angle}). We finally scale the $E_{BA,p}$'s by the relative
+size of the two cells to define the error estimator $\tilde{E}_{BA,p}$:
+\begin{equation}
+  \tilde{E}_{BA,p} = 8\frac{\max(\rho_A, \rho_B)}{\rho_A + \rho_B}E_{BA,p}.
+  \label{eq:fmm:e_ab_tilde}
+\end{equation}
+As shown by \cite{Dehnen2014}, these quantities are excellent estimators of
+the error made in computing the accelerations between two cells using the
+M2L and M2P kernels at a given order. We can hence use this property to
+design a new MAC by demanding that the estimated acceleration error is no
+larger than a certain fraction of the smallest acceleration in the sink
+cell $A$. This means we can use the FMM approximation to
+compute the accelerations in cell $A$ due to the particles in cell $B$ if
+\begin{equation}
+  \tilde{E}_{BA,p} \frac{M_B}{|\mathbf{R}|^2} < \epsilon_{\rm FMM} \min_{a\in
+    A}\left(|\mathbf{a}_a|\right) \quad \rm{and} \quad \frac{\rho_A +
+    \rho_B} {|\mathbf{R}|} < 1,
+  \label{eq:fmm:mac}  
+\end{equation}
+where the $\mathbf{a}_a$ are the accelerations of the particles in cell $A$
+and $\epsilon_{\rm FMM}$ is a tolerance parameter. Since this is self-referencing
+(i.e. we need the accelerations to decide how to compute the
+accelerations), we need to use an estimator of $|\mathbf{a}_a|$. In
+\swift, we follow the strategy used by \gadget and use the acceleration of
+the previous time-step\footnote{On the first time-step of a simulation this
+  value has not been computed yet. We hence run a fake 0th time-step with
+  the simpler MAC (eq. \ref{eq:fmm:angle}), which is good enough to obtain
+  approximations of the accelerations.}. The minimal norm of the
+acceleration in a given cell can be computed at the same time as the P2M
+and M2M kernels are evaluated in the tree construction phase. The second
+condition in (\ref{eq:fmm:mac}) is necessary to ensure the convergence of the
+Taylor expansion.\\
+One important difference between this criterion and the purely
+geometric one (\ref{eq:fmm:angle}) is that it is not symmetric in $A
+\leftrightarrow B$ (i.e. $E_{AB,p} \neq E_{BA,p}$). This implies that
+there are cases where a multipole in cell $A$ can be used to compute
+the field tensors in cell $B$ but the multipole in $B$ cannot be used
+to compute the $\mathsf{F}$ values of cell $A$ and vice versa. This
+affects the tree walk by breaking the symmetry and potentially leading
+to cells of different sizes interacting. \\
+For the M2P kernel, the sink is a single particle $a$ and hence
+$\rho_A = 0$, which simplifies some of the expressions above. In this
+case, at order $p$, we get:
+\begin{equation}
+  E_{BA,p} = \frac{P_{B,p}}{M_B |\mathbf{R}|^p}, \qquad
+  \tilde{E}_{BA,p} = 8E_{BA,p} \nonumber
+\end{equation}
+Note that, in this case, only the power term of the order of the
+scheme appears; not a sum over the lower-order ones. This leads to the
+following MAC for the M2P kernel:
+\begin{equation}
+  8\frac{P_{B,p}}{|\mathbf{R}|^{p+2}} < \epsilon_{\rm FMM} |\mathbf{a}_a| \quad
+  \rm{and} \quad \frac{\rho_B} {|\mathbf{R}|} < 1.
+    \label{eq:fmm:mac_m2p}  
+\end{equation}
+The value of $\epsilon_{\rm FMM}$ could in principle be different than the one
+used for the M2L MAC. One special case is of particular interest to
+link our expression to other results. Using the expression for order
+$2$ and the approximation $P_{B,p} \approx M_B \rho_B^p$, we
+get
+\begin{equation}
+  8\frac{M_B}{|\mathbf{R}|^2}\left(\frac{\rho_B}{|\mathbf{R}|}\right)^2
+  < \epsilon_{\rm FMM} |\mathbf{a}_a| \nonumber
+\end{equation}
+for our MAC.  This is the same expression as the adaptive opening
+angle used by \gadget \cite[see eq.18 of][]{Springel2005} up to
+numerical factors and definition of the size of a multipole ($\rho$
+vs. the cell edge). Note, however, that, in practice, since formally
+$P_{B,p} \leq M_B \rho_B^p$, the dependence is slightly
+different.\\
+We conclude this section by noting that whilst the derivation of the
+FMM equations and of the simple geometric MAC (eq. \ref{eq:fmm:angle})
+do not make any assumptions about the functional form of $\varphi(r)$,
+the more advanced MAC is valid in the specific case of the
+gravitational potential $\varphi(r) = m/r$ as can be inferred from the
+$m/r^2$ term appearing on the LHS of the criteria (\ref{eq:fmm:mac})
+and (\ref{eq:fmm:mac_m2p}).
+
+\subsubsection{Modifications for softened and truncated gravity}
+
+\begin{figure}
+\includegraphics[width=\columnwidth]{mac_potential.pdf}
+\caption{The gravitational forces $f_{\rm SWIFT}$ computed by SWIFT
+  (green line) including the force softening on the smallest scales
+  and the long-range periodic mesh truncation on the largest scales
+  for a simulation box of size $L$, a mesh scale-length $r_s$ and
+  Plummer-equivalent softening $\epsilon_{\rm Plummer}$. The
+  approximate fast estimator of the forces used in the MAC $f_{\rm
+    MAC}$ is shown using yellow dash-dotted lines. Note that, by
+  construction, $f_{\rm SWIFT} \leq f_{\rm MAC} \leq 1/r^2$ for all
+  distances $r$.}
+\label{fig:fmm:mac_potential}
+\end{figure}
+
+One drawback of using expression (\ref{eq:fmm:mac}) in the case of a
+softened potential (or a potential truncated to apply long-range
+forces from a mesh, Sec. \ref{ssec:mesh_summary}) is that the $M/R^2$
+term will overestimate the expected contribution from the multipole to
+the field tensors, sometimes by large factors. This difference is
+shown on fig. \ref{fig:fmm:mac_potential}, with for instance a ratio
+of $3$ between the true forces and the Newtonian values reached at the
+scale of the Plummer softening. Using the simple expression
+(\ref{eq:fmm:mac}) will make the MAC too aggressive by preventing it
+from using a given multipole as it will be difficult to make the large
+term $M/R^2$ be below the fixed fraction $\epsilon_{\rm FMM}$ of the
+total acceleration of the receiving cell. This implies more
+computation as it will force the tree-walk algorithm to use more
+interactions by going to the daughter cells. The estimation of the
+contribution of the multipole in the MAC should hence be replaced by a
+more realistic term, closer to the one actually used in the
+interactions (eq. \ref{eq:fmm:force_norm}). In simulations with
+periodic boundary conditions, the same reasoning applies to the
+truncated force at the radii overlapping with the scale $r_s$ of the
+mesh forces.
+
+However, both the short- and long-range truncation functions are
+expensive to evaluate in the context of the MAC which is called a
+large number of times during a tree walk. We hence construct a
+cheaper-to-evaluate estimator $f_{\rm MAC}$ that is closer to the true
+forces than the purely Newtonian term:
+\begin{align}
+f_{\rm MAC}(r) =
+\left\lbrace\begin{array}{rcl}
+  \left(\frac{9}{5}\right)^2 H^{-2} & \mbox{if} & r <
+  \frac{5}{9}H,\\
+  r^{-2} & \mbox{if} & \frac{5}{9}H \leq r < \frac{5}{3}r_s, \\
+  \left(\frac{5}{3}\right)^2 r_s^2 r^{-4} & \mbox{if} & \frac{5}{3}r_s \leq r. \\
+\end{array}
+\right.
+\label{eq:fmm:f_mac}
+\end{align}
+Since it is made of constants and even powers of the distance,
+computing this term is much cheaper than the true forces.  This
+estimator is shown as a dot-dashed line on
+Fig. \ref{fig:fmm:mac_potential} and obeys the relation $f_{\rm
+  SWIFT}(r) \leq f_{\rm MAC}(r) \leq 1/r^2$, with $f_{\rm SWIFT}(r)$
+being the true truncated and softened norm of the gravity forces the
+code solves for (green line). We use this expression in the multipole
+acceptance criterion instead of the $1/|\mathbf{R}|^2$ term:
+\begin{equation}
+  \tilde{E}_{BA,p} M_Bf_{\rm MAC}(|\mathbf{R}|) < \epsilon_{\rm FMM} \min_{a\in
+    A}\left(|\mathbf{a}_a|\right).
+  \label{eq:fmm:mac_f_mac}  
+\end{equation}
+The same change is applied to the MAC used for the M2P kernel
+(eq. \ref{eq:fmm:mac_m2p}). In the non-truncated un-softened case,
+their expressions reduce to the \cite{Dehnen2014} one. Using this
+$f_{\rm MAC}$ instead of the simpler purely-Newtonian one only makes a
+difference in simulations where a lot of particles cluster below the
+scale of the softening, which is often the case for hydrodynamical
+simulations including radiative cooling processes. The use of this
+term over the simpler $1/r^2$ estimator is a runtime parameter.
+
diff --git a/theory/Multipoles/fmm_standalone.tex b/theory/Multipoles/fmm_standalone.tex
index 1b597fa636650cd09469b9952f7a14bdf22ce35f..81509e1e78ae14ce144dcaff011a87d606c2ae2a 100644
--- a/theory/Multipoles/fmm_standalone.tex
+++ b/theory/Multipoles/fmm_standalone.tex
@@ -22,7 +22,6 @@
 \label{firstpage}
 
 \begin{abstract}
-Making gravity great again.
 \end{abstract}
 
 \begin{keywords}
@@ -35,6 +34,7 @@ Making gravity great again.
 \input{fmm_summary}
 %\input{gravity_derivatives}
 \input{mesh_summary}
+\input{fmm_mac}
 \input{exact_forces}
 
 \bibliographystyle{mnras}
diff --git a/theory/Multipoles/fmm_summary.tex b/theory/Multipoles/fmm_summary.tex
index c263c241673094756966ffc9e5eae7111dd843ea..f3d0ee6f05a8c1e3bd664386bdca25547813fbce 100644
--- a/theory/Multipoles/fmm_summary.tex
+++ b/theory/Multipoles/fmm_summary.tex
@@ -179,29 +179,49 @@ the other kernels:
   a_x(\mathbf{x}_a) &= G\sum_{\mathbf{m}}^p \mathsf{M}_{\mathbf{m}} \mathsf{D}_{\mathbf{m}+\left(1,0,0\right)}(\mathbf{R}).
   \label{eq:fmm:M2P}
 \end{align}
-A traditional tree-code uses solely that kernel to obtain the forces
-from the multipoles (or often just monopoles, i.e. setting $p=0$ throughout)
-to the particles.\\
-All the kernels (Eqs.~\ref{eq:fmm:P2M}-\ref{eq:fmm:M2P}) are rather
-straightforward to evaluate as they are only made of additions and
-multiplications (provided $\mathsf{D}$ can be evaluated quickly),
-which are extremely efficient instructions on modern architectures
-(see Appendix \ref{sec:pot_derivatives} for the full
-expressions). However, the fully expanded sums can lead to rather
-large and prone to typo expressions. To avoid any mishaps, we use a
-\texttt{python} script to generate C code in which all the sums are
-unrolled and correct by construction. In \swift, we implemented the
-kernels up to order $p=5$, as it proved to be accurate enough for our
-purpose, but this could be extended to higher order easily. This
-implies storing $56$ numbers per cell for each $\textsf{M}$ and
-$\textsf{F}$ plus three numbers for the location of the centre of
-mass. For leaf-cells with large numbers of particles, as in \swift,
-this is a small memory overhead. One further small improvement
-consists in choosing $\mathbf{z}_A$ to be the centre of mass of cell
-$A$ rather than its geometrical centre. The first order multipoles
+A traditional tree-code uses solely that kernel to obtain the forces from
+the multipoles (or often just monopoles, i.e. setting $p=0$ throughout) to
+the particles. Similarly, the field tensor of a cell can receive the
+contribution from a single particle at a distance $\mathbf{R} \equiv
+\mathbf{z}_A - \mathbf{x}_b$ via the P2L kernel:
+\begin{equation}
+  \mathsf{F}_{\mathbf{n}}(\mathbf{z}_A) = G m_b
+  \mathsf{D}_{\mathbf{n}}(\mathbf{R}).
+  \label{eq:fmm:P2L}
+\end{equation}
+The M2P and P2L kernels can be used to speed up the calculations
+involving only a single particle.  All the kernels
+(Eqs.~\ref{eq:fmm:P2M}-\ref{eq:fmm:P2L}) are rather straightforward to
+evaluate as they are only made of additions and multiplications
+(provided $\mathsf{D}$ can be evaluated quickly), which are extremely
+efficient instructions on modern architectures (see Appendix
+\ref{sec:pot_derivatives} for the full expressions). However, the
+fully expanded sums can lead to rather large and typo-prone
+expressions. To avoid any mishaps, we use a \texttt{python} script to
+generate C code in which all the sums are unrolled and correct by
+construction. In \swift, we implemented the kernels up to order $p=5$,
+as it proved to be accurate enough for our purpose, but this could be
+extended to higher order easily. This implies storing $56$ numbers per
+cell for each $\textsf{M}$ and $\textsf{F}$ plus three numbers for the
+location of the centre of mass. For leaf-cells with large numbers of
+particles, as in \swift, this is a small memory overhead. One further
+small improvement consists in choosing $\mathbf{z}_A$ to be the centre
+of mass of cell $A$ rather than its geometrical centre. The first
+order multipoles
 ($\mathsf{M}_{100},\mathsf{M}_{010},\mathsf{M}_{001}$) then vanish by
 construction. This allows us to simplify some of the expressions and
 helps reduce, albeit by a small fraction, the memory footprint of the
 tree structure.
 
-\subsubsection{The Multipole acceptance criterion}
+\subsubsection{Computing the accelerations via a tree-walk}
+
+We define the maximal distance between the centre of mass of a cell $B$ and
+any particle in that cell as
+
+\begin{equation}
+  \rho_B = \max_{b \in B}(|\mathbf{r}_b|)
+\end{equation}
+\\
+\textcolor{red}{MORE WORDS HERE}
+
+
diff --git a/theory/Multipoles/generate_multipoles/multipoles.py b/theory/Multipoles/generate_multipoles/multipoles.py
index ef263d09f22e0186bf3ae2e9572cb89cf156f8a0..d1fc6ad4db3d7e137f39a563ee5de1050fa01938 100644
--- a/theory/Multipoles/generate_multipoles/multipoles.py
+++ b/theory/Multipoles/generate_multipoles/multipoles.py
@@ -1,390 +1,496 @@
 import numpy as np
 import sys
 
+
 def factorial(x):
     if x == 0:
         return 1
     else:
-        return x * factorial(x-1)
+        return x * factorial(x - 1)
+
+
+SUFFIXES = {1: "st", 2: "nd", 3: "rd"}
+
 
-SUFFIXES = {1: 'st', 2: 'nd', 3: 'rd'}
 def ordinal(num):
-    suffix = SUFFIXES.get(num % 10, 'th')
+    suffix = "th" if 10 < num % 100 < 14 else SUFFIXES.get(num % 10, "th")  # 11th-13th
     return str(num) + suffix
 
+
 # Get the order
 order = int(sys.argv[1])
 
-print "-------------------------------------------------"
-print "Generating code for multipoles of order", order, "(only)."
-print "-------------------------------------------------\n"
+print("-------------------------------------------------")
+print("Generating code for multipoles of order", order, "(only).")
+print("-------------------------------------------------\n")
 
-print "-------------------------------------------------"
-print "Multipole structure:"
-print "-------------------------------------------------\n"
+print("-------------------------------------------------")
+print("Multipole structure:")
+print("-------------------------------------------------\n")
 
 if order > 0:
-    print "#if SELF_GRAVITY_MULTIPOLE_ORDER > %d\n"%(order-1)
+    print("#if SELF_GRAVITY_MULTIPOLE_ORDER > %d\n" % (order - 1))
+
+print("/* %s order terms */" % ordinal(order))
 
-print "/* %s order terms */"%ordinal(order)
-    
 # Create all the terms relevent for this order
-for i in range(order+1):
-    for j in range(order+1):
-        for k in range(order+1):
+for i in range(order + 1):
+    for j in range(order + 1):
+        for k in range(order + 1):
             if i + j + k == order:
-                print "float M_%d%d%d;"%(i,j,k)
+                print("float M_%d%d%d;" % (i, j, k))
 
 if order > 0:
-    print "#endif"
+    print("#endif")
 
-print ""
-print "-------------------------------------------------"
+print("")
+print("-------------------------------------------------")
 
-print "Field tensor structure:"
-print "-------------------------------------------------\n"
+print("Field tensor structure:")
+print("-------------------------------------------------\n")
 
 if order > 0:
-    print "#if SELF_GRAVITY_MULTIPOLE_ORDER > %d\n"%(order-1)
+    print("#if SELF_GRAVITY_MULTIPOLE_ORDER > %d\n" % (order - 1))
+
+print("/* %s order terms */" % ordinal(order))
 
-print "/* %s order terms */"%ordinal(order)
-    
 # Create all the terms relevent for this order
-for i in range(order+1):
-    for j in range(order+1):
-        for k in range(order+1):
+for i in range(order + 1):
+    for j in range(order + 1):
+        for k in range(order + 1):
             if i + j + k == order:
-                print "float F_%d%d%d;"%(i,j,k)
+                print("float F_%d%d%d;" % (i, j, k))
 if order > 0:
-    print "#endif"
+    print("#endif")
 
-print ""
-print "-------------------------------------------------"
+print("")
+print("-------------------------------------------------")
 
-print "gravity_field_tensors_add():"
-print "-------------------------------------------------\n"
+print("gravity_field_tensors_add():")
+print("-------------------------------------------------\n")
 
 if order > 0:
-    print "#if SELF_GRAVITY_MULTIPOLE_ORDER > %d"%(order-1)
+    print("#if SELF_GRAVITY_MULTIPOLE_ORDER > %d" % (order - 1))
+
+print("/* %s order terms */" % ordinal(order))
 
-print "/* %s order terms */"%ordinal(order)
-    
 # Create all the terms relevent for this order
-for i in range(order+1):
-    for j in range(order+1):
-        for k in range(order+1):
+for i in range(order + 1):
+    for j in range(order + 1):
+        for k in range(order + 1):
             if i + j + k == order:
-                print "la->F_%d%d%d += lb->F_%d%d%d;"%(i,j,k,i,j,k)
+                print("la->F_%d%d%d += lb->F_%d%d%d;" % (i, j, k, i, j, k))
 if order > 0:
-    print "#endif"
+    print("#endif")
 
-print ""
-print "-------------------------------------------------"
+print("")
+print("-------------------------------------------------")
 
-print "gravity_multipole_add():"
-print "-------------------------------------------------\n"
+print("gravity_multipole_add():")
+print("-------------------------------------------------\n")
 
 if order > 0:
-    print "#if SELF_GRAVITY_MULTIPOLE_ORDER > %d"%(order-1)
+    print("#if SELF_GRAVITY_MULTIPOLE_ORDER > %d" % (order - 1))
+
+print("/* %s order terms */" % ordinal(order))
 
-print "/* %s order terms */"%ordinal(order)
-    
 # Create all the terms relevent for this order
-for i in range(order+1):
-    for j in range(order+1):
-        for k in range(order+1):
+for i in range(order + 1):
+    for j in range(order + 1):
+        for k in range(order + 1):
             if i + j + k == order:
-                print "ma->M_%d%d%d += mb->M_%d%d%d;"%(i,j,k,i,j,k)
+                print("ma->M_%d%d%d += mb->M_%d%d%d;" % (i, j, k, i, j, k))
+
 if order > 0:
-    print "#endif"
+    print("#endif")
 
-print ""
-print "-------------------------------------------------"
+print("")
+print("-------------------------------------------------")
 
-print "gravity_multipole_equal():"
-print "-------------------------------------------------\n"
+print("gravity_multipole_equal():")
+print("-------------------------------------------------\n")
 
 if order > 0:
-    print "#if SELF_GRAVITY_MULTIPOLE_ORDER > %d"%(order-1)
+    print("#if SELF_GRAVITY_MULTIPOLE_ORDER > %d" % (order - 1))
 
 # Create all the terms relevent for this order
-print "/* Manhattan Norm of %s order terms */"%ordinal(order)
-print "const float order%d_norm = "%order,
+print("/* Manhattan Norm of %s order terms */" % ordinal(order))
+print("const float order%d_norm = " % order, end=" ")
 first = True
-for i in range(order+1):
-    for j in range(order+1):
-        for k in range(order+1):
+for i in range(order + 1):
+    for j in range(order + 1):
+        for k in range(order + 1):
             if i + j + k == order:
                 if first:
                     first = False
                 else:
-                    print "+",
-                print "fabsf(ma->M_%d%d%d)"%(i,j,k),
-                print "+ fabsf(mb->M_%d%d%d)"%(i,j,k),
-print ";\n"
-print "/* Compare %s order terms above 1%% of norm */"%ordinal(order)
-for i in range(order+1):
-    for j in range(order+1):
-        for k in range(order+1):
+                    print("+", end=" ")
+                print("fabsf(ma->M_%d%d%d)" % (i, j, k), end=" ")
+                print("+ fabsf(mb->M_%d%d%d)" % (i, j, k), end=" ")
+print(";\n")
+print("/* Compare %s order terms above 1%% of norm */" % ordinal(order))
+for i in range(order + 1):
+    for j in range(order + 1):
+        for k in range(order + 1):
             if i + j + k == order:
-                print "if (fabsf(ma->M_%d%d%d + mb->M_%d%d%d) > 0.01f * order%d_norm &&"%(i,j,k,i,j,k,order)
-                print "    fabsf(ma->M_%d%d%d - mb->M_%d%d%d) / fabsf(ma->M_%d%d%d + mb->M_%d%d%d) > tolerance) {"%(i,j,k,i,j,k,i,j,k,i,j,k)
-                print "  message(\"M_%d%d%d term different\");"%(i,j,k)
-                print "  return 0;"
-                print "}"
+                print(
+                    "if (fabsf(ma->M_%d%d%d + mb->M_%d%d%d) > 0.01f * order%d_norm &&"
+                    % (i, j, k, i, j, k, order)
+                )
+                print(
+                    "    fabsf(ma->M_%d%d%d - mb->M_%d%d%d) / fabsf(ma->M_%d%d%d + mb->M_%d%d%d) > tolerance) {"
+                    % (i, j, k, i, j, k, i, j, k, i, j, k)
+                )
+                print('  message("M_%d%d%d term different");' % (i, j, k))
+                print("  return 0;")
+                print("}")
+
+if order > 0:
+    print("#endif")
+
+
+print("")
+print("-------------------------------------------------")
+
+print("gravity_multipole_compute_power():")
+print("-------------------------------------------------\n")
 
 if order > 0:
-    print "#endif"
+    print("#if SELF_GRAVITY_MULTIPOLE_ORDER > %d" % (order - 1))
 
+print("/* %s order terms */" % ordinal(order))
 
-print ""
-print "-------------------------------------------------"
+# Add the terms to the multipole power
+for i in range(order + 1):
+    for j in range(order + 1):
+        for k in range(order + 1):
+            if i + j + k == order:
+                fact1 = factorial(i) * factorial(j) * factorial(k)
+                fact2 = float(factorial(i + j + k))
+                frac = fact1 / fact2
+                if frac == 1.0:
+                    print(
+                        "power[%d] += m->M_%d%d%d * m->M_%d%d%d;"
+                        % (order, i, j, k, i, j, k)
+                    )
+                else:
+                    print(
+                        "power[%d] += %12.15e * m->M_%d%d%d * m->M_%d%d%d;"
+                        % (order, frac, i, j, k, i, j, k)
+                    )
 
-print "gravity_P2M(): (loop)"
-print "-------------------------------------------------\n"
+print("")
+print("m->power[%d] = sqrt(power[%d]);" % (order, order))
 
 if order > 0:
-    print "#if SELF_GRAVITY_MULTIPOLE_ORDER > %d"%(order-1)
+    print("#endif")
+
+print("")
+print("-------------------------------------------------")
+
+print("gravity_P2M(): (loop)")
+print("-------------------------------------------------\n")
+
+if order > 0:
+    print("#if SELF_GRAVITY_MULTIPOLE_ORDER > %d" % (order - 1))
+
+print("/* %s order terms */" % ordinal(order))
 
-print "/* %s order terms */"%ordinal(order)
-    
 # Create all the terms relevent for this order
-for i in range(order+1):
-    for j in range(order+1):
-        for k in range(order+1):
+for i in range(order + 1):
+    for j in range(order + 1):
+        for k in range(order + 1):
             if i + j + k == order:
                 if order % 2 == 0:
-                    print "M_%d%d%d += m * X_%d%d%d(dx);"%(i,j,k,i,j,k)
+                    print("M_%d%d%d += m * X_%d%d%d(dx);" % (i, j, k, i, j, k))
                 else:
-                    print "M_%d%d%d += -m * X_%d%d%d(dx);"%(i,j,k,i,j,k)
+                    print("M_%d%d%d += -m * X_%d%d%d(dx);" % (i, j, k, i, j, k))
+
 if order > 0:
-    print "#endif"
+    print("#endif")
 
-print ""
-print "-------------------------------------------------"
-    
-print "gravity_P2M(): (storing)"
-print "-------------------------------------------------\n"
+print("")
+print("-------------------------------------------------")
+
+print("gravity_P2M(): (storing)")
+print("-------------------------------------------------\n")
 
 if order > 0:
-    print "#if SELF_GRAVITY_MULTIPOLE_ORDER > %d"%(order-1)
+    print("#if SELF_GRAVITY_MULTIPOLE_ORDER > %d" % (order - 1))
+
+print("/* %s order terms */" % ordinal(order))
 
-print "/* %s order terms */"%ordinal(order)
-    
 # Create all the terms relevent for this order
-for i in range(order+1):
-    for j in range(order+1):
-        for k in range(order+1):
+for i in range(order + 1):
+    for j in range(order + 1):
+        for k in range(order + 1):
             if i + j + k == order:
-                print "m->m_pole.M_%d%d%d = M_%d%d%d;"%(i,j,k,i,j,k)
+                print("m->m_pole.M_%d%d%d = M_%d%d%d;" % (i, j, k, i, j, k))
+
 if order > 0:
-    print "#endif"
+    print("#endif")
 
 
-print ""
-print "-------------------------------------------------"
+print("")
+print("-------------------------------------------------")
 
-print "gravity_M2M():"
-print "-------------------------------------------------\n"
+print("gravity_M2M():")
+print("-------------------------------------------------\n")
 
 if order > 0:
-    print "#if SELF_GRAVITY_MULTIPOLE_ORDER > %d"%(order-1)
+    print("#if SELF_GRAVITY_MULTIPOLE_ORDER > %d" % (order - 1))
+
+print(
+    "/* Shift %s order terms (1st order mpole (all 0) commented out) */"
+    % ordinal(order)
+)
 
-print "/* Shift %s order terms */"%ordinal(order)
-    
 # Create all the terms relevent for this order
-for i in range(order+1):
-    for j in range(order+1):
-        for k in range(order+1):
+for i in range(order + 1):
+    for j in range(order + 1):
+        for k in range(order + 1):
             if i + j + k == order:
-                print "m_a->M_%d%d%d = m_b->M_%d%d%d"%(i,j,k,i,j,k),
-
-                for ii in range(order+1):
-                    for jj in range(order+1):
-                        for kk in range(order+1):
+                print("m_a->M_%d%d%d = m_b->M_%d%d%d" % (i, j, k, i, j, k), end=" ")
+
+                for ii in range(order + 1):
+                    for jj in range(order + 1):
+                        for kk in range(order + 1):
+
+                            if not (ii == 0 and jj == 0 and kk == 0):
+                                for iii in range(order + 1):
+                                    for jjj in range(order + 1):
+                                        for kkk in range(order + 1):
+                                            if (
+                                                ii + iii == i
+                                                and jj + jjj == j
+                                                and kk + kkk == k
+                                            ):
+                                                if iii + jjj + kkk == 1:
+                                                    print(
+                                                        "/* + X_%d%d%d(dx) * m_b->M_%d%d%d */"
+                                                        % (ii, jj, kk, iii, jjj, kkk),
+                                                        end=" ",
+                                                    )
+                                                else:
+                                                    print(
+                                                        "+ X_%d%d%d(dx) * m_b->M_%d%d%d"
+                                                        % (ii, jj, kk, iii, jjj, kkk),
+                                                        end=" ",
+                                                    )
+
+                print(";")
 
-                            if not(ii == 0 and jj == 0 and kk == 0):
-                                for iii in range(order+1):
-                                    for jjj in range(order+1):
-                                        for kkk in range(order+1):
-                                            if ii+iii == i and jj+jjj == j and kk+kkk == k:
-                                                print "+ X_%d%d%d(dx) * m_b->M_%d%d%d"%(ii, jj, kk, iii, jjj, kkk),
-
-                                        
-                print ";"
 if order > 0:
-    print "#endif"
+    print("#endif")
+
 
-    
-print ""
-print "-------------------------------------------------"
+print("")
+print("-------------------------------------------------")
 
-print "gravity_M2L():"
-print "-------------------------------------------------\n"
+print("gravity_M2L():")
+print("-------------------------------------------------\n")
 
 if order > 0:
-    print "#if SELF_GRAVITY_MULTIPOLE_ORDER > %d\n"%(order-1)
+    print("#if SELF_GRAVITY_MULTIPOLE_ORDER > %d\n" % (order - 1))
 
 # Loop over LHS order
 for l in range(order + 1):
-    print "/* Compute %s order field tensor terms (addition to rank %d) */"%(ordinal(order), l)
-
-    for i in range(l+1):
-        for j in range(l+1):
-            for k in range(l+1):
+    print(
+        "/* Compute %s order field tensor terms (addition to rank %d) */"
+        % (ordinal(order), l)
+    )
+
+    for i in range(l + 1):
+        for j in range(l + 1):
+            for k in range(l + 1):
                 if i + j + k == l:
-                    print "l_b->F_%d%d%d +="%(i,j,k),
+                    print("l_b->F_%d%d%d +=" % (i, j, k), end=" ")
 
                     first = True
-                    for ii in range(order+1):
-                        for jj in range(order+1):
-                            for kk in range(order+1):
-                                if ii + jj + kk  == order - l:
+                    for ii in range(order + 1):
+                        for jj in range(order + 1):
+                            for kk in range(order + 1):
+                                if ii + jj + kk == order - l:
                                     if first:
                                         first = False
                                     else:
-                                        print "+",
-                                    print "m_a->M_%d%d%d * D_%d%d%d(dx, dy, dz, r_inv)"%(ii,jj,kk,i+ii,j+jj,k+kk),
-                    print ";"
-    print ""
-    
+                                        print("+", end=" ")
+                                    print(
+                                        "m_a->M_%d%d%d * D_%d%d%d(dx, dy, dz, r_inv)"
+                                        % (ii, jj, kk, i + ii, j + jj, k + kk),
+                                        end=" ",
+                                    )
+                    print(";")
+    print("")
+
 if order > 0:
-    print "#endif"
+    print("#endif")
 
 
-print ""
-print "-------------------------------------------------"
+print("")
+print("-------------------------------------------------")
 
-print "gravity_L2L():"
-print "-------------------------------------------------\n"
+print("gravity_P2L():")
+print("-------------------------------------------------\n")
 
 if order > 0:
-    print "#if SELF_GRAVITY_MULTIPOLE_ORDER > %d\n"%(order-1)
+    print("#if SELF_GRAVITY_MULTIPOLE_ORDER > %d\n" % (order - 1))
+
+print("/* %s order contributions */" % ordinal(order))
 
 # Loop over LHS order
-for l in range(order + 1):
-    print "/* Shift %s order field tensor terms (addition to rank %d) */"%(ordinal(order), l)
+for i in range(order + 1):
+    for j in range(order + 1):
+        for k in range(order + 1):
+            if i + j + k == order:
+                print("l_b->F_%d%d%d += mass * pot.D_%d%d%d;" % (i, j, k, i, j, k))
+
+if order > 0:
+    print("#endif")
+
+
+print("")
+print("-------------------------------------------------")
 
-    for i in range(l+1):
-        for j in range(l+1):
-            for k in range(l+1):
+print("gravity_L2L():")
+print("-------------------------------------------------\n")
+
+if order > 0:
+    print("#if SELF_GRAVITY_MULTIPOLE_ORDER > %d\n" % (order - 1))
+
+# Loop over LHS order
+for l in range(order + 1):
+    print(
+        "/* Shift %s order field tensor terms (addition to rank %d) */"
+        % (ordinal(order), l)
+    )
+
+    for i in range(l + 1):
+        for j in range(l + 1):
+            for k in range(l + 1):
                 if i + j + k == l:
-                    print "la->F_%d%d%d +="%(i,j,k),
+                    print("la->F_%d%d%d +=" % (i, j, k), end=" ")
 
                     first = True
-                    for ii in range(order+1):
-                        for jj in range(order+1):
-                            for kk in range(order+1):
-                                if ii + jj + kk  == order - l:
+                    for ii in range(order + 1):
+                        for jj in range(order + 1):
+                            for kk in range(order + 1):
+                                if ii + jj + kk == order - l:
                                     if first:
                                         first = False
                                     else:
-                                        print "+",
-                                    print "X_%d%d%d(dx) * lb->F_%d%d%d"%(ii,jj,kk,i+ii,j+jj,k+kk),
-                    print ";"
-    print ""
-    
+                                        print("+", end=" ")
+                                    print(
+                                        "X_%d%d%d(dx) * lb->F_%d%d%d"
+                                        % (ii, jj, kk, i + ii, j + jj, k + kk),
+                                        end=" ",
+                                    )
+                    print(";")
+    print("")
+
 if order > 0:
-    print "#endif"
+    print("#endif")
 
-print ""
-print "-------------------------------------------------"
+print("")
+print("-------------------------------------------------")
 
-print "gravity_L2P():"
-print "-------------------------------------------------\n"
+print("gravity_L2P():")
+print("-------------------------------------------------\n")
 
 if order > 0:
-    print "#if SELF_GRAVITY_MULTIPOLE_ORDER > %d\n"%(order-1)
+    print("#if SELF_GRAVITY_MULTIPOLE_ORDER > %d\n" % (order - 1))
 
-    print "/* %s order contributions */"%(ordinal(order-1))
+    print("/* %s order contributions */" % (ordinal(order - 1)))
 
     for r in range(3):
-        print "gp->a_grav[%d] +="%(r),
+        print("gp->a_grav[%d] +=" % (r), end=" ")
 
         first = True
         for i in range(order + 1):
             for j in range(order + 1):
                 for k in range(order + 1):
-                    if i + j + k == order-1:
+                    if i + j + k == order - 1:
                         if first:
                             first = False
                         else:
-                            print "+",
+                            print("+", end=" ")
                         if r == 0:
-                            ii = i+1
+                            ii = i + 1
                             jj = j
                             kk = k
                         if r == 1:
                             ii = i
-                            jj = j+1
+                            jj = j + 1
                             kk = k
                         if r == 2:
                             ii = i
                             jj = j
-                            kk = k+1
-                        print "X_%d%d%d(dx) * lb->F_%d%d%d"%(i,j,k,ii,jj,kk),
-        print ";"
-
-    print ""
+                            kk = k + 1
+                        print(
+                            "X_%d%d%d(dx) * lb->F_%d%d%d" % (i, j, k, ii, jj, kk),
+                            end=" ",
+                        )
+        print(";")
+
+    print("")
 if order > 0:
-    print "#endif"
+    print("#endif")
 
-print ""
-print "-------------------------------------------------"
+print("")
+print("-------------------------------------------------")
 
-print "gravity_M2P():"
-print "-------------------------------------------------\n"
+print("gravity_M2P():")
+print("-------------------------------------------------\n")
 
 if order > 0:
-    print "#if SELF_GRAVITY_MULTIPOLE_ORDER > %d\n"%(order-1)
-    
-print "/* %s order contributions */"%(ordinal(order))
+    print("#if SELF_GRAVITY_MULTIPOLE_ORDER > %d\n" % (order - 1))
+
+print("/* %s order contributions */" % (ordinal(order)))
+
 
-    
 for r in range(4):
     if r == 0:
-        print "*f_x =",
+        print("*f_x =", end=" ")
     if r == 1:
-        print "*f_y =",
+        print("*f_y =", end=" ")
     if r == 2:
-        print "*f_z =",
+        print("*f_z =", end=" ")
     if r == 3:
-        print "*pot =",
-        
+        print("*pot =", end=" ")
+
     first = True
-    for i in range(order+1):
-        for j in range(order+1):
-            for k in range(order+1):
+    for i in range(order + 1):
+        for j in range(order + 1):
+            for k in range(order + 1):
                 if i + j + k == order:
                     if first:
                         first = False
                     else:
-                        print "+",
+                        print("+", end=" ")
                     if r == 0:
-                        ii = i+1
+                        ii = i + 1
                         jj = j
                         kk = k
                     if r == 1:
                         ii = i
-                        jj = j+1
+                        jj = j + 1
                         kk = k
                     if r == 2:
                         ii = i
                         jj = j
-                        kk = k+1
+                        kk = k + 1
                     if r == 3:
                         ii = i
                         jj = j
                         kk = k
-                    print "m->M_%d%d%d * d.D_%d%d%d"%(i,j,k,ii,jj,kk),
-                    
-    print ";"
-        
-print ""
+                    print("m->M_%d%d%d * d.D_%d%d%d" % (i, j, k, ii, jj, kk), end=" ")
 
-if order > 0:
-    print "#endif"
+    print(";")
+
+print("")
 
-print ""
-print "-------------------------------------------------"
+if order > 0:
+    print("#endif")
 
+print("")
+print("-------------------------------------------------")
diff --git a/theory/Multipoles/mesh_summary.tex b/theory/Multipoles/mesh_summary.tex
index 19524ee21b9ef85e45927182d6632dbf17ab3275..f66a9c36cb7f4a8c447b99016315fdf72cbef226 100644
--- a/theory/Multipoles/mesh_summary.tex
+++ b/theory/Multipoles/mesh_summary.tex
@@ -53,7 +53,7 @@ of particles that are in tree-leaves larger than $1/10$ of the mesh
 size or between
 two tree-leaves distant by more than that amount.\\
 
-MORE WORDS HERE.\\
+\textcolor{red}{MORE WORDS HERE.}\\
 
 The truncation function in Fourier space reads
 
diff --git a/theory/Multipoles/plot_derivatives.py b/theory/Multipoles/plot_derivatives.py
index bd086608c1a8bd8874eb147cd3b42b6485468736..e5f2936dea81ce27a3d1d562239df94191922340 100644
--- a/theory/Multipoles/plot_derivatives.py
+++ b/theory/Multipoles/plot_derivatives.py
@@ -1,22 +1,23 @@
 ###############################################################################
- # This file is part of SWIFT.
- # Copyright (c) 2016  Matthieu Schaller (matthieu.schaller@durham.ac.uk)
- # 
- # This program is free software: you can redistribute it and/or modify
- # it under the terms of the GNU Lesser General Public License as published
- # by the Free Software Foundation, either version 3 of the License, or
- # (at your option) any later version.
- # 
- # This program is distributed in the hope that it will be useful,
- # but WITHOUT ANY WARRANTY; without even the implied warranty of
- # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- # GNU General Public License for more details.
- # 
- # You should have received a copy of the GNU Lesser General Public License
- # along with this program.  If not, see <http://www.gnu.org/licenses/>.
- # 
- ##############################################################################
+# This file is part of SWIFT.
+# Copyright (c) 2016  Matthieu Schaller (matthieu.schaller@durham.ac.uk)
+#
+# This program is free software: you can redistribute it and/or modify
+# it under the terms of the GNU Lesser General Public License as published
+# by the Free Software Foundation, either version 3 of the License, or
+# (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU Lesser General Public License
+# along with this program.  If not, see <http://www.gnu.org/licenses/>.
+#
+##############################################################################
 import matplotlib
+
 matplotlib.use("Agg")
 from pylab import *
 from scipy import integrate
@@ -26,30 +27,31 @@ from matplotlib.font_manager import FontProperties
 import numpy
 import math
 
-params = {'axes.labelsize': 9,
-'axes.titlesize': 10,
-'font.size': 10,
-'legend.fontsize': 10,
-'xtick.labelsize': 8,
-'ytick.labelsize': 8,
-'text.usetex': True,
-'figure.figsize' : (3.15,3.15),
-'figure.subplot.left'    : 0.12,
-'figure.subplot.right'   : 0.99  ,
-'figure.subplot.bottom'  : 0.065  ,
-'figure.subplot.top'     : 0.99  ,
-'figure.subplot.wspace'  : 0.  ,
-'figure.subplot.hspace'  : 0.  ,
-'lines.markersize' : 6,
-'lines.linewidth' : 3.,
-'text.latex.unicode': True
+params = {  # Matplotlib rcParams overrides applied to every figure of this script
+    "axes.labelsize": 9,
+    "axes.titlesize": 10,
+    "font.size": 10,
+    "legend.fontsize": 10,
+    "xtick.labelsize": 8,
+    "ytick.labelsize": 8,
+    "text.usetex": True,
+    "figure.figsize": (3.15, 3.15),
+    "figure.subplot.left": 0.12,
+    "figure.subplot.right": 0.99,
+    "figure.subplot.bottom": 0.065,
+    "figure.subplot.top": 0.99,
+    "figure.subplot.wspace": 0.0,
+    "figure.subplot.hspace": 0.0,
+    "lines.markersize": 6,
+    "lines.linewidth": 3.0,
+    # "text.latex.unicode" dropped: the rcParam was removed in Matplotlib 3.2 (KeyError).
 }
 rcParams.update(params)
-rc('font',**{'family':'sans-serif','sans-serif':['Times']})
+rc("font", **{"family": "sans-serif", "sans-serif": ["Times"]})
 
 # Parameters
-r_min = 0.
-r_max = 10.
+r_min = 0.0
+r_max = 10.0
 r_s = 1.7
 
 # Radius
@@ -57,12 +59,12 @@ r = linspace(r_min, r_max, 401)
 w = 2 * r / r_s
 
 # Powers of alpha ####################################################
-alpha = 1. / (1. + exp(w))
-alpha2 = alpha**2
-alpha3 = alpha**3
-alpha4 = alpha**4
-alpha5 = alpha**5
-alpha6 = alpha**6
+alpha = 1.0 / (1.0 + exp(w))
+alpha2 = alpha ** 2
+alpha3 = alpha ** 3
+alpha4 = alpha ** 4
+alpha5 = alpha ** 5
+alpha6 = alpha ** 6
 
 figure()
 plot(w, alpha, label="$\\alpha^1$")
@@ -72,11 +74,11 @@ plot(w, alpha4, label="$\\alpha^4$")
 plot(w, alpha5, label="$\\alpha^5$")
 plot(w, alpha6, label="$\\alpha^6$")
 
-xlabel("w", labelpad=-4.)
-ylabel("$\\alpha^n(w)$", labelpad=-4.)
+xlabel("w", labelpad=-4.0)
+ylabel("$\\alpha^n(w)$", labelpad=-4.0)
 
 xlim(0, 7.2)
-ylim(0., 0.52)
+ylim(0.0, 0.52)
 
 legend(loc="upper right")
 
@@ -85,10 +87,17 @@ savefig("alpha_powers.pdf")
 
 # Derivatives of alpha ###############################################
 alpha_1 = -alpha + alpha2
-alpha_2 = alpha - 3.* alpha2 + 2.*alpha3
-alpha_3 = -alpha + 7.*alpha2 - 12.*alpha3 + 6.*alpha4
-alpha_4 = alpha - 15. * alpha2 + 50.*alpha3 - 60.*alpha4 + 24.*alpha5
-alpha_5 = -alpha + 31. * alpha2 - 180. * alpha3 + 390.*alpha4 - 360.*alpha5 + 120. * alpha6
+alpha_2 = alpha - 3.0 * alpha2 + 2.0 * alpha3
+alpha_3 = -alpha + 7.0 * alpha2 - 12.0 * alpha3 + 6.0 * alpha4
+alpha_4 = alpha - 15.0 * alpha2 + 50.0 * alpha3 - 60.0 * alpha4 + 24.0 * alpha5
+alpha_5 = (
+    -alpha
+    + 31.0 * alpha2
+    - 180.0 * alpha3
+    + 390.0 * alpha4
+    - 360.0 * alpha5
+    + 120.0 * alpha6
+)
 
 
 figure()
@@ -99,8 +108,8 @@ plot(w, alpha_3, label="$\\alpha^{(3)}$")
 plot(w, alpha_4, label="$\\alpha^{(4)}$")
 plot(w, alpha_5, label="$\\alpha^{(5)}$")
 
-xlabel("w", labelpad=-4.)
-ylabel("$\\alpha^{(n)}(w)$", labelpad=-5.)
+xlabel("w", labelpad=-4.0)
+ylabel("$\\alpha^{(n)}(w)$", labelpad=-5.0)
 
 xlim(0, 7.2)
 ylim(-0.26, 0.16)
@@ -110,14 +119,13 @@ legend(loc="lower right")
 savefig("alpha_derivatives.pdf")
 
 
-
 # Derivatives of sigma ###############################################
 sigma = exp(w) * alpha
 sigma_1 = exp(w) * alpha2
-sigma_2 = exp(w) * (2*alpha3 - alpha2)
-sigma_3 = exp(w) * (6*alpha4 - 6*alpha3 + alpha2)
-sigma_4 = exp(w) * (24*alpha5 -36*alpha4 + 14*alpha3 - alpha2)
-sigma_5 = exp(w) * (120*alpha6 -240*alpha5 + 150*alpha4 - 30*alpha3 + alpha2)
+sigma_2 = exp(w) * (2 * alpha3 - alpha2)
+sigma_3 = exp(w) * (6 * alpha4 - 6 * alpha3 + alpha2)
+sigma_4 = exp(w) * (24 * alpha5 - 36 * alpha4 + 14 * alpha3 - alpha2)
+sigma_5 = exp(w) * (120 * alpha6 - 240 * alpha5 + 150 * alpha4 - 30 * alpha3 + alpha2)
 
 
 figure()
@@ -128,8 +136,8 @@ plot(w, sigma_3, label="$\\sigma^{(3)}$")
 plot(w, sigma_4, label="$\\sigma^{(4)}$")
 plot(w, sigma_5, label="$\\sigma^{(5)}$")
 
-xlabel("w", labelpad=-4.)
-ylabel("$\\sigma^{(n)}(w)$", labelpad=-5.)
+xlabel("w", labelpad=-4.0)
+ylabel("$\\sigma^{(n)}(w)$", labelpad=-5.0)
 
 xlim(0, 7.2)
 ylim(-0.22, 1.02)
@@ -139,20 +147,24 @@ legend(loc="center right")
 savefig("sigma_derivatives.pdf")
 
 
-
 # Derivatives of chi ###############################################
 c1 = 2 / r_s
-c2 = (2 / r_s)**2
-c3 = (2 / r_s)**3
-c4 = (2 / r_s)**4
-c5 = (2 / r_s)**5
+c2 = (2 / r_s) ** 2
+c3 = (2 / r_s) ** 3
+c4 = (2 / r_s) ** 4
+c5 = (2 / r_s) ** 5
 
 chi = 2 - 2 * exp(w) * alpha
 chi_1 = -2 * c1 * exp(w) * alpha2
-chi_2 = -2 * c2 * exp(w) * (2*alpha3 - alpha2)
-chi_3 = -2 * c3 * exp(w) * (6*alpha4 - 6*alpha3 + alpha2)
-chi_4 = -2 * c4 * exp(w) * (24*alpha5 - 36*alpha4 + 14*alpha3 - alpha2)
-chi_5 = -2 * c5 * exp(w) * (120*alpha6 - 240*alpha5 + 150*alpha4 - 30*alpha3 + alpha2)
+chi_2 = -2 * c2 * exp(w) * (2 * alpha3 - alpha2)
+chi_3 = -2 * c3 * exp(w) * (6 * alpha4 - 6 * alpha3 + alpha2)
+chi_4 = -2 * c4 * exp(w) * (24 * alpha5 - 36 * alpha4 + 14 * alpha3 - alpha2)
+chi_5 = (
+    -2
+    * c5
+    * exp(w)
+    * (120 * alpha6 - 240 * alpha5 + 150 * alpha4 - 30 * alpha3 + alpha2)
+)
 
 figure()
 plot(r, chi, label="$\\chi^{(0)}$")
@@ -162,10 +174,10 @@ plot(r, chi_3, label="$\\chi^{(3)}$")
 plot(r, chi_4, label="$\\chi^{(4)}$")
 plot(r, chi_5, label="$\\chi^{(5)}$")
 
-plot([r_s, r_s], [-10, 10], 'k--', lw=1)
+plot([r_s, r_s], [-10, 10], "k--", lw=1)
 
-xlabel("r", labelpad=-4.)
-ylabel("$\\chi^{(n)}(r,r_s)$", labelpad=-5.)
+xlabel("r", labelpad=-4.0)
+ylabel("$\\chi^{(n)}(r,r_s)$", labelpad=-5.0)
 
 xlim(0, 7.2)
 ylim(-1.52, 1.02)
diff --git a/theory/Multipoles/plot_mac_potential.py b/theory/Multipoles/plot_mac_potential.py
new file mode 100644
index 0000000000000000000000000000000000000000..3a3fea2c606d62b1806433dea9ca2dd7fbed2d04
--- /dev/null
+++ b/theory/Multipoles/plot_mac_potential.py
@@ -0,0 +1,212 @@
+import matplotlib
+
+matplotlib.use("Agg")
+from pylab import *
+from scipy import special
+import numpy as np
+import math
+
+
+e_plummer = 1.0 / 3.0  # Plummer softening length (drawn as epsilon_Plummer)
+box_size = 25000  # box side length (drawn as L)
+mesh_size = 64  # r_s = a_smooth * box_size / mesh_size
+a_smooth = 1.25  # multiplies box_size / mesh_size to give r_s
+r_cut_ratio = 4.5  # r_cut = r_cut_ratio * r_s; the curves are zeroed beyond r_cut
+
+####################################################################
+
+params = {  # Matplotlib rcParams overrides: small (3.15 x 3.15) LaTeX-rendered figure
+    "axes.labelsize": 9,
+    "axes.titlesize": 10,
+    "font.size": 10,
+    "legend.fontsize": 10,
+    "xtick.labelsize": 7,
+    "ytick.labelsize": 7,
+    "figure.figsize": (3.15, 3.15),
+    # "text.latex.unicode" dropped: the rcParam was removed in Matplotlib 3.2 (KeyError).
+    "text.usetex": True,
+}
+rcParams.update(params)
+
+plummer_to_spline_ratio = 3.0  # H = plummer_to_spline_ratio * e_plummer
+
+H = plummer_to_spline_ratio * e_plummer  # radius below which the softened curve is used
+r_s = a_smooth * box_size / mesh_size  # scale of the erfc long-range truncation
+r_cut = r_s * r_cut_ratio  # radius beyond which the curves are set to zero
+
+MAC_lo_limit = (5.0 / 9.0) * H  # below this the MAC estimate is held constant
+MAC_hi_limit = (5.0 / 3.0) * r_s  # above this the MAC estimate falls off as r**-4
+
+print("Potential softened below", H, "kpc and truncated above", r_s, "kpc")
+
+####################################################################
+
+r = np.logspace(np.log10(e_plummer) - 1.2, np.log10(box_size) + 0.2, 10000)
+
+# Newtonian gravity: plain inverse-square force magnitude
+f_newton = 1 / r ** 2
+
+# Simulated gravity: u = r / H, kept only inside the softened region (u <= 1)
+u = r / H
+u = u[u <= 1]
+# NOTE(review): W_* is sized by `u <= 1` but stored via `r <= H`; the masks must agree.
+W_swift = 21.0 * u ** 6 - 90.0 * u ** 5 + 140.0 * u ** 4 - 84.0 * u ** 3 + 14.0 * u
+f_swift = f_newton * (  # Newtonian force times the long-range truncation factor in r / r_s
+    special.erfc(0.5 * r / r_s)
+    + (1.0 / math.sqrt(math.pi)) * (r / r_s) * np.exp(-0.25 * (r / r_s) ** 2)
+)
+f_swift[r <= H] = W_swift / H ** 2  # softening polynomial replaces the curve for r <= H
+f_swift[r > r_cut] = 0  # nothing beyond the cut-off radius
+# Same construction using the piecewise softening polynomial labelled "Gadget".
+W_gadget = u * (
+    21.333333 - 48 * u + 38.4 * u ** 2 - 10.6666667 * u ** 3 - 0.06666667 * u ** -3
+)
+W_gadget[u < 0.5] = u[u < 0.5] * (  # inner branch of the piecewise polynomial (u < 0.5)
+    10.666667 + u[u < 0.5] ** 2 * (32.0 * u[u < 0.5] - 38.4)
+)
+f_gadget = f_newton * (
+    special.erfc(0.5 * r / r_s)
+    + (1.0 / math.sqrt(math.pi)) * (r / r_s) * np.exp(-0.25 * (r / r_s) ** 2)
+)
+f_gadget[r <= H] = W_gadget / H ** 2
+f_gadget[r > r_cut] = 0
+# MAC estimator: constant MAC_lo_limit**-2 below MAC_lo_limit, r**-4 tail above MAC_hi_limit.
+f_MAC = np.copy(f_newton)
+f_MAC[r < MAC_lo_limit] = (1 / r[r < MAC_lo_limit]) ** 0 * MAC_lo_limit ** -2
+f_MAC[r > MAC_hi_limit] = (1 / r[r > MAC_hi_limit]) ** 4 * MAC_hi_limit ** 2
+f_MAC[r > r_cut] = 0
+
+# range_test = np.logical_and(r > 0.01 * e_plummer, r < 2 * r_cut)
+# print(np.max(f_swift[range_test] / f_MAC[range_test]))
+
+####################################################################
+
+fig = figure()
+colors = ["#4477AA", "#CC6677", "#DDCC77", "#117733"]
+gs1 = fig.add_gridspec(  # 4 stacked rows with no gap: rows 0-2 upper panel, row 3 lower panel
+    nrows=4,
+    ncols=1,
+    left=0.14,
+    right=0.99,
+    wspace=0.0,
+    hspace=0.0,
+    top=0.99,
+    bottom=0.1,
+)
+fig.add_subplot(gs1[0:3, :], xscale="log", yscale="log")
+# Upper panel: force magnitude of each model against radius (log-log axes).
+plot(r, f_newton, "--", color=colors[0], label="Newtonian")
+plot(r, f_swift, "-", color=colors[3], label="SWIFT")
+plot(r, f_MAC, "-.", color=colors[2], label="MAC estimator")
+# plot(r, f_gadget, '-.', color=colors[2], label="Gadget")
+# Faint vertical guide lines at the characteristic radii; ylim() below clips their extent.
+plot([e_plummer, e_plummer], [1e-20, 1e20], "k--", alpha=0.3, lw=0.7)
+plot([H, H], [1e-20, 1e20], "k--", alpha=0.3, lw=0.7)
+plot([r_s, r_s], [1e-20, 1e20], "k--", alpha=0.3, lw=0.7)
+plot([r_cut, r_cut], [1e-20, 1e20], "k--", alpha=0.3, lw=0.7)
+plot([box_size, box_size], [1e-20, 1e20], "k--", alpha=0.3, lw=0.7)
+# Rotated labels on top of the guide lines (white background masks the line behind the text).
+text(
+    e_plummer,
+    1e-9,
+    "$\\epsilon_{\\rm Plummer}$",
+    rotation=90,
+    backgroundcolor="w",
+    ha="center",
+    alpha=0.3,
+)
+# text(H, 1e-9, "$\\epsilon_{\\rm spline}$", rotation=90, backgroundcolor='w', ha="center", alpha=0.3)
+text(H, 1e-9, "$H$", rotation=90, backgroundcolor="w", ha="center", alpha=0.3)
+text(
+    r_s,
+    1e-1,
+    "$r_{\\rm s}$",
+    rotation=90,
+    backgroundcolor="w",
+    ha="center",
+    va="top",
+    alpha=0.3,
+)
+text(
+    r_cut,
+    1e-1,
+    "$r_{\\rm cut}$",
+    rotation=90,
+    backgroundcolor="w",
+    ha="center",
+    va="top",
+    alpha=0.3,
+)
+text(
+    box_size,
+    1e-1,
+    "$L$",
+    rotation=90,
+    backgroundcolor="w",
+    ha="center",
+    va="top",
+    alpha=0.3,
+)
+
+legend(
+    loc="upper right",
+    frameon=True,
+    handletextpad=0.3,
+    handlelength=1.6,
+    fontsize=8,
+    framealpha=1.0,
+)
+# Axis ranges are derived from the smallest and largest length scales defined above.
+ylim(0.1 * (box_size) ** -2, 2 * (e_plummer / 30) ** -2)
+xlim(e_plummer / 30, box_size * 2.5)
+
+tick_params(axis="x", which="both", labelbottom=False)  # x tick labels only on the lower panel
+# NOTE(review): this x label is set on the upper panel, whose tick labels are hidden -- confirm.
+xlabel("$r$")
+ylabel("$|f(r)|$", labelpad=-2)
+
+##################################################################################
+fig.add_subplot(gs1[3, :], xscale="log", yscale="log")
+
+# Lower panel: the same curves multiplied by r^2, so the Newtonian curve is flat at 1.
+plot(r, f_newton * r ** 2, "--", color=colors[0], label="Newtonian")
+plot(r, f_swift * r ** 2, "-", color=colors[3], label="SWIFT")
+plot(r, f_MAC * r ** 2, "-.", color=colors[2], label="MAC estimator")
+# plot(r, f_gadget * r**2, '-.', color=colors[2], label="Gadget")
+# Same vertical guide lines as in the upper panel.
+plot([e_plummer, e_plummer], [1e-20, 1e20], "k--", alpha=0.3, lw=0.7)
+plot([H, H], [1e-20, 1e20], "k--", alpha=0.3, lw=0.7)
+plot([r_s, r_s], [1e-20, 1e20], "k--", alpha=0.3, lw=0.7)
+plot([r_cut, r_cut], [1e-20, 1e20], "k--", alpha=0.3, lw=0.7)
+plot([box_size, box_size], [1e-20, 1e20], "k--", alpha=0.3, lw=0.7)
+
+ylim(0.08, 2.2)
+xlim(e_plummer / 30, box_size * 2.5)  # same x range as the upper panel
+
+yticks([0.1, 1], ["$0.1$", "$1$"])
+
+xlabel("$r$", labelpad=0)
+ylabel("$|f(r)| \\times r^2$", labelpad=0)
+
+##################################################################################
+# fig.add_subplot(gs1[4, :], xscale="log", yscale="log")
+# NOTE(review): disabled ratio panel; gs1[4, :] above is out of range for the 4-row grid.
+# plot(r, f_newton / f_swift, '--', color=colors[0], label="Newtonian")
+# plot(r, f_swift / f_swift, '-', color=colors[3], label="SWIFT")
+# plot(r, f_MAC / f_swift, ':', color=colors[1], label="MAC estimator")
+# plot(r, f_gadget / f_swift, '-.', color=colors[2], label="Gadget")
+
+# plot([e_plummer, e_plummer], [1e-20, 1e20], 'k--', alpha=0.3, lw=0.7)
+# plot([H, H], [1e-20, 1e20], 'k--', alpha=0.3, lw=0.7)
+# plot([r_s, r_s], [1e-20, 1e20], 'k--', alpha=0.3, lw=0.7)
+# plot([r_cut, r_cut], [1e-20, 1e20], 'k--', alpha=0.3, lw=0.7)
+# plot([box_size, box_size], [1e-20, 1e20], 'k--', alpha=0.3, lw=0.7)
+
+# ylim(0.5, 13.)
+# xlim(e_plummer / 30, box_size * 1.6)
+
+# xlabel("$r$", labelpad=0)
+# ylabel("$|f(r)| / |f_{SWIFT}(r)|$", labelpad=2)
+
+# Write the two-panel figure to the current working directory.
+savefig("mac_potential.pdf")
diff --git a/theory/Multipoles/plot_mesh.py b/theory/Multipoles/plot_mesh.py
index 64f88c4e4ee751e1df9654de9fd7d10c6f85c828..ce8c7da95380a678fd3be1d124a3c8562769bbd7 100644
--- a/theory/Multipoles/plot_mesh.py
+++ b/theory/Multipoles/plot_mesh.py
@@ -1,22 +1,23 @@
 ###############################################################################
- # This file is part of SWIFT.
- # Copyright (c) 2016  Matthieu Schaller (matthieu.schaller@durham.ac.uk)
- # 
- # This program is free software: you can redistribute it and/or modify
- # it under the terms of the GNU Lesser General Public License as published
- # by the Free Software Foundation, either version 3 of the License, or
- # (at your option) any later version.
- # 
- # This program is distributed in the hope that it will be useful,
- # but WITHOUT ANY WARRANTY; without even the implied warranty of
- # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- # GNU General Public License for more details.
- # 
- # You should have received a copy of the GNU Lesser General Public License
- # along with this program.  If not, see <http://www.gnu.org/licenses/>.
- # 
- ##############################################################################
+# This file is part of SWIFT.
+# Copyright (c) 2016  Matthieu Schaller (matthieu.schaller@durham.ac.uk)
+#
+# This program is free software: you can redistribute it and/or modify
+# it under the terms of the GNU Lesser General Public License as published
+# by the Free Software Foundation, either version 3 of the License, or
+# (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU Lesser General Public License
+# along with this program.  If not, see <http://www.gnu.org/licenses/>.
+#
+##############################################################################
 import matplotlib
+
 matplotlib.use("Agg")
 from pylab import *
 from scipy import integrate
@@ -27,31 +28,32 @@ from matplotlib.font_manager import FontProperties
 import numpy
 import math
 
-params = {'axes.labelsize': 9,
-'axes.titlesize': 10,
-'font.size': 10,
-'legend.fontsize': 10,
-'xtick.labelsize': 8,
-'ytick.labelsize': 8,
-'text.usetex': True,
-'figure.figsize' : (3.15,3.15),
-'figure.subplot.left'    : 0.14,
-'figure.subplot.right'   : 0.99  ,
-'figure.subplot.bottom'  : 0.1  ,
-'figure.subplot.top'     : 0.99  ,
-'figure.subplot.wspace'  : 0.  ,
-'figure.subplot.hspace'  : 0.  ,
-'lines.markersize' : 6,
-'lines.linewidth' : 3.,
-'text.latex.unicode': True
+params = {  # Matplotlib rcParams overrides applied to every figure of this script
+    "axes.labelsize": 9,
+    "axes.titlesize": 10,
+    "font.size": 10,
+    "legend.fontsize": 10,
+    "xtick.labelsize": 8,
+    "ytick.labelsize": 8,
+    "text.usetex": True,
+    "figure.figsize": (3.15, 3.15),
+    "figure.subplot.left": 0.14,
+    "figure.subplot.right": 0.99,
+    "figure.subplot.bottom": 0.1,
+    "figure.subplot.top": 0.99,
+    "figure.subplot.wspace": 0.0,
+    "figure.subplot.hspace": 0.0,
+    "lines.markersize": 6,
+    "lines.linewidth": 3.0,
+    # "text.latex.unicode" dropped: the rcParam was removed in Matplotlib 3.2 (KeyError).
 }
 rcParams.update(params)
-rc('font',**{'family':'sans-serif','sans-serif':['Times']})
-colors=['#4477AA', '#CC6677', '#DDCC77', '#117733']
+rc("font", **{"family": "sans-serif", "sans-serif": ["Times"]})
+colors = ["#4477AA", "#CC6677", "#DDCC77", "#117733"]
 
 
 # Parameters
-r_s = 2.
+r_s = 2.0
 r_min = 3e-2
 r_max = 1.5e2
 
@@ -59,95 +61,120 @@ r_max = 1.5e2
 r = logspace(log10(r_min), log10(r_max), 401)
 r_rs = r / r_s
 
-k = logspace(log10(r_min/r_s**2), log10(r_max/r_s**2), 401)
+k = logspace(log10(r_min / r_s ** 2), log10(r_max / r_s ** 2), 401)
 k_rs = k * r_s
 
 # Newtonian solution
-phi_newton = 1. / r
-phit_newton = 1. / k**2
-force_newton = 1. / r**2
+phi_newton = 1.0 / r
+phit_newton = 1.0 / k ** 2
+force_newton = 1.0 / r ** 2
+
 
 def my_exp(x):
-    return 1. + x + (x**2 / 2.) + (x**3 / 6.) + (x**4 / 24.) + (x**5 / 120.) + (x**6 / 720.)
-    #return exp(x)
-    
-def term(x): # 1 / (1 + e^x)
-    return 1. / (1. + exp(x))
-    
+    return (
+        1.0
+        + x
+        + (x ** 2 / 2.0)
+        + (x ** 3 / 6.0)
+        + (x ** 4 / 24.0)
+        + (x ** 5 / 120.0)
+        + (x ** 6 / 720.0)
+    )
+    # return exp(x)
+
+
+def term(x):  # 1 / (1 + e^x)
+    return 1.0 / (1.0 + exp(x))
+
+
 def my_term(x):  # 1 / (1 + e^x)
-    #return 0.5 - 0.25 * x + (x**3 / 48.) - (x**5 / 480)
-    return 1. / (1. + my_exp(x))
-    
-def csch(x): # hyperbolic cosecant
-    return 1. / sinh(x)
+    # return 0.5 - 0.25 * x + (x**3 / 48.) - (x**5 / 480)
+    return 1.0 / (1.0 + my_exp(x))
+
+
+def csch(x):  # hyperbolic cosecant
+    return 1.0 / sinh(x)
+
 
 def sigmoid(x):
     return exp(x) * term(x)
 
+
 def d_sigmoid(x):
-    return exp(x) * term(x)**2
+    return exp(x) * term(x) ** 2
+
 
 def my_sigmoid(x):
-    #return my_exp(x) / (my_exp(x) + 1.)
+    # return my_exp(x) / (my_exp(x) + 1.)
     return my_exp(x) * my_term(x)
 
+
 def my_d_sigmoid(x):
-    #return my_exp(x) / ((my_exp(x) + 1)**2)
-    return my_exp(x) * my_term(x)**2
+    # return my_exp(x) / ((my_exp(x) + 1)**2)
+    return my_exp(x) * my_term(x) ** 2
+
 
 def swift_corr(x):
-    return 2 * sigmoid( 4 * x ) - 1
+    return 2 * sigmoid(4 * x) - 1
+
 
 def swift_corr2(x):
-    return 2 * my_sigmoid( 4 * x ) - 1
+    return 2 * my_sigmoid(4 * x) - 1
+
 
 figure()
 x = linspace(-4, 4, 100)
-plot(x, special.erf(x), '-', color=colors[2])
-plot(x, swift_corr(x), '-', color=colors[3])
-plot(x, swift_corr2(x), '-.', color=colors[3])
-plot(x, x, '-', color=colors[0])
+plot(x, special.erf(x), "-", color=colors[2])
+plot(x, swift_corr(x), "-", color=colors[3])
+plot(x, swift_corr2(x), "-.", color=colors[3])
+plot(x, x, "-", color=colors[0])
 ylim(-1.1, 1.1)
 xlim(-4.1, 4.1)
 savefig("temp.pdf")
 
+
 def alpha(x):
-    return 1. / (1. + exp(x))
+    return 1.0 / (1.0 + exp(x))
+
 
 # Correction in real space
-corr_short_gadget2 = special.erf(r / (2.*r_s))
-corr_short_swift = swift_corr(r / (2.*r_s))
-corr_short_swift2 = swift_corr2(r / (2.*r_s)) 
-eta_short_gadget2 = special.erfc(r / (2.*r_s)) + (r / (r_s * math.sqrt(math.pi))) * exp(-r**2 / (4.*r_s**2))
-eta_short_swift = 4. * (r / r_s) * d_sigmoid(2. * r / r_s) - 2. * sigmoid(2 * r / r_s) + 2.
-eta_short_swift2 = 4. * (r / r_s) * my_d_sigmoid(2. * r / r_s) - 2. * my_sigmoid(2 * r / r_s) + 2.
-
-#x = 2. * r / r_s
-#force_corr = 2. * (1. - exp(x) * (alpha(x) - x * alpha(x)**2))
-#force_corr = 2. * (1.- x*exp(x)*alpha(x)**2 - exp(x)*alpha(x))
-#force_corr = 2. * (x*alpha(x) - x*alpha(x)**2 -exp(x)*alpha(x) + 1)
-#force_corr = abs(2 * (1. - exp(x) * alpha(x) + x * exp(2*x)*alpha(x)**2 - x*exp(x)*alpha(x)))
-#force_corr = abs(force_corr)
+corr_short_gadget2 = special.erf(r / (2.0 * r_s))
+corr_short_swift = swift_corr(r / (2.0 * r_s))
+corr_short_swift2 = swift_corr2(r / (2.0 * r_s))
+eta_short_gadget2 = special.erfc(r / (2.0 * r_s)) + (
+    r / (r_s * math.sqrt(math.pi))
+) * exp(-r ** 2 / (4.0 * r_s ** 2))
+eta_short_swift = (
+    4.0 * (r / r_s) * d_sigmoid(2.0 * r / r_s) - 2.0 * sigmoid(2 * r / r_s) + 2.0
+)
+eta_short_swift2 = (
+    4.0 * (r / r_s) * my_d_sigmoid(2.0 * r / r_s) - 2.0 * my_sigmoid(2 * r / r_s) + 2.0
+)
+
+# x = 2. * r / r_s
+# force_corr = 2. * (1. - exp(x) * (alpha(x) - x * alpha(x)**2))
+# force_corr = 2. * (1.- x*exp(x)*alpha(x)**2 - exp(x)*alpha(x))
+# force_corr = 2. * (x*alpha(x) - x*alpha(x)**2 -exp(x)*alpha(x) + 1)
+# force_corr = abs(2 * (1. - exp(x) * alpha(x) + x * exp(2*x)*alpha(x)**2 - x*exp(x)*alpha(x)))
+# force_corr = abs(force_corr)
 
 # Corection in Fourier space
-corr_long_gadget2 = exp(-k**2*r_s**2)
-corr_long_swift = math.pi * k * r_s * csch(0.5 * math.pi * r_s * k) / 2.
+corr_long_gadget2 = exp(-k ** 2 * r_s ** 2)
+corr_long_swift = math.pi * k * r_s * csch(0.5 * math.pi * r_s * k) / 2.0
 
 # Shortrange term
-phi_short_gadget2 = (1.  / r ) * (1. - corr_short_gadget2)
-phi_short_swift = (1.  / r ) * (1. - corr_short_swift)
-phi_short_swift2 = (1.  / r ) * (1. - corr_short_swift2)
-force_short_gadget2 = (1. / r**2) * eta_short_gadget2
-force_short_swift = (1. / r**2) * eta_short_swift
-force_short_swift2 = (1. / r**2) * eta_short_swift2
+phi_short_gadget2 = (1.0 / r) * (1.0 - corr_short_gadget2)
+phi_short_swift = (1.0 / r) * (1.0 - corr_short_swift)
+phi_short_swift2 = (1.0 / r) * (1.0 - corr_short_swift2)
+force_short_gadget2 = (1.0 / r ** 2) * eta_short_gadget2
+force_short_swift = (1.0 / r ** 2) * eta_short_swift
+force_short_swift2 = (1.0 / r ** 2) * eta_short_swift2
 
 # Long-range term
-phi_long_gadget2 = (1.  / r ) * corr_short_gadget2
-phi_long_swift = (1.  / r ) * corr_short_swift
-phit_long_gadget2 = corr_long_gadget2 / k**2
-phit_long_swift = corr_long_swift / k**2
-
-
+phi_long_gadget2 = (1.0 / r) * corr_short_gadget2
+phi_long_swift = (1.0 / r) * corr_short_swift
+phit_long_gadget2 = corr_long_gadget2 / k ** 2
+phit_long_swift = corr_long_swift / k ** 2
 
 
 figure()
@@ -155,50 +182,58 @@ figure()
 # Potential
 subplot(311, xscale="log", yscale="log")
 
-plot(r_rs, phi_newton, '--', lw=1.4, label="${\\rm Newtonian}$", color=colors[0])
-plot(r_rs, phi_short_gadget2, '-', lw=1.4, label="${\\rm Gadget}$", color=colors[2])
-plot(r_rs, phi_short_swift, '-', lw=1.4, label="${\\rm SWIFT}$", color=colors[3])
-plot(r_rs, phi_short_swift2, ':', lw=1.4, color=colors[3])
-plot([1., 1.], [1e-5, 1e5], 'k-.', alpha=0.5, lw=0.5)
+plot(r_rs, phi_newton, "--", lw=1.4, label="${\\rm Newtonian}$", color=colors[0])
+plot(r_rs, phi_short_gadget2, "-", lw=1.4, label="${\\rm Gadget}$", color=colors[2])
+plot(r_rs, phi_short_swift, "-", lw=1.4, label="${\\rm SWIFT}$", color=colors[3])
+plot(r_rs, phi_short_swift2, ":", lw=1.4, color=colors[3])
+plot([1.0, 1.0], [1e-5, 1e5], "k-.", alpha=0.5, lw=0.5)
 
-xlim(1.1*r_min/ r_s, 0.9*r_max / r_s)
-ylim(1.1/r_max, 0.9/r_min)
+xlim(1.1 * r_min / r_s, 0.9 * r_max / r_s)
+ylim(1.1 / r_max, 0.9 / r_min)
 ylabel("$\\varphi_s(r)$", labelpad=-3)
 
 legend(loc="upper right", frameon=True, handletextpad=0.3, handlelength=1.6, fontsize=8)
 
 # Correction
 subplot(312, xscale="log", yscale="log")
-plot(r_rs, np.ones(np.size(r)), '--', lw=1.4, color=colors[0])
-plot(r_rs, 1. - corr_short_gadget2, '-', lw=1.4, color=colors[2])
-plot(r_rs, 1. - corr_short_swift, '-', lw=1.4, color=colors[3])
-plot(r_rs, 1. - corr_short_swift2, ':', lw=1.4, color=colors[3])
-plot(r_rs, np.ones(np.size(r))*0.01, 'k-.', alpha=0.5, lw=0.5)
-plot([1., 1.], [-1e5, 1e5], 'k-.', alpha=0.5, lw=0.5)
-plot([-1, -1], [-1, -1], 'k-', lw=1.2, label="${\\textrm{Exact}~e^x}$")
-plot([-1, -1], [-1, -1], 'k:', lw=1.2, label="${6^\\textrm{th}~\\textrm{order~series}~e^x}$")
+plot(r_rs, np.ones(np.size(r)), "--", lw=1.4, color=colors[0])
+plot(r_rs, 1.0 - corr_short_gadget2, "-", lw=1.4, color=colors[2])
+plot(r_rs, 1.0 - corr_short_swift, "-", lw=1.4, color=colors[3])
+plot(r_rs, 1.0 - corr_short_swift2, ":", lw=1.4, color=colors[3])
+plot(r_rs, np.ones(np.size(r)) * 0.01, "k-.", alpha=0.5, lw=0.5)
+plot([1.0, 1.0], [-1e5, 1e5], "k-.", alpha=0.5, lw=0.5)
+plot([-1, -1], [-1, -1], "k-", lw=1.2, label="${\\textrm{Exact}~e^x}$")
+plot(
+    [-1, -1],
+    [-1, -1],
+    "k:",
+    lw=1.2,
+    label="${6^\\textrm{th}~\\textrm{order~series}~e^x}$",
+)
 
 yticks([1e-2, 1e-1, 1], ["$0.01$", "$0.1$", "$1$"])
-xlim(1.1*r_min/r_s, 0.9*r_max/r_s)
+xlim(1.1 * r_min / r_s, 0.9 * r_max / r_s)
 ylim(3e-3, 1.5)
-#ylabel("$\\chi_s(r)$", labelpad=-3)
+# ylabel("$\\chi_s(r)$", labelpad=-3)
 ylabel("$\\varphi_s(r) \\times r$", labelpad=-2)
 
-legend(loc="center left", frameon=False, handletextpad=0.3, handlelength=1.6, fontsize=7)
+legend(
+    loc="center left", frameon=False, handletextpad=0.3, handlelength=1.6, fontsize=7
+)
 
 # 1 - Correction
 subplot(313, xscale="log", yscale="log")
-plot(r_rs, corr_short_gadget2, '-', lw=1.4, color=colors[2])
-plot(r_rs, corr_short_swift, '-', lw=1.4, color=colors[3])
-plot(r_rs, corr_short_swift2, ':', lw=1.4, color=colors[3])
+plot(r_rs, corr_short_gadget2, "-", lw=1.4, color=colors[2])
+plot(r_rs, corr_short_swift, "-", lw=1.4, color=colors[3])
+plot(r_rs, corr_short_swift2, ":", lw=1.4, color=colors[3])
 
-plot([1., 1.], [1e-5, 1e5], 'k-.', alpha=0.5, lw=0.5)
-plot(r_rs, np.ones(np.size(r)), 'k-.', alpha=0.5, lw=0.5)
-plot(r_rs, np.ones(np.size(r))*0.01, 'k-.', alpha=0.5, lw=0.5)
+plot([1.0, 1.0], [1e-5, 1e5], "k-.", alpha=0.5, lw=0.5)
+plot(r_rs, np.ones(np.size(r)), "k-.", alpha=0.5, lw=0.5)
+plot(r_rs, np.ones(np.size(r)) * 0.01, "k-.", alpha=0.5, lw=0.5)
 
-xlim(1.1*r_min/r_s, 0.9*r_max/r_s)
+xlim(1.1 * r_min / r_s, 0.9 * r_max / r_s)
 ylim(3e-3, 1.5)
-#ylabel("$1 - \\chi_s(r)$", labelpad=-2)
+# ylabel("$1 - \\chi_s(r)$", labelpad=-2)
 ylabel("$1 - \\varphi_s(r) \\times r$", labelpad=-2)
 yticks([1e-2, 1e-1, 1], ["$0.01$", "$0.1$", "$1$"])
 xlabel("$r / r_s$", labelpad=1)
@@ -212,15 +247,15 @@ savefig("potential_short.pdf")
 figure()
 subplot(311, xscale="log", yscale="log")
 
-plot(r_rs, force_newton, '--', lw=1.4, label="${\\rm Newtonian}$", color=colors[0])
-plot(r_rs, force_short_gadget2, '-', lw=1.4, label="${\\rm Gadget}$", color=colors[2])
-plot(r_rs, force_short_swift, '-', lw=1.4, label="${\\rm SWIFT}$", color=colors[3])
-#plot(r_rs, (1./r**2) * force_corr, '-', lw=1.2, color='r')
-plot(r_rs, force_short_swift2, ':', lw=1.4, color=colors[3])
-plot([1., 1.], [1e-5, 1e5], 'k-.', alpha=0.5, lw=0.5)
+plot(r_rs, force_newton, "--", lw=1.4, label="${\\rm Newtonian}$", color=colors[0])
+plot(r_rs, force_short_gadget2, "-", lw=1.4, label="${\\rm Gadget}$", color=colors[2])
+plot(r_rs, force_short_swift, "-", lw=1.4, label="${\\rm SWIFT}$", color=colors[3])
+# plot(r_rs, (1./r**2) * force_corr, '-', lw=1.2, color='r')
+plot(r_rs, force_short_swift2, ":", lw=1.4, color=colors[3])
+plot([1.0, 1.0], [1e-5, 1e5], "k-.", alpha=0.5, lw=0.5)
 
-xlim(1.1*r_min/ r_s, 0.9*r_max / r_s)
-ylim(1.1/r_max**2, 0.9/r_min**2)
+xlim(1.1 * r_min / r_s, 0.9 * r_max / r_s)
+ylim(1.1 / r_max ** 2, 0.9 / r_min ** 2)
 ylabel("$|\\mathbf{f}_s(r)|$", labelpad=-3)
 yticks([1e-4, 1e-2, 1e0, 1e2], ["$10^{-4}$", "$10^{-2}$", "$10^{0}$", "$10^{2}$"])
 
@@ -228,33 +263,41 @@ legend(loc="upper right", frameon=True, handletextpad=0.3, handlelength=1.6, fon
 
 # Correction
 subplot(312, xscale="log", yscale="log")
-plot(r_rs, np.ones(np.size(r)), '--', lw=1.4, color=colors[0])
-plot(r_rs, eta_short_gadget2, '-', lw=1.4, color=colors[2])
-plot(r_rs, eta_short_swift, '-', lw=1.4, color=colors[3])
-plot(r_rs, eta_short_swift2, ':', lw=1.4, color=colors[3])
-plot(r_rs, np.ones(np.size(r))*0.01, 'k-.', alpha=0.5, lw=0.5)
-plot([1., 1.], [-1e5, 1e5], 'k-.', alpha=0.5, lw=0.5)
-plot([-1, -1], [-1, -1], 'k-', lw=1.2, label="${\\textrm{Exact}~e^x}$")
-plot([-1, -1], [-1, -1], 'k:', lw=1.2, label="${6^\\textrm{th}~\\textrm{order~series}~e^x}$")
+plot(r_rs, np.ones(np.size(r)), "--", lw=1.4, color=colors[0])
+plot(r_rs, eta_short_gadget2, "-", lw=1.4, color=colors[2])
+plot(r_rs, eta_short_swift, "-", lw=1.4, color=colors[3])
+plot(r_rs, eta_short_swift2, ":", lw=1.4, color=colors[3])
+plot(r_rs, np.ones(np.size(r)) * 0.01, "k-.", alpha=0.5, lw=0.5)
+plot([1.0, 1.0], [-1e5, 1e5], "k-.", alpha=0.5, lw=0.5)
+plot([-1, -1], [-1, -1], "k-", lw=1.2, label="${\\textrm{Exact}~e^x}$")
+plot(
+    [-1, -1],
+    [-1, -1],
+    "k:",
+    lw=1.2,
+    label="${6^\\textrm{th}~\\textrm{order~series}~e^x}$",
+)
 
 yticks([1e-2, 1e-1, 1], ["$0.01$", "$0.1$", "$1$"])
-xlim(1.1*r_min/r_s, 0.9*r_max/r_s)
+xlim(1.1 * r_min / r_s, 0.9 * r_max / r_s)
 ylim(3e-3, 1.5)
 ylabel("$|\\mathbf{f}_s(r)|\\times r^2$", labelpad=-2)
 
-legend(loc="center left", frameon=False, handletextpad=0.3, handlelength=1.6, fontsize=7)
+legend(
+    loc="center left", frameon=False, handletextpad=0.3, handlelength=1.6, fontsize=7
+)
 
 # 1 - Correction
 subplot(313, xscale="log", yscale="log")
-plot(r_rs, 1. - eta_short_gadget2, '-', lw=1.4, color=colors[2])
-plot(r_rs, 1. - eta_short_swift, '-', lw=1.4, color=colors[3])
-plot(r_rs, 1. - eta_short_swift2, ':', lw=1.4, color=colors[3])
+plot(r_rs, 1.0 - eta_short_gadget2, "-", lw=1.4, color=colors[2])
+plot(r_rs, 1.0 - eta_short_swift, "-", lw=1.4, color=colors[3])
+plot(r_rs, 1.0 - eta_short_swift2, ":", lw=1.4, color=colors[3])
 
-plot([1., 1.], [1e-5, 1e5], 'k-.', alpha=0.5, lw=0.5)
-plot(r_rs, np.ones(np.size(r)), 'k-.', alpha=0.5, lw=0.5)
-plot(r_rs, np.ones(np.size(r))*0.01, 'k-.', alpha=0.5, lw=0.5)
+plot([1.0, 1.0], [1e-5, 1e5], "k-.", alpha=0.5, lw=0.5)
+plot(r_rs, np.ones(np.size(r)), "k-.", alpha=0.5, lw=0.5)
+plot(r_rs, np.ones(np.size(r)) * 0.01, "k-.", alpha=0.5, lw=0.5)
 
-xlim(1.1*r_min/r_s, 0.9*r_max/r_s)
+xlim(1.1 * r_min / r_s, 0.9 * r_max / r_s)
 ylim(3e-3, 1.5)
 ylabel("$1 - |\\mathbf{f}_s(r)|\\times r^2$", labelpad=-3)
 yticks([1e-2, 1e-1, 1], ["$0.01$", "$0.1$", "$1$"])
@@ -268,41 +311,71 @@ figure()
 subplot(311, xscale="log", yscale="log")
 
 # Potential
-plot(k_rs, phit_newton, '--', lw=1.4, label="${\\rm Newtonian}$", color=colors[0])
-plot(k_rs, phit_long_gadget2, '-', lw=1.4, label="${\\rm Gadget}$", color=colors[2])
-plot(k_rs, phit_long_swift, '-', lw=1.4, label="${\\rm SWIFT}$", color=colors[3])
-plot([1., 1.], [1e-5, 1e5], 'k-.', alpha=0.5, lw=0.5)
+plot(k_rs, phit_newton, "--", lw=1.4, label="${\\rm Newtonian}$", color=colors[0])
+plot(k_rs, phit_long_gadget2, "-", lw=1.4, label="${\\rm Gadget}$", color=colors[2])
+plot(k_rs, phit_long_swift, "-", lw=1.4, label="${\\rm SWIFT}$", color=colors[3])
+plot([1.0, 1.0], [1e-5, 1e5], "k-.", alpha=0.5, lw=0.5)
 
 legend(loc="lower left", frameon=True, handletextpad=0.3, handlelength=1.6, fontsize=8)
 
-xlim(1.1*r_min/ r_s, 0.9*r_max / r_s)
-ylim(1.1/r_max**2, 0.9/r_min**2)
+xlim(1.1 * r_min / r_s, 0.9 * r_max / r_s)
+ylim(1.1 / r_max ** 2, 0.9 / r_min ** 2)
 ylabel("$\\tilde{\\varphi_l}(k)$", labelpad=-3)
 yticks([1e-4, 1e-2, 1e0, 1e2], ["$10^{-4}$", "$10^{-2}$", "$10^{0}$", "$10^{2}$"])
 
 subplot(312, xscale="log", yscale="log")
 
 # Potential normalized
-plot(k_rs, phit_newton * k**2, '--', lw=1.4, label="${\\rm Newtonian}$", color=colors[0])
-plot(k_rs, phit_long_gadget2 * k**2, '-', lw=1.4, label="${\\rm Gadget}$", color=colors[2])
-plot(k_rs, phit_long_swift * k**2, '-', lw=1.4, label="${\\rm SWIFT}$", color=colors[3])
-plot([1., 1.], [1e-5, 1e5], 'k-.', alpha=0.5, lw=0.5)
-plot(r_rs, np.ones(np.size(r))*0.01, 'k-.', alpha=0.5, lw=0.5)
-
-xlim(1.1*r_min/ r_s, 0.9*r_max / r_s)
+plot(
+    k_rs,
+    phit_newton * k ** 2,
+    "--",
+    lw=1.4,
+    label="${\\rm Newtonian}$",
+    color=colors[0],
+)
+plot(
+    k_rs,
+    phit_long_gadget2 * k ** 2,
+    "-",
+    lw=1.4,
+    label="${\\rm Gadget}$",
+    color=colors[2],
+)
+plot(
+    k_rs, phit_long_swift * k ** 2, "-", lw=1.4, label="${\\rm SWIFT}$", color=colors[3]
+)
+plot([1.0, 1.0], [1e-5, 1e5], "k-.", alpha=0.5, lw=0.5)
+plot(r_rs, np.ones(np.size(r)) * 0.01, "k-.", alpha=0.5, lw=0.5)
+
+xlim(1.1 * r_min / r_s, 0.9 * r_max / r_s)
 ylim(3e-3, 1.5)
 ylabel("$k^2 \\times \\tilde{\\varphi_l}(k)$", labelpad=-3)
 yticks([1e-2, 1e-1, 1], ["$0.01$", "$0.1$", "$1$"])
 
 subplot(313, xscale="log", yscale="log")
 
-plot(k_rs, 1. - phit_long_gadget2 * k**2, '-', lw=1.4, label="${\\rm Gadget}$", color=colors[2])
-plot(k_rs, 1. - phit_long_swift * k**2, '-', lw=1.4, label="${\\rm SWIFT}$", color=colors[3])
-plot([1., 1.], [1e-5, 1e5], 'k-', alpha=0.5, lw=0.5)
-plot(r_rs, np.ones(np.size(r)), 'k-.', alpha=0.5, lw=0.5)
-plot(r_rs, np.ones(np.size(r))*0.01, 'k-.', alpha=0.5, lw=0.5)
-
-xlim(1.1*r_min/ r_s, 0.9*r_max / r_s)
+plot(
+    k_rs,
+    1.0 - phit_long_gadget2 * k ** 2,
+    "-",
+    lw=1.4,
+    label="${\\rm Gadget}$",
+    color=colors[2],
+)
+plot(
+    k_rs,
+    1.0 - phit_long_swift * k ** 2,
+    "-",
+    lw=1.4,
+    label="${\\rm SWIFT}$",
+    color=colors[3],
+)
+plot([1.0, 1.0], [1e-5, 1e5], "k-", alpha=0.5, lw=0.5)
+plot(r_rs, np.ones(np.size(r)), "k-.", alpha=0.5, lw=0.5)
+plot(r_rs, np.ones(np.size(r)) * 0.01, "k-.", alpha=0.5, lw=0.5)
+
+xlim(1.1 * r_min / r_s, 0.9 * r_max / r_s)
 ylim(3e-3, 1.5)
 ylabel("$1 - k^2 \\times \\tilde{\\varphi_l}(k)$", labelpad=-3)
 yticks([1e-2, 1e-1, 1], ["$0.01$", "$0.1$", "$1$"])
diff --git a/theory/Multipoles/plot_potential.py b/theory/Multipoles/plot_potential.py
index 56e8dc37be581e02a59db51c2579ea80c6109987..27753ffc7946d27f4d7833205a46699846154b0a 100644
--- a/theory/Multipoles/plot_potential.py
+++ b/theory/Multipoles/plot_potential.py
@@ -1,22 +1,23 @@
 ###############################################################################
- # This file is part of SWIFT.
- # Copyright (c) 2016  Matthieu Schaller (matthieu.schaller@durham.ac.uk)
- # 
- # This program is free software: you can redistribute it and/or modify
- # it under the terms of the GNU Lesser General Public License as published
- # by the Free Software Foundation, either version 3 of the License, or
- # (at your option) any later version.
- # 
- # This program is distributed in the hope that it will be useful,
- # but WITHOUT ANY WARRANTY; without even the implied warranty of
- # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- # GNU General Public License for more details.
- # 
- # You should have received a copy of the GNU Lesser General Public License
- # along with this program.  If not, see <http://www.gnu.org/licenses/>.
- # 
- ##############################################################################
+# This file is part of SWIFT.
+# Copyright (c) 2016  Matthieu Schaller (matthieu.schaller@durham.ac.uk)
+#
+# This program is free software: you can redistribute it and/or modify
+# it under the terms of the GNU Lesser General Public License as published
+# by the Free Software Foundation, either version 3 of the License, or
+# (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU Lesser General Public License
+# along with this program.  If not, see <http://www.gnu.org/licenses/>.
+#
+##############################################################################
 import matplotlib
+
 matplotlib.use("Agg")
 from pylab import *
 from scipy import integrate
@@ -26,30 +27,31 @@ from matplotlib.font_manager import FontProperties
 import numpy
 import math
 
-params = {'axes.labelsize': 9,
-'axes.titlesize': 10,
-'font.size': 10,
-'legend.fontsize': 10,
-'xtick.labelsize': 8,
-'ytick.labelsize': 8,
-'text.usetex': True,
-'figure.figsize' : (3.15,3.15),
-'figure.subplot.left'    : 0.14,
-'figure.subplot.right'   : 0.99  ,
-'figure.subplot.bottom'  : 0.1  ,
-'figure.subplot.top'     : 0.99  ,
-'figure.subplot.wspace'  : 0.  ,
-'figure.subplot.hspace'  : 0.  ,
-'lines.markersize' : 6,
-'lines.linewidth' : 3.,
-'text.latex.unicode': True
+params = {
+    "axes.labelsize": 9,
+    "axes.titlesize": 10,
+    "font.size": 10,
+    "legend.fontsize": 10,
+    "xtick.labelsize": 8,
+    "ytick.labelsize": 8,
+    "text.usetex": True,
+    "figure.figsize": (3.15, 3.15),
+    "figure.subplot.left": 0.14,
+    "figure.subplot.right": 0.99,
+    "figure.subplot.bottom": 0.1,
+    "figure.subplot.top": 0.99,
+    "figure.subplot.wspace": 0.0,
+    "figure.subplot.hspace": 0.0,
+    "lines.markersize": 6,
+    "lines.linewidth": 3.0,
+    "text.latex.unicode": True,
 }
 rcParams.update(params)
-rc('font',**{'family':'sans-serif','sans-serif':['Times']})
+rc("font", **{"family": "sans-serif", "sans-serif": ["Times"]})
 
 # Parameters
-epsilon = 2.
-r_min = 0.
+epsilon = 2.0
+r_min = 0.0
 r_max = 4
 r_max_plot = 2.6
 
@@ -59,9 +61,9 @@ r[0] += 1e-9
 u = r / epsilon
 
 # Newtonian solution
-phi_newton = 1. / r
-F_newton = 1. / r**2
-W_newton = 0. * r
+phi_newton = 1.0 / r
+F_newton = 1.0 / r ** 2
+W_newton = 0.0 * r
 
 # Softened potential
 phi = np.zeros(np.size(r))
@@ -69,98 +71,199 @@ W = np.zeros(np.size(r))
 F = np.zeros(np.size(r))
 for i in range(np.size(r)):
     if r[i] > epsilon:
-        phi[i] = 1. / r[i]
-        W[i] = 0.
-        F[i] = 1. / r[i]**2
+        phi[i] = 1.0 / r[i]
+        W[i] = 0.0
+        F[i] = 1.0 / r[i] ** 2
     else:
-        phi[i] = (-1./epsilon) * (3.*u[i]**7 - 15.*u[i]**6 + 28.*u[i]**5 - 21.*u[i]**4 + 7.*u[i]**2 - 3.)
-        W[i] = (21. / (2.*math.pi)) * (4.*u[i]**5 - 15.*u[i]**4 + 20.*u[i]**3 - 10.*u[i]**2 + 1.) / epsilon**3
-        F[i] = (1./epsilon**2) * (21.*u[i]**6 - 90*u[i]**5 + 140.*u[i]**4 - 84.*u[i]**3 + 14*u[i])
+        phi[i] = (-1.0 / epsilon) * (
+            3.0 * u[i] ** 7
+            - 15.0 * u[i] ** 6
+            + 28.0 * u[i] ** 5
+            - 21.0 * u[i] ** 4
+            + 7.0 * u[i] ** 2
+            - 3.0
+        )
+        W[i] = (
+            (21.0 / (2.0 * math.pi))
+            * (
+                4.0 * u[i] ** 5
+                - 15.0 * u[i] ** 4
+                + 20.0 * u[i] ** 3
+                - 10.0 * u[i] ** 2
+                + 1.0
+            )
+            / epsilon ** 3
+        )
+        F[i] = (1.0 / epsilon ** 2) * (
+            21.0 * u[i] ** 6
+            - 90 * u[i] ** 5
+            + 140.0 * u[i] ** 4
+            - 84.0 * u[i] ** 3
+            + 14 * u[i]
+        )
 
 plummer_equivalent_factor = phi[0] * epsilon
 
-print "Plummer-equivalent factor:", plummer_equivalent_factor
-        
+print("Plummer-equivalent factor:", plummer_equivalent_factor)
+
 epsilon_plummer = epsilon / plummer_equivalent_factor
-        
+
 # Plummer potential
-phi_plummer = (1. / epsilon_plummer) * (1 + (r / epsilon_plummer)**2)**(-1./2.)
-F_plummer = (1. / epsilon_plummer**3) * r / (1 + (r / epsilon_plummer )**2)**(3./2.)
+phi_plummer = (1.0 / epsilon_plummer) * (1 + (r / epsilon_plummer) ** 2) ** (-1.0 / 2.0)
+F_plummer = (
+    (1.0 / epsilon_plummer ** 3) * r / (1 + (r / epsilon_plummer) ** 2) ** (3.0 / 2.0)
+)
+
+
 def eta_plummer(r):
-    return (3. / (4.*math.pi)) * 1. / (1 + r**2)**(5./2.)
-W_plummer = (1. / epsilon_plummer**3) * eta_plummer(r / epsilon_plummer)
+    return (3.0 / (4.0 * math.pi)) * 1.0 / (1 + r ** 2) ** (5.0 / 2.0)
+
+
+W_plummer = (1.0 / epsilon_plummer ** 3) * eta_plummer(r / epsilon_plummer)
 
 
 # Gadget-2 potential
-epsilon_gadget = epsilon #/ plummer_equivalent_factor * 2.8
+epsilon_gadget = epsilon  # / plummer_equivalent_factor * 2.8
 phi_gadget2 = np.zeros(np.size(r))
 W_gadget2 = np.zeros(np.size(r))
 F_gadget2 = np.zeros(np.size(r))
 for i in range(np.size(r)):
     if r[i] > epsilon_gadget:
-        phi_gadget2[i] = 1. / r[i]
-        W_gadget2[i] = 0.
-        F_gadget2[i] = 1. / r[i]**2
-    elif r[i] > epsilon_gadget / 2.:
-        phi_gadget2[i] = -((32/3.)*u[i]**2 - 16.*u[i]**3 + (96./10.)*u[i]**4 - (64./30.)*u[i]**5 - (16./5.) + 1./(15.*u[i])  )/ (epsilon_gadget)
-        W_gadget2[i] = (8. / math.pi) * (2. * (1- u[i])**3) / epsilon_gadget**3
-        F_gadget2[i] = u[i] * (21.333333 - 48*u[i] + 38.4*u[i]**2 - 10.6666667*u[i]**3 - 0.06666667*u[i]**-3) / epsilon_gadget**2   
+        phi_gadget2[i] = 1.0 / r[i]
+        W_gadget2[i] = 0.0
+        F_gadget2[i] = 1.0 / r[i] ** 2
+    elif r[i] > epsilon_gadget / 2.0:
+        phi_gadget2[i] = -(
+            (32 / 3.0) * u[i] ** 2
+            - 16.0 * u[i] ** 3
+            + (96.0 / 10.0) * u[i] ** 4
+            - (64.0 / 30.0) * u[i] ** 5
+            - (16.0 / 5.0)
+            + 1.0 / (15.0 * u[i])
+        ) / (epsilon_gadget)
+        W_gadget2[i] = (8.0 / math.pi) * (2.0 * (1 - u[i]) ** 3) / epsilon_gadget ** 3
+        F_gadget2[i] = (
+            u[i]
+            * (
+                21.333333
+                - 48 * u[i]
+                + 38.4 * u[i] ** 2
+                - 10.6666667 * u[i] ** 3
+                - 0.06666667 * u[i] ** -3
+            )
+            / epsilon_gadget ** 2
+        )
     else:
-        phi_gadget2[i] = -((16./3.)*u[i]**2 - (96./10.)*u[i]**4 + (64./10.)*u[i]**5 - (14./5.)  ) / (epsilon_gadget)
-        W_gadget2[i] = (8. / math.pi) * (1. - 6.*u[i]**2 + 6.*u[i]**3) / epsilon_gadget**3
-        F_gadget2[i] = u[i] * (10.666667 + u[i]**2 * (32. * u[i] - 38.4)) / epsilon_gadget**2
+        phi_gadget2[i] = -(
+            (16.0 / 3.0) * u[i] ** 2
+            - (96.0 / 10.0) * u[i] ** 4
+            + (64.0 / 10.0) * u[i] ** 5
+            - (14.0 / 5.0)
+        ) / (epsilon_gadget)
+        W_gadget2[i] = (
+            (8.0 / math.pi)
+            * (1.0 - 6.0 * u[i] ** 2 + 6.0 * u[i] ** 3)
+            / epsilon_gadget ** 3
+        )
+        F_gadget2[i] = (
+            u[i] * (10.666667 + u[i] ** 2 * (32.0 * u[i] - 38.4)) / epsilon_gadget ** 2
+        )
 
 figure()
-colors=['#4477AA', '#CC6677', '#DDCC77', '#117733']
+colors = ["#4477AA", "#CC6677", "#DDCC77", "#117733"]
 
 # Density
 subplot(311)
-plot(r, W_newton, '--', lw=1.4, label="${\\rm Newtonian}$", color=colors[0])
-plot(r, W_plummer, ':', lw=1.4, label="${\\rm Plummer}$", color=colors[1])
-plot(r, W_gadget2, '-', lw=1.4, label="${\\rm Spline}$", color=colors[2])
-plot(r, W, '-', lw=1.4, label="${\\rm SWIFT}$", color=colors[3])
-plot([epsilon, epsilon], [0, 10], 'k-', alpha=0.5, lw=0.5)
-plot([epsilon/plummer_equivalent_factor, epsilon/plummer_equivalent_factor], [0, 10], 'k-', alpha=0.5, lw=0.5)
+plot(r, W_newton - 1, "--", lw=1.4, label="${\\rm Newtonian}$", color=colors[0])
+plot(r, W_plummer, ":", lw=1.4, label="${\\rm Plummer}$", color=colors[1])
+plot(r, W_gadget2, "-.", lw=1.4, label="${\\rm Cubic~spline}$", color=colors[2])
+plot(r, W, "-", lw=1.4, label="${\\rm SWIFT}$", color=colors[3])
+# plot([epsilon, epsilon], [0, 10], 'k--', alpha=0.5, lw=0.5)
+plot(
+    [epsilon / plummer_equivalent_factor, epsilon / plummer_equivalent_factor],
+    [0, 10],
+    "k--",
+    alpha=0.5,
+    lw=0.5,
+)
 
-legend(loc="upper right", frameon=True, handletextpad=0.3, handlelength=1.6, fontsize=8, framealpha=1.)
+legend(
+    loc="upper right",
+    frameon=True,
+    handletextpad=0.3,
+    handlelength=1.6,
+    fontsize=8,
+    framealpha=1.0,
+)
 
-xlim(0,r_max_plot)
-xticks([0., 0.5, 1., 1.5, 2., 2.5], ["", "", "", "", "", ""])
+xlim(0, r_max_plot)
+xticks([0.0, 0.5, 1.0, 1.5, 2.0, 2.5], ["", "", "", "", "", ""])
 
-ylim(0., 0.84)
+ylim(0.0, 0.84)
 yticks([0, 0.2, 0.4, 0.6, 0.8], ["$0$", "$0.2$", "$0.4$", "$0.6$", "$0.8$"])
 ylabel("$\\rho(r)$", labelpad=2)
 
 # Potential
 subplot(312)
-plot(r, phi_newton, '--', lw=1.4, label="${\\rm Newtonian}$", color=colors[0])
-plot(r, phi_plummer, ':', lw=1.4, label="${\\rm Plummer}$", color=colors[1])
-plot(r, phi_gadget2, '-', lw=1.4, label="${\\rm Spline}$", color=colors[2])
-plot(r, phi, '-', lw=1.4, label="${\\rm SWIFT}$", color=colors[3])
-plot([epsilon, epsilon], [-10, 10], 'k-', alpha=0.5, lw=0.5)
-plot([epsilon/plummer_equivalent_factor, epsilon/plummer_equivalent_factor], [0, 10], 'k-', alpha=0.5, lw=0.5)
+plot(r, phi_newton, "--", lw=1.4, label="${\\rm Newtonian}$", color=colors[0])
+plot(r, phi_plummer, ":", lw=1.4, label="${\\rm Plummer}$", color=colors[1])
+plot(r, phi_gadget2, "-.", lw=1.4, label="${\\rm Spline}$", color=colors[2])
+plot(r, phi, "-", lw=1.4, label="${\\rm SWIFT}$", color=colors[3])
+# plot([epsilon, epsilon], [-10, 10], 'k--', alpha=0.5, lw=0.5)
+plot(
+    [epsilon / plummer_equivalent_factor, epsilon / plummer_equivalent_factor],
+    [0, 10],
+    "k--",
+    alpha=0.5,
+    lw=0.5,
+)
 
 ylim(0, 2.3)
 ylabel("$\\varphi(r)$", labelpad=1)
-#yticks([0., 0.5, 1., 1.5, 2., 2.5], ["$%.1f$"%(0.*epsilon), "$%.1f$"%(0.5*epsilon), "$%.1f$"%(1.*epsilon), "$%.1f$"%(1.5*epsilon), "$%.1f$"%(2.*epsilon)])
+# yticks([0., 0.5, 1., 1.5, 2., 2.5], ["$%.1f$"%(0.*epsilon), "$%.1f$"%(0.5*epsilon), "$%.1f$"%(1.*epsilon), "$%.1f$"%(1.5*epsilon), "$%.1f$"%(2.*epsilon)])
 
-xlim(0,r_max_plot)
-xticks([0., 0.5, 1., 1.5, 2., 2.5], ["", "", "", "", "", ""])
+xlim(0, r_max_plot)
+xticks([0.0, 0.5, 1.0, 1.5, 2.0, 2.5], ["", "", "", "", "", ""])
 
 # Force
 subplot(313)
-plot(r, F_newton, '--', lw=1.4, color=colors[0])
-plot(r, F_plummer, ':', lw=1.4, color=colors[1])
-plot(r, F_gadget2, '-', lw=1.4, color=colors[2])
-plot(r, F, '-', lw=1.4, color=colors[3])
-plot([epsilon, epsilon], [0, 10], 'k-', alpha=0.5, lw=0.5)
-plot([epsilon/plummer_equivalent_factor, epsilon/plummer_equivalent_factor], [0, 10], 'k-', alpha=0.5, lw=0.5)
-text(epsilon+0.03, 0.05, "$\\epsilon$", color='k', alpha=0.5, rotation=90, va="bottom", ha="left", fontsize=8)
-text(epsilon/plummer_equivalent_factor+0.03, 0.05, "$\\epsilon_{\\rm Plummer}$", color='k', alpha=0.5, rotation=90, va="bottom", ha="left", fontsize=8) 
-
-xlim(0,r_max_plot)
-xticks([0., 0.5, 1., 1.5, 2., 2.5], ["$%.1f$"%(0./epsilon), "", "$%.1f$"%(1./epsilon), "", "$%.1f$"%(2./epsilon)])
-xlabel("$r/H$", labelpad=-2.)
+plot(r, F_newton, "--", lw=1.4, color=colors[0])
+plot(r, F_plummer, ":", lw=1.4, color=colors[1])
+plot(r, F_gadget2, "-.", lw=1.4, color=colors[2])
+plot(r, F, "-", lw=1.4, color=colors[3])
+# plot([epsilon, epsilon], [0, 10], 'k--', alpha=0.5, lw=0.5)
+plot(
+    [epsilon / plummer_equivalent_factor, epsilon / plummer_equivalent_factor],
+    [0, 10],
+    "k--",
+    alpha=0.5,
+    lw=0.5,
+)
+# text(epsilon+0.03, 0.05, "$\\epsilon$", color='k', alpha=0.5, rotation=90, va="bottom", ha="left", fontsize=8)
+text(
+    epsilon / plummer_equivalent_factor + 0.03,
+    0.05,
+    "$\\epsilon_{\\rm Plummer}$",
+    color="k",
+    alpha=0.5,
+    rotation=90,
+    va="bottom",
+    ha="left",
+    fontsize=8,
+)
+
+xlim(0, r_max_plot)
+# Six tick positions need six labels: recent Matplotlib releases raise a
+# ValueError on a length mismatch, so blank the 2.5 tick explicitly.
+xticks(
+    [0.0, 0.5, 1.0, 1.5, 2.0, 2.5],
+    [
+        "$%.1f$" % (0.0 / epsilon), "",
+        "$%.1f$" % (1.0 / epsilon), "",
+        "$%.1f$" % (2.0 / epsilon), "",
+    ],
+)
+xlabel("$r/H$", labelpad=-2.0)
 
 ylim(0, 0.95)
 ylabel("$|\\overrightarrow{\\nabla}\\varphi(r)|$", labelpad=0)
diff --git a/theory/Multipoles/potential_derivatives.tex b/theory/Multipoles/potential_derivatives.tex
index 14ddbb9792b72e3e815f8362858785f6e889192c..763e03b5f00dc818d0fe93a92214da2c2d536d40 100644
--- a/theory/Multipoles/potential_derivatives.tex
+++ b/theory/Multipoles/potential_derivatives.tex
@@ -74,11 +74,235 @@ truncated an softened gravity field $\varphi (\mathbf{r}, r_s, H)
 %   \chi^{(5)}(r, r_s) &= \frac{32}{r_s^5} \left(240\alpha(x)^6 - 720\alpha(x)^5 + 780\alpha(x)^4 - 360\alpha(x)^3 + 62\alpha(x)^2 - 2\alpha(x) \right) \nonumber
 % \end{align}
 
+
+%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
+\begin{align}
+  \mathsf{\tilde{D}}_{1}(r, r_s, H) = % D_tilde_tilde_1 = D_tilde_1 
+  \left\lbrace\begin{array}{rcl}
+  f(u)\times  H^{-1} & \mbox{if} & u < 1,\\
+  %r^{-1} & \mbox{if} & u \geq 1,
+  \chi \times r^{-1} & \mbox{if} & u \geq 1~\mbox{and periodic}, \\
+  r^{-1} & \mbox{if} & u \geq 1~\mbox{and not periodic}. 
+  \end{array}
+  \right.\nonumber
+\end{align}
+%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
+\begin{align}
+  \mathsf{\tilde{D}}_{2}(r, r_s, H) = % D_tilde_tilde_3 = D_tilde_3 * r
+  \left\lbrace\begin{array}{rcl}
+  f'(u)\times  H^{-2}& \mbox{if} & u < 1,\\
+  %-1 \times r^{-3} & \mbox{if} & u \geq 1,
+  \left(r\chi' - \chi\right) \times r^{-2} & \mbox{if} & u \geq 1~\mbox{and periodic}, \\
+  -1 \times r^{-2} & \mbox{if} & u \geq 1~\mbox{and not periodic}. 
+  \end{array}
+  \right.\nonumber
+\end{align}
+%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
+\begin{align} 
+  \mathsf{\tilde{D}}_{3}(r, r_s, H) =  % D_tilde_tilde_5 = D_tilde_5 * r^2
+  \left\lbrace\begin{array}{rcl}
+  \left(f''(u) - u^{-1}f'(u)\right)\times  H^{-3}& \mbox{if} & u < 1,\\
+  %3\times r^{-5} & \mbox{if} & u \geq 1,
+  \left(r^2\chi'' - 3r\chi' + 3\chi \right)\times r^{-3} & \mbox{if} & u \geq 1~\mbox{and periodic}, \\
+  3 \times  r^{-3} & \mbox{if} & u \geq 1~\mbox{and not periodic}. 
+  \end{array}
+  \right.\nonumber
+\end{align}
+%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
+\begin{align}
+  \mathsf{\tilde{D}}_{4}(r, r_s, H) = % D_tilde_tilde_7 = D_tilde_7 * r^3
+  \left\lbrace\begin{array}{rcl}
+  \left(f^{(3)}(u)-3u^{-1}f''(u)+3u^{-2}f'(u)\right)\times  H^{-4} & \mbox{if} & u < 1,\\
+  %-15\times r^{-7} & \mbox{if} & u \geq 1,
+  \left(r^3\chi^{(3)} - 6r^2\chi''+15r\chi'-15\chi\right) \times r^{-4} & \mbox{if} & u \geq 1~\mbox{and periodic}, \\
+  -15 \times r^{-4} & \mbox{if} & u \geq 1~\mbox{and not periodic}. 
+  \end{array}
+  \right.\nonumber
+\end{align}
+%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
+\begin{align}
+  \mathsf{\tilde{D}}_{5}(r, r_s, H) = % D_tilde_tilde_9 = D_tilde_9 * r^4
+  \left\lbrace\begin{array}{rcl}
+  \left(f^{(4)}(u)-6u^{-1}f^{(3)}(u)+15u^{-2}f''(u)-15u^{-3}f'(u)\right)\times  H^{-5}& \mbox{if} & u < 1,\\
+  %105\times r^{-9} & \mbox{if} & u \geq 1.
+  \left(r^4\chi^{(4)} - 10r^3\chi^{(3)} + 45r^2\chi'' - 105r\chi' + 105\chi \right) \times r^{-5} & \mbox{if} & u \geq 1~\mbox{and periodic}, \\
+  105 \times r^{-5} & \mbox{if} & u \geq 1~\mbox{and not periodic}.
+  \end{array}
+  \right.\nonumber
+\end{align}
+%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
+\begin{align}
+  \mathsf{\tilde{D}}_{6}(r, r_s, H) = % D_tilde_tilde_11 = D_tilde_11 * r^5
+  \left\lbrace\begin{array}{rcl}
+  \left(f^{(5)}(u) -10u^{-1}f^{(4)}(u) +45u^{-2}f^{(3)}(u) -105u^{-3}f''(u) + 105u^{-4}f'(u)\right)\times  H^{-6}& \mbox{if} & u < 1,\\
+  %-945\times r^{-11} & \mbox{if} & u \geq 1.
+  \left(r^5\chi^{(5)} - 15r^4\chi^{(4)} + 105r^3\chi^{(3)} - 420r^2\chi'' + 945r \chi' - 945\chi\right) \times r^{-6} & \mbox{if} & u \geq 1~\mbox{and periodic}, \\
+  -945\times r^{-6} & \mbox{if} & u \geq 1~\mbox{and not periodic}. 
+  \end{array}
+  \right.\nonumber
+\end{align}
+In the case $u<1$, and using the $f(u)$ that enters Eq.~\ref{eq:fmm:potential}, we can simplify the expressions to get:
+\begin{align}
+  \mathsf{\tilde{D}}_{1} &= (-3u^7 + 15u^6 - 28u^5 + 21u^4 - 7u^2 + 3) \times H^{-1}, \nonumber \\
+  \mathsf{\tilde{D}}_{2} &= (-21u^6 + 90u^5 - 140u^4 + 84u^3 - 14u) \times H^{-2}, \nonumber \\
+  \mathsf{\tilde{D}}_{3} &= (-105u^5 + 360u^4 - 420u^3 + 168u^2) \times H^{-3}, \nonumber \\
+  \mathsf{\tilde{D}}_{4} &= (-315u^4 + 720u^3 - 420u^2) \times H^{-4}, \nonumber \\
+  \mathsf{\tilde{D}}_{5} &= (-315u^3 + 420u) \times H^{-5}, \nonumber \\
+  \mathsf{\tilde{D}}_{6} &= (315u^2 - 1260) \times H^{-6}. \nonumber 
+\end{align}
+These expressions only use low powers of $u$ and, in particular, no terms
+involving $1/u$ as would be the case when using a cubic spline kernel for
+$f(u)$. This makes this choice of softening kernel much faster to evaluate than
+ones using divisions. Similarly, the expressions in the periodic case for $u \geq 1$
+can be simplified to:
+\begin{align}
+  \mathsf{\tilde{D}}_{1} &= \chi r^{-1}, \nonumber \\
+  \mathsf{\tilde{D}}_{2} &= -\chi r^{-2} + \chi' r^{-1}, \nonumber \\
+  \mathsf{\tilde{D}}_{3} &= 3\chi r^{-3} - 3\chi' r^{-2} + \chi'' r^{-1}, \nonumber \\
+  \mathsf{\tilde{D}}_{4} &= -15\chi r^{-4} + 15\chi' r^{-3} - 6\chi''r^{-2} + \chi^{(3)} r^{-1}, \nonumber \\
+  \mathsf{\tilde{D}}_{5} &= 105\chi r^{-5} -105\chi' r^{-4} + 45\chi''r^{-3} - 10\chi^{(3)} r^{-2} + \chi^{(4)} r^{-1}, \nonumber \\
+  \mathsf{\tilde{D}}_{6} &= -945\chi r^{-6} + 945 \chi' r^{-5} -420 \chi'' r^{-4} + 105 \chi^{(3)} r^{-3} - 15\chi^{(4)} r^{-2} +  \chi^{(5)} r^{-1}. \nonumber
+\end{align}
+We can now write out all the derivatives used in the M2L and
+M2P kernels:
+\begin{align}
+  \mathsf{D}_{000}(\mathbf{r}) = \varphi (\mathbf{r}, r_s, H) =
+    \mathsf{\tilde{D}}_{1} \nonumber
+\end{align}
+
+%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
+\noindent\rule{6cm}{0.4pt}
+\begin{align}
+  \mathsf{D}_{100}(\mathbf{r}) = \frac{\partial}{\partial r_x} \varphi (\mathbf{r}, r_s, H) =
+    \left(\frac{r_x}{r}\right) \mathsf{\tilde{D}}_{2} \nonumber
+\end{align}
+
+%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
+\noindent\rule{6cm}{0.4pt}
+\begin{align}
+\mathsf{D}_{200}(\mathbf{r}) = \frac{\partial^2}{\partial r_x^2} \varphi (\mathbf{r}, r_s, H) = 
+\left(\frac{r_x}{r}\right)^2 \mathsf{\tilde{D}}_{3} + \left(\frac{1}{r}\right)\mathsf{\tilde{D}}_{2}\nonumber
+\end{align}
+
+\begin{align}
+\mathsf{D}_{110}(\mathbf{r}) = \frac{\partial^2}{\partial r_x\partial r_y} \varphi (\mathbf{r}, r_s, H) =
+\left(\frac{r_x}{r}\right) \left(\frac{r_y}{r}\right)  \mathsf{\tilde{D}}_{3} \nonumber
+\end{align}
+
+%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
+\noindent\rule{6cm}{0.4pt}
+\begin{align}
+\mathsf{D}_{300}(\mathbf{r}) = \frac{\partial^3}{\partial r_x^3} \varphi (\mathbf{r}, r_s, H) = 
+  \left(\frac{r_x}{r}\right)^3 \mathsf{\tilde{D}}_{4}
+  + 3 \left(\frac{r_x}{r}\right) \left(\frac{1}{r}\right) \mathsf{\tilde{D}}_{3} \nonumber
+\end{align}
+
+\begin{align}
+\mathsf{D}_{210}(\mathbf{r}) = \frac{\partial^3}{\partial r_x^2 \partial r_y} \varphi (\mathbf{r}, r_s, H) = 
+  \left(\frac{r_x}{r}\right)^2 \left(\frac{r_y}{r}\right) \mathsf{\tilde{D}}_{4} + \left(\frac{r_y}{r}\right) \left(\frac{1}{r}\right) \mathsf{\tilde{D}}_{3} \nonumber
+\end{align}
+
+\begin{align}
+\mathsf{D}_{111}(\mathbf{r}) = \frac{\partial^3}{\partial r_x\partial r_y\partial r_z} \varphi (\mathbf{r}, r_s, H) = 
+\left(\frac{r_x}{r}\right)\left(\frac{r_y}{r}\right)\left(\frac{r_z}{r}\right) \mathsf{\tilde{D}}_{4} \nonumber
+\end{align}
+
+%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
+\noindent\rule{6cm}{0.4pt}
+\begin{align}
+  \mathsf{D}_{400}(\mathbf{r}) &= \frac{\partial^4}{\partial r_x^4}
+  \varphi (\mathbf{r}, r_s, H) =
+  \left(\frac{r_x}{r}\right)^4 \mathsf{\tilde{D}}_{5}+
+  6\left(\frac{r_x}{r}\right)^2 \left(\frac{1}{r}\right) \mathsf{\tilde{D}}_{4} +
+  3 \left(\frac{1}{r}\right)^2 \mathsf{\tilde{D}}_{3}
+  \nonumber
+\end{align}
+
+\begin{align}
+  \mathsf{D}_{310}(\mathbf{r}) &= \frac{\partial^4}{\partial r_x^3
+    \partial r_y} \varphi (\mathbf{r}, r_s, H) =
+  \left(\frac{r_x}{r}\right)^3 \left(\frac{r_y}{r}\right) \mathsf{\tilde{D}}_{5} +
+  3 \left(\frac{r_x}{r}\right) \left(\frac{r_y}{r}\right) \left(\frac{1}{r}\right) \mathsf{\tilde{D}}_{4}
+  \nonumber
+\end{align}
+
+\begin{align}
+  \mathsf{D}_{220}(\mathbf{r}) &= \frac{\partial^4}{\partial r_x^2
+    \partial r_y^2} \varphi (\mathbf{r}, r_s, H) =
+    \left(\frac{r_x}{r}\right)^2 \left(\frac{r_y}{r}\right)^2 \mathsf{\tilde{D}}_{5} +
+    \left(\frac{r_x}{r}\right)^2 \left(\frac{1}{r}\right) \mathsf{\tilde{D}}_{4} +
+    \left(\frac{r_y}{r}\right)^2 \left(\frac{1}{r}\right) \mathsf{\tilde{D}}_{4} +
+    \left(\frac{1}{r}\right)^2 \mathsf{\tilde{D}}_{3}
+  \nonumber
+\end{align}
+
+\begin{align}
+  \mathsf{D}_{211}(\mathbf{r}) &= \frac{\partial^4}{\partial r_x^2
+    \partial r_y   \partial r_z} \varphi (\mathbf{r}, r_s, H) =
+    \left(\frac{r_x}{r}\right)^2\left(\frac{r_y}{r}\right)\left(\frac{r_z}{r}\right) \mathsf{\tilde{D}}_{5} +
+    \left(\frac{r_y}{r}\right)\left(\frac{r_z}{r}\right)\left(\frac{1}{r}\right) \mathsf{\tilde{D}}_{4}
+  \nonumber
+\end{align}
+
+%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
+\noindent\rule{6cm}{0.4pt}
+\begin{align}
+  \mathsf{D}_{500}(\mathbf{r}) &= \frac{\partial^5}{\partial r_x^5}
+  \varphi (\mathbf{r}, r_s, H) =
+  \left(\frac{r_x}{r}\right)^5 \mathsf{\tilde{D}}_{6} +
+  10\left(\frac{r_x}{r}\right)^3\left(\frac{1}{r}\right)\mathsf{\tilde{D}}_{5} +
+  15\left(\frac{r_x}{r}\right)\left(\frac{1}{r}\right)^2\mathsf{\tilde{D}}_{4}
+  \nonumber
+\end{align}
+
+\begin{align}
+  \mathsf{D}_{410}(\mathbf{r}) &= \frac{\partial^5}{\partial r_x^4
+    \partial r_y} \varphi (\mathbf{r}, r_s, H) =
+  \left(\frac{r_x}{r}\right)^4 \left(\frac{r_y}{r}\right) \mathsf{\tilde{D}}_{6} +
+  6 \left(\frac{r_x}{r}\right)^2 \left(\frac{r_y}{r}\right)\left(\frac{1}{r}\right) \mathsf{\tilde{D}}_{5} + 
+  3 \left(\frac{r_y}{r}\right) \left(\frac{1}{r}\right)^2\mathsf{\tilde{D}}_{4}
+  \nonumber
+\end{align}
+
+\begin{align}
+  \mathsf{D}_{320}(\mathbf{r}) &= \frac{\partial^5}{\partial r_x^3
+    \partial r_y^2} \varphi (\mathbf{r}, r_s, H) =
+  \left(\frac{r_x}{r}\right)^3 \left(\frac{r_y}{r}\right)^2 \mathsf{\tilde{D}}_{6} +
+  \left(\frac{r_x}{r}\right)^3 \left(\frac{1}{r}\right)\mathsf{\tilde{D}}_{5} +
+  3 \left(\frac{r_x}{r}\right) \left(\frac{r_y}{r}\right)^2 \left(\frac{1}{r}\right)\mathsf{\tilde{D}}_{5} + 
+  3 \left(\frac{r_x}{r}\right) \left(\frac{1}{r}\right)^2\mathsf{\tilde{D}}_{4}
+  \nonumber
+\end{align}
+
+\begin{align}
+  \mathsf{D}_{311}(\mathbf{r}) &= \frac{\partial^5}{\partial r_x^3
+    \partial r_y \partial r_z} \varphi (\mathbf{r}, r_s, H) =
+  \left(\frac{r_x}{r}\right)^3 \left(\frac{r_y}{r}\right) \left(\frac{r_z}{r}\right) \mathsf{\tilde{D}}_{6} +
+  3 \left(\frac{r_x}{r}\right) \left(\frac{r_y}{r}\right) \left(\frac{r_z}{r}\right) \left(\frac{1}{r}\right)\mathsf{\tilde{D}}_{5}
+  \nonumber
+\end{align}
+
+\begin{align}
+  \mathsf{D}_{221}(\mathbf{r}) &= \frac{\partial^5}{\partial r_x^2
+    \partial r_y^2 \partial r_z} \varphi (\mathbf{r}, r_s, H) =
+  \left(\frac{r_x}{r}\right)^2 \left(\frac{r_y}{r}\right)^2 \left(\frac{r_z}{r}\right) \mathsf{\tilde{D}}_{6} +
+  \left(\frac{r_x}{r}\right)^2 \left(\frac{r_z}{r}\right) \left(\frac{1}{r}\right)\mathsf{\tilde{D}}_{5} +
+  \left(\frac{r_y}{r}\right)^2 \left(\frac{r_z}{r}\right) \left(\frac{1}{r}\right)\mathsf{\tilde{D}}_{5} +
+  \left(\frac{r_z}{r}\right) \left(\frac{1}{r}\right)^2\mathsf{\tilde{D}}_{4}
+  \nonumber
+\end{align}
+
+\begin{comment}
+\noindent\rule{12cm}{1pt}\\
+Old version \\
+\noindent\rule{12cm}{1pt}
+
+
 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
 \begin{align}
   \mathsf{\tilde{D}}_{1}(r, r_s, H) =
   \left\lbrace\begin{array}{rcl}
-  \left(-3u^7 + 15u^6 - 28u^5 + 21u^4 - 7u^2 + 3\right)\times  H^{-1} & \mbox{if} & u < 1,\\
+  -\left(3u^7 - 15u^6 + 28u^5 - 21u^4 + 7u^2 - 3\right)\times  H^{-1} & \mbox{if} & u < 1,\\
   %r^{-1} & \mbox{if} & u \geq 1,
   \chi(r, r_s) \times r^{-1} & \mbox{if} & u \geq 1,
   \end{array}
@@ -98,7 +322,7 @@ truncated an softened gravity field $\varphi (\mathbf{r}, r_s, H)
 \begin{align}
   \mathsf{\tilde{D}}_{5}(r, r_s, H) =
   \left\lbrace\begin{array}{rcl}
-  \left(-105u^3 + 360u^2 - 420u + 168\right)\times  H^{-5}& \mbox{if} & u < 1,\\
+  -\left(35u^3 - 120u^2 + 140u - 56\right)\times  H^{-5}& \mbox{if} & u < 1,\\
   %3\times r^{-5} & \mbox{if} & u \geq 1,
   \left(r^2\chi''(r, r_s) - 3r\chi'(r, r_s) + 3\chi(r, r_s) \right)\times r^{-5} & \mbox{if} & u \geq 1, 
   \end{array}
@@ -108,7 +332,7 @@ truncated an softened gravity field $\varphi (\mathbf{r}, r_s, H)
 \begin{align}
   \mathsf{\tilde{D}}_{7}(r, r_s, H) =
   \left\lbrace\begin{array}{rcl}
-  -\left(315u - 720 + 420u^{-1}\right)\times  H^{-7} & \mbox{if} & u < 1,\\
+  -\left(21u - 48 + 28u^{-1}\right)\times  H^{-7} & \mbox{if} & u < 1,\\
   %-15\times r^{-7} & \mbox{if} & u \geq 1,
   \left(r^3\chi^{(3)}(r, r_s) - 6r^2\chi''(r, r_s)+15r\chi'(r, r_s)-15\chi(r, r_s)\right) \times r^{-7} & \mbox{if} & u \geq 1, 
   \end{array}
@@ -118,7 +342,7 @@ truncated an softened gravity field $\varphi (\mathbf{r}, r_s, H)
 \begin{align}
   \mathsf{\tilde{D}}_{9}(r, r_s, H) =
   \left\lbrace\begin{array}{rcl}
-  \left(-315u^{-1} + 420u^{-3}\right)\times  H^{-9}& \mbox{if} & u < 1,\\
+  -\left(3u^{-1} - 4u^{-3}\right)\times  H^{-9}& \mbox{if} & u < 1,\\
   %105\times r^{-9} & \mbox{if} & u \geq 1.
   \left(r^4\chi^{(4)}(r, r_s) - 10r^3\chi^{(3)} + 45r^2\chi''(r, r_s) - 105r\chi'(r, r_s) + 105\chi(r, r_s) \right) \times r^{-9} & \mbox{if} & u \geq 1
   \end{array}
@@ -128,7 +352,7 @@ truncated an softened gravity field $\varphi (\mathbf{r}, r_s, H)
 \begin{align}
   \mathsf{\tilde{D}}_{11}(r, r_s, H) =
   \left\lbrace\begin{array}{rcl}
-  -\left(315u^{-3} - 1260u^{-5}\right)\times  H^{-11}& \mbox{if} & u < 1,\\
+  -\left(\frac{1}{3}u^{-3} - \frac{4}{3}u^{-5}\right)\times  H^{-11}& \mbox{if} & u < 1,\\
   %-945\times r^{-11} & \mbox{if} & u \geq 1.
   \left(r^5\chi^{(5)}(r, r_s) - 15r^4\chi^{(4)}(r, r_s) + 105r^3\chi^{(3)}(r, r_s) - 420r^2\chi''(r, r_s) + 945r \chi'(r, r_s) - 945\chi(r, r_s)\right) \times r^{-11} & \mbox{if} & u \geq 1.
   \end{array}
@@ -262,10 +486,11 @@ r_y \mathsf{\tilde{D}}_{5}(r, r_s, H) \nonumber
   r_x^2 r_y^2 r_z \mathsf{\tilde{D}}_{11}(r, r_s, H) +
   r_x^2 r_z \mathsf{\tilde{D}}_{9}(r, r_s, H) +
   r_y^2 r_z \mathsf{\tilde{D}}_{9}(r, r_s, H) +
-  r_z \mathsf{\tilde{D}}_{y}(r, r_s, H)
+  r_z \mathsf{\tilde{D}}_{7}(r, r_s, H)
   \nonumber
 \end{align}
 
+\end{comment}
 
 
 
@@ -285,94 +510,3 @@ r_y \mathsf{\tilde{D}}_{5}(r, r_s, H) \nonumber
 
 
 
-
-%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
-\begin{comment}
-
-\noindent\rule{6cm}{0.4pt}
-
-\begin{align}
-\mathsf{D}_{100}(\mathbf{r}) = \frac{\partial}{\partial r_x} \varphi (\mathbf{r},H) = 
-\left\lbrace\begin{array}{rcl}
--\frac{r_x}{H^3} \left(21u^5 - 90u^4 + 140u^3 - 84u^2 + 14\right) & \mbox{if} & u < 1,\\
--\frac{r_x}{r^3} & \mbox{if} & u \geq 1, 
-\end{array}
-\right.\nonumber
-\end{align}
-
-\noindent\rule{6cm}{0.4pt}
-
-\begin{align}
-\mathsf{D}_{200}(\mathbf{r}) = \frac{\partial^2}{\partial r_x^2} \varphi (\mathbf{r},H) = 
-\left\lbrace\begin{array}{rcl}
-\frac{r_x^2}{H^5}\left(-105u^3+360u^2-420u+168\right) -
-\frac{1}{H^3} \left(21u^5 - 90u^4 + 140u^3 - 84u^2 + 14\right) & \mbox{if} & u < 1,\\
-3\frac{r_x^2}{r^5} - \frac{1}{r^3} & \mbox{if} & u \geq 1, 
-\end{array}
-\right.\nonumber
-\end{align}
-
-\begin{align}
-\mathsf{D}_{110}(\mathbf{r}) = \frac{\partial^2}{\partial r_x\partial r_y} \varphi (\mathbf{r},H) = 
-\left\lbrace\begin{array}{rcl}
-\frac{r_xr_y}{H^5}\left(-105u^3+360u^2-420u+168\right) & \mbox{if} & u < 1,\\
-3\frac{r_xr_y}{r^5} & \mbox{if} & u \geq 1, 
-\end{array}
-\right.\nonumber
-\end{align}
-
-\noindent\rule{6cm}{0.4pt}
-
-\begin{align}
-\mathsf{D}_{300}(\mathbf{r}) = \frac{\partial^3}{\partial r_x^3} \varphi (\mathbf{r},H) = 
-\left\lbrace\begin{array}{rcl}
--\frac{r_x^3}{H^7} \left(315u - 720 + 420u^{-1}\right) +
-\frac{3r_x}{H^5}\left(-105u^3+360u^2-420u+168\right) & \mbox{if} & u < 1,\\
--15\frac{r_x^3}{r^7} + 9 \frac{r_x}{r^5} & \mbox{if} & u \geq 1, 
-\end{array}
-\right.\nonumber
-\end{align}
-
-\begin{align}
-\mathsf{D}_{210}(\mathbf{r}) = \frac{\partial^3}{\partial r_x^3} \varphi (\mathbf{r},H) = 
-\left\lbrace\begin{array}{rcl}
--\frac{r_x^2r_y}{H^7} \left(315u - 720 + 420u^{-1}\right) +
-\frac{r_y}{H^5}\left(-105u^3+360u^2-420u+168\right) & \mbox{if} & u < 1,\\
--15\frac{r_x^2r_y}{r^7} + 3 \frac{r_y}{r^5} & \mbox{if} & u \geq 1, 
-\end{array}
-\right.\nonumber
-\end{align}
-
-
-\begin{align}
-\mathsf{D}_{111}(\mathbf{r}) = \frac{\partial^3}{\partial r_x\partial r_y\partial r_z} \varphi (\mathbf{r},H) = 
-\left\lbrace\begin{array}{rcl}
--\frac{r_xr_yr_z}{H^7} \left(315u - 720 + 420u^{-1}\right) & \mbox{if} & u < 1,\\
--15\frac{r_xr_yr_z}{r^7} & \mbox{if} & u \geq 1, 
-\end{array}
-\right.\nonumber
-\end{align}
-
-\noindent\rule{6cm}{0.4pt}
-
-\begin{align}
-  \mathsf{D}_{400}(\mathbf{r}) &=
-  \nonumber
-\end{align}
-
-\begin{align}
-  \mathsf{D}_{310}(\mathbf{r}) &=
-  \nonumber
-\end{align}
-
-\begin{align}
-  \mathsf{D}_{220}(\mathbf{r}) &=
-  \nonumber
-\end{align}
-
-\begin{align}
-  \mathsf{D}_{211}(\mathbf{r}) &=
-  \nonumber
-\end{align}
-
-\end{comment}
diff --git a/theory/Multipoles/potential_softening.tex b/theory/Multipoles/potential_softening.tex
index 4041bdad3cdfd470c90a46739f487caa4787892d..47fe4f392bf9bd11487f270421fba3d0dff69af0 100644
--- a/theory/Multipoles/potential_softening.tex
+++ b/theory/Multipoles/potential_softening.tex
@@ -2,18 +2,17 @@
 \label{ssec:potential_softening}
 
 To avoid artificial two-body relaxation, the Dirac
-$\delta$-distribution of particles is convolved with a softening
-kernel of a given fixed, but time-variable, scale-length
-$\epsilon$. Instead of the commonly used spline kernel of
+$\delta$-distribution corresponding to each particle is convolved with
+a softening kernel of a given fixed, but time-variable, scale-length
+$H$. Instead of the commonly used spline kernel of
 \cite{Monaghan1985} (e.g. in \textsc{Gadget}), we use a C2 kernel
 \citep{Wendland1995} which leads to an expression for the force that
 is cheaper to compute and has a very similar overall shape. The C2
 kernel has the advantage of being branch-free leading to an expression
 which is faster to evaluate using vector units available on modern
 architectures; it also does not require any divisions to evaluate the
-softened forces. We set
-$\tilde\delta(\mathbf{x}) = \rho(|\mathbf{x}|) = W(|\mathbf{x}|,
-3\epsilon_{\rm Plummer})$, with $W(r, H)$ given by
+softened forces. We set $\tilde\delta(\mathbf{r}) = \rho(|\mathbf{r}|)
+= W(|\mathbf{r}|, 3\epsilon_{\rm Plummer})$, with $W(r, H)$ given by
 
 \begin{align}
 W(r,H) &= \frac{21}{2\pi H^3} \times \nonumber \\
@@ -25,34 +24,65 @@ W(r,H) &= \frac{21}{2\pi H^3} \times \nonumber \\
 \end{align}
 and $u = r/H$. The potential $\varphi(r,H)$ corresponding to this density distribution reads
 \begin{align}
-\varphi = 
+\varphi(r,H) = 
 \left\lbrace\begin{array}{rcl}
-\frac{1}{H} (-3u^7 + 15u^6 - 28u^5 + 21u^4 - 7u^2 + 3) & \mbox{if} & u < 1,\\
-\frac{1}{r} & \mbox{if} & u \geq 1.
+f(\frac{r}{H}) \times H^{-1} & \mbox{if} & r < H,\\
+r^{-1} & \mbox{if} & r \geq H,
 \end{array}
 \right.
 \label{eq:fmm:potential}
 \end{align}
-
-These choices, lead to a potential at $|\mathbf{x}| = 0$ equal to the
-central potential of a Plummer sphere (i.e.
-$\varphi(0) = 1/\epsilon_{\rm Plummer}$)\footnote{Note the factor $3$
-  in the definition of $\rho(|\mathbf{x}|)$ which differs from the
-  factor $2.8$ used in \textsc{Gadget} as a consequence of the change
-  of kernel shape.}. The softened density profile, its corresponding
-potential and resulting forces are shown on
-Fig. \ref{fig:fmm:softening} (for details of these are obtained see
-section 2 of~\cite{Price2007}). For comparison purposes, we also
-implemented the more traditional spline-kernel softening in \swift.
-
-
+with $f(u) \equiv -3u^7 + 15u^6 - 28u^5 + 21u^4 - 7u^2 + 3$. These
+choices lead to a potential at $|\mathbf{r}| = 0$ equal to the central
+potential of a Plummer sphere (i.e. $\varphi(0) = 1/\epsilon_{\rm
+  Plummer}$)\footnote{Note the factor $3$ in the definition of
+  $\rho(|\mathbf{r}|)$ which differs from the factor $2.8$ used for
+  the cubic spline kernel as a consequence of the change of the functional
+  form of $W$.}. From this expression the softened gravitational force can
+be easily obtained:
+\begin{align}
+\mathbf{\nabla}\varphi(r,H) = \mathbf{r} \cdot
+\left\lbrace\begin{array}{rcl}
+g(\frac{r}{H}) \times H^{-3} & \mbox{if} & r < H,\\
+-r^{-3} & \mbox{if} & r \geq H,
+\end{array}
+\right.
+\label{eq:fmm:force}
+\end{align}
+with $g(u) \equiv f'(u)/u = -21u^5+90u^4-140u^3+84u^2-14$. This last
+expression has the advantage of not containing any divisions or
+branching (besides the always necessary check for $r<H$), making it
+faster to evaluate than the softened force derived from the
+\cite{Monaghan1985} spline kernel. Note also the useful expression
+for the norm of the force:
+\begin{align}
+|\mathbf{\nabla}\varphi(r,H)| = 
+\left\lbrace\begin{array}{rcl}
+\left|f'(\frac{r}{H})\right| \times H^{-2} & \mbox{if} & r < H,\\
+r^{-2} & \mbox{if} & r \geq H.
+\end{array}
+\right.
+\label{eq:fmm:force_norm}
+\end{align}
+The softened density profile, its corresponding potential and
+resulting forces are shown in Fig.~\ref{fig:fmm:softening} (for more
+details about how these are constructed see section 2
+of~\cite{Price2007}). For comparison purposes, we also implemented the
+more traditional spline-kernel softening in \swift.
 \begin{figure}
 \includegraphics[width=\columnwidth]{potential.pdf}
 \caption{The density (top), potential (middle) and forces (bottom)
   generated py a point mass in our softened gravitational scheme.  A
   Plummer-equivalent sphere is shown for comparison. The spline kernel
-  of \citet{Monaghan1985}, used for instance in \textsc{Gadget}, is
-  shown for comparison but note that it has not been re-scaled to
-  match the Plummer-sphere potential at $r=0$.  }
+  of \citet{Monaghan1985} is also depicted but note that it has not
+  been normalised to match the Plummer-sphere potential at $r=0$ (as
+  is done in simulations) but rather normalised to the Newtonian
+  potential at $r=H$ to better highlight the differences in shapes.}
 \label{fig:fmm:softening}
 \end{figure}
+Users specify the value of the Plummer-equivalent softening
+$\epsilon_{\rm Plummer}$ in the parameter file.
+
+\subsubsection{Interaction of bodies with different softening lengths}
+
+\textcolor{red}{MORE WORDS HERE.}\\
diff --git a/theory/Multipoles/run.sh b/theory/Multipoles/run.sh
index eaaa9bc94d78d7cb3f55d7e669314aae24306d68..e3e6e6738b35af9b6cf2c2fa6a4fb3de8a6b4e34 100755
--- a/theory/Multipoles/run.sh
+++ b/theory/Multipoles/run.sh
@@ -2,17 +2,22 @@
 if [ ! -e potential.pdf ]
 then
     echo "Generating 1st figure..."
-    python plot_potential.py
+    python3 plot_potential.py
 fi
 if [ ! -e potential_short.pdf ]
 then
     echo "Generating 2nd figures..."
-    python plot_mesh.py
+    python3 plot_mesh.py
 fi
 if [ ! -e alpha_powers.pdf ]
 then
     echo "Generating derivative figures..."
-    python plot_derivatives.py
+    python3 plot_derivatives.py
+fi
+if [ ! -e mac_potential.pdf ]
+then
+    echo "Generating MAC potential figure..."
+    python3 plot_mac_potential.py
 fi
 echo "Generating PDF..."
 pdflatex -jobname=fmm fmm_standalone.tex