diff --git a/.gitignore b/.gitignore index 99aa018c9d8f826076cb26ae7cdf57fc314b8852..f2e5dac6918d93a9dbd2ce2e017d18a58185a872 100644 --- a/.gitignore +++ b/.gitignore @@ -114,8 +114,11 @@ tests/testReading tests/testSingle tests/testTimeIntegration tests/testSPHStep +tests/testExp +tests/testErfc tests/testKernel tests/testKernelGrav +tests/testKernelLongGrav tests/testFFT tests/testInteractions tests/testInteractions.sh @@ -154,6 +157,7 @@ tests/testDump tests/testLogger tests/benchmarkInteractions tests/testGravityDerivatives +tests/testGravitySpeed tests/testPotentialSelf tests/testPotentialPair tests/testEOS @@ -184,6 +188,7 @@ theory/Multipoles/potential.pdf theory/Multipoles/potential_long.pdf theory/Multipoles/potential_short.pdf theory/Multipoles/force_short.pdf +theory/Multipoles/mac_potential.pdf theory/Cosmology/cosmology.pdf theory/Cooling/eagle_cooling.pdf theory/Gizmo/gizmo-implementation-details/gizmo-implementation-details.pdf diff --git a/configure.ac b/configure.ac index e044a46b42e4b54cc39182882527735a558e6fde..46d9fb2a8a09fa3c010899b3e9496095c00a7a9a 100644 --- a/configure.ac +++ b/configure.ac @@ -53,6 +53,7 @@ AM_CONFIG_HEADER(config.h) AX_CHECK_ENABLE_DEBUG AC_PROG_CC AM_PROG_CC_C_O +AC_OPENMP # If debug is selected then we also define SWIFT_DEVELOP_MODE to control # any developer code options. @@ -2319,7 +2320,7 @@ AC_MSG_RESULT([ Compiler : $CC - vendor : $ax_cv_c_compiler_vendor - version : $ax_cv_c_compiler_version - - flags : $CFLAGS + - flags : $CFLAGS $OPENMP_CFLAGS MPI enabled : $enable_mpi HDF5 enabled : $with_hdf5 - parallel : $have_parallel_hdf5 diff --git a/examples/Cooling/CoolingRates/Makefile.am b/examples/Cooling/CoolingRates/Makefile.am index 8bb0afa44436c5059f93b585ab6f8893752ce294..7fa7d5f6cad1f3a8c5512722d5afe3c994e1619f 100644 --- a/examples/Cooling/CoolingRates/Makefile.am +++ b/examples/Cooling/CoolingRates/Makefile.am @@ -15,7 +15,7 @@ # along with this program. If not, see <http://www.gnu.org/licenses/>. 
# Add the source directory and the non-standard paths to the included library headers to CFLAGS -AM_CFLAGS = -I$(top_srcdir)/src -I$(top_builddir)/examples $(HDF5_CPPFLAGS) $(GSL_INCS) $(FFTW_INCS) $(NUMA_INCS) +AM_CFLAGS = -I$(top_srcdir)/src -I$(top_builddir)/examples $(HDF5_CPPFLAGS) $(GSL_INCS) $(FFTW_INCS) $(NUMA_INCS) $(OPENMP_CFLAGS) AM_LDFLAGS = $(HDF5_LDFLAGS) $(HDF5_LIBS) $(FFTW_LIBS) $(NUMA_LIBS) $(TCMALLOC_LIBS) $(JEMALLOC_LIBS) $(TBBMALLOC_LIBS) $(GRACKLE_LIBS) $(GSL_LIBS) $(PROFILER_LIBS) diff --git a/examples/Cosmology/ConstantCosmoVolume/constant_volume.yml b/examples/Cosmology/ConstantCosmoVolume/constant_volume.yml index a6ff72555ef68964508493856127d4cc739b7722..84764d333c416d5ef43fbe6896c9b5479c35b805 100644 --- a/examples/Cosmology/ConstantCosmoVolume/constant_volume.yml +++ b/examples/Cosmology/ConstantCosmoVolume/constant_volume.yml @@ -47,8 +47,10 @@ Scheduler: Gravity: mesh_side_length: 32 - eta: 0.025 - theta: 0.3 + eta: 0.025 + MAC: adaptive + theta_cr: 0.5 + epsilon_fmm: 0.0001 comoving_DM_softening: 0.08 # 80 kpc = 1/25 of mean inter-particle separation max_physical_DM_softening: 0.08 # 80 kpc = 1/25 of mean inter-particle separation comoving_baryon_softening: 0.08 # 80 kpc = 1/25 of mean inter-particle separation diff --git a/examples/Cosmology/ZeldovichPancake_3D/zeldovichPancake.yml b/examples/Cosmology/ZeldovichPancake_3D/zeldovichPancake.yml index d43c78972b0bc8d1f250b95190dafef305abca3f..5e904fbe07adeeb3709d1e3a21d23a108470f475 100644 --- a/examples/Cosmology/ZeldovichPancake_3D/zeldovichPancake.yml +++ b/examples/Cosmology/ZeldovichPancake_3D/zeldovichPancake.yml @@ -48,9 +48,10 @@ Scheduler: Gravity: mesh_side_length: 32 - eta: 0.025 - theta: 0.3 - r_cut_max: 5. 
+ eta: 0.025 + MAC: adaptive + theta_cr: 0.5 + epsilon_fmm: 0.0001 comoving_DM_softening: 0.001 max_physical_DM_softening: 0.001 comoving_baryon_softening: 0.001 diff --git a/examples/EAGLE_DMO_low_z/EAGLE_DMO_100/eagle_100.yml b/examples/EAGLE_DMO_low_z/EAGLE_DMO_100/eagle_100.yml index f4c5316aa0faac7de843b21792af6a2b635131d6..ef5c27b8b95578e680bbf5cbed91e77e5c71d83b 100644 --- a/examples/EAGLE_DMO_low_z/EAGLE_DMO_100/eagle_100.yml +++ b/examples/EAGLE_DMO_low_z/EAGLE_DMO_100/eagle_100.yml @@ -41,9 +41,11 @@ Statistics: # Parameters for the self-gravity scheme Gravity: - eta: 0.025 # Constant dimensionless multiplier for time integration. - theta: 0.85 # Opening angle (Multipole acceptance criterion) - mesh_side_length: 512 + eta: 0.025 # Constant dimensionless multiplier for time integration. + MAC: geometric + theta_cr: 0.7 # Opening angle (Multipole acceptance criterion) + use_tree_below_softening: 1 + mesh_side_length: 256 comoving_DM_softening: 0.0026994 # Comoving DM softening length (in internal units). max_physical_DM_softening: 0.0007 # Max physical DM softening length (in internal units). diff --git a/examples/EAGLE_DMO_low_z/EAGLE_DMO_12/eagle_12.yml b/examples/EAGLE_DMO_low_z/EAGLE_DMO_12/eagle_12.yml index f5a7c57458e1cea04701eb44ac6a0dda41419334..c127384d004985f4954e989db70b3c77ee637b08 100644 --- a/examples/EAGLE_DMO_low_z/EAGLE_DMO_12/eagle_12.yml +++ b/examples/EAGLE_DMO_low_z/EAGLE_DMO_12/eagle_12.yml @@ -41,9 +41,11 @@ Statistics: # Parameters for the self-gravity scheme Gravity: - eta: 0.025 # Constant dimensionless multiplier for time integration. - theta: 0.7 # Opening angle (Multipole acceptance criterion) - mesh_side_length: 32 + eta: 0.025 # Constant dimensionless multiplier for time integration. + MAC: geometric + theta_cr: 0.7 # Opening angle (Multipole acceptance criterion) + use_tree_below_softening: 1 + mesh_side_length: 32 comoving_DM_softening: 0.0026994 # Comoving DM softening length (in internal units). 
max_physical_DM_softening: 0.0007 # Max physical DM softening length (in internal units). diff --git a/examples/EAGLE_DMO_low_z/EAGLE_DMO_25/eagle_25.yml b/examples/EAGLE_DMO_low_z/EAGLE_DMO_25/eagle_25.yml index 15df02b2a0c4b735d9ce6b1e252a237b67354cb7..75743434bfb879a402b216ca0e7fd52918f28808 100644 --- a/examples/EAGLE_DMO_low_z/EAGLE_DMO_25/eagle_25.yml +++ b/examples/EAGLE_DMO_low_z/EAGLE_DMO_25/eagle_25.yml @@ -41,9 +41,11 @@ Statistics: # Parameters for the self-gravity scheme Gravity: - eta: 0.025 # Constant dimensionless multiplier for time integration. - theta: 0.7 # Opening angle (Multipole acceptance criterion) - mesh_side_length: 64 + eta: 0.025 # Constant dimensionless multiplier for time integration. + MAC: geometric + theta_cr: 0.7 # Opening angle (Multipole acceptance criterion) + use_tree_below_softening: 1 + mesh_side_length: 64 comoving_DM_softening: 0.0026994 # Comoving DM softening length (in internal units). max_physical_DM_softening: 0.0007 # Max physical DM softening length (in internal units). diff --git a/examples/EAGLE_DMO_low_z/EAGLE_DMO_50/eagle_50.yml b/examples/EAGLE_DMO_low_z/EAGLE_DMO_50/eagle_50.yml index b9c7237b819802f7fa12dd975bb53c8f698c20ff..25a9e255598471096b7a3f181958418982a25132 100644 --- a/examples/EAGLE_DMO_low_z/EAGLE_DMO_50/eagle_50.yml +++ b/examples/EAGLE_DMO_low_z/EAGLE_DMO_50/eagle_50.yml @@ -40,9 +40,11 @@ Statistics: # Parameters for the self-gravity scheme Gravity: - eta: 0.025 # Constant dimensionless multiplier for time integration. - theta: 0.7 # Opening angle (Multipole acceptance criterion) - mesh_side_length: 128 + eta: 0.025 # Constant dimensionless multiplier for time integration. + MAC: geometric + theta_cr: 0.7 # Opening angle (Multipole acceptance criterion) + use_tree_below_softening: 1 + mesh_side_length: 128 comoving_DM_softening: 0.0026994 # Comoving DM softening length (in internal units). max_physical_DM_softening: 0.0007 # Max physical DM softening length (in internal units). 
diff --git a/examples/EAGLE_ICs/EAGLE_12/eagle_12.yml b/examples/EAGLE_ICs/EAGLE_12/eagle_12.yml index ca539e3dcf262fc025d8e182e45c541c22a8c7d7..0a80d4abfade02a8d01ab46cd343f5de007416a0 100644 --- a/examples/EAGLE_ICs/EAGLE_12/eagle_12.yml +++ b/examples/EAGLE_ICs/EAGLE_12/eagle_12.yml @@ -37,14 +37,15 @@ Statistics: # Parameters for the self-gravity scheme Gravity: - eta: 0.025 # Constant dimensionless multiplier for time integration. - theta: 0.7 # Opening angle (Multipole acceptance criterion) - mesh_side_length: 64 + eta: 0.025 # Constant dimensionless multiplier for time integration. + MAC: geometric # Use the geometric opening angle condition + theta_cr: 0.7 # Opening angle (Multipole acceptance criterion) + use_tree_below_softening: 1 + mesh_side_length: 64 comoving_DM_softening: 0.003320 # Comoving softening for DM (3.32 ckpc) max_physical_DM_softening: 0.001300 # Physical softening for DM (1.30 pkpc) comoving_baryon_softening: 0.001790 # Comoving softening for baryons (1.79 ckpc) max_physical_baryon_softening: 0.000700 # Physical softening for baryons (0.70 pkpc) - dithering: 0 # Parameters for the hydrodynamics scheme SPH: diff --git a/examples/EAGLE_ICs/EAGLE_25/eagle_25.yml b/examples/EAGLE_ICs/EAGLE_25/eagle_25.yml index e598e96436f0fc3391e46e6bd0fcd0226fb2838e..bff0d1caaed588feacaab80ab1ecd84167ebe430 100644 --- a/examples/EAGLE_ICs/EAGLE_25/eagle_25.yml +++ b/examples/EAGLE_ICs/EAGLE_25/eagle_25.yml @@ -37,14 +37,15 @@ Statistics: # Parameters for the self-gravity scheme Gravity: - eta: 0.025 # Constant dimensionless multiplier for time integration. - theta: 0.7 # Opening angle (Multipole acceptance criterion) - mesh_side_length: 128 + eta: 0.025 # Constant dimensionless multiplier for time integration. 
+ MAC: geometric # Use the geometric opening angle condition + theta_cr: 0.7 # Opening angle (Multipole acceptance criterion) + use_tree_below_softening: 1 + mesh_side_length: 128 comoving_DM_softening: 0.003320 # Comoving softening for DM (3.32 ckpc) max_physical_DM_softening: 0.001300 # Physical softening for DM (1.30 pkpc) comoving_baryon_softening: 0.001790 # Comoving softening for baryons (1.79 ckpc) max_physical_baryon_softening: 0.000700 # Physical softening for baryons (0.70 pkpc) - dithering: 0 # Parameters for the hydrodynamics scheme SPH: diff --git a/examples/EAGLE_ICs/EAGLE_50/eagle_50.yml b/examples/EAGLE_ICs/EAGLE_50/eagle_50.yml index 825669a8e2d6b065dc8ae3da5869be577f62bf2c..7a89423aefe6b18a2c18310364fe07a0af8e7e89 100644 --- a/examples/EAGLE_ICs/EAGLE_50/eagle_50.yml +++ b/examples/EAGLE_ICs/EAGLE_50/eagle_50.yml @@ -37,14 +37,15 @@ Statistics: # Parameters for the self-gravity scheme Gravity: - eta: 0.025 # Constant dimensionless multiplier for time integration. - theta: 0.7 # Opening angle (Multipole acceptance criterion) - mesh_side_length: 256 + eta: 0.025 # Constant dimensionless multiplier for time integration. 
+ MAC: geometric # Use the geometric opening angle condition + theta_cr: 0.7 # Opening angle (Multipole acceptance criterion) + use_tree_below_softening: 1 + mesh_side_length: 256 comoving_DM_softening: 0.003320 # Comoving softening for DM (3.32 ckpc) max_physical_DM_softening: 0.001300 # Physical softening for DM (1.30 pkpc) comoving_baryon_softening: 0.001790 # Comoving softening for baryons (1.79 ckpc) max_physical_baryon_softening: 0.000700 # Physical softening for baryons (0.70 pkpc) - dithering: 0 # Parameters for the hydrodynamics scheme SPH: diff --git a/examples/EAGLE_low_z/EAGLE_100/eagle_100.yml b/examples/EAGLE_low_z/EAGLE_100/eagle_100.yml index 12919bf5d598fe4e4fece1bf1f7539ac377244cd..5ef3d21716cf54f638b5e8ee89a5e3e71bab298c 100644 --- a/examples/EAGLE_low_z/EAGLE_100/eagle_100.yml +++ b/examples/EAGLE_low_z/EAGLE_100/eagle_100.yml @@ -43,10 +43,11 @@ Statistics: # Parameters for the self-gravity scheme Gravity: - eta: 0.025 # Constant dimensionless multiplier for time integration. - theta: 0.85 # Opening angle (Multipole acceptance criterion) - mesh_side_length: 256 - dithering: 0 + eta: 0.025 # Constant dimensionless multiplier for time integration. + MAC: geometric + theta_cr: 0.7 # Opening angle (Multipole acceptance criterion) + use_tree_below_softening: 1 + mesh_side_length: 256 comoving_DM_softening: 0.0026994 # Comoving DM softening length (in internal units). max_physical_DM_softening: 0.0007 # Max physical DM softening length (in internal units). comoving_baryon_softening: 0.0026994 # Comoving DM softening length (in internal units). 
diff --git a/examples/EAGLE_low_z/EAGLE_12/eagle_12.yml b/examples/EAGLE_low_z/EAGLE_12/eagle_12.yml index 68a599cb6bd25ec2e4ea459414e8690e6c52d154..a07a08d0fcf1e273c3e77a80965843516f22607b 100644 --- a/examples/EAGLE_low_z/EAGLE_12/eagle_12.yml +++ b/examples/EAGLE_low_z/EAGLE_12/eagle_12.yml @@ -44,15 +44,15 @@ Statistics: # Parameters for the self-gravity scheme Gravity: - eta: 0.025 # Constant dimensionless multiplier for time integration. - theta: 0.7 # Opening angle (Multipole acceptance criterion) - mesh_side_length: 32 - dithering: 0 + eta: 0.025 # Constant dimensionless multiplier for time integration. + MAC: geometric + theta_cr: 0.7 # Opening angle (Multipole acceptance criterion) + use_tree_below_softening: 1 + mesh_side_length: 32 comoving_DM_softening: 0.0026994 # Comoving DM softening length (in internal units). max_physical_DM_softening: 0.0007 # Max physical DM softening length (in internal units). comoving_baryon_softening: 0.0026994 # Comoving DM softening length (in internal units). max_physical_baryon_softening: 0.0007 # Max physical DM softening length (in internal units). - # Parameters for the hydrodynamics scheme SPH: diff --git a/examples/EAGLE_low_z/EAGLE_25/eagle_25.yml b/examples/EAGLE_low_z/EAGLE_25/eagle_25.yml index 93981aac4264d75180d19e81223ab9a6c619686c..c84352abc28c5403ae06923c0fd9627bc39297d4 100644 --- a/examples/EAGLE_low_z/EAGLE_25/eagle_25.yml +++ b/examples/EAGLE_low_z/EAGLE_25/eagle_25.yml @@ -51,10 +51,11 @@ Statistics: # Parameters for the self-gravity scheme Gravity: - eta: 0.025 # Constant dimensionless multiplier for time integration. - theta: 0.7 # Opening angle (Multipole acceptance criterion) - mesh_side_length: 64 - dithering: 0 + eta: 0.025 # Constant dimensionless multiplier for time integration. + MAC: geometric + theta_cr: 0.7 # Opening angle (Multipole acceptance criterion) + use_tree_below_softening: 1 + mesh_side_length: 64 comoving_DM_softening: 0.0026994 # Comoving DM softening length (in internal units). 
max_physical_DM_softening: 0.0007 # Max physical DM softening length (in internal units). comoving_baryon_softening: 0.0026994 # Comoving DM softening length (in internal units). diff --git a/examples/EAGLE_low_z/EAGLE_50/eagle_50.yml b/examples/EAGLE_low_z/EAGLE_50/eagle_50.yml index 607fef8950b1583416bf87673072ffbbc715ade6..2db295f14d95e47d6acf0dbfbdd6600d8a856a64 100644 --- a/examples/EAGLE_low_z/EAGLE_50/eagle_50.yml +++ b/examples/EAGLE_low_z/EAGLE_50/eagle_50.yml @@ -43,10 +43,11 @@ Statistics: # Parameters for the self-gravity scheme Gravity: - eta: 0.025 # Constant dimensionless multiplier for time integration. - theta: 0.7 # Opening angle (Multipole acceptance criterion) - mesh_side_length: 128 - dithering: 0 + eta: 0.025 # Constant dimensionless multiplier for time integration. + MAC: geometric + theta_cr: 0.7 # Opening angle (Multipole acceptance criterion) + use_tree_below_softening: 1 + mesh_side_length: 128 comoving_DM_softening: 0.0026994 # Comoving DM softening length (in internal units). max_physical_DM_softening: 0.0007 # Max physical DM softening length (in internal units). comoving_baryon_softening: 0.0026994 # Comoving DM softening length (in internal units). diff --git a/examples/EAGLE_low_z/EAGLE_6/eagle_6.yml b/examples/EAGLE_low_z/EAGLE_6/eagle_6.yml index dba6d0d57ccafd86f1c56c274536aeead8fe0be7..192a76113f1f88f475496c2aae2f37db50575a0b 100644 --- a/examples/EAGLE_low_z/EAGLE_6/eagle_6.yml +++ b/examples/EAGLE_low_z/EAGLE_6/eagle_6.yml @@ -54,10 +54,11 @@ Statistics: # Parameters for the self-gravity scheme Gravity: - eta: 0.025 # Constant dimensionless multiplier for time integration. - theta: 0.7 # Opening angle (Multipole acceptance criterion) - mesh_side_length: 16 - dithering: 0 + eta: 0.025 # Constant dimensionless multiplier for time integration. 
+ MAC: geometric + theta_cr: 0.7 # Opening angle (Multipole acceptance criterion) + use_tree_below_softening: 1 + mesh_side_length: 16 comoving_DM_softening: 0.0026994 # Comoving DM softening length (in internal units). max_physical_DM_softening: 0.0007 # Max physical DM softening length (in internal units). comoving_baryon_softening: 0.0026994 # Comoving DM softening length (in internal units). diff --git a/examples/GEAR/AgoraDisk/agora_disk.yml b/examples/GEAR/AgoraDisk/agora_disk.yml index 43371536035eb66dd1e677e1951c7408963fd8f3..3804b454b7c18eda4e397bb21b7ffc493944ecb1 100644 --- a/examples/GEAR/AgoraDisk/agora_disk.yml +++ b/examples/GEAR/AgoraDisk/agora_disk.yml @@ -39,8 +39,10 @@ Statistics: # Parameters for the self-gravity scheme Gravity: - eta: 0.05 # Constant dimensionless multiplier for time integration. - theta: 0.7 # Opening angle (Multipole acceptance criterion) + eta: 0.05 # Constant dimensionless multiplier for time integration. + MAC: geometric + theta_cr: 0.7 + use_tree_below_softening: 1 comoving_DM_softening: 0.08 # Comoving softening length (in internal units). max_physical_DM_softening: 0.08 # Physical softening length (in internal units). comoving_baryon_softening: 0.08 # Comoving softening length (in internal units). diff --git a/examples/GEAR/ZoomIn/zoom_in.yml b/examples/GEAR/ZoomIn/zoom_in.yml index 99ade12d7f457a2a0ff41cbf604255e1a2fb5bc4..08bada9a1fbb30d07310041a198762e00d0c758c 100644 --- a/examples/GEAR/ZoomIn/zoom_in.yml +++ b/examples/GEAR/ZoomIn/zoom_in.yml @@ -40,8 +40,11 @@ Statistics: delta_time: 1.05 # Time between statistics output Gravity: - eta: 0.002 # Constant dimensionless multiplier for time integration. - theta: 0.6 # Opening angle (Multipole acceptance criterion) + eta: 0.002 # Constant dimensionless multiplier for time integration. + MAC: adaptive + theta_cr: 0.7 + epsilon_fmm: 0.001 + use_tree_below_softening: 1 comoving_DM_softening: 0.1278 # Comoving softening length (in internal units). 
max_physical_DM_softening: 0.03365 # Physical softening length (in internal units). comoving_baryon_softening: 0.03365 # Comoving softening length (in internal units). diff --git a/examples/HydroTests/EvrardCollapse_3D/evrard.yml b/examples/HydroTests/EvrardCollapse_3D/evrard.yml index ab438646383cd510dae4727abaf9a7f1fab5681b..7ca3c8ac216ab3338bcb56253e55eb8a181318ed 100644 --- a/examples/HydroTests/EvrardCollapse_3D/evrard.yml +++ b/examples/HydroTests/EvrardCollapse_3D/evrard.yml @@ -31,8 +31,10 @@ SPH: # Parameters for the self-gravity scheme Gravity: - eta: 0.025 # Constant dimensionless multiplier for time integration. - theta: 0.7 + eta: 0.025 + MAC: adaptive + theta_cr: 0.7 + epsilon_fmm: 0.001 max_physical_baryon_softening: 0.001 # Physical softening length (in internal units). # Parameters related to the initial conditions diff --git a/examples/IsolatedGalaxy/IsolatedGalaxy_NFW_MN/isolated_galaxy.yml b/examples/IsolatedGalaxy/IsolatedGalaxy_NFW_MN/isolated_galaxy.yml index d2ca7aae80862e1bc1e89e645c6a1bb3adfcba1b..a66446dd3245e1fcdbaa7eabdb39b2567ee9c89e 100644 --- a/examples/IsolatedGalaxy/IsolatedGalaxy_NFW_MN/isolated_galaxy.yml +++ b/examples/IsolatedGalaxy/IsolatedGalaxy_NFW_MN/isolated_galaxy.yml @@ -9,7 +9,9 @@ InternalUnitSystem: # Parameters for the self-gravity scheme Gravity: eta: 0.025 # Constant dimensionless multiplier for time integration. - theta: 0.7 # Opening angle (Multipole acceptance criterion). + MAC: geometric + theta_cr: 0.7 # Opening angle (Multipole acceptance criterion). + use_tree_below_softening: 1 max_physical_baryon_softening: 0.100 # Physical softening length (in internal units). # Parameters governing the time integration (Set dt_min and dt_max to the same value for a fixed time-step run.) 
diff --git a/examples/IsolatedGalaxy/IsolatedGalaxy_dmparticles/isolated_galaxy.yml b/examples/IsolatedGalaxy/IsolatedGalaxy_dmparticles/isolated_galaxy.yml index 27ab01d984319fea68d4d1ae8fb435a6adf895ce..aaa2f8bbacfa9500b4a99cf7c24308aff1fe3767 100644 --- a/examples/IsolatedGalaxy/IsolatedGalaxy_dmparticles/isolated_galaxy.yml +++ b/examples/IsolatedGalaxy/IsolatedGalaxy_dmparticles/isolated_galaxy.yml @@ -9,7 +9,9 @@ InternalUnitSystem: # Parameters for the self-gravity scheme Gravity: eta: 0.025 # Constant dimensionless multiplier for time integration. - theta: 0.7 # Opening angle (Multipole acceptance criterion). + MAC: geometric + theta_cr: 0.7 # Opening angle (Multipole acceptance criterion). + use_tree_below_softening: 1 max_physical_DM_softening: 0.7 # Physical softening length (in internal units). # Parameters governing the time integration (Set dt_min and dt_max to the same value for a fixed time-step run.) diff --git a/examples/IsolatedGalaxy/IsolatedGalaxy_feedback/isolated_galaxy.yml b/examples/IsolatedGalaxy/IsolatedGalaxy_feedback/isolated_galaxy.yml index 0642e9312f0926cc0d005f9ec4b6d3f5215ea7d4..48b659c453b28e4a89778d601793976df39b3926 100644 --- a/examples/IsolatedGalaxy/IsolatedGalaxy_feedback/isolated_galaxy.yml +++ b/examples/IsolatedGalaxy/IsolatedGalaxy_feedback/isolated_galaxy.yml @@ -9,7 +9,9 @@ InternalUnitSystem: # Parameters for the self-gravity scheme Gravity: eta: 0.025 # Constant dimensionless multiplier for time integration. - theta: 0.7 # Opening angle (Multipole acceptance criterion). + MAC: geometric + theta_cr: 0.7 # Opening angle (Multipole acceptance criterion). + use_tree_below_softening: 1 max_physical_DM_softening: 0.35 # Physical softening length (in internal units). max_physical_baryon_softening: 0.35 # Physical softening length (in internal units). 
diff --git a/examples/IsolatedGalaxy/IsolatedGalaxy_potential/isolated_galaxy.yml b/examples/IsolatedGalaxy/IsolatedGalaxy_potential/isolated_galaxy.yml index 3bd743c2ec329057c7f63f987e4809744a07f7ba..d82c3544d6bb966d0ca7fb0db72883eff4a829ca 100644 --- a/examples/IsolatedGalaxy/IsolatedGalaxy_potential/isolated_galaxy.yml +++ b/examples/IsolatedGalaxy/IsolatedGalaxy_potential/isolated_galaxy.yml @@ -9,7 +9,9 @@ InternalUnitSystem: # Parameters for the self-gravity scheme Gravity: eta: 0.025 # Constant dimensionless multiplier for time integration. - theta: 0.7 # Opening angle (Multipole acceptance criterion). + MAC: geometric + theta_cr: 0.7 # Opening angle (Multipole acceptance criterion). + use_tree_below_softening: 1 max_physical_baryon_softening: 0.100 # Physical softening length (in internal units). # Parameters governing the time integration (Set dt_min and dt_max to the same value for a fixed time-step run.) diff --git a/examples/IsolatedGalaxy/IsolatedGalaxy_starformation/isolated_galaxy.yml b/examples/IsolatedGalaxy/IsolatedGalaxy_starformation/isolated_galaxy.yml index 1403e13afd27122568fac083eb5cecef96ed330d..c2bbcac6aa7ed474646314d02f15e63ef0b93f0e 100644 --- a/examples/IsolatedGalaxy/IsolatedGalaxy_starformation/isolated_galaxy.yml +++ b/examples/IsolatedGalaxy/IsolatedGalaxy_starformation/isolated_galaxy.yml @@ -9,7 +9,9 @@ InternalUnitSystem: # Parameters for the self-gravity scheme Gravity: eta: 0.025 # Constant dimensionless multiplier for time integration. - theta: 0.7 # Opening angle (Multipole acceptance criterion). + MAC: geometric + theta_cr: 0.7 # Opening angle (Multipole acceptance criterion). + use_tree_below_softening: 1 max_physical_DM_softening: 0.2 # Physical softening length (in internal units). max_physical_baryon_softening: 0.2 # Physical softening length (in internal units). 
diff --git a/examples/Makefile.am b/examples/Makefile.am index 3de490970d27ef91eda85ccc1d2c57aba2fff0fe..64735be2e177d2839c69d718303edb8192b3e72f 100644 --- a/examples/Makefile.am +++ b/examples/Makefile.am @@ -20,7 +20,7 @@ MYFLAGS = # Add the source directory and the non-standard paths to the included library headers to CFLAGS AM_CFLAGS = -I$(top_srcdir)/src -I$(top_srcdir)/argparse $(HDF5_CPPFLAGS) \ - $(GSL_INCS) $(FFTW_INCS) $(NUMA_INCS) $(GRACKLE_INCS) + $(GSL_INCS) $(FFTW_INCS) $(NUMA_INCS) $(GRACKLE_INCS) $(OPENMP_CFLAGS) AM_LDFLAGS = $(HDF5_LDFLAGS) diff --git a/examples/PMillennium/PMillennium-1536/p-mill-1536.yml b/examples/PMillennium/PMillennium-1536/p-mill-1536.yml index f343650452a24f620edef88e42666e09213dbd64..e2e8439a71dac106b6f4fe0128805a96ff48f796 100644 --- a/examples/PMillennium/PMillennium-1536/p-mill-1536.yml +++ b/examples/PMillennium/PMillennium-1536/p-mill-1536.yml @@ -42,7 +42,9 @@ Statistics: # Parameters for the self-gravity scheme Gravity: eta: 0.025 - theta: 0.5 + MAC: adaptive + theta_cr: 0.7 + epsilon_fmm: 0.001 comoving_DM_softening: 0.0208333 # 20.8333 kpc = 1/25 mean inter-particle separation max_physical_DM_softening: 0.0208333 # 20.8333 kpc = 1/25 mean inter-particle separation mesh_side_length: 512 diff --git a/examples/PMillennium/PMillennium-384/p-mill-384.yml b/examples/PMillennium/PMillennium-384/p-mill-384.yml index 0e68969d0b590cec8058805e4644b2763c543be1..0e058b5b83f81de967be34c3237d1cf7060e1e4a 100644 --- a/examples/PMillennium/PMillennium-384/p-mill-384.yml +++ b/examples/PMillennium/PMillennium-384/p-mill-384.yml @@ -41,8 +41,10 @@ Statistics: # Parameters for the self-gravity scheme Gravity: - eta: 0.025 - theta: 0.5 + eta: 0.025 + MAC: adaptive + theta_cr: 0.7 + epsilon_fmm: 0.001 comoving_DM_softening: 0.08333 # 83.333 kpc = 1/25 mean inter-particle separation max_physical_DM_softening: 0.08333 # 83.333 kpc = 1/25 mean inter-particle separation mesh_side_length: 128 diff --git 
a/examples/PMillennium/PMillennium-768/p-mill-768.yml b/examples/PMillennium/PMillennium-768/p-mill-768.yml index 1cd9e63b1f03ac65baf72701de276b8ff43c9575..3a058c2db9f2a0e171c8f7a56dec0b03e593c081 100644 --- a/examples/PMillennium/PMillennium-768/p-mill-768.yml +++ b/examples/PMillennium/PMillennium-768/p-mill-768.yml @@ -42,7 +42,9 @@ Statistics: # Parameters for the self-gravity scheme Gravity: eta: 0.025 - theta: 0.5 + MAC: adaptive + theta_cr: 0.7 + epsilon_fmm: 0.001 comoving_DM_softening: 0.041666 # 41.6666 kpc = 1/25 mean inter-particle separation max_physical_DM_softening: 0.041666 # 41.6666 kpc = 1/25 mean inter-particle separation mesh_side_length: 256 diff --git a/examples/Planetary/EarthImpact/earth_impact.yml b/examples/Planetary/EarthImpact/earth_impact.yml index 336a0eb82d570a1544c1344ab2261e35dbf3d59e..9f147b0135ffab70da2c3f17f1bda1e111a803aa 100644 --- a/examples/Planetary/EarthImpact/earth_impact.yml +++ b/examples/Planetary/EarthImpact/earth_impact.yml @@ -45,7 +45,10 @@ SPH: # Parameters for the self-gravity scheme Gravity: eta: 0.025 # Constant dimensionless multiplier for time integration. - theta: 0.7 # Opening angle (Multipole acceptance criterion) + MAC: adaptive + theta_cr: 0.7 + epsilon_fmm: 0.001 + use_tree_below_softening: 1 max_physical_baryon_softening: 0.003 # Physical softening length (in internal units). # Parameters for the task scheduling diff --git a/examples/QuickLymanAlpha/L050N0752/qla_50.yml b/examples/QuickLymanAlpha/L050N0752/qla_50.yml index 9eeac77e732eeeb32c5440dd5586199c401fedf1..0748fa112b10c5d8e93502f69984bbbda54edca1 100644 --- a/examples/QuickLymanAlpha/L050N0752/qla_50.yml +++ b/examples/QuickLymanAlpha/L050N0752/qla_50.yml @@ -37,14 +37,16 @@ Statistics: # Parameters for the self-gravity scheme Gravity: - eta: 0.025 # Constant dimensionless multiplier for time integration. 
- theta: 0.7 # Opening angle (Multipole acceptance criterion) - mesh_side_length: 256 + eta: 0.025 # Constant dimensionless multiplier for time integration. + MAC: adaptive + epsilon_fmm: 0.001 + theta_cr: 0.7 # Opening angle (Multipole acceptance criterion) + use_tree_below_softening: 1 + mesh_side_length: 256 comoving_DM_softening: 0.003320 # Comoving softening for DM (3.32 ckpc) max_physical_DM_softening: 0.001300 # Physical softening for DM (1.30 pkpc) comoving_baryon_softening: 0.001790 # Comoving softening for baryons (1.79 ckpc) max_physical_baryon_softening: 0.000700 # Physical softening for baryons (0.70 pkpc) - dithering: 0 # Parameters for the hydrodynamics scheme SPH: diff --git a/examples/SantaBarbara/SantaBarbara-128/santa_barbara.yml b/examples/SantaBarbara/SantaBarbara-128/santa_barbara.yml index 5ee17b5a5877c60eebab5075aebb1bec53429c60..d71394d1899de182a6df527ddd2b0c69007b2f68 100644 --- a/examples/SantaBarbara/SantaBarbara-128/santa_barbara.yml +++ b/examples/SantaBarbara/SantaBarbara-128/santa_barbara.yml @@ -41,8 +41,11 @@ Statistics: # Parameters for the self-gravity scheme Gravity: - eta: 0.025 - theta: 0.5 + eta: 0.025 + MAC: adaptive + theta_cr: 0.7 + epsilon_fmm: 0.001 + use_tree_below_softening: 1 comoving_DM_softening: 0.02 # 20 kpc = 1/25 mean inter-particle separation max_physical_DM_softening: 0.00526 # 20 ckpc = 5.26 pkpc at z=2.8 (EAGLE-like evolution of softening). 
comoving_baryon_softening: 0.02 # 20 kpc = 1/25 mean inter-particle separation diff --git a/examples/SantaBarbara/SantaBarbara-256/santa_barbara.yml b/examples/SantaBarbara/SantaBarbara-256/santa_barbara.yml index e83a136c21404f3dcb4a6a0c9921653d71080fdd..e9a8549cee72f5219182fd8c41c3477b1f02322a 100644 --- a/examples/SantaBarbara/SantaBarbara-256/santa_barbara.yml +++ b/examples/SantaBarbara/SantaBarbara-256/santa_barbara.yml @@ -41,8 +41,11 @@ Statistics: # Parameters for the self-gravity scheme Gravity: - eta: 0.025 - theta: 0.5 + eta: 0.025 + MAC: adaptive + theta_cr: 0.7 + epsilon_fmm: 0.001 + use_tree_below_softening: 1 comoving_DM_softening: 0.01 # 10 kpc = 1/25 mean inter-particle separation max_physical_DM_softening: 0.00263 # 10 ckpc = 2.63 pkpc at z=2.8 (EAGLE-like evolution of softening). comoving_baryon_softening: 0.01 # 10 kpc = 1/25 mean inter-particle separation diff --git a/examples/SmallCosmoVolume/SmallCosmoVolume_DM/small_cosmo_volume_dm.yml b/examples/SmallCosmoVolume/SmallCosmoVolume_DM/small_cosmo_volume_dm.yml index 95991e9f1d7e385826e33f6ae0fa343173966084..85abb33b38ad87b716021e094fd483813e8a51ce 100644 --- a/examples/SmallCosmoVolume/SmallCosmoVolume_DM/small_cosmo_volume_dm.yml +++ b/examples/SmallCosmoVolume/SmallCosmoVolume_DM/small_cosmo_volume_dm.yml @@ -21,8 +21,10 @@ TimeIntegration: # Parameters for the self-gravity scheme Gravity: - eta: 0.025 - theta: 0.5 + eta: 0.025 + MAC: adaptive + theta_cr: 0.7 + epsilon_fmm: 0.001 comoving_DM_softening: 0.0889 # 1/25th of the mean inter-particle separation: 88.9 kpc max_physical_DM_softening: 0.0889 # 1/25th of the mean inter-particle separation: 88.9 kpc mesh_side_length: 64 diff --git a/examples/SmallCosmoVolume/SmallCosmoVolume_Snipshots/small_cosmo_volume.yml b/examples/SmallCosmoVolume/SmallCosmoVolume_Snipshots/small_cosmo_volume.yml index dc554e3dd8182b717803a902d1fb8b9f698d2f8e..5d297a3dc88674a41414311286d2b02032e83f17 100644 --- 
a/examples/SmallCosmoVolume/SmallCosmoVolume_Snipshots/small_cosmo_volume.yml +++ b/examples/SmallCosmoVolume/SmallCosmoVolume_Snipshots/small_cosmo_volume.yml @@ -21,8 +21,10 @@ TimeIntegration: # Parameters for the self-gravity scheme Gravity: - eta: 0.025 - theta: 0.5 + eta: 0.025 + MAC: adaptive + theta_cr: 0.7 + epsilon_fmm: 0.001 comoving_DM_softening: 0.0889 # 1/25th of the mean inter-particle separation: 88.9 kpc max_physical_DM_softening: 0.0889 # 1/25th of the mean inter-particle separation: 88.9 kpc comoving_baryon_softening: 0.0889 # 1/25th of the mean inter-particle separation: 88.9 kpc diff --git a/examples/SmallCosmoVolume/SmallCosmoVolume_VELOCIraptor/small_cosmo_volume.yml b/examples/SmallCosmoVolume/SmallCosmoVolume_VELOCIraptor/small_cosmo_volume.yml index 0142e806db0c1e2e6261825a2d531f1937defced..2cd7e3efd85e70c41e85bc1a66101245845a9d1c 100644 --- a/examples/SmallCosmoVolume/SmallCosmoVolume_VELOCIraptor/small_cosmo_volume.yml +++ b/examples/SmallCosmoVolume/SmallCosmoVolume_VELOCIraptor/small_cosmo_volume.yml @@ -21,8 +21,10 @@ TimeIntegration: # Parameters for the self-gravity scheme Gravity: - eta: 0.025 - theta: 0.5 + eta: 0.025 + MAC: adaptive + theta_cr: 0.7 + epsilon_fmm: 0.001 comoving_DM_softening: 0.0889 # 1/25th of the mean inter-particle separation: 88.9 kpc max_physical_DM_softening: 0.0889 # 1/25th of the mean inter-particle separation: 88.9 kpc comoving_baryon_softening: 0.0889 # 1/25th of the mean inter-particle separation: 88.9 kpc diff --git a/examples/SmallCosmoVolume/SmallCosmoVolume_cooling/small_cosmo_volume.yml b/examples/SmallCosmoVolume/SmallCosmoVolume_cooling/small_cosmo_volume.yml index 0d93406eebc903c880a53ffa15f874e21902675b..2528a81eeacfe4bebf3dd304ccf22411a22d0956 100644 --- a/examples/SmallCosmoVolume/SmallCosmoVolume_cooling/small_cosmo_volume.yml +++ b/examples/SmallCosmoVolume/SmallCosmoVolume_cooling/small_cosmo_volume.yml @@ -21,8 +21,10 @@ TimeIntegration: # Parameters for the self-gravity scheme Gravity: - 
eta: 0.025 - theta: 0.3 + eta: 0.025 + MAC: adaptive + theta_cr: 0.7 + epsilon_fmm: 0.001 comoving_DM_softening: 0.0889 # 1/25th of the mean inter-particle separation: 88.9 kpc max_physical_DM_softening: 0.0889 # 1/25th of the mean inter-particle separation: 88.9 kpc comoving_baryon_softening: 0.0889 # 1/25th of the mean inter-particle separation: 88.9 kpc diff --git a/examples/SmallCosmoVolume/SmallCosmoVolume_hydro/small_cosmo_volume.yml b/examples/SmallCosmoVolume/SmallCosmoVolume_hydro/small_cosmo_volume.yml index 250fdc653bdad771c09f8e76819a8a475393d504..da08070bbcd78086a9f53d1bb808d8d48a7c655a 100644 --- a/examples/SmallCosmoVolume/SmallCosmoVolume_hydro/small_cosmo_volume.yml +++ b/examples/SmallCosmoVolume/SmallCosmoVolume_hydro/small_cosmo_volume.yml @@ -21,8 +21,10 @@ TimeIntegration: # Parameters for the self-gravity scheme Gravity: - eta: 0.025 - theta: 0.5 + eta: 0.025 + MAC: adaptive + theta_cr: 0.7 + epsilon_fmm: 0.001 comoving_DM_softening: 0.0889 # 1/25th of the mean inter-particle separation: 88.9 kpc max_physical_DM_softening: 0.0889 # 1/25th of the mean inter-particle separation: 88.9 kpc comoving_baryon_softening: 0.0889 # 1/25th of the mean inter-particle separation: 88.9 kpc diff --git a/examples/SubgridTests/PressureFloor/pressureFloor.yml b/examples/SubgridTests/PressureFloor/pressureFloor.yml index f9df74dcefb14204264a4ffc9a7326d3453f718d..b20eb3a3026972ad7756a155865fefd88c5a798b 100644 --- a/examples/SubgridTests/PressureFloor/pressureFloor.yml +++ b/examples/SubgridTests/PressureFloor/pressureFloor.yml @@ -45,9 +45,10 @@ GEARPressureFloor: # Parameters for the self-gravity scheme Gravity: - mesh_side_length: 12 # Number of cells along each axis for the periodic gravity mesh. - eta: 2.5 # Constant dimensionless multiplier for time integration. - theta: 0.7 # Opening angle (Multipole acceptance criterion). 
- max_physical_DM_softening: 0.5 # Maximal Plummer-equivalent softening length in physical coordinates for DM particles (in internal units). - max_physical_baryon_softening: 0.5 # Maximal Plummer-equivalent softening length in physical coordinates for baryon particles (in internal units). - softening_ratio_background: 0.04 # Fraction of the mean inter-particle separation to use as Plummer-equivalent softening for the background DM particles. + mesh_side_length: 16 # Number of cells along each axis for the periodic gravity mesh. + eta: 0.025 # Constant dimensionless multiplier for time integration. + MAC: geometric + theta_cr: 0.7 # Opening angle (Multipole acceptance criterion). + max_physical_DM_softening: 0.5 # Maximal Plummer-equivalent softening length in physical coordinates for DM particles (in internal units). + max_physical_baryon_softening: 0.5 # Maximal Plummer-equivalent softening length in physical coordinates for baryon particles (in internal units). + softening_ratio_background: 0.04 # Fraction of the mean inter-particle separation to use as Plummer-equivalent softening for the background DM particles. 
diff --git a/examples/main.c b/examples/main.c index c01c5494644eeb7c5603e99e63a09e0060887b92..e785edf837445ee5622c1b10474762401bb0d286 100644 --- a/examples/main.c +++ b/examples/main.c @@ -1117,7 +1117,7 @@ int main(int argc, char *argv[]) { gravity_props_init(&gravity_properties, params, &prog_const, &cosmo, with_cosmology, with_external_gravity, with_baryon_particles, with_DM_particles, - with_DM_background_particles, periodic); + with_DM_background_particles, periodic, s.dim); /* Initialise the external potential properties */ bzero(&potential, sizeof(struct external_potential)); diff --git a/examples/main_fof.c b/examples/main_fof.c index ee683ebb5b8258b81bfdc877eba1421a19918860..ff029cc2eb5fdee29d4acd5b4b0c1dc1e8b971bf 100644 --- a/examples/main_fof.c +++ b/examples/main_fof.c @@ -534,7 +534,7 @@ int main(int argc, char *argv[]) { gravity_props_init(&gravity_properties, params, &prog_const, &cosmo, /*with_cosmology=*/1, /*with_external_gravity=*/0, with_baryon_particles, with_DM_particles, - with_DM_background_particles, periodic); + with_DM_background_particles, periodic, s.dim); /* Initialise the long-range gravity mesh */ if (periodic) { diff --git a/examples/nIFTyCluster/Baryonic/nifty.yml b/examples/nIFTyCluster/Baryonic/nifty.yml index 7b58c80009e9401f4d6ee5b9656940fa89dd2409..501e9578bbc6face9efc7fb1ab9fa0120ae897e5 100644 --- a/examples/nIFTyCluster/Baryonic/nifty.yml +++ b/examples/nIFTyCluster/Baryonic/nifty.yml @@ -40,8 +40,11 @@ Statistics: # Parameters for the self-gravity scheme Gravity: - eta: 0.025 # Constant dimensionless multiplier for time integration. - theta: 0.5 # Opening angle (Multipole acceptance criterion) + eta: 0.025 # Constant dimensionless multiplier for time integration. + MAC: adaptive + epsilon_fmm: 0.001 + theta_cr: 0.7 + use_tree_below_softening: 1 comoving_DM_softening: 0.02 # Comoving softening length (in internal units). comoving_baryon_softening: 0.02 # Comoving softening length (in internal units). 
max_physical_DM_softening: 0.005 # Max physical softening length (in internal units). diff --git a/examples/parameter_example.yml b/examples/parameter_example.yml index 1bc907c17f36c62c822ceee7b6163be012991f03..86f3980d7288e419cac570ffb9b2ab708338cf8b 100644 --- a/examples/parameter_example.yml +++ b/examples/parameter_example.yml @@ -64,7 +64,11 @@ Stars: Gravity: mesh_side_length: 128 # Number of cells along each axis for the periodic gravity mesh. eta: 0.025 # Constant dimensionless multiplier for time integration. - theta: 0.7 # Opening angle (Multipole acceptance criterion). + MAC: adaptive # Choice of multipole acceptance criterion: 'adaptive' OR 'geometric'. + epsilon_fmm: 0.001 # Tolerance parameter for the adaptive multipole acceptance criterion. + theta_cr: 0.7 # Opening angle for the purely geometric criterion. + use_tree_below_softening: 0 # (Optional) Can the gravity code use the multipole interactions below the softening scale? + allow_truncation_in_MAC: 0 # (Optional) Can the Multipole acceptance criterion use the truncated force estimator? comoving_DM_softening: 0.0026994 # Comoving Plummer-equivalent softening length for DM particles (in internal units). max_physical_DM_softening: 0.0007 # Maximal Plummer-equivalent softening length in physical coordinates for DM particles (in internal units). comoving_baryon_softening: 0.0026994 # Comoving Plummer-equivalent softening length for baryon particles (in internal units). @@ -74,7 +78,7 @@ Gravity: a_smooth: 1.25 # (Optional) Smoothing scale in top-level cell sizes to smooth the long-range forces over (this is the default value). r_cut_max: 4.5 # (Optional) Cut-off in number of top-level cells beyond which no FMM forces are computed (this is the default value). r_cut_min: 0.1 # (Optional) Cut-off in number of top-level cells below which no truncation of FMM forces are performed (this is the default value). 
- dithering: 1 # (Optional) Activate the dithering of the gravity mesh at every rebuild (this is the default value). + dithering: 0 # (Optional) Activate the dithering of the gravity mesh at every rebuild (this is the default value). dithering_ratio: 1.0 # (Optional) Magnitude of each component of the dithering vector in units of the top-level cell sizes (this is the default value). # Parameters when running with SWIFT_GRAVITY_FORCE_CHECKS diff --git a/src/Makefile.am b/src/Makefile.am index 4369a1a37a6969b37d2d38d01d775472729e4481..12b5273ffaa007eb332df18e9bce174c13bc03b6 100644 --- a/src/Makefile.am +++ b/src/Makefile.am @@ -16,7 +16,7 @@ # along with this program. If not, see <http://www.gnu.org/licenses/>. # Add the non-standard paths to the included library headers -AM_CFLAGS = $(HDF5_CPPFLAGS) $(GSL_INCS) $(FFTW_INCS) $(NUMA_INCS) $(GRACKLE_INCS) +AM_CFLAGS = $(HDF5_CPPFLAGS) $(GSL_INCS) $(FFTW_INCS) $(NUMA_INCS) $(GRACKLE_INCS) $(OPENMP_CFLAGS) # Assign a "safe" version number AM_LDFLAGS = $(HDF5_LDFLAGS) $(FFTW_LIBS) @@ -52,7 +52,7 @@ include_HEADERS = space.h runner.h queue.h task.h lock.h cell.h part.h const.h \ chemistry.h chemistry_io.h chemistry_struct.h cosmology.h restart.h space_getsid.h utilities.h \ mesh_gravity.h cbrt.h exp10.h velociraptor_interface.h swift_velociraptor_part.h output_list.h \ logger_io.h tracers_io.h tracers.h tracers_struct.h star_formation_io.h fof.h fof_struct.h fof_io.h \ - multipole.h multipole_struct.h sincos.h \ + multipole.h multipole_accept.h multipole_struct.h binomial.h integer_power.h sincos.h \ star_formation_struct.h star_formation.h star_formation_iact.h \ star_formation_logger.h star_formation_logger_struct.h \ pressure_floor.h pressure_floor_struct.h pressure_floor_iact.h \ diff --git a/src/approx_math.h b/src/approx_math.h index 4015e6040b102839234c452783838dadddb86cf6..70cb3e203cefa7e3c8ce2e4d721d9e23588641ef 100644 --- a/src/approx_math.h +++ b/src/approx_math.h @@ -21,31 +21,6 @@ #include "inline.h" -/** - * 
@brief Approximate version of the complementay error function erfcf(x). - * - * This is based on eq. 7.1.27 of Abramowitz & Stegun, 1972. - * The absolute error is < 4.7*10^-4 over the range 0 < x < infinity. - * - * Returns garbage for x < 0. - * @param x The number to compute erfc for. - */ -__attribute__((always_inline, const)) INLINE static float approx_erfcf( - float x) { - - /* 1 + 0.278393*x + 0.230389*x^2 + 0.000972*x^3 + 0.078108*x^4 */ - float arg = 0.078108f; - arg = x * arg + 0.000972f; - arg = x * arg + 0.230389f; - arg = x * arg + 0.278393f; - arg = x * arg + 1.f; - - /* 1 / arg^4 */ - const float arg2 = arg * arg; - const float arg4 = arg2 * arg2; - return 1.f / arg4; -} - /** * @brief Approximate version of expf(x) using a 4th order Taylor expansion * diff --git a/src/binomial.h b/src/binomial.h new file mode 100644 index 0000000000000000000000000000000000000000..b1caf1258a75b47b0f47c26b2e16ac01d76f6454 --- /dev/null +++ b/src/binomial.h @@ -0,0 +1,56 @@ +/******************************************************************************* + * This file is part of SWIFT. + * Copyright (c) 2020 Matthieu Schaller (schaller@strw.leidenuniv.nl) + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published + * by the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public License + * along with this program. If not, see <http://www.gnu.org/licenses/>. 
+ * + ******************************************************************************/ +#ifndef SWIFT_BINOMIAL_H +#define SWIFT_BINOMIAL_H + +/* Config parameters. */ +#include "../config.h" + +/* Local headers */ +#include "error.h" +#include "inline.h" + +/** + * @brief Compute the binomial coefficient (n, k) + * + * Only valid for values 0 <= n <= 8 and 0 <= k <= n. + */ +__attribute__((const)) INLINE static int binomial(const int n, const int k) { + +#ifdef SWIFT_DEBUG_CHECKS + assert(n >= 0); + assert(k >= 0); + assert(n <= 8); + assert(k <= n); +#endif + + /* Hello Pascal! Nice to meet again */ + static const int coeffs[9][9] = { + {1, 0, 0, 0, 0, 0, 0, 0, 0}, {1, 1, 0, 0, 0, 0, 0, 0, 0}, + {1, 2, 1, 0, 0, 0, 0, 0, 0}, {1, 3, 3, 1, 0, 0, 0, 0, 0}, + {1, 4, 6, 4, 1, 0, 0, 0, 0}, {1, 5, 10, 10, 5, 1, 0, 0, 0}, + {1, 6, 15, 20, 15, 6, 1, 0, 0}, {1, 7, 21, 35, 35, 21, 7, 1, 0}, + {1, 8, 28, 56, 70, 56, 28, 8, 1} + + }; + + return coeffs[n][k]; +} + +#endif /* SWIFT_BINOMIAL_H */ diff --git a/src/black_holes/EAGLE/black_holes_iact.h b/src/black_holes/EAGLE/black_holes_iact.h index 30186f892e1eaf9b34e07ddcc5a419312696a78b..a08c4e6293a5612ac6cf823ec6d1c4a17abbf5e2 100644 --- a/src/black_holes/EAGLE/black_holes_iact.h +++ b/src/black_holes/EAGLE/black_holes_iact.h @@ -417,8 +417,8 @@ runner_iact_nonsym_bh_bh_swallow(const float r2, const float *dx, (r_12 < grav_props->epsilon_baryon_cur)) { /* If BHs are within softening range, take this into account */ - float w_grav; - kernel_grav_pot_eval(r_12 / grav_props->epsilon_baryon_cur, &w_grav); + const float w_grav = + kernel_grav_pot_eval(r_12 / grav_props->epsilon_baryon_cur); const float r_mod = w_grav / grav_props->epsilon_baryon_cur; v2_threshold = 2.f * G_Newton * M / (r_mod); diff --git a/src/cell.c b/src/cell.c index 88c8b84e00e536ca9b91783d5f0bc009ae9186c9..8124e41d418d733460648db24db5c63062b2fa1b 100644 --- a/src/cell.c +++ b/src/cell.c @@ -61,6 +61,7 @@ #include "hydro_properties.h" #include "memswap.h" #include 
"minmax.h" +#include "multipole.h" #include "pressure_floor.h" #include "scheduler.h" #include "space.h" @@ -2241,6 +2242,7 @@ void cell_make_multipoles(struct cell *c, integertime_t ti_current, gravity_reset(c->grav.multipole); if (c->split) { + /* Start by recursing */ for (int k = 0; k < 8; ++k) { if (c->progeny[k] != NULL) @@ -2303,9 +2305,13 @@ void cell_make_multipoles(struct cell *c, integertime_t ti_current, /* Take minimum of both limits */ c->grav.multipole->r_max = min(r_max, sqrt(dx * dx + dy * dy + dz * dz)); + /* Compute the multipole power */ + gravity_multipole_compute_power(&c->grav.multipole->m_pole); + } else { if (c->grav.count > 0) { gravity_P2M(c->grav.multipole, c->grav.parts, c->grav.count, grav_props); + gravity_multipole_compute_power(&c->grav.multipole->m_pole); const double dx = c->grav.multipole->CoM[0] > c->loc[0] + c->width[0] * 0.5 ? c->grav.multipole->CoM[0] - c->loc[0] @@ -2399,6 +2405,7 @@ void cell_check_multipole(struct cell *c, if (c->grav.count > 0) { /* Brute-force calculation */ gravity_P2M(&ma, c->grav.parts, c->grav.count, grav_props); + gravity_multipole_compute_power(&ma.m_pole); /* Now compare the multipole expansion */ if (!gravity_multipole_equal(&ma, c->grav.multipole, tolerance)) { @@ -3343,7 +3350,9 @@ void cell_activate_subcell_grav_tasks(struct cell *ci, struct cell *cj, if (lock_unlock(&cj->grav.mlock) != 0) error("Impossible to unlock m-pole"); /* Can we use multipoles ? */ - if (cell_can_use_pair_mm(ci, cj, e, sp)) { + if (cell_can_use_pair_mm(ci, cj, e, sp, /*use_rebuild_data=*/0, + /*is_tree_walk=*/1)) { + /* Ok, no need to drift anything */ return; } @@ -4732,6 +4741,7 @@ void cell_drift_gpart(struct cell *c, const struct engine *e, int force) { const integertime_t ti_old_gpart = c->grav.ti_old_part; const integertime_t ti_current = e->ti_current; struct gpart *const gparts = c->grav.parts; + const struct gravity_props *grav_props = e->gravity_properties; /* Drift irrespective of cell flags? 
*/ force = (force || cell_get_flag(c, cell_flag_do_grav_drift)); @@ -4793,7 +4803,7 @@ void cell_drift_gpart(struct cell *c, const struct engine *e, int force) { if (gpart_is_inhibited(gp, e)) continue; /* Drift... */ - drift_gpart(gp, dt_drift, ti_old_gpart, ti_current); + drift_gpart(gp, dt_drift, ti_old_gpart, ti_current, grav_props); #ifdef SWIFT_DEBUG_CHECKS /* Make sure the particle does not drift by more than a box length. */ @@ -6399,91 +6409,46 @@ void cell_reorder_extra_gparts(struct cell *c, struct part *parts, /** * @brief Can we use the MM interactions fo a given pair of cells? * - * @param ci The first #cell. - * @param cj The second #cell. - * @param e The #engine. - * @param s The #space. - */ -int cell_can_use_pair_mm(const struct cell *ci, const struct cell *cj, - const struct engine *e, const struct space *s) { - const double theta_crit2 = e->gravity_properties->theta_crit2; - const int periodic = s->periodic; - const double dim[3] = {s->dim[0], s->dim[1], s->dim[2]}; - - /* Recover the multipole information */ - const struct gravity_tensors *const multi_i = ci->grav.multipole; - const struct gravity_tensors *const multi_j = cj->grav.multipole; - - /* Get the distance between the CoMs */ - double dx = multi_i->CoM[0] - multi_j->CoM[0]; - double dy = multi_i->CoM[1] - multi_j->CoM[1]; - double dz = multi_i->CoM[2] - multi_j->CoM[2]; - - /* Apply BC */ - if (periodic) { - dx = nearest(dx, dim[0]); - dy = nearest(dy, dim[1]); - dz = nearest(dz, dim[2]); - } - const double r2 = dx * dx + dy * dy + dz * dz; - - const double epsilon_i = multi_i->m_pole.max_softening; - const double epsilon_j = multi_j->m_pole.max_softening; - - return gravity_M2L_accept(multi_i->r_max, multi_j->r_max, theta_crit2, r2, - epsilon_i, epsilon_j); -} - -/** - * @brief Can we use the MM interactions fo a given pair of cells? - * - * This function uses the information gathered in the multipole at rebuild - * time and not the current position and radius of the multipole. 
+ * The two cells have to be different! * * @param ci The first #cell. * @param cj The second #cell. * @param e The #engine. * @param s The #space. + * @param use_rebuild_data Are we considering the data at the last tree-build + * (1) or current data (0)? + * @param is_tree_walk Are we calling this in the tree walk (1) or for the + * top-level task construction (0)? */ -int cell_can_use_pair_mm_rebuild(const struct cell *ci, const struct cell *cj, - const struct engine *e, - const struct space *s) { - const double theta_crit2 = e->gravity_properties->theta_crit2; +int cell_can_use_pair_mm(const struct cell *restrict ci, + const struct cell *restrict cj, const struct engine *e, + const struct space *s, const int use_rebuild_data, + const int is_tree_walk) { + + const struct gravity_props *props = e->gravity_properties; const int periodic = s->periodic; const double dim[3] = {s->dim[0], s->dim[1], s->dim[2]}; - /* Recover the multipole information */ - const struct gravity_tensors *const multi_i = ci->grav.multipole; - const struct gravity_tensors *const multi_j = cj->grav.multipole; + /* Check for trivial cases */ + if (is_tree_walk && ci->grav.count <= 1) return 0; + if (is_tree_walk && cj->grav.count <= 1) return 0; -#ifdef SWIFT_DEBUG_CHECKS + /* Recover the multipole information */ + const struct gravity_tensors *restrict multi_i = ci->grav.multipole; + const struct gravity_tensors *restrict multi_j = cj->grav.multipole; - if (multi_i->CoM_rebuild[0] < ci->loc[0] || - multi_i->CoM_rebuild[0] > ci->loc[0] + ci->width[0]) - error("Invalid multipole position ci"); - if (multi_i->CoM_rebuild[1] < ci->loc[1] || - multi_i->CoM_rebuild[1] > ci->loc[1] + ci->width[1]) - error("Invalid multipole position ci"); - if (multi_i->CoM_rebuild[2] < ci->loc[2] || - multi_i->CoM_rebuild[2] > ci->loc[2] + ci->width[2]) - error("Invalid multipole position ci"); - - if (multi_j->CoM_rebuild[0] < cj->loc[0] || - multi_j->CoM_rebuild[0] > cj->loc[0] + cj->width[0]) - error("Invalid 
multipole position cj"); - if (multi_j->CoM_rebuild[1] < cj->loc[1] || - multi_j->CoM_rebuild[1] > cj->loc[1] + cj->width[1]) - error("Invalid multipole position cj"); - if (multi_j->CoM_rebuild[2] < cj->loc[2] || - multi_j->CoM_rebuild[2] > cj->loc[2] + cj->width[2]) - error("Invalid multipole position cj"); - -#endif + double dx, dy, dz; /* Get the distance between the CoMs */ - double dx = multi_i->CoM_rebuild[0] - multi_j->CoM_rebuild[0]; - double dy = multi_i->CoM_rebuild[1] - multi_j->CoM_rebuild[1]; - double dz = multi_i->CoM_rebuild[2] - multi_j->CoM_rebuild[2]; + if (use_rebuild_data) { + dx = multi_i->CoM_rebuild[0] - multi_j->CoM_rebuild[0]; + dy = multi_i->CoM_rebuild[1] - multi_j->CoM_rebuild[1]; + dz = multi_i->CoM_rebuild[2] - multi_j->CoM_rebuild[2]; + } else { + dx = multi_i->CoM[0] - multi_j->CoM[0]; + dy = multi_i->CoM[1] - multi_j->CoM[1]; + dz = multi_i->CoM[2] - multi_j->CoM[2]; + } /* Apply BC */ if (periodic) { @@ -6493,9 +6458,6 @@ int cell_can_use_pair_mm_rebuild(const struct cell *ci, const struct cell *cj, } const double r2 = dx * dx + dy * dy + dz * dz; - const double epsilon_i = multi_i->m_pole.max_softening; - const double epsilon_j = multi_j->m_pole.max_softening; - - return gravity_M2L_accept(multi_i->r_max_rebuild, multi_j->r_max_rebuild, - theta_crit2, r2, epsilon_i, epsilon_j); + return gravity_M2L_accept_symmetric(props, multi_i, multi_j, r2, + use_rebuild_data, periodic); } diff --git a/src/cell.h b/src/cell.h index e84e00bcd77972be5e2a89d89b7d180a96bd637a..9666d44d1fba32e9543ebf44151ebf69d3a84b52 100644 --- a/src/cell.h +++ b/src/cell.h @@ -973,9 +973,8 @@ void cell_reorder_extra_gparts(struct cell *c, struct part *parts, struct spart *sparts); void cell_reorder_extra_sparts(struct cell *c, const ptrdiff_t sparts_offset); int cell_can_use_pair_mm(const struct cell *ci, const struct cell *cj, - const struct engine *e, const struct space *s); -int cell_can_use_pair_mm_rebuild(const struct cell *ci, const struct cell *cj, - const 
struct engine *e, const struct space *s); + const struct engine *e, const struct space *s, + const int use_rebuild_data, const int is_tree_walk); /** * @brief Compute the square of the minimal distance between any two points in diff --git a/src/drift.h b/src/drift.h index 74ac46346f8b2038f65793cb172f5138bbfa1545..2d1ee00378c51ac165c699d4ce0c47f24b64f49a 100644 --- a/src/drift.h +++ b/src/drift.h @@ -43,7 +43,7 @@ */ __attribute__((always_inline)) INLINE static void drift_gpart( struct gpart *restrict gp, double dt_drift, integertime_t ti_old, - integertime_t ti_current) { + integertime_t ti_current, const struct gravity_props *grav_props) { #ifdef SWIFT_DEBUG_CHECKS if (gp->ti_drift != ti_old) @@ -60,6 +60,8 @@ __attribute__((always_inline)) INLINE static void drift_gpart( gp->x[0] += gp->v_full[0] * dt_drift; gp->x[1] += gp->v_full[1] * dt_drift; gp->x[2] += gp->v_full[2] * dt_drift; + + gravity_predict_extra(gp, grav_props); } /** diff --git a/src/engine.c b/src/engine.c index e7fe2968a9b964c0526258e0f46d0d7364847c4f..15bc1b8ba7ea20c84220b69bc94da2fe0295c0c6 100644 --- a/src/engine.c +++ b/src/engine.c @@ -77,6 +77,7 @@ #include "memuse.h" #include "minmax.h" #include "mpiuse.h" +#include "multipole_struct.h" #include "output_list.h" #include "output_options.h" #include "parallel_io.h" @@ -2044,19 +2045,18 @@ void engine_skip_force_and_kick(struct engine *e) { t->type == task_type_timestep || t->type == task_type_timestep_limiter || t->type == task_type_timestep_sync || - t->subtype == task_subtype_force || - t->subtype == task_subtype_limiter || t->subtype == task_subtype_grav || - t->type == task_type_end_hydro_force || - t->type == task_type_end_grav_force || - t->type == task_type_grav_long_range || t->type == task_type_grav_mm || - t->type == task_type_grav_down || t->type == task_type_grav_down_in || - t->type == task_type_drift_gpart_out || t->type == task_type_cooling || + t->type == task_type_end_hydro_force || t->type == task_type_cooling || t->type == 
task_type_stars_in || t->type == task_type_stars_out || t->type == task_type_star_formation || t->type == task_type_stars_resort || t->type == task_type_extra_ghost || + t->type == task_type_stars_ghost || + t->type == task_type_stars_ghost_in || + t->type == task_type_stars_ghost_out || t->type == task_type_bh_swallow_ghost1 || t->type == task_type_bh_swallow_ghost2 || - t->type == task_type_bh_swallow_ghost3 || + t->type == task_type_bh_swallow_ghost3 || t->type == task_type_bh_in || + t->type == task_type_bh_out || t->subtype == task_subtype_force || + t->subtype == task_subtype_limiter || t->subtype == task_subtype_gradient || t->subtype == task_subtype_stars_feedback || t->subtype == task_subtype_bh_feedback || @@ -2072,8 +2072,7 @@ void engine_skip_force_and_kick(struct engine *e) { t->subtype == task_subtype_tend_gpart || t->subtype == task_subtype_tend_spart || t->subtype == task_subtype_tend_bpart || - t->subtype == task_subtype_rho || t->subtype == task_subtype_gpart || - t->subtype == task_subtype_sf_counts) + t->subtype == task_subtype_rho || t->subtype == task_subtype_sf_counts) t->skip = 1; } @@ -2196,7 +2195,8 @@ void engine_init_particles(struct engine *e, int flag_entropy_ICs, if (e->nodeID == 0) message("Setting particles to a valid state..."); engine_first_init_particles(e); - if (e->nodeID == 0) message("Computing initial gas densities."); + if (e->nodeID == 0) + message("Computing initial gas densities and approximate gravity."); /* Construct all cells and tasks to start everything */ engine_rebuild(e, 0, clean_h_values); @@ -2268,6 +2268,10 @@ void engine_init_particles(struct engine *e, int flag_entropy_ICs, /* Now time to get ready for the first time-step */ if (e->nodeID == 0) message("Running initial fake time-step."); + /* Update the MAC strategy if necessary */ + if (e->policy & engine_policy_self_gravity) + gravity_props_update_MAC_choice(e->gravity_properties); + /* Construct all cells again for a new round (need to update h_max) */ 
engine_rebuild(e, 0, 0); @@ -3131,8 +3135,8 @@ void engine_makeproxies(struct engine *e) { /* Get some info about the physics */ const int with_hydro = (e->policy & engine_policy_hydro); const int with_gravity = (e->policy & engine_policy_self_gravity); - const double theta_crit_inv = e->gravity_properties->theta_crit_inv; - const double theta_crit2 = e->gravity_properties->theta_crit2; + const double theta_crit = e->gravity_properties->theta_crit; + const double theta_crit_inv = 1. / e->gravity_properties->theta_crit; const double max_mesh_dist = e->mesh->r_cut_max; const double max_mesh_dist2 = max_mesh_dist * max_mesh_dist; @@ -3276,26 +3280,17 @@ void engine_makeproxies(struct engine *e) { sqrt(min_dist_centres2) - 2. * delta_CoM; const double min_dist_CoM2 = min_dist_CoM * min_dist_CoM; - /* We also assume that the softening is negligible compared - to the cell size */ - const double epsilon_i = 0.; - const double epsilon_j = 0.; - /* Are we beyond the distance where the truncated forces are 0 * but not too far such that M2L can be used? */ if (periodic) { if ((min_dist_CoM2 < max_mesh_dist2) && - (!gravity_M2L_accept(r_max, r_max, theta_crit2, - min_dist_CoM2, epsilon_i, - epsilon_j))) + !(4. * r_max * r_max < theta_crit * theta_crit * min_dist_CoM2)) proxy_type |= (int)proxy_cell_type_gravity; } else { - if (!gravity_M2L_accept(r_max, r_max, theta_crit2, - min_dist_CoM2, epsilon_i, - epsilon_j)) + if (!(4. * r_max * r_max < theta_crit * theta_crit * min_dist_CoM2)) proxy_type |= (int)proxy_cell_type_gravity; } } diff --git a/src/engine_maketasks.c b/src/engine_maketasks.c index 9e48e5826fc13256647235ffa1c5dad00adb76bb..b73aa475c008cbb77f53e981d49f61c43ab845fb 100644 --- a/src/engine_maketasks.c +++ b/src/engine_maketasks.c @@ -1381,7 +1381,8 @@ void engine_make_self_gravity_tasks_mapper(void *map_data, int num_elements, if (periodic && min_radius2 > max_distance2) continue; /* Are the cells too close for a MM interaction ? 
*/ - if (!cell_can_use_pair_mm_rebuild(ci, cj, e, s)) { + if (!cell_can_use_pair_mm(ci, cj, e, s, /*use_rebuild_data=*/1, + /*is_tree_walk=*/0)) { /* Ok, we need to add a direct pair calculation */ scheduler_addtask(sched, task_type_pair, task_subtype_grav, 0, 0, diff --git a/src/exp.h b/src/exp.h new file mode 100644 index 0000000000000000000000000000000000000000..e3820a56c7e7fa6ca565374bc984514798c03faa --- /dev/null +++ b/src/exp.h @@ -0,0 +1,72 @@ +/******************************************************************************* + * This file is part of SWIFT. + * Copyright (c) 2020 Matthieu Schaller (schaller@strw.leidenuniv.nl) + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published + * by the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public License + * along with this program. If not, see <http://www.gnu.org/licenses/>. + * + ******************************************************************************/ +#ifndef SWIFT_OPTIMIZED_EXP_H +#define SWIFT_OPTIMIZED_EXP_H + +/* Config parameters. */ +#include "../config.h" + +/* Local headers. */ +#include "inline.h" + +/* Standard headers */ +#include <math.h> + +/** + * @brief Compute the exponential of a number. + * + * This function has a relative accuracy of 1.618e-6 over the input + * range [-32., 32.]. + * + * @param x The number to take the exponential of. 
+ */ +__attribute__((always_inline, const)) INLINE static float optimized_expf( + const float x) { + + /* Let's first express e^x as 2^i * e^f with + * f in the range [-ln(2)/2, ln(2)/2] */ + const float i = rintf(x * ((float)M_LOG2E)); + const float f = x - ((float)M_LN2) * i; + + /* We can now compute exp(f) using a polynomial + * approximation valid over the range [-ln(2)/2, ln(2)/2]. + * The coefficients come from the Cephes library and + * have been obtained using a minimax algorithm */ + float exp_f = 0.041944388f; + exp_f = exp_f * f + 0.168006673f; + exp_f = exp_f * f + 0.499999940f; + exp_f = exp_f * f + 0.999956906f; + exp_f = exp_f * f + 0.999999642f; + + union { + int i; + float f; + } e; + + /* We can now construct the result by taking exp_f + * as the mantissa of the answer and bit-shifting i + * into the exponent part of the floating-point + * number */ + e.f = exp_f; + e.i += ((int)i) << 23; + + return e.f; +} + +#endif /* SWIFT_OPTIMIZED_EXP_H */ diff --git a/src/gravity.c b/src/gravity.c index 6285acc01f5379dbb9d6e40c10b8189923d4645f..8c013139aa6771342e598bd543558bfa0ffb98fa 100644 --- a/src/gravity.c +++ b/src/gravity.c @@ -36,6 +36,8 @@ /* Local headers. */ #include "active.h" #include "error.h" +#include "kernel_gravity.h" +#include "kernel_long_gravity.h" #include "threadpool.h" #include "version.h" diff --git a/src/gravity/Default/gravity.h b/src/gravity/Default/gravity.h index ca91265c54262badcb3ebde5b32bd7270be1080a..f258e8604f18c45df2dac8a8966c7be276c3d24d 100644 --- a/src/gravity/Default/gravity.h +++ b/src/gravity/Default/gravity.h @@ -212,6 +212,17 @@ __attribute__((always_inline)) INLINE static void gravity_end_force( #endif } +/** + * @brief Update the #gpart after a drift step. + * + * This is typically used to update the softening lengths. + * + * @param gp The particle to act upon + * @param grav_props The global properties of the gravity calculation. 
+ */ +__attribute__((always_inline)) INLINE static void gravity_predict_extra( + struct gpart* gp, const struct gravity_props* grav_props) {} + /** * @brief Kick the additional variables * diff --git a/src/gravity/Default/gravity_iact.h b/src/gravity/Default/gravity_iact.h index 7201db6864f77b989b2e28d1e275713a52480f62..85f2236bc5610294faae6a868fc2282d3c7a0ad4 100644 --- a/src/gravity/Default/gravity_iact.h +++ b/src/gravity/Default/gravity_iact.h @@ -61,9 +61,7 @@ runner_iact_grav_pp_full(const float r2, const float h2, const float h_inv, const float r = r2 * r_inv; const float ui = r * h_inv; - - float W_f_ij; - kernel_grav_force_eval(ui, &W_f_ij); + const float W_f_ij = kernel_grav_force_eval(ui); /* Get softened gravity */ *f_ij = mass * h_inv3 * W_f_ij; @@ -108,9 +106,7 @@ runner_iact_grav_pp_truncated(const float r2, const float h2, const float h_inv, } else { const float ui = r * h_inv; - float W_f_ij; - - kernel_grav_force_eval(ui, &W_f_ij); + const float W_f_ij = kernel_grav_force_eval(ui); /* Get softened gravity */ *f_ij = mass * h_inv3 * W_f_ij; @@ -118,8 +114,8 @@ runner_iact_grav_pp_truncated(const float r2, const float h2, const float h_inv, /* Get long-range correction */ const float u_lr = r * r_s_inv; - float corr_f_lr; - kernel_long_grav_force_eval(u_lr, &corr_f_lr); + float corr_f_lr, dummy; + kernel_long_grav_eval(u_lr, &corr_f_lr, &dummy); *f_ij *= corr_f_lr; /* No potential calculation */ @@ -149,7 +145,12 @@ runner_iact_grav_pm_full(const float r_x, const float r_y, const float r_z, float *restrict pot) { /* Use the M2P kernel */ - struct reduced_grav_tensor l = {0.f, 0.f, 0.f, 0.f}; + struct reduced_grav_tensor l; + l.F_000 = 0.f; + l.F_100 = 0.f; + l.F_010 = 0.f; + l.F_001 = 0.f; + gravity_M2P(m, r_x, r_y, r_z, r2, h, /*periodic=*/0, /*rs_inv=*/0.f, &l); /* Write back */ @@ -186,7 +187,12 @@ runner_iact_grav_pm_truncated(const float r_x, const float r_y, const float r_z, float *restrict f_z, float *restrict pot) { /* Use the M2P kernel */ 
- struct reduced_grav_tensor l = {0.f, 0.f, 0.f, 0.f}; + struct reduced_grav_tensor l; + l.F_000 = 0.f; + l.F_100 = 0.f; + l.F_010 = 0.f; + l.F_001 = 0.f; + gravity_M2P(m, r_x, r_y, r_z, r2, h, /*periodic=*/1, r_s_inv, &l); /* Write back */ diff --git a/src/gravity/MultiSoftening/gravity.h b/src/gravity/MultiSoftening/gravity.h index d7716cf712254054e3cf364a807f3cb1c12355f3..e436cc3c144571645f32879d2ce950d64d080825 100644 --- a/src/gravity/MultiSoftening/gravity.h +++ b/src/gravity/MultiSoftening/gravity.h @@ -23,7 +23,6 @@ #include <float.h> /* Local includes. */ -#include "accumulate.h" #include "cosmology.h" #include "error.h" #include "gravity_properties.h" @@ -50,23 +49,7 @@ __attribute__((always_inline)) INLINE static float gravity_get_mass( __attribute__((always_inline)) INLINE static float gravity_get_softening( const struct gpart* gp, const struct gravity_props* restrict grav_props) { - switch (gp->type) { - case swift_type_dark_matter: - return grav_props->epsilon_DM_cur; - case swift_type_stars: - return grav_props->epsilon_baryon_cur; - case swift_type_gas: - return grav_props->epsilon_baryon_cur; - case swift_type_black_hole: - return grav_props->epsilon_baryon_cur; - case swift_type_dark_matter_background: - return grav_props->epsilon_background_fac * cbrtf(gp->mass); - default: -#ifdef SWIFT_DEBUG_CHECKS - error("Invalid gpart type!"); -#endif - return 0.f; - } + return gp->epsilon; } /** @@ -78,7 +61,7 @@ __attribute__((always_inline)) INLINE static float gravity_get_softening( __attribute__((always_inline)) INLINE static void gravity_add_comoving_potential(struct gpart* restrict gp, float pot) { - accumulate_add_f(&gp->potential, pot); + gp->potential += pot; } /** @@ -206,6 +189,18 @@ __attribute__((always_inline)) INLINE static void gravity_end_force( /* Apply the periodic correction to the peculiar potential */ if (periodic) gp->potential += potential_normalisation; + /* Record the norm of the acceleration for the adaptive opening criteria. 
+ * Will always be an (active) timestep behind. */ + gp->old_a_grav_norm = gp->a_grav[0] * gp->a_grav[0] + + gp->a_grav[1] * gp->a_grav[1] + + gp->a_grav[2] * gp->a_grav[2]; + + gp->old_a_grav_norm = sqrtf(gp->old_a_grav_norm); + +#ifdef SWIFT_DEBUG_CHECKS + if (gp->old_a_grav_norm == 0.f) error("Old acceleration is 0!"); +#endif + /* Let's get physical... */ gp->a_grav[0] *= const_G; gp->a_grav[1] *= const_G; @@ -227,6 +222,41 @@ __attribute__((always_inline)) INLINE static void gravity_end_force( #endif } +/** + * @brief Update the #gpart after a drift step. + * + * This is typically used to update the softening lengths. + * + * @param gp The particle to act upon + * @param grav_props The global properties of the gravity calculation. + */ +__attribute__((always_inline)) INLINE static void gravity_predict_extra( + struct gpart* gp, const struct gravity_props* grav_props) { + + switch (gp->type) { + case swift_type_dark_matter: + gp->epsilon = grav_props->epsilon_DM_cur; + break; + case swift_type_stars: + gp->epsilon = grav_props->epsilon_baryon_cur; + break; + case swift_type_gas: + gp->epsilon = grav_props->epsilon_baryon_cur; + break; + case swift_type_black_hole: + gp->epsilon = grav_props->epsilon_baryon_cur; + break; + case swift_type_dark_matter_background: + gp->epsilon = grav_props->epsilon_background_fac * cbrtf(gp->mass); + break; + default: +#ifdef SWIFT_DEBUG_CHECKS + error("Invalid gpart type!"); +#endif + break; + } +} + /** * @brief Kick the additional variables * @@ -258,6 +288,30 @@ __attribute__((always_inline)) INLINE static void gravity_first_init_gpart( struct gpart* gp, const struct gravity_props* grav_props) { gp->time_bin = 0; + gp->old_a_grav_norm = 0.f; + + switch (gp->type) { + case swift_type_dark_matter: + gp->epsilon = grav_props->epsilon_DM_cur; + break; + case swift_type_stars: + gp->epsilon = grav_props->epsilon_baryon_cur; + break; + case swift_type_gas: + gp->epsilon = grav_props->epsilon_baryon_cur; + break; + case 
swift_type_black_hole: + gp->epsilon = grav_props->epsilon_baryon_cur; + break; + case swift_type_dark_matter_background: + gp->epsilon = grav_props->epsilon_background_fac * cbrtf(gp->mass); + break; + default: +#ifdef SWIFT_DEBUG_CHECKS + error("Invalid gpart type!"); +#endif + break; + } gravity_init_gpart(gp); } diff --git a/src/gravity/MultiSoftening/gravity_iact.h b/src/gravity/MultiSoftening/gravity_iact.h index 42ac2bebea16ed46b4cac74b7029e21f310afc1d..2b51e1e7ff31dcb2874be89e4f17f435ab7e5a05 100644 --- a/src/gravity/MultiSoftening/gravity_iact.h +++ b/src/gravity/MultiSoftening/gravity_iact.h @@ -62,10 +62,8 @@ runner_iact_grav_pp_full(const float r2, const float h2, const float h_inv, const float r = r2 * r_inv; const float ui = r * h_inv; - - float W_f_ij, W_pot_ij; - kernel_grav_force_eval(ui, &W_f_ij); - kernel_grav_pot_eval(ui, &W_pot_ij); + const float W_f_ij = kernel_grav_force_eval(ui); + const float W_pot_ij = kernel_grav_pot_eval(ui); /* Get softened gravity */ *f_ij = mass * h_inv3 * W_f_ij; @@ -109,10 +107,8 @@ runner_iact_grav_pp_truncated(const float r2, const float h2, const float h_inv, } else { const float ui = r * h_inv; - float W_f_ij, W_pot_ij; - - kernel_grav_force_eval(ui, &W_f_ij); - kernel_grav_pot_eval(ui, &W_pot_ij); + const float W_f_ij = kernel_grav_force_eval(ui); + const float W_pot_ij = kernel_grav_pot_eval(ui); /* Get softened gravity */ *f_ij = mass * h_inv3 * W_f_ij; @@ -122,8 +118,7 @@ runner_iact_grav_pp_truncated(const float r2, const float h2, const float h_inv, /* Get long-range correction */ const float u_lr = r * r_s_inv; float corr_f_lr, corr_pot_lr; - kernel_long_grav_force_eval(u_lr, &corr_f_lr); - kernel_long_grav_pot_eval(u_lr, &corr_pot_lr); + kernel_long_grav_eval(u_lr, &corr_f_lr, &corr_pot_lr); *f_ij *= corr_f_lr; *pot_ij *= corr_pot_lr; } @@ -151,7 +146,12 @@ runner_iact_grav_pm_full(const float r_x, const float r_y, const float r_z, float *restrict pot) { /* Use the M2P kernel */ - struct 
reduced_grav_tensor l = {0.f, 0.f, 0.f, 0.f}; + struct reduced_grav_tensor l; + l.F_000 = 0.f; + l.F_100 = 0.f; + l.F_010 = 0.f; + l.F_001 = 0.f; + gravity_M2P(m, r_x, r_y, r_z, r2, h, /*periodic=*/0, /*rs_inv=*/0.f, &l); /* Write back */ @@ -186,7 +186,12 @@ runner_iact_grav_pm_truncated(const float r_x, const float r_y, const float r_z, float *restrict f_z, float *restrict pot) { /* Use the M2P kernel */ - struct reduced_grav_tensor l = {0.f, 0.f, 0.f, 0.f}; + struct reduced_grav_tensor l; + l.F_000 = 0.f; + l.F_100 = 0.f; + l.F_010 = 0.f; + l.F_001 = 0.f; + gravity_M2P(m, r_x, r_y, r_z, r2, h, /*periodic=*/1, r_s_inv, &l); /* Write back */ diff --git a/src/gravity/MultiSoftening/gravity_part.h b/src/gravity/MultiSoftening/gravity_part.h index 2d6d0d9bfbb18040faa282b63a0aa573b751a182..49594f342606734951a099e621d9fab64ed64bbd 100644 --- a/src/gravity/MultiSoftening/gravity_part.h +++ b/src/gravity/MultiSoftening/gravity_part.h @@ -43,6 +43,12 @@ struct gpart { /*! Particle mass. */ float mass; + /*! Norm of the acceleration at the previous step. */ + float old_a_grav_norm; + + /*! Current co-moving spline softening of the particle */ + float epsilon; + /*! Particle FoF properties (group ID, group size, ...) */ struct fof_gpart_data fof_data; diff --git a/src/gravity/Potential/gravity.h b/src/gravity/Potential/gravity.h index f9a9502a528c161fbc82b3028f303b7f9cad49f8..4ce1e73ef3cef94065aca3ece3fa344cc5231d70 100644 --- a/src/gravity/Potential/gravity.h +++ b/src/gravity/Potential/gravity.h @@ -23,7 +23,6 @@ #include <float.h> /* Local includes. 
*/ -#include "accumulate.h" #include "cosmology.h" #include "gravity_properties.h" #include "kernel_gravity.h" @@ -64,7 +63,7 @@ __attribute__((always_inline)) INLINE static float gravity_get_softening( __attribute__((always_inline)) INLINE static void gravity_add_comoving_potential(struct gpart* restrict gp, float pot) { - accumulate_add_f(&gp->potential, pot); + gp->potential += pot; } /** @@ -195,6 +194,17 @@ __attribute__((always_inline)) INLINE static void gravity_end_force( #endif } +/** + * @brief Update the #gpart after a drift step. + * + * This is typically used to update the softening lengths. + * + * @param gp The particle to act upon + * @param grav_props The global properties of the gravity calculation. + */ +__attribute__((always_inline)) INLINE static void gravity_predict_extra( + struct gpart* gp, const struct gravity_props* grav_props) {} + /** * @brief Kick the additional variables * diff --git a/src/gravity/Potential/gravity_iact.h b/src/gravity/Potential/gravity_iact.h index 66e971eae861d5205fb1289a6fee8a09bd941031..feb0abecdb3ecd43a4b9f2d9152ddb0028939e8a 100644 --- a/src/gravity/Potential/gravity_iact.h +++ b/src/gravity/Potential/gravity_iact.h @@ -62,10 +62,8 @@ runner_iact_grav_pp_full(const float r2, const float h2, const float h_inv, const float r = r2 * r_inv; const float ui = r * h_inv; - - float W_f_ij, W_pot_ij; - kernel_grav_force_eval(ui, &W_f_ij); - kernel_grav_pot_eval(ui, &W_pot_ij); + const float W_f_ij = kernel_grav_force_eval(ui); + const float W_pot_ij = kernel_grav_pot_eval(ui); /* Get softened gravity */ *f_ij = mass * h_inv3 * W_f_ij; @@ -109,10 +107,8 @@ runner_iact_grav_pp_truncated(const float r2, const float h2, const float h_inv, } else { const float ui = r * h_inv; - float W_f_ij, W_pot_ij; - - kernel_grav_force_eval(ui, &W_f_ij); - kernel_grav_pot_eval(ui, &W_pot_ij); + const float W_f_ij = kernel_grav_force_eval(ui); + const float W_pot_ij = kernel_grav_pot_eval(ui); /* Get softened gravity */ *f_ij = mass * 
h_inv3 * W_f_ij; @@ -122,8 +118,7 @@ runner_iact_grav_pp_truncated(const float r2, const float h2, const float h_inv, /* Get long-range correction */ const float u_lr = r * r_s_inv; float corr_f_lr, corr_pot_lr; - kernel_long_grav_force_eval(u_lr, &corr_f_lr); - kernel_long_grav_pot_eval(u_lr, &corr_pot_lr); + kernel_long_grav_eval(u_lr, &corr_f_lr, &corr_pot_lr); *f_ij *= corr_f_lr; *pot_ij *= corr_pot_lr; } @@ -151,7 +146,12 @@ runner_iact_grav_pm_full(const float r_x, const float r_y, const float r_z, float *restrict pot) { /* Use the M2P kernel */ - struct reduced_grav_tensor l = {0.f, 0.f, 0.f, 0.f}; + struct reduced_grav_tensor l; + l.F_000 = 0.f; + l.F_100 = 0.f; + l.F_010 = 0.f; + l.F_001 = 0.f; + gravity_M2P(m, r_x, r_y, r_z, r2, h, /*periodic=*/0, /*rs_inv=*/0.f, &l); /* Write back */ @@ -186,7 +186,12 @@ runner_iact_grav_pm_truncated(const float r_x, const float r_y, const float r_z, float *restrict f_z, float *restrict pot) { /* Use the M2P kernel */ - struct reduced_grav_tensor l = {0.f, 0.f, 0.f, 0.f}; + struct reduced_grav_tensor l; + l.F_000 = 0.f; + l.F_100 = 0.f; + l.F_010 = 0.f; + l.F_001 = 0.f; + gravity_M2P(m, r_x, r_y, r_z, r2, h, /*periodic=*/1, r_s_inv, &l); /* Write back */ diff --git a/src/gravity_cache.h b/src/gravity_cache.h index e96f1ada2109eb4fffea531de16ab4258faaa1de..75fb640457f21f26e2bd81b216692fb6fd52790e 100644 --- a/src/gravity_cache.h +++ b/src/gravity_cache.h @@ -27,7 +27,7 @@ #include "align.h" #include "error.h" #include "gravity.h" -#include "multipole.h" +#include "multipole_accept.h" #include "vector.h" /** @@ -154,8 +154,8 @@ static INLINE void gravity_cache_init(struct gravity_cache *c, * @param c The #gravity_cache to zero. * @param gcount_padded The padded size of the cache arrays. 
*/ -__attribute__((always_inline)) INLINE static void gravity_cache_zero_output( - struct gravity_cache *c, const int gcount_padded) { +INLINE static void gravity_cache_zero_output(struct gravity_cache *c, + const int gcount_padded) { #ifdef SWIFT_DEBUG_CHECKS if (gcount_padded % VEC_SIZE != 0) @@ -193,20 +193,18 @@ __attribute__((always_inline)) INLINE static void gravity_cache_zero_output( * multiple of the vector length. * @param shift A shift to apply to all the particles. * @param CoM The position of the multipole. - * @param r_max2 The square of the multipole radius. + * @param multipole The multipole to check for. * @param cell The cell we play with (to get reasonable padding positions). * @param grav_props The global gravity properties. */ -__attribute__((always_inline)) INLINE static void gravity_cache_populate( +INLINE static void gravity_cache_populate( const timebin_t max_active_bin, const int allow_mpole, const int periodic, const float dim[3], struct gravity_cache *c, const struct gpart *restrict gparts, const int gcount, const int gcount_padded, const double shift[3], const float CoM[3], - const float r_max2, const struct cell *cell, + const struct gravity_tensors *multipole, const struct cell *cell, const struct gravity_props *grav_props) { - const float theta_crit2 = grav_props->theta_crit2; - #ifdef SWIFT_DEBUG_CHECKS if (gcount_padded < gcount) error("Invalid padded cache size.
Too small."); if (gcount_padded % VEC_SIZE != 0) @@ -227,6 +225,9 @@ __attribute__((always_inline)) INLINE static void gravity_cache_populate( swift_assume_size(gcount_padded, VEC_SIZE); /* Fill the input caches */ +#ifndef SWIFT_DEBUG_CHECKS +#pragma omp simd +#endif for (int i = 0; i < gcount; ++i) { x[i] = (float)(gparts[i].x[0] - shift[0]); @@ -257,8 +258,8 @@ __attribute__((always_inline)) INLINE static void gravity_cache_populate( const float r2 = dx * dx + dy * dy + dz * dz; /* Check whether we can use the multipole instead of P-P */ - use_mpole[i] = - allow_mpole && gravity_M2P_accept(r_max2, theta_crit2, r2, epsilon[i]); + use_mpole[i] = allow_mpole && gravity_M2P_accept(grav_props, &gparts[i], + multipole, r2, periodic); } #ifdef SWIFT_DEBUG_CHECKS @@ -300,13 +301,11 @@ __attribute__((always_inline)) INLINE static void gravity_cache_populate( * @param cell The cell we play with (to get reasonable padding positions). * @param grav_props The global gravity properties. */ -__attribute__((always_inline)) INLINE static void -gravity_cache_populate_no_mpole(const timebin_t max_active_bin, - struct gravity_cache *c, - const struct gpart *restrict gparts, - const int gcount, const int gcount_padded, - const double shift[3], const struct cell *cell, - const struct gravity_props *grav_props) { +INLINE static void gravity_cache_populate_no_mpole( + const timebin_t max_active_bin, struct gravity_cache *c, + const struct gpart *restrict gparts, const int gcount, + const int gcount_padded, const double shift[3], const struct cell *cell, + const struct gravity_props *grav_props) { #ifdef SWIFT_DEBUG_CHECKS if (gcount_padded < gcount) error("Invalid padded cache size. Too small."); @@ -381,18 +380,15 @@ gravity_cache_populate_no_mpole(const timebin_t max_active_bin, * multiple of the vector length. * @param cell The cell we play with (to get reasonable padding positions). * @param CoM The position of the multipole. - * @param r_max2 The square of the multipole radius. 
+ * @param multipole The multipole to check for. * @param grav_props The global gravity properties. */ -__attribute__((always_inline)) INLINE static void -gravity_cache_populate_all_mpole(const timebin_t max_active_bin, - const int periodic, const float dim[3], - struct gravity_cache *c, - const struct gpart *restrict gparts, - const int gcount, const int gcount_padded, - const struct cell *cell, const float CoM[3], - const float r_max2, - const struct gravity_props *grav_props) { +INLINE static void gravity_cache_populate_all_mpole( + const timebin_t max_active_bin, const int periodic, const float dim[3], + struct gravity_cache *c, const struct gpart *restrict gparts, + const int gcount, const int gcount_padded, const struct cell *cell, + const float CoM[3], const struct gravity_tensors *multipole, + const struct gravity_props *grav_props) { #ifdef SWIFT_DEBUG_CHECKS if (gcount_padded < gcount) error("Invalid padded cache size. Too small."); @@ -400,8 +396,6 @@ gravity_cache_populate_all_mpole(const timebin_t max_active_bin, error("Padded gravity cache size invalid. Not a multiple of SIMD length."); if (c->count < gcount_padded) error("Size of the gravity cache is not large enough."); - - const float theta_crit2 = grav_props->theta_crit2; #endif /* Make the compiler understand we are in happy vectorization land */ @@ -439,7 +433,7 @@ gravity_cache_populate_all_mpole(const timebin_t max_active_bin, } const float r2 = dx * dx + dy * dy + dz * dz; - if (!gravity_M2P_accept(r_max2, theta_crit2, r2, epsilon[i])) + if (!gravity_M2P_accept(grav_props, &gparts[i], multipole, r2, periodic)) error("Using m-pole where the test fails"); #endif } @@ -479,9 +473,9 @@ gravity_cache_populate_all_mpole(const timebin_t max_active_bin, * @param gparts The #gpart array to write to. * @param gcount The number of particles to write.
*/ -__attribute__((always_inline)) INLINE static void gravity_cache_write_back( - const struct gravity_cache *c, struct gpart *restrict gparts, - const int gcount) { +INLINE static void gravity_cache_write_back(const struct gravity_cache *c, + struct gpart *restrict gparts, + const int gcount) { /* Make the compiler understand we are in happy vectorization land */ swift_declare_aligned_ptr(float, a_x, c->a_x, SWIFT_CACHE_ALIGNMENT); @@ -491,11 +485,14 @@ __attribute__((always_inline)) INLINE static void gravity_cache_write_back( swift_declare_aligned_ptr(int, active, c->active, SWIFT_CACHE_ALIGNMENT); /* Write stuff back to the particles */ +#ifndef SWIFT_DEBUG_CHECKS +#pragma omp simd +#endif for (int i = 0; i < gcount; ++i) { if (active[i]) { - accumulate_add_f(&gparts[i].a_grav[0], a_x[i]); - accumulate_add_f(&gparts[i].a_grav[1], a_y[i]); - accumulate_add_f(&gparts[i].a_grav[2], a_z[i]); + gparts[i].a_grav[0] += a_x[i]; + gparts[i].a_grav[1] += a_y[i]; + gparts[i].a_grav[2] += a_z[i]; gravity_add_comoving_potential(&gparts[i], pot[i]); } } diff --git a/src/gravity_derivatives.h b/src/gravity_derivatives.h index 86b4a6c3de4db3c63c64d49a708a4d8ac19a5d93..b14ecc4e738b8c3331f7ad0039e23cb2623a2d2e 100644 --- a/src/gravity_derivatives.h +++ b/src/gravity_derivatives.h @@ -220,53 +220,79 @@ potential_derivatives_compute_M2L(const float r_x, const float r_y, const int periodic, const float r_s_inv, struct potential_derivatives_M2L *pot) { -#ifdef SWIFT_DEBUG_CHECKS - if (r2 < 0.99f * eps * eps) - error("Computing M2L derivatives below softening length"); + float Dt_1; +#if SELF_GRAVITY_MULTIPOLE_ORDER > 0 + float Dt_2; +#endif +#if SELF_GRAVITY_MULTIPOLE_ORDER > 1 + float Dt_3; #endif +#if SELF_GRAVITY_MULTIPOLE_ORDER > 2 + float Dt_4; +#endif +#if SELF_GRAVITY_MULTIPOLE_ORDER > 3 + float Dt_5; +#endif +#if SELF_GRAVITY_MULTIPOLE_ORDER > 4 + float Dt_6; +#endif + + /* Softened case */ + if (r2 < eps * eps) { - float Dt_1 = 0.f; + const float eps_inv = 1.f / eps; + 
const float r = r2 * r_inv; + const float u = r * eps_inv; + + Dt_1 = eps_inv * D_soft_1(u); #if SELF_GRAVITY_MULTIPOLE_ORDER > 0 - float Dt_3 = 0.f; + const float eps_inv2 = eps_inv * eps_inv; + Dt_2 = eps_inv2 * D_soft_2(u); #endif #if SELF_GRAVITY_MULTIPOLE_ORDER > 1 - float Dt_5 = 0.f; + const float eps_inv3 = eps_inv2 * eps_inv; + Dt_3 = eps_inv3 * D_soft_3(u); #endif #if SELF_GRAVITY_MULTIPOLE_ORDER > 2 - float Dt_7 = 0.f; + const float eps_inv4 = eps_inv3 * eps_inv; + Dt_4 = eps_inv4 * D_soft_4(u); #endif #if SELF_GRAVITY_MULTIPOLE_ORDER > 3 - float Dt_9 = 0.f; + const float eps_inv5 = eps_inv4 * eps_inv; + Dt_5 = eps_inv5 * D_soft_5(u); #endif #if SELF_GRAVITY_MULTIPOLE_ORDER > 4 - float Dt_11 = 0.f; + const float eps_inv6 = eps_inv5 * eps_inv; + Dt_6 = eps_inv6 * D_soft_6(u); +#endif +#if SELF_GRAVITY_MULTIPOLE_ORDER > 5 +#error "Missing implementation for order >5" #endif - /* Un-truncated case (Newtonian potential) */ - if (!periodic) { + /* Un-truncated un-softened case (Newtonian potential) */ + } else if (!periodic) { - Dt_1 = r_inv; -#if SELF_GRAVITY_MULTIPOLE_ORDER > 0 - const float r_inv2 = r_inv * r_inv; - Dt_3 = -1.f * Dt_1 * r_inv2; /* -1 / r^3 */ + Dt_1 = r_inv; /* 1 / r */ +#if SELF_GRAVITY_MULTIPOLE_ORDER > 1 + Dt_2 = -1.f * Dt_1 * r_inv; /* -1 / r^2 */ #endif #if SELF_GRAVITY_MULTIPOLE_ORDER > 1 - Dt_5 = -3.f * Dt_3 * r_inv2; /* 3 / r^5 */ + Dt_3 = -3.f * Dt_2 * r_inv; /* 3 / r^3 */ #endif #if SELF_GRAVITY_MULTIPOLE_ORDER > 2 - Dt_7 = -5.f * Dt_5 * r_inv2; /* -15 / r^7 */ + Dt_4 = -5.f * Dt_3 * r_inv; /* -15 / r^4 */ #endif #if SELF_GRAVITY_MULTIPOLE_ORDER > 3 - Dt_9 = -7.f * Dt_7 * r_inv2; /* 105 / r^9 */ + Dt_5 = -7.f * Dt_4 * r_inv; /* 105 / r^5 */ #endif #if SELF_GRAVITY_MULTIPOLE_ORDER > 4 - Dt_11 = -9.f * Dt_9 * r_inv2; /* -945 / r^11 */ + Dt_6 = -9.f * Dt_5 * r_inv; /* -945 / r^6 */ #endif #if SELF_GRAVITY_MULTIPOLE_ORDER > 5 #error "Missing implementation for order >5" #endif - /* Truncated case */ + /* Truncated case (long-range) */ 
} else { /* Get the derivatives of the truncated potential */ @@ -275,37 +301,57 @@ potential_derivatives_compute_M2L(const float r_x, const float r_y, kernel_long_grav_derivatives(r, r_s_inv, &derivs); Dt_1 = derivs.chi_0 * r_inv; + #if SELF_GRAVITY_MULTIPOLE_ORDER > 0 - const float r_inv2 = r_inv * r_inv; - const float r_inv3 = r_inv2 * r_inv; - Dt_3 = (r * derivs.chi_1 - derivs.chi_0) * r_inv3; + + /* -chi^0 r_i^2 + chi^1 r_i^1 */ + Dt_2 = derivs.chi_1 - derivs.chi_0 * r_inv; + Dt_2 = Dt_2 * r_inv; + #endif #if SELF_GRAVITY_MULTIPOLE_ORDER > 1 - const float r_inv5 = r_inv2 * r_inv3; - Dt_5 = - (r * r * derivs.chi_2 - 3.f * r * derivs.chi_1 + 3.f * derivs.chi_0) * - r_inv5; + + /* 3chi^0 r_i^3 - 3 chi^1 r_i^2 + chi^2 r_i^1 */ + Dt_3 = derivs.chi_0 * r_inv - derivs.chi_1; + Dt_3 = Dt_3 * 3.f; + Dt_3 = Dt_3 * r_inv + derivs.chi_2; + Dt_3 = Dt_3 * r_inv; + #endif #if SELF_GRAVITY_MULTIPOLE_ORDER > 2 - const float r_inv7 = r_inv2 * r_inv5; - Dt_7 = (r * r * r * derivs.chi_3 - 6.f * r * r * derivs.chi_2 + - 15.f * r * derivs.chi_1 - 15.f * derivs.chi_0) * - r_inv7; + + /* -15chi^0 r_i^4 + 15 chi^1 r_i^3 - 6 chi^2 r_i^2 + chi^3 r_i^1 */ + Dt_4 = -derivs.chi_0 * r_inv + derivs.chi_1; + Dt_4 = Dt_4 * 15.f; + Dt_4 = Dt_4 * r_inv - 6.f * derivs.chi_2; + Dt_4 = Dt_4 * r_inv + derivs.chi_3; + Dt_4 = Dt_4 * r_inv; + #endif #if SELF_GRAVITY_MULTIPOLE_ORDER > 3 - const float r_inv9 = r_inv2 * r_inv7; - Dt_9 = (r * r * r * r * derivs.chi_4 - 10.f * r * r * r * derivs.chi_3 + - 45.f * r * r * derivs.chi_2 - 105.f * r * derivs.chi_1 + - 105.f * derivs.chi_0) * - r_inv9; + + /* 105chi^0 r_i^5 - 105 chi^1 r_i^4 + 45 chi^2 r_i^3 - 10 chi^3 r_i^2 + + * chi^4 r_i^1 */ + Dt_5 = derivs.chi_0 * r_inv - derivs.chi_1; + Dt_5 = Dt_5 * 105.f; + Dt_5 = Dt_5 * r_inv + 45.f * derivs.chi_2; + Dt_5 = Dt_5 * r_inv - 10.f * derivs.chi_3; + Dt_5 = Dt_5 * r_inv + derivs.chi_4; + Dt_5 = Dt_5 * r_inv; + #endif #if SELF_GRAVITY_MULTIPOLE_ORDER > 4 - const float r_inv11 = r_inv2 * r_inv9; - Dt_11 = (r * r 
* r * r * r * derivs.chi_5 - - 15.f * r * r * r * r * derivs.chi_4 + - 105.f * r * r * r * derivs.chi_3 - 420.f * r * r * derivs.chi_2 + - 945.f * r * derivs.chi_1 - 945.f * derivs.chi_0) * - r_inv11; + + /* -945chi^0 r_i^6 + 945 chi^1 r_i^5 - 420 chi^2 r_i^4 + 105 chi^3 r_i^3 - + * 15 chi^4 r_i^2 + chi^5 r_i^1 */ + Dt_6 = -derivs.chi_0 * r_inv + derivs.chi_1; + Dt_6 = Dt_6 * 945.f; + Dt_6 = Dt_6 * r_inv - 420.f * derivs.chi_2; + Dt_6 = Dt_6 * r_inv + 105.f * derivs.chi_3; + Dt_6 = Dt_6 * r_inv - 15.f * derivs.chi_4; + Dt_6 = Dt_6 * r_inv + derivs.chi_5; + Dt_6 = Dt_6 * r_inv; + #endif #if SELF_GRAVITY_MULTIPOLE_ORDER > 5 #error "Missing implementation for order >5" @@ -314,112 +360,135 @@ potential_derivatives_compute_M2L(const float r_x, const float r_y, /* Alright, let's get the full terms */ -/* Compute some powers of r_x, r_y and r_z */ + /* Compute some powers of (r_x / r), (r_y / r) and (r_z / r) */ +#if SELF_GRAVITY_MULTIPOLE_ORDER > 0 + const float rx_r = r_x * r_inv; + const float ry_r = r_y * r_inv; + const float rz_r = r_z * r_inv; +#endif #if SELF_GRAVITY_MULTIPOLE_ORDER > 1 - const float r_x2 = r_x * r_x; - const float r_y2 = r_y * r_y; - const float r_z2 = r_z * r_z; + const float rx_r2 = rx_r * rx_r; + const float ry_r2 = ry_r * ry_r; + const float rz_r2 = rz_r * rz_r; #endif #if SELF_GRAVITY_MULTIPOLE_ORDER > 2 - const float r_x3 = r_x2 * r_x; - const float r_y3 = r_y2 * r_y; - const float r_z3 = r_z2 * r_z; + const float rx_r3 = rx_r2 * rx_r; + const float ry_r3 = ry_r2 * ry_r; + const float rz_r3 = rz_r2 * rz_r; #endif #if SELF_GRAVITY_MULTIPOLE_ORDER > 3 - const float r_x4 = r_x3 * r_x; - const float r_y4 = r_y3 * r_y; - const float r_z4 = r_z3 * r_z; + const float rx_r4 = rx_r3 * rx_r; + const float ry_r4 = ry_r3 * ry_r; + const float rz_r4 = rz_r3 * rz_r; #endif #if SELF_GRAVITY_MULTIPOLE_ORDER > 4 - const float r_x5 = r_x4 * r_x; - const float r_y5 = r_y4 * r_y; - const float r_z5 = r_z4 * r_z; -#endif -#if SELF_GRAVITY_MULTIPOLE_ORDER > 5 
-#error "Missing implementation for order >5" + const float rx_r5 = rx_r4 * rx_r; + const float ry_r5 = ry_r4 * ry_r; + const float rz_r5 = rz_r4 * rz_r; #endif /* Get the 0th order term */ pot->D_000 = Dt_1; -#if SELF_GRAVITY_MULTIPOLE_ORDER > 0 +#if SELF_GRAVITY_MULTIPOLE_ORDER > 1 /* 1st order derivatives */ - pot->D_100 = r_x * Dt_3; - pot->D_010 = r_y * Dt_3; - pot->D_001 = r_z * Dt_3; + pot->D_100 = rx_r * Dt_2; + pot->D_010 = ry_r * Dt_2; + pot->D_001 = rz_r * Dt_2; #endif + #if SELF_GRAVITY_MULTIPOLE_ORDER > 1 + + Dt_2 *= r_inv; + /* 2nd order derivatives */ - pot->D_200 = r_x2 * Dt_5 + Dt_3; - pot->D_020 = r_y2 * Dt_5 + Dt_3; - pot->D_002 = r_z2 * Dt_5 + Dt_3; - pot->D_110 = r_x * r_y * Dt_5; - pot->D_101 = r_x * r_z * Dt_5; - pot->D_011 = r_y * r_z * Dt_5; + pot->D_200 = rx_r2 * Dt_3 + Dt_2; + pot->D_020 = ry_r2 * Dt_3 + Dt_2; + pot->D_002 = rz_r2 * Dt_3 + Dt_2; + pot->D_110 = rx_r * ry_r * Dt_3; + pot->D_101 = rx_r * rz_r * Dt_3; + pot->D_011 = ry_r * rz_r * Dt_3; #endif #if SELF_GRAVITY_MULTIPOLE_ORDER > 2 + + Dt_3 *= r_inv; + /* 3rd order derivatives */ - pot->D_300 = r_x3 * Dt_7 + 3.f * r_x * Dt_5; - pot->D_030 = r_y3 * Dt_7 + 3.f * r_y * Dt_5; - pot->D_003 = r_z3 * Dt_7 + 3.f * r_z * Dt_5; - pot->D_210 = r_x2 * r_y * Dt_7 + r_y * Dt_5; - pot->D_201 = r_x2 * r_z * Dt_7 + r_z * Dt_5; - pot->D_120 = r_y2 * r_x * Dt_7 + r_x * Dt_5; - pot->D_021 = r_y2 * r_z * Dt_7 + r_z * Dt_5; - pot->D_102 = r_z2 * r_x * Dt_7 + r_x * Dt_5; - pot->D_012 = r_z2 * r_y * Dt_7 + r_y * Dt_5; - pot->D_111 = r_x * r_y * r_z * Dt_7; + pot->D_300 = rx_r3 * Dt_4 + 3.f * rx_r * Dt_3; + pot->D_030 = ry_r3 * Dt_4 + 3.f * ry_r * Dt_3; + pot->D_003 = rz_r3 * Dt_4 + 3.f * rz_r * Dt_3; + pot->D_210 = rx_r2 * ry_r * Dt_4 + ry_r * Dt_3; + pot->D_201 = rx_r2 * rz_r * Dt_4 + rz_r * Dt_3; + pot->D_120 = ry_r2 * rx_r * Dt_4 + rx_r * Dt_3; + pot->D_021 = ry_r2 * rz_r * Dt_4 + rz_r * Dt_3; + pot->D_102 = rz_r2 * rx_r * Dt_4 + rx_r * Dt_3; + pot->D_012 = rz_r2 * ry_r * Dt_4 + ry_r * Dt_3; + 
pot->D_111 = rx_r * ry_r * rz_r * Dt_4; #endif #if SELF_GRAVITY_MULTIPOLE_ORDER > 3 + + Dt_3 *= r_inv; + Dt_4 *= r_inv; + /* 4th order derivatives */ - pot->D_400 = r_x4 * Dt_9 + 6.f * r_x2 * Dt_7 + 3.f * Dt_5; - pot->D_040 = r_y4 * Dt_9 + 6.f * r_y2 * Dt_7 + 3.f * Dt_5; - pot->D_004 = r_z4 * Dt_9 + 6.f * r_z2 * Dt_7 + 3.f * Dt_5; - pot->D_310 = r_x3 * r_y * Dt_9 + 3.f * r_x * r_y * Dt_7; - pot->D_301 = r_x3 * r_z * Dt_9 + 3.f * r_x * r_z * Dt_7; - pot->D_130 = r_y3 * r_x * Dt_9 + 3.f * r_y * r_x * Dt_7; - pot->D_031 = r_y3 * r_z * Dt_9 + 3.f * r_y * r_z * Dt_7; - pot->D_103 = r_z3 * r_x * Dt_9 + 3.f * r_z * r_x * Dt_7; - pot->D_013 = r_z3 * r_y * Dt_9 + 3.f * r_z * r_y * Dt_7; - pot->D_220 = r_x2 * r_y2 * Dt_9 + r_x2 * Dt_7 + r_y2 * Dt_7 + Dt_5; - pot->D_202 = r_x2 * r_z2 * Dt_9 + r_x2 * Dt_7 + r_z2 * Dt_7 + Dt_5; - pot->D_022 = r_y2 * r_z2 * Dt_9 + r_y2 * Dt_7 + r_z2 * Dt_7 + Dt_5; - pot->D_211 = r_x2 * r_y * r_z * Dt_9 + r_y * r_z * Dt_7; - pot->D_121 = r_y2 * r_x * r_z * Dt_9 + r_x * r_z * Dt_7; - pot->D_112 = r_z2 * r_x * r_y * Dt_9 + r_x * r_y * Dt_7; + pot->D_400 = rx_r4 * Dt_5 + 6.f * rx_r2 * Dt_4 + 3.f * Dt_3; + pot->D_040 = ry_r4 * Dt_5 + 6.f * ry_r2 * Dt_4 + 3.f * Dt_3; + pot->D_004 = rz_r4 * Dt_5 + 6.f * rz_r2 * Dt_4 + 3.f * Dt_3; + pot->D_310 = rx_r3 * ry_r * Dt_5 + 3.f * rx_r * ry_r * Dt_4; + pot->D_301 = rx_r3 * rz_r * Dt_5 + 3.f * rx_r * rz_r * Dt_4; + pot->D_130 = ry_r3 * rx_r * Dt_5 + 3.f * ry_r * rx_r * Dt_4; + pot->D_031 = ry_r3 * rz_r * Dt_5 + 3.f * ry_r * rz_r * Dt_4; + pot->D_103 = rz_r3 * rx_r * Dt_5 + 3.f * rz_r * rx_r * Dt_4; + pot->D_013 = rz_r3 * ry_r * Dt_5 + 3.f * rz_r * ry_r * Dt_4; + pot->D_220 = rx_r2 * ry_r2 * Dt_5 + rx_r2 * Dt_4 + ry_r2 * Dt_4 + Dt_3; + pot->D_202 = rx_r2 * rz_r2 * Dt_5 + rx_r2 * Dt_4 + rz_r2 * Dt_4 + Dt_3; + pot->D_022 = ry_r2 * rz_r2 * Dt_5 + ry_r2 * Dt_4 + rz_r2 * Dt_4 + Dt_3; + pot->D_211 = rx_r2 * ry_r * rz_r * Dt_5 + ry_r * rz_r * Dt_4; + pot->D_121 = ry_r2 * rx_r * rz_r * Dt_5 + rx_r * rz_r * Dt_4; + 
pot->D_112 = rz_r2 * rx_r * ry_r * Dt_5 + rx_r * ry_r * Dt_4; #endif #if SELF_GRAVITY_MULTIPOLE_ORDER > 4 + + Dt_4 *= r_inv; + Dt_5 *= r_inv; + /* 5th order derivatives */ - pot->D_500 = r_x5 * Dt_11 + 10.f * r_x3 * Dt_9 + 15.f * r_x * Dt_7; - pot->D_050 = r_y5 * Dt_11 + 10.f * r_y3 * Dt_9 + 15.f * r_y * Dt_7; - pot->D_005 = r_z5 * Dt_11 + 10.f * r_z3 * Dt_9 + 15.f * r_z * Dt_7; - pot->D_410 = r_x4 * r_y * Dt_11 + 6.f * r_x2 * r_y * Dt_9 + 3.f * r_y * Dt_7; - pot->D_401 = r_x4 * r_z * Dt_11 + 6.f * r_x2 * r_z * Dt_9 + 3.f * r_z * Dt_7; - pot->D_140 = r_y4 * r_x * Dt_11 + 6.f * r_y2 * r_x * Dt_9 + 3.f * r_x * Dt_7; - pot->D_041 = r_y4 * r_z * Dt_11 + 6.f * r_y2 * r_z * Dt_9 + 3.f * r_z * Dt_7; - pot->D_104 = r_z4 * r_x * Dt_11 + 6.f * r_z2 * r_x * Dt_9 + 3.f * r_x * Dt_7; - pot->D_014 = r_z4 * r_y * Dt_11 + 6.f * r_z2 * r_y * Dt_9 + 3.f * r_y * Dt_7; - pot->D_320 = r_x3 * r_y2 * Dt_11 + r_x3 * Dt_9 + 3.f * r_x * r_y2 * Dt_9 + - 3.f * r_x * Dt_7; - pot->D_302 = r_x3 * r_z2 * Dt_11 + r_x3 * Dt_9 + 3.f * r_x * r_z2 * Dt_9 + - 3.f * r_x * Dt_7; - pot->D_230 = r_y3 * r_x2 * Dt_11 + r_y3 * Dt_9 + 3.f * r_y * r_x2 * Dt_9 + - 3.f * r_y * Dt_7; - pot->D_032 = r_y3 * r_z2 * Dt_11 + r_y3 * Dt_9 + 3.f * r_y * r_z2 * Dt_9 + - 3.f * r_y * Dt_7; - pot->D_203 = r_z3 * r_x2 * Dt_11 + r_z3 * Dt_9 + 3.f * r_z * r_x2 * Dt_9 + - 3.f * r_z * Dt_7; - pot->D_023 = r_z3 * r_y2 * Dt_11 + r_z3 * Dt_9 + 3.f * r_z * r_y2 * Dt_9 + - 3.f * r_z * Dt_7; - pot->D_311 = r_x3 * r_y * r_z * Dt_11 + 3.f * r_x * r_y * r_z * Dt_9; - pot->D_131 = r_y3 * r_x * r_z * Dt_11 + 3.f * r_x * r_y * r_z * Dt_9; - pot->D_113 = r_z3 * r_x * r_y * Dt_11 + 3.f * r_x * r_y * r_z * Dt_9; - pot->D_122 = r_x * r_y2 * r_z2 * Dt_11 + r_x * r_y2 * Dt_9 + - r_x * r_z2 * Dt_9 + r_x * Dt_7; - pot->D_212 = r_y * r_x2 * r_z2 * Dt_11 + r_y * r_x2 * Dt_9 + - r_y * r_z2 * Dt_9 + r_y * Dt_7; - pot->D_221 = r_z * r_x2 * r_y2 * Dt_11 + r_z * r_x2 * Dt_9 + - r_z * r_y2 * Dt_9 + r_z * Dt_7; + pot->D_500 = rx_r5 * Dt_6 + 10.f * rx_r3 * 
Dt_5 + 15.f * rx_r * Dt_4; + pot->D_050 = ry_r5 * Dt_6 + 10.f * ry_r3 * Dt_5 + 15.f * ry_r * Dt_4; + pot->D_005 = rz_r5 * Dt_6 + 10.f * rz_r3 * Dt_5 + 15.f * rz_r * Dt_4; + pot->D_410 = + rx_r4 * ry_r * Dt_6 + 6.f * rx_r2 * ry_r * Dt_5 + 3.f * ry_r * Dt_4; + pot->D_401 = + rx_r4 * rz_r * Dt_6 + 6.f * rx_r2 * rz_r * Dt_5 + 3.f * rz_r * Dt_4; + pot->D_140 = + ry_r4 * rx_r * Dt_6 + 6.f * ry_r2 * rx_r * Dt_5 + 3.f * rx_r * Dt_4; + pot->D_041 = + ry_r4 * rz_r * Dt_6 + 6.f * ry_r2 * rz_r * Dt_5 + 3.f * rz_r * Dt_4; + pot->D_104 = + rz_r4 * rx_r * Dt_6 + 6.f * rz_r2 * rx_r * Dt_5 + 3.f * rx_r * Dt_4; + pot->D_014 = + rz_r4 * ry_r * Dt_6 + 6.f * rz_r2 * ry_r * Dt_5 + 3.f * ry_r * Dt_4; + pot->D_320 = rx_r3 * ry_r2 * Dt_6 + rx_r3 * Dt_5 + 3.f * rx_r * ry_r2 * Dt_5 + + 3.f * rx_r * Dt_4; + pot->D_302 = rx_r3 * rz_r2 * Dt_6 + rx_r3 * Dt_5 + 3.f * rx_r * rz_r2 * Dt_5 + + 3.f * rx_r * Dt_4; + pot->D_230 = ry_r3 * rx_r2 * Dt_6 + ry_r3 * Dt_5 + 3.f * ry_r * rx_r2 * Dt_5 + + 3.f * ry_r * Dt_4; + pot->D_032 = ry_r3 * rz_r2 * Dt_6 + ry_r3 * Dt_5 + 3.f * ry_r * rz_r2 * Dt_5 + + 3.f * ry_r * Dt_4; + pot->D_203 = rz_r3 * rx_r2 * Dt_6 + rz_r3 * Dt_5 + 3.f * rz_r * rx_r2 * Dt_5 + + 3.f * rz_r * Dt_4; + pot->D_023 = rz_r3 * ry_r2 * Dt_6 + rz_r3 * Dt_5 + 3.f * rz_r * ry_r2 * Dt_5 + + 3.f * rz_r * Dt_4; + pot->D_311 = rx_r3 * ry_r * rz_r * Dt_6 + 3.f * rx_r * ry_r * rz_r * Dt_5; + pot->D_131 = ry_r3 * rx_r * rz_r * Dt_6 + 3.f * rx_r * ry_r * rz_r * Dt_5; + pot->D_113 = rz_r3 * rx_r * ry_r * Dt_6 + 3.f * rx_r * ry_r * rz_r * Dt_5; + pot->D_122 = rx_r * ry_r2 * rz_r2 * Dt_6 + rx_r * ry_r2 * Dt_5 + + rx_r * rz_r2 * Dt_5 + rx_r * Dt_4; + pot->D_212 = ry_r * rx_r2 * rz_r2 * Dt_6 + ry_r * rx_r2 * Dt_5 + + ry_r * rz_r2 * Dt_5 + ry_r * Dt_4; + pot->D_221 = rz_r * rx_r2 * ry_r2 * Dt_6 + rz_r * rx_r2 * Dt_5 + + rz_r * ry_r2 * Dt_5 + rz_r * Dt_4; #endif #if SELF_GRAVITY_MULTIPOLE_ORDER > 5 #error "Missing implementation for orders >5" @@ -450,46 +519,68 @@ potential_derivatives_compute_M2P(const float 
r_x, const float r_y, const int periodic, const float r_s_inv, struct potential_derivatives_M2P *pot) { -#ifdef SWIFT_DEBUG_CHECKS - if (r2 < 0.99f * eps * eps) - error("Computing M2P derivatives below softening length"); -#endif - float Dt_1; - float Dt_3; + float Dt_2; #if SELF_GRAVITY_MULTIPOLE_ORDER > 0 + float Dt_3; +#endif +#if SELF_GRAVITY_MULTIPOLE_ORDER > 1 + float Dt_4; +#endif +#if SELF_GRAVITY_MULTIPOLE_ORDER > 2 float Dt_5; #endif +#if SELF_GRAVITY_MULTIPOLE_ORDER > 3 + float Dt_6; +#endif + + /* Softened case */ + if (r2 < eps * eps) { + + const float eps_inv = 1.f / eps; + const float r = r2 * r_inv; + const float u = r * eps_inv; + + Dt_1 = eps_inv * D_soft_1(u); + + const float eps_inv2 = eps_inv * eps_inv; + Dt_2 = eps_inv2 * D_soft_2(u); +#if SELF_GRAVITY_MULTIPOLE_ORDER > 0 + const float eps_inv3 = eps_inv2 * eps_inv; + Dt_3 = eps_inv3 * D_soft_3(u); +#endif #if SELF_GRAVITY_MULTIPOLE_ORDER > 1 - float Dt_7; + const float eps_inv4 = eps_inv3 * eps_inv; + Dt_4 = eps_inv4 * D_soft_4(u); #endif #if SELF_GRAVITY_MULTIPOLE_ORDER > 2 - float Dt_9; + const float eps_inv5 = eps_inv4 * eps_inv; + Dt_5 = eps_inv5 * D_soft_5(u); #endif #if SELF_GRAVITY_MULTIPOLE_ORDER > 3 - float Dt_11; + const float eps_inv6 = eps_inv5 * eps_inv; + Dt_6 = eps_inv6 * D_soft_6(u); #endif - /* Un-truncated case (Newtonian potential) */ - if (!periodic) { + /* Un-truncated un-softened case (Newtonian potential) */ + } else if (!periodic) { - const float r_inv2 = r_inv * r_inv; - Dt_1 = r_inv; - Dt_3 = -1.f * Dt_1 * r_inv2; /* -1 / r^3 */ + Dt_1 = r_inv; /* 1 / r */ + Dt_2 = -1.f * Dt_1 * r_inv; /* -1 / r^2 */ #if SELF_GRAVITY_MULTIPOLE_ORDER > 0 - Dt_5 = -3.f * Dt_3 * r_inv2; /* 3 / r^5 */ + Dt_3 = -3.f * Dt_2 * r_inv; /* 3 / r^3 */ #endif #if SELF_GRAVITY_MULTIPOLE_ORDER > 1 - Dt_7 = -5.f * Dt_5 * r_inv2; /* -15 / r^7 */ + Dt_4 = -5.f * Dt_3 * r_inv; /* -15 / r^4 */ #endif #if SELF_GRAVITY_MULTIPOLE_ORDER > 2 - Dt_9 = -7.f * Dt_7 * r_inv2; /* 105 / r^9 */ + Dt_5 = -7.f * 
Dt_4 * r_inv; /* 105 / r^5 */ #endif #if SELF_GRAVITY_MULTIPOLE_ORDER > 3 - Dt_11 = -9.f * Dt_9 * r_inv2; /* -945 / r^11 */ + Dt_6 = -9.f * Dt_5 * r_inv; /* -945 / r^6 */ #endif - /* Truncated case */ + /* Truncated case (long-range) */ } else { /* Get the derivatives of the truncated potential */ @@ -497,143 +588,186 @@ potential_derivatives_compute_M2P(const float r_x, const float r_y, struct chi_derivatives derivs; kernel_long_grav_derivatives(r, r_s_inv, &derivs); - const float r_inv2 = r_inv * r_inv; - const float r_inv3 = r_inv2 * r_inv; Dt_1 = derivs.chi_0 * r_inv; - Dt_3 = (r * derivs.chi_1 - derivs.chi_0) * r_inv3; + + /* -chi^0 r_i^2 + chi^1 r_i^1 */ + Dt_2 = derivs.chi_1 - derivs.chi_0 * r_inv; + Dt_2 = Dt_2 * r_inv; + #if SELF_GRAVITY_MULTIPOLE_ORDER > 0 - const float r_inv5 = r_inv2 * r_inv3; - Dt_5 = - (r * r * derivs.chi_2 - 3.f * r * derivs.chi_1 + 3.f * derivs.chi_0) * - r_inv5; + + /* 3chi^0 r_i^3 - 3 chi^1 r_i^2 + chi^2 r_i^1 */ + Dt_3 = derivs.chi_0 * r_inv - derivs.chi_1; + Dt_3 = Dt_3 * 3.f; + Dt_3 = Dt_3 * r_inv + derivs.chi_2; + Dt_3 = Dt_3 * r_inv; + #endif #if SELF_GRAVITY_MULTIPOLE_ORDER > 1 - const float r_inv7 = r_inv2 * r_inv5; - Dt_7 = (r * r * r * derivs.chi_3 - 6.f * r * r * derivs.chi_2 + - 15.f * r * derivs.chi_1 - 15.f * derivs.chi_0) * - r_inv7; + + /* -15chi^0 r_i^4 + 15 chi^1 r_i^3 - 6 chi^2 r_i^2 + chi^3 r_i^1 */ + Dt_4 = -derivs.chi_0 * r_inv + derivs.chi_1; + Dt_4 = Dt_4 * 15.f; + Dt_4 = Dt_4 * r_inv - 6.f * derivs.chi_2; + Dt_4 = Dt_4 * r_inv + derivs.chi_3; + Dt_4 = Dt_4 * r_inv; + #endif #if SELF_GRAVITY_MULTIPOLE_ORDER > 2 - const float r_inv9 = r_inv2 * r_inv7; - Dt_9 = (r * r * r * r * derivs.chi_4 - 10.f * r * r * r * derivs.chi_3 + - 45.f * r * r * derivs.chi_2 - 105.f * r * derivs.chi_1 + - 105.f * derivs.chi_0) * - r_inv9; + + /* 105chi^0 r_i^5 - 105 chi^1 r_i^4 + 45 chi^2 r_i^3 - 10 chi^3 r_i^2 + + * chi^4 r_i^1 */ + Dt_5 = derivs.chi_0 * r_inv - derivs.chi_1; + Dt_5 = Dt_5 * 105.f; + Dt_5 = Dt_5 * r_inv + 45.f * 
derivs.chi_2; + Dt_5 = Dt_5 * r_inv - 10.f * derivs.chi_3; + Dt_5 = Dt_5 * r_inv + derivs.chi_4; + Dt_5 = Dt_5 * r_inv; + #endif #if SELF_GRAVITY_MULTIPOLE_ORDER > 3 - const float r_inv11 = r_inv2 * r_inv9; - Dt_11 = (r * r * r * r * r * derivs.chi_5 - - 15.f * r * r * r * r * derivs.chi_4 + - 105.f * r * r * r * derivs.chi_3 - 420.f * r * r * derivs.chi_2 + - 945.f * r * derivs.chi_1 - 945.f * derivs.chi_0) * - r_inv11; + + /* -945chi^0 r_i^6 + 945 chi^1 r_i^5 - 420 chi^2 r_i^4 + 105 chi^3 r_i^3 - + * 15 chi^4 r_i^2 + chi^5 r_i^1 */ + Dt_6 = -derivs.chi_0 * r_inv + derivs.chi_1; + Dt_6 = Dt_6 * 945.f; + Dt_6 = Dt_6 * r_inv - 420.f * derivs.chi_2; + Dt_6 = Dt_6 * r_inv + 105.f * derivs.chi_3; + Dt_6 = Dt_6 * r_inv - 15.f * derivs.chi_4; + Dt_6 = Dt_6 * r_inv + derivs.chi_5; + Dt_6 = Dt_6 * r_inv; + #endif } -/* Alright, let's get the full terms */ + /* Alright, let's get the full terms */ + + /* Compute some powers of (r_x / r), (r_y / r) and (r_z / r) */ + const float rx_r = r_x * r_inv; + const float ry_r = r_y * r_inv; + const float rz_r = r_z * r_inv; -/* Compute some powers of r_x, r_y and r_z */ #if SELF_GRAVITY_MULTIPOLE_ORDER > 0 - const float r_x2 = r_x * r_x; - const float r_y2 = r_y * r_y; - const float r_z2 = r_z * r_z; + const float rx_r2 = rx_r * rx_r; + const float ry_r2 = ry_r * ry_r; + const float rz_r2 = rz_r * rz_r; #endif #if SELF_GRAVITY_MULTIPOLE_ORDER > 1 - const float r_x3 = r_x2 * r_x; - const float r_y3 = r_y2 * r_y; - const float r_z3 = r_z2 * r_z; + const float rx_r3 = rx_r2 * rx_r; + const float ry_r3 = ry_r2 * ry_r; + const float rz_r3 = rz_r2 * rz_r; #endif #if SELF_GRAVITY_MULTIPOLE_ORDER > 2 - const float r_x4 = r_x3 * r_x; - const float r_y4 = r_y3 * r_y; - const float r_z4 = r_z3 * r_z; + const float rx_r4 = rx_r3 * rx_r; + const float ry_r4 = ry_r3 * ry_r; + const float rz_r4 = rz_r3 * rz_r; #endif #if SELF_GRAVITY_MULTIPOLE_ORDER > 3 - const float r_x5 = r_x4 * r_x; - const float r_y5 = r_y4 * r_y; - const float r_z5 = r_z4 * 
r_z; + const float rx_r5 = rx_r4 * rx_r; + const float ry_r5 = ry_r4 * ry_r; + const float rz_r5 = rz_r4 * rz_r; #endif /* Get the 0th order term */ pot->D_000 = Dt_1; /* 1st order derivatives */ - pot->D_100 = r_x * Dt_3; - pot->D_010 = r_y * Dt_3; - pot->D_001 = r_z * Dt_3; + pot->D_100 = rx_r * Dt_2; + pot->D_010 = ry_r * Dt_2; + pot->D_001 = rz_r * Dt_2; #if SELF_GRAVITY_MULTIPOLE_ORDER > 0 + + Dt_2 *= r_inv; + /* 2nd order derivatives */ - pot->D_200 = r_x2 * Dt_5 + Dt_3; - pot->D_020 = r_y2 * Dt_5 + Dt_3; - pot->D_002 = r_z2 * Dt_5 + Dt_3; - pot->D_110 = r_x * r_y * Dt_5; - pot->D_101 = r_x * r_z * Dt_5; - pot->D_011 = r_y * r_z * Dt_5; + pot->D_200 = rx_r2 * Dt_3 + Dt_2; + pot->D_020 = ry_r2 * Dt_3 + Dt_2; + pot->D_002 = rz_r2 * Dt_3 + Dt_2; + pot->D_110 = rx_r * ry_r * Dt_3; + pot->D_101 = rx_r * rz_r * Dt_3; + pot->D_011 = ry_r * rz_r * Dt_3; #endif #if SELF_GRAVITY_MULTIPOLE_ORDER > 1 + + Dt_3 *= r_inv; + /* 3rd order derivatives */ - pot->D_300 = r_x3 * Dt_7 + 3.f * r_x * Dt_5; - pot->D_030 = r_y3 * Dt_7 + 3.f * r_y * Dt_5; - pot->D_003 = r_z3 * Dt_7 + 3.f * r_z * Dt_5; - pot->D_210 = r_x2 * r_y * Dt_7 + r_y * Dt_5; - pot->D_201 = r_x2 * r_z * Dt_7 + r_z * Dt_5; - pot->D_120 = r_y2 * r_x * Dt_7 + r_x * Dt_5; - pot->D_021 = r_y2 * r_z * Dt_7 + r_z * Dt_5; - pot->D_102 = r_z2 * r_x * Dt_7 + r_x * Dt_5; - pot->D_012 = r_z2 * r_y * Dt_7 + r_y * Dt_5; - pot->D_111 = r_x * r_y * r_z * Dt_7; + pot->D_300 = rx_r3 * Dt_4 + 3.f * rx_r * Dt_3; + pot->D_030 = ry_r3 * Dt_4 + 3.f * ry_r * Dt_3; + pot->D_003 = rz_r3 * Dt_4 + 3.f * rz_r * Dt_3; + pot->D_210 = rx_r2 * ry_r * Dt_4 + ry_r * Dt_3; + pot->D_201 = rx_r2 * rz_r * Dt_4 + rz_r * Dt_3; + pot->D_120 = ry_r2 * rx_r * Dt_4 + rx_r * Dt_3; + pot->D_021 = ry_r2 * rz_r * Dt_4 + rz_r * Dt_3; + pot->D_102 = rz_r2 * rx_r * Dt_4 + rx_r * Dt_3; + pot->D_012 = rz_r2 * ry_r * Dt_4 + ry_r * Dt_3; + pot->D_111 = rx_r * ry_r * rz_r * Dt_4; #endif #if SELF_GRAVITY_MULTIPOLE_ORDER > 2 + + Dt_3 *= r_inv; + Dt_4 *= r_inv; + /* 4th 
order derivatives */ - pot->D_400 = r_x4 * Dt_9 + 6.f * r_x2 * Dt_7 + 3.f * Dt_5; - pot->D_040 = r_y4 * Dt_9 + 6.f * r_y2 * Dt_7 + 3.f * Dt_5; - pot->D_004 = r_z4 * Dt_9 + 6.f * r_z2 * Dt_7 + 3.f * Dt_5; - pot->D_310 = r_x3 * r_y * Dt_9 + 3.f * r_x * r_y * Dt_7; - pot->D_301 = r_x3 * r_z * Dt_9 + 3.f * r_x * r_z * Dt_7; - pot->D_130 = r_y3 * r_x * Dt_9 + 3.f * r_y * r_x * Dt_7; - pot->D_031 = r_y3 * r_z * Dt_9 + 3.f * r_y * r_z * Dt_7; - pot->D_103 = r_z3 * r_x * Dt_9 + 3.f * r_z * r_x * Dt_7; - pot->D_013 = r_z3 * r_y * Dt_9 + 3.f * r_z * r_y * Dt_7; - pot->D_220 = r_x2 * r_y2 * Dt_9 + r_x2 * Dt_7 + r_y2 * Dt_7 + Dt_5; - pot->D_202 = r_x2 * r_z2 * Dt_9 + r_x2 * Dt_7 + r_z2 * Dt_7 + Dt_5; - pot->D_022 = r_y2 * r_z2 * Dt_9 + r_y2 * Dt_7 + r_z2 * Dt_7 + Dt_5; - pot->D_211 = r_x2 * r_y * r_z * Dt_9 + r_y * r_z * Dt_7; - pot->D_121 = r_y2 * r_x * r_z * Dt_9 + r_x * r_z * Dt_7; - pot->D_112 = r_z2 * r_x * r_y * Dt_9 + r_x * r_y * Dt_7; + pot->D_400 = rx_r4 * Dt_5 + 6.f * rx_r2 * Dt_4 + 3.f * Dt_3; + pot->D_040 = ry_r4 * Dt_5 + 6.f * ry_r2 * Dt_4 + 3.f * Dt_3; + pot->D_004 = rz_r4 * Dt_5 + 6.f * rz_r2 * Dt_4 + 3.f * Dt_3; + pot->D_310 = rx_r3 * ry_r * Dt_5 + 3.f * rx_r * ry_r * Dt_4; + pot->D_301 = rx_r3 * rz_r * Dt_5 + 3.f * rx_r * rz_r * Dt_4; + pot->D_130 = ry_r3 * rx_r * Dt_5 + 3.f * ry_r * rx_r * Dt_4; + pot->D_031 = ry_r3 * rz_r * Dt_5 + 3.f * ry_r * rz_r * Dt_4; + pot->D_103 = rz_r3 * rx_r * Dt_5 + 3.f * rz_r * rx_r * Dt_4; + pot->D_013 = rz_r3 * ry_r * Dt_5 + 3.f * rz_r * ry_r * Dt_4; + pot->D_220 = rx_r2 * ry_r2 * Dt_5 + rx_r2 * Dt_4 + ry_r2 * Dt_4 + Dt_3; + pot->D_202 = rx_r2 * rz_r2 * Dt_5 + rx_r2 * Dt_4 + rz_r2 * Dt_4 + Dt_3; + pot->D_022 = ry_r2 * rz_r2 * Dt_5 + ry_r2 * Dt_4 + rz_r2 * Dt_4 + Dt_3; + pot->D_211 = rx_r2 * ry_r * rz_r * Dt_5 + ry_r * rz_r * Dt_4; + pot->D_121 = ry_r2 * rx_r * rz_r * Dt_5 + rx_r * rz_r * Dt_4; + pot->D_112 = rz_r2 * rx_r * ry_r * Dt_5 + rx_r * ry_r * Dt_4; #endif #if SELF_GRAVITY_MULTIPOLE_ORDER > 3 + + Dt_4 *= r_inv; + Dt_5 *= 
r_inv; + /* 5th order derivatives */ - pot->D_500 = r_x5 * Dt_11 + 10.f * r_x3 * Dt_9 + 15.f * r_x * Dt_7; - pot->D_050 = r_y5 * Dt_11 + 10.f * r_y3 * Dt_9 + 15.f * r_y * Dt_7; - pot->D_005 = r_z5 * Dt_11 + 10.f * r_z3 * Dt_9 + 15.f * r_z * Dt_7; - pot->D_410 = r_x4 * r_y * Dt_11 + 6.f * r_x2 * r_y * Dt_9 + 3.f * r_y * Dt_7; - pot->D_401 = r_x4 * r_z * Dt_11 + 6.f * r_x2 * r_z * Dt_9 + 3.f * r_z * Dt_7; - pot->D_140 = r_y4 * r_x * Dt_11 + 6.f * r_y2 * r_x * Dt_9 + 3.f * r_x * Dt_7; - pot->D_041 = r_y4 * r_z * Dt_11 + 6.f * r_y2 * r_z * Dt_9 + 3.f * r_z * Dt_7; - pot->D_104 = r_z4 * r_x * Dt_11 + 6.f * r_z2 * r_x * Dt_9 + 3.f * r_x * Dt_7; - pot->D_014 = r_z4 * r_y * Dt_11 + 6.f * r_z2 * r_y * Dt_9 + 3.f * r_y * Dt_7; - pot->D_320 = r_x3 * r_y2 * Dt_11 + r_x3 * Dt_9 + 3.f * r_x * r_y2 * Dt_9 + - 3.f * r_x * Dt_7; - pot->D_302 = r_x3 * r_z2 * Dt_11 + r_x3 * Dt_9 + 3.f * r_x * r_z2 * Dt_9 + - 3.f * r_x * Dt_7; - pot->D_230 = r_y3 * r_x2 * Dt_11 + r_y3 * Dt_9 + 3.f * r_y * r_x2 * Dt_9 + - 3.f * r_y * Dt_7; - pot->D_032 = r_y3 * r_z2 * Dt_11 + r_y3 * Dt_9 + 3.f * r_y * r_z2 * Dt_9 + - 3.f * r_y * Dt_7; - pot->D_203 = r_z3 * r_x2 * Dt_11 + r_z3 * Dt_9 + 3.f * r_z * r_x2 * Dt_9 + - 3.f * r_z * Dt_7; - pot->D_023 = r_z3 * r_y2 * Dt_11 + r_z3 * Dt_9 + 3.f * r_z * r_y2 * Dt_9 + - 3.f * r_z * Dt_7; - pot->D_311 = r_x3 * r_y * r_z * Dt_11 + 3.f * r_x * r_y * r_z * Dt_9; - pot->D_131 = r_y3 * r_x * r_z * Dt_11 + 3.f * r_x * r_y * r_z * Dt_9; - pot->D_113 = r_z3 * r_x * r_y * Dt_11 + 3.f * r_x * r_y * r_z * Dt_9; - pot->D_122 = r_x * r_y2 * r_z2 * Dt_11 + r_x * r_y2 * Dt_9 + - r_x * r_z2 * Dt_9 + r_x * Dt_7; - pot->D_212 = r_y * r_x2 * r_z2 * Dt_11 + r_y * r_x2 * Dt_9 + - r_y * r_z2 * Dt_9 + r_y * Dt_7; - pot->D_221 = r_z * r_x2 * r_y2 * Dt_11 + r_z * r_x2 * Dt_9 + - r_z * r_y2 * Dt_9 + r_z * Dt_7; + pot->D_500 = rx_r5 * Dt_6 + 10.f * rx_r3 * Dt_5 + 15.f * rx_r * Dt_4; + pot->D_050 = ry_r5 * Dt_6 + 10.f * ry_r3 * Dt_5 + 15.f * ry_r * Dt_4; + pot->D_005 = rz_r5 * Dt_6 + 10.f * 
rz_r3 * Dt_5 + 15.f * rz_r * Dt_4; + pot->D_410 = + rx_r4 * ry_r * Dt_6 + 6.f * rx_r2 * ry_r * Dt_5 + 3.f * ry_r * Dt_4; + pot->D_401 = + rx_r4 * rz_r * Dt_6 + 6.f * rx_r2 * rz_r * Dt_5 + 3.f * rz_r * Dt_4; + pot->D_140 = + ry_r4 * rx_r * Dt_6 + 6.f * ry_r2 * rx_r * Dt_5 + 3.f * rx_r * Dt_4; + pot->D_041 = + ry_r4 * rz_r * Dt_6 + 6.f * ry_r2 * rz_r * Dt_5 + 3.f * rz_r * Dt_4; + pot->D_104 = + rz_r4 * rx_r * Dt_6 + 6.f * rz_r2 * rx_r * Dt_5 + 3.f * rx_r * Dt_4; + pot->D_014 = + rz_r4 * ry_r * Dt_6 + 6.f * rz_r2 * ry_r * Dt_5 + 3.f * ry_r * Dt_4; + pot->D_320 = rx_r3 * ry_r2 * Dt_6 + rx_r3 * Dt_5 + 3.f * rx_r * ry_r2 * Dt_5 + + 3.f * rx_r * Dt_4; + pot->D_302 = rx_r3 * rz_r2 * Dt_6 + rx_r3 * Dt_5 + 3.f * rx_r * rz_r2 * Dt_5 + + 3.f * rx_r * Dt_4; + pot->D_230 = ry_r3 * rx_r2 * Dt_6 + ry_r3 * Dt_5 + 3.f * ry_r * rx_r2 * Dt_5 + + 3.f * ry_r * Dt_4; + pot->D_032 = ry_r3 * rz_r2 * Dt_6 + ry_r3 * Dt_5 + 3.f * ry_r * rz_r2 * Dt_5 + + 3.f * ry_r * Dt_4; + pot->D_203 = rz_r3 * rx_r2 * Dt_6 + rz_r3 * Dt_5 + 3.f * rz_r * rx_r2 * Dt_5 + + 3.f * rz_r * Dt_4; + pot->D_023 = rz_r3 * ry_r2 * Dt_6 + rz_r3 * Dt_5 + 3.f * rz_r * ry_r2 * Dt_5 + + 3.f * rz_r * Dt_4; + pot->D_311 = rx_r3 * ry_r * rz_r * Dt_6 + 3.f * rx_r * ry_r * rz_r * Dt_5; + pot->D_131 = ry_r3 * rx_r * rz_r * Dt_6 + 3.f * rx_r * ry_r * rz_r * Dt_5; + pot->D_113 = rz_r3 * rx_r * ry_r * Dt_6 + 3.f * rx_r * ry_r * rz_r * Dt_5; + pot->D_122 = rx_r * ry_r2 * rz_r2 * Dt_6 + rx_r * ry_r2 * Dt_5 + + rx_r * rz_r2 * Dt_5 + rx_r * Dt_4; + pot->D_212 = ry_r * rx_r2 * rz_r2 * Dt_6 + ry_r * rx_r2 * Dt_5 + + ry_r * rz_r2 * Dt_5 + ry_r * Dt_4; + pot->D_221 = rz_r * rx_r2 * ry_r2 * Dt_6 + rz_r * rx_r2 * Dt_5 + + rz_r * ry_r2 * Dt_5 + rz_r * Dt_4; #endif } diff --git a/src/gravity_properties.c b/src/gravity_properties.c index cd7a8909170810997684c67a54552c9fe1f0618e..a58e822ab808d08aab48b746912c601f20fcaa54 100644 --- a/src/gravity_properties.c +++ b/src/gravity_properties.c @@ -23,6 +23,7 @@ /* Standard headers */ #include <float.h> 
#include <math.h> +#include <string.h> /* Local headers. */ #include "adiabatic_index.h" @@ -32,6 +33,7 @@ #include "gravity.h" #include "kernel_gravity.h" #include "kernel_long_gravity.h" +#include "restart.h" #define gravity_props_default_a_smooth 1.25f #define gravity_props_default_r_cut_max 4.5f @@ -43,7 +45,8 @@ void gravity_props_init(struct gravity_props *p, struct swift_params *params, const struct cosmology *cosmo, const int with_cosmology, const int with_external_potential, const int has_baryons, const int has_DM, - const int is_zoom_simulation, const int periodic) { + const int is_zoom_simulation, const int periodic, + const double dim[3]) { /* Tree updates */ p->rebuild_frequency = @@ -63,6 +66,9 @@ void gravity_props_init(struct gravity_props *p, struct swift_params *params, p->r_cut_min_ratio = parser_get_opt_param_float( params, "Gravity:r_cut_min", gravity_props_default_r_cut_min); + p->r_s = p->a_smooth * dim[0] / p->mesh_size; + p->r_s_inv = 1. / p->r_s; + /* Some basic checks of what we read */ if (p->mesh_size % 2 != 0) error("The mesh side-length must be an even number."); @@ -76,6 +82,8 @@ void gravity_props_init(struct gravity_props *p, struct swift_params *params, } else { p->mesh_size = 0; p->a_smooth = 0.f; + p->r_s = FLT_MAX; + p->r_s_inv = 0.f; p->r_cut_min_ratio = 0.f; p->r_cut_max_ratio = 0.f; } @@ -83,16 +91,53 @@ void gravity_props_init(struct gravity_props *p, struct swift_params *params, /* Time integration */ p->eta = parser_get_param_float(params, "Gravity:eta"); - /* Opening angle */ - p->theta_crit = parser_get_param_double(params, "Gravity:theta"); + /* Read the choice of multipole acceptance criterion */ + char buffer[32] = {0}; + parser_get_param_string(params, "Gravity:MAC", buffer); + + if (strcmp(buffer, "adaptive") == 0) { + p->use_adaptive_tolerance = 1; + } else if (strcmp(buffer, "geometric") == 0) { + p->use_adaptive_tolerance = 0; + } else { + error( + "Invalid choice of multipole acceptance criterion: '%s'. 
Should be " + "'adaptive' or 'geometric'", + buffer); + } + + /* We always start with the geometric MAC */ + p->use_advanced_MAC = 0; + + /* Geometric opening angle */ + p->theta_crit = parser_get_param_double(params, "Gravity:theta_cr"); if (p->theta_crit >= 1.) error("Theta too large. FMM won't converge."); - p->theta_crit2 = p->theta_crit * p->theta_crit; - p->theta_crit_inv = 1. / p->theta_crit; + + /* Adaptive opening angle tolerance */ + if (p->use_adaptive_tolerance) + p->adaptive_tolerance = + parser_get_param_float(params, "Gravity:epsilon_fmm"); + + /* Consider truncated forces in the MAC? */ + if (p->use_adaptive_tolerance) + p->consider_truncation_in_MAC = + parser_get_opt_param_int(params, "Gravity:allow_truncation_in_MAC", 0); + + /* Are we allowing tree use below softening? */ + p->use_tree_below_softening = + parser_get_opt_param_int(params, "Gravity:use_tree_below_softening", 0); + +#ifdef GADGET2_SOFTENING_CORRECTION + if (p->use_tree_below_softening) + error( + "Cannot solve gravity via the tree below softening with the " + "Gadget2-type softening kernel"); +#endif /* Mesh dithering */ if (periodic && !with_external_potential) { p->with_dithering = - parser_get_opt_param_int(params, "Gravity:dithering", 1); + parser_get_opt_param_int(params, "Gravity:dithering", 0); if (p->with_dithering) { p->dithering_ratio = parser_get_opt_param_double(params, "Gravity:dithering_ratio", 1.0); @@ -168,6 +213,13 @@ void gravity_props_init(struct gravity_props *p, struct swift_params *params, gravity_props_update(p, cosmo); } +void gravity_props_update_MAC_choice(struct gravity_props *p) { + + /* Now that we have run initial accelerations, + * switch to the better MAC */ + if (p->use_adaptive_tolerance) p->use_advanced_MAC = 1; +} + void gravity_props_update(struct gravity_props *p, const struct cosmology *cosmo) { diff --git a/src/gravity_properties.h b/src/gravity_properties.h index 
8a4abe4bd372c52cccdb880839353d0dcc3d1e25..51ac0cc11e3535a32bb837586851a256d35bf207 100644 --- a/src/gravity_properties.h +++ b/src/gravity_properties.h @@ -26,10 +26,6 @@ #include <hdf5.h> #endif -/* Local includes. */ -#include "kernel_gravity.h" -#include "restart.h" - /* Forward declarations */ struct cosmology; struct phys_const; @@ -58,14 +54,23 @@ struct gravity_props { /* -------------- Properties of the FFM gravity ---------------------- */ + /*! What MAC are we currently using? */ + int use_advanced_MAC; + + /*! Are we using the adaptive opening angle? (as read from param file) */ + int use_adaptive_tolerance; + + /*! Accuracy parameter of the advanced MAC */ + float adaptive_tolerance; + /*! Tree opening angle (Multipole acceptance criterion) */ double theta_crit; - /*! Square of opening angle */ - double theta_crit2; + /*! Are we allowing tree gravity below softening? */ + int use_tree_below_softening; - /*! Inverse of opening angle */ - double theta_crit_inv; + /*! Are we applying long-range truncation to the forces in the MAC? */ + int consider_truncation_in_MAC; /* ------------- Properties of the softened gravity ------------------ */ @@ -111,12 +116,20 @@ struct gravity_props { * a_smooth */ float r_cut_max_ratio; + /*! Long-range gravity mesh scale. */ + float r_s; + + /*! Inverse of the long-range gravity mesh scale. */ + float r_s_inv; + /*! Are we dithering the particles at every rebuild? */ int with_dithering; /*! Fraction of the top-level cell size used to normalize the dithering */ double dithering_ratio; + /* ------------- Physical constants ---------------------------------- */ + /*! 
Gravitational constant (in internal units, copied from the physical * constants) */ float G_Newton; @@ -128,10 +141,11 @@ void gravity_props_init(struct gravity_props *p, struct swift_params *params, const struct cosmology *cosmo, const int with_cosmology, const int with_external_potential, const int has_baryons, const int has_DM, - const int is_zoom_simulation, const int periodic); + const int is_zoom_simulation, const int periodic, + const double dim[3]); void gravity_props_update(struct gravity_props *p, const struct cosmology *cosmo); - +void gravity_props_update_MAC_choice(struct gravity_props *p); #if defined(HAVE_HDF5) void gravity_props_print_snapshot(hid_t h_grpsph, const struct gravity_props *p); diff --git a/src/integer_power.h b/src/integer_power.h new file mode 100644 index 0000000000000000000000000000000000000000..3aec75fd4f828aedd800512060edccf8d18cb8da --- /dev/null +++ b/src/integer_power.h @@ -0,0 +1,118 @@ +/******************************************************************************* + * This file is part of SWIFT. + * Copyright (c) 2020 Matthieu Schaller (schaller@strw.leidenuniv.nl) + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published + * by the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public License + * along with this program. If not, see <http://www.gnu.org/licenses/>. + * + ******************************************************************************/ +#ifndef SWIFT_INTEGER_POWER_H +#define SWIFT_INTEGER_POWER_H + +/* Config parameters. 
*/ +#include "../config.h" + +/* Local headers */ +#include "error.h" +#include "inline.h" + +/* Standard headers */ +#include <math.h> + +/** + * @brief Computes the power of x to the n for a (small) positive integer n. + * + * Only optimized for values 0 <= n <= 8. Defaults to pow() above. + */ +__attribute__((const)) INLINE static double integer_pow(const double x, + const unsigned int n) { + + switch (n) { + case 0: + return 1.; + case 1: + return x; + case 2: + return x * x; + case 3: + return x * x * x; + case 4: { + const double y = x * x; + return y * y; + } + case 5: { + const double y = x * x; + return x * y * y; + } + case 6: { + const double y = x * x; + return y * y * y; + } + case 7: { + const double y = x * x; + return x * y * y * y; + } + case 8: { + const double y = x * x; + const double z = y * y; + return z * z; + } + default: + return pow(x, (double)n); + } +} + +/** + * @brief Computes the power of x to the n for a (small) positive integer n. + * + * Only optimized for values 0 <= n <= 8. Defaults to powf() above. 
+ */ +__attribute__((const)) INLINE static float integer_powf(const float x, + const unsigned int n) { + + switch (n) { + case 0: + return 1.f; + case 1: + return x; + case 2: + return x * x; + case 3: + return x * x * x; + case 4: { + const float y = x * x; + return y * y; + } + case 5: { + const float y = x * x; + return x * y * y; + } + case 6: { + const float y = x * x; + return y * y * y; + } + case 7: { + const float y = x * x; + return x * y * y * y; + } + case 8: { + const float y = x * x; + const float z = y * y; + return z * z; + } + default: + return powf(x, (float)n); + } +} + +#endif /* SWIFT_INTEGER_POWER_H */ diff --git a/src/kernel_gravity.h b/src/kernel_gravity.h index 7f7d2453f7720458e24794db088c96e6ff180944..4f6385ca39d823ed2762b4cedf87cbfbe85ee67b 100644 --- a/src/kernel_gravity.h +++ b/src/kernel_gravity.h @@ -39,63 +39,63 @@ #endif /* GADGET2_SOFTENING_CORRECTION */ /** - * @brief Computes the gravity softening function for potential. + * @brief Computes the gravity softening kernel for the potential. * * This functions assumes 0 < u < 1. * - * @param u The ratio of the distance to the softening length $u = x/h$. - * @param W (return) The value of the kernel function $W(x,h)$. + * @param u The ratio of the distance to the spline softening length $u = x/H$. 
*/ -__attribute__((always_inline, nonnull)) INLINE static void kernel_grav_pot_eval( - const float u, float *const W) { +__attribute__((const)) INLINE static float kernel_grav_pot_eval(const float u) { + float W; #ifdef GADGET2_SOFTENING_CORRECTION if (u < 0.5f) - *W = -2.8f + u * u * (5.333333333333f + u * u * (6.4f * u - 9.6f)); + W = -2.8f + u * u * (5.333333333333f + u * u * (6.4f * u - 9.6f)); else - *W = - -3.2f + 0.066666666667f / u + + W = -3.2f + 0.066666666667f / u + u * u * (10.666666666667f + u * (-16.f + u * (9.6f - 2.133333333333f * u))); #else /* W(u) = 3u^7 - 15u^6 + 28u^5 - 21u^4 + 7u^2 - 3 */ - *W = 3.f * u - 15.f; - *W = *W * u + 28.f; - *W = *W * u - 21.f; - *W = *W * u; - *W = *W * u + 7.f; - *W = *W * u; - *W = *W * u - 3.f; + W = 3.f * u - 15.f; + W = W * u + 28.f; + W = W * u - 21.f; + W = W * u; + W = W * u + 7.f; + W = W * u; + W = W * u - 3.f; #endif + return W; } /** - * @brief Computes the gravity softening function for forces. + * @brief Computes the gravity softening kernel for the forces. * * This functions assumes 0 < u < 1. * - * @param u The ratio of the distance to the softening length $u = x/h$. - * @param W (return) The value of the kernel function $W(x,h)$. + * @param u The ratio of the distance to the spline softening length $u = x/H$. 
*/ -__attribute__((always_inline, nonnull)) INLINE static void -kernel_grav_force_eval(const float u, float *const W) { +__attribute__((const)) INLINE static float kernel_grav_force_eval( + const float u) { + float W; #ifdef GADGET2_SOFTENING_CORRECTION if (u < 0.5f) - *W = 10.6666667f + u * u * (32.f * u - 38.4f); + W = 10.6666667f + u * u * (32.f * u - 38.4f); else - *W = 21.3333333f - 48.f * u + 38.4f * u * u - 10.6666667f * u * u * u - - 0.06666667f / (u * u * u); + W = 21.3333333f - 48.f * u + 38.4f * u * u - 10.6666667f * u * u * u - + 0.06666667f / (u * u * u); #else /* W(u) = 21u^5 - 90u^4 + 140u^3 - 84u^2 + 14 */ - *W = 21.f * u - 90.f; - *W = *W * u + 140.f; - *W = *W * u - 84.f; - *W = *W * u; - *W = *W * u + 14.f; + W = 21.f * u - 90.f; + W = W * u + 140.f; + W = W * u - 84.f; + W = W * u; + W = W * u + 14.f; #endif + return W; } #ifdef SWIFT_GRAVITY_FORCE_CHECKS @@ -162,64 +162,101 @@ __attribute__((always_inline)) INLINE static void kernel_grav_eval_force_double( } #endif /* SWIFT_GRAVITY_FORCE_CHECKS */ -#undef GADGET2_SOFTENING_CORRECTION - /************************************************/ /* Derivatives of softening kernel used for FMM */ /************************************************/ -__attribute__((always_inline)) INLINE static float D_soft_1(float u, - float u_inv) { +__attribute__((const)) INLINE static float D_soft_1(const float u) { + +#ifdef GADGET2_SOFTENING_CORRECTION + error("Invalid choice of softening kernel shape"); +#endif - /* phi(u) = 3u^7 - 15u^6 + 28u^5 - 21u^4 + 7u^2 - 3 */ - float phi = 3.f * u - 15.f; - phi = phi * u + 28.f; - phi = phi * u - 21.f; + /* -3u^7 + 15u^6 - 28u^5 + 21u^4 - 7u^2 + 3 */ + float phi = -3.f * u + 15.f; + phi = phi * u - 28.f; + phi = phi * u + 21.f; phi = phi * u; - phi = phi * u + 7.f; + phi = phi * u - 7.f; phi = phi * u; - phi = phi * u - 3.f; + phi = phi * u + 3.f; return phi; } -__attribute__((always_inline)) INLINE static float D_soft_3(float u, - float u_inv) { +__attribute__((const)) INLINE 
static float D_soft_2(const float u) { - /* phi'(u)/u = 21u^5 - 90u^4 + 140u^3 - 84u^2 + 14 */ - float phi = 21.f * u - 90.f; - phi = phi * u + 140.f; - phi = phi * u - 84.f; +#ifdef GADGET2_SOFTENING_CORRECTION + error("Invalid choice of softening kernel shape"); +#endif + + /* -21u^6 + 90u^5 - 140u^4 + 84u^3 - 14u */ + float phi = -21.f * u + 90.f; + phi = phi * u - 140.f; + phi = phi * u + 84.f; + phi = phi * u; + phi = phi * u - 14.f; phi = phi * u; - phi = phi * u + 14.f; return phi; } -__attribute__((always_inline)) INLINE static float D_soft_5(float u, - float u_inv) { +__attribute__((const)) INLINE static float D_soft_3(const float u) { - /* (phi'(u)/u)'/u = 105u^3 - 360u^2 + 420u - 168 */ - float phi = 105.f * u - 360.f; - phi = phi * u + 420.f; - phi = phi * u - 168.f; +#ifdef GADGET2_SOFTENING_CORRECTION + error("Invalid choice of softening kernel shape"); +#endif + + /* -105u^5 + 360u^4 - 420u^3 + 168u^2 */ + float phi = -105.f * u + 360.f; + phi = phi * u - 420.f; + phi = phi * u + 168.f; + phi = phi * u; + phi = phi * u; return phi; } -__attribute__((always_inline)) INLINE static float D_soft_7(float u, - float u_inv) { - return 0.f; +__attribute__((const)) INLINE static float D_soft_4(const float u) { + +#ifdef GADGET2_SOFTENING_CORRECTION + error("Invalid choice of softening kernel shape"); +#endif + + /* -315u^4 + 720u^3 - 420u^2 */ + float phi = -315.f * u + 720.f; + phi = phi * u - 420.f; + phi = phi * u; + phi = phi * u; + + return phi; } -__attribute__((always_inline)) INLINE static float D_soft_9(float u, - float u_inv) { - return 0.f; +__attribute__((const)) INLINE static float D_soft_5(const float u) { + +#ifdef GADGET2_SOFTENING_CORRECTION + error("Invalid choice of softening kernel shape"); +#endif + + /* -315u^3 + 420u */ + float phi = -315.f * u; + phi = phi * u + 420.f; + phi = phi * u; + + return phi; } -__attribute__((always_inline)) INLINE static float D_soft_11(float u, - float u_inv) { - return 0.f; +__attribute__((const)) INLINE 
static float D_soft_6(const float u) { + +#ifdef GADGET2_SOFTENING_CORRECTION + error("Invalid choice of softening kernel shape"); +#endif + + /* 315u^2 - 1260 */ + float phi = 315 * u; + phi = phi * u - 1260.f; + + return phi; } #endif /* SWIFT_KERNEL_GRAVITY_H */ diff --git a/src/kernel_long_gravity.h b/src/kernel_long_gravity.h index af4a0f42b101b07163a74e42498f10e5fb664697..a565df4f59874bf633c10e7ac8809ce9bddfd0c0 100644 --- a/src/kernel_long_gravity.h +++ b/src/kernel_long_gravity.h @@ -23,8 +23,8 @@ #include "../config.h" /* Local headers. */ -#include "approx_math.h" #include "const.h" +#include "exp.h" #include "inline.h" /* Standard headers */ @@ -78,35 +78,75 @@ kernel_long_grav_derivatives(const float r, const float r_s_inv, #ifdef GADGET2_LONG_RANGE_CORRECTION - /* Powers of u=r/2r_s */ + /* Powers of u = (1/2) * (r / r_s) */ const float u = 0.5f * r * r_s_inv; const float u2 = u * u; - const float u3 = u2 * u; - const float u4 = u3 * u; + const float u4 = u2 * u2; - /* Powers of (1/r_s) */ - const float r_s_inv2 = r_s_inv * r_s_inv; - const float r_s_inv3 = r_s_inv2 * r_s_inv; - const float r_s_inv4 = r_s_inv3 * r_s_inv; - const float r_s_inv5 = r_s_inv4 * r_s_inv; + const float exp_u2 = expf(-u2); - /* Derivatives of \chi */ - derivs->chi_0 = approx_erfcf(u); - derivs->chi_1 = -r_s_inv; - derivs->chi_2 = r_s_inv2 * u; - derivs->chi_3 = -r_s_inv3 * (u2 - 0.5f); - derivs->chi_4 = r_s_inv4 * (u3 - 1.5f * u); - derivs->chi_5 = -r_s_inv5 * (u4 - 3.f * u2 + 0.75f); + /* Compute erfcf(u) using eq. 7.1.26 of + * Abramowitz & Stegun, 1972. 
+ * + * This has a *relative* error of less than 4e-3 over + * the range of interest (0 < u < 5) + * + * This is a good approximation to use since we already + * need exp(-u2) */ + + const float t = 1.f / (1.f + 0.3275911f * u); + + const float a1 = 0.254829592f; + const float a2 = -0.284496736f; + const float a3 = 1.421413741f; + const float a4 = -1.453152027; + const float a5 = 1.061405429f; + /* a1 * t + a2 * t^2 + a3 * t^3 + a4 * t^4 + a5 * t^5 */ + float a = a5 * t + a4; + a = a * t + a3; + a = a * t + a2; + a = a * t + a1; + a = a * t; + + const float erfc_u = a * exp_u2; + + /* C = (1/sqrt(pi)) * expf(-u^2) */ const float one_over_sqrt_pi = ((float)(M_2_SQRTPI * 0.5)); - const float common_factor = one_over_sqrt_pi * expf(-u2); + const float common_factor = one_over_sqrt_pi * exp_u2; + + /* (1/r_s)^n * C */ + const float r_s_inv_times_C = r_s_inv * common_factor; + const float r_s_inv2_times_C = r_s_inv_times_C * r_s_inv; + const float r_s_inv3_times_C = r_s_inv2_times_C * r_s_inv; + const float r_s_inv4_times_C = r_s_inv3_times_C * r_s_inv; + const float r_s_inv5_times_C = r_s_inv4_times_C * r_s_inv; - /* Multiply in the common factors */ - derivs->chi_1 *= common_factor; - derivs->chi_2 *= common_factor; - derivs->chi_3 *= common_factor; - derivs->chi_4 *= common_factor; - derivs->chi_5 *= common_factor; + /* Now, compute the derivatives of \chi */ +#ifdef GRAVITY_USE_EXACT_LONG_RANGE_MATH + + /* erfc(u) */ + derivs->chi_0 = erfcf(u); +#else + + /* erfc(u) */ + derivs->chi_0 = erfc_u; +#endif + + /* (-1/r_s) * (1/sqrt(pi)) * expf(-u^2) */ + derivs->chi_1 = -r_s_inv_times_C; + + /* (1/r_s)^2 * u * (1/sqrt(pi)) * expf(-u^2) */ + derivs->chi_2 = r_s_inv2_times_C * u; + + /* (1/r_s)^3 * (1/2 - u^2) * (1/sqrt(pi)) * expf(-u^2) */ + derivs->chi_3 = r_s_inv3_times_C * (0.5f - u2); + + /* (1/r_s)^4 * (u^3 - 3/2 u) * (1/sqrt(pi)) * expf(-u^2) */ + derivs->chi_4 = r_s_inv4_times_C * (u2 - 1.5f) * u; + + /* (1/r_s)^5 * (3/4 - 3u^2 + u^4) * (1/sqrt(pi)) * expf(-u^2) 
*/ + derivs->chi_5 = r_s_inv5_times_C * (0.75f - 3.f * u2 + u4); #else @@ -147,65 +187,75 @@ kernel_long_grav_derivatives(const float r, const float r_s_inv, } /** - * @brief Computes the long-range correction term for the potential calculation - * coming from FFT. + * @brief Computes the long-range correction terms for the potential and + * force calculations due to the mesh truncation. + * + * We use an approximation to the erfc() that gives a *relative* accuracy + * for the potential term of 3.4e-3 and 2.4e-4 for the force term over the + * range [0, 5] of r_over_r_s. + * The accuracy is much better in the range [0, 2] (6e-5 and 2e-5 respectively). * * @param u The ratio of the distance to the FFT cell scale \f$u = r/r_s\f$. - * @param W (return) The value of the kernel function. */ __attribute__((always_inline, nonnull)) INLINE static void -kernel_long_grav_pot_eval(const float u, float *const W) { +kernel_long_grav_eval(const float r_over_r_s, float *restrict corr_f, + float *restrict corr_pot) { #ifdef GADGET2_LONG_RANGE_CORRECTION - const float arg1 = u * 0.5f; - const float term1 = approx_erfcf(arg1); - - *W = term1; -#else + const float two_over_sqrt_pi = ((float)M_2_SQRTPI); - const float x = 2.f * u; - const float exp_x = expf(x); // good_approx_expf(x); - const float alpha = 1.f / (1.f + exp_x); + const float u = 0.5f * r_over_r_s; + const float u2 = u * u; + const float exp_u2 = expf(-u2); - /* We want 2 - 2 exp(x) * alpha */ - *W = 1.f - alpha * exp_x; - *W *= 2.f; -#endif -} + /* Compute erfcf(u) using eq. 7.1.26 of + * Abramowitz & Stegun, 1972. + * + * This has a *relative* error of less than 4e-3 over + * the range of interest (0 < u < 5) + * + * This is a good approximation to use since we already + * need exp(-u2) */ -/** - * @brief Computes the long-range correction term for the force calculation - * coming from FFT. - * - * @param u The ratio of the distance to the FFT cell scale \f$u = r/r_s\f$.
- * @param W (return) The value of the kernel function. - */ -__attribute__((always_inline, nonnull)) INLINE static void -kernel_long_grav_force_eval(const float u, float *const W) { + const float t = 1.f / (1.f + 0.3275911f * u); -#ifdef GADGET2_LONG_RANGE_CORRECTION + const float a1 = 0.254829592f; + const float a2 = -0.284496736f; + const float a3 = 1.421413741f; + const float a4 = -1.453152027; + const float a5 = 1.061405429f; - const float one_over_sqrt_pi = ((float)(M_2_SQRTPI * 0.5)); + /* a1 * t + a2 * t^2 + a3 * t^3 + a4 * t^4 + a5 * t^5 */ + float a = a5 * t + a4; + a = a * t + a3; + a = a * t + a2; + a = a * t + a1; + a = a * t; - const float arg1 = u * 0.5f; - const float arg2 = -arg1 * arg1; + const float erfc_u = a * exp_u2; - const float term1 = approx_erfcf(arg1); - const float term2 = u * one_over_sqrt_pi * expf(arg2); + *corr_pot = erfc_u; + *corr_f = erfc_u + two_over_sqrt_pi * u * exp_u2; - *W = term1 + term2; #else - - const float x = 2.f * u; + const float x = 2.f * r_over_r_s; const float exp_x = expf(x); // good_approx_expf(x); const float alpha = 1.f / (1.f + exp_x); + /* We want 2 - 2 exp(x) * alpha */ + float W = 1.f - alpha * exp_x; + W = W * 2.f; + + *corr_pot = W; + /* We want 2*(x*alpha - x*alpha^2 - exp(x)*alpha + 1) */ - *W = 1.f - alpha; - *W = *W * x - exp_x; - *W = *W * alpha + 1.f; - *W *= 2.f; + W = 1.f - alpha; + W = W * x - exp_x; + W = W * alpha + 1.f; + W = W * 2.f; + + *corr_f = W; #endif } diff --git a/src/mesh_gravity.c b/src/mesh_gravity.c index bbd4496112114277f650582432799b5743422a14..fd81a658ad4ffe784c20c1d69897f91a07e02bec 100644 --- a/src/mesh_gravity.c +++ b/src/mesh_gravity.c @@ -28,7 +28,6 @@ #include "mesh_gravity.h" /* Local includes. 
*/ -#include "accumulate.h" #include "active.h" #include "debug.h" #include "engine.h" @@ -336,9 +335,9 @@ void mesh_to_gparts_CIC(struct gpart* gp, const double* pot, const int N, /* ---- */ /* Store things back */ - accumulate_add_f(&gp->a_grav[0], fac * a[0]); - accumulate_add_f(&gp->a_grav[1], fac * a[1]); - accumulate_add_f(&gp->a_grav[2], fac * a[2]); + gp->a_grav[0] += fac * a[0]; + gp->a_grav[1] += fac * a[1]; + gp->a_grav[2] += fac * a[2]; gravity_add_comoving_potential(gp, p); #ifdef SWIFT_GRAVITY_FORCE_CHECKS gp->potential_PM = p; @@ -726,7 +725,7 @@ void pm_mesh_free(struct pm_mesh* mesh) { * @param nr_threads The number of threads on this MPI rank. */ void pm_mesh_init(struct pm_mesh* mesh, const struct gravity_props* props, - double dim[3], int nr_threads) { + const double dim[3], int nr_threads) { #ifdef HAVE_FFTW diff --git a/src/mesh_gravity.h b/src/mesh_gravity.h index e9c07a0de0327984686d65bb9738cde643a7cab8..79c4d1b619cb3f73bc8aa39e97f1c5b2f6386386 100644 --- a/src/mesh_gravity.h +++ b/src/mesh_gravity.h @@ -26,6 +26,7 @@ #include "gravity_properties.h" /* Forward declarations */ +struct engine; struct space; struct gpart; struct threadpool; @@ -67,7 +68,7 @@ struct pm_mesh { }; void pm_mesh_init(struct pm_mesh *mesh, const struct gravity_props *props, - double dim[3], int nr_threads); + const double dim[3], int nr_threads); void pm_mesh_init_no_mesh(struct pm_mesh *mesh, double dim[3]); void pm_mesh_compute_potential(struct pm_mesh *mesh, const struct space *s, struct threadpool *tp, int verbose); diff --git a/src/multipole.h b/src/multipole.h index 1ac413111fe7a75bc929bc2d95a4ac79a1533d03..8169bff2291b00310074a8aaa9f9974c51b773b9 100644 --- a/src/multipole.h +++ b/src/multipole.h @@ -43,14 +43,6 @@ #include "periodic.h" #include "vector_power.h" -#ifdef WITH_MPI -/* MPI datatypes for transfers */ -extern MPI_Datatype multipole_mpi_type; -extern MPI_Op multipole_mpi_reduce_op; -void multipole_create_mpi_types(void); -void 
multipole_free_mpi_types(void); -#endif - /** * @brief Reset the data of a #multipole. * @@ -59,8 +51,8 @@ void multipole_free_mpi_types(void); __attribute__((nonnull)) INLINE static void gravity_reset( struct gravity_tensors *m) { - /* Just bzero the struct. */ bzero(m, sizeof(struct gravity_tensors)); + m->m_pole.min_old_a_grav_norm = FLT_MAX; } /** @@ -294,6 +286,7 @@ __attribute__((nonnull)) INLINE static void gravity_multipole_init( struct multipole *m) { bzero(m, sizeof(struct multipole)); + m->min_old_a_grav_norm = FLT_MAX; } /** @@ -312,8 +305,7 @@ __attribute__((nonnull)) INLINE static void gravity_multipole_print( printf("M_000= %12.5e\n", m->M_000); #if SELF_GRAVITY_MULTIPOLE_ORDER > 0 printf("-------------------------\n"); - printf("M_100= %12.5e M_010= %12.5e M_001= %12.5e\n", m->M_100, m->M_010, - m->M_001); + printf("M_100= %12.5e M_010= %12.5e M_001= %12.5e\n", 0., 0., 0.); #endif #if SELF_GRAVITY_MULTIPOLE_ORDER > 1 printf("-------------------------\n"); @@ -363,14 +355,18 @@ __attribute__((nonnull)) INLINE static void gravity_multipole_add( /* Maximum of both softenings */ ma->max_softening = max(ma->max_softening, mb->max_softening); + /* Minimum of both old accelerations */ + ma->min_old_a_grav_norm = + min(ma->min_old_a_grav_norm, mb->min_old_a_grav_norm); + /* Add 0th order term */ ma->M_000 += mb->M_000; #if SELF_GRAVITY_MULTIPOLE_ORDER > 0 - /* Add 1st order terms */ - ma->M_100 += mb->M_100; - ma->M_010 += mb->M_010; - ma->M_001 += mb->M_001; + /* Add 1st order terms (all 0 since we expand around CoM) */ + /* ma->M_100 += mb->M_100; */ + /* ma->M_010 += mb->M_010; */ + /* ma->M_001 += mb->M_001; */ #endif #if SELF_GRAVITY_MULTIPOLE_ORDER > 1 /* Add 2nd order terms */ @@ -489,6 +485,14 @@ __attribute__((nonnull)) INLINE static int gravity_multipole_equal( return 0; } + /* Check minimal old acceleration norm */ + if (fabsf(ma->min_old_a_grav_norm - mb->min_old_a_grav_norm) / + fabsf(ma->min_old_a_grav_norm + mb->min_old_a_grav_norm + FLT_MIN) 
> + tolerance) { + message("min old_a_grav_norm different!"); + return 0; + } + /* Check bulk velocity (if non-zero and component > 1% of norm)*/ if (fabsf(ma->vel[0] + mb->vel[0]) > 1e-10 && (ma->vel[0] * ma->vel[0]) > 0.0001 * v2 && @@ -522,27 +526,9 @@ __attribute__((nonnull)) INLINE static int gravity_multipole_equal( return 0; } #if SELF_GRAVITY_MULTIPOLE_ORDER > 0 - /* Manhattan Norm of 1st order terms */ - const float order1_norm = fabsf(ma->M_001) + fabsf(mb->M_001) + - fabsf(ma->M_010) + fabsf(mb->M_010) + - fabsf(ma->M_100) + fabsf(mb->M_100); - - /* Compare 1st order terms above 1% of norm */ - if (fabsf(ma->M_001 + mb->M_001) > 0.01f * order1_norm && - fabsf(ma->M_001 - mb->M_001) / fabsf(ma->M_001 + mb->M_001) > tolerance) { - message("M_001 term different"); - return 0; - } - if (fabsf(ma->M_010 + mb->M_010) > 0.01f * order1_norm && - fabsf(ma->M_010 - mb->M_010) / fabsf(ma->M_010 + mb->M_010) > tolerance) { - message("M_010 term different"); - return 0; - } - if (fabsf(ma->M_100 + mb->M_100) > 0.01f * order1_norm && - fabsf(ma->M_100 - mb->M_100) / fabsf(ma->M_100 + mb->M_100) > tolerance) { - message("M_100 term different"); - return 0; - } + /* Manhattan Norm of 1st order terms */ + /* Nothing to do here all the 1st order terms are 0 since we expand around + * CoM */ #endif #if SELF_GRAVITY_MULTIPOLE_ORDER > 1 /* Manhattan Norm of 2nd order terms */ @@ -868,10 +854,121 @@ __attribute__((nonnull)) INLINE static int gravity_multipole_equal( #error "Missing implementation for order >5" #endif + /* Compare the multipole power */ + for (int i = 0; i < SELF_GRAVITY_MULTIPOLE_ORDER + 1; ++i) { + + /* Ignore the order 1 power to avoid FPE since it's always 0 */ + if (i == 1 || (ma->power[i] + mb->power[i] == 0.)) continue; + + if (fabsf(ma->power[i] - mb->power[i]) / + fabsf(ma->power[i] + mb->power[i]) > + tolerance) + message("Power of order %d different", i); + } + /* All is good */ return 1; } +/** + * @brief Compute the multipole power of a 
#multipole. + * + * @param m The #multipole. + */ +__attribute__((nonnull)) INLINE static void gravity_multipole_compute_power( + struct multipole *m) { + + double power[SELF_GRAVITY_MULTIPOLE_ORDER + 1] = {0.}; + + /* 0th order terms */ + m->power[0] = m->M_000; + +#if SELF_GRAVITY_MULTIPOLE_ORDER > 0 + /* 1st order terms (all 0 since we expand around CoM) */ + // power[1] += m->M_001 * m->M_001; + // power[1] += m->M_010 * m->M_010; + // power[1] += m->M_100 * m->M_100; + + // m->power[1] = sqrt(power[1]); + m->power[1] = 0.; +#endif +#if SELF_GRAVITY_MULTIPOLE_ORDER > 1 + /* 2nd order terms */ + power[2] += m->M_002 * m->M_002; + power[2] += 5.000000000000000e-01 * m->M_011 * m->M_011; + power[2] += m->M_020 * m->M_020; + power[2] += 5.000000000000000e-01 * m->M_101 * m->M_101; + power[2] += 5.000000000000000e-01 * m->M_110 * m->M_110; + power[2] += m->M_200 * m->M_200; + + m->power[2] = sqrt(power[2]); +#endif +#if SELF_GRAVITY_MULTIPOLE_ORDER > 2 + /* 3rd order terms */ + power[3] += m->M_003 * m->M_003; + power[3] += 3.333333333333333e-01 * m->M_012 * m->M_012; + power[3] += 3.333333333333333e-01 * m->M_021 * m->M_021; + power[3] += m->M_030 * m->M_030; + power[3] += 3.333333333333333e-01 * m->M_102 * m->M_102; + power[3] += 1.666666666666667e-01 * m->M_111 * m->M_111; + power[3] += 3.333333333333333e-01 * m->M_120 * m->M_120; + power[3] += 3.333333333333333e-01 * m->M_201 * m->M_201; + power[3] += 3.333333333333333e-01 * m->M_210 * m->M_210; + power[3] += m->M_300 * m->M_300; + + m->power[3] = sqrt(power[3]); +#endif +#if SELF_GRAVITY_MULTIPOLE_ORDER > 3 + /* 4th order terms */ + power[4] += m->M_004 * m->M_004; + power[4] += 2.500000000000000e-01 * m->M_013 * m->M_013; + power[4] += 1.666666666666667e-01 * m->M_022 * m->M_022; + power[4] += 2.500000000000000e-01 * m->M_031 * m->M_031; + power[4] += m->M_040 * m->M_040; + power[4] += 2.500000000000000e-01 * m->M_103 * m->M_103; + power[4] += 8.333333333333333e-02 * m->M_112 * m->M_112; + power[4] += 
8.333333333333333e-02 * m->M_121 * m->M_121; + power[4] += 2.500000000000000e-01 * m->M_130 * m->M_130; + power[4] += 1.666666666666667e-01 * m->M_202 * m->M_202; + power[4] += 8.333333333333333e-02 * m->M_211 * m->M_211; + power[4] += 1.666666666666667e-01 * m->M_220 * m->M_220; + power[4] += 2.500000000000000e-01 * m->M_301 * m->M_301; + power[4] += 2.500000000000000e-01 * m->M_310 * m->M_310; + power[4] += m->M_400 * m->M_400; + + m->power[4] = sqrt(power[4]); +#endif +#if SELF_GRAVITY_MULTIPOLE_ORDER > 4 + /* 5th order terms */ + power[5] += m->M_005 * m->M_005; + power[5] += 2.000000000000000e-01 * m->M_014 * m->M_014; + power[5] += 1.000000000000000e-01 * m->M_023 * m->M_023; + power[5] += 1.000000000000000e-01 * m->M_032 * m->M_032; + power[5] += 2.000000000000000e-01 * m->M_041 * m->M_041; + power[5] += m->M_050 * m->M_050; + power[5] += 2.000000000000000e-01 * m->M_104 * m->M_104; + power[5] += 5.000000000000000e-02 * m->M_113 * m->M_113; + power[5] += 3.333333333333333e-02 * m->M_122 * m->M_122; + power[5] += 5.000000000000000e-02 * m->M_131 * m->M_131; + power[5] += 2.000000000000000e-01 * m->M_140 * m->M_140; + power[5] += 1.000000000000000e-01 * m->M_203 * m->M_203; + power[5] += 3.333333333333333e-02 * m->M_212 * m->M_212; + power[5] += 3.333333333333333e-02 * m->M_221 * m->M_221; + power[5] += 1.000000000000000e-01 * m->M_230 * m->M_230; + power[5] += 1.000000000000000e-01 * m->M_302 * m->M_302; + power[5] += 5.000000000000000e-02 * m->M_311 * m->M_311; + power[5] += 1.000000000000000e-01 * m->M_320 * m->M_320; + power[5] += 2.000000000000000e-01 * m->M_401 * m->M_401; + power[5] += 2.000000000000000e-01 * m->M_410 * m->M_410; + power[5] += m->M_500 * m->M_500; + + m->power[5] = sqrt(power[5]); +#endif +#if SELF_GRAVITY_MULTIPOLE_ORDER > 5 +#error "Missing implementation for order >5" +#endif +} + /** * @brief Constructs the #multipole of a bunch of particles around their * centre of mass. 
@@ -889,6 +986,7 @@ __attribute__((nonnull)) INLINE static void gravity_P2M( /* Temporary variables */ float epsilon_max = 0.f; + float min_old_a_grav_norm = FLT_MAX; double mass = 0.0; double com[3] = {0.0, 0.0, 0.0}; double vel[3] = {0.f, 0.f, 0.f}; @@ -904,6 +1002,7 @@ __attribute__((nonnull)) INLINE static void gravity_P2M( #endif epsilon_max = max(epsilon_max, epsilon); + min_old_a_grav_norm = min(min_old_a_grav_norm, gparts[k].old_a_grav_norm); mass += m; com[0] += gparts[k].x[0] * m; com[1] += gparts[k].x[1] * m; @@ -1060,19 +1159,13 @@ __attribute__((nonnull)) INLINE static void gravity_P2M( #endif } -#if SELF_GRAVITY_MULTIPOLE_ORDER > 0 - - /* We know the first-order multipole (dipole) is 0. */ - M_100 = M_010 = M_001 = 0.f; -#endif - /* Store the data on the multipole. */ - multi->m_pole.max_softening = epsilon_max; - multi->m_pole.M_000 = mass; multi->r_max = sqrt(r_max2); multi->CoM[0] = com[0]; multi->CoM[1] = com[1]; multi->CoM[2] = com[2]; + multi->m_pole.max_softening = epsilon_max; + multi->m_pole.min_old_a_grav_norm = min_old_a_grav_norm; multi->m_pole.vel[0] = vel[0]; multi->m_pole.vel[1] = vel[1]; multi->m_pole.vel[2] = vel[2]; @@ -1082,13 +1175,14 @@ __attribute__((nonnull)) INLINE static void gravity_P2M( multi->m_pole.min_delta_vel[0] = min_delta_vel[0]; multi->m_pole.min_delta_vel[1] = min_delta_vel[1]; multi->m_pole.min_delta_vel[2] = min_delta_vel[2]; + multi->m_pole.M_000 = mass; #if SELF_GRAVITY_MULTIPOLE_ORDER > 0 - /* 1st order terms */ - multi->m_pole.M_100 = M_100; - multi->m_pole.M_010 = M_010; - multi->m_pole.M_001 = M_001; + /* 1st order terms (all 0 since we expand around CoM) */ + // multi->m_pole.M_100 = M_100; + // multi->m_pole.M_010 = M_010; + // multi->m_pole.M_001 = M_001; #endif #if SELF_GRAVITY_MULTIPOLE_ORDER > 1 @@ -1184,6 +1278,9 @@ __attribute__((nonnull)) INLINE static void gravity_M2M( /* "shift" the softening */ m_a->max_softening = m_b->max_softening; + /* "shift" the minimal acceleration */ + 
m_a->min_old_a_grav_norm = m_b->min_old_a_grav_norm; + /* Shift 0th order term */ m_a->M_000 = m_b->M_000; @@ -1191,228 +1288,291 @@ __attribute__((nonnull)) INLINE static void gravity_M2M( const double dx[3] = {pos_a[0] - pos_b[0], pos_a[1] - pos_b[1], pos_a[2] - pos_b[2]}; - /* Shift 1st order term */ - m_a->M_100 = m_b->M_100 + X_100(dx) * m_b->M_000; - m_a->M_010 = m_b->M_010 + X_010(dx) * m_b->M_000; - m_a->M_001 = m_b->M_001 + X_001(dx) * m_b->M_000; + /* Shift 1st order term (all 0 (after add) since we expand around CoM) */ + // m_a->M_100 = m_b->M_100 + X_100(dx) * m_b->M_000; + // m_a->M_010 = m_b->M_010 + X_010(dx) * m_b->M_000; + // m_a->M_001 = m_b->M_001 + X_001(dx) * m_b->M_000; #endif #if SELF_GRAVITY_MULTIPOLE_ORDER > 1 - /* Shift 2nd order term */ - m_a->M_200 = m_b->M_200 + X_100(dx) * m_b->M_100 + X_200(dx) * m_b->M_000; - m_a->M_020 = m_b->M_020 + X_010(dx) * m_b->M_010 + X_020(dx) * m_b->M_000; - m_a->M_002 = m_b->M_002 + X_001(dx) * m_b->M_001 + X_002(dx) * m_b->M_000; - m_a->M_110 = m_b->M_110 + X_100(dx) * m_b->M_010 + X_010(dx) * m_b->M_100 + - X_110(dx) * m_b->M_000; - m_a->M_101 = m_b->M_101 + X_100(dx) * m_b->M_001 + X_001(dx) * m_b->M_100 + - X_101(dx) * m_b->M_000; - m_a->M_011 = m_b->M_011 + X_010(dx) * m_b->M_001 + X_001(dx) * m_b->M_010 + - X_011(dx) * m_b->M_000; + /* Shift 2nd order terms (1st order mpole (all 0) commented out) */ + m_a->M_002 = + m_b->M_002 /* + X_001(dx) * m_b->M_001 */ + X_002(dx) * m_b->M_000; + m_a->M_011 = + m_b->M_011 /* + X_001(dx) * m_b->M_010 */ /* + X_010(dx) * m_b->M_001 */ + + X_011(dx) * m_b->M_000; + m_a->M_020 = + m_b->M_020 /* + X_010(dx) * m_b->M_010 */ + X_020(dx) * m_b->M_000; + m_a->M_101 = + m_b->M_101 /* + X_001(dx) * m_b->M_100 */ /* + X_100(dx) * m_b->M_001 */ + + X_101(dx) * m_b->M_000; + m_a->M_110 = + m_b->M_110 /* + X_010(dx) * m_b->M_100 */ /* + X_100(dx) * m_b->M_010 */ + + X_110(dx) * m_b->M_000; + m_a->M_200 = + m_b->M_200 /* + X_100(dx) * m_b->M_100 */ + X_200(dx) * m_b->M_000; 
#endif + #if SELF_GRAVITY_MULTIPOLE_ORDER > 2 - /* Shift 3rd order term */ - m_a->M_300 = m_b->M_300 + X_100(dx) * m_b->M_200 + X_200(dx) * m_b->M_100 + - X_300(dx) * m_b->M_000; - m_a->M_030 = m_b->M_030 + X_010(dx) * m_b->M_020 + X_020(dx) * m_b->M_010 + - X_030(dx) * m_b->M_000; - m_a->M_003 = m_b->M_003 + X_001(dx) * m_b->M_002 + X_002(dx) * m_b->M_001 + + /* Shift 3rd order terms (1st order mpole (all 0) commented out) */ + m_a->M_003 = m_b->M_003 + + X_001(dx) * m_b->M_002 /* + X_002(dx) * m_b->M_001 */ + X_003(dx) * m_b->M_000; - m_a->M_210 = m_b->M_210 + X_100(dx) * m_b->M_110 + X_010(dx) * m_b->M_200 + - X_200(dx) * m_b->M_010 + X_110(dx) * m_b->M_100 + - X_210(dx) * m_b->M_000; - m_a->M_201 = m_b->M_201 + X_100(dx) * m_b->M_101 + X_001(dx) * m_b->M_200 + - X_200(dx) * m_b->M_001 + X_101(dx) * m_b->M_100 + - X_201(dx) * m_b->M_000; - m_a->M_120 = m_b->M_120 + X_010(dx) * m_b->M_110 + X_100(dx) * m_b->M_020 + - X_020(dx) * m_b->M_100 + X_110(dx) * m_b->M_010 + - X_120(dx) * m_b->M_000; - m_a->M_021 = m_b->M_021 + X_010(dx) * m_b->M_011 + X_001(dx) * m_b->M_020 + - X_020(dx) * m_b->M_001 + X_011(dx) * m_b->M_010 + - X_021(dx) * m_b->M_000; - m_a->M_102 = m_b->M_102 + X_001(dx) * m_b->M_101 + X_100(dx) * m_b->M_002 + - X_002(dx) * m_b->M_100 + X_101(dx) * m_b->M_001 + - X_102(dx) * m_b->M_000; - m_a->M_012 = m_b->M_012 + X_001(dx) * m_b->M_011 + X_010(dx) * m_b->M_002 + - X_002(dx) * m_b->M_010 + X_011(dx) * m_b->M_001 + + m_a->M_012 = m_b->M_012 + + X_001(dx) * m_b->M_011 /* + X_002(dx) * m_b->M_010 */ + + X_010(dx) * m_b->M_002 /* + X_011(dx) * m_b->M_001 */ + X_012(dx) * m_b->M_000; - m_a->M_111 = m_b->M_111 + X_100(dx) * m_b->M_011 + X_010(dx) * m_b->M_101 + - X_001(dx) * m_b->M_110 + X_110(dx) * m_b->M_001 + - X_101(dx) * m_b->M_010 + X_011(dx) * m_b->M_100 + - X_111(dx) * m_b->M_000; + m_a->M_021 = m_b->M_021 + X_001(dx) * m_b->M_020 + + X_010(dx) * m_b->M_011 /* + X_011(dx) * m_b->M_010 */ + /* + X_020(dx) * m_b->M_001 */ + + X_021(dx) * m_b->M_000; + 
m_a->M_030 = m_b->M_030 + + X_010(dx) * m_b->M_020 /* + X_020(dx) * m_b->M_010 */ + + X_030(dx) * m_b->M_000; + m_a->M_102 = m_b->M_102 + + X_001(dx) * m_b->M_101 /* + X_002(dx) * m_b->M_100 */ + + X_100(dx) * m_b->M_002 /* + X_101(dx) * m_b->M_001 */ + + X_102(dx) * m_b->M_000; + m_a->M_111 = m_b->M_111 + X_001(dx) * m_b->M_110 + + X_010(dx) * m_b->M_101 /* + X_011(dx) * m_b->M_100 */ + + X_100(dx) * m_b->M_011 /* + X_101(dx) * m_b->M_010 */ + /* + X_110(dx) * m_b->M_001 */ + + X_111(dx) * m_b->M_000; + m_a->M_120 = m_b->M_120 + + X_010(dx) * m_b->M_110 /* + X_020(dx) * m_b->M_100 */ + + X_100(dx) * m_b->M_020 /* + X_110(dx) * m_b->M_010 */ + + X_120(dx) * m_b->M_000; + m_a->M_201 = m_b->M_201 + X_001(dx) * m_b->M_200 + + X_100(dx) * m_b->M_101 /* + X_101(dx) * m_b->M_100 */ + /* + X_200(dx) * m_b->M_001 */ + + X_201(dx) * m_b->M_000; + m_a->M_210 = m_b->M_210 + X_010(dx) * m_b->M_200 + + X_100(dx) * m_b->M_110 /* + X_110(dx) * m_b->M_100 */ + /* + X_200(dx) * m_b->M_010 */ + + X_210(dx) * m_b->M_000; + m_a->M_300 = m_b->M_300 + + X_100(dx) * m_b->M_200 /* + X_200(dx) * m_b->M_100 */ + + X_300(dx) * m_b->M_000; #endif #if SELF_GRAVITY_MULTIPOLE_ORDER > 3 - /* Shift 4th order terms */ - m_a->M_004 = m_b->M_004 + X_001(dx) * m_b->M_003 + X_002(dx) * m_b->M_002 + - X_003(dx) * m_b->M_001 + X_004(dx) * m_b->M_000; - m_a->M_013 = m_b->M_013 + X_001(dx) * m_b->M_012 + X_002(dx) * m_b->M_011 + - X_003(dx) * m_b->M_010 + X_010(dx) * m_b->M_003 + - X_011(dx) * m_b->M_002 + X_012(dx) * m_b->M_001 + + + /* Shift 4th order terms (1st order mpole (all 0) commented out) */ + m_a->M_004 = m_b->M_004 + X_001(dx) * m_b->M_003 + + X_002(dx) * m_b->M_002 /* + X_003(dx) * m_b->M_001 */ + + X_004(dx) * m_b->M_000; + m_a->M_013 = m_b->M_013 + X_001(dx) * m_b->M_012 + + X_002(dx) * m_b->M_011 /* + X_003(dx) * m_b->M_010 */ + + X_010(dx) * m_b->M_003 + + X_011(dx) * m_b->M_002 /* + X_012(dx) * m_b->M_001 */ + X_013(dx) * m_b->M_000; m_a->M_022 = m_b->M_022 + X_001(dx) * m_b->M_021 + 
X_002(dx) * m_b->M_020 + - X_010(dx) * m_b->M_012 + X_011(dx) * m_b->M_011 + - X_012(dx) * m_b->M_010 + X_020(dx) * m_b->M_002 + - X_021(dx) * m_b->M_001 + X_022(dx) * m_b->M_000; + X_010(dx) * m_b->M_012 + + X_011(dx) * m_b->M_011 /* + X_012(dx) * m_b->M_010 */ + + X_020(dx) * m_b->M_002 /* + X_021(dx) * m_b->M_001 */ + + X_022(dx) * m_b->M_000; m_a->M_031 = m_b->M_031 + X_001(dx) * m_b->M_030 + X_010(dx) * m_b->M_021 + - X_011(dx) * m_b->M_020 + X_020(dx) * m_b->M_011 + - X_021(dx) * m_b->M_010 + X_030(dx) * m_b->M_001 + - X_031(dx) * m_b->M_000; - m_a->M_040 = m_b->M_040 + X_010(dx) * m_b->M_030 + X_020(dx) * m_b->M_020 + - X_030(dx) * m_b->M_010 + X_040(dx) * m_b->M_000; - m_a->M_103 = m_b->M_103 + X_001(dx) * m_b->M_102 + X_002(dx) * m_b->M_101 + - X_003(dx) * m_b->M_100 + X_100(dx) * m_b->M_003 + - X_101(dx) * m_b->M_002 + X_102(dx) * m_b->M_001 + + X_011(dx) * m_b->M_020 + + X_020(dx) * m_b->M_011 /* + X_021(dx) * m_b->M_010 */ + /* + X_030(dx) * m_b->M_001 */ + + X_031(dx) * m_b->M_000; + m_a->M_040 = m_b->M_040 + X_010(dx) * m_b->M_030 + + X_020(dx) * m_b->M_020 /* + X_030(dx) * m_b->M_010 */ + + X_040(dx) * m_b->M_000; + m_a->M_103 = m_b->M_103 + X_001(dx) * m_b->M_102 + + X_002(dx) * m_b->M_101 /* + X_003(dx) * m_b->M_100 */ + + X_100(dx) * m_b->M_003 + + X_101(dx) * m_b->M_002 /* + X_102(dx) * m_b->M_001 */ + X_103(dx) * m_b->M_000; - m_a->M_112 = - m_b->M_112 + X_001(dx) * m_b->M_111 + X_002(dx) * m_b->M_110 + - X_010(dx) * m_b->M_102 + X_011(dx) * m_b->M_101 + X_012(dx) * m_b->M_100 + - X_100(dx) * m_b->M_012 + X_101(dx) * m_b->M_011 + X_102(dx) * m_b->M_010 + - X_110(dx) * m_b->M_002 + X_111(dx) * m_b->M_001 + X_112(dx) * m_b->M_000; - m_a->M_121 = - m_b->M_121 + X_001(dx) * m_b->M_120 + X_010(dx) * m_b->M_111 + - X_011(dx) * m_b->M_110 + X_020(dx) * m_b->M_101 + X_021(dx) * m_b->M_100 + - X_100(dx) * m_b->M_021 + X_101(dx) * m_b->M_020 + X_110(dx) * m_b->M_011 + - X_111(dx) * m_b->M_010 + X_120(dx) * m_b->M_001 + X_121(dx) * m_b->M_000; - m_a->M_130 
= m_b->M_130 + X_010(dx) * m_b->M_120 + X_020(dx) * m_b->M_110 + - X_030(dx) * m_b->M_100 + X_100(dx) * m_b->M_030 + - X_110(dx) * m_b->M_020 + X_120(dx) * m_b->M_010 + + m_a->M_112 = m_b->M_112 + X_001(dx) * m_b->M_111 + X_002(dx) * m_b->M_110 + + X_010(dx) * m_b->M_102 + + X_011(dx) * m_b->M_101 /* + X_012(dx) * m_b->M_100 */ + + X_100(dx) * m_b->M_012 + + X_101(dx) * m_b->M_011 /* + X_102(dx) * m_b->M_010 */ + + X_110(dx) * m_b->M_002 /* + X_111(dx) * m_b->M_001 */ + + X_112(dx) * m_b->M_000; + m_a->M_121 = m_b->M_121 + X_001(dx) * m_b->M_120 + X_010(dx) * m_b->M_111 + + X_011(dx) * m_b->M_110 + + X_020(dx) * m_b->M_101 /* + X_021(dx) * m_b->M_100 */ + + X_100(dx) * m_b->M_021 + X_101(dx) * m_b->M_020 + + X_110(dx) * m_b->M_011 /* + X_111(dx) * m_b->M_010 */ + /* + X_120(dx) * m_b->M_001 */ + + X_121(dx) * m_b->M_000; + m_a->M_130 = m_b->M_130 + X_010(dx) * m_b->M_120 + + X_020(dx) * m_b->M_110 /* + X_030(dx) * m_b->M_100 */ + + X_100(dx) * m_b->M_030 + + X_110(dx) * m_b->M_020 /* + X_120(dx) * m_b->M_010 */ + X_130(dx) * m_b->M_000; m_a->M_202 = m_b->M_202 + X_001(dx) * m_b->M_201 + X_002(dx) * m_b->M_200 + - X_100(dx) * m_b->M_102 + X_101(dx) * m_b->M_101 + - X_102(dx) * m_b->M_100 + X_200(dx) * m_b->M_002 + - X_201(dx) * m_b->M_001 + X_202(dx) * m_b->M_000; - m_a->M_211 = - m_b->M_211 + X_001(dx) * m_b->M_210 + X_010(dx) * m_b->M_201 + - X_011(dx) * m_b->M_200 + X_100(dx) * m_b->M_111 + X_101(dx) * m_b->M_110 + - X_110(dx) * m_b->M_101 + X_111(dx) * m_b->M_100 + X_200(dx) * m_b->M_011 + - X_201(dx) * m_b->M_010 + X_210(dx) * m_b->M_001 + X_211(dx) * m_b->M_000; + X_100(dx) * m_b->M_102 + + X_101(dx) * m_b->M_101 /* + X_102(dx) * m_b->M_100 */ + + X_200(dx) * m_b->M_002 /* + X_201(dx) * m_b->M_001 */ + + X_202(dx) * m_b->M_000; + m_a->M_211 = m_b->M_211 + X_001(dx) * m_b->M_210 + X_010(dx) * m_b->M_201 + + X_011(dx) * m_b->M_200 + X_100(dx) * m_b->M_111 + + X_101(dx) * m_b->M_110 + + X_110(dx) * m_b->M_101 /* + X_111(dx) * m_b->M_100 */ + + X_200(dx) * 
m_b->M_011 /* + X_201(dx) * m_b->M_010 */ + /* + X_210(dx) * m_b->M_001 */ + + X_211(dx) * m_b->M_000; m_a->M_220 = m_b->M_220 + X_010(dx) * m_b->M_210 + X_020(dx) * m_b->M_200 + - X_100(dx) * m_b->M_120 + X_110(dx) * m_b->M_110 + - X_120(dx) * m_b->M_100 + X_200(dx) * m_b->M_020 + - X_210(dx) * m_b->M_010 + X_220(dx) * m_b->M_000; + X_100(dx) * m_b->M_120 + + X_110(dx) * m_b->M_110 /* + X_120(dx) * m_b->M_100 */ + + X_200(dx) * m_b->M_020 /* + X_210(dx) * m_b->M_010 */ + + X_220(dx) * m_b->M_000; m_a->M_301 = m_b->M_301 + X_001(dx) * m_b->M_300 + X_100(dx) * m_b->M_201 + - X_101(dx) * m_b->M_200 + X_200(dx) * m_b->M_101 + - X_201(dx) * m_b->M_100 + X_300(dx) * m_b->M_001 + - X_301(dx) * m_b->M_000; + X_101(dx) * m_b->M_200 + + X_200(dx) * m_b->M_101 /* + X_201(dx) * m_b->M_100 */ + /* + X_300(dx) * m_b->M_001 */ + + X_301(dx) * m_b->M_000; m_a->M_310 = m_b->M_310 + X_010(dx) * m_b->M_300 + X_100(dx) * m_b->M_210 + - X_110(dx) * m_b->M_200 + X_200(dx) * m_b->M_110 + - X_210(dx) * m_b->M_100 + X_300(dx) * m_b->M_010 + - X_310(dx) * m_b->M_000; - m_a->M_400 = m_b->M_400 + X_100(dx) * m_b->M_300 + X_200(dx) * m_b->M_200 + - X_300(dx) * m_b->M_100 + X_400(dx) * m_b->M_000; + X_110(dx) * m_b->M_200 + + X_200(dx) * m_b->M_110 /* + X_210(dx) * m_b->M_100 */ + /* + X_300(dx) * m_b->M_010 */ + + X_310(dx) * m_b->M_000; + m_a->M_400 = m_b->M_400 + X_100(dx) * m_b->M_300 + + X_200(dx) * m_b->M_200 /* + X_300(dx) * m_b->M_100 */ + + X_400(dx) * m_b->M_000; #endif #if SELF_GRAVITY_MULTIPOLE_ORDER > 4 - /* Shift 5th order terms */ + + /* Shift 5th order terms (1st order mpole (all 0) commented out) */ m_a->M_005 = m_b->M_005 + X_001(dx) * m_b->M_004 + X_002(dx) * m_b->M_003 + - X_003(dx) * m_b->M_002 + X_004(dx) * m_b->M_001 + + X_003(dx) * m_b->M_002 /* + X_004(dx) * m_b->M_001 */ + X_005(dx) * m_b->M_000; m_a->M_014 = m_b->M_014 + X_001(dx) * m_b->M_013 + X_002(dx) * m_b->M_012 + - X_003(dx) * m_b->M_011 + X_004(dx) * m_b->M_010 + + X_003(dx) * m_b->M_011 /* + X_004(dx) * 
m_b->M_010 */ + X_010(dx) * m_b->M_004 + X_011(dx) * m_b->M_003 + - X_012(dx) * m_b->M_002 + X_013(dx) * m_b->M_001 + + X_012(dx) * m_b->M_002 /* + X_013(dx) * m_b->M_001 */ + X_014(dx) * m_b->M_000; - m_a->M_023 = - m_b->M_023 + X_001(dx) * m_b->M_022 + X_002(dx) * m_b->M_021 + - X_003(dx) * m_b->M_020 + X_010(dx) * m_b->M_013 + X_011(dx) * m_b->M_012 + - X_012(dx) * m_b->M_011 + X_013(dx) * m_b->M_010 + X_020(dx) * m_b->M_003 + - X_021(dx) * m_b->M_002 + X_022(dx) * m_b->M_001 + X_023(dx) * m_b->M_000; - m_a->M_032 = - m_b->M_032 + X_001(dx) * m_b->M_031 + X_002(dx) * m_b->M_030 + - X_010(dx) * m_b->M_022 + X_011(dx) * m_b->M_021 + X_012(dx) * m_b->M_020 + - X_020(dx) * m_b->M_012 + X_021(dx) * m_b->M_011 + X_022(dx) * m_b->M_010 + - X_030(dx) * m_b->M_002 + X_031(dx) * m_b->M_001 + X_032(dx) * m_b->M_000; + m_a->M_023 = m_b->M_023 + X_001(dx) * m_b->M_022 + X_002(dx) * m_b->M_021 + + X_003(dx) * m_b->M_020 + X_010(dx) * m_b->M_013 + + X_011(dx) * m_b->M_012 + + X_012(dx) * m_b->M_011 /* + X_013(dx) * m_b->M_010 */ + + X_020(dx) * m_b->M_003 + + X_021(dx) * m_b->M_002 /* + X_022(dx) * m_b->M_001 */ + + X_023(dx) * m_b->M_000; + m_a->M_032 = m_b->M_032 + X_001(dx) * m_b->M_031 + X_002(dx) * m_b->M_030 + + X_010(dx) * m_b->M_022 + X_011(dx) * m_b->M_021 + + X_012(dx) * m_b->M_020 + X_020(dx) * m_b->M_012 + + X_021(dx) * m_b->M_011 /* + X_022(dx) * m_b->M_010 */ + + X_030(dx) * m_b->M_002 /* + X_031(dx) * m_b->M_001 */ + + X_032(dx) * m_b->M_000; m_a->M_041 = m_b->M_041 + X_001(dx) * m_b->M_040 + X_010(dx) * m_b->M_031 + X_011(dx) * m_b->M_030 + X_020(dx) * m_b->M_021 + - X_021(dx) * m_b->M_020 + X_030(dx) * m_b->M_011 + - X_031(dx) * m_b->M_010 + X_040(dx) * m_b->M_001 + - X_041(dx) * m_b->M_000; + X_021(dx) * m_b->M_020 + + X_030(dx) * m_b->M_011 /* + X_031(dx) * m_b->M_010 */ + /* + X_040(dx) * m_b->M_001 */ + + X_041(dx) * m_b->M_000; m_a->M_050 = m_b->M_050 + X_010(dx) * m_b->M_040 + X_020(dx) * m_b->M_030 + - X_030(dx) * m_b->M_020 + X_040(dx) * m_b->M_010 + + 
X_030(dx) * m_b->M_020 /* + X_040(dx) * m_b->M_010 */ + X_050(dx) * m_b->M_000; m_a->M_104 = m_b->M_104 + X_001(dx) * m_b->M_103 + X_002(dx) * m_b->M_102 + - X_003(dx) * m_b->M_101 + X_004(dx) * m_b->M_100 + + X_003(dx) * m_b->M_101 /* + X_004(dx) * m_b->M_100 */ + X_100(dx) * m_b->M_004 + X_101(dx) * m_b->M_003 + - X_102(dx) * m_b->M_002 + X_103(dx) * m_b->M_001 + + X_102(dx) * m_b->M_002 /* + X_103(dx) * m_b->M_001 */ + X_104(dx) * m_b->M_000; - m_a->M_113 = - m_b->M_113 + X_001(dx) * m_b->M_112 + X_002(dx) * m_b->M_111 + - X_003(dx) * m_b->M_110 + X_010(dx) * m_b->M_103 + X_011(dx) * m_b->M_102 + - X_012(dx) * m_b->M_101 + X_013(dx) * m_b->M_100 + X_100(dx) * m_b->M_013 + - X_101(dx) * m_b->M_012 + X_102(dx) * m_b->M_011 + X_103(dx) * m_b->M_010 + - X_110(dx) * m_b->M_003 + X_111(dx) * m_b->M_002 + X_112(dx) * m_b->M_001 + - X_113(dx) * m_b->M_000; - m_a->M_122 = - m_b->M_122 + X_001(dx) * m_b->M_121 + X_002(dx) * m_b->M_120 + - X_010(dx) * m_b->M_112 + X_011(dx) * m_b->M_111 + X_012(dx) * m_b->M_110 + - X_020(dx) * m_b->M_102 + X_021(dx) * m_b->M_101 + X_022(dx) * m_b->M_100 + - X_100(dx) * m_b->M_022 + X_101(dx) * m_b->M_021 + X_102(dx) * m_b->M_020 + - X_110(dx) * m_b->M_012 + X_111(dx) * m_b->M_011 + X_112(dx) * m_b->M_010 + - X_120(dx) * m_b->M_002 + X_121(dx) * m_b->M_001 + X_122(dx) * m_b->M_000; - m_a->M_131 = - m_b->M_131 + X_001(dx) * m_b->M_130 + X_010(dx) * m_b->M_121 + - X_011(dx) * m_b->M_120 + X_020(dx) * m_b->M_111 + X_021(dx) * m_b->M_110 + - X_030(dx) * m_b->M_101 + X_031(dx) * m_b->M_100 + X_100(dx) * m_b->M_031 + - X_101(dx) * m_b->M_030 + X_110(dx) * m_b->M_021 + X_111(dx) * m_b->M_020 + - X_120(dx) * m_b->M_011 + X_121(dx) * m_b->M_010 + X_130(dx) * m_b->M_001 + - X_131(dx) * m_b->M_000; + m_a->M_113 = m_b->M_113 + X_001(dx) * m_b->M_112 + X_002(dx) * m_b->M_111 + + X_003(dx) * m_b->M_110 + X_010(dx) * m_b->M_103 + + X_011(dx) * m_b->M_102 + + X_012(dx) * m_b->M_101 /* + X_013(dx) * m_b->M_100 */ + + X_100(dx) * m_b->M_013 + X_101(dx) * 
m_b->M_012 + + X_102(dx) * m_b->M_011 /* + X_103(dx) * m_b->M_010 */ + + X_110(dx) * m_b->M_003 + + X_111(dx) * m_b->M_002 /* + X_112(dx) * m_b->M_001 */ + + X_113(dx) * m_b->M_000; + m_a->M_122 = m_b->M_122 + X_001(dx) * m_b->M_121 + X_002(dx) * m_b->M_120 + + X_010(dx) * m_b->M_112 + X_011(dx) * m_b->M_111 + + X_012(dx) * m_b->M_110 + X_020(dx) * m_b->M_102 + + X_021(dx) * m_b->M_101 /* + X_022(dx) * m_b->M_100 */ + + X_100(dx) * m_b->M_022 + X_101(dx) * m_b->M_021 + + X_102(dx) * m_b->M_020 + X_110(dx) * m_b->M_012 + + X_111(dx) * m_b->M_011 /* + X_112(dx) * m_b->M_010 */ + + X_120(dx) * m_b->M_002 /* + X_121(dx) * m_b->M_001 */ + + X_122(dx) * m_b->M_000; + m_a->M_131 = m_b->M_131 + X_001(dx) * m_b->M_130 + X_010(dx) * m_b->M_121 + + X_011(dx) * m_b->M_120 + X_020(dx) * m_b->M_111 + + X_021(dx) * m_b->M_110 + + X_030(dx) * m_b->M_101 /* + X_031(dx) * m_b->M_100 */ + + X_100(dx) * m_b->M_031 + X_101(dx) * m_b->M_030 + + X_110(dx) * m_b->M_021 + X_111(dx) * m_b->M_020 + + X_120(dx) * m_b->M_011 /* + X_121(dx) * m_b->M_010 */ + /* + X_130(dx) * m_b->M_001 */ + + X_131(dx) * m_b->M_000; m_a->M_140 = m_b->M_140 + X_010(dx) * m_b->M_130 + X_020(dx) * m_b->M_120 + - X_030(dx) * m_b->M_110 + X_040(dx) * m_b->M_100 + + X_030(dx) * m_b->M_110 /* + X_040(dx) * m_b->M_100 */ + X_100(dx) * m_b->M_040 + X_110(dx) * m_b->M_030 + - X_120(dx) * m_b->M_020 + X_130(dx) * m_b->M_010 + + X_120(dx) * m_b->M_020 /* + X_130(dx) * m_b->M_010 */ + X_140(dx) * m_b->M_000; - m_a->M_203 = - m_b->M_203 + X_001(dx) * m_b->M_202 + X_002(dx) * m_b->M_201 + - X_003(dx) * m_b->M_200 + X_100(dx) * m_b->M_103 + X_101(dx) * m_b->M_102 + - X_102(dx) * m_b->M_101 + X_103(dx) * m_b->M_100 + X_200(dx) * m_b->M_003 + - X_201(dx) * m_b->M_002 + X_202(dx) * m_b->M_001 + X_203(dx) * m_b->M_000; - m_a->M_212 = - m_b->M_212 + X_001(dx) * m_b->M_211 + X_002(dx) * m_b->M_210 + - X_010(dx) * m_b->M_202 + X_011(dx) * m_b->M_201 + X_012(dx) * m_b->M_200 + - X_100(dx) * m_b->M_112 + X_101(dx) * m_b->M_111 + 
X_102(dx) * m_b->M_110 + - X_110(dx) * m_b->M_102 + X_111(dx) * m_b->M_101 + X_112(dx) * m_b->M_100 + - X_200(dx) * m_b->M_012 + X_201(dx) * m_b->M_011 + X_202(dx) * m_b->M_010 + - X_210(dx) * m_b->M_002 + X_211(dx) * m_b->M_001 + X_212(dx) * m_b->M_000; - m_a->M_221 = - m_b->M_221 + X_001(dx) * m_b->M_220 + X_010(dx) * m_b->M_211 + - X_011(dx) * m_b->M_210 + X_020(dx) * m_b->M_201 + X_021(dx) * m_b->M_200 + - X_100(dx) * m_b->M_121 + X_101(dx) * m_b->M_120 + X_110(dx) * m_b->M_111 + - X_111(dx) * m_b->M_110 + X_120(dx) * m_b->M_101 + X_121(dx) * m_b->M_100 + - X_200(dx) * m_b->M_021 + X_201(dx) * m_b->M_020 + X_210(dx) * m_b->M_011 + - X_211(dx) * m_b->M_010 + X_220(dx) * m_b->M_001 + X_221(dx) * m_b->M_000; - m_a->M_230 = - m_b->M_230 + X_010(dx) * m_b->M_220 + X_020(dx) * m_b->M_210 + - X_030(dx) * m_b->M_200 + X_100(dx) * m_b->M_130 + X_110(dx) * m_b->M_120 + - X_120(dx) * m_b->M_110 + X_130(dx) * m_b->M_100 + X_200(dx) * m_b->M_030 + - X_210(dx) * m_b->M_020 + X_220(dx) * m_b->M_010 + X_230(dx) * m_b->M_000; - m_a->M_302 = - m_b->M_302 + X_001(dx) * m_b->M_301 + X_002(dx) * m_b->M_300 + - X_100(dx) * m_b->M_202 + X_101(dx) * m_b->M_201 + X_102(dx) * m_b->M_200 + - X_200(dx) * m_b->M_102 + X_201(dx) * m_b->M_101 + X_202(dx) * m_b->M_100 + - X_300(dx) * m_b->M_002 + X_301(dx) * m_b->M_001 + X_302(dx) * m_b->M_000; - m_a->M_311 = - m_b->M_311 + X_001(dx) * m_b->M_310 + X_010(dx) * m_b->M_301 + - X_011(dx) * m_b->M_300 + X_100(dx) * m_b->M_211 + X_101(dx) * m_b->M_210 + - X_110(dx) * m_b->M_201 + X_111(dx) * m_b->M_200 + X_200(dx) * m_b->M_111 + - X_201(dx) * m_b->M_110 + X_210(dx) * m_b->M_101 + X_211(dx) * m_b->M_100 + - X_300(dx) * m_b->M_011 + X_301(dx) * m_b->M_010 + X_310(dx) * m_b->M_001 + - X_311(dx) * m_b->M_000; - m_a->M_320 = - m_b->M_320 + X_010(dx) * m_b->M_310 + X_020(dx) * m_b->M_300 + - X_100(dx) * m_b->M_220 + X_110(dx) * m_b->M_210 + X_120(dx) * m_b->M_200 + - X_200(dx) * m_b->M_120 + X_210(dx) * m_b->M_110 + X_220(dx) * m_b->M_100 + - X_300(dx) 
* m_b->M_020 + X_310(dx) * m_b->M_010 + X_320(dx) * m_b->M_000; + m_a->M_203 = m_b->M_203 + X_001(dx) * m_b->M_202 + X_002(dx) * m_b->M_201 + + X_003(dx) * m_b->M_200 + X_100(dx) * m_b->M_103 + + X_101(dx) * m_b->M_102 + + X_102(dx) * m_b->M_101 /* + X_103(dx) * m_b->M_100 */ + + X_200(dx) * m_b->M_003 + + X_201(dx) * m_b->M_002 /* + X_202(dx) * m_b->M_001 */ + + X_203(dx) * m_b->M_000; + m_a->M_212 = m_b->M_212 + X_001(dx) * m_b->M_211 + X_002(dx) * m_b->M_210 + + X_010(dx) * m_b->M_202 + X_011(dx) * m_b->M_201 + + X_012(dx) * m_b->M_200 + X_100(dx) * m_b->M_112 + + X_101(dx) * m_b->M_111 + X_102(dx) * m_b->M_110 + + X_110(dx) * m_b->M_102 + + X_111(dx) * m_b->M_101 /* + X_112(dx) * m_b->M_100 */ + + X_200(dx) * m_b->M_012 + + X_201(dx) * m_b->M_011 /* + X_202(dx) * m_b->M_010 */ + + X_210(dx) * m_b->M_002 /* + X_211(dx) * m_b->M_001 */ + + X_212(dx) * m_b->M_000; + m_a->M_221 = m_b->M_221 + X_001(dx) * m_b->M_220 + X_010(dx) * m_b->M_211 + + X_011(dx) * m_b->M_210 + X_020(dx) * m_b->M_201 + + X_021(dx) * m_b->M_200 + X_100(dx) * m_b->M_121 + + X_101(dx) * m_b->M_120 + X_110(dx) * m_b->M_111 + + X_111(dx) * m_b->M_110 + + X_120(dx) * m_b->M_101 /* + X_121(dx) * m_b->M_100 */ + + X_200(dx) * m_b->M_021 + X_201(dx) * m_b->M_020 + + X_210(dx) * m_b->M_011 /* + X_211(dx) * m_b->M_010 */ + /* + X_220(dx) * m_b->M_001 */ + + X_221(dx) * m_b->M_000; + m_a->M_230 = m_b->M_230 + X_010(dx) * m_b->M_220 + X_020(dx) * m_b->M_210 + + X_030(dx) * m_b->M_200 + X_100(dx) * m_b->M_130 + + X_110(dx) * m_b->M_120 + + X_120(dx) * m_b->M_110 /* + X_130(dx) * m_b->M_100 */ + + X_200(dx) * m_b->M_030 + + X_210(dx) * m_b->M_020 /* + X_220(dx) * m_b->M_010 */ + + X_230(dx) * m_b->M_000; + m_a->M_302 = m_b->M_302 + X_001(dx) * m_b->M_301 + X_002(dx) * m_b->M_300 + + X_100(dx) * m_b->M_202 + X_101(dx) * m_b->M_201 + + X_102(dx) * m_b->M_200 + X_200(dx) * m_b->M_102 + + X_201(dx) * m_b->M_101 /* + X_202(dx) * m_b->M_100 */ + + X_300(dx) * m_b->M_002 /* + X_301(dx) * m_b->M_001 */ + + 
X_302(dx) * m_b->M_000; + m_a->M_311 = m_b->M_311 + X_001(dx) * m_b->M_310 + X_010(dx) * m_b->M_301 + + X_011(dx) * m_b->M_300 + X_100(dx) * m_b->M_211 + + X_101(dx) * m_b->M_210 + X_110(dx) * m_b->M_201 + + X_111(dx) * m_b->M_200 + X_200(dx) * m_b->M_111 + + X_201(dx) * m_b->M_110 + + X_210(dx) * m_b->M_101 /* + X_211(dx) * m_b->M_100 */ + + X_300(dx) * m_b->M_011 /* + X_301(dx) * m_b->M_010 */ + /* + X_310(dx) * m_b->M_001 */ + + X_311(dx) * m_b->M_000; + m_a->M_320 = m_b->M_320 + X_010(dx) * m_b->M_310 + X_020(dx) * m_b->M_300 + + X_100(dx) * m_b->M_220 + X_110(dx) * m_b->M_210 + + X_120(dx) * m_b->M_200 + X_200(dx) * m_b->M_120 + + X_210(dx) * m_b->M_110 /* + X_220(dx) * m_b->M_100 */ + + X_300(dx) * m_b->M_020 /* + X_310(dx) * m_b->M_010 */ + + X_320(dx) * m_b->M_000; m_a->M_401 = m_b->M_401 + X_001(dx) * m_b->M_400 + X_100(dx) * m_b->M_301 + X_101(dx) * m_b->M_300 + X_200(dx) * m_b->M_201 + - X_201(dx) * m_b->M_200 + X_300(dx) * m_b->M_101 + - X_301(dx) * m_b->M_100 + X_400(dx) * m_b->M_001 + - X_401(dx) * m_b->M_000; + X_201(dx) * m_b->M_200 + + X_300(dx) * m_b->M_101 /* + X_301(dx) * m_b->M_100 */ + /* + X_400(dx) * m_b->M_001 */ + + X_401(dx) * m_b->M_000; m_a->M_410 = m_b->M_410 + X_010(dx) * m_b->M_400 + X_100(dx) * m_b->M_310 + X_110(dx) * m_b->M_300 + X_200(dx) * m_b->M_210 + - X_210(dx) * m_b->M_200 + X_300(dx) * m_b->M_110 + - X_310(dx) * m_b->M_100 + X_400(dx) * m_b->M_010 + - X_410(dx) * m_b->M_000; + X_210(dx) * m_b->M_200 + + X_300(dx) * m_b->M_110 /* + X_310(dx) * m_b->M_100 */ + /* + X_400(dx) * m_b->M_010 */ + + X_410(dx) * m_b->M_000; m_a->M_500 = m_b->M_500 + X_100(dx) * m_b->M_400 + X_200(dx) * m_b->M_300 + - X_300(dx) * m_b->M_200 + X_400(dx) * m_b->M_100 + + X_300(dx) * m_b->M_200 /* + X_400(dx) * m_b->M_100 */ + X_500(dx) * m_b->M_000; #endif #if SELF_GRAVITY_MULTIPOLE_ORDER > 5 @@ -1926,6 +2086,150 @@ __attribute__((nonnull)) INLINE static void gravity_M2L_symmetric( gravity_M2L_apply(l_a, m_b, &pot); } +/** + * @brief Compute the field 
tensor due to a multipole and the symmetric + * equivalent. + * + * @param l_b The field tensor to compute. + * @param ga The @gpart sourcing the field. + * @param pos_b The position of field tensor b. + * @param props The #gravity_props of this calculation. + * @param periodic Is the calculation periodic ? + * @param dim The size of the simulation box. + * @param rs_inv The inverse of the gravity mesh-smoothing scale. + */ +__attribute__((nonnull)) INLINE static void gravity_P2L( + struct grav_tensor *l_b, const struct gpart *ga, const double pos_b[3], + const struct gravity_props *props, const int periodic, const double dim[3], + const float rs_inv) { + +#ifdef SWIFT_DEBUG_CHECKS + /* Count all interactions + * Note that despite being in a section of the code protected by locks, + * we must use atomics here as the long-range task may update this + * counter in a lock-free section of code. */ + accumulate_inc_ll(&l_b->num_interacted); +#endif + +#ifdef SWIFT_GRAVITY_FORCE_CHECKS + /* Count tree interactions + * Note that despite being in a section of the code protected by locks, + * we must use atomics here as the long-range task may update this + * counter in a lock-free section of code. */ + accumulate_inc_ll(&l_b->num_interacted_tree); +#endif + + /* Record that this tensor has received contributions */ + l_b->interacted = 1; + + /* Recover some constants */ + const float eps = gravity_get_softening(ga, props); + const float mass = ga->mass; + + /* Compute distance vector */ + float dx = (float)(pos_b[0] - ga->x[0]); + float dy = (float)(pos_b[1] - ga->x[1]); + float dz = (float)(pos_b[2] - ga->x[2]); + + /* Apply BC */ + if (periodic) { + dx = nearest(dx, dim[0]); + dy = nearest(dy, dim[1]); + dz = nearest(dz, dim[2]); + } + + /* Compute distance */ + const float r2 = dx * dx + dy * dy + dz * dz; + const float r_inv = 1. 
/ sqrtf(r2); + + /* Compute all derivatives */ + struct potential_derivatives_M2L pot; + potential_derivatives_compute_M2L(dx, dy, dz, r2, r_inv, eps, periodic, + rs_inv, &pot); + + /* 0th order contributions */ + l_b->F_000 += mass * pot.D_000; + +#if SELF_GRAVITY_MULTIPOLE_ORDER > 0 + + /* 1st order contributions */ + l_b->F_001 += mass * pot.D_001; + l_b->F_010 += mass * pot.D_010; + l_b->F_100 += mass * pot.D_100; +#endif +#if SELF_GRAVITY_MULTIPOLE_ORDER > 1 + + /* 2nd order contributions */ + l_b->F_002 += mass * pot.D_002; + l_b->F_011 += mass * pot.D_011; + l_b->F_020 += mass * pot.D_020; + l_b->F_101 += mass * pot.D_101; + l_b->F_110 += mass * pot.D_110; + l_b->F_200 += mass * pot.D_200; +#endif +#if SELF_GRAVITY_MULTIPOLE_ORDER > 2 + + /* 3rd order contributions */ + l_b->F_003 += mass * pot.D_003; + l_b->F_012 += mass * pot.D_012; + l_b->F_021 += mass * pot.D_021; + l_b->F_030 += mass * pot.D_030; + l_b->F_102 += mass * pot.D_102; + l_b->F_111 += mass * pot.D_111; + l_b->F_120 += mass * pot.D_120; + l_b->F_201 += mass * pot.D_201; + l_b->F_210 += mass * pot.D_210; + l_b->F_300 += mass * pot.D_300; +#endif +#if SELF_GRAVITY_MULTIPOLE_ORDER > 3 + + /* 4th order contributions */ + l_b->F_004 += mass * pot.D_004; + l_b->F_013 += mass * pot.D_013; + l_b->F_022 += mass * pot.D_022; + l_b->F_031 += mass * pot.D_031; + l_b->F_040 += mass * pot.D_040; + l_b->F_103 += mass * pot.D_103; + l_b->F_112 += mass * pot.D_112; + l_b->F_121 += mass * pot.D_121; + l_b->F_130 += mass * pot.D_130; + l_b->F_202 += mass * pot.D_202; + l_b->F_211 += mass * pot.D_211; + l_b->F_220 += mass * pot.D_220; + l_b->F_301 += mass * pot.D_301; + l_b->F_310 += mass * pot.D_310; + l_b->F_400 += mass * pot.D_400; +#endif +#if SELF_GRAVITY_MULTIPOLE_ORDER > 4 + + /* 5th order contributions */ + l_b->F_005 += mass * pot.D_005; + l_b->F_014 += mass * pot.D_014; + l_b->F_023 += mass * pot.D_023; + l_b->F_032 += mass * pot.D_032; + l_b->F_041 += mass * pot.D_041; + l_b->F_050 += mass * pot.D_050; 
+ l_b->F_104 += mass * pot.D_104; + l_b->F_113 += mass * pot.D_113; + l_b->F_122 += mass * pot.D_122; + l_b->F_131 += mass * pot.D_131; + l_b->F_140 += mass * pot.D_140; + l_b->F_203 += mass * pot.D_203; + l_b->F_212 += mass * pot.D_212; + l_b->F_221 += mass * pot.D_221; + l_b->F_230 += mass * pot.D_230; + l_b->F_302 += mass * pot.D_302; + l_b->F_311 += mass * pot.D_311; + l_b->F_320 += mass * pot.D_320; + l_b->F_401 += mass * pot.D_401; + l_b->F_410 += mass * pot.D_410; + l_b->F_500 += mass * pot.D_500; +#endif +#if SELF_GRAVITY_MULTIPOLE_ORDER > 5 +#error "Missing implementation for order >5" +#endif +} + /** * @brief Compute the reduced field tensor due to a multipole * @@ -1942,7 +2246,7 @@ __attribute__((nonnull)) INLINE static void gravity_M2L_symmetric( * @param rs_inv The inverse of the gravity mesh-smoothing scale. * @param l (return) The #reduced_grav_tensor to compute. */ -__attribute__((nonnull)) INLINE static void gravity_M2P( +__attribute__((always_inline, nonnull)) INLINE static void gravity_M2P( const struct multipole *const m, const float r_x, const float r_y, const float r_z, const float r2, const float eps, const int periodic, const float rs_inv, struct reduced_grav_tensor *const l) { @@ -2689,77 +2993,16 @@ __attribute__((nonnull)) INLINE static void gravity_L2P( #endif /* Update the particle */ - accumulate_add_f(&gp->a_grav[0], a_grav[0]); - accumulate_add_f(&gp->a_grav[1], a_grav[1]); - accumulate_add_f(&gp->a_grav[2], a_grav[2]); + gp->a_grav[0] += a_grav[0]; + gp->a_grav[1] += a_grav[1]; + gp->a_grav[2] += a_grav[2]; gravity_add_comoving_potential(gp, pot); #ifdef SWIFT_GRAVITY_FORCE_CHECKS - accumulate_add_f(&gp->a_grav_m2l[0], a_grav[0]); - accumulate_add_f(&gp->a_grav_m2l[1], a_grav[1]); - accumulate_add_f(&gp->a_grav_m2l[2], a_grav[2]); + gp->a_grav_m2l[0] += a_grav[0]; + gp->a_grav_m2l[1] += a_grav[1]; + gp->a_grav_m2l[2] += a_grav[2]; #endif } -/** - * @brief Checks whether a cell-cell interaction can be appromixated by a M-M - * 
interaction using the distance and cell radius. - * - * We use the multipole acceptance criterion of Dehnen, 2002, JCoPh, Volume 179, - * Issue 1, pp.27-42, equation 10. - * - * We also additionally check that the distance between the multipoles - * is larger than the softening lengths (here the distance at which - * the gravity becomes Newtonian again, not the Plummer-equivalent quantity). - * - * @param r_crit_a The size of the multipole A. - * @param r_crit_b The size of the multipole B. - * @param theta_crit2 The square of the critical opening angle. - * @param r2 Square of the distance (periodically wrapped) between the - * multipoles. - * @param epsilon_a The maximal softening length of any particle in A. - * @param epsilon_b The maximal softening length of any particle in B. - */ -__attribute__((always_inline, const)) INLINE static int gravity_M2L_accept( - const double r_crit_a, const double r_crit_b, const double theta_crit2, - const double r2, const double epsilon_a, const double epsilon_b) { - - const double size = r_crit_a + r_crit_b; - const double size2 = size * size; - const double epsilon_a2 = epsilon_a * epsilon_a; - const double epsilon_b2 = epsilon_b * epsilon_b; - - // MATTHIEU: Make this mass-dependent ? - - /* Multipole acceptance criterion (Dehnen 2002, eq.10) */ - return (r2 * theta_crit2 > size2) && (r2 > epsilon_a2) && (r2 > epsilon_b2); -} - -/** - * @brief Checks whether a particle-cell interaction can be appromixated by a - * M2P interaction using the distance and cell radius. - * - * We use the multipole acceptance criterion of Dehnen, 2002, JCoPh, Volume 179, - * Issue 1, pp.27-42, equation 10. - * - * We also additionally check that the distance between the particle and the - * multipole is larger than the softening length (here the distance at which - * the gravity becomes Newtonian again, not the Plummer-equivalent quantity). - * - * @param r_max2 The square of the size of the multipole. 
- * @param theta_crit2 The square of the critical opening angle. - * @param r2 Square of the distance (periodically wrapped) between the - * particle and the multipole. - * @param epsilon The softening length of the particle. - */ -__attribute__((always_inline, const)) INLINE static int gravity_M2P_accept( - const float r_max2, const float theta_crit2, const float r2, - const float epsilon) { - - // MATTHIEU: Make this mass-dependent ? - - /* Multipole acceptance criterion (Dehnen 2002, eq.10) */ - return (r2 * theta_crit2 > r_max2) && (r2 > epsilon * epsilon); -} - #endif /* SWIFT_MULTIPOLE_H */ diff --git a/src/multipole_accept.h b/src/multipole_accept.h new file mode 100644 index 0000000000000000000000000000000000000000..ead2139571f4a037376ac703aa8997e7bfa05282 --- /dev/null +++ b/src/multipole_accept.h @@ -0,0 +1,285 @@ +/******************************************************************************* + * This file is part of SWIFT. + * Copyright (c) 2016 Matthieu Schaller (schaller@strw.leidenuniv.nl) + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published + * by the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public License + * along with this program. If not, see <http://www.gnu.org/licenses/>. + * + ******************************************************************************/ +#ifndef SWIFT_MULTIPOLE_ACCEPT_H +#define SWIFT_MULTIPOLE_ACCEPT_H + +/* Config parameters. 
*/ +#include "../config.h" + +/* Local includes */ +#include "binomial.h" +#include "gravity_properties.h" +#include "integer_power.h" +#include "kernel_long_gravity.h" +#include "minmax.h" +#include "multipole_struct.h" + +/** + * @brief Compute the inverse of the force estimator entering the MAC + * + * Note that in the unsofted case, the first condition is naturally + * never reached (as H == 0). In the non-periodic (non-truncated) case + * the second condition is never reached (as r_s == inf, r_s_inv == 0). + * + * @param H The spline softening length. + * @param r_s_inv The inverse of the scale of the gravity mesh. + * @param r2 The square of the distance between the multipoles. + */ +__attribute__((const)) INLINE static float gravity_f_MAC_inverse( + const float H, const float r_s_inv, const float r2) { + + if (r2 < (25.f / 81.f) * H * H) { + + /* Below softening radius */ + return (25.f / 81.f) * H * H; + + } else if (r_s_inv * r_s_inv * r2 > (25.f / 9.f)) { + + /* Above truncation radius */ + return (9.f / 25.f) * r_s_inv * r_s_inv * r2 * r2; + + } else { + + /* Normal Newtonian case */ + return r2; + } +} + +/** + * @brief Checks whether The multipole in B can be used to update the field + * tensor in A. + * + * We use the MAC of Dehnen 2014 eq. 16. + * + * Note: this is *not* symmetric in A<->B unless the purely geometric criterion + * is used. + * + * @param props The properties of the gravity scheme. + * @param A The gravity tensors that we want to update (sink). + * @param B The gravity tensors that act as a source. + * @param r2 The square of the distance between the centres of mass of A and B. + * @param use_rebuild_sizes Are we considering the sizes at the last tree-build + * (1) or current sizes (0)? + * @param periodic Are we using periodic BCs? 
+ */ +__attribute__((nonnull, pure)) INLINE static int gravity_M2L_accept( + const struct gravity_props *props, const struct gravity_tensors *restrict A, + const struct gravity_tensors *restrict B, const float r2, + const int use_rebuild_sizes, const int periodic) { + + /* Order of the expansion */ + const int p = SELF_GRAVITY_MULTIPOLE_ORDER; + + /* Sizes of the multipoles */ + const float rho_A = use_rebuild_sizes ? A->r_max_rebuild : A->r_max; + const float rho_B = use_rebuild_sizes ? B->r_max_rebuild : B->r_max; + + /* Get the softening */ + const float max_softening = + max(A->m_pole.max_softening, B->m_pole.max_softening); + + /* Compute the error estimator (without the 1/M_B term that cancels out) */ + float E_BA_term = 0.f; + for (int n = 0; n <= p; ++n) { + E_BA_term += + binomial(p, n) * B->m_pole.power[n] * integer_powf(rho_A, p - n); + } + E_BA_term *= 8.f; + if (rho_A + rho_B > 0.f) { + E_BA_term *= max(rho_A, rho_B); + E_BA_term /= (rho_A + rho_B); + } + + /* Compute r^p */ +#if SELF_GRAVITY_MULTIPOLE_ORDER % 2 == 1 + const float r_to_p = integer_powf(sqrtf(r2), p); +#else + const float r_to_p = integer_powf(r2, (p / 2)); +#endif + + float f_MAC_inv; + if (props->consider_truncation_in_MAC) { + f_MAC_inv = gravity_f_MAC_inverse(max_softening, props->r_s_inv, r2); + } else { + f_MAC_inv = r2; + } + + /* Get the mimimal acceleration in A */ + const float min_a_grav = A->m_pole.min_old_a_grav_norm; + + /* Get the relative tolerance */ + const float eps = props->adaptive_tolerance; + + /* Get the basic geometric critical angle */ + const float theta_crit = props->theta_crit; + const float theta_crit2 = theta_crit * theta_crit; + + /* Get the sum of the multipole sizes */ + const float rho_sum = rho_A + rho_B; + + if (props->use_advanced_MAC) { + +#ifdef SWIFT_DEBUG_CHECKS + if (min_a_grav == 0.) 
error("Acceleration is 0"); +#endif + + /* Test the different conditions */ + + /* Condition 1: We are in the converging part of the Taylor expansion */ + const int cond_1 = rho_sum * rho_sum < r2; + + /* Condition 2: We are not below softening */ + const int cond_2 = + props->use_tree_below_softening || max_softening * max_softening < r2; + + /* Condition 3: The contribution is accurate enough + * (E_BA * (1 / r^(p)) * ((1 / r^2) * W) < eps * a_min) */ + const int cond_3 = E_BA_term < eps * min_a_grav * r_to_p * f_MAC_inv; + + return cond_1 && cond_2 && cond_3; + + } else { + + /* Condition 1: We are obeying the purely geometric criterion */ + const int cond_1 = rho_sum * rho_sum < theta_crit2 * r2; + + /* Condition 2: We are not below softening */ + const int cond_2 = + props->use_tree_below_softening || max_softening * max_softening < r2; + + return cond_1 && cond_2; + } +} + +/** + * @brief Checks whether The multipole in B can be used to update the field + * tensor in A and whether the multipole in A can be used to update the field + * tensor in B. + * + * We use the MAC of Dehnen 2014 eq. 16. + * + * @param props The properties of the gravity scheme. + * @param A The first set of multipole and gravity tensors. + * @param B The second set of multipole and gravity tensors. + * @param r2 The square of the distance between the centres of mass of A and B. + * @param use_rebuild_sizes Are we considering the sizes at the last tree-build + * (1) or current sizes (0)? + * @param periodic Are we using periodic BCs? 
+ */ +__attribute__((nonnull, pure)) INLINE static int gravity_M2L_accept_symmetric( + const struct gravity_props *props, const struct gravity_tensors *restrict A, + const struct gravity_tensors *restrict B, const float r2, + const int use_rebuild_sizes, const int periodic) { + + return gravity_M2L_accept(props, A, B, r2, use_rebuild_sizes, periodic) && + gravity_M2L_accept(props, B, A, r2, use_rebuild_sizes, periodic); +} + +/** + * @brief Checks whether The multipole in B can be used to update the particle + * pa + * + * We use the MAC of Dehnen 2014 eq. 16. + * + * @param props The properties of the gravity scheme. + * @param pa The particle we want to compute forces for (sink) + * @param B The gravity tensors that act as a source. + * @param r2 The square of the distance between pa and the centres of mass of B. + * @param periodic Are we using periodic BCs? + */ +__attribute__((nonnull, pure)) INLINE static int gravity_M2P_accept( + const struct gravity_props *props, const struct gpart *pa, + const struct gravity_tensors *B, const float r2, const int periodic) { + + /* Order of the expansion */ + const int p = SELF_GRAVITY_MULTIPOLE_ORDER; + + /* Sizes of the multipoles */ + const float rho_B = B->r_max; + + /* Get the maximal softening */ + const float max_softening = + max(B->m_pole.max_softening, gravity_get_softening(pa, props)); + +#ifdef SWIFT_DEBUG_CHECKS + if (rho_B == 0.) 
error("Size of multipole B is 0!"); +#endif + + /* Compute the error estimator (without the 1/M_B term that cancels out) */ + const float E_BA_term = 8.f * B->m_pole.power[p]; + + /* Compute r^p */ +#if SELF_GRAVITY_MULTIPOLE_ORDER % 2 == 1 + const float r_to_p = integer_powf(sqrtf(r2), p); +#else + const float r_to_p = integer_powf(r2, (p / 2)); +#endif + + float f_MAC_inv; + if (props->consider_truncation_in_MAC) { + f_MAC_inv = gravity_f_MAC_inverse(max_softening, props->r_s_inv, r2); + } else { + f_MAC_inv = r2; + } + + /* Get the estimate of the acceleration */ + const float old_a_grav = pa->old_a_grav_norm; + + /* Get the relative tolerance */ + const float eps = props->adaptive_tolerance; + + /* Get the basic geometric critical angle */ + const float theta_crit = props->theta_crit; + const float theta_crit2 = theta_crit * theta_crit; + + if (props->use_advanced_MAC) { + +#ifdef SWIFT_DEBUG_CHECKS + if (old_a_grav == 0.) error("Acceleration is 0"); +#endif + + /* Test the different conditions */ + + /* Condition 1: We are in the converging part of the Taylor expansion */ + const int cond_1 = rho_B * rho_B < r2; + + /* Condition 2: We are not below softening */ + const int cond_2 = + props->use_tree_below_softening || max_softening * max_softening < r2; + + /* Condition 3: The contribution is accurate enough + * (E_BA * (1 / r^(p)) * ((1 / r^2) * W) < eps * a_min) */ + const int cond_3 = E_BA_term < eps * old_a_grav * r_to_p * f_MAC_inv; + + return cond_1 && cond_2 && cond_3; + + } else { + + /* Condition 1: We are obeying the purely geometric criterion */ + const int cond_1 = rho_B * rho_B < theta_crit2 * r2; + + /* Condition 2: We are not below softening */ + const int cond_2 = + props->use_tree_below_softening || max_softening * max_softening < r2; + + return cond_1 && cond_2; + } +} + +#endif /* SWIFT_MULTIPOLE_ACCEPT_H */ diff --git a/src/multipole_struct.h b/src/multipole_struct.h index 
ee5e525e286434d385cd2e16d7ed62668702a2ff..ffd615cd798aa3963491ad31fb86f6af8f9e5d89 100644 --- a/src/multipole_struct.h +++ b/src/multipole_struct.h @@ -121,19 +121,27 @@ struct multipole { /*! Maximal co-moving softening of all the #gpart in the mulipole */ float max_softening; + /*! Minimal acceleration norm of all the #gpart in the mulipole */ + float min_old_a_grav_norm; + + /*! Mulipole power for the different orders */ + float power[SELF_GRAVITY_MULTIPOLE_ORDER + 1]; + /* 0th order term */ float M_000; #if SELF_GRAVITY_MULTIPOLE_ORDER > 0 - /* 1st order terms */ - float M_100, M_010, M_001; + /* 1st order terms (all 0 since we expand around CoM) */ + // float M_100, M_010, M_001; + #endif #if SELF_GRAVITY_MULTIPOLE_ORDER > 1 /* 2nd order terms */ float M_200, M_020, M_002; float M_110, M_101, M_011; + #endif #if SELF_GRAVITY_MULTIPOLE_ORDER > 2 @@ -225,4 +233,13 @@ struct reduced_grav_tensor { float F_001; }; +#ifdef WITH_MPI +/* MPI datatypes for transfers */ +extern MPI_Datatype multipole_mpi_type; +extern MPI_Op multipole_mpi_reduce_op; + +void multipole_create_mpi_types(void); +void multipole_free_mpi_types(void); +#endif + #endif /* SWIFT_MULTIPOLE_STRUCT_H */ diff --git a/src/runner_doiact_grav.c b/src/runner_doiact_grav.c index 91abe9e91060b19fa6df1bbbb29e0de6e6442cad..c1d24b68505ee7d0fd7804ce6f26ec3fc8224772 100644 --- a/src/runner_doiact_grav.c +++ b/src/runner_doiact_grav.c @@ -99,6 +99,11 @@ void runner_do_grav_down(struct runner *r, struct cell *c, int timer) { if (!cell_are_gpart_drifted(c, e)) error("Un-drifted gparts"); +#ifndef SWIFT_TASKS_WITHOUT_ATOMICS + /* Lock the cell for the particle updates */ + lock_lock(&c->grav.plock); +#endif + /* Cell properties */ struct gpart *gparts = c->grav.parts; const int gcount = c->grav.count; @@ -133,11 +138,397 @@ void runner_do_grav_down(struct runner *r, struct cell *c, int timer) { gravity_L2P(pot, CoM, gp); } } + +#ifndef SWIFT_TASKS_WITHOUT_ATOMICS + /* All done -> unlock the cell */ + if 
(lock_unlock(&c->grav.plock) != 0) error("Error unlocking cell"); +#endif } if (timer) TIMER_TOC(timer_dograv_down); }
+/**
+ * @brief Compute the fully Newtonian gravitational forces from particles in
+ * one array onto the particles in another array
+ *
+ * This function *must* be called at the leaf level for particles i.
+ *
+ * @param gparts_i The particles receiving forces (at leaf level).
+ * @param gcount_i The number of particles receiving forces.
+ * @param gparts_j The particles giving forces (at any level).
+ * @param gcount_j The number of particles giving forces.
+ * @param e The #engine structure.
+ * @param grav_props The properties of the gravity scheme.
+ * @param cache_i The gravity cache to use to store the results in i.
+ * @param ci The (leaf-)cell containing the particles i.
+ * @param multi_j The multipole in cell j.
+ */
+static INLINE void runner_dopair_grav_pp_full_no_cache(
+    struct gpart *restrict gparts_i, const int gcount_i,
+    const struct gpart *restrict gparts_j, const int gcount_j,
+    const struct engine *e, const struct gravity_props *grav_props,
+    struct gravity_cache *cache_i, struct cell *ci,
+    const struct gravity_tensors *multi_j) {
+
+  /* Zero the output arrays of the i cache (count padded with VEC_SIZE) */
+  const int gcount_padded_i = gcount_i - (gcount_i % VEC_SIZE) + VEC_SIZE;
+  gravity_cache_zero_output(cache_i, gcount_padded_i);
+
+#ifdef SWIFT_DEBUG_CHECKS
+  if (ci->split) error("Using function above leaf level!");
+#endif
+
+  /* Loop over sink particles */
+  for (int i = 0; i < gcount_i; ++i) {
+
+    struct gpart *gpi = &gparts_i[i];
+
+    /* Ignore inactive particles */
+    if (!gpart_is_active(gpi, e)) continue;
+
+#ifdef SWIFT_DEBUG_CHECKS
+    /* Check that particles have been drifted to the current time */
+    if (gpi->ti_drift != e->ti_current)
+      error("gpi not drifted to current time");
+
+    /* Check that the particle was initialised */
+    if (gpi->initialised == 0)
+      error("Adding forces to an un-initialised gpart.");
+#endif
+
+    const float x_i = gpi->x[0];
+    const float y_i = gpi->x[1];
+    const float z_i = gpi->x[2];
+    const float h_i = gravity_get_softening(gpi, grav_props);
+
+    /* Local accumulators for the acceleration and potential */
+    float a_x = 0.f, a_y = 0.f, a_z = 0.f, pot = 0.f;
+
+    /* Now, we can start the interactions for that particle */
+
+    /* Distance to the Multipole (no box wrap: this is non-periodic) */
+    const float CoM_j[3] = {multi_j->CoM[0], multi_j->CoM[1], multi_j->CoM[2]};
+    const float dx_multi = CoM_j[0] - x_i;
+    const float dy_multi = CoM_j[1] - y_i;
+    const float dz_multi = CoM_j[2] - z_i;
+
+    const float r2_multi =
+        dx_multi * dx_multi + dy_multi * dy_multi + dz_multi * dz_multi;
+
+    /* Can we use the Multipole here? (Non-periodic, like the P2P below) */
+    if (gcount_j > 1 && gravity_M2P_accept(grav_props, gpi, multi_j, r2_multi,
+                                           /*periodic=*/0)) {
+
+      const float h_inv_i = 1.f / h_i;
+
+      /* Interact! */
+      float f_x, f_y, f_z, pot_ij;
+      runner_iact_grav_pm_full(dx_multi, dy_multi, dz_multi, r2_multi, h_i,
+                               h_inv_i, &multi_j->m_pole, &f_x, &f_y, &f_z,
+                               &pot_ij);
+
+      /* Store it back */
+      a_x += f_x;
+      a_y += f_y;
+      a_z += f_z;
+      pot += pot_ij;
+
+#ifdef SWIFT_DEBUG_CHECKS
+      /* Update the interaction counter */
+      accumulate_add_ll(&gparts_i[i].num_interacted, multi_j->m_pole.num_gpart);
+#endif
+
+#ifdef SWIFT_GRAVITY_FORCE_CHECKS
+      /* Update the M2P interaction counter and forces. */
+      accumulate_add_ll(&gparts_i[i].num_interacted_m2p,
+                        multi_j->m_pole.num_gpart);
+      gparts_i[i].a_grav_m2p[0] += f_x;
+      gparts_i[i].a_grav_m2p[1] += f_y;
+      gparts_i[i].a_grav_m2p[2] += f_z;
+#endif
+
+    } else {
+
+      /* Loop over source particles */
+      for (int j = 0; j < gcount_j; ++j) {
+
+        const struct gpart *gpj = &gparts_j[j];
+
+        /* Ignore inhibited particles */
+        if (gpart_is_inhibited(gpj, e)) continue;
+
+        /* Get info about j */
+        const float x_j = gpj->x[0];
+        const float y_j = gpj->x[1];
+        const float z_j = gpj->x[2];
+        const float mass_j = gpj->mass;
+        const float h_j = gravity_get_softening(gpj, grav_props);
+
+        /* Compute the pairwise distance.
+           Note: no need for box wrap here! This is non-periodic */
+        const float dx = x_j - x_i;
+        const float dy = y_j - y_i;
+        const float dz = z_j - z_i;
+
+        const float r2 = dx * dx + dy * dy + dz * dz;
+
+        /* Pick the maximal softening length of i and j */
+        const float h = max(h_i, h_j);
+        const float h2 = h * h;
+        const float h_inv = 1.f / h;
+        const float h_inv_3 = h_inv * h_inv * h_inv;
+
+#ifdef SWIFT_DEBUG_CHECKS
+        if (r2 == 0.f && h2 == 0.)
+          error("Interacting particles with 0 distance and 0 softening.");
+
+        /* Check that particles have been drifted to the current time */
+        if (gpj->ti_drift != e->ti_current)
+          error("gpj not drifted to current time");
+#endif
+
+        /* Interact! */
+        float f_ij, pot_ij;
+        runner_iact_grav_pp_full(r2, h2, h_inv, h_inv_3, mass_j, &f_ij,
+                                 &pot_ij);
+
+        /* Store it back */
+        a_x += f_ij * dx;
+        a_y += f_ij * dy;
+        a_z += f_ij * dz;
+        pot += pot_ij;
+
+#ifdef SWIFT_DEBUG_CHECKS
+        /* Update the interaction counter */
+        accumulate_inc_ll(&gparts_i[i].num_interacted);
+#endif
+
+#ifdef SWIFT_GRAVITY_FORCE_CHECKS
+        /* Update the p2p counter and forces (this pair's contribution only) */
+        accumulate_inc_ll(&gparts_i[i].num_interacted_p2p);
+        gparts_i[i].a_grav_p2p[0] += f_ij * dx;
+        gparts_i[i].a_grav_p2p[1] += f_ij * dy;
+        gparts_i[i].a_grav_p2p[2] += f_ij * dz;
+#endif
+      }
+    }
+    /* Store everything back in cache */
+    cache_i->a_x[i] += a_x;
+    cache_i->a_y[i] += a_y;
+    cache_i->a_z[i] += a_z;
+    cache_i->pot[i] += pot;
+  }
+
+  /* Write back to the particle data (under ci's plock when compiled in) */
+#ifndef SWIFT_TASKS_WITHOUT_ATOMICS
+  lock_lock(&ci->grav.plock);
+#endif
+  gravity_cache_write_back(cache_i, ci->grav.parts, gcount_i);
+#ifndef SWIFT_TASKS_WITHOUT_ATOMICS
+  if (lock_unlock(&ci->grav.plock) != 0) error("Error unlocking cell");
+#endif
+}
+
+/** + * @brief Compute the long-range truncated gravitational forces from particles + * one array onto the particles in another array + * + * This function *must* be called at the leaf level for particles i.
+ *
+ * @param gparts_i The particles receiving forces (at leaf level).
+ * @param gcount_i The number of particles receiving forces.
+ * @param gparts_j The particles giving forces (at any level).
+ * @param gcount_j The number of particles giving forces.
+ * @param dim The size of the computational domain (used for the box wrap).
+ * @param e The #engine structure.
+ * @param grav_props The properties of the gravity scheme.
+ * @param cache_i The gravity cache to use to store the results in i.
+ * @param ci The (leaf-)cell containing the particles i.
+ * @param multi_j The multipole in cell j.
+ */
+static INLINE void runner_dopair_grav_pp_truncated_no_cache(
+    struct gpart *restrict gparts_i, const int gcount_i,
+    const struct gpart *restrict gparts_j, const int gcount_j,
+    const float dim[3], const struct engine *e,
+    const struct gravity_props *grav_props, struct gravity_cache *cache_i,
+    struct cell *ci, const struct gravity_tensors *multi_j) {
+
+#ifdef SWIFT_DEBUG_CHECKS
+  if (!e->s->periodic)
+    error("Calling truncated PP function in non-periodic setup.");
+
+  if (ci->split) error("Using function above leaf level!");
+#endif
+
+  const float r_s_inv = grav_props->r_s_inv;
+
+  /* Zero the output arrays of the i cache (count padded with VEC_SIZE) */
+  const int gcount_padded_i = gcount_i - (gcount_i % VEC_SIZE) + VEC_SIZE;
+  gravity_cache_zero_output(cache_i, gcount_padded_i);
+
+  /* Loop over sink particles */
+  for (int i = 0; i < gcount_i; ++i) {
+
+    struct gpart *gpi = &gparts_i[i];
+
+    /* Ignore inactive particles */
+    if (!gpart_is_active(gpi, e)) continue;
+
+#ifdef SWIFT_DEBUG_CHECKS
+    /* Check that particles have been drifted to the current time */
+    if (gpi->ti_drift != e->ti_current)
+      error("gpi not drifted to current time");
+
+    /* Check that the particle was initialised */
+    if (gpi->initialised == 0)
+      error("Adding forces to an un-initialised gpart.");
+#endif
+
+    const float x_i = gpi->x[0];
+    const float y_i = gpi->x[1];
+    const float z_i = gpi->x[2];
+    const float h_i = gravity_get_softening(gpi, grav_props);
+
+    /* Local accumulators for the acceleration and potential */
+    float a_x = 0.f, a_y = 0.f, a_z = 0.f, pot = 0.f;
+
+    /* Now, we can start the interactions for that particle */
+
+    /* Distance to the Multipole */
+    const float CoM_j[3] = {multi_j->CoM[0], multi_j->CoM[1], multi_j->CoM[2]};
+    float dx_multi = CoM_j[0] - x_i;
+    float dy_multi = CoM_j[1] - y_i;
+    float dz_multi = CoM_j[2] - z_i;
+
+    /* Apply periodic BCs */
+    dx_multi = nearestf(dx_multi, dim[0]);
+    dy_multi = nearestf(dy_multi, dim[1]);
+    dz_multi = nearestf(dz_multi, dim[2]);
+
+    const float r2_multi =
+        dx_multi * dx_multi + dy_multi * dy_multi + dz_multi * dz_multi;
+
+    /* Can we use the Multipole here? */
+    if (gcount_j > 1 && gravity_M2P_accept(grav_props, gpi, multi_j, r2_multi,
+                                           /*periodic=*/1)) {
+
+      const float h_inv_i = 1.f / h_i;
+
+      /* Interact! */
+      float f_x, f_y, f_z, pot_ij;
+      runner_iact_grav_pm_truncated(dx_multi, dy_multi, dz_multi, r2_multi, h_i,
+                                    h_inv_i, r_s_inv, &multi_j->m_pole, &f_x,
+                                    &f_y, &f_z, &pot_ij);
+
+      /* Store it back */
+      a_x += f_x;
+      a_y += f_y;
+      a_z += f_z;
+      pot += pot_ij;
+
+#ifdef SWIFT_DEBUG_CHECKS
+      /* Update the interaction counter */
+      accumulate_add_ll(&gparts_i[i].num_interacted, multi_j->m_pole.num_gpart);
+#endif
+
+#ifdef SWIFT_GRAVITY_FORCE_CHECKS
+      /* Update the M2P interaction counter and forces. */
+      accumulate_add_ll(&gparts_i[i].num_interacted_m2p,
+                        multi_j->m_pole.num_gpart);
+      gparts_i[i].a_grav_m2p[0] += f_x;
+      gparts_i[i].a_grav_m2p[1] += f_y;
+      gparts_i[i].a_grav_m2p[2] += f_z;
+#endif
+
+    } else {
+
+      /* Loop over source particles */
+      for (int j = 0; j < gcount_j; ++j) {
+
+        const struct gpart *gpj = &gparts_j[j];
+
+        /* Ignore inhibited particles */
+        if (gpart_is_inhibited(gpj, e)) continue;
+
+        /* Get info about j */
+        const float x_j = gpj->x[0];
+        const float y_j = gpj->x[1];
+        const float z_j = gpj->x[2];
+        const float mass_j = gpj->mass;
+        const float h_j = gravity_get_softening(gpj, grav_props);
+
+        /* Compute the pairwise distance.
+           Note: the periodic box wrap is applied just below */
+        float dx = x_j - x_i;
+        float dy = y_j - y_i;
+        float dz = z_j - z_i;
+
+        /* Correct for periodic BCs */
+        dx = nearestf(dx, dim[0]);
+        dy = nearestf(dy, dim[1]);
+        dz = nearestf(dz, dim[2]);
+
+        const float r2 = dx * dx + dy * dy + dz * dz;
+
+        /* Pick the maximal softening length of i and j */
+        const float h = max(h_i, h_j);
+        const float h2 = h * h;
+        const float h_inv = 1.f / h;
+        const float h_inv_3 = h_inv * h_inv * h_inv;
+
+#ifdef SWIFT_DEBUG_CHECKS
+        if (r2 == 0.f && h2 == 0.)
+          error("Interacting particles with 0 distance and 0 softening.");
+
+        /* Check that particles have been drifted to the current time */
+        if (gpj->ti_drift != e->ti_current)
+          error("gpj not drifted to current time");
+#endif
+
+        /* Interact! */
+        float f_ij, pot_ij;
+        runner_iact_grav_pp_truncated(r2, h2, h_inv, h_inv_3, mass_j, r_s_inv,
+                                      &f_ij, &pot_ij);
+
+        /* Store it back */
+        a_x += f_ij * dx;
+        a_y += f_ij * dy;
+        a_z += f_ij * dz;
+        pot += pot_ij;
+
+#ifdef SWIFT_DEBUG_CHECKS
+        /* Update the interaction counter */
+        accumulate_inc_ll(&gparts_i[i].num_interacted);
+#endif
+
+#ifdef SWIFT_GRAVITY_FORCE_CHECKS
+        /* Update the p2p counter and forces (this pair's contribution only) */
+        accumulate_inc_ll(&gparts_i[i].num_interacted_p2p);
+        gparts_i[i].a_grav_p2p[0] += f_ij * dx;
+        gparts_i[i].a_grav_p2p[1] += f_ij * dy;
+        gparts_i[i].a_grav_p2p[2] += f_ij * dz;
+#endif
+      }
+    }
+
+    /* Store everything back in cache */
+    cache_i->a_x[i] += a_x;
+    cache_i->a_y[i] += a_y;
+    cache_i->a_z[i] += a_z;
+    cache_i->pot[i] += pot;
+  }
+
+  /* Write back to the particle data (under ci's plock when compiled in) */
+#ifndef SWIFT_TASKS_WITHOUT_ATOMICS
+  lock_lock(&ci->grav.plock);
+#endif
+  gravity_cache_write_back(cache_i, ci->grav.parts, gcount_i);
+#ifndef SWIFT_TASKS_WITHOUT_ATOMICS
+  if (lock_unlock(&ci->grav.plock) != 0) error("Error unlocking cell");
+#endif
+}
+
 /** * @brief Compute the non-truncated gravity interactions between all particles * of a cell and the particles of the other cell.
@@ -281,9 +672,9 @@ static INLINE void runner_dopair_grav_pp_full( ci_cache->pot[pid] += pot; #ifdef SWIFT_GRAVITY_FORCE_CHECKS - accumulate_add_f(&gparts_i[pid].a_grav_p2p[0], a_x); - accumulate_add_f(&gparts_i[pid].a_grav_p2p[1], a_y); - accumulate_add_f(&gparts_i[pid].a_grav_p2p[2], a_z); + gparts_i[pid].a_grav_p2p[0] += a_x; + gparts_i[pid].a_grav_p2p[1] += a_y; + gparts_i[pid].a_grav_p2p[2] += a_z; #endif } } @@ -437,9 +828,9 @@ static INLINE void runner_dopair_grav_pp_truncated( ci_cache->pot[pid] += pot; #ifdef SWIFT_GRAVITY_FORCE_CHECKS - accumulate_add_f(&gparts_i[pid].a_grav_p2p[0], a_x); - accumulate_add_f(&gparts_i[pid].a_grav_p2p[1], a_y); - accumulate_add_f(&gparts_i[pid].a_grav_p2p[2], a_z); + gparts_i[pid].a_grav_p2p[0] += a_x; + gparts_i[pid].a_grav_p2p[1] += a_y; + gparts_i[pid].a_grav_p2p[2] += a_z; #endif } } @@ -488,7 +879,12 @@ static INLINE void runner_dopair_grav_pm_full( SWIFT_CACHE_ALIGNMENT); swift_assume_size(gcount_padded_i, VEC_SIZE); + const float multi_epsilon = multi_j->max_softening; + /* Loop over all particles in ci... 
*/ +#ifndef SWIFT_DEBUG_CHECKS +#pragma omp simd +#endif for (int pid = 0; pid < gcount_padded_i; pid++) { /* Skip inactive particles */ @@ -521,7 +917,7 @@ static INLINE void runner_dopair_grav_pm_full( const float z_i = z[pid]; /* Some powers of the softening length */ - const float h_i = epsilon[pid]; + const float h_i = max(epsilon[pid], multi_epsilon); const float h_inv_i = 1.f / h_i; /* Distance to the Multipole */ @@ -539,16 +935,9 @@ static INLINE void runner_dopair_grav_pm_full( const float r2 = dx * dx + dy * dy + dz * dz; #ifdef SWIFT_DEBUG_CHECKS - const float r_max_j = cj->grav.multipole->r_max; - const float r_max2 = r_max_j * r_max_j; - const float theta_crit2 = e->gravity_properties->theta_crit2; - - /* Note: 0.99 and 1.1 to avoid FP rounding false-positives */ - if (!gravity_M2P_accept(r_max2, theta_crit2 * 1.1, r2, 0.99 * h_i)) - error( - "use_mpole[i] set when M2P accept fails CoM=[%e %e %e] pos=[%e %e " - "%e], rmax=%e r=%e epsilon=%e", - CoM_j[0], CoM_j[1], CoM_j[2], x_i, y_i, z_i, r_max_j, sqrtf(r2), h_i); + if (!gravity_M2P_accept(e->gravity_properties, &gparts_i[pid], + cj->grav.multipole, r2 * 1.01, periodic)) + error("use_mpole[i] set when M2P accept fails"); #endif /* Interact! */ @@ -574,9 +963,9 @@ static INLINE void runner_dopair_grav_pm_full( if (pid < gcount_i) { accumulate_add_ll(&gparts_i[pid].num_interacted_m2p, cj->grav.multipole->m_pole.num_gpart); - accumulate_add_f(&gparts_i[pid].a_grav_m2p[0], f_x); - accumulate_add_f(&gparts_i[pid].a_grav_m2p[1], f_y); - accumulate_add_f(&gparts_i[pid].a_grav_m2p[2], f_z); + gparts_i[pid].a_grav_m2p[0] += f_x; + gparts_i[pid].a_grav_m2p[1] += f_y; + gparts_i[pid].a_grav_m2p[2] += f_z; } #endif } @@ -633,7 +1022,12 @@ static INLINE void runner_dopair_grav_pm_truncated( SWIFT_CACHE_ALIGNMENT); swift_assume_size(gcount_padded_i, VEC_SIZE); + const float multi_epsilon = multi_j->max_softening; + /* Loop over all particles in ci... 
*/ +#ifndef SWIFT_DEBUG_CHECKS +#pragma omp simd +#endif for (int pid = 0; pid < gcount_padded_i; pid++) { /* Skip inactive particles */ @@ -666,7 +1060,7 @@ static INLINE void runner_dopair_grav_pm_truncated( const float z_i = z[pid]; /* Some powers of the softening length */ - const float h_i = epsilon[pid]; + const float h_i = max(epsilon[pid], multi_epsilon); const float h_inv_i = 1.f / h_i; /* Distance to the Multipole */ @@ -682,16 +1076,9 @@ static INLINE void runner_dopair_grav_pm_truncated( const float r2 = dx * dx + dy * dy + dz * dz; #ifdef SWIFT_DEBUG_CHECKS - const float r_max_j = cj->grav.multipole->r_max; - const float r_max2 = r_max_j * r_max_j; - const float theta_crit2 = e->gravity_properties->theta_crit2; - - /* 0.99 and 1.1 to avoid FP rounding false-positives */ - if (!gravity_M2P_accept(r_max2, theta_crit2 * 1.1, r2, 0.99 * h_i)) - error( - "use_mpole[i] set when M2P accept fails CoM=[%e %e %e] pos=[%e %e " - "%e], rmax=%e", - CoM_j[0], CoM_j[1], CoM_j[2], x_i, y_i, z_i, r_max_j); + if (!gravity_M2P_accept(e->gravity_properties, &gparts_i[pid], + cj->grav.multipole, r2 * 1.01, /*periodic=*/1)) + error("use_mpole[i] set when M2P accept fails"); #endif /* Interact! */ @@ -717,9 +1104,9 @@ static INLINE void runner_dopair_grav_pm_truncated( if (pid < gcount_i) { accumulate_add_ll(&gparts_i[pid].num_interacted_m2p, cj->grav.multipole->m_pole.num_gpart); - accumulate_add_f(&gparts_i[pid].a_grav_m2p[0], f_x); - accumulate_add_f(&gparts_i[pid].a_grav_m2p[1], f_y); - accumulate_add_f(&gparts_i[pid].a_grav_m2p[2], f_z); + gparts_i[pid].a_grav_m2p[0] += f_x; + gparts_i[pid].a_grav_m2p[1] += f_y; + gparts_i[pid].a_grav_m2p[2] += f_z; } #endif } @@ -742,7 +1129,7 @@ static INLINE void runner_dopair_grav_pm_truncated( * @param ci The first #cell. * @param cj The other #cell. * @param symmetric Are we updating both cells (1) or just ci (0) ? - * @param allow_mpole Are we allowing the use of P2M interactions ? 
+ * @param allow_mpole Are we allowing the use of M2P interactions ? */ void runner_dopair_grav_pp(struct runner *r, struct cell *ci, struct cell *cj, const int symmetric, const int allow_mpole) { @@ -767,6 +1154,7 @@ void runner_dopair_grav_pp(struct runner *r, struct cell *ci, struct cell *cj, if (!ci_active && !cj_active) return; if (!ci_active && !symmetric) return; +#ifdef SWIFT_DEBUG_CHECKS /* Check that we are not doing something stupid */ if (ci->split || cj->split) error("Running P-P on splitable cells"); @@ -777,6 +1165,7 @@ void runner_dopair_grav_pp(struct runner *r, struct cell *ci, struct cell *cj, error("Un-drifted multipole"); if (ci_active && cj->grav.ti_old_multipole != e->ti_current) error("Un-drifted multipole"); +#endif /* Caches to play with */ struct gravity_cache *const ci_cache = &r->ci_gravity_cache; @@ -789,8 +1178,6 @@ void runner_dopair_grav_pp(struct runner *r, struct cell *ci, struct cell *cj, /* Recover the multipole info and shift the CoM locations */ const float rmax_i = ci->grav.multipole->r_max; const float rmax_j = cj->grav.multipole->r_max; - const float rmax2_i = rmax_i * rmax_i; - const float rmax2_j = rmax_j * rmax_j; const struct multipole *multi_i = &ci->grav.multipole->m_pole; const struct multipole *multi_j = &cj->grav.multipole->m_pole; const float CoM_i[3] = {(float)(ci->grav.multipole->CoM[0] - shift_i[0]), @@ -815,13 +1202,18 @@ void runner_dopair_grav_pp(struct runner *r, struct cell *ci, struct cell *cj, gcount_j); #endif + const int allow_multipole_i = allow_mpole && ci->grav.count > 1; + const int allow_multipole_j = allow_mpole && cj->grav.count > 1; + /* Fill the caches */ - gravity_cache_populate(e->max_active_bin, allow_mpole, periodic, dim, + gravity_cache_populate(e->max_active_bin, allow_multipole_j, periodic, dim, ci_cache, ci->grav.parts, gcount_i, gcount_padded_i, - shift_i, CoM_j, rmax2_j, ci, e->gravity_properties); - gravity_cache_populate(e->max_active_bin, allow_mpole, periodic, dim, + shift_i, 
CoM_j, cj->grav.multipole, ci, + e->gravity_properties); + gravity_cache_populate(e->max_active_bin, allow_multipole_i, periodic, dim, cj_cache, cj->grav.parts, gcount_j, gcount_padded_j, - shift_j, CoM_i, rmax2_i, cj, e->gravity_properties); + shift_j, CoM_i, ci->grav.multipole, cj, + e->gravity_properties); /* Can we use the Newtonian version or do we need the truncated one ? */ if (!periodic) { @@ -837,7 +1229,7 @@ void runner_dopair_grav_pp(struct runner *r, struct cell *ci, struct cell *cj, ci->grav.parts, cj->grav.parts); /* Then the M2P */ - if (allow_mpole) + if (allow_multipole_j) runner_dopair_grav_pm_full(ci_cache, gcount_padded_i, CoM_j, multi_j, periodic, dim, e, ci->grav.parts, gcount_i, cj); @@ -850,7 +1242,7 @@ void runner_dopair_grav_pp(struct runner *r, struct cell *ci, struct cell *cj, cj->grav.parts, ci->grav.parts); /* Then the M2P */ - if (allow_mpole) + if (allow_multipole_i) runner_dopair_grav_pm_full(cj_cache, gcount_padded_j, CoM_i, multi_i, periodic, dim, e, cj->grav.parts, gcount_j, ci); @@ -859,8 +1251,14 @@ void runner_dopair_grav_pp(struct runner *r, struct cell *ci, struct cell *cj, } else { /* Periodic BC */ /* Get the relative distance between the CoMs */ - const double dx[3] = {CoM_j[0] - CoM_i[0], CoM_j[1] - CoM_i[1], - CoM_j[2] - CoM_i[2]}; + double dx[3] = {CoM_j[0] - CoM_i[0], CoM_j[1] - CoM_i[1], + CoM_j[2] - CoM_i[2]}; + + /* Correct for periodic BCs */ + dx[0] = nearestf(dx[0], dim[0]); + dx[1] = nearestf(dx[1], dim[1]); + dx[2] = nearestf(dx[2], dim[2]); + const double r2 = dx[0] * dx[0] + dx[1] * dx[1] + dx[2] * dx[2]; /* Get the maximal distance between any two particles */ @@ -880,7 +1278,7 @@ void runner_dopair_grav_pp(struct runner *r, struct cell *ci, struct cell *cj, ci->grav.parts, cj->grav.parts); /* Then the M2P */ - if (allow_mpole) + if (allow_multipole_j) runner_dopair_grav_pm_truncated(ci_cache, gcount_padded_i, CoM_j, multi_j, dim, r_s_inv, e, ci->grav.parts, gcount_i, cj); @@ -893,7 +1291,7 @@ void 
runner_dopair_grav_pp(struct runner *r, struct cell *ci, struct cell *cj, cj->grav.parts, ci->grav.parts); /* Then the M2P */ - if (allow_mpole) + if (allow_multipole_i) runner_dopair_grav_pm_truncated(cj_cache, gcount_padded_j, CoM_i, multi_i, dim, r_s_inv, e, cj->grav.parts, gcount_j, ci); @@ -912,7 +1310,7 @@ void runner_dopair_grav_pp(struct runner *r, struct cell *ci, struct cell *cj, ci->grav.parts, cj->grav.parts); /* Then the M2P */ - if (allow_mpole) + if (allow_multipole_j) runner_dopair_grav_pm_full(ci_cache, gcount_padded_i, CoM_j, multi_j, periodic, dim, e, ci->grav.parts, gcount_i, cj); @@ -925,7 +1323,7 @@ void runner_dopair_grav_pp(struct runner *r, struct cell *ci, struct cell *cj, cj->grav.parts, ci->grav.parts); /* Then the M2P */ - if (allow_mpole) + if (allow_multipole_i) runner_dopair_grav_pm_full(cj_cache, gcount_padded_j, CoM_i, multi_i, periodic, dim, e, cj->grav.parts, gcount_j, ci); @@ -933,14 +1331,89 @@ void runner_dopair_grav_pp(struct runner *r, struct cell *ci, struct cell *cj, } } - /* Write back to the particles */ - if (ci_active) gravity_cache_write_back(ci_cache, ci->grav.parts, gcount_i); - if (cj_active && symmetric) + /* Write back to the particles in ci */ + if (ci_active) { +#ifndef SWIFT_TASKS_WITHOUT_ATOMICS + lock_lock(&ci->grav.plock); +#endif + gravity_cache_write_back(ci_cache, ci->grav.parts, gcount_i); +#ifndef SWIFT_TASKS_WITHOUT_ATOMICS + if (lock_unlock(&ci->grav.plock) != 0) error("Error unlocking cell"); +#endif + } + + /* Write back to the particles in cj */ + if (cj_active && symmetric) { +#ifndef SWIFT_TASKS_WITHOUT_ATOMICS + lock_lock(&cj->grav.plock); +#endif gravity_cache_write_back(cj_cache, cj->grav.parts, gcount_j); +#ifndef SWIFT_TASKS_WITHOUT_ATOMICS + if (lock_unlock(&cj->grav.plock) != 0) error("Error unlocking cell"); +#endif + } TIMER_TOC(timer_dopair_grav_pp); }
+/**
+ * @brief Apply the gravity sourced by the particles of #cell cj to the
+ * particles of #cell ci, without building a cache for cj.
+ *
+ * Only ci is updated; cj is left untouched and the field tensors of ci
+ * are not used.
+ * The recursion descends to the leaf level of ci only (never of cj); each
+ * leaf then goes through M2P, or through P2P when too close.
+ *
+ * @param r The #runner object.
+ * @param ci The #cell whose particles receive the forces.
+ * @param cj The #cell whose particles source the gravity.
+ */
+void runner_dopair_grav_pp_no_cache(struct runner *r, struct cell *restrict ci,
+                                    const struct cell *restrict cj) {
+
+  /* Engine-wide constants needed below */
+  const struct engine *e = r->e;
+  const int periodic = e->mesh->periodic;
+  const float dim[3] = {(float)e->mesh->dim[0], (float)e->mesh->dim[1],
+                        (float)e->mesh->dim[2]};
+
+  /* Skip a ci that is inactive or whose nodeID is not the engine's... */
+  if (!cell_is_active_gravity(ci, e) || ci->nodeID != e->nodeID) return;
+
+  /* ...and pairs in which one side holds no gravity particles */
+  if (ci->grav.count == 0 || cj->grav.count == 0) return;
+
+  /* Above the leaf level: hand over to the existing children of ci */
+  if (ci->split) {
+    for (int child = 0; child < 8; ++child) {
+      struct cell *cp = ci->progeny[child];
+      if (cp == NULL) continue;
+      runner_dopair_grav_pp_no_cache(r, cp, cj);
+    }
+    return;
+  }
+
+  /* Leaf level: name the arguments shared by both kernels */
+  struct gpart *gparts_i = ci->grav.parts;
+  const struct gpart *gparts_j = cj->grav.parts;
+  const int gcount_i = ci->grav.count;
+  const int gcount_j = cj->grav.count;
+  const struct gravity_props *props = e->gravity_properties;
+  struct gravity_cache *cache = &r->ci_gravity_cache;
+  const struct gravity_tensors *multi_j = cj->grav.multipole;
+
+  /* Periodic runs use the truncated kernel, all others the Newtonian one */
+  if (periodic) {
+    runner_dopair_grav_pp_truncated_no_cache(gparts_i, gcount_i, gparts_j,
+                                             gcount_j, dim, e, props, cache, ci,
+                                             multi_j);
+  } else {
+    runner_dopair_grav_pp_full_no_cache(gparts_i, gcount_i, gparts_j, gcount_j,
+                                        e, props, cache, ci, multi_j);
+  }
+}
+
 /** * @brief Compute the non-truncated gravity interactions between all particles * of a cell and the particles of the other cell.
@@ -1062,9 +1535,9 @@ static INLINE void runner_doself_grav_pp_full( ci_cache->pot[pid] += pot; #ifdef SWIFT_GRAVITY_FORCE_CHECKS - accumulate_add_f(&gparts[pid].a_grav_p2p[0], a_x); - accumulate_add_f(&gparts[pid].a_grav_p2p[1], a_y); - accumulate_add_f(&gparts[pid].a_grav_p2p[2], a_z); + gparts[pid].a_grav_p2p[0] += a_x; + gparts[pid].a_grav_p2p[1] += a_y; + gparts[pid].a_grav_p2p[2] += a_z; #endif } } @@ -1201,9 +1674,9 @@ static INLINE void runner_doself_grav_pp_truncated( ci_cache->pot[pid] += pot; #ifdef SWIFT_GRAVITY_FORCE_CHECKS - accumulate_add_f(&gparts[pid].a_grav_p2p[0], a_x); - accumulate_add_f(&gparts[pid].a_grav_p2p[1], a_y); - accumulate_add_f(&gparts[pid].a_grav_p2p[2], a_z); + gparts[pid].a_grav_p2p[0] += a_x; + gparts[pid].a_grav_p2p[1] += a_y; + gparts[pid].a_grav_p2p[2] += a_z; #endif } } @@ -1295,8 +1768,14 @@ void runner_doself_grav_pp(struct runner *r, struct cell *c) { } } - /* Write back to the particles */ + /* Write back to the particles */ +#ifndef SWIFT_TASKS_WITHOUT_ATOMICS + lock_lock(&c->grav.plock); +#endif gravity_cache_write_back(ci_cache, c->grav.parts, gcount); +#ifndef SWIFT_TASKS_WITHOUT_ATOMICS + if (lock_unlock(&c->grav.plock) != 0) error("Error unlocking cell"); +#endif TIMER_TOC(timer_doself_grav_pp); } @@ -1571,15 +2050,19 @@ void runner_dopair_recursive_grav_pm(struct runner *r, struct cell *ci, /* Recover the multipole info and the CoM locations */ const struct multipole *multi_j = &cj->grav.multipole->m_pole; - const float r_max = cj->grav.multipole->r_max; const float CoM_j[3] = {(float)(cj->grav.multipole->CoM[0]), (float)(cj->grav.multipole->CoM[1]), (float)(cj->grav.multipole->CoM[2])}; +#ifdef SWIFT_DEBUG_CHECKS + if (cj->grav.count == 1) + error("Constructing cache for M2P interaction with multipole of size 0!"); +#endif + /* Fill the cache */ gravity_cache_populate_all_mpole( e->max_active_bin, periodic, dim, ci_cache, ci->grav.parts, gcount_i, - gcount_padded_i, ci, CoM_j, r_max * r_max, 
e->gravity_properties); + gcount_padded_i, ci, CoM_j, cj->grav.multipole, e->gravity_properties); /* Can we use the Newtonian version or do we need the truncated one ? */ if (!periodic) { @@ -1595,8 +2078,14 @@ void runner_dopair_recursive_grav_pm(struct runner *r, struct cell *ci, cj); } - /* Write back to the particles */ + /* Write back to the particles */ +#ifndef SWIFT_TASKS_WITHOUT_ATOMICS + lock_lock(&ci->grav.plock); +#endif gravity_cache_write_back(ci_cache, ci->grav.parts, gcount_i); +#ifndef SWIFT_TASKS_WITHOUT_ATOMICS + if (lock_unlock(&ci->grav.plock) != 0) error("Error unlocking cell"); +#endif } } @@ -1616,14 +2105,13 @@ void runner_dopair_recursive_grav_pm(struct runner *r, struct cell *ci, * @param gettimer Are we timing this ? */ void runner_dopair_recursive_grav(struct runner *r, struct cell *ci, - struct cell *cj, int gettimer) { + struct cell *cj, const int gettimer) { /* Some constants */ const struct engine *e = r->e; const int nodeID = e->nodeID; const int periodic = e->mesh->periodic; const double dim[3] = {e->mesh->dim[0], e->mesh->dim[1], e->mesh->dim[2]}; - const double theta_crit2 = e->gravity_properties->theta_crit2; const double max_distance = e->mesh->r_cut_max; /* Anything to do here? */ @@ -1700,18 +2188,25 @@ void runner_dopair_recursive_grav(struct runner *r, struct cell *ci, /* OK, we actually need to compute this pair. Let's find the cheapest * option... */ - /* Can we use M-M interactions ? */ - if (gravity_M2L_accept(multi_i->r_max, multi_j->r_max, theta_crit2, r2, - multi_i->m_pole.max_softening, - multi_j->m_pole.max_softening)) { + if (ci->grav.count <= 1 || cj->grav.count <= 1) { + + /* We have two cheap cells. Go P-P. */ + runner_dopair_grav_pp_no_cache(r, ci, cj); + runner_dopair_grav_pp_no_cache(r, cj, ci); + + /* Can we use M-M interactions ? 
*/ + } else if (gravity_M2L_accept_symmetric(e->gravity_properties, multi_i, + multi_j, r2, + /* use_rebuild_sizes=*/0, periodic)) { /* Go M-M */ runner_dopair_grav_mm(r, ci, cj); + /* Did we reach the bottom? */ } else if (!ci->split && !cj->split) { /* We have two leaves. Go P-P. */ - runner_dopair_grav_pp(r, ci, cj, /*symmetric*/ 1, /*allow_mpoles*/ 1); + runner_dopair_grav_pp(r, ci, cj, /*symmetric*/ 1, /*allow_mpoles=*/1); } else { @@ -1783,7 +2278,7 @@ void runner_dopair_recursive_grav(struct runner *r, struct cell *ci, * @param gettimer Are we timing this ? */ void runner_doself_recursive_grav(struct runner *r, struct cell *c, - int gettimer) { + const int gettimer) { /* Some constants */ const struct engine *e = r->e; @@ -1834,13 +2329,13 @@ void runner_doself_recursive_grav(struct runner *r, struct cell *c, * @param ci The #cell of interest. * @param timer Are we timing this ? */ -void runner_do_grav_long_range(struct runner *r, struct cell *ci, int timer) { +void runner_do_grav_long_range(struct runner *r, struct cell *ci, + const int timer) { /* Some constants */ const struct engine *e = r->e; const int periodic = e->mesh->periodic; const double dim[3] = {e->mesh->dim[0], e->mesh->dim[1], e->mesh->dim[2]}; - const double theta_crit2 = e->gravity_properties->theta_crit2; const double max_distance2 = e->mesh->r_cut_max * e->mesh->r_cut_max; TIMER_TIC; @@ -1866,12 +2361,6 @@ void runner_do_grav_long_range(struct runner *r, struct cell *ci, int timer) { struct cell *top = ci; while (top->parent != NULL) top = top->parent; - /* Recover the top-level multipole (for distance checks) */ - struct gravity_tensors *const multi_top = top->grav.multipole; - const double CoM_rebuild_top[3] = {multi_top->CoM_rebuild[0], - multi_top->CoM_rebuild[1], - multi_top->CoM_rebuild[2]}; - /* Loop over all the top-level cells and go for a M-M interaction if * well-separated */ for (int n = 0; n < nr_cells_with_particles; ++n) { @@ -1916,24 +2405,8 @@ void 
runner_do_grav_long_range(struct runner *r, struct cell *ci, int timer) { } } - /* Get the distance between the CoMs at the last rebuild*/ - double dx_r = CoM_rebuild_top[0] - multi_j->CoM_rebuild[0]; - double dy_r = CoM_rebuild_top[1] - multi_j->CoM_rebuild[1]; - double dz_r = CoM_rebuild_top[2] - multi_j->CoM_rebuild[2]; - - /* Apply BC */ - if (periodic) { - dx_r = nearest(dx_r, dim[0]); - dy_r = nearest(dy_r, dim[1]); - dz_r = nearest(dz_r, dim[2]); - } - const double r2_rebuild = dx_r * dx_r + dy_r * dy_r + dz_r * dz_r; - - /* Are we in charge of this cell pair? */ - if (gravity_M2L_accept(multi_top->r_max_rebuild, multi_j->r_max_rebuild, - theta_crit2, r2_rebuild, - multi_top->m_pole.max_softening, - multi_j->m_pole.max_softening)) { + if (cell_can_use_pair_mm(top, cj, e, e->s, /*use_rebuild_data=*/1, + /*is_tree_walk=*/0)) { /* Call the PM interaction fucntion on the active sub-cells of ci */ runner_dopair_grav_mm_nonsym(r, ci, cj); diff --git a/src/runner_doiact_grav.h b/src/runner_doiact_grav.h index 34f3e9ec147574357620cc8f485889b87880f06e..1afa2e0f5df900aea498a859b9672625bde773e7 100644 --- a/src/runner_doiact_grav.h +++ b/src/runner_doiact_grav.h @@ -27,6 +27,9 @@ struct cell; void runner_do_grav_down(struct runner *r, struct cell *c, int timer); +void runner_dopair_grav_pp(struct runner *r, struct cell *ci, struct cell *cj, + const int symmetric, const int allow_mpole); + void runner_doself_recursive_grav(struct runner *r, struct cell *c, int gettimer); diff --git a/src/runner_others.c b/src/runner_others.c index 4db77738670e66c5e072bdfbf247e7cfbfadfc53..6f0c4ac9e8c078285714fe89f9dce7deb6b7a001 100644 --- a/src/runner_others.c +++ b/src/runner_others.c @@ -130,8 +130,14 @@ void runner_do_grav_mesh(struct runner *r, struct cell *c, int timer) { if (c->progeny[k] != NULL) runner_do_grav_mesh(r, c->progeny[k], 0); } else { - /* Get the forces from the gravity mesh */ + /* Get the forces from the gravity mesh */ +#ifndef SWIFT_TASKS_WITHOUT_ATOMICS + 
lock_lock(&c->grav.plock); +#endif pm_mesh_interpolate_forces(e->mesh, e, gparts, gcount); +#ifndef SWIFT_TASKS_WITHOUT_ATOMICS + if (lock_unlock(&c->grav.plock) != 0) error("Error unlocking cell"); +#endif } if (timer) TIMER_TOC(timer_dograv_mesh); diff --git a/src/runner_time_integration.c b/src/runner_time_integration.c index edb307442cfae0d3fc466706fc3c2f3ab024521f..d966800fc989e6553de3f0fb8b7c4760a1c98bb8 100644 --- a/src/runner_time_integration.c +++ b/src/runner_time_integration.c @@ -32,6 +32,7 @@ #include "engine.h" #include "feedback.h" #include "kick.h" +#include "multipole.h" #include "timers.h" #include "timestep.h" #include "timestep_limiter.h" diff --git a/src/scheduler.c b/src/scheduler.c index df6f908a3f620d9d6abb3c9e07d179d2d30b1f96..bb380e5a51f478fb53762e805ef1bbbdb0fceec7 100644 --- a/src/scheduler.c +++ b/src/scheduler.c @@ -872,8 +872,10 @@ static void scheduler_splittask_gravity(struct task *t, struct scheduler *s) { for (int j = 0; j < 8; j++) { if (cj->progeny[j] != NULL) { /* Can we use a M-M interaction here? */ - if (cell_can_use_pair_mm_rebuild(ci->progeny[i], - cj->progeny[j], e, sp)) { + if (cell_can_use_pair_mm(ci->progeny[i], cj->progeny[j], e, + sp, /*use_rebuild_data=*/1, + /*is_tree_walk=*/1)) { + /* Flag this pair as being treated by the M-M task. * We use the 64 bits in the task->flags field to store * this information. The corresponding taks will unpack diff --git a/src/space.c b/src/space.c index 5bdf075f40bb9c7e3a18da3330e7ec095b9be212..156d34372ffed29e0608b30bd16513771d915087 100644 --- a/src/space.c +++ b/src/space.c @@ -3661,10 +3661,8 @@ void space_split_recursive(struct space *s, struct cell *c, c->grav.multipole->CoM_rebuild[1] = c->grav.multipole->CoM[1]; c->grav.multipole->CoM_rebuild[2] = c->grav.multipole->CoM[2]; - /* We know the first-order multipole (dipole) is 0. 
*/ - c->grav.multipole->m_pole.M_100 = 0.f; - c->grav.multipole->m_pole.M_010 = 0.f; - c->grav.multipole->m_pole.M_001 = 0.f; + /* Compute the multipole power */ + gravity_multipole_compute_power(&c->grav.multipole->m_pole); } /* Deal with gravity */ } /* Split or let it be? */ @@ -3803,6 +3801,9 @@ void space_split_recursive(struct space *s, struct cell *c, gravity_P2M(c->grav.multipole, c->grav.parts, c->grav.count, e->gravity_properties); + /* Compute the multipole power */ + gravity_multipole_compute_power(&c->grav.multipole->m_pole); + } else { /* No gparts in that leaf cell */ diff --git a/src/task.c b/src/task.c index 7844be685292463eb0e240bd58b1fa8ba061edbe..70741af01bbf869ec22b1f5fd8ac5c076784db2a 100644 --- a/src/task.c +++ b/src/task.c @@ -443,7 +443,6 @@ void task_unlock(struct task *t) { break; case task_type_drift_gpart: - case task_type_grav_mesh: case task_type_end_grav_force: cell_gunlocktree(ci); break; @@ -538,6 +537,12 @@ void task_unlock(struct task *t) { #endif break; + case task_type_grav_mesh: +#ifdef SWIFT_TASKS_WITHOUT_ATOMICS + cell_gunlocktree(ci); +#endif + break; + case task_type_star_formation: cell_unlocktree(ci); cell_sunlocktree(ci); @@ -623,7 +628,6 @@ int task_lock(struct task *t) { case task_type_drift_gpart: case task_type_end_grav_force: - case task_type_grav_mesh: if (ci->grav.phold) return 0; if (cell_glocktree(ci) != 0) return 0; break; @@ -800,6 +804,14 @@ int task_lock(struct task *t) { #endif break; + case task_type_grav_mesh: +#ifdef SWIFT_TASKS_WITHOUT_ATOMICS + /* Lock the gparts */ + if (ci->grav.phold) return 0; + if (cell_glocktree(ci) != 0) return 0; +#endif + break; + case task_type_star_formation: /* Lock the gas, gravity and star particles */ if (ci->hydro.hold || ci->stars.hold || ci->grav.phold) return 0; diff --git a/tests/Makefile.am b/tests/Makefile.am index e24a2a69b07d9ebf8ff10f367f7eaf0724817f7c..347de263b05836f2579739f3666833736b64968f 100644 --- a/tests/Makefile.am +++ b/tests/Makefile.am @@ -15,35 
+15,35 @@ # along with this program. If not, see <http://www.gnu.org/licenses/>. # Add the source directory and the non-standard paths to the included library headers to CFLAGS -AM_CFLAGS = -I$(top_srcdir)/src $(HDF5_CPPFLAGS) $(GSL_INCS) $(FFTW_INCS) $(NUMA_INCS) +AM_CFLAGS = -I$(top_srcdir)/src $(HDF5_CPPFLAGS) $(GSL_INCS) $(FFTW_INCS) $(NUMA_INCS) $(OPENMP_CFLAGS) AM_LDFLAGS = ../src/.libs/libswiftsim.a $(HDF5_LDFLAGS) $(HDF5_LIBS) $(FFTW_LIBS) $(NUMA_LIBS) $(TCMALLOC_LIBS) $(JEMALLOC_LIBS) $(TBBMALLOC_LIBS) $(GRACKLE_LIBS) $(GSL_LIBS) $(PROFILER_LIBS) # List of programs and scripts to run in the test suite -TESTS = testGreetings testMaths testReading.sh testKernel \ - testActivePair.sh test27cells.sh test27cellsPerturbed.sh \ +TESTS = testGreetings testMaths testReading.sh testKernel testKernelLongGrav \ + testActivePair.sh test27cells.sh test27cellsPerturbed.sh testExp \ testParser.sh test125cells.sh test125cellsPerturbed.sh testFFT \ - testAdiabaticIndex testRandom testRandomSpacing \ + testAdiabaticIndex testRandom testRandomSpacing testErfc \ testMatrixInversion testThreadpool testDump testLogger testInteractions.sh \ testVoronoi1D testVoronoi2D testVoronoi3D testGravityDerivatives \ testPeriodicBC.sh testPeriodicBCPerturbed.sh testPotentialSelf \ testPotentialPair testEOS testUtilities testSelectOutput.sh \ testCbrt testCosmology testOutputList testFormat.sh \ test27cellsStars.sh test27cellsStarsPerturbed.sh testHydroMPIrules \ - testAtomic + testAtomic testGravitySpeed # List of test programs to compile -check_PROGRAMS = testGreetings testReading testTimeIntegration \ +check_PROGRAMS = testGreetings testReading testTimeIntegration testKernelLongGrav \ testActivePair test27cells test27cells_subset test125cells testParser \ - testKernel testFFT testInteractions testMaths testRandom \ - testSymmetry testThreadpool testRandomSpacing \ + testKernel testFFT testInteractions testMaths testRandom testExp \ + testSymmetry testThreadpool testRandomSpacing testErfc 
\ testAdiabaticIndex testRiemannExact testRiemannTRRS \ testRiemannHLLC testMatrixInversion testDump testLogger \ testVoronoi1D testVoronoi2D testVoronoi3D testPeriodicBC \ testGravityDerivatives testPotentialSelf testPotentialPair testEOS testUtilities \ testSelectOutput testCbrt testCosmology testOutputList test27cellsStars \ test27cellsStars_subset testCooling testComovingCooling testFeedback testHashmap \ - testAtomic testHydroMPIrules + testAtomic testHydroMPIrules testGravitySpeed # Rebuild tests when SWIFT is updated. $(check_PROGRAMS): ../src/.libs/libswiftsim.a @@ -96,6 +96,8 @@ testParser_SOURCES = testParser.c testKernel_SOURCES = testKernel.c +testKernelLongGrav_SOURCES = testKernelLongGrav.c + testFFT_SOURCES = testFFT.c testInteractions_SOURCES = testInteractions.c @@ -122,8 +124,14 @@ testDump_SOURCES = testDump.c testLogger_SOURCES = testLogger.c +testExp_SOURCES = testExp.c + +testErfc_SOURCES = testErfc.c + testGravityDerivatives_SOURCES = testGravityDerivatives.c +testGravitySpeed_SOURCES = testGravitySpeed.c + testPotentialSelf_SOURCES = testPotentialSelf.c testPotentialPair_SOURCES = testPotentialPair.c diff --git a/tests/testErfc.c b/tests/testErfc.c new file mode 100644 index 0000000000000000000000000000000000000000..052b7ec0bea4a32cc15816b8c1558043e0383341 --- /dev/null +++ b/tests/testErfc.c @@ -0,0 +1,97 @@ +/******************************************************************************* + * This file is part of SWIFT. + * Copyright (C) 2020 Matthieu Schaller (schaller@strw.leidenuniv.nl) + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published + * by the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. 
+ * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public License + * along with this program. If not, see <http://www.gnu.org/licenses/>. + * + ******************************************************************************/ + +#include "../config.h" + +#include "swift.h" + +/* Standard includes */ +#include <fenv.h> +#include <math.h> + +/** + * Compute erfcf(u) using eq. 7.1.26 of + * Abramowitz & Stegun, 1972. + * + * This has a *relative* error of less than 4e-3 over + * the range of interest (0 < x < 5) + */ +float optimized_erfcf(const float x) { + + const float x2 = x * x; + const float exp_x2 = expf(-x2); + + const float t = 1.f / (1.f + 0.3275911f * x); + + const float a1 = 0.254829592f; + const float a2 = -0.284496736f; + const float a3 = 1.421413741f; + const float a4 = -1.453152027; + const float a5 = 1.061405429f; + + /* a1 * t + a2 * t^2 + a3 * t^3 + a4 * t^4 + a5 * t^5 */ + float a = a5 * t + a4; + a = a * t + a3; + a = a * t + a2; + a = a * t + a1; + a = a * t; + + return a * exp_x2; +} + +/** + * @brief Check that a and b are consistent (up to some relative error) + * + * @param a First value + * @param b Second value + * @param s String used to identify this check in messages + */ +void check_value(const double a, const double b, const double rel_tol, + const double abs_tol, const double x) { + + if (fabs(a - b) / fabs(a + b) > rel_tol) + error("Values are inconsistent: %12.15e %12.15e rel=%e (for x=%e).", a, b, + fabs(a - b) / fabs(a + b), x); + if (fabs(a - b) > abs_tol) + error("Values are inconsistent: %12.15e %12.15e abs=%e (for x=%e).", a, b, + fabs(a - b), x); +} + +int main(int argc, char* argv[]) { + + /* Initialize CPU frequency, this also starts time. 
*/ + unsigned long long cpufreq = 0; + clocks_set_cpufreq(cpufreq); + +/* Choke on FPEs */ +#ifdef HAVE_FE_ENABLE_EXCEPT + feenableexcept(FE_DIVBYZERO | FE_INVALID | FE_OVERFLOW); +#endif + + /* Loop over some values */ + for (float x = 0.f; x < 5.f; x += 0.000001f) { + + const double exact = erfc(x); + const double swift_erfcf = optimized_erfcf(x); + + check_value(exact, swift_erfcf, 3.358e-3, 6e-7, x); + } + + return 0; +} diff --git a/tests/testExp.c b/tests/testExp.c new file mode 100644 index 0000000000000000000000000000000000000000..01a65b8ccc677e5be9627ba40fe71c0bfbc6bb70 --- /dev/null +++ b/tests/testExp.c @@ -0,0 +1,70 @@ +/******************************************************************************* + * This file is part of SWIFT. + * Copyright (C) 2020 Matthieu Schaller (schaller@strw.leidenuniv.nl) + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published + * by the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public License + * along with this program. If not, see <http://www.gnu.org/licenses/>. 
+ * + ******************************************************************************/ + +#include "../config.h" + +#include "swift.h" + +/* Standard includes */ +#include <fenv.h> +#include <math.h> + +/** + * @brief Check that a and b are consistent (up to some relative error) + * + * @param a First value + * @param b Second value + * @param s String used to identify this check in messages + */ +void check_value(double a, double b, const double tol, const double x) { + if (fabs(a - b) / fabs(a + b) > tol) + error("Values are inconsistent: %12.15e %12.15e rel=%e (for x=%e).", a, b, + fabs(a - b) / fabs(a + b), x); +} + +int main(int argc, char* argv[]) { + + /* Initialize CPU frequency, this also starts time. */ + unsigned long long cpufreq = 0; + clocks_set_cpufreq(cpufreq); + +/* Choke on FPEs */ +#ifdef HAVE_FE_ENABLE_EXCEPT + feenableexcept(FE_DIVBYZERO | FE_INVALID | FE_OVERFLOW); +#endif + + /* Get some randomness going */ + const int seed = time(NULL); + message("Seed = %d", seed); + srand(seed); + + /* Loop over some values */ + for (float x = 0.; x < 32.; x += 0.000001) { + + const double exact_p = exp(x); + const double exact_n = exp(-x); + const double swift_exp_p = optimized_expf(x); + const double swift_exp_n = optimized_expf(-x); + + check_value(exact_p, swift_exp_p, 1.618e-6, x); + check_value(exact_n, swift_exp_n, 1.618e-6, x); + } + + return 0; +} diff --git a/tests/testGravitySpeed.c b/tests/testGravitySpeed.c new file mode 100644 index 0000000000000000000000000000000000000000..dc5e8bc2530fbad5c4616f4a006bed3627614139 --- /dev/null +++ b/tests/testGravitySpeed.c @@ -0,0 +1,326 @@ +/******************************************************************************* + * This file is part of SWIFT. + * Copyright (C) 2020 Matthieu Schaller (schaller@strw.leidenuniv.nl). 
+ * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published + * by the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public License + * along with this program. If not, see <http://www.gnu.org/licenses/>. + * + ******************************************************************************/ +#include "../config.h" + +/* Some standard headers. */ +#include <fenv.h> +#include <stdio.h> +#include <stdlib.h> +#include <string.h> +#include <unistd.h> + +/* Local headers. */ +#include "runner_doiact_grav.h" +#include "swift.h" + +const int num_M2L_runs = 1 << 23; +const int num_M2P_runs = 1 << 23; +const int num_PP_runs = 1; // << 8; + +void make_cell(struct cell *c, int N, const double loc[3], double width, + int id_base, const struct gravity_props *grav_props) { + + bzero(c, sizeof(struct cell)); + + /* Start by setting the basics */ + c->loc[0] = loc[0]; + c->loc[1] = loc[1]; + c->loc[2] = loc[2]; + c->width[0] = width; + c->width[1] = width; + c->width[2] = width; + + /* Initialise the locks */ + lock_init(&c->grav.plock); + lock_init(&c->grav.mlock); + + /* Set the time bins */ + c->grav.ti_end_min = 1; + c->grav.ti_end_max = 1; + c->grav.ti_beg_max = 1; + c->grav.ti_old_part = 1; + c->grav.ti_old_multipole = 1; + + /* Create the particles */ + c->grav.count = N; + c->grav.count_total = N; + c->grav.parts = malloc(N * sizeof(struct gpart)); + bzero(c->grav.parts, N * sizeof(struct gpart)); + for (int i = 0.; i < N; ++i) { + + c->grav.parts[i].id_or_neg_offset = id_base + i; + c->grav.parts[i].x[0] = loc[0] 
+ width * rand() / ((double)RAND_MAX); + c->grav.parts[i].x[1] = loc[1] + width * rand() / ((double)RAND_MAX); + c->grav.parts[i].x[2] = loc[2] + width * rand() / ((double)RAND_MAX); + c->grav.parts[i].mass = 1.; + c->grav.parts[i].type = swift_type_dark_matter; + c->grav.parts[i].time_bin = 1; + } + + /* Create the multipoles */ + c->grav.multipole = malloc(sizeof(struct gravity_tensors)); + gravity_reset(c->grav.multipole); + gravity_P2M(c->grav.multipole, c->grav.parts, N, grav_props); + gravity_multipole_compute_power(&c->grav.multipole->m_pole); +} + +int main(int argc, char *argv[]) { + + /* Initialize CPU frequency, this also starts time. */ + unsigned long long cpufreq = 0; + clocks_set_cpufreq(cpufreq); + + /* Choke on FPEs */ +#ifdef HAVE_FE_ENABLE_EXCEPT + feenableexcept(FE_DIVBYZERO | FE_INVALID | FE_OVERFLOW); +#endif + + /* Get some randomness going */ + const int seed = time(NULL); + message("Seed = %d", seed); + srand(seed); + + /* Construct gravity properties */ + struct gravity_props grav_props; + bzero(&grav_props, sizeof(struct gravity_props)); + grav_props.use_advanced_MAC = 1; + grav_props.use_adaptive_tolerance = 1; + grav_props.adaptive_tolerance = 1e-4; + grav_props.theta_crit = 0.5; + grav_props.G_Newton = 1.; + grav_props.mesh_size = 64; + grav_props.a_smooth = 1.25; + + /* Space properites */ + const double dim[3] = {100., 100., 100.}; + const double r_s = grav_props.a_smooth * dim[0] / grav_props.mesh_size; + const double r_s_inv = 1. 
/ r_s; + + /* Mesh structure */ + struct pm_mesh mesh; + mesh.periodic = 0; + mesh.dim[0] = dim[0]; + mesh.dim[1] = dim[1]; + mesh.dim[2] = dim[2]; + + /* Construct an engine */ + struct engine e; + e.mesh = &mesh; + e.max_active_bin = 56; + + /* Construct a runner */ + struct runner r; + r.e = &e; + + /* Construct two cells */ + struct cell ci; + struct cell cj; + const double loc_i[3] = {0., 0., 0.}; + const double loc_j[3] = {1., 1., 1.}; + const int num_particles = 8; + make_cell(&ci, num_particles, loc_i, 1., 0, &grav_props); + make_cell(&cj, num_particles, loc_j, 1., num_particles, &grav_props); + + message("Number of runs: %d", num_M2L_runs); + + /* Construct arrays of multipoles to prevent too much optimization */ + struct gravity_tensors *tensors_i = + malloc(num_M2L_runs * sizeof(struct gravity_tensors)); + struct gravity_tensors *tensors_j = + malloc(num_M2L_runs * sizeof(struct gravity_tensors)); + for (int n = 0; n < num_M2L_runs; ++n) { + + memcpy(&tensors_i[n], ci.grav.multipole, sizeof(struct gravity_tensors)); + memcpy(&tensors_j[n], cj.grav.multipole, sizeof(struct gravity_tensors)); + + /* Move the values a bit to prevent optimization in the actual loops */ + tensors_i[n].CoM[0] += rand() / ((double)RAND_MAX); + tensors_i[n].CoM[1] += rand() / ((double)RAND_MAX); + tensors_i[n].CoM[2] += rand() / ((double)RAND_MAX); + + tensors_j[n].CoM[0] += rand() / ((double)RAND_MAX); + tensors_j[n].CoM[1] += rand() / ((double)RAND_MAX); + tensors_j[n].CoM[2] += rand() / ((double)RAND_MAX); + + tensors_i[n].m_pole.M_000 += rand() / ((double)RAND_MAX); + tensors_j[n].m_pole.M_000 += rand() / ((double)RAND_MAX); + +#if SELF_GRAVITY_MULTIPOLE_ORDER > 1 + tensors_i[n].m_pole.M_200 += rand() / ((double)RAND_MAX); + tensors_i[n].m_pole.M_020 += rand() / ((double)RAND_MAX); + tensors_i[n].m_pole.M_002 += rand() / ((double)RAND_MAX); + + tensors_j[n].m_pole.M_200 += rand() / ((double)RAND_MAX); + tensors_j[n].m_pole.M_020 += rand() / ((double)RAND_MAX); +
tensors_j[n].m_pole.M_002 += rand() / ((double)RAND_MAX); +#endif + } + + /* Now run a series of M2L kernels */ + + /******** + * Symmetric non-periodic M2L + ********/ + ticks tic = getticks(); + for (int n = 0; n < num_M2L_runs; ++n) { + + gravity_M2L_symmetric(&tensors_i[n].pot, // + &tensors_j[n].pot, // + &tensors_i[n].m_pole, // + &tensors_j[n].m_pole, // + tensors_i[n].CoM, // + tensors_j[n].CoM, // + &grav_props, /* periodic=*/0, dim, r_s_inv); + } + ticks toc = getticks(); + message("%30s at order %d took %4d %s.", "Symmetric non-periodic M2L", + SELF_GRAVITY_MULTIPOLE_ORDER, + (int)(1e6 * clocks_from_ticks(toc - tic) / num_M2L_runs), "ns"); + + /******** + * Symmetric periodic M2L + ********/ + tic = getticks(); + for (int n = 0; n < num_M2L_runs; ++n) { + + gravity_M2L_symmetric(&tensors_i[n].pot, // + &tensors_j[n].pot, // + &tensors_i[n].m_pole, // + &tensors_j[n].m_pole, // + tensors_i[n].CoM, // + tensors_j[n].CoM, // + &grav_props, /* periodic=*/1, dim, r_s_inv); + } + toc = getticks(); + message("%30s at order %d took %4d %s.", "Symmetric periodic M2L", + SELF_GRAVITY_MULTIPOLE_ORDER, + (int)(1e6 * clocks_from_ticks(toc - tic) / num_M2L_runs), "ns"); + + /******** + * Non-symmetric non-periodic M2L + ********/ + tic = getticks(); + for (int n = 0; n < num_M2L_runs; ++n) { + + gravity_M2L_nonsym(&tensors_i[n].pot, // + &tensors_j[n].m_pole, // + tensors_i[n].CoM, // + tensors_j[n].CoM, // + &grav_props, /* periodic=*/0, dim, r_s_inv); + } + toc = getticks(); + message("%30s at order %d took %4d %s.", "Non-symmetric non-periodic M2L", + SELF_GRAVITY_MULTIPOLE_ORDER, + (int)(1e6 * clocks_from_ticks(toc - tic) / num_M2L_runs), "ns"); + + /******** + * Non-symmetric periodic M2L + ********/ + tic = getticks(); + for (int n = 0; n < num_M2L_runs; ++n) { + + gravity_M2L_nonsym(&tensors_i[n].pot, // + &tensors_j[n].m_pole, // + tensors_i[n].CoM, // + tensors_j[n].CoM, // + &grav_props, /* periodic=*/1, dim, r_s_inv); + } + toc = getticks(); + message("%30s 
at order %d took %4d %s.", "Non-symmetric periodic M2L", + SELF_GRAVITY_MULTIPOLE_ORDER, + (int)(1e6 * clocks_from_ticks(toc - tic) / num_M2L_runs), "ns"); + + /* Now run a series of M2L kernels */ + + /******** + * Non-periodic M2P + ********/ + tic = getticks(); + for (int n = 0; n < num_M2P_runs; ++n) { + + const int index = n % num_particles; + + const float r_x = tensors_j[n].CoM[0] - ci.grav.parts[index].x[0]; + const float r_y = tensors_j[n].CoM[1] - ci.grav.parts[index].x[1]; + const float r_z = tensors_j[n].CoM[2] - ci.grav.parts[index].x[2]; + const float r2 = r_x * r_x + r_y * r_y + r_z * r_z; + const float eps = gravity_get_softening(&ci.grav.parts[index], &grav_props); + + struct reduced_grav_tensor l = {0.f, 0.f, 0.f, 0.f}; + gravity_M2P(&tensors_j[n].m_pole, r_x, r_y, r_z, r2, eps, + /*periodic=*/0, r_s_inv, &l); + + ci.grav.parts[index].a_grav[0] += l.F_100; + ci.grav.parts[index].a_grav[1] += l.F_010; + ci.grav.parts[index].a_grav[2] += l.F_001; + } + toc = getticks(); + message("%30s at order %d took %4d %s.", "Non-periodic M2P", + SELF_GRAVITY_MULTIPOLE_ORDER, + (int)(1e6 * clocks_from_ticks(toc - tic) / num_M2P_runs), "ns"); + + /******** + * Periodic M2P + ********/ + tic = getticks(); + for (int n = 0; n < num_M2P_runs; ++n) { + + const int index = n % num_particles; + + const float r_x = tensors_j[n].CoM[0] - ci.grav.parts[index].x[0]; + const float r_y = tensors_j[n].CoM[1] - ci.grav.parts[index].x[1]; + const float r_z = tensors_j[n].CoM[2] - ci.grav.parts[index].x[2]; + const float r2 = r_x * r_x + r_y * r_y + r_z * r_z; + const float eps = gravity_get_softening(&ci.grav.parts[index], &grav_props); + + struct reduced_grav_tensor l = {0.f, 0.f, 0.f, 0.f}; + gravity_M2P(&tensors_j[n].m_pole, r_x, r_y, r_z, r2, eps, + /*periodic=*/1, r_s_inv, &l); + + ci.grav.parts[index].a_grav[0] += l.F_100; + ci.grav.parts[index].a_grav[1] += l.F_010; + ci.grav.parts[index].a_grav[2] += l.F_001; + } + toc = getticks(); + message("%30s at order %d took %4d 
%s.", "Periodic M2P", + SELF_GRAVITY_MULTIPOLE_ORDER, + (int)(1e6 * clocks_from_ticks(toc - tic) / num_M2P_runs), "ns"); + + /* Print out to avoid optimization */ + // gravity_field_tensors_print(&ci.grav.multipole->pot); + // gravity_field_tensors_print(&cj.grav.multipole->pot); + + tic = getticks(); + for (int n = 0; n < num_PP_runs; ++n) { + runner_dopair_grav_pp(&r, &ci, &cj, 1, 0); + } + toc = getticks(); + message("%30s at order %d took %4d %s.", "dopair_grav (no mpole)", + SELF_GRAVITY_MULTIPOLE_ORDER, + (int)(1e6 * clocks_from_ticks(toc - tic) / num_PP_runs), "ns"); + + tic = getticks(); + runner_dopair_grav_pp(&r, &ci, &cj, 1, 1); + toc = getticks(); + message("%30s at order %d took %4d %s.", "dopair_grav (mpole)", + SELF_GRAVITY_MULTIPOLE_ORDER, + (int)(1e6 * clocks_from_ticks(toc - tic) / num_PP_runs), "ns"); + + return 0; +} diff --git a/tests/testKernelLongGrav.c b/tests/testKernelLongGrav.c new file mode 100644 index 0000000000000000000000000000000000000000..cda2df9c2a5f7c670fb033120495dd37634e662d --- /dev/null +++ b/tests/testKernelLongGrav.c @@ -0,0 +1,118 @@ +/******************************************************************************* + * This file is part of SWIFT. + * Copyright (C) 2020 Matthieu Schaller (schaller@strw.leidenuniv.nl) + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published + * by the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public License + * along with this program. If not, see <http://www.gnu.org/licenses/>. 
+ * + ******************************************************************************/ + +#include "../config.h" + +#include "swift.h" + +/* Standard includes */ +#include <fenv.h> +#include <math.h> + +const int num_tests = 1 << 10; + +/** + * @brief Check that a and b are consistent (up to some relative error) + * + * @param a First value + * @param b Second value + * @param s String used to identify this check in messages + */ +void check_value(double a, double b, const char* s, const double tol, + const double r, const double r_s) { + if (fabs(a - b) / fabs(a + b) > tol) + error( + "Values are inconsistent: %12.15e %12.15e rel=%e (%s for r_s=%e " + "r/r_s=%e)!", + a, b, fabs(a - b) / fabs(a + b), s, r_s, r / r_s); +} + +int main(int argc, char* argv[]) { + + /* Initialize CPU frequency, this also starts time. */ + unsigned long long cpufreq = 0; + clocks_set_cpufreq(cpufreq); + +/* Choke on FPEs */ +#ifdef HAVE_FE_ENABLE_EXCEPT + feenableexcept(FE_DIVBYZERO | FE_INVALID | FE_OVERFLOW); +#endif + + /* Get some randomness going */ + const int seed = time(NULL); + message("Seed = %d", seed); + srand(seed); + + for (int n = 0; n < num_tests; ++n) { + + const double r_s = exp10(4. * rand() / ((double)RAND_MAX) - 2.); + const double r_s_inv = 1.f / r_s; + + // message("Testing r_s=%e", r_s); + + /* Loop over some radii */ + for (double i = -4; i < 1; i += 0.001) { + + /* Get a radius in the relevant range */ + const double r = exp10(i) * r_s; + + if (r > 5. * r_s) break; + + /* Compute the SWIFT expressions */ + struct chi_derivatives chi_swift; + kernel_long_grav_derivatives((float)r, (float)r_s_inv, &chi_swift); + + /* Compute the exact expressions */ + const double one_over_sqrt_pi = M_2_SQRTPI * 0.5; + const double u = 0.5 * r / r_s; + const double C = one_over_sqrt_pi * exp(-u * u); + + const double chi_0 = erfc(u); + const double chi_1 = -C / r_s; + const double chi_2 = C * 0.5 * r * pow(r_s, -3.); + const double chi_3 = C * 0.25 * (2. 
* r_s * r_s - r * r) * pow(r_s, -5.); + const double chi_4 = + C * 0.125 * (r * r * r - 6. * r_s * r_s * r) * pow(r_s, -7.); + const double chi_5 = + C * 0.0625 * + (12. * pow(r_s, 4.) - 12. * r_s * r_s * r * r + pow(r, 4.)) * + pow(r_s, -9.); + + check_value(chi_swift.chi_0, chi_0, "chi_0", 3.4e-3, r, r_s); + check_value(chi_swift.chi_1, chi_1, "chi_1", 1e-5, r, r_s); + check_value(chi_swift.chi_2, chi_2, "chi_2", 1e-5, r, r_s); + check_value(chi_swift.chi_3, chi_3, "chi_3", 1e-4, r, r_s); + check_value(chi_swift.chi_4, chi_4, "chi_4", 4e-4, r, r_s); + check_value(chi_swift.chi_5, chi_5, "chi_5", 4e-4, r, r_s); + + /* Compute the expression for individual particles */ + float swift_corr_f_lr, swift_corr_pot_lr; + kernel_long_grav_eval(r / r_s, &swift_corr_f_lr, &swift_corr_pot_lr); + + /* And the exact ones */ + const double corr_pot = erfc(u); + const double corr_f = erfc(u) + M_2_SQRTPI * u * exp(-u * u); + + check_value(swift_corr_pot_lr, corr_pot, "corr_pot", 3.4e-3, r, r_s); + check_value(swift_corr_f_lr, corr_f, "corr_f", 2.4e-4, r, r_s); + } + } + + return 0; +} diff --git a/tests/testPotentialPair.c b/tests/testPotentialPair.c index d5fbda36a9ef79352f79627b5cef908030401da2..866d248c7ffe8ddbb735017977fffe8aa1ae40ee 100644 --- a/tests/testPotentialPair.c +++ b/tests/testPotentialPair.c @@ -124,7 +124,7 @@ int main(int argc, char *argv[]) { e.mesh = &mesh; struct gravity_props props; - props.theta_crit2 = 0.; + props.theta_crit = 0.; props.epsilon_DM_cur = eps; props.epsilon_baryon_cur = eps; e.gravity_properties = &props; @@ -261,7 +261,7 @@ int main(int argc, char *argv[]) { /**********************************/ /* Set an opening angle that allows P-M interactions */ - props.theta_crit2 = 1.; + props.theta_crit = 1.; ci.grav.parts[0].mass = 0.; ci.grav.multipole->CoM[0] = 0.; diff --git a/theory/Multipoles/fmm_mac.tex b/theory/Multipoles/fmm_mac.tex new file mode 100644 index 0000000000000000000000000000000000000000..0396246fa9911af1eba4a9642c1b140d96b131e8 
--- /dev/null +++ b/theory/Multipoles/fmm_mac.tex @@ -0,0 +1,213 @@ +\subsection{The multipole acceptance criterion} + +The main remaining question is to decide when two cells are far enough from +each other that the truncated Taylor expansion used as an approximation for +the potential (eq. \ref{eq:fmm:expansion}) is accurate enough. The +criterion used to make that decision is called the \emph{multipole + acceptance criterion} (MAC). \\ +We know that (\ref{eq:fmm:expansion}) is converging towards the correct +answer provided $1>|\mathbf{r}_a + \mathbf{r}_b| / |\mathbf{R}|$. This is +hence the most basic (and always necessary) MAC that can be designed. If +this ratio is lower, the accuracy (at a fixed expansion order) is improved +and it is hence common practice to define a critical \emph{opening angle} +$\theta_{\rm cr}$ and allow the use of the multipole approximation between +two cells if + +\begin{equation} + \theta_{\rm cr} > \frac{\rho_A + \rho_B} {|\mathbf{R}|}. + \label{eq:fmm:angle} +\end{equation} +This lets users have a second handle on the accuracy of the gravity +calculation besides the much more involved change in the expansion order +$p$ of the FMM method. Typical values for the opening angle are in the +range $[0.3, 0.7]$, with the cost of the simulation growing as $\theta_{\rm + cr}$ decreases. \\ +This method has the drawback of using a uniform criterion across the entire +simulation volume and time evolution, which means that the chosen value of +$\theta_{\rm cr}$ could be too small in some regions (leading to too many +operations for the expected accuracy) and too large in some other +ones (leading to a lower level of accuracy than expected). \swift instead +uses a more adaptive criterion to decide when the multipole approximation +can be used. This is based on the error analysis of FMM by +\cite{Dehnen2014} and is summarised below for completeness.
The key idea is +to exploit the additional information about the distribution of particles +that is encoded in the higher-order multipole terms.\\ +We start by defining the scalar quantity $P_{A,n}$, the +\emph{power} of the multipole of order $n$ of the particles in cell $A$, +via +\begin{equation} + P_{A,n}^2 = \sum_{|\mathbf{m}|=n} \frac{\mathbf{m}!}{|\mathbf{m}|!}\mathsf{M}_{A,\mathbf{m}}^2, +\end{equation} +where the sum runs over all the multipole terms of order $n$ in the +cell\footnote{Note that $P_{0} \equiv \mathsf{M}_{(0,0,0)}$ is + just the mass of the cell and since \swift uses the centre of mass as the + centre of expansion of the multipoles, $P_{1} = 0$.}. This +quantity is a simple upper bound for the amplitude of the multipole +($\mathsf{M}_{A, \mathbf{m}} < P_{A,|\mathbf{m}|}/|\mathbf{m}|!$) +and can hence be used to estimate the importance of the terms of a given +order in the Taylor series of the potential. Following \cite{Dehnen2014} we +then consider a sink cell $A$ and a source cell $B$ (figure \ref{fig:fmm:cells}) for which we evaluate +at order $p$ the scalar +\begin{equation} + E_{BA,p} = \frac{1}{M_B|\mathbf{R}|^p} \sum_{n=0}^p \binom{p}{n} P_{B,n} + \rho_A^{p-n}, + \label{eq:fmm:e_ab} +\end{equation} +with $M_B \equiv \mathsf{M}_{B,(0,0,0)}$, the sum of the mass of the +particles in cell $B$. Note that since $P_{B,n} \leq M_B +\rho_B^n$, we have $E_{BA, p} \leq \left((\rho_A + +\rho_B)/|\mathbf{R}|\right)^p$, where the right-hand side is the +expression used in the basic opening angle condition +(\ref{eq:fmm:angle}). We finally scale the $E_{BA,p}$'s by the relative +size of the two cells to define the error estimator $\tilde{E}_{BA,p}$: +\begin{equation} + \tilde{E}_{BA,p} = 8\frac{\max(\rho_A, \rho_B)}{\rho_A + \rho_B}E_{BA,p}. 
+ \label{eq:fmm:e_ab_tilde} +\end{equation} +As shown by \cite{Dehnen2014}, these quantities are excellent estimators of +the error made in computing the accelerations between two cells using the +M2L and M2P kernels at a given order. We can hence use this property to +design a new MAC by demanding that the estimated acceleration error is no +larger than a certain fraction of the smallest acceleration in the sink +cell $A$. This means we can use the FMM approximation to +approximate the accelerations in cell $A$ due to the particles in cell $B$ if +\begin{equation} + \tilde{E}_{BA,p} \frac{M_B}{|\mathbf{R}|^2} < \epsilon_{\rm FMM} \min_{a\in + A}\left(|\mathbf{a}_a|\right) \quad \rm{and} \quad \frac{\rho_A + + \rho_B} {|\mathbf{R}|} < 1, + \label{eq:fmm:mac} +\end{equation} +where the $\mathbf{a}_a$ are the accelerations of the particles in cell $A$ +and $\epsilon_{\rm FMM}$ is a tolerance parameter. Since this is self-referencing +(i.e. we need the accelerations to decide how to compute the +accelerations), we need to use an estimator of $|\mathbf{a}_a|$. In +\swift, we follow the strategy used by \gadget and use the acceleration of +the previous time-step\footnote{On the first time-step of a simulation this + value has not been computed yet. We hence run a fake 0th time-step with + the simpler MAC (eq. \ref{eq:fmm:angle}), which is good enough to obtain + approximations of the accelerations.}. The minimal norm of the +acceleration in a given cell can be computed at the same time as the P2M +and M2M kernels are evaluated in the tree construction phase. The second +condition in (\ref{eq:fmm:mac}) is necessary to ensure the convergence of the +Taylor expansion.\\ +One important difference between this criterion and the purely +geometric one (\ref{eq:fmm:angle}) is that it is not symmetric in $A +\leftrightarrow B$ (i.e. $E_{AB,p} \neq E_{BA,p}$).
This implies that +there are cases where a multipole in cell $A$ can be used to compute +the field tensors in cell $B$ but the multipole in $B$ cannot be used +to compute the $\mathsf{F}$ values of cell $A$ and vice versa. This +affects the tree walk by breaking the symmetry and potentially leading +to cells of different sizes interacting. \\ +For the M2P kernel, the sink is a single particle $a$ and hence +$\rho_A = 0$, which simplifies some of the expressions above. In this +case, at order $p$, we get: +\begin{equation} + E_{BA,p} = \frac{P_{B,p}}{M_B |\mathbf{R}|^p}, \qquad + \tilde{E}_{BA,p} = 8E_{BA,p} \nonumber +\end{equation} +Note that, in this case, only the power term of the order of the +scheme appears; not a sum over the lower-order ones. This leads to the +following MAC for the M2P kernel: +\begin{equation} + 8\frac{P_{B,p}}{|\mathbf{R}|^{p+2}} < \epsilon_{\rm FMM} |\mathbf{a}_a| \quad + \rm{and} \quad \frac{\rho_B} {|\mathbf{R}|} < 1. + \label{eq:fmm:mac_m2p} +\end{equation} +The value of $\epsilon_{\rm FMM}$ could in principle be different than the one +used for the M2L MAC. One special case is of particular interest to +link our expression to other results. Using the expression for order +$2$ and the approximation $P_{B,p} \approx M_B \rho_B^p$, we +get +\begin{equation} + 8\frac{M_B}{|\mathbf{R}|^2}\left(\frac{\rho_B}{|\mathbf{R}|}\right)^2 + < \epsilon_{\rm FMM} |\mathbf{a}_a| \nonumber +\end{equation} +for our MAC. This is the same expression as the adaptive opening +angle used by \gadget \cite[see eq.18 of][]{Springel2005} up to +numerical factors and definition of the size of a multipole ($\rho$ +vs. the cell edge). Note, however, that, in practice, since formally +$P_{B,p} \leq M_B \rho_B^p$, the dependence is slightly +different.\\ +We conclude this section by noting that whilst the derivation of the +FMM equations and of the simple geometric MAC (eq. 
\ref{eq:fmm:angle}) +do not make any assumptions about the functional form of $\varphi(r)$, +the more advanced MAC is only valid in the specific case of the +gravitational potential $\varphi(r) = m/r$ as can be inferred from the +$m/r^2$ term appearing on the LHS of the criteria (\ref{eq:fmm:mac}) +and (\ref{eq:fmm:mac_m2p}). + +\subsubsection{Modifications for softened and truncated gravity} + +\begin{figure} +\includegraphics[width=\columnwidth]{mac_potential.pdf} +\caption{The gravitational forces $f_{\rm SWIFT}$ computed by SWIFT + (green line) including the force softening on the smallest scales + and the long-range periodic mesh truncation on the largest scales + for a simulation box of size $L$, a mesh scale-length $r_s$ and + Plummer-equivalent softening $\epsilon_{\rm Plummer}$. The + approximate fast estimator of the forces used in the MAC $f_{\rm + MAC}$ is shown using yellow dash-dotted lines. Note that, by + construction, $f_{\rm SWIFT} \leq f_{\rm MAC} \leq 1/r^2$ for all + distances $r$.} +\label{fig:fmm:mac_potential} +\end{figure} + +One drawback of using expression (\ref{eq:fmm:mac}) in the case of a +softened potential (or a potential truncated to apply long-range +forces from a mesh, Sec. \ref{ssec:mesh_summary}) is that the $M/R^2$ +term will overestimate the expected contribution from the multipole to +the field tensors, sometimes by large factors. This difference is +shown on fig. \ref{fig:fmm:mac_potential}, with for instance a ratio +of $3$ between the true forces and the Newtonian values reached at the +scale of the Plummer softening. Using the simple expression +(\ref{eq:fmm:mac}) will make the MAC too aggressive by preventing it +from using a given multipole as it will be difficult to make the large +term $M/R^2$ be below the fixed fraction $\epsilon_{\rm FMM}$ of the +total acceleration of the receiving cell. This implies more +computation as it will force the tree-walk algorithm to use more +interactions by going to the daughter cells.
The estimation of the +contribution of the multipole in the MAC should hence be replaced by a +more realistic term, closer to the one actually used in the +interactions (eq. \ref{eq:fmm:force_norm}). In simulations with +periodic boundary conditions, the same reasoning applies to the +truncated force at the radii overlapping with the scale $r_s$ of the +mesh forces. + +However, both the short- and long-range truncation functions are +expensive to evaluate in the context of the MAC which is called a +large number of times during a tree walk. We hence construct a +cheaper-to-evaluate estimator $f_{\rm MAC}$ that is closer to the true +forces than the purely Newtonian term: +\begin{align} +f_{\rm MAC}(r) = +\left\lbrace\begin{array}{rcl} + \left(\frac{9}{5}\right)^2 H^{-2} & \mbox{if} & r < + \frac{5}{9}H,\\ + r^{-2} & \mbox{if} & \frac{5}{9}H \leq r < \frac{5}{3}r_s, \\ + \left(\frac{5}{3}\right)^2 r_s^2 r^{-4} & \mbox{if} & \frac{5}{3}r_s \leq r. \\ +\end{array} +\right. +\label{eq:fmm:f_mac} +\end{align} +Since it is made of constants and even powers of the distance, +computing this term is much cheaper than the true forces. This +estimator is shown as a dot-dashed line on +Fig. \ref{fig:fmm:mac_potential} and obeys the relation $f_{\rm + SWIFT}(r) \leq f_{\rm MAC}(r) \leq 1/r^2$, with $f_{\rm SWIFT}(r)$ +being the true truncated and softened norm of the gravity forces the +code solves for (green line). We use this expression in the multipole +acceptance criterion instead of the $1/|\mathbf{R}|^2$ term: +\begin{equation} + \tilde{E}_{BA,p} M_Bf_{\rm MAC}(|\mathbf{R}|) < \epsilon_{\rm FMM} \min_{a\in + A}\left(|\mathbf{a}_a|\right). + \label{eq:fmm:mac_f_mac} +\end{equation} +The same change is applied to the MAC used for the M2P kernel +(eq. \ref{eq:fmm:mac_m2p}). In the non-truncated un-softened case, +their expressions reduce to the \cite{Dehnen2014} one.
Using this +$f_{\rm MAC}$ instead of the simpler purely-Newtonian one only makes a +difference in simulations where a lot of particles cluster below the +scale of the softening, which is often the case for hydrodynamical +simulations including radiative cooling processes. The use of this +term over the simpler $1/r^2$ estimator is a runtime parameter. + diff --git a/theory/Multipoles/fmm_standalone.tex b/theory/Multipoles/fmm_standalone.tex index 1b597fa636650cd09469b9952f7a14bdf22ce35f..81509e1e78ae14ce144dcaff011a87d606c2ae2a 100644 --- a/theory/Multipoles/fmm_standalone.tex +++ b/theory/Multipoles/fmm_standalone.tex @@ -22,7 +22,6 @@ \label{firstpage} \begin{abstract} -Making gravity great again. \end{abstract} \begin{keywords} @@ -35,6 +34,7 @@ Making gravity great again. \input{fmm_summary} %\input{gravity_derivatives} \input{mesh_summary} +\input{fmm_mac} \input{exact_forces} \bibliographystyle{mnras} diff --git a/theory/Multipoles/fmm_summary.tex b/theory/Multipoles/fmm_summary.tex index c263c241673094756966ffc9e5eae7111dd843ea..f3d0ee6f05a8c1e3bd664386bdca25547813fbce 100644 --- a/theory/Multipoles/fmm_summary.tex +++ b/theory/Multipoles/fmm_summary.tex @@ -179,29 +179,49 @@ the other kernels: a_x(\mathbf{x}_a) &= G\sum_{\mathbf{m}}^p \mathsf{M}_{\mathbf{m}} \mathsf{D}_{\mathbf{m}+\left(1,0,0\right)}(\mathbf{R}). \label{eq:fmm:M2P} \end{align} -A traditional tree-code uses solely that kernel to obtain the forces -from the multipoles (or often just monopoles, i.e. setting $p=0$ throughout) -to the particles.\\ -All the kernels (Eqs.~\ref{eq:fmm:P2M}-\ref{eq:fmm:M2P}) are rather -straightforward to evaluate as they are only made of additions and -multiplications (provided $\mathsf{D}$ can be evaluated quickly), -which are extremely efficient instructions on modern architectures -(see Appendix \ref{sec:pot_derivatives} for the full -expressions). However, the fully expanded sums can lead to rather -large and prone to typo expressions. 
To avoid any mishaps, we use a -\texttt{python} script to generate C code in which all the sums are -unrolled and correct by construction. In \swift, we implemented the -kernels up to order $p=5$, as it proved to be accurate enough for our -purpose, but this could be extended to higher order easily. This -implies storing $56$ numbers per cell for each $\textsf{M}$ and -$\textsf{F}$ plus three numbers for the location of the centre of -mass. For leaf-cells with large numbers of particles, as in \swift, -this is a small memory overhead. One further small improvement -consists in choosing $\mathbf{z}_A$ to be the centre of mass of cell -$A$ rather than its geometrical centre. The first order multipoles +A traditional tree-code uses solely that kernel to obtain the forces from +the multipoles (or often just monopoles, i.e. setting $p=0$ throughout) to +the particles. Similarly, the field tensor of a cell can receive the +contribution from a single particle at a distance $\mathbf{R} \equiv +\mathbf{z}_A - \mathbf{x}_b$ via the P2L kernel: +\begin{equation} + \mathsf{F}_{\mathbf{n}}(\mathbf{z}_A) = G m_b + \mathsf{D}_{\mathbf{n}}(\mathbf{R}). + \label{eq:fmm:P2L} +\end{equation} +The M2P and P2L kernels can be used to speed up the calculations +involving only a single particle. All the kernels +(Eqs.~\ref{eq:fmm:P2M}-\ref{eq:fmm:P2L}) are rather straightforward to +evaluate as they are only made of additions and multiplications +(provided $\mathsf{D}$ can be evaluated quickly), which are extremely +efficient instructions on modern architectures (see Appendix +\ref{sec:pot_derivatives} for the full expressions). However, the +fully expanded sums can lead to rather large and prone to typo +expressions. To avoid any mishaps, we use a \texttt{python} script to +generate C code in which all the sums are unrolled and correct by +construction.
In \swift, we implemented the kernels up to order $p=5$, +as it proved to be accurate enough for our purpose, but this could be +extended to higher order easily. This implies storing $56$ numbers per +cell for each $\textsf{M}$ and $\textsf{F}$ plus three numbers for the +location of the centre of mass. For leaf-cells with large numbers of +particles, as in \swift, this is a small memory overhead. One further +small improvement consists in choosing $\mathbf{z}_A$ to be the centre +of mass of cell $A$ rather than its geometrical centre. The first +order multipoles ($\mathsf{M}_{100},\mathsf{M}_{010},\mathsf{M}_{001}$) then vanish by construction. This allows us to simplify some of the expressions and helps reduce, albeit by a small fraction, the memory footprint of the tree structure. -\subsubsection{The Multipole acceptance criterion} +\subsubsection{Computing the accelerations via a tree-walk} + +We define the maximal distance between a centre of mass of a cell $B$ and +any particle in that cell as + +\begin{equation} + \rho_B = \max_{b \in B}(|\mathbf{r}_b|) +\end{equation} +\\ +\textcolor{red}{MORE WORDS HERE} + + diff --git a/theory/Multipoles/generate_multipoles/multipoles.py b/theory/Multipoles/generate_multipoles/multipoles.py index ef263d09f22e0186bf3ae2e9572cb89cf156f8a0..d1fc6ad4db3d7e137f39a563ee5de1050fa01938 100644 --- a/theory/Multipoles/generate_multipoles/multipoles.py +++ b/theory/Multipoles/generate_multipoles/multipoles.py @@ -1,390 +1,496 @@ import numpy as np import sys + def factorial(x): if x == 0: return 1 else: - return x * factorial(x-1) + return x * factorial(x - 1) + + +SUFFIXES = {1: "st", 2: "nd", 3: "rd"} + -SUFFIXES = {1: 'st', 2: 'nd', 3: 'rd'} def ordinal(num): - suffix = SUFFIXES.get(num % 10, 'th') + suffix = SUFFIXES.get(num % 10, "th") return str(num) + suffix + # Get the order order = int(sys.argv[1]) -print "-------------------------------------------------" -print "Generating code for multipoles of order", order, "(only)." 
-print "-------------------------------------------------\n" +print("-------------------------------------------------") +print("Generating code for multipoles of order", order, "(only).") +print("-------------------------------------------------\n") -print "-------------------------------------------------" -print "Multipole structure:" -print "-------------------------------------------------\n" +print("-------------------------------------------------") +print("Multipole structure:") +print("-------------------------------------------------\n") if order > 0: - print "#if SELF_GRAVITY_MULTIPOLE_ORDER > %d\n"%(order-1) + print("#if SELF_GRAVITY_MULTIPOLE_ORDER > %d\n" % (order - 1)) + +print("/* %s order terms */" % ordinal(order)) -print "/* %s order terms */"%ordinal(order) - # Create all the terms relevent for this order -for i in range(order+1): - for j in range(order+1): - for k in range(order+1): +for i in range(order + 1): + for j in range(order + 1): + for k in range(order + 1): if i + j + k == order: - print "float M_%d%d%d;"%(i,j,k) + print("float M_%d%d%d;" % (i, j, k)) if order > 0: - print "#endif" + print("#endif") -print "" -print "-------------------------------------------------" +print("") +print("-------------------------------------------------") -print "Field tensor structure:" -print "-------------------------------------------------\n" +print("Field tensor structure:") +print("-------------------------------------------------\n") if order > 0: - print "#if SELF_GRAVITY_MULTIPOLE_ORDER > %d\n"%(order-1) + print("#if SELF_GRAVITY_MULTIPOLE_ORDER > %d\n" % (order - 1)) + +print("/* %s order terms */" % ordinal(order)) -print "/* %s order terms */"%ordinal(order) - # Create all the terms relevent for this order -for i in range(order+1): - for j in range(order+1): - for k in range(order+1): +for i in range(order + 1): + for j in range(order + 1): + for k in range(order + 1): if i + j + k == order: - print "float F_%d%d%d;"%(i,j,k) + print("float 
F_%d%d%d;" % (i, j, k)) if order > 0: - print "#endif" + print("#endif") -print "" -print "-------------------------------------------------" +print("") +print("-------------------------------------------------") -print "gravity_field_tensors_add():" -print "-------------------------------------------------\n" +print("gravity_field_tensors_add():") +print("-------------------------------------------------\n") if order > 0: - print "#if SELF_GRAVITY_MULTIPOLE_ORDER > %d"%(order-1) + print("#if SELF_GRAVITY_MULTIPOLE_ORDER > %d" % (order - 1)) + +print("/* %s order terms */" % ordinal(order)) -print "/* %s order terms */"%ordinal(order) - # Create all the terms relevent for this order -for i in range(order+1): - for j in range(order+1): - for k in range(order+1): +for i in range(order + 1): + for j in range(order + 1): + for k in range(order + 1): if i + j + k == order: - print "la->F_%d%d%d += lb->F_%d%d%d;"%(i,j,k,i,j,k) + print("la->F_%d%d%d += lb->F_%d%d%d;" % (i, j, k, i, j, k)) if order > 0: - print "#endif" + print("#endif") -print "" -print "-------------------------------------------------" +print("") +print("-------------------------------------------------") -print "gravity_multipole_add():" -print "-------------------------------------------------\n" +print("gravity_multipole_add():") +print("-------------------------------------------------\n") if order > 0: - print "#if SELF_GRAVITY_MULTIPOLE_ORDER > %d"%(order-1) + print("#if SELF_GRAVITY_MULTIPOLE_ORDER > %d" % (order - 1)) + +print("/* %s order terms */" % ordinal(order)) -print "/* %s order terms */"%ordinal(order) - # Create all the terms relevent for this order -for i in range(order+1): - for j in range(order+1): - for k in range(order+1): +for i in range(order + 1): + for j in range(order + 1): + for k in range(order + 1): if i + j + k == order: - print "ma->M_%d%d%d += mb->M_%d%d%d;"%(i,j,k,i,j,k) + print("ma->M_%d%d%d += mb->M_%d%d%d;" % (i, j, k, i, j, k)) + if order > 0: - print "#endif" + 
print("#endif") -print "" -print "-------------------------------------------------" +print("") +print("-------------------------------------------------") -print "gravity_multipole_equal():" -print "-------------------------------------------------\n" +print("gravity_multipole_equal():") +print("-------------------------------------------------\n") if order > 0: - print "#if SELF_GRAVITY_MULTIPOLE_ORDER > %d"%(order-1) + print("#if SELF_GRAVITY_MULTIPOLE_ORDER > %d" % (order - 1)) # Create all the terms relevent for this order -print "/* Manhattan Norm of %s order terms */"%ordinal(order) -print "const float order%d_norm = "%order, +print("/* Manhattan Norm of %s order terms */" % ordinal(order)) +print("const float order%d_norm = " % order, end=" ") first = True -for i in range(order+1): - for j in range(order+1): - for k in range(order+1): +for i in range(order + 1): + for j in range(order + 1): + for k in range(order + 1): if i + j + k == order: if first: first = False else: - print "+", - print "fabsf(ma->M_%d%d%d)"%(i,j,k), - print "+ fabsf(mb->M_%d%d%d)"%(i,j,k), -print ";\n" -print "/* Compare %s order terms above 1%% of norm */"%ordinal(order) -for i in range(order+1): - for j in range(order+1): - for k in range(order+1): + print("+", end=" ") + print("fabsf(ma->M_%d%d%d)" % (i, j, k), end=" ") + print("+ fabsf(mb->M_%d%d%d)" % (i, j, k), end=" ") +print(";\n") +print("/* Compare %s order terms above 1%% of norm */" % ordinal(order)) +for i in range(order + 1): + for j in range(order + 1): + for k in range(order + 1): if i + j + k == order: - print "if (fabsf(ma->M_%d%d%d + mb->M_%d%d%d) > 0.01f * order%d_norm &&"%(i,j,k,i,j,k,order) - print " fabsf(ma->M_%d%d%d - mb->M_%d%d%d) / fabsf(ma->M_%d%d%d + mb->M_%d%d%d) > tolerance) {"%(i,j,k,i,j,k,i,j,k,i,j,k) - print " message(\"M_%d%d%d term different\");"%(i,j,k) - print " return 0;" - print "}" + print( + "if (fabsf(ma->M_%d%d%d + mb->M_%d%d%d) > 0.01f * order%d_norm &&" + % (i, j, k, i, j, k, order) + ) + 
print( + " fabsf(ma->M_%d%d%d - mb->M_%d%d%d) / fabsf(ma->M_%d%d%d + mb->M_%d%d%d) > tolerance) {" + % (i, j, k, i, j, k, i, j, k, i, j, k) + ) + print(' message("M_%d%d%d term different");' % (i, j, k)) + print(" return 0;") + print("}") + +if order > 0: + print("#endif") + + +print("") +print("-------------------------------------------------") + +print("gravity_multipole_compute_power():") +print("-------------------------------------------------\n") if order > 0: - print "#endif" + print("#if SELF_GRAVITY_MULTIPOLE_ORDER > %d" % (order - 1)) +print("/* %s order terms */" % ordinal(order)) -print "" -print "-------------------------------------------------" +# Add the terms to the multipole power +for i in range(order + 1): + for j in range(order + 1): + for k in range(order + 1): + if i + j + k == order: + fact1 = factorial(i) * factorial(j) * factorial(k) + fact2 = float(factorial(i + j + k)) + frac = fact1 / fact2 + if frac == 1.0: + print( + "power[%d] += m->M_%d%d%d * m->M_%d%d%d;" + % (order, i, j, k, i, j, k) + ) + else: + print( + "power[%d] += %12.15e * m->M_%d%d%d * m->M_%d%d%d;" + % (order, frac, i, j, k, i, j, k) + ) -print "gravity_P2M(): (loop)" -print "-------------------------------------------------\n" +print("") +print("m->power[%d] = sqrt(power[%d]);" % (order, order)) if order > 0: - print "#if SELF_GRAVITY_MULTIPOLE_ORDER > %d"%(order-1) + print("#endif") + +print("") +print("-------------------------------------------------") + +print("gravity_P2M(): (loop)") +print("-------------------------------------------------\n") + +if order > 0: + print("#if SELF_GRAVITY_MULTIPOLE_ORDER > %d" % (order - 1)) + +print("/* %s order terms */" % ordinal(order)) -print "/* %s order terms */"%ordinal(order) - # Create all the terms relevent for this order -for i in range(order+1): - for j in range(order+1): - for k in range(order+1): +for i in range(order + 1): + for j in range(order + 1): + for k in range(order + 1): if i + j + k == order: if order % 2 == 
0: - print "M_%d%d%d += m * X_%d%d%d(dx);"%(i,j,k,i,j,k) + print("M_%d%d%d += m * X_%d%d%d(dx);" % (i, j, k, i, j, k)) else: - print "M_%d%d%d += -m * X_%d%d%d(dx);"%(i,j,k,i,j,k) + print("M_%d%d%d += -m * X_%d%d%d(dx);" % (i, j, k, i, j, k)) + if order > 0: - print "#endif" + print("#endif") -print "" -print "-------------------------------------------------" - -print "gravity_P2M(): (storing)" -print "-------------------------------------------------\n" +print("") +print("-------------------------------------------------") + +print("gravity_P2M(): (storing)") +print("-------------------------------------------------\n") if order > 0: - print "#if SELF_GRAVITY_MULTIPOLE_ORDER > %d"%(order-1) + print("#if SELF_GRAVITY_MULTIPOLE_ORDER > %d" % (order - 1)) + +print("/* %s order terms */" % ordinal(order)) -print "/* %s order terms */"%ordinal(order) - # Create all the terms relevent for this order -for i in range(order+1): - for j in range(order+1): - for k in range(order+1): +for i in range(order + 1): + for j in range(order + 1): + for k in range(order + 1): if i + j + k == order: - print "m->m_pole.M_%d%d%d = M_%d%d%d;"%(i,j,k,i,j,k) + print("m->m_pole.M_%d%d%d = M_%d%d%d;" % (i, j, k, i, j, k)) + if order > 0: - print "#endif" + print("#endif") -print "" -print "-------------------------------------------------" +print("") +print("-------------------------------------------------") -print "gravity_M2M():" -print "-------------------------------------------------\n" +print("gravity_M2M():") +print("-------------------------------------------------\n") if order > 0: - print "#if SELF_GRAVITY_MULTIPOLE_ORDER > %d"%(order-1) + print("#if SELF_GRAVITY_MULTIPOLE_ORDER > %d" % (order - 1)) + +print( + "/* Shift %s order terms (1st order mpole (all 0) commented out) */" + % ordinal(order) +) -print "/* Shift %s order terms */"%ordinal(order) - # Create all the terms relevent for this order -for i in range(order+1): - for j in range(order+1): - for k in range(order+1): 
+for i in range(order + 1): + for j in range(order + 1): + for k in range(order + 1): if i + j + k == order: - print "m_a->M_%d%d%d = m_b->M_%d%d%d"%(i,j,k,i,j,k), - - for ii in range(order+1): - for jj in range(order+1): - for kk in range(order+1): + print("m_a->M_%d%d%d = m_b->M_%d%d%d" % (i, j, k, i, j, k), end=" ") + + for ii in range(order + 1): + for jj in range(order + 1): + for kk in range(order + 1): + + if not (ii == 0 and jj == 0 and kk == 0): + for iii in range(order + 1): + for jjj in range(order + 1): + for kkk in range(order + 1): + if ( + ii + iii == i + and jj + jjj == j + and kk + kkk == k + ): + if iii + jjj + kkk == 1: + print( + "/* + X_%d%d%d(dx) * m_b->M_%d%d%d */" + % (ii, jj, kk, iii, jjj, kkk), + end=" ", + ) + else: + print( + "+ X_%d%d%d(dx) * m_b->M_%d%d%d" + % (ii, jj, kk, iii, jjj, kkk), + end=" ", + ) + + print(";") - if not(ii == 0 and jj == 0 and kk == 0): - for iii in range(order+1): - for jjj in range(order+1): - for kkk in range(order+1): - if ii+iii == i and jj+jjj == j and kk+kkk == k: - print "+ X_%d%d%d(dx) * m_b->M_%d%d%d"%(ii, jj, kk, iii, jjj, kkk), - - - print ";" if order > 0: - print "#endif" + print("#endif") + - -print "" -print "-------------------------------------------------" +print("") +print("-------------------------------------------------") -print "gravity_M2L():" -print "-------------------------------------------------\n" +print("gravity_M2L():") +print("-------------------------------------------------\n") if order > 0: - print "#if SELF_GRAVITY_MULTIPOLE_ORDER > %d\n"%(order-1) + print("#if SELF_GRAVITY_MULTIPOLE_ORDER > %d\n" % (order - 1)) # Loop over LHS order for l in range(order + 1): - print "/* Compute %s order field tensor terms (addition to rank %d) */"%(ordinal(order), l) - - for i in range(l+1): - for j in range(l+1): - for k in range(l+1): + print( + "/* Compute %s order field tensor terms (addition to rank %d) */" + % (ordinal(order), l) + ) + + for i in range(l + 1): + for j in range(l + 
1): + for k in range(l + 1): if i + j + k == l: - print "l_b->F_%d%d%d +="%(i,j,k), + print("l_b->F_%d%d%d +=" % (i, j, k), end=" ") first = True - for ii in range(order+1): - for jj in range(order+1): - for kk in range(order+1): - if ii + jj + kk == order - l: + for ii in range(order + 1): + for jj in range(order + 1): + for kk in range(order + 1): + if ii + jj + kk == order - l: if first: first = False else: - print "+", - print "m_a->M_%d%d%d * D_%d%d%d(dx, dy, dz, r_inv)"%(ii,jj,kk,i+ii,j+jj,k+kk), - print ";" - print "" - + print("+", end=" ") + print( + "m_a->M_%d%d%d * D_%d%d%d(dx, dy, dz, r_inv)" + % (ii, jj, kk, i + ii, j + jj, k + kk), + end=" ", + ) + print(";") + print("") + if order > 0: - print "#endif" + print("#endif") -print "" -print "-------------------------------------------------" +print("") +print("-------------------------------------------------") -print "gravity_L2L():" -print "-------------------------------------------------\n" +print("gravity_P2L():") +print("-------------------------------------------------\n") if order > 0: - print "#if SELF_GRAVITY_MULTIPOLE_ORDER > %d\n"%(order-1) + print("#if SELF_GRAVITY_MULTIPOLE_ORDER > %d\n" % (order - 1)) + +print("/* %s order contributions */" % ordinal(order)) # Loop over LHS order -for l in range(order + 1): - print "/* Shift %s order field tensor terms (addition to rank %d) */"%(ordinal(order), l) +for i in range(order + 1): + for j in range(order + 1): + for k in range(order + 1): + if i + j + k == order: + print("l_b->F_%d%d%d += mass * pot.D_%d%d%d;" % (i, j, k, i, j, k)) + +if order > 0: + print("#endif") + + +print("") +print("-------------------------------------------------") - for i in range(l+1): - for j in range(l+1): - for k in range(l+1): +print("gravity_L2L():") +print("-------------------------------------------------\n") + +if order > 0: + print("#if SELF_GRAVITY_MULTIPOLE_ORDER > %d\n" % (order - 1)) + +# Loop over LHS order +for l in range(order + 1): + print( + "/* Shift 
%s order field tensor terms (addition to rank %d) */" + % (ordinal(order), l) + ) + + for i in range(l + 1): + for j in range(l + 1): + for k in range(l + 1): if i + j + k == l: - print "la->F_%d%d%d +="%(i,j,k), + print("la->F_%d%d%d +=" % (i, j, k), end=" ") first = True - for ii in range(order+1): - for jj in range(order+1): - for kk in range(order+1): - if ii + jj + kk == order - l: + for ii in range(order + 1): + for jj in range(order + 1): + for kk in range(order + 1): + if ii + jj + kk == order - l: if first: first = False else: - print "+", - print "X_%d%d%d(dx) * lb->F_%d%d%d"%(ii,jj,kk,i+ii,j+jj,k+kk), - print ";" - print "" - + print("+", end=" ") + print( + "X_%d%d%d(dx) * lb->F_%d%d%d" + % (ii, jj, kk, i + ii, j + jj, k + kk), + end=" ", + ) + print(";") + print("") + if order > 0: - print "#endif" + print("#endif") -print "" -print "-------------------------------------------------" +print("") +print("-------------------------------------------------") -print "gravity_L2P():" -print "-------------------------------------------------\n" +print("gravity_L2P():") +print("-------------------------------------------------\n") if order > 0: - print "#if SELF_GRAVITY_MULTIPOLE_ORDER > %d\n"%(order-1) + print("#if SELF_GRAVITY_MULTIPOLE_ORDER > %d\n" % (order - 1)) - print "/* %s order contributions */"%(ordinal(order-1)) + print("/* %s order contributions */" % (ordinal(order - 1))) for r in range(3): - print "gp->a_grav[%d] +="%(r), + print("gp->a_grav[%d] +=" % (r), end=" ") first = True for i in range(order + 1): for j in range(order + 1): for k in range(order + 1): - if i + j + k == order-1: + if i + j + k == order - 1: if first: first = False else: - print "+", + print("+", end=" ") if r == 0: - ii = i+1 + ii = i + 1 jj = j kk = k if r == 1: ii = i - jj = j+1 + jj = j + 1 kk = k if r == 2: ii = i jj = j - kk = k+1 - print "X_%d%d%d(dx) * lb->F_%d%d%d"%(i,j,k,ii,jj,kk), - print ";" - - print "" + kk = k + 1 + print( + "X_%d%d%d(dx) * lb->F_%d%d%d" % (i, 
j, k, ii, jj, kk), + end=" ", + ) + print(";") + + print("") if order > 0: - print "#endif" + print("#endif") -print "" -print "-------------------------------------------------" +print("") +print("-------------------------------------------------") -print "gravity_M2P():" -print "-------------------------------------------------\n" +print("gravity_M2P():") +print("-------------------------------------------------\n") if order > 0: - print "#if SELF_GRAVITY_MULTIPOLE_ORDER > %d\n"%(order-1) - -print "/* %s order contributions */"%(ordinal(order)) + print("#if SELF_GRAVITY_MULTIPOLE_ORDER > %d\n" % (order - 1)) + +print("/* %s order contributions */" % (ordinal(order))) + - for r in range(4): if r == 0: - print "*f_x =", + print("*f_x =", end=" ") if r == 1: - print "*f_y =", + print("*f_y =", end=" ") if r == 2: - print "*f_z =", + print("*f_z =", end=" ") if r == 3: - print "*pot =", - + print("*pot =", end=" ") + first = True - for i in range(order+1): - for j in range(order+1): - for k in range(order+1): + for i in range(order + 1): + for j in range(order + 1): + for k in range(order + 1): if i + j + k == order: if first: first = False else: - print "+", + print("+", end=" ") if r == 0: - ii = i+1 + ii = i + 1 jj = j kk = k if r == 1: ii = i - jj = j+1 + jj = j + 1 kk = k if r == 2: ii = i jj = j - kk = k+1 + kk = k + 1 if r == 3: ii = i jj = j kk = k - print "m->M_%d%d%d * d.D_%d%d%d"%(i,j,k,ii,jj,kk), - - print ";" - -print "" + print("m->M_%d%d%d * d.D_%d%d%d" % (i, j, k, ii, jj, kk), end=" ") -if order > 0: - print "#endif" + print(";") + +print("") -print "" -print "-------------------------------------------------" +if order > 0: + print("#endif") +print("") +print("-------------------------------------------------") diff --git a/theory/Multipoles/mesh_summary.tex b/theory/Multipoles/mesh_summary.tex index 19524ee21b9ef85e45927182d6632dbf17ab3275..f66a9c36cb7f4a8c447b99016315fdf72cbef226 100644 --- a/theory/Multipoles/mesh_summary.tex +++ 
b/theory/Multipoles/mesh_summary.tex @@ -53,7 +53,7 @@ of particles that are in tree-leaves larger than $1/10$ of the mesh size or between two tree-leaves distant by more than that amount.\\ -MORE WORDS HERE.\\ +\textcolor{red}{MORE WORDS HERE.}\\ The truncation function in Fourier space reads diff --git a/theory/Multipoles/plot_derivatives.py b/theory/Multipoles/plot_derivatives.py index bd086608c1a8bd8874eb147cd3b42b6485468736..e5f2936dea81ce27a3d1d562239df94191922340 100644 --- a/theory/Multipoles/plot_derivatives.py +++ b/theory/Multipoles/plot_derivatives.py @@ -1,22 +1,23 @@ ############################################################################### - # This file is part of SWIFT. - # Copyright (c) 2016 Matthieu Schaller (matthieu.schaller@durham.ac.uk) - # - # This program is free software: you can redistribute it and/or modify - # it under the terms of the GNU Lesser General Public License as published - # by the Free Software Foundation, either version 3 of the License, or - # (at your option) any later version. - # - # This program is distributed in the hope that it will be useful, - # but WITHOUT ANY WARRANTY; without even the implied warranty of - # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - # GNU General Public License for more details. - # - # You should have received a copy of the GNU Lesser General Public License - # along with this program. If not, see <http://www.gnu.org/licenses/>. - # - ############################################################################## +# This file is part of SWIFT. +# Copyright (c) 2016 Matthieu Schaller (matthieu.schaller@durham.ac.uk) +# +# This program is free software: you can redistribute it and/or modify +# it under the terms of the GNU Lesser General Public License as published +# by the Free Software Foundation, either version 3 of the License, or +# (at your option) any later version. 
+# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU Lesser General Public License +# along with this program. If not, see <http://www.gnu.org/licenses/>. +# +############################################################################## import matplotlib + matplotlib.use("Agg") from pylab import * from scipy import integrate @@ -26,30 +27,31 @@ from matplotlib.font_manager import FontProperties import numpy import math -params = {'axes.labelsize': 9, -'axes.titlesize': 10, -'font.size': 10, -'legend.fontsize': 10, -'xtick.labelsize': 8, -'ytick.labelsize': 8, -'text.usetex': True, -'figure.figsize' : (3.15,3.15), -'figure.subplot.left' : 0.12, -'figure.subplot.right' : 0.99 , -'figure.subplot.bottom' : 0.065 , -'figure.subplot.top' : 0.99 , -'figure.subplot.wspace' : 0. , -'figure.subplot.hspace' : 0. , -'lines.markersize' : 6, -'lines.linewidth' : 3., -'text.latex.unicode': True +params = { + "axes.labelsize": 9, + "axes.titlesize": 10, + "font.size": 10, + "legend.fontsize": 10, + "xtick.labelsize": 8, + "ytick.labelsize": 8, + "text.usetex": True, + "figure.figsize": (3.15, 3.15), + "figure.subplot.left": 0.12, + "figure.subplot.right": 0.99, + "figure.subplot.bottom": 0.065, + "figure.subplot.top": 0.99, + "figure.subplot.wspace": 0.0, + "figure.subplot.hspace": 0.0, + "lines.markersize": 6, + "lines.linewidth": 3.0, + "text.latex.unicode": True, } rcParams.update(params) -rc('font',**{'family':'sans-serif','sans-serif':['Times']}) +rc("font", **{"family": "sans-serif", "sans-serif": ["Times"]}) # Parameters -r_min = 0. -r_max = 10. 
+r_min = 0.0 +r_max = 10.0 r_s = 1.7 # Radius @@ -57,12 +59,12 @@ r = linspace(r_min, r_max, 401) w = 2 * r / r_s # Powers of alpha #################################################### -alpha = 1. / (1. + exp(w)) -alpha2 = alpha**2 -alpha3 = alpha**3 -alpha4 = alpha**4 -alpha5 = alpha**5 -alpha6 = alpha**6 +alpha = 1.0 / (1.0 + exp(w)) +alpha2 = alpha ** 2 +alpha3 = alpha ** 3 +alpha4 = alpha ** 4 +alpha5 = alpha ** 5 +alpha6 = alpha ** 6 figure() plot(w, alpha, label="$\\alpha^1$") @@ -72,11 +74,11 @@ plot(w, alpha4, label="$\\alpha^4$") plot(w, alpha5, label="$\\alpha^5$") plot(w, alpha6, label="$\\alpha^6$") -xlabel("w", labelpad=-4.) -ylabel("$\\alpha^n(w)$", labelpad=-4.) +xlabel("w", labelpad=-4.0) +ylabel("$\\alpha^n(w)$", labelpad=-4.0) xlim(0, 7.2) -ylim(0., 0.52) +ylim(0.0, 0.52) legend(loc="upper right") @@ -85,10 +87,17 @@ savefig("alpha_powers.pdf") # Derivatives of alpha ############################################### alpha_1 = -alpha + alpha2 -alpha_2 = alpha - 3.* alpha2 + 2.*alpha3 -alpha_3 = -alpha + 7.*alpha2 - 12.*alpha3 + 6.*alpha4 -alpha_4 = alpha - 15. * alpha2 + 50.*alpha3 - 60.*alpha4 + 24.*alpha5 -alpha_5 = -alpha + 31. * alpha2 - 180. * alpha3 + 390.*alpha4 - 360.*alpha5 + 120. * alpha6 +alpha_2 = alpha - 3.0 * alpha2 + 2.0 * alpha3 +alpha_3 = -alpha + 7.0 * alpha2 - 12.0 * alpha3 + 6.0 * alpha4 +alpha_4 = alpha - 15.0 * alpha2 + 50.0 * alpha3 - 60.0 * alpha4 + 24.0 * alpha5 +alpha_5 = ( + -alpha + + 31.0 * alpha2 + - 180.0 * alpha3 + + 390.0 * alpha4 + - 360.0 * alpha5 + + 120.0 * alpha6 +) figure() @@ -99,8 +108,8 @@ plot(w, alpha_3, label="$\\alpha^{(3)}$") plot(w, alpha_4, label="$\\alpha^{(4)}$") plot(w, alpha_5, label="$\\alpha^{(5)}$") -xlabel("w", labelpad=-4.) -ylabel("$\\alpha^{(n)}(w)$", labelpad=-5.) 
+xlabel("w", labelpad=-4.0) +ylabel("$\\alpha^{(n)}(w)$", labelpad=-5.0) xlim(0, 7.2) ylim(-0.26, 0.16) @@ -110,14 +119,13 @@ legend(loc="lower right") savefig("alpha_derivatives.pdf") - # Derivatives of sigma ############################################### sigma = exp(w) * alpha sigma_1 = exp(w) * alpha2 -sigma_2 = exp(w) * (2*alpha3 - alpha2) -sigma_3 = exp(w) * (6*alpha4 - 6*alpha3 + alpha2) -sigma_4 = exp(w) * (24*alpha5 -36*alpha4 + 14*alpha3 - alpha2) -sigma_5 = exp(w) * (120*alpha6 -240*alpha5 + 150*alpha4 - 30*alpha3 + alpha2) +sigma_2 = exp(w) * (2 * alpha3 - alpha2) +sigma_3 = exp(w) * (6 * alpha4 - 6 * alpha3 + alpha2) +sigma_4 = exp(w) * (24 * alpha5 - 36 * alpha4 + 14 * alpha3 - alpha2) +sigma_5 = exp(w) * (120 * alpha6 - 240 * alpha5 + 150 * alpha4 - 30 * alpha3 + alpha2) figure() @@ -128,8 +136,8 @@ plot(w, sigma_3, label="$\\sigma^{(3)}$") plot(w, sigma_4, label="$\\sigma^{(4)}$") plot(w, sigma_5, label="$\\sigma^{(5)}$") -xlabel("w", labelpad=-4.) -ylabel("$\\sigma^{(n)}(w)$", labelpad=-5.) 
+xlabel("w", labelpad=-4.0) +ylabel("$\\sigma^{(n)}(w)$", labelpad=-5.0) xlim(0, 7.2) ylim(-0.22, 1.02) @@ -139,20 +147,24 @@ legend(loc="center right") savefig("sigma_derivatives.pdf") - # Derivatives of chi ############################################### c1 = 2 / r_s -c2 = (2 / r_s)**2 -c3 = (2 / r_s)**3 -c4 = (2 / r_s)**4 -c5 = (2 / r_s)**5 +c2 = (2 / r_s) ** 2 +c3 = (2 / r_s) ** 3 +c4 = (2 / r_s) ** 4 +c5 = (2 / r_s) ** 5 chi = 2 - 2 * exp(w) * alpha chi_1 = -2 * c1 * exp(w) * alpha2 -chi_2 = -2 * c2 * exp(w) * (2*alpha3 - alpha2) -chi_3 = -2 * c3 * exp(w) * (6*alpha4 - 6*alpha3 + alpha2) -chi_4 = -2 * c4 * exp(w) * (24*alpha5 - 36*alpha4 + 14*alpha3 - alpha2) -chi_5 = -2 * c5 * exp(w) * (120*alpha6 - 240*alpha5 + 150*alpha4 - 30*alpha3 + alpha2) +chi_2 = -2 * c2 * exp(w) * (2 * alpha3 - alpha2) +chi_3 = -2 * c3 * exp(w) * (6 * alpha4 - 6 * alpha3 + alpha2) +chi_4 = -2 * c4 * exp(w) * (24 * alpha5 - 36 * alpha4 + 14 * alpha3 - alpha2) +chi_5 = ( + -2 + * c5 + * exp(w) + * (120 * alpha6 - 240 * alpha5 + 150 * alpha4 - 30 * alpha3 + alpha2) +) figure() plot(r, chi, label="$\\chi^{(0)}$") @@ -162,10 +174,10 @@ plot(r, chi_3, label="$\\chi^{(3)}$") plot(r, chi_4, label="$\\chi^{(4)}$") plot(r, chi_5, label="$\\chi^{(5)}$") -plot([r_s, r_s], [-10, 10], 'k--', lw=1) +plot([r_s, r_s], [-10, 10], "k--", lw=1) -xlabel("r", labelpad=-4.) -ylabel("$\\chi^{(n)}(r,r_s)$", labelpad=-5.) 
+xlabel("r", labelpad=-4.0) +ylabel("$\\chi^{(n)}(r,r_s)$", labelpad=-5.0) xlim(0, 7.2) ylim(-1.52, 1.02) diff --git a/theory/Multipoles/plot_mac_potential.py b/theory/Multipoles/plot_mac_potential.py new file mode 100644 index 0000000000000000000000000000000000000000..3a3fea2c606d62b1806433dea9ca2dd7fbed2d04 --- /dev/null +++ b/theory/Multipoles/plot_mac_potential.py @@ -0,0 +1,212 @@ +import matplotlib + +matplotlib.use("Agg") +from pylab import * +from scipy import special +import numpy as np +import math + + +e_plummer = 1.0 / 3.0 +box_size = 25000 +mesh_size = 64 +a_smooth = 1.25 +r_cut_ratio = 4.5 + +#################################################################### + +params = { + "axes.labelsize": 9, + "axes.titlesize": 10, + "font.size": 10, + "legend.fontsize": 10, + "xtick.labelsize": 7, + "ytick.labelsize": 7, + "figure.figsize": (3.15, 3.15), + "text.latex.unicode": True, + "text.usetex": True, +} +rcParams.update(params) + +plummer_to_spline_ratio = 3.0 + +H = plummer_to_spline_ratio * e_plummer +r_s = a_smooth * box_size / mesh_size +r_cut = r_s * r_cut_ratio + +MAC_lo_limit = (5.0 / 9.0) * H +MAC_hi_limit = (5.0 / 3.0) * r_s + +print(("Potential softened below", H, "kpc and truncated above", r_s, "kpc")) + +#################################################################### + +r = np.logspace(np.log10(e_plummer) - 1.2, np.log10(box_size) + 0.2, 10000) + +# Newtonian gravity +f_newton = 1 / r ** 2 + +# Simulated gravity +u = r / H +u = u[u <= 1] + +W_swift = 21.0 * u ** 6 - 90.0 * u ** 5 + 140.0 * u ** 4 - 84.0 * u ** 3 + 14.0 * u +f_swift = f_newton * ( + special.erfc(0.5 * r / r_s) + + (1.0 / math.sqrt(math.pi)) * (r / r_s) * np.exp(-0.25 * (r / r_s) ** 2) +) +f_swift[r <= H] = W_swift / H ** 2 +f_swift[r > r_cut] = 0 + +W_gadget = u * ( + 21.333333 - 48 * u + 38.4 * u ** 2 - 10.6666667 * u ** 3 - 0.06666667 * u ** -3 +) +W_gadget[u < 0.5] = u[u < 0.5] * ( + 10.666667 + u[u < 0.5] ** 2 * (32.0 * u[u < 0.5] - 38.4) +) +f_gadget = f_newton * ( + 
special.erfc(0.5 * r / r_s) + + (1.0 / math.sqrt(math.pi)) * (r / r_s) * np.exp(-0.25 * (r / r_s) ** 2) +) +f_gadget[r <= H] = W_gadget / H ** 2 +f_gadget[r > r_cut] = 0 + +f_MAC = np.copy(f_newton) +f_MAC[r < MAC_lo_limit] = (1 / r[r < MAC_lo_limit]) ** 0 * MAC_lo_limit ** -2 +f_MAC[r > MAC_hi_limit] = (1 / r[r > MAC_hi_limit]) ** 4 * MAC_hi_limit ** 2 +f_MAC[r > r_cut] = 0 + +# range_test = np.logical_and(r > 0.01 * e_plummer, r < 2 * r_cut) +# print(np.max(f_swift[range_test] / f_MAC[range_test])) + +#################################################################### + +fig = figure() +colors = ["#4477AA", "#CC6677", "#DDCC77", "#117733"] +gs1 = fig.add_gridspec( + nrows=4, + ncols=1, + left=0.14, + right=0.99, + wspace=0.0, + hspace=0.0, + top=0.99, + bottom=0.1, +) +fig.add_subplot(gs1[0:3, :], xscale="log", yscale="log") + +plot(r, f_newton, "--", color=colors[0], label="Newtonian") +plot(r, f_swift, "-", color=colors[3], label="SWIFT") +plot(r, f_MAC, "-.", color=colors[2], label="MAC estimator") +# plot(r, f_gadget, '-.', color=colors[2], label="Gadget") + +plot([e_plummer, e_plummer], [1e-20, 1e20], "k--", alpha=0.3, lw=0.7) +plot([H, H], [1e-20, 1e20], "k--", alpha=0.3, lw=0.7) +plot([r_s, r_s], [1e-20, 1e20], "k--", alpha=0.3, lw=0.7) +plot([r_cut, r_cut], [1e-20, 1e20], "k--", alpha=0.3, lw=0.7) +plot([box_size, box_size], [1e-20, 1e20], "k--", alpha=0.3, lw=0.7) + +text( + e_plummer, + 1e-9, + "$\\epsilon_{\\rm Plummer}$", + rotation=90, + backgroundcolor="w", + ha="center", + alpha=0.3, +) +# text(H, 1e-9, "$\\epsilon_{\\rm spline}$", rotation=90, backgroundcolor='w', ha="center", alpha=0.3) +text(H, 1e-9, "$H$", rotation=90, backgroundcolor="w", ha="center", alpha=0.3) +text( + r_s, + 1e-1, + "$r_{\\rm s}$", + rotation=90, + backgroundcolor="w", + ha="center", + va="top", + alpha=0.3, +) +text( + r_cut, + 1e-1, + "$r_{\\rm cut}$", + rotation=90, + backgroundcolor="w", + ha="center", + va="top", + alpha=0.3, +) +text( + box_size, + 1e-1, + "$L$", + 
rotation=90, + backgroundcolor="w", + ha="center", + va="top", + alpha=0.3, +) + +legend( + loc="upper right", + frameon=True, + handletextpad=0.3, + handlelength=1.6, + fontsize=8, + framealpha=1.0, +) + +ylim(0.1 * (box_size) ** -2, 2 * (e_plummer / 30) ** -2) +xlim(e_plummer / 30, box_size * 2.5) + +tick_params(axis="x", which="both", labelbottom=False) + +xlabel("$r$") +ylabel("$|f(r)|$", labelpad=-2) + +################################################################################## +fig.add_subplot(gs1[3, :], xscale="log", yscale="log") + + +plot(r, f_newton * r ** 2, "--", color=colors[0], label="Newtonian") +plot(r, f_swift * r ** 2, "-", color=colors[3], label="SWIFT") +plot(r, f_MAC * r ** 2, "-.", color=colors[2], label="MAC estimator") +# plot(r, f_gadget * r**2, '-.', color=colors[2], label="Gadget") + +plot([e_plummer, e_plummer], [1e-20, 1e20], "k--", alpha=0.3, lw=0.7) +plot([H, H], [1e-20, 1e20], "k--", alpha=0.3, lw=0.7) +plot([r_s, r_s], [1e-20, 1e20], "k--", alpha=0.3, lw=0.7) +plot([r_cut, r_cut], [1e-20, 1e20], "k--", alpha=0.3, lw=0.7) +plot([box_size, box_size], [1e-20, 1e20], "k--", alpha=0.3, lw=0.7) + +ylim(0.08, 2.2) +xlim(e_plummer / 30, box_size * 2.5) + +yticks([0.1, 1], ["$0.1$", "$1$"]) + +xlabel("$r$", labelpad=0) +ylabel("$|f(r)| \\times r^2$", labelpad=0) + +################################################################################## +# fig.add_subplot(gs1[4, :], xscale="log", yscale="log") + +# plot(r, f_newton / f_swift, '--', color=colors[0], label="Newtonian") +# plot(r, f_swift / f_swift, '-', color=colors[3], label="SWIFT") +# plot(r, f_MAC / f_swift, ':', color=colors[1], label="MAC estimator") +# plot(r, f_gadget / f_swift, '-.', color=colors[2], label="Gadget") + +# plot([e_plummer, e_plummer], [1e-20, 1e20], 'k--', alpha=0.3, lw=0.7) +# plot([H, H], [1e-20, 1e20], 'k--', alpha=0.3, lw=0.7) +# plot([r_s, r_s], [1e-20, 1e20], 'k--', alpha=0.3, lw=0.7) +# plot([r_cut, r_cut], [1e-20, 1e20], 'k--', alpha=0.3, 
lw=0.7) +# plot([box_size, box_size], [1e-20, 1e20], 'k--', alpha=0.3, lw=0.7) + +# ylim(0.5, 13.) +# xlim(e_plummer / 30, box_size * 1.6) + +# xlabel("$r$", labelpad=0) +# ylabel("$|f(r)| / |f_{SWIFT}(r)|$", labelpad=2) + + +savefig("mac_potential.pdf") diff --git a/theory/Multipoles/plot_mesh.py b/theory/Multipoles/plot_mesh.py index 64f88c4e4ee751e1df9654de9fd7d10c6f85c828..ce8c7da95380a678fd3be1d124a3c8562769bbd7 100644 --- a/theory/Multipoles/plot_mesh.py +++ b/theory/Multipoles/plot_mesh.py @@ -1,22 +1,23 @@ ############################################################################### - # This file is part of SWIFT. - # Copyright (c) 2016 Matthieu Schaller (matthieu.schaller@durham.ac.uk) - # - # This program is free software: you can redistribute it and/or modify - # it under the terms of the GNU Lesser General Public License as published - # by the Free Software Foundation, either version 3 of the License, or - # (at your option) any later version. - # - # This program is distributed in the hope that it will be useful, - # but WITHOUT ANY WARRANTY; without even the implied warranty of - # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - # GNU General Public License for more details. - # - # You should have received a copy of the GNU Lesser General Public License - # along with this program. If not, see <http://www.gnu.org/licenses/>. - # - ############################################################################## +# This file is part of SWIFT. +# Copyright (c) 2016 Matthieu Schaller (matthieu.schaller@durham.ac.uk) +# +# This program is free software: you can redistribute it and/or modify +# it under the terms of the GNU Lesser General Public License as published +# by the Free Software Foundation, either version 3 of the License, or +# (at your option) any later version. 
+# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU Lesser General Public License +# along with this program. If not, see <http://www.gnu.org/licenses/>. +# +############################################################################## import matplotlib + matplotlib.use("Agg") from pylab import * from scipy import integrate @@ -27,31 +28,32 @@ from matplotlib.font_manager import FontProperties import numpy import math -params = {'axes.labelsize': 9, -'axes.titlesize': 10, -'font.size': 10, -'legend.fontsize': 10, -'xtick.labelsize': 8, -'ytick.labelsize': 8, -'text.usetex': True, -'figure.figsize' : (3.15,3.15), -'figure.subplot.left' : 0.14, -'figure.subplot.right' : 0.99 , -'figure.subplot.bottom' : 0.1 , -'figure.subplot.top' : 0.99 , -'figure.subplot.wspace' : 0. , -'figure.subplot.hspace' : 0. , -'lines.markersize' : 6, -'lines.linewidth' : 3., -'text.latex.unicode': True +params = { + "axes.labelsize": 9, + "axes.titlesize": 10, + "font.size": 10, + "legend.fontsize": 10, + "xtick.labelsize": 8, + "ytick.labelsize": 8, + "text.usetex": True, + "figure.figsize": (3.15, 3.15), + "figure.subplot.left": 0.14, + "figure.subplot.right": 0.99, + "figure.subplot.bottom": 0.1, + "figure.subplot.top": 0.99, + "figure.subplot.wspace": 0.0, + "figure.subplot.hspace": 0.0, + "lines.markersize": 6, + "lines.linewidth": 3.0, + "text.latex.unicode": True, } rcParams.update(params) -rc('font',**{'family':'sans-serif','sans-serif':['Times']}) -colors=['#4477AA', '#CC6677', '#DDCC77', '#117733'] +rc("font", **{"family": "sans-serif", "sans-serif": ["Times"]}) +colors = ["#4477AA", "#CC6677", "#DDCC77", "#117733"] # Parameters -r_s = 2. 
+r_s = 2.0 r_min = 3e-2 r_max = 1.5e2 @@ -59,95 +61,120 @@ r_max = 1.5e2 r = logspace(log10(r_min), log10(r_max), 401) r_rs = r / r_s -k = logspace(log10(r_min/r_s**2), log10(r_max/r_s**2), 401) +k = logspace(log10(r_min / r_s ** 2), log10(r_max / r_s ** 2), 401) k_rs = k * r_s # Newtonian solution -phi_newton = 1. / r -phit_newton = 1. / k**2 -force_newton = 1. / r**2 +phi_newton = 1.0 / r +phit_newton = 1.0 / k ** 2 +force_newton = 1.0 / r ** 2 + def my_exp(x): - return 1. + x + (x**2 / 2.) + (x**3 / 6.) + (x**4 / 24.) + (x**5 / 120.) + (x**6 / 720.) - #return exp(x) - -def term(x): # 1 / (1 + e^x) - return 1. / (1. + exp(x)) - + return ( + 1.0 + + x + + (x ** 2 / 2.0) + + (x ** 3 / 6.0) + + (x ** 4 / 24.0) + + (x ** 5 / 120.0) + + (x ** 6 / 720.0) + ) + # return exp(x) + + +def term(x): # 1 / (1 + e^x) + return 1.0 / (1.0 + exp(x)) + + def my_term(x): # 1 / (1 + e^x) - #return 0.5 - 0.25 * x + (x**3 / 48.) - (x**5 / 480) - return 1. / (1. + my_exp(x)) - -def csch(x): # hyperbolic cosecant - return 1. / sinh(x) + # return 0.5 - 0.25 * x + (x**3 / 48.) - (x**5 / 480) + return 1.0 / (1.0 + my_exp(x)) + + +def csch(x): # hyperbolic cosecant + return 1.0 / sinh(x) + def sigmoid(x): return exp(x) * term(x) + def d_sigmoid(x): - return exp(x) * term(x)**2 + return exp(x) * term(x) ** 2 + def my_sigmoid(x): - #return my_exp(x) / (my_exp(x) + 1.) + # return my_exp(x) / (my_exp(x) + 1.) 
return my_exp(x) * my_term(x) + def my_d_sigmoid(x): - #return my_exp(x) / ((my_exp(x) + 1)**2) - return my_exp(x) * my_term(x)**2 + # return my_exp(x) / ((my_exp(x) + 1)**2) + return my_exp(x) * my_term(x) ** 2 + def swift_corr(x): - return 2 * sigmoid( 4 * x ) - 1 + return 2 * sigmoid(4 * x) - 1 + def swift_corr2(x): - return 2 * my_sigmoid( 4 * x ) - 1 + return 2 * my_sigmoid(4 * x) - 1 + figure() x = linspace(-4, 4, 100) -plot(x, special.erf(x), '-', color=colors[2]) -plot(x, swift_corr(x), '-', color=colors[3]) -plot(x, swift_corr2(x), '-.', color=colors[3]) -plot(x, x, '-', color=colors[0]) +plot(x, special.erf(x), "-", color=colors[2]) +plot(x, swift_corr(x), "-", color=colors[3]) +plot(x, swift_corr2(x), "-.", color=colors[3]) +plot(x, x, "-", color=colors[0]) ylim(-1.1, 1.1) xlim(-4.1, 4.1) savefig("temp.pdf") + def alpha(x): - return 1. / (1. + exp(x)) + return 1.0 / (1.0 + exp(x)) + # Correction in real space -corr_short_gadget2 = special.erf(r / (2.*r_s)) -corr_short_swift = swift_corr(r / (2.*r_s)) -corr_short_swift2 = swift_corr2(r / (2.*r_s)) -eta_short_gadget2 = special.erfc(r / (2.*r_s)) + (r / (r_s * math.sqrt(math.pi))) * exp(-r**2 / (4.*r_s**2)) -eta_short_swift = 4. * (r / r_s) * d_sigmoid(2. * r / r_s) - 2. * sigmoid(2 * r / r_s) + 2. -eta_short_swift2 = 4. * (r / r_s) * my_d_sigmoid(2. * r / r_s) - 2. * my_sigmoid(2 * r / r_s) + 2. - -#x = 2. * r / r_s -#force_corr = 2. * (1. - exp(x) * (alpha(x) - x * alpha(x)**2)) -#force_corr = 2. * (1.- x*exp(x)*alpha(x)**2 - exp(x)*alpha(x)) -#force_corr = 2. * (x*alpha(x) - x*alpha(x)**2 -exp(x)*alpha(x) + 1) -#force_corr = abs(2 * (1. 
- exp(x) * alpha(x) + x * exp(2*x)*alpha(x)**2 - x*exp(x)*alpha(x))) -#force_corr = abs(force_corr) +corr_short_gadget2 = special.erf(r / (2.0 * r_s)) +corr_short_swift = swift_corr(r / (2.0 * r_s)) +corr_short_swift2 = swift_corr2(r / (2.0 * r_s)) +eta_short_gadget2 = special.erfc(r / (2.0 * r_s)) + ( + r / (r_s * math.sqrt(math.pi)) +) * exp(-r ** 2 / (4.0 * r_s ** 2)) +eta_short_swift = ( + 4.0 * (r / r_s) * d_sigmoid(2.0 * r / r_s) - 2.0 * sigmoid(2 * r / r_s) + 2.0 +) +eta_short_swift2 = ( + 4.0 * (r / r_s) * my_d_sigmoid(2.0 * r / r_s) - 2.0 * my_sigmoid(2 * r / r_s) + 2.0 +) + +# x = 2. * r / r_s +# force_corr = 2. * (1. - exp(x) * (alpha(x) - x * alpha(x)**2)) +# force_corr = 2. * (1.- x*exp(x)*alpha(x)**2 - exp(x)*alpha(x)) +# force_corr = 2. * (x*alpha(x) - x*alpha(x)**2 -exp(x)*alpha(x) + 1) +# force_corr = abs(2 * (1. - exp(x) * alpha(x) + x * exp(2*x)*alpha(x)**2 - x*exp(x)*alpha(x))) +# force_corr = abs(force_corr) # Corection in Fourier space -corr_long_gadget2 = exp(-k**2*r_s**2) -corr_long_swift = math.pi * k * r_s * csch(0.5 * math.pi * r_s * k) / 2. +corr_long_gadget2 = exp(-k ** 2 * r_s ** 2) +corr_long_swift = math.pi * k * r_s * csch(0.5 * math.pi * r_s * k) / 2.0 # Shortrange term -phi_short_gadget2 = (1. / r ) * (1. - corr_short_gadget2) -phi_short_swift = (1. / r ) * (1. - corr_short_swift) -phi_short_swift2 = (1. / r ) * (1. - corr_short_swift2) -force_short_gadget2 = (1. / r**2) * eta_short_gadget2 -force_short_swift = (1. / r**2) * eta_short_swift -force_short_swift2 = (1. / r**2) * eta_short_swift2 +phi_short_gadget2 = (1.0 / r) * (1.0 - corr_short_gadget2) +phi_short_swift = (1.0 / r) * (1.0 - corr_short_swift) +phi_short_swift2 = (1.0 / r) * (1.0 - corr_short_swift2) +force_short_gadget2 = (1.0 / r ** 2) * eta_short_gadget2 +force_short_swift = (1.0 / r ** 2) * eta_short_swift +force_short_swift2 = (1.0 / r ** 2) * eta_short_swift2 # Long-range term -phi_long_gadget2 = (1. / r ) * corr_short_gadget2 -phi_long_swift = (1. 
/ r ) * corr_short_swift -phit_long_gadget2 = corr_long_gadget2 / k**2 -phit_long_swift = corr_long_swift / k**2 - - +phi_long_gadget2 = (1.0 / r) * corr_short_gadget2 +phi_long_swift = (1.0 / r) * corr_short_swift +phit_long_gadget2 = corr_long_gadget2 / k ** 2 +phit_long_swift = corr_long_swift / k ** 2 figure() @@ -155,50 +182,58 @@ figure() # Potential subplot(311, xscale="log", yscale="log") -plot(r_rs, phi_newton, '--', lw=1.4, label="${\\rm Newtonian}$", color=colors[0]) -plot(r_rs, phi_short_gadget2, '-', lw=1.4, label="${\\rm Gadget}$", color=colors[2]) -plot(r_rs, phi_short_swift, '-', lw=1.4, label="${\\rm SWIFT}$", color=colors[3]) -plot(r_rs, phi_short_swift2, ':', lw=1.4, color=colors[3]) -plot([1., 1.], [1e-5, 1e5], 'k-.', alpha=0.5, lw=0.5) +plot(r_rs, phi_newton, "--", lw=1.4, label="${\\rm Newtonian}$", color=colors[0]) +plot(r_rs, phi_short_gadget2, "-", lw=1.4, label="${\\rm Gadget}$", color=colors[2]) +plot(r_rs, phi_short_swift, "-", lw=1.4, label="${\\rm SWIFT}$", color=colors[3]) +plot(r_rs, phi_short_swift2, ":", lw=1.4, color=colors[3]) +plot([1.0, 1.0], [1e-5, 1e5], "k-.", alpha=0.5, lw=0.5) -xlim(1.1*r_min/ r_s, 0.9*r_max / r_s) -ylim(1.1/r_max, 0.9/r_min) +xlim(1.1 * r_min / r_s, 0.9 * r_max / r_s) +ylim(1.1 / r_max, 0.9 / r_min) ylabel("$\\varphi_s(r)$", labelpad=-3) legend(loc="upper right", frameon=True, handletextpad=0.3, handlelength=1.6, fontsize=8) # Correction subplot(312, xscale="log", yscale="log") -plot(r_rs, np.ones(np.size(r)), '--', lw=1.4, color=colors[0]) -plot(r_rs, 1. - corr_short_gadget2, '-', lw=1.4, color=colors[2]) -plot(r_rs, 1. - corr_short_swift, '-', lw=1.4, color=colors[3]) -plot(r_rs, 1. 
- corr_short_swift2, ':', lw=1.4, color=colors[3]) -plot(r_rs, np.ones(np.size(r))*0.01, 'k-.', alpha=0.5, lw=0.5) -plot([1., 1.], [-1e5, 1e5], 'k-.', alpha=0.5, lw=0.5) -plot([-1, -1], [-1, -1], 'k-', lw=1.2, label="${\\textrm{Exact}~e^x}$") -plot([-1, -1], [-1, -1], 'k:', lw=1.2, label="${6^\\textrm{th}~\\textrm{order~series}~e^x}$") +plot(r_rs, np.ones(np.size(r)), "--", lw=1.4, color=colors[0]) +plot(r_rs, 1.0 - corr_short_gadget2, "-", lw=1.4, color=colors[2]) +plot(r_rs, 1.0 - corr_short_swift, "-", lw=1.4, color=colors[3]) +plot(r_rs, 1.0 - corr_short_swift2, ":", lw=1.4, color=colors[3]) +plot(r_rs, np.ones(np.size(r)) * 0.01, "k-.", alpha=0.5, lw=0.5) +plot([1.0, 1.0], [-1e5, 1e5], "k-.", alpha=0.5, lw=0.5) +plot([-1, -1], [-1, -1], "k-", lw=1.2, label="${\\textrm{Exact}~e^x}$") +plot( + [-1, -1], + [-1, -1], + "k:", + lw=1.2, + label="${6^\\textrm{th}~\\textrm{order~series}~e^x}$", +) yticks([1e-2, 1e-1, 1], ["$0.01$", "$0.1$", "$1$"]) -xlim(1.1*r_min/r_s, 0.9*r_max/r_s) +xlim(1.1 * r_min / r_s, 0.9 * r_max / r_s) ylim(3e-3, 1.5) -#ylabel("$\\chi_s(r)$", labelpad=-3) +# ylabel("$\\chi_s(r)$", labelpad=-3) ylabel("$\\varphi_s(r) \\times r$", labelpad=-2) -legend(loc="center left", frameon=False, handletextpad=0.3, handlelength=1.6, fontsize=7) +legend( + loc="center left", frameon=False, handletextpad=0.3, handlelength=1.6, fontsize=7 +) # 1 - Correction subplot(313, xscale="log", yscale="log") -plot(r_rs, corr_short_gadget2, '-', lw=1.4, color=colors[2]) -plot(r_rs, corr_short_swift, '-', lw=1.4, color=colors[3]) -plot(r_rs, corr_short_swift2, ':', lw=1.4, color=colors[3]) +plot(r_rs, corr_short_gadget2, "-", lw=1.4, color=colors[2]) +plot(r_rs, corr_short_swift, "-", lw=1.4, color=colors[3]) +plot(r_rs, corr_short_swift2, ":", lw=1.4, color=colors[3]) -plot([1., 1.], [1e-5, 1e5], 'k-.', alpha=0.5, lw=0.5) -plot(r_rs, np.ones(np.size(r)), 'k-.', alpha=0.5, lw=0.5) -plot(r_rs, np.ones(np.size(r))*0.01, 'k-.', alpha=0.5, lw=0.5) +plot([1.0, 1.0], [1e-5, 
1e5], "k-.", alpha=0.5, lw=0.5) +plot(r_rs, np.ones(np.size(r)), "k-.", alpha=0.5, lw=0.5) +plot(r_rs, np.ones(np.size(r)) * 0.01, "k-.", alpha=0.5, lw=0.5) -xlim(1.1*r_min/r_s, 0.9*r_max/r_s) +xlim(1.1 * r_min / r_s, 0.9 * r_max / r_s) ylim(3e-3, 1.5) -#ylabel("$1 - \\chi_s(r)$", labelpad=-2) +# ylabel("$1 - \\chi_s(r)$", labelpad=-2) ylabel("$1 - \\varphi_s(r) \\times r$", labelpad=-2) yticks([1e-2, 1e-1, 1], ["$0.01$", "$0.1$", "$1$"]) xlabel("$r / r_s$", labelpad=1) @@ -212,15 +247,15 @@ savefig("potential_short.pdf") figure() subplot(311, xscale="log", yscale="log") -plot(r_rs, force_newton, '--', lw=1.4, label="${\\rm Newtonian}$", color=colors[0]) -plot(r_rs, force_short_gadget2, '-', lw=1.4, label="${\\rm Gadget}$", color=colors[2]) -plot(r_rs, force_short_swift, '-', lw=1.4, label="${\\rm SWIFT}$", color=colors[3]) -#plot(r_rs, (1./r**2) * force_corr, '-', lw=1.2, color='r') -plot(r_rs, force_short_swift2, ':', lw=1.4, color=colors[3]) -plot([1., 1.], [1e-5, 1e5], 'k-.', alpha=0.5, lw=0.5) +plot(r_rs, force_newton, "--", lw=1.4, label="${\\rm Newtonian}$", color=colors[0]) +plot(r_rs, force_short_gadget2, "-", lw=1.4, label="${\\rm Gadget}$", color=colors[2]) +plot(r_rs, force_short_swift, "-", lw=1.4, label="${\\rm SWIFT}$", color=colors[3]) +# plot(r_rs, (1./r**2) * force_corr, '-', lw=1.2, color='r') +plot(r_rs, force_short_swift2, ":", lw=1.4, color=colors[3]) +plot([1.0, 1.0], [1e-5, 1e5], "k-.", alpha=0.5, lw=0.5) -xlim(1.1*r_min/ r_s, 0.9*r_max / r_s) -ylim(1.1/r_max**2, 0.9/r_min**2) +xlim(1.1 * r_min / r_s, 0.9 * r_max / r_s) +ylim(1.1 / r_max ** 2, 0.9 / r_min ** 2) ylabel("$|\\mathbf{f}_s(r)|$", labelpad=-3) yticks([1e-4, 1e-2, 1e0, 1e2], ["$10^{-4}$", "$10^{-2}$", "$10^{0}$", "$10^{2}$"]) @@ -228,33 +263,41 @@ legend(loc="upper right", frameon=True, handletextpad=0.3, handlelength=1.6, fon # Correction subplot(312, xscale="log", yscale="log") -plot(r_rs, np.ones(np.size(r)), '--', lw=1.4, color=colors[0]) -plot(r_rs, eta_short_gadget2, '-', 
lw=1.4, color=colors[2]) -plot(r_rs, eta_short_swift, '-', lw=1.4, color=colors[3]) -plot(r_rs, eta_short_swift2, ':', lw=1.4, color=colors[3]) -plot(r_rs, np.ones(np.size(r))*0.01, 'k-.', alpha=0.5, lw=0.5) -plot([1., 1.], [-1e5, 1e5], 'k-.', alpha=0.5, lw=0.5) -plot([-1, -1], [-1, -1], 'k-', lw=1.2, label="${\\textrm{Exact}~e^x}$") -plot([-1, -1], [-1, -1], 'k:', lw=1.2, label="${6^\\textrm{th}~\\textrm{order~series}~e^x}$") +plot(r_rs, np.ones(np.size(r)), "--", lw=1.4, color=colors[0]) +plot(r_rs, eta_short_gadget2, "-", lw=1.4, color=colors[2]) +plot(r_rs, eta_short_swift, "-", lw=1.4, color=colors[3]) +plot(r_rs, eta_short_swift2, ":", lw=1.4, color=colors[3]) +plot(r_rs, np.ones(np.size(r)) * 0.01, "k-.", alpha=0.5, lw=0.5) +plot([1.0, 1.0], [-1e5, 1e5], "k-.", alpha=0.5, lw=0.5) +plot([-1, -1], [-1, -1], "k-", lw=1.2, label="${\\textrm{Exact}~e^x}$") +plot( + [-1, -1], + [-1, -1], + "k:", + lw=1.2, + label="${6^\\textrm{th}~\\textrm{order~series}~e^x}$", +) yticks([1e-2, 1e-1, 1], ["$0.01$", "$0.1$", "$1$"]) -xlim(1.1*r_min/r_s, 0.9*r_max/r_s) +xlim(1.1 * r_min / r_s, 0.9 * r_max / r_s) ylim(3e-3, 1.5) ylabel("$|\\mathbf{f}_s(r)|\\times r^2$", labelpad=-2) -legend(loc="center left", frameon=False, handletextpad=0.3, handlelength=1.6, fontsize=7) +legend( + loc="center left", frameon=False, handletextpad=0.3, handlelength=1.6, fontsize=7 +) # 1 - Correction subplot(313, xscale="log", yscale="log") -plot(r_rs, 1. - eta_short_gadget2, '-', lw=1.4, color=colors[2]) -plot(r_rs, 1. - eta_short_swift, '-', lw=1.4, color=colors[3]) -plot(r_rs, 1. 
- eta_short_swift2, ':', lw=1.4, color=colors[3]) +plot(r_rs, 1.0 - eta_short_gadget2, "-", lw=1.4, color=colors[2]) +plot(r_rs, 1.0 - eta_short_swift, "-", lw=1.4, color=colors[3]) +plot(r_rs, 1.0 - eta_short_swift2, ":", lw=1.4, color=colors[3]) -plot([1., 1.], [1e-5, 1e5], 'k-.', alpha=0.5, lw=0.5) -plot(r_rs, np.ones(np.size(r)), 'k-.', alpha=0.5, lw=0.5) -plot(r_rs, np.ones(np.size(r))*0.01, 'k-.', alpha=0.5, lw=0.5) +plot([1.0, 1.0], [1e-5, 1e5], "k-.", alpha=0.5, lw=0.5) +plot(r_rs, np.ones(np.size(r)), "k-.", alpha=0.5, lw=0.5) +plot(r_rs, np.ones(np.size(r)) * 0.01, "k-.", alpha=0.5, lw=0.5) -xlim(1.1*r_min/r_s, 0.9*r_max/r_s) +xlim(1.1 * r_min / r_s, 0.9 * r_max / r_s) ylim(3e-3, 1.5) ylabel("$1 - |\\mathbf{f}_s(r)|\\times r^2$", labelpad=-3) yticks([1e-2, 1e-1, 1], ["$0.01$", "$0.1$", "$1$"]) @@ -268,41 +311,71 @@ figure() subplot(311, xscale="log", yscale="log") # Potential -plot(k_rs, phit_newton, '--', lw=1.4, label="${\\rm Newtonian}$", color=colors[0]) -plot(k_rs, phit_long_gadget2, '-', lw=1.4, label="${\\rm Gadget}$", color=colors[2]) -plot(k_rs, phit_long_swift, '-', lw=1.4, label="${\\rm SWIFT}$", color=colors[3]) -plot([1., 1.], [1e-5, 1e5], 'k-.', alpha=0.5, lw=0.5) +plot(k_rs, phit_newton, "--", lw=1.4, label="${\\rm Newtonian}$", color=colors[0]) +plot(k_rs, phit_long_gadget2, "-", lw=1.4, label="${\\rm Gadget}$", color=colors[2]) +plot(k_rs, phit_long_swift, "-", lw=1.4, label="${\\rm SWIFT}$", color=colors[3]) +plot([1.0, 1.0], [1e-5, 1e5], "k-.", alpha=0.5, lw=0.5) legend(loc="lower left", frameon=True, handletextpad=0.3, handlelength=1.6, fontsize=8) -xlim(1.1*r_min/ r_s, 0.9*r_max / r_s) -ylim(1.1/r_max**2, 0.9/r_min**2) +xlim(1.1 * r_min / r_s, 0.9 * r_max / r_s) +ylim(1.1 / r_max ** 2, 0.9 / r_min ** 2) ylabel("$\\tilde{\\varphi_l}(k)$", labelpad=-3) yticks([1e-4, 1e-2, 1e0, 1e2], ["$10^{-4}$", "$10^{-2}$", "$10^{0}$", "$10^{2}$"]) subplot(312, xscale="log", yscale="log") # Potential normalized -plot(k_rs, phit_newton * k**2, '--', 
lw=1.4, label="${\\rm Newtonian}$", color=colors[0]) -plot(k_rs, phit_long_gadget2 * k**2, '-', lw=1.4, label="${\\rm Gadget}$", color=colors[2]) -plot(k_rs, phit_long_swift * k**2, '-', lw=1.4, label="${\\rm SWIFT}$", color=colors[3]) -plot([1., 1.], [1e-5, 1e5], 'k-.', alpha=0.5, lw=0.5) -plot(r_rs, np.ones(np.size(r))*0.01, 'k-.', alpha=0.5, lw=0.5) - -xlim(1.1*r_min/ r_s, 0.9*r_max / r_s) +plot( + k_rs, + phit_newton * k ** 2, + "--", + lw=1.4, + label="${\\rm Newtonian}$", + color=colors[0], +) +plot( + k_rs, + phit_long_gadget2 * k ** 2, + "-", + lw=1.4, + label="${\\rm Gadget}$", + color=colors[2], +) +plot( + k_rs, phit_long_swift * k ** 2, "-", lw=1.4, label="${\\rm SWIFT}$", color=colors[3] +) +plot([1.0, 1.0], [1e-5, 1e5], "k-.", alpha=0.5, lw=0.5) +plot(r_rs, np.ones(np.size(r)) * 0.01, "k-.", alpha=0.5, lw=0.5) + +xlim(1.1 * r_min / r_s, 0.9 * r_max / r_s) ylim(3e-3, 1.5) ylabel("$k^2 \\times \\tilde{\\varphi_l}(k)$", labelpad=-3) yticks([1e-2, 1e-1, 1], ["$0.01$", "$0.1$", "$1$"]) subplot(313, xscale="log", yscale="log") -plot(k_rs, 1. - phit_long_gadget2 * k**2, '-', lw=1.4, label="${\\rm Gadget}$", color=colors[2]) -plot(k_rs, 1. 
- phit_long_swift * k**2, '-', lw=1.4, label="${\\rm SWIFT}$", color=colors[3]) -plot([1., 1.], [1e-5, 1e5], 'k-', alpha=0.5, lw=0.5) -plot(r_rs, np.ones(np.size(r)), 'k-.', alpha=0.5, lw=0.5) -plot(r_rs, np.ones(np.size(r))*0.01, 'k-.', alpha=0.5, lw=0.5) - -xlim(1.1*r_min/ r_s, 0.9*r_max / r_s) +plot( + k_rs, + 1.0 - phit_long_gadget2 * k ** 2, + "-", + lw=1.4, + label="${\\rm Gadget}$", + color=colors[2], +) +plot( + k_rs, + 1.0 - phit_long_swift * k ** 2, + "-", + lw=1.4, + label="${\\rm SWIFT}$", + color=colors[3], +) +plot([1.0, 1.0], [1e-5, 1e5], "k-", alpha=0.5, lw=0.5) +plot(r_rs, np.ones(np.size(r)), "k-.", alpha=0.5, lw=0.5) +plot(r_rs, np.ones(np.size(r)) * 0.01, "k-.", alpha=0.5, lw=0.5) + +xlim(1.1 * r_min / r_s, 0.9 * r_max / r_s) ylim(3e-3, 1.5) ylabel("$1 - k^2 \\times \\tilde{\\varphi_l}(k)$", labelpad=-3) yticks([1e-2, 1e-1, 1], ["$0.01$", "$0.1$", "$1$"]) diff --git a/theory/Multipoles/plot_potential.py b/theory/Multipoles/plot_potential.py index 56e8dc37be581e02a59db51c2579ea80c6109987..27753ffc7946d27f4d7833205a46699846154b0a 100644 --- a/theory/Multipoles/plot_potential.py +++ b/theory/Multipoles/plot_potential.py @@ -1,22 +1,23 @@ ############################################################################### - # This file is part of SWIFT. - # Copyright (c) 2016 Matthieu Schaller (matthieu.schaller@durham.ac.uk) - # - # This program is free software: you can redistribute it and/or modify - # it under the terms of the GNU Lesser General Public License as published - # by the Free Software Foundation, either version 3 of the License, or - # (at your option) any later version. - # - # This program is distributed in the hope that it will be useful, - # but WITHOUT ANY WARRANTY; without even the implied warranty of - # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - # GNU General Public License for more details. - # - # You should have received a copy of the GNU Lesser General Public License - # along with this program. 
If not, see <http://www.gnu.org/licenses/>. - # - ############################################################################## +# This file is part of SWIFT. +# Copyright (c) 2016 Matthieu Schaller (matthieu.schaller@durham.ac.uk) +# +# This program is free software: you can redistribute it and/or modify +# it under the terms of the GNU Lesser General Public License as published +# by the Free Software Foundation, either version 3 of the License, or +# (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU Lesser General Public License +# along with this program. If not, see <http://www.gnu.org/licenses/>. +# +############################################################################## import matplotlib + matplotlib.use("Agg") from pylab import * from scipy import integrate @@ -26,30 +27,31 @@ from matplotlib.font_manager import FontProperties import numpy import math -params = {'axes.labelsize': 9, -'axes.titlesize': 10, -'font.size': 10, -'legend.fontsize': 10, -'xtick.labelsize': 8, -'ytick.labelsize': 8, -'text.usetex': True, -'figure.figsize' : (3.15,3.15), -'figure.subplot.left' : 0.14, -'figure.subplot.right' : 0.99 , -'figure.subplot.bottom' : 0.1 , -'figure.subplot.top' : 0.99 , -'figure.subplot.wspace' : 0. , -'figure.subplot.hspace' : 0. 
, -'lines.markersize' : 6, -'lines.linewidth' : 3., -'text.latex.unicode': True +params = { + "axes.labelsize": 9, + "axes.titlesize": 10, + "font.size": 10, + "legend.fontsize": 10, + "xtick.labelsize": 8, + "ytick.labelsize": 8, + "text.usetex": True, + "figure.figsize": (3.15, 3.15), + "figure.subplot.left": 0.14, + "figure.subplot.right": 0.99, + "figure.subplot.bottom": 0.1, + "figure.subplot.top": 0.99, + "figure.subplot.wspace": 0.0, + "figure.subplot.hspace": 0.0, + "lines.markersize": 6, + "lines.linewidth": 3.0, + "text.latex.unicode": True, } rcParams.update(params) -rc('font',**{'family':'sans-serif','sans-serif':['Times']}) +rc("font", **{"family": "sans-serif", "sans-serif": ["Times"]}) # Parameters -epsilon = 2. -r_min = 0. +epsilon = 2.0 +r_min = 0.0 r_max = 4 r_max_plot = 2.6 @@ -59,9 +61,9 @@ r[0] += 1e-9 u = r / epsilon # Newtonian solution -phi_newton = 1. / r -F_newton = 1. / r**2 -W_newton = 0. * r +phi_newton = 1.0 / r +F_newton = 1.0 / r ** 2 +W_newton = 0.0 * r # Softened potential phi = np.zeros(np.size(r)) @@ -69,98 +71,199 @@ W = np.zeros(np.size(r)) F = np.zeros(np.size(r)) for i in range(np.size(r)): if r[i] > epsilon: - phi[i] = 1. / r[i] - W[i] = 0. - F[i] = 1. / r[i]**2 + phi[i] = 1.0 / r[i] + W[i] = 0.0 + F[i] = 1.0 / r[i] ** 2 else: - phi[i] = (-1./epsilon) * (3.*u[i]**7 - 15.*u[i]**6 + 28.*u[i]**5 - 21.*u[i]**4 + 7.*u[i]**2 - 3.) - W[i] = (21. / (2.*math.pi)) * (4.*u[i]**5 - 15.*u[i]**4 + 20.*u[i]**3 - 10.*u[i]**2 + 1.) 
/ epsilon**3 - F[i] = (1./epsilon**2) * (21.*u[i]**6 - 90*u[i]**5 + 140.*u[i]**4 - 84.*u[i]**3 + 14*u[i]) + phi[i] = (-1.0 / epsilon) * ( + 3.0 * u[i] ** 7 + - 15.0 * u[i] ** 6 + + 28.0 * u[i] ** 5 + - 21.0 * u[i] ** 4 + + 7.0 * u[i] ** 2 + - 3.0 + ) + W[i] = ( + (21.0 / (2.0 * math.pi)) + * ( + 4.0 * u[i] ** 5 + - 15.0 * u[i] ** 4 + + 20.0 * u[i] ** 3 + - 10.0 * u[i] ** 2 + + 1.0 + ) + / epsilon ** 3 + ) + F[i] = (1.0 / epsilon ** 2) * ( + 21.0 * u[i] ** 6 + - 90 * u[i] ** 5 + + 140.0 * u[i] ** 4 + - 84.0 * u[i] ** 3 + + 14 * u[i] + ) plummer_equivalent_factor = phi[0] * epsilon -print "Plummer-equivalent factor:", plummer_equivalent_factor - +print("Plummer-equivalent factor:", plummer_equivalent_factor) + epsilon_plummer = epsilon / plummer_equivalent_factor - + # Plummer potential -phi_plummer = (1. / epsilon_plummer) * (1 + (r / epsilon_plummer)**2)**(-1./2.) -F_plummer = (1. / epsilon_plummer**3) * r / (1 + (r / epsilon_plummer )**2)**(3./2.) +phi_plummer = (1.0 / epsilon_plummer) * (1 + (r / epsilon_plummer) ** 2) ** (-1.0 / 2.0) +F_plummer = ( + (1.0 / epsilon_plummer ** 3) * r / (1 + (r / epsilon_plummer) ** 2) ** (3.0 / 2.0) +) + + def eta_plummer(r): - return (3. / (4.*math.pi)) * 1. / (1 + r**2)**(5./2.) -W_plummer = (1. / epsilon_plummer**3) * eta_plummer(r / epsilon_plummer) + return (3.0 / (4.0 * math.pi)) * 1.0 / (1 + r ** 2) ** (5.0 / 2.0) + + +W_plummer = (1.0 / epsilon_plummer ** 3) * eta_plummer(r / epsilon_plummer) # Gadget-2 potential -epsilon_gadget = epsilon #/ plummer_equivalent_factor * 2.8 +epsilon_gadget = epsilon # / plummer_equivalent_factor * 2.8 phi_gadget2 = np.zeros(np.size(r)) W_gadget2 = np.zeros(np.size(r)) F_gadget2 = np.zeros(np.size(r)) for i in range(np.size(r)): if r[i] > epsilon_gadget: - phi_gadget2[i] = 1. / r[i] - W_gadget2[i] = 0. - F_gadget2[i] = 1. / r[i]**2 - elif r[i] > epsilon_gadget / 2.: - phi_gadget2[i] = -((32/3.)*u[i]**2 - 16.*u[i]**3 + (96./10.)*u[i]**4 - (64./30.)*u[i]**5 - (16./5.) 
+ 1./(15.*u[i]) )/ (epsilon_gadget) - W_gadget2[i] = (8. / math.pi) * (2. * (1- u[i])**3) / epsilon_gadget**3 - F_gadget2[i] = u[i] * (21.333333 - 48*u[i] + 38.4*u[i]**2 - 10.6666667*u[i]**3 - 0.06666667*u[i]**-3) / epsilon_gadget**2 + phi_gadget2[i] = 1.0 / r[i] + W_gadget2[i] = 0.0 + F_gadget2[i] = 1.0 / r[i] ** 2 + elif r[i] > epsilon_gadget / 2.0: + phi_gadget2[i] = -( + (32 / 3.0) * u[i] ** 2 + - 16.0 * u[i] ** 3 + + (96.0 / 10.0) * u[i] ** 4 + - (64.0 / 30.0) * u[i] ** 5 + - (16.0 / 5.0) + + 1.0 / (15.0 * u[i]) + ) / (epsilon_gadget) + W_gadget2[i] = (8.0 / math.pi) * (2.0 * (1 - u[i]) ** 3) / epsilon_gadget ** 3 + F_gadget2[i] = ( + u[i] + * ( + 21.333333 + - 48 * u[i] + + 38.4 * u[i] ** 2 + - 10.6666667 * u[i] ** 3 + - 0.06666667 * u[i] ** -3 + ) + / epsilon_gadget ** 2 + ) else: - phi_gadget2[i] = -((16./3.)*u[i]**2 - (96./10.)*u[i]**4 + (64./10.)*u[i]**5 - (14./5.) ) / (epsilon_gadget) - W_gadget2[i] = (8. / math.pi) * (1. - 6.*u[i]**2 + 6.*u[i]**3) / epsilon_gadget**3 - F_gadget2[i] = u[i] * (10.666667 + u[i]**2 * (32. 
* u[i] - 38.4)) / epsilon_gadget**2 + phi_gadget2[i] = -( + (16.0 / 3.0) * u[i] ** 2 + - (96.0 / 10.0) * u[i] ** 4 + + (64.0 / 10.0) * u[i] ** 5 + - (14.0 / 5.0) + ) / (epsilon_gadget) + W_gadget2[i] = ( + (8.0 / math.pi) + * (1.0 - 6.0 * u[i] ** 2 + 6.0 * u[i] ** 3) + / epsilon_gadget ** 3 + ) + F_gadget2[i] = ( + u[i] * (10.666667 + u[i] ** 2 * (32.0 * u[i] - 38.4)) / epsilon_gadget ** 2 + ) figure() -colors=['#4477AA', '#CC6677', '#DDCC77', '#117733'] +colors = ["#4477AA", "#CC6677", "#DDCC77", "#117733"] # Density subplot(311) -plot(r, W_newton, '--', lw=1.4, label="${\\rm Newtonian}$", color=colors[0]) -plot(r, W_plummer, ':', lw=1.4, label="${\\rm Plummer}$", color=colors[1]) -plot(r, W_gadget2, '-', lw=1.4, label="${\\rm Spline}$", color=colors[2]) -plot(r, W, '-', lw=1.4, label="${\\rm SWIFT}$", color=colors[3]) -plot([epsilon, epsilon], [0, 10], 'k-', alpha=0.5, lw=0.5) -plot([epsilon/plummer_equivalent_factor, epsilon/plummer_equivalent_factor], [0, 10], 'k-', alpha=0.5, lw=0.5) +plot(r, W_newton - 1, "--", lw=1.4, label="${\\rm Newtonian}$", color=colors[0]) +plot(r, W_plummer, ":", lw=1.4, label="${\\rm Plummer}$", color=colors[1]) +plot(r, W_gadget2, "-.", lw=1.4, label="${\\rm Cubic~spline}$", color=colors[2]) +plot(r, W, "-", lw=1.4, label="${\\rm SWIFT}$", color=colors[3]) +# plot([epsilon, epsilon], [0, 10], 'k--', alpha=0.5, lw=0.5) +plot( + [epsilon / plummer_equivalent_factor, epsilon / plummer_equivalent_factor], + [0, 10], + "k--", + alpha=0.5, + lw=0.5, +) -legend(loc="upper right", frameon=True, handletextpad=0.3, handlelength=1.6, fontsize=8, framealpha=1.) 
+legend( + loc="upper right", + frameon=True, + handletextpad=0.3, + handlelength=1.6, + fontsize=8, + framealpha=1.0, +) -xlim(0,r_max_plot) -xticks([0., 0.5, 1., 1.5, 2., 2.5], ["", "", "", "", "", ""]) +xlim(0, r_max_plot) +xticks([0.0, 0.5, 1.0, 1.5, 2.0, 2.5], ["", "", "", "", "", ""]) -ylim(0., 0.84) +ylim(0.0, 0.84) yticks([0, 0.2, 0.4, 0.6, 0.8], ["$0$", "$0.2$", "$0.4$", "$0.6$", "$0.8$"]) ylabel("$\\rho(r)$", labelpad=2) # Potential subplot(312) -plot(r, phi_newton, '--', lw=1.4, label="${\\rm Newtonian}$", color=colors[0]) -plot(r, phi_plummer, ':', lw=1.4, label="${\\rm Plummer}$", color=colors[1]) -plot(r, phi_gadget2, '-', lw=1.4, label="${\\rm Spline}$", color=colors[2]) -plot(r, phi, '-', lw=1.4, label="${\\rm SWIFT}$", color=colors[3]) -plot([epsilon, epsilon], [-10, 10], 'k-', alpha=0.5, lw=0.5) -plot([epsilon/plummer_equivalent_factor, epsilon/plummer_equivalent_factor], [0, 10], 'k-', alpha=0.5, lw=0.5) +plot(r, phi_newton, "--", lw=1.4, label="${\\rm Newtonian}$", color=colors[0]) +plot(r, phi_plummer, ":", lw=1.4, label="${\\rm Plummer}$", color=colors[1]) +plot(r, phi_gadget2, "-.", lw=1.4, label="${\\rm Spline}$", color=colors[2]) +plot(r, phi, "-", lw=1.4, label="${\\rm SWIFT}$", color=colors[3]) +# plot([epsilon, epsilon], [-10, 10], 'k--', alpha=0.5, lw=0.5) +plot( + [epsilon / plummer_equivalent_factor, epsilon / plummer_equivalent_factor], + [0, 10], + "k--", + alpha=0.5, + lw=0.5, +) ylim(0, 2.3) ylabel("$\\varphi(r)$", labelpad=1) -#yticks([0., 0.5, 1., 1.5, 2., 2.5], ["$%.1f$"%(0.*epsilon), "$%.1f$"%(0.5*epsilon), "$%.1f$"%(1.*epsilon), "$%.1f$"%(1.5*epsilon), "$%.1f$"%(2.*epsilon)]) +# yticks([0., 0.5, 1., 1.5, 2., 2.5], ["$%.1f$"%(0.*epsilon), "$%.1f$"%(0.5*epsilon), "$%.1f$"%(1.*epsilon), "$%.1f$"%(1.5*epsilon), "$%.1f$"%(2.*epsilon)]) -xlim(0,r_max_plot) -xticks([0., 0.5, 1., 1.5, 2., 2.5], ["", "", "", "", "", ""]) +xlim(0, r_max_plot) +xticks([0.0, 0.5, 1.0, 1.5, 2.0, 2.5], ["", "", "", "", "", ""]) # Force subplot(313) 
-plot(r, F_newton, '--', lw=1.4, color=colors[0]) -plot(r, F_plummer, ':', lw=1.4, color=colors[1]) -plot(r, F_gadget2, '-', lw=1.4, color=colors[2]) -plot(r, F, '-', lw=1.4, color=colors[3]) -plot([epsilon, epsilon], [0, 10], 'k-', alpha=0.5, lw=0.5) -plot([epsilon/plummer_equivalent_factor, epsilon/plummer_equivalent_factor], [0, 10], 'k-', alpha=0.5, lw=0.5) -text(epsilon+0.03, 0.05, "$\\epsilon$", color='k', alpha=0.5, rotation=90, va="bottom", ha="left", fontsize=8) -text(epsilon/plummer_equivalent_factor+0.03, 0.05, "$\\epsilon_{\\rm Plummer}$", color='k', alpha=0.5, rotation=90, va="bottom", ha="left", fontsize=8) - -xlim(0,r_max_plot) -xticks([0., 0.5, 1., 1.5, 2., 2.5], ["$%.1f$"%(0./epsilon), "", "$%.1f$"%(1./epsilon), "", "$%.1f$"%(2./epsilon)]) -xlabel("$r/H$", labelpad=-2.) +plot(r, F_newton, "--", lw=1.4, color=colors[0]) +plot(r, F_plummer, ":", lw=1.4, color=colors[1]) +plot(r, F_gadget2, "-.", lw=1.4, color=colors[2]) +plot(r, F, "-", lw=1.4, color=colors[3]) +# plot([epsilon, epsilon], [0, 10], 'k--', alpha=0.5, lw=0.5) +plot( + [epsilon / plummer_equivalent_factor, epsilon / plummer_equivalent_factor], + [0, 10], + "k--", + alpha=0.5, + lw=0.5, +) +# text(epsilon+0.03, 0.05, "$\\epsilon$", color='k', alpha=0.5, rotation=90, va="bottom", ha="left", fontsize=8) +text( + epsilon / plummer_equivalent_factor + 0.03, + 0.05, + "$\\epsilon_{\\rm Plummer}$", + color="k", + alpha=0.5, + rotation=90, + va="bottom", + ha="left", + fontsize=8, +) + +xlim(0, r_max_plot) +xticks( + [0.0, 0.5, 1.0, 1.5, 2.0, 2.5], + [ + "$%.1f$" % (0.0 / epsilon), + "", + "$%.1f$" % (1.0 / epsilon), + "", + "$%.1f$" % (2.0 / epsilon), + ], +) +xlabel("$r/H$", labelpad=-2.0) ylim(0, 0.95) ylabel("$|\\overrightarrow{\\nabla}\\varphi(r)|$", labelpad=0) diff --git a/theory/Multipoles/potential_derivatives.tex b/theory/Multipoles/potential_derivatives.tex index 14ddbb9792b72e3e815f8362858785f6e889192c..763e03b5f00dc818d0fe93a92214da2c2d536d40 100644 --- 
a/theory/Multipoles/potential_derivatives.tex +++ b/theory/Multipoles/potential_derivatives.tex @@ -74,11 +74,235 @@ truncated an softened gravity field $\varphi (\mathbf{r}, r_s, H) % \chi^{(5)}(r, r_s) &= \frac{32}{r_s^5} \left(240\alpha(x)^6 - 720\alpha(x)^5 + 780\alpha(x)^4 - 360\alpha(x)^3 + 62\alpha(x)^2 - 2\alpha(x) \right) \nonumber % \end{align} + +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +\begin{align} + \mathsf{\tilde{D}}_{1}(r, r_s, H) = % D_tilde_tilde_1 = D_tilde_1 + \left\lbrace\begin{array}{rcl} + f(u)\times H^{-1} & \mbox{if} & u < 1,\\ + %r^{-1} & \mbox{if} & u \geq 1, + \chi \times r^{-1} & \mbox{if} & u \geq 1~\mbox{and periodic}, \\ + r^{-1} & \mbox{if} & u \geq 1~\mbox{and not periodic}. + \end{array} + \right.\nonumber +\end{align} +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +\begin{align} + \mathsf{\tilde{D}}_{2}(r, r_s, H) = % D_tilde_tilde_3 = D_tilde_3 * r + \left\lbrace\begin{array}{rcl} + f'(u)\times H^{-2}& \mbox{if} & u < 1,\\ + %-1 \times r^{-3} & \mbox{if} & u \geq 1, + \left(r\chi' - \chi\right) \times r^{-2} & \mbox{if} & u \geq 1~\mbox{and periodic}, \\ + -1 \times r^{-2} & \mbox{if} & u \geq 1~\mbox{and not periodic}. + \end{array} + \right.\nonumber +\end{align} +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +\begin{align} + \mathsf{\tilde{D}}_{3}(r, r_s, H) = % D_tilde_tilde_5 = D_tilde_5 * r^2 + \left\lbrace\begin{array}{rcl} + \left(f''(u) - u^{-1}f'(u)\right)\times H^{-3}& \mbox{if} & u < 1,\\ + %3\times r^{-5} & \mbox{if} & u \geq 1, + \left(r^2\chi'' - 3r\chi' + 3\chi \right)\times r^{-3} & \mbox{if} & u \geq 1~\mbox{and periodic}, \\ + 3 \times r^{-3} & \mbox{if} & u \geq 1~\mbox{and not periodic}. 
+ \end{array} + \right.\nonumber +\end{align} +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +\begin{align} + \mathsf{\tilde{D}}_{4}(r, r_s, H) = % D_tilde_tilde_7 = D_tilde_7 * r^3 + \left\lbrace\begin{array}{rcl} + \left(f^{(3)}(u)-3u^{-1}f''(u)+3u^{-2}f'(u)\right)\times H^{-4} & \mbox{if} & u < 1,\\ + %-15\times r^{-7} & \mbox{if} & u \geq 1, + \left(r^3\chi^{(3)} - 6r^2\chi''+15r\chi'-15\chi\right) \times r^{-4} & \mbox{if} & u \geq 1~\mbox{and periodic}, \\ + -15 \times r^{-4} & \mbox{if} & u \geq 1~\mbox{and not periodic}. + \end{array} + \right.\nonumber +\end{align} +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +\begin{align} + \mathsf{\tilde{D}}_{5}(r, r_s, H) = % D_tilde_tilde_9 = D_tilde_9 * r^4 + \left\lbrace\begin{array}{rcl} + \left(f^{(4)}(u)-6u^{-1}f^{(3)}(u)+15u^{-2}f''(u)-15u^{-3}f'(u)\right)\times H^{-5}& \mbox{if} & u < 1,\\ + %105\times r^{-9} & \mbox{if} & u \geq 1. + \left(r^4\chi^{(4)} - 10r^3\chi^{(3)} + 45r^2\chi'' - 105r\chi' + 105\chi \right) \times r^{-5} & \mbox{if} & u \geq 1~\mbox{and periodic}, \\ + 105 \times r^{-5} & \mbox{if} & u \geq 1~\mbox{and not periodic}. + \end{array} + \right.\nonumber +\end{align} +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +\begin{align} + \mathsf{\tilde{D}}_{6}(r, r_s, H) = % D_tilde_tilde_11 = D_tilde_11 * r^5 + \left\lbrace\begin{array}{rcl} + \left(f^{(5)}(u) -10u^{-1}f^{(4)}(u) +45u^{-2}f^{(3)}(u) -105u^{-3}f''(u) + 105u^{-4}f'(u)\right)\times H^{-6}& \mbox{if} & u < 1,\\ + %-945\times r^{-11} & \mbox{if} & u \geq 1. + \left(r^5\chi^{(5)} - 15r^4\chi^{(4)} + 105r^3\chi^{(3)} - 420r^2\chi'' + 945r \chi' - 945\chi\right) \times r^{-6} & \mbox{if} & u \geq 1~\mbox{and periodic}, \\ + -945\times r^{-6} & \mbox{if} & u \geq 1~\mbox{and not periodic}.
+ \end{array} + \right.\nonumber +\end{align} +In the case $u<1$ and using $f(u)$ given by \ref{eq:fmm:potential}, we can simplify the expressions to get: +\begin{align} + \mathsf{\tilde{D}}_{1} &= (-3u^7 + 15u^6 - 28u^5 + 21u^4 - 7u^2 + 3) \times H^{-1}, \nonumber \\ + \mathsf{\tilde{D}}_{2} &= (-21u^6 + 90u^5 - 140u^4 + 84u^3 - 14u) \times H^{-2}, \nonumber \\ + \mathsf{\tilde{D}}_{3} &= (-105u^5 + 360u^4 - 420u^3 + 168u^2) \times H^{-3}, \nonumber \\ + \mathsf{\tilde{D}}_{4} &= (-315u^4 + 720u^3 - 420u^2) \times H^{-4}, \nonumber \\ + \mathsf{\tilde{D}}_{5} &= (-315u^3 + 420u) \times H^{-5}, \nonumber \\ + \mathsf{\tilde{D}}_{6} &= (315u^2 - 1260) \times H^{-6}. \nonumber +\end{align} +These expressions only use low powers of $u$ and, in particular, no terms +involving $1/u$ as would be the case when using a cubic spline kernel for +$f(u)$. This makes this choice of softening kernel much faster to evaluate than +ones using divisions. Similarly, the expressions in the periodic case for $u>1$ +can be simplified to: +\begin{align} + \mathsf{\tilde{D}}_{1} &= \chi r^{-1}, \nonumber \\ + \mathsf{\tilde{D}}_{2} &= -\chi r^{-2} + \chi' r^{-1}, \nonumber \\ + \mathsf{\tilde{D}}_{3} &= 3\chi r^{-3} - 3\chi' r^{-2} + \chi'' r^{-1}, \nonumber \\ + \mathsf{\tilde{D}}_{4} &= -15\chi r^{-4} + 15\chi' r^{-3} - 6\chi''r^{-2} + \chi^{(3)} r^{-1}, \nonumber \\ + \mathsf{\tilde{D}}_{5} &= 105\chi r^{-5} -105\chi' r^{-4} + 45\chi''r^{-3} - 10\chi^{(3)} r^{-2} + \chi^{(4)} r^{-1}\nonumber, \\ + \mathsf{\tilde{D}}_{6} &= -945\chi r^{-6} + 945 \chi' r^{-5} -420 \chi'' r^{-4} + 105 \chi^{(3)} r^{-3} - 15\chi^{(4)} r^{-2} + \chi^{(5)} r^{-1}. 
\nonumber +\end{align} +We can now write out all the derivatives used in the M2L and +M2P kernels: +\begin{align} + \mathsf{D}_{000}(\mathbf{r}) = \varphi (\mathbf{r}, r_s, H) = + \mathsf{\tilde{D}}_{1} \nonumber +\end{align} + +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +\noindent\rule{6cm}{0.4pt} +\begin{align} + \mathsf{D}_{100}(\mathbf{r}) = \frac{\partial}{\partial r_x} \varphi (\mathbf{r}, r_s, H) = + \left(\frac{r_x}{r}\right) \mathsf{\tilde{D}}_{2} \nonumber +\end{align} + +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +\noindent\rule{6cm}{0.4pt} +\begin{align} +\mathsf{D}_{200}(\mathbf{r}) = \frac{\partial^2}{\partial r_x^2} \varphi (\mathbf{r}, r_s, H) = +\left(\frac{r_x}{r}\right)^2 \mathsf{\tilde{D}}_{3} + \left(\frac{1}{r}\right)\mathsf{\tilde{D}}_{2}\nonumber +\end{align} + +\begin{align} +\mathsf{D}_{110}(\mathbf{r}) = \frac{\partial^2}{\partial r_x\partial r_y} \varphi (\mathbf{r}, r_s, H) = +\left(\frac{r_x}{r}\right) \left(\frac{r_y}{r}\right) \mathsf{\tilde{D}}_{3} \nonumber +\end{align} + +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +\noindent\rule{6cm}{0.4pt} +\begin{align} +\mathsf{D}_{300}(\mathbf{r}) = \frac{\partial^3}{\partial r_x^3} \varphi (\mathbf{r}, r_s, H) = + \left(\frac{r_x}{r}\right)^3 \mathsf{\tilde{D}}_{4} + + 3 \left(\frac{r_x}{r}\right) \left(\frac{1}{r}\right) \mathsf{\tilde{D}}_{3} \nonumber +\end{align} + +\begin{align} +\mathsf{D}_{210}(\mathbf{r}) = \frac{\partial^3}{\partial r_x^2 \partial r_y} \varphi (\mathbf{r}, r_s, H) = + \left(\frac{r_x}{r}\right)^2 \left(\frac{r_y}{r}\right) \mathsf{\tilde{D}}_{4} + \left(\frac{r_y}{r}\right) \left(\frac{1}{r}\right) \mathsf{\tilde{D}}_{3} \nonumber +\end{align} + +\begin{align} +\mathsf{D}_{111}(\mathbf{r}) = \frac{\partial^3}{\partial r_x\partial r_y\partial r_z} \varphi (\mathbf{r}, r_s, H) = +\left(\frac{r_x}{r}\right)\left(\frac{r_y}{r}\right)\left(\frac{r_z}{r}\right) \mathsf{\tilde{D}}_{4} \nonumber +\end{align} + +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +\noindent\rule{6cm}{0.4pt} +\begin{align} +
\mathsf{D}_{400}(\mathbf{r}) &= \frac{\partial^4}{\partial r_x^4} + \varphi (\mathbf{r}, r_s, H) = + \left(\frac{r_x}{r}\right)^4 \mathsf{\tilde{D}}_{5}+ + 6\left(\frac{r_x}{r}\right)^2 \left(\frac{1}{r}\right) \mathsf{\tilde{D}}_{4} + + 3 \left(\frac{1}{r}\right)^2 \mathsf{\tilde{D}}_{3} + \nonumber +\end{align} + +\begin{align} + \mathsf{D}_{310}(\mathbf{r}) &= \frac{\partial^4}{\partial r_x^3 + \partial r_y} \varphi (\mathbf{r}, r_s, H) = + \left(\frac{r_x}{r}\right)^3 \left(\frac{r_y}{r}\right) \mathsf{\tilde{D}}_{5} + + 3 \left(\frac{r_x}{r}\right) \left(\frac{r_y}{r}\right) \left(\frac{1}{r}\right) \mathsf{\tilde{D}}_{4} + \nonumber +\end{align} + +\begin{align} + \mathsf{D}_{220}(\mathbf{r}) &= \frac{\partial^4}{\partial r_x^2 + \partial r_y^2} \varphi (\mathbf{r}, r_s, H) = + \left(\frac{r_x}{r}\right)^2 \left(\frac{r_y}{r}\right)^2 \mathsf{\tilde{D}}_{5} + + \left(\frac{r_x}{r}\right)^2 \left(\frac{1}{r}\right) \mathsf{\tilde{D}}_{4} + + \left(\frac{r_y}{r}\right)^2 \left(\frac{1}{r}\right) \mathsf{\tilde{D}}_{4} + + \left(\frac{1}{r}\right)^2 \mathsf{\tilde{D}}_{3} + \nonumber +\end{align} + +\begin{align} + \mathsf{D}_{211}(\mathbf{r}) &= \frac{\partial^4}{\partial r_x^2 + \partial r_y \partial r_z} \varphi (\mathbf{r}, r_s, H) = + \left(\frac{r_x}{r}\right)^2\left(\frac{r_y}{r}\right)\left(\frac{r_z}{r}\right) \mathsf{\tilde{D}}_{5} + + \left(\frac{r_y}{r}\right)\left(\frac{r_z}{r}\right)\left(\frac{1}{r}\right) \mathsf{\tilde{D}}_{4} + \nonumber +\end{align} + +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +\noindent\rule{6cm}{0.4pt} +\begin{align} + \mathsf{D}_{500}(\mathbf{r}) &= \frac{\partial^5}{\partial r_x^5} + \varphi (\mathbf{r}, r_s, H) = + \left(\frac{r_x}{r}\right)^5 \mathsf{\tilde{D}}_{6} + + 10\left(\frac{r_x}{r}\right)^3\left(\frac{1}{r}\right)\mathsf{\tilde{D}}_{5} + + 15\left(\frac{r_x}{r}\right)\left(\frac{1}{r}\right)^2\mathsf{\tilde{D}}_{4} + \nonumber +\end{align} + +\begin{align} + \mathsf{D}_{410}(\mathbf{r}) &= \frac{\partial^5}{\partial 
r_x^4 + \partial r_y} \varphi (\mathbf{r}, r_s, H) = + \left(\frac{r_x}{r}\right)^4 \left(\frac{r_y}{r}\right) \mathsf{\tilde{D}}_{6} + + 6 \left(\frac{r_x}{r}\right)^2 \left(\frac{r_y}{r}\right)\left(\frac{1}{r}\right) \mathsf{\tilde{D}}_{5} + + 3 \left(\frac{r_y}{r}\right) \left(\frac{1}{r}\right)^2\mathsf{\tilde{D}}_{4} + \nonumber +\end{align} + +\begin{align} + \mathsf{D}_{320}(\mathbf{r}) &= \frac{\partial^5}{\partial r_x^3 + \partial r_y^2} \varphi (\mathbf{r}, r_s, H) = + \left(\frac{r_x}{r}\right)^3 \left(\frac{r_y}{r}\right)^2 \mathsf{\tilde{D}}_{6} + + \left(\frac{r_x}{r}\right)^3 \left(\frac{1}{r}\right)\mathsf{\tilde{D}}_{5} + + 3 \left(\frac{r_x}{r}\right) \left(\frac{r_y}{r}\right)^2 \left(\frac{1}{r}\right)\mathsf{\tilde{D}}_{5} + + 3 \left(\frac{r_x}{r}\right) \left(\frac{1}{r}\right)^2\mathsf{\tilde{D}}_{4} + \nonumber +\end{align} + +\begin{align} + \mathsf{D}_{311}(\mathbf{r}) &= \frac{\partial^5}{\partial r_x^3 + \partial r_y \partial r_z} \varphi (\mathbf{r}, r_s, H) = + \left(\frac{r_x}{r}\right)^3 \left(\frac{r_y}{r}\right) \left(\frac{r_z}{r}\right) \mathsf{\tilde{D}}_{6} + + 3 \left(\frac{r_x}{r}\right) \left(\frac{r_y}{r}\right) \left(\frac{r_z}{r}\right) \left(\frac{1}{r}\right)\mathsf{\tilde{D}}_{5} + \nonumber +\end{align} + +\begin{align} + \mathsf{D}_{221}(\mathbf{r}) &= \frac{\partial^5}{\partial r_x^2 + \partial r_y^2 \partial r_z} \varphi (\mathbf{r}, r_s, H) = + \left(\frac{r_x}{r}\right)^2 \left(\frac{r_y}{r}\right)^2 \left(\frac{r_z}{r}\right) \mathsf{\tilde{D}}_{6} + + \left(\frac{r_x}{r}\right)^2 \left(\frac{r_z}{r}\right) \left(\frac{1}{r}\right)\mathsf{\tilde{D}}_{5} + + \left(\frac{r_y}{r}\right)^2 \left(\frac{r_z}{r}\right) \left(\frac{1}{r}\right)\mathsf{\tilde{D}}_{5} + + \left(\frac{r_z}{r}\right) \left(\frac{1}{r}\right)^2\mathsf{\tilde{D}}_{4} + \nonumber +\end{align} + +\begin{comment} +\noindent\rule{12cm}{1pt}\\ +Old version \\ +\noindent\rule{12cm}{1pt} + + %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% \begin{align} 
\mathsf{\tilde{D}}_{1}(r, r_s, H) = \left\lbrace\begin{array}{rcl} - \left(-3u^7 + 15u^6 - 28u^5 + 21u^4 - 7u^2 + 3\right)\times H^{-1} & \mbox{if} & u < 1,\\ + -\left(3u^7 - 15u^6 + 28u^5 - 21u^4 + 7u^2 - 3\right)\times H^{-1} & \mbox{if} & u < 1,\\ %r^{-1} & \mbox{if} & u \geq 1, \chi(r, r_s) \times r^{-1} & \mbox{if} & u \geq 1, \end{array} @@ -98,7 +322,7 @@ truncated an softened gravity field $\varphi (\mathbf{r}, r_s, H) \begin{align} \mathsf{\tilde{D}}_{5}(r, r_s, H) = \left\lbrace\begin{array}{rcl} - \left(-105u^3 + 360u^2 - 420u + 168\right)\times H^{-5}& \mbox{if} & u < 1,\\ + -\left(35u^3 - 120u^2 + 140u - 56\right)\times H^{-5}& \mbox{if} & u < 1,\\ %3\times r^{-5} & \mbox{if} & u \geq 1, \left(r^2\chi''(r, r_s) - 3r\chi'(r, r_s) + 3\chi(r, r_s) \right)\times r^{-5} & \mbox{if} & u \geq 1, \end{array} @@ -108,7 +332,7 @@ truncated an softened gravity field $\varphi (\mathbf{r}, r_s, H) \begin{align} \mathsf{\tilde{D}}_{7}(r, r_s, H) = \left\lbrace\begin{array}{rcl} - -\left(315u - 720 + 420u^{-1}\right)\times H^{-7} & \mbox{if} & u < 1,\\ + -\left(21u - 48 + 28u^{-1}\right)\times H^{-7} & \mbox{if} & u < 1,\\ %-15\times r^{-7} & \mbox{if} & u \geq 1, \left(r^3\chi^{(3)}(r, r_s) - 6r^2\chi''(r, r_s)+15r\chi'(r, r_s)-15\chi(r, r_s)\right) \times r^{-7} & \mbox{if} & u \geq 1, \end{array} @@ -118,7 +342,7 @@ truncated an softened gravity field $\varphi (\mathbf{r}, r_s, H) \begin{align} \mathsf{\tilde{D}}_{9}(r, r_s, H) = \left\lbrace\begin{array}{rcl} - \left(-315u^{-1} + 420u^{-3}\right)\times H^{-9}& \mbox{if} & u < 1,\\ + -\left(3u^{-1} - 4u^{-3}\right)\times H^{-9}& \mbox{if} & u < 1,\\ %105\times r^{-9} & \mbox{if} & u \geq 1. 
\left(r^4\chi^{(4)}(r, r_s) - 10r^3\chi^{(3)} + 45r^2\chi''(r, r_s) - 105r\chi'(r, r_s) + 105\chi(r, r_s) \right) \times r^{-9} & \mbox{if} & u \geq 1 \end{array} @@ -128,7 +352,7 @@ truncated an softened gravity field $\varphi (\mathbf{r}, r_s, H) \begin{align} \mathsf{\tilde{D}}_{11}(r, r_s, H) = \left\lbrace\begin{array}{rcl} - -\left(315u^{-3} - 1260u^{-5}\right)\times H^{-11}& \mbox{if} & u < 1,\\ + -\left(\frac{1}{3}u^{-3} - \frac{4}{3}u^{-5}\right)\times H^{-11}& \mbox{if} & u < 1,\\ %-945\times r^{-11} & \mbox{if} & u \geq 1. \left(r^5\chi^{(5)}(r, r_s) - 15r^4\chi^{(4)}(r, r_s) + 105r^3\chi^{(3)}(r, r_s) - 420r^2\chi''(r, r_s) + 945r \chi'(r, r_s) - 945\chi(r, r_s)\right) \times r^{-11} & \mbox{if} & u \geq 1. \end{array} @@ -262,10 +486,11 @@ r_y \mathsf{\tilde{D}}_{5}(r, r_s, H) \nonumber r_x^2 r_y^2 r_z \mathsf{\tilde{D}}_{11}(r, r_s, H) + r_x^2 r_z \mathsf{\tilde{D}}_{9}(r, r_s, H) + r_y^2 r_z \mathsf{\tilde{D}}_{9}(r, r_s, H) + - r_z \mathsf{\tilde{D}}_{y}(r, r_s, H) + r_z \mathsf{\tilde{D}}_{7}(r, r_s, H) \nonumber \end{align} +\end{comment} @@ -285,94 +510,3 @@ r_y \mathsf{\tilde{D}}_{5}(r, r_s, H) \nonumber - -%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% -\begin{comment} - -\noindent\rule{6cm}{0.4pt} - -\begin{align} -\mathsf{D}_{100}(\mathbf{r}) = \frac{\partial}{\partial r_x} \varphi (\mathbf{r},H) = -\left\lbrace\begin{array}{rcl} --\frac{r_x}{H^3} \left(21u^5 - 90u^4 + 140u^3 - 84u^2 + 14\right) & \mbox{if} & u < 1,\\ --\frac{r_x}{r^3} & \mbox{if} & u \geq 1, -\end{array} -\right.\nonumber -\end{align} - -\noindent\rule{6cm}{0.4pt} - -\begin{align} -\mathsf{D}_{200}(\mathbf{r}) = \frac{\partial^2}{\partial r_x^2} \varphi (\mathbf{r},H) = -\left\lbrace\begin{array}{rcl} -\frac{r_x^2}{H^5}\left(-105u^3+360u^2-420u+168\right) - -\frac{1}{H^3} \left(21u^5 - 90u^4 + 140u^3 - 84u^2 + 14\right) & \mbox{if} & u < 1,\\ -3\frac{r_x^2}{r^5} - \frac{1}{r^3} & \mbox{if} & u \geq 1, -\end{array} 
-\right.\nonumber -\end{align} - -\begin{align} -\mathsf{D}_{110}(\mathbf{r}) = \frac{\partial^2}{\partial r_x\partial r_y} \varphi (\mathbf{r},H) = -\left\lbrace\begin{array}{rcl} -\frac{r_xr_y}{H^5}\left(-105u^3+360u^2-420u+168\right) & \mbox{if} & u < 1,\\ -3\frac{r_xr_y}{r^5} & \mbox{if} & u \geq 1, -\end{array} -\right.\nonumber -\end{align} - -\noindent\rule{6cm}{0.4pt} - -\begin{align} -\mathsf{D}_{300}(\mathbf{r}) = \frac{\partial^3}{\partial r_x^3} \varphi (\mathbf{r},H) = -\left\lbrace\begin{array}{rcl} --\frac{r_x^3}{H^7} \left(315u - 720 + 420u^{-1}\right) + -\frac{3r_x}{H^5}\left(-105u^3+360u^2-420u+168\right) & \mbox{if} & u < 1,\\ --15\frac{r_x^3}{r^7} + 9 \frac{r_x}{r^5} & \mbox{if} & u \geq 1, -\end{array} -\right.\nonumber -\end{align} - -\begin{align} -\mathsf{D}_{210}(\mathbf{r}) = \frac{\partial^3}{\partial r_x^3} \varphi (\mathbf{r},H) = -\left\lbrace\begin{array}{rcl} --\frac{r_x^2r_y}{H^7} \left(315u - 720 + 420u^{-1}\right) + -\frac{r_y}{H^5}\left(-105u^3+360u^2-420u+168\right) & \mbox{if} & u < 1,\\ --15\frac{r_x^2r_y}{r^7} + 3 \frac{r_y}{r^5} & \mbox{if} & u \geq 1, -\end{array} -\right.\nonumber -\end{align} - - -\begin{align} -\mathsf{D}_{111}(\mathbf{r}) = \frac{\partial^3}{\partial r_x\partial r_y\partial r_z} \varphi (\mathbf{r},H) = -\left\lbrace\begin{array}{rcl} --\frac{r_xr_yr_z}{H^7} \left(315u - 720 + 420u^{-1}\right) & \mbox{if} & u < 1,\\ --15\frac{r_xr_yr_z}{r^7} & \mbox{if} & u \geq 1, -\end{array} -\right.\nonumber -\end{align} - -\noindent\rule{6cm}{0.4pt} - -\begin{align} - \mathsf{D}_{400}(\mathbf{r}) &= - \nonumber -\end{align} - -\begin{align} - \mathsf{D}_{310}(\mathbf{r}) &= - \nonumber -\end{align} - -\begin{align} - \mathsf{D}_{220}(\mathbf{r}) &= - \nonumber -\end{align} - -\begin{align} - \mathsf{D}_{211}(\mathbf{r}) &= - \nonumber -\end{align} - -\end{comment} diff --git a/theory/Multipoles/potential_softening.tex b/theory/Multipoles/potential_softening.tex index 
4041bdad3cdfd470c90a46739f487caa4787892d..47fe4f392bf9bd11487f270421fba3d0dff69af0 100644 --- a/theory/Multipoles/potential_softening.tex +++ b/theory/Multipoles/potential_softening.tex @@ -2,18 +2,17 @@ \label{ssec:potential_softening} To avoid artificial two-body relaxation, the Dirac -$\delta$-distribution of particles is convolved with a softening -kernel of a given fixed, but time-variable, scale-length -$\epsilon$. Instead of the commonly used spline kernel of +$\delta$-distribution corresponding to each particle is convolved with +a softening kernel of a given fixed, but time-variable, scale-length +$H$. Instead of the commonly used spline kernel of \cite{Monaghan1985} (e.g. in \textsc{Gadget}), we use a C2 kernel \citep{Wendland1995} which leads to an expression for the force that is cheaper to compute and has a very similar overall shape. The C2 kernel has the advantage of being branch-free leading to an expression which is faster to evaluate using vector units available on modern architectures; it also does not require any divisions to evaluate the -softened forces. We set -$\tilde\delta(\mathbf{x}) = \rho(|\mathbf{x}|) = W(|\mathbf{x}|, -3\epsilon_{\rm Plummer})$, with $W(r, H)$ given by +softened forces. We set $\tilde\delta(\mathbf{r}) = \rho(|\mathbf{r}|) += W(|\mathbf{r}|, 3\epsilon_{\rm Plummer})$, with $W(r, H)$ given by \begin{align} W(r,H) &= \frac{21}{2\pi H^3} \times \nonumber \\ @@ -25,34 +24,65 @@ W(r,H) &= \frac{21}{2\pi H^3} \times \nonumber \\ \end{align} and $u = r/H$. The potential $\varphi(r,H)$ corresponding to this density distribution reads \begin{align} -\varphi = +\varphi(r,H) = \left\lbrace\begin{array}{rcl} -\frac{1}{H} (-3u^7 + 15u^6 - 28u^5 + 21u^4 - 7u^2 + 3) & \mbox{if} & u < 1,\\ -\frac{1}{r} & \mbox{if} & u \geq 1. +f(\frac{r}{H}) \times H^{-1} & \mbox{if} & r < H,\\ +r^{-1} & \mbox{if} & r \geq H, \end{array} \right. 
\label{eq:fmm:potential} \end{align} - -These choices, lead to a potential at $|\mathbf{x}| = 0$ equal to the -central potential of a Plummer sphere (i.e. -$\varphi(0) = 1/\epsilon_{\rm Plummer}$)\footnote{Note the factor $3$ - in the definition of $\rho(|\mathbf{x}|)$ which differs from the - factor $2.8$ used in \textsc{Gadget} as a consequence of the change - of kernel shape.}. The softened density profile, its corresponding -potential and resulting forces are shown on -Fig. \ref{fig:fmm:softening} (for details of these are obtained see -section 2 of~\cite{Price2007}). For comparison purposes, we also -implemented the more traditional spline-kernel softening in \swift. - - +with $f(u) \equiv -3u^7 + 15u^6 - 28u^5 + 21u^4 - 7u^2 + 3$. These +choices lead to a potential at $|\mathbf{r}| = 0$ equal to the central +potential of a Plummer sphere (i.e. $\varphi(0) = 1/\epsilon_{\rm + Plummer}$)\footnote{Note the factor $3$ in the definition of + $\rho(|\mathbf{r}|)$ which differs from the factor $2.8$ used for + the cubic spline kernel as a consequence of the change of the functional + form of $W$.}. From this expression the softened gravitational force can +be easily obtained: +\begin{align} +\mathbf{\nabla}\varphi(r,H) = \mathbf{r} \cdot +\left\lbrace\begin{array}{rcl} +g(\frac{r}{H}) \times H^{-3} & \mbox{if} & r < H,\\ +-r^{-3} & \mbox{if} & r \geq H, +\end{array} +\right. +\label{eq:fmm:force} +\end{align} +with $g(u) \equiv f'(u)/u = -21u^5+90u^4-140u^3+84u^2-14$. This last +expression has the advantage of not containing any divisions or +branching (besides the always necessary check for $r<H$), making it +faster to evaluate than the softened force derived from the +\cite{Monaghan1985} spline kernel. Note also, the useful expression +for the norm of the forces: +\begin{align} +|\mathbf{\nabla}\varphi(r,H)| = +\left\lbrace\begin{array}{rcl} +\left|f'(\frac{r}{H})\right| \times H^{-2} & \mbox{if} & r < H,\\ +r^{-2} & \mbox{if} & r \geq H. +\end{array} +\right.
+\label{eq:fmm:force_norm} +\end{align} +The softened density profile, its corresponding potential and +resulting forces are shown on Fig. \ref{fig:fmm:softening} (for more +details about how these are constructed see section 2 +of~\cite{Price2007}). For comparison purposes, we also implemented the +more traditional spline-kernel softening in \swift. \begin{figure} \includegraphics[width=\columnwidth]{potential.pdf} \caption{The density (top), potential (middle) and forces (bottom) generated py a point mass in our softened gravitational scheme. A Plummer-equivalent sphere is shown for comparison. The spline kernel - of \citet{Monaghan1985}, used for instance in \textsc{Gadget}, is - shown for comparison but note that it has not been re-scaled to - match the Plummer-sphere potential at $r=0$. } + of \citet{Monaghan1985} is also depicted but note that it has not + been normalised to match the Plummer-sphere potential at $r=0$ (as + is done in simulations) but rather normalised to the Newtonian + potential at $r=H$ to better highlight the differences in shapes.} \label{fig:fmm:softening} \end{figure} +Users specify the value of the Plummer-equivalent softening +$\epsilon_{\rm Plummer}$ in the parameter file. + +\subsubsection{Interaction of bodies with different softening lengths} + +\textcolor{red}{MORE WORDS HERE.}\\ diff --git a/theory/Multipoles/run.sh b/theory/Multipoles/run.sh index eaaa9bc94d78d7cb3f55d7e669314aae24306d68..e3e6e6738b35af9b6cf2c2fa6a4fb3de8a6b4e34 100755 --- a/theory/Multipoles/run.sh +++ b/theory/Multipoles/run.sh @@ -2,17 +2,22 @@ if [ ! -e potential.pdf ] then echo "Generating 1st figure..." - python plot_potential.py + python3 plot_potential.py fi if [ ! -e potential_short.pdf ] then echo "Generating 2nd figures..." - python plot_mesh.py + python3 plot_mesh.py fi if [ ! -e alpha_powers.pdf ] then echo "Generating derivative figures..." - python plot_derivatives.py + python3 plot_derivatives.py +fi +if [ ! 
-e mac_potential.pdf ] +then + echo "Generating derivative figures..." + python3 plot_mac_potential.py fi echo "Generating PDF..." pdflatex -jobname=fmm fmm_standalone.tex