diff --git a/.gitignore b/.gitignore index 775f2d24b64ecda6a036b0d7b4b2ea62a1a24210..c29fa3e3a48e9846b5c7c422b746589cb740802d 100644 --- a/.gitignore +++ b/.gitignore @@ -36,6 +36,7 @@ examples/*/*/*.rst examples/*/*/*.hdf5 examples/*/*/*.csv examples/*/*/*.dot +examples/*/*/cell_hierarchy.html examples/*/*/energy.txt examples/*/*/task_level.txt examples/*/*/timesteps_*.txt diff --git a/Makefile.am b/Makefile.am index c71cc8d00c797f0e2afc034cb1abfff7eba14c88..40ba64dcdd1c7270712288bce938ab56e918694d 100644 --- a/Makefile.am +++ b/Makefile.am @@ -23,6 +23,9 @@ SUBDIRS = src argparse examples doc tests tools if HAVEEAGLECOOLING SUBDIRS += examples/Cooling/CoolingRates endif +if HAVELOGGER +SUBDIRS += logger +endif # Non-standard files that should be part of the distribution. EXTRA_DIST = INSTALL.swift .clang-format format.sh diff --git a/README b/README index ee7abd5a5709c81ecef1a89c1a651a925ce2f4a9..8d722a66da5083889e0adfb5af51206509bef53d 100644 --- a/README +++ b/README @@ -71,6 +71,8 @@ Parameters: from all ranks. -y, --task-dumps=<int> Time-step frequency at which task analysis files and/or tasks are dumped. + --cell-dumps=<int> Time-step frequency at which cell graphs + are dumped. -Y, --threadpool-dumps=<int> Time-step frequency at which threadpool tasks are dumped. diff --git a/README.md b/README.md index efffc9b4c43ff8f0821c4d7d49721ff7ff5949d0..f91b03d3f6a9656e33adc3216a15ed41e7b971de 100644 --- a/README.md +++ b/README.md @@ -122,6 +122,8 @@ Parameters: from all ranks. -y, --task-dumps=<int> Time-step frequency at which task analysis files and/or tasks are dumped. + --cell-dumps=<int> Time-step frequency at which cell graphs + are dumped. -Y, --threadpool-dumps=<int> Time-step frequency at which threadpool tasks are dumped. diff --git a/configure.ac b/configure.ac index 338edec60f956c37f666f7592a931b2c20a9f6e8..8d189c1210abf48304ca39b0fc6450323091eb7e 100644 --- a/configure.ac +++ b/configure.ac @@ -86,6 +86,7 @@ AC_ARG_ENABLE([logger], if test "$with_logger" = "yes"; then AC_DEFINE([WITH_LOGGER], 1, [logger enabled]) fi +AM_CONDITIONAL([HAVELOGGER],[test $with_logger = "yes"]) # Interprocedural optimization support. Needs special handling for linking and # archiving as well as compilation with Intels, needs to be done before @@ -996,6 +997,41 @@ fi AC_SUBST([TBBMALLOC_LIBS]) AM_CONDITIONAL([HAVETBBMALLOC],[test -n "$TBBMALLOC_LIBS"]) +# Check for python. +have_python="no" +AC_ARG_WITH([python], + [AS_HELP_STRING([--with-python=PATH], + [root directory where python is installed @<:@yes/no@:>@] + )], + [with_python="$withval"], + [with_python="no"] +) +if test "x$with_python" != "xno"; then + if test "$with_python" == ""; then + # use linux default python + with_python="/usr/" + fi + AM_PATH_PYTHON([3], [], [AC_MSG_ERROR(python not found)]) + AC_ARG_VAR([PYTHON_INCS], [Include flags for python, bypassing python-config]) + AC_ARG_VAR([PYTHON_CONFIG], [Path to python-config]) + AS_IF([test -z "$PYTHON_INCS"], [ + AS_IF([test -z "$PYTHON_CONFIG"], [ + AC_PATH_PROGS([PYTHON_CONFIG], + [python$PYTHON_VERSION-config python-config], + [no], + [`dirname $PYTHON`]) + AS_IF([test "$PYTHON_CONFIG" = no], [AC_MSG_ERROR([cannot find python-config for $PYTHON.])]) + ]) + AC_MSG_CHECKING([python include flags]) + PYTHON_INCS=`$PYTHON_CONFIG --includes` + AC_MSG_RESULT([$PYTHON_INCS]) + ]) + have_python="yes" +fi +AC_SUBST([PYTHON_INCS]) +AM_CONDITIONAL([HAVEPYTHON],[test -n "$PYTHON_INCS"]) + + # Check for HDF5. This is required. 
AX_LIB_HDF5 if test "$with_hdf5" != "yes"; then @@ -1991,7 +2027,7 @@ AM_CONDITIONAL([HAVEEAGLEFEEDBACK], [test $with_feedback = "EAGLE"]) # Handle .in files. AC_CONFIG_FILES([Makefile src/Makefile examples/Makefile examples/Cooling/CoolingRates/Makefile doc/Makefile doc/Doxyfile tests/Makefile]) -AC_CONFIG_FILES([argparse/Makefile tools/Makefile]) +AC_CONFIG_FILES([argparse/Makefile tools/Makefile logger/Makefile logger/tests/Makefile]) AC_CONFIG_FILES([tests/testReading.sh], [chmod +x tests/testReading.sh]) AC_CONFIG_FILES([tests/testActivePair.sh], [chmod +x tests/testActivePair.sh]) AC_CONFIG_FILES([tests/test27cells.sh], [chmod +x tests/test27cells.sh]) @@ -2044,7 +2080,6 @@ AC_MSG_RESULT([ CPU profiler : $have_profiler Pthread barriers : $have_pthread_barrier VELOCIraptor enabled : $have_velociraptor - Particle Logger : $with_logger FoF activated: : $enable_fof Hydro scheme : $with_hydro @@ -2082,4 +2117,7 @@ AC_MSG_RESULT([ Custom icbrtf : $enable_custom_icbrtf Boundary particles : $boundary_particles + Particle Logger : $with_logger + Python enabled : $have_python + ------------------------]) diff --git a/doc/Doxyfile.in b/doc/Doxyfile.in index c6b3046d2d3591c937dfd98cf75fb7697b90110f..94424f644e2f9e6dc4c436a42423ba667186e02b 100644 --- a/doc/Doxyfile.in +++ b/doc/Doxyfile.in @@ -777,6 +777,7 @@ INPUT += @top_srcdir@/src/tracers/EAGLE INPUT += @top_srcdir@/src/stars/EAGLE INPUT += @top_srcdir@/src/feedback/EAGLE INPUT += @top_srcdir@/src/black_holes/EAGLE +INPUT += @top_srcdir@/logger # This tag can be used to specify the character encoding of the source files # that doxygen parses. Internally doxygen uses the UTF-8 encoding. Doxygen uses diff --git a/doc/RTD/source/AnalysisTools/index.rst b/doc/RTD/source/AnalysisTools/index.rst index f7f2f979666270ce371b532b6eab7bad3a23c9bd..8b4467f5f36a5e07f0b5446f4f590b2643990731 100644 --- a/doc/RTD/source/AnalysisTools/index.rst +++ b/doc/RTD/source/AnalysisTools/index.rst @@ -21,11 +21,13 @@ Cell graph ---------- An interactive graph of the cells is available with the configuration option ``--enable-cell-graph``. -During a run, SWIFT will generate a ``cell_hierarchy_*.csv`` file per MPI rank. -The command ``tools/make_cell_hierarchy.sh cell_hierarchy_*.csv`` merges the files together and generates the file ``cell_hierarchy.html`` +During a run, SWIFT will generate a ``cell_hierarchy_*.csv`` file per MPI rank at the frequency given by the parameter ``--cell-dumps=n``. +The command ``tools/make_cell_hierarchy.sh cell_hierarchy_0000_*.csv`` merges the files at time step 0 together and generates the file ``cell_hierarchy.html`` that contains the graph and can be read with your favorite web browser. -With chrome, you cannot access the files directly, you will need to either access them through an existing server (e.g. public http provided by your university) +With most web browsers, you cannot access the files directly. +If that is the case, the cells will never appear (but everything else should be fine). +To solve this problem, you will need to either access them through an existing server (e.g. public http provided by your university) or install ``npm`` and then run the following commands .. code-block:: bash @@ -34,6 +36,14 @@ or install ``npm`` and then run the following commands http-server . Now you can open the web page ``http://localhost:8080/cell_hierarchy.html``. +When running a large simulation, the data loading may take a while (a few seconds for EAGLE_6). +Your browser should not be hanging, but will seem to be idle. 
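+
+If you prefer not to install ``npm``, any static file server should work as well; one alternative
+(a standard Python 3 feature, not something provided by the SWIFT tools) is to serve the directory
+containing ``cell_hierarchy.html`` with Python's built-in ``http.server`` module:
+
+.. code-block:: bash
+
+   # Serve the current directory on port 8080 using the Python 3 standard library.
+   python3 -m http.server 8080
+
+The graph is then reachable at the same address, ``http://localhost:8080/cell_hierarchy.html``.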
+ +If you wish to add some information to the graph, you can do it by modifying the files ``src/space.c`` and ``tools/data/cell_hierarchy.html``. +In the first one, you will need to modify the calls to ``fprintf`` in the functions ``space_write_cell_hierarchy`` and ``space_write_cell``. +Here the code is simply writing CSV files containing all the required information about the cells. +In the second one, you will need to find the function ``mouseover`` and add the field that you have created. +You can also increase the size of the bubble through the style parameter ``height``. Memory usage reports -------------------- diff --git a/doc/RTD/source/CommandLineOptions/index.rst b/doc/RTD/source/CommandLineOptions/index.rst index 1144477548062bb61e47a88d3a1ee062b89b97cf..5251b36f7394465c59577932155544a755c0ee43 100644 --- a/doc/RTD/source/CommandLineOptions/index.rst +++ b/doc/RTD/source/CommandLineOptions/index.rst @@ -11,7 +11,10 @@ For instance, just running the ``swift`` binary will not use any SPH or gravity; the particles will just sit still! Below is a list of the command line options and when they should be used. The same list -can be found by typing ``./swift -h``:: +can be found by typing ``./swift -h``: + +.. code-block:: none + -h, --help show this help message and exit @@ -65,5 +68,7 @@ can be found by typing ``./swift -h``:: from all ranks. -y, --task-dumps=<int> Time-step frequency at which task analysis files and/or tasks are dumped. + --cell-dumps=<int> Time-step frequency at which cell graphs + are dumped. -Y, --threadpool-dumps=<int> Time-step frequency at which threadpool tasks are dumped. diff --git a/doc/RTD/source/ParameterFiles/parameter_description.rst b/doc/RTD/source/ParameterFiles/parameter_description.rst index d3a79588f148cfb0b84fc533c7e77cc29891f1bd..4107bb836b20840a2b0fe4473fc816b6cf1dca90 100644 --- a/doc/RTD/source/ParameterFiles/parameter_description.rst +++ b/doc/RTD/source/ParameterFiles/parameter_description.rst @@ -370,6 +370,45 @@ The full section to start a typical cosmological run would be: H_mass_fraction: 0.755 H_ionization_temperature: 1e4 +.. _Parameters_Stars: + +Stars +----- + +The ``Stars`` section is used to set parameters that describe the Stars +calculations when doing feedback or enrichment. Note that if stars only act +gravitationally (i.e. SWIFT is run *without* ``--feedback``) no parameters +in this section are used. + +The first four parameters are related to the neighbour search: + +* The (relative) tolerance to converge smoothing lengths within: + ``h_tolerance`` (Default: same as SPH scheme) +* The maximal smoothing length in internal units: ``h_max`` (Default: same + as SPH scheme) +* The minimal allowed smoothing length in terms of the gravitational + softening: ``h_min_ratio`` (Default: same as SPH scheme) +* The maximal (relative) allowed change in volume over one time-step: + ``max_volume_change`` (Default: same as SPH scheme) + +These four parameters are optional and will default to their SPH equivalent +if left unspecified. That is the value specified by the user in that +section or the default SPH value if left unspecified there as well. + +The two remaining parameters can be used to overwrite the birth time (or +scale-factor) of the stars that were read from the ICs. This can be useful +to start a simulation with stars already of a given age. 
The parameters +are: + +* Whether or not to overwrite anything: ``overwrite_birth_time`` + (Default: 0) +* The value to use: ``birth_time`` + +If the birth time is set to ``-1`` then the stars will never enter any +feedback or enrichment loop. When these values are not specified, SWIFT +will start and use the birth times specified in the ICs. If no values are +given in the ICs, the stars' birth times will be zeroed, which can cause +issues depending on the type of run performed. .. _Parameters_time_integration: diff --git a/examples/EAGLE_ICs/EAGLE_12/eagle_12.yml b/examples/EAGLE_ICs/EAGLE_12/eagle_12.yml index 93c8c740f58efb23a017a7d229f81a685e837b1a..ad20f401d26bde02a6a44299843b25e07a8f83d9 100644 --- a/examples/EAGLE_ICs/EAGLE_12/eagle_12.yml +++ b/examples/EAGLE_ICs/EAGLE_12/eagle_12.yml @@ -38,7 +38,7 @@ Statistics: # Parameters for the self-gravity scheme Gravity: eta: 0.025 # Constant dimensionless multiplier for time integration. - theta: 0.7 # Opening angle (Multipole acceptance criterion) + theta: 0.5 # Opening angle (Multipole acceptance criterion) mesh_side_length: 64 comoving_DM_softening: 0.0026994 # Comoving DM softening length (in internal units). max_physical_DM_softening: 0.0007 # Max physical DM softening length (in internal units). @@ -53,6 +53,11 @@ SPH: minimal_temperature: 100.0 # (internal units) initial_temperature: 268.7 +# Parameters of the stars neighbour search +Stars: + resolution_eta: 1.1642 # Target smoothing length in units of the mean inter-particle separation + h_tolerance: 7e-3 + # Parameters for the Friends-Of-Friends algorithm FOF: basename: fof_output # Filename for the FOF outputs. @@ -64,7 +69,6 @@ FOF: Scheduler: max_top_level_cells: 16 - cell_split_size: 100 tasks_per_cell: 5 Restarts: diff --git a/examples/EAGLE_ICs/EAGLE_25/eagle_25.yml b/examples/EAGLE_ICs/EAGLE_25/eagle_25.yml index d5307533c0ffccae7644f06dbe33c27bf46f4114..fd902d27daaca3c6c3ca9c5d52fbf43b1283c581 100644 --- a/examples/EAGLE_ICs/EAGLE_25/eagle_25.yml +++ b/examples/EAGLE_ICs/EAGLE_25/eagle_25.yml @@ -38,7 +38,7 @@ Statistics: # Parameters for the self-gravity scheme Gravity: eta: 0.025 # Constant dimensionless multiplier for time integration. - theta: 0.7 # Opening angle (Multipole acceptance criterion) + theta: 0.5 # Opening angle (Multipole acceptance criterion) mesh_side_length: 128 comoving_DM_softening: 0.0026994 # Comoving DM softening length (in internal units). max_physical_DM_softening: 0.0007 # Max physical DM softening length (in internal units). @@ -53,6 +53,11 @@ SPH: minimal_temperature: 100.0 # (internal units) initial_temperature: 268.7 +# Parameters of the stars neighbour search +Stars: + resolution_eta: 1.1642 # Target smoothing length in units of the mean inter-particle separation + h_tolerance: 7e-3 + # Parameters for the Friends-Of-Friends algorithm FOF: basename: fof_output # Filename for the FOF outputs. @@ -64,7 +69,6 @@ FOF: Scheduler: max_top_level_cells: 16 - cell_split_size: 100 tasks_per_cell: 5 Restarts: diff --git a/examples/EAGLE_ICs/EAGLE_50/eagle_50.yml b/examples/EAGLE_ICs/EAGLE_50/eagle_50.yml index f757dc9dcf104237c6ecc5e472d29f79375a1d53..3091fb0be35111f0e6046fd99f0c426840d00231 100644 --- a/examples/EAGLE_ICs/EAGLE_50/eagle_50.yml +++ b/examples/EAGLE_ICs/EAGLE_50/eagle_50.yml @@ -38,7 +38,7 @@ Statistics: # Parameters for the self-gravity scheme Gravity: eta: 0.025 # Constant dimensionless multiplier for time integration. 
- theta: 0.7 # Opening angle (Multipole acceptance criterion) + theta: 0.5 # Opening angle (Multipole acceptance criterion) mesh_side_length: 256 comoving_DM_softening: 0.0026994 # Comoving DM softening length (in internal units). max_physical_DM_softening: 0.0007 # Max physical DM softening length (in internal units). @@ -53,6 +53,11 @@ SPH: minimal_temperature: 100.0 # (internal units) initial_temperature: 268.7 +# Parameters of the stars neighbour search +Stars: + resolution_eta: 1.1642 # Target smoothing length in units of the mean inter-particle separation + h_tolerance: 7e-3 + # Parameters for the Friends-Of-Friends algorithm FOF: basename: fof_output # Filename for the FOF outputs. @@ -64,7 +69,6 @@ FOF: Scheduler: max_top_level_cells: 32 - cell_split_size: 100 tasks_per_cell: 5 Restarts: diff --git a/examples/EAGLE_low_z/EAGLE_100/eagle_100.yml b/examples/EAGLE_low_z/EAGLE_100/eagle_100.yml index f77036b5d55f33b4fd3f42c7bea0ccc124003a40..0cc97babbd2b89a7507808bfcad2648e0c03ce47 100644 --- a/examples/EAGLE_low_z/EAGLE_100/eagle_100.yml +++ b/examples/EAGLE_low_z/EAGLE_100/eagle_100.yml @@ -57,6 +57,13 @@ SPH: h_min_ratio: 0.1 # Minimal smoothing in units of softening. CFL_condition: 0.1 # Courant-Friedrich-Levy condition for time integration. minimal_temperature: 100 # (internal units) + overwrite_birth_time: 1 + birth_time: 0.33333 # Pretend all the stars were born at z = 2 + +# Parameters of the stars neighbour search +Stars: + resolution_eta: 1.1642 # Target smoothing length in units of the mean inter-particle separation + h_tolerance: 7e-3 # Parameters related to the initial conditions InitialConditions: diff --git a/examples/EAGLE_low_z/EAGLE_12/eagle_12.yml b/examples/EAGLE_low_z/EAGLE_12/eagle_12.yml index f470c691a5a76207998f6d854f6e8d44f0a1aebb..73f4e1a8d4269567d4139af6b992754d17494d3d 100644 --- a/examples/EAGLE_low_z/EAGLE_12/eagle_12.yml +++ b/examples/EAGLE_low_z/EAGLE_12/eagle_12.yml @@ -59,6 +59,13 @@ SPH: CFL_condition: 0.1 # Courant-Friedrich-Levy condition for time integration. minimal_temperature: 100 # (internal units) +# Parameters of the stars neighbour search +Stars: + resolution_eta: 1.1642 # Target smoothing length in units of the mean inter-particle separation + h_tolerance: 7e-3 + overwrite_birth_time: 1 + birth_time: 0.33333 # Pretend all the stars were born at z = 2 + # Parameters for the Friends-Of-Friends algorithm FOF: basename: fof_output # Filename for the FOF outputs. diff --git a/examples/EAGLE_low_z/EAGLE_25/eagle_25.yml b/examples/EAGLE_low_z/EAGLE_25/eagle_25.yml index 07685bf783b34b2872df4a32610fa791db01cded..f7a9394299fbf641a98b2ffc2d7c4bac364c164e 100644 --- a/examples/EAGLE_low_z/EAGLE_25/eagle_25.yml +++ b/examples/EAGLE_low_z/EAGLE_25/eagle_25.yml @@ -67,6 +67,13 @@ SPH: CFL_condition: 0.1 # Courant-Friedrich-Levy condition for time integration. minimal_temperature: 100 # (internal units) +# Parameters of the stars neighbour search +Stars: + resolution_eta: 1.1642 # Target smoothing length in units of the mean inter-particle separation + h_tolerance: 7e-3 + overwrite_birth_time: 1 + birth_time: 0.33333 # Pretend all the stars were born at z = 2 + # Parameters for the Friends-Of-Friends algorithm FOF: basename: fof_output # Filename for the FOF outputs. 
diff --git a/examples/EAGLE_low_z/EAGLE_50/eagle_50.yml b/examples/EAGLE_low_z/EAGLE_50/eagle_50.yml index 943c64c7a29fd87b9b5f78a4edded6b14e0f3c57..ea46a9ad677d8e37ec48a83645a4501e8bdc842f 100644 --- a/examples/EAGLE_low_z/EAGLE_50/eagle_50.yml +++ b/examples/EAGLE_low_z/EAGLE_50/eagle_50.yml @@ -58,6 +58,13 @@ SPH: CFL_condition: 0.1 # Courant-Friedrich-Levy condition for time integration. minimal_temperature: 100 # (internal units) +# Parameters of the stars neighbour search +Stars: + resolution_eta: 1.1642 # Target smoothing length in units of the mean inter-particle separation + h_tolerance: 7e-3 + overwrite_birth_time: 1 + birth_time: 0.33333 # Pretend all the stars were born at z = 2 + # Parameters for the Friends-Of-Friends algorithm FOF: basename: fof_output # Filename for the FOF outputs. diff --git a/examples/EAGLE_low_z/EAGLE_6/eagle_6.yml b/examples/EAGLE_low_z/EAGLE_6/eagle_6.yml index 48a825750fd2a927ba08dfc5a8a4607a490fe0d8..27082dd0b881279c6631dfdc1edb0ac8ea3d07c6 100644 --- a/examples/EAGLE_low_z/EAGLE_6/eagle_6.yml +++ b/examples/EAGLE_low_z/EAGLE_6/eagle_6.yml @@ -68,6 +68,13 @@ SPH: CFL_condition: 0.1 # Courant-Friedrich-Levy condition for time integration. minimal_temperature: 100 # (internal units) +# Parameters of the stars neighbour search +Stars: + resolution_eta: 1.1642 # Target smoothing length in units of the mean inter-particle separation + h_tolerance: 7e-3 + overwrite_birth_time: 1 + birth_time: 0.33333 # Pretend all the stars were born at z = 2 + # Parameters for the Friends-Of-Friends algorithm FOF: basename: fof_output # Filename for the FOF outputs. diff --git a/examples/IsolatedGalaxy/IsolatedGalaxy_feedback/isolated_galaxy.yml b/examples/IsolatedGalaxy/IsolatedGalaxy_feedback/isolated_galaxy.yml index 79fe5682692347127081f021ed6930df57bbfa02..dcd580243c51b0cbfb24c684709e1e511829f089 100644 --- a/examples/IsolatedGalaxy/IsolatedGalaxy_feedback/isolated_galaxy.yml +++ b/examples/IsolatedGalaxy/IsolatedGalaxy_feedback/isolated_galaxy.yml @@ -45,6 +45,11 @@ SPH: h_max: 10. minimal_temperature: 100. +# Parameters for the stars neighbour search +Stars: + overwrite_birth_time: 1 # Make sure the stars in the ICs do not do any feedback + birth_time: -1. # by setting all of their birth times to -1 + # Standard EAGLE cooling options EAGLECooling: dir_name: ./coolingtables/ # Location of the Wiersma+08 cooling tables diff --git a/examples/IsolatedGalaxy/IsolatedGalaxy_starformation/isolated_galaxy.yml b/examples/IsolatedGalaxy/IsolatedGalaxy_starformation/isolated_galaxy.yml index d917f926724c022cd15524058ddde2a7466acaab..fe57f693b0fcba6d8bc70c0fddf2d9dce2e60b99 100644 --- a/examples/IsolatedGalaxy/IsolatedGalaxy_starformation/isolated_galaxy.yml +++ b/examples/IsolatedGalaxy/IsolatedGalaxy_starformation/isolated_galaxy.yml @@ -45,6 +45,11 @@ SPH: h_max: 10. minimal_temperature: 10. # Kelvin +# Parameters for the stars neighbour search +Stars: + overwrite_birth_time: 1 # Make sure the stars in the ICs do not do any feedback + birth_time: -1. 
# by setting all of their birth times to -1 + # Standard EAGLE cooling options EAGLECooling: dir_name: ./coolingtables/ # Location of the Wiersma+08 cooling tables diff --git a/examples/SubgridTests/CosmologicalStellarEvolution/stellar_evolution.yml b/examples/SubgridTests/CosmologicalStellarEvolution/stellar_evolution.yml index 9b8c3e34dad20eba560a7316f16364b76a088c05..b3d318d68b69d0940d7a37b17ae5331a711b140f 100644 --- a/examples/SubgridTests/CosmologicalStellarEvolution/stellar_evolution.yml +++ b/examples/SubgridTests/CosmologicalStellarEvolution/stellar_evolution.yml @@ -40,7 +40,8 @@ SPH: # Properties of the stars Stars: - birth_time: 0.00991 # Give the star in the ICs a decent birth time + overwrite_birth_time: 1 + birth_time: 0.00991 # Give the star in the ICs a decent birth time # Parameters related to the initial conditions InitialConditions: diff --git a/examples/SubgridTests/StellarEvolution/stellar_evolution.yml b/examples/SubgridTests/StellarEvolution/stellar_evolution.yml index 230ce6c8b8a51603c1dab9a308845be3e984febb..63c7a4d2624793af26bdbaf628715243e2ab511d 100644 --- a/examples/SubgridTests/StellarEvolution/stellar_evolution.yml +++ b/examples/SubgridTests/StellarEvolution/stellar_evolution.yml @@ -34,7 +34,8 @@ SPH: # Properties of the stars Stars: - birth_time: 0. # Give the star in the ICs a decent birth time + overwrite_birth_time: 1 + birth_time: 0. # Give the star in the ICs a decent birth time # Parameters related to the initial conditions InitialConditions: diff --git a/examples/main.c b/examples/main.c index 27af0897a4bdd12287fd0460579a4eb9ea3f08c1..9f9c0a471370a208251fe1c3628d3d980b476af4 100644 --- a/examples/main.c +++ b/examples/main.c @@ -148,6 +148,7 @@ int main(int argc, char *argv[]) { int with_aff = 0; int dry_run = 0; int dump_tasks = 0; + int dump_cells = 0; int dump_threadpool = 0; int nsteps = -2; int restart = 0; @@ -263,6 +264,9 @@ int main(int argc, char *argv[]) { OPT_INTEGER('y', "task-dumps", &dump_tasks, "Time-step frequency at which task graphs are dumped.", NULL, 0, 0), + OPT_INTEGER(0, "cell-dumps", &dump_cells, + "Time-step frequency at which cell graphs are dumped.", NULL, + 0, 0), OPT_INTEGER('Y', "threadpool-dumps", &dump_threadpool, "Time-step frequency at which threadpool tasks are dumped.", NULL, 0, 0), @@ -323,6 +327,16 @@ int main(int argc, char *argv[]) { } #endif +#ifndef SWIFT_CELL_GRAPH + if (dump_cells) { + if (myrank == 0) { + error( + "complete cell dumps are only created when " + "configured with --enable-cell-graph."); + } + } +#endif + #ifndef SWIFT_DEBUG_THREADPOOL if (dump_threadpool) { printf( @@ -542,9 +556,12 @@ int main(int argc, char *argv[]) { if (with_mpole_reconstruction && nr_nodes > 1) error("Cannot reconstruct m-poles every step over MPI (yet)."); if (with_limiter) error("Can't run with time-step limiter over MPI (yet)"); +#ifdef WITH_LOGGER + error("Can't run with the particle logger over MPI (yet)"); +#endif #endif - /* Temporary early aborts for modes not supported with hand-vec. */ + /* Temporary early aborts for modes not supported with hand-vec. */ #if defined(WITH_VECTORIZATION) && defined(GADGET2_SPH) && \ !defined(CHEMISTRY_NONE) error( @@ -1261,6 +1278,13 @@ int main(int argc, char *argv[]) { task_dump_stats(dumpfile, &e, /* header = */ 0, /* allranks = */ 1); } +#ifdef SWIFT_CELL_GRAPH + /* Dump the cell data using the given frequency. */ + if (dump_cells && (dump_cells == 1 || j % dump_cells == 1)) { + space_write_cell_hierarchy(e.s, j + 1); + } +#endif + /* Dump memory use report if collected. 
*/ #ifdef SWIFT_MEMUSE_REPORTS { diff --git a/examples/parameter_example.yml b/examples/parameter_example.yml index 3b2aa8b8eed305051829603b73d6fbe62048e573..c029a0a5b862a2d20188c87873f71179a45f20e8 100644 --- a/examples/parameter_example.yml +++ b/examples/parameter_example.yml @@ -54,7 +54,8 @@ Stars: h_tolerance: 1e-4 # (Optional) Relative accuracy of the Newton-Raphson scheme for the smoothing lengths. Defaults to the SPH value. max_ghost_iterations: 30 # (Optional) Maximal number of iterations allowed to converge towards the smoothing length. Defaults to the SPH value. max_volume_change: 1.4 # (Optional) Maximal allowed change of kernel volume over one time-step. Defaults to the SPH value. - birth_time: -1 # (Optional) Initial birth time of *all* the stars. If not -1, this value will overwrite all the values read from the ICs. + overwrite_birth_time: 0 # (Optional) Do we want to overwrite the birth time of the stars read from the ICs? (default: 0). + birth_time: -1 # (Optional) Initial birth times of *all* the stars to be used if we are overwriting them. (-1 means the stars remain inactive feedback-wise throughout the run). # Parameters for the self-gravity scheme Gravity: @@ -134,9 +135,9 @@ Snapshots: # Parameters governing the logger snapshot system Logger: delta_step: 10 # Update the particle log every this many updates - initial_buffer_size: 1 # buffer size in GB - buffer_scale: 10 # (Optional) When buffer size is too small, update it with required memory times buffer_scale basename: index # Common part of the filenames + initial_buffer_size: 1 # (Optional) Buffer size in GB + buffer_scale: 10 # (Optional) When buffer size is too small, update it with required memory times buffer_scale # Parameters governing the conserved quantities statistics Statistics: diff --git a/logger/Makefile.am b/logger/Makefile.am new file mode 100644 index 0000000000000000000000000000000000000000..3bfd5af848c504d50fe201e02f49186287fbfb5a --- /dev/null +++ b/logger/Makefile.am @@ -0,0 +1,73 @@ +# This file is part of SWIFT. +# Copyright (c) 2012 Pedro Gonnet (pedro.gonnet@durham.ac.uk), +# Matthieu Schaller (matthieu.schaller@durham.ac.uk). +# Loic Hausammann (loic.hausammann@epfl.ch) +# +# This program is free software: you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation, either version 3 of the License, or +# (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with this program. If not, see <http://www.gnu.org/licenses/>. + +# Add the non-standard paths to the included library headers +AM_CFLAGS = $(PYTHON_INCS) -I$(top_srcdir)/src $(HDF5_CPPFLAGS) $(GSL_INCS) $(FFTW_INCS) $(GRACKLE_INCS) + + +AM_LDFLAGS = $(HDF5_LDFLAGS) + +# Assign a "safe" version number +BIN_LDFLAGS = -version-info 0:0:0 + +# The git command, if available. +GIT_CMD = @GIT_CMD@ + +# Additional dependencies for shared libraries. +EXTRA_LIBS = $(PROFILER_LIBS) $(TCMALLOC_LIBS) $(JEMALLOC_LIBS) $(TBBMALLOC_LIBS) $(HDF5_LIBS) $(FFTW_LIBS) $(GRACKLE_LIBS) \ + $(VELOCIRAPTOR_LIBS) $(GSL_LIBS) -L../src/.libs -lswiftsim + +# MPI libraries. 
+# MPI_LIBS = $(MPI_THREAD_LIBS) +# MPI_FLAGS = -DWITH_MPI + +# Build the liblogger library +lib_LTLIBRARIES = liblogger.la +# Build a MPI-enabled version too? +# if HAVEMPI +# lib_LTLIBRARIES += liblogger_mpi.la +# endif + +# subdirectories +SUBDIRS = tests + +# List required headers +include_HEADERS = logger_header.h logger_loader_io.h logger_particle.h logger_time.h logger_tools.h logger_reader.h \ + logger_logfile.h + +# Common source files +AM_SOURCES = logger_header.c logger_loader_io.c logger_particle.c logger_time.c logger_tools.c logger_reader.c \ + logger_logfile.c +if HAVEPYTHON +AM_SOURCES += logger_python_wrapper.c +endif + +# Include files for distribution, not installation. +nobase_noinst_HEADERS = + +# Sources and flags for regular library +liblogger_la_SOURCES = $(AM_SOURCES) +liblogger_la_CFLAGS = $(AM_CFLAGS) +liblogger_la_LDFLAGS = $(AM_LDFLAGS) $(EXTRA_LIBS) $(BIN_LDFLAGS) + +# Sources and flags for MPI library +# liblogger_mpi_la_SOURCES = $(AM_SOURCES) +# liblogger_mpi_la_CFLAGS = $(AM_CFLAGS) $(MPI_FLAGS) +# liblogger_mpi_la_LDFLAGS = $(AM_LDFLAGS) $(MPI_LIBS) $(EXTRA_LIBS) +# liblogger_mpi_la_SHORTNAME = mpi +# liblogger_mpi_la_LIBADD = diff --git a/logger/logger_header.c b/logger/logger_header.c new file mode 100644 index 0000000000000000000000000000000000000000..61e5da246c9aa07eeeb42e751832f017fa04ca0a --- /dev/null +++ b/logger/logger_header.c @@ -0,0 +1,196 @@ +/******************************************************************************* + * This file is part of SWIFT. + * Copyright (c) 2019 Loic Hausammann (loic.hausammann@epfl.ch) + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published + * by the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public License + * along with this program. If not, see <http://www.gnu.org/licenses/>. + * + ******************************************************************************/ +#include "logger_header.h" + +#include "logger_loader_io.h" +#include "logger_logfile.h" +#include "logger_reader.h" +#include "logger_tools.h" + +#include <stdio.h> +#include <stdlib.h> +#include <string.h> + +/* Name of each offset direction. */ +const char *logger_offset_name[logger_offset_count] = { + "Forward", + "Backward", + "Corrupted", +}; + +/** + * @brief Print the properties of the header to stdout. + * + * @param h The #header. + */ +void header_print(const struct header *h) { +#ifdef SWIFT_DEBUG_CHECKS + message("Debug checks enabled."); +#endif + message("First Offset: %lu.", h->offset_first_record); + message("Offset direction: %s.", logger_offset_name[h->offset_direction]); + message("Number masks: %i.", h->number_mask); + + for (size_t i = 0; i < h->number_mask; i++) { + message(" Mask: %s.", h->masks[i].name); + message(" Value: %u.", h->masks[i].mask); + message(" Size: %i.", h->masks[i].size); + message(""); + } +}; + +/** + * @brief free the allocated memory. + * + * @param h The #header. + */ +void header_free(struct header *h) { free(h->masks); }; + +/** + * @brief Check if a field is present in the header. + * + * @param h The #header. 
+ * @param field name of the requested field. + * @return Index of the field (-1 if not found). + */ +int header_get_field_index(const struct header *h, const char *field) { + for (size_t i = 0; i < h->number_mask; i++) { + if (strcmp(h->masks[i].name, field) == 0) { + return i; + } + } + + return -1; +}; + +/** + * @brief Update the offset direction in the structure and + * write it to the logfile. + * + * @param h #header file structure. + * @param new_value The new value to write. + * + */ +void header_change_offset_direction(struct header *h, + enum logger_offset_direction new_value) { + h->offset_direction = new_value; + /* Skip file format and version numbers. */ + size_t offset = LOGGER_VERSION_SIZE + 2 * sizeof(int); + + logger_loader_io_write_data(h->log->log.map + offset, sizeof(unsigned int), + &new_value); +} + +/** + * @brief read the logger header. + * + * @param h out: The #header. + * @param log The #logger_logfile. + */ +void header_read(struct header *h, struct logger_logfile *log) { + void *map = log->log.map; + + /* Set pointer to log. */ + h->log = log; + + /* read the file format. */ + char file_format[STRING_SIZE]; + map = logger_loader_io_read_data(map, LOGGER_VERSION_SIZE, &file_format); + if (strcmp(file_format, "SWIFT_LOGGER")) + error("Wrong file format (%s).", file_format); + + /* Read the major version number. */ + map = logger_loader_io_read_data(map, sizeof(int), &h->major_version); + + /* Read the minor version number. */ + map = logger_loader_io_read_data(map, sizeof(int), &h->minor_version); + + struct logger_reader *reader = log->reader; + if (&reader->log != log) error("Wrong link to the reader."); + + if (reader->verbose > 0) + message("File version %i.%i.", h->major_version, h->minor_version); + + /* Read the offset directions. */ + map = logger_loader_io_read_data(map, sizeof(int), &h->offset_direction); + + if (!header_is_forward(h) && !header_is_backward(h) && + !header_is_corrupted(h)) + error("Wrong offset value in the header (%i).", h->offset_direction); + + /* Read offset to first record. */ + map = logger_loader_io_read_data(map, LOGGER_OFFSET_SIZE, + &h->offset_first_record); + + /* Read the size of the strings. */ + map = + logger_loader_io_read_data(map, sizeof(unsigned int), &h->string_length); + + /* Check if value defined in this file is large enough. */ + if (STRING_SIZE < h->string_length) { + error("Name too large in log file %i.", h->string_length); + } + + /* Read the number of masks. */ + map = logger_loader_io_read_data(map, sizeof(unsigned int), &h->number_mask); + + /* Allocate the masks memory. */ + h->masks = malloc(sizeof(struct mask_data) * h->number_mask); + + /* Loop over all masks. */ + for (size_t i = 0; i < h->number_mask; i++) { + /* Read the mask name. */ + map = logger_loader_io_read_data(map, h->string_length, h->masks[i].name); + + /* Set the mask value. */ + h->masks[i].mask = 1 << i; + + /* Read the mask data size. */ + map = logger_loader_io_read_data(map, sizeof(unsigned int), + &h->masks[i].size); + } + + /* Check the logfile header's size. */ + if (map != log->log.map + h->offset_first_record) { + header_print(h); + size_t offset = map - log->log.map; + error("Wrong header size (in header %zi, current %zi).", + h->offset_first_record, offset); + } +}; + +/** + * @brief Count number of bits in a given mask (without the record header). + * + * @param h #header file structure. + * @param mask Mask to compute. + * + * @return number of bits in mask. 
+ */ +size_t header_get_record_size_from_mask(const struct header *h, + const size_t mask) { + size_t count = 0; + /* Loop over each masks. */ + for (size_t i = 0; i < h->number_mask; i++) { + if (mask & h->masks[i].mask) { + count += h->masks[i].size; + } + } + return count; +} diff --git a/logger/logger_header.h b/logger/logger_header.h new file mode 100644 index 0000000000000000000000000000000000000000..c388ef65cda21d00f53ddc54e97f43671edf1aeb --- /dev/null +++ b/logger/logger_header.h @@ -0,0 +1,119 @@ +/******************************************************************************* + * This file is part of SWIFT. + * Copyright (c) 2019 Loic Hausammann (loic.hausammann@epfl.ch) + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published + * by the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public License + * along with this program. If not, see <http://www.gnu.org/licenses/>. + * + ******************************************************************************/ +#ifndef LOGGER_LOGGER_HEADER_H +#define LOGGER_LOGGER_HEADER_H + +#include "logger_tools.h" + +#include <stdio.h> +#include <stdlib.h> + +#define LOGGER_VERSION_SIZE 20 +#define LOGGER_OFFSET_SIZE 7 +#define LOGGER_MASK_SIZE 1 + +enum logger_offset_direction { + logger_offset_backward = 0, + logger_offset_forward, + logger_offset_corrupted, + /* Number of offset type. */ + logger_offset_count, +}; + +/** + * @brief Names of the offset directions. + */ +extern const char *logger_offset_name[]; + +struct logger_logfile; + +/** + * @brief This structure contains everything from the file header. + * + * This structure is initialized by #header_read and need to be freed + * with #header_free. + * + * The information contained by the header can be easily access with + * the functions #header_get_record_size_from_mask and #header_get_field_index. + * + * The only function that modify the file is #header_change_offset_direction. + */ +struct header { + /* Dump's major version. */ + int major_version; + + /* Dump's minor version. */ + int minor_version; + + /* Offset of the first record. */ + size_t offset_first_record; + + /* Number of bytes for strings. */ + unsigned int string_length; + + /* Number of masks. */ + unsigned int number_mask; + + /* List of masks. */ + struct mask_data *masks; + + /* Direction of the offset in the records. */ + enum logger_offset_direction offset_direction; + + /* The corresponding log. */ + struct logger_logfile *log; +}; + +void header_print(const struct header *h); +void header_free(struct header *h); +int header_get_field_index(const struct header *h, const char *field); +void header_read(struct header *h, struct logger_logfile *log); +size_t header_get_record_size_from_mask(const struct header *h, + const size_t mask); +void header_change_offset_direction(struct header *h, + enum logger_offset_direction new_value); + +/** + * @brief Check if the offset are forward. + * @param h The #header. 
+ */ +__attribute__((always_inline)) INLINE static int header_is_forward( + const struct header *h) { + return h->offset_direction == logger_offset_forward; +} + +/** + * @brief Check if the offset are backward. + * @param h The #header. + */ +__attribute__((always_inline)) INLINE static int header_is_backward( + const struct header *h) { + return h->offset_direction == logger_offset_backward; +} + +/** + * @brief Check if the offset are corrupted. + * @param h The #header. + */ +__attribute__((always_inline)) INLINE static int header_is_corrupted( + const struct header *h) { + return h->offset_direction == logger_offset_corrupted; +} + +#endif // LOGGER_LOGGER_HEADER_H diff --git a/logger/logger_loader_io.c b/logger/logger_loader_io.c new file mode 100644 index 0000000000000000000000000000000000000000..f18f9bb7eb2eaf88ba11eaf916c0a68a27cfd2d2 --- /dev/null +++ b/logger/logger_loader_io.c @@ -0,0 +1,95 @@ +/******************************************************************************* + * This file is part of SWIFT. + * Copyright (c) 2019 Loic Hausammann (loic.hausammann@epfl.ch) + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published + * by the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public License + * along with this program. If not, see <http://www.gnu.org/licenses/>. + * + ******************************************************************************/ +#include <fcntl.h> +#include <sys/mman.h> +#include <sys/stat.h> +#include <unistd.h> + +#include "logger_header.h" +#include "logger_loader_io.h" +#include "logger_tools.h" + +/** + * @brief get the size of a file. + * + * @param fd file id. + * + * @return file size. + */ +size_t logger_loader_io_get_file_size(int fd) { + struct stat s; + int status = fstat(fd, &s); + if (status != 0) error("Unable to get file size (%s).", strerror(errno)); + return s.st_size; +} + +/** + * @brief Map a file. + * + * #logger_loader_io_munmap_file should be called to unmap the file. + * + * @param filename file to read. + * @param file_size (out) size of the file. + * @param read_only Open the file in read only mode? + * + */ +void *logger_loader_io_mmap_file(char *filename, size_t *file_size, + int read_only) { + /* open the file. */ + int fd; + + if (read_only) + fd = open(filename, O_RDONLY); + else + fd = open(filename, O_RDWR); + + if (fd == -1) + error("Unable to open file %s (%s).", filename, strerror(errno)); + + /* get the file size. */ + *file_size = logger_loader_io_get_file_size(fd); + + /* map the memory. */ + int mode = PROT_READ; + if (!read_only) mode |= PROT_WRITE; + + void *map = mmap(NULL, *file_size, mode, MAP_SHARED, fd, 0); + if (map == MAP_FAILED) + error("Failed to allocate map of size %zi bytes (%s).", *file_size, + strerror(errno)); + + /* Close the file. */ + close(fd); + + return map; +} + +/** + * @brief Unmap a file. + * + * @param map file mapping. + * @param file_size The file size. + * + */ +void logger_loader_io_munmap_file(void *map, size_t file_size) { + /* unmap the file. 
*/ + if (munmap(map, file_size) != 0) { + error("Unable to unmap the file (%s).", strerror(errno)); + } +} diff --git a/logger/logger_loader_io.h b/logger/logger_loader_io.h new file mode 100644 index 0000000000000000000000000000000000000000..d44fea673017644306e73261afdbc6dec26948c6 --- /dev/null +++ b/logger/logger_loader_io.h @@ -0,0 +1,98 @@ +/******************************************************************************* + * This file is part of SWIFT. + * Copyright (c) 2019 Loic Hausammann (loic.hausammann@epfl.ch) + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published + * by the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public License + * along with this program. If not, see <http://www.gnu.org/licenses/>. + * + ******************************************************************************/ +/** + * @file logger_loader_io.h + * @brief This file contains basic IO function. + */ +#ifndef LOGGER_LOGGER_LOADER_IO_H +#define LOGGER_LOGGER_LOADER_IO_H + +#include "logger_header.h" +#include "logger_tools.h" + +#include <stdio.h> +#include <stdlib.h> + +size_t logger_loader_io_get_file_size(int fd); +void *logger_loader_io_mmap_file(char *filename, size_t *file_size, + int read_only); +void logger_loader_io_munmap_file(void *map, size_t file_size); + +/** + * @brief read a mask with its offset. + * + * @param h #header file structure. + * @param data Pointer to the data to read. + * @param mask (output) mask read from the data. + * @param diff_offset (output) offset difference to previous/next corresponding + * record. + * + * @return memory after the record header. + */ +__attribute__((always_inline)) INLINE static void *logger_loader_io_read_mask( + const struct header *h, void *data, size_t *mask, size_t *diff_offset) { + /* read mask */ + if (mask) { + *mask = 0; + memcpy(mask, data, LOGGER_MASK_SIZE); + } + data += LOGGER_MASK_SIZE; + + /* read offset */ + if (diff_offset) { + *diff_offset = 0; + memcpy(diff_offset, data, LOGGER_OFFSET_SIZE); + } + data += LOGGER_OFFSET_SIZE; + + return data; +} + +/** + * @brief read a single value from a file. + * + * @param data Pointer to the data to read. + * @param size size of the data to read. + * @param p pointer where to store the data. + + * @return memory after the data written. + */ +__attribute__((always_inline)) INLINE static void *logger_loader_io_read_data( + void *data, const size_t size, void *p) { + memcpy(p, data, size); + return data + size; +}; + +/** + * @brief write a single value in a file. + * + * @param data Pointer to the data to read. + * @param size size of the data to write. + * @param p pointer to the data. + * + * @return memory after the data written. 
+ */ +__attribute__((always_inline)) INLINE static void *logger_loader_io_write_data( + void *data, const size_t size, const void *p) { + memcpy(data, p, size); + + return data + size; +}; + +#endif // LOGGER_LOGGER_LOADER_IO_H diff --git a/logger/logger_logfile.c b/logger/logger_logfile.c new file mode 100644 index 0000000000000000000000000000000000000000..c70068cd24c01a5ba231e97e343a0c076dc0ecb4 --- /dev/null +++ b/logger/logger_logfile.c @@ -0,0 +1,175 @@ +/******************************************************************************* + * This file is part of SWIFT. + * Copyright (c) 2019 Loic Hausammann (loic.hausammann@epfl.ch) + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published + * by the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public License + * along with this program. If not, see <http://www.gnu.org/licenses/>. + * + ******************************************************************************/ +#include "logger_logfile.h" +#include "logger_loader_io.h" +#include "logger_reader.h" + +/** + * @brief Initialize the #logger_logfile. + * + * If required this function will also reverse the offsets. + * @param log The #logger_logfile. + * @param filename the log's filename. + * @param reader The #logger_reader. + * @param only_header Read only the header. + */ +void logger_logfile_init_from_file(struct logger_logfile *log, char *filename, + struct logger_reader *reader, + int only_header) { + + /* Set the pointer to the reader. */ + log->reader = reader; + if (&reader->log != log) error("Wrong link to the reader."); + + /* Set pointers to zero. */ + time_array_init(&log->times); + + /* Open file, map it and get its size. */ + if (reader->verbose > 1) message("Mapping the log file."); + log->log.map = logger_loader_io_mmap_file(filename, &log->log.file_size, + /* read_only */ 1); + + /* Read the header. */ + if (reader->verbose > 1) message("Reading the header."); + header_read(&log->header, log); + + /* Print the header. */ + if (reader->verbose > 0) { + header_print(&log->header); + } + + /* No need to continue if only the + header is required. */ + if (only_header) return; + + /* Check if the offset are corrupted. */ + if (header_is_corrupted(&log->header)) { + error("The offsets have been corrupted."); + } + + /* Reverse the offsets direction. */ + if (header_is_backward(&log->header)) { + logger_logfile_reverse_offset(log, filename); + } + + /* Initialize the time array. */ + if (reader->verbose > 1) message("Reading the time stamps."); + time_array_populate(&log->times, log); + + /* Print the time array. */ + if (reader->verbose > 0) { + time_array_print(&log->times); + } +} + +/** + * @brief Free the allocated memory and unmap the file. + * + * @param log The #logger_logfile. + */ +void logger_logfile_free(struct logger_logfile *log) { + logger_loader_io_munmap_file(log->log.map, log->log.file_size); + + time_array_free(&log->times); +} + +/** + * @brief Reverse offset in log file + * + * @param log The #logger_logfile + * @param filename The log's filename. 
+ */ +void logger_logfile_reverse_offset(struct logger_logfile *log, char *filename) { + + /* Close and reopen the file in write mode. */ + logger_loader_io_munmap_file(log->log.map, log->log.file_size); + log->log.map = logger_loader_io_mmap_file(filename, &log->log.file_size, + /* read_only */ 0); + + /* Get pointers */ + struct header *header = &log->header; + const struct logger_reader *reader = log->reader; + if (&reader->log != log) error("Wrong link to the reader."); + + /* Check if the offsets need to be reversed. */ + if (!header_is_backward(header)) { + error("The offsets are already reversed."); + } + +#ifdef SWIFT_DEBUG_CHECKS + if (reader->verbose > 0) { + message("Check record's headers..."); + } + + /* check that the record offset points to another record. */ + for (size_t offset_debug = header->offset_first_record; + offset_debug < log->log.file_size; + offset_debug = tools_check_record_consistency(reader, offset_debug)) { + } + + if (reader->verbose > 0) { + message("Record's headers are correct."); + } +#endif + + message("WARNING: Modifying the logfile, do not kill the job!"); + + /* Set the offset direction to a corrupted status. */ + header_change_offset_direction(header, logger_offset_corrupted); + + if (reader->verbose > 0) { + message("Reversing offsets..."); + } + + /* reverse the record's offset. */ + for (size_t offset = header->offset_first_record; offset < log->log.file_size; + offset = tools_reverse_offset(header, log->log.map, offset)) { + } + + if (reader->verbose > 0) { + message("Reversing done."); + } + + /* Now that the offset are effectively reversed, can set the direction to + forward. */ + header_change_offset_direction(header, logger_offset_forward); + + message("WARNING: Modification done, you can now safely kill the job."); + +#ifdef SWIFT_DEBUG_CHECKS + if (reader->verbose > 0) { + message("Check record's headers..."); + } + + /* check that the record offset points to another record. */ + for (size_t offset_debug = header->offset_first_record; + offset_debug < log->log.file_size; + offset_debug = tools_check_record_consistency(reader, offset_debug)) { + } + + if (reader->verbose > 0) { + message("Record's headers are correct."); + } +#endif + + /* Close and reopen the file in read mode. */ + logger_loader_io_munmap_file(log->log.map, log->log.file_size); + log->log.map = logger_loader_io_mmap_file(filename, &log->log.file_size, + /* read_only */ 1); +} diff --git a/logger/logger_logfile.h b/logger/logger_logfile.h new file mode 100644 index 0000000000000000000000000000000000000000..0b6ef728d524bb104b83fc28b9250c51a764dfd4 --- /dev/null +++ b/logger/logger_logfile.h @@ -0,0 +1,69 @@ +/******************************************************************************* + * This file is part of SWIFT. + * Copyright (c) 2019 Loic Hausammann (loic.hausammann@epfl.ch) + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published + * by the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public License + * along with this program. If not, see <http://www.gnu.org/licenses/>. 
+ * + ******************************************************************************/ +/** + * @file logger_logfile.h + * @brief This file contains the high level function for the log. + */ +#ifndef LOGGER_LOGGER_LOGFILE_H +#define LOGGER_LOGGER_LOGFILE_H + +#include "logger_header.h" +#include "logger_time.h" + +struct logger_reader; + +/** + * @brief This structure deals with the log file. + * + * This structure is initialized by the #logger_reader + * and deals with the log file. + * It maps it, reverse the offsets (if required) and unmap it. + * + * The structure is initialized with #logger_logfile_init_from_file and + * freed with #logger_logfile_free. + */ +struct logger_logfile { + + /* Information contained in the file header. */ + struct header header; + + /* The reader that is using this log file. */ + struct logger_reader *reader; + + /* Information about the time records. */ + struct time_array times; + + /* The log's variables. */ + struct { + /* Mapped data. */ + void *map; + + /* File size. */ + size_t file_size; + + } log; +}; + +void logger_logfile_init_from_file(struct logger_logfile *log, char *filename, + struct logger_reader *reader, + int only_header); +void logger_logfile_reverse_offset(struct logger_logfile *log, char *filename); +void logger_logfile_free(struct logger_logfile *log); + +#endif // LOGGER_LOGGER_LOGFILE_H diff --git a/logger/logger_particle.c b/logger/logger_particle.c new file mode 100644 index 0000000000000000000000000000000000000000..6809e0edf6125e66cbb8807cc98eeb31b5e04ecd --- /dev/null +++ b/logger/logger_particle.c @@ -0,0 +1,253 @@ +/******************************************************************************* + * This file is part of SWIFT. + * Copyright (c) 2019 Loic Hausammann (loic.hausammann@epfl.ch) + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published + * by the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public License + * along with this program. If not, see <http://www.gnu.org/licenses/>. + * + ******************************************************************************/ +#include "logger_particle.h" +#include "logger_header.h" +#include "logger_loader_io.h" +#include "logger_reader.h" +#include "logger_time.h" +#include "logger_tools.h" + +#include <stdint.h> +#include <stdio.h> +#include <stdlib.h> + +/** + * @brief Print the properties of a logger_particle. + * + * @param p The #logger_particle to print + */ +void logger_particle_print(const struct logger_particle *p) { + message("ID: %lu.", p->id); + message("Mass: %g", p->mass); + message("Time: %g.", p->time); + message("Cutoff Radius: %g.", p->h); + message("Positions: (%g, %g, %g).", p->pos[0], p->pos[1], p->pos[2]); + message("Velocities: (%g, %g, %g).", p->vel[0], p->vel[1], p->vel[2]); + message("Accelerations: (%g, %g, %g).", p->acc[0], p->acc[1], p->acc[2]); + message("Entropy: %g.", p->entropy); + message("Density: %g.", p->density); +} + +/** + * @brief Initialize a logger_particle. + * + * @param part The #logger_particle to initialize. 
+ */ +void logger_particle_init(struct logger_particle *part) { + for (size_t k = 0; k < DIM; k++) { + part->pos[k] = 0; + part->vel[k] = 0; + part->acc[k] = 0; + } + + part->entropy = -1; + part->density = -1; + part->h = -1; + part->mass = -1; + part->id = SIZE_MAX; +} + +/** + * @brief Read a single named entry for a particle. + * + * @param part The #logger_particle to update. + * @param map The mapped data. + * @param field field to read. + * @param size number of bits to read. + * + * @return mapped data after the block read. + */ +void *logger_particle_read_field(struct logger_particle *part, void *map, + const char *field, const size_t size) { + void *p = NULL; + + /* Get the correct pointer. */ + if (strcmp("positions", field) == 0) { + p = &part->pos; + } else if (strcmp("velocities", field) == 0) { + p = &part->vel; + } else if (strcmp("accelerations", field) == 0) { + p = &part->acc; + } else if (strcmp("entropy", field) == 0) { + p = &part->entropy; + } else if (strcmp("smoothing length", field) == 0) { + p = &part->h; + } else if (strcmp("density", field) == 0) { + p = &part->density; + } else if (strcmp("consts", field) == 0) { + p = malloc(size); + } else { + error("Type %s not defined.", field); + } + + /* read the data. */ + map = logger_loader_io_read_data(map, size, p); + + /* Split the required fields. */ + if (strcmp("consts", field) == 0) { + part->mass = 0; + part->id = 0; + memcpy(&part->mass, p, sizeof(float)); + p += sizeof(float); + memcpy(&part->id, p, sizeof(size_t)); + p -= sizeof(float); + free(p); + } + + return map; +} + +/** + * @brief Read a particle entry in the log file. + * + * @param reader The #logger_reader. + * @param part The #logger_particle to update. + * @param offset offset of the record to read. + * @param time time to interpolate. + * @param reader_type #logger_reader_type. + * + * @return position after the record. + */ +size_t logger_particle_read(struct logger_particle *part, + const struct logger_reader *reader, size_t offset, + const double time, + const enum logger_reader_type reader_type) { + + /* Get a few pointers. */ + const struct header *h = &reader->log.header; + void *map = reader->log.log.map; + + const struct time_array *times = &reader->log.times; + + size_t mask = 0; + size_t h_offset = 0; + + logger_particle_init(part); + + /* Read the record's mask. */ + map = logger_loader_io_read_mask(h, map + offset, &mask, &h_offset); + + /* Check if it is not a time record. */ + if (mask == 128) error("Unexpected mask: %lu.", mask); + + /* Read all the fields. */ + for (size_t i = 0; i < h->number_mask; i++) { + if (mask & h->masks[i].mask) { + map = logger_particle_read_field(part, map, h->masks[i].name, + h->masks[i].size); + } + } + + /* Get the time of current record. + This check is required for the manipulating the file before + the initialization of the time_array. */ + if (times->size != 0) { + part->time = time_array_get_time(times, offset); + } else + part->time = -1; + + /* update the offset. */ + offset = (size_t)(map - reader->log.log.map); + + /* Check if an interpolation is required. */ + if (reader_type == logger_reader_const) return offset; + + /* Start reading next record. */ + struct logger_particle part_next; + + /* Check that the offset are in the correct direction. */ + if (!header_is_forward(h)) { + error("Cannot read a particle with non forward offsets."); + } + + /* No next particle. */ + if (h_offset == 0) return (size_t)(map - reader->log.log.map); + + /* get absolute offset of next particle. 
*/ + h_offset += offset - header_get_record_size_from_mask(h, mask) - + LOGGER_MASK_SIZE - LOGGER_OFFSET_SIZE; + + /* Get time of next record. */ + part_next.time = time_array_get_time(times, h_offset); + + /* Read next record. */ + h_offset = logger_particle_read(&part_next, reader, h_offset, part_next.time, + logger_reader_const); + + /* Interpolate the two particles. */ + logger_particle_interpolate(part, &part_next, time); + + return offset; +} + +/** + * @brief interpolate two particles at a given time + * + * @param part_curr #logger_particle In: current particle (before time), Out: + * interpolated particle + * @param part_next #logger_particle next particle (after time) + * @param time interpolation time + * + */ +void logger_particle_interpolate(struct logger_particle *part_curr, + const struct logger_particle *part_next, + const double time) { + + /* Check that a particle is provided. */ + if (!part_curr) error("part_curr is NULL."); + if (!part_next) error("part_next is NULL."); + +#ifdef SWIFT_DEBUG_CHECKS + /* Check the particle order. */ + if (part_next->time <= part_curr->time) + error("Wrong particle order (next before current)."); + if ((time < part_curr->time) || (part_next->time < time)) + error( + "Cannot extrapolate (particle time: %f, " + "interpolating time: %f, next particle time: %f).", + part_curr->time, time, part_next->time); +#endif + + /* Compute the interpolation scaling. */ + double scaling = part_next->time - part_curr->time; + + scaling = (time - part_curr->time) / scaling; + + double tmp; + float ftmp; + + /* interpolate vectors. */ + for (size_t i = 0; i < DIM; i++) { + tmp = (part_next->pos[i] - part_curr->pos[i]); + part_curr->pos[i] += tmp * scaling; + + ftmp = (part_next->vel[i] - part_curr->vel[i]); + part_curr->vel[i] += ftmp * scaling; + + ftmp = (part_next->acc[i] - part_curr->acc[i]); + part_curr->acc[i] += ftmp * scaling; + } + + /* interpolate scalars. */ + ftmp = (part_next->entropy - part_curr->entropy); + part_curr->entropy += ftmp * scaling; + + /* set time. */ + part_curr->time = time; +} diff --git a/logger/logger_particle.h b/logger/logger_particle.h new file mode 100644 index 0000000000000000000000000000000000000000..addd23564b65a734152ae8f538596d79019dd36f --- /dev/null +++ b/logger/logger_particle.h @@ -0,0 +1,107 @@ +/******************************************************************************* + * This file is part of SWIFT. + * Copyright (c) 2019 Loic Hausammann (loic.hausammann@epfl.ch) + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published + * by the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public License + * along with this program. If not, see <http://www.gnu.org/licenses/>. 
+ * + ******************************************************************************/ +#ifndef LOGGER_LOGGER_PARTICLE_H +#define LOGGER_LOGGER_PARTICLE_H + +#include "logger_header.h" +#include "logger_time.h" +#include "logger_tools.h" + +#include <stdio.h> +#include <stdlib.h> + +#if defined(HYDRO_DIMENSION_1D) +#define DIM 1 +#elif defined(HYDRO_DIMENSION_2D) +#define DIM 2 +#elif defined(HYDRO_DIMENSION_3D) +#define DIM 3 +#endif + +struct logger_reader; + +/** + * @brief Store the data from a record. + * + * This structure contains all the required fields + * present in a file. + * + * As we need only a few particles, no need to keep it small. + * + * The particle is initialized with #logger_particle_init + * and can be updated with a record through #logger_particle_read. + * + * In #logger_particle_read, we use #logger_particle_read_field on + * each field and #logger_particle_interpolate if a linear + * interpolation is required. + */ +struct logger_particle { + /* position. */ + double pos[DIM]; + + /* velocity. */ + float vel[DIM]; + + /* acceleration. */ + float acc[DIM]; + + /* entropy. */ + float entropy; + + /* smoothing length. */ + float h; + + /* density. */ + float density; + + /* mass. */ + float mass; + + /* unique id. */ + size_t id; + + /* time of the record. */ + double time; +}; + +/** + * @brief Defines the type of interpolation + */ +enum logger_reader_type { + logger_reader_const, /* Constant interpolation. */ + logger_reader_lin, /* Linear interpolation. */ +}; + +void logger_particle_print(const struct logger_particle *p); + +size_t logger_particle_read(struct logger_particle *part, + const struct logger_reader *reader, size_t offset, + const double time, + const enum logger_reader_type reader_type); + +void logger_particle_init(struct logger_particle *part); + +void *logger_particle_read_field(struct logger_particle *part, void *map, + const char *field, const size_t size); + +void logger_particle_interpolate(struct logger_particle *part_curr, + const struct logger_particle *part_next, + const double time); + +#endif // LOGGER_LOGGER_PARTICLE_H diff --git a/logger/logger_python_wrapper.c b/logger/logger_python_wrapper.c new file mode 100644 index 0000000000000000000000000000000000000000..07c87b4989896977c56ddff4df243a5310d393a7 --- /dev/null +++ b/logger/logger_python_wrapper.c @@ -0,0 +1,290 @@ +/******************************************************************************* + * This file is part of SWIFT. + * Copyright (c) 2019 Loic Hausammann (loic.hausammann@epfl.ch) + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published + * by the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public License + * along with this program. If not, see <http://www.gnu.org/licenses/>. 
+ * + ******************************************************************************/ +#include "logger_header.h" +#include "logger_loader_io.h" +#include "logger_particle.h" +#include "logger_reader.h" +#include "logger_time.h" + +#define NPY_NO_DEPRECATED_API NPY_1_7_API_VERSION + +#include <Python.h> +#include <errno.h> +#include <numpy/arrayobject.h> +#include <stdio.h> +#include <stdlib.h> + +/** + * @brief load data from the offset without any interpolation + * + * <b>offset</b> PyArrayObject list of offset for each particle. + * + * <b>filename</b> string filename of the log file. + * + * <b>verbose</b> Verbose level. + * + * <b>returns</b> dictionnary containing the data read. + */ +static PyObject *loadFromIndex(__attribute__((unused)) PyObject *self, + PyObject *args) { + + /* input variables. */ + PyArrayObject *offset = NULL; + char *filename = NULL; + + /* output variables. */ + PyArrayObject *pos = NULL; + PyArrayObject *vel = NULL; + PyArrayObject *acc = NULL; + PyArrayObject *entropy = NULL; + PyArrayObject *h_sph = NULL; + PyArrayObject *rho = NULL; + PyArrayObject *mass = NULL; + PyArrayObject *id = NULL; + + size_t time_offset; + int verbose = 2; + + /* parse arguments. */ + if (!PyArg_ParseTuple(args, "OsL|i", &offset, &filename, &time_offset, + &verbose)) + return NULL; + + if (!PyArray_Check(offset)) { + error("Offset is not a numpy array."); + } + if (PyArray_NDIM(offset) != 1) { + error("Offset is not a 1 dimensional array."); + } + if (PyArray_TYPE(offset) != NPY_UINT64) { + error("Offset does not contain unsigned int."); + } + + /* initialize the reader. */ + struct logger_reader reader; + logger_reader_init(&reader, filename, verbose); + struct header *h = &reader.log.header; + + /* init array. */ + npy_intp dim[2]; + dim[0] = PyArray_DIMS(offset)[0]; + dim[1] = DIM; + + /* Get required time. */ + double time = time_array_get_time(&reader.log.times, time_offset); + + /* init output. */ + if (header_get_field_index(h, "positions") != -1) { + pos = (PyArrayObject *)PyArray_SimpleNew(2, dim, NPY_DOUBLE); + } + + if (header_get_field_index(h, "velocities") != -1) { + vel = (PyArrayObject *)PyArray_SimpleNew(2, dim, NPY_FLOAT); + } + + if (header_get_field_index(h, "accelerations") != -1) { + acc = (PyArrayObject *)PyArray_SimpleNew(2, dim, NPY_FLOAT); + } + + if (header_get_field_index(h, "entropy") != -1) { + entropy = + (PyArrayObject *)PyArray_SimpleNew(1, PyArray_DIMS(offset), NPY_FLOAT); + } + + if (header_get_field_index(h, "smoothing length") != -1) { + h_sph = + (PyArrayObject *)PyArray_SimpleNew(1, PyArray_DIMS(offset), NPY_FLOAT); + } + + if (header_get_field_index(h, "density") != -1) { + rho = + (PyArrayObject *)PyArray_SimpleNew(1, PyArray_DIMS(offset), NPY_FLOAT); + } + + if (header_get_field_index(h, "consts") != -1) { + mass = + (PyArrayObject *)PyArray_SimpleNew(1, PyArray_DIMS(offset), NPY_FLOAT); + id = (PyArrayObject *)PyArray_SimpleNew(1, PyArray_DIMS(offset), NPY_ULONG); + } + + if (verbose > 1) message("Reading particles."); + + /* loop over all particles. */ + for (npy_intp i = 0; i < PyArray_DIMS(offset)[0]; i++) { + struct logger_particle part; + + /* Get the offset. */ + size_t offset_particle = *(size_t *)PyArray_GETPTR1(offset, i); + + /* Read the particle. */ + logger_particle_read(&part, &reader, offset_particle, time, + logger_reader_lin); + + double *dtmp; + float *ftmp; + size_t *stmp; + + /* copy the data. 
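+       Only the arrays allocated above (i.e. the fields that are present
+       in the log file header) are filled in; the other pointers are NULL
+       and are skipped.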
*/ + for (size_t k = 0; k < DIM; k++) { + if (pos) { + dtmp = PyArray_GETPTR2(pos, i, k); + *dtmp = part.pos[k]; + } + + if (vel) { + ftmp = PyArray_GETPTR2(vel, i, k); + *ftmp = part.vel[k]; + } + + if (acc) { + ftmp = PyArray_GETPTR2(acc, i, k); + *ftmp = part.acc[k]; + } + } + + if (entropy) { + ftmp = PyArray_GETPTR1(entropy, i); + *ftmp = part.entropy; + } + + if (rho) { + ftmp = PyArray_GETPTR1(rho, i); + *ftmp = part.density; + } + + if (h_sph) { + ftmp = PyArray_GETPTR1(h_sph, i); + *ftmp = part.h; + } + + if (mass) { + ftmp = PyArray_GETPTR1(mass, i); + *ftmp = part.mass; + } + + if (id) { + stmp = PyArray_GETPTR1(id, i); + *stmp = part.id; + } + } + + /* Free the memory. */ + logger_reader_free(&reader); + + /* construct return value. */ + PyObject *dict = PyDict_New(); + PyObject *key = PyUnicode_FromString("positions"); + PyDict_SetItem(dict, key, PyArray_Return(pos)); + + if (vel) { + key = PyUnicode_FromString("velocities"); + PyDict_SetItem(dict, key, PyArray_Return(vel)); + } + + if (acc) { + key = PyUnicode_FromString("accelerations"); + PyDict_SetItem(dict, key, PyArray_Return(acc)); + } + + if (entropy) { + key = PyUnicode_FromString("entropy"); + PyDict_SetItem(dict, key, PyArray_Return(entropy)); + } + + if (rho) { + key = PyUnicode_FromString("rho"); + PyDict_SetItem(dict, key, PyArray_Return(rho)); + } + + if (h_sph) { + key = PyUnicode_FromString("h_sph"); + PyDict_SetItem(dict, key, PyArray_Return(h_sph)); + } + + if (mass) { + key = PyUnicode_FromString("mass"); + PyDict_SetItem(dict, key, PyArray_Return(mass)); + } + + if (id) { + key = PyUnicode_FromString("id"); + PyDict_SetItem(dict, key, PyArray_Return(id)); + } + + return dict; +} + +/** + * @brief Reverse offset in log file + * + * <b>filename</b> string filename of the log file + * <b>verbose</b> Verbose level + */ +static PyObject *pyReverseOffset(__attribute__((unused)) PyObject *self, + PyObject *args) { + /* input variables. */ + char *filename = NULL; + + int verbose = 0; + + /* parse the arguments. */ + if (!PyArg_ParseTuple(args, "s|i", &filename, &verbose)) return NULL; + + /* initialize the reader which reverse the offset if necessary. */ + struct logger_reader reader; + logger_reader_init(&reader, filename, verbose); + + /* Free the reader. */ + logger_reader_free(&reader); + + return Py_BuildValue(""); +} + +/* definition of the method table. */ + +static PyMethodDef libloggerMethods[] = { + {"loadFromIndex", loadFromIndex, METH_VARARGS, + "Load snapshot directly from the offset in an index file."}, + {"reverseOffset", pyReverseOffset, METH_VARARGS, + "Reverse the offset (from pointing backward to forward)."}, + + {NULL, NULL, 0, NULL} /* Sentinel */ +}; + +static struct PyModuleDef libloggermodule = { + PyModuleDef_HEAD_INIT, + "liblogger", + "Module reading a SWIFTsim logger snapshot", + -1, + libloggerMethods, + NULL, /* m_slots */ + NULL, /* m_traverse */ + NULL, /* m_clear */ + NULL /* m_free */ +}; + +PyMODINIT_FUNC PyInit_liblogger(void) { + PyObject *m; + m = PyModule_Create(&libloggermodule); + if (m == NULL) return NULL; + + import_array(); + + return m; +} diff --git a/logger/logger_reader.c b/logger/logger_reader.c new file mode 100644 index 0000000000000000000000000000000000000000..0954b9c5a8e56213de4d5b2a445aeeb9105e327c --- /dev/null +++ b/logger/logger_reader.c @@ -0,0 +1,90 @@ +/******************************************************************************* + * This file is part of SWIFT. 
+ * Copyright (c) 2019 Loic Hausammann (loic.hausammann@epfl.ch) + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published + * by the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public License + * along with this program. If not, see <http://www.gnu.org/licenses/>. + * + ******************************************************************************/ + +#include "logger_reader.h" + +/** + * @brief Initialize the reader. + * + * @param reader The #logger_reader. + * @param filename The log filename. + * @param verbose The verbose level. + */ +void logger_reader_init(struct logger_reader *reader, char *filename, + int verbose) { + if (verbose > 1) message("Initializing the reader."); + + /* Initialize the reader variables. */ + reader->verbose = verbose; + + /* Initialize the log file. */ + logger_logfile_init_from_file(&reader->log, filename, reader, + /* only_header */ 0); + + if (verbose > 1) message("Initialization done."); +} + +/** + * @brief Free the reader. + * + * @param reader The #logger_reader. + */ +void logger_reader_free(struct logger_reader *reader) { + /* Free the log. */ + logger_logfile_free(&reader->log); +} + +/** + * @brief Read a record (timestamp or particle) + * + * @param reader The #logger_reader. + * @param lp (out) The #logger_particle (if the record is a particle). + * @param time (out) The time read (if the record is a timestamp). + * @param is_particle Is the record a particle (or a timestamp)? + * @param offset The offset in the file. + * + * @return The offset after this record. + */ +size_t reader_read_record(struct logger_reader *reader, + struct logger_particle *lp, double *time, + int *is_particle, size_t offset) { + + struct logger_logfile *log = &reader->log; + + /* Read mask to find out if timestamp or particle. */ + size_t mask = 0; + logger_loader_io_read_mask(&log->header, log->log.map + offset, &mask, NULL); + + /* Check if timestamp or not. */ + int ind = header_get_field_index(&log->header, "timestamp"); + if (ind == -1) { + error("File header does not contain a mask for time."); + } + if (log->header.masks[ind].mask == mask) { + *is_particle = 0; + integertime_t int_time = 0; + offset = time_read(&int_time, time, reader, offset); + } else { + *is_particle = 1; + offset = + logger_particle_read(lp, reader, offset, *time, logger_reader_const); + } + + return offset; +} diff --git a/logger/logger_reader.h b/logger/logger_reader.h new file mode 100644 index 0000000000000000000000000000000000000000..124d271f57587a26dbfb59299678f0ce5cfbdf79 --- /dev/null +++ b/logger/logger_reader.h @@ -0,0 +1,81 @@ +/******************************************************************************* + * This file is part of SWIFT. + * Copyright (c) 2019 Loic Hausammann (loic.hausammann@epfl.ch) + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published + * by the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. 
+ * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public License + * along with this program. If not, see <http://www.gnu.org/licenses/>. + * + ******************************************************************************/ +/** + * @file logger_reader.h + * @brief This file contains the C functions shown to the external user. + * + * Here is a quick summary of our different elements: + * + * The logger is a time adaptive way to write snapshots. + * It consists of a set of files: the log file, the parameter file and the index + * files. + * + * The <b>parameter file</b> contains all the information related to the code + * (e.g. boxsize). + * + * The <b>index files</b> are not mandatory files that indicates the position of + * the particles in the log file at a given time step. They are useful to + * speedup the reading. + * + * The <b>log file</b> consists in a large file where the particles are logged + * one after the other. It contains a <b>log file header</b> at the beginning of + * the file and a large collection of <b>records</b>. + * + * The records are logged one after the other and each contains a <b>record + * header</b> and then a list of <b>named entries</b>. In the record header, a + * <b>mask</b> is provided that corresponds to the type of named entries present + * in this record. It also contains the <b>offset</b> to the previous or next + * record for this particle. + */ + +#ifndef LOGGER_LOGGER_READER_H +#define LOGGER_LOGGER_READER_H + +#include "logger_loader_io.h" +#include "logger_logfile.h" +#include "logger_particle.h" + +/** + * @brief Main structure of the logger. + * + * This structure contains all the variables required for the logger. + * It should be the only structure that the user see. + * + * It is initialized with #logger_reader_init and freed with + * #logger_reader_free. + */ +struct logger_reader { + + /* Time of each index file. #TODO */ + double *times; + + /* Informations contained in the file header. */ + struct logger_logfile log; + + /* Level of verbosity. */ + int verbose; +}; + +void logger_reader_init(struct logger_reader *reader, char *filename, + int verbose); +void logger_reader_free(struct logger_reader *reader); +size_t reader_read_record(struct logger_reader *reader, + struct logger_particle *lp, double *time, + int *is_particle, size_t offset); +#endif // LOGGER_LOGGER_READER_H diff --git a/logger/logger_time.c b/logger/logger_time.c new file mode 100644 index 0000000000000000000000000000000000000000..d2c6ebc3f9e3171ba7fdec6c6a63eb23d7001df6 --- /dev/null +++ b/logger/logger_time.c @@ -0,0 +1,315 @@ +/******************************************************************************* + * This file is part of SWIFT. + * Copyright (c) 2019 Loic Hausammann (loic.hausammann@epfl.ch) + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published + * by the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public License + * along with this program. If not, see <http://www.gnu.org/licenses/>. + * + ******************************************************************************/ +#include "logger_time.h" +#include "logger_loader_io.h" +#include "logger_logfile.h" +#include "logger_reader.h" + +/** + * @brief Check if enough space is available and increase it if required. + * + * @param t The #time_array. + */ +void time_array_ensure_size(struct time_array *t) { + /* Check if we still have some place. */ + if (t->size < t->capacity) return; + + /* Increase the size */ + t->capacity *= 2; + + /* Allocate the new array */ + struct time_record *tmp = malloc(sizeof(struct time_record) * t->capacity); + if (tmp == NULL) error("Failed to allocate the time records."); + + /* Copy the memory */ + memcpy(tmp, t->records, sizeof(struct time_record) * t->size); + + /* Cleanup the memory */ + free(t->records); + + /* Set the pointer to the new array */ + t->records = tmp; +} + +/** + * @brief Add an element to the #time_array. + * + * @param t The #time_array. + * @param int_time The time in integer. + * @param time The time in double. + * @param offset The offset of the record. + */ +void time_array_append(struct time_array *t, const integertime_t int_time, + const double time, const size_t offset) { + + /* Increase the available space if required */ + time_array_ensure_size(t); + + /* Copy the values */ + t->records[t->size].time = time; + t->records[t->size].int_time = int_time; + t->records[t->size].offset = offset; + + /* Increase the size used. */ + t->size += 1; +} + +/** + * @brief read a time record. + * + * @param int_time integer time read. + * @param time time read. + * @param reader The #logger_reader. + * @param offset position in the file. + * + */ +size_t time_read(integertime_t *int_time, double *time, + const struct logger_reader *reader, size_t offset) { + + /* Initialize variables. */ + const struct header *h = &reader->log.header; + void *map = h->log->log.map; + + size_t mask = 0; + size_t prev_offset = 0; + *int_time = 0; + *time = 0; + + /* read record header. */ + map = logger_loader_io_read_mask(h, map + offset, &mask, &prev_offset); + +#ifdef SWIFT_DEBUG_CHECKS + + /* check if time mask is present in log file header. */ + int ind = header_get_field_index(h, "timestamp"); + if (ind == -1) error("File header does not contain a mask for time."); + + /* check if reading a time record. */ + if (h->masks[ind].mask != mask) error("Not a time record."); +#endif + + /* read the record. */ + map = + logger_loader_io_read_data(map, sizeof(unsigned long long int), int_time); + map = logger_loader_io_read_data(map, sizeof(double), time); + + return map - h->log->log.map; +} + +/** + * @brief get offset of first time record + * + * @param h file #header + * @return offset of first time record + * + */ +size_t time_offset_first_record(const struct header *h) { + + /* Initialize a few variables. */ + size_t offset = h->offset_first_record; + void *map = h->log->log.map; + + /* Check that the first record is really a time record. 
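+     A valid log is expected to start with a timestamp record; anything
+     else means the file is corrupted or was written by an incompatible
+     version of the logger.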
*/ + int i = header_get_field_index(h, "timestamp"); + + if (i == -1) error("Time mask not present in the log file header."); + + size_t mask = 0; + logger_loader_io_read_mask(h, map + offset, &mask, NULL); + + if (mask != h->masks[i].mask) error("Log file should begin by timestep."); + + return h->offset_first_record; +} + +/** + * @brief Initialize an empty time array. + * + * @param t #time_array to initialize. + */ +void time_array_init(struct time_array *t) { + /* Allocate the arrays */ + t->records = malloc(sizeof(struct time_record) * LOGGER_TIME_INIT_SIZE); + if (t->records == NULL) error("Failed to initialize the time records."); + + /* Initialize the sizes */ + t->size = 0; + t->capacity = LOGGER_TIME_INIT_SIZE; +} + +/** + * @brief Initialize a time array from a file. + * + * @param t #time_array to initialize. + * @param log The #logger_logfile. + */ +void time_array_populate(struct time_array *t, struct logger_logfile *log) { + + /* Initialize a few variables. */ + integertime_t int_time = 0; + double time = 0; + + /* get file size. */ + size_t file_size = log->log.file_size; + + /* get first time stamp. */ + size_t offset = time_offset_first_record(&log->header); + while (offset < file_size) { + /* read current time record and store it. */ + size_t tmp_offset = offset; + time_read(&int_time, &time, log->reader, tmp_offset); + time_array_append(t, int_time, time, offset); + + /* get next record. */ + int test = tools_get_next_record(&log->header, log->log.map, &offset, + log->log.file_size); + if (test == -1) break; + } +} + +/** + * @brief access the time of a given record (by its offset). + * + * @param t #time_array to access. + * @param offset offset of the record. + * + * @return integer time of the record. + */ +integertime_t time_array_get_integertime(struct time_array *t, + const size_t offset) { + size_t ind = time_array_get_index(t, offset); + return t->records[ind].int_time; +} + +/** + * @brief access the time of a given record (by its offset). + * + * @param t #time_array to access. + * @param offset offset of the record. + * + * @return time of the record. + */ +double time_array_get_time(const struct time_array *t, const size_t offset) { + size_t ind = time_array_get_index(t, offset); + return t->records[ind].time; +} + +/** + * @brief Find the index of the last time record written before a given offset. + * + * @param t #time_array to access. + * @param offset offset of the record. + * + * @return The index of the last time record. + */ +size_t time_array_get_index(const struct time_array *t, const size_t offset) { + +#ifdef SWIFT_DEBUG_CHECKS + if (!t) error("NULL pointer."); + + if (offset < t->records[0].offset || offset > t->records[t->size - 1].offset) + error("Offset outside of range."); +#endif + + /* left will contain the index at the end of the loop */ + size_t left = 0; + size_t right = t->size - 1; + + /* Find the time_array with the correct offset through a bisection method. 
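+     The records are stored in increasing offset order, so if the exact
+     offset is not found the loop ends with `right` pointing at the last
+     time record written before the requested offset, which is the index
+     returned below.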
*/ + while (left <= right) { + size_t center = (left + right) / 2; + const size_t offset_center = t->records[center].offset; + + if (offset > offset_center) { + left = center + 1; + } else if (offset < offset_center) { + right = center - 1; + } else { + return center; + } + } + + return right; +} + +/** + * @brief free memory of a #time_array + * + * @param t #time_array to free + */ +void time_array_free(struct time_array *t) { + /* Free the arrays */ + free(t->records); + t->records = NULL; + + /* Reset the counters */ + t->size = 0; + t->capacity = 0; +} + +/** + * @brief print a #time_array + * + * @param t #time_array to print + */ +void time_array_print(const struct time_array *t) { + const size_t threshold = 4; + + size_t n = t->size; + size_t up_threshold = n - threshold; + + printf("Times (size %lu): [%lli (%g)", n, t->records[0].int_time, + t->records[0].time); + + /* Loop over all elements. */ + for (size_t i = 1; i < n; i++) { + /* Skip the times at the center of the array. */ + if (i < threshold || i > up_threshold) + printf(", %lli (%g)", t->records[i].int_time, t->records[i].time); + + if (i == threshold) printf(", ..."); + } + + printf("]\n"); +} + +/** + * @brief print a #time_array (offset) + * + * @param t #time_array to print + */ +void time_array_print_offset(const struct time_array *t) { + const size_t threshold = 4; + + size_t n = t->size; + size_t up_threshold = n - threshold; + + printf("Times (size %lu): [%lu", n, t->records[0].offset); + + /* Loop over all elements. */ + for (size_t i = 1; i < n; i++) { + /* Skip the offset in the middle of the array. */ + if (i < threshold || i > up_threshold) + printf(", %lu", t->records[i].offset); + + if (i == threshold) printf(", ..."); + } + + printf("]\n"); +} diff --git a/logger/logger_time.h b/logger/logger_time.h new file mode 100644 index 0000000000000000000000000000000000000000..b27abffb9c1b3aa02c82c1739d1206b43f3ac431 --- /dev/null +++ b/logger/logger_time.h @@ -0,0 +1,95 @@ +/******************************************************************************* + * This file is part of SWIFT. + * Copyright (c) 2019 Loic Hausammann (loic.hausammann@epfl.ch) + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published + * by the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public License + * along with this program. If not, see <http://www.gnu.org/licenses/>. + * + ******************************************************************************/ +#ifndef LOGGER_LOGGER_TIMELINE_H +#define LOGGER_LOGGER_TIMELINE_H + +#include "logger_header.h" +#include "logger_tools.h" + +typedef int8_t timebin_t; +typedef long long integertime_t; + +struct logger_reader; + +#define LOGGER_TIME_INIT_SIZE 1024 + +/** + * @brief This structure contains all the information present in a time record. + */ +struct time_record { + /* Integertime of the records. */ + integertime_t int_time; + + /* Double time of the records. */ + double time; + + /* Offset in the file of the time records. */ + size_t offset; +}; + +/** + * @brief This structure contains all the time record. 
+ * + * In order to obtain easily the time step of a record, + * this structure is required. It contains all the time step + * with their integer time, double time and position in the file. + * + * This structure is initialized with #time_array_init and #time_array_populate, + * and freed with #time_array_free. + * + * The time step of an offset can be obtained with + * #time_array_get_integertime, #time_array_get_time and + * #time_array_get_index. + */ +struct time_array { + + /* The complete list of time record */ + struct time_record *records; + + /* Number of element in the arrays. */ + size_t size; + + /* Maximum number of element available */ + size_t capacity; +}; + +void time_array_append(struct time_array *t, const integertime_t int_time, + const double time, const size_t offset); +size_t time_read(integertime_t *int_time, double *time, + const struct logger_reader *reader, size_t offset); + +void time_array_init(struct time_array *t); +void time_array_populate(struct time_array *t, struct logger_logfile *log); + +integertime_t time_array_get_integertime(struct time_array *t, + const size_t offset); + +double time_array_get_time(const struct time_array *t, const size_t offset); + +size_t time_array_get_index(const struct time_array *t, const size_t offset); + +void time_array_free(struct time_array *t); + +void time_array_print(const struct time_array *t); + +void time_array_print_offset(const struct time_array *t); + +size_t time_offset_first_record(const struct header *h); + +#endif // LOGGER_LOGGER_TIMELINE_H diff --git a/logger/logger_tools.c b/logger/logger_tools.c new file mode 100644 index 0000000000000000000000000000000000000000..a9a6ecfcb0acf72b11898d00fdfeff90fd70406d --- /dev/null +++ b/logger/logger_tools.c @@ -0,0 +1,231 @@ +/******************************************************************************* + * This file is part of SWIFT. + * Copyright (c) 2019 Loic Hausammann (loic.hausammann@epfl.ch) + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published + * by the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public License + * along with this program. If not, see <http://www.gnu.org/licenses/>. + * + ******************************************************************************/ +#include "logger_tools.h" +#include "logger_header.h" +#include "logger_loader_io.h" +#include "logger_reader.h" + +#include "logger_particle.h" + +#include <stdio.h> + +/** + * @brief get the offset of the next corresponding record. + * + * @param h #header structure of the file + * @param map file mapping + * @param offset In: initial offset, Out: offset of the next record + * @param file_size The file size. 
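+ *
+ * With forward offsets the next record is reached directly; with backward
+ * offsets the whole file has to be scanned, which is only allowed in
+ * debug builds.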
+ * + * @return -1 if no next record, otherwise 0 + */ +int tools_get_next_record(const struct header *h, void *map, size_t *offset, + size_t file_size) { + if (header_is_forward(h)) + return _tools_get_next_record_forward(h, map, offset); + if (header_is_backward(h)) + return _tools_get_next_record_backward(h, map, offset, file_size); + else + error("Offsets are corrupted."); +} + +/** + * @brief internal function of #tools_get_next_record. Should not be used + * outside. + * + * @param h #header structure of the file + * @param map file mapping + * @param offset (Out) offset of the next record + * + * @return error code, -1 if no next record + */ +int _tools_get_next_record_forward(const struct header *h, void *map, + size_t *offset) { + size_t diff_offset = 0; + + /* Read the offset. */ + map = logger_loader_io_read_mask(h, map + *offset, NULL, &diff_offset); + + if (diff_offset == 0) return -1; + + /* Set the absolute offset. */ + *offset += diff_offset; + return 0; +} + +/** + * @brief internal function of #tools_get_next_record. Should not be used (very + * slow) + * + * @param h #header structure of the file + * @param map file mapping + * @param offset In: initial offset, Out: offset of the next record + * @param file_size The file size. + * + * @return error code, -1 if no next record + */ +int _tools_get_next_record_backward(const struct header *h, void *map, + size_t *offset, size_t file_size) { +#ifndef SWIFT_DEBUG_CHECKS + error("Should not be used, method too slow"); +#endif + size_t current_offset = *offset; + size_t record_header = LOGGER_MASK_SIZE + LOGGER_OFFSET_SIZE; + + while (current_offset < file_size) { + size_t mask = 0; + size_t prev_offset; + logger_loader_io_read_mask(h, map + current_offset, &mask, &prev_offset); + + prev_offset = current_offset - prev_offset - record_header; + if (*offset == prev_offset) { + *offset = current_offset - record_header; + return 0; + } + + current_offset += header_get_record_size_from_mask(h, mask); + } + + return -1; +} + +/** + * @brief switch side offset. + * + * From current record, switch side of the offset of the previous one. + * @param h #header structure of the file. + * @param file_map file mapping. + * @param offset position of the record. + * + * @return position after the record. + */ +size_t tools_reverse_offset(const struct header *h, void *file_map, + size_t offset) { + size_t mask = 0; + size_t prev_offset = 0; + const size_t cur_offset = offset; + void *map = file_map; + + /* read mask + offset. */ + map = logger_loader_io_read_mask(h, map + offset, &mask, &prev_offset); + + /* write offset of zero (in case it is the last record). */ + const size_t zero = 0; + map -= LOGGER_OFFSET_SIZE; + map = logger_loader_io_write_data(map, LOGGER_OFFSET_SIZE, &zero); + + /* set offset after current record. */ + map += header_get_record_size_from_mask(h, mask); + size_t after_current_record = (size_t)(map - file_map); + + /* first records do not have a previous partner. */ + if (prev_offset == cur_offset) return after_current_record; + + if (prev_offset > cur_offset) + error("Unexpected offset: header %lu, current %lu.", prev_offset, + cur_offset); + + /* modify previous offset. 
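+     The previous record starts at cur_offset - prev_offset; skipping its
+     mask lands on its offset field, which is overwritten with prev_offset
+     so that it now points forward to the current record.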
*/ + map = file_map + cur_offset - prev_offset + LOGGER_MASK_SIZE; + map = logger_loader_io_write_data(map, LOGGER_OFFSET_SIZE, &prev_offset); + +#ifdef SWIFT_DEBUG_CHECKS + size_t prev_mask = 0; + map -= LOGGER_MASK_SIZE + LOGGER_OFFSET_SIZE; + logger_loader_io_read_mask(h, map, &prev_mask, NULL); + + /* Check if we are not mixing time stamp and particles */ + if ((prev_mask != 128 && mask == 128) || (prev_mask == 128 && mask != 128)) + error("Unexpected mask: %lu, got %lu.", mask, prev_mask); + +#endif // SWIFT_DEBUG_CHECKS + + return after_current_record; +} + +/** + * @brief debugging function checking the offset and the mask of a record. + * + * Compare the mask with the one pointed by the header. + * if the record is a particle, check the id too. + * + * @param reader The #logger_reader. + * @param offset position of the record. + * + * @return position after the record. + */ +size_t tools_check_record_consistency(const struct logger_reader *reader, + size_t offset) { +#ifndef SWIFT_DEBUG_CHECKS + error("Should not check in non debug mode."); +#endif + + const struct header *h = &reader->log.header; + void *file_init = reader->log.log.map; + void *map = file_init + offset; + + size_t mask; + size_t pointed_offset; + + /* read mask + offset. */ + map = logger_loader_io_read_mask(h, map, &mask, &pointed_offset); + + /* get absolute offset. */ + if (header_is_forward(h)) + pointed_offset += offset; + else if (header_is_backward(h)) { + if (offset < pointed_offset) + error("Offset too large (%lu) at %lu with mask %lu.", pointed_offset, + offset, mask); + pointed_offset = offset - pointed_offset; + } else { + error("Offset are corrupted."); + } + + /* set offset after current record. */ + map += header_get_record_size_from_mask(h, mask); + + if (pointed_offset == offset || pointed_offset == 0) + return (size_t)(map - file_init); + + /* read mask of the pointed record. */ + size_t pointed_mask = 0; + logger_loader_io_read_mask(h, file_init + pointed_offset, &pointed_mask, + NULL); + + /* check if not mixing time stamp and particles. */ + if ((pointed_mask != 128 && mask == 128) || + (pointed_mask == 128 && mask != 128)) + error("Error in the offset (mask %lu at %lu != %lu at %lu).", mask, offset, + pointed_mask, pointed_offset); + + if (pointed_mask == 128) return (size_t)(map - file_init); + + struct logger_particle part; + logger_particle_read(&part, reader, offset, 0, logger_reader_const); + + size_t id = part.id; + logger_particle_read(&part, reader, pointed_offset, 0, logger_reader_const); + + if (id != part.id) + error("Offset wrong, id incorrect (%lu != %lu) at %lu.", id, part.id, + pointed_offset); + + return (size_t)(map - file_init); +} diff --git a/logger/logger_tools.h b/logger/logger_tools.h new file mode 100644 index 0000000000000000000000000000000000000000..21a59e42fca144a0381b15e8771ca14ceed46b33 --- /dev/null +++ b/logger/logger_tools.h @@ -0,0 +1,59 @@ +/******************************************************************************* + * This file is part of SWIFT. + * Copyright (c) 2019 Loic Hausammann (loic.hausammann@epfl.ch) + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published + * by the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. 
+ * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public License + * along with this program. If not, see <http://www.gnu.org/licenses/>. + * + ******************************************************************************/ +/** + * @brief This file contains functions that help to navigate in the logs. + */ +#ifndef LOGGER_LOGGER_TOOLS_H +#define LOGGER_LOGGER_TOOLS_H + +#include "../config.h" + +/* Swift include */ +#include "../src/dimension.h" +#include "../src/error.h" +#include "../src/inline.h" +#include "../src/logger.h" +#include "../src/part_type.h" + +#ifdef HAVE_PYTHON +#define NPY_NO_DEPRECATED_API NPY_1_7_API_VERSION +#include <Python.h> +#endif + +#include <errno.h> +#include <stdio.h> +#include <stdlib.h> +#include <string.h> + +#define STRING_SIZE 200 + +struct header; +struct logger_reader; + +int tools_get_next_record(const struct header *h, void *map, size_t *offset, + size_t file_size); +int _tools_get_next_record_backward(const struct header *h, void *map, + size_t *offset, size_t file_size); +int _tools_get_next_record_forward(const struct header *h, void *map, + size_t *offset); +size_t tools_reverse_offset(const struct header *h, void *map, size_t offset); +size_t tools_check_record_consistency(const struct logger_reader *reader, + size_t offset); + +#endif // LOGGER_LOGGER_TOOLS_H diff --git a/logger/python/reader_example.py b/logger/python/reader_example.py new file mode 100644 index 0000000000000000000000000000000000000000..6ace309c5b68b4fc4f1088b6206cd1ae3ccd69a5 --- /dev/null +++ b/logger/python/reader_example.py @@ -0,0 +1,55 @@ +#!/usr/bin/env python3 +""" +Read a logger file by using an index. +Example: ./reader_example.py ../../examples/SedovBlast_3D/index.dump ../../examples/SedovBlast_3D/index_0005.hdf5 +""" +import sys +from h5py import File +import numpy as np +import matplotlib.pyplot as plt +sys.path.append("../.libs/") + +import liblogger as logger + +# Get filenames +if len(sys.argv) != 3: + print("WARNING missing arguments. 
Will use the default ones") + index = "../../examples/HydroTests/SedovBlast_3D/index_0002.hdf5" + dump = "../../examples/HydroTests/SedovBlast_3D/index.dump" +else: + index = sys.argv[-1] + dump = sys.argv[-2] + +# constant +offset_name = "PartType0/Offset" +header = "Header" +time_name = "Time Offset" + +# Read index file +with File(index, "r") as f: + if offset_name not in f: + raise Exception("Unable to find the offset dataset") + offset = f[offset_name][:] + + if header not in f: + raise Exception("Unable to find the header") + if time_name not in f[header].attrs: + raise Exception("Unable to find the time offset") + time_offset = f[header].attrs[time_name] + +# read dump +data = logger.loadFromIndex(offset, dump, time_offset) + +# Compute distance from center +pos = data["positions"] +center = pos.mean() +r2 = np.sum((pos - center)**2, axis=1) + +# plot entropy vs distance +plt.plot(np.sqrt(r2), data["entropy"], '.') + +plt.xlim(0., 0.5) +plt.ylim(-5, 50) +plt.xlabel("Radius") +plt.ylabel("Entropy") +plt.show() diff --git a/logger/tests/Makefile.am b/logger/tests/Makefile.am new file mode 100644 index 0000000000000000000000000000000000000000..dd94462b8b98b0a089d0f959b81c603c29911a76 --- /dev/null +++ b/logger/tests/Makefile.am @@ -0,0 +1,37 @@ +# This file is part of SWIFT. +# Copyright (c) 2019 loic.hausammann@epfl.ch. +# +# This program is free software: you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation, either version 3 of the License, or +# (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with this program. If not, see <http://www.gnu.org/licenses/>. + +# Add the source directory and the non-standard paths to the included library headers to CFLAGS +AM_CFLAGS = -I$(top_srcdir)/src -I$(top_srcdir)/logger $(HDF5_CPPFLAGS) $(GSL_INCS) $(FFTW_INCS) + +AM_LDFLAGS = ../../src/.libs/libswiftsim.a ../.libs/liblogger.a $(HDF5_LDFLAGS) $(HDF5_LIBS) $(FFTW_LIBS) $(TCMALLOC_LIBS) $(JEMALLOC_LIBS) $(TBBMALLOC_LIBS) $(GRACKLE_LIBS) $(GSL_LIBS) $(PROFILER_LIBS) + +# List of programs and scripts to run in the test suite +TESTS = testLogfileHeader testLogfileReader testTimeArray + +# List of test programs to compile +check_PROGRAMS = testLogfileHeader testLogfileReader testTimeArray + +# Rebuild tests when SWIFT is updated. +$(check_PROGRAMS): ../../src/.libs/libswiftsim.a ../.libs/liblogger.a + +# Sources for the individual programs +testLogfileHeader_SOURCES = testLogfileHeader.c +testLogfileReader_SOURCES = testLogfileReader.c +testTimeArray_SOURCES = testTimeArray.c + +# Files necessary for distribution +EXTRA_DIST = testLogfileHeader.yml testLogfileReader.yml diff --git a/logger/tests/testLogfileHeader.c b/logger/tests/testLogfileHeader.c new file mode 100644 index 0000000000000000000000000000000000000000..0f2c8a5df7942d50cbb641b99e3173a05fe1d539 --- /dev/null +++ b/logger/tests/testLogfileHeader.c @@ -0,0 +1,95 @@ +/******************************************************************************* + * This file is part of SWIFT. 
+ * Copyright (C) 2019 Loic Hausammann (loic.hausammann@epfl.ch) + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published + * by the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public License + * along with this program. If not, see <http://www.gnu.org/licenses/>. + * + ******************************************************************************/ + +#include "logger_header.h" +#include "logger_logfile.h" +#include "logger_reader.h" +#include "swift.h" + +int main(int argc, char *argv[]) { + + /* + First generate the file. + */ + + message("Generating the dump."); + /* Create required structures. */ + struct logger_writer log; + struct swift_params params; + char filename[200] = "testLogfileHeader.yml"; + + /* Read parameters. */ + parser_read_file(filename, ¶ms); + + /* Initialize the logger. */ + logger_init(&log, ¶ms); + + /* get dump filename. */ + char dump_filename[PARSER_MAX_LINE_SIZE]; + strcpy(dump_filename, log.base_name); + strcat(dump_filename, ".dump"); + + /* Write file header. */ + logger_write_file_header(&log); + + /* clean memory. */ + logger_free(&log); + /* + Then read the file. + */ + + message("Reading the header."); + /* Generate required structure for reading. */ + struct logger_reader reader; + struct logger_logfile *logfile = &reader.log; + logfile->reader = &reader; + + /* Set verbose level. */ + reader.verbose = 1; + + /* Read the header */ + logger_logfile_init_from_file(logfile, dump_filename, &reader, + /* only_header */ 1); + /* + Finally check everything. + */ + + struct header *h = &logfile->header; + message("Checking versions."); + assert(h->major_version == logger_major_version); + assert(h->minor_version == logger_minor_version); + + message("Checking offset of first record"); + assert(h->offset_first_record == logfile->log.file_size); + + message("Checking number of masks"); + assert(h->number_mask == logger_count_mask); + + message("Checking masks"); + for (int i = 0; i < logger_count_mask; i++) { + assert(logger_mask_data[i].size == h->masks[i].size); + assert(logger_mask_data[i].mask == h->masks[i].mask); + assert(strcmp(logger_mask_data[i].name, h->masks[i].name) == 0); + } + + message("Checking offset direction"); + assert(h->offset_direction == logger_offset_backward); + + return 0; +} diff --git a/logger/tests/testLogfileHeader.yml b/logger/tests/testLogfileHeader.yml new file mode 100644 index 0000000000000000000000000000000000000000..b97c513fa9ee1c3d9816b54afed38f4124dc3957 --- /dev/null +++ b/logger/tests/testLogfileHeader.yml @@ -0,0 +1,6 @@ +# Parameter file for the tests +Logger: + delta_step: 10 + initial_buffer_size: 0.1 # in GB + buffer_scale: 10 + basename: test_header \ No newline at end of file diff --git a/logger/tests/testLogfileReader.c b/logger/tests/testLogfileReader.c new file mode 100644 index 0000000000000000000000000000000000000000..751c6b7d628fcd1191e8deba9135cddd8cd04bf8 --- /dev/null +++ b/logger/tests/testLogfileReader.c @@ -0,0 +1,311 @@ +/******************************************************************************* + * This file is part of SWIFT. 
+ * Copyright (C) 2019 Loic Hausammann (loic.hausammann@epfl.ch). + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published + * by the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public License + * along with this program. If not, see <http://www.gnu.org/licenses/>. + * + ******************************************************************************/ + +#include "logger_header.h" +#include "logger_loader_io.h" +#include "logger_particle.h" +#include "logger_reader.h" +#include "swift.h" + +#define number_parts 100 +/* Not all the fields are written at every step. + * Here we define how often a few fields are written. + */ +#define period_rho 2 +#define period_h 4 + +/** + * @brief Initialize the particles. + * + * @param p The array of #part. + * @param xp The array of #xpart. + */ +void init_particles(struct part *p, struct xpart *xp) { + struct hydro_space hs; + + for (int i = 0; i < number_parts; i++) { + /* Set internal energy. */ + hydro_set_init_internal_energy(&p[i], 100); + + /* Initialize particle. */ + hydro_first_init_part(&p[i], &xp[i]); + hydro_init_part(&p[i], &hs); + + for (int j = 0; j < 3; j++) { + p[i].x[j] = i; + p[i].v[j] = (j == 0) ? -1 : 0; + p[i].a_hydro[j] = (j == 1) ? 1e-2 : 0; + } + p[i].h = 15; + p[i].rho = 50; + p[i].id = i; + hydro_set_mass(&p[i], 1.5); + xp[i].logger_data.last_offset = 0; + + /* Add time bin in order to skip particles. */ + p[i].time_bin = (i % 10) + 1; + } +} + +/** Provides a integer time given the step number.*/ +integertime_t get_integer_time(int step) { return step; } + +/** Provides a double time given the step number. */ +double get_double_time(int step) { + const double time_base = 1e-4; + return step * time_base; +} + +/** + * @brief Write a few particles during multiple time steps. + * + * As only the logger is tested, there is no need to really + * evolve the particles. + */ +void write_particles(struct logger_writer *log, struct part *parts, + struct xpart *xparts) { + + const int number_steps = 100; + + /* Loop over all the steps. */ + for (int i = 0; i < number_steps; i++) { + integertime_t ti_int = get_integer_time(i); + double ti_double = get_double_time(i); + + /* Mark the current time step in the particle logger file. */ + logger_log_timestamp(log, ti_int, ti_double, &log->timestamp_offset); + /* Make sure that we have enough space in the particle logger file + * to store the particles in current time step. */ + logger_ensure_size(log, number_parts, /* number gpart */ 0, 0); + + /* Loop over all the particles. */ + for (int j = 0; j < number_parts; j++) { + + /* Skip some particles. */ + if (i % parts[j].time_bin != 0) continue; + + /* Write a time information to check that the correct particle is read. */ + parts[j].x[0] = i; + + /* Write this particle. 
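+         The mask always contains the position, velocity, acceleration,
+         internal energy and constant fields; the smoothing length and the
+         density are only added every period_h (resp. period_rho) active
+         steps of this particle.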
*/ + unsigned int mask = + logger_mask_data[logger_x].mask | logger_mask_data[logger_v].mask | + logger_mask_data[logger_a].mask | logger_mask_data[logger_u].mask | + logger_mask_data[logger_consts].mask; + + int number_particle_step = i / parts[j].time_bin; + + if (number_particle_step % period_h == 0) + mask |= logger_mask_data[logger_h].mask; + if (number_particle_step % period_rho == 0) + mask |= logger_mask_data[logger_rho].mask; + + logger_log_part(log, &parts[j], mask, &xparts[j].logger_data.last_offset); + } + + // TODO write index files. + } + + /* Mark the current time step in the particle logger file. */ + integertime_t ti_int = get_integer_time(number_steps); + double ti_double = get_double_time(number_steps); + logger_log_timestamp(log, ti_int, ti_double, &log->timestamp_offset); +} + +/** Count the number of active particles. */ +int get_number_active_particles(int step, struct part *p) { + int count = 0; + for (int i = 0; i < number_parts; i++) { + if (step % p[i].time_bin == 0) count += 1; + } + return count; +} +/** + * @brief Check that the reader contains the correct data + * + * @param reader The #logger_reader. + */ +void check_data(struct logger_reader *reader, struct part *parts, + struct xpart *xparts) { + + /* No need to check the header, this is already done in testHeader.c */ + + /* Get required structures. */ + struct logger_logfile *logfile = &reader->log; + + struct logger_particle lp; + logger_particle_init(&lp); + + /* Define a few variables */ + double time = get_double_time(0); + int is_particle = 0; + int step = -1; + + /* Number of particle found during this time step. */ + int count = 0; + /* Set it to an impossible value in order to flag it. */ + const size_t id_flag = 5 * number_parts; + size_t previous_id = id_flag; + + /* Loop over each record. */ + for (size_t offset = reader_read_record(reader, &lp, &time, &is_particle, + logfile->header.offset_first_record); + offset < logfile->log.file_size; + offset = reader_read_record(reader, &lp, &time, &is_particle, offset)) { + + /* Do the particle case */ + if (is_particle) { + count += 1; + + /* + Check that we are really increasing the id in the logfile. + See the writing part to see that we are always increasing the id. + */ + if (previous_id != id_flag && previous_id >= lp.id) { + error("Wrong particle found"); + previous_id = lp.id; + } + + /* Get the corresponding particle */ + if (lp.id >= number_parts) error("Wrong id %zi", lp.id); + + struct part *p = &parts[lp.id]; + + /* Check the record's data. */ + for (int i = 0; i < 3; i++) { + /* in the first index, we are storing the step information. */ + if (i == 0) + assert(step == lp.pos[i]); + else + assert(p->x[i] == lp.pos[i]); + assert(p->v[i] == lp.vel[i]); + assert(p->a_hydro[i] == lp.acc[i]); + } + + assert(p->entropy == lp.entropy); + assert(p->mass == lp.mass); + + /* Check optional fields. */ + int number_steps = step / p->time_bin; + if (number_steps % period_h == 0) { + assert(p->h == lp.h); + } else { + assert(-1 == lp.h); + } + if (number_steps % period_rho == 0) { + assert(p->rho == lp.density); + } else { + assert(-1 == lp.density); + } + } + /* Time stamp case. */ + else { + + /* Check if we have the current amount of particles in previous step. */ + if (step != -1 && count != get_number_active_particles(step, parts)) + error( + "The reader did not find the correct number of particles during " + "step %i", + step); + + step += 1; + + /* Reset some variables. */ + previous_id = id_flag; + count = 0; + + /* Check the record's data. 
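+         The time read from the timestamp record must match the time
+         logged for this step.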
*/ + assert(time == get_double_time(step)); + } + } +} + +int main(int argc, char *argv[]) { + + /* + First generate the file. + */ + + message("Generating the dump."); + + /* Create required structures. */ + struct logger_writer log; + struct swift_params params; + char filename[200] = "testLogfileReader.yml"; + + /* Read parameters. */ + parser_read_file(filename, ¶ms); + + /* Initialize the particles. */ + struct part *parts; + if ((parts = (struct part *)malloc(sizeof(struct part) * number_parts)) == + NULL) + error("Failed to allocate particles array."); + + struct xpart *xparts; + if ((xparts = (struct xpart *)malloc(sizeof(struct xpart) * number_parts)) == + NULL) + error("Failed to allocate xparticles array."); + + init_particles(parts, xparts); + + /* Initialize the logger. */ + logger_init(&log, ¶ms); + + /* get dump filename. */ + char dump_filename[PARSER_MAX_LINE_SIZE]; + message("%s", log.base_name); + strcpy(dump_filename, log.base_name); + strcat(dump_filename, ".dump"); + + /* Write file header. */ + logger_write_file_header(&log); + + /* Write particles. */ + write_particles(&log, parts, xparts); + + /* clean memory */ + logger_free(&log); + /* + Then read the file. + */ + + message("Reading the header."); + + /* Generate required structure for reading. */ + struct logger_reader reader; + + /* Set verbose level. */ + reader.verbose = 1; + + /* Read the header. */ + logger_reader_init(&reader, dump_filename, /* verbose */ 1); + + /* + Finally check everything. + */ + + check_data(&reader, parts, xparts); + + /* Do some cleanup. */ + free(parts); + free(xparts); + + return 0; +} diff --git a/logger/tests/testLogfileReader.yml b/logger/tests/testLogfileReader.yml new file mode 100644 index 0000000000000000000000000000000000000000..1ac5e2da909f1fe53cba052bbd24c5c3ce98dfed --- /dev/null +++ b/logger/tests/testLogfileReader.yml @@ -0,0 +1,6 @@ +# Parameter file for the tests +Logger: + delta_step: 10 + initial_buffer_size: 0.01 # in GB + buffer_scale: 10 + basename: test_reader \ No newline at end of file diff --git a/logger/tests/testTimeArray.c b/logger/tests/testTimeArray.c new file mode 100644 index 0000000000000000000000000000000000000000..929a7124baa8ab05fd3452f87076d95c88c2f3b2 --- /dev/null +++ b/logger/tests/testTimeArray.c @@ -0,0 +1,78 @@ +/******************************************************************************* + * This file is part of SWIFT. + * Copyright (C) 2019 Loic Hausammann (loic.hausammann@epfl.ch) + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published + * by the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public License + * along with this program. If not, see <http://www.gnu.org/licenses/>. 
+ *
+ ******************************************************************************/
+
+#include <stdlib.h>
+#include <time.h>
+#include "logger_time.h"
+
+#define NUMBER_OF_ELEMENT 10000
+#define TIME_BASE 0.04
+#define OFFSET_BASE 1000
+
+int main(int argc, char *argv[]) {
+
+  /* Check that we are really testing the reallocation */
+  if (NUMBER_OF_ELEMENT < LOGGER_TIME_INIT_SIZE) {
+    error("Not testing the reallocation.");
+  }
+
+  /* Fix the random seed in order to reproduce the results */
+  srand(100);
+
+  /* Initialize the time array */
+  struct time_array times;
+  time_array_init(&times);
+
+  /* Add elements */
+  for (size_t i = 0; i < NUMBER_OF_ELEMENT; i++) {
+    integertime_t int_time = i;
+    double time = i * TIME_BASE;
+    size_t offset = i * OFFSET_BASE;
+
+    time_array_append(&times, int_time, time, offset);
+  }
+
+  /* Check the elements */
+  for (size_t i = 0; i < NUMBER_OF_ELEMENT; i++) {
+    integertime_t int_time = i;
+    double time = i * TIME_BASE;
+    size_t offset = i * OFFSET_BASE;
+
+    /* Ensure that we can get the correct offset when looking
+       in between the records. */
+    int r = rand() % OFFSET_BASE;
+    size_t read_offset = offset + r;
+
+    /* The offset cannot be larger than the largest one */
+    if (i == NUMBER_OF_ELEMENT - 1) {
+      read_offset = offset;
+    }
+
+    /* Get the index from the offset */
+    size_t ind = time_array_get_index(&times, read_offset);
+
+    /* Check the values obtained */
+    assert(i == ind);
+    assert(int_time == times.records[ind].int_time);
+    assert(time == times.records[ind].time);
+    assert(offset == times.records[ind].offset);
+  }
+
+  return 0;
+}
diff --git a/src/Makefile.am b/src/Makefile.am
index 947fd1a82c487a514c7f1556fa0c1b469a408d8c..665aa4b24c94162fb8f772edd346f3c95a1d7ddb 100644
--- a/src/Makefile.am
+++ b/src/Makefile.am
@@ -44,7 +44,7 @@ include_HEADERS = space.h runner.h queue.h task.h lock.h cell.h part.h const.h \
 	common_io.h single_io.h multipole.h map.h tools.h partition.h partition_fixed_costs.h \
 	clocks.h parser.h physical_constants.h physical_constants_cgs.h potential.h version.h \
 	hydro_properties.h riemann.h threadpool.h cooling_io.h cooling.h cooling_struct.h \
-	statistics.h memswap.h cache.h runner_doiact_vec.h profiler.h entropy_floor.h \
+	statistics.h memswap.h cache.h runner_doiact_hydro_vec.h profiler.h entropy_floor.h \
 	dump.h logger.h active.h timeline.h xmf.h gravity_properties.h gravity_derivatives.h \
 	gravity_softened_derivatives.h vector_power.h collectgroup.h hydro_space.h sort_part.h \
 	chemistry.h chemistry_io.h chemistry_struct.h cosmology.h restart.h space_getsid.h utilities.h \
@@ -69,13 +69,18 @@ EAGLE_FEEDBACK_SOURCES += feedback/EAGLE/feedback.c
 endif
 
 # Common source files
-AM_SOURCES = space.c runner.c queue.c task.c cell.c engine.c engine_maketasks.c \
-    engine_marktasks.c engine_drift.c serial_io.c timers.c debug.c scheduler.c \
+AM_SOURCES = space.c runner_main.c runner_doiact_hydro.c runner_doiact_grav.c \
+    runner_doiact_stars.c runner_doiact_black_holes.c runner_ghost.c runner_recv.c \
+    runner_sort.c runner_drift.c runner_black_holes.c runner_time_integration.c \
+    runner_doiact_hydro_vec.c runner_others.c\
+    queue.c task.c cell.c engine.c engine_maketasks.c \
+    engine_marktasks.c engine_drift.c engine_unskip.c engine_collect_end_of_step.c \
+    engine_redistribute.c engine_fof.c serial_io.c timers.c debug.c scheduler.c \
     proxy.c parallel_io.c units.c common_io.c single_io.c multipole.c version.c map.c \
     kernel_hydro.c tools.c part.c partition.c clocks.c parser.c \
     physical_constants.c potential.c hydro_properties.c \
threadpool.c cooling.c star_formation.c \ - statistics.c runner_doiact_vec.c profiler.c dump.c logger.c \ + statistics.c profiler.c dump.c logger.c \ part_type.c xmf.c gravity_properties.c gravity.c \ collectgroup.c hydro_space.c equation_of_state.c \ chemistry.c cosmology.c restart.c mesh_gravity.c velociraptor_interface.c \ @@ -85,8 +90,10 @@ AM_SOURCES = space.c runner.c queue.c task.c cell.c engine.c engine_maketasks.c # Include files for distribution, not installation. nobase_noinst_HEADERS = align.h approx_math.h atomic.h barrier.h cycle.h error.h inline.h kernel_hydro.h kernel_gravity.h \ - gravity_iact.h kernel_long_gravity.h vector.h cache.h runner_doiact.h runner_doiact_vec.h runner_doiact_grav.h \ - runner_doiact_nosort.h runner_doiact_stars.h runner_doiact_black_holes.h units.h intrinsics.h minmax.h \ + gravity_iact.h kernel_long_gravity.h vector.h cache.h \ + runner_doiact_nosort.h runner_doiact_hydro.h runner_doiact_stars.h runner_doiact_black_holes.h runner_doiact_grav.h \ + runner_doiact_functions_hydro.h runner_doiact_functions_stars.h runner_doiact_functions_black_holes.h \ + units.h intrinsics.h minmax.h \ kick.h timestep.h drift.h adiabatic_index.h io_properties.h dimension.h part_type.h periodic.h memswap.h \ dump.h logger.h sign.h logger_io.h timestep_limiter.h hashmap.h \ gravity.h gravity_io.h gravity_cache.h \ diff --git a/src/cell.c b/src/cell.c index 4b9746e92e31f2a5ae6c4b5d01ade8d83939e412..92f53d7ca8499457248baa6b1bd0fb86380e4159 100644 --- a/src/cell.c +++ b/src/cell.c @@ -2443,7 +2443,8 @@ void cell_activate_star_resort_tasks(struct cell *c, struct scheduler *s) { /* The resort tasks are at either the chosen depth or the super level, * whichever comes first. */ - if (c->depth == engine_star_resort_task_depth || c->hydro.super == c) { + if ((c->depth == engine_star_resort_task_depth || c->hydro.super == c) && + c->hydro.count > 0) { scheduler_activate(s, c->hydro.stars_resort); } else { for (int k = 0; k < 8; ++k) { @@ -2478,6 +2479,50 @@ void cell_activate_star_formation_tasks(struct cell *c, struct scheduler *s) { cell_activate_star_resort_tasks(c, s); } +/** + * @brief Recursively activate the hydro ghosts (and implicit links) in a cell + * hierarchy. + * + * @param c The #cell. + * @param s The #scheduler. + * @param e The #engine. + */ +void cell_recursively_activate_hydro_ghosts(struct cell *c, struct scheduler *s, + const struct engine *e) { + /* Early abort? */ + if ((c->hydro.count == 0) || !cell_is_active_hydro(c, e)) return; + + /* Is the ghost at this level? */ + if (c->hydro.ghost != NULL) { + scheduler_activate(s, c->hydro.ghost); + } else { + +#ifdef SWIFT_DEBUG_CHECKS + if (!c->split) + error("Reached the leaf level without finding a hydro ghost!"); +#endif + + /* Keep recursing */ + for (int k = 0; k < 8; k++) + if (c->progeny[k] != NULL) + cell_recursively_activate_hydro_ghosts(c->progeny[k], s, e); + } +} + +/** + * @brief Activate the hydro ghosts (and implicit links) in a cell hierarchy. + * + * @param c The #cell. + * @param s The #scheduler. + * @param e The #engine. + */ +void cell_activate_hydro_ghosts(struct cell *c, struct scheduler *s, + const struct engine *e) { + scheduler_activate(s, c->hydro.ghost_in); + scheduler_activate(s, c->hydro.ghost_out); + cell_recursively_activate_hydro_ghosts(c, s, e); +} + /** * @brief Recurse down in a cell hierarchy until the hydro.super level is * reached and activate the spart drift at that level. 
@@ -2486,6 +2531,10 @@ void cell_activate_star_formation_tasks(struct cell *c, struct scheduler *s) { * @param s The #scheduler. */ void cell_activate_super_spart_drifts(struct cell *c, struct scheduler *s) { + + /* Early abort? */ + if (c->hydro.count == 0) return; + if (c == c->hydro.super) { cell_activate_drift_spart(c, s); } else { @@ -3500,9 +3549,7 @@ int cell_unskip_hydro_tasks(struct cell *c, struct scheduler *s) { if (c->hydro.extra_ghost != NULL) scheduler_activate(s, c->hydro.extra_ghost); - if (c->hydro.ghost_in != NULL) scheduler_activate(s, c->hydro.ghost_in); - if (c->hydro.ghost_out != NULL) scheduler_activate(s, c->hydro.ghost_out); - if (c->hydro.ghost != NULL) scheduler_activate(s, c->hydro.ghost); + if (c->hydro.ghost_in != NULL) cell_activate_hydro_ghosts(c, s, e); if (c->kick1 != NULL) scheduler_activate(s, c->kick1); if (c->kick2 != NULL) scheduler_activate(s, c->kick2); if (c->timestep != NULL) scheduler_activate(s, c->timestep); diff --git a/src/cell.h b/src/cell.h index 8067a3189818ab8738de848ea698fbf25c78ebba..10a3e2bddfebd907b8efaffa472f94c421e4966c 100644 --- a/src/cell.h +++ b/src/cell.h @@ -273,8 +273,10 @@ struct pcell_sf { } stars; }; -/** Bitmasks for the cell flags. Beware when adding flags that you don't exceed - the size of the flags variable in the struct cell. */ +/** + * @brief Bitmasks for the cell flags. Beware when adding flags that you don't + * exceed the size of the flags variable in the struct cell. + */ enum cell_flags { cell_flag_split = (1UL << 0), cell_flag_do_hydro_drift = (1UL << 1), @@ -289,7 +291,8 @@ enum cell_flags { cell_flag_do_stars_sub_drift = (1UL << 10), cell_flag_do_bh_drift = (1UL << 11), cell_flag_do_bh_sub_drift = (1UL << 12), - cell_flag_do_stars_resort = (1UL << 13) + cell_flag_do_stars_resort = (1UL << 13), + cell_flag_has_tasks = (1UL << 14), }; /** diff --git a/src/cooling/EAGLE/cooling_tables.c b/src/cooling/EAGLE/cooling_tables.c index 4261e9ac0a6fee9f77c03afe22b7a9b66ade487d..1de3265df6298eeb955758e272c7e17afb64de00 100644 --- a/src/cooling/EAGLE/cooling_tables.c +++ b/src/cooling/EAGLE/cooling_tables.c @@ -293,7 +293,10 @@ void read_cooling_header(const char *fname, cooling->nH[i] = log10(cooling->nH[i]); } - /* Compute inverse of solar mass fractions */ + /* Compute inverse of solar mass fractions */ +#if defined(__ICC) +#pragma novector +#endif for (int i = 0; i < N_SolarAbundances; ++i) { cooling->SolarAbundances_inv[i] = 1.f / cooling->SolarAbundances[i]; } diff --git a/src/engine.c b/src/engine.c index 6784e8b271353eae2a238e5c7a90a9aeb9fc06db..1b5a409cdf0488a545448c8bbcec562d04a748bc 100644 --- a/src/engine.c +++ b/src/engine.c @@ -44,11 +44,6 @@ #include <numa.h> #endif -/* Load the profiler header, if needed. */ -#ifdef WITH_PROFILER -#include <gperftools/profiler.h> -#endif - /* This object's header. 
*/ #include "engine.h" @@ -72,7 +67,6 @@ #include "logger.h" #include "logger_io.h" #include "map.h" -#include "memswap.h" #include "memuse.h" #include "minmax.h" #include "outputlist.h" @@ -133,22 +127,6 @@ int engine_current_step; extern int engine_max_parts_per_ghost; extern int engine_max_sparts_per_ghost; -/** - * @brief Data collected from the cells at the end of a time-step - */ -struct end_of_step_data { - - size_t updated, g_updated, s_updated, b_updated; - size_t inhibited, g_inhibited, s_inhibited, b_inhibited; - integertime_t ti_hydro_end_min, ti_hydro_end_max, ti_hydro_beg_max; - integertime_t ti_gravity_end_min, ti_gravity_end_max, ti_gravity_beg_max; - integertime_t ti_stars_end_min, ti_stars_end_max, ti_stars_beg_max; - integertime_t ti_black_holes_end_min, ti_black_holes_end_max, - ti_black_holes_beg_max; - struct engine *e; - struct star_formation_history sfh; -}; - /** * @brief Link a density/force task to a cell. * @@ -2777,544 +2755,6 @@ void engine_barrier(struct engine *e) { swift_barrier_wait(&e->run_barrier); } -/** - * @brief Recursive function gathering end-of-step data. - * - * We recurse until we encounter a timestep or time-step MPI recv task - * as the values will have been set at that level. We then bring these - * values upwards. - * - * @param c The #cell to recurse into. - * @param e The #engine. - */ -void engine_collect_end_of_step_recurse_hydro(struct cell *c, - const struct engine *e) { - - /* Skip super-cells (Their values are already set) */ - if (c->timestep != NULL) return; -#ifdef WITH_MPI - if (cell_get_recv(c, task_subtype_tend_part) != NULL) return; -#endif /* WITH_MPI */ - -#ifdef SWIFT_DEBUG_CHECKS - /* if (!c->split) error("Reached a leaf without finding a time-step task! - * c->depth=%d c->maxdepth=%d c->count=%d c->node=%d", */ - /* c->depth, c->maxdepth, c->hydro.count, c->nodeID); */ -#endif - - /* Counters for the different quantities. */ - size_t updated = 0; - integertime_t ti_hydro_end_min = max_nr_timesteps, ti_hydro_end_max = 0, - ti_hydro_beg_max = 0; - - /* Local Star formation history properties */ - struct star_formation_history sfh_updated; - - /* Initialize the star formation structs */ - star_formation_logger_init(&sfh_updated); - - /* Collect the values from the progeny. */ - for (int k = 0; k < 8; k++) { - struct cell *cp = c->progeny[k]; - if (cp != NULL && cp->hydro.count > 0) { - - /* Recurse */ - engine_collect_end_of_step_recurse_hydro(cp, e); - - /* And update */ - ti_hydro_end_min = min(ti_hydro_end_min, cp->hydro.ti_end_min); - ti_hydro_end_max = max(ti_hydro_end_max, cp->hydro.ti_end_max); - ti_hydro_beg_max = max(ti_hydro_beg_max, cp->hydro.ti_beg_max); - - updated += cp->hydro.updated; - - /* Check if the cell is inactive and in that case reorder the SFH */ - if (!cell_is_starting_hydro(cp, e)) { - star_formation_logger_log_inactive_cell(&cp->stars.sfh); - } - - /* Add the star formation history in this cell to sfh_updated */ - star_formation_logger_add(&sfh_updated, &cp->stars.sfh); - - /* Collected, so clear for next time. */ - cp->hydro.updated = 0; - } - } - - /* Store the collected values in the cell. */ - c->hydro.ti_end_min = ti_hydro_end_min; - c->hydro.ti_end_max = ti_hydro_end_max; - c->hydro.ti_beg_max = ti_hydro_beg_max; - c->hydro.updated = updated; - // c->hydro.inhibited = inhibited; - - /* Store the star formation history in the parent cell */ - star_formation_logger_add(&c->stars.sfh, &sfh_updated); -} - -/** - * @brief Recursive function gathering end-of-step data. 
- * - * We recurse until we encounter a timestep or time-step MPI recv task - * as the values will have been set at that level. We then bring these - * values upwards. - * - * @param c The #cell to recurse into. - * @param e The #engine. - */ -void engine_collect_end_of_step_recurse_grav(struct cell *c, - const struct engine *e) { - - /* Skip super-cells (Their values are already set) */ - if (c->timestep != NULL) return; -#ifdef WITH_MPI - if (cell_get_recv(c, task_subtype_tend_gpart) != NULL) return; -#endif /* WITH_MPI */ - -#ifdef SWIFT_DEBUG_CHECKS - // if (!c->split) error("Reached a leaf without finding a time-step - // task!"); -#endif - - /* Counters for the different quantities. */ - size_t updated = 0; - integertime_t ti_grav_end_min = max_nr_timesteps, ti_grav_end_max = 0, - ti_grav_beg_max = 0; - - /* Collect the values from the progeny. */ - for (int k = 0; k < 8; k++) { - struct cell *cp = c->progeny[k]; - if (cp != NULL && cp->grav.count > 0) { - - /* Recurse */ - engine_collect_end_of_step_recurse_grav(cp, e); - - /* And update */ - ti_grav_end_min = min(ti_grav_end_min, cp->grav.ti_end_min); - ti_grav_end_max = max(ti_grav_end_max, cp->grav.ti_end_max); - ti_grav_beg_max = max(ti_grav_beg_max, cp->grav.ti_beg_max); - - updated += cp->grav.updated; - - /* Collected, so clear for next time. */ - cp->grav.updated = 0; - } - } - - /* Store the collected values in the cell. */ - c->grav.ti_end_min = ti_grav_end_min; - c->grav.ti_end_max = ti_grav_end_max; - c->grav.ti_beg_max = ti_grav_beg_max; - c->grav.updated = updated; -} - -/** - * @brief Recursive function gathering end-of-step data. - * - * We recurse until we encounter a timestep or time-step MPI recv task - * as the values will have been set at that level. We then bring these - * values upwards. - * - * @param c The #cell to recurse into. - * @param e The #engine. - */ -void engine_collect_end_of_step_recurse_stars(struct cell *c, - const struct engine *e) { - - /* Skip super-cells (Their values are already set) */ - if (c->timestep != NULL) return; -#ifdef WITH_MPI - if (cell_get_recv(c, task_subtype_tend_spart) != NULL) return; -#endif /* WITH_MPI */ - -#ifdef SWIFT_DEBUG_CHECKS - // if (!c->split) error("Reached a leaf without finding a time-step task!"); -#endif - - /* Counters for the different quantities. */ - size_t updated = 0; - integertime_t ti_stars_end_min = max_nr_timesteps, ti_stars_end_max = 0, - ti_stars_beg_max = 0; - - /* Collect the values from the progeny. */ - for (int k = 0; k < 8; k++) { - struct cell *cp = c->progeny[k]; - if (cp != NULL && cp->stars.count > 0) { - - /* Recurse */ - engine_collect_end_of_step_recurse_stars(cp, e); - - /* And update */ - ti_stars_end_min = min(ti_stars_end_min, cp->stars.ti_end_min); - ti_stars_end_max = max(ti_stars_end_max, cp->stars.ti_end_max); - ti_stars_beg_max = max(ti_stars_beg_max, cp->stars.ti_beg_max); - - updated += cp->stars.updated; - - /* Collected, so clear for next time. */ - cp->stars.updated = 0; - } - } - - /* Store the collected values in the cell. */ - c->stars.ti_end_min = ti_stars_end_min; - c->stars.ti_end_max = ti_stars_end_max; - c->stars.ti_beg_max = ti_stars_beg_max; - c->stars.updated = updated; -} - -/** - * @brief Recursive function gathering end-of-step data. - * - * We recurse until we encounter a timestep or time-step MPI recv task - * as the values will have been set at that level. We then bring these - * values upwards. - * - * @param c The #cell to recurse into. - * @param e The #engine. 
- */ -void engine_collect_end_of_step_recurse_black_holes(struct cell *c, - const struct engine *e) { - - /* Skip super-cells (Their values are already set) */ - if (c->timestep != NULL) return; -#ifdef WITH_MPI - if (cell_get_recv(c, task_subtype_tend_bpart) != NULL) return; -#endif /* WITH_MPI */ - -#ifdef SWIFT_DEBUG_CHECKS - // if (!c->split) error("Reached a leaf without finding a time-step task!"); -#endif - - /* Counters for the different quantities. */ - size_t updated = 0; - integertime_t ti_black_holes_end_min = max_nr_timesteps, - ti_black_holes_end_max = 0, ti_black_holes_beg_max = 0; - - /* Collect the values from the progeny. */ - for (int k = 0; k < 8; k++) { - struct cell *cp = c->progeny[k]; - if (cp != NULL && cp->black_holes.count > 0) { - - /* Recurse */ - engine_collect_end_of_step_recurse_black_holes(cp, e); - - /* And update */ - ti_black_holes_end_min = - min(ti_black_holes_end_min, cp->black_holes.ti_end_min); - ti_black_holes_end_max = - max(ti_black_holes_end_max, cp->black_holes.ti_end_max); - ti_black_holes_beg_max = - max(ti_black_holes_beg_max, cp->black_holes.ti_beg_max); - - updated += cp->black_holes.updated; - - /* Collected, so clear for next time. */ - cp->black_holes.updated = 0; - } - } - - /* Store the collected values in the cell. */ - c->black_holes.ti_end_min = ti_black_holes_end_min; - c->black_holes.ti_end_max = ti_black_holes_end_max; - c->black_holes.ti_beg_max = ti_black_holes_beg_max; - c->black_holes.updated = updated; -} - -/** - * @brief Mapping function to collect the data from the end of the step - * - * This function will call a recursive function on all the top-level cells - * to collect the information we are after. - * - * @param map_data The list of cells with tasks on this node. - * @param num_elements The number of elements in the list this thread will work - * on. - * @param extra_data The #engine. 
- */ -void engine_collect_end_of_step_mapper(void *map_data, int num_elements, - void *extra_data) { - - struct end_of_step_data *data = (struct end_of_step_data *)extra_data; - const struct engine *e = data->e; - const int with_hydro = (e->policy & engine_policy_hydro); - const int with_self_grav = (e->policy & engine_policy_self_gravity); - const int with_ext_grav = (e->policy & engine_policy_external_gravity); - const int with_grav = (with_self_grav || with_ext_grav); - const int with_stars = (e->policy & engine_policy_stars); - const int with_black_holes = (e->policy & engine_policy_black_holes); - struct space *s = e->s; - int *local_cells = (int *)map_data; - struct star_formation_history *sfh_top = &data->sfh; - - /* Local collectible */ - size_t updated = 0, g_updated = 0, s_updated = 0, b_updated = 0; - integertime_t ti_hydro_end_min = max_nr_timesteps, ti_hydro_end_max = 0, - ti_hydro_beg_max = 0; - integertime_t ti_gravity_end_min = max_nr_timesteps, ti_gravity_end_max = 0, - ti_gravity_beg_max = 0; - integertime_t ti_stars_end_min = max_nr_timesteps, ti_stars_end_max = 0, - ti_stars_beg_max = 0; - integertime_t ti_black_holes_end_min = max_nr_timesteps, - ti_black_holes_end_max = 0, ti_black_holes_beg_max = 0; - - /* Local Star formation history properties */ - struct star_formation_history sfh_updated; - - /* Initialize the star formation structs for this engine to zero */ - star_formation_logger_init(&sfh_updated); - - for (int ind = 0; ind < num_elements; ind++) { - struct cell *c = &s->cells_top[local_cells[ind]]; - - if (c->hydro.count > 0 || c->grav.count > 0 || c->stars.count > 0 || - c->black_holes.count > 0) { - - /* Make the top-cells recurse */ - if (with_hydro) { - engine_collect_end_of_step_recurse_hydro(c, e); - } - if (with_grav) { - engine_collect_end_of_step_recurse_grav(c, e); - } - if (with_stars) { - engine_collect_end_of_step_recurse_stars(c, e); - } - if (with_black_holes) { - engine_collect_end_of_step_recurse_black_holes(c, e); - } - - /* And aggregate */ - if (c->hydro.ti_end_min > e->ti_current) - ti_hydro_end_min = min(ti_hydro_end_min, c->hydro.ti_end_min); - ti_hydro_end_max = max(ti_hydro_end_max, c->hydro.ti_end_max); - ti_hydro_beg_max = max(ti_hydro_beg_max, c->hydro.ti_beg_max); - - if (c->grav.ti_end_min > e->ti_current) - ti_gravity_end_min = min(ti_gravity_end_min, c->grav.ti_end_min); - ti_gravity_end_max = max(ti_gravity_end_max, c->grav.ti_end_max); - ti_gravity_beg_max = max(ti_gravity_beg_max, c->grav.ti_beg_max); - - if (c->stars.ti_end_min > e->ti_current) - ti_stars_end_min = min(ti_stars_end_min, c->stars.ti_end_min); - ti_stars_end_max = max(ti_stars_end_max, c->stars.ti_end_max); - ti_stars_beg_max = max(ti_stars_beg_max, c->stars.ti_beg_max); - - if (c->black_holes.ti_end_min > e->ti_current) - ti_black_holes_end_min = - min(ti_black_holes_end_min, c->black_holes.ti_end_min); - ti_black_holes_end_max = - max(ti_black_holes_end_max, c->black_holes.ti_end_max); - ti_black_holes_beg_max = - max(ti_black_holes_beg_max, c->black_holes.ti_beg_max); - - updated += c->hydro.updated; - g_updated += c->grav.updated; - s_updated += c->stars.updated; - b_updated += c->black_holes.updated; - - /* Check if the cell is inactive and in that case reorder the SFH */ - if (!cell_is_starting_hydro(c, e)) { - star_formation_logger_log_inactive_cell(&c->stars.sfh); - } - - /* Get the star formation history from the current cell and store it in - * the star formation history struct */ - star_formation_logger_add(&sfh_updated, &c->stars.sfh); - - /* 
Collected, so clear for next time. */ - c->hydro.updated = 0; - c->grav.updated = 0; - c->stars.updated = 0; - c->black_holes.updated = 0; - } - } - - /* Let's write back to the global data. - * We use the space lock to garanty single access*/ - if (lock_lock(&s->lock) == 0) { - data->updated += updated; - data->g_updated += g_updated; - data->s_updated += s_updated; - data->b_updated += b_updated; - - /* Add the SFH information from this engine to the global data */ - star_formation_logger_add(sfh_top, &sfh_updated); - - if (ti_hydro_end_min > e->ti_current) - data->ti_hydro_end_min = min(ti_hydro_end_min, data->ti_hydro_end_min); - data->ti_hydro_end_max = max(ti_hydro_end_max, data->ti_hydro_end_max); - data->ti_hydro_beg_max = max(ti_hydro_beg_max, data->ti_hydro_beg_max); - - if (ti_gravity_end_min > e->ti_current) - data->ti_gravity_end_min = - min(ti_gravity_end_min, data->ti_gravity_end_min); - data->ti_gravity_end_max = - max(ti_gravity_end_max, data->ti_gravity_end_max); - data->ti_gravity_beg_max = - max(ti_gravity_beg_max, data->ti_gravity_beg_max); - - if (ti_stars_end_min > e->ti_current) - data->ti_stars_end_min = min(ti_stars_end_min, data->ti_stars_end_min); - data->ti_stars_end_max = max(ti_stars_end_max, data->ti_stars_end_max); - data->ti_stars_beg_max = max(ti_stars_beg_max, data->ti_stars_beg_max); - - if (ti_black_holes_end_min > e->ti_current) - data->ti_black_holes_end_min = - min(ti_black_holes_end_min, data->ti_black_holes_end_min); - data->ti_black_holes_end_max = - max(ti_black_holes_end_max, data->ti_black_holes_end_max); - data->ti_black_holes_beg_max = - max(ti_black_holes_beg_max, data->ti_black_holes_beg_max); - } - - if (lock_unlock(&s->lock) != 0) error("Failed to unlock the space"); -} - -/** - * @brief Collects the next time-step and rebuild flag. - * - * The next time-step is determined by making each super-cell recurse to - * collect the minimal of ti_end and the number of updated particles. When in - * MPI mode this routines reduces these across all nodes and also collects the - * forcerebuild flag -- this is so that we only use a single collective MPI - * call per step for all these values. - * - * Note that the results are stored in e->collect_group1 struct not in the - * engine fields, unless apply is true. These can be applied field-by-field - * or all at once using collectgroup1_copy(); - * - * @param e The #engine. - * @param apply whether to apply the results to the engine or just keep in the - * group1 struct. 
- */ -void engine_collect_end_of_step(struct engine *e, int apply) { - - const ticks tic = getticks(); - struct space *s = e->s; - struct end_of_step_data data; - data.updated = 0, data.g_updated = 0, data.s_updated = 0, data.b_updated = 0; - data.ti_hydro_end_min = max_nr_timesteps, data.ti_hydro_end_max = 0, - data.ti_hydro_beg_max = 0; - data.ti_gravity_end_min = max_nr_timesteps, data.ti_gravity_end_max = 0, - data.ti_gravity_beg_max = 0; - data.ti_stars_end_min = max_nr_timesteps, data.ti_stars_end_max = 0, - data.ti_stars_beg_max = 0; - data.ti_black_holes_end_min = max_nr_timesteps, - data.ti_black_holes_end_max = 0, data.ti_black_holes_beg_max = 0; - data.e = e; - - /* Initialize the total SFH of the simulation to zero */ - star_formation_logger_init(&data.sfh); - - /* Collect information from the local top-level cells */ - threadpool_map(&e->threadpool, engine_collect_end_of_step_mapper, - s->local_cells_with_tasks_top, s->nr_local_cells_with_tasks, - sizeof(int), 0, &data); - - /* Get the number of inhibited particles from the space-wide counters - * since these have been updated atomically during the time-steps. */ - data.inhibited = s->nr_inhibited_parts; - data.g_inhibited = s->nr_inhibited_gparts; - data.s_inhibited = s->nr_inhibited_sparts; - data.b_inhibited = s->nr_inhibited_bparts; - - /* Store these in the temporary collection group. */ - collectgroup1_init( - &e->collect_group1, data.updated, data.g_updated, data.s_updated, - data.b_updated, data.inhibited, data.g_inhibited, data.s_inhibited, - data.b_inhibited, data.ti_hydro_end_min, data.ti_hydro_end_max, - data.ti_hydro_beg_max, data.ti_gravity_end_min, data.ti_gravity_end_max, - data.ti_gravity_beg_max, data.ti_stars_end_min, data.ti_stars_end_max, - data.ti_stars_beg_max, data.ti_black_holes_end_min, - data.ti_black_holes_end_max, data.ti_black_holes_beg_max, e->forcerebuild, - e->s->tot_cells, e->sched.nr_tasks, - (float)e->sched.nr_tasks / (float)e->s->tot_cells, data.sfh); - -/* Aggregate collective data from the different nodes for this step. */ -#ifdef WITH_MPI - collectgroup1_reduce(&e->collect_group1); - -#ifdef SWIFT_DEBUG_CHECKS - { - /* Check the above using the original MPI calls. 
*/ - integertime_t in_i[2], out_i[2]; - in_i[0] = 0; - in_i[1] = 0; - out_i[0] = data.ti_hydro_end_min; - out_i[1] = data.ti_gravity_end_min; - if (MPI_Allreduce(out_i, in_i, 2, MPI_LONG_LONG_INT, MPI_MIN, - MPI_COMM_WORLD) != MPI_SUCCESS) - error("Failed to aggregate ti_end_min."); - if (in_i[0] != (long long)e->collect_group1.ti_hydro_end_min) - error("Failed to get same ti_hydro_end_min, is %lld, should be %lld", - in_i[0], e->collect_group1.ti_hydro_end_min); - if (in_i[1] != (long long)e->collect_group1.ti_gravity_end_min) - error("Failed to get same ti_gravity_end_min, is %lld, should be %lld", - in_i[1], e->collect_group1.ti_gravity_end_min); - - long long in_ll[4], out_ll[4]; - out_ll[0] = data.updated; - out_ll[1] = data.g_updated; - out_ll[2] = data.s_updated; - out_ll[3] = data.b_updated; - if (MPI_Allreduce(out_ll, in_ll, 4, MPI_LONG_LONG_INT, MPI_SUM, - MPI_COMM_WORLD) != MPI_SUCCESS) - error("Failed to aggregate particle counts."); - if (in_ll[0] != (long long)e->collect_group1.updated) - error("Failed to get same updated, is %lld, should be %lld", in_ll[0], - e->collect_group1.updated); - if (in_ll[1] != (long long)e->collect_group1.g_updated) - error("Failed to get same g_updated, is %lld, should be %lld", in_ll[1], - e->collect_group1.g_updated); - if (in_ll[2] != (long long)e->collect_group1.s_updated) - error("Failed to get same s_updated, is %lld, should be %lld", in_ll[2], - e->collect_group1.s_updated); - if (in_ll[3] != (long long)e->collect_group1.b_updated) - error("Failed to get same b_updated, is %lld, should be %lld", in_ll[3], - e->collect_group1.b_updated); - - out_ll[0] = data.inhibited; - out_ll[1] = data.g_inhibited; - out_ll[2] = data.s_inhibited; - out_ll[3] = data.b_inhibited; - if (MPI_Allreduce(out_ll, in_ll, 4, MPI_LONG_LONG_INT, MPI_SUM, - MPI_COMM_WORLD) != MPI_SUCCESS) - error("Failed to aggregate particle counts."); - if (in_ll[0] != (long long)e->collect_group1.inhibited) - error("Failed to get same inhibited, is %lld, should be %lld", in_ll[0], - e->collect_group1.inhibited); - if (in_ll[1] != (long long)e->collect_group1.g_inhibited) - error("Failed to get same g_inhibited, is %lld, should be %lld", in_ll[1], - e->collect_group1.g_inhibited); - if (in_ll[2] != (long long)e->collect_group1.s_inhibited) - error("Failed to get same s_inhibited, is %lld, should be %lld", in_ll[2], - e->collect_group1.s_inhibited); - if (in_ll[3] != (long long)e->collect_group1.b_inhibited) - error("Failed to get same b_inhibited, is %lld, should be %lld", in_ll[3], - e->collect_group1.b_inhibited); - - int buff = 0; - if (MPI_Allreduce(&e->forcerebuild, &buff, 1, MPI_INT, MPI_MAX, - MPI_COMM_WORLD) != MPI_SUCCESS) - error("Failed to aggregate the rebuild flag across nodes."); - if (!!buff != !!e->collect_group1.forcerebuild) - error( - "Failed to get same rebuild flag from all nodes, is %d," - "should be %d", - buff, e->collect_group1.forcerebuild); - } -#endif -#endif - - /* Apply to the engine, if requested. 
*/ - if (apply) collectgroup1_apply(&e->collect_group1, e); - - if (e->verbose) - message("took %.3f %s.", clocks_from_ticks(getticks() - tic), - clocks_getunit()); -} - /** * @brief Print the conserved quantities statistics to a log file * @@ -3642,7 +3082,6 @@ void engine_init_particles(struct engine *e, int flag_entropy_ICs, #endif scheduler_write_dependencies(&e->sched, e->verbose); - space_write_cell_hierarchy(e->s); if (e->nodeID == 0) scheduler_write_task_level(&e->sched); /* Run the 0th time-step */ @@ -4217,64 +3656,6 @@ int engine_is_done(struct engine *e) { return !(e->ti_current < max_nr_timesteps); } -/** - * @brief Unskip all the tasks that act on active cells at this time. - * - * @param e The #engine. - */ -void engine_unskip(struct engine *e) { - - const ticks tic = getticks(); - struct space *s = e->s; - const int nodeID = e->nodeID; - - const int with_hydro = e->policy & engine_policy_hydro; - const int with_self_grav = e->policy & engine_policy_self_gravity; - const int with_ext_grav = e->policy & engine_policy_external_gravity; - const int with_stars = e->policy & engine_policy_stars; - const int with_feedback = e->policy & engine_policy_feedback; - const int with_black_holes = e->policy & engine_policy_black_holes; - -#ifdef WITH_PROFILER - static int count = 0; - char filename[100]; - sprintf(filename, "/tmp/swift_runner_do_usnkip_mapper_%06i.prof", count++); - ProfilerStart(filename); -#endif // WITH_PROFILER - - /* Move the active local cells to the top of the list. */ - int *local_cells = e->s->local_cells_with_tasks_top; - int num_active_cells = 0; - for (int k = 0; k < s->nr_local_cells_with_tasks; k++) { - struct cell *c = &s->cells_top[local_cells[k]]; - - if ((with_hydro && cell_is_active_hydro(c, e)) || - (with_self_grav && cell_is_active_gravity(c, e)) || - (with_ext_grav && c->nodeID == nodeID && - cell_is_active_gravity(c, e)) || - (with_feedback && cell_is_active_stars(c, e)) || - (with_stars && c->nodeID == nodeID && cell_is_active_stars(c, e)) || - (with_black_holes && cell_is_active_black_holes(c, e))) { - - if (num_active_cells != k) - memswap(&local_cells[k], &local_cells[num_active_cells], sizeof(int)); - num_active_cells += 1; - } - } - - /* Activate all the regular tasks */ - threadpool_map(&e->threadpool, runner_do_unskip_mapper, local_cells, - num_active_cells, sizeof(int), 1, e); - -#ifdef WITH_PROFILER - ProfilerStop(); -#endif // WITH_PROFILER - - if (e->verbose) - message("took %.3f %s.", clocks_from_ticks(getticks() - tic), - clocks_getunit()); -} - void engine_do_reconstruct_multipoles_mapper(void *map_data, int num_elements, void *extra_data) { @@ -4877,7 +4258,7 @@ void engine_dump_snapshot(struct engine *e) { */ void engine_dump_index(struct engine *e) { -#if defined(WITH_LOGGER) +#if defined(WITH_LOGGER) && !defined(WITH_MPI) struct clocks_time time1, time2; clocks_gettime(&time1); @@ -5094,7 +4475,7 @@ void engine_init(struct engine *e, struct space *s, struct swift_params *params, e->total_nr_tasks = 0; #if defined(WITH_LOGGER) - e->logger = (struct logger *)malloc(sizeof(struct logger)); + e->logger = (struct logger_writer *)malloc(sizeof(struct logger_writer)); logger_init(e->logger, params); #endif @@ -5842,7 +5223,7 @@ void engine_config(int restart, int fof, struct engine *e, #ifdef WITH_LOGGER /* Write the particle logger header */ - logger_write_file_header(e->logger, e); + logger_write_file_header(e->logger); #endif /* Initialise the structure finder */ @@ -6360,7 +5741,7 @@ void engine_clean(struct engine *e, const int fof) 
{ swift_free("links", e->links); #if defined(WITH_LOGGER) - logger_clean(e->logger); + logger_free(e->logger); free(e->logger); #endif scheduler_clean(&e->sched); @@ -6577,127 +5958,3 @@ void engine_struct_restore(struct engine *e, FILE *stream) { e->forcerebuild = 1; e->forcerepart = 0; } - -/** - * @brief Activate all the #gpart communications in preparation - * fof a call to FOF. - * - * @param e The #engine to act on. - */ -void engine_activate_gpart_comms(struct engine *e) { - -#ifdef WITH_MPI - - const ticks tic = getticks(); - - struct scheduler *s = &e->sched; - const int nr_tasks = s->nr_tasks; - struct task *tasks = s->tasks; - - for (int k = 0; k < nr_tasks; ++k) { - - struct task *t = &tasks[k]; - - if ((t->type == task_type_send) && (t->subtype == task_subtype_gpart)) { - scheduler_activate(s, t); - } else if ((t->type == task_type_recv) && - (t->subtype == task_subtype_gpart)) { - scheduler_activate(s, t); - } else { - t->skip = 1; - } - } - - if (e->verbose) - message("took %.3f %s.", clocks_from_ticks(getticks() - tic), - clocks_getunit()); - -#else - error("Calling an MPI function in non-MPI mode."); -#endif -} - -/** - * @brief Activate all the FOF tasks. - * - * Marks all the other task types to be skipped. - * - * @param e The #engine to act on. - */ -void engine_activate_fof_tasks(struct engine *e) { - - const ticks tic = getticks(); - - struct scheduler *s = &e->sched; - const int nr_tasks = s->nr_tasks; - struct task *tasks = s->tasks; - - for (int k = 0; k < nr_tasks; k++) { - - struct task *t = &tasks[k]; - - if (t->type == task_type_fof_self || t->type == task_type_fof_pair) - scheduler_activate(s, t); - else - t->skip = 1; - } - - if (e->verbose) - message("took %.3f %s.", clocks_from_ticks(getticks() - tic), - clocks_getunit()); -} - -/** - * @brief Run a FOF search. - * - * @param e the engine - * @param dump_results Are we writing group catalogues to output files? - * @param seed_black_holes Are we seeding black holes? - */ -void engine_fof(struct engine *e, const int dump_results, - const int seed_black_holes) { - -#ifdef WITH_FOF - - ticks tic = getticks(); - - /* Compute number of DM particles */ - const long long total_nr_baryons = - e->total_nr_parts + e->total_nr_sparts + e->total_nr_bparts; - const long long total_nr_dmparts = - e->total_nr_gparts - e->total_nr_DM_background_gparts - total_nr_baryons; - - /* Initialise FOF parameters and allocate FOF arrays. */ - fof_allocate(e->s, total_nr_dmparts, e->fof_properties); - - /* Make FOF tasks */ - engine_make_fof_tasks(e); - - /* and activate them. */ - engine_activate_fof_tasks(e); - - /* Perform local FOF tasks. */ - engine_launch(e); - - /* Perform FOF search over foreign particles and - * find groups which require black hole seeding. */ - fof_search_tree(e->fof_properties, e->black_holes_properties, - e->physical_constants, e->cosmology, e->s, dump_results, - seed_black_holes); - - /* Reset flag. */ - e->run_fof = 0; - - /* Flag that a FOF has taken place */ - e->step_props |= engine_step_prop_fof; - - /* ... 
and find the next FOF time */ - if (seed_black_holes) engine_compute_next_fof_time(e); - - if (engine_rank == 0) - message("Complete FOF search took: %.3f %s.", - clocks_from_ticks(getticks() - tic), clocks_getunit()); -#else - error("SWIFT was not compiled with FOF enabled!"); -#endif -} diff --git a/src/engine.h b/src/engine.h index d7da2942dac7c03dba9d66ea1499e8a4fd2202d9..68a4df10c08325d5d810b361dab863bf9ee68ea6 100644 --- a/src/engine.h +++ b/src/engine.h @@ -383,7 +383,7 @@ struct engine { struct repartition *reparttype; #ifdef WITH_LOGGER - struct logger *logger; + struct logger_writer *logger; #endif /* How many steps have we done with the same set of tasks? */ @@ -494,6 +494,7 @@ void engine_reconstruct_multipoles(struct engine *e); void engine_allocate_foreign_particles(struct engine *e); void engine_print_stats(struct engine *e); void engine_check_for_dumps(struct engine *e); +void engine_collect_end_of_step(struct engine *e, int apply); void engine_dump_snapshot(struct engine *e); void engine_init_output_lists(struct engine *e, struct swift_params *params); void engine_init(struct engine *e, struct space *s, struct swift_params *params, diff --git a/src/engine_collect_end_of_step.c b/src/engine_collect_end_of_step.c new file mode 100644 index 0000000000000000000000000000000000000000..ec02acfefdf65aca13d44a7cf90d48f31b99778f --- /dev/null +++ b/src/engine_collect_end_of_step.c @@ -0,0 +1,584 @@ +/******************************************************************************* + * This file is part of SWIFT. + * Copyright (c) 2012 Pedro Gonnet (pedro.gonnet@durham.ac.uk) + * Matthieu Schaller (matthieu.schaller@durham.ac.uk) + * 2015 Peter W. Draper (p.w.draper@durham.ac.uk) + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published + * by the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public License + * along with this program. If not, see <http://www.gnu.org/licenses/>. + * + ******************************************************************************/ + +/* Config parameters. */ +#include "../config.h" + +/* This object's header. */ +#include "engine.h" + +/* Local headers. */ +#include "active.h" +#include "timeline.h" + +/** + * @brief Data collected from the cells at the end of a time-step + */ +struct end_of_step_data { + + size_t updated, g_updated, s_updated, b_updated; + size_t inhibited, g_inhibited, s_inhibited, b_inhibited; + integertime_t ti_hydro_end_min, ti_hydro_end_max, ti_hydro_beg_max; + integertime_t ti_gravity_end_min, ti_gravity_end_max, ti_gravity_beg_max; + integertime_t ti_stars_end_min, ti_stars_end_max, ti_stars_beg_max; + integertime_t ti_black_holes_end_min, ti_black_holes_end_max, + ti_black_holes_beg_max; + struct engine *e; + struct star_formation_history sfh; +}; + +/** + * @brief Recursive function gathering end-of-step data. + * + * We recurse until we encounter a timestep or time-step MPI recv task + * as the values will have been set at that level. We then bring these + * values upwards. + * + * @param c The #cell to recurse into. + * @param e The #engine. 
+ */ +void engine_collect_end_of_step_recurse_hydro(struct cell *c, + const struct engine *e) { + + /* Skip super-cells (Their values are already set) */ + if (c->timestep != NULL) return; +#ifdef WITH_MPI + if (cell_get_recv(c, task_subtype_tend_part) != NULL) return; +#endif /* WITH_MPI */ + +#ifdef SWIFT_DEBUG_CHECKS + /* if (!c->split) error("Reached a leaf without finding a time-step task! + * c->depth=%d c->maxdepth=%d c->count=%d c->node=%d", */ + /* c->depth, c->maxdepth, c->hydro.count, c->nodeID); */ +#endif + + /* Counters for the different quantities. */ + size_t updated = 0; + integertime_t ti_hydro_end_min = max_nr_timesteps, ti_hydro_end_max = 0, + ti_hydro_beg_max = 0; + + /* Local Star formation history properties */ + struct star_formation_history sfh_updated; + + /* Initialize the star formation structs */ + star_formation_logger_init(&sfh_updated); + + /* Collect the values from the progeny. */ + for (int k = 0; k < 8; k++) { + struct cell *cp = c->progeny[k]; + if (cp != NULL && cp->hydro.count > 0) { + + /* Recurse */ + engine_collect_end_of_step_recurse_hydro(cp, e); + + /* And update */ + ti_hydro_end_min = min(ti_hydro_end_min, cp->hydro.ti_end_min); + ti_hydro_end_max = max(ti_hydro_end_max, cp->hydro.ti_end_max); + ti_hydro_beg_max = max(ti_hydro_beg_max, cp->hydro.ti_beg_max); + + updated += cp->hydro.updated; + + /* Check if the cell is inactive and in that case reorder the SFH */ + if (!cell_is_starting_hydro(cp, e)) { + star_formation_logger_log_inactive_cell(&cp->stars.sfh); + } + + /* Add the star formation history in this cell to sfh_updated */ + star_formation_logger_add(&sfh_updated, &cp->stars.sfh); + + /* Collected, so clear for next time. */ + cp->hydro.updated = 0; + } + } + + /* Store the collected values in the cell. */ + c->hydro.ti_end_min = ti_hydro_end_min; + c->hydro.ti_end_max = ti_hydro_end_max; + c->hydro.ti_beg_max = ti_hydro_beg_max; + c->hydro.updated = updated; + // c->hydro.inhibited = inhibited; + + /* Store the star formation history in the parent cell */ + star_formation_logger_add(&c->stars.sfh, &sfh_updated); +} + +/** + * @brief Recursive function gathering end-of-step data. + * + * We recurse until we encounter a timestep or time-step MPI recv task + * as the values will have been set at that level. We then bring these + * values upwards. + * + * @param c The #cell to recurse into. + * @param e The #engine. + */ +void engine_collect_end_of_step_recurse_grav(struct cell *c, + const struct engine *e) { + + /* Skip super-cells (Their values are already set) */ + if (c->timestep != NULL) return; +#ifdef WITH_MPI + if (cell_get_recv(c, task_subtype_tend_gpart) != NULL) return; +#endif /* WITH_MPI */ + +#ifdef SWIFT_DEBUG_CHECKS + // if (!c->split) error("Reached a leaf without finding a time-step + // task!"); +#endif + + /* Counters for the different quantities. */ + size_t updated = 0; + integertime_t ti_grav_end_min = max_nr_timesteps, ti_grav_end_max = 0, + ti_grav_beg_max = 0; + + /* Collect the values from the progeny. */ + for (int k = 0; k < 8; k++) { + struct cell *cp = c->progeny[k]; + if (cp != NULL && cp->grav.count > 0) { + + /* Recurse */ + engine_collect_end_of_step_recurse_grav(cp, e); + + /* And update */ + ti_grav_end_min = min(ti_grav_end_min, cp->grav.ti_end_min); + ti_grav_end_max = max(ti_grav_end_max, cp->grav.ti_end_max); + ti_grav_beg_max = max(ti_grav_beg_max, cp->grav.ti_beg_max); + + updated += cp->grav.updated; + + /* Collected, so clear for next time. 
*/ + cp->grav.updated = 0; + } + } + + /* Store the collected values in the cell. */ + c->grav.ti_end_min = ti_grav_end_min; + c->grav.ti_end_max = ti_grav_end_max; + c->grav.ti_beg_max = ti_grav_beg_max; + c->grav.updated = updated; +} + +/** + * @brief Recursive function gathering end-of-step data. + * + * We recurse until we encounter a timestep or time-step MPI recv task + * as the values will have been set at that level. We then bring these + * values upwards. + * + * @param c The #cell to recurse into. + * @param e The #engine. + */ +void engine_collect_end_of_step_recurse_stars(struct cell *c, + const struct engine *e) { + + /* Skip super-cells (Their values are already set) */ + if (c->timestep != NULL) return; +#ifdef WITH_MPI + if (cell_get_recv(c, task_subtype_tend_spart) != NULL) return; +#endif /* WITH_MPI */ + +#ifdef SWIFT_DEBUG_CHECKS + // if (!c->split) error("Reached a leaf without finding a time-step task!"); +#endif + + /* Counters for the different quantities. */ + size_t updated = 0; + integertime_t ti_stars_end_min = max_nr_timesteps, ti_stars_end_max = 0, + ti_stars_beg_max = 0; + + /* Collect the values from the progeny. */ + for (int k = 0; k < 8; k++) { + struct cell *cp = c->progeny[k]; + if (cp != NULL && cp->stars.count > 0) { + + /* Recurse */ + engine_collect_end_of_step_recurse_stars(cp, e); + + /* And update */ + ti_stars_end_min = min(ti_stars_end_min, cp->stars.ti_end_min); + ti_stars_end_max = max(ti_stars_end_max, cp->stars.ti_end_max); + ti_stars_beg_max = max(ti_stars_beg_max, cp->stars.ti_beg_max); + + updated += cp->stars.updated; + + /* Collected, so clear for next time. */ + cp->stars.updated = 0; + } + } + + /* Store the collected values in the cell. */ + c->stars.ti_end_min = ti_stars_end_min; + c->stars.ti_end_max = ti_stars_end_max; + c->stars.ti_beg_max = ti_stars_beg_max; + c->stars.updated = updated; +} + +/** + * @brief Recursive function gathering end-of-step data. + * + * We recurse until we encounter a timestep or time-step MPI recv task + * as the values will have been set at that level. We then bring these + * values upwards. + * + * @param c The #cell to recurse into. + * @param e The #engine. + */ +void engine_collect_end_of_step_recurse_black_holes(struct cell *c, + const struct engine *e) { + + /* Skip super-cells (Their values are already set) */ + if (c->timestep != NULL) return; +#ifdef WITH_MPI + if (cell_get_recv(c, task_subtype_tend_bpart) != NULL) return; +#endif /* WITH_MPI */ + +#ifdef SWIFT_DEBUG_CHECKS + // if (!c->split) error("Reached a leaf without finding a time-step task!"); +#endif + + /* Counters for the different quantities. */ + size_t updated = 0; + integertime_t ti_black_holes_end_min = max_nr_timesteps, + ti_black_holes_end_max = 0, ti_black_holes_beg_max = 0; + + /* Collect the values from the progeny. */ + for (int k = 0; k < 8; k++) { + struct cell *cp = c->progeny[k]; + if (cp != NULL && cp->black_holes.count > 0) { + + /* Recurse */ + engine_collect_end_of_step_recurse_black_holes(cp, e); + + /* And update */ + ti_black_holes_end_min = + min(ti_black_holes_end_min, cp->black_holes.ti_end_min); + ti_black_holes_end_max = + max(ti_black_holes_end_max, cp->black_holes.ti_end_max); + ti_black_holes_beg_max = + max(ti_black_holes_beg_max, cp->black_holes.ti_beg_max); + + updated += cp->black_holes.updated; + + /* Collected, so clear for next time. */ + cp->black_holes.updated = 0; + } + } + + /* Store the collected values in the cell. 
*/ + c->black_holes.ti_end_min = ti_black_holes_end_min; + c->black_holes.ti_end_max = ti_black_holes_end_max; + c->black_holes.ti_beg_max = ti_black_holes_beg_max; + c->black_holes.updated = updated; +} + +/** + * @brief Mapping function to collect the data from the end of the step + * + * This function will call a recursive function on all the top-level cells + * to collect the information we are after. + * + * @param map_data The list of cells with tasks on this node. + * @param num_elements The number of elements in the list this thread will work + * on. + * @param extra_data The #engine. + */ +void engine_collect_end_of_step_mapper(void *map_data, int num_elements, + void *extra_data) { + + struct end_of_step_data *data = (struct end_of_step_data *)extra_data; + const struct engine *e = data->e; + const int with_hydro = (e->policy & engine_policy_hydro); + const int with_self_grav = (e->policy & engine_policy_self_gravity); + const int with_ext_grav = (e->policy & engine_policy_external_gravity); + const int with_grav = (with_self_grav || with_ext_grav); + const int with_stars = (e->policy & engine_policy_stars); + const int with_black_holes = (e->policy & engine_policy_black_holes); + struct space *s = e->s; + int *local_cells = (int *)map_data; + struct star_formation_history *sfh_top = &data->sfh; + + /* Local collectible */ + size_t updated = 0, g_updated = 0, s_updated = 0, b_updated = 0; + integertime_t ti_hydro_end_min = max_nr_timesteps, ti_hydro_end_max = 0, + ti_hydro_beg_max = 0; + integertime_t ti_gravity_end_min = max_nr_timesteps, ti_gravity_end_max = 0, + ti_gravity_beg_max = 0; + integertime_t ti_stars_end_min = max_nr_timesteps, ti_stars_end_max = 0, + ti_stars_beg_max = 0; + integertime_t ti_black_holes_end_min = max_nr_timesteps, + ti_black_holes_end_max = 0, ti_black_holes_beg_max = 0; + + /* Local Star formation history properties */ + struct star_formation_history sfh_updated; + + /* Initialize the star formation structs for this engine to zero */ + star_formation_logger_init(&sfh_updated); + + for (int ind = 0; ind < num_elements; ind++) { + struct cell *c = &s->cells_top[local_cells[ind]]; + + if (c->hydro.count > 0 || c->grav.count > 0 || c->stars.count > 0 || + c->black_holes.count > 0) { + + /* Make the top-cells recurse */ + if (with_hydro) { + engine_collect_end_of_step_recurse_hydro(c, e); + } + if (with_grav) { + engine_collect_end_of_step_recurse_grav(c, e); + } + if (with_stars) { + engine_collect_end_of_step_recurse_stars(c, e); + } + if (with_black_holes) { + engine_collect_end_of_step_recurse_black_holes(c, e); + } + + /* And aggregate */ + if (c->hydro.ti_end_min > e->ti_current) + ti_hydro_end_min = min(ti_hydro_end_min, c->hydro.ti_end_min); + ti_hydro_end_max = max(ti_hydro_end_max, c->hydro.ti_end_max); + ti_hydro_beg_max = max(ti_hydro_beg_max, c->hydro.ti_beg_max); + + if (c->grav.ti_end_min > e->ti_current) + ti_gravity_end_min = min(ti_gravity_end_min, c->grav.ti_end_min); + ti_gravity_end_max = max(ti_gravity_end_max, c->grav.ti_end_max); + ti_gravity_beg_max = max(ti_gravity_beg_max, c->grav.ti_beg_max); + + if (c->stars.ti_end_min > e->ti_current) + ti_stars_end_min = min(ti_stars_end_min, c->stars.ti_end_min); + ti_stars_end_max = max(ti_stars_end_max, c->stars.ti_end_max); + ti_stars_beg_max = max(ti_stars_beg_max, c->stars.ti_beg_max); + + if (c->black_holes.ti_end_min > e->ti_current) + ti_black_holes_end_min = + min(ti_black_holes_end_min, c->black_holes.ti_end_min); + ti_black_holes_end_max = + max(ti_black_holes_end_max, 
c->black_holes.ti_end_max); + ti_black_holes_beg_max = + max(ti_black_holes_beg_max, c->black_holes.ti_beg_max); + + updated += c->hydro.updated; + g_updated += c->grav.updated; + s_updated += c->stars.updated; + b_updated += c->black_holes.updated; + + /* Check if the cell is inactive and in that case reorder the SFH */ + if (!cell_is_starting_hydro(c, e)) { + star_formation_logger_log_inactive_cell(&c->stars.sfh); + } + + /* Get the star formation history from the current cell and store it in + * the star formation history struct */ + star_formation_logger_add(&sfh_updated, &c->stars.sfh); + + /* Collected, so clear for next time. */ + c->hydro.updated = 0; + c->grav.updated = 0; + c->stars.updated = 0; + c->black_holes.updated = 0; + } + } + + /* Let's write back to the global data. + * We use the space lock to garanty single access*/ + if (lock_lock(&s->lock) == 0) { + data->updated += updated; + data->g_updated += g_updated; + data->s_updated += s_updated; + data->b_updated += b_updated; + + /* Add the SFH information from this engine to the global data */ + star_formation_logger_add(sfh_top, &sfh_updated); + + if (ti_hydro_end_min > e->ti_current) + data->ti_hydro_end_min = min(ti_hydro_end_min, data->ti_hydro_end_min); + data->ti_hydro_end_max = max(ti_hydro_end_max, data->ti_hydro_end_max); + data->ti_hydro_beg_max = max(ti_hydro_beg_max, data->ti_hydro_beg_max); + + if (ti_gravity_end_min > e->ti_current) + data->ti_gravity_end_min = + min(ti_gravity_end_min, data->ti_gravity_end_min); + data->ti_gravity_end_max = + max(ti_gravity_end_max, data->ti_gravity_end_max); + data->ti_gravity_beg_max = + max(ti_gravity_beg_max, data->ti_gravity_beg_max); + + if (ti_stars_end_min > e->ti_current) + data->ti_stars_end_min = min(ti_stars_end_min, data->ti_stars_end_min); + data->ti_stars_end_max = max(ti_stars_end_max, data->ti_stars_end_max); + data->ti_stars_beg_max = max(ti_stars_beg_max, data->ti_stars_beg_max); + + if (ti_black_holes_end_min > e->ti_current) + data->ti_black_holes_end_min = + min(ti_black_holes_end_min, data->ti_black_holes_end_min); + data->ti_black_holes_end_max = + max(ti_black_holes_end_max, data->ti_black_holes_end_max); + data->ti_black_holes_beg_max = + max(ti_black_holes_beg_max, data->ti_black_holes_beg_max); + } + + if (lock_unlock(&s->lock) != 0) error("Failed to unlock the space"); +} + +/** + * @brief Collects the next time-step and rebuild flag. + * + * The next time-step is determined by making each super-cell recurse to + * collect the minimal of ti_end and the number of updated particles. When in + * MPI mode this routines reduces these across all nodes and also collects the + * forcerebuild flag -- this is so that we only use a single collective MPI + * call per step for all these values. + * + * Note that the results are stored in e->collect_group1 struct not in the + * engine fields, unless apply is true. These can be applied field-by-field + * or all at once using collectgroup1_copy(); + * + * @param e The #engine. + * @param apply whether to apply the results to the engine or just keep in the + * group1 struct. 
+ */ +void engine_collect_end_of_step(struct engine *e, int apply) { + + const ticks tic = getticks(); + struct space *s = e->s; + struct end_of_step_data data; + data.updated = 0, data.g_updated = 0, data.s_updated = 0, data.b_updated = 0; + data.ti_hydro_end_min = max_nr_timesteps, data.ti_hydro_end_max = 0, + data.ti_hydro_beg_max = 0; + data.ti_gravity_end_min = max_nr_timesteps, data.ti_gravity_end_max = 0, + data.ti_gravity_beg_max = 0; + data.ti_stars_end_min = max_nr_timesteps, data.ti_stars_end_max = 0, + data.ti_stars_beg_max = 0; + data.ti_black_holes_end_min = max_nr_timesteps, + data.ti_black_holes_end_max = 0, data.ti_black_holes_beg_max = 0; + data.e = e; + + /* Initialize the total SFH of the simulation to zero */ + star_formation_logger_init(&data.sfh); + + /* Collect information from the local top-level cells */ + threadpool_map(&e->threadpool, engine_collect_end_of_step_mapper, + s->local_cells_with_tasks_top, s->nr_local_cells_with_tasks, + sizeof(int), 0, &data); + + /* Get the number of inhibited particles from the space-wide counters + * since these have been updated atomically during the time-steps. */ + data.inhibited = s->nr_inhibited_parts; + data.g_inhibited = s->nr_inhibited_gparts; + data.s_inhibited = s->nr_inhibited_sparts; + data.b_inhibited = s->nr_inhibited_bparts; + + /* Store these in the temporary collection group. */ + collectgroup1_init( + &e->collect_group1, data.updated, data.g_updated, data.s_updated, + data.b_updated, data.inhibited, data.g_inhibited, data.s_inhibited, + data.b_inhibited, data.ti_hydro_end_min, data.ti_hydro_end_max, + data.ti_hydro_beg_max, data.ti_gravity_end_min, data.ti_gravity_end_max, + data.ti_gravity_beg_max, data.ti_stars_end_min, data.ti_stars_end_max, + data.ti_stars_beg_max, data.ti_black_holes_end_min, + data.ti_black_holes_end_max, data.ti_black_holes_beg_max, e->forcerebuild, + e->s->tot_cells, e->sched.nr_tasks, + (float)e->sched.nr_tasks / (float)e->s->tot_cells, data.sfh); + +/* Aggregate collective data from the different nodes for this step. */ +#ifdef WITH_MPI + collectgroup1_reduce(&e->collect_group1); + +#ifdef SWIFT_DEBUG_CHECKS + { + /* Check the above using the original MPI calls. 
*/ + integertime_t in_i[2], out_i[2]; + in_i[0] = 0; + in_i[1] = 0; + out_i[0] = data.ti_hydro_end_min; + out_i[1] = data.ti_gravity_end_min; + if (MPI_Allreduce(out_i, in_i, 2, MPI_LONG_LONG_INT, MPI_MIN, + MPI_COMM_WORLD) != MPI_SUCCESS) + error("Failed to aggregate ti_end_min."); + if (in_i[0] != (long long)e->collect_group1.ti_hydro_end_min) + error("Failed to get same ti_hydro_end_min, is %lld, should be %lld", + in_i[0], e->collect_group1.ti_hydro_end_min); + if (in_i[1] != (long long)e->collect_group1.ti_gravity_end_min) + error("Failed to get same ti_gravity_end_min, is %lld, should be %lld", + in_i[1], e->collect_group1.ti_gravity_end_min); + + long long in_ll[4], out_ll[4]; + out_ll[0] = data.updated; + out_ll[1] = data.g_updated; + out_ll[2] = data.s_updated; + out_ll[3] = data.b_updated; + if (MPI_Allreduce(out_ll, in_ll, 4, MPI_LONG_LONG_INT, MPI_SUM, + MPI_COMM_WORLD) != MPI_SUCCESS) + error("Failed to aggregate particle counts."); + if (in_ll[0] != (long long)e->collect_group1.updated) + error("Failed to get same updated, is %lld, should be %lld", in_ll[0], + e->collect_group1.updated); + if (in_ll[1] != (long long)e->collect_group1.g_updated) + error("Failed to get same g_updated, is %lld, should be %lld", in_ll[1], + e->collect_group1.g_updated); + if (in_ll[2] != (long long)e->collect_group1.s_updated) + error("Failed to get same s_updated, is %lld, should be %lld", in_ll[2], + e->collect_group1.s_updated); + if (in_ll[3] != (long long)e->collect_group1.b_updated) + error("Failed to get same b_updated, is %lld, should be %lld", in_ll[3], + e->collect_group1.b_updated); + + out_ll[0] = data.inhibited; + out_ll[1] = data.g_inhibited; + out_ll[2] = data.s_inhibited; + out_ll[3] = data.b_inhibited; + if (MPI_Allreduce(out_ll, in_ll, 4, MPI_LONG_LONG_INT, MPI_SUM, + MPI_COMM_WORLD) != MPI_SUCCESS) + error("Failed to aggregate particle counts."); + if (in_ll[0] != (long long)e->collect_group1.inhibited) + error("Failed to get same inhibited, is %lld, should be %lld", in_ll[0], + e->collect_group1.inhibited); + if (in_ll[1] != (long long)e->collect_group1.g_inhibited) + error("Failed to get same g_inhibited, is %lld, should be %lld", in_ll[1], + e->collect_group1.g_inhibited); + if (in_ll[2] != (long long)e->collect_group1.s_inhibited) + error("Failed to get same s_inhibited, is %lld, should be %lld", in_ll[2], + e->collect_group1.s_inhibited); + if (in_ll[3] != (long long)e->collect_group1.b_inhibited) + error("Failed to get same b_inhibited, is %lld, should be %lld", in_ll[3], + e->collect_group1.b_inhibited); + + int buff = 0; + if (MPI_Allreduce(&e->forcerebuild, &buff, 1, MPI_INT, MPI_MAX, + MPI_COMM_WORLD) != MPI_SUCCESS) + error("Failed to aggregate the rebuild flag across nodes."); + if (!!buff != !!e->collect_group1.forcerebuild) + error( + "Failed to get same rebuild flag from all nodes, is %d," + "should be %d", + buff, e->collect_group1.forcerebuild); + } +#endif +#endif + + /* Apply to the engine, if requested. */ + if (apply) collectgroup1_apply(&e->collect_group1, e); + + if (e->verbose) + message("took %.3f %s.", clocks_from_ticks(getticks() - tic), + clocks_getunit()); +} diff --git a/src/engine_fof.c b/src/engine_fof.c new file mode 100644 index 0000000000000000000000000000000000000000..f1bb5b452104642f68b4a9987a1ab8d8e3b0162b --- /dev/null +++ b/src/engine_fof.c @@ -0,0 +1,150 @@ +/******************************************************************************* + * This file is part of SWIFT. 
+ * Copyright (c) 2012 Pedro Gonnet (pedro.gonnet@durham.ac.uk) + * Matthieu Schaller (matthieu.schaller@durham.ac.uk) + * 2015 Peter W. Draper (p.w.draper@durham.ac.uk) + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published + * by the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public License + * along with this program. If not, see <http://www.gnu.org/licenses/>. + * + ******************************************************************************/ + +/* Config parameters. */ +#include "../config.h" + +/* This object's header. */ +#include "engine.h" + +/** + * @brief Activate all the #gpart communications in preparation + * fof a call to FOF. + * + * @param e The #engine to act on. + */ +void engine_activate_gpart_comms(struct engine *e) { + +#ifdef WITH_MPI + + const ticks tic = getticks(); + + struct scheduler *s = &e->sched; + const int nr_tasks = s->nr_tasks; + struct task *tasks = s->tasks; + + for (int k = 0; k < nr_tasks; ++k) { + + struct task *t = &tasks[k]; + + if ((t->type == task_type_send) && (t->subtype == task_subtype_gpart)) { + scheduler_activate(s, t); + } else if ((t->type == task_type_recv) && + (t->subtype == task_subtype_gpart)) { + scheduler_activate(s, t); + } else { + t->skip = 1; + } + } + + if (e->verbose) + message("took %.3f %s.", clocks_from_ticks(getticks() - tic), + clocks_getunit()); + +#else + error("Calling an MPI function in non-MPI mode."); +#endif +} + +/** + * @brief Activate all the FOF tasks. + * + * Marks all the other task types to be skipped. + * + * @param e The #engine to act on. + */ +void engine_activate_fof_tasks(struct engine *e) { + + const ticks tic = getticks(); + + struct scheduler *s = &e->sched; + const int nr_tasks = s->nr_tasks; + struct task *tasks = s->tasks; + + for (int k = 0; k < nr_tasks; k++) { + + struct task *t = &tasks[k]; + + if (t->type == task_type_fof_self || t->type == task_type_fof_pair) + scheduler_activate(s, t); + else + t->skip = 1; + } + + if (e->verbose) + message("took %.3f %s.", clocks_from_ticks(getticks() - tic), + clocks_getunit()); +} + +/** + * @brief Run a FOF search. + * + * @param e the engine + * @param dump_results Are we writing group catalogues to output files? + * @param seed_black_holes Are we seeding black holes? + */ +void engine_fof(struct engine *e, const int dump_results, + const int seed_black_holes) { + +#ifdef WITH_FOF + + ticks tic = getticks(); + + /* Compute number of DM particles */ + const long long total_nr_baryons = + e->total_nr_parts + e->total_nr_sparts + e->total_nr_bparts; + const long long total_nr_dmparts = + e->total_nr_gparts - e->total_nr_DM_background_gparts - total_nr_baryons; + + /* Initialise FOF parameters and allocate FOF arrays. */ + fof_allocate(e->s, total_nr_dmparts, e->fof_properties); + + /* Make FOF tasks */ + engine_make_fof_tasks(e); + + /* and activate them. */ + engine_activate_fof_tasks(e); + + /* Perform local FOF tasks. */ + engine_launch(e); + + /* Perform FOF search over foreign particles and + * find groups which require black hole seeding. 
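+   * The group linking over foreign particles, the optional catalogue output and the optional black-hole seeding all happen inside fof_search_tree(), steered by the dump_results and seed_black_holes flags.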
*/ + fof_search_tree(e->fof_properties, e->black_holes_properties, + e->physical_constants, e->cosmology, e->s, dump_results, + seed_black_holes); + + /* Reset flag. */ + e->run_fof = 0; + + /* Flag that a FOF has taken place */ + e->step_props |= engine_step_prop_fof; + + /* ... and find the next FOF time */ + if (seed_black_holes) engine_compute_next_fof_time(e); + + if (engine_rank == 0) + message("Complete FOF search took: %.3f %s.", + clocks_from_ticks(getticks() - tic), clocks_getunit()); +#else + error("SWIFT was not compiled with FOF enabled!"); +#endif +} diff --git a/src/engine_maketasks.c b/src/engine_maketasks.c index 05bde9091dd55904063133c1a70cc004f0a05512..42590cb5f41539d11ca39639c369ea68472e9826 100644 --- a/src/engine_maketasks.c +++ b/src/engine_maketasks.c @@ -74,6 +74,9 @@ void engine_addtasks_send_gravity(struct engine *e, struct cell *ci, struct scheduler *s = &e->sched; const int nodeID = cj->nodeID; + /* Early abort (are we below the level where tasks are)? */ + if (!cell_get_flag(ci, cell_flag_has_tasks)) return; + /* Check if any of the gravity tasks are for the target node. */ for (l = ci->grav.grav; l != NULL; l = l->next) if (l->t->ci->nodeID == nodeID || @@ -141,6 +144,9 @@ void engine_addtasks_send_hydro(struct engine *e, struct cell *ci, struct scheduler *s = &e->sched; const int nodeID = cj->nodeID; + /* Early abort (are we below the level where tasks are)? */ + if (!cell_get_flag(ci, cell_flag_has_tasks)) return; + /* Check if any of the density tasks are for the target node. */ for (l = ci->hydro.density; l != NULL; l = l->next) if (l->t->ci->nodeID == nodeID || @@ -248,6 +254,9 @@ void engine_addtasks_send_stars(struct engine *e, struct cell *ci, struct scheduler *s = &e->sched; const int nodeID = cj->nodeID; + /* Early abort (are we below the level where tasks are)? */ + if (!cell_get_flag(ci, cell_flag_has_tasks)) return; + if (t_sf_counts == NULL && with_star_formation && ci->hydro.count > 0) { #ifdef SWIFT_DEBUG_CHECKS if (ci->depth != 0) @@ -339,6 +348,9 @@ void engine_addtasks_send_black_holes(struct engine *e, struct cell *ci, struct scheduler *s = &e->sched; const int nodeID = cj->nodeID; + /* Early abort (are we below the level where tasks are)? */ + if (!cell_get_flag(ci, cell_flag_has_tasks)) return; + /* Check if any of the density tasks are for the target node. */ for (l = ci->black_holes.density; l != NULL; l = l->next) if (l->t->ci->nodeID == nodeID || @@ -434,6 +446,9 @@ void engine_addtasks_recv_hydro(struct engine *e, struct cell *c, #ifdef WITH_MPI struct scheduler *s = &e->sched; + /* Early abort (are we below the level where tasks are)? */ + if (!cell_get_flag(c, cell_flag_has_tasks)) return; + /* Have we reached a level where there are any hydro tasks ? */ if (t_xv == NULL && c->hydro.density != NULL) { @@ -533,6 +548,9 @@ void engine_addtasks_recv_stars(struct engine *e, struct cell *c, #ifdef WITH_MPI struct scheduler *s = &e->sched; + /* Early abort (are we below the level where tasks are)? */ + if (!cell_get_flag(c, cell_flag_has_tasks)) return; + if (t_sf_counts == NULL && with_star_formation && c->hydro.count > 0) { #ifdef SWIFT_DEBUG_CHECKS if (c->depth != 0) @@ -624,6 +642,9 @@ void engine_addtasks_recv_black_holes(struct engine *e, struct cell *c, #ifdef WITH_MPI struct scheduler *s = &e->sched; + /* Early abort (are we below the level where tasks are)? */ + if (!cell_get_flag(c, cell_flag_has_tasks)) return; + /* Have we reached a level where there are any black_holes tasks ? 
*/ if (t_rho == NULL && c->black_holes.density != NULL) { @@ -714,6 +735,9 @@ void engine_addtasks_recv_gravity(struct engine *e, struct cell *c, #ifdef WITH_MPI struct scheduler *s = &e->sched; + /* Early abort (are we below the level where tasks are)? */ + if (!cell_get_flag(c, cell_flag_has_tasks)) return; + /* Have we reached a level where there are any gravity tasks ? */ if (t_grav == NULL && c->grav.grav != NULL) { diff --git a/src/engine_redistribute.c b/src/engine_redistribute.c new file mode 100644 index 0000000000000000000000000000000000000000..3132ad2665c67cd244ae1ec9ece75726788c1506 --- /dev/null +++ b/src/engine_redistribute.c @@ -0,0 +1,1031 @@ +/******************************************************************************* + * This file is part of SWIFT. + * Copyright (c) 2012 Pedro Gonnet (pedro.gonnet@durham.ac.uk) + * Matthieu Schaller (matthieu.schaller@durham.ac.uk) + * 2015 Peter W. Draper (p.w.draper@durham.ac.uk) + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published + * by the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public License + * along with this program. If not, see <http://www.gnu.org/licenses/>. + * + ******************************************************************************/ + +/* Config parameters. */ +#include "../config.h" + +/* This object's header. */ +#include "engine.h" + +/* Local headers. */ +#include "memswap.h" + +#ifdef WITH_MPI + +/** + * Do the exchange of one type of particles with all the other nodes. + * + * @param label a label for the memory allocations of this particle type. + * @param counts 2D array with the counts of particles to exchange with + * each other node. + * @param parts the particle data to exchange + * @param new_nr_parts the number of particles this node will have after all + * exchanges have completed. + * @param sizeofparts sizeof the particle struct. + * @param alignsize the memory alignment required for this particle type. + * @param mpi_type the MPI_Datatype for these particles. + * @param nr_nodes the number of nodes to exchange with. + * @param nodeID the id of this node. + * + * @result new particle data constructed from all the exchanges with the + * given alignment. + */ +static void *engine_do_redistribute(const char *label, int *counts, char *parts, + size_t new_nr_parts, size_t sizeofparts, + size_t alignsize, MPI_Datatype mpi_type, + int nr_nodes, int nodeID) { + + /* Allocate a new particle array with some extra margin */ + char *parts_new = NULL; + if (swift_memalign( + label, (void **)&parts_new, alignsize, + sizeofparts * new_nr_parts * engine_redistribute_alloc_margin) != 0) + error("Failed to allocate new particle data."); + + /* Prepare MPI requests for the asynchronous communications */ + MPI_Request *reqs; + if ((reqs = (MPI_Request *)malloc(sizeof(MPI_Request) * 2 * nr_nodes)) == + NULL) + error("Failed to allocate MPI request list."); + + /* Only send and receive only "chunk" particles per request. So we need to + * loop as many times as necessary here. Make 2Gb/sizeofparts so we only + * send 2Gb packets. 
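+   * (chunk = INT_MAX / sizeofparts keeps each individual message below INT_MAX bytes, i.e. the ~2GB packets mentioned above.)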
*/ + const int chunk = INT_MAX / sizeofparts; + int sent = 0; + int recvd = 0; + + int activenodes = 1; + while (activenodes) { + + for (int k = 0; k < 2 * nr_nodes; k++) reqs[k] = MPI_REQUEST_NULL; + + /* Emit the sends and recvs for the data. */ + size_t offset_send = sent; + size_t offset_recv = recvd; + activenodes = 0; + + for (int k = 0; k < nr_nodes; k++) { + + /* Indices in the count arrays of the node of interest */ + const int ind_send = nodeID * nr_nodes + k; + const int ind_recv = k * nr_nodes + nodeID; + + /* Are we sending any data this loop? */ + int sending = counts[ind_send] - sent; + if (sending > 0) { + activenodes++; + if (sending > chunk) sending = chunk; + + /* If the send and receive is local then just copy. */ + if (k == nodeID) { + int receiving = counts[ind_recv] - recvd; + if (receiving > chunk) receiving = chunk; + memcpy(&parts_new[offset_recv * sizeofparts], + &parts[offset_send * sizeofparts], sizeofparts * receiving); + } else { + /* Otherwise send it. */ + int res = + MPI_Isend(&parts[offset_send * sizeofparts], sending, mpi_type, k, + ind_send, MPI_COMM_WORLD, &reqs[2 * k + 0]); + if (res != MPI_SUCCESS) + mpi_error(res, "Failed to isend parts to node %i.", k); + } + } + + /* If we're sending to this node, then move past it to next. */ + if (counts[ind_send] > 0) offset_send += counts[ind_send]; + + /* Are we receiving any data from this node? Note already done if coming + * from this node. */ + if (k != nodeID) { + int receiving = counts[ind_recv] - recvd; + if (receiving > 0) { + activenodes++; + if (receiving > chunk) receiving = chunk; + int res = MPI_Irecv(&parts_new[offset_recv * sizeofparts], receiving, + mpi_type, k, ind_recv, MPI_COMM_WORLD, + &reqs[2 * k + 1]); + if (res != MPI_SUCCESS) + mpi_error(res, "Failed to emit irecv of parts from node %i.", k); + } + } + + /* If we're receiving from this node, then move past it to next. */ + if (counts[ind_recv] > 0) offset_recv += counts[ind_recv]; + } + + /* Wait for all the sends and recvs to tumble in. */ + MPI_Status stats[2 * nr_nodes]; + int res; + if ((res = MPI_Waitall(2 * nr_nodes, reqs, stats)) != MPI_SUCCESS) { + for (int k = 0; k < 2 * nr_nodes; k++) { + char buff[MPI_MAX_ERROR_STRING]; + MPI_Error_string(stats[k].MPI_ERROR, buff, &res); + message("request from source %i, tag %i has error '%s'.", + stats[k].MPI_SOURCE, stats[k].MPI_TAG, buff); + } + error("Failed during waitall for part data."); + } + + /* Move to next chunks. */ + sent += chunk; + recvd += chunk; + } + + /* Free temps. */ + free(reqs); + + /* And return new memory. */ + return parts_new; +} +#endif + +#ifdef WITH_MPI /* redist_mapper */ + +/* Support for engine_redistribute threadpool dest mappers. */ +struct redist_mapper_data { + int *counts; + int *dest; + int nodeID; + int nr_nodes; + struct cell *cells; + struct space *s; + void *base; +}; + +/* Generic function for accumulating counts for TYPE parts. Note + * we use a local counts array to avoid the atomic_add in the parts + * loop. 
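+ * Each thread box-wraps the particle positions, looks up the destination top-level cell and its node ID (stored in dest[]), and only adds its private nr_nodes * nr_nodes counts into the shared array once, at the end.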
*/ +#define ENGINE_REDISTRIBUTE_DEST_MAPPER(TYPE) \ + engine_redistribute_dest_mapper_##TYPE(void *map_data, int num_elements, \ + void *extra_data) { \ + struct TYPE *parts = (struct TYPE *)map_data; \ + struct redist_mapper_data *mydata = \ + (struct redist_mapper_data *)extra_data; \ + struct space *s = mydata->s; \ + int *dest = \ + mydata->dest + (ptrdiff_t)(parts - (struct TYPE *)mydata->base); \ + int *lcounts = NULL; \ + if ((lcounts = (int *)calloc( \ + sizeof(int), mydata->nr_nodes * mydata->nr_nodes)) == NULL) \ + error("Failed to allocate counts thread-specific buffer"); \ + for (int k = 0; k < num_elements; k++) { \ + for (int j = 0; j < 3; j++) { \ + if (parts[k].x[j] < 0.0) \ + parts[k].x[j] += s->dim[j]; \ + else if (parts[k].x[j] >= s->dim[j]) \ + parts[k].x[j] -= s->dim[j]; \ + } \ + const int cid = cell_getid(s->cdim, parts[k].x[0] * s->iwidth[0], \ + parts[k].x[1] * s->iwidth[1], \ + parts[k].x[2] * s->iwidth[2]); \ + dest[k] = s->cells_top[cid].nodeID; \ + size_t ind = mydata->nodeID * mydata->nr_nodes + dest[k]; \ + lcounts[ind] += 1; \ + } \ + for (int k = 0; k < (mydata->nr_nodes * mydata->nr_nodes); k++) \ + atomic_add(&mydata->counts[k], lcounts[k]); \ + free(lcounts); \ + } + +/** + * @brief Accumulate the counts of particles per cell. + * Threadpool helper for accumulating the counts of particles per cell. + * + * part version. + */ +static void ENGINE_REDISTRIBUTE_DEST_MAPPER(part); + +/** + * @brief Accumulate the counts of star particles per cell. + * Threadpool helper for accumulating the counts of particles per cell. + * + * spart version. + */ +static void ENGINE_REDISTRIBUTE_DEST_MAPPER(spart); + +/** + * @brief Accumulate the counts of gravity particles per cell. + * Threadpool helper for accumulating the counts of particles per cell. + * + * gpart version. + */ +static void ENGINE_REDISTRIBUTE_DEST_MAPPER(gpart); + +/** + * @brief Accumulate the counts of black holes particles per cell. + * Threadpool helper for accumulating the counts of particles per cell. + * + * bpart version. + */ +static void ENGINE_REDISTRIBUTE_DEST_MAPPER(bpart); + +#endif /* redist_mapper_data */ + +#ifdef WITH_MPI /* savelink_mapper_data */ + +/* Support for saving the linkage between gparts and parts/sparts. */ +struct savelink_mapper_data { + int nr_nodes; + int *counts; + void *parts; + int nodeID; +}; + +/** + * @brief Save the offset of each gravity partner of a part or spart. + * + * The offset is from the start of the sorted particles to be sent to a node. + * This is possible as parts without gravity partners have a positive id. + * These offsets are used to restore the pointers on the receiving node. + * + * CHECKS should be eliminated as dead code when optimizing. 
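+ * (Concretely, each linked gpart's id_or_neg_offset is overwritten with minus the particle's running index among the linked particles in that node's send block.)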
+ */ +#define ENGINE_REDISTRIBUTE_SAVELINK_MAPPER(TYPE, CHECKS) \ + engine_redistribute_savelink_mapper_##TYPE(void *map_data, int num_elements, \ + void *extra_data) { \ + int *nodes = (int *)map_data; \ + struct savelink_mapper_data *mydata = \ + (struct savelink_mapper_data *)extra_data; \ + int nodeID = mydata->nodeID; \ + int nr_nodes = mydata->nr_nodes; \ + int *counts = mydata->counts; \ + struct TYPE *parts = (struct TYPE *)mydata->parts; \ + \ + for (int j = 0; j < num_elements; j++) { \ + int node = nodes[j]; \ + int count = 0; \ + size_t offset = 0; \ + for (int i = 0; i < node; i++) offset += counts[nodeID * nr_nodes + i]; \ + \ + for (int k = 0; k < counts[nodeID * nr_nodes + node]; k++) { \ + if (parts[k + offset].gpart != NULL) { \ + if (CHECKS) \ + if (parts[k + offset].gpart->id_or_neg_offset > 0) \ + error("Trying to link a partnerless " #TYPE "!"); \ + parts[k + offset].gpart->id_or_neg_offset = -count; \ + count++; \ + } \ + } \ + } \ + } + +/** + * @brief Save position of part-gpart links. + * Threadpool helper for accumulating the counts of particles per cell. + */ +#ifdef SWIFT_DEBUG_CHECKS +static void ENGINE_REDISTRIBUTE_SAVELINK_MAPPER(part, 1); +#else +static void ENGINE_REDISTRIBUTE_SAVELINK_MAPPER(part, 0); +#endif + +/** + * @brief Save position of spart-gpart links. + * Threadpool helper for accumulating the counts of particles per cell. + */ +#ifdef SWIFT_DEBUG_CHECKS +static void ENGINE_REDISTRIBUTE_SAVELINK_MAPPER(spart, 1); +#else +static void ENGINE_REDISTRIBUTE_SAVELINK_MAPPER(spart, 0); +#endif + +/** + * @brief Save position of bpart-gpart links. + * Threadpool helper for accumulating the counts of particles per cell. + */ +#ifdef SWIFT_DEBUG_CHECKS +static void ENGINE_REDISTRIBUTE_SAVELINK_MAPPER(bpart, 1); +#else +static void ENGINE_REDISTRIBUTE_SAVELINK_MAPPER(bpart, 0); +#endif + +#endif /* savelink_mapper_data */ + +#ifdef WITH_MPI /* relink_mapper_data */ + +/* Support for relinking parts, gparts, sparts and bparts after moving between + * nodes. */ +struct relink_mapper_data { + int nodeID; + int nr_nodes; + int *counts; + int *s_counts; + int *g_counts; + int *b_counts; + struct space *s; +}; + +/** + * @brief Restore the part/gpart and spart/gpart links for a list of nodes. + * + * @param map_data address of nodes to process. + * @param num_elements the number nodes to process. + * @param extra_data additional data defining the context (a + * relink_mapper_data). + */ +static void engine_redistribute_relink_mapper(void *map_data, int num_elements, + void *extra_data) { + + int *nodes = (int *)map_data; + struct relink_mapper_data *mydata = (struct relink_mapper_data *)extra_data; + + int nodeID = mydata->nodeID; + int nr_nodes = mydata->nr_nodes; + int *counts = mydata->counts; + int *g_counts = mydata->g_counts; + int *s_counts = mydata->s_counts; + int *b_counts = mydata->b_counts; + struct space *s = mydata->s; + + for (int i = 0; i < num_elements; i++) { + + int node = nodes[i]; + + /* Get offsets to correct parts of the counts arrays for this node. */ + size_t offset_parts = 0; + size_t offset_gparts = 0; + size_t offset_sparts = 0; + size_t offset_bparts = 0; + for (int n = 0; n < node; n++) { + int ind_recv = n * nr_nodes + nodeID; + offset_parts += counts[ind_recv]; + offset_gparts += g_counts[ind_recv]; + offset_sparts += s_counts[ind_recv]; + offset_bparts += b_counts[ind_recv]; + } + + /* Number of gparts sent from this node. 
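+     * (i.e. g_counts[node * nr_nodes + nodeID]: the number of gparts that rank `node' sent to this rank.)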
*/ + int ind_recv = node * nr_nodes + nodeID; + const size_t count_gparts = g_counts[ind_recv]; + + /* Loop over the gparts received from this node */ + for (size_t k = offset_gparts; k < offset_gparts + count_gparts; k++) { + + /* Does this gpart have a gas partner ? */ + if (s->gparts[k].type == swift_type_gas) { + + const ptrdiff_t partner_index = + offset_parts - s->gparts[k].id_or_neg_offset; + + /* Re-link */ + s->gparts[k].id_or_neg_offset = -partner_index; + s->parts[partner_index].gpart = &s->gparts[k]; + } + + /* Does this gpart have a star partner ? */ + else if (s->gparts[k].type == swift_type_stars) { + + const ptrdiff_t partner_index = + offset_sparts - s->gparts[k].id_or_neg_offset; + + /* Re-link */ + s->gparts[k].id_or_neg_offset = -partner_index; + s->sparts[partner_index].gpart = &s->gparts[k]; + } + + /* Does this gpart have a black hole partner ? */ + else if (s->gparts[k].type == swift_type_black_hole) { + + const ptrdiff_t partner_index = + offset_bparts - s->gparts[k].id_or_neg_offset; + + /* Re-link */ + s->gparts[k].id_or_neg_offset = -partner_index; + s->bparts[partner_index].gpart = &s->gparts[k]; + } + } + } +} + +#endif /* relink_mapper_data */ + +/** + * @brief Redistribute the particles amongst the nodes according + * to their cell's node IDs. + * + * The strategy here is as follows: + * 1) Each node counts the number of particles it has to send to each other + * node. + * 2) The number of particles of each type is then exchanged. + * 3) The particles to send are placed in a temporary buffer in which the + * part-gpart links are preserved. + * 4) Each node allocates enough space for the new particles. + * 5) (Asynchronous) communications are issued to transfer the data. + * + * + * @param e The #engine. + */ +void engine_redistribute(struct engine *e) { + +#ifdef WITH_MPI + + const int nr_nodes = e->nr_nodes; + const int nodeID = e->nodeID; + struct space *s = e->s; + struct cell *cells = s->cells_top; + const int nr_cells = s->nr_cells; + struct xpart *xparts = s->xparts; + struct part *parts = s->parts; + struct gpart *gparts = s->gparts; + struct spart *sparts = s->sparts; + struct bpart *bparts = s->bparts; + ticks tic = getticks(); + + size_t nr_parts = s->nr_parts; + size_t nr_gparts = s->nr_gparts; + size_t nr_sparts = s->nr_sparts; + size_t nr_bparts = s->nr_bparts; + + /* Start by moving inhibited particles to the end of the arrays */ + for (size_t k = 0; k < nr_parts; /* void */) { + if (parts[k].time_bin == time_bin_inhibited || + parts[k].time_bin == time_bin_not_created) { + nr_parts -= 1; + + /* Swap the particle */ + memswap(&parts[k], &parts[nr_parts], sizeof(struct part)); + + /* Swap the xpart */ + memswap(&xparts[k], &xparts[nr_parts], sizeof(struct xpart)); + + /* Swap the link with the gpart */ + if (parts[k].gpart != NULL) { + parts[k].gpart->id_or_neg_offset = -k; + } + if (parts[nr_parts].gpart != NULL) { + parts[nr_parts].gpart->id_or_neg_offset = -nr_parts; + } + } else { + k++; + } + } + + /* Now move inhibited star particles to the end of the arrays */ + for (size_t k = 0; k < nr_sparts; /* void */) { + if (sparts[k].time_bin == time_bin_inhibited || + sparts[k].time_bin == time_bin_not_created) { + nr_sparts -= 1; + + /* Swap the particle */ + memswap(&s->sparts[k], &s->sparts[nr_sparts], sizeof(struct spart)); + + /* Swap the link with the gpart */ + if (s->sparts[k].gpart != NULL) { + s->sparts[k].gpart->id_or_neg_offset = -k; + } + if (s->sparts[nr_sparts].gpart != NULL) { + s->sparts[nr_sparts].gpart->id_or_neg_offset = 
-nr_sparts; + } + } else { + k++; + } + } + + /* Now move inhibited black hole particles to the end of the arrays */ + for (size_t k = 0; k < nr_bparts; /* void */) { + if (bparts[k].time_bin == time_bin_inhibited || + bparts[k].time_bin == time_bin_not_created) { + nr_bparts -= 1; + + /* Swap the particle */ + memswap(&s->bparts[k], &s->bparts[nr_bparts], sizeof(struct bpart)); + + /* Swap the link with the gpart */ + if (s->bparts[k].gpart != NULL) { + s->bparts[k].gpart->id_or_neg_offset = -k; + } + if (s->bparts[nr_bparts].gpart != NULL) { + s->bparts[nr_bparts].gpart->id_or_neg_offset = -nr_bparts; + } + } else { + k++; + } + } + + /* Finally do the same with the gravity particles */ + for (size_t k = 0; k < nr_gparts; /* void */) { + if (gparts[k].time_bin == time_bin_inhibited || + gparts[k].time_bin == time_bin_not_created) { + nr_gparts -= 1; + + /* Swap the particle */ + memswap(&s->gparts[k], &s->gparts[nr_gparts], sizeof(struct gpart)); + + /* Swap the link with part/spart */ + if (s->gparts[k].type == swift_type_gas) { + s->parts[-s->gparts[k].id_or_neg_offset].gpart = &s->gparts[k]; + } else if (s->gparts[k].type == swift_type_stars) { + s->sparts[-s->gparts[k].id_or_neg_offset].gpart = &s->gparts[k]; + } else if (s->gparts[k].type == swift_type_black_hole) { + s->bparts[-s->gparts[k].id_or_neg_offset].gpart = &s->gparts[k]; + } + + if (s->gparts[nr_gparts].type == swift_type_gas) { + s->parts[-s->gparts[nr_gparts].id_or_neg_offset].gpart = + &s->gparts[nr_gparts]; + } else if (s->gparts[nr_gparts].type == swift_type_stars) { + s->sparts[-s->gparts[nr_gparts].id_or_neg_offset].gpart = + &s->gparts[nr_gparts]; + } else if (s->gparts[nr_gparts].type == swift_type_black_hole) { + s->bparts[-s->gparts[nr_gparts].id_or_neg_offset].gpart = + &s->gparts[nr_gparts]; + } + } else { + k++; + } + } + + /* Now we are ready to deal with real particles and can start the exchange. */ + + /* Allocate temporary arrays to store the counts of particles to be sent + * and the destination of each particle */ + int *counts; + if ((counts = (int *)calloc(sizeof(int), nr_nodes * nr_nodes)) == NULL) + error("Failed to allocate counts temporary buffer."); + + int *dest; + if ((dest = (int *)swift_malloc("dest", sizeof(int) * nr_parts)) == NULL) + error("Failed to allocate dest temporary buffer."); + + /* Simple index of node IDs, used for mappers over nodes. */ + int *nodes = NULL; + if ((nodes = (int *)malloc(sizeof(int) * nr_nodes)) == NULL) + error("Failed to allocate nodes temporary buffer."); + for (int k = 0; k < nr_nodes; k++) nodes[k] = k; + + /* Get destination of each particle */ + struct redist_mapper_data redist_data; + redist_data.s = s; + redist_data.nodeID = nodeID; + redist_data.nr_nodes = nr_nodes; + + redist_data.counts = counts; + redist_data.dest = dest; + redist_data.base = (void *)parts; + + threadpool_map(&e->threadpool, engine_redistribute_dest_mapper_part, parts, + nr_parts, sizeof(struct part), 0, &redist_data); + + /* Sort the particles according to their cell index. */ + if (nr_parts > 0) + space_parts_sort(s->parts, s->xparts, dest, &counts[nodeID * nr_nodes], + nr_nodes, 0); + +#ifdef SWIFT_DEBUG_CHECKS + /* Verify that the part have been sorted correctly. 
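+   * Every part must now sit inside the top-level cell it was binned into, that cell's owner must match the destination stored in dest[], and no inhibited or not-yet-created particles should remain after the sort.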
*/ + for (size_t k = 0; k < nr_parts; k++) { + const struct part *p = &s->parts[k]; + + if (p->time_bin == time_bin_inhibited) + error("Inhibited particle found after sorting!"); + + if (p->time_bin == time_bin_not_created) + error("Inhibited particle found after sorting!"); + + /* New cell index */ + const int new_cid = + cell_getid(s->cdim, p->x[0] * s->iwidth[0], p->x[1] * s->iwidth[1], + p->x[2] * s->iwidth[2]); + + /* New cell of this part */ + const struct cell *c = &s->cells_top[new_cid]; + const int new_node = c->nodeID; + + if (dest[k] != new_node) + error("part's new node index not matching sorted index."); + + if (p->x[0] < c->loc[0] || p->x[0] > c->loc[0] + c->width[0] || + p->x[1] < c->loc[1] || p->x[1] > c->loc[1] + c->width[1] || + p->x[2] < c->loc[2] || p->x[2] > c->loc[2] + c->width[2]) + error("part not sorted into the right top-level cell!"); + } +#endif + + /* We will need to re-link the gpart partners of parts, so save their + * relative positions in the sent lists. */ + if (nr_parts > 0 && nr_gparts > 0) { + + struct savelink_mapper_data savelink_data; + savelink_data.nr_nodes = nr_nodes; + savelink_data.counts = counts; + savelink_data.parts = (void *)parts; + savelink_data.nodeID = nodeID; + threadpool_map(&e->threadpool, engine_redistribute_savelink_mapper_part, + nodes, nr_nodes, sizeof(int), 0, &savelink_data); + } + swift_free("dest", dest); + + /* Get destination of each s-particle */ + int *s_counts; + if ((s_counts = (int *)calloc(sizeof(int), nr_nodes * nr_nodes)) == NULL) + error("Failed to allocate s_counts temporary buffer."); + + int *s_dest; + if ((s_dest = (int *)swift_malloc("s_dest", sizeof(int) * nr_sparts)) == NULL) + error("Failed to allocate s_dest temporary buffer."); + + redist_data.counts = s_counts; + redist_data.dest = s_dest; + redist_data.base = (void *)sparts; + + threadpool_map(&e->threadpool, engine_redistribute_dest_mapper_spart, sparts, + nr_sparts, sizeof(struct spart), 0, &redist_data); + + /* Sort the particles according to their cell index. */ + if (nr_sparts > 0) + space_sparts_sort(s->sparts, s_dest, &s_counts[nodeID * nr_nodes], nr_nodes, + 0); + +#ifdef SWIFT_DEBUG_CHECKS + /* Verify that the spart have been sorted correctly. */ + for (size_t k = 0; k < nr_sparts; k++) { + const struct spart *sp = &s->sparts[k]; + + if (sp->time_bin == time_bin_inhibited) + error("Inhibited particle found after sorting!"); + + if (sp->time_bin == time_bin_not_created) + error("Inhibited particle found after sorting!"); + + /* New cell index */ + const int new_cid = + cell_getid(s->cdim, sp->x[0] * s->iwidth[0], sp->x[1] * s->iwidth[1], + sp->x[2] * s->iwidth[2]); + + /* New cell of this spart */ + const struct cell *c = &s->cells_top[new_cid]; + const int new_node = c->nodeID; + + if (s_dest[k] != new_node) + error("spart's new node index not matching sorted index."); + + if (sp->x[0] < c->loc[0] || sp->x[0] > c->loc[0] + c->width[0] || + sp->x[1] < c->loc[1] || sp->x[1] > c->loc[1] + c->width[1] || + sp->x[2] < c->loc[2] || sp->x[2] > c->loc[2] + c->width[2]) + error("spart not sorted into the right top-level cell!"); + } +#endif + + /* We need to re-link the gpart partners of sparts. 
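+   * (Same bookkeeping as for the gas particles above, this time using the spart flavour of the savelink mapper.)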
*/ + if (nr_sparts > 0) { + + struct savelink_mapper_data savelink_data; + savelink_data.nr_nodes = nr_nodes; + savelink_data.counts = s_counts; + savelink_data.parts = (void *)sparts; + savelink_data.nodeID = nodeID; + threadpool_map(&e->threadpool, engine_redistribute_savelink_mapper_spart, + nodes, nr_nodes, sizeof(int), 0, &savelink_data); + } + swift_free("s_dest", s_dest); + + /* Get destination of each b-particle */ + int *b_counts; + if ((b_counts = (int *)calloc(sizeof(int), nr_nodes * nr_nodes)) == NULL) + error("Failed to allocate b_counts temporary buffer."); + + int *b_dest; + if ((b_dest = (int *)swift_malloc("b_dest", sizeof(int) * nr_bparts)) == NULL) + error("Failed to allocate b_dest temporary buffer."); + + redist_data.counts = b_counts; + redist_data.dest = b_dest; + redist_data.base = (void *)bparts; + + threadpool_map(&e->threadpool, engine_redistribute_dest_mapper_bpart, bparts, + nr_bparts, sizeof(struct bpart), 0, &redist_data); + + /* Sort the particles according to their cell index. */ + if (nr_bparts > 0) + space_bparts_sort(s->bparts, b_dest, &b_counts[nodeID * nr_nodes], nr_nodes, + 0); + +#ifdef SWIFT_DEBUG_CHECKS + /* Verify that the bpart have been sorted correctly. */ + for (size_t k = 0; k < nr_bparts; k++) { + const struct bpart *bp = &s->bparts[k]; + + if (bp->time_bin == time_bin_inhibited) + error("Inhibited particle found after sorting!"); + + if (bp->time_bin == time_bin_not_created) + error("Inhibited particle found after sorting!"); + + /* New cell index */ + const int new_cid = + cell_getid(s->cdim, bp->x[0] * s->iwidth[0], bp->x[1] * s->iwidth[1], + bp->x[2] * s->iwidth[2]); + + /* New cell of this bpart */ + const struct cell *c = &s->cells_top[new_cid]; + const int new_node = c->nodeID; + + if (b_dest[k] != new_node) + error("bpart's new node index not matching sorted index."); + + if (bp->x[0] < c->loc[0] || bp->x[0] > c->loc[0] + c->width[0] || + bp->x[1] < c->loc[1] || bp->x[1] > c->loc[1] + c->width[1] || + bp->x[2] < c->loc[2] || bp->x[2] > c->loc[2] + c->width[2]) + error("bpart not sorted into the right top-level cell!"); + } +#endif + + /* We need to re-link the gpart partners of bparts. */ + if (nr_bparts > 0) { + + struct savelink_mapper_data savelink_data; + savelink_data.nr_nodes = nr_nodes; + savelink_data.counts = b_counts; + savelink_data.parts = (void *)bparts; + savelink_data.nodeID = nodeID; + threadpool_map(&e->threadpool, engine_redistribute_savelink_mapper_bpart, + nodes, nr_nodes, sizeof(int), 0, &savelink_data); + } + swift_free("b_dest", b_dest); + + /* Get destination of each g-particle */ + int *g_counts; + if ((g_counts = (int *)calloc(sizeof(int), nr_nodes * nr_nodes)) == NULL) + error("Failed to allocate g_gcount temporary buffer."); + + int *g_dest; + if ((g_dest = (int *)swift_malloc("g_dest", sizeof(int) * nr_gparts)) == NULL) + error("Failed to allocate g_dest temporary buffer."); + + redist_data.counts = g_counts; + redist_data.dest = g_dest; + redist_data.base = (void *)gparts; + + threadpool_map(&e->threadpool, engine_redistribute_dest_mapper_gpart, gparts, + nr_gparts, sizeof(struct gpart), 0, &redist_data); + + /* Sort the gparticles according to their cell index. */ + if (nr_gparts > 0) + space_gparts_sort(s->gparts, s->parts, s->sparts, s->bparts, g_dest, + &g_counts[nodeID * nr_nodes], nr_nodes); + +#ifdef SWIFT_DEBUG_CHECKS + /* Verify that the gpart have been sorted correctly. 
*/ + for (size_t k = 0; k < nr_gparts; k++) { + const struct gpart *gp = &s->gparts[k]; + + if (gp->time_bin == time_bin_inhibited) + error("Inhibited particle found after sorting!"); + + if (gp->time_bin == time_bin_not_created) + error("Inhibited particle found after sorting!"); + + /* New cell index */ + const int new_cid = + cell_getid(s->cdim, gp->x[0] * s->iwidth[0], gp->x[1] * s->iwidth[1], + gp->x[2] * s->iwidth[2]); + + /* New cell of this gpart */ + const struct cell *c = &s->cells_top[new_cid]; + const int new_node = c->nodeID; + + if (g_dest[k] != new_node) + error("gpart's new node index not matching sorted index (%d != %d).", + g_dest[k], new_node); + + if (gp->x[0] < c->loc[0] || gp->x[0] > c->loc[0] + c->width[0] || + gp->x[1] < c->loc[1] || gp->x[1] > c->loc[1] + c->width[1] || + gp->x[2] < c->loc[2] || gp->x[2] > c->loc[2] + c->width[2]) + error("gpart not sorted into the right top-level cell!"); + } +#endif + + swift_free("g_dest", g_dest); + + /* Get all the counts from all the nodes. */ + if (MPI_Allreduce(MPI_IN_PLACE, counts, nr_nodes * nr_nodes, MPI_INT, MPI_SUM, + MPI_COMM_WORLD) != MPI_SUCCESS) + error("Failed to allreduce particle transfer counts."); + + /* Get all the g_counts from all the nodes. */ + if (MPI_Allreduce(MPI_IN_PLACE, g_counts, nr_nodes * nr_nodes, MPI_INT, + MPI_SUM, MPI_COMM_WORLD) != MPI_SUCCESS) + error("Failed to allreduce gparticle transfer counts."); + + /* Get all the s_counts from all the nodes. */ + if (MPI_Allreduce(MPI_IN_PLACE, s_counts, nr_nodes * nr_nodes, MPI_INT, + MPI_SUM, MPI_COMM_WORLD) != MPI_SUCCESS) + error("Failed to allreduce sparticle transfer counts."); + + /* Get all the b_counts from all the nodes. */ + if (MPI_Allreduce(MPI_IN_PLACE, b_counts, nr_nodes * nr_nodes, MPI_INT, + MPI_SUM, MPI_COMM_WORLD) != MPI_SUCCESS) + error("Failed to allreduce bparticle transfer counts."); + + /* Report how many particles will be moved. */ + if (e->verbose) { + if (e->nodeID == 0) { + size_t total = 0, g_total = 0, s_total = 0, b_total = 0; + size_t unmoved = 0, g_unmoved = 0, s_unmoved = 0, b_unmoved = 0; + for (int p = 0, r = 0; p < nr_nodes; p++) { + for (int n = 0; n < nr_nodes; n++) { + total += counts[r]; + g_total += g_counts[r]; + s_total += s_counts[r]; + b_total += b_counts[r]; + if (p == n) { + unmoved += counts[r]; + g_unmoved += g_counts[r]; + s_unmoved += s_counts[r]; + b_unmoved += b_counts[r]; + } + r++; + } + } + if (total > 0) + message("%zu of %zu (%.2f%%) of particles moved", total - unmoved, + total, 100.0 * (double)(total - unmoved) / (double)total); + if (g_total > 0) + message("%zu of %zu (%.2f%%) of g-particles moved", g_total - g_unmoved, + g_total, + 100.0 * (double)(g_total - g_unmoved) / (double)g_total); + if (s_total > 0) + message("%zu of %zu (%.2f%%) of s-particles moved", s_total - s_unmoved, + s_total, + 100.0 * (double)(s_total - s_unmoved) / (double)s_total); + if (b_total > 0) + message("%ld of %ld (%.2f%%) of b-particles moved", b_total - b_unmoved, + b_total, + 100.0 * (double)(b_total - b_unmoved) / (double)b_total); + } + } + + /* Now each node knows how many parts, sparts, bparts, and gparts will be + * transferred to every other node. Get the new numbers of particles for this + * node. 
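+   * (The new totals are the column sums counts[k * nr_nodes + nodeID] over all sending ranks k, and likewise for the other particle types.)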
*/ + size_t nr_parts_new = 0, nr_gparts_new = 0, nr_sparts_new = 0, + nr_bparts_new = 0; + for (int k = 0; k < nr_nodes; k++) + nr_parts_new += counts[k * nr_nodes + nodeID]; + for (int k = 0; k < nr_nodes; k++) + nr_gparts_new += g_counts[k * nr_nodes + nodeID]; + for (int k = 0; k < nr_nodes; k++) + nr_sparts_new += s_counts[k * nr_nodes + nodeID]; + for (int k = 0; k < nr_nodes; k++) + nr_bparts_new += b_counts[k * nr_nodes + nodeID]; + + /* Now exchange the particles, type by type to keep the memory required + * under control. */ + + /* SPH particles. */ + void *new_parts = engine_do_redistribute( + "parts", counts, (char *)s->parts, nr_parts_new, sizeof(struct part), + part_align, part_mpi_type, nr_nodes, nodeID); + swift_free("parts", s->parts); + s->parts = (struct part *)new_parts; + s->nr_parts = nr_parts_new; + s->size_parts = engine_redistribute_alloc_margin * nr_parts_new; + + /* Extra SPH particle properties. */ + new_parts = engine_do_redistribute( + "xparts", counts, (char *)s->xparts, nr_parts_new, sizeof(struct xpart), + xpart_align, xpart_mpi_type, nr_nodes, nodeID); + swift_free("xparts", s->xparts); + s->xparts = (struct xpart *)new_parts; + + /* Gravity particles. */ + new_parts = engine_do_redistribute( + "gparts", g_counts, (char *)s->gparts, nr_gparts_new, + sizeof(struct gpart), gpart_align, gpart_mpi_type, nr_nodes, nodeID); + swift_free("gparts", s->gparts); + s->gparts = (struct gpart *)new_parts; + s->nr_gparts = nr_gparts_new; + s->size_gparts = engine_redistribute_alloc_margin * nr_gparts_new; + + /* Star particles. */ + new_parts = engine_do_redistribute( + "sparts", s_counts, (char *)s->sparts, nr_sparts_new, + sizeof(struct spart), spart_align, spart_mpi_type, nr_nodes, nodeID); + swift_free("sparts", s->sparts); + s->sparts = (struct spart *)new_parts; + s->nr_sparts = nr_sparts_new; + s->size_sparts = engine_redistribute_alloc_margin * nr_sparts_new; + + /* Black holes particles. */ + new_parts = engine_do_redistribute( + "bparts", b_counts, (char *)s->bparts, nr_bparts_new, + sizeof(struct bpart), bpart_align, bpart_mpi_type, nr_nodes, nodeID); + swift_free("bparts", s->bparts); + s->bparts = (struct bpart *)new_parts; + s->nr_bparts = nr_bparts_new; + s->size_bparts = engine_redistribute_alloc_margin * nr_bparts_new; + + /* All particles have now arrived. Time for some final operations on the + stuff we just received */ + + /* Restore the part<->gpart and spart<->gpart links. + * Generate indices and counts for threadpool tasks. Note we process a node + * at a time. */ + struct relink_mapper_data relink_data; + relink_data.s = s; + relink_data.counts = counts; + relink_data.g_counts = g_counts; + relink_data.s_counts = s_counts; + relink_data.b_counts = b_counts; + relink_data.nodeID = nodeID; + relink_data.nr_nodes = nr_nodes; + + threadpool_map(&e->threadpool, engine_redistribute_relink_mapper, nodes, + nr_nodes, sizeof(int), 1, &relink_data); + free(nodes); + + /* Clean up the counts now we are done. */ + free(counts); + free(g_counts); + free(s_counts); + free(b_counts); + +#ifdef SWIFT_DEBUG_CHECKS + /* Verify that all parts are in the right place. 
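+   * After the exchange, every particle of every type must live in a top-level cell owned by this rank; the part<->gpart links are then verified as well.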
*/ + for (size_t k = 0; k < nr_parts_new; k++) { + const int cid = cell_getid(s->cdim, s->parts[k].x[0] * s->iwidth[0], + s->parts[k].x[1] * s->iwidth[1], + s->parts[k].x[2] * s->iwidth[2]); + if (cells[cid].nodeID != nodeID) + error("Received particle (%zu) that does not belong here (nodeID=%i).", k, + cells[cid].nodeID); + } + for (size_t k = 0; k < nr_gparts_new; k++) { + const int cid = cell_getid(s->cdim, s->gparts[k].x[0] * s->iwidth[0], + s->gparts[k].x[1] * s->iwidth[1], + s->gparts[k].x[2] * s->iwidth[2]); + if (cells[cid].nodeID != nodeID) + error("Received g-particle (%zu) that does not belong here (nodeID=%i).", + k, cells[cid].nodeID); + } + for (size_t k = 0; k < nr_sparts_new; k++) { + const int cid = cell_getid(s->cdim, s->sparts[k].x[0] * s->iwidth[0], + s->sparts[k].x[1] * s->iwidth[1], + s->sparts[k].x[2] * s->iwidth[2]); + if (cells[cid].nodeID != nodeID) + error("Received s-particle (%zu) that does not belong here (nodeID=%i).", + k, cells[cid].nodeID); + } + for (size_t k = 0; k < nr_bparts_new; k++) { + const int cid = cell_getid(s->cdim, s->bparts[k].x[0] * s->iwidth[0], + s->bparts[k].x[1] * s->iwidth[1], + s->bparts[k].x[2] * s->iwidth[2]); + if (cells[cid].nodeID != nodeID) + error("Received b-particle (%zu) that does not belong here (nodeID=%i).", + k, cells[cid].nodeID); + } + + /* Verify that the links are correct */ + part_verify_links(s->parts, s->gparts, s->sparts, s->bparts, nr_parts_new, + nr_gparts_new, nr_sparts_new, nr_bparts_new, e->verbose); + +#endif + + /* Be verbose about what just happened. */ + if (e->verbose) { + int my_cells = 0; + for (int k = 0; k < nr_cells; k++) + if (cells[k].nodeID == nodeID) my_cells += 1; + message( + "node %i now has %zu parts, %zu sparts, %zu bparts and %zu gparts in " + "%i cells.", + nodeID, nr_parts_new, nr_sparts_new, nr_bparts_new, nr_gparts_new, + my_cells); + } + + /* Flag that we do not have any extra particles any more */ + s->nr_extra_parts = 0; + s->nr_extra_gparts = 0; + s->nr_extra_sparts = 0; + s->nr_extra_bparts = 0; + + /* Flag that a redistribute has taken place */ + e->step_props |= engine_step_prop_redistribute; + + if (e->verbose) + message("took %.3f %s.", clocks_from_ticks(getticks() - tic), + clocks_getunit()); +#else + error("SWIFT was not compiled with MPI support."); +#endif +} diff --git a/src/engine_unskip.c b/src/engine_unskip.c new file mode 100644 index 0000000000000000000000000000000000000000..dfadfa5ca1a6aebd0d7a277164eca9707ac97a62 --- /dev/null +++ b/src/engine_unskip.c @@ -0,0 +1,400 @@ +/******************************************************************************* + * This file is part of SWIFT. + * Copyright (c) 2012 Pedro Gonnet (pedro.gonnet@durham.ac.uk) + * Matthieu Schaller (matthieu.schaller@durham.ac.uk) + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published + * by the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public License + * along with this program. If not, see <http://www.gnu.org/licenses/>. 
+ * + ******************************************************************************/ + +/* Config parameters. */ +#include "../config.h" + +/* This object's header. */ +#include "engine.h" + +/* Local headers. */ +#include "active.h" +#include "cell.h" +#include "memswap.h" + +/* Load the profiler header, if needed. */ +#ifdef WITH_PROFILER +#include <gperftools/profiler.h> +#endif + +/** + * @brief Broad categories of tasks. + * + * Each category is unskipped independently + * of the others. + */ +enum task_broad_types { + task_broad_types_hydro = 1, + task_broad_types_gravity, + task_broad_types_stars, + task_broad_types_black_holes, + task_broad_types_count, +}; + +/** + * @brief Meta-data for the unskipping + */ +struct unskip_data { + + /*! The #engine */ + struct engine *e; + + /*! Pointer to the start of the list of cells to unskip */ + int *list_base; + + /*! Number of times the list has been duplicated */ + int multiplier; + + /*! The number of active cells (without dulication) */ + int num_active_cells; + + /*! The #task_broad_types corresponding to each copy of the list */ + enum task_broad_types task_types[task_broad_types_count]; +}; + +/** + * @brief Unskip any hydro tasks associated with active cells. + * + * @param c The cell. + * @param e The engine. + */ +static void engine_do_unskip_hydro(struct cell *c, struct engine *e) { + + /* Early abort (are we below the level where tasks are)? */ + if (!cell_get_flag(c, cell_flag_has_tasks)) return; + + /* Ignore empty cells. */ + if (c->hydro.count == 0) return; + + /* Skip inactive cells. */ + if (!cell_is_active_hydro(c, e)) return; + + /* Recurse */ + if (c->split) { + for (int k = 0; k < 8; k++) { + if (c->progeny[k] != NULL) { + struct cell *cp = c->progeny[k]; + engine_do_unskip_hydro(cp, e); + } + } + } + + /* Unskip any active tasks. */ + const int forcerebuild = cell_unskip_hydro_tasks(c, &e->sched); + if (forcerebuild) atomic_inc(&e->forcerebuild); +} + +/** + * @brief Unskip any stars tasks associated with active cells. + * + * @param c The cell. + * @param e The engine. + * @param with_star_formation Are we running with star formation switched on? + */ +static void engine_do_unskip_stars(struct cell *c, struct engine *e, + const int with_star_formation) { + + /* Early abort (are we below the level where tasks are)? */ + if (!cell_get_flag(c, cell_flag_has_tasks)) return; + + const int non_empty = + c->stars.count > 0 || (with_star_formation && c->hydro.count > 0); + + /* Ignore empty cells. */ + if (!non_empty) return; + + const int ci_active = cell_is_active_stars(c, e) || + (with_star_formation && cell_is_active_hydro(c, e)); + + /* Skip inactive cells. */ + if (!ci_active) return; + + /* Recurse */ + if (c->split) { + for (int k = 0; k < 8; k++) { + if (c->progeny[k] != NULL) { + struct cell *cp = c->progeny[k]; + engine_do_unskip_stars(cp, e, with_star_formation); + } + } + } + + /* Unskip any active tasks. */ + const int forcerebuild = + cell_unskip_stars_tasks(c, &e->sched, with_star_formation); + if (forcerebuild) atomic_inc(&e->forcerebuild); +} + +/** + * @brief Unskip any black hole tasks associated with active cells. + * + * @param c The cell. + * @param e The engine. + */ +static void engine_do_unskip_black_holes(struct cell *c, struct engine *e) { + + /* Early abort (are we below the level where tasks are)? */ + if (!cell_get_flag(c, cell_flag_has_tasks)) return; + + /* Ignore empty cells. */ + if (c->black_holes.count == 0) return; + + /* Skip inactive cells. 
*/ + if (!cell_is_active_black_holes(c, e)) return; + + /* Recurse */ + if (c->split) { + for (int k = 0; k < 8; k++) { + if (c->progeny[k] != NULL) { + struct cell *cp = c->progeny[k]; + engine_do_unskip_black_holes(cp, e); + } + } + } + + /* Unskip any active tasks. */ + const int forcerebuild = cell_unskip_black_holes_tasks(c, &e->sched); + if (forcerebuild) atomic_inc(&e->forcerebuild); +} + +/** + * @brief Unskip any gravity tasks associated with active cells. + * + * @param c The cell. + * @param e The engine. + */ +static void engine_do_unskip_gravity(struct cell *c, struct engine *e) { + + /* Early abort (are we below the level where tasks are)? */ + if (!cell_get_flag(c, cell_flag_has_tasks)) return; + + /* Ignore empty cells. */ + if (c->grav.count == 0) return; + + /* Skip inactive cells. */ + if (!cell_is_active_gravity(c, e)) return; + + /* Recurse */ + if (c->split && ((c->maxdepth - c->depth) >= space_subdepth_diff_grav)) { + for (int k = 0; k < 8; k++) { + if (c->progeny[k] != NULL) { + struct cell *cp = c->progeny[k]; + engine_do_unskip_gravity(cp, e); + } + } + } + + /* Unskip any active tasks. */ + cell_unskip_gravity_tasks(c, &e->sched); +} + +/** + * @brief Mapper function to unskip active tasks. + * + * @param map_data An array of #cell%s. + * @param num_elements Chunk size. + * @param extra_data Pointer to an unskip_data structure. + */ +void engine_do_unskip_mapper(void *map_data, int num_elements, + void *extra_data) { + + /* Unpack the meta data */ + struct unskip_data *data = (struct unskip_data *)extra_data; + const int num_active_cells = data->num_active_cells; + const enum task_broad_types *const task_types = data->task_types; + const int *const list_base = data->list_base; + struct engine *e = data->e; + struct cell *const cells_top = e->s->cells_top; + + /* What policies are we running? */ + const int with_star_formation = e->policy & engine_policy_star_formation; + + /* The current chunk of active cells */ + const int *const local_cells = (int *)map_data; + + /* Loop over this thread's chunk of cells to unskip */ + for (int ind = 0; ind < num_elements; ind++) { + + /* Handle on the cell */ + struct cell *const c = &cells_top[local_cells[ind]]; + + /* In what copy of the global list are we? + * This gives us the broad type of task we are working on. */ + const ptrdiff_t delta = &local_cells[ind] - list_base; + const int type = delta / num_active_cells; + +#ifdef SWIFT_DEBUG_CHECKS + if (type >= data->multiplier) error("Invalid broad task type!"); + if (c == NULL) error("Got an invalid cell index!"); +#endif + + /* What broad type of tasks are we unskipping? 
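+     * The type was fixed per copy of the duplicated cell list in engine_unskip(), so the switch below simply dispatches to the matching per-type recursion.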
*/ + switch (task_types[type]) { + case task_broad_types_hydro: +#ifdef SWIFT_DEBUG_CHECKS + if (!(e->policy & engine_policy_hydro)) + error("Trying to unskip hydro tasks in a non-hydro run!"); +#endif + engine_do_unskip_hydro(c, e); + break; + case task_broad_types_gravity: +#ifdef SWIFT_DEBUG_CHECKS + if (!(e->policy & engine_policy_self_gravity) && + !(e->policy & engine_policy_external_gravity)) + error("Trying to unskip gravity tasks in a non-gravity run!"); +#endif + engine_do_unskip_gravity(c, e); + break; + case task_broad_types_stars: +#ifdef SWIFT_DEBUG_CHECKS + if (!(e->policy & engine_policy_stars)) + error("Trying to unskip star tasks in a non-stars run!"); +#endif + engine_do_unskip_stars(c, e, with_star_formation); + break; + case task_broad_types_black_holes: +#ifdef SWIFT_DEBUG_CHECKS + if (!(e->policy & engine_policy_black_holes)) + error("Trying to unskip black holes tasks in a non-BH run!"); +#endif + engine_do_unskip_black_holes(c, e); + break; + default: +#ifdef SWIFT_DEBUG_CHECKS + error("Invalid broad task type!"); +#endif + continue; + } + } +} + +/** + * @brief Unskip all the tasks that act on active cells at this time. + * + * @param e The #engine. + */ +void engine_unskip(struct engine *e) { + + const ticks tic = getticks(); + struct space *s = e->s; + const int nodeID = e->nodeID; + + const int with_hydro = e->policy & engine_policy_hydro; + const int with_self_grav = e->policy & engine_policy_self_gravity; + const int with_ext_grav = e->policy & engine_policy_external_gravity; + const int with_stars = e->policy & engine_policy_stars; + const int with_feedback = e->policy & engine_policy_feedback; + const int with_black_holes = e->policy & engine_policy_black_holes; + +#ifdef WITH_PROFILER + static int count = 0; + char filename[100]; + sprintf(filename, "/tmp/swift_engine_do_usnkip_mapper_%06i.prof", count++); + ProfilerStart(filename); +#endif // WITH_PROFILER + + /* Move the active local cells to the top of the list. */ + int *local_cells = e->s->local_cells_with_tasks_top; + int num_active_cells = 0; + for (int k = 0; k < s->nr_local_cells_with_tasks; k++) { + struct cell *c = &s->cells_top[local_cells[k]]; + + if ((with_hydro && cell_is_active_hydro(c, e)) || + (with_self_grav && cell_is_active_gravity(c, e)) || + (with_ext_grav && c->nodeID == nodeID && + cell_is_active_gravity(c, e)) || + (with_feedback && cell_is_active_stars(c, e)) || + (with_stars && c->nodeID == nodeID && cell_is_active_stars(c, e)) || + (with_black_holes && cell_is_active_black_holes(c, e))) { + + if (num_active_cells != k) + memswap(&local_cells[k], &local_cells[num_active_cells], sizeof(int)); + num_active_cells += 1; + } + } + + /* What kind of tasks do we have? */ + struct unskip_data data; + bzero(&data, sizeof(struct unskip_data)); + int multiplier = 0; + if (with_hydro) { + data.task_types[multiplier] = task_broad_types_hydro; + multiplier++; + } + if (with_self_grav || with_ext_grav) { + data.task_types[multiplier] = task_broad_types_gravity; + multiplier++; + } + if (with_feedback || with_stars) { + data.task_types[multiplier] = task_broad_types_stars; + multiplier++; + } + if (with_black_holes) { + data.task_types[multiplier] = task_broad_types_black_holes; + multiplier++; + } + + /* Should we duplicate the list of active cells to better parallelise the + unskip over the threads ? 
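+     (One copy per broad task type lets hydro, gravity, stars and black-hole unskipping be handed to the threadpool as independent chunks.)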
*/ + int *local_active_cells; + if (multiplier > 1) { + + /* Make space for copies of the list */ + local_active_cells = + (int *)malloc(multiplier * num_active_cells * sizeof(int)); + if (local_active_cells == NULL) + error( + "Couldn't allocate memory for duplicated list of local active " + "cells."); + + /* Make blind copies of the list */ + for (int m = 0; m < multiplier; m++) { + memcpy(local_active_cells + m * num_active_cells, local_cells, + num_active_cells * sizeof(int)); + } + } else { + local_active_cells = local_cells; + } + + /* We now have a list of local active cells duplicated as many times as + * we have broad task types. We can now release all the threads on the list */ + + data.e = e; + data.list_base = local_active_cells; + data.num_active_cells = num_active_cells; + data.multiplier = multiplier; + + /* Activate all the regular tasks */ + threadpool_map(&e->threadpool, engine_do_unskip_mapper, local_active_cells, + num_active_cells * multiplier, sizeof(int), 1, &data); + +#ifdef WITH_PROFILER + ProfilerStop(); +#endif // WITH_PROFILER + + /* Free stuff? */ + if (multiplier > 1) { + free(local_active_cells); + } + + if (e->verbose) + message("took %.3f %s.", clocks_from_ticks(getticks() - tic), + clocks_getunit()); +} diff --git a/src/logger.c b/src/logger.c index 8be521b27f949ea0d496a5207335f1ec68208489..762eb516077ef82f08b6c34da09cd7bc9eb6a280 100644 --- a/src/logger.c +++ b/src/logger.c @@ -44,44 +44,41 @@ /* * Thoses are definitions from the format and therefore should not be changed! */ -/* number of bytes for a mask */ +/* Number of bytes for a mask. */ // TODO change this to number of bits #define logger_mask_size 1 -/* number of bits for chunk header */ +/* Number of bits for chunk header. */ #define logger_header_bytes 8 -/* number bytes for an offset */ +/* Number bytes for an offset. */ #define logger_offset_size logger_header_bytes - logger_mask_size -/* number of bytes for the version information */ -#define logger_version_size 20 +/* Number of bytes for the file format information. */ +#define logger_format_size 20 -/* number of bytes for the labels in the header */ +/* Number of bytes for the labels in the header. */ #define logger_label_size 20 -/* number of bytes for the number in the header */ -#define logger_number_size 4 - -char logger_version[logger_version_size] = "0.1"; +char logger_file_format[logger_format_size] = "SWIFT_LOGGER"; const struct mask_data logger_mask_data[logger_count_mask] = { - /* Particle's position */ + /* Particle's position. */ {3 * sizeof(double), 1 << logger_x, "positions"}, - /* Particle's velocity */ + /* Particle's velocity. */ {3 * sizeof(float), 1 << logger_v, "velocities"}, - /* Particle's acceleration */ + /* Particle's acceleration. */ {3 * sizeof(float), 1 << logger_a, "accelerations"}, - /* Particle's entropy */ + /* Particle's entropy. */ {sizeof(float), 1 << logger_u, "entropy"}, - /* Particle's smoothing length */ + /* Particle's smoothing length. */ {sizeof(float), 1 << logger_h, "smoothing length"}, - /* Particle's density */ + /* Particle's density. */ {sizeof(float), 1 << logger_rho, "density"}, - /* Particle's constants: mass (float) and ID (long long) */ + /* Particle's constants: mass (float) and ID (long long). */ {sizeof(float) + sizeof(long long), 1 << logger_consts, "consts"}, /* Simulation time stamp: integertime and double time (e.g. scale - factor or time) */ + factor or time). 
*/ {sizeof(integertime_t) + sizeof(double), 1 << logger_timestamp, "timestamp"}}; @@ -99,11 +96,11 @@ const struct mask_data logger_mask_data[logger_count_mask] = { */ char *logger_write_chunk_header(char *buff, const unsigned int *mask, const size_t *offset, const size_t offset_new) { - /* write mask */ + /* write mask. */ memcpy(buff, mask, logger_mask_size); buff += logger_mask_size; - /* write offset */ + /* write offset. */ size_t diff_offset = offset_new - *offset; memcpy(buff, &diff_offset, logger_offset_size); buff += logger_offset_size; @@ -112,7 +109,7 @@ char *logger_write_chunk_header(char *buff, const unsigned int *mask, } /** - * @brief Write to the dump + * @brief Write to the dump. * * @param d #dump file * @param offset (return) offset of the data @@ -121,13 +118,13 @@ char *logger_write_chunk_header(char *buff, const unsigned int *mask, */ void logger_write_data(struct dump *d, size_t *offset, size_t size, const void *p) { - /* get buffer */ + /* get buffer. */ char *buff = dump_get(d, size, offset); - /* write data to the buffer */ + /* write data to the buffer. */ memcpy(buff, p, size); - /* Update offset to end of chunk */ + /* Update offset to end of chunk. */ *offset += size; } @@ -171,15 +168,15 @@ int logger_compute_chunk_size(unsigned int mask) { * @param log The #logger * @param e The #engine */ -void logger_log_all(struct logger *log, const struct engine *e) { +void logger_log_all(struct logger_writer *log, const struct engine *e) { - /* Ensure that enough space is available */ + /* Ensure that enough space is available. */ logger_ensure_size(log, e->total_nr_parts, e->total_nr_gparts, 0); #ifdef SWIFT_DEBUG_CHECKS message("Need to implement stars"); #endif - /* some constants */ + /* some constants. */ const struct space *s = e->s; const unsigned int mask = logger_mask_data[logger_x].mask | logger_mask_data[logger_v].mask | @@ -187,17 +184,17 @@ void logger_log_all(struct logger *log, const struct engine *e) { logger_mask_data[logger_h].mask | logger_mask_data[logger_rho].mask | logger_mask_data[logger_consts].mask; - /* loop over all parts */ + /* loop over all parts. */ for (long long i = 0; i < e->total_nr_parts; i++) { logger_log_part(log, &s->parts[i], mask, &s->xparts[i].logger_data.last_offset); s->xparts[i].logger_data.steps_since_last_output = 0; } - /* loop over all gparts */ + /* loop over all gparts. */ if (e->total_nr_gparts > 0) error("Not implemented"); - /* loop over all sparts */ + /* loop over all sparts. */ // TODO } @@ -210,7 +207,7 @@ void logger_log_all(struct logger *log, const struct engine *e) { * @param offset Pointer to the offset of the previous log of this particle; * (return) offset of this log. */ -void logger_log_part(struct logger *log, const struct part *p, +void logger_log_part(struct logger_writer *log, const struct part *p, unsigned int mask, size_t *offset) { /* Make sure we're not writing a timestamp. */ @@ -289,7 +286,7 @@ void logger_log_part(struct logger *log, const struct part *p, * @param offset Pointer to the offset of the previous log of this particle; * (return) offset of this log. */ -void logger_log_gpart(struct logger *log, const struct gpart *p, +void logger_log_gpart(struct logger_writer *log, const struct gpart *p, unsigned int mask, size_t *offset) { /* Make sure we're not writing a timestamp. */ @@ -331,7 +328,7 @@ void logger_log_gpart(struct logger *log, const struct gpart *p, /* Particle constants, which is a bit more complicated. 
*/ if (mask & logger_mask_data[logger_consts].mask) { - // TODO make it dependent of logger_mask_data + // TODO make it dependent of logger_mask_data. memcpy(buff, &p->mass, sizeof(float)); buff += sizeof(float); memcpy(buff, &p->id_or_neg_offset, sizeof(long long)); @@ -351,7 +348,7 @@ void logger_log_gpart(struct logger *log, const struct gpart *p, * @param offset Pointer to the offset of the previous log of this particle; * (return) offset of this log. */ -void logger_log_timestamp(struct logger *log, integertime_t timestamp, +void logger_log_timestamp(struct logger_writer *log, integertime_t timestamp, double time, size_t *offset) { struct dump *dump = &log->dump; @@ -368,11 +365,11 @@ void logger_log_timestamp(struct logger *log, integertime_t timestamp, buff = logger_write_chunk_header(buff, &mask, offset, offset_new); /* Store the timestamp. */ - // TODO make it dependent of logger_mask_data + // TODO make it dependent of logger_mask_data. memcpy(buff, ×tamp, sizeof(integertime_t)); buff += sizeof(integertime_t); - /* Store the time */ + /* Store the time. */ memcpy(buff, &time, sizeof(double)); /* Update the log message offset. */ @@ -390,21 +387,21 @@ void logger_log_timestamp(struct logger *log, integertime_t timestamp, * @param total_nr_gparts total number of gpart * @param total_nr_sparts total number of spart */ -void logger_ensure_size(struct logger *log, size_t total_nr_parts, +void logger_ensure_size(struct logger_writer *log, size_t total_nr_parts, size_t total_nr_gparts, size_t total_nr_sparts) { - /* count part memory */ + /* count part memory. */ size_t limit = log->max_chunk_size; limit *= total_nr_parts; - /* count gpart memory */ + /* count gpart memory. */ if (total_nr_gparts > 0) error("Not implemented"); - /* count spart memory */ + /* count spart memory. */ if (total_nr_sparts > 0) error("Not implemented"); - /* ensure enough space in dump */ + /* ensure enough space in dump. */ dump_ensure(&log->dump, limit, log->buffer_scale * limit); } @@ -414,8 +411,8 @@ void logger_ensure_size(struct logger *log, size_t total_nr_parts, * @param log The #logger * @param params The #swift_params */ -void logger_init(struct logger *log, struct swift_params *params) { - /* read parameters */ +void logger_init(struct logger_writer *log, struct swift_params *params) { + /* read parameters. */ log->delta_step = parser_get_param_int(params, "Logger:delta_step"); size_t buffer_size = parser_get_opt_param_float(params, "Logger:initial_buffer_size", 0.5) * @@ -424,24 +421,24 @@ void logger_init(struct logger *log, struct swift_params *params) { parser_get_opt_param_float(params, "Logger:buffer_scale", 10); parser_get_param_string(params, "Logger:basename", log->base_name); - /* set initial value of parameters */ + /* set initial value of parameters. */ log->timestamp_offset = 0; - /* generate dump filename */ + /* generate dump filename. */ char logger_name_file[PARSER_MAX_LINE_SIZE]; strcpy(logger_name_file, log->base_name); strcat(logger_name_file, ".dump"); - /* Compute max size for a particle chunk */ + /* Compute max size for a particle chunk. */ int max_size = logger_offset_size + logger_mask_size; - /* Loop over all fields except timestamp */ + /* Loop over all fields except timestamp. */ for (int i = 0; i < logger_count_mask - 1; i++) { max_size += logger_mask_data[i].size; } log->max_chunk_size = max_size; - /* init dump */ + /* init dump. 
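logger_log_timestamp(), touched above, emits a record that carries only the time-stamp field: the usual chunk header followed by the integer time and the physical time (scale-factor or time). A hedged sketch of that payload, with a stand-in for SWIFT's integertime_t:

```c
#include <stdint.h>
#include <string.h>

/* Stand-in for SWIFT's integertime_t (a position on the integer time-line). */
typedef int64_t toy_integertime_t;

/* Append the time-stamp payload after the chunk header: the integer time
 * followed by the double-precision time (or scale-factor), matching the two
 * fields listed in the mask table above. Illustrative only. */
static char *write_timestamp_payload(char *buff, toy_integertime_t ti,
                                     double time) {
  memcpy(buff, &ti, sizeof(ti));
  buff += sizeof(ti);
  memcpy(buff, &time, sizeof(time));
  return buff + sizeof(time);
}
```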
*/ dump_init(&log->dump, logger_name_file, buffer_size); } @@ -450,18 +447,17 @@ void logger_init(struct logger *log, struct swift_params *params) { * * @param log The #logger */ -void logger_clean(struct logger *log) { dump_close(&log->dump); } +void logger_free(struct logger_writer *log) { dump_close(&log->dump); } /** * @brief Write a file header to a logger file * * @param log The #logger - * @param dump The #dump in which to log the particle data. * */ -void logger_write_file_header(struct logger *log, const struct engine *e) { +void logger_write_file_header(struct logger_writer *log) { - /* get required variables */ + /* get required variables. */ struct dump *dump = &log->dump; size_t file_offset = dump->file_offset; @@ -471,37 +467,46 @@ void logger_write_file_header(struct logger *log, const struct engine *e) { "The logger is not empty." "This function should be called before writing anything in the logger"); - /* Write version information */ - logger_write_data(dump, &file_offset, logger_version_size, &logger_version); + /* Write format information. */ + logger_write_data(dump, &file_offset, logger_format_size, + &logger_file_format); + + /* Write the major version number. */ + int major = logger_major_version; + logger_write_data(dump, &file_offset, sizeof(int), &major); - /* write offset direction */ + /* Write the minor version number. */ + int minor = logger_minor_version; + logger_write_data(dump, &file_offset, sizeof(int), &minor); + + /* write offset direction. */ const int reversed = 0; - logger_write_data(dump, &file_offset, logger_number_size, &reversed); + logger_write_data(dump, &file_offset, sizeof(int), &reversed); - /* placeholder to write the offset of the first log here */ + /* placeholder to write the offset of the first log here. */ char *skip_header = dump_get(dump, logger_offset_size, &file_offset); - /* write number of bytes used for names */ - const int label_size = logger_label_size; - logger_write_data(dump, &file_offset, logger_number_size, &label_size); + /* write number of bytes used for names. */ + const unsigned int label_size = logger_label_size; + logger_write_data(dump, &file_offset, sizeof(unsigned int), &label_size); - /* write number of masks */ - int count_mask = logger_count_mask; - logger_write_data(dump, &file_offset, logger_number_size, &count_mask); + /* write number of masks. */ + const unsigned int count_mask = logger_count_mask; + logger_write_data(dump, &file_offset, sizeof(unsigned int), &count_mask); - /* write masks */ - // loop over all mask type + /* write masks. */ + // loop over all mask type. for (int i = 0; i < logger_count_mask; i++) { - // mask name + // mask name. logger_write_data(dump, &file_offset, logger_label_size, &logger_mask_data[i].name); - // mask size - logger_write_data(dump, &file_offset, logger_number_size, + // mask size. + logger_write_data(dump, &file_offset, sizeof(unsigned int), &logger_mask_data[i].size); } - /* last step: write first offset */ + /* last step: write first offset. */ memcpy(skip_header, &file_offset, logger_offset_size); } @@ -591,7 +596,7 @@ int logger_read_part(struct part *p, size_t *offset, const char *buff) { /* Particle constants, which is a bit more complicated. */ if (mask & logger_mask_data[logger_rho].mask) { - // TODO make it dependent of logger_mask_data + // TODO make it dependent of logger_mask_data. 
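Collecting the writes performed by logger_write_file_header() above, a new-style log file now begins with the following sequence of fields. This is a descriptive sketch of the write order only, not a struct that exists in the code (fields are written one by one, so no padding or endianness guarantees are implied):

```c
/* File header layout produced by logger_write_file_header(), in write order:
 *
 *   char          file_format[20];   "SWIFT_LOGGER"
 *   int           major_version;     logger_major_version
 *   int           minor_version;     logger_minor_version
 *   int           reversed;          offset direction flag, 0 at write time
 *   char          first_offset[7];   placeholder, back-filled at the end
 *   unsigned int  label_size;        logger_label_size (20)
 *   unsigned int  n_masks;           logger_count_mask
 *   { char name[20]; unsigned int size; }  masks[n_masks];
 *
 * Once everything above is written, the current file offset (the start of
 * the first chunk) is copied back into the first_offset placeholder. */
```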
memcpy(&p->mass, buff, sizeof(float)); buff += sizeof(float); memcpy(&p->id, buff, sizeof(long long)); @@ -694,7 +699,7 @@ int logger_read_timestamp(unsigned long long int *t, double *time, error("Timestamp message contains extra fields."); /* Copy the timestamp value from the buffer. */ - // TODO make it dependent of logger_mask_data + // TODO make it dependent of logger_mask_data. memcpy(t, buff, sizeof(unsigned long long int)); buff += sizeof(unsigned long long int); diff --git a/src/logger.h b/src/logger.h index 56e2c8ab94c66b24df1800877bb9cfb129c3e645..ed2d6374fa9031f526e79e790572c89f6176df4b 100644 --- a/src/logger.h +++ b/src/logger.h @@ -28,13 +28,15 @@ #include "timeline.h" #include "units.h" -/* Forward declaration */ +/* Forward declaration. */ struct dump; struct gpart; struct part; -/* TODO remove dependency */ struct engine; +#define logger_major_version 0 +#define logger_minor_version 1 + /** * Logger entries contain messages representing the particle data at a given * point in time during the simulation. @@ -82,16 +84,18 @@ enum logger_masks_number { logger_h = 4, logger_rho = 5, logger_consts = 6, - logger_timestamp = 7, /* expect it to be before count */ - logger_count_mask = 8, /* Need to be the last */ + logger_timestamp = 7, /* expect it to be before count. */ + logger_count_mask = 8, /* Need to be the last. */ } __attribute__((packed)); struct mask_data { - /* Number of bytes for a mask */ + /* Number of bytes for a mask. */ int size; - /* Mask value */ + + /* Mask value. */ unsigned int mask; - /* name of the mask */ + + /* Name of the mask. */ char name[100]; }; @@ -100,51 +104,52 @@ extern const struct mask_data logger_mask_data[logger_count_mask]; /* Size of the strings. */ #define logger_string_length 200 -/* structure containing global data */ -struct logger { - /* Number of particle steps between dumping a chunk of data */ +/* structure containing global data. */ +struct logger_writer { + /* Number of particle steps between dumping a chunk of data. */ short int delta_step; - /* Logger basename */ + /* Logger basename. */ char base_name[logger_string_length]; - /* Dump file */ + /* Dump file (In the reader, the dump is cleaned, therefore it is renamed + * logfile). */ struct dump dump; - /* timestamp offset for logger*/ + /* timestamp offset for logger. */ size_t timestamp_offset; - /* scaling factor when buffer is too small */ + /* scaling factor when buffer is too small. */ float buffer_scale; - /* Size of a chunk if every mask are activated */ + /* Size of a chunk if every mask are activated. */ int max_chunk_size; } SWIFT_STRUCT_ALIGN; -/* required structure for each particle type */ +/* required structure for each particle type. */ struct logger_part_data { - /* Number of particle updates since last output */ + /* Number of particle updates since last output. */ int steps_since_last_output; - /* offset of last particle log entry */ + /* offset of last particle log entry. */ size_t last_offset; }; /* Function prototypes. 
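The logger_part_data fields introduced above drive when a particle is logged: the step counter grows every step and, once it exceeds the writer's delta_step, the particle is written and the counter reset, with last_offset remembering where that record went. A hedged sketch of that bookkeeping using toy names in place of the SWIFT structures:

```c
#include <stddef.h>

/* Toy mirror of logger_part_data. */
struct toy_part_data {
  int steps_since_last_output;
  size_t last_offset;
};

/* Called once per particle per step; log_particle() stands in for
 * logger_log_part() and returns the offset of the record it just wrote. */
static void end_of_step(struct toy_part_data *d, int delta_step,
                        size_t (*log_particle)(size_t last_offset)) {
  d->steps_since_last_output++;
  if (d->steps_since_last_output > delta_step) {
    d->last_offset = log_particle(d->last_offset);
    d->steps_since_last_output = 0;
  }
}
```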
*/ int logger_compute_chunk_size(unsigned int mask); -void logger_log_all(struct logger *log, const struct engine *e); -void logger_log_part(struct logger *log, const struct part *p, +void logger_log_all(struct logger_writer *log, const struct engine *e); +void logger_log_part(struct logger_writer *log, const struct part *p, unsigned int mask, size_t *offset); -void logger_log_gpart(struct logger *log, const struct gpart *p, +void logger_log_gpart(struct logger_writer *log, const struct gpart *p, unsigned int mask, size_t *offset); -void logger_init(struct logger *log, struct swift_params *params); -void logger_clean(struct logger *log); -void logger_log_timestamp(struct logger *log, integertime_t t, double time, - size_t *offset); -void logger_ensure_size(struct logger *log, size_t total_nr_parts, +void logger_init(struct logger_writer *log, struct swift_params *params); +void logger_free(struct logger_writer *log); +void logger_log_timestamp(struct logger_writer *log, integertime_t t, + double time, size_t *offset); +void logger_ensure_size(struct logger_writer *log, size_t total_nr_parts, size_t total_nr_gparts, size_t total_nr_sparts); -void logger_write_file_header(struct logger *log, const struct engine *e); +void logger_write_file_header(struct logger_writer *log); int logger_read_part(struct part *p, size_t *offset, const char *buff); int logger_read_gpart(struct gpart *p, size_t *offset, const char *buff); @@ -164,12 +169,14 @@ INLINE static void logger_part_data_init(struct logger_part_data *logger) { /** * @brief Should this particle write its data now ? * - * @param xp The #xpart. - * @param e The #engine containing information about the current time. - * @return 1 if the #part should write, 0 otherwise. + * @param logger_data The #logger_part_data of a particle. + * @param log The #logger. + * + * @return 1 if the particule should be writen, 0 otherwise. */ __attribute__((always_inline)) INLINE static int logger_should_write( - const struct logger_part_data *logger_data, const struct logger *log) { + const struct logger_part_data *logger_data, + const struct logger_writer *log) { return (logger_data->steps_since_last_output > log->delta_step); } diff --git a/src/logger_io.c b/src/logger_io.c index 3cef3497b2912411cea6763f5418bc76a7f5ece0..c6be1f292434c759e20064542e91caa2cd238a4d 100644 --- a/src/logger_io.c +++ b/src/logger_io.c @@ -21,7 +21,7 @@ /* Config parameters. */ #include "../config.h" -#ifdef WITH_LOGGER +#if defined(WITH_LOGGER) && defined(HAVE_HDF5) && !defined(WITH_MPI) /* Some standard headers. */ #include <hdf5.h> @@ -87,7 +87,7 @@ void write_index_single(struct engine* e, const char* baseName, // struct spart* sparts = e->s->sparts; static int outputCount = 0; - struct logger* log = e->logger; + struct logger_writer* log = e->logger; /* Number of unassociated gparts */ const size_t Ndm = Ntot > 0 ? 
Ntot - (Ngas + Nstars) : 0; @@ -296,4 +296,4 @@ void write_index_single(struct engine* e, const char* baseName, ++outputCount; } -#endif /* HAVE_HDF5 */ +#endif /* WITH_LOGGER && HAVE_HDF5 && !WITH_MPI */ diff --git a/src/logger_io.h b/src/logger_io.h index f5b1274fb7b957d5b48bc8425bf784c586ac6a08..a424c5c104b9f1090c69f7e0bb37e72635636f82 100644 --- a/src/logger_io.h +++ b/src/logger_io.h @@ -50,11 +50,13 @@ __attribute__((always_inline)) INLINE static void hydro_write_index( *num_fields = 2; /* List what we want to write */ - list[0] = io_make_output_field("ParticleIDs", ULONGLONG, 1, - UNIT_CONV_NO_UNITS, parts, id); + list[0] = + io_make_output_field("ParticleIDs", ULONGLONG, 1, UNIT_CONV_NO_UNITS, 0.f, + parts, id, "will be erased"); - list[1] = io_make_output_field("Offset", ULONGLONG, 1, UNIT_CONV_NO_UNITS, - xparts, logger_data.last_offset); + list[1] = + io_make_output_field("Offset", ULONGLONG, 1, UNIT_CONV_NO_UNITS, 0.f, + xparts, logger_data.last_offset, "will be erased"); } #endif diff --git a/src/parallel_io.c b/src/parallel_io.c index ccba33d07500f4e22e365942622f6392cfbb0166..d469de729bd08c79889b031e9d25d796cabad28e 100644 --- a/src/parallel_io.c +++ b/src/parallel_io.c @@ -680,6 +680,8 @@ void writeArray(struct engine* e, hid_t grp, char* fileName, * @param bparts (output) The array of #bpart read from the file. * @param Ngas (output) The number of particles read from the file. * @param Ngparts (output) The number of particles read from the file. + * @param Ngparts_background (output) The number of background DM particles read + * from the file. * @param Nstars (output) The number of particles read from the file. * @param Nblackholes (output) The number of particles read from the file. * @param flag_entropy (output) 1 if the ICs contained Entropy in the diff --git a/src/runner.c b/src/runner.c deleted file mode 100644 index db7e512873b51a7329e19e75a763b69521efb0eb..0000000000000000000000000000000000000000 --- a/src/runner.c +++ /dev/null @@ -1,5048 +0,0 @@ -/******************************************************************************* - * This file is part of SWIFT. - * Copyright (c) 2012 Pedro Gonnet (pedro.gonnet@durham.ac.uk) - * Matthieu Schaller (matthieu.schaller@durham.ac.uk) - * 2015 Peter W. Draper (p.w.draper@durham.ac.uk) - * 2016 John A. Regan (john.a.regan@durham.ac.uk) - * Tom Theuns (tom.theuns@durham.ac.uk) - * - * This program is free software: you can redistribute it and/or modify - * it under the terms of the GNU Lesser General Public License as published - * by the Free Software Foundation, either version 3 of the License, or - * (at your option) any later version. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU Lesser General Public License - * along with this program. If not, see <http://www.gnu.org/licenses/>. - * - ******************************************************************************/ - -/* Config parameters. */ -#include "../config.h" - -/* Some standard headers. */ -#include <float.h> -#include <limits.h> -#include <stdlib.h> - -/* MPI headers. */ -#ifdef WITH_MPI -#include <mpi.h> -#endif - -/* This object's header. */ -#include "runner.h" - -/* Local headers. 
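The index files written above pair every particle's ID with the offset of its most recent record in the dump (xparts[i].logger_data.last_offset). A reader can use that pair to jump straight to the particle's latest chunk and then follow the per-chunk backward offsets through its history. A hedged sketch of the lookup side, which is not part of this patch:

```c
#include <stddef.h>
#include <stdint.h>

/* One entry of an index file: the particle ID and the position of its last
 * record in the dump, as written by hydro_write_index(). */
struct index_entry {
  uint64_t id;
  uint64_t offset;
};

/* Linear lookup of a particle's latest record; a real reader would sort the
 * table or hash the IDs. Returns 0 when the ID is absent. */
static uint64_t last_record_offset(const struct index_entry *table, size_t n,
                                   uint64_t id) {
  for (size_t i = 0; i < n; i++)
    if (table[i].id == id) return table[i].offset;
  return 0;
}
```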
*/ -#include "active.h" -#include "approx_math.h" -#include "atomic.h" -#include "black_holes.h" -#include "black_holes_properties.h" -#include "cell.h" -#include "chemistry.h" -#include "const.h" -#include "cooling.h" -#include "debug.h" -#include "drift.h" -#include "engine.h" -#include "entropy_floor.h" -#include "error.h" -#include "feedback.h" -#include "gravity.h" -#include "hydro.h" -#include "hydro_properties.h" -#include "kick.h" -#include "logger.h" -#include "memuse.h" -#include "minmax.h" -#include "pressure_floor.h" -#include "pressure_floor_iact.h" -#include "runner_doiact_vec.h" -#include "scheduler.h" -#include "sort_part.h" -#include "space.h" -#include "space_getsid.h" -#include "star_formation.h" -#include "star_formation_logger.h" -#include "stars.h" -#include "task.h" -#include "timers.h" -#include "timestep.h" -#include "timestep_limiter.h" -#include "tracers.h" - -/* Unique identifier of loop types */ -#define TASK_LOOP_DENSITY 0 -#define TASK_LOOP_GRADIENT 1 -#define TASK_LOOP_FORCE 2 -#define TASK_LOOP_LIMITER 3 -#define TASK_LOOP_FEEDBACK 4 -#define TASK_LOOP_SWALLOW 5 - -/* Import the density loop functions. */ -#define FUNCTION density -#define FUNCTION_TASK_LOOP TASK_LOOP_DENSITY -#include "runner_doiact.h" -#undef FUNCTION -#undef FUNCTION_TASK_LOOP - -/* Import the gradient loop functions (if required). */ -#ifdef EXTRA_HYDRO_LOOP -#define FUNCTION gradient -#define FUNCTION_TASK_LOOP TASK_LOOP_GRADIENT -#include "runner_doiact.h" -#undef FUNCTION -#undef FUNCTION_TASK_LOOP -#endif - -/* Import the force loop functions. */ -#define FUNCTION force -#define FUNCTION_TASK_LOOP TASK_LOOP_FORCE -#include "runner_doiact.h" -#undef FUNCTION -#undef FUNCTION_TASK_LOOP - -/* Import the limiter loop functions. */ -#define FUNCTION limiter -#define FUNCTION_TASK_LOOP TASK_LOOP_LIMITER -#include "runner_doiact.h" -#undef FUNCTION -#undef FUNCTION_TASK_LOOP - -/* Import the gravity loop functions. */ -#include "runner_doiact_grav.h" - -/* Import the stars density loop functions. */ -#define FUNCTION density -#define FUNCTION_TASK_LOOP TASK_LOOP_DENSITY -#include "runner_doiact_stars.h" -#undef FUNCTION_TASK_LOOP -#undef FUNCTION - -/* Import the stars feedback loop functions. */ -#define FUNCTION feedback -#define FUNCTION_TASK_LOOP TASK_LOOP_FEEDBACK -#include "runner_doiact_stars.h" -#undef FUNCTION_TASK_LOOP -#undef FUNCTION - -/* Import the black hole density loop functions. */ -#define FUNCTION density -#define FUNCTION_TASK_LOOP TASK_LOOP_DENSITY -#include "runner_doiact_black_holes.h" -#undef FUNCTION_TASK_LOOP -#undef FUNCTION - -/* Import the black hole feedback loop functions. */ -#define FUNCTION swallow -#define FUNCTION_TASK_LOOP TASK_LOOP_SWALLOW -#include "runner_doiact_black_holes.h" -#undef FUNCTION_TASK_LOOP -#undef FUNCTION - -/* Import the black hole feedback loop functions. */ -#define FUNCTION feedback -#define FUNCTION_TASK_LOOP TASK_LOOP_FEEDBACK -#include "runner_doiact_black_holes.h" -#undef FUNCTION_TASK_LOOP -#undef FUNCTION - -/** - * @brief Intermediate task after the density to check that the smoothing - * lengths are correct. - * - * @param r The runner thread. - * @param c The cell. - * @param timer Are we timing this ? 
- */ -void runner_do_stars_ghost(struct runner *r, struct cell *c, int timer) { - - struct spart *restrict sparts = c->stars.parts; - const struct engine *e = r->e; - const struct unit_system *us = e->internal_units; - const int with_cosmology = (e->policy & engine_policy_cosmology); - const struct cosmology *cosmo = e->cosmology; - const struct feedback_props *feedback_props = e->feedback_props; - const float stars_h_max = e->hydro_properties->h_max; - const float stars_h_min = e->hydro_properties->h_min; - const float eps = e->stars_properties->h_tolerance; - const float stars_eta_dim = - pow_dimension(e->stars_properties->eta_neighbours); - const int max_smoothing_iter = e->stars_properties->max_smoothing_iterations; - int redo = 0, scount = 0; - - /* Running value of the maximal smoothing length */ - double h_max = c->stars.h_max; - - TIMER_TIC; - -#ifdef SWIFT_DEBUG_CHECKS - if (c->nodeID != e->nodeID) - error("Running the star ghost on a foreign node!"); -#endif - - /* Anything to do here? */ - if (c->stars.count == 0) return; - if (!cell_is_active_stars(c, e)) return; - - /* Recurse? */ - if (c->split) { - for (int k = 0; k < 8; k++) { - if (c->progeny[k] != NULL) { - runner_do_stars_ghost(r, c->progeny[k], 0); - - /* Update h_max */ - h_max = max(h_max, c->progeny[k]->stars.h_max); - } - } - } else { - - /* Init the list of active particles that have to be updated. */ - int *sid = NULL; - float *h_0 = NULL; - float *left = NULL; - float *right = NULL; - if ((sid = (int *)malloc(sizeof(int) * c->stars.count)) == NULL) - error("Can't allocate memory for sid."); - if ((h_0 = (float *)malloc(sizeof(float) * c->stars.count)) == NULL) - error("Can't allocate memory for h_0."); - if ((left = (float *)malloc(sizeof(float) * c->stars.count)) == NULL) - error("Can't allocate memory for left."); - if ((right = (float *)malloc(sizeof(float) * c->stars.count)) == NULL) - error("Can't allocate memory for right."); - for (int k = 0; k < c->stars.count; k++) - if (spart_is_active(&sparts[k], e) && - feedback_is_active(&sparts[k], e->time, cosmo, with_cosmology)) { - sid[scount] = k; - h_0[scount] = sparts[k].h; - left[scount] = 0.f; - right[scount] = stars_h_max; - ++scount; - } - - /* While there are particles that need to be updated... */ - for (int num_reruns = 0; scount > 0 && num_reruns < max_smoothing_iter; - num_reruns++) { - - /* Reset the redo-count. */ - redo = 0; - - /* Loop over the remaining active parts in this cell. */ - for (int i = 0; i < scount; i++) { - - /* Get a direct pointer on the part. */ - struct spart *sp = &sparts[sid[i]]; - -#ifdef SWIFT_DEBUG_CHECKS - /* Is this part within the timestep? 
*/ - if (!spart_is_active(sp, e)) - error("Ghost applied to inactive particle"); -#endif - - /* Get some useful values */ - const float h_init = h_0[i]; - const float h_old = sp->h; - const float h_old_dim = pow_dimension(h_old); - const float h_old_dim_minus_one = pow_dimension_minus_one(h_old); - - float h_new; - int has_no_neighbours = 0; - - if (sp->density.wcount == 0.f) { /* No neighbours case */ - - /* Flag that there were no neighbours */ - has_no_neighbours = 1; - - /* Double h and try again */ - h_new = 2.f * h_old; - - } else { - - /* Finish the density calculation */ - stars_end_density(sp, cosmo); - - /* Compute one step of the Newton-Raphson scheme */ - const float n_sum = sp->density.wcount * h_old_dim; - const float n_target = stars_eta_dim; - const float f = n_sum - n_target; - const float f_prime = - sp->density.wcount_dh * h_old_dim + - hydro_dimension * sp->density.wcount * h_old_dim_minus_one; - - /* Improve the bisection bounds */ - if (n_sum < n_target) - left[i] = max(left[i], h_old); - else if (n_sum > n_target) - right[i] = min(right[i], h_old); - -#ifdef SWIFT_DEBUG_CHECKS - /* Check the validity of the left and right bounds */ - if (left[i] > right[i]) - error("Invalid left (%e) and right (%e)", left[i], right[i]); -#endif - - /* Skip if h is already h_max and we don't have enough neighbours */ - /* Same if we are below h_min */ - if (((sp->h >= stars_h_max) && (f < 0.f)) || - ((sp->h <= stars_h_min) && (f > 0.f))) { - - stars_reset_feedback(sp); - - /* Only do feedback if stars have a reasonable birth time */ - if (feedback_do_feedback(sp)) { - - const integertime_t ti_step = get_integer_timestep(sp->time_bin); - const integertime_t ti_begin = - get_integer_time_begin(e->ti_current - 1, sp->time_bin); - - /* Get particle time-step */ - double dt; - if (with_cosmology) { - dt = cosmology_get_delta_time(e->cosmology, ti_begin, - ti_begin + ti_step); - } else { - dt = get_timestep(sp->time_bin, e->time_base); - } - - /* Calculate age of the star at current time */ - double star_age_end_of_step; - if (with_cosmology) { - star_age_end_of_step = - cosmology_get_delta_time_from_scale_factors( - cosmo, (double)sp->birth_scale_factor, cosmo->a); - } else { - star_age_end_of_step = (float)e->time - sp->birth_time; - } - - /* Has this star been around for a while ? */ - if (star_age_end_of_step > 0.) { - - /* Age of the star at the start of the step */ - const double star_age_beg_of_step = - max(star_age_end_of_step - dt, 0.); - - /* Compute the stellar evolution */ - feedback_evolve_spart(sp, feedback_props, cosmo, us, - star_age_beg_of_step, dt); - } else { - - /* Reset the feedback fields of the star particle */ - feedback_reset_feedback(sp, feedback_props); - } - } else { - - feedback_reset_feedback(sp, feedback_props); - } - - /* Ok, we are done with this particle */ - continue; - } - - /* Normal case: Use Newton-Raphson to get a better value of h */ - - /* Avoid floating point exception from f_prime = 0 */ - h_new = h_old - f / (f_prime + FLT_MIN); - - /* Be verbose about the particles that struggle to converge */ - if (num_reruns > max_smoothing_iter - 10) { - - message( - "Smoothing length convergence problem: iter=%d p->id=%lld " - "h_init=%12.8e h_old=%12.8e h_new=%12.8e f=%f f_prime=%f " - "n_sum=%12.8e n_target=%12.8e left=%12.8e right=%12.8e", - num_reruns, sp->id, h_init, h_old, h_new, f, f_prime, n_sum, - n_target, left[i], right[i]); - } - - /* Safety check: truncate to the range [ h_old/2 , 2h_old ]. 
*/ - h_new = min(h_new, 2.f * h_old); - h_new = max(h_new, 0.5f * h_old); - - /* Verify that we are actually progrssing towards the answer */ - h_new = max(h_new, left[i]); - h_new = min(h_new, right[i]); - } - - /* Check whether the particle has an inappropriate smoothing length */ - if (fabsf(h_new - h_old) > eps * h_old) { - - /* Ok, correct then */ - - /* Case where we have been oscillating around the solution */ - if ((h_new == left[i] && h_old == right[i]) || - (h_old == left[i] && h_new == right[i])) { - - /* Bissect the remaining interval */ - sp->h = pow_inv_dimension( - 0.5f * (pow_dimension(left[i]) + pow_dimension(right[i]))); - - } else { - - /* Normal case */ - sp->h = h_new; - } - - /* If below the absolute maximum, try again */ - if (sp->h < stars_h_max && sp->h > stars_h_min) { - - /* Flag for another round of fun */ - sid[redo] = sid[i]; - h_0[redo] = h_0[i]; - left[redo] = left[i]; - right[redo] = right[i]; - redo += 1; - - /* Re-initialise everything */ - stars_init_spart(sp); - feedback_init_spart(sp); - - /* Off we go ! */ - continue; - - } else if (sp->h <= stars_h_min) { - - /* Ok, this particle is a lost cause... */ - sp->h = stars_h_min; - - } else if (sp->h >= stars_h_max) { - - /* Ok, this particle is a lost cause... */ - sp->h = stars_h_max; - - /* Do some damage control if no neighbours at all were found */ - if (has_no_neighbours) { - stars_spart_has_no_neighbours(sp, cosmo); - } - - } else { - error( - "Fundamental problem with the smoothing length iteration " - "logic."); - } - } - - /* We now have a particle whose smoothing length has converged */ - - /* Check if h_max has increased */ - h_max = max(h_max, sp->h); - - stars_reset_feedback(sp); - - /* Only do feedback if stars have a reasonable birth time */ - if (feedback_do_feedback(sp)) { - - const integertime_t ti_step = get_integer_timestep(sp->time_bin); - const integertime_t ti_begin = - get_integer_time_begin(e->ti_current - 1, sp->time_bin); - - /* Get particle time-step */ - double dt; - if (with_cosmology) { - dt = cosmology_get_delta_time(e->cosmology, ti_begin, - ti_begin + ti_step); - } else { - dt = get_timestep(sp->time_bin, e->time_base); - } - - /* Calculate age of the star at current time */ - double star_age_end_of_step; - if (with_cosmology) { - star_age_end_of_step = cosmology_get_delta_time_from_scale_factors( - cosmo, sp->birth_scale_factor, (float)cosmo->a); - } else { - star_age_end_of_step = (float)e->time - sp->birth_time; - } - - /* Has this star been around for a while ? */ - if (star_age_end_of_step > 0.) { - - /* Age of the star at the start of the step */ - const double star_age_beg_of_step = - max(star_age_end_of_step - dt, 0.); - - /* Compute the stellar evolution */ - feedback_evolve_spart(sp, feedback_props, cosmo, us, - star_age_beg_of_step, dt); - } else { - - /* Reset the feedback fields of the star particle */ - feedback_reset_feedback(sp, feedback_props); - } - } else { - - /* Reset the feedback fields of the star particle */ - feedback_reset_feedback(sp, feedback_props); - } - } - - /* We now need to treat the particles whose smoothing length had not - * converged again */ - - /* Re-set the counter for the next loop (potentially). */ - scount = redo; - if (scount > 0) { - - /* Climb up the cell hierarchy. */ - for (struct cell *finger = c; finger != NULL; finger = finger->parent) { - - /* Run through this cell's density interactions. 
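The ghost loops removed here (and duplicated for the black holes below) adjust each smoothing length with a safeguarded Newton-Raphson step on f(h) = wcount * h^d - eta^d, tightening bisection bounds and clamping the step so it cannot run away. A compact, hedged 3D sketch of one such update, independent of the SWIFT data structures:

```c
#include <float.h>
#include <math.h>

/* One safeguarded Newton-Raphson update of a smoothing length h so that the
 * weighted neighbour count approaches n_target = eta^3. wcount and wcount_dh
 * play the role of sp->density.wcount(_dh); *left and *right are the running
 * bisection bounds. Illustrative only. */
static float update_h(float h, float wcount, float wcount_dh, float n_target,
                      float *left, float *right) {
  const float h3 = h * h * h;
  const float n_sum = wcount * h3;
  const float f = n_sum - n_target;
  const float f_prime = wcount_dh * h3 + 3.f * wcount * h * h;

  /* Tighten the bisection bracket. */
  if (n_sum < n_target)
    *left = fmaxf(*left, h);
  else if (n_sum > n_target)
    *right = fminf(*right, h);

  /* Newton step, protected against f_prime == 0. */
  float h_new = h - f / (f_prime + FLT_MIN);

  /* Keep the step within [h/2, 2h] and inside the bracket. */
  h_new = fminf(h_new, 2.f * h);
  h_new = fmaxf(h_new, 0.5f * h);
  h_new = fmaxf(h_new, *left);
  h_new = fminf(h_new, *right);
  return h_new;
}
```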
*/ - for (struct link *l = finger->stars.density; l != NULL; l = l->next) { - -#ifdef SWIFT_DEBUG_CHECKS - if (l->t->ti_run < r->e->ti_current) - error("Density task should have been run."); -#endif - - /* Self-interaction? */ - if (l->t->type == task_type_self) - runner_doself_subset_branch_stars_density(r, finger, sparts, sid, - scount); - - /* Otherwise, pair interaction? */ - else if (l->t->type == task_type_pair) { - - /* Left or right? */ - if (l->t->ci == finger) - runner_dopair_subset_branch_stars_density( - r, finger, sparts, sid, scount, l->t->cj); - else - runner_dopair_subset_branch_stars_density( - r, finger, sparts, sid, scount, l->t->ci); - } - - /* Otherwise, sub-self interaction? */ - else if (l->t->type == task_type_sub_self) - runner_dosub_subset_stars_density(r, finger, sparts, sid, scount, - NULL, 1); - - /* Otherwise, sub-pair interaction? */ - else if (l->t->type == task_type_sub_pair) { - - /* Left or right? */ - if (l->t->ci == finger) - runner_dosub_subset_stars_density(r, finger, sparts, sid, - scount, l->t->cj, 1); - else - runner_dosub_subset_stars_density(r, finger, sparts, sid, - scount, l->t->ci, 1); - } - } - } - } - } - - if (scount) { - error("Smoothing length failed to converge on %i particles.", scount); - } - - /* Be clean */ - free(left); - free(right); - free(sid); - free(h_0); - } - - /* Update h_max */ - c->stars.h_max = h_max; - - /* The ghost may not always be at the top level. - * Therefore we need to update h_max between the super- and top-levels */ - if (c->stars.ghost) { - for (struct cell *tmp = c->parent; tmp != NULL; tmp = tmp->parent) { - atomic_max_d(&tmp->stars.h_max, h_max); - } - } - - if (timer) TIMER_TOC(timer_do_stars_ghost); -} - -/** - * @brief Intermediate task after the density to check that the smoothing - * lengths are correct. - * - * @param r The runner thread. - * @param c The cell. - * @param timer Are we timing this ? - */ -void runner_do_black_holes_density_ghost(struct runner *r, struct cell *c, - int timer) { - - struct bpart *restrict bparts = c->black_holes.parts; - const struct engine *e = r->e; - const struct cosmology *cosmo = e->cosmology; - const float black_holes_h_max = e->hydro_properties->h_max; - const float black_holes_h_min = e->hydro_properties->h_min; - const float eps = e->black_holes_properties->h_tolerance; - const float black_holes_eta_dim = - pow_dimension(e->black_holes_properties->eta_neighbours); - const int max_smoothing_iter = e->hydro_properties->max_smoothing_iterations; - int redo = 0, bcount = 0; - - /* Running value of the maximal smoothing length */ - double h_max = c->black_holes.h_max; - - TIMER_TIC; - - /* Anything to do here? */ - if (c->black_holes.count == 0) return; - if (!cell_is_active_black_holes(c, e)) return; - - /* Recurse? */ - if (c->split) { - for (int k = 0; k < 8; k++) { - if (c->progeny[k] != NULL) { - runner_do_black_holes_density_ghost(r, c->progeny[k], 0); - - /* Update h_max */ - h_max = max(h_max, c->progeny[k]->black_holes.h_max); - } - } - } else { - - /* Init the list of active particles that have to be updated. 
*/ - int *sid = NULL; - float *h_0 = NULL; - float *left = NULL; - float *right = NULL; - if ((sid = (int *)malloc(sizeof(int) * c->black_holes.count)) == NULL) - error("Can't allocate memory for sid."); - if ((h_0 = (float *)malloc(sizeof(float) * c->black_holes.count)) == NULL) - error("Can't allocate memory for h_0."); - if ((left = (float *)malloc(sizeof(float) * c->black_holes.count)) == NULL) - error("Can't allocate memory for left."); - if ((right = (float *)malloc(sizeof(float) * c->black_holes.count)) == NULL) - error("Can't allocate memory for right."); - for (int k = 0; k < c->black_holes.count; k++) - if (bpart_is_active(&bparts[k], e)) { - sid[bcount] = k; - h_0[bcount] = bparts[k].h; - left[bcount] = 0.f; - right[bcount] = black_holes_h_max; - ++bcount; - } - - /* While there are particles that need to be updated... */ - for (int num_reruns = 0; bcount > 0 && num_reruns < max_smoothing_iter; - num_reruns++) { - - /* Reset the redo-count. */ - redo = 0; - - /* Loop over the remaining active parts in this cell. */ - for (int i = 0; i < bcount; i++) { - - /* Get a direct pointer on the part. */ - struct bpart *bp = &bparts[sid[i]]; - -#ifdef SWIFT_DEBUG_CHECKS - /* Is this part within the timestep? */ - if (!bpart_is_active(bp, e)) - error("Ghost applied to inactive particle"); -#endif - - /* Get some useful values */ - const float h_init = h_0[i]; - const float h_old = bp->h; - const float h_old_dim = pow_dimension(h_old); - const float h_old_dim_minus_one = pow_dimension_minus_one(h_old); - - float h_new; - int has_no_neighbours = 0; - - if (bp->density.wcount == 0.f) { /* No neighbours case */ - - /* Flag that there were no neighbours */ - has_no_neighbours = 1; - - /* Double h and try again */ - h_new = 2.f * h_old; - - } else { - - /* Finish the density calculation */ - black_holes_end_density(bp, cosmo); - - /* Compute one step of the Newton-Raphson scheme */ - const float n_sum = bp->density.wcount * h_old_dim; - const float n_target = black_holes_eta_dim; - const float f = n_sum - n_target; - const float f_prime = - bp->density.wcount_dh * h_old_dim + - hydro_dimension * bp->density.wcount * h_old_dim_minus_one; - - /* Improve the bisection bounds */ - if (n_sum < n_target) - left[i] = max(left[i], h_old); - else if (n_sum > n_target) - right[i] = min(right[i], h_old); - -#ifdef SWIFT_DEBUG_CHECKS - /* Check the validity of the left and right bounds */ - if (left[i] > right[i]) - error("Invalid left (%e) and right (%e)", left[i], right[i]); -#endif - - /* Skip if h is already h_max and we don't have enough neighbours */ - /* Same if we are below h_min */ - if (((bp->h >= black_holes_h_max) && (f < 0.f)) || - ((bp->h <= black_holes_h_min) && (f > 0.f))) { - - black_holes_reset_feedback(bp); - - /* Ok, we are done with this particle */ - continue; - } - - /* Normal case: Use Newton-Raphson to get a better value of h */ - - /* Avoid floating point exception from f_prime = 0 */ - h_new = h_old - f / (f_prime + FLT_MIN); - - /* Be verbose about the particles that struggle to converge */ - if (num_reruns > max_smoothing_iter - 10) { - - message( - "Smoothing length convergence problem: iter=%d p->id=%lld " - "h_init=%12.8e h_old=%12.8e h_new=%12.8e f=%f f_prime=%f " - "n_sum=%12.8e n_target=%12.8e left=%12.8e right=%12.8e", - num_reruns, bp->id, h_init, h_old, h_new, f, f_prime, n_sum, - n_target, left[i], right[i]); - } - - /* Safety check: truncate to the range [ h_old/2 , 2h_old ]. 
*/ - h_new = min(h_new, 2.f * h_old); - h_new = max(h_new, 0.5f * h_old); - - /* Verify that we are actually progrssing towards the answer */ - h_new = max(h_new, left[i]); - h_new = min(h_new, right[i]); - } - - /* Check whether the particle has an inappropriate smoothing length */ - if (fabsf(h_new - h_old) > eps * h_old) { - - /* Ok, correct then */ - - /* Case where we have been oscillating around the solution */ - if ((h_new == left[i] && h_old == right[i]) || - (h_old == left[i] && h_new == right[i])) { - - /* Bissect the remaining interval */ - bp->h = pow_inv_dimension( - 0.5f * (pow_dimension(left[i]) + pow_dimension(right[i]))); - - } else { - - /* Normal case */ - bp->h = h_new; - } - - /* If below the absolute maximum, try again */ - if (bp->h < black_holes_h_max && bp->h > black_holes_h_min) { - - /* Flag for another round of fun */ - sid[redo] = sid[i]; - h_0[redo] = h_0[i]; - left[redo] = left[i]; - right[redo] = right[i]; - redo += 1; - - /* Re-initialise everything */ - black_holes_init_bpart(bp); - - /* Off we go ! */ - continue; - - } else if (bp->h <= black_holes_h_min) { - - /* Ok, this particle is a lost cause... */ - bp->h = black_holes_h_min; - - } else if (bp->h >= black_holes_h_max) { - - /* Ok, this particle is a lost cause... */ - bp->h = black_holes_h_max; - - /* Do some damage control if no neighbours at all were found */ - if (has_no_neighbours) { - black_holes_bpart_has_no_neighbours(bp, cosmo); - } - - } else { - error( - "Fundamental problem with the smoothing length iteration " - "logic."); - } - } - - /* We now have a particle whose smoothing length has converged */ - - black_holes_reset_feedback(bp); - - /* Check if h_max has increased */ - h_max = max(h_max, bp->h); - } - - /* We now need to treat the particles whose smoothing length had not - * converged again */ - - /* Re-set the counter for the next loop (potentially). */ - bcount = redo; - if (bcount > 0) { - - /* Climb up the cell hierarchy. */ - for (struct cell *finger = c; finger != NULL; finger = finger->parent) { - - /* Run through this cell's density interactions. */ - for (struct link *l = finger->black_holes.density; l != NULL; - l = l->next) { - -#ifdef SWIFT_DEBUG_CHECKS - if (l->t->ti_run < r->e->ti_current) - error("Density task should have been run."); -#endif - - /* Self-interaction? */ - if (l->t->type == task_type_self) - runner_doself_subset_branch_bh_density(r, finger, bparts, sid, - bcount); - - /* Otherwise, pair interaction? */ - else if (l->t->type == task_type_pair) { - - /* Left or right? */ - if (l->t->ci == finger) - runner_dopair_subset_branch_bh_density(r, finger, bparts, sid, - bcount, l->t->cj); - else - runner_dopair_subset_branch_bh_density(r, finger, bparts, sid, - bcount, l->t->ci); - } - - /* Otherwise, sub-self interaction? */ - else if (l->t->type == task_type_sub_self) - runner_dosub_subset_bh_density(r, finger, bparts, sid, bcount, - NULL, 1); - - /* Otherwise, sub-pair interaction? */ - else if (l->t->type == task_type_sub_pair) { - - /* Left or right? */ - if (l->t->ci == finger) - runner_dosub_subset_bh_density(r, finger, bparts, sid, bcount, - l->t->cj, 1); - else - runner_dosub_subset_bh_density(r, finger, bparts, sid, bcount, - l->t->ci, 1); - } - } - } - } - } - - if (bcount) { - error("Smoothing length failed to converge on %i particles.", bcount); - } - - /* Be clean */ - free(left); - free(right); - free(sid); - free(h_0); - } - - /* Update h_max */ - c->black_holes.h_max = h_max; - - /* The ghost may not always be at the top level. 
- * Therefore we need to update h_max between the super- and top-levels */ - if (c->black_holes.density_ghost) { - for (struct cell *tmp = c->parent; tmp != NULL; tmp = tmp->parent) { - atomic_max_d(&tmp->black_holes.h_max, h_max); - } - } - - if (timer) TIMER_TOC(timer_do_black_holes_ghost); -} - -/** - * @brief Intermediate task after the BHs have done their swallowing step. - * This is used to update the BH quantities if necessary. - * - * @param r The runner thread. - * @param c The cell. - * @param timer Are we timing this ? - */ -void runner_do_black_holes_swallow_ghost(struct runner *r, struct cell *c, - int timer) { - - struct bpart *restrict bparts = c->black_holes.parts; - const int count = c->black_holes.count; - const struct engine *e = r->e; - const int with_cosmology = e->policy & engine_policy_cosmology; - - TIMER_TIC; - - /* Anything to do here? */ - if (!cell_is_active_hydro(c, e)) return; - - /* Recurse? */ - if (c->split) { - for (int k = 0; k < 8; k++) - if (c->progeny[k] != NULL) - runner_do_black_holes_swallow_ghost(r, c->progeny[k], 0); - } else { - - /* Loop over the parts in this cell. */ - for (int i = 0; i < count; i++) { - - /* Get a direct pointer on the part. */ - struct bpart *bp = &bparts[i]; - - if (bpart_is_active(bp, e)) { - - /* Compute the final operations for repositioning of this BH */ - black_holes_end_reposition(bp, e->black_holes_properties, - e->physical_constants, e->cosmology); - - /* Get particle time-step */ - double dt; - if (with_cosmology) { - const integertime_t ti_step = get_integer_timestep(bp->time_bin); - const integertime_t ti_begin = - get_integer_time_begin(e->ti_current - 1, bp->time_bin); - - dt = cosmology_get_delta_time(e->cosmology, ti_begin, - ti_begin + ti_step); - } else { - dt = get_timestep(bp->time_bin, e->time_base); - } - - /* Compute variables required for the feedback loop */ - black_holes_prepare_feedback(bp, e->black_holes_properties, - e->physical_constants, e->cosmology, dt); - } - } - } - - if (timer) TIMER_TOC(timer_do_black_holes_ghost); -} - -/** - * @brief Calculate gravity acceleration from external potential - * - * @param r runner task - * @param c cell - * @param timer 1 if the time is to be recorded. - */ -void runner_do_grav_external(struct runner *r, struct cell *c, int timer) { - - struct gpart *restrict gparts = c->grav.parts; - const int gcount = c->grav.count; - const struct engine *e = r->e; - const struct external_potential *potential = e->external_potential; - const struct phys_const *constants = e->physical_constants; - const double time = r->e->time; - - TIMER_TIC; - - /* Anything to do here? */ - if (!cell_is_active_gravity(c, e)) return; - - /* Recurse? */ - if (c->split) { - for (int k = 0; k < 8; k++) - if (c->progeny[k] != NULL) runner_do_grav_external(r, c->progeny[k], 0); - } else { - - /* Loop over the gparts in this cell. */ - for (int i = 0; i < gcount; i++) { - - /* Get a direct pointer on the part. */ - struct gpart *restrict gp = &gparts[i]; - - /* Is this part within the time step? */ - if (gpart_is_active(gp, e)) { - external_gravity_acceleration(time, potential, constants, gp); - } - } - } - - if (timer) TIMER_TOC(timer_dograv_external); -} - -/** - * @brief Calculate gravity accelerations from the periodic mesh - * - * @param r runner task - * @param c cell - * @param timer 1 if the time is to be recorded. 
- */ -void runner_do_grav_mesh(struct runner *r, struct cell *c, int timer) { - - struct gpart *restrict gparts = c->grav.parts; - const int gcount = c->grav.count; - const struct engine *e = r->e; - -#ifdef SWIFT_DEBUG_CHECKS - if (!e->s->periodic) error("Calling mesh forces in non-periodic mode."); -#endif - - TIMER_TIC; - - /* Anything to do here? */ - if (!cell_is_active_gravity(c, e)) return; - - /* Recurse? */ - if (c->split) { - for (int k = 0; k < 8; k++) - if (c->progeny[k] != NULL) runner_do_grav_mesh(r, c->progeny[k], 0); - } else { - - /* Get the forces from the gravity mesh */ - pm_mesh_interpolate_forces(e->mesh, e, gparts, gcount); - } - - if (timer) TIMER_TOC(timer_dograv_mesh); -} - -/** - * @brief Calculate change in thermal state of particles induced - * by radiative cooling and heating. - * - * @param r runner task - * @param c cell - * @param timer 1 if the time is to be recorded. - */ -void runner_do_cooling(struct runner *r, struct cell *c, int timer) { - - const struct engine *e = r->e; - const struct cosmology *cosmo = e->cosmology; - const int with_cosmology = (e->policy & engine_policy_cosmology); - const struct cooling_function_data *cooling_func = e->cooling_func; - const struct phys_const *constants = e->physical_constants; - const struct unit_system *us = e->internal_units; - const struct hydro_props *hydro_props = e->hydro_properties; - const struct entropy_floor_properties *entropy_floor_props = e->entropy_floor; - const double time_base = e->time_base; - const integertime_t ti_current = e->ti_current; - struct part *restrict parts = c->hydro.parts; - struct xpart *restrict xparts = c->hydro.xparts; - const int count = c->hydro.count; - - TIMER_TIC; - - /* Anything to do here? */ - if (!cell_is_active_hydro(c, e)) return; - - /* Recurse? */ - if (c->split) { - for (int k = 0; k < 8; k++) - if (c->progeny[k] != NULL) runner_do_cooling(r, c->progeny[k], 0); - } else { - - /* Loop over the parts in this cell. */ - for (int i = 0; i < count; i++) { - - /* Get a direct pointer on the part. */ - struct part *restrict p = &parts[i]; - struct xpart *restrict xp = &xparts[i]; - - if (part_is_active(p, e)) { - - double dt_cool, dt_therm; - if (with_cosmology) { - const integertime_t ti_step = get_integer_timestep(p->time_bin); - const integertime_t ti_begin = - get_integer_time_begin(ti_current - 1, p->time_bin); - - dt_cool = - cosmology_get_delta_time(cosmo, ti_begin, ti_begin + ti_step); - dt_therm = cosmology_get_therm_kick_factor(e->cosmology, ti_begin, - ti_begin + ti_step); - - } else { - dt_cool = get_timestep(p->time_bin, time_base); - dt_therm = get_timestep(p->time_bin, time_base); - } - - /* Let's cool ! 
*/ - cooling_cool_part(constants, us, cosmo, hydro_props, - entropy_floor_props, cooling_func, p, xp, dt_cool, - dt_therm); - } - } - } - - if (timer) TIMER_TOC(timer_do_cooling); -} - -/** - * - */ -void runner_do_star_formation(struct runner *r, struct cell *c, int timer) { - - struct engine *e = r->e; - const struct cosmology *cosmo = e->cosmology; - const struct star_formation *sf_props = e->star_formation; - const struct phys_const *phys_const = e->physical_constants; - const int count = c->hydro.count; - struct part *restrict parts = c->hydro.parts; - struct xpart *restrict xparts = c->hydro.xparts; - const int with_cosmology = (e->policy & engine_policy_cosmology); - const int with_feedback = (e->policy & engine_policy_feedback); - const struct hydro_props *restrict hydro_props = e->hydro_properties; - const struct unit_system *restrict us = e->internal_units; - struct cooling_function_data *restrict cooling = e->cooling_func; - const struct entropy_floor_properties *entropy_floor = e->entropy_floor; - const double time_base = e->time_base; - const integertime_t ti_current = e->ti_current; - const int current_stars_count = c->stars.count; - - TIMER_TIC; - -#ifdef SWIFT_DEBUG_CHECKS - if (c->nodeID != e->nodeID) - error("Running star formation task on a foreign node!"); -#endif - - /* Anything to do here? */ - if (c->hydro.count == 0 || !cell_is_active_hydro(c, e)) { - star_formation_logger_log_inactive_cell(&c->stars.sfh); - return; - } - - /* Reset the SFR */ - star_formation_logger_init(&c->stars.sfh); - - /* Recurse? */ - if (c->split) { - for (int k = 0; k < 8; k++) - if (c->progeny[k] != NULL) { - /* Load the child cell */ - struct cell *restrict cp = c->progeny[k]; - - /* Do the recursion */ - runner_do_star_formation(r, cp, 0); - - /* Update current cell using child cells */ - star_formation_logger_add(&c->stars.sfh, &cp->stars.sfh); - } - } else { - - /* Loop over the gas particles in this cell. */ - for (int k = 0; k < count; k++) { - - /* Get a handle on the part. */ - struct part *restrict p = &parts[k]; - struct xpart *restrict xp = &xparts[k]; - - /* Only work on active particles */ - if (part_is_active(p, e)) { - - /* Is this particle star forming? */ - if (star_formation_is_star_forming(p, xp, sf_props, phys_const, cosmo, - hydro_props, us, cooling, - entropy_floor)) { - - /* Time-step size for this particle */ - double dt_star; - if (with_cosmology) { - const integertime_t ti_step = get_integer_timestep(p->time_bin); - const integertime_t ti_begin = - get_integer_time_begin(ti_current - 1, p->time_bin); - - dt_star = - cosmology_get_delta_time(cosmo, ti_begin, ti_begin + ti_step); - - } else { - dt_star = get_timestep(p->time_bin, time_base); - } - - /* Compute the SF rate of the particle */ - star_formation_compute_SFR(p, xp, sf_props, phys_const, cosmo, - dt_star); - - /* Add the SFR and SFR*dt to the SFH struct of this cell */ - star_formation_logger_log_active_part(p, xp, &c->stars.sfh, dt_star); - - /* Are we forming a star particle from this SF rate? */ - if (star_formation_should_convert_to_star(p, xp, sf_props, e, - dt_star)) { - - /* Convert the gas particle to a star particle */ - struct spart *sp = cell_convert_part_to_spart(e, c, p, xp); - - /* Did we get a star? (Or did we run out of spare ones?) 
*/ - if (sp != NULL) { - - /* message("We formed a star id=%lld cellID=%d", sp->id, - * c->cellID); */ - - /* Copy the properties of the gas particle to the star particle */ - star_formation_copy_properties(p, xp, sp, e, sf_props, cosmo, - with_cosmology, phys_const, - hydro_props, us, cooling); - - /* Update the Star formation history */ - star_formation_logger_log_new_spart(sp, &c->stars.sfh); - } - } - - } else { /* Are we not star-forming? */ - - /* Update the particle to flag it as not star-forming */ - star_formation_update_part_not_SFR(p, xp, e, sf_props, - with_cosmology); - - } /* Not Star-forming? */ - - } else { /* is active? */ - - /* Check if the particle is not inhibited */ - if (!part_is_inhibited(p, e)) { - star_formation_logger_log_inactive_part(p, xp, &c->stars.sfh); - } - } - } /* Loop over particles */ - } - - /* If we formed any stars, the star sorts are now invalid. We need to - * re-compute them. */ - if (with_feedback && (c == c->top) && - (current_stars_count != c->stars.count)) { - cell_set_star_resort_flag(c); - } - - if (timer) TIMER_TOC(timer_do_star_formation); -} - -/** - * @brief Sorts again all the stars in a given cell hierarchy. - * - * This is intended to be used after the star formation task has been run - * to get the cells back into a state where self/pair star tasks can be run. - * - * @param r The thread #runner. - * @param c The top-level cell to run on. - * @param timer Are we timing this? - */ -void runner_do_stars_resort(struct runner *r, struct cell *c, const int timer) { - -#ifdef SWIFT_DEBUG_CHECKS - if (c->nodeID != r->e->nodeID) error("Task must be run locally!"); -#endif - - TIMER_TIC; - - /* Did we demand a recalculation of the stars'sorts? */ - if (cell_get_flag(c, cell_flag_do_stars_resort)) { - runner_do_all_stars_sort(r, c); - cell_clear_flag(c, cell_flag_do_stars_resort); - } - - if (timer) TIMER_TOC(timer_do_stars_resort); -} - -/** - * @brief Sort the entries in ascending order using QuickSort. - * - * @param sort The entries - * @param N The number of entries. - */ -void runner_do_sort_ascending(struct sort_entry *sort, int N) { - - struct { - short int lo, hi; - } qstack[10]; - int qpos, i, j, lo, hi, imin; - struct sort_entry temp; - float pivot; - - /* Sort parts in cell_i in decreasing order with quicksort */ - qstack[0].lo = 0; - qstack[0].hi = N - 1; - qpos = 0; - while (qpos >= 0) { - lo = qstack[qpos].lo; - hi = qstack[qpos].hi; - qpos -= 1; - if (hi - lo < 15) { - for (i = lo; i < hi; i++) { - imin = i; - for (j = i + 1; j <= hi; j++) - if (sort[j].d < sort[imin].d) imin = j; - if (imin != i) { - temp = sort[imin]; - sort[imin] = sort[i]; - sort[i] = temp; - } - } - } else { - pivot = sort[(lo + hi) / 2].d; - i = lo; - j = hi; - while (i <= j) { - while (sort[i].d < pivot) i++; - while (sort[j].d > pivot) j--; - if (i <= j) { - if (i < j) { - temp = sort[i]; - sort[i] = sort[j]; - sort[j] = temp; - } - i += 1; - j -= 1; - } - } - if (j > (lo + hi) / 2) { - if (lo < j) { - qpos += 1; - qstack[qpos].lo = lo; - qstack[qpos].hi = j; - } - if (i < hi) { - qpos += 1; - qstack[qpos].lo = i; - qstack[qpos].hi = hi; - } - } else { - if (i < hi) { - qpos += 1; - qstack[qpos].lo = i; - qstack[qpos].hi = hi; - } - if (lo < j) { - qpos += 1; - qstack[qpos].lo = lo; - qstack[qpos].hi = j; - } - } - } - } -} - -#ifdef SWIFT_DEBUG_CHECKS -/** - * @brief Recursively checks that the flags are consistent in a cell hierarchy. - * - * Debugging function. Exists in two flavours: hydro & stars. 
- */ -#define RUNNER_CHECK_SORTS(TYPE) \ - void runner_check_sorts_##TYPE(struct cell *c, int flags) { \ - \ - if (flags & ~c->TYPE.sorted) error("Inconsistent sort flags (downward)!"); \ - if (c->split) \ - for (int k = 0; k < 8; k++) \ - if (c->progeny[k] != NULL && c->progeny[k]->TYPE.count > 0) \ - runner_check_sorts_##TYPE(c->progeny[k], c->TYPE.sorted); \ - } -#else -#define RUNNER_CHECK_SORTS(TYPE) \ - void runner_check_sorts_##TYPE(struct cell *c, int flags) { \ - error("Calling debugging code without debugging flag activated."); \ - } -#endif - -RUNNER_CHECK_SORTS(hydro) -RUNNER_CHECK_SORTS(stars) - -/** - * @brief Sort the particles in the given cell along all cardinal directions. - * - * @param r The #runner. - * @param c The #cell. - * @param flags Cell flag. - * @param cleanup If true, re-build the sorts for the selected flags instead - * of just adding them. - * @param clock Flag indicating whether to record the timing or not, needed - * for recursive calls. - */ -void runner_do_hydro_sort(struct runner *r, struct cell *c, int flags, - int cleanup, int clock) { - - struct sort_entry *fingers[8]; - const int count = c->hydro.count; - const struct part *parts = c->hydro.parts; - struct xpart *xparts = c->hydro.xparts; - float buff[8]; - - TIMER_TIC; - -#ifdef SWIFT_DEBUG_CHECKS - if (c->hydro.super == NULL) error("Task called above the super level!!!"); -#endif - - /* We need to do the local sorts plus whatever was requested further up. */ - flags |= c->hydro.do_sort; - if (cleanup) { - c->hydro.sorted = 0; - } else { - flags &= ~c->hydro.sorted; - } - if (flags == 0 && !cell_get_flag(c, cell_flag_do_hydro_sub_sort)) return; - - /* Check that the particles have been moved to the current time */ - if (flags && !cell_are_part_drifted(c, r->e)) - error("Sorting un-drifted cell c->nodeID=%d", c->nodeID); - -#ifdef SWIFT_DEBUG_CHECKS - /* Make sure the sort flags are consistent (downward). */ - runner_check_sorts_hydro(c, c->hydro.sorted); - - /* Make sure the sort flags are consistent (upard). */ - for (struct cell *finger = c->parent; finger != NULL; - finger = finger->parent) { - if (finger->hydro.sorted & ~c->hydro.sorted) - error("Inconsistent sort flags (upward)."); - } - - /* Update the sort timer which represents the last time the sorts - were re-set. */ - if (c->hydro.sorted == 0) c->hydro.ti_sort = r->e->ti_current; -#endif - - /* Allocate memory for sorting. */ - cell_malloc_hydro_sorts(c, flags); - - /* Does this cell have any progeny? */ - if (c->split) { - - /* Fill in the gaps within the progeny. */ - float dx_max_sort = 0.0f; - float dx_max_sort_old = 0.0f; - for (int k = 0; k < 8; k++) { - if (c->progeny[k] != NULL) { - - if (c->progeny[k]->hydro.count > 0) { - - /* Only propagate cleanup if the progeny is stale. */ - runner_do_hydro_sort( - r, c->progeny[k], flags, - cleanup && (c->progeny[k]->hydro.dx_max_sort_old > - space_maxreldx * c->progeny[k]->dmin), - 0); - dx_max_sort = max(dx_max_sort, c->progeny[k]->hydro.dx_max_sort); - dx_max_sort_old = - max(dx_max_sort_old, c->progeny[k]->hydro.dx_max_sort_old); - } else { - - /* We need to clean up the unused flags that were in case the - number of particles in the cell would change */ - cell_clear_hydro_sort_flags(c->progeny[k], /*clear_unused_flags=*/1); - } - } - } - c->hydro.dx_max_sort = dx_max_sort; - c->hydro.dx_max_sort_old = dx_max_sort_old; - - /* Loop over the 13 different sort arrays. */ - for (int j = 0; j < 13; j++) { - - /* Has this sort array been flagged? 
*/ - if (!(flags & (1 << j))) continue; - - /* Init the particle index offsets. */ - int off[8]; - off[0] = 0; - for (int k = 1; k < 8; k++) - if (c->progeny[k - 1] != NULL) - off[k] = off[k - 1] + c->progeny[k - 1]->hydro.count; - else - off[k] = off[k - 1]; - - /* Init the entries and indices. */ - int inds[8]; - for (int k = 0; k < 8; k++) { - inds[k] = k; - if (c->progeny[k] != NULL && c->progeny[k]->hydro.count > 0) { - fingers[k] = c->progeny[k]->hydro.sort[j]; - buff[k] = fingers[k]->d; - off[k] = off[k]; - } else - buff[k] = FLT_MAX; - } - - /* Sort the buffer. */ - for (int i = 0; i < 7; i++) - for (int k = i + 1; k < 8; k++) - if (buff[inds[k]] < buff[inds[i]]) { - int temp_i = inds[i]; - inds[i] = inds[k]; - inds[k] = temp_i; - } - - /* For each entry in the new sort list. */ - struct sort_entry *finger = c->hydro.sort[j]; - for (int ind = 0; ind < count; ind++) { - - /* Copy the minimum into the new sort array. */ - finger[ind].d = buff[inds[0]]; - finger[ind].i = fingers[inds[0]]->i + off[inds[0]]; - - /* Update the buffer. */ - fingers[inds[0]] += 1; - buff[inds[0]] = fingers[inds[0]]->d; - - /* Find the smallest entry. */ - for (int k = 1; k < 8 && buff[inds[k]] < buff[inds[k - 1]]; k++) { - int temp_i = inds[k - 1]; - inds[k - 1] = inds[k]; - inds[k] = temp_i; - } - - } /* Merge. */ - - /* Add a sentinel. */ - c->hydro.sort[j][count].d = FLT_MAX; - c->hydro.sort[j][count].i = 0; - - /* Mark as sorted. */ - atomic_or(&c->hydro.sorted, 1 << j); - - } /* loop over sort arrays. */ - - } /* progeny? */ - - /* Otherwise, just sort. */ - else { - - /* Reset the sort distance */ - if (c->hydro.sorted == 0) { -#ifdef SWIFT_DEBUG_CHECKS - if (xparts != NULL && c->nodeID != engine_rank) - error("Have non-NULL xparts in foreign cell"); -#endif - - /* And the individual sort distances if we are a local cell */ - if (xparts != NULL) { - for (int k = 0; k < count; k++) { - xparts[k].x_diff_sort[0] = 0.0f; - xparts[k].x_diff_sort[1] = 0.0f; - xparts[k].x_diff_sort[2] = 0.0f; - } - } - c->hydro.dx_max_sort_old = 0.f; - c->hydro.dx_max_sort = 0.f; - } - - /* Fill the sort array. */ - for (int k = 0; k < count; k++) { - const double px[3] = {parts[k].x[0], parts[k].x[1], parts[k].x[2]}; - for (int j = 0; j < 13; j++) - if (flags & (1 << j)) { - c->hydro.sort[j][k].i = k; - c->hydro.sort[j][k].d = px[0] * runner_shift[j][0] + - px[1] * runner_shift[j][1] + - px[2] * runner_shift[j][2]; - } - } - - /* Add the sentinel and sort. */ - for (int j = 0; j < 13; j++) - if (flags & (1 << j)) { - c->hydro.sort[j][count].d = FLT_MAX; - c->hydro.sort[j][count].i = 0; - runner_do_sort_ascending(c->hydro.sort[j], count); - atomic_or(&c->hydro.sorted, 1 << j); - } - } - -#ifdef SWIFT_DEBUG_CHECKS - /* Verify the sorting. */ - for (int j = 0; j < 13; j++) { - if (!(flags & (1 << j))) continue; - struct sort_entry *finger = c->hydro.sort[j]; - for (int k = 1; k < count; k++) { - if (finger[k].d < finger[k - 1].d) - error("Sorting failed, ascending array."); - if (finger[k].i >= count) error("Sorting failed, indices borked."); - } - } - - /* Make sure the sort flags are consistent (downward). */ - runner_check_sorts_hydro(c, flags); - - /* Make sure the sort flags are consistent (upward). */ - for (struct cell *finger = c->parent; finger != NULL; - finger = finger->parent) { - if (finger->hydro.sorted & ~c->hydro.sorted) - error("Inconsistent sort flags."); - } -#endif - - /* Clear the cell's sort flags. 
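Each of the 13 sort arrays handled in the deleted sorting code orders the particles by their projection onto one inter-cell axis (a row of runner_shift), with an FLT_MAX sentinel closing the array. A toy version of filling and sorting a single axis, using the standard qsort in place of runner_do_sort_ascending and a caller-supplied shift vector:

```c
#include <float.h>
#include <stdlib.h>

struct toy_sort_entry {
  float d; /* distance along the axis */
  int i;   /* index of the particle   */
};

static int cmp_ascending(const void *a, const void *b) {
  const float da = ((const struct toy_sort_entry *)a)->d;
  const float db = ((const struct toy_sort_entry *)b)->d;
  return (da > db) - (da < db);
}

/* Project `count` positions onto one axis and sort; `shift` stands in for one
 * row of runner_shift. The FLT_MAX sentinel mirrors the original code, so the
 * caller must allocate count + 1 entries. */
static void fill_and_sort_axis(const double (*x)[3], int count,
                               const double shift[3],
                               struct toy_sort_entry *sort) {
  for (int k = 0; k < count; k++) {
    sort[k].i = k;
    sort[k].d = (float)(x[k][0] * shift[0] + x[k][1] * shift[1] +
                        x[k][2] * shift[2]);
  }
  sort[count].d = FLT_MAX;
  sort[count].i = 0;
  qsort(sort, count, sizeof(*sort), cmp_ascending);
}
```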
 */
-  c->hydro.do_sort = 0;
-  cell_clear_flag(c, cell_flag_do_hydro_sub_sort);
-  c->hydro.requires_sorts = 0;
-
-  if (clock) TIMER_TOC(timer_dosort);
-}
-
-/**
- * @brief Sort the star particles in the given cell along all cardinal
- *        directions.
- *
- * @param r The #runner.
- * @param c The #cell.
- * @param flags Cell flag.
- * @param cleanup If true, re-build the sorts for the selected flags instead
- *        of just adding them.
- * @param clock Flag indicating whether to record the timing or not, needed
- *        for recursive calls.
- */
-void runner_do_stars_sort(struct runner *r, struct cell *c, int flags,
-                          int cleanup, int clock) {
-
-  struct sort_entry *fingers[8];
-  const int count = c->stars.count;
-  struct spart *sparts = c->stars.parts;
-  float buff[8];
-
-  TIMER_TIC;
-
-#ifdef SWIFT_DEBUG_CHECKS
-  if (c->hydro.super == NULL) error("Task called above the super level!!!");
-#endif
-
-  /* We need to do the local sorts plus whatever was requested further up. */
-  flags |= c->stars.do_sort;
-  if (cleanup) {
-    c->stars.sorted = 0;
-  } else {
-    flags &= ~c->stars.sorted;
-  }
-  if (flags == 0 && !cell_get_flag(c, cell_flag_do_stars_sub_sort)) return;
-
-  /* Check that the particles have been moved to the current time */
-  if (flags && !cell_are_spart_drifted(c, r->e)) {
-    error("Sorting un-drifted cell c->nodeID=%d", c->nodeID);
-  }
-
-#ifdef SWIFT_DEBUG_CHECKS
-  /* Make sure the sort flags are consistent (downward). */
-  runner_check_sorts_stars(c, c->stars.sorted);
-
-  /* Make sure the sort flags are consistent (upward). */
-  for (struct cell *finger = c->parent; finger != NULL;
-       finger = finger->parent) {
-    if (finger->stars.sorted & ~c->stars.sorted)
-      error("Inconsistent sort flags (upward).");
-  }
-
-  /* Update the sort timer which represents the last time the sorts
-     were re-set. */
-  if (c->stars.sorted == 0) c->stars.ti_sort = r->e->ti_current;
-#endif
-
-  /* Start by allocating the entry arrays in the requested dimensions. */
-  cell_malloc_stars_sorts(c, flags);
-
-  /* Does this cell have any progeny? */
-  if (c->split) {
-
-    /* Fill in the gaps within the progeny. */
-    float dx_max_sort = 0.0f;
-    float dx_max_sort_old = 0.0f;
-    for (int k = 0; k < 8; k++) {
-      if (c->progeny[k] != NULL) {
-
-        if (c->progeny[k]->stars.count > 0) {
-
-          /* Only propagate cleanup if the progeny is stale. */
-          const int cleanup_prog =
-              cleanup && (c->progeny[k]->stars.dx_max_sort_old >
-                          space_maxreldx * c->progeny[k]->dmin);
-          runner_do_stars_sort(r, c->progeny[k], flags, cleanup_prog, 0);
-          dx_max_sort = max(dx_max_sort, c->progeny[k]->stars.dx_max_sort);
-          dx_max_sort_old =
-              max(dx_max_sort_old, c->progeny[k]->stars.dx_max_sort_old);
-        } else {
-
-          /* We need to clean up the unused flags that were set in case the
-             number of particles in the cell changed. */
-          cell_clear_stars_sort_flags(c->progeny[k], /*clear_unused_flags=*/1);
-        }
-      }
-    }
-    c->stars.dx_max_sort = dx_max_sort;
-    c->stars.dx_max_sort_old = dx_max_sort_old;
-
-    /* Loop over the 13 different sort arrays. */
-    for (int j = 0; j < 13; j++) {
-
-      /* Has this sort array been flagged? */
-      if (!(flags & (1 << j))) continue;
-
-      /* Init the particle index offsets. */
-      int off[8];
-      off[0] = 0;
-      for (int k = 1; k < 8; k++)
-        if (c->progeny[k - 1] != NULL)
-          off[k] = off[k - 1] + c->progeny[k - 1]->stars.count;
-        else
-          off[k] = off[k - 1];
-
-      /* Init the entries and indices.
*/ - int inds[8]; - for (int k = 0; k < 8; k++) { - inds[k] = k; - if (c->progeny[k] != NULL && c->progeny[k]->stars.count > 0) { - fingers[k] = c->progeny[k]->stars.sort[j]; - buff[k] = fingers[k]->d; - off[k] = off[k]; - } else - buff[k] = FLT_MAX; - } - - /* Sort the buffer. */ - for (int i = 0; i < 7; i++) - for (int k = i + 1; k < 8; k++) - if (buff[inds[k]] < buff[inds[i]]) { - int temp_i = inds[i]; - inds[i] = inds[k]; - inds[k] = temp_i; - } - - /* For each entry in the new sort list. */ - struct sort_entry *finger = c->stars.sort[j]; - for (int ind = 0; ind < count; ind++) { - - /* Copy the minimum into the new sort array. */ - finger[ind].d = buff[inds[0]]; - finger[ind].i = fingers[inds[0]]->i + off[inds[0]]; - - /* Update the buffer. */ - fingers[inds[0]] += 1; - buff[inds[0]] = fingers[inds[0]]->d; - - /* Find the smallest entry. */ - for (int k = 1; k < 8 && buff[inds[k]] < buff[inds[k - 1]]; k++) { - int temp_i = inds[k - 1]; - inds[k - 1] = inds[k]; - inds[k] = temp_i; - } - - } /* Merge. */ - - /* Add a sentinel. */ - c->stars.sort[j][count].d = FLT_MAX; - c->stars.sort[j][count].i = 0; - - /* Mark as sorted. */ - atomic_or(&c->stars.sorted, 1 << j); - - } /* loop over sort arrays. */ - - } /* progeny? */ - - /* Otherwise, just sort. */ - else { - - /* Reset the sort distance */ - if (c->stars.sorted == 0) { - - /* And the individual sort distances if we are a local cell */ - for (int k = 0; k < count; k++) { - sparts[k].x_diff_sort[0] = 0.0f; - sparts[k].x_diff_sort[1] = 0.0f; - sparts[k].x_diff_sort[2] = 0.0f; - } - c->stars.dx_max_sort_old = 0.f; - c->stars.dx_max_sort = 0.f; - } - - /* Fill the sort array. */ - for (int k = 0; k < count; k++) { - const double px[3] = {sparts[k].x[0], sparts[k].x[1], sparts[k].x[2]}; - for (int j = 0; j < 13; j++) - if (flags & (1 << j)) { - c->stars.sort[j][k].i = k; - c->stars.sort[j][k].d = px[0] * runner_shift[j][0] + - px[1] * runner_shift[j][1] + - px[2] * runner_shift[j][2]; - } - } - - /* Add the sentinel and sort. */ - for (int j = 0; j < 13; j++) - if (flags & (1 << j)) { - c->stars.sort[j][count].d = FLT_MAX; - c->stars.sort[j][count].i = 0; - runner_do_sort_ascending(c->stars.sort[j], count); - atomic_or(&c->stars.sorted, 1 << j); - } - } - -#ifdef SWIFT_DEBUG_CHECKS - /* Verify the sorting. */ - for (int j = 0; j < 13; j++) { - if (!(flags & (1 << j))) continue; - struct sort_entry *finger = c->stars.sort[j]; - for (int k = 1; k < count; k++) { - if (finger[k].d < finger[k - 1].d) - error("Sorting failed, ascending array."); - if (finger[k].i >= count) error("Sorting failed, indices borked."); - } - } - - /* Make sure the sort flags are consistent (downward). */ - runner_check_sorts_stars(c, flags); - - /* Make sure the sort flags are consistent (upward). */ - for (struct cell *finger = c->parent; finger != NULL; - finger = finger->parent) { - if (finger->stars.sorted & ~c->stars.sorted) - error("Inconsistent sort flags."); - } -#endif - - /* Clear the cell's sort flags. */ - c->stars.do_sort = 0; - cell_clear_flag(c, cell_flag_do_stars_sub_sort); - c->stars.requires_sorts = 0; - - if (clock) TIMER_TOC(timer_do_stars_sort); -} - -/** - * @brief Recurse into a cell until reaching the super level and call - * the hydro sorting function there. - * - * This function must be called at or above the super level! - * - * This function will sort the particles in all 13 directions. - * - * @param r the #runner. - * @param c the #cell. 
- */ -void runner_do_all_hydro_sort(struct runner *r, struct cell *c) { - -#ifdef SWIFT_DEBUG_CHECKS - if (c->nodeID != engine_rank) error("Function called on a foreign cell!"); -#endif - - if (!cell_is_active_hydro(c, r->e)) return; - - /* Shall we sort at this level? */ - if (c->hydro.super == c) { - - /* Sort everything */ - runner_do_hydro_sort(r, c, 0x1FFF, /*cleanup=*/0, /*timer=*/0); - - } else { - -#ifdef SWIFT_DEBUG_CHECKS - if (c->hydro.super != NULL) error("Function called below the super level!"); -#endif - - /* Ok, then, let's try lower */ - if (c->split) { - for (int k = 0; k < 8; ++k) { - if (c->progeny[k] != NULL) runner_do_all_hydro_sort(r, c->progeny[k]); - } - } else { -#ifdef SWIFT_DEBUG_CHECKS - error("Reached a leaf without encountering a hydro super cell!"); -#endif - } - } -} - -/** - * @brief Recurse into a cell until reaching the super level and call - * the star sorting function there. - * - * This function must be called at or above the super level! - * - * This function will sort the particles in all 13 directions. - * - * @param r the #runner. - * @param c the #cell. - */ -void runner_do_all_stars_sort(struct runner *r, struct cell *c) { - -#ifdef SWIFT_DEBUG_CHECKS - if (c->nodeID != engine_rank) error("Function called on a foreign cell!"); -#endif - - if (!cell_is_active_stars(c, r->e) && !cell_is_active_hydro(c, r->e)) return; - - /* Shall we sort at this level? */ - if (c->hydro.super == c) { - - /* Sort everything */ - runner_do_stars_sort(r, c, 0x1FFF, /*cleanup=*/0, /*timer=*/0); - - } else { - -#ifdef SWIFT_DEBUG_CHECKS - if (c->hydro.super != NULL) error("Function called below the super level!"); -#endif - - /* Ok, then, let's try lower */ - if (c->split) { - for (int k = 0; k < 8; ++k) { - if (c->progeny[k] != NULL) runner_do_all_stars_sort(r, c->progeny[k]); - } - } else { -#ifdef SWIFT_DEBUG_CHECKS - error("Reached a leaf without encountering a hydro super cell!"); -#endif - } - } -} - -/** - * @brief Initialize the multipoles before the gravity calculation. - * - * @param r The runner thread. - * @param c The cell. - * @param timer 1 if the time is to be recorded. - */ -void runner_do_init_grav(struct runner *r, struct cell *c, int timer) { - - const struct engine *e = r->e; - - TIMER_TIC; - -#ifdef SWIFT_DEBUG_CHECKS - if (!(e->policy & engine_policy_self_gravity)) - error("Grav-init task called outside of self-gravity calculation"); -#endif - - /* Anything to do here? */ - if (!cell_is_active_gravity(c, e)) return; - - /* Reset the gravity acceleration tensors */ - gravity_field_tensors_init(&c->grav.multipole->pot, e->ti_current); - - /* Recurse? */ - if (c->split) { - for (int k = 0; k < 8; k++) { - if (c->progeny[k] != NULL) runner_do_init_grav(r, c->progeny[k], 0); - } - } - - if (timer) TIMER_TOC(timer_init_grav); -} - -/** - * @brief Intermediate task after the gradient loop that does final operations - * on the gradient quantities and optionally slope limits the gradients - * - * @param r The runner thread. - * @param c The cell. - * @param timer Are we timing this ? 
- */ -void runner_do_extra_ghost(struct runner *r, struct cell *c, int timer) { - -#ifdef EXTRA_HYDRO_LOOP - - struct part *restrict parts = c->hydro.parts; - struct xpart *restrict xparts = c->hydro.xparts; - const int count = c->hydro.count; - const struct engine *e = r->e; - const integertime_t ti_current = e->ti_current; - const int with_cosmology = (e->policy & engine_policy_cosmology); - const double time_base = e->time_base; - const struct cosmology *cosmo = e->cosmology; - const struct hydro_props *hydro_props = e->hydro_properties; - - TIMER_TIC; - - /* Anything to do here? */ - if (!cell_is_active_hydro(c, e)) return; - - /* Recurse? */ - if (c->split) { - for (int k = 0; k < 8; k++) - if (c->progeny[k] != NULL) runner_do_extra_ghost(r, c->progeny[k], 0); - } else { - - /* Loop over the parts in this cell. */ - for (int i = 0; i < count; i++) { - - /* Get a direct pointer on the part. */ - struct part *restrict p = &parts[i]; - struct xpart *restrict xp = &xparts[i]; - - if (part_is_active(p, e)) { - - /* Finish the gradient calculation */ - hydro_end_gradient(p); - - /* As of here, particle force variables will be set. */ - - /* Calculate the time-step for passing to hydro_prepare_force. - * This is the physical time between the start and end of the time-step - * without any scale-factor powers. */ - double dt_alpha; - - if (with_cosmology) { - const integertime_t ti_step = get_integer_timestep(p->time_bin); - const integertime_t ti_begin = - get_integer_time_begin(ti_current - 1, p->time_bin); - - dt_alpha = - cosmology_get_delta_time(cosmo, ti_begin, ti_begin + ti_step); - } else { - dt_alpha = get_timestep(p->time_bin, time_base); - } - - /* Compute variables required for the force loop */ - hydro_prepare_force(p, xp, cosmo, hydro_props, dt_alpha); - - /* The particle force values are now set. Do _NOT_ - try to read any particle density variables! */ - - /* Prepare the particle for the force loop over neighbours */ - hydro_reset_acceleration(p); - } - } - } - - if (timer) TIMER_TOC(timer_do_extra_ghost); - -#else - error("SWIFT was not compiled with the extra hydro loop activated."); -#endif -} - -/** - * @brief Intermediate task after the density to check that the smoothing - * lengths are correct. - * - * @param r The runner thread. - * @param c The cell. - * @param timer Are we timing this ? - */ -void runner_do_ghost(struct runner *r, struct cell *c, int timer) { - - struct part *restrict parts = c->hydro.parts; - struct xpart *restrict xparts = c->hydro.xparts; - const struct engine *e = r->e; - const struct space *s = e->s; - const struct hydro_space *hs = &s->hs; - const struct cosmology *cosmo = e->cosmology; - const struct chemistry_global_data *chemistry = e->chemistry; - - const int with_cosmology = (e->policy & engine_policy_cosmology); - - const float hydro_h_max = e->hydro_properties->h_max; - const float hydro_h_min = e->hydro_properties->h_min; - const float eps = e->hydro_properties->h_tolerance; - const float hydro_eta_dim = - pow_dimension(e->hydro_properties->eta_neighbours); - const int max_smoothing_iter = e->hydro_properties->max_smoothing_iterations; - int redo = 0, count = 0; - - /* Running value of the maximal smoothing length */ - double h_max = c->hydro.h_max; - - TIMER_TIC; - - /* Anything to do here? */ - if (c->hydro.count == 0) return; - if (!cell_is_active_hydro(c, e)) return; - - /* Recurse? 
*/ - if (c->split) { - for (int k = 0; k < 8; k++) { - if (c->progeny[k] != NULL) { - runner_do_ghost(r, c->progeny[k], 0); - - /* Update h_max */ - h_max = max(h_max, c->progeny[k]->hydro.h_max); - } - } - } else { - - /* Init the list of active particles that have to be updated and their - * current smoothing lengths. */ - int *pid = NULL; - float *h_0 = NULL; - float *left = NULL; - float *right = NULL; - if ((pid = (int *)malloc(sizeof(int) * c->hydro.count)) == NULL) - error("Can't allocate memory for pid."); - if ((h_0 = (float *)malloc(sizeof(float) * c->hydro.count)) == NULL) - error("Can't allocate memory for h_0."); - if ((left = (float *)malloc(sizeof(float) * c->hydro.count)) == NULL) - error("Can't allocate memory for left."); - if ((right = (float *)malloc(sizeof(float) * c->hydro.count)) == NULL) - error("Can't allocate memory for right."); - for (int k = 0; k < c->hydro.count; k++) - if (part_is_active(&parts[k], e)) { - pid[count] = k; - h_0[count] = parts[k].h; - left[count] = 0.f; - right[count] = hydro_h_max; - ++count; - } - - /* While there are particles that need to be updated... */ - for (int num_reruns = 0; count > 0 && num_reruns < max_smoothing_iter; - num_reruns++) { - - /* Reset the redo-count. */ - redo = 0; - - /* Loop over the remaining active parts in this cell. */ - for (int i = 0; i < count; i++) { - - /* Get a direct pointer on the part. */ - struct part *p = &parts[pid[i]]; - struct xpart *xp = &xparts[pid[i]]; - -#ifdef SWIFT_DEBUG_CHECKS - /* Is this part within the timestep? */ - if (!part_is_active(p, e)) error("Ghost applied to inactive particle"); -#endif - - /* Get some useful values */ - const float h_init = h_0[i]; - const float h_old = p->h; - const float h_old_dim = pow_dimension(h_old); - const float h_old_dim_minus_one = pow_dimension_minus_one(h_old); - - float h_new; - int has_no_neighbours = 0; - - if (p->density.wcount == 0.f) { /* No neighbours case */ - - /* Flag that there were no neighbours */ - has_no_neighbours = 1; - - /* Double h and try again */ - h_new = 2.f * h_old; - - } else { - - /* Finish the density calculation */ - hydro_end_density(p, cosmo); - chemistry_end_density(p, chemistry, cosmo); - pressure_floor_end_density(p, cosmo); - - /* Compute one step of the Newton-Raphson scheme */ - const float n_sum = p->density.wcount * h_old_dim; - const float n_target = hydro_eta_dim; - const float f = n_sum - n_target; - const float f_prime = - p->density.wcount_dh * h_old_dim + - hydro_dimension * p->density.wcount * h_old_dim_minus_one; - - /* Improve the bisection bounds */ - if (n_sum < n_target) - left[i] = max(left[i], h_old); - else if (n_sum > n_target) - right[i] = min(right[i], h_old); - -#ifdef SWIFT_DEBUG_CHECKS - /* Check the validity of the left and right bounds */ - if (left[i] > right[i]) - error("Invalid left (%e) and right (%e)", left[i], right[i]); -#endif - - /* Skip if h is already h_max and we don't have enough neighbours */ - /* Same if we are below h_min */ - if (((p->h >= hydro_h_max) && (f < 0.f)) || - ((p->h <= hydro_h_min) && (f > 0.f))) { - - /* We have a particle whose smoothing length is already set (wants - * to be larger but has already hit the maximum OR wants to be - * smaller but has already reached the minimum). So, just tidy up as - * if the smoothing length had converged correctly */ - -#ifdef EXTRA_HYDRO_LOOP - - /* As of here, particle gradient variables will be set. */ - /* The force variables are set in the extra ghost. 
 */
-
-          /* Compute variables required for the gradient loop */
-          hydro_prepare_gradient(p, xp, cosmo);
-
-          /* The particle gradient values are now set. Do _NOT_
-             try to read any particle density variables! */
-
-          /* Prepare the particle for the gradient loop over neighbours */
-          hydro_reset_gradient(p);
-
-#else
-          const struct hydro_props *hydro_props = e->hydro_properties;
-
-          /* Calculate the time-step for passing to hydro_prepare_force, used
-           * for the evolution of alpha factors (i.e. those involved in the
-           * artificial viscosity and thermal conduction terms) */
-          const double time_base = e->time_base;
-          const integertime_t ti_current = e->ti_current;
-          double dt_alpha;
-
-          if (with_cosmology) {
-            const integertime_t ti_step = get_integer_timestep(p->time_bin);
-            const integertime_t ti_begin =
-                get_integer_time_begin(ti_current - 1, p->time_bin);
-
-            dt_alpha =
-                cosmology_get_delta_time(cosmo, ti_begin, ti_begin + ti_step);
-          } else {
-            dt_alpha = get_timestep(p->time_bin, time_base);
-          }
-
-          /* As of here, particle force variables will be set. */
-
-          /* Compute variables required for the force loop */
-          hydro_prepare_force(p, xp, cosmo, hydro_props, dt_alpha);
-
-          /* The particle force values are now set. Do _NOT_
-             try to read any particle density variables! */
-
-          /* Prepare the particle for the force loop over neighbours */
-          hydro_reset_acceleration(p);
-
-#endif /* EXTRA_HYDRO_LOOP */
-
-          /* Ok, we are done with this particle */
-          continue;
-        }
-
-        /* Normal case: Use Newton-Raphson to get a better value of h */
-
-        /* Avoid floating point exception from f_prime = 0 */
-        h_new = h_old - f / (f_prime + FLT_MIN);
-
-        /* Be verbose about the particles that struggle to converge */
-        if (num_reruns > max_smoothing_iter - 10) {
-
-          message(
-              "Smoothing length convergence problem: iter=%d p->id=%lld "
-              "h_init=%12.8e h_old=%12.8e h_new=%12.8e f=%f f_prime=%f "
-              "n_sum=%12.8e n_target=%12.8e left=%12.8e right=%12.8e",
-              num_reruns, p->id, h_init, h_old, h_new, f, f_prime, n_sum,
-              n_target, left[i], right[i]);
-        }
-
-#ifdef SWIFT_DEBUG_CHECKS
-        if ((f > 0.f && h_new > h_old) || (f < 0.f && h_new < h_old))
-          error(
-              "Smoothing length correction not going in the right direction");
-#endif
-
-        /* Safety check: truncate to the range [ h_old/2 , 2h_old ]. */
-        h_new = min(h_new, 2.f * h_old);
-        h_new = max(h_new, 0.5f * h_old);
-
-        /* Verify that we are actually progressing towards the answer */
-        h_new = max(h_new, left[i]);
-        h_new = min(h_new, right[i]);
-      }
-
-      /* Check whether the particle has an inappropriate smoothing length */
-      if (fabsf(h_new - h_old) > eps * h_old) {
-
-        /* Ok, correct then */
-
-        /* Case where we have been oscillating around the solution */
-        if ((h_new == left[i] && h_old == right[i]) ||
-            (h_old == left[i] && h_new == right[i])) {
-
-          /* Bisect the remaining interval */
-          p->h = pow_inv_dimension(
-              0.5f * (pow_dimension(left[i]) + pow_dimension(right[i])));
-
-        } else {
-
-          /* Normal case */
-          p->h = h_new;
-        }
-
-        /* If within the allowed range, try again */
-        if (p->h < hydro_h_max && p->h > hydro_h_min) {
-
-          /* Flag for another round of fun */
-          pid[redo] = pid[i];
-          h_0[redo] = h_0[i];
-          left[redo] = left[i];
-          right[redo] = right[i];
-          redo += 1;
-
-          /* Re-initialise everything */
-          hydro_init_part(p, hs);
-          chemistry_init_part(p, chemistry);
-          pressure_floor_init_part(p, xp);
-          tracers_after_init(p, xp, e->internal_units, e->physical_constants,
-                             with_cosmology, e->cosmology,
-                             e->hydro_properties, e->cooling_func, e->time);
-
-          /* Off we go !
*/ - continue; - - } else if (p->h <= hydro_h_min) { - - /* Ok, this particle is a lost cause... */ - p->h = hydro_h_min; - - } else if (p->h >= hydro_h_max) { - - /* Ok, this particle is a lost cause... */ - p->h = hydro_h_max; - - /* Do some damage control if no neighbours at all were found */ - if (has_no_neighbours) { - hydro_part_has_no_neighbours(p, xp, cosmo); - chemistry_part_has_no_neighbours(p, xp, chemistry, cosmo); - pressure_floor_part_has_no_neighbours(p, xp, cosmo); - } - - } else { - error( - "Fundamental problem with the smoothing length iteration " - "logic."); - } - } - - /* We now have a particle whose smoothing length has converged */ - - /* Check if h_max is increased */ - h_max = max(h_max, p->h); - -#ifdef EXTRA_HYDRO_LOOP - - /* As of here, particle gradient variables will be set. */ - /* The force variables are set in the extra ghost. */ - - /* Compute variables required for the gradient loop */ - hydro_prepare_gradient(p, xp, cosmo); - - /* The particle gradient values are now set. Do _NOT_ - try to read any particle density variables! */ - - /* Prepare the particle for the gradient loop over neighbours */ - hydro_reset_gradient(p); - -#else - const struct hydro_props *hydro_props = e->hydro_properties; - - /* Calculate the time-step for passing to hydro_prepare_force, used for - * the evolution of alpha factors (i.e. those involved in the artificial - * viscosity and thermal conduction terms) */ - const double time_base = e->time_base; - const integertime_t ti_current = e->ti_current; - double dt_alpha; - - if (with_cosmology) { - const integertime_t ti_step = get_integer_timestep(p->time_bin); - const integertime_t ti_begin = - get_integer_time_begin(ti_current - 1, p->time_bin); - - dt_alpha = - cosmology_get_delta_time(cosmo, ti_begin, ti_begin + ti_step); - } else { - dt_alpha = get_timestep(p->time_bin, time_base); - } - - /* As of here, particle force variables will be set. */ - - /* Compute variables required for the force loop */ - hydro_prepare_force(p, xp, cosmo, hydro_props, dt_alpha); - - /* The particle force values are now set. Do _NOT_ - try to read any particle density variables! */ - - /* Prepare the particle for the force loop over neighbours */ - hydro_reset_acceleration(p); - -#endif /* EXTRA_HYDRO_LOOP */ - } - - /* We now need to treat the particles whose smoothing length had not - * converged again */ - - /* Re-set the counter for the next loop (potentially). */ - count = redo; - if (count > 0) { - - /* Climb up the cell hierarchy. */ - for (struct cell *finger = c; finger != NULL; finger = finger->parent) { - - /* Run through this cell's density interactions. */ - for (struct link *l = finger->hydro.density; l != NULL; l = l->next) { - -#ifdef SWIFT_DEBUG_CHECKS - if (l->t->ti_run < r->e->ti_current) - error("Density task should have been run."); -#endif - - /* Self-interaction? */ - if (l->t->type == task_type_self) - runner_doself_subset_branch_density(r, finger, parts, pid, count); - - /* Otherwise, pair interaction? */ - else if (l->t->type == task_type_pair) { - - /* Left or right? */ - if (l->t->ci == finger) - runner_dopair_subset_branch_density(r, finger, parts, pid, - count, l->t->cj); - else - runner_dopair_subset_branch_density(r, finger, parts, pid, - count, l->t->ci); - } - - /* Otherwise, sub-self interaction? */ - else if (l->t->type == task_type_sub_self) - runner_dosub_subset_density(r, finger, parts, pid, count, NULL, - 1); - - /* Otherwise, sub-pair interaction? 
*/ - else if (l->t->type == task_type_sub_pair) { - - /* Left or right? */ - if (l->t->ci == finger) - runner_dosub_subset_density(r, finger, parts, pid, count, - l->t->cj, 1); - else - runner_dosub_subset_density(r, finger, parts, pid, count, - l->t->ci, 1); - } - } - } - } - } - - if (count) { - error("Smoothing length failed to converge on %i particles.", count); - } - - /* Be clean */ - free(left); - free(right); - free(pid); - free(h_0); - } - - /* Update h_max */ - c->hydro.h_max = h_max; - - /* The ghost may not always be at the top level. - * Therefore we need to update h_max between the super- and top-levels */ - if (c->hydro.ghost) { - for (struct cell *tmp = c->parent; tmp != NULL; tmp = tmp->parent) { - atomic_max_d(&tmp->hydro.h_max, h_max); - } - } - - if (timer) TIMER_TOC(timer_do_ghost); -} - -/** - * @brief Unskip any hydro tasks associated with active cells. - * - * @param c The cell. - * @param e The engine. - */ -static void runner_do_unskip_hydro(struct cell *c, struct engine *e) { - - /* Ignore empty cells. */ - if (c->hydro.count == 0) return; - - /* Skip inactive cells. */ - if (!cell_is_active_hydro(c, e)) return; - - /* Recurse */ - if (c->split) { - for (int k = 0; k < 8; k++) { - if (c->progeny[k] != NULL) { - struct cell *cp = c->progeny[k]; - runner_do_unskip_hydro(cp, e); - } - } - } - - /* Unskip any active tasks. */ - const int forcerebuild = cell_unskip_hydro_tasks(c, &e->sched); - if (forcerebuild) atomic_inc(&e->forcerebuild); -} - -/** - * @brief Unskip any stars tasks associated with active cells. - * - * @param c The cell. - * @param e The engine. - * @param with_star_formation Are we running with star formation switched on? - */ -static void runner_do_unskip_stars(struct cell *c, struct engine *e, - const int with_star_formation) { - - const int non_empty = - c->stars.count > 0 || (with_star_formation && c->hydro.count > 0); - - /* Ignore empty cells. */ - if (!non_empty) return; - - const int ci_active = cell_is_active_stars(c, e) || - (with_star_formation && cell_is_active_hydro(c, e)); - - /* Skip inactive cells. */ - if (!ci_active) return; - - /* Recurse */ - if (c->split) { - for (int k = 0; k < 8; k++) { - if (c->progeny[k] != NULL) { - struct cell *cp = c->progeny[k]; - runner_do_unskip_stars(cp, e, with_star_formation); - } - } - } - - /* Unskip any active tasks. */ - const int forcerebuild = - cell_unskip_stars_tasks(c, &e->sched, with_star_formation); - if (forcerebuild) atomic_inc(&e->forcerebuild); -} - -/** - * @brief Unskip any black hole tasks associated with active cells. - * - * @param c The cell. - * @param e The engine. - */ -static void runner_do_unskip_black_holes(struct cell *c, struct engine *e) { - - /* Ignore empty cells. */ - if (c->black_holes.count == 0) return; - - /* Skip inactive cells. */ - if (!cell_is_active_black_holes(c, e)) return; - - /* Recurse */ - if (c->split) { - for (int k = 0; k < 8; k++) { - if (c->progeny[k] != NULL) { - struct cell *cp = c->progeny[k]; - runner_do_unskip_black_holes(cp, e); - } - } - } - - /* Unskip any active tasks. */ - const int forcerebuild = cell_unskip_black_holes_tasks(c, &e->sched); - if (forcerebuild) atomic_inc(&e->forcerebuild); -} - -/** - * @brief Unskip any gravity tasks associated with active cells. - * - * @param c The cell. - * @param e The engine. - */ -static void runner_do_unskip_gravity(struct cell *c, struct engine *e) { - - /* Ignore empty cells. */ - if (c->grav.count == 0) return; - - /* Skip inactive cells. 
*/ - if (!cell_is_active_gravity(c, e)) return; - - /* Recurse */ - if (c->split && ((c->maxdepth - c->depth) >= space_subdepth_diff_grav)) { - for (int k = 0; k < 8; k++) { - if (c->progeny[k] != NULL) { - struct cell *cp = c->progeny[k]; - runner_do_unskip_gravity(cp, e); - } - } - } - - /* Unskip any active tasks. */ - cell_unskip_gravity_tasks(c, &e->sched); -} - -/** - * @brief Mapper function to unskip active tasks. - * - * @param map_data An array of #cell%s. - * @param num_elements Chunk size. - * @param extra_data Pointer to an #engine. - */ -void runner_do_unskip_mapper(void *map_data, int num_elements, - void *extra_data) { - - struct engine *e = (struct engine *)extra_data; - const int with_star_formation = e->policy & engine_policy_star_formation; - const int nodeID = e->nodeID; - struct space *s = e->s; - int *local_cells = (int *)map_data; - - for (int ind = 0; ind < num_elements; ind++) { - struct cell *c = &s->cells_top[local_cells[ind]]; - if (c != NULL) { - - /* Hydro tasks */ - if (e->policy & engine_policy_hydro) runner_do_unskip_hydro(c, e); - - /* All gravity tasks */ - if ((e->policy & engine_policy_self_gravity) || - ((e->policy & engine_policy_external_gravity) && c->nodeID == nodeID)) - runner_do_unskip_gravity(c, e); - - /* Stars tasks */ - if (e->policy & engine_policy_stars) - runner_do_unskip_stars(c, e, with_star_formation); - - /* Black hole tasks */ - if (e->policy & engine_policy_black_holes) - runner_do_unskip_black_holes(c, e); - } - } -} - -/** - * @brief Drift all part in a cell. - * - * @param r The runner thread. - * @param c The cell. - * @param timer Are we timing this ? - */ -void runner_do_drift_part(struct runner *r, struct cell *c, int timer) { - - TIMER_TIC; - - cell_drift_part(c, r->e, 0); - - if (timer) TIMER_TOC(timer_drift_part); -} - -/** - * @brief Drift all gpart in a cell. - * - * @param r The runner thread. - * @param c The cell. - * @param timer Are we timing this ? - */ -void runner_do_drift_gpart(struct runner *r, struct cell *c, int timer) { - - TIMER_TIC; - - cell_drift_gpart(c, r->e, 0); - - if (timer) TIMER_TOC(timer_drift_gpart); -} - -/** - * @brief Drift all spart in a cell. - * - * @param r The runner thread. - * @param c The cell. - * @param timer Are we timing this ? - */ -void runner_do_drift_spart(struct runner *r, struct cell *c, int timer) { - - TIMER_TIC; - - cell_drift_spart(c, r->e, 0); - - if (timer) TIMER_TOC(timer_drift_spart); -} - -/** - * @brief Drift all bpart in a cell. - * - * @param r The runner thread. - * @param c The cell. - * @param timer Are we timing this ? - */ -void runner_do_drift_bpart(struct runner *r, struct cell *c, int timer) { - - TIMER_TIC; - - cell_drift_bpart(c, r->e, 0); - - if (timer) TIMER_TOC(timer_drift_bpart); -} - -/** - * @brief Perform the first half-kick on all the active particles in a cell. - * - * @param r The runner thread. - * @param c The cell. - * @param timer Are we timing this ? 
- */ -void runner_do_kick1(struct runner *r, struct cell *c, int timer) { - - const struct engine *e = r->e; - const struct cosmology *cosmo = e->cosmology; - const struct hydro_props *hydro_props = e->hydro_properties; - const struct entropy_floor_properties *entropy_floor = e->entropy_floor; - const int with_cosmology = (e->policy & engine_policy_cosmology); - struct part *restrict parts = c->hydro.parts; - struct xpart *restrict xparts = c->hydro.xparts; - struct gpart *restrict gparts = c->grav.parts; - struct spart *restrict sparts = c->stars.parts; - const int count = c->hydro.count; - const int gcount = c->grav.count; - const int scount = c->stars.count; - const integertime_t ti_current = e->ti_current; - const double time_base = e->time_base; - - TIMER_TIC; - - /* Anything to do here? */ - if (!cell_is_starting_hydro(c, e) && !cell_is_starting_gravity(c, e) && - !cell_is_starting_stars(c, e) && !cell_is_starting_black_holes(c, e)) - return; - - /* Recurse? */ - if (c->split) { - for (int k = 0; k < 8; k++) - if (c->progeny[k] != NULL) runner_do_kick1(r, c->progeny[k], 0); - } else { - - /* Loop over the parts in this cell. */ - for (int k = 0; k < count; k++) { - - /* Get a handle on the part. */ - struct part *restrict p = &parts[k]; - struct xpart *restrict xp = &xparts[k]; - - /* If particle needs to be kicked */ - if (part_is_starting(p, e)) { - -#ifdef SWIFT_DEBUG_CHECKS - if (p->wakeup == time_bin_awake) - error("Woken-up particle that has not been processed in kick1"); -#endif - - /* Skip particles that have been woken up and treated by the limiter. */ - if (p->wakeup != time_bin_not_awake) continue; - - const integertime_t ti_step = get_integer_timestep(p->time_bin); - const integertime_t ti_begin = - get_integer_time_begin(ti_current + 1, p->time_bin); - -#ifdef SWIFT_DEBUG_CHECKS - const integertime_t ti_end = ti_begin + ti_step; - - if (ti_begin != ti_current) - error( - "Particle in wrong time-bin, ti_end=%lld, ti_begin=%lld, " - "ti_step=%lld time_bin=%d wakeup=%d ti_current=%lld", - ti_end, ti_begin, ti_step, p->time_bin, p->wakeup, ti_current); -#endif - - /* Time interval for this half-kick */ - double dt_kick_grav, dt_kick_hydro, dt_kick_therm, dt_kick_corr; - if (with_cosmology) { - dt_kick_hydro = cosmology_get_hydro_kick_factor( - cosmo, ti_begin, ti_begin + ti_step / 2); - dt_kick_grav = cosmology_get_grav_kick_factor(cosmo, ti_begin, - ti_begin + ti_step / 2); - dt_kick_therm = cosmology_get_therm_kick_factor( - cosmo, ti_begin, ti_begin + ti_step / 2); - dt_kick_corr = cosmology_get_corr_kick_factor(cosmo, ti_begin, - ti_begin + ti_step / 2); - } else { - dt_kick_hydro = (ti_step / 2) * time_base; - dt_kick_grav = (ti_step / 2) * time_base; - dt_kick_therm = (ti_step / 2) * time_base; - dt_kick_corr = (ti_step / 2) * time_base; - } - - /* do the kick */ - kick_part(p, xp, dt_kick_hydro, dt_kick_grav, dt_kick_therm, - dt_kick_corr, cosmo, hydro_props, entropy_floor, ti_begin, - ti_begin + ti_step / 2); - - /* Update the accelerations to be used in the drift for hydro */ - if (p->gpart != NULL) { - - xp->a_grav[0] = p->gpart->a_grav[0]; - xp->a_grav[1] = p->gpart->a_grav[1]; - xp->a_grav[2] = p->gpart->a_grav[2]; - } - } - } - - /* Loop over the gparts in this cell. */ - for (int k = 0; k < gcount; k++) { - - /* Get a handle on the part. 
*/ - struct gpart *restrict gp = &gparts[k]; - - /* If the g-particle has no counterpart and needs to be kicked */ - if ((gp->type == swift_type_dark_matter || - gp->type == swift_type_dark_matter_background) && - gpart_is_starting(gp, e)) { - - const integertime_t ti_step = get_integer_timestep(gp->time_bin); - const integertime_t ti_begin = - get_integer_time_begin(ti_current + 1, gp->time_bin); - -#ifdef SWIFT_DEBUG_CHECKS - const integertime_t ti_end = - get_integer_time_end(ti_current + 1, gp->time_bin); - - if (ti_begin != ti_current) - error( - "Particle in wrong time-bin, ti_end=%lld, ti_begin=%lld, " - "ti_step=%lld time_bin=%d ti_current=%lld", - ti_end, ti_begin, ti_step, gp->time_bin, ti_current); -#endif - - /* Time interval for this half-kick */ - double dt_kick_grav; - if (with_cosmology) { - dt_kick_grav = cosmology_get_grav_kick_factor(cosmo, ti_begin, - ti_begin + ti_step / 2); - } else { - dt_kick_grav = (ti_step / 2) * time_base; - } - - /* do the kick */ - kick_gpart(gp, dt_kick_grav, ti_begin, ti_begin + ti_step / 2); - } - } - - /* Loop over the stars particles in this cell. */ - for (int k = 0; k < scount; k++) { - - /* Get a handle on the s-part. */ - struct spart *restrict sp = &sparts[k]; - - /* If particle needs to be kicked */ - if (spart_is_starting(sp, e)) { - - const integertime_t ti_step = get_integer_timestep(sp->time_bin); - const integertime_t ti_begin = - get_integer_time_begin(ti_current + 1, sp->time_bin); - -#ifdef SWIFT_DEBUG_CHECKS - const integertime_t ti_end = - get_integer_time_end(ti_current + 1, sp->time_bin); - - if (ti_begin != ti_current) - error( - "Particle in wrong time-bin, ti_end=%lld, ti_begin=%lld, " - "ti_step=%lld time_bin=%d ti_current=%lld", - ti_end, ti_begin, ti_step, sp->time_bin, ti_current); -#endif - - /* Time interval for this half-kick */ - double dt_kick_grav; - if (with_cosmology) { - dt_kick_grav = cosmology_get_grav_kick_factor(cosmo, ti_begin, - ti_begin + ti_step / 2); - } else { - dt_kick_grav = (ti_step / 2) * time_base; - } - - /* do the kick */ - kick_spart(sp, dt_kick_grav, ti_begin, ti_begin + ti_step / 2); - } - } - } - - if (timer) TIMER_TOC(timer_kick1); -} - -/** - * @brief Perform the second half-kick on all the active particles in a cell. - * - * Also prepares particles to be drifted. - * - * @param r The runner thread. - * @param c The cell. - * @param timer Are we timing this ? - */ -void runner_do_kick2(struct runner *r, struct cell *c, int timer) { - - const struct engine *e = r->e; - const struct cosmology *cosmo = e->cosmology; - const struct hydro_props *hydro_props = e->hydro_properties; - const struct entropy_floor_properties *entropy_floor = e->entropy_floor; - const int with_cosmology = (e->policy & engine_policy_cosmology); - const int count = c->hydro.count; - const int gcount = c->grav.count; - const int scount = c->stars.count; - struct part *restrict parts = c->hydro.parts; - struct xpart *restrict xparts = c->hydro.xparts; - struct gpart *restrict gparts = c->grav.parts; - struct spart *restrict sparts = c->stars.parts; - const integertime_t ti_current = e->ti_current; - const double time_base = e->time_base; - - TIMER_TIC; - - /* Anything to do here? */ - if (!cell_is_active_hydro(c, e) && !cell_is_active_gravity(c, e) && - !cell_is_active_stars(c, e) && !cell_is_active_black_holes(c, e)) - return; - - /* Recurse? */ - if (c->split) { - for (int k = 0; k < 8; k++) - if (c->progeny[k] != NULL) runner_do_kick2(r, c->progeny[k], 0); - } else { - - /* Loop over the particles in this cell. 
*/ - for (int k = 0; k < count; k++) { - - /* Get a handle on the part. */ - struct part *restrict p = &parts[k]; - struct xpart *restrict xp = &xparts[k]; - - /* If particle needs to be kicked */ - if (part_is_active(p, e)) { - - integertime_t ti_begin, ti_end, ti_step; - -#ifdef SWIFT_DEBUG_CHECKS - if (p->wakeup == time_bin_awake) - error("Woken-up particle that has not been processed in kick1"); -#endif - - if (p->wakeup == time_bin_not_awake) { - - /* Time-step from a regular kick */ - ti_step = get_integer_timestep(p->time_bin); - ti_begin = get_integer_time_begin(ti_current, p->time_bin); - ti_end = ti_begin + ti_step; - - } else { - - /* Time-step that follows a wake-up call */ - ti_begin = get_integer_time_begin(ti_current, p->wakeup); - ti_end = get_integer_time_end(ti_current, p->time_bin); - ti_step = ti_end - ti_begin; - - /* Reset the flag. Everything is back to normal from now on. */ - p->wakeup = time_bin_awake; - } - -#ifdef SWIFT_DEBUG_CHECKS - if (ti_begin + ti_step != ti_current) - error( - "Particle in wrong time-bin, ti_begin=%lld, ti_step=%lld " - "time_bin=%d wakeup=%d ti_current=%lld", - ti_begin, ti_step, p->time_bin, p->wakeup, ti_current); -#endif - /* Time interval for this half-kick */ - double dt_kick_grav, dt_kick_hydro, dt_kick_therm, dt_kick_corr; - if (with_cosmology) { - dt_kick_hydro = cosmology_get_hydro_kick_factor( - cosmo, ti_begin + ti_step / 2, ti_end); - dt_kick_grav = cosmology_get_grav_kick_factor( - cosmo, ti_begin + ti_step / 2, ti_end); - dt_kick_therm = cosmology_get_therm_kick_factor( - cosmo, ti_begin + ti_step / 2, ti_end); - dt_kick_corr = cosmology_get_corr_kick_factor( - cosmo, ti_begin + ti_step / 2, ti_end); - } else { - dt_kick_hydro = (ti_end - (ti_begin + ti_step / 2)) * time_base; - dt_kick_grav = (ti_end - (ti_begin + ti_step / 2)) * time_base; - dt_kick_therm = (ti_end - (ti_begin + ti_step / 2)) * time_base; - dt_kick_corr = (ti_end - (ti_begin + ti_step / 2)) * time_base; - } - - /* Finish the time-step with a second half-kick */ - kick_part(p, xp, dt_kick_hydro, dt_kick_grav, dt_kick_therm, - dt_kick_corr, cosmo, hydro_props, entropy_floor, - ti_begin + ti_step / 2, ti_end); - -#ifdef SWIFT_DEBUG_CHECKS - /* Check that kick and the drift are synchronized */ - if (p->ti_drift != p->ti_kick) error("Error integrating part in time."); -#endif - - /* Prepare the values to be drifted */ - hydro_reset_predicted_values(p, xp, cosmo); - } - } - - /* Loop over the g-particles in this cell. */ - for (int k = 0; k < gcount; k++) { - - /* Get a handle on the part. 
*/ - struct gpart *restrict gp = &gparts[k]; - - /* If the g-particle has no counterpart and needs to be kicked */ - if ((gp->type == swift_type_dark_matter || - gp->type == swift_type_dark_matter_background) && - gpart_is_active(gp, e)) { - - const integertime_t ti_step = get_integer_timestep(gp->time_bin); - const integertime_t ti_begin = - get_integer_time_begin(ti_current, gp->time_bin); - -#ifdef SWIFT_DEBUG_CHECKS - if (ti_begin + ti_step != ti_current) - error("Particle in wrong time-bin"); -#endif - - /* Time interval for this half-kick */ - double dt_kick_grav; - if (with_cosmology) { - dt_kick_grav = cosmology_get_grav_kick_factor( - cosmo, ti_begin + ti_step / 2, ti_begin + ti_step); - } else { - dt_kick_grav = (ti_step / 2) * time_base; - } - - /* Finish the time-step with a second half-kick */ - kick_gpart(gp, dt_kick_grav, ti_begin + ti_step / 2, - ti_begin + ti_step); - -#ifdef SWIFT_DEBUG_CHECKS - /* Check that kick and the drift are synchronized */ - if (gp->ti_drift != gp->ti_kick) - error("Error integrating g-part in time."); -#endif - - /* Prepare the values to be drifted */ - gravity_reset_predicted_values(gp); - } - } - - /* Loop over the particles in this cell. */ - for (int k = 0; k < scount; k++) { - - /* Get a handle on the part. */ - struct spart *restrict sp = &sparts[k]; - - /* If particle needs to be kicked */ - if (spart_is_active(sp, e)) { - - const integertime_t ti_step = get_integer_timestep(sp->time_bin); - const integertime_t ti_begin = - get_integer_time_begin(ti_current, sp->time_bin); - -#ifdef SWIFT_DEBUG_CHECKS - if (ti_begin + ti_step != ti_current) - error("Particle in wrong time-bin"); -#endif - - /* Time interval for this half-kick */ - double dt_kick_grav; - if (with_cosmology) { - dt_kick_grav = cosmology_get_grav_kick_factor( - cosmo, ti_begin + ti_step / 2, ti_begin + ti_step); - } else { - dt_kick_grav = (ti_step / 2) * time_base; - } - - /* Finish the time-step with a second half-kick */ - kick_spart(sp, dt_kick_grav, ti_begin + ti_step / 2, - ti_begin + ti_step); - -#ifdef SWIFT_DEBUG_CHECKS - /* Check that kick and the drift are synchronized */ - if (sp->ti_drift != sp->ti_kick) - error("Error integrating s-part in time."); -#endif - - /* Prepare the values to be drifted */ - stars_reset_predicted_values(sp); - } - } - } - if (timer) TIMER_TOC(timer_kick2); -} - -/** - * @brief Computes the next time-step of all active particles in this cell - * and update the cell's statistics. - * - * @param r The runner thread. - * @param c The cell. - * @param timer Are we timing this ? - */ -void runner_do_timestep(struct runner *r, struct cell *c, int timer) { - - const struct engine *e = r->e; - const integertime_t ti_current = e->ti_current; - const int with_cosmology = (e->policy & engine_policy_cosmology); - const int count = c->hydro.count; - const int gcount = c->grav.count; - const int scount = c->stars.count; - const int bcount = c->black_holes.count; - struct part *restrict parts = c->hydro.parts; - struct xpart *restrict xparts = c->hydro.xparts; - struct gpart *restrict gparts = c->grav.parts; - struct spart *restrict sparts = c->stars.parts; - struct bpart *restrict bparts = c->black_holes.parts; - - TIMER_TIC; - - /* Anything to do here? 
*/ - if (!cell_is_active_hydro(c, e) && !cell_is_active_gravity(c, e) && - !cell_is_active_stars(c, e) && !cell_is_active_black_holes(c, e)) { - c->hydro.updated = 0; - c->grav.updated = 0; - c->stars.updated = 0; - c->black_holes.updated = 0; - return; - } - - int updated = 0, g_updated = 0, s_updated = 0, b_updated = 0; - integertime_t ti_hydro_end_min = max_nr_timesteps, ti_hydro_end_max = 0, - ti_hydro_beg_max = 0; - integertime_t ti_gravity_end_min = max_nr_timesteps, ti_gravity_end_max = 0, - ti_gravity_beg_max = 0; - integertime_t ti_stars_end_min = max_nr_timesteps, ti_stars_end_max = 0, - ti_stars_beg_max = 0; - integertime_t ti_black_holes_end_min = max_nr_timesteps, - ti_black_holes_end_max = 0, ti_black_holes_beg_max = 0; - - /* No children? */ - if (!c->split) { - - /* Loop over the particles in this cell. */ - for (int k = 0; k < count; k++) { - - /* Get a handle on the part. */ - struct part *restrict p = &parts[k]; - struct xpart *restrict xp = &xparts[k]; - - /* If particle needs updating */ - if (part_is_active(p, e)) { - -#ifdef SWIFT_DEBUG_CHECKS - /* Current end of time-step */ - const integertime_t ti_end = - get_integer_time_end(ti_current, p->time_bin); - - if (ti_end != ti_current) - error("Computing time-step of rogue particle."); -#endif - - /* Get new time-step */ - const integertime_t ti_new_step = get_part_timestep(p, xp, e); - - /* Update particle */ - p->time_bin = get_time_bin(ti_new_step); - if (p->gpart != NULL) p->gpart->time_bin = p->time_bin; - - /* Update the tracers properties */ - tracers_after_timestep(p, xp, e->internal_units, e->physical_constants, - with_cosmology, e->cosmology, - e->hydro_properties, e->cooling_func, e->time); - - /* Number of updated particles */ - updated++; - if (p->gpart != NULL) g_updated++; - - /* What is the next sync-point ? */ - ti_hydro_end_min = min(ti_current + ti_new_step, ti_hydro_end_min); - ti_hydro_end_max = max(ti_current + ti_new_step, ti_hydro_end_max); - - /* What is the next starting point for this cell ? */ - ti_hydro_beg_max = max(ti_current, ti_hydro_beg_max); - - if (p->gpart != NULL) { - - /* What is the next sync-point ? */ - ti_gravity_end_min = - min(ti_current + ti_new_step, ti_gravity_end_min); - ti_gravity_end_max = - max(ti_current + ti_new_step, ti_gravity_end_max); - - /* What is the next starting point for this cell ? */ - ti_gravity_beg_max = max(ti_current, ti_gravity_beg_max); - } - } - - else { /* part is inactive */ - - if (!part_is_inhibited(p, e)) { - - const integertime_t ti_end = - get_integer_time_end(ti_current, p->time_bin); - - const integertime_t ti_beg = - get_integer_time_begin(ti_current + 1, p->time_bin); - - /* What is the next sync-point ? */ - ti_hydro_end_min = min(ti_end, ti_hydro_end_min); - ti_hydro_end_max = max(ti_end, ti_hydro_end_max); - - /* What is the next starting point for this cell ? */ - ti_hydro_beg_max = max(ti_beg, ti_hydro_beg_max); - - if (p->gpart != NULL) { - - /* What is the next sync-point ? */ - ti_gravity_end_min = min(ti_end, ti_gravity_end_min); - ti_gravity_end_max = max(ti_end, ti_gravity_end_max); - - /* What is the next starting point for this cell ? */ - ti_gravity_beg_max = max(ti_beg, ti_gravity_beg_max); - } - } - } - } - - /* Loop over the g-particles in this cell. */ - for (int k = 0; k < gcount; k++) { - - /* Get a handle on the part. 
 */
-      struct gpart *restrict gp = &gparts[k];
-
-      /* If the g-particle has no counterpart */
-      if (gp->type == swift_type_dark_matter ||
-          gp->type == swift_type_dark_matter_background) {
-
-        /* need to be updated ? */
-        if (gpart_is_active(gp, e)) {
-
-#ifdef SWIFT_DEBUG_CHECKS
-          /* Current end of time-step */
-          const integertime_t ti_end =
-              get_integer_time_end(ti_current, gp->time_bin);
-
-          if (ti_end != ti_current)
-            error("Computing time-step of rogue particle.");
-#endif
-
-          /* Get new time-step */
-          const integertime_t ti_new_step = get_gpart_timestep(gp, e);
-
-          /* Update particle */
-          gp->time_bin = get_time_bin(ti_new_step);
-
-          /* Number of updated g-particles */
-          g_updated++;
-
-          /* What is the next sync-point ? */
-          ti_gravity_end_min =
-              min(ti_current + ti_new_step, ti_gravity_end_min);
-          ti_gravity_end_max =
-              max(ti_current + ti_new_step, ti_gravity_end_max);
-
-          /* What is the next starting point for this cell ? */
-          ti_gravity_beg_max = max(ti_current, ti_gravity_beg_max);
-
-        } else { /* gpart is inactive */
-
-          if (!gpart_is_inhibited(gp, e)) {
-
-            const integertime_t ti_end =
-                get_integer_time_end(ti_current, gp->time_bin);
-
-            /* What is the next sync-point ? */
-            ti_gravity_end_min = min(ti_end, ti_gravity_end_min);
-            ti_gravity_end_max = max(ti_end, ti_gravity_end_max);
-
-            const integertime_t ti_beg =
-                get_integer_time_begin(ti_current + 1, gp->time_bin);
-
-            /* What is the next starting point for this cell ? */
-            ti_gravity_beg_max = max(ti_beg, ti_gravity_beg_max);
-          }
-        }
-      }
-    }
-
-    /* Loop over the star particles in this cell. */
-    for (int k = 0; k < scount; k++) {
-
-      /* Get a handle on the part. */
-      struct spart *restrict sp = &sparts[k];
-
-      /* need to be updated ? */
-      if (spart_is_active(sp, e)) {
-
-#ifdef SWIFT_DEBUG_CHECKS
-        /* Current end of time-step */
-        const integertime_t ti_end =
-            get_integer_time_end(ti_current, sp->time_bin);
-
-        if (ti_end != ti_current)
-          error("Computing time-step of rogue particle.");
-#endif
-        /* Get new time-step */
-        const integertime_t ti_new_step = get_spart_timestep(sp, e);
-
-        /* Update particle */
-        sp->time_bin = get_time_bin(ti_new_step);
-        sp->gpart->time_bin = get_time_bin(ti_new_step);
-
-        /* Number of updated s-particles */
-        s_updated++;
-        g_updated++;
-
-        ti_stars_end_min = min(ti_current + ti_new_step, ti_stars_end_min);
-        ti_stars_end_max = max(ti_current + ti_new_step, ti_stars_end_max);
-        ti_gravity_end_min = min(ti_current + ti_new_step, ti_gravity_end_min);
-        ti_gravity_end_max = max(ti_current + ti_new_step, ti_gravity_end_max);
-
-        /* What is the next starting point for this cell ? */
-        ti_stars_beg_max = max(ti_current, ti_stars_beg_max);
-        ti_gravity_beg_max = max(ti_current, ti_gravity_beg_max);
-
-        /* star particle is inactive but not inhibited */
-      } else {
-
-        if (!spart_is_inhibited(sp, e)) {
-
-          const integertime_t ti_end =
-              get_integer_time_end(ti_current, sp->time_bin);
-
-          const integertime_t ti_beg =
-              get_integer_time_begin(ti_current + 1, sp->time_bin);
-
-          ti_stars_end_min = min(ti_end, ti_stars_end_min);
-          ti_stars_end_max = max(ti_end, ti_stars_end_max);
-          ti_gravity_end_min = min(ti_end, ti_gravity_end_min);
-          ti_gravity_end_max = max(ti_end, ti_gravity_end_max);
-
-          /* What is the next starting point for this cell ? */
-          ti_stars_beg_max = max(ti_beg, ti_stars_beg_max);
-          ti_gravity_beg_max = max(ti_beg, ti_gravity_beg_max);
-        }
-      }
-    }
-
-    /* Loop over the black hole particles in this cell. */
-    for (int k = 0; k < bcount; k++) {
-
-      /* Get a handle on the part. */
-      struct bpart *restrict bp = &bparts[k];
-
-      /* need to be updated ? */
-      if (bpart_is_active(bp, e)) {
-
-#ifdef SWIFT_DEBUG_CHECKS
-        /* Current end of time-step */
-        const integertime_t ti_end =
-            get_integer_time_end(ti_current, bp->time_bin);
-
-        if (ti_end != ti_current)
-          error("Computing time-step of rogue particle.");
-#endif
-        /* Get new time-step */
-        const integertime_t ti_new_step = get_bpart_timestep(bp, e);
-
-        /* Update particle */
-        bp->time_bin = get_time_bin(ti_new_step);
-        bp->gpart->time_bin = get_time_bin(ti_new_step);
-
-        /* Number of updated b-particles */
-        b_updated++;
-        g_updated++;
-
-        ti_black_holes_end_min =
-            min(ti_current + ti_new_step, ti_black_holes_end_min);
-        ti_black_holes_end_max =
-            max(ti_current + ti_new_step, ti_black_holes_end_max);
-        ti_gravity_end_min = min(ti_current + ti_new_step, ti_gravity_end_min);
-        ti_gravity_end_max = max(ti_current + ti_new_step, ti_gravity_end_max);
-
-        /* What is the next starting point for this cell ? */
-        ti_black_holes_beg_max = max(ti_current, ti_black_holes_beg_max);
-        ti_gravity_beg_max = max(ti_current, ti_gravity_beg_max);
-
-        /* black hole particle is inactive but not inhibited */
-      } else {
-
-        if (!bpart_is_inhibited(bp, e)) {
-
-          const integertime_t ti_end =
-              get_integer_time_end(ti_current, bp->time_bin);
-
-          const integertime_t ti_beg =
-              get_integer_time_begin(ti_current + 1, bp->time_bin);
-
-          ti_black_holes_end_min = min(ti_end, ti_black_holes_end_min);
-          ti_black_holes_end_max = max(ti_end, ti_black_holes_end_max);
-          ti_gravity_end_min = min(ti_end, ti_gravity_end_min);
-          ti_gravity_end_max = max(ti_end, ti_gravity_end_max);
-
-          /* What is the next starting point for this cell ? */
-          ti_black_holes_beg_max = max(ti_beg, ti_black_holes_beg_max);
-          ti_gravity_beg_max = max(ti_beg, ti_gravity_beg_max);
-        }
-      }
-    }
-
-  } else {
-
-    /* Loop over the progeny. */
-    for (int k = 0; k < 8; k++) {
-      if (c->progeny[k] != NULL) {
-        struct cell *restrict cp = c->progeny[k];
-
-        /* Recurse */
-        runner_do_timestep(r, cp, 0);
-
-        /* And aggregate */
-        updated += cp->hydro.updated;
-        g_updated += cp->grav.updated;
-        s_updated += cp->stars.updated;
-        b_updated += cp->black_holes.updated;
-
-        ti_hydro_end_min = min(cp->hydro.ti_end_min, ti_hydro_end_min);
-        ti_hydro_end_max = max(cp->hydro.ti_end_max, ti_hydro_end_max);
-        ti_hydro_beg_max = max(cp->hydro.ti_beg_max, ti_hydro_beg_max);
-
-        ti_gravity_end_min = min(cp->grav.ti_end_min, ti_gravity_end_min);
-        ti_gravity_end_max = max(cp->grav.ti_end_max, ti_gravity_end_max);
-        ti_gravity_beg_max = max(cp->grav.ti_beg_max, ti_gravity_beg_max);
-
-        ti_stars_end_min = min(cp->stars.ti_end_min, ti_stars_end_min);
-        ti_stars_end_max = max(cp->grav.ti_end_max, ti_stars_end_max);
-        ti_stars_beg_max = max(cp->grav.ti_beg_max, ti_stars_beg_max);
-
-        ti_black_holes_end_min =
-            min(cp->black_holes.ti_end_min, ti_black_holes_end_min);
-        ti_black_holes_end_max =
-            max(cp->grav.ti_end_max, ti_black_holes_end_max);
-        ti_black_holes_beg_max =
-            max(cp->grav.ti_beg_max, ti_black_holes_beg_max);
-      }
-    }
-  }
-
-  /* Store the values.
*/ - c->hydro.updated = updated; - c->grav.updated = g_updated; - c->stars.updated = s_updated; - c->black_holes.updated = b_updated; - - c->hydro.ti_end_min = ti_hydro_end_min; - c->hydro.ti_end_max = ti_hydro_end_max; - c->hydro.ti_beg_max = ti_hydro_beg_max; - c->grav.ti_end_min = ti_gravity_end_min; - c->grav.ti_end_max = ti_gravity_end_max; - c->grav.ti_beg_max = ti_gravity_beg_max; - c->stars.ti_end_min = ti_stars_end_min; - c->stars.ti_end_max = ti_stars_end_max; - c->stars.ti_beg_max = ti_stars_beg_max; - c->black_holes.ti_end_min = ti_black_holes_end_min; - c->black_holes.ti_end_max = ti_black_holes_end_max; - c->black_holes.ti_beg_max = ti_black_holes_beg_max; - -#ifdef SWIFT_DEBUG_CHECKS - if (c->hydro.ti_end_min == e->ti_current && - c->hydro.ti_end_min < max_nr_timesteps) - error("End of next hydro step is current time!"); - if (c->grav.ti_end_min == e->ti_current && - c->grav.ti_end_min < max_nr_timesteps) - error("End of next gravity step is current time!"); - if (c->stars.ti_end_min == e->ti_current && - c->stars.ti_end_min < max_nr_timesteps) - error("End of next stars step is current time!"); - if (c->black_holes.ti_end_min == e->ti_current && - c->black_holes.ti_end_min < max_nr_timesteps) - error("End of next black holes step is current time!"); -#endif - - if (timer) TIMER_TOC(timer_timestep); -} - -/** - * @brief Apply the time-step limiter to all awaken particles in a cell - * hierarchy. - * - * @param r The task #runner. - * @param c The #cell. - * @param force Limit the particles irrespective of the #cell flags. - * @param timer Are we timing this ? - */ -void runner_do_limiter(struct runner *r, struct cell *c, int force, int timer) { - - const struct engine *e = r->e; - const integertime_t ti_current = e->ti_current; - const int count = c->hydro.count; - struct part *restrict parts = c->hydro.parts; - struct xpart *restrict xparts = c->hydro.xparts; - - TIMER_TIC; - -#ifdef SWIFT_DEBUG_CHECKS - /* Check that we only limit local cells. */ - if (c->nodeID != engine_rank) error("Limiting dt of a foreign cell is nope."); -#endif - - integertime_t ti_hydro_end_min = max_nr_timesteps, ti_hydro_end_max = 0, - ti_hydro_beg_max = 0; - integertime_t ti_gravity_end_min = max_nr_timesteps, ti_gravity_end_max = 0, - ti_gravity_beg_max = 0; - - /* Limit irrespective of cell flags? */ - force = (force || cell_get_flag(c, cell_flag_do_hydro_limiter)); - - /* Early abort? */ - if (c->hydro.count == 0) { - - /* Clear the limiter flags. */ - cell_clear_flag( - c, cell_flag_do_hydro_limiter | cell_flag_do_hydro_sub_limiter); - return; - } - - /* Loop over the progeny ? 
*/ - if (c->split && (force || cell_get_flag(c, cell_flag_do_hydro_sub_limiter))) { - for (int k = 0; k < 8; k++) { - if (c->progeny[k] != NULL) { - struct cell *restrict cp = c->progeny[k]; - - /* Recurse */ - runner_do_limiter(r, cp, force, 0); - - /* And aggregate */ - ti_hydro_end_min = min(cp->hydro.ti_end_min, ti_hydro_end_min); - ti_hydro_end_max = max(cp->hydro.ti_end_max, ti_hydro_end_max); - ti_hydro_beg_max = max(cp->hydro.ti_beg_max, ti_hydro_beg_max); - ti_gravity_end_min = min(cp->grav.ti_end_min, ti_gravity_end_min); - ti_gravity_end_max = max(cp->grav.ti_end_max, ti_gravity_end_max); - ti_gravity_beg_max = max(cp->grav.ti_beg_max, ti_gravity_beg_max); - } - } - - /* Store the updated values */ - c->hydro.ti_end_min = min(c->hydro.ti_end_min, ti_hydro_end_min); - c->hydro.ti_end_max = max(c->hydro.ti_end_max, ti_hydro_end_max); - c->hydro.ti_beg_max = max(c->hydro.ti_beg_max, ti_hydro_beg_max); - c->grav.ti_end_min = min(c->grav.ti_end_min, ti_gravity_end_min); - c->grav.ti_end_max = max(c->grav.ti_end_max, ti_gravity_end_max); - c->grav.ti_beg_max = max(c->grav.ti_beg_max, ti_gravity_beg_max); - - } else if (!c->split && force) { - - ti_hydro_end_min = c->hydro.ti_end_min; - ti_hydro_end_max = c->hydro.ti_end_max; - ti_hydro_beg_max = c->hydro.ti_beg_max; - ti_gravity_end_min = c->grav.ti_end_min; - ti_gravity_end_max = c->grav.ti_end_max; - ti_gravity_beg_max = c->grav.ti_beg_max; - - /* Loop over the gas particles in this cell. */ - for (int k = 0; k < count; k++) { - - /* Get a handle on the part. */ - struct part *restrict p = &parts[k]; - struct xpart *restrict xp = &xparts[k]; - - /* Avoid inhibited particles */ - if (part_is_inhibited(p, e)) continue; - - /* If the particle will be active no need to wake it up */ - if (part_is_active(p, e) && p->wakeup != time_bin_not_awake) - p->wakeup = time_bin_not_awake; - - /* Bip, bip, bip... wake-up time */ - if (p->wakeup <= time_bin_awake) { - - /* Apply the limiter and get the new time-step size */ - const integertime_t ti_new_step = timestep_limit_part(p, xp, e); - - /* What is the next sync-point ? */ - ti_hydro_end_min = min(ti_current + ti_new_step, ti_hydro_end_min); - ti_hydro_end_max = max(ti_current + ti_new_step, ti_hydro_end_max); - - /* What is the next starting point for this cell ? */ - ti_hydro_beg_max = max(ti_current, ti_hydro_beg_max); - - /* Also limit the gpart counter-part */ - if (p->gpart != NULL) { - - /* Register the time-bin */ - p->gpart->time_bin = p->time_bin; - - /* What is the next sync-point ? */ - ti_gravity_end_min = - min(ti_current + ti_new_step, ti_gravity_end_min); - ti_gravity_end_max = - max(ti_current + ti_new_step, ti_gravity_end_max); - - /* What is the next starting point for this cell ? */ - ti_gravity_beg_max = max(ti_current, ti_gravity_beg_max); - } - } - } - - /* Store the updated values */ - c->hydro.ti_end_min = min(c->hydro.ti_end_min, ti_hydro_end_min); - c->hydro.ti_end_max = max(c->hydro.ti_end_max, ti_hydro_end_max); - c->hydro.ti_beg_max = max(c->hydro.ti_beg_max, ti_hydro_beg_max); - c->grav.ti_end_min = min(c->grav.ti_end_min, ti_gravity_end_min); - c->grav.ti_end_max = max(c->grav.ti_end_max, ti_gravity_end_max); - c->grav.ti_beg_max = max(c->grav.ti_beg_max, ti_gravity_beg_max); - } - - /* Clear the limiter flags. 
*/ - cell_clear_flag(c, - cell_flag_do_hydro_limiter | cell_flag_do_hydro_sub_limiter); - - if (timer) TIMER_TOC(timer_do_limiter); -} - -/** - * @brief End the hydro force calculation of all active particles in a cell - * by multiplying the acccelerations by the relevant constants - * - * @param r The #runner thread. - * @param c The #cell. - * @param timer Are we timing this ? - */ -void runner_do_end_hydro_force(struct runner *r, struct cell *c, int timer) { - - const struct engine *e = r->e; - - TIMER_TIC; - - /* Anything to do here? */ - if (!cell_is_active_hydro(c, e)) return; - - /* Recurse? */ - if (c->split) { - for (int k = 0; k < 8; k++) - if (c->progeny[k] != NULL) runner_do_end_hydro_force(r, c->progeny[k], 0); - } else { - - const struct cosmology *cosmo = e->cosmology; - const int count = c->hydro.count; - struct part *restrict parts = c->hydro.parts; - - /* Loop over the gas particles in this cell. */ - for (int k = 0; k < count; k++) { - - /* Get a handle on the part. */ - struct part *restrict p = &parts[k]; - - if (part_is_active(p, e)) { - - /* Finish the force loop */ - hydro_end_force(p, cosmo); - chemistry_end_force(p, cosmo); - -#ifdef SWIFT_BOUNDARY_PARTICLES - - /* Get the ID of the part */ - const long long id = p->id; - - /* Cancel hdyro forces of these particles */ - if (id < SWIFT_BOUNDARY_PARTICLES) { - - /* Don't move ! */ - hydro_reset_acceleration(p); - -#if defined(GIZMO_MFV_SPH) || defined(GIZMO_MFM_SPH) - - /* Some values need to be reset in the Gizmo case. */ - hydro_prepare_force(p, &c->hydro.xparts[k], cosmo, - e->hydro_properties, 0); -#endif - } -#endif - } - } - } - - if (timer) TIMER_TOC(timer_end_hydro_force); -} - -/** - * @brief End the gravity force calculation of all active particles in a cell - * by multiplying the acccelerations by the relevant constants - * - * @param r The #runner thread. - * @param c The #cell. - * @param timer Are we timing this ? - */ -void runner_do_end_grav_force(struct runner *r, struct cell *c, int timer) { - - const struct engine *e = r->e; - - TIMER_TIC; - - /* Anything to do here? */ - if (!cell_is_active_gravity(c, e)) return; - - /* Recurse? */ - if (c->split) { - for (int k = 0; k < 8; k++) - if (c->progeny[k] != NULL) runner_do_end_grav_force(r, c->progeny[k], 0); - } else { - - const struct space *s = e->s; - const int periodic = s->periodic; - const float G_newton = e->physical_constants->const_newton_G; - - /* Potential normalisation in the case of periodic gravity */ - float potential_normalisation = 0.; - if (periodic && (e->policy & engine_policy_self_gravity)) { - const double volume = s->dim[0] * s->dim[1] * s->dim[2]; - const double r_s = e->mesh->r_s; - potential_normalisation = 4. * M_PI * e->total_mass * r_s * r_s / volume; - } - - const int gcount = c->grav.count; - struct gpart *restrict gparts = c->grav.parts; - - /* Loop over the g-particles in this cell. */ - for (int k = 0; k < gcount; k++) { - - /* Get a handle on the gpart. 
*/ - struct gpart *restrict gp = &gparts[k]; - - if (gpart_is_active(gp, e)) { - - /* Finish the force calculation */ - gravity_end_force(gp, G_newton, potential_normalisation, periodic); - -#ifdef SWIFT_MAKE_GRAVITY_GLASS - - /* Negate the gravity forces */ - gp->a_grav[0] *= -1.f; - gp->a_grav[1] *= -1.f; - gp->a_grav[2] *= -1.f; -#endif - -#ifdef SWIFT_NO_GRAVITY_BELOW_ID - - /* Get the ID of the gpart */ - long long id = 0; - if (gp->type == swift_type_gas) - id = e->s->parts[-gp->id_or_neg_offset].id; - else if (gp->type == swift_type_stars) - id = e->s->sparts[-gp->id_or_neg_offset].id; - else if (gp->type == swift_type_black_hole) - error("Unexisting type"); - else - id = gp->id_or_neg_offset; - - /* Cancel gravity forces of these particles */ - if (id < SWIFT_NO_GRAVITY_BELOW_ID) { - - /* Don't move ! */ - gp->a_grav[0] = 0.f; - gp->a_grav[1] = 0.f; - gp->a_grav[2] = 0.f; - } -#endif - -#ifdef SWIFT_DEBUG_CHECKS - if ((e->policy & engine_policy_self_gravity) && - !(e->policy & engine_policy_black_holes)) { - - /* Let's add a self interaction to simplify the count */ - gp->num_interacted++; - - /* Check that this gpart has interacted with all the other - * particles (via direct or multipoles) in the box */ - if (gp->num_interacted != - e->total_nr_gparts - e->count_inhibited_gparts) { - - /* Get the ID of the gpart */ - long long my_id = 0; - if (gp->type == swift_type_gas) - my_id = e->s->parts[-gp->id_or_neg_offset].id; - else if (gp->type == swift_type_stars) - my_id = e->s->sparts[-gp->id_or_neg_offset].id; - else if (gp->type == swift_type_black_hole) - error("Unexisting type"); - else - my_id = gp->id_or_neg_offset; - - error( - "g-particle (id=%lld, type=%s) did not interact " - "gravitationally with all other gparts " - "gp->num_interacted=%lld, total_gparts=%lld (local " - "num_gparts=%zd inhibited_gparts=%lld)", - my_id, part_type_names[gp->type], gp->num_interacted, - e->total_nr_gparts, e->s->nr_gparts, e->count_inhibited_gparts); - } - } -#endif - } - } - } - if (timer) TIMER_TOC(timer_end_grav_force); -} - -/** - * @brief Process all the gas particles in a cell that have been flagged for - * swallowing by a black hole. - * - * This is done by recursing down to the leaf-level and skipping the sub-cells - * that have not been drifted as they would not have any particles with - * swallowing flag. We then loop over the particles with a flag and look into - * the space-wide list of black holes for the particle with the corresponding - * ID. If found, the BH swallows the gas particle and the gas particle is - * removed. If the cell is local, we may be looking for a foreign BH, in which - * case, we do not update the BH (that will be done on its node) but just remove - * the gas particle. - * - * @param r The thread #runner. - * @param c The #cell. - * @param timer Are we timing this? - */ -void runner_do_gas_swallow(struct runner *r, struct cell *c, int timer) { - - struct engine *e = r->e; - struct space *s = e->s; - struct bpart *bparts = s->bparts; - const size_t nr_bpart = s->nr_bparts; -#ifdef WITH_MPI - struct bpart *bparts_foreign = s->bparts_foreign; - const size_t nr_bparts_foreign = s->nr_bparts_foreign; -#endif - - struct part *parts = c->hydro.parts; - struct xpart *xparts = c->hydro.xparts; - - /* Early abort? 
- * (We only want cells for which we drifted the gas as these are - * the only ones that could have gas particles that have been flagged - * for swallowing) */ - if (c->hydro.count == 0 || c->hydro.ti_old_part != e->ti_current) { - return; - } - - /* Loop over the progeny ? */ - if (c->split) { - for (int k = 0; k < 8; k++) { - if (c->progeny[k] != NULL) { - struct cell *restrict cp = c->progeny[k]; - - runner_do_gas_swallow(r, cp, 0); - } - } - } else { - - /* Loop over all the gas particles in the cell - * Note that the cell (and hence the parts) may be local or foreign. */ - const size_t nr_parts = c->hydro.count; - for (size_t k = 0; k < nr_parts; k++) { - - /* Get a handle on the part. */ - struct part *const p = &parts[k]; - struct xpart *const xp = &xparts[k]; - - /* Ignore inhibited particles (they have already been removed!) */ - if (part_is_inhibited(p, e)) continue; - - /* Get the ID of the black holes that will swallow this part */ - const long long swallow_id = - black_holes_get_part_swallow_id(&p->black_holes_data); - - /* Has this particle been flagged for swallowing? */ - if (swallow_id >= 0) { - -#ifdef SWIFT_DEBUG_CHECKS - if (p->ti_drift != e->ti_current) - error("Trying to swallow an un-drifted particle."); -#endif - - /* ID of the BH swallowing this particle */ - const long long BH_id = swallow_id; - - /* Have we found this particle's BH already? */ - int found = 0; - - /* Let's look for the hungry black hole in the local list */ - for (size_t i = 0; i < nr_bpart; ++i) { - - /* Get a handle on the bpart. */ - struct bpart *bp = &bparts[i]; - - if (bp->id == BH_id) { - - /* Lock the space as we are going to work directly on the bpart list - */ - lock_lock(&s->lock); - - /* Swallow the gas particle (i.e. update the BH properties) */ - black_holes_swallow_part(bp, p, xp, e->cosmology); - - /* Release the space as we are done updating the bpart */ - if (lock_unlock(&s->lock) != 0) - error("Failed to unlock the space."); - - message("BH %lld swallowing gas particle %lld", bp->id, p->id); - - /* If the gas particle is local, remove it */ - if (c->nodeID == e->nodeID) { - - message("BH %lld removing gas particle %lld", bp->id, p->id); - - lock_lock(&e->s->lock); - - /* Re-check that the particle has not been removed - * by another thread before we do the deed. */ - if (!part_is_inhibited(p, e)) { - - /* Finally, remove the gas particle from the system - * Recall that the gpart associated with it is also removed - * at the same time. */ - cell_remove_part(e, c, p, xp); - } - - if (lock_unlock(&e->s->lock) != 0) - error("Failed to unlock the space!"); - } - - /* In any case, prevent the particle from being re-swallowed */ - black_holes_mark_part_as_swallowed(&p->black_holes_data); - - found = 1; - break; - } - - } /* Loop over local BHs */ - -#ifdef WITH_MPI - - /* We could also be in the case of a local gas particle being - * swallowed by a foreign BH. In this case, we won't update the - * BH but just remove the particle from the local list. */ - if (c->nodeID == e->nodeID && !found) { - - /* Let's look for the foreign hungry black hole */ - for (size_t i = 0; i < nr_bparts_foreign; ++i) { - - /* Get a handle on the bpart. */ - struct bpart *bp = &bparts_foreign[i]; - - if (bp->id == BH_id) { - - message("BH %lld removing gas particle %lld (foreign BH case)", - bp->id, p->id); - - lock_lock(&e->s->lock); - - /* Re-check that the particle has not been removed - * by another thread before we do the deed. 
*/ - if (!part_is_inhibited(p, e)) { - - /* Finally, remove the gas particle from the system */ - cell_remove_part(e, c, p, xp); - } - - if (lock_unlock(&e->s->lock) != 0) - error("Failed to unlock the space!"); - - found = 1; - break; - } - } /* Loop over foreign BHs */ - } /* Is the cell local? */ -#endif - - /* If we have a local particle, we must have found the BH in one - * of our list of black holes. */ - if (c->nodeID == e->nodeID && !found) { - error("Gas particle %lld could not find BH %lld to be swallowed", - p->id, swallow_id); - } - } /* Part was flagged for swallowing */ - } /* Loop over the parts */ - } /* Cell is not split */ -} - -/** - * @brief Processing of gas particles to swallow - self task case. - * - * @param r The thread #runner. - * @param c The #cell. - * @param timer Are we timing this? - */ -void runner_do_gas_swallow_self(struct runner *r, struct cell *c, int timer) { - -#ifdef SWIFT_DEBUG_CHECKS - if (c->nodeID != r->e->nodeID) error("Running self task on foreign node"); - if (!cell_is_active_black_holes(c, r->e)) - error("Running self task on inactive cell"); -#endif - - runner_do_gas_swallow(r, c, timer); -} - -/** - * @brief Processing of gas particles to swallow - pair task case. - * - * @param r The thread #runner. - * @param ci First #cell. - * @param cj Second #cell. - * @param timer Are we timing this? - */ -void runner_do_gas_swallow_pair(struct runner *r, struct cell *ci, - struct cell *cj, int timer) { - - const struct engine *e = r->e; - -#ifdef SWIFT_DEBUG_CHECKS - if (ci->nodeID != e->nodeID && cj->nodeID != e->nodeID) - error("Running pair task on foreign node"); -#endif - - /* Run the swallowing loop only in the cell that is the neighbour of the - * active BH */ - if (cell_is_active_black_holes(cj, e)) runner_do_gas_swallow(r, ci, timer); - if (cell_is_active_black_holes(ci, e)) runner_do_gas_swallow(r, cj, timer); -} - -/** - * @brief Process all the BH particles in a cell that have been flagged for - * swallowing by a black hole. - * - * This is done by recursing down to the leaf-level and skipping the sub-cells - * that have not been drifted as they would not have any particles with - * swallowing flag. We then loop over the particles with a flag and look into - * the space-wide list of black holes for the particle with the corresponding - * ID. If found, the BH swallows the BH particle and the BH particle is - * removed. If the cell is local, we may be looking for a foreign BH, in which - * case, we do not update the BH (that will be done on its node) but just remove - * the BH particle. - * - * @param r The thread #runner. - * @param c The #cell. - * @param timer Are we timing this? - */ -void runner_do_bh_swallow(struct runner *r, struct cell *c, int timer) { - - struct engine *e = r->e; - struct space *s = e->s; - struct bpart *bparts = s->bparts; - const size_t nr_bpart = s->nr_bparts; -#ifdef WITH_MPI - struct bpart *bparts_foreign = s->bparts_foreign; - const size_t nr_bparts_foreign = s->nr_bparts_foreign; -#endif - - struct bpart *cell_bparts = c->black_holes.parts; - - /* Early abort? - * (We only want cells for which we drifted the BH as these are - * the only ones that could have BH particles that have been flagged - * for swallowing) */ - if (c->black_holes.count == 0 || - c->black_holes.ti_old_part != e->ti_current) { - return; - } - - /* Loop over the progeny ? 
*/ - if (c->split) { - for (int k = 0; k < 8; k++) { - if (c->progeny[k] != NULL) { - struct cell *restrict cp = c->progeny[k]; - - runner_do_bh_swallow(r, cp, 0); - } - } - } else { - - /* Loop over all the gas particles in the cell - * Note that the cell (and hence the bparts) may be local or foreign. */ - const size_t nr_cell_bparts = c->black_holes.count; - for (size_t k = 0; k < nr_cell_bparts; k++) { - - /* Get a handle on the part. */ - struct bpart *const cell_bp = &cell_bparts[k]; - - /* Ignore inhibited particles (they have already been removed!) */ - if (bpart_is_inhibited(cell_bp, e)) continue; - - /* Get the ID of the black holes that will swallow this part */ - const long long swallow_id = - black_holes_get_bpart_swallow_id(&cell_bp->merger_data); - - /* message("OO id=%lld swallow_id = %lld", cell_bp->id, */ - /* swallow_id); */ - - /* Has this particle been flagged for swallowing? */ - if (swallow_id >= 0) { - -#ifdef SWIFT_DEBUG_CHECKS - if (cell_bp->ti_drift != e->ti_current) - error("Trying to swallow an un-drifted particle."); -#endif - - /* ID of the BH swallowing this particle */ - const long long BH_id = swallow_id; - - /* Have we found this particle's BH already? */ - int found = 0; - - /* Let's look for the hungry black hole in the local list */ - for (size_t i = 0; i < nr_bpart; ++i) { - - /* Get a handle on the bpart. */ - struct bpart *bp = &bparts[i]; - - if (bp->id == BH_id) { - - /* Lock the space as we are going to work directly on the bpart list - */ - lock_lock(&s->lock); - - /* Swallow the gas particle (i.e. update the BH properties) */ - black_holes_swallow_bpart(bp, cell_bp, e->cosmology); - - /* Release the space as we are done updating the bpart */ - if (lock_unlock(&s->lock) != 0) - error("Failed to unlock the space."); - - message("BH %lld swallowing BH particle %lld", bp->id, cell_bp->id); - - /* If the gas particle is local, remove it */ - if (c->nodeID == e->nodeID) { - - message("BH %lld removing BH particle %lld", bp->id, cell_bp->id); - - /* Finally, remove the gas particle from the system - * Recall that the gpart associated with it is also removed - * at the same time. */ - cell_remove_bpart(e, c, cell_bp); - } - - /* In any case, prevent the particle from being re-swallowed */ - black_holes_mark_bpart_as_merged(&cell_bp->merger_data); - - found = 1; - break; - } - - } /* Loop over local BHs */ - -#ifdef WITH_MPI - - /* We could also be in the case of a local BH particle being - * swallowed by a foreign BH. In this case, we won't update the - * foreign BH but just remove the particle from the local list. */ - if (c->nodeID == e->nodeID && !found) { - - /* Let's look for the foreign hungry black hole */ - for (size_t i = 0; i < nr_bparts_foreign; ++i) { - - /* Get a handle on the bpart. */ - struct bpart *bp = &bparts_foreign[i]; - - if (bp->id == BH_id) { - - message("BH %lld removing BH particle %lld (foreign BH case)", - bp->id, cell_bp->id); - - /* Finally, remove the gas particle from the system */ - cell_remove_bpart(e, c, cell_bp); - - found = 1; - break; - } - } /* Loop over foreign BHs */ - } /* Is the cell local? */ -#endif - - /* If we have a local particle, we must have found the BH in one - * of our list of black holes. 
*/ - if (c->nodeID == e->nodeID && !found) { - error("BH particle %lld could not find BH %lld to be swallowed", - cell_bp->id, swallow_id); - } - } /* Part was flagged for swallowing */ - } /* Loop over the parts */ - } /* Cell is not split */ -} - -/** - * @brief Processing of bh particles to swallow - self task case. - * - * @param r The thread #runner. - * @param c The #cell. - * @param timer Are we timing this? - */ -void runner_do_bh_swallow_self(struct runner *r, struct cell *c, int timer) { - -#ifdef SWIFT_DEBUG_CHECKS - if (c->nodeID != r->e->nodeID) error("Running self task on foreign node"); - if (!cell_is_active_black_holes(c, r->e)) - error("Running self task on inactive cell"); -#endif - - runner_do_bh_swallow(r, c, timer); -} - -/** - * @brief Processing of bh particles to swallow - pair task case. - * - * @param r The thread #runner. - * @param ci First #cell. - * @param cj Second #cell. - * @param timer Are we timing this? - */ -void runner_do_bh_swallow_pair(struct runner *r, struct cell *ci, - struct cell *cj, int timer) { - - const struct engine *e = r->e; - -#ifdef SWIFT_DEBUG_CHECKS - if (ci->nodeID != e->nodeID && cj->nodeID != e->nodeID) - error("Running pair task on foreign node"); -#endif - - /* Run the swallowing loop only in the cell that is the neighbour of the - * active BH */ - if (cell_is_active_black_holes(cj, e)) runner_do_bh_swallow(r, ci, timer); - if (cell_is_active_black_holes(ci, e)) runner_do_bh_swallow(r, cj, timer); -} - -/** - * @brief Construct the cell properties from the received #part. - * - * @param r The runner thread. - * @param c The cell. - * @param clear_sorts Should we clear the sort flag and hence trigger a sort ? - * @param timer Are we timing this ? - */ -void runner_do_recv_part(struct runner *r, struct cell *c, int clear_sorts, - int timer) { -#ifdef WITH_MPI - - const struct part *restrict parts = c->hydro.parts; - const size_t nr_parts = c->hydro.count; - const integertime_t ti_current = r->e->ti_current; - - TIMER_TIC; - - integertime_t ti_hydro_end_min = max_nr_timesteps; - integertime_t ti_hydro_end_max = 0; - timebin_t time_bin_min = num_time_bins; - timebin_t time_bin_max = 0; - float h_max = 0.f; - -#ifdef SWIFT_DEBUG_CHECKS - if (c->nodeID == engine_rank) error("Updating a local cell!"); -#endif - - /* Clear this cell's sorted mask. */ - if (clear_sorts) c->hydro.sorted = 0; - - /* If this cell is a leaf, collect the particle data. */ - if (!c->split) { - - /* Collect everything... */ - for (size_t k = 0; k < nr_parts; k++) { - if (parts[k].time_bin == time_bin_inhibited) continue; - time_bin_min = min(time_bin_min, parts[k].time_bin); - time_bin_max = max(time_bin_max, parts[k].time_bin); - h_max = max(h_max, parts[k].h); - } - - /* Convert into a time */ - ti_hydro_end_min = get_integer_time_end(ti_current, time_bin_min); - ti_hydro_end_max = get_integer_time_end(ti_current, time_bin_max); - } - - /* Otherwise, recurse and collect. */ - else { - for (int k = 0; k < 8; k++) { - if (c->progeny[k] != NULL && c->progeny[k]->hydro.count > 0) { - runner_do_recv_part(r, c->progeny[k], clear_sorts, 0); - ti_hydro_end_min = - min(ti_hydro_end_min, c->progeny[k]->hydro.ti_end_min); - ti_hydro_end_max = - max(ti_hydro_end_max, c->progeny[k]->hydro.ti_end_max); - h_max = max(h_max, c->progeny[k]->hydro.h_max); - } - } - } - -#ifdef SWIFT_DEBUG_CHECKS - if (ti_hydro_end_min < ti_current) - error( - "Received a cell at an incorrect time c->ti_end_min=%lld, " - "e->ti_current=%lld.", - ti_hydro_end_min, ti_current); -#endif - - /* ... 
and store. */ - // c->hydro.ti_end_min = ti_hydro_end_min; - // c->hydro.ti_end_max = ti_hydro_end_max; - c->hydro.ti_old_part = ti_current; - c->hydro.h_max = h_max; - - if (timer) TIMER_TOC(timer_dorecv_part); - -#else - error("SWIFT was not compiled with MPI support."); -#endif -} - -/** - * @brief Construct the cell properties from the received #gpart. - * - * @param r The runner thread. - * @param c The cell. - * @param timer Are we timing this ? - */ -void runner_do_recv_gpart(struct runner *r, struct cell *c, int timer) { - -#ifdef WITH_MPI - - const struct gpart *restrict gparts = c->grav.parts; - const size_t nr_gparts = c->grav.count; - const integertime_t ti_current = r->e->ti_current; - - TIMER_TIC; - - integertime_t ti_gravity_end_min = max_nr_timesteps; - integertime_t ti_gravity_end_max = 0; - timebin_t time_bin_min = num_time_bins; - timebin_t time_bin_max = 0; - -#ifdef SWIFT_DEBUG_CHECKS - if (c->nodeID == engine_rank) error("Updating a local cell!"); -#endif - - /* If this cell is a leaf, collect the particle data. */ - if (!c->split) { - - /* Collect everything... */ - for (size_t k = 0; k < nr_gparts; k++) { - if (gparts[k].time_bin == time_bin_inhibited) continue; - time_bin_min = min(time_bin_min, gparts[k].time_bin); - time_bin_max = max(time_bin_max, gparts[k].time_bin); - } - - /* Convert into a time */ - ti_gravity_end_min = get_integer_time_end(ti_current, time_bin_min); - ti_gravity_end_max = get_integer_time_end(ti_current, time_bin_max); - } - - /* Otherwise, recurse and collect. */ - else { - for (int k = 0; k < 8; k++) { - if (c->progeny[k] != NULL && c->progeny[k]->grav.count > 0) { - runner_do_recv_gpart(r, c->progeny[k], 0); - ti_gravity_end_min = - min(ti_gravity_end_min, c->progeny[k]->grav.ti_end_min); - ti_gravity_end_max = - max(ti_gravity_end_max, c->progeny[k]->grav.ti_end_max); - } - } - } - -#ifdef SWIFT_DEBUG_CHECKS - if (ti_gravity_end_min < ti_current) - error( - "Received a cell at an incorrect time c->ti_end_min=%lld, " - "e->ti_current=%lld.", - ti_gravity_end_min, ti_current); -#endif - - /* ... and store. */ - // c->grav.ti_end_min = ti_gravity_end_min; - // c->grav.ti_end_max = ti_gravity_end_max; - c->grav.ti_old_part = ti_current; - - if (timer) TIMER_TOC(timer_dorecv_gpart); - -#else - error("SWIFT was not compiled with MPI support."); -#endif -} - -/** - * @brief Construct the cell properties from the received #spart. - * - * @param r The runner thread. - * @param c The cell. - * @param clear_sorts Should we clear the sort flag and hence trigger a sort ? - * @param timer Are we timing this ? - */ -void runner_do_recv_spart(struct runner *r, struct cell *c, int clear_sorts, - int timer) { - -#ifdef WITH_MPI - - struct spart *restrict sparts = c->stars.parts; - const size_t nr_sparts = c->stars.count; - const integertime_t ti_current = r->e->ti_current; - - TIMER_TIC; - - integertime_t ti_stars_end_min = max_nr_timesteps; - integertime_t ti_stars_end_max = 0; - timebin_t time_bin_min = num_time_bins; - timebin_t time_bin_max = 0; - float h_max = 0.f; - -#ifdef SWIFT_DEBUG_CHECKS - if (c->nodeID == engine_rank) error("Updating a local cell!"); -#endif - - /* Clear this cell's sorted mask. */ - if (clear_sorts) c->stars.sorted = 0; - - /* If this cell is a leaf, collect the particle data. */ - if (!c->split) { - - /* Collect everything... 
*/ - for (size_t k = 0; k < nr_sparts; k++) { -#ifdef DEBUG_INTERACTIONS_STARS - sparts[k].num_ngb_force = 0; -#endif - if (sparts[k].time_bin == time_bin_inhibited) continue; - time_bin_min = min(time_bin_min, sparts[k].time_bin); - time_bin_max = max(time_bin_max, sparts[k].time_bin); - h_max = max(h_max, sparts[k].h); - } - - /* Convert into a time */ - ti_stars_end_min = get_integer_time_end(ti_current, time_bin_min); - ti_stars_end_max = get_integer_time_end(ti_current, time_bin_max); - } - - /* Otherwise, recurse and collect. */ - else { - for (int k = 0; k < 8; k++) { - if (c->progeny[k] != NULL && c->progeny[k]->stars.count > 0) { - runner_do_recv_spart(r, c->progeny[k], clear_sorts, 0); - ti_stars_end_min = - min(ti_stars_end_min, c->progeny[k]->stars.ti_end_min); - ti_stars_end_max = - max(ti_stars_end_max, c->progeny[k]->stars.ti_end_max); - h_max = max(h_max, c->progeny[k]->stars.h_max); - } - } - } - -#ifdef SWIFT_DEBUG_CHECKS - if (ti_stars_end_min < ti_current && - !(r->e->policy & engine_policy_star_formation)) - error( - "Received a cell at an incorrect time c->ti_end_min=%lld, " - "e->ti_current=%lld.", - ti_stars_end_min, ti_current); -#endif - - /* ... and store. */ - // c->grav.ti_end_min = ti_gravity_end_min; - // c->grav.ti_end_max = ti_gravity_end_max; - c->stars.ti_old_part = ti_current; - c->stars.h_max = h_max; - - if (timer) TIMER_TOC(timer_dorecv_spart); - -#else - error("SWIFT was not compiled with MPI support."); -#endif -} - -/** - * @brief Construct the cell properties from the received #bpart. - * - * Note that we do not need to clear the sorts since we do not sort - * the black holes. - * - * @param r The runner thread. - * @param c The cell. - * @param clear_sorts Should we clear the sort flag and hence trigger a sort ? - * @param timer Are we timing this ? - */ -void runner_do_recv_bpart(struct runner *r, struct cell *c, int clear_sorts, - int timer) { - -#ifdef WITH_MPI - - struct bpart *restrict bparts = c->black_holes.parts; - const size_t nr_bparts = c->black_holes.count; - const integertime_t ti_current = r->e->ti_current; - - TIMER_TIC; - - integertime_t ti_black_holes_end_min = max_nr_timesteps; - integertime_t ti_black_holes_end_max = 0; - timebin_t time_bin_min = num_time_bins; - timebin_t time_bin_max = 0; - float h_max = 0.f; - -#ifdef SWIFT_DEBUG_CHECKS - if (c->nodeID == engine_rank) error("Updating a local cell!"); -#endif - - /* If this cell is a leaf, collect the particle data. */ - if (!c->split) { - - /* Collect everything... */ - for (size_t k = 0; k < nr_bparts; k++) { -#ifdef DEBUG_INTERACTIONS_BLACK_HOLES - bparts[k].num_ngb_force = 0; -#endif - - /* message("Receiving bparts id=%lld time_bin=%d", */ - /* bparts[k].id, bparts[k].time_bin); */ - - if (bparts[k].time_bin == time_bin_inhibited) continue; - time_bin_min = min(time_bin_min, bparts[k].time_bin); - time_bin_max = max(time_bin_max, bparts[k].time_bin); - h_max = max(h_max, bparts[k].h); - } - - /* Convert into a time */ - ti_black_holes_end_min = get_integer_time_end(ti_current, time_bin_min); - ti_black_holes_end_max = get_integer_time_end(ti_current, time_bin_max); - } - - /* Otherwise, recurse and collect. 
*/ - else { - for (int k = 0; k < 8; k++) { - if (c->progeny[k] != NULL && c->progeny[k]->black_holes.count > 0) { - runner_do_recv_bpart(r, c->progeny[k], clear_sorts, 0); - ti_black_holes_end_min = - min(ti_black_holes_end_min, c->progeny[k]->black_holes.ti_end_min); - ti_black_holes_end_max = - max(ti_black_holes_end_max, c->progeny[k]->black_holes.ti_end_max); - h_max = max(h_max, c->progeny[k]->black_holes.h_max); - } - } - } - -#ifdef SWIFT_DEBUG_CHECKS - if (ti_black_holes_end_min < ti_current) - error( - "Received a cell at an incorrect time c->ti_end_min=%lld, " - "e->ti_current=%lld.", - ti_black_holes_end_min, ti_current); -#endif - - /* ... and store. */ - // c->grav.ti_end_min = ti_gravity_end_min; - // c->grav.ti_end_max = ti_gravity_end_max; - c->black_holes.ti_old_part = ti_current; - c->black_holes.h_max = h_max; - - if (timer) TIMER_TOC(timer_dorecv_bpart); - -#else - error("SWIFT was not compiled with MPI support."); -#endif -} - -/** - * @brief The #runner main thread routine. - * - * @param data A pointer to this thread's data. - */ -void *runner_main(void *data) { - - struct runner *r = (struct runner *)data; - struct engine *e = r->e; - struct scheduler *sched = &e->sched; - unsigned int seed = r->id; - pthread_setspecific(sched->local_seed_pointer, &seed); - /* Main loop. */ - while (1) { - - /* Wait at the barrier. */ - engine_barrier(e); - - /* Can we go home yet? */ - if (e->step_props & engine_step_prop_done) break; - - /* Re-set the pointer to the previous task, as there is none. */ - struct task *t = NULL; - struct task *prev = NULL; - - /* Loop while there are tasks... */ - while (1) { - - /* If there's no old task, try to get a new one. */ - if (t == NULL) { - - /* Get the task. */ - TIMER_TIC - t = scheduler_gettask(sched, r->qid, prev); - TIMER_TOC(timer_gettask); - - /* Did I get anything? */ - if (t == NULL) break; - } - - /* Get the cells. */ - struct cell *ci = t->ci; - struct cell *cj = t->cj; - -#ifdef SWIFT_DEBUG_TASKS - /* Mark the thread we run on */ - t->rid = r->cpuid; - - /* And recover the pair direction */ - if (t->type == task_type_pair || t->type == task_type_sub_pair) { - struct cell *ci_temp = ci; - struct cell *cj_temp = cj; - double shift[3]; - t->sid = space_getsid(e->s, &ci_temp, &cj_temp, shift); - } else { - t->sid = -1; - } -#endif - -#ifdef SWIFT_DEBUG_CHECKS - /* Check that we haven't scheduled an inactive task */ - t->ti_run = e->ti_current; - /* Store the task that will be running (for debugging only) */ - r->t = t; -#endif - - /* Different types of tasks... 
*/ - switch (t->type) { - case task_type_self: - if (t->subtype == task_subtype_density) - runner_doself1_branch_density(r, ci); -#ifdef EXTRA_HYDRO_LOOP - else if (t->subtype == task_subtype_gradient) - runner_doself1_branch_gradient(r, ci); -#endif - else if (t->subtype == task_subtype_force) - runner_doself2_branch_force(r, ci); - else if (t->subtype == task_subtype_limiter) - runner_doself2_branch_limiter(r, ci); - else if (t->subtype == task_subtype_grav) - runner_doself_recursive_grav(r, ci, 1); - else if (t->subtype == task_subtype_external_grav) - runner_do_grav_external(r, ci, 1); - else if (t->subtype == task_subtype_stars_density) - runner_doself_branch_stars_density(r, ci); - else if (t->subtype == task_subtype_stars_feedback) - runner_doself_branch_stars_feedback(r, ci); - else if (t->subtype == task_subtype_bh_density) - runner_doself_branch_bh_density(r, ci); - else if (t->subtype == task_subtype_bh_swallow) - runner_doself_branch_bh_swallow(r, ci); - else if (t->subtype == task_subtype_do_gas_swallow) - runner_do_gas_swallow_self(r, ci, 1); - else if (t->subtype == task_subtype_do_bh_swallow) - runner_do_bh_swallow_self(r, ci, 1); - else if (t->subtype == task_subtype_bh_feedback) - runner_doself_branch_bh_feedback(r, ci); - else - error("Unknown/invalid task subtype (%s).", - subtaskID_names[t->subtype]); - break; - - case task_type_pair: - if (t->subtype == task_subtype_density) - runner_dopair1_branch_density(r, ci, cj); -#ifdef EXTRA_HYDRO_LOOP - else if (t->subtype == task_subtype_gradient) - runner_dopair1_branch_gradient(r, ci, cj); -#endif - else if (t->subtype == task_subtype_force) - runner_dopair2_branch_force(r, ci, cj); - else if (t->subtype == task_subtype_limiter) - runner_dopair2_branch_limiter(r, ci, cj); - else if (t->subtype == task_subtype_grav) - runner_dopair_recursive_grav(r, ci, cj, 1); - else if (t->subtype == task_subtype_stars_density) - runner_dopair_branch_stars_density(r, ci, cj); - else if (t->subtype == task_subtype_stars_feedback) - runner_dopair_branch_stars_feedback(r, ci, cj); - else if (t->subtype == task_subtype_bh_density) - runner_dopair_branch_bh_density(r, ci, cj); - else if (t->subtype == task_subtype_bh_swallow) - runner_dopair_branch_bh_swallow(r, ci, cj); - else if (t->subtype == task_subtype_do_gas_swallow) - runner_do_gas_swallow_pair(r, ci, cj, 1); - else if (t->subtype == task_subtype_do_bh_swallow) - runner_do_bh_swallow_pair(r, ci, cj, 1); - else if (t->subtype == task_subtype_bh_feedback) - runner_dopair_branch_bh_feedback(r, ci, cj); - else - error("Unknown/invalid task subtype (%s/%s).", - taskID_names[t->type], subtaskID_names[t->subtype]); - break; - - case task_type_sub_self: - if (t->subtype == task_subtype_density) - runner_dosub_self1_density(r, ci, 1); -#ifdef EXTRA_HYDRO_LOOP - else if (t->subtype == task_subtype_gradient) - runner_dosub_self1_gradient(r, ci, 1); -#endif - else if (t->subtype == task_subtype_force) - runner_dosub_self2_force(r, ci, 1); - else if (t->subtype == task_subtype_limiter) - runner_dosub_self2_limiter(r, ci, 1); - else if (t->subtype == task_subtype_stars_density) - runner_dosub_self_stars_density(r, ci, 1); - else if (t->subtype == task_subtype_stars_feedback) - runner_dosub_self_stars_feedback(r, ci, 1); - else if (t->subtype == task_subtype_bh_density) - runner_dosub_self_bh_density(r, ci, 1); - else if (t->subtype == task_subtype_bh_swallow) - runner_dosub_self_bh_swallow(r, ci, 1); - else if (t->subtype == task_subtype_do_gas_swallow) - runner_do_gas_swallow_self(r, ci, 1); - else 
if (t->subtype == task_subtype_do_bh_swallow) - runner_do_bh_swallow_self(r, ci, 1); - else if (t->subtype == task_subtype_bh_feedback) - runner_dosub_self_bh_feedback(r, ci, 1); - else - error("Unknown/invalid task subtype (%s/%s).", - taskID_names[t->type], subtaskID_names[t->subtype]); - break; - - case task_type_sub_pair: - if (t->subtype == task_subtype_density) - runner_dosub_pair1_density(r, ci, cj, 1); -#ifdef EXTRA_HYDRO_LOOP - else if (t->subtype == task_subtype_gradient) - runner_dosub_pair1_gradient(r, ci, cj, 1); -#endif - else if (t->subtype == task_subtype_force) - runner_dosub_pair2_force(r, ci, cj, 1); - else if (t->subtype == task_subtype_limiter) - runner_dosub_pair2_limiter(r, ci, cj, 1); - else if (t->subtype == task_subtype_stars_density) - runner_dosub_pair_stars_density(r, ci, cj, 1); - else if (t->subtype == task_subtype_stars_feedback) - runner_dosub_pair_stars_feedback(r, ci, cj, 1); - else if (t->subtype == task_subtype_bh_density) - runner_dosub_pair_bh_density(r, ci, cj, 1); - else if (t->subtype == task_subtype_bh_swallow) - runner_dosub_pair_bh_swallow(r, ci, cj, 1); - else if (t->subtype == task_subtype_do_gas_swallow) - runner_do_gas_swallow_pair(r, ci, cj, 1); - else if (t->subtype == task_subtype_do_bh_swallow) - runner_do_bh_swallow_pair(r, ci, cj, 1); - else if (t->subtype == task_subtype_bh_feedback) - runner_dosub_pair_bh_feedback(r, ci, cj, 1); - else - error("Unknown/invalid task subtype (%s/%s).", - taskID_names[t->type], subtaskID_names[t->subtype]); - break; - - case task_type_sort: - /* Cleanup only if any of the indices went stale. */ - runner_do_hydro_sort( - r, ci, t->flags, - ci->hydro.dx_max_sort_old > space_maxreldx * ci->dmin, 1); - /* Reset the sort flags as our work here is done. */ - t->flags = 0; - break; - case task_type_stars_sort: - /* Cleanup only if any of the indices went stale. */ - runner_do_stars_sort( - r, ci, t->flags, - ci->stars.dx_max_sort_old > space_maxreldx * ci->dmin, 1); - /* Reset the sort flags as our work here is done. 
*/ - t->flags = 0; - break; - case task_type_init_grav: - runner_do_init_grav(r, ci, 1); - break; - case task_type_ghost: - runner_do_ghost(r, ci, 1); - break; -#ifdef EXTRA_HYDRO_LOOP - case task_type_extra_ghost: - runner_do_extra_ghost(r, ci, 1); - break; -#endif - case task_type_stars_ghost: - runner_do_stars_ghost(r, ci, 1); - break; - case task_type_bh_density_ghost: - runner_do_black_holes_density_ghost(r, ci, 1); - break; - case task_type_bh_swallow_ghost3: - runner_do_black_holes_swallow_ghost(r, ci, 1); - break; - case task_type_drift_part: - runner_do_drift_part(r, ci, 1); - break; - case task_type_drift_spart: - runner_do_drift_spart(r, ci, 1); - break; - case task_type_drift_bpart: - runner_do_drift_bpart(r, ci, 1); - break; - case task_type_drift_gpart: - runner_do_drift_gpart(r, ci, 1); - break; - case task_type_kick1: - runner_do_kick1(r, ci, 1); - break; - case task_type_kick2: - runner_do_kick2(r, ci, 1); - break; - case task_type_end_hydro_force: - runner_do_end_hydro_force(r, ci, 1); - break; - case task_type_end_grav_force: - runner_do_end_grav_force(r, ci, 1); - break; - case task_type_logger: - runner_do_logger(r, ci, 1); - break; - case task_type_timestep: - runner_do_timestep(r, ci, 1); - break; - case task_type_timestep_limiter: - runner_do_limiter(r, ci, 0, 1); - break; -#ifdef WITH_MPI - case task_type_send: - if (t->subtype == task_subtype_tend_part) { - free(t->buff); - } else if (t->subtype == task_subtype_tend_gpart) { - free(t->buff); - } else if (t->subtype == task_subtype_tend_spart) { - free(t->buff); - } else if (t->subtype == task_subtype_tend_bpart) { - free(t->buff); - } else if (t->subtype == task_subtype_sf_counts) { - free(t->buff); - } else if (t->subtype == task_subtype_part_swallow) { - free(t->buff); - } else if (t->subtype == task_subtype_bpart_merger) { - free(t->buff); - } - break; - case task_type_recv: - if (t->subtype == task_subtype_tend_part) { - cell_unpack_end_step_hydro(ci, (struct pcell_step_hydro *)t->buff); - free(t->buff); - } else if (t->subtype == task_subtype_tend_gpart) { - cell_unpack_end_step_grav(ci, (struct pcell_step_grav *)t->buff); - free(t->buff); - } else if (t->subtype == task_subtype_tend_spart) { - cell_unpack_end_step_stars(ci, (struct pcell_step_stars *)t->buff); - free(t->buff); - } else if (t->subtype == task_subtype_tend_bpart) { - cell_unpack_end_step_black_holes( - ci, (struct pcell_step_black_holes *)t->buff); - free(t->buff); - } else if (t->subtype == task_subtype_sf_counts) { - cell_unpack_sf_counts(ci, (struct pcell_sf *)t->buff); - cell_clear_stars_sort_flags(ci, /*clear_unused_flags=*/0); - free(t->buff); - } else if (t->subtype == task_subtype_xv) { - runner_do_recv_part(r, ci, 1, 1); - } else if (t->subtype == task_subtype_rho) { - runner_do_recv_part(r, ci, 0, 1); - } else if (t->subtype == task_subtype_gradient) { - runner_do_recv_part(r, ci, 0, 1); - } else if (t->subtype == task_subtype_part_swallow) { - cell_unpack_part_swallow(ci, - (struct black_holes_part_data *)t->buff); - free(t->buff); - } else if (t->subtype == task_subtype_bpart_merger) { - cell_unpack_bpart_swallow(ci, - (struct black_holes_bpart_data *)t->buff); - free(t->buff); - } else if (t->subtype == task_subtype_limiter) { - runner_do_recv_part(r, ci, 0, 1); - } else if (t->subtype == task_subtype_gpart) { - runner_do_recv_gpart(r, ci, 1); - } else if (t->subtype == task_subtype_spart) { - runner_do_recv_spart(r, ci, 1, 1); - } else if (t->subtype == task_subtype_bpart_rho) { - runner_do_recv_bpart(r, ci, 1, 1); - } else if 
(t->subtype == task_subtype_bpart_swallow) { - runner_do_recv_bpart(r, ci, 0, 1); - } else if (t->subtype == task_subtype_bpart_feedback) { - runner_do_recv_bpart(r, ci, 0, 1); - } else if (t->subtype == task_subtype_multipole) { - cell_unpack_multipoles(ci, (struct gravity_tensors *)t->buff); - free(t->buff); - } else { - error("Unknown/invalid task subtype (%d).", t->subtype); - } - break; -#endif - case task_type_grav_down: - runner_do_grav_down(r, t->ci, 1); - break; - case task_type_grav_mesh: - runner_do_grav_mesh(r, t->ci, 1); - break; - case task_type_grav_long_range: - runner_do_grav_long_range(r, t->ci, 1); - break; - case task_type_grav_mm: - runner_dopair_grav_mm_progenies(r, t->flags, t->ci, t->cj); - break; - case task_type_cooling: - runner_do_cooling(r, t->ci, 1); - break; - case task_type_star_formation: - runner_do_star_formation(r, t->ci, 1); - break; - case task_type_stars_resort: - runner_do_stars_resort(r, t->ci, 1); - break; - case task_type_fof_self: - runner_do_fof_self(r, t->ci, 1); - break; - case task_type_fof_pair: - runner_do_fof_pair(r, t->ci, t->cj, 1); - break; - default: - error("Unknown/invalid task type (%d).", t->type); - } - -/* Mark that we have run this task on these cells */ -#ifdef SWIFT_DEBUG_CHECKS - if (ci != NULL) { - ci->tasks_executed[t->type]++; - ci->subtasks_executed[t->subtype]++; - } - if (cj != NULL) { - cj->tasks_executed[t->type]++; - cj->subtasks_executed[t->subtype]++; - } - - /* This runner is not doing a task anymore */ - r->t = NULL; -#endif - - /* We're done with this task, see if we get a next one. */ - prev = t; - t = scheduler_done(sched, t); - - } /* main loop. */ - } - - /* Be kind, rewind. */ - return NULL; -} - -/** - * @brief Write the required particles through the logger. - * - * @param r The runner thread. - * @param c The cell. - * @param timer Are we timing this ? - */ -void runner_do_logger(struct runner *r, struct cell *c, int timer) { - -#ifdef WITH_LOGGER - TIMER_TIC; - - const struct engine *e = r->e; - struct part *restrict parts = c->hydro.parts; - struct xpart *restrict xparts = c->hydro.xparts; - const int count = c->hydro.count; - - /* Anything to do here? */ - if (!cell_is_active_hydro(c, e) && !cell_is_active_gravity(c, e)) return; - - /* Recurse? Avoid spending too much time in useless cells. */ - if (c->split) { - for (int k = 0; k < 8; k++) - if (c->progeny[k] != NULL) runner_do_logger(r, c->progeny[k], 0); - } else { - - /* Loop over the parts in this cell. */ - for (int k = 0; k < count; k++) { - - /* Get a handle on the part. 
*/ - struct part *restrict p = &parts[k]; - struct xpart *restrict xp = &xparts[k]; - - /* If particle needs to be log */ - /* This is the same function than part_is_active, except for - * debugging checks */ - if (part_is_active(p, e)) { - - if (logger_should_write(&xp->logger_data, e->logger)) { - /* Write particle */ - /* Currently writing everything, should adapt it through time */ - logger_log_part(e->logger, p, - logger_mask_data[logger_x].mask | - logger_mask_data[logger_v].mask | - logger_mask_data[logger_a].mask | - logger_mask_data[logger_u].mask | - logger_mask_data[logger_h].mask | - logger_mask_data[logger_rho].mask | - logger_mask_data[logger_consts].mask, - &xp->logger_data.last_offset); - - /* Set counter back to zero */ - xp->logger_data.steps_since_last_output = 0; - } else - /* Update counter */ - xp->logger_data.steps_since_last_output += 1; - } - } - } - - if (c->grav.count > 0) error("gparts not implemented"); - - if (c->stars.count > 0) error("sparts not implemented"); - - if (timer) TIMER_TOC(timer_logger); - -#else - error("Logger disabled, please enable it during configuration"); -#endif -} - -/** - * @brief Recursively search for FOF groups in a single cell. - * - * @param r runner task - * @param c cell - * @param timer 1 if the time is to be recorded. - */ -void runner_do_fof_self(struct runner *r, struct cell *c, int timer) { - -#ifdef WITH_FOF - - TIMER_TIC; - - const struct engine *e = r->e; - struct space *s = e->s; - const double dim[3] = {s->dim[0], s->dim[1], s->dim[2]}; - const int periodic = s->periodic; - const struct gpart *const gparts = s->gparts; - const double search_r2 = e->fof_properties->l_x2; - - rec_fof_search_self(e->fof_properties, dim, search_r2, periodic, gparts, c); - - if (timer) TIMER_TOC(timer_fof_self); - -#else - error("SWIFT was not compiled with FOF enabled!"); -#endif -} - -/** - * @brief Recursively search for FOF groups between a pair of cells. - * - * @param r runner task - * @param ci cell i - * @param cj cell j - * @param timer 1 if the time is to be recorded. - */ -void runner_do_fof_pair(struct runner *r, struct cell *ci, struct cell *cj, - int timer) { - -#ifdef WITH_FOF - - TIMER_TIC; - - const struct engine *e = r->e; - struct space *s = e->s; - const double dim[3] = {s->dim[0], s->dim[1], s->dim[2]}; - const int periodic = s->periodic; - const struct gpart *const gparts = s->gparts; - const double search_r2 = e->fof_properties->l_x2; - - rec_fof_search_pair(e->fof_properties, dim, search_r2, periodic, gparts, ci, - cj); - - if (timer) TIMER_TOC(timer_fof_pair); -#else - error("SWIFT was not compiled with FOF enabled!"); -#endif -} diff --git a/src/runner.h b/src/runner.h index 1dc62ad6f5dc1c92851cf841a4ab55836d084bac..7e8d0459efb5485ea1301c923e8c7a3396b6fc7e 100644 --- a/src/runner.h +++ b/src/runner.h @@ -26,13 +26,21 @@ /* Config parameters. */ #include "../config.h" -/* Includes. */ +/* Local headers. */ #include "cache.h" #include "gravity_cache.h" -#include "task.h" struct cell; struct engine; +struct task; + +/* Unique identifier of loop types */ +#define TASK_LOOP_DENSITY 0 +#define TASK_LOOP_GRADIENT 1 +#define TASK_LOOP_FORCE 2 +#define TASK_LOOP_LIMITER 3 +#define TASK_LOOP_FEEDBACK 4 +#define TASK_LOOP_SWALLOW 5 /** * @brief A struct representing a runner's thread and its data. @@ -75,6 +83,12 @@ struct runner { /* Function prototypes. 
*/ void runner_do_ghost(struct runner *r, struct cell *c, int timer); void runner_do_extra_ghost(struct runner *r, struct cell *c, int timer); +void runner_do_stars_ghost(struct runner *r, struct cell *c, int timer); +void runner_do_black_holes_density_ghost(struct runner *r, struct cell *c, + int timer); +void runner_do_black_holes_swallow_ghost(struct runner *r, struct cell *c, + int timer); +void runner_do_init_grav(struct runner *r, struct cell *c, int timer); void runner_do_hydro_sort(struct runner *r, struct cell *c, int flag, int cleanup, int clock); void runner_do_stars_sort(struct runner *r, struct cell *c, int flag, @@ -84,21 +98,38 @@ void runner_do_all_stars_sort(struct runner *r, struct cell *c); void runner_do_drift_part(struct runner *r, struct cell *c, int timer); void runner_do_drift_gpart(struct runner *r, struct cell *c, int timer); void runner_do_drift_spart(struct runner *r, struct cell *c, int timer); +void runner_do_drift_bpart(struct runner *r, struct cell *c, int timer); void runner_do_kick1(struct runner *r, struct cell *c, int timer); void runner_do_kick2(struct runner *r, struct cell *c, int timer); +void runner_do_timestep(struct runner *r, struct cell *c, int timer); void runner_do_end_hydro_force(struct runner *r, struct cell *c, int timer); +void runner_do_end_grav_force(struct runner *r, struct cell *c, int timer); void runner_do_init(struct runner *r, struct cell *c, int timer); void runner_do_cooling(struct runner *r, struct cell *c, int timer); +void runner_do_limiter(struct runner *r, struct cell *c, int force, int timer); +void runner_do_grav_mesh(struct runner *r, struct cell *c, int timer); void runner_do_grav_external(struct runner *r, struct cell *c, int timer); void runner_do_grav_fft(struct runner *r, int timer); void runner_do_logger(struct runner *r, struct cell *c, int timer); void runner_do_fof_self(struct runner *r, struct cell *c, int timer); void runner_do_fof_pair(struct runner *r, struct cell *ci, struct cell *cj, int timer); +void runner_do_gas_swallow_self(struct runner *r, struct cell *c, int timer); +void runner_do_bh_swallow_self(struct runner *r, struct cell *c, int timer); +void runner_do_gas_swallow_pair(struct runner *r, struct cell *ci, + struct cell *cj, int timer); +void runner_do_bh_swallow_pair(struct runner *r, struct cell *ci, + struct cell *cj, int timer); +void runner_do_star_formation(struct runner *r, struct cell *c, int timer); +void runner_do_stars_resort(struct runner *r, struct cell *c, const int timer); + +void runner_do_recv_gpart(struct runner *r, struct cell *c, int timer); +void runner_do_recv_part(struct runner *r, struct cell *c, int clear_sorts, + int timer); +void runner_do_recv_spart(struct runner *r, struct cell *c, int clear_sorts, + int timer); +void runner_do_recv_bpart(struct runner *r, struct cell *c, int clear_sorts, + int timer); void *runner_main(void *data); -void runner_do_unskip_mapper(void *map_data, int num_elements, - void *extra_data); -void runner_do_drift_all_mapper(void *map_data, int num_elements, - void *extra_data); #endif /* SWIFT_RUNNER_H */ diff --git a/src/runner_black_holes.c b/src/runner_black_holes.c new file mode 100644 index 0000000000000000000000000000000000000000..d9bb62201d7b087670aef0ce2346a51bf61a3868 --- /dev/null +++ b/src/runner_black_holes.c @@ -0,0 +1,459 @@ +/******************************************************************************* + * This file is part of SWIFT. 
+ * Copyright (c) 2012 Pedro Gonnet (pedro.gonnet@durham.ac.uk) + * Matthieu Schaller (matthieu.schaller@durham.ac.uk) + * 2015 Peter W. Draper (p.w.draper@durham.ac.uk) + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published + * by the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public License + * along with this program. If not, see <http://www.gnu.org/licenses/>. + * + ******************************************************************************/ + +/* Config parameters. */ +#include "../config.h" + +/* This object's header. */ +#include "runner.h" + +/* Local headers. */ +#include "active.h" +#include "black_holes.h" +#include "cell.h" +#include "engine.h" +#include "timers.h" + +/** + * @brief Process all the gas particles in a cell that have been flagged for + * swallowing by a black hole. + * + * This is done by recursing down to the leaf-level and skipping the sub-cells + * that have not been drifted as they would not have any particles with + * swallowing flag. We then loop over the particles with a flag and look into + * the space-wide list of black holes for the particle with the corresponding + * ID. If found, the BH swallows the gas particle and the gas particle is + * removed. If the cell is local, we may be looking for a foreign BH, in which + * case, we do not update the BH (that will be done on its node) but just remove + * the gas particle. + * + * @param r The thread #runner. + * @param c The #cell. + * @param timer Are we timing this? + */ +void runner_do_gas_swallow(struct runner *r, struct cell *c, int timer) { + + struct engine *e = r->e; + struct space *s = e->s; + struct bpart *bparts = s->bparts; + const size_t nr_bpart = s->nr_bparts; +#ifdef WITH_MPI + struct bpart *bparts_foreign = s->bparts_foreign; + const size_t nr_bparts_foreign = s->nr_bparts_foreign; +#endif + + struct part *parts = c->hydro.parts; + struct xpart *xparts = c->hydro.xparts; + + /* Early abort? + * (We only want cells for which we drifted the gas as these are + * the only ones that could have gas particles that have been flagged + * for swallowing) */ + if (c->hydro.count == 0 || c->hydro.ti_old_part != e->ti_current) { + return; + } + + /* Loop over the progeny ? */ + if (c->split) { + for (int k = 0; k < 8; k++) { + if (c->progeny[k] != NULL) { + struct cell *restrict cp = c->progeny[k]; + + runner_do_gas_swallow(r, cp, 0); + } + } + } else { + + /* Loop over all the gas particles in the cell + * Note that the cell (and hence the parts) may be local or foreign. */ + const size_t nr_parts = c->hydro.count; + for (size_t k = 0; k < nr_parts; k++) { + + /* Get a handle on the part. */ + struct part *const p = &parts[k]; + struct xpart *const xp = &xparts[k]; + + /* Ignore inhibited particles (they have already been removed!) */ + if (part_is_inhibited(p, e)) continue; + + /* Get the ID of the black holes that will swallow this part */ + const long long swallow_id = + black_holes_get_part_swallow_id(&p->black_holes_data); + + /* Has this particle been flagged for swallowing? 
*/ + if (swallow_id >= 0) { + +#ifdef SWIFT_DEBUG_CHECKS + if (p->ti_drift != e->ti_current) + error("Trying to swallow an un-drifted particle."); +#endif + + /* ID of the BH swallowing this particle */ + const long long BH_id = swallow_id; + + /* Have we found this particle's BH already? */ + int found = 0; + + /* Let's look for the hungry black hole in the local list */ + for (size_t i = 0; i < nr_bpart; ++i) { + + /* Get a handle on the bpart. */ + struct bpart *bp = &bparts[i]; + + if (bp->id == BH_id) { + + /* Lock the space as we are going to work directly on the bpart list + */ + lock_lock(&s->lock); + + /* Swallow the gas particle (i.e. update the BH properties) */ + black_holes_swallow_part(bp, p, xp, e->cosmology); + + /* Release the space as we are done updating the bpart */ + if (lock_unlock(&s->lock) != 0) + error("Failed to unlock the space."); + + message("BH %lld swallowing gas particle %lld", bp->id, p->id); + + /* If the gas particle is local, remove it */ + if (c->nodeID == e->nodeID) { + + message("BH %lld removing gas particle %lld", bp->id, p->id); + + lock_lock(&e->s->lock); + + /* Re-check that the particle has not been removed + * by another thread before we do the deed. */ + if (!part_is_inhibited(p, e)) { + + /* Finally, remove the gas particle from the system + * Recall that the gpart associated with it is also removed + * at the same time. */ + cell_remove_part(e, c, p, xp); + } + + if (lock_unlock(&e->s->lock) != 0) + error("Failed to unlock the space!"); + } + + /* In any case, prevent the particle from being re-swallowed */ + black_holes_mark_part_as_swallowed(&p->black_holes_data); + + found = 1; + break; + } + + } /* Loop over local BHs */ + +#ifdef WITH_MPI + + /* We could also be in the case of a local gas particle being + * swallowed by a foreign BH. In this case, we won't update the + * BH but just remove the particle from the local list. */ + if (c->nodeID == e->nodeID && !found) { + + /* Let's look for the foreign hungry black hole */ + for (size_t i = 0; i < nr_bparts_foreign; ++i) { + + /* Get a handle on the bpart. */ + struct bpart *bp = &bparts_foreign[i]; + + if (bp->id == BH_id) { + + message("BH %lld removing gas particle %lld (foreign BH case)", + bp->id, p->id); + + lock_lock(&e->s->lock); + + /* Re-check that the particle has not been removed + * by another thread before we do the deed. */ + if (!part_is_inhibited(p, e)) { + + /* Finally, remove the gas particle from the system */ + cell_remove_part(e, c, p, xp); + } + + if (lock_unlock(&e->s->lock) != 0) + error("Failed to unlock the space!"); + + found = 1; + break; + } + } /* Loop over foreign BHs */ + } /* Is the cell local? */ +#endif + + /* If we have a local particle, we must have found the BH in one + * of our list of black holes. */ + if (c->nodeID == e->nodeID && !found) { + error("Gas particle %lld could not find BH %lld to be swallowed", + p->id, swallow_id); + } + } /* Part was flagged for swallowing */ + } /* Loop over the parts */ + } /* Cell is not split */ +} + +/** + * @brief Processing of gas particles to swallow - self task case. + * + * @param r The thread #runner. + * @param c The #cell. + * @param timer Are we timing this? 
+ */ +void runner_do_gas_swallow_self(struct runner *r, struct cell *c, int timer) { + +#ifdef SWIFT_DEBUG_CHECKS + if (c->nodeID != r->e->nodeID) error("Running self task on foreign node"); + if (!cell_is_active_black_holes(c, r->e)) + error("Running self task on inactive cell"); +#endif + + runner_do_gas_swallow(r, c, timer); +} + +/** + * @brief Processing of gas particles to swallow - pair task case. + * + * @param r The thread #runner. + * @param ci First #cell. + * @param cj Second #cell. + * @param timer Are we timing this? + */ +void runner_do_gas_swallow_pair(struct runner *r, struct cell *ci, + struct cell *cj, int timer) { + + const struct engine *e = r->e; + +#ifdef SWIFT_DEBUG_CHECKS + if (ci->nodeID != e->nodeID && cj->nodeID != e->nodeID) + error("Running pair task on foreign node"); +#endif + + /* Run the swallowing loop only in the cell that is the neighbour of the + * active BH */ + if (cell_is_active_black_holes(cj, e)) runner_do_gas_swallow(r, ci, timer); + if (cell_is_active_black_holes(ci, e)) runner_do_gas_swallow(r, cj, timer); +} + +/** + * @brief Process all the BH particles in a cell that have been flagged for + * swallowing by a black hole. + * + * This is done by recursing down to the leaf-level and skipping the sub-cells + * that have not been drifted as they would not have any particles with + * swallowing flag. We then loop over the particles with a flag and look into + * the space-wide list of black holes for the particle with the corresponding + * ID. If found, the BH swallows the BH particle and the BH particle is + * removed. If the cell is local, we may be looking for a foreign BH, in which + * case, we do not update the BH (that will be done on its node) but just remove + * the BH particle. + * + * @param r The thread #runner. + * @param c The #cell. + * @param timer Are we timing this? + */ +void runner_do_bh_swallow(struct runner *r, struct cell *c, int timer) { + + struct engine *e = r->e; + struct space *s = e->s; + struct bpart *bparts = s->bparts; + const size_t nr_bpart = s->nr_bparts; +#ifdef WITH_MPI + struct bpart *bparts_foreign = s->bparts_foreign; + const size_t nr_bparts_foreign = s->nr_bparts_foreign; +#endif + + struct bpart *cell_bparts = c->black_holes.parts; + + /* Early abort? + * (We only want cells for which we drifted the BH as these are + * the only ones that could have BH particles that have been flagged + * for swallowing) */ + if (c->black_holes.count == 0 || + c->black_holes.ti_old_part != e->ti_current) { + return; + } + + /* Loop over the progeny ? */ + if (c->split) { + for (int k = 0; k < 8; k++) { + if (c->progeny[k] != NULL) { + struct cell *restrict cp = c->progeny[k]; + + runner_do_bh_swallow(r, cp, 0); + } + } + } else { + + /* Loop over all the gas particles in the cell + * Note that the cell (and hence the bparts) may be local or foreign. */ + const size_t nr_cell_bparts = c->black_holes.count; + for (size_t k = 0; k < nr_cell_bparts; k++) { + + /* Get a handle on the part. */ + struct bpart *const cell_bp = &cell_bparts[k]; + + /* Ignore inhibited particles (they have already been removed!) */ + if (bpart_is_inhibited(cell_bp, e)) continue; + + /* Get the ID of the black holes that will swallow this part */ + const long long swallow_id = + black_holes_get_bpart_swallow_id(&cell_bp->merger_data); + + /* message("OO id=%lld swallow_id = %lld", cell_bp->id, */ + /* swallow_id); */ + + /* Has this particle been flagged for swallowing? 
*/ + if (swallow_id >= 0) { + +#ifdef SWIFT_DEBUG_CHECKS + if (cell_bp->ti_drift != e->ti_current) + error("Trying to swallow an un-drifted particle."); +#endif + + /* ID of the BH swallowing this particle */ + const long long BH_id = swallow_id; + + /* Have we found this particle's BH already? */ + int found = 0; + + /* Let's look for the hungry black hole in the local list */ + for (size_t i = 0; i < nr_bpart; ++i) { + + /* Get a handle on the bpart. */ + struct bpart *bp = &bparts[i]; + + if (bp->id == BH_id) { + + /* Lock the space as we are going to work directly on the bpart list + */ + lock_lock(&s->lock); + + /* Swallow the gas particle (i.e. update the BH properties) */ + black_holes_swallow_bpart(bp, cell_bp, e->cosmology); + + /* Release the space as we are done updating the bpart */ + if (lock_unlock(&s->lock) != 0) + error("Failed to unlock the space."); + + message("BH %lld swallowing BH particle %lld", bp->id, cell_bp->id); + + /* If the gas particle is local, remove it */ + if (c->nodeID == e->nodeID) { + + message("BH %lld removing BH particle %lld", bp->id, cell_bp->id); + + /* Finally, remove the gas particle from the system + * Recall that the gpart associated with it is also removed + * at the same time. */ + cell_remove_bpart(e, c, cell_bp); + } + + /* In any case, prevent the particle from being re-swallowed */ + black_holes_mark_bpart_as_merged(&cell_bp->merger_data); + + found = 1; + break; + } + + } /* Loop over local BHs */ + +#ifdef WITH_MPI + + /* We could also be in the case of a local BH particle being + * swallowed by a foreign BH. In this case, we won't update the + * foreign BH but just remove the particle from the local list. */ + if (c->nodeID == e->nodeID && !found) { + + /* Let's look for the foreign hungry black hole */ + for (size_t i = 0; i < nr_bparts_foreign; ++i) { + + /* Get a handle on the bpart. */ + struct bpart *bp = &bparts_foreign[i]; + + if (bp->id == BH_id) { + + message("BH %lld removing BH particle %lld (foreign BH case)", + bp->id, cell_bp->id); + + /* Finally, remove the gas particle from the system */ + cell_remove_bpart(e, c, cell_bp); + + found = 1; + break; + } + } /* Loop over foreign BHs */ + } /* Is the cell local? */ +#endif + + /* If we have a local particle, we must have found the BH in one + * of our list of black holes. */ + if (c->nodeID == e->nodeID && !found) { + error("BH particle %lld could not find BH %lld to be swallowed", + cell_bp->id, swallow_id); + } + } /* Part was flagged for swallowing */ + } /* Loop over the parts */ + } /* Cell is not split */ +} + +/** + * @brief Processing of bh particles to swallow - self task case. + * + * @param r The thread #runner. + * @param c The #cell. + * @param timer Are we timing this? + */ +void runner_do_bh_swallow_self(struct runner *r, struct cell *c, int timer) { + +#ifdef SWIFT_DEBUG_CHECKS + if (c->nodeID != r->e->nodeID) error("Running self task on foreign node"); + if (!cell_is_active_black_holes(c, r->e)) + error("Running self task on inactive cell"); +#endif + + runner_do_bh_swallow(r, c, timer); +} + +/** + * @brief Processing of bh particles to swallow - pair task case. + * + * @param r The thread #runner. + * @param ci First #cell. + * @param cj Second #cell. + * @param timer Are we timing this? 
+ */ +void runner_do_bh_swallow_pair(struct runner *r, struct cell *ci, + struct cell *cj, int timer) { + + const struct engine *e = r->e; + +#ifdef SWIFT_DEBUG_CHECKS + if (ci->nodeID != e->nodeID && cj->nodeID != e->nodeID) + error("Running pair task on foreign node"); +#endif + + /* Run the swallowing loop only in the cell that is the neighbour of the + * active BH */ + if (cell_is_active_black_holes(cj, e)) runner_do_bh_swallow(r, ci, timer); + if (cell_is_active_black_holes(ci, e)) runner_do_bh_swallow(r, cj, timer); +} diff --git a/src/runner_doiact_black_holes.c b/src/runner_doiact_black_holes.c new file mode 100644 index 0000000000000000000000000000000000000000..5c139eada6cf7403076194c42261948db5e0f7f4 --- /dev/null +++ b/src/runner_doiact_black_holes.c @@ -0,0 +1,53 @@ +/******************************************************************************* + * This file is part of SWIFT. + * Copyright (c) 2012 Pedro Gonnet (pedro.gonnet@durham.ac.uk) + * Matthieu Schaller (matthieu.schaller@durham.ac.uk) + * 2015 Peter W. Draper (p.w.draper@durham.ac.uk) + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published + * by the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public License + * along with this program. If not, see <http://www.gnu.org/licenses/>. + * + ******************************************************************************/ + +/* Config parameters. */ +#include "../config.h" + +/* Local headers. */ +#include "active.h" +#include "black_holes.h" +#include "cell.h" +#include "engine.h" +#include "runner.h" +#include "space_getsid.h" +#include "timers.h" + +/* Import the black hole density loop functions. */ +#define FUNCTION density +#define FUNCTION_TASK_LOOP TASK_LOOP_DENSITY +#include "runner_doiact_functions_black_holes.h" +#undef FUNCTION_TASK_LOOP +#undef FUNCTION + +/* Import the black hole feedback loop functions. */ +#define FUNCTION swallow +#define FUNCTION_TASK_LOOP TASK_LOOP_SWALLOW +#include "runner_doiact_functions_black_holes.h" +#undef FUNCTION_TASK_LOOP +#undef FUNCTION + +/* Import the black hole feedback loop functions. */ +#define FUNCTION feedback +#define FUNCTION_TASK_LOOP TASK_LOOP_FEEDBACK +#include "runner_doiact_functions_black_holes.h" +#undef FUNCTION_TASK_LOOP +#undef FUNCTION diff --git a/src/runner_doiact_black_holes.h b/src/runner_doiact_black_holes.h index ce159c7ac24a508bc625070ed50b3aad7dd9fa8d..763e557babb9ca94a05a28d1ea5ed0f1141684ff 100644 --- a/src/runner_doiact_black_holes.h +++ b/src/runner_doiact_black_holes.h @@ -85,852 +85,20 @@ #define _IACT_BH_BH(f) PASTE(runner_iact_nonsym_bh_bh, f) #define IACT_BH_BH _IACT_BH_BH(FUNCTION) -/** - * @brief Calculate the number density of #part around the #bpart - * - * @param r runner task - * @param c cell - * @param timer 1 if the time is to be recorded. 
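For readers unfamiliar with the token-pasting scheme used by the new runner_doiact_black_holes.c above (define FUNCTION, include runner_doiact_functions_black_holes.h, undefine, repeat for each task loop), the following stand-alone sketch illustrates how the PASTE indirection stamps out one copy of each loop per flavour. It is a simplified illustration only: the names runner_doself_density / runner_doself_swallow and the printf bodies are hypothetical stand-ins, not the real SWIFT interaction loops.

#include <stdio.h>

/* Two-level indirection so that FUNCTION is macro-expanded before the
 * tokens are pasted together (the same trick as in the patch above). */
#define PASTE(x, y) x##_##y
#define _DOSELF(f) PASTE(runner_doself, f)
#define DOSELF _DOSELF(FUNCTION)

/* Stamp out one "loop" per flavour, as the real code does by
 * re-including the *_functions_* header with a new FUNCTION. */
#define FUNCTION density
void DOSELF(void) { printf("density flavour\n"); } /* -> runner_doself_density */
#undef FUNCTION

#define FUNCTION swallow
void DOSELF(void) { printf("swallow flavour\n"); } /* -> runner_doself_swallow */
#undef FUNCTION

int main(void) {
  runner_doself_density();  /* generated by the first expansion */
  runner_doself_swallow();  /* generated by the second expansion */
  return 0;
}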
- */ -void DOSELF1_BH(struct runner *r, struct cell *c, int timer) { - -#ifdef SWIFT_DEBUG_CHECKS - if (c->nodeID != engine_rank) error("Should be run on a different node"); -#endif - - TIMER_TIC; - - const struct engine *e = r->e; - const integertime_t ti_current = e->ti_current; - const struct cosmology *cosmo = e->cosmology; - - /* Anything to do here? */ - if (c->black_holes.count == 0) return; - if (!cell_is_active_black_holes(c, e)) return; - - const int bcount = c->black_holes.count; - const int count = c->hydro.count; - struct bpart *restrict bparts = c->black_holes.parts; - struct part *restrict parts = c->hydro.parts; - struct xpart *restrict xparts = c->hydro.xparts; - - /* Do we actually have any gas neighbours? */ - if (c->hydro.count != 0) { - - /* Loop over the bparts in ci. */ - for (int bid = 0; bid < bcount; bid++) { - - /* Get a hold of the ith bpart in ci. */ - struct bpart *restrict bi = &bparts[bid]; - - /* Skip inactive particles */ - if (!bpart_is_active(bi, e)) continue; - - const float hi = bi->h; - const float hig2 = hi * hi * kernel_gamma2; - const float bix[3] = {(float)(bi->x[0] - c->loc[0]), - (float)(bi->x[1] - c->loc[1]), - (float)(bi->x[2] - c->loc[2])}; - - /* Loop over the parts in cj. */ - for (int pjd = 0; pjd < count; pjd++) { - - /* Get a pointer to the jth particle. */ - struct part *restrict pj = &parts[pjd]; - struct xpart *restrict xpj = &xparts[pjd]; - const float hj = pj->h; - - /* Early abort? */ - if (part_is_inhibited(pj, e)) continue; - - /* Compute the pairwise distance. */ - const float pjx[3] = {(float)(pj->x[0] - c->loc[0]), - (float)(pj->x[1] - c->loc[1]), - (float)(pj->x[2] - c->loc[2])}; - float dx[3] = {bix[0] - pjx[0], bix[1] - pjx[1], bix[2] - pjx[2]}; - const float r2 = dx[0] * dx[0] + dx[1] * dx[1] + dx[2] * dx[2]; - -#ifdef SWIFT_DEBUG_CHECKS - /* Check that particles have been drifted to the current time */ - if (bi->ti_drift != e->ti_current) - error("Particle bi not drifted to current time"); - if (pj->ti_drift != e->ti_current) - error("Particle pj not drifted to current time"); -#endif - - if (r2 < hig2) { - IACT_BH_GAS(r2, dx, hi, hj, bi, pj, xpj, cosmo, e->gravity_properties, - ti_current); - } - } /* loop over the parts in ci. */ - } /* loop over the bparts in ci. */ - } /* Do we have gas particles in the cell? */ - - /* When doing BH swallowing, we need a quick loop also over the BH - * neighbours */ -#if (FUNCTION_TASK_LOOP == TASK_LOOP_SWALLOW) - - /* Loop over the bparts in ci. */ - for (int bid = 0; bid < bcount; bid++) { - - /* Get a hold of the ith bpart in ci. */ - struct bpart *restrict bi = &bparts[bid]; - - /* Skip inactive particles */ - if (!bpart_is_active(bi, e)) continue; - - const float hi = bi->h; - const float hig2 = hi * hi * kernel_gamma2; - const float bix[3] = {(float)(bi->x[0] - c->loc[0]), - (float)(bi->x[1] - c->loc[1]), - (float)(bi->x[2] - c->loc[2])}; - - /* Loop over the parts in cj. */ - for (int bjd = 0; bjd < bcount; bjd++) { - - /* Skip self interaction */ - if (bid == bjd) continue; - - /* Get a pointer to the jth particle. */ - struct bpart *restrict bj = &bparts[bjd]; - const float hj = bj->h; - - /* Early abort? */ - if (bpart_is_inhibited(bj, e)) continue; - - /* Compute the pairwise distance. 
*/ - const float bjx[3] = {(float)(bj->x[0] - c->loc[0]), - (float)(bj->x[1] - c->loc[1]), - (float)(bj->x[2] - c->loc[2])}; - float dx[3] = {bix[0] - bjx[0], bix[1] - bjx[1], bix[2] - bjx[2]}; - const float r2 = dx[0] * dx[0] + dx[1] * dx[1] + dx[2] * dx[2]; - -#ifdef SWIFT_DEBUG_CHECKS - /* Check that particles have been drifted to the current time */ - if (bi->ti_drift != e->ti_current) - error("Particle bi not drifted to current time"); - if (bj->ti_drift != e->ti_current) - error("Particle bj not drifted to current time"); -#endif - - if (r2 < hig2) { - IACT_BH_BH(r2, dx, hi, hj, bi, bj, cosmo, e->gravity_properties, - ti_current); - } - } /* loop over the bparts in ci. */ - } /* loop over the bparts in ci. */ - -#endif /* (FUNCTION_TASK_LOOP == TASK_LOOP_SWALLOW) */ - - TIMER_TOC(TIMER_DOSELF_BH); -} - -/** - * @brief Calculate the number density of cj #part around the ci #bpart - * - * @param r runner task - * @param ci The first #cell - * @param cj The second #cell - */ -void DO_NONSYM_PAIR1_BH_NAIVE(struct runner *r, struct cell *restrict ci, - struct cell *restrict cj) { - -#ifdef SWIFT_DEBUG_CHECKS -#if (FUNCTION_TASK_LOOP == TASK_LOOP_DENSITY) - if (ci->nodeID != engine_rank) error("Should be run on a different node"); -#elif (FUNCTION_TASK_LOOP == TASK_LOOP_FEEDBACK) - if (cj->nodeID != engine_rank) error("Should be run on a different node"); -#endif -#endif - - const struct engine *e = r->e; - const integertime_t ti_current = e->ti_current; - const struct cosmology *cosmo = e->cosmology; - - /* Anything to do here? */ - if (ci->black_holes.count == 0) return; - if (!cell_is_active_black_holes(ci, e)) return; - - const int bcount_i = ci->black_holes.count; - const int count_j = cj->hydro.count; - struct bpart *restrict bparts_i = ci->black_holes.parts; - struct part *restrict parts_j = cj->hydro.parts; - struct xpart *restrict xparts_j = cj->hydro.xparts; - - /* Get the relative distance between the pairs, wrapping. */ - double shift[3] = {0.0, 0.0, 0.0}; - for (int k = 0; k < 3; k++) { - if (cj->loc[k] - ci->loc[k] < -e->s->dim[k] / 2) - shift[k] = e->s->dim[k]; - else if (cj->loc[k] - ci->loc[k] > e->s->dim[k] / 2) - shift[k] = -e->s->dim[k]; - } - - /* Do we actually have any gas neighbours? */ - if (cj->hydro.count != 0) { - - /* Loop over the bparts in ci. */ - for (int bid = 0; bid < bcount_i; bid++) { - - /* Get a hold of the ith bpart in ci. */ - struct bpart *restrict bi = &bparts_i[bid]; - - /* Skip inactive particles */ - if (!bpart_is_active(bi, e)) continue; - - const float hi = bi->h; - const float hig2 = hi * hi * kernel_gamma2; - const float bix[3] = {(float)(bi->x[0] - (cj->loc[0] + shift[0])), - (float)(bi->x[1] - (cj->loc[1] + shift[1])), - (float)(bi->x[2] - (cj->loc[2] + shift[2]))}; - - /* Loop over the parts in cj. */ - for (int pjd = 0; pjd < count_j; pjd++) { - - /* Get a pointer to the jth particle. */ - struct part *restrict pj = &parts_j[pjd]; - struct xpart *restrict xpj = &xparts_j[pjd]; - const float hj = pj->h; - - /* Skip inhibited particles. */ - if (part_is_inhibited(pj, e)) continue; - - /* Compute the pairwise distance. 
*/ - const float pjx[3] = {(float)(pj->x[0] - cj->loc[0]), - (float)(pj->x[1] - cj->loc[1]), - (float)(pj->x[2] - cj->loc[2])}; - float dx[3] = {bix[0] - pjx[0], bix[1] - pjx[1], bix[2] - pjx[2]}; - const float r2 = dx[0] * dx[0] + dx[1] * dx[1] + dx[2] * dx[2]; - -#ifdef SWIFT_DEBUG_CHECKS - /* Check that particles have been drifted to the current time */ - if (bi->ti_drift != e->ti_current) - error("Particle bi not drifted to current time"); - if (pj->ti_drift != e->ti_current) - error("Particle pj not drifted to current time"); -#endif - - if (r2 < hig2) { - IACT_BH_GAS(r2, dx, hi, hj, bi, pj, xpj, cosmo, e->gravity_properties, - ti_current); - } - } /* loop over the parts in cj. */ - } /* loop over the bparts in ci. */ - } /* Do we have gas particles in the cell? */ - - /* When doing BH swallowing, we need a quick loop also over the BH - * neighbours */ -#if (FUNCTION_TASK_LOOP == TASK_LOOP_SWALLOW) - - const int bcount_j = cj->black_holes.count; - struct bpart *restrict bparts_j = cj->black_holes.parts; - - /* Loop over the bparts in ci. */ - for (int bid = 0; bid < bcount_i; bid++) { - - /* Get a hold of the ith bpart in ci. */ - struct bpart *restrict bi = &bparts_i[bid]; - - /* Skip inactive particles */ - if (!bpart_is_active(bi, e)) continue; - - const float hi = bi->h; - const float hig2 = hi * hi * kernel_gamma2; - const float bix[3] = {(float)(bi->x[0] - (cj->loc[0] + shift[0])), - (float)(bi->x[1] - (cj->loc[1] + shift[1])), - (float)(bi->x[2] - (cj->loc[2] + shift[2]))}; - - /* Loop over the bparts in cj. */ - for (int bjd = 0; bjd < bcount_j; bjd++) { - - /* Get a pointer to the jth particle. */ - struct bpart *restrict bj = &bparts_j[bjd]; - const float hj = bj->h; - - /* Skip inhibited particles. */ - if (bpart_is_inhibited(bj, e)) continue; - - /* Compute the pairwise distance. */ - const float bjx[3] = {(float)(bj->x[0] - cj->loc[0]), - (float)(bj->x[1] - cj->loc[1]), - (float)(bj->x[2] - cj->loc[2])}; - float dx[3] = {bix[0] - bjx[0], bix[1] - bjx[1], bix[2] - bjx[2]}; - const float r2 = dx[0] * dx[0] + dx[1] * dx[1] + dx[2] * dx[2]; - -#ifdef SWIFT_DEBUG_CHECKS - /* Check that particles have been drifted to the current time */ - if (bi->ti_drift != e->ti_current) - error("Particle bi not drifted to current time"); - if (bj->ti_drift != e->ti_current) - error("Particle bj not drifted to current time"); -#endif - - if (r2 < hig2) { - IACT_BH_BH(r2, dx, hi, hj, bi, bj, cosmo, e->gravity_properties, - ti_current); - } - } /* loop over the bparts in cj. */ - } /* loop over the bparts in ci. */ - -#endif /* (FUNCTION_TASK_LOOP == TASK_LOOP_SWALLOW) */ -} - -void DOPAIR1_BH_NAIVE(struct runner *r, struct cell *restrict ci, - struct cell *restrict cj, int timer) { - - TIMER_TIC; - -#if (FUNCTION_TASK_LOOP == TASK_LOOP_DENSITY) - const int do_ci_bh = ci->nodeID == r->e->nodeID; - const int do_cj_bh = cj->nodeID == r->e->nodeID; -#elif (FUNCTION_TASK_LOOP == TASK_LOOP_FEEDBACK) - /* here we are updating the hydro -> switch ci, cj */ - const int do_ci_bh = cj->nodeID == r->e->nodeID; - const int do_cj_bh = ci->nodeID == r->e->nodeID; -#else - /* The swallow task is executed on both sides */ - const int do_ci_bh = 1; - const int do_cj_bh = 1; -#endif - - if (do_ci_bh) DO_NONSYM_PAIR1_BH_NAIVE(r, ci, cj); - if (do_cj_bh) DO_NONSYM_PAIR1_BH_NAIVE(r, cj, ci); - - TIMER_TOC(TIMER_DOPAIR_BH); -} - -/** - * @brief Compute the interactions between a cell pair, but only for the - * given indices in ci. - * - * Version using a brute-force algorithm. - * - * @param r The #runner. 
- * @param ci The first #cell. - * @param bparts_i The #bpart to interact with @c cj. - * @param ind The list of indices of particles in @c ci to interact with. - * @param bcount The number of particles in @c ind. - * @param cj The second #cell. - * @param shift The shift vector to apply to the particles in ci. - */ -void DOPAIR1_SUBSET_BH_NAIVE(struct runner *r, struct cell *restrict ci, - struct bpart *restrict bparts_i, int *restrict ind, - const int bcount, struct cell *restrict cj, - const double *shift) { - -#ifdef SWIFT_DEBUG_CHECKS - if (ci->nodeID != engine_rank) error("Should be run on a different node"); -#endif - - const struct engine *e = r->e; - const integertime_t ti_current = e->ti_current; - const struct cosmology *cosmo = e->cosmology; - - const int count_j = cj->hydro.count; - struct part *restrict parts_j = cj->hydro.parts; - struct xpart *restrict xparts_j = cj->hydro.xparts; - - /* Early abort? */ - if (count_j == 0) return; - - /* Loop over the parts_i. */ - for (int bid = 0; bid < bcount; bid++) { - - /* Get a hold of the ith part in ci. */ - struct bpart *restrict bi = &bparts_i[ind[bid]]; - - const double bix = bi->x[0] - (shift[0]); - const double biy = bi->x[1] - (shift[1]); - const double biz = bi->x[2] - (shift[2]); - const float hi = bi->h; - const float hig2 = hi * hi * kernel_gamma2; - -#ifdef SWIFT_DEBUG_CHECKS - if (!bpart_is_active(bi, e)) - error("Trying to correct smoothing length of inactive particle !"); -#endif - - /* Loop over the parts in cj. */ - for (int pjd = 0; pjd < count_j; pjd++) { - - /* Get a pointer to the jth particle. */ - struct part *restrict pj = &parts_j[pjd]; - struct xpart *restrict xpj = &xparts_j[pjd]; - - /* Skip inhibited particles */ - if (part_is_inhibited(pj, e)) continue; - - const double pjx = pj->x[0]; - const double pjy = pj->x[1]; - const double pjz = pj->x[2]; - const float hj = pj->h; - - /* Compute the pairwise distance. */ - float dx[3] = {(float)(bix - pjx), (float)(biy - pjy), - (float)(biz - pjz)}; - const float r2 = dx[0] * dx[0] + dx[1] * dx[1] + dx[2] * dx[2]; - -#ifdef SWIFT_DEBUG_CHECKS - /* Check that particles have been drifted to the current time */ - if (pj->ti_drift != e->ti_current) - error("Particle pj not drifted to current time"); -#endif - /* Hit or miss? */ - if (r2 < hig2) { - IACT_BH_GAS(r2, dx, hi, hj, bi, pj, xpj, cosmo, e->gravity_properties, - ti_current); - } - } /* loop over the parts in cj. */ - } /* loop over the parts in ci. */ -} - -/** - * @brief Compute the interactions between a cell pair, but only for the - * given indices in ci. - * - * @param r The #runner. - * @param ci The first #cell. - * @param bparts The #bpart to interact. - * @param ind The list of indices of particles in @c ci to interact with. - * @param bcount The number of particles in @c ind. - */ -void DOSELF1_SUBSET_BH(struct runner *r, struct cell *restrict ci, - struct bpart *restrict bparts, int *restrict ind, - const int bcount) { - -#ifdef SWIFT_DEBUG_CHECKS - if (ci->nodeID != engine_rank) error("Should be run on a different node"); -#endif - - const struct engine *e = r->e; - const integertime_t ti_current = e->ti_current; - const struct cosmology *cosmo = e->cosmology; - - const int count_i = ci->hydro.count; - struct part *restrict parts_j = ci->hydro.parts; - struct xpart *restrict xparts_j = ci->hydro.xparts; +void DOSELF1_BRANCH_BH(struct runner *r, struct cell *c); +void DOPAIR1_BRANCH_BH(struct runner *r, struct cell *ci, struct cell *cj); - /* Early abort? 
*/ - if (count_i == 0) return; - - /* Loop over the parts in ci. */ - for (int bid = 0; bid < bcount; bid++) { - - /* Get a hold of the ith part in ci. */ - struct bpart *bi = &bparts[ind[bid]]; - const float bix[3] = {(float)(bi->x[0] - ci->loc[0]), - (float)(bi->x[1] - ci->loc[1]), - (float)(bi->x[2] - ci->loc[2])}; - const float hi = bi->h; - const float hig2 = hi * hi * kernel_gamma2; - -#ifdef SWIFT_DEBUG_CHECKS - if (!bpart_is_active(bi, e)) error("Inactive particle in subset function!"); -#endif - - /* Loop over the parts in cj. */ - for (int pjd = 0; pjd < count_i; pjd++) { - - /* Get a pointer to the jth particle. */ - struct part *restrict pj = &parts_j[pjd]; - struct xpart *restrict xpj = &xparts_j[pjd]; - - /* Early abort? */ - if (part_is_inhibited(pj, e)) continue; - - /* Compute the pairwise distance. */ - const float pjx[3] = {(float)(pj->x[0] - ci->loc[0]), - (float)(pj->x[1] - ci->loc[1]), - (float)(pj->x[2] - ci->loc[2])}; - float dx[3] = {bix[0] - pjx[0], bix[1] - pjx[1], bix[2] - pjx[2]}; - const float r2 = dx[0] * dx[0] + dx[1] * dx[1] + dx[2] * dx[2]; - -#ifdef SWIFT_DEBUG_CHECKS - /* Check that particles have been drifted to the current time */ - if (pj->ti_drift != e->ti_current) - error("Particle pj not drifted to current time"); -#endif - - /* Hit or miss? */ - if (r2 < hig2) { - IACT_BH_GAS(r2, dx, hi, pj->h, bi, pj, xpj, cosmo, - e->gravity_properties, ti_current); - } - } /* loop over the parts in cj. */ - } /* loop over the parts in ci. */ -} +void DOSUB_SELF1_BH(struct runner *r, struct cell *ci, int gettimer); +void DOSUB_PAIR1_BH(struct runner *r, struct cell *ci, struct cell *cj, + int gettimer); -/** - * @brief Determine which version of DOSELF1_SUBSET_BH needs to be called - * depending on the optimisation level. - * - * @param r The #runner. - * @param ci The first #cell. - * @param bparts The #bpart to interact. - * @param ind The list of indices of particles in @c ci to interact with. - * @param bcount The number of particles in @c ind. - */ void DOSELF1_SUBSET_BRANCH_BH(struct runner *r, struct cell *restrict ci, struct bpart *restrict bparts, int *restrict ind, - const int bcount) { - - DOSELF1_SUBSET_BH(r, ci, bparts, ind, bcount); -} - -/** - * @brief Determine which version of DOPAIR1_SUBSET_BH needs to be called - * depending on the orientation of the cells or whether DOPAIR1_SUBSET_BH - * needs to be called at all. - * - * @param r The #runner. - * @param ci The first #cell. - * @param bparts_i The #bpart to interact with @c cj. - * @param ind The list of indices of particles in @c ci to interact with. - * @param bcount The number of particles in @c ind. - * @param cj The second #cell. - */ + const int bcount); void DOPAIR1_SUBSET_BRANCH_BH(struct runner *r, struct cell *restrict ci, struct bpart *restrict bparts_i, int *restrict ind, int const bcount, - struct cell *restrict cj) { - - const struct engine *e = r->e; - - /* Anything to do here? */ - if (cj->hydro.count == 0) return; - - /* Get the relative distance between the pairs, wrapping. 
*/ - double shift[3] = {0.0, 0.0, 0.0}; - for (int k = 0; k < 3; k++) { - if (cj->loc[k] - ci->loc[k] < -e->s->dim[k] / 2) - shift[k] = e->s->dim[k]; - else if (cj->loc[k] - ci->loc[k] > e->s->dim[k] / 2) - shift[k] = -e->s->dim[k]; - } - - DOPAIR1_SUBSET_BH_NAIVE(r, ci, bparts_i, ind, bcount, cj, shift); -} + struct cell *restrict cj); void DOSUB_SUBSET_BH(struct runner *r, struct cell *ci, struct bpart *bparts, - int *ind, const int bcount, struct cell *cj, - int gettimer) { - - const struct engine *e = r->e; - struct space *s = e->s; - - /* Should we even bother? */ - if (!cell_is_active_black_holes(ci, e) && - (cj == NULL || !cell_is_active_black_holes(cj, e))) - return; - - /* Find out in which sub-cell of ci the parts are. */ - struct cell *sub = NULL; - if (ci->split) { - for (int k = 0; k < 8; k++) { - if (ci->progeny[k] != NULL) { - if (&bparts[ind[0]] >= &ci->progeny[k]->black_holes.parts[0] && - &bparts[ind[0]] < - &ci->progeny[k] - ->black_holes.parts[ci->progeny[k]->black_holes.count]) { - sub = ci->progeny[k]; - break; - } - } - } - } - - /* Is this a single cell? */ - if (cj == NULL) { - - /* Recurse? */ - if (cell_can_recurse_in_self_black_holes_task(ci)) { - - /* Loop over all progeny. */ - DOSUB_SUBSET_BH(r, sub, bparts, ind, bcount, NULL, 0); - for (int j = 0; j < 8; j++) - if (ci->progeny[j] != sub && ci->progeny[j] != NULL) - DOSUB_SUBSET_BH(r, sub, bparts, ind, bcount, ci->progeny[j], 0); - - } - - /* Otherwise, compute self-interaction. */ - else - DOSELF1_SUBSET_BRANCH_BH(r, ci, bparts, ind, bcount); - } /* self-interaction. */ - - /* Otherwise, it's a pair interaction. */ - else { - - /* Recurse? */ - if (cell_can_recurse_in_pair_black_holes_task(ci, cj) && - cell_can_recurse_in_pair_black_holes_task(cj, ci)) { - - /* Get the type of pair and flip ci/cj if needed. */ - double shift[3] = {0.0, 0.0, 0.0}; - const int sid = space_getsid(s, &ci, &cj, shift); - - struct cell_split_pair *csp = &cell_split_pairs[sid]; - for (int k = 0; k < csp->count; k++) { - const int pid = csp->pairs[k].pid; - const int pjd = csp->pairs[k].pjd; - if (ci->progeny[pid] == sub && cj->progeny[pjd] != NULL) - DOSUB_SUBSET_BH(r, ci->progeny[pid], bparts, ind, bcount, - cj->progeny[pjd], 0); - if (ci->progeny[pid] != NULL && cj->progeny[pjd] == sub) - DOSUB_SUBSET_BH(r, cj->progeny[pjd], bparts, ind, bcount, - ci->progeny[pid], 0); - } - } - - /* Otherwise, compute the pair directly. */ - else if (cell_is_active_black_holes(ci, e) && cj->hydro.count > 0) { - - /* Do any of the cells need to be drifted first? */ - if (cell_is_active_black_holes(ci, e)) { - if (!cell_are_bpart_drifted(ci, e)) error("Cell should be drifted!"); - if (!cell_are_part_drifted(cj, e)) error("Cell should be drifted!"); - } - - DOPAIR1_SUBSET_BRANCH_BH(r, ci, bparts, ind, bcount, cj); - } - - } /* otherwise, pair interaction. */ -} - -/** - * @brief Determine which version of DOSELF1_BH needs to be called depending - * on the optimisation level. - * - * @param r #runner - * @param c #cell c - * - */ -void DOSELF1_BRANCH_BH(struct runner *r, struct cell *c) { - - const struct engine *restrict e = r->e; - - /* Anything to do here? */ - if (c->black_holes.count == 0) return; - - /* Anything to do here? */ - if (!cell_is_active_black_holes(c, e)) return; - - /* Did we mess up the recursion? 
*/ - if (c->black_holes.h_max_old * kernel_gamma > c->dmin) - error("Cell smaller than smoothing length"); - - DOSELF1_BH(r, c, 1); -} - -/** - * @brief Determine which version of DOPAIR1_BH needs to be called depending - * on the orientation of the cells or whether DOPAIR1_BH needs to be called - * at all. - * - * @param r #runner - * @param ci #cell ci - * @param cj #cell cj - * - */ -void DOPAIR1_BRANCH_BH(struct runner *r, struct cell *ci, struct cell *cj) { - - const struct engine *restrict e = r->e; - - const int ci_active = cell_is_active_black_holes(ci, e); - const int cj_active = cell_is_active_black_holes(cj, e); -#if (FUNCTION_TASK_LOOP == TASK_LOOP_DENSITY) - const int do_ci_bh = ci->nodeID == e->nodeID; - const int do_cj_bh = cj->nodeID == e->nodeID; -#elif (FUNCTION_TASK_LOOP == TASK_LOOP_FEEDBACK) - /* here we are updating the hydro -> switch ci, cj */ - const int do_ci_bh = cj->nodeID == e->nodeID; - const int do_cj_bh = ci->nodeID == e->nodeID; -#else - /* The swallow task is executed on both sides */ - const int do_ci_bh = 1; - const int do_cj_bh = 1; -#endif - - const int do_ci = (ci->black_holes.count != 0 && cj->hydro.count != 0 && - ci_active && do_ci_bh); - const int do_cj = (cj->black_holes.count != 0 && ci->hydro.count != 0 && - cj_active && do_cj_bh); - - /* Anything to do here? */ - if (!do_ci && !do_cj) return; - - /* Check that cells are drifted. */ - if (do_ci && - (!cell_are_bpart_drifted(ci, e) || !cell_are_part_drifted(cj, e))) - error("Interacting undrifted cells."); - - if (do_cj && - (!cell_are_part_drifted(ci, e) || !cell_are_bpart_drifted(cj, e))) - error("Interacting undrifted cells."); - - /* No sorted intreactions here -> use the naive ones */ - DOPAIR1_BH_NAIVE(r, ci, cj, 1); -} - -/** - * @brief Compute grouped sub-cell interactions for pairs - * - * @param r The #runner. - * @param ci The first #cell. - * @param cj The second #cell. - * @param gettimer Do we have a timer ? - * - * @todo Hard-code the sid on the recursive calls to avoid the - * redundant computations to find the sid on-the-fly. - */ -void DOSUB_PAIR1_BH(struct runner *r, struct cell *ci, struct cell *cj, - int gettimer) { - - TIMER_TIC; - - struct space *s = r->e->s; - const struct engine *e = r->e; - - /* Should we even bother? - * In the swallow case we care about BH-BH and BH-gas - * interactions. - * In all other cases only BH-gas so we can abort if there is - * is no gas in the cell */ -#if (FUNCTION_TASK_LOOP == TASK_LOOP_SWALLOW) - const int should_do_ci = - ci->black_holes.count != 0 && cell_is_active_black_holes(ci, e); - const int should_do_cj = - cj->black_holes.count != 0 && cell_is_active_black_holes(cj, e); -#else - const int should_do_ci = ci->black_holes.count != 0 && cj->hydro.count != 0 && - cell_is_active_black_holes(ci, e); - const int should_do_cj = cj->black_holes.count != 0 && ci->hydro.count != 0 && - cell_is_active_black_holes(cj, e); - -#endif - - if (!should_do_ci && !should_do_cj) return; - - /* Get the type of pair and flip ci/cj if needed. */ - double shift[3]; - const int sid = space_getsid(s, &ci, &cj, shift); - - /* Recurse? 
*/ - if (cell_can_recurse_in_pair_black_holes_task(ci, cj) && - cell_can_recurse_in_pair_black_holes_task(cj, ci)) { - struct cell_split_pair *csp = &cell_split_pairs[sid]; - for (int k = 0; k < csp->count; k++) { - const int pid = csp->pairs[k].pid; - const int pjd = csp->pairs[k].pjd; - if (ci->progeny[pid] != NULL && cj->progeny[pjd] != NULL) - DOSUB_PAIR1_BH(r, ci->progeny[pid], cj->progeny[pjd], 0); - } - } - - /* Otherwise, compute the pair directly. */ - else { - -#if (FUNCTION_TASK_LOOP == TASK_LOOP_DENSITY) - const int do_ci_bh = ci->nodeID == e->nodeID; - const int do_cj_bh = cj->nodeID == e->nodeID; -#elif (FUNCTION_TASK_LOOP == TASK_LOOP_FEEDBACK) - /* Here we are updating the hydro -> switch ci, cj */ - const int do_ci_bh = cj->nodeID == e->nodeID; - const int do_cj_bh = ci->nodeID == e->nodeID; -#else - /* Here we perform the task on both sides */ - const int do_ci_bh = 1; - const int do_cj_bh = 1; -#endif - - const int do_ci = ci->black_holes.count != 0 && - cell_is_active_black_holes(ci, e) && do_ci_bh; - const int do_cj = cj->black_holes.count != 0 && - cell_is_active_black_holes(cj, e) && do_cj_bh; - - if (do_ci) { - - /* Make sure both cells are drifted to the current timestep. */ - if (!cell_are_bpart_drifted(ci, e)) - error("Interacting undrifted cells (bparts)."); - - if (cj->hydro.count != 0 && !cell_are_part_drifted(cj, e)) - error("Interacting undrifted cells (parts)."); - } - - if (do_cj) { - - /* Make sure both cells are drifted to the current timestep. */ - if (ci->hydro.count != 0 && !cell_are_part_drifted(ci, e)) - error("Interacting undrifted cells (parts)."); - - if (!cell_are_bpart_drifted(cj, e)) - error("Interacting undrifted cells (bparts)."); - } - - if (do_ci || do_cj) DOPAIR1_BRANCH_BH(r, ci, cj); - } - - TIMER_TOC(TIMER_DOSUB_PAIR_BH); -} - -/** - * @brief Compute grouped sub-cell interactions for self tasks - * - * @param r The #runner. - * @param ci The first #cell. - * @param gettimer Do we have a timer ? - */ -void DOSUB_SELF1_BH(struct runner *r, struct cell *ci, int gettimer) { - - TIMER_TIC; - - const struct engine *e = r->e; - -#ifdef SWIFT_DEBUG_CHECKS - if (ci->nodeID != engine_rank) - error("This function should not be called on foreign cells"); -#endif - - /* Should we even bother? - * In the swallow case we care about BH-BH and BH-gas - * interactions. - * In all other cases only BH-gas so we can abort if there is - * is no gas in the cell */ -#if (FUNCTION_TASK_LOOP == TASK_LOOP_SWALLOW) - const int should_do_ci = - ci->black_holes.count != 0 && cell_is_active_black_holes(ci, e); -#else - const int should_do_ci = ci->black_holes.count != 0 && ci->hydro.count != 0 && - cell_is_active_black_holes(ci, e); -#endif - - if (!should_do_ci) return; - - /* Recurse? */ - if (cell_can_recurse_in_self_black_holes_task(ci)) { - - /* Loop over all progeny. */ - for (int k = 0; k < 8; k++) - if (ci->progeny[k] != NULL) { - DOSUB_SELF1_BH(r, ci->progeny[k], 0); - for (int j = k + 1; j < 8; j++) - if (ci->progeny[j] != NULL) - DOSUB_PAIR1_BH(r, ci->progeny[k], ci->progeny[j], 0); - } - } - - /* Otherwise, compute self-interaction. 
*/ - else { - - /* Check we did drift to the current time */ - if (!cell_are_bpart_drifted(ci, e)) error("Interacting undrifted cell."); - - if (ci->hydro.count != 0 && !cell_are_part_drifted(ci, e)) - error("Interacting undrifted cells (bparts)."); - - DOSELF1_BRANCH_BH(r, ci); - } - - TIMER_TOC(TIMER_DOSUB_SELF_BH); -} + int *ind, const int bcount, struct cell *cj, int gettimer); diff --git a/src/runner_doiact_functions_black_holes.h b/src/runner_doiact_functions_black_holes.h new file mode 100644 index 0000000000000000000000000000000000000000..f8af37c751a9f7a89455ae5c9a7ef72ec55a1c64 --- /dev/null +++ b/src/runner_doiact_functions_black_holes.h @@ -0,0 +1,877 @@ +/******************************************************************************* + * This file is part of SWIFT. + * Copyright (c) 2012 Pedro Gonnet (pedro.gonnet@durham.ac.uk) + * 2016 Matthieu Schaller (matthieu.schaller@durham.ac.uk) + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published + * by the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public License + * along with this program. If not, see <http://www.gnu.org/licenses/>. + * + ******************************************************************************/ + +/* Before including this file, define FUNCTION, which is the + name of the interaction function. This creates the interaction functions + runner_dopair_FUNCTION, runner_dopair_FUNCTION_naive, runner_doself_FUNCTION, + and runner_dosub_FUNCTION calling the pairwise interaction function + runner_iact_FUNCTION. */ + +#include "runner_doiact_black_holes.h" + +/** + * @brief Calculate the number density of #part around the #bpart + * + * @param r runner task + * @param c cell + * @param timer 1 if the time is to be recorded. + */ +void DOSELF1_BH(struct runner *r, struct cell *c, int timer) { + +#ifdef SWIFT_DEBUG_CHECKS + if (c->nodeID != engine_rank) error("Should be run on a different node"); +#endif + + TIMER_TIC; + + const struct engine *e = r->e; + const integertime_t ti_current = e->ti_current; + const struct cosmology *cosmo = e->cosmology; + + /* Anything to do here? */ + if (c->black_holes.count == 0) return; + if (!cell_is_active_black_holes(c, e)) return; + + const int bcount = c->black_holes.count; + const int count = c->hydro.count; + struct bpart *restrict bparts = c->black_holes.parts; + struct part *restrict parts = c->hydro.parts; + struct xpart *restrict xparts = c->hydro.xparts; + + /* Do we actually have any gas neighbours? */ + if (c->hydro.count != 0) { + + /* Loop over the bparts in ci. */ + for (int bid = 0; bid < bcount; bid++) { + + /* Get a hold of the ith bpart in ci. */ + struct bpart *restrict bi = &bparts[bid]; + + /* Skip inactive particles */ + if (!bpart_is_active(bi, e)) continue; + + const float hi = bi->h; + const float hig2 = hi * hi * kernel_gamma2; + const float bix[3] = {(float)(bi->x[0] - c->loc[0]), + (float)(bi->x[1] - c->loc[1]), + (float)(bi->x[2] - c->loc[2])}; + + /* Loop over the parts in cj. */ + for (int pjd = 0; pjd < count; pjd++) { + + /* Get a pointer to the jth particle. 
*/ + struct part *restrict pj = &parts[pjd]; + struct xpart *restrict xpj = &xparts[pjd]; + const float hj = pj->h; + + /* Early abort? */ + if (part_is_inhibited(pj, e)) continue; + + /* Compute the pairwise distance. */ + const float pjx[3] = {(float)(pj->x[0] - c->loc[0]), + (float)(pj->x[1] - c->loc[1]), + (float)(pj->x[2] - c->loc[2])}; + float dx[3] = {bix[0] - pjx[0], bix[1] - pjx[1], bix[2] - pjx[2]}; + const float r2 = dx[0] * dx[0] + dx[1] * dx[1] + dx[2] * dx[2]; + +#ifdef SWIFT_DEBUG_CHECKS + /* Check that particles have been drifted to the current time */ + if (bi->ti_drift != e->ti_current) + error("Particle bi not drifted to current time"); + if (pj->ti_drift != e->ti_current) + error("Particle pj not drifted to current time"); +#endif + + if (r2 < hig2) { + IACT_BH_GAS(r2, dx, hi, hj, bi, pj, xpj, cosmo, e->gravity_properties, + ti_current); + } + } /* loop over the parts in ci. */ + } /* loop over the bparts in ci. */ + } /* Do we have gas particles in the cell? */ + + /* When doing BH swallowing, we need a quick loop also over the BH + * neighbours */ +#if (FUNCTION_TASK_LOOP == TASK_LOOP_SWALLOW) + + /* Loop over the bparts in ci. */ + for (int bid = 0; bid < bcount; bid++) { + + /* Get a hold of the ith bpart in ci. */ + struct bpart *restrict bi = &bparts[bid]; + + /* Skip inactive particles */ + if (!bpart_is_active(bi, e)) continue; + + const float hi = bi->h; + const float hig2 = hi * hi * kernel_gamma2; + const float bix[3] = {(float)(bi->x[0] - c->loc[0]), + (float)(bi->x[1] - c->loc[1]), + (float)(bi->x[2] - c->loc[2])}; + + /* Loop over the parts in cj. */ + for (int bjd = 0; bjd < bcount; bjd++) { + + /* Skip self interaction */ + if (bid == bjd) continue; + + /* Get a pointer to the jth particle. */ + struct bpart *restrict bj = &bparts[bjd]; + const float hj = bj->h; + + /* Early abort? */ + if (bpart_is_inhibited(bj, e)) continue; + + /* Compute the pairwise distance. */ + const float bjx[3] = {(float)(bj->x[0] - c->loc[0]), + (float)(bj->x[1] - c->loc[1]), + (float)(bj->x[2] - c->loc[2])}; + float dx[3] = {bix[0] - bjx[0], bix[1] - bjx[1], bix[2] - bjx[2]}; + const float r2 = dx[0] * dx[0] + dx[1] * dx[1] + dx[2] * dx[2]; + +#ifdef SWIFT_DEBUG_CHECKS + /* Check that particles have been drifted to the current time */ + if (bi->ti_drift != e->ti_current) + error("Particle bi not drifted to current time"); + if (bj->ti_drift != e->ti_current) + error("Particle bj not drifted to current time"); +#endif + + if (r2 < hig2) { + IACT_BH_BH(r2, dx, hi, hj, bi, bj, cosmo, e->gravity_properties, + ti_current); + } + } /* loop over the bparts in ci. */ + } /* loop over the bparts in ci. */ + +#endif /* (FUNCTION_TASK_LOOP == TASK_LOOP_SWALLOW) */ + + TIMER_TOC(TIMER_DOSELF_BH); +} + +/** + * @brief Calculate the number density of cj #part around the ci #bpart + * + * @param r runner task + * @param ci The first #cell + * @param cj The second #cell + */ +void DO_NONSYM_PAIR1_BH_NAIVE(struct runner *r, struct cell *restrict ci, + struct cell *restrict cj) { + +#ifdef SWIFT_DEBUG_CHECKS +#if (FUNCTION_TASK_LOOP == TASK_LOOP_DENSITY) + if (ci->nodeID != engine_rank) error("Should be run on a different node"); +#elif (FUNCTION_TASK_LOOP == TASK_LOOP_FEEDBACK) + if (cj->nodeID != engine_rank) error("Should be run on a different node"); +#endif +#endif + + const struct engine *e = r->e; + const integertime_t ti_current = e->ti_current; + const struct cosmology *cosmo = e->cosmology; + + /* Anything to do here? 
*/ + if (ci->black_holes.count == 0) return; + if (!cell_is_active_black_holes(ci, e)) return; + + const int bcount_i = ci->black_holes.count; + const int count_j = cj->hydro.count; + struct bpart *restrict bparts_i = ci->black_holes.parts; + struct part *restrict parts_j = cj->hydro.parts; + struct xpart *restrict xparts_j = cj->hydro.xparts; + + /* Get the relative distance between the pairs, wrapping. */ + double shift[3] = {0.0, 0.0, 0.0}; + for (int k = 0; k < 3; k++) { + if (cj->loc[k] - ci->loc[k] < -e->s->dim[k] / 2) + shift[k] = e->s->dim[k]; + else if (cj->loc[k] - ci->loc[k] > e->s->dim[k] / 2) + shift[k] = -e->s->dim[k]; + } + + /* Do we actually have any gas neighbours? */ + if (cj->hydro.count != 0) { + + /* Loop over the bparts in ci. */ + for (int bid = 0; bid < bcount_i; bid++) { + + /* Get a hold of the ith bpart in ci. */ + struct bpart *restrict bi = &bparts_i[bid]; + + /* Skip inactive particles */ + if (!bpart_is_active(bi, e)) continue; + + const float hi = bi->h; + const float hig2 = hi * hi * kernel_gamma2; + const float bix[3] = {(float)(bi->x[0] - (cj->loc[0] + shift[0])), + (float)(bi->x[1] - (cj->loc[1] + shift[1])), + (float)(bi->x[2] - (cj->loc[2] + shift[2]))}; + + /* Loop over the parts in cj. */ + for (int pjd = 0; pjd < count_j; pjd++) { + + /* Get a pointer to the jth particle. */ + struct part *restrict pj = &parts_j[pjd]; + struct xpart *restrict xpj = &xparts_j[pjd]; + const float hj = pj->h; + + /* Skip inhibited particles. */ + if (part_is_inhibited(pj, e)) continue; + + /* Compute the pairwise distance. */ + const float pjx[3] = {(float)(pj->x[0] - cj->loc[0]), + (float)(pj->x[1] - cj->loc[1]), + (float)(pj->x[2] - cj->loc[2])}; + float dx[3] = {bix[0] - pjx[0], bix[1] - pjx[1], bix[2] - pjx[2]}; + const float r2 = dx[0] * dx[0] + dx[1] * dx[1] + dx[2] * dx[2]; + +#ifdef SWIFT_DEBUG_CHECKS + /* Check that particles have been drifted to the current time */ + if (bi->ti_drift != e->ti_current) + error("Particle bi not drifted to current time"); + if (pj->ti_drift != e->ti_current) + error("Particle pj not drifted to current time"); +#endif + + if (r2 < hig2) { + IACT_BH_GAS(r2, dx, hi, hj, bi, pj, xpj, cosmo, e->gravity_properties, + ti_current); + } + } /* loop over the parts in cj. */ + } /* loop over the bparts in ci. */ + } /* Do we have gas particles in the cell? */ + + /* When doing BH swallowing, we need a quick loop also over the BH + * neighbours */ +#if (FUNCTION_TASK_LOOP == TASK_LOOP_SWALLOW) + + const int bcount_j = cj->black_holes.count; + struct bpart *restrict bparts_j = cj->black_holes.parts; + + /* Loop over the bparts in ci. */ + for (int bid = 0; bid < bcount_i; bid++) { + + /* Get a hold of the ith bpart in ci. */ + struct bpart *restrict bi = &bparts_i[bid]; + + /* Skip inactive particles */ + if (!bpart_is_active(bi, e)) continue; + + const float hi = bi->h; + const float hig2 = hi * hi * kernel_gamma2; + const float bix[3] = {(float)(bi->x[0] - (cj->loc[0] + shift[0])), + (float)(bi->x[1] - (cj->loc[1] + shift[1])), + (float)(bi->x[2] - (cj->loc[2] + shift[2]))}; + + /* Loop over the bparts in cj. */ + for (int bjd = 0; bjd < bcount_j; bjd++) { + + /* Get a pointer to the jth particle. */ + struct bpart *restrict bj = &bparts_j[bjd]; + const float hj = bj->h; + + /* Skip inhibited particles. */ + if (bpart_is_inhibited(bj, e)) continue; + + /* Compute the pairwise distance. 
*/ + const float bjx[3] = {(float)(bj->x[0] - cj->loc[0]), + (float)(bj->x[1] - cj->loc[1]), + (float)(bj->x[2] - cj->loc[2])}; + float dx[3] = {bix[0] - bjx[0], bix[1] - bjx[1], bix[2] - bjx[2]}; + const float r2 = dx[0] * dx[0] + dx[1] * dx[1] + dx[2] * dx[2]; + +#ifdef SWIFT_DEBUG_CHECKS + /* Check that particles have been drifted to the current time */ + if (bi->ti_drift != e->ti_current) + error("Particle bi not drifted to current time"); + if (bj->ti_drift != e->ti_current) + error("Particle bj not drifted to current time"); +#endif + + if (r2 < hig2) { + IACT_BH_BH(r2, dx, hi, hj, bi, bj, cosmo, e->gravity_properties, + ti_current); + } + } /* loop over the bparts in cj. */ + } /* loop over the bparts in ci. */ + +#endif /* (FUNCTION_TASK_LOOP == TASK_LOOP_SWALLOW) */ +} + +void DOPAIR1_BH_NAIVE(struct runner *r, struct cell *restrict ci, + struct cell *restrict cj, int timer) { + + TIMER_TIC; + +#if (FUNCTION_TASK_LOOP == TASK_LOOP_DENSITY) + const int do_ci_bh = ci->nodeID == r->e->nodeID; + const int do_cj_bh = cj->nodeID == r->e->nodeID; +#elif (FUNCTION_TASK_LOOP == TASK_LOOP_FEEDBACK) + /* here we are updating the hydro -> switch ci, cj */ + const int do_ci_bh = cj->nodeID == r->e->nodeID; + const int do_cj_bh = ci->nodeID == r->e->nodeID; +#else + /* The swallow task is executed on both sides */ + const int do_ci_bh = 1; + const int do_cj_bh = 1; +#endif + + if (do_ci_bh) DO_NONSYM_PAIR1_BH_NAIVE(r, ci, cj); + if (do_cj_bh) DO_NONSYM_PAIR1_BH_NAIVE(r, cj, ci); + + TIMER_TOC(TIMER_DOPAIR_BH); +} + +/** + * @brief Compute the interactions between a cell pair, but only for the + * given indices in ci. + * + * Version using a brute-force algorithm. + * + * @param r The #runner. + * @param ci The first #cell. + * @param bparts_i The #bpart to interact with @c cj. + * @param ind The list of indices of particles in @c ci to interact with. + * @param bcount The number of particles in @c ind. + * @param cj The second #cell. + * @param shift The shift vector to apply to the particles in ci. + */ +void DOPAIR1_SUBSET_BH_NAIVE(struct runner *r, struct cell *restrict ci, + struct bpart *restrict bparts_i, int *restrict ind, + const int bcount, struct cell *restrict cj, + const double *shift) { + +#ifdef SWIFT_DEBUG_CHECKS + if (ci->nodeID != engine_rank) error("Should be run on a different node"); +#endif + + const struct engine *e = r->e; + const integertime_t ti_current = e->ti_current; + const struct cosmology *cosmo = e->cosmology; + + const int count_j = cj->hydro.count; + struct part *restrict parts_j = cj->hydro.parts; + struct xpart *restrict xparts_j = cj->hydro.xparts; + + /* Early abort? */ + if (count_j == 0) return; + + /* Loop over the parts_i. */ + for (int bid = 0; bid < bcount; bid++) { + + /* Get a hold of the ith part in ci. */ + struct bpart *restrict bi = &bparts_i[ind[bid]]; + + const double bix = bi->x[0] - (shift[0]); + const double biy = bi->x[1] - (shift[1]); + const double biz = bi->x[2] - (shift[2]); + const float hi = bi->h; + const float hig2 = hi * hi * kernel_gamma2; + +#ifdef SWIFT_DEBUG_CHECKS + if (!bpart_is_active(bi, e)) + error("Trying to correct smoothing length of inactive particle !"); +#endif + + /* Loop over the parts in cj. */ + for (int pjd = 0; pjd < count_j; pjd++) { + + /* Get a pointer to the jth particle. 
*/ + struct part *restrict pj = &parts_j[pjd]; + struct xpart *restrict xpj = &xparts_j[pjd]; + + /* Skip inhibited particles */ + if (part_is_inhibited(pj, e)) continue; + + const double pjx = pj->x[0]; + const double pjy = pj->x[1]; + const double pjz = pj->x[2]; + const float hj = pj->h; + + /* Compute the pairwise distance. */ + float dx[3] = {(float)(bix - pjx), (float)(biy - pjy), + (float)(biz - pjz)}; + const float r2 = dx[0] * dx[0] + dx[1] * dx[1] + dx[2] * dx[2]; + +#ifdef SWIFT_DEBUG_CHECKS + /* Check that particles have been drifted to the current time */ + if (pj->ti_drift != e->ti_current) + error("Particle pj not drifted to current time"); +#endif + /* Hit or miss? */ + if (r2 < hig2) { + IACT_BH_GAS(r2, dx, hi, hj, bi, pj, xpj, cosmo, e->gravity_properties, + ti_current); + } + } /* loop over the parts in cj. */ + } /* loop over the parts in ci. */ +} + +/** + * @brief Compute the interactions between a cell pair, but only for the + * given indices in ci. + * + * @param r The #runner. + * @param ci The first #cell. + * @param bparts The #bpart to interact. + * @param ind The list of indices of particles in @c ci to interact with. + * @param bcount The number of particles in @c ind. + */ +void DOSELF1_SUBSET_BH(struct runner *r, struct cell *restrict ci, + struct bpart *restrict bparts, int *restrict ind, + const int bcount) { + +#ifdef SWIFT_DEBUG_CHECKS + if (ci->nodeID != engine_rank) error("Should be run on a different node"); +#endif + + const struct engine *e = r->e; + const integertime_t ti_current = e->ti_current; + const struct cosmology *cosmo = e->cosmology; + + const int count_i = ci->hydro.count; + struct part *restrict parts_j = ci->hydro.parts; + struct xpart *restrict xparts_j = ci->hydro.xparts; + + /* Early abort? */ + if (count_i == 0) return; + + /* Loop over the parts in ci. */ + for (int bid = 0; bid < bcount; bid++) { + + /* Get a hold of the ith part in ci. */ + struct bpart *bi = &bparts[ind[bid]]; + const float bix[3] = {(float)(bi->x[0] - ci->loc[0]), + (float)(bi->x[1] - ci->loc[1]), + (float)(bi->x[2] - ci->loc[2])}; + const float hi = bi->h; + const float hig2 = hi * hi * kernel_gamma2; + +#ifdef SWIFT_DEBUG_CHECKS + if (!bpart_is_active(bi, e)) error("Inactive particle in subset function!"); +#endif + + /* Loop over the parts in cj. */ + for (int pjd = 0; pjd < count_i; pjd++) { + + /* Get a pointer to the jth particle. */ + struct part *restrict pj = &parts_j[pjd]; + struct xpart *restrict xpj = &xparts_j[pjd]; + + /* Early abort? */ + if (part_is_inhibited(pj, e)) continue; + + /* Compute the pairwise distance. */ + const float pjx[3] = {(float)(pj->x[0] - ci->loc[0]), + (float)(pj->x[1] - ci->loc[1]), + (float)(pj->x[2] - ci->loc[2])}; + float dx[3] = {bix[0] - pjx[0], bix[1] - pjx[1], bix[2] - pjx[2]}; + const float r2 = dx[0] * dx[0] + dx[1] * dx[1] + dx[2] * dx[2]; + +#ifdef SWIFT_DEBUG_CHECKS + /* Check that particles have been drifted to the current time */ + if (pj->ti_drift != e->ti_current) + error("Particle pj not drifted to current time"); +#endif + + /* Hit or miss? */ + if (r2 < hig2) { + IACT_BH_GAS(r2, dx, hi, pj->h, bi, pj, xpj, cosmo, + e->gravity_properties, ti_current); + } + } /* loop over the parts in cj. */ + } /* loop over the parts in ci. */ +} + +/** + * @brief Determine which version of DOSELF1_SUBSET_BH needs to be called + * depending on the optimisation level. + * + * @param r The #runner. + * @param ci The first #cell. + * @param bparts The #bpart to interact. 
+ * @param ind The list of indices of particles in @c ci to interact with. + * @param bcount The number of particles in @c ind. + */ +void DOSELF1_SUBSET_BRANCH_BH(struct runner *r, struct cell *restrict ci, + struct bpart *restrict bparts, int *restrict ind, + const int bcount) { + + DOSELF1_SUBSET_BH(r, ci, bparts, ind, bcount); +} + +/** + * @brief Determine which version of DOPAIR1_SUBSET_BH needs to be called + * depending on the orientation of the cells or whether DOPAIR1_SUBSET_BH + * needs to be called at all. + * + * @param r The #runner. + * @param ci The first #cell. + * @param bparts_i The #bpart to interact with @c cj. + * @param ind The list of indices of particles in @c ci to interact with. + * @param bcount The number of particles in @c ind. + * @param cj The second #cell. + */ +void DOPAIR1_SUBSET_BRANCH_BH(struct runner *r, struct cell *restrict ci, + struct bpart *restrict bparts_i, + int *restrict ind, int const bcount, + struct cell *restrict cj) { + + const struct engine *e = r->e; + + /* Anything to do here? */ + if (cj->hydro.count == 0) return; + + /* Get the relative distance between the pairs, wrapping. */ + double shift[3] = {0.0, 0.0, 0.0}; + for (int k = 0; k < 3; k++) { + if (cj->loc[k] - ci->loc[k] < -e->s->dim[k] / 2) + shift[k] = e->s->dim[k]; + else if (cj->loc[k] - ci->loc[k] > e->s->dim[k] / 2) + shift[k] = -e->s->dim[k]; + } + + DOPAIR1_SUBSET_BH_NAIVE(r, ci, bparts_i, ind, bcount, cj, shift); +} + +void DOSUB_SUBSET_BH(struct runner *r, struct cell *ci, struct bpart *bparts, + int *ind, const int bcount, struct cell *cj, + int gettimer) { + + const struct engine *e = r->e; + struct space *s = e->s; + + /* Should we even bother? */ + if (!cell_is_active_black_holes(ci, e) && + (cj == NULL || !cell_is_active_black_holes(cj, e))) + return; + + /* Find out in which sub-cell of ci the parts are. */ + struct cell *sub = NULL; + if (ci->split) { + for (int k = 0; k < 8; k++) { + if (ci->progeny[k] != NULL) { + if (&bparts[ind[0]] >= &ci->progeny[k]->black_holes.parts[0] && + &bparts[ind[0]] < + &ci->progeny[k] + ->black_holes.parts[ci->progeny[k]->black_holes.count]) { + sub = ci->progeny[k]; + break; + } + } + } + } + + /* Is this a single cell? */ + if (cj == NULL) { + + /* Recurse? */ + if (cell_can_recurse_in_self_black_holes_task(ci)) { + + /* Loop over all progeny. */ + DOSUB_SUBSET_BH(r, sub, bparts, ind, bcount, NULL, 0); + for (int j = 0; j < 8; j++) + if (ci->progeny[j] != sub && ci->progeny[j] != NULL) + DOSUB_SUBSET_BH(r, sub, bparts, ind, bcount, ci->progeny[j], 0); + + } + + /* Otherwise, compute self-interaction. */ + else + DOSELF1_SUBSET_BRANCH_BH(r, ci, bparts, ind, bcount); + } /* self-interaction. */ + + /* Otherwise, it's a pair interaction. */ + else { + + /* Recurse? */ + if (cell_can_recurse_in_pair_black_holes_task(ci, cj) && + cell_can_recurse_in_pair_black_holes_task(cj, ci)) { + + /* Get the type of pair and flip ci/cj if needed. */ + double shift[3] = {0.0, 0.0, 0.0}; + const int sid = space_getsid(s, &ci, &cj, shift); + + struct cell_split_pair *csp = &cell_split_pairs[sid]; + for (int k = 0; k < csp->count; k++) { + const int pid = csp->pairs[k].pid; + const int pjd = csp->pairs[k].pjd; + if (ci->progeny[pid] == sub && cj->progeny[pjd] != NULL) + DOSUB_SUBSET_BH(r, ci->progeny[pid], bparts, ind, bcount, + cj->progeny[pjd], 0); + if (ci->progeny[pid] != NULL && cj->progeny[pjd] == sub) + DOSUB_SUBSET_BH(r, cj->progeny[pjd], bparts, ind, bcount, + ci->progeny[pid], 0); + } + } + + /* Otherwise, compute the pair directly. 
*/ + else if (cell_is_active_black_holes(ci, e) && cj->hydro.count > 0) { + + /* Do any of the cells need to be drifted first? */ + if (cell_is_active_black_holes(ci, e)) { + if (!cell_are_bpart_drifted(ci, e)) error("Cell should be drifted!"); + if (!cell_are_part_drifted(cj, e)) error("Cell should be drifted!"); + } + + DOPAIR1_SUBSET_BRANCH_BH(r, ci, bparts, ind, bcount, cj); + } + + } /* otherwise, pair interaction. */ +} + +/** + * @brief Determine which version of DOSELF1_BH needs to be called depending + * on the optimisation level. + * + * @param r #runner + * @param c #cell c + * + */ +void DOSELF1_BRANCH_BH(struct runner *r, struct cell *c) { + + const struct engine *restrict e = r->e; + + /* Anything to do here? */ + if (c->black_holes.count == 0) return; + + /* Anything to do here? */ + if (!cell_is_active_black_holes(c, e)) return; + + /* Did we mess up the recursion? */ + if (c->black_holes.h_max_old * kernel_gamma > c->dmin) + error("Cell smaller than smoothing length"); + + DOSELF1_BH(r, c, 1); +} + +/** + * @brief Determine which version of DOPAIR1_BH needs to be called depending + * on the orientation of the cells or whether DOPAIR1_BH needs to be called + * at all. + * + * @param r #runner + * @param ci #cell ci + * @param cj #cell cj + * + */ +void DOPAIR1_BRANCH_BH(struct runner *r, struct cell *ci, struct cell *cj) { + + const struct engine *restrict e = r->e; + + const int ci_active = cell_is_active_black_holes(ci, e); + const int cj_active = cell_is_active_black_holes(cj, e); +#if (FUNCTION_TASK_LOOP == TASK_LOOP_DENSITY) + const int do_ci_bh = ci->nodeID == e->nodeID; + const int do_cj_bh = cj->nodeID == e->nodeID; +#elif (FUNCTION_TASK_LOOP == TASK_LOOP_FEEDBACK) + /* here we are updating the hydro -> switch ci, cj */ + const int do_ci_bh = cj->nodeID == e->nodeID; + const int do_cj_bh = ci->nodeID == e->nodeID; +#else + /* The swallow task is executed on both sides */ + const int do_ci_bh = 1; + const int do_cj_bh = 1; +#endif + + const int do_ci = (ci->black_holes.count != 0 && cj->hydro.count != 0 && + ci_active && do_ci_bh); + const int do_cj = (cj->black_holes.count != 0 && ci->hydro.count != 0 && + cj_active && do_cj_bh); + + /* Anything to do here? */ + if (!do_ci && !do_cj) return; + + /* Check that cells are drifted. */ + if (do_ci && + (!cell_are_bpart_drifted(ci, e) || !cell_are_part_drifted(cj, e))) + error("Interacting undrifted cells."); + + if (do_cj && + (!cell_are_part_drifted(ci, e) || !cell_are_bpart_drifted(cj, e))) + error("Interacting undrifted cells."); + + /* No sorted intreactions here -> use the naive ones */ + DOPAIR1_BH_NAIVE(r, ci, cj, 1); +} + +/** + * @brief Compute grouped sub-cell interactions for pairs + * + * @param r The #runner. + * @param ci The first #cell. + * @param cj The second #cell. + * @param gettimer Do we have a timer ? + * + * @todo Hard-code the sid on the recursive calls to avoid the + * redundant computations to find the sid on-the-fly. + */ +void DOSUB_PAIR1_BH(struct runner *r, struct cell *ci, struct cell *cj, + int gettimer) { + + TIMER_TIC; + + struct space *s = r->e->s; + const struct engine *e = r->e; + + /* Should we even bother? + * In the swallow case we care about BH-BH and BH-gas + * interactions. 
+ * In all other cases only BH-gas so we can abort if there is + * is no gas in the cell */ +#if (FUNCTION_TASK_LOOP == TASK_LOOP_SWALLOW) + const int should_do_ci = + ci->black_holes.count != 0 && cell_is_active_black_holes(ci, e); + const int should_do_cj = + cj->black_holes.count != 0 && cell_is_active_black_holes(cj, e); +#else + const int should_do_ci = ci->black_holes.count != 0 && cj->hydro.count != 0 && + cell_is_active_black_holes(ci, e); + const int should_do_cj = cj->black_holes.count != 0 && ci->hydro.count != 0 && + cell_is_active_black_holes(cj, e); + +#endif + + if (!should_do_ci && !should_do_cj) return; + + /* Get the type of pair and flip ci/cj if needed. */ + double shift[3]; + const int sid = space_getsid(s, &ci, &cj, shift); + + /* Recurse? */ + if (cell_can_recurse_in_pair_black_holes_task(ci, cj) && + cell_can_recurse_in_pair_black_holes_task(cj, ci)) { + struct cell_split_pair *csp = &cell_split_pairs[sid]; + for (int k = 0; k < csp->count; k++) { + const int pid = csp->pairs[k].pid; + const int pjd = csp->pairs[k].pjd; + if (ci->progeny[pid] != NULL && cj->progeny[pjd] != NULL) + DOSUB_PAIR1_BH(r, ci->progeny[pid], cj->progeny[pjd], 0); + } + } + + /* Otherwise, compute the pair directly. */ + else { + +#if (FUNCTION_TASK_LOOP == TASK_LOOP_DENSITY) + const int do_ci_bh = ci->nodeID == e->nodeID; + const int do_cj_bh = cj->nodeID == e->nodeID; +#elif (FUNCTION_TASK_LOOP == TASK_LOOP_FEEDBACK) + /* Here we are updating the hydro -> switch ci, cj */ + const int do_ci_bh = cj->nodeID == e->nodeID; + const int do_cj_bh = ci->nodeID == e->nodeID; +#else + /* Here we perform the task on both sides */ + const int do_ci_bh = 1; + const int do_cj_bh = 1; +#endif + + const int do_ci = ci->black_holes.count != 0 && + cell_is_active_black_holes(ci, e) && do_ci_bh; + const int do_cj = cj->black_holes.count != 0 && + cell_is_active_black_holes(cj, e) && do_cj_bh; + + if (do_ci) { + + /* Make sure both cells are drifted to the current timestep. */ + if (!cell_are_bpart_drifted(ci, e)) + error("Interacting undrifted cells (bparts)."); + + if (cj->hydro.count != 0 && !cell_are_part_drifted(cj, e)) + error("Interacting undrifted cells (parts)."); + } + + if (do_cj) { + + /* Make sure both cells are drifted to the current timestep. */ + if (ci->hydro.count != 0 && !cell_are_part_drifted(ci, e)) + error("Interacting undrifted cells (parts)."); + + if (!cell_are_bpart_drifted(cj, e)) + error("Interacting undrifted cells (bparts)."); + } + + if (do_ci || do_cj) DOPAIR1_BRANCH_BH(r, ci, cj); + } + + TIMER_TOC(TIMER_DOSUB_PAIR_BH); +} + +/** + * @brief Compute grouped sub-cell interactions for self tasks + * + * @param r The #runner. + * @param ci The first #cell. + * @param gettimer Do we have a timer ? + */ +void DOSUB_SELF1_BH(struct runner *r, struct cell *ci, int gettimer) { + + TIMER_TIC; + + const struct engine *e = r->e; + +#ifdef SWIFT_DEBUG_CHECKS + if (ci->nodeID != engine_rank) + error("This function should not be called on foreign cells"); +#endif + + /* Should we even bother? + * In the swallow case we care about BH-BH and BH-gas + * interactions. + * In all other cases only BH-gas so we can abort if there is + * is no gas in the cell */ +#if (FUNCTION_TASK_LOOP == TASK_LOOP_SWALLOW) + const int should_do_ci = + ci->black_holes.count != 0 && cell_is_active_black_holes(ci, e); +#else + const int should_do_ci = ci->black_holes.count != 0 && ci->hydro.count != 0 && + cell_is_active_black_holes(ci, e); +#endif + + if (!should_do_ci) return; + + /* Recurse? 
*/ + if (cell_can_recurse_in_self_black_holes_task(ci)) { + + /* Loop over all progeny. */ + for (int k = 0; k < 8; k++) + if (ci->progeny[k] != NULL) { + DOSUB_SELF1_BH(r, ci->progeny[k], 0); + for (int j = k + 1; j < 8; j++) + if (ci->progeny[j] != NULL) + DOSUB_PAIR1_BH(r, ci->progeny[k], ci->progeny[j], 0); + } + } + + /* Otherwise, compute self-interaction. */ + else { + + /* Check we did drift to the current time */ + if (!cell_are_bpart_drifted(ci, e)) error("Interacting undrifted cell."); + + if (ci->hydro.count != 0 && !cell_are_part_drifted(ci, e)) + error("Interacting undrifted cells (bparts)."); + + DOSELF1_BRANCH_BH(r, ci); + } + + TIMER_TOC(TIMER_DOSUB_SELF_BH); +} diff --git a/src/runner_doiact.h b/src/runner_doiact_functions_hydro.h similarity index 96% rename from src/runner_doiact.h rename to src/runner_doiact_functions_hydro.h index 8aabb05d177385c6bbee1a91eb2ea231ccbca3e4..c324c759b5acc9db75cf0849d0e417b2141978f4 100644 --- a/src/runner_doiact.h +++ b/src/runner_doiact_functions_hydro.h @@ -24,106 +24,7 @@ and runner_dosub_FUNCTION calling the pairwise interaction function runner_iact_FUNCTION. */ -#define PASTE(x, y) x##_##y - -#define _DOPAIR1_BRANCH(f) PASTE(runner_dopair1_branch, f) -#define DOPAIR1_BRANCH _DOPAIR1_BRANCH(FUNCTION) - -#define _DOPAIR1(f) PASTE(runner_dopair1, f) -#define DOPAIR1 _DOPAIR1(FUNCTION) - -#define _DOPAIR2_BRANCH(f) PASTE(runner_dopair2_branch, f) -#define DOPAIR2_BRANCH _DOPAIR2_BRANCH(FUNCTION) - -#define _DOPAIR2(f) PASTE(runner_dopair2, f) -#define DOPAIR2 _DOPAIR2(FUNCTION) - -#define _DOPAIR_SUBSET(f) PASTE(runner_dopair_subset, f) -#define DOPAIR_SUBSET _DOPAIR_SUBSET(FUNCTION) - -#define _DOPAIR_SUBSET_BRANCH(f) PASTE(runner_dopair_subset_branch, f) -#define DOPAIR_SUBSET_BRANCH _DOPAIR_SUBSET_BRANCH(FUNCTION) - -#define _DOPAIR_SUBSET_NOSORT(f) PASTE(runner_dopair_subset_nosort, f) -#define DOPAIR_SUBSET_NOSORT _DOPAIR_SUBSET_NOSORT(FUNCTION) - -#define _DOPAIR_SUBSET_NAIVE(f) PASTE(runner_dopair_subset_naive, f) -#define DOPAIR_SUBSET_NAIVE _DOPAIR_SUBSET_NAIVE(FUNCTION) - -#define _DOPAIR1_NAIVE(f) PASTE(runner_dopair1_naive, f) -#define DOPAIR1_NAIVE _DOPAIR1_NAIVE(FUNCTION) - -#define _DOPAIR2_NAIVE(f) PASTE(runner_dopair2_naive, f) -#define DOPAIR2_NAIVE _DOPAIR2_NAIVE(FUNCTION) - -#define _DOSELF1_NAIVE(f) PASTE(runner_doself1_naive, f) -#define DOSELF1_NAIVE _DOSELF1_NAIVE(FUNCTION) - -#define _DOSELF2_NAIVE(f) PASTE(runner_doself2_naive, f) -#define DOSELF2_NAIVE _DOSELF2_NAIVE(FUNCTION) - -#define _DOSELF1_BRANCH(f) PASTE(runner_doself1_branch, f) -#define DOSELF1_BRANCH _DOSELF1_BRANCH(FUNCTION) - -#define _DOSELF1(f) PASTE(runner_doself1, f) -#define DOSELF1 _DOSELF1(FUNCTION) - -#define _DOSELF2_BRANCH(f) PASTE(runner_doself2_branch, f) -#define DOSELF2_BRANCH _DOSELF2_BRANCH(FUNCTION) - -#define _DOSELF2(f) PASTE(runner_doself2, f) -#define DOSELF2 _DOSELF2(FUNCTION) - -#define _DOSELF_SUBSET(f) PASTE(runner_doself_subset, f) -#define DOSELF_SUBSET _DOSELF_SUBSET(FUNCTION) - -#define _DOSELF_SUBSET_BRANCH(f) PASTE(runner_doself_subset_branch, f) -#define DOSELF_SUBSET_BRANCH _DOSELF_SUBSET_BRANCH(FUNCTION) - -#define _DOSUB_SELF1(f) PASTE(runner_dosub_self1, f) -#define DOSUB_SELF1 _DOSUB_SELF1(FUNCTION) - -#define _DOSUB_PAIR1(f) PASTE(runner_dosub_pair1, f) -#define DOSUB_PAIR1 _DOSUB_PAIR1(FUNCTION) - -#define _DOSUB_SELF2(f) PASTE(runner_dosub_self2, f) -#define DOSUB_SELF2 _DOSUB_SELF2(FUNCTION) - -#define _DOSUB_PAIR2(f) PASTE(runner_dosub_pair2, f) -#define DOSUB_PAIR2 _DOSUB_PAIR2(FUNCTION) - -#define 
_DOSUB_SUBSET(f) PASTE(runner_dosub_subset, f) -#define DOSUB_SUBSET _DOSUB_SUBSET(FUNCTION) - -#define _IACT_NONSYM(f) PASTE(runner_iact_nonsym, f) -#define IACT_NONSYM _IACT_NONSYM(FUNCTION) - -#define _IACT(f) PASTE(runner_iact, f) -#define IACT _IACT(FUNCTION) - -#define _IACT_NONSYM_VEC(f) PASTE(runner_iact_nonsym_vec, f) -#define IACT_NONSYM_VEC _IACT_NONSYM_VEC(FUNCTION) - -#define _IACT_VEC(f) PASTE(runner_iact_vec, f) -#define IACT_VEC _IACT_VEC(FUNCTION) - -#define _TIMER_DOSELF(f) PASTE(timer_doself, f) -#define TIMER_DOSELF _TIMER_DOSELF(FUNCTION) - -#define _TIMER_DOPAIR(f) PASTE(timer_dopair, f) -#define TIMER_DOPAIR _TIMER_DOPAIR(FUNCTION) - -#define _TIMER_DOSUB_SELF(f) PASTE(timer_dosub_self, f) -#define TIMER_DOSUB_SELF _TIMER_DOSUB_SELF(FUNCTION) - -#define _TIMER_DOSUB_PAIR(f) PASTE(timer_dosub_pair, f) -#define TIMER_DOSUB_PAIR _TIMER_DOSUB_PAIR(FUNCTION) - -#define _TIMER_DOSELF_SUBSET(f) PASTE(timer_doself_subset, f) -#define TIMER_DOSELF_SUBSET _TIMER_DOSELF_SUBSET(FUNCTION) - -#define _TIMER_DOPAIR_SUBSET(f) PASTE(timer_dopair_subset, f) -#define TIMER_DOPAIR_SUBSET _TIMER_DOPAIR_SUBSET(FUNCTION) +#include "runner_doiact_hydro.h" /** * @brief Compute the interactions between a cell pair (non-symmetric case). diff --git a/src/runner_doiact_functions_stars.h b/src/runner_doiact_functions_stars.h new file mode 100644 index 0000000000000000000000000000000000000000..b0d731857e9b4b0474e47c3ac3fca540eecb1cbb --- /dev/null +++ b/src/runner_doiact_functions_stars.h @@ -0,0 +1,1332 @@ +/******************************************************************************* + * This file is part of SWIFT. + * Copyright (c) 2012 Pedro Gonnet (pedro.gonnet@durham.ac.uk) + * 2016 Matthieu Schaller (matthieu.schaller@durham.ac.uk) + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published + * by the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public License + * along with this program. If not, see <http://www.gnu.org/licenses/>. + * + ******************************************************************************/ + +/* Before including this file, define FUNCTION, which is the + name of the interaction function. This creates the interaction functions + runner_dopair_FUNCTION, runner_dopair_FUNCTION_naive, runner_doself_FUNCTION, + and runner_dosub_FUNCTION calling the pairwise interaction function + runner_iact_FUNCTION. */ + +#include "runner_doiact_stars.h" + +/** + * @brief Calculate the number density of #part around the #spart + * + * @param r runner task + * @param c cell + * @param timer 1 if the time is to be recorded. + */ +void DOSELF1_STARS(struct runner *r, struct cell *c, int timer) { + +#ifdef SWIFT_DEBUG_CHECKS + if (c->nodeID != engine_rank) error("Should be run on a different node"); +#endif + + TIMER_TIC; + + const struct engine *e = r->e; + const int with_cosmology = e->policy & engine_policy_cosmology; + const integertime_t ti_current = e->ti_current; + const struct cosmology *cosmo = e->cosmology; + + /* Anything to do here? 
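+ * (Nothing to do if the cell holds no gas, holds no stars, or none of its
+ * stars are active at the current time-step. Note that the same source
+ * generates both the stellar density loop and the feedback-application
+ * loop: which runner_iact_nonsym_feedback_* call is made at the bottom of
+ * the loop below depends on FUNCTION_TASK_LOOP.)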
*/ + if (c->hydro.count == 0 || c->stars.count == 0) return; + if (!cell_is_active_stars(c, e)) return; + + /* Cosmological terms */ + const float a = cosmo->a; + const float H = cosmo->H; + + const int scount = c->stars.count; + const int count = c->hydro.count; + struct spart *restrict sparts = c->stars.parts; + struct part *restrict parts = c->hydro.parts; + struct xpart *restrict xparts = c->hydro.xparts; + + /* Loop over the sparts in ci. */ + for (int sid = 0; sid < scount; sid++) { + + /* Get a hold of the ith spart in ci. */ + struct spart *restrict si = &sparts[sid]; + + /* Skip inactive particles */ + if (!spart_is_active(si, e)) continue; + + /* Skip inactive particles */ + if (!feedback_is_active(si, e->time, cosmo, with_cosmology)) continue; + + const float hi = si->h; + const float hig2 = hi * hi * kernel_gamma2; + const float six[3] = {(float)(si->x[0] - c->loc[0]), + (float)(si->x[1] - c->loc[1]), + (float)(si->x[2] - c->loc[2])}; + + /* Loop over the parts in cj. */ + for (int pjd = 0; pjd < count; pjd++) { + + /* Get a pointer to the jth particle. */ + struct part *restrict pj = &parts[pjd]; + struct xpart *restrict xpj = &xparts[pjd]; + const float hj = pj->h; + + /* Early abort? */ + if (part_is_inhibited(pj, e)) continue; + + /* Compute the pairwise distance. */ + const float pjx[3] = {(float)(pj->x[0] - c->loc[0]), + (float)(pj->x[1] - c->loc[1]), + (float)(pj->x[2] - c->loc[2])}; + float dx[3] = {six[0] - pjx[0], six[1] - pjx[1], six[2] - pjx[2]}; + const float r2 = dx[0] * dx[0] + dx[1] * dx[1] + dx[2] * dx[2]; + +#ifdef SWIFT_DEBUG_CHECKS + /* Check that particles have been drifted to the current time */ + if (pj->ti_drift != e->ti_current) + error("Particle pj not drifted to current time"); +#endif + + if (r2 < hig2) { + IACT_STARS(r2, dx, hi, hj, si, pj, a, H); +#if (FUNCTION_TASK_LOOP == TASK_LOOP_DENSITY) + runner_iact_nonsym_feedback_density(r2, dx, hi, hj, si, pj, xpj, cosmo, + ti_current); +#elif (FUNCTION_TASK_LOOP == TASK_LOOP_FEEDBACK) + runner_iact_nonsym_feedback_apply(r2, dx, hi, hj, si, pj, xpj, cosmo, + ti_current); +#endif + } + } /* loop over the parts in ci. */ + } /* loop over the sparts in ci. */ + + TIMER_TOC(TIMER_DOSELF_STARS); +} + +/** + * @brief Calculate the number density of cj #part around the ci #spart + * + * @param r runner task + * @param ci The first #cell + * @param cj The second #cell + */ +void DO_NONSYM_PAIR1_STARS_NAIVE(struct runner *r, struct cell *restrict ci, + struct cell *restrict cj) { + +#ifdef SWIFT_DEBUG_CHECKS +#if (FUNCTION_TASK_LOOP == TASK_LOOP_DENSITY) + if (ci->nodeID != engine_rank) error("Should be run on a different node"); +#else + if (cj->nodeID != engine_rank) error("Should be run on a different node"); +#endif +#endif + + const struct engine *e = r->e; + const int with_cosmology = e->policy & engine_policy_cosmology; + const integertime_t ti_current = e->ti_current; + const struct cosmology *cosmo = e->cosmology; + + /* Anything to do here? */ + if (cj->hydro.count == 0 || ci->stars.count == 0) return; + if (!cell_is_active_stars(ci, e)) return; + + /* Cosmological terms */ + const float a = cosmo->a; + const float H = cosmo->H; + + const int scount_i = ci->stars.count; + const int count_j = cj->hydro.count; + struct spart *restrict sparts_i = ci->stars.parts; + struct part *restrict parts_j = cj->hydro.parts; + struct xpart *restrict xparts_j = cj->hydro.xparts; + + /* Get the relative distance between the pairs, wrapping. 
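+ * The wrapping selects the periodic image of cj that is closest to ci: if
+ * cj appears more than half a box size away along an axis, one box length
+ * is added to (or removed from) cj's position along that axis. Worked
+ * example (illustrative numbers): with dim[0] = 100, ci->loc[0] = 95 and
+ * cj->loc[0] = 5, the difference is -90 < -50, so shift[0] = +100 and cj
+ * is treated as sitting at x = 105, i.e. right next to ci across the
+ * periodic boundary.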
*/ + double shift[3] = {0.0, 0.0, 0.0}; + for (int k = 0; k < 3; k++) { + if (cj->loc[k] - ci->loc[k] < -e->s->dim[k] / 2) + shift[k] = e->s->dim[k]; + else if (cj->loc[k] - ci->loc[k] > e->s->dim[k] / 2) + shift[k] = -e->s->dim[k]; + } + + /* Loop over the sparts in ci. */ + for (int sid = 0; sid < scount_i; sid++) { + + /* Get a hold of the ith spart in ci. */ + struct spart *restrict si = &sparts_i[sid]; + + /* Skip inactive particles */ + if (!spart_is_active(si, e)) continue; + + /* Skip inactive particles */ + if (!feedback_is_active(si, e->time, cosmo, with_cosmology)) continue; + + const float hi = si->h; + const float hig2 = hi * hi * kernel_gamma2; + const float six[3] = {(float)(si->x[0] - (cj->loc[0] + shift[0])), + (float)(si->x[1] - (cj->loc[1] + shift[1])), + (float)(si->x[2] - (cj->loc[2] + shift[2]))}; + + /* Loop over the parts in cj. */ + for (int pjd = 0; pjd < count_j; pjd++) { + + /* Get a pointer to the jth particle. */ + struct part *restrict pj = &parts_j[pjd]; + struct xpart *restrict xpj = &xparts_j[pjd]; + const float hj = pj->h; + + /* Skip inhibited particles. */ + if (part_is_inhibited(pj, e)) continue; + + /* Compute the pairwise distance. */ + const float pjx[3] = {(float)(pj->x[0] - cj->loc[0]), + (float)(pj->x[1] - cj->loc[1]), + (float)(pj->x[2] - cj->loc[2])}; + float dx[3] = {six[0] - pjx[0], six[1] - pjx[1], six[2] - pjx[2]}; + const float r2 = dx[0] * dx[0] + dx[1] * dx[1] + dx[2] * dx[2]; + +#ifdef SWIFT_DEBUG_CHECKS + /* Check that particles have been drifted to the current time */ + if (pj->ti_drift != e->ti_current) + error("Particle pj not drifted to current time"); +#endif + + if (r2 < hig2) { + IACT_STARS(r2, dx, hi, hj, si, pj, a, H); + +#if (FUNCTION_TASK_LOOP == TASK_LOOP_DENSITY) + runner_iact_nonsym_feedback_density(r2, dx, hi, hj, si, pj, xpj, cosmo, + ti_current); +#elif (FUNCTION_TASK_LOOP == TASK_LOOP_FEEDBACK) + runner_iact_nonsym_feedback_apply(r2, dx, hi, hj, si, pj, xpj, cosmo, + ti_current); +#endif + } + } /* loop over the parts in cj. */ + } /* loop over the parts in ci. */ +} + +/** + * @brief Compute the interactions between a cell pair. + * + * @param r The #runner. + * @param ci The first #cell. + * @param cj The second #cell. + * @param sid The direction of the pair. + * @param shift The shift vector to apply to the particles in ci. + */ +void DO_SYM_PAIR1_STARS(struct runner *r, struct cell *ci, struct cell *cj, + const int sid, const double *shift) { + + TIMER_TIC; + + const struct engine *e = r->e; + const int with_cosmology = e->policy & engine_policy_cosmology; + const integertime_t ti_current = e->ti_current; + const struct cosmology *cosmo = e->cosmology; + + /* Cosmological terms */ + const float a = cosmo->a; + const float H = cosmo->H; + + /* Get the cutoff shift. 
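+ * rshift is the projection of the periodic shift onto the axis of the pair,
+ *
+ *   rshift = shift[0] * runner_shift[sid][0]
+ *          + shift[1] * runner_shift[sid][1]
+ *          + shift[2] * runner_shift[sid][2],
+ *
+ * so that the sorted positions of ci and cj can be compared on a common
+ * origin in the pruning conditions below (e.g. sort_i[pid].d + hi_max +
+ * dx_max > dj_min).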
*/ + double rshift = 0.0; + for (int k = 0; k < 3; k++) rshift += shift[k] * runner_shift[sid][k]; + +#if (FUNCTION_TASK_LOOP == TASK_LOOP_DENSITY) + const int do_ci_stars = (ci->nodeID == e->nodeID) && (ci->stars.count != 0) && + (cj->hydro.count != 0) && cell_is_active_stars(ci, e); + const int do_cj_stars = (cj->nodeID == e->nodeID) && (cj->stars.count != 0) && + (ci->hydro.count != 0) && cell_is_active_stars(cj, e); +#else + /* here we are updating the hydro -> switch ci, cj for local */ + const int do_ci_stars = (cj->nodeID == e->nodeID) && (ci->stars.count != 0) && + (cj->hydro.count != 0) && cell_is_active_stars(ci, e); + const int do_cj_stars = (ci->nodeID == e->nodeID) && (cj->stars.count != 0) && + (ci->hydro.count != 0) && cell_is_active_stars(cj, e); +#endif + + if (do_ci_stars) { + + /* Pick-out the sorted lists. */ + const struct sort_entry *restrict sort_j = cj->hydro.sort[sid]; + const struct sort_entry *restrict sort_i = ci->stars.sort[sid]; + +#ifdef SWIFT_DEBUG_CHECKS + /* Some constants used to checks that the parts are in the right frame */ + const float shift_threshold_x = + 2. * ci->width[0] + + 2. * max(ci->stars.dx_max_part, cj->hydro.dx_max_part); + const float shift_threshold_y = + 2. * ci->width[1] + + 2. * max(ci->stars.dx_max_part, cj->hydro.dx_max_part); + const float shift_threshold_z = + 2. * ci->width[2] + + 2. * max(ci->stars.dx_max_part, cj->hydro.dx_max_part); +#endif /* SWIFT_DEBUG_CHECKS */ + + /* Get some other useful values. */ + const double hi_max = ci->stars.h_max * kernel_gamma - rshift; + const int count_i = ci->stars.count; + const int count_j = cj->hydro.count; + struct spart *restrict sparts_i = ci->stars.parts; + struct part *restrict parts_j = cj->hydro.parts; + struct xpart *restrict xparts_j = cj->hydro.xparts; + const double dj_min = sort_j[0].d; + const float dx_max_rshift = + (ci->stars.dx_max_sort + cj->hydro.dx_max_sort) - rshift; + const float dx_max = (ci->stars.dx_max_sort + cj->hydro.dx_max_sort); + + /* Loop over the sparts in ci. */ + for (int pid = count_i - 1; + pid >= 0 && sort_i[pid].d + hi_max + dx_max > dj_min; pid--) { + + /* Get a hold of the ith part in ci. */ + struct spart *restrict spi = &sparts_i[sort_i[pid].i]; + const float hi = spi->h; + + /* Skip inactive particles */ + if (!spart_is_active(spi, e)) continue; + + /* Skip inactive particles */ + if (!feedback_is_active(spi, e->time, cosmo, with_cosmology)) continue; + + /* Compute distance from the other cell. */ + const double px[3] = {spi->x[0], spi->x[1], spi->x[2]}; + float dist = px[0] * runner_shift[sid][0] + px[1] * runner_shift[sid][1] + + px[2] * runner_shift[sid][2]; + + /* Is there anything we need to interact with ? */ + const double di = dist + hi * kernel_gamma + dx_max_rshift; + if (di < dj_min) continue; + + /* Get some additional information about pi */ + const float hig2 = hi * hi * kernel_gamma2; + const float pix = spi->x[0] - (cj->loc[0] + shift[0]); + const float piy = spi->x[1] - (cj->loc[1] + shift[1]); + const float piz = spi->x[2] - (cj->loc[2] + shift[2]); + + /* Loop over the parts in cj. */ + for (int pjd = 0; pjd < count_j && sort_j[pjd].d < di; pjd++) { + + /* Recover pj */ + struct part *pj = &parts_j[sort_j[pjd].i]; + struct xpart *xpj = &xparts_j[sort_j[pjd].i]; + + /* Skip inhibited particles. 
*/ + if (part_is_inhibited(pj, e)) continue; + + const float hj = pj->h; + const float pjx = pj->x[0] - cj->loc[0]; + const float pjy = pj->x[1] - cj->loc[1]; + const float pjz = pj->x[2] - cj->loc[2]; + + /* Compute the pairwise distance. */ + float dx[3] = {pix - pjx, piy - pjy, piz - pjz}; + const float r2 = dx[0] * dx[0] + dx[1] * dx[1] + dx[2] * dx[2]; + +#ifdef SWIFT_DEBUG_CHECKS + /* Check that particles are in the correct frame after the shifts */ + if (pix > shift_threshold_x || pix < -shift_threshold_x) + error( + "Invalid particle position in X for pi (pix=%e ci->width[0]=%e)", + pix, ci->width[0]); + if (piy > shift_threshold_y || piy < -shift_threshold_y) + error( + "Invalid particle position in Y for pi (piy=%e ci->width[1]=%e)", + piy, ci->width[1]); + if (piz > shift_threshold_z || piz < -shift_threshold_z) + error( + "Invalid particle position in Z for pi (piz=%e ci->width[2]=%e)", + piz, ci->width[2]); + if (pjx > shift_threshold_x || pjx < -shift_threshold_x) + error( + "Invalid particle position in X for pj (pjx=%e ci->width[0]=%e)", + pjx, ci->width[0]); + if (pjy > shift_threshold_y || pjy < -shift_threshold_y) + error( + "Invalid particle position in Y for pj (pjy=%e ci->width[1]=%e)", + pjy, ci->width[1]); + if (pjz > shift_threshold_z || pjz < -shift_threshold_z) + error( + "Invalid particle position in Z for pj (pjz=%e ci->width[2]=%e)", + pjz, ci->width[2]); + + /* Check that particles have been drifted to the current time */ + if (spi->ti_drift != e->ti_current) + error("Particle spi not drifted to current time"); + if (pj->ti_drift != e->ti_current) + error("Particle pj not drifted to current time"); +#endif + + /* Hit or miss? */ + if (r2 < hig2) { + IACT_STARS(r2, dx, hi, hj, spi, pj, a, H); + +#if (FUNCTION_TASK_LOOP == TASK_LOOP_DENSITY) + runner_iact_nonsym_feedback_density(r2, dx, hi, hj, spi, pj, xpj, + cosmo, ti_current); +#elif (FUNCTION_TASK_LOOP == TASK_LOOP_FEEDBACK) + runner_iact_nonsym_feedback_apply(r2, dx, hi, hj, spi, pj, xpj, cosmo, + ti_current); +#endif + } + } /* loop over the parts in cj. */ + } /* loop over the parts in ci. */ + } /* do_ci_stars */ + + if (do_cj_stars) { + /* Pick-out the sorted lists. */ + const struct sort_entry *restrict sort_i = ci->hydro.sort[sid]; + const struct sort_entry *restrict sort_j = cj->stars.sort[sid]; + +#ifdef SWIFT_DEBUG_CHECKS + /* Some constants used to checks that the parts are in the right frame */ + const float shift_threshold_x = + 2. * ci->width[0] + + 2. * max(ci->hydro.dx_max_part, cj->stars.dx_max_part); + const float shift_threshold_y = + 2. * ci->width[1] + + 2. * max(ci->hydro.dx_max_part, cj->stars.dx_max_part); + const float shift_threshold_z = + 2. * ci->width[2] + + 2. * max(ci->hydro.dx_max_part, cj->stars.dx_max_part); +#endif /* SWIFT_DEBUG_CHECKS */ + + /* Get some other useful values. */ + const double hj_max = cj->hydro.h_max * kernel_gamma; + const int count_i = ci->hydro.count; + const int count_j = cj->stars.count; + struct part *restrict parts_i = ci->hydro.parts; + struct xpart *restrict xparts_i = ci->hydro.xparts; + struct spart *restrict sparts_j = cj->stars.parts; + const double di_max = sort_i[count_i - 1].d - rshift; + const float dx_max_rshift = + (ci->hydro.dx_max_sort + cj->stars.dx_max_sort) + rshift; + const float dx_max = (ci->hydro.dx_max_sort + cj->stars.dx_max_sort); + + /* Loop over the parts in cj. */ + for (int pjd = 0; pjd < count_j && sort_j[pjd].d - hj_max - dx_max < di_max; + pjd++) { + + /* Get a hold of the jth part in cj. 
*/ + struct spart *spj = &sparts_j[sort_j[pjd].i]; + const float hj = spj->h; + + /* Skip inactive particles */ + if (!spart_is_active(spj, e)) continue; + + /* Skip inactive particles */ + if (!feedback_is_active(spj, e->time, cosmo, with_cosmology)) continue; + + /* Compute distance from the other cell. */ + const double px[3] = {spj->x[0], spj->x[1], spj->x[2]}; + float dist = px[0] * runner_shift[sid][0] + px[1] * runner_shift[sid][1] + + px[2] * runner_shift[sid][2]; + + /* Is there anything we need to interact with ? */ + const double dj = dist - hj * kernel_gamma - dx_max_rshift; + if (dj - rshift > di_max) continue; + + /* Get some additional information about pj */ + const float hjg2 = hj * hj * kernel_gamma2; + const float pjx = spj->x[0] - cj->loc[0]; + const float pjy = spj->x[1] - cj->loc[1]; + const float pjz = spj->x[2] - cj->loc[2]; + + /* Loop over the parts in ci. */ + for (int pid = count_i - 1; pid >= 0 && sort_i[pid].d > dj; pid--) { + + /* Recover pi */ + struct part *pi = &parts_i[sort_i[pid].i]; + struct xpart *xpi = &xparts_i[sort_i[pid].i]; + + /* Skip inhibited particles. */ + if (part_is_inhibited(pi, e)) continue; + + const float hi = pi->h; + const float pix = pi->x[0] - (cj->loc[0] + shift[0]); + const float piy = pi->x[1] - (cj->loc[1] + shift[1]); + const float piz = pi->x[2] - (cj->loc[2] + shift[2]); + + /* Compute the pairwise distance. */ + float dx[3] = {pjx - pix, pjy - piy, pjz - piz}; + const float r2 = dx[0] * dx[0] + dx[1] * dx[1] + dx[2] * dx[2]; + +#ifdef SWIFT_DEBUG_CHECKS + /* Check that particles are in the correct frame after the shifts */ + if (pix > shift_threshold_x || pix < -shift_threshold_x) + error( + "Invalid particle position in X for pi (pix=%e ci->width[0]=%e)", + pix, ci->width[0]); + if (piy > shift_threshold_y || piy < -shift_threshold_y) + error( + "Invalid particle position in Y for pi (piy=%e ci->width[1]=%e)", + piy, ci->width[1]); + if (piz > shift_threshold_z || piz < -shift_threshold_z) + error( + "Invalid particle position in Z for pi (piz=%e ci->width[2]=%e)", + piz, ci->width[2]); + if (pjx > shift_threshold_x || pjx < -shift_threshold_x) + error( + "Invalid particle position in X for pj (pjx=%e ci->width[0]=%e)", + pjx, ci->width[0]); + if (pjy > shift_threshold_y || pjy < -shift_threshold_y) + error( + "Invalid particle position in Y for pj (pjy=%e ci->width[1]=%e)", + pjy, ci->width[1]); + if (pjz > shift_threshold_z || pjz < -shift_threshold_z) + error( + "Invalid particle position in Z for pj (pjz=%e ci->width[2]=%e)", + pjz, ci->width[2]); + + /* Check that particles have been drifted to the current time */ + if (pi->ti_drift != e->ti_current) + error("Particle pi not drifted to current time"); + if (spj->ti_drift != e->ti_current) + error("Particle spj not drifted to current time"); +#endif + + /* Hit or miss? */ + if (r2 < hjg2) { + + IACT_STARS(r2, dx, hj, hi, spj, pi, a, H); + +#if (FUNCTION_TASK_LOOP == TASK_LOOP_DENSITY) + runner_iact_nonsym_feedback_density(r2, dx, hj, hi, spj, pi, xpi, + cosmo, ti_current); +#elif (FUNCTION_TASK_LOOP == TASK_LOOP_FEEDBACK) + runner_iact_nonsym_feedback_apply(r2, dx, hj, hi, spj, pi, xpi, cosmo, + ti_current); +#endif + } + } /* loop over the parts in ci. */ + } /* loop over the parts in cj. 
*/ + } /* Cell cj is active */ + + TIMER_TOC(TIMER_DOPAIR_STARS); +} + +void DOPAIR1_STARS_NAIVE(struct runner *r, struct cell *restrict ci, + struct cell *restrict cj, int timer) { + + TIMER_TIC; + +#if (FUNCTION_TASK_LOOP == TASK_LOOP_DENSITY) + const int do_ci_stars = ci->nodeID == r->e->nodeID; + const int do_cj_stars = cj->nodeID == r->e->nodeID; +#else + /* here we are updating the hydro -> switch ci, cj */ + const int do_ci_stars = cj->nodeID == r->e->nodeID; + const int do_cj_stars = ci->nodeID == r->e->nodeID; +#endif + if (do_ci_stars && ci->stars.count != 0 && cj->hydro.count != 0) + DO_NONSYM_PAIR1_STARS_NAIVE(r, ci, cj); + if (do_cj_stars && cj->stars.count != 0 && ci->hydro.count != 0) + DO_NONSYM_PAIR1_STARS_NAIVE(r, cj, ci); + + TIMER_TOC(TIMER_DOPAIR_STARS); +} + +/** + * @brief Compute the interactions between a cell pair, but only for the + * given indices in ci. + * + * Version using a brute-force algorithm. + * + * @param r The #runner. + * @param ci The first #cell. + * @param sparts_i The #part to interact with @c cj. + * @param ind The list of indices of particles in @c ci to interact with. + * @param scount The number of particles in @c ind. + * @param cj The second #cell. + * @param sid The direction of the pair. + * @param flipped Flag to check whether the cells have been flipped or not. + * @param shift The shift vector to apply to the particles in ci. + */ +void DOPAIR1_SUBSET_STARS(struct runner *r, struct cell *restrict ci, + struct spart *restrict sparts_i, int *restrict ind, + int scount, struct cell *restrict cj, const int sid, + const int flipped, const double *shift) { + + const struct engine *e = r->e; + const integertime_t ti_current = e->ti_current; + const struct cosmology *cosmo = e->cosmology; + + /* Cosmological terms */ + const float a = cosmo->a; + const float H = cosmo->H; + + const int count_j = cj->hydro.count; + struct part *restrict parts_j = cj->hydro.parts; + struct xpart *restrict xparts_j = cj->hydro.xparts; + + /* Early abort? */ + if (count_j == 0) return; + + /* Pick-out the sorted lists. */ + const struct sort_entry *restrict sort_j = cj->hydro.sort[sid]; + const float dxj = cj->hydro.dx_max_sort; + + /* Sparts are on the left? */ + if (!flipped) { + + /* Loop over the sparts_i. */ + for (int pid = 0; pid < scount; pid++) { + + /* Get a hold of the ith spart in ci. */ + struct spart *restrict spi = &sparts_i[ind[pid]]; + const double pix = spi->x[0] - (shift[0]); + const double piy = spi->x[1] - (shift[1]); + const double piz = spi->x[2] - (shift[2]); + const float hi = spi->h; + const float hig2 = hi * hi * kernel_gamma2; + const double di = hi * kernel_gamma + dxj + pix * runner_shift[sid][0] + + piy * runner_shift[sid][1] + piz * runner_shift[sid][2]; + + /* Loop over the parts in cj. */ + for (int pjd = 0; pjd < count_j && sort_j[pjd].d < di; pjd++) { + + /* Get a pointer to the jth particle. */ + struct part *restrict pj = &parts_j[sort_j[pjd].i]; + struct xpart *restrict xpj = &xparts_j[sort_j[pjd].i]; + + /* Skip inhibited particles. */ + if (part_is_inhibited(pj, e)) continue; + + const double pjx = pj->x[0]; + const double pjy = pj->x[1]; + const double pjz = pj->x[2]; + const float hj = pj->h; + + /* Compute the pairwise distance. 
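+ * The absolute positions are kept in double precision; the subtraction is
+ * done in double and only the small relative separation is cast down to
+ * float, so the single-precision r2 computed below does not lose accuracy
+ * even in a large simulation volume.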
*/ + float dx[3] = {(float)(pix - pjx), (float)(piy - pjy), + (float)(piz - pjz)}; + const float r2 = dx[0] * dx[0] + dx[1] * dx[1] + dx[2] * dx[2]; + +#ifdef SWIFT_DEBUG_CHECKS + /* Check that particles have been drifted to the current time */ + if (spi->ti_drift != e->ti_current) + error("Particle pi not drifted to current time"); + if (pj->ti_drift != e->ti_current) + error("Particle pj not drifted to current time"); +#endif + + /* Hit or miss? */ + if (r2 < hig2) { + IACT_STARS(r2, dx, hi, hj, spi, pj, a, H); + +#if (FUNCTION_TASK_LOOP == TASK_LOOP_DENSITY) + runner_iact_nonsym_feedback_density(r2, dx, hi, hj, spi, pj, xpj, + cosmo, ti_current); +#elif (FUNCTION_TASK_LOOP == TASK_LOOP_FEEDBACK) + runner_iact_nonsym_feedback_apply(r2, dx, hi, hj, spi, pj, xpj, cosmo, + ti_current); +#endif + } + } /* loop over the parts in cj. */ + } /* loop over the sparts in ci. */ + } + + /* Sparts are on the right. */ + else { + + /* Loop over the sparts_i. */ + for (int pid = 0; pid < scount; pid++) { + + /* Get a hold of the ith spart in ci. */ + struct spart *restrict spi = &sparts_i[ind[pid]]; + const double pix = spi->x[0] - (shift[0]); + const double piy = spi->x[1] - (shift[1]); + const double piz = spi->x[2] - (shift[2]); + const float hi = spi->h; + const float hig2 = hi * hi * kernel_gamma2; + const double di = -hi * kernel_gamma - dxj + pix * runner_shift[sid][0] + + piy * runner_shift[sid][1] + piz * runner_shift[sid][2]; + + /* Loop over the parts in cj. */ + for (int pjd = count_j - 1; pjd >= 0 && di < sort_j[pjd].d; pjd--) { + + /* Get a pointer to the jth particle. */ + struct part *restrict pj = &parts_j[sort_j[pjd].i]; + struct xpart *restrict xpj = &xparts_j[sort_j[pjd].i]; + + /* Skip inhibited particles. */ + if (part_is_inhibited(pj, e)) continue; + + const double pjx = pj->x[0]; + const double pjy = pj->x[1]; + const double pjz = pj->x[2]; + const float hj = pj->h; + + /* Compute the pairwise distance. */ + float dx[3] = {(float)(pix - pjx), (float)(piy - pjy), + (float)(piz - pjz)}; + const float r2 = dx[0] * dx[0] + dx[1] * dx[1] + dx[2] * dx[2]; + +#ifdef SWIFT_DEBUG_CHECKS + /* Check that particles have been drifted to the current time */ + if (spi->ti_drift != e->ti_current) + error("Particle pi not drifted to current time"); + if (pj->ti_drift != e->ti_current) + error("Particle pj not drifted to current time"); +#endif + + /* Hit or miss? */ + if (r2 < hig2) { + IACT_STARS(r2, dx, hi, hj, spi, pj, a, H); + +#if (FUNCTION_TASK_LOOP == TASK_LOOP_DENSITY) + runner_iact_nonsym_feedback_density(r2, dx, hi, hj, spi, pj, xpj, + cosmo, ti_current); +#elif (FUNCTION_TASK_LOOP == TASK_LOOP_FEEDBACK) + runner_iact_nonsym_feedback_apply(r2, dx, hi, hj, spi, pj, xpj, cosmo, + ti_current); +#endif + } + } /* loop over the parts in cj. */ + } /* loop over the sparts in ci. */ + } +} + +/** + * @brief Compute the interactions between a cell pair, but only for the + * given indices in ci. + * + * Version using a brute-force algorithm. + * + * @param r The #runner. + * @param ci The first #cell. + * @param sparts_i The #part to interact with @c cj. + * @param ind The list of indices of particles in @c ci to interact with. + * @param scount The number of particles in @c ind. + * @param cj The second #cell. + * @param shift The shift vector to apply to the particles in ci. 
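+ *
+ * This brute-force variant does not require sorted particle lists; it is
+ * the fallback used by DOPAIR1_SUBSET_BRANCH_STARS when SWIFT is built
+ * with SWIFT_USE_NAIVE_INTERACTIONS_STARS. A hypothetical call for two
+ * neighbouring cells (ind and scount being whatever subset of sparts the
+ * caller collected, e.g. those whose smoothing length is still being
+ * iterated on) would look like:
+ *
+ *   DOPAIR1_SUBSET_STARS_NAIVE(r, ci, ci->stars.parts, ind, scount, cj,
+ *                              shift);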
+ */ +void DOPAIR1_SUBSET_STARS_NAIVE(struct runner *r, struct cell *restrict ci, + struct spart *restrict sparts_i, + int *restrict ind, int scount, + struct cell *restrict cj, const double *shift) { + +#ifdef SWIFT_DEBUG_CHECKS + if (ci->nodeID != engine_rank) error("Should be run on a different node"); +#endif + + const struct engine *e = r->e; + const integertime_t ti_current = e->ti_current; + const struct cosmology *cosmo = e->cosmology; + + /* Cosmological terms */ + const float a = cosmo->a; + const float H = cosmo->H; + + const int count_j = cj->hydro.count; + struct part *restrict parts_j = cj->hydro.parts; + struct xpart *restrict xparts_j = cj->hydro.xparts; + + /* Early abort? */ + if (count_j == 0) return; + + /* Loop over the parts_i. */ + for (int pid = 0; pid < scount; pid++) { + + /* Get a hold of the ith part in ci. */ + struct spart *restrict spi = &sparts_i[ind[pid]]; + + const double pix = spi->x[0] - (shift[0]); + const double piy = spi->x[1] - (shift[1]); + const double piz = spi->x[2] - (shift[2]); + const float hi = spi->h; + const float hig2 = hi * hi * kernel_gamma2; + +#ifdef SWIFT_DEBUG_CHECKS + if (!spart_is_active(spi, e)) + error("Trying to correct smoothing length of inactive particle !"); +#endif + + /* Loop over the parts in cj. */ + for (int pjd = 0; pjd < count_j; pjd++) { + + /* Get a pointer to the jth particle. */ + struct part *restrict pj = &parts_j[pjd]; + struct xpart *restrict xpj = &xparts_j[pjd]; + + /* Skip inhibited particles */ + if (part_is_inhibited(pj, e)) continue; + + const double pjx = pj->x[0]; + const double pjy = pj->x[1]; + const double pjz = pj->x[2]; + const float hj = pj->h; + + /* Compute the pairwise distance. */ + float dx[3] = {(float)(pix - pjx), (float)(piy - pjy), + (float)(piz - pjz)}; + const float r2 = dx[0] * dx[0] + dx[1] * dx[1] + dx[2] * dx[2]; + +#ifdef SWIFT_DEBUG_CHECKS + /* Check that particles have been drifted to the current time */ + if (pj->ti_drift != e->ti_current) + error("Particle pj not drifted to current time"); +#endif + /* Hit or miss? */ + if (r2 < hig2) { + IACT_STARS(r2, dx, hi, hj, spi, pj, a, H); + +#if (FUNCTION_TASK_LOOP == TASK_LOOP_DENSITY) + runner_iact_nonsym_feedback_density(r2, dx, hi, hj, spi, pj, xpj, cosmo, + ti_current); +#elif (FUNCTION_TASK_LOOP == TASK_LOOP_FEEDBACK) + runner_iact_nonsym_feedback_apply(r2, dx, hi, hj, spi, pj, xpj, cosmo, + ti_current); +#endif + } + } /* loop over the parts in cj. */ + } /* loop over the parts in ci. */ +} + +/** + * @brief Compute the interactions between a cell pair, but only for the + * given indices in ci. + * + * @param r The #runner. + * @param ci The first #cell. + * @param sparts The #spart to interact. + * @param ind The list of indices of particles in @c ci to interact with. + * @param scount The number of particles in @c ind. + */ +void DOSELF1_SUBSET_STARS(struct runner *r, struct cell *restrict ci, + struct spart *restrict sparts, int *restrict ind, + int scount) { + +#ifdef SWIFT_DEBUG_CHECKS + if (ci->nodeID != engine_rank) error("Should be run on a different node"); +#endif + + const struct engine *e = r->e; + const integertime_t ti_current = e->ti_current; + const struct cosmology *cosmo = e->cosmology; + + /* Cosmological terms */ + const float a = cosmo->a; + const float H = cosmo->H; + + const int count_i = ci->hydro.count; + struct part *restrict parts_j = ci->hydro.parts; + struct xpart *restrict xparts_j = ci->hydro.xparts; + + /* Early abort? */ + if (count_i == 0) return; + + /* Loop over the parts in ci. 
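+ * (more precisely, over the scount star particles selected through ind[];
+ * the gas neighbours are then found by the inner loop over all count_i
+ * parts of the cell)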
*/ + for (int spid = 0; spid < scount; spid++) { + + /* Get a hold of the ith part in ci. */ + struct spart *spi = &sparts[ind[spid]]; + const float spix[3] = {(float)(spi->x[0] - ci->loc[0]), + (float)(spi->x[1] - ci->loc[1]), + (float)(spi->x[2] - ci->loc[2])}; + const float hi = spi->h; + const float hig2 = hi * hi * kernel_gamma2; + +#ifdef SWIFT_DEBUG_CHECKS + if (!spart_is_active(spi, e)) + error("Inactive particle in subset function!"); +#endif + + /* Loop over the parts in cj. */ + for (int pjd = 0; pjd < count_i; pjd++) { + + /* Get a pointer to the jth particle. */ + struct part *restrict pj = &parts_j[pjd]; + struct xpart *restrict xpj = &xparts_j[pjd]; + + /* Early abort? */ + if (part_is_inhibited(pj, e)) continue; + + /* Compute the pairwise distance. */ + const float pjx[3] = {(float)(pj->x[0] - ci->loc[0]), + (float)(pj->x[1] - ci->loc[1]), + (float)(pj->x[2] - ci->loc[2])}; + float dx[3] = {spix[0] - pjx[0], spix[1] - pjx[1], spix[2] - pjx[2]}; + const float r2 = dx[0] * dx[0] + dx[1] * dx[1] + dx[2] * dx[2]; + +#ifdef SWIFT_DEBUG_CHECKS + /* Check that particles have been drifted to the current time */ + if (pj->ti_drift != e->ti_current) + error("Particle pj not drifted to current time"); +#endif + + /* Hit or miss? */ + if (r2 < hig2) { + IACT_STARS(r2, dx, hi, pj->h, spi, pj, a, H); +#if (FUNCTION_TASK_LOOP == TASK_LOOP_DENSITY) + runner_iact_nonsym_feedback_density(r2, dx, hi, pj->h, spi, pj, xpj, + cosmo, ti_current); +#elif (FUNCTION_TASK_LOOP == TASK_LOOP_FEEDBACK) + runner_iact_nonsym_feedback_apply(r2, dx, hi, pj->h, spi, pj, xpj, + cosmo, ti_current); +#endif + } + } /* loop over the parts in cj. */ + } /* loop over the parts in ci. */ +} + +/** + * @brief Determine which version of DOSELF1_SUBSET_STARS needs to be called + * depending on the optimisation level. + * + * @param r The #runner. + * @param ci The first #cell. + * @param sparts The #spart to interact. + * @param ind The list of indices of particles in @c ci to interact with. + * @param scount The number of particles in @c ind. + */ +void DOSELF1_SUBSET_BRANCH_STARS(struct runner *r, struct cell *restrict ci, + struct spart *restrict sparts, + int *restrict ind, int scount) { + + DOSELF1_SUBSET_STARS(r, ci, sparts, ind, scount); +} + +/** + * @brief Determine which version of DOPAIR1_SUBSET_STARS needs to be called + * depending on the orientation of the cells or whether DOPAIR1_SUBSET_STARS + * needs to be called at all. + * + * @param r The #runner. + * @param ci The first #cell. + * @param sparts_i The #spart to interact with @c cj. + * @param ind The list of indices of particles in @c ci to interact with. + * @param scount The number of particles in @c ind. + * @param cj The second #cell. + */ +void DOPAIR1_SUBSET_BRANCH_STARS(struct runner *r, struct cell *restrict ci, + struct spart *restrict sparts_i, + int *restrict ind, int scount, + struct cell *restrict cj) { + + const struct engine *e = r->e; + + /* Anything to do here? */ + if (cj->hydro.count == 0) return; + + /* Get the relative distance between the pairs, wrapping. */ + double shift[3] = {0.0, 0.0, 0.0}; + for (int k = 0; k < 3; k++) { + if (cj->loc[k] - ci->loc[k] < -e->s->dim[k] / 2) + shift[k] = e->s->dim[k]; + else if (cj->loc[k] - ci->loc[k] > e->s->dim[k] / 2) + shift[k] = -e->s->dim[k]; + } + +#ifdef SWIFT_USE_NAIVE_INTERACTIONS_STARS + DOPAIR1_SUBSET_STARS_NAIVE(r, ci, sparts_i, ind, scount, cj, shift); +#else + /* Get the sorting index. 
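+ * Each axis contributes 0, 1 or 2 depending on whether cj lies below, at
+ * the same position as, or above ci along that axis (after periodic
+ * wrapping), giving sid = 9 * v_x + 3 * v_y + v_z in [0, 26], with 13
+ * corresponding to the cell itself. Worked example (illustrative): a
+ * neighbour shifted by one cell in z only has (v_x, v_y, v_z) = (1, 1, 2),
+ * i.e. sid = 9 + 3 + 2 = 14, which is then mapped onto one of the 13
+ * canonical directions (and a flip flag) via runner_flip and sortlistID
+ * below.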
*/ + int sid = 0; + for (int k = 0; k < 3; k++) + sid = 3 * sid + ((cj->loc[k] - ci->loc[k] + shift[k] < 0) + ? 0 + : (cj->loc[k] - ci->loc[k] + shift[k] > 0) ? 2 : 1); + + /* Switch the cells around? */ + const int flipped = runner_flip[sid]; + sid = sortlistID[sid]; + + /* Has the cell cj been sorted? */ + if (!(cj->hydro.sorted & (1 << sid)) || + cj->hydro.dx_max_sort_old > space_maxreldx * cj->dmin) + error("Interacting unsorted cells."); + + DOPAIR1_SUBSET_STARS(r, ci, sparts_i, ind, scount, cj, sid, flipped, shift); +#endif +} + +void DOSUB_SUBSET_STARS(struct runner *r, struct cell *ci, struct spart *sparts, + int *ind, int scount, struct cell *cj, int gettimer) { + + const struct engine *e = r->e; + struct space *s = e->s; + + /* Should we even bother? */ + if (!cell_is_active_stars(ci, e) && + (cj == NULL || !cell_is_active_stars(cj, e))) + return; + + /* Find out in which sub-cell of ci the parts are. */ + struct cell *sub = NULL; + if (ci->split) { + for (int k = 0; k < 8; k++) { + if (ci->progeny[k] != NULL) { + if (&sparts[ind[0]] >= &ci->progeny[k]->stars.parts[0] && + &sparts[ind[0]] < + &ci->progeny[k]->stars.parts[ci->progeny[k]->stars.count]) { + sub = ci->progeny[k]; + break; + } + } + } + } + + /* Is this a single cell? */ + if (cj == NULL) { + + /* Recurse? */ + if (cell_can_recurse_in_self_stars_task(ci)) { + + /* Loop over all progeny. */ + DOSUB_SUBSET_STARS(r, sub, sparts, ind, scount, NULL, 0); + for (int j = 0; j < 8; j++) + if (ci->progeny[j] != sub && ci->progeny[j] != NULL) + DOSUB_SUBSET_STARS(r, sub, sparts, ind, scount, ci->progeny[j], 0); + + } + + /* Otherwise, compute self-interaction. */ + else + DOSELF1_SUBSET_BRANCH_STARS(r, ci, sparts, ind, scount); + } /* self-interaction. */ + + /* Otherwise, it's a pair interaction. */ + else { + + /* Recurse? */ + if (cell_can_recurse_in_pair_stars_task(ci, cj) && + cell_can_recurse_in_pair_stars_task(cj, ci)) { + + /* Get the type of pair and flip ci/cj if needed. */ + double shift[3] = {0.0, 0.0, 0.0}; + const int sid = space_getsid(s, &ci, &cj, shift); + + struct cell_split_pair *csp = &cell_split_pairs[sid]; + for (int k = 0; k < csp->count; k++) { + const int pid = csp->pairs[k].pid; + const int pjd = csp->pairs[k].pjd; + if (ci->progeny[pid] == sub && cj->progeny[pjd] != NULL) + DOSUB_SUBSET_STARS(r, ci->progeny[pid], sparts, ind, scount, + cj->progeny[pjd], 0); + if (ci->progeny[pid] != NULL && cj->progeny[pjd] == sub) + DOSUB_SUBSET_STARS(r, cj->progeny[pjd], sparts, ind, scount, + ci->progeny[pid], 0); + } + } + + /* Otherwise, compute the pair directly. */ + else if (cell_is_active_stars(ci, e) && cj->hydro.count > 0) { + + /* Do any of the cells need to be drifted first? */ + if (cell_is_active_stars(ci, e)) { + if (!cell_are_spart_drifted(ci, e)) error("Cell should be drifted!"); + if (!cell_are_part_drifted(cj, e)) error("Cell should be drifted!"); + } + + DOPAIR1_SUBSET_BRANCH_STARS(r, ci, sparts, ind, scount, cj); + } + + } /* otherwise, pair interaction. */ +} + +/** + * @brief Determine which version of DOSELF1_STARS needs to be called depending + * on the optimisation level. + * + * @param r #runner + * @param c #cell c + * + */ +void DOSELF1_BRANCH_STARS(struct runner *r, struct cell *c) { + + const struct engine *restrict e = r->e; + + /* Anything to do here? */ + if (c->stars.count == 0) return; + + /* Anything to do here? */ + if (!cell_is_active_stars(c, e)) return; + + /* Did we mess up the recursion? 
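+ * The tree walk is only allowed to recurse while the largest search
+ * radius, kernel_gamma * h_max, still fits inside the cell (dmin being
+ * the smallest extent of the cell); if that is violated here, some
+ * neighbours could lie outside the cells this task covers, so we abort
+ * loudly rather than silently miss interactions.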
*/ + if (c->stars.h_max_old * kernel_gamma > c->dmin) + error("Cell smaller than smoothing length"); + + DOSELF1_STARS(r, c, 1); +} + +#define RUNNER_CHECK_SORT(TYPE, PART, cj, ci, sid) \ + ({ \ + const struct sort_entry *restrict sort_j = cj->TYPE.sort[sid]; \ + \ + for (int pjd = 0; pjd < cj->TYPE.count; pjd++) { \ + const struct PART *p = &cj->TYPE.parts[sort_j[pjd].i]; \ + if (PART##_is_inhibited(p, e)) continue; \ + \ + const float d = p->x[0] * runner_shift[sid][0] + \ + p->x[1] * runner_shift[sid][1] + \ + p->x[2] * runner_shift[sid][2]; \ + if ((fabsf(d - sort_j[pjd].d) - cj->TYPE.dx_max_sort) > \ + 1.0e-4 * max(fabsf(d), cj->TYPE.dx_max_sort_old) && \ + (fabsf(d - sort_j[pjd].d) - cj->TYPE.dx_max_sort) > \ + cj->width[0] * 1.0e-10) \ + error( \ + "particle shift diff exceeds dx_max_sort in cell cj. " \ + "cj->nodeID=%d " \ + "ci->nodeID=%d d=%e sort_j[pjd].d=%e cj->" #TYPE \ + ".dx_max_sort=%e " \ + "cj->" #TYPE \ + ".dx_max_sort_old=%e, cellID=%i super->cellID=%i" \ + "cj->depth=%d cj->maxdepth=%d", \ + cj->nodeID, ci->nodeID, d, sort_j[pjd].d, cj->TYPE.dx_max_sort, \ + cj->TYPE.dx_max_sort_old, cj->cellID, cj->hydro.super->cellID, \ + cj->depth, cj->maxdepth); \ + } \ + }) + +/** + * @brief Determine which version of DOPAIR1_STARS needs to be called depending + * on the orientation of the cells or whether DOPAIR1_STARS needs to be called + * at all. + * + * @param r #runner + * @param ci #cell ci + * @param cj #cell cj + * + */ +void DOPAIR1_BRANCH_STARS(struct runner *r, struct cell *ci, struct cell *cj) { + + const struct engine *restrict e = r->e; + + /* Get the sort ID. */ + double shift[3] = {0.0, 0.0, 0.0}; + const int sid = space_getsid(e->s, &ci, &cj, shift); + + const int ci_active = cell_is_active_stars(ci, e); + const int cj_active = cell_is_active_stars(cj, e); +#if (FUNCTION_TASK_LOOP == TASK_LOOP_DENSITY) + const int do_ci_stars = ci->nodeID == e->nodeID; + const int do_cj_stars = cj->nodeID == e->nodeID; +#else + /* here we are updating the hydro -> switch ci, cj */ + const int do_ci_stars = cj->nodeID == e->nodeID; + const int do_cj_stars = ci->nodeID == e->nodeID; +#endif + const int do_ci = (ci->stars.count != 0 && cj->hydro.count != 0 && + ci_active && do_ci_stars); + const int do_cj = (cj->stars.count != 0 && ci->hydro.count != 0 && + cj_active && do_cj_stars); + + /* Anything to do here? */ + if (!do_ci && !do_cj) return; + + /* Check that cells are drifted. */ + if (do_ci && + (!cell_are_spart_drifted(ci, e) || !cell_are_part_drifted(cj, e))) + error("Interacting undrifted cells."); + + /* Have the cells been sorted? */ + if (do_ci && (!(ci->stars.sorted & (1 << sid)) || + ci->stars.dx_max_sort_old > space_maxreldx * ci->dmin)) + error("Interacting unsorted cells."); + + if (do_ci && (!(cj->hydro.sorted & (1 << sid)) || + cj->hydro.dx_max_sort_old > space_maxreldx * cj->dmin)) + error("Interacting unsorted cells."); + + if (do_cj && + (!cell_are_part_drifted(ci, e) || !cell_are_spart_drifted(cj, e))) + error("Interacting undrifted cells."); + + /* Have the cells been sorted? */ + if (do_cj && (!(ci->hydro.sorted & (1 << sid)) || + ci->hydro.dx_max_sort_old > space_maxreldx * ci->dmin)) + error("Interacting unsorted cells."); + + if (do_cj && (!(cj->stars.sorted & (1 << sid)) || + cj->stars.dx_max_sort_old > space_maxreldx * cj->dmin)) + error("Interacting unsorted cells."); + +#ifdef SWIFT_DEBUG_CHECKS + if (do_ci) { + // MATTHIEU: This test is faulty. To be fixed... 
+ // RUNNER_CHECK_SORT(hydro, part, cj, ci, sid); + RUNNER_CHECK_SORT(stars, spart, ci, cj, sid); + } + + if (do_cj) { + // MATTHIEU: This test is faulty. To be fixed... + // RUNNER_CHECK_SORT(hydro, part, ci, cj, sid); + RUNNER_CHECK_SORT(stars, spart, cj, ci, sid); + } +#endif /* SWIFT_DEBUG_CHECKS */ + +#ifdef SWIFT_USE_NAIVE_INTERACTIONS_STARS + DOPAIR1_STARS_NAIVE(r, ci, cj, 1); +#else + DO_SYM_PAIR1_STARS(r, ci, cj, sid, shift); +#endif +} + +/** + * @brief Compute grouped sub-cell interactions for pairs + * + * @param r The #runner. + * @param ci The first #cell. + * @param cj The second #cell. + * @param gettimer Do we have a timer ? + * + * @todo Hard-code the sid on the recursive calls to avoid the + * redundant computations to find the sid on-the-fly. + */ +void DOSUB_PAIR1_STARS(struct runner *r, struct cell *ci, struct cell *cj, + int gettimer) { + + TIMER_TIC; + + struct space *s = r->e->s; + const struct engine *e = r->e; + + /* Should we even bother? */ + const int should_do_ci = ci->stars.count != 0 && cj->hydro.count != 0 && + cell_is_active_stars(ci, e); + const int should_do_cj = cj->stars.count != 0 && ci->hydro.count != 0 && + cell_is_active_stars(cj, e); + if (!should_do_ci && !should_do_cj) return; + + /* Get the type of pair and flip ci/cj if needed. */ + double shift[3]; + const int sid = space_getsid(s, &ci, &cj, shift); + + /* Recurse? */ + if (cell_can_recurse_in_pair_stars_task(ci, cj) && + cell_can_recurse_in_pair_stars_task(cj, ci)) { + struct cell_split_pair *csp = &cell_split_pairs[sid]; + for (int k = 0; k < csp->count; k++) { + const int pid = csp->pairs[k].pid; + const int pjd = csp->pairs[k].pjd; + if (ci->progeny[pid] != NULL && cj->progeny[pjd] != NULL) + DOSUB_PAIR1_STARS(r, ci->progeny[pid], cj->progeny[pjd], 0); + } + } + + /* Otherwise, compute the pair directly. */ + else { + +#if (FUNCTION_TASK_LOOP == TASK_LOOP_DENSITY) + const int do_ci_stars = ci->nodeID == e->nodeID; + const int do_cj_stars = cj->nodeID == e->nodeID; +#else + /* here we are updating the hydro -> switch ci, cj */ + const int do_ci_stars = cj->nodeID == e->nodeID; + const int do_cj_stars = ci->nodeID == e->nodeID; +#endif + const int do_ci = ci->stars.count != 0 && cj->hydro.count != 0 && + cell_is_active_stars(ci, e) && do_ci_stars; + const int do_cj = cj->stars.count != 0 && ci->hydro.count != 0 && + cell_is_active_stars(cj, e) && do_cj_stars; + + if (do_ci) { + + /* Make sure both cells are drifted to the current timestep. */ + if (!cell_are_spart_drifted(ci, e)) + error("Interacting undrifted cells (sparts)."); + + if (!cell_are_part_drifted(cj, e)) + error("Interacting undrifted cells (parts)."); + + /* Do any of the cells need to be sorted first? */ + if (!(ci->stars.sorted & (1 << sid)) || + ci->stars.dx_max_sort_old > ci->dmin * space_maxreldx) { + error("Interacting unsorted cell (sparts)."); + } + + if (!(cj->hydro.sorted & (1 << sid)) || + cj->hydro.dx_max_sort_old > cj->dmin * space_maxreldx) + error("Interacting unsorted cell (parts). %i", cj->nodeID); + } + + if (do_cj) { + + /* Make sure both cells are drifted to the current timestep. */ + if (!cell_are_part_drifted(ci, e)) + error("Interacting undrifted cells (parts)."); + + if (!cell_are_spart_drifted(cj, e)) + error("Interacting undrifted cells (sparts)."); + + /* Do any of the cells need to be sorted first? 
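+ * (A cell keeps one sorted particle list per pair direction; the bit
+ * (1 << sid) of the ->sorted mask records whether the list along this
+ * axis exists, and ->dx_max_sort_old bounds how far particles have moved
+ * since it was built. Beyond space_maxreldx * dmin the list is considered
+ * stale and interacting would be an error.)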
*/ + if (!(ci->hydro.sorted & (1 << sid)) || + ci->hydro.dx_max_sort_old > ci->dmin * space_maxreldx) { + error("Interacting unsorted cell (parts)."); + } + + if (!(cj->stars.sorted & (1 << sid)) || + cj->stars.dx_max_sort_old > cj->dmin * space_maxreldx) { + error("Interacting unsorted cell (sparts)."); + } + } + + if (do_ci || do_cj) DOPAIR1_BRANCH_STARS(r, ci, cj); + } + + TIMER_TOC(TIMER_DOSUB_PAIR_STARS); +} + +/** + * @brief Compute grouped sub-cell interactions for self tasks + * + * @param r The #runner. + * @param ci The first #cell. + * @param gettimer Do we have a timer ? + */ +void DOSUB_SELF1_STARS(struct runner *r, struct cell *ci, int gettimer) { + + TIMER_TIC; + +#ifdef SWIFT_DEBUG_CHECKS + if (ci->nodeID != engine_rank) + error("This function should not be called on foreign cells"); +#endif + + /* Should we even bother? */ + if (ci->hydro.count == 0 || ci->stars.count == 0 || + !cell_is_active_stars(ci, r->e)) + return; + + /* Recurse? */ + if (cell_can_recurse_in_self_stars_task(ci)) { + + /* Loop over all progeny. */ + for (int k = 0; k < 8; k++) + if (ci->progeny[k] != NULL) { + DOSUB_SELF1_STARS(r, ci->progeny[k], 0); + for (int j = k + 1; j < 8; j++) + if (ci->progeny[j] != NULL) + DOSUB_PAIR1_STARS(r, ci->progeny[k], ci->progeny[j], 0); + } + } + + /* Otherwise, compute self-interaction. */ + else { + + /* Drift the cell to the current timestep if needed. */ + if (!cell_are_spart_drifted(ci, r->e)) error("Interacting undrifted cell."); + + DOSELF1_BRANCH_STARS(r, ci); + } + + TIMER_TOC(TIMER_DOSUB_SELF_STARS); +} diff --git a/src/runner_doiact_grav.c b/src/runner_doiact_grav.c new file mode 100644 index 0000000000000000000000000000000000000000..d4b71b7e94ad1d5731cd81747e296a0aed05e520 --- /dev/null +++ b/src/runner_doiact_grav.c @@ -0,0 +1,1824 @@ +/******************************************************************************* + * This file is part of SWIFT. + * Copyright (c) 2013 Pedro Gonnet (pedro.gonnet@durham.ac.uk) + * 2016 Matthieu Schaller (matthieu.schaller@durham.ac.uk) + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published + * by the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public License + * along with this program. If not, see <http://www.gnu.org/licenses/>. + * + ******************************************************************************/ +#include "../config.h" + +/* This object's header. */ +#include "runner_doiact_grav.h" + +/* Local includes. */ +#include "active.h" +#include "cell.h" +#include "gravity.h" +#include "gravity_cache.h" +#include "gravity_iact.h" +#include "inline.h" +#include "part.h" +#include "space_getsid.h" +#include "timers.h" + +/** + * @brief Recursively propagate the multipoles down the tree by applying the + * L2L and L2P kernels. + * + * @param r The #runner. + * @param c The #cell we are working on. + * @param timer Are we timing this ? 
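+ *
+ * Starting from the field tensor accumulated on this cell, the tensor is
+ * shifted to each progeny's centre of mass (the L2L kernel) and added to
+ * the progeny's own tensor, recursing until the leaves are reached; there
+ * the tensor is applied to every active #gpart (the L2P kernel), turning
+ * the long-range part of the gravity interaction into per-particle
+ * accelerations and potential.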
+ */ +void runner_do_grav_down(struct runner *r, struct cell *c, int timer) { + + /* Some constants */ + const struct engine *e = r->e; + + TIMER_TIC; + +#ifdef SWIFT_DEBUG_CHECKS + if (c->grav.ti_old_multipole != e->ti_current) + error("c->multipole not drifted."); + if (c->grav.multipole->pot.ti_init != e->ti_current) + error("c->field tensor not initialised"); +#endif + + if (c->split) { + + /* Node case */ + + /* Add the field-tensor to all the 8 progenitors */ + for (int k = 0; k < 8; ++k) { + struct cell *cp = c->progeny[k]; + + /* Do we have a progenitor with any active g-particles ? */ + if (cp != NULL && cell_is_active_gravity(cp, e)) { + +#ifdef SWIFT_DEBUG_CHECKS + if (cp->grav.ti_old_multipole != e->ti_current) + error("cp->multipole not drifted."); + if (cp->grav.multipole->pot.ti_init != e->ti_current) + error("cp->field tensor not initialised"); +#endif + /* If the tensor received any contribution, push it down */ + if (c->grav.multipole->pot.interacted) { + + struct grav_tensor shifted_tensor; + + /* Shift the field tensor */ + gravity_L2L(&shifted_tensor, &c->grav.multipole->pot, + cp->grav.multipole->CoM, c->grav.multipole->CoM); + + /* Add it to this level's tensor */ + gravity_field_tensors_add(&cp->grav.multipole->pot, &shifted_tensor); + } + + /* Recurse */ + runner_do_grav_down(r, cp, 0); + } + } + + } else { + + /* Leaf case */ + + /* We can abort early if no interactions via multipole happened */ + if (!c->grav.multipole->pot.interacted) return; + + if (!cell_are_gpart_drifted(c, e)) error("Un-drifted gparts"); + + /* Cell properties */ + struct gpart *gparts = c->grav.parts; + const int gcount = c->grav.count; + const struct grav_tensor *pot = &c->grav.multipole->pot; + const double CoM[3] = {c->grav.multipole->CoM[0], c->grav.multipole->CoM[1], + c->grav.multipole->CoM[2]}; + + /* Apply accelerations to the particles */ + for (int i = 0; i < gcount; ++i) { + + /* Get a handle on the gpart */ + struct gpart *gp = &gparts[i]; + + /* Update if active */ + if (gpart_is_active(gp, e)) { + +#ifdef SWIFT_DEBUG_CHECKS + /* Check that particles have been drifted to the current time */ + if (gp->ti_drift != e->ti_current) + error("gpart not drifted to current time"); + if (c->grav.multipole->pot.ti_init != e->ti_current) + error("c->field tensor not initialised"); + + /* Check that we are not updated an inhibited particle */ + if (gpart_is_inhibited(gp, e)) error("Updating an inhibited particle!"); + + /* Check that the particle was initialised */ + if (gp->initialised == 0) + error("Adding forces to an un-initialised gpart."); +#endif + /* Apply the kernel */ + gravity_L2P(pot, CoM, gp); + } + } + } + + if (timer) TIMER_TOC(timer_dograv_down); +} + +/** + * @brief Compute the non-truncated gravity interactions between all particles + * of a cell and the particles of the other cell. + * + * The calculation is performed non-symmetrically using the pre-filled + * #gravity_cache structures. The loop over the j cache should auto-vectorize. + * + * @param ci_cache #gravity_cache contaning the particles to be updated. + * @param cj_cache #gravity_cache contaning the source particles. + * @param gcount_i The number of particles in the cell i. + * @param gcount_padded_j The number of particles in the cell j padded to the + * vector length. + * @param periodic Is the calculation using periodic BCs ? + * @param dim The size of the simulation volume. + * + * @param e The #engine (for debugging checks only). + * @param gparts_i The #gpart in cell i (for debugging checks only). 
+ * @param gparts_j The #gpart in cell j (for debugging checks only). + * @param gcount_j The number of particles in the cell j (for debugging checks + * only). + */ +static INLINE void runner_dopair_grav_pp_full( + struct gravity_cache *restrict ci_cache, + struct gravity_cache *restrict cj_cache, const int gcount_i, + const int gcount_j, const int gcount_padded_j, const int periodic, + const float dim[3], const struct engine *restrict e, + struct gpart *restrict gparts_i, const struct gpart *restrict gparts_j) { + + /* Loop over all particles in ci... */ + for (int pid = 0; pid < gcount_i; pid++) { + + /* Skip inactive particles */ + if (!ci_cache->active[pid]) continue; + + /* Skip particle that can use the multipole */ + if (ci_cache->use_mpole[pid]) continue; + +#ifdef SWIFT_DEBUG_CHECKS + if (!gpart_is_active(&gparts_i[pid], e)) + error("Inactive particle went through the cache"); +#endif + + const float x_i = ci_cache->x[pid]; + const float y_i = ci_cache->y[pid]; + const float z_i = ci_cache->z[pid]; + const float h_i = ci_cache->epsilon[pid]; + + /* Local accumulators for the acceleration and potential */ + float a_x = 0.f, a_y = 0.f, a_z = 0.f, pot = 0.f; + + /* Make the compiler understand we are in happy vectorization land */ + swift_align_information(float, cj_cache->x, SWIFT_CACHE_ALIGNMENT); + swift_align_information(float, cj_cache->y, SWIFT_CACHE_ALIGNMENT); + swift_align_information(float, cj_cache->z, SWIFT_CACHE_ALIGNMENT); + swift_align_information(float, cj_cache->m, SWIFT_CACHE_ALIGNMENT); + swift_align_information(float, cj_cache->epsilon, SWIFT_CACHE_ALIGNMENT); + swift_assume_size(gcount_padded_j, VEC_SIZE); + + /* Loop over every particle in the other cell. */ + for (int pjd = 0; pjd < gcount_padded_j; pjd++) { + + /* Get info about j */ + const float x_j = cj_cache->x[pjd]; + const float y_j = cj_cache->y[pjd]; + const float z_j = cj_cache->z[pjd]; + const float mass_j = cj_cache->m[pjd]; + const float h_j = cj_cache->epsilon[pjd]; + + /* Compute the pairwise distance. */ + float dx = x_j - x_i; + float dy = y_j - y_i; + float dz = z_j - z_i; + + /* Correct for periodic BCs */ + if (periodic) { + dx = nearestf(dx, dim[0]); + dy = nearestf(dy, dim[1]); + dz = nearestf(dz, dim[2]); + } + + const float r2 = dx * dx + dy * dy + dz * dz; + + /* Pick the maximal softening length of i and j */ + const float h = max(h_i, h_j); + const float h2 = h * h; + const float h_inv = 1.f / h; + const float h_inv_3 = h_inv * h_inv * h_inv; + +#ifdef SWIFT_DEBUG_CHECKS + if (r2 == 0.f && h2 == 0.) + error("Interacting particles with 0 distance and 0 softening."); + + /* Check that particles have been drifted to the current time */ + if (gparts_i[pid].ti_drift != e->ti_current) + error("gpi not drifted to current time"); + if (pjd < gcount_j && gparts_j[pjd].ti_drift != e->ti_current && + !gpart_is_inhibited(&gparts_j[pjd], e)) + error("gpj not drifted to current time"); + + /* Check that we are not updated an inhibited particle */ + if (gpart_is_inhibited(&gparts_i[pid], e)) + error("Updating an inhibited particle!"); + + /* Check that the particle we interact with was not inhibited */ + if (pjd < gcount_j && gpart_is_inhibited(&gparts_j[pjd], e) && + mass_j != 0.f) + error("Inhibited particle used as gravity source."); + + /* Check that the particle was initialised */ + if (gparts_i[pid].initialised == 0) + error("Adding forces to an un-initialised gpart."); +#endif + + /* Interact! 
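+ * runner_iact_grav_pp_full returns the scalar kernel f_ij (acceleration
+ * per unit separation, including the softening and the mass of j) and the
+ * potential contribution pot_ij; the vector acceleration is then
+ * recovered by multiplying f_ij with the separation components dx, dy, dz
+ * accumulated just below.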
*/ + float f_ij, pot_ij; + runner_iact_grav_pp_full(r2, h2, h_inv, h_inv_3, mass_j, &f_ij, &pot_ij); + + /* Store it back */ + a_x += f_ij * dx; + a_y += f_ij * dy; + a_z += f_ij * dz; + pot += pot_ij; + +#ifdef SWIFT_DEBUG_CHECKS + /* Update the interaction counter if it's not a padded gpart */ + if (pjd < gcount_j && !gpart_is_inhibited(&gparts_j[pjd], e)) + gparts_i[pid].num_interacted++; +#endif + } + + /* Store everything back in cache */ + ci_cache->a_x[pid] += a_x; + ci_cache->a_y[pid] += a_y; + ci_cache->a_z[pid] += a_z; + ci_cache->pot[pid] += pot; + } +} + +/** + * @brief Compute the truncated gravity interactions between all particles + * of a cell and the particles of the other cell. + * + * The calculation is performed non-symmetrically using the pre-filled + * #gravity_cache structures. The loop over the j cache should auto-vectorize. + * + * This function only makes sense in periodic BCs. + * + * @param ci_cache #gravity_cache contaning the particles to be updated. + * @param cj_cache #gravity_cache contaning the source particles. + * @param gcount_i The number of particles in the cell i. + * @param gcount_padded_j The number of particles in the cell j padded to the + * vector length. + * @param dim The size of the simulation volume. + * @param r_s_inv The inverse of the gravity-mesh smoothing-scale. + * + * @param e The #engine (for debugging checks only). + * @param gparts_i The #gpart in cell i (for debugging checks only). + * @param gparts_j The #gpart in cell j (for debugging checks only). + * @param gcount_j The number of particles in the cell j (for debugging checks + * only). + */ +static INLINE void runner_dopair_grav_pp_truncated( + struct gravity_cache *restrict ci_cache, + struct gravity_cache *restrict cj_cache, const int gcount_i, + const int gcount_j, const int gcount_padded_j, const float dim[3], + const float r_s_inv, const struct engine *restrict e, + struct gpart *restrict gparts_i, const struct gpart *restrict gparts_j) { + +#ifdef SWIFT_DEBUG_CHECKS + if (!e->s->periodic) + error("Calling truncated PP function in non-periodic setup."); +#endif + + /* Loop over all particles in ci... */ + for (int pid = 0; pid < gcount_i; pid++) { + + /* Skip inactive particles */ + if (!ci_cache->active[pid]) continue; + + /* Skip particle that can use the multipole */ + if (ci_cache->use_mpole[pid]) continue; + +#ifdef SWIFT_DEBUG_CHECKS + if (!gpart_is_active(&gparts_i[pid], e)) + error("Inactive particle went through the cache"); +#endif + + const float x_i = ci_cache->x[pid]; + const float y_i = ci_cache->y[pid]; + const float z_i = ci_cache->z[pid]; + const float h_i = ci_cache->epsilon[pid]; + + /* Local accumulators for the acceleration and potential */ + float a_x = 0.f, a_y = 0.f, a_z = 0.f, pot = 0.f; + + /* Make the compiler understand we are in happy vectorization land */ + swift_align_information(float, cj_cache->x, SWIFT_CACHE_ALIGNMENT); + swift_align_information(float, cj_cache->y, SWIFT_CACHE_ALIGNMENT); + swift_align_information(float, cj_cache->z, SWIFT_CACHE_ALIGNMENT); + swift_align_information(float, cj_cache->m, SWIFT_CACHE_ALIGNMENT); + swift_align_information(float, cj_cache->epsilon, SWIFT_CACHE_ALIGNMENT); + swift_assume_size(gcount_padded_j, VEC_SIZE); + + /* Loop over every particle in the other cell. 
*/ + for (int pjd = 0; pjd < gcount_padded_j; pjd++) { + + /* Get info about j */ + const float x_j = cj_cache->x[pjd]; + const float y_j = cj_cache->y[pjd]; + const float z_j = cj_cache->z[pjd]; + const float mass_j = cj_cache->m[pjd]; + const float h_j = cj_cache->epsilon[pjd]; + + /* Compute the pairwise distance. */ + float dx = x_j - x_i; + float dy = y_j - y_i; + float dz = z_j - z_i; + + /* Correct for periodic BCs */ + dx = nearestf(dx, dim[0]); + dy = nearestf(dy, dim[1]); + dz = nearestf(dz, dim[2]); + + const float r2 = dx * dx + dy * dy + dz * dz; + + /* Pick the maximal softening length of i and j */ + const float h = max(h_i, h_j); + const float h2 = h * h; + const float h_inv = 1.f / h; + const float h_inv_3 = h_inv * h_inv * h_inv; + +#ifdef SWIFT_DEBUG_CHECKS + if (r2 == 0.f && h2 == 0.) + error("Interacting particles with 0 distance and 0 softening."); + + /* Check that particles have been drifted to the current time */ + if (gparts_i[pid].ti_drift != e->ti_current) + error("gpi not drifted to current time"); + if (pjd < gcount_j && gparts_j[pjd].ti_drift != e->ti_current && + !gpart_is_inhibited(&gparts_j[pjd], e)) + error("gpj not drifted to current time"); + + /* Check that we are not updated an inhibited particle */ + if (gpart_is_inhibited(&gparts_i[pid], e)) + error("Updating an inhibited particle!"); + + /* Check that the particle we interact with was not inhibited */ + if (pjd < gcount_j && gpart_is_inhibited(&gparts_j[pjd], e) && + mass_j != 0.f) + error("Inhibited particle used as gravity source."); + + /* Check that the particle was initialised */ + if (gparts_i[pid].initialised == 0) + error("Adding forces to an un-initialised gpart."); +#endif + + /* Interact! */ + float f_ij, pot_ij; + runner_iact_grav_pp_truncated(r2, h2, h_inv, h_inv_3, mass_j, r_s_inv, + &f_ij, &pot_ij); + + /* Store it back */ + a_x += f_ij * dx; + a_y += f_ij * dy; + a_z += f_ij * dz; + pot += pot_ij; + +#ifdef SWIFT_DEBUG_CHECKS + /* Update the interaction counter if it's not a padded gpart */ + if (pjd < gcount_j && !gpart_is_inhibited(&gparts_j[pjd], e)) + gparts_i[pid].num_interacted++; +#endif + } + + /* Store everything back in cache */ + ci_cache->a_x[pid] += a_x; + ci_cache->a_y[pid] += a_y; + ci_cache->a_z[pid] += a_z; + ci_cache->pot[pid] += pot; + } +} + +/** + * @brief Compute the gravity interactions between all particles + * of a cell and the multipole of the other cell. + * + * The calculation is performedusing the pre-filled + * #gravity_cache structure. The loop over the i cache should auto-vectorize. + * + * @param ci_cache #gravity_cache contaning the particles to be updated. + * @param gcount_padded_i The number of particles in the cell i padded to the + * vector length. + * @param CoM_j Position of the #multipole in #cell j. + * @param multi_j The #multipole in #cell j. + * @param periodic Is the calculation using periodic BCs ? + * @param dim The size of the simulation volume. + * + * @param e The #engine (for debugging checks only). + * @param gparts_i The #gpart in cell i (for debugging checks only). + * @param gcount_i The number of particles in the cell i (for debugging checks + * only). + * @param cj The #cell j (for debugging checks only). 
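The periodic corrections in the P-P loops above go through nearestf(), whose definition is not part of this hunk. As a minimal sketch only (a hypothetical helper, not the SWIFT implementation, and assuming the input separation is already within 1.5 box lengths of zero), the minimal-image convention those calls stand for can be written as:

static float nearest_image_sketch(float dx, const float box_size) {
  /* Wrap a 1D separation onto the periodic copy closest to zero. */
  if (dx > 0.5f * box_size) dx -= box_size;
  if (dx < -0.5f * box_size) dx += box_size;
  return dx;
}

Applied to each component of (dx, dy, dz) before forming r2, this makes the force act towards the nearest periodic image of the source particle.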
+ */ +static INLINE void runner_dopair_grav_pm_full( + struct gravity_cache *ci_cache, const int gcount_padded_i, + const float CoM_j[3], const struct multipole *restrict multi_j, + const int periodic, const float dim[3], const struct engine *restrict e, + struct gpart *restrict gparts_i, const int gcount_i, + const struct cell *restrict cj) { + + /* Make the compiler understand we are in happy vectorization land */ + swift_declare_aligned_ptr(float, x, ci_cache->x, SWIFT_CACHE_ALIGNMENT); + swift_declare_aligned_ptr(float, y, ci_cache->y, SWIFT_CACHE_ALIGNMENT); + swift_declare_aligned_ptr(float, z, ci_cache->z, SWIFT_CACHE_ALIGNMENT); + swift_declare_aligned_ptr(float, epsilon, ci_cache->epsilon, + SWIFT_CACHE_ALIGNMENT); + swift_declare_aligned_ptr(float, a_x, ci_cache->a_x, SWIFT_CACHE_ALIGNMENT); + swift_declare_aligned_ptr(float, a_y, ci_cache->a_y, SWIFT_CACHE_ALIGNMENT); + swift_declare_aligned_ptr(float, a_z, ci_cache->a_z, SWIFT_CACHE_ALIGNMENT); + swift_declare_aligned_ptr(float, pot, ci_cache->pot, SWIFT_CACHE_ALIGNMENT); + swift_declare_aligned_ptr(int, active, ci_cache->active, + SWIFT_CACHE_ALIGNMENT); + swift_declare_aligned_ptr(int, use_mpole, ci_cache->use_mpole, + SWIFT_CACHE_ALIGNMENT); + swift_assume_size(gcount_padded_i, VEC_SIZE); + + /* Loop over all particles in ci... */ + for (int pid = 0; pid < gcount_padded_i; pid++) { + + /* Skip inactive particles */ + if (!active[pid]) continue; + + /* Skip particle that cannot use the multipole */ + if (!use_mpole[pid]) continue; + +#ifdef SWIFT_DEBUG_CHECKS + if (pid < gcount_i && !gpart_is_active(&gparts_i[pid], e)) + error("Active particle went through the cache"); + + /* Check that particles have been drifted to the current time */ + if (gparts_i[pid].ti_drift != e->ti_current) + error("gpi not drifted to current time"); + + /* Check that we are not updated an inhibited particle */ + if (gpart_is_inhibited(&gparts_i[pid], e)) + error("Updating an inhibited particle!"); + + /* Check that the particle was initialised */ + if (gparts_i[pid].initialised == 0) + error("Adding forces to an un-initialised gpart."); + + if (pid >= gcount_i) error("Adding forces to padded particle"); +#endif + + const float x_i = x[pid]; + const float y_i = y[pid]; + const float z_i = z[pid]; + + /* Some powers of the softening length */ + const float h_i = epsilon[pid]; + const float h_inv_i = 1.f / h_i; + + /* Distance to the Multipole */ + float dx = CoM_j[0] - x_i; + float dy = CoM_j[1] - y_i; + float dz = CoM_j[2] - z_i; + + /* Apply periodic BCs? */ + if (periodic) { + dx = nearestf(dx, dim[0]); + dy = nearestf(dy, dim[1]); + dz = nearestf(dz, dim[2]); + } + + const float r2 = dx * dx + dy * dy + dz * dz; + +#ifdef SWIFT_DEBUG_CHECKS + const float r_max_j = cj->grav.multipole->r_max; + const float r_max2 = r_max_j * r_max_j; + const float theta_crit2 = e->gravity_properties->theta_crit2; + + /* Note: 0.99 and 1.1 to avoid FP rounding false-positives */ + if (!gravity_M2P_accept(r_max2, theta_crit2 * 1.1, r2, 0.99 * h_i)) + error( + "use_mpole[i] set when M2P accept fails CoM=[%e %e %e] pos=[%e %e " + "%e], rmax=%e r=%e epsilon=%e", + CoM_j[0], CoM_j[1], CoM_j[2], x_i, y_i, z_i, r_max_j, sqrtf(r2), h_i); +#endif + + /* Interact! 
*/ +    float f_x, f_y, f_z, pot_ij; +    runner_iact_grav_pm_full(dx, dy, dz, r2, h_i, h_inv_i, multi_j, &f_x, &f_y, +                             &f_z, &pot_ij); + +    /* Store it back */ +    a_x[pid] += f_x; +    a_y[pid] += f_y; +    a_z[pid] += f_z; +    pot[pid] += pot_ij; + +#ifdef SWIFT_DEBUG_CHECKS +    /* Update the interaction counter */ +    if (pid < gcount_i) +      gparts_i[pid].num_interacted += cj->grav.multipole->m_pole.num_gpart; +#endif +  } +} + +/** + * @brief Compute the gravity interactions between all particles + * of a cell and the multipole of the other cell. + * + * The calculation is performed using the pre-filled + * #gravity_cache structure. The loop over the i cache should auto-vectorize. + * + * This function only makes sense in periodic BCs. + * + * @param ci_cache #gravity_cache containing the particles to be updated. + * @param gcount_padded_i The number of particles in the cell i padded to the + * vector length. + * @param CoM_j Position of the #multipole in #cell j. + * @param multi_j The #multipole in #cell j. + * @param dim The size of the simulation volume. + * @param r_s_inv The inverse of the gravity-mesh smoothing-scale. + * + * @param e The #engine (for debugging checks only). + * @param gparts_i The #gpart in cell i (for debugging checks only). + * @param gcount_i The number of particles in the cell i (for debugging checks + * only). + * @param cj The #cell j (for debugging checks only). + */ +static INLINE void runner_dopair_grav_pm_truncated( +    struct gravity_cache *ci_cache, const int gcount_padded_i, +    const float CoM_j[3], const struct multipole *restrict multi_j, +    const float dim[3], const float r_s_inv, const struct engine *restrict e, +    struct gpart *restrict gparts_i, const int gcount_i, +    const struct cell *restrict cj) { + +#ifdef SWIFT_DEBUG_CHECKS +  if (!e->s->periodic) +    error("Calling truncated PM function in non-periodic setup."); +#endif + +  /* Make the compiler understand we are in happy vectorization land */ +  swift_declare_aligned_ptr(float, x, ci_cache->x, SWIFT_CACHE_ALIGNMENT); +  swift_declare_aligned_ptr(float, y, ci_cache->y, SWIFT_CACHE_ALIGNMENT); +  swift_declare_aligned_ptr(float, z, ci_cache->z, SWIFT_CACHE_ALIGNMENT); +  swift_declare_aligned_ptr(float, epsilon, ci_cache->epsilon, +                            SWIFT_CACHE_ALIGNMENT); +  swift_declare_aligned_ptr(float, a_x, ci_cache->a_x, SWIFT_CACHE_ALIGNMENT); +  swift_declare_aligned_ptr(float, a_y, ci_cache->a_y, SWIFT_CACHE_ALIGNMENT); +  swift_declare_aligned_ptr(float, a_z, ci_cache->a_z, SWIFT_CACHE_ALIGNMENT); +  swift_declare_aligned_ptr(float, pot, ci_cache->pot, SWIFT_CACHE_ALIGNMENT); +  swift_declare_aligned_ptr(int, active, ci_cache->active, +                            SWIFT_CACHE_ALIGNMENT); +  swift_declare_aligned_ptr(int, use_mpole, ci_cache->use_mpole, +                            SWIFT_CACHE_ALIGNMENT); +  swift_assume_size(gcount_padded_i, VEC_SIZE); + +  /* Loop over all particles in ci... 
*/ + for (int pid = 0; pid < gcount_padded_i; pid++) { + + /* Skip inactive particles */ + if (!active[pid]) continue; + + /* Skip particle that cannot use the multipole */ + if (!use_mpole[pid]) continue; + +#ifdef SWIFT_DEBUG_CHECKS + if (pid < gcount_i && !gpart_is_active(&gparts_i[pid], e)) + error("Active particle went through the cache"); + + /* Check that particles have been drifted to the current time */ + if (gparts_i[pid].ti_drift != e->ti_current) + error("gpi not drifted to current time"); + + /* Check that we are not updated an inhibited particle */ + if (gpart_is_inhibited(&gparts_i[pid], e)) + error("Updating an inhibited particle!"); + + /* Check that the particle was initialised */ + if (gparts_i[pid].initialised == 0) + error("Adding forces to an un-initialised gpart."); + + if (pid >= gcount_i) error("Adding forces to padded particle"); +#endif + + const float x_i = x[pid]; + const float y_i = y[pid]; + const float z_i = z[pid]; + + /* Some powers of the softening length */ + const float h_i = epsilon[pid]; + const float h_inv_i = 1.f / h_i; + + /* Distance to the Multipole */ + float dx = CoM_j[0] - x_i; + float dy = CoM_j[1] - y_i; + float dz = CoM_j[2] - z_i; + + /* Apply periodic BCs */ + dx = nearestf(dx, dim[0]); + dy = nearestf(dy, dim[1]); + dz = nearestf(dz, dim[2]); + + const float r2 = dx * dx + dy * dy + dz * dz; + +#ifdef SWIFT_DEBUG_CHECKS + const float r_max_j = cj->grav.multipole->r_max; + const float r_max2 = r_max_j * r_max_j; + const float theta_crit2 = e->gravity_properties->theta_crit2; + + /* 0.99 and 1.1 to avoid FP rounding false-positives */ + if (!gravity_M2P_accept(r_max2, theta_crit2 * 1.1, r2, 0.99 * h_i)) + error( + "use_mpole[i] set when M2P accept fails CoM=[%e %e %e] pos=[%e %e " + "%e], rmax=%e", + CoM_j[0], CoM_j[1], CoM_j[2], x_i, y_i, z_i, r_max_j); +#endif + + /* Interact! */ + float f_x, f_y, f_z, pot_ij; + runner_iact_grav_pm_truncated(dx, dy, dz, r2, h_i, h_inv_i, r_s_inv, + multi_j, &f_x, &f_y, &f_z, &pot_ij); + + /* Store it back */ + a_x[pid] += f_x; + a_y[pid] += f_y; + a_z[pid] += f_z; + pot[pid] += pot_ij; + +#ifdef SWIFT_DEBUG_CHECKS + /* Update the interaction counter */ + if (pid < gcount_i) + gparts_i[pid].num_interacted += cj->grav.multipole->m_pole.num_gpart; +#endif + } +} + +/** + * @brief Computes the interaction of all the particles in a cell with all the + * particles of another cell. + * + * This function switches between the full potential and the truncated one + * depending on needs. It will also use the M2P (multipole) interaction + * for the subset of particles in either cell for which the distance criterion + * is valid. + * + * This function starts by constructing the require #gravity_cache for both + * cells and then call the specialised functions doing the actual work on + * the caches. It then write the data back to the particles. + * + * @param r The #runner. + * @param ci The first #cell. + * @param cj The other #cell. + * @param symmetric Are we updating both cells (1) or just ci (0) ? + * @param allow_mpole Are we allowing the use of P2M interactions ? 
+ */ +void runner_dopair_grav_pp(struct runner *r, struct cell *ci, struct cell *cj, + const int symmetric, const int allow_mpole) { + + /* Recover some useful constants */ + const struct engine *e = r->e; + const int periodic = e->mesh->periodic; + const float dim[3] = {(float)e->mesh->dim[0], (float)e->mesh->dim[1], + (float)e->mesh->dim[2]}; + const float r_s_inv = e->mesh->r_s_inv; + const double min_trunc = e->mesh->r_cut_min; + + TIMER_TIC; + + /* Record activity status */ + const int ci_active = + cell_is_active_gravity(ci, e) && (ci->nodeID == e->nodeID); + const int cj_active = + cell_is_active_gravity(cj, e) && (cj->nodeID == e->nodeID); + + /* Anything to do here? */ + if (!ci_active && !cj_active) return; + if (!ci_active && !symmetric) return; + + /* Check that we are not doing something stupid */ + if (ci->split || cj->split) error("Running P-P on splitable cells"); + + /* Let's start by checking things are drifted */ + if (!cell_are_gpart_drifted(ci, e)) error("Un-drifted gparts"); + if (!cell_are_gpart_drifted(cj, e)) error("Un-drifted gparts"); + if (cj_active && ci->grav.ti_old_multipole != e->ti_current) + error("Un-drifted multipole"); + if (ci_active && cj->grav.ti_old_multipole != e->ti_current) + error("Un-drifted multipole"); + + /* Caches to play with */ + struct gravity_cache *const ci_cache = &r->ci_gravity_cache; + struct gravity_cache *const cj_cache = &r->cj_gravity_cache; + + /* Shift to apply to the particles in each cell */ + const double shift_i[3] = {0., 0., 0.}; + const double shift_j[3] = {0., 0., 0.}; + + /* Recover the multipole info and shift the CoM locations */ + const float rmax_i = ci->grav.multipole->r_max; + const float rmax_j = cj->grav.multipole->r_max; + const float rmax2_i = rmax_i * rmax_i; + const float rmax2_j = rmax_j * rmax_j; + const struct multipole *multi_i = &ci->grav.multipole->m_pole; + const struct multipole *multi_j = &cj->grav.multipole->m_pole; + const float CoM_i[3] = {(float)(ci->grav.multipole->CoM[0] - shift_i[0]), + (float)(ci->grav.multipole->CoM[1] - shift_i[1]), + (float)(ci->grav.multipole->CoM[2] - shift_i[2])}; + const float CoM_j[3] = {(float)(cj->grav.multipole->CoM[0] - shift_j[0]), + (float)(cj->grav.multipole->CoM[1] - shift_j[1]), + (float)(cj->grav.multipole->CoM[2] - shift_j[2])}; + + /* Start by constructing particle caches */ + + /* Computed the padded counts */ + const int gcount_i = ci->grav.count; + const int gcount_j = cj->grav.count; + const int gcount_padded_i = gcount_i - (gcount_i % VEC_SIZE) + VEC_SIZE; + const int gcount_padded_j = gcount_j - (gcount_j % VEC_SIZE) + VEC_SIZE; + +#ifdef SWIFT_DEBUG_CHECKS + /* Check that we fit in cache */ + if (gcount_i > ci_cache->count || gcount_j > cj_cache->count) + error("Not enough space in the caches! gcount_i=%d gcount_j=%d", gcount_i, + gcount_j); +#endif + + /* Fill the caches */ + gravity_cache_populate(e->max_active_bin, allow_mpole, periodic, dim, + ci_cache, ci->grav.parts, gcount_i, gcount_padded_i, + shift_i, CoM_j, rmax2_j, ci, e->gravity_properties); + gravity_cache_populate(e->max_active_bin, allow_mpole, periodic, dim, + cj_cache, cj->grav.parts, gcount_j, gcount_padded_j, + shift_j, CoM_i, rmax2_i, cj, e->gravity_properties); + + /* Can we use the Newtonian version or do we need the truncated one ? 
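A note on the padded counts computed above: the expression gcount - (gcount % VEC_SIZE) + VEC_SIZE always rounds up to the next multiple of the vector length, adding a full extra vector even when gcount is already a multiple of VEC_SIZE, so the SIMD loops can iterate over whole vectors while the cache-population routines are expected to make the padded entries inert. A minimal sketch of the rounding (hypothetical helper name, not part of the patch):

static int pad_to_vector_length(const int gcount, const int vec_size) {
  /* Round up to the next multiple of vec_size, e.g. with vec_size == 8:
     gcount = 13 -> 16 and gcount = 8 -> 16 (a fully padded extra vector). */
  return gcount - (gcount % vec_size) + vec_size;
}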
*/ + if (!periodic) { + + /* Not periodic -> Can always use Newtonian potential */ + + /* Let's updated the active cell(s) only */ + if (ci_active) { + + /* First the P2P */ + runner_dopair_grav_pp_full(ci_cache, cj_cache, gcount_i, gcount_j, + gcount_padded_j, periodic, dim, e, + ci->grav.parts, cj->grav.parts); + + /* Then the M2P */ + if (allow_mpole) + runner_dopair_grav_pm_full(ci_cache, gcount_padded_i, CoM_j, multi_j, + periodic, dim, e, ci->grav.parts, gcount_i, + cj); + } + if (cj_active && symmetric) { + + /* First the P2P */ + runner_dopair_grav_pp_full(cj_cache, ci_cache, gcount_j, gcount_i, + gcount_padded_i, periodic, dim, e, + cj->grav.parts, ci->grav.parts); + + /* Then the M2P */ + if (allow_mpole) + runner_dopair_grav_pm_full(cj_cache, gcount_padded_j, CoM_i, multi_i, + periodic, dim, e, cj->grav.parts, gcount_j, + ci); + } + + } else { /* Periodic BC */ + + /* Get the relative distance between the CoMs */ + const double dx[3] = {CoM_j[0] - CoM_i[0], CoM_j[1] - CoM_i[1], + CoM_j[2] - CoM_i[2]}; + const double r2 = dx[0] * dx[0] + dx[1] * dx[1] + dx[2] * dx[2]; + + /* Get the maximal distance between any two particles */ + const double max_r = sqrt(r2) + rmax_i + rmax_j; + + /* Do we need to use the truncated interactions ? */ + if (max_r > min_trunc) { + + /* Periodic but far-away cells must use the truncated potential */ + + /* Let's updated the active cell(s) only */ + if (ci_active) { + + /* First the (truncated) P2P */ + runner_dopair_grav_pp_truncated(ci_cache, cj_cache, gcount_i, gcount_j, + gcount_padded_j, dim, r_s_inv, e, + ci->grav.parts, cj->grav.parts); + + /* Then the M2P */ + if (allow_mpole) + runner_dopair_grav_pm_truncated(ci_cache, gcount_padded_i, CoM_j, + multi_j, dim, r_s_inv, e, + ci->grav.parts, gcount_i, cj); + } + if (cj_active && symmetric) { + + /* First the (truncated) P2P */ + runner_dopair_grav_pp_truncated(cj_cache, ci_cache, gcount_j, gcount_i, + gcount_padded_i, dim, r_s_inv, e, + cj->grav.parts, ci->grav.parts); + + /* Then the M2P */ + if (allow_mpole) + runner_dopair_grav_pm_truncated(cj_cache, gcount_padded_j, CoM_i, + multi_i, dim, r_s_inv, e, + cj->grav.parts, gcount_j, ci); + } + + } else { + + /* Periodic but close-by cells can use the full Newtonian potential */ + + /* Let's updated the active cell(s) only */ + if (ci_active) { + + /* First the (Newtonian) P2P */ + runner_dopair_grav_pp_full(ci_cache, cj_cache, gcount_i, gcount_j, + gcount_padded_j, periodic, dim, e, + ci->grav.parts, cj->grav.parts); + + /* Then the M2P */ + if (allow_mpole) + runner_dopair_grav_pm_full(ci_cache, gcount_padded_i, CoM_j, multi_j, + periodic, dim, e, ci->grav.parts, gcount_i, + cj); + } + if (cj_active && symmetric) { + + /* First the (Newtonian) P2P */ + runner_dopair_grav_pp_full(cj_cache, ci_cache, gcount_j, gcount_i, + gcount_padded_i, periodic, dim, e, + cj->grav.parts, ci->grav.parts); + + /* Then the M2P */ + if (allow_mpole) + runner_dopair_grav_pm_full(cj_cache, gcount_padded_j, CoM_i, multi_i, + periodic, dim, e, cj->grav.parts, gcount_j, + ci); + } + } + } + + /* Write back to the particles */ + if (ci_active) gravity_cache_write_back(ci_cache, ci->grav.parts, gcount_i); + if (cj_active && symmetric) + gravity_cache_write_back(cj_cache, cj->grav.parts, gcount_j); + + TIMER_TOC(timer_dopair_grav_pp); +} + +/** + * @brief Compute the non-truncated gravity interactions between all particles + * of a cell and the particles of the other cell. 
+ * + * The calculation is performed non-symmetrically using the pre-filled + * #gravity_cache structures. The loop over the j cache should auto-vectorize. + * + * @param ci_cache #gravity_cache contaning the particles to be updated. + * @param gcount The number of particles in the cell. + * @param gcount_padded The number of particles in the cell padded to the + * vector length. + * + * @param e The #engine (for debugging checks only). + * @param gparts The #gpart in the cell (for debugging checks only). + */ +static INLINE void runner_doself_grav_pp_full( + struct gravity_cache *restrict ci_cache, const int gcount, + const int gcount_padded, const struct engine *e, struct gpart *gparts) { + + /* Loop over all particles in ci... */ + for (int pid = 0; pid < gcount; pid++) { + + /* Skip inactive particles */ + if (!ci_cache->active[pid]) continue; + + const float x_i = ci_cache->x[pid]; + const float y_i = ci_cache->y[pid]; + const float z_i = ci_cache->z[pid]; + const float h_i = ci_cache->epsilon[pid]; + + /* Local accumulators for the acceleration */ + float a_x = 0.f, a_y = 0.f, a_z = 0.f, pot = 0.f; + + /* Make the compiler understand we are in happy vectorization land */ + swift_align_information(float, ci_cache->x, SWIFT_CACHE_ALIGNMENT); + swift_align_information(float, ci_cache->y, SWIFT_CACHE_ALIGNMENT); + swift_align_information(float, ci_cache->z, SWIFT_CACHE_ALIGNMENT); + swift_align_information(float, ci_cache->m, SWIFT_CACHE_ALIGNMENT); + swift_align_information(float, ci_cache->epsilon, SWIFT_CACHE_ALIGNMENT); + swift_assume_size(gcount_padded, VEC_SIZE); + + /* Loop over every other particle in the cell. */ + for (int pjd = 0; pjd < gcount_padded; pjd++) { + + /* No self interaction */ + if (pid == pjd) continue; + + /* Get info about j */ + const float x_j = ci_cache->x[pjd]; + const float y_j = ci_cache->y[pjd]; + const float z_j = ci_cache->z[pjd]; + const float mass_j = ci_cache->m[pjd]; + const float h_j = ci_cache->epsilon[pjd]; + + /* Compute the pairwise (square) distance. */ + /* Note: no need for periodic wrapping inside a cell */ + const float dx = x_j - x_i; + const float dy = y_j - y_i; + const float dz = z_j - z_i; + const float r2 = dx * dx + dy * dy + dz * dz; + + /* Pick the maximal softening length of i and j */ + const float h = max(h_i, h_j); + const float h2 = h * h; + const float h_inv = 1.f / h; + const float h_inv_3 = h_inv * h_inv * h_inv; + +#ifdef SWIFT_DEBUG_CHECKS + if (r2 == 0.f && h2 == 0.) + error("Interacting particles with 0 distance and 0 softening."); + + /* Check that particles have been drifted to the current time */ + if (gparts[pid].ti_drift != e->ti_current) + error("gpi not drifted to current time"); + if (pjd < gcount && gparts[pjd].ti_drift != e->ti_current && + !gpart_is_inhibited(&gparts[pjd], e)) + error("gpj not drifted to current time"); + + /* Check that we are not updated an inhibited particle */ + if (gpart_is_inhibited(&gparts[pid], e)) + error("Updating an inhibited particle!"); + + /* Check that the particle we interact with was not inhibited */ + if (pjd < gcount && gpart_is_inhibited(&gparts[pjd], e) && mass_j != 0.f) + error("Inhibited particle used as gravity source."); + + /* Check that the particle was initialised */ + if (gparts[pid].initialised == 0) + error("Adding forces to an un-initialised gpart."); +#endif + + /* Interact! 
*/ +      float f_ij, pot_ij; +      runner_iact_grav_pp_full(r2, h2, h_inv, h_inv_3, mass_j, &f_ij, &pot_ij); + +      /* Store it back */ +      a_x += f_ij * dx; +      a_y += f_ij * dy; +      a_z += f_ij * dz; +      pot += pot_ij; + +#ifdef SWIFT_DEBUG_CHECKS +      /* Update the interaction counter if it's not a padded gpart */ +      if (pjd < gcount && !gpart_is_inhibited(&gparts[pjd], e)) +        gparts[pid].num_interacted++; +#endif +    } + +    /* Store everything back in cache */ +    ci_cache->a_x[pid] += a_x; +    ci_cache->a_y[pid] += a_y; +    ci_cache->a_z[pid] += a_z; +    ci_cache->pot[pid] += pot; +  } +} + +/** + * @brief Compute the truncated gravity interactions between all particles + * of a cell. + * + * The calculation is performed using the pre-filled + * #gravity_cache structure. The inner loop over the cache should auto-vectorize. + * + * This function only makes sense in periodic BCs. + * + * @param ci_cache #gravity_cache containing the particles to be updated. + * @param gcount The number of particles in the cell. + * @param gcount_padded The number of particles in the cell padded to the + * vector length. + * @param r_s_inv The inverse of the gravity-mesh smoothing-scale. + * + * @param e The #engine (for debugging checks only). + * @param gparts The #gpart in the cell (for debugging checks only). + */ +static INLINE void runner_doself_grav_pp_truncated( +    struct gravity_cache *restrict ci_cache, const int gcount, +    const int gcount_padded, const float r_s_inv, const struct engine *e, +    struct gpart *gparts) { + +#ifdef SWIFT_DEBUG_CHECKS +  if (!e->s->periodic) +    error("Calling truncated PP function in non-periodic setup."); +#endif + +  /* Loop over all particles in ci... */ +  for (int pid = 0; pid < gcount; pid++) { + +    /* Skip inactive particles */ +    if (!ci_cache->active[pid]) continue; + +    const float x_i = ci_cache->x[pid]; +    const float y_i = ci_cache->y[pid]; +    const float z_i = ci_cache->z[pid]; +    const float h_i = ci_cache->epsilon[pid]; + +    /* Local accumulators for the acceleration and potential */ +    float a_x = 0.f, a_y = 0.f, a_z = 0.f, pot = 0.f; + +    /* Make the compiler understand we are in happy vectorization land */ +    swift_align_information(float, ci_cache->x, SWIFT_CACHE_ALIGNMENT); +    swift_align_information(float, ci_cache->y, SWIFT_CACHE_ALIGNMENT); +    swift_align_information(float, ci_cache->z, SWIFT_CACHE_ALIGNMENT); +    swift_align_information(float, ci_cache->m, SWIFT_CACHE_ALIGNMENT); +    swift_align_information(float, ci_cache->epsilon, SWIFT_CACHE_ALIGNMENT); +    swift_assume_size(gcount_padded, VEC_SIZE); + +    /* Loop over every other particle in the cell. */ +    for (int pjd = 0; pjd < gcount_padded; pjd++) { + +      /* No self interaction */ +      if (pid == pjd) continue; + +      /* Get info about j */ +      const float x_j = ci_cache->x[pjd]; +      const float y_j = ci_cache->y[pjd]; +      const float z_j = ci_cache->z[pjd]; +      const float mass_j = ci_cache->m[pjd]; +      const float h_j = ci_cache->epsilon[pjd]; + +      /* Compute the pairwise (square) distance. */ +      /* Note: no need for periodic wrapping inside a cell */ +      const float dx = x_j - x_i; +      const float dy = y_j - y_i; +      const float dz = z_j - z_i; + +      const float r2 = dx * dx + dy * dy + dz * dz; + +      /* Pick the maximal softening length of i and j */ +      const float h = max(h_i, h_j); +      const float h2 = h * h; +      const float h_inv = 1.f / h; +      const float h_inv_3 = h_inv * h_inv * h_inv; + +#ifdef SWIFT_DEBUG_CHECKS +      if (r2 == 0.f && h2 == 0.) 
+ error("Interacting particles with 0 distance and 0 softening."); + + /* Check that particles have been drifted to the current time */ + if (gparts[pid].ti_drift != e->ti_current) + error("gpi not drifted to current time"); + if (pjd < gcount && gparts[pjd].ti_drift != e->ti_current && + !gpart_is_inhibited(&gparts[pjd], e)) + error("gpj not drifted to current time"); + + /* Check that we are not updated an inhibited particle */ + if (gpart_is_inhibited(&gparts[pid], e)) + error("Updating an inhibited particle!"); + + /* Check that the particle we interact with was not inhibited */ + if (pjd < gcount && gpart_is_inhibited(&gparts[pjd], e) && mass_j != 0.f) + error("Inhibited particle used as gravity source."); + + /* Check that the particle was initialised */ + if (gparts[pid].initialised == 0) + error("Adding forces to an un-initialised gpart."); +#endif + + /* Interact! */ + float f_ij, pot_ij; + runner_iact_grav_pp_truncated(r2, h2, h_inv, h_inv_3, mass_j, r_s_inv, + &f_ij, &pot_ij); + + /* Store it back */ + a_x += f_ij * dx; + a_y += f_ij * dy; + a_z += f_ij * dz; + pot += pot_ij; + +#ifdef SWIFT_DEBUG_CHECKS + /* Update the interaction counter if it's not a padded gpart */ + if (pjd < gcount && !gpart_is_inhibited(&gparts[pjd], e)) + gparts[pid].num_interacted++; +#endif + } + + /* Store everything back in cache */ + ci_cache->a_x[pid] += a_x; + ci_cache->a_y[pid] += a_y; + ci_cache->a_z[pid] += a_z; + ci_cache->pot[pid] += pot; + } +} + +/** + * @brief Computes the interaction of all the particles in a cell with all the + * other ones. + * + * This function switches between the full potential and the truncated one + * depending on needs. + * + * This function starts by constructing the require #gravity_cache for the + * cell and then call the specialised functions doing the actual work on + * the cache. It then write the data back to the particles. + * + * @param r The #runner. + * @param c The #cell. + */ +void runner_doself_grav_pp(struct runner *r, struct cell *c) { + + /* Recover some useful constants */ + const struct engine *e = r->e; + const int periodic = e->mesh->periodic; + const float r_s_inv = e->mesh->r_s_inv; + const double min_trunc = e->mesh->r_cut_min; + + TIMER_TIC; + +#ifdef SWIFT_DEBUG_CHECKS + if (c->grav.count == 0) error("Doing self gravity on an empty cell !"); +#endif + + /* Anything to do here? */ + if (!cell_is_active_gravity(c, e)) return; + + /* Check that we are not doing something stupid */ + if (c->split) error("Running P-P on a splitable cell"); + + /* Do we need to start by drifting things ? */ + if (!cell_are_gpart_drifted(c, e)) error("Un-drifted gparts"); + + /* Start by constructing a cache for the particles */ + struct gravity_cache *const ci_cache = &r->ci_gravity_cache; + + /* Shift to apply to the particles in the cell */ + const double loc[3] = {c->loc[0] + 0.5 * c->width[0], + c->loc[1] + 0.5 * c->width[1], + c->loc[2] + 0.5 * c->width[2]}; + + /* Computed the padded counts */ + const int gcount = c->grav.count; + const int gcount_padded = gcount - (gcount % VEC_SIZE) + VEC_SIZE; + +#ifdef SWIFT_DEBUG_CHECKS + /* Check that we fit in cache */ + if (gcount > ci_cache->count) + error("Not enough space in the cache! gcount=%d", gcount); +#endif + + /* Fill the cache */ + gravity_cache_populate_no_mpole(e->max_active_bin, ci_cache, c->grav.parts, + gcount, gcount_padded, loc, c, + e->gravity_properties); + + /* Can we use the Newtonian version or do we need the truncated one ? 
*/ +  if (!periodic) { + +    /* Not periodic -> Can always use Newtonian potential */ +    runner_doself_grav_pp_full(ci_cache, gcount, gcount_padded, e, +                               c->grav.parts); + +  } else { + +    /* Get the maximal distance between any two particles */ +    const double max_r = 2. * c->grav.multipole->r_max; + +    /* Do we need to use the truncated interactions ? */ +    if (max_r > min_trunc) { + +      /* Periodic but far-away cells must use the truncated potential */ +      runner_doself_grav_pp_truncated(ci_cache, gcount, gcount_padded, r_s_inv, +                                      e, c->grav.parts); + +    } else { + +      /* Periodic but close-by cells can use the full Newtonian potential */ +      runner_doself_grav_pp_full(ci_cache, gcount, gcount_padded, e, +                                 c->grav.parts); +    } +  } + +  /* Write back to the particles */ +  gravity_cache_write_back(ci_cache, c->grav.parts, gcount); + +  TIMER_TOC(timer_doself_grav_pp); +} + +/** + * @brief Computes the interaction of the field tensor and multipole + * of two cells symmetrically. + * + * @param r The #runner. + * @param ci The first #cell. + * @param cj The second #cell. + */ +static INLINE void runner_dopair_grav_mm_symmetric(struct runner *r, +                                                   struct cell *restrict ci, +                                                   struct cell *restrict cj) { + +  /* Some constants */ +  const struct engine *e = r->e; +  const struct gravity_props *props = e->gravity_properties; +  const int periodic = e->mesh->periodic; +  const double dim[3] = {e->mesh->dim[0], e->mesh->dim[1], e->mesh->dim[2]}; +  const float r_s_inv = e->mesh->r_s_inv; + +  TIMER_TIC; + +  /* Anything to do here? */ +  if ((!cell_is_active_gravity_mm(ci, e) || ci->nodeID != engine_rank) || +      (!cell_is_active_gravity_mm(cj, e) || cj->nodeID != engine_rank)) +    error("Invalid state in symmetric M-M calculation!"); + +  /* Short-cut to the multipole */ +  const struct multipole *multi_i = &ci->grav.multipole->m_pole; +  const struct multipole *multi_j = &cj->grav.multipole->m_pole; + +#ifdef SWIFT_DEBUG_CHECKS +  if (ci == cj) error("Interacting a cell with itself using M2L"); + +  if (multi_i->num_gpart == 0) +    error("Multipole i does not seem to have been set."); + +  if (multi_j->num_gpart == 0) +    error("Multipole j does not seem to have been set."); + +  if (ci->grav.multipole->pot.ti_init != e->ti_current) +    error("ci->grav tensor not initialised."); + +  if (cj->grav.multipole->pot.ti_init != e->ti_current) +    error("cj->grav tensor not initialised."); + +  if (ci->grav.ti_old_multipole != e->ti_current) +    error( +        "Undrifted multipole ci->grav.ti_old_multipole=%lld ci->nodeID=%d " +        "cj->nodeID=%d e->ti_current=%lld", +        ci->grav.ti_old_multipole, ci->nodeID, cj->nodeID, e->ti_current); + +  if (cj->grav.ti_old_multipole != e->ti_current) +    error( +        "Undrifted multipole cj->grav.ti_old_multipole=%lld cj->nodeID=%d " +        "ci->nodeID=%d e->ti_current=%lld", +        cj->grav.ti_old_multipole, cj->nodeID, ci->nodeID, e->ti_current); +#endif + +  /* Let's interact at this level */ +  gravity_M2L_symmetric(&ci->grav.multipole->pot, &cj->grav.multipole->pot, +                        multi_i, multi_j, ci->grav.multipole->CoM, +                        cj->grav.multipole->CoM, props, periodic, dim, r_s_inv); + +  TIMER_TOC(timer_dopair_grav_mm); +} + +/** + * @brief Computes the interaction of the field tensor in a cell with the + * multipole of another cell. + * + * @param r The #runner. + * @param ci The #cell with field tensor to interact. + * @param cj The #cell with the multipole. 
+ */ +static INLINE void runner_dopair_grav_mm_nonsym( + struct runner *r, struct cell *restrict ci, + const struct cell *restrict cj) { + + /* Some constants */ + const struct engine *e = r->e; + const struct gravity_props *props = e->gravity_properties; + const int periodic = e->mesh->periodic; + const double dim[3] = {e->mesh->dim[0], e->mesh->dim[1], e->mesh->dim[2]}; + const float r_s_inv = e->mesh->r_s_inv; + + TIMER_TIC; + + /* Anything to do here? */ + if (!cell_is_active_gravity_mm(ci, e) || ci->nodeID != engine_rank) return; + + /* Short-cut to the multipole */ + const struct multipole *multi_j = &cj->grav.multipole->m_pole; + +#ifdef SWIFT_DEBUG_CHECKS + if (ci == cj) error("Interacting a cell with itself using M2L"); + + if (multi_j->num_gpart == 0) + error("Multipole does not seem to have been set."); + + if (ci->grav.multipole->pot.ti_init != e->ti_current) + error("ci->grav tensor not initialised."); + + if (cj->grav.ti_old_multipole != e->ti_current) + error( + "Undrifted multipole cj->grav.ti_old_multipole=%lld cj->nodeID=%d " + "ci->nodeID=%d e->ti_current=%lld", + cj->grav.ti_old_multipole, cj->nodeID, ci->nodeID, e->ti_current); +#endif + + /* Let's interact at this level */ + gravity_M2L_nonsym(&ci->grav.multipole->pot, multi_j, ci->grav.multipole->CoM, + cj->grav.multipole->CoM, props, periodic, dim, r_s_inv); + + TIMER_TOC(timer_dopair_grav_mm); +} + +/** + * @brief Call the M-M calculation on two cells if active. + * + * @param r The #runner object. + * @param ci The first #cell. + * @param cj The second #cell. + */ +static INLINE void runner_dopair_grav_mm(struct runner *r, + struct cell *restrict ci, + struct cell *restrict cj) { + + const struct engine *e = r->e; + + /* What do we need to do? */ + const int do_i = + cell_is_active_gravity_mm(ci, e) && (ci->nodeID == e->nodeID); + const int do_j = + cell_is_active_gravity_mm(cj, e) && (cj->nodeID == e->nodeID); + + /* Do we need drifting first? */ + if (ci->grav.ti_old_multipole < e->ti_current) cell_drift_multipole(ci, e); + if (cj->grav.ti_old_multipole < e->ti_current) cell_drift_multipole(cj, e); + + /* Interact! */ + if (do_i && do_j) + runner_dopair_grav_mm_symmetric(r, ci, cj); + else if (do_i) + runner_dopair_grav_mm_nonsym(r, ci, cj); + else if (do_j) + runner_dopair_grav_mm_nonsym(r, cj, ci); +} + +/** + * @brief Computes all the M-M interactions between all the well-separated (at + * rebuild) pairs of progenies of the two cells. + * + * @param r The #runner thread. + * @param flags The task flag containing the list of well-separated pairs as a + * bit-field. + * @param ci The first #cell. + * @param cj The second #cell. + */ +void runner_dopair_grav_mm_progenies(struct runner *r, const long long flags, + struct cell *restrict ci, + struct cell *restrict cj) { + + /* Loop over all pairs of progenies */ + for (int i = 0; i < 8; i++) { + if (ci->progeny[i] != NULL) { + for (int j = 0; j < 8; j++) { + if (cj->progeny[j] != NULL) { + + struct cell *cpi = ci->progeny[i]; + struct cell *cpj = cj->progeny[j]; + + const int flag = i * 8 + j; + + /* Did we agree to use an M-M interaction here at the last rebuild? 
*/ + if (flags & (1ULL << flag)) runner_dopair_grav_mm(r, cpi, cpj); + } + } + } + } +} + +static INLINE void runner_dopair_recursive_grav_pm(struct runner *r, + struct cell *ci, + const struct cell *cj) { + /* Some constants */ + const struct engine *e = r->e; + const int periodic = e->mesh->periodic; + const float dim[3] = {(float)e->mesh->dim[0], (float)e->mesh->dim[1], + (float)e->mesh->dim[2]}; + const float r_s_inv = e->mesh->r_s_inv; + + /* Anything to do here? */ + if (!(cell_is_active_gravity(ci, e) && ci->nodeID == e->nodeID)) return; + +#ifdef SWIFT_DEBUG_CHECKS + /* Early abort? */ + if (ci->grav.count == 0 || cj->grav.count == 0) + error("Doing pair gravity on an empty cell !"); + + /* Sanity check */ + if (ci == cj) error("Pair interaction between a cell and itself."); + + if (cj->grav.ti_old_multipole != e->ti_current) + error("cj->grav.multipole not drifted."); +#endif + + /* Can we recurse further? */ + if (ci->split) { + + /* Loop over ci's children */ + for (int k = 0; k < 8; k++) { + if (ci->progeny[k] != NULL) + runner_dopair_recursive_grav_pm(r, ci->progeny[k], cj); + } + + /* Ok, let's do the interaction here */ + } else { + + /* Start by constructing particle caches */ + + /* Cache to play with */ + struct gravity_cache *const ci_cache = &r->ci_gravity_cache; + + /* Computed the padded counts */ + const int gcount_i = ci->grav.count; + const int gcount_padded_i = gcount_i - (gcount_i % VEC_SIZE) + VEC_SIZE; + +#ifdef SWIFT_DEBUG_CHECKS + /* Check that we fit in cache */ + if (gcount_i > ci_cache->count) + error("Not enough space in the cache! gcount_i=%d", gcount_i); +#endif + + /* Recover the multipole info and the CoM locations */ + const struct multipole *multi_j = &cj->grav.multipole->m_pole; + const float r_max = cj->grav.multipole->r_max; + const float CoM_j[3] = {(float)(cj->grav.multipole->CoM[0]), + (float)(cj->grav.multipole->CoM[1]), + (float)(cj->grav.multipole->CoM[2])}; + + /* Fill the cache */ + gravity_cache_populate_all_mpole( + e->max_active_bin, periodic, dim, ci_cache, ci->grav.parts, gcount_i, + gcount_padded_i, ci, CoM_j, r_max * r_max, e->gravity_properties); + + /* Can we use the Newtonian version or do we need the truncated one ? */ + if (!periodic) { + + runner_dopair_grav_pm_full(ci_cache, gcount_padded_i, CoM_j, multi_j, + periodic, dim, e, ci->grav.parts, gcount_i, + cj); + + } else { + + runner_dopair_grav_pm_truncated(ci_cache, gcount_padded_i, CoM_j, multi_j, + dim, r_s_inv, e, ci->grav.parts, gcount_i, + cj); + } + + /* Write back to the particles */ + gravity_cache_write_back(ci_cache, ci->grav.parts, gcount_i); + } +} + +/** + * @brief Computes the interaction of all the particles in a cell with all the + * particles of another cell. + * + * This function will try to recurse as far down the tree as possible and only + * default to direct summation if there is no better option. + * + * If using periodic BCs, we will abort the recursion if th distance between the + * cells is larger than the set threshold. + * + * @param r The #runner. + * @param ci The first #cell. + * @param cj The other #cell. + * @param gettimer Are we timing this ? 
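The recursive pair function documented above decides between an M-M interaction and further recursion through gravity_M2L_accept(), which is defined elsewhere. As a simplified, Barnes-Hut-style sketch only (a hypothetical helper, not SWIFT's criterion, which additionally accounts for the softening lengths), such an opening-angle test can look like:

static int mac_accept_sketch(const double r_max_i, const double r_max_j,
                             const double theta_crit2, const double r2) {
  /* Accept the multipole approximation when the combined cell extents,
     seen across the squared CoM distance r2, stay below the critical
     opening angle (theta_crit2 is the squared critical angle). */
  const double size = r_max_i + r_max_j;
  return size * size < theta_crit2 * r2;
}

When the test fails, the code below splits the larger of the two cells and recurses, falling back to direct P-P summation once both cells are leaves.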
+ */ +void runner_dopair_recursive_grav(struct runner *r, struct cell *ci, + struct cell *cj, int gettimer) { + + /* Some constants */ + const struct engine *e = r->e; + const int nodeID = e->nodeID; + const int periodic = e->mesh->periodic; + const double dim[3] = {e->mesh->dim[0], e->mesh->dim[1], e->mesh->dim[2]}; + const double theta_crit2 = e->gravity_properties->theta_crit2; + const double max_distance = e->mesh->r_cut_max; + + /* Anything to do here? */ + if (!((cell_is_active_gravity(ci, e) && ci->nodeID == nodeID) || + (cell_is_active_gravity(cj, e) && cj->nodeID == nodeID))) + return; + +#ifdef SWIFT_DEBUG_CHECKS + + const int gcount_i = ci->grav.count; + const int gcount_j = cj->grav.count; + + /* Early abort? */ + if (gcount_i == 0 || gcount_j == 0) + error("Doing pair gravity on an empty cell !"); + + /* Sanity check */ + if (ci == cj) error("Pair interaction between a cell and itself."); + + if (cell_is_active_gravity(ci, e) && + ci->grav.ti_old_multipole != e->ti_current) + error("ci->grav.multipole not drifted."); + if (cell_is_active_gravity(cj, e) && + cj->grav.ti_old_multipole != e->ti_current) + error("cj->grav.multipole not drifted."); +#endif + + TIMER_TIC; + + /* Recover the multipole information */ + struct gravity_tensors *const multi_i = ci->grav.multipole; + struct gravity_tensors *const multi_j = cj->grav.multipole; + + /* Get the distance between the CoMs */ + double dx = multi_i->CoM[0] - multi_j->CoM[0]; + double dy = multi_i->CoM[1] - multi_j->CoM[1]; + double dz = multi_i->CoM[2] - multi_j->CoM[2]; + + /* Apply BC */ + if (periodic) { + dx = nearest(dx, dim[0]); + dy = nearest(dy, dim[1]); + dz = nearest(dz, dim[2]); + } + const double r2 = dx * dx + dy * dy + dz * dz; + + /* Minimal distance between any 2 particles in the two cells */ + const double r_lr_check = sqrt(r2) - (multi_i->r_max + multi_j->r_max); + + /* Are we beyond the distance where the truncated forces are 0? */ + if (periodic && r_lr_check > max_distance) { + +#ifdef SWIFT_DEBUG_CHECKS + /* Need to account for the interactions we missed */ + if (cell_is_active_gravity(ci, e)) + multi_i->pot.num_interacted += multi_j->m_pole.num_gpart; + if (cell_is_active_gravity(cj, e)) + multi_j->pot.num_interacted += multi_i->m_pole.num_gpart; +#endif + return; + } + + /* OK, we actually need to compute this pair. Let's find the cheapest + * option... */ + + /* Can we use M-M interactions ? */ + if (gravity_M2L_accept(multi_i->r_max, multi_j->r_max, theta_crit2, r2, + multi_i->m_pole.max_softening, + multi_j->m_pole.max_softening)) { + + /* Go M-M */ + runner_dopair_grav_mm(r, ci, cj); + + } else if (!ci->split && !cj->split) { + + /* We have two leaves. Go P-P. */ + runner_dopair_grav_pp(r, ci, cj, /*symmetric*/ 1, /*allow_mpoles*/ 1); + + } else { + + /* Alright, we'll have to split and recurse. */ + /* We know at least one of ci and cj is splittable */ + + const double ri_max = multi_i->r_max; + const double rj_max = multi_j->r_max; + + /* Split the larger of the two cells and start over again */ + if (ri_max > rj_max) { + + /* Can we actually split that interaction ? */ + if (ci->split) { + + /* Loop over ci's children */ + for (int k = 0; k < 8; k++) { + if (ci->progeny[k] != NULL) + runner_dopair_recursive_grav(r, ci->progeny[k], cj, 0); + } + + } else { + /* cj is split */ + + /* MATTHIEU: This could maybe be replaced by P-M interactions ? 
*/ + + /* Loop over cj's children */ + for (int k = 0; k < 8; k++) { + if (cj->progeny[k] != NULL) + runner_dopair_recursive_grav(r, ci, cj->progeny[k], 0); + } + } + } else { + + /* Can we actually split that interaction ? */ + if (cj->split) { + + /* Loop over cj's children */ + for (int k = 0; k < 8; k++) { + if (cj->progeny[k] != NULL) + runner_dopair_recursive_grav(r, ci, cj->progeny[k], 0); + } + + } else { + /* ci is split */ + + /* MATTHIEU: This could maybe be replaced by P-M interactions ? */ + + /* Loop over ci's children */ + for (int k = 0; k < 8; k++) { + if (ci->progeny[k] != NULL) + runner_dopair_recursive_grav(r, ci->progeny[k], cj, 0); + } + } + } + } + + if (gettimer) TIMER_TOC(timer_dosub_pair_grav); +} + +/** + * @brief Computes the interaction of all the particles in a cell. + * + * This function will try to recurse as far down the tree as possible and only + * default to direct summation if there is no better option. + * + * @param r The #runner. + * @param c The first #cell. + * @param gettimer Are we timing this ? + */ +void runner_doself_recursive_grav(struct runner *r, struct cell *c, + int gettimer) { + + /* Some constants */ + const struct engine *e = r->e; + +#ifdef SWIFT_DEBUG_CHECKS + /* Early abort? */ + if (c->grav.count == 0) error("Doing self gravity on an empty cell !"); +#endif + + TIMER_TIC; + + /* Anything to do here? */ + if (!cell_is_active_gravity(c, e)) return; + + /* If the cell is split, interact each progeny with itself, and with + each of its siblings. */ + if (c->split) { + + for (int j = 0; j < 8; j++) { + if (c->progeny[j] != NULL) { + + runner_doself_recursive_grav(r, c->progeny[j], 0); + + for (int k = j + 1; k < 8; k++) { + if (c->progeny[k] != NULL) { + + runner_dopair_recursive_grav(r, c->progeny[j], c->progeny[k], 0); + } + } + } + } + } + + /* If the cell is not split, then just go for it... */ + else { + + runner_doself_grav_pp(r, c); + } + + if (gettimer) TIMER_TOC(timer_dosub_self_grav); +} + +/** + * @brief Performs all M-M interactions between a given top-level cell and all + * the other top-levels that are far enough. + * + * @param r The thread #runner. + * @param ci The #cell of interest. + * @param timer Are we timing this ? + */ +void runner_do_grav_long_range(struct runner *r, struct cell *ci, int timer) { + + /* Some constants */ + const struct engine *e = r->e; + const int periodic = e->mesh->periodic; + const double dim[3] = {e->mesh->dim[0], e->mesh->dim[1], e->mesh->dim[2]}; + const double theta_crit2 = e->gravity_properties->theta_crit2; + const double max_distance2 = e->mesh->r_cut_max * e->mesh->r_cut_max; + + TIMER_TIC; + + /* Recover the list of top-level cells */ + struct cell *cells = e->s->cells_top; + int *cells_with_particles = e->s->cells_with_particles_top; + const int nr_cells_with_particles = e->s->nr_cells_with_particles; + + /* Anything to do here? 
*/ +  if (!cell_is_active_gravity(ci, e)) return; + +  if (ci->nodeID != engine_rank) +    error("Non-local cell in long-range gravity task!"); + +  /* Check multipole has been drifted */ +  if (ci->grav.ti_old_multipole < e->ti_current) cell_drift_multipole(ci, e); + +  /* Get this cell's multipole information */ +  struct gravity_tensors *const multi_i = ci->grav.multipole; + +  /* Find this cell's top-level (great-)parent */ +  struct cell *top = ci; +  while (top->parent != NULL) top = top->parent; + +  /* Recover the top-level multipole (for distance checks) */ +  struct gravity_tensors *const multi_top = top->grav.multipole; +  const double CoM_rebuild_top[3] = {multi_top->CoM_rebuild[0], +                                     multi_top->CoM_rebuild[1], +                                     multi_top->CoM_rebuild[2]}; + +  /* Loop over all the top-level cells and go for a M-M interaction if + * well-separated */ +  for (int n = 0; n < nr_cells_with_particles; ++n) { + +    /* Handle on the top-level cell and its gravity business */ +    const struct cell *cj = &cells[cells_with_particles[n]]; +    const struct gravity_tensors *const multi_j = cj->grav.multipole; + +    /* Avoid self contributions */ +    if (top == cj) continue; + +    /* Skip empty cells */ +    if (multi_j->m_pole.M_000 == 0.f) continue; + +    /* Can we escape early in the periodic BC case? */ +    if (periodic) { + +      /* Minimal distance between any pair of particles */ +      const double min_radius2 = +          cell_min_dist2_same_size(top, cj, periodic, dim); + +      /* Are we beyond the distance where the truncated forces are 0 ?*/ +      if (min_radius2 > max_distance2) { + +#ifdef SWIFT_DEBUG_CHECKS +        /* Need to account for the interactions we missed */ +        multi_i->pot.num_interacted += multi_j->m_pole.num_gpart; +#endif + +        /* Record that this multipole received a contribution */ +        multi_i->pot.interacted = 1; + +        /* We are done here. */ +        continue; +      } +    } + +    /* Get the distance between the CoMs at the last rebuild*/ +    double dx_r = CoM_rebuild_top[0] - multi_j->CoM_rebuild[0]; +    double dy_r = CoM_rebuild_top[1] - multi_j->CoM_rebuild[1]; +    double dz_r = CoM_rebuild_top[2] - multi_j->CoM_rebuild[2]; + +    /* Apply BC */ +    if (periodic) { +      dx_r = nearest(dx_r, dim[0]); +      dy_r = nearest(dy_r, dim[1]); +      dz_r = nearest(dz_r, dim[2]); +    } +    const double r2_rebuild = dx_r * dx_r + dy_r * dy_r + dz_r * dz_r; + +    /* Are we in charge of this cell pair? */ +    if (gravity_M2L_accept(multi_top->r_max_rebuild, multi_j->r_max_rebuild, +                           theta_crit2, r2_rebuild, +                           multi_top->m_pole.max_softening, +                           multi_j->m_pole.max_softening)) { + +      /* Call the PM interaction function on the active sub-cells of ci */ +      runner_dopair_grav_mm_nonsym(r, ci, cj); +      // runner_dopair_recursive_grav_pm(r, ci, cj); + +      /* Record that this multipole received a contribution */ +      multi_i->pot.interacted = 1; + +    } /* We are in charge of this pair */ +  } /* Loop over top-level cells */ + +  if (timer) TIMER_TOC(timer_dograv_long_range); +} diff --git a/src/runner_doiact_grav.h b/src/runner_doiact_grav.h index b4ee8225a7aada8cf595ae7bca251d61b5226f64..34f3e9ec147574357620cc8f485889b87880f06e 100644 --- a/src/runner_doiact_grav.h +++ b/src/runner_doiact_grav.h @@ -20,1810 +20,30 @@ #ifndef SWIFT_RUNNER_DOIACT_GRAV_H #define SWIFT_RUNNER_DOIACT_GRAV_H -/* Includes. 
*/ -#include "active.h" -#include "cell.h" -#include "gravity.h" -#include "gravity_cache.h" -#include "gravity_iact.h" -#include "inline.h" -#include "part.h" -#include "space_getsid.h" -#include "timers.h" +#include "../config.h" -/** - * @brief Recursively propagate the multipoles down the tree by applying the - * L2L and L2P kernels. - * - * @param r The #runner. - * @param c The #cell we are working on. - * @param timer Are we timing this ? - */ -static INLINE void runner_do_grav_down(struct runner *r, struct cell *c, - int timer) { - - /* Some constants */ - const struct engine *e = r->e; - - TIMER_TIC; - -#ifdef SWIFT_DEBUG_CHECKS - if (c->grav.ti_old_multipole != e->ti_current) - error("c->multipole not drifted."); - if (c->grav.multipole->pot.ti_init != e->ti_current) - error("c->field tensor not initialised"); -#endif - - if (c->split) { - - /* Node case */ - - /* Add the field-tensor to all the 8 progenitors */ - for (int k = 0; k < 8; ++k) { - struct cell *cp = c->progeny[k]; - - /* Do we have a progenitor with any active g-particles ? */ - if (cp != NULL && cell_is_active_gravity(cp, e)) { - -#ifdef SWIFT_DEBUG_CHECKS - if (cp->grav.ti_old_multipole != e->ti_current) - error("cp->multipole not drifted."); - if (cp->grav.multipole->pot.ti_init != e->ti_current) - error("cp->field tensor not initialised"); -#endif - /* If the tensor received any contribution, push it down */ - if (c->grav.multipole->pot.interacted) { - - struct grav_tensor shifted_tensor; - - /* Shift the field tensor */ - gravity_L2L(&shifted_tensor, &c->grav.multipole->pot, - cp->grav.multipole->CoM, c->grav.multipole->CoM); - - /* Add it to this level's tensor */ - gravity_field_tensors_add(&cp->grav.multipole->pot, &shifted_tensor); - } - - /* Recurse */ - runner_do_grav_down(r, cp, 0); - } - } - - } else { - - /* Leaf case */ - - /* We can abort early if no interactions via multipole happened */ - if (!c->grav.multipole->pot.interacted) return; - - if (!cell_are_gpart_drifted(c, e)) error("Un-drifted gparts"); - - /* Cell properties */ - struct gpart *gparts = c->grav.parts; - const int gcount = c->grav.count; - const struct grav_tensor *pot = &c->grav.multipole->pot; - const double CoM[3] = {c->grav.multipole->CoM[0], c->grav.multipole->CoM[1], - c->grav.multipole->CoM[2]}; - - /* Apply accelerations to the particles */ - for (int i = 0; i < gcount; ++i) { - - /* Get a handle on the gpart */ - struct gpart *gp = &gparts[i]; - - /* Update if active */ - if (gpart_is_active(gp, e)) { - -#ifdef SWIFT_DEBUG_CHECKS - /* Check that particles have been drifted to the current time */ - if (gp->ti_drift != e->ti_current) - error("gpart not drifted to current time"); - if (c->grav.multipole->pot.ti_init != e->ti_current) - error("c->field tensor not initialised"); - - /* Check that we are not updated an inhibited particle */ - if (gpart_is_inhibited(gp, e)) error("Updating an inhibited particle!"); - - /* Check that the particle was initialised */ - if (gp->initialised == 0) - error("Adding forces to an un-initialised gpart."); -#endif - /* Apply the kernel */ - gravity_L2P(pot, CoM, gp); - } - } - } - - if (timer) TIMER_TOC(timer_dograv_down); -} - -/** - * @brief Compute the non-truncated gravity interactions between all particles - * of a cell and the particles of the other cell. - * - * The calculation is performed non-symmetrically using the pre-filled - * #gravity_cache structures. The loop over the j cache should auto-vectorize. - * - * @param ci_cache #gravity_cache contaning the particles to be updated. 
- * @param cj_cache #gravity_cache contaning the source particles. - * @param gcount_i The number of particles in the cell i. - * @param gcount_padded_j The number of particles in the cell j padded to the - * vector length. - * @param periodic Is the calculation using periodic BCs ? - * @param dim The size of the simulation volume. - * - * @param e The #engine (for debugging checks only). - * @param gparts_i The #gpart in cell i (for debugging checks only). - * @param gparts_j The #gpart in cell j (for debugging checks only). - * @param gcount_j The number of particles in the cell j (for debugging checks - * only). - */ -static INLINE void runner_dopair_grav_pp_full( - struct gravity_cache *restrict ci_cache, - struct gravity_cache *restrict cj_cache, const int gcount_i, - const int gcount_j, const int gcount_padded_j, const int periodic, - const float dim[3], const struct engine *restrict e, - struct gpart *restrict gparts_i, const struct gpart *restrict gparts_j) { - - /* Loop over all particles in ci... */ - for (int pid = 0; pid < gcount_i; pid++) { - - /* Skip inactive particles */ - if (!ci_cache->active[pid]) continue; - - /* Skip particle that can use the multipole */ - if (ci_cache->use_mpole[pid]) continue; - -#ifdef SWIFT_DEBUG_CHECKS - if (!gpart_is_active(&gparts_i[pid], e)) - error("Inactive particle went through the cache"); -#endif - - const float x_i = ci_cache->x[pid]; - const float y_i = ci_cache->y[pid]; - const float z_i = ci_cache->z[pid]; - const float h_i = ci_cache->epsilon[pid]; - - /* Local accumulators for the acceleration and potential */ - float a_x = 0.f, a_y = 0.f, a_z = 0.f, pot = 0.f; - - /* Make the compiler understand we are in happy vectorization land */ - swift_align_information(float, cj_cache->x, SWIFT_CACHE_ALIGNMENT); - swift_align_information(float, cj_cache->y, SWIFT_CACHE_ALIGNMENT); - swift_align_information(float, cj_cache->z, SWIFT_CACHE_ALIGNMENT); - swift_align_information(float, cj_cache->m, SWIFT_CACHE_ALIGNMENT); - swift_align_information(float, cj_cache->epsilon, SWIFT_CACHE_ALIGNMENT); - swift_assume_size(gcount_padded_j, VEC_SIZE); - - /* Loop over every particle in the other cell. */ - for (int pjd = 0; pjd < gcount_padded_j; pjd++) { - - /* Get info about j */ - const float x_j = cj_cache->x[pjd]; - const float y_j = cj_cache->y[pjd]; - const float z_j = cj_cache->z[pjd]; - const float mass_j = cj_cache->m[pjd]; - const float h_j = cj_cache->epsilon[pjd]; - - /* Compute the pairwise distance. */ - float dx = x_j - x_i; - float dy = y_j - y_i; - float dz = z_j - z_i; - - /* Correct for periodic BCs */ - if (periodic) { - dx = nearestf(dx, dim[0]); - dy = nearestf(dy, dim[1]); - dz = nearestf(dz, dim[2]); - } - - const float r2 = dx * dx + dy * dy + dz * dz; - - /* Pick the maximal softening length of i and j */ - const float h = max(h_i, h_j); - const float h2 = h * h; - const float h_inv = 1.f / h; - const float h_inv_3 = h_inv * h_inv * h_inv; - -#ifdef SWIFT_DEBUG_CHECKS - if (r2 == 0.f && h2 == 0.) 
- error("Interacting particles with 0 distance and 0 softening."); - - /* Check that particles have been drifted to the current time */ - if (gparts_i[pid].ti_drift != e->ti_current) - error("gpi not drifted to current time"); - if (pjd < gcount_j && gparts_j[pjd].ti_drift != e->ti_current && - !gpart_is_inhibited(&gparts_j[pjd], e)) - error("gpj not drifted to current time"); - - /* Check that we are not updated an inhibited particle */ - if (gpart_is_inhibited(&gparts_i[pid], e)) - error("Updating an inhibited particle!"); - - /* Check that the particle we interact with was not inhibited */ - if (pjd < gcount_j && gpart_is_inhibited(&gparts_j[pjd], e) && - mass_j != 0.f) - error("Inhibited particle used as gravity source."); - - /* Check that the particle was initialised */ - if (gparts_i[pid].initialised == 0) - error("Adding forces to an un-initialised gpart."); -#endif - - /* Interact! */ - float f_ij, pot_ij; - runner_iact_grav_pp_full(r2, h2, h_inv, h_inv_3, mass_j, &f_ij, &pot_ij); - - /* Store it back */ - a_x += f_ij * dx; - a_y += f_ij * dy; - a_z += f_ij * dz; - pot += pot_ij; - -#ifdef SWIFT_DEBUG_CHECKS - /* Update the interaction counter if it's not a padded gpart */ - if (pjd < gcount_j && !gpart_is_inhibited(&gparts_j[pjd], e)) - gparts_i[pid].num_interacted++; -#endif - } - - /* Store everything back in cache */ - ci_cache->a_x[pid] += a_x; - ci_cache->a_y[pid] += a_y; - ci_cache->a_z[pid] += a_z; - ci_cache->pot[pid] += pot; - } -} - -/** - * @brief Compute the truncated gravity interactions between all particles - * of a cell and the particles of the other cell. - * - * The calculation is performed non-symmetrically using the pre-filled - * #gravity_cache structures. The loop over the j cache should auto-vectorize. - * - * This function only makes sense in periodic BCs. - * - * @param ci_cache #gravity_cache contaning the particles to be updated. - * @param cj_cache #gravity_cache contaning the source particles. - * @param gcount_i The number of particles in the cell i. - * @param gcount_padded_j The number of particles in the cell j padded to the - * vector length. - * @param dim The size of the simulation volume. - * @param r_s_inv The inverse of the gravity-mesh smoothing-scale. - * - * @param e The #engine (for debugging checks only). - * @param gparts_i The #gpart in cell i (for debugging checks only). - * @param gparts_j The #gpart in cell j (for debugging checks only). - * @param gcount_j The number of particles in the cell j (for debugging checks - * only). - */ -static INLINE void runner_dopair_grav_pp_truncated( - struct gravity_cache *restrict ci_cache, - struct gravity_cache *restrict cj_cache, const int gcount_i, - const int gcount_j, const int gcount_padded_j, const float dim[3], - const float r_s_inv, const struct engine *restrict e, - struct gpart *restrict gparts_i, const struct gpart *restrict gparts_j) { - -#ifdef SWIFT_DEBUG_CHECKS - if (!e->s->periodic) - error("Calling truncated PP function in non-periodic setup."); -#endif - - /* Loop over all particles in ci... 
*/ - for (int pid = 0; pid < gcount_i; pid++) { - - /* Skip inactive particles */ - if (!ci_cache->active[pid]) continue; - - /* Skip particle that can use the multipole */ - if (ci_cache->use_mpole[pid]) continue; - -#ifdef SWIFT_DEBUG_CHECKS - if (!gpart_is_active(&gparts_i[pid], e)) - error("Inactive particle went through the cache"); -#endif - - const float x_i = ci_cache->x[pid]; - const float y_i = ci_cache->y[pid]; - const float z_i = ci_cache->z[pid]; - const float h_i = ci_cache->epsilon[pid]; - - /* Local accumulators for the acceleration and potential */ - float a_x = 0.f, a_y = 0.f, a_z = 0.f, pot = 0.f; - - /* Make the compiler understand we are in happy vectorization land */ - swift_align_information(float, cj_cache->x, SWIFT_CACHE_ALIGNMENT); - swift_align_information(float, cj_cache->y, SWIFT_CACHE_ALIGNMENT); - swift_align_information(float, cj_cache->z, SWIFT_CACHE_ALIGNMENT); - swift_align_information(float, cj_cache->m, SWIFT_CACHE_ALIGNMENT); - swift_align_information(float, cj_cache->epsilon, SWIFT_CACHE_ALIGNMENT); - swift_assume_size(gcount_padded_j, VEC_SIZE); - - /* Loop over every particle in the other cell. */ - for (int pjd = 0; pjd < gcount_padded_j; pjd++) { - - /* Get info about j */ - const float x_j = cj_cache->x[pjd]; - const float y_j = cj_cache->y[pjd]; - const float z_j = cj_cache->z[pjd]; - const float mass_j = cj_cache->m[pjd]; - const float h_j = cj_cache->epsilon[pjd]; - - /* Compute the pairwise distance. */ - float dx = x_j - x_i; - float dy = y_j - y_i; - float dz = z_j - z_i; - - /* Correct for periodic BCs */ - dx = nearestf(dx, dim[0]); - dy = nearestf(dy, dim[1]); - dz = nearestf(dz, dim[2]); - - const float r2 = dx * dx + dy * dy + dz * dz; - - /* Pick the maximal softening length of i and j */ - const float h = max(h_i, h_j); - const float h2 = h * h; - const float h_inv = 1.f / h; - const float h_inv_3 = h_inv * h_inv * h_inv; - -#ifdef SWIFT_DEBUG_CHECKS - if (r2 == 0.f && h2 == 0.) - error("Interacting particles with 0 distance and 0 softening."); - - /* Check that particles have been drifted to the current time */ - if (gparts_i[pid].ti_drift != e->ti_current) - error("gpi not drifted to current time"); - if (pjd < gcount_j && gparts_j[pjd].ti_drift != e->ti_current && - !gpart_is_inhibited(&gparts_j[pjd], e)) - error("gpj not drifted to current time"); - - /* Check that we are not updated an inhibited particle */ - if (gpart_is_inhibited(&gparts_i[pid], e)) - error("Updating an inhibited particle!"); - - /* Check that the particle we interact with was not inhibited */ - if (pjd < gcount_j && gpart_is_inhibited(&gparts_j[pjd], e) && - mass_j != 0.f) - error("Inhibited particle used as gravity source."); - - /* Check that the particle was initialised */ - if (gparts_i[pid].initialised == 0) - error("Adding forces to an un-initialised gpart."); -#endif - - /* Interact! 
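Aside: the _truncated variants damp the pairwise force so that, beyond the mesh smoothing scale r_s, the long-range contribution is left to the particle-mesh solver. A common tree-PM choice (Gadget-style; not necessarily the exact form SWIFT tabulates internally) multiplies the Newtonian force by a factor chi(r) that is 1 for r << r_s and falls to 0 for r >> r_s:

#include <math.h>

/* Short-range damping of a tree-PM force split (a Gadget-style choice, shown
 * for illustration): F_short = F_Newton * chi(r), with r_s_inv = 1 / r_s. */
static float toy_trunc_factor(const float r, const float r_s_inv) {
  const float u = 0.5f * r * r_s_inv;            /* r / (2 r_s) */
  const float two_over_sqrt_pi = 1.1283791671f;  /* 2 / sqrt(pi) */
  return erfcf(u) + two_over_sqrt_pi * u * expf(-u * u);
}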
*/ - float f_ij, pot_ij; - runner_iact_grav_pp_truncated(r2, h2, h_inv, h_inv_3, mass_j, r_s_inv, - &f_ij, &pot_ij); - - /* Store it back */ - a_x += f_ij * dx; - a_y += f_ij * dy; - a_z += f_ij * dz; - pot += pot_ij; - -#ifdef SWIFT_DEBUG_CHECKS - /* Update the interaction counter if it's not a padded gpart */ - if (pjd < gcount_j && !gpart_is_inhibited(&gparts_j[pjd], e)) - gparts_i[pid].num_interacted++; -#endif - } - - /* Store everything back in cache */ - ci_cache->a_x[pid] += a_x; - ci_cache->a_y[pid] += a_y; - ci_cache->a_z[pid] += a_z; - ci_cache->pot[pid] += pot; - } -} - -/** - * @brief Compute the gravity interactions between all particles - * of a cell and the multipole of the other cell. - * - * The calculation is performedusing the pre-filled - * #gravity_cache structure. The loop over the i cache should auto-vectorize. - * - * @param ci_cache #gravity_cache contaning the particles to be updated. - * @param gcount_padded_i The number of particles in the cell i padded to the - * vector length. - * @param CoM_j Position of the #multipole in #cell j. - * @param multi_j The #multipole in #cell j. - * @param periodic Is the calculation using periodic BCs ? - * @param dim The size of the simulation volume. - * - * @param e The #engine (for debugging checks only). - * @param gparts_i The #gpart in cell i (for debugging checks only). - * @param gcount_i The number of particles in the cell i (for debugging checks - * only). - * @param cj The #cell j (for debugging checks only). - */ -static INLINE void runner_dopair_grav_pm_full( - struct gravity_cache *ci_cache, const int gcount_padded_i, - const float CoM_j[3], const struct multipole *restrict multi_j, - const int periodic, const float dim[3], const struct engine *restrict e, - struct gpart *restrict gparts_i, const int gcount_i, - const struct cell *restrict cj) { - - /* Make the compiler understand we are in happy vectorization land */ - swift_declare_aligned_ptr(float, x, ci_cache->x, SWIFT_CACHE_ALIGNMENT); - swift_declare_aligned_ptr(float, y, ci_cache->y, SWIFT_CACHE_ALIGNMENT); - swift_declare_aligned_ptr(float, z, ci_cache->z, SWIFT_CACHE_ALIGNMENT); - swift_declare_aligned_ptr(float, epsilon, ci_cache->epsilon, - SWIFT_CACHE_ALIGNMENT); - swift_declare_aligned_ptr(float, a_x, ci_cache->a_x, SWIFT_CACHE_ALIGNMENT); - swift_declare_aligned_ptr(float, a_y, ci_cache->a_y, SWIFT_CACHE_ALIGNMENT); - swift_declare_aligned_ptr(float, a_z, ci_cache->a_z, SWIFT_CACHE_ALIGNMENT); - swift_declare_aligned_ptr(float, pot, ci_cache->pot, SWIFT_CACHE_ALIGNMENT); - swift_declare_aligned_ptr(int, active, ci_cache->active, - SWIFT_CACHE_ALIGNMENT); - swift_declare_aligned_ptr(int, use_mpole, ci_cache->use_mpole, - SWIFT_CACHE_ALIGNMENT); - swift_assume_size(gcount_padded_i, VEC_SIZE); - - /* Loop over all particles in ci... 
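Aside: the particle-multipole (M2P) kernels evaluate the far field of an entire cell at a particle's position using the moments stored at the cell's centre of mass. At the lowest order this reduces to "treat the cell as one point mass at its CoM"; the hypothetical helper below shows only that monopole term, whereas runner_iact_grav_pm_full() also folds in the higher moments carried by struct multipole.

#include <math.h>

/* Monopole-only particle-multipole (M2P) interaction: the whole source cell
 * is treated as a single point of mass M at its centre of mass. */
static void toy_iact_grav_pm(const float dx, const float dy, const float dz,
                             const float r2, const float eps, const float M,
                             float a[3], float *pot) {
  const float d_inv = 1.f / sqrtf(r2 + eps * eps);
  const float f = M * d_inv * d_inv * d_inv;
  a[0] += f * dx;
  a[1] += f * dy;
  a[2] += f * dz;
  *pot -= M * d_inv;
}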
*/ - for (int pid = 0; pid < gcount_padded_i; pid++) { - - /* Skip inactive particles */ - if (!active[pid]) continue; - - /* Skip particle that cannot use the multipole */ - if (!use_mpole[pid]) continue; - -#ifdef SWIFT_DEBUG_CHECKS - if (pid < gcount_i && !gpart_is_active(&gparts_i[pid], e)) - error("Active particle went through the cache"); - - /* Check that particles have been drifted to the current time */ - if (gparts_i[pid].ti_drift != e->ti_current) - error("gpi not drifted to current time"); - - /* Check that we are not updated an inhibited particle */ - if (gpart_is_inhibited(&gparts_i[pid], e)) - error("Updating an inhibited particle!"); - - /* Check that the particle was initialised */ - if (gparts_i[pid].initialised == 0) - error("Adding forces to an un-initialised gpart."); - - if (pid >= gcount_i) error("Adding forces to padded particle"); -#endif - - const float x_i = x[pid]; - const float y_i = y[pid]; - const float z_i = z[pid]; - - /* Some powers of the softening length */ - const float h_i = epsilon[pid]; - const float h_inv_i = 1.f / h_i; - - /* Distance to the Multipole */ - float dx = CoM_j[0] - x_i; - float dy = CoM_j[1] - y_i; - float dz = CoM_j[2] - z_i; - - /* Apply periodic BCs? */ - if (periodic) { - dx = nearestf(dx, dim[0]); - dy = nearestf(dy, dim[1]); - dz = nearestf(dz, dim[2]); - } - - const float r2 = dx * dx + dy * dy + dz * dz; - -#ifdef SWIFT_DEBUG_CHECKS - const float r_max_j = cj->grav.multipole->r_max; - const float r_max2 = r_max_j * r_max_j; - const float theta_crit2 = e->gravity_properties->theta_crit2; - - /* Note: 0.99 and 1.1 to avoid FP rounding false-positives */ - if (!gravity_M2P_accept(r_max2, theta_crit2 * 1.1, r2, 0.99 * h_i)) - error( - "use_mpole[i] set when M2P accept fails CoM=[%e %e %e] pos=[%e %e " - "%e], rmax=%e r=%e epsilon=%e", - CoM_j[0], CoM_j[1], CoM_j[2], x_i, y_i, z_i, r_max_j, sqrtf(r2), h_i); -#endif - - /* Interact! */ - float f_x, f_y, f_z, pot_ij; - runner_iact_grav_pm_full(dx, dy, dz, r2, h_i, h_inv_i, multi_j, &f_x, &f_y, - &f_z, &pot_ij); - - /* Store it back */ - a_x[pid] += f_x; - a_y[pid] += f_y; - a_z[pid] += f_z; - pot[pid] += pot_ij; - -#ifdef SWIFT_DEBUG_CHECKS - /* Update the interaction counter */ - if (pid < gcount_i) - gparts_i[pid].num_interacted += cj->grav.multipole->m_pole.num_gpart; -#endif - } -} - -/** - * @brief Compute the gravity interactions between all particles - * of a cell and the multipole of the other cell. - * - * The calculation is performedusing the pre-filled - * #gravity_cache structure. The loop over the i cache should auto-vectorize. - * - * This function only makes sense in periodic BCs. - * - * @param ci_cache #gravity_cache contaning the particles to be updated. - * @param gcount_padded_i The number of particles in the cell i padded to the - * vector length. - * @param CoM_j Position of the #multipole in #cell j. - * @param multi_j The #multipole in #cell j. - * @param dim The size of the simulation volume. - * @param r_s_inv The inverse of the gravity-mesh smoothing-scale. - * - * @param e The #engine (for debugging checks only). - * @param gparts_i The #gpart in cell i (for debugging checks only). - * @param gcount_i The number of particles in the cell i (for debugging checks - * only). - * @param cj The #cell j (for debugging checks only). 
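Aside: the debug branch above re-checks gravity_M2P_accept() with the criterion loosened by the 0.99/1.1 factors to absorb floating-point rounding. The underlying test is the classic opening-angle criterion: accept the multipole when the source cell of radius r_max subtends less than theta_crit at the particle. A simplified stand-in (ignoring the softening guard SWIFT also applies):

/* Classic opening-angle test: accept the multipole approximation when
 * r_max^2 < theta_crit^2 * r^2.  Simplified stand-in for
 * gravity_M2P_accept(). */
static int toy_m2p_accept(const float r_max2, const float theta_crit2,
                          const float r2) {
  return r_max2 < theta_crit2 * r2;
}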
- */ -static INLINE void runner_dopair_grav_pm_truncated( - struct gravity_cache *ci_cache, const int gcount_padded_i, - const float CoM_j[3], const struct multipole *restrict multi_j, - const float dim[3], const float r_s_inv, const struct engine *restrict e, - struct gpart *restrict gparts_i, const int gcount_i, - const struct cell *restrict cj) { - -#ifdef SWIFT_DEBUG_CHECKS - if (!e->s->periodic) - error("Calling truncated PP function in non-periodic setup."); -#endif - - /* Make the compiler understand we are in happy vectorization land */ - swift_declare_aligned_ptr(float, x, ci_cache->x, SWIFT_CACHE_ALIGNMENT); - swift_declare_aligned_ptr(float, y, ci_cache->y, SWIFT_CACHE_ALIGNMENT); - swift_declare_aligned_ptr(float, z, ci_cache->z, SWIFT_CACHE_ALIGNMENT); - swift_declare_aligned_ptr(float, epsilon, ci_cache->epsilon, - SWIFT_CACHE_ALIGNMENT); - swift_declare_aligned_ptr(float, a_x, ci_cache->a_x, SWIFT_CACHE_ALIGNMENT); - swift_declare_aligned_ptr(float, a_y, ci_cache->a_y, SWIFT_CACHE_ALIGNMENT); - swift_declare_aligned_ptr(float, a_z, ci_cache->a_z, SWIFT_CACHE_ALIGNMENT); - swift_declare_aligned_ptr(float, pot, ci_cache->pot, SWIFT_CACHE_ALIGNMENT); - swift_declare_aligned_ptr(int, active, ci_cache->active, - SWIFT_CACHE_ALIGNMENT); - swift_declare_aligned_ptr(int, use_mpole, ci_cache->use_mpole, - SWIFT_CACHE_ALIGNMENT); - swift_assume_size(gcount_padded_i, VEC_SIZE); - - /* Loop over all particles in ci... */ - for (int pid = 0; pid < gcount_padded_i; pid++) { - - /* Skip inactive particles */ - if (!active[pid]) continue; - - /* Skip particle that cannot use the multipole */ - if (!use_mpole[pid]) continue; - -#ifdef SWIFT_DEBUG_CHECKS - if (pid < gcount_i && !gpart_is_active(&gparts_i[pid], e)) - error("Active particle went through the cache"); - - /* Check that particles have been drifted to the current time */ - if (gparts_i[pid].ti_drift != e->ti_current) - error("gpi not drifted to current time"); - - /* Check that we are not updated an inhibited particle */ - if (gpart_is_inhibited(&gparts_i[pid], e)) - error("Updating an inhibited particle!"); - - /* Check that the particle was initialised */ - if (gparts_i[pid].initialised == 0) - error("Adding forces to an un-initialised gpart."); - - if (pid >= gcount_i) error("Adding forces to padded particle"); -#endif - - const float x_i = x[pid]; - const float y_i = y[pid]; - const float z_i = z[pid]; - - /* Some powers of the softening length */ - const float h_i = epsilon[pid]; - const float h_inv_i = 1.f / h_i; - - /* Distance to the Multipole */ - float dx = CoM_j[0] - x_i; - float dy = CoM_j[1] - y_i; - float dz = CoM_j[2] - z_i; - - /* Apply periodic BCs */ - dx = nearestf(dx, dim[0]); - dy = nearestf(dy, dim[1]); - dz = nearestf(dz, dim[2]); - - const float r2 = dx * dx + dy * dy + dz * dz; - -#ifdef SWIFT_DEBUG_CHECKS - const float r_max_j = cj->grav.multipole->r_max; - const float r_max2 = r_max_j * r_max_j; - const float theta_crit2 = e->gravity_properties->theta_crit2; - - /* 0.99 and 1.1 to avoid FP rounding false-positives */ - if (!gravity_M2P_accept(r_max2, theta_crit2 * 1.1, r2, 0.99 * h_i)) - error( - "use_mpole[i] set when M2P accept fails CoM=[%e %e %e] pos=[%e %e " - "%e], rmax=%e", - CoM_j[0], CoM_j[1], CoM_j[2], x_i, y_i, z_i, r_max_j); -#endif - - /* Interact! 
*/ - float f_x, f_y, f_z, pot_ij; - runner_iact_grav_pm_truncated(dx, dy, dz, r2, h_i, h_inv_i, r_s_inv, - multi_j, &f_x, &f_y, &f_z, &pot_ij); - - /* Store it back */ - a_x[pid] += f_x; - a_y[pid] += f_y; - a_z[pid] += f_z; - pot[pid] += pot_ij; - -#ifdef SWIFT_DEBUG_CHECKS - /* Update the interaction counter */ - if (pid < gcount_i) - gparts_i[pid].num_interacted += cj->grav.multipole->m_pole.num_gpart; -#endif - } -} - -/** - * @brief Computes the interaction of all the particles in a cell with all the - * particles of another cell. - * - * This function switches between the full potential and the truncated one - * depending on needs. It will also use the M2P (multipole) interaction - * for the subset of particles in either cell for which the distance criterion - * is valid. - * - * This function starts by constructing the require #gravity_cache for both - * cells and then call the specialised functions doing the actual work on - * the caches. It then write the data back to the particles. - * - * @param r The #runner. - * @param ci The first #cell. - * @param cj The other #cell. - * @param symmetric Are we updating both cells (1) or just ci (0) ? - * @param allow_mpole Are we allowing the use of P2M interactions ? - */ -static INLINE void runner_dopair_grav_pp(struct runner *r, struct cell *ci, - struct cell *cj, const int symmetric, - const int allow_mpole) { - - /* Recover some useful constants */ - const struct engine *e = r->e; - const int periodic = e->mesh->periodic; - const float dim[3] = {(float)e->mesh->dim[0], (float)e->mesh->dim[1], - (float)e->mesh->dim[2]}; - const float r_s_inv = e->mesh->r_s_inv; - const double min_trunc = e->mesh->r_cut_min; - - TIMER_TIC; - - /* Record activity status */ - const int ci_active = - cell_is_active_gravity(ci, e) && (ci->nodeID == e->nodeID); - const int cj_active = - cell_is_active_gravity(cj, e) && (cj->nodeID == e->nodeID); - - /* Anything to do here? 
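Aside: runner_dopair_grav_pp() relies on a gather/compute/scatter pattern: particle data are first copied into struct gravity_cache (a struct-of-arrays scratch buffer), the tight kernels then run on those contiguous arrays, and gravity_cache_write_back() finally scatters the accelerations back to the particles. A minimal sketch of the idea with invented toy_* types (allocation of the cache arrays omitted):

/* Gather/compute/scatter with a struct-of-arrays scratch buffer, in the
 * spirit of struct gravity_cache.  Toy types only. */
struct toy_part { double x[3], a[3]; float m; };

struct toy_cache { float *x, *y, *z, *m, *a_x, *a_y, *a_z; int count; };

/* Gather: copy what the kernels need into contiguous single-precision
 * arrays and reset the output accumulators. */
static void toy_cache_populate(struct toy_cache *c, const struct toy_part *p,
                               const int n) {
  for (int i = 0; i < n; ++i) {
    c->x[i] = (float)p[i].x[0];
    c->y[i] = (float)p[i].x[1];
    c->z[i] = (float)p[i].x[2];
    c->m[i] = p[i].m;
    c->a_x[i] = c->a_y[i] = c->a_z[i] = 0.f;
  }
  c->count = n;
}

/* Scatter: add the accumulated accelerations back onto the particles. */
static void toy_cache_write_back(const struct toy_cache *c,
                                 struct toy_part *p, const int n) {
  for (int i = 0; i < n; ++i) {
    p[i].a[0] += c->a_x[i];
    p[i].a[1] += c->a_y[i];
    p[i].a[2] += c->a_z[i];
  }
}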
*/ - if (!ci_active && !cj_active) return; - if (!ci_active && !symmetric) return; - - /* Check that we are not doing something stupid */ - if (ci->split || cj->split) error("Running P-P on splitable cells"); - - /* Let's start by checking things are drifted */ - if (!cell_are_gpart_drifted(ci, e)) error("Un-drifted gparts"); - if (!cell_are_gpart_drifted(cj, e)) error("Un-drifted gparts"); - if (cj_active && ci->grav.ti_old_multipole != e->ti_current) - error("Un-drifted multipole"); - if (ci_active && cj->grav.ti_old_multipole != e->ti_current) - error("Un-drifted multipole"); - - /* Caches to play with */ - struct gravity_cache *const ci_cache = &r->ci_gravity_cache; - struct gravity_cache *const cj_cache = &r->cj_gravity_cache; - - /* Shift to apply to the particles in each cell */ - const double shift_i[3] = {0., 0., 0.}; - const double shift_j[3] = {0., 0., 0.}; - - /* Recover the multipole info and shift the CoM locations */ - const float rmax_i = ci->grav.multipole->r_max; - const float rmax_j = cj->grav.multipole->r_max; - const float rmax2_i = rmax_i * rmax_i; - const float rmax2_j = rmax_j * rmax_j; - const struct multipole *multi_i = &ci->grav.multipole->m_pole; - const struct multipole *multi_j = &cj->grav.multipole->m_pole; - const float CoM_i[3] = {(float)(ci->grav.multipole->CoM[0] - shift_i[0]), - (float)(ci->grav.multipole->CoM[1] - shift_i[1]), - (float)(ci->grav.multipole->CoM[2] - shift_i[2])}; - const float CoM_j[3] = {(float)(cj->grav.multipole->CoM[0] - shift_j[0]), - (float)(cj->grav.multipole->CoM[1] - shift_j[1]), - (float)(cj->grav.multipole->CoM[2] - shift_j[2])}; - - /* Start by constructing particle caches */ - - /* Computed the padded counts */ - const int gcount_i = ci->grav.count; - const int gcount_j = cj->grav.count; - const int gcount_padded_i = gcount_i - (gcount_i % VEC_SIZE) + VEC_SIZE; - const int gcount_padded_j = gcount_j - (gcount_j % VEC_SIZE) + VEC_SIZE; - -#ifdef SWIFT_DEBUG_CHECKS - /* Check that we fit in cache */ - if (gcount_i > ci_cache->count || gcount_j > cj_cache->count) - error("Not enough space in the caches! gcount_i=%d gcount_j=%d", gcount_i, - gcount_j); -#endif - - /* Fill the caches */ - gravity_cache_populate(e->max_active_bin, allow_mpole, periodic, dim, - ci_cache, ci->grav.parts, gcount_i, gcount_padded_i, - shift_i, CoM_j, rmax2_j, ci, e->gravity_properties); - gravity_cache_populate(e->max_active_bin, allow_mpole, periodic, dim, - cj_cache, cj->grav.parts, gcount_j, gcount_padded_j, - shift_j, CoM_i, rmax2_i, cj, e->gravity_properties); - - /* Can we use the Newtonian version or do we need the truncated one ? 
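Aside: the padded counts computed above round the particle number up past the next multiple of VEC_SIZE so the inner loops get a vector-friendly trip count; the extra tail slots are given zero mass by the cache-population routines so they contribute nothing. The same expression, with a hypothetical vector length standing in for VEC_SIZE:

/* Round a particle count up past the next multiple of the vector length,
 * mirroring the expression used above (note it always adds a full extra
 * vector when the count is already a multiple). */
#define TOY_VEC_SIZE 8

static int toy_padded_count(const int gcount) {
  return gcount - (gcount % TOY_VEC_SIZE) + TOY_VEC_SIZE;
}
/* toy_padded_count(13) == 16, toy_padded_count(16) == 24 */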
*/ - if (!periodic) { - - /* Not periodic -> Can always use Newtonian potential */ - - /* Let's updated the active cell(s) only */ - if (ci_active) { - - /* First the P2P */ - runner_dopair_grav_pp_full(ci_cache, cj_cache, gcount_i, gcount_j, - gcount_padded_j, periodic, dim, e, - ci->grav.parts, cj->grav.parts); - - /* Then the M2P */ - if (allow_mpole) - runner_dopair_grav_pm_full(ci_cache, gcount_padded_i, CoM_j, multi_j, - periodic, dim, e, ci->grav.parts, gcount_i, - cj); - } - if (cj_active && symmetric) { - - /* First the P2P */ - runner_dopair_grav_pp_full(cj_cache, ci_cache, gcount_j, gcount_i, - gcount_padded_i, periodic, dim, e, - cj->grav.parts, ci->grav.parts); - - /* Then the M2P */ - if (allow_mpole) - runner_dopair_grav_pm_full(cj_cache, gcount_padded_j, CoM_i, multi_i, - periodic, dim, e, cj->grav.parts, gcount_j, - ci); - } - - } else { /* Periodic BC */ - - /* Get the relative distance between the CoMs */ - const double dx[3] = {CoM_j[0] - CoM_i[0], CoM_j[1] - CoM_i[1], - CoM_j[2] - CoM_i[2]}; - const double r2 = dx[0] * dx[0] + dx[1] * dx[1] + dx[2] * dx[2]; - - /* Get the maximal distance between any two particles */ - const double max_r = sqrt(r2) + rmax_i + rmax_j; - - /* Do we need to use the truncated interactions ? */ - if (max_r > min_trunc) { - - /* Periodic but far-away cells must use the truncated potential */ - - /* Let's updated the active cell(s) only */ - if (ci_active) { - - /* First the (truncated) P2P */ - runner_dopair_grav_pp_truncated(ci_cache, cj_cache, gcount_i, gcount_j, - gcount_padded_j, dim, r_s_inv, e, - ci->grav.parts, cj->grav.parts); - - /* Then the M2P */ - if (allow_mpole) - runner_dopair_grav_pm_truncated(ci_cache, gcount_padded_i, CoM_j, - multi_j, dim, r_s_inv, e, - ci->grav.parts, gcount_i, cj); - } - if (cj_active && symmetric) { - - /* First the (truncated) P2P */ - runner_dopair_grav_pp_truncated(cj_cache, ci_cache, gcount_j, gcount_i, - gcount_padded_i, dim, r_s_inv, e, - cj->grav.parts, ci->grav.parts); - - /* Then the M2P */ - if (allow_mpole) - runner_dopair_grav_pm_truncated(cj_cache, gcount_padded_j, CoM_i, - multi_i, dim, r_s_inv, e, - cj->grav.parts, gcount_j, ci); - } - - } else { - - /* Periodic but close-by cells can use the full Newtonian potential */ - - /* Let's updated the active cell(s) only */ - if (ci_active) { - - /* First the (Newtonian) P2P */ - runner_dopair_grav_pp_full(ci_cache, cj_cache, gcount_i, gcount_j, - gcount_padded_j, periodic, dim, e, - ci->grav.parts, cj->grav.parts); - - /* Then the M2P */ - if (allow_mpole) - runner_dopair_grav_pm_full(ci_cache, gcount_padded_i, CoM_j, multi_j, - periodic, dim, e, ci->grav.parts, gcount_i, - cj); - } - if (cj_active && symmetric) { - - /* First the (Newtonian) P2P */ - runner_dopair_grav_pp_full(cj_cache, ci_cache, gcount_j, gcount_i, - gcount_padded_i, periodic, dim, e, - cj->grav.parts, ci->grav.parts); - - /* Then the M2P */ - if (allow_mpole) - runner_dopair_grav_pm_full(cj_cache, gcount_padded_j, CoM_i, multi_i, - periodic, dim, e, cj->grav.parts, gcount_j, - ci); - } - } - } - - /* Write back to the particles */ - if (ci_active) gravity_cache_write_back(ci_cache, ci->grav.parts, gcount_i); - if (cj_active && symmetric) - gravity_cache_write_back(cj_cache, cj->grav.parts, gcount_j); - - TIMER_TOC(timer_dopair_grav_pp); -} - -/** - * @brief Compute the non-truncated gravity interactions between all particles - * of a cell and the particles of the other cell. 
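Aside: in the periodic branch above the choice between the Newtonian and the truncated kernels is made from geometry alone: the largest possible particle separation is the CoM distance plus both cell radii, and if even that lies inside the mesh transition scale (r_cut_min), truncation would be a no-op and the cheaper Newtonian kernel is used. A one-function restatement of that test (hypothetical helper name):

#include <math.h>

/* Decide whether the truncated pair kernels are needed: only if the most
 * distant particle pair of the two cells can reach beyond min_trunc. */
static int toy_needs_truncation(const double r2_CoM, const double rmax_i,
                                const double rmax_j, const double min_trunc) {
  const double max_r = sqrt(r2_CoM) + rmax_i + rmax_j;
  return max_r > min_trunc;
}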
- * - * The calculation is performed non-symmetrically using the pre-filled - * #gravity_cache structures. The loop over the j cache should auto-vectorize. - * - * @param ci_cache #gravity_cache contaning the particles to be updated. - * @param gcount The number of particles in the cell. - * @param gcount_padded The number of particles in the cell padded to the - * vector length. - * - * @param e The #engine (for debugging checks only). - * @param gparts The #gpart in the cell (for debugging checks only). - */ -static INLINE void runner_doself_grav_pp_full( - struct gravity_cache *restrict ci_cache, const int gcount, - const int gcount_padded, const struct engine *e, struct gpart *gparts) { - - /* Loop over all particles in ci... */ - for (int pid = 0; pid < gcount; pid++) { - - /* Skip inactive particles */ - if (!ci_cache->active[pid]) continue; - - const float x_i = ci_cache->x[pid]; - const float y_i = ci_cache->y[pid]; - const float z_i = ci_cache->z[pid]; - const float h_i = ci_cache->epsilon[pid]; - - /* Local accumulators for the acceleration */ - float a_x = 0.f, a_y = 0.f, a_z = 0.f, pot = 0.f; - - /* Make the compiler understand we are in happy vectorization land */ - swift_align_information(float, ci_cache->x, SWIFT_CACHE_ALIGNMENT); - swift_align_information(float, ci_cache->y, SWIFT_CACHE_ALIGNMENT); - swift_align_information(float, ci_cache->z, SWIFT_CACHE_ALIGNMENT); - swift_align_information(float, ci_cache->m, SWIFT_CACHE_ALIGNMENT); - swift_align_information(float, ci_cache->epsilon, SWIFT_CACHE_ALIGNMENT); - swift_assume_size(gcount_padded, VEC_SIZE); - - /* Loop over every other particle in the cell. */ - for (int pjd = 0; pjd < gcount_padded; pjd++) { - - /* No self interaction */ - if (pid == pjd) continue; - - /* Get info about j */ - const float x_j = ci_cache->x[pjd]; - const float y_j = ci_cache->y[pjd]; - const float z_j = ci_cache->z[pjd]; - const float mass_j = ci_cache->m[pjd]; - const float h_j = ci_cache->epsilon[pjd]; - - /* Compute the pairwise (square) distance. */ - /* Note: no need for periodic wrapping inside a cell */ - const float dx = x_j - x_i; - const float dy = y_j - y_i; - const float dz = z_j - z_i; - const float r2 = dx * dx + dy * dy + dz * dz; - - /* Pick the maximal softening length of i and j */ - const float h = max(h_i, h_j); - const float h2 = h * h; - const float h_inv = 1.f / h; - const float h_inv_3 = h_inv * h_inv * h_inv; - -#ifdef SWIFT_DEBUG_CHECKS - if (r2 == 0.f && h2 == 0.) - error("Interacting particles with 0 distance and 0 softening."); - - /* Check that particles have been drifted to the current time */ - if (gparts[pid].ti_drift != e->ti_current) - error("gpi not drifted to current time"); - if (pjd < gcount && gparts[pjd].ti_drift != e->ti_current && - !gpart_is_inhibited(&gparts[pjd], e)) - error("gpj not drifted to current time"); - - /* Check that we are not updated an inhibited particle */ - if (gpart_is_inhibited(&gparts[pid], e)) - error("Updating an inhibited particle!"); - - /* Check that the particle we interact with was not inhibited */ - if (pjd < gcount && gpart_is_inhibited(&gparts[pjd], e) && mass_j != 0.f) - error("Inhibited particle used as gravity source."); - - /* Check that the particle was initialised */ - if (gparts[pid].initialised == 0) - error("Adding forces to an un-initialised gpart."); -#endif - - /* Interact! 
*/ - float f_ij, pot_ij; - runner_iact_grav_pp_full(r2, h2, h_inv, h_inv_3, mass_j, &f_ij, &pot_ij); - - /* Store it back */ - a_x += f_ij * dx; - a_y += f_ij * dy; - a_z += f_ij * dz; - pot += pot_ij; - -#ifdef SWIFT_DEBUG_CHECKS - /* Update the interaction counter if it's not a padded gpart */ - if (pjd < gcount && !gpart_is_inhibited(&gparts[pjd], e)) - gparts[pid].num_interacted++; -#endif - } - - /* Store everything back in cache */ - ci_cache->a_x[pid] += a_x; - ci_cache->a_y[pid] += a_y; - ci_cache->a_z[pid] += a_z; - ci_cache->pot[pid] += pot; - } -} - -/** - * @brief Compute the truncated gravity interactions between all particles - * of a cell and the particles of the other cell. - * - * The calculation is performed non-symmetrically using the pre-filled - * #gravity_cache structures. The loop over the j cache should auto-vectorize. - * - * This function only makes sense in periodic BCs. - * - * @param ci_cache #gravity_cache contaning the particles to be updated. - * @param gcount The number of particles in the cell. - * @param gcount_padded The number of particles in the cell padded to the - * vector length. - * @param r_s_inv The inverse of the gravity-mesh smoothing-scale. - * - * @param e The #engine (for debugging checks only). - * @param gparts The #gpart in the cell (for debugging checks only). - */ -static INLINE void runner_doself_grav_pp_truncated( - struct gravity_cache *restrict ci_cache, const int gcount, - const int gcount_padded, const float r_s_inv, const struct engine *e, - struct gpart *gparts) { - -#ifdef SWIFT_DEBUG_CHECKS - if (!e->s->periodic) - error("Calling truncated PP function in non-periodic setup."); -#endif - - /* Loop over all particles in ci... */ - for (int pid = 0; pid < gcount; pid++) { - - /* Skip inactive particles */ - if (!ci_cache->active[pid]) continue; - - const float x_i = ci_cache->x[pid]; - const float y_i = ci_cache->y[pid]; - const float z_i = ci_cache->z[pid]; - const float h_i = ci_cache->epsilon[pid]; - - /* Local accumulators for the acceleration and potential */ - float a_x = 0.f, a_y = 0.f, a_z = 0.f, pot = 0.f; - - /* Make the compiler understand we are in happy vectorization land */ - swift_align_information(float, ci_cache->x, SWIFT_CACHE_ALIGNMENT); - swift_align_information(float, ci_cache->y, SWIFT_CACHE_ALIGNMENT); - swift_align_information(float, ci_cache->z, SWIFT_CACHE_ALIGNMENT); - swift_align_information(float, ci_cache->m, SWIFT_CACHE_ALIGNMENT); - swift_align_information(float, ci_cache->epsilon, SWIFT_CACHE_ALIGNMENT); - swift_assume_size(gcount_padded, VEC_SIZE); - - /* Loop over every other particle in the cell. */ - for (int pjd = 0; pjd < gcount_padded; pjd++) { - - /* No self interaction */ - if (pid == pjd) continue; - - /* Get info about j */ - const float x_j = ci_cache->x[pjd]; - const float y_j = ci_cache->y[pjd]; - const float z_j = ci_cache->z[pjd]; - const float mass_j = ci_cache->m[pjd]; - const float h_j = ci_cache->epsilon[pjd]; - - /* Compute the pairwise (square) distance. */ - /* Note: no need for periodic wrapping inside a cell */ - const float dx = x_j - x_i; - const float dy = y_j - y_i; - const float dz = z_j - z_i; - - const float r2 = dx * dx + dy * dy + dz * dz; - - /* Pick the maximal softening length of i and j */ - const float h = max(h_i, h_j); - const float h2 = h * h; - const float h_inv = 1.f / h; - const float h_inv_3 = h_inv * h_inv * h_inv; - -#ifdef SWIFT_DEBUG_CHECKS - if (r2 == 0.f && h2 == 0.) 
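Aside: stripped of the cache, padding and debug machinery, the self P2P kernels above are a plain double loop over the cell's own particles, skipping only the i == j pair. A self-contained brute-force equivalent (Plummer softening, G = 1, not SWIFT code):

#include <math.h>

/* Brute-force direct summation inside one cell, mirroring the shape of the
 * removed self P2P loops: every ordered pair once, the self pair skipped. */
static void toy_self_gravity(const int n, const float *x, const float *y,
                             const float *z, const float *m, const float eps,
                             float *a_x, float *a_y, float *a_z) {
  for (int i = 0; i < n; ++i) {
    float ax = 0.f, ay = 0.f, az = 0.f;
    for (int j = 0; j < n; ++j) {
      if (i == j) continue;                  /* no self interaction */
      const float dx = x[j] - x[i];
      const float dy = y[j] - y[i];
      const float dz = z[j] - z[i];
      const float d_inv =
          1.f / sqrtf(dx * dx + dy * dy + dz * dz + eps * eps);
      const float f = m[j] * d_inv * d_inv * d_inv;
      ax += f * dx;
      ay += f * dy;
      az += f * dz;
    }
    a_x[i] += ax;
    a_y[i] += ay;
    a_z[i] += az;
  }
}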
- error("Interacting particles with 0 distance and 0 softening."); - - /* Check that particles have been drifted to the current time */ - if (gparts[pid].ti_drift != e->ti_current) - error("gpi not drifted to current time"); - if (pjd < gcount && gparts[pjd].ti_drift != e->ti_current && - !gpart_is_inhibited(&gparts[pjd], e)) - error("gpj not drifted to current time"); - - /* Check that we are not updated an inhibited particle */ - if (gpart_is_inhibited(&gparts[pid], e)) - error("Updating an inhibited particle!"); - - /* Check that the particle we interact with was not inhibited */ - if (pjd < gcount && gpart_is_inhibited(&gparts[pjd], e) && mass_j != 0.f) - error("Inhibited particle used as gravity source."); - - /* Check that the particle was initialised */ - if (gparts[pid].initialised == 0) - error("Adding forces to an un-initialised gpart."); -#endif - - /* Interact! */ - float f_ij, pot_ij; - runner_iact_grav_pp_truncated(r2, h2, h_inv, h_inv_3, mass_j, r_s_inv, - &f_ij, &pot_ij); - - /* Store it back */ - a_x += f_ij * dx; - a_y += f_ij * dy; - a_z += f_ij * dz; - pot += pot_ij; - -#ifdef SWIFT_DEBUG_CHECKS - /* Update the interaction counter if it's not a padded gpart */ - if (pjd < gcount && !gpart_is_inhibited(&gparts[pjd], e)) - gparts[pid].num_interacted++; -#endif - } - - /* Store everything back in cache */ - ci_cache->a_x[pid] += a_x; - ci_cache->a_y[pid] += a_y; - ci_cache->a_z[pid] += a_z; - ci_cache->pot[pid] += pot; - } -} - -/** - * @brief Computes the interaction of all the particles in a cell with all the - * other ones. - * - * This function switches between the full potential and the truncated one - * depending on needs. - * - * This function starts by constructing the require #gravity_cache for the - * cell and then call the specialised functions doing the actual work on - * the cache. It then write the data back to the particles. - * - * @param r The #runner. - * @param c The #cell. - */ -static INLINE void runner_doself_grav_pp(struct runner *r, struct cell *c) { - - /* Recover some useful constants */ - const struct engine *e = r->e; - const int periodic = e->mesh->periodic; - const float r_s_inv = e->mesh->r_s_inv; - const double min_trunc = e->mesh->r_cut_min; - - TIMER_TIC; - -#ifdef SWIFT_DEBUG_CHECKS - if (c->grav.count == 0) error("Doing self gravity on an empty cell !"); -#endif - - /* Anything to do here? */ - if (!cell_is_active_gravity(c, e)) return; - - /* Check that we are not doing something stupid */ - if (c->split) error("Running P-P on a splitable cell"); - - /* Do we need to start by drifting things ? */ - if (!cell_are_gpart_drifted(c, e)) error("Un-drifted gparts"); - - /* Start by constructing a cache for the particles */ - struct gravity_cache *const ci_cache = &r->ci_gravity_cache; - - /* Shift to apply to the particles in the cell */ - const double loc[3] = {c->loc[0] + 0.5 * c->width[0], - c->loc[1] + 0.5 * c->width[1], - c->loc[2] + 0.5 * c->width[2]}; - - /* Computed the padded counts */ - const int gcount = c->grav.count; - const int gcount_padded = gcount - (gcount % VEC_SIZE) + VEC_SIZE; - -#ifdef SWIFT_DEBUG_CHECKS - /* Check that we fit in cache */ - if (gcount > ci_cache->count) - error("Not enough space in the cache! gcount=%d", gcount); -#endif - - /* Fill the cache */ - gravity_cache_populate_no_mpole(e->max_active_bin, ci_cache, c->grav.parts, - gcount, gcount_padded, loc, c, - e->gravity_properties); - - /* Can we use the Newtonian version or do we need the truncated one ? 
*/ - if (!periodic) { - - /* Not periodic -> Can always use Newtonian potential */ - runner_doself_grav_pp_full(ci_cache, gcount, gcount_padded, e, - c->grav.parts); - - } else { - - /* Get the maximal distance between any two particles */ - const double max_r = 2. * c->grav.multipole->r_max; - - /* Do we need to use the truncated interactions ? */ - if (max_r > min_trunc) { - - /* Periodic but far-away cells must use the truncated potential */ - runner_doself_grav_pp_truncated(ci_cache, gcount, gcount_padded, r_s_inv, - e, c->grav.parts); - - } else { - - /* Periodic but close-by cells can use the full Newtonian potential */ - runner_doself_grav_pp_full(ci_cache, gcount, gcount_padded, e, - c->grav.parts); - } - } - - /* Write back to the particles */ - gravity_cache_write_back(ci_cache, c->grav.parts, gcount); - - TIMER_TOC(timer_doself_grav_pp); -} - -/** - * @brief Computes the interaction of the field tensor and multipole - * of two cells symmetrically. - * - * @param r The #runner. - * @param ci The first #cell. - * @param cj The second #cell. - */ -static INLINE void runner_dopair_grav_mm_symmetric(struct runner *r, - struct cell *restrict ci, - struct cell *restrict cj) { - - /* Some constants */ - const struct engine *e = r->e; - const struct gravity_props *props = e->gravity_properties; - const int periodic = e->mesh->periodic; - const double dim[3] = {e->mesh->dim[0], e->mesh->dim[1], e->mesh->dim[2]}; - const float r_s_inv = e->mesh->r_s_inv; - - TIMER_TIC; - - /* Anything to do here? */ - if ((!cell_is_active_gravity_mm(ci, e) || ci->nodeID != engine_rank) || - (!cell_is_active_gravity_mm(cj, e) || cj->nodeID != engine_rank)) - error("Invalid state in symmetric M-M calculation!"); - - /* Short-cut to the multipole */ - const struct multipole *multi_i = &ci->grav.multipole->m_pole; - const struct multipole *multi_j = &cj->grav.multipole->m_pole; - -#ifdef SWIFT_DEBUG_CHECKS - if (ci == cj) error("Interacting a cell with itself using M2L"); - - if (multi_i->num_gpart == 0) - error("Multipole i does not seem to have been set."); - - if (multi_j->num_gpart == 0) - error("Multipole j does not seem to have been set."); - - if (ci->grav.multipole->pot.ti_init != e->ti_current) - error("ci->grav tensor not initialised."); - - if (ci->grav.multipole->pot.ti_init != e->ti_current) - error("cj->grav tensor not initialised."); - - if (ci->grav.ti_old_multipole != e->ti_current) - error( - "Undrifted multipole ci->grav.ti_old_multipole=%lld ci->nodeID=%d " - "cj->nodeID=%d e->ti_current=%lld", - ci->grav.ti_old_multipole, ci->nodeID, cj->nodeID, e->ti_current); - - if (cj->grav.ti_old_multipole != e->ti_current) - error( - "Undrifted multipole cj->grav.ti_old_multipole=%lld cj->nodeID=%d " - "ci->nodeID=%d e->ti_current=%lld", - cj->grav.ti_old_multipole, cj->nodeID, ci->nodeID, e->ti_current); -#endif - - /* Let's interact at this level */ - gravity_M2L_symmetric(&ci->grav.multipole->pot, &cj->grav.multipole->pot, - multi_i, multi_j, ci->grav.multipole->CoM, - cj->grav.multipole->CoM, props, periodic, dim, r_s_inv); - - TIMER_TOC(timer_dopair_grav_mm); -} - -/** - * @brief Computes the interaction of the field tensor in a cell with the - * multipole of another cell. - * - * @param r The #runner. - * @param ci The #cell with field tensor to interact. - * @param cj The #cell with the multipole. 
- */ -static INLINE void runner_dopair_grav_mm_nonsym( - struct runner *r, struct cell *restrict ci, - const struct cell *restrict cj) { - - /* Some constants */ - const struct engine *e = r->e; - const struct gravity_props *props = e->gravity_properties; - const int periodic = e->mesh->periodic; - const double dim[3] = {e->mesh->dim[0], e->mesh->dim[1], e->mesh->dim[2]}; - const float r_s_inv = e->mesh->r_s_inv; - - TIMER_TIC; - - /* Anything to do here? */ - if (!cell_is_active_gravity_mm(ci, e) || ci->nodeID != engine_rank) return; - - /* Short-cut to the multipole */ - const struct multipole *multi_j = &cj->grav.multipole->m_pole; - -#ifdef SWIFT_DEBUG_CHECKS - if (ci == cj) error("Interacting a cell with itself using M2L"); - - if (multi_j->num_gpart == 0) - error("Multipole does not seem to have been set."); - - if (ci->grav.multipole->pot.ti_init != e->ti_current) - error("ci->grav tensor not initialised."); - - if (cj->grav.ti_old_multipole != e->ti_current) - error( - "Undrifted multipole cj->grav.ti_old_multipole=%lld cj->nodeID=%d " - "ci->nodeID=%d e->ti_current=%lld", - cj->grav.ti_old_multipole, cj->nodeID, ci->nodeID, e->ti_current); -#endif - - /* Let's interact at this level */ - gravity_M2L_nonsym(&ci->grav.multipole->pot, multi_j, ci->grav.multipole->CoM, - cj->grav.multipole->CoM, props, periodic, dim, r_s_inv); - - TIMER_TOC(timer_dopair_grav_mm); -} - -/** - * @brief Call the M-M calculation on two cells if active. - * - * @param r The #runner object. - * @param ci The first #cell. - * @param cj The second #cell. - */ -static INLINE void runner_dopair_grav_mm(struct runner *r, - struct cell *restrict ci, - struct cell *restrict cj) { - - const struct engine *e = r->e; - - /* What do we need to do? */ - const int do_i = - cell_is_active_gravity_mm(ci, e) && (ci->nodeID == e->nodeID); - const int do_j = - cell_is_active_gravity_mm(cj, e) && (cj->nodeID == e->nodeID); - - /* Do we need drifting first? */ - if (ci->grav.ti_old_multipole < e->ti_current) cell_drift_multipole(ci, e); - if (cj->grav.ti_old_multipole < e->ti_current) cell_drift_multipole(cj, e); - - /* Interact! */ - if (do_i && do_j) - runner_dopair_grav_mm_symmetric(r, ci, cj); - else if (do_i) - runner_dopair_grav_mm_nonsym(r, ci, cj); - else if (do_j) - runner_dopair_grav_mm_nonsym(r, cj, ci); -} - -/** - * @brief Computes all the M-M interactions between all the well-separated (at - * rebuild) pairs of progenies of the two cells. - * - * @param r The #runner thread. - * @param flags The task flag containing the list of well-separated pairs as a - * bit-field. - * @param ci The first #cell. - * @param cj The second #cell. - */ -static INLINE void runner_dopair_grav_mm_progenies(struct runner *r, - const long long flags, - struct cell *restrict ci, - struct cell *restrict cj) { - - /* Loop over all pairs of progenies */ - for (int i = 0; i < 8; i++) { - if (ci->progeny[i] != NULL) { - for (int j = 0; j < 8; j++) { - if (cj->progeny[j] != NULL) { - - struct cell *cpi = ci->progeny[i]; - struct cell *cpj = cj->progeny[j]; - - const int flag = i * 8 + j; - - /* Did we agree to use an M-M interaction here at the last rebuild? 
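Aside: runner_dopair_grav_mm_progenies() replays decisions taken at rebuild time: each of the 8 x 8 = 64 possible progeny pairs maps to one bit of the task flag, bit i*8 + j meaning "progeny i of ci and progeny j of cj were well separated, use M-M". A tiny illustration of setting and testing such a mask (an unsigned mask is used here purely for the example):

#include <stdio.h>

/* The 8 x 8 progeny-pair decisions packed into one 64-bit mask, bit
 * (i*8 + j) set when progeny i of ci and progeny j of cj may use M-M. */
static unsigned long long toy_set_pair(unsigned long long flags, int i,
                                       int j) {
  return flags | (1ULL << (i * 8 + j));
}

static int toy_pair_is_mm(unsigned long long flags, int i, int j) {
  return (flags & (1ULL << (i * 8 + j))) != 0;
}

int main(void) {
  unsigned long long flags = 0;
  flags = toy_set_pair(flags, 2, 5);
  /* (i, j) index progeny of ci and cj respectively, so (5, 2) differs. */
  printf("%d %d\n", toy_pair_is_mm(flags, 2, 5), toy_pair_is_mm(flags, 5, 2));
  return 0; /* prints "1 0" */
}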
*/ - if (flags & (1ULL << flag)) runner_dopair_grav_mm(r, cpi, cpj); - } - } - } - } -} - -static INLINE void runner_dopair_recursive_grav_pm(struct runner *r, - struct cell *ci, - const struct cell *cj) { - /* Some constants */ - const struct engine *e = r->e; - const int periodic = e->mesh->periodic; - const float dim[3] = {(float)e->mesh->dim[0], (float)e->mesh->dim[1], - (float)e->mesh->dim[2]}; - const float r_s_inv = e->mesh->r_s_inv; - - /* Anything to do here? */ - if (!(cell_is_active_gravity(ci, e) && ci->nodeID == e->nodeID)) return; - -#ifdef SWIFT_DEBUG_CHECKS - /* Early abort? */ - if (ci->grav.count == 0 || cj->grav.count == 0) - error("Doing pair gravity on an empty cell !"); - - /* Sanity check */ - if (ci == cj) error("Pair interaction between a cell and itself."); - - if (cj->grav.ti_old_multipole != e->ti_current) - error("cj->grav.multipole not drifted."); -#endif - - /* Can we recurse further? */ - if (ci->split) { - - /* Loop over ci's children */ - for (int k = 0; k < 8; k++) { - if (ci->progeny[k] != NULL) - runner_dopair_recursive_grav_pm(r, ci->progeny[k], cj); - } - - /* Ok, let's do the interaction here */ - } else { - - /* Start by constructing particle caches */ - - /* Cache to play with */ - struct gravity_cache *const ci_cache = &r->ci_gravity_cache; - - /* Computed the padded counts */ - const int gcount_i = ci->grav.count; - const int gcount_padded_i = gcount_i - (gcount_i % VEC_SIZE) + VEC_SIZE; - -#ifdef SWIFT_DEBUG_CHECKS - /* Check that we fit in cache */ - if (gcount_i > ci_cache->count) - error("Not enough space in the cache! gcount_i=%d", gcount_i); -#endif - - /* Recover the multipole info and the CoM locations */ - const struct multipole *multi_j = &cj->grav.multipole->m_pole; - const float r_max = cj->grav.multipole->r_max; - const float CoM_j[3] = {(float)(cj->grav.multipole->CoM[0]), - (float)(cj->grav.multipole->CoM[1]), - (float)(cj->grav.multipole->CoM[2])}; - - /* Fill the cache */ - gravity_cache_populate_all_mpole( - e->max_active_bin, periodic, dim, ci_cache, ci->grav.parts, gcount_i, - gcount_padded_i, ci, CoM_j, r_max * r_max, e->gravity_properties); - - /* Can we use the Newtonian version or do we need the truncated one ? */ - if (!periodic) { - - runner_dopair_grav_pm_full(ci_cache, gcount_padded_i, CoM_j, multi_j, - periodic, dim, e, ci->grav.parts, gcount_i, - cj); - - } else { - - runner_dopair_grav_pm_truncated(ci_cache, gcount_padded_i, CoM_j, multi_j, - dim, r_s_inv, e, ci->grav.parts, gcount_i, - cj); - } - - /* Write back to the particles */ - gravity_cache_write_back(ci_cache, ci->grav.parts, gcount_i); - } -} - -/** - * @brief Computes the interaction of all the particles in a cell with all the - * particles of another cell. - * - * This function will try to recurse as far down the tree as possible and only - * default to direct summation if there is no better option. - * - * If using periodic BCs, we will abort the recursion if th distance between the - * cells is larger than the set threshold. - * - * @param r The #runner. - * @param ci The first #cell. - * @param cj The other #cell. - * @param gettimer Are we timing this ? 
- */ -static INLINE void runner_dopair_recursive_grav(struct runner *r, - struct cell *ci, - struct cell *cj, int gettimer) { - - /* Some constants */ - const struct engine *e = r->e; - const int nodeID = e->nodeID; - const int periodic = e->mesh->periodic; - const double dim[3] = {e->mesh->dim[0], e->mesh->dim[1], e->mesh->dim[2]}; - const double theta_crit2 = e->gravity_properties->theta_crit2; - const double max_distance = e->mesh->r_cut_max; - - /* Anything to do here? */ - if (!((cell_is_active_gravity(ci, e) && ci->nodeID == nodeID) || - (cell_is_active_gravity(cj, e) && cj->nodeID == nodeID))) - return; - -#ifdef SWIFT_DEBUG_CHECKS - - const int gcount_i = ci->grav.count; - const int gcount_j = cj->grav.count; - - /* Early abort? */ - if (gcount_i == 0 || gcount_j == 0) - error("Doing pair gravity on an empty cell !"); - - /* Sanity check */ - if (ci == cj) error("Pair interaction between a cell and itself."); - - if (cell_is_active_gravity(ci, e) && - ci->grav.ti_old_multipole != e->ti_current) - error("ci->grav.multipole not drifted."); - if (cell_is_active_gravity(cj, e) && - cj->grav.ti_old_multipole != e->ti_current) - error("cj->grav.multipole not drifted."); -#endif - - TIMER_TIC; - - /* Recover the multipole information */ - struct gravity_tensors *const multi_i = ci->grav.multipole; - struct gravity_tensors *const multi_j = cj->grav.multipole; - - /* Get the distance between the CoMs */ - double dx = multi_i->CoM[0] - multi_j->CoM[0]; - double dy = multi_i->CoM[1] - multi_j->CoM[1]; - double dz = multi_i->CoM[2] - multi_j->CoM[2]; - - /* Apply BC */ - if (periodic) { - dx = nearest(dx, dim[0]); - dy = nearest(dy, dim[1]); - dz = nearest(dz, dim[2]); - } - const double r2 = dx * dx + dy * dy + dz * dz; - - /* Minimal distance between any 2 particles in the two cells */ - const double r_lr_check = sqrt(r2) - (multi_i->r_max + multi_j->r_max); - - /* Are we beyond the distance where the truncated forces are 0? */ - if (periodic && r_lr_check > max_distance) { - -#ifdef SWIFT_DEBUG_CHECKS - /* Need to account for the interactions we missed */ - if (cell_is_active_gravity(ci, e)) - multi_i->pot.num_interacted += multi_j->m_pole.num_gpart; - if (cell_is_active_gravity(cj, e)) - multi_j->pot.num_interacted += multi_i->m_pole.num_gpart; -#endif - return; - } - - /* OK, we actually need to compute this pair. Let's find the cheapest - * option... */ - - /* Can we use M-M interactions ? */ - if (gravity_M2L_accept(multi_i->r_max, multi_j->r_max, theta_crit2, r2, - multi_i->m_pole.max_softening, - multi_j->m_pole.max_softening)) { - - /* Go M-M */ - runner_dopair_grav_mm(r, ci, cj); - - } else if (!ci->split && !cj->split) { - - /* We have two leaves. Go P-P. */ - runner_dopair_grav_pp(r, ci, cj, /*symmetric*/ 1, /*allow_mpoles*/ 1); - - } else { - - /* Alright, we'll have to split and recurse. */ - /* We know at least one of ci and cj is splittable */ - - const double ri_max = multi_i->r_max; - const double rj_max = multi_j->r_max; - - /* Split the larger of the two cells and start over again */ - if (ri_max > rj_max) { - - /* Can we actually split that interaction ? */ - if (ci->split) { - - /* Loop over ci's children */ - for (int k = 0; k < 8; k++) { - if (ci->progeny[k] != NULL) - runner_dopair_recursive_grav(r, ci->progeny[k], cj, 0); - } - - } else { - /* cj is split */ - - /* MATTHIEU: This could maybe be replaced by P-M interactions ? 
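Aside: the recursion above always tries the cheapest option first: an M-M interaction if the opening criterion allows it, direct summation if both cells are leaves, and otherwise it opens the spatially larger of the two cells (falling back to the other one if the larger cannot be split) and recurses. The control-flow skeleton on toy types, with the acceptance test and kernels stubbed out (none of this is SWIFT code):

#include <stddef.h>

struct toy_node {
  double r_max;                    /* radius of the bounding sphere */
  struct toy_node *progeny[8];     /* NULL when absent */
  int split;                       /* 1 if the node has progeny */
};

/* Stubs standing in for the real acceptance test and kernels. */
static int toy_mm_accept(const struct toy_node *a, const struct toy_node *b) {
  return a->r_max + b->r_max < 1.0;          /* placeholder criterion */
}
static void toy_do_mm(struct toy_node *a, struct toy_node *b) { (void)a; (void)b; }
static void toy_do_pp(struct toy_node *a, struct toy_node *b) { (void)a; (void)b; }

static void toy_pair_recursive(struct toy_node *ci, struct toy_node *cj) {
  if (toy_mm_accept(ci, cj)) {
    toy_do_mm(ci, cj);                       /* cheap far-field interaction */
  } else if (!ci->split && !cj->split) {
    toy_do_pp(ci, cj);                       /* two leaves: direct summation */
  } else {
    /* Open the larger cell if it can be split, otherwise the other one. */
    const int open_ci = ci->r_max > cj->r_max ? ci->split : !cj->split;
    if (open_ci) {
      for (int k = 0; k < 8; ++k)
        if (ci->progeny[k] != NULL) toy_pair_recursive(ci->progeny[k], cj);
    } else {
      for (int k = 0; k < 8; ++k)
        if (cj->progeny[k] != NULL) toy_pair_recursive(ci, cj->progeny[k]);
    }
  }
}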
*/ - - /* Loop over cj's children */ - for (int k = 0; k < 8; k++) { - if (cj->progeny[k] != NULL) - runner_dopair_recursive_grav(r, ci, cj->progeny[k], 0); - } - } - } else { - - /* Can we actually split that interaction ? */ - if (cj->split) { - - /* Loop over cj's children */ - for (int k = 0; k < 8; k++) { - if (cj->progeny[k] != NULL) - runner_dopair_recursive_grav(r, ci, cj->progeny[k], 0); - } - - } else { - /* ci is split */ - - /* MATTHIEU: This could maybe be replaced by P-M interactions ? */ - - /* Loop over ci's children */ - for (int k = 0; k < 8; k++) { - if (ci->progeny[k] != NULL) - runner_dopair_recursive_grav(r, ci->progeny[k], cj, 0); - } - } - } - } - - if (gettimer) TIMER_TOC(timer_dosub_pair_grav); -} - -/** - * @brief Computes the interaction of all the particles in a cell. - * - * This function will try to recurse as far down the tree as possible and only - * default to direct summation if there is no better option. - * - * @param r The #runner. - * @param c The first #cell. - * @param gettimer Are we timing this ? - */ -static INLINE void runner_doself_recursive_grav(struct runner *r, - struct cell *c, int gettimer) { - - /* Some constants */ - const struct engine *e = r->e; - -#ifdef SWIFT_DEBUG_CHECKS - /* Early abort? */ - if (c->grav.count == 0) error("Doing self gravity on an empty cell !"); -#endif - - TIMER_TIC; - - /* Anything to do here? */ - if (!cell_is_active_gravity(c, e)) return; - - /* If the cell is split, interact each progeny with itself, and with - each of its siblings. */ - if (c->split) { - - for (int j = 0; j < 8; j++) { - if (c->progeny[j] != NULL) { - - runner_doself_recursive_grav(r, c->progeny[j], 0); - - for (int k = j + 1; k < 8; k++) { - if (c->progeny[k] != NULL) { - - runner_dopair_recursive_grav(r, c->progeny[j], c->progeny[k], 0); - } - } - } - } - } - - /* If the cell is not split, then just go for it... */ - else { - - runner_doself_grav_pp(r, c); - } - - if (gettimer) TIMER_TOC(timer_dosub_self_grav); -} - -/** - * @brief Performs all M-M interactions between a given top-level cell and all - * the other top-levels that are far enough. - * - * @param r The thread #runner. - * @param ci The #cell of interest. - * @param timer Are we timing this ? - */ -static INLINE void runner_do_grav_long_range(struct runner *r, struct cell *ci, - int timer) { - - /* Some constants */ - const struct engine *e = r->e; - const int periodic = e->mesh->periodic; - const double dim[3] = {e->mesh->dim[0], e->mesh->dim[1], e->mesh->dim[2]}; - const double theta_crit2 = e->gravity_properties->theta_crit2; - const double max_distance2 = e->mesh->r_cut_max * e->mesh->r_cut_max; - - TIMER_TIC; - - /* Recover the list of top-level cells */ - struct cell *cells = e->s->cells_top; - int *cells_with_particles = e->s->cells_with_particles_top; - const int nr_cells_with_particles = e->s->nr_cells_with_particles; - - /* Anything to do here? 
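Aside: the self recursion is the standard one for tree codes: each progeny interacts with itself, and every unordered pair of progenies is handed to the pair recursion exactly once thanks to the k = j + 1 start. A skeleton with toy types and stubbed-out kernels:

#include <stddef.h>

struct toy_self_node { struct toy_self_node *progeny[8]; int split; };

/* Stubs standing in for the leaf and pair interactions. */
static void toy_self_leaf(struct toy_self_node *c) { (void)c; }
static void toy_pair(struct toy_self_node *a, struct toy_self_node *b) {
  (void)a;
  (void)b;
}

static void toy_self_recursive(struct toy_self_node *c) {
  if (!c->split) {
    toy_self_leaf(c);                           /* leaf: direct summation */
    return;
  }
  for (int j = 0; j < 8; ++j) {
    if (c->progeny[j] == NULL) continue;
    toy_self_recursive(c->progeny[j]);          /* progeny with itself */
    for (int k = j + 1; k < 8; ++k)
      if (c->progeny[k] != NULL)
        toy_pair(c->progeny[j], c->progeny[k]); /* each sibling pair once */
  }
}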
*/ - if (!cell_is_active_gravity(ci, e)) return; - - if (ci->nodeID != engine_rank) - error("Non-local cell in long-range gravity task!"); - - /* Check multipole has been drifted */ - if (ci->grav.ti_old_multipole < e->ti_current) cell_drift_multipole(ci, e); - - /* Get this cell's multipole information */ - struct gravity_tensors *const multi_i = ci->grav.multipole; - - /* Find this cell's top-level (great-)parent */ - struct cell *top = ci; - while (top->parent != NULL) top = top->parent; - - /* Recover the top-level multipole (for distance checks) */ - struct gravity_tensors *const multi_top = top->grav.multipole; - const double CoM_rebuild_top[3] = {multi_top->CoM_rebuild[0], - multi_top->CoM_rebuild[1], - multi_top->CoM_rebuild[2]}; - - /* Loop over all the top-level cells and go for a M-M interaction if - * well-separated */ - for (int n = 0; n < nr_cells_with_particles; ++n) { - - /* Handle on the top-level cell and it's gravity business*/ - const struct cell *cj = &cells[cells_with_particles[n]]; - const struct gravity_tensors *const multi_j = cj->grav.multipole; - - /* Avoid self contributions */ - if (top == cj) continue; - - /* Skip empty cells */ - if (multi_j->m_pole.M_000 == 0.f) continue; - - /* Can we escape early in the periodic BC case? */ - if (periodic) { - - /* Minimal distance between any pair of particles */ - const double min_radius2 = - cell_min_dist2_same_size(top, cj, periodic, dim); - - /* Are we beyond the distance where the truncated forces are 0 ?*/ - if (min_radius2 > max_distance2) { - -#ifdef SWIFT_DEBUG_CHECKS - /* Need to account for the interactions we missed */ - multi_i->pot.num_interacted += multi_j->m_pole.num_gpart; -#endif - - /* Record that this multipole received a contribution */ - multi_i->pot.interacted = 1; +struct runner; +struct cell; - /* We are done here. */ - continue; - } - } +void runner_do_grav_down(struct runner *r, struct cell *c, int timer); - /* Get the distance between the CoMs at the last rebuild*/ - double dx_r = CoM_rebuild_top[0] - multi_j->CoM_rebuild[0]; - double dy_r = CoM_rebuild_top[1] - multi_j->CoM_rebuild[1]; - double dz_r = CoM_rebuild_top[2] - multi_j->CoM_rebuild[2]; +void runner_doself_recursive_grav(struct runner *r, struct cell *c, + int gettimer); - /* Apply BC */ - if (periodic) { - dx_r = nearest(dx_r, dim[0]); - dy_r = nearest(dy_r, dim[1]); - dz_r = nearest(dz_r, dim[2]); - } - const double r2_rebuild = dx_r * dx_r + dy_r * dy_r + dz_r * dz_r; +void runner_dopair_recursive_grav(struct runner *r, struct cell *ci, + struct cell *cj, int gettimer); - /* Are we in charge of this cell pair? 
*/ - if (gravity_M2L_accept(multi_top->r_max_rebuild, multi_j->r_max_rebuild, - theta_crit2, r2_rebuild, - multi_top->m_pole.max_softening, - multi_j->m_pole.max_softening)) { +void runner_dopair_grav_mm_progenies(struct runner *r, const long long flags, + struct cell *restrict ci, + struct cell *restrict cj); - /* Call the PM interaction fucntion on the active sub-cells of ci */ - runner_dopair_grav_mm_nonsym(r, ci, cj); - // runner_dopair_recursive_grav_pm(r, ci, cj); +void runner_do_grav_long_range(struct runner *r, struct cell *ci, int timer); - /* Record that this multipole received a contribution */ - multi_i->pot.interacted = 1; +/* Internal functions (for unit tests and debugging) */ - } /* We are in charge of this pair */ - } /* Loop over top-level cells */ +void runner_doself_grav_pp(struct runner *r, struct cell *c); - if (timer) TIMER_TOC(timer_dograv_long_range); -} +void runner_dopair_grav_pp(struct runner *r, struct cell *ci, struct cell *cj, + const int symmetric, const int allow_mpole); #endif /* SWIFT_RUNNER_DOIACT_GRAV_H */ diff --git a/src/runner_doiact_hydro.c b/src/runner_doiact_hydro.c new file mode 100644 index 0000000000000000000000000000000000000000..480ea59f0a536aa340b7e4d8f838bef3a0cca072 --- /dev/null +++ b/src/runner_doiact_hydro.c @@ -0,0 +1,63 @@ +/******************************************************************************* + * This file is part of SWIFT. + * Copyright (c) 2012 Pedro Gonnet (pedro.gonnet@durham.ac.uk) + * Matthieu Schaller (matthieu.schaller@durham.ac.uk) + * 2015 Peter W. Draper (p.w.draper@durham.ac.uk) + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published + * by the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public License + * along with this program. If not, see <http://www.gnu.org/licenses/>. + * + ******************************************************************************/ + +/* Config parameters. */ +#include "../config.h" + +/* Local headers. */ +#include "active.h" +#include "cell.h" +#include "engine.h" +#include "pressure_floor_iact.h" +#include "runner.h" +#include "runner_doiact_hydro_vec.h" +#include "space_getsid.h" +#include "timers.h" + +/* Import the density loop functions. */ +#define FUNCTION density +#define FUNCTION_TASK_LOOP TASK_LOOP_DENSITY +#include "runner_doiact_functions_hydro.h" +#undef FUNCTION +#undef FUNCTION_TASK_LOOP + +/* Import the gradient loop functions (if required). */ +#ifdef EXTRA_HYDRO_LOOP +#define FUNCTION gradient +#define FUNCTION_TASK_LOOP TASK_LOOP_GRADIENT +#include "runner_doiact_functions_hydro.h" +#undef FUNCTION +#undef FUNCTION_TASK_LOOP +#endif + +/* Import the force loop functions. */ +#define FUNCTION force +#define FUNCTION_TASK_LOOP TASK_LOOP_FORCE +#include "runner_doiact_functions_hydro.h" +#undef FUNCTION +#undef FUNCTION_TASK_LOOP + +/* Import the limiter loop functions. 
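Aside: the new runner_doiact_hydro.c shows the "multiple inclusion" template trick that the repeated #define FUNCTION / #include / #undef blocks implement: the loop bodies live once in runner_doiact_functions_hydro.h and are stamped out per interaction type, with a two-level paste macro gluing FUNCTION into every symbol name. A self-contained two-file miniature of the same mechanism (all toy_* file and symbol names are invented):

/* ---- toy_template.h (hypothetical): included once per FUNCTION ---------- */
/* No include guard on purpose: the file is meant to be included repeatedly. */
#define TOY_PASTE(x, y) x##_##y
#define TOY_NAME_(f) TOY_PASTE(toy_do, f)
#define TOY_NAME TOY_NAME_(FUNCTION) /* two-level paste expands FUNCTION */

static double TOY_NAME(const double a, const double b) {
  return a + b; /* stands in for a whole interaction loop body */
}

#undef TOY_NAME
#undef TOY_NAME_
#undef TOY_PASTE

/* ---- toy_user.c (hypothetical): stamps out one copy per loop type ------- */
#define FUNCTION density
#include "toy_template.h" /* generates toy_do_density() */
#undef FUNCTION

#define FUNCTION force
#include "toy_template.h" /* generates toy_do_force() */
#undef FUNCTION

The header side of the real code derives the same expanded names (runner_dopair1_branch_density and friends), so other translation units see prototypes that match the generated definitions.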
*/ +#define FUNCTION limiter +#define FUNCTION_TASK_LOOP TASK_LOOP_LIMITER +#include "runner_doiact_functions_hydro.h" +#undef FUNCTION +#undef FUNCTION_TASK_LOOP diff --git a/src/runner_doiact_hydro.h b/src/runner_doiact_hydro.h new file mode 100644 index 0000000000000000000000000000000000000000..1fd54c1037e2d0b9c7a671311cfee4720ebe8d84 --- /dev/null +++ b/src/runner_doiact_hydro.h @@ -0,0 +1,151 @@ +/******************************************************************************* + * This file is part of SWIFT. + * Copyright (c) 2012 Pedro Gonnet (pedro.gonnet@durham.ac.uk) + * 2016 Matthieu Schaller (matthieu.schaller@durham.ac.uk) + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published + * by the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public License + * along with this program. If not, see <http://www.gnu.org/licenses/>. + * + ******************************************************************************/ + +/* Before including this file, define FUNCTION, which is the + name of the interaction function. This creates the interaction functions + runner_dopair_FUNCTION, runner_dopair_FUNCTION_naive, runner_doself_FUNCTION, + and runner_dosub_FUNCTION calling the pairwise interaction function + runner_iact_FUNCTION. */ + +#define PASTE(x, y) x##_##y + +#define _DOPAIR1_BRANCH(f) PASTE(runner_dopair1_branch, f) +#define DOPAIR1_BRANCH _DOPAIR1_BRANCH(FUNCTION) + +#define _DOPAIR1(f) PASTE(runner_dopair1, f) +#define DOPAIR1 _DOPAIR1(FUNCTION) + +#define _DOPAIR2_BRANCH(f) PASTE(runner_dopair2_branch, f) +#define DOPAIR2_BRANCH _DOPAIR2_BRANCH(FUNCTION) + +#define _DOPAIR2(f) PASTE(runner_dopair2, f) +#define DOPAIR2 _DOPAIR2(FUNCTION) + +#define _DOPAIR_SUBSET(f) PASTE(runner_dopair_subset, f) +#define DOPAIR_SUBSET _DOPAIR_SUBSET(FUNCTION) + +#define _DOPAIR_SUBSET_BRANCH(f) PASTE(runner_dopair_subset_branch, f) +#define DOPAIR_SUBSET_BRANCH _DOPAIR_SUBSET_BRANCH(FUNCTION) + +#define _DOPAIR_SUBSET_NOSORT(f) PASTE(runner_dopair_subset_nosort, f) +#define DOPAIR_SUBSET_NOSORT _DOPAIR_SUBSET_NOSORT(FUNCTION) + +#define _DOPAIR_SUBSET_NAIVE(f) PASTE(runner_dopair_subset_naive, f) +#define DOPAIR_SUBSET_NAIVE _DOPAIR_SUBSET_NAIVE(FUNCTION) + +#define _DOPAIR1_NAIVE(f) PASTE(runner_dopair1_naive, f) +#define DOPAIR1_NAIVE _DOPAIR1_NAIVE(FUNCTION) + +#define _DOPAIR2_NAIVE(f) PASTE(runner_dopair2_naive, f) +#define DOPAIR2_NAIVE _DOPAIR2_NAIVE(FUNCTION) + +#define _DOSELF1_NAIVE(f) PASTE(runner_doself1_naive, f) +#define DOSELF1_NAIVE _DOSELF1_NAIVE(FUNCTION) + +#define _DOSELF2_NAIVE(f) PASTE(runner_doself2_naive, f) +#define DOSELF2_NAIVE _DOSELF2_NAIVE(FUNCTION) + +#define _DOSELF1_BRANCH(f) PASTE(runner_doself1_branch, f) +#define DOSELF1_BRANCH _DOSELF1_BRANCH(FUNCTION) + +#define _DOSELF1(f) PASTE(runner_doself1, f) +#define DOSELF1 _DOSELF1(FUNCTION) + +#define _DOSELF2_BRANCH(f) PASTE(runner_doself2_branch, f) +#define DOSELF2_BRANCH _DOSELF2_BRANCH(FUNCTION) + +#define _DOSELF2(f) PASTE(runner_doself2, f) +#define DOSELF2 _DOSELF2(FUNCTION) + +#define _DOSELF_SUBSET(f) PASTE(runner_doself_subset, f) +#define 
DOSELF_SUBSET _DOSELF_SUBSET(FUNCTION) + +#define _DOSELF_SUBSET_BRANCH(f) PASTE(runner_doself_subset_branch, f) +#define DOSELF_SUBSET_BRANCH _DOSELF_SUBSET_BRANCH(FUNCTION) + +#define _DOSUB_SELF1(f) PASTE(runner_dosub_self1, f) +#define DOSUB_SELF1 _DOSUB_SELF1(FUNCTION) + +#define _DOSUB_PAIR1(f) PASTE(runner_dosub_pair1, f) +#define DOSUB_PAIR1 _DOSUB_PAIR1(FUNCTION) + +#define _DOSUB_SELF2(f) PASTE(runner_dosub_self2, f) +#define DOSUB_SELF2 _DOSUB_SELF2(FUNCTION) + +#define _DOSUB_PAIR2(f) PASTE(runner_dosub_pair2, f) +#define DOSUB_PAIR2 _DOSUB_PAIR2(FUNCTION) + +#define _DOSUB_SUBSET(f) PASTE(runner_dosub_subset, f) +#define DOSUB_SUBSET _DOSUB_SUBSET(FUNCTION) + +#define _IACT_NONSYM(f) PASTE(runner_iact_nonsym, f) +#define IACT_NONSYM _IACT_NONSYM(FUNCTION) + +#define _IACT(f) PASTE(runner_iact, f) +#define IACT _IACT(FUNCTION) + +#define _IACT_NONSYM_VEC(f) PASTE(runner_iact_nonsym_vec, f) +#define IACT_NONSYM_VEC _IACT_NONSYM_VEC(FUNCTION) + +#define _IACT_VEC(f) PASTE(runner_iact_vec, f) +#define IACT_VEC _IACT_VEC(FUNCTION) + +#define _TIMER_DOSELF(f) PASTE(timer_doself, f) +#define TIMER_DOSELF _TIMER_DOSELF(FUNCTION) + +#define _TIMER_DOPAIR(f) PASTE(timer_dopair, f) +#define TIMER_DOPAIR _TIMER_DOPAIR(FUNCTION) + +#define _TIMER_DOSUB_SELF(f) PASTE(timer_dosub_self, f) +#define TIMER_DOSUB_SELF _TIMER_DOSUB_SELF(FUNCTION) + +#define _TIMER_DOSUB_PAIR(f) PASTE(timer_dosub_pair, f) +#define TIMER_DOSUB_PAIR _TIMER_DOSUB_PAIR(FUNCTION) + +#define _TIMER_DOSELF_SUBSET(f) PASTE(timer_doself_subset, f) +#define TIMER_DOSELF_SUBSET _TIMER_DOSELF_SUBSET(FUNCTION) + +#define _TIMER_DOPAIR_SUBSET(f) PASTE(timer_dopair_subset, f) +#define TIMER_DOPAIR_SUBSET _TIMER_DOPAIR_SUBSET(FUNCTION) + +void DOSELF1_BRANCH(struct runner *r, struct cell *c); +void DOSELF2_BRANCH(struct runner *r, struct cell *c); + +void DOPAIR1_BRANCH(struct runner *r, struct cell *ci, struct cell *cj); +void DOPAIR2_BRANCH(struct runner *r, struct cell *ci, struct cell *cj); + +void DOSUB_SELF1(struct runner *r, struct cell *ci, int gettimer); +void DOSUB_SELF2(struct runner *r, struct cell *ci, int gettimer); + +void DOSUB_PAIR1(struct runner *r, struct cell *ci, struct cell *cj, + int gettimer); +void DOSUB_PAIR2(struct runner *r, struct cell *ci, struct cell *cj, + int gettimer); + +void DOSELF_SUBSET_BRANCH(struct runner *r, struct cell *restrict ci, + struct part *restrict parts, int *restrict ind, + int count); + +void DOPAIR_SUBSET_BRANCH(struct runner *r, struct cell *restrict ci, + struct part *restrict parts_i, int *restrict ind, + int count, struct cell *restrict cj); + +void DOSUB_SUBSET(struct runner *r, struct cell *ci, struct part *parts, + int *ind, int count, struct cell *cj, int gettimer); diff --git a/src/runner_doiact_vec.c b/src/runner_doiact_hydro_vec.c similarity index 99% rename from src/runner_doiact_vec.c rename to src/runner_doiact_hydro_vec.c index 68f34b0d3b8fc9c79097522f8a1618f86957612e..59401e4050dcb4481d1c56aa8857106558a06880 100644 --- a/src/runner_doiact_vec.c +++ b/src/runner_doiact_hydro_vec.c @@ -21,7 +21,7 @@ #include "../config.h" /* This object's header. 
*/ -#include "runner_doiact_vec.h" +#include "runner_doiact_hydro_vec.h" #if defined(WITH_VECTORIZATION) && defined(GADGET2_SPH) diff --git a/src/runner_doiact_vec.h b/src/runner_doiact_hydro_vec.h similarity index 100% rename from src/runner_doiact_vec.h rename to src/runner_doiact_hydro_vec.h diff --git a/src/runner_doiact_stars.c b/src/runner_doiact_stars.c new file mode 100644 index 0000000000000000000000000000000000000000..1e1267df5195f727a19252b6ee654629e23149b6 --- /dev/null +++ b/src/runner_doiact_stars.c @@ -0,0 +1,47 @@ +/******************************************************************************* + * This file is part of SWIFT. + * Copyright (c) 2012 Pedro Gonnet (pedro.gonnet@durham.ac.uk) + * Matthieu Schaller (matthieu.schaller@durham.ac.uk) + * 2015 Peter W. Draper (p.w.draper@durham.ac.uk) + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published + * by the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public License + * along with this program. If not, see <http://www.gnu.org/licenses/>. + * + ******************************************************************************/ + +/* Config parameters. */ +#include "../config.h" + +/* Local headers. */ +#include "active.h" +#include "cell.h" +#include "engine.h" +#include "feedback.h" +#include "runner.h" +#include "space_getsid.h" +#include "stars.h" +#include "timers.h" + +/* Import the stars density loop functions. */ +#define FUNCTION density +#define FUNCTION_TASK_LOOP TASK_LOOP_DENSITY +#include "runner_doiact_functions_stars.h" +#undef FUNCTION_TASK_LOOP +#undef FUNCTION + +/* Import the stars feedback loop functions. */ +#define FUNCTION feedback +#define FUNCTION_TASK_LOOP TASK_LOOP_FEEDBACK +#include "runner_doiact_functions_stars.h" +#undef FUNCTION_TASK_LOOP +#undef FUNCTION diff --git a/src/runner_doiact_stars.h b/src/runner_doiact_stars.h index 7e9780def83bbdbab83a431a757a52f3ba51d2e4..2d41d5a0bd1b1003039e1795eec205889b46baf6 100644 --- a/src/runner_doiact_stars.h +++ b/src/runner_doiact_stars.h @@ -86,1307 +86,21 @@ #define _IACT_STARS(f) PASTE(runner_iact_nonsym_stars, f) #define IACT_STARS _IACT_STARS(FUNCTION) -/** - * @brief Calculate the number density of #part around the #spart - * - * @param r runner task - * @param c cell - * @param timer 1 if the time is to be recorded. - */ -void DOSELF1_STARS(struct runner *r, struct cell *c, int timer) { - -#ifdef SWIFT_DEBUG_CHECKS - if (c->nodeID != engine_rank) error("Should be run on a different node"); -#endif - - TIMER_TIC; - - const struct engine *e = r->e; - const int with_cosmology = e->policy & engine_policy_cosmology; - const integertime_t ti_current = e->ti_current; - const struct cosmology *cosmo = e->cosmology; - - /* Anything to do here? 
*/ - if (c->hydro.count == 0 || c->stars.count == 0) return; - if (!cell_is_active_stars(c, e)) return; - - /* Cosmological terms */ - const float a = cosmo->a; - const float H = cosmo->H; - - const int scount = c->stars.count; - const int count = c->hydro.count; - struct spart *restrict sparts = c->stars.parts; - struct part *restrict parts = c->hydro.parts; - struct xpart *restrict xparts = c->hydro.xparts; - - /* Loop over the sparts in ci. */ - for (int sid = 0; sid < scount; sid++) { - - /* Get a hold of the ith spart in ci. */ - struct spart *restrict si = &sparts[sid]; - - /* Skip inactive particles */ - if (!spart_is_active(si, e)) continue; - - /* Skip inactive particles */ - if (!feedback_is_active(si, e->time, cosmo, with_cosmology)) continue; - - const float hi = si->h; - const float hig2 = hi * hi * kernel_gamma2; - const float six[3] = {(float)(si->x[0] - c->loc[0]), - (float)(si->x[1] - c->loc[1]), - (float)(si->x[2] - c->loc[2])}; - - /* Loop over the parts in cj. */ - for (int pjd = 0; pjd < count; pjd++) { - - /* Get a pointer to the jth particle. */ - struct part *restrict pj = &parts[pjd]; - struct xpart *restrict xpj = &xparts[pjd]; - const float hj = pj->h; - - /* Early abort? */ - if (part_is_inhibited(pj, e)) continue; - - /* Compute the pairwise distance. */ - const float pjx[3] = {(float)(pj->x[0] - c->loc[0]), - (float)(pj->x[1] - c->loc[1]), - (float)(pj->x[2] - c->loc[2])}; - float dx[3] = {six[0] - pjx[0], six[1] - pjx[1], six[2] - pjx[2]}; - const float r2 = dx[0] * dx[0] + dx[1] * dx[1] + dx[2] * dx[2]; - -#ifdef SWIFT_DEBUG_CHECKS - /* Check that particles have been drifted to the current time */ - if (pj->ti_drift != e->ti_current) - error("Particle pj not drifted to current time"); -#endif - - if (r2 < hig2) { - IACT_STARS(r2, dx, hi, hj, si, pj, a, H); -#if (FUNCTION_TASK_LOOP == TASK_LOOP_DENSITY) - runner_iact_nonsym_feedback_density(r2, dx, hi, hj, si, pj, xpj, cosmo, - ti_current); -#elif (FUNCTION_TASK_LOOP == TASK_LOOP_FEEDBACK) - runner_iact_nonsym_feedback_apply(r2, dx, hi, hj, si, pj, xpj, cosmo, - ti_current); -#endif - } - } /* loop over the parts in ci. */ - } /* loop over the sparts in ci. */ - - TIMER_TOC(TIMER_DOSELF_STARS); -} - -/** - * @brief Calculate the number density of cj #part around the ci #spart - * - * @param r runner task - * @param ci The first #cell - * @param cj The second #cell - */ -void DO_NONSYM_PAIR1_STARS_NAIVE(struct runner *r, struct cell *restrict ci, - struct cell *restrict cj) { - -#ifdef SWIFT_DEBUG_CHECKS -#if (FUNCTION_TASK_LOOP == TASK_LOOP_DENSITY) - if (ci->nodeID != engine_rank) error("Should be run on a different node"); -#else - if (cj->nodeID != engine_rank) error("Should be run on a different node"); -#endif -#endif - - const struct engine *e = r->e; - const int with_cosmology = e->policy & engine_policy_cosmology; - const integertime_t ti_current = e->ti_current; - const struct cosmology *cosmo = e->cosmology; - - /* Anything to do here? */ - if (cj->hydro.count == 0 || ci->stars.count == 0) return; - if (!cell_is_active_stars(ci, e)) return; - - /* Cosmological terms */ - const float a = cosmo->a; - const float H = cosmo->H; - - const int scount_i = ci->stars.count; - const int count_j = cj->hydro.count; - struct spart *restrict sparts_i = ci->stars.parts; - struct part *restrict parts_j = cj->hydro.parts; - struct xpart *restrict xparts_j = cj->hydro.xparts; - - /* Get the relative distance between the pairs, wrapping. 
*/ - double shift[3] = {0.0, 0.0, 0.0}; - for (int k = 0; k < 3; k++) { - if (cj->loc[k] - ci->loc[k] < -e->s->dim[k] / 2) - shift[k] = e->s->dim[k]; - else if (cj->loc[k] - ci->loc[k] > e->s->dim[k] / 2) - shift[k] = -e->s->dim[k]; - } - - /* Loop over the sparts in ci. */ - for (int sid = 0; sid < scount_i; sid++) { - - /* Get a hold of the ith spart in ci. */ - struct spart *restrict si = &sparts_i[sid]; - - /* Skip inactive particles */ - if (!spart_is_active(si, e)) continue; - - /* Skip inactive particles */ - if (!feedback_is_active(si, e->time, cosmo, with_cosmology)) continue; - - const float hi = si->h; - const float hig2 = hi * hi * kernel_gamma2; - const float six[3] = {(float)(si->x[0] - (cj->loc[0] + shift[0])), - (float)(si->x[1] - (cj->loc[1] + shift[1])), - (float)(si->x[2] - (cj->loc[2] + shift[2]))}; - - /* Loop over the parts in cj. */ - for (int pjd = 0; pjd < count_j; pjd++) { - - /* Get a pointer to the jth particle. */ - struct part *restrict pj = &parts_j[pjd]; - struct xpart *restrict xpj = &xparts_j[pjd]; - const float hj = pj->h; - - /* Skip inhibited particles. */ - if (part_is_inhibited(pj, e)) continue; - - /* Compute the pairwise distance. */ - const float pjx[3] = {(float)(pj->x[0] - cj->loc[0]), - (float)(pj->x[1] - cj->loc[1]), - (float)(pj->x[2] - cj->loc[2])}; - float dx[3] = {six[0] - pjx[0], six[1] - pjx[1], six[2] - pjx[2]}; - const float r2 = dx[0] * dx[0] + dx[1] * dx[1] + dx[2] * dx[2]; - -#ifdef SWIFT_DEBUG_CHECKS - /* Check that particles have been drifted to the current time */ - if (pj->ti_drift != e->ti_current) - error("Particle pj not drifted to current time"); -#endif - - if (r2 < hig2) { - IACT_STARS(r2, dx, hi, hj, si, pj, a, H); - -#if (FUNCTION_TASK_LOOP == TASK_LOOP_DENSITY) - runner_iact_nonsym_feedback_density(r2, dx, hi, hj, si, pj, xpj, cosmo, - ti_current); -#elif (FUNCTION_TASK_LOOP == TASK_LOOP_FEEDBACK) - runner_iact_nonsym_feedback_apply(r2, dx, hi, hj, si, pj, xpj, cosmo, - ti_current); -#endif - } - } /* loop over the parts in cj. */ - } /* loop over the parts in ci. */ -} - -/** - * @brief Compute the interactions between a cell pair. - * - * @param r The #runner. - * @param ci The first #cell. - * @param cj The second #cell. - * @param sid The direction of the pair. - * @param shift The shift vector to apply to the particles in ci. - */ -void DO_SYM_PAIR1_STARS(struct runner *r, struct cell *ci, struct cell *cj, - const int sid, const double *shift) { - - TIMER_TIC; - - const struct engine *e = r->e; - const int with_cosmology = e->policy & engine_policy_cosmology; - const integertime_t ti_current = e->ti_current; - const struct cosmology *cosmo = e->cosmology; - - /* Cosmological terms */ - const float a = cosmo->a; - const float H = cosmo->H; - - /* Get the cutoff shift. 
*/ - double rshift = 0.0; - for (int k = 0; k < 3; k++) rshift += shift[k] * runner_shift[sid][k]; - -#if (FUNCTION_TASK_LOOP == TASK_LOOP_DENSITY) - const int do_ci_stars = (ci->nodeID == e->nodeID) && (ci->stars.count != 0) && - (cj->hydro.count != 0) && cell_is_active_stars(ci, e); - const int do_cj_stars = (cj->nodeID == e->nodeID) && (cj->stars.count != 0) && - (ci->hydro.count != 0) && cell_is_active_stars(cj, e); -#else - /* here we are updating the hydro -> switch ci, cj for local */ - const int do_ci_stars = (cj->nodeID == e->nodeID) && (ci->stars.count != 0) && - (cj->hydro.count != 0) && cell_is_active_stars(ci, e); - const int do_cj_stars = (ci->nodeID == e->nodeID) && (cj->stars.count != 0) && - (ci->hydro.count != 0) && cell_is_active_stars(cj, e); -#endif - - if (do_ci_stars) { - - /* Pick-out the sorted lists. */ - const struct sort_entry *restrict sort_j = cj->hydro.sort[sid]; - const struct sort_entry *restrict sort_i = ci->stars.sort[sid]; - -#ifdef SWIFT_DEBUG_CHECKS - /* Some constants used to checks that the parts are in the right frame */ - const float shift_threshold_x = - 2. * ci->width[0] + - 2. * max(ci->stars.dx_max_part, cj->hydro.dx_max_part); - const float shift_threshold_y = - 2. * ci->width[1] + - 2. * max(ci->stars.dx_max_part, cj->hydro.dx_max_part); - const float shift_threshold_z = - 2. * ci->width[2] + - 2. * max(ci->stars.dx_max_part, cj->hydro.dx_max_part); -#endif /* SWIFT_DEBUG_CHECKS */ - - /* Get some other useful values. */ - const double hi_max = ci->stars.h_max * kernel_gamma - rshift; - const int count_i = ci->stars.count; - const int count_j = cj->hydro.count; - struct spart *restrict sparts_i = ci->stars.parts; - struct part *restrict parts_j = cj->hydro.parts; - struct xpart *restrict xparts_j = cj->hydro.xparts; - const double dj_min = sort_j[0].d; - const float dx_max_rshift = - (ci->stars.dx_max_sort + cj->hydro.dx_max_sort) - rshift; - const float dx_max = (ci->stars.dx_max_sort + cj->hydro.dx_max_sort); - - /* Loop over the sparts in ci. */ - for (int pid = count_i - 1; - pid >= 0 && sort_i[pid].d + hi_max + dx_max > dj_min; pid--) { - - /* Get a hold of the ith part in ci. */ - struct spart *restrict spi = &sparts_i[sort_i[pid].i]; - const float hi = spi->h; - - /* Skip inactive particles */ - if (!spart_is_active(spi, e)) continue; - - /* Skip inactive particles */ - if (!feedback_is_active(spi, e->time, cosmo, with_cosmology)) continue; - - /* Compute distance from the other cell. */ - const double px[3] = {spi->x[0], spi->x[1], spi->x[2]}; - float dist = px[0] * runner_shift[sid][0] + px[1] * runner_shift[sid][1] + - px[2] * runner_shift[sid][2]; - - /* Is there anything we need to interact with ? */ - const double di = dist + hi * kernel_gamma + dx_max_rshift; - if (di < dj_min) continue; - - /* Get some additional information about pi */ - const float hig2 = hi * hi * kernel_gamma2; - const float pix = spi->x[0] - (cj->loc[0] + shift[0]); - const float piy = spi->x[1] - (cj->loc[1] + shift[1]); - const float piz = spi->x[2] - (cj->loc[2] + shift[2]); - - /* Loop over the parts in cj. */ - for (int pjd = 0; pjd < count_j && sort_j[pjd].d < di; pjd++) { - - /* Recover pj */ - struct part *pj = &parts_j[sort_j[pjd].i]; - struct xpart *xpj = &xparts_j[sort_j[pjd].i]; - - /* Skip inhibited particles. 
*/ - if (part_is_inhibited(pj, e)) continue; - - const float hj = pj->h; - const float pjx = pj->x[0] - cj->loc[0]; - const float pjy = pj->x[1] - cj->loc[1]; - const float pjz = pj->x[2] - cj->loc[2]; - - /* Compute the pairwise distance. */ - float dx[3] = {pix - pjx, piy - pjy, piz - pjz}; - const float r2 = dx[0] * dx[0] + dx[1] * dx[1] + dx[2] * dx[2]; - -#ifdef SWIFT_DEBUG_CHECKS - /* Check that particles are in the correct frame after the shifts */ - if (pix > shift_threshold_x || pix < -shift_threshold_x) - error( - "Invalid particle position in X for pi (pix=%e ci->width[0]=%e)", - pix, ci->width[0]); - if (piy > shift_threshold_y || piy < -shift_threshold_y) - error( - "Invalid particle position in Y for pi (piy=%e ci->width[1]=%e)", - piy, ci->width[1]); - if (piz > shift_threshold_z || piz < -shift_threshold_z) - error( - "Invalid particle position in Z for pi (piz=%e ci->width[2]=%e)", - piz, ci->width[2]); - if (pjx > shift_threshold_x || pjx < -shift_threshold_x) - error( - "Invalid particle position in X for pj (pjx=%e ci->width[0]=%e)", - pjx, ci->width[0]); - if (pjy > shift_threshold_y || pjy < -shift_threshold_y) - error( - "Invalid particle position in Y for pj (pjy=%e ci->width[1]=%e)", - pjy, ci->width[1]); - if (pjz > shift_threshold_z || pjz < -shift_threshold_z) - error( - "Invalid particle position in Z for pj (pjz=%e ci->width[2]=%e)", - pjz, ci->width[2]); - - /* Check that particles have been drifted to the current time */ - if (spi->ti_drift != e->ti_current) - error("Particle spi not drifted to current time"); - if (pj->ti_drift != e->ti_current) - error("Particle pj not drifted to current time"); -#endif - - /* Hit or miss? */ - if (r2 < hig2) { - IACT_STARS(r2, dx, hi, hj, spi, pj, a, H); - -#if (FUNCTION_TASK_LOOP == TASK_LOOP_DENSITY) - runner_iact_nonsym_feedback_density(r2, dx, hi, hj, spi, pj, xpj, - cosmo, ti_current); -#elif (FUNCTION_TASK_LOOP == TASK_LOOP_FEEDBACK) - runner_iact_nonsym_feedback_apply(r2, dx, hi, hj, spi, pj, xpj, cosmo, - ti_current); -#endif - } - } /* loop over the parts in cj. */ - } /* loop over the parts in ci. */ - } /* do_ci_stars */ - - if (do_cj_stars) { - /* Pick-out the sorted lists. */ - const struct sort_entry *restrict sort_i = ci->hydro.sort[sid]; - const struct sort_entry *restrict sort_j = cj->stars.sort[sid]; - -#ifdef SWIFT_DEBUG_CHECKS - /* Some constants used to checks that the parts are in the right frame */ - const float shift_threshold_x = - 2. * ci->width[0] + - 2. * max(ci->hydro.dx_max_part, cj->stars.dx_max_part); - const float shift_threshold_y = - 2. * ci->width[1] + - 2. * max(ci->hydro.dx_max_part, cj->stars.dx_max_part); - const float shift_threshold_z = - 2. * ci->width[2] + - 2. * max(ci->hydro.dx_max_part, cj->stars.dx_max_part); -#endif /* SWIFT_DEBUG_CHECKS */ - - /* Get some other useful values. */ - const double hj_max = cj->hydro.h_max * kernel_gamma; - const int count_i = ci->hydro.count; - const int count_j = cj->stars.count; - struct part *restrict parts_i = ci->hydro.parts; - struct xpart *restrict xparts_i = ci->hydro.xparts; - struct spart *restrict sparts_j = cj->stars.parts; - const double di_max = sort_i[count_i - 1].d - rshift; - const float dx_max_rshift = - (ci->hydro.dx_max_sort + cj->stars.dx_max_sort) + rshift; - const float dx_max = (ci->hydro.dx_max_sort + cj->stars.dx_max_sort); - - /* Loop over the parts in cj. */ - for (int pjd = 0; pjd < count_j && sort_j[pjd].d - hj_max - dx_max < di_max; - pjd++) { - - /* Get a hold of the jth part in cj. 
*/ - struct spart *spj = &sparts_j[sort_j[pjd].i]; - const float hj = spj->h; - - /* Skip inactive particles */ - if (!spart_is_active(spj, e)) continue; - - /* Skip inactive particles */ - if (!feedback_is_active(spj, e->time, cosmo, with_cosmology)) continue; - - /* Compute distance from the other cell. */ - const double px[3] = {spj->x[0], spj->x[1], spj->x[2]}; - float dist = px[0] * runner_shift[sid][0] + px[1] * runner_shift[sid][1] + - px[2] * runner_shift[sid][2]; - - /* Is there anything we need to interact with ? */ - const double dj = dist - hj * kernel_gamma - dx_max_rshift; - if (dj - rshift > di_max) continue; - - /* Get some additional information about pj */ - const float hjg2 = hj * hj * kernel_gamma2; - const float pjx = spj->x[0] - cj->loc[0]; - const float pjy = spj->x[1] - cj->loc[1]; - const float pjz = spj->x[2] - cj->loc[2]; - - /* Loop over the parts in ci. */ - for (int pid = count_i - 1; pid >= 0 && sort_i[pid].d > dj; pid--) { - - /* Recover pi */ - struct part *pi = &parts_i[sort_i[pid].i]; - struct xpart *xpi = &xparts_i[sort_i[pid].i]; - - /* Skip inhibited particles. */ - if (part_is_inhibited(pi, e)) continue; - - const float hi = pi->h; - const float pix = pi->x[0] - (cj->loc[0] + shift[0]); - const float piy = pi->x[1] - (cj->loc[1] + shift[1]); - const float piz = pi->x[2] - (cj->loc[2] + shift[2]); - - /* Compute the pairwise distance. */ - float dx[3] = {pjx - pix, pjy - piy, pjz - piz}; - const float r2 = dx[0] * dx[0] + dx[1] * dx[1] + dx[2] * dx[2]; - -#ifdef SWIFT_DEBUG_CHECKS - /* Check that particles are in the correct frame after the shifts */ - if (pix > shift_threshold_x || pix < -shift_threshold_x) - error( - "Invalid particle position in X for pi (pix=%e ci->width[0]=%e)", - pix, ci->width[0]); - if (piy > shift_threshold_y || piy < -shift_threshold_y) - error( - "Invalid particle position in Y for pi (piy=%e ci->width[1]=%e)", - piy, ci->width[1]); - if (piz > shift_threshold_z || piz < -shift_threshold_z) - error( - "Invalid particle position in Z for pi (piz=%e ci->width[2]=%e)", - piz, ci->width[2]); - if (pjx > shift_threshold_x || pjx < -shift_threshold_x) - error( - "Invalid particle position in X for pj (pjx=%e ci->width[0]=%e)", - pjx, ci->width[0]); - if (pjy > shift_threshold_y || pjy < -shift_threshold_y) - error( - "Invalid particle position in Y for pj (pjy=%e ci->width[1]=%e)", - pjy, ci->width[1]); - if (pjz > shift_threshold_z || pjz < -shift_threshold_z) - error( - "Invalid particle position in Z for pj (pjz=%e ci->width[2]=%e)", - pjz, ci->width[2]); - - /* Check that particles have been drifted to the current time */ - if (pi->ti_drift != e->ti_current) - error("Particle pi not drifted to current time"); - if (spj->ti_drift != e->ti_current) - error("Particle spj not drifted to current time"); -#endif - - /* Hit or miss? */ - if (r2 < hjg2) { - - IACT_STARS(r2, dx, hj, hi, spj, pi, a, H); - -#if (FUNCTION_TASK_LOOP == TASK_LOOP_DENSITY) - runner_iact_nonsym_feedback_density(r2, dx, hj, hi, spj, pi, xpi, - cosmo, ti_current); -#elif (FUNCTION_TASK_LOOP == TASK_LOOP_FEEDBACK) - runner_iact_nonsym_feedback_apply(r2, dx, hj, hi, spj, pi, xpi, cosmo, - ti_current); -#endif - } - } /* loop over the parts in ci. */ - } /* loop over the parts in cj. 
*/ - } /* Cell cj is active */ - - TIMER_TOC(TIMER_DOPAIR_STARS); -} - -void DOPAIR1_STARS_NAIVE(struct runner *r, struct cell *restrict ci, - struct cell *restrict cj, int timer) { - - TIMER_TIC; - -#if (FUNCTION_TASK_LOOP == TASK_LOOP_DENSITY) - const int do_ci_stars = ci->nodeID == r->e->nodeID; - const int do_cj_stars = cj->nodeID == r->e->nodeID; -#else - /* here we are updating the hydro -> switch ci, cj */ - const int do_ci_stars = cj->nodeID == r->e->nodeID; - const int do_cj_stars = ci->nodeID == r->e->nodeID; -#endif - if (do_ci_stars && ci->stars.count != 0 && cj->hydro.count != 0) - DO_NONSYM_PAIR1_STARS_NAIVE(r, ci, cj); - if (do_cj_stars && cj->stars.count != 0 && ci->hydro.count != 0) - DO_NONSYM_PAIR1_STARS_NAIVE(r, cj, ci); - - TIMER_TOC(TIMER_DOPAIR_STARS); -} - -/** - * @brief Compute the interactions between a cell pair, but only for the - * given indices in ci. - * - * Version using a brute-force algorithm. - * - * @param r The #runner. - * @param ci The first #cell. - * @param sparts_i The #part to interact with @c cj. - * @param ind The list of indices of particles in @c ci to interact with. - * @param scount The number of particles in @c ind. - * @param cj The second #cell. - * @param sid The direction of the pair. - * @param flipped Flag to check whether the cells have been flipped or not. - * @param shift The shift vector to apply to the particles in ci. - */ -void DOPAIR1_SUBSET_STARS(struct runner *r, struct cell *restrict ci, - struct spart *restrict sparts_i, int *restrict ind, - int scount, struct cell *restrict cj, const int sid, - const int flipped, const double *shift) { - - const struct engine *e = r->e; - const integertime_t ti_current = e->ti_current; - const struct cosmology *cosmo = e->cosmology; - - /* Cosmological terms */ - const float a = cosmo->a; - const float H = cosmo->H; - - const int count_j = cj->hydro.count; - struct part *restrict parts_j = cj->hydro.parts; - struct xpart *restrict xparts_j = cj->hydro.xparts; - - /* Early abort? */ - if (count_j == 0) return; - - /* Pick-out the sorted lists. */ - const struct sort_entry *restrict sort_j = cj->hydro.sort[sid]; - const float dxj = cj->hydro.dx_max_sort; - - /* Sparts are on the left? */ - if (!flipped) { - - /* Loop over the sparts_i. */ - for (int pid = 0; pid < scount; pid++) { - - /* Get a hold of the ith spart in ci. */ - struct spart *restrict spi = &sparts_i[ind[pid]]; - const double pix = spi->x[0] - (shift[0]); - const double piy = spi->x[1] - (shift[1]); - const double piz = spi->x[2] - (shift[2]); - const float hi = spi->h; - const float hig2 = hi * hi * kernel_gamma2; - const double di = hi * kernel_gamma + dxj + pix * runner_shift[sid][0] + - piy * runner_shift[sid][1] + piz * runner_shift[sid][2]; - - /* Loop over the parts in cj. */ - for (int pjd = 0; pjd < count_j && sort_j[pjd].d < di; pjd++) { - - /* Get a pointer to the jth particle. */ - struct part *restrict pj = &parts_j[sort_j[pjd].i]; - struct xpart *restrict xpj = &xparts_j[sort_j[pjd].i]; - - /* Skip inhibited particles. */ - if (part_is_inhibited(pj, e)) continue; - - const double pjx = pj->x[0]; - const double pjy = pj->x[1]; - const double pjz = pj->x[2]; - const float hj = pj->h; - - /* Compute the pairwise distance. 
*/ - float dx[3] = {(float)(pix - pjx), (float)(piy - pjy), - (float)(piz - pjz)}; - const float r2 = dx[0] * dx[0] + dx[1] * dx[1] + dx[2] * dx[2]; - -#ifdef SWIFT_DEBUG_CHECKS - /* Check that particles have been drifted to the current time */ - if (spi->ti_drift != e->ti_current) - error("Particle pi not drifted to current time"); - if (pj->ti_drift != e->ti_current) - error("Particle pj not drifted to current time"); -#endif - - /* Hit or miss? */ - if (r2 < hig2) { - IACT_STARS(r2, dx, hi, hj, spi, pj, a, H); - -#if (FUNCTION_TASK_LOOP == TASK_LOOP_DENSITY) - runner_iact_nonsym_feedback_density(r2, dx, hi, hj, spi, pj, xpj, - cosmo, ti_current); -#elif (FUNCTION_TASK_LOOP == TASK_LOOP_FEEDBACK) - runner_iact_nonsym_feedback_apply(r2, dx, hi, hj, spi, pj, xpj, cosmo, - ti_current); -#endif - } - } /* loop over the parts in cj. */ - } /* loop over the sparts in ci. */ - } - - /* Sparts are on the right. */ - else { - - /* Loop over the sparts_i. */ - for (int pid = 0; pid < scount; pid++) { - - /* Get a hold of the ith spart in ci. */ - struct spart *restrict spi = &sparts_i[ind[pid]]; - const double pix = spi->x[0] - (shift[0]); - const double piy = spi->x[1] - (shift[1]); - const double piz = spi->x[2] - (shift[2]); - const float hi = spi->h; - const float hig2 = hi * hi * kernel_gamma2; - const double di = -hi * kernel_gamma - dxj + pix * runner_shift[sid][0] + - piy * runner_shift[sid][1] + piz * runner_shift[sid][2]; - - /* Loop over the parts in cj. */ - for (int pjd = count_j - 1; pjd >= 0 && di < sort_j[pjd].d; pjd--) { - - /* Get a pointer to the jth particle. */ - struct part *restrict pj = &parts_j[sort_j[pjd].i]; - struct xpart *restrict xpj = &xparts_j[sort_j[pjd].i]; - - /* Skip inhibited particles. */ - if (part_is_inhibited(pj, e)) continue; - - const double pjx = pj->x[0]; - const double pjy = pj->x[1]; - const double pjz = pj->x[2]; - const float hj = pj->h; - - /* Compute the pairwise distance. */ - float dx[3] = {(float)(pix - pjx), (float)(piy - pjy), - (float)(piz - pjz)}; - const float r2 = dx[0] * dx[0] + dx[1] * dx[1] + dx[2] * dx[2]; - -#ifdef SWIFT_DEBUG_CHECKS - /* Check that particles have been drifted to the current time */ - if (spi->ti_drift != e->ti_current) - error("Particle pi not drifted to current time"); - if (pj->ti_drift != e->ti_current) - error("Particle pj not drifted to current time"); -#endif - - /* Hit or miss? */ - if (r2 < hig2) { - IACT_STARS(r2, dx, hi, hj, spi, pj, a, H); - -#if (FUNCTION_TASK_LOOP == TASK_LOOP_DENSITY) - runner_iact_nonsym_feedback_density(r2, dx, hi, hj, spi, pj, xpj, - cosmo, ti_current); -#elif (FUNCTION_TASK_LOOP == TASK_LOOP_FEEDBACK) - runner_iact_nonsym_feedback_apply(r2, dx, hi, hj, spi, pj, xpj, cosmo, - ti_current); -#endif - } - } /* loop over the parts in cj. */ - } /* loop over the sparts in ci. */ - } -} +void DOSELF1_BRANCH_STARS(struct runner *r, struct cell *c); +void DOPAIR1_BRANCH_STARS(struct runner *r, struct cell *ci, struct cell *cj); -/** - * @brief Compute the interactions between a cell pair, but only for the - * given indices in ci. - * - * Version using a brute-force algorithm. - * - * @param r The #runner. - * @param ci The first #cell. - * @param sparts_i The #part to interact with @c cj. - * @param ind The list of indices of particles in @c ci to interact with. - * @param scount The number of particles in @c ind. - * @param cj The second #cell. - * @param shift The shift vector to apply to the particles in ci. 
- */ -void DOPAIR1_SUBSET_STARS_NAIVE(struct runner *r, struct cell *restrict ci, - struct spart *restrict sparts_i, - int *restrict ind, int scount, - struct cell *restrict cj, const double *shift) { - -#ifdef SWIFT_DEBUG_CHECKS - if (ci->nodeID != engine_rank) error("Should be run on a different node"); -#endif - - const struct engine *e = r->e; - const integertime_t ti_current = e->ti_current; - const struct cosmology *cosmo = e->cosmology; - - /* Cosmological terms */ - const float a = cosmo->a; - const float H = cosmo->H; - - const int count_j = cj->hydro.count; - struct part *restrict parts_j = cj->hydro.parts; - struct xpart *restrict xparts_j = cj->hydro.xparts; - - /* Early abort? */ - if (count_j == 0) return; - - /* Loop over the parts_i. */ - for (int pid = 0; pid < scount; pid++) { - - /* Get a hold of the ith part in ci. */ - struct spart *restrict spi = &sparts_i[ind[pid]]; - - const double pix = spi->x[0] - (shift[0]); - const double piy = spi->x[1] - (shift[1]); - const double piz = spi->x[2] - (shift[2]); - const float hi = spi->h; - const float hig2 = hi * hi * kernel_gamma2; - -#ifdef SWIFT_DEBUG_CHECKS - if (!spart_is_active(spi, e)) - error("Trying to correct smoothing length of inactive particle !"); -#endif - - /* Loop over the parts in cj. */ - for (int pjd = 0; pjd < count_j; pjd++) { - - /* Get a pointer to the jth particle. */ - struct part *restrict pj = &parts_j[pjd]; - struct xpart *restrict xpj = &xparts_j[pjd]; - - /* Skip inhibited particles */ - if (part_is_inhibited(pj, e)) continue; - - const double pjx = pj->x[0]; - const double pjy = pj->x[1]; - const double pjz = pj->x[2]; - const float hj = pj->h; - - /* Compute the pairwise distance. */ - float dx[3] = {(float)(pix - pjx), (float)(piy - pjy), - (float)(piz - pjz)}; - const float r2 = dx[0] * dx[0] + dx[1] * dx[1] + dx[2] * dx[2]; - -#ifdef SWIFT_DEBUG_CHECKS - /* Check that particles have been drifted to the current time */ - if (pj->ti_drift != e->ti_current) - error("Particle pj not drifted to current time"); -#endif - /* Hit or miss? */ - if (r2 < hig2) { - IACT_STARS(r2, dx, hi, hj, spi, pj, a, H); - -#if (FUNCTION_TASK_LOOP == TASK_LOOP_DENSITY) - runner_iact_nonsym_feedback_density(r2, dx, hi, hj, spi, pj, xpj, cosmo, - ti_current); -#elif (FUNCTION_TASK_LOOP == TASK_LOOP_FEEDBACK) - runner_iact_nonsym_feedback_apply(r2, dx, hi, hj, spi, pj, xpj, cosmo, - ti_current); -#endif - } - } /* loop over the parts in cj. */ - } /* loop over the parts in ci. */ -} - -/** - * @brief Compute the interactions between a cell pair, but only for the - * given indices in ci. - * - * @param r The #runner. - * @param ci The first #cell. - * @param sparts The #spart to interact. - * @param ind The list of indices of particles in @c ci to interact with. - * @param scount The number of particles in @c ind. - */ -void DOSELF1_SUBSET_STARS(struct runner *r, struct cell *restrict ci, - struct spart *restrict sparts, int *restrict ind, - int scount) { - -#ifdef SWIFT_DEBUG_CHECKS - if (ci->nodeID != engine_rank) error("Should be run on a different node"); -#endif - - const struct engine *e = r->e; - const integertime_t ti_current = e->ti_current; - const struct cosmology *cosmo = e->cosmology; - - /* Cosmological terms */ - const float a = cosmo->a; - const float H = cosmo->H; - - const int count_i = ci->hydro.count; - struct part *restrict parts_j = ci->hydro.parts; - struct xpart *restrict xparts_j = ci->hydro.xparts; - - /* Early abort? */ - if (count_i == 0) return; - - /* Loop over the parts in ci. 
*/ - for (int spid = 0; spid < scount; spid++) { - - /* Get a hold of the ith part in ci. */ - struct spart *spi = &sparts[ind[spid]]; - const float spix[3] = {(float)(spi->x[0] - ci->loc[0]), - (float)(spi->x[1] - ci->loc[1]), - (float)(spi->x[2] - ci->loc[2])}; - const float hi = spi->h; - const float hig2 = hi * hi * kernel_gamma2; - -#ifdef SWIFT_DEBUG_CHECKS - if (!spart_is_active(spi, e)) - error("Inactive particle in subset function!"); -#endif - - /* Loop over the parts in cj. */ - for (int pjd = 0; pjd < count_i; pjd++) { - - /* Get a pointer to the jth particle. */ - struct part *restrict pj = &parts_j[pjd]; - struct xpart *restrict xpj = &xparts_j[pjd]; - - /* Early abort? */ - if (part_is_inhibited(pj, e)) continue; - - /* Compute the pairwise distance. */ - const float pjx[3] = {(float)(pj->x[0] - ci->loc[0]), - (float)(pj->x[1] - ci->loc[1]), - (float)(pj->x[2] - ci->loc[2])}; - float dx[3] = {spix[0] - pjx[0], spix[1] - pjx[1], spix[2] - pjx[2]}; - const float r2 = dx[0] * dx[0] + dx[1] * dx[1] + dx[2] * dx[2]; - -#ifdef SWIFT_DEBUG_CHECKS - /* Check that particles have been drifted to the current time */ - if (pj->ti_drift != e->ti_current) - error("Particle pj not drifted to current time"); -#endif - - /* Hit or miss? */ - if (r2 < hig2) { - IACT_STARS(r2, dx, hi, pj->h, spi, pj, a, H); -#if (FUNCTION_TASK_LOOP == TASK_LOOP_DENSITY) - runner_iact_nonsym_feedback_density(r2, dx, hi, pj->h, spi, pj, xpj, - cosmo, ti_current); -#elif (FUNCTION_TASK_LOOP == TASK_LOOP_FEEDBACK) - runner_iact_nonsym_feedback_apply(r2, dx, hi, pj->h, spi, pj, xpj, - cosmo, ti_current); -#endif - } - } /* loop over the parts in cj. */ - } /* loop over the parts in ci. */ -} +void DOSUB_SELF1_STARS(struct runner *r, struct cell *ci, int gettimer); +void DOSUB_PAIR1_STARS(struct runner *r, struct cell *ci, struct cell *cj, + int gettimer); -/** - * @brief Determine which version of DOSELF1_SUBSET_STARS needs to be called - * depending on the optimisation level. - * - * @param r The #runner. - * @param ci The first #cell. - * @param sparts The #spart to interact. - * @param ind The list of indices of particles in @c ci to interact with. - * @param scount The number of particles in @c ind. - */ void DOSELF1_SUBSET_BRANCH_STARS(struct runner *r, struct cell *restrict ci, struct spart *restrict sparts, - int *restrict ind, int scount) { + int *restrict ind, int scount); - DOSELF1_SUBSET_STARS(r, ci, sparts, ind, scount); -} - -/** - * @brief Determine which version of DOPAIR1_SUBSET_STARS needs to be called - * depending on the orientation of the cells or whether DOPAIR1_SUBSET_STARS - * needs to be called at all. - * - * @param r The #runner. - * @param ci The first #cell. - * @param sparts_i The #spart to interact with @c cj. - * @param ind The list of indices of particles in @c ci to interact with. - * @param scount The number of particles in @c ind. - * @param cj The second #cell. - */ void DOPAIR1_SUBSET_BRANCH_STARS(struct runner *r, struct cell *restrict ci, struct spart *restrict sparts_i, int *restrict ind, int scount, - struct cell *restrict cj) { - - const struct engine *e = r->e; - - /* Anything to do here? */ - if (cj->hydro.count == 0) return; - - /* Get the relative distance between the pairs, wrapping. 
*/ - double shift[3] = {0.0, 0.0, 0.0}; - for (int k = 0; k < 3; k++) { - if (cj->loc[k] - ci->loc[k] < -e->s->dim[k] / 2) - shift[k] = e->s->dim[k]; - else if (cj->loc[k] - ci->loc[k] > e->s->dim[k] / 2) - shift[k] = -e->s->dim[k]; - } - -#ifdef SWIFT_USE_NAIVE_INTERACTIONS_STARS - DOPAIR1_SUBSET_STARS_NAIVE(r, ci, sparts_i, ind, scount, cj, shift); -#else - /* Get the sorting index. */ - int sid = 0; - for (int k = 0; k < 3; k++) - sid = 3 * sid + ((cj->loc[k] - ci->loc[k] + shift[k] < 0) - ? 0 - : (cj->loc[k] - ci->loc[k] + shift[k] > 0) ? 2 : 1); - - /* Switch the cells around? */ - const int flipped = runner_flip[sid]; - sid = sortlistID[sid]; - - /* Has the cell cj been sorted? */ - if (!(cj->hydro.sorted & (1 << sid)) || - cj->hydro.dx_max_sort_old > space_maxreldx * cj->dmin) - error("Interacting unsorted cells."); - - DOPAIR1_SUBSET_STARS(r, ci, sparts_i, ind, scount, cj, sid, flipped, shift); -#endif -} + struct cell *restrict cj); void DOSUB_SUBSET_STARS(struct runner *r, struct cell *ci, struct spart *sparts, - int *ind, int scount, struct cell *cj, int gettimer) { - - const struct engine *e = r->e; - struct space *s = e->s; - - /* Should we even bother? */ - if (!cell_is_active_stars(ci, e) && - (cj == NULL || !cell_is_active_stars(cj, e))) - return; - - /* Find out in which sub-cell of ci the parts are. */ - struct cell *sub = NULL; - if (ci->split) { - for (int k = 0; k < 8; k++) { - if (ci->progeny[k] != NULL) { - if (&sparts[ind[0]] >= &ci->progeny[k]->stars.parts[0] && - &sparts[ind[0]] < - &ci->progeny[k]->stars.parts[ci->progeny[k]->stars.count]) { - sub = ci->progeny[k]; - break; - } - } - } - } - - /* Is this a single cell? */ - if (cj == NULL) { - - /* Recurse? */ - if (cell_can_recurse_in_self_stars_task(ci)) { - - /* Loop over all progeny. */ - DOSUB_SUBSET_STARS(r, sub, sparts, ind, scount, NULL, 0); - for (int j = 0; j < 8; j++) - if (ci->progeny[j] != sub && ci->progeny[j] != NULL) - DOSUB_SUBSET_STARS(r, sub, sparts, ind, scount, ci->progeny[j], 0); - - } - - /* Otherwise, compute self-interaction. */ - else - DOSELF1_SUBSET_BRANCH_STARS(r, ci, sparts, ind, scount); - } /* self-interaction. */ - - /* Otherwise, it's a pair interaction. */ - else { - - /* Recurse? */ - if (cell_can_recurse_in_pair_stars_task(ci, cj) && - cell_can_recurse_in_pair_stars_task(cj, ci)) { - - /* Get the type of pair and flip ci/cj if needed. */ - double shift[3] = {0.0, 0.0, 0.0}; - const int sid = space_getsid(s, &ci, &cj, shift); - - struct cell_split_pair *csp = &cell_split_pairs[sid]; - for (int k = 0; k < csp->count; k++) { - const int pid = csp->pairs[k].pid; - const int pjd = csp->pairs[k].pjd; - if (ci->progeny[pid] == sub && cj->progeny[pjd] != NULL) - DOSUB_SUBSET_STARS(r, ci->progeny[pid], sparts, ind, scount, - cj->progeny[pjd], 0); - if (ci->progeny[pid] != NULL && cj->progeny[pjd] == sub) - DOSUB_SUBSET_STARS(r, cj->progeny[pjd], sparts, ind, scount, - ci->progeny[pid], 0); - } - } - - /* Otherwise, compute the pair directly. */ - else if (cell_is_active_stars(ci, e) && cj->hydro.count > 0) { - - /* Do any of the cells need to be drifted first? */ - if (cell_is_active_stars(ci, e)) { - if (!cell_are_spart_drifted(ci, e)) error("Cell should be drifted!"); - if (!cell_are_part_drifted(cj, e)) error("Cell should be drifted!"); - } - - DOPAIR1_SUBSET_BRANCH_STARS(r, ci, sparts, ind, scount, cj); - } - - } /* otherwise, pair interaction. */ -} - -/** - * @brief Determine which version of DOSELF1_STARS needs to be called depending - * on the optimisation level. 
- * - * @param r #runner - * @param c #cell c - * - */ -void DOSELF1_BRANCH_STARS(struct runner *r, struct cell *c) { - - const struct engine *restrict e = r->e; - - /* Anything to do here? */ - if (c->stars.count == 0) return; - - /* Anything to do here? */ - if (!cell_is_active_stars(c, e)) return; - - /* Did we mess up the recursion? */ - if (c->stars.h_max_old * kernel_gamma > c->dmin) - error("Cell smaller than smoothing length"); - - DOSELF1_STARS(r, c, 1); -} - -#define RUNNER_CHECK_SORT(TYPE, PART, cj, ci, sid) \ - ({ \ - const struct sort_entry *restrict sort_j = cj->TYPE.sort[sid]; \ - \ - for (int pjd = 0; pjd < cj->TYPE.count; pjd++) { \ - const struct PART *p = &cj->TYPE.parts[sort_j[pjd].i]; \ - if (PART##_is_inhibited(p, e)) continue; \ - \ - const float d = p->x[0] * runner_shift[sid][0] + \ - p->x[1] * runner_shift[sid][1] + \ - p->x[2] * runner_shift[sid][2]; \ - if ((fabsf(d - sort_j[pjd].d) - cj->TYPE.dx_max_sort) > \ - 1.0e-4 * max(fabsf(d), cj->TYPE.dx_max_sort_old) && \ - (fabsf(d - sort_j[pjd].d) - cj->TYPE.dx_max_sort) > \ - cj->width[0] * 1.0e-10) \ - error( \ - "particle shift diff exceeds dx_max_sort in cell cj. " \ - "cj->nodeID=%d " \ - "ci->nodeID=%d d=%e sort_j[pjd].d=%e cj->" #TYPE \ - ".dx_max_sort=%e " \ - "cj->" #TYPE \ - ".dx_max_sort_old=%e, cellID=%i super->cellID=%i" \ - "cj->depth=%d cj->maxdepth=%d", \ - cj->nodeID, ci->nodeID, d, sort_j[pjd].d, cj->TYPE.dx_max_sort, \ - cj->TYPE.dx_max_sort_old, cj->cellID, cj->hydro.super->cellID, \ - cj->depth, cj->maxdepth); \ - } \ - }) - -/** - * @brief Determine which version of DOPAIR1_STARS needs to be called depending - * on the orientation of the cells or whether DOPAIR1_STARS needs to be called - * at all. - * - * @param r #runner - * @param ci #cell ci - * @param cj #cell cj - * - */ -void DOPAIR1_BRANCH_STARS(struct runner *r, struct cell *ci, struct cell *cj) { - - const struct engine *restrict e = r->e; - - /* Get the sort ID. */ - double shift[3] = {0.0, 0.0, 0.0}; - const int sid = space_getsid(e->s, &ci, &cj, shift); - - const int ci_active = cell_is_active_stars(ci, e); - const int cj_active = cell_is_active_stars(cj, e); -#if (FUNCTION_TASK_LOOP == TASK_LOOP_DENSITY) - const int do_ci_stars = ci->nodeID == e->nodeID; - const int do_cj_stars = cj->nodeID == e->nodeID; -#else - /* here we are updating the hydro -> switch ci, cj */ - const int do_ci_stars = cj->nodeID == e->nodeID; - const int do_cj_stars = ci->nodeID == e->nodeID; -#endif - const int do_ci = (ci->stars.count != 0 && cj->hydro.count != 0 && - ci_active && do_ci_stars); - const int do_cj = (cj->stars.count != 0 && ci->hydro.count != 0 && - cj_active && do_cj_stars); - - /* Anything to do here? */ - if (!do_ci && !do_cj) return; - - /* Check that cells are drifted. */ - if (do_ci && - (!cell_are_spart_drifted(ci, e) || !cell_are_part_drifted(cj, e))) - error("Interacting undrifted cells."); - - /* Have the cells been sorted? */ - if (do_ci && (!(ci->stars.sorted & (1 << sid)) || - ci->stars.dx_max_sort_old > space_maxreldx * ci->dmin)) - error("Interacting unsorted cells."); - - if (do_ci && (!(cj->hydro.sorted & (1 << sid)) || - cj->hydro.dx_max_sort_old > space_maxreldx * cj->dmin)) - error("Interacting unsorted cells."); - - if (do_cj && - (!cell_are_part_drifted(ci, e) || !cell_are_spart_drifted(cj, e))) - error("Interacting undrifted cells."); - - /* Have the cells been sorted? 
*/ - if (do_cj && (!(ci->hydro.sorted & (1 << sid)) || - ci->hydro.dx_max_sort_old > space_maxreldx * ci->dmin)) - error("Interacting unsorted cells."); - - if (do_cj && (!(cj->stars.sorted & (1 << sid)) || - cj->stars.dx_max_sort_old > space_maxreldx * cj->dmin)) - error("Interacting unsorted cells."); - -#ifdef SWIFT_DEBUG_CHECKS - if (do_ci) { - // MATTHIEU: This test is faulty. To be fixed... - // RUNNER_CHECK_SORT(hydro, part, cj, ci, sid); - RUNNER_CHECK_SORT(stars, spart, ci, cj, sid); - } - - if (do_cj) { - // MATTHIEU: This test is faulty. To be fixed... - // RUNNER_CHECK_SORT(hydro, part, ci, cj, sid); - RUNNER_CHECK_SORT(stars, spart, cj, ci, sid); - } -#endif /* SWIFT_DEBUG_CHECKS */ - -#ifdef SWIFT_USE_NAIVE_INTERACTIONS_STARS - DOPAIR1_STARS_NAIVE(r, ci, cj, 1); -#else - DO_SYM_PAIR1_STARS(r, ci, cj, sid, shift); -#endif -} - -/** - * @brief Compute grouped sub-cell interactions for pairs - * - * @param r The #runner. - * @param ci The first #cell. - * @param cj The second #cell. - * @param gettimer Do we have a timer ? - * - * @todo Hard-code the sid on the recursive calls to avoid the - * redundant computations to find the sid on-the-fly. - */ -void DOSUB_PAIR1_STARS(struct runner *r, struct cell *ci, struct cell *cj, - int gettimer) { - - TIMER_TIC; - - struct space *s = r->e->s; - const struct engine *e = r->e; - - /* Should we even bother? */ - const int should_do_ci = ci->stars.count != 0 && cj->hydro.count != 0 && - cell_is_active_stars(ci, e); - const int should_do_cj = cj->stars.count != 0 && ci->hydro.count != 0 && - cell_is_active_stars(cj, e); - if (!should_do_ci && !should_do_cj) return; - - /* Get the type of pair and flip ci/cj if needed. */ - double shift[3]; - const int sid = space_getsid(s, &ci, &cj, shift); - - /* Recurse? */ - if (cell_can_recurse_in_pair_stars_task(ci, cj) && - cell_can_recurse_in_pair_stars_task(cj, ci)) { - struct cell_split_pair *csp = &cell_split_pairs[sid]; - for (int k = 0; k < csp->count; k++) { - const int pid = csp->pairs[k].pid; - const int pjd = csp->pairs[k].pjd; - if (ci->progeny[pid] != NULL && cj->progeny[pjd] != NULL) - DOSUB_PAIR1_STARS(r, ci->progeny[pid], cj->progeny[pjd], 0); - } - } - - /* Otherwise, compute the pair directly. */ - else { - -#if (FUNCTION_TASK_LOOP == TASK_LOOP_DENSITY) - const int do_ci_stars = ci->nodeID == e->nodeID; - const int do_cj_stars = cj->nodeID == e->nodeID; -#else - /* here we are updating the hydro -> switch ci, cj */ - const int do_ci_stars = cj->nodeID == e->nodeID; - const int do_cj_stars = ci->nodeID == e->nodeID; -#endif - const int do_ci = ci->stars.count != 0 && cj->hydro.count != 0 && - cell_is_active_stars(ci, e) && do_ci_stars; - const int do_cj = cj->stars.count != 0 && ci->hydro.count != 0 && - cell_is_active_stars(cj, e) && do_cj_stars; - - if (do_ci) { - - /* Make sure both cells are drifted to the current timestep. */ - if (!cell_are_spart_drifted(ci, e)) - error("Interacting undrifted cells (sparts)."); - - if (!cell_are_part_drifted(cj, e)) - error("Interacting undrifted cells (parts)."); - - /* Do any of the cells need to be sorted first? */ - if (!(ci->stars.sorted & (1 << sid)) || - ci->stars.dx_max_sort_old > ci->dmin * space_maxreldx) { - error("Interacting unsorted cell (sparts)."); - } - - if (!(cj->hydro.sorted & (1 << sid)) || - cj->hydro.dx_max_sort_old > cj->dmin * space_maxreldx) - error("Interacting unsorted cell (parts). %i", cj->nodeID); - } - - if (do_cj) { - - /* Make sure both cells are drifted to the current timestep. 
*/ - if (!cell_are_part_drifted(ci, e)) - error("Interacting undrifted cells (parts)."); - - if (!cell_are_spart_drifted(cj, e)) - error("Interacting undrifted cells (sparts)."); - - /* Do any of the cells need to be sorted first? */ - if (!(ci->hydro.sorted & (1 << sid)) || - ci->hydro.dx_max_sort_old > ci->dmin * space_maxreldx) { - error("Interacting unsorted cell (parts)."); - } - - if (!(cj->stars.sorted & (1 << sid)) || - cj->stars.dx_max_sort_old > cj->dmin * space_maxreldx) { - error("Interacting unsorted cell (sparts)."); - } - } - - if (do_ci || do_cj) DOPAIR1_BRANCH_STARS(r, ci, cj); - } - - TIMER_TOC(TIMER_DOSUB_PAIR_STARS); -} - -/** - * @brief Compute grouped sub-cell interactions for self tasks - * - * @param r The #runner. - * @param ci The first #cell. - * @param gettimer Do we have a timer ? - */ -void DOSUB_SELF1_STARS(struct runner *r, struct cell *ci, int gettimer) { - - TIMER_TIC; - -#ifdef SWIFT_DEBUG_CHECKS - if (ci->nodeID != engine_rank) - error("This function should not be called on foreign cells"); -#endif - - /* Should we even bother? */ - if (ci->hydro.count == 0 || ci->stars.count == 0 || - !cell_is_active_stars(ci, r->e)) - return; - - /* Recurse? */ - if (cell_can_recurse_in_self_stars_task(ci)) { - - /* Loop over all progeny. */ - for (int k = 0; k < 8; k++) - if (ci->progeny[k] != NULL) { - DOSUB_SELF1_STARS(r, ci->progeny[k], 0); - for (int j = k + 1; j < 8; j++) - if (ci->progeny[j] != NULL) - DOSUB_PAIR1_STARS(r, ci->progeny[k], ci->progeny[j], 0); - } - } - - /* Otherwise, compute self-interaction. */ - else { - - /* Drift the cell to the current timestep if needed. */ - if (!cell_are_spart_drifted(ci, r->e)) error("Interacting undrifted cell."); - - DOSELF1_BRANCH_STARS(r, ci); - } - - TIMER_TOC(TIMER_DOSUB_SELF_STARS); -} + int *ind, int scount, struct cell *cj, int gettimer); diff --git a/src/runner_drift.c b/src/runner_drift.c new file mode 100644 index 0000000000000000000000000000000000000000..8c4376743cd50ffea4709cb471959864cedcc4b7 --- /dev/null +++ b/src/runner_drift.c @@ -0,0 +1,96 @@ +/******************************************************************************* + * This file is part of SWIFT. + * Copyright (c) 2012 Pedro Gonnet (pedro.gonnet@durham.ac.uk) + * Matthieu Schaller (matthieu.schaller@durham.ac.uk) + * 2015 Peter W. Draper (p.w.draper@durham.ac.uk) + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published + * by the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public License + * along with this program. If not, see <http://www.gnu.org/licenses/>. + * + ******************************************************************************/ + +/* Config parameters. */ +#include "../config.h" + +/* This object's header. */ +#include "runner.h" + +/* Local headers. */ +#include "active.h" +#include "cell.h" +#include "engine.h" +#include "timers.h" + +/** + * @brief Drift all part in a cell. + * + * @param r The runner thread. + * @param c The cell. + * @param timer Are we timing this ? 
+ */ +void runner_do_drift_part(struct runner *r, struct cell *c, int timer) { + + TIMER_TIC; + + cell_drift_part(c, r->e, 0); + + if (timer) TIMER_TOC(timer_drift_part); +} + +/** + * @brief Drift all gpart in a cell. + * + * @param r The runner thread. + * @param c The cell. + * @param timer Are we timing this ? + */ +void runner_do_drift_gpart(struct runner *r, struct cell *c, int timer) { + + TIMER_TIC; + + cell_drift_gpart(c, r->e, 0); + + if (timer) TIMER_TOC(timer_drift_gpart); +} + +/** + * @brief Drift all spart in a cell. + * + * @param r The runner thread. + * @param c The cell. + * @param timer Are we timing this ? + */ +void runner_do_drift_spart(struct runner *r, struct cell *c, int timer) { + + TIMER_TIC; + + cell_drift_spart(c, r->e, 0); + + if (timer) TIMER_TOC(timer_drift_spart); +} + +/** + * @brief Drift all bpart in a cell. + * + * @param r The runner thread. + * @param c The cell. + * @param timer Are we timing this ? + */ +void runner_do_drift_bpart(struct runner *r, struct cell *c, int timer) { + + TIMER_TIC; + + cell_drift_bpart(c, r->e, 0); + + if (timer) TIMER_TOC(timer_drift_bpart); +} diff --git a/src/runner_ghost.c b/src/runner_ghost.c new file mode 100644 index 0000000000000000000000000000000000000000..2c1e8cd7190858014f7914e293b5ffdadbdc2707 --- /dev/null +++ b/src/runner_ghost.c @@ -0,0 +1,1355 @@ +/******************************************************************************* + * This file is part of SWIFT. + * Copyright (c) 2012 Pedro Gonnet (pedro.gonnet@durham.ac.uk) + * Matthieu Schaller (matthieu.schaller@durham.ac.uk) + * 2015 Peter W. Draper (p.w.draper@durham.ac.uk) + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published + * by the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public License + * along with this program. If not, see <http://www.gnu.org/licenses/>. + * + ******************************************************************************/ + +/* Config parameters. */ +#include "../config.h" + +/* This object's header. */ +#include "runner.h" + +/* Local headers. */ +#include "active.h" +#include "black_holes.h" +#include "cell.h" +#include "engine.h" +#include "feedback.h" +#include "pressure_floor.h" +#include "pressure_floor_iact.h" +#include "space_getsid.h" +#include "stars.h" +#include "timers.h" +#include "tracers.h" + +/* Import the density loop functions. */ +#define FUNCTION density +#define FUNCTION_TASK_LOOP TASK_LOOP_DENSITY +#include "runner_doiact_hydro.h" +#undef FUNCTION +#undef FUNCTION_TASK_LOOP + +/* Import the stars density loop functions. */ +#define FUNCTION density +#define FUNCTION_TASK_LOOP TASK_LOOP_DENSITY +#include "runner_doiact_stars.h" +#undef FUNCTION_TASK_LOOP +#undef FUNCTION + +/* Import the black hole density loop functions. */ +#define FUNCTION density +#define FUNCTION_TASK_LOOP TASK_LOOP_DENSITY +#include "runner_doiact_black_holes.h" +#undef FUNCTION_TASK_LOOP +#undef FUNCTION + +/** + * @brief Intermediate task after the density to check that the smoothing + * lengths are correct. + * + * @param r The runner thread. 
+ * @param c The cell. + * @param timer Are we timing this ? + */ +void runner_do_stars_ghost(struct runner *r, struct cell *c, int timer) { + + struct spart *restrict sparts = c->stars.parts; + const struct engine *e = r->e; + const struct unit_system *us = e->internal_units; + const int with_cosmology = (e->policy & engine_policy_cosmology); + const struct cosmology *cosmo = e->cosmology; + const struct feedback_props *feedback_props = e->feedback_props; + const float stars_h_max = e->hydro_properties->h_max; + const float stars_h_min = e->hydro_properties->h_min; + const float eps = e->stars_properties->h_tolerance; + const float stars_eta_dim = + pow_dimension(e->stars_properties->eta_neighbours); + const int max_smoothing_iter = e->stars_properties->max_smoothing_iterations; + int redo = 0, scount = 0; + + /* Running value of the maximal smoothing length */ + double h_max = c->stars.h_max; + + TIMER_TIC; + +#ifdef SWIFT_DEBUG_CHECKS + if (c->nodeID != e->nodeID) + error("Running the star ghost on a foreign node!"); +#endif + + /* Anything to do here? */ + if (c->stars.count == 0) return; + if (!cell_is_active_stars(c, e)) return; + + /* Recurse? */ + if (c->split) { + for (int k = 0; k < 8; k++) { + if (c->progeny[k] != NULL) { + runner_do_stars_ghost(r, c->progeny[k], 0); + + /* Update h_max */ + h_max = max(h_max, c->progeny[k]->stars.h_max); + } + } + } else { + + /* Init the list of active particles that have to be updated. */ + int *sid = NULL; + float *h_0 = NULL; + float *left = NULL; + float *right = NULL; + if ((sid = (int *)malloc(sizeof(int) * c->stars.count)) == NULL) + error("Can't allocate memory for sid."); + if ((h_0 = (float *)malloc(sizeof(float) * c->stars.count)) == NULL) + error("Can't allocate memory for h_0."); + if ((left = (float *)malloc(sizeof(float) * c->stars.count)) == NULL) + error("Can't allocate memory for left."); + if ((right = (float *)malloc(sizeof(float) * c->stars.count)) == NULL) + error("Can't allocate memory for right."); + for (int k = 0; k < c->stars.count; k++) + if (spart_is_active(&sparts[k], e) && + feedback_is_active(&sparts[k], e->time, cosmo, with_cosmology)) { + sid[scount] = k; + h_0[scount] = sparts[k].h; + left[scount] = 0.f; + right[scount] = stars_h_max; + ++scount; + } + + /* While there are particles that need to be updated... */ + for (int num_reruns = 0; scount > 0 && num_reruns < max_smoothing_iter; + num_reruns++) { + + /* Reset the redo-count. */ + redo = 0; + + /* Loop over the remaining active parts in this cell. */ + for (int i = 0; i < scount; i++) { + + /* Get a direct pointer on the part. */ + struct spart *sp = &sparts[sid[i]]; + +#ifdef SWIFT_DEBUG_CHECKS + /* Is this part within the timestep? 
*/ + if (!spart_is_active(sp, e)) + error("Ghost applied to inactive particle"); +#endif + + /* Get some useful values */ + const float h_init = h_0[i]; + const float h_old = sp->h; + const float h_old_dim = pow_dimension(h_old); + const float h_old_dim_minus_one = pow_dimension_minus_one(h_old); + + float h_new; + int has_no_neighbours = 0; + + if (sp->density.wcount == 0.f) { /* No neighbours case */ + + /* Flag that there were no neighbours */ + has_no_neighbours = 1; + + /* Double h and try again */ + h_new = 2.f * h_old; + + } else { + + /* Finish the density calculation */ + stars_end_density(sp, cosmo); + + /* Compute one step of the Newton-Raphson scheme */ + const float n_sum = sp->density.wcount * h_old_dim; + const float n_target = stars_eta_dim; + const float f = n_sum - n_target; + const float f_prime = + sp->density.wcount_dh * h_old_dim + + hydro_dimension * sp->density.wcount * h_old_dim_minus_one; + + /* Improve the bisection bounds */ + if (n_sum < n_target) + left[i] = max(left[i], h_old); + else if (n_sum > n_target) + right[i] = min(right[i], h_old); + +#ifdef SWIFT_DEBUG_CHECKS + /* Check the validity of the left and right bounds */ + if (left[i] > right[i]) + error("Invalid left (%e) and right (%e)", left[i], right[i]); +#endif + + /* Skip if h is already h_max and we don't have enough neighbours + */ + /* Same if we are below h_min */ + if (((sp->h >= stars_h_max) && (f < 0.f)) || + ((sp->h <= stars_h_min) && (f > 0.f))) { + + stars_reset_feedback(sp); + + /* Only do feedback if stars have a reasonable birth time */ + if (feedback_do_feedback(sp)) { + + const integertime_t ti_step = get_integer_timestep(sp->time_bin); + const integertime_t ti_begin = + get_integer_time_begin(e->ti_current - 1, sp->time_bin); + + /* Get particle time-step */ + double dt; + if (with_cosmology) { + dt = cosmology_get_delta_time(e->cosmology, ti_begin, + ti_begin + ti_step); + } else { + dt = get_timestep(sp->time_bin, e->time_base); + } + + /* Calculate age of the star at current time */ + double star_age_end_of_step; + if (with_cosmology) { + star_age_end_of_step = + cosmology_get_delta_time_from_scale_factors( + cosmo, (double)sp->birth_scale_factor, cosmo->a); + } else { + star_age_end_of_step = (float)e->time - sp->birth_time; + } + + /* Has this star been around for a while ? */ + if (star_age_end_of_step > 0.) { + + /* Age of the star at the start of the step */ + const double star_age_beg_of_step = + max(star_age_end_of_step - dt, 0.); + + /* Compute the stellar evolution */ + feedback_evolve_spart(sp, feedback_props, cosmo, us, + star_age_beg_of_step, dt); + } else { + + /* Reset the feedback fields of the star particle */ + feedback_reset_feedback(sp, feedback_props); + } + } else { + + feedback_reset_feedback(sp, feedback_props); + } + + /* Ok, we are done with this particle */ + continue; + } + + /* Normal case: Use Newton-Raphson to get a better value of h */ + + /* Avoid floating point exception from f_prime = 0 */ + h_new = h_old - f / (f_prime + FLT_MIN); + + /* Be verbose about the particles that struggle to converge */ + if (num_reruns > max_smoothing_iter - 10) { + + message( + "Smoothing length convergence problem: iter=%d p->id=%lld " + "h_init=%12.8e h_old=%12.8e h_new=%12.8e f=%f f_prime=%f " + "n_sum=%12.8e n_target=%12.8e left=%12.8e right=%12.8e", + num_reruns, sp->id, h_init, h_old, h_new, f, f_prime, n_sum, + n_target, left[i], right[i]); + } + + /* Safety check: truncate to the range [ h_old/2 , 2h_old ]. 
*/ + h_new = min(h_new, 2.f * h_old); + h_new = max(h_new, 0.5f * h_old); + + /* Verify that we are actually progrssing towards the answer */ + h_new = max(h_new, left[i]); + h_new = min(h_new, right[i]); + } + + /* Check whether the particle has an inappropriate smoothing length + */ + if (fabsf(h_new - h_old) > eps * h_old) { + + /* Ok, correct then */ + + /* Case where we have been oscillating around the solution */ + if ((h_new == left[i] && h_old == right[i]) || + (h_old == left[i] && h_new == right[i])) { + + /* Bissect the remaining interval */ + sp->h = pow_inv_dimension( + 0.5f * (pow_dimension(left[i]) + pow_dimension(right[i]))); + + } else { + + /* Normal case */ + sp->h = h_new; + } + + /* If below the absolute maximum, try again */ + if (sp->h < stars_h_max && sp->h > stars_h_min) { + + /* Flag for another round of fun */ + sid[redo] = sid[i]; + h_0[redo] = h_0[i]; + left[redo] = left[i]; + right[redo] = right[i]; + redo += 1; + + /* Re-initialise everything */ + stars_init_spart(sp); + feedback_init_spart(sp); + + /* Off we go ! */ + continue; + + } else if (sp->h <= stars_h_min) { + + /* Ok, this particle is a lost cause... */ + sp->h = stars_h_min; + + } else if (sp->h >= stars_h_max) { + + /* Ok, this particle is a lost cause... */ + sp->h = stars_h_max; + + /* Do some damage control if no neighbours at all were found */ + if (has_no_neighbours) { + stars_spart_has_no_neighbours(sp, cosmo); + } + + } else { + error( + "Fundamental problem with the smoothing length iteration " + "logic."); + } + } + + /* We now have a particle whose smoothing length has converged */ + + /* Check if h_max has increased */ + h_max = max(h_max, sp->h); + + stars_reset_feedback(sp); + + /* Only do feedback if stars have a reasonable birth time */ + if (feedback_do_feedback(sp)) { + + const integertime_t ti_step = get_integer_timestep(sp->time_bin); + const integertime_t ti_begin = + get_integer_time_begin(e->ti_current - 1, sp->time_bin); + + /* Get particle time-step */ + double dt; + if (with_cosmology) { + dt = cosmology_get_delta_time(e->cosmology, ti_begin, + ti_begin + ti_step); + } else { + dt = get_timestep(sp->time_bin, e->time_base); + } + + /* Calculate age of the star at current time */ + double star_age_end_of_step; + if (with_cosmology) { + star_age_end_of_step = cosmology_get_delta_time_from_scale_factors( + cosmo, sp->birth_scale_factor, (float)cosmo->a); + } else { + star_age_end_of_step = (float)e->time - sp->birth_time; + } + + /* Has this star been around for a while ? */ + if (star_age_end_of_step > 0.) { + + /* Age of the star at the start of the step */ + const double star_age_beg_of_step = + max(star_age_end_of_step - dt, 0.); + + /* Compute the stellar evolution */ + feedback_evolve_spart(sp, feedback_props, cosmo, us, + star_age_beg_of_step, dt); + } else { + + /* Reset the feedback fields of the star particle */ + feedback_reset_feedback(sp, feedback_props); + } + } else { + + /* Reset the feedback fields of the star particle */ + feedback_reset_feedback(sp, feedback_props); + } + } + + /* We now need to treat the particles whose smoothing length had not + * converged again */ + + /* Re-set the counter for the next loop (potentially). */ + scount = redo; + if (scount > 0) { + + /* Climb up the cell hierarchy. */ + for (struct cell *finger = c; finger != NULL; finger = finger->parent) { + + /* Run through this cell's density interactions. 
*/ + for (struct link *l = finger->stars.density; l != NULL; l = l->next) { + +#ifdef SWIFT_DEBUG_CHECKS + if (l->t->ti_run < r->e->ti_current) + error("Density task should have been run."); +#endif + + /* Self-interaction? */ + if (l->t->type == task_type_self) + runner_doself_subset_branch_stars_density(r, finger, sparts, sid, + scount); + + /* Otherwise, pair interaction? */ + else if (l->t->type == task_type_pair) { + + /* Left or right? */ + if (l->t->ci == finger) + runner_dopair_subset_branch_stars_density( + r, finger, sparts, sid, scount, l->t->cj); + else + runner_dopair_subset_branch_stars_density( + r, finger, sparts, sid, scount, l->t->ci); + } + + /* Otherwise, sub-self interaction? */ + else if (l->t->type == task_type_sub_self) + runner_dosub_subset_stars_density(r, finger, sparts, sid, scount, + NULL, 1); + + /* Otherwise, sub-pair interaction? */ + else if (l->t->type == task_type_sub_pair) { + + /* Left or right? */ + if (l->t->ci == finger) + runner_dosub_subset_stars_density(r, finger, sparts, sid, + scount, l->t->cj, 1); + else + runner_dosub_subset_stars_density(r, finger, sparts, sid, + scount, l->t->ci, 1); + } + } + } + } + } + + if (scount) { + error("Smoothing length failed to converge on %i particles.", scount); + } + + /* Be clean */ + free(left); + free(right); + free(sid); + free(h_0); + } + + /* Update h_max */ + c->stars.h_max = h_max; + + /* The ghost may not always be at the top level. + * Therefore we need to update h_max between the super- and top-levels */ + if (c->stars.ghost) { + for (struct cell *tmp = c->parent; tmp != NULL; tmp = tmp->parent) { + atomic_max_d(&tmp->stars.h_max, h_max); + } + } + + if (timer) TIMER_TOC(timer_do_stars_ghost); +} + +/** + * @brief Intermediate task after the density to check that the smoothing + * lengths are correct. + * + * @param r The runner thread. + * @param c The cell. + * @param timer Are we timing this ? + */ +void runner_do_black_holes_density_ghost(struct runner *r, struct cell *c, + int timer) { + + struct bpart *restrict bparts = c->black_holes.parts; + const struct engine *e = r->e; + const struct cosmology *cosmo = e->cosmology; + const float black_holes_h_max = e->hydro_properties->h_max; + const float black_holes_h_min = e->hydro_properties->h_min; + const float eps = e->black_holes_properties->h_tolerance; + const float black_holes_eta_dim = + pow_dimension(e->black_holes_properties->eta_neighbours); + const int max_smoothing_iter = e->hydro_properties->max_smoothing_iterations; + int redo = 0, bcount = 0; + + /* Running value of the maximal smoothing length */ + double h_max = c->black_holes.h_max; + + TIMER_TIC; + + /* Anything to do here? */ + if (c->black_holes.count == 0) return; + if (!cell_is_active_black_holes(c, e)) return; + + /* Recurse? */ + if (c->split) { + for (int k = 0; k < 8; k++) { + if (c->progeny[k] != NULL) { + runner_do_black_holes_density_ghost(r, c->progeny[k], 0); + + /* Update h_max */ + h_max = max(h_max, c->progeny[k]->black_holes.h_max); + } + } + } else { + + /* Init the list of active particles that have to be updated. 
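Before the black-hole copy of the same machinery starts, the retry structure shared by all of these ghost functions is worth isolating: particles whose smoothing length has not converged are compacted to the front of the index array, and only that subset is fed back into the density loops on the next pass (by climbing up to the parent cells, as just shown). A minimal sketch of the compact-and-retry pattern, with made-up names:

    /* Run a per-item update until it converges, keeping only the unconverged
     * items for the next round (mirrors the sid[]/redo compaction above). */
    static int iterate_until_converged(int *idx, int n, int max_iter,
                                       int (*converged_after_step)(int item)) {
      for (int iter = 0; n > 0 && iter < max_iter; iter++) {
        int redo = 0;
        for (int i = 0; i < n; i++) {
          const int k = idx[i];
          if (!converged_after_step(k))
            idx[redo++] = k; /* Still unconverged: keep it for the next pass. */
        }
        n = redo;
      }
      return n; /* Anything left over triggers the "failed to converge" error. */
    }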
*/ + int *sid = NULL; + float *h_0 = NULL; + float *left = NULL; + float *right = NULL; + if ((sid = (int *)malloc(sizeof(int) * c->black_holes.count)) == NULL) + error("Can't allocate memory for sid."); + if ((h_0 = (float *)malloc(sizeof(float) * c->black_holes.count)) == NULL) + error("Can't allocate memory for h_0."); + if ((left = (float *)malloc(sizeof(float) * c->black_holes.count)) == NULL) + error("Can't allocate memory for left."); + if ((right = (float *)malloc(sizeof(float) * c->black_holes.count)) == NULL) + error("Can't allocate memory for right."); + for (int k = 0; k < c->black_holes.count; k++) + if (bpart_is_active(&bparts[k], e)) { + sid[bcount] = k; + h_0[bcount] = bparts[k].h; + left[bcount] = 0.f; + right[bcount] = black_holes_h_max; + ++bcount; + } + + /* While there are particles that need to be updated... */ + for (int num_reruns = 0; bcount > 0 && num_reruns < max_smoothing_iter; + num_reruns++) { + + /* Reset the redo-count. */ + redo = 0; + + /* Loop over the remaining active parts in this cell. */ + for (int i = 0; i < bcount; i++) { + + /* Get a direct pointer on the part. */ + struct bpart *bp = &bparts[sid[i]]; + +#ifdef SWIFT_DEBUG_CHECKS + /* Is this part within the timestep? */ + if (!bpart_is_active(bp, e)) + error("Ghost applied to inactive particle"); +#endif + + /* Get some useful values */ + const float h_init = h_0[i]; + const float h_old = bp->h; + const float h_old_dim = pow_dimension(h_old); + const float h_old_dim_minus_one = pow_dimension_minus_one(h_old); + + float h_new; + int has_no_neighbours = 0; + + if (bp->density.wcount == 0.f) { /* No neighbours case */ + + /* Flag that there were no neighbours */ + has_no_neighbours = 1; + + /* Double h and try again */ + h_new = 2.f * h_old; + + } else { + + /* Finish the density calculation */ + black_holes_end_density(bp, cosmo); + + /* Compute one step of the Newton-Raphson scheme */ + const float n_sum = bp->density.wcount * h_old_dim; + const float n_target = black_holes_eta_dim; + const float f = n_sum - n_target; + const float f_prime = + bp->density.wcount_dh * h_old_dim + + hydro_dimension * bp->density.wcount * h_old_dim_minus_one; + + /* Improve the bisection bounds */ + if (n_sum < n_target) + left[i] = max(left[i], h_old); + else if (n_sum > n_target) + right[i] = min(right[i], h_old); + +#ifdef SWIFT_DEBUG_CHECKS + /* Check the validity of the left and right bounds */ + if (left[i] > right[i]) + error("Invalid left (%e) and right (%e)", left[i], right[i]); +#endif + + /* Skip if h is already h_max and we don't have enough neighbours + */ + /* Same if we are below h_min */ + if (((bp->h >= black_holes_h_max) && (f < 0.f)) || + ((bp->h <= black_holes_h_min) && (f > 0.f))) { + + black_holes_reset_feedback(bp); + + /* Ok, we are done with this particle */ + continue; + } + + /* Normal case: Use Newton-Raphson to get a better value of h */ + + /* Avoid floating point exception from f_prime = 0 */ + h_new = h_old - f / (f_prime + FLT_MIN); + + /* Be verbose about the particles that struggle to converge */ + if (num_reruns > max_smoothing_iter - 10) { + + message( + "Smoothing length convergence problem: iter=%d p->id=%lld " + "h_init=%12.8e h_old=%12.8e h_new=%12.8e f=%f f_prime=%f " + "n_sum=%12.8e n_target=%12.8e left=%12.8e right=%12.8e", + num_reruns, bp->id, h_init, h_old, h_new, f, f_prime, n_sum, + n_target, left[i], right[i]); + } + + /* Safety check: truncate to the range [ h_old/2 , 2h_old ]. 
*/ + h_new = min(h_new, 2.f * h_old); + h_new = max(h_new, 0.5f * h_old); + + /* Verify that we are actually progrssing towards the answer */ + h_new = max(h_new, left[i]); + h_new = min(h_new, right[i]); + } + + /* Check whether the particle has an inappropriate smoothing length + */ + if (fabsf(h_new - h_old) > eps * h_old) { + + /* Ok, correct then */ + + /* Case where we have been oscillating around the solution */ + if ((h_new == left[i] && h_old == right[i]) || + (h_old == left[i] && h_new == right[i])) { + + /* Bissect the remaining interval */ + bp->h = pow_inv_dimension( + 0.5f * (pow_dimension(left[i]) + pow_dimension(right[i]))); + + } else { + + /* Normal case */ + bp->h = h_new; + } + + /* If below the absolute maximum, try again */ + if (bp->h < black_holes_h_max && bp->h > black_holes_h_min) { + + /* Flag for another round of fun */ + sid[redo] = sid[i]; + h_0[redo] = h_0[i]; + left[redo] = left[i]; + right[redo] = right[i]; + redo += 1; + + /* Re-initialise everything */ + black_holes_init_bpart(bp); + + /* Off we go ! */ + continue; + + } else if (bp->h <= black_holes_h_min) { + + /* Ok, this particle is a lost cause... */ + bp->h = black_holes_h_min; + + } else if (bp->h >= black_holes_h_max) { + + /* Ok, this particle is a lost cause... */ + bp->h = black_holes_h_max; + + /* Do some damage control if no neighbours at all were found */ + if (has_no_neighbours) { + black_holes_bpart_has_no_neighbours(bp, cosmo); + } + + } else { + error( + "Fundamental problem with the smoothing length iteration " + "logic."); + } + } + + /* We now have a particle whose smoothing length has converged */ + + black_holes_reset_feedback(bp); + + /* Check if h_max has increased */ + h_max = max(h_max, bp->h); + } + + /* We now need to treat the particles whose smoothing length had not + * converged again */ + + /* Re-set the counter for the next loop (potentially). */ + bcount = redo; + if (bcount > 0) { + + /* Climb up the cell hierarchy. */ + for (struct cell *finger = c; finger != NULL; finger = finger->parent) { + + /* Run through this cell's density interactions. */ + for (struct link *l = finger->black_holes.density; l != NULL; + l = l->next) { + +#ifdef SWIFT_DEBUG_CHECKS + if (l->t->ti_run < r->e->ti_current) + error("Density task should have been run."); +#endif + + /* Self-interaction? */ + if (l->t->type == task_type_self) + runner_doself_subset_branch_bh_density(r, finger, bparts, sid, + bcount); + + /* Otherwise, pair interaction? */ + else if (l->t->type == task_type_pair) { + + /* Left or right? */ + if (l->t->ci == finger) + runner_dopair_subset_branch_bh_density(r, finger, bparts, sid, + bcount, l->t->cj); + else + runner_dopair_subset_branch_bh_density(r, finger, bparts, sid, + bcount, l->t->ci); + } + + /* Otherwise, sub-self interaction? */ + else if (l->t->type == task_type_sub_self) + runner_dosub_subset_bh_density(r, finger, bparts, sid, bcount, + NULL, 1); + + /* Otherwise, sub-pair interaction? */ + else if (l->t->type == task_type_sub_pair) { + + /* Left or right? */ + if (l->t->ci == finger) + runner_dosub_subset_bh_density(r, finger, bparts, sid, bcount, + l->t->cj, 1); + else + runner_dosub_subset_bh_density(r, finger, bparts, sid, bcount, + l->t->ci, 1); + } + } + } + } + } + + if (bcount) { + error("Smoothing length failed to converge on %i particles.", bcount); + } + + /* Be clean */ + free(left); + free(right); + free(sid); + free(h_0); + } + + /* Update h_max */ + c->black_holes.h_max = h_max; + + /* The ghost may not always be at the top level. 
+ * Therefore we need to update h_max between the super- and top-levels */ + if (c->black_holes.density_ghost) { + for (struct cell *tmp = c->parent; tmp != NULL; tmp = tmp->parent) { + atomic_max_d(&tmp->black_holes.h_max, h_max); + } + } + + if (timer) TIMER_TOC(timer_do_black_holes_ghost); +} + +/** + * @brief Intermediate task after the BHs have done their swallowing step. + * This is used to update the BH quantities if necessary. + * + * @param r The runner thread. + * @param c The cell. + * @param timer Are we timing this ? + */ +void runner_do_black_holes_swallow_ghost(struct runner *r, struct cell *c, + int timer) { + + struct bpart *restrict bparts = c->black_holes.parts; + const int count = c->black_holes.count; + const struct engine *e = r->e; + const int with_cosmology = e->policy & engine_policy_cosmology; + + TIMER_TIC; + + /* Anything to do here? */ + if (!cell_is_active_hydro(c, e)) return; + + /* Recurse? */ + if (c->split) { + for (int k = 0; k < 8; k++) + if (c->progeny[k] != NULL) + runner_do_black_holes_swallow_ghost(r, c->progeny[k], 0); + } else { + + /* Loop over the parts in this cell. */ + for (int i = 0; i < count; i++) { + + /* Get a direct pointer on the part. */ + struct bpart *bp = &bparts[i]; + + if (bpart_is_active(bp, e)) { + + /* Compute the final operations for repositioning of this BH */ + black_holes_end_reposition(bp, e->black_holes_properties, + e->physical_constants, e->cosmology); + + /* Get particle time-step */ + double dt; + if (with_cosmology) { + const integertime_t ti_step = get_integer_timestep(bp->time_bin); + const integertime_t ti_begin = + get_integer_time_begin(e->ti_current - 1, bp->time_bin); + + dt = cosmology_get_delta_time(e->cosmology, ti_begin, + ti_begin + ti_step); + } else { + dt = get_timestep(bp->time_bin, e->time_base); + } + + /* Compute variables required for the feedback loop */ + black_holes_prepare_feedback(bp, e->black_holes_properties, + e->physical_constants, e->cosmology, dt); + } + } + } + + if (timer) TIMER_TOC(timer_do_black_holes_ghost); +} + +/** + * @brief Intermediate task after the gradient loop that does final operations + * on the gradient quantities and optionally slope limits the gradients + * + * @param r The runner thread. + * @param c The cell. + * @param timer Are we timing this ? + */ +void runner_do_extra_ghost(struct runner *r, struct cell *c, int timer) { + +#ifdef EXTRA_HYDRO_LOOP + + struct part *restrict parts = c->hydro.parts; + struct xpart *restrict xparts = c->hydro.xparts; + const int count = c->hydro.count; + const struct engine *e = r->e; + const integertime_t ti_current = e->ti_current; + const int with_cosmology = (e->policy & engine_policy_cosmology); + const double time_base = e->time_base; + const struct cosmology *cosmo = e->cosmology; + const struct hydro_props *hydro_props = e->hydro_properties; + + TIMER_TIC; + + /* Anything to do here? */ + if (!cell_is_active_hydro(c, e)) return; + + /* Recurse? */ + if (c->split) { + for (int k = 0; k < 8; k++) + if (c->progeny[k] != NULL) runner_do_extra_ghost(r, c->progeny[k], 0); + } else { + + /* Loop over the parts in this cell. */ + for (int i = 0; i < count; i++) { + + /* Get a direct pointer on the part. */ + struct part *restrict p = &parts[i]; + struct xpart *restrict xp = &xparts[i]; + + if (part_is_active(p, e)) { + + /* Finish the gradient calculation */ + hydro_end_gradient(p); + + /* As of here, particle force variables will be set. */ + + /* Calculate the time-step for passing to hydro_prepare_force. 
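The time-step calculation that follows, like the dt_cool, dt_therm and dt_star computations in runner_others.c further down, always has the same shape: read the step length in integer ticks off the particle's time bin, locate the start of that step on the integer timeline, then either hand both integer times to the cosmology module or simply multiply by the base time step. Stripped of the SWIFT types, the non-cosmological branch reduces to something like the sketch below (hypothetical helper names, assuming a power-of-two hierarchy of time bins):

    #include <stdint.h>

    typedef int64_t toy_integertime_t;

    /* Step length in integer ticks for a power-of-two time-bin scheme
     * (the role played by get_integer_timestep()). */
    static toy_integertime_t toy_int_timestep(int time_bin) {
      return (toy_integertime_t)1 << time_bin;
    }

    /* Start of the step containing the given integer time: round down to a
     * multiple of the step length (the role of get_integer_time_begin()). */
    static toy_integertime_t toy_int_time_begin(toy_integertime_t ti, int time_bin) {
      const toy_integertime_t dti = toy_int_timestep(time_bin);
      return (ti / dti) * dti;
    }

    /* Physical step size without cosmology: ticks times the base time step.
     * With cosmology, ti_begin and ti_begin + dti are instead handed to
     * cosmology_get_delta_time(), which converts the integer interval into a
     * proper-time interval using the cosmological expansion. */
    static double toy_physical_dt(int time_bin, double time_base) {
      return (double)toy_int_timestep(time_bin) * time_base;
    }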
+ * This is the physical time between the start and end of the time-step + * without any scale-factor powers. */ + double dt_alpha; + + if (with_cosmology) { + const integertime_t ti_step = get_integer_timestep(p->time_bin); + const integertime_t ti_begin = + get_integer_time_begin(ti_current - 1, p->time_bin); + + dt_alpha = + cosmology_get_delta_time(cosmo, ti_begin, ti_begin + ti_step); + } else { + dt_alpha = get_timestep(p->time_bin, time_base); + } + + /* Compute variables required for the force loop */ + hydro_prepare_force(p, xp, cosmo, hydro_props, dt_alpha); + + /* The particle force values are now set. Do _NOT_ + try to read any particle density variables! */ + + /* Prepare the particle for the force loop over neighbours */ + hydro_reset_acceleration(p); + } + } + } + + if (timer) TIMER_TOC(timer_do_extra_ghost); + +#else + error("SWIFT was not compiled with the extra hydro loop activated."); +#endif +} + +/** + * @brief Intermediate task after the density to check that the smoothing + * lengths are correct. + * + * @param r The runner thread. + * @param c The cell. + * @param timer Are we timing this ? + */ +void runner_do_ghost(struct runner *r, struct cell *c, int timer) { + + struct part *restrict parts = c->hydro.parts; + struct xpart *restrict xparts = c->hydro.xparts; + const struct engine *e = r->e; + const struct space *s = e->s; + const struct hydro_space *hs = &s->hs; + const struct cosmology *cosmo = e->cosmology; + const struct chemistry_global_data *chemistry = e->chemistry; + + const int with_cosmology = (e->policy & engine_policy_cosmology); + + const float hydro_h_max = e->hydro_properties->h_max; + const float hydro_h_min = e->hydro_properties->h_min; + const float eps = e->hydro_properties->h_tolerance; + const float hydro_eta_dim = + pow_dimension(e->hydro_properties->eta_neighbours); + const int max_smoothing_iter = e->hydro_properties->max_smoothing_iterations; + int redo = 0, count = 0; + + /* Running value of the maximal smoothing length */ + double h_max = c->hydro.h_max; + + TIMER_TIC; + + /* Anything to do here? */ + if (c->hydro.count == 0) return; + if (!cell_is_active_hydro(c, e)) return; + + /* Recurse? */ + if (c->split) { + for (int k = 0; k < 8; k++) { + if (c->progeny[k] != NULL) { + runner_do_ghost(r, c->progeny[k], 0); + + /* Update h_max */ + h_max = max(h_max, c->progeny[k]->hydro.h_max); + } + } + } else { + + /* Init the list of active particles that have to be updated and their + * current smoothing lengths. */ + int *pid = NULL; + float *h_0 = NULL; + float *left = NULL; + float *right = NULL; + if ((pid = (int *)malloc(sizeof(int) * c->hydro.count)) == NULL) + error("Can't allocate memory for pid."); + if ((h_0 = (float *)malloc(sizeof(float) * c->hydro.count)) == NULL) + error("Can't allocate memory for h_0."); + if ((left = (float *)malloc(sizeof(float) * c->hydro.count)) == NULL) + error("Can't allocate memory for left."); + if ((right = (float *)malloc(sizeof(float) * c->hydro.count)) == NULL) + error("Can't allocate memory for right."); + for (int k = 0; k < c->hydro.count; k++) + if (part_is_active(&parts[k], e)) { + pid[count] = k; + h_0[count] = parts[k].h; + left[count] = 0.f; + right[count] = hydro_h_max; + ++count; + } + + /* While there are particles that need to be updated... */ + for (int num_reruns = 0; count > 0 && num_reruns < max_smoothing_iter; + num_reruns++) { + + /* Reset the redo-count. */ + redo = 0; + + /* Loop over the remaining active parts in this cell. 
*/ + for (int i = 0; i < count; i++) { + + /* Get a direct pointer on the part. */ + struct part *p = &parts[pid[i]]; + struct xpart *xp = &xparts[pid[i]]; + +#ifdef SWIFT_DEBUG_CHECKS + /* Is this part within the timestep? */ + if (!part_is_active(p, e)) error("Ghost applied to inactive particle"); +#endif + + /* Get some useful values */ + const float h_init = h_0[i]; + const float h_old = p->h; + const float h_old_dim = pow_dimension(h_old); + const float h_old_dim_minus_one = pow_dimension_minus_one(h_old); + + float h_new; + int has_no_neighbours = 0; + + if (p->density.wcount == 0.f) { /* No neighbours case */ + + /* Flag that there were no neighbours */ + has_no_neighbours = 1; + + /* Double h and try again */ + h_new = 2.f * h_old; + + } else { + + /* Finish the density calculation */ + hydro_end_density(p, cosmo); + chemistry_end_density(p, chemistry, cosmo); + pressure_floor_end_density(p, cosmo); + + /* Compute one step of the Newton-Raphson scheme */ + const float n_sum = p->density.wcount * h_old_dim; + const float n_target = hydro_eta_dim; + const float f = n_sum - n_target; + const float f_prime = + p->density.wcount_dh * h_old_dim + + hydro_dimension * p->density.wcount * h_old_dim_minus_one; + + /* Improve the bisection bounds */ + if (n_sum < n_target) + left[i] = max(left[i], h_old); + else if (n_sum > n_target) + right[i] = min(right[i], h_old); + +#ifdef SWIFT_DEBUG_CHECKS + /* Check the validity of the left and right bounds */ + if (left[i] > right[i]) + error("Invalid left (%e) and right (%e)", left[i], right[i]); +#endif + + /* Skip if h is already h_max and we don't have enough neighbours */ + /* Same if we are below h_min */ + if (((p->h >= hydro_h_max) && (f < 0.f)) || + ((p->h <= hydro_h_min) && (f > 0.f))) { + + /* We have a particle whose smoothing length is already set (wants + * to be larger but has already hit the maximum OR wants to be + * smaller but has already reached the minimum). So, just tidy up + * as if the smoothing length had converged correctly */ + +#ifdef EXTRA_HYDRO_LOOP + + /* As of here, particle gradient variables will be set. */ + /* The force variables are set in the extra ghost. */ + + /* Compute variables required for the gradient loop */ + hydro_prepare_gradient(p, xp, cosmo); + + /* The particle gradient values are now set. Do _NOT_ + try to read any particle density variables! */ + + /* Prepare the particle for the gradient loop over neighbours + */ + hydro_reset_gradient(p); + +#else + const struct hydro_props *hydro_props = e->hydro_properties; + + /* Calculate the time-step for passing to hydro_prepare_force, used + * for the evolution of alpha factors (i.e. those involved in the + * artificial viscosity and thermal conduction terms) */ + const double time_base = e->time_base; + const integertime_t ti_current = e->ti_current; + double dt_alpha; + + if (with_cosmology) { + const integertime_t ti_step = get_integer_timestep(p->time_bin); + const integertime_t ti_begin = + get_integer_time_begin(ti_current - 1, p->time_bin); + + dt_alpha = + cosmology_get_delta_time(cosmo, ti_begin, ti_begin + ti_step); + } else { + dt_alpha = get_timestep(p->time_bin, time_base); + } + + /* As of here, particle force variables will be set. */ + + /* Compute variables required for the force loop */ + hydro_prepare_force(p, xp, cosmo, hydro_props, dt_alpha); + + /* The particle force values are now set. Do _NOT_ + try to read any particle density variables! 
*/ + + /* Prepare the particle for the force loop over neighbours */ + hydro_reset_acceleration(p); + +#endif /* EXTRA_HYDRO_LOOP */ + + /* Ok, we are done with this particle */ + continue; + } + + /* Normal case: Use Newton-Raphson to get a better value of h */ + + /* Avoid floating point exception from f_prime = 0 */ + h_new = h_old - f / (f_prime + FLT_MIN); + + /* Be verbose about the particles that struggle to converge */ + if (num_reruns > max_smoothing_iter - 10) { + + message( + "Smoothing length convergence problem: iter=%d p->id=%lld " + "h_init=%12.8e h_old=%12.8e h_new=%12.8e f=%f f_prime=%f " + "n_sum=%12.8e n_target=%12.8e left=%12.8e right=%12.8e", + num_reruns, p->id, h_init, h_old, h_new, f, f_prime, n_sum, + n_target, left[i], right[i]); + } + +#ifdef SWIFT_DEBUG_CHECKS + if ((f > 0.f && h_new > h_old) || (f < 0.f && h_new < h_old)) + error( + "Smoothing length correction not going in the right direction"); +#endif + + /* Safety check: truncate to the range [ h_old/2 , 2h_old ]. */ + h_new = min(h_new, 2.f * h_old); + h_new = max(h_new, 0.5f * h_old); + + /* Verify that we are actually progrssing towards the answer */ + h_new = max(h_new, left[i]); + h_new = min(h_new, right[i]); + } + + /* Check whether the particle has an inappropriate smoothing length + */ + if (fabsf(h_new - h_old) > eps * h_old) { + + /* Ok, correct then */ + + /* Case where we have been oscillating around the solution */ + if ((h_new == left[i] && h_old == right[i]) || + (h_old == left[i] && h_new == right[i])) { + + /* Bissect the remaining interval */ + p->h = pow_inv_dimension( + 0.5f * (pow_dimension(left[i]) + pow_dimension(right[i]))); + + } else { + + /* Normal case */ + p->h = h_new; + } + + /* If within the allowed range, try again */ + if (p->h < hydro_h_max && p->h > hydro_h_min) { + + /* Flag for another round of fun */ + pid[redo] = pid[i]; + h_0[redo] = h_0[i]; + left[redo] = left[i]; + right[redo] = right[i]; + redo += 1; + + /* Re-initialise everything */ + hydro_init_part(p, hs); + chemistry_init_part(p, chemistry); + pressure_floor_init_part(p, xp); + tracers_after_init(p, xp, e->internal_units, e->physical_constants, + with_cosmology, e->cosmology, + e->hydro_properties, e->cooling_func, e->time); + + /* Off we go ! */ + continue; + + } else if (p->h <= hydro_h_min) { + + /* Ok, this particle is a lost cause... */ + p->h = hydro_h_min; + + } else if (p->h >= hydro_h_max) { + + /* Ok, this particle is a lost cause... */ + p->h = hydro_h_max; + + /* Do some damage control if no neighbours at all were found */ + if (has_no_neighbours) { + hydro_part_has_no_neighbours(p, xp, cosmo); + chemistry_part_has_no_neighbours(p, xp, chemistry, cosmo); + pressure_floor_part_has_no_neighbours(p, xp, cosmo); + } + + } else { + error( + "Fundamental problem with the smoothing length iteration " + "logic."); + } + } + + /* We now have a particle whose smoothing length has converged */ + + /* Check if h_max is increased */ + h_max = max(h_max, p->h); + +#ifdef EXTRA_HYDRO_LOOP + + /* As of here, particle gradient variables will be set. */ + /* The force variables are set in the extra ghost. */ + + /* Compute variables required for the gradient loop */ + hydro_prepare_gradient(p, xp, cosmo); + + /* The particle gradient values are now set. Do _NOT_ + try to read any particle density variables! 
*/ + + /* Prepare the particle for the gradient loop over neighbours */ + hydro_reset_gradient(p); + +#else + const struct hydro_props *hydro_props = e->hydro_properties; + + /* Calculate the time-step for passing to hydro_prepare_force, used + * for the evolution of alpha factors (i.e. those involved in the + * artificial viscosity and thermal conduction terms) */ + const double time_base = e->time_base; + const integertime_t ti_current = e->ti_current; + double dt_alpha; + + if (with_cosmology) { + const integertime_t ti_step = get_integer_timestep(p->time_bin); + const integertime_t ti_begin = + get_integer_time_begin(ti_current - 1, p->time_bin); + + dt_alpha = + cosmology_get_delta_time(cosmo, ti_begin, ti_begin + ti_step); + } else { + dt_alpha = get_timestep(p->time_bin, time_base); + } + + /* As of here, particle force variables will be set. */ + + /* Compute variables required for the force loop */ + hydro_prepare_force(p, xp, cosmo, hydro_props, dt_alpha); + + /* The particle force values are now set. Do _NOT_ + try to read any particle density variables! */ + + /* Prepare the particle for the force loop over neighbours */ + hydro_reset_acceleration(p); + +#endif /* EXTRA_HYDRO_LOOP */ + } + + /* We now need to treat the particles whose smoothing length had not + * converged again */ + + /* Re-set the counter for the next loop (potentially). */ + count = redo; + if (count > 0) { + + /* Climb up the cell hierarchy. */ + for (struct cell *finger = c; finger != NULL; finger = finger->parent) { + + /* Run through this cell's density interactions. */ + for (struct link *l = finger->hydro.density; l != NULL; l = l->next) { + +#ifdef SWIFT_DEBUG_CHECKS + if (l->t->ti_run < r->e->ti_current) + error("Density task should have been run."); +#endif + + /* Self-interaction? */ + if (l->t->type == task_type_self) + runner_doself_subset_branch_density(r, finger, parts, pid, count); + + /* Otherwise, pair interaction? */ + else if (l->t->type == task_type_pair) { + + /* Left or right? */ + if (l->t->ci == finger) + runner_dopair_subset_branch_density(r, finger, parts, pid, + count, l->t->cj); + else + runner_dopair_subset_branch_density(r, finger, parts, pid, + count, l->t->ci); + } + + /* Otherwise, sub-self interaction? */ + else if (l->t->type == task_type_sub_self) + runner_dosub_subset_density(r, finger, parts, pid, count, NULL, + 1); + + /* Otherwise, sub-pair interaction? */ + else if (l->t->type == task_type_sub_pair) { + + /* Left or right? */ + if (l->t->ci == finger) + runner_dosub_subset_density(r, finger, parts, pid, count, + l->t->cj, 1); + else + runner_dosub_subset_density(r, finger, parts, pid, count, + l->t->ci, 1); + } + } + } + } + } + + if (count) { + error("Smoothing length failed to converge on %i particles.", count); + } + + /* Be clean */ + free(left); + free(right); + free(pid); + free(h_0); + } + + /* Update h_max */ + c->hydro.h_max = h_max; + + /* The ghost may not always be at the top level. 
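The h_max propagation that closes this function (and the stars and black-hole ghosts above) has to be atomic because ghosts on sibling cells can finish concurrently, so a plain read-modify-write on an ancestor cell's h_max would race. One standard way to build such an atomic maximum for a double is a compare-exchange loop on its bit pattern; the sketch below uses C11 atomics and invented names, and illustrates the idea behind atomic_max_d rather than reproducing SWIFT's implementation.

    #include <stdatomic.h>
    #include <stdint.h>
    #include <string.h>

    /* A double stored by bit pattern in an atomic 64-bit integer, with an
     * atomic "take the maximum" update built on compare-exchange. */
    typedef struct { _Atomic uint64_t bits; } toy_atomic_double;

    static double toy_as_double(uint64_t b) { double d; memcpy(&d, &b, sizeof d); return d; }
    static uint64_t toy_as_bits(double d) { uint64_t b; memcpy(&b, &d, sizeof b); return b; }

    static void toy_atomic_max(toy_atomic_double *a, double value) {
      uint64_t old = atomic_load(&a->bits);
      while (toy_as_double(old) < value) {
        /* On failure, old is refreshed with the current value and we retry. */
        if (atomic_compare_exchange_weak(&a->bits, &old, toy_as_bits(value))) break;
      }
    }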
+ * Therefore we need to update h_max between the super- and top-levels */ + if (c->hydro.ghost) { + for (struct cell *tmp = c->parent; tmp != NULL; tmp = tmp->parent) { + atomic_max_d(&tmp->hydro.h_max, h_max); + } + } + + if (timer) TIMER_TOC(timer_do_ghost); +} diff --git a/src/runner_main.c b/src/runner_main.c new file mode 100644 index 0000000000000000000000000000000000000000..a674b64ae671bf33df0b5ba9eaa951097d738ba9 --- /dev/null +++ b/src/runner_main.c @@ -0,0 +1,495 @@ +/******************************************************************************* + * This file is part of SWIFT. + * Copyright (c) 2012 Pedro Gonnet (pedro.gonnet@durham.ac.uk) + * Matthieu Schaller (matthieu.schaller@durham.ac.uk) + * 2015 Peter W. Draper (p.w.draper@durham.ac.uk) + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published + * by the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public License + * along with this program. If not, see <http://www.gnu.org/licenses/>. + * + ******************************************************************************/ + +/* Config parameters. */ +#include "../config.h" + +/* MPI headers. */ +#ifdef WITH_MPI +#include <mpi.h> +#endif + +/* This object's header. */ +#include "runner.h" + +/* Local headers. */ +#include "engine.h" +#include "scheduler.h" +#include "space_getsid.h" +#include "timers.h" + +/* Import the gravity loop functions. */ +#include "runner_doiact_grav.h" + +/* Import the density loop functions. */ +#define FUNCTION density +#define FUNCTION_TASK_LOOP TASK_LOOP_DENSITY +#include "runner_doiact_hydro.h" +#undef FUNCTION +#undef FUNCTION_TASK_LOOP + +/* Import the gradient loop functions (if required). */ +#ifdef EXTRA_HYDRO_LOOP +#define FUNCTION gradient +#define FUNCTION_TASK_LOOP TASK_LOOP_GRADIENT +#include "runner_doiact_hydro.h" +#undef FUNCTION +#undef FUNCTION_TASK_LOOP +#endif + +/* Import the force loop functions. */ +#define FUNCTION force +#define FUNCTION_TASK_LOOP TASK_LOOP_FORCE +#include "runner_doiact_hydro.h" +#undef FUNCTION +#undef FUNCTION_TASK_LOOP + +/* Import the limiter loop functions. */ +#define FUNCTION limiter +#define FUNCTION_TASK_LOOP TASK_LOOP_LIMITER +#include "runner_doiact_hydro.h" +#undef FUNCTION +#undef FUNCTION_TASK_LOOP + +/* Import the stars density loop functions. */ +#define FUNCTION density +#define FUNCTION_TASK_LOOP TASK_LOOP_DENSITY +#include "runner_doiact_stars.h" +#undef FUNCTION_TASK_LOOP +#undef FUNCTION + +/* Import the stars feedback loop functions. */ +#define FUNCTION feedback +#define FUNCTION_TASK_LOOP TASK_LOOP_FEEDBACK +#include "runner_doiact_stars.h" +#undef FUNCTION_TASK_LOOP +#undef FUNCTION + +/* Import the black hole density loop functions. */ +#define FUNCTION density +#define FUNCTION_TASK_LOOP TASK_LOOP_DENSITY +#include "runner_doiact_black_holes.h" +#undef FUNCTION_TASK_LOOP +#undef FUNCTION + +/* Import the black hole feedback loop functions. 
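These repeated #define FUNCTION / #include / #undef blocks are a "template by inclusion" idiom: the same generic interaction-loop header is pulled in several times, each time with a different FUNCTION and task-loop macro, so that a single body of loop code generates the density, gradient, force, limiter, stars and black-hole variants. A toy version of the mechanism with invented file and function names is given below; the real runner_doiact_*.h headers are considerably more involved.

    /* ---- toy_loop.h (illustrative) -------------------------------------- */
    #define TOY_PASTE(a, b) a##_##b
    #define TOY_NAME(prefix, f) TOY_PASTE(prefix, f)

    /* Each inclusion of this header defines one do_self_<FUNCTION>() loop. */
    static void TOY_NAME(do_self, FUNCTION)(float *data, int n) {
      for (int i = 0; i < n; i++) {
        /* The FUNCTION-specific particle interaction would be called here. */
        data[i] += 1.f;
      }
    }

    #undef TOY_NAME
    #undef TOY_PASTE
    /* ---- end of toy_loop.h ----------------------------------------------- */

    /* In the including .c file:
     *
     *   #define FUNCTION density
     *   #include "toy_loop.h"        defines do_self_density()
     *   #undef FUNCTION
     *
     *   #define FUNCTION force
     *   #include "toy_loop.h"        defines do_self_force()
     *   #undef FUNCTION
     */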
*/ +#define FUNCTION swallow +#define FUNCTION_TASK_LOOP TASK_LOOP_SWALLOW +#include "runner_doiact_black_holes.h" +#undef FUNCTION_TASK_LOOP +#undef FUNCTION + +/* Import the black hole feedback loop functions. */ +#define FUNCTION feedback +#define FUNCTION_TASK_LOOP TASK_LOOP_FEEDBACK +#include "runner_doiact_black_holes.h" +#undef FUNCTION_TASK_LOOP +#undef FUNCTION + +/** + * @brief The #runner main thread routine. + * + * @param data A pointer to this thread's data. + */ +void *runner_main(void *data) { + + struct runner *r = (struct runner *)data; + struct engine *e = r->e; + struct scheduler *sched = &e->sched; + unsigned int seed = r->id; + pthread_setspecific(sched->local_seed_pointer, &seed); + /* Main loop. */ + while (1) { + + /* Wait at the barrier. */ + engine_barrier(e); + + /* Can we go home yet? */ + if (e->step_props & engine_step_prop_done) break; + + /* Re-set the pointer to the previous task, as there is none. */ + struct task *t = NULL; + struct task *prev = NULL; + + /* Loop while there are tasks... */ + while (1) { + + /* If there's no old task, try to get a new one. */ + if (t == NULL) { + + /* Get the task. */ + TIMER_TIC + t = scheduler_gettask(sched, r->qid, prev); + TIMER_TOC(timer_gettask); + + /* Did I get anything? */ + if (t == NULL) break; + } + + /* Get the cells. */ + struct cell *ci = t->ci; + struct cell *cj = t->cj; + +#ifdef SWIFT_DEBUG_TASKS + /* Mark the thread we run on */ + t->rid = r->cpuid; + + /* And recover the pair direction */ + if (t->type == task_type_pair || t->type == task_type_sub_pair) { + struct cell *ci_temp = ci; + struct cell *cj_temp = cj; + double shift[3]; + t->sid = space_getsid(e->s, &ci_temp, &cj_temp, shift); + } else { + t->sid = -1; + } +#endif + +#ifdef SWIFT_DEBUG_CHECKS + /* Check that we haven't scheduled an inactive task */ + t->ti_run = e->ti_current; + /* Store the task that will be running (for debugging only) */ + r->t = t; +#endif + + /* Different types of tasks... 
*/ + switch (t->type) { + case task_type_self: + if (t->subtype == task_subtype_density) + runner_doself1_branch_density(r, ci); +#ifdef EXTRA_HYDRO_LOOP + else if (t->subtype == task_subtype_gradient) + runner_doself1_branch_gradient(r, ci); +#endif + else if (t->subtype == task_subtype_force) + runner_doself2_branch_force(r, ci); + else if (t->subtype == task_subtype_limiter) + runner_doself2_branch_limiter(r, ci); + else if (t->subtype == task_subtype_grav) + runner_doself_recursive_grav(r, ci, 1); + else if (t->subtype == task_subtype_external_grav) + runner_do_grav_external(r, ci, 1); + else if (t->subtype == task_subtype_stars_density) + runner_doself_branch_stars_density(r, ci); + else if (t->subtype == task_subtype_stars_feedback) + runner_doself_branch_stars_feedback(r, ci); + else if (t->subtype == task_subtype_bh_density) + runner_doself_branch_bh_density(r, ci); + else if (t->subtype == task_subtype_bh_swallow) + runner_doself_branch_bh_swallow(r, ci); + else if (t->subtype == task_subtype_do_gas_swallow) + runner_do_gas_swallow_self(r, ci, 1); + else if (t->subtype == task_subtype_do_bh_swallow) + runner_do_bh_swallow_self(r, ci, 1); + else if (t->subtype == task_subtype_bh_feedback) + runner_doself_branch_bh_feedback(r, ci); + else + error("Unknown/invalid task subtype (%s).", + subtaskID_names[t->subtype]); + break; + + case task_type_pair: + if (t->subtype == task_subtype_density) + runner_dopair1_branch_density(r, ci, cj); +#ifdef EXTRA_HYDRO_LOOP + else if (t->subtype == task_subtype_gradient) + runner_dopair1_branch_gradient(r, ci, cj); +#endif + else if (t->subtype == task_subtype_force) + runner_dopair2_branch_force(r, ci, cj); + else if (t->subtype == task_subtype_limiter) + runner_dopair2_branch_limiter(r, ci, cj); + else if (t->subtype == task_subtype_grav) + runner_dopair_recursive_grav(r, ci, cj, 1); + else if (t->subtype == task_subtype_stars_density) + runner_dopair_branch_stars_density(r, ci, cj); + else if (t->subtype == task_subtype_stars_feedback) + runner_dopair_branch_stars_feedback(r, ci, cj); + else if (t->subtype == task_subtype_bh_density) + runner_dopair_branch_bh_density(r, ci, cj); + else if (t->subtype == task_subtype_bh_swallow) + runner_dopair_branch_bh_swallow(r, ci, cj); + else if (t->subtype == task_subtype_do_gas_swallow) + runner_do_gas_swallow_pair(r, ci, cj, 1); + else if (t->subtype == task_subtype_do_bh_swallow) + runner_do_bh_swallow_pair(r, ci, cj, 1); + else if (t->subtype == task_subtype_bh_feedback) + runner_dopair_branch_bh_feedback(r, ci, cj); + else + error("Unknown/invalid task subtype (%s/%s).", + taskID_names[t->type], subtaskID_names[t->subtype]); + break; + + case task_type_sub_self: + if (t->subtype == task_subtype_density) + runner_dosub_self1_density(r, ci, 1); +#ifdef EXTRA_HYDRO_LOOP + else if (t->subtype == task_subtype_gradient) + runner_dosub_self1_gradient(r, ci, 1); +#endif + else if (t->subtype == task_subtype_force) + runner_dosub_self2_force(r, ci, 1); + else if (t->subtype == task_subtype_limiter) + runner_dosub_self2_limiter(r, ci, 1); + else if (t->subtype == task_subtype_stars_density) + runner_dosub_self_stars_density(r, ci, 1); + else if (t->subtype == task_subtype_stars_feedback) + runner_dosub_self_stars_feedback(r, ci, 1); + else if (t->subtype == task_subtype_bh_density) + runner_dosub_self_bh_density(r, ci, 1); + else if (t->subtype == task_subtype_bh_swallow) + runner_dosub_self_bh_swallow(r, ci, 1); + else if (t->subtype == task_subtype_do_gas_swallow) + runner_do_gas_swallow_self(r, ci, 1); + else 
if (t->subtype == task_subtype_do_bh_swallow) + runner_do_bh_swallow_self(r, ci, 1); + else if (t->subtype == task_subtype_bh_feedback) + runner_dosub_self_bh_feedback(r, ci, 1); + else + error("Unknown/invalid task subtype (%s/%s).", + taskID_names[t->type], subtaskID_names[t->subtype]); + break; + + case task_type_sub_pair: + if (t->subtype == task_subtype_density) + runner_dosub_pair1_density(r, ci, cj, 1); +#ifdef EXTRA_HYDRO_LOOP + else if (t->subtype == task_subtype_gradient) + runner_dosub_pair1_gradient(r, ci, cj, 1); +#endif + else if (t->subtype == task_subtype_force) + runner_dosub_pair2_force(r, ci, cj, 1); + else if (t->subtype == task_subtype_limiter) + runner_dosub_pair2_limiter(r, ci, cj, 1); + else if (t->subtype == task_subtype_stars_density) + runner_dosub_pair_stars_density(r, ci, cj, 1); + else if (t->subtype == task_subtype_stars_feedback) + runner_dosub_pair_stars_feedback(r, ci, cj, 1); + else if (t->subtype == task_subtype_bh_density) + runner_dosub_pair_bh_density(r, ci, cj, 1); + else if (t->subtype == task_subtype_bh_swallow) + runner_dosub_pair_bh_swallow(r, ci, cj, 1); + else if (t->subtype == task_subtype_do_gas_swallow) + runner_do_gas_swallow_pair(r, ci, cj, 1); + else if (t->subtype == task_subtype_do_bh_swallow) + runner_do_bh_swallow_pair(r, ci, cj, 1); + else if (t->subtype == task_subtype_bh_feedback) + runner_dosub_pair_bh_feedback(r, ci, cj, 1); + else + error("Unknown/invalid task subtype (%s/%s).", + taskID_names[t->type], subtaskID_names[t->subtype]); + break; + + case task_type_sort: + /* Cleanup only if any of the indices went stale. */ + runner_do_hydro_sort( + r, ci, t->flags, + ci->hydro.dx_max_sort_old > space_maxreldx * ci->dmin, 1); + /* Reset the sort flags as our work here is done. */ + t->flags = 0; + break; + case task_type_stars_sort: + /* Cleanup only if any of the indices went stale. */ + runner_do_stars_sort( + r, ci, t->flags, + ci->stars.dx_max_sort_old > space_maxreldx * ci->dmin, 1); + /* Reset the sort flags as our work here is done. 
*/ + t->flags = 0; + break; + case task_type_init_grav: + runner_do_init_grav(r, ci, 1); + break; + case task_type_ghost: + runner_do_ghost(r, ci, 1); + break; +#ifdef EXTRA_HYDRO_LOOP + case task_type_extra_ghost: + runner_do_extra_ghost(r, ci, 1); + break; +#endif + case task_type_stars_ghost: + runner_do_stars_ghost(r, ci, 1); + break; + case task_type_bh_density_ghost: + runner_do_black_holes_density_ghost(r, ci, 1); + break; + case task_type_bh_swallow_ghost3: + runner_do_black_holes_swallow_ghost(r, ci, 1); + break; + case task_type_drift_part: + runner_do_drift_part(r, ci, 1); + break; + case task_type_drift_spart: + runner_do_drift_spart(r, ci, 1); + break; + case task_type_drift_bpart: + runner_do_drift_bpart(r, ci, 1); + break; + case task_type_drift_gpart: + runner_do_drift_gpart(r, ci, 1); + break; + case task_type_kick1: + runner_do_kick1(r, ci, 1); + break; + case task_type_kick2: + runner_do_kick2(r, ci, 1); + break; + case task_type_end_hydro_force: + runner_do_end_hydro_force(r, ci, 1); + break; + case task_type_end_grav_force: + runner_do_end_grav_force(r, ci, 1); + break; + case task_type_logger: + runner_do_logger(r, ci, 1); + break; + case task_type_timestep: + runner_do_timestep(r, ci, 1); + break; + case task_type_timestep_limiter: + runner_do_limiter(r, ci, 0, 1); + break; +#ifdef WITH_MPI + case task_type_send: + if (t->subtype == task_subtype_tend_part) { + free(t->buff); + } else if (t->subtype == task_subtype_tend_gpart) { + free(t->buff); + } else if (t->subtype == task_subtype_tend_spart) { + free(t->buff); + } else if (t->subtype == task_subtype_tend_bpart) { + free(t->buff); + } else if (t->subtype == task_subtype_sf_counts) { + free(t->buff); + } else if (t->subtype == task_subtype_part_swallow) { + free(t->buff); + } else if (t->subtype == task_subtype_bpart_merger) { + free(t->buff); + } + break; + case task_type_recv: + if (t->subtype == task_subtype_tend_part) { + cell_unpack_end_step_hydro(ci, (struct pcell_step_hydro *)t->buff); + free(t->buff); + } else if (t->subtype == task_subtype_tend_gpart) { + cell_unpack_end_step_grav(ci, (struct pcell_step_grav *)t->buff); + free(t->buff); + } else if (t->subtype == task_subtype_tend_spart) { + cell_unpack_end_step_stars(ci, (struct pcell_step_stars *)t->buff); + free(t->buff); + } else if (t->subtype == task_subtype_tend_bpart) { + cell_unpack_end_step_black_holes( + ci, (struct pcell_step_black_holes *)t->buff); + free(t->buff); + } else if (t->subtype == task_subtype_sf_counts) { + cell_unpack_sf_counts(ci, (struct pcell_sf *)t->buff); + cell_clear_stars_sort_flags(ci, /*clear_unused_flags=*/0); + free(t->buff); + } else if (t->subtype == task_subtype_xv) { + runner_do_recv_part(r, ci, 1, 1); + } else if (t->subtype == task_subtype_rho) { + runner_do_recv_part(r, ci, 0, 1); + } else if (t->subtype == task_subtype_gradient) { + runner_do_recv_part(r, ci, 0, 1); + } else if (t->subtype == task_subtype_part_swallow) { + cell_unpack_part_swallow(ci, + (struct black_holes_part_data *)t->buff); + free(t->buff); + } else if (t->subtype == task_subtype_bpart_merger) { + cell_unpack_bpart_swallow(ci, + (struct black_holes_bpart_data *)t->buff); + free(t->buff); + } else if (t->subtype == task_subtype_limiter) { + runner_do_recv_part(r, ci, 0, 1); + } else if (t->subtype == task_subtype_gpart) { + runner_do_recv_gpart(r, ci, 1); + } else if (t->subtype == task_subtype_spart) { + runner_do_recv_spart(r, ci, 1, 1); + } else if (t->subtype == task_subtype_bpart_rho) { + runner_do_recv_bpart(r, ci, 1, 1); + } else if 
(t->subtype == task_subtype_bpart_swallow) { + runner_do_recv_bpart(r, ci, 0, 1); + } else if (t->subtype == task_subtype_bpart_feedback) { + runner_do_recv_bpart(r, ci, 0, 1); + } else if (t->subtype == task_subtype_multipole) { + cell_unpack_multipoles(ci, (struct gravity_tensors *)t->buff); + free(t->buff); + } else { + error("Unknown/invalid task subtype (%d).", t->subtype); + } + break; +#endif + case task_type_grav_down: + runner_do_grav_down(r, t->ci, 1); + break; + case task_type_grav_mesh: + runner_do_grav_mesh(r, t->ci, 1); + break; + case task_type_grav_long_range: + runner_do_grav_long_range(r, t->ci, 1); + break; + case task_type_grav_mm: + runner_dopair_grav_mm_progenies(r, t->flags, t->ci, t->cj); + break; + case task_type_cooling: + runner_do_cooling(r, t->ci, 1); + break; + case task_type_star_formation: + runner_do_star_formation(r, t->ci, 1); + break; + case task_type_stars_resort: + runner_do_stars_resort(r, t->ci, 1); + break; + case task_type_fof_self: + runner_do_fof_self(r, t->ci, 1); + break; + case task_type_fof_pair: + runner_do_fof_pair(r, t->ci, t->cj, 1); + break; + default: + error("Unknown/invalid task type (%d).", t->type); + } + +/* Mark that we have run this task on these cells */ +#ifdef SWIFT_DEBUG_CHECKS + if (ci != NULL) { + ci->tasks_executed[t->type]++; + ci->subtasks_executed[t->subtype]++; + } + if (cj != NULL) { + cj->tasks_executed[t->type]++; + cj->subtasks_executed[t->subtype]++; + } + + /* This runner is not doing a task anymore */ + r->t = NULL; +#endif + + /* We're done with this task, see if we get a next one. */ + prev = t; + t = scheduler_done(sched, t); + + } /* main loop. */ + } + + /* Be kind, rewind. */ + return NULL; +} diff --git a/src/runner_others.c b/src/runner_others.c new file mode 100644 index 0000000000000000000000000000000000000000..5ffaf7aa321f658b6e0e7e10a9cb8ad2f4a5a541 --- /dev/null +++ b/src/runner_others.c @@ -0,0 +1,660 @@ +/******************************************************************************* + * This file is part of SWIFT. + * Copyright (c) 2012 Pedro Gonnet (pedro.gonnet@durham.ac.uk) + * Matthieu Schaller (matthieu.schaller@durham.ac.uk) + * 2015 Peter W. Draper (p.w.draper@durham.ac.uk) + * 2016 John A. Regan (john.a.regan@durham.ac.uk) + * Tom Theuns (tom.theuns@durham.ac.uk) + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published + * by the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public License + * along with this program. If not, see <http://www.gnu.org/licenses/>. + * + ******************************************************************************/ + +/* Config parameters. */ +#include "../config.h" + +/* Some standard headers. */ +#include <float.h> +#include <limits.h> +#include <stdlib.h> + +/* MPI headers. */ +#ifdef WITH_MPI +#include <mpi.h> +#endif + +/* This object's header. */ +#include "runner.h" + +/* Local headers. 
*/ +#include "active.h" +#include "cell.h" +#include "chemistry.h" +#include "cooling.h" +#include "engine.h" +#include "error.h" +#include "gravity.h" +#include "hydro.h" +#include "logger.h" +#include "pressure_floor.h" +#include "space.h" +#include "star_formation.h" +#include "star_formation_logger.h" +#include "stars.h" +#include "timers.h" +#include "tracers.h" + +/** + * @brief Calculate gravity acceleration from external potential + * + * @param r runner task + * @param c cell + * @param timer 1 if the time is to be recorded. + */ +void runner_do_grav_external(struct runner *r, struct cell *c, int timer) { + + struct gpart *restrict gparts = c->grav.parts; + const int gcount = c->grav.count; + const struct engine *e = r->e; + const struct external_potential *potential = e->external_potential; + const struct phys_const *constants = e->physical_constants; + const double time = r->e->time; + + TIMER_TIC; + + /* Anything to do here? */ + if (!cell_is_active_gravity(c, e)) return; + + /* Recurse? */ + if (c->split) { + for (int k = 0; k < 8; k++) + if (c->progeny[k] != NULL) runner_do_grav_external(r, c->progeny[k], 0); + } else { + + /* Loop over the gparts in this cell. */ + for (int i = 0; i < gcount; i++) { + + /* Get a direct pointer on the part. */ + struct gpart *restrict gp = &gparts[i]; + + /* Is this part within the time step? */ + if (gpart_is_active(gp, e)) { + external_gravity_acceleration(time, potential, constants, gp); + } + } + } + + if (timer) TIMER_TOC(timer_dograv_external); +} + +/** + * @brief Calculate gravity accelerations from the periodic mesh + * + * @param r runner task + * @param c cell + * @param timer 1 if the time is to be recorded. + */ +void runner_do_grav_mesh(struct runner *r, struct cell *c, int timer) { + + struct gpart *restrict gparts = c->grav.parts; + const int gcount = c->grav.count; + const struct engine *e = r->e; + +#ifdef SWIFT_DEBUG_CHECKS + if (!e->s->periodic) error("Calling mesh forces in non-periodic mode."); +#endif + + TIMER_TIC; + + /* Anything to do here? */ + if (!cell_is_active_gravity(c, e)) return; + + /* Recurse? */ + if (c->split) { + for (int k = 0; k < 8; k++) + if (c->progeny[k] != NULL) runner_do_grav_mesh(r, c->progeny[k], 0); + } else { + + /* Get the forces from the gravity mesh */ + pm_mesh_interpolate_forces(e->mesh, e, gparts, gcount); + } + + if (timer) TIMER_TOC(timer_dograv_mesh); +} + +/** + * @brief Calculate change in thermal state of particles induced + * by radiative cooling and heating. + * + * @param r runner task + * @param c cell + * @param timer 1 if the time is to be recorded. + */ +void runner_do_cooling(struct runner *r, struct cell *c, int timer) { + + const struct engine *e = r->e; + const struct cosmology *cosmo = e->cosmology; + const int with_cosmology = (e->policy & engine_policy_cosmology); + const struct cooling_function_data *cooling_func = e->cooling_func; + const struct phys_const *constants = e->physical_constants; + const struct unit_system *us = e->internal_units; + const struct hydro_props *hydro_props = e->hydro_properties; + const struct entropy_floor_properties *entropy_floor_props = e->entropy_floor; + const double time_base = e->time_base; + const integertime_t ti_current = e->ti_current; + struct part *restrict parts = c->hydro.parts; + struct xpart *restrict xparts = c->hydro.xparts; + const int count = c->hydro.count; + + TIMER_TIC; + + /* Anything to do here? */ + if (!cell_is_active_hydro(c, e)) return; + + /* Recurse? 
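runner_do_grav_external(), runner_do_grav_mesh() and runner_do_cooling() here all share the same traversal skeleton, as do most cell-level tasks in this file: recurse through the progeny of split cells and only do per-particle work once a leaf is reached. Reduced to that skeleton, with made-up types, the pattern is simply:

    /* Minimal stand-in for the cell tree walked by these tasks. */
    struct toy_cell {
      int split;                   /* Is this cell subdivided further?     */
      struct toy_cell *progeny[8]; /* Up to 8 children, NULL where absent. */
      int count;                   /* Particles held when this is a leaf.  */
    };

    /* Walk the tree; all real work happens in leaf_op() on the leaves. */
    static void toy_cell_walk(struct toy_cell *c,
                              void (*leaf_op)(struct toy_cell *)) {
      if (c->split) {
        for (int k = 0; k < 8; k++)
          if (c->progeny[k] != NULL) toy_cell_walk(c->progeny[k], leaf_op);
      } else {
        leaf_op(c);
      }
    }

The timer argument of the real functions is only non-zero at the top of the recursion, so TIMER_TOC charges the whole sub-tree to a single task.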
*/ + if (c->split) { + for (int k = 0; k < 8; k++) + if (c->progeny[k] != NULL) runner_do_cooling(r, c->progeny[k], 0); + } else { + + /* Loop over the parts in this cell. */ + for (int i = 0; i < count; i++) { + + /* Get a direct pointer on the part. */ + struct part *restrict p = &parts[i]; + struct xpart *restrict xp = &xparts[i]; + + if (part_is_active(p, e)) { + + double dt_cool, dt_therm; + if (with_cosmology) { + const integertime_t ti_step = get_integer_timestep(p->time_bin); + const integertime_t ti_begin = + get_integer_time_begin(ti_current - 1, p->time_bin); + + dt_cool = + cosmology_get_delta_time(cosmo, ti_begin, ti_begin + ti_step); + dt_therm = cosmology_get_therm_kick_factor(e->cosmology, ti_begin, + ti_begin + ti_step); + + } else { + dt_cool = get_timestep(p->time_bin, time_base); + dt_therm = get_timestep(p->time_bin, time_base); + } + + /* Let's cool ! */ + cooling_cool_part(constants, us, cosmo, hydro_props, + entropy_floor_props, cooling_func, p, xp, dt_cool, + dt_therm); + } + } + } + + if (timer) TIMER_TOC(timer_do_cooling); +} + +/** + * + */ +void runner_do_star_formation(struct runner *r, struct cell *c, int timer) { + + struct engine *e = r->e; + const struct cosmology *cosmo = e->cosmology; + const struct star_formation *sf_props = e->star_formation; + const struct phys_const *phys_const = e->physical_constants; + const int count = c->hydro.count; + struct part *restrict parts = c->hydro.parts; + struct xpart *restrict xparts = c->hydro.xparts; + const int with_cosmology = (e->policy & engine_policy_cosmology); + const int with_feedback = (e->policy & engine_policy_feedback); + const struct hydro_props *restrict hydro_props = e->hydro_properties; + const struct unit_system *restrict us = e->internal_units; + struct cooling_function_data *restrict cooling = e->cooling_func; + const struct entropy_floor_properties *entropy_floor = e->entropy_floor; + const double time_base = e->time_base; + const integertime_t ti_current = e->ti_current; + const int current_stars_count = c->stars.count; + + TIMER_TIC; + +#ifdef SWIFT_DEBUG_CHECKS + if (c->nodeID != e->nodeID) + error("Running star formation task on a foreign node!"); +#endif + + /* Anything to do here? */ + if (c->hydro.count == 0 || !cell_is_active_hydro(c, e)) { + star_formation_logger_log_inactive_cell(&c->stars.sfh); + return; + } + + /* Reset the SFR */ + star_formation_logger_init(&c->stars.sfh); + + /* Recurse? */ + if (c->split) { + for (int k = 0; k < 8; k++) + if (c->progeny[k] != NULL) { + /* Load the child cell */ + struct cell *restrict cp = c->progeny[k]; + + /* Do the recursion */ + runner_do_star_formation(r, cp, 0); + + /* Update current cell using child cells */ + star_formation_logger_add(&c->stars.sfh, &cp->stars.sfh); + } + } else { + + /* Loop over the gas particles in this cell. */ + for (int k = 0; k < count; k++) { + + /* Get a handle on the part. */ + struct part *restrict p = &parts[k]; + struct xpart *restrict xp = &xparts[k]; + + /* Only work on active particles */ + if (part_is_active(p, e)) { + + /* Is this particle star forming? 
*/ + if (star_formation_is_star_forming(p, xp, sf_props, phys_const, cosmo, + hydro_props, us, cooling, + entropy_floor)) { + + /* Time-step size for this particle */ + double dt_star; + if (with_cosmology) { + const integertime_t ti_step = get_integer_timestep(p->time_bin); + const integertime_t ti_begin = + get_integer_time_begin(ti_current - 1, p->time_bin); + + dt_star = + cosmology_get_delta_time(cosmo, ti_begin, ti_begin + ti_step); + + } else { + dt_star = get_timestep(p->time_bin, time_base); + } + + /* Compute the SF rate of the particle */ + star_formation_compute_SFR(p, xp, sf_props, phys_const, cosmo, + dt_star); + + /* Add the SFR and SFR*dt to the SFH struct of this cell */ + star_formation_logger_log_active_part(p, xp, &c->stars.sfh, dt_star); + + /* Are we forming a star particle from this SF rate? */ + if (star_formation_should_convert_to_star(p, xp, sf_props, e, + dt_star)) { + + /* Convert the gas particle to a star particle */ + struct spart *sp = cell_convert_part_to_spart(e, c, p, xp); + + /* Did we get a star? (Or did we run out of spare ones?) */ + if (sp != NULL) { + + /* message("We formed a star id=%lld cellID=%d", sp->id, + * c->cellID); */ + + /* Copy the properties of the gas particle to the star particle */ + star_formation_copy_properties(p, xp, sp, e, sf_props, cosmo, + with_cosmology, phys_const, + hydro_props, us, cooling); + + /* Update the Star formation history */ + star_formation_logger_log_new_spart(sp, &c->stars.sfh); + } + } + + } else { /* Are we not star-forming? */ + + /* Update the particle to flag it as not star-forming */ + star_formation_update_part_not_SFR(p, xp, e, sf_props, + with_cosmology); + + } /* Not Star-forming? */ + + } else { /* is active? */ + + /* Check if the particle is not inhibited */ + if (!part_is_inhibited(p, e)) { + star_formation_logger_log_inactive_part(p, xp, &c->stars.sfh); + } + } + } /* Loop over particles */ + } + + /* If we formed any stars, the star sorts are now invalid. We need to + * re-compute them. */ + if (with_feedback && (c == c->top) && + (current_stars_count != c->stars.count)) { + cell_set_star_resort_flag(c); + } + + if (timer) TIMER_TOC(timer_do_star_formation); +} + +/** + * @brief End the hydro force calculation of all active particles in a cell + * by multiplying the acccelerations by the relevant constants + * + * @param r The #runner thread. + * @param c The #cell. + * @param timer Are we timing this ? + */ +void runner_do_end_hydro_force(struct runner *r, struct cell *c, int timer) { + + const struct engine *e = r->e; + + TIMER_TIC; + + /* Anything to do here? */ + if (!cell_is_active_hydro(c, e)) return; + + /* Recurse? */ + if (c->split) { + for (int k = 0; k < 8; k++) + if (c->progeny[k] != NULL) runner_do_end_hydro_force(r, c->progeny[k], 0); + } else { + + const struct cosmology *cosmo = e->cosmology; + const int count = c->hydro.count; + struct part *restrict parts = c->hydro.parts; + + /* Loop over the gas particles in this cell. */ + for (int k = 0; k < count; k++) { + + /* Get a handle on the part. */ + struct part *restrict p = &parts[k]; + + if (part_is_active(p, e)) { + + /* Finish the force loop */ + hydro_end_force(p, cosmo); + chemistry_end_force(p, cosmo); + +#ifdef SWIFT_BOUNDARY_PARTICLES + + /* Get the ID of the part */ + const long long id = p->id; + + /* Cancel hdyro forces of these particles */ + if (id < SWIFT_BOUNDARY_PARTICLES) { + + /* Don't move ! 
*/ + hydro_reset_acceleration(p); + +#if defined(GIZMO_MFV_SPH) || defined(GIZMO_MFM_SPH) + + /* Some values need to be reset in the Gizmo case. */ + hydro_prepare_force(p, &c->hydro.xparts[k], cosmo, + e->hydro_properties, 0); +#endif + } +#endif + } + } + } + + if (timer) TIMER_TOC(timer_end_hydro_force); +} + +/** + * @brief End the gravity force calculation of all active particles in a cell + * by multiplying the acccelerations by the relevant constants + * + * @param r The #runner thread. + * @param c The #cell. + * @param timer Are we timing this ? + */ +void runner_do_end_grav_force(struct runner *r, struct cell *c, int timer) { + + const struct engine *e = r->e; + + TIMER_TIC; + + /* Anything to do here? */ + if (!cell_is_active_gravity(c, e)) return; + + /* Recurse? */ + if (c->split) { + for (int k = 0; k < 8; k++) + if (c->progeny[k] != NULL) runner_do_end_grav_force(r, c->progeny[k], 0); + } else { + + const struct space *s = e->s; + const int periodic = s->periodic; + const float G_newton = e->physical_constants->const_newton_G; + + /* Potential normalisation in the case of periodic gravity */ + float potential_normalisation = 0.; + if (periodic && (e->policy & engine_policy_self_gravity)) { + const double volume = s->dim[0] * s->dim[1] * s->dim[2]; + const double r_s = e->mesh->r_s; + potential_normalisation = 4. * M_PI * e->total_mass * r_s * r_s / volume; + } + + const int gcount = c->grav.count; + struct gpart *restrict gparts = c->grav.parts; + + /* Loop over the g-particles in this cell. */ + for (int k = 0; k < gcount; k++) { + + /* Get a handle on the gpart. */ + struct gpart *restrict gp = &gparts[k]; + + if (gpart_is_active(gp, e)) { + + /* Finish the force calculation */ + gravity_end_force(gp, G_newton, potential_normalisation, periodic); + +#ifdef SWIFT_MAKE_GRAVITY_GLASS + + /* Negate the gravity forces */ + gp->a_grav[0] *= -1.f; + gp->a_grav[1] *= -1.f; + gp->a_grav[2] *= -1.f; +#endif + +#ifdef SWIFT_NO_GRAVITY_BELOW_ID + + /* Get the ID of the gpart */ + long long id = 0; + if (gp->type == swift_type_gas) + id = e->s->parts[-gp->id_or_neg_offset].id; + else if (gp->type == swift_type_stars) + id = e->s->sparts[-gp->id_or_neg_offset].id; + else if (gp->type == swift_type_black_hole) + error("Unexisting type"); + else + id = gp->id_or_neg_offset; + + /* Cancel gravity forces of these particles */ + if (id < SWIFT_NO_GRAVITY_BELOW_ID) { + + /* Don't move ! 
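These particles receive no gravitational acceleration this step: all three components are explicitly zeroed.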
*/ + gp->a_grav[0] = 0.f; + gp->a_grav[1] = 0.f; + gp->a_grav[2] = 0.f; + } +#endif + +#ifdef SWIFT_DEBUG_CHECKS + if ((e->policy & engine_policy_self_gravity) && + !(e->policy & engine_policy_black_holes)) { + + /* Let's add a self interaction to simplify the count */ + gp->num_interacted++; + + /* Check that this gpart has interacted with all the other + * particles (via direct or multipoles) in the box */ + if (gp->num_interacted != + e->total_nr_gparts - e->count_inhibited_gparts) { + + /* Get the ID of the gpart */ + long long my_id = 0; + if (gp->type == swift_type_gas) + my_id = e->s->parts[-gp->id_or_neg_offset].id; + else if (gp->type == swift_type_stars) + my_id = e->s->sparts[-gp->id_or_neg_offset].id; + else if (gp->type == swift_type_black_hole) + error("Unexisting type"); + else + my_id = gp->id_or_neg_offset; + + error( + "g-particle (id=%lld, type=%s) did not interact " + "gravitationally with all other gparts " + "gp->num_interacted=%lld, total_gparts=%lld (local " + "num_gparts=%zd inhibited_gparts=%lld)", + my_id, part_type_names[gp->type], gp->num_interacted, + e->total_nr_gparts, e->s->nr_gparts, e->count_inhibited_gparts); + } + } +#endif + } + } + } + if (timer) TIMER_TOC(timer_end_grav_force); +} + +/** + * @brief Write the required particles through the logger. + * + * @param r The runner thread. + * @param c The cell. + * @param timer Are we timing this ? + */ +void runner_do_logger(struct runner *r, struct cell *c, int timer) { + +#ifdef WITH_LOGGER + TIMER_TIC; + + const struct engine *e = r->e; + struct part *restrict parts = c->hydro.parts; + struct xpart *restrict xparts = c->hydro.xparts; + const int count = c->hydro.count; + + /* Anything to do here? */ + if (!cell_is_active_hydro(c, e) && !cell_is_active_gravity(c, e)) return; + + /* Recurse? Avoid spending too much time in useless cells. */ + if (c->split) { + for (int k = 0; k < 8; k++) + if (c->progeny[k] != NULL) runner_do_logger(r, c->progeny[k], 0); + } else { + + /* Loop over the parts in this cell. */ + for (int k = 0; k < count; k++) { + + /* Get a handle on the part. */ + struct part *restrict p = &parts[k]; + struct xpart *restrict xp = &xparts[k]; + + /* If particle needs to be log */ + /* This is the same function than part_is_active, except for + * debugging checks */ + if (part_is_active(p, e)) { + + if (logger_should_write(&xp->logger_data, e->logger)) { + /* Write particle */ + /* Currently writing everything, should adapt it through time */ + logger_log_part(e->logger, p, + logger_mask_data[logger_x].mask | + logger_mask_data[logger_v].mask | + logger_mask_data[logger_a].mask | + logger_mask_data[logger_u].mask | + logger_mask_data[logger_h].mask | + logger_mask_data[logger_rho].mask | + logger_mask_data[logger_consts].mask, + &xp->logger_data.last_offset); + + /* Set counter back to zero */ + xp->logger_data.steps_since_last_output = 0; + } else + /* Update counter */ + xp->logger_data.steps_since_last_output += 1; + } + } + } + + if (c->grav.count > 0) error("gparts not implemented"); + + if (c->stars.count > 0) error("sparts not implemented"); + + if (timer) TIMER_TOC(timer_logger); + +#else + error("Logger disabled, please enable it during configuration"); +#endif +} + +/** + * @brief Recursively search for FOF groups in a single cell. + * + * @param r runner task + * @param c cell + * @param timer 1 if the time is to be recorded. 
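Note that the search radius used for the group finding is the squared FOF linking length taken from the engine's FOF properties.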
+ */ +void runner_do_fof_self(struct runner *r, struct cell *c, int timer) { + +#ifdef WITH_FOF + + TIMER_TIC; + + const struct engine *e = r->e; + struct space *s = e->s; + const double dim[3] = {s->dim[0], s->dim[1], s->dim[2]}; + const int periodic = s->periodic; + const struct gpart *const gparts = s->gparts; + const double search_r2 = e->fof_properties->l_x2; + + rec_fof_search_self(e->fof_properties, dim, search_r2, periodic, gparts, c); + + if (timer) TIMER_TOC(timer_fof_self); + +#else + error("SWIFT was not compiled with FOF enabled!"); +#endif +} + +/** + * @brief Recursively search for FOF groups between a pair of cells. + * + * @param r runner task + * @param ci cell i + * @param cj cell j + * @param timer 1 if the time is to be recorded. + */ +void runner_do_fof_pair(struct runner *r, struct cell *ci, struct cell *cj, + int timer) { + +#ifdef WITH_FOF + + TIMER_TIC; + + const struct engine *e = r->e; + struct space *s = e->s; + const double dim[3] = {s->dim[0], s->dim[1], s->dim[2]}; + const int periodic = s->periodic; + const struct gpart *const gparts = s->gparts; + const double search_r2 = e->fof_properties->l_x2; + + rec_fof_search_pair(e->fof_properties, dim, search_r2, periodic, gparts, ci, + cj); + + if (timer) TIMER_TOC(timer_fof_pair); +#else + error("SWIFT was not compiled with FOF enabled!"); +#endif +} diff --git a/src/runner_recv.c b/src/runner_recv.c new file mode 100644 index 0000000000000000000000000000000000000000..803e68c2106933684109e798e24952a0dbdfea6e --- /dev/null +++ b/src/runner_recv.c @@ -0,0 +1,368 @@ +/******************************************************************************* + * This file is part of SWIFT. + * Copyright (c) 2012 Pedro Gonnet (pedro.gonnet@durham.ac.uk) + * Matthieu Schaller (matthieu.schaller@durham.ac.uk) + * 2015 Peter W. Draper (p.w.draper@durham.ac.uk) + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published + * by the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public License + * along with this program. If not, see <http://www.gnu.org/licenses/>. + * + ******************************************************************************/ + +/* Config parameters. */ +#include "../config.h" + +/* MPI headers. */ +#ifdef WITH_MPI +#include <mpi.h> +#endif + +/* This object's header. */ +#include "runner.h" + +/* Local headers. */ +#include "engine.h" +#include "timers.h" + +/** + * @brief Construct the cell properties from the received #part. + * + * @param r The runner thread. + * @param c The cell. + * @param clear_sorts Should we clear the sort flag and hence trigger a sort ? + * @param timer Are we timing this ? 
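Note that only cell-wide metadata, such as the maximal smoothing length, is reconstructed here; the particle fields themselves arrive through the corresponding MPI receive.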
+ */ +void runner_do_recv_part(struct runner *r, struct cell *c, int clear_sorts, + int timer) { +#ifdef WITH_MPI + + const struct part *restrict parts = c->hydro.parts; + const size_t nr_parts = c->hydro.count; + const integertime_t ti_current = r->e->ti_current; + + TIMER_TIC; + + integertime_t ti_hydro_end_min = max_nr_timesteps; + integertime_t ti_hydro_end_max = 0; + timebin_t time_bin_min = num_time_bins; + timebin_t time_bin_max = 0; + float h_max = 0.f; + +#ifdef SWIFT_DEBUG_CHECKS + if (c->nodeID == engine_rank) error("Updating a local cell!"); +#endif + + /* Clear this cell's sorted mask. */ + if (clear_sorts) c->hydro.sorted = 0; + + /* If this cell is a leaf, collect the particle data. */ + if (!c->split) { + + /* Collect everything... */ + for (size_t k = 0; k < nr_parts; k++) { + if (parts[k].time_bin == time_bin_inhibited) continue; + time_bin_min = min(time_bin_min, parts[k].time_bin); + time_bin_max = max(time_bin_max, parts[k].time_bin); + h_max = max(h_max, parts[k].h); + } + + /* Convert into a time */ + ti_hydro_end_min = get_integer_time_end(ti_current, time_bin_min); + ti_hydro_end_max = get_integer_time_end(ti_current, time_bin_max); + } + + /* Otherwise, recurse and collect. */ + else { + for (int k = 0; k < 8; k++) { + if (c->progeny[k] != NULL && c->progeny[k]->hydro.count > 0) { + runner_do_recv_part(r, c->progeny[k], clear_sorts, 0); + ti_hydro_end_min = + min(ti_hydro_end_min, c->progeny[k]->hydro.ti_end_min); + ti_hydro_end_max = + max(ti_hydro_end_max, c->progeny[k]->hydro.ti_end_max); + h_max = max(h_max, c->progeny[k]->hydro.h_max); + } + } + } + +#ifdef SWIFT_DEBUG_CHECKS + if (ti_hydro_end_min < ti_current) + error( + "Received a cell at an incorrect time c->ti_end_min=%lld, " + "e->ti_current=%lld.", + ti_hydro_end_min, ti_current); +#endif + + /* ... and store. */ + // c->hydro.ti_end_min = ti_hydro_end_min; + // c->hydro.ti_end_max = ti_hydro_end_max; + c->hydro.ti_old_part = ti_current; + c->hydro.h_max = h_max; + + if (timer) TIMER_TOC(timer_dorecv_part); + +#else + error("SWIFT was not compiled with MPI support."); +#endif +} + +/** + * @brief Construct the cell properties from the received #gpart. + * + * @param r The runner thread. + * @param c The cell. + * @param timer Are we timing this ? + */ +void runner_do_recv_gpart(struct runner *r, struct cell *c, int timer) { + +#ifdef WITH_MPI + + const struct gpart *restrict gparts = c->grav.parts; + const size_t nr_gparts = c->grav.count; + const integertime_t ti_current = r->e->ti_current; + + TIMER_TIC; + + integertime_t ti_gravity_end_min = max_nr_timesteps; + integertime_t ti_gravity_end_max = 0; + timebin_t time_bin_min = num_time_bins; + timebin_t time_bin_max = 0; + +#ifdef SWIFT_DEBUG_CHECKS + if (c->nodeID == engine_rank) error("Updating a local cell!"); +#endif + + /* If this cell is a leaf, collect the particle data. */ + if (!c->split) { + + /* Collect everything... */ + for (size_t k = 0; k < nr_gparts; k++) { + if (gparts[k].time_bin == time_bin_inhibited) continue; + time_bin_min = min(time_bin_min, gparts[k].time_bin); + time_bin_max = max(time_bin_max, gparts[k].time_bin); + } + + /* Convert into a time */ + ti_gravity_end_min = get_integer_time_end(ti_current, time_bin_min); + ti_gravity_end_max = get_integer_time_end(ti_current, time_bin_max); + } + + /* Otherwise, recurse and collect. 
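Each progeny contributes its own end-of-step times and the parent keeps the extrema over all children.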
*/ + else { + for (int k = 0; k < 8; k++) { + if (c->progeny[k] != NULL && c->progeny[k]->grav.count > 0) { + runner_do_recv_gpart(r, c->progeny[k], 0); + ti_gravity_end_min = + min(ti_gravity_end_min, c->progeny[k]->grav.ti_end_min); + ti_gravity_end_max = + max(ti_gravity_end_max, c->progeny[k]->grav.ti_end_max); + } + } + } + +#ifdef SWIFT_DEBUG_CHECKS + if (ti_gravity_end_min < ti_current) + error( + "Received a cell at an incorrect time c->ti_end_min=%lld, " + "e->ti_current=%lld.", + ti_gravity_end_min, ti_current); +#endif + + /* ... and store. */ + // c->grav.ti_end_min = ti_gravity_end_min; + // c->grav.ti_end_max = ti_gravity_end_max; + c->grav.ti_old_part = ti_current; + + if (timer) TIMER_TOC(timer_dorecv_gpart); + +#else + error("SWIFT was not compiled with MPI support."); +#endif +} + +/** + * @brief Construct the cell properties from the received #spart. + * + * @param r The runner thread. + * @param c The cell. + * @param clear_sorts Should we clear the sort flag and hence trigger a sort ? + * @param timer Are we timing this ? + */ +void runner_do_recv_spart(struct runner *r, struct cell *c, int clear_sorts, + int timer) { + +#ifdef WITH_MPI + + struct spart *restrict sparts = c->stars.parts; + const size_t nr_sparts = c->stars.count; + const integertime_t ti_current = r->e->ti_current; + + TIMER_TIC; + + integertime_t ti_stars_end_min = max_nr_timesteps; + integertime_t ti_stars_end_max = 0; + timebin_t time_bin_min = num_time_bins; + timebin_t time_bin_max = 0; + float h_max = 0.f; + +#ifdef SWIFT_DEBUG_CHECKS + if (c->nodeID == engine_rank) error("Updating a local cell!"); +#endif + + /* Clear this cell's sorted mask. */ + if (clear_sorts) c->stars.sorted = 0; + + /* If this cell is a leaf, collect the particle data. */ + if (!c->split) { + + /* Collect everything... */ + for (size_t k = 0; k < nr_sparts; k++) { +#ifdef DEBUG_INTERACTIONS_STARS + sparts[k].num_ngb_force = 0; +#endif + if (sparts[k].time_bin == time_bin_inhibited) continue; + time_bin_min = min(time_bin_min, sparts[k].time_bin); + time_bin_max = max(time_bin_max, sparts[k].time_bin); + h_max = max(h_max, sparts[k].h); + } + + /* Convert into a time */ + ti_stars_end_min = get_integer_time_end(ti_current, time_bin_min); + ti_stars_end_max = get_integer_time_end(ti_current, time_bin_max); + } + + /* Otherwise, recurse and collect. */ + else { + for (int k = 0; k < 8; k++) { + if (c->progeny[k] != NULL && c->progeny[k]->stars.count > 0) { + runner_do_recv_spart(r, c->progeny[k], clear_sorts, 0); + ti_stars_end_min = + min(ti_stars_end_min, c->progeny[k]->stars.ti_end_min); + ti_stars_end_max = + max(ti_stars_end_max, c->progeny[k]->stars.ti_end_max); + h_max = max(h_max, c->progeny[k]->stars.h_max); + } + } + } + +#ifdef SWIFT_DEBUG_CHECKS + if (ti_stars_end_min < ti_current && + !(r->e->policy & engine_policy_star_formation)) + error( + "Received a cell at an incorrect time c->ti_end_min=%lld, " + "e->ti_current=%lld.", + ti_stars_end_min, ti_current); +#endif + + /* ... and store. */ + // c->grav.ti_end_min = ti_gravity_end_min; + // c->grav.ti_end_max = ti_gravity_end_max; + c->stars.ti_old_part = ti_current; + c->stars.h_max = h_max; + + if (timer) TIMER_TOC(timer_dorecv_spart); + +#else + error("SWIFT was not compiled with MPI support."); +#endif +} + +/** + * @brief Construct the cell properties from the received #bpart. + * + * Note that we do not need to clear the sorts since we do not sort + * the black holes. + * + * @param r The runner thread. + * @param c The cell. 
+ * @param clear_sorts Should we clear the sort flag and hence trigger a sort ? + * @param timer Are we timing this ? + */ +void runner_do_recv_bpart(struct runner *r, struct cell *c, int clear_sorts, + int timer) { + +#ifdef WITH_MPI + + struct bpart *restrict bparts = c->black_holes.parts; + const size_t nr_bparts = c->black_holes.count; + const integertime_t ti_current = r->e->ti_current; + + TIMER_TIC; + + integertime_t ti_black_holes_end_min = max_nr_timesteps; + integertime_t ti_black_holes_end_max = 0; + timebin_t time_bin_min = num_time_bins; + timebin_t time_bin_max = 0; + float h_max = 0.f; + +#ifdef SWIFT_DEBUG_CHECKS + if (c->nodeID == engine_rank) error("Updating a local cell!"); +#endif + + /* If this cell is a leaf, collect the particle data. */ + if (!c->split) { + + /* Collect everything... */ + for (size_t k = 0; k < nr_bparts; k++) { +#ifdef DEBUG_INTERACTIONS_BLACK_HOLES + bparts[k].num_ngb_force = 0; +#endif + + /* message("Receiving bparts id=%lld time_bin=%d", */ + /* bparts[k].id, bparts[k].time_bin); */ + + if (bparts[k].time_bin == time_bin_inhibited) continue; + time_bin_min = min(time_bin_min, bparts[k].time_bin); + time_bin_max = max(time_bin_max, bparts[k].time_bin); + h_max = max(h_max, bparts[k].h); + } + + /* Convert into a time */ + ti_black_holes_end_min = get_integer_time_end(ti_current, time_bin_min); + ti_black_holes_end_max = get_integer_time_end(ti_current, time_bin_max); + } + + /* Otherwise, recurse and collect. */ + else { + for (int k = 0; k < 8; k++) { + if (c->progeny[k] != NULL && c->progeny[k]->black_holes.count > 0) { + runner_do_recv_bpart(r, c->progeny[k], clear_sorts, 0); + ti_black_holes_end_min = + min(ti_black_holes_end_min, c->progeny[k]->black_holes.ti_end_min); + ti_black_holes_end_max = + max(ti_black_holes_end_max, c->progeny[k]->black_holes.ti_end_max); + h_max = max(h_max, c->progeny[k]->black_holes.h_max); + } + } + } + +#ifdef SWIFT_DEBUG_CHECKS + if (ti_black_holes_end_min < ti_current) + error( + "Received a cell at an incorrect time c->ti_end_min=%lld, " + "e->ti_current=%lld.", + ti_black_holes_end_min, ti_current); +#endif + + /* ... and store. */ + // c->grav.ti_end_min = ti_gravity_end_min; + // c->grav.ti_end_max = ti_gravity_end_max; + c->black_holes.ti_old_part = ti_current; + c->black_holes.h_max = h_max; + + if (timer) TIMER_TOC(timer_dorecv_bpart); + +#else + error("SWIFT was not compiled with MPI support."); +#endif +} diff --git a/src/runner_sort.c b/src/runner_sort.c new file mode 100644 index 0000000000000000000000000000000000000000..914b64f93b970000885b1b578d762d3f15455332 --- /dev/null +++ b/src/runner_sort.c @@ -0,0 +1,708 @@ +/******************************************************************************* + * This file is part of SWIFT. + * Copyright (c) 2012 Pedro Gonnet (pedro.gonnet@durham.ac.uk) + * Matthieu Schaller (matthieu.schaller@durham.ac.uk) + * 2015 Peter W. Draper (p.w.draper@durham.ac.uk) + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published + * by the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. 
+ * + * You should have received a copy of the GNU Lesser General Public License + * along with this program. If not, see <http://www.gnu.org/licenses/>. + * + ******************************************************************************/ + +/* Config parameters. */ +#include "../config.h" + +/* This object's header. */ +#include "runner.h" + +/* Local headers. */ +#include "active.h" +#include "cell.h" +#include "engine.h" +#include "timers.h" + +/** + * @brief Sorts again all the stars in a given cell hierarchy. + * + * This is intended to be used after the star formation task has been run + * to get the cells back into a state where self/pair star tasks can be run. + * + * @param r The thread #runner. + * @param c The top-level cell to run on. + * @param timer Are we timing this? + */ +void runner_do_stars_resort(struct runner *r, struct cell *c, const int timer) { + +#ifdef SWIFT_DEBUG_CHECKS + if (c->nodeID != r->e->nodeID) error("Task must be run locally!"); +#endif + + TIMER_TIC; + + /* Did we demand a recalculation of the stars'sorts? */ + if (cell_get_flag(c, cell_flag_do_stars_resort)) { + runner_do_all_stars_sort(r, c); + cell_clear_flag(c, cell_flag_do_stars_resort); + } + + if (timer) TIMER_TOC(timer_do_stars_resort); +} + +/** + * @brief Sort the entries in ascending order using QuickSort. + * + * @param sort The entries + * @param N The number of entries. + */ +void runner_do_sort_ascending(struct sort_entry *sort, int N) { + + struct { + short int lo, hi; + } qstack[10]; + int qpos, i, j, lo, hi, imin; + struct sort_entry temp; + float pivot; + + /* Sort parts in cell_i in decreasing order with quicksort */ + qstack[0].lo = 0; + qstack[0].hi = N - 1; + qpos = 0; + while (qpos >= 0) { + lo = qstack[qpos].lo; + hi = qstack[qpos].hi; + qpos -= 1; + if (hi - lo < 15) { + for (i = lo; i < hi; i++) { + imin = i; + for (j = i + 1; j <= hi; j++) + if (sort[j].d < sort[imin].d) imin = j; + if (imin != i) { + temp = sort[imin]; + sort[imin] = sort[i]; + sort[i] = temp; + } + } + } else { + pivot = sort[(lo + hi) / 2].d; + i = lo; + j = hi; + while (i <= j) { + while (sort[i].d < pivot) i++; + while (sort[j].d > pivot) j--; + if (i <= j) { + if (i < j) { + temp = sort[i]; + sort[i] = sort[j]; + sort[j] = temp; + } + i += 1; + j -= 1; + } + } + if (j > (lo + hi) / 2) { + if (lo < j) { + qpos += 1; + qstack[qpos].lo = lo; + qstack[qpos].hi = j; + } + if (i < hi) { + qpos += 1; + qstack[qpos].lo = i; + qstack[qpos].hi = hi; + } + } else { + if (i < hi) { + qpos += 1; + qstack[qpos].lo = i; + qstack[qpos].hi = hi; + } + if (lo < j) { + qpos += 1; + qstack[qpos].lo = lo; + qstack[qpos].hi = j; + } + } + } + } +} + +#ifdef SWIFT_DEBUG_CHECKS +/** + * @brief Recursively checks that the flags are consistent in a cell hierarchy. + * + * Debugging function. Exists in two flavours: hydro & stars. + */ +#define RUNNER_CHECK_SORTS(TYPE) \ + void runner_check_sorts_##TYPE(struct cell *c, int flags) { \ + \ + if (flags & ~c->TYPE.sorted) error("Inconsistent sort flags (downward)!"); \ + if (c->split) \ + for (int k = 0; k < 8; k++) \ + if (c->progeny[k] != NULL && c->progeny[k]->TYPE.count > 0) \ + runner_check_sorts_##TYPE(c->progeny[k], c->TYPE.sorted); \ + } +#else +#define RUNNER_CHECK_SORTS(TYPE) \ + void runner_check_sorts_##TYPE(struct cell *c, int flags) { \ + error("Calling debugging code without debugging flag activated."); \ + } +#endif + +RUNNER_CHECK_SORTS(hydro) +RUNNER_CHECK_SORTS(stars) + +/** + * @brief Sort the particles in the given cell along all cardinal directions. 
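For every flagged direction j, an entry stores the particle index together with the projected distance d = x[0] * runner_shift[j][0] + x[1] * runner_shift[j][1] + x[2] * runner_shift[j][2].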
+ * + * @param r The #runner. + * @param c The #cell. + * @param flags Cell flag. + * @param cleanup If true, re-build the sorts for the selected flags instead + * of just adding them. + * @param clock Flag indicating whether to record the timing or not, needed + * for recursive calls. + */ +void runner_do_hydro_sort(struct runner *r, struct cell *c, int flags, + int cleanup, int clock) { + + struct sort_entry *fingers[8]; + const int count = c->hydro.count; + const struct part *parts = c->hydro.parts; + struct xpart *xparts = c->hydro.xparts; + float buff[8]; + + TIMER_TIC; + +#ifdef SWIFT_DEBUG_CHECKS + if (c->hydro.super == NULL) error("Task called above the super level!!!"); +#endif + + /* We need to do the local sorts plus whatever was requested further up. */ + flags |= c->hydro.do_sort; + if (cleanup) { + c->hydro.sorted = 0; + } else { + flags &= ~c->hydro.sorted; + } + if (flags == 0 && !cell_get_flag(c, cell_flag_do_hydro_sub_sort)) return; + + /* Check that the particles have been moved to the current time */ + if (flags && !cell_are_part_drifted(c, r->e)) + error("Sorting un-drifted cell c->nodeID=%d", c->nodeID); + +#ifdef SWIFT_DEBUG_CHECKS + /* Make sure the sort flags are consistent (downward). */ + runner_check_sorts_hydro(c, c->hydro.sorted); + + /* Make sure the sort flags are consistent (upard). */ + for (struct cell *finger = c->parent; finger != NULL; + finger = finger->parent) { + if (finger->hydro.sorted & ~c->hydro.sorted) + error("Inconsistent sort flags (upward)."); + } + + /* Update the sort timer which represents the last time the sorts + were re-set. */ + if (c->hydro.sorted == 0) c->hydro.ti_sort = r->e->ti_current; +#endif + + /* Allocate memory for sorting. */ + cell_malloc_hydro_sorts(c, flags); + + /* Does this cell have any progeny? */ + if (c->split) { + + /* Fill in the gaps within the progeny. */ + float dx_max_sort = 0.0f; + float dx_max_sort_old = 0.0f; + for (int k = 0; k < 8; k++) { + if (c->progeny[k] != NULL) { + + if (c->progeny[k]->hydro.count > 0) { + + /* Only propagate cleanup if the progeny is stale. */ + runner_do_hydro_sort( + r, c->progeny[k], flags, + cleanup && (c->progeny[k]->hydro.dx_max_sort_old > + space_maxreldx * c->progeny[k]->dmin), + 0); + dx_max_sort = max(dx_max_sort, c->progeny[k]->hydro.dx_max_sort); + dx_max_sort_old = + max(dx_max_sort_old, c->progeny[k]->hydro.dx_max_sort_old); + } else { + + /* We need to clean up the unused flags that were in case the + number of particles in the cell would change */ + cell_clear_hydro_sort_flags(c->progeny[k], /*clear_unused_flags=*/1); + } + } + } + c->hydro.dx_max_sort = dx_max_sort; + c->hydro.dx_max_sort_old = dx_max_sort_old; + + /* Loop over the 13 different sort arrays. */ + for (int j = 0; j < 13; j++) { + + /* Has this sort array been flagged? */ + if (!(flags & (1 << j))) continue; + + /* Init the particle index offsets. */ + int off[8]; + off[0] = 0; + for (int k = 1; k < 8; k++) + if (c->progeny[k - 1] != NULL) + off[k] = off[k - 1] + c->progeny[k - 1]->hydro.count; + else + off[k] = off[k - 1]; + + /* Init the entries and indices. */ + int inds[8]; + for (int k = 0; k < 8; k++) { + inds[k] = k; + if (c->progeny[k] != NULL && c->progeny[k]->hydro.count > 0) { + fingers[k] = c->progeny[k]->hydro.sort[j]; + buff[k] = fingers[k]->d; + off[k] = off[k]; + } else + buff[k] = FLT_MAX; + } + + /* Sort the buffer. 
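With at most eight progeny, a simple quadratic pass over the index array is sufficient.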
*/ + for (int i = 0; i < 7; i++) + for (int k = i + 1; k < 8; k++) + if (buff[inds[k]] < buff[inds[i]]) { + int temp_i = inds[i]; + inds[i] = inds[k]; + inds[k] = temp_i; + } + + /* For each entry in the new sort list. */ + struct sort_entry *finger = c->hydro.sort[j]; + for (int ind = 0; ind < count; ind++) { + + /* Copy the minimum into the new sort array. */ + finger[ind].d = buff[inds[0]]; + finger[ind].i = fingers[inds[0]]->i + off[inds[0]]; + + /* Update the buffer. */ + fingers[inds[0]] += 1; + buff[inds[0]] = fingers[inds[0]]->d; + + /* Find the smallest entry. */ + for (int k = 1; k < 8 && buff[inds[k]] < buff[inds[k - 1]]; k++) { + int temp_i = inds[k - 1]; + inds[k - 1] = inds[k]; + inds[k] = temp_i; + } + + } /* Merge. */ + + /* Add a sentinel. */ + c->hydro.sort[j][count].d = FLT_MAX; + c->hydro.sort[j][count].i = 0; + + /* Mark as sorted. */ + atomic_or(&c->hydro.sorted, 1 << j); + + } /* loop over sort arrays. */ + + } /* progeny? */ + + /* Otherwise, just sort. */ + else { + + /* Reset the sort distance */ + if (c->hydro.sorted == 0) { +#ifdef SWIFT_DEBUG_CHECKS + if (xparts != NULL && c->nodeID != engine_rank) + error("Have non-NULL xparts in foreign cell"); +#endif + + /* And the individual sort distances if we are a local cell */ + if (xparts != NULL) { + for (int k = 0; k < count; k++) { + xparts[k].x_diff_sort[0] = 0.0f; + xparts[k].x_diff_sort[1] = 0.0f; + xparts[k].x_diff_sort[2] = 0.0f; + } + } + c->hydro.dx_max_sort_old = 0.f; + c->hydro.dx_max_sort = 0.f; + } + + /* Fill the sort array. */ + for (int k = 0; k < count; k++) { + const double px[3] = {parts[k].x[0], parts[k].x[1], parts[k].x[2]}; + for (int j = 0; j < 13; j++) + if (flags & (1 << j)) { + c->hydro.sort[j][k].i = k; + c->hydro.sort[j][k].d = px[0] * runner_shift[j][0] + + px[1] * runner_shift[j][1] + + px[2] * runner_shift[j][2]; + } + } + + /* Add the sentinel and sort. */ + for (int j = 0; j < 13; j++) + if (flags & (1 << j)) { + c->hydro.sort[j][count].d = FLT_MAX; + c->hydro.sort[j][count].i = 0; + runner_do_sort_ascending(c->hydro.sort[j], count); + atomic_or(&c->hydro.sorted, 1 << j); + } + } + +#ifdef SWIFT_DEBUG_CHECKS + /* Verify the sorting. */ + for (int j = 0; j < 13; j++) { + if (!(flags & (1 << j))) continue; + struct sort_entry *finger = c->hydro.sort[j]; + for (int k = 1; k < count; k++) { + if (finger[k].d < finger[k - 1].d) + error("Sorting failed, ascending array."); + if (finger[k].i >= count) error("Sorting failed, indices borked."); + } + } + + /* Make sure the sort flags are consistent (downward). */ + runner_check_sorts_hydro(c, flags); + + /* Make sure the sort flags are consistent (upward). */ + for (struct cell *finger = c->parent; finger != NULL; + finger = finger->parent) { + if (finger->hydro.sorted & ~c->hydro.sorted) + error("Inconsistent sort flags."); + } +#endif + + /* Clear the cell's sort flags. */ + c->hydro.do_sort = 0; + cell_clear_flag(c, cell_flag_do_hydro_sub_sort); + c->hydro.requires_sorts = 0; + + if (clock) TIMER_TOC(timer_dosort); +} + +/** + * @brief Sort the stars particles in the given cell along all cardinal + * directions. + * + * @param r The #runner. + * @param c The #cell. + * @param flags Cell flag. + * @param cleanup If true, re-build the sorts for the selected flags instead + * of just adding them. + * @param clock Flag indicating whether to record the timing or not, needed + * for recursive calls. 
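The logic mirrors runner_do_hydro_sort() but operates on the star particles and their own sort arrays.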
+ */ +void runner_do_stars_sort(struct runner *r, struct cell *c, int flags, + int cleanup, int clock) { + + struct sort_entry *fingers[8]; + const int count = c->stars.count; + struct spart *sparts = c->stars.parts; + float buff[8]; + + TIMER_TIC; + +#ifdef SWIFT_DEBUG_CHECKS + if (c->hydro.super == NULL) error("Task called above the super level!!!"); +#endif + + /* We need to do the local sorts plus whatever was requested further up. */ + flags |= c->stars.do_sort; + if (cleanup) { + c->stars.sorted = 0; + } else { + flags &= ~c->stars.sorted; + } + if (flags == 0 && !cell_get_flag(c, cell_flag_do_stars_sub_sort)) return; + + /* Check that the particles have been moved to the current time */ + if (flags && !cell_are_spart_drifted(c, r->e)) { + error("Sorting un-drifted cell c->nodeID=%d", c->nodeID); + } + +#ifdef SWIFT_DEBUG_CHECKS + /* Make sure the sort flags are consistent (downward). */ + runner_check_sorts_stars(c, c->stars.sorted); + + /* Make sure the sort flags are consistent (upward). */ + for (struct cell *finger = c->parent; finger != NULL; + finger = finger->parent) { + if (finger->stars.sorted & ~c->stars.sorted) + error("Inconsistent sort flags (upward)."); + } + + /* Update the sort timer which represents the last time the sorts + were re-set. */ + if (c->stars.sorted == 0) c->stars.ti_sort = r->e->ti_current; +#endif + + /* start by allocating the entry arrays in the requested dimensions. */ + cell_malloc_stars_sorts(c, flags); + + /* Does this cell have any progeny? */ + if (c->split) { + + /* Fill in the gaps within the progeny. */ + float dx_max_sort = 0.0f; + float dx_max_sort_old = 0.0f; + for (int k = 0; k < 8; k++) { + if (c->progeny[k] != NULL) { + + if (c->progeny[k]->stars.count > 0) { + + /* Only propagate cleanup if the progeny is stale. */ + const int cleanup_prog = + cleanup && (c->progeny[k]->stars.dx_max_sort_old > + space_maxreldx * c->progeny[k]->dmin); + runner_do_stars_sort(r, c->progeny[k], flags, cleanup_prog, 0); + dx_max_sort = max(dx_max_sort, c->progeny[k]->stars.dx_max_sort); + dx_max_sort_old = + max(dx_max_sort_old, c->progeny[k]->stars.dx_max_sort_old); + } else { + + /* We need to clean up the unused flags that were in case the + number of particles in the cell would change */ + cell_clear_stars_sort_flags(c->progeny[k], /*clear_unused_flags=*/1); + } + } + } + c->stars.dx_max_sort = dx_max_sort; + c->stars.dx_max_sort_old = dx_max_sort_old; + + /* Loop over the 13 different sort arrays. */ + for (int j = 0; j < 13; j++) { + + /* Has this sort array been flagged? */ + if (!(flags & (1 << j))) continue; + + /* Init the particle index offsets. */ + int off[8]; + off[0] = 0; + for (int k = 1; k < 8; k++) + if (c->progeny[k - 1] != NULL) + off[k] = off[k - 1] + c->progeny[k - 1]->stars.count; + else + off[k] = off[k - 1]; + + /* Init the entries and indices. */ + int inds[8]; + for (int k = 0; k < 8; k++) { + inds[k] = k; + if (c->progeny[k] != NULL && c->progeny[k]->stars.count > 0) { + fingers[k] = c->progeny[k]->stars.sort[j]; + buff[k] = fingers[k]->d; + off[k] = off[k]; + } else + buff[k] = FLT_MAX; + } + + /* Sort the buffer. */ + for (int i = 0; i < 7; i++) + for (int k = i + 1; k < 8; k++) + if (buff[inds[k]] < buff[inds[i]]) { + int temp_i = inds[i]; + inds[i] = inds[k]; + inds[k] = temp_i; + } + + /* For each entry in the new sort list. */ + struct sort_entry *finger = c->stars.sort[j]; + for (int ind = 0; ind < count; ind++) { + + /* Copy the minimum into the new sort array. 
*/ + finger[ind].d = buff[inds[0]]; + finger[ind].i = fingers[inds[0]]->i + off[inds[0]]; + + /* Update the buffer. */ + fingers[inds[0]] += 1; + buff[inds[0]] = fingers[inds[0]]->d; + + /* Find the smallest entry. */ + for (int k = 1; k < 8 && buff[inds[k]] < buff[inds[k - 1]]; k++) { + int temp_i = inds[k - 1]; + inds[k - 1] = inds[k]; + inds[k] = temp_i; + } + + } /* Merge. */ + + /* Add a sentinel. */ + c->stars.sort[j][count].d = FLT_MAX; + c->stars.sort[j][count].i = 0; + + /* Mark as sorted. */ + atomic_or(&c->stars.sorted, 1 << j); + + } /* loop over sort arrays. */ + + } /* progeny? */ + + /* Otherwise, just sort. */ + else { + + /* Reset the sort distance */ + if (c->stars.sorted == 0) { + + /* And the individual sort distances if we are a local cell */ + for (int k = 0; k < count; k++) { + sparts[k].x_diff_sort[0] = 0.0f; + sparts[k].x_diff_sort[1] = 0.0f; + sparts[k].x_diff_sort[2] = 0.0f; + } + c->stars.dx_max_sort_old = 0.f; + c->stars.dx_max_sort = 0.f; + } + + /* Fill the sort array. */ + for (int k = 0; k < count; k++) { + const double px[3] = {sparts[k].x[0], sparts[k].x[1], sparts[k].x[2]}; + for (int j = 0; j < 13; j++) + if (flags & (1 << j)) { + c->stars.sort[j][k].i = k; + c->stars.sort[j][k].d = px[0] * runner_shift[j][0] + + px[1] * runner_shift[j][1] + + px[2] * runner_shift[j][2]; + } + } + + /* Add the sentinel and sort. */ + for (int j = 0; j < 13; j++) + if (flags & (1 << j)) { + c->stars.sort[j][count].d = FLT_MAX; + c->stars.sort[j][count].i = 0; + runner_do_sort_ascending(c->stars.sort[j], count); + atomic_or(&c->stars.sorted, 1 << j); + } + } + +#ifdef SWIFT_DEBUG_CHECKS + /* Verify the sorting. */ + for (int j = 0; j < 13; j++) { + if (!(flags & (1 << j))) continue; + struct sort_entry *finger = c->stars.sort[j]; + for (int k = 1; k < count; k++) { + if (finger[k].d < finger[k - 1].d) + error("Sorting failed, ascending array."); + if (finger[k].i >= count) error("Sorting failed, indices borked."); + } + } + + /* Make sure the sort flags are consistent (downward). */ + runner_check_sorts_stars(c, flags); + + /* Make sure the sort flags are consistent (upward). */ + for (struct cell *finger = c->parent; finger != NULL; + finger = finger->parent) { + if (finger->stars.sorted & ~c->stars.sorted) + error("Inconsistent sort flags."); + } +#endif + + /* Clear the cell's sort flags. */ + c->stars.do_sort = 0; + cell_clear_flag(c, cell_flag_do_stars_sub_sort); + c->stars.requires_sorts = 0; + + if (clock) TIMER_TOC(timer_do_stars_sort); +} + +/** + * @brief Recurse into a cell until reaching the super level and call + * the hydro sorting function there. + * + * This function must be called at or above the super level! + * + * This function will sort the particles in all 13 directions. + * + * @param r the #runner. + * @param c the #cell. + */ +void runner_do_all_hydro_sort(struct runner *r, struct cell *c) { + +#ifdef SWIFT_DEBUG_CHECKS + if (c->nodeID != engine_rank) error("Function called on a foreign cell!"); +#endif + + if (!cell_is_active_hydro(c, r->e)) return; + + /* Shall we sort at this level? 
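Sorting is only ever performed at the hydro super level, i.e. the highest cell in the hierarchy with hydro tasks attached.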
*/ + if (c->hydro.super == c) { + + /* Sort everything */ + runner_do_hydro_sort(r, c, 0x1FFF, /*cleanup=*/0, /*timer=*/0); + + } else { + +#ifdef SWIFT_DEBUG_CHECKS + if (c->hydro.super != NULL) error("Function called below the super level!"); +#endif + + /* Ok, then, let's try lower */ + if (c->split) { + for (int k = 0; k < 8; ++k) { + if (c->progeny[k] != NULL) runner_do_all_hydro_sort(r, c->progeny[k]); + } + } else { +#ifdef SWIFT_DEBUG_CHECKS + error("Reached a leaf without encountering a hydro super cell!"); +#endif + } + } +} + +/** + * @brief Recurse into a cell until reaching the super level and call + * the star sorting function there. + * + * This function must be called at or above the super level! + * + * This function will sort the particles in all 13 directions. + * + * @param r the #runner. + * @param c the #cell. + */ +void runner_do_all_stars_sort(struct runner *r, struct cell *c) { + +#ifdef SWIFT_DEBUG_CHECKS + if (c->nodeID != engine_rank) error("Function called on a foreign cell!"); +#endif + + if (!cell_is_active_stars(c, r->e) && !cell_is_active_hydro(c, r->e)) return; + + /* Shall we sort at this level? */ + if (c->hydro.super == c) { + + /* Sort everything */ + runner_do_stars_sort(r, c, 0x1FFF, /*cleanup=*/0, /*timer=*/0); + + } else { + +#ifdef SWIFT_DEBUG_CHECKS + if (c->hydro.super != NULL) error("Function called below the super level!"); +#endif + + /* Ok, then, let's try lower */ + if (c->split) { + for (int k = 0; k < 8; ++k) { + if (c->progeny[k] != NULL) runner_do_all_stars_sort(r, c->progeny[k]); + } + } else { +#ifdef SWIFT_DEBUG_CHECKS + error("Reached a leaf without encountering a hydro super cell!"); +#endif + } + } +} diff --git a/src/runner_time_integration.c b/src/runner_time_integration.c new file mode 100644 index 0000000000000000000000000000000000000000..e1f5de709da804330953b47a647d0f0ce13de7bb --- /dev/null +++ b/src/runner_time_integration.c @@ -0,0 +1,987 @@ +/******************************************************************************* + * This file is part of SWIFT. + * Copyright (c) 2012 Pedro Gonnet (pedro.gonnet@durham.ac.uk) + * Matthieu Schaller (matthieu.schaller@durham.ac.uk) + * 2015 Peter W. Draper (p.w.draper@durham.ac.uk) + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published + * by the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public License + * along with this program. If not, see <http://www.gnu.org/licenses/>. + * + ******************************************************************************/ + +/* Config parameters. */ +#include "../config.h" + +/* This object's header. */ +#include "runner.h" + +/* Local headers. */ +#include "active.h" +#include "black_holes.h" +#include "cell.h" +#include "engine.h" +#include "kick.h" +#include "timers.h" +#include "timestep.h" +#include "timestep_limiter.h" +#include "tracers.h" + +/** + * @brief Initialize the multipoles before the gravity calculation. + * + * @param r The runner thread. + * @param c The cell. + * @param timer 1 if the time is to be recorded. 
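Note that this task only resets the cell's multipole field tensors, leaving them ready for the gravity interactions of this step to accumulate into.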
+ */ +void runner_do_init_grav(struct runner *r, struct cell *c, int timer) { + + const struct engine *e = r->e; + + TIMER_TIC; + +#ifdef SWIFT_DEBUG_CHECKS + if (!(e->policy & engine_policy_self_gravity)) + error("Grav-init task called outside of self-gravity calculation"); +#endif + + /* Anything to do here? */ + if (!cell_is_active_gravity(c, e)) return; + + /* Reset the gravity acceleration tensors */ + gravity_field_tensors_init(&c->grav.multipole->pot, e->ti_current); + + /* Recurse? */ + if (c->split) { + for (int k = 0; k < 8; k++) { + if (c->progeny[k] != NULL) runner_do_init_grav(r, c->progeny[k], 0); + } + } + + if (timer) TIMER_TOC(timer_init_grav); +} + +/** + * @brief Perform the first half-kick on all the active particles in a cell. + * + * @param r The runner thread. + * @param c The cell. + * @param timer Are we timing this ? + */ +void runner_do_kick1(struct runner *r, struct cell *c, int timer) { + + const struct engine *e = r->e; + const struct cosmology *cosmo = e->cosmology; + const struct hydro_props *hydro_props = e->hydro_properties; + const struct entropy_floor_properties *entropy_floor = e->entropy_floor; + const int with_cosmology = (e->policy & engine_policy_cosmology); + struct part *restrict parts = c->hydro.parts; + struct xpart *restrict xparts = c->hydro.xparts; + struct gpart *restrict gparts = c->grav.parts; + struct spart *restrict sparts = c->stars.parts; + const int count = c->hydro.count; + const int gcount = c->grav.count; + const int scount = c->stars.count; + const integertime_t ti_current = e->ti_current; + const double time_base = e->time_base; + + TIMER_TIC; + + /* Anything to do here? */ + if (!cell_is_starting_hydro(c, e) && !cell_is_starting_gravity(c, e) && + !cell_is_starting_stars(c, e) && !cell_is_starting_black_holes(c, e)) + return; + + /* Recurse? */ + if (c->split) { + for (int k = 0; k < 8; k++) + if (c->progeny[k] != NULL) runner_do_kick1(r, c->progeny[k], 0); + } else { + + /* Loop over the parts in this cell. */ + for (int k = 0; k < count; k++) { + + /* Get a handle on the part. */ + struct part *restrict p = &parts[k]; + struct xpart *restrict xp = &xparts[k]; + + /* If particle needs to be kicked */ + if (part_is_starting(p, e)) { + +#ifdef SWIFT_DEBUG_CHECKS + if (p->wakeup == time_bin_awake) + error("Woken-up particle that has not been processed in kick1"); +#endif + + /* Skip particles that have been woken up and treated by the limiter. 
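Those particles have already been handled by the time-step limiter, so they must not receive the regular first half-kick on top of that treatment.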
*/ + if (p->wakeup != time_bin_not_awake) continue; + + const integertime_t ti_step = get_integer_timestep(p->time_bin); + const integertime_t ti_begin = + get_integer_time_begin(ti_current + 1, p->time_bin); + +#ifdef SWIFT_DEBUG_CHECKS + const integertime_t ti_end = ti_begin + ti_step; + + if (ti_begin != ti_current) + error( + "Particle in wrong time-bin, ti_end=%lld, ti_begin=%lld, " + "ti_step=%lld time_bin=%d wakeup=%d ti_current=%lld", + ti_end, ti_begin, ti_step, p->time_bin, p->wakeup, ti_current); +#endif + + /* Time interval for this half-kick */ + double dt_kick_grav, dt_kick_hydro, dt_kick_therm, dt_kick_corr; + if (with_cosmology) { + dt_kick_hydro = cosmology_get_hydro_kick_factor( + cosmo, ti_begin, ti_begin + ti_step / 2); + dt_kick_grav = cosmology_get_grav_kick_factor(cosmo, ti_begin, + ti_begin + ti_step / 2); + dt_kick_therm = cosmology_get_therm_kick_factor( + cosmo, ti_begin, ti_begin + ti_step / 2); + dt_kick_corr = cosmology_get_corr_kick_factor(cosmo, ti_begin, + ti_begin + ti_step / 2); + } else { + dt_kick_hydro = (ti_step / 2) * time_base; + dt_kick_grav = (ti_step / 2) * time_base; + dt_kick_therm = (ti_step / 2) * time_base; + dt_kick_corr = (ti_step / 2) * time_base; + } + + /* do the kick */ + kick_part(p, xp, dt_kick_hydro, dt_kick_grav, dt_kick_therm, + dt_kick_corr, cosmo, hydro_props, entropy_floor, ti_begin, + ti_begin + ti_step / 2); + + /* Update the accelerations to be used in the drift for hydro */ + if (p->gpart != NULL) { + + xp->a_grav[0] = p->gpart->a_grav[0]; + xp->a_grav[1] = p->gpart->a_grav[1]; + xp->a_grav[2] = p->gpart->a_grav[2]; + } + } + } + + /* Loop over the gparts in this cell. */ + for (int k = 0; k < gcount; k++) { + + /* Get a handle on the part. */ + struct gpart *restrict gp = &gparts[k]; + + /* If the g-particle has no counterpart and needs to be kicked */ + if ((gp->type == swift_type_dark_matter || + gp->type == swift_type_dark_matter_background) && + gpart_is_starting(gp, e)) { + + const integertime_t ti_step = get_integer_timestep(gp->time_bin); + const integertime_t ti_begin = + get_integer_time_begin(ti_current + 1, gp->time_bin); + +#ifdef SWIFT_DEBUG_CHECKS + const integertime_t ti_end = + get_integer_time_end(ti_current + 1, gp->time_bin); + + if (ti_begin != ti_current) + error( + "Particle in wrong time-bin, ti_end=%lld, ti_begin=%lld, " + "ti_step=%lld time_bin=%d ti_current=%lld", + ti_end, ti_begin, ti_step, gp->time_bin, ti_current); +#endif + + /* Time interval for this half-kick */ + double dt_kick_grav; + if (with_cosmology) { + dt_kick_grav = cosmology_get_grav_kick_factor(cosmo, ti_begin, + ti_begin + ti_step / 2); + } else { + dt_kick_grav = (ti_step / 2) * time_base; + } + + /* do the kick */ + kick_gpart(gp, dt_kick_grav, ti_begin, ti_begin + ti_step / 2); + } + } + + /* Loop over the stars particles in this cell. */ + for (int k = 0; k < scount; k++) { + + /* Get a handle on the s-part. 
*/ + struct spart *restrict sp = &sparts[k]; + + /* If particle needs to be kicked */ + if (spart_is_starting(sp, e)) { + + const integertime_t ti_step = get_integer_timestep(sp->time_bin); + const integertime_t ti_begin = + get_integer_time_begin(ti_current + 1, sp->time_bin); + +#ifdef SWIFT_DEBUG_CHECKS + const integertime_t ti_end = + get_integer_time_end(ti_current + 1, sp->time_bin); + + if (ti_begin != ti_current) + error( + "Particle in wrong time-bin, ti_end=%lld, ti_begin=%lld, " + "ti_step=%lld time_bin=%d ti_current=%lld", + ti_end, ti_begin, ti_step, sp->time_bin, ti_current); +#endif + + /* Time interval for this half-kick */ + double dt_kick_grav; + if (with_cosmology) { + dt_kick_grav = cosmology_get_grav_kick_factor(cosmo, ti_begin, + ti_begin + ti_step / 2); + } else { + dt_kick_grav = (ti_step / 2) * time_base; + } + + /* do the kick */ + kick_spart(sp, dt_kick_grav, ti_begin, ti_begin + ti_step / 2); + } + } + } + + if (timer) TIMER_TOC(timer_kick1); +} + +/** + * @brief Perform the second half-kick on all the active particles in a cell. + * + * Also prepares particles to be drifted. + * + * @param r The runner thread. + * @param c The cell. + * @param timer Are we timing this ? + */ +void runner_do_kick2(struct runner *r, struct cell *c, int timer) { + + const struct engine *e = r->e; + const struct cosmology *cosmo = e->cosmology; + const struct hydro_props *hydro_props = e->hydro_properties; + const struct entropy_floor_properties *entropy_floor = e->entropy_floor; + const int with_cosmology = (e->policy & engine_policy_cosmology); + const int count = c->hydro.count; + const int gcount = c->grav.count; + const int scount = c->stars.count; + struct part *restrict parts = c->hydro.parts; + struct xpart *restrict xparts = c->hydro.xparts; + struct gpart *restrict gparts = c->grav.parts; + struct spart *restrict sparts = c->stars.parts; + const integertime_t ti_current = e->ti_current; + const double time_base = e->time_base; + + TIMER_TIC; + + /* Anything to do here? */ + if (!cell_is_active_hydro(c, e) && !cell_is_active_gravity(c, e) && + !cell_is_active_stars(c, e) && !cell_is_active_black_holes(c, e)) + return; + + /* Recurse? */ + if (c->split) { + for (int k = 0; k < 8; k++) + if (c->progeny[k] != NULL) runner_do_kick2(r, c->progeny[k], 0); + } else { + + /* Loop over the particles in this cell. */ + for (int k = 0; k < count; k++) { + + /* Get a handle on the part. */ + struct part *restrict p = &parts[k]; + struct xpart *restrict xp = &xparts[k]; + + /* If particle needs to be kicked */ + if (part_is_active(p, e)) { + + integertime_t ti_begin, ti_end, ti_step; + +#ifdef SWIFT_DEBUG_CHECKS + if (p->wakeup == time_bin_awake) + error("Woken-up particle that has not been processed in kick1"); +#endif + + if (p->wakeup == time_bin_not_awake) { + + /* Time-step from a regular kick */ + ti_step = get_integer_timestep(p->time_bin); + ti_begin = get_integer_time_begin(ti_current, p->time_bin); + ti_end = ti_begin + ti_step; + + } else { + + /* Time-step that follows a wake-up call */ + ti_begin = get_integer_time_begin(ti_current, p->wakeup); + ti_end = get_integer_time_end(ti_current, p->time_bin); + ti_step = ti_end - ti_begin; + + /* Reset the flag. Everything is back to normal from now on. 
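The shortened step triggered by the wake-up call is completed by this second half-kick, after which the particle is integrated like any other.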
*/ + p->wakeup = time_bin_awake; + } + +#ifdef SWIFT_DEBUG_CHECKS + if (ti_begin + ti_step != ti_current) + error( + "Particle in wrong time-bin, ti_begin=%lld, ti_step=%lld " + "time_bin=%d wakeup=%d ti_current=%lld", + ti_begin, ti_step, p->time_bin, p->wakeup, ti_current); +#endif + /* Time interval for this half-kick */ + double dt_kick_grav, dt_kick_hydro, dt_kick_therm, dt_kick_corr; + if (with_cosmology) { + dt_kick_hydro = cosmology_get_hydro_kick_factor( + cosmo, ti_begin + ti_step / 2, ti_end); + dt_kick_grav = cosmology_get_grav_kick_factor( + cosmo, ti_begin + ti_step / 2, ti_end); + dt_kick_therm = cosmology_get_therm_kick_factor( + cosmo, ti_begin + ti_step / 2, ti_end); + dt_kick_corr = cosmology_get_corr_kick_factor( + cosmo, ti_begin + ti_step / 2, ti_end); + } else { + dt_kick_hydro = (ti_end - (ti_begin + ti_step / 2)) * time_base; + dt_kick_grav = (ti_end - (ti_begin + ti_step / 2)) * time_base; + dt_kick_therm = (ti_end - (ti_begin + ti_step / 2)) * time_base; + dt_kick_corr = (ti_end - (ti_begin + ti_step / 2)) * time_base; + } + + /* Finish the time-step with a second half-kick */ + kick_part(p, xp, dt_kick_hydro, dt_kick_grav, dt_kick_therm, + dt_kick_corr, cosmo, hydro_props, entropy_floor, + ti_begin + ti_step / 2, ti_end); + +#ifdef SWIFT_DEBUG_CHECKS + /* Check that kick and the drift are synchronized */ + if (p->ti_drift != p->ti_kick) error("Error integrating part in time."); +#endif + + /* Prepare the values to be drifted */ + hydro_reset_predicted_values(p, xp, cosmo); + } + } + + /* Loop over the g-particles in this cell. */ + for (int k = 0; k < gcount; k++) { + + /* Get a handle on the part. */ + struct gpart *restrict gp = &gparts[k]; + + /* If the g-particle has no counterpart and needs to be kicked */ + if ((gp->type == swift_type_dark_matter || + gp->type == swift_type_dark_matter_background) && + gpart_is_active(gp, e)) { + + const integertime_t ti_step = get_integer_timestep(gp->time_bin); + const integertime_t ti_begin = + get_integer_time_begin(ti_current, gp->time_bin); + +#ifdef SWIFT_DEBUG_CHECKS + if (ti_begin + ti_step != ti_current) + error("Particle in wrong time-bin"); +#endif + + /* Time interval for this half-kick */ + double dt_kick_grav; + if (with_cosmology) { + dt_kick_grav = cosmology_get_grav_kick_factor( + cosmo, ti_begin + ti_step / 2, ti_begin + ti_step); + } else { + dt_kick_grav = (ti_step / 2) * time_base; + } + + /* Finish the time-step with a second half-kick */ + kick_gpart(gp, dt_kick_grav, ti_begin + ti_step / 2, + ti_begin + ti_step); + +#ifdef SWIFT_DEBUG_CHECKS + /* Check that kick and the drift are synchronized */ + if (gp->ti_drift != gp->ti_kick) + error("Error integrating g-part in time."); +#endif + + /* Prepare the values to be drifted */ + gravity_reset_predicted_values(gp); + } + } + + /* Loop over the particles in this cell. */ + for (int k = 0; k < scount; k++) { + + /* Get a handle on the part. 
*/ + struct spart *restrict sp = &sparts[k]; + + /* If particle needs to be kicked */ + if (spart_is_active(sp, e)) { + + const integertime_t ti_step = get_integer_timestep(sp->time_bin); + const integertime_t ti_begin = + get_integer_time_begin(ti_current, sp->time_bin); + +#ifdef SWIFT_DEBUG_CHECKS + if (ti_begin + ti_step != ti_current) + error("Particle in wrong time-bin"); +#endif + + /* Time interval for this half-kick */ + double dt_kick_grav; + if (with_cosmology) { + dt_kick_grav = cosmology_get_grav_kick_factor( + cosmo, ti_begin + ti_step / 2, ti_begin + ti_step); + } else { + dt_kick_grav = (ti_step / 2) * time_base; + } + + /* Finish the time-step with a second half-kick */ + kick_spart(sp, dt_kick_grav, ti_begin + ti_step / 2, + ti_begin + ti_step); + +#ifdef SWIFT_DEBUG_CHECKS + /* Check that kick and the drift are synchronized */ + if (sp->ti_drift != sp->ti_kick) + error("Error integrating s-part in time."); +#endif + + /* Prepare the values to be drifted */ + stars_reset_predicted_values(sp); + } + } + } + if (timer) TIMER_TOC(timer_kick2); +} + +/** + * @brief Computes the next time-step of all active particles in this cell + * and update the cell's statistics. + * + * @param r The runner thread. + * @param c The cell. + * @param timer Are we timing this ? + */ +void runner_do_timestep(struct runner *r, struct cell *c, int timer) { + + const struct engine *e = r->e; + const integertime_t ti_current = e->ti_current; + const int with_cosmology = (e->policy & engine_policy_cosmology); + const int count = c->hydro.count; + const int gcount = c->grav.count; + const int scount = c->stars.count; + const int bcount = c->black_holes.count; + struct part *restrict parts = c->hydro.parts; + struct xpart *restrict xparts = c->hydro.xparts; + struct gpart *restrict gparts = c->grav.parts; + struct spart *restrict sparts = c->stars.parts; + struct bpart *restrict bparts = c->black_holes.parts; + + TIMER_TIC; + + /* Anything to do here? */ + if (!cell_is_active_hydro(c, e) && !cell_is_active_gravity(c, e) && + !cell_is_active_stars(c, e) && !cell_is_active_black_holes(c, e)) { + c->hydro.updated = 0; + c->grav.updated = 0; + c->stars.updated = 0; + c->black_holes.updated = 0; + return; + } + + int updated = 0, g_updated = 0, s_updated = 0, b_updated = 0; + integertime_t ti_hydro_end_min = max_nr_timesteps, ti_hydro_end_max = 0, + ti_hydro_beg_max = 0; + integertime_t ti_gravity_end_min = max_nr_timesteps, ti_gravity_end_max = 0, + ti_gravity_beg_max = 0; + integertime_t ti_stars_end_min = max_nr_timesteps, ti_stars_end_max = 0, + ti_stars_beg_max = 0; + integertime_t ti_black_holes_end_min = max_nr_timesteps, + ti_black_holes_end_max = 0, ti_black_holes_beg_max = 0; + + /* No children? */ + if (!c->split) { + + /* Loop over the particles in this cell. */ + for (int k = 0; k < count; k++) { + + /* Get a handle on the part. 
*/ + struct part *restrict p = &parts[k]; + struct xpart *restrict xp = &xparts[k]; + + /* If particle needs updating */ + if (part_is_active(p, e)) { + +#ifdef SWIFT_DEBUG_CHECKS + /* Current end of time-step */ + const integertime_t ti_end = + get_integer_time_end(ti_current, p->time_bin); + + if (ti_end != ti_current) + error("Computing time-step of rogue particle."); +#endif + + /* Get new time-step */ + const integertime_t ti_new_step = get_part_timestep(p, xp, e); + + /* Update particle */ + p->time_bin = get_time_bin(ti_new_step); + if (p->gpart != NULL) p->gpart->time_bin = p->time_bin; + + /* Update the tracers properties */ + tracers_after_timestep(p, xp, e->internal_units, e->physical_constants, + with_cosmology, e->cosmology, + e->hydro_properties, e->cooling_func, e->time); + + /* Number of updated particles */ + updated++; + if (p->gpart != NULL) g_updated++; + + /* What is the next sync-point ? */ + ti_hydro_end_min = min(ti_current + ti_new_step, ti_hydro_end_min); + ti_hydro_end_max = max(ti_current + ti_new_step, ti_hydro_end_max); + + /* What is the next starting point for this cell ? */ + ti_hydro_beg_max = max(ti_current, ti_hydro_beg_max); + + if (p->gpart != NULL) { + + /* What is the next sync-point ? */ + ti_gravity_end_min = + min(ti_current + ti_new_step, ti_gravity_end_min); + ti_gravity_end_max = + max(ti_current + ti_new_step, ti_gravity_end_max); + + /* What is the next starting point for this cell ? */ + ti_gravity_beg_max = max(ti_current, ti_gravity_beg_max); + } + } + + else { /* part is inactive */ + + if (!part_is_inhibited(p, e)) { + + const integertime_t ti_end = + get_integer_time_end(ti_current, p->time_bin); + + const integertime_t ti_beg = + get_integer_time_begin(ti_current + 1, p->time_bin); + + /* What is the next sync-point ? */ + ti_hydro_end_min = min(ti_end, ti_hydro_end_min); + ti_hydro_end_max = max(ti_end, ti_hydro_end_max); + + /* What is the next starting point for this cell ? */ + ti_hydro_beg_max = max(ti_beg, ti_hydro_beg_max); + + if (p->gpart != NULL) { + + /* What is the next sync-point ? */ + ti_gravity_end_min = min(ti_end, ti_gravity_end_min); + ti_gravity_end_max = max(ti_end, ti_gravity_end_max); + + /* What is the next starting point for this cell ? */ + ti_gravity_beg_max = max(ti_beg, ti_gravity_beg_max); + } + } + } + } + + /* Loop over the g-particles in this cell. */ + for (int k = 0; k < gcount; k++) { + + /* Get a handle on the part. */ + struct gpart *restrict gp = &gparts[k]; + + /* If the g-particle has no counterpart */ + if (gp->type == swift_type_dark_matter || + gp->type == swift_type_dark_matter_background) { + + /* need to be updated ? */ + if (gpart_is_active(gp, e)) { + +#ifdef SWIFT_DEBUG_CHECKS + /* Current end of time-step */ + const integertime_t ti_end = + get_integer_time_end(ti_current, gp->time_bin); + + if (ti_end != ti_current) + error("Computing time-step of rogue particle."); +#endif + + /* Get new time-step */ + const integertime_t ti_new_step = get_gpart_timestep(gp, e); + + /* Update particle */ + gp->time_bin = get_time_bin(ti_new_step); + + /* Number of updated g-particles */ + g_updated++; + + /* What is the next sync-point ? */ + ti_gravity_end_min = + min(ti_current + ti_new_step, ti_gravity_end_min); + ti_gravity_end_max = + max(ti_current + ti_new_step, ti_gravity_end_max); + + /* What is the next starting point for this cell ? 
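For an active particle the new step starts at the current time, hence ti_current enters the maximum.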
*/ + ti_gravity_beg_max = max(ti_current, ti_gravity_beg_max); + + } else { /* gpart is inactive */ + + if (!gpart_is_inhibited(gp, e)) { + + const integertime_t ti_end = + get_integer_time_end(ti_current, gp->time_bin); + + /* What is the next sync-point ? */ + ti_gravity_end_min = min(ti_end, ti_gravity_end_min); + ti_gravity_end_max = max(ti_end, ti_gravity_end_max); + + const integertime_t ti_beg = + get_integer_time_begin(ti_current + 1, gp->time_bin); + + /* What is the next starting point for this cell ? */ + ti_gravity_beg_max = max(ti_beg, ti_gravity_beg_max); + } + } + } + } + + /* Loop over the star particles in this cell. */ + for (int k = 0; k < scount; k++) { + + /* Get a handle on the part. */ + struct spart *restrict sp = &sparts[k]; + + /* need to be updated ? */ + if (spart_is_active(sp, e)) { + +#ifdef SWIFT_DEBUG_CHECKS + /* Current end of time-step */ + const integertime_t ti_end = + get_integer_time_end(ti_current, sp->time_bin); + + if (ti_end != ti_current) + error("Computing time-step of rogue particle."); +#endif + /* Get new time-step */ + const integertime_t ti_new_step = get_spart_timestep(sp, e); + + /* Update particle */ + sp->time_bin = get_time_bin(ti_new_step); + sp->gpart->time_bin = get_time_bin(ti_new_step); + + /* Number of updated s-particles */ + s_updated++; + g_updated++; + + ti_stars_end_min = min(ti_current + ti_new_step, ti_stars_end_min); + ti_stars_end_max = max(ti_current + ti_new_step, ti_stars_end_max); + ti_gravity_end_min = min(ti_current + ti_new_step, ti_gravity_end_min); + ti_gravity_end_max = max(ti_current + ti_new_step, ti_gravity_end_max); + + /* What is the next starting point for this cell ? */ + ti_stars_beg_max = max(ti_current, ti_stars_beg_max); + ti_gravity_beg_max = max(ti_current, ti_gravity_beg_max); + + /* star particle is inactive but not inhibited */ + } else { + + if (!spart_is_inhibited(sp, e)) { + + const integertime_t ti_end = + get_integer_time_end(ti_current, sp->time_bin); + + const integertime_t ti_beg = + get_integer_time_begin(ti_current + 1, sp->time_bin); + + ti_stars_end_min = min(ti_end, ti_stars_end_min); + ti_stars_end_max = max(ti_end, ti_stars_end_max); + ti_gravity_end_min = min(ti_end, ti_gravity_end_min); + ti_gravity_end_max = max(ti_end, ti_gravity_end_max); + + /* What is the next starting point for this cell ? */ + ti_stars_beg_max = max(ti_beg, ti_stars_beg_max); + ti_gravity_beg_max = max(ti_beg, ti_gravity_beg_max); + } + } + } + + /* Loop over the black hole particles in this cell. */ + for (int k = 0; k < bcount; k++) { + + /* Get a handle on the part. */ + struct bpart *restrict bp = &bparts[k]; + + /* need to be updated ?
*/ + if (bpart_is_active(bp, e)) { + +#ifdef SWIFT_DEBUG_CHECKS + /* Current end of time-step */ + const integertime_t ti_end = + get_integer_time_end(ti_current, bp->time_bin); + + if (ti_end != ti_current) + error("Computing time-step of rogue particle."); +#endif + /* Get new time-step */ + const integertime_t ti_new_step = get_bpart_timestep(bp, e); + + /* Update particle */ + bp->time_bin = get_time_bin(ti_new_step); + bp->gpart->time_bin = get_time_bin(ti_new_step); + + /* Number of updated b-particles */ + b_updated++; + g_updated++; + + ti_black_holes_end_min = + min(ti_current + ti_new_step, ti_black_holes_end_min); + ti_black_holes_end_max = + max(ti_current + ti_new_step, ti_black_holes_end_max); + ti_gravity_end_min = min(ti_current + ti_new_step, ti_gravity_end_min); + ti_gravity_end_max = max(ti_current + ti_new_step, ti_gravity_end_max); + + /* What is the next starting point for this cell ? */ + ti_black_holes_beg_max = max(ti_current, ti_black_holes_beg_max); + ti_gravity_beg_max = max(ti_current, ti_gravity_beg_max); + + /* black hole particle is inactive but not inhibited */ + } else { + + if (!bpart_is_inhibited(bp, e)) { + + const integertime_t ti_end = + get_integer_time_end(ti_current, bp->time_bin); + + const integertime_t ti_beg = + get_integer_time_begin(ti_current + 1, bp->time_bin); + + ti_black_holes_end_min = min(ti_end, ti_black_holes_end_min); + ti_black_holes_end_max = max(ti_end, ti_black_holes_end_max); + ti_gravity_end_min = min(ti_end, ti_gravity_end_min); + ti_gravity_end_max = max(ti_end, ti_gravity_end_max); + + /* What is the next starting point for this cell ? */ + ti_black_holes_beg_max = max(ti_beg, ti_black_holes_beg_max); + ti_gravity_beg_max = max(ti_beg, ti_gravity_beg_max); + } + } + } + + } else { + + /* Loop over the progeny. */ + for (int k = 0; k < 8; k++) { + if (c->progeny[k] != NULL) { + struct cell *restrict cp = c->progeny[k]; + + /* Recurse */ + runner_do_timestep(r, cp, 0); + + /* And aggregate */ + updated += cp->hydro.updated; + g_updated += cp->grav.updated; + s_updated += cp->stars.updated; + b_updated += cp->black_holes.updated; + + ti_hydro_end_min = min(cp->hydro.ti_end_min, ti_hydro_end_min); + ti_hydro_end_max = max(cp->hydro.ti_end_max, ti_hydro_end_max); + ti_hydro_beg_max = max(cp->hydro.ti_beg_max, ti_hydro_beg_max); + + ti_gravity_end_min = min(cp->grav.ti_end_min, ti_gravity_end_min); + ti_gravity_end_max = max(cp->grav.ti_end_max, ti_gravity_end_max); + ti_gravity_beg_max = max(cp->grav.ti_beg_max, ti_gravity_beg_max); + + ti_stars_end_min = min(cp->stars.ti_end_min, ti_stars_end_min); + ti_stars_end_max = max(cp->stars.ti_end_max, ti_stars_end_max); + ti_stars_beg_max = max(cp->stars.ti_beg_max, ti_stars_beg_max); + + ti_black_holes_end_min = + min(cp->black_holes.ti_end_min, ti_black_holes_end_min); + ti_black_holes_end_max = + max(cp->black_holes.ti_end_max, ti_black_holes_end_max); + ti_black_holes_beg_max = + max(cp->black_holes.ti_beg_max, ti_black_holes_beg_max); + } + } + } + + /* Store the values.
*/ + c->hydro.updated = updated; + c->grav.updated = g_updated; + c->stars.updated = s_updated; + c->black_holes.updated = b_updated; + + c->hydro.ti_end_min = ti_hydro_end_min; + c->hydro.ti_end_max = ti_hydro_end_max; + c->hydro.ti_beg_max = ti_hydro_beg_max; + c->grav.ti_end_min = ti_gravity_end_min; + c->grav.ti_end_max = ti_gravity_end_max; + c->grav.ti_beg_max = ti_gravity_beg_max; + c->stars.ti_end_min = ti_stars_end_min; + c->stars.ti_end_max = ti_stars_end_max; + c->stars.ti_beg_max = ti_stars_beg_max; + c->black_holes.ti_end_min = ti_black_holes_end_min; + c->black_holes.ti_end_max = ti_black_holes_end_max; + c->black_holes.ti_beg_max = ti_black_holes_beg_max; + +#ifdef SWIFT_DEBUG_CHECKS + if (c->hydro.ti_end_min == e->ti_current && + c->hydro.ti_end_min < max_nr_timesteps) + error("End of next hydro step is current time!"); + if (c->grav.ti_end_min == e->ti_current && + c->grav.ti_end_min < max_nr_timesteps) + error("End of next gravity step is current time!"); + if (c->stars.ti_end_min == e->ti_current && + c->stars.ti_end_min < max_nr_timesteps) + error("End of next stars step is current time!"); + if (c->black_holes.ti_end_min == e->ti_current && + c->black_holes.ti_end_min < max_nr_timesteps) + error("End of next black holes step is current time!"); +#endif + + if (timer) TIMER_TOC(timer_timestep); +} + +/** + * @brief Apply the time-step limiter to all awaken particles in a cell + * hierarchy. + * + * @param r The task #runner. + * @param c The #cell. + * @param force Limit the particles irrespective of the #cell flags. + * @param timer Are we timing this ? + */ +void runner_do_limiter(struct runner *r, struct cell *c, int force, int timer) { + + const struct engine *e = r->e; + const integertime_t ti_current = e->ti_current; + const int count = c->hydro.count; + struct part *restrict parts = c->hydro.parts; + struct xpart *restrict xparts = c->hydro.xparts; + + TIMER_TIC; + +#ifdef SWIFT_DEBUG_CHECKS + /* Check that we only limit local cells. */ + if (c->nodeID != engine_rank) error("Limiting dt of a foreign cell is nope."); +#endif + + integertime_t ti_hydro_end_min = max_nr_timesteps, ti_hydro_end_max = 0, + ti_hydro_beg_max = 0; + integertime_t ti_gravity_end_min = max_nr_timesteps, ti_gravity_end_max = 0, + ti_gravity_beg_max = 0; + + /* Limit irrespective of cell flags? */ + force = (force || cell_get_flag(c, cell_flag_do_hydro_limiter)); + + /* Early abort? */ + if (c->hydro.count == 0) { + + /* Clear the limiter flags. */ + cell_clear_flag( + c, cell_flag_do_hydro_limiter | cell_flag_do_hydro_sub_limiter); + return; + } + + /* Loop over the progeny ? 
*/ + if (c->split && (force || cell_get_flag(c, cell_flag_do_hydro_sub_limiter))) { + for (int k = 0; k < 8; k++) { + if (c->progeny[k] != NULL) { + struct cell *restrict cp = c->progeny[k]; + + /* Recurse */ + runner_do_limiter(r, cp, force, 0); + + /* And aggregate */ + ti_hydro_end_min = min(cp->hydro.ti_end_min, ti_hydro_end_min); + ti_hydro_end_max = max(cp->hydro.ti_end_max, ti_hydro_end_max); + ti_hydro_beg_max = max(cp->hydro.ti_beg_max, ti_hydro_beg_max); + ti_gravity_end_min = min(cp->grav.ti_end_min, ti_gravity_end_min); + ti_gravity_end_max = max(cp->grav.ti_end_max, ti_gravity_end_max); + ti_gravity_beg_max = max(cp->grav.ti_beg_max, ti_gravity_beg_max); + } + } + + /* Store the updated values */ + c->hydro.ti_end_min = min(c->hydro.ti_end_min, ti_hydro_end_min); + c->hydro.ti_end_max = max(c->hydro.ti_end_max, ti_hydro_end_max); + c->hydro.ti_beg_max = max(c->hydro.ti_beg_max, ti_hydro_beg_max); + c->grav.ti_end_min = min(c->grav.ti_end_min, ti_gravity_end_min); + c->grav.ti_end_max = max(c->grav.ti_end_max, ti_gravity_end_max); + c->grav.ti_beg_max = max(c->grav.ti_beg_max, ti_gravity_beg_max); + + } else if (!c->split && force) { + + ti_hydro_end_min = c->hydro.ti_end_min; + ti_hydro_end_max = c->hydro.ti_end_max; + ti_hydro_beg_max = c->hydro.ti_beg_max; + ti_gravity_end_min = c->grav.ti_end_min; + ti_gravity_end_max = c->grav.ti_end_max; + ti_gravity_beg_max = c->grav.ti_beg_max; + + /* Loop over the gas particles in this cell. */ + for (int k = 0; k < count; k++) { + + /* Get a handle on the part. */ + struct part *restrict p = &parts[k]; + struct xpart *restrict xp = &xparts[k]; + + /* Avoid inhibited particles */ + if (part_is_inhibited(p, e)) continue; + + /* If the particle will be active no need to wake it up */ + if (part_is_active(p, e) && p->wakeup != time_bin_not_awake) + p->wakeup = time_bin_not_awake; + + /* Bip, bip, bip... wake-up time */ + if (p->wakeup <= time_bin_awake) { + + /* Apply the limiter and get the new time-step size */ + const integertime_t ti_new_step = timestep_limit_part(p, xp, e); + + /* What is the next sync-point ? */ + ti_hydro_end_min = min(ti_current + ti_new_step, ti_hydro_end_min); + ti_hydro_end_max = max(ti_current + ti_new_step, ti_hydro_end_max); + + /* What is the next starting point for this cell ? */ + ti_hydro_beg_max = max(ti_current, ti_hydro_beg_max); + + /* Also limit the gpart counter-part */ + if (p->gpart != NULL) { + + /* Register the time-bin */ + p->gpart->time_bin = p->time_bin; + + /* What is the next sync-point ? */ + ti_gravity_end_min = + min(ti_current + ti_new_step, ti_gravity_end_min); + ti_gravity_end_max = + max(ti_current + ti_new_step, ti_gravity_end_max); + + /* What is the next starting point for this cell ? */ + ti_gravity_beg_max = max(ti_current, ti_gravity_beg_max); + } + } + } + + /* Store the updated values */ + c->hydro.ti_end_min = min(c->hydro.ti_end_min, ti_hydro_end_min); + c->hydro.ti_end_max = max(c->hydro.ti_end_max, ti_hydro_end_max); + c->hydro.ti_beg_max = max(c->hydro.ti_beg_max, ti_hydro_beg_max); + c->grav.ti_end_min = min(c->grav.ti_end_min, ti_gravity_end_min); + c->grav.ti_end_max = max(c->grav.ti_end_max, ti_gravity_end_max); + c->grav.ti_beg_max = max(c->grav.ti_beg_max, ti_gravity_beg_max); + } + + /* Clear the limiter flags. 
*/ + cell_clear_flag(c, + cell_flag_do_hydro_limiter | cell_flag_do_hydro_sub_limiter); + + if (timer) TIMER_TOC(timer_do_limiter); +} diff --git a/src/scheduler.c b/src/scheduler.c index 85c3727a1ebe9297943bf74a5b407ec5b5e46322..1fad63fd7141db2aad486aaaa7e4dc877a8aa3b8 100644 --- a/src/scheduler.c +++ b/src/scheduler.c @@ -601,7 +601,10 @@ static void scheduler_splittask_hydro(struct task *t, struct scheduler *s) { /* Add the self tasks. */ int first_child = 0; while (ci->progeny[first_child] == NULL) first_child++; + t->ci = ci->progeny[first_child]; + cell_set_flag(t->ci, cell_flag_has_tasks); + for (int k = first_child + 1; k < 8; k++) { /* Do we have a non-empty progenitor? */ if (ci->progeny[k] != NULL && @@ -711,8 +714,12 @@ static void scheduler_splittask_hydro(struct task *t, struct scheduler *s) { /* Loop over the sub-cell pairs for the current sid and add new tasks * for them. */ struct cell_split_pair *csp = &cell_split_pairs[sid]; + t->ci = ci->progeny[csp->pairs[0].pid]; t->cj = cj->progeny[csp->pairs[0].pjd]; + cell_set_flag(t->ci, cell_flag_has_tasks); + cell_set_flag(t->cj, cell_flag_has_tasks); + t->flags = csp->pairs[0].sid; for (int k = 1; k < csp->count; k++) { scheduler_splittask_hydro( @@ -796,7 +803,9 @@ static void scheduler_splittask_gravity(struct task *t, struct scheduler *s) { /* Add the self tasks. */ int first_child = 0; while (ci->progeny[first_child] == NULL) first_child++; + t->ci = ci->progeny[first_child]; + cell_set_flag(t->ci, cell_flag_has_tasks); for (int k = first_child + 1; k < 8; k++) if (ci->progeny[k] != NULL) @@ -1100,6 +1109,9 @@ struct task *scheduler_addtask(struct scheduler *s, enum task_types type, t->tic = 0; t->toc = 0; + if (ci != NULL) cell_set_flag(ci, cell_flag_has_tasks); + if (cj != NULL) cell_set_flag(cj, cell_flag_has_tasks); + /* Add an index for it. */ // lock_lock( &s->lock ); s->tasks_ind[atomic_inc(&s->nr_tasks)] = ind; @@ -1589,14 +1601,6 @@ void scheduler_enqueue_mapper(void *map_data, int num_elements, * @param s The #scheduler. */ void scheduler_start(struct scheduler *s) { - /* Reset all task timers. */ - for (int i = 0; i < s->nr_tasks; ++i) { - s->tasks[i].tic = 0; - s->tasks[i].toc = 0; -#ifdef SWIFT_DEBUG_TASKS - s->tasks[i].rid = -1; -#endif - } /* Re-wait the tasks. */ if (s->active_count > 1000) { diff --git a/src/space.c b/src/space.c index a417117f6e3fa92e1a491efbc11f70c7c9e9ef97..eb498035d7c912f331870cfb0bb8bf84ad1559c4 100644 --- a/src/space.c +++ b/src/space.c @@ -5670,14 +5670,15 @@ void space_write_cell(const struct space *s, FILE *f, const struct cell *c) { * @brief Write a csv file containing the cell hierarchy * * @param s The #space. + * @param j The file number. 
*/ -void space_write_cell_hierarchy(const struct space *s) { +void space_write_cell_hierarchy(const struct space *s, int j) { #ifdef SWIFT_CELL_GRAPH /* Open file */ char filename[200]; - sprintf(filename, "cell_hierarchy_%04i.csv", engine_rank); + sprintf(filename, "cell_hierarchy_%04i_%04i.csv", j, engine_rank); FILE *f = fopen(filename, "w"); if (f == NULL) error("Error opening task level file."); diff --git a/src/space.h b/src/space.h index 0b332716645e733636b7ab0da57a0a31b28e3d31..ad20641e4dc11559d33f512794fddf1b7453317a 100644 --- a/src/space.h +++ b/src/space.h @@ -374,6 +374,6 @@ void space_free_foreign_parts(struct space *s); void space_struct_dump(struct space *s, FILE *stream); void space_struct_restore(struct space *s, FILE *stream); -void space_write_cell_hierarchy(const struct space *s); +void space_write_cell_hierarchy(const struct space *s, int j); #endif /* SWIFT_SPACE_H */ diff --git a/src/stars/EAGLE/stars.h b/src/stars/EAGLE/stars.h index f102ddb22c0075d33b02c726d0447b64fa9d3df7..82ad2e25f474e6c66e6cf5e09f39188faa09b084 100644 --- a/src/stars/EAGLE/stars.h +++ b/src/stars/EAGLE/stars.h @@ -65,7 +65,7 @@ __attribute__((always_inline)) INLINE static void stars_first_init_spart( sp->time_bin = 0; sp->birth_density = 0.f; sp->f_E = -1.f; - if (stars_properties->spart_first_init_birth_time != -1.f) + if (stars_properties->overwrite_birth_time) sp->birth_time = stars_properties->spart_first_init_birth_time; stars_init_spart(sp); diff --git a/src/stars/EAGLE/stars_io.h b/src/stars/EAGLE/stars_io.h index b91b5cf94595a05acd280cfd4f51755f91cce04d..0baafd380addfa1d6f8d60491be3da4c30b2a3aa 100644 --- a/src/stars/EAGLE/stars_io.h +++ b/src/stars/EAGLE/stars_io.h @@ -217,10 +217,15 @@ INLINE static void stars_props_init(struct stars_props *sp, else sp->log_max_h_change = logf(powf(max_volume_change, hydro_dimension_inv)); - /* Read birth time to set all stars in ICs to (defaults to -1 to indicate star - * present in ICs) */ - sp->spart_first_init_birth_time = - parser_get_opt_param_float(params, "Stars:birth_time", -1.f); + /* Do we want to overwrite the stars' birth time? */ + sp->overwrite_birth_time = + parser_get_opt_param_int(params, "Stars:overwrite_birth_time", 0); + + /* Read birth time to set all stars in ICs */ + if (sp->overwrite_birth_time) { + sp->spart_first_init_birth_time = + parser_get_param_float(params, "Stars:birth_time"); + } } /** @@ -244,6 +249,10 @@ INLINE static void stars_props_print(const struct stars_props *sp) { message("Maximal iterations in ghost task set to %d", sp->max_smoothing_iterations); + + if (sp->overwrite_birth_time) + message("Stars' birth time read from the ICs will be overwritten to %f", + sp->spart_first_init_birth_time); } #if defined(HAVE_HDF5) diff --git a/src/stars/EAGLE/stars_part.h b/src/stars/EAGLE/stars_part.h index 4502b10edb7b646e9ba845e1ffffbb9255cdc01c..9114cb9107e1259698c365be82e5318d60d37ac7 100644 --- a/src/stars/EAGLE/stars_part.h +++ b/src/stars/EAGLE/stars_part.h @@ -144,7 +144,7 @@ struct stars_props { /*! Smoothing length tolerance */ float h_tolerance; - /*! Tolerance on neighbour number (for info only)*/ + /*! Tolerance on neighbour number (for info only) */ float delta_neighbours; /*! Maximal number of iterations to converge h */ @@ -153,7 +153,10 @@ struct stars_props { /*! Maximal change of h over one time-step */ float log_max_h_change; - /*! Value to set birth time of stars read from ICs if not set to -1 */ + /*! Are we overwriting the stars' birth time read from the ICs? */ + int overwrite_birth_time; + + /*! 
Value to set birth time of stars read from ICs */ float spart_first_init_birth_time; }; diff --git a/src/task.c b/src/task.c index 643f084b1fa4fc530125128e694a7012de3f302f..4d6cfa2482491b1a08f6b28f7188fb94448afb2e 100644 --- a/src/task.c +++ b/src/task.c @@ -893,7 +893,7 @@ void task_dump_all(struct engine *e, int step) { #ifdef SWIFT_DEBUG_TASKS /* Need this to convert ticks to seconds. */ - unsigned long long cpufreq = clocks_get_cpufreq(); + const unsigned long long cpufreq = clocks_get_cpufreq(); #ifdef WITH_MPI /* Make sure output file is empty, only on one rank. */ @@ -926,7 +926,8 @@ void task_dump_all(struct engine *e, int step) { e->s_updates, cpufreq); int count = 0; for (int l = 0; l < e->sched.nr_tasks; l++) { - if (!e->sched.tasks[l].implicit && e->sched.tasks[l].toc != 0) { + if (!e->sched.tasks[l].implicit && + e->sched.tasks[l].tic > e->tic_step) { fprintf( file_thread, " %03i %i %i %i %i %lli %lli %i %i %i %i %lli %i\n", engine_rank, e->sched.tasks[l].rid, e->sched.tasks[l].type, @@ -966,7 +967,7 @@ void task_dump_all(struct engine *e, int step) { (unsigned long long)e->toc_step, e->updates, e->g_updates, e->s_updates, 0, cpufreq); for (int l = 0; l < e->sched.nr_tasks; l++) { - if (!e->sched.tasks[l].implicit && e->sched.tasks[l].toc != 0) { + if (!e->sched.tasks[l].implicit && e->sched.tasks[l].tic > e->tic_step) { fprintf( file_thread, " %i %i %i %i %lli %lli %i %i %i %i %i\n", e->sched.tasks[l].rid, e->sched.tasks[l].type, @@ -1037,8 +1038,8 @@ void task_dump_stats(const char *dumpfile, struct engine *e, int header, for (int l = 0; l < e->sched.nr_tasks; l++) { int type = e->sched.tasks[l].type; - /* Skip implicit tasks, tasks that didn't run. */ - if (!e->sched.tasks[l].implicit && e->sched.tasks[l].toc != 0) { + /* Skip implicit tasks, tasks that didn't run this step. */ + if (!e->sched.tasks[l].implicit && e->sched.tasks[l].tic > e->tic_step) { int subtype = e->sched.tasks[l].subtype; double dt = e->sched.tasks[l].toc - e->sched.tasks[l].tic; diff --git a/src/timestep_limiter.h b/src/timestep_limiter.h index d8555a352c8e1a799ac13d268932c9d37f30fe33..01b72daea5599b662c38fdc4b3ada8b2ac5b3d11 100644 --- a/src/timestep_limiter.h +++ b/src/timestep_limiter.h @@ -22,6 +22,9 @@ /* Config parameters. */ #include "../config.h" +/* Local headers. */ +#include "kick.h" + /** * @brief Wakes up a particle by rewinding it's kick1 back in time and applying * a new one such that the particle becomes active again in the next time-step. diff --git a/tests/testLogger.c b/tests/testLogger.c index c5be0d7cc18742bdc2fa6167462579c45fd43e92..d2c64e7fa3330ebd20cf8abc01a76e1dff08c8fc 100644 --- a/tests/testLogger.c +++ b/tests/testLogger.c @@ -32,8 +32,8 @@ /* Local headers. */ #include "swift.h" -void test_log_parts(struct logger *log) { - struct dump *d = log->dump; +void test_log_parts(struct logger_writer *log) { + struct dump *d = &log->dump; /* Write several copies of a part to the dump. */ struct part p; @@ -45,22 +45,27 @@ void test_log_parts(struct logger *log) { size_t offset = d->count; /* Write the full part. 
*/ - logger_log_part(log, &p, - logger_mask_x | logger_mask_v | logger_mask_a | - logger_mask_u | logger_mask_h | logger_mask_rho | - logger_mask_consts, - &offset); + logger_log_part( + log, &p, + logger_mask_data[logger_x].mask | logger_mask_data[logger_v].mask | + logger_mask_data[logger_a].mask | logger_mask_data[logger_u].mask | + logger_mask_data[logger_h].mask | logger_mask_data[logger_rho].mask | + logger_mask_data[logger_consts].mask, + &offset); printf("Wrote part at offset %#016zx.\n", offset); /* Write only the position. */ p.x[0] = 2.0; - logger_log_part(log, &p, logger_mask_x, &offset); + logger_log_part(log, &p, logger_mask_data[logger_x].mask, &offset); printf("Wrote part at offset %#016zx.\n", offset); /* Write the position and velocity. */ p.x[0] = 3.0; p.v[0] = 0.3; - logger_log_part(log, &p, logger_mask_x | logger_mask_v, &offset); + logger_log_part( + log, &p, + logger_mask_data[logger_x].mask | logger_mask_data[logger_v].mask, + &offset); printf("Wrote part at offset %#016zx.\n", offset); /* Recover the last part from the dump. */ @@ -103,8 +108,8 @@ void test_log_parts(struct logger *log) { } } -void test_log_gparts(struct logger *log) { - struct dump *d = log->dump; +void test_log_gparts(struct logger_writer *log) { + struct dump *d = &log->dump; /* Write several copies of a part to the dump. */ struct gpart p; @@ -116,21 +121,26 @@ void test_log_gparts(struct logger *log) { size_t offset = d->count; /* Write the full part. */ - logger_log_gpart(log, &p, - logger_mask_x | logger_mask_v | logger_mask_a | - logger_mask_h | logger_mask_consts, - &offset); + logger_log_gpart( + log, &p, + logger_mask_data[logger_x].mask | logger_mask_data[logger_v].mask | + logger_mask_data[logger_a].mask | logger_mask_data[logger_h].mask | + logger_mask_data[logger_consts].mask, + &offset); printf("Wrote gpart at offset %#016zx.\n", offset); /* Write only the position. */ p.x[0] = 2.0; - logger_log_gpart(log, &p, logger_mask_x, &offset); + logger_log_gpart(log, &p, logger_mask_data[logger_x].mask, &offset); printf("Wrote gpart at offset %#016zx.\n", offset); /* Write the position and velocity. */ p.x[0] = 3.0; p.v_full[0] = 0.3; - logger_log_gpart(log, &p, logger_mask_x | logger_mask_v, &offset); + logger_log_gpart( + log, &p, + logger_mask_data[logger_x].mask | logger_mask_data[logger_v].mask, + &offset); printf("Wrote gpart at offset %#016zx.\n", offset); /* Recover the last part from the dump. */ @@ -173,8 +183,8 @@ void test_log_gparts(struct logger *log) { } } -void test_log_timestamps(struct logger *log) { - struct dump *d = log->dump; +void test_log_timestamps(struct logger_writer *log) { + struct dump *d = &log->dump; /* The timestamp to log. */ unsigned long long int t = 10; @@ -245,7 +255,7 @@ void test_log_timestamps(struct logger *log) { int main(int argc, char *argv[]) { /* Prepare a logger. */ - struct logger log; + struct logger_writer log; struct swift_params params; parser_read_file("logger.yml", ¶ms); logger_init(&log, ¶ms); @@ -265,7 +275,7 @@ int main(int argc, char *argv[]) { remove(filename); /* Clean the logger. */ - logger_clean(&log); + logger_free(&log); /* Return a happy number. 
*/ return 0; diff --git a/tests/tolerance_125_perturbed.dat b/tests/tolerance_125_perturbed.dat index 95f5f78246a82b7c326c87f9b4edbac4f51c65e9..d6b21204ae9cec00f0d84a20e3c58bc34a4b4be1 100644 --- a/tests/tolerance_125_perturbed.dat +++ b/tests/tolerance_125_perturbed.dat @@ -1,4 +1,4 @@ # ID pos_x pos_y pos_z v_x v_y v_z h rho div_v S u P c a_x a_y a_z h_dt v_sig dS/dt du/dt 0 1e-4 1e-4 1e-4 1e-4 1e-4 1e-4 1e-4 1e-4 1e-4 1e-4 1e-4 1e-4 1e-4 1e-4 1e-4 1e-4 1e-4 1e-4 1e-4 1e-4 0 1e-4 1e-4 1e-4 1e-4 1e-4 1e-4 1e-4 1e-4 1e-4 1e-4 1e-4 1e-4 1e-4 3.6e-3 2e-3 2e-3 1e-4 1e-4 1e-4 1e-4 - 0 1e-6 1e-6 1e-6 1e-6 1e-6 1e-6 1e-6 1e-6 1e-6 1e-6 1e-6 1e-6 1e-6 2e-4 2e-4 2e-4 1e-6 1e-6 1e-6 1e-6 + 0 1e-6 1e-6 1e-6 1e-6 1e-6 1e-6 1e-6 1e-6 1e-6 1e-6 1e-6 1e-6 1e-6 5e-4 5e-4 5e-4 1e-6 1e-6 1e-6 1e-6 diff --git a/tools/make_cell_hierarchy.sh b/tools/make_cell_hierarchy.sh old mode 100644 new mode 100755 index 87fbe4c97f4aadcbb9be5867a62e8acb56415820..9d1d3caf7c4e2f0514c3d6ad5b2db48efa8958d5 --- a/tools/make_cell_hierarchy.sh +++ b/tools/make_cell_hierarchy.sh @@ -9,7 +9,7 @@ then rm $csv_output fi -for filename in ./cell_hierarchy_*.csv; +for filename in $@; do cat $filename >> cell_hierarchy.csv done
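The kick and time-step hunks above all operate on SWIFT's integer time-line: a particle's step is an integer number of time-line ticks, and multiplying by the engine's time_base converts it into a physical interval, as in dt_kick_grav = (ti_step / 2) * time_base for the non-cosmological second half-kick. A minimal standalone sketch of that conversion, with illustrative time-line constants rather than SWIFT's actual ones:

    /* Standalone sketch (not SWIFT source): converting an integer time-line
     * step into a physical kick interval, mirroring the non-cosmological
     * branch of runner_do_kick2().  The constants are illustrative only. */
    #include <stdio.h>

    typedef long long integertime_t;

    int main(void) {
      /* Suppose the run covers 1.0 internal time units on a 2^40-tick time-line. */
      const integertime_t nr_ticks = 1LL << 40;
      const double time_base = 1.0 / (double)nr_ticks;

      /* A particle whose current step spans 2^30 ticks. */
      const integertime_t ti_step = 1LL << 30;

      /* Second half-kick: half of the step, converted to physical time. */
      const double dt_kick_grav = (ti_step / 2) * time_base;

      printf("half-kick interval = %g internal time units\n", dt_kick_grav);
      return 0;
    }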
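The scheduler.c and task.c hunks are coupled: scheduler_start() no longer resets the per-task tic/toc timers, so the dump routines now treat a task as having run in the current step when its start time-stamp tic is newer than the step's start time-stamp e->tic_step, instead of testing toc != 0. A minimal sketch of that selection predicate, using illustrative names rather than SWIFT's structures:

    /* Standalone sketch (illustrative, not SWIFT source): selecting tasks that
     * ran during the current step once timers persist across steps. */
    #include <stdio.h>

    struct toy_task {
      long long tic, toc; /* start / end time-stamps in ticks */
      int implicit;       /* implicit tasks are never dumped */
    };

    static int ran_this_step(const struct toy_task *t, long long step_tic) {
      /* With timers no longer zeroed, toc != 0 would also match tasks from
       * earlier steps; comparing tic against the step start does not. */
      return !t->implicit && t->tic > step_tic;
    }

    int main(void) {
      const long long step_tic = 1000;                  /* start of the current step */
      const struct toy_task previous = {400, 900, 0};   /* ran in an earlier step */
      const struct toy_task current = {1200, 1500, 0};  /* ran in this step */
      printf("previous: %d, current: %d\n", ran_this_step(&previous, step_tic),
             ran_this_step(&current, step_tic));
      return 0;
    }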
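In the updated testLogger.c, the fields written for each logger record are selected by OR-ing per-field masks taken from logger_mask_data[...] rather than the old logger_mask_* constants. A minimal sketch of that mask-composition pattern, with made-up field names and mask values standing in for the logger's real table:

    /* Standalone sketch (illustrative, not the SWIFT logger API): combining
     * per-field masks into a single record mask and testing them back. */
    #include <stdio.h>

    enum field { field_x = 0, field_v, field_a, field_count };

    struct mask_entry {
      unsigned int mask; /* one bit per field */
      const char *name;
    };

    static const struct mask_entry mask_data[field_count] = {
        {1u << 0, "positions"}, {1u << 1, "velocities"}, {1u << 2, "accelerations"}};

    int main(void) {
      /* A record holding positions and velocities only. */
      const unsigned int record_mask =
          mask_data[field_x].mask | mask_data[field_v].mask;

      for (int i = 0; i < field_count; i++)
        if (record_mask & mask_data[i].mask) printf("%s logged\n", mask_data[i].name);
      return 0;
    }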