diff --git a/.gitignore b/.gitignore index 5615f036ef0f3a51db7c156afe69b8511e015d4c..5284b8c7208812c41b9044cd482e1047d3b13fd8 100644 --- a/.gitignore +++ b/.gitignore @@ -29,7 +29,7 @@ examples/*/*.h5 examples/*/*.png examples/*/*.mp4 examples/*/*.txt -examples/*/dependency_graph_*.csv +examples/*/dependency_graph.csv examples/*/restart/* examples/*/used_parameters.yml examples/*/unused_parameters.yml @@ -37,7 +37,6 @@ examples/*/*/*.xmf examples/*/*/*.png examples/*/*/*.mp4 examples/*/*/*.txt -examples/*/*/*.dot examples/*/*/*.rst examples/*/*/*.hdf5 examples/*/snapshots* diff --git a/README b/README index 272188b3f7926b562ba993da3d24ae547c6a0397..b51abc121f7cc7c1b4baa851c02045b5f4614bbb 100644 --- a/README +++ b/README @@ -36,7 +36,8 @@ Parameters: -s, --hydro Run with hydrodynamics. -S, --stars Run with stars. -x, --velociraptor Run with structure finding. - + --limiter Run with time-step limiter. + Control options: -a, --pin Pin runners using processor affinity. diff --git a/README.md b/README.md index 7a3c1287c79922a751595840295063a8ca347ef7..29415f27ee62f154b01dcd6a65414d7288a0a63f 100644 --- a/README.md +++ b/README.md @@ -84,6 +84,7 @@ Parameters: -s, --hydro Run with hydrodynamics. -S, --stars Run with stars. -x, --velociraptor Run with structure finding. + --limiter Run with time-step limiter. 
Control options: diff --git a/configure.ac b/configure.ac index 04e55b047dd742a00d0629f6b3cce3f1e7fe8371..53ae0a717b60fc0e79b02e684de3e5eaf6b504fd 100644 --- a/configure.ac +++ b/configure.ac @@ -957,7 +957,7 @@ if test "x$with_velociraptor" != "xno"; then AC_PROG_FC AC_FC_LIBRARY_LDFLAGS if test "x$with_velociraptor" != "xyes" -a "x$with_velociraptor" != "x"; then - VELOCIRAPTOR_LIBS="-L$with_velociraptor -lvelociraptor -lstdc++ -lhdf5_cpp" + VELOCIRAPTOR_LIBS="-L$with_velociraptor -lvelociraptor -lmpi -lstdc++ -lhdf5_cpp" CFLAGS="$CFLAGS -fopenmp" else VELOCIRAPTOR_LIBS="" diff --git a/doc/RTD/source/CommandLineOptions/index.rst b/doc/RTD/source/CommandLineOptions/index.rst index bd58f031e622272d0245599621fc635891588a8f..e2603532b4ed4e64c86887f2a4f7c35f80cb08bf 100644 --- a/doc/RTD/source/CommandLineOptions/index.rst +++ b/doc/RTD/source/CommandLineOptions/index.rst @@ -31,6 +31,7 @@ can be found by typing ``./swift -h``:: -s, --hydro Run with hydrodynamics. -S, --stars Run with stars. -x, --velociraptor Run with structure finding. + --limiter Run with time-step limiter. Control options: diff --git a/doc/RTD/source/GettingStarted/compiling_code.rst b/doc/RTD/source/GettingStarted/compiling_code.rst index a0ce1c08eaf6b08a298ac4b720017273d4fa6559..696d5a232b53205f9dbd6e03647d9da86e2b1ceb 100644 --- a/doc/RTD/source/GettingStarted/compiling_code.rst +++ b/doc/RTD/source/GettingStarted/compiling_code.rst @@ -24,6 +24,15 @@ MPI A recent implementation of MPI, such as Open MPI (v2.x or higher), is required, or any library that implements at least the MPI 3 standard. +Running SWIFT on OmniPath architectures with Open MPI +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +When running SWIFT on an OmniPath system we suggest that Open MPI v3.1.3 or higher +is used. A bug in the ``psm2`` library causes communications to be lost. It is +possible to run SWIFT with older versions (tested with v2.1.x) of Open MPI so +long as ``psm`` is used instead of ``psm2``, i.e.
that you invoke ``mpirun`` +with ``--mca btl vader,self -mca mtl psm``. + Libtool ~~~~~~~ The build system depends on libtool. diff --git a/doc/RTD/source/NewOption/index.rst b/doc/RTD/source/NewOption/index.rst index 441cd860ed79dabad2005b39ae4549d1496ab98d..08f1ff04efa9508145c1f7e04d72d2f40fe22f0d 100644 --- a/doc/RTD/source/NewOption/index.rst +++ b/doc/RTD/source/NewOption/index.rst @@ -7,8 +7,8 @@ General information for adding new schemes ========================================== The following steps are required for any new options (such as new -:ref:`hydro`, :ref:`chemistry`, :ref:`cooling`, -:ref:`equation_of_state`, :ref:`stars` or :ref:`gravity`) +:ref:`hydro`, chemistry, cooling, +:ref:`equation_of_state`, stars, or gravity) In order to add a new scheme, you will need to: diff --git a/doc/RTD/source/ParameterFiles/index.rst b/doc/RTD/source/ParameterFiles/index.rst index 93cf9b6e86895f9f20c8d644d8d24ccab5df93d6..488e8d37d7fa530f6dcd536f6bb39debeaab9f25 100644 --- a/doc/RTD/source/ParameterFiles/index.rst +++ b/doc/RTD/source/ParameterFiles/index.rst @@ -1,496 +1,18 @@ .. Parameter Files - Matthieu Schaller, 21st October 2018 + Josh Borrow 22nd January 2019 .. _Parameter_File_label: Parameter Files =============== -File format and basic information ---------------------------------- - -The parameter file uses a format similar to the `YAML format -<https://en.wikipedia.org/wiki/YAML>`_ but reduced to only the -elements required for the SWIFT parameters. Options are given by a -name followed by a column and the value of the parameter: - -.. code:: YAML - - ICs: santa_barbara.hdf5 - dt_max: 1.5 - shift: [2., 4., 5.] - -Comments can be inserted anywhere and start with a hash: - -.. code:: YAML - - # Description of the physics - viscosity_alpha: 2.0 - dt_max: 1.5 # seconds - -A typical SWIFT parameter file is split into multiple sections that -may or may not be present depending on the different configuration -options. 
The sections start with a label and can contain any number of -parameters: - -.. code:: YAML - - Cosmology: # Planck13 - Omega_m: 0.307 - Omega_lambda: 0.693 - Omega_b: 0.0455 - h: 0.6777 - a_begin: 0.0078125 # z = 127 - -The options can be integer values, floating point numbers, characters -or strings. If SWIFT expects a number and string is given, an error -will be raised. The code can also read an array of values: - -.. code:: YAML - - shift: [2., 4., 5.] - -Some options in the parameter file are optional and -when not provided, SWIFT will run with the default value. However, if -a compulsory parameter is missing an error will be raised at -start-up. - -Finally, SWIFT outputs two YAML files at the start of a run. The first -one ``used_parameters.yml`` contains all the parameters that were used -for this run, **including all the optional parameters with their -default values**. This file can be used to start an exact copy of the -run. The second file, ``unused_parameters.yml`` contains all the -values that were not read from the parameter file. This can be used to -simplify the parameter file or check that nothing important was -ignored (for instance because the code is not configured to use some -options). - -The rest of this page describes all the SWIFT parameters, split by -section. A list of all the possible parameters is kept in the file -``examples/parameter_examples.yml``. - -Internal Unit System --------------------- - -The ``InternalUnitSystem`` section describes the units used internally by the -code. This is the system of units in which all the equations are solved. All -physical constants are converted to this system and if the ICs use a different -system (see :ref:`ICs_units_label`) the particle quantities will be converted -when read in. - -The system of units is described using the value of the 5 basic units -of any system with respect to the CGS system. Instead of using a unit -of time we use a unit of velocity as this is more intuitive. 
Users -hence need to provide: - -* a unit of length: ``UnitLength_in_cgs``, -* a unit of mass: ``UnitMass_in_cgs``, -* a unit of velocity ``UnitVelocity_in_cgs``, -* a unit of electric current ``UnitCurrent_in_cgs``, -* a unit of temperature ``UnitTemp_in_cgs``. - -All these need to be expressed with respect to their cgs counter-part -(i.e. :math:`cm`, :math:`g`, :math:`cm/s`, :math:`A` and :math:`K`). Recall -that there are no h-factors in any of SWIFT's quantities; we, for instance, -use :math:`cm` and not :math:`cm/h`. - -For instance to use the commonly adopted system of 10^10 Msun as a -unit for mass, mega-parsec as a unit of length and km/s as a unit of -speed, we would use: - -.. code:: YAML - - # Common unit system for cosmo sims - InternalUnitSystem: - UnitMass_in_cgs: 1.98848e43 # 10^10 M_sun in grams - UnitLength_in_cgs: 3.08567758e24 # 1 Mpc in centimeters - UnitVelocity_in_cgs: 1e5 # 1 km/s in centimeters per second - UnitCurrent_in_cgs: 1 # 1 Ampere - UnitTemp_in_cgs: 1 # 1 Kelvin - -Note that there are currently no variables in any of the SWIFT physics -schemes that make use of the unit of electric current. There is also -no incentive to use anything else than Kelvin but that makes the whole -system consistent with any possible unit system. - -If one is interested in using the more humorous `FFF unit -system <https://en.wikipedia.org/wiki/FFF_system>`_ one would use - -.. code:: YAML - - # FFF unit system - InternalUnitSystem: - UnitMass_in_cgs: 40823.3133 # 1 Firkin (fir) in grams - UnitLength_in_cgs: 20116.8 # 1 Furlong (fur) in cm - UnitVelocity_in_cgs: 0.01663095 # 1 Furlong (fur) per Fortnight (ftn) in cm/s - UnitCurrent_in_cgs: 1 # 1 Ampere - UnitTemp_in_cgs: 1 # 1 Kelvin - -The value of the physical constants in this system is left as an -exercise for the reader [#f1]_. - -Cosmology ---------- - -When running a cosmological simulation, the section ``Cosmology`` sets the values of the -cosmological model. 
The expanded :math:`\Lambda\rm{CDM}` parameters governing the -background evolution of the Universe need to be specified here. These are: - -* The reduced Hubble constant: :math:`h`: ``h``, -* The matter density parameter :math:`\Omega_m`: ``Omega_m``, -* The cosmological constant density parameter :math:`\Omega_\Lambda`: ``Omega_lambda``, -* The baryon density parameter :math:`\Omega_b`: ``Omega_b``, -* The radiation density parameter :math:`\Omega_r`: ``Omega_r``. - -The last parameter can be omitted and will default to :math:`\Omega_r = 0`. Note -that SWIFT will verify on start-up that the matter content of the initial conditions -matches the cosmology specified in this section. - -This section also specifies the start and end of the simulation expressed in -terms of scale-factors. The two parameters are: - -* Initial scale-factor: ``a_begin``, -* Final scale-factor: ``a_end``. - -Two additional optional parameters can be used to change the equation of -state of dark energy :math:`w(a)`. We use the evolution law :math:`w(a) = -w_0 + w_a (1 - a)`. The two parameters in the YAML file are: - -* The :math:`z=0` dark energy equation of state parameter :math:`w_0`: ``w_0`` -* The dark energy equation of state evolution parameter :math:`w_a`: ``w_a`` - -If unspecified these parameters default to the default -:math:`\Lambda\rm{CDM}` values of :math:`w_0 = -1` and :math:`w_a = 0`. - -For a Planck+13 cosmological model (ignoring radiation density as is -commonly done) and running from :math:`z=127` to :math:`z=0`, one would hence -use the following parameters: - -.. code:: YAML - - Cosmology: - a_begin: 0.0078125 # z = 127 - a_end: 1.0 # z = 0 - h: 0.6777 - Omega_m: 0.307 - Omega_lambda: 0.693 - Omega_b: 0.0455 - Omega_r: 0. # (Optional) - w_0: -1.0 # (Optional) - w_a: 0. # (Optional) - -When running a non-cosmological simulation (i.e. without the ``-c`` run-time -flag) this section of the YAML file is entirely ignored. 
- -Gravity -------- - -The behaviour of the self-gravity solver can be modified by the parameters -provided in the ``Gravity`` section. The theory document puts these parameters into the -context of the equations being solved. We give a brief overview here. - -* The Plummer-equivalent co-moving softening length used for all particles :math:`\epsilon_{com}`: ``comoving_softening``, -* The Plummer-equivalent maximal physical softening length used for all particles :math:`\epsilon_{max}`: ``comoving_softening``, - -At any redshift :math:`z`, the Plummer-equivalent softening length used by the -code will be :math:`\epsilon=\min(\epsilon_{max}, -\frac{\epsilon_{com}}{z+1})`. This is expressed in internal units. - -* The opening angle (multipole acceptance criterion) used in the FMM :math:`\theta`: ``theta``, -* The time-step size pre-factor :math:`\eta`: ``eta``, - -The time-step of a given particle is given by :math:`\Delta t = -\eta\sqrt{\frac{\epsilon}{|\overrightarrow{a}|}}`, where -:math:`\overrightarrow{a}` is the particle's acceleration. Power et al. (2003) recommend using :math:`\eta=0.025`. -The last tree-related parameter is - -* The tree rebuild frequency: ``rebuild_frequency``. - -The tree rebuild frequency is an optional parameter defaulting to -:math:`0.01`. It is used to trigger the re-construction of the tree every time a -fraction of the particles have been integrated (kicked) forward in time. - -Simulations using periodic boundary conditions use additional parameters for the -Particle-Mesh part of the calculation. 
The last three are optional: - -* The number cells along each axis of the mesh :math:`N`: ``mesh_side_length``, -* The mesh smoothing scale in units of the mesh cell-size :math:`a_{\rm - smooth}`: ``a_smooth`` (default: ``1.25``), -* The scale above which the short-range forces are assumed to be 0 (in units of - the mesh cell-size multiplied by :math:`a_{\rm smooth}`) :math:`r_{\rm - cut,max}`: ``r_cut_max`` (default: ``4.5``), -* The scale below which the short-range forces are assumed to be exactly Newtonian (in units of - the mesh cell-size multiplied by :math:`a_{\rm smooth}`) :math:`r_{\rm - cut,min}`: ``r_cut_min`` (default: ``0.1``), - -For most runs, the default values can be used. Only the number of cells along -each axis needs to be specified. The remaining three values are best described -in the context of the full set of equations in the theory documents. - -As a summary, here are the values used for the EAGLE :math:`100^3~{\rm Mpc}^3` -simulation: - -.. code:: YAML - - # Parameters for the self-gravity scheme for the EAGLE-100 box - Gravity: - eta: 0.025 - theta: 0.7 - comoving_softening: 0.0026994 # 0.7 proper kpc at z=2.8. - max_physical_softening: 0.0007 # 0.7 proper kpc - rebuild_frequency: 0.01 # Default optional value - mesh_side_length: 512 - a_smooth: 1.25 # Default optional value - r_cut_max: 4.5 # Default optional value - r_cut_min: 0.1 # Default optional value - - -SPH ---- - -Time Integration ----------------- - -The ``TimeIntegration`` section is used to set some general parameters related to time -integration. In all cases, users have to provide a minimal and maximal time-step -size: - -* Maximal time-step size: ``dt_max`` -* Minimal time-step size: ``dt_min`` - -These quantities are expressed in internal units. All particles will have their -time-step limited by the maximal value on top of all the other criteria that may -apply to them (gravity acceleration, Courant condition, etc.). 
If a particle -demands a time-step size smaller than the minimum, SWIFT will abort with an -error message. This is a safe-guard against simulations that would never -complete due to the number of steps to run being too large. - -When running a non-cosmological simulation, the user also has to provide the -time of the start and the time of the end of the simulation: - -* Start time: ``time_begin`` -* End time: ``time_end`` - -Both are expressed in internal units. The start time is typically set to ``0`` -but SWIFT can handle any value here. For cosmological runs, these values are -ignored and the start- and end-points of the runs are specified by the start and -end scale-factors in the cosmology section of the parameter file. - -Additionally, when running a cosmological volume, advanced users can specify the -value of the dimensionless pre-factor entering the time-step condition linked -with the motion of particles with respect to the background expansion and mesh -size. See the theory document for the exact equations. - -* Dimensionless pre-factor of the maximal allowed displacement: - ``max_dt_RMS_factor`` (default: ``0.25``) - -This value rarely needs altering. - -A full time-step section for a non-cosmological run would be: - -.. code:: YAML - - TimeIntegration: - time_begin: 0 # Start time in internal units. - time_end: 10. # End time in internal units. - dt_max: 1e-2 - dt_min: 1e-6 - -Whilst for a cosmological run, one would need: - -.. code:: YAML - - TimeIntegration: - dt_max: 1e-4 - dt_min: 1e-10 - max_dt_RMS_factor: 0.25 # Default optional value - -Initial Conditions ------------------- - -This ``InitialConditions`` section of the parameter file contains all the options related to -the initial conditions. The main two parameters are - -* The name of the initial conditions file: ``file_name``, -* Whether the problem uses periodic boundary conditions or not: ``periodic``. - -The file path is relative to where the code is being executed. 
These -parameters can be complemented by some optional values to drive some -specific behaviour of the code. - -* Whether to generate gas particles from the DM particles: ``generate_gas_in_ics`` (default: ``0``), -* Whether to activate an additional clean-up of the SPH smoothing lengths: ``cleanup_smoothing_lengths`` (default: ``0``) - -The procedure used to generate gas particles from the DM ones is -outlined in the theory documents and is too long for a full -description here. The cleaning of the smoothing lengths is an -expensive operation but can be necessary in the cases where the -initial conditions are of poor quality and the values of the smoothing -lengths are far from the values they should have. - -When starting from initial conditions created for Gadget, some -additional flags can be used to convert the values from h-full to -h-free and remove the additional :math:`\sqrt{a}` in the velocities: - -* Whether to re-scale all the fields to remove powers of h from the quantities: ``cleanup_h_factors`` (default: ``0``), -* Whether to re-scale the velocities to remove the :math:`\sqrt{a}` assumed by Gadget : ``cleanup_velocity_factors`` (default: ``0``). - -The h-factors are self-consistently removed according to their units -and this is applied to all the quantities irrespective of particle -types. The correct power of ``h`` is always calculated for each -quantity. - -Finally, SWIFT also offers these options: - -* A factor to re-scale all the smoothing-lengths by a fixed amount: ``smoothing_length_scaling`` (default: ``1.``), -* A shift to apply to all the particles: ``shift`` (default: ``[0.0,0.0,0.0]``), -* Whether to replicate the box along each axis: ``replicate`` (default: ``1``). - -The shift is expressed in internal units. The option to replicate the -box is especially useful for weak-scaling tests. 
When set to an -integer >1, the box size is multiplied by this integer along each axis -and the particles are duplicated and shifted such as to create exact -copies of the simulation volume. - -The full section to start a DM+hydro run from Gadget DM-only ICs would -be: - -.. code:: YAML - - InitialConditions: - file_name: my_ics.hdf5 - periodic: 1 - cleanup_h_factors: 1 - cleanup_velocity_factors: 1 - generate_gas_in_ics: 1 - cleanup_smoothing_lengths: 1 - - -Physical Constants ------------------- - -For some idealised test it can be useful to overwrite the value of -some physical constants; in particular the value of the gravitational -constant. SWIFT offers an optional parameter to overwrite the value of -:math:`G_N`. - -.. code:: YAML - - PhysicalConstants: - G: 1 - -Note that this set :math:`G` to the specified value in the internal system -of units. Setting a value of `1` when using the system of units (10^10 Msun, -Mpc, km/s) will mean that :math:`G_N=1` in these units [#f2]_ instead of the -normal value :math:`G_N=43.00927`. - -This option is only used for specific tests and debugging. This entire -section of the YAML file can typically be left out. More constants may -be handled in the same way in future versions. - -Snapshots ---------- - -Some additional specific options for the snapshot outputs are described in the -following pages: +This section describes the options that are available in the +parameter files. .. toctree:: - :maxdepth: 1 + :maxdepth: 2 + :caption: Contents: + parameter_description output_selection -Statistics ----------- - -Restarts --------- - -SWIFT can write check-pointing files and restart from them. The behaviour of -this mechanism is driven by the options in the ``Restarts`` section of the YAML -parameter file. All the parameters are optional but default to values that -ensure a reasonable behaviour. - -* Whether or not to enable the dump of restart files: ``enable`` (default: - ``1``).
- -This parameter acts a master-switch for the check-pointing capabilities. All the -other options require the ``enable`` parameter to be set to ``1``. - -* Whether or not to save a copy of the previous set of check-pointing files: - ``save`` (default: ``1``), -* Whether or not to dump a set of restart file on regular exit: ``onexit`` - (default: ``0``), -* The wall-clock time in hours between two sets of restart files: - ``delta_hours`` (default: ``6.0``). - -Note that there is no buffer time added to the ``delta_hours`` value. If the -system's batch queue run time limit is set to 6 hours, the user must specify a -smaller value to allow for enough time to safely dump the check-point files. - -* The sub-directory in which to store the restart files: ``subdir`` (default: - ``restart``), -* The basename of the restart files: ``basename`` (default: ``swift``) - -If the directory does not exist, SWIFT will create it. When resuming a run, -SWIFT, will look for files with the name provided in the sub-directory specified -here. The files themselves are named ``basename_000001.rst`` where the basename -is replaced by the user-specified name and the 6-digits number corresponds to -the MPI-rank. SWIFT writes one file per MPI rank. If the ``save`` option has -been activated, the previous set of restart files will be named -``basename_000000.rst.prev``. - -SWIFT can also be stopped by creating an empty file called ``stop`` in the -directory where the code runs. This will make SWIFT dump a fresh set of restart -file (irrespective of the specified ``delta_time`` between dumps) and exit -cleanly. One parameter governs this behaviour: - -* Number of steps between two checks for the presence of a ``stop`` file: - ``stop_steps`` (default: ``100``). - -The default value is chosen such that SWIFT does not need to poll the -file-system to often, which can take a significant amount of time on distributed -systems. 
For runs where the small time-steps take a much larger amount of time, -a smaller value is recommended to allow for a finer control over when the code -can be stopped. - -Finally, SWIFT can automatically stop after a specified amount of wall-clock -time. The code can also run a command when exiting in this fashion, which can be -used, for instance, to interact with the batch queue system: - -* Maximal wall-clock run time in hours: ``max_run_time`` (default: ``24.0``), -* Whether or not to run a command on exit: ``resubmit_on_exit`` (default: - ``0``), -* The command to run on exit: ``resubmit_command`` (default: ``./resub.sh``). - -Note that no check is performed on the validity of the command to run. SWIFT -simply calls ``system()`` with the user-specified command. - -To run SWIFT, dumping check-pointing files every 6 hours and running for 24 -hours after which a shell command will be run, one would use: - -.. code:: YAML - - Restarts: - enable: 1 - save: 1 # Keep copies - onexit: 0 - subdir: restart # Sub-directory of the directory where SWIFT is run - basename: swift - delta_hours: 6.0 - stop_steps: 100 - max_run_time: 24.0 # In hours - resubmit_on_exit: 1 - resubmit_command: ./resub.sh - - - -Scheduler ---------- - -Domain Decomposition --------------------- - -.. [#f1] The thorough reader (or overly keen SWIFT tester) would find that the speed of light is :math:`c=1.8026\times10^{12}\,\rm{fur}\,\rm{ftn}^{-1}`, Newton's constant becomes :math:`G_N=4.896735\times10^{-4}~\rm{fur}^3\,\rm{fir}^{-1}\,\rm{ftn}^{-2}` and Planck's constant turns into :math:`h=4.851453\times 10^{-34}~\rm{fur}^2\,\rm{fir}\,\rm{ftn}^{-1}`. - - -.. [#f2] which would translate into a constant :math:`G_N=1.5517771\times10^{-9}~cm^{3}\,g^{-1}\,s^{-2}` if expressed in the CGS system. 
diff --git a/doc/RTD/source/ParameterFiles/output_selection.rst b/doc/RTD/source/ParameterFiles/output_selection.rst index 90ab0f9a7c738c28832bc36de83c4034141d4b21..b84a776c7dcac2136dedd2324cfef43d7a5455ea 100644 --- a/doc/RTD/source/ParameterFiles/output_selection.rst +++ b/doc/RTD/source/ParameterFiles/output_selection.rst @@ -36,6 +36,10 @@ Example of file with redshift:: 10 5 +If an output list is specified, the basic values for the first +snapshot (``time_first``, ``scale_factor_first``) and difference +(``delta_time``) are ignored. + .. _Output_selection_label: Output Selection diff --git a/doc/RTD/source/ParameterFiles/parameter_description.rst b/doc/RTD/source/ParameterFiles/parameter_description.rst new file mode 100644 index 0000000000000000000000000000000000000000..6304b60c5eb6df77d79e2ff50b9ba895d31a7889 --- /dev/null +++ b/doc/RTD/source/ParameterFiles/parameter_description.rst @@ -0,0 +1,634 @@ +.. Parameter Description + Matthieu Schaller, 21st October 2018 + +.. _Parameters_basics: + +File format and basic information +--------------------------------- + +The parameter file uses a format similar to the `YAML format +<https://en.wikipedia.org/wiki/YAML>`_ but reduced to only the +elements required for the SWIFT parameters. Options are given by a +name followed by a colon and the value of the parameter: + +.. code:: YAML + + ICs: santa_barbara.hdf5 + dt_max: 1.5 + shift: [2., 4., 5.] + +Comments can be inserted anywhere and start with a hash: + +.. code:: YAML + + # Description of the physics + viscosity_alpha: 2.0 + dt_max: 1.5 # seconds + +A typical SWIFT parameter file is split into multiple sections that +may or may not be present depending on the different configuration +options. The sections start with a label and can contain any number of +parameters: + +..
code:: YAML + + Cosmology: # Planck13 + Omega_m: 0.307 + Omega_lambda: 0.693 + Omega_b: 0.0455 + h: 0.6777 + a_begin: 0.0078125 # z = 127 + +The options can be integer values, floating point numbers, characters +or strings. If SWIFT expects a number and a string is given, an error +will be raised. The code can also read an array of values: + +.. code:: YAML + + shift: [2., 4., 5.] + +Some options in the parameter file are optional and +when not provided, SWIFT will run with the default value. However, if +a compulsory parameter is missing an error will be raised at +start-up. + +Finally, SWIFT outputs two YAML files at the start of a run. The first one +``used_parameters.yml`` contains all the parameters that were used for this run, +**including all the optional parameters left unspecified with their default +values**. This file can be used to start an exact copy of the run. The second +file, ``unused_parameters.yml`` contains all the values that were not read from +the parameter file. This can be used to simplify the parameter file or check +that nothing important was ignored (for instance because the code is not +configured to use some options). + +The rest of this page describes all the SWIFT parameters, split by +section. A list of all the possible parameters is kept in the file +``examples/parameter_examples.yml``. + +.. _Parameters_units: + +Internal Unit System +-------------------- + +The ``InternalUnitSystem`` section describes the units used internally by the +code. This is the system of units in which all the equations are solved. All +physical constants are converted to this system and if the ICs use a different +system (see the snapshots' :ref:`ICs_units_label` section of the documentation) +the particle quantities will be converted when read in. + +The system of units is described using the value of the 5 basic units +of any system with respect to the CGS system. Instead of using a unit +of time we use a unit of velocity as this is more intuitive.
Users +hence need to provide: + +* a unit of length: ``UnitLength_in_cgs``, +* a unit of mass: ``UnitMass_in_cgs``, +* a unit of velocity ``UnitVelocity_in_cgs``, +* a unit of electric current ``UnitCurrent_in_cgs``, +* a unit of temperature ``UnitTemp_in_cgs``. + +All these need to be expressed with respect to their cgs counter-part +(i.e. :math:`cm`, :math:`g`, :math:`cm/s`, :math:`A` and :math:`K`). Recall +that there are no h-factors in any of SWIFT's quantities; we, for instance, +use :math:`cm` and not :math:`cm/h`. + +For instance to use the commonly adopted system of 10^10 Msun as a +unit for mass, mega-parsec as a unit of length and km/s as a unit of +speed, we would use: + +.. code:: YAML + + # Common unit system for cosmo sims + InternalUnitSystem: + UnitMass_in_cgs: 1.98848e43 # 10^10 M_sun in grams + UnitLength_in_cgs: 3.08567758e24 # 1 Mpc in centimeters + UnitVelocity_in_cgs: 1e5 # 1 km/s in centimeters per second + UnitCurrent_in_cgs: 1 # 1 Ampere + UnitTemp_in_cgs: 1 # 1 Kelvin + +Note that there are currently no variables in any of the SWIFT physics +schemes that make use of the unit of electric current. There is also +no incentive to use anything else than Kelvin but that makes the whole +system consistent with any possible unit system. + +If one is interested in using the more humorous `FFF unit +system <https://en.wikipedia.org/wiki/FFF_system>`_ one would use + +.. code:: YAML + + # FFF unit system + InternalUnitSystem: + UnitMass_in_cgs: 40823.3133 # 1 Firkin (fir) in grams + UnitLength_in_cgs: 20116.8 # 1 Furlong (fur) in cm + UnitVelocity_in_cgs: 0.01663095 # 1 Furlong (fur) per Fortnight (ftn) in cm/s + UnitCurrent_in_cgs: 1 # 1 Ampere + UnitTemp_in_cgs: 1 # 1 Kelvin + +The value of the physical constants in this system is left as an +exercise for the reader [#f1]_. + +.. _Parameters_cosmology: + +Cosmology +--------- + +When running a cosmological simulation, the section ``Cosmology`` sets the values of the +cosmological model. 
The expanded :math:`\Lambda\rm{CDM}` parameters governing the +background evolution of the Universe need to be specified here. These are: + +* The reduced Hubble constant: :math:`h`: ``h``, +* The matter density parameter :math:`\Omega_m`: ``Omega_m``, +* The cosmological constant density parameter :math:`\Omega_\Lambda`: ``Omega_lambda``, +* The baryon density parameter :math:`\Omega_b`: ``Omega_b``, +* The radiation density parameter :math:`\Omega_r`: ``Omega_r``. + +The last parameter can be omitted and will default to :math:`\Omega_r = 0`. Note +that SWIFT will verify on start-up that the matter content of the initial conditions +matches the cosmology specified in this section. + +This section also specifies the start and end of the simulation expressed in +terms of scale-factors. The two parameters are: + +* Initial scale-factor: ``a_begin``, +* Final scale-factor: ``a_end``. + +Two additional optional parameters can be used to change the equation of +state of dark energy :math:`w(a)`. We use the evolution law :math:`w(a) = +w_0 + w_a (1 - a)`. The two parameters in the YAML file are: + +* The :math:`z=0` dark energy equation of state parameter :math:`w_0`: ``w_0`` +* The dark energy equation of state evolution parameter :math:`w_a`: ``w_a`` + +If unspecified these parameters default to the default +:math:`\Lambda\rm{CDM}` values of :math:`w_0 = -1` and :math:`w_a = 0`. + +For a Planck+13 cosmological model (ignoring radiation density as is +commonly done) and running from :math:`z=127` to :math:`z=0`, one would hence +use the following parameters: + +.. code:: YAML + + Cosmology: + a_begin: 0.0078125 # z = 127 + a_end: 1.0 # z = 0 + h: 0.6777 + Omega_m: 0.307 + Omega_lambda: 0.693 + Omega_b: 0.0455 + Omega_r: 0. # (Optional) + w_0: -1.0 # (Optional) + w_a: 0. # (Optional) + +When running a non-cosmological simulation (i.e. without the ``-c`` run-time +flag) this section of the YAML file is entirely ignored. + +.. 
_Parameters_gravity: + +Gravity +------- + +The behaviour of the self-gravity solver can be modified by the parameters +provided in the ``Gravity`` section. The theory document puts these parameters into the +context of the equations being solved. We give a brief overview here. + +* The Plummer-equivalent co-moving softening length used for all particles :math:`\epsilon_{com}`: ``comoving_softening``, +* The Plummer-equivalent maximal physical softening length used for all particles :math:`\epsilon_{max}`: ``max_physical_softening``, + +At any redshift :math:`z`, the Plummer-equivalent softening length used by the +code will be :math:`\epsilon=\min(\epsilon_{max}, +\frac{\epsilon_{com}}{z+1})`. This is expressed in internal units. + +* The opening angle (multipole acceptance criterion) used in the FMM :math:`\theta`: ``theta``, +* The time-step size pre-factor :math:`\eta`: ``eta``, + +The time-step of a given particle is given by :math:`\Delta t = +\eta\sqrt{\frac{\epsilon}{|\overrightarrow{a}|}}`, where +:math:`\overrightarrow{a}` is the particle's acceleration. Power et al. (2003) recommend using :math:`\eta=0.025`. +The last tree-related parameter is + +* The tree rebuild frequency: ``rebuild_frequency``. + +The tree rebuild frequency is an optional parameter defaulting to +:math:`0.01`. It is used to trigger the re-construction of the tree every time a +fraction of the particles have been integrated (kicked) forward in time. + +Simulations using periodic boundary conditions use additional parameters for the +Particle-Mesh part of the calculation.
The last three are optional: + +* The number of cells along each axis of the mesh :math:`N`: ``mesh_side_length``, +* The mesh smoothing scale in units of the mesh cell-size :math:`a_{\rm + smooth}`: ``a_smooth`` (default: ``1.25``), +* The scale above which the short-range forces are assumed to be 0 (in units of + the mesh cell-size multiplied by :math:`a_{\rm smooth}`) :math:`r_{\rm + cut,max}`: ``r_cut_max`` (default: ``4.5``), +* The scale below which the short-range forces are assumed to be exactly Newtonian (in units of + the mesh cell-size multiplied by :math:`a_{\rm smooth}`) :math:`r_{\rm + cut,min}`: ``r_cut_min`` (default: ``0.1``), + +For most runs, the default values can be used. Only the number of cells along +each axis needs to be specified. The remaining three values are best described +in the context of the full set of equations in the theory documents. + +As a summary, here are the values used for the EAGLE :math:`100^3~{\rm Mpc}^3` +simulation: + +.. code:: YAML + + # Parameters for the self-gravity scheme for the EAGLE-100 box + Gravity: + eta: 0.025 + theta: 0.7 + comoving_softening: 0.0026994 # 0.7 proper kpc at z=2.8. + max_physical_softening: 0.0007 # 0.7 proper kpc + rebuild_frequency: 0.01 # Default optional value + mesh_side_length: 512 + a_smooth: 1.25 # Default optional value + r_cut_max: 4.5 # Default optional value + r_cut_min: 0.1 # Default optional value + + +.. _Parameters_SPH: + +SPH +--- + +.. _Parameters_time_integration: + +Time Integration +---------------- + +The ``TimeIntegration`` section is used to set some general parameters related to time +integration. In all cases, users have to provide a minimal and maximal time-step +size: + +* Maximal time-step size: ``dt_max`` +* Minimal time-step size: ``dt_min`` + +These quantities are expressed in internal units. 
All particles will have their +time-step limited by the maximal value on top of all the other criteria that may +apply to them (gravity acceleration, Courant condition, etc.). If a particle +demands a time-step size smaller than the minimum, SWIFT will abort with an +error message. This is a safe-guard against simulations that would never +complete due to the number of steps to run being too large. + +When running a non-cosmological simulation, the user also has to provide the +time of the start and the time of the end of the simulation: + +* Start time: ``time_begin`` +* End time: ``time_end`` + +Both are expressed in internal units. The start time is typically set to ``0`` +but SWIFT can handle any value here. For cosmological runs, these values are +ignored and the start- and end-points of the runs are specified by the start and +end scale-factors in the cosmology section of the parameter file. + +Additionally, when running a cosmological volume, advanced users can specify the +value of the dimensionless pre-factor entering the time-step condition linked +with the motion of particles with respect to the background expansion and mesh +size. See the theory document for the exact equations. + +* Dimensionless pre-factor of the maximal allowed displacement: + ``max_dt_RMS_factor`` (default: ``0.25``) + +This value rarely needs altering. + +A full time-step section for a non-cosmological run would be: + +.. code:: YAML + + TimeIntegration: + time_begin: 0 # Start time in internal units. + time_end: 10. # End time in internal units. + dt_max: 1e-2 + dt_min: 1e-6 + +Whilst for a cosmological run, one would need: + +.. code:: YAML + + TimeIntegration: + dt_max: 1e-4 + dt_min: 1e-10 + max_dt_RMS_factor: 0.25 # Default optional value + +.. _Parameters_ICs: + +Initial Conditions +------------------ + +The ``InitialConditions`` section of the parameter file contains all the options related to +the initial conditions. 
The main two parameters are + +* The name of the initial conditions file: ``file_name``, +* Whether the problem uses periodic boundary conditions or not: ``periodic``. + +The file path is relative to where the code is being executed. These +parameters can be complemented by some optional values to drive some +specific behaviour of the code. + +* Whether to generate gas particles from the DM particles: ``generate_gas_in_ics`` (default: ``0``), +* Whether to activate an additional clean-up of the SPH smoothing lengths: ``cleanup_smoothing_lengths`` (default: ``0``) + +The procedure used to generate gas particles from the DM ones is +outlined in the theory documents and is too long for a full +description here. The cleaning of the smoothing lengths is an +expensive operation but can be necessary in the cases where the +initial conditions are of poor quality and the values of the smoothing +lengths are far from the values they should have. + +When starting from initial conditions created for Gadget, some +additional flags can be used to convert the values from h-full to +h-free and remove the additional :math:`\sqrt{a}` in the velocities: + +* Whether to re-scale all the fields to remove powers of h from the quantities: ``cleanup_h_factors`` (default: ``0``), +* Whether to re-scale the velocities to remove the :math:`\sqrt{a}` assumed by Gadget : ``cleanup_velocity_factors`` (default: ``0``). + +The h-factors are self-consistently removed according to their units +and this is applied to all the quantities irrespective of particle +types. The correct power of ``h`` is always calculated for each +quantity. + +Finally, SWIFT also offers these options: + +* A factor to re-scale all the smoothing-lengths by a fixed amount: ``smoothing_length_scaling`` (default: ``1.``), +* A shift to apply to all the particles: ``shift`` (default: ``[0.0,0.0,0.0]``), +* Whether to replicate the box along each axis: ``replicate`` (default: ``1``). + +The shift is expressed in internal units. 
The option to replicate the +box is especially useful for weak-scaling tests. When set to an +integer >1, the box size is multiplied by this integer along each axis +and the particles are duplicated and shifted such as to create exact +copies of the simulation volume. + +The full section to start a DM+hydro run from Gadget DM-only ICs would +be: + +.. code:: YAML + + InitialConditions: + file_name: my_ics.hdf5 + periodic: 1 + cleanup_h_factors: 1 + cleanup_velocity_factors: 1 + generate_gas_in_ics: 1 + cleanup_smoothing_lengths: 1 + + +.. _Parameters_constants: + +Physical Constants +------------------ + +For some idealised tests it can be useful to overwrite the value of +some physical constants; in particular the value of the gravitational +constant. SWIFT offers an optional parameter to overwrite the value of +:math:`G_N`. + +.. code:: YAML + + PhysicalConstants: + G: 1 + +Note that this sets :math:`G` to the specified value in the internal system +of units. Setting a value of `1` when using the system of units (10^10 Msun, +Mpc, km/s) will mean that :math:`G_N=1` in these units [#f2]_ instead of the +normal value :math:`G_N=43.00927`. + +This option is only used for specific tests and debugging. This entire +section of the YAML file can typically be left out. More constants may +be handled in the same way in future versions. + +.. _Parameters_snapshots: + +Snapshots +--------- + +The ``Snapshots`` section of the parameter file contains all the options related to +the dump of simulation outputs in the form of HDF5 :ref:`snapshots`. The main +parameter is the base name that will be used for all the outputs in the run: + +* The base name of the HDF5 snapshots: ``basename``. + +This name will then be appended by an under-score and 4 digits followed by +``.hdf5`` (e.g. ``base_name_1234.hdf5``). The 4 digits are used to label the +different outputs, starting at ``0000``. In the default setup the digits simply +increase by one for each snapshot. 
However, if the optional parameter +``int_time_label_on`` is switched on, then we use 6 digits and these will be the +physical time of the simulation rounded to the nearest integer +(e.g. ``base_name_001234.hdf5``) [#f3]_. + +The time of the first snapshot is controlled by the two following options: + +* Time of the first snapshot (non-cosmological runs): ``time_first``, +* Scale-factor of the first snapshot (cosmological runs): ``scale_factor_first``. + +One of those two parameters has to be provided depending on the type of run. In +the case of non-cosmological runs, the time of the first snapshot is expressed +in the internal units of time. Users also have to provide the difference in time +(or scale-factor) between consecutive outputs: + +* Time difference between consecutive outputs: ``delta_time``. + +In non-cosmological runs this is also expressed in internal units. For +cosmological runs, this value is *multiplied* to obtain the +scale-factor of the next snapshot. This implies that the outputs are +equally spaced in :math:`\log(a)` (See :ref:`Output_list_label` to have +snapshots not regularly spaced in time). + +When running the code with structure finding activated, it is often +useful to have a structure catalog written at the same simulation time +as the snapshots. To activate this, the following parameter can be +switched on: + +* Run VELOCIraptor every time a snapshot is dumped: ``invoke_stf`` + (default: ``0``). + +This produces catalogs using the options specified for the stand-alone +VELOCIraptor outputs (see the section :ref:`Parameters_structure_finding`) but +with a base name and output number that matches the snapshot name +(e.g. ``stf_base_name_1234.hdf5``) irrespective of the name specified in the +section dedicated to VELOCIraptor. Note that the invocation of VELOCIraptor at +every dump is done additionally to the stand-alone dumps that can be specified +in the corresponding section of the YAML parameter file. 
+ +Users can optionally specify the level of compression used by the HDF5 library +using the parameter: + +* GZIP compression level of the HDF5 arrays: ``compression`` (default: ``0``). + +The default level of ``0`` implies no compression and values have to be in the +range :math:`[0-9]`. This integer is passed to the i/o library and used for the +lossless GZIP compression algorithm. Higher values imply higher compression but +also more time spent deflating and inflating the data. Note that up until HDF5 +1.10.x this option is not available when using the MPI-parallel version of the +i/o routines. + +Finally, it is possible to specify a different system of units for the snapshots +than the one that was used internally by SWIFT. The format is identical to the +one described above (See the :ref:`Parameters_units` section) and read: + +* a unit of length: ``UnitLength_in_cgs`` (default: ``InternalUnitSystem:UnitLength_in_cgs``), +* a unit of mass: ``UnitMass_in_cgs`` (default: ``InternalUnitSystem:UnitMass_in_cgs``), +* a unit of velocity ``UnitVelocity_in_cgs`` (default: ``InternalUnitSystem:UnitVelocity_in_cgs``), +* a unit of electric current ``UnitCurrent_in_cgs`` (default: ``InternalUnitSystem:UnitCurrent_in_cgs``), +* a unit of temperature ``UnitTemp_in_cgs`` (default: ``InternalUnitSystem:UnitTemp_in_cgs``). + +When un-specified, these all take the same value as assumed by the internal +system of units. These are rarely used but can offer a practical alternative to +converting data in the post-processing of the simulations. + +For a standard cosmological run with structure finding activated, the +full section would be: + +.. code:: YAML + + Snapshots: + basename: output + scale_factor_first: 0.02 # z = 49 + delta_time: 1.02 + invoke_stf: 1 + +Showing all the parameters for a basic hydro test-case, one would have: + +.. 
code:: YAML + + Snapshots: + basename: sedov + time_first: 0.01 + delta_time: 0.005 + invoke_stf: 0 + int_time_label_on: 0 + compression: 3 + UnitLength_in_cgs: 1. # Use cm in outputs + UnitMass_in_cgs: 1. # Use grams in outputs + UnitVelocity_in_cgs: 1. # Use cm/s in outputs + UnitCurrent_in_cgs: 1. # Use Ampere in outputs + UnitTemp_in_cgs: 1. # Use Kelvin in outputs + +Some additional specific options for the snapshot outputs are described in the +following pages: + +* :ref:`Output_list_label` (to have snapshots not evenly spaced in time), +* :ref:`Output_selection_label` (to select what particle fields to write). + + +.. _Parameters_statistics: + +Statistics +---------- + +Some additional specific options for the statistics outputs are described in the +following page: + +* :ref:`Output_list_label` (to have statistics outputs not evenly spaced in time). + +.. _Parameters_restarts: + +Restarts +-------- + +SWIFT can write check-pointing files and restart from them. The behaviour of +this mechanism is driven by the options in the ``Restarts`` section of the YAML +parameter file. All the parameters are optional but default to values that +ensure a reasonable behaviour. + +* Whether or not to enable the dump of restart files: ``enable`` (default: + ``1``). + +This parameter acts as a master-switch for the check-pointing capabilities. All the +other options require the ``enable`` parameter to be set to ``1``. + +* Whether or not to save a copy of the previous set of check-pointing files: + ``save`` (default: ``1``), +* Whether or not to dump a set of restart files on regular exit: ``onexit`` + (default: ``0``), +* The wall-clock time in hours between two sets of restart files: + ``delta_hours`` (default: ``6.0``). + +Note that there is no buffer time added to the ``delta_hours`` value. If the +system's batch queue run time limit is set to 6 hours, the user must specify a +smaller value to allow for enough time to safely dump the check-point files. 
+ +* The sub-directory in which to store the restart files: ``subdir`` (default: + ``restart``), +* The basename of the restart files: ``basename`` (default: ``swift``) + +If the directory does not exist, SWIFT will create it. When resuming a run, +SWIFT will look for files with the name provided in the sub-directory specified +here. The files themselves are named ``basename_000001.rst`` where the basename +is replaced by the user-specified name and the 6-digits number corresponds to +the MPI-rank. SWIFT writes one file per MPI rank. If the ``save`` option has +been activated, the previous set of restart files will be named +``basename_000000.rst.prev``. + +SWIFT can also be stopped by creating an empty file called ``stop`` in the +directory where the code runs. This will make SWIFT dump a fresh set of restart +files (irrespective of the specified ``delta_hours`` between dumps) and exit +cleanly. One parameter governs this behaviour: + +* Number of steps between two checks for the presence of a ``stop`` file: + ``stop_steps`` (default: ``100``). + +The default value is chosen such that SWIFT does not need to poll the +file-system too often, which can take a significant amount of time on distributed +systems. For runs where the small time-steps take a much larger amount of time, +a smaller value is recommended to allow for a finer control over when the code +can be stopped. + +Finally, SWIFT can automatically stop after a specified amount of wall-clock +time. The code can also run a command when exiting in this fashion, which can be +used, for instance, to interact with the batch queue system: + +* Maximal wall-clock run time in hours: ``max_run_time`` (default: ``24.0``), +* Whether or not to run a command on exit: ``resubmit_on_exit`` (default: + ``0``), +* The command to run on exit: ``resubmit_command`` (default: ``./resub.sh``). + +Note that no check is performed on the validity of the command to run. 
SWIFT +simply calls ``system()`` with the user-specified command. + +To run SWIFT, dumping check-pointing files every 6 hours and running for 24 +hours after which a shell command will be run, one would use: + +.. code:: YAML + + Restarts: + enable: 1 + save: 1 # Keep copies + onexit: 0 + subdir: restart # Sub-directory of the directory where SWIFT is run + basename: swift + delta_hours: 6.0 + stop_steps: 100 + max_run_time: 24.0 # In hours + resubmit_on_exit: 1 + resubmit_command: ./resub.sh + +.. _Parameters_scheduler: + +Scheduler +--------- + +.. _Parameters_domain_decomposition: + +Domain Decomposition +-------------------- + +.. _Parameters_structure_finding: + +Structure finding (VELOCIraptor) +-------------------------------- + + +.. [#f1] The thorough reader (or overly keen SWIFT tester) would find that the speed of light is :math:`c=1.8026\times10^{12}\,\rm{fur}\,\rm{ftn}^{-1}`, Newton's constant becomes :math:`G_N=4.896735\times10^{-4}~\rm{fur}^3\,\rm{fir}^{-1}\,\rm{ftn}^{-2}` and Planck's constant turns into :math:`h=4.851453\times 10^{-34}~\rm{fur}^2\,\rm{fir}\,\rm{ftn}^{-1}`. + + +.. [#f2] which would translate into a constant :math:`G_N=1.5517771\times10^{-9}~cm^{3}\,g^{-1}\,s^{-2}` if expressed in the CGS system. + +.. [#f3] This feature only makes sense for non-cosmological runs for which the + internal time unit is such that when rounded to the nearest integer a + sensible number is obtained. A use-case for this feature would be to + compare runs over the same physical time but with different numbers of + snapshots. Snapshots at a given time would always have the same set of + digits irrespective of the number of snapshots produced before. + diff --git a/doc/RTD/source/Snapshots/index.rst b/doc/RTD/source/Snapshots/index.rst new file mode 100644 index 0000000000000000000000000000000000000000..30cdc0e1281ae0420b44d88001992ccbbe588136 --- /dev/null +++ b/doc/RTD/source/Snapshots/index.rst @@ -0,0 +1,199 @@ +.. 
Snapshots + Matthieu Schaller, 5th January 2019 + +.. _snapshots: + +Snapshots +========= + +The snapshots are stored using the HDF5 format and are almost compatible with +Gadget-2 (fully compatible outside of cosmological runs). They do, however, +contain a large set of extensions including units, meta-data about the code and +runs as well as facilities to quickly access the particles in a specific region +of the simulation volume. + +Header +------ + +Meta-data about the code and run +-------------------------------- + +Several groups at the root of the files only contain attributes and are used to +store some meta-data about the simulation and the code itself. + +Code +~~~~ + +The group ``/Code`` contains basic information about the version of the code +that was used to run the simulation that dumped this snapshot. Versions of the +libraries used to compile the code as well as information about the compiler and +the flags used are stored. The most important elements here are the git SHA and +configuration parameters of the code. Alongside the compiler flags, policies and +used parameters, these allow an older run to be reproduced exactly. + +Cosmology +~~~~~~~~~ + +The group ``/Cosmology`` contains information about the cosmological model used +for this simulation. The first important field is the attribute ``Cosmological +run`` which is set to ``1`` for cosmological runs and to ``0`` otherwise. This +allows users to quickly distinguish between these two main modes. Most values in +this section only make sense for cosmological runs. + +All quantities are expressed in the internal system of units (note that this may +differ from the units used in the particle arrays). Values like the look-back +time are given for the redshift (or scale-factor) of this snapshot. + +Policy +~~~~~~ + +The group ``/Policy`` lists the engine policies (defined in ``src/engine.h``) +that were activated in the run that dumped this snapshot. 
The policies roughly +translate to the main run-time parameters of SWIFT. + +GravityScheme +~~~~~~~~~~~~~ + +HydroScheme +~~~~~~~~~~~ + +StarsScheme +~~~~~~~~~~~ + +SubgridScheme +~~~~~~~~~~~~~ + +Unit systems +------------ + +The snapshots contain *two* groups at the root containing information about the +unit systems used in the snapshots. + +The main one ``Units`` contains the units used in the snapshot. In a similar +fashion to what is done for the parameter files (see :ref:`Parameters_units`), +SWIFT specifies only the basic units. These are the units of mass (``U_M``), +length (``U_L``), time (``U_t``), electric current (``U_I``) and temperature +(``U_T``). These are specified in units of their CGS equivalents (gram, +centimeter, second, Ampere, Kelvin). All the quantities present in the particle +arrays are expressed in this system of units. For each quantity, SWIFT gives the +conversion factor in terms of these units. For instance, the internal energy per +unit mass would be expressed as ``U_L^2 U_t^-2``, which in the CGS unit system +translates to :math:`cm^2/s^2 = erg/g`. + +The second group ``InternalCodeUnits`` contains the unit system that was used +internally by the code when running the simulation. This is in most cases the +same system as given in ``Units`` but since users can specify a different +system for the snapshots, there might be cases where they differ. As this system +only relates to what was used inside the code and not in the snapshots +themselves, this group is mostly here to report on the code's run-time behaviour +and is used to express all the quantities in the meta-data (e.g. in the +cosmology group or the softening lengths in the gravity group). 
+ +Used and unused run-time parameters +----------------------------------- + +The groups ``/Parameters`` and ``UnusedParameters`` located at the root of the file +contain the list of all the run-time parameters used by the run with their +values and the list of parameters that were in the YAML but were not read. The +content of these two groups is identical to the ``used_parameters.yml`` and +``unused_parameters.yml`` files produced by SWIFT when starting a run (See +the :ref:`Parameters_basics` section of the documentation). + +Structure of the particle arrays +-------------------------------- + +There are several groups that contain 'auxiliary' information, such as +``Header``. Particle data is placed in separate groups depending on the type of +the particles. The types use the naming convention of Gadget-2 (with +the OWLS and EAGLE extensions). + ++---------------------+------------------------+----------------------------+ +| HDF5 Group Name | Physical Particle Type | In code ``enum part_type`` | ++=====================+========================+============================+ +| ``/PartType0/`` | Gas | ``swift_type_gas`` | ++---------------------+------------------------+----------------------------+ +| ``/PartType1/`` | Dark Matter | ``swift_type_dark_matter`` | ++---------------------+------------------------+----------------------------+ +| ``/PartType4/`` | Stars | ``swift_type_star`` | ++---------------------+------------------------+----------------------------+ +| ``/PartType5/`` | Black Holes | ``swift_type_black_hole`` | ++---------------------+------------------------+----------------------------+ + +The last column in the table gives the ``enum`` value from ``part_type.h`` +corresponding to a given entry in the files. + +Quick access to particles via hash-tables +----------------------------------------- + +The particles are not sorted in a specific order when they are written to the +snapshots. 
However, the particles are sorted into the top-level cell structure +used internally by the code every time a tree rebuild is triggered. The +top-level cells are a coarse-grained mesh but knowing which particle belongs to +which cell can nevertheless be useful to rapidly access particles in a given +region only. + +One important caveat is that particles are free to drift out of their cells +between rebuilds of the tree (but not by more than one cell-length). If one +wants to have all the particles in a given cell, one has to read all the +neighbouring cells as well. We note that for image making purposes, for instance +to generate a slice, this is typically not necessary and reading just the cells +of interest is sufficient. + +At the root of the HDF5 file, the ``Cells`` group contains all the relevant +information. The dimension of the top-level grid (a triplet of integers) is +given by the attribute ``Cells/Meta-data/dimension`` and the size of each cell (a +triplet of floating-point numbers) is given by the attribute +``Cells/Meta-data/size``. All the cells have the same size but for non-cubic +simulation volumes the cells themselves can have different sizes along each +axis. + +The ``/Cells/Centres`` array gives the centre of each of the top-level cells in the +simulation volume. Both the cell sizes and positions of the centres are +expressed in the unit system used for the snapshots (see above) and are hence +consistent with the particle positions themselves. + +Once the cell(s) containing the region of interest have been located, users can +use the ``/Cells/Offsets/PartTypeN/Counts`` and +``/Cells/Offsets/PartTypeN/Offsets`` to retrieve the location of the particles +of type ``N`` in the ``/PartTypeN`` arrays. For instance, if one is interested +in retrieving all the densities of the gas particles in the cell around the +position `[1, 1, 1]` one could use a piece of code similar to: + +.. 
code-block:: python + :linenos: + + import numpy as np + import h5py + + snapshot_file = h5py.File("snapshot.hdf5", "r") + + my_pos = [1, 1, 1] + + # Read in the cell centres and size + nr_cells = snapshot_file["/Cells/Meta-data"].attrs["nr_cells"] + centres = snapshot_file["/Cells/Centres"][:,:] + size = snapshot_file["/Cells/Meta-data"].attrs["size"] + half_size = size / 2. + + # Look for the cell containing the position of interest + my_cell = -1 + for i in range(nr_cells): + if (my_pos[0] > centres[i, 0] - half_size[0] and my_pos[0] < centres[i, 0] + half_size[0] and + my_pos[1] > centres[i, 1] - half_size[1] and my_pos[1] < centres[i, 1] + half_size[1] and + my_pos[2] > centres[i, 2] - half_size[2] and my_pos[2] < centres[i, 2] + half_size[2]): + my_cell = i + break + + # Print the position of the centre of the cell of interest + centre = snapshot_file["/Cells/Centres"][my_cell, :] + print("Centre of the cell:", centre) + + # Retrieve the offset and counts + my_offset = snapshot_file["/Cells/Offsets/PartType0"][my_cell] + my_count = snapshot_file["/Cells/Counts/PartType0"][my_cell] + + # Get the densities of the particles in this cell + rho = snapshot_file["/PartType0/Density"][my_offset:my_offset + my_count] + +For large simulations, this vastly reduces the amount of data that needs to be read +from the disk. diff --git a/doc/RTD/source/SubgridModels/EAGLE/index.rst b/doc/RTD/source/SubgridModels/EAGLE/index.rst index 6388f7d3d42859d8659d2bb13f9dfe5181927807..639d98cd1a994f6f30dfc2430c90294d7486fce0 100644 --- a/doc/RTD/source/SubgridModels/EAGLE/index.rst +++ b/doc/RTD/source/SubgridModels/EAGLE/index.rst @@ -9,19 +9,21 @@ This section of the documentation gives a brief description of the different components of the EAGLE sub-grid model. We mostly focus on the parameters and values output in the snapshots. +.. _EAGLE_chemical_tracers: + Chemical tracers ~~~~~~~~~~~~~~~~ -The gas particles in the EAGLE model carry metal abundance information -in the form of metal mass fractions. 
We follow the following 9 -elements: `H`, `He`, `C`, `N`, `O`, `Ne`, `Mg`, `Si` and `Fe`. We -additionally follow the total metal mass fraction (i.e. absolute -metallicity) `Z`. This is typically larger than the sum of the 7 +The gas particles in the EAGLE model carry metal abundance information in the +form of metal mass fractions. We follow explicitly 9 of the 11 elements that +`Wiersma et al. (2009)b <http://adsabs.harvard.edu/abs/2009MNRAS.399..574W>`_ +traced in their chemical enrichment model. These are: `H`, `He`, `C`, `N`, `O`, +`Ne`, `Mg`, `Si` and `Fe` [#f1]_. We additionally follow the total metal mass fraction +(i.e. absolute metallicity) `Z`. This is typically larger than the sum of the 7 metals that are individually traced since this will also contain the -contribution of all the elements that are not individually followed. -We note that all of definitions are independent of any definition of -solar the solar metallicity :math:`Z_\odot` or of any solar abundance -pattern. +contribution of all the elements that are not individually followed. We note +that all of definitions are independent of any definition of solar the solar +metallicity :math:`Z_\odot` or of any solar abundance pattern. As part of the diagnostics, we additionally trace the elements coming from the different stellar evolution channels. We store for each @@ -38,12 +40,12 @@ We finally also compute the smoothed versions of the individual element mass fractions, of the total metal mass fractions, and of the iron gas fraction from SNIa. -The chemistry module in ``src/chemistry/EAGLE`` includes all the arrays +The chemistry module in ``src/chemistry/EAGLE/`` includes all the arrays that are added to the particles and the functions used to compute the smoothed elements. -When a star is formed (see below), it inherits all the chemical -tracers of its parent gas particle. 
+When a star is formed (see the section :ref:`EAGLE_star_formation` below), it +inherits all the chemical tracers of its parent gas particle. In the snapshots, we output for each gas and star particle: @@ -100,7 +102,8 @@ In the snapshots, we output for each gas and star particle: The stars will lose mass over their lifetime (up to ~45%). The fractions will remain unchanged but if one is interested in computing an absolute metal mass -(say) for a star, the ``InitialMass`` (see below) of the star must be used. +(say) for a star, the ``InitialMass`` (see the section +:ref:`EAGLE_star_formation` below) of the star must be used. The chemistry model only requires a small number of parameters to be specified in the `EAGLEChemistry` section of the YAML file. These are the initial values @@ -141,12 +144,13 @@ Whilst one would use the following values for solar abundances init_abundance_Iron: 1.1032152e-3 # Mass fraction in Iron +.. _EAGLE_cooling: Gas cooling: Wiersma+2009a ~~~~~~~~~~~~~~~~~~~~~~~~~~ The gas cooling is based on the redshift-dependent tables of `Wiersma et -al. (2009) <http://adsabs.harvard.edu/abs/2009MNRAS.393...99W>`_ that include +al. (2009)a <http://adsabs.harvard.edu/abs/2009MNRAS.393...99W>`_ that include element-by-element cooling rates for the 11 elements (`H`, `He`, `C`, `N`, `O`, `Ne`, `Mg`, `Si`, `S`, `Ca` and `Fe`) that dominate the total rates. The tables assume that the gas is in ionization equilibrium with the cosmic microwave @@ -157,7 +161,8 @@ ignores *local* sources of ionization, self-shielding and non-equilibrium cooling/heating. The tables can be obtained from this `link <http://virgodb.cosma.dur.ac.uk/swift-webstorage/CoolingTables/EAGLE/coolingtables.tar.gz>`_ which is a re-packaged version of the `original tables -<http://www.strw.leidenuniv.nl/WSS08/>`_ +<http://www.strw.leidenuniv.nl/WSS08/>`_. The code reading and interpolating the +table is located in the directory ``src/cooling/EAGLE/``. 
The Wiersma tables containing the cooling rates as a function of redshift, Hydrogen number density, Helium fraction (:math:`X_{He} / (X_{He} + X_{H})`) and @@ -197,6 +202,27 @@ We note that the EAGLE cooling model does not impose any restriction on the particles' individual time-steps. The cooling takes place over the time span given by the other conditions (e.g the Courant condition). +Finally, the cooling module also provides a function to compute the temperature +of a given gas particle based on its density, internal energy, abundances and +the current redshift. This temperature is the one used to compute the cooling +rate from the tables and similarly to the cooling rates, they assume that the +gas is in collisional equilibrium with the background radiation. The +temperatures are, in particular, computed every time a snapshot is written and +they are listed for every gas particle: + ++---------------------+-------------------------------------+-----------+-------------------------------------+ +| Name | Description | Units | Comments | ++=====================+=====================================+===========+=====================================+ +| ``Temperature`` | | Temperature of the gas as | [U_T] | | The calculation is performed | +| | | computed from the tables. | | | using quantities at the last | +| | | | | time-step the particle was active | ++---------------------+-------------------------------------+-----------+-------------------------------------+ + +Note that if one is running without cooling switched on at runtime, the +temperatures can be computed by passing the ``--temperature`` runtime flag (see +:ref:`cmdline-options`). Note that the tables then have to be available as in +the case with cooling switched on. + The cooling model is driven by a small number of parameter files in the `EAGLECooling` section of the YAML file. 
These are the re-ionization parameters, the path to the tables and optionally the modified abundances of `Ca` and `S` as @@ -221,25 +247,67 @@ And the optional parameters are: S_over_Si_in_solar: 1.0 # (Optional) Value of the Sulphur mass abundance ratio to solar in units of the Silicon ratio to solar. Default value: 1. newton_integration: 0 # (Optional) Set to 1 to use the Newton-Raphson scheme for the explicit cooling problem. - - +.. _EAGLE_tracers: + Particle tracers ~~~~~~~~~~~~~~~~ +Over the course of the simulation, the gas particles record some information +about their evolution. These are updated for a given particle every time it is +active. The EAGLE tracers module is located in the directory +``src/tracers/EAGLE/``. + +In the EAGLE model, we trace the maximal temperature a particle has reached and +the time at which this happened. When a star is formed (see the section +:ref:`EAGLE_star_formation` below), it inherits all the tracer values of its parent +gas particle. There are no parameters to the model but two values are added to +the snapshots for each gas and star particle: + ++----------------------------------------+---------------------------------------+-----------+-----------------------------+ +| Name | Description | Units | Comments | ++========================================+=======================================+===========+=============================+ +| | ``Maximal Temperature`` | | Maximal temperature reached by | | [U_T] | | +| | | this particle. 
| | | ++----------------------------------------+---------------------------------------+-----------+-----------------------------+ +| | ``Maximal Temperature scale-factor`` | | Scale-factor (cosmological runs) | | [-] | | +| | OR | | or time (non-cosmological runs) at | | OR | | +| | ``Maximal Temperature time`` | | which the maximum value was reached.| | [U_t] | | ++----------------------------------------+---------------------------------------+-----------+-----------------------------+ + + +.. _EAGLE_star_formation: + Star formation: Schaye+2008 ~~~~~~~~~~~~~~~~~~~~~~~~~~~ +.. _EAGLE_enrichment: + Stellar enrichment: Wiersma+2009b ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +.. _EAGLE_feedback: + Supernova feedback: Dalla Vecchia+2012 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +.. _EAGLE_black_hole_seeding: + Black-hole creation ~~~~~~~~~~~~~~~~~~~ +.. _EAGLE_black_hole_accretion: + Black-hole accretion ~~~~~~~~~~~~~~~~~~~~ +.. _EAGLE_black_hole_feedback: + AGN feedback ~~~~~~~~~~~~ + +.. [#f1] `Wiersma et al. (2009)b + <http://adsabs.harvard.edu/abs/2009MNRAS.399..574W>`_ originally also + followed explicitly `Ca` and `S`. They are omitted in the EAGLE + model but, when needed, their abundance with respect to solar is + assumed to be the same as the abundance of `Si` with respect to solar + (See the section :ref:`EAGLE_cooling`) diff --git a/doc/RTD/source/VELOCIraptorInterface/stfwithswift.rst b/doc/RTD/source/VELOCIraptorInterface/stfwithswift.rst index a663c37f93a6cede8c4528583c44183059414432..ed261b76abbcefaf5643a69069bb4b8ea1a0894c 100644 --- a/doc/RTD/source/VELOCIraptorInterface/stfwithswift.rst +++ b/doc/RTD/source/VELOCIraptorInterface/stfwithswift.rst @@ -50,8 +50,10 @@ HDF5 library, not a parallel build. Compiling SWIFT --------------- The next part is compiling SWIFT with VELOCIraptor and assumes you already -downloaded SWIFT from the GitLab_, this can be done by running:: +downloaded SWIFT from the GitLab_, this can be done by running +.. 
code:: bash + ./autogen.sh ./configure --with-velociraptor=/path/to/VELOCIraptor-STF/src make @@ -60,16 +62,16 @@ In which ``./autogen.sh`` only needs to be run once after the code is cloned from the GitLab_, and ``/path/to/`` is the path to the ``VELOCIraptor-STF`` directory on your machine. In general ``./configure`` can be run with other options as desired. After this we can run SWIFT with VELOCIraptor, but for this -we first need to add several lines to the yaml file of our simulation:: +we first need to add several lines to the yaml file of our simulation - #structure finding options - StructureFinding: - config_file_name: stf_input_6dfof_dmonly_sub.cfg - basename: ./stf - output_time_format: 1 - scale_factor_first: 0.02 - delta_time: 1.02 +.. code:: YAML + + StructureFinding: + config_file_name: stf_input_6dfof_dmonly_sub.cfg + basename: ./stf + scale_factor_first: 0.02 + delta_time: 1.02 In which we specify the ``.cfg`` file that is used by VELOCIraptor and the other parameters which SWIFT needs to use. In the case of diff --git a/doc/RTD/source/conf.py b/doc/RTD/source/conf.py index 46cff147efff3e7f23ff3f618898a17da3f85459..2249faa2851846c28e743400b2c826bfa6780c0a 100644 --- a/doc/RTD/source/conf.py +++ b/doc/RTD/source/conf.py @@ -87,7 +87,7 @@ html_theme = 'sphinx_rtd_theme' # Add any paths that contain custom static files (such as style sheets) here, # relative to this directory. They are copied after the builtin static files, # so a file named "default.css" will overwrite the builtin "default.css". -html_static_path = ['.static'] +# html_static_path = ['.static'] # Custom sidebar templates, must be a dictionary that maps document names # to template names. 
diff --git a/doc/RTD/source/index.rst b/doc/RTD/source/index.rst index b9370c3f24b2ffb3c5174f2fe99fb9ec610e18f6..e04efe8c889fb8a005c88f691f1e01a387f19ebb 100644 --- a/doc/RTD/source/index.rst +++ b/doc/RTD/source/index.rst @@ -18,6 +18,7 @@ difference is the parameter file that will need to be adapted for SWIFT. CommandLineOptions/index ParameterFiles/index InitialConditions/index + Snapshots/index HydroSchemes/index SubgridModels/index EquationOfState/index diff --git a/examples/DwarfGalaxy/dwarf_galaxy.yml b/examples/DwarfGalaxy/dwarf_galaxy.yml index 0d815a99c42249bcbbdaf21dbaa34a55f61731aa..4c5e2a82b017725929138de011b1f3ed1fe9f1ef 100644 --- a/examples/DwarfGalaxy/dwarf_galaxy.yml +++ b/examples/DwarfGalaxy/dwarf_galaxy.yml @@ -10,10 +10,8 @@ InternalUnitSystem: StructureFinding: config_file_name: stf_input.cfg # Name of the STF config file. basename: ./stf # Common part of the name of output files. - output_time_format: 0 # Specifies the frequency format of structure finding. 0 for simulation steps (delta_step) and 1 for simulation time intervals (delta_time). scale_factor_first: 0.92 # Scale-factor of the first snaphot (cosmological run) time_first: 0.01 # Time of the first structure finding output (in internal units). - delta_step: 1000 # Time difference between consecutive structure finding outputs (in internal units) in simulation steps. delta_time: 1.10 # Time difference between consecutive structure finding outputs (in internal units) in simulation time intervals. # Cosmological parameters diff --git a/examples/EAGLE_25/eagle_25.yml b/examples/EAGLE_25/eagle_25.yml index 0aec970db486a164696b23fdc1e281fbe4853486..cab0dbcd5efc0528ddc65a6dde1e5c2d7cb6b9a9 100644 --- a/examples/EAGLE_25/eagle_25.yml +++ b/examples/EAGLE_25/eagle_25.yml @@ -10,10 +10,8 @@ InternalUnitSystem: StructureFinding: config_file_name: stf_input.cfg # Name of the STF config file. basename: ./stf # Common part of the name of output files. 
- output_time_format: 0 # Specifies the frequency format of structure finding. 0 for simulation steps (delta_step) and 1 for simulation time intervals (delta_time). scale_factor_first: 0.92 # Scale-factor of the first snaphot (cosmological run) time_first: 0.01 # Time of the first structure finding output (in internal units). - delta_step: 1000 # Time difference between consecutive structure finding outputs (in internal units) in simulation steps. delta_time: 1.10 # Time difference between consecutive structure finding outputs (in internal units) in simulation time intervals. # Cosmological parameters diff --git a/examples/EAGLE_25/run.sh b/examples/EAGLE_25/run.sh index af1218f70729663d8efe337c312f6ef2fe8d6620..5961cf01a3a011ee26f0b411e619dd7207d5db47 100755 --- a/examples/EAGLE_25/run.sh +++ b/examples/EAGLE_25/run.sh @@ -7,5 +7,5 @@ then ./getIC.sh fi -../swift --cosmology --hydro --self-gravity --stars--threads=16 eagle_25.yml 2>&1 | tee output.log +../swift --cosmology --hydro --self-gravity --stars --threads=16 eagle_25.yml 2>&1 | tee output.log diff --git a/examples/EAGLE_6/eagle_6.yml b/examples/EAGLE_6/eagle_6.yml index 7c64c1cdedb6c8e9714471f4bad9611f548d05fa..e80fac8167a832c17cd10e1d2ae7cd854f314d17 100644 --- a/examples/EAGLE_6/eagle_6.yml +++ b/examples/EAGLE_6/eagle_6.yml @@ -10,10 +10,8 @@ InternalUnitSystem: StructureFinding: config_file_name: stf_input.cfg # Name of the STF config file. basename: ./stf # Common part of the name of output files. - output_time_format: 0 # Specifies the frequency format of structure finding. 0 for simulation steps (delta_step) and 1 for simulation time intervals (delta_time). scale_factor_first: 0.92 # Scale-factor of the first snaphot (cosmological run) time_first: 0.01 # Time of the first structure finding output (in internal units). - delta_step: 1000 # Time difference between consecutive structure finding outputs (in internal units) in simulation steps. 
delta_time: 1.10 # Time difference between consecutive structure finding outputs (in internal units) in simulation time intervals. # Cosmological parameters diff --git a/examples/SedovBlast_1D/run.sh b/examples/SedovBlast_1D/run.sh index ba479214961c5957a2b19d6aa118e0f0e7ee0f63..e5674dc15e8fac1b36f43da07b829720c0ecd5f1 100755 --- a/examples/SedovBlast_1D/run.sh +++ b/examples/SedovBlast_1D/run.sh @@ -8,7 +8,7 @@ then fi # Run SWIFT -../swift --hydro --threads=1 sedov.yml 2>&1 | tee output.log +../swift --hydro --limiter --threads=1 sedov.yml 2>&1 | tee output.log # Plot the solution python plotSolution.py 5 diff --git a/examples/SedovBlast_1D/sedov.yml b/examples/SedovBlast_1D/sedov.yml index b4912a95e797440dc6eb0c9f48806a5954adbc41..b4252581d6eb3b2932a074e7545b2d308be51865 100644 --- a/examples/SedovBlast_1D/sedov.yml +++ b/examples/SedovBlast_1D/sedov.yml @@ -11,7 +11,7 @@ TimeIntegration: time_begin: 0. # The starting time of the simulation (in internal units). time_end: 5e-2 # The end time of the simulation (in internal units). dt_min: 1e-7 # The minimal time-step size of the simulation (in internal units). - dt_max: 1e-5 # The maximal time-step size of the simulation (in internal units). + dt_max: 1e-2 # The maximal time-step size of the simulation (in internal units). 
# Parameters governing the snapshots Snapshots: @@ -21,7 +21,7 @@ Snapshots: # Parameters governing the conserved quantities statistics Statistics: - delta_time: 1e-5 # Time between statistics output + delta_time: 1e-3 # Time between statistics output # Parameters for the hydrodynamics scheme SPH: diff --git a/examples/SedovBlast_2D/run.sh b/examples/SedovBlast_2D/run.sh index b481d4555241c17015452a2139c04c541ccf1cdc..e2136f8f5e6ee9bde61d5189ed7955d53a3a9a6e 100755 --- a/examples/SedovBlast_2D/run.sh +++ b/examples/SedovBlast_2D/run.sh @@ -13,7 +13,7 @@ then fi # Run SWIFT -../swift --hydro --threads=1 sedov.yml 2>&1 | tee output.log +../swift --hydro --limiter --threads=1 sedov.yml 2>&1 | tee output.log # Plot the solution python plotSolution.py 5 diff --git a/examples/SedovBlast_2D/sedov.yml b/examples/SedovBlast_2D/sedov.yml index 84177ece31ef98ec55c41513276c9c0158e69bcf..b4252581d6eb3b2932a074e7545b2d308be51865 100644 --- a/examples/SedovBlast_2D/sedov.yml +++ b/examples/SedovBlast_2D/sedov.yml @@ -11,7 +11,7 @@ TimeIntegration: time_begin: 0. # The starting time of the simulation (in internal units). time_end: 5e-2 # The end time of the simulation (in internal units). dt_min: 1e-7 # The minimal time-step size of the simulation (in internal units). - dt_max: 1e-4 # The maximal time-step size of the simulation (in internal units). + dt_max: 1e-2 # The maximal time-step size of the simulation (in internal units). 
# Parameters governing the snapshots Snapshots: diff --git a/examples/SedovBlast_3D/run.sh b/examples/SedovBlast_3D/run.sh index 88aec36a7b96b5fd2a7fde41f0e0c9dc7185f1e8..7f0788cc822f1a6427fb6dbee4a921f79c942808 100755 --- a/examples/SedovBlast_3D/run.sh +++ b/examples/SedovBlast_3D/run.sh @@ -13,7 +13,7 @@ then fi # Run SWIFT -../swift --hydro --threads=4 sedov.yml 2>&1 | tee output.log +../swift --hydro --limiter --threads=4 sedov.yml 2>&1 | tee output.log # Plot the solution python plotSolution.py 5 diff --git a/examples/SedovBlast_3D/sedov.yml b/examples/SedovBlast_3D/sedov.yml index 6cf5b02427b8004787b646e6bcdd4bacaa25bc06..19e8c72538a748304ca4da076458c9ae27dc8f46 100644 --- a/examples/SedovBlast_3D/sedov.yml +++ b/examples/SedovBlast_3D/sedov.yml @@ -11,7 +11,7 @@ TimeIntegration: time_begin: 0. # The starting time of the simulation (in internal units). time_end: 5e-2 # The end time of the simulation (in internal units). dt_min: 1e-7 # The minimal time-step size of the simulation (in internal units). - dt_max: 1e-4 # The maximal time-step size of the simulation (in internal units). + dt_max: 1e-2 # The maximal time-step size of the simulation (in internal units). 
# Parameters governing the snapshots Snapshots: diff --git a/examples/SmallCosmoVolume_DM/small_cosmo_volume_dm.yml b/examples/SmallCosmoVolume_DM/small_cosmo_volume_dm.yml index 910137edc442c994a9f31a8c62e16818ca4ae97d..ebe3a78ee0d03eb53752b1dfa8fa749931a754a9 100644 --- a/examples/SmallCosmoVolume_DM/small_cosmo_volume_dm.yml +++ b/examples/SmallCosmoVolume_DM/small_cosmo_volume_dm.yml @@ -10,7 +10,6 @@ InternalUnitSystem: StructureFinding: config_file_name: stf_input_6dfof_dmonly_sub.cfg basename: ./stf - output_time_format: 1 scale_factor_first: 0.02 delta_time: 1.02 diff --git a/examples/SmallCosmoVolume_VELOCIraptor/small_cosmo_volume.yml b/examples/SmallCosmoVolume_VELOCIraptor/small_cosmo_volume.yml index c8157a7a0e0065b1f58667fb8437b9e3883eda75..d6b9a78fe3c2a891492affbdea9787d62916d3ed 100644 --- a/examples/SmallCosmoVolume_VELOCIraptor/small_cosmo_volume.yml +++ b/examples/SmallCosmoVolume_VELOCIraptor/small_cosmo_volume.yml @@ -37,8 +37,9 @@ SPH: # Parameters governing the snapshots Snapshots: basename: snap - delta_time: 1.02 + delta_time: 1.05 scale_factor_first: 0.02 + invoke_stf: 1 # Parameters governing the conserved quantities statistics Statistics: @@ -52,16 +53,16 @@ Scheduler: # Parameters related to the initial conditions InitialConditions: file_name: small_cosmo_volume.hdf5 + periodic: 1 cleanup_h_factors: 1 cleanup_velocity_factors: 1 - generate_gas_in_ics: 1 # Generate gas particles from the DM-only ICs - cleanup_smoothing_lengths: 1 # Since we generate gas, make use of the (expensive) cleaning-up procedure. + generate_gas_in_ics: 1 # Generate gas particles from the DM-only ICs + cleanup_smoothing_lengths: 1 # Since we generate gas, make use of the (expensive) cleaning-up procedure. 
# Structure finding options (requires velociraptor) StructureFinding: config_file_name: stfconfig_input.cfg basename: ./stf - output_time_format: 1 scale_factor_first: 0.02 delta_time: 1.02 diff --git a/examples/main.c b/examples/main.c index eaa94cfc699000234bd0010c32181cf9bace5651..3f558fe240b5efec5f6797837a415b8bc5b762ef 100644 --- a/examples/main.c +++ b/examples/main.c @@ -155,6 +155,7 @@ int main(int argc, char *argv[]) { int with_stars = 0; int with_star_formation = 0; int with_feedback = 0; + int with_limiter = 0; int with_fp_exceptions = 0; int with_drift_all = 0; int with_mpole_reconstruction = 0; @@ -204,6 +205,8 @@ int main(int argc, char *argv[]) { OPT_BOOLEAN('S', "stars", &with_stars, "Run with stars.", NULL, 0, 0), OPT_BOOLEAN('x', "velociraptor", &with_structure_finding, "Run with structure finding.", NULL, 0, 0), + OPT_BOOLEAN(0, "limiter", &with_limiter, "Run with time-step limiter.", + NULL, 0, 0), OPT_GROUP(" Control options:\n"), OPT_BOOLEAN('a', "pin", &with_aff, @@ -458,11 +461,7 @@ int main(int argc, char *argv[]) { if (with_feedback) error("Can't run with feedback over MPI (yet)."); if (with_star_formation) error("Can't run with star formation over MPI (yet)"); -#endif - -#if defined(WITH_MPI) && defined(HAVE_VELOCIRAPTOR) - if (with_structure_finding && nr_nodes > 1) - error("VEOCIraptor not yet enabled over MPI."); + if (with_limiter) error("Can't run with time-step limiter over MPI (yet)"); #endif /* Temporary early aborts for modes not supported with hand-vec. 
*/ @@ -910,6 +909,7 @@ int main(int argc, char *argv[]) { engine_policies |= engine_policy_external_gravity; if (with_cosmology) engine_policies |= engine_policy_cosmology; if (with_temperature) engine_policies |= engine_policy_temperature; + if (with_limiter) engine_policies |= engine_policy_limiter; if (with_cooling) engine_policies |= engine_policy_cooling; if (with_stars) engine_policies |= engine_policy_stars; if (with_star_formation) engine_policies |= engine_policy_star_formation; @@ -934,6 +934,10 @@ int main(int argc, char *argv[]) { fflush(stdout); } +#ifdef HAVE_VELOCIRAPTOR + if (with_structure_finding) velociraptor_init(&e); +#endif + /* Get some info to the user. */ if (myrank == 0) { long long N_DM = N_total[1] - N_total[2] - N_total[0]; @@ -1216,14 +1220,6 @@ int main(int argc, char *argv[]) { #endif // write a final snapshot with logger, in order to facilitate a restart engine_dump_snapshot(&e); - -#ifdef HAVE_VELOCIRAPTOR - /* Call VELOCIraptor at the end of the run to find groups. */ - if (e.policy & engine_policy_structure_finding) { - velociraptor_init(&e); - velociraptor_invoke(&e); - } -#endif } #ifdef WITH_MPI diff --git a/examples/parameter_example.yml b/examples/parameter_example.yml index c1a4c867c77b5d770dc1349c4218e5b7c2e10a9b..67b5d051e0c837764a13d7bc45a7ab25f528a96b 100644 --- a/examples/parameter_example.yml +++ b/examples/parameter_example.yml @@ -27,8 +27,11 @@ SPH: resolution_eta: 1.2348 # Target smoothing length in units of the mean inter-particle separation (1.2348 == 48Ngbs with the cubic spline kernel). CFL_condition: 0.1 # Courant-Friedrich-Levy condition for time integration. h_tolerance: 1e-4 # (Optional) Relative accuracy of the Netwon-Raphson scheme for the smoothing lengths. + h_max: 10. # (Optional) Maximal allowed smoothing length in internal units. Defaults to FLT_MAX if unspecified. max_volume_change: 1.4 # (Optional) Maximal allowed change of kernel volume over one time-step. 
max_ghost_iterations: 30 # (Optional) Maximal number of iterations allowed to converge towards the smoothing length. + initial_temperature: 0 # (Optional) Initial temperature (in internal units) to set the gas particles at start-up. Value is ignored if set to 0. + minimal_temperature: 0 # (Optional) Minimal temperature (in internal units) allowed for the gas particles. Value is ignored if set to 0. H_mass_fraction: 0.755 # (Optional) Hydrogen mass fraction used for initial conversion from temp to internal energy. Default value is derived from the physical constants. H_ionization_temperature: 1e4 # (Optional) Temperature of the transition from neutral to ionized Hydrogen for primoridal gas. viscosity_alpha: 0.8 # (Optional) Override for the initial value of the artificial viscosity. In schemes that have a fixed AV, this remains as alpha throughout the run. @@ -65,6 +68,7 @@ Scheduler: cell_extra_sparts: 400 # (Optional) Number of spare sparts per top-level allocated at rebuild time for on-the-fly creation. max_top_level_cells: 12 # (Optional) Maximal number of top-level cells in any dimension. The number of top-level cells will be the cube of this (this is the default value). tasks_per_cell: 0 # (Optional) The average number of tasks per cell. If not large enough the simulation will fail (means guess...). + links_per_tasks: 10 # (Optional) The average number of links per tasks (before adding the communication tasks). If not large enough the simulation will fail (means guess...). Defaults to 10. mpi_message_limit: 4096 # (Optional) Maximum MPI task message size to send non-buffered, KB. # Parameters governing the time integration (Set dt_min and dt_max to the same value for a fixed time-step run.) @@ -81,6 +85,7 @@ Snapshots: scale_factor_first: 0.1 # (Optional) Scale-factor of the first snapshot if cosmological time-integration. time_first: 0. 
# (Optional) Time of the first output if non-cosmological time-integration (in internal units) delta_time: 0.01 # Time difference between consecutive outputs (in internal units) + invoke_stf: 0 # (Optional) Call VELOCIraptor every time a snapshot is written irrespective of the VELOCIraptor output strategy. compression: 0 # (Optional) Set the level of compression of the HDF5 datasets [0-9]. 0 does no compression. int_time_label_on: 0 # (Optional) Enable to label the snapshots using the time rounded to an integer (in internal units) UnitMass_in_cgs: 1 # (Optional) Unit system for the outputs (Grams) @@ -153,6 +158,16 @@ DomainDecomposition: itr: 100 # When adaptive defines the ratio of inter node communication time to data redistribution time, in the range 0.00001 to 10000000.0. # Lower values give less data movement during redistributions, at the cost of global balance which may require more communication. +# Structure finding options (requires velociraptor) +StructureFinding: + config_file_name: stf_input.cfg # Name of the STF config file. + basename: ./stf # Common part of the name of output files. + scale_factor_first: 0.92 # (Optional) Scale-factor of the first snapshot (cosmological run) + time_first: 0.01 # (Optional) Time of the first structure finding output (in internal units). + delta_time: 1.10 # (Optional) Time difference between consecutive structure finding outputs (in internal units) in simulation time intervals. 
+ output_list_on: 0 # (Optional) Enable the output list + output_list: stflist.txt # (Optional) File containing the output times (see documentation in "Parameter File" section) + # Parameters related to the equation of state ------------------------------------------ EoS: @@ -296,6 +311,8 @@ EAGLEChemistry: init_abundance_Silicon: 0.000 # Inital fraction of particle mass in Silicon init_abundance_Iron: 0.000 # Inital fraction of particle mass in Iron +# Parameters related to star formation models ----------------------------------------------- + # Schaye and Dalla Vecchia 2008 star formation SchayeSF: thresh_MinOverDens: 57.7 # The critical density contrast to form stars @@ -315,14 +332,3 @@ SchayeSF: EOS_Jeans_TemperatureNorm_K: 1e3 # No idea how this works EOS_JEANS_DensityNorm_HpCM3: 0.1 # No idea what the value is. -# Structure finding options (requires velociraptor) -StructureFinding: - config_file_name: stf_input.cfg # Name of the STF config file. - basename: ./stf # Common part of the name of output files. - output_time_format: 0 # Specifies the frequency format of structure finding. 0 for simulation steps (delta_step) and 1 for simulation time intervals (delta_time). - scale_factor_first: 0.92 # Scale-factor of the first snaphot (cosmological run) - time_first: 0.01 # Time of the first structure finding output (in internal units). - delta_step: 1000 # Time difference between consecutive structure finding outputs (in internal units) in simulation steps. - delta_time: 1.10 # Time difference between consecutive structure finding outputs (in internal units) in simulation time intervals. 
- output_list_on: 0 # (Optional) Enable the output list - output_list: stflist.txt # (Optional) File containing the output times (see documentation in "Parameter File" section) diff --git a/src/Makefile.am b/src/Makefile.am index 276345b9e20bb29c23234a6e9a2aed0a5c320b88..1a975903082d47963125b175dd4967f76384bae0 100644 --- a/src/Makefile.am +++ b/src/Makefile.am @@ -50,7 +50,7 @@ include_HEADERS = space.h runner.h queue.h task.h lock.h cell.h part.h const.h \ chemistry.h chemistry_io.h chemistry_struct.h cosmology.h restart.h space_getsid.h utilities.h \ mesh_gravity.h cbrt.h exp10.h velociraptor_interface.h swift_velociraptor_part.h outputlist.h \ logger_io.h tracers_io.h tracers.h tracers_struct.h sftracers_io.h sftracers.h \ - sftracers_struct.h + sftracers_struct.h velociraptor_struct.h velociraptor_io.h # source files for EAGLE cooling EAGLE_COOLING_SOURCES = @@ -76,7 +76,7 @@ nobase_noinst_HEADERS = align.h approx_math.h atomic.h barrier.h cycle.h error.h gravity_iact.h kernel_long_gravity.h vector.h cache.h runner_doiact.h runner_doiact_vec.h runner_doiact_grav.h \ runner_doiact_nosort.h runner_doiact_stars.h units.h intrinsics.h minmax.h kick.h timestep.h drift.h \ adiabatic_index.h io_properties.h dimension.h part_type.h periodic.h memswap.h dump.h logger.h sign.h \ - logger_io.h \ + logger_io.h timestep_limiter.h \ gravity.h gravity_io.h gravity_cache.h \ gravity/Default/gravity.h gravity/Default/gravity_iact.h gravity/Default/gravity_io.h \ gravity/Default/gravity_debug.h gravity/Default/gravity_part.h \ diff --git a/src/cache.h b/src/cache.h index 5dd8164b1dc80795a8593cc2af42c2c9e7e68885..92bf908a400eb3c2a5425fb7a31753e0c1f719fa 100644 --- a/src/cache.h +++ b/src/cache.h @@ -179,8 +179,9 @@ __attribute__((always_inline)) INLINE void cache_init(struct cache *c, * * @param ci The #cell. * @param ci_cache The cache. + * @return uninhibited_count The no. of uninhibited particles. 
*/ -__attribute__((always_inline)) INLINE void cache_read_particles( +__attribute__((always_inline)) INLINE int cache_read_particles( const struct cell *restrict const ci, struct cache *restrict const ci_cache) { @@ -197,12 +198,29 @@ __attribute__((always_inline)) INLINE void cache_read_particles( swift_declare_aligned_ptr(float, vy, ci_cache->vy, SWIFT_CACHE_ALIGNMENT); swift_declare_aligned_ptr(float, vz, ci_cache->vz, SWIFT_CACHE_ALIGNMENT); + const int count = ci->hydro.count; const struct part *restrict parts = ci->hydro.parts; const double loc[3] = {ci->loc[0], ci->loc[1], ci->loc[2]}; + const double max_dx = ci->hydro.dx_max_part; + const float pos_padded[3] = {-(2. * ci->width[0] + max_dx), + -(2. * ci->width[1] + max_dx), + -(2. * ci->width[2] + max_dx)}; + const float h_padded = ci->hydro.h_max / 4.; /* Shift the particles positions to a local frame so single precision can be * used instead of double precision. */ - for (int i = 0; i < ci->hydro.count; i++) { + for (int i = 0; i < count; i++) { + + /* Pad inhibited particles. */ + if (parts[i].time_bin >= time_bin_inhibited) { + x[i] = pos_padded[0]; + y[i] = pos_padded[1]; + z[i] = pos_padded[2]; + h[i] = h_padded; + + continue; + } + x[i] = (float)(parts[i].x[0] - loc[0]); y[i] = (float)(parts[i].x[1] - loc[1]); z[i] = (float)(parts[i].x[2] - loc[2]); @@ -213,6 +231,26 @@ __attribute__((always_inline)) INLINE void cache_read_particles( vz[i] = parts[i].v[2]; } + /* Pad cache if the no. of particles is not a multiple of double the vector + * length. 
*/ + int count_align = count; + const int rem = count % (NUM_VEC_PROC * VEC_SIZE); + if (rem != 0) { + count_align += (NUM_VEC_PROC * VEC_SIZE) - rem; + + /* Set positions to something outside of the range of any particle */ + for (int i = count; i < count_align; i++) { + x[i] = pos_padded[0]; + y[i] = pos_padded[1]; + z[i] = pos_padded[2]; + } + } + + return count_align; + +#else + error("Can't call the cache reading function with this flavour of SPH!"); + return 0; #endif } @@ -261,10 +299,32 @@ __attribute__((always_inline)) INLINE void cache_read_particles_subset( if (*last_pi + pad < ci->hydro.count) *last_pi += pad; } + const double max_dx = ci->hydro.dx_max_part; + const float pos_padded[3] = {-(2. * ci->width[0] + max_dx), + -(2. * ci->width[1] + max_dx), + -(2. * ci->width[2] + max_dx)}; + const float h_padded = ci->hydro.h_max / 4.; + /* Shift the particles positions to a local frame so single precision can be * used instead of double precision. */ for (int i = 0; i < *last_pi; i++) { const int idx = sort_i[i].i; + + /* Put inhibited particles out of range. */ + if (parts[idx].time_bin >= time_bin_inhibited) { + x[i] = pos_padded[0]; + y[i] = pos_padded[1]; + z[i] = pos_padded[2]; + h[i] = h_padded; + + m[i] = 1.f; + vx[i] = 1.f; + vy[i] = 1.f; + vz[i] = 1.f; + + continue; + } + x[i] = (float)(parts[idx].x[0] - loc[0]); y[i] = (float)(parts[idx].x[1] - loc[1]); z[i] = (float)(parts[idx].x[2] - loc[2]); @@ -278,12 +338,6 @@ __attribute__((always_inline)) INLINE void cache_read_particles_subset( /* Pad cache with fake particles that exist outside the cell so will not * interact. We use values of the same magnitude (but negative!) as the real * particles to avoid overflow problems. */ - const double max_dx = ci->hydro.dx_max_part; - const float pos_padded[3] = {-(2. * ci->width[0] + max_dx), - -(2. * ci->width[1] + max_dx), - -(2. 
* ci->width[2] + max_dx)}; - const float h_padded = ci->hydro.parts[0].h; - for (int i = *last_pi; i < *last_pi + VEC_SIZE; i++) { x[i] = pos_padded[0]; y[i] = pos_padded[1]; @@ -308,11 +362,32 @@ __attribute__((always_inline)) INLINE void cache_read_particles_subset( } const int ci_cache_count = ci->hydro.count - *first_pi; + const double max_dx = ci->hydro.dx_max_part; + const float pos_padded[3] = {-(2. * ci->width[0] + max_dx), + -(2. * ci->width[1] + max_dx), + -(2. * ci->width[2] + max_dx)}; + const float h_padded = ci->hydro.h_max / 4.; /* Shift the particles positions to a local frame so single precision can be * used instead of double precision. */ for (int i = 0; i < ci_cache_count; i++) { const int idx = sort_i[i + *first_pi].i; + + /* Put inhibited particles out of range. */ + if (parts[idx].time_bin >= time_bin_inhibited) { + x[i] = pos_padded[0]; + y[i] = pos_padded[1]; + z[i] = pos_padded[2]; + h[i] = h_padded; + + m[i] = 1.f; + vx[i] = 1.f; + vy[i] = 1.f; + vz[i] = 1.f; + + continue; + } + x[i] = (float)(parts[idx].x[0] - loc[0]); y[i] = (float)(parts[idx].x[1] - loc[1]); z[i] = (float)(parts[idx].x[2] - loc[2]); @@ -326,12 +401,6 @@ __attribute__((always_inline)) INLINE void cache_read_particles_subset( /* Pad cache with fake particles that exist outside the cell so will not * interact. We use values of the same magnitude (but negative!) as the real * particles to avoid overflow problems. */ - const double max_dx = ci->hydro.dx_max_part; - const float pos_padded[3] = {-(2. * ci->width[0] + max_dx), - -(2. * ci->width[1] + max_dx), - -(2. * ci->width[2] + max_dx)}; - const float h_padded = ci->hydro.parts[0].h; - for (int i = ci->hydro.count - *first_pi; i < ci->hydro.count - *first_pi + VEC_SIZE; i++) { x[i] = pos_padded[0]; @@ -355,8 +424,9 @@ __attribute__((always_inline)) INLINE void cache_read_particles_subset( * * @param ci The #cell. * @param ci_cache The cache. + * @return uninhibited_count The no. of uninhibited particles. 
*/ -__attribute__((always_inline)) INLINE void cache_read_force_particles( +__attribute__((always_inline)) INLINE int cache_read_force_particles( const struct cell *restrict const ci, struct cache *restrict const ci_cache) { @@ -382,12 +452,34 @@ __attribute__((always_inline)) INLINE void cache_read_force_particles( swift_declare_aligned_ptr(float, soundspeed, ci_cache->soundspeed, SWIFT_CACHE_ALIGNMENT); + const int count = ci->hydro.count; const struct part *restrict parts = ci->hydro.parts; const double loc[3] = {ci->loc[0], ci->loc[1], ci->loc[2]}; + const double max_dx = ci->hydro.dx_max_part; + const float pos_padded[3] = {-(2. * ci->width[0] + max_dx), + -(2. * ci->width[1] + max_dx), + -(2. * ci->width[2] + max_dx)}; + const float h_padded = ci->hydro.h_max / 4.; /* Shift the particles positions to a local frame so single precision can be * used instead of double precision. */ - for (int i = 0; i < ci->hydro.count; i++) { + for (int i = 0; i < count; i++) { + + /* Skip inhibited particles. */ + if (parts[i].time_bin >= time_bin_inhibited) { + x[i] = pos_padded[0]; + y[i] = pos_padded[1]; + z[i] = pos_padded[2]; + h[i] = h_padded; + rho[i] = 1.f; + grad_h[i] = 1.f; + pOrho2[i] = 1.f; + balsara[i] = 1.f; + soundspeed[i] = 1.f; + + continue; + } + x[i] = (float)(parts[i].x[0] - loc[0]); y[i] = (float)(parts[i].x[1] - loc[1]); z[i] = (float)(parts[i].x[2] - loc[2]); @@ -403,6 +495,32 @@ __attribute__((always_inline)) INLINE void cache_read_force_particles( soundspeed[i] = parts[i].force.soundspeed; } + /* Pad cache if there is a serial remainder. 
*/ + int count_align = count; + const int rem = count % VEC_SIZE; + if (rem != 0) { + count_align += VEC_SIZE - rem; + + /* Set positions to the same as particle pi so when the r2 > 0 mask is + * applied these extra contributions are masked out.*/ + for (int i = count; i < count_align; i++) { + x[i] = pos_padded[0]; + y[i] = pos_padded[1]; + z[i] = pos_padded[2]; + h[i] = h_padded; + rho[i] = 1.f; + grad_h[i] = 1.f; + pOrho2[i] = 1.f; + balsara[i] = 1.f; + soundspeed[i] = 1.f; + } + } + + return count_align; + +#else + error("Can't call the cache reading function with this flavour of SPH!"); + return 0; #endif } @@ -472,11 +590,32 @@ __attribute__((always_inline)) INLINE void cache_read_two_partial_cells_sorted( swift_declare_aligned_ptr(float, vz, ci_cache->vz, SWIFT_CACHE_ALIGNMENT); int ci_cache_count = ci->hydro.count - first_pi_align; + const double max_dx = max(ci->hydro.dx_max_part, cj->hydro.dx_max_part); + const float pos_padded_i[3] = {-(2. * ci->width[0] + max_dx), + -(2. * ci->width[1] + max_dx), + -(2. * ci->width[2] + max_dx)}; + const float h_padded_i = ci->hydro.h_max / 4.; /* Shift the particles positions to a local frame (ci frame) so single * precision can be used instead of double precision. */ for (int i = 0; i < ci_cache_count; i++) { const int idx = sort_i[i + first_pi_align].i; + + /* Put inhibited particles out of range. */ + if (parts_i[idx].time_bin >= time_bin_inhibited) { + x[i] = pos_padded_i[0]; + y[i] = pos_padded_i[1]; + z[i] = pos_padded_i[2]; + h[i] = h_padded_i; + + m[i] = 1.f; + vx[i] = 1.f; + vy[i] = 1.f; + vz[i] = 1.f; + + continue; + } + x[i] = (float)(parts_i[idx].x[0] - total_ci_shift[0]); y[i] = (float)(parts_i[idx].x[1] - total_ci_shift[1]); z[i] = (float)(parts_i[idx].x[2] - total_ci_shift[2]); @@ -532,18 +671,12 @@ __attribute__((always_inline)) INLINE void cache_read_two_partial_cells_sorted( /* Pad cache with fake particles that exist outside the cell so will not * interact. 
We use values of the same magnitude (but negative!) as the real * particles to avoid overflow problems. */ - const double max_dx = max(ci->hydro.dx_max_part, cj->hydro.dx_max_part); - const float pos_padded[3] = {-(2. * ci->width[0] + max_dx), - -(2. * ci->width[1] + max_dx), - -(2. * ci->width[2] + max_dx)}; - const float h_padded = ci->hydro.parts[0].h; - for (int i = ci->hydro.count - first_pi_align; i < ci->hydro.count - first_pi_align + VEC_SIZE; i++) { - x[i] = pos_padded[0]; - y[i] = pos_padded[1]; - z[i] = pos_padded[2]; - h[i] = h_padded; + x[i] = pos_padded_i[0]; + y[i] = pos_padded_i[1]; + z[i] = pos_padded_i[2]; + h[i] = h_padded_i; m[i] = 1.f; vx[i] = 1.f; @@ -562,8 +695,29 @@ __attribute__((always_inline)) INLINE void cache_read_two_partial_cells_sorted( swift_declare_aligned_ptr(float, vyj, cj_cache->vy, SWIFT_CACHE_ALIGNMENT); swift_declare_aligned_ptr(float, vzj, cj_cache->vz, SWIFT_CACHE_ALIGNMENT); + const float pos_padded_j[3] = {-(2. * cj->width[0] + max_dx), + -(2. * cj->width[1] + max_dx), + -(2. * cj->width[2] + max_dx)}; + const float h_padded_j = cj->hydro.h_max / 4.; + for (int i = 0; i <= last_pj_align; i++) { const int idx = sort_j[i].i; + + /* Put inhibited particles out of range. */ + if (parts_j[idx].time_bin >= time_bin_inhibited) { + xj[i] = pos_padded_j[0]; + yj[i] = pos_padded_j[1]; + zj[i] = pos_padded_j[2]; + hj[i] = h_padded_j; + + mj[i] = 1.f; + vxj[i] = 1.f; + vyj[i] = 1.f; + vzj[i] = 1.f; + + continue; + } + xj[i] = (float)(parts_j[idx].x[0] - total_cj_shift[0]); yj[i] = (float)(parts_j[idx].x[1] - total_cj_shift[1]); zj[i] = (float)(parts_j[idx].x[2] - total_cj_shift[2]); @@ -609,11 +763,6 @@ __attribute__((always_inline)) INLINE void cache_read_two_partial_cells_sorted( /* Pad cache with fake particles that exist outside the cell so will not * interact. We use values of the same magnitude (but negative!) as the real * particles to avoid overflow problems. */ - const float pos_padded_j[3] = {-(2. 
* cj->width[0] + max_dx), - -(2. * cj->width[1] + max_dx), - -(2. * cj->width[2] + max_dx)}; - const float h_padded_j = cj->hydro.parts[0].h; - for (int i = last_pj_align + 1; i < last_pj_align + 1 + VEC_SIZE; i++) { xj[i] = pos_padded_j[0]; yj[i] = pos_padded_j[1]; @@ -701,11 +850,37 @@ cache_read_two_partial_cells_sorted_force( SWIFT_CACHE_ALIGNMENT); int ci_cache_count = ci->hydro.count - first_pi_align; + const double max_dx = max(ci->hydro.dx_max_part, cj->hydro.dx_max_part); + const float pos_padded_i[3] = {-(2. * ci->width[0] + max_dx), + -(2. * ci->width[1] + max_dx), + -(2. * ci->width[2] + max_dx)}; + const float h_padded_i = ci->hydro.h_max / 4.; + /* Shift the particles positions to a local frame (ci frame) so single * precision can be used instead of double precision. */ for (int i = 0; i < ci_cache_count; i++) { const int idx = sort_i[i + first_pi_align].i; + + /* Put inhibited particles out of range. */ + if (parts_i[idx].time_bin >= time_bin_inhibited) { + x[i] = pos_padded_i[0]; + y[i] = pos_padded_i[1]; + z[i] = pos_padded_i[2]; + h[i] = h_padded_i; + m[i] = 1.f; + vx[i] = 1.f; + vy[i] = 1.f; + vz[i] = 1.f; + rho[i] = 1.f; + grad_h[i] = 1.f; + pOrho2[i] = 1.f; + balsara[i] = 1.f; + soundspeed[i] = 1.f; + + continue; + } + x[i] = (float)(parts_i[idx].x[0] - total_ci_shift[0]); y[i] = (float)(parts_i[idx].x[1] - total_ci_shift[1]); z[i] = (float)(parts_i[idx].x[2] - total_ci_shift[2]); @@ -726,18 +901,12 @@ cache_read_two_partial_cells_sorted_force( /* Pad cache with fake particles that exist outside the cell so will not * interact. We use values of the same magnitude (but negative!) as the real * particles to avoid overflow problems. */ - const double max_dx = max(ci->hydro.dx_max_part, cj->hydro.dx_max_part); - const float pos_padded[3] = {-(2. * ci->width[0] + max_dx), - -(2. * ci->width[1] + max_dx), - -(2. 
* ci->width[2] + max_dx)}; - const float h_padded = ci->hydro.parts[0].h; - for (int i = ci->hydro.count - first_pi_align; i < ci->hydro.count - first_pi_align + VEC_SIZE; i++) { - x[i] = pos_padded[0]; - y[i] = pos_padded[1]; - z[i] = pos_padded[2]; - h[i] = h_padded; + x[i] = pos_padded_i[0]; + y[i] = pos_padded_i[1]; + z[i] = pos_padded_i[2]; + h[i] = h_padded_i; m[i] = 1.f; vx[i] = 1.f; vy[i] = 1.f; @@ -769,8 +938,33 @@ cache_read_two_partial_cells_sorted_force( swift_declare_aligned_ptr(float, soundspeedj, cj_cache->soundspeed, SWIFT_CACHE_ALIGNMENT); + const float pos_padded_j[3] = {-(2. * cj->width[0] + max_dx), + -(2. * cj->width[1] + max_dx), + -(2. * cj->width[2] + max_dx)}; + const float h_padded_j = cj->hydro.h_max / 4.; + for (int i = 0; i <= last_pj_align; i++) { const int idx = sort_j[i].i; + + /* Put inhibited particles out of range. */ + if (parts_j[idx].time_bin >= time_bin_inhibited) { + xj[i] = pos_padded_j[0]; + yj[i] = pos_padded_j[1]; + zj[i] = pos_padded_j[2]; + hj[i] = h_padded_j; + mj[i] = 1.f; + vxj[i] = 1.f; + vyj[i] = 1.f; + vzj[i] = 1.f; + rhoj[i] = 1.f; + grad_hj[i] = 1.f; + pOrho2j[i] = 1.f; + balsaraj[i] = 1.f; + soundspeedj[i] = 1.f; + + continue; + } + xj[i] = (float)(parts_j[idx].x[0] - total_cj_shift[0]); yj[i] = (float)(parts_j[idx].x[1] - total_cj_shift[1]); zj[i] = (float)(parts_j[idx].x[2] - total_cj_shift[2]); @@ -791,11 +985,6 @@ cache_read_two_partial_cells_sorted_force( /* Pad cache with fake particles that exist outside the cell so will not * interact. We use values of the same magnitude (but negative!) as the real * particles to avoid overflow problems. */ - const float pos_padded_j[3] = {-(2. * cj->width[0] + max_dx), - -(2. * cj->width[1] + max_dx), - -(2.
* cj->width[2] + max_dx)}; - const float h_padded_j = cj->hydro.parts[0].h; - for (int i = last_pj_align + 1; i < last_pj_align + 1 + VEC_SIZE; i++) { xj[i] = pos_padded_j[0]; yj[i] = pos_padded_j[1]; @@ -813,7 +1002,8 @@ cache_read_two_partial_cells_sorted_force( } } -/* @brief Clean the memory allocated by a #cache object. +/** + * @brief Clean the memory allocated by a #cache object. * * @param c The #cache to clean. */ diff --git a/src/cell.c b/src/cell.c index a7914a8a5a20d596a5516d61959e5c826c737b15..bd1022f1fa23b5911c4056b602008601fa36ce68 100644 --- a/src/cell.c +++ b/src/cell.c @@ -98,6 +98,14 @@ int cell_getsize(struct cell *c) { */ int cell_link_parts(struct cell *c, struct part *parts) { +#ifdef SWIFT_DEBUG_CHECKS + if (c->nodeID == engine_rank) + error("Linking foreign particles in a local cell!"); + + if (c->hydro.parts != NULL) + error("Linking parts into a cell that was already linked"); +#endif + c->hydro.parts = parts; /* Fill the progeny recursively, depth-first. */ @@ -123,6 +131,14 @@ int cell_link_parts(struct cell *c, struct part *parts) { */ int cell_link_gparts(struct cell *c, struct gpart *gparts) { +#ifdef SWIFT_DEBUG_CHECKS + if (c->nodeID == engine_rank) + error("Linking foreign particles in a local cell!"); + + if (c->grav.parts != NULL) + error("Linking gparts into a cell that was already linked"); +#endif + c->grav.parts = gparts; /* Fill the progeny recursively, depth-first. */ @@ -148,6 +164,14 @@ int cell_link_gparts(struct cell *c, struct gpart *gparts) { */ int cell_link_sparts(struct cell *c, struct spart *sparts) { +#ifdef SWIFT_DEBUG_CHECKS + if (c->nodeID == engine_rank) + error("Linking foreign particles in a local cell!"); + + if (c->stars.parts != NULL) + error("Linking sparts into a cell that was already linked"); +#endif + c->stars.parts = sparts; /* Fill the progeny recursively, depth-first. 
*/ @@ -163,6 +187,182 @@ int cell_link_sparts(struct cell *c, struct spart *sparts) { return c->stars.count; } +/** + * @brief Recurse down foreign cells until reaching one with hydro + * tasks; then trigger the linking of the #part array from that + * level. + * + * @param c The #cell. + * @param parts The #part array. + * + * @return The number of particles linked. + */ +int cell_link_foreign_parts(struct cell *c, struct part *parts) { + +#ifdef WITH_MPI + +#ifdef SWIFT_DEBUG_CHECKS + if (c->nodeID == engine_rank) + error("Linking foreign particles in a local cell!"); +#endif + + /* Do we have a hydro task at this level? */ + if (c->mpi.hydro.recv_xv != NULL) { + + /* Recursively attach the parts */ + const int counts = cell_link_parts(c, parts); +#ifdef SWIFT_DEBUG_CHECKS + if (counts != c->hydro.count) + error("Something is wrong with the foreign counts"); +#endif + return counts; + } + + /* Go deeper to find the level where the tasks are */ + if (c->split) { + int count = 0; + for (int k = 0; k < 8; k++) { + if (c->progeny[k] != NULL) { + count += cell_link_foreign_parts(c->progeny[k], &parts[count]); + } + } + return count; + } else { + return 0; + } + +#else + error("Calling linking of foregin particles in non-MPI mode."); +#endif +} + +/** + * @brief Recurse down foreign cells until reaching one with gravity + * tasks; then trigger the linking of the #gpart array from that + * level. + * + * @param c The #cell. + * @param gparts The #gpart array. + * + * @return The number of particles linked. + */ +int cell_link_foreign_gparts(struct cell *c, struct gpart *gparts) { + +#ifdef WITH_MPI + +#ifdef SWIFT_DEBUG_CHECKS + if (c->nodeID == engine_rank) + error("Linking foreign particles in a local cell!"); +#endif + + /* Do we have a gravity task at this level?
*/ + if (c->mpi.grav.recv != NULL) { + + /* Recursively attach the gparts */ + const int counts = cell_link_gparts(c, gparts); +#ifdef SWIFT_DEBUG_CHECKS + if (counts != c->grav.count) + error("Something is wrong with the foreign counts"); +#endif + return counts; + } + + /* Go deeper to find the level where the tasks are */ + if (c->split) { + int count = 0; + for (int k = 0; k < 8; k++) { + if (c->progeny[k] != NULL) { + count += cell_link_foreign_gparts(c->progeny[k], &gparts[count]); + } + } + return count; + } else { + return 0; + } + +#else + error("Calling linking of foregin particles in non-MPI mode."); +#endif +} + +/** + * @brief Recursively count the number of #part in foreign cells that + * are in cells with hydro-related tasks. + * + * @param c The #cell. + * + * @return The number of particles counted. + */ +int cell_count_parts_for_tasks(const struct cell *c) { + +#ifdef WITH_MPI + +#ifdef SWIFT_DEBUG_CHECKS + if (c->nodeID == engine_rank) + error("Counting foreign particles in a local cell!"); +#endif + + /* Do we have a hydro task at this level? */ + if (c->mpi.hydro.recv_xv != NULL) { + return c->hydro.count; + } + + if (c->split) { + int count = 0; + for (int k = 0; k < 8; ++k) { + if (c->progeny[k] != NULL) { + count += cell_count_parts_for_tasks(c->progeny[k]); + } + } + return count; + } else { + return 0; + } + +#else + error("Calling linking of foregin particles in non-MPI mode."); +#endif +} + +/** + * @brief Recursively count the number of #gpart in foreign cells that + * are in cells with gravity-related tasks. + * + * @param c The #cell. + * + * @return The number of particles counted. + */ +int cell_count_gparts_for_tasks(const struct cell *c) { + +#ifdef WITH_MPI + +#ifdef SWIFT_DEBUG_CHECKS + if (c->nodeID == engine_rank) + error("Counting foreign particles in a local cell!"); +#endif + + /* Do we have a gravity task at this level?
*/ + if (c->mpi.grav.recv != NULL) { + return c->grav.count; + } + + if (c->split) { + int count = 0; + for (int k = 0; k < 8; ++k) { + if (c->progeny[k] != NULL) { + count += cell_count_gparts_for_tasks(c->progeny[k]); + } + } + return count; + } else { + return 0; + } + +#else + error("Calling linking of foregin particles in non-MPI mode."); +#endif +} + /** * @brief Pack the data of the given cell and all it's sub-cells. * @@ -1232,8 +1432,11 @@ void cell_clean_links(struct cell *c, void *data) { c->hydro.density = NULL; c->hydro.gradient = NULL; c->hydro.force = NULL; + c->hydro.limiter = NULL; c->grav.grav = NULL; c->grav.mm = NULL; + c->stars.density = NULL; + c->stars.feedback = NULL; } /** @@ -1599,6 +1802,14 @@ void cell_clear_drift_flags(struct cell *c, void *data) { c->grav.do_sub_drift = 0; } +/** + * @brief Clear the limiter flags on the given cell. + */ +void cell_clear_limiter_flags(struct cell *c, void *data) { + c->hydro.do_limiter = 0; + c->hydro.do_sub_limiter = 0; +} + /** * @brief Activate the #part drifts on the given cell. */ @@ -1622,7 +1833,10 @@ void cell_activate_drift_part(struct cell *c, struct scheduler *s) { for (struct cell *parent = c->parent; parent != NULL && !parent->hydro.do_sub_drift; parent = parent->parent) { + + /* Mark this cell for drifting */ parent->hydro.do_sub_drift = 1; + if (parent == c->hydro.super) { #ifdef SWIFT_DEBUG_CHECKS if (parent->hydro.drift == NULL) @@ -1686,6 +1900,45 @@ void cell_activate_drift_spart(struct cell *c, struct scheduler *s) { cell_activate_drift_gpart(c, s); } +/** + * @brief Activate the time-step limiter on the given cell. + */ +void cell_activate_limiter(struct cell *c, struct scheduler *s) { + + /* If this cell is already marked for limiting, quit early. */ + if (c->hydro.do_limiter) return; + + /* Mark this cell for limiting. */ + c->hydro.do_limiter = 1; + + /* Set the do_sub_limiter all the way up and activate the super limiter + if this has not yet been done.
*/ + if (c == c->super) { +#ifdef SWIFT_DEBUG_CHECKS + if (c->timestep_limiter == NULL) + error("Trying to activate un-existing c->timestep_limiter"); +#endif + scheduler_activate(s, c->timestep_limiter); + } else { + for (struct cell *parent = c->parent; + parent != NULL && !parent->hydro.do_sub_limiter; + parent = parent->parent) { + + /* Mark this cell for limiting */ + parent->hydro.do_sub_limiter = 1; + + if (parent == c->super) { +#ifdef SWIFT_DEBUG_CHECKS + if (parent->timestep_limiter == NULL) + error("Trying to activate un-existing parent->timestep_limiter"); +#endif + scheduler_activate(s, parent->timestep_limiter); + break; + } + } + } +} + /** * @brief Activate the sorts up a cell hierarchy. */ @@ -1816,6 +2069,7 @@ void cell_activate_stars_sorts(struct cell *c, int sid, struct scheduler *s) { void cell_activate_subcell_hydro_tasks(struct cell *ci, struct cell *cj, struct scheduler *s) { const struct engine *e = s->space->e; + const int with_limiter = (e->policy & engine_policy_limiter); /* Store the current dx_max and h_max values. */ ci->hydro.dx_max_part_old = ci->hydro.dx_max_part; @@ -1849,6 +2103,7 @@ void cell_activate_subcell_hydro_tasks(struct cell *ci, struct cell *cj, /* We have reached the bottom of the tree: activate drift */ cell_activate_drift_part(ci, s); + if (with_limiter) cell_activate_limiter(ci, s); } } @@ -2154,6 +2409,12 @@ void cell_activate_subcell_hydro_tasks(struct cell *ci, struct cell *cj, if (ci->nodeID == engine_rank) cell_activate_drift_part(ci, s); if (cj->nodeID == engine_rank) cell_activate_drift_part(cj, s); + /* Also activate the time-step limiter */ + if (ci->nodeID == engine_rank && with_limiter) + cell_activate_limiter(ci, s); + if (cj->nodeID == engine_rank && with_limiter) + cell_activate_limiter(cj, s); + /* Do we need to sort the cells? 
*/ cell_activate_hydro_sorts(ci, sid, s); cell_activate_hydro_sorts(cj, sid, s); @@ -2718,6 +2979,7 @@ int cell_unskip_hydro_tasks(struct cell *c, struct scheduler *s) { struct engine *e = s->space->e; const int nodeID = e->nodeID; + const int with_limiter = (e->policy & engine_policy_limiter); int rebuild = 0; /* Un-skip the density tasks involved with this cell. */ @@ -2743,6 +3005,7 @@ int cell_unskip_hydro_tasks(struct cell *c, struct scheduler *s) { /* Activate hydro drift */ if (t->type == task_type_self) { if (ci_nodeID == nodeID) cell_activate_drift_part(ci, s); + if (ci_nodeID == nodeID && with_limiter) cell_activate_limiter(ci, s); } /* Set the correct sorting flags and activate hydro drifts */ @@ -2757,6 +3020,10 @@ int cell_unskip_hydro_tasks(struct cell *c, struct scheduler *s) { if (ci_nodeID == nodeID) cell_activate_drift_part(ci, s); if (cj_nodeID == nodeID) cell_activate_drift_part(cj, s); + /* Activate the limiter tasks. */ + if (ci_nodeID == nodeID && with_limiter) cell_activate_limiter(ci, s); + if (cj_nodeID == nodeID && with_limiter) cell_activate_limiter(cj, s); + /* Check the sorts and activate them if needed. */ cell_activate_hydro_sorts(ci, t->flags, s); cell_activate_hydro_sorts(cj, t->flags, s); @@ -2791,7 +3058,11 @@ int cell_unskip_hydro_tasks(struct cell *c, struct scheduler *s) { } /* If the foreign cell is active, we want its ti_end values. */ - if (ci_active) scheduler_activate(s, ci->mpi.recv_ti); + if (ci_active || with_limiter) scheduler_activate(s, ci->mpi.recv_ti); + + if (with_limiter) scheduler_activate(s, ci->mpi.limiter.recv); + if (with_limiter) + scheduler_activate_send(s, cj->mpi.limiter.send, ci->nodeID); /* Is the foreign cell active and will need stuff from us? */ if (ci_active) { @@ -2801,6 +3072,7 @@ int cell_unskip_hydro_tasks(struct cell *c, struct scheduler *s) { /* Drift the cell which will be sent; note that not all sent particles will be drifted, only those that are needed. 
*/ cell_activate_drift_part(cj, s); + if (with_limiter) cell_activate_limiter(cj, s); /* If the local cell is also active, more stuff will be needed. */ if (cj_active) { @@ -2813,7 +3085,8 @@ int cell_unskip_hydro_tasks(struct cell *c, struct scheduler *s) { } /* If the local cell is active, send its ti_end values. */ - if (cj_active) scheduler_activate_send(s, cj->mpi.send_ti, ci_nodeID); + if (cj_active || with_limiter) + scheduler_activate_send(s, cj->mpi.send_ti, ci_nodeID); } else if (cj_nodeID != nodeID) { @@ -2830,7 +3103,11 @@ int cell_unskip_hydro_tasks(struct cell *c, struct scheduler *s) { } /* If the foreign cell is active, we want its ti_end values. */ - if (cj_active) scheduler_activate(s, cj->mpi.recv_ti); + if (cj_active || with_limiter) scheduler_activate(s, cj->mpi.recv_ti); + + if (with_limiter) scheduler_activate(s, cj->mpi.limiter.recv); + if (with_limiter) + scheduler_activate_send(s, ci->mpi.limiter.send, cj->nodeID); /* Is the foreign cell active and will need stuff from us? */ if (cj_active) { @@ -2840,6 +3117,7 @@ int cell_unskip_hydro_tasks(struct cell *c, struct scheduler *s) { /* Drift the cell which will be sent; note that not all sent particles will be drifted, only those that are needed. */ cell_activate_drift_part(ci, s); + if (with_limiter) cell_activate_limiter(ci, s); /* If the local cell is also active, more stuff will be needed. */ if (ci_active) { @@ -2853,7 +3131,8 @@ int cell_unskip_hydro_tasks(struct cell *c, struct scheduler *s) { } /* If the local cell is active, send its ti_end values. 
*/ - if (ci_active) scheduler_activate_send(s, ci->mpi.send_ti, cj_nodeID); + if (ci_active || with_limiter) + scheduler_activate_send(s, ci->mpi.send_ti, cj_nodeID); } #endif } @@ -2866,6 +3145,8 @@ int cell_unskip_hydro_tasks(struct cell *c, struct scheduler *s) { scheduler_activate(s, l->t); for (struct link *l = c->hydro.force; l != NULL; l = l->next) scheduler_activate(s, l->t); + for (struct link *l = c->hydro.limiter; l != NULL; l = l->next) + scheduler_activate(s, l->t); if (c->hydro.extra_ghost != NULL) scheduler_activate(s, c->hydro.extra_ghost); @@ -2879,7 +3160,6 @@ int cell_unskip_hydro_tasks(struct cell *c, struct scheduler *s) { if (c->hydro.cooling != NULL) scheduler_activate(s, c->hydro.cooling); if (c->hydro.star_formation != NULL) scheduler_activate(s, c->hydro.star_formation); - if (c->sourceterms != NULL) scheduler_activate(s, c->sourceterms); if (c->logger != NULL) scheduler_activate(s, c->logger); } diff --git a/src/cell.h b/src/cell.h index 9452accbab6312f235764e689522ac3edbaafe61..c5fbc9b8c02b0e008d337189fdbf582faf4fa600 100644 --- a/src/cell.h +++ b/src/cell.h @@ -263,6 +263,9 @@ struct cell { /*! Linked list of the tasks computing this cell's hydro forces. */ struct link *force; + /*! Linked list of the tasks computing this cell's limiter. */ + struct link *limiter; + /*! Dependency implicit task for the ghost (in->ghost->out)*/ struct task *ghost_in; @@ -348,6 +351,12 @@ struct cell { /*! Do any of this cell's sub-cells need to be sorted? */ char do_sub_sort; + /*! Does this cell need to be limited? */ + char do_limiter; + + /*! Do any of this cell's sub-cells need to be limited? */ + char do_sub_limiter; + #ifdef SWIFT_DEBUG_CHECKS /*! Last (integer) time the cell's sort arrays were updated. */ @@ -570,6 +579,15 @@ struct cell { struct link *send; } grav; + struct { + + /* Task receiving limiter data. */ + struct task *recv; + + /* Linked list for sending limiter data.
*/ + struct link *send; + } limiter; + /* Task receiving data (time-step). */ struct task *recv_ti; @@ -603,8 +621,8 @@ struct cell { /*! The task to compute time-steps */ struct task *timestep; - /*! Task for source terms */ - struct task *sourceterms; + /*! The task to limit the time-step of inactive particles */ + struct task *timestep_limiter; /*! The logger task */ struct task *logger; @@ -673,6 +691,10 @@ int cell_getsize(struct cell *c); int cell_link_parts(struct cell *c, struct part *parts); int cell_link_gparts(struct cell *c, struct gpart *gparts); int cell_link_sparts(struct cell *c, struct spart *sparts); +int cell_link_foreign_parts(struct cell *c, struct part *parts); +int cell_link_foreign_gparts(struct cell *c, struct gpart *gparts); +int cell_count_parts_for_tasks(const struct cell *c); +int cell_count_gparts_for_tasks(const struct cell *c); void cell_clean_links(struct cell *c, void *data); void cell_make_multipoles(struct cell *c, integertime_t ti_current); void cell_check_multipole(struct cell *c); @@ -705,7 +727,9 @@ void cell_activate_drift_gpart(struct cell *c, struct scheduler *s); void cell_activate_drift_spart(struct cell *c, struct scheduler *s); void cell_activate_hydro_sorts(struct cell *c, int sid, struct scheduler *s); void cell_activate_stars_sorts(struct cell *c, int sid, struct scheduler *s); +void cell_activate_limiter(struct cell *c, struct scheduler *s); void cell_clear_drift_flags(struct cell *c, void *data); +void cell_clear_limiter_flags(struct cell *c, void *data); void cell_set_super_mapper(void *map_data, int num_elements, void *extra_data); void cell_check_spart_pos(const struct cell *c, const struct spart *global_sparts); diff --git a/src/common_io.c b/src/common_io.c index 24e74014fd52936023b5c7a41378faf3268bfdb3..733cf1dacac5f0c73ea401a584e2aa40eadd4a23 100644 --- a/src/common_io.c +++ b/src/common_io.c @@ -142,7 +142,7 @@ void io_read_attribute(hid_t grp, const char* name, enum IO_DATA_TYPE type, * Calls #error() if 
an error occurs. */ void io_write_attribute(hid_t grp, const char* name, enum IO_DATA_TYPE type, - void* data, int num) { + const void* data, int num) { const hid_t h_space = H5Screate(H5S_SIMPLE); if (h_space < 0) @@ -387,6 +387,332 @@ void io_write_engine_policy(hid_t h_file, const struct engine* e) { H5Gclose(h_grp); } +void io_write_cell_offsets(hid_t h_grp, const int cdim[3], + const struct cell* cells_top, const int nr_cells, + const double width[3], const int nodeID, + const long long global_counts[swift_type_count], + const long long global_offsets[swift_type_count], + const struct unit_system* internal_units, + const struct unit_system* snapshot_units) { + + double cell_width[3] = {width[0], width[1], width[2]}; + + /* Temporary memory for the cell-by-cell information */ + double* centres = NULL; + centres = (double*)malloc(3 * nr_cells * sizeof(double)); + + /* Count of particles in each cell */ + long long *count_part = NULL, *count_gpart = NULL, *count_spart = NULL; + count_part = (long long*)malloc(nr_cells * sizeof(long long)); + count_gpart = (long long*)malloc(nr_cells * sizeof(long long)); + count_spart = (long long*)malloc(nr_cells * sizeof(long long)); + + /* Global offsets of particles in each cell */ + long long *offset_part = NULL, *offset_gpart = NULL, *offset_spart = NULL; + offset_part = (long long*)malloc(nr_cells * sizeof(long long)); + offset_gpart = (long long*)malloc(nr_cells * sizeof(long long)); + offset_spart = (long long*)malloc(nr_cells * sizeof(long long)); + + /* Offsets of the 0^th element */ + offset_part[0] = 0; + offset_gpart[0] = 0; + offset_spart[0] = 0; + + /* Collect the cell information of *local* cells */ + long long local_offset_part = 0; + long long local_offset_gpart = 0; + long long local_offset_spart = 0; + for (int i = 0; i < nr_cells; ++i) { + + if (cells_top[i].nodeID == nodeID) { + + /* Centre of each cell */ + centres[i * 3 + 0] = cells_top[i].loc[0] + cell_width[0] * 0.5; + centres[i * 3 + 1] = 
cells_top[i].loc[1] + cell_width[1] * 0.5; + centres[i * 3 + 2] = cells_top[i].loc[2] + cell_width[2] * 0.5; + + /* Count real particles that will be written */ + count_part[i] = cells_top[i].hydro.count - cells_top[i].hydro.inhibited; + count_gpart[i] = cells_top[i].grav.count - cells_top[i].grav.inhibited; + count_spart[i] = cells_top[i].stars.count - cells_top[i].stars.inhibited; + + /* Only count DM gpart (gpart without friends) */ + count_gpart[i] -= count_part[i]; + count_gpart[i] -= count_spart[i]; + + /* Offsets including the global offset of all particles on this MPI rank + */ + offset_part[i] = local_offset_part + global_offsets[swift_type_gas]; + offset_gpart[i] = + local_offset_gpart + global_offsets[swift_type_dark_matter]; + offset_spart[i] = local_offset_spart + global_offsets[swift_type_stars]; + + local_offset_part += count_part[i]; + local_offset_gpart += count_gpart[i]; + local_offset_spart += count_spart[i]; + + } else { + + /* Just zero everything for the foreign cells */ + + centres[i * 3 + 0] = 0.; + centres[i * 3 + 1] = 0.; + centres[i * 3 + 2] = 0.; + + count_part[i] = 0; + count_gpart[i] = 0; + count_spart[i] = 0; + + offset_part[i] = 0; + offset_gpart[i] = 0; + offset_spart[i] = 0; + } + } + +#ifdef WITH_MPI + /* Now, reduce all the arrays. Note that we use a bit-wise OR here. This + is safe as we made sure only local cells have non-zero values.
*/ + if (nodeID == 0) { + MPI_Reduce(MPI_IN_PLACE, count_part, nr_cells, MPI_LONG_LONG_INT, MPI_BOR, + 0, MPI_COMM_WORLD); + } else { + MPI_Reduce(count_part, NULL, nr_cells, MPI_LONG_LONG_INT, MPI_BOR, 0, + MPI_COMM_WORLD); + } + if (nodeID == 0) { + MPI_Reduce(MPI_IN_PLACE, count_gpart, nr_cells, MPI_LONG_LONG_INT, MPI_BOR, + 0, MPI_COMM_WORLD); + } else { + MPI_Reduce(count_gpart, NULL, nr_cells, MPI_LONG_LONG_INT, MPI_BOR, 0, + MPI_COMM_WORLD); + } + if (nodeID == 0) { + MPI_Reduce(MPI_IN_PLACE, count_spart, nr_cells, MPI_LONG_LONG_INT, MPI_BOR, + 0, MPI_COMM_WORLD); + } else { + MPI_Reduce(count_spart, NULL, nr_cells, MPI_LONG_LONG_INT, MPI_BOR, 0, + MPI_COMM_WORLD); + } + if (nodeID == 0) { + MPI_Reduce(MPI_IN_PLACE, offset_part, nr_cells, MPI_LONG_LONG_INT, MPI_BOR, + 0, MPI_COMM_WORLD); + } else { + MPI_Reduce(offset_part, NULL, nr_cells, MPI_LONG_LONG_INT, MPI_BOR, 0, + MPI_COMM_WORLD); + } + if (nodeID == 0) { + MPI_Reduce(MPI_IN_PLACE, offset_gpart, nr_cells, MPI_LONG_LONG_INT, MPI_BOR, + 0, MPI_COMM_WORLD); + } else { + MPI_Reduce(offset_gpart, NULL, nr_cells, MPI_LONG_LONG_INT, MPI_BOR, 0, + MPI_COMM_WORLD); + } + if (nodeID == 0) { + MPI_Reduce(MPI_IN_PLACE, offset_spart, nr_cells, MPI_LONG_LONG_INT, MPI_BOR, + 0, MPI_COMM_WORLD); + } else { + MPI_Reduce(offset_spart, NULL, nr_cells, MPI_LONG_LONG_INT, MPI_BOR, 0, + MPI_COMM_WORLD); + } + + /* For the centres we use a sum as MPI does not like bit-wise operations + on floating point numbers */ + if (nodeID == 0) { + MPI_Reduce(MPI_IN_PLACE, centres, 3 * nr_cells, MPI_DOUBLE, MPI_SUM, 0, + MPI_COMM_WORLD); + } else { + MPI_Reduce(centres, NULL, 3 * nr_cells, MPI_DOUBLE, MPI_SUM, 0, + MPI_COMM_WORLD); + } +#endif + + /* Only rank 0 actually writes */ + if (nodeID == 0) { + + /* Unit conversion if necessary */ + const double factor = units_conversion_factor( + internal_units, snapshot_units, UNIT_CONV_LENGTH); + if (factor != 1.) 
{ + + /* Convert the cell centres */ + for (int i = 0; i < nr_cells; ++i) { + centres[i * 3 + 0] *= factor; + centres[i * 3 + 1] *= factor; + centres[i * 3 + 2] *= factor; + } + + /* Convert the cell widths */ + cell_width[0] *= factor; + cell_width[1] *= factor; + cell_width[2] *= factor; + } + + /* Write some meta-information first */ + hid_t h_subgrp = + H5Gcreate(h_grp, "Meta-data", H5P_DEFAULT, H5P_DEFAULT, H5P_DEFAULT); + if (h_subgrp < 0) error("Error while creating meta-data sub-group"); + io_write_attribute(h_subgrp, "nr_cells", INT, &nr_cells, 1); + io_write_attribute(h_subgrp, "size", DOUBLE, cell_width, 3); + io_write_attribute(h_subgrp, "dimension", INT, cdim, 3); + H5Gclose(h_subgrp); + + /* Write the centres to the group */ + hsize_t shape[2] = {nr_cells, 3}; + hid_t h_space = H5Screate(H5S_SIMPLE); + if (h_space < 0) error("Error while creating data space for cell centres"); + hid_t h_err = H5Sset_extent_simple(h_space, 2, shape, shape); + if (h_err < 0) + error("Error while changing shape of gas offsets data space."); + hid_t h_data = H5Dcreate(h_grp, "Centres", io_hdf5_type(DOUBLE), h_space, + H5P_DEFAULT, H5P_DEFAULT, H5P_DEFAULT); + if (h_data < 0) error("Error while creating dataspace for gas offsets."); + h_err = H5Dwrite(h_data, io_hdf5_type(DOUBLE), h_space, H5S_ALL, + H5P_DEFAULT, centres); + if (h_err < 0) error("Error while writing centres."); + H5Dclose(h_data); + H5Sclose(h_space); + + /* Group containing the offsets for each particle type */ + h_subgrp = + H5Gcreate(h_grp, "Offsets", H5P_DEFAULT, H5P_DEFAULT, H5P_DEFAULT); + if (h_subgrp < 0) error("Error while creating offsets sub-group"); + + if (global_counts[swift_type_gas] > 0) { + + shape[0] = nr_cells; + shape[1] = 1; + h_space = H5Screate(H5S_SIMPLE); + if (h_space < 0) error("Error while creating data space for gas offsets"); + h_err = H5Sset_extent_simple(h_space, 1, shape, shape); + if (h_err < 0) + error("Error while changing shape of gas offsets data space."); + h_data = 
H5Dcreate(h_subgrp, "PartType0", io_hdf5_type(LONGLONG), h_space, + H5P_DEFAULT, H5P_DEFAULT, H5P_DEFAULT); + if (h_data < 0) error("Error while creating dataspace for gas offsets."); + h_err = H5Dwrite(h_data, io_hdf5_type(LONGLONG), h_space, H5S_ALL, + H5P_DEFAULT, offset_part); + if (h_err < 0) error("Error while writing gas offsets."); + H5Dclose(h_data); + H5Sclose(h_space); + } + + if (global_counts[swift_type_dark_matter] > 0) { + + shape[0] = nr_cells; + shape[1] = 1; + h_space = H5Screate(H5S_SIMPLE); + if (h_space < 0) error("Error while creating data space for DM offsets"); + h_err = H5Sset_extent_simple(h_space, 1, shape, shape); + if (h_err < 0) + error("Error while changing shape of DM offsets data space."); + h_data = H5Dcreate(h_subgrp, "PartType1", io_hdf5_type(LONGLONG), h_space, + H5P_DEFAULT, H5P_DEFAULT, H5P_DEFAULT); + if (h_data < 0) error("Error while creating dataspace for DM offsets."); + h_err = H5Dwrite(h_data, io_hdf5_type(LONGLONG), h_space, H5S_ALL, + H5P_DEFAULT, offset_gpart); + if (h_err < 0) error("Error while writing DM offsets."); + H5Dclose(h_data); + H5Sclose(h_space); + } + + if (global_counts[swift_type_stars] > 0) { + + shape[0] = nr_cells; + shape[1] = 1; + h_space = H5Screate(H5S_SIMPLE); + if (h_space < 0) + error("Error while creating data space for stars offsets"); + h_err = H5Sset_extent_simple(h_space, 1, shape, shape); + if (h_err < 0) + error("Error while changing shape of stars offsets data space."); + h_data = H5Dcreate(h_subgrp, "PartType4", io_hdf5_type(LONGLONG), h_space, + H5P_DEFAULT, H5P_DEFAULT, H5P_DEFAULT); + if (h_data < 0) error("Error while creating dataspace for star offsets."); + h_err = H5Dwrite(h_data, io_hdf5_type(LONGLONG), h_space, H5S_ALL, + H5P_DEFAULT, offset_spart); + if (h_err < 0) error("Error while writing star offsets."); + H5Dclose(h_data); + H5Sclose(h_space); + } + + H5Gclose(h_subgrp); + + /* Group containing the counts for each particle type */ + h_subgrp = + H5Gcreate(h_grp, 
"Counts", H5P_DEFAULT, H5P_DEFAULT, H5P_DEFAULT); + if (h_subgrp < 0) error("Error while creating counts sub-group"); + + if (global_counts[swift_type_gas] > 0) { + + shape[0] = nr_cells; + shape[1] = 1; + h_space = H5Screate(H5S_SIMPLE); + if (h_space < 0) error("Error while creating data space for gas counts"); + h_err = H5Sset_extent_simple(h_space, 1, shape, shape); + if (h_err < 0) + error("Error while changing shape of gas counts data space."); + h_data = H5Dcreate(h_subgrp, "PartType0", io_hdf5_type(LONGLONG), h_space, + H5P_DEFAULT, H5P_DEFAULT, H5P_DEFAULT); + if (h_data < 0) error("Error while creating dataspace for gas counts."); + h_err = H5Dwrite(h_data, io_hdf5_type(LONGLONG), h_space, H5S_ALL, + H5P_DEFAULT, count_part); + if (h_err < 0) error("Error while writing gas counts."); + H5Dclose(h_data); + H5Sclose(h_space); + } + + if (global_counts[swift_type_dark_matter] > 0) { + + shape[0] = nr_cells; + shape[1] = 1; + h_space = H5Screate(H5S_SIMPLE); + if (h_space < 0) error("Error while creating data space for DM counts"); + h_err = H5Sset_extent_simple(h_space, 1, shape, shape); + if (h_err < 0) + error("Error while changing shape of DM counts data space."); + h_data = H5Dcreate(h_subgrp, "PartType1", io_hdf5_type(LONGLONG), h_space, + H5P_DEFAULT, H5P_DEFAULT, H5P_DEFAULT); + if (h_data < 0) error("Error while creating dataspace for DM counts."); + h_err = H5Dwrite(h_data, io_hdf5_type(LONGLONG), h_space, H5S_ALL, + H5P_DEFAULT, count_gpart); + if (h_err < 0) error("Error while writing DM counts."); + H5Dclose(h_data); + H5Sclose(h_space); + } + + if (global_counts[swift_type_stars] > 0) { + + shape[0] = nr_cells; + shape[1] = 1; + h_space = H5Screate(H5S_SIMPLE); + if (h_space < 0) + error("Error while creating data space for stars counts"); + h_err = H5Sset_extent_simple(h_space, 1, shape, shape); + if (h_err < 0) + error("Error while changing shape of stars counts data space."); + h_data = H5Dcreate(h_subgrp, "PartType4", io_hdf5_type(LONGLONG), 
h_space, + H5P_DEFAULT, H5P_DEFAULT, H5P_DEFAULT); + if (h_data < 0) error("Error while creating dataspace for star counts."); + h_err = H5Dwrite(h_data, io_hdf5_type(LONGLONG), h_space, H5S_ALL, + H5P_DEFAULT, count_spart); + if (h_err < 0) error("Error while writing star counts."); + H5Dclose(h_data); + H5Sclose(h_space); + } + + H5Gclose(h_subgrp); + } + + /* Free everything we allocated */ + free(centres); + free(count_part); + free(count_gpart); + free(count_spart); + free(offset_part); + free(offset_gpart); + free(offset_spart); +} + #endif /* HAVE_HDF5 */ /** @@ -482,6 +808,28 @@ void io_convert_part_d_mapper(void* restrict temp, int N, &temp_d[i * dim]); } +/** + * @brief Mapper function to copy #part into a buffer of long longs using a + * conversion function. + */ +void io_convert_part_l_mapper(void* restrict temp, int N, + void* restrict extra_data) { + + const struct io_props props = *((const struct io_props*)extra_data); + const struct part* restrict parts = props.parts; + const struct xpart* restrict xparts = props.xparts; + const struct engine* e = props.e; + const size_t dim = props.dimension; + + /* How far are we with this chunk? */ + long long* restrict temp_l = (long long*)temp; + const ptrdiff_t delta = (temp_l - props.start_temp_l) / dim; + + for (int i = 0; i < N; i++) + props.convert_part_l(e, parts + delta + i, xparts + delta + i, + &temp_l[i * dim]); +} + /** * @brief Mapper function to copy #gpart into a buffer of floats using a * conversion function. @@ -522,6 +870,26 @@ void io_convert_gpart_d_mapper(void* restrict temp, int N, props.convert_gpart_d(e, gparts + delta + i, &temp_d[i * dim]); } +/** + * @brief Mapper function to copy #gpart into a buffer of long longs using a + * conversion function.
+ */ +void io_convert_gpart_l_mapper(void* restrict temp, int N, + void* restrict extra_data) { + + const struct io_props props = *((const struct io_props*)extra_data); + const struct gpart* restrict gparts = props.gparts; + const struct engine* e = props.e; + const size_t dim = props.dimension; + + /* How far are we with this chunk? */ + long long* restrict temp_l = (long long*)temp; + const ptrdiff_t delta = (temp_l - props.start_temp_l) / dim; + + for (int i = 0; i < N; i++) + props.convert_gpart_l(e, gparts + delta + i, &temp_l[i * dim]); +} + /** * @brief Mapper function to copy #spart into a buffer of floats using a * conversion function. @@ -562,6 +930,26 @@ void io_convert_spart_d_mapper(void* restrict temp, int N, props.convert_spart_d(e, sparts + delta + i, &temp_d[i * dim]); } +/** + * @brief Mapper function to copy #spart into a buffer of long longs using a + * conversion function. + */ +void io_convert_spart_l_mapper(void* restrict temp, int N, + void* restrict extra_data) { + + const struct io_props props = *((const struct io_props*)extra_data); + const struct spart* restrict sparts = props.sparts; + const struct engine* e = props.e; + const size_t dim = props.dimension; + + /* How far are we with this chunk? */ + long long* restrict temp_l = (long long*)temp; + const ptrdiff_t delta = (temp_l - props.start_temp_l) / dim; + + for (int i = 0; i < N; i++) + props.convert_spart_l(e, sparts + delta + i, &temp_l[i * dim]); +} + /** * @brief Copy the particle data into a temporary buffer ready for i/o.
* @@ -619,6 +1007,18 @@ void io_copy_temp_buffer(void* temp, const struct engine* e, io_convert_part_d_mapper, temp_d, N, copySize, 0, (void*)&props); + } else if (props.convert_part_l != NULL) { + + /* Prepare some parameters */ + long long* temp_l = (long long*)temp; + props.start_temp_l = (long long*)temp; + props.e = e; + + /* Copy the whole thing into a buffer */ + threadpool_map((struct threadpool*)&e->threadpool, + io_convert_part_l_mapper, temp_l, N, copySize, 0, + (void*)&props); + } else if (props.convert_gpart_f != NULL) { /* Prepare some parameters */ @@ -643,6 +1043,18 @@ void io_copy_temp_buffer(void* temp, const struct engine* e, io_convert_gpart_d_mapper, temp_d, N, copySize, 0, (void*)&props); + } else if (props.convert_gpart_l != NULL) { + + /* Prepare some parameters */ + long long* temp_l = (long long*)temp; + props.start_temp_l = (long long*)temp; + props.e = e; + + /* Copy the whole thing into a buffer */ + threadpool_map((struct threadpool*)&e->threadpool, + io_convert_gpart_l_mapper, temp_l, N, copySize, 0, + (void*)&props); + } else if (props.convert_spart_f != NULL) { /* Prepare some parameters */ @@ -667,6 +1079,18 @@ void io_copy_temp_buffer(void* temp, const struct engine* e, io_convert_spart_d_mapper, temp_d, N, copySize, 0, (void*)&props); + } else if (props.convert_spart_l != NULL) { + + /* Prepare some parameters */ + long long* temp_l = (long long*)temp; + props.start_temp_l = (long long*)temp; + props.e = e; + + /* Copy the whole thing into a buffer */ + threadpool_map((struct threadpool*)&e->threadpool, + io_convert_spart_l_mapper, temp_l, N, copySize, 0, + (void*)&props); + } else { error("Missing conversion function"); } @@ -928,15 +1352,21 @@ void io_collect_sparts_to_write(const struct spart* restrict sparts, * @brief Copy every non-inhibited DM #gpart into the gparts_written array. * * @param gparts The array of #gpart containing all particles. + * @param vr_data The array of gpart-related VELOCIraptor output. 
* @param gparts_written The array of #gpart to fill with particles we want to * write. + * @param vr_data_written The array of gpart-related VELOCIraptor with particles + * we want to write. * @param Ngparts The total number of #part. * @param Ngparts_written The total number of #part to write. + * @param with_stf Are we running with STF? i.e. do we want to collect vr data? */ -void io_collect_gparts_to_write(const struct gpart* restrict gparts, - struct gpart* restrict gparts_written, - const size_t Ngparts, - const size_t Ngparts_written) { +void io_collect_gparts_to_write( + const struct gpart* restrict gparts, + const struct velociraptor_gpart_data* restrict vr_data, + struct gpart* restrict gparts_written, + struct velociraptor_gpart_data* restrict vr_data_written, + const size_t Ngparts, const size_t Ngparts_written, const int with_stf) { size_t count = 0; @@ -948,6 +1378,8 @@ void io_collect_gparts_to_write(const struct gpart* restrict gparts, (gparts[i].time_bin != time_bin_not_created) && (gparts[i].type == swift_type_dark_matter)) { + if (with_stf) vr_data_written[count] = vr_data[i]; + gparts_written[count] = gparts[i]; count++; } @@ -955,7 +1387,7 @@ void io_collect_gparts_to_write(const struct gpart* restrict gparts, /* Check that everything is fine */ if (count != Ngparts_written) - error("Collected the wrong number of s-particles (%zu vs. %zu expected)", + error("Collected the wrong number of g-particles (%zu vs. %zu expected)", count, Ngparts_written); } diff --git a/src/common_io.h b/src/common_io.h index 016c5138e18ae8636834c35d659e07d8fcd46e36..eb1ee0a804f324d897842fb2a0ca33fc07e769d6 100644 --- a/src/common_io.h +++ b/src/common_io.h @@ -24,6 +24,7 @@ #include "../config.h" /* Local includes. 
*/ +#include "part_type.h" #include "units.h" #define FIELD_BUFFER_SIZE 200 @@ -32,8 +33,10 @@ #define IO_BUFFER_ALIGNMENT 1024 /* Avoid cyclic inclusion problems */ +struct cell; struct part; struct gpart; +struct velociraptor_gpart_data; struct spart; struct xpart; struct io_props; @@ -65,7 +68,7 @@ void io_read_attribute(hid_t grp, const char* name, enum IO_DATA_TYPE type, void* data); void io_write_attribute(hid_t grp, const char* name, enum IO_DATA_TYPE type, - void* data, int num); + const void* data, int num); void io_write_attribute_d(hid_t grp, const char* name, double data); void io_write_attribute_f(hid_t grp, const char* name, float data); @@ -76,6 +79,14 @@ void io_write_attribute_s(hid_t grp, const char* name, const char* str); void io_write_code_description(hid_t h_file); void io_write_engine_policy(hid_t h_file, const struct engine* e); +void io_write_cell_offsets(hid_t h_grp, const int cdim[3], + const struct cell* cells_top, const int nr_cells, + const double width[3], const int nodeID, + const long long global_counts[swift_type_count], + const long long global_offsets[swift_type_count], + const struct unit_system* internal_units, + const struct unit_system* snapshot_units); + void io_read_unit_system(hid_t h_file, struct unit_system* ic_units, const struct unit_system* internal_units, int mpi_rank); @@ -103,9 +114,11 @@ void io_collect_sparts_to_write(const struct spart* restrict sparts, const size_t Nsparts, const size_t Nsparts_written); void io_collect_gparts_to_write(const struct gpart* restrict gparts, + const struct velociraptor_gpart_data* vr_data, struct gpart* restrict gparts_written, + struct velociraptor_gpart_data* vr_data_written, const size_t Ngparts, - const size_t Ngparts_written); + const size_t Ngparts_written, int with_stf); void io_prepare_dm_gparts(struct threadpool* tp, struct gpart* const gparts, size_t Ndm); void io_duplicate_hydro_gparts(struct threadpool* tp, struct part* const parts, diff --git a/src/const.h 
b/src/const.h index e417b8ca3827ef87396706c56df36bb9bd3aed75..613a48920e6f26c209faf6e354b82c2ed5be0bf1 100644 --- a/src/const.h +++ b/src/const.h @@ -33,6 +33,9 @@ /* Time integration constants. */ #define const_max_u_change 0.1f +/* Time-step limiter maximal difference in signal velocity */ +#define const_limiter_max_v_sig_ratio 4.1f + /* Type of gradients to use (GIZMO_SPH only) */ /* If no option is chosen, no gradients are used (first order scheme) */ //#define GRADIENTS_SPH diff --git a/src/cooling/EAGLE/cooling.c b/src/cooling/EAGLE/cooling.c index ed38917ad458214f07e7fa52391ab52546a5f4e0..60eba23a5e8927d271c157b1dc0a598bcdcfb1ff 100644 --- a/src/cooling/EAGLE/cooling.c +++ b/src/cooling/EAGLE/cooling.c @@ -499,14 +499,13 @@ void cooling_cool_part(const struct phys_const *restrict phys_const, float abundance_ratio[chemistry_element_count + 2]; abundance_ratio_to_solar(p, cooling, abundance_ratio); - /* Get the Hydrogen mass fraction */ + /* Get the Hydrogen and Helium mass fractions */ const float XH = p->chemistry_data.metal_mass_fraction[chemistry_element_H]; + const float XHe = p->chemistry_data.metal_mass_fraction[chemistry_element_He]; /* Get the Helium mass fraction. Note that this is He / (H + He), i.e. 
a * metal-free Helium mass fraction as per the Wiersma+08 definition */ - const float HeFrac = - p->chemistry_data.metal_mass_fraction[chemistry_element_He] / - (XH + p->chemistry_data.metal_mass_fraction[chemistry_element_He]); + const float HeFrac = XHe / (XH + XHe); /* convert Hydrogen mass fraction into Hydrogen number density */ const double n_H = @@ -707,14 +706,13 @@ float cooling_get_temperature( const float u = hydro_get_physical_internal_energy(p, xp, cosmo); const double u_cgs = u * cooling->internal_energy_to_cgs; - /* Get the Hydrogen mass fraction */ + /* Get the Hydrogen and Helium mass fractions */ const float XH = p->chemistry_data.metal_mass_fraction[chemistry_element_H]; + const float XHe = p->chemistry_data.metal_mass_fraction[chemistry_element_He]; /* Get the Helium mass fraction. Note that this is He / (H + He), i.e. a * metal-free Helium mass fraction as per the Wiersma+08 definition */ - const float HeFrac = - p->chemistry_data.metal_mass_fraction[chemistry_element_He] / - (XH + p->chemistry_data.metal_mass_fraction[chemistry_element_He]); + const float HeFrac = XHe / (XH + XHe); /* Convert Hydrogen mass fraction into Hydrogen number density */ const float rho = hydro_get_physical_density(p, cosmo); diff --git a/src/cosmology.c b/src/cosmology.c index 4718ed5b316e514476e3ec38dd8771136f3a2f69..be23343d0d62584cd3a811e547b327120db744ef 100644 --- a/src/cosmology.c +++ b/src/cosmology.c @@ -576,6 +576,8 @@ void cosmology_init_no_cosmo(struct cosmology *c) { c->a_dot = 0.; c->time = 0.; c->universe_age_at_present_day = 0.; + c->Hubble_time = 0.; + c->lookback_time = 0.; /* Initialise the interpolation tables */ c->drift_fac_interp_table = NULL; diff --git a/src/dimension.h b/src/dimension.h index 0b2093d718a61c6ce850db1970412af3e2e462b9..7084d70f5794853557539862091809071af2e790 100644 --- a/src/dimension.h +++ b/src/dimension.h @@ -89,6 +89,34 @@ __attribute__((always_inline)) INLINE static float pow_dimension(float x) { #endif } +/** + * @brief 
Returns the argument to the power given by the inverse of the + * dimension + * + * Computes \f$x^{1/d}\f$. + */ +__attribute__((always_inline)) INLINE static float pow_inv_dimension(float x) { + +#if defined(HYDRO_DIMENSION_3D) + + return cbrtf(x); + +#elif defined(HYDRO_DIMENSION_2D) + + return sqrtf(x); + +#elif defined(HYDRO_DIMENSION_1D) + + return x; + +#else + + error("The dimension is not defined !"); + return 0.f; + +#endif +} + /** * @brief Returns the argument to the power given by the dimension plus one * diff --git a/src/engine.c b/src/engine.c index eea21bc58f2baea1620ed8bf8fb4dfb66f75bf10..b88c1f1c5a1cbad456405b2c5f97e501ee7dc745 100644 --- a/src/engine.c +++ b/src/engine.c @@ -115,7 +115,8 @@ const char *engine_policy_names[] = {"none", "stars", "structure finding", "star formation", - "feedback"}; + "feedback", + "time-step limiter"}; /** The rank of the engine as a global variable (for messages). */ int engine_rank; @@ -147,7 +148,9 @@ void engine_addlink(struct engine *e, struct link **l, struct task *t) { /* Get the next free link. */ const size_t ind = atomic_inc(&e->nr_links); if (ind >= e->size_links) { - error("Link table overflow."); + error( + "Link table overflow. Increase the value of " + "`Scheduler:links_per_tasks`."); } struct link *res = &e->links[ind]; @@ -1131,84 +1134,12 @@ void engine_exchange_cells(struct engine *e) { #ifdef WITH_MPI - struct space *s = e->s; - const int nr_proxies = e->nr_proxies; const int with_gravity = e->policy & engine_policy_self_gravity; const ticks tic = getticks(); /* Exchange the cell structure with neighbouring ranks. */ proxy_cells_exchange(e->proxies, e->nr_proxies, e->s, with_gravity); - ticks tic2 = getticks(); - - /* Count the number of particles we need to import and re-allocate - the buffer if needed. 
*/ - size_t count_parts_in = 0, count_gparts_in = 0, count_sparts_in = 0; - for (int k = 0; k < nr_proxies; k++) - for (int j = 0; j < e->proxies[k].nr_cells_in; j++) { - if (e->proxies[k].cells_in_type[j] & proxy_cell_type_hydro) - count_parts_in += e->proxies[k].cells_in[j]->hydro.count; - if (e->proxies[k].cells_in_type[j] & proxy_cell_type_gravity) - count_gparts_in += e->proxies[k].cells_in[j]->grav.count; - count_sparts_in += e->proxies[k].cells_in[j]->stars.count; - } - if (count_parts_in > s->size_parts_foreign) { - if (s->parts_foreign != NULL) free(s->parts_foreign); - s->size_parts_foreign = 1.1 * count_parts_in; - if (posix_memalign((void **)&s->parts_foreign, part_align, - sizeof(struct part) * s->size_parts_foreign) != 0) - error("Failed to allocate foreign part data."); - } - if (count_gparts_in > s->size_gparts_foreign) { - if (s->gparts_foreign != NULL) free(s->gparts_foreign); - s->size_gparts_foreign = 1.1 * count_gparts_in; - if (posix_memalign((void **)&s->gparts_foreign, gpart_align, - sizeof(struct gpart) * s->size_gparts_foreign) != 0) - error("Failed to allocate foreign gpart data."); - } - if (count_sparts_in > s->size_sparts_foreign) { - if (s->sparts_foreign != NULL) free(s->sparts_foreign); - s->size_sparts_foreign = 1.1 * count_sparts_in; - if (posix_memalign((void **)&s->sparts_foreign, spart_align, - sizeof(struct spart) * s->size_sparts_foreign) != 0) - error("Failed to allocate foreign spart data."); - } - - if (e->verbose) - message("Counting and allocating arrays took %.3f %s.", - clocks_from_ticks(getticks() - tic2), clocks_getunit()); - - tic2 = getticks(); - - /* Unpack the cells and link to the particle data. 
*/ - struct part *parts = s->parts_foreign; - struct gpart *gparts = s->gparts_foreign; - struct spart *sparts = s->sparts_foreign; - for (int k = 0; k < nr_proxies; k++) { - for (int j = 0; j < e->proxies[k].nr_cells_in; j++) { - - if (e->proxies[k].cells_in_type[j] & proxy_cell_type_hydro) { - cell_link_parts(e->proxies[k].cells_in[j], parts); - parts = &parts[e->proxies[k].cells_in[j]->hydro.count]; - } - - if (e->proxies[k].cells_in_type[j] & proxy_cell_type_gravity) { - cell_link_gparts(e->proxies[k].cells_in[j], gparts); - gparts = &gparts[e->proxies[k].cells_in[j]->grav.count]; - } - - cell_link_sparts(e->proxies[k].cells_in[j], sparts); - sparts = &sparts[e->proxies[k].cells_in[j]->stars.count]; - } - } - s->nr_parts_foreign = parts - s->parts_foreign; - s->nr_gparts_foreign = gparts - s->gparts_foreign; - s->nr_sparts_foreign = sparts - s->sparts_foreign; - - if (e->verbose) - message("Recursively linking arrays took %.3f %s.", - clocks_from_ticks(getticks() - tic2), clocks_getunit()); - if (e->verbose) message("took %.3f %s.", clocks_from_ticks(getticks() - tic), clocks_getunit()); @@ -1823,15 +1754,133 @@ void engine_exchange_proxy_multipoles(struct engine *e) { #endif } +/** + * @brief Allocate memory for the foreign particles. + * + * We look into the proxies for cells that have tasks and count + * the number of particles in these cells. We then allocate + * memory and link all the cells that have tasks and all cells + * deeper in the tree. + * + * @param e The #engine. + */ +void engine_allocate_foreign_particles(struct engine *e) { + +#ifdef WITH_MPI + + const int nr_proxies = e->nr_proxies; + struct space *s = e->s; + ticks tic = getticks(); + + /* Count the number of particles we need to import and re-allocate + the buffer if needed. 
*/ + size_t count_parts_in = 0, count_gparts_in = 0, count_sparts_in = 0; + for (int k = 0; k < nr_proxies; k++) { + for (int j = 0; j < e->proxies[k].nr_cells_in; j++) { + + if (e->proxies[k].cells_in_type[j] & proxy_cell_type_hydro) { + count_parts_in += cell_count_parts_for_tasks(e->proxies[k].cells_in[j]); + } + + if (e->proxies[k].cells_in_type[j] & proxy_cell_type_gravity) { + count_gparts_in += + cell_count_gparts_for_tasks(e->proxies[k].cells_in[j]); + } + + /* For stars, we just use the numbers in the top-level cells */ + count_sparts_in += e->proxies[k].cells_in[j]->stars.count; + } + } + + if (e->verbose) + message("Counting number of foreign particles took %.3f %s.", + clocks_from_ticks(getticks() - tic), clocks_getunit()); + + tic = getticks(); + + /* Allocate space for the foreign particles we will receive */ + if (count_parts_in > s->size_parts_foreign) { + if (s->parts_foreign != NULL) free(s->parts_foreign); + s->size_parts_foreign = engine_foreign_alloc_margin * count_parts_in; + if (posix_memalign((void **)&s->parts_foreign, part_align, + sizeof(struct part) * s->size_parts_foreign) != 0) + error("Failed to allocate foreign part data."); + } + /* Allocate space for the foreign particles we will receive */ + if (count_gparts_in > s->size_gparts_foreign) { + if (s->gparts_foreign != NULL) free(s->gparts_foreign); + s->size_gparts_foreign = engine_foreign_alloc_margin * count_gparts_in; + if (posix_memalign((void **)&s->gparts_foreign, gpart_align, + sizeof(struct gpart) * s->size_gparts_foreign) != 0) + error("Failed to allocate foreign gpart data."); + } + /* Allocate space for the foreign particles we will receive */ + if (count_sparts_in > s->size_sparts_foreign) { + if (s->sparts_foreign != NULL) free(s->sparts_foreign); + s->size_sparts_foreign = engine_foreign_alloc_margin * count_sparts_in; + if (posix_memalign((void **)&s->sparts_foreign, spart_align, + sizeof(struct spart) * s->size_sparts_foreign) != 0) + error("Failed to allocate foreign 
spart data."); + } + + if (e->verbose) + message("Allocating %zd/%zd/%zd foreign part/gpart/spart (%zd/%zd/%zd MB)", + s->size_parts_foreign, s->size_gparts_foreign, + s->size_sparts_foreign, + s->size_parts_foreign * sizeof(struct part) / (1024 * 1024), + s->size_gparts_foreign * sizeof(struct gpart) / (1024 * 1024), + s->size_sparts_foreign * sizeof(struct spart) / (1024 * 1024)); + + /* Unpack the cells and link to the particle data. */ + struct part *parts = s->parts_foreign; + struct gpart *gparts = s->gparts_foreign; + struct spart *sparts = s->sparts_foreign; + for (int k = 0; k < nr_proxies; k++) { + for (int j = 0; j < e->proxies[k].nr_cells_in; j++) { + + if (e->proxies[k].cells_in_type[j] & proxy_cell_type_hydro) { + + const size_t count_parts = + cell_link_foreign_parts(e->proxies[k].cells_in[j], parts); + parts = &parts[count_parts]; + } + + if (e->proxies[k].cells_in_type[j] & proxy_cell_type_gravity) { + + const size_t count_gparts = + cell_link_foreign_gparts(e->proxies[k].cells_in[j], gparts); + gparts = &gparts[count_gparts]; + } + + /* For stars, we just use the numbers in the top-level cells */ + cell_link_sparts(e->proxies[k].cells_in[j], sparts); + sparts = &sparts[e->proxies[k].cells_in[j]->stars.count]; + } + } + + /* Update the counters */ + s->nr_parts_foreign = parts - s->parts_foreign; + s->nr_gparts_foreign = gparts - s->gparts_foreign; + s->nr_sparts_foreign = sparts - s->sparts_foreign; + + if (e->verbose) + message("Recursively linking foreign arrays took %.3f %s.", + clocks_from_ticks(getticks() - tic), clocks_getunit()); + +#else + error("SWIFT was not compiled with MPI support."); +#endif +} + /** * @brief Prints the number of tasks in the engine * * @param e The #engine. 
*/ -void engine_print_task_counts(struct engine *e) { +void engine_print_task_counts(const struct engine *e) { const ticks tic = getticks(); - struct scheduler *const sched = &e->sched; + const struct scheduler *sched = &e->sched; const int nr_tasks = sched->nr_tasks; const struct task *const tasks = sched->tasks; @@ -1878,7 +1927,7 @@ void engine_print_task_counts(struct engine *e) { * * @return the estimated total number of tasks */ -int engine_estimate_nr_tasks(struct engine *e) { +int engine_estimate_nr_tasks(const struct engine *e) { int tasks_per_cell = e->tasks_per_cell; if (tasks_per_cell > 0) return e->s->tot_cells * tasks_per_cell; @@ -1887,8 +1936,7 @@ int engine_estimate_nr_tasks(struct engine *e) { * basically use a formula <n1>*ntopcells + <n2>*(totcells - ntopcells). * Where <n1> is the expected maximum tasks per top-level/super cell, and * <n2> the expected maximum tasks for all other cells. These should give - * a safe upper limit. - */ + * a safe upper limit. */ int n1 = 0; int n2 = 0; if (e->policy & engine_policy_hydro) { @@ -1909,6 +1957,10 @@ int engine_estimate_nr_tasks(struct engine *e) { #endif #endif } + if (e->policy & engine_policy_limiter) { + n1 += 18; + n2 += 1; + } if (e->policy & engine_policy_self_gravity) { n1 += 125; n2 += 8; @@ -2587,8 +2639,11 @@ void engine_skip_force_and_kick(struct engine *e) { /* Skip everything that updates the particles */ if (t->type == task_type_drift_part || t->type == task_type_drift_gpart || t->type == task_type_kick1 || t->type == task_type_kick2 || - t->type == task_type_timestep || t->subtype == task_subtype_force || - t->subtype == task_subtype_grav || t->type == task_type_end_force || + t->type == task_type_timestep || + t->type == task_type_timestep_limiter || + t->subtype == task_subtype_force || + t->subtype == task_subtype_limiter || t->subtype == task_subtype_grav || + t->type == task_type_end_force || t->type == task_type_grav_long_range || t->type == task_type_grav_mm || t->type == 
task_type_grav_down || t->type == task_type_cooling || t->type == task_type_star_formation) @@ -2597,6 +2652,7 @@ void engine_skip_force_and_kick(struct engine *e) { /* Run through the cells and clear some flags. */ space_map_cells_pre(e->s, 1, cell_clear_drift_flags, NULL); + space_map_cells_pre(e->s, 1, cell_clear_limiter_flags, NULL); } /** @@ -2806,6 +2862,11 @@ void engine_init_particles(struct engine *e, int flag_entropy_ICs, gravity_exact_force_check(e->s, e, 1e-1); #endif +#ifdef SWIFT_DEBUG_CHECKS + /* Make sure all woken-up particles have been processed */ + space_check_limiter(e->s); +#endif + /* Recover the (integer) end of the next time-step */ engine_collect_end_of_step(e, 1); @@ -3063,6 +3124,11 @@ void engine_step(struct engine *e) { gravity_exact_force_check(e->s, e, 1e-1); #endif +#ifdef SWIFT_DEBUG_CHECKS + /* Make sure all woken-up particles have been processed */ + space_check_limiter(e->s); +#endif + /* Collect information about the next time-step */ engine_collect_end_of_step(e, 1); e->forcerebuild = e->collect_group1.forcerebuild; @@ -3106,94 +3172,87 @@ void engine_step(struct engine *e) { */ void engine_check_for_dumps(struct engine *e) { + const int with_cosmology = (e->policy & engine_policy_cosmology); const int with_stf = (e->policy & engine_policy_structure_finding); - const int stf_time_output = (e->stf_output_freq_format == io_stf_time); + + /* What kind of output are we getting? */ + enum output_type { + output_none, + output_snapshot, + output_statistics, + output_stf + }; + + /* What kind of output do we want? And at which time ? + * Find the earliest output (amongst all kinds) that takes place + * before the next time-step */ + enum output_type type = output_none; + integertime_t ti_output = max_nr_timesteps; /* Save some statistics ? 
*/ - int save_stats = 0; - if (e->ti_end_min > e->ti_next_stats && e->ti_next_stats > 0) save_stats = 1; + if (e->ti_end_min > e->ti_next_stats && e->ti_next_stats > 0) { + if (e->ti_next_stats < ti_output) { + ti_output = e->ti_next_stats; + type = output_statistics; + } + } /* Do we want a snapshot? */ - int dump_snapshot = 0; - if (e->ti_end_min > e->ti_next_snapshot && e->ti_next_snapshot > 0) - dump_snapshot = 1; + if (e->ti_end_min > e->ti_next_snapshot && e->ti_next_snapshot > 0) { + if (e->ti_next_snapshot < ti_output) { + ti_output = e->ti_next_snapshot; + type = output_snapshot; + } + } /* Do we want to perform structure finding? */ - int run_stf = 0; - if (with_stf && stf_time_output) { - if (e->ti_end_min > e->ti_next_stf && e->ti_next_stf > 0) run_stf = 1; - } - if (with_stf && !stf_time_output) { - if (e->step % e->delta_step_stf == 0) run_stf = 1; + if (with_stf) { + if (e->ti_end_min > e->ti_next_stf && e->ti_next_stf > 0) { + if (e->ti_next_stf < ti_output) { + ti_output = e->ti_next_stf; + type = output_stf; + } + } } /* Store information before attempting extra dump-related drifts */ - integertime_t ti_current = e->ti_current; - timebin_t max_active_bin = e->max_active_bin; - double time = e->time; + const integertime_t ti_current = e->ti_current; + const timebin_t max_active_bin = e->max_active_bin; + const double time = e->time; + + while (type != output_none) { + + /* Let's fake that we are at the dump time */ + e->ti_current = ti_output; + e->max_active_bin = 0; + if (with_cosmology) { + cosmology_update(e->cosmology, e->physical_constants, e->ti_current); + e->time = e->cosmology->time; + } else { + e->time = ti_output * e->time_base + e->time_begin; + } - while (save_stats || dump_snapshot || run_stf) { + /* Drift everyone */ + engine_drift_all(e, /*drift_mpole=*/0); /* Write some form of output */ - if (dump_snapshot && save_stats) { - - /* If both, need to figure out which one occurs first */ - if (e->ti_next_stats == e->ti_next_snapshot) 
{ - - /* Let's fake that we are at the common dump time */ - e->ti_current = e->ti_next_snapshot; - e->max_active_bin = 0; - if ((e->policy & engine_policy_cosmology)) { - cosmology_update(e->cosmology, e->physical_constants, e->ti_current); - e->time = e->cosmology->time; - } else { - e->time = e->ti_next_stats * e->time_base + e->time_begin; - } + switch (type) { + case output_snapshot: - /* Drift everyone */ - engine_drift_all(e, /*drift_mpole=*/0); + /* Do we want a corresponding VELOCIraptor output? */ + if (with_stf && e->snapshot_invoke_stf) { - /* Dump everything */ - engine_print_stats(e); -#ifdef WITH_LOGGER - /* Write a file containing the offsets in the particle logger. */ - engine_dump_index(e); +#ifdef HAVE_VELOCIRAPTOR + velociraptor_invoke(e, /*linked_with_snap=*/1); + e->step_props |= engine_step_prop_stf; #else - engine_dump_snapshot(e); + error( + "Asking for a VELOCIraptor output but SWIFT was compiled without " + "the interface!"); #endif - - } else if (e->ti_next_stats < e->ti_next_snapshot) { - - /* Let's fake that we are at the stats dump time */ - e->ti_current = e->ti_next_stats; - e->max_active_bin = 0; - if ((e->policy & engine_policy_cosmology)) { - cosmology_update(e->cosmology, e->physical_constants, e->ti_current); - e->time = e->cosmology->time; - } else { - e->time = e->ti_next_stats * e->time_base + e->time_begin; - } - - /* Drift everyone */ - engine_drift_all(e, /*drift_mpole=*/0); - - /* Dump stats */ - engine_print_stats(e); - - /* Let's fake that we are at the snapshot dump time */ - e->ti_current = e->ti_next_snapshot; - e->max_active_bin = 0; - if ((e->policy & engine_policy_cosmology)) { - cosmology_update(e->cosmology, e->physical_constants, e->ti_current); - e->time = e->cosmology->time; - } else { - e->time = e->ti_next_snapshot * e->time_base + e->time_begin; } - /* Drift everyone */ - engine_drift_all(e, /*drift_mpole=*/0); - - /* Dump snapshot */ + /* Dump... 
*/ #ifdef WITH_LOGGER /* Write a file containing the offsets in the particle logger. */ engine_dump_index(e); @@ -3201,118 +3260,60 @@ void engine_check_for_dumps(struct engine *e) { engine_dump_snapshot(e); #endif - } else if (e->ti_next_stats > e->ti_next_snapshot) { - - /* Let's fake that we are at the snapshot dump time */ - e->ti_current = e->ti_next_snapshot; - e->max_active_bin = 0; - if ((e->policy & engine_policy_cosmology)) { - cosmology_update(e->cosmology, e->physical_constants, e->ti_current); - e->time = e->cosmology->time; - } else { - e->time = e->ti_next_stats * e->time_base + e->time_begin; - } - - /* Drift everyone */ - engine_drift_all(e, /*drift_mpole=*/0); - - /* Dump snapshot */ -#ifdef WITH_LOGGER - /* Write a file containing the offsets in the particle logger. */ - engine_dump_index(e); -#else - engine_dump_snapshot(e); + /* Free the memory allocated for VELOCIraptor i/o. */ + if (with_stf && e->snapshot_invoke_stf) { +#ifdef HAVE_VELOCIRAPTOR + free(e->s->gpart_group_data); + e->s->gpart_group_data = NULL; #endif - - /* Let's fake that we are at the stats dump time */ - e->ti_current = e->ti_next_stats; - e->max_active_bin = 0; - if ((e->policy & engine_policy_cosmology)) { - cosmology_update(e->cosmology, e->physical_constants, e->ti_current); - e->time = e->cosmology->time; - } else { - e->time = e->ti_next_stats * e->time_base + e->time_begin; } - /* Drift everyone */ - engine_drift_all(e, /*drift_mpole=*/0); + /* ... 
and find the next output time */ + engine_compute_next_snapshot_time(e); + break; + + case output_statistics: - /* Dump stats */ + /* Dump */ engine_print_stats(e); } - /* Let's compute the time of the next outputs */ - engine_compute_next_snapshot_time(e); - engine_compute_next_statistics_time(e); + /* and move on */ + engine_compute_next_statistics_time(e); - } else if (dump_snapshot) { + break; - /* Let's fake that we are at the snapshot dump time */ - e->ti_current = e->ti_next_snapshot; - e->max_active_bin = 0; - if ((e->policy & engine_policy_cosmology)) { - cosmology_update(e->cosmology, e->physical_constants, e->ti_current); - e->time = e->cosmology->time; - } else { - e->time = e->ti_next_snapshot * e->time_base + e->time_begin; - } + case output_stf: - /* Drift everyone */ - engine_drift_all(e, /*drift_mpole=*/0); +#ifdef HAVE_VELOCIRAPTOR + /* Unleash the raptor! */ + velociraptor_invoke(e, /*linked_with_snap=*/0); + e->step_props |= engine_step_prop_stf; - /* Dump... */ -#ifdef WITH_LOGGER - /* Write a file containing the offsets in the particle logger. */ - engine_dump_index(e); + /* ... and find the next output time */ + engine_compute_next_stf_time(e); #else - engine_dump_snapshot(e); + error( + "Asking for a VELOCIraptor output but SWIFT was compiled without " + "the interface!"); #endif + break; - /* ... 
and find the next output time */ - engine_compute_next_snapshot_time(e); - - } else if (save_stats) { - - /* Let's fake that we are at the stats dump time */ - e->ti_current = e->ti_next_stats; - e->max_active_bin = 0; - if ((e->policy & engine_policy_cosmology)) { - cosmology_update(e->cosmology, e->physical_constants, e->ti_current); - e->time = e->cosmology->time; - } else { - e->time = e->ti_next_stats * e->time_base + e->time_begin; - } - - /* Drift everyone */ - engine_drift_all(e, /*drift_mpole=*/0); - - /* Dump */ - engine_print_stats(e); - - /* and move on */ - engine_compute_next_statistics_time(e); + default: + error("Invalid dump type"); } - /* Perform structure finding? */ - if (run_stf) { - -#ifdef HAVE_VELOCIRAPTOR - - // MATTHIEU: Check the order with the other i/o options. - if (!dump_snapshot && !save_stats) { + /* We need to see whether we are in the pathological case + * where there can be another dump before the next step. */ - /* Let's fake that we are at the stats dump time */ - e->ti_current = e->ti_next_stf; - e->max_active_bin = 0; - if ((e->policy & engine_policy_cosmology)) { - cosmology_update(e->cosmology, e->physical_constants, e->ti_current); - e->time = e->cosmology->time; - } else { - e->time = e->ti_next_stats * e->time_base + e->time_begin; - } + type = output_none; + ti_output = max_nr_timesteps; - /* Drift everyone */ - engine_drift_all(e, /*drift_mpole=*/0); + /* Save some statistics ? */ + if (e->ti_end_min > e->ti_next_stats && e->ti_next_stats > 0) { + if (e->ti_next_stats < ti_output) { + ti_output = e->ti_next_stats; + type = output_statistics; } velociraptor_init(e); @@ -3333,16 +3334,24 @@ void engine_check_for_dumps(struct engine *e) { save_stats = 1; /* Do we want a snapshot? 
*/ - dump_snapshot = 0; - if (e->ti_end_min > e->ti_next_snapshot && e->ti_next_snapshot > 0) - dump_snapshot = 1; + if (e->ti_end_min > e->ti_next_snapshot && e->ti_next_snapshot > 0) { + if (e->ti_next_snapshot < ti_output) { + ti_output = e->ti_next_snapshot; + type = output_snapshot; + } + } /* Do we want to perform structure finding? */ - run_stf = 0; - if (with_stf && stf_time_output) { - if (e->ti_end_min > e->ti_next_stf && e->ti_next_stf > 0) run_stf = 1; + if (with_stf) { + if (e->ti_end_min > e->ti_next_stf && e->ti_next_stf > 0) { + if (e->ti_next_stf < ti_output) { + ti_output = e->ti_next_stf; + type = output_stf; + } + } } - } + + } /* While loop over output types */ /* Restore the information we stored */ e->ti_current = ti_current; @@ -4104,9 +4113,12 @@ void engine_init(struct engine *e, struct space *s, struct swift_params *params, parser_get_opt_param_int(params, "Snapshots:compression", 0); e->snapshot_int_time_label_on = parser_get_opt_param_int(params, "Snapshots:int_time_label_on", 0); + e->snapshot_invoke_stf = + parser_get_opt_param_int(params, "Snapshots:invoke_stf", 0); e->snapshot_units = (struct unit_system *)malloc(sizeof(struct unit_system)); units_init_default(e->snapshot_units, params, "Snapshots", internal_units); e->snapshot_output_count = 0; + e->stf_output_count = 0; e->dt_min = parser_get_param_double(params, "TimeIntegration:dt_min"); e->dt_max = parser_get_param_double(params, "TimeIntegration:dt_max"); e->dt_max_RMS_displacement = FLT_MAX; @@ -4133,7 +4145,6 @@ void engine_init(struct engine *e, struct space *s, struct swift_params *params, e->star_formation = starform; e->chemistry = chemistry; e->parameter_file = params; - e->cell_loc = NULL; #ifdef WITH_MPI e->cputime_last_step = 0; e->last_repartition = 0; @@ -4174,28 +4185,16 @@ void engine_init(struct engine *e, struct space *s, struct swift_params *params, /* Initialise VELOCIraptor output. 
*/ if (e->policy & engine_policy_structure_finding) { parser_get_param_string(params, "StructureFinding:basename", - e->stfBaseName); + e->stf_base_name); + parser_get_param_string(params, "StructureFinding:config_file_name", + e->stf_config_file_name); + e->time_first_stf_output = parser_get_opt_param_double(params, "StructureFinding:time_first", 0.); e->a_first_stf_output = parser_get_opt_param_double( params, "StructureFinding:scale_factor_first", 0.1); - e->stf_output_freq_format = (enum io_stf_output_format)parser_get_param_int( - params, "StructureFinding:output_time_format"); - - if (e->stf_output_freq_format == io_stf_steps) { - e->delta_step_stf = - parser_get_param_int(params, "StructureFinding:delta_step"); - } else if (e->stf_output_freq_format == io_stf_time) { - e->delta_time_stf = - parser_get_param_double(params, "StructureFinding:delta_time"); - } else { - error( - "Invalid flag (%d) set for output time format of structure finding.", - e->stf_output_freq_format); - } - - /* overwrite input if outputlist */ - if (e->output_list_stf) e->stf_output_freq_format = io_stf_time; + e->delta_time_stf = + parser_get_opt_param_double(params, "StructureFinding:delta_time", -1.); } engine_init_output_lists(e, params); @@ -4441,10 +4440,11 @@ void engine_config(int restart, struct engine *e, struct swift_params *params, fprintf(e->file_timesteps, "# Step Properties: Rebuild=%d, Redistribute=%d, Repartition=%d, " - "Statistics=%d, Snapshot=%d, Restarts=%d\n", + "Statistics=%d, Snapshot=%d, Restarts=%d STF=%d, logger=%d\n", engine_step_prop_rebuild, engine_step_prop_redistribute, engine_step_prop_repartition, engine_step_prop_statistics, - engine_step_prop_snapshot, engine_step_prop_restarts); + engine_step_prop_snapshot, engine_step_prop_restarts, + engine_step_prop_stf, engine_step_prop_logger_index); fprintf(e->file_timesteps, "# %6s %14s %12s %12s %14s %9s %12s %12s %12s %16s [%s] %6s\n", @@ -4537,17 +4537,18 @@ void engine_config(int restart, struct engine 
*e, struct swift_params *params, "simulation start a=%e.", e->a_first_statistics, e->cosmology->a_begin); - if ((e->policy & engine_policy_structure_finding) && - (e->stf_output_freq_format == io_stf_time)) { + if (e->policy & engine_policy_structure_finding) { + + if (e->delta_time_stf == -1. && !e->snapshot_invoke_stf) + error("A value for `StructureFinding:delta_time` must be specified"); - if (e->delta_time_stf <= 1.) + if (e->delta_time_stf <= 1. && e->delta_time_stf != -1.) error("Time between STF (%e) must be > 1.", e->delta_time_stf); if (e->a_first_stf_output < e->cosmology->a_begin) error( "Scale-factor of first stf output (%e) must be after the " - "simulation " - "start a=%e.", + "simulation start a=%e.", e->a_first_stf_output, e->cosmology->a_begin); } } else { @@ -4573,10 +4574,12 @@ void engine_config(int restart, struct engine *e, struct swift_params *params, "t=%e.", e->time_first_statistics, e->time_begin); - if ((e->policy & engine_policy_structure_finding) && - (e->stf_output_freq_format == io_stf_time)) { + if (e->policy & engine_policy_structure_finding) { - if (e->delta_time_stf <= 0.) + if (e->delta_time_stf == -1. && !e->snapshot_invoke_stf) + error("A value for `StructureFinding:delta_time` must be specified"); + + if (e->delta_time_stf <= 0. && e->delta_time_stf != -1.) 
error("Time between STF (%e) must be positive.", e->delta_time_stf); if (e->time_first_stf_output < e->time_begin) @@ -4585,12 +4588,6 @@ void engine_config(int restart, struct engine *e, struct swift_params *params, } } - if (e->policy & engine_policy_structure_finding) { - /* Find the time of the first stf output */ - if (e->stf_output_freq_format == io_stf_time) - engine_compute_next_stf_time(e); - } - /* Get the total mass */ e->total_mass = 0.; for (size_t i = 0; i < e->s->nr_gparts; ++i) @@ -4615,6 +4612,19 @@ void engine_config(int restart, struct engine *e, struct swift_params *params, /* Find the time of the first statistics output */ engine_compute_next_statistics_time(e); + /* Find the time of the first stf output */ + if (e->policy & engine_policy_structure_finding) { + engine_compute_next_stf_time(e); + } + + /* Check that we are invoking VELOCIraptor only if we have it */ + if (e->snapshot_invoke_stf && + !(e->policy & engine_policy_structure_finding)) { + error( + "Invoking VELOCIraptor after snapshots but structure finding wasn't " + "activated at runtime (Use --velociraptor)."); + } + /* Whether restarts are enabled. Yes by default. Can be changed on restart. */ e->restart_dump = parser_get_opt_param_int(params, "Restarts:enable", 1); @@ -4678,6 +4688,10 @@ void engine_config(int restart, struct engine *e, struct swift_params *params, else maxtasks = engine_estimate_nr_tasks(e); + /* Estimated number of links per tasks */ + e->links_per_tasks = + parser_get_opt_param_int(params, "Scheduler:links_per_tasks", 10); + /* Init the scheduler. 
*/ scheduler_init(&e->sched, e->s, maxtasks, nr_queues, (e->policy & scheduler_flag_steal), e->nodeID, &e->threadpool); @@ -5200,7 +5214,6 @@ void engine_clean(struct engine *e) { output_list_clean(&e->output_list_stf); free(e->links); - free(e->cell_loc); #if defined(WITH_LOGGER) logger_clean(e->logger); free(e->logger); diff --git a/src/engine.h b/src/engine.h index f585b8fd74960048c41c60abfe687973ff1eaedb..b5f0799f037b07995cfa96dc6510d5a5957ab0f4 100644 --- a/src/engine.h +++ b/src/engine.h @@ -74,9 +74,10 @@ enum engine_policy { engine_policy_stars = (1 << 15), engine_policy_structure_finding = (1 << 16), engine_policy_star_formation = (1 << 17), - engine_policy_feedback = (1 << 18) + engine_policy_feedback = (1 << 18), + engine_policy_limiter = (1 << 19) }; -#define engine_maxpolicy 19 +#define engine_maxpolicy 20 extern const char *engine_policy_names[engine_maxpolicy + 1]; /** @@ -90,7 +91,8 @@ enum engine_step_properties { engine_step_prop_statistics = (1 << 3), engine_step_prop_snapshot = (1 << 4), engine_step_prop_restarts = (1 << 5), - engine_step_prop_logger_index = (1 << 6) + engine_step_prop_stf = (1 << 6), + engine_step_prop_logger_index = (1 << 7) }; /* Some constants */ @@ -99,6 +101,8 @@ enum engine_step_properties { #define engine_parts_size_grow 1.05 #define engine_max_proxy_centre_frac 0.2 #define engine_redistribute_alloc_margin 1.2 +#define engine_rebuild_link_alloc_margin 1.2 +#define engine_foreign_alloc_margin 1.05 #define engine_default_energy_file_name "energy" #define engine_default_timesteps_file_name "timesteps" #define engine_max_parts_per_ghost 1000 @@ -222,9 +226,6 @@ struct engine { /* The internal system of units */ const struct unit_system *internal_units; - /* Top-level cell locations for VELOCIraptor. 
*/ - struct cell_loc *cell_loc; - /* Snapshot information */ double a_first_snapshot; double time_first_snapshot; @@ -239,12 +240,11 @@ struct engine { char snapshot_base_name[PARSER_MAX_LINE_SIZE]; int snapshot_compression; int snapshot_int_time_label_on; + int snapshot_invoke_stf; struct unit_system *snapshot_units; int snapshot_output_count; /* Structure finding information */ - enum io_stf_output_format stf_output_freq_format; - int delta_step_stf; double a_first_stf_output; double time_first_stf_output; double delta_time_stf; @@ -255,7 +255,9 @@ struct engine { /* Integer time of the next stf output */ integertime_t ti_next_stf; - char stfBaseName[PARSER_MAX_LINE_SIZE]; + char stf_config_file_name[PARSER_MAX_LINE_SIZE]; + char stf_base_name[PARSER_MAX_LINE_SIZE]; + int stf_output_count; /* Statistics information */ double a_first_statistics; @@ -329,6 +331,10 @@ struct engine { * of the various task arrays. */ size_t tasks_per_cell; + /* Average number of links per tasks. This number is used before + the creation of communication tasks so needs to be large enough. */ + size_t links_per_tasks; + /* Are we talkative ? */ int verbose; @@ -405,6 +411,7 @@ void engine_unskip(struct engine *e); void engine_drift_all(struct engine *e, const int drift_mpoles); void engine_drift_top_multipoles(struct engine *e); void engine_reconstruct_multipoles(struct engine *e); +void engine_allocate_foreign_particles(struct engine *e); void engine_print_stats(struct engine *e); void engine_check_for_dumps(struct engine *e); void engine_dump_snapshot(struct engine *e); @@ -447,7 +454,7 @@ int engine_is_done(struct engine *e); void engine_pin(void); void engine_unpin(void); void engine_clean(struct engine *e); -int engine_estimate_nr_tasks(struct engine *e); +int engine_estimate_nr_tasks(const struct engine *e); /* Function prototypes, engine_maketasks.c. 
*/ void engine_maketasks(struct engine *e); diff --git a/src/engine_maketasks.c b/src/engine_maketasks.c index 8e23b4932d8b519b4292c76b4ab3eca5b321b5b8..2175595cd149d9d5da5a3aa5f5341ff181fd58d6 100644 --- a/src/engine_maketasks.c +++ b/src/engine_maketasks.c @@ -210,9 +210,13 @@ void engine_addtasks_send_hydro(struct engine *e, struct cell *ci, * @param ci The sending #cell. * @param cj Dummy cell containing the nodeID of the receiving node. * @param t_ti The send_ti #task, if it has already been created. + * @param t_limiter The send_limiter #task, if already created. + * @param with_limiter Are we running with the time-step limiter? */ void engine_addtasks_send_timestep(struct engine *e, struct cell *ci, - struct cell *cj, struct task *t_ti) { + struct cell *cj, struct task *t_ti, + struct task *t_limiter, + const int with_limiter) { #ifdef WITH_MPI struct link *l = NULL; @@ -244,19 +248,31 @@ void engine_addtasks_send_timestep(struct engine *e, struct cell *ci, t_ti = scheduler_addtask(s, task_type_send, task_subtype_tend, ci->mpi.tag, 0, ci, cj); + if (with_limiter) + t_limiter = scheduler_addtask(s, task_type_send, task_subtype_limiter, + ci->mpi.tag, 0, ci, cj); + /* The super-cell's timestep task should unlock the send_ti task. */ scheduler_addunlock(s, ci->super->timestep, t_ti); + if (with_limiter) scheduler_addunlock(s, t_limiter, ci->super->timestep); + if (with_limiter) + scheduler_addunlock(s, t_limiter, ci->super->timestep_limiter); + if (with_limiter) scheduler_addunlock(s, ci->super->kick2, t_limiter); + if (with_limiter) + scheduler_addunlock(s, ci->super->timestep_limiter, t_ti); } /* Add them to the local cell. */ engine_addlink(e, &ci->mpi.send_ti, t_ti); + if (with_limiter) engine_addlink(e, &ci->mpi.limiter.send, t_limiter); } /* Recurse? 
*/ if (ci->split) for (int k = 0; k < 8; k++) if (ci->progeny[k] != NULL) - engine_addtasks_send_timestep(e, ci->progeny[k], cj, t_ti); + engine_addtasks_send_timestep(e, ci->progeny[k], cj, t_ti, t_limiter, + with_limiter); #else error("SWIFT was not compiled with MPI support."); @@ -380,9 +396,12 @@ void engine_addtasks_recv_gravity(struct engine *e, struct cell *c, * @param e The #engine. * @param c The foreign #cell. * @param t_ti The recv_ti #task, if already been created. + * @param t_limiter The recv_limiter #task, if already created. + * @param with_limiter Are we running with the time-step limiter? */ void engine_addtasks_recv_timestep(struct engine *e, struct cell *c, - struct task *t_ti) { + struct task *t_ti, struct task *t_limiter, + const int with_limiter) { #ifdef WITH_MPI struct scheduler *s = &e->sched; @@ -397,21 +416,42 @@ void engine_addtasks_recv_timestep(struct engine *e, struct cell *c, t_ti = scheduler_addtask(s, task_type_recv, task_subtype_tend, c->mpi.tag, 0, c, NULL); + + if (with_limiter) + t_limiter = scheduler_addtask(s, task_type_recv, task_subtype_limiter, + c->mpi.tag, 0, c, NULL); } c->mpi.recv_ti = t_ti; - for (struct link *l = c->grav.grav; l != NULL; l = l->next) + for (struct link *l = c->grav.grav; l != NULL; l = l->next) { scheduler_addunlock(s, l->t, t_ti); + } - for (struct link *l = c->hydro.force; l != NULL; l = l->next) - scheduler_addunlock(s, l->t, t_ti); + if (with_limiter) { + + for (struct link *l = c->hydro.force; l != NULL; l = l->next) { + scheduler_addunlock(s, l->t, t_limiter); + } + + for (struct link *l = c->hydro.limiter; l != NULL; l = l->next) { + scheduler_addunlock(s, t_limiter, l->t); + scheduler_addunlock(s, l->t, t_ti); + } + + } else { + + for (struct link *l = c->hydro.force; l != NULL; l = l->next) { + scheduler_addunlock(s, l->t, t_ti); + } + } /* Recurse? 
*/ if (c->split) for (int k = 0; k < 8; k++) if (c->progeny[k] != NULL) - engine_addtasks_recv_timestep(e, c->progeny[k], t_ti); + engine_addtasks_recv_timestep(e, c->progeny[k], t_ti, t_limiter, + with_limiter); #else error("SWIFT was not compiled with MPI support."); @@ -435,6 +475,7 @@ void engine_make_hierarchical_tasks_common(struct engine *e, struct cell *c) { struct scheduler *s = &e->sched; const int is_with_cooling = (e->policy & engine_policy_cooling); const int is_with_star_formation = (e->policy & engine_policy_star_formation); + const int with_limiter = (e->policy & engine_policy_limiter); /* Are we in a super-cell ? */ if (c->super == c) { @@ -489,6 +530,16 @@ void engine_make_hierarchical_tasks_common(struct engine *e, struct cell *c) { scheduler_addunlock(s, c->timestep, c->kick1); + /* Time-step limiting */ + if (with_limiter) { + c->timestep_limiter = scheduler_addtask( + s, task_type_timestep_limiter, task_subtype_none, 0, 0, c, NULL); + + /* Make sure it is not run before kick2 */ + scheduler_addunlock(s, c->timestep, c->timestep_limiter); + scheduler_addunlock(s, c->timestep_limiter, c->kick1); + } + #if defined(WITH_LOGGER) scheduler_addunlock(s, c->kick1, c->logger); #endif @@ -1281,7 +1332,8 @@ void engine_link_gravity_tasks(struct engine *e) { */ static inline void engine_make_hydro_loops_dependencies( struct scheduler *sched, struct task *density, struct task *gradient, - struct task *force, struct cell *c, int with_cooling) { + struct task *force, struct task *limiter, struct cell *c, int with_cooling, + int with_limiter) { /* density loop --> ghost --> gradient loop --> extra_ghost */ /* extra_ghost --> force loop */ @@ -1299,14 +1351,15 @@ static inline void engine_make_hydro_loops_dependencies( * @param sched The #scheduler. * @param density The density task to link. * @param force The force task to link. + * @param limiter The limiter task to link. * @param c The cell. - * @param with_cooling Are we running with cooling switched on ? 
+ * @param with_cooling Are we running with cooling switched on? + * @param with_limiter Are we running with limiter switched on? */ -static inline void engine_make_hydro_loops_dependencies(struct scheduler *sched, - struct task *density, - struct task *force, - struct cell *c, - int with_cooling) { +static inline void engine_make_hydro_loops_dependencies( + struct scheduler *sched, struct task *density, struct task *force, + struct task *limiter, struct cell *c, int with_cooling, int with_limiter) { + /* density loop --> ghost --> force loop */ scheduler_addunlock(sched, density, c->hydro.super->hydro.ghost_in); scheduler_addunlock(sched, c->hydro.super->hydro.ghost_out, force); @@ -1347,6 +1400,12 @@ void engine_make_extra_hydroloop_tasks_mapper(void *map_data, int num_elements, struct scheduler *sched = &e->sched; const int nodeID = e->nodeID; const int with_cooling = (e->policy & engine_policy_cooling); + const int with_limiter = (e->policy & engine_policy_limiter); +#ifdef EXTRA_HYDRO_LOOP + struct task *t_gradient = NULL; +#endif + struct task *t_force = NULL; + struct task *t_limiter = NULL; for (int ind = 0; ind < num_elements; ind++) { struct task *t = &((struct task *)map_data)[ind]; @@ -1364,31 +1423,53 @@ void engine_make_extra_hydroloop_tasks_mapper(void *map_data, int num_elements, #ifdef EXTRA_HYDRO_LOOP /* Start by constructing the task for the second and third hydro loop. 
*/ - struct task *t2 = scheduler_addtask( - sched, task_type_self, task_subtype_gradient, 0, 0, t->ci, NULL); - struct task *t3 = scheduler_addtask( - sched, task_type_self, task_subtype_force, 0, 0, t->ci, NULL); + t_gradient = scheduler_addtask(sched, task_type_self, + task_subtype_gradient, 0, 0, t->ci, NULL); + t_force = scheduler_addtask(sched, task_type_self, task_subtype_force, 0, + 0, t->ci, NULL); + + /* and the task for the time-step limiter */ + if (with_limiter) + t_limiter = scheduler_addtask(sched, task_type_self, + task_subtype_limiter, 0, 0, t->ci, NULL); /* Add the link between the new loops and the cell */ - engine_addlink(e, &t->ci->hydro.gradient, t2); - engine_addlink(e, &t->ci->hydro.force, t3); + engine_addlink(e, &t->ci->hydro.gradient, t_gradient); + engine_addlink(e, &t->ci->hydro.force, t_force); + if (with_limiter) engine_addlink(e, &t->ci->hydro.limiter, t_limiter); /* Now, build all the dependencies for the hydro */ - engine_make_hydro_loops_dependencies(sched, t, t2, t3, t->ci, - with_cooling); - scheduler_addunlock(sched, t3, t->ci->super->end_force); + engine_make_hydro_loops_dependencies(sched, t, t_gradient, t_force, + t_limiter, t->ci, with_cooling, + with_limiter); + scheduler_addunlock(sched, t_force, t->ci->super->end_force); + if (with_limiter) + scheduler_addunlock(sched, t->ci->super->kick2, t_limiter); + if (with_limiter) + scheduler_addunlock(sched, t_limiter, t->ci->super->timestep); #else /* Start by constructing the task for the second hydro loop */ - struct task *t2 = scheduler_addtask( - sched, task_type_self, task_subtype_force, 0, 0, t->ci, NULL); + t_force = scheduler_addtask(sched, task_type_self, task_subtype_force, 0, + 0, t->ci, NULL); + + /* and the task for the time-step limiter */ + if (with_limiter) + t_limiter = scheduler_addtask(sched, task_type_self, + task_subtype_limiter, 0, 0, t->ci, NULL); /* Add the link between the new loop and the cell */ - engine_addlink(e, &t->ci->hydro.force, t2); + 
engine_addlink(e, &t->ci->hydro.force, t_force); + if (with_limiter) engine_addlink(e, &t->ci->hydro.limiter, t_limiter); /* Now, build all the dependencies for the hydro */ - engine_make_hydro_loops_dependencies(sched, t, t2, t->ci, with_cooling); - scheduler_addunlock(sched, t2, t->ci->super->end_force); + engine_make_hydro_loops_dependencies(sched, t, t_force, t_limiter, t->ci, + with_cooling, with_limiter); + scheduler_addunlock(sched, t_force, t->ci->super->end_force); + if (with_limiter) + scheduler_addunlock(sched, t->ci->super->kick2, t_limiter); + if (with_limiter) + scheduler_addunlock(sched, t_limiter, t->ci->super->timestep); #endif } @@ -1407,54 +1488,103 @@ void engine_make_extra_hydroloop_tasks_mapper(void *map_data, int num_elements, #ifdef EXTRA_HYDRO_LOOP /* Start by constructing the task for the second and third hydro loop */ - struct task *t2 = scheduler_addtask( - sched, task_type_pair, task_subtype_gradient, 0, 0, t->ci, t->cj); - struct task *t3 = scheduler_addtask( - sched, task_type_pair, task_subtype_force, 0, 0, t->ci, t->cj); + t_gradient = scheduler_addtask(sched, task_type_pair, + task_subtype_gradient, 0, 0, t->ci, t->cj); + t_force = scheduler_addtask(sched, task_type_pair, task_subtype_force, 0, + 0, t->ci, t->cj); + + /* and the task for the time-step limiter */ + if (with_limiter) + t_limiter = scheduler_addtask(sched, task_type_pair, + task_subtype_limiter, 0, 0, t->ci, t->cj); /* Add the link between the new loop and both cells */ - engine_addlink(e, &t->ci->hydro.gradient, t2); - engine_addlink(e, &t->cj->hydro.gradient, t2); - engine_addlink(e, &t->ci->hydro.force, t3); - engine_addlink(e, &t->cj->hydro.force, t3); + engine_addlink(e, &t->ci->hydro.gradient, t_gradient); + engine_addlink(e, &t->cj->hydro.gradient, t_gradient); + engine_addlink(e, &t->ci->hydro.force, t_force); + engine_addlink(e, &t->cj->hydro.force, t_force); + if (with_limiter) engine_addlink(e, &t->ci->hydro.limiter, t_limiter); + if (with_limiter) 
engine_addlink(e, &t->cj->hydro.limiter, t_limiter); /* Now, build all the dependencies for the hydro for the cells */ /* that are local and are not descendant of the same super_hydro-cells */ if (t->ci->nodeID == nodeID) { - engine_make_hydro_loops_dependencies(sched, t, t2, t3, t->ci, - with_cooling); - scheduler_addunlock(sched, t3, t->ci->super->end_force); + engine_make_hydro_loops_dependencies(sched, t, t_gradient, t_force, + t_limiter, t->ci, with_cooling, + with_limiter); + scheduler_addunlock(sched, t_force, t->ci->super->end_force); + if (with_limiter) + scheduler_addunlock(sched, t->ci->super->kick2, t_limiter); + if (with_limiter) + scheduler_addunlock(sched, t_limiter, t->ci->super->timestep); + if (with_limiter) + scheduler_addunlock(sched, t_limiter, t->ci->super->timestep_limiter); } if (t->cj->nodeID == nodeID) { - if (t->ci->hydro.super != t->cj->hydro.super) - engine_make_hydro_loops_dependencies(sched, t, t2, t3, t->cj, - with_cooling); - if (t->ci->super != t->cj->super) - scheduler_addunlock(sched, t3, t->cj->super->end_force); + if (t->ci->hydro.super != t->cj->hydro.super) { + engine_make_hydro_loops_dependencies(sched, t, t_gradient, t_force, + t_limiter, t->cj, with_cooling, + with_limiter); + } + + if (t->ci->super != t->cj->super) { + scheduler_addunlock(sched, t_force, t->cj->super->end_force); + if (with_limiter) + scheduler_addunlock(sched, t->cj->super->kick2, t_limiter); + if (with_limiter) + scheduler_addunlock(sched, t_limiter, t->cj->super->timestep); + if (with_limiter) + scheduler_addunlock(sched, t_limiter, + t->cj->super->timestep_limiter); + } } #else /* Start by constructing the task for the second hydro loop */ - struct task *t2 = scheduler_addtask( - sched, task_type_pair, task_subtype_force, 0, 0, t->ci, t->cj); + t_force = scheduler_addtask(sched, task_type_pair, task_subtype_force, 0, + 0, t->ci, t->cj); + + /* and the task for the time-step limiter */ + if (with_limiter) + t_limiter = scheduler_addtask(sched, 
task_type_pair, + task_subtype_limiter, 0, 0, t->ci, t->cj); /* Add the link between the new loop and both cells */ - engine_addlink(e, &t->ci->hydro.force, t2); - engine_addlink(e, &t->cj->hydro.force, t2); + engine_addlink(e, &t->ci->hydro.force, t_force); + engine_addlink(e, &t->cj->hydro.force, t_force); + if (with_limiter) engine_addlink(e, &t->ci->hydro.limiter, t_limiter); + if (with_limiter) engine_addlink(e, &t->cj->hydro.limiter, t_limiter); /* Now, build all the dependencies for the hydro for the cells */ /* that are local and are not descendant of the same super_hydro-cells */ if (t->ci->nodeID == nodeID) { - engine_make_hydro_loops_dependencies(sched, t, t2, t->ci, with_cooling); - scheduler_addunlock(sched, t2, t->ci->super->end_force); + engine_make_hydro_loops_dependencies(sched, t, t_force, t_limiter, + t->ci, with_cooling, with_limiter); + scheduler_addunlock(sched, t_force, t->ci->super->end_force); + if (with_limiter) + scheduler_addunlock(sched, t->ci->super->kick2, t_limiter); + if (with_limiter) + scheduler_addunlock(sched, t_limiter, t->ci->super->timestep); + if (with_limiter) + scheduler_addunlock(sched, t_limiter, t->ci->super->timestep_limiter); } if (t->cj->nodeID == nodeID) { - if (t->ci->hydro.super != t->cj->hydro.super) - engine_make_hydro_loops_dependencies(sched, t, t2, t->cj, - with_cooling); - if (t->ci->super != t->cj->super) - scheduler_addunlock(sched, t2, t->cj->super->end_force); + if (t->ci->hydro.super != t->cj->hydro.super) { + engine_make_hydro_loops_dependencies( + sched, t, t_force, t_limiter, t->cj, with_cooling, with_limiter); + } + + if (t->ci->super != t->cj->super) { + scheduler_addunlock(sched, t_force, t->cj->super->end_force); + if (with_limiter) + scheduler_addunlock(sched, t->cj->super->kick2, t_limiter); + if (with_limiter) + scheduler_addunlock(sched, t_limiter, t->cj->super->timestep); + if (with_limiter) + scheduler_addunlock(sched, t_limiter, + t->cj->super->timestep_limiter); + } } #endif @@ -1472,39 
+1602,65 @@ void engine_make_extra_hydroloop_tasks_mapper(void *map_data, int num_elements, #ifdef EXTRA_HYDRO_LOOP /* Start by constructing the task for the second and third hydro loop */ - struct task *t2 = + t_gradient = scheduler_addtask(sched, task_type_sub_self, task_subtype_gradient, - t->flags, 0, t->ci, t->cj); - struct task *t3 = - scheduler_addtask(sched, task_type_sub_self, task_subtype_force, - t->flags, 0, t->ci, t->cj); + t->flags, 0, t->ci, NULL); + t_force = scheduler_addtask(sched, task_type_sub_self, task_subtype_force, + t->flags, 0, t->ci, NULL); + + /* and the task for the time-step limiter */ + if (with_limiter) + t_limiter = + scheduler_addtask(sched, task_type_sub_self, task_subtype_limiter, + t->flags, 0, t->ci, NULL); /* Add the link between the new loop and the cell */ - engine_addlink(e, &t->ci->hydro.gradient, t2); - engine_addlink(e, &t->ci->hydro.force, t3); + engine_addlink(e, &t->ci->hydro.gradient, t_gradient); + engine_addlink(e, &t->ci->hydro.force, t_force); + if (with_limiter) engine_addlink(e, &t->ci->hydro.limiter, t_limiter); /* Now, build all the dependencies for the hydro for the cells */ /* that are local and are not descendant of the same super_hydro-cells */ if (t->ci->nodeID == nodeID) { - engine_make_hydro_loops_dependencies(sched, t, t2, t3, t->ci, - with_cooling); - scheduler_addunlock(sched, t3, t->ci->super->end_force); + engine_make_hydro_loops_dependencies(sched, t, t_gradient, t_force, + t_limiter, t->ci, with_cooling, + with_limiter); + scheduler_addunlock(sched, t_force, t->ci->super->end_force); + if (with_limiter) + scheduler_addunlock(sched, t->ci->super->kick2, t_limiter); + if (with_limiter) + scheduler_addunlock(sched, t_limiter, t->ci->super->timestep); + if (with_limiter) + scheduler_addunlock(sched, t_limiter, t->ci->super->timestep_limiter); } #else /* Start by constructing the task for the second hydro loop */ - struct task *t2 = - scheduler_addtask(sched, task_type_sub_self, task_subtype_force, - 
t->flags, 0, t->ci, t->cj); + t_force = scheduler_addtask(sched, task_type_sub_self, task_subtype_force, + t->flags, 0, t->ci, NULL); + + /* and the task for the time-step limiter */ + if (with_limiter) + t_limiter = + scheduler_addtask(sched, task_type_sub_self, task_subtype_limiter, + t->flags, 0, t->ci, NULL); /* Add the link between the new loop and the cell */ - engine_addlink(e, &t->ci->hydro.force, t2); + engine_addlink(e, &t->ci->hydro.force, t_force); + if (with_limiter) engine_addlink(e, &t->ci->hydro.limiter, t_limiter); /* Now, build all the dependencies for the hydro for the cells */ /* that are local and are not descendant of the same super_hydro-cells */ if (t->ci->nodeID == nodeID) { - engine_make_hydro_loops_dependencies(sched, t, t2, t->ci, with_cooling); - scheduler_addunlock(sched, t2, t->ci->super->end_force); + engine_make_hydro_loops_dependencies(sched, t, t_force, t_limiter, + t->ci, with_cooling, with_limiter); + scheduler_addunlock(sched, t_force, t->ci->super->end_force); + if (with_limiter) + scheduler_addunlock(sched, t->ci->super->kick2, t_limiter); + if (with_limiter) + scheduler_addunlock(sched, t_limiter, t->ci->super->timestep); + if (with_limiter) + scheduler_addunlock(sched, t_limiter, t->ci->super->timestep_limiter); } #endif } @@ -1526,56 +1682,106 @@ void engine_make_extra_hydroloop_tasks_mapper(void *map_data, int num_elements, #ifdef EXTRA_HYDRO_LOOP /* Start by constructing the task for the second and third hydro loop */ - struct task *t2 = + t_gradient = scheduler_addtask(sched, task_type_sub_pair, task_subtype_gradient, t->flags, 0, t->ci, t->cj); - struct task *t3 = - scheduler_addtask(sched, task_type_sub_pair, task_subtype_force, - t->flags, 0, t->ci, t->cj); + t_force = scheduler_addtask(sched, task_type_sub_pair, task_subtype_force, + t->flags, 0, t->ci, t->cj); + + /* and the task for the time-step limiter */ + if (with_limiter) + t_limiter = + scheduler_addtask(sched, task_type_sub_pair, task_subtype_limiter, + 
t->flags, 0, t->ci, t->cj); /* Add the link between the new loop and both cells */ - engine_addlink(e, &t->ci->hydro.gradient, t2); - engine_addlink(e, &t->cj->hydro.gradient, t2); - engine_addlink(e, &t->ci->hydro.force, t3); - engine_addlink(e, &t->cj->hydro.force, t3); + engine_addlink(e, &t->ci->hydro.gradient, t_gradient); + engine_addlink(e, &t->cj->hydro.gradient, t_gradient); + engine_addlink(e, &t->ci->hydro.force, t_force); + engine_addlink(e, &t->cj->hydro.force, t_force); + if (with_limiter) engine_addlink(e, &t->ci->hydro.limiter, t_limiter); + if (with_limiter) engine_addlink(e, &t->cj->hydro.limiter, t_limiter); /* Now, build all the dependencies for the hydro for the cells */ /* that are local and are not descendant of the same super_hydro-cells */ if (t->ci->nodeID == nodeID) { - engine_make_hydro_loops_dependencies(sched, t, t2, t3, t->ci, - with_cooling); - scheduler_addunlock(sched, t3, t->ci->super->end_force); + engine_make_hydro_loops_dependencies(sched, t, t_gradient, t_force, + t_limiter, t->ci, with_cooling, + with_limiter); + scheduler_addunlock(sched, t_force, t->ci->super->end_force); + if (with_limiter) + scheduler_addunlock(sched, t->ci->super->kick2, t_limiter); + if (with_limiter) + scheduler_addunlock(sched, t_limiter, t->ci->super->timestep); + if (with_limiter) + scheduler_addunlock(sched, t_limiter, t->ci->super->timestep_limiter); } if (t->cj->nodeID == nodeID) { - if (t->ci->hydro.super != t->cj->hydro.super) - engine_make_hydro_loops_dependencies(sched, t, t2, t3, t->cj, - with_cooling); - if (t->ci->super != t->cj->super) - scheduler_addunlock(sched, t3, t->cj->super->end_force); + if (t->ci->hydro.super != t->cj->hydro.super) { + engine_make_hydro_loops_dependencies(sched, t, t_gradient, t_force, + t_limiter, t->cj, with_cooling, + with_limiter); + } + + if (t->ci->super != t->cj->super) { + scheduler_addunlock(sched, t_force, t->cj->super->end_force); + if (with_limiter) + scheduler_addunlock(sched, t->cj->super->kick2, 
t_limiter); + if (with_limiter) + scheduler_addunlock(sched, t_limiter, t->cj->super->timestep); + if (with_limiter) + scheduler_addunlock(sched, t_limiter, + t->cj->super->timestep_limiter); + } } #else /* Start by constructing the task for the second hydro loop */ - struct task *t2 = - scheduler_addtask(sched, task_type_sub_pair, task_subtype_force, - t->flags, 0, t->ci, t->cj); + t_force = scheduler_addtask(sched, task_type_sub_pair, task_subtype_force, + t->flags, 0, t->ci, t->cj); + + /* and the task for the time-step limiter */ + if (with_limiter) + t_limiter = + scheduler_addtask(sched, task_type_sub_pair, task_subtype_limiter, + t->flags, 0, t->ci, t->cj); /* Add the link between the new loop and both cells */ - engine_addlink(e, &t->ci->hydro.force, t2); - engine_addlink(e, &t->cj->hydro.force, t2); + engine_addlink(e, &t->ci->hydro.force, t_force); + engine_addlink(e, &t->cj->hydro.force, t_force); + if (with_limiter) engine_addlink(e, &t->ci->hydro.limiter, t_limiter); + if (with_limiter) engine_addlink(e, &t->cj->hydro.limiter, t_limiter); /* Now, build all the dependencies for the hydro for the cells */ /* that are local and are not descendant of the same super_hydro-cells */ if (t->ci->nodeID == nodeID) { - engine_make_hydro_loops_dependencies(sched, t, t2, t->ci, with_cooling); - scheduler_addunlock(sched, t2, t->ci->super->end_force); + engine_make_hydro_loops_dependencies(sched, t, t_force, t_limiter, + t->ci, with_cooling, with_limiter); + + scheduler_addunlock(sched, t_force, t->ci->super->end_force); + if (with_limiter) + scheduler_addunlock(sched, t->ci->super->kick2, t_limiter); + if (with_limiter) + scheduler_addunlock(sched, t_limiter, t->ci->super->timestep); + if (with_limiter) + scheduler_addunlock(sched, t_limiter, t->ci->super->timestep_limiter); } if (t->cj->nodeID == nodeID) { - if (t->ci->hydro.super != t->cj->hydro.super) - engine_make_hydro_loops_dependencies(sched, t, t2, t->cj, - with_cooling); - if (t->ci->super != t->cj->super) 
- scheduler_addunlock(sched, t2, t->cj->super->end_force); + if (t->ci->hydro.super != t->cj->hydro.super) { + engine_make_hydro_loops_dependencies( + sched, t, t_force, t_limiter, t->cj, with_cooling, with_limiter); + } + + if (t->ci->super != t->cj->super) { + scheduler_addunlock(sched, t_force, t->cj->super->end_force); + if (with_limiter) + scheduler_addunlock(sched, t->cj->super->kick2, t_limiter); + if (with_limiter) + scheduler_addunlock(sched, t_limiter, t->cj->super->timestep); + if (with_limiter) + scheduler_addunlock(sched, t_limiter, + t->cj->super->timestep_limiter); + } } #endif } @@ -1961,6 +2167,7 @@ struct cell_type_pair { void engine_addtasks_send_mapper(void *map_data, int num_elements, void *extra_data) { struct engine *e = (struct engine *)extra_data; + const int with_limiter = (e->policy & engine_policy_limiter); struct cell_type_pair *cell_type_pairs = (struct cell_type_pair *)map_data; for (int k = 0; k < num_elements; k++) { @@ -1969,7 +2176,7 @@ void engine_addtasks_send_mapper(void *map_data, int num_elements, const int type = cell_type_pairs[k].type; /* Add the send task for the particle timesteps. */ - engine_addtasks_send_timestep(e, ci, cj, NULL); + engine_addtasks_send_timestep(e, ci, cj, NULL, NULL, with_limiter); /* Add the send tasks for the cells in the proxy that have a hydro * connection. */ @@ -1988,6 +2195,7 @@ void engine_addtasks_send_mapper(void *map_data, int num_elements, void engine_addtasks_recv_mapper(void *map_data, int num_elements, void *extra_data) { struct engine *e = (struct engine *)extra_data; + const int with_limiter = (e->policy & engine_policy_limiter); struct cell_type_pair *cell_type_pairs = (struct cell_type_pair *)map_data; for (int k = 0; k < num_elements; k++) { @@ -1995,7 +2203,7 @@ void engine_addtasks_recv_mapper(void *map_data, int num_elements, const int type = cell_type_pairs[k].type; /* Add the recv task for the particle timesteps. 
*/ - engine_addtasks_recv_timestep(e, ci, NULL); + engine_addtasks_recv_timestep(e, ci, NULL, NULL, with_limiter); /* Add the recv tasks for the cells in the proxy that have a hydro * connection. */ @@ -2068,39 +2276,6 @@ void engine_maketasks(struct engine *e) { if (e->sched.nr_tasks == 0 && (s->nr_gparts > 0 || s->nr_parts > 0)) error("We have particles but no hydro or gravity tasks were created."); - /* Free the old list of cell-task links. */ - if (e->links != NULL) free(e->links); - e->size_links = 0; - -/* The maximum number of links is the - * number of cells (s->tot_cells) times the number of neighbours (26) times - * the number of interaction types, so 26 * 2 (density, force) pairs - * and 2 (density, force) self. - */ -#ifdef EXTRA_HYDRO_LOOP - const size_t hydro_tasks_per_cell = 27 * 3; -#else - const size_t hydro_tasks_per_cell = 27 * 2; -#endif - const size_t self_grav_tasks_per_cell = 125; - const size_t ext_grav_tasks_per_cell = 1; - const size_t stars_tasks_per_cell = 27; - - if (e->policy & engine_policy_hydro) - e->size_links += s->tot_cells * hydro_tasks_per_cell; - if (e->policy & engine_policy_external_gravity) - e->size_links += s->tot_cells * ext_grav_tasks_per_cell; - if (e->policy & engine_policy_self_gravity) - e->size_links += s->tot_cells * self_grav_tasks_per_cell; - if (e->policy & engine_policy_stars) - e->size_links += s->tot_cells * stars_tasks_per_cell; - - /* Allocate the new link list */ - if ((e->links = (struct link *)malloc(sizeof(struct link) * e->size_links)) == - NULL) - error("Failed to allocate cell-task links."); - e->nr_links = 0; - tic2 = getticks(); /* Split the tasks. */ @@ -2118,6 +2293,20 @@ void engine_maketasks(struct engine *e) { } #endif + /* Free the old list of cell-task links. */ + if (e->links != NULL) free(e->links); + e->size_links = e->sched.nr_tasks * e->links_per_tasks; + + /* Make sure that we have space for more links than last time. 
*/ + if (e->size_links < e->nr_links * engine_rebuild_link_alloc_margin) + e->size_links = e->nr_links * engine_rebuild_link_alloc_margin; + + /* Allocate the new link list */ + if ((e->links = (struct link *)malloc(sizeof(struct link) * e->size_links)) == + NULL) + error("Failed to allocate cell-task links."); + e->nr_links = 0; + tic2 = getticks(); /* Count the number of tasks associated with each cell and @@ -2272,8 +2461,27 @@ void engine_maketasks(struct engine *e) { message("Creating recv tasks took %.3f %s.", clocks_from_ticks(getticks() - tic2), clocks_getunit()); } + + /* Allocate memory for foreign particles */ + engine_allocate_foreign_particles(e); + #endif + /* Report the number of tasks we actually used */ + if (e->verbose) + message( + "Nr. of tasks: %d allocated tasks: %d ratio: %f memory use: %zd MB.", + e->sched.nr_tasks, e->sched.size, + (float)e->sched.nr_tasks / (float)e->sched.size, + e->sched.size * sizeof(struct task) / (1024 * 1024)); + + /* Report the number of links we actually used */ + if (e->verbose) + message( + "Nr. of links: %zd allocated links: %zd ratio: %f memory use: %zd MB.", + e->nr_links, e->size_links, (float)e->nr_links / (float)e->size_links, + e->size_links * sizeof(struct link) / (1024 * 1024)); + tic2 = getticks(); /* Set the unlocks per task. 
*/ diff --git a/src/engine_marktasks.c b/src/engine_marktasks.c index 9c7a783c2547899816842cf9a05163e75d329aa8..3a26dbb2f47f9503aa0b93fa28d679f5eebaeede 100644 --- a/src/engine_marktasks.c +++ b/src/engine_marktasks.c @@ -69,6 +69,7 @@ void engine_marktasks_mapper(void *map_data, int num_elements, struct scheduler *s = (struct scheduler *)(((size_t *)extra_data)[2]); struct engine *e = (struct engine *)((size_t *)extra_data)[0]; const int nodeID = e->nodeID; + const int with_limiter = e->policy & engine_policy_limiter; for (int ind = 0; ind < num_elements; ind++) { @@ -90,6 +91,7 @@ void engine_marktasks_mapper(void *map_data, int num_elements, if (cell_is_active_hydro(ci, e)) { scheduler_activate(s, t); cell_activate_drift_part(ci, s); + if (with_limiter) cell_activate_limiter(ci, s); } } @@ -99,6 +101,7 @@ void engine_marktasks_mapper(void *map_data, int num_elements, if (cell_is_active_hydro(ci, e)) { scheduler_activate(s, t); cell_activate_subcell_hydro_tasks(ci, NULL, s); + if (with_limiter) cell_activate_limiter(ci, s); } } @@ -111,6 +114,16 @@ void engine_marktasks_mapper(void *map_data, int num_elements, if (cell_is_active_hydro(ci, e)) scheduler_activate(s, t); } + else if (t->type == task_type_self && + t->subtype == task_subtype_limiter) { + if (cell_is_active_hydro(ci, e)) scheduler_activate(s, t); + } + + else if (t->type == task_type_sub_self && + t->subtype == task_subtype_limiter) { + if (cell_is_active_hydro(ci, e)) scheduler_activate(s, t); + } + #ifdef EXTRA_HYDRO_LOOP else if (t_type == task_type_self && t_subtype == task_subtype_gradient) { if (cell_is_active_hydro(ci, e)) scheduler_activate(s, t); @@ -207,6 +220,7 @@ void engine_marktasks_mapper(void *map_data, int num_elements, /* Only activate tasks that involve a local active cell. 
*/ if ((t_subtype == task_subtype_density || t_subtype == task_subtype_gradient || + t_subtype == task_subtype_limiter || t_subtype == task_subtype_force) && ((ci_active_hydro && ci_nodeID == nodeID) || (cj_active_hydro && cj_nodeID == nodeID))) { @@ -226,6 +240,10 @@ void engine_marktasks_mapper(void *map_data, int num_elements, if (ci_nodeID == nodeID) cell_activate_drift_part(ci, s); if (cj_nodeID == nodeID) cell_activate_drift_part(cj, s); + /* And the limiter */ + if (ci_nodeID == nodeID && with_limiter) cell_activate_limiter(ci, s); + if (cj_nodeID == nodeID && with_limiter) cell_activate_limiter(cj, s); + /* Check the sorts and activate them if needed. */ cell_activate_hydro_sorts(ci, t->flags, s); cell_activate_hydro_sorts(cj, t->flags, s); diff --git a/src/gravity_properties.c b/src/gravity_properties.c index fffbf22ec187f179f0e80b7121beaa3a96de0260..e548e3010f3b46065a2510723b5bde97121b4c02 100644 --- a/src/gravity_properties.c +++ b/src/gravity_properties.c @@ -170,20 +170,22 @@ void gravity_props_print_snapshot(hid_t h_grpgrav, io_write_attribute_s(h_grpgrav, "Softening style", kernel_gravity_softening_name); io_write_attribute_f( - h_grpgrav, "Comoving softening length", + h_grpgrav, "Comoving softening length [internal units]", p->epsilon_comoving * kernel_gravity_softening_plummer_equivalent); - io_write_attribute_f(h_grpgrav, - "Comoving Softening length (Plummer equivalent)", - p->epsilon_comoving); io_write_attribute_f( - h_grpgrav, "Maximal physical softening length", + h_grpgrav, + "Comoving Softening length (Plummer equivalent) [internal units]", + p->epsilon_comoving); + io_write_attribute_f( + h_grpgrav, "Maximal physical softening length [internal units]", p->epsilon_max_physical * kernel_gravity_softening_plummer_equivalent); io_write_attribute_f(h_grpgrav, - "Maximal physical softening length (Plummer equivalent)", + "Maximal physical softening length (Plummer equivalent) " + " [internal units]", p->epsilon_max_physical); 
io_write_attribute_f(h_grpgrav, "Opening angle", p->theta_crit); io_write_attribute_s(h_grpgrav, "Scheme", GRAVITY_IMPLEMENTATION); - io_write_attribute_d(h_grpgrav, "MM order", SELF_GRAVITY_MULTIPOLE_ORDER); + io_write_attribute_i(h_grpgrav, "MM order", SELF_GRAVITY_MULTIPOLE_ORDER); io_write_attribute_f(h_grpgrav, "Mesh a_smooth", p->a_smooth); io_write_attribute_f(h_grpgrav, "Mesh r_cut_max ratio", p->r_cut_max_ratio); io_write_attribute_f(h_grpgrav, "Mesh r_cut_min ratio", p->r_cut_min_ratio); diff --git a/src/hydro/Default/hydro.h b/src/hydro/Default/hydro.h index b4dc25495ab5be3c2e9c5ba0153e748a344f050f..2b1d19bc916889a5cfdc40b1357f1e3dfe9388af 100644 --- a/src/hydro/Default/hydro.h +++ b/src/hydro/Default/hydro.h @@ -645,6 +645,7 @@ __attribute__((always_inline)) INLINE static void hydro_first_init_part( struct part *restrict p, struct xpart *restrict xp) { p->time_bin = 0; + p->wakeup = time_bin_not_awake; xp->v_full[0] = p->v[0]; xp->v_full[1] = p->v[1]; xp->v_full[2] = p->v[2]; diff --git a/src/hydro/Default/hydro_debug.h b/src/hydro/Default/hydro_debug.h index 3be9c9e1760591423edbd218d19b46ddf9aad01e..68367beaee97c285057cb055c1fbdbba5c370085 100644 --- a/src/hydro/Default/hydro_debug.h +++ b/src/hydro/Default/hydro_debug.h @@ -25,10 +25,11 @@ __attribute__((always_inline)) INLINE static void hydro_debug_particle( "x=[%.3e,%.3e,%.3e], " "v=[%.3e,%.3e,%.3e],v_full=[%.3e,%.3e,%.3e] \n a=[%.3e,%.3e,%.3e],\n " "h=%.3e, " - "wcount=%d, m=%.3e, dh_drho=%.3e, rho=%.3e, time_bin=%d\n", + "wcount=%d, m=%.3e, dh_drho=%.3e, rho=%.3e, time_bin=%d wakeup=%d\n", p->x[0], p->x[1], p->x[2], p->v[0], p->v[1], p->v[2], xp->v_full[0], xp->v_full[1], xp->v_full[2], p->a_hydro[0], p->a_hydro[1], p->a_hydro[2], - p->h, (int)p->density.wcount, p->mass, p->rho_dh, p->rho, p->time_bin); + p->h, (int)p->density.wcount, p->mass, p->rho_dh, p->rho, p->time_bin, + p->wakeup); } #endif /* SWIFT_DEFAULT_HYDRO_DEBUG_H */ diff --git a/src/hydro/Default/hydro_iact.h 
b/src/hydro/Default/hydro_iact.h index 72808874c3fc6b58005d0e3ad450eafea8aa4b4d..85c586a4e921e38296453b71a2a2b9637971c28c 100644 --- a/src/hydro/Default/hydro_iact.h +++ b/src/hydro/Default/hydro_iact.h @@ -378,4 +378,28 @@ __attribute__((always_inline)) INLINE static void runner_iact_nonsym_force( pi->force.v_sig = max(pi->force.v_sig, v_sig); } +/** + * @brief Timestep limiter loop + */ +__attribute__((always_inline)) INLINE static void runner_iact_limiter( + float r2, const float *dx, float hi, float hj, struct part *restrict pi, + struct part *restrict pj, float a, float H) { + + /* Nothing to do here if both particles are active */ +} + +/** + * @brief Timestep limiter loop (non-symmetric version) + */ +__attribute__((always_inline)) INLINE static void runner_iact_nonsym_limiter( + float r2, const float *dx, float hi, float hj, struct part *restrict pi, + struct part *restrict pj, float a, float H) { + + /* Wake up the neighbour? */ + if (pi->force.v_sig > const_limiter_max_v_sig_ratio * pj->force.v_sig) { + + pj->wakeup = time_bin_awake; + } +} + #endif /* SWIFT_DEFAULT_HYDRO_IACT_H */ diff --git a/src/hydro/Default/hydro_part.h b/src/hydro/Default/hydro_part.h index 2a18e03cb533ca860f227a31152ef2058e0dd37d..7230826dc3c7c2a3486001ca9060dd07d55d0931 100644 --- a/src/hydro/Default/hydro_part.h +++ b/src/hydro/Default/hydro_part.h @@ -21,6 +21,7 @@ #include "chemistry_struct.h" #include "cooling_struct.h" +#include "tracers_struct.h" /* Extra particle data not needed during the SPH loops over neighbours. */ struct xpart { @@ -40,6 +41,9 @@ struct xpart { /* Additional data used to record cooling information */ struct cooling_xpart_data cooling_data; + /* Additional data used by the tracers */ + struct tracers_xpart_data tracers_data; + float u_full; /* Old density. */ @@ -132,6 +136,9 @@ struct part { /* Particle time-bin */ timebin_t time_bin; + /* Need waking-up ? 
*/ + char wakeup; + #ifdef SWIFT_DEBUG_CHECKS /* Time of the last drift */ diff --git a/src/hydro/Gadget2/hydro.h b/src/hydro/Gadget2/hydro.h index 8cad1c62ad669b9c0dc5bbe333985c1e20b882af..4a1eaf729bd5de96b43ef4b749d40038fcf39406 100644 --- a/src/hydro/Gadget2/hydro.h +++ b/src/hydro/Gadget2/hydro.h @@ -765,6 +765,7 @@ __attribute__((always_inline)) INLINE static void hydro_first_init_part( struct part *restrict p, struct xpart *restrict xp) { p->time_bin = 0; + p->wakeup = time_bin_not_awake; xp->v_full[0] = p->v[0]; xp->v_full[1] = p->v[1]; xp->v_full[2] = p->v[2]; diff --git a/src/hydro/Gadget2/hydro_debug.h b/src/hydro/Gadget2/hydro_debug.h index d0642a03a4c4eecb2da80fdae473948e460c5e31..aeb43ee5d68930debfa867dc856465ac9d22902a 100644 --- a/src/hydro/Gadget2/hydro_debug.h +++ b/src/hydro/Gadget2/hydro_debug.h @@ -27,14 +27,14 @@ __attribute__((always_inline)) INLINE static void hydro_debug_particle( "h=%.3e, wcount=%.3f, wcount_dh=%.3e, m=%.3e, dh_drho=%.3e, rho=%.3e, " "P=%.3e, P_over_rho2=%.3e, S=%.3e, dS/dt=%.3e, c=%.3e\n" "divV=%.3e, rotV=[%.3e,%.3e,%.3e], balsara=%.3e \n " - "v_sig=%e dh/dt=%.3e time_bin=%d\n", + "v_sig=%e dh/dt=%.3e time_bin=%d wakeup=%d\n", p->x[0], p->x[1], p->x[2], p->v[0], p->v[1], p->v[2], xp->v_full[0], xp->v_full[1], xp->v_full[2], p->a_hydro[0], p->a_hydro[1], p->a_hydro[2], p->h, p->density.wcount, p->density.wcount_dh, p->mass, p->density.rho_dh, p->rho, hydro_get_comoving_pressure(p), p->force.P_over_rho2, p->entropy, p->entropy_dt, p->force.soundspeed, p->density.div_v, p->density.rot_v[0], p->density.rot_v[1], p->density.rot_v[2], p->force.balsara, - p->force.v_sig, p->force.h_dt, p->time_bin); + p->force.v_sig, p->force.h_dt, p->time_bin, p->wakeup); } #endif /* SWIFT_GADGET2_HYDRO_DEBUG_H */ diff --git a/src/hydro/Gadget2/hydro_iact.h b/src/hydro/Gadget2/hydro_iact.h index a3c5e21dbdf8df60b25b01c0326c33c3a10d1bce..1ded85acfb7486b1286ddfbbfa698da0f4344e7d 100644 --- a/src/hydro/Gadget2/hydro_iact.h +++ 
b/src/hydro/Gadget2/hydro_iact.h @@ -293,7 +293,7 @@ runner_iact_nonsym_2_vec_density(float *R2, float *Dx, float *Dy, float *Dz, vector *wcountSum, vector *wcount_dhSum, vector *div_vSum, vector *curlvxSum, vector *curlvySum, vector *curlvzSum, - mask_t mask, mask_t mask2, short mask_cond) { + mask_t mask, mask_t mask2, int mask_cond) { vector r, ri, ui, wi, wi_dx; vector dvx, dvy, dvz; @@ -1051,4 +1051,34 @@ runner_iact_nonsym_2_vec_force( #endif +/** + * @brief Timestep limiter loop + */ +__attribute__((always_inline)) INLINE static void runner_iact_limiter( + float r2, const float *dx, float hi, float hj, struct part *restrict pi, + struct part *restrict pj, float a, float H) { + + /* Nothing to do here if both particles are active */ +} + +/** + * @brief Timestep limiter loop (non-symmetric version) + */ +__attribute__((always_inline)) INLINE static void runner_iact_nonsym_limiter( + float r2, const float *dx, float hi, float hj, struct part *restrict pi, + struct part *restrict pj, float a, float H) { + + /* Wake up the neighbour? */ + if (pi->force.v_sig > const_limiter_max_v_sig_ratio * pj->force.v_sig) { + + pj->wakeup = time_bin_awake; + + // MATTHIEU + // if (pj->wakeup == time_bin_not_awake) + // pj->wakeup = time_bin_awake; + // else if (pj->wakeup > 0) + // pj->wakeup = -pj->wakeup; + } +} + #endif /* SWIFT_GADGET2_HYDRO_IACT_H */ diff --git a/src/hydro/Gadget2/hydro_part.h b/src/hydro/Gadget2/hydro_part.h index 369eff881d5464a3ececca60cfedf077411e6d3d..28dbb2d0f7bb5e79e96a1a3f7e06fdb4086c6c5e 100644 --- a/src/hydro/Gadget2/hydro_part.h +++ b/src/hydro/Gadget2/hydro_part.h @@ -154,6 +154,9 @@ struct part { /* Time-step length */ timebin_t time_bin; + /* Need waking-up ? 
*/ + char wakeup; + #ifdef SWIFT_DEBUG_CHECKS /* Time of the last drift */ diff --git a/src/hydro/GizmoMFM/hydro.h b/src/hydro/GizmoMFM/hydro.h index b00a3578d02f492050c328af49a6108d566e9204..1ab1c1404f54450ddff8d95b51fdf3970daf7377 100644 --- a/src/hydro/GizmoMFM/hydro.h +++ b/src/hydro/GizmoMFM/hydro.h @@ -137,6 +137,9 @@ __attribute__((always_inline)) INLINE static void hydro_first_init_part( p->conserved.momentum[2] * p->v[2]); #endif + p->time_bin = 0; + p->wakeup = time_bin_not_awake; + /* initialize the particle velocity based on the primitive fluid velocity */ xp->v_full[0] = p->v[0]; xp->v_full[1] = p->v[1]; diff --git a/src/hydro/GizmoMFM/hydro_debug.h b/src/hydro/GizmoMFM/hydro_debug.h index e8b0914bd3cf6a99210399c6fc654e526319009f..e3c9f793aec92c7bfa2527143e6ad771c3897a09 100644 --- a/src/hydro/GizmoMFM/hydro_debug.h +++ b/src/hydro/GizmoMFM/hydro_debug.h @@ -27,6 +27,7 @@ __attribute__((always_inline)) INLINE static void hydro_debug_particle( "a=[%.3e,%.3e,%.3e], " "h=%.3e, " "time_bin=%d, " + "wakeup=%d, " "rho=%.3e, " "P=%.3e, " "gradients={" @@ -51,7 +52,7 @@ __attribute__((always_inline)) INLINE static void hydro_debug_particle( "wcount_dh=%.3e, " "wcount=%.3e}\n", p->x[0], p->x[1], p->x[2], p->v[0], p->v[1], p->v[2], p->a_hydro[0], - p->a_hydro[1], p->a_hydro[2], p->h, p->time_bin, p->rho, p->P, + p->a_hydro[1], p->a_hydro[2], p->h, p->time_bin, p->wakeup, p->rho, p->P, p->gradients.rho[0], p->gradients.rho[1], p->gradients.rho[2], p->gradients.v[0][0], p->gradients.v[0][1], p->gradients.v[0][2], p->gradients.v[1][0], p->gradients.v[1][1], p->gradients.v[1][2], diff --git a/src/hydro/GizmoMFM/hydro_iact.h b/src/hydro/GizmoMFM/hydro_iact.h index 38a97cbea39c1ed5c6926c911941e655e52362aa..09d4c7c70ee2bae8a31d10cb4a568c4627c7b3cd 100644 --- a/src/hydro/GizmoMFM/hydro_iact.h +++ b/src/hydro/GizmoMFM/hydro_iact.h @@ -486,4 +486,29 @@ __attribute__((always_inline)) INLINE static void runner_iact_nonsym_force( runner_iact_fluxes_common(r2, dx, hi, hj, pi, 
pj, 0, a, H); } +/** + * @brief Timestep limiter loop + */ +__attribute__((always_inline)) INLINE static void runner_iact_limiter( + float r2, const float *dx, float hi, float hj, struct part *restrict pi, + struct part *restrict pj, float a, float H) { + + /* Nothing to do here if both particles are active */ +} + +/** + * @brief Timestep limiter loop (non-symmetric version) + */ +__attribute__((always_inline)) INLINE static void runner_iact_nonsym_limiter( + float r2, const float *dx, float hi, float hj, struct part *restrict pi, + struct part *restrict pj, float a, float H) { + + /* Wake up the neighbour? */ + if (pi->timestepvars.vmax > + const_limiter_max_v_sig_ratio * pj->timestepvars.vmax) { + + pj->wakeup = time_bin_awake; + } +} + #endif /* SWIFT_GIZMO_MFM_HYDRO_IACT_H */ diff --git a/src/hydro/GizmoMFM/hydro_part.h b/src/hydro/GizmoMFM/hydro_part.h index 0055d7d86a35746a8ba90015b3a6986f8ddb5f9f..a05cae18aaf18feb80f7a4ec383434eadece8a41 100644 --- a/src/hydro/GizmoMFM/hydro_part.h +++ b/src/hydro/GizmoMFM/hydro_part.h @@ -21,6 +21,7 @@ #include "chemistry_struct.h" #include "cooling_struct.h" +#include "tracers_struct.h" /* Extra particle data not needed during the computation. */ struct xpart { @@ -40,6 +41,9 @@ struct xpart { /* Additional data used to record cooling information */ struct cooling_xpart_data cooling_data; + /* Additional data used by the tracers */ + struct tracers_xpart_data tracers_data; + } SWIFT_STRUCT_ALIGN; /* Data of a single particle. */ @@ -187,6 +191,9 @@ struct part { /* Time-step length */ timebin_t time_bin; + /* Need waking-up ? 
*/ + char wakeup; + #ifdef SWIFT_DEBUG_CHECKS /* Time of the last drift */ diff --git a/src/hydro/GizmoMFV/hydro.h b/src/hydro/GizmoMFV/hydro.h index 284b67b3b62cd7c6b75de192b299c0c48d170a05..f4e2b829769a58a4896516907317d02c936f2d65 100644 --- a/src/hydro/GizmoMFV/hydro.h +++ b/src/hydro/GizmoMFV/hydro.h @@ -121,6 +121,9 @@ __attribute__((always_inline)) INLINE static void hydro_first_init_part( const float mass = p->conserved.mass; + p->time_bin = 0; + p->wakeup = time_bin_not_awake; + p->primitives.v[0] = p->v[0]; p->primitives.v[1] = p->v[1]; p->primitives.v[2] = p->v[2]; diff --git a/src/hydro/GizmoMFV/hydro_debug.h b/src/hydro/GizmoMFV/hydro_debug.h index 8af3f824666529efad833c3bd520ace779718449..181bd6f82d547803c7303bd19be11cf66dc3a8a8 100644 --- a/src/hydro/GizmoMFV/hydro_debug.h +++ b/src/hydro/GizmoMFV/hydro_debug.h @@ -27,6 +27,7 @@ __attribute__((always_inline)) INLINE static void hydro_debug_particle( "a=[%.3e,%.3e,%.3e], " "h=%.3e, " "time_bin=%d, " + "wakeup=%d, " "primitives={" "v=[%.3e,%.3e,%.3e], " "rho=%.3e, " @@ -53,9 +54,9 @@ __attribute__((always_inline)) INLINE static void hydro_debug_particle( "wcount_dh=%.3e, " "wcount=%.3e}\n", p->x[0], p->x[1], p->x[2], p->v[0], p->v[1], p->v[2], p->a_hydro[0], - p->a_hydro[1], p->a_hydro[2], p->h, p->time_bin, p->primitives.v[0], - p->primitives.v[1], p->primitives.v[2], p->primitives.rho, - p->primitives.P, p->primitives.gradients.rho[0], + p->a_hydro[1], p->a_hydro[2], p->h, p->time_bin, p->wakeup, + p->primitives.v[0], p->primitives.v[1], p->primitives.v[2], + p->primitives.rho, p->primitives.P, p->primitives.gradients.rho[0], p->primitives.gradients.rho[1], p->primitives.gradients.rho[2], p->primitives.gradients.v[0][0], p->primitives.gradients.v[0][1], p->primitives.gradients.v[0][2], p->primitives.gradients.v[1][0], diff --git a/src/hydro/GizmoMFV/hydro_iact.h b/src/hydro/GizmoMFV/hydro_iact.h index 2f73e67ea2fdcecc527de8b1af0d15731f967b9b..d882549f8c55018419a2e1730d2ac099bbe1f5ee 100644 --- 
a/src/hydro/GizmoMFV/hydro_iact.h +++ b/src/hydro/GizmoMFV/hydro_iact.h @@ -501,4 +501,29 @@ __attribute__((always_inline)) INLINE static void runner_iact_nonsym_force( runner_iact_fluxes_common(r2, dx, hi, hj, pi, pj, 0, a, H); } +/** + * @brief Timestep limiter loop + */ +__attribute__((always_inline)) INLINE static void runner_iact_limiter( + float r2, const float *dx, float hi, float hj, struct part *restrict pi, + struct part *restrict pj, float a, float H) { + + /* Nothing to do here if both particles are active */ +} + +/** + * @brief Timestep limiter loop (non-symmetric version) + */ +__attribute__((always_inline)) INLINE static void runner_iact_nonsym_limiter( + float r2, const float *dx, float hi, float hj, struct part *restrict pi, + struct part *restrict pj, float a, float H) { + + /* Wake up the neighbour? */ + if (pi->timestepvars.vmax > + const_limiter_max_v_sig_ratio * pj->timestepvars.vmax) { + + pj->wakeup = time_bin_awake; + } +} + #endif /* SWIFT_GIZMO_MFV_HYDRO_IACT_H */ diff --git a/src/hydro/GizmoMFV/hydro_part.h b/src/hydro/GizmoMFV/hydro_part.h index 6248ddb11daf39a65be9a57fe51e40386ecda50b..8794b597712963e962cc23c796e9769efd4ea620 100644 --- a/src/hydro/GizmoMFV/hydro_part.h +++ b/src/hydro/GizmoMFV/hydro_part.h @@ -21,6 +21,7 @@ #include "chemistry_struct.h" #include "cooling_struct.h" +#include "tracers_struct.h" /* Extra particle data not needed during the computation. */ struct xpart { @@ -40,6 +41,9 @@ struct xpart { /* Additional data used to record cooling information */ struct cooling_xpart_data cooling_data; + /* Additional data used by the tracers */ + struct tracers_xpart_data tracers_data; + } SWIFT_STRUCT_ALIGN; /* Data of a single particle. */ @@ -198,6 +202,9 @@ struct part { /* Time-step length */ timebin_t time_bin; + /* Need waking-up ? 
*/ + char wakeup; + #ifdef SWIFT_DEBUG_CHECKS /* Time of the last drift */ diff --git a/src/hydro/Minimal/hydro.h b/src/hydro/Minimal/hydro.h index d638c168f23c95dc3010f838846f4dfc0522bee5..524774435d03a6d808c4535a6c54b68ad16bcb66 100644 --- a/src/hydro/Minimal/hydro.h +++ b/src/hydro/Minimal/hydro.h @@ -740,6 +740,7 @@ __attribute__((always_inline)) INLINE static void hydro_first_init_part( struct part *restrict p, struct xpart *restrict xp) { p->time_bin = 0; + p->wakeup = time_bin_not_awake; xp->v_full[0] = p->v[0]; xp->v_full[1] = p->v[1]; xp->v_full[2] = p->v[2]; diff --git a/src/hydro/Minimal/hydro_debug.h b/src/hydro/Minimal/hydro_debug.h index 73ffc26b8acf687a5445591ddccd72ea8e8fa8ae..3fadd05f9b93e53f1855c5daa7727d272ffe0fa5 100644 --- a/src/hydro/Minimal/hydro_debug.h +++ b/src/hydro/Minimal/hydro_debug.h @@ -41,12 +41,12 @@ __attribute__((always_inline)) INLINE static void hydro_debug_particle( "v_full=[%.3g, %.3g, %.3g], a=[%.3g, %.3g, %.3g], \n " "m=%.3g, u=%.3g, du/dt=%.3g, P=%.3g, c_s=%.3g, \n " "v_sig=%.3g, h=%.3g, dh/dt=%.3g, wcount=%.3g, rho=%.3g, \n " - "dh_drho=%.3g, time_bin=%d \n", + "dh_drho=%.3g, time_bin=%d wakeup=%d \n", p->x[0], p->x[1], p->x[2], p->v[0], p->v[1], p->v[2], xp->v_full[0], xp->v_full[1], xp->v_full[2], p->a_hydro[0], p->a_hydro[1], p->a_hydro[2], p->mass, p->u, p->u_dt, hydro_get_comoving_pressure(p), p->force.soundspeed, p->force.v_sig, p->h, p->force.h_dt, - p->density.wcount, p->rho, p->density.rho_dh, p->time_bin); + p->density.wcount, p->rho, p->density.rho_dh, p->time_bin, p->wakeup); } #endif /* SWIFT_MINIMAL_HYDRO_DEBUG_H */ diff --git a/src/hydro/Minimal/hydro_iact.h b/src/hydro/Minimal/hydro_iact.h index b29f44588c2e13bb5b7c5c9cd5297205557c3fc9..7fc7a3c67f6c832d70109319ad964e25df30ff4e 100644 --- a/src/hydro/Minimal/hydro_iact.h +++ b/src/hydro/Minimal/hydro_iact.h @@ -424,4 +424,28 @@ __attribute__((always_inline)) INLINE static void runner_iact_nonsym_force( pi->force.v_sig = max(pi->force.v_sig, v_sig); } +/** + 
* @brief Timestep limiter loop + */ +__attribute__((always_inline)) INLINE static void runner_iact_limiter( + float r2, const float *dx, float hi, float hj, struct part *restrict pi, + struct part *restrict pj, float a, float H) { + + /* Nothing to do here if both particles are active */ +} + +/** + * @brief Timestep limiter loop (non-symmetric version) + */ +__attribute__((always_inline)) INLINE static void runner_iact_nonsym_limiter( + float r2, const float *dx, float hi, float hj, struct part *restrict pi, + struct part *restrict pj, float a, float H) { + + /* Wake up the neighbour? */ + if (pi->force.v_sig > const_limiter_max_v_sig_ratio * pj->force.v_sig) { + + pj->wakeup = time_bin_awake; + } +} + #endif /* SWIFT_MINIMAL_HYDRO_IACT_H */ diff --git a/src/hydro/Minimal/hydro_part.h b/src/hydro/Minimal/hydro_part.h index 1d14a94f2d91bf259df54c875a32bf3072ad33b6..80e472194e6a008859fa7e7fde9c79df6611142b 100644 --- a/src/hydro/Minimal/hydro_part.h +++ b/src/hydro/Minimal/hydro_part.h @@ -34,6 +34,7 @@ #include "chemistry_struct.h" #include "cooling_struct.h" +#include "tracers_struct.h" /** * @brief Particle fields not needed during the SPH loops over neighbours. @@ -62,6 +63,9 @@ struct xpart { /*! Additional data used to record cooling information */ struct cooling_xpart_data cooling_data; + /* Additional data used by the tracers */ + struct tracers_xpart_data tracers_data; + } SWIFT_STRUCT_ALIGN; /** @@ -168,6 +172,9 @@ struct part { /*! Time-step length */ timebin_t time_bin; + /* Need waking-up ? 
*/ + char wakeup; + #ifdef SWIFT_DEBUG_CHECKS /* Time of the last drift */ diff --git a/src/hydro/Planetary/hydro.h b/src/hydro/Planetary/hydro.h index 957e96dcf391b9027016926a969b28366590664f..ed7aa6b89d2b50ab2e00cedb0b3ef6779689feb1 100644 --- a/src/hydro/Planetary/hydro.h +++ b/src/hydro/Planetary/hydro.h @@ -735,6 +735,7 @@ __attribute__((always_inline)) INLINE static void hydro_first_init_part( struct part *restrict p, struct xpart *restrict xp) { p->time_bin = 0; + p->wakeup = time_bin_not_awake; xp->v_full[0] = p->v[0]; xp->v_full[1] = p->v[1]; xp->v_full[2] = p->v[2]; diff --git a/src/hydro/Planetary/hydro_debug.h b/src/hydro/Planetary/hydro_debug.h index 74261f3b49e2881af1c403013005560efa53a7f1..306f7526404599a051f83dc1b61886ed2aa5b69e 100644 --- a/src/hydro/Planetary/hydro_debug.h +++ b/src/hydro/Planetary/hydro_debug.h @@ -42,12 +42,13 @@ __attribute__((always_inline)) INLINE static void hydro_debug_particle( "v_full=[%.3g, %.3g, %.3g], a=[%.3g, %.3g, %.3g], \n " "m=%.3g, u=%.3g, du/dt=%.3g, P=%.3g, c_s=%.3g, \n " "v_sig=%.3g, h=%.3g, dh/dt=%.3g, wcount=%.3g, rho=%.3g, \n " - "dh_drho=%.3g, time_bin=%d, mat_id=%d \n", + "dh_drho=%.3g, time_bin=%d, wakeup=%d mat_id=%d \n", p->x[0], p->x[1], p->x[2], p->v[0], p->v[1], p->v[2], xp->v_full[0], xp->v_full[1], xp->v_full[2], p->a_hydro[0], p->a_hydro[1], p->a_hydro[2], p->mass, p->u, p->u_dt, hydro_get_comoving_pressure(p), p->force.soundspeed, p->force.v_sig, p->h, p->force.h_dt, - p->density.wcount, p->rho, p->density.rho_dh, p->time_bin, p->mat_id); + p->density.wcount, p->rho, p->density.rho_dh, p->time_bin, p->wakeup, + p->mat_id); } #endif /* SWIFT_PLANETARY_HYDRO_DEBUG_H */ diff --git a/src/hydro/Planetary/hydro_iact.h b/src/hydro/Planetary/hydro_iact.h index 19ee002b85c1b0bc8ed621a029059cd02c5e670f..afebb6a406bd310f38d51dcb32fc25da6b2674b5 100644 --- a/src/hydro/Planetary/hydro_iact.h +++ b/src/hydro/Planetary/hydro_iact.h @@ -346,4 +346,28 @@ __attribute__((always_inline)) INLINE static void 
runner_iact_nonsym_force( pi->force.v_sig = max(pi->force.v_sig, v_sig); } +/** + * @brief Timestep limiter loop + */ +__attribute__((always_inline)) INLINE static void runner_iact_limiter( + float r2, const float *dx, float hi, float hj, struct part *restrict pi, + struct part *restrict pj, float a, float H) { + + /* Nothing to do here if both particles are active */ +} + +/** + * @brief Timestep limiter loop (non-symmetric version) + */ +__attribute__((always_inline)) INLINE static void runner_iact_nonsym_limiter( + float r2, const float *dx, float hi, float hj, struct part *restrict pi, + struct part *restrict pj, float a, float H) { + + /* Wake up the neighbour? */ + if (pi->force.v_sig > const_limiter_max_v_sig_ratio * pj->force.v_sig) { + + pj->wakeup = time_bin_awake; + } +} + #endif /* SWIFT_PLANETARY_HYDRO_IACT_H */ diff --git a/src/hydro/Planetary/hydro_part.h b/src/hydro/Planetary/hydro_part.h index 4087cef62e873231a556f82869a7f6d848c8d72c..1955366da7265c4c40922d1e7290bc9128641600 100644 --- a/src/hydro/Planetary/hydro_part.h +++ b/src/hydro/Planetary/hydro_part.h @@ -36,6 +36,7 @@ #include "chemistry_struct.h" #include "cooling_struct.h" #include "equation_of_state.h" // For enum material_id +#include "tracers_struct.h" /** * @brief Particle fields not needed during the SPH loops over neighbours. @@ -64,6 +65,9 @@ struct xpart { /*! Additional data used to record cooling information */ struct cooling_xpart_data cooling_data; + /* Additional data used by the tracers */ + struct tracers_xpart_data tracers_data; + } SWIFT_STRUCT_ALIGN; /** @@ -173,6 +177,9 @@ struct part { /*! Time-step length */ timebin_t time_bin; + /* Need waking-up ? 
*/ + char wakeup; + #ifdef SWIFT_DEBUG_CHECKS /* Time of the last drift */ diff --git a/src/hydro/PressureEnergy/hydro.h b/src/hydro/PressureEnergy/hydro.h index 8dd43cd72968f89cfc818342d618688f2f39cbd3..400a84915b700464b9b86f74400ba578b4efa446 100644 --- a/src/hydro/PressureEnergy/hydro.h +++ b/src/hydro/PressureEnergy/hydro.h @@ -763,6 +763,7 @@ __attribute__((always_inline)) INLINE static void hydro_first_init_part( struct part *restrict p, struct xpart *restrict xp) { p->time_bin = 0; + p->wakeup = time_bin_not_awake; xp->v_full[0] = p->v[0]; xp->v_full[1] = p->v[1]; xp->v_full[2] = p->v[2]; @@ -802,4 +803,4 @@ hydro_set_init_internal_energy(struct part *p, float u_init) { __attribute__((always_inline)) INLINE static void hydro_remove_part( const struct part *p, const struct xpart *xp) {} -#endif /* SWIFT_MINIMAL_HYDRO_H */ +#endif /* SWIFT_PRESSURE_ENERGY_HYDRO_H */ diff --git a/src/hydro/PressureEnergy/hydro_debug.h b/src/hydro/PressureEnergy/hydro_debug.h index 6324167f12726e155eeaa3359be9741aca3a1e42..7ffc370ed4d6abd273fc3d8d5b887f5ccf8e001c 100644 --- a/src/hydro/PressureEnergy/hydro_debug.h +++ b/src/hydro/PressureEnergy/hydro_debug.h @@ -32,12 +32,12 @@ __attribute__((always_inline)) INLINE static void hydro_debug_particle( "u=%.3e, du/dt=%.3e v_sig=%.3e, P=%.3e\n" "h=%.3e, dh/dt=%.3e wcount=%d, m=%.3e, dh_drho=%.3e, rho=%.3e, \n" "p_dh=%.3e, p_bar=%.3e \n" - "time_bin=%d\n", + "time_bin=%d wakeup=%d\n", p->x[0], p->x[1], p->x[2], p->v[0], p->v[1], p->v[2], xp->v_full[0], xp->v_full[1], xp->v_full[2], p->a_hydro[0], p->a_hydro[1], p->a_hydro[2], p->u, p->u_dt, p->force.v_sig, hydro_get_comoving_pressure(p), p->h, p->force.h_dt, (int)p->density.wcount, p->mass, p->density.rho_dh, p->rho, - p->density.pressure_bar_dh, p->pressure_bar, p->time_bin); + p->density.pressure_bar_dh, p->pressure_bar, p->time_bin, p->wakeup); } #endif /* SWIFT_MINIMAL_HYDRO_DEBUG_H */ diff --git a/src/hydro/PressureEnergy/hydro_iact.h b/src/hydro/PressureEnergy/hydro_iact.h index 
4146e61a53dd7ece57e263cb90308e2579aa3930..ae154ea549a52cb24ed7c69453533b7d59b39a85 100644 --- a/src/hydro/PressureEnergy/hydro_iact.h +++ b/src/hydro/PressureEnergy/hydro_iact.h @@ -17,8 +17,8 @@ * along with this program. If not, see <http://www.gnu.org/licenses/>. * ******************************************************************************/ -#ifndef SWIFT_MINIMAL_HYDRO_IACT_H -#define SWIFT_MINIMAL_HYDRO_IACT_H +#ifndef SWIFT_PRESSURE_ENERGY_HYDRO_IACT_H +#define SWIFT_PRESSURE_ENERGY_HYDRO_IACT_H /** * @file PressureEnergy/hydro_iact.h @@ -418,5 +418,28 @@ __attribute__((always_inline)) INLINE static void runner_iact_nonsym_force( /* Update the signal velocity. */ pi->force.v_sig = max(pi->force.v_sig, v_sig); } +/** + * @brief Timestep limiter loop + */ +__attribute__((always_inline)) INLINE static void runner_iact_limiter( + float r2, const float* dx, float hi, float hj, struct part* restrict pi, + struct part* restrict pj, float a, float H) { + + /* Nothing to do here if both particles are active */ +} + +/** + * @brief Timestep limiter loop (non-symmetric version) + */ +__attribute__((always_inline)) INLINE static void runner_iact_nonsym_limiter( + float r2, const float* dx, float hi, float hj, struct part* restrict pi, + struct part* restrict pj, float a, float H) { + + /* Wake up the neighbour? */ + if (pi->force.v_sig > const_limiter_max_v_sig_ratio * pj->force.v_sig) { + + pj->wakeup = time_bin_awake; + } +} -#endif /* SWIFT_MINIMAL_HYDRO_IACT_H */ +#endif /* SWIFT_PRESSURE_ENERGY_HYDRO_IACT_H */ diff --git a/src/hydro/PressureEnergy/hydro_io.h b/src/hydro/PressureEnergy/hydro_io.h index 06762c6124c2c726c4e687980455ab956a5fa79e..701c12283bf77acef4af77598f57705a2b364fa1 100644 --- a/src/hydro/PressureEnergy/hydro_io.h +++ b/src/hydro/PressureEnergy/hydro_io.h @@ -17,8 +17,8 @@ * along with this program. If not, see <http://www.gnu.org/licenses/>. 
* ******************************************************************************/ -#ifndef SWIFT_PRESSURE_ENERGY_HYDRO_IACT_H -#define SWIFT_PRESSURE_ENERGY_HYDRO_IACT_H +#ifndef SWIFT_PRESSURE_ENERGY_HYDRO_IO_H +#define SWIFT_PRESSURE_ENERGY_HYDRO_IO_H /** * @file PressureEnergy/hydro_io.h * @brief P-U implementation of SPH (i/o routines) diff --git a/src/hydro/PressureEnergy/hydro_part.h b/src/hydro/PressureEnergy/hydro_part.h index bc7d14b612556dc722ecca67dd6ce823192e00f0..218fbf5dc17559b07974b68e42f69f4e7a0e8e3b 100644 --- a/src/hydro/PressureEnergy/hydro_part.h +++ b/src/hydro/PressureEnergy/hydro_part.h @@ -33,6 +33,7 @@ #include "chemistry_struct.h" #include "cooling_struct.h" +#include "tracers_struct.h" /** * @brief Particle fields not needed during the SPH loops over neighbours. @@ -61,6 +62,9 @@ struct xpart { /*! Additional data used to record cooling information */ struct cooling_xpart_data cooling_data; + /* Additional data used by the tracers */ + struct tracers_xpart_data tracers_data; + } SWIFT_STRUCT_ALIGN; /** @@ -168,6 +172,9 @@ struct part { /*! Time-step length */ timebin_t time_bin; + /* Need waking-up ? */ + char wakeup; + #ifdef SWIFT_DEBUG_CHECKS /* Time of the last drift */ diff --git a/src/hydro/PressureEnergyMorrisMonaghanAV/hydro.h b/src/hydro/PressureEnergyMorrisMonaghanAV/hydro.h index deb013579fd33340236d3dd5817021fd100c0fcb..7ef55b86c24972f8f287273441da99f26285c531 100644 --- a/src/hydro/PressureEnergyMorrisMonaghanAV/hydro.h +++ b/src/hydro/PressureEnergyMorrisMonaghanAV/hydro.h @@ -50,22 +50,26 @@ #include <float.h> /** - * @brief Returns the comoving internal energy of a particle + * @brief Returns the comoving internal energy of a particle at the last + * time the particle was kicked. * * For implementations where the main thermodynamic variable * is not internal energy, this function computes the internal * energy from the thermodynamic variable. 
* * @param p The particle of interest + * @param xp The extended data of the particle of interest. */ __attribute__((always_inline)) INLINE static float -hydro_get_comoving_internal_energy(const struct part *restrict p) { +hydro_get_comoving_internal_energy(const struct part *restrict p, + const struct xpart *restrict xp) { - return p->u; + return xp->u_full; } /** - * @brief Returns the physical internal energy of a particle + * @brief Returns the physical internal energy of a particle at the last + * time the particle was kicked. * * For implementations where the main thermodynamic variable * is not internal energy, this function computes the internal @@ -73,13 +77,15 @@ hydro_get_comoving_internal_energy(const struct part *restrict p) { * physical coordinates. * * @param p The particle of interest. + * @param xp The extended data of the particle of interest. * @param cosmo The cosmological model. */ __attribute__((always_inline)) INLINE static float hydro_get_physical_internal_energy(const struct part *restrict p, + const struct xpart *restrict xp, const struct cosmology *cosmo) { - return p->u * cosmo->a_factor_internal_energy; + return xp->u_full * cosmo->a_factor_internal_energy; } /** @@ -734,6 +740,7 @@ __attribute__((always_inline)) INLINE static void hydro_first_init_part( struct part *restrict p, struct xpart *restrict xp) { p->time_bin = 0; + p->wakeup = time_bin_not_awake; xp->v_full[0] = p->v[0]; xp->v_full[1] = p->v[1]; xp->v_full[2] = p->v[2]; diff --git a/src/hydro/PressureEnergyMorrisMonaghanAV/hydro_debug.h b/src/hydro/PressureEnergyMorrisMonaghanAV/hydro_debug.h index ead5fcc0c842d8018f784a1084941bdb9ebcb6ca..d0cd5367f94cd90f36cc2b738a63c7963adbd445 100644 --- a/src/hydro/PressureEnergyMorrisMonaghanAV/hydro_debug.h +++ b/src/hydro/PressureEnergyMorrisMonaghanAV/hydro_debug.h @@ -36,12 +36,13 @@ __attribute__((always_inline)) INLINE static void hydro_debug_particle( "u=%.3e, du/dt=%.3e v_sig=%.3e, P=%.3e\n" "h=%.3e, dh/dt=%.3e wcount=%d, m=%.3e, 
dh_drho=%.3e, rho=%.3e, \n" "p_dh=%.3e, p_bar=%.3e \n" - "time_bin=%d, alpha=%.3e\n", + "time_bin=%d, wakeup=%d alpha=%.3e\n", p->x[0], p->x[1], p->x[2], p->v[0], p->v[1], p->v[2], xp->v_full[0], xp->v_full[1], xp->v_full[2], p->a_hydro[0], p->a_hydro[1], p->a_hydro[2], p->u, p->u_dt, p->force.v_sig, hydro_get_comoving_pressure(p), p->h, p->force.h_dt, (int)p->density.wcount, p->mass, p->density.rho_dh, p->rho, - p->density.pressure_bar_dh, p->pressure_bar, p->time_bin, p->alpha); + p->density.pressure_bar_dh, p->pressure_bar, p->time_bin, p->wakeup, + p->alpha); } #endif /* SWIFT_PRESSURE_ENERGY_MORRIS_HYDRO_DEBUG_H */ diff --git a/src/hydro/PressureEnergyMorrisMonaghanAV/hydro_iact.h b/src/hydro/PressureEnergyMorrisMonaghanAV/hydro_iact.h index 747fca714ce20d9c2b018e14ac24a6492c51a75f..69da511c7544a71ef381a0889c8b56c80d5211f1 100644 --- a/src/hydro/PressureEnergyMorrisMonaghanAV/hydro_iact.h +++ b/src/hydro/PressureEnergyMorrisMonaghanAV/hydro_iact.h @@ -424,4 +424,28 @@ __attribute__((always_inline)) INLINE static void runner_iact_nonsym_force( pi->force.v_sig = max(pi->force.v_sig, v_sig); } +/** + * @brief Timestep limiter loop + */ +__attribute__((always_inline)) INLINE static void runner_iact_limiter( + float r2, const float* dx, float hi, float hj, struct part* restrict pi, + struct part* restrict pj, float a, float H) { + + /* Nothing to do here if both particles are active */ +} + +/** + * @brief Timestep limiter loop (non-symmetric version) + */ +__attribute__((always_inline)) INLINE static void runner_iact_nonsym_limiter( + float r2, const float* dx, float hi, float hj, struct part* restrict pi, + struct part* restrict pj, float a, float H) { + + /* Wake up the neighbour? 
*/ + if (pi->force.v_sig > const_limiter_max_v_sig_ratio * pj->force.v_sig) { + + pj->wakeup = time_bin_awake; + } +} + #endif /* SWIFT_PRESSURE_ENERGY_MORRIS_HYDRO_IACT_H */ diff --git a/src/hydro/PressureEnergyMorrisMonaghanAV/hydro_io.h b/src/hydro/PressureEnergyMorrisMonaghanAV/hydro_io.h index 1600679bc2e840d0b3b958531c279f5f29293b48..71662f14c61c92d65bcf493b6f5a43b8172e3697 100644 --- a/src/hydro/PressureEnergyMorrisMonaghanAV/hydro_io.h +++ b/src/hydro/PressureEnergyMorrisMonaghanAV/hydro_io.h @@ -69,12 +69,6 @@ INLINE static void hydro_read_particles(struct part* parts, UNIT_CONV_DENSITY, parts, rho); } -INLINE static void convert_u(const struct engine* e, const struct part* p, - const struct xpart* xp, float* ret) { - - ret[0] = hydro_get_comoving_internal_energy(p); -} - INLINE static void convert_S(const struct engine* e, const struct part* p, const struct xpart* xp, float* ret) { @@ -170,9 +164,8 @@ INLINE static void hydro_write_particles(const struct part* parts, io_make_output_field("Masses", FLOAT, 1, UNIT_CONV_MASS, parts, mass); list[3] = io_make_output_field("SmoothingLength", FLOAT, 1, UNIT_CONV_LENGTH, parts, h); - list[4] = io_make_output_field_convert_part("InternalEnergy", FLOAT, 1, - UNIT_CONV_ENERGY_PER_UNIT_MASS, - parts, xparts, convert_u); + list[4] = io_make_output_field("InternalEnergy", FLOAT, 1, + UNIT_CONV_ENERGY_PER_UNIT_MASS, parts, u); list[5] = io_make_output_field("ParticleIDs", ULONGLONG, 1, UNIT_CONV_NO_UNITS, parts, id); list[6] = diff --git a/src/hydro/PressureEnergyMorrisMonaghanAV/hydro_part.h b/src/hydro/PressureEnergyMorrisMonaghanAV/hydro_part.h index da6391236811e2a907281c3db05462bb57602fe0..d66249ea179a830cedbd3c3f165ca5012fd18862 100644 --- a/src/hydro/PressureEnergyMorrisMonaghanAV/hydro_part.h +++ b/src/hydro/PressureEnergyMorrisMonaghanAV/hydro_part.h @@ -34,6 +34,7 @@ #include "chemistry_struct.h" #include "cooling_struct.h" +#include "tracers_struct.h" /** * @brief Particle fields not needed during the SPH 
loops over neighbours. @@ -62,6 +63,9 @@ struct xpart { /*! Additional data used to record cooling information */ struct cooling_xpart_data cooling_data; + /* Additional data used by the tracers */ + struct tracers_xpart_data tracers_data; + } SWIFT_STRUCT_ALIGN; /** @@ -172,6 +176,9 @@ struct part { /*! Time-step length */ timebin_t time_bin; + /* Need waking-up ? */ + char wakeup; + #ifdef SWIFT_DEBUG_CHECKS /* Time of the last drift */ diff --git a/src/hydro/PressureEntropy/hydro.h b/src/hydro/PressureEntropy/hydro.h index 38e0f66fe7ecc1b6497717c9754bc36cd10a66f7..2e8d2d5db615f239bf5c3567e7beb155eab5cb38 100644 --- a/src/hydro/PressureEntropy/hydro.h +++ b/src/hydro/PressureEntropy/hydro.h @@ -730,6 +730,7 @@ __attribute__((always_inline)) INLINE static void hydro_first_init_part( struct part *restrict p, struct xpart *restrict xp) { p->time_bin = 0; + p->wakeup = time_bin_not_awake; p->rho_bar = 0.f; p->entropy_one_over_gamma = pow_one_over_gamma(p->entropy); xp->v_full[0] = p->v[0]; diff --git a/src/hydro/PressureEntropy/hydro_debug.h b/src/hydro/PressureEntropy/hydro_debug.h index 14d69bb650ff1bbd49394c0ca2f6256ad0cb188d..2163b70b94dde4e88f010d962358dccbde7960a3 100644 --- a/src/hydro/PressureEntropy/hydro_debug.h +++ b/src/hydro/PressureEntropy/hydro_debug.h @@ -36,14 +36,14 @@ __attribute__((always_inline)) INLINE static void hydro_debug_particle( "v=[%.3e,%.3e,%.3e],v_full=[%.3e,%.3e,%.3e] \n a=[%.3e,%.3e,%.3e],\n " "h=%.3e, wcount=%.3f, wcount_dh=%.3e, m=%.3e, dh_drho=%.3e, rho=%.3e, " "rho_bar=%.3e, P=%.3e, dP_dh=%.3e, P_over_rho2=%.3e, S=%.3e, S^1/g=%.3e, " - "dS/dt=%.3e,\nc=%.3e v_sig=%e dh/dt=%.3e time_bin=%d\n", + "dS/dt=%.3e,\nc=%.3e v_sig=%e dh/dt=%.3e time_bin=%d wakeup=%d\n", p->x[0], p->x[1], p->x[2], p->v[0], p->v[1], p->v[2], xp->v_full[0], xp->v_full[1], xp->v_full[2], p->a_hydro[0], p->a_hydro[1], p->a_hydro[2], p->h, p->density.wcount, p->density.wcount_dh, p->mass, p->density.rho_dh, p->rho, p->rho_bar, hydro_get_comoving_pressure(p), 
p->density.pressure_dh, p->force.P_over_rho2, p->entropy, p->entropy_one_over_gamma, p->entropy_dt, p->force.soundspeed, - p->force.v_sig, p->force.h_dt, p->time_bin); + p->force.v_sig, p->force.h_dt, p->time_bin, p->wakeup); } #endif /* SWIFT_PRESSURE_ENTROPY_HYDRO_DEBUG_H */ diff --git a/src/hydro/PressureEntropy/hydro_iact.h b/src/hydro/PressureEntropy/hydro_iact.h index a018b39a99be5ed691485d93bd8dfd1735378bda..19279adec1f37117cf985e63a18a681ceee4f973 100644 --- a/src/hydro/PressureEntropy/hydro_iact.h +++ b/src/hydro/PressureEntropy/hydro_iact.h @@ -402,4 +402,28 @@ __attribute__((always_inline)) INLINE static void runner_iact_nonsym_force( pi->entropy_dt += mj * visc_term * r_inv * dvdr; } +/** + * @brief Timestep limiter loop + */ +__attribute__((always_inline)) INLINE static void runner_iact_limiter( + float r2, const float *dx, float hi, float hj, struct part *restrict pi, + struct part *restrict pj, float a, float H) { + + /* Nothing to do here if both particles are active */ +} + +/** + * @brief Timestep limiter loop (non-symmetric version) + */ +__attribute__((always_inline)) INLINE static void runner_iact_nonsym_limiter( + float r2, const float *dx, float hi, float hj, struct part *restrict pi, + struct part *restrict pj, float a, float H) { + + /* Wake up the neighbour? */ + if (pi->force.v_sig > const_limiter_max_v_sig_ratio * pj->force.v_sig) { + + pj->wakeup = time_bin_awake; + } +} + #endif /* SWIFT_PRESSURE_ENTROPY_HYDRO_IACT_H */ diff --git a/src/hydro/PressureEntropy/hydro_part.h b/src/hydro/PressureEntropy/hydro_part.h index fb8424d66196b7013866acef6bec6ec9889a3353..a404a897b06ddc0777a493e2ecfd28b68e15defe 100644 --- a/src/hydro/PressureEntropy/hydro_part.h +++ b/src/hydro/PressureEntropy/hydro_part.h @@ -32,6 +32,7 @@ #include "chemistry_struct.h" #include "cooling_struct.h" +#include "tracers_struct.h" /* Extra particle data not needed during the SPH loops over neighbours. */ struct xpart { @@ -54,6 +55,9 @@ struct xpart { /*! 
Additional data used to record cooling information */ struct cooling_xpart_data cooling_data; + /* Additional data used by the tracers */ + struct tracers_xpart_data tracers_data; + } SWIFT_STRUCT_ALIGN; /* Data of a single particle. */ @@ -148,6 +152,9 @@ struct part { /* Time-step length */ timebin_t time_bin; + /* Need waking-up ? */ + char wakeup; + #ifdef SWIFT_DEBUG_CHECKS /* Time of the last drift */ diff --git a/src/hydro/Shadowswift/hydro.h b/src/hydro/Shadowswift/hydro.h index 446219104dffb2939877ae2a7c782e66af153213..b0f3207dfce69ca79899b1134740d035d47251d1 100644 --- a/src/hydro/Shadowswift/hydro.h +++ b/src/hydro/Shadowswift/hydro.h @@ -103,6 +103,9 @@ __attribute__((always_inline)) INLINE static void hydro_first_init_part( const float mass = p->conserved.mass; + p->time_bin = 0; + p->wakeup = time_bin_not_awake; + p->primitives.v[0] = p->v[0]; p->primitives.v[1] = p->v[1]; p->primitives.v[2] = p->v[2]; diff --git a/src/hydro/Shadowswift/hydro_debug.h b/src/hydro/Shadowswift/hydro_debug.h index 7cd7f89c8112ebcf1930c5ca52cb389139191975..8ff85d62fc7d58d53220b1f77a7afb44c00c33b0 100644 --- a/src/hydro/Shadowswift/hydro_debug.h +++ b/src/hydro/Shadowswift/hydro_debug.h @@ -23,6 +23,8 @@ __attribute__((always_inline)) INLINE static void hydro_debug_particle( "x=[%.16e,%.16e,%.16e], " "v=[%.3e,%.3e,%.3e], " "a=[%.3e,%.3e,%.3e], " + "time_bin=%d, " + "wakeup=%d, " "h=%.3e, " "primitives={" "v=[%.3e,%.3e,%.3e], " @@ -47,9 +49,9 @@ __attribute__((always_inline)) INLINE static void hydro_debug_particle( "wcount_dh=%.3e, " "wcount=%.3e}", p->x[0], p->x[1], p->x[2], p->v[0], p->v[1], p->v[2], p->a_hydro[0], - p->a_hydro[1], p->a_hydro[2], p->h, p->primitives.v[0], - p->primitives.v[1], p->primitives.v[2], p->primitives.rho, - p->primitives.P, p->primitives.gradients.rho[0], + p->a_hydro[1], p->a_hydro[2], p->time_bin, p->wakeup, p->h, + p->primitives.v[0], p->primitives.v[1], p->primitives.v[2], + p->primitives.rho, p->primitives.P, p->primitives.gradients.rho[0], 
p->primitives.gradients.rho[1], p->primitives.gradients.rho[2], p->primitives.gradients.v[0][0], p->primitives.gradients.v[0][1], p->primitives.gradients.v[0][2], p->primitives.gradients.v[1][0], diff --git a/src/hydro/Shadowswift/hydro_iact.h b/src/hydro/Shadowswift/hydro_iact.h index eda8e3759d9e08dac8073ebed9fb36dd0c5b99f6..791e4c7924df9806fa9150d03c08a543771a7049 100644 --- a/src/hydro/Shadowswift/hydro_iact.h +++ b/src/hydro/Shadowswift/hydro_iact.h @@ -342,3 +342,28 @@ __attribute__((always_inline)) INLINE static void runner_iact_nonsym_force( runner_iact_fluxes_common(r2, dx, hi, hj, pi, pj, 0, a, H); } + +/** + * @brief Timestep limiter loop + */ +__attribute__((always_inline)) INLINE static void runner_iact_limiter( + float r2, const float *dx, float hi, float hj, struct part *restrict pi, + struct part *restrict pj, float a, float H) { + + /* Nothing to do here if both particles are active */ +} + +/** + * @brief Timestep limiter loop (non-symmetric version) + */ +__attribute__((always_inline)) INLINE static void runner_iact_nonsym_limiter( + float r2, const float *dx, float hi, float hj, struct part *restrict pi, + struct part *restrict pj, float a, float H) { + + /* Wake up the neighbour? */ + if (pi->timestepvars.vmax > + const_limiter_max_v_sig_ratio * pj->timestepvars.vmax) { + + pj->wakeup = time_bin_awake; + } +} diff --git a/src/hydro/Shadowswift/hydro_part.h b/src/hydro/Shadowswift/hydro_part.h index a7cc9daf0839216f098ac05c2267adc60ea11fb0..91ffaa85e5e6e80e7db577ce09363265f73e7f4c 100644 --- a/src/hydro/Shadowswift/hydro_part.h +++ b/src/hydro/Shadowswift/hydro_part.h @@ -21,6 +21,7 @@ #include "chemistry_struct.h" #include "cooling_struct.h" +#include "tracers_struct.h" #include "voronoi_cell.h" /* Extra particle data not needed during the computation. 
*/ @@ -41,6 +42,9 @@ struct xpart { /* Additional data used to record cooling information */ struct cooling_xpart_data cooling_data; + /* Additional data used by the tracers */ + struct tracers_xpart_data tracers_data; + } SWIFT_STRUCT_ALIGN; /* Data of a single particle. */ @@ -179,6 +183,9 @@ struct part { /* Time-step length */ timebin_t time_bin; + /* Need waking-up ? */ + char wakeup; + #ifdef SWIFT_DEBUG_CHECKS /* Time of the last drift */ diff --git a/src/hydro_properties.c b/src/hydro_properties.c index 85f88d418bd46354f7a1cd3dd89b0e77b556b7d9..167c13a30c22b01c20e355d1b8c60903ca026ad8 100644 --- a/src/hydro_properties.c +++ b/src/hydro_properties.c @@ -90,6 +90,9 @@ void hydro_props_init(struct hydro_props *p, p->max_smoothing_iterations = parser_get_opt_param_int( params, "SPH:max_ghost_iterations", hydro_props_default_max_iterations); + if (p->max_smoothing_iterations <= 10) + error("The number of smoothing length iterations should be > 10"); + /* Time integration properties */ p->CFL_condition = parser_get_param_float(params, "SPH:CFL_condition"); const float max_volume_change = parser_get_opt_param_float( @@ -239,7 +242,8 @@ void hydro_props_print_snapshot(hid_t h_grpsph, const struct hydro_props *p) { io_write_attribute_f(h_grpsph, "Kernel delta N_ngb", p->delta_neighbours); io_write_attribute_f(h_grpsph, "Kernel eta", p->eta_neighbours); io_write_attribute_f(h_grpsph, "Smoothing length tolerance", p->h_tolerance); - io_write_attribute_f(h_grpsph, "Maximal smoothing length", p->h_max); + io_write_attribute_f(h_grpsph, "Maximal smoothing length [internal units]", + p->h_max); io_write_attribute_f(h_grpsph, "CFL parameter", p->CFL_condition); io_write_attribute_f(h_grpsph, "Volume log(max(delta h))", p->log_max_h_change); @@ -248,8 +252,12 @@ void hydro_props_print_snapshot(hid_t h_grpsph, const struct hydro_props *p) { io_write_attribute_i(h_grpsph, "Max ghost iterations", p->max_smoothing_iterations); io_write_attribute_f(h_grpsph, "Minimal 
temperature", p->minimal_temperature); + io_write_attribute_f(h_grpsph, + "Minimal energy per unit mass [internal units]", + p->minimal_internal_energy); io_write_attribute_f(h_grpsph, "Initial temperature", p->initial_temperature); - io_write_attribute_f(h_grpsph, "Initial energy per unit mass", + io_write_attribute_f(h_grpsph, + "Initial energy per unit mass [internal units]", p->initial_internal_energy); io_write_attribute_f(h_grpsph, "Hydrogen mass fraction", p->hydrogen_mass_fraction); @@ -260,8 +268,11 @@ void hydro_props_print_snapshot(hid_t h_grpsph, const struct hydro_props *p) { p->viscosity.alpha_max); io_write_attribute_f(h_grpsph, "Alpha viscosity (min)", p->viscosity.alpha_min); - io_write_attribute_f(h_grpsph, "Viscosity decay length", p->viscosity.length); + io_write_attribute_f(h_grpsph, "Viscosity decay length [internal units]", + p->viscosity.length); io_write_attribute_f(h_grpsph, "Beta viscosity", const_viscosity_beta); + io_write_attribute_f(h_grpsph, "Max v_sig ratio (limiter)", + const_limiter_max_v_sig_ratio); } #endif diff --git a/src/io_properties.h b/src/io_properties.h index 9e948fc3991b0178d06fdd5d83fa900a98f84d2a..c45edb2641e374e2cfaec6c3251aff7d18f361d6 100644 --- a/src/io_properties.h +++ b/src/io_properties.h @@ -43,14 +43,23 @@ typedef void (*conversion_func_part_float)(const struct engine*, typedef void (*conversion_func_part_double)(const struct engine*, const struct part*, const struct xpart*, double*); +typedef void (*conversion_func_part_long_long)(const struct engine*, + const struct part*, + const struct xpart*, long long*); typedef void (*conversion_func_gpart_float)(const struct engine*, const struct gpart*, float*); typedef void (*conversion_func_gpart_double)(const struct engine*, const struct gpart*, double*); +typedef void (*conversion_func_gpart_long_long)(const struct engine*, + const struct gpart*, + long long*); typedef void (*conversion_func_spart_float)(const struct engine*, const struct spart*, float*); typedef 
void (*conversion_func_spart_double)(const struct engine*, const struct spart*, double*); +typedef void (*conversion_func_spart_long_long)(const struct engine*, + const struct spart*, + long long*); /** * @brief The properties of a given dataset for i/o @@ -79,6 +88,7 @@ struct io_props { char* start_temp_c; float* start_temp_f; double* start_temp_d; + long long* start_temp_l; /* Pointer to the engine */ const struct engine* e; @@ -98,14 +108,17 @@ struct io_props { /* Conversion function for part */ conversion_func_part_float convert_part_f; conversion_func_part_double convert_part_d; + conversion_func_part_long_long convert_part_l; /* Conversion function for gpart */ conversion_func_gpart_float convert_gpart_f; conversion_func_gpart_double convert_gpart_d; + conversion_func_gpart_long_long convert_gpart_l; /* Conversion function for spart */ conversion_func_spart_float convert_spart_f; conversion_func_spart_double convert_spart_d; + conversion_func_spart_long_long convert_spart_l; }; /** @@ -147,10 +160,13 @@ INLINE static struct io_props io_make_input_field_( r.conversion = 0; r.convert_part_f = NULL; r.convert_part_d = NULL; + r.convert_part_l = NULL; r.convert_gpart_f = NULL; r.convert_gpart_d = NULL; + r.convert_gpart_l = NULL; r.convert_spart_f = NULL; r.convert_spart_d = NULL; + r.convert_spart_l = NULL; return r; } @@ -191,10 +207,13 @@ INLINE static struct io_props io_make_output_field_( r.conversion = 0; r.convert_part_f = NULL; r.convert_part_d = NULL; + r.convert_part_l = NULL; r.convert_gpart_f = NULL; r.convert_gpart_d = NULL; + r.convert_gpart_l = NULL; r.convert_spart_f = NULL; r.convert_spart_d = NULL; + r.convert_spart_l = NULL; return r; } @@ -242,10 +261,13 @@ INLINE static struct io_props io_make_output_field_convert_part_FLOAT( r.conversion = 1; r.convert_part_f = functionPtr; r.convert_part_d = NULL; + r.convert_part_l = NULL; r.convert_gpart_f = NULL; r.convert_gpart_d = NULL; + r.convert_gpart_l = NULL; r.convert_spart_f = NULL; 
r.convert_spart_d = NULL; + r.convert_spart_l = NULL; return r; } @@ -285,10 +307,59 @@ INLINE static struct io_props io_make_output_field_convert_part_DOUBLE( r.conversion = 1; r.convert_part_f = NULL; r.convert_part_d = functionPtr; + r.convert_part_l = NULL; r.convert_gpart_f = NULL; r.convert_gpart_d = NULL; + r.convert_gpart_l = NULL; r.convert_spart_f = NULL; r.convert_spart_d = NULL; + r.convert_spart_l = NULL; + + return r; +} + +/** + * @brief Construct an #io_props from its parameters + * + * @param name Name of the field to read + * @param type The type of the data + * @param dimension Dataset dimension (1D, 3D, ...) + * @param units The units of the dataset + * @param partSize The size in byte of the particle + * @param parts The particle array + * @param xparts The xparticle array + * @param functionPtr The function used to convert a particle to a long long + * + * Do not call this function directly. Use the macro defined above. + */ +INLINE static struct io_props io_make_output_field_convert_part_LONGLONG( + const char name[FIELD_BUFFER_SIZE], enum IO_DATA_TYPE type, int dimension, + enum unit_conversion_factor units, size_t partSize, + const struct part* parts, const struct xpart* xparts, + conversion_func_part_long_long functionPtr) { + + struct io_props r; + strcpy(r.name, name); + r.type = type; + r.dimension = dimension; + r.importance = UNUSED; + r.units = units; + r.field = NULL; + r.partSize = partSize; + r.parts = parts; + r.xparts = xparts; + r.gparts = NULL; + r.sparts = NULL; + r.conversion = 1; + r.convert_part_f = NULL; + r.convert_part_d = NULL; + r.convert_part_l = functionPtr; + r.convert_gpart_f = NULL; + r.convert_gpart_d = NULL; + r.convert_gpart_l = NULL; + r.convert_spart_f = NULL; + r.convert_spart_d = NULL; + r.convert_spart_l = NULL; return r; } @@ -334,10 +405,13 @@ INLINE static struct io_props io_make_output_field_convert_gpart_FLOAT( r.conversion = 1; r.convert_part_f = NULL; r.convert_part_d = NULL; + r.convert_part_l = 
NULL; r.convert_gpart_f = functionPtr; r.convert_gpart_d = NULL; + r.convert_gpart_l = NULL; r.convert_spart_f = NULL; r.convert_spart_d = NULL; + r.convert_spart_l = NULL; return r; } @@ -375,10 +449,57 @@ INLINE static struct io_props io_make_output_field_convert_gpart_DOUBLE( r.conversion = 1; r.convert_part_f = NULL; r.convert_part_d = NULL; + r.convert_part_l = NULL; r.convert_gpart_f = NULL; r.convert_gpart_d = functionPtr; + r.convert_gpart_l = NULL; r.convert_spart_f = NULL; r.convert_spart_d = NULL; + r.convert_spart_l = NULL; + + return r; +} + +/** + * @brief Construct an #io_props from its parameters + * + * @param name Name of the field to read + * @param type The type of the data + * @param dimension Dataset dimension (1D, 3D, ...) + * @param units The units of the dataset + * @param gpartSize The size in byte of the particle + * @param gparts The particle array + * @param functionPtr The function used to convert a g-particle to a long long + * + * Do not call this function directly. Use the macro defined above. 
+ */ +INLINE static struct io_props io_make_output_field_convert_gpart_LONGLONG( + const char name[FIELD_BUFFER_SIZE], enum IO_DATA_TYPE type, int dimension, + enum unit_conversion_factor units, size_t gpartSize, + const struct gpart* gparts, conversion_func_gpart_long_long functionPtr) { + + struct io_props r; + strcpy(r.name, name); + r.type = type; + r.dimension = dimension; + r.importance = UNUSED; + r.units = units; + r.field = NULL; + r.partSize = gpartSize; + r.parts = NULL; + r.xparts = NULL; + r.gparts = gparts; + r.sparts = NULL; + r.conversion = 1; + r.convert_part_f = NULL; + r.convert_part_d = NULL; + r.convert_part_l = NULL; + r.convert_gpart_f = NULL; + r.convert_gpart_d = NULL; + r.convert_gpart_l = functionPtr; + r.convert_spart_f = NULL; + r.convert_spart_d = NULL; + r.convert_spart_l = NULL; return r; } @@ -424,10 +545,13 @@ INLINE static struct io_props io_make_output_field_convert_spart_FLOAT( r.conversion = 1; r.convert_part_f = NULL; r.convert_part_d = NULL; + r.convert_part_l = NULL; r.convert_gpart_f = NULL; r.convert_gpart_d = NULL; + r.convert_gpart_l = NULL; r.convert_spart_f = functionPtr; r.convert_spart_d = NULL; + r.convert_spart_l = NULL; return r; } @@ -465,10 +589,57 @@ INLINE static struct io_props io_make_output_field_convert_spart_DOUBLE( r.conversion = 1; r.convert_part_f = NULL; r.convert_part_d = NULL; + r.convert_part_l = NULL; r.convert_gpart_f = NULL; r.convert_gpart_d = NULL; + r.convert_gpart_l = NULL; r.convert_spart_f = NULL; r.convert_spart_d = functionPtr; + r.convert_spart_l = NULL; + + return r; +} + +/** + * @brief Construct an #io_props from its parameters + * + * @param name Name of the field to read + * @param type The type of the data + * @param dimension Dataset dimension (1D, 3D, ...) 
+ * @param units The units of the dataset + * @param spartSize The size in byte of the particle + * @param sparts The particle array + * @param functionPtr The function used to convert a s-particle to a long long + * + * Do not call this function directly. Use the macro defined above. + */ +INLINE static struct io_props io_make_output_field_convert_spart_LONGLONG( + const char name[FIELD_BUFFER_SIZE], enum IO_DATA_TYPE type, int dimension, + enum unit_conversion_factor units, size_t spartSize, + const struct spart* sparts, conversion_func_spart_long_long functionPtr) { + + struct io_props r; + strcpy(r.name, name); + r.type = type; + r.dimension = dimension; + r.importance = UNUSED; + r.units = units; + r.field = NULL; + r.partSize = spartSize; + r.parts = NULL; + r.xparts = NULL; + r.gparts = NULL; + r.sparts = sparts; + r.conversion = 1; + r.convert_part_f = NULL; + r.convert_part_d = NULL; + r.convert_part_l = NULL; + r.convert_gpart_f = NULL; + r.convert_gpart_d = NULL; + r.convert_gpart_l = NULL; + r.convert_spart_f = NULL; + r.convert_spart_d = NULL; + r.convert_spart_l = functionPtr; return r; } diff --git a/src/kick.h b/src/kick.h index 4cd0cee56750c96ce1f1d2be66148d68e69f055e..b33ee059f56a1e979834cb4bc784d55de2130fbe 100644 --- a/src/kick.h +++ b/src/kick.h @@ -86,8 +86,8 @@ __attribute__((always_inline)) INLINE static void kick_part( if (p->ti_kick != ti_start) error( "particle has not been kicked to the current time p->ti_kick=%lld, " - "ti_start=%lld, ti_end=%lld id=%lld", - p->ti_kick, ti_start, ti_end, p->id); + "ti_start=%lld, ti_end=%lld id=%lld time_bin=%d wakeup=%d", + p->ti_kick, ti_start, ti_end, p->id, p->time_bin, p->wakeup); p->ti_kick = ti_end; #endif diff --git a/src/memswap.h b/src/memswap.h index 2f7b9215ed48535fab9e8331303457c2f92859cd..91d83d231692a2b8f540c3e6b9334bc89e1ee87b 100644 --- a/src/memswap.h +++ b/src/memswap.h @@ -1,7 +1,7 @@ /******************************************************************************* * This file is part of 
SWIFT. * Copyright (c) 2016 Pedro Gonnet (pedro.gonnet@durham.ac.uk) - * + * 2018 STFC (author email aidan.chalk@stfc.ac.uk) * This program is free software: you can redistribute it and/or modify * it under the terms of the GNU Lesser General Public License as published * by the Free Software Foundation, either version 3 of the License, or @@ -20,6 +20,7 @@ #define SWIFT_MEMSWAP_H /* Config parameters. */ +#include <stdint.h> #include "../config.h" #ifdef HAVE_IMMINTRIN_H @@ -33,7 +34,7 @@ #endif /* Macro for in-place swap of two values a and b of type t. a and b are - assumed to be of type char* so that the pointer arithmetic works. */ + assumed to be of type int8_t* so that the pointer arithmetic works. */ #define swap_loop(type, a, b, count) \ while (count >= sizeof(type)) { \ register type temp = *(type *)a; \ @@ -60,9 +61,10 @@ * @param void_b Pointer to the second element. * @param bytes Size, in bytes, of the data pointed to by @c a and @c b. */ -__attribute__((always_inline)) inline void memswap(void *void_a, void *void_b, +__attribute__((always_inline)) inline void memswap(void *restrict void_a, + void *restrict void_b, size_t bytes) { - char *a = (char *)void_a, *b = (char *)void_b; + int8_t *restrict a = (int8_t *)void_a, *restrict b = (int8_t *)void_b; #if defined(__AVX512F__) && defined(__INTEL_COMPILER) swap_loop(__m512i, a, b, bytes); #endif @@ -75,10 +77,10 @@ __attribute__((always_inline)) inline void memswap(void *void_a, void *void_b, #ifdef __ALTIVEC__ swap_loop(vector int, a, b, bytes); #endif - swap_loop(size_t, a, b, bytes); - swap_loop(int, a, b, bytes); - swap_loop(short, a, b, bytes); - swap_loop(char, a, b, bytes); + swap_loop(int_least64_t, a, b, bytes); + swap_loop(int_least32_t, a, b, bytes); + swap_loop(int_least16_t, a, b, bytes); + swap_loop(int_least8_t, a, b, bytes); } /** @@ -93,10 +95,9 @@ __attribute__((always_inline)) inline void memswap(void *void_a, void *void_b, * @param void_b Pointer to the second element.
* @param bytes Size, in bytes, of the data pointed to by @c a and @c b. */ -__attribute__((always_inline)) inline void memswap_unaligned(void *void_a, - void *void_b, - size_t bytes) { - char *a = (char *)void_a, *b = (char *)void_b; +__attribute__((always_inline)) inline void memswap_unaligned( + void *restrict void_a, void *restrict void_b, size_t bytes) { + int8_t *restrict a = (int8_t *)void_a, *restrict b = (int8_t *)void_b; #ifdef __AVX512F__ while (bytes >= sizeof(__m512i)) { register __m512i temp; @@ -134,10 +135,10 @@ __attribute__((always_inline)) inline void memswap_unaligned(void *void_a, // Power8 supports unaligned load/stores, but not sure what it will do here. swap_loop(vector int, a, b, bytes); #endif - swap_loop(size_t, a, b, bytes); - swap_loop(int, a, b, bytes); - swap_loop(short, a, b, bytes); - swap_loop(char, a, b, bytes); + swap_loop(int_least64_t, a, b, bytes); + swap_loop(int_least32_t, a, b, bytes); + swap_loop(int_least16_t, a, b, bytes); + swap_loop(int_least8_t, a, b, bytes); } #endif /* SWIFT_MEMSWAP_H */ diff --git a/src/parallel_io.c b/src/parallel_io.c index c826d13646c4196f40a77401cfe44f38c10e377b..e06ffaddc7c8a5b225fda6dd81af756b0ab76189 100644 --- a/src/parallel_io.c +++ b/src/parallel_io.c @@ -56,6 +56,7 @@ #include "stars_io.h" #include "tracers_io.h" #include "units.h" +#include "velociraptor_io.h" #include "xmf.h" /* The current limit of ROMIO (the underlying MPI-IO layer) is 2GB */ @@ -957,9 +958,16 @@ void prepare_file(struct engine* e, const char* baseName, long long N_total[6], const struct spart* sparts = e->s->sparts; struct swift_params* params = e->parameter_file; const int with_cosmology = e->policy & engine_policy_cosmology; + const int with_cooling = e->policy & engine_policy_cooling; + const int with_temperature = e->policy & engine_policy_temperature; +#ifdef HAVE_VELOCIRAPTOR + const int with_stf = (e->policy & engine_policy_structure_finding) && + (e->s->gpart_group_data != NULL); +#else + const int with_stf = 
0; +#endif FILE* xmfFile = 0; - int periodic = e->s->periodic; int numFiles = 1; /* First time, we need to create the XMF file */ @@ -985,28 +993,26 @@ void prepare_file(struct engine* e, const char* baseName, long long N_total[6], * specific output */ xmf_write_outputheader(xmfFile, fileName, e->time); - /* Open header to write simulation properties */ - /* message("Writing runtime parameters..."); */ - hid_t h_grp = - H5Gcreate(h_file, "/RuntimePars", H5P_DEFAULT, H5P_DEFAULT, H5P_DEFAULT); - if (h_grp < 0) error("Error while creating runtime parameters group\n"); - - /* Write the relevant information */ - io_write_attribute(h_grp, "PeriodicBoundariesOn", INT, &periodic, 1); - - /* Close runtime parameters */ - H5Gclose(h_grp); - /* Open header to write simulation properties */ /* message("Writing file header..."); */ - h_grp = H5Gcreate(h_file, "/Header", H5P_DEFAULT, H5P_DEFAULT, H5P_DEFAULT); + hid_t h_grp = + H5Gcreate(h_file, "/Header", H5P_DEFAULT, H5P_DEFAULT, H5P_DEFAULT); if (h_grp < 0) error("Error while creating file header\n"); + /* Convert basic output information to snapshot units */ + const double factor_time = + units_conversion_factor(internal_units, snapshot_units, UNIT_CONV_TIME); + const double factor_length = + units_conversion_factor(internal_units, snapshot_units, UNIT_CONV_LENGTH); + const double dblTime = e->time * factor_time; + const double dim[3] = {e->s->dim[0] * factor_length, + e->s->dim[1] * factor_length, + e->s->dim[2] * factor_length}; + /* Print the relevant information and print status */ - io_write_attribute(h_grp, "BoxSize", DOUBLE, e->s->dim, 3); - double dblTime = e->time; + io_write_attribute(h_grp, "BoxSize", DOUBLE, dim, 3); io_write_attribute(h_grp, "Time", DOUBLE, &dblTime, 1); - int dimension = (int)hydro_dimension; + const int dimension = (int)hydro_dimension; io_write_attribute(h_grp, "Dimension", INT, &dimension, 1); io_write_attribute(h_grp, "Redshift", DOUBLE, &e->cosmology->z, 1); io_write_attribute(h_grp, 
"Scale-factor", DOUBLE, &e->cosmology->a, 1); @@ -1143,17 +1149,26 @@ void prepare_file(struct engine* e, const char* baseName, long long N_total[6], case swift_type_gas: hydro_write_particles(parts, xparts, list, &num_fields); num_fields += chemistry_write_particles(parts, list + num_fields); - num_fields += cooling_write_particles(parts, xparts, list + num_fields, - e->cooling_func); + if (with_cooling || with_temperature) { + num_fields += cooling_write_particles( + parts, xparts, list + num_fields, e->cooling_func); + } num_fields += tracers_write_particles(parts, xparts, list + num_fields, with_cosmology); - num_fields += sftracers_write_particles( + num_fields += sftracers_write_particles( parts, xparts, list + num_fields, with_cosmology); - + if (with_stf) { + num_fields += + velociraptor_write_parts(parts, xparts, list + num_fields); + } break; case swift_type_dark_matter: darkmatter_write_particles(gparts, list, &num_fields); + if (with_stf) { + num_fields += velociraptor_write_gparts(e->s->gpart_group_data, + list + num_fields); + } break; case swift_type_stars: @@ -1161,6 +1176,9 @@ void prepare_file(struct engine* e, const char* baseName, long long N_total[6], num_fields += chemistry_write_sparticles(sparts, list + num_fields); num_fields += tracers_write_sparticles(sparts, list + num_fields, with_cosmology); + if (with_stf) { + num_fields += velociraptor_write_sparts(sparts, list + num_fields); + } break; default: @@ -1228,6 +1246,14 @@ void write_output_parallel(struct engine* e, const char* baseName, const struct spart* sparts = e->s->sparts; struct swift_params* params = e->parameter_file; const int with_cosmology = e->policy & engine_policy_cosmology; + const int with_cooling = e->policy & engine_policy_cooling; + const int with_temperature = e->policy & engine_policy_temperature; +#ifdef HAVE_VELOCIRAPTOR + const int with_stf = (e->policy & engine_policy_structure_finding) && + (e->s->gpart_group_data != NULL); +#else + const int with_stf = 0; 
+#endif /* Number of particles currently in the arrays */ const size_t Ntot = e->s->nr_gparts; @@ -1290,6 +1316,32 @@ void write_output_parallel(struct engine* e, const char* baseName, snprintf(fileName, FILENAME_BUFFER_SIZE, "%s_%04i.hdf5", baseName, e->snapshot_output_count); + /* Now write the top-level cell structure */ + hid_t h_file_cells = 0, h_grp_cells = 0; + if (mpi_rank == 0) { + + /* Open the snapshot on rank 0 */ + h_file_cells = H5Fopen(fileName, H5F_ACC_RDWR, H5P_DEFAULT); + if (h_file_cells < 0) + error("Error while opening file '%s' on rank %d.", fileName, mpi_rank); + + /* Create the group we want in the file */ + h_grp_cells = H5Gcreate(h_file_cells, "/Cells", H5P_DEFAULT, H5P_DEFAULT, + H5P_DEFAULT); + if (h_grp_cells < 0) error("Error while creating cells group"); + } + + /* Write the location of the particles in the arrays */ + io_write_cell_offsets(h_grp_cells, e->s->cdim, e->s->cells_top, + e->s->nr_cells, e->s->width, mpi_rank, N_total, offset, + internal_units, snapshot_units); + + /* Close everything */ + if (mpi_rank == 0) { + H5Gclose(h_grp_cells); + H5Fclose(h_file_cells); + } + /* Prepare some file-access properties */ hid_t plist_id = H5Pcreate(H5P_FILE_ACCESS); @@ -1403,6 +1455,7 @@ void write_output_parallel(struct engine* e, const char* baseName, struct part* parts_written = NULL; struct xpart* xparts_written = NULL; struct gpart* gparts_written = NULL; + struct velociraptor_gpart_data* gpart_group_data_written = NULL; struct spart* sparts_written = NULL; /* Write particle fields from the particle structure */ @@ -1415,8 +1468,14 @@ void write_output_parallel(struct engine* e, const char* baseName, Nparticles = Ngas; hydro_write_particles(parts, xparts, list, &num_fields); num_fields += chemistry_write_particles(parts, list + num_fields); - num_fields += cooling_write_particles( - parts, xparts, list + num_fields, e->cooling_func); + if (with_cooling || with_temperature) { + num_fields += cooling_write_particles( + parts, xparts, 
list + num_fields, e->cooling_func); + } + if (with_stf) { + num_fields += + velociraptor_write_parts(parts, xparts, list + num_fields); + } num_fields += tracers_write_particles( parts, xparts, list + num_fields, with_cosmology); num_fields += sftracers_write_particles( @@ -1444,9 +1503,15 @@ void write_output_parallel(struct engine* e, const char* baseName, &num_fields); num_fields += chemistry_write_particles(parts_written, list + num_fields); - num_fields += - cooling_write_particles(parts_written, xparts_written, - list + num_fields, e->cooling_func); + if (with_cooling || with_temperature) { + num_fields += + cooling_write_particles(parts_written, xparts_written, + list + num_fields, e->cooling_func); + } + if (with_stf) { + num_fields += velociraptor_write_parts( + parts_written, xparts_written, list + num_fields); + } num_fields += tracers_write_particles( parts_written, xparts_written, list + num_fields, with_cosmology); num_fields += sftracers_write_particles( @@ -1460,6 +1525,10 @@ void write_output_parallel(struct engine* e, const char* baseName, /* This is a DM-only run without inhibited particles */ Nparticles = Ntot; darkmatter_write_particles(gparts, list, &num_fields); + if (with_stf) { + num_fields += velociraptor_write_gparts(e->s->gpart_group_data, + list + num_fields); + } } else { /* Ok, we need to fish out the particles we want */ @@ -1470,11 +1539,28 @@ void write_output_parallel(struct engine* e, const char* baseName, Ndm_written * sizeof(struct gpart)) != 0) error("Error while allocating temporart memory for gparts"); + if (with_stf) { + if (posix_memalign( + (void**)&gpart_group_data_written, gpart_align, + Ndm_written * sizeof(struct velociraptor_gpart_data)) != 0) + error( + "Error while allocating temporart memory for gparts STF " + "data"); + } + /* Collect the non-inhibited DM particles from gpart */ - io_collect_gparts_to_write(gparts, gparts_written, Ntot, Ndm_written); + io_collect_gparts_to_write(gparts, e->s->gpart_group_data, + 
gparts_written, gpart_group_data_written, + Ntot, Ndm_written, with_stf); - /* Write DM particles */ + /* Select the fields to write */ darkmatter_write_particles(gparts_written, list, &num_fields); + if (with_stf) { +#ifdef HAVE_VELOCIRAPTOR + num_fields += velociraptor_write_gparts(gpart_group_data_written, + list + num_fields); +#endif + } } } break; @@ -1487,6 +1573,9 @@ void write_output_parallel(struct engine* e, const char* baseName, num_fields += chemistry_write_sparticles(sparts, list + num_fields); num_fields += tracers_write_sparticles(sparts, list + num_fields, with_cosmology); + if (with_stf) { + num_fields += velociraptor_write_sparts(sparts, list + num_fields); + } } else { /* Ok, we need to fish out the particles we want */ @@ -1503,9 +1592,13 @@ void write_output_parallel(struct engine* e, const char* baseName, /* Select the fields to write */ stars_write_particles(sparts_written, list, &num_fields); - num_fields += chemistry_write_sparticles(sparts, list + num_fields); + num_fields += chemistry_write_sparticles(sparts, list + num_fields); num_fields += tracers_write_sparticles(sparts, list + num_fields, with_cosmology); + if (with_stf) { + num_fields += + velociraptor_write_sparts(sparts_written, list + num_fields); + } } } break; @@ -1532,6 +1625,7 @@ void write_output_parallel(struct engine* e, const char* baseName, if (parts_written) free(parts_written); if (xparts_written) free(xparts_written); if (gparts_written) free(gparts_written); + if (gpart_group_data_written) free(gpart_group_data_written); if (sparts_written) free(sparts_written); #ifdef IO_SPEED_MEASUREMENT diff --git a/src/partition.c b/src/partition.c index bbd7454dd63be6ab5192558fb4a2e3399ea03cfc..60ee7716efb25188b3a09f44f93a65a3ccbd5893 100644 --- a/src/partition.c +++ b/src/partition.c @@ -330,22 +330,28 @@ static void accumulate_sizes(struct space *s, double *counts) { mapper_data.s = s; double hsize = (double)sizeof(struct part); - mapper_data.size = hsize; - 
threadpool_map(&s->e->threadpool, accumulate_sizes_mapper_part, s->parts, - s->nr_parts, sizeof(struct part), space_splitsize, - &mapper_data); + if (s->nr_parts > 0) { + mapper_data.size = hsize; + threadpool_map(&s->e->threadpool, accumulate_sizes_mapper_part, s->parts, + s->nr_parts, sizeof(struct part), space_splitsize, + &mapper_data); + } double gsize = (double)sizeof(struct gpart); - mapper_data.size = gsize; - threadpool_map(&s->e->threadpool, accumulate_sizes_mapper_gpart, s->gparts, - s->nr_gparts, sizeof(struct gpart), space_splitsize, - &mapper_data); + if (s->nr_gparts > 0) { + mapper_data.size = gsize; + threadpool_map(&s->e->threadpool, accumulate_sizes_mapper_gpart, s->gparts, + s->nr_gparts, sizeof(struct gpart), space_splitsize, + &mapper_data); + } double ssize = (double)sizeof(struct spart); - mapper_data.size = ssize; - threadpool_map(&s->e->threadpool, accumulate_sizes_mapper_spart, s->sparts, - s->nr_sparts, sizeof(struct spart), space_splitsize, - &mapper_data); + if (s->nr_sparts > 0) { + mapper_data.size = ssize; + threadpool_map(&s->e->threadpool, accumulate_sizes_mapper_spart, s->sparts, + s->nr_sparts, sizeof(struct spart), space_splitsize, + &mapper_data); + } /* Keep the sum of particles across all ranks in the range of IDX_MAX. */ if ((s->e->total_nr_parts * hsize + s->e->total_nr_gparts * gsize + diff --git a/src/physical_constants.c b/src/physical_constants.c index 7752f4d3130b7174863d520b3d4d3c6a3e8eb433..3e3c72812c552aba1204086353dc7d239a5c36f9 100644 --- a/src/physical_constants.c +++ b/src/physical_constants.c @@ -32,7 +32,8 @@ /** * @brief Converts physical constants to the internal unit system * - * Some constants can be overwritten by the YAML file values. + * Some constants can be overwritten by the YAML file values. If the + * param argument is NULL, no overwriting is done. * * @param us The current internal system of units. * @param params The parsed parameter file. 
@@ -48,8 +49,10 @@ void phys_const_init(const struct unit_system *us, struct swift_params *params, const_newton_G_cgs / units_general_cgs_conversion_factor(us, dimension_G); /* Overwrite G if present in the file */ - internal_const->const_newton_G = parser_get_opt_param_double( - params, "PhysicalConstants:G", internal_const->const_newton_G); + if (params != NULL) { + internal_const->const_newton_G = parser_get_opt_param_double( + params, "PhysicalConstants:G", internal_const->const_newton_G); + } const float dimension_c[5] = {0, 1, -1, 0, 0}; /* [cm s^-1] */ internal_const->const_speed_light_c = diff --git a/src/runner.c b/src/runner.c index 69f7577a7a3a33b73a6492ba52abfea7e874614e..0d0765af3a1315d9554af0a2eaebb10be8931c2c 100644 --- a/src/runner.c +++ b/src/runner.c @@ -66,6 +66,7 @@ #include "task.h" #include "timers.h" #include "timestep.h" +#include "timestep_limiter.h" #include "tracers.h" #define TASK_LOOP_DENSITY 0 @@ -96,6 +97,13 @@ #undef FUNCTION #undef FUNCTION_TASK_LOOP +/* Import the limiter loop functions. */ +#define FUNCTION limiter +#define FUNCTION_TASK_LOOP TASK_LOOP_LIMITER +#include "runner_doiact.h" +#undef FUNCTION +#undef FUNCTION_TASK_LOOP + /* Import the gravity loop functions. */ #include "runner_doiact_grav.h" @@ -1227,14 +1235,22 @@ void runner_do_ghost(struct runner *r, struct cell *c, int timer) { * current smoothing lengths. 
*/ int *pid = NULL; float *h_0 = NULL; + float *left = NULL; + float *right = NULL; if ((pid = (int *)malloc(sizeof(int) * c->hydro.count)) == NULL) error("Can't allocate memory for pid."); if ((h_0 = (float *)malloc(sizeof(float) * c->hydro.count)) == NULL) error("Can't allocate memory for h_0."); + if ((left = (float *)malloc(sizeof(float) * c->hydro.count)) == NULL) + error("Can't allocate memory for left."); + if ((right = (float *)malloc(sizeof(float) * c->hydro.count)) == NULL) + error("Can't allocate memory for right."); for (int k = 0; k < c->hydro.count; k++) if (part_is_active(&parts[k], e)) { pid[count] = k; h_0[count] = parts[k].h; + left[count] = 0.f; + right[count] = hydro_h_max; ++count; } @@ -1287,6 +1303,16 @@ void runner_do_ghost(struct runner *r, struct cell *c, int timer) { p->density.wcount_dh * h_old_dim + hydro_dimension * p->density.wcount * h_old_dim_minus_one; + /* Improve the bisection bounds */ + if (n_sum < n_target) left[i] = max(left[i], h_old); + if (n_sum > n_target) right[i] = min(right[i], h_old); + +#ifdef SWIFT_DEBUG_CHECKS + /* Check the validity of the left and right bounds */ + if (left[i] > right[i]) + error("Invalid left (%e) and right (%e)", left[i], right[i]); +#endif + /* Skip if h is already h_max and we don't have enough neighbours */ if ((p->h >= hydro_h_max) && (f < 0.f)) { @@ -1349,13 +1375,15 @@ void runner_do_ghost(struct runner *r, struct cell *c, int timer) { /* Avoid floating point exception from f_prime = 0 */ h_new = h_old - f / (f_prime + FLT_MIN); - /* Be verbose about the particles that struggle to converve */ + /* Be verbose about the particles that struggle to converge */ if (num_reruns > max_smoothing_iter - 10) { message( - "iter=%d p->id=%lld h_old=%12.8e h_new=%12.8e f=%f f_prime=%f " - "n_sum=%f n_target=%f", - num_reruns, p->id, h_old, h_new, f, f_prime, n_sum, n_target); + "Smoothing length convergence problem: iter=%d p->id=%lld " + "h_init=%12.8e h_old=%12.8e h_new=%12.8e f=%f f_prime=%f " + 
"n_sum=%12.8e n_target=%12.8e left=%12.8e right=%12.8e", + num_reruns, p->id, h_init, h_old, h_new, f, f_prime, n_sum, + n_target, left[i], right[i]); } #ifdef SWIFT_DEBUG_CHECKS @@ -1367,13 +1395,30 @@ void runner_do_ghost(struct runner *r, struct cell *c, int timer) { /* Safety check: truncate to the range [ h_old/2 , 2h_old ]. */ h_new = min(h_new, 2.f * h_old); h_new = max(h_new, 0.5f * h_old); + + /* Verify that we are actually progressing towards the answer */ + h_new = max(h_new, left[i]); + h_new = min(h_new, right[i]); } /* Check whether the particle has an inappropriate smoothing length */ if (fabsf(h_new - h_old) > eps * h_init) { /* Ok, correct then */ - p->h = h_new; + + /* Case where we have been oscillating around the solution */ + if ((h_new == left[i] && h_old == right[i]) || + (h_old == left[i] && h_new == right[i])) { + + /* Bisect the remaining interval */ + p->h = pow_inv_dimension( + 0.5f * (pow_dimension(left[i]) + pow_dimension(right[i]))); + + } else { + + /* Normal case */ + p->h = h_new; + } /* If below the absolute maximum, try again */ if (p->h < hydro_h_max) { @@ -1381,6 +1426,8 @@ void runner_do_ghost(struct runner *r, struct cell *c, int timer) { /* Flag for another round of fun */ pid[redo] = pid[i]; h_0[redo] = h_0[i]; + left[redo] = left[i]; + right[redo] = right[i]; redo += 1; /* Re-initialise everything */ @@ -1511,6 +1558,8 @@ void runner_do_ghost(struct runner *r, struct cell *c, int timer) { } /* Be clean */ + free(left); + free(right); free(pid); free(h_0); } @@ -1712,19 +1761,26 @@ void runner_do_kick1(struct runner *r, struct cell *c, int timer) { /* If particle needs to be kicked */ if (part_is_starting(p, e)) { +#ifdef SWIFT_DEBUG_CHECKS + if (p->wakeup == time_bin_awake) + error("Woken-up particle that has not been processed in kick1"); +#endif + + /* Skip particles that have been woken up and treated by the limiter.
*/ + if (p->wakeup != time_bin_not_awake) continue; + const integertime_t ti_step = get_integer_timestep(p->time_bin); const integertime_t ti_begin = get_integer_time_begin(ti_current + 1, p->time_bin); #ifdef SWIFT_DEBUG_CHECKS - const integertime_t ti_end = - get_integer_time_end(ti_current + 1, p->time_bin); + const integertime_t ti_end = ti_begin + ti_step; if (ti_begin != ti_current) error( "Particle in wrong time-bin, ti_end=%lld, ti_begin=%lld, " - "ti_step=%lld time_bin=%d ti_current=%lld", - ti_end, ti_begin, ti_step, p->time_bin, ti_current); + "ti_step=%lld time_bin=%d wakeup=%d ti_current=%lld", + ti_end, ti_begin, ti_step, p->time_bin, p->wakeup, ti_current); #endif /* Time interval for this half-kick */ @@ -1887,39 +1943,60 @@ void runner_do_kick2(struct runner *r, struct cell *c, int timer) { /* If particle needs to be kicked */ if (part_is_active(p, e)) { - const integertime_t ti_step = get_integer_timestep(p->time_bin); - const integertime_t ti_begin = - get_integer_time_begin(ti_current, p->time_bin); + integertime_t ti_begin, ti_end, ti_step; + +#ifdef SWIFT_DEBUG_CHECKS + if (p->wakeup == time_bin_awake) + error("Woken-up particle that has not been processed in kick1"); +#endif + + if (p->wakeup == time_bin_not_awake) { + + /* Time-step from a regular kick */ + ti_step = get_integer_timestep(p->time_bin); + ti_begin = get_integer_time_begin(ti_current, p->time_bin); + ti_end = ti_begin + ti_step; + + } else { + + /* Time-step that follows a wake-up call */ + ti_begin = get_integer_time_begin(ti_current, p->wakeup); + ti_end = get_integer_time_end(ti_current, p->time_bin); + ti_step = ti_end - ti_begin; + + /* Reset the flag. Everything is back to normal from now on. 
*/ + p->wakeup = time_bin_awake; + } #ifdef SWIFT_DEBUG_CHECKS if (ti_begin + ti_step != ti_current) error( "Particle in wrong time-bin, ti_begin=%lld, ti_step=%lld " - "time_bin=%d ti_current=%lld", - ti_begin, ti_step, p->time_bin, ti_current); + "time_bin=%d wakeup=%d ti_current=%lld", + ti_begin, ti_step, p->time_bin, p->wakeup, ti_current); #endif /* Time interval for this half-kick */ double dt_kick_grav, dt_kick_hydro, dt_kick_therm, dt_kick_corr; if (with_cosmology) { dt_kick_hydro = cosmology_get_hydro_kick_factor( - cosmo, ti_begin + ti_step / 2, ti_begin + ti_step); + cosmo, ti_begin + ti_step / 2, ti_end); dt_kick_grav = cosmology_get_grav_kick_factor( - cosmo, ti_begin + ti_step / 2, ti_begin + ti_step); + cosmo, ti_begin + ti_step / 2, ti_end); dt_kick_therm = cosmology_get_therm_kick_factor( - cosmo, ti_begin + ti_step / 2, ti_begin + ti_step); + cosmo, ti_begin + ti_step / 2, ti_end); dt_kick_corr = cosmology_get_corr_kick_factor( - cosmo, ti_begin + ti_step / 2, ti_begin + ti_step); + cosmo, ti_begin + ti_step / 2, ti_end); } else { - dt_kick_hydro = (ti_step / 2) * time_base; - dt_kick_grav = (ti_step / 2) * time_base; - dt_kick_therm = (ti_step / 2) * time_base; - dt_kick_corr = (ti_step / 2) * time_base; + dt_kick_hydro = (ti_end - (ti_begin + ti_step / 2)) * time_base; + dt_kick_grav = (ti_end - (ti_begin + ti_step / 2)) * time_base; + dt_kick_therm = (ti_end - (ti_begin + ti_step / 2)) * time_base; + dt_kick_corr = (ti_end - (ti_begin + ti_step / 2)) * time_base; } /* Finish the time-step with a second half-kick */ kick_part(p, xp, dt_kick_hydro, dt_kick_grav, dt_kick_therm, dt_kick_corr, cosmo, hydro_props, entropy_floor, - ti_begin + ti_step / 2, ti_begin + ti_step); + ti_begin + ti_step / 2, ti_end); #ifdef SWIFT_DEBUG_CHECKS /* Check that kick and the drift are synchronized */ @@ -2321,6 +2398,144 @@ void runner_do_timestep(struct runner *r, struct cell *c, int timer) { if (timer) TIMER_TOC(timer_timestep); } +/** + * @brief Apply the 
time-step limiter to all woken-up particles in a cell + * hierarchy. + * + * @param r The task #runner. + * @param c The #cell. + * @param force Limit the particles irrespective of the #cell flags. + * @param timer Are we timing this ? + */ +void runner_do_limiter(struct runner *r, struct cell *c, int force, int timer) { + + const struct engine *e = r->e; + const integertime_t ti_current = e->ti_current; + const int count = c->hydro.count; + struct part *restrict parts = c->hydro.parts; + struct xpart *restrict xparts = c->hydro.xparts; + + TIMER_TIC; + +#ifdef SWIFT_DEBUG_CHECKS + /* Check that we only limit local cells. */ + if (c->nodeID != engine_rank) error("Limiting dt of a foreign cell is nope."); +#endif + + integertime_t ti_hydro_end_min = max_nr_timesteps, ti_hydro_end_max = 0, + ti_hydro_beg_max = 0; + integertime_t ti_gravity_end_min = max_nr_timesteps, ti_gravity_end_max = 0, + ti_gravity_beg_max = 0; + + /* Limit irrespective of cell flags? */ + force |= c->hydro.do_limiter; + + /* Early abort? */ + if (c->hydro.count == 0) { + + /* Clear the limiter flags. */ + c->hydro.do_limiter = 0; + c->hydro.do_sub_limiter = 0; + return; + } + + /* Loop over the progeny ?
*/ + if (c->split && (force || c->hydro.do_sub_limiter)) { + for (int k = 0; k < 8; k++) { + if (c->progeny[k] != NULL) { + struct cell *restrict cp = c->progeny[k]; + + /* Recurse */ + runner_do_limiter(r, cp, force, 0); + + /* And aggregate */ + ti_hydro_end_min = min(cp->hydro.ti_end_min, ti_hydro_end_min); + ti_hydro_end_max = max(cp->hydro.ti_end_max, ti_hydro_end_max); + ti_hydro_beg_max = max(cp->hydro.ti_beg_max, ti_hydro_beg_max); + ti_gravity_end_min = min(cp->grav.ti_end_min, ti_gravity_end_min); + ti_gravity_end_max = max(cp->grav.ti_end_max, ti_gravity_end_max); + ti_gravity_beg_max = max(cp->grav.ti_beg_max, ti_gravity_beg_max); + } + } + + /* Store the updated values */ + c->hydro.ti_end_min = min(c->hydro.ti_end_min, ti_hydro_end_min); + c->hydro.ti_end_max = max(c->hydro.ti_end_max, ti_hydro_end_max); + c->hydro.ti_beg_max = max(c->hydro.ti_beg_max, ti_hydro_beg_max); + c->grav.ti_end_min = min(c->grav.ti_end_min, ti_gravity_end_min); + c->grav.ti_end_max = max(c->grav.ti_end_max, ti_gravity_end_max); + c->grav.ti_beg_max = max(c->grav.ti_beg_max, ti_gravity_beg_max); + + } else if (!c->split && force) { + + ti_hydro_end_min = c->hydro.ti_end_min; + ti_hydro_end_max = c->hydro.ti_end_max; + ti_hydro_beg_max = c->hydro.ti_beg_max; + ti_gravity_end_min = c->grav.ti_end_min; + ti_gravity_end_max = c->grav.ti_end_max; + ti_gravity_beg_max = c->grav.ti_beg_max; + + /* Loop over the gas particles in this cell. */ + for (int k = 0; k < count; k++) { + + /* Get a handle on the part. */ + struct part *restrict p = &parts[k]; + struct xpart *restrict xp = &xparts[k]; + + /* Avoid inhibited particles */ + if (part_is_inhibited(p, e)) continue; + + /* If the particle will be active no need to wake it up */ + if (part_is_active(p, e) && p->wakeup != time_bin_not_awake) + p->wakeup = time_bin_not_awake; + + /* Bip, bip, bip... 
wake-up time */ + if (p->wakeup == time_bin_awake) { + + /* Apply the limiter and get the new time-step size */ + const integertime_t ti_new_step = timestep_limit_part(p, xp, e); + + /* What is the next sync-point ? */ + ti_hydro_end_min = min(ti_current + ti_new_step, ti_hydro_end_min); + ti_hydro_end_max = max(ti_current + ti_new_step, ti_hydro_end_max); + + /* What is the next starting point for this cell ? */ + ti_hydro_beg_max = max(ti_current, ti_hydro_beg_max); + + /* Also limit the gpart counter-part */ + if (p->gpart != NULL) { + + /* Register the time-bin */ + p->gpart->time_bin = p->time_bin; + + /* What is the next sync-point ? */ + ti_gravity_end_min = + min(ti_current + ti_new_step, ti_gravity_end_min); + ti_gravity_end_max = + max(ti_current + ti_new_step, ti_gravity_end_max); + + /* What is the next starting point for this cell ? */ + ti_gravity_beg_max = max(ti_current, ti_gravity_beg_max); + } + } + } + + /* Store the updated values */ + c->hydro.ti_end_min = min(c->hydro.ti_end_min, ti_hydro_end_min); + c->hydro.ti_end_max = max(c->hydro.ti_end_max, ti_hydro_end_max); + c->hydro.ti_beg_max = max(c->hydro.ti_beg_max, ti_hydro_beg_max); + c->grav.ti_end_min = min(c->grav.ti_end_min, ti_gravity_end_min); + c->grav.ti_end_max = max(c->grav.ti_end_max, ti_gravity_end_max); + c->grav.ti_beg_max = max(c->grav.ti_beg_max, ti_gravity_beg_max); + } + + /* Clear the limiter flags. 
*/ + c->hydro.do_limiter = 0; + c->hydro.do_sub_limiter = 0; + + if (timer) TIMER_TOC(timer_do_limiter); +} + /** * @brief End the force calculation of all active particles in a cell * by multiplying the acccelerations by the relevant constants @@ -2773,6 +2988,8 @@ void *runner_main(void *data) { #endif else if (t->subtype == task_subtype_force) runner_doself2_branch_force(r, ci); + else if (t->subtype == task_subtype_limiter) + runner_doself2_branch_limiter(r, ci); else if (t->subtype == task_subtype_grav) runner_doself_recursive_grav(r, ci, 1); else if (t->subtype == task_subtype_external_grav) @@ -2794,6 +3011,8 @@ void *runner_main(void *data) { #endif else if (t->subtype == task_subtype_force) runner_dopair2_branch_force(r, ci, cj); + else if (t->subtype == task_subtype_limiter) + runner_dopair2_branch_limiter(r, ci, cj); else if (t->subtype == task_subtype_grav) runner_dopair_recursive_grav(r, ci, cj, 1); else if (t->subtype == task_subtype_stars_density) @@ -2813,6 +3032,8 @@ void *runner_main(void *data) { #endif else if (t->subtype == task_subtype_force) runner_dosub_self2_force(r, ci, 1); + else if (t->subtype == task_subtype_limiter) + runner_dosub_self2_limiter(r, ci, 1); else if (t->subtype == task_subtype_stars_density) runner_dosub_self_stars_density(r, ci, 1); else if (t->subtype == task_subtype_stars_feedback) @@ -2830,6 +3051,8 @@ void *runner_main(void *data) { #endif else if (t->subtype == task_subtype_force) runner_dosub_pair2_force(r, ci, cj, t->flags, 1); + else if (t->subtype == task_subtype_limiter) + runner_dosub_pair2_limiter(r, ci, cj, t->flags, 1); else if (t->subtype == task_subtype_stars_density) runner_dosub_pair_stars_density(r, ci, cj, t->flags, 1); else if (t->subtype == task_subtype_stars_feedback) @@ -2889,6 +3112,9 @@ void *runner_main(void *data) { case task_type_timestep: runner_do_timestep(r, ci, 1); break; + case task_type_timestep_limiter: + runner_do_limiter(r, ci, 0, 1); + break; #ifdef WITH_MPI case task_type_send: if 
(t->subtype == task_subtype_tend) { @@ -2905,6 +3131,8 @@ void *runner_main(void *data) { runner_do_recv_part(r, ci, 0, 1); } else if (t->subtype == task_subtype_gradient) { runner_do_recv_part(r, ci, 0, 1); + } else if (t->subtype == task_subtype_limiter) { + runner_do_recv_part(r, ci, 0, 1); } else if (t->subtype == task_subtype_gpart) { runner_do_recv_gpart(r, ci, 1); } else if (t->subtype == task_subtype_spart) { diff --git a/src/runner_doiact.h b/src/runner_doiact.h index 53cf51ed400f82d0e195e38dd08fcc5af16f1ad7..861798b70b8ba90b9267375253bd8570baec3e9a 100644 --- a/src/runner_doiact.h +++ b/src/runner_doiact.h @@ -168,8 +168,11 @@ void DOPAIR1_NAIVE(struct runner *r, struct cell *restrict ci, /* Get a hold of the ith part in ci. */ struct part *restrict pi = &parts_i[pid]; + + /* Skip inhibited particles. */ + if (part_is_inhibited(pi, e)) continue; + const int pi_active = part_is_active(pi, e); - const int pi_inhibited = part_is_inhibited(pi, e); const float hi = pi->h; const float hig2 = hi * hi * kernel_gamma2; const float pix[3] = {(float)(pi->x[0] - (cj->loc[0] + shift[0])), @@ -181,10 +184,13 @@ void DOPAIR1_NAIVE(struct runner *r, struct cell *restrict ci, /* Get a pointer to the jth particle. */ struct part *restrict pj = &parts_j[pjd]; + + /* Skip inhibited particles. */ + if (part_is_inhibited(pj, e)) continue; + const float hj = pj->h; const float hjg2 = hj * hj * kernel_gamma2; const int pj_active = part_is_active(pj, e); - const int pj_inhibited = part_is_inhibited(pj, e); /* Compute the pairwise distance. 
*/ const float pjx[3] = {(float)(pj->x[0] - cj->loc[0]), @@ -195,21 +201,21 @@ void DOPAIR1_NAIVE(struct runner *r, struct cell *restrict ci, #ifdef SWIFT_DEBUG_CHECKS /* Check that particles have been drifted to the current time */ - if (pi->ti_drift != e->ti_current && !pi_inhibited) + if (pi->ti_drift != e->ti_current) error("Particle pi not drifted to current time"); - if (pj->ti_drift != e->ti_current && !pj_inhibited) + if (pj->ti_drift != e->ti_current) error("Particle pj not drifted to current time"); #endif /* Hit or miss? */ - if (r2 < hig2 && pi_active && !pj_inhibited) { + if (r2 < hig2 && pi_active) { IACT_NONSYM(r2, dx, hi, hj, pi, pj, a, H); #if (FUNCTION_TASK_LOOP == TASK_LOOP_DENSITY) runner_iact_nonsym_chemistry(r2, dx, hi, hj, pi, pj, a, H); #endif } - if (r2 < hjg2 && pj_active && !pi_inhibited) { + if (r2 < hjg2 && pj_active) { dx[0] = -dx[0]; dx[1] = -dx[1]; @@ -270,8 +276,11 @@ void DOPAIR2_NAIVE(struct runner *r, struct cell *restrict ci, /* Get a hold of the ith part in ci. */ struct part *restrict pi = &parts_i[pid]; + + /* Skip inhibited particles. */ + if (part_is_inhibited(pi, e)) continue; + const int pi_active = part_is_active(pi, e); - const int pi_inhibited = part_is_inhibited(pi, e); const float hi = pi->h; const float hig2 = hi * hi * kernel_gamma2; const float pix[3] = {(float)(pi->x[0] - (cj->loc[0] + shift[0])), @@ -283,8 +292,11 @@ void DOPAIR2_NAIVE(struct runner *r, struct cell *restrict ci, /* Get a pointer to the jth particle. */ struct part *restrict pj = &parts_j[pjd]; + + /* Skip inhibited particles. 
*/ + if (part_is_inhibited(pj, e)) continue; + const int pj_active = part_is_active(pj, e); - const int pj_inhibited = part_is_inhibited(pj, e); const float hj = pj->h; const float hjg2 = hj * hj * kernel_gamma2; @@ -297,28 +309,28 @@ void DOPAIR2_NAIVE(struct runner *r, struct cell *restrict ci, #ifdef SWIFT_DEBUG_CHECKS /* Check that particles have been drifted to the current time */ - if (pi->ti_drift != e->ti_current && !pj_inhibited) + if (pi->ti_drift != e->ti_current) error("Particle pi not drifted to current time"); - if (pj->ti_drift != e->ti_current && !pi_inhibited) + if (pj->ti_drift != e->ti_current) error("Particle pj not drifted to current time"); #endif /* Hit or miss? */ if (r2 < hig2 || r2 < hjg2) { - if (pi_active && pj_active && !pi_inhibited && !pj_inhibited) { + if (pi_active && pj_active) { IACT(r2, dx, hi, hj, pi, pj, a, H); #if (FUNCTION_TASK_LOOP == TASK_LOOP_DENSITY) runner_iact_chemistry(r2, dx, hi, hj, pi, pj, a, H); #endif - } else if (pi_active && !pj_inhibited) { + } else if (pi_active) { IACT_NONSYM(r2, dx, hi, hj, pi, pj, a, H); #if (FUNCTION_TASK_LOOP == TASK_LOOP_DENSITY) runner_iact_nonsym_chemistry(r2, dx, hi, hj, pi, pj, a, H); #endif - } else if (pj_active && !pi_inhibited) { + } else if (pj_active) { dx[0] = -dx[0]; dx[1] = -dx[1]; @@ -366,8 +378,11 @@ void DOSELF1_NAIVE(struct runner *r, struct cell *restrict c) { /* Get a hold of the ith part in ci. */ struct part *restrict pi = &parts[pid]; + + /* Skip inhibited particles. */ + if (part_is_inhibited(pi, e)) continue; + const int pi_active = part_is_active(pi, e); - const int pi_inhibited = part_is_inhibited(pi, e); const float hi = pi->h; const float hig2 = hi * hi * kernel_gamma2; const float pix[3] = {(float)(pi->x[0] - c->loc[0]), @@ -379,10 +394,13 @@ void DOSELF1_NAIVE(struct runner *r, struct cell *restrict c) { /* Get a pointer to the jth particle. */ struct part *restrict pj = &parts[pjd]; + + /* Skip inhibited particles. 
*/ + if (part_is_inhibited(pj, e)) continue; + const float hj = pj->h; const float hjg2 = hj * hj * kernel_gamma2; const int pj_active = part_is_active(pj, e); - const int pj_inhibited = part_is_inhibited(pj, e); /* Compute the pairwise distance. */ const float pjx[3] = {(float)(pj->x[0] - c->loc[0]), @@ -391,14 +409,14 @@ void DOSELF1_NAIVE(struct runner *r, struct cell *restrict c) { float dx[3] = {pix[0] - pjx[0], pix[1] - pjx[1], pix[2] - pjx[2]}; const float r2 = dx[0] * dx[0] + dx[1] * dx[1] + dx[2] * dx[2]; - const int doi = pi_active && (r2 < hig2) && !pj_inhibited; - const int doj = pj_active && (r2 < hjg2) && !pi_inhibited; + const int doi = pi_active && (r2 < hig2); + const int doj = pj_active && (r2 < hjg2); #ifdef SWIFT_DEBUG_CHECKS /* Check that particles have been drifted to the current time */ - if (pi->ti_drift != e->ti_current && !pi_inhibited) + if (pi->ti_drift != e->ti_current) error("Particle pi not drifted to current time"); - if (pj->ti_drift != e->ti_current && !pj_inhibited) + if (pj->ti_drift != e->ti_current) error("Particle pj not drifted to current time"); #endif @@ -462,8 +480,11 @@ void DOSELF2_NAIVE(struct runner *r, struct cell *restrict c) { /* Get a hold of the ith part in ci. */ struct part *restrict pi = &parts[pid]; + + /* Skip inhibited particles. */ + if (part_is_inhibited(pi, e)) continue; + const int pi_active = part_is_active(pi, e); - const int pi_inhibited = part_is_inhibited(pi, e); const float hi = pi->h; const float hig2 = hi * hi * kernel_gamma2; const float pix[3] = {(float)(pi->x[0] - c->loc[0]), @@ -475,10 +496,13 @@ void DOSELF2_NAIVE(struct runner *r, struct cell *restrict c) { /* Get a pointer to the jth particle. */ struct part *restrict pj = &parts[pjd]; + + /* Skip inhibited particles. 
*/ + if (part_is_inhibited(pj, e)) continue; + const float hj = pj->h; const float hjg2 = hj * hj * kernel_gamma2; const int pj_active = part_is_active(pj, e); - const int pj_inhibited = part_is_inhibited(pj, e); /* Compute the pairwise distance. */ const float pjx[3] = {(float)(pj->x[0] - c->loc[0]), @@ -487,16 +511,14 @@ void DOSELF2_NAIVE(struct runner *r, struct cell *restrict c) { float dx[3] = {pix[0] - pjx[0], pix[1] - pjx[1], pix[2] - pjx[2]}; const float r2 = dx[0] * dx[0] + dx[1] * dx[1] + dx[2] * dx[2]; - const int doi = - pi_active && ((r2 < hig2) || (r2 < hjg2)) && !pj_inhibited; - const int doj = - pj_active && ((r2 < hig2) || (r2 < hjg2)) && !pi_inhibited; + const int doi = pi_active && ((r2 < hig2) || (r2 < hjg2)); + const int doj = pj_active && ((r2 < hig2) || (r2 < hjg2)); #ifdef SWIFT_DEBUG_CHECKS /* Check that particles have been drifted to the current time */ - if (pi->ti_drift != e->ti_current && !pi_inhibited) + if (pi->ti_drift != e->ti_current) error("Particle pi not drifted to current time"); - if (pj->ti_drift != e->ti_current && !pj_inhibited) + if (pj->ti_drift != e->ti_current) error("Particle pj not drifted to current time"); #endif @@ -581,7 +603,9 @@ void DOPAIR_SUBSET_NAIVE(struct runner *r, struct cell *restrict ci, /* Get a pointer to the jth particle. */ struct part *restrict pj = &parts_j[pjd]; - const int pj_inhibited = part_is_inhibited(pj, e); + + /* Skip inhibited particles. */ + if (part_is_inhibited(pj, e)) continue; /* Compute the pairwise distance. */ float r2 = 0.0f; @@ -595,12 +619,12 @@ void DOPAIR_SUBSET_NAIVE(struct runner *r, struct cell *restrict ci, /* Check that particles have been drifted to the current time */ if (pi->ti_drift != e->ti_current) error("Particle pi not drifted to current time"); - if (pj->ti_drift != e->ti_current && !pj_inhibited) + if (pj->ti_drift != e->ti_current) error("Particle pj not drifted to current time"); #endif /* Hit or miss? 
*/ - if (r2 < hig2 && !pj_inhibited) { + if (r2 < hig2) { IACT_NONSYM(r2, dx, hi, pj->h, pi, pj, a, H); #if (FUNCTION_TASK_LOOP == TASK_LOOP_DENSITY) @@ -669,7 +693,10 @@ void DOPAIR_SUBSET(struct runner *r, struct cell *restrict ci, /* Get a pointer to the jth particle. */ struct part *restrict pj = &parts_j[sort_j[pjd].i]; - const int pj_inhibited = part_is_inhibited(pj, e); + + /* Skip inhibited particles. */ + if (part_is_inhibited(pj, e)) continue; + const float hj = pj->h; const double pjx = pj->x[0]; const double pjy = pj->x[1]; @@ -684,12 +711,12 @@ void DOPAIR_SUBSET(struct runner *r, struct cell *restrict ci, /* Check that particles have been drifted to the current time */ if (pi->ti_drift != e->ti_current) error("Particle pi not drifted to current time"); - if (pj->ti_drift != e->ti_current && !pj_inhibited) + if (pj->ti_drift != e->ti_current) error("Particle pj not drifted to current time"); #endif /* Hit or miss? */ - if (r2 < hig2 && !pj_inhibited) { + if (r2 < hig2) { IACT_NONSYM(r2, dx, hi, hj, pi, pj, a, H); #if (FUNCTION_TASK_LOOP == TASK_LOOP_DENSITY) @@ -721,7 +748,10 @@ void DOPAIR_SUBSET(struct runner *r, struct cell *restrict ci, /* Get a pointer to the jth particle. */ struct part *restrict pj = &parts_j[sort_j[pjd].i]; - const int pj_inhibited = part_is_inhibited(pj, e); + + /* Skip inhibited particles. */ + if (part_is_inhibited(pj, e)) continue; + const float hj = pj->h; const double pjx = pj->x[0]; const double pjy = pj->x[1]; @@ -736,12 +766,12 @@ void DOPAIR_SUBSET(struct runner *r, struct cell *restrict ci, /* Check that particles have been drifted to the current time */ if (pi->ti_drift != e->ti_current) error("Particle pi not drifted to current time"); - if (pj->ti_drift != e->ti_current && !pj_inhibited) + if (pj->ti_drift != e->ti_current) error("Particle pj not drifted to current time"); #endif /* Hit or miss? 
*/ - if (r2 < hig2 && !pj_inhibited) { + if (r2 < hig2) { IACT_NONSYM(r2, dx, hi, hj, pi, pj, a, H); #if (FUNCTION_TASK_LOOP == TASK_LOOP_DENSITY) @@ -858,7 +888,10 @@ void DOSELF_SUBSET(struct runner *r, struct cell *restrict ci, /* Get a pointer to the jth particle. */ struct part *restrict pj = &parts_j[pjd]; - const int pj_inhibited = part_is_inhibited(pj, e); + + /* Skip inhibited particles. */ + if (part_is_inhibited(pj, e)) continue; + const float hj = pj->h; /* Compute the pairwise distance. */ @@ -872,12 +905,12 @@ void DOSELF_SUBSET(struct runner *r, struct cell *restrict ci, /* Check that particles have been drifted to the current time */ if (pi->ti_drift != e->ti_current) error("Particle pi not drifted to current time"); - if (pj->ti_drift != e->ti_current && !pj_inhibited) + if (pj->ti_drift != e->ti_current) error("Particle pj not drifted to current time"); #endif /* Hit or miss? */ - if (r2 > 0.f && r2 < hig2 && !pj_inhibited) { + if (r2 > 0.f && r2 < hig2) { IACT_NONSYM(r2, dx, hi, hj, pi, pj, a, H); #if (FUNCTION_TASK_LOOP == TASK_LOOP_DENSITY) @@ -992,7 +1025,10 @@ void DOPAIR1(struct runner *r, struct cell *ci, struct cell *cj, const int sid, /* Recover pj */ struct part *pj = &parts_j[sort_j[pjd].i]; - const int pj_inhibited = part_is_inhibited(pj, e); + + /* Skip inhibited particles. */ + if (part_is_inhibited(pj, e)) continue; + const float hj = pj->h; const float pjx = pj->x[0] - cj->loc[0]; const float pjy = pj->x[1] - cj->loc[1]; @@ -1032,12 +1068,12 @@ void DOPAIR1(struct runner *r, struct cell *ci, struct cell *cj, const int sid, /* Check that particles have been drifted to the current time */ if (pi->ti_drift != e->ti_current) error("Particle pi not drifted to current time"); - if (pj->ti_drift != e->ti_current && !pj_inhibited) + if (pj->ti_drift != e->ti_current) error("Particle pj not drifted to current time"); #endif /* Hit or miss? 
*/ - if (r2 < hig2 && !pj_inhibited) { + if (r2 < hig2) { IACT_NONSYM(r2, dx, hi, hj, pi, pj, a, H); #if (FUNCTION_TASK_LOOP == TASK_LOOP_DENSITY) @@ -1076,7 +1112,10 @@ void DOPAIR1(struct runner *r, struct cell *ci, struct cell *cj, const int sid, /* Recover pi */ struct part *pi = &parts_i[sort_i[pid].i]; - const int pi_inhibited = part_is_inhibited(pi, e); + + /* Skip inhibited particles. */ + if (part_is_inhibited(pi, e)) continue; + const float hi = pi->h; const float pix = pi->x[0] - (cj->loc[0] + shift[0]); const float piy = pi->x[1] - (cj->loc[1] + shift[1]); @@ -1114,14 +1153,14 @@ void DOPAIR1(struct runner *r, struct cell *ci, struct cell *cj, const int sid, pjz, ci->width[2]); /* Check that particles have been drifted to the current time */ - if (pi->ti_drift != e->ti_current && !pi_inhibited) + if (pi->ti_drift != e->ti_current) error("Particle pi not drifted to current time"); if (pj->ti_drift != e->ti_current) error("Particle pj not drifted to current time"); #endif /* Hit or miss? */ - if (r2 < hjg2 && !pi_inhibited) { + if (r2 < hjg2) { IACT_NONSYM(r2, dx, hj, hi, pj, pi, a, H); #if (FUNCTION_TASK_LOOP == TASK_LOOP_DENSITY) @@ -1335,7 +1374,10 @@ void DOPAIR2(struct runner *r, struct cell *ci, struct cell *cj, const int sid, /* Get a hold of the ith part in ci. */ struct part *pi = &parts_i[sort_i[pid].i]; - const int pi_inhibited = part_is_inhibited(pi, e); + + /* Skip inhibited particles. */ + if (part_is_inhibited(pi, e)) continue; + const float hi = pi->h; /* Is there anything we need to interact with (for this specific hi) ? 
*/ @@ -1397,7 +1439,7 @@ void DOPAIR2(struct runner *r, struct cell *ci, struct cell *cj, const int sid, pjz, ci->width[2]); /* Check that particles have been drifted to the current time */ - if (pi->ti_drift != e->ti_current && !pi_inhibited) + if (pi->ti_drift != e->ti_current) error("Particle pi not drifted to current time"); if (pj->ti_drift != e->ti_current) error("Particle pj not drifted to current time"); @@ -1405,7 +1447,7 @@ void DOPAIR2(struct runner *r, struct cell *ci, struct cell *cj, const int sid, /* Hit or miss? (note that we will do the other condition in the reverse loop) */ - if (r2 < hig2 && !pi_inhibited) { + if (r2 < hig2) { IACT_NONSYM(r2, dx, hj, hi, pj, pi, a, H); #if (FUNCTION_TASK_LOOP == TASK_LOOP_DENSITY) runner_iact_nonsym_chemistry(r2, dx, hj, hi, pj, pi, a, H); @@ -1421,7 +1463,10 @@ void DOPAIR2(struct runner *r, struct cell *ci, struct cell *cj, const int sid, /* Recover pj */ struct part *pj = &parts_j[sort_j[pjd].i]; - const int pj_inhibited = part_is_inhibited(pj, e); + + /* Skip inhibited particles. */ + if (part_is_inhibited(pj, e)) continue; + const float hj = pj->h; /* Get the position of pj in the right frame */ @@ -1461,14 +1506,14 @@ void DOPAIR2(struct runner *r, struct cell *ci, struct cell *cj, const int sid, pjz, ci->width[2]); /* Check that particles have been drifted to the current time */ - if (pi->ti_drift != e->ti_current && !pi_inhibited) + if (pi->ti_drift != e->ti_current) error("Particle pi not drifted to current time"); - if (pj->ti_drift != e->ti_current && !pj_inhibited) + if (pj->ti_drift != e->ti_current) error("Particle pj not drifted to current time"); #endif /* Hit or miss? (note that we will do the other condition in the reverse loop) */ - if (r2 < hig2 && !pj_inhibited) { + if (r2 < hig2) { /* Does pj need to be updated too? */ if (part_is_active(pj, e)) { @@ -1496,7 +1541,10 @@ void DOPAIR2(struct runner *r, struct cell *ci, struct cell *cj, const int sid, /* Get a hold of the jth part in cj. 
*/ struct part *pj = &parts_j[sort_j[pjd].i]; - const int pj_inhibited = part_is_inhibited(pj, e); + + /* Skip inhibited particles. */ + if (part_is_inhibited(pj, e)) continue; + const float hj = pj->h; /* Is there anything we need to interact with (for this specific hj) ? */ @@ -1561,13 +1609,13 @@ void DOPAIR2(struct runner *r, struct cell *ci, struct cell *cj, const int sid, /* Check that particles have been drifted to the current time */ if (pi->ti_drift != e->ti_current) error("Particle pi not drifted to current time"); - if (pj->ti_drift != e->ti_current && !pj_inhibited) + if (pj->ti_drift != e->ti_current) error("Particle pj not drifted to current time"); #endif /* Hit or miss? (note that we must avoid the r2 < hig2 cases we already processed) */ - if (r2 < hjg2 && r2 >= hig2 && !pj_inhibited) { + if (r2 < hjg2 && r2 >= hig2) { IACT_NONSYM(r2, dx, hi, hj, pi, pj, a, H); #if (FUNCTION_TASK_LOOP == TASK_LOOP_DENSITY) runner_iact_nonsym_chemistry(r2, dx, hi, hj, pi, pj, a, H); @@ -1584,7 +1632,10 @@ void DOPAIR2(struct runner *r, struct cell *ci, struct cell *cj, const int sid, /* Recover pi */ struct part *pi = &parts_i[sort_i[pid].i]; - const int pi_inhibited = part_is_inhibited(pi, e); + + /* Skip inhibited particles. */ + if (part_is_inhibited(pi, e)) continue; + const float hi = pi->h; const float hig2 = hi * hi * kernel_gamma2; @@ -1625,15 +1676,15 @@ void DOPAIR2(struct runner *r, struct cell *ci, struct cell *cj, const int sid, pjz, ci->width[2]); /* Check that particles have been drifted to the current time */ - if (pi->ti_drift != e->ti_current && !pi_inhibited) + if (pi->ti_drift != e->ti_current) error("Particle pi not drifted to current time"); - if (pj->ti_drift != e->ti_current && !pj_inhibited) + if (pj->ti_drift != e->ti_current) error("Particle pj not drifted to current time"); #endif /* Hit or miss? 
(note that we must avoid the r2 < hig2 cases we already processed) */ - if (r2 < hjg2 && r2 >= hig2 && !pi_inhibited) { + if (r2 < hjg2 && r2 >= hig2) { /* Does pi need to be updated too? */ if (part_is_active(pi, e)) { @@ -1788,7 +1839,9 @@ void DOSELF1(struct runner *r, struct cell *restrict c) { /* Get a pointer to the ith particle. */ struct part *restrict pi = &parts[pid]; - const int pi_inhibited = part_is_inhibited(pi, e); + + /* Skip inhibited particles. */ + if (part_is_inhibited(pi, e)) continue; /* Get the particle position and radius. */ double pix[3]; @@ -1808,7 +1861,7 @@ void DOSELF1(struct runner *r, struct cell *restrict c) { #ifdef SWIFT_DEBUG_CHECKS /* Check that particles have been drifted to the current time */ - if (pi->ti_drift != e->ti_current && !pi_inhibited) + if (pi->ti_drift != e->ti_current) error("Particle pi not drifted to current time"); if (pj->ti_drift != e->ti_current) error("Particle pj not drifted to current time"); @@ -1823,7 +1876,7 @@ void DOSELF1(struct runner *r, struct cell *restrict c) { } /* Hit or miss? */ - if (r2 < hj * hj * kernel_gamma2 && !pi_inhibited) { + if (r2 < hj * hj * kernel_gamma2) { IACT_NONSYM(r2, dx, hj, hi, pj, pi, a, H); #if (FUNCTION_TASK_LOOP == TASK_LOOP_DENSITY) @@ -1844,7 +1897,10 @@ void DOSELF1(struct runner *r, struct cell *restrict c) { /* Get a pointer to the jth particle. */ struct part *restrict pj = &parts[pjd]; - const int pj_inhibited = part_is_inhibited(pj, e); + + /* Skip inhibited particles. */ + if (part_is_inhibited(pj, e)) continue; + const float hj = pj->h; /* Compute the pairwise distance. 
*/ @@ -1861,9 +1917,9 @@ void DOSELF1(struct runner *r, struct cell *restrict c) { #ifdef SWIFT_DEBUG_CHECKS /* Check that particles have been drifted to the current time */ - if (pi->ti_drift != e->ti_current && !pi_inhibited) + if (pi->ti_drift != e->ti_current) error("Particle pi not drifted to current time"); - if (pj->ti_drift != e->ti_current && !pj_inhibited) + if (pj->ti_drift != e->ti_current) error("Particle pj not drifted to current time"); #endif @@ -1877,13 +1933,13 @@ void DOSELF1(struct runner *r, struct cell *restrict c) { #if (FUNCTION_TASK_LOOP == TASK_LOOP_DENSITY) runner_iact_chemistry(r2, dx, hi, hj, pi, pj, a, H); #endif - } else if (doi && !pj_inhibited) { + } else if (doi) { IACT_NONSYM(r2, dx, hi, hj, pi, pj, a, H); #if (FUNCTION_TASK_LOOP == TASK_LOOP_DENSITY) runner_iact_nonsym_chemistry(r2, dx, hi, hj, pi, pj, a, H); #endif - } else if (doj && !pi_inhibited) { + } else if (doj) { dx[0] = -dx[0]; dx[1] = -dx[1]; @@ -1972,7 +2028,9 @@ void DOSELF2(struct runner *r, struct cell *restrict c) { /* Get a pointer to the ith particle. */ struct part *restrict pi = &parts[pid]; - const int pi_inhibited = part_is_inhibited(pi, e); + + /* Skip inhibited particles. */ + if (part_is_inhibited(pi, e)) continue; /* Get the particle position and radius. */ double pix[3]; @@ -2000,14 +2058,14 @@ void DOSELF2(struct runner *r, struct cell *restrict c) { #ifdef SWIFT_DEBUG_CHECKS /* Check that particles have been drifted to the current time */ - if (pi->ti_drift != e->ti_current && !pi_inhibited) + if (pi->ti_drift != e->ti_current) error("Particle pi not drifted to current time"); if (pj->ti_drift != e->ti_current) error("Particle pj not drifted to current time"); #endif /* Hit or miss? 
*/ - if ((r2 < hig2 || r2 < hj * hj * kernel_gamma2) && !pi_inhibited) { + if (r2 < hig2 || r2 < hj * hj * kernel_gamma2) { IACT_NONSYM(r2, dx, hj, hi, pj, pi, a, H); #if (FUNCTION_TASK_LOOP == TASK_LOOP_DENSITY) @@ -2028,7 +2086,10 @@ void DOSELF2(struct runner *r, struct cell *restrict c) { /* Get a pointer to the jth particle. */ struct part *restrict pj = &parts[pjd]; - const int pj_inhibited = part_is_inhibited(pj, e); + + /* Skip inhibited particles. */ + if (part_is_inhibited(pj, e)) continue; + const float hj = pj->h; /* Compute the pairwise distance. */ @@ -2041,14 +2102,14 @@ void DOSELF2(struct runner *r, struct cell *restrict c) { #ifdef SWIFT_DEBUG_CHECKS /* Check that particles have been drifted to the current time */ - if (pi->ti_drift != e->ti_current && !pi_inhibited) + if (pi->ti_drift != e->ti_current) error("Particle pi not drifted to current time"); - if (pj->ti_drift != e->ti_current && !pj_inhibited) + if (pj->ti_drift != e->ti_current) error("Particle pj not drifted to current time"); #endif /* Hit or miss? */ - if ((r2 < hig2 || r2 < hj * hj * kernel_gamma2) && !pj_inhibited) { + if (r2 < hig2 || r2 < hj * hj * kernel_gamma2) { /* Does pj need to be updated too? */ if (part_is_active(pj, e)) { diff --git a/src/runner_doiact_vec.c b/src/runner_doiact_vec.c index c74fa7c8f53576f2e80578488fdf3378c59c0400..75bd86a6b4c5aa6c229ffeffa0d43c61e2948b72 100644 --- a/src/runner_doiact_vec.c +++ b/src/runner_doiact_vec.c @@ -23,9 +23,6 @@ /* This object's header. */ #include "runner_doiact_vec.h" -/* Local headers. */ -#include "active.h" - #if defined(WITH_VECTORIZATION) && defined(GADGET2_SPH) static const vector kernel_gamma2_vec = FILL_VEC(kernel_gamma2); @@ -68,8 +65,6 @@ __attribute__((always_inline)) INLINE static void calcRemInteractions( vector *v_curlvzSum, vector v_hi_inv, vector v_vix, vector v_viy, vector v_viz, int *icount_align) { - mask_t int_mask, int_mask2; - /* Work out the number of remainder interactions and pad secondary cache. 
*/ *icount_align = icount; int rem = icount % (NUM_VEC_PROC * VEC_SIZE); @@ -78,6 +73,7 @@ __attribute__((always_inline)) INLINE static void calcRemInteractions( *icount_align += pad; /* Initialise masks to true. */ + mask_t int_mask, int_mask2; vec_init_mask_true(int_mask); vec_init_mask_true(int_mask2); @@ -654,7 +650,6 @@ void runner_doself1_density_vec(struct runner *r, struct cell *restrict c) { /* Get some local variables */ const struct engine *e = r->e; - const timebin_t max_active_bin = e->max_active_bin; struct part *restrict parts = c->hydro.parts; const int count = c->hydro.count; @@ -663,12 +658,13 @@ void runner_doself1_density_vec(struct runner *r, struct cell *restrict c) { /* Anything to do here? */ if (!cell_is_active_hydro(c, e)) return; + /* Check that everybody was drifted here */ if (!cell_are_part_drifted(c, e)) error("Interacting undrifted cell."); #ifdef SWIFT_DEBUG_CHECKS for (int i = 0; i < count; i++) { /* Check that particles have been drifted to the current time */ - if (parts[i].ti_drift != e->ti_current) + if (parts[i].ti_drift != e->ti_current && !part_is_inhibited(&parts[i], e)) error("Particle pi not drifted to current time"); } #endif @@ -679,7 +675,7 @@ void runner_doself1_density_vec(struct runner *r, struct cell *restrict c) { if (cell_cache->count < count) cache_init(cell_cache, count); /* Read the particles from the cell and store them locally in the cache. */ - cache_read_particles(c, cell_cache); + const int count_align = cache_read_particles(c, cell_cache); /* Create secondary cache to store particle interactions. */ struct c2_cache int_cache; @@ -690,25 +686,23 @@ void runner_doself1_density_vec(struct runner *r, struct cell *restrict c) { /* Get a pointer to the ith particle. */ struct part *restrict pi = &parts[pid]; - /* Is the ith particle active? */ - if (!part_is_active_no_debug(pi, max_active_bin)) continue; - - const float hi = cell_cache->h[pid]; + /* Is the i^th particle active? 
*/ + if (!part_is_active(pi, e)) continue; /* Fill particle pi vectors. */ const vector v_pix = vector_set1(cell_cache->x[pid]); const vector v_piy = vector_set1(cell_cache->y[pid]); const vector v_piz = vector_set1(cell_cache->z[pid]); - const vector v_hi = vector_set1(hi); + const vector v_hi = vector_set1(cell_cache->h[pid]); const vector v_vix = vector_set1(cell_cache->vx[pid]); const vector v_viy = vector_set1(cell_cache->vy[pid]); const vector v_viz = vector_set1(cell_cache->vz[pid]); + /* Some useful multiples of h */ + const float hi = cell_cache->h[pid]; const float hig2 = hi * hi * kernel_gamma2; const vector v_hig2 = vector_set1(hig2); - - /* Get the inverse of hi. */ - vector v_hi_inv = vec_reciprocal(v_hi); + const vector v_hi_inv = vec_reciprocal(v_hi); /* Reset cumulative sums of update vectors. */ vector v_rhoSum = vector_setzero(); @@ -720,21 +714,6 @@ void runner_doself1_density_vec(struct runner *r, struct cell *restrict c) { vector v_curlvySum = vector_setzero(); vector v_curlvzSum = vector_setzero(); - /* Pad cache if there is a serial remainder. */ - int count_align = count; - const int rem = count % (NUM_VEC_PROC * VEC_SIZE); - if (rem != 0) { - count_align += (NUM_VEC_PROC * VEC_SIZE) - rem; - - /* Set positions to the same as particle pi so when the r2 > 0 mask is - * applied these extra contributions are masked out.*/ - for (int i = count; i < count_align; i++) { - cell_cache->x[i] = v_pix.f[0]; - cell_cache->y[i] = v_piy.f[0]; - cell_cache->z[i] = v_piz.f[0]; - } - } - /* The number of interactions for pi and the padded version of it to * make it a multiple of VEC_SIZE.
*/ int icount = 0, icount_align = 0; @@ -771,8 +750,8 @@ void runner_doself1_density_vec(struct runner *r, struct cell *restrict c) { v_r2_2.v = vec_fma(v_dz_2.v, v_dz_2.v, v_r2_2.v); /* Form a mask from r2 < hig2 and r2 > 0.*/ - mask_t v_doi_mask, v_doi_mask_self_check, v_doi_mask2, - v_doi_mask2_self_check; + mask_t v_doi_mask, v_doi_mask2; + mask_t v_doi_mask_self_check, v_doi_mask2_self_check; /* Form r2 > 0 mask and r2 < hig2 mask. */ vec_create_mask(v_doi_mask_self_check, vec_cmp_gt(v_r2.v, vec_setzero())); @@ -789,6 +768,25 @@ void runner_doself1_density_vec(struct runner *r, struct cell *restrict c) { const int doi_mask2 = vec_is_mask_true(v_doi_mask2) & vec_is_mask_true(v_doi_mask2_self_check); +#ifdef SWIFT_DEBUG_CHECKS + /* Verify that we have no inhibited particles in the interaction cache */ + for (int bit_index = 0; bit_index < VEC_SIZE; bit_index++) { + if (doi_mask & (1 << bit_index)) { + if (parts[pjd + bit_index].time_bin >= time_bin_inhibited) { + error("Inhibited particle in interaction cache!"); + } + } + } + for (int bit_index = 0; bit_index < VEC_SIZE; bit_index++) { + if (doi_mask2 & (1 << bit_index)) { + if (parts[pjd + VEC_SIZE + bit_index].time_bin >= + time_bin_inhibited) { + error("Inhibited particle in interaction cache2!"); + } + } + } +#endif + #ifdef DEBUG_INTERACTIONS_SPH for (int bit_index = 0; bit_index < VEC_SIZE; bit_index++) { if (doi_mask & (1 << bit_index)) { @@ -837,7 +835,7 @@ void runner_doself1_density_vec(struct runner *r, struct cell *restrict c) { vec_init_mask_true(int_mask); vec_init_mask_true(int_mask2); - /* Perform interaction with 2 vectors. */ + /* Perform interaction with NUM_VEC_PROC vectors. 
*/ for (int pjd = 0; pjd < icount_align; pjd += (NUM_VEC_PROC * VEC_SIZE)) { runner_iact_nonsym_2_vec_density( &int_cache.r2q[pjd], &int_cache.dxq[pjd], &int_cache.dyq[pjd], @@ -848,8 +846,7 @@ void runner_doself1_density_vec(struct runner *r, struct cell *restrict c) { &v_curlvzSum, int_mask, int_mask2, 0); } - /* Perform horizontal adds on vector sums and store result in particle pi. - */ + /* Perform horizontal adds on vector sums and store result in pi. */ VEC_HADD(v_rhoSum, pi->rho); VEC_HADD(v_rho_dhSum, pi->density.rho_dh); VEC_HADD(v_wcountSum, pi->density.wcount); @@ -899,7 +896,7 @@ void runner_doself_subset_density_vec(struct runner *r, struct cell *restrict c, if (cell_cache->count < count) cache_init(cell_cache, count); /* Read the particles from the cell and store them locally in the cache. */ - cache_read_particles(c, cell_cache); + const int count_align = cache_read_particles(c, cell_cache); /* Create secondary cache to store particle interactions. */ struct c2_cache int_cache; @@ -942,23 +939,6 @@ void runner_doself_subset_density_vec(struct runner *r, struct cell *restrict c, vector v_curlvySum = vector_setzero(); vector v_curlvzSum = vector_setzero(); - /* Pad cache if there is a serial remainder. */ - int count_align = count; - const int rem = count % (NUM_VEC_PROC * VEC_SIZE); - if (rem != 0) { - const int pad = (NUM_VEC_PROC * VEC_SIZE) - rem; - - count_align += pad; - - /* Set positions to the same as particle pi so when the r2 > 0 mask is - * applied these extra contributions are masked out.*/ - for (int i = count; i < count_align; i++) { - cell_cache->x[i] = v_pix.f[0]; - cell_cache->y[i] = v_piy.f[0]; - cell_cache->z[i] = v_piz.f[0]; - } - } - /* The number of interactions for pi and the padded version of it to * make it a multiple of VEC_SIZE. 
*/ int icount = 0, icount_align = 0; @@ -1015,9 +995,33 @@ void runner_doself_subset_density_vec(struct runner *r, struct cell *restrict c, const int doi_mask2 = vec_is_mask_true(v_doi_mask2) & vec_is_mask_true(v_doi_mask2_self_check); +#ifdef SWIFT_DEBUG_CHECKS + /* Verify that we have no inhibited particles in the interaction cache */ + for (int bit_index = 0; bit_index < VEC_SIZE; bit_index++) { + struct part *restrict parts_i = c->hydro.parts; + + if (doi_mask & (1 << bit_index)) { + if (parts_i[pjd + bit_index].time_bin >= time_bin_inhibited) { + error("Inhibited particle in interaction cache!"); + } + } + } + for (int bit_index = 0; bit_index < VEC_SIZE; bit_index++) { + struct part *restrict parts_i = c->hydro.parts; + + if (doi_mask2 & (1 << bit_index)) { + if (parts_i[pjd + VEC_SIZE + bit_index].time_bin >= + time_bin_inhibited) { + error("Inhibited particle in interaction cache2!"); + } + } + } +#endif + #ifdef DEBUG_INTERACTIONS_SPH - struct part *restrict parts_i = c->hydro.parts; for (int bit_index = 0; bit_index < VEC_SIZE; bit_index++) { + struct part *restrict parts_i = c->hydro.parts; + if (doi_mask & (1 << bit_index)) { if (pi->num_ngb_density < MAX_NUM_OF_NEIGHBOURS) pi->ids_ngbs_density[pi->num_ngb_density] = @@ -1112,7 +1116,6 @@ void runner_doself2_force_vec(struct runner *r, struct cell *restrict c) { const struct engine *e = r->e; const struct cosmology *restrict cosmo = e->cosmology; - const timebin_t max_active_bin = e->max_active_bin; struct part *restrict parts = c->hydro.parts; const int count = c->hydro.count; @@ -1126,7 +1129,7 @@ void runner_doself2_force_vec(struct runner *r, struct cell *restrict c) { #ifdef SWIFT_DEBUG_CHECKS for (int i = 0; i < count; i++) { /* Check that particles have been drifted to the current time */ - if (parts[i].ti_drift != e->ti_current) + if (parts[i].ti_drift != e->ti_current && !part_is_inhibited(&parts[i], e)) error("Particle pi not drifted to current time"); } #endif @@ -1138,7 +1141,7 @@ void 
runner_doself2_force_vec(struct runner *r, struct cell *restrict c) { if (cell_cache->count < count) cache_init(cell_cache, count); /* Read the particles from the cell and store them locally in the cache. */ - cache_read_force_particles(c, cell_cache); + const int count_align = cache_read_force_particles(c, cell_cache); /* Cosmological terms */ const float a = cosmo->a; @@ -1150,16 +1153,14 @@ void runner_doself2_force_vec(struct runner *r, struct cell *restrict c) { /* Get a pointer to the ith particle. */ struct part *restrict pi = &parts[pid]; - /* Is the ith particle active? */ - if (!part_is_active_no_debug(pi, max_active_bin)) continue; - - const float hi = cell_cache->h[pid]; + /* Is the i^th particle active? */ + if (!part_is_active(pi, e)) continue; /* Fill particle pi vectors. */ const vector v_pix = vector_set1(cell_cache->x[pid]); const vector v_piy = vector_set1(cell_cache->y[pid]); const vector v_piz = vector_set1(cell_cache->z[pid]); - const vector v_hi = vector_set1(hi); + const vector v_hi = vector_set1(cell_cache->h[pid]); const vector v_vix = vector_set1(cell_cache->vx[pid]); const vector v_viy = vector_set1(cell_cache->vy[pid]); const vector v_viz = vector_set1(cell_cache->vz[pid]); @@ -1170,11 +1171,11 @@ void runner_doself2_force_vec(struct runner *r, struct cell *restrict c) { const vector v_balsara_i = vector_set1(cell_cache->balsara[pid]); const vector v_ci = vector_set1(cell_cache->soundspeed[pid]); + /* Some useful powers of h */ + const float hi = cell_cache->h[pid]; const float hig2 = hi * hi * kernel_gamma2; const vector v_hig2 = vector_set1(hig2); - - /* Get the inverse of hi. */ - vector v_hi_inv = vec_reciprocal(v_hi); + const vector v_hi_inv = vec_reciprocal(v_hi); /* Reset cumulative sums of update vectors. 
*/ vector v_a_hydro_xSum = vector_setzero(); @@ -1184,39 +1185,18 @@ void runner_doself2_force_vec(struct runner *r, struct cell *restrict c) { vector v_sigSum = vector_set1(pi->force.v_sig); vector v_entropy_dtSum = vector_setzero(); - /* Pad cache if there is a serial remainder. */ - int count_align = count; - int rem = count % VEC_SIZE; - if (rem != 0) { - int pad = VEC_SIZE - rem; - - count_align += pad; - - /* Set positions to the same as particle pi so when the r2 > 0 mask is - * applied these extra contributions are masked out.*/ - for (int i = count; i < count_align; i++) { - cell_cache->x[i] = v_pix.f[0]; - cell_cache->y[i] = v_piy.f[0]; - cell_cache->z[i] = v_piz.f[0]; - cell_cache->h[i] = 1.f; - cell_cache->rho[i] = 1.f; - cell_cache->grad_h[i] = 1.f; - cell_cache->pOrho2[i] = 1.f; - cell_cache->balsara[i] = 1.f; - cell_cache->soundspeed[i] = 1.f; - } - } - /* Find all of particle pi's interacions and store needed values in the * secondary cache.*/ for (int pjd = 0; pjd < count_align; pjd += VEC_SIZE) { /* Load 1 set of vectors from the particle cache. */ - vector hjg2; const vector v_pjx = vector_load(&cell_cache->x[pjd]); const vector v_pjy = vector_load(&cell_cache->y[pjd]); const vector v_pjz = vector_load(&cell_cache->z[pjd]); const vector hj = vector_load(&cell_cache->h[pjd]); + + /* (hj * gamma)^2 */ + vector hjg2; hjg2.v = vec_mul(vec_mul(hj.v, hj.v), kernel_gamma2_vec.v); /* Compute the pairwise distance. */ @@ -1229,20 +1209,33 @@ void runner_doself2_force_vec(struct runner *r, struct cell *restrict c) { v_r2.v = vec_fma(v_dy.v, v_dy.v, v_r2.v); v_r2.v = vec_fma(v_dz.v, v_dz.v, v_r2.v); - /* Form r2 > 0 mask, r2 < hig2 mask and r2 < hjg2 mask. */ - mask_t v_doi_mask, v_doi_mask_self_check; - - /* Form r2 > 0 mask.*/ + /* Form r2 > 0 mask. + * This is used to avoid self-interactions */ + mask_t v_doi_mask_self_check; vec_create_mask(v_doi_mask_self_check, vec_cmp_gt(v_r2.v, vec_setzero())); - /* Form a mask from r2 < hig2 mask and r2 < hjg2 mask.
*/ - vector v_h2; - v_h2.v = vec_fmax(v_hig2.v, hjg2.v); - vec_create_mask(v_doi_mask, vec_cmp_lt(v_r2.v, v_h2.v)); + /* Form a mask from r2 < hig2 mask and r2 < hjg2 mask. + * This is written as r2 < max(hig2, hjg2) */ + mask_t v_doi_mask; + vec_create_mask(v_doi_mask, + vec_cmp_lt(v_r2.v, vec_fmax(v_hig2.v, hjg2.v))); - /* Combine all 3 masks. */ + /* Combine both masks. */ vec_combine_masks(v_doi_mask, v_doi_mask_self_check); +#ifdef SWIFT_DEBUG_CHECKS + /* Verify that we have no inhibited particles in the interaction cache */ + for (int bit_index = 0; bit_index < VEC_SIZE; bit_index++) { + if (vec_is_mask_true(v_doi_mask) & (1 << bit_index)) { + if ((pjd + bit_index < count) && + (parts[pjd + bit_index].time_bin >= time_bin_inhibited)) { + error("Inhibited particle in interaction cache! id=%lld", + parts[pjd + bit_index].id); + } + } + } +#endif + #ifdef DEBUG_INTERACTIONS_SPH for (int bit_index = 0; bit_index < VEC_SIZE; bit_index++) { if (vec_is_mask_true(v_doi_mask) & (1 << bit_index)) { @@ -1255,10 +1248,14 @@ void runner_doself2_force_vec(struct runner *r, struct cell *restrict c) { /* If there are any interactions perform them. */ if (vec_is_mask_true(v_doi_mask)) { - vector v_hj_inv = vec_reciprocal(hj); - /* To stop floating point exceptions for when particle separations are - * 0. */ + /* 1 / hj */ + const vector v_hj_inv = vec_reciprocal(hj); + + /* To stop floating point exceptions when particle separations are 0. + * Note that the results for r2==0 are masked out but may still raise + * an FPE as only the final operation is masked, not the whole math + * operations sequence.
*/ v_r2.v = vec_add(v_r2.v, vec_set1(FLT_MIN)); runner_iact_nonsym_1_vec_force( @@ -1278,9 +1275,10 @@ void runner_doself2_force_vec(struct runner *r, struct cell *restrict c) { VEC_HADD(v_a_hydro_ySum, pi->a_hydro[1]); VEC_HADD(v_a_hydro_zSum, pi->a_hydro[2]); VEC_HADD(v_h_dtSum, pi->force.h_dt); - VEC_HMAX(v_sigSum, pi->force.v_sig); VEC_HADD(v_entropy_dtSum, pi->entropy_dt); + VEC_HMAX(v_sigSum, pi->force.v_sig); + } /* loop over all particles. */ TIMER_TOC(timer_doself_force); @@ -1341,10 +1339,12 @@ void runner_dopair1_density_vec(struct runner *r, struct cell *ci, #ifdef SWIFT_DEBUG_CHECKS /* Check that particles have been drifted to the current time */ for (int pid = 0; pid < count_i; pid++) - if (parts_i[pid].ti_drift != e->ti_current) + if (parts_i[pid].ti_drift != e->ti_current && + !part_is_inhibited(&parts_i[pid], e)) error("Particle pi not drifted to current time"); for (int pjd = 0; pjd < count_j; pjd++) - if (parts_j[pjd].ti_drift != e->ti_current) + if (parts_j[pjd].ti_drift != e->ti_current && + !part_is_inhibited(&parts_j[pjd], e)) error("Particle pj not drifted to current time"); #endif @@ -1497,6 +1497,21 @@ void runner_dopair1_density_vec(struct runner *r, struct cell *ci, /* Form r2 < hig2 mask. */ vec_create_mask(v_doi_mask, vec_cmp_lt(v_r2.v, v_hig2.v)); +#ifdef SWIFT_DEBUG_CHECKS + /* Verify that we have no inhibited particles in the interaction cache + */ + for (int bit_index = 0; bit_index < VEC_SIZE; bit_index++) { + if (vec_is_mask_true(v_doi_mask) & (1 << bit_index)) { + if ((pjd + bit_index < count_j) && + (parts_j[sort_j[pjd + bit_index].i].time_bin >= + time_bin_inhibited)) { + error("Inhibited particle in interaction cache! 
id=%lld", + parts_j[sort_j[pjd + bit_index].i].id); + } + } + } +#endif + #ifdef DEBUG_INTERACTIONS_SPH for (int bit_index = 0; bit_index < VEC_SIZE; bit_index++) { if (vec_is_mask_true(v_doi_mask) & (1 << bit_index)) { @@ -1623,6 +1638,21 @@ void runner_dopair1_density_vec(struct runner *r, struct cell *ci, /* Form r2 < hig2 mask. */ vec_create_mask(v_doj_mask, vec_cmp_lt(v_r2.v, v_hjg2.v)); +#ifdef SWIFT_DEBUG_CHECKS + /* Verify that we have no inhibited particles in the interaction cache + */ + for (int bit_index = 0; bit_index < VEC_SIZE; bit_index++) { + if (vec_is_mask_true(v_doj_mask) & (1 << bit_index)) { + if ((ci_cache_idx + first_pi + bit_index < count_i) && + (parts_i[sort_i[ci_cache_idx + first_pi + bit_index].i] + .time_bin >= time_bin_inhibited)) { + error("Inhibited particle in interaction cache! id=%lld", + parts_i[sort_i[ci_cache_idx + first_pi + bit_index].i].id); + } + } + } +#endif + #ifdef DEBUG_INTERACTIONS_SPH for (int bit_index = 0; bit_index < VEC_SIZE; bit_index++) { if (vec_is_mask_true(v_doj_mask) & (1 << bit_index)) { @@ -1805,9 +1835,27 @@ void runner_dopair_subset_density_vec(struct runner *r, mask_t v_doi_mask; vec_create_mask(v_doi_mask, vec_cmp_lt(v_r2.v, v_hig2.v)); +#ifdef SWIFT_DEBUG_CHECKS + /* Verify that we have no inhibited particles in the interaction cache + */ + for (int bit_index = 0; bit_index < VEC_SIZE; bit_index++) { + struct part *restrict parts_j = cj->hydro.parts; + + if (vec_is_mask_true(v_doi_mask) & (1 << bit_index)) { + if ((pjd + bit_index < count_j) && + (parts_j[sort_j[pjd + bit_index].i].time_bin >= + time_bin_inhibited)) { + error("Inhibited particle in interaction cache! 
id=%lld", + parts_j[sort_j[pjd + bit_index].i].id); + } + } + } +#endif + #ifdef DEBUG_INTERACTIONS_SPH - struct part *restrict parts_j = cj->hydro.parts; for (int bit_index = 0; bit_index < VEC_SIZE; bit_index++) { + struct part *restrict parts_j = cj->hydro.parts; + if (vec_is_mask_true(v_doi_mask) & (1 << bit_index)) { if (pi->num_ngb_density < MAX_NUM_OF_NEIGHBOURS) { pi->ids_ngbs_density[pi->num_ngb_density] = @@ -1934,9 +1982,27 @@ void runner_dopair_subset_density_vec(struct runner *r, mask_t v_doi_mask; vec_create_mask(v_doi_mask, vec_cmp_lt(v_r2.v, v_hig2.v)); +#ifdef SWIFT_DEBUG_CHECKS + /* Verify that we have no inhibited particles in the interaction cache + */ + for (int bit_index = 0; bit_index < VEC_SIZE; bit_index++) { + struct part *restrict parts_j = cj->hydro.parts; + + if (vec_is_mask_true(v_doi_mask) & (1 << bit_index)) { + if ((cj_cache_idx + bit_index < count_j) && + (parts_j[sort_j[cj_cache_idx + first_pj + bit_index].i] + .time_bin >= time_bin_inhibited)) { + error("Inhibited particle in interaction cache! 
id=%lld", + parts_j[sort_j[cj_cache_idx + first_pj + bit_index].i].id); + } + } + } +#endif + #ifdef DEBUG_INTERACTIONS_SPH - struct part *restrict parts_j = cj->hydro.parts; for (int bit_index = 0; bit_index < VEC_SIZE; bit_index++) { + struct part *restrict parts_j = cj->hydro.parts; + if (vec_is_mask_true(v_doi_mask) & (1 << bit_index)) { if (pi->num_ngb_density < MAX_NUM_OF_NEIGHBOURS) { pi->ids_ngbs_density[pi->num_ngb_density] = @@ -2032,10 +2098,12 @@ void runner_dopair2_force_vec(struct runner *r, struct cell *ci, #ifdef SWIFT_DEBUG_CHECKS /* Check that particles have been drifted to the current time */ for (int pid = 0; pid < count_i; pid++) - if (parts_i[pid].ti_drift != e->ti_current) + if (parts_i[pid].ti_drift != e->ti_current && + !part_is_inhibited(&parts_i[pid], e)) error("Particle pi not drifted to current time"); for (int pjd = 0; pjd < count_j; pjd++) - if (parts_j[pjd].ti_drift != e->ti_current) + if (parts_j[pjd].ti_drift != e->ti_current && + !part_is_inhibited(&parts_j[pjd], e)) error("Particle pj not drifted to current time"); #endif @@ -2200,6 +2268,21 @@ void runner_dopair2_force_vec(struct runner *r, struct cell *ci, v_h2.v = vec_fmax(v_hig2.v, v_hjg2.v); vec_create_mask(v_doi_mask, vec_cmp_lt(v_r2.v, v_h2.v)); +#ifdef SWIFT_DEBUG_CHECKS + /* Verify that we have no inhibited particles in the interaction cache + */ + for (int bit_index = 0; bit_index < VEC_SIZE; bit_index++) { + if (vec_is_mask_true(v_doi_mask) & (1 << bit_index)) { + if ((pjd + bit_index < count_j) && + (parts_j[sort_j[pjd + bit_index].i].time_bin >= + time_bin_inhibited)) { + error("Inhibited particle in interaction cache! 
id=%lld", + parts_j[sort_j[pjd + bit_index].i].id); + } + } + } +#endif + #ifdef DEBUG_INTERACTIONS_SPH for (int bit_index = 0; bit_index < VEC_SIZE; bit_index++) { if (vec_is_mask_true(v_doi_mask) & (1 << bit_index)) { @@ -2336,6 +2419,21 @@ void runner_dopair2_force_vec(struct runner *r, struct cell *ci, v_h2.v = vec_fmax(v_hjg2.v, v_hig2.v); vec_create_mask(v_doj_mask, vec_cmp_lt(v_r2.v, v_h2.v)); +#ifdef SWIFT_DEBUG_CHECKS + /* Verify that we have no inhibited particles in the interaction cache + */ + for (int bit_index = 0; bit_index < VEC_SIZE; bit_index++) { + if (vec_is_mask_true(v_doj_mask) & (1 << bit_index)) { + if ((ci_cache_idx + first_pi + bit_index < count_i) && + (parts_i[sort_i[ci_cache_idx + first_pi + bit_index].i] + .time_bin >= time_bin_inhibited)) { + error("Inhibited particle in interaction cache! id=%lld", + parts_i[sort_i[ci_cache_idx + first_pi + bit_index].i].id); + } + } + } +#endif + #ifdef DEBUG_INTERACTIONS_SPH for (int bit_index = 0; bit_index < VEC_SIZE; bit_index++) { if (vec_is_mask_true(v_doj_mask) & (1 << bit_index)) { diff --git a/src/scheduler.c b/src/scheduler.c index 4e3eb4e29e6cd4a2cd91032d4ee81d203977a59e..ad6af73aec209a19106794636c3b6599baca21e1 100644 --- a/src/scheduler.c +++ b/src/scheduler.c @@ -59,12 +59,43 @@ */ void scheduler_clear_active(struct scheduler *s) { s->active_count = 0; } +/** + * @brief Increase the space available for unlocks. Only call when + * current index == s->size_unlocks; + */ +static void scheduler_extend_unlocks(struct scheduler *s) { + + /* Allocate the new buffer. */ + const int size_unlocks_new = s->size_unlocks * 2; + struct task **unlocks_new = + (struct task **)malloc(sizeof(struct task *) * size_unlocks_new); + int *unlock_ind_new = (int *)malloc(sizeof(int) * size_unlocks_new); + if (unlocks_new == NULL || unlock_ind_new == NULL) + error("Failed to re-allocate unlocks."); + + /* Wait for all writes to the old buffer to complete.
*/ + while (s->completed_unlock_writes < s->size_unlocks) + ; + + /* Copy the buffers. */ + memcpy(unlocks_new, s->unlocks, sizeof(struct task *) * s->size_unlocks); + memcpy(unlock_ind_new, s->unlock_ind, sizeof(int) * s->size_unlocks); + free(s->unlocks); + free(s->unlock_ind); + s->unlocks = unlocks_new; + s->unlock_ind = unlock_ind_new; + + /* Publish the new buffer size. */ + s->size_unlocks = size_unlocks_new; +} + /** * @brief Add an unlock_task to the given task. * * @param s The #scheduler. * @param ta The unlocking #task. * @param tb The #task that will be unlocked. + */ void scheduler_addunlock(struct scheduler *s, struct task *ta, struct task *tb) { @@ -77,37 +108,21 @@ void scheduler_addunlock(struct scheduler *s, struct task *ta, const int ind = atomic_inc(&s->nr_unlocks); /* Does the buffer need to be grown? */ - if (ind == s->size_unlocks) { - /* Allocate the new buffer. */ - struct task **unlocks_new; - int *unlock_ind_new; - const int size_unlocks_new = s->size_unlocks * 2; - if ((unlocks_new = (struct task **)malloc(sizeof(struct task *) * - size_unlocks_new)) == NULL || - (unlock_ind_new = (int *)malloc(sizeof(int) * size_unlocks_new)) == - NULL) - error("Failed to re-allocate unlocks."); - - /* Wait for all writes to the old buffer to complete. */ - while (s->completed_unlock_writes < ind) - ; - - /* Copy the buffers. */ - memcpy(unlocks_new, s->unlocks, sizeof(struct task *) * ind); - memcpy(unlock_ind_new, s->unlock_ind, sizeof(int) * ind); - free(s->unlocks); - free(s->unlock_ind); - s->unlocks = unlocks_new; - s->unlock_ind = unlock_ind_new; - - /* Publish the new buffer size. */ - s->size_unlocks = size_unlocks_new; - } + if (ind == s->size_unlocks) scheduler_extend_unlocks(s); + +#ifdef SWIFT_DEBUG_CHECKS + if (ind > s->size_unlocks * 2) + message("unlocks guard enabled: %d / %d", ind, s->size_unlocks); +#endif /* Wait for there to actually be space at my index. 
*/ while (ind > s->size_unlocks) ; + /* Guard against case when more than (old) s->size_unlocks unlocks + * are now pending. */ + if (ind == s->size_unlocks) scheduler_extend_unlocks(s); + /* Write the unlock to the scheduler. */ s->unlocks[ind] = tb; s->unlock_ind[ind] = ta - s->tasks; @@ -115,7 +130,7 @@ void scheduler_addunlock(struct scheduler *s, struct task *ta, } /** - * @brief compute the number of same dependencies + * @brief compute the number of similar dependencies * * @param s The #scheduler * @param ta The #task @@ -513,7 +528,7 @@ void scheduler_write_dependencies(struct scheduler *s, int verbose) { /* Be clean */ free(task_dep); - if (verbose && s->nodeID == 0) + if (verbose) message("Printing task graph took %.3f %s.", clocks_from_ticks(getticks() - tic), clocks_getunit()); } diff --git a/src/serial_io.c b/src/serial_io.c index 609f9aaf05a7f673a324a3e64848d89bdb248d16..0753e171cc4784c3c38fd5ea0e2a2c39dc4da1b7 100644 --- a/src/serial_io.c +++ b/src/serial_io.c @@ -56,6 +56,7 @@ #include "stars_io.h" #include "tracers_io.h" #include "units.h" +#include "velociraptor_io.h" #include "xmf.h" /** @@ -778,7 +779,6 @@ void write_output_serial(struct engine* e, const char* baseName, int mpi_size, MPI_Comm comm, MPI_Info info) { hid_t h_file = 0, h_grp = 0; - int periodic = e->s->periodic; int numFiles = 1; const struct part* parts = e->s->parts; const struct xpart* xparts = e->s->xparts; @@ -786,6 +786,15 @@ void write_output_serial(struct engine* e, const char* baseName, const struct spart* sparts = e->s->sparts; struct swift_params* params = e->parameter_file; const int with_cosmology = e->policy & engine_policy_cosmology; + const int with_cooling = e->policy & engine_policy_cooling; + const int with_temperature = e->policy & engine_policy_temperature; +#ifdef HAVE_VELOCIRAPTOR + const int with_stf = (e->policy & engine_policy_structure_finding) && + (e->s->gpart_group_data != NULL); +#else + const int with_stf = 0; +#endif + FILE* xmfFile = 0; /* Number 
of particles currently in the arrays */ @@ -847,28 +856,25 @@ void write_output_serial(struct engine* e, const char* baseName, h_file = H5Fcreate(fileName, H5F_ACC_TRUNC, H5P_DEFAULT, H5P_DEFAULT); if (h_file < 0) error("Error while opening file '%s'.", fileName); - /* Open header to write simulation properties */ - /* message("Writing runtime parameters..."); */ - h_grp = H5Gcreate(h_file, "/RuntimePars", H5P_DEFAULT, H5P_DEFAULT, - H5P_DEFAULT); - if (h_grp < 0) error("Error while creating runtime parameters group\n"); - - /* Write the relevant information */ - io_write_attribute(h_grp, "PeriodicBoundariesOn", INT, &periodic, 1); - - /* Close runtime parameters */ - H5Gclose(h_grp); - /* Open header to write simulation properties */ /* message("Writing file header..."); */ h_grp = H5Gcreate(h_file, "/Header", H5P_DEFAULT, H5P_DEFAULT, H5P_DEFAULT); if (h_grp < 0) error("Error while creating file header\n"); + /* Convert basic output information to snapshot units */ + const double factor_time = + units_conversion_factor(internal_units, snapshot_units, UNIT_CONV_TIME); + const double factor_length = units_conversion_factor( + internal_units, snapshot_units, UNIT_CONV_LENGTH); + const double dblTime = e->time * factor_time; + const double dim[3] = {e->s->dim[0] * factor_length, + e->s->dim[1] * factor_length, + e->s->dim[2] * factor_length}; + /* Print the relevant information and print status */ - io_write_attribute(h_grp, "BoxSize", DOUBLE, e->s->dim, 3); - double dblTime = e->time; + io_write_attribute(h_grp, "BoxSize", DOUBLE, dim, 3); io_write_attribute(h_grp, "Time", DOUBLE, &dblTime, 1); - int dimension = (int)hydro_dimension; + const int dimension = (int)hydro_dimension; io_write_attribute(h_grp, "Dimension", INT, &dimension, 1); io_write_attribute(h_grp, "Redshift", DOUBLE, &e->cosmology->z, 1); io_write_attribute(h_grp, "Scale-factor", DOUBLE, &e->cosmology->a, 1); @@ -1031,6 +1037,32 @@ void write_output_serial(struct engine* e, const char* baseName, 
H5Fclose(h_file); } + /* Now write the top-level cell structure */ + hid_t h_file_cells = 0, h_grp_cells = 0; + if (mpi_rank == 0) { + + /* Open the snapshot on rank 0 */ + h_file_cells = H5Fopen(fileName, H5F_ACC_RDWR, H5P_DEFAULT); + if (h_file_cells < 0) + error("Error while opening file '%s' on rank %d.", fileName, mpi_rank); + + /* Create the group we want in the file */ + h_grp_cells = H5Gcreate(h_file_cells, "/Cells", H5P_DEFAULT, H5P_DEFAULT, + H5P_DEFAULT); + if (h_grp_cells < 0) error("Error while creating cells group"); + } + + /* Write the location of the particles in the arrays */ + io_write_cell_offsets(h_grp_cells, e->s->cdim, e->s->cells_top, + e->s->nr_cells, e->s->width, mpi_rank, N_total, offset, + internal_units, snapshot_units); + + /* Close everything */ + if (mpi_rank == 0) { + H5Gclose(h_grp_cells); + H5Fclose(h_file_cells); + } + /* Now loop over ranks and write the data */ for (int rank = 0; rank < mpi_size; ++rank) { @@ -1068,6 +1100,7 @@ void write_output_serial(struct engine* e, const char* baseName, struct part* parts_written = NULL; struct xpart* xparts_written = NULL; struct gpart* gparts_written = NULL; + struct velociraptor_gpart_data* gpart_group_data_written = NULL; struct spart* sparts_written = NULL; /* Write particle fields from the particle structure */ @@ -1080,8 +1113,14 @@ void write_output_serial(struct engine* e, const char* baseName, Nparticles = Ngas; hydro_write_particles(parts, xparts, list, &num_fields); num_fields += chemistry_write_particles(parts, list + num_fields); - num_fields += cooling_write_particles( - parts, xparts, list + num_fields, e->cooling_func); + if (with_cooling || with_temperature) { + num_fields += cooling_write_particles( + parts, xparts, list + num_fields, e->cooling_func); + } + if (with_stf) { + num_fields += + velociraptor_write_parts(parts, xparts, list + num_fields); + } num_fields += tracers_write_particles( parts, xparts, list + num_fields, with_cosmology); num_fields += 
sftracers_write_particles( @@ -1109,9 +1148,15 @@ void write_output_serial(struct engine* e, const char* baseName, &num_fields); num_fields += chemistry_write_particles(parts_written, list + num_fields); - num_fields += - cooling_write_particles(parts_written, xparts_written, - list + num_fields, e->cooling_func); + if (with_cooling || with_temperature) { + num_fields += + cooling_write_particles(parts_written, xparts_written, + list + num_fields, e->cooling_func); + } + if (with_stf) { + num_fields += velociraptor_write_parts( + parts_written, xparts_written, list + num_fields); + } num_fields += tracers_write_particles(parts_written, xparts_written, list + num_fields, with_cosmology); @@ -1127,6 +1172,10 @@ void write_output_serial(struct engine* e, const char* baseName, /* This is a DM-only run without inhibited particles */ Nparticles = Ntot; darkmatter_write_particles(gparts, list, &num_fields); + if (with_stf) { + num_fields += velociraptor_write_gparts(e->s->gpart_group_data, + list + num_fields); + } } else { /* Ok, we need to fish out the particles we want */ @@ -1137,12 +1186,27 @@ void write_output_serial(struct engine* e, const char* baseName, Ndm_written * sizeof(struct gpart)) != 0) error("Error while allocating temporart memory for gparts"); + if (with_stf) { + if (posix_memalign( + (void**)&gpart_group_data_written, gpart_align, + Ndm_written * sizeof(struct velociraptor_gpart_data)) != + 0) + error( + "Error while allocating temporart memory for gparts STF " + "data"); + } + /* Collect the non-inhibited DM particles from gpart */ - io_collect_gparts_to_write(gparts, gparts_written, Ntot, - Ndm_written); + io_collect_gparts_to_write( + gparts, e->s->gpart_group_data, gparts_written, + gpart_group_data_written, Ntot, Ndm_written, with_stf); - /* Write DM particles */ + /* Select the fields to write */ darkmatter_write_particles(gparts_written, list, &num_fields); + if (with_stf) { + num_fields += velociraptor_write_gparts( + gpart_group_data_written, 
list + num_fields); + } } } break; @@ -1156,6 +1220,10 @@ void write_output_serial(struct engine* e, const char* baseName, chemistry_write_sparticles(sparts, list + num_fields); num_fields += tracers_write_sparticles(sparts, list + num_fields, with_cosmology); + if (with_stf) { + num_fields += + velociraptor_write_sparts(sparts, list + num_fields); + } } else { /* Ok, we need to fish out the particles we want */ @@ -1176,6 +1244,10 @@ void write_output_serial(struct engine* e, const char* baseName, chemistry_write_sparticles(sparts, list + num_fields); num_fields += tracers_write_sparticles(sparts, list + num_fields, with_cosmology); + if (with_stf) { + num_fields += velociraptor_write_sparts(sparts_written, + list + num_fields); + } } } break; @@ -1202,6 +1274,7 @@ void write_output_serial(struct engine* e, const char* baseName, if (parts_written) free(parts_written); if (xparts_written) free(xparts_written); if (gparts_written) free(gparts_written); + if (gpart_group_data_written) free(gpart_group_data_written); if (sparts_written) free(sparts_written); /* Close particle group */ diff --git a/src/single_io.c b/src/single_io.c index 110b5d95862ba6c3f3d5ff653395d09721957e07..a7a701d8bd3dc9371ece8f7dc298e2708741a163 100644 --- a/src/single_io.c +++ b/src/single_io.c @@ -55,6 +55,7 @@ #include "stars_io.h" #include "tracers_io.h" #include "units.h" +#include "velociraptor_io.h" #include "xmf.h" /** @@ -641,7 +642,6 @@ void write_output_single(struct engine* e, const char* baseName, const struct unit_system* snapshot_units) { hid_t h_file = 0, h_grp = 0; - int periodic = e->s->periodic; int numFiles = 1; const struct part* parts = e->s->parts; const struct xpart* xparts = e->s->xparts; @@ -649,6 +649,14 @@ void write_output_single(struct engine* e, const char* baseName, const struct spart* sparts = e->s->sparts; struct swift_params* params = e->parameter_file; const int with_cosmology = e->policy & engine_policy_cosmology; + const int with_cooling = e->policy & 
engine_policy_cooling; + const int with_temperature = e->policy & engine_policy_temperature; +#ifdef HAVE_VELOCIRAPTOR + const int with_stf = (e->policy & engine_policy_structure_finding) && + (e->s->gpart_group_data != NULL); +#else + const int with_stf = 0; +#endif /* Number of particles currently in the arrays */ const size_t Ntot = e->s->nr_gparts; @@ -700,28 +708,25 @@ void write_output_single(struct engine* e, const char* baseName, h_file = H5Fcreate(fileName, H5F_ACC_TRUNC, H5P_DEFAULT, H5P_DEFAULT); if (h_file < 0) error("Error while opening file '%s'.", fileName); - /* Open header to write simulation properties */ - /* message("Writing runtime parameters..."); */ - h_grp = - H5Gcreate(h_file, "/RuntimePars", H5P_DEFAULT, H5P_DEFAULT, H5P_DEFAULT); - if (h_grp < 0) error("Error while creating runtime parameters group\n"); - - /* Write the relevant information */ - io_write_attribute(h_grp, "PeriodicBoundariesOn", INT, &periodic, 1); - - /* Close runtime parameters */ - H5Gclose(h_grp); - /* Open header to write simulation properties */ /* message("Writing file header..."); */ h_grp = H5Gcreate(h_file, "/Header", H5P_DEFAULT, H5P_DEFAULT, H5P_DEFAULT); if (h_grp < 0) error("Error while creating file header\n"); + /* Convert basic output information to snapshot units */ + const double factor_time = + units_conversion_factor(internal_units, snapshot_units, UNIT_CONV_TIME); + const double factor_length = + units_conversion_factor(internal_units, snapshot_units, UNIT_CONV_LENGTH); + const double dblTime = e->time * factor_time; + const double dim[3] = {e->s->dim[0] * factor_length, + e->s->dim[1] * factor_length, + e->s->dim[2] * factor_length}; + /* Print the relevant information and print status */ - io_write_attribute(h_grp, "BoxSize", DOUBLE, e->s->dim, 3); - double dblTime = e->time; + io_write_attribute(h_grp, "BoxSize", DOUBLE, dim, 3); io_write_attribute(h_grp, "Time", DOUBLE, &dblTime, 1); - int dimension = (int)hydro_dimension; + const int dimension = 
(int)hydro_dimension; io_write_attribute(h_grp, "Dimension", INT, &dimension, 1); io_write_attribute(h_grp, "Redshift", DOUBLE, &e->cosmology->z, 1); io_write_attribute(h_grp, "Scale-factor", DOUBLE, &e->cosmology->a, 1); @@ -830,6 +835,17 @@ void write_output_single(struct engine* e, const char* baseName, /* Print the system of Units used internally */ io_write_unit_system(h_file, internal_units, "InternalCodeUnits"); + /* Now write the top-level cell structure */ + long long global_offsets[swift_type_count] = {0}; + h_grp = H5Gcreate(h_file, "/Cells", H5P_DEFAULT, H5P_DEFAULT, H5P_DEFAULT); + if (h_grp < 0) error("Error while creating cells group"); + + /* Write the location of the particles in the arrays */ + io_write_cell_offsets(h_grp, e->s->cdim, e->s->cells_top, e->s->nr_cells, + e->s->width, e->nodeID, N_total, global_offsets, + internal_units, snapshot_units); + H5Gclose(h_grp); + /* Tell the user if a conversion will be needed */ if (e->verbose) { if (units_are_equal(snapshot_units, internal_units)) { @@ -888,6 +904,7 @@ void write_output_single(struct engine* e, const char* baseName, struct part* parts_written = NULL; struct xpart* xparts_written = NULL; struct gpart* gparts_written = NULL; + struct velociraptor_gpart_data* gpart_group_data_written = NULL; struct spart* sparts_written = NULL; /* Write particle fields from the particle structure */ @@ -900,8 +917,14 @@ void write_output_single(struct engine* e, const char* baseName, N = Ngas; hydro_write_particles(parts, xparts, list, &num_fields); num_fields += chemistry_write_particles(parts, list + num_fields); - num_fields += cooling_write_particles( - parts, xparts, list + num_fields, e->cooling_func); + if (with_cooling || with_temperature) { + num_fields += cooling_write_particles( + parts, xparts, list + num_fields, e->cooling_func); + } + if (with_stf) { + num_fields += + velociraptor_write_parts(parts, xparts, list + num_fields); + } num_fields += tracers_write_particles( parts, xparts, list + 
num_fields, with_cosmology); num_fields += sftracers_write_particles( @@ -929,9 +952,15 @@ void write_output_single(struct engine* e, const char* baseName, &num_fields); num_fields += chemistry_write_particles(parts_written, list + num_fields); - num_fields += - cooling_write_particles(parts_written, xparts_written, - list + num_fields, e->cooling_func); + if (with_cooling || with_temperature) { + num_fields += + cooling_write_particles(parts_written, xparts_written, + list + num_fields, e->cooling_func); + } + if (with_stf) { + num_fields += velociraptor_write_parts( + parts_written, xparts_written, list + num_fields); + } num_fields += tracers_write_particles( parts_written, xparts_written, list + num_fields, with_cosmology); num_fields += sftracers_write_particles( @@ -945,6 +974,10 @@ void write_output_single(struct engine* e, const char* baseName, /* This is a DM-only run without inhibited particles */ N = Ntot; darkmatter_write_particles(gparts, list, &num_fields); + if (with_stf) { + num_fields += velociraptor_write_gparts(e->s->gpart_group_data, + list + num_fields); + } } else { /* Ok, we need to fish out the particles we want */ @@ -955,11 +988,26 @@ void write_output_single(struct engine* e, const char* baseName, Ndm_written * sizeof(struct gpart)) != 0) error("Error while allocating temporart memory for gparts"); + if (with_stf) { + if (posix_memalign( + (void**)&gpart_group_data_written, gpart_align, + Ndm_written * sizeof(struct velociraptor_gpart_data)) != 0) + error( + "Error while allocating temporart memory for gparts STF " + "data"); + } + /* Collect the non-inhibited DM particles from gpart */ - io_collect_gparts_to_write(gparts, gparts_written, Ntot, Ndm_written); + io_collect_gparts_to_write(gparts, e->s->gpart_group_data, + gparts_written, gpart_group_data_written, + Ntot, Ndm_written, with_stf); - /* Write DM particles */ + /* Select the fields to write */ darkmatter_write_particles(gparts_written, list, &num_fields); + if (with_stf) { + 
num_fields += velociraptor_write_gparts(gpart_group_data_written, + list + num_fields); + } } } break; @@ -972,6 +1020,9 @@ void write_output_single(struct engine* e, const char* baseName, num_fields += chemistry_write_sparticles(sparts, list + num_fields); num_fields += tracers_write_sparticles(sparts, list + num_fields, with_cosmology); + if (with_stf) { + num_fields += velociraptor_write_sparts(sparts, list + num_fields); + } } else { /* Ok, we need to fish out the particles we want */ @@ -992,6 +1043,10 @@ void write_output_single(struct engine* e, const char* baseName, chemistry_write_sparticles(sparts_written, list + num_fields); num_fields += tracers_write_sparticles( sparts_written, list + num_fields, with_cosmology); + if (with_stf) { + num_fields += + velociraptor_write_sparts(sparts_written, list + num_fields); + } } } break; @@ -1017,6 +1072,7 @@ void write_output_single(struct engine* e, const char* baseName, if (parts_written) free(parts_written); if (xparts_written) free(xparts_written); if (gparts_written) free(gparts_written); + if (gpart_group_data_written) free(gpart_group_data_written); if (sparts_written) free(sparts_written); /* Close particle group */ diff --git a/src/space.c b/src/space.c index 35aaffa66d5b921ec687350588ac91e3a52bb59f..d930bcdcabada454c86719b66eb029f4e231d6b3 100644 --- a/src/space.c +++ b/src/space.c @@ -189,6 +189,7 @@ void space_rebuild_recycle_mapper(void *map_data, int num_elements, c->hydro.density = NULL; c->hydro.gradient = NULL; c->hydro.force = NULL; + c->hydro.limiter = NULL; c->grav.grav = NULL; c->grav.mm = NULL; c->hydro.dx_max_part = 0.0f; @@ -223,12 +224,12 @@ void space_rebuild_recycle_mapper(void *map_data, int num_elements, c->kick1 = NULL; c->kick2 = NULL; c->timestep = NULL; + c->timestep_limiter = NULL; c->end_force = NULL; c->hydro.drift = NULL; c->grav.drift = NULL; c->grav.drift_out = NULL; c->hydro.cooling = NULL; - c->sourceterms = NULL; c->grav.long_range = NULL; c->grav.down_in = NULL; 
c->grav.down = NULL; @@ -244,6 +245,8 @@ void space_rebuild_recycle_mapper(void *map_data, int num_elements, c->stars.do_sub_sort = 0; c->grav.do_sub_drift = 0; c->hydro.do_sub_drift = 0; + c->hydro.do_sub_limiter = 0; + c->hydro.do_limiter = 0; c->hydro.ti_end_min = -1; c->hydro.ti_end_max = -1; c->grav.ti_end_min = -1; @@ -272,12 +275,14 @@ void space_rebuild_recycle_mapper(void *map_data, int num_elements, c->mpi.hydro.recv_gradient = NULL; c->mpi.grav.recv = NULL; c->mpi.recv_ti = NULL; + c->mpi.limiter.recv = NULL; c->mpi.hydro.send_xv = NULL; c->mpi.hydro.send_rho = NULL; c->mpi.hydro.send_gradient = NULL; c->mpi.grav.send = NULL; c->mpi.send_ti = NULL; + c->mpi.limiter.send = NULL; #endif } } @@ -2707,6 +2712,8 @@ void space_split_recursive(struct space *s, struct cell *c, cp->stars.do_sub_sort = 0; cp->grav.do_sub_drift = 0; cp->hydro.do_sub_drift = 0; + cp->hydro.do_sub_limiter = 0; + cp->hydro.do_limiter = 0; #ifdef WITH_MPI cp->mpi.tag = -1; #endif // WITH_MPI @@ -4301,6 +4308,49 @@ void space_check_timesteps(struct space *s) { #endif } +/** + * @brief #threadpool mapper function for the limiter debugging check + */ +void space_check_limiter_mapper(void *map_data, int nr_parts, + void *extra_data) { +#ifdef SWIFT_DEBUG_CHECKS + /* Unpack the data */ + struct part *restrict parts = (struct part *)map_data; + + /* Verify that all limited particles have been treated */ + for (int k = 0; k < nr_parts; k++) { + + if (parts[k].time_bin == time_bin_inhibited) continue; + + if (parts[k].wakeup == time_bin_awake) + error("Particle still woken up! id=%lld", parts[k].id); + + if (parts[k].gpart != NULL) + if (parts[k].time_bin != parts[k].gpart->time_bin) + error("Gpart not on the same time-bin as part"); + } +#else + error("Calling debugging code without debugging flag activated."); +#endif +} + +/** + * @brief Checks that all particles have their wakeup flag in a correct state. + * + * Should only be used for debugging purposes. 
+ * + * @param s The #space to check. + */ +void space_check_limiter(struct space *s) { +#ifdef SWIFT_DEBUG_CHECKS + + threadpool_map(&s->e->threadpool, space_check_limiter_mapper, s->parts, + s->nr_parts, sizeof(struct part), 1000, NULL); +#else + error("Calling debugging code without debugging flag activated."); +#endif +} + /** * @brief Resets all the individual cell task counters to 0. * @@ -4384,7 +4434,6 @@ void space_struct_restore(struct space *s, FILE *stream) { s->local_cells_with_tasks_top = NULL; s->cells_with_particles_top = NULL; s->local_cells_with_particles_top = NULL; - s->grav_top_level = NULL; s->nr_local_cells_with_tasks = 0; s->nr_cells_with_particles = 0; #ifdef WITH_MPI diff --git a/src/space.h b/src/space.h index a1280945d2aa232cbb5e5b519266bc7058e5dc57..98ab2523668c9789bb644f0ebe300cf73ef6f182 100644 --- a/src/space.h +++ b/src/space.h @@ -35,6 +35,7 @@ #include "lock.h" #include "parser.h" #include "part.h" +#include "velociraptor_struct.h" /* Avoid cyclic inclusions */ struct cell; @@ -207,9 +208,6 @@ struct space { /*! The s-particle data (cells have pointers to this). */ struct spart *sparts; - /*! The top-level FFT task */ - struct task *grav_top_level; - /*! Minimal mass of all the #part */ float min_part_mass; @@ -237,6 +235,9 @@ struct space { /*! The associated engine. */ struct engine *e; + /*! The group information returned by VELOCIraptor for each #gpart. */ + struct velociraptor_gpart_data *gpart_group_data; + #ifdef WITH_MPI /*! Buffers for parts that we will receive from foreign cells. 
*/ @@ -317,6 +318,7 @@ void space_check_drift_point(struct space *s, integertime_t ti_drift, void space_check_top_multipoles_drift_point(struct space *s, integertime_t ti_drift); void space_check_timesteps(struct space *s); +void space_check_limiter(struct space *s); void space_replicate(struct space *s, int replicate, int verbose); void space_generate_gas(struct space *s, const struct cosmology *cosmo, int periodic, const double dim[3], int verbose); diff --git a/src/swift_velociraptor_part.h b/src/swift_velociraptor_part.h index adae884c2f930c44edf4d48f47f168475bc65885..700842ac5a13e5bee4af15cc0d8726fc668ce421 100644 --- a/src/swift_velociraptor_part.h +++ b/src/swift_velociraptor_part.h @@ -21,7 +21,13 @@ #include "part_type.h" -/* SWIFT/VELOCIraptor particle. */ +/** + * @brief SWIFT/VELOCIraptor particle. + * + * This should match the structure Swift::swift_vel_part + * defined in the file NBodylib/src/NBody/SwiftParticle.h + * of the VELOCIraptor code. + */ struct swift_vel_part { /*! Particle ID. */ @@ -42,8 +48,18 @@ struct swift_vel_part { /*! Internal energy of gas particle */ float u; + /*! Temperature of a gas particle */ + float T; + /*! Type of the #gpart (DM, gas, star, ...) */ enum part_type type; + + /*! MPI rank on which this #gpart lives on the SWIFT side. */ + int task; + + /*! Index of this #gpart in the global array of this rank on the SWIFT + side. */ + int index; }; #endif /* SWIFT_VELOCIRAPTOR_PART_H */ diff --git a/src/task.c b/src/task.c index f16aadc8afb7a2f811c4790688fb849ba1601ce3..4d5695f64c81e710c39fcc460a642a0887856814 100644 --- a/src/task.c +++ b/src/task.c @@ -66,6 +66,7 @@ const char *taskID_names[task_type_count] = {"none", "kick1", "kick2", "timestep", + "timestep_limiter", "send", "recv", "grav_long_range", @@ -83,10 +84,10 @@ const char *taskID_names[task_type_count] = {"none", /* Sub-task type names. 
*/ const char *subtaskID_names[task_subtype_count] = { - "none", "density", "gradient", "force", - "grav", "external_grav", "tend", "xv", - "rho", "gpart", "multipole", "spart", - "stars_density", "stars_feedback"}; + "none", "density", "gradient", "force", + "limiter", "grav", "external_grav", "tend", + "xv", "rho", "gpart", "multipole", + "spart", "stars_density", "stars_feedback"}; #ifdef WITH_MPI /* MPI communicators for the subtypes. */ @@ -140,6 +141,7 @@ __attribute__((always_inline)) INLINE static enum task_actions task_acts_on( case task_type_sort: case task_type_ghost: case task_type_extra_ghost: + case task_type_timestep_limiter: case task_type_cooling: return task_action_part; break; @@ -161,6 +163,7 @@ __attribute__((always_inline)) INLINE static enum task_actions task_acts_on( case task_subtype_density: case task_subtype_gradient: case task_subtype_force: + case task_subtype_limiter: return task_action_part; break; @@ -337,6 +340,8 @@ void task_unlock(struct task *t) { case task_type_drift_part: case task_type_sort: + case task_type_ghost: + case task_type_timestep_limiter: cell_unlocktree(ci); break; @@ -462,6 +467,8 @@ int task_lock(struct task *t) { case task_type_drift_part: case task_type_sort: + case task_type_ghost: + case task_type_timestep_limiter: if (ci->hydro.hold) return 0; if (cell_locktree(ci) != 0) return 0; break; @@ -655,6 +662,9 @@ void task_get_group_name(int type, int subtype, char *cluster) { case task_subtype_grav: strcpy(cluster, "Gravity"); break; + case task_subtype_limiter: + strcpy(cluster, "Timestep_limiter"); + break; case task_subtype_stars_density: strcpy(cluster, "Stars"); break; diff --git a/src/task.h b/src/task.h index a6782a6302e2f234f02d2b4e3052a11cb388dc31..100ac225bd5956e8d59d6a197c1257cb3e796ebb 100644 --- a/src/task.h +++ b/src/task.h @@ -58,6 +58,7 @@ enum task_types { task_type_kick1, task_type_kick2, task_type_timestep, + task_type_timestep_limiter, task_type_send, task_type_recv, task_type_grav_long_range, 
@@ -83,6 +84,7 @@ enum task_subtypes { task_subtype_density, task_subtype_gradient, task_subtype_force, + task_subtype_limiter, task_subtype_grav, task_subtype_external_grav, task_subtype_tend, diff --git a/src/timestep_limiter.h b/src/timestep_limiter.h new file mode 100644 index 0000000000000000000000000000000000000000..cfadc2e62a872a2d2a8a578fe6bb48fd24c5ba29 --- /dev/null +++ b/src/timestep_limiter.h @@ -0,0 +1,143 @@ +/******************************************************************************* + * This file is part of SWIFT. + * Copyright (c) 2016 Matthieu Schaller (matthieu.schaller@durham.ac.uk) + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published + * by the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public License + * along with this program. If not, see <http://www.gnu.org/licenses/>. + * + ******************************************************************************/ +#ifndef SWIFT_TIMESTEP_LIMITER_H +#define SWIFT_TIMESTEP_LIMITER_H + +/* Config parameters. */ +#include "../config.h" + +/** + * @brief Wakes up a particle by rewinding it's kick1 back in time and applying + * a new one such that the particle becomes active again in the next time-step. + * + * @param p The #part to update. + * @param xp Its #xpart companion. + * @param e The #engine (to extract time-line information). 
+ */ +__attribute__((always_inline)) INLINE static integertime_t timestep_limit_part( + struct part *restrict p, struct xpart *restrict xp, + const struct engine *e) { + + const struct cosmology *cosmo = e->cosmology; + const int with_cosmology = e->policy & engine_policy_cosmology; + const double time_base = e->time_base; + + integertime_t old_ti_beg, old_ti_end; + timebin_t old_time_bin; + + /* Let's see when this particle started and used to end */ + if (p->wakeup == time_bin_awake) { + + /* Normal case */ + old_ti_beg = get_integer_time_begin(e->ti_current, p->time_bin); + old_ti_end = get_integer_time_end(e->ti_current, p->time_bin); + old_time_bin = p->time_bin; + } else { + + /* Particle that was limited in the previous step already */ + old_ti_beg = get_integer_time_begin(e->ti_current, -p->wakeup); + old_ti_end = get_integer_time_end(e->ti_current, p->time_bin); + old_time_bin = -p->wakeup; + } + + const integertime_t old_dti = old_ti_end - old_ti_beg; + + /* The new fake time-step the particle will be on */ + const integertime_t new_fake_ti_step = + get_integer_timestep(e->min_active_bin); + + /* The actual time-step size this particle will use */ + const integertime_t new_ti_beg = old_ti_beg; + const integertime_t new_ti_end = e->ti_current + new_fake_ti_step; + const integertime_t new_dti = new_ti_end - new_ti_beg; + +#ifdef SWIFT_DEBUG_CHECKS + /* Some basic safety checks */ + if (old_ti_beg >= e->ti_current) + error( + "Incorrect value for old time-step beginning ti_current=%lld, " + "old_ti_beg=%lld", + e->ti_current, old_ti_beg); + + if (old_ti_end <= e->ti_current) + error( + "Incorrect value for old time-step end ti_current=%lld, " + "old_ti_end=%lld", + e->ti_current, old_ti_end); + + if (new_ti_end > old_ti_end) error("New end of time-step after the old one"); + + if (new_dti > old_dti) error("New time-step larger than old one"); + + if (new_fake_ti_step == 0) error("Wakeup call too early"); +#endif + + double dt_kick_grav = 0., dt_kick_hydro = 
0., dt_kick_therm = 0., + dt_kick_corr = 0.; + + /* Now we need to reverse the kick1... (the dt are negative here) */ + if (with_cosmology) { + dt_kick_hydro = -cosmology_get_hydro_kick_factor(cosmo, old_ti_beg, + old_ti_beg + old_dti / 2); + dt_kick_grav = -cosmology_get_grav_kick_factor(cosmo, old_ti_beg, + old_ti_beg + old_dti / 2); + dt_kick_therm = -cosmology_get_therm_kick_factor(cosmo, old_ti_beg, + old_ti_beg + old_dti / 2); + dt_kick_corr = -cosmology_get_corr_kick_factor(cosmo, old_ti_beg, + old_ti_beg + old_dti / 2); + } else { + dt_kick_hydro = -(old_dti / 2) * time_base; + dt_kick_grav = -(old_dti / 2) * time_base; + dt_kick_therm = -(old_dti / 2) * time_base; + dt_kick_corr = -(old_dti / 2) * time_base; + } + kick_part(p, xp, dt_kick_hydro, dt_kick_grav, dt_kick_therm, dt_kick_corr, + e->cosmology, e->hydro_properties, old_ti_beg + old_dti / 2, + old_ti_beg); + + /* ...and apply the new one (dt is positive) */ + if (with_cosmology) { + dt_kick_hydro = cosmology_get_hydro_kick_factor(cosmo, new_ti_beg, + new_ti_beg + new_dti / 2); + dt_kick_grav = cosmology_get_grav_kick_factor(cosmo, new_ti_beg, + new_ti_beg + new_dti / 2); + dt_kick_therm = cosmology_get_therm_kick_factor(cosmo, new_ti_beg, + new_ti_beg + new_dti / 2); + dt_kick_corr = cosmology_get_corr_kick_factor(cosmo, new_ti_beg, + new_ti_beg + new_dti / 2); + } else { + dt_kick_hydro = (new_dti / 2) * time_base; + dt_kick_grav = (new_dti / 2) * time_base; + dt_kick_therm = (new_dti / 2) * time_base; + dt_kick_corr = (new_dti / 2) * time_base; + } + kick_part(p, xp, dt_kick_hydro, dt_kick_grav, dt_kick_therm, dt_kick_corr, + e->cosmology, e->hydro_properties, new_ti_beg, + new_ti_beg + new_dti / 2); + + /* Remember the old time-bin */ + p->wakeup = old_time_bin; + + /* Update the time bin of this particle */ + p->time_bin = e->min_active_bin; + + return new_fake_ti_step; +} + +#endif /* SWIFT_TIMESTEP_LIMITER_H */ diff --git a/src/tools.c b/src/tools.c index 
c0400aa7b42322fce276a5e788af7bcb9e7f3625..ca531671a7a2522eab760c1eb4896a6bd522a073 100644 --- a/src/tools.c +++ b/src/tools.c @@ -217,7 +217,7 @@ void pairs_all_density(struct runner *r, struct cell *ci, struct cell *cj) { } /* Hit or miss? */ - if (r2 < hig2) { + if (r2 < hig2 && !part_is_inhibited(pj, e)) { /* Interact */ runner_iact_nonsym_density(r2, dx, hi, pj->h, pi, pj, a, H); @@ -249,7 +249,7 @@ void pairs_all_density(struct runner *r, struct cell *ci, struct cell *cj) { } /* Hit or miss? */ - if (r2 < hjg2) { + if (r2 < hjg2 && !part_is_inhibited(pi, e)) { /* Interact */ runner_iact_nonsym_density(r2, dx, hj, pi->h, pj, pi, a, H); @@ -438,7 +438,7 @@ void self_all_density(struct runner *r, struct cell *ci) { } /* Hit or miss? */ - if (r2 < hig2 && part_is_active(pi, e)) { + if (r2 < hig2 && part_is_active(pi, e) && !part_is_inhibited(pj, e)) { /* Interact */ runner_iact_nonsym_density(r2, dxi, hi, hj, pi, pj, a, H); @@ -446,7 +446,7 @@ void self_all_density(struct runner *r, struct cell *ci) { } /* Hit or miss? 
*/ - if (r2 < hjg2 && part_is_active(pj, e)) { + if (r2 < hjg2 && part_is_active(pj, e) && !part_is_inhibited(pi, e)) { dxi[0] = -dxi[0]; dxi[1] = -dxi[1]; diff --git a/src/tracers/EAGLE/tracers_io.h b/src/tracers/EAGLE/tracers_io.h index c593524c31d652e9aeb586a838d39d7eef7668ad..0b0e372ffc35ec5729affd4f6a86e358c914ca6d 100644 --- a/src/tracers/EAGLE/tracers_io.h +++ b/src/tracers/EAGLE/tracers_io.h @@ -65,7 +65,7 @@ __attribute__((always_inline)) INLINE static int tracers_write_particles( } else { - list[1] = io_make_output_field("MaxTemperature time", FLOAT, 1, + list[1] = io_make_output_field("Maximal Temperature time", FLOAT, 1, UNIT_CONV_NO_UNITS, xparts, tracers_data.maximum_temperature_time); } diff --git a/src/velociraptor_dummy.c b/src/velociraptor_dummy.c index 8f14a3230d341993122f09f2bccf3d8232550fd9..36cb65bfbe6931464f33d7e4b641f8882fdf65d0 100644 --- a/src/velociraptor_dummy.c +++ b/src/velociraptor_dummy.c @@ -20,9 +20,6 @@ /* Config parameters. */ #include "../config.h" -/* Some standard headers. */ -#include <stddef.h> - /* Local includes. */ #include "error.h" #include "swift_velociraptor_part.h" @@ -36,19 +33,41 @@ struct unitinfo {}; struct cell_loc {}; struct siminfo {}; +/* int InitVelociraptor(char *config_name, char *output_name, struct cosmoinfo cosmo_info, struct unitinfo unit_info, - struct siminfo sim_info) { + struct siminfo sim_info, const int numthreads) { error("This is only a dummy. Call the real one!"); return 0; } + int InvokeVelociraptor(const size_t num_gravity_parts, - const size_t num_hydro_parts, + const size_t num_hydro_parts, const int snapnum, struct swift_vel_part *swift_parts, - const int *cell_node_ids, char *output_name) { + const int *cell_node_ids, char *output_name, + const int numthreads) { + + error("This is only a dummy. Call the real one!"); + return 0; +} +*/ +int InitVelociraptor(char *config_name, struct unitinfo unit_info, + struct siminfo sim_info, const int numthreads) { error("This is only a dummy. 
Call the real one!"); return 0; } + +struct groupinfo *InvokeVelociraptor( + const int snapnum, char *output_name, struct cosmoinfo cosmo_info, + struct siminfo sim_info, const size_t num_gravity_parts, + const size_t num_hydro_parts, const size_t num_star_parts, + struct swift_vel_part *swift_parts, const int *cell_node_ids, + const int numthreads, const int return_group_flags, + int *const num_in_groups) { + error("This is only a dummy. Call the real one!"); + return 0; +} + #endif /* HAVE_DUMMY_VELOCIRAPTOR */ diff --git a/src/velociraptor_interface.c b/src/velociraptor_interface.c index 7756fe4b937986c108d223c56183f7d31cdfaa98..1049c4730e996112c9b4dc88effad3732af9025d 100644 --- a/src/velociraptor_interface.c +++ b/src/velociraptor_interface.c @@ -21,21 +21,23 @@ #include "../config.h" /* Some standard headers. */ -#include <errno.h> #include <unistd.h> /* This object's header. */ #include "velociraptor_interface.h" /* Local includes. */ -#include "common_io.h" +#include "cooling.h" #include "engine.h" #include "hydro.h" #include "swift_velociraptor_part.h" +#include "velociraptor_struct.h" #ifdef HAVE_VELOCIRAPTOR -/* Structure for passing cosmological information to VELOCIraptor. */ +/** + * @brief Structure for passing cosmological information to VELOCIraptor. + */ struct cosmoinfo { /*! Current expansion factor of the Universe. (cosmology.a) */ @@ -47,6 +49,15 @@ struct cosmoinfo { /*! Matter density parameter (cosmology.Omega_m) */ double Omega_m; + /*! Radiation density parameter (cosmology.Omega_r) */ + double Omega_r; + + /*! Neutrino density parameter (0 in SWIFT) */ + double Omega_nu; + + /*! Curvature density parameter (cosmology.Omega_k) */ + double Omega_k; + /*! Baryon density parameter (cosmology.Omega_b) */ double Omega_b; @@ -60,19 +71,21 @@ struct cosmoinfo { double w_de; }; -/* Structure for passing unit information to VELOCIraptor. */ +/** + * @brief Structure for passing unit information to VELOCIraptor. 
+ */ struct unitinfo { - /* Length conversion factor to kpc. */ + /*! Length conversion factor to kpc. */ double lengthtokpc; - /* Velocity conversion factor to km/s. */ + /*! Velocity conversion factor to km/s. */ double velocitytokms; - /* Mass conversion factor to solar masses. */ + /*! Mass conversion factor to solar masses. */ double masstosolarmass; - /* Potential conversion factor. */ + /*! Potential conversion factor to (km/s)^2. */ double energyperunitmass; /*! Newton's gravitationl constant (phys_const.const_newton_G)*/ @@ -82,18 +95,34 @@ struct unitinfo { double hubbleunit; }; -/* Structure to hold the location of a top-level cell. */ +/** + * @brief Structure to hold the location of a top-level cell. + */ struct cell_loc { - /* Coordinates x,y,z */ + /*! Coordinates x,y,z */ double loc[3]; }; -/* Structure for passing simulation information to VELOCIraptor. */ +/** + * @brief Structure for passing simulation information to VELOCIraptor for a + * given call. + */ struct siminfo { - double period, zoomhigresolutionmass, interparticlespacing, spacedimension[3]; - /* Number of top-cells. */ + /*! Size of periodic replications */ + double period; + + /*! Mass of the high-resolution DM particles in a zoom-in run. */ + double zoomhigresolutionmass; + + /*! Mean inter-particle separation of the DM particles */ + double interparticlespacing; + + /*! Spacial extent of the simulation volume */ + double spacedimension[3]; + + /*! Number of top-level cells. */ int numcells; /*! Locations of top-level cells. */ @@ -105,142 +134,135 @@ struct siminfo { /*! Inverse of the top-level cell width. */ double icellwidth[3]; + /*! Holds the node ID of each top-level cell. */ + int *cellnodeids; + + /*! Is this a cosmological simulation? */ int icosmologicalsim; + + /*! Is this a zoom-in simulation? */ + int izoomsim; + + /*! Do we have DM particles? */ + int idarkmatter; + + /*! Do we have gas particles? */ + int igas; + + /*! Do we have star particles? 
*/ + int istar; + + /*! Do we have BH particles? */ + int ibh; + + /*! Do we have other particles? */ + int iother; }; -/* VELOCIraptor interface. */ -int InitVelociraptor(char *config_name, char *output_name, - struct cosmoinfo cosmo_info, struct unitinfo unit_info, - struct siminfo sim_info); -int InvokeVelociraptor(const size_t num_gravity_parts, - const size_t num_hydro_parts, - struct swift_vel_part *swift_parts, - const int *cell_node_ids, char *output_name); +/** + * @brief Structure for group information back to swift + */ +struct groupinfo { + + /*! Index of a #gpart in the global array on this MPI rank */ + int index; + + /*! Group number of the #gpart. */ + long long groupID; +}; + +int InitVelociraptor(char *config_name, struct unitinfo unit_info, + struct siminfo sim_info, const int numthreads); + +struct groupinfo *InvokeVelociraptor( + const int snapnum, char *output_name, struct cosmoinfo cosmo_info, + struct siminfo sim_info, const size_t num_gravity_parts, + const size_t num_hydro_parts, const size_t num_star_parts, + struct swift_vel_part *swift_parts, const int *cell_node_ids, + const int numthreads, const int return_group_flags, + int *const num_in_groups); #endif /* HAVE_VELOCIRAPTOR */ /** - * @brief Initialise VELOCIraptor with input and output file names along with - * cosmological info needed to run. + * @brief Initialise VELOCIraptor with configuration, units, + * simulation info needed to run. * * @param e The #engine. - * */ void velociraptor_init(struct engine *e) { #ifdef HAVE_VELOCIRAPTOR - struct space *s = e->s; - struct cosmoinfo cosmo_info; - struct unitinfo unit_info; - struct siminfo sim_info; - - /* Set cosmological constants. 
*/ - cosmo_info.atime = e->cosmology->a; - cosmo_info.littleh = e->cosmology->h; - cosmo_info.Omega_m = e->cosmology->Omega_m; - cosmo_info.Omega_b = e->cosmology->Omega_b; - cosmo_info.Omega_Lambda = e->cosmology->Omega_lambda; - cosmo_info.Omega_cdm = e->cosmology->Omega_m - e->cosmology->Omega_b; - cosmo_info.w_de = e->cosmology->w; + const ticks tic = getticks(); - message("Scale factor: %e", cosmo_info.atime); - message("Little h: %e", cosmo_info.littleh); - message("Omega_m: %e", cosmo_info.Omega_m); - message("Omega_b: %e", cosmo_info.Omega_b); - message("Omega_Lambda: %e", cosmo_info.Omega_Lambda); - message("Omega_cdm: %e", cosmo_info.Omega_cdm); - message("w_de: %e", cosmo_info.w_de); + /* Internal SWIFT units */ + const struct unit_system *swift_us = e->internal_units; - if (e->cosmology->w != -1.) - error("w_de is not 1. It is: %lf", e->cosmology->w); + /* CGS units and physical constants in CGS */ + struct unit_system cgs_us; + units_init_cgs(&cgs_us); + struct phys_const cgs_pc; + phys_const_init(&cgs_us, /*params=*/NULL, &cgs_pc); /* Set unit conversions. */ - unit_info.lengthtokpc = 1.0; - unit_info.velocitytokms = 1.0; - unit_info.masstosolarmass = 1.0; - unit_info.energyperunitmass = 1.0; + struct unitinfo unit_info; + unit_info.lengthtokpc = + units_cgs_conversion_factor(swift_us, UNIT_CONV_LENGTH) / + (1000. 
* cgs_pc.const_parsec); + unit_info.velocitytokms = + units_cgs_conversion_factor(swift_us, UNIT_CONV_VELOCITY) / 1.0e5; + unit_info.masstosolarmass = + units_cgs_conversion_factor(swift_us, UNIT_CONV_MASS) / + cgs_pc.const_solar_mass; + unit_info.energyperunitmass = + units_cgs_conversion_factor(swift_us, UNIT_CONV_ENERGY_PER_UNIT_MASS) / + (1.0e10); unit_info.gravity = e->physical_constants->const_newton_G; unit_info.hubbleunit = e->cosmology->H0 / e->cosmology->h; - message("Length conversion factor: %e", unit_info.lengthtokpc); - message("Velocity conversion factor: %e", unit_info.velocitytokms); - message("Mass conversion factor: %e", unit_info.masstosolarmass); - message("Potential conversion factor: %e", unit_info.energyperunitmass); - message("G: %e", unit_info.gravity); - message("H: %e", unit_info.hubbleunit); - - /* TODO: Find the total number of DM particles when running with star - * particles and BHs. */ - const int total_nr_dmparts = e->total_nr_gparts - e->total_nr_parts; + /* Gather some information about the simulation */ + struct siminfo sim_info; - /* Set simulation information. */ - if (e->s->periodic) { - sim_info.period = - unit_info.lengthtokpc * - s->dim[0]; /* Physical size of box in VELOCIraptor units (kpc). */ - } else - sim_info.period = 0.0; - sim_info.zoomhigresolutionmass = -1.0; /* Placeholder. */ - sim_info.interparticlespacing = sim_info.period / cbrt(total_nr_dmparts); - if (e->policy & engine_policy_cosmology) + /* Are we running with cosmology? 
*/ + if (e->policy & engine_policy_cosmology) { sim_info.icosmologicalsim = 1; - else + } else { sim_info.icosmologicalsim = 0; - sim_info.spacedimension[0] = unit_info.lengthtokpc * s->dim[0]; - sim_info.spacedimension[1] = unit_info.lengthtokpc * s->dim[1]; - sim_info.spacedimension[2] = unit_info.lengthtokpc * s->dim[2]; - sim_info.numcells = s->nr_cells; - - sim_info.cellwidth[0] = unit_info.lengthtokpc * s->cells_top[0].width[0]; - sim_info.cellwidth[1] = unit_info.lengthtokpc * s->cells_top[0].width[1]; - sim_info.cellwidth[2] = unit_info.lengthtokpc * s->cells_top[0].width[2]; - - sim_info.icellwidth[0] = s->iwidth[0] / unit_info.lengthtokpc; - sim_info.icellwidth[1] = s->iwidth[1] / unit_info.lengthtokpc; - sim_info.icellwidth[2] = s->iwidth[2] / unit_info.lengthtokpc; - - /* Only allocate cell location array on first call to velociraptor_init(). */ - if (e->cell_loc == NULL) { - /* Allocate and populate top-level cell locations. */ - if (posix_memalign((void **)&(e->cell_loc), 32, - s->nr_cells * sizeof(struct cell_loc)) != 0) - error("Failed to allocate top-level cell locations for VELOCIraptor."); - - for (int i = 0; i < s->nr_cells; i++) { - e->cell_loc[i].loc[0] = unit_info.lengthtokpc * s->cells_top[i].loc[0]; - e->cell_loc[i].loc[1] = unit_info.lengthtokpc * s->cells_top[i].loc[1]; - e->cell_loc[i].loc[2] = unit_info.lengthtokpc * s->cells_top[i].loc[2]; - } } - - sim_info.cell_loc = e->cell_loc; - - char configfilename[PARSER_MAX_LINE_SIZE], - outputFileName[PARSER_MAX_LINE_SIZE + 128]; - parser_get_param_string(e->parameter_file, - "StructureFinding:config_file_name", configfilename); - snprintf(outputFileName, PARSER_MAX_LINE_SIZE + 128, "%s.VELOCIraptor", - e->stfBaseName); - - message("Config file name: %s", configfilename); - message("Period: %e", sim_info.period); - message("Zoom high res mass: %e", sim_info.zoomhigresolutionmass); - message("Inter-particle spacing: %e", sim_info.interparticlespacing); - message("Cosmological: %d", 
sim_info.icosmologicalsim); - message("Space dimensions: (%e,%e,%e)", sim_info.spacedimension[0], - sim_info.spacedimension[1], sim_info.spacedimension[2]); - message("No. of top-level cells: %d", sim_info.numcells); - message("Top-level cell locations range: (%e,%e,%e) -> (%e,%e,%e)", - sim_info.cell_loc[0].loc[0], sim_info.cell_loc[0].loc[1], - sim_info.cell_loc[0].loc[2], - sim_info.cell_loc[sim_info.numcells - 1].loc[0], - sim_info.cell_loc[sim_info.numcells - 1].loc[1], - sim_info.cell_loc[sim_info.numcells - 1].loc[2]); + sim_info.izoomsim = 0; + + /* Tell VELOCIraptor what we have in the simulation */ + sim_info.idarkmatter = (e->total_nr_gparts - e->total_nr_parts > 0); + sim_info.igas = (e->policy & engine_policy_hydro); + sim_info.istar = (e->policy & engine_policy_stars); + sim_info.ibh = 0; // sim_info.ibh = (e->policy&engine_policy_bh); + sim_info.iother = 0; + + /* Be nice, talk! */ + if (e->verbose) { + message("VELOCIraptor conf: Length conversion factor: %e", + unit_info.lengthtokpc); + message("VELOCIraptor conf: Velocity conversion factor: %e", + unit_info.velocitytokms); + message("VELOCIraptor conf: Mass conversion factor: %e", + unit_info.masstosolarmass); + message("VELOCIraptor conf: Internal energy conversion factor: %e", + unit_info.energyperunitmass); + message("VELOCIraptor conf: G: %e", unit_info.gravity); + message("VELOCIraptor conf: H0/h: %e", unit_info.hubbleunit); + message("VELOCIraptor conf: Config file name: %s", e->stf_config_file_name); + message("VELOCIraptor conf: Cosmological Simulation: %d", + sim_info.icosmologicalsim); + } /* Initialise VELOCIraptor. */ - if (!InitVelociraptor(configfilename, outputFileName, cosmo_info, unit_info, - sim_info)) - error("Exiting. 
VELOCIraptor initialisation failed."); + if (InitVelociraptor(e->stf_config_file_name, unit_info, sim_info, + e->nr_threads) != 1) + error("VELOCIraptor initialisation failed."); + + if (e->verbose) + message("took %.3f %s.", clocks_from_ticks(getticks() - tic), + clocks_getunit()); #else error("SWIFT not configure to run with VELOCIraptor."); #endif /* HAVE_VELOCIRAPTOR */ @@ -250,118 +272,287 @@ void velociraptor_init(struct engine *e) { * @brief Run VELOCIraptor with current particle data. * * @param e The #engine. - * + * @param linked_with_snap Are we running at the same time as a snapshot dump? */ -void velociraptor_invoke(struct engine *e) { +void velociraptor_invoke(struct engine *e, const int linked_with_snap) { #ifdef HAVE_VELOCIRAPTOR - struct space *s = e->s; - struct gpart *gparts = s->gparts; - struct part *parts = s->parts; - struct xpart *xparts = s->xparts; + + const struct cosmology *cosmo = e->cosmology; + const struct hydro_props *hydro_props = e->hydro_properties; + const struct unit_system *us = e->internal_units; + const struct phys_const *phys_const = e->physical_constants; + const struct cooling_function_data *cool_func = e->cooling_func; + + /* Handle on the particles */ + const struct space *s = e->s; + const struct part *parts = s->parts; + const struct xpart *xparts = s->xparts; + const struct gpart *gparts = s->gparts; + const struct spart *sparts = s->sparts; const size_t nr_gparts = s->nr_gparts; - const size_t nr_hydro_parts = s->nr_parts; + const size_t nr_parts = s->nr_parts; + const size_t nr_sparts = s->nr_sparts; const int nr_cells = s->nr_cells; - int *cell_node_ids = NULL; - static int stf_output_count = 0; + + const ticks tic = getticks(); /* Allow thread to run on any core for the duration of the call to - * VELOCIraptor so that - * when OpenMP threads are spawned they can run on any core on the processor. - */ + * VELOCIraptor so that when OpenMP threads are spawned + * they can run on any core on the processor. 
*/ const int nr_cores = sysconf(_SC_NPROCESSORS_ONLN); - cpu_set_t cpuset; pthread_t thread = pthread_self(); /* Set affinity mask to include all cores on the CPU for VELOCIraptor. */ + cpu_set_t cpuset; CPU_ZERO(&cpuset); for (int j = 0; j < nr_cores; j++) CPU_SET(j, &cpuset); - pthread_setaffinity_np(thread, sizeof(cpu_set_t), &cpuset); - ticks tic = getticks(); + /* Set cosmology information for this point in time */ + struct cosmoinfo cosmo_info; + cosmo_info.atime = e->cosmology->a; + cosmo_info.littleh = e->cosmology->h; + cosmo_info.Omega_m = e->cosmology->Omega_m; + cosmo_info.Omega_b = e->cosmology->Omega_b; + cosmo_info.Omega_r = e->cosmology->Omega_r; + cosmo_info.Omega_k = e->cosmology->Omega_k; + cosmo_info.Omega_nu = 0.; + cosmo_info.Omega_Lambda = e->cosmology->Omega_lambda; + cosmo_info.Omega_cdm = e->cosmology->Omega_m - e->cosmology->Omega_b; + cosmo_info.w_de = e->cosmology->w; + + /* Report the cosmo info we use */ + if (e->verbose) { + message("VELOCIraptor conf: Scale factor: %e", cosmo_info.atime); + message("VELOCIraptor conf: Little h: %e", cosmo_info.littleh); + message("VELOCIraptor conf: Omega_m: %e", cosmo_info.Omega_m); + message("VELOCIraptor conf: Omega_b: %e", cosmo_info.Omega_b); + message("VELOCIraptor conf: Omega_Lambda: %e", cosmo_info.Omega_Lambda); + message("VELOCIraptor conf: Omega_cdm: %e", cosmo_info.Omega_cdm); + message("VELOCIraptor conf: w_de: %e", cosmo_info.w_de); + } + + /* Update the simulation information */ + struct siminfo sim_info; + + /* Period of the box (Note we assume a cubic box!) */ + if (e->s->periodic) { + sim_info.period = s->dim[0]; + } else { + sim_info.period = 0.0; + } + + /* Tell VELOCIraptor this is not a zoom-in simulation */ + sim_info.zoomhigresolutionmass = -1.0; + + /* Are we running with cosmology? 
*/ + if (e->policy & engine_policy_cosmology) { + sim_info.icosmologicalsim = 1; + sim_info.izoomsim = 0; + const size_t total_nr_baryons = e->total_nr_parts + e->total_nr_sparts; + const size_t total_nr_dmparts = e->total_nr_gparts - total_nr_baryons; + sim_info.interparticlespacing = sim_info.period / cbrt(total_nr_dmparts); + } else { + sim_info.icosmologicalsim = 0; + sim_info.izoomsim = 0; + sim_info.interparticlespacing = -1; + } + + /* Set the spatial extent of the simulation volume */ + sim_info.spacedimension[0] = s->dim[0]; + sim_info.spacedimension[1] = s->dim[1]; + sim_info.spacedimension[2] = s->dim[2]; + + /* Store number of top-level cells */ + sim_info.numcells = s->nr_cells; + + /* Size and inverse size of the top-level cells in VELOCIraptor units */ + sim_info.cellwidth[0] = s->cells_top[0].width[0]; + sim_info.cellwidth[1] = s->cells_top[0].width[1]; + sim_info.cellwidth[2] = s->cells_top[0].width[2]; + sim_info.icellwidth[0] = s->iwidth[0]; + sim_info.icellwidth[1] = s->iwidth[1]; + sim_info.icellwidth[2] = s->iwidth[2]; + + /* Copy the position of the top-level cells */ + if (posix_memalign((void **)&sim_info.cell_loc, 32, + s->nr_cells * sizeof(struct cell_loc)) != 0) + error("Failed to allocate top-level cell locations for VELOCIraptor."); + for (int i = 0; i < s->nr_cells; i++) { + sim_info.cell_loc[i].loc[0] = s->cells_top[i].loc[0]; + sim_info.cell_loc[i].loc[1] = s->cells_top[i].loc[1]; + sim_info.cell_loc[i].loc[2] = s->cells_top[i].loc[2]; + } + + if (e->verbose) { + message("VELOCIraptor conf: Space dimensions: (%e,%e,%e)", + sim_info.spacedimension[0], sim_info.spacedimension[1], + sim_info.spacedimension[2]); + message("VELOCIraptor conf: No. 
of top-level cells: %d", sim_info.numcells); + message( + "VELOCIraptor conf: Top-level cell locations range: (%e,%e,%e) -> " + "(%e,%e,%e)", + sim_info.cell_loc[0].loc[0], sim_info.cell_loc[0].loc[1], + sim_info.cell_loc[0].loc[2], + sim_info.cell_loc[sim_info.numcells - 1].loc[0], + sim_info.cell_loc[sim_info.numcells - 1].loc[1], + sim_info.cell_loc[sim_info.numcells - 1].loc[2]); + } /* Allocate and populate array of cell node IDs. */ + int *cell_node_ids = NULL; if (posix_memalign((void **)&cell_node_ids, 32, nr_cells * sizeof(int)) != 0) error("Failed to allocate list of cells node IDs for VELOCIraptor."); - for (int i = 0; i < nr_cells; i++) cell_node_ids[i] = s->cells_top[i].nodeID; - message("MPI rank %d sending %zu gparts to VELOCIraptor.", engine_rank, - nr_gparts); + /* Mention the number of particles being sent */ + if (e->verbose) + message( + "VELOCIraptor conf: MPI rank %d sending %zu gparts to VELOCIraptor.", + engine_rank, nr_gparts); - /* Append base name with either the step number or time depending on what - * format is specified in the parameter file. */ + /* Append base name with the current output number */ char outputFileName[PARSER_MAX_LINE_SIZE + 128]; - if (e->stf_output_freq_format == io_stf_steps) { - snprintf(outputFileName, PARSER_MAX_LINE_SIZE + 128, "%s_%04i.VELOCIraptor", - e->stfBaseName, e->step); - } else if (e->stf_output_freq_format == io_stf_time) { + + /* What should the filename be? */ + if (linked_with_snap) { + snprintf(outputFileName, PARSER_MAX_LINE_SIZE + 128, + "stf_%s_%04i.VELOCIraptor", e->snapshot_base_name, + e->snapshot_output_count); + } else { snprintf(outputFileName, PARSER_MAX_LINE_SIZE + 128, "%s_%04i.VELOCIraptor", - e->stfBaseName, stf_output_count); + e->stf_base_name, e->stf_output_count); + } + + /* What is the snapshot number? 
*/ + int snapnum; + if (linked_with_snap) { + snapnum = e->snapshot_output_count; + } else { + snapnum = e->stf_output_count; } /* Allocate and populate an array of swift_vel_parts to be passed to * VELOCIraptor. */ struct swift_vel_part *swift_parts = NULL; - if (posix_memalign((void **)&swift_parts, part_align, nr_gparts * sizeof(struct swift_vel_part)) != 0) error("Failed to allocate array of particles for VELOCIraptor."); - bzero(swift_parts, nr_gparts * sizeof(struct swift_vel_part)); - - const float energy_scale = 1.0; - const float a = e->cosmology->a; - - message("Energy scaling factor: %f", energy_scale); - message("a: %f", a); + const float a_inv = e->cosmology->a_inv; - /* Convert particle properties into VELOCIraptor units */ + /* Convert particle properties into VELOCIraptor units. + * VELOCIraptor wants: + * - Co-moving positions, + * - Peculiar velocities, + * - Co-moving potential, + * - Physical internal energy (for the gas), + * - Temperatures (for the gas). + */ for (size_t i = 0; i < nr_gparts; i++) { + swift_parts[i].x[0] = gparts[i].x[0]; swift_parts[i].x[1] = gparts[i].x[1]; swift_parts[i].x[2] = gparts[i].x[2]; - swift_parts[i].v[0] = gparts[i].v_full[0] / a; - swift_parts[i].v[1] = gparts[i].v_full[1] / a; - swift_parts[i].v[2] = gparts[i].v_full[2] / a; + + swift_parts[i].v[0] = gparts[i].v_full[0] * a_inv; + swift_parts[i].v[1] = gparts[i].v_full[1] * a_inv; + swift_parts[i].v[2] = gparts[i].v_full[2] * a_inv; + swift_parts[i].mass = gravity_get_mass(&gparts[i]); swift_parts[i].potential = gravity_get_comoving_potential(&gparts[i]); + swift_parts[i].type = gparts[i].type; + swift_parts[i].index = i; +#ifdef WITH_MPI + swift_parts[i].task = e->nodeID; +#else + swift_parts[i].task = 0; +#endif + /* Set gas particle IDs from their hydro counterparts and set internal * energies. 
*/ - if (gparts[i].type == swift_type_gas) { - swift_parts[i].id = parts[-gparts[i].id_or_neg_offset].id; - swift_parts[i].u = - hydro_get_physical_internal_energy( - &parts[-gparts[i].id_or_neg_offset], - &xparts[-gparts[i].id_or_neg_offset], e->cosmology) * - energy_scale; - } else if (gparts[i].type == swift_type_dark_matter) { - swift_parts[i].id = gparts[i].id_or_neg_offset; - swift_parts[i].u = 0.f; - } else { - error("Particle type not handled by velociraptor (yet?) !"); + switch (gparts[i].type) { + + case swift_type_gas: { + const struct part *p = &parts[-gparts[i].id_or_neg_offset]; + const struct xpart *xp = &xparts[-gparts[i].id_or_neg_offset]; + + swift_parts[i].id = parts[-gparts[i].id_or_neg_offset].id; + swift_parts[i].u = hydro_get_drifted_physical_internal_energy(p, cosmo); + swift_parts[i].T = cooling_get_temperature(phys_const, hydro_props, us, + cosmo, cool_func, p, xp); + } break; + + case swift_type_stars: + + swift_parts[i].id = sparts[-gparts[i].id_or_neg_offset].id; + swift_parts[i].u = 0.f; + swift_parts[i].T = 0.f; + break; + + case swift_type_dark_matter: + + swift_parts[i].id = gparts[i].id_or_neg_offset; + swift_parts[i].u = 0.f; + swift_parts[i].T = 0.f; + break; + + default: + error("Particle type not handled by VELOCIraptor."); } } + /* Values returned by VELOCIraptor */ + int num_gparts_in_groups = -1; + struct groupinfo *group_info = NULL; + /* Call VELOCIraptor. */ - if (!InvokeVelociraptor(nr_gparts, nr_hydro_parts, swift_parts, cell_node_ids, - outputFileName)) + group_info = (struct groupinfo *)InvokeVelociraptor( + snapnum, outputFileName, cosmo_info, sim_info, nr_gparts, nr_parts, + nr_sparts, swift_parts, cell_node_ids, e->nr_threads, linked_with_snap, + &num_gparts_in_groups); + + /* Check that the output is valid */ + if (linked_with_snap && group_info == NULL && num_gparts_in_groups < 0) { error("Exiting. 
Call to VELOCIraptor failed on rank: %d.", e->nodeID); + } + if (!linked_with_snap && group_info != NULL) { + error("VELOCIraptor returned an array whilst it should not have."); + } + + /* Assign the group IDs back to the gparts */ + if (linked_with_snap) { + + if (posix_memalign((void **)&s->gpart_group_data, part_align, + nr_gparts * sizeof(struct velociraptor_gpart_data)) != 0) + error("Failed to allocate array of gpart data for VELOCIraptor i/o."); + + struct velociraptor_gpart_data *data = s->gpart_group_data; + + /* Zero the array (gparts not in groups have an ID of 0) */ + bzero(data, nr_gparts * sizeof(struct velociraptor_gpart_data)); + + /* Copy the data at the right place */ + for (int i = 0; i < num_gparts_in_groups; i++) { + data[group_info[i].index].groupID = group_info[i].groupID; + } + + /* Free the array returned by VELOCIraptor */ + free(group_info); + } /* Reset the pthread affinity mask after VELOCIraptor returns. */ pthread_setaffinity_np(thread, sizeof(cpu_set_t), engine_entry_affinity()); - /* Free cell node ids after VELOCIraptor has copied them. */ - free(cell_node_ids); - free(swift_parts); - - stf_output_count++; + /* Increase output counter (if not linked with snapshots) */ + if (!linked_with_snap) e->stf_output_count++; - message("VELOCIraptor took %.3f %s on rank %d.", - clocks_from_ticks(getticks() - tic), clocks_getunit(), engine_rank); + if (e->verbose) + message("took %.3f %s.", clocks_from_ticks(getticks() - tic), + clocks_getunit()); #else error("SWIFT not configure to run with VELOCIraptor."); #endif /* HAVE_VELOCIRAPTOR */ diff --git a/src/velociraptor_interface.h b/src/velociraptor_interface.h index 1f29be11c9dd8e267c87201b0a438979fec3775b..2547fa56c1677e93b1c59a1435e9a6ab92c1f308 100644 --- a/src/velociraptor_interface.h +++ b/src/velociraptor_interface.h @@ -22,19 +22,11 @@ /* Config parameters. */ #include "../config.h" -/** - * @brief The different formats for when to run structure finding. 
- */ -enum io_stf_output_format { - io_stf_steps = 0, /*!< Output every N steps */ - io_stf_time /*!< Output at fixed time intervals */ -}; - /* Forward declaration */ struct engine; /* VELOCIraptor wrapper functions. */ void velociraptor_init(struct engine *e); -void velociraptor_invoke(struct engine *e); +void velociraptor_invoke(struct engine *e, const int linked_with_snap); #endif /* SWIFT_VELOCIRAPTOR_INTERFACE_H */ diff --git a/src/velociraptor_io.h b/src/velociraptor_io.h new file mode 100644 index 0000000000000000000000000000000000000000..f18398219bfbc5cd6bb58a37b103f29527fa5589 --- /dev/null +++ b/src/velociraptor_io.h @@ -0,0 +1,78 @@ +/******************************************************************************* + * This file is part of SWIFT. + * Copyright (c) 2019 Matthieu Schaller (schaller@strw.leidenuniv.nl) + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published + * by the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public License + * along with this program. If not, see <http://www.gnu.org/licenses/>. + * + ******************************************************************************/ +#ifndef SWIFT_VELOCIRAPTOR_IO_H +#define SWIFT_VELOCIRAPTOR_IO_H + +/* Config parameters. 
*/ +#include "../config.h" + +INLINE static void velociraptor_convert_part_groupID(const struct engine* e, + const struct part* p, + const struct xpart* xp, + long long* ret) { + if (p->gpart == NULL) + ret[0] = 0.f; + else { + const ptrdiff_t offset = p->gpart - e->s->gparts; + *ret = (e->s->gpart_group_data + offset)->groupID; + } +} + +INLINE static void velociraptor_convert_spart_groupID(const struct engine* e, + const struct spart* sp, + long long* ret) { + if (sp->gpart == NULL) + ret[0] = 0.f; + else { + const ptrdiff_t offset = sp->gpart - e->s->gparts; + *ret = (e->s->gpart_group_data + offset)->groupID; + } +} + +__attribute__((always_inline)) INLINE static int velociraptor_write_parts( + const struct part* parts, const struct xpart* xparts, + struct io_props* list) { + + list[0] = io_make_output_field_convert_part( + "GroupID", LONGLONG, 1, UNIT_CONV_NO_UNITS, parts, xparts, + velociraptor_convert_part_groupID); + + return 1; +} + +__attribute__((always_inline)) INLINE static int velociraptor_write_gparts( + const struct velociraptor_gpart_data* group_data, struct io_props* list) { + + list[0] = io_make_output_field("GroupID", LONGLONG, 1, UNIT_CONV_NO_UNITS, + group_data, groupID); + + return 1; +} + +__attribute__((always_inline)) INLINE static int velociraptor_write_sparts( + const struct spart* sparts, struct io_props* list) { + + list[0] = io_make_output_field_convert_spart( + "GroupID", LONGLONG, 1, UNIT_CONV_NO_UNITS, sparts, + velociraptor_convert_spart_groupID); + + return 1; +} + +#endif /* SWIFT_VELOCIRAPTOR_IO_H */ diff --git a/src/velociraptor_struct.h b/src/velociraptor_struct.h new file mode 100644 index 0000000000000000000000000000000000000000..b998263a6ba2fe0aaa6552f274cb8f4ee85d3b1c --- /dev/null +++ b/src/velociraptor_struct.h @@ -0,0 +1,34 @@ +/******************************************************************************* + * This file is part of SWIFT. 
+ * Copyright (c) 2019 Matthieu Schaller (schaller@strw.leidenuniv.nl) + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published + * by the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public License + * along with this program. If not, see <http://www.gnu.org/licenses/>. + * + ******************************************************************************/ +#ifndef SWIFT_VELOCIRAPTOR_STRUCT_H +#define SWIFT_VELOCIRAPTOR_STRUCT_H + +/* Config parameters. */ +#include "../config.h" + +/** + * @brief Data returned by VELOCIraptor for each #gpart. + */ +struct velociraptor_gpart_data { + + /*! Group ID of that #gpart. */ + long long groupID; +}; + +#endif /* SWIFT_VELOCIRAPTOR_STRUCT_H */ diff --git a/tests/test125cells.c b/tests/test125cells.c index 5a9c4ea9511b5d75a3098f7997b83607cdcbd715..5b518970ea118c98a8354e816f86ecd16a5f85cf 100644 --- a/tests/test125cells.c +++ b/tests/test125cells.c @@ -459,11 +459,10 @@ void dump_particle_fields(char *fileName, struct cell *main_cell, /* Just a forward declaration... 
*/ void runner_dopair1_branch_density(struct runner *r, struct cell *ci, struct cell *cj); -void runner_doself1_density(struct runner *r, struct cell *ci); +void runner_doself1_branch_density(struct runner *r, struct cell *ci); void runner_dopair2_branch_force(struct runner *r, struct cell *ci, struct cell *cj); -void runner_doself2_force(struct runner *r, struct cell *ci); -void runner_doself2_force_vec(struct runner *r, struct cell *ci); +void runner_doself2_branch_force(struct runner *r, struct cell *ci); /* And go... */ int main(int argc, char *argv[]) { @@ -707,7 +706,7 @@ int main(int argc, char *argv[]) { /* And now the self-interaction for the central cells*/ for (int j = 0; j < 27; ++j) - runner_doself1_density(&runner, inner_cells[j]); + runner_doself1_branch_density(&runner, inner_cells[j]); /* Ghost to finish everything on the central cells */ for (int j = 0; j < 27; ++j) runner_do_ghost(&runner, inner_cells[j], 0); @@ -745,7 +744,7 @@ int main(int argc, char *argv[]) { ticks self_tic = getticks(); /* And now the self-interaction for the main cell */ - runner_doself2_force(&runner, main_cell); + runner_doself2_branch_force(&runner, main_cell); timings[26] += getticks() - self_tic; diff --git a/tests/tolerance_125_perturbed.dat b/tests/tolerance_125_perturbed.dat index 9987f8a0703a6106f41b73c1a16b4cea8af3bc1e..95f5f78246a82b7c326c87f9b4edbac4f51c65e9 100644 --- a/tests/tolerance_125_perturbed.dat +++ b/tests/tolerance_125_perturbed.dat @@ -1,4 +1,4 @@ # ID pos_x pos_y pos_z v_x v_y v_z h rho div_v S u P c a_x a_y a_z h_dt v_sig dS/dt du/dt 0 1e-4 1e-4 1e-4 1e-4 1e-4 1e-4 1e-4 1e-4 1e-4 1e-4 1e-4 1e-4 1e-4 1e-4 1e-4 1e-4 1e-4 1e-4 1e-4 1e-4 - 0 1e-4 1e-4 1e-4 1e-4 1e-4 1e-4 1e-4 1e-4 1e-4 1e-4 1e-4 1e-4 1e-4 2.3e-3 2e-3 2e-3 1e-4 1e-4 1e-4 1e-4 + 0 1e-4 1e-4 1e-4 1e-4 1e-4 1e-4 1e-4 1e-4 1e-4 1e-4 1e-4 1e-4 1e-4 3.6e-3 2e-3 2e-3 1e-4 1e-4 1e-4 1e-4 0 1e-6 1e-6 1e-6 1e-6 1e-6 1e-6 1e-6 1e-6 1e-6 1e-6 1e-6 1e-6 1e-6 2e-4 2e-4 2e-4 1e-6 1e-6 1e-6 1e-6 diff 
--git a/tools/analyse_runtime.py b/tools/analyse_runtime.py index f2f198dfb80d6373e63296b6350fe6768191dd39..a2c3dd0f201fc47518d6bb0a6a918627db2f3e96 100755 --- a/tools/analyse_runtime.py +++ b/tools/analyse_runtime.py @@ -53,92 +53,52 @@ threshold = 0.008 num_files = len(sys.argv) - 1 labels = [ - "Gpart assignment", - "Mesh comunication", - "Forward Fourier transform", - "Green function", - "Backwards Fourier transform", - "engine_recompute_displacement_constraint:", - "engine_exchange_top_multipoles:", - "updating particle counts", - "Making gravity tasks", - "Making hydro tasks", - "Splitting tasks", - "Counting and linking tasks", - "Setting super-pointers", - "Making extra hydroloop tasks", - "Making extra starsloop tasks", - "Linking gravity tasks", - "Creating send tasks", - "Exchanging cell tags", - "Creating recv tasks", - "Setting unlocks", - "Ranking the tasks", - "scheduler_reweight:", - "space_list_useful_top_level_cells:", - "space_rebuild:", - "engine_drift_all:", - "engine_unskip:", - "engine_collect_end_of_step:", - "engine_launch:", - "writing particle properties", - "engine_repartition:", - "engine_exchange_cells:", - "Dumping restart files", - "engine_print_stats:", - "engine_marktasks:", - "Reading initial conditions", - "engine_print_task_counts:", - "engine_drift_top_multipoles:", - "Communicating rebuild flag", - "engine_split:", - "space_init", - "engine_init", - "engine_repartition_trigger:" -] -is_rebuild = [ - 1, - 1, - 1, - 1, - 1, - 1, - 1, - 1, - 1, - 1, - 1, - 1, - 1, - 1, - 1, - 1, - 1, - 1, - 1, - 1, - 1, - 1, - 1, - 1, - 0, - 0, - 0, - 0, - 0, - 0, - 1, - 0, - 0, - 1, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0 + ["Gpart assignment", 1], + ["Mesh comunication", 1], + ["Forward Fourier transform", 1], + ["Green function", 1], + ["Backwards Fourier transform", 1], + ["engine_recompute_displacement_constraint:", 1], + ["engine_exchange_top_multipoles:", 1], + ["updating particle counts", 1], + ["engine_estimate_nr_tasks:", 1], + ["Making 
gravity tasks", 1], + ["Making hydro tasks", 1], + ["Splitting tasks", 1], + ["Counting and linking tasks", 1], + ["Setting super-pointers", 1], + ["Making extra hydroloop tasks", 1], + ["Making extra starsloop tasks", 1], + ["Linking gravity tasks", 1], + ["Creating send tasks", 1], + ["Exchanging cell tags", 1], + ["Creating recv tasks", 1], + ["Counting number of foreign particles", 1], + ["Recursively linking foreign arrays", 1], + ["Setting unlocks", 1], + ["Ranking the tasks", 1], + ["scheduler_reweight:", 1], + ["space_list_useful_top_level_cells:", 1], + ["space_rebuild:", 1], + ["engine_drift_all:", 0], + ["engine_unskip:", 0], + ["engine_collect_end_of_step:", 0], + ["engine_launch:", 0], + ["writing particle properties", 0], + ["engine_repartition:", 0], + ["engine_exchange_cells:", 1], + ["Dumping restart files", 0], + ["engine_print_stats:", 0], + ["engine_marktasks:", 1], + ["Reading initial conditions", 0], + ["engine_print_task_counts:", 0], + ["engine_drift_top_multipoles:", 0], + ["Communicating rebuild flag", 0], + ["engine_split:", 0], + ["space_init", 0], + ["engine_init", 0], + ["engine_repartition_trigger:", 0], + ["velociraptor_invoke:", 0] ] times = np.zeros(len(labels)) counts = np.zeros(len(labels)) @@ -178,20 +138,20 @@ for i in range(num_files): for i in range(len(labels)): # Extract the different blocks - if re.search("%s took" % labels[i], line): + if re.search("%s took" % labels[i][0], line): counts[i] += 1.0 times[i] += float( re.findall(r"[+-]?((\d+\.?\d*)|(\.\d+))", line)[-1][0] ) - # Find the last line with meaningful output (avoid crash report, batch system stuf....) + # Find the last line with meaningful output (avoid crash report, batch system stuff....) 
if re.findall(r"\[[0-9]{4}\][ ]\[*", line) or re.findall( r"^\[[0-9]*[.][0-9]+\][ ]", line ): lastline = line # Total run time - total_time += float(re.findall(r"[+-]?([0-9]*[.])?[0-9]+", lastline)[1]) + total_time += float(re.findall(r"[+-]?(\[[0-9]\])?(\[[0-9]*[.][0-9]*\])+", lastline)[0][1][1:-1]) # Conver to seconds times /= 1000.0 @@ -207,35 +167,33 @@ time_ratios = times / total_time # Better looking labels for i in range(len(labels)): - labels[i] = labels[i].replace("_", " ") - labels[i] = labels[i].replace(":", "") - labels[i] = labels[i].title() + labels[i][0] = labels[i][0].replace("_", " ") + labels[i][0] = labels[i][0].replace(":", "") + labels[i][0] = labels[i][0].title() times = np.array(times) time_ratios = np.array(time_ratios) -is_rebuild = np.array(is_rebuild) # Sort in order of importance order = np.argsort(-times) times = times[order] counts = counts[order] time_ratios = time_ratios[order] -is_rebuild = is_rebuild[order] -labels = np.take(labels, order) +labels = [labels[i] for i in order] # Keep only the important components important_times = [0.0] important_ratios = [0.0] -important_labels = ["Others (all below %.1f\%%)" % (threshold * 100)] important_is_rebuild = [0] +important_labels = ["Others (all below %.1f\%%)" % (threshold * 100)] need_print = True print("Time spent in the different code sections:") for i in range(len(labels)): if time_ratios[i] > threshold: important_times.append(times[i]) important_ratios.append(time_ratios[i]) - important_labels.append(labels[i]) - important_is_rebuild.append(is_rebuild[i]) + important_is_rebuild.append(labels[i][1]) + important_labels.append(labels[i][0]) else: if need_print: print("Elements in 'Other' category (<%.1f%%):" % (threshold * 100)) @@ -243,7 +201,7 @@ for i in range(len(labels)): important_times[0] += times[i] important_ratios[0] += time_ratios[i] - print(" - '%-40s' (%5d calls, time: %.4fs): %.4f%%" % (labels[i], counts[i], times[i], time_ratios[i] * 100)) + print(" - '%-40s' (%5d 
calls, time: %.4fs): %.4f%%" % (labels[i][0], counts[i], times[i], time_ratios[i] * 100)) # Anything unaccounted for? print( @@ -254,8 +212,8 @@ print( important_ratios = np.array(important_ratios) important_is_rebuild = np.array(important_is_rebuild) -figure() +figure() def func(pct): return "$%4.2f\\%%$" % pct diff --git a/tools/task_plots/analyse_tasks.py b/tools/task_plots/analyse_tasks.py index 5738ca068c215a78c6fb4ef2524ce3d73565633e..e897424a95be8937073bd16adf108fa4fa1456ad 100755 --- a/tools/task_plots/analyse_tasks.py +++ b/tools/task_plots/analyse_tasks.py @@ -82,6 +82,7 @@ TASKTYPES = [ "kick1", "kick2", "timestep", + "timestep_limiter", "send", "recv", "grav_long_range", @@ -104,6 +105,7 @@ SUBTYPES = [ "density", "gradient", "force", + "limiter", "grav", "external_grav", "tend", diff --git a/tools/task_plots/plot_tasks.py b/tools/task_plots/plot_tasks.py index 82dc882becfc2a7a8a537b822aceb8d9d226792d..12fd4d241a268c9d45fd72f5cdda2727221ba94d 100755 --- a/tools/task_plots/plot_tasks.py +++ b/tools/task_plots/plot_tasks.py @@ -167,6 +167,7 @@ TASKTYPES = [ "kick1", "kick2", "timestep", + "timestep_limiter", "send", "recv", "grav_long_range", @@ -189,6 +190,7 @@ SUBTYPES = [ "density", "gradient", "force", + "limiter", "grav", "external_grav", "tend", @@ -204,15 +206,23 @@ SUBTYPES = [ # Task/subtypes of interest. FULLTYPES = [ + "self/limiter", "self/force", + "self/gradient", "self/density", "self/grav", + "sub_self/limiter", "sub_self/force", + "sub_self/gradient", "sub_self/density", + "pair/limiter", "pair/force", + "pair/gradient", "pair/density", "pair/grav", + "sub_pair/limiter", "sub_pair/force", + "sub_pair/gradient", "sub_pair/density", "recv/xv", "send/xv",