diff --git a/.gitignore b/.gitignore index 4fed6ceb34fb7d6e3532745746b7e3b773429e09..00bf6d051c26c47ceefa2870da1a85a3a34fb50d 100644 --- a/.gitignore +++ b/.gitignore @@ -12,6 +12,7 @@ config.h.in config.sub ltmain.sh libtool +build src/version_string.h swift*.tar.gz @@ -35,6 +36,8 @@ examples/*/*/*.rst examples/*/*/*.hdf5 examples/*/*/*.csv examples/*/*/*.dot +examples/*/*/memuse_report-step*.dat +examples/*/*/memuse_report-step*.log examples/*/*/restart/* examples/*/*/used_parameters.yml examples/*/stf_output* diff --git a/COPYING.LESSER b/COPYING.LESSER new file mode 100644 index 0000000000000000000000000000000000000000..0927556b54440825c63fbf61b9e6179894a5547a --- /dev/null +++ b/COPYING.LESSER @@ -0,0 +1,157 @@ +### GNU LESSER GENERAL PUBLIC LICENSE + +Version 3, 29 June 2007 + +Copyright (C) 2007 Free Software Foundation, Inc. +<https://fsf.org/> + +Everyone is permitted to copy and distribute verbatim copies of this +license document, but changing it is not allowed. + +This version of the GNU Lesser General Public License incorporates the +terms and conditions of version 3 of the GNU General Public License, +supplemented by the additional permissions listed below. + +#### 0. Additional Definitions. + +As used herein, "this License" refers to version 3 of the GNU Lesser +General Public License, and the "GNU GPL" refers to version 3 of the +GNU General Public License. + +"The Library" refers to a covered work governed by this License, other +than an Application or a Combined Work as defined below. + +An "Application" is any work that makes use of an interface provided +by the Library, but which is not otherwise based on the Library. +Defining a subclass of a class defined by the Library is deemed a mode +of using an interface provided by the Library. + +A "Combined Work" is a work produced by combining or linking an +Application with the Library. The particular version of the Library +with which the Combined Work was made is also called the "Linked +Version". + +The "Minimal Corresponding Source" for a Combined Work means the +Corresponding Source for the Combined Work, excluding any source code +for portions of the Combined Work that, considered in isolation, are +based on the Application, and not on the Linked Version. + +The "Corresponding Application Code" for a Combined Work means the +object code and/or source code for the Application, including any data +and utility programs needed for reproducing the Combined Work from the +Application, but excluding the System Libraries of the Combined Work. + +#### 1. Exception to Section 3 of the GNU GPL. + +You may convey a covered work under sections 3 and 4 of this License +without being bound by section 3 of the GNU GPL. + +#### 2. Conveying Modified Versions. + +If you modify a copy of the Library, and, in your modifications, a +facility refers to a function or data to be supplied by an Application +that uses the facility (other than as an argument passed when the +facility is invoked), then you may convey a copy of the modified +version: + +- a) under this License, provided that you make a good faith effort + to ensure that, in the event an Application does not supply the + function or data, the facility still operates, and performs + whatever part of its purpose remains meaningful, or +- b) under the GNU GPL, with none of the additional permissions of + this License applicable to that copy. + +#### 3. Object Code Incorporating Material from Library Header Files. + +The object code form of an Application may incorporate material from a +header file that is part of the Library. You may convey such object +code under terms of your choice, provided that, if the incorporated +material is not limited to numerical parameters, data structure +layouts and accessors, or small macros, inline functions and templates +(ten or fewer lines in length), you do both of the following: + +- a) Give prominent notice with each copy of the object code that + the Library is used in it and that the Library and its use are + covered by this License. +- b) Accompany the object code with a copy of the GNU GPL and this + license document. + +#### 4. Combined Works. + +You may convey a Combined Work under terms of your choice that, taken +together, effectively do not restrict modification of the portions of +the Library contained in the Combined Work and reverse engineering for +debugging such modifications, if you also do each of the following: + +- a) Give prominent notice with each copy of the Combined Work that + the Library is used in it and that the Library and its use are + covered by this License. +- b) Accompany the Combined Work with a copy of the GNU GPL and this + license document. +- c) For a Combined Work that displays copyright notices during + execution, include the copyright notice for the Library among + these notices, as well as a reference directing the user to the + copies of the GNU GPL and this license document. +- d) Do one of the following: + - 0) Convey the Minimal Corresponding Source under the terms of + this License, and the Corresponding Application Code in a form + suitable for, and under terms that permit, the user to + recombine or relink the Application with a modified version of + the Linked Version to produce a modified Combined Work, in the + manner specified by section 6 of the GNU GPL for conveying + Corresponding Source. + - 1) Use a suitable shared library mechanism for linking with + the Library. A suitable mechanism is one that (a) uses at run + time a copy of the Library already present on the user's + computer system, and (b) will operate properly with a modified + version of the Library that is interface-compatible with the + Linked Version. +- e) Provide Installation Information, but only if you would + otherwise be required to provide such information under section 6 + of the GNU GPL, and only to the extent that such information is + necessary to install and execute a modified version of the + Combined Work produced by recombining or relinking the Application + with a modified version of the Linked Version. (If you use option + 4d0, the Installation Information must accompany the Minimal + Corresponding Source and Corresponding Application Code. If you + use option 4d1, you must provide the Installation Information in + the manner specified by section 6 of the GNU GPL for conveying + Corresponding Source.) + +#### 5. Combined Libraries. + +You may place library facilities that are a work based on the Library +side by side in a single library together with other library +facilities that are not Applications and are not covered by this +License, and convey such a combined library under terms of your +choice, if you do both of the following: + +- a) Accompany the combined library with a copy of the same work + based on the Library, uncombined with any other library + facilities, conveyed under the terms of this License. +- b) Give prominent notice with the combined library that part of it + is a work based on the Library, and explaining where to find the + accompanying uncombined form of the same work. + +#### 6. Revised Versions of the GNU Lesser General Public License. + +The Free Software Foundation may publish revised and/or new versions +of the GNU Lesser General Public License from time to time. Such new +versions will be similar in spirit to the present version, but may +differ in detail to address new problems or concerns. + +Each version is given a distinguishing version number. If the Library +as you received it specifies that a certain numbered version of the +GNU Lesser General Public License "or any later version" applies to +it, you have the option of following the terms and conditions either +of that published version or of any later version published by the +Free Software Foundation. If the Library as you received it does not +specify a version number of the GNU Lesser General Public License, you +may choose any version of the GNU Lesser General Public License ever +published by the Free Software Foundation. + +If the Library as you received it specifies that a proxy can decide +whether future versions of the GNU Lesser General Public License shall +apply, that proxy's public statement of acceptance of any version is +permanent authorization for you to choose that version for the +Library. diff --git a/doc/RTD/source/AnalysisTools/index.rst b/doc/RTD/source/AnalysisTools/index.rst index 9fa94e0baff5732092a704e20ecc12de57d8301f..7ba9f6256d841c7bb3bec42abab6a5c22ba4083e 100644 --- a/doc/RTD/source/AnalysisTools/index.rst +++ b/doc/RTD/source/AnalysisTools/index.rst @@ -2,7 +2,7 @@ Loic Hausammann 20th March 2019 Peter W. Draper 28th March 2019 -.. _analysistools: +.. _Analysis_Tools: Analysis Tools ============== diff --git a/doc/RTD/source/GettingStarted/running_example.rst b/doc/RTD/source/GettingStarted/running_example.rst index 9dfbdd8c8ec98ea59892a551691aa5f230052e2e..a5614fb2a2e2068a8188d82182042feb95251a73 100644 --- a/doc/RTD/source/GettingStarted/running_example.rst +++ b/doc/RTD/source/GettingStarted/running_example.rst @@ -11,7 +11,7 @@ as ``wget`` for grabbing the glass). .. code-block:: bash - cd examples/SodShock_3D + cd examples/HydroTests/SodShock_3D ./getGlass.sh python makeIC.py ../swift --hydro --threads=4 sodShock.yml diff --git a/doc/RTD/source/HydroSchemes/anarchy_sph.rst b/doc/RTD/source/HydroSchemes/anarchy_sph.rst new file mode 100644 index 0000000000000000000000000000000000000000..8d09280039c25608d3664e1a18d9b5c28d3108b6 --- /dev/null +++ b/doc/RTD/source/HydroSchemes/anarchy_sph.rst @@ -0,0 +1,33 @@ +.. ANARCHY-SPH + Josh Borrow 5th April 2018 + +ANARCHY-PU SPH +============== + +This scheme is similar to the one used in the EAGLE code. This scheme +includes: + ++ Durier & Dalla Vecchia (2012) time-step limiter ++ Pressure-Energy SPH ++ Thermal diffusion following Price (2012) ++ A simplified version of the 'Inviscid SPH' artificial viscosity + (Cullen & Denhen 2010). + +More information will be made available in a forthcoming publication. + +The scheme as-implemented in SWIFT is slightly different to the one +implemented in the original EAGLE code: + ++ Pressure-Energy SPH is used instead of Pressure-Entropy SPH ++ Artificial viscosity coefficients have changed -- from minimal + value of 0.1 to 0.0, and from length of 0.1 to 0.25. This + is based on performance of hydrodynamics tests in SWIFT and may + be to do with our choice of smoothing length definition. ++ Recommended kernel changed from Wendland-C2 (with 100 Ngb) to + Quintic Spline (with ~82 Ngb). + + +.. code-block:: bash + + ./configure --with-hydro=anarchy-pu --with-kernel=quintic-spline --disable-hand-vec + diff --git a/doc/RTD/source/HydroSchemes/index.rst b/doc/RTD/source/HydroSchemes/index.rst index 462bb7378162ff1addab3212a6901412195a3377..7331331bda7c28fad2f471dedb1335f1201d4a21 100644 --- a/doc/RTD/source/HydroSchemes/index.rst +++ b/doc/RTD/source/HydroSchemes/index.rst @@ -17,6 +17,7 @@ schemes available in SWIFT, as well as how to implement your own. minimal_sph planetary hopkins_sph + anarchy_sph gizmo adding_your_own diff --git a/examples/Cooling/CoolingRedshiftDependence/.gitignore b/examples/Cooling/CoolingRedshiftDependence/.gitignore new file mode 100644 index 0000000000000000000000000000000000000000..60baa9cb833f9e075739f327f4fe68477c84a093 --- /dev/null +++ b/examples/Cooling/CoolingRedshiftDependence/.gitignore @@ -0,0 +1 @@ +data/* diff --git a/examples/Cooling/CoolingRedshiftDependence/cooling_redshift_dependence_high_z.yml b/examples/Cooling/CoolingRedshiftDependence/cooling_redshift_dependence_high_z.yml new file mode 100644 index 0000000000000000000000000000000000000000..13ed73554936abc824ee990322cfd49464897c5b --- /dev/null +++ b/examples/Cooling/CoolingRedshiftDependence/cooling_redshift_dependence_high_z.yml @@ -0,0 +1,85 @@ +# Define the system of units to use internally. +InternalUnitSystem: + UnitMass_in_cgs: 1.98848e43 # 10^10 M_sun + UnitLength_in_cgs: 3.08567758e24 # 1 Mpc + UnitVelocity_in_cgs: 1e5 # 1 km/s + UnitCurrent_in_cgs: 1 # Amperes + UnitTemp_in_cgs: 1 # Kelvin + +# Cosmological parameters +Cosmology: + h: 0.6777 # Reduced Hubble constant + a_begin: 0.5 # Initial scale-factor of the simulation (z = 1.0) + a_end: 0.5061559 # Final scale factor of the simulation (~ 100 myr) + Omega_m: 0.307 # Matter density parameter + Omega_lambda: 0.693 # Dark-energy density parameter + Omega_b: 0.0455 # Baryon density parameter + +# Parameters governing the time integration +TimeIntegration: + dt_min: 1e-14 + dt_max: 5e-3 + +# Parameters governing the snapshots +Snapshots: + basename: data/redshift_dependence_high_z + delta_time: 1.000122373748838 + scale_factor_first: 0.5 + compression: 4 + +# Parameters governing the conserved quantities statistics +Statistics: + scale_factor_first: 0.5 + delta_time: 1.1 + +# Parameters for the hydrodynamics scheme +SPH: + resolution_eta: 1.2348 # Target smoothing length in units of the mean inter-particle separation (1.2348 == 48Ngbs with the cubic spline kernel). + CFL_condition: 0.1 # Courant-Friedrich-Levy condition for time integration. + minimal_temperature: 100 # K + +# Parameters related to the initial conditions +InitialConditions: + file_name: ./ics_high_z.hdf5 # The file to read + periodic: 1 + +# Parameters for the EAGLE chemistry +EAGLEChemistry: # Solar abundances + init_abundance_metal: 0. + init_abundance_Hydrogen: 0.752 + init_abundance_Helium: 0.248 + init_abundance_Carbon: 0. + init_abundance_Nitrogen: 0. + init_abundance_Oxygen: 0. + init_abundance_Neon: 0. + init_abundance_Magnesium: 0. + init_abundance_Silicon: 0. + init_abundance_Iron: 0. + +# Parameters for the EAGLE cooling +EAGLECooling: + dir_name: ./coolingtables/ + H_reion_z: 11.5 + H_reion_eV_p_H: 2.0 + He_reion_z_centre: 3.5 + He_reion_z_sigma: 0.5 + He_reion_eV_p_H: 2.0 + +# Parameters for the EAGLE "equation of state" +EAGLEEntropyFloor: + Jeans_density_threshold_H_p_cm3: 0.1 # Physical density above which the EAGLE Jeans limiter entropy floor kicks in expressed in Hydrogen atoms per cm^3. + Jeans_over_density_threshold: 10. # Overdensity above which the EAGLE Jeans limiter entropy floor can kick in. + Jeans_temperature_norm_K: 8000 # Temperature of the EAGLE Jeans limiter entropy floor at the density threshold expressed in Kelvin. + Jeans_gamma_effective: 1.3333333 # Slope the of the EAGLE Jeans limiter entropy floor + Cool_density_threshold_H_p_cm3: 1e-5 # Physical density above which the EAGLE Cool limiter entropy floor kicks in expressed in Hydrogen atoms per cm^3. + Cool_over_density_threshold: 10. # Overdensity above which the EAGLE Cool limiter entropy floor can kick in. + Cool_temperature_norm_K: 8000 # Temperature of the EAGLE Cool limiter entropy floor at the density threshold expressed in Kelvin. + Cool_gamma_effective: 1. # Slope the of the EAGLE Cool limiter entropy floor + +LambdaCooling: + lambda_nH2_cgs: 2.13744785e-23 # Cooling rate divided by square Hydrogen number density (in cgs units [erg * s^-1 * cm^3] + +ConstCooling: + cooling_rate: 1. # Cooling rate (du/dt) (internal units) + min_energy: 1. # Minimal internal energy per unit mass (internal units) + cooling_tstep_mult: 1. # Dimensionless pre-factor for the time-step condition \ No newline at end of file diff --git a/examples/Cooling/CoolingRedshiftDependence/cooling_redshift_dependence_low_z.yml b/examples/Cooling/CoolingRedshiftDependence/cooling_redshift_dependence_low_z.yml new file mode 100644 index 0000000000000000000000000000000000000000..5b336bc4bf86d2596ec52e84e24da2710d96cb8a --- /dev/null +++ b/examples/Cooling/CoolingRedshiftDependence/cooling_redshift_dependence_low_z.yml @@ -0,0 +1,85 @@ +# Define the system of units to use internally. +InternalUnitSystem: + UnitMass_in_cgs: 1.98848e33 # 10^10 M_sun + UnitLength_in_cgs: 3.08567758e21 # 1 kpc + UnitVelocity_in_cgs: 1 # 1 km/s + UnitCurrent_in_cgs: 1 # Amperes + UnitTemp_in_cgs: 1 # Kelvin + +# Cosmological parameters +Cosmology: + h: 0.6777 # Reduced Hubble constant + a_begin: 0.99009 # Initial scale-factor of the simulation (z = 0.01) + a_end: 0.99700 # Final scale factor of the simulation (~ 100 myr) + Omega_m: 0.307 # Matter density parameter + Omega_lambda: 0.693 # Dark-energy density parameter + Omega_b: 0.0455 # Baryon density parameter + +# Parameters governing the time integration +TimeIntegration: + dt_min: 1e-14 + dt_max: 5e-3 + +# Parameters governing the snapshots +Snapshots: + basename: data/redshift_dependence_low_z + delta_time: 1.0000695206950205 + scale_factor_first: 0.99009 + compression: 4 + +# Parameters governing the conserved quantities statistics +Statistics: + scale_factor_first: 0.99009 + delta_time: 1.1 + +# Parameters for the hydrodynamics scheme +SPH: + resolution_eta: 1.2348 # Target smoothing length in units of the mean inter-particle separation (1.2348 == 48Ngbs with the cubic spline kernel). + CFL_condition: 0.1 # Courant-Friedrich-Levy condition for time integration. + minimal_temperature: 100 # K + +# Parameters related to the initial conditions +InitialConditions: + file_name: ./ics_low_z.hdf5 # The file to read + periodic: 1 + +# Parameters for the EAGLE chemistry +EAGLEChemistry: # Primordial + init_abundance_metal: 0. + init_abundance_Hydrogen: 0.752 + init_abundance_Helium: 0.248 + init_abundance_Carbon: 0. + init_abundance_Nitrogen: 0. + init_abundance_Oxygen: 0. + init_abundance_Neon: 0. + init_abundance_Magnesium: 0. + init_abundance_Silicon: 0. + init_abundance_Iron: 0. + +# Parameters for the EAGLE cooling +EAGLECooling: + dir_name: ./coolingtables/ + H_reion_z: 11.5 + H_reion_eV_p_H: 2.0 + He_reion_z_centre: 3.5 + He_reion_z_sigma: 0.5 + He_reion_eV_p_H: 2.0 + +# Parameters for the EAGLE "equation of state" +EAGLEEntropyFloor: + Jeans_density_threshold_H_p_cm3: 0.1 # Physical density above which the EAGLE Jeans limiter entropy floor kicks in expressed in Hydrogen atoms per cm^3. + Jeans_over_density_threshold: 10. # Overdensity above which the EAGLE Jeans limiter entropy floor can kick in. + Jeans_temperature_norm_K: 8000 # Temperature of the EAGLE Jeans limiter entropy floor at the density threshold expressed in Kelvin. + Jeans_gamma_effective: 1.3333333 # Slope the of the EAGLE Jeans limiter entropy floor + Cool_density_threshold_H_p_cm3: 1e-5 # Physical density above which the EAGLE Cool limiter entropy floor kicks in expressed in Hydrogen atoms per cm^3. + Cool_over_density_threshold: 10. # Overdensity above which the EAGLE Cool limiter entropy floor can kick in. + Cool_temperature_norm_K: 8000 # Temperature of the EAGLE Cool limiter entropy floor at the density threshold expressed in Kelvin. + Cool_gamma_effective: 1. # Slope the of the EAGLE Cool limiter entropy floor + +LambdaCooling: + lambda_nH2_cgs: 2.13744785e-23 # Cooling rate divided by square Hydrogen number density (in cgs units [erg * s^-1 * cm^3] + +ConstCooling: + cooling_rate: 1. # Cooling rate (du/dt) (internal units) + min_energy: 1. # Minimal internal energy per unit mass (internal units) + cooling_tstep_mult: 1. # Dimensionless pre-factor for the time-step condition diff --git a/examples/Cooling/CoolingRedshiftDependence/cooling_redshift_dependence_no_z.yml b/examples/Cooling/CoolingRedshiftDependence/cooling_redshift_dependence_no_z.yml new file mode 100644 index 0000000000000000000000000000000000000000..8bce6498432b3782225c7e53397868fc1356a26e --- /dev/null +++ b/examples/Cooling/CoolingRedshiftDependence/cooling_redshift_dependence_no_z.yml @@ -0,0 +1,76 @@ +# Define the system of units to use internally. +InternalUnitSystem: + UnitMass_in_cgs: 1.98848e43 # 10^10 M_sun + UnitLength_in_cgs: 3.08567758e24 # 1 Mpc + UnitVelocity_in_cgs: 1e5 # 1 km/s + UnitCurrent_in_cgs: 1 # Amperes + UnitTemp_in_cgs: 1 # Kelvin + +# Parameters governing the time integration +TimeIntegration: + time_begin: 0. + time_end: 1.02e-4 # ~ 100 myr + dt_min: 1e-14 + dt_max: 5e-5 + +# Parameters governing the snapshots +Snapshots: + basename: data/redshift_dependence_no_z + delta_time: 1.02e-6 + compression: 4 + +# Parameters governing the conserved quantities statistics +Statistics: + delta_time: 1.02e-6 + +# Parameters for the hydrodynamics scheme +SPH: + resolution_eta: 1.2348 # Target smoothing length in units of the mean inter-particle separation (1.2348 == 48Ngbs with the cubic spline kernel). + CFL_condition: 0.1 # Courant-Friedrich-Levy condition for time integration. + minimal_temperature: 100 # K + +# Parameters related to the initial conditions +InitialConditions: + file_name: ./ics_no_z.hdf5 # The file to read + periodic: 1 + +# Parameters for the EAGLE chemistry +EAGLEChemistry: # Solar abundances + init_abundance_metal: 0. + init_abundance_Hydrogen: 0.752 + init_abundance_Helium: 0.248 + init_abundance_Carbon: 0. + init_abundance_Nitrogen: 0. + init_abundance_Oxygen: 0. + init_abundance_Neon: 0. + init_abundance_Magnesium: 0. + init_abundance_Silicon: 0. + init_abundance_Iron: 0. + +# Parameters for the EAGLE cooling +EAGLECooling: + dir_name: ./coolingtables/ + H_reion_z: 11.5 + H_reion_eV_p_H: 2.0 + He_reion_z_centre: 3.5 + He_reion_z_sigma: 0.5 + He_reion_eV_p_H: 2.0 + +# Parameters for the EAGLE "equation of state" +EAGLEEntropyFloor: + Jeans_density_threshold_H_p_cm3: 0.1 # Physical density above which the EAGLE Jeans limiter entropy floor kicks in expressed in Hydrogen atoms per cm^3. + Jeans_over_density_threshold: 10. # Overdensity above which the EAGLE Jeans limiter entropy floor can kick in. + Jeans_temperature_norm_K: 8000 # Temperature of the EAGLE Jeans limiter entropy floor at the density threshold expressed in Kelvin. + Jeans_gamma_effective: 1.3333333 # Slope the of the EAGLE Jeans limiter entropy floor + Cool_density_threshold_H_p_cm3: 1e-5 # Physical density above which the EAGLE Cool limiter entropy floor kicks in expressed in Hydrogen atoms per cm^3. + Cool_over_density_threshold: 10. # Overdensity above which the EAGLE Cool limiter entropy floor can kick in. + Cool_temperature_norm_K: 8000 # Temperature of the EAGLE Cool limiter entropy floor at the density threshold expressed in Kelvin. + Cool_gamma_effective: 1. # Slope the of the EAGLE Cool limiter entropy floor + +LambdaCooling: + lambda_nH2_cgs: 2.13744785e-23 # Cooling rate divided by square Hydrogen number density (in cgs units [erg * s^-1 * cm^3] + +ConstCooling: + cooling_rate: 1. # Cooling rate (du/dt) (internal units) + min_energy: 1. # Minimal internal energy per unit mass (internal units) + cooling_tstep_mult: 1. # Dimensionless pre-factor for the time-step condition \ No newline at end of file diff --git a/examples/Cooling/CoolingRedshiftDependence/getGlass.sh b/examples/Cooling/CoolingRedshiftDependence/getGlass.sh new file mode 100755 index 0000000000000000000000000000000000000000..01b4474ac21666c843b7abedfa39a76948934911 --- /dev/null +++ b/examples/Cooling/CoolingRedshiftDependence/getGlass.sh @@ -0,0 +1,2 @@ +#!/bin/bash +wget http://virgodb.cosma.dur.ac.uk/swift-webstorage/ICs/gravity_glassCube_32.hdf5 diff --git a/examples/Cooling/CoolingRedshiftDependence/makeIC.py b/examples/Cooling/CoolingRedshiftDependence/makeIC.py new file mode 100644 index 0000000000000000000000000000000000000000..61b87566882f1bad9704844b13ae04773a353acd --- /dev/null +++ b/examples/Cooling/CoolingRedshiftDependence/makeIC.py @@ -0,0 +1,124 @@ +""" +Creates a redshift-dependent cooling box such that it always has the same +_physical_ density at the given redshift. +""" + +from swiftsimio import Writer +from swiftsimio.units import cosmo_units + +from unyt import mh, cm, s, K, Mpc, kb +import numpy as np + +import h5py + +# Physics parameters. +boxsize = 1.0 * Mpc +physical_density = 0.1 * mh / cm ** 3 +mu_hydrogen = 0.5 # Fully ionised +temperature = 1e7 * K +gamma = 5.0 / 3.0 + + +def get_coordinates(glass_filename: str) -> np.array: + """ + Gets the coordinates from the glass file. + """ + + with h5py.File(glass_filename, "r") as handle: + coordinates = handle["PartType1/Coordinates"][...] + + return coordinates + + +def generate_ics(redshift: float, filename: str, glass_filename: str) -> None: + """ + Generate initial conditions for the CoolingRedshiftDependence example. + """ + + scale_factor = 1 / (1 + redshift) + comoving_boxsize = boxsize / scale_factor + + glass_coordinates = get_coordinates(glass_filename) + number_of_particles = len(glass_coordinates) + + gas_particle_mass = physical_density * (boxsize ** 3) / number_of_particles + + writer = Writer(cosmo_units, comoving_boxsize) + + writer.gas.coordinates = glass_coordinates * comoving_boxsize + + writer.gas.velocities = np.zeros_like(glass_coordinates) * cm / s + + writer.gas.masses = np.ones(number_of_particles, dtype=float) * gas_particle_mass + + # Leave in physical units; handled by boxsize change. + writer.gas.internal_energy = ( + np.ones(number_of_particles, dtype=float) + * 3.0 / 2.0 + * (temperature * kb) + / (mu_hydrogen * mh) + ) + + writer.gas.generate_smoothing_lengths(boxsize=comoving_boxsize, dimension=3) + + writer.write(filename) + + return + + +if __name__ == "__main__": + """ + Sets up the initial parameters. + """ + + import argparse as ap + + parser = ap.ArgumentParser( + description=""" + Sets up the initial conditions for the cooling test. Takes two + redshifts, and produces two files: ics_high_z.hdf5 and + ics_low_z.hdf5. + """ + ) + + parser.add_argument( + "-a", + "--high", + help="The high redshift to generate initial conditions for. Default: 1.0", + default=1, + type=float, + ) + + parser.add_argument( + "-b", + "--low", + help="The low redshift to generate initial conditions for. Default: 0.01", + default=0.01, + type=float, + ) + + parser.add_argument( + "-n", + "--nocosmo", + help="Generate a non-cosmological box? Default: Truthy", + default=1, + type=bool, + ) + + parser.add_argument( + "-g", + "--glass", + help="Glass filename. Default: gravity_glassCube_32.hdf5", + type=str, + default="gravity_glassCube_32.hdf5", + ) + + args = parser.parse_args() + + generate_ics(args.low, filename="ics_low_z.hdf5", glass_filename=args.glass) + generate_ics(args.high, filename="ics_high_z.hdf5", glass_filename=args.glass) + + if args.nocosmo: + generate_ics(0.0, filename="ics_no_z.hdf5", glass_filename=args.glass) + + exit(0) diff --git a/examples/Cooling/CoolingRedshiftDependence/plotSolution.py b/examples/Cooling/CoolingRedshiftDependence/plotSolution.py new file mode 100644 index 0000000000000000000000000000000000000000..9cde36cb05b88e60b3bf3527f3857a3774bf5dca --- /dev/null +++ b/examples/Cooling/CoolingRedshiftDependence/plotSolution.py @@ -0,0 +1,223 @@ +""" +Plots the mean temperature. +""" + +import matplotlib.pyplot as plt + +from swiftsimio import load + +from unyt import Myr, K, mh, cm, erg + +import numpy as np + + +def get_data_dump(metadata): + """ + Gets a big data dump from the SWIFT metadata + """ + + try: + viscosity = metadata.viscosity_info + except: + viscosity = "No info" + + try: + diffusion = metadata.diffusion_info + except: + diffusion = "No info" + + output = ( + "$\\bf{SWIFT}$\n" + + metadata.code_info + + "\n\n" + + "$\\bf{Compiler}$\n" + + metadata.compiler_info + + "\n\n" + + "$\\bf{Hydrodynamics}$\n" + + metadata.hydro_info + + "\n\n" + + "$\\bf{Viscosity}$\n" + + viscosity + + "\n\n" + + "$\\bf{Diffusion}$\n" + + diffusion + ) + + return output + + +def setup_axes(): + """ + Sets up the axes and returns fig, ax + """ + + fig, ax = plt.subplots(nrows=2, ncols=2, figsize=(8, 8)) + + ax = ax.flatten() + + ax[0].semilogy() + ax[2].semilogy() + + ax[1].set_xlabel("Simulation time elapsed [Myr]") + ax[2].set_xlabel("Simulation time elapsed [Myr]") + + ax[0].set_ylabel("Temperature of Universe [K]") + ax[1].set_ylabel("Physical Density of Universe [$n_H$ cm$^{-3}$]") + ax[2].set_ylabel("Physical Energy [erg]") + + for a in ax[:-1]: + a.set_xlim(0, 100) + + fig.tight_layout(pad=0.5) + + return fig, ax + + +def get_data(handle: float, n_snaps: int): + """ + Returns the elapsed simulation time, temperature, and density + """ + + t0 = 0.0 + + times = [] + temps = [] + densities = [] + energies = [] + radiated_energies = [] + + for snap in range(n_snaps): + try: + data = load(f"data/{handle}_{snap:04d}.hdf5") + + if snap == 0: + t0 = data.metadata.t.to(Myr).value + + times.append(data.metadata.t.to(Myr).value - t0) + temps.append(np.mean(data.gas.temperature.to(K).value)) + densities.append( + np.mean(data.gas.density.to(mh / cm ** 3).value) + / (data.metadata.scale_factor ** 3) + ) + + try: + energies.append( + np.mean((data.gas.internal_energy * data.gas.masses).to(erg).value) + * data.metadata.scale_factor ** (2) + ) + radiated_energies.append( + np.mean(data.gas.radiated_energy.to(erg).value) + ) + except AttributeError: + continue + except OSError: + continue + + return times, temps, densities, energies, radiated_energies + + +def get_n_steps(timesteps_filename: str) -> int: + """ + Reads the number of steps from the timesteps file. + """ + + data = np.genfromtxt(timesteps_filename) + + return int(data[-1][0]) + + +def plot_single_data( + handle: str, n_snaps: int, label: str, ax: plt.axes, n_steps: int = 0, run: int = 0 +): + """ + Takes the a single simulation and plots it on the axes. + """ + + data = get_data(handle, n_snaps) + + ax[0].plot(data[0], data[1], label=label, marker="o", ms=2, c=f"C{run}") + + ax[1].plot( + data[0], data[2], label=f"Steps: {n_steps}", marker="o", ms=2, c=f"C{run}" + ) + + if run == 0: + label_energy = "Particle Energy" + label_radiated = "Radiated Energy" + label_sum = "Sum" + else: + label_energy = "" + label_radiated = "" + label_sum = "" + + ax[2].plot(data[0], data[3], label=label_energy, ls="dotted", C=f"C{run}") + + # ax[2].plot(data[0], data[4], label=label_radiated, ls="dashed", C=f"C{run}") + + # ax[2].plot( + # data[0], [x + y for x, y in zip(*data[3:5])], label=label_sum, C=f"C{run}" + # ) + + return + + +def make_plot(handles, names, timestep_names, n_snaps=100): + """ + Makes the whole plot and returns the fig, ax objects. + """ + + fig, ax = setup_axes() + + run = 0 + + for handle, name, timestep_name in zip(handles, names, timestep_names): + try: + n_steps = get_n_steps(timestep_name) + except: + n_steps = "Unknown" + + plot_single_data(handle, n_snaps, name, ax, n_steps=n_steps, run=run) + run += 1 + + ax[0].legend() + ax[1].legend() + ax[2].legend() + + info_axis = ax[-1] + + try: + info = get_data_dump(load(f"data/{handles[0]}_0000.hdf5").metadata) + + info_axis.text( + 0.5, + 0.45, + info, + ha="center", + va="center", + fontsize=7, + transform=info_axis.transAxes, + ) + except OSError: + pass + + info_axis.axis("off") + + return fig, ax + + +if __name__ == "__main__": + """ + Plot everything! + """ + + handles = [ + "redshift_dependence_no_z", + "redshift_dependence_low_z", + "redshift_dependence_high_z", + ] + names = ["No Cosmology", "Low Redshift ($z=0.01$)", "High Redshift ($z=1$)"] + timestep_names = ["timesteps_no.txt", "timesteps_low.txt", "timesteps_high.txt"] + + fig, ax = make_plot(handles, names, timestep_names) + + fig.savefig("redshift_dependence.png", dpi=300) diff --git a/examples/Cooling/CoolingRedshiftDependence/run.sh b/examples/Cooling/CoolingRedshiftDependence/run.sh new file mode 100755 index 0000000000000000000000000000000000000000..c9bf2aea7edd33886219dd4db822d930baff7d47 --- /dev/null +++ b/examples/Cooling/CoolingRedshiftDependence/run.sh @@ -0,0 +1,31 @@ +#!/bin/bash + +# Generate the initial conditions if they are not present. +if [ ! -e gravity_glassCube_32.hdf5 ] +then + echo "Fetching initial gravity glass file for the constant cosmological box example..." + ./getGlass.sh +fi + +# Fetch the cooling tables +if [ ! -e ics_no_z.hdf5 ] +then + echo "Generating initial conditions for the uniform cosmo box example..." + python3 makeIC.py +fi + +swift_location="../../swift" + +rm data/redshift_dependence_*_z_*.hdf5 + +mkdir data + +# Run SWIFT +$swift_location --hydro --cosmology --cooling --limiter --threads=4 cooling_redshift_dependence_high_z.yml 2>&1 | tee output_high.log +mv timesteps_4.txt timesteps_high.txt +$swift_location --hydro --cosmology --cooling --limiter --threads=4 cooling_redshift_dependence_low_z.yml 2>&1 | tee output_low.log +mv timesteps_4.txt timesteps_low.txt +$swift_location --hydro --cooling --limiter --threads=4 cooling_redshift_dependence_no_z.yml 2>&1 | tee output_no.log +mv timesteps_4.txt timesteps_no.txt + +python3 plotSolution.py diff --git a/examples/Cosmology/ComovingSodShock_3D/plotSolution.py b/examples/Cosmology/ComovingSodShock_3D/plotSolution.py index f05a385e8620b18189d2e7abca8aebb8ae65060e..d85f4be9a49d108d133928a81ea4482fa9099792 100644 --- a/examples/Cosmology/ComovingSodShock_3D/plotSolution.py +++ b/examples/Cosmology/ComovingSodShock_3D/plotSolution.py @@ -88,6 +88,18 @@ S = sim["/PartType0/Entropy"][:] P = sim["/PartType0/Pressure"][:] rho = sim["/PartType0/Density"][:] +try: + diffusion = sim["/PartType0/Diffusion"][:] + plot_diffusion = True +except: + plot_diffusion = False + +try: + viscosity = sim["/PartType0/Viscosity"][:] + plot_viscosity = True +except: + plot_viscosity = False + x_min = -1. x_max = 1. x += x_min @@ -112,6 +124,15 @@ P_sigma_bin = np.sqrt(P2_bin - P_bin**2) S_sigma_bin = np.sqrt(S2_bin - S_bin**2) u_sigma_bin = np.sqrt(u2_bin - u_bin**2) +if plot_diffusion: + alpha_diff_bin,_,_ = stats.binned_statistic(x, diffusion, statistic='mean', bins=x_bin_edge) + alpha2_diff_bin,_,_ = stats.binned_statistic(x, diffusion**2, statistic='mean', bins=x_bin_edge) + alpha_diff_sigma_bin = np.sqrt(alpha2_diff_bin - alpha_diff_bin**2) + +if plot_viscosity: + alpha_visc_bin,_,_ = stats.binned_statistic(x, viscosity, statistic='mean', bins=x_bin_edge) + alpha2_visc_bin,_,_ = stats.binned_statistic(x, viscosity**2, statistic='mean', bins=x_bin_edge) + alpha_visc_sigma_bin = np.sqrt(alpha2_visc_bin - alpha_visc_bin**2) # Analytic solution c_L = sqrt(gas_gamma * P_L / rho_L) # Speed of the rarefaction wave @@ -285,13 +306,26 @@ ylim(0.8, 2.2) # Entropy profile --------------------------------- subplot(235) -plot(x, S, '.', color='r', ms=0.5, alpha=0.2) -plot(x_s, s_s, '--', color='k', alpha=0.8, lw=1.2) -errorbar(x_bin, S_bin, yerr=S_sigma_bin, fmt='.', ms=8.0, color='b', lw=1.2) -xlabel("${\\rm{Position}}~x$", labelpad=0) -ylabel("${\\rm{Entropy}}~S$", labelpad=0) xlim(-0.5, 0.5) -ylim(0.8, 3.8) +xlabel("${\\rm{Position}}~x$", labelpad=0) + +if plot_diffusion or plot_viscosity: + if plot_diffusion: + plot(x, diffusion * 100, ".", color='r', ms=0.5, alpha=0.2) + errorbar(x_bin, alpha_diff_bin * 100, yerr=alpha_diff_sigma_bin * 100, fmt=".", ms=8.0, color='b', lw=1.2, label="Diffusion (100x)") + + if plot_viscosity: + plot(x, viscosity, ".", color='g', ms=0.5, alpha=0.2) + errorbar(x_bin, alpha_visc_bin, yerr=alpha_visc_sigma_bin, fmt=".", ms=8.0, color='y', lw=1.2, label="Viscosity") + + ylabel("${\\rm{Rate~Coefficient}}~\\alpha$", labelpad=0) + legend() +else: + plot(x, S, '.', color='r', ms=0.5, alpha=0.2) + plot(x_s, s_s, '--', color='k', alpha=0.8, lw=1.2) + errorbar(x_bin, S_bin, yerr=S_sigma_bin, fmt='.', ms=8.0, color='b', lw=1.2) + ylabel("${\\rm{Entropy}}~S$", labelpad=0) + ylim(0.8, 3.8) # Information ------------------------------------- subplot(236, frameon=False) @@ -312,5 +346,4 @@ ylim(0, 1) xticks([]) yticks([]) -tight_layout() savefig("SodShock.png", dpi=200) diff --git a/examples/Cosmology/ComovingSodShock_3D/sodShock.yml b/examples/Cosmology/ComovingSodShock_3D/sodShock.yml index 2d7a5727cbbc2cd417527ce05d7a8ea8ea05dd71..b5b2ff8fd5571c4093b043c9903398a391e51758 100644 --- a/examples/Cosmology/ComovingSodShock_3D/sodShock.yml +++ b/examples/Cosmology/ComovingSodShock_3D/sodShock.yml @@ -2,13 +2,13 @@ InternalUnitSystem: UnitMass_in_cgs: 2.94e55 # Grams UnitLength_in_cgs: 3.086e18 # pc - UnitVelocity_in_cgs: 1. # km per s + UnitVelocity_in_cgs: 1 # km per s UnitCurrent_in_cgs: 1 # Amperes UnitTemp_in_cgs: 1 # Kelvin # Parameters governing the time integration TimeIntegration: - dt_min: 1e-7 # The minimal time-step size of the simulation (in internal units). + dt_min: 1e-18 # The minimal time-step size of the simulation (in internal units). dt_max: 1e-3 # The maximal time-step size of the simulation (in internal units). # Parameters governing the snapshots @@ -16,11 +16,13 @@ Snapshots: basename: sodShock # Common part of the name of output files time_first: 0. # Time of the first output (in internal units) delta_time: 1.06638 # Time difference between consecutive outputs (in internal units) + # scale_factor_first: 1.0 scale_factor_first: 0.001 compression: 1 # Parameters governing the conserved quantities statistics Statistics: + scale_factor_first: 1.0 delta_time: 1.02 # Time between statistics output # Parameters for the hydrodynamics scheme @@ -41,3 +43,38 @@ Cosmology: a_begin: 0.001 a_end: 0.00106638 +# Impose primoridal metallicity +EAGLEChemistry: + init_abundance_metal: 0. + init_abundance_Hydrogen: 0.752 + init_abundance_Helium: 0.248 + init_abundance_Carbon: 0.0 + init_abundance_Nitrogen: 0.0 + init_abundance_Oxygen: 0.0 + init_abundance_Neon: 0.0 + init_abundance_Magnesium: 0.0 + init_abundance_Silicon: 0.0 + init_abundance_Iron: 0.0 + +EAGLECooling: + dir_name: ./coolingtables/ + H_reion_z: 11.5 + H_reion_eV_p_H: 2.0 + He_reion_z_centre: 3.5 + He_reion_z_sigma: 0.5 + He_reion_eV_p_H: 2.0 + +# Parameters for the EAGLE "equation of state" +EAGLEEntropyFloor: + Jeans_density_threshold_H_p_cm3: 0.1 # Physical density above which the EAGLE Jeans limiter entropy floor kicks in expressed in Hydrogen atoms per cm^3. + Jeans_over_density_threshold: 10. # Overdensity above which the EAGLE Jeans limiter entropy floor can kick in. + Jeans_temperature_norm_K: 8000 # Temperature of the EAGLE Jeans limiter entropy floor at the density threshold expressed in Kelvin. + Jeans_gamma_effective: 1.3333333 # Slope the of the EAGLE Jeans limiter entropy floor + Cool_density_threshold_H_p_cm3: 1e-5 # Physical density above which the EAGLE Cool limiter entropy floor kicks in expressed in Hydrogen atoms per cm^3. + Cool_over_density_threshold: 10. # Overdensity above which the EAGLE Cool limiter entropy floor can kick in. + Cool_temperature_norm_K: 8000 # Temperature of the EAGLE Cool limiter entropy floor at the density threshold expressed in Kelvin. + Cool_gamma_effective: 1. # Slope the of the EAGLE Cool limiter entropy floor + +LambdaCooling: + lambda_nH2_cgs: 1e-48 # Cooling rate divided by square Hydrogen number density (in cgs units [erg * s^-1 * cm^3]) + cooling_tstep_mult: 1.0 # (Optional) Dimensionless pre-factor for the time-step condition. diff --git a/examples/EAGLE_low_z/EAGLE_12/README b/examples/EAGLE_low_z/EAGLE_12/README index 856e2d9cf2c54e61ae172036e5be7ae9dd450f60..9fb2930f625fd2e85293b9ebcc91f46e59f02745 100644 --- a/examples/EAGLE_low_z/EAGLE_12/README +++ b/examples/EAGLE_low_z/EAGLE_12/README @@ -13,4 +13,4 @@ The particle load of the main EAGLE simulation can be reproduced by running these ICs on 8 cores. MD5 checksum of the ICs: -88877f5bb0ee21488c20b8f065fc74db EAGLE_ICs_12.hdf5 +d1ebf1c7ffb0488c3628e08ed6d2dbf4 EAGLE_ICs_12.hdf5 diff --git a/examples/EAGLE_low_z/EAGLE_12/eagle_12.yml b/examples/EAGLE_low_z/EAGLE_12/eagle_12.yml index 2494e17e206d0e7a10433108db42dbfd8631f5d8..207f70331bd8f4662e8335057608922299120b39 100644 --- a/examples/EAGLE_low_z/EAGLE_12/eagle_12.yml +++ b/examples/EAGLE_low_z/EAGLE_12/eagle_12.yml @@ -93,7 +93,6 @@ EAGLEStarFormation: EOS_density_norm_H_p_cm3: 0.1 # Physical density used for the normalisation of the EOS assumed for the star-forming gas in Hydrogen atoms per cm^3. EOS_temperature_norm_K: 8000 # Temperature om the polytropic EOS assumed for star-forming gas at the density normalisation in Kelvin. EOS_gamma_effective: 1.3333333 # Slope the of the polytropic EOS assumed for the star-forming gas. - gas_fraction: 0.3 # The gas fraction used internally by the model. KS_normalisation: 1.515e-4 # The normalization of the Kennicutt-Schmidt law in Msun / kpc^2 / yr. KS_exponent: 1.4 # The exponent of the Kennicutt-Schmidt law. KS_min_over_density: 57.7 # The over-density above which star-formation is allowed. diff --git a/examples/EAGLE_low_z/EAGLE_25/README b/examples/EAGLE_low_z/EAGLE_25/README index 88fc1ea3eede1e254907dd5ba1dbf2eaa81fb694..0fe37964d3734b63acd10195822e11094219586a 100644 --- a/examples/EAGLE_low_z/EAGLE_25/README +++ b/examples/EAGLE_low_z/EAGLE_25/README @@ -13,4 +13,4 @@ The particle load of the main EAGLE simulation can be reproduced by running these ICs on 64 cores. MD5 checksum of the ICs: -02cd1c353b86230af047b5d4ab22afcf EAGLE_ICs_25.hdf5 +592faecfc51239a00396f5a4acc2fa4d EAGLE_ICs_25.hdf5 diff --git a/examples/EAGLE_low_z/EAGLE_25/eagle_25.yml b/examples/EAGLE_low_z/EAGLE_25/eagle_25.yml index 475d36d603f8ca36f332da0b49f1952d3f521c63..1cb0ddaf4b10f7aecf232619d36903f997563624 100644 --- a/examples/EAGLE_low_z/EAGLE_25/eagle_25.yml +++ b/examples/EAGLE_low_z/EAGLE_25/eagle_25.yml @@ -100,7 +100,6 @@ EAGLEStarFormation: EOS_density_norm_H_p_cm3: 0.1 # Physical density used for the normalisation of the EOS assumed for the star-forming gas in Hydrogen atoms per cm^3. EOS_temperature_norm_K: 8000 # Temperature om the polytropic EOS assumed for star-forming gas at the density normalisation in Kelvin. EOS_gamma_effective: 1.3333333 # Slope the of the polytropic EOS assumed for the star-forming gas. - gas_fraction: 0.3 # The gas fraction used internally by the model. KS_normalisation: 1.515e-4 # The normalization of the Kennicutt-Schmidt law in Msun / kpc^2 / yr. KS_exponent: 1.4 # The exponent of the Kennicutt-Schmidt law. KS_min_over_density: 57.7 # The over-density above which star-formation is allowed. diff --git a/examples/EAGLE_low_z/EAGLE_50/README b/examples/EAGLE_low_z/EAGLE_50/README index f6ae9cd9537ad224ba63eaff2daba25dfd19718c..211360be35f83423160ceb9912f7f0ce9ee444c8 100644 --- a/examples/EAGLE_low_z/EAGLE_50/README +++ b/examples/EAGLE_low_z/EAGLE_50/README @@ -13,4 +13,4 @@ The particle load of the main EAGLE simulation can be reproduced by running these ICs on 512 cores. MD5 checksum of the ICs: -3591b579bd108ddf0e555092bdfbf97f EAGLE_ICs_50.hdf5 +203985136c665e8677a4faa77f94110a EAGLE_ICs_50.hdf5 diff --git a/examples/EAGLE_low_z/EAGLE_50/eagle_50.yml b/examples/EAGLE_low_z/EAGLE_50/eagle_50.yml index 45dd3a706ef4a8abe12998bda8f763c933f84671..97935ef81804e2150c60fe459792c71673326cef 100644 --- a/examples/EAGLE_low_z/EAGLE_50/eagle_50.yml +++ b/examples/EAGLE_low_z/EAGLE_50/eagle_50.yml @@ -100,7 +100,6 @@ EAGLEStarFormation: EOS_density_norm_H_p_cm3: 0.1 # Physical density used for the normalisation of the EOS assumed for the star-forming gas in Hydrogen atoms per cm^3. EOS_temperature_norm_K: 8000 # Temperature om the polytropic EOS assumed for star-forming gas at the density normalisation in Kelvin. EOS_gamma_effective: 1.3333333 # Slope the of the polytropic EOS assumed for the star-forming gas. - gas_fraction: 0.3 # The gas fraction used internally by the model. KS_normalisation: 1.515e-4 # The normalization of the Kennicutt-Schmidt law in Msun / kpc^2 / yr. KS_exponent: 1.4 # The exponent of the Kennicutt-Schmidt law. KS_min_over_density: 57.7 # The over-density above which star-formation is allowed. diff --git a/examples/EAGLE_low_z/EAGLE_6/README b/examples/EAGLE_low_z/EAGLE_6/README index 9fe951252f1abf4e27264c6497ec14451080b01e..844c779e222b1cd7ebefaa61d637092a5a8c1e79 100644 --- a/examples/EAGLE_low_z/EAGLE_6/README +++ b/examples/EAGLE_low_z/EAGLE_6/README @@ -10,4 +10,4 @@ been removed. Everything is ready to be run without cosmological integration. MD5 checksum of the ICs: -a4efccd3646a60ad8600ac3a2895ea82 EAGLE_ICs_6.hdf5 +1923db3cf59b2e80b23e4e1aee13e012 EAGLE_ICs_6.hdf5 diff --git a/examples/EAGLE_low_z/EAGLE_6/eagle_6.yml b/examples/EAGLE_low_z/EAGLE_6/eagle_6.yml index 8a3b85d4f90f658c5752bd84e1f5c9122eee73dc..8e31ea1852e9f3a22153d16cdcba4f0eac6346c9 100644 --- a/examples/EAGLE_low_z/EAGLE_6/eagle_6.yml +++ b/examples/EAGLE_low_z/EAGLE_6/eagle_6.yml @@ -109,7 +109,6 @@ EAGLEStarFormation: EOS_density_norm_H_p_cm3: 0.1 # Physical density used for the normalisation of the EOS assumed for the star-forming gas in Hydrogen atoms per cm^3. EOS_temperature_norm_K: 8000 # Temperature om the polytropic EOS assumed for star-forming gas at the density normalisation in Kelvin. EOS_gamma_effective: 1.3333333 # Slope the of the polytropic EOS assumed for the star-forming gas. - gas_fraction: 0.3 # The gas fraction used internally by the model. KS_normalisation: 1.515e-4 # The normalization of the Kennicutt-Schmidt law in Msun / kpc^2 / yr. KS_exponent: 1.4 # The exponent of the Kennicutt-Schmidt law. KS_min_over_density: 57.7 # The over-density above which star-formation is allowed. diff --git a/examples/HydroTests/GreshoVortex_3D/plotSolution.py b/examples/HydroTests/GreshoVortex_3D/plotSolution.py index 8fddf148bd169310d5e69ffbfa4a6de099068c69..545440c997d9ebc3ab11562d0a7d9fa143e23ed2 100644 --- a/examples/HydroTests/GreshoVortex_3D/plotSolution.py +++ b/examples/HydroTests/GreshoVortex_3D/plotSolution.py @@ -108,6 +108,18 @@ u = sim["/PartType0/InternalEnergy"][:] S = sim["/PartType0/Entropy"][:] P = sim["/PartType0/Pressure"][:] +try: + diffusion = sim["/PartType0/Diffusion"][:] + plot_diffusion = True +except: + plot_diffusion = False + +try: + viscosity = sim["/PartType0/Viscosity"][:] + plot_viscosity = True +except: + plot_viscosity = False + # Bin te data r_bin_edge = np.arange(0., 1., 0.02) r_bin = 0.5*(r_bin_edge[1:] + r_bin_edge[:-1]) @@ -127,6 +139,15 @@ P_sigma_bin = np.sqrt(P2_bin - P_bin**2) S_sigma_bin = np.sqrt(S2_bin - S_bin**2) u_sigma_bin = np.sqrt(u2_bin - u_bin**2) +if plot_diffusion: + alpha_diff_bin,_,_ = stats.binned_statistic(r, diffusion, statistic='mean', bins=r_bin_edge) + alpha2_diff_bin,_,_ = stats.binned_statistic(r, diffusion**2, statistic='mean', bins=r_bin_edge) + alpha_diff_sigma_bin = np.sqrt(alpha2_diff_bin - alpha_diff_bin**2) + +if plot_viscosity: + alpha_visc_bin,_,_ = stats.binned_statistic(r, viscosity, statistic='mean', bins=r_bin_edge) + alpha2_visc_bin,_,_ = stats.binned_statistic(r, viscosity**2, statistic='mean', bins=r_bin_edge) + alpha_visc_sigma_bin = np.sqrt(alpha2_visc_bin - alpha_visc_bin**2) # Plot the interesting quantities figure() @@ -188,16 +209,32 @@ ylim(7.3, 9.1) # Radial entropy profile -------------------------------- subplot(235) - -plot(r, S, '.', color='r', ms=0.5) -plot(solution_r, solution_s, '--', color='k', alpha=0.8, lw=1.2) -errorbar(r_bin, S_bin, yerr=S_sigma_bin, fmt='.', ms=8.0, color='b', lw=1.2) -plot([0.2, 0.2], [-100, 100], ':', color='k', alpha=0.4, lw=1.2) -plot([0.4, 0.4], [-100, 100], ':', color='k', alpha=0.4, lw=1.2) xlabel("${\\rm{Radius}}~r$", labelpad=0) -ylabel("${\\rm{Entropy}}~S$", labelpad=0) + xlim(0, R_max) -ylim(4.9 + P0, P0 + 6.1) + +xlabel("${\\rm{Radius}}~r$", labelpad=0) + + +if plot_diffusion or plot_viscosity: + if plot_diffusion: + plot(r, diffusion, ".", color='r', ms=0.5, alpha=0.2) + errorbar(r_bin, alpha_diff_bin, yerr=alpha_diff_sigma_bin, fmt=".", ms=8.0, color='b', lw=1.2, label="Diffusion") + + if plot_viscosity: + plot(r, viscosity, ".", color='g', ms=0.5, alpha=0.2) + errorbar(r_bin, alpha_visc_bin, yerr=alpha_visc_sigma_bin, fmt=".", ms=8.0, color='y', lw=1.2, label="Viscosity") + + ylabel("${\\rm{Rate~Coefficient}}~\\alpha$", labelpad=0) + legend() +else: + plot(r, S, '.', color='r', ms=0.5) + plot(solution_r, solution_s, '--', color='k', alpha=0.8, lw=1.2) + errorbar(r_bin, S_bin, yerr=S_sigma_bin, fmt='.', ms=8.0, color='b', lw=1.2) + plot([0.2, 0.2], [-100, 100], ':', color='k', alpha=0.4, lw=1.2) + plot([0.4, 0.4], [-100, 100], ':', color='k', alpha=0.4, lw=1.2) + ylabel("${\\rm{Entropy}}~S$", labelpad=0) + ylim(4.9 + P0, P0 + 6.1) # Image -------------------------------------------------- #subplot(234) diff --git a/examples/HydroTests/InteractingBlastWaves_1D/interactingBlastWaves.yml b/examples/HydroTests/InteractingBlastWaves_1D/interactingBlastWaves.yml index c4960dfa2c07b6b08cd6559b1de49f27b518bf94..aaad57dc0907cbe496ecb819d6df2a3805f7babf 100644 --- a/examples/HydroTests/InteractingBlastWaves_1D/interactingBlastWaves.yml +++ b/examples/HydroTests/InteractingBlastWaves_1D/interactingBlastWaves.yml @@ -10,7 +10,7 @@ InternalUnitSystem: TimeIntegration: time_begin: 0. # The starting time of the simulation (in internal units). time_end: 3.8e-2 # The end time of the simulation (in internal units). - dt_min: 1e-7 # The minimal time-step size of the simulation (in internal units). + dt_min: 1e-10 # The minimal time-step size of the simulation (in internal units). dt_max: 1e-5 # The maximal time-step size of the simulation (in internal units). # Parameters governing the snapshots diff --git a/examples/HydroTests/Noh_1D/noh.yml b/examples/HydroTests/Noh_1D/noh.yml index 58e13ddda8939c8fc5fa4360a498a87f1c5b189a..3a7e328df1da1477d34d56d25121cdd4e89c03a0 100644 --- a/examples/HydroTests/Noh_1D/noh.yml +++ b/examples/HydroTests/Noh_1D/noh.yml @@ -10,7 +10,7 @@ InternalUnitSystem: TimeIntegration: time_begin: 0. # The starting time of the simulation (in internal units). time_end: 0.6 # The end time of the simulation (in internal units). - dt_min: 1e-7 # The minimal time-step size of the simulation (in internal units). + dt_min: 1e-10 # The minimal time-step size of the simulation (in internal units). dt_max: 1e-3 # The maximal time-step size of the simulation (in internal units). # Parameters governing the snapshots @@ -33,4 +33,4 @@ InitialConditions: file_name: ./noh.hdf5 # The file to read periodic: 1 - \ No newline at end of file + diff --git a/examples/HydroTests/Noh_2D/noh.yml b/examples/HydroTests/Noh_2D/noh.yml index eaf991631854e9a9781f0fcee50d996f8af949cd..76ac8eb911faca76921370ee726ee8efa00000f3 100644 --- a/examples/HydroTests/Noh_2D/noh.yml +++ b/examples/HydroTests/Noh_2D/noh.yml @@ -10,7 +10,7 @@ InternalUnitSystem: TimeIntegration: time_begin: 0. # The starting time of the simulation (in internal units). time_end: 0.6 # The end time of the simulation (in internal units). - dt_min: 1e-7 # The minimal time-step size of the simulation (in internal units). + dt_min: 1e-10 # The minimal time-step size of the simulation (in internal units). dt_max: 1e-3 # The maximal time-step size of the simulation (in internal units). # Parameters governing the snapshots diff --git a/examples/HydroTests/SedovBlast_2D/sedov.yml b/examples/HydroTests/SedovBlast_2D/sedov.yml index b4252581d6eb3b2932a074e7545b2d308be51865..208e4ac72207140e055b3164ceaac04f7f521e4f 100644 --- a/examples/HydroTests/SedovBlast_2D/sedov.yml +++ b/examples/HydroTests/SedovBlast_2D/sedov.yml @@ -10,7 +10,7 @@ InternalUnitSystem: TimeIntegration: time_begin: 0. # The starting time of the simulation (in internal units). time_end: 5e-2 # The end time of the simulation (in internal units). - dt_min: 1e-7 # The minimal time-step size of the simulation (in internal units). + dt_min: 1e-9 # The minimal time-step size of the simulation (in internal units). dt_max: 1e-2 # The maximal time-step size of the simulation (in internal units). # Parameters governing the snapshots diff --git a/examples/HydroTests/SedovBlast_3D/plotSolution.py b/examples/HydroTests/SedovBlast_3D/plotSolution.py index ad34695d36f1bf8e8985b883200f17d6e38a70c9..b0f2e08441b3fa550e61602ba852228a04362fbc 100644 --- a/examples/HydroTests/SedovBlast_3D/plotSolution.py +++ b/examples/HydroTests/SedovBlast_3D/plotSolution.py @@ -24,7 +24,7 @@ # Parameters rho_0 = 1. # Background Density P_0 = 1.e-6 # Background Pressure -E_0 = 1. # Energy of the explosion +E_0 = 1.0 # Energy of the explosion gas_gamma = 5./3. # Gas polytropic index @@ -86,6 +86,18 @@ S = sim["/PartType0/Entropy"][:] P = sim["/PartType0/Pressure"][:] rho = sim["/PartType0/Density"][:] +try: + diffusion = sim["/PartType0/Diffusion"][:] + plot_diffusion = True +except: + plot_diffusion = False + +try: + viscosity = sim["/PartType0/Viscosity"][:] + plot_viscosity = True +except: + plot_viscosity = False + # Bin te data r_bin_edge = np.arange(0., 0.5, 0.01) r_bin = 0.5*(r_bin_edge[1:] + r_bin_edge[:-1]) @@ -105,6 +117,16 @@ P_sigma_bin = np.sqrt(P2_bin - P_bin**2) S_sigma_bin = np.sqrt(S2_bin - S_bin**2) u_sigma_bin = np.sqrt(u2_bin - u_bin**2) +if plot_diffusion: + alpha_diff_bin,_,_ = stats.binned_statistic(r, diffusion, statistic='mean', bins=r_bin_edge) + alpha2_diff_bin,_,_ = stats.binned_statistic(r, diffusion**2, statistic='mean', bins=r_bin_edge) + alpha_diff_sigma_bin = np.sqrt(alpha2_diff_bin - alpha_diff_bin**2) + +if plot_viscosity: + alpha_visc_bin,_,_ = stats.binned_statistic(r, viscosity, statistic='mean', bins=r_bin_edge) + alpha2_visc_bin,_,_ = stats.binned_statistic(r, viscosity**2, statistic='mean', bins=r_bin_edge) + alpha_visc_sigma_bin = np.sqrt(alpha2_visc_bin - alpha_visc_bin**2) + # Now, work our the solution.... @@ -274,14 +296,28 @@ ylim(-2, 22) # Entropy profile --------------------------------- subplot(235) -plot(r, S, '.', color='r', ms=0.5, alpha=0.2) -plot(r_s, s_s, '--', color='k', alpha=0.8, lw=1.2) -errorbar(r_bin, S_bin, yerr=S_sigma_bin, fmt='.', ms=8.0, color='b', lw=1.2) xlabel("${\\rm{Radius}}~r$", labelpad=0) -ylabel("${\\rm{Entropy}}~S$", labelpad=0) -xlim(0, 1.3 * r_shock) -ylim(-5, 50) + +if plot_diffusion or plot_viscosity: + if plot_diffusion: + plot(r, diffusion, ".", color='r', ms=0.5, alpha=0.2) + errorbar(r_bin, alpha_diff_bin, yerr=alpha_diff_sigma_bin, fmt=".", ms=8.0, color='b', lw=1.2, label="Diffusion") + + if plot_viscosity: + plot(r, viscosity, ".", color='g', ms=0.5, alpha=0.2) + errorbar(r_bin, alpha_visc_bin, yerr=alpha_visc_sigma_bin, fmt=".", ms=8.0, color='y', lw=1.2, label="Viscosity") + + ylabel("${\\rm{Rate~Coefficient}}~\\alpha$", labelpad=0) + legend() +else: + plot(r, S, '.', color='r', ms=0.5, alpha=0.2) + plot(r_s, s_s, '--', color='k', alpha=0.8, lw=1.2) + errorbar(r_bin, S_bin, yerr=S_sigma_bin, fmt='.', ms=8.0, color='b', lw=1.2) + ylabel("${\\rm{Entropy}}~S$", labelpad=0) + ylim(-5, 50) + +xlim(0, 1.3 * r_shock) # Information ------------------------------------- subplot(236, frameon=False) diff --git a/examples/HydroTests/SedovBlast_3D/sedov.yml b/examples/HydroTests/SedovBlast_3D/sedov.yml index 19e8c72538a748304ca4da076458c9ae27dc8f46..00419ba94b262a1c94c0d3cd31acf70c949b9164 100644 --- a/examples/HydroTests/SedovBlast_3D/sedov.yml +++ b/examples/HydroTests/SedovBlast_3D/sedov.yml @@ -10,7 +10,7 @@ InternalUnitSystem: TimeIntegration: time_begin: 0. # The starting time of the simulation (in internal units). time_end: 5e-2 # The end time of the simulation (in internal units). - dt_min: 1e-7 # The minimal time-step size of the simulation (in internal units). + dt_min: 1e-9 # The minimal time-step size of the simulation (in internal units). dt_max: 1e-2 # The maximal time-step size of the simulation (in internal units). # Parameters governing the snapshots diff --git a/examples/HydroTests/SodShock_3D/plotSolution.py b/examples/HydroTests/SodShock_3D/plotSolution.py index 6da7193bcd3cdfb7c22a3fc6a14f91aea5cff5f7..69b2fe4887e986156ed01e0f4177d01ccbed6035 100644 --- a/examples/HydroTests/SodShock_3D/plotSolution.py +++ b/examples/HydroTests/SodShock_3D/plotSolution.py @@ -84,6 +84,18 @@ S = sim["/PartType0/Entropy"][:] P = sim["/PartType0/Pressure"][:] rho = sim["/PartType0/Density"][:] +try: + diffusion = sim["/PartType0/Diffusion"][:] + plot_diffusion = True +except: + plot_diffusion = False + +try: + viscosity = sim["/PartType0/Viscosity"][:] + plot_viscosity = True +except: + plot_viscosity = False + x_min = -1. x_max = 1. x += x_min @@ -108,6 +120,15 @@ P_sigma_bin = np.sqrt(P2_bin - P_bin**2) S_sigma_bin = np.sqrt(S2_bin - S_bin**2) u_sigma_bin = np.sqrt(u2_bin - u_bin**2) +if plot_diffusion: + alpha_diff_bin,_,_ = stats.binned_statistic(x, diffusion, statistic='mean', bins=x_bin_edge) + alpha2_diff_bin,_,_ = stats.binned_statistic(x, diffusion**2, statistic='mean', bins=x_bin_edge) + alpha_diff_sigma_bin = np.sqrt(alpha2_diff_bin - alpha_diff_bin**2) + +if plot_viscosity: + alpha_visc_bin,_,_ = stats.binned_statistic(x, viscosity, statistic='mean', bins=x_bin_edge) + alpha2_visc_bin,_,_ = stats.binned_statistic(x, viscosity**2, statistic='mean', bins=x_bin_edge) + alpha_visc_sigma_bin = np.sqrt(alpha2_visc_bin - alpha_visc_bin**2) # Analytic solution c_L = sqrt(gas_gamma * P_L / rho_L) # Speed of the rarefaction wave @@ -282,13 +303,26 @@ ylim(0.8, 2.2) # Entropy profile --------------------------------- subplot(235) -plot(x, S, '.', color='r', ms=0.5, alpha=0.2) -plot(x_s, s_s, '--', color='k', alpha=0.8, lw=1.2) -errorbar(x_bin, S_bin, yerr=S_sigma_bin, fmt='.', ms=8.0, color='b', lw=1.2) -xlabel("${\\rm{Position}}~x$", labelpad=0) -ylabel("${\\rm{Entropy}}~S$", labelpad=0) xlim(-0.5, 0.5) -ylim(0.8, 3.8) +xlabel("${\\rm{Position}}~x$", labelpad=0) + +if plot_diffusion or plot_viscosity: + if plot_diffusion: + plot(x, diffusion * 100, ".", color='r', ms=0.5, alpha=0.2) + errorbar(x_bin, alpha_diff_bin * 100, yerr=alpha_diff_sigma_bin * 100, fmt=".", ms=8.0, color='b', lw=1.2, label="Diffusion (100x)") + + if plot_viscosity: + plot(x, viscosity, ".", color='g', ms=0.5, alpha=0.2) + errorbar(x_bin, alpha_visc_bin, yerr=alpha_visc_sigma_bin, fmt=".", ms=8.0, color='y', lw=1.2, label="Viscosity") + + ylabel("${\\rm{Rate~Coefficient}}~\\alpha$", labelpad=0) + legend() +else: + plot(x, S, '.', color='r', ms=0.5, alpha=0.2) + plot(x_s, s_s, '--', color='k', alpha=0.8, lw=1.2) + errorbar(x_bin, S_bin, yerr=S_sigma_bin, fmt='.', ms=8.0, color='b', lw=1.2) + ylabel("${\\rm{Entropy}}~S$", labelpad=0) + ylim(0.8, 3.8) # Information ------------------------------------- subplot(236, frameon=False) diff --git a/examples/HydroTests/UniformBox_3D/makeICbig.py b/examples/HydroTests/UniformBox_3D/makeICbig.py index bd5cf627fb535595b3abb224cbc8de50589f12cf..8841b601656d6f03684e81b4012597615485304f 100644 --- a/examples/HydroTests/UniformBox_3D/makeICbig.py +++ b/examples/HydroTests/UniformBox_3D/makeICbig.py @@ -45,7 +45,7 @@ internalEnergy = P / ((gamma - 1.)*rho) n_iterations = numPart / N remainder = numPart % N -print "Writing", numPart, "in", n_iterations, "iterations of size", N, "and a remainder of", remainder +print("Writing", numPart, "in", n_iterations, "iterations of size", N, "and a remainder of", remainder) #--------------------------------------------------- @@ -55,8 +55,8 @@ file = h5py.File(fileName, 'w') # Header grp = file.create_group("/Header") grp.attrs["BoxSize"] = boxSize -grp.attrs["NumPart_Total"] = [numPart % (long(1)<<32), 0, 0, 0, 0, 0] -grp.attrs["NumPart_Total_HighWord"] = [numPart / (long(1)<<32), 0, 0, 0, 0, 0] +grp.attrs["NumPart_Total"] = [numPart % (int(1)<<32), 0, 0, 0, 0, 0] +grp.attrs["NumPart_Total_HighWord"] = [numPart / (int(1)<<32), 0, 0, 0, 0, 0] grp.attrs["NumPart_ThisFile"] = [numPart, 0, 0, 0, 0, 0] grp.attrs["Time"] = 0.0 grp.attrs["NumFilesPerSnapshot"] = 1 @@ -120,7 +120,7 @@ for n in range(n_iterations): coords = zeros((1,3)) offset += N - print "Done", offset,"/", numPart, "(%.1f %%)"%(100*(float)(offset)/numPart) + print("Done", offset,"/", numPart, "(%.1f %%)"%(100*(float)(offset)/numPart)) # And now, the remainder v = zeros((remainder, 3)) @@ -150,7 +150,7 @@ coords[:,1] = y[:,0] * boxSize / L + boxSize / (2*L) coords[:,2] = x[:,0] * boxSize / L + boxSize / (2*L) ods_x[offset:offset+remainder,:] = coords -print "Done", offset+remainder,"/", numPart +print("Done", offset+remainder,"/", numPart) diff --git a/examples/main.c b/examples/main.c index fa373027b0db3f6809224732e175fdc3aaad2198..ab4818f13b72d8deab7f4075852ab9e87460cd3a 100644 --- a/examples/main.c +++ b/examples/main.c @@ -354,6 +354,16 @@ int main(int argc, char *argv[]) { return 1; } + if (!with_stars && with_star_formation) { + if (myrank == 0) { + argparse_usage(&argparse); + printf( + "\nError: Cannot process star formation without stars, --stars must " + "be chosen.\n"); + } + return 1; + } + if (!with_stars && with_feedback) { if (myrank == 0) { argparse_usage(&argparse); @@ -481,8 +491,8 @@ int main(int argc, char *argv[]) { #ifdef WITH_MPI if (with_mpole_reconstruction && nr_nodes > 1) error("Cannot reconstruct m-poles every step over MPI (yet)."); - if (with_star_formation) - error("Can't run with star formation over MPI (yet)"); + if (with_star_formation && with_feedback) + error("Can't run with star formation and feedback over MPI (yet)"); if (with_limiter) error("Can't run with time-step limiter over MPI (yet)"); #endif @@ -735,6 +745,48 @@ int main(int argc, char *argv[]) { else bzero(&gravity_properties, sizeof(struct gravity_props)); + /* Initialise the external potential properties */ + bzero(&potential, sizeof(struct external_potential)); + if (with_external_gravity) + potential_init(params, &prog_const, &us, &s, &potential); + if (myrank == 0) potential_print(&potential); + + /* Initialise the cooling function properties */ +#ifdef COOLING_NONE + if (with_cooling || with_temperature) { + error( + "ERROR: Running with cooling / temperature calculation" + " but compiled without it."); + } +#else + if (!with_cooling && !with_temperature) { + error( + "ERROR: Compiled with cooling but running without it. " + "Did you forget the --cooling or --temperature flags?"); + } +#endif + bzero(&cooling_func, sizeof(struct cooling_function_data)); + if (with_cooling || with_temperature) { + cooling_init(params, &us, &prog_const, &cooling_func); + } + if (myrank == 0) cooling_print(&cooling_func); + + /* Initialise the star formation law and its properties */ + bzero(&starform, sizeof(struct star_formation)); + if (with_star_formation) { +#ifdef STAR_FORMATION_NONE + error("ERROR: Running with star formation but compiled without it!"); +#endif + starformation_init(params, &prog_const, &us, &hydro_properties, + &starform); + } + if (with_star_formation && myrank == 0) starformation_print(&starform); + + /* Initialise the chemistry */ + bzero(&chemistry, sizeof(struct chemistry_global_data)); + chemistry_init(params, &us, &prog_const, &chemistry); + if (myrank == 0) chemistry_print(&chemistry); + /* Be verbose about what happens next */ if (myrank == 0) message("Reading ICs from file '%s'", ICfileName); if (myrank == 0 && cleanup_h) @@ -900,30 +952,6 @@ int main(int argc, char *argv[]) { message("nr of cells at depth %i is %i.", data[0], data[1]); } - /* Initialise the external potential properties */ - bzero(&potential, sizeof(struct external_potential)); - if (with_external_gravity) - potential_init(params, &prog_const, &us, &s, &potential); - if (myrank == 0) potential_print(&potential); - - /* Initialise the cooling function properties */ - bzero(&cooling_func, sizeof(struct cooling_function_data)); - if (with_cooling || with_temperature) - cooling_init(params, &us, &prog_const, &cooling_func); - if (myrank == 0) cooling_print(&cooling_func); - - /* Initialise the star formation law and its properties */ - bzero(&starform, sizeof(struct star_formation)); - if (with_star_formation) - starformation_init(params, &prog_const, &us, &hydro_properties, - &starform); - if (with_star_formation && myrank == 0) starformation_print(&starform); - - /* Initialise the chemistry */ - bzero(&chemistry, sizeof(struct chemistry_global_data)); - chemistry_init(params, &us, &prog_const, &chemistry); - if (myrank == 0) chemistry_print(&chemistry); - /* Construct the engine policy */ int engine_policies = ENGINE_POLICY | engine_policy_steal; if (with_drift_all) engine_policies |= engine_policy_drift_all; diff --git a/src/atomic.h b/src/atomic.h index 35c2416f44acf33d55abdd4c699c9e6940b5fd24..7674fbd52825ebf645a21d3bfa5943309942bec4 100644 --- a/src/atomic.h +++ b/src/atomic.h @@ -88,6 +88,38 @@ __attribute__((always_inline)) INLINE static void atomic_min( } while (test_val != old_val); } +/** + * @brief Atomic min operation on doubles. + * + * This is a text-book implementation based on an atomic CAS. + * + * We create a temporary union to cope with the int-only atomic CAS + * and the floating-point min that we want. + * + * @param address The address to update. + * @param y The value to update the address with. + */ +__attribute__((always_inline)) INLINE static void atomic_min_d( + volatile double *const address, const double y) { + + long long *const long_long_ptr = (long long *)address; + + typedef union { + double as_double; + long long as_long_long; + } cast_type; + + cast_type test_val, old_val, new_val; + old_val.as_double = *address; + + do { + test_val.as_long_long = old_val.as_long_long; + new_val.as_double = min(old_val.as_double, y); + old_val.as_long_long = + atomic_cas(long_long_ptr, test_val.as_long_long, new_val.as_long_long); + } while (test_val.as_long_long != old_val.as_long_long); +} + /** * @brief Atomic max operation on floats. * @@ -119,6 +151,38 @@ __attribute__((always_inline)) INLINE static void atomic_max_f( } while (test_val.as_int != old_val.as_int); } +/** + * @brief Atomic max operation on doubles. + * + * This is a text-book implementation based on an atomic CAS. + * + * We create a temporary union to cope with the int-only atomic CAS + * and the floating-point max that we want. + * + * @param address The address to update. + * @param y The value to update the address with. + */ +__attribute__((always_inline)) INLINE static void atomic_max_d( + volatile double *const address, const double y) { + + long long *const long_long_ptr = (long long *)address; + + typedef union { + double as_double; + long long as_long_long; + } cast_type; + + cast_type test_val, old_val, new_val; + old_val.as_double = *address; + + do { + test_val.as_long_long = old_val.as_long_long; + new_val.as_double = max(old_val.as_double, y); + old_val.as_long_long = + atomic_cas(long_long_ptr, test_val.as_long_long, new_val.as_long_long); + } while (test_val.as_long_long != old_val.as_long_long); +} + /** * @brief Atomic add operation on floats. * diff --git a/src/cell.c b/src/cell.c index 675c30066da886645b25fd527991a58f8e754e85..bdec2dfebd84cf6421ba32311bcbbcf4b96a9b25 100644 --- a/src/cell.c +++ b/src/cell.c @@ -614,26 +614,21 @@ int cell_unpack_tags(const int *tags, struct cell *restrict c) { * * @return The number of packed cells. */ -int cell_pack_end_step(struct cell *restrict c, - struct pcell_step *restrict pcells) { +int cell_pack_end_step_hydro(struct cell *restrict c, + struct pcell_step_hydro *restrict pcells) { #ifdef WITH_MPI /* Pack this cell's data. */ - pcells[0].hydro.ti_end_min = c->hydro.ti_end_min; - pcells[0].hydro.ti_end_max = c->hydro.ti_end_max; - pcells[0].grav.ti_end_min = c->grav.ti_end_min; - pcells[0].grav.ti_end_max = c->grav.ti_end_max; - pcells[0].stars.ti_end_min = c->stars.ti_end_min; - pcells[0].stars.ti_end_max = c->stars.ti_end_max; - pcells[0].hydro.dx_max_part = c->hydro.dx_max_part; - pcells[0].stars.dx_max_part = c->stars.dx_max_part; + pcells[0].ti_end_min = c->hydro.ti_end_min; + pcells[0].ti_end_max = c->hydro.ti_end_max; + pcells[0].dx_max_part = c->hydro.dx_max_part; /* Fill in the progeny, depth-first recursion. */ int count = 1; for (int k = 0; k < 8; k++) if (c->progeny[k] != NULL) { - count += cell_pack_end_step(c->progeny[k], &pcells[count]); + count += cell_pack_end_step_hydro(c->progeny[k], &pcells[count]); } /* Return the number of packed values. */ @@ -653,26 +648,155 @@ int cell_pack_end_step(struct cell *restrict c, * * @return The number of cells created. */ -int cell_unpack_end_step(struct cell *restrict c, - struct pcell_step *restrict pcells) { +int cell_unpack_end_step_hydro(struct cell *restrict c, + struct pcell_step_hydro *restrict pcells) { #ifdef WITH_MPI /* Unpack this cell's data. */ - c->hydro.ti_end_min = pcells[0].hydro.ti_end_min; - c->hydro.ti_end_max = pcells[0].hydro.ti_end_max; - c->grav.ti_end_min = pcells[0].grav.ti_end_min; - c->grav.ti_end_max = pcells[0].grav.ti_end_max; - c->stars.ti_end_min = pcells[0].stars.ti_end_min; - c->stars.ti_end_max = pcells[0].stars.ti_end_max; - c->hydro.dx_max_part = pcells[0].hydro.dx_max_part; - c->stars.dx_max_part = pcells[0].stars.dx_max_part; + c->hydro.ti_end_min = pcells[0].ti_end_min; + c->hydro.ti_end_max = pcells[0].ti_end_max; + c->hydro.dx_max_part = pcells[0].dx_max_part; /* Fill in the progeny, depth-first recursion. */ int count = 1; for (int k = 0; k < 8; k++) if (c->progeny[k] != NULL) { - count += cell_unpack_end_step(c->progeny[k], &pcells[count]); + count += cell_unpack_end_step_hydro(c->progeny[k], &pcells[count]); + } + + /* Return the number of packed values. */ + return count; + +#else + error("SWIFT was not compiled with MPI support."); + return 0; +#endif +} + +/** + * @brief Pack the time information of the given cell and all it's sub-cells. + * + * @param c The #cell. + * @param pcells (output) The end-of-timestep information we pack into + * + * @return The number of packed cells. + */ +int cell_pack_end_step_grav(struct cell *restrict c, + struct pcell_step_grav *restrict pcells) { + +#ifdef WITH_MPI + + /* Pack this cell's data. */ + pcells[0].ti_end_min = c->grav.ti_end_min; + pcells[0].ti_end_max = c->grav.ti_end_max; + + /* Fill in the progeny, depth-first recursion. */ + int count = 1; + for (int k = 0; k < 8; k++) + if (c->progeny[k] != NULL) { + count += cell_pack_end_step_grav(c->progeny[k], &pcells[count]); + } + + /* Return the number of packed values. */ + return count; + +#else + error("SWIFT was not compiled with MPI support."); + return 0; +#endif +} + +/** + * @brief Unpack the time information of a given cell and its sub-cells. + * + * @param c The #cell + * @param pcells The end-of-timestep information to unpack + * + * @return The number of cells created. + */ +int cell_unpack_end_step_grav(struct cell *restrict c, + struct pcell_step_grav *restrict pcells) { + +#ifdef WITH_MPI + + /* Unpack this cell's data. */ + c->grav.ti_end_min = pcells[0].ti_end_min; + c->grav.ti_end_max = pcells[0].ti_end_max; + + /* Fill in the progeny, depth-first recursion. */ + int count = 1; + for (int k = 0; k < 8; k++) + if (c->progeny[k] != NULL) { + count += cell_unpack_end_step_grav(c->progeny[k], &pcells[count]); + } + + /* Return the number of packed values. */ + return count; + +#else + error("SWIFT was not compiled with MPI support."); + return 0; +#endif +} + +/** + * @brief Pack the time information of the given cell and all it's sub-cells. + * + * @param c The #cell. + * @param pcells (output) The end-of-timestep information we pack into + * + * @return The number of packed cells. + */ +int cell_pack_end_step_stars(struct cell *restrict c, + struct pcell_step_stars *restrict pcells) { + +#ifdef WITH_MPI + + /* Pack this cell's data. */ + pcells[0].ti_end_min = c->stars.ti_end_min; + pcells[0].ti_end_max = c->stars.ti_end_max; + pcells[0].dx_max_part = c->stars.dx_max_part; + + /* Fill in the progeny, depth-first recursion. */ + int count = 1; + for (int k = 0; k < 8; k++) + if (c->progeny[k] != NULL) { + count += cell_pack_end_step_stars(c->progeny[k], &pcells[count]); + } + + /* Return the number of packed values. */ + return count; + +#else + error("SWIFT was not compiled with MPI support."); + return 0; +#endif +} + +/** + * @brief Unpack the time information of a given cell and its sub-cells. + * + * @param c The #cell + * @param pcells The end-of-timestep information to unpack + * + * @return The number of cells created. + */ +int cell_unpack_end_step_stars(struct cell *restrict c, + struct pcell_step_stars *restrict pcells) { + +#ifdef WITH_MPI + + /* Unpack this cell's data. */ + c->stars.ti_end_min = pcells[0].ti_end_min; + c->stars.ti_end_max = pcells[0].ti_end_max; + c->stars.dx_max_part = pcells[0].dx_max_part; + + /* Fill in the progeny, depth-first recursion. */ + int count = 1; + for (int k = 0; k < 8; k++) + if (c->progeny[k] != NULL) { + count += cell_unpack_end_step_stars(c->progeny[k], &pcells[count]); } /* Return the number of packed values. */ @@ -1845,6 +1969,32 @@ void cell_clear_limiter_flags(struct cell *c, void *data) { c->hydro.do_sub_limiter = 0; } +/** + * @brief Recurse down in a cell hierarchy until the hydro.super level is + * reached and activate the spart drift at that level. + * + * @param c The #cell to recurse into. + * @param s The #scheduler. + */ +void cell_activate_super_spart_drifts(struct cell *c, struct scheduler *s) { + + if (c == c->hydro.super) { + cell_activate_drift_spart(c, s); + } else { + if (c->split) { + for (int k = 0; k < 8; ++k) { + if (c->progeny[k] != NULL) { + cell_activate_super_spart_drifts(c->progeny[k], s); + } + } + } else { +#ifdef SWIFT_DEBUG_CHECKS + error("Reached a leaf cell without finding a hydro.super!!"); +#endif + } + } +} + /** * @brief Activate the #part drifts on the given cell. */ @@ -3120,7 +3270,8 @@ int cell_unskip_hydro_tasks(struct cell *c, struct scheduler *s) { } /* If the foreign cell is active, we want its ti_end values. */ - if (ci_active || with_limiter) scheduler_activate(s, ci->mpi.recv_ti); + if (ci_active || with_limiter) + scheduler_activate(s, ci->mpi.hydro.recv_ti); if (with_limiter) scheduler_activate(s, ci->mpi.limiter.recv); if (with_limiter) @@ -3148,7 +3299,7 @@ int cell_unskip_hydro_tasks(struct cell *c, struct scheduler *s) { /* If the local cell is active, send its ti_end values. */ if (cj_active || with_limiter) - scheduler_activate_send(s, cj->mpi.send_ti, ci_nodeID); + scheduler_activate_send(s, cj->mpi.hydro.send_ti, ci_nodeID); } else if (cj_nodeID != nodeID) { @@ -3165,7 +3316,8 @@ int cell_unskip_hydro_tasks(struct cell *c, struct scheduler *s) { } /* If the foreign cell is active, we want its ti_end values. */ - if (cj_active || with_limiter) scheduler_activate(s, cj->mpi.recv_ti); + if (cj_active || with_limiter) + scheduler_activate(s, cj->mpi.hydro.recv_ti); if (with_limiter) scheduler_activate(s, cj->mpi.limiter.recv); if (with_limiter) @@ -3194,7 +3346,7 @@ int cell_unskip_hydro_tasks(struct cell *c, struct scheduler *s) { /* If the local cell is active, send its ti_end values. */ if (ci_active || with_limiter) - scheduler_activate_send(s, ci->mpi.send_ti, cj_nodeID); + scheduler_activate_send(s, ci->mpi.hydro.send_ti, cj_nodeID); } #endif } @@ -3222,8 +3374,8 @@ int cell_unskip_hydro_tasks(struct cell *c, struct scheduler *s) { if (c->hydro.cooling != NULL) scheduler_activate(s, c->hydro.cooling); if (c->logger != NULL) scheduler_activate(s, c->logger); - if (c->hydro.star_formation != NULL) { - scheduler_activate(s, c->hydro.star_formation); + if (c->top->hydro.star_formation != NULL) { + scheduler_activate(s, c->top->hydro.star_formation); cell_activate_drift_spart(c, s); } } @@ -3292,7 +3444,7 @@ int cell_unskip_gravity_tasks(struct cell *c, struct scheduler *s) { if (cj_active) scheduler_activate(s, ci->mpi.grav.recv); /* If the foreign cell is active, we want its ti_end values. */ - if (ci_active) scheduler_activate(s, ci->mpi.recv_ti); + if (ci_active) scheduler_activate(s, ci->mpi.grav.recv_ti); /* Is the foreign cell active and will need stuff from us? */ if (ci_active) { @@ -3306,7 +3458,8 @@ int cell_unskip_gravity_tasks(struct cell *c, struct scheduler *s) { } /* If the local cell is active, send its ti_end values. */ - if (cj_active) scheduler_activate_send(s, cj->mpi.send_ti, ci_nodeID); + if (cj_active) + scheduler_activate_send(s, cj->mpi.grav.send_ti, ci_nodeID); } else if (cj_nodeID != nodeID) { @@ -3314,7 +3467,7 @@ int cell_unskip_gravity_tasks(struct cell *c, struct scheduler *s) { if (ci_active) scheduler_activate(s, cj->mpi.grav.recv); /* If the foreign cell is active, we want its ti_end values. */ - if (cj_active) scheduler_activate(s, cj->mpi.recv_ti); + if (cj_active) scheduler_activate(s, cj->mpi.grav.recv_ti); /* Is the foreign cell active and will need stuff from us? */ if (cj_active) { @@ -3328,7 +3481,8 @@ int cell_unskip_gravity_tasks(struct cell *c, struct scheduler *s) { } /* If the local cell is active, send its ti_end values. */ - if (ci_active) scheduler_activate_send(s, ci->mpi.send_ti, cj_nodeID); + if (ci_active) + scheduler_activate_send(s, ci->mpi.grav.send_ti, cj_nodeID); } #endif } @@ -3399,11 +3553,10 @@ int cell_unskip_gravity_tasks(struct cell *c, struct scheduler *s) { int cell_unskip_stars_tasks(struct cell *c, struct scheduler *s) { struct engine *e = s->space->e; - const int with_feedback = (e->policy & engine_policy_feedback); const int nodeID = e->nodeID; int rebuild = 0; - if (!with_feedback && c->stars.drift != NULL && cell_is_active_stars(c, e)) { + if (c->stars.drift != NULL && cell_is_active_stars(c, e)) { cell_activate_drift_spart(c, s); } @@ -3501,14 +3654,14 @@ int cell_unskip_stars_tasks(struct cell *c, struct scheduler *s) { cell_activate_drift_spart(cj, s); /* If the local cell is active, send its ti_end values. */ - scheduler_activate_send(s, cj->mpi.send_ti, ci_nodeID); + scheduler_activate_send(s, cj->mpi.stars.send_ti, ci_nodeID); } if (ci_active) { scheduler_activate(s, ci->mpi.stars.recv); /* If the foreign cell is active, we want its ti_end values. */ - scheduler_activate(s, ci->mpi.recv_ti); + scheduler_activate(s, ci->mpi.stars.recv_ti); /* Is the foreign cell active and will need stuff from us? */ scheduler_activate_send(s, cj->mpi.hydro.send_xv, ci_nodeID); @@ -3531,14 +3684,14 @@ int cell_unskip_stars_tasks(struct cell *c, struct scheduler *s) { cell_activate_drift_spart(ci, s); /* If the local cell is active, send its ti_end values. */ - scheduler_activate_send(s, ci->mpi.send_ti, cj_nodeID); + scheduler_activate_send(s, ci->mpi.stars.send_ti, cj_nodeID); } if (cj_active) { scheduler_activate(s, cj->mpi.stars.recv); /* If the foreign cell is active, we want its ti_end values. */ - scheduler_activate(s, cj->mpi.recv_ti); + scheduler_activate(s, cj->mpi.stars.recv_ti); /* Is the foreign cell active and will need stuff from us? */ scheduler_activate_send(s, ci->mpi.hydro.send_xv, cj_nodeID); @@ -3604,6 +3757,9 @@ int cell_unskip_stars_tasks(struct cell *c, struct scheduler *s) { if (c->stars.ghost != NULL) scheduler_activate(s, c->stars.ghost); if (c->stars.stars_in != NULL) scheduler_activate(s, c->stars.stars_in); if (c->stars.stars_out != NULL) scheduler_activate(s, c->stars.stars_out); + if (c->kick1 != NULL) scheduler_activate(s, c->kick1); + if (c->kick2 != NULL) scheduler_activate(s, c->kick2); + if (c->timestep != NULL) scheduler_activate(s, c->timestep); if (c->logger != NULL) scheduler_activate(s, c->logger); } @@ -3726,7 +3882,9 @@ void cell_set_super_mapper(void *map_data, int num_elements, void *extra_data) { int cell_has_tasks(struct cell *c) { #ifdef WITH_MPI - if (c->timestep != NULL || c->mpi.recv_ti != NULL) return 1; + if (c->timestep != NULL || c->mpi.hydro.recv_ti != NULL || + c->mpi.grav.recv_ti != NULL || c->mpi.stars.recv_ti != NULL) + return 1; #else if (c->timestep != NULL) return 1; #endif @@ -4299,32 +4457,19 @@ void cell_drift_multipole(struct cell *c, const struct engine *e) { * hierarchy. * * @param c The #cell to clean. - * @param is_super Is this a super-cell? */ -void cell_clear_stars_sort_flags(struct cell *c, const int is_super) { +void cell_clear_stars_sort_flags(struct cell *c) { /* Recurse if possible */ if (c->split) { for (int k = 0; k < 8; k++) - if (c->progeny[k] != NULL) - cell_clear_stars_sort_flags(c->progeny[k], /*is_super=*/0); - } - - /* Free the sorted array at the level where it was allocated */ - if (is_super) { - -#ifdef SWIFT_DEBUG_CHECKS - if (c != c->hydro.super) error("Cell is not a super-cell!!!"); -#endif - cell_free_stars_sorts(c); + if (c->progeny[k] != NULL) cell_clear_stars_sort_flags(c->progeny[k]); } /* Indicate that the cell is not sorted and cancel the pointer sorting arrays. */ c->stars.sorted = 0; - for (int i = 0; i < 13; i++) { - c->stars.sort[i] = NULL; - } + cell_free_stars_sorts(c); } /** @@ -4460,11 +4605,24 @@ struct spart *cell_add_spart(struct engine *e, struct cell *const c) { each level */ struct cell *top = c; while (top->parent != NULL) { + + /* What is the progeny index of the cell? */ for (int k = 0; k < 8; ++k) { if (top->parent->progeny[k] == top) { progeny[(int)top->parent->depth] = k; } } + + /* Check that the cell was indeed drifted to this point to avoid future + * issues */ +#ifdef SWIFT_DEBUG_CHECKS + if (top->hydro.super != NULL && top->stars.count > 0 && + top->stars.ti_old_part != e->ti_current) { + error("Cell had not been correctly drifted before star formation"); + } +#endif + + /* Climb up */ top = top->parent; } @@ -4473,6 +4631,10 @@ struct spart *cell_add_spart(struct engine *e, struct cell *const c) { /* Are there any extra particles left? */ if (top->stars.count == top->stars.count_total - 1) { + + /* Release the local lock before exiting. */ + if (lock_unlock(&top->stars.star_formation_lock) != 0) + error("Failed to unlock the top-level cell."); message("We ran out of star particles!"); atomic_inc(&e->forcerebuild); return NULL; @@ -4512,10 +4674,10 @@ struct spart *cell_add_spart(struct engine *e, struct cell *const c) { */ struct cell *top2 = c; while (top2->parent != NULL) { - top2->grav.ti_end_min = e->ti_current; + top2->stars.ti_old_part = e->ti_current; top2 = top2->parent; } - top2->grav.ti_end_min = e->ti_current; + top2->stars.ti_old_part = e->ti_current; /* Release the lock */ if (lock_unlock(&top->stars.star_formation_lock) != 0) @@ -4829,6 +4991,17 @@ void cell_reorder_extra_parts(struct cell *c, const ptrdiff_t parts_offset) { parts[i].gpart->id_or_neg_offset = -(i + parts_offset); } } + +#ifdef SWIFT_DEBUG_CHECKS + for (int i = 0; i < c->hydro.count_total; ++i) { + if (parts[i].time_bin == time_bin_not_created && i < c->hydro.count) { + error("Extra particle before the end of the regular array"); + } + if (parts[i].time_bin != time_bin_not_created && i >= c->hydro.count) { + error("Regular particle after the end of the regular array"); + } + } +#endif } /** @@ -4875,6 +5048,17 @@ void cell_reorder_extra_sparts(struct cell *c, const ptrdiff_t sparts_offset) { #endif } } + +#ifdef SWIFT_DEBUG_CHECKS + for (int i = 0; i < c->stars.count_total; ++i) { + if (sparts[i].time_bin == time_bin_not_created && i < c->stars.count) { + error("Extra particle before the end of the regular array"); + } + if (sparts[i].time_bin != time_bin_not_created && i >= c->stars.count) { + error("Regular particle after the end of the regular array"); + } + } +#endif } /** @@ -4920,6 +5104,17 @@ void cell_reorder_extra_gparts(struct cell *c, struct part *parts, } } } + +#ifdef SWIFT_DEBUG_CHECKS + for (int i = 0; i < c->grav.count_total; ++i) { + if (gparts[i].time_bin == time_bin_not_created && i < c->grav.count) { + error("Extra particle before the end of the regular array"); + } + if (gparts[i].time_bin != time_bin_not_created && i >= c->grav.count) { + error("Regular particle after the end of the regular array"); + } + } +#endif } /** diff --git a/src/cell.h b/src/cell.h index f1d55b255438cb5de664ab29bebadd27a0a035a9..1f2330f4913d3fa10d95a7ae80eae2e583d3e139 100644 --- a/src/cell.h +++ b/src/cell.h @@ -175,46 +175,37 @@ struct pcell { /** * @brief Cell information at the end of a time-step. */ -struct pcell_step { +struct pcell_step_hydro { - /*! Hydro variables */ - struct { + /*! Minimal integer end-of-timestep in this cell (hydro) */ + integertime_t ti_end_min; - /*! Minimal integer end-of-timestep in this cell (hydro) */ - integertime_t ti_end_min; - - /*! Minimal integer end-of-timestep in this cell (hydro) */ - integertime_t ti_end_max; - - /*! Maximal distance any #part has travelled since last rebuild */ - float dx_max_part; - - } hydro; + /*! Minimal integer end-of-timestep in this cell (hydro) */ + integertime_t ti_end_max; - /*! Grav variables */ - struct { - - /*! Minimal integer end-of-timestep in this cell (gravity) */ - integertime_t ti_end_min; + /*! Maximal distance any #part has travelled since last rebuild */ + float dx_max_part; +}; - /*! Minimal integer end-of-timestep in this cell (gravity) */ - integertime_t ti_end_max; +struct pcell_step_grav { - } grav; + /*! Minimal integer end-of-timestep in this cell (gravity) */ + integertime_t ti_end_min; - /*! Stars variables */ - struct { + /*! Minimal integer end-of-timestep in this cell (gravity) */ + integertime_t ti_end_max; +}; - /*! Minimal integer end-of-timestep in this cell (stars) */ - integertime_t ti_end_min; +struct pcell_step_stars { - /*! Maximal integer end-of-timestep in this cell (stars) */ - integertime_t ti_end_max; + /*! Minimal integer end-of-timestep in this cell (stars) */ + integertime_t ti_end_min; - /*! Maximal distance any #part has travelled since last rebuild */ - float dx_max_part; + /*! Maximal integer end-of-timestep in this cell (stars) */ + integertime_t ti_end_max; - } stars; + /*! Maximal distance any #part has travelled since last rebuild */ + float dx_max_part; }; /** @@ -239,6 +230,9 @@ struct cell { /*! Parent cell. */ struct cell *parent; + /*! Pointer to the top-level cell in a hierarchy */ + struct cell *top; + /*! Super cell, i.e. the highest-level parent cell with *any* task */ struct cell *super; @@ -600,6 +594,9 @@ struct cell { /* Task receiving hydro data (gradient). */ struct task *recv_gradient; + /* Task receiving data (time-step). */ + struct task *recv_ti; + /* Linked list for sending hydro data (positions). */ struct link *send_xv; @@ -609,6 +606,9 @@ struct cell { /* Linked list for sending hydro data (gradient). */ struct link *send_gradient; + /* Linked list for sending data (time-step). */ + struct link *send_ti; + } hydro; struct { @@ -616,16 +616,30 @@ struct cell { /* Task receiving gpart data. */ struct task *recv; + /* Task receiving data (time-step). */ + struct task *recv_ti; + /* Linked list for sending gpart data. */ struct link *send; + + /* Linked list for sending data (time-step). */ + struct link *send_ti; + } grav; struct { /* Task receiving spart data. */ struct task *recv; + /* Task receiving data (time-step). */ + struct task *recv_ti; + /* Linked list for sending spart data. */ struct link *send; + + /* Linked list for sending data (time-step). */ + struct link *send_ti; + } stars; struct { @@ -636,12 +650,6 @@ struct cell { struct link *send; } limiter; - /* Task receiving data (time-step). */ - struct task *recv_ti; - - /* Linked list for sending data (time-step). */ - struct link *send_ti; - /*! Bit mask of the proxies this cell is registered with. */ unsigned long long int sendto; @@ -731,8 +739,12 @@ int cell_unpack(struct pcell *pc, struct cell *c, struct space *s, const int with_gravity); int cell_pack_tags(const struct cell *c, int *tags); int cell_unpack_tags(const int *tags, struct cell *c); -int cell_pack_end_step(struct cell *c, struct pcell_step *pcell); -int cell_unpack_end_step(struct cell *c, struct pcell_step *pcell); +int cell_pack_end_step_hydro(struct cell *c, struct pcell_step_hydro *pcell); +int cell_unpack_end_step_hydro(struct cell *c, struct pcell_step_hydro *pcell); +int cell_pack_end_step_grav(struct cell *c, struct pcell_step_grav *pcell); +int cell_unpack_end_step_grav(struct cell *c, struct pcell_step_grav *pcell); +int cell_pack_end_step_stars(struct cell *c, struct pcell_step_stars *pcell); +int cell_unpack_end_step_stars(struct cell *c, struct pcell_step_stars *pcell); int cell_pack_multipoles(struct cell *c, struct gravity_tensors *m); int cell_unpack_multipoles(struct cell *c, struct gravity_tensors *m); int cell_getsize(struct cell *c); @@ -771,6 +783,7 @@ void cell_activate_subcell_stars_tasks(struct cell *ci, struct cell *cj, struct scheduler *s); void cell_activate_subcell_external_grav_tasks(struct cell *ci, struct scheduler *s); +void cell_activate_super_spart_drifts(struct cell *c, struct scheduler *s); void cell_activate_drift_part(struct cell *c, struct scheduler *s); void cell_activate_drift_gpart(struct cell *c, struct scheduler *s); void cell_activate_drift_spart(struct cell *c, struct scheduler *s); @@ -782,7 +795,7 @@ void cell_clear_limiter_flags(struct cell *c, void *data); void cell_set_super_mapper(void *map_data, int num_elements, void *extra_data); void cell_check_spart_pos(const struct cell *c, const struct spart *global_sparts); -void cell_clear_stars_sort_flags(struct cell *c, const int is_super); +void cell_clear_stars_sort_flags(struct cell *c); int cell_has_tasks(struct cell *c); void cell_remove_part(const struct engine *e, struct cell *c, struct part *p, struct xpart *xp); diff --git a/src/collectgroup.c b/src/collectgroup.c index ddf3e35d945fd8b07cc927d8ba383963c7558cd2..5242d0d93bbca2315a91d2abc099670afb2789ed 100644 --- a/src/collectgroup.c +++ b/src/collectgroup.c @@ -40,6 +40,13 @@ struct mpicollectgroup1 { long long inhibited, g_inhibited, s_inhibited; integertime_t ti_hydro_end_min; integertime_t ti_gravity_end_min; + integertime_t ti_stars_end_min; + integertime_t ti_hydro_end_max; + integertime_t ti_gravity_end_max; + integertime_t ti_stars_end_max; + integertime_t ti_hydro_beg_max; + integertime_t ti_gravity_beg_max; + integertime_t ti_stars_beg_max; int forcerebuild; long long total_nr_cells; long long total_nr_tasks; @@ -86,9 +93,15 @@ void collectgroup1_apply(struct collectgroup1 *grp1, struct engine *e) { e->ti_gravity_end_min = grp1->ti_gravity_end_min; e->ti_gravity_end_max = grp1->ti_gravity_end_max; e->ti_gravity_beg_max = grp1->ti_gravity_beg_max; - e->ti_end_min = min(e->ti_hydro_end_min, e->ti_gravity_end_min); - e->ti_end_max = max(e->ti_hydro_end_max, e->ti_gravity_end_max); - e->ti_beg_max = max(e->ti_hydro_beg_max, e->ti_gravity_beg_max); + e->ti_stars_end_min = grp1->ti_stars_end_min; + e->ti_stars_end_max = grp1->ti_stars_end_max; + e->ti_stars_beg_max = grp1->ti_stars_beg_max; + e->ti_end_min = + min3(e->ti_hydro_end_min, e->ti_gravity_end_min, e->ti_stars_end_min); + e->ti_end_max = + max3(e->ti_hydro_end_max, e->ti_gravity_end_max, e->ti_stars_end_max); + e->ti_beg_max = + max3(e->ti_hydro_beg_max, e->ti_gravity_beg_max, e->ti_stars_beg_max); e->updates = grp1->updated; e->g_updates = grp1->g_updated; e->s_updates = grp1->s_updated; @@ -127,6 +140,12 @@ void collectgroup1_apply(struct collectgroup1 *grp1, struct engine *e) { * after this step. * @param ti_gravity_beg_max the maximum begin time for next gravity time step * after this step. + * @param ti_stars_end_min the minimum end time for next stars time step + * after this step. + * @param ti_stars_end_max the maximum end time for next stars time step + * after this step. + * @param ti_stars_beg_max the maximum begin time for next stars time step + * after this step. * @param forcerebuild whether a rebuild is required after this step. * @param total_nr_cells total number of all cells on rank. * @param total_nr_tasks total number of tasks on rank. @@ -138,8 +157,9 @@ void collectgroup1_init( integertime_t ti_hydro_end_min, integertime_t ti_hydro_end_max, integertime_t ti_hydro_beg_max, integertime_t ti_gravity_end_min, integertime_t ti_gravity_end_max, integertime_t ti_gravity_beg_max, - int forcerebuild, long long total_nr_cells, long long total_nr_tasks, - float tasks_per_cell) { + integertime_t ti_stars_end_min, integertime_t ti_stars_end_max, + integertime_t ti_stars_beg_max, int forcerebuild, long long total_nr_cells, + long long total_nr_tasks, float tasks_per_cell) { grp1->updated = updated; grp1->g_updated = g_updated; @@ -153,6 +173,9 @@ void collectgroup1_init( grp1->ti_gravity_end_min = ti_gravity_end_min; grp1->ti_gravity_end_max = ti_gravity_end_max; grp1->ti_gravity_beg_max = ti_gravity_beg_max; + grp1->ti_stars_end_min = ti_stars_end_min; + grp1->ti_stars_end_max = ti_stars_end_max; + grp1->ti_stars_beg_max = ti_stars_beg_max; grp1->forcerebuild = forcerebuild; grp1->total_nr_cells = total_nr_cells; grp1->total_nr_tasks = total_nr_tasks; @@ -181,6 +204,13 @@ void collectgroup1_reduce(struct collectgroup1 *grp1) { mpigrp11.s_inhibited = grp1->s_inhibited; mpigrp11.ti_hydro_end_min = grp1->ti_hydro_end_min; mpigrp11.ti_gravity_end_min = grp1->ti_gravity_end_min; + mpigrp11.ti_stars_end_min = grp1->ti_stars_end_min; + mpigrp11.ti_hydro_end_max = grp1->ti_hydro_end_max; + mpigrp11.ti_gravity_end_max = grp1->ti_gravity_end_max; + mpigrp11.ti_stars_end_max = grp1->ti_stars_end_max; + mpigrp11.ti_hydro_beg_max = grp1->ti_hydro_beg_max; + mpigrp11.ti_gravity_beg_max = grp1->ti_gravity_beg_max; + mpigrp11.ti_stars_beg_max = grp1->ti_stars_beg_max; mpigrp11.forcerebuild = grp1->forcerebuild; mpigrp11.total_nr_cells = grp1->total_nr_cells; mpigrp11.total_nr_tasks = grp1->total_nr_tasks; @@ -200,6 +230,13 @@ void collectgroup1_reduce(struct collectgroup1 *grp1) { grp1->s_inhibited = mpigrp12.s_inhibited; grp1->ti_hydro_end_min = mpigrp12.ti_hydro_end_min; grp1->ti_gravity_end_min = mpigrp12.ti_gravity_end_min; + grp1->ti_stars_end_min = mpigrp12.ti_stars_end_min; + grp1->ti_hydro_end_max = mpigrp12.ti_hydro_end_max; + grp1->ti_gravity_end_max = mpigrp12.ti_gravity_end_max; + grp1->ti_stars_end_max = mpigrp12.ti_stars_end_max; + grp1->ti_hydro_beg_max = mpigrp12.ti_hydro_beg_max; + grp1->ti_gravity_beg_max = mpigrp12.ti_gravity_beg_max; + grp1->ti_stars_beg_max = mpigrp12.ti_stars_beg_max; grp1->forcerebuild = mpigrp12.forcerebuild; grp1->total_nr_cells = mpigrp12.total_nr_cells; grp1->total_nr_tasks = mpigrp12.total_nr_tasks; @@ -234,6 +271,24 @@ static void doreduce1(struct mpicollectgroup1 *mpigrp11, min(mpigrp11->ti_hydro_end_min, mpigrp12->ti_hydro_end_min); mpigrp11->ti_gravity_end_min = min(mpigrp11->ti_gravity_end_min, mpigrp12->ti_gravity_end_min); + mpigrp11->ti_stars_end_min = + min(mpigrp11->ti_stars_end_min, mpigrp12->ti_stars_end_min); + + /* Maximum end time. */ + mpigrp11->ti_hydro_end_max = + max(mpigrp11->ti_hydro_end_max, mpigrp12->ti_hydro_end_max); + mpigrp11->ti_gravity_end_max = + max(mpigrp11->ti_gravity_end_max, mpigrp12->ti_gravity_end_max); + mpigrp11->ti_stars_end_max = + max(mpigrp11->ti_stars_end_max, mpigrp12->ti_stars_end_max); + + /* Maximum beg time. */ + mpigrp11->ti_hydro_beg_max = + max(mpigrp11->ti_hydro_beg_max, mpigrp12->ti_hydro_beg_max); + mpigrp11->ti_gravity_beg_max = + max(mpigrp11->ti_gravity_beg_max, mpigrp12->ti_gravity_beg_max); + mpigrp11->ti_stars_beg_max = + max(mpigrp11->ti_stars_beg_max, mpigrp12->ti_stars_beg_max); /* Everyone must agree to not rebuild. */ if (mpigrp11->forcerebuild || mpigrp12->forcerebuild) diff --git a/src/collectgroup.h b/src/collectgroup.h index 3e430b58db05b563f96149d1ae21039444a03640..0c0e0898f64deb0081cdab5c6cfbcad1dc760b1c 100644 --- a/src/collectgroup.h +++ b/src/collectgroup.h @@ -43,6 +43,7 @@ struct collectgroup1 { /* Times for the time-step */ integertime_t ti_hydro_end_min, ti_hydro_end_max, ti_hydro_beg_max; integertime_t ti_gravity_end_min, ti_gravity_end_max, ti_gravity_beg_max; + integertime_t ti_stars_end_min, ti_stars_end_max, ti_stars_beg_max; /* Force the engine to rebuild? */ int forcerebuild; @@ -63,8 +64,9 @@ void collectgroup1_init( integertime_t ti_hydro_end_min, integertime_t ti_hydro_end_max, integertime_t ti_hydro_beg_max, integertime_t ti_gravity_end_min, integertime_t ti_gravity_end_max, integertime_t ti_gravity_beg_max, - int forcerebuild, long long total_nr_cells, long long total_nr_tasks, - float tasks_per_cell); + integertime_t ti_stars_end_min, integertime_t ti_stars_end_max, + integertime_t ti_stars_beg_max, int forcerebuild, long long total_nr_cells, + long long total_nr_tasks, float tasks_per_cell); void collectgroup1_reduce(struct collectgroup1 *grp1); #endif /* SWIFT_COLLECTGROUP_H */ diff --git a/src/cooling/EAGLE/cooling.c b/src/cooling/EAGLE/cooling.c index 2e829c9f1852d96f40a91e6c95cc8d803a8095a6..2e5da9576afecd63b7ecf12e1636f0c7892bdbf6 100644 --- a/src/cooling/EAGLE/cooling.c +++ b/src/cooling/EAGLE/cooling.c @@ -988,20 +988,20 @@ void cooling_print_backend(const struct cooling_function_data *cooling) { void cooling_clean(struct cooling_function_data *cooling) { /* Free the side arrays */ - free(cooling->Redshifts); - free(cooling->nH); - free(cooling->Temp); - free(cooling->HeFrac); - free(cooling->Therm); - free(cooling->SolarAbundances); - free(cooling->SolarAbundances_inv); + swift_free("cooling", cooling->Redshifts); + swift_free("cooling", cooling->nH); + swift_free("cooling", cooling->Temp); + swift_free("cooling", cooling->HeFrac); + swift_free("cooling", cooling->Therm); + swift_free("cooling", cooling->SolarAbundances); + swift_free("cooling", cooling->SolarAbundances_inv); /* Free the tables */ - free(cooling->table.metal_heating); - free(cooling->table.electron_abundance); - free(cooling->table.temperature); - free(cooling->table.H_plus_He_heating); - free(cooling->table.H_plus_He_electron_abundance); + swift_free("cooling-tables", cooling->table.metal_heating); + swift_free("cooling-tables", cooling->table.electron_abundance); + swift_free("cooling-tables", cooling->table.temperature); + swift_free("cooling-tables", cooling->table.H_plus_He_heating); + swift_free("cooling-tables", cooling->table.H_plus_He_electron_abundance); } /** diff --git a/src/cooling/EAGLE/cooling_tables.c b/src/cooling/EAGLE/cooling_tables.c index c66b7ebb8f8bea4aac557fe3b7f24f944014deda..cd6678c70614aa62c8145afbecc2c8f673dbf0de 100644 --- a/src/cooling/EAGLE/cooling_tables.c +++ b/src/cooling/EAGLE/cooling_tables.c @@ -107,10 +107,11 @@ void get_cooling_redshifts(struct cooling_function_data *cooling) { /* Check value */ if (N_Redshifts != eagle_cooling_N_redshifts) - error("Invalid redshift lenght array."); + error("Invalid redshift length array."); /* Allocate the list of redshifts */ - if (posix_memalign((void **)&cooling->Redshifts, SWIFT_STRUCT_ALIGNMENT, + if (swift_memalign("cooling", (void **)&cooling->Redshifts, + SWIFT_STRUCT_ALIGNMENT, eagle_cooling_N_redshifts * sizeof(float)) != 0) error("Failed to allocate redshift table"); @@ -227,22 +228,23 @@ void read_cooling_header(const char *fname, if (N_Elements != eagle_cooling_N_metal) error("Invalid metal array length."); /* allocate arrays of values for each of the above quantities */ - if (posix_memalign((void **)&cooling->Temp, SWIFT_STRUCT_ALIGNMENT, + if (swift_memalign("cooling", (void **)&cooling->Temp, SWIFT_STRUCT_ALIGNMENT, N_Temp * sizeof(float)) != 0) error("Failed to allocate temperature table"); - if (posix_memalign((void **)&cooling->Therm, SWIFT_STRUCT_ALIGNMENT, - N_Temp * sizeof(float)) != 0) + if (swift_memalign("cooling", (void **)&cooling->Therm, + SWIFT_STRUCT_ALIGNMENT, N_Temp * sizeof(float)) != 0) error("Failed to allocate internal energy table"); - if (posix_memalign((void **)&cooling->nH, SWIFT_STRUCT_ALIGNMENT, + if (swift_memalign("cooling", (void **)&cooling->nH, SWIFT_STRUCT_ALIGNMENT, N_nH * sizeof(float)) != 0) error("Failed to allocate nH table"); - if (posix_memalign((void **)&cooling->HeFrac, SWIFT_STRUCT_ALIGNMENT, - N_He * sizeof(float)) != 0) + if (swift_memalign("cooling", (void **)&cooling->HeFrac, + SWIFT_STRUCT_ALIGNMENT, N_He * sizeof(float)) != 0) error("Failed to allocate HeFrac table"); - if (posix_memalign((void **)&cooling->SolarAbundances, SWIFT_STRUCT_ALIGNMENT, + if (swift_memalign("cooling", (void **)&cooling->SolarAbundances, + SWIFT_STRUCT_ALIGNMENT, N_SolarAbundances * sizeof(float)) != 0) error("Failed to allocate Solar abundances table"); - if (posix_memalign((void **)&cooling->SolarAbundances_inv, + if (swift_memalign("cooling", (void **)&cooling->SolarAbundances_inv, SWIFT_STRUCT_ALIGNMENT, N_SolarAbundances * sizeof(float)) != 0) error("Failed to allocate Solar abundances inverses table"); @@ -315,31 +317,34 @@ void allocate_cooling_tables(struct cooling_function_data *restrict cooling) { * cooling rates with one table being for the redshift above current redshift * and one below. */ - if (posix_memalign((void **)&cooling->table.metal_heating, + if (swift_memalign("cooling-tables", (void **)&cooling->table.metal_heating, SWIFT_STRUCT_ALIGNMENT, eagle_cooling_N_loaded_redshifts * num_elements_metal_heating * sizeof(float)) != 0) error("Failed to allocate metal_heating array"); - if (posix_memalign((void **)&cooling->table.electron_abundance, + if (swift_memalign("cooling-tables", + (void **)&cooling->table.electron_abundance, SWIFT_STRUCT_ALIGNMENT, eagle_cooling_N_loaded_redshifts * num_elements_electron_abundance * sizeof(float)) != 0) error("Failed to allocate electron_abundance array"); - if (posix_memalign((void **)&cooling->table.temperature, + if (swift_memalign("cooling-tables", (void **)&cooling->table.temperature, SWIFT_STRUCT_ALIGNMENT, eagle_cooling_N_loaded_redshifts * num_elements_temperature * sizeof(float)) != 0) error("Failed to allocate temperature array"); - if (posix_memalign((void **)&cooling->table.H_plus_He_heating, + if (swift_memalign("cooling-tables", + (void **)&cooling->table.H_plus_He_heating, SWIFT_STRUCT_ALIGNMENT, eagle_cooling_N_loaded_redshifts * num_elements_HpHe_heating * sizeof(float)) != 0) error("Failed to allocate H_plus_He_heating array"); - if (posix_memalign((void **)&cooling->table.H_plus_He_electron_abundance, + if (swift_memalign("cooling-tables", + (void **)&cooling->table.H_plus_He_electron_abundance, SWIFT_STRUCT_ALIGNMENT, eagle_cooling_N_loaded_redshifts * num_elements_HpHe_electron_abundance * @@ -373,19 +378,24 @@ void get_redshift_invariant_table( float *he_electron_abundance = NULL; /* Allocate arrays for reading in cooling tables. */ - if (posix_memalign((void **)&net_cooling_rate, SWIFT_STRUCT_ALIGNMENT, + if (swift_memalign("cooling-temp", (void **)&net_cooling_rate, + SWIFT_STRUCT_ALIGNMENT, num_elements_cooling_rate * sizeof(float)) != 0) error("Failed to allocate net_cooling_rate array"); - if (posix_memalign((void **)&electron_abundance, SWIFT_STRUCT_ALIGNMENT, + if (swift_memalign("cooling-temp", (void **)&electron_abundance, + SWIFT_STRUCT_ALIGNMENT, num_elements_electron_abundance * sizeof(float)) != 0) error("Failed to allocate electron_abundance array"); - if (posix_memalign((void **)&temperature, SWIFT_STRUCT_ALIGNMENT, + if (swift_memalign("cooling-temp", (void **)&temperature, + SWIFT_STRUCT_ALIGNMENT, num_elements_temperature * sizeof(float)) != 0) error("Failed to allocate temperature array"); - if (posix_memalign((void **)&he_net_cooling_rate, SWIFT_STRUCT_ALIGNMENT, + if (swift_memalign("cooling-temp", (void **)&he_net_cooling_rate, + SWIFT_STRUCT_ALIGNMENT, num_elements_HpHe_heating * sizeof(float)) != 0) error("Failed to allocate he_net_cooling_rate array"); - if (posix_memalign((void **)&he_electron_abundance, SWIFT_STRUCT_ALIGNMENT, + if (swift_memalign("cooling-temp", (void **)&he_electron_abundance, + SWIFT_STRUCT_ALIGNMENT, num_elements_HpHe_electron_abundance * sizeof(float)) != 0) error("Failed to allocate he_electron_abundance array"); @@ -530,11 +540,11 @@ void get_redshift_invariant_table( status = H5Fclose(file_id); if (status < 0) error("error closing file"); - free(net_cooling_rate); - free(electron_abundance); - free(temperature); - free(he_net_cooling_rate); - free(he_electron_abundance); + swift_free("cooling-temp", net_cooling_rate); + swift_free("cooling-temp", electron_abundance); + swift_free("cooling-temp", temperature); + swift_free("cooling-temp", he_net_cooling_rate); + swift_free("cooling-temp", he_electron_abundance); #ifdef SWIFT_DEBUG_CHECKS message("done reading in redshift invariant table"); @@ -573,19 +583,24 @@ void get_cooling_table(struct cooling_function_data *restrict cooling, float *he_electron_abundance = NULL; /* Allocate arrays for reading in cooling tables. */ - if (posix_memalign((void **)&net_cooling_rate, SWIFT_STRUCT_ALIGNMENT, + if (swift_memalign("cooling-temp", (void **)&net_cooling_rate, + SWIFT_STRUCT_ALIGNMENT, num_elements_cooling_rate * sizeof(float)) != 0) error("Failed to allocate net_cooling_rate array"); - if (posix_memalign((void **)&electron_abundance, SWIFT_STRUCT_ALIGNMENT, + if (swift_memalign("cooling-temp", (void **)&electron_abundance, + SWIFT_STRUCT_ALIGNMENT, num_elements_electron_abundance * sizeof(float)) != 0) error("Failed to allocate electron_abundance array"); - if (posix_memalign((void **)&temperature, SWIFT_STRUCT_ALIGNMENT, + if (swift_memalign("cooling-temp", (void **)&temperature, + SWIFT_STRUCT_ALIGNMENT, num_elements_temperature * sizeof(float)) != 0) error("Failed to allocate temperature array"); - if (posix_memalign((void **)&he_net_cooling_rate, SWIFT_STRUCT_ALIGNMENT, + if (swift_memalign("cooling-temp", (void **)&he_net_cooling_rate, + SWIFT_STRUCT_ALIGNMENT, num_elements_HpHe_heating * sizeof(float)) != 0) error("Failed to allocate he_net_cooling_rate array"); - if (posix_memalign((void **)&he_electron_abundance, SWIFT_STRUCT_ALIGNMENT, + if (swift_memalign("cooling-temp", (void **)&he_electron_abundance, + SWIFT_STRUCT_ALIGNMENT, num_elements_HpHe_electron_abundance * sizeof(float)) != 0) error("Failed to allocate he_electron_abundance array"); @@ -741,11 +756,11 @@ void get_cooling_table(struct cooling_function_data *restrict cooling, if (status < 0) error("error closing file"); } - free(net_cooling_rate); - free(electron_abundance); - free(temperature); - free(he_net_cooling_rate); - free(he_electron_abundance); + swift_free("cooling-temp", net_cooling_rate); + swift_free("cooling-temp", electron_abundance); + swift_free("cooling-temp", temperature); + swift_free("cooling-temp", he_net_cooling_rate); + swift_free("cooling-temp", he_electron_abundance); #ifdef SWIFT_DEBUG_CHECKS message("Done reading in general cooling table"); diff --git a/src/cooling/const_lambda/cooling.h b/src/cooling/const_lambda/cooling.h index 1e50c162d02e8c326f67978a7de73fda5c6aecd8..fddc9a62067d23e13b8c2229ad5d5e798600b4e0 100644 --- a/src/cooling/const_lambda/cooling.h +++ b/src/cooling/const_lambda/cooling.h @@ -122,59 +122,65 @@ __attribute__((always_inline)) INLINE static void cooling_cool_part( /* Nothing to do here? */ if (dt == 0.) return; - /* Current energy */ - const float u_old = hydro_get_physical_internal_energy(p, xp, cosmo); + /* Current energy (in internal units) */ + const float u_old_com = hydro_get_comoving_internal_energy(p, xp); - /* Current du_dt in physical coordinates (internal units) */ - const float hydro_du_dt = hydro_get_physical_internal_energy_dt(p, cosmo); + /* Y' = RHS of the comoving equation for du/dt that will be integrated + forward in time using dt_therm */ + const float hydro_du_dt_com = hydro_get_comoving_internal_energy_dt(p); /* Calculate cooling du_dt (in cgs units) */ const double cooling_du_dt_cgs = cooling_rate_cgs(cosmo, hydro_props, cooling, p); /* Convert to internal units */ - float cooling_du_dt = + const float cooling_du_dt_physical = cooling_du_dt_cgs * cooling->conv_factor_energy_rate_from_cgs; - /* Add cosmological term */ - cooling_du_dt *= cosmo->a * cosmo->a; + /* Add cosmological term to get Y_cooling' */ + const float cooling_du_dt = cooling_du_dt_physical * cosmo->a * cosmo->a / + cosmo->a_factor_internal_energy; - float total_du_dt = hydro_du_dt + cooling_du_dt; + /* Y_total' */ + float total_du_dt = hydro_du_dt_com + cooling_du_dt; /* We now need to check that we are not going to go below any of the limits */ - /* Limit imposed by the entropy floor */ - const float A_floor = entropy_floor(p, cosmo, floor_props); - const float rho = hydro_get_physical_density(p, cosmo); - const float u_floor = gas_internal_energy_from_entropy(rho, A_floor); + /* Limit imposed by the entropy floor (comoving) + * (Recall entropy is the same in physical and comoving frames) */ + const float A_floor_com = entropy_floor(p, cosmo, floor_props); + const float rho_com = hydro_get_comoving_density(p); + const float u_floor_com = + gas_internal_energy_from_entropy(rho_com, A_floor_com); /* Absolute minimum */ - const float u_minimal = hydro_props->minimal_internal_energy; + const float u_minimal_com = + hydro_props->minimal_internal_energy / cosmo->a_factor_internal_energy; /* Largest of both limits */ - const float u_limit = max(u_minimal, u_floor); + const float u_limit_com = max(u_minimal_com, u_floor_com); /* First, check whether we may end up below the minimal energy after * this step 1/2 kick + another 1/2 kick that could potentially be for * a time-step twice as big. We hence check for 1.5 delta_t. */ - if (u_old + total_du_dt * 1.5 * dt_therm < u_limit) { - total_du_dt = (u_limit - u_old) / (1.5f * dt_therm); + if (u_old_com + total_du_dt * 1.5 * dt_therm < u_limit_com) { + total_du_dt = (u_limit_com - u_old_com) / (1.5f * dt_therm); } /* Second, check whether the energy used in the prediction could get negative. * We need to check for the 1/2 dt kick followed by a full time-step drift * that could potentially be for a time-step twice as big. We hence check * for 2.5 delta_t but this time against 0 energy not the minimum */ - if (u_old + total_du_dt * 2.5 * dt_therm < 0.) { - total_du_dt = -u_old / ((2.5f + 0.0001f) * dt_therm); + if (u_old_com + total_du_dt * 2.5 * dt_therm < 0.) { + total_du_dt = -u_old_com / ((2.5f + 0.0001f) * dt_therm); } /* Update the internal energy time derivative */ - hydro_set_physical_internal_energy_dt(p, cosmo, total_du_dt); + hydro_set_comoving_internal_energy_dt(p, total_du_dt); /* Store the radiated energy (assuming dt will not change) */ xp->cooling_data.radiated_energy += - -hydro_get_mass(p) * (total_du_dt - hydro_du_dt) * dt_therm; + -hydro_get_mass(p) * cooling_du_dt_physical * dt; } /** @@ -208,8 +214,9 @@ __attribute__((always_inline)) INLINE static float cooling_timestep( cooling_rate_cgs(cosmo, hydro_props, cooling, p); /* Convert to internal units */ - const float cooling_du_dt = - cooling_du_dt_cgs * cooling->conv_factor_energy_rate_from_cgs; + const float cooling_du_dt = cooling_du_dt_cgs * + cooling->conv_factor_energy_rate_from_cgs / + cosmo->a_factor_internal_energy; /* If we are close to (or below) the limit, we ignore the condition */ if (u < 1.01f * hydro_props->minimal_internal_energy || cooling_du_dt == 0.f) @@ -270,7 +277,7 @@ INLINE static float cooling_get_temperature( const double mu_ionised = hydro_props->mu_ionised; /* Particle temperature */ - const double u = hydro_get_physical_internal_energy(p, xp, cosmo); + const double u = hydro_get_drifted_physical_internal_energy(p, cosmo); /* Temperature over mean molecular weight */ const double T_over_mu = hydro_gamma_minus_one * u * m_H / k_B; @@ -353,6 +360,9 @@ static INLINE void cooling_print_backend( "cm^3]", cooling->lambda_nH2_cgs); + message("Lambda/n_H^2=%g [internal units]", + cooling->lambda_nH2_cgs * cooling->conv_factor_energy_rate_from_cgs); + if (cooling->cooling_tstep_mult == FLT_MAX) message("Cooling function time-step size is unlimited"); else diff --git a/src/cooling/const_lambda/cooling_io.h b/src/cooling/const_lambda/cooling_io.h index 9437f0f94db41725d6715cf349843bf079137305..2e2ba799ab51a73c610701499ef61f1b398e42c5 100644 --- a/src/cooling/const_lambda/cooling_io.h +++ b/src/cooling/const_lambda/cooling_io.h @@ -76,7 +76,10 @@ __attribute__((always_inline)) INLINE static int cooling_write_particles( UNIT_CONV_TEMPERATURE, parts, xparts, convert_part_T); - return 1; + list[1] = io_make_output_field("RadiatedEnergy", FLOAT, 1, UNIT_CONV_ENERGY, + xparts, cooling_data.radiated_energy); + + return 2; } #endif /* SWIFT_COOLING_CONST_LAMBDA_IO_H */ diff --git a/src/cosmology.c b/src/cosmology.c index a18f52b84da330238caaa0a16bb69de025e92046..5dfe1d7f01fc6d596942188b90fbb4598b7a6810 100644 --- a/src/cosmology.c +++ b/src/cosmology.c @@ -32,6 +32,7 @@ #include "adiabatic_index.h" #include "common_io.h" #include "inline.h" +#include "memuse.h" #include "restart.h" #ifdef HAVE_LIBGSL @@ -329,23 +330,24 @@ void cosmology_init_tables(struct cosmology *c) { const double a_begin = c->a_begin; /* Allocate memory for the interpolation tables */ - c->drift_fac_interp_table = - (double *)malloc(cosmology_table_length * sizeof(double)); - c->grav_kick_fac_interp_table = - (double *)malloc(cosmology_table_length * sizeof(double)); - c->hydro_kick_fac_interp_table = - (double *)malloc(cosmology_table_length * sizeof(double)); - c->hydro_kick_corr_interp_table = - (double *)malloc(cosmology_table_length * sizeof(double)); - c->time_interp_table = - (double *)malloc(cosmology_table_length * sizeof(double)); - c->scale_factor_interp_table = - (double *)malloc(cosmology_table_length * sizeof(double)); + c->drift_fac_interp_table = (double *)swift_malloc( + "cosmo.table", cosmology_table_length * sizeof(double)); + c->grav_kick_fac_interp_table = (double *)swift_malloc( + "cosmo.table", cosmology_table_length * sizeof(double)); + c->hydro_kick_fac_interp_table = (double *)swift_malloc( + "cosmo.table", cosmology_table_length * sizeof(double)); + c->hydro_kick_corr_interp_table = (double *)swift_malloc( + "cosmo.table", cosmology_table_length * sizeof(double)); + c->time_interp_table = (double *)swift_malloc( + "cosmo.table", cosmology_table_length * sizeof(double)); + c->scale_factor_interp_table = (double *)swift_malloc( + "cosmo.table", cosmology_table_length * sizeof(double)); /* Prepare a table of scale factors for the integral bounds */ const double delta_a = (c->log_a_end - c->log_a_begin) / cosmology_table_length; - double *a_table = (double *)malloc(cosmology_table_length * sizeof(double)); + double *a_table = (double *)swift_malloc( + "cosmo.table", cosmology_table_length * sizeof(double)); for (int i = 0; i < cosmology_table_length; i++) a_table[i] = exp(c->log_a_begin + delta_a * (i + 1)); @@ -456,7 +458,7 @@ void cosmology_init_tables(struct cosmology *c) { /* Free the workspace and temp array */ gsl_integration_workspace_free(space); - free(a_table); + swift_free("cosmo.table", a_table); #else @@ -807,12 +809,12 @@ void cosmology_print(const struct cosmology *c) { void cosmology_clean(struct cosmology *c) { - free(c->drift_fac_interp_table); - free(c->grav_kick_fac_interp_table); - free(c->hydro_kick_fac_interp_table); - free(c->hydro_kick_corr_interp_table); - free(c->time_interp_table); - free(c->scale_factor_interp_table); + swift_free("cosmo.table", c->drift_fac_interp_table); + swift_free("cosmo.table", c->grav_kick_fac_interp_table); + swift_free("cosmo.table", c->hydro_kick_fac_interp_table); + swift_free("cosmo.table", c->hydro_kick_corr_interp_table); + swift_free("cosmo.table", c->time_interp_table); + swift_free("cosmo.table", c->scale_factor_interp_table); } #ifdef HAVE_HDF5 diff --git a/src/engine.c b/src/engine.c index be2a24fbf34dcfc3099271087570295db706eb2c..1cb308cc0d1e10aca5026919e1d721661426512c 100644 --- a/src/engine.c +++ b/src/engine.c @@ -135,7 +135,7 @@ struct end_of_step_data { size_t inhibited, g_inhibited, s_inhibited; integertime_t ti_hydro_end_min, ti_hydro_end_max, ti_hydro_beg_max; integertime_t ti_gravity_end_min, ti_gravity_end_max, ti_gravity_beg_max; - integertime_t ti_stars_end_min; + integertime_t ti_stars_end_min, ti_stars_end_max, ti_stars_beg_max; struct engine *e; }; @@ -557,7 +557,8 @@ void engine_redistribute(struct engine *e) { /* Start by moving inhibited particles to the end of the arrays */ for (size_t k = 0; k < nr_parts; /* void */) { - if (parts[k].time_bin == time_bin_inhibited) { + if (parts[k].time_bin == time_bin_inhibited || + parts[k].time_bin == time_bin_not_created) { nr_parts -= 1; /* Swap the particle */ @@ -580,7 +581,8 @@ void engine_redistribute(struct engine *e) { /* Now move inhibited star particles to the end of the arrays */ for (size_t k = 0; k < nr_sparts; /* void */) { - if (sparts[k].time_bin == time_bin_inhibited) { + if (sparts[k].time_bin == time_bin_inhibited || + sparts[k].time_bin == time_bin_not_created) { nr_sparts -= 1; /* Swap the particle */ @@ -600,7 +602,8 @@ void engine_redistribute(struct engine *e) { /* Finally do the same with the gravity particles */ for (size_t k = 0; k < nr_gparts; /* void */) { - if (gparts[k].time_bin == time_bin_inhibited) { + if (gparts[k].time_bin == time_bin_inhibited || + gparts[k].time_bin == time_bin_not_created) { nr_gparts -= 1; /* Swap the particle */ @@ -665,6 +668,12 @@ void engine_redistribute(struct engine *e) { for (size_t k = 0; k < nr_parts; k++) { const struct part *p = &s->parts[k]; + if (p->time_bin == time_bin_inhibited) + error("Inhibited particle found after sorting!"); + + if (p->time_bin == time_bin_not_created) + error("Inhibited particle found after sorting!"); + /* New cell index */ const int new_cid = cell_getid(s->cdim, p->x[0] * s->iwidth[0], p->x[1] * s->iwidth[1], @@ -724,6 +733,12 @@ void engine_redistribute(struct engine *e) { for (size_t k = 0; k < nr_sparts; k++) { const struct spart *sp = &s->sparts[k]; + if (sp->time_bin == time_bin_inhibited) + error("Inhibited particle found after sorting!"); + + if (sp->time_bin == time_bin_not_created) + error("Inhibited particle found after sorting!"); + /* New cell index */ const int new_cid = cell_getid(s->cdim, sp->x[0] * s->iwidth[0], sp->x[1] * s->iwidth[1], @@ -782,6 +797,12 @@ void engine_redistribute(struct engine *e) { for (size_t k = 0; k < nr_gparts; k++) { const struct gpart *gp = &s->gparts[k]; + if (gp->time_bin == time_bin_inhibited) + error("Inhibited particle found after sorting!"); + + if (gp->time_bin == time_bin_not_created) + error("Inhibited particle found after sorting!"); + /* New cell index */ const int new_cid = cell_getid(s->cdim, gp->x[0] * s->iwidth[0], gp->x[1] * s->iwidth[1], @@ -952,6 +973,7 @@ void engine_redistribute(struct engine *e) { /* Verify that the links are correct */ part_verify_links(s->parts, s->gparts, s->sparts, nr_parts_new, nr_gparts_new, nr_sparts_new, e->verbose); + #endif /* Be verbose about what just happened. */ @@ -963,6 +985,11 @@ void engine_redistribute(struct engine *e) { nodeID, nr_parts_new, nr_sparts_new, nr_gparts_new, my_cells); } + /* Flag that we do not have any extra particles any more */ + s->nr_extra_parts = 0; + s->nr_extra_gparts = 0; + s->nr_extra_sparts = 0; + /* Flag that a redistribute has taken place */ e->step_props |= engine_step_prop_redistribute; @@ -2384,57 +2411,45 @@ void engine_barrier(struct engine *e) { * @param c The #cell to recurse into. * @param e The #engine. */ -void engine_collect_end_of_step_recurse(struct cell *c, - const struct engine *e) { +void engine_collect_end_of_step_recurse_hydro(struct cell *c, + const struct engine *e) { /* Skip super-cells (Their values are already set) */ #ifdef WITH_MPI - if (c->timestep != NULL || c->mpi.recv_ti != NULL) return; + if (c->timestep != NULL || c->mpi.hydro.recv_ti != NULL) return; #else if (c->timestep != NULL) return; #endif /* WITH_MPI */ +#ifdef SWIFT_DEBUG_CHECKS + /* if (!c->split) error("Reached a leaf without finding a time-step task! + * c->depth=%d c->maxdepth=%d c->count=%d c->node=%d", */ + /* c->depth, c->maxdepth, c->hydro.count, c->nodeID); */ +#endif + /* Counters for the different quantities. */ - size_t updated = 0, g_updated = 0, s_updated = 0; - size_t inhibited = 0, g_inhibited = 0, s_inhibited = 0; + size_t updated = 0, inhibited = 0; integertime_t ti_hydro_end_min = max_nr_timesteps, ti_hydro_end_max = 0, ti_hydro_beg_max = 0; - integertime_t ti_gravity_end_min = max_nr_timesteps, ti_gravity_end_max = 0, - ti_gravity_beg_max = 0; - integertime_t ti_stars_end_min = max_nr_timesteps; /* Collect the values from the progeny. */ for (int k = 0; k < 8; k++) { struct cell *cp = c->progeny[k]; - if (cp != NULL && - (cp->hydro.count > 0 || cp->grav.count > 0 || cp->stars.count > 0)) { + if (cp != NULL && cp->hydro.count > 0) { /* Recurse */ - engine_collect_end_of_step_recurse(cp, e); + engine_collect_end_of_step_recurse_hydro(cp, e); /* And update */ ti_hydro_end_min = min(ti_hydro_end_min, cp->hydro.ti_end_min); ti_hydro_end_max = max(ti_hydro_end_max, cp->hydro.ti_end_max); ti_hydro_beg_max = max(ti_hydro_beg_max, cp->hydro.ti_beg_max); - ti_gravity_end_min = min(ti_gravity_end_min, cp->grav.ti_end_min); - ti_gravity_end_max = max(ti_gravity_end_max, cp->grav.ti_end_max); - ti_gravity_beg_max = max(ti_gravity_beg_max, cp->grav.ti_beg_max); - - ti_stars_end_min = min(ti_stars_end_min, cp->stars.ti_end_min); - updated += cp->hydro.updated; - g_updated += cp->grav.updated; - s_updated += cp->stars.updated; - inhibited += cp->hydro.inhibited; - g_inhibited += cp->grav.inhibited; - s_inhibited += cp->stars.inhibited; /* Collected, so clear for next time. */ cp->hydro.updated = 0; - cp->grav.updated = 0; - cp->stars.updated = 0; } } @@ -2442,16 +2457,125 @@ void engine_collect_end_of_step_recurse(struct cell *c, c->hydro.ti_end_min = ti_hydro_end_min; c->hydro.ti_end_max = ti_hydro_end_max; c->hydro.ti_beg_max = ti_hydro_beg_max; - c->grav.ti_end_min = ti_gravity_end_min; - c->grav.ti_end_max = ti_gravity_end_max; - c->grav.ti_beg_max = ti_gravity_beg_max; - c->stars.ti_end_min = ti_stars_end_min; c->hydro.updated = updated; - c->grav.updated = g_updated; - c->stars.updated = s_updated; c->hydro.inhibited = inhibited; - c->grav.inhibited = g_inhibited; - c->stars.inhibited = s_inhibited; +} + +/** + * @brief Recursive function gathering end-of-step data. + * + * We recurse until we encounter a timestep or time-step MPI recv task + * as the values will have been set at that level. We then bring these + * values upwards. + * + * @param c The #cell to recurse into. + * @param e The #engine. + */ +void engine_collect_end_of_step_recurse_grav(struct cell *c, + const struct engine *e) { + +/* Skip super-cells (Their values are already set) */ +#ifdef WITH_MPI + if (c->timestep != NULL || c->mpi.grav.recv_ti != NULL) return; +#else + if (c->timestep != NULL) return; +#endif /* WITH_MPI */ + +#ifdef SWIFT_DEBUG_CHECKS + // if (!c->split) error("Reached a leaf without finding a time-step + // task!"); +#endif + + /* Counters for the different quantities. */ + size_t updated = 0, inhibited = 0; + integertime_t ti_grav_end_min = max_nr_timesteps, ti_grav_end_max = 0, + ti_grav_beg_max = 0; + + /* Collect the values from the progeny. */ + for (int k = 0; k < 8; k++) { + struct cell *cp = c->progeny[k]; + if (cp != NULL && cp->grav.count > 0) { + + /* Recurse */ + engine_collect_end_of_step_recurse_grav(cp, e); + + /* And update */ + ti_grav_end_min = min(ti_grav_end_min, cp->grav.ti_end_min); + ti_grav_end_max = max(ti_grav_end_max, cp->grav.ti_end_max); + ti_grav_beg_max = max(ti_grav_beg_max, cp->grav.ti_beg_max); + + updated += cp->grav.updated; + inhibited += cp->grav.inhibited; + + /* Collected, so clear for next time. */ + cp->grav.updated = 0; + } + } + + /* Store the collected values in the cell. */ + c->grav.ti_end_min = ti_grav_end_min; + c->grav.ti_end_max = ti_grav_end_max; + c->grav.ti_beg_max = ti_grav_beg_max; + c->grav.updated = updated; + c->grav.inhibited = inhibited; +} + +/** + * @brief Recursive function gathering end-of-step data. + * + * We recurse until we encounter a timestep or time-step MPI recv task + * as the values will have been set at that level. We then bring these + * values upwards. + * + * @param c The #cell to recurse into. + * @param e The #engine. + */ +void engine_collect_end_of_step_recurse_stars(struct cell *c, + const struct engine *e) { + +/* Skip super-cells (Their values are already set) */ +#ifdef WITH_MPI + if (c->timestep != NULL || c->mpi.stars.recv_ti != NULL) return; +#else + if (c->timestep != NULL) return; +#endif /* WITH_MPI */ + +#ifdef SWIFT_DEBUG_CHECKS + // if (!c->split) error("Reached a leaf without finding a time-step task!"); +#endif + + /* Counters for the different quantities. */ + size_t updated = 0, inhibited = 0; + integertime_t ti_stars_end_min = max_nr_timesteps, ti_stars_end_max = 0, + ti_stars_beg_max = 0; + + /* Collect the values from the progeny. */ + for (int k = 0; k < 8; k++) { + struct cell *cp = c->progeny[k]; + if (cp != NULL && cp->stars.count > 0) { + + /* Recurse */ + engine_collect_end_of_step_recurse_stars(cp, e); + + /* And update */ + ti_stars_end_min = min(ti_stars_end_min, cp->stars.ti_end_min); + ti_stars_end_max = max(ti_stars_end_max, cp->stars.ti_end_max); + ti_stars_beg_max = max(ti_stars_beg_max, cp->stars.ti_beg_max); + + updated += cp->stars.updated; + inhibited += cp->stars.inhibited; + + /* Collected, so clear for next time. */ + cp->stars.updated = 0; + } + } + + /* Store the collected values in the cell. */ + c->stars.ti_end_min = ti_stars_end_min; + c->stars.ti_end_max = ti_stars_end_max; + c->stars.ti_beg_max = ti_stars_beg_max; + c->stars.updated = updated; + c->stars.inhibited = inhibited; } /** @@ -2470,6 +2594,11 @@ void engine_collect_end_of_step_mapper(void *map_data, int num_elements, struct end_of_step_data *data = (struct end_of_step_data *)extra_data; const struct engine *e = data->e; + const int with_hydro = (e->policy & engine_policy_hydro); + const int with_self_grav = (e->policy & engine_policy_self_gravity); + const int with_ext_grav = (e->policy & engine_policy_external_gravity); + const int with_grav = (with_self_grav || with_ext_grav); + const int with_stars = (e->policy & engine_policy_stars); struct space *s = e->s; int *local_cells = (int *)map_data; @@ -2480,7 +2609,8 @@ void engine_collect_end_of_step_mapper(void *map_data, int num_elements, ti_hydro_beg_max = 0; integertime_t ti_gravity_end_min = max_nr_timesteps, ti_gravity_end_max = 0, ti_gravity_beg_max = 0; - integertime_t ti_stars_end_min = max_nr_timesteps; + integertime_t ti_stars_end_min = max_nr_timesteps, ti_stars_end_max = 0, + ti_stars_beg_max = 0; for (int ind = 0; ind < num_elements; ind++) { struct cell *c = &s->cells_top[local_cells[ind]]; @@ -2488,7 +2618,15 @@ void engine_collect_end_of_step_mapper(void *map_data, int num_elements, if (c->hydro.count > 0 || c->grav.count > 0 || c->stars.count > 0) { /* Make the top-cells recurse */ - engine_collect_end_of_step_recurse(c, e); + if (with_hydro) { + engine_collect_end_of_step_recurse_hydro(c, e); + } + if (with_grav) { + engine_collect_end_of_step_recurse_grav(c, e); + } + if (with_stars) { + engine_collect_end_of_step_recurse_stars(c, e); + } /* And aggregate */ if (c->hydro.ti_end_min > e->ti_current) @@ -2503,6 +2641,8 @@ void engine_collect_end_of_step_mapper(void *map_data, int num_elements, if (c->stars.ti_end_min > e->ti_current) ti_stars_end_min = min(ti_stars_end_min, c->stars.ti_end_min); + ti_stars_end_max = max(ti_stars_end_max, c->stars.ti_end_max); + ti_stars_beg_max = max(ti_stars_beg_max, c->stars.ti_beg_max); updated += c->hydro.updated; g_updated += c->grav.updated; @@ -2545,6 +2685,8 @@ void engine_collect_end_of_step_mapper(void *map_data, int num_elements, if (ti_stars_end_min > e->ti_current) data->ti_stars_end_min = min(ti_stars_end_min, data->ti_stars_end_min); + data->ti_stars_end_max = max(ti_stars_end_max, data->ti_stars_end_max); + data->ti_stars_beg_max = max(ti_stars_beg_max, data->ti_stars_beg_max); } if (lock_unlock(&s->lock) != 0) error("Failed to unlock the space"); @@ -2578,6 +2720,8 @@ void engine_collect_end_of_step(struct engine *e, int apply) { data.ti_hydro_beg_max = 0; data.ti_gravity_end_min = max_nr_timesteps, data.ti_gravity_end_max = 0, data.ti_gravity_beg_max = 0; + data.ti_stars_end_min = max_nr_timesteps, data.ti_stars_end_max = 0, + data.ti_stars_beg_max = 0; data.e = e; /* Collect information from the local top-level cells */ @@ -2595,7 +2739,8 @@ void engine_collect_end_of_step(struct engine *e, int apply) { &e->collect_group1, data.updated, data.g_updated, data.s_updated, data.inhibited, data.g_inhibited, data.s_inhibited, data.ti_hydro_end_min, data.ti_hydro_end_max, data.ti_hydro_beg_max, data.ti_gravity_end_min, - data.ti_gravity_end_max, data.ti_gravity_beg_max, e->forcerebuild, + data.ti_gravity_end_max, data.ti_gravity_beg_max, data.ti_stars_end_min, + data.ti_stars_end_max, data.ti_stars_beg_max, e->forcerebuild, e->s->tot_cells, e->sched.nr_tasks, (float)e->sched.nr_tasks / (float)e->s->tot_cells); @@ -2774,8 +2919,10 @@ void engine_skip_force_and_kick(struct engine *e) { t->type == task_type_extra_ghost || t->subtype == task_subtype_gradient || t->subtype == task_subtype_stars_feedback || - t->subtype == task_subtype_tend || t->subtype == task_subtype_rho || - t->subtype == task_subtype_gpart) + t->subtype == task_subtype_tend_part || + t->subtype == task_subtype_tend_gpart || + t->subtype == task_subtype_tend_spart || + t->subtype == task_subtype_rho || t->subtype == task_subtype_gpart) t->skip = 1; } @@ -3553,6 +3700,13 @@ void engine_unskip(struct engine *e) { const ticks tic = getticks(); struct space *s = e->s; + const int nodeID = e->nodeID; + + const int with_hydro = e->policy & engine_policy_hydro; + const int with_self_grav = e->policy & engine_policy_self_gravity; + const int with_ext_grav = e->policy & engine_policy_external_gravity; + const int with_stars = e->policy & engine_policy_stars; + const int with_feedback = e->policy & engine_policy_feedback; #ifdef WITH_PROFILER static int count = 0; @@ -3567,11 +3721,12 @@ void engine_unskip(struct engine *e) { for (int k = 0; k < s->nr_local_cells_with_tasks; k++) { struct cell *c = &s->cells_top[local_cells[k]]; - if ((e->policy & engine_policy_hydro && cell_is_active_hydro(c, e)) || - (e->policy & engine_policy_self_gravity && + if ((with_hydro && cell_is_active_hydro(c, e)) || + (with_self_grav && cell_is_active_gravity(c, e)) || + (with_ext_grav && c->nodeID == nodeID && cell_is_active_gravity(c, e)) || - (e->policy & engine_policy_external_gravity && - cell_is_active_gravity(c, e))) { + (with_feedback && cell_is_active_stars(c, e)) || + (with_stars && c->nodeID == nodeID && cell_is_active_stars(c, e))) { if (num_active_cells != k) memswap(&local_cells[k], &local_cells[num_active_cells], sizeof(int)); diff --git a/src/engine.h b/src/engine.h index 17cee47fe6f407ac11b624d148b4814fde2dd3d0..206a6e7945006c339b3ef5ad1c68619d6b013a57 100644 --- a/src/engine.h +++ b/src/engine.h @@ -196,6 +196,15 @@ struct engine { /* Maximal gravity ti_beg for the next time-step */ integertime_t ti_gravity_beg_max; + /* Minimal stars ti_end for the next time-step */ + integertime_t ti_stars_end_min; + + /* Maximal stars ti_end for the next time-step */ + integertime_t ti_stars_end_max; + + /* Maximal stars ti_beg for the next time-step */ + integertime_t ti_stars_beg_max; + /* Minimal overall ti_end for the next time-step */ integertime_t ti_end_min; diff --git a/src/engine_maketasks.c b/src/engine_maketasks.c index e20457416a7eee329a12a1100373c50b72b1837a..d3ad3882c3c2819cf51c2759d464d78a366acae0 100644 --- a/src/engine_maketasks.c +++ b/src/engine_maketasks.c @@ -59,9 +59,11 @@ * @param ci The sending #cell. * @param cj Dummy cell containing the nodeID of the receiving node. * @param t_grav The send_grav #task, if it has already been created. + * @param t_ti The recv_ti_end #task, if it has already been created. */ void engine_addtasks_send_gravity(struct engine *e, struct cell *ci, - struct cell *cj, struct task *t_grav) { + struct cell *cj, struct task *t_grav, + struct task *t_ti) { #ifdef WITH_MPI struct link *l = NULL; @@ -86,22 +88,28 @@ void engine_addtasks_send_gravity(struct engine *e, struct cell *ci, t_grav = scheduler_addtask(s, task_type_send, task_subtype_gpart, ci->mpi.tag, 0, ci, cj); + t_ti = scheduler_addtask(s, task_type_send, task_subtype_tend_gpart, + ci->mpi.tag, 0, ci, cj); + /* The sends should unlock the down pass. */ scheduler_addunlock(s, t_grav, ci->grav.super->grav.down); /* Drift before you send */ scheduler_addunlock(s, ci->grav.super->grav.drift, t_grav); + + scheduler_addunlock(s, ci->super->timestep, t_ti); } /* Add them to the local cell. */ engine_addlink(e, &ci->mpi.grav.send, t_grav); + engine_addlink(e, &ci->mpi.grav.send_ti, t_ti); } /* Recurse? */ if (ci->split) for (int k = 0; k < 8; k++) if (ci->progeny[k] != NULL) - engine_addtasks_send_gravity(e, ci->progeny[k], cj, t_grav); + engine_addtasks_send_gravity(e, ci->progeny[k], cj, t_grav, t_ti); #else error("SWIFT was not compiled with MPI support."); @@ -117,10 +125,12 @@ void engine_addtasks_send_gravity(struct engine *e, struct cell *ci, * @param t_xv The send_xv #task, if it has already been created. * @param t_rho The send_rho #task, if it has already been created. * @param t_gradient The send_gradient #task, if already created. + * @param t_ti The recv_ti_end #task, if it has already been created. */ void engine_addtasks_send_hydro(struct engine *e, struct cell *ci, struct cell *cj, struct task *t_xv, - struct task *t_rho, struct task *t_gradient) { + struct task *t_rho, struct task *t_gradient, + struct task *t_ti) { #ifdef WITH_MPI struct link *l = NULL; @@ -152,6 +162,9 @@ void engine_addtasks_send_hydro(struct engine *e, struct cell *ci, ci->mpi.tag, 0, ci, cj); #endif + t_ti = scheduler_addtask(s, task_type_send, task_subtype_tend_part, + ci->mpi.tag, 0, ci, cj); + #ifdef EXTRA_HYDRO_LOOP scheduler_addunlock(s, t_gradient, ci->hydro.super->hydro.end_force); @@ -184,6 +197,8 @@ void engine_addtasks_send_hydro(struct engine *e, struct cell *ci, /* Drift before you send */ scheduler_addunlock(s, ci->hydro.super->hydro.drift, t_xv); + + scheduler_addunlock(s, ci->super->timestep, t_ti); } /* Add them to the local cell. */ @@ -192,6 +207,7 @@ void engine_addtasks_send_hydro(struct engine *e, struct cell *ci, #ifdef EXTRA_HYDRO_LOOP engine_addlink(e, &ci->mpi.hydro.send_gradient, t_gradient); #endif + engine_addlink(e, &ci->mpi.hydro.send_ti, t_ti); } /* Recurse? */ @@ -199,7 +215,7 @@ void engine_addtasks_send_hydro(struct engine *e, struct cell *ci, for (int k = 0; k < 8; k++) if (ci->progeny[k] != NULL) engine_addtasks_send_hydro(e, ci->progeny[k], cj, t_xv, t_rho, - t_gradient); + t_gradient, t_ti); #else error("SWIFT was not compiled with MPI support."); @@ -213,9 +229,11 @@ void engine_addtasks_send_hydro(struct engine *e, struct cell *ci, * @param ci The sending #cell. * @param cj Dummy cell containing the nodeID of the receiving node. * @param t_feedback The send_feed #task, if it has already been created. + * @param t_ti The recv_ti_end #task, if it has already been created. */ void engine_addtasks_send_stars(struct engine *e, struct cell *ci, - struct cell *cj, struct task *t_feedback) { + struct cell *cj, struct task *t_feedback, + struct task *t_ti) { #ifdef WITH_MPI @@ -241,6 +259,9 @@ void engine_addtasks_send_stars(struct engine *e, struct cell *ci, t_feedback = scheduler_addtask(s, task_type_send, task_subtype_spart, ci->mpi.tag, 0, ci, cj); + t_ti = scheduler_addtask(s, task_type_send, task_subtype_tend_spart, + ci->mpi.tag, 0, ci, cj); + /* The send_stars task should unlock the super_cell's kick task. */ scheduler_addunlock(s, t_feedback, ci->hydro.super->stars.stars_out); @@ -249,98 +270,19 @@ void engine_addtasks_send_stars(struct engine *e, struct cell *ci, /* Drift before you send */ scheduler_addunlock(s, ci->hydro.super->stars.drift, t_feedback); - } - - engine_addlink(e, &ci->mpi.stars.send, t_feedback); - } - - /* Recurse? */ - if (ci->split) - for (int k = 0; k < 8; k++) - if (ci->progeny[k] != NULL) - engine_addtasks_send_stars(e, ci->progeny[k], cj, t_feedback); - -#else - error("SWIFT was not compiled with MPI support."); -#endif -} - -/** - * @brief Add send tasks for the time-step to a hierarchy of cells. - * - * @param e The #engine. - * @param ci The sending #cell. - * @param cj Dummy cell containing the nodeID of the receiving node. - * @param t_ti The send_ti #task, if it has already been created. - * @param t_limiter The send_limiter #task, if already created. - * @param with_limiter Are we running with the time-step limiter? - */ -void engine_addtasks_send_timestep(struct engine *e, struct cell *ci, - struct cell *cj, struct task *t_ti, - struct task *t_limiter, - const int with_limiter) { -#ifdef WITH_MPI - struct link *l = NULL; - struct scheduler *s = &e->sched; - const int nodeID = cj->nodeID; - - /* Check if any of the gravity tasks are for the target node. */ - for (l = ci->grav.grav; l != NULL; l = l->next) - if (l->t->ci->nodeID == nodeID || - (l->t->cj != NULL && l->t->cj->nodeID == nodeID)) - break; - - /* Check whether instead any of the hydro tasks are for the target node. */ - if (l == NULL) - for (l = ci->hydro.density; l != NULL; l = l->next) - if (l->t->ci->nodeID == nodeID || - (l->t->cj != NULL && l->t->cj->nodeID == nodeID)) - break; - - if (l == NULL) - for (l = ci->stars.density; l != NULL; l = l->next) - if (l->t->ci->nodeID == nodeID || - (l->t->cj != NULL && l->t->cj->nodeID == nodeID)) - break; - - /* If found anything, attach send tasks. */ - if (l != NULL) { - - /* Create the tasks and their dependencies? */ - if (t_ti == NULL) { - - /* Make sure this cell is tagged. */ - cell_ensure_tagged(ci); - - t_ti = scheduler_addtask(s, task_type_send, task_subtype_tend, - ci->mpi.tag, 0, ci, cj); - - if (with_limiter) - t_limiter = scheduler_addtask(s, task_type_send, task_subtype_limiter, - ci->mpi.tag, 0, ci, cj); - - /* The super-cell's timestep task should unlock the send_ti task. */ scheduler_addunlock(s, ci->super->timestep, t_ti); - if (with_limiter) scheduler_addunlock(s, t_limiter, ci->super->timestep); - if (with_limiter) - scheduler_addunlock(s, t_limiter, ci->super->timestep_limiter); - if (with_limiter) scheduler_addunlock(s, ci->super->kick2, t_limiter); - if (with_limiter) - scheduler_addunlock(s, ci->super->timestep_limiter, t_ti); } - /* Add them to the local cell. */ - engine_addlink(e, &ci->mpi.send_ti, t_ti); - if (with_limiter) engine_addlink(e, &ci->mpi.limiter.send, t_limiter); + engine_addlink(e, &ci->mpi.stars.send, t_feedback); + engine_addlink(e, &ci->mpi.stars.send_ti, t_ti); } /* Recurse? */ if (ci->split) for (int k = 0; k < 8; k++) if (ci->progeny[k] != NULL) - engine_addtasks_send_timestep(e, ci->progeny[k], cj, t_ti, t_limiter, - with_limiter); + engine_addtasks_send_stars(e, ci->progeny[k], cj, t_feedback, t_ti); #else error("SWIFT was not compiled with MPI support."); @@ -355,10 +297,11 @@ void engine_addtasks_send_timestep(struct engine *e, struct cell *ci, * @param t_xv The recv_xv #task, if it has already been created. * @param t_rho The recv_rho #task, if it has already been created. * @param t_gradient The recv_gradient #task, if it has already been created. + * @param t_ti The recv_ti_end #task, if it has already been created. */ void engine_addtasks_recv_hydro(struct engine *e, struct cell *c, struct task *t_xv, struct task *t_rho, - struct task *t_gradient) { + struct task *t_gradient, struct task *t_ti) { #ifdef WITH_MPI struct scheduler *s = &e->sched; @@ -380,11 +323,15 @@ void engine_addtasks_recv_hydro(struct engine *e, struct cell *c, t_gradient = scheduler_addtask(s, task_type_recv, task_subtype_gradient, c->mpi.tag, 0, c, NULL); #endif + + t_ti = scheduler_addtask(s, task_type_recv, task_subtype_tend_part, + c->mpi.tag, 0, c, NULL); } c->mpi.hydro.recv_xv = t_xv; c->mpi.hydro.recv_rho = t_rho; c->mpi.hydro.recv_gradient = t_gradient; + c->mpi.hydro.recv_ti = t_ti; /* Add dependencies. */ if (c->hydro.sorts != NULL) { @@ -403,10 +350,12 @@ void engine_addtasks_recv_hydro(struct engine *e, struct cell *c, } for (struct link *l = c->hydro.force; l != NULL; l = l->next) { scheduler_addunlock(s, t_gradient, l->t); + scheduler_addunlock(s, l->t, t_ti); } #else for (struct link *l = c->hydro.force; l != NULL; l = l->next) { scheduler_addunlock(s, t_rho, l->t); + scheduler_addunlock(s, l->t, t_ti); } #endif @@ -419,7 +368,8 @@ void engine_addtasks_recv_hydro(struct engine *e, struct cell *c, if (c->split) for (int k = 0; k < 8; k++) if (c->progeny[k] != NULL) - engine_addtasks_recv_hydro(e, c->progeny[k], t_xv, t_rho, t_gradient); + engine_addtasks_recv_hydro(e, c->progeny[k], t_xv, t_rho, t_gradient, + t_ti); #else error("SWIFT was not compiled with MPI support."); @@ -432,9 +382,10 @@ void engine_addtasks_recv_hydro(struct engine *e, struct cell *c, * @param e The #engine. * @param c The foreign #cell. * @param t_feedback The recv_feed #task, if it has already been created. + * @param t_ti The recv_ti_end #task, if it has already been created. */ void engine_addtasks_recv_stars(struct engine *e, struct cell *c, - struct task *t_feedback) { + struct task *t_feedback, struct task *t_ti) { #ifdef WITH_MPI struct scheduler *s = &e->sched; @@ -450,9 +401,13 @@ void engine_addtasks_recv_stars(struct engine *e, struct cell *c, /* Create the tasks. */ t_feedback = scheduler_addtask(s, task_type_recv, task_subtype_spart, c->mpi.tag, 0, c, NULL); + + t_ti = scheduler_addtask(s, task_type_recv, task_subtype_tend_spart, + c->mpi.tag, 0, c, NULL); } c->mpi.stars.recv = t_feedback; + c->mpi.stars.recv_ti = t_ti; #ifdef SWIFT_DEBUG_CHECKS if (c->nodeID == e->nodeID) error("Local cell!"); @@ -466,13 +421,14 @@ void engine_addtasks_recv_stars(struct engine *e, struct cell *c, for (struct link *l = c->stars.feedback; l != NULL; l = l->next) { scheduler_addunlock(s, t_feedback, l->t); + scheduler_addunlock(s, l->t, t_ti); } /* Recurse? */ if (c->split) for (int k = 0; k < 8; k++) if (c->progeny[k] != NULL) - engine_addtasks_recv_stars(e, c->progeny[k], t_feedback); + engine_addtasks_recv_stars(e, c->progeny[k], t_feedback, t_ti); #else error("SWIFT was not compiled with MPI support."); @@ -485,9 +441,10 @@ void engine_addtasks_recv_stars(struct engine *e, struct cell *c, * @param e The #engine. * @param c The foreign #cell. * @param t_grav The recv_gpart #task, if it has already been created. + * @param t_ti The recv_ti_end #task, if it has already been created. */ void engine_addtasks_recv_gravity(struct engine *e, struct cell *c, - struct task *t_grav) { + struct task *t_grav, struct task *t_ti) { #ifdef WITH_MPI struct scheduler *s = &e->sched; @@ -503,89 +460,24 @@ void engine_addtasks_recv_gravity(struct engine *e, struct cell *c, /* Create the tasks. */ t_grav = scheduler_addtask(s, task_type_recv, task_subtype_gpart, c->mpi.tag, 0, c, NULL); - } - c->mpi.grav.recv = t_grav; - - for (struct link *l = c->grav.grav; l != NULL; l = l->next) - scheduler_addunlock(s, t_grav, l->t); - - /* Recurse? */ - if (c->split) - for (int k = 0; k < 8; k++) - if (c->progeny[k] != NULL) - engine_addtasks_recv_gravity(e, c->progeny[k], t_grav); - -#else - error("SWIFT was not compiled with MPI support."); -#endif -} - -/** - * @brief Add recv tasks for gravity pairs to a hierarchy of cells. - * - * @param e The #engine. - * @param c The foreign #cell. - * @param t_ti The recv_ti #task, if already been created. - * @param t_limiter The recv_limiter #task, if already created. - * @param with_limiter Are we running with the time-step limiter? - */ -void engine_addtasks_recv_timestep(struct engine *e, struct cell *c, - struct task *t_ti, struct task *t_limiter, - const int with_limiter) { - -#ifdef WITH_MPI - struct scheduler *s = &e->sched; - - /* Have we reached a level where there are any self/pair tasks ? */ - if (t_ti == NULL && (c->grav.grav != NULL || c->hydro.density != NULL)) { - -#ifdef SWIFT_DEBUG_CHECKS - /* Make sure this cell has a valid tag. */ - if (c->mpi.tag < 0) error("Trying to receive from untagged cell."); -#endif // SWIFT_DEBUG_CHECKS - - t_ti = scheduler_addtask(s, task_type_recv, task_subtype_tend, c->mpi.tag, - 0, c, NULL); - - if (with_limiter) - t_limiter = scheduler_addtask(s, task_type_recv, task_subtype_limiter, - c->mpi.tag, 0, c, NULL); + t_ti = scheduler_addtask(s, task_type_recv, task_subtype_tend_gpart, + c->mpi.tag, 0, c, NULL); } - c->mpi.recv_ti = t_ti; + c->mpi.grav.recv = t_grav; + c->mpi.grav.recv_ti = t_ti; for (struct link *l = c->grav.grav; l != NULL; l = l->next) { + scheduler_addunlock(s, t_grav, l->t); scheduler_addunlock(s, l->t, t_ti); } - if (with_limiter) { - - for (struct link *l = c->hydro.force; l != NULL; l = l->next) { - scheduler_addunlock(s, l->t, t_limiter); - } - - for (struct link *l = c->hydro.limiter; l != NULL; l = l->next) { - scheduler_addunlock(s, t_limiter, l->t); - scheduler_addunlock(s, l->t, t_ti); - } - - } else { - - for (struct link *l = c->hydro.force; l != NULL; l = l->next) { - scheduler_addunlock(s, l->t, t_ti); - } - } - - for (struct link *l = c->stars.feedback; l != NULL; l = l->next) - scheduler_addunlock(s, l->t, t_ti); - /* Recurse? */ if (c->split) for (int k = 0; k < 8; k++) if (c->progeny[k] != NULL) - engine_addtasks_recv_timestep(e, c->progeny[k], t_ti, t_limiter, - with_limiter); + engine_addtasks_recv_gravity(e, c->progeny[k], t_grav, t_ti); #else error("SWIFT was not compiled with MPI support."); @@ -608,6 +500,16 @@ void engine_make_hierarchical_tasks_common(struct engine *e, struct cell *c) { struct scheduler *s = &e->sched; const int with_limiter = (e->policy & engine_policy_limiter); + const int with_star_formation = (e->policy & engine_policy_star_formation); + + /* Are we at the top-level? */ + if (c->top == c && c->nodeID == e->nodeID) { + + if (with_star_formation && c->hydro.count > 0) { + c->hydro.star_formation = scheduler_addtask( + s, task_type_star_formation, task_subtype_none, 0, 0, c, NULL); + } + } /* Are we in a super-cell ? */ if (c->super == c) { @@ -634,6 +536,12 @@ void engine_make_hierarchical_tasks_common(struct engine *e, struct cell *c) { scheduler_addunlock(s, c->kick2, c->timestep); scheduler_addunlock(s, c->timestep, c->kick1); + /* Subgrid tasks: star formation */ + if (with_star_formation && c->hydro.count > 0) { + scheduler_addunlock(s, c->kick2, c->top->hydro.star_formation); + scheduler_addunlock(s, c->top->hydro.star_formation, c->timestep); + } + /* Time-step limiting */ if (with_limiter) { c->timestep_limiter = scheduler_addtask( @@ -868,16 +776,6 @@ void engine_make_hierarchical_tasks_hydro(struct engine *e, struct cell *c) { scheduler_addunlock(s, c->hydro.end_force, c->super->kick2); } - /* Subgrid tasks: star formation */ - if (with_star_formation) { - - c->hydro.star_formation = scheduler_addtask( - s, task_type_star_formation, task_subtype_none, 0, 0, c, NULL); - - scheduler_addunlock(s, c->super->kick2, c->hydro.star_formation); - scheduler_addunlock(s, c->hydro.star_formation, c->super->timestep); - } - /* Subgrid tasks: feedback */ if (with_feedback) { @@ -895,8 +793,9 @@ void engine_make_hierarchical_tasks_hydro(struct engine *e, struct cell *c) { scheduler_addunlock(s, c->super->kick2, c->stars.stars_in); scheduler_addunlock(s, c->stars.stars_out, c->super->timestep); - if (with_star_formation) { - scheduler_addunlock(s, c->hydro.star_formation, c->stars.stars_in); + if (with_star_formation && c->hydro.count > 0) { + scheduler_addunlock(s, c->top->hydro.star_formation, + c->stars.stars_in); } } } @@ -2044,6 +1943,7 @@ void engine_make_hydroloop_tasks_mapper(void *map_data, int num_elements, struct engine *e = (struct engine *)extra_data; const int periodic = e->s->periodic; const int with_feedback = (e->policy & engine_policy_feedback); + const int with_stars = (e->policy & engine_policy_stars); struct space *s = e->s; struct scheduler *sched = &e->sched; @@ -2066,7 +1966,7 @@ void engine_make_hydroloop_tasks_mapper(void *map_data, int num_elements, struct cell *ci = &cells[cid]; /* Skip cells without hydro or star particles */ - if ((ci->hydro.count == 0) && (!with_feedback || ci->stars.count == 0)) + if ((ci->hydro.count == 0) && (!with_stars || ci->stars.count == 0)) continue; /* If the cell is local build a self-interaction */ @@ -2163,7 +2063,7 @@ void engine_addtasks_send_mapper(void *map_data, int num_elements, void *extra_data) { struct engine *e = (struct engine *)extra_data; - const int with_limiter = (e->policy & engine_policy_limiter); + // const int with_limiter = (e->policy & engine_policy_limiter); struct cell_type_pair *cell_type_pairs = (struct cell_type_pair *)map_data; for (int k = 0; k < num_elements; k++) { @@ -2172,24 +2072,25 @@ void engine_addtasks_send_mapper(void *map_data, int num_elements, const int type = cell_type_pairs[k].type; /* Add the send task for the particle timesteps. */ - engine_addtasks_send_timestep(e, ci, cj, NULL, NULL, with_limiter); + // engine_addtasks_send_timestep(e, ci, cj, NULL, NULL, with_limiter); /* Add the send tasks for the cells in the proxy that have a hydro * connection. */ if ((e->policy & engine_policy_hydro) && (type & proxy_cell_type_hydro)) engine_addtasks_send_hydro(e, ci, cj, /*t_xv=*/NULL, - /*t_rho=*/NULL, /*t_gradient=*/NULL); + /*t_rho=*/NULL, /*t_gradient=*/NULL, + /*t_ti=*/NULL); /* Add the send tasks for the cells in the proxy that have a stars * connection. */ if ((e->policy & engine_policy_feedback) && (type & proxy_cell_type_hydro)) - engine_addtasks_send_stars(e, ci, cj, /*t_feedback=*/NULL); + engine_addtasks_send_stars(e, ci, cj, /*t_feedback=*/NULL, /*t_ti=*/NULL); /* Add the send tasks for the cells in the proxy that have a gravity * connection. */ if ((e->policy & engine_policy_self_gravity) && (type & proxy_cell_type_gravity)) - engine_addtasks_send_gravity(e, ci, cj, NULL); + engine_addtasks_send_gravity(e, ci, cj, NULL, NULL); } } @@ -2197,7 +2098,7 @@ void engine_addtasks_recv_mapper(void *map_data, int num_elements, void *extra_data) { struct engine *e = (struct engine *)extra_data; - const int with_limiter = (e->policy & engine_policy_limiter); + // const int with_limiter = (e->policy & engine_policy_limiter); struct cell_type_pair *cell_type_pairs = (struct cell_type_pair *)map_data; for (int k = 0; k < num_elements; k++) { @@ -2205,23 +2106,23 @@ void engine_addtasks_recv_mapper(void *map_data, int num_elements, const int type = cell_type_pairs[k].type; /* Add the recv task for the particle timesteps. */ - engine_addtasks_recv_timestep(e, ci, NULL, NULL, with_limiter); + // engine_addtasks_recv_timestep(e, ci, NULL, NULL, with_limiter); /* Add the recv tasks for the cells in the proxy that have a hydro * connection. */ if ((e->policy & engine_policy_hydro) && (type & proxy_cell_type_hydro)) - engine_addtasks_recv_hydro(e, ci, NULL, NULL, NULL); + engine_addtasks_recv_hydro(e, ci, NULL, NULL, NULL, NULL); /* Add the recv tasks for the cells in the proxy that have a stars * connection. */ if ((e->policy & engine_policy_feedback) && (type & proxy_cell_type_hydro)) - engine_addtasks_recv_stars(e, ci, NULL); + engine_addtasks_recv_stars(e, ci, NULL, NULL); /* Add the recv tasks for the cells in the proxy that have a gravity * connection. */ if ((e->policy & engine_policy_self_gravity) && (type & proxy_cell_type_gravity)) - engine_addtasks_recv_gravity(e, ci, NULL); + engine_addtasks_recv_gravity(e, ci, NULL, NULL); } } diff --git a/src/engine_marktasks.c b/src/engine_marktasks.c index 4ec27065b4ac1780f2401e8b3d161626082d6589..c114e7e7ffe2c5604460f2898c6ca3447645cf4c 100644 --- a/src/engine_marktasks.c +++ b/src/engine_marktasks.c @@ -372,7 +372,7 @@ void engine_marktasks_mapper(void *map_data, int num_elements, } /* If the foreign cell is active, we want its ti_end values. */ - if (ci_active_hydro) scheduler_activate(s, ci->mpi.recv_ti); + if (ci_active_hydro) scheduler_activate(s, ci->mpi.hydro.recv_ti); /* Is the foreign cell active and will need stuff from us? */ if (ci_active_hydro) { @@ -397,7 +397,7 @@ void engine_marktasks_mapper(void *map_data, int num_elements, /* If the local cell is active, send its ti_end values. */ if (cj_active_hydro) - scheduler_activate_send(s, cj->mpi.send_ti, ci_nodeID); + scheduler_activate_send(s, cj->mpi.hydro.send_ti, ci_nodeID); } else if (cj_nodeID != nodeID) { @@ -414,7 +414,7 @@ void engine_marktasks_mapper(void *map_data, int num_elements, } /* If the foreign cell is active, we want its ti_end values. */ - if (cj_active_hydro) scheduler_activate(s, cj->mpi.recv_ti); + if (cj_active_hydro) scheduler_activate(s, cj->mpi.hydro.recv_ti); /* Is the foreign cell active and will need stuff from us? */ if (cj_active_hydro) { @@ -441,7 +441,7 @@ void engine_marktasks_mapper(void *map_data, int num_elements, /* If the local cell is active, send its ti_end values. */ if (ci_active_hydro) - scheduler_activate_send(s, ci->mpi.send_ti, cj_nodeID); + scheduler_activate_send(s, ci->mpi.hydro.send_ti, cj_nodeID); } #endif } @@ -466,14 +466,14 @@ void engine_marktasks_mapper(void *map_data, int num_elements, cell_activate_drift_spart(cj, s); /* If the local cell is active, send its ti_end values. */ - scheduler_activate_send(s, cj->mpi.send_ti, ci_nodeID); + scheduler_activate_send(s, cj->mpi.stars.send_ti, ci_nodeID); } if (ci_active_stars) { scheduler_activate(s, ci->mpi.stars.recv); /* If the foreign cell is active, we want its ti_end values. */ - scheduler_activate(s, ci->mpi.recv_ti); + scheduler_activate(s, ci->mpi.stars.recv_ti); /* Is the foreign cell active and will need stuff from us? */ scheduler_activate_send(s, cj->mpi.hydro.send_xv, ci_nodeID); @@ -496,14 +496,14 @@ void engine_marktasks_mapper(void *map_data, int num_elements, cell_activate_drift_spart(ci, s); /* If the local cell is active, send its ti_end values. */ - scheduler_activate_send(s, ci->mpi.send_ti, cj_nodeID); + scheduler_activate_send(s, ci->mpi.stars.send_ti, cj_nodeID); } if (cj_active_stars) { scheduler_activate(s, cj->mpi.stars.recv); /* If the foreign cell is active, we want its ti_end values. */ - scheduler_activate(s, cj->mpi.recv_ti); + scheduler_activate(s, cj->mpi.stars.recv_ti); /* Is the foreign cell active and will need stuff from us? */ scheduler_activate_send(s, ci->mpi.hydro.send_xv, cj_nodeID); @@ -528,7 +528,7 @@ void engine_marktasks_mapper(void *map_data, int num_elements, if (cj_active_gravity) scheduler_activate(s, ci->mpi.grav.recv); /* If the foreign cell is active, we want its ti_end values. */ - if (ci_active_gravity) scheduler_activate(s, ci->mpi.recv_ti); + if (ci_active_gravity) scheduler_activate(s, ci->mpi.grav.recv_ti); /* Is the foreign cell active and will need stuff from us? */ if (ci_active_gravity) { @@ -544,7 +544,7 @@ void engine_marktasks_mapper(void *map_data, int num_elements, /* If the local cell is active, send its ti_end values. */ if (cj_active_gravity) - scheduler_activate_send(s, cj->mpi.send_ti, ci_nodeID); + scheduler_activate_send(s, cj->mpi.grav.send_ti, ci_nodeID); } else if (cj_nodeID != nodeID) { @@ -552,7 +552,7 @@ void engine_marktasks_mapper(void *map_data, int num_elements, if (ci_active_gravity) scheduler_activate(s, cj->mpi.grav.recv); /* If the foreign cell is active, we want its ti_end values. */ - if (cj_active_gravity) scheduler_activate(s, cj->mpi.recv_ti); + if (cj_active_gravity) scheduler_activate(s, cj->mpi.grav.recv_ti); /* Is the foreign cell active and will need stuff from us? */ if (cj_active_gravity) { @@ -568,7 +568,7 @@ void engine_marktasks_mapper(void *map_data, int num_elements, /* If the local cell is active, send its ti_end values. */ if (ci_active_gravity) - scheduler_activate_send(s, ci->mpi.send_ti, cj_nodeID); + scheduler_activate_send(s, ci->mpi.grav.send_ti, cj_nodeID); } #endif } @@ -665,7 +665,7 @@ void engine_marktasks_mapper(void *map_data, int num_elements, else if (t_type == task_type_star_formation) { if (cell_is_active_hydro(t->ci, e)) { scheduler_activate(s, t); - cell_activate_drift_spart(t->ci, s); + cell_activate_super_spart_drifts(t->ci, s); } } } diff --git a/src/gravity_cache.h b/src/gravity_cache.h index 6453d1eb92814f0e20cf25fa5996b920e523812d..6d073db60171a9d30d6f397213849e4c3d5314a1 100644 --- a/src/gravity_cache.h +++ b/src/gravity_cache.h @@ -80,17 +80,17 @@ struct gravity_cache { static INLINE void gravity_cache_clean(struct gravity_cache *c) { if (c->count > 0) { - free(c->x); - free(c->y); - free(c->z); - free(c->epsilon); - free(c->m); - free(c->a_x); - free(c->a_y); - free(c->a_z); - free(c->pot); - free(c->active); - free(c->use_mpole); + swift_free("gravity_cache", c->x); + swift_free("gravity_cache", c->y); + swift_free("gravity_cache", c->z); + swift_free("gravity_cache", c->epsilon); + swift_free("gravity_cache", c->m); + swift_free("gravity_cache", c->a_x); + swift_free("gravity_cache", c->a_y); + swift_free("gravity_cache", c->a_z); + swift_free("gravity_cache", c->pot); + swift_free("gravity_cache", c->active); + swift_free("gravity_cache", c->use_mpole); } c->count = 0; } @@ -117,18 +117,28 @@ static INLINE void gravity_cache_init(struct gravity_cache *c, gravity_cache_clean(c); int e = 0; - e += posix_memalign((void **)&c->x, SWIFT_CACHE_ALIGNMENT, sizeBytesF); - e += posix_memalign((void **)&c->y, SWIFT_CACHE_ALIGNMENT, sizeBytesF); - e += posix_memalign((void **)&c->z, SWIFT_CACHE_ALIGNMENT, sizeBytesF); - e += posix_memalign((void **)&c->epsilon, SWIFT_CACHE_ALIGNMENT, sizeBytesF); - e += posix_memalign((void **)&c->m, SWIFT_CACHE_ALIGNMENT, sizeBytesF); - e += posix_memalign((void **)&c->a_x, SWIFT_CACHE_ALIGNMENT, sizeBytesF); - e += posix_memalign((void **)&c->a_y, SWIFT_CACHE_ALIGNMENT, sizeBytesF); - e += posix_memalign((void **)&c->a_z, SWIFT_CACHE_ALIGNMENT, sizeBytesF); - e += posix_memalign((void **)&c->pot, SWIFT_CACHE_ALIGNMENT, sizeBytesF); - e += posix_memalign((void **)&c->active, SWIFT_CACHE_ALIGNMENT, sizeBytesI); - e += - posix_memalign((void **)&c->use_mpole, SWIFT_CACHE_ALIGNMENT, sizeBytesI); + e += swift_memalign("gravity_cache", (void **)&c->x, SWIFT_CACHE_ALIGNMENT, + sizeBytesF); + e += swift_memalign("gravity_cache", (void **)&c->y, SWIFT_CACHE_ALIGNMENT, + sizeBytesF); + e += swift_memalign("gravity_cache", (void **)&c->z, SWIFT_CACHE_ALIGNMENT, + sizeBytesF); + e += swift_memalign("gravity_cache", (void **)&c->epsilon, + SWIFT_CACHE_ALIGNMENT, sizeBytesF); + e += swift_memalign("gravity_cache", (void **)&c->m, SWIFT_CACHE_ALIGNMENT, + sizeBytesF); + e += swift_memalign("gravity_cache", (void **)&c->a_x, SWIFT_CACHE_ALIGNMENT, + sizeBytesF); + e += swift_memalign("gravity_cache", (void **)&c->a_y, SWIFT_CACHE_ALIGNMENT, + sizeBytesF); + e += swift_memalign("gravity_cache", (void **)&c->a_z, SWIFT_CACHE_ALIGNMENT, + sizeBytesF); + e += swift_memalign("gravity_cache", (void **)&c->pot, SWIFT_CACHE_ALIGNMENT, + sizeBytesF); + e += swift_memalign("gravity_cache", (void **)&c->active, + SWIFT_CACHE_ALIGNMENT, sizeBytesI); + e += swift_memalign("gravity_cache", (void **)&c->use_mpole, + SWIFT_CACHE_ALIGNMENT, sizeBytesI); if (e != 0) error("Couldn't allocate gravity cache, size: %d", padded_count); @@ -195,6 +205,14 @@ __attribute__((always_inline)) INLINE static void gravity_cache_populate( const float theta_crit2 = grav_props->theta_crit2; +#ifdef SWIFT_DEBUG_CHECKS + if (gcount_padded < gcount) error("Invalid padded cache size. Too small."); + if (gcount_padded % VEC_SIZE != 0) + error("Padded gravity cache size invalid. Not a multiple of SIMD length."); + if (c->count < gcount_padded) + error("Size of the gravity cache is not large enough."); +#endif + /* Make the compiler understand we are in happy vectorization land */ swift_declare_aligned_ptr(float, x, c->x, SWIFT_CACHE_ALIGNMENT); swift_declare_aligned_ptr(float, y, c->y, SWIFT_CACHE_ALIGNMENT); @@ -287,6 +305,14 @@ gravity_cache_populate_no_mpole(const timebin_t max_active_bin, const double shift[3], const struct cell *cell, const struct gravity_props *grav_props) { +#ifdef SWIFT_DEBUG_CHECKS + if (gcount_padded < gcount) error("Invalid padded cache size. Too small."); + if (gcount_padded % VEC_SIZE != 0) + error("Padded gravity cache size invalid. Not a multiple of SIMD length."); + if (c->count < gcount_padded) + error("Size of the gravity cache is not large enough."); +#endif + /* Make the compiler understand we are in happy vectorization land */ swift_declare_aligned_ptr(float, x, c->x, SWIFT_CACHE_ALIGNMENT); swift_declare_aligned_ptr(float, y, c->y, SWIFT_CACHE_ALIGNMENT); @@ -366,6 +392,12 @@ gravity_cache_populate_all_mpole(const timebin_t max_active_bin, const struct gravity_props *grav_props) { #ifdef SWIFT_DEBUG_CHECKS + if (gcount_padded < gcount) error("Invalid padded cache size. Too small."); + if (gcount_padded % VEC_SIZE != 0) + error("Padded gravity cache size invalid. Not a multiple of SIMD length."); + if (c->count < gcount_padded) + error("Size of the gravity cache is not large enough."); + const float theta_crit2 = grav_props->theta_crit2; #endif diff --git a/src/hydro/AnarchyPU/hydro.h b/src/hydro/AnarchyPU/hydro.h index 37d182889b703df9ea6d98b63a6daa5dc85274a8..0ac52165ec4b6dc5c193cf3d22d20458d2e643d3 100644 --- a/src/hydro/AnarchyPU/hydro.h +++ b/src/hydro/AnarchyPU/hydro.h @@ -218,6 +218,7 @@ hydro_get_comoving_soundspeed(const struct part *restrict p) { /* Compute the sound speed -- see theory section for justification */ /* IDEAL GAS ONLY -- P-U does not work with generic EoS. */ + const float square_rooted = sqrtf(hydro_gamma * p->pressure_bar / p->rho); return square_rooted; @@ -233,7 +234,7 @@ __attribute__((always_inline)) INLINE static float hydro_get_physical_soundspeed(const struct part *restrict p, const struct cosmology *cosmo) { - return cosmo->a_factor_sound_speed * p->force.soundspeed; + return cosmo->a_factor_sound_speed * hydro_get_comoving_soundspeed(p); } /** @@ -541,8 +542,8 @@ __attribute__((always_inline)) INLINE static void hydro_end_density( p->density.rot_v[2] *= h_inv_dim_plus_one * a_inv2 * rho_inv; /* Finish calculation of the velocity divergence */ - p->viscosity.div_v *= - h_inv_dim_plus_one * rho_inv * a_inv2 + cosmo->H * hydro_dimension; + p->viscosity.div_v *= h_inv_dim_plus_one * rho_inv * a_inv2; + p->viscosity.div_v += cosmo->H * hydro_dimension; } /** @@ -693,18 +694,31 @@ __attribute__((always_inline)) INLINE static void hydro_prepare_force( /* Here we need to update the artificial viscosity */ - /* Timescale for decay */ - const float tau = - p->h / (2.f * p->viscosity.v_sig * hydro_props->viscosity.length); - /* Construct time differential of div.v implicitly */ - const float div_v_dt = + /* We use in this function that h is the radius of support */ + const float kernel_support_physical = p->h * cosmo->a * kernel_gamma; + const float v_sig_physical = p->viscosity.v_sig * cosmo->a_factor_sound_speed; + const float soundspeed_physical = hydro_get_physical_soundspeed(p, cosmo); + + const float sound_crossing_time_inverse = + soundspeed_physical / kernel_support_physical; + + /* Construct time differential of div.v implicitly following the ANARCHY spec + */ + + float div_v_dt = dt_alpha == 0.f ? 0.f : (p->viscosity.div_v - p->viscosity.div_v_previous_step) / dt_alpha; + /* Construct the source term for the AV; if shock detected this is _positive_ * as div_v_dt should be _negative_ before the shock hits */ - const float S = p->h * p->h * max(0.f, -1.f * div_v_dt); - const float v_sig_square = p->viscosity.v_sig * p->viscosity.v_sig; + const float S = kernel_support_physical * kernel_support_physical * + max(0.f, -1.f * div_v_dt); + /* 0.25 factor comes from our definition of v_sig (sum of soundspeeds rather + * than mean). */ + /* Note this is v_sig_physical squared, not comoving */ + const float v_sig_square = 0.25 * v_sig_physical * v_sig_physical; + /* Calculate the current appropriate value of the AV based on the above */ const float alpha_loc = hydro_props->viscosity.alpha_max * S / (v_sig_square + S); @@ -713,11 +727,11 @@ __attribute__((always_inline)) INLINE static void hydro_prepare_force( /* Reset the value of alpha to the appropriate value */ p->viscosity.alpha = alpha_loc; } else { - /* Integrate the alpha forward in time to decay back to alpha = 0 */ - const float alpha_dt = (alpha_loc - p->viscosity.alpha) / tau; - - /* Finally, we can update the actual value of the alpha */ - p->viscosity.alpha += alpha_dt * dt_alpha; + /* Integrate the alpha forward in time to decay back to alpha = alpha_loc */ + p->viscosity.alpha = + alpha_loc + (p->viscosity.alpha - alpha_loc) * + expf(-dt_alpha * sound_crossing_time_inverse * + hydro_props->viscosity.length); } if (p->viscosity.alpha < hydro_props->viscosity.alpha_min) { @@ -729,16 +743,21 @@ __attribute__((always_inline)) INLINE static void hydro_prepare_force( /* Now for the diffusive alpha */ + const float diffusion_timescale_physical_inverse = + v_sig_physical / kernel_support_physical; + const float sqrt_u = sqrtf(p->u); /* Calculate initial value of alpha dt before bounding */ - /* alpha_diff_dt is cosmology-less */ - /* Evolution term: following Schaller+ 2015 */ - float alpha_diff_dt = - hydro_props->diffusion.beta * p->h * p->diffusion.laplace_u / sqrt_u; + /* Evolution term: following Schaller+ 2015. This is made up of several + cosmology factors: physical h, sound speed from u / sqrt(u), and the + 1 / a^2 coming from the laplace operator. */ + float alpha_diff_dt = hydro_props->diffusion.beta * kernel_support_physical * + p->diffusion.laplace_u * cosmo->a_factor_sound_speed / + (sqrt_u * cosmo->a * cosmo->a); /* Decay term: not documented in Schaller+ 2015 but was present * in the original EAGLE code and in Schaye+ 2015 */ - alpha_diff_dt -= - (p->diffusion.alpha - hydro_props->diffusion.alpha_min) / tau; + alpha_diff_dt -= (p->diffusion.alpha - hydro_props->diffusion.alpha_min) * + diffusion_timescale_physical_inverse; float new_diffusion_alpha = p->diffusion.alpha; new_diffusion_alpha += alpha_diff_dt * dt_alpha; diff --git a/src/hydro/AnarchyPU/hydro_iact.h b/src/hydro/AnarchyPU/hydro_iact.h index c214db3b018e00b7f3881fb301b55d6cf49a1f43..d091ebebcf4f165db006ca58667e943ddbaf8599 100644 --- a/src/hydro/AnarchyPU/hydro_iact.h +++ b/src/hydro/AnarchyPU/hydro_iact.h @@ -200,14 +200,28 @@ __attribute__((always_inline)) INLINE static void runner_iact_gradient( /* We need to construct the maximal signal velocity between our particle * and all of it's neighbours */ - const float dv_dx = (pi->v[0] - pj->v[0]) * dx[0] + - (pi->v[1] - pj->v[1]) * dx[1] + - (pi->v[2] - pj->v[2]) * dx[2]; + const float r = sqrtf(r2); + const float r_inv = 1.f / r; + const float ci = pi->force.soundspeed; + const float cj = pj->force.soundspeed; + + /* Cosmology terms for the signal velocity */ + const float fac_mu = pow_three_gamma_minus_five_over_two(a); + const float a2_Hubble = a * a * H; + + const float dvdr = (pi->v[0] - pj->v[0]) * dx[0] + + (pi->v[1] - pj->v[1]) * dx[1] + + (pi->v[2] - pj->v[2]) * dx[2]; - const float dv_dx_factor = min(0, const_viscosity_beta * dv_dx); + /* Add Hubble flow */ - const float new_v_sig = - pi->force.soundspeed + pj->force.soundspeed - dv_dx_factor; + const float dvdr_Hubble = dvdr + a2_Hubble * r2; + /* Are the particles moving towards each others ? */ + const float omega_ij = min(dvdr_Hubble, 0.f); + const float mu_ij = fac_mu * r_inv * omega_ij; /* This is 0 or negative */ + + /* Signal velocity */ + const float new_v_sig = ci + cj - const_viscosity_beta * mu_ij; /* Update if we need to */ pi->viscosity.v_sig = max(pi->viscosity.v_sig, new_v_sig); @@ -217,14 +231,13 @@ __attribute__((always_inline)) INLINE static void runner_iact_gradient( /* Need to get some kernel values F_ij = wi_dx */ float wi, wi_dx, wj, wj_dx; - const float r = sqrtf(r2); const float ui = r / hi; const float uj = r / hj; kernel_deval(ui, &wi, &wi_dx); kernel_deval(uj, &wj, &wj_dx); - const float delta_u_factor = (pi->u - pj->u) / r; + const float delta_u_factor = (pi->u - pj->u) * r_inv; pi->diffusion.laplace_u += pj->mass * delta_u_factor * wi_dx / pj->rho; pj->diffusion.laplace_u -= pi->mass * delta_u_factor * wj_dx / pi->rho; } @@ -253,14 +266,28 @@ __attribute__((always_inline)) INLINE static void runner_iact_nonsym_gradient( /* We need to construct the maximal signal velocity between our particle * and all of it's neighbours */ - const float dv_dx = (pi->v[0] - pj->v[0]) * dx[0] + - (pi->v[1] - pj->v[1]) * dx[1] + - (pi->v[2] - pj->v[2]) * dx[2]; + const float r = sqrtf(r2); + const float r_inv = 1.f / r; + const float ci = pi->force.soundspeed; + const float cj = pj->force.soundspeed; + + /* Cosmology terms for the signal velocity */ + const float fac_mu = pow_three_gamma_minus_five_over_two(a); + const float a2_Hubble = a * a * H; + + const float dvdr = (pi->v[0] - pj->v[0]) * dx[0] + + (pi->v[1] - pj->v[1]) * dx[1] + + (pi->v[2] - pj->v[2]) * dx[2]; - const float dv_dx_factor = min(0, const_viscosity_beta * dv_dx); + /* Add Hubble flow */ - const float new_v_sig = - pi->force.soundspeed + pj->force.soundspeed - dv_dx_factor; + const float dvdr_Hubble = dvdr + a2_Hubble * r2; + /* Are the particles moving towards each others ? */ + const float omega_ij = min(dvdr_Hubble, 0.f); + const float mu_ij = fac_mu * r_inv * omega_ij; /* This is 0 or negative */ + + /* Signal velocity */ + const float new_v_sig = ci + cj - const_viscosity_beta * mu_ij; /* Update if we need to */ pi->viscosity.v_sig = max(pi->viscosity.v_sig, new_v_sig); @@ -269,12 +296,11 @@ __attribute__((always_inline)) INLINE static void runner_iact_nonsym_gradient( /* Need to get some kernel values F_ij = wi_dx */ float wi, wi_dx; - const float r = sqrtf(r2); const float ui = r / hi; kernel_deval(ui, &wi, &wi_dx); - const float delta_u_factor = (pi->u - pj->u) / r; + const float delta_u_factor = (pi->u - pj->u) * r_inv; pi->diffusion.laplace_u += pj->mass * delta_u_factor * wi_dx / pj->rho; } @@ -343,7 +369,7 @@ __attribute__((always_inline)) INLINE static void runner_iact_force( const float mu_ij = fac_mu * r_inv * omega_ij; /* This is 0 or negative */ /* Compute sound speeds and signal velocity */ - const float v_sig = 0.5 * (pi->viscosity.v_sig + pj->viscosity.v_sig); + const float v_sig = 0.5f * (pi->viscosity.v_sig + pj->viscosity.v_sig); /* Balsara term */ const float balsara_i = pi->force.balsara; @@ -390,7 +416,7 @@ __attribute__((always_inline)) INLINE static void runner_iact_force( /* Diffusion term */ const float v_diff = max(pi->force.soundspeed + pj->force.soundspeed + dvdr_Hubble, 0.f); - const float alpha_diff = 0.5 * (pi->diffusion.alpha + pj->diffusion.alpha); + const float alpha_diff = 0.5f * (pi->diffusion.alpha + pj->diffusion.alpha); /* wi_dx + wj_dx / 2 is F_ij */ const float diff_du_term = alpha_diff * fac_mu * v_diff * (pi->u - pj->u) * (wi_dr + wj_dr) / rho_ij; @@ -474,7 +500,7 @@ __attribute__((always_inline)) INLINE static void runner_iact_nonsym_force( const float mu_ij = fac_mu * r_inv * omega_ij; /* This is 0 or negative */ /* Compute sound speeds and signal velocity */ - const float v_sig = 0.5 * (pi->viscosity.v_sig + pj->viscosity.v_sig); + const float v_sig = 0.5f * (pi->viscosity.v_sig + pj->viscosity.v_sig); /* Balsara term */ const float balsara_i = pi->force.balsara; @@ -514,7 +540,7 @@ __attribute__((always_inline)) INLINE static void runner_iact_nonsym_force( /* Diffusion term */ const float v_diff = max(pi->force.soundspeed + pj->force.soundspeed + dvdr_Hubble, 0.f); - const float alpha_diff = 0.5 * (pi->diffusion.alpha + pj->diffusion.alpha); + const float alpha_diff = 0.5f * (pi->diffusion.alpha + pj->diffusion.alpha); /* wi_dx + wj_dx / 2 is F_ij */ const float diff_du_term = alpha_diff * fac_mu * v_diff * (pi->u - pj->u) * (wi_dr + wj_dr) / rho_ij; diff --git a/src/hydro/Gadget2/hydro.h b/src/hydro/Gadget2/hydro.h index bcef2480810c807d61a6545f04fca6144f90e8fe..f05e3229a03f18115aa0e60ba5fee94bc39c5b9e 100644 --- a/src/hydro/Gadget2/hydro.h +++ b/src/hydro/Gadget2/hydro.h @@ -562,7 +562,7 @@ __attribute__((always_inline)) INLINE static void hydro_prepare_force( p->density.rot_v[2] * p->density.rot_v[2]); /* Compute the norm of div v including the Hubble flow term */ - const float div_physical_v = p->density.div_v + 3.f * cosmo->H; + const float div_physical_v = p->density.div_v + hydro_dimension * cosmo->H; const float abs_div_physical_v = fabsf(div_physical_v); /* Compute the pressure */ diff --git a/src/hydro/Minimal/hydro.h b/src/hydro/Minimal/hydro.h index 1d5d25f734acae72490ae42552abf65983463c07..e2fd0069524de32c49cbc3cb46553b18928d14bf 100644 --- a/src/hydro/Minimal/hydro.h +++ b/src/hydro/Minimal/hydro.h @@ -561,7 +561,7 @@ __attribute__((always_inline)) INLINE static void hydro_prepare_force( p->density.rot_v[2] * p->density.rot_v[2]); /* Compute the norm of div v including the Hubble flow term */ - const float div_physical_v = p->density.div_v + 3.f * cosmo->H; + const float div_physical_v = p->density.div_v + hydro_dimension * cosmo->H; const float abs_div_physical_v = fabsf(div_physical_v); /* Compute the pressure */ diff --git a/src/hydro/PressureEnergy/hydro.h b/src/hydro/PressureEnergy/hydro.h index 0e53b5f2ea12462fb1a618cda44934d113f655dd..3fe3c805fd4df4b495b102f9061b1e0a507e84e9 100644 --- a/src/hydro/PressureEnergy/hydro.h +++ b/src/hydro/PressureEnergy/hydro.h @@ -530,8 +530,8 @@ __attribute__((always_inline)) INLINE static void hydro_end_density( /* Finish calculation of the velocity divergence, including hubble flow term */ - p->density.div_v *= - h_inv_dim_plus_one * rho_inv * a_inv2 + cosmo->H * hydro_dimension; + p->density.div_v *= h_inv_dim_plus_one * rho_inv * a_inv2; + p->density.div_v += cosmo->H * hydro_dimension; } /** diff --git a/src/hydro/PressureEnergyMorrisMonaghanAV/hydro.h b/src/hydro/PressureEnergyMorrisMonaghanAV/hydro.h index 9f981569e80021774cebaf11066ebdac2afc6f94..be6a789ebdcd664a99a95b83f70167da04bd7534 100644 --- a/src/hydro/PressureEnergyMorrisMonaghanAV/hydro.h +++ b/src/hydro/PressureEnergyMorrisMonaghanAV/hydro.h @@ -526,8 +526,8 @@ __attribute__((always_inline)) INLINE static void hydro_end_density( p->density.rot_v[2] *= h_inv_dim_plus_one * a_inv2 * rho_inv; /* Finish calculation of the velocity divergence */ - p->density.div_v *= - h_inv_dim_plus_one * rho_inv * a_inv2 + cosmo->H * hydro_dimension; + p->density.div_v *= h_inv_dim_plus_one * rho_inv * a_inv2; + p->density.div_v += cosmo->H * hydro_dimension; } /** @@ -614,7 +614,14 @@ __attribute__((always_inline)) INLINE static void hydro_prepare_force( /* Artificial viscosity updates */ - const float inverse_tau = hydro_props->viscosity.length * soundspeed * h_inv; + /* TODO: Actually work out why this cosmology factor is correct + * and update the SPH / cosmology theory documents. */ + + /* We divide by a^2 here to make this transform under cosmology the + * same as the velocity (which in SWIFT has an extra 1/a^2 factor. + * See the cosmology theory documents for more information. */ + const float inverse_tau = + (hydro_props->viscosity.length * cosmo->a2_inv) * soundspeed * h_inv; const float source_term = -1.f * min(p->density.div_v, 0.f); /* Compute da/dt */ diff --git a/src/hydro_properties.c b/src/hydro_properties.c index ff578aec139ad06cb80279d8d827a41975f9773f..eb506839c859f561756fdc77072a915ff3a383aa 100644 --- a/src/hydro_properties.c +++ b/src/hydro_properties.c @@ -49,12 +49,15 @@ #ifdef ANARCHY_PU_SPH /* This nasty #ifdef is only temporary until we separate the viscosity * and hydro components. If it is not removed by July 2019, shout at JB. */ +#undef hydro_props_default_viscosity_alpha +#define hydro_props_default_viscosity_alpha \ + 0.1f /* Use a very low initial AV paramater for hydrodynamics tests */ #define hydro_props_default_viscosity_alpha_min \ - 0.01f /* values taken from Schaller+ 2015 */ + 0.0f /* values NOT the same as Schaller+ 2015 */ #define hydro_props_default_viscosity_alpha_max \ 2.0f /* values taken from Schaller+ 2015 */ #define hydro_props_default_viscosity_length \ - 0.01f /* values taken from Schaller+ 2015 */ + 0.25f /* values taken from Schaller+ 2015 */ #else #define hydro_props_default_viscosity_alpha_min \ 0.1f /* values taken from (price,2004), not used in legacy gadget mode */ diff --git a/src/partition.c b/src/partition.c index 606f64e4c2f1c057520ed7ff893db10905f5aa8e..e4cfd9dd142739d9508af133617439db7ab6eef9 100644 --- a/src/partition.c +++ b/src/partition.c @@ -389,7 +389,8 @@ static void split_metis(struct space *s, int nregions, int *celllist) { for (int i = 0; i < s->nr_cells; i++) s->cells_top[i].nodeID = celllist[i]; /* To check or visualise the partition dump all the cells. */ - /*dumpCellRanks("metis_partition", s->cells_top, s->nr_cells);*/ + /*if (engine_rank == 0) dumpCellRanks("metis_partition", s->cells_top, + * s->nr_cells);*/ } #endif diff --git a/src/runner.c b/src/runner.c index e86a4c03c3dcf5251ee242cb57b0136982cdf775..e002eabc5137d33a6ef2a26d16b14eace83aabdd 100644 --- a/src/runner.c +++ b/src/runner.c @@ -145,6 +145,9 @@ void runner_do_stars_ghost(struct runner *r, struct cell *c, int timer) { const int max_smoothing_iter = e->stars_properties->max_smoothing_iterations; int redo = 0, scount = 0; + /* Running value of the maximal smoothing length */ + double h_max = c->stars.h_max; + TIMER_TIC; /* Anything to do here? */ @@ -153,8 +156,14 @@ void runner_do_stars_ghost(struct runner *r, struct cell *c, int timer) { /* Recurse? */ if (c->split) { - for (int k = 0; k < 8; k++) - if (c->progeny[k] != NULL) runner_do_stars_ghost(r, c->progeny[k], 0); + for (int k = 0; k < 8; k++) { + if (c->progeny[k] != NULL) { + runner_do_stars_ghost(r, c->progeny[k], 0); + + /* Update h_max */ + h_max = max(h_max, c->progeny[k]->stars.h_max); + } + } } else { /* Init the list of active particles that have to be updated. */ @@ -340,6 +349,10 @@ void runner_do_stars_ghost(struct runner *r, struct cell *c, int timer) { } /* We now have a particle whose smoothing length has converged */ + + /* Check if h_max has increased */ + h_max = max(h_max, sp->h); + stars_reset_feedback(sp); /* Compute the stellar evolution */ @@ -413,6 +426,17 @@ void runner_do_stars_ghost(struct runner *r, struct cell *c, int timer) { free(h_0); } + /* Update h_max */ + c->stars.h_max = h_max; + + /* The ghost may not always be at the top level. + * Therefore we need to update h_max between the super- and top-levels */ + if (c->stars.ghost) { + for (struct cell *tmp = c->parent; tmp != NULL; tmp = tmp->parent) { + atomic_max_d(&tmp->stars.h_max, h_max); + } + } + if (timer) TIMER_TOC(timer_dostars_ghost); } @@ -589,7 +613,13 @@ void runner_do_star_formation(struct runner *r, struct cell *c, int timer) { TIMER_TIC; +#ifdef SWIFT_DEBUG_CHECKS + if (c->nodeID != e->nodeID) + error("Running star formation task on a foreign node!"); +#endif + /* Anything to do here? */ + if (c->hydro.count == 0) return; if (!cell_is_active_hydro(c, e)) return; /* Recurse? */ @@ -638,9 +668,13 @@ void runner_do_star_formation(struct runner *r, struct cell *c, int timer) { /* Convert the gas particle to a star particle */ struct spart *sp = cell_convert_part_to_spart(e, c, p, xp); - /* Copy the properties of the gas particle to the star particle */ - star_formation_copy_properties(p, xp, sp, e, sf_props, cosmo, - with_cosmology); + /* Did we get a star? (Or did we run out of spare ones?) */ + if (sp != NULL) { + + /* Copy the properties of the gas particle to the star particle */ + star_formation_copy_properties(p, xp, sp, e, sf_props, cosmo, + with_cosmology); + } } } else { /* Are we not star-forming? */ @@ -656,10 +690,11 @@ void runner_do_star_formation(struct runner *r, struct cell *c, int timer) { /* If we formed any stars, the star sorts are now invalid. We need to * re-compute them. */ - if (with_feedback && (c == c->hydro.super) && + if (with_feedback && (c == c->top) && (current_stars_count != c->stars.count)) { - cell_clear_stars_sort_flags(c, /*is_super=*/1); - runner_do_stars_sort(r, c, 0x1FFF, /*cleanup=*/0, /*timer=*/0); + + cell_clear_stars_sort_flags(c); + runner_do_all_stars_sort(r, c); } if (timer) TIMER_TOC(timer_do_star_formation); @@ -790,6 +825,10 @@ void runner_do_hydro_sort(struct runner *r, struct cell *c, int flags, TIMER_TIC; +#ifdef SWIFT_DEBUG_CHECKS + if (c->hydro.super == NULL) error("Task called above the super level!!!"); +#endif + /* We need to do the local sorts plus whatever was requested further up. */ flags |= c->hydro.do_sort; if (cleanup) { @@ -1008,6 +1047,10 @@ void runner_do_stars_sort(struct runner *r, struct cell *c, int flags, TIMER_TIC; +#ifdef SWIFT_DEBUG_CHECKS + if (c->hydro.super == NULL) error("Task called above the super level!!!"); +#endif + /* We need to do the local sorts plus whatever was requested further up. */ flags |= c->stars.do_sort; if (cleanup) { @@ -1199,6 +1242,92 @@ void runner_do_stars_sort(struct runner *r, struct cell *c, int flags, if (clock) TIMER_TOC(timer_do_stars_sort); } +/** + * @brief Recurse into a cell until reaching the super level and call + * the hydro sorting function there. + * + * This function must be called at or above the super level! + * + * This function will sort the particles in all 13 directions. + * + * @param r the #runner. + * @param c the #cell. + */ +void runner_do_all_hydro_sort(struct runner *r, struct cell *c) { + +#ifdef SWIFT_DEBUG_CHECKS + if (c->nodeID != engine_rank) error("Function called on a foreign cell!"); +#endif + + /* Shall we sort at this level? */ + if (c->hydro.super == c) { + + /* Sort everything */ + runner_do_hydro_sort(r, c, 0x1FFF, /*cleanup=*/0, /*timer=*/0); + + } else { + +#ifdef SWIFT_DEBUG_CHECKS + if (c->hydro.super != NULL) error("Function called below the super level!"); +#endif + + /* Ok, then, let's try lower */ + if (c->split) { + for (int k = 0; k < 8; ++k) { + if (c->progeny[k] != NULL) runner_do_all_hydro_sort(r, c->progeny[k]); + } + } else { +#ifdef SWIFT_DEBUG_CHECKS + error("Reached a leaf without encountering a hydro super cell!"); +#endif + } + } +} + +/** + * @brief Recurse into a cell until reaching the super level and call + * the star sorting function there. + * + * This function must be called at or above the super level! + * + * This function will sort the particles in all 13 directions. + * + * @param r the #runner. + * @param c the #cell. + */ +void runner_do_all_stars_sort(struct runner *r, struct cell *c) { + +#ifdef SWIFT_DEBUG_CHECKS + if (c->nodeID != engine_rank) error("Function called on a foreign cell!"); +#endif + + if (!cell_is_active_stars(c, r->e) && !cell_is_active_hydro(c, r->e)) return; + + /* Shall we sort at this level? */ + if (c->hydro.super == c) { + + /* Sort everything */ + runner_do_stars_sort(r, c, 0x1FFF, /*cleanup=*/0, /*timer=*/0); + + } else { + +#ifdef SWIFT_DEBUG_CHECKS + if (c->hydro.super != NULL) error("Function called below the super level!"); +#endif + + /* Ok, then, let's try lower */ + if (c->split) { + for (int k = 0; k < 8; ++k) { + if (c->progeny[k] != NULL) runner_do_all_stars_sort(r, c->progeny[k]); + } + } else { +#ifdef SWIFT_DEBUG_CHECKS + error("Reached a leaf without encountering a hydro super cell!"); +#endif + } + } +} + /** * @brief Initialize the multipoles before the gravity calculation. * @@ -1249,7 +1378,7 @@ void runner_do_extra_ghost(struct runner *r, struct cell *c, int timer) { struct xpart *restrict xparts = c->hydro.xparts; const int count = c->hydro.count; const struct engine *e = r->e; - const integertime_t ti_end = e->ti_current; + const integertime_t ti_current = e->ti_current; const int with_cosmology = (e->policy & engine_policy_cosmology); const double time_base = e->time_base; const struct cosmology *cosmo = e->cosmology; @@ -1284,9 +1413,14 @@ void runner_do_extra_ghost(struct runner *r, struct cell *c, int timer) { * This is the physical time between the start and end of the time-step * without any scale-factor powers. */ double dt_alpha; + if (with_cosmology) { const integertime_t ti_step = get_integer_timestep(p->time_bin); - dt_alpha = cosmology_get_delta_time(cosmo, ti_end - ti_step, ti_end); + const integertime_t ti_begin = + get_integer_time_begin(ti_current - 1, p->time_bin); + + dt_alpha = + cosmology_get_delta_time(cosmo, ti_begin, ti_begin + ti_step); } else { dt_alpha = get_timestep(p->time_bin, time_base); } @@ -1336,15 +1470,25 @@ void runner_do_ghost(struct runner *r, struct cell *c, int timer) { const int max_smoothing_iter = e->hydro_properties->max_smoothing_iterations; int redo = 0, count = 0; + /* Running value of the maximal smoothing length */ + double h_max = c->hydro.h_max; + TIMER_TIC; /* Anything to do here? */ + if (c->hydro.count == 0) return; if (!cell_is_active_hydro(c, e)) return; /* Recurse? */ if (c->split) { - for (int k = 0; k < 8; k++) - if (c->progeny[k] != NULL) runner_do_ghost(r, c->progeny[k], 0); + for (int k = 0; k < 8; k++) { + if (c->progeny[k] != NULL) { + runner_do_ghost(r, c->progeny[k], 0); + + /* Update h_max */ + h_max = max(h_max, c->progeny[k]->hydro.h_max); + } + } } else { /* Init the list of active particles that have to be updated and their @@ -1465,13 +1609,16 @@ void runner_do_ghost(struct runner *r, struct cell *c, int timer) { * artificial viscosity and thermal conduction terms) */ const int with_cosmology = (e->policy & engine_policy_cosmology); const double time_base = e->time_base; - const integertime_t ti_end = e->ti_current; + const integertime_t ti_current = e->ti_current; double dt_alpha; if (with_cosmology) { const integertime_t ti_step = get_integer_timestep(p->time_bin); + const integertime_t ti_begin = + get_integer_time_begin(ti_current - 1, p->time_bin); + dt_alpha = - cosmology_get_delta_time(cosmo, ti_end - ti_step, ti_end); + cosmology_get_delta_time(cosmo, ti_begin, ti_begin + ti_step); } else { dt_alpha = get_timestep(p->time_bin, time_base); } @@ -1586,7 +1733,10 @@ void runner_do_ghost(struct runner *r, struct cell *c, int timer) { } } - /* We now have a particle whose smoothing length has converged */ + /* We now have a particle whose smoothing length has converged */ + + /* Check if h_max is increased */ + h_max = max(h_max, p->h); #ifdef EXTRA_HYDRO_LOOP @@ -1609,13 +1759,17 @@ void runner_do_ghost(struct runner *r, struct cell *c, int timer) { * the evolution of alpha factors (i.e. those involved in the artificial * viscosity and thermal conduction terms) */ const int with_cosmology = (e->policy & engine_policy_cosmology); - const integertime_t ti_end = e->ti_current; const double time_base = e->time_base; + const integertime_t ti_current = e->ti_current; double dt_alpha; if (with_cosmology) { const integertime_t ti_step = get_integer_timestep(p->time_bin); - dt_alpha = cosmology_get_delta_time(cosmo, ti_end - ti_step, ti_end); + const integertime_t ti_begin = + get_integer_time_begin(ti_current - 1, p->time_bin); + + dt_alpha = + cosmology_get_delta_time(cosmo, ti_begin, ti_begin + ti_step); } else { dt_alpha = get_timestep(p->time_bin, time_base); } @@ -1700,6 +1854,17 @@ void runner_do_ghost(struct runner *r, struct cell *c, int timer) { free(h_0); } + /* Update h_max */ + c->hydro.h_max = h_max; + + /* The ghost may not always be at the top level. + * Therefore we need to update h_max between the super- and top-levels */ + if (c->hydro.ghost) { + for (struct cell *tmp = c->parent; tmp != NULL; tmp = tmp->parent) { + atomic_max_d(&tmp->hydro.h_max, h_max); + } + } + if (timer) TIMER_TOC(timer_do_ghost); } @@ -1800,6 +1965,7 @@ void runner_do_unskip_mapper(void *map_data, int num_elements, void *extra_data) { struct engine *e = (struct engine *)extra_data; + const int nodeID = e->nodeID; struct space *s = e->s; int *local_cells = (int *)map_data; @@ -1812,7 +1978,7 @@ void runner_do_unskip_mapper(void *map_data, int num_elements, /* All gravity tasks */ if ((e->policy & engine_policy_self_gravity) || - (e->policy & engine_policy_external_gravity)) + ((e->policy & engine_policy_external_gravity) && c->nodeID == nodeID)) runner_do_unskip_gravity(c, e); /* Stars tasks */ @@ -1868,6 +2034,7 @@ void runner_do_drift_spart(struct runner *r, struct cell *c, int timer) { if (timer) TIMER_TOC(timer_drift_spart); } + /** * @brief Perform the first half-kick on all the active particles in a cell. * @@ -3310,13 +3477,25 @@ void *runner_main(void *data) { break; #ifdef WITH_MPI case task_type_send: - if (t->subtype == task_subtype_tend) { + if (t->subtype == task_subtype_tend_part) { + free(t->buff); + } + if (t->subtype == task_subtype_tend_gpart) { + free(t->buff); + } + if (t->subtype == task_subtype_tend_spart) { free(t->buff); } break; case task_type_recv: - if (t->subtype == task_subtype_tend) { - cell_unpack_end_step(ci, (struct pcell_step *)t->buff); + if (t->subtype == task_subtype_tend_part) { + cell_unpack_end_step_hydro(ci, (struct pcell_step_hydro *)t->buff); + free(t->buff); + } else if (t->subtype == task_subtype_tend_gpart) { + cell_unpack_end_step_grav(ci, (struct pcell_step_grav *)t->buff); + free(t->buff); + } else if (t->subtype == task_subtype_tend_spart) { + cell_unpack_end_step_stars(ci, (struct pcell_step_stars *)t->buff); free(t->buff); } else if (t->subtype == task_subtype_xv) { runner_do_recv_part(r, ci, 1, 1); diff --git a/src/runner.h b/src/runner.h index 01eed2cd712f6470ff200795f2581a30deeecb25..c7b923ed262965e9dd7566dd093084c6f0948079 100644 --- a/src/runner.h +++ b/src/runner.h @@ -73,6 +73,8 @@ void runner_do_hydro_sort(struct runner *r, struct cell *c, int flag, int cleanup, int clock); void runner_do_stars_sort(struct runner *r, struct cell *c, int flag, int cleanup, int clock); +void runner_do_all_hydro_sort(struct runner *r, struct cell *c); +void runner_do_all_stars_sort(struct runner *r, struct cell *c); void runner_do_drift_part(struct runner *r, struct cell *c, int timer); void runner_do_drift_gpart(struct runner *r, struct cell *c, int timer); void runner_do_drift_spart(struct runner *r, struct cell *c, int timer); diff --git a/src/scheduler.c b/src/scheduler.c index ab9ff41d3dc18aba9a81db130d115fcc7f084d46..c1662ac1d8045534898e31d357a85d646a0dbad8 100644 --- a/src/scheduler.c +++ b/src/scheduler.c @@ -547,6 +547,7 @@ static void scheduler_splittask_hydro(struct task *t, struct scheduler *s) { /* Note this is not very clean as the scheduler should not really access the engine... */ const int with_feedback = (s->space->e->policy & engine_policy_feedback); + const int with_stars = (s->space->e->policy & engine_policy_stars); /* Iterate on this task until we're done with it. */ int redo = 1; @@ -558,7 +559,7 @@ static void scheduler_splittask_hydro(struct task *t, struct scheduler *s) { /* Is this a non-empty self-task? */ const int is_self = (t->type == task_type_self) && (t->ci != NULL) && - ((t->ci->hydro.count > 0) || (with_feedback && t->ci->stars.count > 0)); + ((t->ci->hydro.count > 0) || (with_stars && t->ci->stars.count > 0)); /* Is this a non-empty pair-task? */ const int is_pair = @@ -1701,6 +1702,9 @@ void scheduler_reweight(struct scheduler *s, int verbose) { case task_type_drift_gpart: cost = wscale * gcount_i; break; + case task_type_drift_spart: + cost = wscale * scount_i; + break; case task_type_init_grav: cost = wscale * gcount_i; break; @@ -1719,6 +1723,12 @@ void scheduler_reweight(struct scheduler *s, int verbose) { case task_type_end_grav_force: cost = wscale * gcount_i; break; + case task_type_cooling: + cost = wscale * count_i; + break; + case task_type_star_formation: + cost = wscale * (count_i + scount_i); + break; case task_type_kick1: cost = wscale * count_i + wscale * gcount_i; break; @@ -1918,13 +1928,27 @@ void scheduler_enqueue(struct scheduler *s, struct task *t) { break; case task_type_recv: #ifdef WITH_MPI - if (t->subtype == task_subtype_tend) { - t->buff = (struct pcell_step *)malloc(sizeof(struct pcell_step) * - t->ci->mpi.pcell_size); - err = MPI_Irecv(t->buff, - t->ci->mpi.pcell_size * sizeof(struct pcell_step), - MPI_BYTE, t->ci->nodeID, t->flags, - subtaskMPI_comms[t->subtype], &t->req); + if (t->subtype == task_subtype_tend_part) { + t->buff = (struct pcell_step_hydro *)malloc( + sizeof(struct pcell_step_hydro) * t->ci->mpi.pcell_size); + err = MPI_Irecv( + t->buff, t->ci->mpi.pcell_size * sizeof(struct pcell_step_hydro), + MPI_BYTE, t->ci->nodeID, t->flags, subtaskMPI_comms[t->subtype], + &t->req); + } else if (t->subtype == task_subtype_tend_gpart) { + t->buff = (struct pcell_step_grav *)malloc( + sizeof(struct pcell_step_grav) * t->ci->mpi.pcell_size); + err = MPI_Irecv( + t->buff, t->ci->mpi.pcell_size * sizeof(struct pcell_step_grav), + MPI_BYTE, t->ci->nodeID, t->flags, subtaskMPI_comms[t->subtype], + &t->req); + } else if (t->subtype == task_subtype_tend_spart) { + t->buff = (struct pcell_step_stars *)malloc( + sizeof(struct pcell_step_stars) * t->ci->mpi.pcell_size); + err = MPI_Irecv( + t->buff, t->ci->mpi.pcell_size * sizeof(struct pcell_step_stars), + MPI_BYTE, t->ci->nodeID, t->flags, subtaskMPI_comms[t->subtype], + &t->req); } else if (t->subtype == task_subtype_xv || t->subtype == task_subtype_rho || t->subtype == task_subtype_gradient) { @@ -1958,21 +1982,61 @@ void scheduler_enqueue(struct scheduler *s, struct task *t) { break; case task_type_send: #ifdef WITH_MPI - if (t->subtype == task_subtype_tend) { - t->buff = (struct pcell_step *)malloc(sizeof(struct pcell_step) * - t->ci->mpi.pcell_size); - cell_pack_end_step(t->ci, (struct pcell_step *)t->buff); - if ((t->ci->mpi.pcell_size * sizeof(struct pcell_step)) > - s->mpi_message_limit) - err = MPI_Isend(t->buff, - t->ci->mpi.pcell_size * sizeof(struct pcell_step), - MPI_BYTE, t->cj->nodeID, t->flags, - subtaskMPI_comms[t->subtype], &t->req); - else - err = MPI_Issend(t->buff, - t->ci->mpi.pcell_size * sizeof(struct pcell_step), - MPI_BYTE, t->cj->nodeID, t->flags, - subtaskMPI_comms[t->subtype], &t->req); + if (t->subtype == task_subtype_tend_part) { + t->buff = (struct pcell_step_hydro *)malloc( + sizeof(struct pcell_step_hydro) * t->ci->mpi.pcell_size); + cell_pack_end_step_hydro(t->ci, (struct pcell_step_hydro *)t->buff); + + if ((t->ci->mpi.pcell_size * sizeof(struct pcell_step_hydro)) > + s->mpi_message_limit) { + err = MPI_Isend( + t->buff, + t->ci->mpi.pcell_size * sizeof(struct pcell_step_hydro), + MPI_BYTE, t->cj->nodeID, t->flags, subtaskMPI_comms[t->subtype], + &t->req); + } else { + err = MPI_Issend( + t->buff, + t->ci->mpi.pcell_size * sizeof(struct pcell_step_hydro), + MPI_BYTE, t->cj->nodeID, t->flags, subtaskMPI_comms[t->subtype], + &t->req); + } + } else if (t->subtype == task_subtype_tend_gpart) { + t->buff = (struct pcell_step_grav *)malloc( + sizeof(struct pcell_step_grav) * t->ci->mpi.pcell_size); + cell_pack_end_step_grav(t->ci, (struct pcell_step_grav *)t->buff); + + if ((t->ci->mpi.pcell_size * sizeof(struct pcell_step_grav)) > + s->mpi_message_limit) { + err = MPI_Isend( + t->buff, t->ci->mpi.pcell_size * sizeof(struct pcell_step_grav), + MPI_BYTE, t->cj->nodeID, t->flags, subtaskMPI_comms[t->subtype], + &t->req); + } else { + err = MPI_Issend( + t->buff, t->ci->mpi.pcell_size * sizeof(struct pcell_step_grav), + MPI_BYTE, t->cj->nodeID, t->flags, subtaskMPI_comms[t->subtype], + &t->req); + } + } else if (t->subtype == task_subtype_tend_spart) { + t->buff = (struct pcell_step_stars *)malloc( + sizeof(struct pcell_step_stars) * t->ci->mpi.pcell_size); + cell_pack_end_step_stars(t->ci, (struct pcell_step_stars *)t->buff); + + if ((t->ci->mpi.pcell_size * sizeof(struct pcell_step_stars)) > + s->mpi_message_limit) { + err = MPI_Isend( + t->buff, + t->ci->mpi.pcell_size * sizeof(struct pcell_step_stars), + MPI_BYTE, t->cj->nodeID, t->flags, subtaskMPI_comms[t->subtype], + &t->req); + } else { + err = MPI_Issend( + t->buff, + t->ci->mpi.pcell_size * sizeof(struct pcell_step_stars), + MPI_BYTE, t->cj->nodeID, t->flags, subtaskMPI_comms[t->subtype], + &t->req); + } } else if (t->subtype == task_subtype_xv || t->subtype == task_subtype_rho || t->subtype == task_subtype_gradient) { diff --git a/src/space.c b/src/space.c index f92830061085dcd439e6165977f693f8f00171a4..a2a08be232bb8b8a439e23efb6d143d0c069771b 100644 --- a/src/space.c +++ b/src/space.c @@ -236,6 +236,7 @@ void space_rebuild_recycle_mapper(void *map_data, int num_elements, c->grav.down = NULL; c->grav.mesh = NULL; c->grav.end_force = NULL; + c->top = c; c->super = c; c->hydro.super = c; c->grav.super = c; @@ -270,17 +271,21 @@ void space_rebuild_recycle_mapper(void *map_data, int num_elements, c->mpi.hydro.recv_xv = NULL; c->mpi.hydro.recv_rho = NULL; c->mpi.hydro.recv_gradient = NULL; + c->mpi.hydro.recv_ti = NULL; c->mpi.grav.recv = NULL; + c->mpi.grav.recv_ti = NULL; c->mpi.stars.recv = NULL; - c->mpi.recv_ti = NULL; + c->mpi.stars.recv_ti = NULL; c->mpi.limiter.recv = NULL; c->mpi.hydro.send_xv = NULL; c->mpi.hydro.send_rho = NULL; c->mpi.hydro.send_gradient = NULL; + c->mpi.hydro.send_ti = NULL; c->mpi.grav.send = NULL; + c->mpi.grav.send_ti = NULL; c->mpi.stars.send = NULL; - c->mpi.send_ti = NULL; + c->mpi.stars.send_ti = NULL; c->mpi.limiter.send = NULL; #endif } @@ -570,6 +575,7 @@ void space_regrid(struct space *s, int verbose) { c->hydro.count = 0; c->grav.count = 0; c->stars.count = 0; + c->top = c; c->super = c; c->hydro.super = c; c->grav.super = c; @@ -709,14 +715,23 @@ void space_allocate_extras(struct space *s, int verbose) { size_t size_gparts = s->size_gparts; size_t size_sparts = s->size_sparts; - int local_cells = 0; - for (int i = 0; i < s->nr_cells; ++i) - if (s->cells_top[i].nodeID == local_nodeID) local_cells++; + int *local_cells = (int *)malloc(sizeof(int) * s->nr_cells); + if (local_cells == NULL) + error("Failed to allocate list of local top-level cells"); + + /* List the local cells */ + int nr_local_cells = 0; + for (int i = 0; i < s->nr_cells; ++i) { + if (s->cells_top[i].nodeID == local_nodeID) { + local_cells[nr_local_cells] = i; + ++nr_local_cells; + } + } /* Number of extra particles we want for each type */ - const size_t expected_num_extra_parts = local_cells * space_extra_parts; - const size_t expected_num_extra_gparts = local_cells * space_extra_gparts; - const size_t expected_num_extra_sparts = local_cells * space_extra_sparts; + const size_t expected_num_extra_parts = nr_local_cells * space_extra_parts; + const size_t expected_num_extra_gparts = nr_local_cells * space_extra_gparts; + const size_t expected_num_extra_sparts = nr_local_cells * space_extra_sparts; if (verbose) { message("Currently have %zd/%zd/%zd real particles.", nr_actual_parts, @@ -784,11 +799,10 @@ void space_allocate_extras(struct space *s, int verbose) { s->gparts[i].id_or_neg_offset = -1; } - /* Put the spare particles in their correct cell */ -#ifdef WITH_MPI - error("Need to do this correctly over MPI for only the local cells."); -#endif - int count_in_cell = 0, current_cell = 0; + /* Put the spare particles in their correct cell */ + int local_cell_id = 0; + int current_cell = local_cells[local_cell_id]; + int count_in_cell = 0; size_t count_extra_gparts = 0; for (size_t i = 0; i < nr_actual_gparts + expected_num_extra_gparts; ++i) { @@ -810,7 +824,11 @@ void space_allocate_extras(struct space *s, int verbose) { /* Once we have reached the number of extra gpart per cell, we move to the * next */ if (count_in_cell == space_extra_gparts) { - ++current_cell; + ++local_cell_id; + + if (local_cell_id == nr_local_cells) break; + + current_cell = local_cells[local_cell_id]; count_in_cell = 0; } } @@ -873,11 +891,10 @@ void space_allocate_extras(struct space *s, int verbose) { s->parts[i].id = -1; } - /* Put the spare particles in their correct cell */ -#ifdef WITH_MPI - error("Need to do this correctly over MPI for only the local cells."); -#endif - int count_in_cell = 0, current_cell = 0; + /* Put the spare particles in their correct cell */ + int local_cell_id = 0; + int current_cell = local_cells[local_cell_id]; + int count_in_cell = 0; size_t count_extra_parts = 0; for (size_t i = 0; i < nr_actual_parts + expected_num_extra_parts; ++i) { @@ -899,7 +916,11 @@ void space_allocate_extras(struct space *s, int verbose) { /* Once we have reached the number of extra part per cell, we move to the * next */ if (count_in_cell == space_extra_parts) { - ++current_cell; + ++local_cell_id; + + if (local_cell_id == nr_local_cells) break; + + current_cell = local_cells[local_cell_id]; count_in_cell = 0; } } @@ -952,11 +973,10 @@ void space_allocate_extras(struct space *s, int verbose) { s->sparts[i].id = -42; } - /* Put the spare particles in their correct cell */ -#ifdef WITH_MPI - error("Need to do this correctly over MPI for only the local cells."); -#endif - int count_in_cell = 0, current_cell = 0; + /* Put the spare particles in their correct cell */ + int local_cell_id = 0; + int current_cell = local_cells[local_cell_id]; + int count_in_cell = 0; size_t count_extra_sparts = 0; for (size_t i = 0; i < nr_actual_sparts + expected_num_extra_sparts; ++i) { @@ -978,7 +998,11 @@ void space_allocate_extras(struct space *s, int verbose) { /* Once we have reached the number of extra spart per cell, we move to the * next */ if (count_in_cell == space_extra_sparts) { - ++current_cell; + ++local_cell_id; + + if (local_cell_id == nr_local_cells) break; + + current_cell = local_cells[local_cell_id]; count_in_cell = 0; } } @@ -1000,6 +1024,9 @@ void space_allocate_extras(struct space *s, int verbose) { part_verify_links(s->parts, s->gparts, s->sparts, nr_parts, nr_gparts, nr_sparts, verbose); #endif + + /* Free the list of local cells */ + free(local_cells); } /** @@ -1017,6 +1044,9 @@ void space_rebuild(struct space *s, int repartitioned, int verbose) { #ifdef SWIFT_DEBUG_CHECKS if (s->e->nodeID == 0 || verbose) message("(re)building space"); fflush(stdout); + + /* Reset the cell counter */ + last_cell_id = 1; #endif /* Re-grid if necessary, or just re-set the cell data. */ @@ -1665,12 +1695,12 @@ void space_split(struct space *s, int verbose) { void space_reorder_extra_parts_mapper(void *map_data, int num_cells, void *extra_data) { - - struct cell *cells_top = (struct cell *)map_data; + int *local_cells = (int *)map_data; struct space *s = (struct space *)extra_data; + struct cell *cells_top = s->cells_top; for (int ind = 0; ind < num_cells; ind++) { - struct cell *c = &cells_top[ind]; + struct cell *c = &cells_top[local_cells[ind]]; cell_reorder_extra_parts(c, c->hydro.parts - s->parts); } } @@ -1678,11 +1708,12 @@ void space_reorder_extra_parts_mapper(void *map_data, int num_cells, void space_reorder_extra_gparts_mapper(void *map_data, int num_cells, void *extra_data) { - struct cell *cells_top = (struct cell *)map_data; + int *local_cells = (int *)map_data; struct space *s = (struct space *)extra_data; + struct cell *cells_top = s->cells_top; for (int ind = 0; ind < num_cells; ind++) { - struct cell *c = &cells_top[ind]; + struct cell *c = &cells_top[local_cells[ind]]; cell_reorder_extra_gparts(c, s->parts, s->sparts); } } @@ -1690,11 +1721,12 @@ void space_reorder_extra_gparts_mapper(void *map_data, int num_cells, void space_reorder_extra_sparts_mapper(void *map_data, int num_cells, void *extra_data) { - struct cell *cells_top = (struct cell *)map_data; + int *local_cells = (int *)map_data; struct space *s = (struct space *)extra_data; + struct cell *cells_top = s->cells_top; for (int ind = 0; ind < num_cells; ind++) { - struct cell *c = &cells_top[ind]; + struct cell *c = &cells_top[local_cells[ind]]; cell_reorder_extra_sparts(c, c->stars.parts - s->sparts); } } @@ -1711,25 +1743,20 @@ void space_reorder_extra_sparts_mapper(void *map_data, int num_cells, */ void space_reorder_extras(struct space *s, int verbose) { -#ifdef WITH_MPI - if (space_extra_parts || space_extra_gparts || space_extra_sparts) - error("Need an MPI-proof version of this."); -#endif - /* Re-order the gas particles */ if (space_extra_parts) threadpool_map(&s->e->threadpool, space_reorder_extra_parts_mapper, - s->cells_top, s->nr_cells, sizeof(struct cell), 0, s); + s->local_cells_top, s->nr_local_cells, sizeof(int), 0, s); /* Re-order the gravity particles */ if (space_extra_gparts) threadpool_map(&s->e->threadpool, space_reorder_extra_gparts_mapper, - s->cells_top, s->nr_cells, sizeof(struct cell), 0, s); + s->local_cells_top, s->nr_local_cells, sizeof(int), 0, s); /* Re-order the star particles */ if (space_extra_sparts) threadpool_map(&s->e->threadpool, space_reorder_extra_sparts_mapper, - s->cells_top, s->nr_cells, sizeof(struct cell), 0, s); + s->local_cells_top, s->nr_local_cells, sizeof(int), 0, s); } /** @@ -2747,6 +2774,7 @@ void space_split_recursive(struct space *s, struct cell *c, cp->stars.dx_max_sort = 0.f; cp->nodeID = c->nodeID; cp->parent = c; + cp->top = c->top; cp->super = NULL; cp->hydro.super = NULL; cp->grav.super = NULL; diff --git a/src/task.c b/src/task.c index 4327d7cf28913534441b596496d82141d14ae5a0..b1a384a10e7f2555f87172dc49950a4a9b382605 100644 --- a/src/task.c +++ b/src/task.c @@ -79,6 +79,8 @@ const char *taskID_names[task_type_count] = {"none", "grav_end_force", "cooling", "star_formation", + "star_formation_in", + "star_formation_out", "logger", "stars_in", "stars_out", @@ -90,11 +92,23 @@ const char *taskID_names[task_type_count] = {"none", "fof_pair"}; /* Sub-task type names. */ -const char *subtaskID_names[task_subtype_count] = { - "none", "density", "gradient", "force", - "limiter", "grav", "external_grav", "tend", - "xv", "rho", "gpart", "multipole", - "spart", "stars_density", "stars_feedback"}; +const char *subtaskID_names[task_subtype_count] = {"none", + "density", + "gradient", + "force", + "limiter", + "grav", + "external_grav", + "tend_part", + "tend_gpart", + "tend_spart", + "xv", + "rho", + "gpart", + "multipole", + "spart", + "stars_density", + "stars_feedback"}; #ifdef WITH_MPI /* MPI communicators for the subtypes. */ diff --git a/src/task.h b/src/task.h index 32757d66b2e34b9228f442905c17dbdf616cb3ae..5a79c3b9bc94c8da56a7016b4650abd35c1dd189 100644 --- a/src/task.h +++ b/src/task.h @@ -70,6 +70,8 @@ enum task_types { task_type_end_grav_force, task_type_cooling, task_type_star_formation, + task_type_star_formation_in, /* Implicit */ + task_type_star_formation_out, /* Implicit */ task_type_logger, task_type_stars_in, /* Implicit */ task_type_stars_out, /* Implicit */ @@ -93,7 +95,9 @@ enum task_subtypes { task_subtype_limiter, task_subtype_grav, task_subtype_external_grav, - task_subtype_tend, + task_subtype_tend_part, + task_subtype_tend_gpart, + task_subtype_tend_spart, task_subtype_xv, task_subtype_rho, task_subtype_gpart, diff --git a/tools/task_plots/analyse_tasks.py b/tools/task_plots/analyse_tasks.py index dcd985a5bd58ce363c75145da493d3ddc7d595fe..44def41bdc3d1a3266b03246aeb1aab59698ca1a 100755 --- a/tools/task_plots/analyse_tasks.py +++ b/tools/task_plots/analyse_tasks.py @@ -94,6 +94,8 @@ TASKTYPES = [ "grav_end_force", "cooling", "star_formation", + "star_formation_in", + "star_formation_out", "logger", "stars_in", "stars_out", @@ -114,7 +116,9 @@ SUBTYPES = [ "limiter", "grav", "external_grav", - "tend", + "tend_part", + "tend_gpart", + "tend_spart", "xv", "rho", "gpart", diff --git a/tools/task_plots/plot_tasks.py b/tools/task_plots/plot_tasks.py index 848b750e7fb54f9b706fbb20c95d9df7a640c0b7..06bfa474d0ae7ed4c529bd1ad94ddca422cd7ed9 100755 --- a/tools/task_plots/plot_tasks.py +++ b/tools/task_plots/plot_tasks.py @@ -179,6 +179,8 @@ TASKTYPES = [ "grav_end_force", "cooling", "star_formation", + "star_formation_in", + "star_formation_out", "logger", "stars_in", "stars_out", @@ -199,7 +201,9 @@ SUBTYPES = [ "limiter", "grav", "external_grav", - "tend", + "tend_part", + "tend_gpart", + "tend_spart", "xv", "rho", "gpart", @@ -234,8 +238,12 @@ FULLTYPES = [ "send/xv", "recv/rho", "send/rho", - "recv/tend", - "send/tend", + "recv/tend_part", + "send/tend_part", + "recv/tend_gpart", + "send/tend_gpart", + "recv/tend_spart", + "send/tend_spart", "recv/gpart", "send/gpart", "recv/spart",