diff --git a/configure.ac b/configure.ac index 82f4f55cc27d5e95d7382749e1b80328bf5bc79e..5c46efc1a1a47837d9d75e4dcfc0632ea9d98b49 100644 --- a/configure.ac +++ b/configure.ac @@ -16,7 +16,7 @@ # along with this program. If not, see <http://www.gnu.org/licenses/>. # Init the project. -AC_INIT([SWIFT],[0.7.0],[https://gitlab.cosma.dur.ac.uk/swift/swiftsim]) +AC_INIT([SWIFT],[0.8.0],[https://gitlab.cosma.dur.ac.uk/swift/swiftsim]) swift_config_flags="$*" AC_COPYRIGHT diff --git a/doc/RTD/source/Cooling/index.rst b/doc/RTD/source/Cooling/index.rst index 0ca3f62f3ca6fdff2abb95501681dc7bf4676fd2..d9462b5233334769f163883ffd9e859cc7b69767 100644 --- a/doc/RTD/source/Cooling/index.rst +++ b/doc/RTD/source/Cooling/index.rst @@ -31,7 +31,7 @@ cooling contains a temperature floor avoiding negative temperature. Grackle ~~~~~~~ -Grackle is a chemistry and cooling library presented in B. Smith et al. 2016 +Grackle is a chemistry and cooling library presented in `B. Smith et al. 2016 <https://arxiv.org/abs/1610.09591>`_ (do not forget to cite if used). Four different modes are available: equilibrium, 6 species network (H, H\\( ^+ \\), e\\( ^- \\), He, He\\( ^+ \\) and He\\( ^{++} \\)), 9 species network (adds H\\(^-\\), H\\(_2\\) and diff --git a/doc/RTD/source/InitialConditions/index.rst b/doc/RTD/source/InitialConditions/index.rst index 00e7b98c5916a29b31d9396c317dfc2851d7389b..ec2559f5df34827e78c8df2c7417934eeab27bf8 100644 --- a/doc/RTD/source/InitialConditions/index.rst +++ b/doc/RTD/source/InitialConditions/index.rst @@ -11,17 +11,21 @@ conditions format as the popular `GADGET-2 its type 3 format. Note that we do not support the GADGET-2 types 1 and 2 formats. +One crucial difference is that whilst GADGET-2 can have initial conditions split +over many files SWIFT only supports initial conditions in one single file. **ICs +split over multiple files cannot be read by SWIFT**. See the +":ref:`multiple_files_ICs`" section below for possible solutions. In GADGET-2 +having multiple files allows multiple ones to be read in parallel and is the +only way the code can handle more than 2^31 particles. This limitation is not in +place in SWIFT. A single file can contain any number of particles (well... up to +2^64...) and the file is read in parallel by HDF5 when running on more than one +compute node. + The original GADGET-2 file format only contains 2 types of particles: gas particles and 5 sorts of collisionless particles that allow users to run with 5 separate particle masses and softenings. In SWIFT, we expand on this by using two of these types for stars and black holes. -GADGET-2 can have initial conditions split over many files. This allow multiple -ones to be read in parallel and is the only way the code can handle more than -2^31 particles. This limitation is not in place in SWIFT. A single file can -contain any number of particles (well... up to 2^64...) and the file is read in -parallel by HDF5 when running on more than one compute node. - As the original documentation for the GADGET-2 initial conditions format is quite sparse, we lay out here all of the necessary components. If you are generating your initial conditions from python, we recommend you use the h5py @@ -113,7 +117,9 @@ GADGET-2 based analysis programs: exactly the same as the ``NumPart_Total`` array. As SWIFT only uses ICs contained in a single file, this is not necessary for SWIFT-only ICs. + ``NumFilesPerSnapshot``, again a historical integer value that tells the code - how many files there are per snapshot. 
You will probably want to set this to 1. + how many files there are per snapshot. You will probably want to set + this to 1. If this field is present in a SWIFT IC file and has a + value different from 1, the code will return an error message. + ``Time``, time of the start of the simulation in internal units or expressed as a scale-factor for cosmological runs. SWIFT ignores this and reads it from the parameter file. @@ -226,4 +232,27 @@ You should have an HDF5 file with the following structure: ParticleIDs=[...] Masses=[...] +.. _multiple_files_ICs: + +ICs split over multiple files +----------------------------- + +A basic script ``tools/combine_ics.py`` is provided to merge basic GADGET-2 +initial conditions split into multiple files into one single valid file. This +script can handle simple HDF5 files (GADGET-2 type 3 ICs) that follow the format +described above but split over multiple files. + +The script can also convert ICs using a ``MassTable`` and create the +corresponding particle fields. Note that additional fields present in ICs beyond +the simple GADGET-2 specification will not be merged. + +One additional option is to compress the fields in the files using HDF5's gzip +compression. This is very effective for the fields such as masses or particle +IDs which are very similar. A checksum filter is also applied in all cases to +help with data curation. + +**We caution that this script is very basic and should only be used with great +caution.** + + diff --git a/doc/RTD/source/ParameterFiles/index.rst b/doc/RTD/source/ParameterFiles/index.rst index 6ff6d5f7625693b199f1aa966403bea633065ab4..f87045797d8846a327066fcd3dc52c1eb5ff10fb 100644 --- a/doc/RTD/source/ParameterFiles/index.rst +++ b/doc/RTD/source/ParameterFiles/index.rst @@ -72,11 +72,11 @@ section. A list of all the possible parameters is kept in the file Internal Unit System -------------------- -This section describes the units used internally by the code. This is -the system of units in which all the equations are solved. All -physical constants are converted to this system and if the ICs use a -different system (see :ref:`ICs_units_label`) the particle quantities -will be converted when read in. +The ``InternalUnitSystem`` section describes the units used internally by the +code. This is the system of units in which all the equations are solved. All +physical constants are converted to this system and if the ICs use a different +system (see :ref:`ICs_units_label`) the particle quantities will be converted +when read in. The system of units is described using the value of the 5 basic units of any system with respect to the CGS system. Instead of using a unit @@ -132,7 +132,7 @@ exercise for the reader [#f1]_. Cosmology --------- -When running a cosmological simulation, this section set the values of the +When running a cosmological simulation, the section ``Cosmology`` sets the values of the cosmological model. The epanded :math:`\Lambda\rm{CDM}` parameters governing the background evolution of the Univese need to be specified here. These are: @@ -186,7 +186,7 @@ Gravity ------- The behaviour of the self-gravity solver can be modifed by the parameters -provided in this section. The theory document puts these parameters into the +provided in the ``Gravity`` section. The theory document puts these parameters into the context of the equations being solved. We give a brief overview here. 
* The Plummer-equivalent co-moving softening length used for all particles :math:`\epsilon_{com}`: ``comoving_softening``, @@ -251,10 +251,64 @@ SPH Time Integration ---------------- +The ``TimeIntegration`` section is used to set some general parameters related to time +integration. In all cases, users have to provide a minimal and maximal time-step +size: + +* Maximal time-step size: ``dt_max`` +* Minimal time-step size: ``dt_min`` + +These quantities are expressed in internal units. All particles will have their +time-step limited by the maximal value on top of all the other criteria that may +apply to them (gravity acceleration, Courant condition, etc.). If a particle +demands a time-step size smaller than the minimum, SWIFT will abort with an +error message. This is a safe-guard against simulations that would never +complete due to the number of steps to run being too large. + +When running a non-cosmological simulation, the user also has to provide the +time of the start and the time of the end of the simulation: + +* Start time: ``time_begin`` +* End time: ``time_end`` + +Both are expressed in internal units. The start time is typically set to ``0`` +but SWIFT can handle any value here. For cosmological runs, these values are +ignored and the start- and end-points of the runs are specified by the start and +end scale-factors in the cosmology section of the parameter file. + +Additionally, when running a cosmological volume, advanced users can specify the +value of the dimensionless pre-factor entering the time-step condition linked +with the motion of particles with respect to the background expansion and mesh +size. See the theory document for the exact equations. + +* Dimensionless pre-factor of the maximal allowed displacement: + ``max_dt_RMS_factor`` (default: ``0.25``) + +This value rarely needs altering. + +A full time-step section for a non-cosmological run would be: + +.. code:: YAML + + TimeIntegration: + time_begin: 0 # Start time in internal units. + time_end: 10. # End time in internal units. + dt_max: 1e-2 + dt_min: 1e-6 + +Whilst for a cosmological run, one would need: + +.. code:: YAML + + TimeIntegration: + dt_max: 1e-4 + dt_min: 1e-10 + max_dt_RMS_factor: 0.25 # Default optional value + Initial Conditions ------------------ -This section of the parameter file contains all the options related to +This ``IntialConditions`` section of the parameter file contains all the options related to the initial conditions. The main two parameters are * The name of the initial conditions file: ``file_name``, @@ -302,12 +356,13 @@ The full section to start a DM+hydro run from Gadget DM-only ICs would be: .. code:: YAML + InitialConditions: file_name: my_ics.hdf5 periodic: 1 - cleanup_h_factors: 1 - cleanup_velocity_factors: 1 - generate_gas_in_ics: 1 + cleanup_h_factors: 1 + cleanup_velocity_factors: 1 + generate_gas_in_ics: 1 cleanup_smoothing_lengths: 1 @@ -351,7 +406,7 @@ Restarts -------- SWIFT can write check-pointing files and restart from them. The behaviour of -this mechanism is driven by the options int he `Restarts` section of the YAML +this mechanism is driven by the options in the ``Restarts`` section of the YAML parameter file. All the parameters are optional but default to values that ensure a reasonable behaviour. 
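As an aside on the ``TimeIntegration`` parameters documented above, here is a minimal sketch of how one might sanity-check a SWIFT parameter file before a run. It assumes PyYAML is available and that the section and parameter names (``TimeIntegration``, ``dt_min``, ``dt_max``, ``time_begin``, ``time_end``, ``Cosmology``) are exactly those described in the documentation; the check itself is purely illustrative and not part of SWIFT.

.. code:: python

    # Illustrative sanity check of the TimeIntegration section described above.
    # Assumes PyYAML; this script is not part of SWIFT itself.
    import sys
    import yaml

    with open(sys.argv[1]) as f:
        params = yaml.safe_load(f)

    ti = params["TimeIntegration"]
    # Values such as "1e-2" may be read back as strings, hence the float() casts.
    dt_min, dt_max = float(ti["dt_min"]), float(ti["dt_max"])
    if dt_min >= dt_max:
        raise ValueError("dt_min must be strictly smaller than dt_max")

    # Non-cosmological runs must also specify the start and end times.
    if "Cosmology" not in params and ("time_begin" not in ti or "time_end" not in ti):
        raise ValueError("time_begin and time_end are required for non-cosmological runs")

    print("TimeIntegration section looks consistent")

One would run this as ``python check_params.py my_parameter_file.yml``; the script name is hypothetical and only the parameter names above come from the documentation.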
diff --git a/doc/RTD/source/VELOCIraptorInterface/index.rst b/doc/RTD/source/VELOCIraptorInterface/index.rst index 3ae17f04b9950d30d1484ac5117b65063b7739c6..312b9cd3f893dd44f814ad80ac40748db12bd4d5 100644 --- a/doc/RTD/source/VELOCIraptorInterface/index.rst +++ b/doc/RTD/source/VELOCIraptorInterface/index.rst @@ -5,7 +5,7 @@ VELOCIraptor Interface ====================== This section includes information on the VELOCIraptor interface implemented in -SWIFT. There are mainly four subsection; the first section explains shortly +SWIFT. There are mainly four subsections; the first section explains shortly how VELOCIraptor works, the second subsection explains how to configure SWIFT with VELOCIraptor, the third subsection explains how to configure a standalone version of VELOCIraptor and the last subsection explains how the output format diff --git a/doc/RTD/source/VELOCIraptorInterface/output.rst b/doc/RTD/source/VELOCIraptorInterface/output.rst index cf0e386d4a7de982dcca5a2ddc5ebd642c46ec34..2b0b88eea07df607b71018bdd9bc76d046e76477 100644 --- a/doc/RTD/source/VELOCIraptorInterface/output.rst +++ b/doc/RTD/source/VELOCIraptorInterface/output.rst @@ -46,7 +46,7 @@ The second file that is produced by VELOCIraptor is the ``.catalog_particles`` file, this file contains mainly all the IDs of the particles and has two interesting parameters: -+ The ``Num_of_particles_in_groups`` and ``Num_of_particles_in_groups`` ++ The ``Num_of_particles_in_groups`` and ``Total_num_of_particles_in_all_groups`` parameter: Gives the total number of particles in the file or the total number of particles that are in halos. + The ``Particle_IDs``: The list of particles as sorted by halo, in which halo @@ -136,7 +136,7 @@ the unbound particles. Properties file --------------- -The Fourth file is the ``.properties`` file, this file contains many physical +The fourth file is the ``.properties`` file, this file contains many physical useful information of the corresponding halos. This can be divided in several useful groups of physical parameters, on this page we have divided the several variables which are present in the ``.properties`` file. This file has most @@ -179,7 +179,7 @@ Bryan and Norman 1998 properties: + ``Mass_BN98``, The Bryan and Norman (1998) determination of the mass of the halo [#BN98]_. -+ ``R_BN98``, the Bryan and Norman (1998) corresponding radius[#BN98]_. ++ ``R_BN98``, the Bryan and Norman (1998) corresponding radius [#BN98]_. Several Mass types: """"""""""""""""""" diff --git a/doc/RTD/source/VELOCIraptorInterface/whatis.rst b/doc/RTD/source/VELOCIraptorInterface/whatis.rst index e7f067ec4723e41da0f3a95dddad8f55d9897e85..bc14e0925a288315c311972d97951abfc91426a6 100644 --- a/doc/RTD/source/VELOCIraptorInterface/whatis.rst +++ b/doc/RTD/source/VELOCIraptorInterface/whatis.rst @@ -12,7 +12,7 @@ What is VELOCIraptor? In SWIFT it is possible to run a cosmological simulation and at the same time do on the fly halo finding at specific predefined intervals. For finding the -Halos SWIFT uses VELOCIraptor (Elahi, Thacker and Widrow; 2011) [#velo]_, this +halos SWIFT uses VELOCIraptor (Elahi, Thacker and Widrow; 2011) [#velo]_, this is a C++ halo finder that can use MPI. It differs from other halo finder algorithms in the sense that it uses the velocity distributions of the particles in the simulations and the the positions of the particles to get @@ -22,7 +22,7 @@ whether there are substructures in halos. 
The Algorithm ------------- -The VELOCIraptor algorithm consist basically off the following steps [#ref]_: +The VELOCIraptor algorithm consists basically of the following steps [#ref]_: 1. A kd-tree is constructed based on the maximization of the Shannon-entropy, this means that every level in the kd-tree an equal number of particles @@ -32,21 +32,21 @@ The VELOCIraptor algorithm consist basically off the following steps [#ref]_: takes into account the absolute positions of the particles. 2. The next part is calculating the the centre of mass velocity and the velocity distribution for every individual node in the kd-tree. -3. Than the algorithm estimates the background velocity density function for +3. Then the algorithm estimates the background velocity density function for every particle based on the cell of the particle and the six nearest neighbour cells. This prevents the background velocity density function to be over sensitive for variations between different cells due to dominant halo features in the velocity density function. 4. After this the algorithm searches for the nearest velocity neighbours (:math:`N_v`) from a set of nearest position neighbours (:math:`N_x>N_v`). - The position neighbours do not need to be in the cell of the particles, in + The neighbours' positions do not need to be in the cell of the particles, in general the set of nearest position neighbours is substantially larger than the nearest velocity neighbours, the default is set as :math:`N_x=32 N_v`. 5. The individual local velocity density function is calculated for every particle. 6. The fractional difference is calculated between the local velocity density function and the background velocity density function. -7. Based on the calculated ratio outliers are picked and the outliers are +7. Based on the calculated ratio, outliers are picked and the outliers are grouped together in halos and subhalos. 
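To make steps 6 and 7 of the algorithm description above more concrete, the following toy snippet flags "outlier" particles whose local velocity density exceeds a smooth background estimate. The arrays, the log-normal toy data and the threshold value are invented purely for illustration; this is not the VELOCIraptor implementation.

.. code:: python

    # Toy illustration of steps 6-7: compute the fractional difference between
    # the local and background velocity density estimates and pick outliers.
    import numpy as np

    rng = np.random.default_rng(42)
    n_part = 1000
    background = rng.lognormal(mean=0.0, sigma=0.3, size=n_part)
    local = background * rng.lognormal(mean=0.1, sigma=0.5, size=n_part)

    # Fractional difference between local and background velocity densities.
    ratio = (local - background) / background

    # Particles well above the background are candidate substructure members
    # and would subsequently be grouped into halos and subhalos.
    threshold = 2.5  # invented value for this toy example
    outliers = np.flatnonzero(ratio > threshold)
    print(f"{outliers.size} candidate substructure particles out of {n_part}")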
diff --git a/examples/DwarfGalaxy/run.sh b/examples/DwarfGalaxy/run.sh index 4cc87880215a37c9eac59b19e584f4887cba2c38..ced1959de34882937a5aa37df4facc294db26050 100755 --- a/examples/DwarfGalaxy/run.sh +++ b/examples/DwarfGalaxy/run.sh @@ -7,5 +7,5 @@ then ./getIC.sh fi -../swift -b -G -s -S -t 8 dwarf_galaxy.yml 2>&1 | tee output.log +../swift -b -G -s -S -t 8 $@ dwarf_galaxy.yml 2>&1 | tee output.log diff --git a/examples/ExternalPointMass/energy_plot.py b/examples/ExternalPointMass/energy_plot.py index 1863305614c226f64faac3d86fa2f809d49b9d74..5644e48f8bd954800526369cc152da7024d069dd 100644 --- a/examples/ExternalPointMass/energy_plot.py +++ b/examples/ExternalPointMass/energy_plot.py @@ -91,8 +91,8 @@ for i in range(402): E_tot_snap[i] = E_kin_snap[i] + E_pot_snap[i] Lz_snap[i] = np.sum(Lz) -print "Starting energy:", E_kin_stats[0], E_pot_stats[0], E_tot_stats[0] -print "Ending energy:", E_kin_stats[-1], E_pot_stats[-1], E_tot_stats[-1] +print("Starting energy:", E_kin_stats[0], E_pot_stats[0], E_tot_stats[0]) +print("Ending energy:", E_kin_stats[-1], E_pot_stats[-1], E_tot_stats[-1]) # Plot energy evolution figure() diff --git a/examples/ExternalPointMass/makeIC.py b/examples/ExternalPointMass/makeIC.py index 7a9e2e1fd555e4823957721e3c7bf53da9eff48d..6780430d22e39350e7efeb52190708c78141bd4f 100644 --- a/examples/ExternalPointMass/makeIC.py +++ b/examples/ExternalPointMass/makeIC.py @@ -36,16 +36,16 @@ const_unit_length_in_cgs = (1000*PARSEC_IN_CGS) const_unit_mass_in_cgs = (SOLAR_MASS_IN_CGS) const_unit_velocity_in_cgs = (1e5) -print "UnitMass_in_cgs: ", const_unit_mass_in_cgs -print "UnitLength_in_cgs: ", const_unit_length_in_cgs -print "UnitVelocity_in_cgs: ", const_unit_velocity_in_cgs -print "UnitTime_in_cgs: ", const_unit_length_in_cgs / const_unit_velocity_in_cgs +print("UnitMass_in_cgs: ", const_unit_mass_in_cgs) +print("UnitLength_in_cgs: ", const_unit_length_in_cgs) +print("UnitVelocity_in_cgs: ", const_unit_velocity_in_cgs) +print("UnitTime_in_cgs: ", const_unit_length_in_cgs / const_unit_velocity_in_cgs) # derived units const_unit_time_in_cgs = (const_unit_length_in_cgs / const_unit_velocity_in_cgs) const_G = ((NEWTON_GRAVITY_CGS*const_unit_mass_in_cgs*const_unit_time_in_cgs*const_unit_time_in_cgs/(const_unit_length_in_cgs*const_unit_length_in_cgs*const_unit_length_in_cgs))) -print '---------------------' -print 'G in internal units: ', const_G +print('---------------------') +print('G in internal units: ', const_G) # Parameters @@ -53,7 +53,7 @@ periodic = 1 # 1 For periodic box boxSize = 100. # max_radius = boxSize / 4. # maximum radius of particles Mass = 1e10 -print "Mass at the centre: ", Mass +print("Mass at the centre: ", Mass) numPart = int(sys.argv[1]) # Number of particles mass = 1. @@ -93,9 +93,9 @@ grp1 = file.create_group("/PartType1") #generate particle positions radius = max_radius * (numpy.random.rand(numPart))**(1./3.) 
-print '---------------------' -print 'Radius: minimum = ',min(radius) -print 'Radius: maximum = ',max(radius) +print('---------------------') +print('Radius: minimum = ',min(radius)) +print('Radius: maximum = ',max(radius)) radius = numpy.sort(radius) r = numpy.zeros((numPart, 3)) r[:,0] = radius @@ -104,9 +104,9 @@ r[:,0] = radius speed = numpy.sqrt(const_G * Mass / radius) omega = speed / radius period = 2.*math.pi/omega -print '---------------------' -print 'Period: minimum = ',min(period) -print 'Period: maximum = ',max(period) +print('---------------------') +print('Period: minimum = ',min(period)) +print('Period: maximum = ',max(period)) v = numpy.zeros((numPart, 3)) v[:,0] = -omega * r[:,1] diff --git a/examples/main.c b/examples/main.c index 049d49fa32910a6ba6b15419ba378db7bd7de80b..7340f244e31e78f4a76c3fa42ca2e45850e064e5 100644 --- a/examples/main.c +++ b/examples/main.c @@ -1230,7 +1230,7 @@ int main(int argc, char *argv[]) { /* Write final output. */ if (!force_stop) { - engine_drift_all(&e); + engine_drift_all(&e, /*drift_mpole=*/0); engine_print_stats(&e); #ifdef WITH_LOGGER logger_log_all(e.logger, &e); diff --git a/src/Makefile.am b/src/Makefile.am index afe4002b29b55a0f5ea4a0aff9ba1dd5adefc005..9b0610667bdcf1f760f6e94d4481848a8fc4d0f0 100644 --- a/src/Makefile.am +++ b/src/Makefile.am @@ -53,8 +53,8 @@ include_HEADERS = space.h runner.h queue.h task.h lock.h cell.h part.h const.h \ # Common source files AM_SOURCES = space.c runner.c queue.c task.c cell.c engine.c engine_maketasks.c \ - engine_marktasks.c serial_io.c timers.c debug.c scheduler.c proxy.c parallel_io.c \ - units.c common_io.c single_io.c multipole.c version.c map.c \ + engine_marktasks.c engine_drift.c serial_io.c timers.c debug.c scheduler.c \ + proxy.c parallel_io.c units.c common_io.c single_io.c multipole.c version.c map.c \ kernel_hydro.c tools.c part.c partition.c clocks.c parser.c \ physical_constants.c potential.c hydro_properties.c \ threadpool.c cooling.c sourceterms.c \ diff --git a/src/active.h b/src/active.h index a3d2d5ad90c93b06bbd4853a2633d087304ee570..5bbbd3803cb09e7aa05ddb15e2e5c2a15b27602c 100644 --- a/src/active.h +++ b/src/active.h @@ -74,6 +74,22 @@ __attribute__((always_inline)) INLINE static int cell_are_gpart_drifted( return (c->grav.ti_old_part == e->ti_current); } +/** + * @brief Check that the #spart in a #cell have been drifted to the current + * time. + * + * @param c The #cell. + * @param e The #engine containing information about the current time. + * @return 1 if the #cell has been drifted to the current time, 0 otherwise. + */ +__attribute__((always_inline)) INLINE static int cell_are_spart_drifted( + const struct cell *c, const struct engine *e) { + + /* Currently just use the gpart drift + * This function is just for clarity */ + return cell_are_gpart_drifted(c, e); +} + /* Are cells / particles active for regular tasks ? */ /** @@ -185,9 +201,16 @@ __attribute__((always_inline)) INLINE static int cell_is_all_active_gravity( __attribute__((always_inline)) INLINE static int cell_is_active_stars( const struct cell *c, const struct engine *e) { - // LOIC: Need star-specific implementation +#ifdef SWIFT_DEBUG_CHECKS + if (c->stars.ti_end_min < e->ti_current) + error( + "cell in an impossible time-zone! 
c->ti_end_min=%lld (t=%e) and " + "e->ti_current=%lld (t=%e, a=%e)", + c->stars.ti_end_min, c->stars.ti_end_min * e->time_base, e->ti_current, + e->ti_current * e->time_base, e->cosmology->a); +#endif - return cell_is_active_gravity(c, e); + return (c->stars.ti_end_min == e->ti_current); } /** diff --git a/src/cell.c b/src/cell.c index b856a2a5e10d7a87c6292cd87ac38a3f67f64eb5..91a1eef7b0c33ec4af8760aa2ed4904068a42fe9 100644 --- a/src/cell.c +++ b/src/cell.c @@ -184,6 +184,7 @@ int cell_pack(struct cell *restrict c, struct pcell *restrict pc, pc->hydro.ti_end_max = c->hydro.ti_end_max; pc->grav.ti_end_min = c->grav.ti_end_min; pc->grav.ti_end_max = c->grav.ti_end_max; + pc->stars.ti_end_min = c->stars.ti_end_min; pc->hydro.ti_old_part = c->hydro.ti_old_part; pc->grav.ti_old_part = c->grav.ti_old_part; pc->grav.ti_old_multipole = c->grav.ti_old_multipole; @@ -287,6 +288,7 @@ int cell_unpack(struct pcell *restrict pc, struct cell *restrict c, c->hydro.ti_end_max = pc->hydro.ti_end_max; c->grav.ti_end_min = pc->grav.ti_end_min; c->grav.ti_end_max = pc->grav.ti_end_max; + c->stars.ti_end_min = pc->stars.ti_end_min; c->hydro.ti_old_part = pc->hydro.ti_old_part; c->grav.ti_old_part = pc->grav.ti_old_part; c->grav.ti_old_multipole = pc->grav.ti_old_multipole; @@ -342,6 +344,7 @@ int cell_unpack(struct pcell *restrict pc, struct cell *restrict c, temp->hydro.dx_max_part = 0.f; temp->hydro.dx_max_sort = 0.f; temp->stars.dx_max_part = 0.f; + temp->stars.dx_max_sort = 0.f; temp->nodeID = c->nodeID; temp->parent = c; c->progeny[k] = temp; @@ -414,6 +417,7 @@ int cell_pack_end_step(struct cell *restrict c, pcells[0].hydro.ti_end_max = c->hydro.ti_end_max; pcells[0].grav.ti_end_min = c->grav.ti_end_min; pcells[0].grav.ti_end_max = c->grav.ti_end_max; + pcells[0].stars.ti_end_min = c->stars.ti_end_min; pcells[0].hydro.dx_max_part = c->hydro.dx_max_part; pcells[0].stars.dx_max_part = c->stars.dx_max_part; @@ -451,6 +455,7 @@ int cell_unpack_end_step(struct cell *restrict c, c->hydro.ti_end_max = pcells[0].hydro.ti_end_max; c->grav.ti_end_min = pcells[0].grav.ti_end_min; c->grav.ti_end_max = pcells[0].grav.ti_end_max; + c->stars.ti_end_min = pcells[0].stars.ti_end_min; c->hydro.dx_max_part = pcells[0].hydro.dx_max_part; c->stars.dx_max_part = pcells[0].stars.dx_max_part; @@ -1564,12 +1569,20 @@ void cell_check_multipole(struct cell *c) { */ void cell_clean(struct cell *c) { + /* Hydro */ for (int i = 0; i < 13; i++) if (c->hydro.sort[i] != NULL) { free(c->hydro.sort[i]); c->hydro.sort[i] = NULL; } + /* Stars */ + for (int i = 0; i < 13; i++) + if (c->stars.sort[i] != NULL) { + free(c->stars.sort[i]); + c->stars.sort[i] = NULL; + } + /* Recurse */ for (int k = 0; k < 8; k++) if (c->progeny[k]) cell_clean(c->progeny[k]); @@ -1668,13 +1681,14 @@ void cell_activate_drift_gpart(struct cell *c, struct scheduler *s) { * @brief Activate the #spart drifts on the given cell. */ void cell_activate_drift_spart(struct cell *c, struct scheduler *s) { + // MATTHIEU: This will need changing cell_activate_drift_gpart(c, s); } /** * @brief Activate the sorts up a cell hierarchy. */ -void cell_activate_sorts_up(struct cell *c, struct scheduler *s) { +void cell_activate_hydro_sorts_up(struct cell *c, struct scheduler *s) { if (c == c->hydro.super) { #ifdef SWIFT_DEBUG_CHECKS @@ -1705,7 +1719,7 @@ void cell_activate_sorts_up(struct cell *c, struct scheduler *s) { /** * @brief Activate the sorts on a given cell, if needed. 
*/ -void cell_activate_sorts(struct cell *c, int sid, struct scheduler *s) { +void cell_activate_hydro_sorts(struct cell *c, int sid, struct scheduler *s) { /* Do we need to re-sort? */ if (c->hydro.dx_max_sort > space_maxreldx * c->dmin) { @@ -1714,7 +1728,7 @@ void cell_activate_sorts(struct cell *c, int sid, struct scheduler *s) { for (struct cell *finger = c; finger != NULL; finger = finger->parent) { if (finger->hydro.requires_sorts) { atomic_or(&finger->hydro.do_sort, finger->hydro.requires_sorts); - cell_activate_sorts_up(finger, s); + cell_activate_hydro_sorts_up(finger, s); } finger->hydro.sorted = 0; } @@ -1723,7 +1737,70 @@ void cell_activate_sorts(struct cell *c, int sid, struct scheduler *s) { /* Has this cell been sorted at all for the given sid? */ if (!(c->hydro.sorted & (1 << sid)) || c->nodeID != engine_rank) { atomic_or(&c->hydro.do_sort, (1 << sid)); - cell_activate_sorts_up(c, s); + cell_activate_hydro_sorts_up(c, s); + } +} + +/** + * @brief Activate the sorts up a cell hierarchy. + */ +void cell_activate_stars_sorts_up(struct cell *c, struct scheduler *s) { + + if (c == c->super) { +#ifdef SWIFT_DEBUG_CHECKS + if (c->stars.sorts == NULL) + error("Trying to activate un-existing c->stars.sorts"); +#endif + scheduler_activate(s, c->stars.sorts); + if (c->nodeID == engine_rank) { + // MATTHIEU: to do: do we actually need both drifts here? + cell_activate_drift_part(c, s); + cell_activate_drift_spart(c, s); + } + } else { + + for (struct cell *parent = c->parent; + parent != NULL && !parent->stars.do_sub_sort; + parent = parent->parent) { + parent->stars.do_sub_sort = 1; + if (parent == c->super) { +#ifdef SWIFT_DEBUG_CHECKS + if (parent->stars.sorts == NULL) + error("Trying to activate un-existing parents->stars.sorts"); +#endif + scheduler_activate(s, parent->stars.sorts); + if (parent->nodeID == engine_rank) { + cell_activate_drift_part(parent, s); + cell_activate_drift_spart(parent, s); + } + break; + } + } + } +} + +/** + * @brief Activate the sorts on a given cell, if needed. + */ +void cell_activate_stars_sorts(struct cell *c, int sid, struct scheduler *s) { + + /* Do we need to re-sort? */ + if (c->stars.dx_max_sort > space_maxreldx * c->dmin) { + + /* Climb up the tree to active the sorts in that direction */ + for (struct cell *finger = c; finger != NULL; finger = finger->parent) { + if (finger->stars.requires_sorts) { + atomic_or(&finger->stars.do_sort, finger->stars.requires_sorts); + cell_activate_stars_sorts_up(finger, s); + } + finger->stars.sorted = 0; + } + } + + /* Has this cell been sorted at all for the given sid? */ + if (!(c->stars.sorted & (1 << sid)) || c->nodeID != engine_rank) { + atomic_or(&c->stars.do_sort, (1 << sid)); + cell_activate_stars_sorts_up(c, s); } } @@ -2077,8 +2154,8 @@ void cell_activate_subcell_hydro_tasks(struct cell *ci, struct cell *cj, if (cj->nodeID == engine_rank) cell_activate_drift_part(cj, s); /* Do we need to sort the cells? */ - cell_activate_sorts(ci, sid, s); - cell_activate_sorts(cj, sid, s); + cell_activate_hydro_sorts(ci, sid, s); + cell_activate_hydro_sorts(cj, sid, s); } } /* Otherwise, pair interation */ } @@ -2108,7 +2185,9 @@ void cell_activate_subcell_stars_tasks(struct cell *ci, struct cell *cj, if (cj == NULL) { /* Do anything? */ - if (ci->stars.count == 0 || !cell_is_active_stars(ci, e)) return; + if (!cell_is_active_stars(ci, e) || ci->hydro.count == 0 || + ci->stars.count == 0) + return; /* Recurse? 
*/ if (cell_can_recurse_in_self_stars_task(ci)) { @@ -2136,7 +2215,10 @@ void cell_activate_subcell_stars_tasks(struct cell *ci, struct cell *cj, /* Should we even bother? */ if (!cell_is_active_stars(ci, e) && !cell_is_active_stars(cj, e)) return; - if (ci->stars.count == 0 || cj->stars.count == 0) return; + + int should_do = ci->stars.count != 0 && cj->hydro.count != 0; + should_do |= cj->stars.count != 0 && ci->hydro.count != 0; + if (!should_do) return; /* Get the orientation of the pair. */ double shift[3]; @@ -2421,23 +2503,43 @@ void cell_activate_subcell_stars_tasks(struct cell *ci, struct cell *cj, } /* Otherwise, activate the sorts and drifts. */ - else if (cell_is_active_stars(ci, e) || cell_is_active_stars(cj, e)) { + else { - /* We are going to interact this pair, so store some values. */ - atomic_or(&ci->hydro.requires_sorts, 1 << sid); - atomic_or(&cj->hydro.requires_sorts, 1 << sid); - ci->hydro.dx_max_sort_old = ci->hydro.dx_max_sort; - cj->hydro.dx_max_sort_old = cj->hydro.dx_max_sort; + if (cell_is_active_stars(ci, e) && cj->hydro.count != 0 && + ci->stars.count != 0) { + /* We are going to interact this pair, so store some values. */ + atomic_or(&cj->hydro.requires_sorts, 1 << sid); + atomic_or(&ci->stars.requires_sorts, 1 << sid); - /* Activate the drifts if the cells are local. */ - if (ci->nodeID == engine_rank) cell_activate_drift_part(ci, s); - if (ci->nodeID == engine_rank) cell_activate_drift_spart(ci, s); - if (cj->nodeID == engine_rank) cell_activate_drift_part(cj, s); - if (cj->nodeID == engine_rank) cell_activate_drift_spart(cj, s); + cj->hydro.dx_max_sort_old = cj->hydro.dx_max_sort; + ci->stars.dx_max_sort_old = ci->stars.dx_max_sort; - /* Do we need to sort the cells? */ - cell_activate_sorts(ci, sid, s); - cell_activate_sorts(cj, sid, s); + /* Activate the drifts if the cells are local. */ + if (ci->nodeID == engine_rank) cell_activate_drift_spart(ci, s); + if (cj->nodeID == engine_rank) cell_activate_drift_part(cj, s); + + /* Do we need to sort the cells? */ + cell_activate_hydro_sorts(cj, sid, s); + cell_activate_stars_sorts(ci, sid, s); + } + + if (cell_is_active_stars(cj, e) && ci->hydro.count != 0 && + cj->stars.count != 0) { + /* We are going to interact this pair, so store some values. */ + atomic_or(&cj->stars.requires_sorts, 1 << sid); + atomic_or(&ci->hydro.requires_sorts, 1 << sid); + + ci->hydro.dx_max_sort_old = ci->hydro.dx_max_sort; + cj->stars.dx_max_sort_old = cj->stars.dx_max_sort; + + /* Activate the drifts if the cells are local. */ + if (ci->nodeID == engine_rank) cell_activate_drift_part(ci, s); + if (cj->nodeID == engine_rank) cell_activate_drift_spart(cj, s); + + /* Do we need to sort the cells? */ + cell_activate_hydro_sorts(ci, sid, s); + cell_activate_stars_sorts(cj, sid, s); + } } } /* Otherwise, pair interation */ } @@ -2655,8 +2757,8 @@ int cell_unskip_hydro_tasks(struct cell *c, struct scheduler *s) { if (cj_nodeID == nodeID) cell_activate_drift_part(cj, s); /* Check the sorts and activate them if needed. */ - cell_activate_sorts(ci, t->flags, s); - cell_activate_sorts(cj, t->flags, s); + cell_activate_hydro_sorts(ci, t->flags, s); + cell_activate_hydro_sorts(cj, t->flags, s); } /* Store current values of dx_max and h_max. */ else if (t->type == task_type_sub_pair || t->type == task_type_sub_self) { @@ -2949,6 +3051,7 @@ int cell_unskip_gravity_tasks(struct cell *c, struct scheduler *s) { * @return 1 If the space needs rebuilding. 0 otherwise. 
*/ int cell_unskip_stars_tasks(struct cell *c, struct scheduler *s) { + struct engine *e = s->space->e; const int nodeID = e->nodeID; int rebuild = 0; @@ -2968,25 +3071,48 @@ int cell_unskip_stars_tasks(struct cell *c, struct scheduler *s) { /* Activate drifts */ if (t->type == task_type_self) { - if (ci->nodeID == nodeID) cell_activate_drift_part(ci, s); - if (ci->nodeID == nodeID) cell_activate_drift_gpart(ci, s); + if (ci->nodeID == nodeID) { + cell_activate_drift_part(ci, s); + cell_activate_drift_spart(ci, s); + } } /* Set the correct sorting flags and activate hydro drifts */ else if (t->type == task_type_pair) { - /* Store some values. */ - atomic_or(&ci->hydro.requires_sorts, 1 << t->flags); + /* Do ci */ + /* stars for ci */ + atomic_or(&ci->stars.requires_sorts, 1 << t->flags); + ci->stars.dx_max_sort_old = ci->stars.dx_max_sort; + + /* hydro for cj */ atomic_or(&cj->hydro.requires_sorts, 1 << t->flags); - ci->hydro.dx_max_sort_old = ci->hydro.dx_max_sort; cj->hydro.dx_max_sort_old = cj->hydro.dx_max_sort; /* Activate the drift tasks. */ - if (ci->nodeID == nodeID) cell_activate_drift_part(ci, s); + if (ci->nodeID == nodeID) cell_activate_drift_spart(ci, s); if (cj->nodeID == nodeID) cell_activate_drift_part(cj, s); /* Check the sorts and activate them if needed. */ - cell_activate_sorts(ci, t->flags, s); - cell_activate_sorts(cj, t->flags, s); + cell_activate_stars_sorts(ci, t->flags, s); + cell_activate_hydro_sorts(cj, t->flags, s); + + /* Do cj */ + /* hydro for ci */ + atomic_or(&ci->hydro.requires_sorts, 1 << t->flags); + ci->hydro.dx_max_sort_old = ci->hydro.dx_max_sort; + + /* stars for cj */ + atomic_or(&cj->stars.requires_sorts, 1 << t->flags); + cj->stars.dx_max_sort_old = cj->stars.dx_max_sort; + + /* Activate the drift tasks. */ + if (cj->nodeID == nodeID) cell_activate_drift_spart(cj, s); + if (ci->nodeID == nodeID) cell_activate_drift_part(ci, s); + + /* Check the sorts and activate them if needed. */ + cell_activate_hydro_sorts(ci, t->flags, s); + cell_activate_stars_sorts(cj, t->flags, s); + } /* Store current values of dx_max and h_max. */ else if (t->type == task_type_sub_pair || t->type == task_type_sub_self) { @@ -3176,7 +3302,7 @@ void cell_set_super_mapper(void *map_data, int num_elements, void *extra_data) { /* All top-level cells get an MPI tag. */ #ifdef WITH_MPI - if (c->mpi.tag < 0 && c->mpi.sendto) cell_tag(c); + cell_ensure_tagged(c); #endif /* Super-pointer for hydro */ @@ -3248,6 +3374,21 @@ void cell_drift_part(struct cell *c, const struct engine *e, int force) { if (ti_current < ti_old_part) error("Attempt to drift to the past"); #endif + /* Early abort? */ + if (c->hydro.count == 0) { + + /* Clear the drift flags. */ + c->hydro.do_drift = 0; + c->hydro.do_sub_drift = 0; + + /* Update the time of the last drift */ + c->hydro.ti_old_part = ti_current; + + return; + } + + /* Ok, we have some particles somewhere in the hierarchy to drift */ + /* Are we not in a leaf ? */ if (c->split && (force || c->hydro.do_sub_drift)) { @@ -3405,6 +3546,7 @@ void cell_drift_gpart(struct cell *c, const struct engine *e, int force) { struct spart *const sparts = c->stars.parts; float dx_max = 0.f, dx2_max = 0.f; + float dx_max_sort = 0.0f, dx2_max_sort = 0.f; float cell_h_max = 0.f; /* Drift irrespective of cell flags? */ @@ -3418,6 +3560,21 @@ void cell_drift_gpart(struct cell *c, const struct engine *e, int force) { if (ti_current < ti_old_gpart) error("Attempt to drift to the past"); #endif + /* Early abort? 
*/ + if (c->grav.count == 0) { + + /* Clear the drift flags. */ + c->grav.do_drift = 0; + c->grav.do_sub_drift = 0; + + /* Update the time of the last drift */ + c->grav.ti_old_part = ti_current; + + return; + } + + /* Ok, we have some particles somewhere in the hierarchy to drift */ + /* Are we not in a leaf ? */ if (c->split && (force || c->grav.do_sub_drift)) { @@ -3431,6 +3588,7 @@ void cell_drift_gpart(struct cell *c, const struct engine *e, int force) { /* Update */ dx_max = max(dx_max, cp->stars.dx_max_part); + dx_max_sort = max(dx_max_sort, cp->stars.dx_max_sort); cell_h_max = max(cell_h_max, cp->stars.h_max); } } @@ -3438,6 +3596,7 @@ void cell_drift_gpart(struct cell *c, const struct engine *e, int force) { /* Store the values */ c->stars.h_max = cell_h_max; c->stars.dx_max_part = dx_max; + c->stars.dx_max_sort = dx_max_sort; /* Update the time of the last drift */ c->grav.ti_old_part = ti_current; @@ -3531,6 +3690,12 @@ void cell_drift_gpart(struct cell *c, const struct engine *e, int force) { sp->x_diff[2] * sp->x_diff[2]; dx2_max = max(dx2_max, dx2); + const float dx2_sort = sp->x_diff_sort[0] * sp->x_diff_sort[0] + + sp->x_diff_sort[1] * sp->x_diff_sort[1] + + sp->x_diff_sort[2] * sp->x_diff_sort[2]; + + dx2_max_sort = max(dx2_max_sort, dx2_sort); + /* Maximal smoothing length */ cell_h_max = max(cell_h_max, sp->h); @@ -3538,10 +3703,12 @@ void cell_drift_gpart(struct cell *c, const struct engine *e, int force) { /* Now, get the maximal particle motion from its square */ dx_max = sqrtf(dx2_max); + dx_max_sort = sqrtf(dx2_max_sort); /* Store the values */ c->stars.h_max = cell_h_max; c->stars.dx_max_part = dx_max; + c->stars.dx_max_sort = dx_max_sort; /* Update the time of the last drift */ c->grav.ti_old_part = ti_current; @@ -3630,7 +3797,8 @@ void cell_drift_multipole(struct cell *c, const struct engine *e) { void cell_check_timesteps(struct cell *c) { #ifdef SWIFT_DEBUG_CHECKS - if (c->hydro.ti_end_min == 0 && c->grav.ti_end_min == 0 && c->nr_tasks > 0) + if (c->hydro.ti_end_min == 0 && c->grav.ti_end_min == 0 && + c->stars.ti_end_min == 0 && c->nr_tasks > 0) error("Cell without assigned time-step"); if (c->split) { diff --git a/src/cell.h b/src/cell.h index 11699a2ce506a1e3546b492b03e4095cbf63424d..73f49498353ab587df04e6eee13e2594632f7848 100644 --- a/src/cell.h +++ b/src/cell.h @@ -147,6 +147,9 @@ struct pcell { /*! Maximal smoothing length. */ double h_max; + /*! Minimal integer end-of-timestep in this cell for stars tasks */ + integertime_t ti_end_min; + } stars; /*! Maximal depth in that part of the tree */ @@ -198,6 +201,9 @@ struct pcell_step { /*! Maximal distance any #part has travelled since last rebuild */ float dx_max_part; + /*! Minimal integer end-of-timestep in this cell (stars) */ + integertime_t ti_end_min; + } stars; }; @@ -342,7 +348,7 @@ struct cell { /*! Do any of this cell's sub-cells need to be sorted? */ char do_sub_sort; -#ifdef SWIFT_DEBUG_CHECKS + #ifdef SWIFT_DEBUG_CHECKS /*! Last (integer) time the cell's sort arrays were updated. 
*/ integertime_t ti_sort; @@ -661,7 +667,9 @@ void cell_activate_subcell_external_grav_tasks(struct cell *ci, struct scheduler *s); void cell_activate_drift_part(struct cell *c, struct scheduler *s); void cell_activate_drift_gpart(struct cell *c, struct scheduler *s); -void cell_activate_sorts(struct cell *c, int sid, struct scheduler *s); +void cell_activate_drift_spart(struct cell *c, struct scheduler *s); +void cell_activate_hydro_sorts(struct cell *c, int sid, struct scheduler *s); +void cell_activate_stars_sorts(struct cell *c, int sid, struct scheduler *s); void cell_clear_drift_flags(struct cell *c, void *data); void cell_set_super_mapper(void *map_data, int num_elements, void *extra_data); void cell_check_spart_pos(const struct cell *c, @@ -687,6 +695,68 @@ int cell_can_use_pair_mm(const struct cell *ci, const struct cell *cj, int cell_can_use_pair_mm_rebuild(const struct cell *ci, const struct cell *cj, const struct engine *e, const struct space *s); +/** + * @brief Compute the square of the minimal distance between any two points in + * two cells of the same size + * + * @param ci The first #cell. + * @param cj The second #cell. + * @param periodic Are we using periodic BCs? + * @param dim The dimensions of the simulation volume + */ +__attribute__((always_inline)) INLINE static double cell_min_dist2_same_size( + const struct cell *restrict ci, const struct cell *restrict cj, + const int periodic, const double dim[3]) { + +#ifdef SWIFT_DEBUG_CHECKS + if (ci->width[0] != cj->width[0]) error("Cells of different size!"); + if (ci->width[1] != cj->width[1]) error("Cells of different size!"); + if (ci->width[2] != cj->width[2]) error("Cells of different size!"); +#endif + + const double cix_min = ci->loc[0]; + const double ciy_min = ci->loc[1]; + const double ciz_min = ci->loc[2]; + const double cjx_min = cj->loc[0]; + const double cjy_min = cj->loc[1]; + const double cjz_min = cj->loc[2]; + + const double cix_max = ci->loc[0] + ci->width[0]; + const double ciy_max = ci->loc[1] + ci->width[1]; + const double ciz_max = ci->loc[2] + ci->width[2]; + const double cjx_max = cj->loc[0] + cj->width[0]; + const double cjy_max = cj->loc[1] + cj->width[1]; + const double cjz_max = cj->loc[2] + cj->width[2]; + + if (periodic) { + + const double dx = min4(fabs(nearest(cix_min - cjx_min, dim[0])), + fabs(nearest(cix_min - cjx_max, dim[0])), + fabs(nearest(cix_max - cjx_min, dim[0])), + fabs(nearest(cix_max - cjx_max, dim[0]))); + + const double dy = min4(fabs(nearest(ciy_min - cjy_min, dim[1])), + fabs(nearest(ciy_min - cjy_max, dim[1])), + fabs(nearest(ciy_max - cjy_min, dim[1])), + fabs(nearest(ciy_max - cjy_max, dim[1]))); + + const double dz = min4(fabs(nearest(ciz_min - cjz_min, dim[2])), + fabs(nearest(ciz_min - cjz_max, dim[2])), + fabs(nearest(ciz_max - cjz_min, dim[2])), + fabs(nearest(ciz_max - cjz_max, dim[2]))); + + return dx * dx + dy * dy + dz * dz; + + } else { + + const double dx = min(fabs(cix_max - cjx_min), fabs(cix_min - cjx_max)); + const double dy = min(fabs(ciy_max - cjy_min), fabs(ciy_min - cjy_max)); + const double dz = min(fabs(ciz_max - cjz_min), fabs(ciz_min - cjz_max)); + + return dx * dx + dy * dy + dz * dz; + } +} + /* Inlined functions (for speed). */ /** @@ -866,20 +936,23 @@ __attribute__((always_inline)) INLINE static int cell_need_rebuild_for_pair( } /** - * @brief Add a unique tag to a cell mostly for MPI communications. + * @brief Add a unique tag to a cell, mostly for MPI communications. 
+ * + * This function locks the cell so that tags can be added concurrently. * * @param c The #cell to tag. */ -__attribute__((always_inline)) INLINE static void cell_tag(struct cell *c) { +__attribute__((always_inline)) INLINE static void cell_ensure_tagged( + struct cell *c) { #ifdef WITH_MPI -#ifdef SWIFT_DEBUG_CHECKS - if (c->mpi.tag > 0) error("setting tag for already tagged cell"); -#endif - + lock_lock(&c->hydro.lock); if (c->mpi.tag < 0 && (c->mpi.tag = atomic_inc(&cell_next_tag)) > cell_max_tag) error("Ran out of cell tags."); + if (lock_unlock(&c->hydro.lock) != 0) { + error("Failed to unlock cell."); + } #else error("SWIFT was not compiled with MPI enabled."); #endif // WITH_MPI diff --git a/src/drift.h b/src/drift.h index 351b15381dde9a95af908003b1c8df01b56610fa..a4bdf9be74aade4fe0f1349544cf472363c81c99 100644 --- a/src/drift.h +++ b/src/drift.h @@ -142,6 +142,7 @@ __attribute__((always_inline)) INLINE static void drift_spart( for (int k = 0; k < 3; k++) { const float dx = sp->v[k] * dt_drift; sp->x_diff[k] -= dx; + sp->x_diff_sort[k] -= dx; } } diff --git a/src/engine.c b/src/engine.c index f8682de0f967fc0f357ce6e0a123a7a955121734..472800b499d11e992a36c3816ac0d0829c4f588e 100644 --- a/src/engine.c +++ b/src/engine.c @@ -128,6 +128,7 @@ struct end_of_step_data { size_t inhibited, g_inhibited, s_inhibited; integertime_t ti_hydro_end_min, ti_hydro_end_max, ti_hydro_beg_max; integertime_t ti_gravity_end_min, ti_gravity_end_max, ti_gravity_beg_max; + integertime_t ti_stars_end_min; struct engine *e; }; @@ -980,8 +981,7 @@ void engine_repartition(struct engine *e) { fflush(stdout); /* Check that all cells have been drifted to the current time */ - space_check_drift_point(e->s, e->ti_current, - e->policy & engine_policy_self_gravity); + space_check_drift_point(e->s, e->ti_current, /*check_multipoles=*/0); #endif /* Clear the repartition flag. */ @@ -1927,7 +1927,8 @@ int engine_estimate_nr_tasks(struct engine *e) { n1 += 2; } if (e->policy & engine_policy_stars) { - n1 += 2; + /* 1 self, 1 sort, 26/2 pairs */ + n1 += 15; } #if defined(WITH_LOGGER) n1 += 1; @@ -2120,7 +2121,7 @@ void engine_prepare(struct engine *e) { /* Unskip active tasks and check for rebuild */ if (!e->forcerebuild && !e->forcerepart && !e->restarting) engine_unskip(e); - const ticks tic2 = getticks(); + const ticks tic3 = getticks(); #ifdef WITH_MPI MPI_Allreduce(MPI_IN_PLACE, &e->forcerebuild, 1, MPI_INT, MPI_MAX, @@ -2129,13 +2130,13 @@ void engine_prepare(struct engine *e) { if (e->verbose) message("Communicating rebuild flag took %.3f %s.", - clocks_from_ticks(getticks() - tic2), clocks_getunit()); + clocks_from_ticks(getticks() - tic3), clocks_getunit()); /* Do we need repartitioning ? */ if (e->forcerepart) { /* Let's start by drifting everybody to the current time */ - engine_drift_all(e); + engine_drift_all(e, /*drift_mpole=*/0); drifted_all = 1; /* And repartition */ @@ -2147,7 +2148,7 @@ void engine_prepare(struct engine *e) { if (e->forcerebuild) { /* Let's start by drifting everybody to the current time */ - if (!e->restarting && !drifted_all) engine_drift_all(e); + if (!e->restarting && !drifted_all) engine_drift_all(e, /*drift_mpole=*/0); /* And rebuild */ engine_rebuild(e, repartitioned, 0); @@ -2218,6 +2219,7 @@ void engine_collect_end_of_step_recurse(struct cell *c, ti_hydro_beg_max = 0; integertime_t ti_gravity_end_min = max_nr_timesteps, ti_gravity_end_max = 0, ti_gravity_beg_max = 0; + integertime_t ti_stars_end_min = max_nr_timesteps; /* Collect the values from the progeny. 
*/ for (int k = 0; k < 8; k++) { @@ -2237,6 +2239,8 @@ void engine_collect_end_of_step_recurse(struct cell *c, ti_gravity_end_max = max(ti_gravity_end_max, cp->grav.ti_end_max); ti_gravity_beg_max = max(ti_gravity_beg_max, cp->grav.ti_beg_max); + ti_stars_end_min = min(ti_stars_end_min, cp->stars.ti_end_min); + updated += cp->hydro.updated; g_updated += cp->grav.updated; s_updated += cp->stars.updated; @@ -2259,6 +2263,7 @@ void engine_collect_end_of_step_recurse(struct cell *c, c->grav.ti_end_min = ti_gravity_end_min; c->grav.ti_end_max = ti_gravity_end_max; c->grav.ti_beg_max = ti_gravity_beg_max; + c->stars.ti_end_min = ti_stars_end_min; c->hydro.updated = updated; c->grav.updated = g_updated; c->stars.updated = s_updated; @@ -2293,6 +2298,7 @@ void engine_collect_end_of_step_mapper(void *map_data, int num_elements, ti_hydro_beg_max = 0; integertime_t ti_gravity_end_min = max_nr_timesteps, ti_gravity_end_max = 0, ti_gravity_beg_max = 0; + integertime_t ti_stars_end_min = max_nr_timesteps; for (int ind = 0; ind < num_elements; ind++) { struct cell *c = &s->cells_top[local_cells[ind]]; @@ -2313,6 +2319,9 @@ void engine_collect_end_of_step_mapper(void *map_data, int num_elements, ti_gravity_end_max = max(ti_gravity_end_max, c->grav.ti_end_max); ti_gravity_beg_max = max(ti_gravity_beg_max, c->grav.ti_beg_max); + if (c->stars.ti_end_min > e->ti_current) + ti_stars_end_min = min(ti_stars_end_min, c->stars.ti_end_min); + updated += c->hydro.updated; g_updated += c->grav.updated; s_updated += c->stars.updated; @@ -2351,6 +2360,9 @@ void engine_collect_end_of_step_mapper(void *map_data, int num_elements, max(ti_gravity_end_max, data->ti_gravity_end_max); data->ti_gravity_beg_max = max(ti_gravity_beg_max, data->ti_gravity_beg_max); + + if (ti_stars_end_min > e->ti_current) + data->ti_stars_end_min = min(ti_stars_end_min, data->ti_stars_end_min); } if (lock_unlock(&s->lock) != 0) error("Failed to unlock the space"); @@ -2492,8 +2504,7 @@ void engine_print_stats(struct engine *e) { /* Check that all cells have been drifted to the current time. * That can include cells that have not * previously been active on this rank. */ - space_check_drift_point(e->s, e->ti_current, - e->policy & engine_policy_self_gravity); + space_check_drift_point(e->s, e->ti_current, /*chek_mpoles=*/0); /* Be verbose about this */ if (e->nodeID == 0) { @@ -2936,7 +2947,7 @@ void engine_step(struct engine *e) { e->step_props = engine_step_prop_none; /* When restarting, move everyone to the current time. */ - if (e->restarting) engine_drift_all(e); + if (e->restarting) engine_drift_all(e, /*drift_mpole=*/1); /* Get the physical value of the time and time-step size */ if (e->policy & engine_policy_cosmology) { @@ -2975,7 +2986,7 @@ void engine_step(struct engine *e) { /* Are we drifting everything (a la Gadget/GIZMO) ? 
*/ if (e->policy & engine_policy_drift_all && !e->forcerebuild) - engine_drift_all(e); + engine_drift_all(e, /*drift_mpole=*/1); /* Are we reconstructing the multipoles or drifting them ?*/ if ((e->policy & engine_policy_self_gravity) && !e->forcerebuild) { @@ -3119,7 +3130,7 @@ void engine_check_for_dumps(struct engine *e) { } /* Drift everyone */ - engine_drift_all(e); + engine_drift_all(e, /*drift_mpole=*/0); /* Dump everything */ engine_print_stats(e); @@ -3143,7 +3154,7 @@ void engine_check_for_dumps(struct engine *e) { } /* Drift everyone */ - engine_drift_all(e); + engine_drift_all(e, /*drift_mpole=*/0); /* Dump stats */ engine_print_stats(e); @@ -3155,7 +3166,7 @@ void engine_check_for_dumps(struct engine *e) { e->time = e->ti_next_snapshot * e->time_base + e->time_begin; /* Drift everyone */ - engine_drift_all(e); + engine_drift_all(e, /*drift_mpole=*/0); /* Dump snapshot */ #ifdef WITH_LOGGER @@ -3178,7 +3189,7 @@ void engine_check_for_dumps(struct engine *e) { } /* Drift everyone */ - engine_drift_all(e); + engine_drift_all(e, /*drift_mpole=*/0); /* Dump snapshot */ #ifdef WITH_LOGGER @@ -3195,7 +3206,7 @@ void engine_check_for_dumps(struct engine *e) { e->time = e->ti_next_stats * e->time_base + e->time_begin; /* Drift everyone */ - engine_drift_all(e); + engine_drift_all(e, /*drift_mpole=*/0); /* Dump stats */ engine_print_stats(e); @@ -3218,7 +3229,7 @@ void engine_check_for_dumps(struct engine *e) { } /* Drift everyone */ - engine_drift_all(e); + engine_drift_all(e, /*drift_mpole=*/0); /* Dump... */ #ifdef WITH_LOGGER @@ -3244,7 +3255,7 @@ void engine_check_for_dumps(struct engine *e) { } /* Drift everyone */ - engine_drift_all(e); + engine_drift_all(e, /*drift_mpole=*/0); /* Dump */ engine_print_stats(e); @@ -3272,7 +3283,7 @@ void engine_check_for_dumps(struct engine *e) { } /* Drift everyone */ - engine_drift_all(e); + engine_drift_all(e, /*drift_mpole=*/0); } velociraptor_init(e); @@ -3341,7 +3352,7 @@ void engine_dump_restarts(struct engine *e, int drifted_all, int force) { restart_remove_previous(e->restart_file); /* Drift all particles first (may have just been done). */ - if (!drifted_all) engine_drift_all(e); + if (!drifted_all) engine_drift_all(e, /*drift_mpole=*/1); restart_write(e, e->restart_file); if (e->verbose) @@ -3412,160 +3423,6 @@ void engine_unskip(struct engine *e) { clocks_getunit()); } -/** - * @brief Mapper function to drift *all* particle types and multipoles forward - * in time. - * - * @param map_data An array of #cell%s. - * @param num_elements Chunk size. - * @param extra_data Pointer to an #engine. - */ -void engine_do_drift_all_mapper(void *map_data, int num_elements, - void *extra_data) { - - const struct engine *e = (const struct engine *)extra_data; - const int restarting = e->restarting; - struct space *s = e->s; - struct cell *cells_top; - int *local_cells_with_tasks_top; - - if (restarting) { - - /* When restarting, we loop over all top-level cells */ - cells_top = (struct cell *)map_data; - local_cells_with_tasks_top = NULL; - - } else { - - /* In any other case, we use th list of local cells with tasks */ - cells_top = s->cells_top; - local_cells_with_tasks_top = (int *)map_data; - } - - for (int ind = 0; ind < num_elements; ind++) { - - struct cell *c; - - /* When restarting, the list of local cells with tasks does not - yet exist. 
We use the raw list of top-level cells instead */ - if (restarting) - c = &cells_top[ind]; - else - c = &cells_top[local_cells_with_tasks_top[ind]]; - - if (c->nodeID == e->nodeID) { - - /* Drift all the particles */ - cell_drift_part(c, e, 1); - - /* Drift all the g-particles */ - cell_drift_gpart(c, e, 1); - } - - /* Drift the multipoles */ - if (e->policy & engine_policy_self_gravity) { - cell_drift_all_multipoles(c, e); - } - } -} - -/** - * @brief Drift *all* particles and multipoles at all levels - * forward to the current time. - * - * @param e The #engine. - */ -void engine_drift_all(struct engine *e) { - - const ticks tic = getticks(); - -#ifdef SWIFT_DEBUG_CHECKS - if (e->nodeID == 0) { - if (e->policy & engine_policy_cosmology) - message("Drifting all to a=%e", - exp(e->ti_current * e->time_base) * e->cosmology->a_begin); - else - message("Drifting all to t=%e", - e->ti_current * e->time_base + e->time_begin); - } -#endif - - if (!e->restarting) { - - /* Normal case: We have a list of local cells with tasks to play with */ - threadpool_map(&e->threadpool, engine_do_drift_all_mapper, - e->s->local_cells_with_tasks_top, - e->s->nr_local_cells_with_tasks, sizeof(int), 0, e); - } else { - - /* When restarting, the list of local cells with tasks does not yet - exist. We use the raw list of top-level cells instead */ - threadpool_map(&e->threadpool, engine_do_drift_all_mapper, e->s->cells_top, - e->s->nr_cells, sizeof(struct cell), 0, e); - } - - /* Synchronize particle positions */ - space_synchronize_particle_positions(e->s); - -#ifdef SWIFT_DEBUG_CHECKS - /* Check that all cells have been drifted to the current time. */ - space_check_drift_point(e->s, e->ti_current, - e->policy & engine_policy_self_gravity); - part_verify_links(e->s->parts, e->s->gparts, e->s->sparts, e->s->nr_parts, - e->s->nr_gparts, e->s->nr_sparts, e->verbose); -#endif - - if (e->verbose) - message("took %.3f %s.", clocks_from_ticks(getticks() - tic), - clocks_getunit()); -} - -/** - * @brief Mapper function to drift *all* top-level multipoles forward in - * time. - * - * @param map_data An array of #cell%s. - * @param num_elements Chunk size. - * @param extra_data Pointer to an #engine. - */ -void engine_do_drift_top_multipoles_mapper(void *map_data, int num_elements, - void *extra_data) { - - struct engine *e = (struct engine *)extra_data; - struct cell *cells = (struct cell *)map_data; - - for (int ind = 0; ind < num_elements; ind++) { - struct cell *c = &cells[ind]; - if (c != NULL) { - - /* Drift the multipole at this level only */ - if (c->grav.ti_old_multipole != e->ti_current) cell_drift_multipole(c, e); - } - } -} - -/** - * @brief Drift *all* top-level multipoles forward to the current time. - * - * @param e The #engine. - */ -void engine_drift_top_multipoles(struct engine *e) { - - const ticks tic = getticks(); - - threadpool_map(&e->threadpool, engine_do_drift_top_multipoles_mapper, - e->s->cells_top, e->s->nr_cells, sizeof(struct cell), 0, e); - -#ifdef SWIFT_DEBUG_CHECKS - /* Check that all cells have been drifted to the current time. 
*/ - space_check_top_multipoles_drift_point(e->s, e->ti_current); -#endif - - if (e->verbose) - message("took %.3f %s.", clocks_from_ticks(getticks() - tic), - clocks_getunit()); -} - void engine_do_reconstruct_multipoles_mapper(void *map_data, int num_elements, void *extra_data) { @@ -3641,13 +3498,20 @@ void engine_makeproxies(struct engine *e) { const int with_gravity = (e->policy & engine_policy_self_gravity); const double theta_crit_inv = e->gravity_properties->theta_crit_inv; const double theta_crit2 = e->gravity_properties->theta_crit2; - const double max_distance = e->mesh->r_cut_max; + const double max_mesh_dist = e->mesh->r_cut_max; + const double max_mesh_dist2 = max_mesh_dist * max_mesh_dist; + + /* Distance between centre of the cell and corners */ + const double r_diag2 = cell_width[0] * cell_width[0] + + cell_width[1] * cell_width[1] + + cell_width[2] * cell_width[2]; + const double r_diag = 0.5 * sqrt(r_diag2); + + /* Maximal distance from a shifted CoM to centre of cell */ + const double delta_CoM = engine_max_proxy_centre_frac * r_diag; - /* Maximal distance between CoMs and any particle in the cell */ - const double r_max2 = cell_width[0] * cell_width[0] + - cell_width[1] * cell_width[1] + - cell_width[2] * cell_width[2]; - const double r_max = sqrt(r_max2); + /* Maximal distance from shifted CoM to any corner */ + const double r_max = r_diag + 2. * delta_CoM; /* Prepare the proxies and the proxy index. */ if (e->proxy_ind == NULL) @@ -3657,20 +3521,20 @@ void engine_makeproxies(struct engine *e) { e->nr_proxies = 0; /* Compute how many cells away we need to walk */ - int delta = 1; /*hydro case */ + int delta_cells = 1; /*hydro case */ /* Gravity needs to take the opening angle into account */ if (with_gravity) { const double distance = 2. * r_max * theta_crit_inv; - delta = (int)(distance / cells[0].dmin) + 1; + delta_cells = (int)(distance / cells[0].dmin) + 1; } /* Turn this into upper and lower bounds for loops */ - int delta_m = delta; - int delta_p = delta; + int delta_m = delta_cells; + int delta_p = delta_cells; /* Special case where every cell is in range of every other one */ - if (delta >= cdim[0] / 2) { + if (delta_cells >= cdim[0] / 2) { if (cdim[0] % 2 == 0) { delta_m = cdim[0] / 2; delta_p = cdim[0] / 2 - 1; @@ -3685,46 +3549,35 @@ void engine_makeproxies(struct engine *e) { message( "Looking for proxies up to %d top-level cells away (delta_m=%d " "delta_p=%d)", - delta, delta_m, delta_p); + delta_cells, delta_m, delta_p); /* Loop over each cell in the space. */ - int ind[3]; - for (ind[0] = 0; ind[0] < cdim[0]; ind[0]++) { - for (ind[1] = 0; ind[1] < cdim[1]; ind[1]++) { - for (ind[2] = 0; ind[2] < cdim[2]; ind[2]++) { + for (int i = 0; i < cdim[0]; i++) { + for (int j = 0; j < cdim[1]; j++) { + for (int k = 0; k < cdim[2]; k++) { /* Get the cell ID. */ - const int cid = cell_getid(cdim, ind[0], ind[1], ind[2]); - - /* and it's location */ - const double loc_i[3] = {cells[cid].loc[0], cells[cid].loc[1], - cells[cid].loc[2]}; - - /* Loop over all its neighbours (periodic). 
*/ - for (int i = -delta_m; i <= delta_p; i++) { - int ii = ind[0] + i; - if (ii >= cdim[0]) - ii -= cdim[0]; - else if (ii < 0) - ii += cdim[0]; - for (int j = -delta_m; j <= delta_p; j++) { - int jj = ind[1] + j; - if (jj >= cdim[1]) - jj -= cdim[1]; - else if (jj < 0) - jj += cdim[1]; - for (int k = -delta_m; k <= delta_p; k++) { - int kk = ind[2] + k; - if (kk >= cdim[2]) - kk -= cdim[2]; - else if (kk < 0) - kk += cdim[2]; + const int cid = cell_getid(cdim, i, j, k); + + /* Loop over all its neighbours in range. */ + for (int ii = -delta_m; ii <= delta_p; ii++) { + int iii = i + ii; + if (!periodic && (iii < 0 || iii >= cdim[0])) continue; + iii = (iii + cdim[0]) % cdim[0]; + for (int jj = -delta_m; jj <= delta_p; jj++) { + int jjj = j + jj; + if (!periodic && (jjj < 0 || jjj >= cdim[1])) continue; + jjj = (jjj + cdim[1]) % cdim[1]; + for (int kk = -delta_m; kk <= delta_p; kk++) { + int kkk = k + kk; + if (!periodic && (kkk < 0 || kkk >= cdim[2])) continue; + kkk = (kkk + cdim[2]) % cdim[2]; /* Get the cell ID. */ - const int cjd = cell_getid(cdim, ii, jj, kk); + const int cjd = cell_getid(cdim, iii, jjj, kkk); - /* Early abort (same cell) */ - if (cid == cjd) continue; + /* Early abort (same cell or duplicate pair) */ + if (cid >= cjd) continue; /* Early abort (both same node) */ if (cells[cid].nodeID == nodeID && cells[cjd].nodeID == nodeID) @@ -3744,15 +3597,12 @@ void engine_makeproxies(struct engine *e) { /* This is super-ugly but checks for direct neighbours */ /* with periodic BC */ - if (((abs(ind[0] - ii) <= 1 || - abs(ind[0] - ii - cdim[0]) <= 1 || - abs(ind[0] - ii + cdim[0]) <= 1) && - (abs(ind[1] - jj) <= 1 || - abs(ind[1] - jj - cdim[1]) <= 1 || - abs(ind[1] - jj + cdim[1]) <= 1) && - (abs(ind[2] - kk) <= 1 || - abs(ind[2] - kk - cdim[2]) <= 1 || - abs(ind[2] - kk + cdim[2]) <= 1))) + if (((abs(i - iii) <= 1 || abs(i - iii - cdim[0]) <= 1 || + abs(i - iii + cdim[0]) <= 1) && + (abs(j - jjj) <= 1 || abs(j - jjj - cdim[1]) <= 1 || + abs(j - jjj + cdim[1]) <= 1) && + (abs(k - kkk) <= 1 || abs(k - kkk - cdim[2]) <= 1 || + abs(k - kkk + cdim[2]) <= 1))) proxy_type |= (int)proxy_cell_type_hydro; } @@ -3766,44 +3616,28 @@ void engine_makeproxies(struct engine *e) { for an M2L interaction and hence require a proxy as this pair of cells cannot rely on just an M2L calculation. */ - const double loc_j[3] = {cells[cjd].loc[0], cells[cjd].loc[1], - cells[cjd].loc[2]}; - - /* Start with the distance between the cell centres. */ - double dx = loc_i[0] - loc_j[0]; - double dy = loc_i[1] - loc_j[1]; - double dz = loc_i[2] - loc_j[2]; - - /* Apply BC */ - if (periodic) { - dx = nearest(dx, dim[0]); - dy = nearest(dy, dim[1]); - dz = nearest(dz, dim[2]); - } - - /* Add to it for the case where the future CoMs are in the - * corners */ - dx += cell_width[0]; - dy += cell_width[1]; - dz += cell_width[2]; + /* Minimal distance between any two points in the cells */ + const double min_dist_centres2 = cell_min_dist2_same_size( + &cells[cid], &cells[cjd], periodic, dim); - /* This is a crazy upper-bound but the best we can do */ - const double r2 = dx * dx + dy * dy + dz * dz; - - /* Minimal distance between any pair of particles */ - const double min_radius = sqrt(r2) - 2. * r_max; + /* Let's now assume the CoMs will shift a bit */ + const double min_dist_CoM = + sqrt(min_dist_centres2) - 2. * delta_CoM; + const double min_dist_CoM2 = min_dist_CoM * min_dist_CoM; /* Are we beyond the distance where the truncated forces are 0 * but not too far such that M2L can be used?
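* In other words: is the pair close enough for the truncated short-range
* forces to be non-zero, yet does it fail the multipole acceptance criterion?
* If so, particle data will be needed and a gravity proxy is flagged below.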
*/ if (periodic) { - if ((min_radius < max_distance) && - (!gravity_M2L_accept(r_max, r_max, theta_crit2, r2))) + if ((min_dist_CoM2 < max_mesh_dist2) && + (!gravity_M2L_accept(r_max, r_max, theta_crit2, + min_dist_CoM2))) proxy_type |= (int)proxy_cell_type_gravity; } else { - if (!gravity_M2L_accept(r_max, r_max, theta_crit2, r2)) + if (!gravity_M2L_accept(r_max, r_max, theta_crit2, + min_dist_CoM2)) proxy_type |= (int)proxy_cell_type_gravity; } } @@ -3815,8 +3649,8 @@ void engine_makeproxies(struct engine *e) { if (cells[cid].nodeID == nodeID && cells[cjd].nodeID != nodeID) { /* Do we already have a relationship with this node? */ - int pid = e->proxy_ind[cells[cjd].nodeID]; - if (pid < 0) { + int proxy_id = e->proxy_ind[cells[cjd].nodeID]; + if (proxy_id < 0) { if (e->nr_proxies == engine_maxproxies) error("Maximum number of proxies exceeded."); @@ -3826,24 +3660,31 @@ void engine_makeproxies(struct engine *e) { /* Store the information */ e->proxy_ind[cells[cjd].nodeID] = e->nr_proxies; - pid = e->nr_proxies; + proxy_id = e->nr_proxies; e->nr_proxies += 1; + + /* Check the maximal proxy limit */ + if ((size_t)proxy_id > 8 * sizeof(long long)) + error( + "Created more than %zd proxies. cell.mpi.sendto will " + "overflow.", + 8 * sizeof(long long)); } /* Add the cell to the proxy */ - proxy_addcell_in(&proxies[pid], &cells[cjd], proxy_type); - proxy_addcell_out(&proxies[pid], &cells[cid], proxy_type); + proxy_addcell_in(&proxies[proxy_id], &cells[cjd], proxy_type); + proxy_addcell_out(&proxies[proxy_id], &cells[cid], proxy_type); /* Store info about where to send the cell */ - cells[cid].mpi.sendto |= (1ULL << pid); + cells[cid].mpi.sendto |= (1ULL << proxy_id); } /* Same for the symmetric case? */ if (cells[cjd].nodeID == nodeID && cells[cid].nodeID != nodeID) { /* Do we already have a relationship with this node? */ - int pid = e->proxy_ind[cells[cid].nodeID]; - if (pid < 0) { + int proxy_id = e->proxy_ind[cells[cid].nodeID]; + if (proxy_id < 0) { if (e->nr_proxies == engine_maxproxies) error("Maximum number of proxies exceeded."); @@ -3853,16 +3694,23 @@ void engine_makeproxies(struct engine *e) { /* Store the information */ e->proxy_ind[cells[cid].nodeID] = e->nr_proxies; - pid = e->nr_proxies; + proxy_id = e->nr_proxies; e->nr_proxies += 1; + + /* Check the maximal proxy limit */ + if ((size_t)proxy_id > 8 * sizeof(long long)) + error( + "Created more than %zd proxies. cell.mpi.sendto will " + "overflow.", + 8 * sizeof(long long)); } /* Add the cell to the proxy */ - proxy_addcell_in(&proxies[pid], &cells[cid], proxy_type); - proxy_addcell_out(&proxies[pid], &cells[cjd], proxy_type); + proxy_addcell_in(&proxies[proxy_id], &cells[cid], proxy_type); + proxy_addcell_out(&proxies[proxy_id], &cells[cjd], proxy_type); /* Store info about where to send the cell */ - cells[cjd].mpi.sendto |= (1ULL << pid); + cells[cjd].mpi.sendto |= (1ULL << proxy_id); } } } @@ -3994,8 +3842,7 @@ void engine_dump_snapshot(struct engine *e) { /* Check that all cells have been drifted to the current time. * That can include cells that have not * previously been active on this rank. 
*/ - space_check_drift_point(e->s, e->ti_current, - e->policy & engine_policy_self_gravity); + space_check_drift_point(e->s, e->ti_current, /* check_mpole=*/0); /* Be verbose about this */ if (e->nodeID == 0) { diff --git a/src/engine.h b/src/engine.h index 50eef4b314da4c294a86e59e29542859f7f402f2..eb73dc32d0dd885424335ad598ec93c866a6ccda 100644 --- a/src/engine.h +++ b/src/engine.h @@ -98,6 +98,7 @@ enum engine_step_properties { #define engine_maxproxies 64 #define engine_tasksreweight 1 #define engine_parts_size_grow 1.05 +#define engine_max_proxy_centre_frac 0.2 #define engine_redistribute_alloc_margin 1.2 #define engine_default_energy_file_name "energy" #define engine_default_timesteps_file_name "timesteps" @@ -399,7 +400,7 @@ void engine_compute_next_stf_time(struct engine *e); void engine_compute_next_statistics_time(struct engine *e); void engine_recompute_displacement_constraint(struct engine *e); void engine_unskip(struct engine *e); -void engine_drift_all(struct engine *e); +void engine_drift_all(struct engine *e, const int drift_mpoles); void engine_drift_top_multipoles(struct engine *e); void engine_reconstruct_multipoles(struct engine *e); void engine_print_stats(struct engine *e); diff --git a/src/engine_drift.c b/src/engine_drift.c new file mode 100644 index 0000000000000000000000000000000000000000..7a842068b57813575c33dd670172059abb1e8fc0 --- /dev/null +++ b/src/engine_drift.c @@ -0,0 +1,297 @@ +/******************************************************************************* + * This file is part of SWIFT. + * Copyright (c) 2012 Pedro Gonnet (pedro.gonnet@durham.ac.uk) + * Matthieu Schaller (matthieu.schaller@durham.ac.uk) + * 2015 Peter W. Draper (p.w.draper@durham.ac.uk) + * Angus Lepper (angus.lepper@ed.ac.uk) + * 2016 John A. Regan (john.a.regan@durham.ac.uk) + * Tom Theuns (tom.theuns@durham.ac.uk) + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published + * by the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public License + * along with this program. If not, see <http://www.gnu.org/licenses/>. + * + ******************************************************************************/ + +/* Config parameters. */ +#include "../config.h" + +/* This object's header. */ +#include "engine.h" + +/** + * @brief Mapper function to drift *all* the #part to the current time. + * + * @param map_data An array of #cell%s. + * @param num_elements Chunk size. + * @param extra_data Pointer to an #engine. 
+ */ +void engine_do_drift_all_part_mapper(void *map_data, int num_elements, + void *extra_data) { + + const struct engine *e = (const struct engine *)extra_data; + const int restarting = e->restarting; + struct space *s = e->s; + struct cell *cells_top; + int *local_cells_top; + + if (restarting) { + + /* When restarting, we loop over all top-level cells */ + cells_top = (struct cell *)map_data; + local_cells_top = NULL; + + } else { + + /* In any other case, we use the list of local cells with tasks */ + cells_top = s->cells_top; + local_cells_top = (int *)map_data; + } + + for (int ind = 0; ind < num_elements; ind++) { + + struct cell *c; + + /* When restarting, the list of local cells does not + yet exist. We use the raw list of top-level cells instead */ + if (restarting) + c = &cells_top[ind]; + else + c = &cells_top[local_cells_top[ind]]; + + if (c->nodeID == e->nodeID) { + + /* Drift all the particles */ + cell_drift_part(c, e, /* force the drift=*/1); + } + } +} + +/** + * @brief Mapper function to drift *all* the #gpart to the current time. + * + * @param map_data An array of #cell%s. + * @param num_elements Chunk size. + * @param extra_data Pointer to an #engine. + */ +void engine_do_drift_all_gpart_mapper(void *map_data, int num_elements, + void *extra_data) { + + const struct engine *e = (const struct engine *)extra_data; + const int restarting = e->restarting; + struct space *s = e->s; + struct cell *cells_top; + int *local_cells_top; + + if (restarting) { + + /* When restarting, we loop over all top-level cells */ + cells_top = (struct cell *)map_data; + local_cells_top = NULL; + + } else { + + /* In any other case, we use the list of local cells with tasks */ + cells_top = s->cells_top; + local_cells_top = (int *)map_data; + } + + for (int ind = 0; ind < num_elements; ind++) { + + struct cell *c; + + /* When restarting, the list of local cells does not + yet exist. We use the raw list of top-level cells instead */ + if (restarting) + c = &cells_top[ind]; + else + c = &cells_top[local_cells_top[ind]]; + + if (c->nodeID == e->nodeID) { + + /* Drift all the particles */ + cell_drift_gpart(c, e, /* force the drift=*/1); + } + } +} + +/** + * @brief Mapper function to drift *all* the multipoles to the current time. + * + * @param map_data An array of #cell%s. + * @param num_elements Chunk size. + * @param extra_data Pointer to an #engine. + */ +void engine_do_drift_all_multipole_mapper(void *map_data, int num_elements, + void *extra_data) { + + const struct engine *e = (const struct engine *)extra_data; + const int restarting = e->restarting; + struct space *s = e->s; + struct cell *cells_top; + int *local_cells_with_tasks_top; + + if (restarting) { + + /* When restarting, we loop over all top-level cells */ + cells_top = (struct cell *)map_data; + local_cells_with_tasks_top = NULL; + + } else { + + /* In any other case, we use the list of local cells with tasks */ + cells_top = s->cells_top; + local_cells_with_tasks_top = (int *)map_data; + } + + for (int ind = 0; ind < num_elements; ind++) { + + struct cell *c; + + /* When restarting, the list of local cells does not + yet exist. We use the raw list of top-level cells instead */ + if (restarting) + c = &cells_top[ind]; + else + c = &cells_top[local_cells_with_tasks_top[ind]]; + + cell_drift_all_multipoles(c, e); + } +} + +/** + * @brief Drift *all* particles and multipoles at all levels + * forward to the current time. + * + * @param e The #engine. + * @param drift_mpoles Do we want to drift all the multipoles as well? 
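+ *
+ * In the normal (non-restart) case, passing drift_mpoles=0 leaves the
+ * multipoles untouched, which is useful when they are about to be
+ * reconstructed from scratch anyway (e.g. around a rebuild):
+ *
+ *   engine_drift_all(e, 0);   (drifts the particles but not the multipoles)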
+ */ +void engine_drift_all(struct engine *e, const int drift_mpoles) { + + const ticks tic = getticks(); + +#ifdef SWIFT_DEBUG_CHECKS + if (e->nodeID == 0) { + if (e->policy & engine_policy_cosmology) + message("Drifting all to a=%e", + exp(e->ti_current * e->time_base) * e->cosmology->a_begin); + else + message("Drifting all to t=%e", + e->ti_current * e->time_base + e->time_begin); + } +#endif + + if (!e->restarting) { + + /* Normal case: We have a list of local cells with tasks to play with */ + + if (e->s->nr_parts > 0) { + threadpool_map(&e->threadpool, engine_do_drift_all_part_mapper, + e->s->local_cells_top, e->s->nr_local_cells, sizeof(int), + /* default chunk */ 0, e); + } + if (e->s->nr_gparts > 0) { + threadpool_map(&e->threadpool, engine_do_drift_all_gpart_mapper, + e->s->local_cells_top, e->s->nr_local_cells, sizeof(int), + /* default chunk */ 0, e); + } + if (drift_mpoles && (e->policy & engine_policy_self_gravity)) { + threadpool_map(&e->threadpool, engine_do_drift_all_multipole_mapper, + e->s->local_cells_with_tasks_top, + e->s->nr_local_cells_with_tasks, sizeof(int), + /* default chunk */ 0, e); + } + + } else { + + /* When restarting, the list of local cells with tasks does not yet + exist. We use the raw list of top-level cells instead */ + + if (e->s->nr_parts > 0) { + threadpool_map(&e->threadpool, engine_do_drift_all_part_mapper, + e->s->cells_top, e->s->nr_cells, sizeof(struct cell), + /* default chunk */ 0, e); + } + if (e->s->nr_gparts > 0) { + threadpool_map(&e->threadpool, engine_do_drift_all_gpart_mapper, + e->s->cells_top, e->s->nr_cells, sizeof(struct cell), + /* default chunk */ 0, e); + } + if (e->policy & engine_policy_self_gravity) { + threadpool_map(&e->threadpool, engine_do_drift_all_multipole_mapper, + e->s->cells_top, e->s->nr_cells, sizeof(struct cell), + /* default chunk */ 0, e); + } + } + + /* Synchronize particle positions */ + space_synchronize_particle_positions(e->s); + +#ifdef SWIFT_DEBUG_CHECKS + /* Check that all cells have been drifted to the current time. */ + space_check_drift_point( + e->s, e->ti_current, + drift_mpoles && (e->policy & engine_policy_self_gravity)); + part_verify_links(e->s->parts, e->s->gparts, e->s->sparts, e->s->nr_parts, + e->s->nr_gparts, e->s->nr_sparts, e->verbose); +#endif + + if (e->verbose) + message("took %.3f %s.", clocks_from_ticks(getticks() - tic), + clocks_getunit()); +} + +/** + * @brief Mapper function to drift *all* top-level multipoles forward in + * time. + * + * @param map_data An array of #cell%s. + * @param num_elements Chunk size. + * @param extra_data Pointer to an #engine. + */ +void engine_do_drift_top_multipoles_mapper(void *map_data, int num_elements, + void *extra_data) { + + struct engine *e = (struct engine *)extra_data; + struct cell *cells = (struct cell *)map_data; + + for (int ind = 0; ind < num_elements; ind++) { + struct cell *c = &cells[ind]; + if (c != NULL) { + + /* Drift the multipole at this level only */ + if (c->grav.ti_old_multipole != e->ti_current) cell_drift_multipole(c, e); + } + } +} + +/** + * @brief Drift *all* top-level multipoles forward to the current time. + * + * @param e The #engine. + */ +void engine_drift_top_multipoles(struct engine *e) { + + const ticks tic = getticks(); + + threadpool_map(&e->threadpool, engine_do_drift_top_multipoles_mapper, + e->s->cells_top, e->s->nr_cells, sizeof(struct cell), 0, e); + +#ifdef SWIFT_DEBUG_CHECKS + /* Check that all cells have been drifted to the current time. 
*/ + space_check_top_multipoles_drift_point(e->s, e->ti_current); +#endif + + if (e->verbose) + message("took %.3f %s.", clocks_from_ticks(getticks() - tic), + clocks_getunit()); +} diff --git a/src/engine_maketasks.c b/src/engine_maketasks.c index 54a73513935cbd73db7362711bd992e669cbe7ca..68841aa5999441e6a2621f867038a44e9f52794c 100644 --- a/src/engine_maketasks.c +++ b/src/engine_maketasks.c @@ -80,8 +80,8 @@ void engine_addtasks_send_gravity(struct engine *e, struct cell *ci, /* Create the tasks and their dependencies? */ if (t_grav == NULL) { - /* Create a tag for this cell. */ - if (ci->mpi.tag < 0) cell_tag(ci); + /* Make sure this cell is tagged. */ + cell_ensure_tagged(ci); t_grav = scheduler_addtask(s, task_type_send, task_subtype_gpart, ci->mpi.tag, 0, ci, cj); @@ -139,8 +139,8 @@ void engine_addtasks_send_hydro(struct engine *e, struct cell *ci, /* Create the tasks and their dependencies? */ if (t_xv == NULL) { - /* Create a tag for this cell. */ - if (ci->mpi.tag < 0) cell_tag(ci); + /* Make sure this cell is tagged. */ + cell_ensure_tagged(ci); t_xv = scheduler_addtask(s, task_type_send, task_subtype_xv, ci->mpi.tag, 0, ci, cj); @@ -238,8 +238,8 @@ void engine_addtasks_send_timestep(struct engine *e, struct cell *ci, /* Create the tasks and their dependencies? */ if (t_ti == NULL) { - /* Create a tag for this cell. */ - if (ci->mpi.tag < 0) cell_tag(ci); + /* Make sure this cell is tagged. */ + cell_ensure_tagged(ci); t_ti = scheduler_addtask(s, task_type_send, task_subtype_tend, ci->mpi.tag, 0, ci, cj); @@ -733,6 +733,10 @@ void engine_make_hierarchical_tasks_stars(struct engine *e, struct cell *c) { /* Are we in a super-cell ? */ if (c->super == c) { + /* Add the sort task. */ + c->stars.sorts = scheduler_addtask(s, task_type_stars_sort, + task_subtype_none, 0, 0, c, NULL); + /* Local tasks only... */ if (c->nodeID == e->nodeID) { @@ -766,7 +770,7 @@ void engine_make_hierarchical_tasks_stars(struct engine *e, struct cell *c) { void engine_make_self_gravity_tasks_mapper(void *map_data, int num_elements, void *extra_data) { - struct engine *e = ((struct engine **)extra_data)[0]; + struct engine *e = (struct engine *)extra_data; struct space *s = e->s; struct scheduler *sched = &e->sched; const int nodeID = e->nodeID; @@ -776,6 +780,7 @@ void engine_make_self_gravity_tasks_mapper(void *map_data, int num_elements, struct cell *cells = s->cells_top; const double theta_crit = e->gravity_properties->theta_crit; const double max_distance = e->mesh->r_cut_max; + const double max_distance2 = max_distance * max_distance; /* Compute how many cells away we need to walk */ const double distance = 2.5 * cells[0].width[0] / theta_crit; @@ -811,91 +816,50 @@ void engine_make_self_gravity_tasks_mapper(void *map_data, int num_elements, /* Skip cells without gravity particles */ if (ci->grav.count == 0) continue; - /* Is that cell local ? */ - if (ci->nodeID != nodeID) continue; - - /* If the cells is local build a self-interaction */ - scheduler_addtask(sched, task_type_self, task_subtype_grav, 0, 0, ci, NULL); - - /* Recover the multipole information */ - const struct gravity_tensors *const multi_i = ci->grav.multipole; - const double CoM_i[3] = {multi_i->CoM[0], multi_i->CoM[1], multi_i->CoM[2]}; - -#ifdef SWIFT_DEBUG_CHECKS - if (cell_getid(cdim, i, j, k) != cid) - error("Incorrect calculation of indices (i,j,k)=(%d,%d,%d) cid=%d", i, j, - k, cid); - - if (multi_i->r_max != multi_i->r_max_rebuild) - error( - "Multipole size not equal ot it's size after rebuild. 
But we just " - "rebuilt..."); -#endif + /* If the cell is local build a self-interaction */ + if (ci->nodeID == nodeID) { + scheduler_addtask(sched, task_type_self, task_subtype_grav, 0, 0, ci, + NULL); + } /* Loop over every other cell within (Manhattan) range delta */ - for (int x = -delta_m; x <= delta_p; x++) { - int ii = i + x; - if (ii >= cdim[0]) - ii -= cdim[0]; - else if (ii < 0) - ii += cdim[0]; - for (int y = -delta_m; y <= delta_p; y++) { - int jj = j + y; - if (jj >= cdim[1]) - jj -= cdim[1]; - else if (jj < 0) - jj += cdim[1]; - for (int z = -delta_m; z <= delta_p; z++) { - int kk = k + z; - if (kk >= cdim[2]) - kk -= cdim[2]; - else if (kk < 0) - kk += cdim[2]; + for (int ii = -delta_m; ii <= delta_p; ii++) { + int iii = i + ii; + if (!periodic && (iii < 0 || iii >= cdim[0])) continue; + iii = (iii + cdim[0]) % cdim[0]; + for (int jj = -delta_m; jj <= delta_p; jj++) { + int jjj = j + jj; + if (!periodic && (jjj < 0 || jjj >= cdim[1])) continue; + jjj = (jjj + cdim[1]) % cdim[1]; + for (int kk = -delta_m; kk <= delta_p; kk++) { + int kkk = k + kk; + if (!periodic && (kkk < 0 || kkk >= cdim[2])) continue; + kkk = (kkk + cdim[2]) % cdim[2]; /* Get the cell */ - const int cjd = cell_getid(cdim, ii, jj, kk); + const int cjd = cell_getid(cdim, iii, jjj, kkk); struct cell *cj = &cells[cjd]; -#ifdef SWIFT_DEBUG_CHECKS - const int iii = cjd / (cdim[1] * cdim[2]); - const int jjj = (cjd / cdim[2]) % cdim[1]; - const int kkk = cjd % cdim[2]; - - if (ii != iii || jj != jjj || kk != kkk) - error( - "Incorrect calculation of indices (iii,jjj,kkk)=(%d,%d,%d) " - "cjd=%d", - iii, jjj, kkk, cjd); -#endif - - /* Avoid duplicates of local pairs*/ - if (cid <= cjd && cj->nodeID == nodeID) continue; - - /* Skip cells without gravity particles */ - if (cj->grav.count == 0) continue; + /* Avoid duplicates, empty cells and completely foreign pairs */ + if (cid >= cjd || cj->grav.count == 0 || + (ci->nodeID != nodeID && cj->nodeID != nodeID)) + continue; /* Recover the multipole information */ - const struct gravity_tensors *const multi_j = cj->grav.multipole; - - /* Get the distance between the CoMs */ - double dx = CoM_i[0] - multi_j->CoM[0]; - double dy = CoM_i[1] - multi_j->CoM[1]; - double dz = CoM_i[2] - multi_j->CoM[2]; - - /* Apply BC */ - if (periodic) { - dx = nearest(dx, dim[0]); - dy = nearest(dy, dim[1]); - dz = nearest(dz, dim[2]); - } - const double r2 = dx * dx + dy * dy + dz * dz; + const struct gravity_tensors *multi_i = ci->grav.multipole; + const struct gravity_tensors *multi_j = cj->grav.multipole; + + if (multi_i == NULL && ci->nodeID != nodeID) + error("Multipole of ci was not exchanged properly via the proxies"); + if (multi_j == NULL && cj->nodeID != nodeID) + error("Multipole of cj was not exchanged properly via the proxies"); /* Minimal distance between any pair of particles */ - const double min_radius = - sqrt(r2) - (multi_i->r_max + multi_j->r_max); + const double min_radius2 = + cell_min_dist2_same_size(ci, cj, periodic, dim); /* Are we beyond the distance where the truncated forces are 0 ?*/ - if (periodic && min_radius > max_distance) continue; + if (periodic && min_radius2 > max_distance2) continue; /* Are the cells too close for a MM interaction ? 
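* (i.e. does the pair fail the opening-angle criterion when evaluated with
* the rebuild-time multipole sizes? If it does, a direct pair task is added
* below rather than relying on a multipole-multipole interaction)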
*/ if (!cell_can_use_pair_mm_rebuild(ci, cj, e, s)) { @@ -903,6 +867,54 @@ void engine_make_self_gravity_tasks_mapper(void *map_data, int num_elements, /* Ok, we need to add a direct pair calculation */ scheduler_addtask(sched, task_type_pair, task_subtype_grav, 0, 0, ci, cj); + +#ifdef SWIFT_DEBUG_CHECKS +#ifdef WITH_MPI + + /* Let's cross-check that we had a proxy for that cell */ + if (ci->nodeID == nodeID && cj->nodeID != engine_rank) { + + /* Find the proxy for this node */ + const int proxy_id = e->proxy_ind[cj->nodeID]; + if (proxy_id < 0) + error("No proxy exists for that foreign node %d!", cj->nodeID); + + const struct proxy *p = &e->proxies[proxy_id]; + + /* Check whether the cell exists in the proxy */ + int n = 0; + for (; n < p->nr_cells_in; n++) + if (p->cells_in[n] == cj) { + break; + } + if (n == p->nr_cells_in) + error( + "Cell %d not found in the proxy but trying to construct " + "grav task!", + cjd); + } else if (cj->nodeID == nodeID && ci->nodeID != engine_rank) { + + /* Find the proxy for this node */ + const int proxy_id = e->proxy_ind[ci->nodeID]; + if (proxy_id < 0) + error("No proxy exists for that foreign node %d!", ci->nodeID); + + const struct proxy *p = &e->proxies[proxy_id]; + + /* Check whether the cell exists in the proxy */ + int n = 0; + for (; n < p->nr_cells_in; n++) + if (p->cells_in[n] == ci) { + break; + } + if (n == p->nr_cells_in) + error( + "Cell %d not found in the proxy but trying to construct " + "grav task!", + cid); + } +#endif /* WITH_MPI */ +#endif /* SWIFT_DEBUG_CHECKS */ } } } @@ -932,26 +944,6 @@ void engine_make_hierarchical_tasks_mapper(void *map_data, int num_elements, } } -/** - * @brief Constructs the top-level tasks for the short-range gravity - * interactions (master function). - * - * - Create the FFT task and the array of gravity ghosts. - * - Call the mapper function to create the other tasks. - * - * @param e The #engine. - */ -void engine_make_self_gravity_tasks(struct engine *e) { - - struct space *s = e->s; - struct task **ghosts = NULL; - - /* Create the multipole self and pair tasks. */ - void *extra_data[2] = {e, ghosts}; - threadpool_map(&e->threadpool, engine_make_self_gravity_tasks_mapper, NULL, - s->nr_cells, 1, 0, extra_data); -} - /** * @brief Constructs the top-level tasks for the external gravity. * @@ -1010,6 +1002,14 @@ void engine_count_and_link_tasks_mapper(void *map_data, int num_elements, scheduler_addunlock(sched, t, finger->hydro.sorts); } + /* Link stars sort tasks to all the higher sort task. */ + if (t_type == task_type_stars_sort) { + for (struct cell *finger = t->ci->parent; finger != NULL; + finger = finger->parent) + if (finger->stars.sorts != NULL) + scheduler_addunlock(sched, t, finger->stars.sorts); + } + /* Link self tasks to cells. */ else if (t_type == task_type_self) { atomic_inc(&ci->nr_tasks); @@ -1115,9 +1115,19 @@ void engine_link_gravity_tasks(struct engine *e) { const enum task_types t_type = t->type; const enum task_subtypes t_subtype = t->subtype; - struct cell *ci_parent = (ci->parent != NULL) ? ci->parent : ci; - struct cell *cj_parent = - (cj != NULL && cj->parent != NULL) ? cj->parent : cj; + /* Pointers to the parent cells for tasks going up and down the tree + * In the case where we are at the super-level we don't + * want the parent as no tasks are defined above that level. 
*/ + struct cell *ci_parent, *cj_parent; + if (ci->parent != NULL && ci->grav.super != ci) + ci_parent = ci->parent; + else + ci_parent = ci; + + if (cj != NULL && cj->parent != NULL && cj->grav.super != cj) + cj_parent = cj->parent; + else + cj_parent = cj; /* Node ID (if running with MPI) */ #ifdef WITH_MPI @@ -1576,6 +1586,11 @@ void engine_link_stars_tasks_mapper(void *map_data, int num_elements, for (int ind = 0; ind < num_elements; ind++) { struct task *t = &((struct task *)map_data)[ind]; + /* Sort tasks depend on the drift of the cell. */ + if (t->type == task_type_stars_sort && t->ci->nodeID == engine_rank) { + scheduler_addunlock(sched, t->ci->super->grav.drift, t); + } + /* Self-interaction? */ if (t->type == task_type_self && t->subtype == task_subtype_stars_density) { @@ -1596,13 +1611,22 @@ void engine_link_stars_tasks_mapper(void *map_data, int num_elements, t->subtype == task_subtype_stars_density) { /* Make all density tasks depend on the drift and the sorts. */ - if (t->ci->nodeID == engine_rank) - scheduler_addunlock(sched, t->ci->super->hydro.drift, t); - scheduler_addunlock(sched, t->ci->super->hydro.sorts, t); + if (t->cj->nodeID == engine_rank) + scheduler_addunlock(sched, t->cj->super->hydro.drift, t); + scheduler_addunlock(sched, t->cj->super->hydro.sorts, t); + + if (t->cj->nodeID == engine_rank) + scheduler_addunlock(sched, t->cj->super->grav.drift, t); + scheduler_addunlock(sched, t->ci->super->stars.sorts, t); + if (t->ci->super != t->cj->super) { - if (t->cj->nodeID == engine_rank) - scheduler_addunlock(sched, t->cj->super->hydro.drift, t); - scheduler_addunlock(sched, t->cj->super->hydro.sorts, t); + if (t->ci->nodeID == engine_rank) + scheduler_addunlock(sched, t->ci->super->hydro.drift, t); + scheduler_addunlock(sched, t->ci->super->hydro.sorts, t); + + if (t->ci->nodeID == engine_rank) + scheduler_addunlock(sched, t->ci->super->grav.drift, t); + scheduler_addunlock(sched, t->cj->super->stars.sorts, t); } /* Now, build all the dependencies for the stars for the cells */ @@ -1624,6 +1648,8 @@ void engine_link_stars_tasks_mapper(void *map_data, int num_elements, /* Make all density tasks depend on the drift and sorts. */ scheduler_addunlock(sched, t->ci->super->hydro.drift, t); scheduler_addunlock(sched, t->ci->super->hydro.sorts, t); + scheduler_addunlock(sched, t->ci->super->grav.drift, t); + scheduler_addunlock(sched, t->ci->super->stars.sorts, t); /* Now, build all the dependencies for the stars for the cells */ /* that are local and are not descendant of the same super-cells */ @@ -1638,13 +1664,22 @@ void engine_link_stars_tasks_mapper(void *map_data, int num_elements, t->subtype == task_subtype_stars_density) { /* Make all density tasks depend on the drift. 
*/ - if (t->ci->nodeID == engine_rank) - scheduler_addunlock(sched, t->ci->super->hydro.drift, t); - scheduler_addunlock(sched, t->ci->super->hydro.sorts, t); + if (t->cj->nodeID == engine_rank) + scheduler_addunlock(sched, t->cj->super->hydro.drift, t); + scheduler_addunlock(sched, t->cj->super->hydro.sorts, t); + + if (t->cj->nodeID == engine_rank) + scheduler_addunlock(sched, t->cj->super->grav.drift, t); + scheduler_addunlock(sched, t->ci->super->stars.sorts, t); + if (t->ci->super != t->cj->super) { - if (t->cj->nodeID == engine_rank) - scheduler_addunlock(sched, t->cj->super->hydro.drift, t); - scheduler_addunlock(sched, t->cj->super->hydro.sorts, t); + if (t->ci->nodeID == engine_rank) + scheduler_addunlock(sched, t->ci->super->hydro.drift, t); + scheduler_addunlock(sched, t->ci->super->hydro.sorts, t); + + if (t->ci->nodeID == engine_rank) + scheduler_addunlock(sched, t->ci->super->grav.drift, t); + scheduler_addunlock(sched, t->cj->super->stars.sorts, t); } /* Now, build all the dependencies for the stars for the cells */ @@ -1697,8 +1732,8 @@ void engine_make_starsloop_tasks_mapper(void *map_data, int num_elements, /* Get the cell */ struct cell *ci = &cells[cid]; - /* Skip cells without star particles */ - if (ci->stars.count == 0) continue; + /* Skip cells without particles */ + if (ci->stars.count == 0 && ci->hydro.count == 0) continue; /* If the cells is local build a self-interaction */ if (ci->nodeID == nodeID) @@ -1724,7 +1759,7 @@ void engine_make_starsloop_tasks_mapper(void *map_data, int num_elements, struct cell *cj = &cells[cjd]; /* Is that neighbour local and does it have particles ? */ - if (cid >= cjd || cj->hydro.count == 0 || + if (cid >= cjd || (cj->stars.count == 0 && cj->hydro.count == 0) || (ci->nodeID != nodeID && cj->nodeID != nodeID)) continue; @@ -1768,6 +1803,8 @@ void engine_make_hydroloop_tasks_mapper(void *map_data, int num_elements, /* Get the cell index. 
*/ const int cid = (size_t)(map_data) + ind; + + /* Integer indices of the cell in the top-level grid */ const int i = cid / (cdim[1] * cdim[2]); const int j = (cid / cdim[2]) % cdim[1]; const int k = cid % cdim[2]; @@ -1778,10 +1815,11 @@ void engine_make_hydroloop_tasks_mapper(void *map_data, int num_elements, /* Skip cells without hydro particles */ if (ci->hydro.count == 0) continue; - /* If the cells is local build a self-interaction */ - if (ci->nodeID == nodeID) + /* If the cell is local build a self-interaction */ + if (ci->nodeID == nodeID) { scheduler_addtask(sched, task_type_self, task_subtype_density, 0, 0, ci, NULL); + } /* Now loop over all the neighbours of this cell */ for (int ii = -1; ii < 2; ii++) { @@ -1810,12 +1848,113 @@ void engine_make_hydroloop_tasks_mapper(void *map_data, int num_elements, const int sid = sortlistID[(kk + 1) + 3 * ((jj + 1) + 3 * (ii + 1))]; scheduler_addtask(sched, task_type_pair, task_subtype_density, sid, 0, ci, cj); + +#ifdef SWIFT_DEBUG_CHECKS +#ifdef WITH_MPI + + /* Let's cross-check that we had a proxy for that cell */ + if (ci->nodeID == nodeID && cj->nodeID != engine_rank) { + + /* Find the proxy for this node */ + const int proxy_id = e->proxy_ind[cj->nodeID]; + if (proxy_id < 0) + error("No proxy exists for that foreign node %d!", cj->nodeID); + + const struct proxy *p = &e->proxies[proxy_id]; + + /* Check whether the cell exists in the proxy */ + int n = 0; + for (n = 0; n < p->nr_cells_in; n++) + if (p->cells_in[n] == cj) break; + if (n == p->nr_cells_in) + error( + "Cell %d not found in the proxy but trying to construct " + "hydro task!", + cjd); + } else if (cj->nodeID == nodeID && ci->nodeID != engine_rank) { + + /* Find the proxy for this node */ + const int proxy_id = e->proxy_ind[ci->nodeID]; + if (proxy_id < 0) + error("No proxy exists for that foreign node %d!", ci->nodeID); + + const struct proxy *p = &e->proxies[proxy_id]; + + /* Check whether the cell exists in the proxy */ + int n = 0; + for (n = 0; n < p->nr_cells_in; n++) + if (p->cells_in[n] == ci) break; + if (n == p->nr_cells_in) + error( + "Cell %d not found in the proxy but trying to construct " + "hydro task!", + cid); + } +#endif /* WITH_MPI */ +#endif /* SWIFT_DEBUG_CHECKS */ } } } } } +struct cell_type_pair { + struct cell *ci, *cj; + int type; +}; + +void engine_addtasks_send_mapper(void *map_data, int num_elements, + void *extra_data) { + struct engine *e = (struct engine *)extra_data; + struct cell_type_pair *cell_type_pairs = (struct cell_type_pair *)map_data; + + for (int k = 0; k < num_elements; k++) { + struct cell *ci = cell_type_pairs[k].ci; + struct cell *cj = cell_type_pairs[k].cj; + const int type = cell_type_pairs[k].type; + + /* Add the send task for the particle timesteps. */ + engine_addtasks_send_timestep(e, ci, cj, NULL); + + /* Add the send tasks for the cells in the proxy that have a hydro + * connection. */ + if ((e->policy & engine_policy_hydro) && (type & proxy_cell_type_hydro)) + engine_addtasks_send_hydro(e, ci, cj, /*t_xv=*/NULL, + /*t_rho=*/NULL, /*t_gradient=*/NULL); + + /* Add the send tasks for the cells in the proxy that have a gravity + * connection. 
*/ + if ((e->policy & engine_policy_self_gravity) && + (type & proxy_cell_type_gravity)) + engine_addtasks_send_gravity(e, ci, cj, NULL); + } +} + +void engine_addtasks_recv_mapper(void *map_data, int num_elements, + void *extra_data) { + struct engine *e = (struct engine *)extra_data; + struct cell_type_pair *cell_type_pairs = (struct cell_type_pair *)map_data; + + for (int k = 0; k < num_elements; k++) { + struct cell *ci = cell_type_pairs[k].ci; + const int type = cell_type_pairs[k].type; + + /* Add the recv task for the particle timesteps. */ + engine_addtasks_recv_timestep(e, ci, NULL); + + /* Add the recv tasks for the cells in the proxy that have a hydro + * connection. */ + if ((e->policy & engine_policy_hydro) && (type & proxy_cell_type_hydro)) + engine_addtasks_recv_hydro(e, ci, NULL, NULL, NULL); + + /* Add the recv tasks for the cells in the proxy that have a gravity + * connection. */ + if ((e->policy & engine_policy_self_gravity) && + (type & proxy_cell_type_gravity)) + engine_addtasks_recv_gravity(e, ci, NULL); + } +} + /** * @brief Fill the #space's task list. * @@ -1851,8 +1990,17 @@ void engine_maketasks(struct engine *e) { s->nr_cells, 1, 0, e); } + if (e->verbose) + message("Making stellar feedback tasks took %.3f %s.", + clocks_from_ticks(getticks() - tic2), clocks_getunit()); + + tic2 = getticks(); + /* Add the self gravity tasks. */ - if (e->policy & engine_policy_self_gravity) engine_make_self_gravity_tasks(e); + if (e->policy & engine_policy_self_gravity) { + threadpool_map(&e->threadpool, engine_make_self_gravity_tasks_mapper, NULL, + s->nr_cells, 1, 0, e); + } if (e->verbose) message("Making gravity tasks took %.3f %s.", @@ -1881,7 +2029,7 @@ void engine_maketasks(struct engine *e) { #endif const size_t self_grav_tasks_per_cell = 125; const size_t ext_grav_tasks_per_cell = 1; - const size_t stars_tasks_per_cell = 15; + const size_t stars_tasks_per_cell = 27; if (e->policy & engine_policy_hydro) e->size_links += s->tot_cells * hydro_tasks_per_cell; @@ -1978,7 +2126,7 @@ void engine_maketasks(struct engine *e) { sched->nr_tasks, sizeof(struct task), 0, e); if (e->verbose) - message("Linking stars tasks took %.3f %s (including reweight).", + message("Linking stars tasks took %.3f %s.", clocks_from_ticks(getticks() - tic2), clocks_getunit()); #ifdef WITH_MPI @@ -1992,32 +2140,34 @@ void engine_maketasks(struct engine *e) { /* Loop over the proxies and add the send tasks, which also generates the * cell tags for super-cells. */ + int max_num_send_cells = 0; + for (int pid = 0; pid < e->nr_proxies; pid++) + max_num_send_cells += e->proxies[pid].nr_cells_out; + struct cell_type_pair *send_cell_type_pairs = NULL; + if ((send_cell_type_pairs = (struct cell_type_pair *)malloc( + sizeof(struct cell_type_pair) * max_num_send_cells)) == NULL) + error("Failed to allocate temporary cell pointer list."); + int num_send_cells = 0; + for (int pid = 0; pid < e->nr_proxies; pid++) { /* Get a handle on the proxy. */ struct proxy *p = &e->proxies[pid]; - for (int k = 0; k < p->nr_cells_out; k++) - engine_addtasks_send_timestep(e, p->cells_out[k], p->cells_in[0], NULL); - - /* Loop through the proxy's outgoing cells and add the - send tasks for the cells in the proxy that have a hydro connection. 
*/ - if (e->policy & engine_policy_hydro) - for (int k = 0; k < p->nr_cells_out; k++) - if (p->cells_out_type[k] & proxy_cell_type_hydro) - engine_addtasks_send_hydro(e, p->cells_out[k], p->cells_in[0], NULL, - NULL, NULL); - - /* Loop through the proxy's outgoing cells and add the - send tasks for the cells in the proxy that have a gravity connection. - */ - if (e->policy & engine_policy_self_gravity) - for (int k = 0; k < p->nr_cells_out; k++) - if (p->cells_out_type[k] & proxy_cell_type_gravity) - engine_addtasks_send_gravity(e, p->cells_out[k], p->cells_in[0], - NULL); + for (int k = 0; k < p->nr_cells_out; k++) { + send_cell_type_pairs[num_send_cells].ci = p->cells_out[k]; + send_cell_type_pairs[num_send_cells].cj = p->cells_in[0]; + send_cell_type_pairs[num_send_cells++].type = p->cells_out_type[k]; + } } + threadpool_map(&e->threadpool, engine_addtasks_send_mapper, + send_cell_type_pairs, num_send_cells, + sizeof(struct cell_type_pair), + /*chunk=*/0, e); + + free(send_cell_type_pairs); + if (e->verbose) message("Creating send tasks took %.3f %s.", clocks_from_ticks(getticks() - tic2), clocks_getunit()); @@ -2035,29 +2185,28 @@ void engine_maketasks(struct engine *e) { /* Loop over the proxies and add the recv tasks, which relies on having the * cell tags. */ + int max_num_recv_cells = 0; + for (int pid = 0; pid < e->nr_proxies; pid++) + max_num_recv_cells += e->proxies[pid].nr_cells_in; + struct cell_type_pair *recv_cell_type_pairs = NULL; + if ((recv_cell_type_pairs = (struct cell_type_pair *)malloc( + sizeof(struct cell_type_pair) * max_num_recv_cells)) == NULL) + error("Failed to allocate temporary cell pointer list."); + int num_recv_cells = 0; for (int pid = 0; pid < e->nr_proxies; pid++) { /* Get a handle on the proxy. */ struct proxy *p = &e->proxies[pid]; - - for (int k = 0; k < p->nr_cells_in; k++) - engine_addtasks_recv_timestep(e, p->cells_in[k], NULL); - - /* Loop through the proxy's incoming cells and add the - recv tasks for the cells in the proxy that have a hydro connection. */ - if (e->policy & engine_policy_hydro) - for (int k = 0; k < p->nr_cells_in; k++) - if (p->cells_in_type[k] & proxy_cell_type_hydro) - engine_addtasks_recv_hydro(e, p->cells_in[k], NULL, NULL, NULL); - - /* Loop through the proxy's incoming cells and add the - recv tasks for the cells in the proxy that have a gravity connection. 
- */ - if (e->policy & engine_policy_self_gravity) - for (int k = 0; k < p->nr_cells_in; k++) - if (p->cells_in_type[k] & proxy_cell_type_gravity) - engine_addtasks_recv_gravity(e, p->cells_in[k], NULL); + for (int k = 0; k < p->nr_cells_in; k++) { + recv_cell_type_pairs[num_recv_cells].ci = p->cells_in[k]; + recv_cell_type_pairs[num_recv_cells++].type = p->cells_in_type[k]; + } } + threadpool_map(&e->threadpool, engine_addtasks_recv_mapper, + recv_cell_type_pairs, num_recv_cells, + sizeof(struct cell_type_pair), + /*chunk=*/0, e); + free(recv_cell_type_pairs); if (e->verbose) message("Creating recv tasks took %.3f %s.", diff --git a/src/engine_marktasks.c b/src/engine_marktasks.c index 3d607bcf85fde93fc58581132e8b443a5b257fe4..ad36c532da9b582e9f3cdb3287e8cbd121642b67 100644 --- a/src/engine_marktasks.c +++ b/src/engine_marktasks.c @@ -128,7 +128,7 @@ void engine_marktasks_mapper(void *map_data, int num_elements, if (cell_is_active_stars(ci, e)) { scheduler_activate(s, t); cell_activate_drift_part(ci, s); - cell_activate_drift_gpart(ci, s); + cell_activate_drift_spart(ci, s); } } @@ -180,8 +180,10 @@ void engine_marktasks_mapper(void *map_data, int num_elements, #endif const int ci_active_hydro = cell_is_active_hydro(ci, e); const int cj_active_hydro = cell_is_active_hydro(cj, e); + const int ci_active_gravity = cell_is_active_gravity(ci, e); const int cj_active_gravity = cell_is_active_gravity(cj, e); + const int ci_active_stars = cell_is_active_stars(ci, e); const int cj_active_stars = cell_is_active_stars(cj, e); @@ -195,9 +197,7 @@ void engine_marktasks_mapper(void *map_data, int num_elements, scheduler_activate(s, t); /* Set the correct sorting flags */ - if (t_type == task_type_pair && - (t_subtype == task_subtype_density || - t_subtype == task_subtype_stars_density)) { + if (t_type == task_type_pair && t_subtype == task_subtype_density) { /* Store some values. */ atomic_or(&ci->hydro.requires_sorts, 1 << t->flags); @@ -210,15 +210,14 @@ void engine_marktasks_mapper(void *map_data, int num_elements, if (cj_nodeID == nodeID) cell_activate_drift_part(cj, s); /* Check the sorts and activate them if needed. */ - cell_activate_sorts(ci, t->flags, s); - cell_activate_sorts(cj, t->flags, s); + cell_activate_hydro_sorts(ci, t->flags, s); + cell_activate_hydro_sorts(cj, t->flags, s); } /* Store current values of dx_max and h_max. */ else if (t_type == task_type_sub_pair && - (t_subtype == task_subtype_density || - t_subtype == task_subtype_stars_density)) { + t_subtype == task_subtype_density) { cell_activate_subcell_hydro_tasks(t->ci, t->cj, s); } } @@ -228,31 +227,50 @@ void engine_marktasks_mapper(void *map_data, int num_elements, ((ci_active_stars && ci->nodeID == engine_rank) || (cj_active_stars && cj->nodeID == engine_rank))) { + // MATTHIEU: The logic here can be improved. + // If ci is active for stars but not cj, then we can only drift the + // stars in ci and parts in cj. (and vice-versa). The same logic can be + // applied in cell_unskip_stars(). + scheduler_activate(s, t); /* Set the correct sorting flags */ if (t_type == task_type_pair) { + /* Do ci */ /* Store some values. */ - atomic_or(&ci->hydro.requires_sorts, 1 << t->flags); atomic_or(&cj->hydro.requires_sorts, 1 << t->flags); - ci->hydro.dx_max_sort_old = ci->hydro.dx_max_sort; + atomic_or(&ci->stars.requires_sorts, 1 << t->flags); + cj->hydro.dx_max_sort_old = cj->hydro.dx_max_sort; + ci->stars.dx_max_sort_old = ci->stars.dx_max_sort; /* Activate the hydro drift tasks. 
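* (for this stars-density pair: the star drift of ci and the gas drift of cj;
* the symmetric gas-in-ci / stars-in-cj combination is handled just below).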
*/ - if (ci_nodeID == nodeID) { - cell_activate_drift_part(ci, s); - cell_activate_drift_gpart(ci, s); - } - if (cj_nodeID == nodeID) { - cell_activate_drift_part(cj, s); - cell_activate_drift_gpart(cj, s); - } + if (ci_nodeID == nodeID) cell_activate_drift_spart(ci, s); + + if (cj_nodeID == nodeID) cell_activate_drift_part(cj, s); /* Check the sorts and activate them if needed. */ - cell_activate_sorts(ci, t->flags, s); - cell_activate_sorts(cj, t->flags, s); + cell_activate_hydro_sorts(cj, t->flags, s); + + cell_activate_stars_sorts(ci, t->flags, s); + /* Do cj */ + /* Store some values. */ + atomic_or(&ci->hydro.requires_sorts, 1 << t->flags); + atomic_or(&cj->stars.requires_sorts, 1 << t->flags); + + ci->hydro.dx_max_sort_old = ci->hydro.dx_max_sort; + cj->stars.dx_max_sort_old = cj->stars.dx_max_sort; + + /* Activate the hydro drift tasks. */ + if (ci_nodeID == nodeID) cell_activate_drift_part(ci, s); + + if (cj_nodeID == nodeID) cell_activate_drift_spart(cj, s); + + /* Check the sorts and activate them if needed. */ + cell_activate_hydro_sorts(ci, t->flags, s); + cell_activate_stars_sorts(cj, t->flags, s); } /* Store current values of dx_max and h_max. */ diff --git a/src/lock.h b/src/lock.h index b2dd2eac9d0ca5d7807907e31cf3fa31894f9aed..39601b0c52e414dad1a507b406c54640a254df30 100644 --- a/src/lock.h +++ b/src/lock.h @@ -34,6 +34,7 @@ #define lock_trylock(l) (pthread_spin_lock(l) != 0) #define lock_unlock(l) (pthread_spin_unlock(l) != 0) #define lock_unlock_blind(l) pthread_spin_unlock(l) +#define lock_static_initializer ((pthread_spinlock_t)0) #elif defined(PTHREAD_LOCK) #include <pthread.h> @@ -44,6 +45,7 @@ #define lock_trylock(l) (pthread_mutex_trylock(l) != 0) #define lock_unlock(l) (pthread_mutex_unlock(l) != 0) #define lock_unlock_blind(l) pthread_mutex_unlock(l) +#define lock_static_initializer PTHREAD_MUTEX_INITIALIZER #else #define swift_lock_type volatile int @@ -52,12 +54,12 @@ INLINE static int lock_lock(volatile int *l) { while (atomic_cas(l, 0, 1) != 0) ; - // while( *l ); return 0; } #define lock_trylock(l) ((*(l)) ? 1 : atomic_cas(l, 0, 1)) #define lock_unlock(l) (atomic_cas(l, 1, 0) != 1) #define lock_unlock_blind(l) atomic_cas(l, 1, 0) +#define lock_static_initializer 0 #endif #endif /* SWIFT_LOCK_H */ diff --git a/src/minmax.h b/src/minmax.h index 90dd87968a94d9601a87fd3b826000c166a98966..e4d7c8788ea1e43d1c296a212193049a94347949 100644 --- a/src/minmax.h +++ b/src/minmax.h @@ -71,4 +71,36 @@ max(_temp, _z); \ }) +/** + * @brief Minimum of four numbers + * + * This macro evaluates its arguments exactly once. + */ +#define min4(x, y, z, w) \ + ({ \ + const __typeof__(x) _x = (x); \ + const __typeof__(y) _y = (y); \ + const __typeof__(z) _z = (z); \ + const __typeof__(w) _w = (w); \ + const __typeof__(x) _temp1 = min(_x, _y); \ + const __typeof__(x) _temp2 = min(_z, _w); \ + min(_temp1, _temp2); \ + }) + +/** + * @brief Maximum of four numbers + * + * This macro evaluates its arguments exactly once. 
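+ *
+ * Example (illustrative):
+ *   const float d_max = max4(dx[0], dx[1], dx[2], dx[3]);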
+ */ +#define max4(x, y, z, w) \ + ({ \ + const __typeof__(x) _x = (x); \ + const __typeof__(y) _y = (y); \ + const __typeof__(z) _z = (z); \ + const __typeof__(w) _w = (w); \ + const __typeof__(x) _temp1 = max(_x, _y); \ + const __typeof__(x) _temp2 = max(_z, _w); \ + max(_temp1, _temp2); \ + }) + #endif /* SWIFT_MINMAX_H */ diff --git a/src/parallel_io.c b/src/parallel_io.c index 7e34152f10bcca55c38e65c4158725a4755aac34..7fcc22fcae60a393523315cf4cb928629d41541e 100644 --- a/src/parallel_io.c +++ b/src/parallel_io.c @@ -708,6 +708,21 @@ void read_ic_parallel(char* fileName, const struct unit_system* internal_units, error("ICs dimensionality (%dD) does not match code dimensionality (%dD)", dimension, (int)hydro_dimension); + /* Check whether the number of files is specified (if the info exists) */ + const hid_t hid_files = H5Aexists(h_grp, "NumFilesPerSnapshot"); + int num_files = 1; + if (hid_files < 0) + error( + "Error while testing the existence of 'NumFilesPerSnapshot' attribute"); + if (hid_files > 0) + io_read_attribute(h_grp, "NumFilesPerSnapshot", INT, &num_files); + if (num_files != 1) + error( + "ICs are split over multiple files (%d). SWIFT cannot handle this " + "case. The script /tools/combine_ics.py is available in the repository " + "to combine files into a valid input file.", + num_files); + /* Read the relevant information and print status */ int flag_entropy_temp[6]; io_read_attribute(h_grp, "Flag_Entropy_ICs", INT, flag_entropy_temp); diff --git a/src/partition.c b/src/partition.c index ba29b645af4dc41369b2f01ccd48252af673a75b..bbd7454dd63be6ab5192558fb4a2e3399ea03cfc 100644 --- a/src/partition.c +++ b/src/partition.c @@ -240,33 +240,121 @@ static void graph_init(struct space *s, idx_t *adjncy, idx_t *xadj) { #endif #if defined(WITH_MPI) && (defined(HAVE_METIS) || defined(HAVE_PARMETIS)) +struct counts_mapper_data { + double *counts; + size_t size; + struct space *s; +}; + +/* Generic function for accumulating sized counts for TYPE parts. Note that + * this uses local memory to reduce contention; the amount of memory required + * is precalculated by an additional loop determining the range of cell IDs.
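+ * The macro is expanded below once per particle type (part, gpart and spart),
+ * providing the accumulate_sizes_mapper_*() helpers used by accumulate_sizes().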
*/ +#define ACCUMULATE_SIZES_MAPPER(TYPE) \ + accumulate_sizes_mapper_##TYPE(void *map_data, int num_elements, \ + void *extra_data) { \ + struct TYPE *parts = (struct TYPE *)map_data; \ + struct counts_mapper_data *mydata = \ + (struct counts_mapper_data *)extra_data; \ + double size = mydata->size; \ + int *cdim = mydata->s->cdim; \ + double iwidth[3] = {mydata->s->iwidth[0], mydata->s->iwidth[1], \ + mydata->s->iwidth[2]}; \ + double dim[3] = {mydata->s->dim[0], mydata->s->dim[1], mydata->s->dim[2]}; \ + double *lcounts = NULL; \ + int lcid = mydata->s->nr_cells; \ + int ucid = 0; \ + for (int k = 0; k < num_elements; k++) { \ + for (int j = 0; j < 3; j++) { \ + if (parts[k].x[j] < 0.0) \ + parts[k].x[j] += dim[j]; \ + else if (parts[k].x[j] >= dim[j]) \ + parts[k].x[j] -= dim[j]; \ + } \ + const int cid = \ + cell_getid(cdim, parts[k].x[0] * iwidth[0], \ + parts[k].x[1] * iwidth[1], parts[k].x[2] * iwidth[2]); \ + if (cid > ucid) ucid = cid; \ + if (cid < lcid) lcid = cid; \ + } \ + int nused = ucid - lcid + 1; \ + if ((lcounts = (double *)calloc(sizeof(double), nused)) == NULL) \ + error("Failed to allocate counts thread-specific buffer"); \ + for (int k = 0; k < num_elements; k++) { \ + const int cid = \ + cell_getid(cdim, parts[k].x[0] * iwidth[0], \ + parts[k].x[1] * iwidth[1], parts[k].x[2] * iwidth[2]); \ + lcounts[cid - lcid] += size; \ + } \ + for (int k = 0; k < nused; k++) \ + atomic_add_d(&mydata->counts[k + lcid], lcounts[k]); \ + free(lcounts); \ + } + /** - * @brief Accumulate the counts of particles per cell. + * @brief Accumulate the sized counts of particles per cell. + * Threadpool helper for accumulating the counts of particles per cell. * - * @param s the space containing the cells. - * @param counts the number of particles per cell. Should be - * allocated as size s->nr_parts. + * part version. + */ +static void ACCUMULATE_SIZES_MAPPER(part); + +/** + * @brief Accumulate the sized counts of particles per cell. + * Threadpool helper for accumulating the counts of particles per cell. + * + * gpart version. + */ +static void ACCUMULATE_SIZES_MAPPER(gpart); + +/** + * @brief Accumulate the sized counts of particles per cell. + * Threadpool helper for accumulating the counts of particles per cell. + * + * spart version. */ -static void accumulate_counts(struct space *s, double *counts) { +static void ACCUMULATE_SIZES_MAPPER(spart); - struct part *parts = s->parts; - int *cdim = s->cdim; - double iwidth[3] = {s->iwidth[0], s->iwidth[1], s->iwidth[2]}; - double dim[3] = {s->dim[0], s->dim[1], s->dim[2]}; +/** + * @brief Accumulate total memory size in particles per cell. + * + * @param s the space containing the cells. + * @param counts the number of bytes in particles per cell. Should be + * allocated as size s->nr_cells. 
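+ *
+ * The accumulated sizes are rescaled at the end so that their global sum stays
+ * below IDX_MAX, i.e. remains representable as METIS/ParMETIS integer weights.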
+ */ +static void accumulate_sizes(struct space *s, double *counts) { bzero(counts, sizeof(double) * s->nr_cells); - for (size_t k = 0; k < s->nr_parts; k++) { - for (int j = 0; j < 3; j++) { - if (parts[k].x[j] < 0.0) - parts[k].x[j] += dim[j]; - else if (parts[k].x[j] >= dim[j]) - parts[k].x[j] -= dim[j]; - } - const int cid = - cell_getid(cdim, parts[k].x[0] * iwidth[0], parts[k].x[1] * iwidth[1], - parts[k].x[2] * iwidth[2]); - counts[cid]++; + struct counts_mapper_data mapper_data; + mapper_data.counts = counts; + mapper_data.s = s; + + double hsize = (double)sizeof(struct part); + mapper_data.size = hsize; + threadpool_map(&s->e->threadpool, accumulate_sizes_mapper_part, s->parts, + s->nr_parts, sizeof(struct part), space_splitsize, + &mapper_data); + + double gsize = (double)sizeof(struct gpart); + mapper_data.size = gsize; + threadpool_map(&s->e->threadpool, accumulate_sizes_mapper_gpart, s->gparts, + s->nr_gparts, sizeof(struct gpart), space_splitsize, + &mapper_data); + + double ssize = (double)sizeof(struct spart); + mapper_data.size = ssize; + threadpool_map(&s->e->threadpool, accumulate_sizes_mapper_spart, s->sparts, + s->nr_sparts, sizeof(struct spart), space_splitsize, + &mapper_data); + + /* Keep the sum of particles across all ranks in the range of IDX_MAX. */ + if ((s->e->total_nr_parts * hsize + s->e->total_nr_gparts * gsize + + s->e->total_nr_sparts * ssize) > (double)IDX_MAX) { + double vscale = + (double)(IDX_MAX - 1000) / + (double)(s->e->total_nr_parts * hsize + s->e->total_nr_gparts * gsize + + s->e->total_nr_sparts * ssize); + for (int k = 0; k < s->nr_cells; k++) counts[k] *= vscale; } } #endif @@ -284,7 +372,7 @@ static void split_metis(struct space *s, int nregions, int *celllist) { for (int i = 0; i < s->nr_cells; i++) s->cells_top[i].nodeID = celllist[i]; /* To check or visualise the partition dump all the cells. */ - /* dumpCellRanks("metis_partition", s->cells_top, s->nr_cells);*/ + /*dumpCellRanks("metis_partition", s->cells_top, s->nr_cells);*/ } #endif @@ -427,6 +515,7 @@ void permute_regions(int *newlist, int *oldlist, int nregions, int ncells, static void pick_parmetis(int nodeID, struct space *s, int nregions, double *vertexw, double *edgew, int refine, int adaptive, float itr, int *celllist) { + int res; MPI_Comm comm; MPI_Comm_dup(MPI_COMM_WORLD, &comm); @@ -609,14 +698,14 @@ static void pick_parmetis(int nodeID, struct space *s, int nregions, /* Dump graphs to disk files for testing. ParMETIS xadj isn't right for * a dump, so make a serial-like version. */ /*{ - idx_t *tmp_xadj = (idx_t *)malloc(sizeof(idx_t) * (ncells + nregions + - 1)); + idx_t *tmp_xadj = + (idx_t *)malloc(sizeof(idx_t) * (ncells + nregions + 1)); tmp_xadj[0] = 0; for (int k = 0; k < ncells; k++) tmp_xadj[k + 1] = tmp_xadj[k] + 26; - dumpParMETISGraph("parmetis_graph", ncells, 1, tmp_xadj, full_adjncy, - full_weights_v, NULL, full_weights_e); + dumpMETISGraph("parmetis_graph", ncells, 1, tmp_xadj, full_adjncy, + full_weights_v, NULL, full_weights_e); free(tmp_xadj); - }*/ + }*/ /* Send ranges to the other ranks and keep our own. 
*/ for (int rank = 0, j1 = 0, j2 = 0, j3 = 0; rank < nregions; rank++) { @@ -1024,9 +1113,9 @@ static void pick_metis(int nodeID, struct space *s, int nregions, idx_t objval; /* Dump graph in METIS format */ - /*dumpMETISGraph("metis_graph", idx_ncells, one, xadj, adjncy, - * weights_v, NULL, weights_e); - */ + /*dumpMETISGraph("metis_graph", idx_ncells, one, xadj, adjncy, weights_v, + NULL, weights_e);*/ + if (METIS_PartGraphKway(&idx_ncells, &one, xadj, adjncy, weights_v, NULL, weights_e, &idx_nregions, NULL, NULL, options, &objval, regionid) != METIS_OK) @@ -1488,6 +1577,7 @@ void partition_repartition(struct repartition *reparttype, int nodeID, */ void partition_initial_partition(struct partition *initial_partition, int nodeID, int nr_nodes, struct space *s) { + ticks tic = getticks(); /* Geometric grid partitioning. */ if (initial_partition->type == INITPART_GRID) { @@ -1529,16 +1619,15 @@ void partition_initial_partition(struct partition *initial_partition, * inhomogeneous dist. */ - /* Space for particles per cell counts, which will be used as weights or - * not. */ + /* Space for particle sizes per cell, which will be used as weights. */ double *weights = NULL; if (initial_partition->type == INITPART_METIS_WEIGHT) { if ((weights = (double *)malloc(sizeof(double) * s->nr_cells)) == NULL) error("Failed to allocate weights buffer."); bzero(weights, sizeof(double) * s->nr_cells); - /* Check each particle and accumilate the counts per cell. */ - accumulate_counts(s, weights); + /* Check each particle and accumulate the sizes per cell. */ + accumulate_sizes(s, weights); /* Get all the counts from all the nodes. */ if (MPI_Allreduce(MPI_IN_PLACE, weights, s->nr_cells, MPI_DOUBLE, MPI_SUM, @@ -1603,6 +1692,10 @@ void partition_initial_partition(struct partition *initial_partition, error("SWIFT was not compiled with MPI support"); #endif } + + if (s->e->verbose) + message("took %.3f %s.", clocks_from_ticks(getticks() - tic), + clocks_getunit()); + } /** @@ -1793,11 +1886,11 @@ static int check_complete(struct space *s, int verbose, int nregions) { * @brief Check that the threadpool version of the weights construction is * correct by comparing to the old serial code.
* - * @tasks the list of tasks - * @nr_tasks number of tasks - * @mydata additional values as passed to threadpool - * @ref_weights_v vertex weights to check - * @ref_weights_e edge weights to check + * @param tasks the list of tasks + * @param nr_tasks number of tasks + * @param mydata additional values as passed to threadpool + * @param ref_weights_v vertex weights to check + * @param ref_weights_e edge weights to check */ static void check_weights(struct task *tasks, int nr_tasks, struct weights_mapper_data *mydata, diff --git a/src/proxy.c b/src/proxy.c index 325ed78644b07a497374e40bfc8518edcb018593..4a67b4b3584c43b2df63f17303eba9ec5e742cb0 100644 --- a/src/proxy.c +++ b/src/proxy.c @@ -269,6 +269,93 @@ void proxy_cells_exchange_second(struct proxy *p) { #endif } +#ifdef WITH_MPI + +void proxy_cells_count_mapper(void *map_data, int num_elements, + void *extra_data) { + struct cell *cells = (struct cell *)map_data; + + for (int k = 0; k < num_elements; k++) { + if (cells[k].mpi.sendto) cells[k].mpi.pcell_size = cell_getsize(&cells[k]); + } +} + +struct pack_mapper_data { + struct space *s; + int *offset; + struct pcell *pcells; + int with_gravity; +}; + +void proxy_cells_pack_mapper(void *map_data, int num_elements, + void *extra_data) { + struct cell *cells = (struct cell *)map_data; + struct pack_mapper_data *data = (struct pack_mapper_data *)extra_data; + + for (int k = 0; k < num_elements; k++) { + if (cells[k].mpi.sendto) { + ptrdiff_t ind = &cells[k] - data->s->cells_top; + cells[k].mpi.pcell = &data->pcells[data->offset[ind]]; + cell_pack(&cells[k], cells[k].mpi.pcell, data->with_gravity); + } + } +} + +void proxy_cells_exchange_first_mapper(void *map_data, int num_elements, + void *extra_data) { + struct proxy *proxies = (struct proxy *)map_data; + + for (int k = 0; k < num_elements; k++) { + proxy_cells_exchange_first(&proxies[k]); + } +} + +struct wait_and_unpack_mapper_data { + struct space *s; + int num_proxies; + MPI_Request *reqs_in; + struct proxy *proxies; + int with_gravity; + swift_lock_type lock; +}; + +void proxy_cells_wait_and_unpack_mapper(void *unused_map_data, int num_elements, + void *extra_data) { + + // MATTHIEU: This is currently unused. Scalar (non-threadpool) version is + // faster but we still need to explore why this happens. + + struct wait_and_unpack_mapper_data *data = + (struct wait_and_unpack_mapper_data *)extra_data; + + for (int k = 0; k < num_elements; k++) { + int pid = MPI_UNDEFINED; + MPI_Status status; + int res; + + /* We need a lock to prevent concurrent calls to MPI_Waitany on + the same array of requests since this is not supported in the MPI + standard (v3.1). This is not really a problem since the threads + would block inside MPI_Waitany anyway. */ + lock_lock(&data->lock); + if ((res = MPI_Waitany(data->num_proxies, data->reqs_in, &pid, &status)) != + MPI_SUCCESS || + pid == MPI_UNDEFINED) + mpi_error(res, "MPI_Waitany failed."); + if (lock_unlock(&data->lock) != 0) { + error("Failed to release lock."); + } + + // message( "cell data from proxy %i has arrived." , pid ); + for (int count = 0, j = 0; j < data->proxies[pid].nr_cells_in; j++) + count += cell_unpack(&data->proxies[pid].pcells_in[count], + data->proxies[pid].cells_in[j], data->s, + data->with_gravity); + } +} + +#endif // WITH_MPI + /** * @brief Exchange the cell structures with all proxies. * @@ -294,13 +381,14 @@ void proxy_cells_exchange(struct proxy *proxies, int num_proxies, /* Run through the cells and get the size of the ones that will be sent off. 
*/ + threadpool_map(&s->e->threadpool, proxy_cells_count_mapper, s->cells_top, + s->nr_cells, sizeof(struct cell), /*chunk=*/0, + /*extra_data=*/NULL); int count_out = 0; int offset[s->nr_cells]; for (int k = 0; k < s->nr_cells; k++) { offset[k] = count_out; - if (s->cells_top[k].mpi.sendto) - count_out += - (s->cells_top[k].mpi.pcell_size = cell_getsize(&s->cells_top[k])); + if (s->cells_top[k].mpi.sendto) count_out += s->cells_top[k].mpi.pcell_size; } if (s->e->verbose) @@ -316,19 +404,19 @@ void proxy_cells_exchange(struct proxy *proxies, int num_proxies, tic2 = getticks(); /* Pack the cells. */ - for (int k = 0; k < s->nr_cells; k++) - if (s->cells_top[k].mpi.sendto) { - cell_pack(&s->cells_top[k], &pcells[offset[k]], with_gravity); - s->cells_top[k].mpi.pcell = &pcells[offset[k]]; - } + struct pack_mapper_data data = {s, offset, pcells, with_gravity}; + threadpool_map(&s->e->threadpool, proxy_cells_pack_mapper, s->cells_top, + s->nr_cells, sizeof(struct cell), /*chunk=*/0, &data); if (s->e->verbose) message("Packing cells took %.3f %s.", clocks_from_ticks(getticks() - tic2), clocks_getunit()); /* Launch the first part of the exchange. */ + threadpool_map(&s->e->threadpool, proxy_cells_exchange_first_mapper, proxies, + num_proxies, sizeof(struct proxy), /*chunk=*/0, + /*extra_data=*/NULL); for (int k = 0; k < num_proxies; k++) { - proxy_cells_exchange_first(&proxies[k]); reqs_in[k] = proxies[k].req_cells_count_in; reqs_out[k] = proxies[k].req_cells_count_out; } diff --git a/src/runner.c b/src/runner.c index 2e73937e061c5ae4785e1cd42d2431d6ece668ad..130335c714c434758cb9afb8a81ffb1823c14f37 100644 --- a/src/runner.c +++ b/src/runner.c @@ -337,7 +337,7 @@ void runner_do_stars_ghost(struct runner *r, struct cell *c, int timer) { free(sid); } - if (timer) TIMER_TOC(timer_do_stars_ghost); + if (timer) TIMER_TOC(timer_dostars_ghost); } /** @@ -650,26 +650,33 @@ void runner_do_sort_ascending(struct entry *sort, int N) { } } +#ifdef SWIFT_DEBUG_CHECKS /** * @brief Recursively checks that the flags are consistent in a cell hierarchy. * - * Debugging function. + * Debugging function. Exists in two flavours: hydro & stars. * * @param c The #cell to check. * @param flags The sorting flags to check. */ -void runner_check_sorts(struct cell *c, int flags) { - -#ifdef SWIFT_DEBUG_CHECKS - if (flags & ~c->hydro.sorted) error("Inconsistent sort flags (downward)!"); - if (c->split) - for (int k = 0; k < 8; k++) - if (c->progeny[k] != NULL && c->progeny[k]->hydro.count > 0) - runner_check_sorts(c->progeny[k], c->hydro.sorted); +#define RUNNER_CHECK_SORTS(TYPE) \ + void runner_check_sorts_##TYPE(struct cell *c, int flags) { \ + \ + if (flags & ~c->TYPE.sorted) error("Inconsistent sort flags (downward)!"); \ + if (c->split) \ + for (int k = 0; k < 8; k++) \ + if (c->progeny[k] != NULL && c->progeny[k]->TYPE.count > 0) \ + runner_check_sorts_##TYPE(c->progeny[k], c->TYPE.sorted); \ + } #else - error("Calling debugging code without debugging flag activated."); +#define RUNNER_CHECK_SORTS(TYPE) \ + void runner_check_sorts_##TYPE(struct cell *c, int flags) { \ + error("Calling debugging code without debugging flag activated."); \ + } #endif -} + +RUNNER_CHECK_SORTS(hydro) +RUNNER_CHECK_SORTS(stars) /** * @brief Sort the particles in the given cell along all cardinal directions. @@ -682,8 +689,8 @@ void runner_check_sorts(struct cell *c, int flags) { * @param clock Flag indicating whether to record the timing or not, needed * for recursive calls. 
*/ -void runner_do_sort(struct runner *r, struct cell *c, int flags, int cleanup, - int clock) { +void runner_do_hydro_sort(struct runner *r, struct cell *c, int flags, + int cleanup, int clock) { struct entry *fingers[8]; const int count = c->hydro.count; @@ -708,7 +715,7 @@ void runner_do_sort(struct runner *r, struct cell *c, int flags, int cleanup, #ifdef SWIFT_DEBUG_CHECKS /* Make sure the sort flags are consistent (downward). */ - runner_check_sorts(c, c->hydro.sorted); + runner_check_sorts_hydro(c, c->hydro.sorted); /* Make sure the sort flags are consistent (upard). */ for (struct cell *finger = c->parent; finger != NULL; @@ -740,10 +747,10 @@ void runner_do_sort(struct runner *r, struct cell *c, int flags, int cleanup, for (int k = 0; k < 8; k++) { if (c->progeny[k] != NULL && c->progeny[k]->hydro.count > 0) { /* Only propagate cleanup if the progeny is stale. */ - runner_do_sort(r, c->progeny[k], flags, - cleanup && (c->progeny[k]->hydro.dx_max_sort > - space_maxreldx * c->progeny[k]->dmin), - 0); + runner_do_hydro_sort(r, c->progeny[k], flags, + cleanup && (c->progeny[k]->hydro.dx_max_sort_old > + space_maxreldx * c->progeny[k]->dmin), + 0); dx_max_sort = max(dx_max_sort, c->progeny[k]->hydro.dx_max_sort); dx_max_sort_old = max(dx_max_sort_old, c->progeny[k]->hydro.dx_max_sort_old); @@ -877,7 +884,7 @@ void runner_do_sort(struct runner *r, struct cell *c, int flags, int cleanup, } /* Make sure the sort flags are consistent (downward). */ - runner_check_sorts(c, flags); + runner_check_sorts_hydro(c, flags); /* Make sure the sort flags are consistent (upward). */ for (struct cell *finger = c->parent; finger != NULL; @@ -895,6 +902,224 @@ void runner_do_sort(struct runner *r, struct cell *c, int flags, int cleanup, if (clock) TIMER_TOC(timer_dosort); } +/** + * @brief Sort the stars particles in the given cell along all cardinal + * directions. + * + * @param r The #runner. + * @param c The #cell. + * @param flags Cell flag. + * @param cleanup If true, re-build the sorts for the selected flags instead + * of just adding them. + * @param clock Flag indicating whether to record the timing or not, needed + * for recursive calls. + */ +void runner_do_stars_sort(struct runner *r, struct cell *c, int flags, + int cleanup, int clock) { + + struct entry *fingers[8]; + const int count = c->stars.count; + struct spart *sparts = c->stars.parts; + float buff[8]; + + TIMER_TIC; + + /* We need to do the local sorts plus whatever was requested further up. */ + flags |= c->stars.do_sort; + if (cleanup) { + c->stars.sorted = 0; + } else { + flags &= ~c->stars.sorted; + } + if (flags == 0 && !c->stars.do_sub_sort) return; + + /* Check that the particles have been moved to the current time */ + if (flags && !cell_are_spart_drifted(c, r->e)) + error("Sorting un-drifted cell c->nodeID=%d", c->nodeID); + +#ifdef SWIFT_DEBUG_CHECKS + /* Make sure the sort flags are consistent (downward). */ + runner_check_sorts_stars(c, c->stars.sorted); + + /* Make sure the sort flags are consistent (upward). */ + for (struct cell *finger = c->parent; finger != NULL; + finger = finger->parent) { + if (finger->stars.sorted & ~c->stars.sorted) + error("Inconsistent sort flags (upward)."); + } + + /* Update the sort timer which represents the last time the sorts + were re-set. */ + if (c->stars.sorted == 0) c->stars.ti_sort = r->e->ti_current; +#endif + + /* start by allocating the entry arrays in the requested dimensions. 
*/ + for (int j = 0; j < 13; j++) { + if ((flags & (1 << j)) && c->stars.sort[j] == NULL) { + if ((c->stars.sort[j] = (struct entry *)malloc(sizeof(struct entry) * + (count + 1))) == NULL) + error("Failed to allocate sort memory."); + } + } + + /* Does this cell have any progeny? */ + if (c->split) { + + /* Fill in the gaps within the progeny. */ + float dx_max_sort = 0.0f; + float dx_max_sort_old = 0.0f; + for (int k = 0; k < 8; k++) { + if (c->progeny[k] != NULL && c->progeny[k]->stars.count > 0) { + /* Only propagate cleanup if the progeny is stale. */ + runner_do_stars_sort(r, c->progeny[k], flags, + cleanup && (c->progeny[k]->stars.dx_max_sort_old > + space_maxreldx * c->progeny[k]->dmin), + 0); + dx_max_sort = max(dx_max_sort, c->progeny[k]->stars.dx_max_sort); + dx_max_sort_old = + max(dx_max_sort_old, c->progeny[k]->stars.dx_max_sort_old); + } + } + c->stars.dx_max_sort = dx_max_sort; + c->stars.dx_max_sort_old = dx_max_sort_old; + + /* Loop over the 13 different sort arrays. */ + for (int j = 0; j < 13; j++) { + + /* Has this sort array been flagged? */ + if (!(flags & (1 << j))) continue; + + /* Init the particle index offsets. */ + int off[8]; + off[0] = 0; + for (int k = 1; k < 8; k++) + if (c->progeny[k - 1] != NULL) + off[k] = off[k - 1] + c->progeny[k - 1]->stars.count; + else + off[k] = off[k - 1]; + + /* Init the entries and indices. */ + int inds[8]; + for (int k = 0; k < 8; k++) { + inds[k] = k; + if (c->progeny[k] != NULL && c->progeny[k]->stars.count > 0) { + fingers[k] = c->progeny[k]->stars.sort[j]; + buff[k] = fingers[k]->d; + off[k] = off[k]; + } else + buff[k] = FLT_MAX; + } + + /* Sort the buffer. */ + for (int i = 0; i < 7; i++) + for (int k = i + 1; k < 8; k++) + if (buff[inds[k]] < buff[inds[i]]) { + int temp_i = inds[i]; + inds[i] = inds[k]; + inds[k] = temp_i; + } + + /* For each entry in the new sort list. */ + struct entry *finger = c->stars.sort[j]; + for (int ind = 0; ind < count; ind++) { + + /* Copy the minimum into the new sort array. */ + finger[ind].d = buff[inds[0]]; + finger[ind].i = fingers[inds[0]]->i + off[inds[0]]; + + /* Update the buffer. */ + fingers[inds[0]] += 1; + buff[inds[0]] = fingers[inds[0]]->d; + + /* Find the smallest entry. */ + for (int k = 1; k < 8 && buff[inds[k]] < buff[inds[k - 1]]; k++) { + int temp_i = inds[k - 1]; + inds[k - 1] = inds[k]; + inds[k] = temp_i; + } + + } /* Merge. */ + + /* Add a sentinel. */ + c->stars.sort[j][count].d = FLT_MAX; + c->stars.sort[j][count].i = 0; + + /* Mark as sorted. */ + atomic_or(&c->stars.sorted, 1 << j); + + } /* loop over sort arrays. */ + + } /* progeny? */ + + /* Otherwise, just sort. */ + else { + + /* Reset the sort distance */ + if (c->stars.sorted == 0) { + + /* And the individual sort distances if we are a local cell */ + for (int k = 0; k < count; k++) { + sparts[k].x_diff_sort[0] = 0.0f; + sparts[k].x_diff_sort[1] = 0.0f; + sparts[k].x_diff_sort[2] = 0.0f; + } + c->stars.dx_max_sort_old = 0.f; + c->stars.dx_max_sort = 0.f; + } + + /* Fill the sort array. */ + for (int k = 0; k < count; k++) { + const double px[3] = {sparts[k].x[0], sparts[k].x[1], sparts[k].x[2]}; + for (int j = 0; j < 13; j++) + if (flags & (1 << j)) { + c->stars.sort[j][k].i = k; + c->stars.sort[j][k].d = px[0] * runner_shift[j][0] + + px[1] * runner_shift[j][1] + + px[2] * runner_shift[j][2]; + } + } + + /* Add the sentinel and sort. 
*/ + for (int j = 0; j < 13; j++) + if (flags & (1 << j)) { + c->stars.sort[j][count].d = FLT_MAX; + c->stars.sort[j][count].i = 0; + runner_do_sort_ascending(c->stars.sort[j], count); + atomic_or(&c->stars.sorted, 1 << j); + } + } + +#ifdef SWIFT_DEBUG_CHECKS + /* Verify the sorting. */ + for (int j = 0; j < 13; j++) { + if (!(flags & (1 << j))) continue; + struct entry *finger = c->stars.sort[j]; + for (int k = 1; k < count; k++) { + if (finger[k].d < finger[k - 1].d) + error("Sorting failed, ascending array."); + if (finger[k].i >= count) error("Sorting failed, indices borked."); + } + } + + /* Make sure the sort flags are consistent (downward). */ + runner_check_sorts_stars(c, flags); + + /* Make sure the sort flags are consistent (upward). */ + for (struct cell *finger = c->parent; finger != NULL; + finger = finger->parent) { + if (finger->stars.sorted & ~c->stars.sorted) + error("Inconsistent sort flags."); + } +#endif + + /* Clear the cell's sort flags. */ + c->stars.do_sort = 0; + c->stars.do_sub_sort = 0; + c->stars.requires_sorts = 0; + + if (clock) TIMER_TOC(timer_do_stars_sort); +} + /** * @brief Initialize the multipoles before the gravity calculation. * @@ -1853,6 +2078,7 @@ void runner_do_timestep(struct runner *r, struct cell *c, int timer) { ti_hydro_beg_max = 0; integertime_t ti_gravity_end_min = max_nr_timesteps, ti_gravity_end_max = 0, ti_gravity_beg_max = 0; + integertime_t ti_stars_end_min = max_nr_timesteps; /* No children? */ if (!c->split) { @@ -2029,6 +2255,8 @@ void runner_do_timestep(struct runner *r, struct cell *c, int timer) { ti_gravity_end_min = min(ti_current + ti_new_step, ti_gravity_end_min); ti_gravity_end_max = max(ti_current + ti_new_step, ti_gravity_end_max); + ti_stars_end_min = min(ti_current + ti_new_step, ti_stars_end_min); + /* What is the next starting point for this cell ? */ ti_gravity_beg_max = max(ti_current, ti_gravity_beg_max); @@ -2045,6 +2273,8 @@ void runner_do_timestep(struct runner *r, struct cell *c, int timer) { ti_gravity_end_min = min(ti_end, ti_gravity_end_min); ti_gravity_end_max = max(ti_end, ti_gravity_end_max); + ti_stars_end_min = min(ti_end, ti_stars_end_min); + const integertime_t ti_beg = get_integer_time_begin(ti_current + 1, sp->time_bin); @@ -2075,6 +2305,7 @@ void runner_do_timestep(struct runner *r, struct cell *c, int timer) { ti_gravity_end_min = min(cp->grav.ti_end_min, ti_gravity_end_min); ti_gravity_end_max = max(cp->grav.ti_end_max, ti_gravity_end_max); ti_gravity_beg_max = max(cp->grav.ti_beg_max, ti_gravity_beg_max); + ti_stars_end_min = min(cp->stars.ti_end_min, ti_stars_end_min); } } @@ -2091,6 +2322,7 @@ void runner_do_timestep(struct runner *r, struct cell *c, int timer) { c->grav.ti_end_min = ti_gravity_end_min; c->grav.ti_end_max = ti_gravity_end_max; c->grav.ti_beg_max = ti_gravity_beg_max; + c->stars.ti_end_min = ti_stars_end_min; #ifdef SWIFT_DEBUG_CHECKS if (c->hydro.ti_end_min == e->ti_current && @@ -2099,6 +2331,9 @@ void runner_do_timestep(struct runner *r, struct cell *c, int timer) { if (c->grav.ti_end_min == e->ti_current && c->grav.ti_end_min < max_nr_timesteps) error("End of next gravity step is current time!"); + if (c->stars.ti_end_min == e->ti_current && + c->stars.ti_end_min < max_nr_timesteps) + error("End of next stars step is current time!"); #endif if (timer) TIMER_TOC(timer_timestep); @@ -2615,9 +2850,17 @@ void *runner_main(void *data) { case task_type_sort: /* Cleanup only if any of the indices went stale. 
*/ - runner_do_sort(r, ci, t->flags, - ci->hydro.dx_max_sort_old > space_maxreldx * ci->dmin, - 1); + runner_do_hydro_sort( + r, ci, t->flags, + ci->hydro.dx_max_sort_old > space_maxreldx * ci->dmin, 1); + /* Reset the sort flags as our work here is done. */ + t->flags = 0; + break; + case task_type_stars_sort: + /* Cleanup only if any of the indices went stale. */ + runner_do_stars_sort( + r, ci, t->flags, + ci->stars.dx_max_sort_old > space_maxreldx * ci->dmin, 1); /* Reset the sort flags as our work here is done. */ t->flags = 0; break; diff --git a/src/runner.h b/src/runner.h index 29c42da405255c217d43cc905e87dde9b69276f9..6af0cd227374afd616b3329a8dbd527634902922 100644 --- a/src/runner.h +++ b/src/runner.h @@ -69,8 +69,10 @@ struct runner { /* Function prototypes. */ void runner_do_ghost(struct runner *r, struct cell *c, int timer); void runner_do_extra_ghost(struct runner *r, struct cell *c, int timer); -void runner_do_sort(struct runner *r, struct cell *c, int flag, int cleanup, - int clock); +void runner_do_hydro_sort(struct runner *r, struct cell *c, int flag, + int cleanup, int clock); +void runner_do_stars_sort(struct runner *r, struct cell *c, int flag, + int cleanup, int clock); void runner_do_drift_part(struct runner *r, struct cell *c, int timer); void runner_do_drift_gpart(struct runner *r, struct cell *c, int timer); void runner_do_kick1(struct runner *r, struct cell *c, int timer); diff --git a/src/runner_doiact_grav.h b/src/runner_doiact_grav.h index 5bcd57d643911703bc0f4e19f17fdb63a11a5c12..c6885746a29fd7b6bd828496316f8dad01c1b7da 100644 --- a/src/runner_doiact_grav.h +++ b/src/runner_doiact_grav.h @@ -1714,7 +1714,7 @@ static INLINE void runner_do_grav_long_range(struct runner *r, struct cell *ci, const int periodic = e->mesh->periodic; const double dim[3] = {e->mesh->dim[0], e->mesh->dim[1], e->mesh->dim[2]}; const double theta_crit2 = e->gravity_properties->theta_crit2; - const double max_distance = e->mesh->r_cut_max; + const double max_distance2 = e->mesh->r_cut_max * e->mesh->r_cut_max; TIMER_TIC; @@ -1759,6 +1759,29 @@ static INLINE void runner_do_grav_long_range(struct runner *r, struct cell *ci, /* Skip empty cells */ if (multi_j->m_pole.M_000 == 0.f) continue; + /* Can we escape early in the periodic BC case? */ + if (periodic) { + + /* Minimal distance between any pair of particles */ + const double min_radius2 = + cell_min_dist2_same_size(top, cj, periodic, dim); + + /* Are we beyond the distance where the truncated forces are 0 ?*/ + if (min_radius2 > max_distance2) { + +#ifdef SWIFT_DEBUG_CHECKS + /* Need to account for the interactions we missed */ + multi_i->pot.num_interacted += multi_j->m_pole.num_gpart; +#endif + + /* Record that this multipole received a contribution */ + multi_i->pot.interacted = 1; + + /* We are done here. 
*/ + continue; + } + } + /* Get the distance between the CoMs at the last rebuild*/ double dx_r = CoM_rebuild_top[0] - multi_j->CoM_rebuild[0]; double dy_r = CoM_rebuild_top[1] - multi_j->CoM_rebuild[1]; @@ -1772,24 +1795,6 @@ static INLINE void runner_do_grav_long_range(struct runner *r, struct cell *ci, } const double r2_rebuild = dx_r * dx_r + dy_r * dy_r + dz_r * dz_r; - const double max_radius = - sqrt(r2_rebuild) - (multi_top->r_max_rebuild + multi_j->r_max_rebuild); - - /* Are we beyond the distance where the truncated forces are 0 ?*/ - if (periodic && max_radius > max_distance) { - -#ifdef SWIFT_DEBUG_CHECKS - /* Need to account for the interactions we missed */ - multi_i->pot.num_interacted += multi_j->m_pole.num_gpart; -#endif - - /* Record that this multipole received a contribution */ - multi_i->pot.interacted = 1; - - /* We are done here. */ - continue; - } - /* Are we in charge of this cell pair? */ if (gravity_M2L_accept(multi_top->r_max_rebuild, multi_j->r_max_rebuild, theta_crit2, r2_rebuild)) { diff --git a/src/runner_doiact_stars.h b/src/runner_doiact_stars.h index e696e4fd10853536008d9d9fafc90e6475fd291a..3a920ab553f3e604619dfc4b6efe506e83e9ef52 100644 --- a/src/runner_doiact_stars.h +++ b/src/runner_doiact_stars.h @@ -17,6 +17,8 @@ * along with this program. If not, see <http://www.gnu.org/licenses/>. * ******************************************************************************/ +#ifndef SWIFT_RUNNER_DOIACT_STARS_H +#define SWIFT_RUNNER_DOIACT_STARS_H #include "swift.h" @@ -35,6 +37,7 @@ void runner_doself_stars_density(struct runner *r, struct cell *c, int timer) { /* Anything to do here? */ if (!cell_is_active_stars(c, e)) return; + if (c->hydro.count == 0 && c->stars.count == 0) return; /* Cosmological terms */ const float a = cosmo->a; @@ -99,7 +102,7 @@ void runner_dosubpair_stars_density(struct runner *r, struct cell *restrict ci, const struct cosmology *cosmo = e->cosmology; /* Anything to do here? */ - if (!cell_is_active_stars(ci, e) && !cell_is_active_stars(cj, e)) return; + if (!cell_is_active_stars(ci, e)) return; const int scount_i = ci->stars.count; const int count_j = cj->hydro.count; @@ -162,8 +165,10 @@ void runner_dopair_stars_density(struct runner *r, struct cell *restrict ci, TIMER_TIC; - runner_dosubpair_stars_density(r, ci, cj); - runner_dosubpair_stars_density(r, cj, ci); + if (ci->stars.count != 0 && cj->hydro.count != 0) + runner_dosubpair_stars_density(r, ci, cj); + if (cj->stars.count != 0 && ci->hydro.count != 0) + runner_dosubpair_stars_density(r, cj, ci); if (timer) TIMER_TOC(timer_dopair_stars_density); } @@ -242,7 +247,7 @@ void runner_dopair_subset_stars_density(struct runner *r, } /* loop over the parts in cj. */ } /* loop over the parts in ci. */ - TIMER_TOC(timer_dopair_subset_naive); + TIMER_TOC(timer_dopair_subset_stars_density); } /** @@ -382,7 +387,6 @@ void runner_dosub_subset_stars_density(struct runner *r, struct cell *ci, if (!cell_is_active_stars(ci, e) && (cj == NULL || !cell_is_active_stars(cj, e))) return; - if (ci->stars.count == 0 || (cj != NULL && cj->stars.count == 0)) return; /* Find out in which sub-cell of ci the parts are. */ struct cell *sub = NULL; @@ -944,7 +948,7 @@ void runner_dosub_subset_stars_density(struct runner *r, struct cell *ci, } /* otherwise, pair interaction. 
*/ - if (gettimer) TIMER_TOC(timer_dosub_subset); + if (gettimer) TIMER_TOC(timer_dosub_subset_pair_stars_density); } /** @@ -969,6 +973,34 @@ void runner_doself_branch_stars_density(struct runner *r, struct cell *c) { runner_doself_stars_density(r, c, 1); } +#define RUNNER_STARS_CHECK_SORT(TYPE, PART) \ + void runner_stars_check_sort_##TYPE(struct cell *cj, struct cell *ci, \ + const int sid) { \ + const struct entry *restrict sort_j = cj->TYPE.sort[sid]; \ + \ + for (int pjd = 0; pjd < cj->TYPE.count; pjd++) { \ + const struct PART *p = &cj->TYPE.parts[sort_j[pjd].i]; \ + const float d = p->x[0] * runner_shift[sid][0] + \ + p->x[1] * runner_shift[sid][1] + \ + p->x[2] * runner_shift[sid][2]; \ + if ((fabsf(d - sort_j[pjd].d) - cj->TYPE.dx_max_sort) > \ + 1.0e-4 * max(fabsf(d), cj->TYPE.dx_max_sort_old) && \ + (fabsf(d - sort_j[pjd].d) - cj->TYPE.dx_max_sort) > \ + cj->width[0] * 1.0e-10) \ + error( \ + "particle shift diff exceeds dx_max_sort in cell cj. " \ + "cj->nodeID=%d " \ + "ci->nodeID=%d d=%e sort_j[pjd].d=%e cj->" #TYPE \ + ".dx_max_sort=%e " \ + "cj->" #TYPE ".dx_max_sort_old=%e", \ + cj->nodeID, ci->nodeID, d, sort_j[pjd].d, cj->TYPE.dx_max_sort, \ + cj->TYPE.dx_max_sort_old); \ + } \ + } + +RUNNER_STARS_CHECK_SORT(hydro, part) +RUNNER_STARS_CHECK_SORT(stars, spart) + /** * @brief Determine which version of DOPAIR1 needs to be called depending on the * orientation of the cells or whether DOPAIR1 needs to be called at all. @@ -982,64 +1014,54 @@ void runner_dopair_branch_stars_density(struct runner *r, struct cell *ci, struct cell *cj) { const struct engine *restrict e = r->e; + const int ci_active = cell_is_active_stars(ci, e); + const int cj_active = cell_is_active_stars(cj, e); + const int do_ci = (ci->stars.count != 0 && cj->hydro.count != 0 && ci_active); + const int do_cj = (cj->stars.count != 0 && ci->hydro.count != 0 && cj_active); /* Anything to do here? */ - if (!cell_is_active_stars(ci, e) && !cell_is_active_stars(cj, e)) return; - - /* Check that cells are drifted. */ - if (!cell_are_part_drifted(ci, e) || !cell_are_part_drifted(cj, e)) - error("Interacting undrifted cells."); + if (!do_ci && !do_cj) return; /* Get the sort ID. */ double shift[3] = {0.0, 0.0, 0.0}; const int sid = space_getsid(e->s, &ci, &cj, shift); + /* Check that cells are drifted. */ + if (do_ci && + (!cell_are_spart_drifted(ci, e) || !cell_are_part_drifted(cj, e))) + error("Interacting undrifted cells."); + /* Have the cells been sorted? */ - if (!(ci->hydro.sorted & (1 << sid)) || - ci->hydro.dx_max_sort_old > space_maxreldx * ci->dmin) + if (do_ci && (!(ci->stars.sorted & (1 << sid)) || + ci->stars.dx_max_sort_old > space_maxreldx * ci->dmin)) error("Interacting unsorted cells."); - if (!(cj->hydro.sorted & (1 << sid)) || - cj->hydro.dx_max_sort_old > space_maxreldx * cj->dmin) + + if (do_ci && (!(cj->hydro.sorted & (1 << sid)) || + cj->hydro.dx_max_sort_old > space_maxreldx * cj->dmin)) + error("Interacting unsorted cells."); + + if (do_cj && + (!cell_are_part_drifted(ci, e) || !cell_are_spart_drifted(cj, e))) + error("Interacting undrifted cells."); + + /* Have the cells been sorted? */ + if (do_cj && (!(ci->hydro.sorted & (1 << sid)) || + ci->hydro.dx_max_sort_old > space_maxreldx * ci->dmin)) + error("Interacting unsorted cells."); + + if (do_cj && (!(cj->stars.sorted & (1 << sid)) || + cj->stars.dx_max_sort_old > space_maxreldx * cj->dmin)) error("Interacting unsorted cells."); #ifdef SWIFT_DEBUG_CHECKS - /* Pick-out the sorted lists. 
*/ - const struct entry *restrict sort_i = ci->hydro.sort[sid]; - const struct entry *restrict sort_j = cj->hydro.sort[sid]; - - /* Check that the dx_max_sort values in the cell are indeed an upper - bound on particle movement. */ - for (int pid = 0; pid < ci->hydro.count; pid++) { - const struct part *p = &ci->hydro.parts[sort_i[pid].i]; - const float d = p->x[0] * runner_shift[sid][0] + - p->x[1] * runner_shift[sid][1] + - p->x[2] * runner_shift[sid][2]; - if (fabsf(d - sort_i[pid].d) - ci->hydro.dx_max_sort > - 1.0e-4 * max(fabsf(d), ci->hydro.dx_max_sort_old) && - fabsf(d - sort_i[pid].d) - ci->hydro.dx_max_sort > - ci->width[0] * 1.0e-10) - error( - "particle shift diff exceeds dx_max_sort in cell ci. ci->nodeID=%d " - "cj->nodeID=%d d=%e sort_i[pid].d=%e ci->hydro.dx_max_sort=%e " - "ci->hydro.dx_max_sort_old=%e", - ci->nodeID, cj->nodeID, d, sort_i[pid].d, ci->hydro.dx_max_sort, - ci->hydro.dx_max_sort_old); + if (do_ci) { + runner_stars_check_sort_hydro(cj, ci, sid); + runner_stars_check_sort_stars(ci, cj, sid); } - for (int pjd = 0; pjd < cj->hydro.count; pjd++) { - const struct part *p = &cj->hydro.parts[sort_j[pjd].i]; - const float d = p->x[0] * runner_shift[sid][0] + - p->x[1] * runner_shift[sid][1] + - p->x[2] * runner_shift[sid][2]; - if ((fabsf(d - sort_j[pjd].d) - cj->hydro.dx_max_sort) > - 1.0e-4 * max(fabsf(d), cj->hydro.dx_max_sort_old) && - (fabsf(d - sort_j[pjd].d) - cj->hydro.dx_max_sort) > - cj->width[0] * 1.0e-10) - error( - "particle shift diff exceeds dx_max_sort in cell cj. cj->nodeID=%d " - "ci->nodeID=%d d=%e sort_j[pjd].d=%e cj->hydro.dx_max_sort=%e " - "cj->hydro.dx_max_sort_old=%e", - cj->nodeID, ci->nodeID, d, sort_j[pjd].d, cj->hydro.dx_max_sort, - cj->hydro.dx_max_sort_old); + + if (do_cj) { + runner_stars_check_sort_hydro(ci, cj, sid); + runner_stars_check_sort_stars(cj, ci, sid); } #endif /* SWIFT_DEBUG_CHECKS */ @@ -1067,8 +1089,11 @@ void runner_dosub_pair_stars_density(struct runner *r, struct cell *ci, TIMER_TIC; /* Should we even bother? */ - if (!cell_is_active_stars(ci, e) && !cell_is_active_stars(cj, e)) return; - if (ci->stars.count == 0 || cj->stars.count == 0) return; + int should_do = ci->stars.count != 0 && cj->hydro.count != 0 && + cell_is_active_stars(ci, e); + should_do |= cj->stars.count != 0 && ci->hydro.count != 0 && + cell_is_active_stars(cj, e); + if (!should_do) return; /* Get the type of pair if not specified explicitly. */ double shift[3]; @@ -1353,25 +1378,55 @@ void runner_dosub_pair_stars_density(struct runner *r, struct cell *ci, } /* Otherwise, compute the pair directly. */ - else if (cell_is_active_stars(ci, e) || cell_is_active_stars(cj, e)) { - - /* Make sure both cells are drifted to the current timestep. */ - if (!cell_are_part_drifted(ci, e) || !cell_are_part_drifted(cj, e)) - error("Interacting undrifted cells."); - - /* Do any of the cells need to be sorted first? */ - if (!(ci->hydro.sorted & (1 << sid)) || - ci->hydro.dx_max_sort_old > ci->dmin * space_maxreldx) - error("Interacting unsorted cell."); - if (!(cj->hydro.sorted & (1 << sid)) || - cj->hydro.dx_max_sort_old > cj->dmin * space_maxreldx) - error("Interacting unsorted cell."); - - /* Compute the interactions. */ - runner_dopair_branch_stars_density(r, ci, cj); + else { + + const int do_ci = ci->stars.count != 0 && cj->hydro.count != 0 && + cell_is_active_stars(ci, e); + const int do_cj = cj->stars.count != 0 && ci->hydro.count != 0 && + cell_is_active_stars(cj, e); + + if (do_ci) { + + /* Make sure both cells are drifted to the current timestep. 
*/ + if (!cell_are_spart_drifted(ci, e)) + error("Interacting undrifted cells (sparts)."); + + if (!cell_are_part_drifted(cj, e)) + error("Interacting undrifted cells (parts)."); + + /* Do any of the cells need to be sorted first? */ + if (!(ci->stars.sorted & (1 << sid)) || + ci->stars.dx_max_sort_old > ci->dmin * space_maxreldx) + error("Interacting unsorted cell (sparts)."); + + if (!(cj->hydro.sorted & (1 << sid)) || + cj->hydro.dx_max_sort_old > cj->dmin * space_maxreldx) + error("Interacting unsorted cell (parts)."); + } + + if (do_cj) { + + /* Make sure both cells are drifted to the current timestep. */ + if (!cell_are_part_drifted(ci, e)) + error("Interacting undrifted cells (parts)."); + + if (!cell_are_spart_drifted(cj, e)) + error("Interacting undrifted cells (sparts)."); + + /* Do any of the cells need to be sorted first? */ + if (!(ci->hydro.sorted & (1 << sid)) || + ci->hydro.dx_max_sort_old > ci->dmin * space_maxreldx) + error("Interacting unsorted cell (parts)."); + + if (!(cj->stars.sorted & (1 << sid)) || + cj->stars.dx_max_sort_old > cj->dmin * space_maxreldx) + error("Interacting unsorted cell (sparts)."); + } + + if (do_ci || do_cj) runner_dopair_branch_stars_density(r, ci, cj); } - if (gettimer) TIMER_TOC(TIMER_DOSUB_PAIR); + if (gettimer) TIMER_TOC(timer_dosub_pair_stars_density); } /** @@ -1387,7 +1442,9 @@ void runner_dosub_self_stars_density(struct runner *r, struct cell *ci, TIMER_TIC; /* Should we even bother? */ - if (ci->stars.count == 0 || !cell_is_active_stars(ci, r->e)) return; + if (ci->hydro.count == 0 || ci->stars.count == 0 || + !cell_is_active_stars(ci, r->e)) + return; /* Recurse? */ if (cell_can_recurse_in_self_stars_task(ci)) { @@ -1406,8 +1463,13 @@ void runner_dosub_self_stars_density(struct runner *r, struct cell *ci, /* Otherwise, compute self-interaction. */ else { + /* Drift the cell to the current timestep if needed. */ + if (!cell_are_spart_drifted(ci, r->e)) error("Interacting undrifted cell."); + runner_doself_branch_stars_density(r, ci); } if (gettimer) TIMER_TOC(timer_dosub_self_stars_density); } + +#endif // SWIFT_RUNNER_DOIACT_STARS_H diff --git a/src/scheduler.c b/src/scheduler.c index 28fb4146983ee40144ff30693453f7e3b27eac3b..2ae8f6785434af021b52dd2d6586b4e2dc5d68bb 100644 --- a/src/scheduler.c +++ b/src/scheduler.c @@ -408,7 +408,7 @@ static void scheduler_splittask_hydro(struct task *t, struct scheduler *s) { /* Reset the redo flag. */ redo = 0; - /* Non-splittable task? */ + /* Empty task? */ if ((t->ci == NULL) || (t->type == task_type_pair && t->cj == NULL) || t->ci->hydro.count == 0 || (t->cj != NULL && t->cj->hydro.count == 0)) { t->type = task_type_none; @@ -489,6 +489,12 @@ static void scheduler_splittask_hydro(struct task *t, struct scheduler *s) { double shift[3]; const int sid = space_getsid(s->space, &ci, &cj, shift); +#ifdef SWIFT_DEBUG_CHECKS + if (sid != t->flags) + error("Got pair task with incorrect flags: sid=%d flags=%lld", sid, + t->flags); +#endif + /* Should this task be split-up? */ if (cell_can_split_pair_hydro_task(ci) && cell_can_split_pair_hydro_task(cj)) { @@ -883,7 +889,7 @@ static void scheduler_splittask_stars(struct task *t, struct scheduler *s) { /* Reset the redo flag. */ redo = 0; - /* Non-splittable task? */ + /* Empty task? 
*/ if ((t->ci == NULL) || (t->type == task_type_pair && t->cj == NULL) || t->ci->stars.count == 0 || (t->cj != NULL && t->cj->stars.count == 0)) { t->type = task_type_none; @@ -1835,6 +1841,11 @@ void scheduler_reweight(struct scheduler *s, int verbose) { (sizeof(int) * 8 - intrinsics_clz(t->ci->hydro.count)); break; + case task_type_stars_sort: + cost = wscale * intrinsics_popcount(t->flags) * scount_i * + (sizeof(int) * 8 - intrinsics_clz(t->ci->stars.count)); + break; + case task_type_self: if (t->subtype == task_subtype_grav) cost = 1.f * (wscale * gcount_i) * gcount_i; @@ -2124,6 +2135,7 @@ void scheduler_enqueue(struct scheduler *s, struct task *t) { case task_type_kick2: case task_type_stars_ghost: case task_type_logger: + case task_type_stars_sort: case task_type_timestep: qid = t->ci->super->owner; break; diff --git a/src/serial_io.c b/src/serial_io.c index 48d9838954e318800870c5d8d4ec9b462329c898..8ccd2b5fcffd309eb6b4e465dd935922302f4025 100644 --- a/src/serial_io.c +++ b/src/serial_io.c @@ -500,6 +500,23 @@ void read_ic_serial(char* fileName, const struct unit_system* internal_units, error("ICs dimensionality (%dD) does not match code dimensionality (%dD)", dimension, (int)hydro_dimension); + /* Check whether the number of files is specified (if the info exists) */ + const hid_t hid_files = H5Aexists(h_grp, "NumFilesPerSnapshot"); + int num_files = 1; + if (hid_files < 0) + error( + "Error while testing the existence of 'NumFilesPerSnapshot' " + "attribute"); + if (hid_files > 0) + io_read_attribute(h_grp, "NumFilesPerSnapshot", INT, &num_files); + if (num_files != 1) + error( + "ICs are split over multiple files (%d). SWIFT cannot handle this " + "case. The script /tools/combine_ics.py is available in the " + "repository " + "to combine files into a valid input file.", + num_files); + /* Read the relevant information and print status */ int flag_entropy_temp[6]; io_read_attribute(h_grp, "Flag_Entropy_ICs", INT, flag_entropy_temp); diff --git a/src/single_io.c b/src/single_io.c index 780978fe94ca6692abe78bea812fad38d4869b58..e04e4e9ea9fcea67fb64db6b5756d87d692e6d57 100644 --- a/src/single_io.c +++ b/src/single_io.c @@ -404,6 +404,21 @@ void read_ic_single(const char* fileName, error("ICs dimensionality (%dD) does not match code dimensionality (%dD)", dimension, (int)hydro_dimension); + /* Check whether the number of files is specified (if the info exists) */ + const hid_t hid_files = H5Aexists(h_grp, "NumFilesPerSnapshot"); + int num_files = 1; + if (hid_files < 0) + error( + "Error while testing the existence of 'NumFilesPerSnapshot' attribute"); + if (hid_files > 0) + io_read_attribute(h_grp, "NumFilesPerSnapshot", INT, &num_files); + if (num_files != 1) + error( + "ICs are split over multiple files (%d). SWIFT cannot handle this " + "case. 
The script /tools/combine_ics.py is available in the repository " + "to combine files into a valid input file.", + num_files); + /* Read the relevant information and print status */ int flag_entropy_temp[6]; io_read_attribute(h_grp, "Flag_Entropy_ICs", INT, flag_entropy_temp); diff --git a/src/space.c b/src/space.c index b924c4360ec241e3288fd9f98cf81fd8a7d1f2ee..2776e549293b98d6e326d914ff33f00b83afb7a3 100644 --- a/src/space.c +++ b/src/space.c @@ -182,6 +182,7 @@ void space_rebuild_recycle_mapper(void *map_data, int num_elements, space_recycle_list(s, cell_rec_begin, cell_rec_end, multipole_rec_begin, multipole_rec_end); c->hydro.sorts = NULL; + c->stars.sorts = NULL; c->nr_tasks = 0; c->grav.nr_mm_tasks = 0; c->hydro.density = NULL; @@ -192,7 +193,9 @@ void space_rebuild_recycle_mapper(void *map_data, int num_elements, c->hydro.dx_max_part = 0.0f; c->hydro.dx_max_sort = 0.0f; c->stars.dx_max_part = 0.f; + c->stars.dx_max_sort = 0.f; c->hydro.sorted = 0; + c->stars.sorted = 0; c->hydro.count = 0; c->hydro.count_total = 0; c->hydro.updated = 0; @@ -221,6 +224,7 @@ void space_rebuild_recycle_mapper(void *map_data, int num_elements, c->end_force = NULL; c->hydro.drift = NULL; c->grav.drift = NULL; + c->grav.drift_out = NULL; c->hydro.cooling = NULL; c->sourceterms = NULL; c->grav.long_range = NULL; @@ -235,12 +239,14 @@ void space_rebuild_recycle_mapper(void *map_data, int num_elements, c->grav.parts = NULL; c->stars.parts = NULL; c->hydro.do_sub_sort = 0; + c->stars.do_sub_sort = 0; c->grav.do_sub_drift = 0; c->hydro.do_sub_drift = 0; c->hydro.ti_end_min = -1; c->hydro.ti_end_max = -1; c->grav.ti_end_min = -1; c->grav.ti_end_max = -1; + c->stars.ti_end_min = -1; #ifdef SWIFT_DEBUG_CHECKS c->cellID = 0; #endif @@ -251,6 +257,11 @@ void space_rebuild_recycle_mapper(void *map_data, int num_elements, free(c->hydro.sort[i]); c->hydro.sort[i] = NULL; } + if (c->stars.sort[i] != NULL) { + free(c->stars.sort[i]); + c->stars.sort[i] = NULL; + } + } #if WITH_MPI c->mpi.tag = -1; @@ -298,8 +309,6 @@ void space_regrid(struct space *s, int verbose) { const ticks tic = getticks(); const integertime_t ti_current = (s->e != NULL) ? s->e->ti_current : 0; - message("REGRID!!"); - /* Run through the cells and get the current h_max. 
*/ // tic = getticks(); float h_max = s->cell_min / kernel_gamma / space_stretch; @@ -2363,11 +2372,16 @@ void space_gparts_sort(struct gpart *gparts, struct part *parts, */ void space_map_clearsort(struct cell *c, void *data) { - for (int i = 0; i < 13; i++) + for (int i = 0; i < 13; i++) { if (c->hydro.sort[i] != NULL) { free(c->hydro.sort[i]); c->hydro.sort[i] = NULL; } + if (c->stars.sort[i] != NULL) { + free(c->stars.sort[i]); + c->stars.sort[i] = NULL; + } + } } /** @@ -2538,6 +2552,7 @@ void space_split_recursive(struct space *s, struct cell *c, ti_hydro_beg_max = 0; integertime_t ti_gravity_end_min = max_nr_timesteps, ti_gravity_end_max = 0, ti_gravity_beg_max = 0; + integertime_t ti_stars_end_min = max_nr_timesteps; struct part *parts = c->hydro.parts; struct gpart *gparts = c->grav.parts; struct spart *sparts = c->stars.parts; @@ -2647,12 +2662,14 @@ void space_split_recursive(struct space *s, struct cell *c, cp->hydro.dx_max_sort = 0.f; cp->stars.h_max = 0.f; cp->stars.dx_max_part = 0.f; + cp->stars.dx_max_sort = 0.f; cp->nodeID = c->nodeID; cp->parent = c; cp->super = NULL; cp->hydro.super = NULL; cp->grav.super = NULL; cp->hydro.do_sub_sort = 0; + cp->stars.do_sub_sort = 0; cp->grav.do_sub_drift = 0; cp->hydro.do_sub_drift = 0; #ifdef WITH_MPI @@ -2702,6 +2719,7 @@ void space_split_recursive(struct space *s, struct cell *c, ti_gravity_end_min = min(ti_gravity_end_min, cp->grav.ti_end_min); ti_gravity_end_max = max(ti_gravity_end_max, cp->grav.ti_end_max); ti_gravity_beg_max = max(ti_gravity_beg_max, cp->grav.ti_beg_max); + ti_stars_end_min = min(ti_stars_end_min, cp->stars.ti_end_min); /* Increase the depth */ if (cp->maxdepth > maxdepth) maxdepth = cp->maxdepth; @@ -2828,6 +2846,7 @@ void space_split_recursive(struct space *s, struct cell *c, timebin_t hydro_time_bin_min = num_time_bins, hydro_time_bin_max = 0; timebin_t gravity_time_bin_min = num_time_bins, gravity_time_bin_max = 0; + timebin_t stars_time_bin_min = num_time_bins; /* parts: Get dt_min/dt_max and h_max. */ for (int k = 0; k < count; k++) { @@ -2871,6 +2890,8 @@ void space_split_recursive(struct space *s, struct cell *c, #endif gravity_time_bin_min = min(gravity_time_bin_min, sparts[k].time_bin); gravity_time_bin_max = max(gravity_time_bin_max, sparts[k].time_bin); + stars_time_bin_min = min(stars_time_bin_min, sparts[k].time_bin); + stars_h_max = max(stars_h_max, sparts[k].h); /* Reset x_diff */ @@ -2888,6 +2909,7 @@ void space_split_recursive(struct space *s, struct cell *c, ti_gravity_end_max = get_integer_time_end(ti_current, gravity_time_bin_max); ti_gravity_beg_max = get_integer_time_begin(ti_current + 1, gravity_time_bin_max); + ti_stars_end_min = get_integer_time_end(ti_current, stars_time_bin_min); /* Construct the multipole and the centre of mass*/ if (s->with_self_gravity) { @@ -2925,6 +2947,7 @@ void space_split_recursive(struct space *s, struct cell *c, c->grav.ti_end_min = ti_gravity_end_min; c->grav.ti_end_max = ti_gravity_end_max; c->grav.ti_beg_max = ti_gravity_beg_max; + c->stars.ti_end_min = ti_stars_end_min; c->stars.h_max = stars_h_max; c->maxdepth = maxdepth; @@ -3128,8 +3151,10 @@ void space_getcells(struct space *s, int nr_cells, struct cell **cells) { /* Init some things in the cell we just got. 
*/ for (int j = 0; j < nr_cells; j++) { - for (int k = 0; k < 13; k++) + for (int k = 0; k < 13; k++) { if (cells[j]->hydro.sort[k] != NULL) free(cells[j]->hydro.sort[k]); + if (cells[j]->stars.sort[k] != NULL) free(cells[j]->stars.sort[k]); + } struct gravity_tensors *temp = cells[j]->grav.multipole; bzero(cells[j], sizeof(struct cell)); cells[j]->grav.multipole = temp; @@ -3150,11 +3175,16 @@ void space_getcells(struct space *s, int nr_cells, struct cell **cells) { void space_free_buff_sort_indices(struct space *s) { for (struct cell *finger = s->cells_sub; finger != NULL; finger = finger->next) { - for (int k = 0; k < 13; k++) + for (int k = 0; k < 13; k++) { if (finger->hydro.sort[k] != NULL) { free(finger->hydro.sort[k]); finger->hydro.sort[k] = NULL; } + if (finger->stars.sort[k] != NULL) { + free(finger->stars.sort[k]); + finger->stars.sort[k] = NULL; + } + } } } diff --git a/src/stars/Default/stars_part.h b/src/stars/Default/stars_part.h index 1922cc0e260a3c0f2052649a87252019514e637f..32006a15b67b9e97be7aac1a86ec45d369bcc1a0 100644 --- a/src/stars/Default/stars_part.h +++ b/src/stars/Default/stars_part.h @@ -41,6 +41,9 @@ struct spart { /* Offset between current position and position at last tree rebuild. */ float x_diff[3]; + /* Offset between current position and position at last tree rebuild. */ + float x_diff_sort[3]; + /*! Particle velocity. */ float v[3]; diff --git a/src/task.c b/src/task.c index 52c62c67f16bea15d98a7d90893c63d0a60cea79..a90482b26925f0602a870e5e4847c58b53fe02cc 100644 --- a/src/task.c +++ b/src/task.c @@ -79,7 +79,8 @@ const char *taskID_names[task_type_count] = {"none", "logger", "stars_ghost_in", "stars_ghost", - "stars_ghost_out"}; + "stars_ghost_out", + "stars_sort"}; /* Sub-task type names. */ const char *subtaskID_names[task_subtype_count] = { @@ -148,6 +149,7 @@ __attribute__((always_inline)) INLINE static enum task_actions task_acts_on( return task_action_all; case task_type_stars_ghost: + case task_type_stars_sort: return task_action_spart; break; @@ -343,11 +345,17 @@ void task_unlock(struct task *t) { cell_gunlocktree(ci); break; + case task_type_stars_sort: + cell_sunlocktree(ci); + break; + case task_type_self: case task_type_sub_self: if (subtype == task_subtype_grav) { cell_gunlocktree(ci); cell_munlocktree(ci); + } else if (subtype == task_subtype_stars_density) { + cell_sunlocktree(ci); } else { cell_unlocktree(ci); } @@ -360,6 +368,9 @@ void task_unlock(struct task *t) { cell_gunlocktree(cj); cell_munlocktree(ci); cell_munlocktree(cj); + } else if (subtype == task_subtype_stars_density) { + cell_sunlocktree(ci); + cell_sunlocktree(cj); } else { cell_unlocktree(ci); cell_unlocktree(cj); @@ -447,6 +458,11 @@ int task_lock(struct task *t) { if (cell_locktree(ci) != 0) return 0; break; + case task_type_stars_sort: + if (ci->stars.hold) return 0; + if (cell_slocktree(ci) != 0) return 0; + break; + case task_type_drift_gpart: case task_type_grav_mesh: if (ci->grav.phold) return 0; @@ -464,7 +480,11 @@ int task_lock(struct task *t) { cell_gunlocktree(ci); return 0; } + } else if (subtype == task_subtype_stars_density) { + if (ci->stars.hold) return 0; + if (cell_slocktree(ci) != 0) return 0; } else { + if (ci->hydro.hold) return 0; if (cell_locktree(ci) != 0) return 0; } break; @@ -488,6 +508,13 @@ int task_lock(struct task *t) { cell_munlocktree(ci); return 0; } + } else if (subtype == task_subtype_stars_density) { + if (ci->stars.hold || cj->stars.hold) return 0; + if (cell_slocktree(ci) != 0) return 0; + if (cell_slocktree(cj) != 0) { + 
cell_sunlocktree(ci); + return 0; + } } else { /* Lock the parts in both cells */ if (ci->hydro.hold || cj->hydro.hold) return 0; diff --git a/src/task.h b/src/task.h index 64b611534ed10e3c34b628e2c733a508e5d42a1c..994b2b14c05965b71e877feac5cb9827a1d1b4bb 100644 --- a/src/task.h +++ b/src/task.h @@ -72,6 +72,7 @@ enum task_types { task_type_stars_ghost_in, task_type_stars_ghost, task_type_stars_ghost_out, + task_type_stars_sort, task_type_count } __attribute__((packed)); diff --git a/src/timers.c b/src/timers.c index 898c833c3b6764f05ed9205efa0db6220e911a7e..ccec0a9657d3394de93055992ae4b18c05794a30 100644 --- a/src/timers.c +++ b/src/timers.c @@ -95,6 +95,7 @@ const char* timers_names[timer_count] = { "dosubpair_stars_density", "dosub_self_stars_density", "logger", + "do_stars_sort", }; /* File to store the timers */ diff --git a/src/timers.h b/src/timers.h index c43f0154d2aaaa9d1c5aed3ad51b912e4fc5d751..48ca1e2763302a24356d15ceeba7ed982a9f169e 100644 --- a/src/timers.h +++ b/src/timers.h @@ -93,9 +93,12 @@ enum { timer_dostars_ghost, timer_doself_subset_stars_density, timer_dopair_subset_stars_density, - timer_dosubpair_stars_density, + timer_dosub_pair_stars_density, timer_dosub_self_stars_density, + timer_dosub_subset_pair_stars_density, + timer_dosub_subset_self_stars_density, timer_logger, + timer_do_stars_sort, timer_count, }; diff --git a/src/tools.c b/src/tools.c index ff33afbca3fdc97ede61ff09998d8dc27bf0154b..c0400aa7b42322fce276a5e788af7bcb9e7f3625 100644 --- a/src/tools.c +++ b/src/tools.c @@ -714,7 +714,7 @@ int compare_values(double a, double b, double threshold, double *absDiff, * * @return 1 if difference found, 0 otherwise */ -int compare_particles(struct part a, struct part b, double threshold) { +int compare_particles(struct part *a, struct part *b, double threshold) { #ifdef GADGET2_SPH @@ -722,117 +722,117 @@ int compare_particles(struct part a, struct part b, double threshold) { double absDiff = 0.0, absSum = 0.0, relDiff = 0.0; for (int k = 0; k < 3; k++) { - if (compare_values(a.x[k], b.x[k], threshold, &absDiff, &absSum, + if (compare_values(a->x[k], b->x[k], threshold, &absDiff, &absSum, &relDiff)) { message( "Relative difference (%e) larger than tolerance (%e) for x[%d] of " "particle %lld.", - relDiff, threshold, k, a.id); - message("a = %e, b = %e", a.x[k], b.x[k]); + relDiff, threshold, k, a->id); + message("a = %e, b = %e", a->x[k], b->x[k]); result = 1; } } for (int k = 0; k < 3; k++) { - if (compare_values(a.v[k], b.v[k], threshold, &absDiff, &absSum, + if (compare_values(a->v[k], b->v[k], threshold, &absDiff, &absSum, &relDiff)) { message( "Relative difference (%e) larger than tolerance (%e) for v[%d] of " "particle %lld.", - relDiff, threshold, k, a.id); - message("a = %e, b = %e", a.v[k], b.v[k]); + relDiff, threshold, k, a->id); + message("a = %e, b = %e", a->v[k], b->v[k]); result = 1; } } for (int k = 0; k < 3; k++) { - if (compare_values(a.a_hydro[k], b.a_hydro[k], threshold, &absDiff, &absSum, - &relDiff)) { + if (compare_values(a->a_hydro[k], b->a_hydro[k], threshold, &absDiff, + &absSum, &relDiff)) { message( "Relative difference (%e) larger than tolerance (%e) for a_hydro[%d] " "of particle %lld.", - relDiff, threshold, k, a.id); - message("a = %e, b = %e", a.a_hydro[k], b.a_hydro[k]); + relDiff, threshold, k, a->id); + message("a = %e, b = %e", a->a_hydro[k], b->a_hydro[k]); result = 1; } } - if (compare_values(a.rho, b.rho, threshold, &absDiff, &absSum, &relDiff)) { + if (compare_values(a->rho, b->rho, threshold, &absDiff, &absSum, &relDiff)) { 
message( "Relative difference (%e) larger than tolerance (%e) for rho of " "particle %lld.", - relDiff, threshold, a.id); - message("a = %e, b = %e", a.rho, b.rho); + relDiff, threshold, a->id); + message("a = %e, b = %e", a->rho, b->rho); result = 1; } - if (compare_values(a.density.rho_dh, b.density.rho_dh, threshold, &absDiff, + if (compare_values(a->density.rho_dh, b->density.rho_dh, threshold, &absDiff, &absSum, &relDiff)) { message( "Relative difference (%e) larger than tolerance (%e) for rho_dh of " "particle %lld.", - relDiff, threshold, a.id); - message("a = %e, b = %e", a.density.rho_dh, b.density.rho_dh); + relDiff, threshold, a->id); + message("a = %e, b = %e", a->density.rho_dh, b->density.rho_dh); result = 1; } - if (compare_values(a.density.wcount, b.density.wcount, threshold, &absDiff, + if (compare_values(a->density.wcount, b->density.wcount, threshold, &absDiff, &absSum, &relDiff)) { message( "Relative difference (%e) larger than tolerance (%e) for wcount of " "particle %lld.", - relDiff, threshold, a.id); - message("a = %e, b = %e", a.density.wcount, b.density.wcount); + relDiff, threshold, a->id); + message("a = %e, b = %e", a->density.wcount, b->density.wcount); result = 1; } - if (compare_values(a.density.wcount_dh, b.density.wcount_dh, threshold, + if (compare_values(a->density.wcount_dh, b->density.wcount_dh, threshold, &absDiff, &absSum, &relDiff)) { message( "Relative difference (%e) larger than tolerance (%e) for wcount_dh of " "particle %lld.", - relDiff, threshold, a.id); - message("a = %e, b = %e", a.density.wcount_dh, b.density.wcount_dh); + relDiff, threshold, a->id); + message("a = %e, b = %e", a->density.wcount_dh, b->density.wcount_dh); result = 1; } - if (compare_values(a.force.h_dt, b.force.h_dt, threshold, &absDiff, &absSum, + if (compare_values(a->force.h_dt, b->force.h_dt, threshold, &absDiff, &absSum, &relDiff)) { message( "Relative difference (%e) larger than tolerance (%e) for h_dt of " "particle %lld.", - relDiff, threshold, a.id); - message("a = %e, b = %e", a.force.h_dt, b.force.h_dt); + relDiff, threshold, a->id); + message("a = %e, b = %e", a->force.h_dt, b->force.h_dt); result = 1; } - if (compare_values(a.force.v_sig, b.force.v_sig, threshold, &absDiff, &absSum, - &relDiff)) { + if (compare_values(a->force.v_sig, b->force.v_sig, threshold, &absDiff, + &absSum, &relDiff)) { message( "Relative difference (%e) larger than tolerance (%e) for v_sig of " "particle %lld.", - relDiff, threshold, a.id); - message("a = %e, b = %e", a.force.v_sig, b.force.v_sig); + relDiff, threshold, a->id); + message("a = %e, b = %e", a->force.v_sig, b->force.v_sig); result = 1; } - if (compare_values(a.entropy_dt, b.entropy_dt, threshold, &absDiff, &absSum, + if (compare_values(a->entropy_dt, b->entropy_dt, threshold, &absDiff, &absSum, &relDiff)) { message( "Relative difference (%e) larger than tolerance (%e) for entropy_dt of " "particle %lld.", - relDiff, threshold, a.id); - message("a = %e, b = %e", a.entropy_dt, b.entropy_dt); + relDiff, threshold, a->id); + message("a = %e, b = %e", a->entropy_dt, b->entropy_dt); result = 1; } - if (compare_values(a.density.div_v, b.density.div_v, threshold, &absDiff, + if (compare_values(a->density.div_v, b->density.div_v, threshold, &absDiff, &absSum, &relDiff)) { message( "Relative difference (%e) larger than tolerance (%e) for div_v of " "particle %lld.", - relDiff, threshold, a.id); - message("a = %e, b = %e", a.density.div_v, b.density.div_v); + relDiff, threshold, a->id); + message("a = %e, b = %e", a->density.div_v, 
b->density.div_v); result = 1; } for (int k = 0; k < 3; k++) { - if (compare_values(a.density.rot_v[k], b.density.rot_v[k], threshold, + if (compare_values(a->density.rot_v[k], b->density.rot_v[k], threshold, &absDiff, &absSum, &relDiff)) { message( "Relative difference (%e) larger than tolerance (%e) for rot_v[%d] " "of particle %lld.", - relDiff, threshold, k, a.id); - message("a = %e, b = %e", a.density.rot_v[k], b.density.rot_v[k]); + relDiff, threshold, k, a->id); + message("a = %e, b = %e", a->density.rot_v[k], b->density.rot_v[k]); result = 1; } } diff --git a/src/tools.h b/src/tools.h index 5ec2ca42810e1cb733e1aa674bd1c94ac5c569bd..22eba1519ebf80673cb2d8540791e6b4d092bab0 100644 --- a/src/tools.h +++ b/src/tools.h @@ -54,7 +54,7 @@ void gravity_n2(struct gpart *gparts, const int gcount, const struct gravity_props *gravity_properties, float rlr); int compare_values(double a, double b, double threshold, double *absDiff, double *absSum, double *relDiff); -int compare_particles(struct part a, struct part b, double threshold); +int compare_particles(struct part *a, struct part *b, double threshold); long get_maxrss(void); diff --git a/tests/test125cells.c b/tests/test125cells.c index cfe7e18b07b30c309531642a769241841de3a593..5a9c4ea9511b5d75a3098f7997b83607cdcbd715 100644 --- a/tests/test125cells.c +++ b/tests/test125cells.c @@ -663,7 +663,7 @@ int main(int argc, char *argv[]) { /* First, sort stuff */ for (int j = 0; j < 125; ++j) - runner_do_sort(&runner, cells[j], 0x1FFF, 0, 0); + runner_do_hydro_sort(&runner, cells[j], 0x1FFF, 0, 0); /* Do the density calculation */ diff --git a/tests/test27cells.c b/tests/test27cells.c index 6930638efe28aafab1c614e8ce71b353eb0c5b8f..d100e2e30f0bb1452b3366ddde51dbae0575d67a 100644 --- a/tests/test27cells.c +++ b/tests/test27cells.c @@ -492,7 +492,7 @@ int main(int argc, char *argv[]) { runner_do_drift_part(&runner, cells[i * 9 + j * 3 + k], 0); - runner_do_sort(&runner, cells[i * 9 + j * 3 + k], 0x1FFF, 0, 0); + runner_do_hydro_sort(&runner, cells[i * 9 + j * 3 + k], 0x1FFF, 0, 0); } } } diff --git a/tests/test27cellsStars.c b/tests/test27cellsStars.c index 3875bf75b1bb315bf48acae13b9553c689416a18..0377fc49edfedc8b1d9ce0630821622117187c9b 100644 --- a/tests/test27cellsStars.c +++ b/tests/test27cellsStars.c @@ -164,6 +164,7 @@ struct cell *make_cell(size_t n, size_t n_stars, double *offset, double size, cell->stars.count = scount; cell->hydro.dx_max_part = 0.; cell->hydro.dx_max_sort = 0.; + cell->stars.dx_max_sort = 0.; cell->width[0] = size; cell->width[1] = size; cell->width[2] = size; @@ -174,8 +175,10 @@ struct cell *make_cell(size_t n, size_t n_stars, double *offset, double size, cell->hydro.ti_old_part = 8; cell->hydro.ti_end_min = 8; cell->hydro.ti_end_max = 8; + cell->grav.ti_old_part = 8; cell->grav.ti_end_min = 8; cell->grav.ti_end_max = 8; + cell->stars.ti_end_min = 8; cell->nodeID = NODE_ID; shuffle_particles(cell->hydro.parts, cell->hydro.count); @@ -414,7 +417,8 @@ int main(int argc, char *argv[]) { runner_do_drift_part(&runner, cells[i * 9 + j * 3 + k], 0); - runner_do_sort(&runner, cells[i * 9 + j * 3 + k], 0x1FFF, 0, 0); + runner_do_hydro_sort(&runner, cells[i * 9 + j * 3 + k], 0x1FFF, 0, 0); + runner_do_stars_sort(&runner, cells[i * 9 + j * 3 + k], 0x1FFF, 0, 0); } } } diff --git a/tests/testActivePair.c b/tests/testActivePair.c index a4eb04330fba7c984333fc7c705235223810ec4d..402a9a7ac416a0b2651f628eb32988d8ad62a14f 100644 --- a/tests/testActivePair.c +++ b/tests/testActivePair.c @@ -298,8 +298,8 @@ void 
test_pair_interactions(struct runner *runner, struct cell **ci, interaction_func vec_interaction, init_func init, finalise_func finalise) { - runner_do_sort(runner, *ci, 0x1FFF, 0, 0); - runner_do_sort(runner, *cj, 0x1FFF, 0, 0); + runner_do_hydro_sort(runner, *ci, 0x1FFF, 0, 0); + runner_do_hydro_sort(runner, *cj, 0x1FFF, 0, 0); /* Zero the fields */ init(*ci, runner->e->cosmology, runner->e->hydro_properties); diff --git a/tests/testInteractions.c b/tests/testInteractions.c index 0241a46634bdd12fb8a89bc3912c3b160198c389..dae55a337642e1616e94119263ff8f1c2a617c89 100644 --- a/tests/testInteractions.c +++ b/tests/testInteractions.c @@ -197,10 +197,10 @@ int check_results(struct part serial_test_part, struct part *serial_parts, struct part vec_test_part, struct part *vec_parts, int count) { int result = 0; - result += compare_particles(serial_test_part, vec_test_part, ACC_THRESHOLD); + result += compare_particles(&serial_test_part, &vec_test_part, ACC_THRESHOLD); for (int i = 0; i < count; i++) - result += compare_particles(serial_parts[i], vec_parts[i], ACC_THRESHOLD); + result += compare_particles(&serial_parts[i], &vec_parts[i], ACC_THRESHOLD); return result; } diff --git a/tests/testOutputList.c b/tests/testOutputList.c index cd9c62c3abc7462406950e24fa330788b76ed249..dbc257aafdee85429d05dc517a86f496443ac0ed 100644 --- a/tests/testOutputList.c +++ b/tests/testOutputList.c @@ -36,9 +36,9 @@ const double time_values[Ntest] = { /* Expected values from file */ const double a_values[Ntest] = { - 0.5, - 0.1, 0.01, + 0.1, + 0.5, }; void test_no_cosmo(struct engine *e, char *name, int with_assert) { @@ -62,7 +62,7 @@ void test_no_cosmo(struct engine *e, char *name, int with_assert) { for (int i = 0; i < Ntest; i++) { /* Test last value */ if (with_assert) { - assert(abs(output_time - time_values[i]) < tol); + assert(fabs(output_time - time_values[i]) < tol); } /* Set current time */ @@ -98,7 +98,7 @@ void test_cosmo(struct engine *e, char *name, int with_assert) { for (int i = 0; i < Ntest; i++) { /* Test last value */ if (with_assert) { - assert(abs(output_time - a_values[i]) < tol); + assert(fabs(output_time - a_values[i]) < tol); } /* Set current time */ diff --git a/tests/testPeriodicBC.c b/tests/testPeriodicBC.c index e4a5a85845f92bfbecae85d9dc9453b38b9b7646..be83f20a58b17f9a5fdcf967cda9a678aab5b8a9 100644 --- a/tests/testPeriodicBC.c +++ b/tests/testPeriodicBC.c @@ -273,7 +273,7 @@ int check_results(struct part *serial_parts, struct part *vec_parts, int count, int result = 0; for (int i = 0; i < count; i++) - result += compare_particles(serial_parts[i], vec_parts[i], threshold); + result += compare_particles(&serial_parts[i], &vec_parts[i], threshold); return result; } @@ -505,8 +505,8 @@ int main(int argc, char *argv[]) { runner_do_drift_part(&runner, cells[i * (dim * dim) + j * dim + k], 0); - runner_do_sort(&runner, cells[i * (dim * dim) + j * dim + k], 0x1FFF, 0, - 0); + runner_do_hydro_sort(&runner, cells[i * (dim * dim) + j * dim + k], + 0x1FFF, 0, 0); } } } diff --git a/tools/analyse_runtime.py b/tools/analyse_runtime.py index 5cf2bbce7f44d5ddca6c6f6c2f372ead835bd569..e1b09a9903c804a20788f165ff28c90142ae38b1 100755 --- a/tools/analyse_runtime.py +++ b/tools/analyse_runtime.py @@ -93,7 +93,7 @@ labels = [ "engine_split:", "space_init", "engine_init", - "engine_repartition_trigger" + "engine_repartition_trigger:" ] is_rebuild = [ 1, diff --git a/tools/combine_ics.py b/tools/combine_ics.py index 447af1cc3d6d9c09e6322648a0dc5035382e857c..ac5680f9c70e3c7f8280b14a80a09d8c3140ae99 100755 
--- a/tools/combine_ics.py +++ b/tools/combine_ics.py @@ -1,7 +1,7 @@ #!/usr/bin/env python """ Usage: - combine_ics.py input_file.0.hdf5 merged_file.hdf5 + combine_ics.py input_file.0.hdf5 merged_file.hdf5 gzip_level This file combines Gadget-2 type 2 (i.e. hdf5) initial condition files into a single file that can be digested by SWIFT. @@ -11,6 +11,9 @@ the DM particles is handled. No unit conversions are applied nor are any scale-factors or h-factors changed. The script applies some compression and checksum filters to the output to save disk space. +The last argument `gzip_level` is used to specify the level of compression +to apply to all the fields in the file. Use 0 to cancel all compression. +The default value is `4`. This file is part of SWIFT. Copyright (C) 2016 Matthieu Schaller (matthieu.schaller@durham.ac.uk) @@ -35,6 +38,11 @@ import sys import h5py as h5 import numpy as np +# Store the compression level +gzip_level = 4 +if len(sys.argv) > 3: + gzip_level = int(sys.argv[3]) + # First, we need to collect some information from the master file main_file_name = str(sys.argv[1])[:-7] print("Merging snapshots files with name", main_file_name) @@ -45,19 +53,19 @@ grp_header = master_file["/Header"] num_files = grp_header.attrs["NumFilesPerSnapshot"] tot_num_parts = grp_header.attrs["NumPart_Total"] -tot_num_parts_high_word = grp_header.attrs["NumPart_Total"] +tot_num_parts_high_word = grp_header.attrs["NumPart_Total_HighWord"] entropy_flag = grp_header.attrs["Flag_Entropy_ICs"] box_size = grp_header.attrs["BoxSize"] time = grp_header.attrs["Time"] # Combine the low- and high-words +tot_num_parts = tot_num_parts.astype(np.int64) for i in range(6): - tot_num_parts[i] += np.int64(tot_num_parts_high_word[i]) << 32 + tot_num_parts[i] += (np.int64(tot_num_parts_high_word[i]) << 32) # Some basic information print("Reading", tot_num_parts, "particles from", num_files, "files.") - # Check whether there is a mass table DM_mass = 0.0 mtable = grp_header.attrs.get("MassTable") @@ -105,7 +113,7 @@ def create_set(grp, name, size, dim, dtype): dtype=dtype, chunks=True, compression="gzip", - compression_opts=4, + compression_opts=gzip_level, shuffle=True, fletcher32=True, maxshape=(size,), @@ -117,7 +125,7 @@ def create_set(grp, name, size, dim, dtype): dtype=dtype, chunks=True, compression="gzip", - compression_opts=4, + compression_opts=gzip_level, shuffle=True, fletcher32=True, maxshape=(size, dim), diff --git a/tools/task_plots/analyse_tasks.py b/tools/task_plots/analyse_tasks.py index 0b6439ff481367b8fa1748d89a8396bef4c2c006..ca41970c683a1680e9d1054c9d70d6370992a05e 100755 --- a/tools/task_plots/analyse_tasks.py +++ b/tools/task_plots/analyse_tasks.py @@ -92,9 +92,11 @@ TASKTYPES = [ "cooling", "star_formation", "sourceterms", + "logger", "stars_ghost_in", "stars_ghost", "stars_ghost_out", + "stars_sort", "count", ] diff --git a/tools/task_plots/plot_tasks.py b/tools/task_plots/plot_tasks.py index e9b676d92832e5c4667aa25bd9458472466500cd..1fe7bcbd11f30ff17051bc9a7ae789439df8b9e9 100755 --- a/tools/task_plots/plot_tasks.py +++ b/tools/task_plots/plot_tasks.py @@ -177,9 +177,11 @@ TASKTYPES = [ "cooling", "star_formation", "sourceterms", + "logger", "stars_ghost_in", "stars_ghost", "stars_ghost_out", + "stars_sort", "count", ]