diff --git a/.gitignore b/.gitignore index 619c2e78a22ce2752d0495179384b078868761ef..3f2555a8877cf520c3b65bdcfc9beb3ed7a1f8f2 100644 --- a/.gitignore +++ b/.gitignore @@ -25,7 +25,10 @@ doc/Doxyfile examples/swift examples/swift_mpi +examples/fof +examples/fof_mpi examples/*/*/*.xmf +examples/*/*/*.dat examples/*/*/*.png examples/*/*/*.mp4 examples/*/*/*.txt @@ -37,7 +40,10 @@ examples/*/*/memuse_report-step*.dat examples/*/*/memuse_report-step*.log examples/*/*/restart/* examples/*/*/used_parameters.yml -examples/*/*/log* +examples/*/stf_output* +examples/*/stf.* +examples/*/fof_output* +examples/*/log* examples/*/*/unused_parameters.yml examples/*/*.mpg examples/*/*/gravity_checks_*.dat @@ -145,6 +151,7 @@ tests/testOutputList tests/testCbrt tests/testFormat.sh tests/testCooling +tests/testHashmap tests/*.png tests/*.txt diff --git a/README b/README index 2e26f9d997f7396109638a5f44369922ebc4b275..323033003add088eda1ca74e9d33ccb99a361694 100644 --- a/README +++ b/README @@ -36,6 +36,8 @@ Parameters: -s, --hydro Run with hydrodynamics. -S, --stars Run with stars. -B, --black-holes Run with black holes. + -u, --fof Run Friends-of-Friends algorithm and + black holes seeding. -x, --velociraptor Run with structure finding. --limiter Run with time-step limiter. diff --git a/configure.ac b/configure.ac index 6c617996d62eec4cd4ccf5da37881150f00681c1..a16131c8bb8de1ab9c71faa40b326ceeb757f9ef 100644 --- a/configure.ac +++ b/configure.ac @@ -1088,7 +1088,7 @@ if test "x$with_velociraptor" != "xno"; then [velociraptor], [InitVelociraptor], [AC_DEFINE([HAVE_VELOCIRAPTOR],1,[The VELOCIraptor library appears to be present.])], - [AC_MSG_ERROR(Cannot find VELOCIraptor library at $with_velociraptor)], + [AC_MSG_ERROR(Cannot find VELOCIraptor library at $with_velociraptor or incompatible HDF5 library loaded.)], [$VELOCIRAPTOR_LIBS $HDF5_LDFLAGS $HDF5_LIBS $GSL_LIBS] ) fi diff --git a/doc/RTD/source/CommandLineOptions/index.rst b/doc/RTD/source/CommandLineOptions/index.rst index 94fc72ae38074c165ca1e28ff175961b9a9b9a87..dd8675add7cf03f34298ed85a6c2f2fefc650d58 100644 --- a/doc/RTD/source/CommandLineOptions/index.rst +++ b/doc/RTD/source/CommandLineOptions/index.rst @@ -31,6 +31,8 @@ can be found by typing ``./swift -h``:: -s, --hydro Run with hydrodynamics. -S, --stars Run with stars. -B, --black-holes Run with black holes. + -u, --fof Run Friends-of-Friends algorithm and + black holes seeding. -x, --velociraptor Run with structure finding. --limiter Run with time-step limiter. diff --git a/examples/EAGLE_ICs/EAGLE_12/eagle_12.yml b/examples/EAGLE_ICs/EAGLE_12/eagle_12.yml index 8649e34a77f68c2f8b14b4619ce0fd4a616390fc..a7362bf88a3898f687667ea110153ece4a8ade08 100644 --- a/examples/EAGLE_ICs/EAGLE_12/eagle_12.yml +++ b/examples/EAGLE_ICs/EAGLE_12/eagle_12.yml @@ -51,6 +51,15 @@ SPH: minimal_temperature: 100.0 # (internal units) initial_temperature: 268.7 +# Parameters for the Friends-Of-Friends algorithm +FOF: + basename: fof_output # Filename for the FOF outputs. + min_group_size: 256 # The minimum no. of particles required for a group. + linking_length_ratio: 0.2 # Linking length in units of the main inter-particle separation. + black_hole_seed_halo_mass_Msun: 1.5e10 # Minimal halo mass in which to seed a black hole (in solar masses). + scale_factor_first: 0.01 # Scale-factor of first FoF black hole seeding calls. + delta_time: 1.005 # Scale-factor ratio between consecutive FoF black hole seeding calls. + Scheduler: max_top_level_cells: 16 cell_split_size: 100 @@ -154,6 +163,7 @@ EAGLEFeedback: # EAGLE AGN model EAGLEAGN: + subgrid_seed_mass_Msun: 1.5e5 # Black hole subgrid mass at creation time in solar masses. max_eddington_fraction: 1. # Maximal allowed accretion rate in units of the Eddington rate. radiative_efficiency: 0.1 # Fraction of the accreted mass that gets radiated. coupling_efficiency: 0.15 # Fraction of the radiated energy that couples to the gas in feedback events. diff --git a/examples/EAGLE_ICs/EAGLE_25/eagle_25.yml b/examples/EAGLE_ICs/EAGLE_25/eagle_25.yml index c0e6ffe9aabf9d6ea84b034a81543e0b347e3cc5..ed1782127f7d80ba6d6802c256eeb9db9724d04f 100644 --- a/examples/EAGLE_ICs/EAGLE_25/eagle_25.yml +++ b/examples/EAGLE_ICs/EAGLE_25/eagle_25.yml @@ -51,6 +51,15 @@ SPH: minimal_temperature: 100.0 # (internal units) initial_temperature: 268.7 +# Parameters for the Friends-Of-Friends algorithm +FOF: + basename: fof_output # Filename for the FOF outputs. + min_group_size: 256 # The minimum no. of particles required for a group. + linking_length_ratio: 0.2 # Linking length in units of the main inter-particle separation. + black_hole_seed_halo_mass_Msun: 1.5e10 # Minimal halo mass in which to seed a black hole (in solar masses). + scale_factor_first: 0.01 # Scale-factor of first FoF black hole seeding calls. + delta_time: 1.005 # Scale-factor ratio between consecutive FoF black hole seeding calls. + Scheduler: max_top_level_cells: 16 cell_split_size: 100 @@ -155,6 +164,7 @@ EAGLEFeedback: # EAGLE AGN model EAGLEAGN: + subgrid_seed_mass_Msun: 1.5e5 # Black hole subgrid mass at creation time in solar masses. max_eddington_fraction: 1. # Maximal allowed accretion rate in units of the Eddington rate. radiative_efficiency: 0.1 # Fraction of the accreted mass that gets radiated. coupling_efficiency: 0.15 # Fraction of the radiated energy that couples to the gas in feedback events. diff --git a/examples/EAGLE_ICs/EAGLE_50/eagle_50.yml b/examples/EAGLE_ICs/EAGLE_50/eagle_50.yml index 082422ec6acba19acdc02c293a82b0fef3e0e23c..ca8eec7246bc0d3a0d6b10f8f75f0db6741f4f5b 100644 --- a/examples/EAGLE_ICs/EAGLE_50/eagle_50.yml +++ b/examples/EAGLE_ICs/EAGLE_50/eagle_50.yml @@ -51,6 +51,15 @@ SPH: minimal_temperature: 100.0 # (internal units) initial_temperature: 268.7 +# Parameters for the Friends-Of-Friends algorithm +FOF: + basename: fof_output # Filename for the FOF outputs. + min_group_size: 256 # The minimum no. of particles required for a group. + linking_length_ratio: 0.2 # Linking length in units of the main inter-particle separation. + black_hole_seed_halo_mass_Msun: 1.5e10 # Minimal halo mass in which to seed a black hole (in solar masses). + scale_factor_first: 0.01 # Scale-factor of first FoF black hole seeding calls. + delta_time: 1.005 # Scale-factor ratio between consecutive FoF black hole seeding calls. + Scheduler: max_top_level_cells: 32 cell_split_size: 100 @@ -155,6 +164,7 @@ EAGLEFeedback: # EAGLE AGN model EAGLEAGN: + subgrid_seed_mass_Msun: 1.5e5 # Black hole subgrid mass at creation time in solar masses. max_eddington_fraction: 1. # Maximal allowed accretion rate in units of the Eddington rate. radiative_efficiency: 0.1 # Fraction of the accreted mass that gets radiated. coupling_efficiency: 0.15 # Fraction of the radiated energy that couples to the gas in feedback events. diff --git a/examples/EAGLE_low_z/EAGLE_12/eagle_12.yml b/examples/EAGLE_low_z/EAGLE_12/eagle_12.yml index 5b41f0861d72a2c8dc315df313dc737c5598a818..434840636aceca6f6372cecfc5529a743a5cdc57 100644 --- a/examples/EAGLE_low_z/EAGLE_12/eagle_12.yml +++ b/examples/EAGLE_low_z/EAGLE_12/eagle_12.yml @@ -51,13 +51,22 @@ SPH: CFL_condition: 0.1 # Courant-Friedrich-Levy condition for time integration. minimal_temperature: 100 # (internal units) +# Parameters for the Friends-Of-Friends algorithm +FOF: + basename: fof_output # Filename for the FOF outputs. + min_group_size: 256 # The minimum no. of particles required for a group. + linking_length_ratio: 0.2 # Linking length in units of the main inter-particle separation. + black_hole_seed_halo_mass_Msun: 1.5e10 # Minimal halo mass in which to seed a black hole (in solar masses). + scale_factor_first: 0.91 # Scale-factor of first FoF black hole seeding calls. + delta_time: 1.005 # Scale-factor ratio between consecutive FoF black hole seeding calls. + # Parameters related to the initial conditions InitialConditions: file_name: ./EAGLE_ICs_12.hdf5 # The file to read periodic: 1 cleanup_h_factors: 1 # Remove the h-factors inherited from Gadget cleanup_velocity_factors: 1 # Remove the sqrt(a) factor in the velocities inherited from Gadget - + EAGLEChemistry: # Solar abundances init_abundance_metal: 0.014 init_abundance_Hydrogen: 0.70649785 @@ -143,6 +152,7 @@ EAGLEFeedback: # EAGLE AGN model EAGLEAGN: + subgrid_seed_mass_Msun: 1.5e5 # Black hole subgrid mass at creation time in solar masses. max_eddington_fraction: 1. # Maximal allowed accretion rate in units of the Eddington rate. radiative_efficiency: 0.1 # Fraction of the accreted mass that gets radiated. coupling_efficiency: 0.15 # Fraction of the radiated energy that couples to the gas in feedback events. diff --git a/examples/EAGLE_low_z/EAGLE_25/eagle_25.yml b/examples/EAGLE_low_z/EAGLE_25/eagle_25.yml index 218f5142dda91bff63e691d940916b8759c1178d..484eb885c4a47d92ebc82a978b1fe0d3973a00ff 100644 --- a/examples/EAGLE_low_z/EAGLE_25/eagle_25.yml +++ b/examples/EAGLE_low_z/EAGLE_25/eagle_25.yml @@ -58,6 +58,15 @@ SPH: CFL_condition: 0.1 # Courant-Friedrich-Levy condition for time integration. minimal_temperature: 100 # (internal units) +# Parameters for the Friends-Of-Friends algorithm +FOF: + basename: fof_output # Filename for the FOF outputs. + min_group_size: 256 # The minimum no. of particles required for a group. + linking_length_ratio: 0.2 # Linking length in units of the main inter-particle separation. + black_hole_seed_halo_mass_Msun: 1.5e10 # Minimal halo mass in which to seed a black hole (in solar masses). + scale_factor_first: 0.91 # Scale-factor of first FoF black hole seeding calls. + delta_time: 1.005 # Scale-factor ratio between consecutive FoF black hole seeding calls. + # Parameters related to the initial conditions InitialConditions: file_name: ./EAGLE_ICs_25.hdf5 # The file to read @@ -150,6 +159,7 @@ EAGLEFeedback: # EAGLE AGN model EAGLEAGN: + subgrid_seed_mass_Msun: 1.5e5 # Black hole subgrid mass at creation time in solar masses. max_eddington_fraction: 1. # Maximal allowed accretion rate in units of the Eddington rate. radiative_efficiency: 0.1 # Fraction of the accreted mass that gets radiated. coupling_efficiency: 0.15 # Fraction of the radiated energy that couples to the gas in feedback events. diff --git a/examples/EAGLE_low_z/EAGLE_50/eagle_50.yml b/examples/EAGLE_low_z/EAGLE_50/eagle_50.yml index 9ab1214dc010225fd3e4d60a36fc8c295b1651f2..66df7929b9432eaad153dcd1a9b55da396f2e8be 100644 --- a/examples/EAGLE_low_z/EAGLE_50/eagle_50.yml +++ b/examples/EAGLE_low_z/EAGLE_50/eagle_50.yml @@ -53,6 +53,15 @@ SPH: CFL_condition: 0.1 # Courant-Friedrich-Levy condition for time integration. minimal_temperature: 100 # (internal units) +# Parameters for the Friends-Of-Friends algorithm +FOF: + basename: fof_output # Filename for the FOF outputs. + min_group_size: 256 # The minimum no. of particles required for a group. + linking_length_ratio: 0.2 # Linking length in units of the main inter-particle separation. + black_hole_seed_halo_mass_Msun: 1.5e10 # Minimal halo mass in which to seed a black hole (in solar masses). + scale_factor_first: 0.91 # Scale-factor of first FoF black hole seeding calls. + delta_time: 1.005 # Scale-factor ratio between consecutive FoF black hole seeding calls. + # Parameters related to the initial conditions InitialConditions: file_name: ./EAGLE_ICs_50.hdf5 # The file to read @@ -145,6 +154,7 @@ EAGLEFeedback: # EAGLE AGN model EAGLEAGN: + subgrid_seed_mass_Msun: 1.5e5 # Black hole subgrid mass at creation time in solar masses. max_eddington_fraction: 1. # Maximal allowed accretion rate in units of the Eddington rate. radiative_efficiency: 0.1 # Fraction of the accreted mass that gets radiated. coupling_efficiency: 0.15 # Fraction of the radiated energy that couples to the gas in feedback events. diff --git a/examples/EAGLE_low_z/EAGLE_6/eagle_6.yml b/examples/EAGLE_low_z/EAGLE_6/eagle_6.yml index 1046fcd0fe1aed5c0a52ed35547282627fed22c9..5506ad242bff579ac4f6ec6ef23dbb8597eb65a9 100644 --- a/examples/EAGLE_low_z/EAGLE_6/eagle_6.yml +++ b/examples/EAGLE_low_z/EAGLE_6/eagle_6.yml @@ -62,13 +62,22 @@ SPH: CFL_condition: 0.1 # Courant-Friedrich-Levy condition for time integration. minimal_temperature: 100 # (internal units) +# Parameters for the Friends-Of-Friends algorithm +FOF: + basename: fof_output # Filename for the FOF outputs. + min_group_size: 256 # The minimum no. of particles required for a group. + linking_length_ratio: 0.2 # Linking length in units of the main inter-particle separation. + black_hole_seed_halo_mass_Msun: 1.5e10 # Minimal halo mass in which to seed a black hole (in solar masses). + scale_factor_first: 0.91 # Scale-factor of first FoF black hole seeding calls. + delta_time: 1.005 # Scale-factor ratio between consecutive FoF black hole seeding calls. + # Parameters related to the initial conditions InitialConditions: file_name: ./EAGLE_ICs_6.hdf5 # The file to read periodic: 1 cleanup_h_factors: 1 # Remove the h-factors inherited from Gadget cleanup_velocity_factors: 1 # Remove the sqrt(a) factor in the velocities inherited from Gadget - + EAGLEChemistry: # Solar abundances init_abundance_metal: 0.014 init_abundance_Hydrogen: 0.70649785 @@ -154,9 +163,9 @@ EAGLEFeedback: # EAGLE AGN model EAGLEAGN: + subgrid_seed_mass_Msun: 1.5e5 # Black hole subgrid mass at creation time in solar masses. max_eddington_fraction: 1. # Maximal allowed accretion rate in units of the Eddington rate. radiative_efficiency: 0.1 # Fraction of the accreted mass that gets radiated. coupling_efficiency: 0.15 # Fraction of the radiated energy that couples to the gas in feedback events. AGN_delta_T_K: 3.16228e8 # Change in temperature to apply to the gas particle in an AGN feedback event in Kelvin. AGN_num_ngb_to_heat: 1. # Target number of gas neighbours to heat in an AGN feedback event. - diff --git a/examples/Makefile.am b/examples/Makefile.am index 1c8415e1302df828133d0459d7dc9e9b4d73ffcb..61ef0972ddf4a92aa0d70a62943b196e965cca33 100644 --- a/examples/Makefile.am +++ b/examples/Makefile.am @@ -36,9 +36,12 @@ MPI_FLAGS = -DWITH_MPI $(PARMETIS_INCS) $(METIS_INCS) # Programs. bin_PROGRAMS = swift +bin_PROGRAMS += fof + # Build MPI versions as well? if HAVEMPI bin_PROGRAMS += swift_mpi +bin_PROGRAMS += fof_mpi endif # engine_policy_setaffinity is available? @@ -58,6 +61,16 @@ swift_mpi_SOURCES = main.c swift_mpi_CFLAGS = $(MYFLAGS) $(AM_CFLAGS) $(MPI_FLAGS) -DENGINE_POLICY="engine_policy_keep $(ENGINE_POLICY_SETAFFINITY)" swift_mpi_LDADD = ../src/.libs/libswiftsim_mpi.a ../argparse/.libs/libargparse.a $(MPI_LIBS) $(EXTRA_LIBS) +# Sources for fof +fof_SOURCES = main_fof.c +fof_CFLAGS = $(MYFLAGS) $(AM_CFLAGS) -DENGINE_POLICY="engine_policy_keep $(ENGINE_POLICY_SETAFFINITY)" +fof_LDADD = ../src/.libs/libswiftsim.a ../argparse/.libs/libargparse.a $(EXTRA_LIBS) + +# Sources for fof_mpi, do we need an affinity policy for MPI? +fof_mpi_SOURCES = main_fof.c +fof_mpi_CFLAGS = $(MYFLAGS) $(AM_CFLAGS) $(MPI_FLAGS) -DENGINE_POLICY="engine_policy_keep $(ENGINE_POLICY_SETAFFINITY)" +fof_mpi_LDADD = ../src/.libs/libswiftsim_mpi.a ../argparse/.libs/libargparse.a $(MPI_LIBS) $(EXTRA_LIBS) + # Scripts to generate ICs EXTRA_DIST = Cooling/CoolingBox/coolingBox.yml Cooling/CoolingBox/plotEnergy.py Cooling/CoolingBox/makeIC.py Cooling/CoolingBox/run.sh Cooling/CoolingBox/getGlass.sh \ Cosmology/ConstantCosmoVolume/run.sh Cosmology/ConstantCosmoVolume/makeIC.py Cosmology/ConstantCosmoVolume/plotSolution.py Cosmology/ConstantCosmoVolume/constant_volume.yml \ diff --git a/examples/main.c b/examples/main.c index d6432370ddd11df401e66616ea26dfccb6b34a84..10e592109b075069286c74ba27508f5ba73818b1 100644 --- a/examples/main.c +++ b/examples/main.c @@ -99,6 +99,7 @@ int main(int argc, char *argv[]) { struct feedback_props feedback_properties; struct entropy_floor_properties entropy_floor; struct black_holes_props black_holes_properties; + struct fof_props fof_properties; struct part *parts = NULL; struct phys_const prog_const; struct space s; @@ -157,6 +158,7 @@ int main(int argc, char *argv[]) { int with_self_gravity = 0; int with_hydro = 0; int with_stars = 0; + int with_fof = 0; int with_star_formation = 0; int with_feedback = 0; int with_black_holes = 0; @@ -210,6 +212,10 @@ int main(int argc, char *argv[]) { OPT_BOOLEAN('S', "stars", &with_stars, "Run with stars.", NULL, 0, 0), OPT_BOOLEAN('B', "black-holes", &with_black_holes, "Run with black holes.", NULL, 0, 0), + OPT_BOOLEAN( + 'u', "fof", &with_fof, + "Run Friends-of-Friends algorithm to perform black hole seeding.", + NULL, 0, 0), OPT_BOOLEAN('x', "velociraptor", &with_structure_finding, "Run with structure finding.", NULL, 0, 0), OPT_BOOLEAN(0, "limiter", &with_limiter, "Run with time-step limiter.", @@ -243,12 +249,12 @@ int main(int argc, char *argv[]) { handle_cmdparam, (intptr_t)&cmdps, 0), OPT_BOOLEAN('r', "restart", &restart, "Continue using restart files.", NULL, 0, 0), - OPT_BOOLEAN('T', "timers", &with_verbose_timers, - "Print timers every time-step.", NULL, 0, 0), OPT_INTEGER('t', "threads", &nr_threads, "The number of threads to use on each MPI rank. Defaults to " "1 if not specified.", NULL, 0, 0), + OPT_INTEGER('T', "timers", &with_verbose_timers, + "Print timers every time-step.", NULL, 0, 0), OPT_INTEGER('v', "verbose", &verbose, "Run in verbose mode, in MPI mode 2 outputs from all ranks.", NULL, 0, 0), @@ -360,6 +366,22 @@ int main(int argc, char *argv[]) { return 1; } + if (with_fof && !with_self_gravity) { + if (myrank == 0) + printf( + "Error: Cannot perform FOF search without gravity, -g or -G must be " + "chosen.\n"); + return 1; + } + + if (with_fof && !with_black_holes) { + if (myrank == 0) + printf( + "Error: Cannot perform FOF seeding without black holes being in use, " + "-B must be chosen.\n"); + return 1; + } + if (!with_stars && with_star_formation) { if (myrank == 0) { argparse_usage(&argparse); @@ -817,6 +839,10 @@ int main(int argc, char *argv[]) { chemistry_init(params, &us, &prog_const, &chemistry); if (myrank == 0) chemistry_print(&chemistry); + /* Initialise the FOF properties */ + bzero(&fof_properties, sizeof(struct fof_props)); + if (with_fof) fof_init(&fof_properties, params, &prog_const, &us); + /* Be verbose about what happens next */ if (myrank == 0) message("Reading ICs from file '%s'", ICfileName); if (myrank == 0 && cleanup_h) @@ -1018,15 +1044,16 @@ int main(int argc, char *argv[]) { if (with_black_holes) engine_policies |= engine_policy_black_holes; if (with_structure_finding) engine_policies |= engine_policy_structure_finding; + if (with_fof) engine_policies |= engine_policy_fof; /* Initialize the engine with the space and policies. */ if (myrank == 0) clocks_gettime(&tic); - engine_init(&e, &s, params, N_total[0], N_total[1], N_total[2], + engine_init(&e, &s, params, N_total[0], N_total[1], N_total[2], N_total[3], engine_policies, talking, &reparttype, &us, &prog_const, &cosmo, &hydro_properties, &entropy_floor, &gravity_properties, &stars_properties, &black_holes_properties, &feedback_properties, &mesh, &potential, &cooling_func, - &starform, &chemistry); + &starform, &chemistry, &fof_properties); engine_config(0, &e, params, nr_nodes, myrank, nr_threads, with_aff, talking, restart_file); @@ -1086,7 +1113,8 @@ int main(int argc, char *argv[]) { #endif /* Initialise the particles */ - engine_init_particles(&e, flag_entropy_ICs, clean_smoothing_length_values); + engine_init_particles(&e, flag_entropy_ICs, clean_smoothing_length_values, + 1); /* Write the state of the system before starting time integration. */ #ifdef WITH_LOGGER diff --git a/examples/main_fof.c b/examples/main_fof.c new file mode 100644 index 0000000000000000000000000000000000000000..eeeca3ba9694e5ba6fe6f75006592495fc506c7e --- /dev/null +++ b/examples/main_fof.c @@ -0,0 +1,1093 @@ +/******************************************************************************* + * This file is part of SWIFT. + * Copyright (c) 2012 Pedro Gonnet (pedro.gonnet@durham.ac.uk), + * Matthieu Schaller (matthieu.schaller@durham.ac.uk) + * 2015 Peter W. Draper (p.w.draper@durham.ac.uk) + * Angus Lepper (angus.lepper@ed.ac.uk) + * 2016 John A. Regan (john.a.regan@durham.ac.uk) + * Tom Theuns (tom.theuns@durham.ac.uk) + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published + * by the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public License + * along with this program. If not, see <http://www.gnu.org/licenses/>. + * + ******************************************************************************/ + +/* Config parameters. */ +#include "../config.h" + +/* Some standard headers. */ +#include <errno.h> +#include <fenv.h> +#include <libgen.h> +#include <stdio.h> +#include <stdlib.h> +#include <string.h> +#include <sys/stat.h> +#include <unistd.h> + +/* MPI headers. */ +#ifdef WITH_MPI +#include <mpi.h> +#endif + +/* Local headers. */ +#include "argparse.h" +#include "swift.h" + +/* Engine policy flags. */ +#ifndef ENGINE_POLICY +#define ENGINE_POLICY engine_policy_none +#endif + +/* Global profiler. */ +struct profiler prof; + +/* Usage string. */ +static const char *const swift_usage[] = { + "swift [options] [[--] param-file]", + "swift [options] param-file", + "swift_mpi [options] [[--] param-file]", + "swift_mpi [options] param-file", + NULL, +}; + +/* Function to handle multiple -P arguments. */ +struct cmdparams { + const char *param[PARSER_MAX_NO_OF_PARAMS]; + int nparam; +}; + +static int handle_cmdparam(struct argparse *self, + const struct argparse_option *opt) { + struct cmdparams *cmdps = (struct cmdparams *)opt->data; + cmdps->param[cmdps->nparam] = *(char **)opt->value; + cmdps->nparam++; + return 1; +} + +/** + * @brief Main routine that loads a few particles and generates some output. + * + */ +int main(int argc, char *argv[]) { + + struct clocks_time tic, toc; + struct engine e; + + /* Structs used by the engine. Declare now to make sure these are always in + * scope. */ + struct chemistry_global_data chemistry; + struct cooling_function_data cooling_func; + struct cosmology cosmo; + struct external_potential potential; + struct star_formation starform; + struct pm_mesh mesh; + struct gpart *gparts = NULL; + struct gravity_props gravity_properties; + struct hydro_props hydro_properties; + struct stars_props stars_properties; + struct feedback_props feedback_properties; + struct entropy_floor_properties entropy_floor; + struct black_holes_props black_holes_properties; + struct fof_props fof_properties; + struct part *parts = NULL; + struct phys_const prog_const; + struct space s; + struct spart *sparts = NULL; + struct bpart *bparts = NULL; + struct unit_system us; + + int nr_nodes = 1, myrank = 0; + +#ifdef WITH_MPI + /* Start by initializing MPI. */ + int res = 0, prov = 0; + if ((res = MPI_Init_thread(&argc, &argv, MPI_THREAD_MULTIPLE, &prov)) != + MPI_SUCCESS) + error("Call to MPI_Init failed with error %i.", res); + if (prov != MPI_THREAD_MULTIPLE) + error( + "MPI does not provide the level of threading" + " required (MPI_THREAD_MULTIPLE)."); + if ((res = MPI_Comm_size(MPI_COMM_WORLD, &nr_nodes)) != MPI_SUCCESS) + error("MPI_Comm_size failed with error %i.", res); + if ((res = MPI_Comm_rank(MPI_COMM_WORLD, &myrank)) != MPI_SUCCESS) + error("Call to MPI_Comm_rank failed with error %i.", res); + + /* Make sure messages are stamped with the correct rank and step. */ + engine_rank = myrank; + engine_current_step = 0; + + if ((res = MPI_Comm_set_errhandler(MPI_COMM_WORLD, MPI_ERRORS_RETURN)) != + MPI_SUCCESS) + error("Call to MPI_Comm_set_errhandler failed with error %i.", res); + if (myrank == 0) + printf("[0000] [00000.0] main: MPI is up and running with %i node(s).\n\n", + nr_nodes); + if (nr_nodes == 1) { + message("WARNING: you are running with one MPI rank."); + message("WARNING: you should use the non-MPI version of this program."); + } + fflush(stdout); + +#endif + + /* Welcome to SWIFT, you made the right choice */ + if (myrank == 0) greetings(); + + int with_aff = 0; + int dry_run = 0; + int dump_tasks = 0; + int dump_threadpool = 0; + int nsteps = -2; + int restart = 0; + int with_cosmology = 0; + int with_external_gravity = 0; + int with_cooling = 0; + int with_self_gravity = 0; + int with_hydro = 0; + int with_stars = 0; + int with_fof = 1; + int with_star_formation = 0; + int with_feedback = 0; + int with_black_holes = 0; + int with_fp_exceptions = 0; + int with_drift_all = 0; + int with_mpole_reconstruction = 0; + int with_structure_finding = 0; + int verbose = 0; + int nr_threads = 1; + int with_verbose_timers = 0; + char *output_parameters_filename = NULL; + char *cpufreqarg = NULL; + char *param_filename = NULL; + char restart_file[200] = ""; + unsigned long long cpufreq = 0; + struct cmdparams cmdps; + cmdps.nparam = 0; + cmdps.param[0] = NULL; + char *buffer = NULL; + + /* Parse the command-line parameters. */ + struct argparse_option options[] = { + OPT_HELP(), + + OPT_GROUP(" Simulation options:\n"), + OPT_BOOLEAN('b', "feedback", &with_feedback, "Run with stars feedback.", + NULL, 0, 0), + OPT_BOOLEAN('c', "cosmology", &with_cosmology, + "Run with cosmological time integration.", NULL, 0, 0), + OPT_BOOLEAN('C', "cooling", &with_cooling, + "Run with cooling (also switches on --with-temperature).", + NULL, 0, 0), + OPT_BOOLEAN('D', "drift-all", &with_drift_all, + "Always drift all particles even the ones far from active " + "particles. This emulates Gadget-[23] and GIZMO's default " + "behaviours.", + NULL, 0, 0), + OPT_BOOLEAN('F', "star-formation", &with_star_formation, + "Run with star formation.", NULL, 0, 0), + OPT_BOOLEAN('g', "external-gravity", &with_external_gravity, + "Run with an external gravitational potential.", NULL, 0, 0), + OPT_BOOLEAN('G', "self-gravity", &with_self_gravity, + "Run with self-gravity.", NULL, 0, 0), + OPT_BOOLEAN('M', "multipole-reconstruction", &with_mpole_reconstruction, + "Reconstruct the multipoles every time-step.", NULL, 0, 0), + OPT_BOOLEAN('s', "hydro", &with_hydro, "Run with hydrodynamics.", NULL, 0, + 0), + OPT_BOOLEAN('S', "stars", &with_stars, "Run with stars.", NULL, 0, 0), + OPT_BOOLEAN('x', "velociraptor", &with_structure_finding, + "Run with structure finding.", NULL, 0, 0), + + OPT_GROUP(" Control options:\n"), + OPT_BOOLEAN('a', "pin", &with_aff, + "Pin runners using processor affinity.", NULL, 0, 0), + OPT_BOOLEAN('d', "dry-run", &dry_run, + "Dry run. Read the parameter file, allocates memory but does " + "not read the particles from ICs. Exits before the start of " + "time integration. Checks the validity of parameters and IC " + "files as well as memory limits.", + NULL, 0, 0), + OPT_BOOLEAN('e', "fpe", &with_fp_exceptions, + "Enable floating-point exceptions (debugging mode).", NULL, 0, + 0), + OPT_STRING('f', "cpu-frequency", &cpufreqarg, + "Overwrite the CPU " + "frequency (Hz) to be used for time measurements.", + NULL, 0, 0), + OPT_INTEGER('n', "steps", &nsteps, + "Execute a fixed number of time steps. When unset use the " + "time_end parameter to stop.", + NULL, 0, 0), + OPT_STRING('o', "output-params", &output_parameters_filename, + "Generate a default output parameter file.", NULL, 0, 0), + OPT_STRING('P', "param", &buffer, + "Set parameter value, overiding the value read from the " + "parameter file. Can be used more than once {sec:par:value}.", + handle_cmdparam, (intptr_t)&cmdps, 0), + OPT_BOOLEAN('r', "restart", &restart, "Continue using restart files.", + NULL, 0, 0), + OPT_INTEGER('t', "threads", &nr_threads, + "The number of threads to use on each MPI rank. Defaults to " + "1 if not specified.", + NULL, 0, 0), + OPT_INTEGER('T', "timers", &with_verbose_timers, + "Print timers every time-step.", NULL, 0, 0), + OPT_BOOLEAN('u', "fof", &with_fof, "Run Friends-of-Friends algorithm.", + NULL, 0, 0), + OPT_INTEGER('v', "verbose", &verbose, + "Run in verbose mode, in MPI mode 2 outputs from all ranks.", + NULL, 0, 0), + OPT_INTEGER('y', "task-dumps", &dump_tasks, + "Time-step frequency at which task graphs are dumped.", NULL, + 0, 0), + OPT_INTEGER('Y', "threadpool-dumps", &dump_threadpool, + "Time-step frequency at which threadpool tasks are dumped.", + NULL, 0, 0), + OPT_END(), + }; + struct argparse argparse; + argparse_init(&argparse, options, swift_usage, 0); + argparse_describe(&argparse, "\nParameters:", + "\nSee the file examples/parameter_example.yml for an " + "example of parameter file."); + int nargs = argparse_parse(&argparse, argc, (const char **)argv); + + /* Need a parameter file. */ + if (nargs != 1) { + if (myrank == 0) argparse_usage(&argparse); + printf("\nError: no parameter file was supplied.\n"); + return 1; + } + param_filename = argv[0]; + + /* Checks of options. */ +#if !defined(HAVE_SETAFFINITY) || !defined(HAVE_LIBNUMA) + if (with_aff) { + printf("Error: no NUMA support for thread affinity\n"); + return 1; + } +#endif + +#ifndef HAVE_FE_ENABLE_EXCEPT + if (with_fp_exceptions) { + printf("Error: no support for floating point exceptions\n"); + return 1; + } +#endif + +#ifndef HAVE_VELOCIRAPTOR + if (with_structure_finding) { + printf("Error: VELOCIraptor is not available\n"); + return 1; + } +#endif + +#ifndef SWIFT_DEBUG_TASKS + if (dump_tasks) { + if (myrank == 0) { + message( + "WARNING: complete task dumps are only created when " + "configured with --enable-task-debugging."); + message(" Basic task statistics will be output."); + } + } +#endif + +#ifndef SWIFT_DEBUG_THREADPOOL + if (dump_threadpool) { + printf( + "Error: threadpool dumping is only possible if SWIFT was " + "configured with the --enable-threadpool-debugging option.\n"); + return 1; + } +#endif + + /* The CPU frequency is a long long, so we need to parse that ourselves. */ + if (cpufreqarg != NULL) { + if (sscanf(cpufreqarg, "%llu", &cpufreq) != 1) { + if (myrank == 0) + printf("Error parsing CPU frequency (%s).\n", cpufreqarg); + return 1; + } + } + + /* Write output parameter file */ + if (myrank == 0 && output_parameters_filename != NULL) { + io_write_output_field_parameter(output_parameters_filename); + printf("End of run.\n"); + return 0; + } + + if (!with_self_gravity && !with_hydro && !with_external_gravity) { + if (myrank == 0) { + argparse_usage(&argparse); + printf("\nError: At least one of -s, -g or -G must be chosen.\n"); + } + return 1; + } + if (with_stars && !with_external_gravity && !with_self_gravity) { + if (myrank == 0) { + argparse_usage(&argparse); + printf( + "\nError: Cannot process stars without gravity, -g or -G " + "must be chosen.\n"); + } + return 1; + } + if (with_fof && !with_external_gravity && !with_self_gravity) { + if (myrank == 0) + printf( + "Error: Cannot perform FOF search without gravity, -g or -G must be " + "chosen.\n"); + return 1; + } + + if (!with_stars && with_star_formation) { + if (myrank == 0) { + argparse_usage(&argparse); + printf( + "\nError: Cannot process star formation without stars, --stars must " + "be chosen.\n"); + } + return 1; + } + + if (!with_stars && with_feedback) { + if (myrank == 0) { + argparse_usage(&argparse); + printf( + "\nError: Cannot process feedback without stars, --stars must be " + "chosen.\n"); + } + return 1; + } + + if (!with_hydro && with_feedback) { + if (myrank == 0) { + argparse_usage(&argparse); + printf( + "\nError: Cannot process feedback without gas, --hydro must be " + "chosen.\n"); + } + return 1; + } + +/* Let's pin the main thread, now we know if affinity will be used. */ +#if defined(HAVE_SETAFFINITY) && defined(HAVE_LIBNUMA) && defined(_GNU_SOURCE) + if (with_aff && + ((ENGINE_POLICY)&engine_policy_setaffinity) == engine_policy_setaffinity) + engine_pin(); +#endif + + /* Genesis 1.1: And then, there was time ! */ + clocks_set_cpufreq(cpufreq); + + /* How vocal are we ? */ + const int talking = (verbose == 1 && myrank == 0) || (verbose == 2); + + if (myrank == 0 && dry_run) + message( + "Executing a dry run. No i/o or time integration will be performed."); + + /* Report CPU frequency.*/ + cpufreq = clocks_get_cpufreq(); + if (myrank == 0) { + message("CPU frequency used for tick conversion: %llu Hz", cpufreq); + } + +/* Report host name(s). */ +#ifdef WITH_MPI + if (talking) { + message("Rank %d running on: %s", myrank, hostname()); + } +#else + message("Running on: %s", hostname()); +#endif + +/* Do we have debugging checks ? */ +#ifdef SWIFT_DEBUG_CHECKS + if (myrank == 0) + message("WARNING: Debugging checks activated. Code will be slower !"); +#endif + +/* Do we have debugging checks ? */ +#ifdef SWIFT_USE_NAIVE_INTERACTIONS + if (myrank == 0) + message( + "WARNING: Naive cell interactions activated. Code will be slower !"); +#endif + +/* Do we have gravity accuracy checks ? */ +#ifdef SWIFT_GRAVITY_FORCE_CHECKS + if (myrank == 0) + message( + "WARNING: Checking 1/%d of all gpart for gravity accuracy. Code will " + "be slower !", + SWIFT_GRAVITY_FORCE_CHECKS); +#endif + + /* Do we choke on FP-exceptions ? */ + if (with_fp_exceptions) { +#ifdef HAVE_FE_ENABLE_EXCEPT + feenableexcept(FE_DIVBYZERO | FE_INVALID | FE_OVERFLOW); +#endif + if (myrank == 0) + message("WARNING: Floating point exceptions will be reported."); + } + +/* Do we have slow barriers? */ +#ifndef HAVE_PTHREAD_BARRIERS + if (myrank == 0) + message("WARNING: Non-optimal thread barriers are being used."); +#endif + + /* How large are the parts? */ + if (myrank == 0) { + message("sizeof(part) is %4zi bytes.", sizeof(struct part)); + message("sizeof(xpart) is %4zi bytes.", sizeof(struct xpart)); + message("sizeof(spart) is %4zi bytes.", sizeof(struct spart)); + message("sizeof(gpart) is %4zi bytes.", sizeof(struct gpart)); + message("sizeof(multipole) is %4zi bytes.", sizeof(struct multipole)); + message("sizeof(grav_tensor) is %4zi bytes.", sizeof(struct grav_tensor)); + message("sizeof(task) is %4zi bytes.", sizeof(struct task)); + message("sizeof(cell) is %4zi bytes.", sizeof(struct cell)); + } + + /* Read the parameter file */ + struct swift_params *params = + (struct swift_params *)malloc(sizeof(struct swift_params)); + if (params == NULL) error("Error allocating memory for the parameter file."); + if (myrank == 0) { + message("Reading runtime parameters from file '%s'", param_filename); + parser_read_file(param_filename, params); + + /* Handle any command-line overrides. */ + if (cmdps.nparam > 0) { + message( + "Overwriting values read from the YAML file with command-line " + "values."); + for (int k = 0; k < cmdps.nparam; k++) + parser_set_param(params, cmdps.param[k]); + } + } +#ifdef WITH_MPI + /* Broadcast the parameter file */ + MPI_Bcast(params, sizeof(struct swift_params), MPI_BYTE, 0, MPI_COMM_WORLD); +#endif + + /* Temporary early aborts for modes not supported over MPI. */ +#ifdef WITH_MPI + if (with_mpole_reconstruction && nr_nodes > 1) + error("Cannot reconstruct m-poles every step over MPI (yet)."); + if (with_star_formation && with_feedback) + error("Can't run with star formation and feedback over MPI (yet)"); +#endif + + /* Temporary early aborts for modes not supported with hand-vec. */ +#if defined(WITH_VECTORIZATION) && !defined(CHEMISTRY_NONE) + error( + "Cannot run with chemistry and hand-vectorization (yet). " + "Use --disable-hand-vec at configure time."); +#endif + + /* Check that we can write the snapshots by testing if the output + * directory exists and is searchable and writable. */ + char basename[PARSER_MAX_LINE_SIZE]; + parser_get_param_string(params, "Snapshots:basename", basename); + const char *dirp = dirname(basename); + if (access(dirp, W_OK | X_OK) != 0) { + error("Cannot write snapshots in directory %s (%s)", dirp, strerror(errno)); + } + + /* Check that we can write the structure finding catalogues by testing if the + * output directory exists and is searchable and writable. */ + if (with_structure_finding) { + char stfbasename[PARSER_MAX_LINE_SIZE]; + parser_get_param_string(params, "StructureFinding:basename", stfbasename); + const char *stfdirp = dirname(stfbasename); + if (access(stfdirp, W_OK | X_OK) != 0) { + error("Cannot write stf catalogues in directory %s (%s)", stfdirp, + strerror(errno)); + } + } + + /* Prepare the domain decomposition scheme */ + struct repartition reparttype; +#ifdef WITH_MPI + struct partition initial_partition; + partition_init(&initial_partition, &reparttype, params, nr_nodes); + + /* Let's report what we did */ + if (myrank == 0) { +#if defined(HAVE_PARMETIS) + if (reparttype.usemetis) + message("Using METIS serial partitioning:"); + else + message("Using ParMETIS partitioning:"); +#elif defined(HAVE_METIS) + message("Using METIS serial partitioning:"); +#else + message("Non-METIS partitioning:"); +#endif + message(" initial partitioning: %s", + initial_partition_name[initial_partition.type]); + if (initial_partition.type == INITPART_GRID) + message(" grid set to [ %i %i %i ].", initial_partition.grid[0], + initial_partition.grid[1], initial_partition.grid[2]); + message(" repartitioning: %s", repartition_name[reparttype.type]); + } +#endif + + /* Common variables for restart and IC sections. */ + int clean_smoothing_length_values = 0; + int flag_entropy_ICs = 0; + + /* Work out where we will read and write restart files. */ + char restart_dir[PARSER_MAX_LINE_SIZE]; + parser_get_opt_param_string(params, "Restarts:subdir", restart_dir, + "restart"); + + /* The directory must exist. */ + if (myrank == 0) { + if (access(restart_dir, W_OK | X_OK) != 0) { + if (restart) { + error("Cannot restart as no restart subdirectory: %s (%s)", restart_dir, + strerror(errno)); + } else { + if (mkdir(restart_dir, 0777) != 0) + error("Failed to create restart directory: %s (%s)", restart_dir, + strerror(errno)); + } + } + } + + /* Basename for any restart files. */ + char restart_name[PARSER_MAX_LINE_SIZE]; + parser_get_opt_param_string(params, "Restarts:basename", restart_name, + "swift"); + + /* If restarting, look for the restart files. */ + if (restart) { + + /* Attempting a restart. */ + char **restart_files = NULL; + int restart_nfiles = 0; + + if (myrank == 0) { + message("Restarting SWIFT"); + + /* Locate the restart files. */ + restart_files = + restart_locate(restart_dir, restart_name, &restart_nfiles); + if (restart_nfiles == 0) + error("Failed to locate any restart files in %s", restart_dir); + + /* We need one file per rank. */ + if (restart_nfiles != nr_nodes) + error("Incorrect number of restart files, expected %d found %d", + nr_nodes, restart_nfiles); + + if (verbose > 0) + for (int i = 0; i < restart_nfiles; i++) + message("found restart file: %s", restart_files[i]); + } + +#ifdef WITH_MPI + /* Distribute the restart files, need one for each rank. */ + if (myrank == 0) { + + for (int i = 1; i < nr_nodes; i++) { + strcpy(restart_file, restart_files[i]); + MPI_Send(restart_file, 200, MPI_BYTE, i, 0, MPI_COMM_WORLD); + } + + /* Keep local file. */ + strcpy(restart_file, restart_files[0]); + + /* Finished with the list. */ + restart_locate_free(restart_nfiles, restart_files); + + } else { + MPI_Recv(restart_file, 200, MPI_BYTE, 0, 0, MPI_COMM_WORLD, + MPI_STATUS_IGNORE); + } + if (verbose > 1) message("local restart file = %s", restart_file); +#else + + /* Just one restart file. */ + strcpy(restart_file, restart_files[0]); +#endif + + /* Now read it. */ + restart_read(&e, restart_file); + + /* And initialize the engine with the space and policies. */ + if (myrank == 0) clocks_gettime(&tic); + engine_config(1, &e, params, nr_nodes, myrank, nr_threads, with_aff, + talking, restart_file); + if (myrank == 0) { + clocks_gettime(&toc); + message("engine_config took %.3f %s.", clocks_diff(&tic, &toc), + clocks_getunit()); + fflush(stdout); + } + + /* Check if we are already done when given steps on the command-line. */ + if (e.step >= nsteps && nsteps > 0) + error("Not restarting, already completed %d steps", e.step); + + } else { + + /* Not restarting so look for the ICs. */ + /* Initialize unit system and constants */ + units_init_from_params(&us, params, "InternalUnitSystem"); + phys_const_init(&us, params, &prog_const); + if (myrank == 0) { + message("Internal unit system: U_M = %e g.", us.UnitMass_in_cgs); + message("Internal unit system: U_L = %e cm.", us.UnitLength_in_cgs); + message("Internal unit system: U_t = %e s.", us.UnitTime_in_cgs); + message("Internal unit system: U_I = %e A.", us.UnitCurrent_in_cgs); + message("Internal unit system: U_T = %e K.", us.UnitTemperature_in_cgs); + phys_const_print(&prog_const); + } + + /* Read particles and space information from ICs */ + char ICfileName[200] = ""; + parser_get_param_string(params, "InitialConditions:file_name", ICfileName); + const int periodic = + parser_get_param_int(params, "InitialConditions:periodic"); + const int replicate = + parser_get_opt_param_int(params, "InitialConditions:replicate", 1); + clean_smoothing_length_values = parser_get_opt_param_int( + params, "InitialConditions:cleanup_smoothing_lengths", 0); + const int cleanup_h = parser_get_opt_param_int( + params, "InitialConditions:cleanup_h_factors", 0); + const int cleanup_sqrt_a = parser_get_opt_param_int( + params, "InitialConditions:cleanup_velocity_factors", 0); + const int generate_gas_in_ics = parser_get_opt_param_int( + params, "InitialConditions:generate_gas_in_ics", 0); + + /* Some checks that we are not doing something stupid */ + if (generate_gas_in_ics && flag_entropy_ICs) + error("Can't generate gas if the entropy flag is set in the ICs."); + if (generate_gas_in_ics && !with_cosmology) + error("Can't generate gas if the run is not cosmological."); + + /* Initialise the cosmology */ + if (with_cosmology) + cosmology_init(params, &us, &prog_const, &cosmo); + else + cosmology_init_no_cosmo(&cosmo); + if (myrank == 0 && with_cosmology) cosmology_print(&cosmo); + + /* Initialise the hydro properties */ + if (with_hydro) + hydro_props_init(&hydro_properties, &prog_const, &us, params); + else + bzero(&hydro_properties, sizeof(struct hydro_props)); + + /* Initialise the equation of state */ + if (with_hydro) + eos_init(&eos, &prog_const, &us, params); + else + bzero(&eos, sizeof(struct eos_parameters)); + + /* Initialise the entropy floor */ + if (with_hydro) + entropy_floor_init(&entropy_floor, &prog_const, &us, &hydro_properties, + params); + else + bzero(&entropy_floor, sizeof(struct entropy_floor_properties)); + + /* Initialise the stars properties */ + if (with_stars) + stars_props_init(&stars_properties, &prog_const, &us, params, + &hydro_properties, &cosmo); + else + bzero(&stars_properties, sizeof(struct stars_props)); + + /* Initialise the feedback properties */ + if (with_feedback) { + feedback_props_init(&feedback_properties, &prog_const, &us, params, + &hydro_properties, &cosmo); + } else + bzero(&feedback_properties, sizeof(struct feedback_props)); + + /* Initialise the black holes properties */ + if (with_black_holes) { + black_holes_props_init(&black_holes_properties, &prog_const, &us, params, + &hydro_properties, &cosmo); + } else + bzero(&black_holes_properties, sizeof(struct black_holes_props)); + + /* Initialise the gravity properties */ + if (with_self_gravity) + gravity_props_init(&gravity_properties, params, &cosmo, with_cosmology, + periodic); + else + bzero(&gravity_properties, sizeof(struct gravity_props)); + + /* Be verbose about what happens next */ + if (myrank == 0) message("Reading ICs from file '%s'", ICfileName); + if (myrank == 0 && cleanup_h) + message("Cleaning up h-factors (h=%f)", cosmo.h); + if (myrank == 0 && cleanup_sqrt_a) + message("Cleaning up a-factors from velocity (a=%f)", cosmo.a); + fflush(stdout); + + /* Get ready to read particles of all kinds */ + size_t Ngas = 0, Ngpart = 0, Nspart = 0, Nbpart = 0; + double dim[3] = {0., 0., 0.}; + if (myrank == 0) clocks_gettime(&tic); +#if defined(HAVE_HDF5) +#if defined(WITH_MPI) +#if defined(HAVE_PARALLEL_HDF5) + read_ic_parallel(ICfileName, &us, dim, &parts, &gparts, &sparts, &bparts, + &Ngas, &Ngpart, &Nspart, &Nbpart, &flag_entropy_ICs, + with_hydro, (with_external_gravity || with_self_gravity), + with_stars, with_black_holes, cleanup_h, cleanup_sqrt_a, + cosmo.h, cosmo.a, myrank, nr_nodes, MPI_COMM_WORLD, + MPI_INFO_NULL, nr_threads, dry_run); +#else + read_ic_serial(ICfileName, &us, dim, &parts, &gparts, &sparts, &bparts, + &Ngas, &Ngpart, &Nspart, &Nbpart, &flag_entropy_ICs, + with_hydro, (with_external_gravity || with_self_gravity), + with_stars, with_black_holes, cleanup_h, cleanup_sqrt_a, + cosmo.h, cosmo.a, myrank, nr_nodes, MPI_COMM_WORLD, + MPI_INFO_NULL, nr_threads, dry_run); +#endif +#else + read_ic_single(ICfileName, &us, dim, &parts, &gparts, &sparts, &bparts, + &Ngas, &Ngpart, &Nspart, &Nbpart, &flag_entropy_ICs, + with_hydro, (with_external_gravity || with_self_gravity), + with_stars, with_black_holes, cleanup_h, cleanup_sqrt_a, + cosmo.h, cosmo.a, nr_threads, dry_run); +#endif +#endif + if (myrank == 0) { + clocks_gettime(&toc); + message("Reading initial conditions took %.3f %s.", + clocks_diff(&tic, &toc), clocks_getunit()); + fflush(stdout); + } + +#ifdef SWIFT_DEBUG_CHECKS + /* Check once and for all that we don't have unwanted links */ + if (!with_stars && !dry_run) { + for (size_t k = 0; k < Ngpart; ++k) + if (gparts[k].type == swift_type_stars) error("Linking problem"); + } + if (!with_hydro && !dry_run) { + for (size_t k = 0; k < Ngpart; ++k) + if (gparts[k].type == swift_type_gas) error("Linking problem"); + } + + /* Check that the other links are correctly set */ + if (!dry_run) + part_verify_links(parts, gparts, sparts, bparts, Ngas, Ngpart, Nspart, + Nbpart, 1); +#endif + + /* Get the total number of particles across all nodes. */ + long long N_total[4] = {0, 0, 0}; +#if defined(WITH_MPI) + long long N_long[4] = {Ngas, Ngpart, Nspart, Nbpart}; + MPI_Allreduce(&N_long, &N_total, 4, MPI_LONG_LONG_INT, MPI_SUM, + MPI_COMM_WORLD); +#else + N_total[0] = Ngas; + N_total[1] = Ngpart; + N_total[2] = Nspart; + N_total[3] = Nbpart; +#endif + + if (myrank == 0) + message( + "Read %lld gas particles, %lld stars particles and %lld gparts from " + "the ICs.", + N_total[0], N_total[2], N_total[1]); + + /* Verify that the fields to dump actually exist */ + if (myrank == 0) io_check_output_fields(params, N_total); + + /* Initialize the space with these data. */ + if (myrank == 0) clocks_gettime(&tic); + space_init(&s, params, &cosmo, dim, parts, gparts, sparts, bparts, Ngas, + Ngpart, Nspart, Nbpart, periodic, replicate, generate_gas_in_ics, + with_hydro, with_self_gravity, with_star_formation, talking, + dry_run); + + if (myrank == 0) { + clocks_gettime(&toc); + message("space_init took %.3f %s.", clocks_diff(&tic, &toc), + clocks_getunit()); + fflush(stdout); + } + + /* Initialise the long-range gravity mesh */ + if (with_self_gravity && periodic) { +#ifdef HAVE_FFTW + pm_mesh_init(&mesh, &gravity_properties, s.dim, nr_threads); +#else + /* Need the FFTW library if periodic and self gravity. */ + error( + "No FFTW library found. Cannot compute periodic long-range forces."); +#endif + } else { + pm_mesh_init_no_mesh(&mesh, s.dim); + } + + /* Check that the matter content matches the cosmology given in the + * parameter file. */ + if (with_cosmology && with_self_gravity && !dry_run) + space_check_cosmology(&s, &cosmo, myrank); + +/* Also update the total counts (in case of changes due to replication) */ +#if defined(WITH_MPI) + N_long[0] = s.nr_parts; + N_long[1] = s.nr_gparts; + N_long[2] = s.nr_sparts; + MPI_Allreduce(&N_long, &N_total, 3, MPI_LONG_LONG_INT, MPI_SUM, + MPI_COMM_WORLD); +#else + N_total[0] = s.nr_parts; + N_total[1] = s.nr_gparts; + N_total[2] = s.nr_sparts; +#endif + + /* Say a few nice things about the space we just created. */ + if (myrank == 0) { + message("space dimensions are [ %.3f %.3f %.3f ].", s.dim[0], s.dim[1], + s.dim[2]); + message("space %s periodic.", s.periodic ? "is" : "isn't"); + message("highest-level cell dimensions are [ %i %i %i ].", s.cdim[0], + s.cdim[1], s.cdim[2]); + message("%zi parts in %i cells.", s.nr_parts, s.tot_cells); + message("%zi gparts in %i cells.", s.nr_gparts, s.tot_cells); + message("%zi sparts in %i cells.", s.nr_sparts, s.tot_cells); + message("maximum depth is %d.", s.maxdepth); + fflush(stdout); + } + + /* Verify that each particle is in it's proper cell. */ + if (talking && !dry_run) { + int icount = 0; + space_map_cells_pre(&s, 0, &map_cellcheck, &icount); + message("map_cellcheck picked up %i parts.", icount); + } + + /* Verify the maximal depth of cells. */ + if (talking && !dry_run) { + int data[2] = {s.maxdepth, 0}; + space_map_cells_pre(&s, 0, &map_maxdepth, data); + message("nr of cells at depth %i is %i.", data[0], data[1]); + } + + /* Initialise the external potential properties */ + if (with_external_gravity) + potential_init(params, &prog_const, &us, &s, &potential); + if (myrank == 0) potential_print(&potential); + + /* Initialise the cooling function properties */ + if (with_cooling) + cooling_init(params, &us, &prog_const, &hydro_properties, &cooling_func); + if (myrank == 0) cooling_print(&cooling_func); + + /* Initialise the star formation law and its properties */ + if (with_star_formation) + starformation_init(params, &prog_const, &us, &hydro_properties, + &starform); + if (myrank == 0) starformation_print(&starform); + + /* Initialise the chemistry */ + chemistry_init(params, &us, &prog_const, &chemistry); + if (myrank == 0) chemistry_print(&chemistry); + + /* Initialise the FOF properties */ + bzero(&fof_properties, sizeof(struct fof_props)); + if (with_fof) fof_init(&fof_properties, params, &prog_const, &us); + + /* Construct the engine policy */ + int engine_policies = ENGINE_POLICY | engine_policy_steal; + if (with_drift_all) engine_policies |= engine_policy_drift_all; + if (with_mpole_reconstruction) + engine_policies |= engine_policy_reconstruct_mpoles; + if (with_hydro) engine_policies |= engine_policy_hydro; + if (with_self_gravity) engine_policies |= engine_policy_self_gravity; + if (with_external_gravity) + engine_policies |= engine_policy_external_gravity; + if (with_cosmology) engine_policies |= engine_policy_cosmology; + if (with_cooling) engine_policies |= engine_policy_cooling; + if (with_stars) engine_policies |= engine_policy_stars; + if (with_fof) engine_policies |= engine_policy_fof; + if (with_feedback) engine_policies |= engine_policy_feedback; + if (with_black_holes) engine_policies |= engine_policy_black_holes; + if (with_structure_finding) + engine_policies |= engine_policy_structure_finding; + if (with_fof) engine_policies |= engine_policy_fof; + + /* Initialize the engine with the space and policies. */ + if (myrank == 0) clocks_gettime(&tic); + engine_init(&e, &s, params, N_total[0], N_total[1], N_total[2], N_total[3], + engine_policies, talking, &reparttype, &us, &prog_const, &cosmo, + &hydro_properties, &entropy_floor, &gravity_properties, + &stars_properties, &black_holes_properties, + &feedback_properties, &mesh, &potential, &cooling_func, + &starform, &chemistry, &fof_properties); + engine_config(0, &e, params, nr_nodes, myrank, nr_threads, with_aff, + talking, restart_file); + + if (myrank == 0) { + clocks_gettime(&toc); + message("engine_init took %.3f %s.", clocks_diff(&tic, &toc), + clocks_getunit()); + fflush(stdout); + } + + /* Get some info to the user. */ + if (myrank == 0) { + long long N_DM = N_total[1] - N_total[2] - N_total[0]; + message( + "Running on %lld gas particles, %lld stars particles and %lld DM " + "particles (%lld gravity particles)", + N_total[0], N_total[2], N_total[1] > 0 ? N_DM : 0, N_total[1]); + message( + "from t=%.3e until t=%.3e with %d ranks, %d threads / rank and %d " + "task queues / rank (dt_min=%.3e, dt_max=%.3e)...", + e.time_begin, e.time_end, nr_nodes, e.nr_threads, e.sched.nr_queues, + e.dt_min, e.dt_max); + fflush(stdout); + } + } + + /* Time to say good-bye if this was not a serious run. */ + if (dry_run) { +#ifdef WITH_MPI + if ((res = MPI_Finalize()) != MPI_SUCCESS) + error("call to MPI_Finalize failed with error %i.", res); +#endif + if (myrank == 0) + message("Time integration ready to start. End of dry-run."); + engine_clean(&e); + free(params); + return 0; + } + +/* Initialise the table of Ewald corrections for the gravity checks */ +#ifdef SWIFT_GRAVITY_FORCE_CHECKS + if (s.periodic) gravity_exact_force_ewald_init(e.s->dim[0]); +#endif + +/* Init the runner history. */ +#ifdef HIST + for (k = 0; k < runner_hist_N; k++) runner_hist_bins[k] = 0; +#endif + + if (!restart) { + +#ifdef WITH_MPI + /* Split the space. */ + engine_split(&e, &initial_partition); + engine_redistribute(&e); +#endif + +#ifdef SWIFT_DEBUG_TASKS + e.tic_step = getticks(); +#endif + + /* Initialise the particles */ + engine_init_particles(&e, flag_entropy_ICs, clean_smoothing_length_values, + 0); + +#ifdef SWIFT_DEBUG_TASKS + e.toc_step = getticks(); +#endif + + /* Perform first FOF search after the first snapshot dump. */ + engine_fof(&e); + +#ifdef WITH_MPI + MPI_Barrier(MPI_COMM_WORLD); +#endif + } + + /* Dump the task data using the given frequency. */ + if (dump_tasks) { +#ifdef SWIFT_DEBUG_TASKS + task_dump_all(&e, 0); +#endif + + /* Generate the task statistics. */ + char dumpfile[40]; + snprintf(dumpfile, 40, "thread_stats-step%d.dat", 0); + task_dump_stats(dumpfile, &e, /* header = */ 0, /* allranks = */ 1); + } + +#ifdef SWIFT_DEBUG_THREADPOOL + /* Dump the task data using the given frequency. */ + if (dump_threadpool) { + char threadpool_dumpfile[52]; +#ifdef WITH_MPI + snprintf(threadpool_dumpfile, 52, "threadpool_info-rank%d-step%d.dat", + engine_rank, 0); +#else + snprintf(threadpool_dumpfile, 52, "threadpool_info-step%d.dat", 0); +#endif // WITH_MPI + threadpool_dump_log(&e.threadpool, threadpool_dumpfile, 1); + } else { + threadpool_reset_log(&e.threadpool); + } +#endif // SWIFT_DEBUG_THREADPOOL + + /* used parameters */ + parser_write_params_to_file(params, "used_parameters.yml", 1); + /* unused parameters */ + parser_write_params_to_file(params, "unused_parameters.yml", 0); + + /* Write final output. */ + engine_drift_all(&e, /*drift_mpole=*/0); + engine_print_stats(&e); + engine_dump_snapshot(&e); + + /* Dump memory use report */ +#ifdef SWIFT_MEMUSE_REPORTS + { + char dumpfile[40]; +#ifdef WITH_MPI + snprintf(dumpfile, 40, "memuse_report-rank%d-fof.dat", engine_rank); +#else + snprintf(dumpfile, 40, "memuse_report-fof.dat"); +#endif // WITH_MPI + memuse_log_dump(dumpfile); + } +#endif + +#ifdef WITH_MPI + if ((res = MPI_Finalize()) != MPI_SUCCESS) + error("call to MPI_Finalize failed with error %i.", res); +#endif + + /* Clean everything */ + if (with_verbose_timers) timers_close_file(); + if (with_cosmology) cosmology_clean(&cosmo); + if (with_self_gravity) pm_mesh_clean(&mesh); + engine_clean(&e); + free(params); + + /* Say goodbye. */ + if (myrank == 0) message("done. Bye."); + + /* All is calm, all is bright. */ + return 0; +} diff --git a/examples/parameter_example.yml b/examples/parameter_example.yml index 61e7ab470292d0575881ae0382103988e4748c74..176508f5c514b79515f3955679414ec99043b653 100644 --- a/examples/parameter_example.yml +++ b/examples/parameter_example.yml @@ -61,6 +61,19 @@ Gravity: r_cut_max: 4.5 # (Optional) Cut-off in number of top-level cells beyond which no FMM forces are computed (this is the default value). r_cut_min: 0.1 # (Optional) Cut-off in number of top-level cells below which no truncation of FMM forces are performed (this is the default value). +# Parameters for the Friends-Of-Friends algorithm +FOF: + basename: fof_output # Filename for the FOF outputs (Unused when FoF is only run to seed BHs). + scale_factor_first: 0.91 # Scale-factor of first FoF black hole seeding calls (needed for cosmological runs). + time_first: 0.2 # Time of first FoF black hole seeding calls (needed for non-cosmological runs). + delta_time: 1.005 # Time between consecutive FoF black hole seeding calls. + min_group_size: 256 # The minimum no. of particles required for a group. + linking_length_ratio: 0.2 # Linking length in units of the main inter-particle separation. + black_hole_seed_halo_mass_Msun: 1.5e10 # Minimal halo mass in which to seed a black hole (in solar masses). + absolute_linking_length: -1. # (Optional) Absolute linking length (in internal units). When not set to -1, this will overwrite the linking length computed from 'linking_length_ratio'. + group_id_default: 2147483647 # (Optional) Sets the group ID of particles in groups below the minimum size. Defaults to 2^31 - 1 if unspecified. Has to be positive. + group_id_offset: 1 # (Optional) Sets the offset of group ID labeling. Defaults to 1 if unspecified. + # Parameters for the task scheduling Scheduler: nr_queues: 0 # (Optional) The number of task queues to use. Use 0 to let the system decide. @@ -381,6 +394,7 @@ EAGLEFeedback: # EAGLE AGN model EAGLEAGN: + subgrid_seed_mass_Msun: 1.5e5 # Black hole subgrid mass at creation time in solar masses. max_eddington_fraction: 1. # Maximal allowed accretion rate in units of the Eddington rate. radiative_efficiency: 0.1 # Fraction of the accreted mass that gets radiated. coupling_efficiency: 0.15 # Fraction of the radiated energy that couples to the gas in feedback events. diff --git a/src/Makefile.am b/src/Makefile.am index 99dce90211806eee062a9acf81b77ddca9fb3842..7ae03c4d677f4d60ac580c3e9a8c7ec1f8305398 100644 --- a/src/Makefile.am +++ b/src/Makefile.am @@ -49,7 +49,7 @@ include_HEADERS = space.h runner.h queue.h task.h lock.h cell.h part.h const.h \ gravity_softened_derivatives.h vector_power.h collectgroup.h hydro_space.h sort_part.h \ chemistry.h chemistry_io.h chemistry_struct.h cosmology.h restart.h space_getsid.h utilities.h \ mesh_gravity.h cbrt.h exp10.h velociraptor_interface.h swift_velociraptor_part.h outputlist.h \ - logger_io.h tracers_io.h tracers.h tracers_struct.h star_formation_io.h \ + logger_io.h tracers_io.h tracers.h tracers_struct.h star_formation_io.h fof.h \ star_formation_struct.h star_formation.h star_formation_iact.h \ star_formation_logger.h star_formation_logger_struct.h \ velociraptor_struct.h velociraptor_io.h random.h memuse.h black_holes.h black_holes_io.h \ @@ -78,15 +78,16 @@ AM_SOURCES = space.c runner.c queue.c task.c cell.c engine.c engine_maketasks.c part_type.c xmf.c gravity_properties.c gravity.c \ collectgroup.c hydro_space.c equation_of_state.c \ chemistry.c cosmology.c restart.c mesh_gravity.c velociraptor_interface.c \ - outputlist.c velociraptor_dummy.c logger_io.c memuse.c \ + outputlist.c velociraptor_dummy.c logger_io.c memuse.c fof.c \ + hashmap.c \ $(EAGLE_COOLING_SOURCES) $(EAGLE_FEEDBACK_SOURCES) # Include files for distribution, not installation. nobase_noinst_HEADERS = align.h approx_math.h atomic.h barrier.h cycle.h error.h inline.h kernel_hydro.h kernel_gravity.h \ gravity_iact.h kernel_long_gravity.h vector.h cache.h runner_doiact.h runner_doiact_vec.h runner_doiact_grav.h \ - runner_doiact_nosort.h runner_doiact_stars.h runner_doiact_black_holes.h units.h intrinsics.h minmax.h \ - kick.h timestep.h drift.h adiabatic_index.h io_properties.h dimension.h part_type.h periodic.h memswap.h \ - dump.h logger.h sign.h logger_io.h timestep_limiter.h \ + runner_doiact_nosort.h runner_doiact_stars.h runner_doiact_black_holes.h units.h intrinsics.h minmax.h \ + kick.h timestep.h drift.h adiabatic_index.h io_properties.h dimension.h part_type.h periodic.h memswap.h \ + dump.h logger.h sign.h logger_io.h timestep_limiter.h hashmap.h \ gravity.h gravity_io.h gravity_cache.h \ gravity/Default/gravity.h gravity/Default/gravity_iact.h gravity/Default/gravity_io.h \ gravity/Default/gravity_debug.h gravity/Default/gravity_part.h \ diff --git a/src/atomic.h b/src/atomic.h index 43da9823466b200c11dd55d43c9762c93df364ea..8a790a7f6cbbf190c3d87d590f10c6f1676c2d24 100644 --- a/src/atomic.h +++ b/src/atomic.h @@ -66,6 +66,29 @@ __attribute__((always_inline)) INLINE static void atomic_min_f( } while (test_val.as_int != old_val.as_int); } +/** + * @brief Atomic min operation on ints. + * + * This is a text-book implementation based on an atomic CAS. + * + * @param address The address to update. + * @param y The value to update the address with. + */ +__attribute__((always_inline)) INLINE static void atomic_min( + volatile int *address, int y) { + + int *int_ptr = (int *)address; + + int test_val, old_val, new_val; + old_val = *address; + + do { + test_val = old_val; + new_val = min(old_val, y); + old_val = atomic_cas(int_ptr, test_val, new_val); + } while (test_val != old_val); +} + /** * @brief Atomic min operation on doubles. * diff --git a/src/black_holes/Default/black_holes.h b/src/black_holes/Default/black_holes.h index 3c35d0862df99ff194fd609d6058c1be7065b0bf..fa7ece225dbed571b5bfb4f695f1fb50e4a59422 100644 --- a/src/black_holes/Default/black_holes.h +++ b/src/black_holes/Default/black_holes.h @@ -175,4 +175,22 @@ __attribute__((always_inline)) INLINE static void black_holes_reset_feedback( #endif } +/** + * @brief Initialise a BH particle that has just been seeded. + * + * @param bp The #bpart to initialise. + * @param props The properties of the black hole scheme. + * @param constants The physical constants in internal units. + * @param cosmo The current cosmological model. + * @param p The #part that became a black hole. + */ +INLINE static void black_holes_create_from_gas( + struct bpart* bp, const struct black_holes_props* props, + const struct phys_const* constants, const struct cosmology* cosmo, + const struct part* p) { + + /* First initialisation */ + black_holes_init_bpart(bp); +} + #endif /* SWIFT_DEFAULT_BLACK_HOLES_H */ diff --git a/src/black_holes/EAGLE/black_holes.h b/src/black_holes/EAGLE/black_holes.h index 94962b7deea4900598a565dfd6ea90cfc6e17798..6e9be26bc6dea438df80e15054ebe5303ecf5b63 100644 --- a/src/black_holes/EAGLE/black_holes.h +++ b/src/black_holes/EAGLE/black_holes.h @@ -312,4 +312,31 @@ __attribute__((always_inline)) INLINE static void black_holes_reset_feedback( #endif } +/** + * @brief Initialise a BH particle that has just been seeded. + * + * @param bp The #bpart to initialise. + * @param props The properties of the black hole scheme. + * @param constants The physical constants in internal units. + * @param cosmo The current cosmological model. + * @param p The #part that became a black hole. + */ +INLINE static void black_holes_create_from_gas( + struct bpart* bp, const struct black_holes_props* props, + const struct phys_const* constants, const struct cosmology* cosmo, + const struct part* p) { + + /* All the non-basic properties of the black hole have been zeroed + in the FOF code. We just update the rest. */ + + /* Birth time */ + bp->formation_scale_factor = cosmo->a; + + /* Initial seed mass */ + bp->subgrid_mass = props->subgrid_seed_mass; + + /* First initialisation */ + black_holes_init_bpart(bp); +} + #endif /* SWIFT_EAGLE_BLACK_HOLES_H */ diff --git a/src/black_holes/EAGLE/black_holes_properties.h b/src/black_holes/EAGLE/black_holes_properties.h index 01e420509d6eaee7dd07e5af23fc845acbcd330d..6cb0b4ea7056c19b7c131a9d226675b5c888b5ce 100644 --- a/src/black_holes/EAGLE/black_holes_properties.h +++ b/src/black_holes/EAGLE/black_holes_properties.h @@ -49,6 +49,9 @@ struct black_holes_props { /* ----- Initialisation properties ------ */ + /*! Mass of a BH seed at creation time */ + float subgrid_seed_mass; + /* ----- Properties of the accretion model ------ */ /*! Maximal fraction of the Eddington rate allowed. */ @@ -126,6 +129,14 @@ INLINE static void black_holes_props_init(struct black_holes_props *bp, else bp->log_max_h_change = logf(powf(max_volume_change, hydro_dimension_inv)); + /* Initialisation properties ---------------------------- */ + + bp->subgrid_seed_mass = + parser_get_param_float(params, "EAGLEAGN:subgrid_seed_mass_Msun"); + + /* Convert to internal units */ + bp->subgrid_seed_mass *= phys_const->const_solar_mass; + /* Accretion parameters ---------------------------------- */ bp->f_Edd = parser_get_param_float(params, "EAGLEAGN:max_eddington_fraction"); diff --git a/src/cell.h b/src/cell.h index 6fcd073610474bf734e2e533781a21a5367be65d..4f8bed5fa5e337842bf37a0b382e3915a806a474 100644 --- a/src/cell.h +++ b/src/cell.h @@ -1144,6 +1144,20 @@ cell_can_split_self_gravity_task(const struct cell *c) { return c->split && ((c->maxdepth - c->depth) > space_subdepth_diff_grav); } +/** + * @brief Can a self FOF task associated with a cell be split into smaller + * sub-tasks. + * + * @param c The #cell. + */ +__attribute__((always_inline)) INLINE static int cell_can_split_self_fof_task( + const struct cell *c) { + + /* Is the cell split ? */ + return c->split && c->grav.count > 5000 && + ((c->maxdepth - c->depth) > space_subdepth_diff_grav); +} + /** * @brief Have gas particles in a pair of cells moved too much and require a * rebuild diff --git a/src/common_io.c b/src/common_io.c index 42976fdf8d2f9aaa35973622aac0fe34262cbe04..537f06e53837619d5f5011628aa649fce90164cb 100644 --- a/src/common_io.c +++ b/src/common_io.c @@ -858,6 +858,28 @@ void io_convert_part_f_mapper(void* restrict temp, int N, &temp_f[i * dim]); } +/** + * @brief Mapper function to copy #part into a buffer of ints using a + * conversion function. + */ +void io_convert_part_i_mapper(void* restrict temp, int N, + void* restrict extra_data) { + + const struct io_props props = *((const struct io_props*)extra_data); + const struct part* restrict parts = props.parts; + const struct xpart* restrict xparts = props.xparts; + const struct engine* e = props.e; + const size_t dim = props.dimension; + + /* How far are we with this chunk? */ + int* restrict temp_i = (int*)temp; + const ptrdiff_t delta = (temp_i - props.start_temp_i) / dim; + + for (int i = 0; i < N; i++) + props.convert_part_i(e, parts + delta + i, xparts + delta + i, + &temp_i[i * dim]); +} + /** * @brief Mapper function to copy #part into a buffer of doubles using a * conversion function. @@ -1127,6 +1149,18 @@ void io_copy_temp_buffer(void* temp, const struct engine* e, io_convert_part_f_mapper, temp_f, N, copySize, 0, (void*)&props); + } else if (props.convert_part_i != NULL) { + + /* Prepare some parameters */ + int* temp_i = (int*)temp; + props.start_temp_i = (int*)temp; + props.e = e; + + /* Copy the whole thing into a buffer */ + threadpool_map((struct threadpool*)&e->threadpool, + io_convert_part_i_mapper, temp_i, N, copySize, 0, + (void*)&props); + } else if (props.convert_part_d != NULL) { /* Prepare some parameters */ diff --git a/src/engine.c b/src/engine.c index d6de3da5a85795fb5aa4688af26cffa7472dd641..55b3346c3bdc30505e6bb53ef14edea7562301c8 100644 --- a/src/engine.c +++ b/src/engine.c @@ -121,6 +121,7 @@ const char *engine_policy_names[] = {"none", "star formation", "feedback", "black holes", + "fof search", "time-step limiter"}; /** The rank of the engine as a global variable (for messages). */ @@ -1005,14 +1006,14 @@ void engine_redistribute(struct engine *e) { } } if (total > 0) - message("%ld of %ld (%.2f%%) of particles moved", total - unmoved, + message("%zu of %zu (%.2f%%) of particles moved", total - unmoved, total, 100.0 * (double)(total - unmoved) / (double)total); if (g_total > 0) - message("%ld of %ld (%.2f%%) of g-particles moved", g_total - g_unmoved, + message("%zu of %zu (%.2f%%) of g-particles moved", g_total - g_unmoved, g_total, 100.0 * (double)(g_total - g_unmoved) / (double)g_total); if (s_total > 0) - message("%ld of %ld (%.2f%%) of s-particles moved", s_total - s_unmoved, + message("%zu of %zu (%.2f%%) of s-particles moved", s_total - s_unmoved, s_total, 100.0 * (double)(s_total - s_unmoved) / (double)s_total); if (b_total > 0) @@ -2386,6 +2387,9 @@ int engine_estimate_nr_tasks(const struct engine *e) { n1 += 6; #endif } + if (e->policy & engine_policy_fof) { + n1 += 2; + } #if defined(WITH_LOGGER) /* each cell logs its particles */ n1 += 1; @@ -2613,6 +2617,18 @@ void engine_prepare(struct engine *e) { message("Communicating rebuild flag took %.3f %s.", clocks_from_ticks(getticks() - tic3), clocks_getunit()); + /* Perform FOF search to seed black holes. Only if there is a rebuild coming + * and no repartitioing. */ + if (e->policy & engine_policy_fof && e->forcerebuild && !e->forcerepart && + e->run_fof) { + + /* Let's start by drifting everybody to the current time */ + engine_drift_all(e, /*drift_mpole=*/0); + drifted_all = 1; + + engine_fof(e); + } + /* Do we need repartitioning ? */ if (e->forcerepart) { @@ -3425,10 +3441,11 @@ void engine_first_init_particles(struct engine *e) { * @param flag_entropy_ICs Did the 'Internal Energy' of the particles actually * contain entropy ? * @param clean_h_values Are we cleaning up the values of h before building - * the tasks ? + * @param compute_init_accel Are we computing the initial acceleration of + *particles? */ void engine_init_particles(struct engine *e, int flag_entropy_ICs, - int clean_h_values) { + int clean_h_values, int compute_init_accel) { struct space *s = e->s; @@ -3499,6 +3516,9 @@ void engine_init_particles(struct engine *e, int flag_entropy_ICs, } } + /* Exit if only computing the FOF. */ + if (!compute_init_accel) return; + #ifdef SWIFT_DEBUG_CHECKS /* Check that we have the correct total mass in the top-level multipoles */ long long num_gpart_mpole = 0; @@ -3766,6 +3786,13 @@ void engine_step(struct engine *e) { ((double)e->total_nr_gparts) * e->gravity_properties->rebuild_frequency)) e->forcerebuild = 1; + /* Trigger a FOF black hole seeding? */ + if (e->policy & engine_policy_fof) { + if (e->ti_end_min > e->ti_next_fof && e->ti_next_fof > 0) { + e->run_fof = 1; + } + } + #ifdef WITH_LOGGER /* Mark the current time step in the particle logger file. */ logger_log_timestamp(e->logger, e->ti_current, e->time, @@ -4855,6 +4882,7 @@ void engine_unpin(void) { * @param Ngas total number of gas particles in the simulation. * @param Ngparts total number of gravity particles in the simulation. * @param Nstars total number of star particles in the simulation. + * @param Nblackholes total number of black holes in the simulation. * @param policy The queuing policy to use. * @param verbose Is this #engine talkative ? * @param reparttype What type of repartition algorithm are we using ? @@ -4872,10 +4900,12 @@ void engine_unpin(void) { * @param cooling_func The properties of the cooling function. * @param starform The #star_formation model of this run. * @param chemistry The chemistry information. + * @param fof_properties The #fof_props. */ void engine_init(struct engine *e, struct space *s, struct swift_params *params, long long Ngas, long long Ngparts, long long Nstars, - int policy, int verbose, struct repartition *reparttype, + long long Nblackholes, int policy, int verbose, + struct repartition *reparttype, const struct unit_system *internal_units, const struct phys_const *physical_constants, struct cosmology *cosmo, struct hydro_props *hydro, @@ -4886,7 +4916,8 @@ void engine_init(struct engine *e, struct space *s, struct swift_params *params, const struct external_potential *potential, struct cooling_function_data *cooling_func, const struct star_formation *starform, - const struct chemistry_global_data *chemistry) { + const struct chemistry_global_data *chemistry, + struct fof_props *fof_properties) { /* Clean-up everything */ bzero(e, sizeof(struct engine)); @@ -4898,6 +4929,7 @@ void engine_init(struct engine *e, struct space *s, struct swift_params *params, e->total_nr_parts = Ngas; e->total_nr_gparts = Ngparts; e->total_nr_sparts = Nstars; + e->total_nr_bparts = Nblackholes; e->proxy_ind = NULL; e->nr_proxies = 0; e->reparttype = reparttype; @@ -4941,6 +4973,8 @@ void engine_init(struct engine *e, struct space *s, struct swift_params *params, e->delta_time_statistics = parser_get_param_double(params, "Statistics:delta_time"); e->ti_next_stats = 0; + e->ti_next_stf = 0; + e->ti_next_fof = 0; e->verbose = verbose; e->wallclock_time = 0.f; e->physical_constants = physical_constants; @@ -4956,6 +4990,7 @@ void engine_init(struct engine *e, struct space *s, struct swift_params *params, e->star_formation = starform; e->feedback_props = feedback; e->chemistry = chemistry; + e->fof_properties = fof_properties; e->parameter_file = params; #ifdef WITH_MPI e->cputime_last_step = 0; @@ -5020,6 +5055,17 @@ void engine_init(struct engine *e, struct space *s, struct swift_params *params, parser_get_opt_param_double(params, "StructureFinding:delta_time", -1.); } + /* Initialise FoF calls frequency. */ + if (e->policy & engine_policy_fof) { + + e->time_first_fof_call = + parser_get_opt_param_double(params, "FOF:time_first", 0.); + e->a_first_fof_call = + parser_get_opt_param_double(params, "FOF:scale_factor_first", 0.1); + e->delta_time_fof = + parser_get_opt_param_double(params, "FOF:delta_time", -1.); + } + engine_init_output_lists(e, params); } @@ -5071,6 +5117,7 @@ void engine_config(int restart, struct engine *e, struct swift_params *params, e->restart_file = restart_file; e->restart_next = 0; e->restart_dt = 0; + e->run_fof = 0; engine_rank = nodeID; /* Get the number of queues */ @@ -5262,13 +5309,15 @@ void engine_config(int restart, struct engine *e, struct swift_params *params, e->hydro_properties->eta_neighbours, configuration_options(), compilation_cflags()); - fprintf(e->file_timesteps, - "# Step Properties: Rebuild=%d, Redistribute=%d, Repartition=%d, " - "Statistics=%d, Snapshot=%d, Restarts=%d STF=%d, logger=%d\n", - engine_step_prop_rebuild, engine_step_prop_redistribute, - engine_step_prop_repartition, engine_step_prop_statistics, - engine_step_prop_snapshot, engine_step_prop_restarts, - engine_step_prop_stf, engine_step_prop_logger_index); + fprintf( + e->file_timesteps, + "# Step Properties: Rebuild=%d, Redistribute=%d, Repartition=%d, " + "Statistics=%d, Snapshot=%d, Restarts=%d STF=%d, FOF=%d, logger=%d\n", + engine_step_prop_rebuild, engine_step_prop_redistribute, + engine_step_prop_repartition, engine_step_prop_statistics, + engine_step_prop_snapshot, engine_step_prop_restarts, + engine_step_prop_stf, engine_step_prop_fof, + engine_step_prop_logger_index); fprintf( e->file_timesteps, @@ -5386,6 +5435,18 @@ void engine_config(int restart, struct engine *e, struct swift_params *params, "simulation start a=%e.", e->a_first_stf_output, e->cosmology->a_begin); } + + if (e->policy & engine_policy_fof) { + + if (e->delta_time_fof <= 1.) + error("Time between FOF (%e) must be > 1.", e->delta_time_fof); + + if (e->a_first_fof_call < e->cosmology->a_begin) + error( + "Scale-factor of first fof call (%e) must be after the " + "simulation start a=%e.", + e->a_first_fof_call, e->cosmology->a_begin); + } } else { if (e->delta_time_snapshot <= 0.) @@ -5421,6 +5482,16 @@ void engine_config(int restart, struct engine *e, struct swift_params *params, error("Time of first STF (%e) must be after the simulation start t=%e.", e->time_first_stf_output, e->time_begin); } + + if (e->policy & engine_policy_structure_finding) { + + if (e->delta_time_fof <= 0.) + error("Time between FOF (%e) must be positive.", e->delta_time_fof); + + if (e->time_first_fof_call < e->time_begin) + error("Time of first FOF (%e) must be after the simulation start t=%e.", + e->time_first_fof_call, e->time_begin); + } } /* Get the total mass */ @@ -5452,6 +5523,11 @@ void engine_config(int restart, struct engine *e, struct swift_params *params, engine_compute_next_stf_time(e); } + /* Find the time of the first stf output */ + if (e->policy & engine_policy_fof) { + engine_compute_next_fof_time(e); + } + /* Check that we are invoking VELOCIraptor only if we have it */ if (e->snapshot_invoke_stf && !(e->policy & engine_policy_structure_finding)) { @@ -5498,6 +5574,7 @@ void engine_config(int restart, struct engine *e, struct swift_params *params, stats_create_mpi_type(); proxy_create_mpi_type(); task_create_mpi_comms(); + fof_create_mpi_types(); #endif /* Initialise the collection group. */ @@ -5861,6 +5938,67 @@ void engine_compute_next_stf_time(struct engine *e) { } } +/** + * @brief Computes the next time (on the time line) for FoF black holes seeding + * + * @param e The #engine. + */ +void engine_compute_next_fof_time(struct engine *e) { + + /* Find upper-bound on last output */ + double time_end; + if (e->policy & engine_policy_cosmology) + time_end = e->cosmology->a_end * e->delta_time_fof; + else + time_end = e->time_end + e->delta_time_fof; + + /* Find next snasphot above current time */ + double time; + if (e->policy & engine_policy_cosmology) + time = e->a_first_fof_call; + else + time = e->time_first_fof_call; + + int found_fof_time = 0; + while (time < time_end) { + + /* Output time on the integer timeline */ + if (e->policy & engine_policy_cosmology) + e->ti_next_fof = log(time / e->cosmology->a_begin) / e->time_base; + else + e->ti_next_fof = (time - e->time_begin) / e->time_base; + + /* Found it? */ + if (e->ti_next_fof > e->ti_current) { + found_fof_time = 1; + break; + } + + if (e->policy & engine_policy_cosmology) + time *= e->delta_time_fof; + else + time += e->delta_time_fof; + } + + /* Deal with last snapshot */ + if (!found_fof_time) { + e->ti_next_fof = -1; + if (e->verbose) message("No further FoF time."); + } else { + + /* Be nice, talk... */ + if (e->policy & engine_policy_cosmology) { + const float next_fof_time = + exp(e->ti_next_fof * e->time_base) * e->cosmology->a_begin; + // if (e->verbose) + message("Next FoF time set to a=%e.", next_fof_time); + } else { + const float next_fof_time = e->ti_next_fof * e->time_base + e->time_begin; + if (e->verbose) message("Next FoF time set to t=%e.", next_fof_time); + } + } +} + /** * @brief Initialize all the output_list required by the engine * @@ -6111,6 +6249,7 @@ void engine_struct_dump(struct engine *e, FILE *stream) { feedback_struct_dump(e->feedback_props, stream); black_holes_struct_dump(e->black_holes_properties, stream); chemistry_struct_dump(e->chemistry, stream); + fof_struct_dump(e->fof_properties, stream); parser_struct_dump(e->parameter_file, stream); if (e->output_list_snapshots) output_list_struct_dump(e->output_list_snapshots, stream); @@ -6228,6 +6367,11 @@ void engine_struct_restore(struct engine *e, FILE *stream) { chemistry_struct_restore(chemistry, stream); e->chemistry = chemistry; + struct fof_props *fof_props = + (struct fof_props *)malloc(sizeof(struct fof_props)); + fof_struct_restore(fof_props, stream); + e->fof_properties = fof_props; + struct swift_params *parameter_file = (struct swift_params *)malloc(sizeof(struct swift_params)); parser_struct_restore(parameter_file, stream); @@ -6262,3 +6406,126 @@ void engine_struct_restore(struct engine *e, FILE *stream) { e->forcerebuild = 1; e->forcerepart = 0; } + +/** + * @brief Activate all the #gpart communications in preparation + * fof a call to FOF. + * + * @param e The #engine to act on. + */ +void engine_activate_gpart_comms(struct engine *e) { + +#ifdef WITH_MPI + + const ticks tic = getticks(); + + struct scheduler *s = &e->sched; + const int nr_tasks = s->nr_tasks; + struct task *tasks = s->tasks; + + for (int k = 0; k < nr_tasks; ++k) { + + struct task *t = &tasks[k]; + + if ((t->type == task_type_send) && (t->subtype == task_subtype_gpart)) { + scheduler_activate(s, t); + } else if ((t->type == task_type_recv) && + (t->subtype == task_subtype_gpart)) { + scheduler_activate(s, t); + } else { + t->skip = 1; + } + } + + if (e->verbose) + message("took %.3f %s.", clocks_from_ticks(getticks() - tic), + clocks_getunit()); + +#else + error("Calling an MPI function in non-MPI mode."); +#endif +} + +/** + * @brief Activate all the FOF tasks. + * + * Marks all the other task types to be skipped. + * + * @param e The #engine to act on. + */ +void engine_activate_fof_tasks(struct engine *e) { + + const ticks tic = getticks(); + + struct scheduler *s = &e->sched; + const int nr_tasks = s->nr_tasks; + struct task *tasks = s->tasks; + + for (int k = 0; k < nr_tasks; k++) { + + struct task *t = &tasks[k]; + + if (t->type == task_type_fof_self || t->type == task_type_fof_pair) + scheduler_activate(s, t); + else + t->skip = 1; + } + + if (e->verbose) + message("took %.3f %s.", clocks_from_ticks(getticks() - tic), + clocks_getunit()); +} + +/** + * @brief Run a FOF search. + * + * @param e the engine + */ +void engine_fof(struct engine *e) { + + ticks tic = getticks(); + + /* Compute number of DM particles */ + const long long total_nr_baryons = + e->total_nr_parts + e->total_nr_sparts + e->total_nr_bparts; + const long long total_nr_dmparts = e->total_nr_gparts - total_nr_baryons; + + /* Initialise FOF parameters and allocate FOF arrays. */ + fof_allocate(e->s, total_nr_dmparts, e->fof_properties); + + /* Make FOF tasks */ + engine_make_fof_tasks(e); + + /* and activate them. */ + engine_activate_fof_tasks(e); + + /* Perform local FOF tasks. */ + engine_launch(e); + +#ifdef WITH_MPI + /* Exchange the gparts that now contain all their local group information */ + engine_activate_gpart_comms(e); + + /* Perform the communications */ + engine_launch(e); +#endif + + /* Perform FOF search over foreign particles and + * find groups which require black hole seeding. */ + fof_search_tree(e->fof_properties, e->black_holes_properties, + e->physical_constants, e->cosmology, e->s, /*dump_results=*/0, + /*seed_black_holes=*/1); + + /* Reset flag. */ + e->run_fof = 0; + + /* Flag that a FOF has taken place */ + e->step_props |= engine_step_prop_fof; + + /* ... and find the next FOF time */ + engine_compute_next_fof_time(e); + + if (engine_rank == 0) + message("Complete FOF search took: %.3f %s.", + clocks_from_ticks(getticks() - tic), clocks_getunit()); +} diff --git a/src/engine.h b/src/engine.h index e9c145a4054f95eb19a8ac7e62a12f83b925a187..f6de893ca23cc995d1ff9646b79e14c2faf8761b 100644 --- a/src/engine.h +++ b/src/engine.h @@ -78,9 +78,10 @@ enum engine_policy { engine_policy_star_formation = (1 << 17), engine_policy_feedback = (1 << 18), engine_policy_black_holes = (1 << 19), - engine_policy_limiter = (1 << 20) + engine_policy_fof = (1 << 20), + engine_policy_limiter = (1 << 21) }; -#define engine_maxpolicy 21 +#define engine_maxpolicy 22 extern const char *engine_policy_names[engine_maxpolicy + 1]; /** @@ -95,8 +96,9 @@ enum engine_step_properties { engine_step_prop_snapshot = (1 << 4), engine_step_prop_restarts = (1 << 5), engine_step_prop_stf = (1 << 6), - engine_step_prop_logger_index = (1 << 7), - engine_step_prop_done = (1 << 8) + engine_step_prop_fof = (1 << 7), + engine_step_prop_logger_index = (1 << 8), + engine_step_prop_done = (1 << 9) }; /* Some constants */ @@ -305,6 +307,17 @@ struct engine { char stf_base_name[PARSER_MAX_LINE_SIZE]; int stf_output_count; + /* FoF black holes seeding information */ + double a_first_fof_call; + double time_first_fof_call; + double delta_time_fof; + + /* Integer time of the next FoF black holes seeding call */ + integertime_t ti_next_fof; + + /* FOF information */ + int run_fof; + /* Statistics information */ double a_first_statistics; double time_first_statistics; @@ -425,6 +438,9 @@ struct engine { /* Properties of the chemistry model */ const struct chemistry_global_data *chemistry; + /*! The FOF properties data. */ + struct fof_props *fof_properties; + /* The (parsed) parameter file */ struct swift_params *parameter_file; @@ -462,6 +478,7 @@ void engine_addlink(struct engine *e, struct link **l, struct task *t); void engine_barrier(struct engine *e); void engine_compute_next_snapshot_time(struct engine *e); void engine_compute_next_stf_time(struct engine *e); +void engine_compute_next_fof_time(struct engine *e); void engine_compute_next_statistics_time(struct engine *e); void engine_recompute_displacement_constraint(struct engine *e); void engine_unskip(struct engine *e); @@ -475,7 +492,8 @@ void engine_dump_snapshot(struct engine *e); void engine_init_output_lists(struct engine *e, struct swift_params *params); void engine_init(struct engine *e, struct space *s, struct swift_params *params, long long Ngas, long long Ngparts, long long Nstars, - int policy, int verbose, struct repartition *reparttype, + long long Nblackholes, int policy, int verbose, + struct repartition *reparttype, const struct unit_system *internal_units, const struct phys_const *physical_constants, struct cosmology *cosmo, struct hydro_props *hydro, @@ -486,7 +504,8 @@ void engine_init(struct engine *e, struct space *s, struct swift_params *params, const struct external_potential *potential, struct cooling_function_data *cooling_func, const struct star_formation *starform, - const struct chemistry_global_data *chemistry); + const struct chemistry_global_data *chemistry, + struct fof_props *fof_properties); void engine_config(int restart, struct engine *e, struct swift_params *params, int nr_nodes, int nodeID, int nr_threads, int with_aff, int verbose, const char *restart_file); @@ -494,7 +513,7 @@ void engine_dump_index(struct engine *e); void engine_launch(struct engine *e); void engine_prepare(struct engine *e); void engine_init_particles(struct engine *e, int flag_entropy_ICs, - int clean_h_values); + int clean_h_values, int compute_init_accel); void engine_step(struct engine *e); void engine_split(struct engine *e, struct partition *initial_partition); void engine_exchange_strays(struct engine *e, const size_t offset_parts, @@ -515,10 +534,15 @@ void engine_pin(void); void engine_unpin(void); void engine_clean(struct engine *e); int engine_estimate_nr_tasks(const struct engine *e); +void engine_print_task_counts(const struct engine *e); +void engine_fof(struct engine *e); /* Function prototypes, engine_maketasks.c. */ void engine_maketasks(struct engine *e); +/* Function prototypes, engine_maketasks.c. */ +void engine_make_fof_tasks(struct engine *e); + /* Function prototypes, engine_marktasks.c. */ int engine_marktasks(struct engine *e); diff --git a/src/engine_maketasks.c b/src/engine_maketasks.c index 5a5434cc6821dbd8b40dbc1b4e7ed30888ea6748..56e5bfd564eeaca003bec9577103de9d59afff41 100644 --- a/src/engine_maketasks.c +++ b/src/engine_maketasks.c @@ -2532,6 +2532,134 @@ void engine_addtasks_recv_mapper(void *map_data, int num_elements, } } +/** + * @brief Constructs the top-level self + pair tasks for the FOF loop over + * neighbours. + * + * Here we construct all the tasks for all possible neighbouring non-empty + * local cells in the hierarchy. No dependencies are being added thus far. + * Additional loop over neighbours can later be added by simply duplicating + * all the tasks created by this function. + * + * @param map_data Offset of first two indices disguised as a pointer. + * @param num_elements Number of cells to traverse. + * @param extra_data The #engine. + */ +void engine_make_fofloop_tasks_mapper(void *map_data, int num_elements, + void *extra_data) { + + /* Extract the engine pointer. */ + struct engine *e = (struct engine *)extra_data; + + struct space *s = e->s; + struct scheduler *sched = &e->sched; + const int nodeID = e->nodeID; + const int *cdim = s->cdim; + struct cell *cells = s->cells_top; + + /* Loop through the elements, which are just byte offsets from NULL. */ + for (int ind = 0; ind < num_elements; ind++) { + + /* Get the cell index. */ + const int cid = (size_t)(map_data) + ind; + const int i = cid / (cdim[1] * cdim[2]); + const int j = (cid / cdim[2]) % cdim[1]; + const int k = cid % cdim[2]; + + /* Get the cell */ + struct cell *ci = &cells[cid]; + + /* Skip cells without gravity particles */ + if (ci->grav.count == 0) continue; + + /* If the cells is local build a self-interaction */ + if (ci->nodeID == nodeID) + scheduler_addtask(sched, task_type_fof_self, task_subtype_none, 0, 0, ci, + NULL); + else + continue; + + /* Now loop over all the neighbours of this cell */ + for (int ii = -1; ii < 2; ii++) { + int iii = i + ii; + if (!s->periodic && (iii < 0 || iii >= cdim[0])) continue; + iii = (iii + cdim[0]) % cdim[0]; + for (int jj = -1; jj < 2; jj++) { + int jjj = j + jj; + if (!s->periodic && (jjj < 0 || jjj >= cdim[1])) continue; + jjj = (jjj + cdim[1]) % cdim[1]; + for (int kk = -1; kk < 2; kk++) { + int kkk = k + kk; + if (!s->periodic && (kkk < 0 || kkk >= cdim[2])) continue; + kkk = (kkk + cdim[2]) % cdim[2]; + + /* Get the neighbouring cell */ + const int cjd = cell_getid(cdim, iii, jjj, kkk); + struct cell *cj = &cells[cjd]; + + /* Is that neighbour local and does it have particles ? */ + if (cid >= cjd || cj->grav.count == 0 || (ci->nodeID != cj->nodeID)) + continue; + + /* Construct the pair task */ + scheduler_addtask(sched, task_type_fof_pair, task_subtype_none, 0, 0, + ci, cj); + } + } + } + } +} + +/** + * @brief Fill the #space's task list with FOF tasks. + * + * @param e The #engine we are working with. + */ +void engine_make_fof_tasks(struct engine *e) { + + struct space *s = e->s; + struct scheduler *sched = &e->sched; + ticks tic = getticks(); + + /* Construct a FOF loop over neighbours */ + if (e->policy & engine_policy_fof) + threadpool_map(&e->threadpool, engine_make_fofloop_tasks_mapper, NULL, + s->nr_cells, 1, 0, e); + + if (e->verbose) + message("Making FOF tasks took %.3f %s.", + clocks_from_ticks(getticks() - tic), clocks_getunit()); + + tic = getticks(); + + /* Split the tasks. */ + scheduler_splittasks(sched, /*fof_tasks=*/1); + + if (e->verbose) + message("Splitting FOF tasks took %.3f %s.", + clocks_from_ticks(getticks() - tic), clocks_getunit()); + +#ifdef SWIFT_DEBUG_CHECKS + /* Verify that we are not left with invalid tasks */ + for (int i = 0; i < e->sched.nr_tasks; ++i) { + const struct task *t = &e->sched.tasks[i]; + if (t->ci == NULL && t->cj != NULL && !t->skip) error("Invalid task"); + } +#endif + + /* Report the number of tasks we actually used */ + if (e->verbose) + message( + "Nr. of tasks: %d allocated tasks: %d ratio: %f memory use: %zd MB.", + e->sched.nr_tasks, e->sched.size, + (float)e->sched.nr_tasks / (float)e->sched.size, + e->sched.size * sizeof(struct task) / (1024 * 1024)); + + if (e->verbose) + message("took %.3f %s (including reweight).", + clocks_from_ticks(getticks() - tic), clocks_getunit()); +} + /** * @brief Fill the #space's task list. * @@ -2581,7 +2709,7 @@ void engine_maketasks(struct engine *e) { tic2 = getticks(); /* Split the tasks. */ - scheduler_splittasks(sched); + scheduler_splittasks(sched, /*fof_tasks=*/0); if (e->verbose) message("Splitting tasks took %.3f %s.", diff --git a/src/fof.c b/src/fof.c new file mode 100644 index 0000000000000000000000000000000000000000..5ed57ba6c2bc3916e3d69589963af884f1ae6815 --- /dev/null +++ b/src/fof.c @@ -0,0 +1,2790 @@ +/******************************************************************************* + * This file is part of SWIFT. + * Copyright (c) 2018 James Willis (james.s.willis@durham.ac.uk) + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published + * by the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public License + * along with this program. If not, see <http://www.gnu.org/licenses/>. + * + ******************************************************************************/ + +/* Config parameters. */ +#include "../config.h" + +/* Some standard headers. */ +#include <errno.h> +#include <libgen.h> +#include <unistd.h> + +/* MPI headers. */ +#ifdef WITH_MPI +#include <mpi.h> +#endif + +/* This object's header. */ +#include "fof.h" + +/* Local headers. */ +#include "black_holes.h" +#include "common_io.h" +#include "engine.h" +#include "hashmap.h" +#include "memuse.h" +#include "proxy.h" +#include "threadpool.h" + +#define fof_props_default_group_id 2147483647 +#define fof_props_default_group_id_offset 1 +#define fof_props_default_group_link_size 20000 + +/* Constants. */ +#define UNION_BY_SIZE_OVER_MPI (1) +#define FOF_COMPRESS_PATHS_MIN_LENGTH (2) + +/** + * @brief Properties of a group used for black hole seeding + */ +enum fof_halo_seeding_props { + fof_halo_has_no_gas = -1LL, + fof_halo_has_black_hole = -2LL, + fof_halo_has_too_low_mass = -3LL +}; + +#ifdef WITH_MPI + +/* MPI types used for communications */ +MPI_Datatype fof_mpi_type; +MPI_Datatype group_length_mpi_type; +MPI_Datatype fof_final_index_type; +MPI_Datatype fof_final_mass_type; + +/*! Offset between the first particle on this MPI rank and the first particle in + * the global order */ +size_t node_offset; +#endif + +#ifdef SWIFT_DEBUG_CHECKS +static integertime_t ti_current; +#endif + +/** + * @brief Initialise the properties of the FOF code. + * + * @param props the #fof_props structure to fill. + * @param params the parameter file parser. + * @param phys_const The physical constants in internal units. + * @param us The internal unit system. + */ +void fof_init(struct fof_props *props, struct swift_params *params, + const struct phys_const *phys_const, + const struct unit_system *us) { + + /* Base name for the FOF output file */ + parser_get_param_string(params, "FOF:basename", props->base_name); + + /* Check that we can write outputs by testing if the output + * directory exists and is searchable and writable. */ + const char *dirp = dirname(props->base_name); + if (access(dirp, W_OK | X_OK) != 0) { + error("Cannot write FOF outputs in directory %s (%s)", dirp, + strerror(errno)); + } + + /* Read the minimum group size. */ + props->min_group_size = parser_get_param_int(params, "FOF:min_group_size"); + + /* Read the default group ID of particles in groups below the minimum group + * size. */ + props->group_id_default = parser_get_opt_param_int( + params, "FOF:group_id_default", fof_props_default_group_id); + + /* Read the starting group ID. */ + props->group_id_offset = parser_get_opt_param_int( + params, "FOF:group_id_offset", fof_props_default_group_id_offset); + + /* Read the linking length ratio to the mean inter-particle separation. */ + props->l_x_ratio = + parser_get_param_double(params, "FOF:linking_length_ratio"); + + if (props->l_x_ratio <= 0.) + error("The FOF linking length can't be negative!"); + + /* Read value of absolute linking length aksed by the user */ + props->l_x_absolute = + parser_get_opt_param_double(params, "FOF:absolute_linking_length", -1.); + + if (props->l_x_ratio <= 0. && props->l_x_ratio != -1.) + error("The FOF linking length can't be negative!"); + + /* Read the minimal halo mass for black hole seeding */ + props->seed_halo_mass = + parser_get_param_double(params, "FOF:black_hole_seed_halo_mass_Msun"); + + /* Convert to internal units */ + props->seed_halo_mass *= phys_const->const_solar_mass; + +#if defined(WITH_MPI) && defined(UNION_BY_SIZE_OVER_MPI) + if (engine_rank == 0) + message( + "Performing FOF over MPI using union by size and union by rank " + "locally."); +#else + message("Performing FOF using union by rank."); +#endif +} + +/** + * @brief Registers MPI types used by FOF. + */ +void fof_create_mpi_types() { + +#ifdef WITH_MPI + if (MPI_Type_contiguous(sizeof(struct fof_mpi) / sizeof(unsigned char), + MPI_BYTE, &fof_mpi_type) != MPI_SUCCESS || + MPI_Type_commit(&fof_mpi_type) != MPI_SUCCESS) { + error("Failed to create MPI type for fof."); + } + if (MPI_Type_contiguous(sizeof(struct group_length) / sizeof(unsigned char), + MPI_BYTE, &group_length_mpi_type) != MPI_SUCCESS || + MPI_Type_commit(&group_length_mpi_type) != MPI_SUCCESS) { + error("Failed to create MPI type for group_length."); + } + /* Define type for sending fof_final_index struct */ + if (MPI_Type_contiguous(sizeof(struct fof_final_index), MPI_BYTE, + &fof_final_index_type) != MPI_SUCCESS || + MPI_Type_commit(&fof_final_index_type) != MPI_SUCCESS) { + error("Failed to create MPI type for fof_final_index."); + } + /* Define type for sending fof_final_mass struct */ + if (MPI_Type_contiguous(sizeof(struct fof_final_mass), MPI_BYTE, + &fof_final_mass_type) != MPI_SUCCESS || + MPI_Type_commit(&fof_final_mass_type) != MPI_SUCCESS) { + error("Failed to create MPI type for fof_final_mass."); + } +#else + error("Calling an MPI function in non-MPI code."); +#endif +} + +/** + * @brief Allocate the memory and initialise the arrays for a FOF calculation. + * + * @param s The #space to act on. + * @param total_nr_DM_particles The total number of DM particles in the + * simulation. + * @param props The properties of the FOF structure. + */ +void fof_allocate(const struct space *s, const long long total_nr_DM_particles, + struct fof_props *props) { + + /* Calculate the particle linking length based upon the mean inter-particle + * spacing of the DM particles. */ + const double mean_inter_particle_sep = + s->dim[0] / cbrt((double)total_nr_DM_particles); + const double l_x = props->l_x_ratio * mean_inter_particle_sep; + + /* Are we using the aboslute value or the one derived from the mean + inter-particle sepration? */ + if (props->l_x_absolute != -1.) { + props->l_x2 = props->l_x_absolute * props->l_x_absolute; + } else { + props->l_x2 = l_x * l_x; + } + +#ifdef WITH_MPI + /* Check size of linking length against the top-level cell dimensions. */ + if (props->l_x2 > s->width[0] * s->width[0]) + error( + "Linking length greater than the width of a top-level cell. Need to " + "check more than one layer of top-level cells for links."); +#endif + + const size_t nr_local_gparts = s->nr_gparts; + struct gpart *gparts = s->gparts; + + /* Allocate and initialise a group index array. */ + if (swift_memalign("fof_group_index", (void **)&props->group_index, 64, + nr_local_gparts * sizeof(size_t)) != 0) + error("Failed to allocate list of particle group indices for FOF search."); + + /* Allocate and initialise a group size array. */ + if (swift_memalign("fof_group_size", (void **)&props->group_size, 64, + nr_local_gparts * sizeof(size_t)) != 0) + error("Failed to allocate list of group size for FOF search."); + + /* Set initial group ID of the gparts */ + const size_t group_id_default = props->group_id_default; + for (size_t i = 0; i < nr_local_gparts; i++) { + gparts[i].group_id = group_id_default; + } + + /* Set initial group index and group size */ + size_t *group_index = props->group_index; + size_t *group_size = props->group_size; + for (size_t i = 0; i < nr_local_gparts; i++) { + group_index[i] = i; + group_size[i] = 1; + } + +#ifdef SWIFT_DEBUG_CHECKS + ti_current = s->e->ti_current; +#endif +} + +/** + * @brief Comparison function for qsort call comparing group sizes. + * + * @param a The first #group_length object. + * @param b The second #group_length object. + * @return 1 if the size of the group b is larger than the size of group a, -1 + * if a is the largest and 0 if they are equal. + */ +int cmp_func_group_size(const void *a, const void *b) { + struct group_length *a_group_size = (struct group_length *)a; + struct group_length *b_group_size = (struct group_length *)b; + if (b_group_size->size > a_group_size->size) + return 1; + else if (b_group_size->size < a_group_size->size) + return -1; + else + return 0; +} + +#ifdef WITH_MPI + +/** + * @brief Comparison function for qsort call comparing group global roots. + * + * @param a The first #fof_final_index object. + * @param b The second #fof_final_index object. + * @return 1 if the global of the group b is *smaller* than the global group of + * group a, -1 if a is the smaller one and 0 if they are equal. + */ +int compare_fof_final_index_global_root(const void *a, const void *b) { + struct fof_final_index *fof_final_index_a = (struct fof_final_index *)a; + struct fof_final_index *fof_final_index_b = (struct fof_final_index *)b; + if (fof_final_index_b->global_root < fof_final_index_a->global_root) + return 1; + else if (fof_final_index_b->global_root > fof_final_index_a->global_root) + return -1; + else + return 0; +} + +/** + * @brief Comparison function for qsort call comparing group global roots + * + * @param a The first #fof_final_mass object. + * @param b The second #fof_final_mass object. + * @return 1 if the global of the group b is *smaller* than the global group of + * group a, -1 if a is the smaller one and 0 if they are equal. + */ +int compare_fof_final_mass_global_root(const void *a, const void *b) { + struct fof_final_mass *fof_final_mass_a = (struct fof_final_mass *)a; + struct fof_final_mass *fof_final_mass_b = (struct fof_final_mass *)b; + if (fof_final_mass_b->global_root < fof_final_mass_a->global_root) + return 1; + else if (fof_final_mass_b->global_root > fof_final_mass_a->global_root) + return -1; + else + return 0; +} + +/** + * @brief Check whether a given group ID is on the local node. + * + * This function only makes sense in MPI mode. + * + * @param group_id The ID to check. + * @param nr_gparts The number of gparts on this node. + */ +__attribute__((always_inline)) INLINE static int is_local( + const size_t group_id, const size_t nr_gparts) { +#ifdef WITH_MPI + return (group_id >= node_offset && group_id < node_offset + nr_gparts); +#else + error("Calling MPI function in non-MPI mode"); + return 1; +#endif +} + +/** + * @brief Find the global root ID of a given particle + * + * This function only makes sense in MPI mode. + * + * @param i Index of the particle. + * @param group_index Array of group root indices. + * @param nr_gparts The number of g-particles on this node. + */ +__attribute__((always_inline)) INLINE static size_t fof_find_global( + const size_t i, const size_t *group_index, const size_t nr_gparts) { + +#ifdef WITH_MPI + size_t root = node_offset + i; + if (!is_local(root, nr_gparts)) { + + /* Non local --> This is the root */ + return root; + } else { + + /* Local --> Follow the links until we find the root */ + while (root != group_index[root - node_offset]) { + root = group_index[root - node_offset]; + if (!is_local(root, nr_gparts)) break; + } + } + + /* Perform path compression. */ + // int index = i; + // while(index != root) { + // int next = group_index[index]; + // group_index[index] = root; + // index = next; + //} + + return root; +#else + error("Calling MPI function in non-MPI mode"); + return -1; +#endif +} + +#endif /* WITH_MPI */ + +/** + * @brief Finds the local root ID of the group a particle exists in + * when group_index contains globally unique identifiers - + * i.e. we stop *before* we advance to a foreign root. + * + * Here we assume that the input i is a local index and we + * return the local index of the root. + * + * @param i Index of the particle. + * @param nr_gparts The number of g-particles on this node. + * @param group_index Array of group root indices. + */ +__attribute__((always_inline)) INLINE static size_t fof_find_local( + const size_t i, const size_t nr_gparts, const size_t *group_index) { +#ifdef WITH_MPI + size_t root = node_offset + i; + + while ((group_index[root - node_offset] != root) && + (group_index[root - node_offset] >= node_offset) && + (group_index[root - node_offset] < node_offset + nr_gparts)) { + root = group_index[root - node_offset]; + } + + return root - node_offset; +#else + size_t root = i; + + while ((group_index[root] != root) && (group_index[root] < nr_gparts)) { + root = group_index[root]; + } + + return root; +#endif +} + +/** + * @brief Finds the local root ID of the group a particle exists in. + * + * We follow the group_index array until reaching the root of the group. + * + * Also performs path compression if the path is long. + * + * @param i The index of the particle. + * @param group_index Array of group root indices. + */ +__attribute__((always_inline)) INLINE static size_t fof_find( + const size_t i, size_t *group_index) { + + size_t root = i; + int tree_depth = 0; + + while (root != group_index[root]) { +#ifdef PATH_HALVING + atomic_cas(&group_index[root], group_index[root], + group_index[group_index[root]]); +#endif + root = group_index[root]; + tree_depth++; + } + + /* Only perform path compression on trees with a depth of + * FOF_COMPRESS_PATHS_MIN_LENGTH or higher. */ + if (tree_depth >= FOF_COMPRESS_PATHS_MIN_LENGTH) + atomic_cas(&group_index[i], group_index[i], root); + + return root; +} + +/** + * @brief Atomically update the root of a group + * + * @param address The address of the value to update. + * @param y The new value to write. + * + * @return 1 If successful, 0 otherwise. + */ +__attribute__((always_inline)) INLINE static int atomic_update_root( + volatile size_t *address, const size_t y) { + + size_t *size_t_ptr = (size_t *)address; + + size_t old_val = *address; + size_t test_val = old_val; + size_t new_val = y; + + /* atomic_cas returns old_val if *size_t_ptr has not changed since being + * read.*/ + old_val = atomic_cas(size_t_ptr, test_val, new_val); + + if (test_val == old_val) + return 1; + else + return 0; +} + +/** + * @brief Unifies two groups by setting them to the same root. + * + * @param root_i The root of the first group. Will be updated. + * @param root_j The root of the second group. + * @param group_index The list of group roots. + */ +__attribute__((always_inline)) INLINE static void fof_union( + size_t *root_i, const size_t root_j, size_t *group_index) { + + int result = 0; + + /* Loop until the root can be set to a new value. */ + do { + size_t root_i_new = fof_find(*root_i, group_index); + const size_t root_j_new = fof_find(root_j, group_index); + + /* Skip particles in the same group. */ + if (root_i_new == root_j_new) return; + + /* If the root ID of pj is lower than pi's root ID set pi's root to point to + * pj's. Otherwise set pj's root to point to pi's.*/ + if (root_j_new < root_i_new) { + + /* Updates the root and checks that its value has not been changed since + * being read. */ + result = atomic_update_root(&group_index[root_i_new], root_j_new); + + /* Update root_i on the fly. */ + *root_i = root_j_new; + } else { + + /* Updates the root and checks that its value has not been changed since + * being read. */ + result = atomic_update_root(&group_index[root_j_new], root_i_new); + + /* Update root_i on the fly. */ + *root_i = root_i_new; + } + } while (result != 1); +} + +/** + * @brief Compute th minimal distance between any two points in two cells. + * + * @param ci The first #cell. + * @param cj The second #cell. + * @param dim The size of the simulation domain. + */ +__attribute__((always_inline)) INLINE static double cell_min_dist( + const struct cell *restrict ci, const struct cell *restrict cj, + const double dim[3]) { + + /* Get cell locations. */ + const double cix_min = ci->loc[0]; + const double ciy_min = ci->loc[1]; + const double ciz_min = ci->loc[2]; + const double cjx_min = cj->loc[0]; + const double cjy_min = cj->loc[1]; + const double cjz_min = cj->loc[2]; + + const double cix_max = ci->loc[0] + ci->width[0]; + const double ciy_max = ci->loc[1] + ci->width[1]; + const double ciz_max = ci->loc[2] + ci->width[2]; + const double cjx_max = cj->loc[0] + cj->width[0]; + const double cjy_max = cj->loc[1] + cj->width[1]; + const double cjz_max = cj->loc[2] + cj->width[2]; + + double not_same_range[3]; + + /* If two cells are in the same range of coordinates along + any of the 3 axis, the distance along this axis is 0 */ + if (ci->width[0] > cj->width[0]) { + if ((cix_min <= cjx_min) && (cjx_max <= cix_max)) + not_same_range[0] = 0.; + else + not_same_range[0] = 1.; + } else { + if ((cjx_min <= cix_min) && (cix_max <= cjx_max)) + not_same_range[0] = 0.; + else + not_same_range[0] = 1.; + } + if (ci->width[1] > cj->width[1]) { + if ((ciy_min <= cjy_min) && (cjy_max <= ciy_max)) + not_same_range[1] = 0.; + else + not_same_range[1] = 1.; + } else { + if ((cjy_min <= ciy_min) && (ciy_max <= cjy_max)) + not_same_range[1] = 0.; + else + not_same_range[1] = 1.; + } + if (ci->width[2] > cj->width[2]) { + if ((ciz_min <= cjz_min) && (cjz_max <= ciz_max)) + not_same_range[2] = 0.; + else + not_same_range[2] = 1.; + } else { + if ((cjz_min <= ciz_min) && (ciz_max <= cjz_max)) + not_same_range[2] = 0.; + else + not_same_range[2] = 1.; + } + + /* Find the shortest distance between cells, remembering to account for + * periodic boundary conditions. */ + double dx[3]; + dx[0] = min4(fabs(nearest(cix_min - cjx_min, dim[0])), + fabs(nearest(cix_min - cjx_max, dim[0])), + fabs(nearest(cix_max - cjx_min, dim[0])), + fabs(nearest(cix_max - cjx_max, dim[0]))); + + dx[1] = min4(fabs(nearest(ciy_min - cjy_min, dim[1])), + fabs(nearest(ciy_min - cjy_max, dim[1])), + fabs(nearest(ciy_max - cjy_min, dim[1])), + fabs(nearest(ciy_max - cjy_max, dim[1]))); + + dx[2] = min4(fabs(nearest(ciz_min - cjz_min, dim[2])), + fabs(nearest(ciz_min - cjz_max, dim[2])), + fabs(nearest(ciz_max - cjz_min, dim[2])), + fabs(nearest(ciz_max - cjz_max, dim[2]))); + + double r2 = 0.; + for (int k = 0; k < 3; k++) r2 += dx[k] * dx[k] * not_same_range[k]; + + return r2; +} + +#ifdef WITH_MPI + +/* Add a group to the hash table. */ +__attribute__((always_inline)) INLINE static void hashmap_add_group( + const size_t group_id, const size_t group_offset, hashmap_t *map) { + + int created_new_element = 0; + hashmap_value_t *offset = + hashmap_get_new(map, group_id, &created_new_element); + + if (offset != NULL) { + + /* If the element is a new entry set its value. */ + if (created_new_element) { + (*offset).value_st = group_offset; + } + } else + error("Couldn't find key (%zu) or create new one.", group_id); +} + +/* Find a group in the hash table. */ +__attribute__((always_inline)) INLINE static size_t hashmap_find_group_offset( + const size_t group_id, hashmap_t *map) { + + hashmap_value_t *group_offset = hashmap_get(map, group_id); + + if (group_offset == NULL) + error("Couldn't find key (%zu) or create new one.", group_id); + + return (size_t)(*group_offset).value_st; +} + +/* Compute send/recv offsets for MPI communication. */ +__attribute__((always_inline)) INLINE static void fof_compute_send_recv_offsets( + const int nr_nodes, int *sendcount, int **recvcount, int **sendoffset, + int **recvoffset, size_t *nrecv) { + + /* Determine number of entries to receive */ + *recvcount = malloc(nr_nodes * sizeof(int)); + MPI_Alltoall(sendcount, 1, MPI_INT, *recvcount, 1, MPI_INT, MPI_COMM_WORLD); + + /* Compute send/recv offsets */ + *sendoffset = malloc(nr_nodes * sizeof(int)); + + (*sendoffset)[0] = 0; + for (int i = 1; i < nr_nodes; i++) + (*sendoffset)[i] = (*sendoffset)[i - 1] + sendcount[i - 1]; + + *recvoffset = malloc(nr_nodes * sizeof(int)); + + (*recvoffset)[0] = 0; + for (int i = 1; i < nr_nodes; i++) + (*recvoffset)[i] = (*recvoffset)[i - 1] + (*recvcount)[i - 1]; + + /* Allocate receive buffer */ + *nrecv = 0; + for (int i = 0; i < nr_nodes; i++) (*nrecv) += (*recvcount)[i]; +} + +#endif /* WITH_MPI */ + +/** + * @brief Perform a FOF search using union-find on a given leaf-cell + * + * @param props The properties fof the FOF scheme. + * @param l_x2 The square of the FOF linking length. + * @param space_gparts The start of the #gpart array in the #space structure. + * @param c The #cell in which to perform FOF. + */ +void fof_search_self_cell(const struct fof_props *props, const double l_x2, + const struct gpart *const space_gparts, + struct cell *c) { + +#ifdef SWIFT_DEBUG_CHECKS + if (c->split) error("Performing the FOF search at a non-leaf level!"); +#endif + + const size_t count = c->grav.count; + struct gpart *gparts = c->grav.parts; + + /* Index of particles in the global group list */ + size_t *group_index = props->group_index; + + /* Make a list of particle offsets into the global gparts array. */ + size_t *const offset = group_index + (ptrdiff_t)(gparts - space_gparts); + + if (c->nodeID != engine_rank) + error("Performing self FOF search on foreign cell."); + + /* Loop over particles and find which particles belong in the same group. */ + for (size_t i = 0; i < count; i++) { + + struct gpart *pi = &gparts[i]; + + /* Ignore inhibited particles */ + if (pi->time_bin >= time_bin_inhibited) continue; + +#ifdef SWIFT_DEBUG_CHECKS + if (pi->ti_drift != ti_current) + error("Running FOF on an un-drifted particle!"); +#endif + + const double pix = pi->x[0]; + const double piy = pi->x[1]; + const double piz = pi->x[2]; + + /* Find the root of pi. */ + size_t root_i = fof_find(offset[i], group_index); + + for (size_t j = i + 1; j < count; j++) { + + struct gpart *pj = &gparts[j]; + + /* Ignore inhibited particles */ + if (pj->time_bin >= time_bin_inhibited) continue; + +#ifdef SWIFT_DEBUG_CHECKS + if (pj->ti_drift != ti_current) + error("Running FOF on an un-drifted particle!"); +#endif + + const double pjx = pj->x[0]; + const double pjy = pj->x[1]; + const double pjz = pj->x[2]; + + /* Find the root of pj. */ + const size_t root_j = fof_find(offset[j], group_index); + + /* Skip particles in the same group. */ + if (root_i == root_j) continue; + + /* Compute the pairwise distance */ + float dx[3], r2 = 0.0f; + dx[0] = pix - pjx; + dx[1] = piy - pjy; + dx[2] = piz - pjz; + + for (int k = 0; k < 3; k++) r2 += dx[k] * dx[k]; + + /* Hit or miss? */ + if (r2 < l_x2) { + + /* Merge the groups */ + fof_union(&root_i, root_j, group_index); + } + } + } +} + +/** + * @brief Perform a FOF search using union-find between two cells + * + * @param props The properties fof the FOF scheme. + * @param dim The dimension of the simulation volume. + * @param l_x2 The square of the FOF linking length. + * @param periodic Are we using periodic BCs? + * @param space_gparts The start of the #gpart array in the #space structure. + * @param ci The first #cell in which to perform FOF. + * @param cj The second #cell in which to perform FOF. + */ +void fof_search_pair_cells(const struct fof_props *props, const double dim[3], + const double l_x2, const int periodic, + const struct gpart *const space_gparts, + struct cell *restrict ci, struct cell *restrict cj) { + + const size_t count_i = ci->grav.count; + const size_t count_j = cj->grav.count; + struct gpart *gparts_i = ci->grav.parts; + struct gpart *gparts_j = cj->grav.parts; + + /* Index of particles in the global group list */ + size_t *group_index = props->group_index; + + /* Make a list of particle offsets into the global gparts array. */ + size_t *const offset_i = group_index + (ptrdiff_t)(gparts_i - space_gparts); + size_t *const offset_j = group_index + (ptrdiff_t)(gparts_j - space_gparts); + +#ifdef SWIFT_DEBUG_CHECKS + if (offset_j > offset_i && (offset_j < offset_i + count_i)) + error("Overlapping cells"); + if (offset_i > offset_j && (offset_i < offset_j + count_j)) + error("Overlapping cells"); +#endif + + /* Account for boundary conditions.*/ + double shift[3] = {0.0, 0.0, 0.0}; + + /* Get the relative distance between the pairs, wrapping. */ + double diff[3]; + for (int k = 0; k < 3; k++) { + diff[k] = cj->loc[k] - ci->loc[k]; + if (periodic && diff[k] < -dim[k] * 0.5) + shift[k] = dim[k]; + else if (periodic && diff[k] > dim[k] * 0.5) + shift[k] = -dim[k]; + else + shift[k] = 0.0; + diff[k] += shift[k]; + } + + /* Loop over particles and find which particles belong in the same group. */ + for (size_t i = 0; i < count_i; i++) { + + struct gpart *pi = &gparts_i[i]; + + /* Ignore inhibited particles */ + if (pi->time_bin >= time_bin_inhibited) continue; + +#ifdef SWIFT_DEBUG_CHECKS + if (pi->ti_drift != ti_current) + error("Running FOF on an un-drifted particle!"); +#endif + + const double pix = pi->x[0] - shift[0]; + const double piy = pi->x[1] - shift[1]; + const double piz = pi->x[2] - shift[2]; + + /* Find the root of pi. */ + size_t root_i = fof_find(offset_i[i], group_index); + + for (size_t j = 0; j < count_j; j++) { + + struct gpart *pj = &gparts_j[j]; + + /* Ignore inhibited particles */ + if (pj->time_bin >= time_bin_inhibited) continue; + +#ifdef SWIFT_DEBUG_CHECKS + if (pj->ti_drift != ti_current) + error("Running FOF on an un-drifted particle!"); +#endif + + /* Find the root of pj. */ + const size_t root_j = fof_find(offset_j[j], group_index); + + /* Skip particles in the same group. */ + if (root_i == root_j) continue; + + const double pjx = pj->x[0]; + const double pjy = pj->x[1]; + const double pjz = pj->x[2]; + + /* Compute pairwise distance, remembering to account for boundary + * conditions. */ + float dx[3], r2 = 0.0f; + dx[0] = pix - pjx; + dx[1] = piy - pjy; + dx[2] = piz - pjz; + + for (int k = 0; k < 3; k++) r2 += dx[k] * dx[k]; + + /* Hit or miss? */ + if (r2 < l_x2) { + + /* Merge the groups */ + fof_union(&root_i, root_j, group_index); + } + } + } +} + +/* Perform a FOF search between a local and foreign cell using the Union-Find + * algorithm. Store any links found between particles.*/ +void fof_search_pair_cells_foreign( + const struct fof_props *props, const double dim[3], const double l_x2, + const int periodic, const struct gpart *const space_gparts, + const size_t nr_gparts, const struct cell *restrict ci, + const struct cell *restrict cj, int *restrict link_count, + struct fof_mpi **group_links, int *restrict group_links_size) { + +#ifdef WITH_MPI + const size_t count_i = ci->grav.count; + const size_t count_j = cj->grav.count; + const struct gpart *gparts_i = ci->grav.parts; + const struct gpart *gparts_j = cj->grav.parts; + + /* Get local pointers */ + size_t *group_index = props->group_index; + size_t *group_size = props->group_size; + + /* Values local to this function to avoid dereferencing */ + struct fof_mpi *local_group_links = *group_links; + int local_link_count = *link_count; + + /* Make a list of particle offsets into the global gparts array. */ + size_t *const offset_i = group_index + (ptrdiff_t)(gparts_i - space_gparts); + +#ifdef SWIFT_DEBUG_CHECKS + + /* Check whether cells are local to the node. */ + const int ci_local = (ci->nodeID == engine_rank); + const int cj_local = (cj->nodeID == engine_rank); + + if ((ci_local && cj_local) || (!ci_local && !cj_local)) + error( + "FOF search of foreign cells called on two local cells or two foreign " + "cells."); + + if (!ci_local) { + error("Cell ci, is not local."); + } +#endif + + double shift[3] = {0.0, 0.0, 0.0}; + + /* Get the relative distance between the pairs, wrapping. */ + for (int k = 0; k < 3; k++) { + const double diff = cj->loc[k] - ci->loc[k]; + if (periodic && diff < -dim[k] / 2) + shift[k] = dim[k]; + else if (periodic && diff > dim[k] / 2) + shift[k] = -dim[k]; + else + shift[k] = 0.0; + } + + /* Loop over particles and find which particles belong in the same group. */ + for (size_t i = 0; i < count_i; i++) { + + const struct gpart *pi = &gparts_i[i]; + + /* Ignore inhibited particles */ + if (pi->time_bin >= time_bin_inhibited) continue; + +#ifdef SWIFT_DEBUG_CHECKS + if (pi->ti_drift != ti_current) + error("Running FOF on an un-drifted particle!"); +#endif + + const double pix = pi->x[0] - shift[0]; + const double piy = pi->x[1] - shift[1]; + const double piz = pi->x[2] - shift[2]; + + /* Find the root of pi. */ + const size_t root_i = + fof_find_global(offset_i[i] - node_offset, group_index, nr_gparts); + + for (size_t j = 0; j < count_j; j++) { + + const struct gpart *pj = &gparts_j[j]; + + /* Ignore inhibited particles */ + if (pj->time_bin >= time_bin_inhibited) continue; + +#ifdef SWIFT_DEBUG_CHECKS + if (pj->ti_drift != ti_current) + error("Running FOF on an un-drifted particle!"); +#endif + + const double pjx = pj->x[0]; + const double pjy = pj->x[1]; + const double pjz = pj->x[2]; + + /* Compute pairwise distance, remembering to account for boundary + * conditions. */ + float dx[3], r2 = 0.0f; + dx[0] = pix - pjx; + dx[1] = piy - pjy; + dx[2] = piz - pjz; + + for (int k = 0; k < 3; k++) r2 += dx[k] * dx[k]; + + /* Hit or miss? */ + if (r2 < l_x2) { + + int found = 0; + + /* Check that the links have not already been added to the list. */ + for (int l = 0; l < local_link_count; l++) { + if ((local_group_links)[l].group_i == root_i && + (local_group_links)[l].group_j == pj->group_id) { + found = 1; + break; + } + } + + if (!found) { + + /* If the group_links array is not big enough re-allocate it. */ + if (local_link_count + 1 > *group_links_size) { + + const int new_size = 2 * (*group_links_size); + + *group_links_size = new_size; + + (*group_links) = (struct fof_mpi *)realloc( + *group_links, new_size * sizeof(struct fof_mpi)); + + /* Reset the local pointer */ + local_group_links = *group_links; + + message("Re-allocating local group links from %d to %d elements.", + local_link_count, new_size); + } + + /* Store the particle group properties for communication. */ + local_group_links[local_link_count].group_i = root_i; + local_group_links[local_link_count].group_i_size = + group_size[root_i - node_offset]; + + local_group_links[local_link_count].group_j = pj->group_id; + local_group_links[local_link_count].group_j_size = pj->group_size; + + local_link_count++; + } + } + } + } + + /* Update the returned values */ + *link_count = local_link_count; + +#else + error("Calling MPI function in non-MPI mode."); +#endif +} + +/** + * @brief Recursively perform a union-find FOF between two cells. + * + * If cells are more distant than the linking length, we abort early. + * + * @param props The properties fof the FOF scheme. + * @param dim The dimension of the space. + * @param search_r2 the square of the FOF linking length. + * @param periodic Are we using periodic BCs? + * @param space_gparts The start of the #gpart array in the #space structure. + * @param ci The first #cell in which to perform FOF. + * @param cj The second #cell in which to perform FOF. + */ +void rec_fof_search_pair(const struct fof_props *props, const double dim[3], + const double search_r2, const int periodic, + const struct gpart *const space_gparts, + struct cell *restrict ci, struct cell *restrict cj) { + + /* Find the shortest distance between cells, remembering to account for + * boundary conditions. */ + const double r2 = cell_min_dist(ci, cj, dim); + +#ifdef SWIFT_DEBUG_CHECKS + if (ci == cj) error("Pair FOF called on same cell!!!"); +#endif + + /* Return if cells are out of range of each other. */ + if (r2 > search_r2) return; + + /* Recurse on both cells if they are both split. */ + if (ci->split && cj->split) { + for (int k = 0; k < 8; k++) { + if (ci->progeny[k] != NULL) { + + for (int l = 0; l < 8; l++) + if (cj->progeny[l] != NULL) + rec_fof_search_pair(props, dim, search_r2, periodic, space_gparts, + ci->progeny[k], cj->progeny[l]); + } + } + } else if (ci->split) { + for (int k = 0; k < 8; k++) { + if (ci->progeny[k] != NULL) + rec_fof_search_pair(props, dim, search_r2, periodic, space_gparts, + ci->progeny[k], cj); + } + } else if (cj->split) { + for (int k = 0; k < 8; k++) { + if (cj->progeny[k] != NULL) + rec_fof_search_pair(props, dim, search_r2, periodic, space_gparts, ci, + cj->progeny[k]); + } + } else { + /* Perform FOF search between pairs of cells that are within the linking + * length and not the same cell. */ + fof_search_pair_cells(props, dim, search_r2, periodic, space_gparts, ci, + cj); + } +} +#ifdef WITH_MPI + +/* Recurse on a pair of cells (one local, one foreign) and perform a FOF search + * between cells that are within range. */ +void rec_fof_search_pair_foreign( + const struct fof_props *props, const double dim[3], const double search_r2, + const int periodic, const struct gpart *const space_gparts, + const size_t nr_gparts, const struct cell *ci, const struct cell *cj, + int *restrict link_count, struct fof_mpi **group_links, + int *restrict group_links_size) { + +#ifdef SWIFT_DEBUG_CHECKS + if (ci == cj) error("Pair FOF called on same cell!!!"); +#endif + + /* Find the shortest distance between cells, remembering to account for + * boundary conditions. */ + const double r2 = cell_min_dist(ci, cj, dim); + + /* Return if cells are out of range of each other. */ + if (r2 > search_r2) return; + + /* Recurse on both cells if they are both split. */ + if (ci->split && cj->split) { + for (int k = 0; k < 8; k++) { + if (ci->progeny[k] != NULL) { + + for (int l = 0; l < 8; l++) + if (cj->progeny[l] != NULL) + rec_fof_search_pair_foreign(props, dim, search_r2, periodic, + space_gparts, nr_gparts, ci->progeny[k], + cj->progeny[l], link_count, group_links, + group_links_size); + } + } + } else if (ci->split) { + + for (int k = 0; k < 8; k++) { + if (ci->progeny[k] != NULL) + rec_fof_search_pair_foreign(props, dim, search_r2, periodic, + space_gparts, nr_gparts, ci->progeny[k], cj, + link_count, group_links, group_links_size); + } + } else if (cj->split) { + for (int k = 0; k < 8; k++) { + if (cj->progeny[k] != NULL) + rec_fof_search_pair_foreign(props, dim, search_r2, periodic, + space_gparts, nr_gparts, ci, cj->progeny[k], + link_count, group_links, group_links_size); + } + } else { + /* Perform FOF search between pairs of cells that are within the linking + * length and not the same cell. */ + fof_search_pair_cells_foreign(props, dim, search_r2, periodic, space_gparts, + nr_gparts, ci, cj, link_count, group_links, + group_links_size); + } +} + +#endif /* WITH_MPI */ + +/** + * @brief Recursively perform a union-find FOF on a cell. + * + * @param props The properties fof the FOF scheme. + * @param dim The dimension of the space. + * @param space_gparts The start of the #gpart array in the #space structure. + * @param search_r2 the square of the FOF linking length. + * @param periodic Are we using periodic BCs? + * @param c The #cell in which to perform FOF. + */ +void rec_fof_search_self(const struct fof_props *props, const double dim[3], + const double search_r2, const int periodic, + const struct gpart *const space_gparts, + struct cell *c) { + + /* Recurse? */ + if (c->split) { + + /* Loop over all progeny. Perform pair and self recursion on progenies.*/ + for (int k = 0; k < 8; k++) { + if (c->progeny[k] != NULL) { + + rec_fof_search_self(props, dim, search_r2, periodic, space_gparts, + c->progeny[k]); + + for (int l = k + 1; l < 8; l++) + if (c->progeny[l] != NULL) + rec_fof_search_pair(props, dim, search_r2, periodic, space_gparts, + c->progeny[k], c->progeny[l]); + } + } + } + /* Otherwise, compute self-interaction. */ + else + fof_search_self_cell(props, search_r2, space_gparts, c); +} + +/* Mapper function to atomically update the group size array. */ +void fof_update_group_size_mapper(hashmap_key_t key, hashmap_value_t *value, + void *data) { + + size_t *group_size = (size_t *)data; + + /* Use key to index into group size array. */ + atomic_add(&group_size[key], value->value_st); +} + +/** + * @brief Mapper function to calculate the group sizes. + * + * @param map_data An array of #gpart%s. + * @param num_elements Chunk size. + * @param extra_data Pointer to a #space. + */ +void fof_calc_group_size_mapper(void *map_data, int num_elements, + void *extra_data) { + + /* Retrieve mapped data. */ + struct space *s = (struct space *)extra_data; + struct gpart *gparts = (struct gpart *)map_data; + size_t *group_index = s->e->fof_properties->group_index; + size_t *group_size = s->e->fof_properties->group_size; + + /* Offset into gparts array. */ + ptrdiff_t gparts_offset = (ptrdiff_t)(gparts - s->gparts); + size_t *const group_index_offset = group_index + gparts_offset; + + /* Create hash table. */ + hashmap_t map; + hashmap_init(&map); + + /* Loop over particles and find which cells are in range of each other to + * perform the FOF search. */ + for (int ind = 0; ind < num_elements; ind++) { + + hashmap_key_t root = + (hashmap_key_t)fof_find(group_index_offset[ind], group_index); + const size_t gpart_index = gparts_offset + ind; + + /* Only add particles which aren't the root of a group. Stops groups of size + * 1 being added to the hash table. */ + if (root != gpart_index) { + hashmap_value_t *size = hashmap_get(&map, root); + + if (size != NULL) + (*size).value_st++; + else + error("Couldn't find key (%zu) or create new one.", root); + } + } + + /* Update the group size array. */ + if (map.size > 0) + hashmap_iterate(&map, fof_update_group_size_mapper, group_size); + + hashmap_free(&map); +} + +/* Mapper function to atomically update the group mass array. */ +static INLINE void fof_update_group_mass_mapper(hashmap_key_t key, + hashmap_value_t *value, + void *data) { + + double *group_mass = (double *)data; + + /* Use key to index into group mass array. */ + atomic_add_d(&group_mass[key], value->value_dbl); +} + +/** + * @brief Mapper function to calculate the group masses. + * + * @param map_data An array of #gpart%s. + * @param num_elements Chunk size. + * @param extra_data Pointer to a #space. + */ +void fof_calc_group_mass_mapper(void *map_data, int num_elements, + void *extra_data) { + + /* Retrieve mapped data. */ + struct space *s = (struct space *)extra_data; + struct gpart *gparts = (struct gpart *)map_data; + double *group_mass = s->e->fof_properties->group_mass; + const size_t group_id_default = s->e->fof_properties->group_id_default; + const size_t group_id_offset = s->e->fof_properties->group_id_offset; + + /* Create hash table. */ + hashmap_t map; + hashmap_init(&map); + + /* Loop over particles and increment the group mass for groups above + * min_group_size. */ + for (int ind = 0; ind < num_elements; ind++) { + + /* Only check groups above the minimum size. */ + if (gparts[ind].group_id != group_id_default) { + + hashmap_key_t index = gparts[ind].group_id - group_id_offset; + hashmap_value_t *data = hashmap_get(&map, index); + + /* Update group mass */ + if (data != NULL) + (*data).value_dbl += gparts[ind].mass; + else + error("Couldn't find key (%zu) or create new one.", index); + } + } + + /* Update the group mass array. */ + if (map.size > 0) + hashmap_iterate(&map, fof_update_group_mass_mapper, group_mass); + + hashmap_free(&map); +} + +#ifdef WITH_MPI +/* Mapper function to unpack hash table into array. */ +void fof_unpack_group_mass_mapper(hashmap_key_t key, hashmap_value_t *value, + void *data) { + + struct fof_mass_send_hashmap *fof_mass_send = + (struct fof_mass_send_hashmap *)data; + struct fof_final_mass *mass_send = fof_mass_send->mass_send; + size_t *nsend = &fof_mass_send->nsend; + + /* Store elements from hash table in array. */ + mass_send[*nsend].global_root = key; + mass_send[(*nsend)].group_mass = value->value_dbl; + mass_send[(*nsend)].max_part_density_index = value->value_st; + mass_send[(*nsend)++].max_part_density = value->value_flt; +} + +#endif /* WITH_MPI */ + +/** + * @brief Calculates the total mass of each group above min_group_size and finds + * the densest particle for black hole seeding. + */ +void fof_calc_group_mass(struct fof_props *props, const struct space *s, + const size_t num_groups_local, + const size_t num_groups_prev, + size_t *restrict num_on_node, + size_t *restrict first_on_node, double *group_mass) { + + const size_t nr_gparts = s->nr_gparts; + struct gpart *gparts = s->gparts; + const struct part *parts = s->parts; + const size_t group_id_offset = props->group_id_offset; + const size_t group_id_default = props->group_id_default; + const double seed_halo_mass = props->seed_halo_mass; + +#ifdef WITH_MPI + size_t *group_index = props->group_index; + const int nr_nodes = s->e->nr_nodes; + + /* Allocate and initialise the densest particle array. */ + if (swift_memalign("max_part_density_index", + (void **)&props->max_part_density_index, 32, + num_groups_local * sizeof(long long)) != 0) + error( + "Failed to allocate list of max group density indices for FOF search."); + + if (swift_memalign("max_part_density", (void **)&props->max_part_density, 32, + num_groups_local * sizeof(float)) != 0) + error("Failed to allocate list of max group densities for FOF search."); + + /* Direct pointers to the arrays */ + long long *max_part_density_index = props->max_part_density_index; + float *max_part_density = props->max_part_density; + + /* No densest particle found so far */ + bzero(max_part_density, num_groups_local * sizeof(float)); + + /* Start by assuming that the haloes have no gas */ + for (size_t i = 0; i < num_groups_local; i++) { + max_part_density_index[i] = fof_halo_has_no_gas; + } + + /* Start the hash map */ + hashmap_t map; + hashmap_init(&map); + + /* JSW TODO: Parallelise with threadpool*/ + for (size_t i = 0; i < nr_gparts; i++) { + + /* Check if the particle is in a group above the threshold. */ + if (gparts[i].group_id != group_id_default) { + + const size_t root = fof_find_global(i, group_index, nr_gparts); + + /* Increment the mass of groups that are local */ + if (is_local(root, nr_gparts)) { + + const size_t index = + gparts[i].group_id - group_id_offset - num_groups_prev; + + /* Update group mass */ + group_mass[index] += gparts[i].mass; + + } + /* Add mass fragments of groups that have a foreign root to a hash table. + */ + else { + + hashmap_value_t *data = hashmap_get(&map, (hashmap_key_t)root); + + if (data != NULL) { + data->value_dbl += gparts[i].mass; + + /* Find the densest gas particle. + * Account for groups that already have a black hole and groups that + * contain no gas. */ + if (gparts[i].type == swift_type_gas && + data->value_st != fof_halo_has_black_hole) { + + /* Update index if a denser gas particle is found. */ + if (parts[-gparts[i].id_or_neg_offset].rho > data->value_flt) { + data->value_flt = parts[-gparts[i].id_or_neg_offset].rho; + data->value_st = -gparts[i].id_or_neg_offset; + } + } + /* If there is already a black hole in the group we don't need to + create a new one. */ + else if (gparts[i].type == swift_type_black_hole) { + data->value_st = fof_halo_has_black_hole; + data->value_flt = 0.f; + } + } else + error("Couldn't find key (%zu) or create new one.", root); + } + } + } + + /* Loop over particles and find the densest particle in each group. */ + /* JSW TODO: Parallelise with threadpool*/ + for (size_t i = 0; i < nr_gparts; i++) { + + /* Only check groups above the minimum size and mass threshold. */ + if (gparts[i].group_id != group_id_default) { + + size_t root = fof_find_global(i, group_index, nr_gparts); + + /* Increment the mass of groups that are local */ + if (is_local(root, nr_gparts)) { + + size_t index = gparts[i].group_id - group_id_offset - num_groups_prev; + + /* Only seed groups above the mass threshold. */ + if (group_mass[index] > seed_halo_mass) { + + /* Find the densest gas particle. + * Account for groups that already have a black hole and groups that + * contain no gas. */ + if (gparts[i].type == swift_type_gas && + max_part_density_index[index] != fof_halo_has_black_hole) { + + /* Update index if a denser gas particle is found. */ + if (parts[-gparts[i].id_or_neg_offset].rho > + max_part_density[index]) { + max_part_density_index[index] = -gparts[i].id_or_neg_offset; + max_part_density[index] = parts[-gparts[i].id_or_neg_offset].rho; + } + } + /* If there is already a black hole in the group we don't need to + create a new one. */ + else if (gparts[i].type == swift_type_black_hole) { + max_part_density_index[index] = fof_halo_has_black_hole; + } + } + } + } + } + + size_t nsend = map.size; + struct fof_mass_send_hashmap hashmap_mass_send; + + /* Allocate and initialise a mass array. */ + if (posix_memalign((void **)&hashmap_mass_send.mass_send, 32, + nsend * sizeof(struct fof_mass_send_hashmap)) != 0) + error("Failed to allocate list of group masses for FOF search."); + + hashmap_mass_send.nsend = 0; + + struct fof_final_mass *fof_mass_send = hashmap_mass_send.mass_send; + + /* Unpack mass fragments and roots from hash table. */ + if (map.size > 0) + hashmap_iterate(&map, fof_unpack_group_mass_mapper, &hashmap_mass_send); + + nsend = hashmap_mass_send.nsend; + + if (nsend != map.size) + error("No. of mass fragments to send != elements in hash table."); + + hashmap_free(&map); + + /* Sort by global root - this puts the groups in order of which node they're + * stored on */ + qsort(fof_mass_send, nsend, sizeof(struct fof_final_mass), + compare_fof_final_mass_global_root); + + /* Determine how many entries go to each node */ + int *sendcount = malloc(nr_nodes * sizeof(int)); + for (int i = 0; i < nr_nodes; i += 1) sendcount[i] = 0; + int dest = 0; + for (size_t i = 0; i < nsend; i += 1) { + while ((fof_mass_send[i].global_root >= + first_on_node[dest] + num_on_node[dest]) || + (num_on_node[dest] == 0)) + dest += 1; + if (dest >= nr_nodes) error("Node index out of range!"); + sendcount[dest] += 1; + } + + int *recvcount = NULL, *sendoffset = NULL, *recvoffset = NULL; + size_t nrecv = 0; + + fof_compute_send_recv_offsets(nr_nodes, sendcount, &recvcount, &sendoffset, + &recvoffset, &nrecv); + + struct fof_final_mass *fof_mass_recv = + malloc(nrecv * sizeof(struct fof_final_mass)); + + /* Exchange group mass */ + MPI_Alltoallv(fof_mass_send, sendcount, sendoffset, fof_final_mass_type, + fof_mass_recv, recvcount, recvoffset, fof_final_mass_type, + MPI_COMM_WORLD); + + /* For each received global root, look up the group ID we assigned and + * increment the group mass */ + for (size_t i = 0; i < nrecv; i += 1) { + if ((fof_mass_recv[i].global_root < node_offset) || + (fof_mass_recv[i].global_root >= node_offset + nr_gparts)) { + error("Received global root index out of range!"); + } + group_mass[gparts[fof_mass_recv[i].global_root - node_offset].group_id - + group_id_offset - num_groups_prev] += + fof_mass_recv[i].group_mass; + } + + /* For each received global root, look up the group ID we assigned and find + * the global maximum gas density */ + for (size_t i = 0; i < nrecv; i++) { + + const int offset = + gparts[fof_mass_recv[i].global_root - node_offset].group_id - + group_id_offset - num_groups_prev; + + /* Only seed groups above the mass threshold. */ + if (group_mass[offset] > seed_halo_mass) { + + /* Only check groups that don't already contain a black hole. */ + if (max_part_density_index[offset] != fof_halo_has_black_hole) { + + /* Find the densest particle in each group using the densest particle + * from each group fragment. */ + if (fof_mass_recv[i].max_part_density > max_part_density[offset]) { + max_part_density[offset] = fof_mass_recv[i].max_part_density; + max_part_density_index[offset] = + fof_mass_recv[i].max_part_density_index; + } + } + /* If there is already a black hole in the group we don't need to create a + new one. */ + else if (fof_mass_recv[i].max_part_density_index == + fof_halo_has_black_hole) { + max_part_density_index[offset] = fof_halo_has_black_hole; + } + } else { + max_part_density_index[offset] = fof_halo_has_no_gas; + } + } + + /* For each received global root, look up the group ID we assigned and send + * the global maximum gas density index back */ + for (size_t i = 0; i < nrecv; i++) { + if ((fof_mass_recv[i].global_root < node_offset) || + (fof_mass_recv[i].global_root >= node_offset + nr_gparts)) { + error("Received global root index out of range!"); + } + + const int offset = + gparts[fof_mass_recv[i].global_root - node_offset].group_id - + group_id_offset - num_groups_prev; + + /* If the densest particle found locally is not the global max, make sure we + * don't seed two black holes. */ + /* If the local index has been set to a foreign index then we don't need to + * seed a black hole locally. */ + if (max_part_density_index[offset] == + fof_mass_recv[i].max_part_density_index) { + max_part_density_index[offset] = fof_halo_has_black_hole; + } + /* The densest particle is on the same node as the global root so we don't + need to seed a black hole on the other node. */ + else { + fof_mass_recv[i].max_part_density_index = fof_halo_has_black_hole; + } + } + + /* Send the result back */ + MPI_Alltoallv(fof_mass_recv, recvcount, recvoffset, fof_final_mass_type, + fof_mass_send, sendcount, sendoffset, fof_final_mass_type, + MPI_COMM_WORLD); + + int extra_seed_count = 0; + size_t density_index_size = num_groups_local; + + /* Add the index of the densest particle to the local list if the global root + * is not on this node. */ + for (size_t i = 0; i < nsend; i++) { + + /* Only add the index if: + * 1) there is not already a black hole in the group + * AND + * 2) there is gas in the group. */ + if (fof_mass_send[i].max_part_density_index >= 0) { + + /* Re-allocate the list if it's needed. */ + if (num_groups_local + extra_seed_count >= density_index_size) { + const size_t new_size = 2 * density_index_size; + + max_part_density_index = (long long *)realloc( + max_part_density_index, new_size * sizeof(long long)); + + message( + "Re-allocating max_part_density_index from %zu to %zu elements.", + density_index_size, new_size); + + density_index_size = new_size; + + props->max_part_density_index = max_part_density_index; + } + + /* Add particle index onto the end of the array. */ + max_part_density_index[num_groups_local + extra_seed_count] = + fof_mass_send[i].max_part_density_index; + extra_seed_count++; + } + } + + props->extra_bh_seed_count = extra_seed_count; + + free(sendcount); + free(recvcount); + free(sendoffset); + free(recvoffset); + free(fof_mass_send); + free(fof_mass_recv); +#else + + /* Allocate and initialise the densest particle array. */ + if (swift_memalign("max_part_density_index", + (void **)&props->max_part_density_index, 32, + num_groups_local * sizeof(long long)) != 0) + error( + "Failed to allocate list of max group density indices for FOF search."); + + if (swift_memalign("max_part_density", (void **)&props->max_part_density, 32, + num_groups_local * sizeof(float)) != 0) + error("Failed to allocate list of max group densities for FOF search."); + + /* Direct pointers to the arrays */ + long long *max_part_density_index = props->max_part_density_index; + float *max_part_density = props->max_part_density; + + /* No densest particle found so far */ + bzero(max_part_density, num_groups_local * sizeof(float)); + + /* Start by assuming that the haloes have no gas */ + for (size_t i = 0; i < num_groups_local; i++) { + max_part_density_index[i] = fof_halo_has_no_gas; + } + + /* Increment the group mass for groups above min_group_size. */ + threadpool_map(&s->e->threadpool, fof_calc_group_mass_mapper, gparts, + nr_gparts, sizeof(struct gpart), 0, (struct space *)s); + + /* Loop over particles and find the densest particle in each group. */ + /* JSW TODO: Parallelise with threadpool*/ + for (size_t i = 0; i < nr_gparts; i++) { + + const size_t index = gparts[i].group_id - group_id_offset; + + /* Only check groups above the minimum mass threshold. */ + if (gparts[i].group_id != group_id_default) { + + if (group_mass[index] > seed_halo_mass) { + + /* Find the densest gas particle. + * Account for groups that already have a black hole and groups that + * contain no gas. */ + if (gparts[i].type == swift_type_gas && + max_part_density_index[index] != fof_halo_has_black_hole) { + + const size_t gas_index = -gparts[i].id_or_neg_offset; + const float rho_com = hydro_get_comoving_density(&parts[gas_index]); + + /* Update index if a denser gas particle is found. */ + if (rho_com > max_part_density[index]) { + max_part_density_index[index] = gas_index; + max_part_density[index] = rho_com; + } + } + /* If there is already a black hole in the group we don't need to create + a new one. */ + else if (gparts[i].type == swift_type_black_hole) { + max_part_density_index[index] = fof_halo_has_black_hole; + } + + } else { + max_part_density_index[index] = fof_halo_has_too_low_mass; + } + } + } + + props->extra_bh_seed_count = 0; +#endif +} + +/** + * @brief Mapper function to perform FOF search. + * + * @param map_data An array of #cell pair indices. + * @param num_elements Chunk size. + * @param extra_data Pointer to a #space. + */ +void fof_find_foreign_links_mapper(void *map_data, int num_elements, + void *extra_data) { + +#ifdef WITH_MPI + + /* Retrieve mapped data. */ + struct space *s = (struct space *)extra_data; + const int periodic = s->periodic; + const size_t nr_gparts = s->nr_gparts; + const struct gpart *const gparts = s->gparts; + const struct engine *e = s->e; + struct fof_props *props = e->fof_properties; + struct cell_pair_indices *cell_pairs = (struct cell_pair_indices *)map_data; + + const double dim[3] = {s->dim[0], s->dim[1], s->dim[2]}; + const double search_r2 = props->l_x2; + + /* Store links in an array local to this thread. */ + int local_link_count = 0; + int local_group_links_size = props->group_links_size / e->nr_threads; + + /* Init the local group links buffer. */ + struct fof_mpi *local_group_links = (struct fof_mpi *)swift_calloc( + "fof_local_group_links", sizeof(struct fof_mpi), local_group_links_size); + if (local_group_links == NULL) + error("Failed to allocate temporary group links buffer."); + + /* Loop over all pairs of local and foreign cells, recurse then perform a + * FOF search. */ + for (int ind = 0; ind < num_elements; ind++) { + + /* Get the local and foreign cells to recurse on. */ + struct cell *restrict local_cell = cell_pairs[ind].local; + struct cell *restrict foreign_cell = cell_pairs[ind].foreign; + + rec_fof_search_pair_foreign(props, dim, search_r2, periodic, gparts, + nr_gparts, local_cell, foreign_cell, + &local_link_count, &local_group_links, + &local_group_links_size); + } + + /* Add links found by this thread to the global link list. */ + /* Lock to prevent race conditions while adding to the global link list.*/ + if (lock_lock(&s->lock) == 0) { + + /* Get pointers to global arrays. */ + int *group_links_size = &props->group_links_size; + int *group_link_count = &props->group_link_count; + struct fof_mpi **group_links = &props->group_links; + + /* If the global group_links array is not big enough re-allocate it. */ + if (*group_link_count + local_link_count > *group_links_size) { + + const int old_size = *group_links_size; + const int new_size = + max(*group_link_count + local_link_count, 2 * old_size); + + (*group_links) = (struct fof_mpi *)realloc( + *group_links, new_size * sizeof(struct fof_mpi)); + + *group_links_size = new_size; + + message("Re-allocating global group links from %d to %d elements.", + old_size, new_size); + } + + /* Copy the local links to the global list. */ + for (int i = 0; i < local_link_count; i++) { + + int found = 0; + + /* Check that the links have not already been added to the list by another + * thread. */ + for (int l = 0; l < *group_link_count; l++) { + if ((*group_links)[l].group_i == local_group_links[i].group_i && + (*group_links)[l].group_j == local_group_links[i].group_j) { + found = 1; + break; + } + } + + if (!found) { + + (*group_links)[*group_link_count].group_i = + local_group_links[i].group_i; + (*group_links)[*group_link_count].group_i_size = + local_group_links[i].group_i_size; + + (*group_links)[*group_link_count].group_j = + local_group_links[i].group_j; + (*group_links)[*group_link_count].group_j_size = + local_group_links[i].group_j_size; + + (*group_link_count) = (*group_link_count) + 1; + } + } + } + + /* Release lock. */ + if (lock_unlock(&s->lock) != 0) error("Failed to unlock the space"); + + swift_free("fof_local_group_links", local_group_links); +#endif +} + +void fof_seed_black_holes(const struct fof_props *props, + const struct black_holes_props *bh_props, + const struct phys_const *constants, + const struct cosmology *cosmo, struct space *s, + int num_groups, struct group_length *group_sizes) { + + const long long *max_part_density_index = props->max_part_density_index; + + /* Count the number of black holes to seed */ + int num_seed_black_holes = 0; + for (int i = 0; i < num_groups + props->extra_bh_seed_count; i++) { + if (max_part_density_index[i] >= 0) ++num_seed_black_holes; + } + +#ifdef WITH_MPI + int total_num_seed_black_holes = 0; + /* Sum the total number of black holes over each MPI rank. */ + MPI_Reduce(&num_seed_black_holes, &total_num_seed_black_holes, 1, MPI_INT, + MPI_SUM, 0, MPI_COMM_WORLD); +#else + int total_num_seed_black_holes = num_seed_black_holes; +#endif + + if (engine_rank == 0) + message("Seeding %d black hole(s)", total_num_seed_black_holes); + + /* Anything to do this time on this rank? */ + if (num_seed_black_holes == 0) return; + + /* Do we need to reallocate the black hole array for the new particles? */ + if (s->nr_bparts + num_seed_black_holes > s->size_bparts) { + const size_t nr_bparts_new = s->nr_bparts + num_seed_black_holes; + + s->size_bparts = engine_parts_size_grow * nr_bparts_new; + + struct bpart *bparts_new = NULL; + if (swift_memalign("bparts", (void **)&bparts_new, bpart_align, + sizeof(struct bpart) * s->size_bparts) != 0) + error("Failed to allocate new bpart data."); + memcpy(bparts_new, s->bparts, sizeof(struct bpart) * s->nr_bparts); + swift_free("bparts", s->bparts); + + s->bparts = bparts_new; + } + + int k = s->nr_bparts; + + /* Loop over the local groups */ + for (int i = 0; i < num_groups + props->extra_bh_seed_count; i++) { + + const long long part_index = max_part_density_index[i]; + + /* Should we seed? */ + if (part_index >= 0) { + + /* Handle on the particle to convert */ + struct part *p = &s->parts[part_index]; + struct gpart *gp = p->gpart; + + /* Let's destroy the gas particle */ + p->time_bin = time_bin_inhibited; + p->gpart = NULL; + + /* Mark the gpart as black hole */ + gp->type = swift_type_black_hole; + + /* Basic properties of the black hole */ + struct bpart *bp = &s->bparts[k]; + bzero(bp, sizeof(struct bpart)); + bp->time_bin = gp->time_bin; + + /* Re-link things */ + bp->gpart = gp; + gp->id_or_neg_offset = -(bp - s->bparts); + + /* Synchronize masses, positions and velocities */ + bp->mass = gp->mass; + bp->x[0] = gp->x[0]; + bp->x[1] = gp->x[1]; + bp->x[2] = gp->x[2]; + bp->v[0] = gp->v_full[0]; + bp->v[1] = gp->v_full[1]; + bp->v[2] = gp->v_full[2]; + + /* Set a smoothing length */ + bp->h = p->h; + +#ifdef SWIFT_DEBUG_CHECKS + bp->ti_kick = p->ti_kick; + bp->ti_drift = p->ti_drift; +#endif + + /* Copy over all the gas properties that we want */ + black_holes_create_from_gas(bp, bh_props, constants, cosmo, p); + + /* Move to the next BH slot */ + k++; + } + } + +#ifdef SWIFT_DEBUG_CHECKS + if ((int)(s->nr_bparts) + num_seed_black_holes != k) { + error("Seeded the wrong number of black holes!"); + } +#endif + + /* Update the count of black holes. */ + s->nr_bparts = k; +} + +/* Dump FOF group data. */ +void fof_dump_group_data(const struct fof_props *props, + const char *out_file_name, struct space *s, + int num_groups, struct group_length *group_sizes) { + + FILE *file = fopen(out_file_name, "w"); + + struct gpart *gparts = s->gparts; + struct part *parts = s->parts; + size_t *group_size = props->group_size; + double *group_mass = props->group_mass; + const long long *max_part_density_index = props->max_part_density_index; + const float *max_part_density = props->max_part_density; + + fprintf(file, "# %8s %12s %12s %12s %18s %18s %12s\n", "Group ID", + "Group Size", "Group Mass", "Max Density", "Max Density Index", + "Particle ID", "Particle Density"); + fprintf(file, + "#-------------------------------------------------------------------" + "-------------\n"); + + int bh_seed_count = 0; + + for (int i = 0; i < num_groups; i++) { + + const size_t group_offset = group_sizes[i].index; + const long long part_id = max_part_density_index[i] >= 0 + ? parts[max_part_density_index[i]].id + : -1; +#ifdef WITH_MPI + fprintf(file, " %8zu %12zu %12e %12e %18lld %18lld\n", + gparts[group_offset - node_offset].group_id, + group_size[group_offset - node_offset], group_mass[i], + max_part_density[i], max_part_density_index[i], part_id); +#else + fprintf(file, " %8zu %12zu %12e %12e %18lld %18lld\n", + gparts[group_offset].group_id, group_size[group_offset], + group_mass[i], max_part_density[i], max_part_density_index[i], + part_id); +#endif + + if (max_part_density_index[i] >= 0) bh_seed_count++; + } + + /* Dump the extra black hole seeds. */ + for (int i = num_groups; i < num_groups + props->extra_bh_seed_count; i++) { + const long long part_id = max_part_density_index[i] >= 0 + ? parts[max_part_density_index[i]].id + : -1; + fprintf(file, " %8zu %12zu %12e %12e %18lld %18lld\n", 0UL, 0UL, 0., 0., + 0LL, part_id); + + if (max_part_density_index[i] >= 0) bh_seed_count++; + } + + int total_bh_seed_count = 0; + +#ifdef WITH_MPI + /* Sum the total number of black holes over each MPI rank. */ + MPI_Reduce(&bh_seed_count, &total_bh_seed_count, 1, MPI_INT, MPI_SUM, 0, + MPI_COMM_WORLD); +#else + total_bh_seed_count = bh_seed_count; +#endif + + if (engine_rank == 0) + message("Seeding %d black hole(s).", total_bh_seed_count); + + fclose(file); +} + +/** + * @brief Search foreign cells for links and communicate any found to the + * appropriate node. + * + * @param props the properties of the FOF scheme. + * @param s Pointer to a #space. + */ +void fof_search_foreign_cells(struct fof_props *props, const struct space *s) { + +#ifdef WITH_MPI + + struct engine *e = s->e; + int verbose = e->verbose; + size_t *group_index = props->group_index; + size_t *group_size = props->group_size; + const size_t nr_gparts = s->nr_gparts; + const double dim[3] = {s->dim[0], s->dim[1], s->dim[2]}; + const double search_r2 = props->l_x2; + + ticks tic = getticks(); + + /* Make group IDs globally unique. */ + for (size_t i = 0; i < nr_gparts; i++) group_index[i] += node_offset; + + struct cell_pair_indices *cell_pairs = NULL; + int group_link_count = 0; + int cell_pair_count = 0; + + props->group_links_size = fof_props_default_group_link_size; + props->group_link_count = 0; + + int num_cells_out = 0; + int num_cells_in = 0; + + /* Find the maximum no. of cell pairs. */ + for (int i = 0; i < e->nr_proxies; i++) { + + for (int j = 0; j < e->proxies[i].nr_cells_out; j++) { + + /* Only include gravity cells. */ + if (e->proxies[i].cells_out_type[j] & proxy_cell_type_gravity) + num_cells_out++; + } + + for (int j = 0; j < e->proxies[i].nr_cells_in; j++) { + + /* Only include gravity cells. */ + if (e->proxies[i].cells_in_type[j] & proxy_cell_type_gravity) + num_cells_in++; + } + } + + const int cell_pair_size = num_cells_in * num_cells_out; + + if (swift_memalign("fof_group_links", (void **)&props->group_links, + SWIFT_STRUCT_ALIGNMENT, + props->group_links_size * sizeof(struct fof_mpi)) != 0) + error("Error while allocating memory for FOF links over an MPI domain"); + + if (swift_memalign("fof_cell_pairs", (void **)&cell_pairs, + SWIFT_STRUCT_ALIGNMENT, + cell_pair_size * sizeof(struct cell_pair_indices)) != 0) + error("Error while allocating memory for FOF cell pair indices"); + + /* Loop over cells_in and cells_out for each proxy and find which cells are in + * range of each other to perform the FOF search. Store local cells that are + * touching foreign cells in a list. */ + for (int i = 0; i < e->nr_proxies; i++) { + + /* Only find links across an MPI domain on one rank. */ + if (engine_rank == min(engine_rank, e->proxies[i].nodeID)) { + + for (int j = 0; j < e->proxies[i].nr_cells_out; j++) { + + /* Skip non-gravity cells. */ + if (!(e->proxies[i].cells_out_type[j] & proxy_cell_type_gravity)) + continue; + + struct cell *restrict local_cell = e->proxies[i].cells_out[j]; + + /* Skip empty cells. */ + if (local_cell->grav.count == 0) continue; + + for (int k = 0; k < e->proxies[i].nr_cells_in; k++) { + + /* Skip non-gravity cells. */ + if (!(e->proxies[i].cells_in_type[k] & proxy_cell_type_gravity)) + continue; + + struct cell *restrict foreign_cell = e->proxies[i].cells_in[k]; + + /* Skip empty cells. */ + if (foreign_cell->grav.count == 0) continue; + + /* Check if local cell has already been added to the local list of + * cells. */ + const double r2 = cell_min_dist(local_cell, foreign_cell, dim); + if (r2 < search_r2) { + cell_pairs[cell_pair_count].local = local_cell; + cell_pairs[cell_pair_count++].foreign = foreign_cell; + } + } + } + } + } + + /* Set the root of outgoing particles. */ + for (int i = 0; i < e->nr_proxies; i++) { + + for (int j = 0; j < e->proxies[i].nr_cells_out; j++) { + + struct cell *restrict local_cell = e->proxies[i].cells_out[j]; + struct gpart *gparts = local_cell->grav.parts; + + /* Make a list of particle offsets into the global gparts array. */ + size_t *const offset = group_index + (ptrdiff_t)(gparts - s->gparts); + + /* Set each particle's root and group properties found in the local FOF.*/ + for (int k = 0; k < local_cell->grav.count; k++) { + const size_t root = + fof_find_global(offset[k] - node_offset, group_index, nr_gparts); + gparts[k].group_id = root; + gparts[k].group_size = group_size[root - node_offset]; + } + } + } + + if (verbose) + message( + "Finding local/foreign cell pairs and initialising particle roots " + "took: " + "%.3f %s.", + clocks_from_ticks(getticks() - tic), clocks_getunit()); + + tic = getticks(); + + struct scheduler *sched = &e->sched; + struct task *tasks = sched->tasks; + + /* Activate the send and receive tasks for the gparts. */ + for (int i = 0; i < sched->nr_tasks; i++) { + + struct task *t = &tasks[i]; + + if ((t->type == task_type_send && t->subtype == task_subtype_gpart) || + (t->type == task_type_recv && t->subtype == task_subtype_gpart)) { + scheduler_activate(sched, t); + } else + t->skip = 1; + } + + if (verbose) + message("MPI send/recv task activation took: %.3f %s.", + clocks_from_ticks(getticks() - tic), clocks_getunit()); + + ticks local_fof_tic = getticks(); + + MPI_Barrier(MPI_COMM_WORLD); + + if (verbose) + message("Local FOF imbalance: %.3f %s.", + clocks_from_ticks(getticks() - local_fof_tic), clocks_getunit()); + + tic = getticks(); + + /* Perform send and receive tasks. */ + engine_launch(e); + + if (verbose) + message("MPI send/recv comms took: %.3f %s.", + clocks_from_ticks(getticks() - tic), clocks_getunit()); + + tic = getticks(); + + /* Perform search of group links between local and foreign cells with the + * threadpool. */ + threadpool_map(&s->e->threadpool, fof_find_foreign_links_mapper, cell_pairs, + cell_pair_count, sizeof(struct cell_pair_indices), 1, + (struct space *)s); + + group_link_count = props->group_link_count; + + /* Clean up memory. */ + swift_free("fof_cell_pairs", cell_pairs); + + if (verbose) + message("Searching for foreign links took: %.3f %s.", + clocks_from_ticks(getticks() - tic), clocks_getunit()); + + tic = getticks(); + + struct fof_mpi *global_group_links = NULL; + int *displ = NULL, *group_link_counts = NULL; + int global_group_link_count = 0; + + ticks comms_tic = getticks(); + + MPI_Barrier(MPI_COMM_WORLD); + + if (verbose) + message("Imbalance took: %.3f %s.", + clocks_from_ticks(getticks() - comms_tic), clocks_getunit()); + + comms_tic = getticks(); + + /* Sum the total number of links across MPI domains over each MPI rank. */ + MPI_Allreduce(&group_link_count, &global_group_link_count, 1, MPI_INT, + MPI_SUM, MPI_COMM_WORLD); + + /* Unique set of links is half of all group links as each link is found twice + * by opposing MPI ranks. */ + if (swift_memalign("fof_global_group_links", (void **)&global_group_links, + SWIFT_STRUCT_ALIGNMENT, + global_group_link_count * sizeof(struct fof_mpi)) != 0) + error("Error while allocating memory for the global list of group links"); + + if (posix_memalign((void **)&group_link_counts, SWIFT_STRUCT_ALIGNMENT, + e->nr_nodes * sizeof(int)) != 0) + error( + "Error while allocating memory for the number of group links on each " + "MPI rank"); + + if (posix_memalign((void **)&displ, SWIFT_STRUCT_ALIGNMENT, + e->nr_nodes * sizeof(int)) != 0) + error( + "Error while allocating memory for the displacement in memory for the " + "global group link list"); + + /* Gather the total number of links on each rank. */ + MPI_Allgather(&group_link_count, 1, MPI_INT, group_link_counts, 1, MPI_INT, + MPI_COMM_WORLD); + + /* Set the displacements into the global link list using the link counts from + * each rank */ + displ[0] = 0; + for (int i = 1; i < e->nr_nodes; i++) + displ[i] = displ[i - 1] + group_link_counts[i - 1]; + + /* Gather the global link list on all ranks. */ + MPI_Allgatherv(props->group_links, group_link_count, fof_mpi_type, + global_group_links, group_link_counts, displ, fof_mpi_type, + MPI_COMM_WORLD); + + /* Clean up memory. */ + free(displ); + swift_free("fof_group_links", props->group_links); + props->group_links = NULL; + + if (verbose) { + message("Communication took: %.3f %s.", + clocks_from_ticks(getticks() - comms_tic), clocks_getunit()); + + message("Global comms took: %.3f %s.", clocks_from_ticks(getticks() - tic), + clocks_getunit()); + } + + tic = getticks(); + + /* Transform the group IDs to a local list going from 0-group_count so a + * union-find can be performed. */ + size_t *global_group_index = NULL, *global_group_id = NULL, + *global_group_size = NULL; + const int global_group_list_size = 2 * global_group_link_count; + int group_count = 0; + + if (swift_memalign("fof_global_group_index", (void **)&global_group_index, + SWIFT_STRUCT_ALIGNMENT, + global_group_list_size * sizeof(size_t)) != 0) + error( + "Error while allocating memory for the displacement in memory for the " + "global group link list"); + + if (swift_memalign("fof_global_group_id", (void **)&global_group_id, + SWIFT_STRUCT_ALIGNMENT, + global_group_list_size * sizeof(size_t)) != 0) + error( + "Error while allocating memory for the displacement in memory for the " + "global group link list"); + + if (swift_memalign("fof_global_group_size", (void **)&global_group_size, + SWIFT_STRUCT_ALIGNMENT, + global_group_list_size * sizeof(size_t)) != 0) + error( + "Error while allocating memory for the displacement in memory for the " + "global group link list"); + + bzero(global_group_size, global_group_list_size * sizeof(size_t)); + + /* Create hash table. */ + hashmap_t map; + hashmap_init(&map); + + /* Store each group ID and its properties. */ + for (int i = 0; i < global_group_link_count; i++) { + + size_t group_i = global_group_links[i].group_i; + size_t group_j = global_group_links[i].group_j; + + global_group_size[group_count] += global_group_links[i].group_i_size; + global_group_id[group_count] = group_i; + hashmap_add_group(group_i, group_count++, &map); + + global_group_size[group_count] += global_group_links[i].group_j_size; + global_group_id[group_count] = group_j; + hashmap_add_group(group_j, group_count++, &map); + } + + if (verbose) + message("Global list compression took: %.3f %s.", + clocks_from_ticks(getticks() - tic), clocks_getunit()); + + tic = getticks(); + + /* Create a global_group_index list of groups across MPI domains so that you + * can perform a union-find locally on each node. */ + /* The value of which is an offset into global_group_id, which is the actual + * root. */ + for (int i = 0; i < group_count; i++) global_group_index[i] = i; + + /* Store the original group size before incrementing in the Union-Find. */ + size_t *orig_global_group_size = NULL; + + if (swift_memalign("fof_orig_global_group_size", + (void **)&orig_global_group_size, SWIFT_STRUCT_ALIGNMENT, + group_count * sizeof(size_t)) != 0) + error( + "Error while allocating memory for the displacement in memory for the " + "global group link list"); + + for (int i = 0; i < group_count; i++) + orig_global_group_size[i] = global_group_size[i]; + + /* Perform a union-find on the group links. */ + for (int i = 0; i < global_group_link_count; i++) { + + /* Use the hash table to find the group offsets in the index array. */ + size_t find_i = + hashmap_find_group_offset(global_group_links[i].group_i, &map); + size_t find_j = + hashmap_find_group_offset(global_group_links[i].group_j, &map); + + /* Use the offset to find the group's root. */ + size_t root_i = fof_find(find_i, global_group_index); + size_t root_j = fof_find(find_j, global_group_index); + + size_t group_i = global_group_id[root_i]; + size_t group_j = global_group_id[root_j]; + + if (group_i == group_j) continue; + + /* Update roots accordingly. */ + size_t size_i = global_group_size[root_i]; + size_t size_j = global_group_size[root_j]; +#ifdef UNION_BY_SIZE_OVER_MPI + if (size_i < size_j) { + global_group_index[root_i] = root_j; + global_group_size[root_j] += size_i; + } else { + global_group_index[root_j] = root_i; + global_group_size[root_i] += size_j; + } +#else + if (group_j < group_i) { + global_group_index[root_i] = root_j; + global_group_size[root_j] += size_i; + } else { + global_group_index[root_j] = root_i; + global_group_size[root_i] += size_j; + } +#endif + } + + hashmap_free(&map); + + if (verbose) + message("global_group_index construction took: %.3f %s.", + clocks_from_ticks(getticks() - tic), clocks_getunit()); + + tic = getticks(); + + /* Update each group locally with new root information. */ + for (int i = 0; i < group_count; i++) { + + size_t group_id = global_group_id[i]; + size_t offset = fof_find(global_group_index[i], global_group_index); + size_t new_root = global_group_id[offset]; + + /* If the group is local update its root and size. */ + if (is_local(group_id, nr_gparts) && new_root != group_id) { + group_index[group_id - node_offset] = new_root; + + group_size[group_id - node_offset] -= orig_global_group_size[i]; + } + + /* If the group linked to a local root update its size. */ + if (is_local(new_root, nr_gparts) && new_root != group_id) { + + /* Use group sizes before Union-Find */ + group_size[new_root - node_offset] += orig_global_group_size[i]; + } + } + + if (verbose) + message("Updating groups locally took: %.3f %s.", + clocks_from_ticks(getticks() - tic), clocks_getunit()); + + /* Clean up memory. */ + swift_free("fof_global_group_links", global_group_links); + swift_free("fof_global_group_index", global_group_index); + swift_free("fof_global_group_size", global_group_size); + swift_free("fof_global_group_id", global_group_id); + swift_free("fof_orig_global_group_size", orig_global_group_size); + +#endif /* WITH_MPI */ +} + +/** + * @brief Perform a FOF search on gravity particles using the cells and applying + * the Union-Find algorithm. + * + * @param props The properties of the FOF scheme. + * @param bh_props The properties of the black hole scheme. + * @param constants The physical constants in internal units. + * @param cosmo The current cosmological model. + * @param s The #space containing the particles. + * @param dump_results Do we want to write the group catalogue to a file? + * @param seed_black_holes Do we want to seed black holes in haloes? + */ +void fof_search_tree(struct fof_props *props, + const struct black_holes_props *bh_props, + const struct phys_const *constants, + const struct cosmology *cosmo, struct space *s, + const int dump_results, const int seed_black_holes) { + + const size_t nr_gparts = s->nr_gparts; + const size_t min_group_size = props->min_group_size; + const size_t group_id_offset = props->group_id_offset; +#ifdef WITH_MPI + const int nr_nodes = s->e->nr_nodes; +#endif + struct gpart *gparts = s->gparts; + size_t *group_index, *group_size; + int num_groups = 0, num_parts_in_groups = 0, max_group_size = 0; + int verbose = s->e->verbose; + ticks tic_total = getticks(); + + char output_file_name[PARSER_MAX_LINE_SIZE]; + snprintf(output_file_name, PARSER_MAX_LINE_SIZE, "%s", props->base_name); + + if (verbose) + message("Searching %zu gravity particles for links with l_x: %lf", + nr_gparts, sqrt(props->l_x2)); + + if (engine_rank == 0 && verbose) + message("Size of hash table element: %ld", sizeof(hashmap_element_t)); + + const size_t group_id_default = props->group_id_default; + +#ifdef WITH_MPI + + /* Reset global variable */ + node_offset = 0; + + /* Determine number of gparts on lower numbered MPI ranks */ + long long nr_gparts_cumulative; + long long nr_gparts_local = s->nr_gparts; + MPI_Scan(&nr_gparts_local, &nr_gparts_cumulative, 1, MPI_LONG_LONG, MPI_SUM, + MPI_COMM_WORLD); + node_offset = nr_gparts_cumulative - nr_gparts_local; + + snprintf(output_file_name + strlen(output_file_name), FILENAME_BUFFER_SIZE, + "_mpi_rank_%d.dat", engine_rank); +#else + snprintf(output_file_name + strlen(output_file_name), FILENAME_BUFFER_SIZE, + ".dat"); +#endif + + /* Local copy of the arrays */ + group_index = props->group_index; + group_size = props->group_size; + + ticks tic_calc_group_size = getticks(); + + threadpool_map(&s->e->threadpool, fof_calc_group_size_mapper, gparts, + nr_gparts, sizeof(struct gpart), 0, s); + + if (verbose) + message("FOF calc group size took (scaling): %.3f %s.", + clocks_from_ticks(getticks() - tic_calc_group_size), + clocks_getunit()); + +#ifdef WITH_MPI + if (nr_nodes > 1) { + + ticks tic_mpi = getticks(); + + /* Search for group links across MPI domains. */ + fof_search_foreign_cells(props, s); + + if (verbose) + message("fof_search_foreign_cells() took: %.3f %s.", + clocks_from_ticks(getticks() - tic_mpi), clocks_getunit()); + } +#endif + + size_t num_groups_local = 0, num_parts_in_groups_local = 0, + max_group_size_local = 0; + + for (size_t i = 0; i < nr_gparts; i++) { + +#ifdef WITH_MPI + /* Find the total number of groups. */ + if (group_index[i] == i + node_offset && group_size[i] >= min_group_size) + num_groups_local++; +#else + /* Find the total number of groups. */ + if (group_index[i] == i && group_size[i] >= min_group_size) + num_groups_local++; +#endif + + /* Find the total number of particles in groups. */ + if (group_size[i] >= min_group_size) + num_parts_in_groups_local += group_size[i]; + + /* Find the largest group. */ + if (group_size[i] > max_group_size_local) + max_group_size_local = group_size[i]; + } + + /* Sort the groups in descending order based upon size and re-label their IDs + * 0-num_groups. */ + struct group_length *high_group_sizes = NULL; + int group_count = 0; + + if (swift_memalign("fof_high_group_sizes", (void **)&high_group_sizes, 32, + num_groups_local * sizeof(struct group_length)) != 0) + error("Failed to allocate list of large groups."); + + /* Store the group_sizes and their offset. */ + for (size_t i = 0; i < nr_gparts; i++) { + +#ifdef WITH_MPI + if (group_index[i] == i + node_offset && group_size[i] >= min_group_size) { + high_group_sizes[group_count].index = node_offset + i; + high_group_sizes[group_count++].size = group_size[i]; + } +#else + if (group_index[i] == i && group_size[i] >= min_group_size) { + high_group_sizes[group_count].index = i; + high_group_sizes[group_count++].size = group_size[i]; + } +#endif + } + + ticks tic = getticks(); + + /* Find global properties. */ +#ifdef WITH_MPI + MPI_Allreduce(&num_groups_local, &num_groups, 1, MPI_INT, MPI_SUM, + MPI_COMM_WORLD); + MPI_Reduce(&num_parts_in_groups_local, &num_parts_in_groups, 1, MPI_INT, + MPI_SUM, 0, MPI_COMM_WORLD); + MPI_Reduce(&max_group_size_local, &max_group_size, 1, MPI_INT, MPI_MAX, 0, + MPI_COMM_WORLD); +#else + num_groups = num_groups_local; + num_parts_in_groups = num_parts_in_groups_local; + max_group_size = max_group_size_local; +#endif /* WITH_MPI */ + props->num_groups = num_groups; + + /* Find number of groups on lower numbered MPI ranks */ +#ifdef WITH_MPI + long long nglocal = num_groups_local; + long long ngsum; + MPI_Scan(&nglocal, &ngsum, 1, MPI_LONG_LONG, MPI_SUM, MPI_COMM_WORLD); + const size_t num_groups_prev = (size_t)(ngsum - nglocal); +#endif /* WITH_MPI */ + + /* Sort local groups into descending order of size */ + qsort(high_group_sizes, num_groups_local, sizeof(struct group_length), + cmp_func_group_size); + + /* Set default group ID for all particles */ + for (size_t i = 0; i < nr_gparts; i++) gparts[i].group_id = group_id_default; + + /* + Assign final group IDs to local root particles where the global root is on + this node and the group is large enough. Within a node IDs are assigned in + descending order of particle number. + */ + for (size_t i = 0; i < num_groups_local; i++) { +#ifdef WITH_MPI + gparts[high_group_sizes[i].index - node_offset].group_id = + group_id_offset + i + num_groups_prev; +#else + gparts[high_group_sizes[i].index].group_id = group_id_offset + i; +#endif + } + +#ifdef WITH_MPI + /* + Now, for each local root where the global root is on some other node + AND the total size of the group is >= min_group_size we need to retrieve + the gparts.group_id we just assigned to the global root. + + Will do that by sending the group_index of these lcoal roots to the node + where their global root is stored and receiving back the new group_id + associated with that particle. + */ + + /* + Identify local roots with global root on another node and large enough + group_size. Store index of the local and global roots in these cases. + + NOTE: if group_size only contains the total FoF mass for global roots, + then we have to communicate ALL fragments where the global root is not + on this node. Hence the commented out extra conditions below. + */ + size_t nsend = 0; + for (size_t i = 0; i < nr_gparts; i += 1) { + if ((!is_local(group_index[i], + nr_gparts))) { /* && (group_size[i] >= min_group_size)) { */ + nsend += 1; + } + } + struct fof_final_index *fof_index_send = + swift_malloc("fof_index_send", sizeof(struct fof_final_index) * nsend); + nsend = 0; + for (size_t i = 0; i < nr_gparts; i += 1) { + if ((!is_local(group_index[i], + nr_gparts))) { /* && (group_size[i] >= min_group_size)) { */ + fof_index_send[nsend].local_root = node_offset + i; + fof_index_send[nsend].global_root = group_index[i]; + nsend += 1; + } + } + + /* Sort by global root - this puts the groups in order of which node they're + * stored on */ + qsort(fof_index_send, nsend, sizeof(struct fof_final_index), + compare_fof_final_index_global_root); + + /* Determine range of global indexes (i.e. particles) on each node */ + size_t *num_on_node = malloc(nr_nodes * sizeof(size_t)); + MPI_Allgather(&nr_gparts, sizeof(size_t), MPI_BYTE, num_on_node, + sizeof(size_t), MPI_BYTE, MPI_COMM_WORLD); + size_t *first_on_node = malloc(nr_nodes * sizeof(size_t)); + first_on_node[0] = 0; + for (int i = 1; i < nr_nodes; i += 1) + first_on_node[i] = first_on_node[i - 1] + num_on_node[i - 1]; + + /* Determine how many entries go to each node */ + int *sendcount = malloc(nr_nodes * sizeof(int)); + for (int i = 0; i < nr_nodes; i += 1) sendcount[i] = 0; + int dest = 0; + for (size_t i = 0; i < nsend; i += 1) { + while ((fof_index_send[i].global_root >= + first_on_node[dest] + num_on_node[dest]) || + (num_on_node[dest] == 0)) + dest += 1; + if (dest >= nr_nodes) error("Node index out of range!"); + sendcount[dest] += 1; + } + + int *recvcount = NULL, *sendoffset = NULL, *recvoffset = NULL; + size_t nrecv = 0; + + fof_compute_send_recv_offsets(nr_nodes, sendcount, &recvcount, &sendoffset, + &recvoffset, &nrecv); + + struct fof_final_index *fof_index_recv = + swift_malloc("fof_index_recv", nrecv * sizeof(struct fof_final_index)); + + /* Exchange group indexes */ + MPI_Alltoallv(fof_index_send, sendcount, sendoffset, fof_final_index_type, + fof_index_recv, recvcount, recvoffset, fof_final_index_type, + MPI_COMM_WORLD); + + /* For each received global root, look up the group ID we assigned and store + * it in the struct */ + for (size_t i = 0; i < nrecv; i += 1) { + if ((fof_index_recv[i].global_root < node_offset) || + (fof_index_recv[i].global_root >= node_offset + nr_gparts)) { + error("Received global root index out of range!"); + } + fof_index_recv[i].global_root = + gparts[fof_index_recv[i].global_root - node_offset].group_id; + } + + /* Send the result back */ + MPI_Alltoallv(fof_index_recv, recvcount, recvoffset, fof_final_index_type, + fof_index_send, sendcount, sendoffset, fof_final_index_type, + MPI_COMM_WORLD); + + /* Update local gparts.group_id */ + for (size_t i = 0; i < nsend; i += 1) { + if ((fof_index_send[i].local_root < node_offset) || + (fof_index_send[i].local_root >= node_offset + nr_gparts)) { + error("Sent local root index out of range!"); + } + gparts[fof_index_send[i].local_root - node_offset].group_id = + fof_index_send[i].global_root; + } + + free(sendcount); + free(recvcount); + free(sendoffset); + free(recvoffset); + swift_free("fof_index_send", fof_index_send); + swift_free("fof_index_recv", fof_index_recv); + +#endif /* WITH_MPI */ + + /* Assign every particle the group_id of its local root. */ + for (size_t i = 0; i < nr_gparts; i++) { + const size_t root = fof_find_local(i, nr_gparts, group_index); + gparts[i].group_id = gparts[root].group_id; + } + + if (verbose) + message("Group sorting took: %.3f %s.", clocks_from_ticks(getticks() - tic), + clocks_getunit()); + + /* Allocate and initialise a group mass array. */ + if (swift_memalign("group_mass", (void **)&props->group_mass, 32, + num_groups_local * sizeof(double)) != 0) + error("Failed to allocate list of group masses for FOF search."); + + bzero(props->group_mass, num_groups_local * sizeof(double)); + + ticks tic_seeding = getticks(); + + double *group_mass = props->group_mass; +#ifdef WITH_MPI + fof_calc_group_mass(props, s, num_groups_local, num_groups_prev, num_on_node, + first_on_node, group_mass); + free(num_on_node); + free(first_on_node); +#else + fof_calc_group_mass(props, s, num_groups_local, 0, NULL, NULL, group_mass); +#endif + + if (verbose) + message("Black hole seeding took: %.3f %s.", + clocks_from_ticks(getticks() - tic_seeding), clocks_getunit()); + + /* Dump group data. */ + if (dump_results) { + fof_dump_group_data(props, output_file_name, s, num_groups_local, + high_group_sizes); + } + + /* Seed black holes */ + if (seed_black_holes) { + fof_seed_black_holes(props, bh_props, constants, cosmo, s, num_groups_local, + high_group_sizes); + } + + /* Free the left-overs */ + swift_free("fof_high_group_sizes", high_group_sizes); + swift_free("fof_group_index", props->group_index); + swift_free("fof_group_size", props->group_size); + swift_free("fof_group_mass", props->group_mass); + swift_free("fof_max_part_density_index", props->max_part_density_index); + swift_free("fof_max_part_density", props->max_part_density); + props->group_index = NULL; + props->group_size = NULL; + props->group_mass = NULL; + props->max_part_density_index = NULL; + props->max_part_density = NULL; + + if (engine_rank == 0) { + message( + "No. of groups: %d. No. of particles in groups: %d. No. of particles " + "not in groups: %lld.", + num_groups, num_parts_in_groups, + s->e->total_nr_gparts - num_parts_in_groups); + + message("Largest group by size: %d", max_group_size); + } + if (verbose) + message("FOF search took: %.3f %s.", + clocks_from_ticks(getticks() - tic_total), clocks_getunit()); + +#ifdef WITH_MPI + MPI_Barrier(MPI_COMM_WORLD); +#endif +} + +void fof_struct_dump(const struct fof_props *props, FILE *stream) { + + struct fof_props temp = *props; + temp.num_groups = 0; + temp.group_link_count = 0; + temp.group_links_size = 0; + temp.group_index = NULL; + temp.group_size = NULL; + temp.group_mass = NULL; + temp.max_part_density_index = NULL; + temp.max_part_density = NULL; + temp.group_links = NULL; + + restart_write_blocks((void *)&temp, sizeof(struct fof_props), 1, stream, + "fof_props", "fof_props"); +} + +void fof_struct_restore(struct fof_props *props, FILE *stream) { + + restart_read_blocks((void *)props, sizeof(struct fof_props), 1, stream, NULL, + "fof_props"); +} diff --git a/src/fof.h b/src/fof.h new file mode 100644 index 0000000000000000000000000000000000000000..160f7c0af8f7f6f7a589c6978cd107b7ffbff1fc --- /dev/null +++ b/src/fof.h @@ -0,0 +1,188 @@ +/******************************************************************************* + * This file is part of SWIFT. + * Copyright (c) 2018 James Willis (james.s.willis@durham.ac.uk) + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published + * by the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public License + * along with this program. If not, see <http://www.gnu.org/licenses/>. + * + ******************************************************************************/ +#ifndef SWIFT_FOF_H +#define SWIFT_FOF_H + +/* Config parameters. */ +#include "../config.h" + +/* Local headers */ +#include "align.h" +#include "parser.h" + +/* Avoid cyclic inclusions */ +struct gpart; +struct space; +struct engine; +struct unit_system; +struct phys_const; +struct black_holes_props; +struct cosmology; + +/* MPI message required for FOF. */ +struct fof_mpi { + + /* The local particle's root ID.*/ + size_t group_i; + + /* The local group's size.*/ + size_t group_i_size; + + /* The foreign particle's root ID.*/ + size_t group_j; + + /* The local group's size.*/ + size_t group_j_size; + +} SWIFT_STRUCT_ALIGN; + +struct fof_props { + + /* ----------- Parameters of the FOF search ------- */ + + /*! The linking length in units of the mean DM inter-particle separation. */ + double l_x_ratio; + + /*! The absolute linking length in internal units if the user wants to + * overwrite the one based on the mean inter-particle separation. */ + double l_x_absolute; + + /*! The square of the linking length. */ + double l_x2; + + /*! The minimum halo mass for black hole seeding. */ + double seed_halo_mass; + + /*! Minimal number of particles in a group */ + size_t min_group_size; + + /*! Default group ID to give to particles not in a group */ + size_t group_id_default; + + /*! ID of the first (largest) group. */ + size_t group_id_offset; + + /*! The base name of the output file */ + char base_name[PARSER_MAX_LINE_SIZE]; + + /* ------------ Group properties ----------------- */ + + /*! Number of groups */ + int num_groups; + + /*! Number of local black holes that belong to groups whose roots are on a + * different node. */ + int extra_bh_seed_count; + + /*! Index of the root particle of the group a given gpart belongs to. */ + size_t *group_index; + + /*! Size of the group a given gpart belongs to. */ + size_t *group_size; + + /*! Mass of the group a given gpart belongs to. */ + double *group_mass; + + /*! Index of the part with the maximal density of each group. */ + long long *max_part_density_index; + + /*! Maximal density of all parts of each group. */ + float *max_part_density; + + /* ------------ MPI-related arrays --------------- */ + + /*! The number of links between pairs of particles on this node and + * a foreign node */ + int group_link_count; + + /*! The allocated size of the links array */ + int group_links_size; + + /*! The links between pairs of particles on this node and a foreign + * node */ + struct fof_mpi *group_links; + +} SWIFT_STRUCT_ALIGN; + +/* Store group size and offset into array. */ +struct group_length { + + size_t index, size; + +} SWIFT_STRUCT_ALIGN; + +#ifdef WITH_MPI +/* Struct used to find final group ID when using MPI */ +struct fof_final_index { + size_t local_root; + size_t global_root; +} SWIFT_STRUCT_ALIGN; + +/* Struct used to find the total mass of a group when using MPI */ +struct fof_final_mass { + size_t global_root; + double group_mass; + long long max_part_density_index; + float max_part_density; +} SWIFT_STRUCT_ALIGN; + +/* Struct used to iterate over the hash table and unpack the mass fragments of a + * group when using MPI */ +struct fof_mass_send_hashmap { + struct fof_final_mass *mass_send; + size_t nsend; +} SWIFT_STRUCT_ALIGN; +#endif + +/* Store local and foreign cell indices that touch. */ +struct cell_pair_indices { + + struct cell *local, *foreign; + +} SWIFT_STRUCT_ALIGN; + +/* Function prototypes. */ +void fof_init(struct fof_props *props, struct swift_params *params, + const struct phys_const *phys_const, + const struct unit_system *us); +void fof_create_mpi_types(void); +void fof_allocate(const struct space *s, const long long total_nr_DM_particles, + struct fof_props *props); +void fof_search_tree(struct fof_props *props, + const struct black_holes_props *bh_props, + const struct phys_const *constants, + const struct cosmology *cosmo, struct space *s, + const int dump_results, const int seed_black_holes); +void rec_fof_search_self(const struct fof_props *props, const double dim[3], + const double search_r2, const int periodic, + const struct gpart *const space_gparts, + struct cell *c); +void rec_fof_search_pair(const struct fof_props *props, const double dim[3], + const double search_r2, const int periodic, + const struct gpart *const space_gparts, + struct cell *restrict ci, struct cell *restrict cj); +void fof_struct_dump(const struct fof_props *props, FILE *stream); +void fof_struct_restore(struct fof_props *props, FILE *stream); +#ifdef WITH_MPI +/* MPI data type for the particle transfers */ +extern MPI_Datatype fof_mpi_type; +extern MPI_Datatype group_length_mpi_type; +#endif + +#endif /* SWIFT_FOF_H */ diff --git a/src/gravity/Default/gravity_io.h b/src/gravity/Default/gravity_io.h index 1ba3899e7ecc346227c10679bb8b704937c625b2..2e443e8fdc2479f3f2feff30281dccad9a7b6519 100644 --- a/src/gravity/Default/gravity_io.h +++ b/src/gravity/Default/gravity_io.h @@ -104,7 +104,7 @@ INLINE static void darkmatter_write_particles(const struct gpart* gparts, int* num_fields) { /* Say how much we want to write */ - *num_fields = 4; + *num_fields = 5; /* List what we want to write */ list[0] = io_make_output_field_convert_gpart( @@ -115,6 +115,8 @@ INLINE static void darkmatter_write_particles(const struct gpart* gparts, io_make_output_field("Masses", FLOAT, 1, UNIT_CONV_MASS, gparts, mass); list[3] = io_make_output_field("ParticleIDs", ULONGLONG, 1, UNIT_CONV_NO_UNITS, gparts, id_or_neg_offset); + list[4] = io_make_output_field("GroupIDs", INT, 1, UNIT_CONV_NO_UNITS, gparts, + group_id); } #endif /* SWIFT_DEFAULT_GRAVITY_IO_H */ diff --git a/src/gravity/Default/gravity_part.h b/src/gravity/Default/gravity_part.h index f065e6d3a2994ff1e522fc3ae9a38fcf591d92af..2c16a34a11d3a7674f5a6c122da780aeaff4a9dc 100644 --- a/src/gravity/Default/gravity_part.h +++ b/src/gravity/Default/gravity_part.h @@ -44,6 +44,9 @@ struct gpart { /*! Type of the #gpart (DM, gas, star, ...) */ enum part_type type; + /* Particle group ID and size in the FOF. */ + size_t group_id, group_size; + #ifdef SWIFT_DEBUG_CHECKS /* Numer of gparts this gpart interacted with */ diff --git a/src/hashmap.c b/src/hashmap.c new file mode 100644 index 0000000000000000000000000000000000000000..0b74b14abf9f29ce71871e5b9af3899a57788340 --- /dev/null +++ b/src/hashmap.c @@ -0,0 +1,507 @@ +/******************************************************************************* + * This file is part of SWIFT. + * Copyright (c) 2019 James Willis (james.s.willis@durham.ac.uk) + * Pedro Gonnet (pedro.gonnet@gmail.com) + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published + * by the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public License + * along with this program. If not, see <http://www.gnu.org/licenses/>. + * + ******************************************************************************/ +/* + * Generic hashmap manipulation functions + * + * Originally by Elliot C Back - + * http://elliottback.com/wp/hashmap-implementation-in-c/ + * + * Modified by Pete Warden to fix a serious performance problem, support strings + * as keys and removed thread synchronization - http://petewarden.typepad.com + */ +#include <stdio.h> +#include <stdlib.h> +#include <string.h> + +#include "error.h" +#include "hashmap.h" +#include "memuse.h" + +#define INITIAL_NUM_CHUNKS (1) +#define HASHMAP_GROWTH_FACTOR (2) +#define HASHMAP_MAX_FILL_RATIO (1.0) +#define HASHMAP_MAX_CHUNK_FILL_RATIO (0.5) + +/** + * @brief Pre-allocate a number of chunks for the graveyard. + */ +void hashmap_allocate_chunks(hashmap_t *m, int num_chunks) { + /* Allocate a fresh set of chunks. */ + hashmap_chunk_t *alloc; + if ((alloc = (hashmap_chunk_t *)swift_calloc( + "hashmap", num_chunks, sizeof(hashmap_chunk_t))) == NULL) { + error("Unable to allocate chunks."); + } + + /* Hook up the alloc, so that we can clean it up later. */ + if (m->allocs_count == m->allocs_size) { + m->allocs_size *= 2; + void **new_allocs = + (void **)swift_malloc("hashmap", sizeof(void *) * m->allocs_size); + memcpy(new_allocs, m->allocs, sizeof(void *) * m->allocs_count); + swift_free("hashmap", m->allocs); + m->allocs = new_allocs; + } + m->allocs[m->allocs_count++] = alloc; + + /* Link the chunks together. */ + for (int k = 0; k < num_chunks - 1; k++) { + alloc[k].next = &alloc[k + 1]; + } + + /* Last chunk points to current graveyard. */ + alloc[num_chunks - 1].next = m->graveyard; + + /* Graveyard points to first new chunk. */ + m->graveyard = alloc; +} + +void hashmap_init(hashmap_t *m) { + /* Allocate the first (empty) list of chunks. */ + m->nr_chunks = INITIAL_NUM_CHUNKS; + if ((m->chunks = (hashmap_chunk_t **)swift_calloc( + "hashmap", m->nr_chunks, sizeof(hashmap_chunk_t *))) == NULL) { + error("Unable to allocate hashmap chunks."); + } + + /* The graveyard is currently empty. */ + m->graveyard = NULL; + m->allocs = NULL; + + /* Init the array of allocations. */ + m->allocs_size = HASHMAP_ALLOCS_INITIAL_SIZE; + if ((m->allocs = (void **)swift_malloc( + "hashmap", sizeof(void *) * m->allocs_size)) == NULL) { + error("Unable to allocate allocs pointer array."); + } + m->allocs_count = 0; + + /* Set initial sizes. */ + m->table_size = m->nr_chunks * HASHMAP_ELEMENTS_PER_CHUNK; + m->size = 0; + + /* Inform the men. */ + if (HASHMAP_DEBUG_OUTPUT) { + message( + "Created hash table of size: %zu each element is %zu bytes. Allocated " + "%zu empty chunks.", + m->table_size * sizeof(hashmap_element_t), sizeof(hashmap_element_t), + m->nr_chunks); + } +} + +/** + * @brief Put a used chunk back into the recycling bin. + */ +void hashmap_release_chunk(hashmap_t *m, hashmap_chunk_t *chunk) { + /* Clear all the chunk's data. */ + memset(chunk, 0, sizeof(hashmap_chunk_t)); + + /* Hook it up with the other stiffs in the graveyard. */ + chunk->next = m->graveyard; + m->graveyard = chunk; +} + +/** + * @brief Return a new chunk, either recycled or freshly allocated. + */ +hashmap_chunk_t *hashmap_get_chunk(hashmap_t *m) { + int num_chunks = m->nr_chunks * HASHMAP_ALLOC_SIZE_FRACTION; + if (!num_chunks) num_chunks = 1; + if (m->graveyard == NULL) { + hashmap_allocate_chunks(m, num_chunks); + } + + hashmap_chunk_t *res = m->graveyard; + m->graveyard = res->next; + res->next = NULL; + + return res; +} + +/** + * @brief Looks for the given key and retuns a pointer to the corresponding + * element. + * + * The returned element is either the one that already existed in the hashmap, + * or a newly-reseverd element initialized to zero. + * + * If the hashmap is full, NULL is returned. + * + * We use `rand_r` as a hashing function. The key is first hashed to obtain an + * initial global position. If there is a collision, the hashing function is + * re-applied to the key to obtain a new offset *within the same bucket*. This + * is repeated for at most HASHMAP_MAX_CHAIN_LENGTH steps, at which point + * insertion fails. + */ +hashmap_element_t *hashmap_find(hashmap_t *m, hashmap_key_t key, int create_new, + int *chain_length, int *created_new_element) { + /* If full, return immediately */ + if (create_new && m->size > m->table_size * HASHMAP_MAX_FILL_RATIO) { + if (HASHMAP_DEBUG_OUTPUT) { + message("hashmap is too full (%zu of %zu elements used), re-hashing.", + m->size, m->table_size); + } + return NULL; + } + + /* We will use rand_r as our hash function. */ + unsigned int curr = (unsigned int)key; + + /* Get offsets to the entry, its chunk, it's mask, etc. */ + const size_t offset = rand_r(&curr) % m->table_size; + const size_t chunk_offset = offset / HASHMAP_ELEMENTS_PER_CHUNK; + size_t offset_in_chunk = offset - chunk_offset * HASHMAP_ELEMENTS_PER_CHUNK; + + /* Allocate the chunk if needed. */ + if (m->chunks[chunk_offset] == NULL) { + /* Quit here if we don't want to create a new entry. */ + if (!create_new) return NULL; + + /* Get a new chunk for this offset. */ + m->chunks[chunk_offset] = hashmap_get_chunk(m); + } + hashmap_chunk_t *chunk = m->chunks[chunk_offset]; + + /* Count the number of free elements in this chunk and bail if it is too low. + */ + int chunk_count = 0; + for (int k = 0; k < HASHMAP_MASKS_PER_CHUNK; k++) { + chunk_count += __builtin_popcountll(chunk->masks[k]); + } + if (create_new && + chunk_count > HASHMAP_ELEMENTS_PER_CHUNK * HASHMAP_MAX_CHUNK_FILL_RATIO) { + if (HASHMAP_DEBUG_OUTPUT) { + message( + "chunk %zu is too full (%i of %i elements used, fill ratio is " + "%.2f%%), re-hashing.", + chunk_offset, chunk_count, HASHMAP_ELEMENTS_PER_CHUNK, + (100.0 * m->size) / m->table_size); + } + return NULL; + } + + /* Linear probing (well, not really, but whatever). */ + for (int i = 0; i < HASHMAP_MAX_CHAIN_LENGTH; i++) { + /* Record the chain_length, if not NULL. */ + if (chain_length) *chain_length = i; + + /* Compute the offsets within the masks of this chunk. */ + const int mask_offset = offset_in_chunk / HASHMAP_BITS_PER_MASK; + const int offset_in_mask = + offset_in_chunk - mask_offset * HASHMAP_BITS_PER_MASK; + + /* Is the offset empty? */ + hashmap_mask_t search_mask = ((hashmap_mask_t)1) << offset_in_mask; + if (!(chunk->masks[mask_offset] & search_mask)) { + /* Quit here if we don't want to create a new element. */ + if (!create_new) return NULL; + + /* Mark this element as taken and increase the size counter. */ + chunk->masks[mask_offset] |= search_mask; + m->size += 1; + if (created_new_element) *created_new_element = 1; + + /* Set the key. */ + chunk->data[offset_in_chunk].key = key; + + /* Return a pointer to the new element. */ + return &chunk->data[offset_in_chunk]; + } + + /* Does the offset by chance contain the key we are looking for? */ + else if (chunk->data[offset_in_chunk].key == key) { + return &chunk->data[offset_in_chunk]; + } + + /* None of the above, so this is a collision. Re-hash, but within the same + chunk. I guess this is Hopscotch Hashing? */ + else { + offset_in_chunk = rand_r(&curr) % HASHMAP_ELEMENTS_PER_CHUNK; + } + } + + /* We lucked out, so return nothing. */ + if (HASHMAP_DEBUG_OUTPUT) { + message("maximum chain length exceeded, re-hashing."); + } + return NULL; +} + +/** + * @brief Grows the hashmap and rehashes all the elements + */ +void hashmap_grow(hashmap_t *m) { + /* Hold on to the old data. */ + const size_t old_table_size = m->table_size; + hashmap_chunk_t **old_chunks = m->chunks; + + /* Re-allocate the chunk array. */ + m->table_size *= HASHMAP_GROWTH_FACTOR; + m->nr_chunks = (m->table_size + HASHMAP_ELEMENTS_PER_CHUNK - 1) / + HASHMAP_ELEMENTS_PER_CHUNK; + m->table_size = m->nr_chunks * HASHMAP_ELEMENTS_PER_CHUNK; + + if (HASHMAP_DEBUG_OUTPUT) { + message("Increasing hash table size from %zu (%zu kb) to %zu (%zu kb).", + old_table_size, old_table_size * sizeof(hashmap_element_t) / 1024, + m->table_size, m->table_size * sizeof(hashmap_element_t) / 1024); + } + + if ((m->chunks = (hashmap_chunk_t **)swift_calloc( + "hashmap", m->nr_chunks, sizeof(hashmap_chunk_t *))) == NULL) { + error("Unable to allocate hashmap chunks."); + } + + /* Reset size. */ + m->size = 0; + + /* Iterate over the chunks and add their entries to the new table. */ + for (size_t cid = 0; cid < old_table_size / HASHMAP_ELEMENTS_PER_CHUNK; + cid++) { + /* Skip empty chunks. */ + hashmap_chunk_t *chunk = old_chunks[cid]; + if (!chunk) continue; + + /* Loop over the masks in this chunk. */ + for (int mid = 0; mid < HASHMAP_MASKS_PER_CHUNK; mid++) { + /* Skip empty masks. */ + if (chunk->masks[mid] == 0) continue; + + /* Loop over the mask entries. */ + for (int eid = 0; eid < HASHMAP_BITS_PER_MASK; eid++) { + hashmap_mask_t element_mask = ((hashmap_mask_t)1) << eid; + if (chunk->masks[mid] & element_mask) { + hashmap_element_t *element = + &chunk->data[mid * HASHMAP_BITS_PER_MASK + eid]; + + /* Copy the element over to the new hashmap. */ + hashmap_element_t *new_element = + hashmap_find(m, element->key, /*create_new=*/1, + /*chain_length=*/NULL, /*created_new_element=*/NULL); + if (!new_element) { + /* TODO(pedro): Deal with this type of failure more elegantly. */ + error("Failed to re-hash element."); + } + new_element->value = element->value; + } + } + } + + /* We're through with this chunk, recycle it. */ + hashmap_release_chunk(m, chunk); + } + + /* Free the old list of chunks. */ + swift_free("hashmap", old_chunks); +} + +void hashmap_put(hashmap_t *m, hashmap_key_t key, hashmap_value_t value) { + + /* Try to find an element for the given key. */ + hashmap_element_t *element = + hashmap_find(m, key, /*create_new=*/1, /*chain_length=*/NULL, + /*created_new_element=*/NULL); + + /* Loop around, trying to find our place in the world. */ + while (!element) { + hashmap_grow(m); + element = hashmap_find(m, key, /*create_new=*/1, /*chain_length=*/NULL, + /*created_new_element=*/NULL); + } + + /* Set the value. */ + element->value = value; +} + +hashmap_value_t *hashmap_get(hashmap_t *m, hashmap_key_t key) { + + /* Look for the given key. */ + hashmap_element_t *element = + hashmap_find(m, key, /*create_new=*/1, /*chain_length=*/NULL, + /*created_new_element=*/NULL); + while (!element) { + hashmap_grow(m); + element = hashmap_find(m, key, /*create_new=*/1, /*chain_length=*/NULL, + /*created_new_element=*/NULL); + } + return &element->value; +} + +hashmap_value_t *hashmap_get_new(hashmap_t *m, hashmap_key_t key, + int *created_new_element) { + /* Look for the given key. */ + hashmap_element_t *element = hashmap_find( + m, key, /*create_new=*/1, /*chain_length=*/NULL, created_new_element); + while (!element) { + hashmap_grow(m); + element = hashmap_find(m, key, /*create_new=*/1, /*chain_length=*/NULL, + created_new_element); + } + return &element->value; +} + +hashmap_value_t *hashmap_lookup(hashmap_t *m, hashmap_key_t key) { + hashmap_element_t *element = + hashmap_find(m, key, /*create_new=*/0, /*chain_length=*/NULL, + /*created_new_element=*/NULL); + return element ? &element->value : NULL; +} + +void hashmap_iterate(hashmap_t *m, hashmap_mapper_t f, void *data) { + /* Loop over the chunks. */ + for (size_t cid = 0; cid < m->nr_chunks; cid++) { + hashmap_chunk_t *chunk = m->chunks[cid]; + if (!chunk) continue; + + /* Loop over the masks. */ + for (int mid = 0; mid < HASHMAP_MASKS_PER_CHUNK; mid++) { + hashmap_mask_t mask = chunk->masks[mid]; + if (!mask) continue; + + /* Loop over each element in the mask. */ + for (int eid = 0; + eid < HASHMAP_BITS_PER_MASK && + mid * HASHMAP_BITS_PER_MASK + eid < HASHMAP_ELEMENTS_PER_CHUNK; + eid++) { + /* If the element exists, call the function on it. */ + if (mask & (((hashmap_mask_t)1) << eid)) { + hashmap_element_t *element = + &chunk->data[mid * HASHMAP_BITS_PER_MASK + eid]; + f(element->key, &element->value, data); + } + } + } + } +} + +void hashmap_free(hashmap_t *m) { + /* Free the list of active chunks. Note that the actual chunks will be freed + as part of the allocs below. */ + swift_free("hashmap", m->chunks); + + /* Re-set some pointers and values, just in case. */ + m->chunks = NULL; + m->graveyard = NULL; + m->size = 0; + m->table_size = 0; + m->nr_chunks = 0; + + /* Free the chunk allocs. */ + for (size_t k = 0; k < m->allocs_count; k++) { + swift_free("hashmap", m->allocs[k]); + } + swift_free("hashmap", m->allocs); +} + +size_t hashmap_size(hashmap_t *m) { + if (m != NULL) + return m->size; + else + return 0; +} + +#if HASHMAP_DEBUG_OUTPUT +void hashmap_count_chain_lengths(hashmap_key_t key, hashmap_value_t *value, + void *data) { + hashmap_t *m = (hashmap_t *)data; + int count = 0; + hashmap_find(m, key, /*create_entry=*/0, &count, + /*created_new_element=*/NULL); + m->chain_length_counts[count] += 1; +} +#endif + +void hashmap_print_stats(hashmap_t *m) { + /* Basic stats. */ + message("size: %zu, table_size: %zu, nr_chunks: %zu.", m->size, m->table_size, + m->nr_chunks); + + /* Count the number of populated chunks, graveyard chunks, and allocs. */ + int chunk_counter = 0; + for (size_t k = 0; k < m->nr_chunks; k++) { + if (m->chunks[k]) chunk_counter += 1; + } + int graveyard_counter = 0; + for (hashmap_chunk_t *finger = m->graveyard; finger != NULL; + finger = finger->next) { + graveyard_counter += 1; + } + message( + "populated chunks: %i (%zu kb), graveyard chunks: %i (%zu kb), allocs: " + "%zu", + chunk_counter, sizeof(hashmap_chunk_t) * chunk_counter / 1024, + graveyard_counter, sizeof(hashmap_chunk_t) * graveyard_counter / 1024, + m->allocs_count); + + /* Print fill ratios. */ + message("element-wise fill ratio: %.2f%%, chunk-wise fill ratio: %.2f%%", + (100.0 * m->size) / m->table_size, + (100.0 * chunk_counter) / m->nr_chunks); + +#if HASHMAP_DEBUG_OUTPUT + /* Compute the chain lengths. */ + for (int k = 0; k < HASHMAP_MAX_CHAIN_LENGTH; k++) { + m->chain_length_counts[k] = 0; + } + hashmap_iterate(m, hashmap_count_chain_lengths, m); + message("chain lengths:"); + for (int k = 0; k < HASHMAP_MAX_CHAIN_LENGTH; k++) { + message(" chain_length_counts[%i]: %zu (%.2f%%)", k, + m->chain_length_counts[k], + (100.0 * m->chain_length_counts[k]) / m->size); + } +#endif + + /* Compute chunk fill ratios. */ + size_t chunk_fill_ratio_counts[11]; + for (int k = 0; k < 11; k++) { + chunk_fill_ratio_counts[k] = 0; + } + for (size_t k = 0; k < m->nr_chunks; k++) { + hashmap_chunk_t *chunk = m->chunks[k]; + if (!chunk) { + chunk_fill_ratio_counts[0] += 1; + } else { + int count = 0; + for (int j = 0; j < HASHMAP_MASKS_PER_CHUNK; j++) { + count += __builtin_popcountll(chunk->masks[j]); + } + chunk_fill_ratio_counts[(int)(10 * count / HASHMAP_MAX_CHUNK_FILL_RATIO / + HASHMAP_ELEMENTS_PER_CHUNK)] += 1; + } + } + message("chunk fill ratios:"); + for (int k = 0; k < 11; k++) { + message(" %3i%% - %3i%%: %zu (%.2f%%)", + (int)(k * 10 * HASHMAP_MAX_CHUNK_FILL_RATIO), + (int)((k + 1) * 10 * HASHMAP_MAX_CHUNK_FILL_RATIO), + chunk_fill_ratio_counts[k], + (100.0 * chunk_fill_ratio_counts[k]) / m->nr_chunks); + } + + /* Print struct sizes. */ + message("sizeof(hashmap_element_t): %zu", sizeof(hashmap_element_t)); + message("sizeof(hashmap_chunk_t): %zu", sizeof(hashmap_chunk_t)); + message("HASHMAP_TARGET_CHUNK_BYTES: %i", HASHMAP_TARGET_CHUNK_BYTES); + message("HASHMAP_BITS_PER_ELEMENT: %i", HASHMAP_BITS_PER_ELEMENT); + message("HASHMAP_ELEMENTS_PER_CHUNK: %i", HASHMAP_ELEMENTS_PER_CHUNK); + message("HASHMAP_MASKS_PER_CHUNK: %i", HASHMAP_MASKS_PER_CHUNK); +} diff --git a/src/hashmap.h b/src/hashmap.h new file mode 100644 index 0000000000000000000000000000000000000000..2a4cdeaf64154d838e40df5a14f61f1084473029 --- /dev/null +++ b/src/hashmap.h @@ -0,0 +1,182 @@ +/******************************************************************************* + * This file is part of SWIFT. + * Copyright (c) 2019 James Willis (james.s.willis@durham.ac.uk) + * Pedro Gonnet (pedro.gonnet@gmail.com) + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published + * by the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public License + * along with this program. If not, see <http://www.gnu.org/licenses/>. + * + ******************************************************************************/ +/* + * Generic hashmap manipulation functions + * + * Originally by Elliot C Back - + * http://elliottback.com/wp/hashmap-implementation-in-c/ + * + * Modified by Pete Warden to fix a serious performance problem, support strings + * as keys and removed thread synchronization - http://petewarden.typepad.com + */ +#ifndef SWIFT_HASHMAP_H +#define SWIFT_HASHMAP_H + +/* Some standard headers. */ +#include <stdbool.h> +#include <stddef.h> + +// Type used for chunk bitmasks. +typedef size_t hashmap_mask_t; + +#define HASHMAP_BITS_PER_MASK ((int)sizeof(hashmap_mask_t) * 8) + +// Type used for the hashmap keys (must have a valid '==' operation). +#ifndef hashmap_key_t +#define hashmap_key_t size_t +#endif + +// Type used for the hashmap values (must have a valid '==' operation). +#ifndef hashmap_value_t +typedef struct _hashmap_struct { + long long value_st; + float value_flt; + double value_dbl; +} hashmap_struct_t; +#define hashmap_value_t hashmap_struct_t +#endif + +/* We need to keep keys and values */ +typedef struct _hashmap_element { + hashmap_key_t key; + hashmap_value_t value; +} hashmap_element_t; + +/* Make sure a chunk fits in a given size. */ +#define HASHMAP_TARGET_CHUNK_BYTES (4 * 1024) +#define HASHMAP_BITS_PER_ELEMENT ((int)sizeof(hashmap_element_t) * 8 + 1) +#define HASHMAP_ELEMENTS_PER_CHUNK \ + ((HASHMAP_TARGET_CHUNK_BYTES * 8) / HASHMAP_BITS_PER_ELEMENT) +#define HASHMAP_MASKS_PER_CHUNK \ + ((HASHMAP_ELEMENTS_PER_CHUNK + HASHMAP_BITS_PER_MASK - 1) / \ + HASHMAP_BITS_PER_MASK) + +#define HASHMAP_ALLOCS_INITIAL_SIZE (8) +#define HASHMAP_ALLOC_SIZE_FRACTION (0.1) + +#define HASHMAP_MAX_CHAIN_LENGTH (HASHMAP_ELEMENTS_PER_CHUNK / 8) +#ifndef HASHMAP_DEBUG_OUTPUT +#define HASHMAP_DEBUG_OUTPUT (0) +#endif // HASHMAP_DEBUG_OUTPUT + +/* A chunk of hashmap_element, with the corresponding bitmask. */ +typedef struct _hashmap_chunk { + union { + hashmap_mask_t masks[HASHMAP_MASKS_PER_CHUNK]; + void *next; + }; + hashmap_element_t data[HASHMAP_ELEMENTS_PER_CHUNK]; +} hashmap_chunk_t; + +/* A hashmap has some maximum size and current size, + * as well as the data to hold. */ +typedef struct _hashmap { + size_t table_size; + size_t size; + size_t nr_chunks; + hashmap_chunk_t * + *chunks; // Pointer to chunks in use, but not densely populated. + hashmap_chunk_t + *graveyard; // Pointer to allocated, but currently unused chunks. + + void **allocs; // Pointers to allocated blocks of chunks. + size_t allocs_size; // Size of the allocs array. + size_t allocs_count; // Number of elements in the allocs array. + +#if HASHMAP_DEBUG_OUTPUT + /* Chain lengths, used for debugging only. */ + size_t chain_length_counts[HASHMAP_MAX_CHAIN_LENGTH]; +#endif +} hashmap_t; + +/** + * Pointer to a function that can take a key, a pointer to a value, and a + * void pointer extra data payload. + */ +typedef void (*hashmap_mapper_t)(hashmap_key_t, hashmap_value_t *, void *); + +/** + * @brief Initialize a hashmap. + */ +void hashmap_init(hashmap_t *m); + +/** + * @brief Add a key/value pair to the hashmap, overwriting whatever was + * previously there. + */ +extern void hashmap_put(hashmap_t *m, hashmap_key_t key, hashmap_value_t value); + +/** + * @brief Get the value for a given key. If no value exists a new one will be + * created. + * + * Note that the returned pointer is volatile and will be invalidated if the + * hashmap is re-hashed! + */ +extern hashmap_value_t *hashmap_get(hashmap_t *m, hashmap_key_t key); + +/** + * @brief Get the value for a given key. If no value exists a new one will be + * created. Return a flag indicating whether a new element has been added. + * + * Note that the returned pointer is volatile and will be invalidated if the + * hashmap is re-hashed! + */ +extern hashmap_value_t *hashmap_get_new(hashmap_t *m, hashmap_key_t key, + int *created_new_element); + +/** + * @brief Look for the given key and return a pointer to its value or NULL if + * it is not in the hashmap. + * + * Note that the returned pointer is volatile and will be invalidated if the + * hashmap is re-hashed! + */ +extern hashmap_value_t *hashmap_lookup(hashmap_t *m, hashmap_key_t key); + +/** + * @brief Iterate the function parameter over each element in the hashmap. + * + * The function `f` takes three arguments, the first and second are the element + * key and a pointer to the correspondig value, respectively, while the third + * is the `void *data` argument. + */ +extern void hashmap_iterate(hashmap_t *m, hashmap_mapper_t f, void *data); + +/** + * @brief De-allocate memory associated with this hashmap, clears all the + * entries. + * + * After a call to `hashmap_free`, the hashmap cna be re-initialized with + * `hashmap_init`. + */ +extern void hashmap_free(hashmap_t *m); + +/** + * Get the current size of a hashmap + */ +extern size_t hashmap_size(hashmap_t *m); + +/** + * @brief Print all sorts of stats on the given hashmap. + */ +void hashmap_print_stats(hashmap_t *m); + +#endif /* SWIFT_HASHMAP_H */ diff --git a/src/hydro/Gadget2/hydro_io.h b/src/hydro/Gadget2/hydro_io.h index 54154ce970a9fccf0f2f6324d86faaebed392555..5d68dc5d9f0f4ac0d12f7829f2c2a12d9964fe4a 100644 --- a/src/hydro/Gadget2/hydro_io.h +++ b/src/hydro/Gadget2/hydro_io.h @@ -130,6 +130,16 @@ INLINE static void convert_part_potential(const struct engine* e, ret[0] = 0.f; } +INLINE static void convert_part_group_id(const struct engine* e, + const struct part* p, + const struct xpart* xp, int* ret) { + + if (p->gpart != NULL) + ret[0] = p->gpart->group_id; + else + ret[0] = 0; +} + /** * @brief Specifies which particle fields to write to a dataset * @@ -143,7 +153,7 @@ INLINE static void hydro_write_particles(const struct part* parts, struct io_props* list, int* num_fields) { - *num_fields = 10; + *num_fields = 11; /* List what we want to write */ list[0] = io_make_output_field_convert_part("Coordinates", DOUBLE, 3, @@ -170,6 +180,9 @@ INLINE static void hydro_write_particles(const struct part* parts, list[9] = io_make_output_field_convert_part("Potential", FLOAT, 1, UNIT_CONV_POTENTIAL, parts, xparts, convert_part_potential); + list[10] = + io_make_output_field_convert_part("GroupIDs", INT, 1, UNIT_CONV_NO_UNITS, + parts, xparts, convert_part_group_id); #ifdef DEBUG_INTERACTIONS_SPH diff --git a/src/io_properties.h b/src/io_properties.h index 815f261be3eef7fc04ed8116b0bf0479fa9f075a..0c2fb52f53a6dda649ea05e30355c25c638be762 100644 --- a/src/io_properties.h +++ b/src/io_properties.h @@ -40,6 +40,9 @@ enum DATA_IMPORTANCE { COMPULSORY = 1, OPTIONAL = 0, UNUSED = -1 }; typedef void (*conversion_func_part_float)(const struct engine*, const struct part*, const struct xpart*, float*); +typedef void (*conversion_func_part_int)(const struct engine*, + const struct part*, + const struct xpart*, int*); typedef void (*conversion_func_part_double)(const struct engine*, const struct part*, const struct xpart*, double*); @@ -93,6 +96,7 @@ struct io_props { /* Pointer to the start of the temporary buffer used in i/o */ char* start_temp_c; + int* start_temp_i; float* start_temp_f; double* start_temp_d; long long* start_temp_l; @@ -115,6 +119,7 @@ struct io_props { /* Conversion function for part */ conversion_func_part_float convert_part_f; + conversion_func_part_int convert_part_i; conversion_func_part_double convert_part_d; conversion_func_part_long_long convert_part_l; @@ -247,6 +252,52 @@ INLINE static struct io_props io_make_output_field_( io_make_output_field_convert_part_##type( \ name, type, dim, units, sizeof(part[0]), part, xpart, convert) +/** + * @brief Construct an #io_props from its parameters + * + * @param name Name of the field to read + * @param type The type of the data + * @param dimension Dataset dimension (1D, 3D, ...) + * @param units The units of the dataset + * @param partSize The size in byte of the particle + * @param parts The particle array + * @param xparts The xparticle array + * @param functionPtr The function used to convert a particle to an int + * + * Do not call this function directly. Use the macro defined above. + */ +INLINE static struct io_props io_make_output_field_convert_part_INT( + const char name[FIELD_BUFFER_SIZE], enum IO_DATA_TYPE type, int dimension, + enum unit_conversion_factor units, size_t partSize, + const struct part* parts, const struct xpart* xparts, + conversion_func_part_int functionPtr) { + + struct io_props r; + strcpy(r.name, name); + r.type = type; + r.dimension = dimension; + r.importance = UNUSED; + r.units = units; + r.field = NULL; + r.partSize = partSize; + r.parts = parts; + r.xparts = xparts; + r.sparts = NULL; + r.conversion = 1; + r.convert_part_i = functionPtr; + r.convert_part_f = NULL; + r.convert_part_d = NULL; + r.convert_part_l = NULL; + r.convert_gpart_f = NULL; + r.convert_gpart_d = NULL; + r.convert_gpart_l = NULL; + r.convert_spart_f = NULL; + r.convert_spart_d = NULL; + r.convert_spart_l = NULL; + + return r; +} + /** * @brief Construct an #io_props from its parameters * @@ -281,6 +332,7 @@ INLINE static struct io_props io_make_output_field_convert_part_FLOAT( r.sparts = NULL; r.bparts = NULL; r.conversion = 1; + r.convert_part_i = NULL; r.convert_part_f = functionPtr; r.convert_part_d = NULL; r.convert_part_l = NULL; @@ -331,6 +383,7 @@ INLINE static struct io_props io_make_output_field_convert_part_DOUBLE( r.sparts = NULL; r.bparts = NULL; r.conversion = 1; + r.convert_part_i = NULL; r.convert_part_f = NULL; r.convert_part_d = functionPtr; r.convert_part_l = NULL; @@ -381,6 +434,7 @@ INLINE static struct io_props io_make_output_field_convert_part_LONGLONG( r.sparts = NULL; r.bparts = NULL; r.conversion = 1; + r.convert_part_i = NULL; r.convert_part_f = NULL; r.convert_part_d = NULL; r.convert_part_l = functionPtr; @@ -437,6 +491,7 @@ INLINE static struct io_props io_make_output_field_convert_gpart_FLOAT( r.sparts = NULL; r.bparts = NULL; r.conversion = 1; + r.convert_part_i = NULL; r.convert_part_f = NULL; r.convert_part_d = NULL; r.convert_part_l = NULL; @@ -485,6 +540,7 @@ INLINE static struct io_props io_make_output_field_convert_gpart_DOUBLE( r.sparts = NULL; r.bparts = NULL; r.conversion = 1; + r.convert_part_i = NULL; r.convert_part_f = NULL; r.convert_part_d = NULL; r.convert_part_l = NULL; @@ -533,6 +589,7 @@ INLINE static struct io_props io_make_output_field_convert_gpart_LONGLONG( r.sparts = NULL; r.bparts = NULL; r.conversion = 1; + r.convert_part_i = NULL; r.convert_part_f = NULL; r.convert_part_d = NULL; r.convert_part_l = NULL; @@ -589,6 +646,7 @@ INLINE static struct io_props io_make_output_field_convert_spart_FLOAT( r.sparts = sparts; r.bparts = NULL; r.conversion = 1; + r.convert_part_i = NULL; r.convert_part_f = NULL; r.convert_part_d = NULL; r.convert_part_l = NULL; @@ -637,6 +695,7 @@ INLINE static struct io_props io_make_output_field_convert_spart_DOUBLE( r.sparts = sparts; r.bparts = NULL; r.conversion = 1; + r.convert_part_i = NULL; r.convert_part_f = NULL; r.convert_part_d = NULL; r.convert_part_l = NULL; @@ -685,6 +744,7 @@ INLINE static struct io_props io_make_output_field_convert_spart_LONGLONG( r.sparts = sparts; r.bparts = NULL; r.conversion = 1; + r.convert_part_i = NULL; r.convert_part_f = NULL; r.convert_part_d = NULL; r.convert_part_l = NULL; diff --git a/src/outputlist.c b/src/outputlist.c index cab4013bc3841698183b825c7985a75e7095b29c..2ff2263a17dad3e54cf2ed286da026813edb0ae6 100644 --- a/src/outputlist.c +++ b/src/outputlist.c @@ -257,7 +257,7 @@ void output_list_init(struct output_list **list, const struct engine *e, void output_list_print(const struct output_list *outputlist) { printf("/*\t Time Array\t */\n"); - printf("Number of Line: %lu\n", outputlist->size); + printf("Number of Line: %zu\n", outputlist->size); for (size_t ind = 0; ind < outputlist->size; ind++) { if (ind == outputlist->cur_ind) printf("\t%lf <-- Current\n", outputlist->times[ind]); diff --git a/src/part.h b/src/part.h index a1b4ecfbe4a9472acbee6f4bd6ba3a7b2e67820f..253cec7b4edad08234a751ee20e6c05fcff7e6ec 100644 --- a/src/part.h +++ b/src/part.h @@ -32,6 +32,7 @@ /* Local headers. */ #include "align.h" +#include "fof.h" #include "part_type.h" #include "timeline.h" diff --git a/src/runner.c b/src/runner.c index 3694e9dc539c07412e9b4ca35dbfa0f8edfaea0f..2b509b3557ebc78c84c763e73dc8f85428a11690 100644 --- a/src/runner.c +++ b/src/runner.c @@ -4278,6 +4278,12 @@ void *runner_main(void *data) { case task_type_star_formation: runner_do_star_formation(r, t->ci, 1); break; + case task_type_fof_self: + runner_do_fof_self(r, t->ci, 1); + break; + case task_type_fof_pair: + runner_do_fof_pair(r, t->ci, t->cj, 1); + break; default: error("Unknown/invalid task type (%d).", t->type); } @@ -4375,3 +4381,52 @@ void runner_do_logger(struct runner *r, struct cell *c, int timer) { error("Logger disabled, please enable it during configuration"); #endif } + +/** + * @brief Recursively search for FOF groups in a single cell. + * + * @param r runner task + * @param c cell + * @param timer 1 if the time is to be recorded. + */ +void runner_do_fof_self(struct runner *r, struct cell *c, int timer) { + + TIMER_TIC; + + const struct engine *e = r->e; + struct space *s = e->s; + const double dim[3] = {s->dim[0], s->dim[1], s->dim[2]}; + const int periodic = s->periodic; + const struct gpart *const gparts = s->gparts; + const double search_r2 = e->fof_properties->l_x2; + + rec_fof_search_self(e->fof_properties, dim, search_r2, periodic, gparts, c); + + if (timer) TIMER_TOC(timer_fof_self); +} + +/** + * @brief Recursively search for FOF groups between a pair of cells. + * + * @param r runner task + * @param ci cell i + * @param cj cell j + * @param timer 1 if the time is to be recorded. + */ +void runner_do_fof_pair(struct runner *r, struct cell *ci, struct cell *cj, + int timer) { + + TIMER_TIC; + + const struct engine *e = r->e; + struct space *s = e->s; + const double dim[3] = {s->dim[0], s->dim[1], s->dim[2]}; + const int periodic = s->periodic; + const struct gpart *const gparts = s->gparts; + const double search_r2 = e->fof_properties->l_x2; + + rec_fof_search_pair(e->fof_properties, dim, search_r2, periodic, gparts, ci, + cj); + + if (timer) TIMER_TOC(timer_fof_pair); +} diff --git a/src/runner.h b/src/runner.h index fa32befc4f01fd5e39e21d5e907ed8f71ccb55e2..c7b923ed262965e9dd7566dd093084c6f0948079 100644 --- a/src/runner.h +++ b/src/runner.h @@ -86,6 +86,9 @@ void runner_do_cooling(struct runner *r, struct cell *c, int timer); void runner_do_grav_external(struct runner *r, struct cell *c, int timer); void runner_do_grav_fft(struct runner *r, int timer); void runner_do_logger(struct runner *r, struct cell *c, int timer); +void runner_do_fof_self(struct runner *r, struct cell *c, int timer); +void runner_do_fof_pair(struct runner *r, struct cell *ci, struct cell *cj, + int timer); void *runner_main(void *data); void runner_do_unskip_mapper(void *map_data, int num_elements, void *extra_data); diff --git a/src/scheduler.c b/src/scheduler.c index bcc6ff325526982c3541d644287a44994771a4c8..6367fe2cb57734b6d73e3737c992958d3a1d3ed0 100644 --- a/src/scheduler.c +++ b/src/scheduler.c @@ -863,7 +863,100 @@ static void scheduler_splittask_gravity(struct task *t, struct scheduler *s) { } /** - * @brief Mapper function to split tasks that may be too large. + * @brief Split a FOF task if too large. + * + * @param t The #task + * @param s The #scheduler we are working in. + */ +static void scheduler_splittask_fof(struct task *t, struct scheduler *s) { + + /* Iterate on this task until we're done with it. */ + int redo = 1; + while (redo) { + + /* Reset the redo flag. */ + redo = 0; + + /* Non-splittable task? */ + if ((t->ci == NULL) || (t->type == task_type_fof_pair && t->cj == NULL) || + t->ci->grav.count == 0 || (t->cj != NULL && t->cj->grav.count == 0)) { + t->type = task_type_none; + t->subtype = task_subtype_none; + t->cj = NULL; + t->skip = 1; + break; + } + + /* Self-interaction? */ + if (t->type == task_type_fof_self) { + + /* Get a handle on the cell involved. */ + struct cell *ci = t->ci; + + /* Foreign task? */ + if (ci->nodeID != s->nodeID) { + t->skip = 1; + break; + } + + /* Is this cell even split? */ + if (cell_can_split_self_fof_task(ci)) { + + /* Take a step back (we're going to recycle the current task)... */ + redo = 1; + + /* Add the self tasks. */ + int first_child = 0; + while (ci->progeny[first_child] == NULL) first_child++; + t->ci = ci->progeny[first_child]; + for (int k = first_child + 1; k < 8; k++) + if (ci->progeny[k] != NULL && ci->progeny[k]->grav.count) + scheduler_splittask_fof( + scheduler_addtask(s, task_type_fof_self, t->subtype, 0, 0, + ci->progeny[k], NULL), + s); + + /* Make a task for each pair of progeny */ + for (int j = 0; j < 8; j++) + if (ci->progeny[j] != NULL && ci->progeny[j]->grav.count) + for (int k = j + 1; k < 8; k++) + if (ci->progeny[k] != NULL && ci->progeny[k]->grav.count) + scheduler_splittask_fof( + scheduler_addtask(s, task_type_fof_pair, t->subtype, 0, 0, + ci->progeny[j], ci->progeny[k]), + s); + } /* Cell is split */ + + } /* Self interaction */ + + } /* iterate over the current task. */ +} + +/** + * @brief Mapper function to split FOF tasks that may be too large. + * + * @param map_data the tasks to process + * @param num_elements the number of tasks. + * @param extra_data The #scheduler we are working in. + */ +void scheduler_splittasks_fof_mapper(void *map_data, int num_elements, + void *extra_data) { + /* Extract the parameters. */ + struct scheduler *s = (struct scheduler *)extra_data; + struct task *tasks = (struct task *)map_data; + + for (int ind = 0; ind < num_elements; ind++) { + struct task *t = &tasks[ind]; + + /* Invoke the correct splitting strategy */ + if (t->type == task_type_fof_self || t->type == task_type_fof_pair) { + scheduler_splittask_fof(t, s); + } + } +} + +/** + * @brief Mapper function to split non-FOF tasks that may be too large. * * @param map_data the tasks to process * @param num_elements the number of tasks. @@ -889,7 +982,8 @@ void scheduler_splittasks_mapper(void *map_data, int num_elements, /* For future use */ } else { #ifdef SWIFT_DEBUG_CHECKS - error("Unexpected task sub-type"); + error("Unexpected task sub-type %s/%s", taskID_names[t->type], + subtaskID_names[t->subtype]); #endif } } @@ -899,11 +993,21 @@ void scheduler_splittasks_mapper(void *map_data, int num_elements, * @brief Splits all the tasks in the scheduler that are too large. * * @param s The #scheduler. + * @param fof_tasks Are we splitting the FOF tasks (1)? Or the regular tasks + * (0)? */ -void scheduler_splittasks(struct scheduler *s) { - /* Call the mapper on each current task. */ - threadpool_map(s->threadpool, scheduler_splittasks_mapper, s->tasks, - s->nr_tasks, sizeof(struct task), 0, s); +void scheduler_splittasks(struct scheduler *s, const int fof_tasks) { + + if (fof_tasks) { + /* Call the mapper on each current task. */ + threadpool_map(s->threadpool, scheduler_splittasks_fof_mapper, s->tasks, + s->nr_tasks, sizeof(struct task), 0, s); + + } else { + /* Call the mapper on each current task. */ + threadpool_map(s->threadpool, scheduler_splittasks_mapper, s->tasks, + s->nr_tasks, sizeof(struct task), 0, s); + } } /** @@ -1118,6 +1222,7 @@ void scheduler_ranktasks(struct scheduler *s) { * @param size The maximum number of tasks in the #scheduler. */ void scheduler_reset(struct scheduler *s, int size) { + /* Do we need to re-allocate? */ if (size > s->size) { /* Free existing task lists if necessary. */ diff --git a/src/scheduler.h b/src/scheduler.h index 95d6c4760b26e97b20bf2cc6edeb6bebf7b554d9..ac9bc754db1ea03f91c0ce522dc325c18d25ec09 100644 --- a/src/scheduler.h +++ b/src/scheduler.h @@ -190,7 +190,7 @@ void scheduler_reweight(struct scheduler *s, int verbose); struct task *scheduler_addtask(struct scheduler *s, enum task_types type, enum task_subtypes subtype, int flags, int implicit, struct cell *ci, struct cell *cj); -void scheduler_splittasks(struct scheduler *s); +void scheduler_splittasks(struct scheduler *s, const int fof_tasks); struct task *scheduler_done(struct scheduler *s, struct task *t); struct task *scheduler_unlock(struct scheduler *s, struct task *t); void scheduler_addunlock(struct scheduler *s, struct task *ta, struct task *tb); diff --git a/src/serial_io.c b/src/serial_io.c index 9ea2b0f07fca332ba7b374afbb1a42693185d58a..e16a3351d75e176bf71d6dc1c791a37574de96f9 100644 --- a/src/serial_io.c +++ b/src/serial_io.c @@ -1190,11 +1190,11 @@ void write_output_serial(struct engine* e, const char* baseName, if (swift_memalign("parts_written", (void**)&parts_written, part_align, Ngas_written * sizeof(struct part)) != 0) - error("Error while allocating temporart memory for parts"); + error("Error while allocating temporary memory for parts"); if (swift_memalign("xparts_written", (void**)&xparts_written, xpart_align, Ngas_written * sizeof(struct xpart)) != 0) - error("Error while allocating temporart memory for xparts"); + error("Error while allocating temporary memory for xparts"); /* Collect the particles we want to write */ io_collect_parts_to_write(parts, xparts, parts_written, diff --git a/src/swift.h b/src/swift.h index 562a1fd5f49e3599693940aafd4cb305743eed7e..b9f8818d8b833231971abb1afb36ee4507648488 100644 --- a/src/swift.h +++ b/src/swift.h @@ -46,6 +46,7 @@ #include "gravity.h" #include "gravity_derivatives.h" #include "gravity_properties.h" +#include "hashmap.h" #include "hydro.h" #include "hydro_properties.h" #include "lock.h" diff --git a/src/task.c b/src/task.c index 1f45c2e17bc83cae3fd56fa3fe7b3f2cbcda0564..77731ae02f8c84597565717319241997a77fadf7 100644 --- a/src/task.c +++ b/src/task.c @@ -91,7 +91,9 @@ const char *taskID_names[task_type_count] = {"none", "stars_sort", "bh_in", "bh_out", - "bh_ghost"}; + "bh_ghost", + "fof_self", + "fof_pair"}; /* Sub-task type names. */ const char *subtaskID_names[task_subtype_count] = { @@ -215,6 +217,8 @@ __attribute__((always_inline)) INLINE static enum task_actions task_acts_on( case task_type_kick1: case task_type_kick2: case task_type_logger: + case task_type_fof_self: + case task_type_fof_pair: case task_type_timestep: case task_type_send: case task_type_recv: diff --git a/src/task.h b/src/task.h index 9143ccdfa171467e54ae13b5937d88cae3d46725..eef0aca5fb075725fb1da51ccaf4033a3608e078 100644 --- a/src/task.h +++ b/src/task.h @@ -87,6 +87,8 @@ enum task_types { task_type_bh_in, /* Implicit */ task_type_bh_out, /* Implicit */ task_type_bh_ghost, + task_type_fof_self, + task_type_fof_pair, task_type_count } __attribute__((packed)); diff --git a/src/timers.c b/src/timers.c index 8b0d4c4d83f7051bc9b6b9c02576d614303855b7..8c193f9f2e15746bd551f8a56edafe7c80753334 100644 --- a/src/timers.c +++ b/src/timers.c @@ -106,6 +106,8 @@ const char* timers_names[timer_count] = { "step", "logger", "do_stars_sort", + "fof_self", + "fof_pair", }; /* File to store the timers */ diff --git a/src/timers.h b/src/timers.h index cf8e9683bae77032861f9690a3235453e549ac98..abcd1019a708cfae28094875fa094c9886804d26 100644 --- a/src/timers.h +++ b/src/timers.h @@ -107,6 +107,8 @@ enum { timer_step, timer_logger, timer_do_stars_sort, + timer_fof_self, + timer_fof_pair, timer_count, }; diff --git a/tests/Makefile.am b/tests/Makefile.am index 5f95e7d8c1d7f8cf78a9207b6b4ac374a0b87745..3407de7588dc297b072567a8b320c99e3ece0462 100644 --- a/tests/Makefile.am +++ b/tests/Makefile.am @@ -41,7 +41,7 @@ check_PROGRAMS = testGreetings testReading testSingle testTimeIntegration \ testVoronoi1D testVoronoi2D testVoronoi3D testPeriodicBC \ testGravityDerivatives testPotentialSelf testPotentialPair testEOS testUtilities \ testSelectOutput testCbrt testCosmology testOutputList test27cellsStars \ - test27cellsStars_subset testCooling testFeedback + test27cellsStars_subset testCooling testFeedback testHashmap # Rebuild tests when SWIFT is updated. $(check_PROGRAMS): ../src/.libs/libswiftsim.a @@ -134,6 +134,8 @@ testCooling_SOURCES = testCooling.c testFeedback_SOURCES = testFeedback.c +testHashmap_SOURCES = testHashmap.c + # Files necessary for distribution EXTRA_DIST = testReading.sh makeInput.py testActivePair.sh \ test27cells.sh test27cellsPerturbed.sh testParser.sh testPeriodicBC.sh \ diff --git a/tests/testHashmap.c b/tests/testHashmap.c new file mode 100644 index 0000000000000000000000000000000000000000..af299732318744f2924a364ce4947482a23d3034 --- /dev/null +++ b/tests/testHashmap.c @@ -0,0 +1,79 @@ +/******************************************************************************* + * This file is part of SWIFT. + * Copyright (C) 2019 James Willis (james.s.willis@durham.ac.uk). + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published + * by the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public License + * along with this program. If not, see <http://www.gnu.org/licenses/>. + * + ******************************************************************************/ + +/* Config parameters. */ +#include "../config.h" + +#include <fenv.h> + +/* Local headers. */ +#include "swift.h" + +#define NUM_KEYS (26 * 1000 * 1000) + +int main(int argc, char *argv[]) { + + /* Initialize CPU frequency, this also starts time. */ + unsigned long long cpufreq = 0; + clocks_set_cpufreq(cpufreq); + +/* Choke on FPEs */ +#ifdef HAVE_FE_ENABLE_EXCEPT + feenableexcept(FE_DIVBYZERO | FE_INVALID | FE_OVERFLOW); +#endif + + hashmap_t m; + + message("Initialising hash table..."); + hashmap_init(&m); + + message("Populating hash table..."); + for (hashmap_key_t key = 0; key < NUM_KEYS; key++) { + hashmap_value_t value; + value.value_st = (long long)key; + hashmap_put(&m, key, value); + } + + message("Dumping hashmap stats."); + hashmap_print_stats(&m); + + message("Retrieving elements from the hash table..."); + for (hashmap_key_t key = 0; key < NUM_KEYS; key++) { + hashmap_value_t value = *hashmap_lookup(&m, key); + + if (value.value_st != (long long)key) + error("Incorrect value (%lld) found for key: %lld", value.value_st, + (long long)key); + // else message("Retrieved element, Key: %zu Value: %zu", key, value); + } + + message("Checking for invalid key..."); + if (hashmap_lookup(&m, NUM_KEYS + 1) != NULL) + error("Key: %d shouldn't exist or be created.", NUM_KEYS + 1); + + message("Checking hash table size..."); + if (m.size != NUM_KEYS) + error( + "The no. of elements stored in the hash table are not equal to the no. " + "of keys. No. of elements: %zu, no. of keys: %d", + m.size, NUM_KEYS); + + message("Freeing hash table..."); + hashmap_free(&m); +} diff --git a/tools/task_plots/analyse_tasks.py b/tools/task_plots/analyse_tasks.py index 841865047663e5de4f24e47ed41c559b66af2406..fd07340940b8903bf09fe807f2741d9e65261129 100755 --- a/tools/task_plots/analyse_tasks.py +++ b/tools/task_plots/analyse_tasks.py @@ -107,6 +107,8 @@ TASKTYPES = [ "bh_in", "bh_out", "bh_ghost", + "fof_self", + "fof_pair", "count", ] diff --git a/tools/task_plots/plot_tasks.py b/tools/task_plots/plot_tasks.py index 94e4d9a5534c4e6ba4efb5d0e06ab1207156063a..070ae2f3c509d6ae8fef8bad9ffec6c316060a7c 100755 --- a/tools/task_plots/plot_tasks.py +++ b/tools/task_plots/plot_tasks.py @@ -192,6 +192,8 @@ TASKTYPES = [ "bh_in", "bh_out", "bh_ghost", + "fof_self", + "fof_pair", "count", ] @@ -477,11 +479,13 @@ for rank in ranks: toc = int(data[line, toccol]) / CPU_CLOCK tasks[thread][-1]["tic"] = tic tasks[thread][-1]["toc"] = toc - if ( - "self" in tasktype - or "pair" in tasktype - or "recv" in tasktype - or "send" in tasktype + if ("fof" in tasktype): + tasks[thread][-1]["colour"] = TASKCOLOURS[tasktype] + elif( + "self" in tasktype + or "pair" in tasktype + or "recv" in tasktype + or "send" in tasktype ): fulltype = tasktype + "/" + subtype if fulltype in SUBCOLOURS: