diff --git a/AUTHORS b/AUTHORS index a95d5060dc25ac3170f5e6615e690ea6171e0b83..07c7dd91c9f2b3ddcfcd517b4f4c08c6fad8d0c1 100644 --- a/AUTHORS +++ b/AUTHORS @@ -24,3 +24,4 @@ Evgenii Chaikin chaikin@strw.leidenuniv.nl Sylvia Ploeckinger ploeckinger@lorentz.leidenuniv.nl Willem Elbers willem.h.elbers@durham.ac.uk TK Chan chantsangkeung@gmail.com +Marcel van Daalen daalen@strw.leidenuniv.nl \ No newline at end of file diff --git a/README b/README index 3acdc3fa9eab31090098b0f0eb2d562a7c156eef..ffd8a640756663f55f394065db0300eeb5bfc822 100644 --- a/README +++ b/README @@ -106,6 +106,7 @@ Parameters: --dump-tasks-threshold=<flt> Fraction of the total step's time spent in a task to trigger a dump of the task plot on this step - + --power Run with power spectrum outputs. + See the file examples/parameter_example.yml for an example of parameter file. diff --git a/README.md b/README.md index 5564da74037216fae70b0e23cc75ad6a01940fc7..60d7e91e83cbeb29ad9e53f206f9f3a52ef3030e 100644 --- a/README.md +++ b/README.md @@ -194,6 +194,7 @@ Parameters: --dump-tasks-threshold=<flt> Fraction of the total step's time spent in a task to trigger a dump of the task plot on this step + --power Run with power spectrum outputs. See the file examples/parameter_example.yml for an example of parameter file. ``` diff --git a/doc/RTD/source/CommandLineOptions/index.rst b/doc/RTD/source/CommandLineOptions/index.rst index 60210ec5a91189479263d1a19c49ecd4d7466306..ea525638a8e7c0b3995b59f88f33758f83ad74e8 100644 --- a/doc/RTD/source/CommandLineOptions/index.rst +++ b/doc/RTD/source/CommandLineOptions/index.rst @@ -103,6 +103,7 @@ can be found by typing ``./swift -h``: --dump-tasks-threshold=<flt> Fraction of the total step's time spent in a task to trigger a dump of the task plot on this step + --power Run with power spectrum outputs. See the file examples/parameter_example.yml for an example of parameter file. diff --git a/doc/RTD/source/ParameterFiles/output_selection.rst b/doc/RTD/source/ParameterFiles/output_selection.rst index 44fe7bcb362fb55cdd0f8c4c12b5e3e78a33523e..fb4e3a2a3375092ab7a475e20b8d46b709004997 100644 --- a/doc/RTD/source/ParameterFiles/output_selection.rst +++ b/doc/RTD/source/ParameterFiles/output_selection.rst @@ -5,7 +5,8 @@ Output List ~~~~~~~~~~~ -In the sections ``Snapshots``, ``Statistics`` and ``StructureFinding``, you can +In the sections ``Snapshots``, ``Statistics``, ``StructureFinding``, +``LineOfSight``, ``PowerSpectrum``, and ``FOF`` you can specify the options ``output_list_on`` and ``output_list`` which receive an int and a filename. The ``output_list_on`` enable or not the output list and ``output_list`` is the filename containing the output times. With the file diff --git a/doc/RTD/source/ParameterFiles/parameter_description.rst b/doc/RTD/source/ParameterFiles/parameter_description.rst index d97e520c9b16b23fa88113650a8dd2890dabaeb6..00a3ed3e2840d34e2ff0f4452ce5e3904831fc43 100644 --- a/doc/RTD/source/ParameterFiles/parameter_description.rst +++ b/doc/RTD/source/ParameterFiles/parameter_description.rst @@ -808,6 +808,14 @@ It is also possible to run the FOF algorithm just before writing each snapshot. See the section :ref:`Parameters_fof` for details of the FOF parameters. +It is also possible to run the power spectrum calculation just before writing +each snapshot. + +* Run PS every time a snapshot is dumped: ``invoke_ps`` + (default: ``0``). + +See the section :ref:`Parameters_ps` for details of the power spectrum parameters. + When running over MPI, users have the option to split the snapshot over more than one file. This can be useful if the parallel-io on a given system is slow but has the drawback of producing many files per time slice. This is activated @@ -1205,6 +1213,78 @@ parameter sets the thermal energy per unit mass. planetary_ANEOS_iron_table_file: ./EoSTables/ANEOS_iron_S20.txt planetary_ANEOS_Fe85Si15_table_file: ./EoSTables/ANEOS_Fe85Si15_S20.txt +.. _Parameters_ps: + +Power Spectra Calculation +------------------------- + + +SWIFT can compute a variety of auto- and cross- power spectra at user-specified +intervals. The behaviour of this output type is governed by the ``PowerSpectrum`` +section of the parameter file. The calculation is performed on a regular grid +(typically of size 256^3) and foldings are used to extend the range probed to +smaller scales. + +The options are: + + * The size of the base grid to perform the PS calculation: + ``grid_side_length``. + * The number of grid foldings to use: ``num_folds``. + * The factor by which to fold at each iteration: ``fold_factor`` (default: 4) + * The order of the window function: ``window_order`` (default: 3) + +The window order sets the way the particle properties get assigned to the mesh. +Order 1 corresponds to the nearest-grid-point (NGP), order 2 to cloud-in-cell +(CIC), and order 3 to triangular-shaped-cloud (TSC). Higher-order schemes are not +implemented. + +Finally, the quantities for which a PS should be computed are specified as a +list of pairs of values for the parameter ``requested_spectra``. Auto-spectra +are specified by using the same type for both pair members. The available values +listed in the following table: + ++---------------------+---------------------------------------------------+ +| Name | Description | ++=====================+===================================================+ +| ``matter`` | Mass density of all matter | ++---------------------+---------------------------------------------------+ +| ``cdm`` | Mass density of all dark matter | ++---------------------+---------------------------------------------------+ +| ``gas`` | Mass density of all gas | ++---------------------+---------------------------------------------------+ +| ``starBH`` | Mass density of all stars and BHs | ++---------------------+---------------------------------------------------+ +| ``neutrino`` | Mass density of all neutrinos | ++---------------------+---------------------------------------------------+ +| ``neutrino1`` | Mass density of a random half of the neutrinos | ++---------------------+---------------------------------------------------+ +| ``neutrino2`` | Mass density of a the other half of the neutrinos | ++---------------------+---------------------------------------------------+ +| ``pressure`` | Electron pressure | ++---------------------+---------------------------------------------------+ + +A dark matter mass density auto-spectrum is specified as ``cdm-cdm`` and a gas +density - electron pressure cross-spectrum as ``gas-pressure``. + +The ``neutrino1`` and ``neutrino2`` selections are based on the particle IDs and +are mutually exclusive. The particles selected in each half are different in +each output. Note that neutrino PS can only be computed when neutrinos are +simulated using particles. + +An example of a valid power-spectrum section of the parameter file looks like: + +.. code:: YAML + + PowerSpectrum: + grid_side_length: 256 + num_folds: 3 + requested_spectra: ["matter-matter", "cdm-cdm", "cdm-matter"] # Total-matter and CDM auto-spectra + CDM-total cross-spectrum + +Some additional specific options for the power-spectra outputs are described in the +following pages: + +* :ref:`Output_list_label` (to have PS not evenly spaced in time) + .. _Parameters_fof: diff --git a/examples/SmallCosmoVolume/SmallCosmoVolume_DM/run.sh b/examples/SmallCosmoVolume/SmallCosmoVolume_DM/run.sh index 5d166e00a630ca93ff92a42f6d26b012b132e097..f909a7c98dd03759d75927d4a7c277573833c790 100755 --- a/examples/SmallCosmoVolume/SmallCosmoVolume_DM/run.sh +++ b/examples/SmallCosmoVolume/SmallCosmoVolume_DM/run.sh @@ -8,5 +8,5 @@ then fi # Run SWIFT -../../swift --cosmology --self-gravity --threads=8 small_cosmo_volume_dm.yml 2>&1 | tee output.log +../../swift --cosmology --self-gravity --power --threads=8 small_cosmo_volume_dm.yml 2>&1 | tee output.log diff --git a/examples/SmallCosmoVolume/SmallCosmoVolume_DM/small_cosmo_volume_dm.yml b/examples/SmallCosmoVolume/SmallCosmoVolume_DM/small_cosmo_volume_dm.yml index 2d11c7c5f3f89536fc1b5c296912859da9fca3a1..f469b972e5d6e93f71c51a277e8549e9b2ab9d47 100644 --- a/examples/SmallCosmoVolume/SmallCosmoVolume_DM/small_cosmo_volume_dm.yml +++ b/examples/SmallCosmoVolume/SmallCosmoVolume_DM/small_cosmo_volume_dm.yml @@ -35,6 +35,7 @@ Snapshots: delta_time: 1.0816 # Only every second VELOCIraptor invoke gets a full snapshot dump. scale_factor_first: 0.1 # z = 9 compression: 4 + invoke_ps: 1 # Parameters governing the conserved quantities statistics Statistics: @@ -58,3 +59,11 @@ StructureFinding: basename: ./stf scale_factor_first: 0.1 # z = 9 delta_time: 1.04 + +# Power spectrum calculation options +PowerSpectrum: + grid_side_length: 256 + num_folds: 4 + fold_factor: 2 + window_order: 2 + requested_spectra: ["matter-matter"] diff --git a/examples/main.c b/examples/main.c index 51f497ab07dc799ea22d8d2be0fa020c4f64b5a6..37c1dd099c5714d29108881c758b8335ca69ca96 100644 --- a/examples/main.c +++ b/examples/main.c @@ -93,6 +93,7 @@ int main(int argc, char *argv[]) { struct extra_io_properties extra_io_props; struct star_formation starform; struct pm_mesh mesh; + struct power_spectrum_data pow_data; struct gpart *gparts = NULL; struct gravity_props gravity_properties; struct hydro_props hydro_properties; @@ -188,6 +189,7 @@ int main(int argc, char *argv[]) { int with_gear = 0; int with_line_of_sight = 0; int with_rt = 0; + int with_power = 0; int verbose = 0; int nr_threads = 1; int nr_pool_threads = -1; @@ -342,6 +344,8 @@ int main(int argc, char *argv[]) { "Fraction of the total step's time spent in a task to trigger " "a dump of the task plot on this step", NULL, 0, 0), + OPT_BOOLEAN(0, "power", &with_power, "Run with power spectrum outputs.", + NULL, 0, 0), OPT_END(), }; struct argparse argparse; @@ -1173,6 +1177,17 @@ int main(int argc, char *argv[]) { } #endif + /* Initialize power spectra calculation */ + if (with_power) { +#ifdef HAVE_FFTW + power_init(&pow_data, params, nr_threads); +#else + error("No FFTW library found. Cannot compute power spectra."); +#endif + } else { + bzero(&pow_data, sizeof(struct power_spectrum_data)); + } + /* Be verbose about what happens next */ if (myrank == 0) message("Reading ICs from file '%s'", ICfileName); if (myrank == 0 && cleanup_h) @@ -1478,6 +1493,7 @@ int main(int argc, char *argv[]) { if (with_line_of_sight) engine_policies |= engine_policy_line_of_sight; if (with_sink) engine_policies |= engine_policy_sinks; if (with_rt) engine_policies |= engine_policy_rt; + if (with_power) engine_policies |= engine_policy_power_spectra; /* Initialize the engine with the space and policies. */ engine_init(&e, &s, params, output_options, N_total[swift_type_gas], @@ -1489,8 +1505,8 @@ int main(int argc, char *argv[]) { &gravity_properties, &stars_properties, &black_holes_properties, &sink_properties, &neutrino_properties, &neutrino_response, &feedback_properties, &pressure_floor_props, &rt_properties, - &mesh, &potential, &cooling_func, &starform, &chemistry, - &extra_io_props, &fof_properties, &los_properties, + &mesh, &pow_data, &potential, &cooling_func, &starform, + &chemistry, &extra_io_props, &fof_properties, &los_properties, &lightcone_array_properties, &ics_metadata); engine_config(/*restart=*/0, /*fof=*/0, &e, params, nr_nodes, myrank, nr_threads, nr_pool_threads, with_aff, talking, restart_file, @@ -1576,6 +1592,10 @@ int main(int argc, char *argv[]) { /*seed_black_holes=*/0, /*buffers allocated=*/1); } + /* If we want power spectra, output them now as well */ + if (with_power) + calc_all_power_spectra(e.power_data, e.s, &e.threadpool, e.verbose); + engine_dump_snapshot(&e); } @@ -1590,7 +1610,7 @@ int main(int argc, char *argv[]) { if (myrank == 0) { printf( "# %6s %14s %12s %12s %14s %9s %12s %12s %12s %12s %12s %16s [%s] " - "%6s %s [%s] \n", + "%6s %12s [%s] \n", "Step", "Time", "Scale-factor", "Redshift", "Time-step", "Time-bins", "Updates", "g-Updates", "s-Updates", "sink-Updates", "b-Updates", "Wall-clock time", clocks_getunit(), "Props", "Dead time", @@ -1758,7 +1778,7 @@ int main(int argc, char *argv[]) { printf( " %6d %14e %12.7f %12.7f %14e %4d %4d %12lld %12lld %12lld %12lld " "%12lld" - " %21.3f %6d %21.3f\n", + " %21.3f %6d %17.3f\n", e.step, e.time, e.cosmology->a, e.cosmology->z, e.time_step, e.min_active_bin, e.max_active_bin, e.updates, e.g_updates, e.s_updates, e.sink_updates, e.b_updates, e.wallclock_time, e.step_props, dead_time); @@ -1766,7 +1786,7 @@ int main(int argc, char *argv[]) { fprintf(e.file_timesteps, " %6d %14e %12.7f %12.7f %14e %4d %4d %12lld %12lld %12lld %12lld" - " %12lld %21.3f %6d %21.3f\n", + " %12lld %21.3f %6d %17.3f\n", e.step, e.time, e.cosmology->a, e.cosmology->z, e.time_step, e.min_active_bin, e.max_active_bin, e.updates, e.g_updates, e.s_updates, e.sink_updates, e.b_updates, e.wallclock_time, @@ -1823,6 +1843,10 @@ int main(int argc, char *argv[]) { /*seed_black_holes=*/0, /*buffers allocated=*/1); } + if (with_power) { + calc_all_power_spectra(e.power_data, e.s, &e.threadpool, e.verbose); + } + #ifdef HAVE_VELOCIRAPTOR if (with_structure_finding && e.snapshot_invoke_stf && !e.stf_this_timestep) @@ -1879,6 +1903,7 @@ int main(int argc, char *argv[]) { if (with_feedback) feedback_clean(e.feedback_props); if (with_lightcone) lightcone_array_clean(e.lightcone_array_properties); if (with_rt) rt_clean(e.rt_props, restart); + if (with_power) power_clean(e.power_data); extra_io_clean(e.io_extra_props); engine_clean(&e, /*fof=*/0, restart); free(params); diff --git a/examples/parameter_example.yml b/examples/parameter_example.yml index ff472d4eba03c90134ef285c88283c7c827f43da..6641820d59af40586daa0d440561af4d394fa46a 100644 --- a/examples/parameter_example.yml +++ b/examples/parameter_example.yml @@ -103,6 +103,7 @@ ForceChecks: only_when_all_active: 1 # (Optional) Only compute exact forces during timesteps when all gparts are active (default: 0). only_at_snapshots: 1 # (Optional) Only compute exact forces during timesteps when a snapshot is being dumped (default: 0). +# Parameters related to the Friends-Of-Friends halo finding FOF: basename: fof_output # Filename for the FOF outputs (Unused when FoF is only run to seed BHs). scale_factor_first: 0.91 # Scale-factor of first FoF black hole seeding calls (needed for cosmological runs). @@ -116,6 +117,8 @@ FOF: absolute_linking_length: -1. # (Optional) Absolute linking length (in internal units). When not set to -1, this will overwrite the linking length computed from 'linking_length_ratio'. group_id_default: 2147483647 # (Optional) Sets the group ID of particles in groups below the minimum size. Defaults to 2^31 - 1 if unspecified. Has to be positive. group_id_offset: 1 # (Optional) Sets the offset of group ID labeling. Defaults to 1 if unspecified. + output_list_on: 0 # (Optional) Enable the output list + output_list: ./output_list_fof.txt # (Optional) File containing the output times (see documentation in "Parameter File" section) # Parameters for the task scheduling Scheduler: @@ -164,6 +167,7 @@ Snapshots: delta_time: 0.01 # Time difference between consecutive outputs (in internal units) invoke_stf: 0 # (Optional) Call VELOCIraptor every time a snapshot is written irrespective of the VELOCIraptor output strategy. invoke_fof: 0 # (Optional) Call FOF every time a snapshot is written + invoke_ps: 0 # (Optional) Call a power-spectrum calculation every time a snapshot is written compression: 0 # (Optional) Set the level of GZIP compression of the HDF5 datasets [0-9]. 0 does no compression. The lossless compression is applied to *all* the fields. distributed: 0 # (Optional) When running over MPI, should each rank write a partial snapshot or do we want a single file? 1 implies one file per MPI rank. lustre_OST_count: 0 # (Optional) If > 0, the number of lustre OSTs to distribure the single-striped files over. Has no effect on non-Lustre filesystems. Has an effect only on distributed snapshots. @@ -269,6 +273,7 @@ LineOfSight: time_first: 0.01 # (Optional) Time of the first line-of-sight output (in internal units). delta_time: 1.02 # (Optional) Time difference between consecutive line-of-sight outputs (in internal units) in simulation time intervals. output_list_on: 0 # (Optional) Enable the use of an output list + output_list: ./output_list_los.txt # (Optional) File containing the output times (see documentation in "Parameter File" section) num_along_x: 0 # Number of sight-lines along the x-axis num_along_y: 0 # Number of sight-lines along the y-axis num_along_z: 100 # Number of sight-lines along the z-axis @@ -705,9 +710,6 @@ GEARRT: stellar_spectrum_blackbody_temperature_K: 1.e4 # (Conditional) if stellar_spectrum_type=1, use this temperature (in K) for the blackbody spectrum. stars_max_timestep: -1. # (Optional) restrict the maximal timestep of stars to this value (in internal units). Set to negative to turn off. - - - SPHM1RT: cred: 2.99792458e10 # value of reduced speed of light for the RT solver in code unit CFL_condition: 0.1 # CFL condition for RT, independent of hydro @@ -720,7 +722,17 @@ SPHM1RT: stellar_spectrum_blackbody_temperature_K: 1.e4 # (Conditional) if stellar_spectrum_type=1, use this temperature (in K) for the blackbody spectrum. stars_max_timestep: -1. # (Optional) restrict the maximal timestep of stars to this value (in internal units). Set to negative to turn off. - +# Parameters related to power spectra -------------------------------------------- + +PowerSpectrum: + grid_side_length: 256 # Size of the grid used in power spectrum calculation. + num_folds: 6 # Number of foldings (1 means no foldings), determines the max k + fold_factor: 4 # (Optional) factor by which to reduce the box along each side each folding (default: 4) + window_order: 3 # (Optional) order of the mass assignment scheme (default: 3, TSC) + output_list_on: 0 # (Optional) Enable the output list + output_list: ./output_list_ps.txt # (Optional) File containing the output times (see documentation in "Parameter File" section) + requested_spectra: ["matter-matter","cdm-cdm","starBH-starBH","gas-matter","pressure-pressure","matter-pressure", "neutrino0-neutrino1"] # Array of strings indicating which components should be correlated for power spectra + # Parameters related to lightcones ----------------------------------------------- # Parameters in the LightconeCommon section apply to all lightcones but can be overridden in the LightconeX sections. diff --git a/src/Makefile.am b/src/Makefile.am index 97eefa55dc6af5baa17dc490b694a3c4cefdba2a..15c6abd37043300340ef27a65b3328d0d3f6057a 100644 --- a/src/Makefile.am +++ b/src/Makefile.am @@ -72,6 +72,7 @@ include_HEADERS += lightcone/lightcone.h lightcone/lightcone_particle_io.h light include_HEADERS += lightcone/lightcone_crossing.h lightcone/lightcone_array.h lightcone/lightcone_map.h include_HEADERS += lightcone/lightcone_map_types.h lightcone/projected_kernel.h lightcone/lightcone_shell.h include_HEADERS += lightcone/healpix_util.h lightcone/pixel_index.h +include_HEADERS += power_spectrum.h # source files for EAGLE extra I/O EAGLE_EXTRA_IO_SOURCES= @@ -162,6 +163,7 @@ AM_SOURCES += rt_parameters.c hdf5_object_to_blob.c ic_info.c exchange_structs.c AM_SOURCES += lightcone/lightcone.c lightcone/lightcone_particle_io.c lightcone/lightcone_replications.c AM_SOURCES += lightcone/healpix_util.c lightcone/lightcone_array.c lightcone/lightcone_map.c AM_SOURCES += lightcone/lightcone_map_types.c lightcone/projected_kernel.c lightcone/lightcone_shell.c +AM_SOURCES += power_spectrum.c AM_SOURCES += $(EAGLE_EXTRA_IO_SOURCES) AM_SOURCES += $(QLA_COOLING_SOURCES) $(QLA_EAGLE_COOLING_SOURCES) AM_SOURCES += $(EAGLE_COOLING_SOURCES) $(EAGLE_FEEDBACK_SOURCES) diff --git a/src/engine.c b/src/engine.c index cff7057b2a5e5a58d96855267704607c6ce43726..fcdba8a307b2b8924bfcabe5f2f345ef7b9c8b3e 100644 --- a/src/engine.c +++ b/src/engine.c @@ -87,6 +87,7 @@ #include "output_options.h" #include "partition.h" #include "potential.h" +#include "power_spectrum.h" #include "pressure_floor.h" #include "profiler.h" #include "proxy.h" @@ -131,7 +132,8 @@ const char *engine_policy_names[] = {"none", "csds", "line of sight", "sink", - "rt"}; + "rt", + "power spectra"}; const int engine_default_snapshot_subsample[swift_type_count] = {0}; @@ -2138,7 +2140,7 @@ void engine_step(struct engine *e) { /* Print some information to the screen */ printf( " %6d %14e %12.7f %12.7f %14e %4d %4d %12lld %12lld %12lld " - "%12lld %12lld %21.3f %6d %21.3f\n", + "%12lld %12lld %21.3f %6d %17.3f\n", e->step, e->time, e->cosmology->a, e->cosmology->z, e->time_step, e->min_active_bin, e->max_active_bin, e->updates, e->g_updates, e->s_updates, e->sink_updates, e->b_updates, e->wallclock_time, @@ -2165,7 +2167,7 @@ void engine_step(struct engine *e) { fprintf( e->file_timesteps, " %6d %14e %12.7f %12.7f %14e %4d %4d %12lld %12lld %12lld %12lld " - "%12lld %21.3f %6d %21.3f\n", + "%12lld %21.3f %6d %17.3f\n", e->step, e->time, e->cosmology->a, e->cosmology->z, e->time_step, e->min_active_bin, e->max_active_bin, e->updates, e->g_updates, e->s_updates, e->sink_updates, e->b_updates, e->wallclock_time, @@ -2827,6 +2829,7 @@ void engine_numa_policies(int rank, int verbose) { * @param neutrinos The #neutrino_props used for this run. * @param feedback The #feedback_props used for this run. * @param mesh The #pm_mesh used for the long-range periodic forces. + * @param pow_data The properties and pointers for power spectrum calculation. * @param potential The properties of the external potential. * @param cooling_func The properties of the cooling function. * @param starform The #star_formation model of this run. @@ -2852,7 +2855,8 @@ void engine_init( struct neutrino_response *neutrino_response, struct feedback_props *feedback, struct pressure_floor_props *pressure_floor, struct rt_props *rt, - struct pm_mesh *mesh, const struct external_potential *potential, + struct pm_mesh *mesh, struct power_spectrum_data *pow_data, + const struct external_potential *potential, struct cooling_function_data *cooling_func, const struct star_formation *starform, const struct chemistry_global_data *chemistry, @@ -2923,6 +2927,8 @@ void engine_init( parser_get_opt_param_int(params, "Snapshots:invoke_stf", 0); e->snapshot_invoke_fof = parser_get_opt_param_int(params, "Snapshots:invoke_fof", 0); + e->snapshot_invoke_ps = + parser_get_opt_param_int(params, "Snapshots:invoke_ps", 0); e->snapshot_use_delta_from_edge = parser_get_opt_param_int(params, "Snapshots:use_delta_from_edge", 0); if (e->snapshot_use_delta_from_edge) { @@ -2940,6 +2946,7 @@ void engine_init( e->snapshot_output_count = 0; e->stf_output_count = 0; e->los_output_count = 0; + e->ps_output_count = 0; e->dt_min = parser_get_param_double(params, "TimeIntegration:dt_min"); e->dt_max = parser_get_param_double(params, "TimeIntegration:dt_max"); e->dt_max_RMS_displacement = FLT_MAX; @@ -2957,6 +2964,7 @@ void engine_init( e->ti_next_stats = 0; e->ti_next_stf = 0; e->ti_next_fof = 0; + e->ti_next_ps = 0; e->verbose = verbose; e->wallclock_time = 0.f; e->physical_constants = physical_constants; @@ -2970,6 +2978,7 @@ void engine_init( e->neutrino_properties = neutrinos; e->neutrino_response = neutrino_response; e->mesh = mesh; + e->power_data = pow_data; e->external_potential = potential; e->cooling_func = cooling_func; e->star_formation = starform; @@ -3063,6 +3072,16 @@ void engine_init( parser_get_opt_param_double(params, "LineOfSight:delta_time", -1.); } + /* Initialise power spectrum output. */ + if (e->policy & engine_policy_power_spectra) { + e->time_first_ps_output = + parser_get_opt_param_double(params, "PowerSpectrum:time_first", 0.); + e->a_first_ps_output = parser_get_opt_param_double( + params, "PowerSpectrum:scale_factor_first", 0.1); + e->delta_time_ps = + parser_get_opt_param_double(params, "PowerSpectrum:delta_time", -1.); + } + /* Initialise FoF calls frequency. */ if (e->policy & engine_policy_fof) { @@ -3352,6 +3371,7 @@ void engine_clean(struct engine *e, const int fof, const int restart) { output_list_clean(&e->output_list_stats); output_list_clean(&e->output_list_stf); output_list_clean(&e->output_list_los); + output_list_clean(&e->output_list_ps); output_options_clean(e->output_options); @@ -3411,6 +3431,7 @@ void engine_clean(struct engine *e, const int fof, const int restart) { free((void *)e->internal_units); free((void *)e->cosmology); free((void *)e->mesh); + free((void *)e->power_data); free((void *)e->chemistry); free((void *)e->entropy_floor); free((void *)e->cooling_func); @@ -3430,6 +3451,7 @@ void engine_clean(struct engine *e, const int fof, const int restart) { if (e->output_list_stats) free((void *)e->output_list_stats); if (e->output_list_stf) free((void *)e->output_list_stf); if (e->output_list_los) free((void *)e->output_list_los); + if (e->output_list_ps) free((void *)e->output_list_ps); #ifdef WITH_CSDS if (e->policy & engine_policy_csds) free((void *)e->csds); #endif @@ -3469,6 +3491,7 @@ void engine_struct_dump(struct engine *e, FILE *stream) { gravity_props_struct_dump(e->gravity_properties, stream); stars_props_struct_dump(e->stars_properties, stream); pm_mesh_struct_dump(e->mesh, stream); + power_spectrum_struct_dump(e->power_data, stream); potential_struct_dump(e->external_potential, stream); cooling_struct_dump(e->cooling_func, stream); starformation_struct_dump(e->star_formation, stream); @@ -3575,6 +3598,11 @@ void engine_struct_restore(struct engine *e, FILE *stream) { pm_mesh_struct_restore(mesh, stream); e->mesh = mesh; + struct power_spectrum_data *pow_data = + (struct power_spectrum_data *)malloc(sizeof(struct power_spectrum_data)); + power_spectrum_struct_restore(pow_data, stream); + e->power_data = pow_data; + struct external_potential *external_potential = (struct external_potential *)malloc(sizeof(struct external_potential)); potential_struct_restore(external_potential, stream); diff --git a/src/engine.h b/src/engine.h index 66f3c19acb156da8c55ad8b0b4c54ff6e81e96ef..ba1498df6a3755138b5e8ce75632dcfd5d1090ac 100644 --- a/src/engine.h +++ b/src/engine.h @@ -86,8 +86,9 @@ enum engine_policy { engine_policy_line_of_sight = (1 << 24), engine_policy_sinks = (1 << 25), engine_policy_rt = (1 << 26), + engine_policy_power_spectra = (1 << 27), }; -#define engine_maxpolicy 27 +#define engine_maxpolicy 28 extern const char *engine_policy_names[engine_maxpolicy + 1]; /** @@ -104,7 +105,8 @@ enum engine_step_properties { engine_step_prop_stf = (1 << 6), engine_step_prop_fof = (1 << 7), engine_step_prop_mesh = (1 << 8), - engine_step_prop_done = (1 << 9), + engine_step_prop_power_spectra = (1 << 9), + engine_step_prop_done = (1 << 10), }; /* Some constants */ @@ -333,6 +335,7 @@ struct engine { int snapshot_compression; int snapshot_invoke_stf; int snapshot_invoke_fof; + int snapshot_invoke_ps; struct unit_system *snapshot_units; int snapshot_use_delta_from_edge; double snapshot_delta_from_edge; @@ -369,6 +372,18 @@ struct engine { int run_fof; int dump_catalogue_when_seeding; + /* power spectrum information */ + double a_first_ps_output; + double time_first_ps_output; + double delta_time_ps; + int ps_output_count; + + /* Output_List for the power spectrum */ + struct output_list *output_list_ps; + + /* Integer time of the next ps output */ + integertime_t ti_next_ps; + /* Statistics information */ double a_first_statistics; double time_first_statistics; @@ -487,6 +502,9 @@ struct engine { /* The mesh used for long-range gravity forces */ struct pm_mesh *mesh; + /* Properties and pointers for the power spectrum */ + struct power_spectrum_data *power_data; + /* Properties of external gravitational potential */ const struct external_potential *external_potential; @@ -613,6 +631,7 @@ void engine_compute_next_stf_time(struct engine *e); void engine_compute_next_fof_time(struct engine *e); void engine_compute_next_statistics_time(struct engine *e); void engine_compute_next_los_time(struct engine *e); +void engine_compute_next_ps_time(struct engine *e); void engine_recompute_displacement_constraint(struct engine *e); void engine_unskip(struct engine *e); void engine_drift_all(struct engine *e, const int drift_mpoles); @@ -641,7 +660,8 @@ void engine_init( struct neutrino_response *neutrino_response, struct feedback_props *feedback, struct pressure_floor_props *pressure_floor, struct rt_props *rt, - struct pm_mesh *mesh, const struct external_potential *potential, + struct pm_mesh *mesh, struct power_spectrum_data *pow_data, + const struct external_potential *potential, struct cooling_function_data *cooling_func, const struct star_formation *starform, const struct chemistry_global_data *chemistry, diff --git a/src/engine_config.c b/src/engine_config.c index d54f0736bebfda2d298207a10f1cee18962dc2b0..5b9780aaf571472ff7b131c5bafa66ce20346e26 100644 --- a/src/engine_config.c +++ b/src/engine_config.c @@ -472,7 +472,7 @@ void engine_config(int restart, int fof, struct engine *e, fprintf(e->file_timesteps, "# %6s %14s %12s %12s %14s %9s %12s %12s %12s %12s %12s %16s " - "[%s] %6s %s [%s]\n", + "[%s] %6s %12s [%s]\n", "Step", "Time", "Scale-factor", "Redshift", "Time-step", "Time-bins", "Updates", "g-Updates", "s-Updates", "Sink-Updates", "b-Updates", "Wall-clock time", clocks_getunit(), "Props", diff --git a/src/engine_io.c b/src/engine_io.c index 863602d168630413a81bb5cbef194be0538fb855..0f1fae68eae717c41fe06b1089357893097e4706 100644 --- a/src/engine_io.c +++ b/src/engine_io.c @@ -38,6 +38,7 @@ #include "lightcone/lightcone_array.h" #include "line_of_sight.h" #include "parallel_io.h" +#include "power_spectrum.h" #include "serial_io.h" #include "single_io.h" @@ -81,6 +82,12 @@ void engine_dump_restarts(struct engine *e, int drifted_all, int force) { /* Drift all particles first (may have just been done). */ if (!drifted_all) engine_drift_all(e, /*drift_mpole=*/1); + /* Free the foreign particles to get more breathing space. */ +#ifdef WITH_MPI + if (e->free_foreign_when_dumping_restart) + space_free_foreign_parts(e->s, /*clear_cell_pointers=*/1); +#endif + #ifdef WITH_LIGHTCONE /* Flush lightcone buffers before dumping restarts */ lightcone_array_flush(e->lightcone_array_properties, &(e->threadpool), @@ -92,12 +99,6 @@ void engine_dump_restarts(struct engine *e, int drifted_all, int force) { #endif #endif - /* Free the foreign particles to get more breathing space. */ -#ifdef WITH_MPI - if (e->free_foreign_when_dumping_restart) - space_free_foreign_parts(e->s, /*clear_cell_pointers=*/1); -#endif - restart_write(e, e->restart_file); #ifdef WITH_MPI @@ -253,12 +254,14 @@ void engine_check_for_dumps(struct engine *e) { const int with_stf = (e->policy & engine_policy_structure_finding); const int with_los = (e->policy & engine_policy_line_of_sight); const int with_fof = (e->policy & engine_policy_fof); + const int with_power = (e->policy & engine_policy_power_spectra); /* What kind of output are we getting? */ enum output_type { output_none, output_snapshot, output_statistics, + output_ps, output_stf, output_los, }; @@ -286,6 +289,14 @@ void engine_check_for_dumps(struct engine *e) { } } + /* Do we want a power-spectrum? */ + if (e->ti_end_min > e->ti_next_ps && e->ti_next_ps > 0) { + if (e->ti_next_ps < ti_output) { + ti_output = e->ti_next_ps; + type = output_ps; + } + } + /* Do we want to perform structure finding? */ if (with_stf) { if (e->ti_end_min > e->ti_next_stf && e->ti_next_stf > 0) { @@ -365,6 +376,12 @@ void engine_check_for_dumps(struct engine *e) { #endif } + /* Do we want power spectrum outputs? */ + if (with_power && e->snapshot_invoke_ps) { + calc_all_power_spectra(e->power_data, e->s, &e->threadpool, + e->verbose); + } + /* Dump... */ engine_dump_snapshot(e); @@ -439,6 +456,16 @@ void engine_check_for_dumps(struct engine *e) { break; + case output_ps: + + /* Compute the PS */ + calc_all_power_spectra(e->power_data, e->s, &e->threadpool, e->verbose); + + /* Move on */ + engine_compute_next_ps_time(e); + + break; + default: error("Invalid dump type"); } @@ -465,6 +492,14 @@ void engine_check_for_dumps(struct engine *e) { } } + /* Do we want a power-spectrum? */ + if (e->ti_end_min > e->ti_next_ps && e->ti_next_ps > 0) { + if (e->ti_next_ps < ti_output) { + ti_output = e->ti_next_ps; + type = output_ps; + } + } + /* Do we want to perform structure finding? */ if (with_stf) { if (e->ti_end_min > e->ti_next_stf && e->ti_next_stf > 0) { @@ -834,6 +869,76 @@ void engine_compute_next_fof_time(struct engine *e) { } } +/** + * @brief Computes the next time (on the time line) for a power-spectrum dump + * + * @param e The #engine. + */ +void engine_compute_next_ps_time(struct engine *e) { + /* Do output_list file case */ + if (e->output_list_ps) { + output_list_read_next_time(e->output_list_ps, e, "power spectrum", + &e->ti_next_ps); + return; + } + + /* Find upper-bound on last output */ + double time_end; + if (e->policy & engine_policy_cosmology) + time_end = e->cosmology->a_end * e->delta_time_ps; + else + time_end = e->time_end + e->delta_time_ps; + + /* Find next ps above current time */ + double time; + if (e->policy & engine_policy_cosmology) + time = e->a_first_ps_output; + else + time = e->time_first_ps_output; + + int found_ps_time = 0; + while (time < time_end) { + + /* Output time on the integer timeline */ + if (e->policy & engine_policy_cosmology) + e->ti_next_ps = log(time / e->cosmology->a_begin) / e->time_base; + else + e->ti_next_ps = (time - e->time_begin) / e->time_base; + + /* Found it? */ + if (e->ti_next_ps > e->ti_current) { + found_ps_time = 1; + break; + } + + if (e->policy & engine_policy_cosmology) + time *= e->delta_time_ps; + else + time += e->delta_time_ps; + } + + /* Deal with last line of sight */ + if (!found_ps_time) { + e->ti_next_ps = -1; + if (e->verbose) message("No further PS output time."); + } else { + + /* Be nice, talk... */ + if (e->policy & engine_policy_cosmology) { + const double next_ps_time = + exp(e->ti_next_ps * e->time_base) * e->cosmology->a_begin; + if (e->verbose) + message("Next output time for power spectrum set to a=%e.", + next_ps_time); + } else { + const double next_ps_time = e->ti_next_ps * e->time_base + e->time_begin; + if (e->verbose) + message("Next output time for power spectrum set to t=%e.", + next_ps_time); + } + } +} + /** * @brief Initialize all the output_list required by the engine * @@ -910,4 +1015,18 @@ void engine_init_output_lists(struct engine *e, struct swift_params *params, else e->time_first_los = e->ti_next_los * e->time_base + e->time_begin; } + + /* Deal with power-spectra */ + e->output_list_ps = NULL; + output_list_init(&e->output_list_ps, e, "PowerSpectrum", &e->delta_time_ps); + + if (e->output_list_ps) { + engine_compute_next_ps_time(e); + + if (e->policy & engine_policy_cosmology) + e->a_first_ps_output = + exp(e->ti_next_ps * e->time_base) * e->cosmology->a_begin; + else + e->time_first_ps_output = e->ti_next_ps * e->time_base + e->time_begin; + } } diff --git a/src/periodic.h b/src/periodic.h index edcde55377040f86af2c82771d37eb29e1db130b..509a204acde71292c71694f90f040825bc81aeed 100644 --- a/src/periodic.h +++ b/src/periodic.h @@ -39,6 +39,20 @@ _x < _a ? (_x + (_b - _a)) : ((_x >= _b) ? (_x - (_b - _a)) : _x); \ }) +/** + * @brief Limits the value of x to be between a and b + */ +__attribute__((always_inline, const)) INLINE static double box_wrap_multiple( + double x, const double a, const double b) { + while (x < a) { + x += (b - a); + } + while (x >= b) { + x -= (b - a); + } + return x; +} + /** * @brief Find the smallest distance dx along one axis within a box of size * box_size diff --git a/src/power_spectrum.c b/src/power_spectrum.c new file mode 100644 index 0000000000000000000000000000000000000000..c26e3a4553ddbb6cf71158b4c4f52110e4017342 --- /dev/null +++ b/src/power_spectrum.c @@ -0,0 +1,1466 @@ +/******************************************************************************* + * This file is part of SWIFT. + * Copyright (c) 2021 Marcel van Daalen (daalen@strw.leidenuniv.nl) + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published + * by the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public License + * along with this program. If not, see <http://www.gnu.org/licenses/>. + * + ******************************************************************************/ + +/* Config parameters. */ +#include "../config.h" + +#ifdef WITH_MPI +#include <mpi.h> +#endif + +#ifdef HAVE_FFTW +#include <fftw3.h> +#endif + +#include <string.h> + +/* This object's header. */ +#include "power_spectrum.h" + +/* Local includes. */ +#include "cooling.h" +#include "engine.h" +#include "minmax.h" +#include "neutrino.h" +#include "random.h" +#include "row_major_id.h" +#include "space.h" +#include "threadpool.h" +#include "tools.h" +#include "units.h" + +#define power_data_default_grid_side_length 256 +#define power_data_default_fold_factor 4 +#define power_data_default_window_order 3 + +/** + * @brief Return the #power_type corresponding to a given string. + */ +INLINE static enum power_type power_spectrum_get_type(const char* name) { + if (strcasecmp(name, "matter") == 0) + return pow_type_matter; + else if (strcasecmp(name, "cdm") == 0) + return pow_type_cdm; + else if (strcasecmp(name, "gas") == 0) + return pow_type_gas; + else if (strcasecmp(name, "starBH") == 0) + return pow_type_starBH; + else if (strcasecmp(name, "pressure") == 0) + return pow_type_pressure; + else if (strcasecmp(name, "neutrino") == 0) + return pow_type_neutrino; + else if (strcasecmp(name, "neutrino0") == 0) + return pow_type_neutrino_0; + else if (strcasecmp(name, "neutrino1") == 0) + return pow_type_neutrino_1; + else + error("Do not recognize the power spectrum type '%s'.", name); + return pow_type_count; +} + +/** + * @brief Get the name of the type of power component. + * + * @param type The #power_type for which we want the name. + */ +INLINE static const char* get_powtype_name(const enum power_type type) { + + static const char* powtype_names[pow_type_count] = {"Matter", + "CDM", + "gas", + "stars/BHs", + "electron pressure", + "neutrino", + "neutrino (even)", + "neutrino (odd)"}; + + return powtype_names[type]; +} + +INLINE static const char* get_powtype_filename(const enum power_type type) { + + static const char* powtype_filenames[pow_type_count] = { + "matter", "cdm", "gas", "starBH", + "pressure", "neutrino", "neutrino0", "neutrino1"}; + + return powtype_filenames[type]; +} + +/** + * @brief Initialize power spectra calculation. + * + * Reads in the power spectrum parameters, sets up FFTW + * (likely already done for PM), then sets up an FFT plan + * that can be reused every time. + * + * @param nr_threads The number of threads used. + */ +void power_init(struct power_spectrum_data* p, struct swift_params* params, + int nr_threads) { + + /* Get power spectrum parameters */ + p->Ngrid = parser_get_opt_param_int(params, "PowerSpectrum:grid_side_length", + power_data_default_grid_side_length); + p->Nfold = parser_get_param_int(params, "PowerSpectrum:num_folds"); + p->foldfac = parser_get_opt_param_int(params, "PowerSpectrum:fold_factor", + power_data_default_fold_factor); + p->windoworder = parser_get_opt_param_int( + params, "PowerSpectrum:window_order", power_data_default_window_order); + + if (p->windoworder > 3 || p->windoworder < 1) + error("Power spectrum calculation is not implemented for %dth order!", + p->windoworder); + if (p->windoworder == 1) + message("WARNING: window order is recommended to be at least 2 (CIC)."); + if (p->windoworder <= 2 && p->foldfac > 4) + message( + "WARNING: fold factor is recommended not to exceed 4 for a " + "mass assignment order of 2 (CIC) or below."); + if (p->windoworder == 3 && p->foldfac > 6) + message( + "WARNING: fold factor is recommended not to exceed 6 for a " + "mass assignment order of 3 (TSC) or below."); + + /* Make sensible choices for the k-cuts */ + const int kcutn = (p->windoworder >= 3) ? 90 : 70; + const int kcutleft = (int)(p->Ngrid / 256.0 * kcutn); + const int kcutright = (int)(p->Ngrid / 256.0 * (double)kcutn / p->foldfac); + if (kcutright < 10 || (kcutleft - kcutright) < 30) + error( + "Combination of power grid size and fold factor do not allow for " + "enough overlap between foldings!"); + + p->nr_threads = nr_threads; + +#ifdef HAVE_THREADED_FFTW + /* Initialise the thread-parallel FFTW version + (probably already done for the PM, but does not matter) */ + if (p->Ngrid >= 64) { + fftw_init_threads(); + fftw_plan_with_nthreads(nr_threads); + } +#else + message("Note that FFTW is not threaded!"); +#endif + + char** requested_spectra; + parser_get_param_string_array(params, "PowerSpectrum:requested_spectra", + &p->spectrumcount, &requested_spectra); + + p->types1 = + (enum power_type*)malloc(p->spectrumcount * sizeof(enum power_type)); + p->types2 = + (enum power_type*)malloc(p->spectrumcount * sizeof(enum power_type)); + + /* Parse which spectra are being requested */ + for (int i = 0; i < p->spectrumcount; ++i) { + + char* pstr = strtok(requested_spectra[i], "-"); + if (pstr == NULL) + error("Requested power spectra are not in the format type1-type2!"); + char type1[32]; + strcpy(type1, pstr); + + pstr = strtok(NULL, "-"); + if (pstr == NULL) + error("Requested power spectra are not in the format type1-type2!"); + char type2[32]; + strcpy(type2, pstr); + + p->types1[i] = power_spectrum_get_type(type1); + p->types2[i] = power_spectrum_get_type(type2); + } + + /* Initialize the plan only once -- much faster for FFTs run often! + * Does require us to allocate the grids, but we delete them right away. + * Plan can only be used for the same FFTW call */ + const int Ngrid = p->Ngrid; + + /* Grid is padded to allow for in-place FFT */ + p->powgrid = fftw_alloc_real(Ngrid * Ngrid * (Ngrid + 2)); + /* Pointer to grid to interpret it as complex data */ + p->powgridft = (fftw_complex*)p->powgrid; + + p->fftplanpow = fftw_plan_dft_r2c_3d(Ngrid, Ngrid, Ngrid, p->powgrid, + p->powgridft, FFTW_MEASURE); + + fftw_free(p->powgrid); + p->powgrid = NULL; + p->powgridft = NULL; + + /* Do the same for a second grid/plan to allow for cross power */ + + /* Grid is padded to allow for in-place FFT */ + p->powgrid2 = fftw_alloc_real(Ngrid * Ngrid * (Ngrid + 2)); + /* Pointer to grid to interpret it as complex data */ + p->powgridft2 = (fftw_complex*)p->powgrid2; + + p->fftplanpow2 = fftw_plan_dft_r2c_3d(Ngrid, Ngrid, Ngrid, p->powgrid2, + p->powgridft2, FFTW_MEASURE); + + fftw_free(p->powgrid2); + p->powgrid2 = NULL; + p->powgridft2 = NULL; + + /* Create directories for power spectra and foldings */ + if (engine_rank == 0) { + safe_checkdir("power_spectra", /*create=*/1); + safe_checkdir("power_spectra/foldings", /*create=*/1); + } +} + +/** + * @brief Shared information for shot noise to be used by all the threads in the + * pool. + */ +struct shot_mapper_data { + const struct cell* cells; + double tot12; + enum power_type type1; + enum power_type type2; + const struct engine* e; + struct neutrino_model* nu_model; +}; + +/** + * @brief Shared information about the mesh to be used by all the threads in the + * pool. + */ +struct grid_mapper_data { + const struct cell* cells; + double* dens; + int N; + enum power_type type; + int windoworder; + double dim[3]; + double fac; + const struct engine* e; + struct neutrino_model* nu_model; +}; + +/** + * @brief Shared information about the mesh to be used by all the threads in the + * pool. + */ +struct conv_mapper_data { + double* grid; + int Ngrid; + double invcellmean; +}; + +/** + * @brief Shared information needed for calculating power from a Fourier grid. + */ +struct pow_mapper_data { + fftw_complex* powgridft; + fftw_complex* powgridft2; + int Ngrid; + int windoworder; + int* kbin; + int* modecounts; + double* powersum; + double jfac; +}; + +/** + * @brief Decided whether or not a given particle should be considered or + * not for the specific #power_type we are computing. + */ +int should_collect_mass(const enum power_type type, const struct gpart* gp, + const integertime_t ti_current) { + + if (type == pow_type_matter) { + /* Select all particles */ + return 1; + } else if (type == pow_type_cdm) { + if (gp->type == swift_type_dark_matter || + gp->type == swift_type_dark_matter_background) + return 1; + } else if (type == pow_type_gas) { + if (gp->type == swift_type_gas) return 1; + } else if (type == pow_type_starBH) { + if (gp->type == swift_type_stars || gp->type == swift_type_black_hole) + return 1; + } else if (type == pow_type_neutrino) { + if (gp->type == swift_type_neutrino) return 1; + } else if (type == pow_type_neutrino_0) { + if (gp->type == swift_type_neutrino) { + /* Randomly divide the neutrino ensemble in half */ + return random_unit_interval(gp->id_or_neg_offset, ti_current, + random_number_powerspectrum_split) < 0.5; + } + } else if (type == pow_type_neutrino_1) { + if (gp->type == swift_type_neutrino) + /* Randomly divide the neutrino ensemble in half */ + return random_unit_interval(gp->id_or_neg_offset, ti_current, + random_number_powerspectrum_split) >= 0.5; + } else { +#ifdef SWIFT_DEBUG_CHECKS + error("Invalid type!"); +#endif + } + + return 0; +} + +/** + * @brief Calculates the necessary mass terms for shot noise. + * + * @param c The #cell. + * @param tot12 The shot noise contribution returned. + * @param type1 The component type of field 1. + * @param type2 The component type of field 2. + * @param e The #engine. + */ +void shotnoiseterms(const struct cell* c, double* tot12, + const enum power_type type1, const enum power_type type2, + const struct engine* e, struct neutrino_model* nu_model) { + + const int gcount = c->grav.count; + const struct gpart* gparts = c->grav.parts; + + /* Handle on the other particle types */ + const struct part* parts = e->s->parts; + const struct xpart* xparts = e->s->xparts; + + /* Handle on the physics modules */ + const struct cosmology* cosmo = e->cosmology; + const struct hydro_props* hydro_props = e->hydro_properties; + const struct unit_system* us = e->internal_units; + const struct phys_const* phys_const = e->physical_constants; + const struct cooling_function_data* cool_func = e->cooling_func; + + /* Local accumulator for this cell */ + double local_tot12 = 0.; + + /* Calculate the value each particle adds to the grids */ + for (int i = 0; i < gcount; ++i) { + + /* Skip invalid particles */ + if (gparts[i].time_bin == time_bin_inhibited) continue; + + double quantity1; + + /* Special case first for the electron pressure */ + if (type1 == pow_type_pressure) { + + /* Skip non-gas particles */ + if (gparts[i].type != swift_type_gas) continue; + + const struct part* p = &parts[-gparts[i].id_or_neg_offset]; + const struct xpart* xp = &xparts[-gparts[i].id_or_neg_offset]; + quantity1 = cooling_get_electron_pressure(phys_const, hydro_props, us, + cosmo, cool_func, p, xp); + } else { + + /* We are collecting a mass of some kind. + * We skip any particle not matching the PS type we want */ + if (!should_collect_mass(type1, &gparts[i], e->ti_current)) continue; + + /* Compute weight (for neutrino delta-f weighting) */ + double weight1 = 1.0; + if (gparts[i].type == swift_type_neutrino) + gpart_neutrino_weight_mesh_only(&gparts[i], nu_model, &weight1); + + /* And eventually... collect */ + quantity1 = gparts[i].mass * weight1; + } + + /* Can we assign already? (i.e. this is an auto-spectrum) */ + if (type1 == type2) { + + /* Now assign to the shot noise collection */ + local_tot12 += quantity1 * quantity1; + } + + /* This is a cross-spectrum */ + else { + + double quantity2; + + /* Special case first for the electron pressure */ + if (type2 == pow_type_pressure) { + + /* Skip non-gas particles */ + if (gparts[i].type != swift_type_gas) continue; + + const struct part* p = &parts[-gparts[i].id_or_neg_offset]; + const struct xpart* xp = &xparts[-gparts[i].id_or_neg_offset]; + quantity2 = cooling_get_electron_pressure(phys_const, hydro_props, us, + cosmo, cool_func, p, xp); + } else { + + /* We are collecting a mass of some kind. + * We skip any particle not matching the PS type we want */ + if (!should_collect_mass(type2, &gparts[i], e->ti_current)) continue; + + /* Compute weight (for neutrino delta-f weighting) */ + double weight2 = 1.0; + if (gparts[i].type == swift_type_neutrino) + gpart_neutrino_weight_mesh_only(&gparts[i], nu_model, &weight2); + + /* And eventually... collect */ + quantity2 = gparts[i].mass * weight2; + } + + /* Now assign to the shot noise collection */ + local_tot12 += quantity1 * quantity2; + } + + } /* Loop over particles */ + + /* Now that we are done with this cell, write back to the global accumulator + */ + atomic_add_d(tot12, local_tot12); +} + +/** + * @brief Threadpool mapper function for the shot noise calculation. + * + * @param map_data A chunk of the list of local cells. + * @param num The number of cells in the chunk. + * @param extra The information about the cells. + */ +void shotnoise_mapper(void* map_data, int num, void* extra) { + + /* Unpack the shared information */ + struct shot_mapper_data* data = (struct shot_mapper_data*)extra; + const struct cell* cells = data->cells; + const struct engine* e = data->e; + struct neutrino_model* nu_model = data->nu_model; + + /* Pointer to the chunk to be processed */ + int* local_cells = (int*)map_data; + + /* Loop over the elements assigned to this thread */ + for (int i = 0; i < num; ++i) { + /* Pointer to local cell */ + const struct cell* c = &cells[local_cells[i]]; + + /* Calculate the necessary mass terms */ + shotnoiseterms(c, &data->tot12, data->type1, data->type2, e, nu_model); + } +} + +__attribute__((always_inline)) INLINE static void TSC_set( + double* mesh, const int N, const int i, const int j, const int k, + const double dx, const double dy, const double dz, const double value) { + + const double lx = 0.5 * (0.5 - dx) * (0.5 - dx); /* left side, dist 1 + dx */ + const double mx = 0.75 - dx * dx; /* center, dist |dx| */ + const double rx = + 0.5 * (0.5 + dx) * (0.5 + dx); /* right side, dist 1 - dx */ + + const double ly = 0.5 * (0.5 - dy) * (0.5 - dy); /* left side, dist 1 + dy */ + const double my = 0.75 - dy * dy; /* center, dist |dy| */ + const double ry = + 0.5 * (0.5 + dy) * (0.5 + dy); /* right side, dist 1 - dy */ + + const double lz = 0.5 * (0.5 - dz) * (0.5 - dz); /* left side, dist 1 + dz */ + const double mz = 0.75 - dz * dz; /* center, dist |dz| */ + const double rz = 0.5 * (0.5 + dz) * (0.5 + dz); /* right side, dist 1 - dz */ + + /* TSC interpolation */ + atomic_add_d( + &mesh[row_major_id_periodic_with_padding(i - 1, j - 1, k - 1, N, 2)], + value * lx * ly * lz); + atomic_add_d( + &mesh[row_major_id_periodic_with_padding(i - 1, j - 1, k + 0, N, 2)], + value * lx * ly * mz); + atomic_add_d( + &mesh[row_major_id_periodic_with_padding(i - 1, j - 1, k + 1, N, 2)], + value * lx * ly * rz); + atomic_add_d( + &mesh[row_major_id_periodic_with_padding(i - 1, j + 0, k - 1, N, 2)], + value * lx * my * lz); + atomic_add_d( + &mesh[row_major_id_periodic_with_padding(i - 1, j + 0, k + 0, N, 2)], + value * lx * my * mz); + atomic_add_d( + &mesh[row_major_id_periodic_with_padding(i - 1, j + 0, k + 1, N, 2)], + value * lx * my * rz); + atomic_add_d( + &mesh[row_major_id_periodic_with_padding(i - 1, j + 1, k - 1, N, 2)], + value * lx * ry * lz); + atomic_add_d( + &mesh[row_major_id_periodic_with_padding(i - 1, j + 1, k + 0, N, 2)], + value * lx * ry * mz); + atomic_add_d( + &mesh[row_major_id_periodic_with_padding(i - 1, j + 1, k + 1, N, 2)], + value * lx * ry * rz); + atomic_add_d( + &mesh[row_major_id_periodic_with_padding(i + 0, j - 1, k - 1, N, 2)], + value * mx * ly * lz); + atomic_add_d( + &mesh[row_major_id_periodic_with_padding(i + 0, j - 1, k + 0, N, 2)], + value * mx * ly * mz); + atomic_add_d( + &mesh[row_major_id_periodic_with_padding(i + 0, j - 1, k + 1, N, 2)], + value * mx * ly * rz); + atomic_add_d( + &mesh[row_major_id_periodic_with_padding(i + 0, j + 0, k - 1, N, 2)], + value * mx * my * lz); + atomic_add_d( + &mesh[row_major_id_periodic_with_padding(i + 0, j + 0, k + 0, N, 2)], + value * mx * my * mz); + atomic_add_d( + &mesh[row_major_id_periodic_with_padding(i + 0, j + 0, k + 1, N, 2)], + value * mx * my * rz); + atomic_add_d( + &mesh[row_major_id_periodic_with_padding(i + 0, j + 1, k - 1, N, 2)], + value * mx * ry * lz); + atomic_add_d( + &mesh[row_major_id_periodic_with_padding(i + 0, j + 1, k + 0, N, 2)], + value * mx * ry * mz); + atomic_add_d( + &mesh[row_major_id_periodic_with_padding(i + 0, j + 1, k + 1, N, 2)], + value * mx * ry * rz); + atomic_add_d( + &mesh[row_major_id_periodic_with_padding(i + 1, j - 1, k - 1, N, 2)], + value * rx * ly * lz); + atomic_add_d( + &mesh[row_major_id_periodic_with_padding(i + 1, j - 1, k + 0, N, 2)], + value * rx * ly * mz); + atomic_add_d( + &mesh[row_major_id_periodic_with_padding(i + 1, j - 1, k + 1, N, 2)], + value * rx * ly * rz); + atomic_add_d( + &mesh[row_major_id_periodic_with_padding(i + 1, j + 0, k - 1, N, 2)], + value * rx * my * lz); + atomic_add_d( + &mesh[row_major_id_periodic_with_padding(i + 1, j + 0, k + 0, N, 2)], + value * rx * my * mz); + atomic_add_d( + &mesh[row_major_id_periodic_with_padding(i + 1, j + 0, k + 1, N, 2)], + value * rx * my * rz); + atomic_add_d( + &mesh[row_major_id_periodic_with_padding(i + 1, j + 1, k - 1, N, 2)], + value * rx * ry * lz); + atomic_add_d( + &mesh[row_major_id_periodic_with_padding(i + 1, j + 1, k + 0, N, 2)], + value * rx * ry * mz); + atomic_add_d( + &mesh[row_major_id_periodic_with_padding(i + 1, j + 1, k + 1, N, 2)], + value * rx * ry * rz); +} + +INLINE static void gpart_to_grid_TSC(const struct gpart* gp, double* rho, + const int N, const double fac, + const double dim[3], const double value) { + + /* Fold the particle position position */ + const double pos_x = box_wrap_multiple(gp->x[0], 0., dim[0]) * fac; + const double pos_y = box_wrap_multiple(gp->x[1], 0., dim[1]) * fac; + const double pos_z = box_wrap_multiple(gp->x[2], 0., dim[2]) * fac; + + /* Workout the TSC coefficients */ + const int i = (int)(pos_x + 0.5); + const double dx = pos_x - i; + + const int j = (int)(pos_y + 0.5); + const double dy = pos_y - j; + + const int k = (int)(pos_z + 0.5); + const double dz = pos_z - k; + +#ifdef SWIFT_DEBUG_CHECKS + if (i < 0 || i > N) error("Invalid gpart position in x"); + if (j < 0 || j > N) error("Invalid gpart position in y"); + if (k < 0 || k > N) error("Invalid gpart position in z"); +#endif + + TSC_set(rho, N, i, j, k, dx, dy, dz, value); +} + +__attribute__((always_inline)) INLINE static void CIC_set( + double* mesh, const int N, const int i, const int j, const int k, + const double tx, const double ty, const double tz, const double dx, + const double dy, const double dz, const double value) { + + /* Classic CIC interpolation */ + atomic_add_d( + &mesh[row_major_id_periodic_with_padding(i + 0, j + 0, k + 0, N, 2)], + value * tx * ty * tz); + atomic_add_d( + &mesh[row_major_id_periodic_with_padding(i + 0, j + 0, k + 1, N, 2)], + value * tx * ty * dz); + atomic_add_d( + &mesh[row_major_id_periodic_with_padding(i + 0, j + 1, k + 0, N, 2)], + value * tx * dy * tz); + atomic_add_d( + &mesh[row_major_id_periodic_with_padding(i + 0, j + 1, k + 1, N, 2)], + value * tx * dy * dz); + atomic_add_d( + &mesh[row_major_id_periodic_with_padding(i + 1, j + 0, k + 0, N, 2)], + value * dx * ty * tz); + atomic_add_d( + &mesh[row_major_id_periodic_with_padding(i + 1, j + 0, k + 1, N, 2)], + value * dx * ty * dz); + atomic_add_d( + &mesh[row_major_id_periodic_with_padding(i + 1, j + 1, k + 0, N, 2)], + value * dx * dy * tz); + atomic_add_d( + &mesh[row_major_id_periodic_with_padding(i + 1, j + 1, k + 1, N, 2)], + value * dx * dy * dz); +} + +INLINE static void gpart_to_grid_CIC(const struct gpart* gp, double* rho, + const int N, const double fac, + const double dim[3], const double value) { + + /* Fold the particle position position */ + const double pos_x = box_wrap_multiple(gp->x[0], 0., dim[0]) * fac; + const double pos_y = box_wrap_multiple(gp->x[1], 0., dim[1]) * fac; + const double pos_z = box_wrap_multiple(gp->x[2], 0., dim[2]) * fac; + + /* Workout the CIC coefficients */ + const int i = (int)pos_x; + const double dx = pos_x - i; + const double tx = 1. - dx; + + const int j = (int)pos_y; + const double dy = pos_y - j; + const double ty = 1. - dy; + + const int k = (int)pos_z; + const double dz = pos_z - k; + const double tz = 1. - dz; + +#ifdef SWIFT_DEBUG_CHECKS + if (i < 0 || i > N) error("Invalid gpart position in x"); + if (j < 0 || j > N) error("Invalid gpart position in y"); + if (k < 0 || k > N) error("Invalid gpart position in z"); +#endif + + CIC_set(rho, N, i, j, k, tx, ty, tz, dx, dy, dz, value); +} + +INLINE static void gpart_to_grid_NGP(const struct gpart* gp, double* rho, + const int N, const double fac, + const double dim[3], const double value) { + + /* Fold the particle position position */ + const double pos_x = box_wrap_multiple(gp->x[0], 0., dim[0]) * fac; + const double pos_y = box_wrap_multiple(gp->x[1], 0., dim[1]) * fac; + const double pos_z = box_wrap_multiple(gp->x[2], 0., dim[2]) * fac; + + const int xi = (int)(pos_x + 0.5) % N; + const int yi = (int)(pos_y + 0.5) % N; + const int zi = (int)(pos_z + 0.5) % N; + atomic_add_d(&rho[(xi * N + yi) * (N + 2) + zi], value); +} + +/** + * @brief Assigns all the #gpart of a #cell to the power grid using the + * chosen mass assignment method. + * + * @param c The #cell. + * @param rho The density grid. + * @param N the size of the grid along one axis. + * @param fac Conversion factor of wrapped position to grid. + * @param type The #power_type we want to assign to the grid. + * @param windoworder The window to use for grid assignment. + * @param e The #engine. + */ +void cell_to_powgrid(const struct cell* c, double* rho, const int N, + const double fac, const enum power_type type, + const int windoworder, const double dim[3], + const struct engine* e, struct neutrino_model* nu_model) { + + const int gcount = c->grav.count; + const struct gpart* gparts = c->grav.parts; + + /* Handle on the other particle types */ + const struct part* parts = e->s->parts; + const struct xpart* xparts = e->s->xparts; + + /* Handle on the physics modules */ + const struct cosmology* cosmo = e->cosmology; + const struct hydro_props* hydro_props = e->hydro_properties; + const struct unit_system* us = e->internal_units; + const struct phys_const* phys_const = e->physical_constants; + const struct cooling_function_data* cool_func = e->cooling_func; + + /* Assign all the gpart of that cell to the mesh */ + for (int i = 0; i < gcount; ++i) { + + /* Skip invalid particles */ + if (gparts[i].time_bin == time_bin_inhibited) continue; + + /* Collect the quantity to assign to the mesh */ + double quantity; + + /* Special case first for the electron pressure */ + if (type == pow_type_pressure) { + + /* Skip non-gas particles */ + if (gparts[i].type != swift_type_gas) continue; + + const struct part* p = &parts[-gparts[i].id_or_neg_offset]; + const struct xpart* xp = &xparts[-gparts[i].id_or_neg_offset]; + quantity = cooling_get_electron_pressure(phys_const, hydro_props, us, + cosmo, cool_func, p, xp); + } else { + + /* We are collecting a mass of some kind. + * We skip any particle not matching the PS type we want */ + if (!should_collect_mass(type, &gparts[i], e->ti_current)) continue; + + /* Compute weight (for neutrino delta-f weighting) */ + double weight = 1.0; + if (gparts[i].type == swift_type_neutrino) + gpart_neutrino_weight_mesh_only(&gparts[i], nu_model, &weight); + + /* And eventually... collect */ + quantity = gparts[i].mass * weight; + } + + /* Assign the quantity to the grid */ + switch (windoworder) { + case 1: + gpart_to_grid_NGP(&gparts[i], rho, N, fac, dim, quantity); + break; + case 2: + gpart_to_grid_CIC(&gparts[i], rho, N, fac, dim, quantity); + break; + case 3: + gpart_to_grid_TSC(&gparts[i], rho, N, fac, dim, quantity); + break; + default: +#ifdef SWIFT_DEBUG_CHECKS + error("Not implemented!"); +#endif + break; + } + + } /* Loop over particles */ +} + +/** + * @brief Threadpool mapper function for the power grid assignment of a cell. + * + * @param map_data A chunk of the list of local cells. + * @param num The number of cells in the chunk. + * @param extra The information about the grid and cells. + */ +void cell_to_powgrid_mapper(void* map_data, int num, void* extra) { + + /* Unpack the shared information */ + const struct grid_mapper_data* data = (struct grid_mapper_data*)extra; + const struct cell* cells = data->cells; + double* grid = data->dens; + const int Ngrid = data->N; + const enum power_type type = data->type; + const int order = data->windoworder; + const double dim[3] = {data->dim[0], data->dim[1], data->dim[2]}; + const double gridfac = data->fac; + const struct engine* e = data->e; + struct neutrino_model* nu_model = data->nu_model; + + /* Pointer to the chunk to be processed */ + int* local_cells = (int*)map_data; + + /* Loop over the elements assigned to this thread */ + for (int i = 0; i < num; ++i) { + /* Pointer to local cell */ + const struct cell* c = &cells[local_cells[i]]; + + /* Assign this cell's content to the grid */ + cell_to_powgrid(c, grid, Ngrid, gridfac, type, order, dim, e, nu_model); + } +} + +/** + * @brief Mapper function for the conversion of mass to density contrast. + * + * @param map_data The density field array. + * @param num The number of elements to iterate on (along the x-axis). + * @param extra The information about the grid and conversion. + */ +void mass_to_contrast_mapper(void* map_data, int num, void* extra) { + + /* Unpack the shared information */ + const struct conv_mapper_data* data = (struct conv_mapper_data*)extra; + double* grid = data->grid; + const int Ngrid = data->Ngrid; + const double invcellmean = data->invcellmean; + + /* Range handled by this call */ + const int xi_start = (double*)map_data - grid; + const int xi_end = xi_start + num; + + /* Loop over the assigned cells, convert to density contrast */ + for (int xi = xi_start; xi < xi_end; ++xi) { + for (int yi = 0; yi < Ngrid; ++yi) { + for (int zi = 0; zi < Ngrid; ++zi) { + const int index = (xi * Ngrid + yi) * (Ngrid + 2) + + zi; /* Ngrid+2 because of padding */ + grid[index] = grid[index] * invcellmean; /* -1.0; -1 only affects k=0 */ + } + } + } +} + +/** + * @brief Mapper function for calculating the power from a Fourier grid. + * + * @param map_data The array of the density field Fourier transform. + * @param num The number of elements to iterate on (along the x-axis). + * @param extra Arrays to store the results/helper variables. + */ +void pow_from_grid_mapper(void* map_data, const int num, void* extra) { + + struct pow_mapper_data* data = (struct pow_mapper_data*)extra; + + /* Unpack the data struct */ + fftw_complex* restrict powgridft = data->powgridft; + fftw_complex* restrict powgridft2 = data->powgridft2; + const int Ngrid = data->Ngrid; + const int Nhalf = Ngrid / 2; + const int nyq2 = Nhalf * Nhalf; + const int windoworder = data->windoworder; + const int* restrict kbin = data->kbin; + const double jfac = data->jfac; + + /* Output data */ + int* restrict modecounts = data->modecounts; + double* restrict powersum = data->powersum; + + /* Range handled by this call */ + const int xi_start = (fftw_complex*)map_data - powgridft; + const int xi_end = xi_start + num; + + /* Loop over the assigned FT'd cells, get deconvolved power from them */ + for (int xi = xi_start; xi < xi_end; ++xi) { + + int kx = xi; + if (kx > Nhalf) kx -= Ngrid; + const double fx = kx * jfac; + const double invsincx = (xi == 0) ? 1.0 : fx / sin(fx); + + for (int yi = 0; yi < Ngrid; ++yi) { + + int ky = yi; + if (ky > Nhalf) ky -= Ngrid; + const double fy = ky * jfac; + const double invsincy = (yi == 0) ? 1.0 : fy / sin(fy); + + /* Real-data FFT, so there is no second z half (symmetric) */ + for (int zi = 0; zi < (Nhalf + 1); ++zi) { + + const int kz = zi; + const double fz = kz * jfac; + const double invsincz = (zi == 0) ? 1.0 : fz / sin(fz); + + const int kk = kx * kx + ky * ky + kz * kz; + + /* Avoid singularity and don't go beyond Nyquist (otherwise we spend + lots of time for little additional info */ + if (kk == 0 || kk > nyq2) continue; + + /* De-aliasing/deconvolution of mass assignment + (Jing 2005 but without iterations) */ + double W = invsincx * invsincy * invsincz; + + /* W = W^windoworder */ + W = integer_pow(W, windoworder); + + /* Avoid sqrt with a lookup table kbin[kk] (does cost more memory) + * bin = (int)(sqrt(kk) + 0.5); */ + const int bin = kbin[kk]; + + const int mult = (zi == 0) ? 1 : 2; + atomic_add(&modecounts[bin], mult); + + const int index = (xi * Ngrid + yi) * (Nhalf + 1) + zi; + atomic_add_d(&powersum[bin], + mult * W * W * + (powgridft[index][0] * powgridft2[index][0] + + powgridft[index][1] * powgridft2[index][1])); + } /* Loop over z */ + } /* Loop over y */ + } /* Loop over z */ +} + +/** + * @brief Initialize a power spectrum output file + * + * @param fp the file pointer + * @param us The current internal system of units. + * @param phys_const Physical constants in internal units + */ +INLINE static void power_init_output_file(FILE* fp, const enum power_type type1, + const enum power_type type2, + const struct unit_system* restrict us, + const struct phys_const* phys_const) { + + /* Write a header to the output file */ + if (type1 != type2) + fprintf(fp, "# %s-%s cross-spectrum\n", get_powtype_name(type1), + get_powtype_name(type2)); + else + fprintf(fp, "# %s power spectrum\n", get_powtype_name(type1)); + fprintf(fp, "######################################################\n"); + fprintf(fp, "#\n"); + fprintf(fp, "# (0) Redshift\n"); + fprintf(fp, "# Unit = dimensionless\n"); + fprintf(fp, "# (1) Fourier scale k\n"); + fprintf(fp, "# Unit = %e cm**-1\n", pow(us->UnitLength_in_cgs, -1.)); + fprintf(fp, "# Unit = %e pc**-1\n", + pow(1. / phys_const->const_parsec, -1.)); + fprintf(fp, "# Unit = %e Mpc**-1\n", + pow(1. / phys_const->const_parsec / 1e6, -1.)); + fprintf(fp, "# (2) The shot-noise subtracted power\n"); + if (type1 != pow_type_pressure && type2 != pow_type_pressure) { + fprintf(fp, "# Unit = %e cm**3\n", pow(us->UnitLength_in_cgs, 3.)); + fprintf(fp, "# Unit = %e pc**3\n", + pow(1. / phys_const->const_parsec, 3.)); + fprintf(fp, "# Unit = %e Mpc**3\n", + pow(1. / phys_const->const_parsec / 1e6, 3.)); + } else if (type1 == pow_type_pressure && type2 == pow_type_pressure) { + fprintf(fp, "# Unit = %e Mpc**3 * eV**2 * cm**-6\n", + pow(1. / phys_const->const_parsec / 1e6, 3.)); + } else { + fprintf(fp, "# Unit = %e Mpc**3 * eV * cm**-3\n", + pow(1. / phys_const->const_parsec / 1e6, 3.)); + } + fprintf(fp, "# (3) The shot-noise power (scale-independant)\n"); + if (type1 != pow_type_pressure && type2 != pow_type_pressure) { + fprintf(fp, "# Unit = %e cm**3\n", pow(us->UnitLength_in_cgs, 3.)); + fprintf(fp, "# Unit = %e pc**3\n", + pow(1. / phys_const->const_parsec, 3.)); + fprintf(fp, "# Unit = %e Mpc**3\n", + pow(1. / phys_const->const_parsec / 1e6, 3.)); + } else if (type1 == pow_type_pressure && type2 == pow_type_pressure) { + fprintf(fp, "# Unit = %e Mpc**3 * eV**2 * cm**-6\n", + pow(1. / phys_const->const_parsec / 1e6, 3.)); + } else { + fprintf(fp, "# Unit = %e Mpc**3 * eV * cm**-3\n", + pow(1. / phys_const->const_parsec / 1e6, 3.)); + } + fprintf(fp, "#\n"); + fprintf(fp, "# %13s %15s %15s %15s\n", "(0) ", "(1) ", "(2) ", + "(3) "); + fprintf(fp, "# %13s %15s %15s %15s\n", "z", "k", "P(k)", "P_noise"); +} + +/** + * @brief Compute the power spectrum between type1 and type2, including + * foldings and dealiasing. Only the real part of the power is returned. + * + * Distributes the cells into slabs, performs the FFT, calculates the dealiased + * power and writes this to file. Then repeats this procedure for a given + * number of foldings. + * + * @param type1 The #enum power_type indicating the first quantity to correlate. + * @param type2 The #enum power_type indicating the second quantity to + * correlate. + * @param powdata The #power_spectrum_data containing power spectrum + * parameters, FFT plans and pointers to the grids. + * @param s The #space containing the particles. + * @param tp The #threadpool object used for parallelisation. + * @param verbose Are we talkative? + */ +void power_spectrum(const enum power_type type1, const enum power_type type2, + struct power_spectrum_data* pow_data, const struct space* s, + struct threadpool* tp, const int verbose) { + + const int* local_cells = s->local_cells_top; + const int nr_local_cells = s->nr_local_cells; + const struct engine* e = s->e; + const struct unit_system* us = e->internal_units; + const struct phys_const* phys_const = e->physical_constants; + const int snapnum = e->ps_output_count; /* -1 if after snapshot dump */ + + /* Extract some useful constants */ + const int Ngrid = pow_data->Ngrid; + const int Ngrid2 = Ngrid * Ngrid; + const int Ngrid3 = Ngrid2 * Ngrid; + const int Nhalf = Ngrid / 2; + const int Nfold = pow_data->Nfold; + const int foldfac = pow_data->foldfac; + const double jfac = M_PI / Ngrid; + + /* Gather some neutrino constants if using delta-f weighting on the mesh */ + struct neutrino_model nu_model; + bzero(&nu_model, sizeof(struct neutrino_model)); + if (s->e->neutrino_properties->use_delta_f_mesh_only) + gather_neutrino_consts(s, &nu_model); + + if (verbose) + message("Preparing to calculate the %s-%s power spectrum.", + get_powtype_name(type1), get_powtype_name(type2)); + + /* could loop over particles but for now just abort */ + if (nr_local_cells == 0) + error("Cell infrastructure is not in place for power spectra."); + + /* Allocate the grids based on whether this is an auto- or cross-spectrum*/ + pow_data->powgrid = fftw_alloc_real(Ngrid2 * (Ngrid + 2)); + memuse_log_allocation("fftw_grid.grid", pow_data->powgrid, 1, + sizeof(double) * Ngrid2 * (Ngrid + 2)); + pow_data->powgridft = (fftw_complex*)pow_data->powgrid; + if (type1 != type2) { + pow_data->powgrid2 = fftw_alloc_real(Ngrid2 * (Ngrid + 2)); + memuse_log_allocation("fftw_grid.grid2", pow_data->powgrid2, 1, + sizeof(double) * Ngrid2 * (Ngrid + 2)); + pow_data->powgridft2 = (fftw_complex*)pow_data->powgrid2; + } else { + pow_data->powgrid2 = pow_data->powgrid; + pow_data->powgridft2 = pow_data->powgridft; + } + + /* Constants used for the normalization */ + double dim[3] = {s->dim[0], s->dim[1], s->dim[2]}; + const double volume = dim[0] * dim[1] * dim[2]; /* units Mpc^3 */ + const double Omega_m = s->e->cosmology->Omega_cdm + s->e->cosmology->Omega_b + + s->e->cosmology->Omega_nu_0; + const double meanrho = + Omega_m * s->e->cosmology->critical_density_0; /* 1e10 Msun/Mpc^3 */ + const double conv_EV = units_cgs_conversion_factor(us, UNIT_CONV_INV_VOLUME) / + phys_const->const_electron_volt; + + /* Inverse of the cosmic mean mass per grid cell in code units */ + double invcellmean, invcellmean2; + + if (type1 != pow_type_pressure) + invcellmean = Ngrid3 / (meanrho * volume); + else + invcellmean = Ngrid3 / volume * conv_EV; + + if (type2 != pow_type_pressure) + invcellmean2 = Ngrid3 / (meanrho * volume); + else + invcellmean2 = Ngrid3 / volume * conv_EV; + + /* When splitting the neutrino ensemble in half, double the inverse mean */ + if (type1 == pow_type_neutrino_0 || type1 == pow_type_neutrino_1) + invcellmean *= 2.0; + if (type2 == pow_type_neutrino_0 || type2 == pow_type_neutrino_1) + invcellmean2 *= 2.0; + + /* Gather the shared information to be used by the threads + for density computation */ + struct grid_mapper_data densdata; + struct grid_mapper_data densdata2; + + densdata.cells = s->cells_top; + densdata.dens = pow_data->powgrid; + densdata.N = Ngrid; + densdata.type = type1; + densdata.windoworder = pow_data->windoworder; + densdata.e = s->e; + densdata.nu_model = &nu_model; + if (type1 != type2) { + densdata2.cells = s->cells_top; + densdata2.dens = pow_data->powgrid2; + densdata2.N = Ngrid; + densdata2.type = type2; + densdata2.windoworder = pow_data->windoworder; + densdata2.e = s->e; + densdata2.nu_model = &nu_model; + } + + if (verbose) message("Calculating the shot noise."); + + /* Calculate mass terms for shot noise */ + double shot = 0.; + if (type1 == pow_type_matter || type2 == pow_type_matter || type1 == type2 || + (type1 == pow_type_gas && type2 == pow_type_pressure) || + (type2 == pow_type_gas && type1 == pow_type_pressure)) { + + /* Note that for cross-power, there is only shot noise for particles + that occur in both fields */ + struct shot_mapper_data shotdata; + shotdata.cells = s->cells_top; + shotdata.tot12 = 0; + shotdata.type1 = type1; + shotdata.type2 = type2; + shotdata.e = s->e; + shotdata.nu_model = &nu_model; + threadpool_map(tp, shotnoise_mapper, (void*)local_cells, nr_local_cells, + sizeof(int), threadpool_auto_chunk_size, (void*)&shotdata); +#ifdef WITH_MPI + /* Add up everybody's shot noise term */ + MPI_Allreduce(MPI_IN_PLACE, &shotdata.tot12, 1, MPI_DOUBLE, MPI_SUM, + MPI_COMM_WORLD); +#endif + + /* Store shot noise */ + shot = shotdata.tot12 / volume; + if (type1 != pow_type_pressure) + shot /= meanrho; + else + shot *= conv_EV; + if (type2 != pow_type_pressure) + shot /= meanrho; + else + shot *= conv_EV; + } + + /* Gather the shared information to be used by the threads + for density conversion */ + struct conv_mapper_data convdata; + convdata.Ngrid = Ngrid; + + /* Create a lookup table for k (could also do this when initializing) */ + int* kbin = (int*)malloc((Nhalf * Nhalf + 1) * sizeof(int)); + for (int i = 0; i < Nhalf; ++i) { + for (int j = 0; j <= i; ++j) kbin[i * i + j] = i; + for (int j = i + 1; j <= 2 * i; ++j) kbin[i * i + j] = i + 1; + } + kbin[Nhalf * Nhalf] = Nhalf; + + /* Allocate arrays for power computation from FFT */ + int* modecounts = (int*)malloc((Nhalf + 1) * sizeof(int)); + double* powersum = (double*)malloc((Nhalf + 1) * sizeof(double)); + + struct pow_mapper_data powmapdata; + powmapdata.powgridft = pow_data->powgridft; + powmapdata.powgridft2 = pow_data->powgridft2; + powmapdata.Ngrid = Ngrid; + powmapdata.windoworder = pow_data->windoworder; + powmapdata.modecounts = modecounts; + powmapdata.powersum = powersum; + powmapdata.kbin = kbin; + powmapdata.jfac = jfac; + + /* Allocate arrays for combined power spectrum */ + const int kcutn = (pow_data->windoworder >= 3) ? 90 : 70; + const int kcutleft = (int)(Ngrid / 256.0 * kcutn); + const int kcutright = (int)(Ngrid / 256.0 * (double)kcutn / foldfac); + /* numtot = 76 * Nfold + 14; + * assumes a 256 grid, foldfac=6 and windoworder=2 */ + const int numtot = kcutleft + (Nfold - 1) * (kcutleft - kcutright + 1); + int numstart = 0; + + double* kcomb = (double*)malloc(numtot * sizeof(double)); + double* pcomb = (double*)malloc(numtot * sizeof(double)); + + /* Determine output file name */ + char outputfileBase[200] = ""; + char outputfileName[256] = ""; + + sprintf(outputfileBase, "power_%s", get_powtype_filename(type1)); + if (type1 != type2) { + const int length = strlen(outputfileBase); + sprintf(outputfileBase + length, "-%s", get_powtype_filename(type2)); + } + + /* Loop over foldings */ + for (int i = 0; i < Nfold; ++i) { + + if (verbose) message("Calculating the power for folding num. %d.", i); + + /* Note: implicitly assuming a cubic box here */ + densdata.fac = Ngrid / dim[0]; + densdata2.fac = Ngrid / dim[0]; + densdata.dim[0] = dim[0]; + densdata.dim[1] = dim[1]; + densdata.dim[2] = dim[2]; + densdata2.dim[0] = dim[0]; + densdata2.dim[1] = dim[1]; + densdata2.dim[2] = dim[2]; + const double kfac = 2 * M_PI / dim[0]; + + /* Empty the grid(s) */ + bzero(pow_data->powgrid, Ngrid2 * (Ngrid + 2) * sizeof(double)); + if (type1 != type2) + bzero(pow_data->powgrid2, Ngrid2 * (Ngrid + 2) * sizeof(double)); + + /* Fill out the folded grid(s) */ + threadpool_map(tp, cell_to_powgrid_mapper, (void*)local_cells, + nr_local_cells, sizeof(int), threadpool_auto_chunk_size, + (void*)&densdata); + if (type1 != type2) + threadpool_map(tp, cell_to_powgrid_mapper, (void*)local_cells, + nr_local_cells, sizeof(int), threadpool_auto_chunk_size, + (void*)&densdata2); +#ifdef WITH_MPI + /* Merge everybody's share of the grid onto rank 0 */ + if (e->nodeID == 0) + MPI_Reduce(MPI_IN_PLACE, pow_data->powgrid, Ngrid2 * (Ngrid + 2), + MPI_DOUBLE, MPI_SUM, 0, MPI_COMM_WORLD); + else + MPI_Reduce(pow_data->powgrid, NULL, Ngrid2 * (Ngrid + 2), MPI_DOUBLE, + MPI_SUM, 0, MPI_COMM_WORLD); + + /* Same for the secondary grid */ + if (type1 != type2) { + + if (e->nodeID == 0) + MPI_Reduce(MPI_IN_PLACE, pow_data->powgrid2, Ngrid2 * (Ngrid + 2), + MPI_DOUBLE, MPI_SUM, 0, MPI_COMM_WORLD); + else + MPI_Reduce(pow_data->powgrid2, NULL, Ngrid2 * (Ngrid + 2), MPI_DOUBLE, + MPI_SUM, 0, MPI_COMM_WORLD); + } +#endif + + /* Only rank 0 needs to perform all the remaining work */ + if (e->nodeID == 0) { + + /* Convert mass to density contrast or pressure to eV/cm^3 */ + convdata.grid = pow_data->powgrid; + convdata.invcellmean = invcellmean; + if (Ngrid < 32) { + mass_to_contrast_mapper(pow_data->powgrid, Ngrid, &convdata); + } else { + threadpool_map(tp, mass_to_contrast_mapper, pow_data->powgrid, Ngrid, + sizeof(double), threadpool_auto_chunk_size, &convdata); + } + + if (type1 != type2) { + convdata.grid = pow_data->powgrid2; + convdata.invcellmean = invcellmean2; + if (Ngrid < 32) { + mass_to_contrast_mapper(pow_data->powgrid2, Ngrid, &convdata); + } else { + threadpool_map(tp, mass_to_contrast_mapper, pow_data->powgrid2, Ngrid, + sizeof(double), threadpool_auto_chunk_size, &convdata); + } + } + + /* Perform FFT(s) */ + fftw_execute_dft_r2c(pow_data->fftplanpow, pow_data->powgrid, + pow_data->powgridft); + if (type1 != type2) + fftw_execute_dft_r2c(pow_data->fftplanpow2, pow_data->powgrid2, + pow_data->powgridft2); + + powmapdata.powgridft = pow_data->powgridft; + powmapdata.powgridft2 = pow_data->powgridft2; + + /* Zero the mode arrays */ + bzero(modecounts, (Nhalf + 1) * sizeof(int)); + bzero(powersum, (Nhalf + 1) * sizeof(double)); + + /* Calculate compensated mode contributions */ + if (Ngrid < 32) { + pow_from_grid_mapper(pow_data->powgridft, Ngrid, &powmapdata); + } else { + threadpool_map(tp, pow_from_grid_mapper, pow_data->powgridft, Ngrid, + sizeof(fftw_complex), threadpool_auto_chunk_size, + &powmapdata); + } + + /* Write this folding to the detail file */ + const double volfac = (volume / Ngrid3) / Ngrid3; + sprintf(outputfileName, "%s/%s_%04d_%d.txt", "power_spectra/foldings", + outputfileBase, snapnum, i); + FILE* outputfile = fopen(outputfileName, "w"); + + /* Determine units of power */ + char powunits[32] = ""; + if (type1 != pow_type_pressure && type2 != pow_type_pressure) + sprintf(powunits, "Mpc^3"); + else if (type1 == pow_type_pressure && type2 == pow_type_pressure) + sprintf(powunits, "Mpc^3 (eV cm^(-3))^2"); + else + sprintf(powunits, "Mpc^3 eV cm^(-3)"); + + fprintf(outputfile, "# Folding %d, all lengths/volumes are comoving\n", + i); + fprintf(outputfile, "# Shotnoise [%s]\n", powunits); + fprintf(outputfile, "%g\n", shot); + fprintf(outputfile, "# Redshift [dimensionless]\n"); + fprintf(outputfile, "%g\n", s->e->cosmology->z); + fprintf(outputfile, "# k [Mpc^(-1)] p [%s]\n", powunits); + + for (int j = 1; j <= Nhalf; ++j) { + fprintf(outputfile, "%g %g\n", j * kfac, + powersum[j] / modecounts[j] * volfac); + } + fclose(outputfile); + + /* Combine most accurate measurements from foldings */ + if (i == 0) { + + for (int j = 0; j < kcutleft; ++j) { + kcomb[j] = (j + 1) * kfac; + pcomb[j] = powersum[j + 1] / modecounts[j + 1] * volfac; + } + + numstart += kcutleft; + + } else { + + const int off = kcutright + 1; + for (int j = 0; j < (kcutleft - kcutright + 1); ++j) { + kcomb[j + numstart] = (j + off) * kfac; + pcomb[j + numstart] = + powersum[j + off] / modecounts[j + off] * volfac; + } + numstart += (kcutleft - kcutright + 1); + } + + } /* Work of rank 0 */ + + /* Fold the box */ + for (int j = 0; j < 3; ++j) dim[j] /= foldfac; + + } /* Loop over the foldings */ + + if (e->nodeID == 0) { + + /* Output attempt at combined measurement */ + sprintf(outputfileName, "%s/%s_%04d.txt", "power_spectra", outputfileBase, + snapnum); + + FILE* outputfile = fopen(outputfileName, "w"); + + /* Header and units */ + power_init_output_file(outputfile, type1, type2, us, phys_const); + + for (int j = 0; j < numtot; ++j) { + fprintf(outputfile, "%15.8f %15.8e %15.8e %15.8e\n", s->e->cosmology->z, + kcomb[j], (pcomb[j] - shot), shot); + } + fclose(outputfile); + } + + /* Done. Just clean up memory */ + if (verbose) message("Calculated the power! Cleaning up and leaving."); + + free(pcomb); + free(kcomb); + free(powersum); + free(modecounts); + free(kbin); + if (type1 != type2) { + memuse_log_allocation("fftw_grid.grid2", pow_data->powgrid2, 0, 0); + fftw_free(pow_data->powgrid2); + } + pow_data->powgrid2 = NULL; + pow_data->powgridft2 = NULL; + memuse_log_allocation("fftw_grid.grid", pow_data->powgrid, 0, 0); + fftw_free(pow_data->powgrid); + pow_data->powgrid = NULL; + pow_data->powgridft = NULL; +} + +void calc_all_power_spectra(struct power_spectrum_data* pow_data, + const struct space* s, struct threadpool* tp, + const int verbose) { + + const ticks tic = getticks(); + + /* Loop over all type combinations the user requested */ + for (int i = 0; i < pow_data->spectrumcount; ++i) + power_spectrum(pow_data->types1[i], pow_data->types2[i], pow_data, s, tp, + verbose); + + /* Increment the PS output counter */ + s->e->ps_output_count++; + + /* Flag we did something special this step */ + s->e->step_props |= engine_step_prop_power_spectra; + + if (verbose) + message("took %.3f %s.", clocks_from_ticks(getticks() - tic), + clocks_getunit()); +} + +void power_clean(struct power_spectrum_data* pow_data) { + fftw_destroy_plan(pow_data->fftplanpow); + fftw_destroy_plan(pow_data->fftplanpow2); + free(pow_data->types2); + free(pow_data->types1); +#ifdef HAVE_THREADED_FFTW + // Probably already done for PM at this point + fftw_cleanup_threads(); +#endif +} + +/** + * @brief Write a power_spectrum_data struct to the given FILE as a stream of + * bytes. + * + * @param p the struct + * @param stream the file stream + */ +void power_spectrum_struct_dump(const struct power_spectrum_data* p, + FILE* stream) { + restart_write_blocks((void*)p, sizeof(struct power_spectrum_data), 1, stream, + "power spectrum data", "power spectrum data"); + restart_write_blocks(p->types1, p->spectrumcount, sizeof(enum power_type), + stream, "power types 1", "power types 1"); + restart_write_blocks(p->types2, p->spectrumcount, sizeof(enum power_type), + stream, "power types 2", "power types 2"); +} + +/** + * @brief Restore a power_spectrum_data struct from the given FILE as a stream + * of bytes. + * + * @param p the struct + * @param stream the file stream + */ +void power_spectrum_struct_restore(struct power_spectrum_data* p, + FILE* stream) { + restart_read_blocks((void*)p, sizeof(struct power_spectrum_data), 1, stream, + NULL, "power spectrum data"); + p->types1 = malloc(p->spectrumcount * sizeof(enum power_type)); + restart_read_blocks(p->types1, p->spectrumcount, sizeof(enum power_type), + stream, NULL, "power types 1"); + p->types2 = malloc(p->spectrumcount * sizeof(enum power_type)); + restart_read_blocks(p->types2, p->spectrumcount, sizeof(enum power_type), + stream, NULL, "power types 2"); + +#ifdef HAVE_THREADED_FFTW + /* Initialise the thread-parallel FFTW version + (probably already done for the PM, but does not matter) */ + if (p->Ngrid >= 64) { + fftw_init_threads(); + fftw_plan_with_nthreads(p->nr_threads); + } // if +#else + message("Note that FFTW is not threaded!"); +#endif + + /* Initialize the plan only once -- much faster for FFTs run often! + Does require us to allocate the grids, but we delete them right away */ + int Ngrid = p->Ngrid; + + /* Grid is padded to allow for in-place FFT */ + p->powgrid = fftw_alloc_real(Ngrid * Ngrid * (Ngrid + 2)); + /* Pointer to grid to interpret it as complex data */ + p->powgridft = (fftw_complex*)p->powgrid; + + p->fftplanpow = fftw_plan_dft_r2c_3d(Ngrid, Ngrid, Ngrid, p->powgrid, + p->powgridft, FFTW_MEASURE); + + fftw_free(p->powgrid); + p->powgrid = NULL; + p->powgridft = NULL; + + /* Do the same for a second grid/plan to allow for cross power */ + + /* Grid is padded to allow for in-place FFT */ + p->powgrid2 = fftw_alloc_real(Ngrid * Ngrid * (Ngrid + 2)); + /* Pointer to grid to interpret it as complex data */ + p->powgridft2 = (fftw_complex*)p->powgrid2; + + p->fftplanpow2 = fftw_plan_dft_r2c_3d(Ngrid, Ngrid, Ngrid, p->powgrid2, + p->powgridft2, FFTW_MEASURE); + + fftw_free(p->powgrid2); + p->powgrid2 = NULL; + p->powgridft2 = NULL; +} diff --git a/src/power_spectrum.h b/src/power_spectrum.h new file mode 100644 index 0000000000000000000000000000000000000000..c3565ddd3bfb1c33e434774ec1e01e7e409798c5 --- /dev/null +++ b/src/power_spectrum.h @@ -0,0 +1,116 @@ +/******************************************************************************* + * This file is part of SWIFT. + * Copyright (c) 2021 Marcel van Daalen (daalen@strw.leidenuniv.nl) + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published + * by the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public License + * along with this program. If not, see <http://www.gnu.org/licenses/>. + * + ******************************************************************************/ +#ifndef SWIFT_POWER_H +#define SWIFT_POWER_H + +/* Config parameters. */ +#include "../config.h" + +/* Local headers */ +#ifdef HAVE_FFTW +#include <fftw3.h> +#endif + +/* Forward declarations */ +struct space; +struct gpart; +struct threadpool; +struct swift_params; + +/** + * @brief The different types of components we can calculate the power for. + */ +enum power_type { + pow_type_matter, + pow_type_cdm, + pow_type_gas, + pow_type_starBH, + pow_type_pressure, + pow_type_neutrino, + pow_type_neutrino_0, + pow_type_neutrino_1, + pow_type_count /* Nb. of power spect. types (always last elem. in the enum) */ +} __attribute__((packed)); + +/** + * @brief Data structure for power spectrum variables and parameters + */ +struct power_spectrum_data { + + /*! Number of grid cells on a side */ + int Ngrid; + + /*! The number of threads used by the FFTW library */ + int nr_threads; + + /*! Number of foldings to use (1 means no folding) */ + int Nfold; + + /*! Factor by which to fold along each dimension */ + int foldfac; + + /*! Number of different power spectra to calculate */ + int spectrumcount; + + /*! The order of the mass assignment window */ + int windoworder; + + /*! Array of component types to correlate on the "left" side */ + enum power_type* types1; + + /*! Array of component types to correlate on the "right" side */ + enum power_type* types2; + + /*! Pointer to the grid to be reused */ + double* powgrid; + + /*! Pointer to a second grid for cross-power spectra */ + double* powgrid2; + +#ifdef HAVE_FFTW + /*! Pointer to the grid in Fourier space */ + fftw_complex* powgridft; + + /*! Pointer to the second grid in Fourier space */ + fftw_complex* powgridft2; + + /*! The FFT plan to be reused */ + fftw_plan fftplanpow; + + /*! The FFT plan to be reused for the second grid */ + fftw_plan fftplanpow2; +#endif +}; + +void power_init(struct power_spectrum_data* p, struct swift_params* params, + int nr_threads); +void power_spectrum(const enum power_type type1, const enum power_type type2, + struct power_spectrum_data* pow_data, const struct space* s, + struct threadpool* tp, const int verbose); +void calc_all_power_spectra(struct power_spectrum_data* pow_data, + const struct space* s, struct threadpool* tp, + const int verbose); +void power_clean(struct power_spectrum_data* pow_data); + +/* Dump/restore. */ +void power_spectrum_struct_dump(const struct power_spectrum_data* p, + FILE* stream); +void power_spectrum_struct_restore(struct power_spectrum_data* p, FILE* stream); + +#endif /* SWIFT_POWER_H */ diff --git a/src/random.h b/src/random.h index 60cbfa63d78d781f9b0c28d0b06a98b54b529b03..d2015136aa3e88412517ec667df64edb46a38891 100644 --- a/src/random.h +++ b/src/random.h @@ -42,7 +42,6 @@ * generator. * In case new numbers need to be added other possible * numbers could be: - * 126247697 * 193877777 * 303595777 */ @@ -71,6 +70,7 @@ enum random_number_type { random_number_mosaic_powerlaw = 406586897LL, random_number_mosaic_schechter = 562448657LL, random_number_mosaic_poisson = 384160001LL, + random_number_powerspectrum_split = 126247697LL, }; #ifndef __APPLE__ diff --git a/src/row_major_id.h b/src/row_major_id.h index 5db90ba1e4c166b475c88020e829517bd89d671f..206e7f2d086edfbb4582d447d47f7c75c3fec9f3 100644 --- a/src/row_major_id.h +++ b/src/row_major_id.h @@ -42,6 +42,26 @@ __attribute__((always_inline, const)) INLINE static int row_major_id_periodic( return (((i + N) % N) * N * N + ((j + N) % N) * N + ((k + N) % N)); } +/** + * @brief Returns 1D index of a 3D NxNxN array using row-major style. + * + * Wraps around in the corresponding dimension if any of the 3 indices is >= N + * or < 0. + * + * Padding is added along the x axis. + * + * @param i Index along x. + * @param j Index along y. + * @param k Index along z. + * @param N Size of the array along one axis. + */ +__attribute__((always_inline, const)) INLINE static int +row_major_id_periodic_with_padding(const int i, const int j, const int k, + const int N, const int pad) { + + return ((((i + N) % N) * N + ((j + N) % N)) * (N + pad) + ((k + N) % N)); +} + /** * @brief Returns 1D index of a FFTW-padded 3D NxNxN array using row-major * style. diff --git a/src/swift.h b/src/swift.h index 9c22bc3fa52b183d3092015f6d5a9ea8927bf41f..62a66b0c4d88e10d0fbe1481941f1590c963f75d 100644 --- a/src/swift.h +++ b/src/swift.h @@ -72,6 +72,7 @@ #include "periodic.h" #include "physical_constants.h" #include "potential.h" +#include "power_spectrum.h" #include "pressure_floor.h" #include "pressure_floor_iact.h" #include "profiler.h"