diff --git a/.gitignore b/.gitignore index 27ff2cf7ff880de2007e27978fcc1cfa22d2bd75..44d98c6ff5fa70ab2d5bf53752c30b40b69f6272 100644 --- a/.gitignore +++ b/.gitignore @@ -1,5 +1,6 @@ *~ *.hdf5 +*.dirstamp Makefile Makefile.in @@ -44,12 +45,16 @@ examples/*/restart/* examples/*/*/used_parameters.yml examples/*/err_file* examples/*/out_file* -examples/*/stf_output* -examples/*/stf_ouput* +examples/*/stf_* examples/*/log* examples/*/*/unused_parameters.yml examples/*/*.mpg examples/*/gravity_checks_*.dat +examples/*/coolingtables.tar.gz +examples/*/coolingtables +examples/CoolingRates/cooling_rates +examples/CoolingRates/cooling_element_*.dat +examples/CoolingRates/cooling_output.dat tests/testActivePair tests/testActivePair.sh @@ -63,12 +68,18 @@ tests/brute_force_perturbed.dat tests/swift_dopair_perturbed.dat tests/test27cells tests/test27cells_subset +tests/test27cellsStars +tests/test27cellsStars_subset tests/testPeriodicBC tests/test125cells tests/brute_force_27_standard.dat tests/swift_dopair_27_standard.dat tests/brute_force_27_perturbed.dat tests/swift_dopair_27_perturbed.dat +tests/star_brute_force_27_standard.dat +tests/swift_star_dopair_27_standard.dat +tests/star_brute_force_27_perturbed.dat +tests/swift_star_dopair_27_perturbed.dat tests/brute_force_125_standard.dat tests/swift_dopair_125_standard.dat tests/brute_force_125_perturbed.dat @@ -106,6 +117,8 @@ tests/testPeriodicBC.sh tests/testPeriodicBCPerturbed.sh tests/test27cells.sh tests/test27cellsPerturbed.sh +tests/test27cellsStars.sh +tests/test27cellsStarsPerturbed.sh tests/test125cells.sh tests/test125cellsPerturbed.sh tests/testParser.sh @@ -131,7 +144,11 @@ tests/testEOS tests/testEOS*.txt tests/testEOS*.png tests/testUtilities +tests/testCosmology +tests/testOutputList tests/testCbrt +tests/testFormat.sh +tests/testCooling theory/latex/swift.pdf theory/SPH/Kernels/kernels.pdf @@ -148,6 +165,7 @@ theory/Multipoles/potential_long.pdf theory/Multipoles/potential_short.pdf 
theory/Multipoles/force_short.pdf theory/Cosmology/cosmology.pdf +theory/Cooling/eagle_cooling.pdf m4/libtool.m4 m4/ltoptions.m4 @@ -165,6 +183,8 @@ m4/lt~obsolete.m4 /stamp-h1 /test-driver +src/equation_of_state/planetary/*.txt + # Intel compiler optimization reports *.optrpt @@ -316,3 +336,6 @@ sympy-plots-for-*.tex/ #ctags *tags + +# vim +*.swp diff --git a/AUTHORS b/AUTHORS index 6f283405b69a7d3a5397916f0a3afa7f4fb54a4a..3bbcc3c251d52bfcf372e807e9e5c3d02ea30ca5 100644 --- a/AUTHORS +++ b/AUTHORS @@ -2,12 +2,14 @@ Pedro Gonnet gonnet@google.com Matthieu Schaller matthieu.schaller@durham.ac.uk Aidan Chalk aidan.chalk@durham.ac.uk Peter W. Draper p.w.draper@durham.ac.uk -Bert Vandenbrouck bert.vandenbroucke@gmail.com +Bert Vandenbroucke bert.vandenbroucke@gmail.com James S. Willis james.s.willis@durham.ac.uk John A. Regan john.a.regan@durham.ac.uk Angus Lepper angus.lepper@ed.ac.uk Tom Theuns tom.theuns@durham.ac.uk Richard G. Bower r.g.bower@durham.ac.uk Stefan Arridge stefan.arridge@durham.ac.uk -Massimiliano Culpo massimiliano.culpo@googlemail.com +Josh Borrow joshua.borrow@durham.ac.uk +Loic Hausammann loic.hausammann@epfl.ch Yves Revaz yves.revaz@epfl.ch +Jacob Kegerreis jacob.kegerreis@durham.ac.uk diff --git a/Makefile.am b/Makefile.am index fb4eb5f6d6b63a7d0e034e0a3202ac61066e6e25..e1ee2ff2f371649e23f7f39f818af26a9e808003 100644 --- a/Makefile.am +++ b/Makefile.am @@ -19,7 +19,10 @@ ACLOCAL_AMFLAGS = -I m4 # Show the way... -SUBDIRS = src examples doc tests +SUBDIRS = src argparse examples doc tests tools +if HAVEEAGLECOOLING +SUBDIRS += examples/CoolingRates +endif # Non-standard files that should be part of the distribution. 
EXTRA_DIST = INSTALL.swift .clang-format format.sh diff --git a/README b/README index 1ac1624b6a55fad43c73a8936b1a711ff956ca4d..97d096ef1804a4348e88dfaf67c22b2c427a3ad8 100644 --- a/README +++ b/README @@ -1,45 +1,69 @@ Welcome to the cosmological hydrodynamical code ______ _________________ / ___/ | / / _/ ___/_ __/ - \__ \| | /| / // // /_ / / - ___/ /| |/ |/ // // __/ / / - /____/ |__/|__/___/_/ /_/ + \__ \| | /| / // // /_ / / + ___/ /| |/ |/ // // __/ / / + /____/ |__/|__/___/_/ /_/ SPH With Inter-dependent Fine-grained Tasking - Website: www.swiftsim.com +Website: www.swiftsim.com Twitter: @SwiftSimulation See INSTALL.swift for install instructions. -Usage: swift [OPTION]... PARAMFILE - swift_mpi [OPTION]... PARAMFILE - -Valid options are: - -a Pin runners using processor affinity. - -c Run with cosmological time integration. - -C Run with cooling. - -d Dry run. Read the parameter file, allocate memory but does not read - the particles from ICs and exit before the start of time integration. - Allows user to check validity of parameter and IC files as well as memory limits. - -D Always drift all particles even the ones far from active particles. This emulates - Gadget-[23] and GIZMO's default behaviours. - -e Enable floating-point exceptions (debugging mode). - -f {int} Overwrite the CPU frequency (Hz) to be used for time measurements. - -g Run with an external gravitational potential. - -G Run with self-gravity. - -M Reconstruct the multipoles every time-step. - -n {int} Execute a fixed number of time steps. When unset use the time_end parameter to stop. - -P {sec:par:val} Set parameter value and overwrites values read from the parameters file. Can be used more than once. - -s Run with hydrodynamics. - -S Run with stars. - -t {int} The number of threads to use on each MPI rank. Defaults to 1 if not specified. - -T Print timers every time-step. - -v [12] Increase the level of verbosity: - 1: MPI-rank 0 writes, - 2: All MPI-ranks write. 
- -y {int} Time-step frequency at which task graphs are dumped. - -Y {int} Time-step frequency at which threadpool tasks are dumped. - -h Print this help message and exit. - -See the file parameter_example.yml for an example of parameter file. +Usage: swift [options] [[--] param-file] + or: swift [options] param-file + or: swift_mpi [options] [[--] param-file] + or: swift_mpi [options] param-file +Parameters: + + -h, --help show this help message and exit + + Simulation options: + -b, --feedback Run with stars feedback + -c, --cosmology Run with cosmological time integration. + -C, --cooling Run with cooling + -D, --drift-all Always drift all particles even the ones + far from active particles. This emulates + Gadget-[23] and GIZMO's default behaviours. + -F, --sourceterms + -g, --external-gravity Run with an external gravitational potential. + -G, --self-gravity Run with self-gravity. + -M, --multipole-reconstruction Reconstruct the multipoles every time-step. + -s, --hydro Run with hydrodynamics. + -S, --stars Run with stars + -x, --velociraptor Run with structure finding + + Control options: + -a, --pin Pin runners using processor affinity. + -d, --dry-run Dry run. Read the parameter file, allocates + memory but does not read the particles + from ICs. Exits before the start of time + integration. Checks the validity of + parameters and IC files as well as memory + limits. + -e, --fpe Enable floating-point exceptions (debugging + mode). + -f, --cpu-frequency=<str> Overwrite the CPU frequency (Hz) to be + used for time measurements. + -n, --steps=<int> Execute a fixed number of time steps. + When unset use the time_end parameter + to stop. + -o, --output-params=<str> Generate a default output parameter + file. + -P, --param=<str> Set parameter value, overiding the value + read from the parameter file. Can be used + more than once {sec:par:value}. + -r, --restart Continue using restart files. + -t, --threads=<int> The number of threads to use on each MPI + rank. 
Defaults to 1 if not specified. + -T, --timers=<int> Print timers every time-step. + -v, --verbose=<int> Run in verbose mode, in MPI mode 2 outputs + from all ranks. + -y, --task-dumps=<int> Time-step frequency at which task graphs + are dumped. + -Y, --threadpool-dumps=<int> Time-step frequency at which threadpool + tasks are dumped. + +See the file examples/parameter_example.yml for an example of parameter file. diff --git a/README.md b/README.md index e9ce99f10901f4a3aa5fe93d14dea4f36a54fe34..94e95776cd80c1bb822f0f68290c2add9d2bb58b 100644 --- a/README.md +++ b/README.md @@ -49,9 +49,9 @@ are highly encouraged. Welcome to the cosmological hydrodynamical code ______ _________________ / ___/ | / / _/ ___/_ __/ - \__ \| | /| / // // /_ / / - ___/ /| |/ |/ // // __/ / / - /____/ |__/|__/___/_/ /_/ + \__ \| | /| / // // /_ / / + ___/ /| |/ |/ // // __/ / / + /____/ |__/|__/___/_/ /_/ SPH With Inter-dependent Fine-grained Tasking Website: www.swiftsim.com @@ -59,35 +59,59 @@ are highly encouraged. See INSTALL.swift for install instructions. -Usage: swift [OPTION]... PARAMFILE - swift_mpi [OPTION]... PARAMFILE - -Valid options are: - -a Pin runners using processor affinity. - -c Run with cosmological time integration. - -C Run with cooling. - -d Dry run. Read the parameter file, allocate memory but does not read - the particles from ICs and exit before the start of time integration. - Allows user to check validity of parameter and IC files as well as memory limits. - -D Always drift all particles even the ones far from active particles. This emulates - Gadget-[23] and GIZMO's default behaviours. - -e Enable floating-point exceptions (debugging mode). - -f {int} Overwrite the CPU frequency (Hz) to be used for time measurements. - -g Run with an external gravitational potential. - -G Run with self-gravity. - -M Reconstruct the multipoles every time-step. - -n {int} Execute a fixed number of time steps. When unset use the time_end parameter to stop. 
- -P {sec:par:val} Set parameter value and overwrites values read from the parameters file. Can be used more than once. - -s Run with hydrodynamics. - -S Run with stars. - -t {int} The number of threads to use on each MPI rank. Defaults to 1 if not specified. - -T Print timers every time-step. - -v [12] Increase the level of verbosity: - 1: MPI-rank 0 writes, - 2: All MPI-ranks write. - -y {int} Time-step frequency at which task graphs are dumped. - -Y {int} Time-step frequency at which threadpool tasks are dumped. - -h Print this help message and exit. +Usage: swift [options] [[--] param-file] + or: swift [options] param-file + or: swift_mpi [options] [[--] param-file] + or: swift_mpi [options] param-file + +Parameters: + + -h, --help show this help message and exit + + Simulation options: + -b, --feedback Run with stars feedback + -c, --cosmology Run with cosmological time integration. + -C, --cooling Run with cooling + -D, --drift-all Always drift all particles even the ones + far from active particles. This emulates + Gadget-[23] and GIZMO's default behaviours. + -F, --sourceterms + -g, --external-gravity Run with an external gravitational potential. + -G, --self-gravity Run with self-gravity. + -M, --multipole-reconstruction Reconstruct the multipoles every time-step. + -s, --hydro Run with hydrodynamics. + -S, --stars Run with stars + -x, --velociraptor Run with structure finding + + Control options: + -a, --pin Pin runners using processor affinity. + -d, --dry-run Dry run. Read the parameter file, allocates + memory but does not read the particles + from ICs. Exits before the start of time + integration. Checks the validity of + parameters and IC files as well as memory + limits. + -e, --fpe Enable floating-point exceptions (debugging + mode). + -f, --cpu-frequency=<str> Overwrite the CPU frequency (Hz) to be + used for time measurements. + -n, --steps=<int> Execute a fixed number of time steps. + When unset use the time_end parameter + to stop. 
+ -o, --output-params=<str> Generate a default output parameter + file. + -P, --param=<str> Set parameter value, overiding the value + read from the parameter file. Can be used + more than once {sec:par:value}. + -r, --restart Continue using restart files. + -t, --threads=<int> The number of threads to use on each MPI + rank. Defaults to 1 if not specified. + -T, --timers=<int> Print timers every time-step. + -v, --verbose=<int> Run in verbose mode, in MPI mode 2 outputs + from all ranks. + -y, --task-dumps=<int> Time-step frequency at which task graphs + are dumped. + -Y, --threadpool-dumps=<int> Time-step frequency at which threadpool + tasks are dumped. See the file examples/parameter_example.yml for an example of parameter file. -``` diff --git a/argparse/FAQs.md b/argparse/FAQs.md new file mode 100644 index 0000000000000000000000000000000000000000..c760807070b192c33e624e3f98af4cd24fe16fca --- /dev/null +++ b/argparse/FAQs.md @@ -0,0 +1,36 @@ +# FAQs + +## Why removing parsed command-line switches/options? + +It destroys the original `argv` array, not compatible with other arguments parsing +library. + +This is because this library is used for short-lived programs, e.g. cli tools +at beginning. It's very convenient to process remain arguments if we remove +parsed command-line arguments, e.g. `<comamnd> [-[s]|--switch]... arguments`. + +If you want keep original `argc/argv`, you can make a copy, then pass them to +`argparse_parse`, e.g. + +```c +int copy_argc = argc; +const char **copy_argv = argv; +copy_argv = malloc(copy_argc * sizeof(char *)); +for (int i = 0; i < argc; i++) { + copy_argv[i] = (char *)argv[i]; +} +argparse_parse(&argparse, copy_argc, copy_argv); +``` + +Issues: + +- https://github.com/cofyc/argparse/issues/3 +- https://github.com/cofyc/argparse/issues/9 + +## Why using `intptr_t` to hold associated data? Why not `void *`? + +I choose `intptr_t` because it's a integer type which also can be used to hold +a pointer value. 
Most of the time, we only need a integer to hold +user-provided value, see `OPT_BIT` as example. If you want to provide a pointer +which points to a large amount of data, you can cast it to `intptr_t` and cast +it back to original pointer in callback function. diff --git a/argparse/LICENSE b/argparse/LICENSE new file mode 100644 index 0000000000000000000000000000000000000000..3c777497645ca9998899db5d8a8041e9831a4604 --- /dev/null +++ b/argparse/LICENSE @@ -0,0 +1,21 @@ +The MIT License (MIT) + +Copyright (c) 2012-2013 Yecheng Fu <cofyc.jackson@gmail.com> + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. diff --git a/argparse/Makefile.am b/argparse/Makefile.am new file mode 100644 index 0000000000000000000000000000000000000000..2fa6fb9e9ef4c014697a2c434cd86741cc74d79c --- /dev/null +++ b/argparse/Makefile.am @@ -0,0 +1,28 @@ +# This file is part of SWIFT. +# Copyright (c) 2018 Peter W. 
Draper (p.w.draper@durham.ac.uk) +# +# This program is free software: you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation, either version 3 of the License, or +# (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with this program. If not, see <http://www.gnu.org/licenses/>. + +lib_LTLIBRARIES = libargparse.la + +include_HEADERS = argparse.h + +AM_SOURCES = argparse.c + +# Sources and flags for regular library +libargparse_la_SOURCES = $(AM_SOURCES) +libargparse_la_CFLAGS = $(AM_CFLAGS) +libargparse_la_LDFLAGS = $(AM_LDFLAGS) + +EXTRA_DIST = LICENSE README.md diff --git a/argparse/OWNERS b/argparse/OWNERS new file mode 100644 index 0000000000000000000000000000000000000000..8cad69dd488010bcaa66ed80d5e3d425f647064c --- /dev/null +++ b/argparse/OWNERS @@ -0,0 +1,2 @@ +approvers: +- cofyc diff --git a/argparse/README.md b/argparse/README.md new file mode 100644 index 0000000000000000000000000000000000000000..ddf66b4777e9baaf68be270df3e8994cb96a2998 --- /dev/null +++ b/argparse/README.md @@ -0,0 +1,103 @@ +# argparse [](https://travis-ci.org/cofyc/argparse) + +argparse - A command line arguments parsing library in C (compatible with C++). + +## Description + +This module is inspired by parse-options.c (git) and python's argparse +module. + +Arguments parsing is common task in cli program, but traditional `getopt` +libraries are not easy to use. This library provides high-level arguments +parsing solutions. 
+ +The program defines what arguments it requires, and `argparse` will figure +out how to parse those out of `argc` and `argv`, it also automatically +generates help and usage messages and issues errors when users give the +program invalid arguments. + +## Features + + - handles both optional and positional arguments + - produces highly informative usage messages + - issues errors when given invalid arguments + +There are basically three types of options: + + - boolean options + - options with mandatory argument + - options with optional argument + +There are basically two forms of options: + + - short option consist of one dash (`-`) and one alphanumeric character. + - long option begin with two dashes (`--`) and some alphanumeric characters. + +Short options may be bundled, e.g. `-a -b` can be specified as `-ab`. + +Options are case-sensitive. + +Options and non-option arguments can clearly be separated using the `--` option. + +## Examples + +```c +#include "argparse.h" + +static const char *const usage[] = { + "test_argparse [options] [[--] args]", + "test_argparse [options]", + NULL, +}; + +#define PERM_READ (1<<0) +#define PERM_WRITE (1<<1) +#define PERM_EXEC (1<<2) + +int +main(int argc, const char **argv) +{ + int force = 0; + int test = 0; + int num = 0; + const char *path = NULL; + int perms = 0; + struct argparse_option options[] = { + OPT_HELP(), + OPT_GROUP("Basic options"), + OPT_BOOLEAN('f', "force", &force, "force to do"), + OPT_BOOLEAN('t', "test", &test, "test only"), + OPT_STRING('p', "path", &path, "path to read"), + OPT_INTEGER('n', "num", &num, "selected num"), + OPT_GROUP("Bits options"), + OPT_BIT(0, "read", &perms, "read perm", NULL, PERM_READ, OPT_NONEG), + OPT_BIT(0, "write", &perms, "write perm", NULL, PERM_WRITE), + OPT_BIT(0, "exec", &perms, "exec perm", NULL, PERM_EXEC), + OPT_END(), + }; + + struct argparse argparse; + argparse_init(&argparse, options, usage, 0); + argparse_describe(&argparse, "\nA brief description of what the 
program does and how it works.", "\nAdditional description of the program after the description of the arguments."); + argc = argparse_parse(&argparse, argc, argv); + if (force != 0) + printf("force: %d\n", force); + if (test != 0) + printf("test: %d\n", test); + if (path != NULL) + printf("path: %s\n", path); + if (num != 0) + printf("num: %d\n", num); + if (argc != 0) { + printf("argc: %d\n", argc); + int i; + for (i = 0; i < argc; i++) { + printf("argv[%d]: %s\n", i, *(argv + i)); + } + } + if (perms) { + printf("perms: %d\n", perms); + } + return 0; +} +``` diff --git a/argparse/argparse.c b/argparse/argparse.c new file mode 100644 index 0000000000000000000000000000000000000000..bb6c8c0c0012964090ff3676bcb02d0f5139d22a --- /dev/null +++ b/argparse/argparse.c @@ -0,0 +1,379 @@ +/** + * Copyright (C) 2012-2015 Yecheng Fu <cofyc.jackson at gmail dot com> + * All rights reserved. + * + * Use of this source code is governed by a MIT-style license that can be found + * in the LICENSE file. + */ +#include "argparse.h" +#include <assert.h> +#include <errno.h> +#include <stdio.h> +#include <stdlib.h> +#include <string.h> + +#define OPT_UNSET 1 +#define OPT_LONG (1 << 1) + +static const char *prefix_skip(const char *str, const char *prefix) { + size_t len = strlen(prefix); + return strncmp(str, prefix, len) ? 
NULL : str + len; +} + +static int prefix_cmp(const char *str, const char *prefix) { + for (;; str++, prefix++) + if (!*prefix) { + return 0; + } else if (*str != *prefix) { + return (unsigned char)*prefix - (unsigned char)*str; + } +} + +static void argparse_error(struct argparse *self, + const struct argparse_option *opt, + const char *reason, int flags) { + (void)self; + if (flags & OPT_LONG) { + fprintf(stderr, "error: option `--%s` %s\n", opt->long_name, reason); + } else { + fprintf(stderr, "error: option `-%c` %s\n", opt->short_name, reason); + } + exit(1); +} + +static int argparse_getvalue(struct argparse *self, + const struct argparse_option *opt, int flags) { + const char *s = NULL; + if (!opt->value) goto skipped; + switch (opt->type) { + case ARGPARSE_OPT_BOOLEAN: + if (flags & OPT_UNSET) { + *(int *)opt->value = *(int *)opt->value - 1; + } else { + *(int *)opt->value = *(int *)opt->value + 1; + } + if (*(int *)opt->value < 0) { + *(int *)opt->value = 0; + } + break; + case ARGPARSE_OPT_BIT: + if (flags & OPT_UNSET) { + *(int *)opt->value &= ~opt->data; + } else { + *(int *)opt->value |= opt->data; + } + break; + case ARGPARSE_OPT_STRING: + if (self->optvalue) { + *(const char **)opt->value = self->optvalue; + self->optvalue = NULL; + } else if (self->argc > 1) { + self->argc--; + *(const char **)opt->value = *++self->argv; + } else { + argparse_error(self, opt, "requires a value", flags); + } + break; + case ARGPARSE_OPT_INTEGER: + errno = 0; + if (self->optvalue) { + *(int *)opt->value = strtol(self->optvalue, (char **)&s, 0); + self->optvalue = NULL; + } else if (self->argc > 1) { + self->argc--; + *(int *)opt->value = strtol(*++self->argv, (char **)&s, 0); + } else { + argparse_error(self, opt, "requires a value", flags); + } + if (errno) argparse_error(self, opt, strerror(errno), flags); + if (s[0] != '\0') + argparse_error(self, opt, "expects an integer value", flags); + break; + case ARGPARSE_OPT_FLOAT: + errno = 0; + if (self->optvalue) { + 
*(float *)opt->value = strtof(self->optvalue, (char **)&s); + self->optvalue = NULL; + } else if (self->argc > 1) { + self->argc--; + *(float *)opt->value = strtof(*++self->argv, (char **)&s); + } else { + argparse_error(self, opt, "requires a value", flags); + } + if (errno) argparse_error(self, opt, strerror(errno), flags); + if (s[0] != '\0') + argparse_error(self, opt, "expects a numerical value", flags); + break; + default: + assert(0); + } + +skipped: + if (opt->callback) { + return opt->callback(self, opt); + } + + return 0; +} + +static void argparse_options_check(const struct argparse_option *options) { + for (; options->type != ARGPARSE_OPT_END; options++) { + switch (options->type) { + case ARGPARSE_OPT_END: + case ARGPARSE_OPT_BOOLEAN: + case ARGPARSE_OPT_BIT: + case ARGPARSE_OPT_INTEGER: + case ARGPARSE_OPT_FLOAT: + case ARGPARSE_OPT_STRING: + case ARGPARSE_OPT_GROUP: + continue; + default: + fprintf(stderr, "wrong option type: %d", options->type); + break; + } + } +} + +static int argparse_short_opt(struct argparse *self, + const struct argparse_option *options) { + for (; options->type != ARGPARSE_OPT_END; options++) { + if (options->short_name == *self->optvalue) { + self->optvalue = self->optvalue[1] ? self->optvalue + 1 : NULL; + return argparse_getvalue(self, options, 0); + } + } + return -2; +} + +static int argparse_long_opt(struct argparse *self, + const struct argparse_option *options) { + for (; options->type != ARGPARSE_OPT_END; options++) { + const char *rest; + int opt_flags = 0; + if (!options->long_name) continue; + + rest = prefix_skip(self->argv[0] + 2, options->long_name); + if (!rest) { + // negation disabled? 
+ if (options->flags & OPT_NONEG) { + continue; + } + // only OPT_BOOLEAN/OPT_BIT supports negation + if (options->type != ARGPARSE_OPT_BOOLEAN && + options->type != ARGPARSE_OPT_BIT) { + continue; + } + + if (prefix_cmp(self->argv[0] + 2, "no-")) { + continue; + } + rest = prefix_skip(self->argv[0] + 2 + 3, options->long_name); + if (!rest) continue; + opt_flags |= OPT_UNSET; + } + if (*rest) { + if (*rest != '=') continue; + self->optvalue = rest + 1; + } + return argparse_getvalue(self, options, opt_flags | OPT_LONG); + } + return -2; +} + +int argparse_init(struct argparse *self, struct argparse_option *options, + const char *const *usages, int flags) { + memset(self, 0, sizeof(*self)); + self->options = options; + self->usages = usages; + self->flags = flags; + self->description = NULL; + self->epilog = NULL; + return 0; +} + +void argparse_describe(struct argparse *self, const char *description, + const char *epilog) { + self->description = description; + self->epilog = epilog; +} + +int argparse_parse(struct argparse *self, int argc, const char **argv) { + self->argc = argc - 1; + self->argv = argv + 1; + self->out = argv; + + argparse_options_check(self->options); + + for (; self->argc; self->argc--, self->argv++) { + const char *arg = self->argv[0]; + if (arg[0] != '-' || !arg[1]) { + if (self->flags & ARGPARSE_STOP_AT_NON_OPTION) { + goto end; + } + // if it's not option or is a single char '-', copy verbatim + self->out[self->cpidx++] = self->argv[0]; + continue; + } + // short option + if (arg[1] != '-') { + self->optvalue = arg + 1; + switch (argparse_short_opt(self, self->options)) { + case -1: + break; + case -2: + goto unknown; + } + while (self->optvalue) { + switch (argparse_short_opt(self, self->options)) { + case -1: + break; + case -2: + goto unknown; + } + } + continue; + } + // if '--' presents + if (!arg[2]) { + self->argc--; + self->argv++; + break; + } + // long option + switch (argparse_long_opt(self, self->options)) { + case -1: + break; 
+ case -2: + goto unknown; + } + continue; + + unknown: + fprintf(stderr, "error: unknown option `%s`\n", self->argv[0]); + argparse_usage(self); + exit(1); + } + +end: + memmove(self->out + self->cpidx, self->argv, self->argc * sizeof(*self->out)); + self->out[self->cpidx + self->argc] = NULL; + + return self->cpidx + self->argc; +} + +void argparse_usage(struct argparse *self) { + if (self->usages) { + fprintf(stdout, "Usage: %s\n", *self->usages++); + while (*self->usages && **self->usages) + fprintf(stdout, " or: %s\n", *self->usages++); + } else { + fprintf(stdout, "Usage:\n"); + } + + // print description + if (self->description) fprintf(stdout, "%s\n", self->description); + + fputc('\n', stdout); + + const struct argparse_option *options; + + // figure out best width + size_t usage_opts_width = 0; + size_t len; + options = self->options; + for (; options->type != ARGPARSE_OPT_END; options++) { + len = 0; + if ((options)->short_name) { + len += 2; + } + if ((options)->short_name && (options)->long_name) { + len += 2; // separator ", " + } + if ((options)->long_name) { + len += strlen((options)->long_name) + 2; + } + if (options->type == ARGPARSE_OPT_INTEGER) { + len += strlen("=<int>"); + } + if (options->type == ARGPARSE_OPT_FLOAT) { + len += strlen("=<flt>"); + } else if (options->type == ARGPARSE_OPT_STRING) { + len += strlen("=<str>"); + } + len = (len + 3) - ((len + 3) & 3); + if (usage_opts_width < len) { + usage_opts_width = len; + } + } + usage_opts_width += 4; // 4 spaces prefix + + options = self->options; + for (; options->type != ARGPARSE_OPT_END; options++) { + size_t pos = 0; + int pad = 0; + if (options->type == ARGPARSE_OPT_GROUP) { + fputc('\n', stdout); + fprintf(stdout, "%s", options->help); + fputc('\n', stdout); + continue; + } + pos = fprintf(stdout, " "); + if (options->short_name) { + pos += fprintf(stdout, "-%c", options->short_name); + } + if (options->long_name && options->short_name) { + pos += fprintf(stdout, ", "); + } + if 
(options->long_name) { + pos += fprintf(stdout, "--%s", options->long_name); + } + if (options->type == ARGPARSE_OPT_INTEGER) { + pos += fprintf(stdout, "=<int>"); + } + if (options->type == ARGPARSE_OPT_FLOAT) { + pos += fprintf(stdout, "=<flt>"); + } else if (options->type == ARGPARSE_OPT_STRING) { + pos += fprintf(stdout, "=<str>"); + } + if (pos <= usage_opts_width) { + pad = usage_opts_width - pos; + } else { + fputc('\n', stdout); + pad = usage_opts_width; + } + if (options->help != NULL && strlen(options->help) > 0) { + char *str = strdup(options->help); + char *token = strtok(str, " "); + fprintf(stdout, "%*s%s ", pad + 2, "", token); + int count = strlen(token); + int dangling = 1; + while ((token = strtok(NULL, " ")) != NULL) { + if (count == 0) { + fprintf(stdout, "%*s", (int)pos + pad + 2, ""); + dangling = 1; + } + printf("%s ", token); + count += strlen(token); + if (count > 30) { + count = 0; + fprintf(stdout, "\n"); + dangling = 0; + } + } + if (dangling) fprintf(stdout, "\n"); + free(str); + } else { + fprintf(stdout, "\n"); + } + } + + // print epilog + if (self->epilog) fprintf(stdout, "%s\n", self->epilog); +} + +int argparse_help_cb(struct argparse *self, + const struct argparse_option *option) { + (void)option; + argparse_usage(self); + exit(0); +} diff --git a/argparse/argparse.h b/argparse/argparse.h new file mode 100644 index 0000000000000000000000000000000000000000..186214b4bc90cea90ef141380bf0017cc50af128 --- /dev/null +++ b/argparse/argparse.h @@ -0,0 +1,137 @@ +/** + * Copyright (C) 2012-2015 Yecheng Fu <cofyc.jackson at gmail dot com> + * All rights reserved. + * + * Use of this source code is governed by a MIT-style license that can be found + * in the LICENSE file. 
+ */ +#ifndef ARGPARSE_H +#define ARGPARSE_H + +/* For c++ compatibility */ +#ifdef __cplusplus +extern "C" { +#endif + +#include <stdint.h> + +struct argparse; +struct argparse_option; + +typedef int argparse_callback(struct argparse *self, + const struct argparse_option *option); + +enum argparse_flag { + ARGPARSE_STOP_AT_NON_OPTION = 1, +}; + +enum argparse_option_type { + /* special */ + ARGPARSE_OPT_END, + ARGPARSE_OPT_GROUP, + /* options with no arguments */ + ARGPARSE_OPT_BOOLEAN, + ARGPARSE_OPT_BIT, + /* options with arguments (optional or required) */ + ARGPARSE_OPT_INTEGER, + ARGPARSE_OPT_FLOAT, + ARGPARSE_OPT_STRING, +}; + +enum argparse_option_flags { + OPT_NONEG = 1, /* disable negation */ +}; + +/** + * argparse option + * + * `type`: + * holds the type of the option, you must have an ARGPARSE_OPT_END last in + * your array. + * + * `short_name`: + * the character to use as a short option name, '\0' if none. + * + * `long_name`: + * the long option name, without the leading dash, NULL if none. + * + * `value`: + * stores pointer to the value to be filled. + * + * `help`: + * the short help message associated to what the option does. + * Must never be NULL (except for ARGPARSE_OPT_END). + * + * `callback`: + * function is called when corresponding argument is parsed. + * + * `data`: + * associated data. Callbacks can use it like they want. + * + * `flags`: + * option flags. 
+ */ +struct argparse_option { + enum argparse_option_type type; + const char short_name; + const char *long_name; + void *value; + const char *help; + argparse_callback *callback; + intptr_t data; + int flags; +}; + +/** + * argpparse + */ +struct argparse { + // user supplied + const struct argparse_option *options; + const char *const *usages; + int flags; + const char *description; // a description after usage + const char *epilog; // a description at the end + // internal context + int argc; + const char **argv; + const char **out; + int cpidx; + const char *optvalue; // current option value +}; + +// built-in callbacks +int argparse_help_cb(struct argparse *self, + const struct argparse_option *option); + +// built-in option macros +#define OPT_END() \ + { ARGPARSE_OPT_END, 0, NULL, NULL, 0, NULL, 0, 0 } +#define OPT_BOOLEAN(...) \ + { ARGPARSE_OPT_BOOLEAN, __VA_ARGS__ } +#define OPT_BIT(...) \ + { ARGPARSE_OPT_BIT, __VA_ARGS__ } +#define OPT_INTEGER(...) \ + { ARGPARSE_OPT_INTEGER, __VA_ARGS__ } +#define OPT_FLOAT(...) \ + { ARGPARSE_OPT_FLOAT, __VA_ARGS__ } +#define OPT_STRING(...) 
\ + { ARGPARSE_OPT_STRING, __VA_ARGS__ } +#define OPT_GROUP(h) \ + { ARGPARSE_OPT_GROUP, 0, NULL, NULL, h, NULL, 0, 0 } +#define OPT_HELP() \ + OPT_BOOLEAN('h', "help", NULL, "show this help message and exit", \ + argparse_help_cb, 0, 0) + +int argparse_init(struct argparse *self, struct argparse_option *options, + const char *const *usages, int flags); +void argparse_describe(struct argparse *self, const char *description, + const char *epilog); +int argparse_parse(struct argparse *self, int argc, const char **argv); +void argparse_usage(struct argparse *self); + +#ifdef __cplusplus +} +#endif + +#endif diff --git a/argparse/tap-functions b/argparse/tap-functions new file mode 100644 index 0000000000000000000000000000000000000000..84f700e644c0c09246ca747ce5c22cc884dfff6c --- /dev/null +++ b/argparse/tap-functions @@ -0,0 +1,445 @@ +#!/bin/bash + + +_version='1.02' + +_plan_set=0 +_no_plan=0 +_skip_all=0 +_test_died=0 +_expected_tests=0 +_executed_tests=0 +_failed_tests=0 +TODO= + + +usage(){ + cat <<'USAGE' +tap-functions: A TAP-producing BASH library + +PLAN: + plan_no_plan + plan_skip_all [REASON] + plan_tests NB_TESTS + +TEST: + ok RESULT [NAME] + okx COMMAND + is RESULT EXPECTED [NAME] + isnt RESULT EXPECTED [NAME] + like RESULT PATTERN [NAME] + unlike RESULT PATTERN [NAME] + pass [NAME] + fail [NAME] + +SKIP: + skip [CONDITION] [REASON] [NB_TESTS=1] + + skip $feature_not_present "feature not present" 2 || { + is $a "a" + is $b "b" + } + +TODO: + Specify TODO mode by setting $TODO: + TODO="not implemented yet" + ok $result "some not implemented test" + unset TODO + +OTHER: + diag MSG + +EXAMPLE: + #!/bin/bash + + . tap-functions + + plan_tests 7 + + me=$USER + is $USER $me "I am myself" + like $HOME $me "My home is mine" + like "`id`" $me "My id matches myself" + + /bin/ls $HOME 1>&2 + ok $? "/bin/ls $HOME" + # Same thing using okx shortcut + okx /bin/ls $HOME + + [[ "`id -u`" != "0" ]] + i_am_not_root=$? 
+  skip $i_am_not_root "Must be root" || {
+    okx ls /root
+  }
+
+  TODO="figure out how to become root..."
+  okx [ "$HOME" == "/root" ]
+  unset TODO
+USAGE
+  exit
+}
+
+opt=
+set_u=
+while getopts ":u" opt ; do
+  case $opt in
+    u) set_u=1 ;;
+    *) usage ;;
+  esac
+done
+shift $(( OPTIND - 1 ))
+# Don't allow uninitialized variables if requested
+[[ -n "$set_u" ]] && set -u
+unset opt set_u
+
+# Used to call _cleanup on shell exit
+trap _exit EXIT
+
+
+
+plan_no_plan(){
+  (( _plan_set != 0 )) && _die "You tried to plan twice!"
+
+  _plan_set=1
+  _no_plan=1
+
+  return 0
+}
+
+
+plan_skip_all(){
+  local reason=${1:-''}
+
+  (( _plan_set != 0 )) && _die "You tried to plan twice!"
+
+  _print_plan 0 "Skip $reason"
+
+  _skip_all=1
+  _plan_set=1
+  _exit 0
+
+  return 0
+}
+
+
+plan_tests(){
+  local tests=${1:?}
+
+  (( _plan_set != 0 )) && _die "You tried to plan twice!"
+  (( tests == 0 )) && _die "You said to run 0 tests!  You've got to run something."
+
+  _print_plan $tests
+  _expected_tests=$tests
+  _plan_set=1
+
+  return $tests
+}
+
+
+_print_plan(){
+  local tests=${1:?}
+  local directive=${2:-''}
+
+  echo -n "1..$tests"
+  [[ -n "$directive" ]] && echo -n " # $directive"
+  echo
+}
+
+
+pass(){
+  local name=$1
+  ok 0 "$name"
+}
+
+
+fail(){
+  local name=$1
+  ok 1 "$name"
+}
+
+
+# This is the workhorse method that actually
+# prints the tests result.
+ok(){
+  local result=${1:?}
+  local name=${2:-''}
+
+  (( _plan_set == 0 )) && _die "You tried to run a test without a plan!  Gotta have a plan."
+
+  _executed_tests=$(( $_executed_tests + 1 ))
+
+  if [[ -n "$name" ]] ; then
+    if _matches "$name" "^[0-9]+$" ; then
+      diag "    You named your test '$name'.  You shouldn't use numbers for your test names."
+      diag "    Very confusing."
+ fi + fi + + if (( result != 0 )) ; then + echo -n "not " + _failed_tests=$(( _failed_tests + 1 )) + fi + echo -n "ok $_executed_tests" + + if [[ -n "$name" ]] ; then + local ename=${name//\#/\\#} + echo -n " - $ename" + fi + + if [[ -n "$TODO" ]] ; then + echo -n " # TODO $TODO" ; + if (( result != 0 )) ; then + _failed_tests=$(( _failed_tests - 1 )) + fi + fi + + echo + if (( result != 0 )) ; then + local file='tap-functions' + local func= + local line= + + local i=0 + local bt=$(caller $i) + while _matches "$bt" "tap-functions$" ; do + i=$(( $i + 1 )) + bt=$(caller $i) + done + local backtrace= + eval $(caller $i | (read line func file ; echo "backtrace=\"$file:$func() at line $line.\"")) + + local t= + [[ -n "$TODO" ]] && t="(TODO) " + + if [[ -n "$name" ]] ; then + diag " Failed ${t}test '$name'" + diag " in $backtrace" + else + diag " Failed ${t}test in $backtrace" + fi + fi + + return $result +} + + +okx(){ + local command="$@" + + local line= + diag "Output of '$command':" + $command | while read line ; do + diag "$line" + done + ok ${PIPESTATUS[0]} "$command" +} + + +_equals(){ + local result=${1:?} + local expected=${2:?} + + if [[ "$result" == "$expected" ]] ; then + return 0 + else + return 1 + fi +} + + +# Thanks to Aaron Kangas for the patch to allow regexp matching +# under bash < 3. + _bash_major_version=${BASH_VERSION%%.*} +_matches(){ + local result=${1:?} + local pattern=${2:?} + + if [[ -z "$result" || -z "$pattern" ]] ; then + return 1 + else + if (( _bash_major_version >= 3 )) ; then + eval '[[ "$result" =~ "$pattern" ]]' + else + echo "$result" | egrep -q "$pattern" + fi + fi +} + + +_is_diag(){ + local result=${1:?} + local expected=${2:?} + + diag " got: '$result'" + diag " expected: '$expected'" +} + + +is(){ + local result=${1:?} + local expected=${2:?} + local name=${3:-''} + + _equals "$result" "$expected" + (( $? == 0 )) + ok $? "$name" + local r=$? 
+ (( r != 0 )) && _is_diag "$result" "$expected" + return $r +} + + +isnt(){ + local result=${1:?} + local expected=${2:?} + local name=${3:-''} + + _equals "$result" "$expected" + (( $? != 0 )) + ok $? "$name" + local r=$? + (( r != 0 )) && _is_diag "$result" "$expected" + return $r +} + + +like(){ + local result=${1:?} + local pattern=${2:?} + local name=${3:-''} + + _matches "$result" "$pattern" + (( $? == 0 )) + ok $? "$name" + local r=$? + (( r != 0 )) && diag " '$result' doesn't match '$pattern'" + return $r +} + + +unlike(){ + local result=${1:?} + local pattern=${2:?} + local name=${3:-''} + + _matches "$result" "$pattern" + (( $? != 0 )) + ok $? "$name" + local r=$? + (( r != 0 )) && diag " '$result' matches '$pattern'" + return $r +} + + +skip(){ + local condition=${1:?} + local reason=${2:-''} + local n=${3:-1} + + if (( condition == 0 )) ; then + local i= + for (( i=0 ; i<$n ; i++ )) ; do + _executed_tests=$(( _executed_tests + 1 )) + echo "ok $_executed_tests # skip: $reason" + done + return 0 + else + return + fi +} + + +diag(){ + local msg=${1:?} + + if [[ -n "$msg" ]] ; then + echo "# $msg" + fi + + return 1 +} + + +_die(){ + local reason=${1:-'<unspecified error>'} + + echo "$reason" >&2 + _test_died=1 + _exit 255 +} + + +BAIL_OUT(){ + local reason=${1:-''} + + echo "Bail out! $reason" >&2 + _exit 255 +} + + +_cleanup(){ + local rc=0 + + if (( _plan_set == 0 )) ; then + diag "Looks like your test died before it could output anything." + return $rc + fi + + if (( _test_died != 0 )) ; then + diag "Looks like your test died just after $_executed_tests." + return $rc + fi + + if (( _skip_all == 0 && _no_plan != 0 )) ; then + _print_plan $_executed_tests + fi + + local s= + if (( _no_plan == 0 && _expected_tests < _executed_tests )) ; then + s= ; (( _expected_tests > 1 )) && s=s + local extra=$(( _executed_tests - _expected_tests )) + diag "Looks like you planned $_expected_tests test$s but ran $extra extra." 
+ rc=-1 ; + fi + + if (( _no_plan == 0 && _expected_tests > _executed_tests )) ; then + s= ; (( _expected_tests > 1 )) && s=s + diag "Looks like you planned $_expected_tests test$s but only ran $_executed_tests." + fi + + if (( _failed_tests > 0 )) ; then + s= ; (( _failed_tests > 1 )) && s=s + diag "Looks like you failed $_failed_tests test$s of $_executed_tests." + fi + + return $rc +} + + +_exit_status(){ + if (( _no_plan != 0 || _plan_set == 0 )) ; then + return $_failed_tests + fi + + if (( _expected_tests < _executed_tests )) ; then + return $(( _executed_tests - _expected_tests )) + fi + + return $(( _failed_tests + ( _expected_tests - _executed_tests ))) +} + + +_exit(){ + local rc=${1:-''} + if [[ -z "$rc" ]] ; then + _exit_status + rc=$? + fi + + _cleanup + local alt_rc=$? + (( alt_rc != 0 )) && rc=$alt_rc + trap - EXIT + exit $rc +} + diff --git a/argparse/test.sh b/argparse/test.sh new file mode 100755 index 0000000000000000000000000000000000000000..192357d3cc43947593b1db50f2ff46b3092340e6 --- /dev/null +++ b/argparse/test.sh @@ -0,0 +1,63 @@ +#!/bin/bash + +. 
tap-functions +plan_no_plan + +is "$(./test_argparse -f --path=/path/to/file a 2>&1)" 'force: 1 +path: /path/to/file +argc: 1 +argv[0]: a' + +is "$(./test_argparse -f -f --force --no-force 2>&1)" 'force: 2' + +is "$(./test_argparse -i 2>&1)" 'error: option `-i` requires a value' + +is "$(./test_argparse -i 2 2>&1)" 'int_num: 2' + +is "$(./test_argparse -i2 2>&1)" 'int_num: 2' + +is "$(./test_argparse -ia 2>&1)" 'error: option `-i` expects an integer value' + +is "$(./test_argparse -i 0xFFFFFFFFFFFFFFFFF 2>&1)" \ + 'error: option `-i` Numerical result out of range' + +is "$(./test_argparse -s 2.4 2>&1)" 'flt_num: 2.4' + +is "$(./test_argparse -s2.4 2>&1)" 'flt_num: 2.4' + +is "$(./test_argparse -sa 2>&1)" 'error: option `-s` expects a numerical value' + +is "$(./test_argparse -s 1e999 2>&1)" \ + 'error: option `-s` Numerical result out of range' + +is "$(./test_argparse -f -- do -f -h 2>&1)" 'force: 1 +argc: 3 +argv[0]: do +argv[1]: -f +argv[2]: -h' + +is "$(./test_argparse -tf 2>&1)" 'force: 1 +test: 1' + +is "$(./test_argparse --read --write 2>&1)" 'perms: 3' + +is "$(./test_argparse -h)" 'Usage: test_argparse [options] [[--] args] + or: test_argparse [options] + +A brief description of what the program does and how it works. + + -h, --help show this help message and exit + +Basic options + -f, --force force to do + -t, --test test only + -p, --path=<str> path to read + -i, --int=<int> selected integer + -s, --float=<flt> selected float + +Bits options + --read read perm + --write write perm + --exec exec perm + +Additional description of the program after the description of the arguments.' 
diff --git a/argparse/test_argparse.c b/argparse/test_argparse.c new file mode 100644 index 0000000000000000000000000000000000000000..5f411833aafa603d085258f11b8bbb35ff1c6d39 --- /dev/null +++ b/argparse/test_argparse.c @@ -0,0 +1,80 @@ +#include <stdio.h> +#include <stdlib.h> +#include <string.h> +#include "argparse.h" + +static const char *const usages[] = { + "test_argparse [options] [[--] args]", + "test_argparse [options]", + NULL, +}; + +#define PERM_READ (1 << 0) +#define PERM_WRITE (1 << 1) +#define PERM_EXEC (1 << 2) + +struct stuff { + const char *path[10]; + int npath; +}; + +static int callback(struct argparse *self, const struct argparse_option *opt) { + printf("Called back... %s\n", *(char **)opt->value); + struct stuff *data = (struct stuff *)opt->data; + data->path[data->npath] = *(char **)opt->value; + data->npath++; + return 1; +} + +int main(int argc, const char **argv) { + int force = 0; + int self_gravity = 0; + int int_num = 0; + float flt_num = 0.f; + struct stuff data; + data.npath = 0; + data.path[0] = NULL; + const char *buffer; + int perms = 0; + int npath; + + struct argparse_option options[] = { + OPT_HELP(), + OPT_GROUP("Basic options"), + OPT_BOOLEAN('f', "force", &force, "force to do", NULL, 0, 0), + OPT_BOOLEAN(0, "self-gravity", &self_gravity, "use self gravity", NULL, 0, + 0), + OPT_STRING('P', "path", &buffer, "path to read", &callback, + (intptr_t)&data, 0), + OPT_INTEGER('i', "int", &int_num, "selected integer", NULL, 0, 0), + OPT_FLOAT('s', "float", &flt_num, "selected float", NULL, 0, 0), + OPT_END(), + }; + + struct argparse argparse; + argparse_init(&argparse, options, usages, 0); + argparse_describe( + &argparse, + "\nA brief description of what the program does and how it works.", + "\nAdditional description of the program after the description of the " + "arguments."); + argc = argparse_parse(&argparse, argc, argv); + if (force != 0) printf("force: %d\n", force); + if (self_gravity != 0) printf("self_gravity: %d\n", 
self_gravity); + if (data.npath > 0) { + for (int i = 0; i < data.npath; i++) printf("path: %s\n", data.path[i]); + } + if (int_num != 0) printf("int_num: %d\n", int_num); + if (flt_num != 0) printf("flt_num: %g\n", flt_num); + if (argc != 0) { + printf("argc: %d\n", argc); + int i; + for (i = 0; i < argc; i++) { + printf("argv[%d]: %s\n", i, *(argv + i)); + } + } + if (perms) { + printf("perms: %d\n", perms); + } + return 0; +} diff --git a/configure.ac b/configure.ac index 56f88116c845def33300b87630524f06fec666bb..ca1aeb626b30d3170f71420e9700673f9597e07e 100644 --- a/configure.ac +++ b/configure.ac @@ -16,7 +16,7 @@ # along with this program. If not, see <http://www.gnu.org/licenses/>. # Init the project. -AC_INIT([SWIFT],[0.7.0],[https://gitlab.cosma.dur.ac.uk/swift/swiftsim]) +AC_INIT([SWIFT],[0.8.0],[https://gitlab.cosma.dur.ac.uk/swift/swiftsim]) swift_config_flags="$*" AC_COPYRIGHT @@ -54,6 +54,19 @@ AX_COMPILER_VERSION # Restrict support. AC_C_RESTRICT +# logger +AC_ARG_ENABLE([logger], + [AS_HELP_STRING([--enable-logger], + [enable the particle logger] + )], + [with_logger="${enableval}"], + [with_logger="no"] +) + +if test "$with_logger" = "yes"; then + AC_DEFINE([WITH_LOGGER], 1, [logger enabled]) +fi + # Interprocedural optimization support. Needs special handling for linking and # archiving as well as compilation with Intels, needs to be done before # libtool is configured (to use correct LD). 
@@ -270,6 +283,18 @@ elif test "$gravity_force_checks" != "no"; then AC_DEFINE_UNQUOTED([SWIFT_GRAVITY_FORCE_CHECKS], [$enableval] ,[Enable gravity brute-force checks]) fi +# Check whether we want to switch on glass making +AC_ARG_ENABLE([glass-making], + [AS_HELP_STRING([--enable-glass-making], + [Activate the glass-making procedure by reversing the sign of gravity @<:@yes/no@:>@] + )], + [gravity_glass_making="$enableval"], + [gravity_glass_making="no"] +) +if test "$gravity_glass_making" == "yes"; then + AC_DEFINE([SWIFT_MAKE_GRAVITY_GLASS], 1, [Make the code run in a way to produce a glass file for gravity/cosmology]) +fi + # Check if we want to zero the gravity forces for all particles below some ID. AC_ARG_ENABLE([no-gravity-below-id], [AS_HELP_STRING([--enable-no-gravity-below-id], @@ -310,6 +335,16 @@ AC_ARG_ENABLE([vec], [enable_vec="yes"] ) +# Disable hand written vectorisation. Slightly odd implementation as want +# to describe as --disable-hand-vec, but macro is enable (there is no enable action). 
+AC_ARG_ENABLE([hand-vec], + [AS_HELP_STRING([--disable-hand-vec], + [Disable intrinsic vectorization] + )], + [enable_hand_vec="$enableval"], + [enable_hand_vec="yes"] +) + HAVEVECTORIZATION=0 if test "$enable_opt" = "yes" ; then @@ -333,7 +368,6 @@ if test "$enable_opt" = "yes" ; then fi fi - if test "$enable_vec" = "no"; then if test "$ax_cv_c_compiler_vendor" = "intel"; then CFLAGS="$CFLAGS -no-vec -no-simd" @@ -347,8 +381,8 @@ if test "$enable_opt" = "yes" ; then else AC_MSG_WARN([Do not know how to disable vectorization for this compiler]) fi - else - AC_DEFINE([WITH_VECTORIZATION],1,[Enable vectorization]) + elif test "$enable_hand_vec" = "yes"; then + AC_DEFINE([WITH_VECTORIZATION],1,[Enable hand-written vectorization]) HAVEVECTORIZATION=1 fi fi @@ -484,18 +518,20 @@ AC_CHECK_LIB(pthread, posix_fallocate, AC_DEFINE([HAVE_POSIX_FALLOCATE], [1], [The posix library implements file allocation functions.]), AC_MSG_WARN(POSIX implementation does not have file allocation functions.)) -# Check for METIS. Note AX_LIB_METIS exists, but cannot be configured -# to be default off (i.e. given no option it tries to locate METIS), so we -# don't use that. +# Check for METIS. have_metis="no" AC_ARG_WITH([metis], [AS_HELP_STRING([--with-metis=PATH], - [root directory where metis is installed @<:@yes/no@:>@] + [root directory where METIS is installed @<:@yes/no@:>@] )], [with_metis="$withval"], [with_metis="no"] ) + +METIS_LIBS="" if test "x$with_metis" != "xno"; then + +# Check if we have METIS. 
if test "x$with_metis" != "xyes" -a "x$with_metis" != "x"; then METIS_LIBS="-L$with_metis/lib -lmetis" METIS_INCS="-I$with_metis/include" @@ -503,15 +539,67 @@ if test "x$with_metis" != "xno"; then METIS_LIBS="-lmetis" METIS_INCS="" fi - have_metis="yes" - AC_CHECK_LIB([metis],[METIS_PartGraphKway], - AC_DEFINE([HAVE_METIS],1,[The metis library appears to be present.]), - AC_MSG_ERROR(something is wrong with the metis library!),$METIS_LIBS) + AC_CHECK_LIB([metis],[METIS_PartGraphKway], [have_metis="yes"], + [have_metis="no"], $METIS_LIBS) + if test "$have_metis" == "yes"; then + AC_DEFINE([HAVE_METIS],1,[The METIS library is present.]) + else + AC_MSG_ERROR("Failed to find a METIS library") + fi fi + AC_SUBST([METIS_LIBS]) AC_SUBST([METIS_INCS]) AM_CONDITIONAL([HAVEMETIS],[test -n "$METIS_LIBS"]) +# Check for ParMETIS note we can have both as ParMETIS uses METIS. +have_parmetis="no" +AC_ARG_WITH([parmetis], + [AS_HELP_STRING([--with-parmetis=PATH], + [root directory where ParMETIS is installed @<:@yes/no@:>@] + )], + [with_parmetis="$withval"], + [with_parmetis="no"] +) + +if test "x$with_parmetis" != "xno"; then + +# Check if we have ParMETIS. + if test "x$with_parmetis" != "xyes" -a "x$with_parmetis" != "x"; then + PARMETIS_LIBS="-L$with_parmetis/lib -lparmetis" + PARMETIS_INCS="-I$with_parmetis/include" + else + PARMETIS_LIBS="-lparmetis" + PARMETIS_INCS="" + fi + AC_CHECK_LIB([parmetis],[ParMETIS_V3_RefineKway], [have_parmetis="yes"], + [have_parmetis="no"], $PARMETIS_LIBS) + if test "$have_parmetis" == "no"; then + +# A build may use an external METIS library, check for that. 
+ + if test "x$with_parmetis" != "xyes" -a "x$with_parmetis" != "x"; then + PARMETIS_LIBS="-L$with_parmetis/lib -lparmetis -lmetis" + PARMETIS_INCS="-I$with_parmetis/include" + else + PARMETIS_LIBS="-lparmetis -lmetis" + PARMETIS_INCS="" + fi + AC_CHECK_LIB([parmetis],[ParMETIS_V3_RefineKway], [have_parmetis="yes"], + [have_parmetis="no"], [$METIS_LIBS $PARMETIS_LIBS]) + + fi + if test "$have_parmetis" == "yes"; then + AC_DEFINE([HAVE_PARMETIS],1,[The ParMETIS library is present.]) + else + AC_MSG_ERROR("Failed to find a ParMETIS library") + fi +fi + +AC_SUBST([PARMETIS_LIBS]) +AC_SUBST([PARMETIS_INCS]) +AM_CONDITIONAL([HAVEPARMETIS],[test -n "$PARMETIS_LIBS"]) + # METIS fixed width integer printing can require this, so define. Only needed # for some non C99 compilers, i.e. C++ pre C++11. AH_VERBATIM([__STDC_FORMAT_MACROS], @@ -522,7 +610,8 @@ AH_VERBATIM([__STDC_FORMAT_MACROS], # Check for FFTW. We test for this in the standard directories by default, # and only disable if using --with-fftw=no or --without-fftw. When a value -# is given GSL must be found. +# is given FFTW must be found. +# If FFTW is found, we check whether this is the threaded version. have_fftw="no" AC_ARG_WITH([fftw], [AS_HELP_STRING([--with-fftw=PATH], @@ -532,6 +621,8 @@ AC_ARG_WITH([fftw], [with_fftw="test"] ) if test "x$with_fftw" != "xno"; then + + # Was FFTW's location specifically given? if test "x$with_fftw" != "xyes" -a "x$with_fftw" != "xtest" -a "x$with_fftw" != "x"; then FFTW_LIBS="-L$with_fftw/lib -lfftw3" FFTW_INCS="-I$with_fftw/include" @@ -539,22 +630,51 @@ if test "x$with_fftw" != "xno"; then FFTW_LIBS="-lfftw3" FFTW_INCS="" fi + # FFTW is not specified, so just check if we have it. if test "x$with_fftw" = "xtest"; then AC_CHECK_LIB([fftw3],[fftw_malloc],[have_fftw="yes"],[have_fftw="no"],$FFTW_LIBS) if test "x$have_fftw" != "xno"; then AC_DEFINE([HAVE_FFTW],1,[The FFTW library appears to be present.]) fi + # FFTW was specified, check that it was a valid location. 
else AC_CHECK_LIB([fftw3],[fftw_malloc], AC_DEFINE([HAVE_FFTW],1,[The FFTW library appears to be present.]), AC_MSG_ERROR(something is wrong with the FFTW library!), $FFTW_LIBS) have_fftw="yes" fi + + # FFTW was requested not to be used. if test "$have_fftw" = "no"; then FFTW_LIBS="" FFTW_INCS="" fi + + # Now, check whether we have the threaded version of FFTW + if test "x$have_fftw" = "xyes"; then + + # Was FFTW's location specifically given? + if test "x$with_fftw" != "xyes" -a "x$with_fftw" != "xtest" -a "x$with_fftw" != "x"; then + FFTW_THREADED_LIBS="-L$with_fftw/lib -lfftw3_threads -lfftw3" + FFTW_THREADED_INCS="-I$with_fftw/include" + else + FFTW_THREADED_LIBS="-lfftw3_threads -lfftw3" + FFTW_THREADED_INCS="" + fi + + # Verify that the library is threaded + AC_CHECK_LIB([fftw3],[fftw_init_threads],[have_threaded_fftw="yes"], + [have_threaded_fftw="no"], $FFTW_THREADED_LIBS) + + # If found, update things + if test "x$have_threaded_fftw" = "xyes"; then + AC_DEFINE([HAVE_THREADED_FFTW],1,[The threaded FFTW library appears to be present.]) + FFTW_LIBS=$FFTW_THREADED_LIBS + FFTW_INCS=$FFTW_THREADED_INCS + have_fftw="yes - threaded" + fi + fi fi AC_SUBST([FFTW_LIBS]) AC_SUBST([FFTW_INCS]) @@ -576,7 +696,7 @@ if test "x$with_profiler" != "xno"; then proflibs="-lprofiler" fi AC_CHECK_LIB([profiler],[ProfilerFlush], - [have_profiler="yes" + [have_profiler="yes" AC_DEFINE([WITH_PROFILER],1,[Link against the gperftools profiling library.])], [have_profiler="no"], $proflibs) @@ -771,6 +891,43 @@ if test "$with_hdf5" = "yes"; then fi AM_CONDITIONAL([HAVEPARALLELHDF5],[test "$have_parallel_hdf5" = "yes"]) +# Check for grackle. 
+have_grackle="no" +AC_ARG_WITH([grackle], + [AS_HELP_STRING([--with-grackle=PATH], + [root directory where grackle is installed @<:@yes/no@:>@] + )], + [with_grackle="$withval"], + [with_grackle="no"] +) +if test "x$with_grackle" != "xno"; then + AC_PROG_FC + AC_FC_LIBRARY_LDFLAGS + if test "x$with_grackle" != "xyes" -a "x$with_grackle" != "x"; then + GRACKLE_LIBS="-L$with_grackle/lib -lgrackle" + GRACKLE_INCS="-I$with_grackle/include" + else + GRACKLE_LIBS="-lgrackle" + GRACKLE_INCS="" + fi + + have_grackle="yes" + + echo $GRACKLE_LIBS + + AC_CHECK_LIB( + [grackle], + [initialize_chemistry_data], + [AC_DEFINE([HAVE_GRACKLE],1,[The GRACKLE library appears to be present.]) + AC_DEFINE([CONFIG_BFLOAT_8],1,[Use doubles in grackle]) + ], + [AC_MSG_ERROR(Cannot find grackle library!)], + [$GRACKLE_LIBS]) +fi +AC_SUBST([GRACKLE_LIBS]) +AC_SUBST([GRACKLE_INCS]) +AM_CONDITIONAL([HAVEGRACKLE],[test -n "$GRACKLE_LIBS"]) + # Check for VELOCIraptor. have_velociraptor="no" AC_ARG_WITH([velociraptor], @@ -803,6 +960,22 @@ fi AC_SUBST([VELOCIRAPTOR_LIBS]) AM_CONDITIONAL([HAVEVELOCIRAPTOR],[test -n "$VELOCIRAPTOR_LIBS"]) +# Check for dummy VELOCIraptor. 
+AC_ARG_ENABLE([dummy-velociraptor], + [AS_HELP_STRING([--enable-dummy-velociraptor], + [Enable dummy velociraptor compilation @<:@yes/no@:>@] + )], + [enable_dummy_velociraptor="$enableval"], + [enable_dummy_velociraptor="no"] +) + +if test "$enable_dummy_velociraptor" = "yes"; then + have_velociraptor="yes" + + AC_DEFINE(HAVE_VELOCIRAPTOR,1,[The VELOCIraptor library appears to be present.]) + AC_DEFINE(HAVE_DUMMY_VELOCIRAPTOR,1,[The dummy VELOCIraptor library is present.]) +fi + # Check for floating-point execeptions AC_CHECK_FUNC(feenableexcept, AC_DEFINE([HAVE_FE_ENABLE_EXCEPT],[1], [Defined if the floating-point exception can be enabled using non-standard GNU functions.])) @@ -938,11 +1111,15 @@ case "$with_subgrid" in with_subgrid_cooling=grackle with_subgrid_chemistry=GEAR with_subgrid_hydro=gadget2 + with_subgrid_stars=GEAR + with_subgrid_feedback=thermal ;; EAGLE) with_subgrid_cooling=EAGLE with_subgrid_chemistry=EAGLE with_subgrid_hydro=gadget2 + with_subgrid_stars=none + with_subgrid_feedback=none ;; *) AC_MSG_ERROR([Unknown subgrid choice: $with_subgrid]) @@ -973,7 +1150,7 @@ esac # Hydro scheme. 
AC_ARG_WITH([hydro], [AS_HELP_STRING([--with-hydro=<scheme>], - [Hydro dynamics to use @<:@gadget2, minimal, pressure-entropy, pressure-energy, default, gizmo-mfv, gizmo-mfm, shadowfax, minimal-multi-mat, debug default: gadget2@:>@] + [Hydro dynamics to use @<:@gadget2, minimal, pressure-entropy, pressure-energy, pressure-energy-monaghan, default, gizmo-mfv, gizmo-mfm, shadowfax, planetary, debug default: gadget2@:>@] )], [with_hydro="$withval"], [with_hydro="gadget2"] @@ -1000,6 +1177,9 @@ case "$with_hydro" in pressure-energy) AC_DEFINE([HOPKINS_PU_SPH], [1], [Pressure-Energy SPH]) ;; + pressure-energy-monaghan) + AC_DEFINE([HOPKINS_PU_SPH_MONAGHAN], [1], [Pressure-Energy SPH with M&M Variable A.V.]) + ;; default) AC_DEFINE([DEFAULT_SPH], [1], [Default SPH]) ;; @@ -1012,15 +1192,35 @@ case "$with_hydro" in shadowfax) AC_DEFINE([SHADOWFAX_SPH], [1], [Shadowfax SPH]) ;; - minimal-multi-mat) - AC_DEFINE([MINIMAL_MULTI_MAT_SPH], [1], [Minimal Multiple Material SPH]) + planetary) + AC_DEFINE([PLANETARY_SPH], [1], [Planetary SPH]) ;; + *) AC_MSG_ERROR([Unknown hydrodynamics scheme: $with_hydro]) ;; esac +# Check if debugging interactions stars is switched on. +AC_ARG_ENABLE([debug-interactions-stars], + [AS_HELP_STRING([--enable-debug-interactions-stars], + [Activate interaction debugging for stars, logging a maximum of @<:@N@:>@ neighbours. Defaults to 256 if no value set.] 
+ )], + [enable_debug_interactions_stars="$enableval"], + [enable_debug_interactions_stars="no"] +) +if test "$enable_debug_interactions_stars" != "no"; then + AC_DEFINE([DEBUG_INTERACTIONS_STARS],1,[Enable interaction debugging for stars]) + if test "$enable_debug_interactions_stars" == "yes"; then + AC_DEFINE([MAX_NUM_OF_NEIGHBOURS_STARS],256,[The maximum number of particle neighbours to be logged for stars]) + [enable_debug_interactions_stars="yes (Logging up to 256 neighbours)"] + else + AC_DEFINE_UNQUOTED([MAX_NUM_OF_NEIGHBOURS_STARS], [$enableval] ,[The maximum number of particle neighbours to be logged for stars]) + [enable_debug_interactions_stars="yes (Logging up to $enableval neighbours)"] + fi +fi + # Check if debugging interactions is switched on. AC_ARG_ENABLE([debug-interactions], [AS_HELP_STRING([--enable-debug-interactions], @@ -1044,6 +1244,7 @@ if test "$enable_debug_interactions" != "no"; then fi fi + # SPH Kernel function AC_ARG_WITH([kernel], [AS_HELP_STRING([--with-kernel=<kernel>], @@ -1173,43 +1374,6 @@ case "$with_riemann" in AC_MSG_ERROR([Unknown Riemann solver: $with_riemann]) ;; esac - -# Check for grackle. 
-have_grackle="no" -AC_ARG_WITH([grackle], - [AS_HELP_STRING([--with-grackle=PATH], - [root directory where grackle is installed @<:@yes/no@:>@] - )], - [with_grackle="$withval"], - [with_grackle="no"] -) -if test "x$with_grackle" != "xno"; then - AC_PROG_FC - AC_FC_LIBRARY_LDFLAGS - if test "x$with_grackle" != "xyes" -a "x$with_grackle" != "x"; then - GRACKLE_LIBS="-L$with_grackle/lib -lgrackle" - GRACKLE_INCS="-I$with_grackle/include" - else - GRACKLE_LIBS="-lgrackle" - GRACKLE_INCS="" - fi - - have_grackle="yes" - - AC_CHECK_LIB( - [grackle], - [initialize_chemistry_data], - [AC_DEFINE([HAVE_GRACKLE],1,[The GRACKLE library appears to be present.]) - AC_DEFINE([CONFIG_BFLOAT_8],1,[Use doubles in grackle]) - ], - [AC_MSG_ERROR(Cannot find grackle library!)], - [$GRACKLE_LIBS $GRACKLE_INCS $FCLIBS] - ) -fi -AC_SUBST([GRACKLE_LIBS]) -AC_SUBST([GRACKLE_INCS]) -AM_CONDITIONAL([HAVEGRACKLE],[test -n "$GRACKLE_LIBS"]) - # Cooling function AC_ARG_WITH([cooling], [AS_HELP_STRING([--with-cooling=<function>], @@ -1237,6 +1401,9 @@ case "$with_cooling" in const-lambda) AC_DEFINE([COOLING_CONST_LAMBDA], [1], [Const Lambda cooling function]) ;; + compton) + AC_DEFINE([COOLING_COMPTON], [1], [Compton cooling off the CMB]) + ;; grackle) AC_DEFINE([COOLING_GRACKLE], [1], [Cooling via the grackle library]) AC_DEFINE([COOLING_GRACKLE_MODE], [0], [Grackle chemistry network, mode 0]) @@ -1293,10 +1460,68 @@ case "$with_chemistry" in ;; esac +# Stellar model. 
+AC_ARG_WITH([stars], + [AS_HELP_STRING([--with-stars=<model>], + [Stellar model to use @<:@none, GEAR, debug default: none@:>@] + )], + [with_stars="$withval"], + [with_stars="none"] +) + +if test "$with_subgrid" != "none"; then + if test "$with_stars" != "none"; then + AC_MSG_ERROR([Cannot provide with-subgrid and with-stars together]) + else + with_stars="$with_subgrid_stars" + fi +fi + +case "$with_stars" in + GEAR) + AC_DEFINE([STARS_GEAR], [1], [GEAR stellar model]) + ;; + none) + ;; + + *) + AC_MSG_ERROR([Unknown stellar model: $with_stars]) + ;; +esac + +# Feedback model +AC_ARG_WITH([feedback], + [AS_HELP_STRING([--with-feedback=<model>], + [Feedback model to use @<:@none, thermal, debug default: none@:>@] + )], + [with_feedback="$withval"], + [with_feedback="none"] +) + +if test "$with_subgrid" != "none"; then + if test "$with_feedback" != "none"; then + AC_MSG_ERROR([Cannot provide with-subgrid and with-feedback together]) + else + with_feedback="$with_subgrid_feedback" + fi +fi + +case "$with_feedback" in + thermal) + AC_DEFINE([FEEDBACK_THERMAL], [1], [Thermal Blastwave]) + ;; + none) + ;; + + *) + AC_MSG_ERROR([Unknown feedback model: $with_feedback]) + ;; +esac + # External potential AC_ARG_WITH([ext-potential], [AS_HELP_STRING([--with-ext-potential=<pot>], - [external potential @<:@none, point-mass, point-mass-ring, point-mass-softened, isothermal, softened-isothermal, disc-patch, sine-wave, default: none@:>@] + [external potential @<:@none, point-mass, point-mass-ring, point-mass-softened, isothermal, softened-isothermal, nfw, hernquist, disc-patch, sine-wave, default: none@:>@] )], [with_potential="$withval"], [with_potential="none"] @@ -1311,6 +1536,12 @@ case "$with_potential" in isothermal) AC_DEFINE([EXTERNAL_POTENTIAL_ISOTHERMAL], [1], [Isothermal external potential]) ;; + hernquist) + AC_DEFINE([EXTERNAL_POTENTIAL_HERNQUIST], [1], [Hernquist external potential]) + ;; + nfw) + AC_DEFINE([EXTERNAL_POTENTIAL_NFW], [1], [Navarro-Frenk-White 
external potential]) + ;; disc-patch) AC_DEFINE([EXTERNAL_POTENTIAL_DISC_PATCH], [1], [Disc-patch external potential]) ;; @@ -1346,12 +1577,18 @@ AC_SUBST([GIT_CMD]) DX_INIT_DOXYGEN(libswift,doc/Doxyfile,doc/) AM_CONDITIONAL([HAVE_DOXYGEN], [test "$ac_cv_path_ac_pt_DX_DOXYGEN" != ""]) +# Check if using EAGLE cooling +AM_CONDITIONAL([HAVEEAGLECOOLING], [test $with_cooling = "EAGLE"]) + # Handle .in files. -AC_CONFIG_FILES([Makefile src/Makefile examples/Makefile doc/Makefile doc/Doxyfile tests/Makefile]) +AC_CONFIG_FILES([Makefile src/Makefile examples/Makefile examples/CoolingRates/Makefile doc/Makefile doc/Doxyfile tests/Makefile]) +AC_CONFIG_FILES([argparse/Makefile tools/Makefile]) AC_CONFIG_FILES([tests/testReading.sh], [chmod +x tests/testReading.sh]) AC_CONFIG_FILES([tests/testActivePair.sh], [chmod +x tests/testActivePair.sh]) AC_CONFIG_FILES([tests/test27cells.sh], [chmod +x tests/test27cells.sh]) AC_CONFIG_FILES([tests/test27cellsPerturbed.sh], [chmod +x tests/test27cellsPerturbed.sh]) +AC_CONFIG_FILES([tests/test27cellsStars.sh], [chmod +x tests/test27cellsStars.sh]) +AC_CONFIG_FILES([tests/test27cellsStarsPerturbed.sh], [chmod +x tests/test27cellsStarsPerturbed.sh]) AC_CONFIG_FILES([tests/test125cells.sh], [chmod +x tests/test125cells.sh]) AC_CONFIG_FILES([tests/test125cellsPerturbed.sh], [chmod +x tests/test125cellsPerturbed.sh]) AC_CONFIG_FILES([tests/testPeriodicBC.sh], [chmod +x tests/testPeriodicBC.sh]) @@ -1359,12 +1596,14 @@ AC_CONFIG_FILES([tests/testPeriodicBCPerturbed.sh], [chmod +x tests/testPeriodic AC_CONFIG_FILES([tests/testInteractions.sh], [chmod +x tests/testInteractions.sh]) AC_CONFIG_FILES([tests/testParser.sh], [chmod +x tests/testParser.sh]) AC_CONFIG_FILES([tests/testSelectOutput.sh], [chmod +x tests/testSelectOutput.sh]) +AC_CONFIG_FILES([tests/testFormat.sh], [chmod +x tests/testFormat.sh]) # Save the compilation options AC_DEFINE_UNQUOTED([SWIFT_CONFIG_FLAGS],["$swift_config_flags"],[Flags passed to configure]) -# Make sure the 
latest git revision string gets included -touch src/version.c +# Make sure the latest git revision string gets included, when we are +# working in a checked out repository. +test -d ${srcdir}/.git && touch ${srcdir}/src/version.c # Need to define this, instead of using fifth argument of AC_INIT, until # 2.64. Defer until now as this redefines PACKAGE_URL, which can emit a @@ -1387,7 +1626,7 @@ AC_MSG_RESULT([ MPI enabled : $enable_mpi HDF5 enabled : $with_hdf5 - parallel : $have_parallel_hdf5 - Metis enabled : $have_metis + METIS/ParMETIS : $have_metis / $have_parmetis FFTW3 enabled : $have_fftw GSL enabled : $have_gsl libNUMA enabled : $have_numa @@ -1396,6 +1635,7 @@ AC_MSG_RESULT([ CPU profiler : $have_profiler Pthread barriers : $have_pthread_barrier VELOCIraptor enabled : $have_velociraptor + Particle Logger : $with_logger Hydro scheme : $with_hydro Dimensionality : $with_dimension @@ -1407,18 +1647,22 @@ AC_MSG_RESULT([ Gravity scheme : $with_gravity Multipole order : $with_multipole_order No gravity below ID : $no_gravity_below_id + Make gravity glass : $gravity_glass_making External potential : $with_potential Cooling function : $with_cooling Chemistry : $with_chemistry - - Individual timers : $enable_timers - Task debugging : $enable_task_debugging - Threadpool debugging : $enable_threadpool_debugging - Debugging checks : $enable_debugging_checks - Interaction debugging : $enable_debug_interactions - Naive interactions : $enable_naive_interactions - Gravity checks : $gravity_force_checks - Custom icbrtf : $enable_custom_icbrtf + Stellar model : $with_stars + Feedback model : $with_feedback + + Individual timers : $enable_timers + Task debugging : $enable_task_debugging + Threadpool debugging : $enable_threadpool_debugging + Debugging checks : $enable_debugging_checks + Interaction debugging : $enable_debug_interactions + Stars interaction debugging : $enable_debug_interactions_stars + Naive interactions : $enable_naive_interactions + Gravity checks : 
$gravity_force_checks + Custom icbrtf : $enable_custom_icbrtf ------------------------]) diff --git a/doc/Doxyfile.in b/doc/Doxyfile.in index cba52250ccc37f50ed130c70d8a5039d8c786474..d2dd87257ea7da2b78bfe0503870112b830ee22c 100644 --- a/doc/Doxyfile.in +++ b/doc/Doxyfile.in @@ -761,11 +761,15 @@ WARN_LOGFILE = INPUT = @top_srcdir@ @top_srcdir@/src @top_srcdir@/tests @top_srcdir@/examples INPUT += @top_srcdir@/src/hydro/Minimal +INPUT += @top_srcdir@/src/hydro/Gadget2 INPUT += @top_srcdir@/src/gravity/Default INPUT += @top_srcdir@/src/stars/Default INPUT += @top_srcdir@/src/riemann INPUT += @top_srcdir@/src/potential/point_mass INPUT += @top_srcdir@/src/equation_of_state/ideal_gas +INPUT += @top_srcdir@/src/cooling/const_du +INPUT += @top_srcdir@/src/cooling/const_lambda +INPUT += @top_srcdir@/src/cooling/Compton INPUT += @top_srcdir@/src/cooling/EAGLE INPUT += @top_srcdir@/src/chemistry/EAGLE diff --git a/doc/RTD/source/CommandLineOptions/index.rst b/doc/RTD/source/CommandLineOptions/index.rst new file mode 100644 index 0000000000000000000000000000000000000000..9fb9d784d6057e4d9aa4a923143d622e577f142c --- /dev/null +++ b/doc/RTD/source/CommandLineOptions/index.rst @@ -0,0 +1,62 @@ +.. Command line options + Matthieu Schaller, 21st October 2018 + +.. _cmdline-options: + +Command line options +==================== + +SWIFT requires a number of runtime options to run and get any sensible output. +For instance, just running the ``swift`` binary will not use any SPH or gravity; +the particles will just sit still! + +Below is a list of the command line options and when they should be used. The same list +can be found by typing ``./swift -h``:: + + -h, --help show this help message and exit + + Simulation options: + -b, --feedback Run with stars feedback + -c, --cosmology Run with cosmological time integration. + -C, --cooling Run with cooling + -D, --drift-all Always drift all particles even the ones + far from active particles. 
This emulates + Gadget-[23] and GIZMO's default behaviours. + -F, --sourceterms + -g, --external-gravity Run with an external gravitational potential. + -G, --self-gravity Run with self-gravity. + -M, --multipole-reconstruction Reconstruct the multipoles every time-step. + -s, --hydro Run with hydrodynamics. + -S, --stars Run with stars + -x, --velociraptor Run with structure finding + + Control options: + -a, --pin Pin runners using processor affinity. + -d, --dry-run Dry run. Read the parameter file, allocates + memory but does not read the particles + from ICs. Exits before the start of time + integration. Checks the validity of + parameters and IC files as well as memory + limits. + -e, --fpe Enable floating-point exceptions (debugging + mode). + -f, --cpu-frequency=<str> Overwrite the CPU frequency (Hz) to be + used for time measurements. + -n, --steps=<int> Execute a fixed number of time steps. + When unset use the time_end parameter + to stop. + -o, --output-params=<str> Generate a default output parameter + file. + -P, --param=<str> Set parameter value, overriding the value + read from the parameter file. Can be used + more than once {sec:par:value}. + -r, --restart Continue using restart files. + -t, --threads=<int> The number of threads to use on each MPI + rank. Defaults to 1 if not specified. + -T, --timers=<int> Print timers every time-step. + -v, --verbose=<int> Run in verbose mode, in MPI mode 2 outputs + from all ranks. + -y, --task-dumps=<int> Time-step frequency at which task graphs + are dumped. + -Y, --threadpool-dumps=<int> Time-step frequency at which threadpool + tasks are dumped. diff --git a/doc/RTD/source/Cooling/index.rst b/doc/RTD/source/Cooling/index.rst index 46a01b2a054629b7fc13f0ea190c2a5a0fdd6d9c..00b84489c647bbca4948714a95b3d5cd5fb3cce1 100644 --- a/doc/RTD/source/Cooling/index.rst +++ b/doc/RTD/source/Cooling/index.rst @@ -31,7 +31,7 @@ cooling contains a temperature floor avoiding negative temperature.
Grackle ~~~~~~~ -Grackle is a chemistry and cooling library presented in B. Smith et al. 2016 +Grackle is a chemistry and cooling library presented in `B. Smith et al. 2016 <https://arxiv.org/abs/1610.09591>`_ (do not forget to cite if used). Four different modes are available: equilibrium, 6 species network (H, H\\( ^+ \\), e\\( ^- \\), He, He\\( ^+ \\) and He\\( ^{++} \\)), 9 species network (adds H\\(^-\\), H\\(_2\\) and @@ -45,6 +45,19 @@ to provide an HDF5 table computed by Cloudy. When starting a simulation without providing the different fractions, the code supposes an equilibrium and computes the fractions automatically. +In order to compile SWIFT with Grackle, you need to provide the options ``with-grackle`` +and ``with-chemistry``. + +You will need a Grackle version later than 3.1. To compile it, run +the following commands from the root directory of Grackle: +``./configure; cd src/clib``. +Update the variables ``LOCAL_HDF5_INSTALL`` and ``MACH_INSTALL_PREFIX`` in +the file ``src/clib/Make.mach.linux-gnu``. +Finish with ``make machine-linux-gnu; make && make install``. +If you encounter any problem, you can look at the `Grackle documentation <https://grackle.readthedocs.io/en/latest/>`_ + +You can now provide the path given for ``MACH_INSTALL_PREFIX`` to ``with-grackle``. + Eagle ~~~~~ @@ -53,7 +66,7 @@ TODO How to Implement a New Cooling ------------------------------ -The developper should provide at least one function for: +The developer should provide at least one function for: * writing the cooling name in HDF5 * cooling a particle * the maximal time step possible diff --git a/doc/RTD/source/ExternalPotentials/index.rst b/doc/RTD/source/ExternalPotentials/index.rst new file mode 100644 index 0000000000000000000000000000000000000000..ca33eb8189eea216863feb02579344aa22916696 --- /dev/null +++ b/doc/RTD/source/ExternalPotentials/index.rst @@ -0,0 +1,80 @@ +.. 
External potentials in SWIFT + Folkert Nobels, 25th October 2018 + +External Potentials +=================== + +SWIFT can be run with an external potential on this page we will summarize the +current potentials which can be run with SWIFT and how to implement your own +potential in SWIFT. + +Implemented External Potentials +------------------------------- + +Currently there are several potentials implemented in SWIFT. On this page we +give a short overview of the potentials that are implemented in the code: + +1. No potential (none) +2. Point mass potential (point-mass): classical point mass, can be placed at + a position with a mass. +3. Plummer potential (point-mass-softened): in the code a softened point mass + corresponds to a Plummer potential, can be placed at a position with a mass. +4. Isothermal potential (isothermal): An isothermal potential which corresponds + to a density profile which is :math:`\propto r^{-2}` and a potential which is + logarithmic. This potential has as free parameters the rotation velocity + and the position. +5. Hernquist potential (hernquist): A potential that is given by the Hernquist + potential: + + :math:`\Phi(r) = - \frac{GM}{r+a}.` + + The free parameters of Hernquist potential are mass, scale length, + and softening. The potential can be set at any position in the box. +6. NFW potential (nfw): The most used potential to describe dark matter halos, the + potential is given by: + + :math:`\Phi(r) = - \frac{4\pi G \rho_0 R_s^3}{r} \ln \left( 1+ + \frac{r}{R_s} \right).` + + This potential has as free parameters the concentration of the DM halo, the + virial mass (:math:`M_{200}`) and the critical density. +7. Sine wave (sine-wave) +8. Point mass ring (point-mass-ring) +9. 
Disc Patch (disc-patch) + + +How to implement your own potential +----------------------------------- + +The first step in implementing your own potential is making a directory of your +potential in the ``src/potential`` folder and creating a file in the folder +called ``potential.h``. + +Configuring the potential +^^^^^^^^^^^^^^^^^^^^^^^^^ + +To get started you can copy a ``potential.h`` file from an already implemented +potential. In this potential the header guards (e.g. ``#IFDEF <>``) need to be +changed to the specific potential and the ``struct`` and +``potential_init_backend`` need to be changed such that it uses your potential +and reads the correct potential from the parameter file during running the +program. + +Add the potential to the ``potential.h`` file in the ``src`` directory such that +the program knows that it is possible to run with this potential. + +Furthermore during the configuration of the code it also needs to be clear for +the program that the code can be configured to run with the different +potentials. This means that the ``configure.ac`` file needs to be changed. +This can be done to add another case in the potential:: + + case "$with_potential" in + none) + AC_DEFINE([EXTERNAL_POTENTIAL_NONE], [1], [No external potential]) + ;; + newpotential) + AC_DEFINE([EXTERNAL_POTENTIAL_NEWPOTENTIAL], [1], [New external potential]) + ;; + +After this change it is possible to configure the code to use your new potential. + diff --git a/doc/RTD/source/GettingStarted/configuration_options.rst b/doc/RTD/source/GettingStarted/configuration_options.rst index e37384cfd1c29cb1df82cc180a763f4859650b2e..7dca5cddb0012b9a2146640b9373cfbe81c8dbdd 100644 --- a/doc/RTD/source/GettingStarted/configuration_options.rst +++ b/doc/RTD/source/GettingStarted/configuration_options.rst @@ -45,6 +45,6 @@ Several cooling implementations (including GRACKLE) are available. Many external potentials are available for use with SWIFT.
You can choose between them at compile time. Some examples include a central potential, a softened central potential, and a sinusoidal potential. You will need to -configure, for example, the mass in your parameterfile at runtime. +configure, for example, the mass in your parameter file at runtime. diff --git a/doc/RTD/source/GettingStarted/index.rst b/doc/RTD/source/GettingStarted/index.rst index d15a8eee2f3b9089a1c8ee033f9aa3ee7ad92a5f..2086bcfb4af0ac1b7bbc24c34caa85fa1ebec498 100644 --- a/doc/RTD/source/GettingStarted/index.rst +++ b/doc/RTD/source/GettingStarted/index.rst @@ -20,6 +20,6 @@ and keep on your desk. running_example runtime_options configuration_options - parameter_file what_about_mpi running_on_large_systems + special_modes diff --git a/doc/RTD/source/GettingStarted/parameter_file.rst b/doc/RTD/source/GettingStarted/parameter_file.rst deleted file mode 100644 index 1bf4cbd3940a104c126aa256759edc24ca996338..0000000000000000000000000000000000000000 --- a/doc/RTD/source/GettingStarted/parameter_file.rst +++ /dev/null @@ -1,56 +0,0 @@ -.. Parameter File - Loic Hausammann, 1 june 2018 - -.. _Parameter_File_label: - -Parameter File -============== - -To run SWIFT, you will need to provide a ``yaml`` parameter file. An example is -given in ``examples/parameter_file.yml`` which should contain all possible -parameters. Each section in this file corresponds to a different option in -SWIFT and are not always required depending on the configuration options and -the run time parameters. - -Output List -~~~~~~~~~~~ - -In the sections ``Snapshots`` and ``Statistics``, you can specify the options ``output_list_on`` and ``output_list`` which receive an int and a filename. -The ``output_list_on`` enable or not the output list and ``output_list`` is the filename containing the output times. -With the file header, you can choose between writing redshifts, scale factors or times. 
- -Example of file containing with times (in internal units): -:: - # Time - 0.5 - 1.5 - 3.0 - 12.5 - -Example of file with scale factors: -:: - # Scale Factor - 0.1 - 0.2 - 0.3 - -Example of file with redshift: -:: - # Redshift - 20 - 15 - 10 - 5 - -Output Selection -~~~~~~~~~~~~~~~~ - -With SWIFT, you can select the particle fields to output in snapshot using the parameter file. -In section ``SelectOutput``, you can remove a field by adding a parameter formatted in the -following way ``field_parttype`` where ``field`` is the name of the field that you -want to remove (e.g. ``Masses``) and ``parttype`` is the type of particles that -contains this field (e.g. ``Gas``, ``DM`` or ``Star``). For a parameter, the only -values accepted are 0 (skip this field when writing) or 1 (default, do not skip -this field when writing). - -You can generate a ``yaml`` file containing all the possible fields with ``./swift -o output.yml``. By default, all the fields are written. diff --git a/doc/RTD/source/GettingStarted/running_example.rst b/doc/RTD/source/GettingStarted/running_example.rst index 854e74cf830d58e51cf866d59a93ede6dceb57b6..9dfbdd8c8ec98ea59892a551691aa5f230052e2e 100644 --- a/doc/RTD/source/GettingStarted/running_example.rst +++ b/doc/RTD/source/GettingStarted/running_example.rst @@ -14,19 +14,19 @@ as ``wget`` for grabbing the glass). cd examples/SodShock_3D ./getGlass.sh python makeIC.py - ../swift -s -t 4 sodShock.yml + ../swift --hydro --threads=4 sodShock.yml python plotSolution.py 1 This will run the 'SodShock' in 3D and produce a nice plot that shows you -how the density has varied. Try running with GIZMO (this will take +how the density has varied. Try running with GIZMO-MFV (this will take _significantly_ longer than with SPH) to see the difference. For that, you will need to reconfigure with the following options: .. 
code-block:: bash ./configure \ - --with-hydro=gizmo \ + --with-hydro=gizmo-mfv \ --with-riemann-solver=hllc diff --git a/doc/RTD/source/GettingStarted/running_on_large_systems.rst b/doc/RTD/source/GettingStarted/running_on_large_systems.rst index 55eb812cef21474045931490591b3978841a4085..8bd3a76985e9181bae1a715564e58d2052dd15fc 100644 --- a/doc/RTD/source/GettingStarted/running_on_large_systems.rst +++ b/doc/RTD/source/GettingStarted/running_on_large_systems.rst @@ -38,5 +38,5 @@ each with 2x16 core processors for a total of 512 cores): #SBATCH -N 16 # Number of nodes to run on #SBATCH --tasks-per-node=2 # This system has 2 chips per node - mpirun -np 32 swift_mpi -t 16 -a parameter.yml + mpirun -np 32 swift_mpi --threads=16 --pin parameter.yml diff --git a/doc/RTD/source/GettingStarted/runtime_options.rst b/doc/RTD/source/GettingStarted/runtime_options.rst index b2ca10640d8830b9b5ecb8e117bf047af738889c..fdd2c1233cc09cc3a46c8eb2e38efb10729a2950 100644 --- a/doc/RTD/source/GettingStarted/runtime_options.rst +++ b/doc/RTD/source/GettingStarted/runtime_options.rst @@ -8,34 +8,5 @@ SWIFT requires a number of runtime options to run and get any sensible output. For instance, just running the ``swift`` binary will not use any SPH or gravity; the particles will just sit still! -Below is a list of the runtime options and when they should be used. The same list -can be found by typing ``./swift -h``. +A list of available command line options can be found on the :ref:`cmdline-options` page. -+ ``-a``: Pin runners using processor affinity. -+ ``-c``: Run with cosmological time integration. -+ ``-C``: Run with cooling. -+ ``-d``: Dry run. Read the parameter file, allocate memory but does not read - the particles from ICs and exit before the start of time integration. Allows - user to check validity of parameter and IC files as well as memory limits. -+ ``-D``: Always drift all particles even the ones far from active particles. 
- This emulates Gadget-[23] and GIZMO's default behaviours. -+ ``-e``: Enable floating-point exceptions (debugging mode). -+ ``-f``: {int} Overwrite the CPU frequency (Hz) to be used for time measurements. -+ ``-g``: Run with an external gravitational potential. -+ ``-G``: Run with self-gravity. -+ ``-M``: Reconstruct the multipoles every time-step. -+ ``-n``: {int} Execute a fixed number of time steps. When unset use the - time_end parameter to stop. -+ ``-o``: {str} Generate a default output parameter file. -+ ``-P``: {sec:par:val} Set parameter value and overwrites values read from the - parameters file. Can be used more than once. -+ ``-s``: Run with hydrodynamics. -+ ``-S``: Run with stars. -+ ``-t``: {int} The number of threads to use on each MPI rank. Defaults to 1 if - not specified. -+ ``-T``: Print timers every time-step. -+ ``-v``: [12] Increase the level of verbosity: 1, MPI-rank 0 writes, 2, All - MPI-ranks write. -+ ``-y``: {int} Time-step frequency at which task graphs are dumped. -+ ``-Y``: {int} Time-step frequency at which threadpool tasks are dumped. -+ ``-h``: Print a help message and exit. diff --git a/doc/RTD/source/GettingStarted/special_modes.rst b/doc/RTD/source/GettingStarted/special_modes.rst new file mode 100644 index 0000000000000000000000000000000000000000..636fc5e5d2237a6eaf4a2f4811cd17fa3e7010f5 --- /dev/null +++ b/doc/RTD/source/GettingStarted/special_modes.rst @@ -0,0 +1,43 @@ +.. Special modes + Matthieu Schaller, 20/08/2018 + +Special modes +============= + +SWIFT comes with a few special modes of operating to perform additional tasks. + +Static particles +~~~~~~~~~~~~~~~~ + +For some test problems it is convenient to have a set of particles that do not +perceive any gravitational forces and just act as sources for the force +calculation. This can be achieved by configuring SWIFT with the option +``--enable-no-gravity-below-id=N``. 
This will zero the *accelerations* of all +particles with ``id`` (strictly) lower than ``N`` at every time-step. Note that +if these particles have an initial velocity they will keep moving at that +speed. + +This will also naturally set their time-step to the maximal value +(``TimeIntegration:dt_max``) set in the parameter file. + +A typical use-case for this feature is to study the evolution of one particle +orbiting a static halo made of particles. This can be used to assess the +quality of the gravity tree and time integration. As more particles are added +to the halo, the orbits will get closer to the analytic solution as the noise +in the sampling of the halo is reduced. + +Note also that this does not affect the hydrodynamic forces. This mode is +purely designed for gravity-only accuracy tests. + +Gravity glasses +~~~~~~~~~~~~~~~ + +For many problems in cosmology, it is important to start a simulation with no +initial noise in the particle distribution. Such a "glass" can be created by +starting from a random distribution of particles and running with the sign of +gravity reversed until all the particles reach a steady state. To run SWIFT in +this mode, configure the code with ``--enable-glass-making``. + +Note that this will *not* generate the initial random distribution of +particles. An initial condition file with particles still has to be provided. + diff --git a/doc/RTD/source/GettingStarted/what_about_mpi.rst b/doc/RTD/source/GettingStarted/what_about_mpi.rst index 098fd35d80d71866cb86d2342d5d54710cd73a82..98141049f3e36506d6033259e7f5bb9394daf997 100644 --- a/doc/RTD/source/GettingStarted/what_about_mpi.rst +++ b/doc/RTD/source/GettingStarted/what_about_mpi.rst @@ -9,4 +9,4 @@ and the other ``swift_mpi``. Current wisdom is to run ``swift`` if you are only using one node (i.e. without any interconnect), and one MPI rank per NUMA region using ``swift_mpi`` for anything larger. 
You will need some GADGET-2 HDF5 initial conditions to run SWIFT, as well as a compatible yaml -parameterfile. +parameter file. diff --git a/doc/RTD/source/HydroSchemes/adding_your_own.rst b/doc/RTD/source/HydroSchemes/adding_your_own.rst index 2d7e640f66153a17e19f4e4c456cd37eed19a95a..549a7a42a22e7f755ad342b86b24c28f67118838 100644 --- a/doc/RTD/source/HydroSchemes/adding_your_own.rst +++ b/doc/RTD/source/HydroSchemes/adding_your_own.rst @@ -13,7 +13,7 @@ Adding Hydro Schemes SWIFT is engineered to enable you to add your own hydrodynamics schemes easily. We enable this through the use of header files to encapsulate each scheme. -Note that it's unlikely you will ever have to consider paralellism or 'loops over +Note that it's unlikely you will ever have to consider parallelism or 'loops over neighbours' for SWIFT; all of this is handled by the tasking system. All we ask for is the interaction functions that tell us how to a) compute the density and b) compute forces. @@ -69,7 +69,7 @@ will need to 'fill out' the following: + ``hydro_compute_timestep(p, xp, hydro_props, cosmo)`` returns the timestep for the hydrodynamics particles. + ``hydro_timestep_extra(p, dt)`` does some extra hydro operations once the - physical timestel for the particle is known. + physical timestep for the particle is known. + ``hydro_init_part(p, hydro_space)`` initialises the particle in preparation for the density calculation. This essentially sets properties, such as the density, to zero. 
diff --git a/doc/RTD/source/HydroSchemes/gizmo.rst b/doc/RTD/source/HydroSchemes/gizmo.rst index 365e1dc41c27f7c92bfb33859bedad2d96f35248..bbfcae04e1abac57b1476e4533bf92e051e6769d 100644 --- a/doc/RTD/source/HydroSchemes/gizmo.rst +++ b/doc/RTD/source/HydroSchemes/gizmo.rst @@ -10,7 +10,7 @@ GIZMO-Like Scheme :caption: Contents: -There is a meshless finite volume (MFV) GIZMO-like scheme implemented in SWIFT +There is a mesh-less finite volume (MFV) GIZMO-like scheme implemented in SWIFT (see Hopkins 2015 for more information). You will need a Riemann solver to run this, and configure as follows: @@ -19,7 +19,7 @@ this, and configure as follows: ./configure --with-hydro="gizmo-mfv" --with-riemann-solver="hllc" -We also have the meshless finite mass (MFM) GIZMO-like scheme. You can select +We also have the mesh-less finite mass (MFM) GIZMO-like scheme. You can select this at compile-time with the following configuration flags: .. code-block:: bash diff --git a/doc/RTD/source/HydroSchemes/hopkins_sph.rst b/doc/RTD/source/HydroSchemes/hopkins_sph.rst index bcc51e0ad96b18956f1c8e54f7bf2bf3b352c138..e4f1479230df96eabaa1fe16994960059858613b 100644 --- a/doc/RTD/source/HydroSchemes/hopkins_sph.rst +++ b/doc/RTD/source/HydroSchemes/hopkins_sph.rst @@ -28,3 +28,9 @@ scheme it includes a Monaghan AV scheme and a Balsara switch. .. code-block:: bash ./configure --with-hydro="pressure-energy" + +Both of the above schemes use a very simple, fixed artificial viscosity, only +the ``SPH:viscosity_alpha`` parameter has any effect for this scheme. This will +change the strength of the artificial viscosity throughout the simulation, and +has a default of 0.8. 
+ diff --git a/doc/RTD/source/HydroSchemes/minimal_sph.rst b/doc/RTD/source/HydroSchemes/minimal_sph.rst index 1a16a23360aaba8b28920150af0d4f4b05c74c2f..bbcbe026b56381c007f58920f31115f9f9160d05 100644 --- a/doc/RTD/source/HydroSchemes/minimal_sph.rst +++ b/doc/RTD/source/HydroSchemes/minimal_sph.rst @@ -10,11 +10,17 @@ Minimal (Density-Energy) SPH :caption: Contents: This scheme is a textbook implementation of Density-Energy SPH, and can be used -as a pedagogical example. It also implements a Monaghan AV scheme, like the -GADGET-2 scheme. It uses very similar equations, but differs in implementation -details; namely it tracks the internal energy \(u\) as the thermodynamic -variable, rather than entropy \(A\). To use the minimal scheme, use +as a pedagogical example. It also implements a Monaghan AV scheme with a +Balsara switch, like the GADGET-2 scheme. It uses very similar equations, but +differs in implementation details; namely it tracks the internal energy \(u\) +as the thermodynamic variable, rather than entropy \(A\). To use the minimal +scheme, use .. code-block:: bash ./configure --with-hydro="minimal" + +As it uses a very simple, fixed artificial viscosity, only the +``SPH:viscosity_alpha`` parameter has any effect for this scheme. This will +change the strength of the artificial viscosity throughout the simulation, +and has a default of 0.8. diff --git a/doc/RTD/source/HydroSchemes/traditional_sph.rst b/doc/RTD/source/HydroSchemes/traditional_sph.rst index c69ea5f60644119b8590414ffe00a75246de49a6..455e8bebe516bd9be9f6df889f1ead2088ca94d2 100644 --- a/doc/RTD/source/HydroSchemes/traditional_sph.rst +++ b/doc/RTD/source/HydroSchemes/traditional_sph.rst @@ -15,3 +15,8 @@ a Monaghan artificial viscosity scheme and Balsara switch. To use this hydro scheme, you need no extra configuration options -- it is the default! +As it uses a very simple, fixed artificial viscosity, only the +``SPH:viscosity_alpha`` parameter has any effect for this scheme. 
This will +change the strength of the artificial viscosity throughout the simulation, +and has a default of 0.8. + diff --git a/doc/RTD/source/InitialConditions/index.rst b/doc/RTD/source/InitialConditions/index.rst index eba438c722fbf4ffd78984aa55d6bfa5efcd71ad..e585c9aa55f269ebbbf9b2d83034b96a688a99f4 100644 --- a/doc/RTD/source/InitialConditions/index.rst +++ b/doc/RTD/source/InitialConditions/index.rst @@ -11,17 +11,21 @@ conditions format as the popular `GADGET-2 its type 3 format. Note that we do not support the GADGET-2 types 1 and 2 formats. +One crucial difference is that whilst GADGET-2 can have initial conditions split +over many files SWIFT only supports initial conditions in one single file. **ICs +split over multiple files cannot be read by SWIFT**. See the +":ref:`multiple_files_ICs`" section below for possible solutions. In GADGET-2 +having multiple files allows multiple ones to be read in parallel and is the +only way the code can handle more than 2^31 particles. This limitation is not in +place in SWIFT. A single file can contain any number of particles (well... up to +2^64...) and the file is read in parallel by HDF5 when running on more than one +compute node. + The original GADGET-2 file format only contains 2 types of particles: gas -particles and 5 sorts of collisionless particles that allow users to run with 5 +particles and 5 sorts of collision-less particles that allow users to run with 5 separate particle masses and softenings. In SWIFT, we expand on this by using two of these types for stars and black holes. -GADGET-2 can have initial conditions split over many files. This allow multiple -ones to be read in parallel and is the only way the code can handle more than -2^31 particles. This limitation is not in place in SWIFT. A single file can -contain any number of particles (well... up to 2^64...) and the file is read in -parallel by HDF5 when running on more than one compute node. 
- As the original documentation for the GADGET-2 initial conditions format is quite sparse, we lay out here all of the necessary components. If you are generating your initial conditions from python, we recommend you use the h5py @@ -35,7 +39,7 @@ You can find out more about the HDF5 format on their `webpages Structure of the File --------------------- -There are several groups that contain 'auxilliary' information, such as +There are several groups that contain 'auxiliary' information, such as ``Header``. Particle data is placed in separate groups depending of the type of the particles. Some types are currently ignored by SWIFT but are kept in the file format for compatibility reasons. @@ -98,7 +102,7 @@ In the ``/Header/`` group, the following attributes are required: ``NumPart_Total`` to be >2^31, the use of ``NumPart_Total_HighWord`` is only here for compatibility reasons. + ``Flag_Entropy_ICs``, a historical value that tells the code if you have - included entropy or internal energy values in your intial conditions files. + included entropy or internal energy values in your initial conditions files. Acceptable values are 0 or 1. We recommend using internal energies over entropy in the ICs and hence have this flag set to 0. @@ -113,19 +117,12 @@ GADGET-2 based analysis programs: exactly the same as the ``NumPart_Total`` array. As SWIFT only uses ICs contained in a single file, this is not necessary for SWIFT-only ICs. + ``NumFilesPerSnapshot``, again a historical integer value that tells the code - how many files there are per snapshot. You will probably want to set this to 1. + how many files there are per snapshot. You will probably want to set + this to 1. If this field is present in a SWIFT IC file and has a + value different from 1, the code will return an error message. + ``Time``, time of the start of the simulation in internal units or expressed as a scale-factor for cosmological runs. SWIFT ignores this and reads it from the parameter file. 
- -RuntimePars -~~~~~~~~~~~ - -In the ``/RuntimePars/``, the following attributes are required: - -+ ``PeriodicBoundaryConditionsOn``, a flag to tell the code whether or not you - have periodic boundaries switched on. Again, this is historical; it should be - set to 1 (default) if you have the code running in periodic mode, or 0 otherwise. Particle Data @@ -145,12 +142,12 @@ individual particle type (e.g. ``/PartType0/``) that have the following *dataset velocities divided by ``sqrt(a)`` (see below for a fix). + ``ParticleIDs``, an array of length N that are unique identifying numbers for each particle. Note that these have to be unique to a particle, and cannot be - the same even between particle types. The **IDs must be >1**. 0 or negative + the same even between particle types. The **IDs must be >= 0**. Negative IDs will be rejected by the code. + ``Masses``, an array of length N that gives the masses of the particles. For ``PartType0`` (i.e. particles that interact through hydro-dynamics), you will -need the following auxilliary items: +need the following auxiliary items: + ``SmoothingLength``, the smoothing lengths of the particles. These will be tidied up a bit, but it is best if you provide accurate numbers. In @@ -172,11 +169,13 @@ h-free quantities. Switching this parameter on will also affect the box size read from the ``/Header/`` group (see above). Similarly, GADGET cosmological ICs have traditionally used velocities expressed -as peculiar velocities divided by ``sqrt(a)``. This can be undone by swicthing +as peculiar velocities divided by ``sqrt(a)``. This can be undone by switching on the parameter ``InitialConditions:cleanup_velocity_factors`` in the :ref:`Parameter_File_label`. - + +.. 
_ICs_units_label: + Optional Components ------------------- @@ -214,8 +213,6 @@ You should have an HDF5 file with the following structure: Flag_Entropy_ICs=0 NumPart_Total=[0, 1, 0, 0, 4, 5] NumPart_Total_HighWord=[0, 0, 0, 0, 0, 0] - RuntimePars/ - PeriodicBoundariesOn=1 Units/ Unit current in cgs (U_I)=1.0 Unit length in cgs (U_L)=1.0 @@ -235,4 +232,27 @@ You should have an HDF5 file with the following structure: ParticleIDs=[...] Masses=[...] +.. _multiple_files_ICs: + +ICs split over multiple files +----------------------------- + +A basic script ``tools/combine_ics.py`` is provided to merge basic GADGET-2 +initial conditions split into multiple files into one single valid file. This +script can handle simple HDF5 files (GADGET-2 type 3 ICs) that follow the format +described above but split over multiple files. + +The script can also convert ICs using a ``MassTable`` and create the +corresponding particle fields. Note that additional fields present in ICs beyond +the simple GADGET-2 specification will not be merged. + +One additional option is to compress the fields in the files using HDF5's gzip +compression. This is very effective for the fields such as masses or particle +IDs which are very similar. A checksum filter is also applied in all cases to +help with data curation. + +**We caution that this script is very basic and should only be used with great +caution.** + + diff --git a/doc/RTD/source/NewOption/index.rst b/doc/RTD/source/NewOption/index.rst index a7445524017fefd99d76c80a4a1ecc646874bd7a..441cd860ed79dabad2005b39ae4549d1496ab98d 100644 --- a/doc/RTD/source/NewOption/index.rst +++ b/doc/RTD/source/NewOption/index.rst @@ -1,4 +1,4 @@ -.. Equation of State +.. Adding new schemes Loic Hausammann, 7th April 2018 .. 
_new_option: diff --git a/doc/RTD/source/ParameterFiles/index.rst b/doc/RTD/source/ParameterFiles/index.rst new file mode 100644 index 0000000000000000000000000000000000000000..93cf9b6e86895f9f20c8d644d8d24ccab5df93d6 --- /dev/null +++ b/doc/RTD/source/ParameterFiles/index.rst @@ -0,0 +1,496 @@ +.. Parameter Files + Matthieu Schaller, 21st October 2018 + +.. _Parameter_File_label: + +Parameter Files +=============== + +File format and basic information +--------------------------------- + +The parameter file uses a format similar to the `YAML format +<https://en.wikipedia.org/wiki/YAML>`_ but reduced to only the +elements required for the SWIFT parameters. Options are given by a +name followed by a colon and the value of the parameter: + +.. code:: YAML + + ICs: santa_barbara.hdf5 + dt_max: 1.5 + shift: [2., 4., 5.] + +Comments can be inserted anywhere and start with a hash: + +.. code:: YAML + + # Description of the physics + viscosity_alpha: 2.0 + dt_max: 1.5 # seconds + +A typical SWIFT parameter file is split into multiple sections that +may or may not be present depending on the different configuration +options. The sections start with a label and can contain any number of +parameters: + +.. code:: YAML + + Cosmology: # Planck13 + Omega_m: 0.307 + Omega_lambda: 0.693 + Omega_b: 0.0455 + h: 0.6777 + a_begin: 0.0078125 # z = 127 + +The options can be integer values, floating point numbers, characters +or strings. If SWIFT expects a number and string is given, an error +will be raised. The code can also read an array of values: + +.. code:: YAML + + shift: [2., 4., 5.] + +Some options in the parameter file are optional and +when not provided, SWIFT will run with the default value. However, if +a compulsory parameter is missing an error will be raised at +start-up. + +Finally, SWIFT outputs two YAML files at the start of a run.
The first +one ``used_parameters.yml`` contains all the parameters that were used +for this run, **including all the optional parameters with their +default values**. This file can be used to start an exact copy of the +run. The second file, ``unused_parameters.yml`` contains all the +values that were not read from the parameter file. This can be used to +simplify the parameter file or check that nothing important was +ignored (for instance because the code is not configured to use some +options). + +The rest of this page describes all the SWIFT parameters, split by +section. A list of all the possible parameters is kept in the file +``examples/parameter_examples.yml``. + +Internal Unit System +-------------------- + +The ``InternalUnitSystem`` section describes the units used internally by the +code. This is the system of units in which all the equations are solved. All +physical constants are converted to this system and if the ICs use a different +system (see :ref:`ICs_units_label`) the particle quantities will be converted +when read in. + +The system of units is described using the value of the 5 basic units +of any system with respect to the CGS system. Instead of using a unit +of time we use a unit of velocity as this is more intuitive. Users +hence need to provide: + +* a unit of length: ``UnitLength_in_cgs``, +* a unit of mass: ``UnitMass_in_cgs``, +* a unit of velocity ``UnitVelocity_in_cgs``, +* a unit of electric current ``UnitCurrent_in_cgs``, +* a unit of temperature ``UnitTemp_in_cgs``. + +All these need to be expressed with respect to their cgs counter-part +(i.e. :math:`cm`, :math:`g`, :math:`cm/s`, :math:`A` and :math:`K`). Recall +that there are no h-factors in any of SWIFT's quantities; we, for instance, +use :math:`cm` and not :math:`cm/h`. + +For instance to use the commonly adopted system of 10^10 Msun as a +unit for mass, mega-parsec as a unit of length and km/s as a unit of +speed, we would use: + +.. 
code:: YAML + + # Common unit system for cosmo sims + InternalUnitSystem: + UnitMass_in_cgs: 1.98848e43 # 10^10 M_sun in grams + UnitLength_in_cgs: 3.08567758e24 # 1 Mpc in centimeters + UnitVelocity_in_cgs: 1e5 # 1 km/s in centimeters per second + UnitCurrent_in_cgs: 1 # 1 Ampere + UnitTemp_in_cgs: 1 # 1 Kelvin + +Note that there are currently no variables in any of the SWIFT physics +schemes that make use of the unit of electric current. There is also +no incentive to use anything else than Kelvin but that makes the whole +system consistent with any possible unit system. + +If one is interested in using the more humorous `FFF unit +system <https://en.wikipedia.org/wiki/FFF_system>`_ one would use + +.. code:: YAML + + # FFF unit system + InternalUnitSystem: + UnitMass_in_cgs: 40823.3133 # 1 Firkin (fir) in grams + UnitLength_in_cgs: 20116.8 # 1 Furlong (fur) in cm + UnitVelocity_in_cgs: 0.01663095 # 1 Furlong (fur) per Fortnight (ftn) in cm/s + UnitCurrent_in_cgs: 1 # 1 Ampere + UnitTemp_in_cgs: 1 # 1 Kelvin + +The value of the physical constants in this system is left as an +exercise for the reader [#f1]_. + +Cosmology +--------- + +When running a cosmological simulation, the section ``Cosmology`` sets the values of the +cosmological model. The expanded :math:`\Lambda\rm{CDM}` parameters governing the +background evolution of the Universe need to be specified here. These are: + +* The reduced Hubble constant: :math:`h`: ``h``, +* The matter density parameter :math:`\Omega_m`: ``Omega_m``, +* The cosmological constant density parameter :math:`\Omega_\Lambda`: ``Omega_lambda``, +* The baryon density parameter :math:`\Omega_b`: ``Omega_b``, +* The radiation density parameter :math:`\Omega_r`: ``Omega_r``. + +The last parameter can be omitted and will default to :math:`\Omega_r = 0`. Note +that SWIFT will verify on start-up that the matter content of the initial conditions +matches the cosmology specified in this section. 
+ +This section also specifies the start and end of the simulation expressed in +terms of scale-factors. The two parameters are: + +* Initial scale-factor: ``a_begin``, +* Final scale-factor: ``a_end``. + +Two additional optional parameters can be used to change the equation of +state of dark energy :math:`w(a)`. We use the evolution law :math:`w(a) = +w_0 + w_a (1 - a)`. The two parameters in the YAML file are: + +* The :math:`z=0` dark energy equation of state parameter :math:`w_0`: ``w_0`` +* The dark energy equation of state evolution parameter :math:`w_a`: ``w_a`` + +If unspecified these parameters default to the standard +:math:`\Lambda\rm{CDM}` values of :math:`w_0 = -1` and :math:`w_a = 0`. + +For a Planck+13 cosmological model (ignoring radiation density as is +commonly done) and running from :math:`z=127` to :math:`z=0`, one would hence +use the following parameters: + +.. code:: YAML + + Cosmology: + a_begin: 0.0078125 # z = 127 + a_end: 1.0 # z = 0 + h: 0.6777 + Omega_m: 0.307 + Omega_lambda: 0.693 + Omega_b: 0.0455 + Omega_r: 0. # (Optional) + w_0: -1.0 # (Optional) + w_a: 0. # (Optional) + +When running a non-cosmological simulation (i.e. without the ``-c`` run-time +flag) this section of the YAML file is entirely ignored. + +Gravity +------- + +The behaviour of the self-gravity solver can be modified by the parameters +provided in the ``Gravity`` section. The theory document puts these parameters into the +context of the equations being solved. We give a brief overview here. + +* The Plummer-equivalent co-moving softening length used for all particles :math:`\epsilon_{com}`: ``comoving_softening``, +* The Plummer-equivalent maximal physical softening length used for all particles :math:`\epsilon_{max}`: ``max_physical_softening``, + +At any redshift :math:`z`, the Plummer-equivalent softening length used by the +code will be :math:`\epsilon=\min(\epsilon_{max}, +\frac{\epsilon_{com}}{z+1})`. This is expressed in internal units.
+ +* The opening angle (multipole acceptance criterion) used in the FMM :math:`\theta`: ``theta``, +* The time-step size pre-factor :math:`\eta`: ``eta``, + +The time-step of a given particle is given by :math:`\Delta t = +\eta\sqrt{\frac{\epsilon}{|\overrightarrow{a}|}}`, where +:math:`\overrightarrow{a}` is the particle's acceleration. Power et al. (2003) recommend using :math:`\eta=0.025`. +The last tree-related parameter is + +* The tree rebuild frequency: ``rebuild_frequency``. + +The tree rebuild frequency is an optional parameter defaulting to +:math:`0.01`. It is used to trigger the re-construction of the tree every time a +fraction of the particles have been integrated (kicked) forward in time. + +Simulations using periodic boundary conditions use additional parameters for the +Particle-Mesh part of the calculation. The last three are optional: + +* The number cells along each axis of the mesh :math:`N`: ``mesh_side_length``, +* The mesh smoothing scale in units of the mesh cell-size :math:`a_{\rm + smooth}`: ``a_smooth`` (default: ``1.25``), +* The scale above which the short-range forces are assumed to be 0 (in units of + the mesh cell-size multiplied by :math:`a_{\rm smooth}`) :math:`r_{\rm + cut,max}`: ``r_cut_max`` (default: ``4.5``), +* The scale below which the short-range forces are assumed to be exactly Newtonian (in units of + the mesh cell-size multiplied by :math:`a_{\rm smooth}`) :math:`r_{\rm + cut,min}`: ``r_cut_min`` (default: ``0.1``), + +For most runs, the default values can be used. Only the number of cells along +each axis needs to be specified. The remaining three values are best described +in the context of the full set of equations in the theory documents. + +As a summary, here are the values used for the EAGLE :math:`100^3~{\rm Mpc}^3` +simulation: + +.. code:: YAML + + # Parameters for the self-gravity scheme for the EAGLE-100 box + Gravity: + eta: 0.025 + theta: 0.7 + comoving_softening: 0.0026994 # 0.7 proper kpc at z=2.8. 
+ max_physical_softening: 0.0007 # 0.7 proper kpc + rebuild_frequency: 0.01 # Default optional value + mesh_side_length: 512 + a_smooth: 1.25 # Default optional value + r_cut_max: 4.5 # Default optional value + r_cut_min: 0.1 # Default optional value + + +SPH +--- + +Time Integration +---------------- + +The ``TimeIntegration`` section is used to set some general parameters related to time +integration. In all cases, users have to provide a minimal and maximal time-step +size: + +* Maximal time-step size: ``dt_max`` +* Minimal time-step size: ``dt_min`` + +These quantities are expressed in internal units. All particles will have their +time-step limited by the maximal value on top of all the other criteria that may +apply to them (gravity acceleration, Courant condition, etc.). If a particle +demands a time-step size smaller than the minimum, SWIFT will abort with an +error message. This is a safe-guard against simulations that would never +complete due to the number of steps to run being too large. + +When running a non-cosmological simulation, the user also has to provide the +time of the start and the time of the end of the simulation: + +* Start time: ``time_begin`` +* End time: ``time_end`` + +Both are expressed in internal units. The start time is typically set to ``0`` +but SWIFT can handle any value here. For cosmological runs, these values are +ignored and the start- and end-points of the runs are specified by the start and +end scale-factors in the cosmology section of the parameter file. + +Additionally, when running a cosmological volume, advanced users can specify the +value of the dimensionless pre-factor entering the time-step condition linked +with the motion of particles with respect to the background expansion and mesh +size. See the theory document for the exact equations. + +* Dimensionless pre-factor of the maximal allowed displacement: + ``max_dt_RMS_factor`` (default: ``0.25``) + +This value rarely needs altering. 
+ +A full time-step section for a non-cosmological run would be: + +.. code:: YAML + + TimeIntegration: + time_begin: 0 # Start time in internal units. + time_end: 10. # End time in internal units. + dt_max: 1e-2 + dt_min: 1e-6 + +Whilst for a cosmological run, one would need: + +.. code:: YAML + + TimeIntegration: + dt_max: 1e-4 + dt_min: 1e-10 + max_dt_RMS_factor: 0.25 # Default optional value + +Initial Conditions +------------------ + +This ``InitialConditions`` section of the parameter file contains all the options related to +the initial conditions. The main two parameters are + +* The name of the initial conditions file: ``file_name``, +* Whether the problem uses periodic boundary conditions or not: ``periodic``. + +The file path is relative to where the code is being executed. These +parameters can be complemented by some optional values to drive some +specific behaviour of the code. + +* Whether to generate gas particles from the DM particles: ``generate_gas_in_ics`` (default: ``0``), +* Whether to activate an additional clean-up of the SPH smoothing lengths: ``cleanup_smoothing_lengths`` (default: ``0``) + +The procedure used to generate gas particles from the DM ones is +outlined in the theory documents and is too long for a full +description here. The cleaning of the smoothing lengths is an +expensive operation but can be necessary in the cases where the +initial conditions are of poor quality and the values of the smoothing +lengths are far from the values they should have. + +When starting from initial conditions created for Gadget, some +additional flags can be used to convert the values from h-full to +h-free and remove the additional :math:`\sqrt{a}` in the velocities: + +* Whether to re-scale all the fields to remove powers of h from the quantities: ``cleanup_h_factors`` (default: ``0``), +* Whether to re-scale the velocities to remove the :math:`\sqrt{a}` assumed by Gadget : ``cleanup_velocity_factors`` (default: ``0``). 
+ +The h-factors are self-consistently removed according to their units +and this is applied to all the quantities irrespective of particle +types. The correct power of ``h`` is always calculated for each +quantity. + +Finally, SWIFT also offers these options: + +* A factor to re-scale all the smoothing-lengths by a fixed amount: ``smoothing_length_scaling`` (default: ``1.``), +* A shift to apply to all the particles: ``shift`` (default: ``[0.0,0.0,0.0]``), +* Whether to replicate the box along each axis: ``replicate`` (default: ``1``). + +The shift is expressed in internal units. The option to replicate the +box is especially useful for weak-scaling tests. When set to an +integer >1, the box size is multiplied by this integer along each axis +and the particles are duplicated and shifted such as to create exact +copies of the simulation volume. + +The full section to start a DM+hydro run from Gadget DM-only ICs would +be: + +.. code:: YAML + + InitialConditions: + file_name: my_ics.hdf5 + periodic: 1 + cleanup_h_factors: 1 + cleanup_velocity_factors: 1 + generate_gas_in_ics: 1 + cleanup_smoothing_lengths: 1 + + +Physical Constants +------------------ + +For some idealised test it can be useful to overwrite the value of +some physical constants; in particular the value of the gravitational +constant. SWIFT offers an optional parameter to overwrite the value of +:math:`G_N`. + +.. code:: YAML + + PhysicalConstants: + G: 1 + +Note that this set :math:`G` to the specified value in the internal system +of units. Setting a value of `1` when using the system of units (10^10 Msun, +Mpc, km/s) will mean that :math:`G_N=1` in these units [#f2]_ instead of the +normal value :math:`G_N=43.00927`. + +This option is only used for specific tests and debugging. This entire +section of the YAML file can typically be left out. More constants may +be handled in the same way in future versions. 
+ +Snapshots +--------- + +Some additional specific options for the snapshot outputs are described in the +following pages: + +.. toctree:: + :maxdepth: 1 + + output_selection + +Statistics +---------- + +Restarts +-------- + +SWIFT can write check-pointing files and restart from them. The behaviour of +this mechanism is driven by the options in the ``Restarts`` section of the YAML +parameter file. All the parameters are optional but default to values that +ensure a reasonable behaviour. + +* Whether or not to enable the dump of restart files: ``enable`` (default: + ``1``). + +This parameter acts as a master-switch for the check-pointing capabilities. All the +other options require the ``enable`` parameter to be set to ``1``. + +* Whether or not to save a copy of the previous set of check-pointing files: + ``save`` (default: ``1``), +* Whether or not to dump a set of restart files on regular exit: ``onexit`` + (default: ``0``), +* The wall-clock time in hours between two sets of restart files: + ``delta_hours`` (default: ``6.0``). + +Note that there is no buffer time added to the ``delta_hours`` value. If the +system's batch queue run time limit is set to 6 hours, the user must specify a +smaller value to allow for enough time to safely dump the check-point files. + +* The sub-directory in which to store the restart files: ``subdir`` (default: + ``restart``), +* The basename of the restart files: ``basename`` (default: ``swift``) + +If the directory does not exist, SWIFT will create it. When resuming a run, +SWIFT will look for files with the name provided in the sub-directory specified +here. The files themselves are named ``basename_000001.rst`` where the basename +is replaced by the user-specified name and the 6-digit number corresponds to +the MPI-rank. SWIFT writes one file per MPI rank. If the ``save`` option has +been activated, the previous set of restart files will be named +``basename_000000.rst.prev``.
+ +SWIFT can also be stopped by creating an empty file called ``stop`` in the +directory where the code runs. This will make SWIFT dump a fresh set of restart +files (irrespective of the specified ``delta_hours`` between dumps) and exit +cleanly. One parameter governs this behaviour: + +* Number of steps between two checks for the presence of a ``stop`` file: + ``stop_steps`` (default: ``100``). + +The default value is chosen such that SWIFT does not need to poll the +file-system too often, which can take a significant amount of time on distributed +systems. For runs where the small time-steps take a much larger amount of time, +a smaller value is recommended to allow for a finer control over when the code +can be stopped. + +Finally, SWIFT can automatically stop after a specified amount of wall-clock +time. The code can also run a command when exiting in this fashion, which can be +used, for instance, to interact with the batch queue system: + +* Maximal wall-clock run time in hours: ``max_run_time`` (default: ``24.0``), +* Whether or not to run a command on exit: ``resubmit_on_exit`` (default: + ``0``), +* The command to run on exit: ``resubmit_command`` (default: ``./resub.sh``). + +Note that no check is performed on the validity of the command to run. SWIFT +simply calls ``system()`` with the user-specified command. + +To run SWIFT, dumping check-pointing files every 6 hours and running for 24 +hours after which a shell command will be run, one would use: + +.. code:: YAML + + Restarts: + enable: 1 + save: 1 # Keep copies + onexit: 0 + subdir: restart # Sub-directory of the directory where SWIFT is run + basename: swift + delta_hours: 6.0 + stop_steps: 100 + max_run_time: 24.0 # In hours + resubmit_on_exit: 1 + resubmit_command: ./resub.sh + + + +Scheduler +--------- + +Domain Decomposition +-------------------- + +..
[#f1] The thorough reader (or overly keen SWIFT tester) would find that the speed of light is :math:`c=1.8026\times10^{12}\,\rm{fur}\,\rm{ftn}^{-1}`, Newton's constant becomes :math:`G_N=4.896735\times10^{-4}~\rm{fur}^3\,\rm{fir}^{-1}\,\rm{ftn}^{-2}` and Planck's constant turns into :math:`h=4.851453\times 10^{-34}~\rm{fur}^2\,\rm{fir}\,\rm{ftn}^{-1}`. + + +.. [#f2] which would translate into a constant :math:`G_N=1.5517771\times10^{-9}~cm^{3}\,g^{-1}\,s^{-2}` if expressed in the CGS system. diff --git a/doc/RTD/source/ParameterFiles/output_selection.rst b/doc/RTD/source/ParameterFiles/output_selection.rst new file mode 100644 index 0000000000000000000000000000000000000000..90ab0f9a7c738c28832bc36de83c4034141d4b21 --- /dev/null +++ b/doc/RTD/source/ParameterFiles/output_selection.rst @@ -0,0 +1,65 @@ +.. Parameter File + Loic Hausammann, 1 June 2018 + +.. _Output_list_label: + +Output List +~~~~~~~~~~~ + +In the sections ``Snapshots`` and ``Statistics``, you can specify the +options ``output_list_on`` and ``output_list`` which receive an int +and a filename. The ``output_list_on`` enable or not the output list +and ``output_list`` is the filename containing the output times. With +the file header, you can choose between writing redshifts, scale +factors or times. + +Example of file containing with times (in internal units):: + + # Time + 0.5 + 1.5 + 3.0 + 12.5 + +Example of file with scale factors:: + + # Scale Factor + 0.1 + 0.2 + 0.3 + +Example of file with redshift:: + + # Redshift + 20 + 15 + 10 + 5 + +.. _Output_selection_label: + +Output Selection +~~~~~~~~~~~~~~~~ + +With SWIFT, you can select the particle fields to output in snapshot +using the parameter file. In section ``SelectOutput``, you can remove +a field by adding a parameter formatted in the following way +``field_parttype`` where ``field`` is the name of the field that you +want to remove (e.g. ``Masses``) and ``parttype`` is the type of +particles that contains this field (e.g. 
``Gas``, ``DM`` or ``Star``). +For a parameter, the only values accepted are 0 (skip this field when +writing) or 1 (default, do not skip this field when writing). By +default all fields are written. + +This field is mostly used to remove unnecessary output by listing them +with 0's. A classic use-case for this feature is a DM-only simulation +(pure n-body) where all particles have the same mass. Outputting the +mass field in the snapshots results in extra i/o time and unnecessary +waste of disk space. The corresponding section of the ``yaml`` +parameter file would look like this:: + + SelectOutput: + Masses_DM: 0 + +You can generate a ``yaml`` file containing all the possible fields +available for a given configuration of SWIFT by running ``./swift --output-params output.yml``. diff --git a/doc/RTD/source/Task/adding_your_own.rst b/doc/RTD/source/Task/adding_your_own.rst new file mode 100644 index 0000000000000000000000000000000000000000..6f6b37899b505a5bbf6a09d8757232e0b547a081 --- /dev/null +++ b/doc/RTD/source/Task/adding_your_own.rst @@ -0,0 +1,262 @@ +.. Task + Loic Hausammann 17th July 2018 + +.. _task_adding_your_own: +.. highlight:: c + +Adding a Task +============= + +First you will need to understand the dependencies between tasks +using the file ``dependency_graph.dot`` generated by swift at the beginning of any simulation and then decide where it will fit (see :ref:`task`). + +For the next paragraphs, let's assume that we want to implement the existing task ``cooling``. + +Adding it to the Task List +-------------------------- +First you will need to add it to the task list situated in ``task.h`` and ``task.c``. + +In ``task.h``, you need to provide an additional entry to the enum ``task_types`` (e.g. ``task_type_cooling``). +The last entry ``task_type_count`` should always stay at the end as it is a counter of the number of elements. 
+For example:: + + enum task_types { + task_type_none = 0, + task_type_sort, + task_type_self, + task_type_pair, + task_type_sub_self, + task_type_sub_pair, + task_type_ghost_in, + task_type_ghost, + task_type_ghost_out, + task_type_extra_ghost, + task_type_drift_part, + task_type_end_force, + task_type_kick1, + task_type_kick2, + task_type_timestep, + task_type_send, + task_type_recv, + task_type_cooling, + task_type_count + } __attribute__((packed)); + + +In ``task.c``, you will find an array containing the name of each task and need to add your own (e.g. ``cooling``). +Be careful with the order that should be the same than in the previous list. +For example:: + + /* Task type names. */ + const char *taskID_names[task_type_count] = { + "none", "sort", "self", "pair", "sub_self", + "sub_pair", "ghost_in", "ghost", "ghost_out", + "extra_ghost", "drift_part", "end_force", "kick1", + "kick2", "timestep", "send", "recv", + "cooling"}; + + +Adding it to the Cells +---------------------- + +Each cell contains a list to its tasks and therefore you need to provide a link for it. + +In ``cell.h``, add a pointer to a task in the structure. +In order to stay clean, please put the new task in the same group than the other tasks. +For example:: + + struct cell { + /* Lot of stuff before. */ + + /*! Task for the cooling */ + struct task *cooling; + + /*! The second kick task */ + struct task *kick2; + + /* Lot of stuff after */ + } + + +Adding a new Timer +------------------ + +As SWIFT is HPC oriented, any new task need to be optimized. +It cannot be done without timing the function. + +In ``timers.h``, you will find an enum that contains all the tasks. +You will need to add yours inside it. 
+For example:: + + enum { + timer_none = 0, + timer_prepare, + timer_init, + timer_drift_part, + timer_drift_gpart, + timer_kick1, + timer_kick2, + timer_timestep, + timer_endforce, + timer_dosort, + timer_doself_density, + timer_doself_gradient, + timer_doself_force, + timer_dopair_density, + timer_dopair_gradient, + timer_dopair_force, + timer_dosub_self_density, + timer_dosub_self_gradient, + timer_dosub_self_force, + timer_dosub_pair_density, + timer_dosub_pair_gradient, + timer_dosub_pair_force, + timer_doself_subset, + timer_dopair_subset, + timer_dopair_subset_naive, + timer_dosub_subset, + timer_do_ghost, + timer_do_extra_ghost, + timer_dorecv_part, + timer_do_cooling, + timer_gettask, + timer_qget, + timer_qsteal, + timer_locktree, + timer_runners, + timer_step, + timer_cooling, + timer_count, + }; + +As for ``task.h``, +you will need to give a name to your timer in ``timers.c``:: + + const char* timers_names[timer_count] = { + "none", + "prepare", + "init", + "drift_part", + "kick1", + "kick2", + "timestep", + "endforce", + "dosort", + "doself_density", + "doself_gradient", + "doself_force", + "dopair_density", + "dopair_gradient", + "dopair_force", + "dosub_self_density", + "dosub_self_gradient", + "dosub_self_force", + "dosub_pair_density", + "dosub_pair_gradient", + "dosub_pair_force", + "doself_subset", + "dopair_subset", + "dopair_subset_naive", + "dosub_subset", + "do_ghost", + "do_extra_ghost", + "dorecv_part", + "gettask", + "qget", + "qsteal", + "locktree", + "runners", + "step", + "cooling", + }; + + +You can now easily time +your functions by using:: + + TIMER_TIC; + /* Your complicated functions */ + if (timer) TIMER_TOC(timer_cooling); + + +Adding your Task to the System +------------------------------ + +Now the tricky part happens. +SWIFT is able to deal automatically with the conflicts between tasks, but unfortunately cannot understand the dependencies. 
+ +To implement your new task in the task system, you will need to modify a few functions in ``engine.c``. + +First, you will need to add mainly two functions: ``scheduler_addtask`` and ``scheduler_addunlocks`` in the ``engine_make_hierarchical_tasks_*`` functions (depending on the type of task you implement, you will need to write it to a different function). + +In ``engine_make_hierarchical_tasks_hydro``, +we add the task through the following call:: + + /* Add the cooling task. */ + c->cooling = + scheduler_addtask(s, task_type_cooling, task_subtype_none, 0, + 0, c, NULL); + +As the ``cooling`` cannot be done before the end of the force computation +and the second kick cannot be done before the cooling:: + + scheduler_addunlock(s, c->super->end_force, c->cooling); + scheduler_addunlock(s, c->cooling, c->super->kick2); + + +The next step is to activate your task +in ``engine_marktasks_mapper``:: + + else if (t->type == task_type_cooling || t->type == task_type_sourceterms) { + if (cell_is_active_hydro(t->ci, e)) scheduler_activate(s, t); + } + +Then you will need to update the estimate for the number of tasks in ``engine_estimate_nr_tasks`` by modifying ``n1`` or ``n2``. + +Initially, the engine will need to skip the task that updates the particles. +It is the case for the cooling, therefore you will need to add it in ``engine_skip_force_and_kick``. + +Implementing your Task +---------------------- + +The last part is situated in ``runner.c``. + +You will need to implement a function ``runner_do_cooling`` +(do not forget to time it):: + + void runner_do_cooling(struct runner *r, struct cell *c, int timer) { + + TIMER_TIC; + + /* Now you can check if something is required at this time step. + * You may want to use a different cell_is_active function depending + * on your task + */ + if (!cell_is_active_hydro(c, e)) return; + + /* Recurse? 
*/ + if (c->split) { + for (int k = 0; k < 8; k++) + if (c->progeny[k] != NULL) runner_do_cooling(r, c->progeny[k], 0); + } else { + /* Implement your cooling here */ + } + + if (timer) TIMER_TOC(timer_do_cooling); + } + + + +and add a call to this function in ``runner_main`` +in the switch:: + + case task_type_cooling: + runner_do_cooling(r, t->ci, 1); + break; + + +Finalizing your Task +-------------------- + +Now that you have done the easiest part, you can start debugging by implementing a test and/or an example. +Before creating your merge request with your new task, do not forget the most funny part that consists in writing a nice and beautiful documentation ;) diff --git a/doc/RTD/source/Task/index.rst b/doc/RTD/source/Task/index.rst new file mode 100644 index 0000000000000000000000000000000000000000..e701e924a79f9256d4c0b034b6c15651f41ff405 --- /dev/null +++ b/doc/RTD/source/Task/index.rst @@ -0,0 +1,21 @@ +.. Task + Loic Hausammann 17th July 2018 + +.. _task: + +Task System +=========== + +This section of the documentation includes information on the task system +available in SWIFT, as well as how to implement your own task. + +SWIFT produces at the beginning of each simulation a ``dot`` file (see the graphviz library for more information). +It contains the full hierarchy of tasks used in this simulation. +You can convert the ``dot`` file into a ``png`` with the following command +``dot -Tpng dependency_graph.dot -o dependency_graph.png``. + +.. toctree:: + :maxdepth: 2 + :caption: Contents: + + adding_your_own diff --git a/doc/RTD/source/VELOCIraptorInterface/index.rst b/doc/RTD/source/VELOCIraptorInterface/index.rst new file mode 100644 index 0000000000000000000000000000000000000000..312b9cd3f893dd44f814ad80ac40748db12bd4d5 --- /dev/null +++ b/doc/RTD/source/VELOCIraptorInterface/index.rst @@ -0,0 +1,24 @@ +.. 
VELOCIraptor Interface + Folkert Nobels, 8th October 2018 + +VELOCIraptor Interface +====================== + +This section includes information on the VELOCIraptor interface implemented in +SWIFT. There are mainly four subsections; the first section explains shortly +how VELOCIraptor works, the second subsection explains how to configure SWIFT +with VELOCIraptor, the third subsection explains how to configure a standalone +version of VELOCIraptor and the last subsection explains how the output format +of VELOCIraptor works. + +.. toctree:: + :maxdepth: 2 + :caption: Contents: + + whatis + stfwithswift + stfalone + output + + + diff --git a/doc/RTD/source/VELOCIraptorInterface/output.rst b/doc/RTD/source/VELOCIraptorInterface/output.rst new file mode 100644 index 0000000000000000000000000000000000000000..946841fbece0207430846725b6a995cbc3f12613 --- /dev/null +++ b/doc/RTD/source/VELOCIraptorInterface/output.rst @@ -0,0 +1,355 @@ +.. VELOCIraptor output + Folkert Nobels 12th of October + +VELOCIraptor Output +=================== + +.. toctree:: + :maxdepth: 2 + :hidden: + :caption: Contents: + +In general VELOCIraptor outputs six files per snapshot, of which 2 files are +for unbound particles specifically. In this part we will explain what is +inside the different files. + +Catalog_groups file +------------------- + +The first output file of VELOCIraptor is the ``.catalog_group`` file, +this file contains all the information that is group specific, and does not go +into depth of physical properties but only on numbers of particles and +group sizes, the interesting data in the ``.catalog_group`` files are: + ++ The ``group_size``: gives a list of all the halos and the number of particles + in the halo, this list is numbered from 0 until the number of groups minus + one. It is important that the groups are not ordered in any way [#order]_. 
+ It is also important to note that the group size includes both the bound and + unbound particles; always use the ``Offset`` and ``Offset_unbound`` data + when reading from the ``catalog_particles`` files. ++ The ``Num_of_groups`` or ``Total_num_of_groups``: gives the total number of + groups in the snapshot. ++ The ``Offset`` list: This list gives the offset of the particles. In the + output of VELOCIraptor there is no file which has an ID for every particle + and a corresponding group, rather the particles are ordered according to + the group they are in. So if we want to access the particles in group 0, we + need to look at the particles from ``Offset[0]`` until ``Offset[1]`` in the + ``.catalog_particles`` hdf5 file. In general this means that for group N we + need to look at particles ``Offset[N]`` until ``Offset[N+1]``. ++ The ``Offset_unbound`` list: This list works exactly the same as the + ``Offset`` list only this list is for the gravitationally unbound particles. + +Catalog_particles file +---------------------- + +The second file that is produced by VELOCIraptor is the ``.catalog_particles`` +file, this file contains mainly all the IDs of the particles and has two +interesting parameters: + ++ The ``Num_of_particles_in_groups`` and ``Total_num_of_particles_in_all_groups`` + parameter: Gives the total number of particles in the file or the total + number of particles that are in halos. ++ The ``Particle_IDs``: The list of particles as sorted by halo, in which halo + the individual particles are present can be found by using the + ``.catalog_group`` file and the corresponding ``Offset`` list. + +Besides the ``.catalog_particles`` file, there is also a +``.catalog_particles.unbound`` file, this file contains the same information +but only for the unbound particles, a particle can only be present in one of +these two lists.
+ +Extracting the particles in a given halo +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +The ``.catalog_particles`` file returns particle IDs that need to be matched +with those in your snapshot to find the particles in the file that you +wish to extract. The python snippet below should give you an idea of how to +go about doing this for the bound particles. + +First, we need to extract the offset from the ``.catalog_group`` file, and +work out how many _bound_ particles are in our halo. We can do this by +looking at the next offset. Then, we can ID match those with the snapshot +file, and get the mask for the _positions_ in the file that correspond +to our bound particles. (Note this requires ``numpy > 1.15.0``). + +.. code-block:: python + :linenos: + + import numpy as np + import h5py + + snapshot_file = h5py.File("swift_snapshot.hdf5", "r") + group_file = h5py.File("velociraptor_output.catalog_group", "r") + particles_file = h5py.File("velociraptor_output.catalog_particles", "r") + + halo = 100 + # Grab the start position in the particles file to read from + halo_start_position = group_file["Offset"][halo] + halo_end_position = group_file["Offset"][halo + 1] + # We're done with that file now, best to close earlier rather than later + group_file.close() + + # Get the relevant particle IDs for that halo; this includes particles + # of _all_ types. + particle_ids_in_halo = particles_file["Particle_IDs"][ + halo_start_position:halo_end_position + ] + # Again, we're done with that file. + particles_file.close() + + # Now, the tricky bit. We need to create the correspondence between the + # positions in the snapshot file, and the ids. + + # Let's look for the dark matter particles in that halo. + particle_ids_from_snapshot = snapshot_file["PartType1/ParticleIDs"][...] 
+ + _, indices_v, indices_p = np.intersect1d( + particle_ids_in_halo, + particle_ids_from_snapshot, + assume_unique=True, + return_indices=True, + ) + + # indices_p gives the positions in the particle file where we will find + # the co-ordinates that we're looking for! To get the positions of all of + # those particles, + particle_positions_in_halo = snapshot_file["PartType1/Coordinates"][indices_p] + + +Catalog_parttypes file +---------------------- + +The third file that is produced by VELOCIraptor is the ``.catalog_parttypes`` +file, this file contains the information what type of particle every particle +is, it is ordered the same as the ``Particle_IDs`` in ``.catalog_particles``. +There are only two interesting parameters of the file which are: + ++ The ``Num_of_particles_in_groups`` parameter: Gives the total number of + particles in the file which are in a halo. ++ The ``Particle_types`` list: Gives a list of particles types similar to the + snap shots (0 - gas, 1 - dm, 4 - stars). + +Besides the ``.catalog_parttypes`` file, there is also a +``.catalog_parttypes.unbound`` file, this file contains this information for +the unbound particles. + +Properties file +--------------- + +The fourth file is the ``.properties`` file, this file contains many physical +useful information of the corresponding halos. This can be divided in several +useful groups of physical parameters, on this page we have divided the several +variables which are present in the ``.properties`` file. This file has most +physical interesting parameters of the halos. + +Mass-Radius determination: +^^^^^^^^^^^^^^^^^^^^^^^^^^ + +The ``.properties`` file contains many ways to determine the size and mass +of the halos, in this subsection we will list several available variables in +the output of VELOCIraptor and we list several mass and radius parameters in +the output which are not classified as a mass-radius pair. 
+ +Critical Density related: +""""""""""""""""""""""""" + ++ ``Mass_200crit``: The mass of a halo with an over density on average of + :math:`\Delta=200` based on the critical density of the Universe + (:math:`M_{200}`). ++ ``R_200crit``: The :math:`R_{200}` radius of the halo based on the + critical density of the Universe + +Mean Density related: +""""""""""""""""""""" + ++ ``Mass_200mean``: The mass of a halo with an over density on average of + :math:`\Delta=200` based on the mean density of the Universe + (:math:`M_{200}`). ++ ``R_200mean``: The :math:`R_{200}` radius of the halo based on the + mean density of the Universe. + +Virial properties: +"""""""""""""""""" + ++ ``Mvir``: The virial mass of the halos. ++ ``Rvir``: The virial radius of the halo (:math:`R_{vir}`). + +Bryan and Norman 1998 properties: +""""""""""""""""""""""""""""""""" + ++ ``Mass_BN98``, The Bryan and Norman (1998) determination of the mass of the + halo [#BN98]_. ++ ``R_BN98``, the Bryan and Norman (1998) corresponding radius [#BN98]_. + +Several Mass types: +""""""""""""""""""" +This is a list of masses which cannot be categorized as easy as the other +properties. + ++ ``Mass_FOF``: The friends-of-friends mass of the halos. ++ ``M_gas``: The gas mass in the halo. ++ ``Mass_tot``: The total mass of the halo ++ ``M_gas_30kpc``: The gas mass within 30 kpc of the halo centre. ++ ``M_gas_500c``: The gas mass of the over-density of 500 times the critical + density ++ ``M_gas_Rvmax``: The gas mass within the maximum rotation velocity. + +Several Radius types: +""""""""""""""""""""" + ++ ``R_HalfMass``: Radius of half the mass of the halo. ++ ``R_HalfMass_gas``: Radius of half the gas mass of the halo. ++ ``R_size``: ++ ``Rmax``: + +Mass Structure of the Halos: +^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +In this subsection we listed the properties of the halos that are determining +the mass structure of the halo, so the exact profile and the inertia tensor. 
+ +NFW profile properties: +""""""""""""""""""""""" ++ ``Xc``, ``Yc`` and ``Zc``: The x,y and z centre positions of the + halos. + + Centres are calculated using first all particles belonging to the + structure and then VELOCIraptor uses shrinking spheres to iterate to + a centre, stopping once the sphere contains <10% of all the + particles (this value can be changed to smaller amounts and there is + also a minimum particle number which can also be changed). + ++ ``Xc_gas``, ``Yc_gas``, ``Zc_gas``: The offset of the centre + positions of the halo based on the gas, to find the position of the + gas the offsets need to be added to ``Xc``, ``Yc`` and ``Zc``. + ++ ``cNFW``: The concentration of the halo. + + This is calculated using Vmax and Vvir, not using a fitted profile. + ++ ``VXc``, ``VYc`` and ``VZc`` are the velocities in the centre of the halo + [#check]_. ++ ``VXc_gas``, ``VYc_gas`` and ``VZc_gas`` are the velocities of the gas in + the centre of the halo [#check]_. + +Inertia Tensor properties: +""""""""""""""""""""""""""" + ++ ``eig_ij``: Are the normalized eigenvectors of the inertia tensor. ++ The eigenvalue ratios: + + 1. ``q`` is the semi-major over major; + 2. ``s`` is the minor over major. + ++ ``eig_ij_gas``: Are the normalized eigenvectors of the inertia tensor for + only the gas particles. ++ The eigenvalue ratios for only the gas, similar to all particles: + + 1. ``q_gas`` is the semi-major over major for only gas; + 2. ``s_gas`` is the minor over major for only gas. + +Dynamical Structure of the Halos: +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +In this subsection we list several properties that determine the dynamical +structure of the halo, like the angular momentum and the velocity dispersion +tensor. + +Angular momentum and spin parameters: +""""""""""""""""""""""""""""""""""""" + ++ ``lambda_b`` is the bullock spin parameter, see the paper by Bullock et al. + (2001) [#Bullock]_. 
++ ``Lx``, ``Ly`` and ``Lz`` are the angular momentum of the halos, the + calculation includes all the particle types. ++ ``Lx_gas``, ``Ly_gas`` and ``Lz_gas`` are the angular momentum for only + the gas particles in the snapshot. + +Velocity Dispersion related: +"""""""""""""""""""""""""""" + ++ The complete velocity dispersion tensor (:math:`\sigma_{ij}`) which has + an array per component which gives the value for all the halos. In + general these components are called ``veldisp_ij`` in which i and j are + given by ``x``, ``y`` or ``z``. This means that there are nine + components stored in the ``.properties`` file. This omits the fact + that the dispersion tensor by nature is a symmetric tensor. All the + components are given by: + ``veldisp_xx``, ``veldisp_xy``, ``veldisp_xz``, ``veldisp_yx``, + ``veldisp_yy``, ``veldisp_yz``, ``veldisp_zx``, ``veldisp_zy``, + and ``veldisp_zz`` [#velodisp]_. ++ ``sigV``, the scalar velocity dispersion which corresponds with the + trace of the velocity dispersion tensor + (:math:`\sigma = \text{Tr}(\sigma_{ij})`). + + +Energy properties of the halos: +""""""""""""""""""""""""""""""" + ++ ``Ekin``, the kinetic energy of the halo. ++ ``Epot``, the potential energy of the halo. ++ ``Krot``, the rotational energy of the halo. ++ ``Krot_gas``, the rotational energy of the gas in the halo. + + +Halo and subhalo abstract variables: +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +In this subsection we list the ID convention for subhalos and halos and +some other abstract quantities of the halo which are not physical but +rather properties of the simulations. + +Structure types: +"""""""""""""""" + ++ ``ID`` is the halo ID. ++ ``Structuretype`` is the parameter that indicates what kind of structure + the current halo is. Halos have a structure type of ``10`` and subhalos + have a structure type of ``15``. ++ ``hostHaloID``, indicates the halo ID number of the host halo, in the case + that the halo has no parent (e.g. 
is the largest halo), the hostHaloID will
+  be ``-1``.
++ ``numSubStruct``, the number of substructures or subhalos in the halo.
+
+Particle types:
+"""""""""""""""
+
++ ``npart`` is the number of particles in the halo (all types of particles).
++ ``n_gas`` is the number of gas particles in the halo.
+
+Not specified parameters:
+^^^^^^^^^^^^^^^^^^^^^^^^^
+
+In this section we list parameters which cannot specifically be classified
+in a group.
+
+
+Most Bound Particle (MBP):
+""""""""""""""""""""""""""
+
++ ``ID_mbp``, the ID of the most bound particle in the halo.
++ ``Xcmbp``, ``Ycmbp`` and ``Zcmbp`` are the positions of the most bound
+  halo particle [#check]_.
++ ``VXcmbp``, ``VYcmbp`` and ``VZcmbp`` are the velocities of the most bound
+  halo particle [#check]_.
+
+.. [#order] In most cases more massive groups appear earlier in the list, but
+   this is not guaranteed for larger simulations. The order of the groups is
+   more a matter of the way that VELOCIraptor searches instead of a physical
+   reason.
+.. [#center] This is not the average position of the halo's particles, but
+   the halo position found by the VELOCIraptor algorithm. This includes a
+   fit for all the parameters including the gas particles or other types of
+   particles.
+.. [#velodisp] In the velocity dispersion tensor ( :math:`\sigma_{ij}` )
+   the following relations are satisfied between components:
+
+   + :math:`\sigma_{xy}=\sigma_{yx}`
+   + :math:`\sigma_{xz}=\sigma_{zx}`
+   + :math:`\sigma_{yz}=\sigma_{zy}`
+.. [#Bullock] The Bullock spin parameter is given by
+   :math:`\lambda = \frac{J}{\sqrt{2}MVR}`, for more information see
+   https://arxiv.org/abs/astro-ph/0011001.
+.. [#BN98] The Bryan and Norman (1998) paper can be found here:
+   https://arxiv.org/abs/astro-ph/9710107
+.. [#check] Needs to be checked.
diff --git a/doc/RTD/source/VELOCIraptorInterface/stfalone.rst b/doc/RTD/source/VELOCIraptorInterface/stfalone.rst new file mode 100644 index 0000000000000000000000000000000000000000..191d990c3d485bbc548c435d9b548686b9446397 --- /dev/null +++ b/doc/RTD/source/VELOCIraptorInterface/stfalone.rst @@ -0,0 +1,92 @@ +.. VELOCIraptor stand alone + Folkert Nobels 12th October 2018 + +Stand alone VELOCIraptor configuration +====================================== + + +.. toctree:: + :maxdepth: 2 + :hidden: + :caption: Contents: + +Besides running VELOCIraptor on the fly when using SWIFT, it is also possible +to run VELOCIraptor alone without using SWIFT. In this section we explain how +VELOCIraptor can be run stand alone without using SWIFT. + +Setting up VELOCIraptor +----------------------- + +The first step is setting up VELOCIraptor, this requires us to download the +git repository as:: + + git clone https://github.com/pelahi/VELOCIraptor-STF + +Similar to the SWIFT with VELOCIraptor configuration, we can use the +swift-interface branch to analyse individual snapshots. We can use this branch +by doing:: + + cd VELOCIraptor-STF + git fetch + git checkout swift-interface + +Again we need to copy the default SWIFT config file to a other config file by +doing:: + + cd stf + cp Makefile.config.SWIFT-template Makefile.config + +Similar to configuring VELOCIraptor with swift we need to change the first 20 +lines of ``Makefile.config`` to work with our compiler, but we also need to +change the fact that we do not use the swift-interface but the standalone +version of the code, so change ``SWIFTINTERFACE="on"`` to +``SWIFTINTERFACE="off"``. + +Compiling VELOCIraptor +---------------------- + +Compiling goes completely different as compared to the on the fly halo finder +configuration with SWIFT. 
In this case we can compile the code as::
+
+  make
+
+After this an additional folder is created in ``VELOCIraptor-stf/stf`` called
+``bin``, in which the binary file ``stf-gas`` is present (assuming you
+run a simulation with SPH [#nosph]_)
+
+Running VELOCIraptor on a Snapshot
+----------------------------------
+
+After the code is compiled the next step is using VELOCIraptor on a single
+snapshot of a simulation. The code has several options which can be used, which
+can be displayed by running a terminal command of an invalid letter like::
+
+  ./stf-gas -h
+
+which gives the information about the usage of the command::
+
+  USAGE:
+
+  -C <configuration file (overrides other options)>
+  -I <input format [Gadget (Default) 1, HDF (if implemented)2, TIPSY 3, RAMSES 4, HDF 2, NCHILADA 5>
+  -i <input file>
+  -s <number of files per output for gadget input 1 [default]>
+  -Z <number of threads used in parallel read (1)>
+  -o <output filename>
+  ===== EXTRA OPTIONS FOR GADGET INPUT ======
+  -g <number of extra sph/gas blocks for gadget>
+  -s <number of extra star blocks for gadget>
+  -b <number of extra bh blocks for gadget>
+  ===== EXTRA OPTIONS REQUIRED FOR RAMSES INPUT ======
+  -t <ramses snapnumber>
+
+After this we can run VELOCIraptor on a snapshot as::
+
+  ./stf-gas -i input -o output -C configfile.txt
+
+
+.. [#nosph] In the case that in the ``Makefile.config`` it is indicated that the
+   simulation only contains dark matter this will reflect back on the
+   generated binary file. So ``stf-gas`` will change to ``stf`` in the case of
+   a dark matter only simulation.
+
diff --git a/doc/RTD/source/VELOCIraptorInterface/stfwithswift.rst b/doc/RTD/source/VELOCIraptorInterface/stfwithswift.rst
new file mode 100644
index 0000000000000000000000000000000000000000..245b455d583d3ccdca02463e2afc6100e14dfb31
--- /dev/null
+++ b/doc/RTD/source/VELOCIraptorInterface/stfwithswift.rst
@@ -0,0 +1,94 @@
+.. 
SWIFT with VELOCIraptor + Folkert Nobels 12th October 2018 + + +Configuring SWIFT with VELOCIraptor +=================================== + +.. toctree:: + :maxdepth: 2 + :hidden: + :caption: Contents: + +In the following three paragraphs we will explain how to setup VELOCIraptor, +how to compile it and how to compile SWIFT with VELOCIraptor. + + +Setting up VELOCIraptor +----------------------- + +Before we can run SWIFT with VELOCIraptor we first need to download +VELOCIraptor. This can be done by cloning the repository on GitHub_:: + + git clone https://github.com/pelahi/VELOCIraptor-STF + +Currently the best version that works with SWIFT is the swift-interface branch +of VELOCIraptor, to get this branch use:: + + cd VELOCIraptor-STF + git fetch + git checkout swift-interface + +To get the default that works with SWIFT simply copy the SWIFT template file in +the ``Makefile.config``:: + + cd stf + cp Makefile.config.SWIFT-template Makefile.config + +Depending on your compiler you want to change the first 20 lines of your +``Makefile.config`` to work with your compiler and whether you want to use MPI +or not. + + +Compiling VELOCIraptor +---------------------- + +After we downloaded the files and made a configuration file we can compile +VELOCIraptor as follows:: + + make lib + make libstf + +After the compilation of your code, there is an additional folder created in +the ``VELOCIraptor-stf/stf`` directory called ``lib`` this directory has the +library of VELOCIraptor and is required to run SWIFT with +VELOCIraptor. Note that VELOCIraptor needs a serial version of the +HDF5 library, not a parallel build. 
+ +Compiling SWIFT +--------------- +The next part is compiling SWIFT with VELOCIraptor and assumes you already +downloaded SWIFT from the GitLab_, this can be done by running:: + + ./autogen.sh + ./configure --with-velociraptor=/path/to/VELOCIraptor-STF/stf/lib + make + +In which ``./autogen.sh`` only needs to be run once after the code is cloned +from the GitLab_, and ``/path/to/`` is the path to the ``VELOCIraptor-STF`` +directory on your machine. In general ``./configure`` can be run with other +options as desired. After this we can run SWIFT with VELOCIraptor, but for this +we first need to add several lines to the yaml file of our simulation:: + + + #structure finding options + StructureFinding: + config_file_name: stf_input_6dfof_dmonly_sub.cfg + basename: ./stf + output_time_format: 1 + scale_factor_first: 0.02 + delta_time: 1.02 + +In which we specify the ``.cfg`` file that is used by VELOCIraptor and the +other parameters which SWIFT needs to use. In the case of +the Small Cosmological Volume DMO example we can run a simulation with halo +finder as:: + + cd examples/SmallCosmoVolume_DM + ../swift --cosmology --hydro --self-gravity --velociraptor --threads=8 small_cosmo_volume_dm.yml + +Which activates the VELOCIraptor interface. + + +.. _GitHub: https://github.com/pelahi/VELOCIraptor-STF +.. _GitLab: https://gitlab.cosma.dur.ac.uk/swift/swiftsim diff --git a/doc/RTD/source/VELOCIraptorInterface/whatis.rst b/doc/RTD/source/VELOCIraptorInterface/whatis.rst new file mode 100644 index 0000000000000000000000000000000000000000..a0a2a7441c52c188cc603910b43c112b3e24029e --- /dev/null +++ b/doc/RTD/source/VELOCIraptorInterface/whatis.rst @@ -0,0 +1,65 @@ +.. What is VELOCIraptor + Folkert Nobels 12th October 2018 + + +What is VELOCIraptor? +===================== + +.. 
toctree::
+    :maxdepth: 2
+    :hidden:
+    :caption: Contents:
+
+In SWIFT it is possible to run a cosmological simulation and at the same time
+do on the fly halo finding at specific predefined intervals. For finding the
+halos SWIFT uses VELOCIraptor (Elahi, Thacker and Widrow; 2011) [#velo]_, this
+is a C++ halo finder that can use MPI. It differs from other halo finder
+algorithms in the sense that it uses the velocity distributions of the
+particles in the simulations and the positions of the particles to get
+a better estimate of which particles are part of a specific halo and
+whether there are substructures in halos.
+
+The Algorithm
+-------------
+
+The VELOCIraptor algorithm consists basically of the following steps [#ref]_:
+
+1. A kd-tree is constructed based on the maximization of the Shannon-entropy,
+   this means that at every level in the kd-tree an equal number of particles
+   are distributed between the 8 lower nodes. This is based on their position
+   and their corresponding density, this results in more equal density
+   distributed nodes. This is also the implicit step in the algorithm that
+   takes into account the absolute positions of the particles.
+2. The next part is calculating the centre of mass velocity and the
+   velocity distribution for every individual node in the kd-tree.
+3. Then the algorithm estimates the background velocity density function for
+   every particle based on the cell of the particle and the six nearest
+   neighbour cells. This prevents the background velocity density function
+   from being overly sensitive to variations between different cells due to
+   dominant halo features in the velocity density function.
+4. After this the algorithm searches for the nearest velocity neighbours
+   (:math:`N_v`) from a set of nearest position neighbours (:math:`N_x>N_v`).
+ The neighbours' positions do not need to be in the cell of the particles, in + general the set of nearest position neighbours is substantially larger than + the nearest velocity neighbours, the default is set as :math:`N_x=32 N_v`. +5. The individual local velocity density function is calculated for every + particle. +6. The fractional difference is calculated between the local velocity density + function and the background velocity density function. +7. Based on the calculated ratio, outliers are picked and the outliers are + grouped together in halos and subhalos. + + + +.. Every halo finder has limitations, the limitations of VELOCIraptor are: + +.. 1. The algorithm is mostly sensitive to substructures that are on the tail + of the Gaussian velocity density function, this means that VELOCIraptor + is most sensitive for subhalos which are cold (slow rotating) but have + a large bulk velocity + + +.. _Velociraptor: http://adsabs.harvard.edu/abs/2011MNRAS.418..320E +.. [#velo] For technical information regarding VELOCIraptor see: Velociraptor_ +.. 
[#ref] This part is based on the explanation given in the Elahi, Thacker and + Widrow (2011) paper (Velociraptor_) diff --git a/doc/RTD/source/conf.py b/doc/RTD/source/conf.py index 031687ea5228252e2d2e44ec0bd6f53b1b64d732..46cff147efff3e7f23ff3f618898a17da3f85459 100644 --- a/doc/RTD/source/conf.py +++ b/doc/RTD/source/conf.py @@ -23,9 +23,9 @@ copyright = '2018, SWIFT Collaboration' author = 'SWIFT Team' # The short X.Y version -version = '0.7' +version = '0.8' # The full version, including alpha/beta/rc tags -release = '0.7.0' +release = '0.8.0' # -- General configuration --------------------------------------------------- diff --git a/doc/RTD/source/index.rst b/doc/RTD/source/index.rst index 888945a5c0101bb6f59b574a30f1f736ad134079..d148398c1bd77eafbce5e0037457b34efddb4eca 100644 --- a/doc/RTD/source/index.rst +++ b/doc/RTD/source/index.rst @@ -15,8 +15,13 @@ difference is the parameter file that will need to be adapted for SWIFT. :maxdepth: 2 GettingStarted/index + CommandLineOptions/index + ParameterFiles/index InitialConditions/index HydroSchemes/index Cooling/index EquationOfState/index + ExternalPotentials/index NewOption/index + Task/index + VELOCIraptorInterface/index diff --git a/examples/AgoraDisk/agora_disk.yml b/examples/AgoraDisk/agora_disk.yml index 7368700d8a2a5ca8de7d677e1da78be51d669835..92f2532b3132c0f6314b7697f0b9b65f1afedb3b 100644 --- a/examples/AgoraDisk/agora_disk.yml +++ b/examples/AgoraDisk/agora_disk.yml @@ -39,20 +39,18 @@ Gravity: SPH: resolution_eta: 1.2348 # Target smoothing length in units of the mean inter-particle separation (1.2348 == 48Ngbs with the cubic spline kernel). CFL_condition: 0.1 # Courant-Friedrich-Levy condition for time integration. - minimal_temperature: 10 # (internal units) + minimal_temperature: 10. 
# Kelvin # Parameters related to the initial conditions InitialConditions: file_name: ./agora_disk.hdf5 # The file to read - cleanup_h_factors: 1 # Remove the h-factors inherited from Gadget - shift: [674.1175, 674.1175, 674.1175] # (Optional) A shift to apply to all particles read from the ICs (in internal units). + periodic: 0 # Non-periodic BCs + cleanup_h_factors: 1 # Remove the h-factors inherited from Gadget + shift: [674.1175, 674.1175, 674.1175] # Centre the box # Dimensionless pre-factor for the time-step condition LambdaCooling: - lambda_cgs: 1.0e-22 # Cooling rate (in cgs units) - minimum_temperature: 1.0e2 # Minimal temperature (Kelvin) - mean_molecular_weight: 0.59 # Mean molecular weight - hydrogen_mass_abundance: 0.75 # Hydrogen mass abundance (dimensionless) + lambda_nH2_cgs: 1e-22 # Cooling rate divided by square Hydrogen number density (in cgs units [erg * s^-1 * cm^3]) cooling_tstep_mult: 1.0 # Dimensionless pre-factor for the time-step condition # Cooling with Grackle 2.0 diff --git a/examples/AgoraDisk/getIC.sh b/examples/AgoraDisk/getIC.sh old mode 100644 new mode 100755 index 620a751bedaf6c646119247270fad6dd3f740fde..c234b52b943ccb8d6dededed7d0f5070cd9fe5b2 --- a/examples/AgoraDisk/getIC.sh +++ b/examples/AgoraDisk/getIC.sh @@ -6,4 +6,4 @@ if [ "$#" -ne 1 ]; then exit fi -wget https://obswww.unige.ch/~lhausamm/swift/IC/AgoraDisk/$1 +wget https://obswww.unige.ch/~lhausamm/swift/IC/AgoraDisk/$1.hdf5 diff --git a/examples/AgoraDisk/getSolution.sh b/examples/AgoraDisk/getSolution.sh old mode 100644 new mode 100755 diff --git a/examples/AgoraDisk/run.sh b/examples/AgoraDisk/run.sh old mode 100644 new mode 100755 index d7e284db52c2e6750fd713b3607a7f423bac7769..6bf9a98342b4b1977579761da79e06226c6ded40 --- a/examples/AgoraDisk/run.sh +++ b/examples/AgoraDisk/run.sh @@ -38,7 +38,7 @@ cp $sim.hdf5 agora_disk.hdf5 python3 changeType.py agora_disk.hdf5 # Run SWIFT -#../swift $flag -s -G -t 4 agora_disk.yml 2>&1 | tee output.log +#../swift $flag --hydro 
--self-gravity --threads=4 agora_disk.yml 2>&1 | tee output.log echo "Changing smoothing length to be Gadget compatible" diff --git a/examples/ConstantCosmoVolume/Gadget2/README b/examples/ConstantCosmoVolume/Gadget2/README new file mode 100644 index 0000000000000000000000000000000000000000..8063a5da1e68b608759d35373e6006d17bf5047e --- /dev/null +++ b/examples/ConstantCosmoVolume/Gadget2/README @@ -0,0 +1,6 @@ +This parameter file can be used to run the exact same example +with the Gadget-2 code. + +The Gadget code has to be compiled with at least the following options: + - PERIODIC + - HAVE_HDF5 diff --git a/examples/ConstantCosmoVolume/Gadget2/constant_volume.param b/examples/ConstantCosmoVolume/Gadget2/constant_volume.param new file mode 100644 index 0000000000000000000000000000000000000000..a57e3293ae9dce92743737d42605615d3e365f7a --- /dev/null +++ b/examples/ConstantCosmoVolume/Gadget2/constant_volume.param @@ -0,0 +1,138 @@ + +% System of units + +UnitLength_in_cm 3.08567758e24 % 1.0 Mpc +UnitMass_in_g 1.98848e43 % 1.0e10 solar masses +UnitVelocity_in_cm_per_s 1e5 % 1 km/sec +GravityConstantInternal 4.300927e+01 % Same value as SWIFT + +% Relevant files +InitCondFile constantBox +OutputDir data/ + +EnergyFile energy.txt +InfoFile info.txt +TimingsFile timings.txt +CpuFile cpu.txt + +RestartFile restart +SnapshotFileBase box + +OutputListFilename dummy + +% CPU time -limit + +TimeLimitCPU 360000 % = 10 hours +ResubmitOn 0 +ResubmitCommand my-scriptfile + + +% Code options + +ICFormat 3 +SnapFormat 3 +ComovingIntegrationOn 1 + +TypeOfTimestepCriterion 0 +OutputListOn 0 +PeriodicBoundariesOn 1 + +% Caracteristics of run + +TimeBegin 0.00990099 % z = 100 +TimeMax 1. % z = 0. + +Omega0 1.0 +OmegaLambda 0.0 +OmegaBaryon 1.0 +HubbleParam 1.0 +BoxSize 64. 
+ +% Output frequency + +TimeBetSnapshot 1.04 +TimeOfFirstSnapshot 0.00991 + +CpuTimeBetRestartFile 36000.0 ; here in seconds +TimeBetStatistics 0.05 + +NumFilesPerSnapshot 1 +NumFilesWrittenInParallel 1 + +% Accuracy of time integration + +ErrTolIntAccuracy 0.025 +MaxRMSDisplacementFac 0.25 +CourantFac 0.1 +MaxSizeTimestep 0.002 +MinSizeTimestep 1e-7 + + +% Tree algorithm, force accuracy, domain update frequency + +ErrTolTheta 0.3 +TypeOfOpeningCriterion 0 +ErrTolForceAcc 0.005 + +TreeDomainUpdateFrequency 0.01 + +% Further parameters of SPH + +DesNumNgb 48 +MaxNumNgbDeviation 1. +ArtBulkViscConst 0.8 +InitGasTemp 0. +MinGasTemp 0. + +% Memory allocation + +PartAllocFactor 1.6 +TreeAllocFactor 0.8 +BufferSize 30 + +% Softening lengths + +MinGasHsmlFractional 0.001 + +SofteningGas 0.08 # 80 kpc / h = 1/25 of mean inter-particle separation +SofteningHalo 0 +SofteningDisk 0 +SofteningBulge 0 +SofteningStars 0 +SofteningBndry 0 + +SofteningGasMaxPhys 0.08 # 80 kpc / h = 1/25 of mean inter-particle separation +SofteningHaloMaxPhys 0 +SofteningDiskMaxPhys 0 +SofteningBulgeMaxPhys 0 +SofteningStarsMaxPhys 0 +SofteningBndryMaxPhys 0 + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/examples/ConstantCosmoVolume/README b/examples/ConstantCosmoVolume/README new file mode 100644 index 0000000000000000000000000000000000000000..de84f6909a7c9086603f5d717232d60ff5e312e3 --- /dev/null +++ b/examples/ConstantCosmoVolume/README @@ -0,0 +1,7 @@ +This test is a small cosmological volume with constant density and internal energy. +The ICs are generated from a glass file to minimize the build-up of peculiar velocities +over time. + +The cosmology model is very simple by design. We use Omega_m = 1, Omega_b = 1, h = 1. + +The solution script plots the expected solution both in comoving and physical frames. 
diff --git a/examples/ConstantCosmoVolume/constant_volume.yml b/examples/ConstantCosmoVolume/constant_volume.yml new file mode 100644 index 0000000000000000000000000000000000000000..ebfcc4ffd72121571fa1a69f900985917b440c65 --- /dev/null +++ b/examples/ConstantCosmoVolume/constant_volume.yml @@ -0,0 +1,54 @@ +# Define the system of units to use internally. +InternalUnitSystem: + UnitMass_in_cgs: 1.98848e43 # 10^10 M_sun + UnitLength_in_cgs: 3.08567758e24 # 1 Mpc + UnitVelocity_in_cgs: 1e5 # 1 km/s + UnitCurrent_in_cgs: 1 # Amperes + UnitTemp_in_cgs: 1 # Kelvin + +Cosmology: + Omega_m: 1. + Omega_lambda: 0. + Omega_b: 1. + h: 1. + a_begin: 0.00990099 # z_ini = 100. + a_end: 1.0 # z_end = 0. + +# Parameters governing the time integration +TimeIntegration: + dt_min: 1e-7 + dt_max: 2e-3 + +# Parameters governing the snapshots +Snapshots: + basename: box + delta_time: 1.04 + scale_factor_first: 0.00991 + compression: 4 + +# Parameters governing the conserved quantities statistics +Statistics: + scale_factor_first: 0.00991 + delta_time: 1.1 + +# Parameters for the hydrodynamics scheme +SPH: + resolution_eta: 1.2348 # "48 ngb" for the 3D cubic spline + CFL_condition: 0.1 + +# Parameters related to the initial conditions +InitialConditions: + file_name: ./constantBox.hdf5 + periodic: 1 + +Scheduler: + max_top_level_cells: 8 + cell_split_size: 50 + +Gravity: + mesh_side_length: 32 + eta: 0.025 + theta: 0.3 + comoving_softening: 0.08 # 80 kpc = 1/25 of mean inter-particle separation + max_physical_softening: 0.08 # 80 kpc = 1/25 of mean inter-particle separation + diff --git a/examples/ConstantCosmoVolume/getGlass.sh b/examples/ConstantCosmoVolume/getGlass.sh new file mode 100755 index 0000000000000000000000000000000000000000..01b4474ac21666c843b7abedfa39a76948934911 --- /dev/null +++ b/examples/ConstantCosmoVolume/getGlass.sh @@ -0,0 +1,2 @@ +#!/bin/bash +wget http://virgodb.cosma.dur.ac.uk/swift-webstorage/ICs/gravity_glassCube_32.hdf5 diff --git 
a/examples/ConstantCosmoVolume/makeIC.py b/examples/ConstantCosmoVolume/makeIC.py new file mode 100644 index 0000000000000000000000000000000000000000..d63cb34299017380795b302115ae69b3af22b088 --- /dev/null +++ b/examples/ConstantCosmoVolume/makeIC.py @@ -0,0 +1,147 @@ +################################################################################ +# This file is part of SWIFT. +# Copyright (c) 2018 Matthieu Schaller (matthieu.schaller@durham.ac.uk) +# +# This program is free software: you can redistribute it and/or modify +# it under the terms of the GNU Lesser General Public License as published +# by the Free Software Foundation, either version 3 of the License, or +# (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU Lesser General Public License +# along with this program. If not, see <http://www.gnu.org/licenses/>. +# +################################################################################ + +import h5py +from numpy import * + +# Parameters +T_i = 100. # Initial temperature of the gas (in K) +z_i = 100. # Initial redshift +gamma = 5./3. # Gas adiabatic index +numPart_1D = 32 +glassFile = "gravity_glassCube_32.hdf5" +fileName = "constantBox.hdf5" + + +# Some units +Mpc_in_m = 3.08567758e22 +Msol_in_kg = 1.98848e30 +Gyr_in_s = 3.08567758e19 +mH_in_kg = 1.6737236e-27 + +# Some constants +kB_in_SI = 1.38064852e-23 +G_in_SI = 6.67408e-11 + +# Some useful variables in h-full units +H_0 = 1. / Mpc_in_m * 10**5 # h s^-1 +rho_0 = 3. * H_0**2 / (8* math.pi * G_in_SI) # h^2 kg m^-3 +lambda_i = 64. 
/ H_0 * 10**5 # h^-1 m (= 64 h^-1 Mpc) +x_min = -0.5 * lambda_i +x_max = 0.5 * lambda_i + +# SI system of units +unit_l_in_si = Mpc_in_m +unit_m_in_si = Msol_in_kg * 1.e10 +unit_t_in_si = Gyr_in_s +unit_v_in_si = unit_l_in_si / unit_t_in_si +unit_u_in_si = unit_v_in_si**2 + +#--------------------------------------------------- + +# Read the glass file +glass = h5py.File(glassFile, "r" ) + +# Read particle positions and h from the glass +pos = glass["/PartType1/Coordinates"][:,:] +glass.close() + +# Total number of particles +numPart = size(pos)/3 +if numPart != numPart_1D**3: + print("Non-matching glass file") +numPart = numPart_1D**3 + +# Set box size and interparticle distance +boxSize = x_max - x_min +delta_x = boxSize / numPart_1D + +# Get the particle mass +a_i = 1. / (1. + z_i) +m_i = boxSize**3 * rho_0 / numPart + +# Build the arrays +pos *= boxSize +v = zeros((numPart, 3)) +ids = linspace(1, numPart, numPart) +m = zeros(numPart) +h = zeros(numPart) +u = zeros(numPart) + +# Set the particles on the left +for i in range(numPart_1D): + for j in range(numPart_1D): + for k in range(numPart_1D): + index = i * numPart_1D**2 + j * numPart_1D + k + #coords[index,0] = (i + 0.5) * delta_x + #coords[index,1] = (j + 0.5) * delta_x + #coords[index,2] = (k + 0.5) * delta_x + u[index] = kB_in_SI * T_i / (gamma - 1.) / mH_in_kg + h[index] = 1.2348 * delta_x + m[index] = m_i + v[index,0] = 0. + v[index,1] = 0. + v[index,2] = 0. 
+ +# Unit conversion +pos /= unit_l_in_si +v /= unit_v_in_si +m /= unit_m_in_si +h /= unit_l_in_si +u /= unit_u_in_si + +boxSize /= unit_l_in_si + +#File +file = h5py.File(fileName, 'w') + +# Header +grp = file.create_group("/Header") +grp.attrs["BoxSize"] = [boxSize, boxSize, boxSize] +grp.attrs["NumPart_Total"] = [numPart, 0, 0, 0, 0, 0] +grp.attrs["NumPart_Total_HighWord"] = [0, 0, 0, 0, 0, 0] +grp.attrs["NumPart_ThisFile"] = [numPart, 0, 0, 0, 0, 0] +grp.attrs["Time"] = 0.0 +grp.attrs["NumFilesPerSnapshot"] = 1 +grp.attrs["MassTable"] = [0.0, 0.0, 0.0, 0.0, 0.0, 0.0] +grp.attrs["Flag_Entropy_ICs"] = 0 +grp.attrs["Dimension"] = 3 + +#Runtime parameters +grp = file.create_group("/RuntimePars") +grp.attrs["PeriodicBoundariesOn"] = 1 + +#Units +grp = file.create_group("/Units") +grp.attrs["Unit length in cgs (U_L)"] = 100. * unit_l_in_si +grp.attrs["Unit mass in cgs (U_M)"] = 1000. * unit_m_in_si +grp.attrs["Unit time in cgs (U_t)"] = 1. * unit_t_in_si +grp.attrs["Unit current in cgs (U_I)"] = 1. +grp.attrs["Unit temperature in cgs (U_T)"] = 1. 
+ +#Particle group +grp = file.create_group("/PartType0") +grp.create_dataset('Coordinates', data=pos, dtype='d', compression="gzip", shuffle=True) +grp.create_dataset('Velocities', data=v, dtype='f',compression="gzip", shuffle=True) +grp.create_dataset('Masses', data=m, dtype='f', compression="gzip", shuffle=True) +grp.create_dataset('SmoothingLength', data=h, dtype='f', compression="gzip", shuffle=True) +grp.create_dataset('InternalEnergy', data=u, dtype='f', compression="gzip", shuffle=True) +grp.create_dataset('ParticleIDs', data=ids, dtype='L', compression="gzip", shuffle=True) + +file.close() diff --git a/examples/ConstantCosmoVolume/plotSolution.py b/examples/ConstantCosmoVolume/plotSolution.py new file mode 100644 index 0000000000000000000000000000000000000000..f77889d7cb19c230accf25290b88a321e0f59616 --- /dev/null +++ b/examples/ConstantCosmoVolume/plotSolution.py @@ -0,0 +1,257 @@ +################################################################################ +# This file is part of SWIFT. +# Copyright (c) 2018 Matthieu Schaller (matthieu.schaller@durham.ac.uk) +# +# This program is free software: you can redistribute it and/or modify +# it under the terms of the GNU Lesser General Public License as published +# by the Free Software Foundation, either version 3 of the License, or +# (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU Lesser General Public License +# along with this program. If not, see <http://www.gnu.org/licenses/>. +# +################################################################################ + +# Computes the analytical solution of the Zeldovich pancake and compares with +# the simulation result + +# Parameters +T_i = 100. 
# Initial temperature of the gas (in K) +z_c = 1. # Redshift of caustic formation (non-linear collapse) +z_i = 100. # Initial redshift +gas_gamma = 5./3. # Gas adiabatic index + +# Physical constants needed for internal energy to temperature conversion +kB_in_SI = 1.38064852e-23 +mH_in_kg = 1.6737236e-27 + +import matplotlib +matplotlib.use("Agg") +from pylab import * +import h5py +import os.path + +# Plot parameters +params = {'axes.labelsize': 10, +'axes.titlesize': 10, +'font.size': 12, +'legend.fontsize': 12, +'xtick.labelsize': 10, +'ytick.labelsize': 10, +'text.usetex': True, + 'figure.figsize' : (9.90,6.45), +'figure.subplot.left' : 0.06, +'figure.subplot.right' : 0.99, +'figure.subplot.bottom' : 0.06, +'figure.subplot.top' : 0.99, +'figure.subplot.wspace' : 0.21, +'figure.subplot.hspace' : 0.13, +'lines.markersize' : 6, +'lines.linewidth' : 3., +'text.latex.unicode': True +} +rcParams.update(params) +rc('font',**{'family':'sans-serif','sans-serif':['Times']}) + +# Read the simulation data +sim = h5py.File("box_0000.hdf5", "r") +boxSize = sim["/Header"].attrs["BoxSize"][0] +time = sim["/Header"].attrs["Time"][0] +redshift = sim["/Header"].attrs["Redshift"][0] +a = sim["/Header"].attrs["Scale-factor"][0] +scheme = sim["/HydroScheme"].attrs["Scheme"] +kernel = sim["/HydroScheme"].attrs["Kernel function"] +neighbours = sim["/HydroScheme"].attrs["Kernel target N_ngb"] +eta = sim["/HydroScheme"].attrs["Kernel eta"] +git = sim["Code"].attrs["Git Revision"] +H_0 = sim["/Cosmology"].attrs["H0 [internal units]"][0] +unit_length_in_cgs = sim["/Units"].attrs["Unit length in cgs (U_L)"] +unit_mass_in_cgs = sim["/Units"].attrs["Unit mass in cgs (U_M)"] +unit_time_in_cgs = sim["/Units"].attrs["Unit time in cgs (U_t)"] +m_gas = sim["/PartType0/Masses"][0] +N = sim["/Header"].attrs["NumPart_Total"][0] +sim.close() + +# Expected comoving quantities +rho_0 = N * m_gas / boxSize**3 +u_0 = kB_in_SI * T_i / (gas_gamma - 1.) 
/ mH_in_kg +u_0 *= 1e-6 # conversion to internal units +u_0 *= a**(-3*(1-gas_gamma)) +S_0 = (gas_gamma - 1.) * u_0 * rho_0**(-(gas_gamma - 1.)) + +# Mean quantities over time +z = np.zeros(119) +a = np.zeros(119) +S_mean = np.zeros(119) +S_std = np.zeros(119) +u_mean = np.zeros(119) +u_std = np.zeros(119) +P_mean = np.zeros(119) +P_std = np.zeros(119) +rho_mean = np.zeros(119) +rho_std = np.zeros(119) + +vx_mean = np.zeros(119) +vy_mean = np.zeros(119) +vz_mean = np.zeros(119) +vx_std = np.zeros(119) +vy_std = np.zeros(119) +vz_std = np.zeros(119) + +for i in range(119): + sim = h5py.File("box_%04d.hdf5"%i, "r") + + z[i] = sim["/Cosmology"].attrs["Redshift"][0] + a[i] = sim["/Cosmology"].attrs["Scale-factor"][0] + + S = sim["/PartType0/Entropy"][:] + S_mean[i] = np.mean(S) + S_std[i] = np.std(S) + + u = sim["/PartType0/InternalEnergy"][:] + u_mean[i] = np.mean(u) + u_std[i] = np.std(u) + + P = sim["/PartType0/Pressure"][:] + P_mean[i] = np.mean(P) + P_std[i] = np.std(P) + + rho = sim["/PartType0/Density"][:] + rho_mean[i] = np.mean(rho) + rho_std[i] = np.std(rho) + + v = sim["/PartType0/Velocities"][:,:] + vx_mean[i] = np.mean(v[:,0]) + vy_mean[i] = np.mean(v[:,1]) + vz_mean[i] = np.mean(v[:,2]) + vx_std[i] = np.std(v[:,0]) + vy_std[i] = np.std(v[:,1]) + vz_std[i] = np.std(v[:,2]) + +# Move to physical quantities +rho_mean_phys = rho_mean / a**3 +u_mean_phys = u_mean / a**(3*(gas_gamma - 1.)) +S_mean_phys = S_mean + +# Solution in physical coordinates +#T_solution = np.ones(T) / a + +figure() + +# Density evolution -------------------------------- +subplot(231)#, yscale="log") +semilogx(a, rho_mean / rho_0, '-', color='r', lw=1) +semilogx([1e-10, 1e10], np.ones(2), '-', color='0.6', lw=1.) +semilogx([1e-10, 1e10], np.ones(2)*0.99, '--', color='0.6', lw=1.) +semilogx([1e-10, 1e10], np.ones(2)*1.01, '--', color='0.6', lw=1.) 
+text(1e-2, 1.0105, "+1\\%", color='0.6', fontsize=9) +text(1e-2, 0.9895, "-1\\%", color='0.6', fontsize=9, va="top") +text(1e-2, 1.015, "$\\rho_0=%.3f$"%rho_0) +ylim(0.98, 1.02) +xlim(8e-3, 1.1) +xlabel("${\\rm Scale-factor}$", labelpad=0.) +ylabel("${\\rm Comoving~density}~\\rho / \\rho_0$", labelpad=0.) + +# Thermal energy evolution -------------------------------- +subplot(232)#, yscale="log") +semilogx(a, u_mean / u_0, '-', color='r', lw=1) +semilogx([1e-10, 1e10], np.ones(2), '-', color='0.6', lw=1.) +semilogx([1e-10, 1e10], np.ones(2)*0.99, '--', color='0.6', lw=1.) +semilogx([1e-10, 1e10], np.ones(2)*1.01, '--', color='0.6', lw=1.) +text(1e-2, 1.0105, "+1\\%", color='0.6', fontsize=9) +text(1e-2, 0.9895, "-1\\%", color='0.6', fontsize=9, va="top") +text(1e-2, 1.015, "$u_0=%.3e$"%(u_0)) +ylim(0.98, 1.02) +xlim(8e-3, 1.1) +xlabel("${\\rm Scale-factor}$", labelpad=0.) +ylabel("${\\rm Comoving~internal~energy}~u / u_0$", labelpad=0.) + +# Entropy evolution -------------------------------- +subplot(233)#, yscale="log") +semilogx(a, S_mean / S_0, '-', color='r', lw=1) +semilogx([1e-10, 1e10], np.ones(2), '-', color='0.6', lw=1.) +semilogx([1e-10, 1e10], np.ones(2)*0.99, '--', color='0.6', lw=1.) +semilogx([1e-10, 1e10], np.ones(2)*1.01, '--', color='0.6', lw=1.) +text(1e-2, 1.0105, "+1\\%", color='0.6', fontsize=9) +text(1e-2, 0.9895, "-1\\%", color='0.6', fontsize=9, va="top") +text(1e-2, 1.015, "$A_0=%.3e$"%(S_0)) +ylim(0.98, 1.02) +xlim(8e-3, 1.1) +xlabel("${\\rm Scale-factor}$", labelpad=0.) +ylabel("${\\rm Comoving~entropy}~A / A_0$", labelpad=0.) + +# Peculiar velocity evolution --------------------- +subplot(234) +semilogx(a, vx_mean, '-', color='r', lw=1) +semilogx(a, vy_mean, '-', color='g', lw=1) +semilogx(a, vz_mean, '-', color='b', lw=1) +xlabel("${\\rm Scale-factor}$", labelpad=0.) +ylabel("${\\rm Peculiar~velocity~mean}$", labelpad=-5.) 
+ +# Peculiar velocity evolution --------------------- +subplot(235) +semilogx(a, vx_std, '--', color='r', lw=1) +semilogx(a, vy_std, '--', color='g', lw=1) +semilogx(a, vz_std, '--', color='b', lw=1) +xlabel("${\\rm Scale-factor}$", labelpad=0.) +ylabel("${\\rm Peculiar~velocity~std-dev}$", labelpad=0.) + + +# Information ------------------------------------- +subplot(236, frameon=False) + +plot([-0.49, 0.1], [0.62, 0.62], 'k-', lw=1) +text(-0.49, 0.5, "$\\textsc{Swift}$ %s"%git, fontsize=10) +text(-0.49, 0.4, scheme, fontsize=10) +text(-0.49, 0.3, kernel, fontsize=10) +text(-0.49, 0.2, "$%.2f$ neighbours ($\\eta=%.3f$)"%(neighbours, eta), fontsize=10) +xlim(-0.5, 0.5) +ylim(0, 1) +xticks([]) +yticks([]) + +savefig("ConstantBox_comoving.png", dpi=200) + + + +figure() + +# Density evolution -------------------------------- +subplot(231)#, yscale="log") +loglog(a, rho_mean_phys, '-', color='r', lw=1) +xlabel("${\\rm Scale-factor}$") +ylabel("${\\rm Physical~density}$") + +# Thermal energy evolution -------------------------------- +subplot(232)#, yscale="log") +loglog(a, u_mean_phys, '-', color='r', lw=1) +xlabel("${\\rm Scale-factor}$") +ylabel("${\\rm Physical~internal~energy}$") + +# Entropy evolution -------------------------------- +subplot(233)#, yscale="log") +semilogx(a, S_mean_phys, '-', color='r', lw=1) +xlabel("${\\rm Scale-factor}$") +ylabel("${\\rm Physical~entropy}$") + +# Information ------------------------------------- +subplot(236, frameon=False) + +plot([-0.49, 0.1], [0.62, 0.62], 'k-', lw=1) +text(-0.49, 0.5, "$\\textsc{Swift}$ %s"%git, fontsize=10) +text(-0.49, 0.4, scheme, fontsize=10) +text(-0.49, 0.3, kernel, fontsize=10) +text(-0.49, 0.2, "$%.2f$ neighbours ($\\eta=%.3f$)"%(neighbours, eta), fontsize=10) +xlim(-0.5, 0.5) +ylim(0, 1) +xticks([]) +yticks([]) + +savefig("ConstantBox_physical.png", dpi=200) + + diff --git a/examples/ConstantCosmoVolume/run.sh b/examples/ConstantCosmoVolume/run.sh new file mode 100755 index 
0000000000000000000000000000000000000000..c7180b5232428a25c253e6f59726497c363eb1f4 --- /dev/null +++ b/examples/ConstantCosmoVolume/run.sh @@ -0,0 +1,19 @@ +#!/bin/bash + +# Generate the initial conditions if they are not present. +if [ ! -e gravity_glassCube_32.hdf5 ] +then + echo "Fetching initial grvity glass file for the constant cosmological box example..." + ./getGlass.sh +fi +if [ ! -e constantBox.hdf5 ] +then + echo "Generating initial conditions for the uniform cosmo box example..." + python makeIC.py +fi + +# Run SWIFT +../swift --hydro --cosmology --self-gravity --threads=8 constant_volume.yml 2>&1 | tee output.log + +# Plot the result +python plotSolution.py $i diff --git a/examples/CoolingBox/coolingBox.yml b/examples/CoolingBox/coolingBox.yml index 2bd2f19f6d78388ae638521f590255d410bc8697..df2c29c0b612eff377423b7bb76e2c8e1e530df1 100644 --- a/examples/CoolingBox/coolingBox.yml +++ b/examples/CoolingBox/coolingBox.yml @@ -27,17 +27,16 @@ Statistics: SPH: resolution_eta: 1.2348 # Target smoothing length in units of the mean inter-particle separation (1.2348 == 48Ngbs with the cubic spline kernel). CFL_condition: 0.1 # Courant-Friedrich-Levy condition for time integration. + minimal_temperature: 100. 
# Kelvin # Parameters related to the initial conditions InitialConditions: file_name: ./coolingBox.hdf5 # The file to read - + periodic: 1 + # Dimensionless pre-factor for the time-step condition LambdaCooling: - lambda_cgs: 1.0e-22 # Cooling rate (in cgs units) - minimum_temperature: 1.0e4 # Minimal temperature (Kelvin) - mean_molecular_weight: 0.59 # Mean molecular weight - hydrogen_mass_abundance: 0.75 # Hydrogen mass abundance (dimensionless) + lambda_nH2_cgs: 1e-22 # Cooling rate divided by square Hydrogen number density (in cgs units [erg * s^-1 * cm^3]) cooling_tstep_mult: 1.0 # Dimensionless pre-factor for the time-step condition # Cooling with Grackle 2.0 @@ -53,17 +52,24 @@ GrackleCooling: MaxSteps: 1000 ConvergenceLimit: 1e-2 +EagleCooling: + filename: /cosma5/data/Eagle/BG_Tables/CoolingTables/ + reionisation_redshift: 8.989 + he_reion_z_center: 3.5 + he_reion_z_sigma: 0.5 + he_reion_ev_pH: 2.0 + EAGLEChemistry: - InitMetallicity: 0. - InitAbundance_Hydrogen: 0.752 - InitAbundance_Helium: 0.248 - InitAbundance_Carbon: 0.000 - InitAbundance_Nitrogen: 0.000 - InitAbundance_Oxygen: 0.000 - InitAbundance_Neon: 0.000 - InitAbundance_Magnesium: 0.000 - InitAbundance_Silicon: 0.000 - InitAbundance_Iron: 0.000 + InitMetallicity: 0.014 + InitAbundance_Hydrogen: 0.70649785 + InitAbundance_Helium: 0.28055534 + InitAbundance_Carbon: 2.0665436e-3 + InitAbundance_Nitrogen: 8.3562563e-4 + InitAbundance_Oxygen: 5.4926244e-3 + InitAbundance_Neon: 1.4144605e-3 + InitAbundance_Magnesium: 5.907064e-4 + InitAbundance_Silicon: 6.825874e-4 + InitAbundance_Iron: 1.1032152e-3 CalciumOverSilicon: 0.0941736 SulphurOverSilicon: 0.6054160 diff --git a/examples/CoolingBox/run.sh b/examples/CoolingBox/run.sh index 30b2177a6e8bb95a20146397f8b6a5021161b27f..9cf6208e7777ebf66d8b434a11ebb24ecfacd156 100755 --- a/examples/CoolingBox/run.sh +++ b/examples/CoolingBox/run.sh @@ -21,7 +21,7 @@ then fi # Run SWIFT -../swift -s -C -t 1 coolingBox.yml +../swift --cosmology --hydro --cooling 
--threads=4 -n 1000 coolingBox.yml # Check energy conservation and cooling rate python energy_plot.py diff --git a/examples/CoolingHalo/cooling_halo.yml b/examples/CoolingHalo/cooling_halo.yml index 68c3478b717261698ac175835fc246e134e3a6a7..3d6e44ae3efdb4ad0687f61d904d87d55bb2837b 100644 --- a/examples/CoolingHalo/cooling_halo.yml +++ b/examples/CoolingHalo/cooling_halo.yml @@ -27,11 +27,13 @@ Snapshots: SPH: resolution_eta: 1.2349 # Target smoothing length in units of the mean inter-particle separation (1.2349 == 48Ngbs with the cubic spline kernel). CFL_condition: 0.1 # Courant-Friedrich-Levy condition for time integration. - + minimal_temperature: 1e4 # Kelvin + # Parameters related to the initial conditions InitialConditions: file_name: CoolingHalo.hdf5 # The file to read - + periodic: 1 + # External potential parameters IsothermalPotential: vrot: 200. # rotation speed of isothermal potential in internal units @@ -40,8 +42,5 @@ IsothermalPotential: # Cooling parameters LambdaCooling: - lambda_cgs: 1.0e-22 # Cooling rate (in cgs units) - minimum_temperature: 1.0e4 # Minimal temperature (Kelvin) - mean_molecular_weight: 0.59 # Mean molecular weight - hydrogen_mass_abundance: 0.75 # Hydrogen mass abundance (dimensionless) + lambda_nH2_cgs: 1e-22 # Cooling rate divided by square Hydrogen number density (in cgs units [erg * s^-1 * cm^3]) cooling_tstep_mult: 1.0 # Dimensionless pre-factor for the time-step condition diff --git a/examples/CoolingHalo/makeIC.py b/examples/CoolingHalo/makeIC.py index 3ec1be6f7b5e568ebe8e0fefe508ef8287edb29c..046e5d619f047f8c6d40eab5a5cfce2e3a02074d 100644 --- a/examples/CoolingHalo/makeIC.py +++ b/examples/CoolingHalo/makeIC.py @@ -91,10 +91,6 @@ grp.attrs["Unit current in cgs (U_I)"] = 1. grp.attrs["Unit temperature in cgs (U_T)"] = 1. 
-# Runtime parameters -grp = file.create_group("/RuntimePars") -grp.attrs["PeriodicBoundariesOn"] = periodic - # set seed for random number np.random.seed(1234) diff --git a/examples/CoolingHalo/makeIC_random_box.py b/examples/CoolingHalo/makeIC_random_box.py index 4295cb135233f2d5a59405b44e6d8e9c80a1f6c0..be8f2f172e5b7aef385f0974445e44068021c99d 100644 --- a/examples/CoolingHalo/makeIC_random_box.py +++ b/examples/CoolingHalo/makeIC_random_box.py @@ -102,10 +102,6 @@ grp.attrs["MassTable"] = [0.0, 0.0, 0.0, 0.0, 0.0, 0.0] grp.attrs["Flag_Entropy_ICs"] = [0, 0, 0, 0, 0, 0] grp.attrs["Dimension"] = 3 -# Runtime parameters -grp = file.create_group("/RuntimePars") -grp.attrs["PeriodicBoundariesOn"] = periodic - # set seed for random number np.random.seed(1234) diff --git a/examples/CoolingHalo/run.sh b/examples/CoolingHalo/run.sh index 60ceae649d183dce3a7e5019a1ff94ce7bc4f08d..c7c00fe81c960925fe490cd3d65fbc49d331f2d0 100755 --- a/examples/CoolingHalo/run.sh +++ b/examples/CoolingHalo/run.sh @@ -4,7 +4,7 @@ echo "Generating initial conditions for the isothermal potential box example..." python makeIC.py 10000 -../swift -g -s -C -t 16 cooling_halo.yml 2>&1 | tee output.log +../swift --external-gravity --hydro --cooling --threads=16 cooling_halo.yml 2>&1 | tee output.log python radial_profile.py 2. 200 100 diff --git a/examples/CoolingHaloWithSpin/cooling_halo.yml b/examples/CoolingHaloWithSpin/cooling_halo.yml index f6e9fe3b124631fc2d5336db8a7ffb18f7b34a95..1b29e1376e47ad32beacaf9bfb5408b8ff4d3191 100644 --- a/examples/CoolingHaloWithSpin/cooling_halo.yml +++ b/examples/CoolingHaloWithSpin/cooling_halo.yml @@ -27,11 +27,13 @@ Snapshots: SPH: resolution_eta: 1.2349 # Target smoothing length in units of the mean inter-particle separation (1.2349 == 48Ngbs with the cubic spline kernel). CFL_condition: 0.1 # Courant-Friedrich-Levy condition for time integration. 
+ minimal_temperature: 1e4 # Kelvin # Parameters related to the initial conditions InitialConditions: file_name: CoolingHalo.hdf5 # The file to read - + periodic: 1 + # External potential parameters IsothermalPotential: vrot: 200. # Rotation speed of isothermal potential in internal units @@ -40,8 +42,5 @@ IsothermalPotential: # Cooling parameters LambdaCooling: - lambda_cgs: 1.0e-22 # Cooling rate (in cgs units) - minimum_temperature: 1.0e4 # Minimal temperature (Kelvin) - mean_molecular_weight: 0.59 # Mean molecular weight - hydrogen_mass_abundance: 0.75 # Hydrogen mass abundance (dimensionless) + lambda_nH2_cgs: 1e-22 # Cooling rate divided by square Hydrogen number density (in cgs units [erg * s^-1 * cm^3]) cooling_tstep_mult: 0.1 # Dimensionless pre-factor for the time-step condition diff --git a/examples/CoolingHaloWithSpin/makeIC.py b/examples/CoolingHaloWithSpin/makeIC.py index 2cf3127c743f61756b3ff6c4a7738c83d185f9cd..9a839bfd01594fd1d1c899d43223d0ebce12a72f 100644 --- a/examples/CoolingHaloWithSpin/makeIC.py +++ b/examples/CoolingHaloWithSpin/makeIC.py @@ -92,11 +92,6 @@ grp.attrs["Unit time in cgs (U_t)"] = const_unit_length_in_cgs / const_unit_velo grp.attrs["Unit current in cgs (U_I)"] = 1. grp.attrs["Unit temperature in cgs (U_T)"] = 1. 
- -# Runtime parameters -grp = file.create_group("/RuntimePars") -grp.attrs["PeriodicBoundariesOn"] = periodic - # set seed for random number np.random.seed(1234) diff --git a/examples/CoolingHaloWithSpin/makeIC_random_box.py b/examples/CoolingHaloWithSpin/makeIC_random_box.py index 4295cb135233f2d5a59405b44e6d8e9c80a1f6c0..be8f2f172e5b7aef385f0974445e44068021c99d 100644 --- a/examples/CoolingHaloWithSpin/makeIC_random_box.py +++ b/examples/CoolingHaloWithSpin/makeIC_random_box.py @@ -102,10 +102,6 @@ grp.attrs["MassTable"] = [0.0, 0.0, 0.0, 0.0, 0.0, 0.0] grp.attrs["Flag_Entropy_ICs"] = [0, 0, 0, 0, 0, 0] grp.attrs["Dimension"] = 3 -# Runtime parameters -grp = file.create_group("/RuntimePars") -grp.attrs["PeriodicBoundariesOn"] = periodic - # set seed for random number np.random.seed(1234) diff --git a/examples/CoolingHaloWithSpin/run.sh b/examples/CoolingHaloWithSpin/run.sh index 131fbf3cb10d2014546683b5f43194840544fd55..90d0cc84e78858e8d163ff51e25375b03c6818e5 100755 --- a/examples/CoolingHaloWithSpin/run.sh +++ b/examples/CoolingHaloWithSpin/run.sh @@ -5,7 +5,7 @@ echo "Generating initial conditions for the isothermal potential box example..." python makeIC.py 10000 # Run SWIFT with external potential, SPH and cooling -../swift -g -s -C -t 1 cooling_halo.yml 2>&1 | tee output.log +../swift --external-gravity --hydro --cooling --threads=1 cooling_halo.yml 2>&1 | tee output.log # python radial_profile.py 10 diff --git a/examples/CoolingRates/Makefile.am b/examples/CoolingRates/Makefile.am new file mode 100644 index 0000000000000000000000000000000000000000..058cdaf2efa3df3647af6f6e0263f65a0e515a15 --- /dev/null +++ b/examples/CoolingRates/Makefile.am @@ -0,0 +1,32 @@ +# tHIS FIle is part of SWIFT. +# Copyright (c) 2018 Matthieu Schaller (matthieu.schaller@durham.ac.uk). 
+# +# This program is free software: you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation, either version 3 of the License, or +# (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with this program. If not, see <http://www.gnu.org/licenses/>. + +# Add the source directory and the non-standard paths to the included library headers to CFLAGS +AM_CFLAGS = -I$(top_srcdir)/src $(HDF5_CPPFLAGS) $(GSL_INCS) $(FFTW_INCS) + +AM_LDFLAGS = $(HDF5_LDFLAGS) $(HDF5_LIBS) $(FFTW_LIBS) $(TCMALLOC_LIBS) $(JEMALLOC_LIBS) $(TBBMALLOC_LIBS) $(GRACKLE_LIBS) $(GSL_LIBS) $(PROFILER_LIBS) + +# Extra libraries. +EXTRA_LIBS = $(HDF5_LIBS) $(FFTW_LIBS) $(PROFILER_LIBS) $(TCMALLOC_LIBS) $(JEMALLOC_LIBS) $(TBBMALLOC_LIBS) $(GRACKLE_LIBS) $(VELOCIRAPTOR_LIBS) $(GSL_LIBS) + +# Programs. +bin_PROGRAMS = cooling_rates + +# Sources +cooling_rates_SOURCES = cooling_rates.c +cooling_rates_CFLAGS = $(MYFLAGS) $(AM_CFLAGS) +cooling_rates_LDADD = ../../src/.libs/libswiftsim.a $(EXTRA_LIBS) + diff --git a/examples/CoolingRates/README b/examples/CoolingRates/README new file mode 100644 index 0000000000000000000000000000000000000000..429554ef0e4b6dd61c528ea35f62bd888e738656 --- /dev/null +++ b/examples/CoolingRates/README @@ -0,0 +1,18 @@ +This is a test that produces a plot of the contribution to the cooling +rate from each of the elements depending on internal energy, density +and redshift based on the EAGLE tables. To do so, the function in +src/cooling/EAGLE returning the cooling rate is run for multiple +values of the internal energy. 
The resulting cooling rates are written +to files and plotted with a python script (cooling_rates_plot.py). + +The test may be run by: +./getCoolingTables.sh +./cooling_rates -z X -d Y +python plot_cooling_rates.py + +where X is the redshift at which the cooling rates are evaluated and Y +is the base 10 logarithm of the hydrogen number density. Different +metallicities may be specified in testCooling.yml + +Running with -z 3 -d -4 should reproduce the Fig.4 of Wiersma+09 with +the added Compton cooling contribution. diff --git a/examples/CoolingRates/cooling_rates.c b/examples/CoolingRates/cooling_rates.c new file mode 100644 index 0000000000000000000000000000000000000000..8a2d098c8d4bd935f52103c89c5cda51bb0a573d --- /dev/null +++ b/examples/CoolingRates/cooling_rates.c @@ -0,0 +1,314 @@ +/******************************************************************************* + * This file is part of SWIFT. + * Copyright (C) 2015 Matthieu Schaller (matthieu.schaller@durham.ac.uk). + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published + * by the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public License + * along with this program. If not, see <http://www.gnu.org/licenses/>. + * + ******************************************************************************/ +#include "../config.h" + +/* Some standard headers. */ +#include <fenv.h> +#include <unistd.h> + +/* Local headers. 
*/ +#include "swift.h" + +#if defined(COOLING_EAGLE) && defined(CHEMISTRY_EAGLE) && defined(GADGET2_SPH) +#include "cooling/EAGLE/cooling_rates.h" +#include "cooling/EAGLE/cooling_tables.h" + +/* Flag used for printing cooling rate contribution from each + * element. For testing only. Incremented by 1/(number of elements) + * until reaches 1 after which point append to files instead of + * writing new file. */ +static float print_cooling_rate_contribution_flag = 0; + +/** + * @brief Wrapper function used to calculate cooling rate and dLambda_du. + * Writes to file contribution from each element to cooling rate for testing + * purposes (this function is not used when running SWIFT). Table indices + * and offsets for redshift, hydrogen number density and helium fraction are + * passed in so as to compute them only once per particle. + * + * @param n_h_i Particle hydrogen number density index + * @param d_n_h Particle hydrogen number density offset + * @param He_i Particle helium fraction index + * @param d_He Particle helium fraction offset + * @param p Particle structure + * @param cooling #cooling_function_data structure + * @param cosmo #cosmology structure + * @param phys_const #phys_const structure + * @param abundance_ratio Ratio of element abundance to solar + */ +INLINE static double eagle_print_metal_cooling_rate( + int n_h_i, float d_n_h, int He_i, float d_He, const struct part *restrict p, + const struct xpart *restrict xp, + const struct cooling_function_data *restrict cooling, + const struct cosmology *restrict cosmo, const struct phys_const *phys_const, + float *abundance_ratio) { + + /* array to store contributions to cooling rates from each of the + * elements */ + double *element_lambda; + element_lambda = malloc((eagle_cooling_N_metal + 2) * sizeof(double)); + + /* Get the H and He mass fractions */ + const float XH = p->chemistry_data.metal_mass_fraction[chemistry_element_H]; + + /* convert Hydrogen mass fraction in Hydrogen number density */ + 
const double n_h = hydro_get_physical_density(p, cosmo) * XH / + phys_const->const_proton_mass * + cooling->number_density_to_cgs; + + /* cooling rate, derivative of cooling rate and internal energy */ + double lambda_net = 0.0; + double u = hydro_get_physical_internal_energy(p, xp, cosmo) * + cooling->internal_energy_to_cgs; + + /* Open files for writing contributions to cooling rate. Each element + * gets its own file. */ + char output_filename[32]; + FILE **output_file = malloc((eagle_cooling_N_metal + 2) * sizeof(FILE *)); + + /* Once this flag reaches 1 we stop overwriting and start appending. */ + print_cooling_rate_contribution_flag += 1.0 / (eagle_cooling_N_metal + 2); + + /* Loop over each element */ + for (int element = 0; element < eagle_cooling_N_metal + 2; element++) { + sprintf(output_filename, "%s%d%s", "cooling_element_", element, ".dat"); + if (print_cooling_rate_contribution_flag < 1) { + /* If this is the first time we're running this function, overwrite the + * output files */ + output_file[element] = fopen(output_filename, "w"); + print_cooling_rate_contribution_flag += 1.0 / (eagle_cooling_N_metal + 2); + } else { + /* append to existing files */ + output_file[element] = fopen(output_filename, "a"); + } + if (output_file == NULL) { + error("Error opening file!\n"); + } + } + + /* calculate cooling rates */ + for (int j = 0; j < eagle_cooling_N_metal + 2; j++) element_lambda[j] = 0.0; + lambda_net = eagle_metal_cooling_rate( + log10(u), cosmo->z, n_h, abundance_ratio, n_h_i, d_n_h, He_i, d_He, + cooling, /*dLambdaNet_du=*/NULL, element_lambda); + + /* write cooling rate contributions to their own files. 
*/ + for (int j = 0; j < eagle_cooling_N_metal + 2; j++) { + fprintf(output_file[j], "%.5e\n", element_lambda[j]); + } + + for (int i = 0; i < eagle_cooling_N_metal + 2; i++) fclose(output_file[i]); + free(output_file); + free(element_lambda); + + return lambda_net; +} + +/** + * @brief Assign particle density and entropy corresponding to the + * hydrogen number density and internal energy specified. + * + * @param p Particle data structure + * @param cooling Cooling function data structure + * @param cosmo Cosmology data structure + * @param internal_const Physical constants data structure + * @param nh Hydrogen number density (cgs units) + * @param u Internal energy (cgs units) + */ +void set_quantities(struct part *restrict p, struct xpart *restrict xp, + const struct unit_system *restrict us, + const struct cooling_function_data *restrict cooling, + const struct cosmology *restrict cosmo, + const struct phys_const *restrict internal_const, float nh, + double u) { + + double hydrogen_number_density = + nh * pow(units_cgs_conversion_factor(us, UNIT_CONV_LENGTH), 3); + p->rho = hydrogen_number_density * internal_const->const_proton_mass / + p->chemistry_data.metal_mass_fraction[chemistry_element_H]; + + float pressure = (u * cosmo->a * cosmo->a) * + cooling->internal_energy_from_cgs * p->rho * + (hydro_gamma_minus_one); + p->entropy = pressure * (pow(p->rho, -hydro_gamma)); + xp->entropy_full = p->entropy; +} + +/** + * @brief Produces contributions to cooling rates for different + * hydrogen number densities, from different metals, + * tests 1d and 4d table interpolations produce + * same results for cooling rate, dlambda/du and temperature. 
+ */ +int main(int argc, char **argv) { + // Declare relevant structs + struct swift_params *params = malloc(sizeof(struct swift_params)); + struct unit_system us; + struct chemistry_global_data chem_data; + struct part p; + struct xpart xp; + struct phys_const internal_const; + struct cooling_function_data cooling; + struct cosmology cosmo; + const char *parametersFileName = "./cooling_rates.yml"; + + /* Initialize CPU frequency, this also starts time. */ + unsigned long long cpufreq = 0; + clocks_set_cpufreq(cpufreq); + +/* Choke on FP-exceptions */ +#ifdef HAVE_FE_ENABLE_EXCEPT + feenableexcept(FE_DIVBYZERO | FE_INVALID | FE_OVERFLOW); +#endif + + const int npts = 250; // number of values for the internal energy at which + // cooling rate is evaluated + + // Set some default values + float redshift = 0.0, log_10_nh = -1; + + // Read options + int param; + while ((param = getopt(argc, argv, "z:d:")) != -1) switch (param) { + case 'z': + // read redshift + redshift = atof(optarg); + break; + case 'd': + // read log10 of hydrogen number density + log_10_nh = atof(optarg); + break; + case '?': + if (optopt == 'z') + printf("Option -%c requires an argument.\n", optopt); + else + printf("Unknown option character `\\x%x'.\n", optopt); + error("invalid option(s) to cooling_rates"); + } + + // Read the parameter file + if (params == NULL) error("Error allocating memory for the parameter file."); + message("Reading runtime parameters from file '%s'", parametersFileName); + parser_read_file(parametersFileName, params); + + // Init units + units_init_from_params(&us, params, "InternalUnitSystem"); + phys_const_init(&us, params, &internal_const); + + // Init chemistry + chemistry_init(params, &us, &internal_const, &chem_data); + chemistry_first_init_part(&internal_const, &us, &cosmo, &chem_data, &p, &xp); + chemistry_print(&chem_data); + + // Init cosmology + cosmology_init(params, &us, &internal_const, &cosmo); + + // Set redshift and associated quantities + const float 
scale_factor = 1.0 / (1.0 + redshift); + integertime_t ti_current = + log(scale_factor / cosmo.a_begin) / cosmo.time_base; + cosmology_update(&cosmo, &internal_const, ti_current); + message("Redshift is %f", cosmo.z); + + // Init cooling + cooling_init(params, &us, &internal_const, &cooling); + cooling_print(&cooling); + cooling_update(&cosmo, &cooling, /*restart=*/0); + + // Calculate abundance ratios + float abundance_ratio[(chemistry_element_count + 2)]; + abundance_ratio_to_solar(&p, &cooling, abundance_ratio); + + // extract mass fractions, calculate table indices and offsets + float XH = p.chemistry_data.metal_mass_fraction[chemistry_element_H]; + float HeFrac = + p.chemistry_data.metal_mass_fraction[chemistry_element_He] / + (XH + p.chemistry_data.metal_mass_fraction[chemistry_element_He]); + int He_i, n_h_i; + float d_He, d_n_h; + get_index_1d(cooling.HeFrac, eagle_cooling_N_He_frac, HeFrac, &He_i, &d_He); + + // Calculate contributions from metals to cooling rate + // open file + FILE *output_file = fopen("cooling_output.dat", "w"); + if (output_file == NULL) { + error("Error opening output file!\n"); + } + + // set hydrogen number density + const float nh = exp(M_LN10 * log_10_nh); + + /* Initial internal energy */ + double u = 1.0e14; + + // set internal energy to dummy value, will get reset when looping over + // internal energies + set_quantities(&p, &xp, &us, &cooling, &cosmo, &internal_const, nh, u); + float inn_h = hydro_get_physical_density(&p, &cosmo) * XH / + internal_const.const_proton_mass * + cooling.number_density_to_cgs; + get_index_1d(cooling.nH, eagle_cooling_N_density, log10(inn_h), &n_h_i, + &d_n_h); + + // Loop over internal energy + for (int j = 0; j < npts; j++) { + + // Update the particle with the new values + set_quantities(&p, &xp, &us, &cooling, &cosmo, &internal_const, nh, + pow(10.0, 10.0 + j * 8.0 / npts)); + + // New internal energy + u = hydro_get_physical_internal_energy(&p, &xp, &cosmo) * + cooling.internal_energy_to_cgs; 
+ + // calculate cooling rates + const double temperature = eagle_convert_u_to_temp( + log10(u), cosmo.z, 0, NULL, n_h_i, He_i, d_n_h, d_He, &cooling); + + const double cooling_du_dt = eagle_print_metal_cooling_rate( + n_h_i, d_n_h, He_i, d_He, &p, &xp, &cooling, &cosmo, &internal_const, + abundance_ratio); + + // Dump... + fprintf(output_file, "%.5e %.5e\n", exp(M_LN10 * temperature), + cooling_du_dt); + } + fclose(output_file); + message("done cooling rates test"); + + /* Clean everything */ + cosmology_clean(&cosmo); + cooling_clean(&cooling); + + free(params); + return 0; +} + +#else + +int main(int argc, char **argv) { + + /* Initialize CPU frequency, this also starts time. */ + unsigned long long cpufreq = 0; + clocks_set_cpufreq(cpufreq); + + message("This test is only defined for the EAGLE cooling model."); + return 0; +} +#endif diff --git a/examples/CoolingRates/cooling_rates.yml b/examples/CoolingRates/cooling_rates.yml new file mode 100644 index 0000000000000000000000000000000000000000..e0ac9f691cf64b292c50a36d2b1878bf3a368975 --- /dev/null +++ b/examples/CoolingRates/cooling_rates.yml @@ -0,0 +1,39 @@ +# Define the system of units to use internally. 
+InternalUnitSystem: + UnitMass_in_cgs: 1.989e43 # 10^10 M_sun in grams + UnitLength_in_cgs: 3.085678e24 # Mpc in centimeters + UnitVelocity_in_cgs: 1e5 # km/s in centimeters per second + UnitCurrent_in_cgs: 1 # Amperes + UnitTemp_in_cgs: 1 # Kelvin + +# Cosmological parameters +Cosmology: + h: 0.6777 # Reduced Hubble constant + a_begin: 0.04 # Initial scale-factor of the simulation + a_end: 1.0 # Final scale factor of the simulation + Omega_m: 0.307 # Matter density parameter + Omega_lambda: 0.693 # Dark-energy density parameter + Omega_b: 0.0455 # Baryon density parameter + +EAGLEChemistry: + InitMetallicity: 0.014 + InitAbundance_Hydrogen: 0.70649785 + InitAbundance_Helium: 0.28055534 + InitAbundance_Carbon: 2.0665436e-3 + InitAbundance_Nitrogen: 8.3562563e-4 + InitAbundance_Oxygen: 5.4926244e-3 + InitAbundance_Neon: 1.4144605e-3 + InitAbundance_Magnesium: 5.907064e-4 + InitAbundance_Silicon: 6.825874e-4 + InitAbundance_Iron: 1.1032152e-3 + CalciumOverSilicon: 0.0941736 + SulphurOverSilicon: 0.6054160 + + +EagleCooling: + filename: ./coolingtables/ + reionisation_redshift: 9.5 + He_reion_z_centre: 3.5 + He_reion_z_sigma: 0.5 + He_reion_ev_pH: 2.0 + diff --git a/examples/CoolingRates/getCoolingTable.sh b/examples/CoolingRates/getCoolingTable.sh new file mode 100755 index 0000000000000000000000000000000000000000..5cfd93ef0f4603e40b7675f3f2c254b2250f699f --- /dev/null +++ b/examples/CoolingRates/getCoolingTable.sh @@ -0,0 +1,3 @@ +#!/bin/bash +wget http://virgodb.cosma.dur.ac.uk/swift-webstorage/CoolingTables/EAGLE/coolingtables.tar.gz +tar -xf coolingtables.tar.gz diff --git a/examples/CoolingRates/plot_cooling_rates.py b/examples/CoolingRates/plot_cooling_rates.py new file mode 100644 index 0000000000000000000000000000000000000000..cca12468befd8c407d769ee00fcb03ecd52db3ec --- /dev/null +++ b/examples/CoolingRates/plot_cooling_rates.py @@ -0,0 +1,53 @@ +# Plots contribution to cooling rates from each of the different metals +# based on cooling_output.dat and 
cooling_element_*.dat files produced +# by testCooling. + +import matplotlib.pyplot as plt +import numpy as np + +# Number of metals tracked by EAGLE cooling +elements = 11 + +# Declare arrays of internal energy and cooling rate +u = [] +cooling_rate = [[] for i in range(elements + 1)] +Temperature = [[] for i in range(elements + 1)] + +# Read in total cooling rate +file_in = open("cooling_output.dat", "r") +for line in file_in: + data = line.split() + u.append(float(data[0])) + cooling_rate[0].append(-float(data[1])) +file_in.close() + +# Read in contributions to cooling rates from each of the elements +for elem in range(elements): + file_in = open("cooling_element_" + str(elem) + ".dat", "r") + for line in file_in: + data = line.split() + cooling_rate[elem + 1].append(-float(data[0])) + file_in.close() + +# Plot +ax = plt.subplot(111) +p0, = plt.loglog(u, cooling_rate[0], linewidth=0.5, color="k", label="Total") +p1, = plt.loglog( + u, cooling_rate[1], linewidth=0.5, color="k", linestyle="--", label="H + He" +) +p2, = plt.loglog(u, cooling_rate[3], linewidth=0.5, color="b", label="C") +p3, = plt.loglog(u, cooling_rate[4], linewidth=0.5, color="g", label="N") +p4, = plt.loglog(u, cooling_rate[5], linewidth=0.5, color="r", label="O") +p5, = plt.loglog(u, cooling_rate[6], linewidth=0.5, color="c", label="Ne") +p6, = plt.loglog(u, cooling_rate[7], linewidth=0.5, color="m", label="Mg") +p7, = plt.loglog(u, cooling_rate[8], linewidth=0.5, color="y", label="Si") +p8, = plt.loglog(u, cooling_rate[9], linewidth=0.5, color="lightgray", label="S") +p9, = plt.loglog(u, cooling_rate[10], linewidth=0.5, color="olive", label="Ca") +p10, = plt.loglog(u, cooling_rate[11], linewidth=0.5, color="saddlebrown", label="Fe") +ax.set_position([0.15, 0.15, 0.75, 0.75]) +plt.xlim([1e3, 1e8]) +plt.ylim([1e-24, 1e-21]) +plt.xlabel("Temperature ${\\rm{[K]}}$", fontsize=14) +plt.ylabel("${\Lambda/n_H^2 }$ ${\\rm{[erg \cdot cm^3 \cdot s^{-1}]}}$", fontsize=14) +plt.legend(handles=[p0, p1, p2, 
p3, p4, p5, p6, p7, p8, p9, p10]) +plt.savefig("cooling_rates", dpi=200) diff --git a/examples/DiscPatch/GravityOnly/disc-patch.yml b/examples/DiscPatch/GravityOnly/disc-patch.yml index 4ec061add978bec82c267660cc343cf0bfa8f4c6..bcc7d1a3decfb36201b60349eedb5d214e61f9a6 100644 --- a/examples/DiscPatch/GravityOnly/disc-patch.yml +++ b/examples/DiscPatch/GravityOnly/disc-patch.yml @@ -34,7 +34,8 @@ SPH: # Parameters related to the initial conditions InitialConditions: file_name: Disc-Patch.hdf5 # The file to read - + periodic: 1 + # External potential parameters DiscPatchPotential: surface_density: 10. diff --git a/examples/DiscPatch/GravityOnly/makeIC.py b/examples/DiscPatch/GravityOnly/makeIC.py index 5f9650f44277cf858021c9b628d68134c47a19b7..3abf4f87fc6b6f78ed1814be08ca0d8e39359a26 100644 --- a/examples/DiscPatch/GravityOnly/makeIC.py +++ b/examples/DiscPatch/GravityOnly/makeIC.py @@ -111,10 +111,6 @@ grp.attrs["MassTable"] = [0.0, 0.0, 0.0, 0.0, 0.0, 0.0] grp.attrs["Flag_Entropy_ICs"] = [0, 0, 0, 0, 0, 0] grp.attrs["Dimension"] = 3 -#Runtime parameters -grp = file.create_group("/RuntimePars") -grp.attrs["PeriodicBoundariesOn"] = periodic - # set seed for random number numpy.random.seed(1234) diff --git a/examples/DiscPatch/GravityOnly/run.sh b/examples/DiscPatch/GravityOnly/run.sh index 9af1011ee653253f0d1b2cd26db0ac13cf11adc0..999ffa120f809ffe5aa0a3f104e53f8148cfe1c2 100755 --- a/examples/DiscPatch/GravityOnly/run.sh +++ b/examples/DiscPatch/GravityOnly/run.sh @@ -7,4 +7,4 @@ then python makeIC.py 1000 fi -../../swift -g -t 2 disc-patch.yml +../../swift --external-gravity --threads=2 disc-patch.yml diff --git a/examples/DiscPatch/HydroStatic/disc-patch-icc.yml b/examples/DiscPatch/HydroStatic/disc-patch-icc.yml index 983a7dcc103135ab4db61d6ea77701532226c101..aee54057cf2c5b9d178abac5599d9e4133652362 100644 --- a/examples/DiscPatch/HydroStatic/disc-patch-icc.yml +++ b/examples/DiscPatch/HydroStatic/disc-patch-icc.yml @@ -37,7 +37,8 @@ EoS: # Parameters related to 
the initial conditions InitialConditions: file_name: Disc-Patch.hdf5 # The file to read - + periodic: 1 + # External potential parameters DiscPatchPotential: surface_density: 10. diff --git a/examples/DiscPatch/HydroStatic/disc-patch.yml b/examples/DiscPatch/HydroStatic/disc-patch.yml index 422e1cf910202e8f6dc0a9395fc7e36ce80443ed..8651ac09dbc4c4a97f0915ce7df6c678837b2f45 100644 --- a/examples/DiscPatch/HydroStatic/disc-patch.yml +++ b/examples/DiscPatch/HydroStatic/disc-patch.yml @@ -34,7 +34,8 @@ SPH: # Parameters related to the initial conditions InitialConditions: file_name: Disc-Patch-dynamic.hdf5 # The file to read - + periodic: 1 + # External potential parameters DiscPatchPotential: surface_density: 10. diff --git a/examples/DiscPatch/HydroStatic/makeIC.py b/examples/DiscPatch/HydroStatic/makeIC.py index 8b4c55560c34e7bdb538f2b4732369216f91a087..dd50a821a2eb376c0785afd849a3ea575e349703 100644 --- a/examples/DiscPatch/HydroStatic/makeIC.py +++ b/examples/DiscPatch/HydroStatic/makeIC.py @@ -182,10 +182,6 @@ grp.attrs["MassTable"] = [0.0, 0.0, 0.0, 0.0, 0.0, 0.0] grp.attrs["Flag_Entropy_ICs"] = [0, 0, 0, 0, 0, 0] grp.attrs["Dimension"] = 3 -#Runtime parameters -grp = file.create_group("/RuntimePars") -grp.attrs["PeriodicBoundariesOn"] = 1 - # write gas particles grp0 = file.create_group("/PartType0") diff --git a/examples/DiscPatch/HydroStatic/run.sh b/examples/DiscPatch/HydroStatic/run.sh index e1f47ecad54e7e171d78b7da080d56579e985d1e..2dccc93f0445e03750bd090be073b7219b2cd353 100755 --- a/examples/DiscPatch/HydroStatic/run.sh +++ b/examples/DiscPatch/HydroStatic/run.sh @@ -13,6 +13,6 @@ then fi # Run SWIFT -../../swift -g -s -t 4 disc-patch-icc.yml 2>&1 | tee output.log +../../swift --external-gravity --hydro --threads=4 disc-patch-icc.yml 2>&1 | tee output.log python plotSolution.py diff --git a/examples/DiscPatch/HydroStatic_1D/disc-patch-icc.yml b/examples/DiscPatch/HydroStatic_1D/disc-patch-icc.yml index 
450689034f4ae782cc74bf01dac93e723e5d2ce2..ea5d2e24eb93c64e21f37a8c137603b22885392c 100644 --- a/examples/DiscPatch/HydroStatic_1D/disc-patch-icc.yml +++ b/examples/DiscPatch/HydroStatic_1D/disc-patch-icc.yml @@ -34,7 +34,8 @@ SPH: # Parameters related to the initial conditions InitialConditions: file_name: Disc-Patch.hdf5 # The file to read - + periodic: 1 + # External potential parameters DiscPatchPotential: surface_density: 10. diff --git a/examples/DiscPatch/HydroStatic_1D/makeIC.py b/examples/DiscPatch/HydroStatic_1D/makeIC.py index 983a550a3442c6470611792081a5884d38023a6a..b193c85e50d3526b8518cac06b9b00c3071c383a 100644 --- a/examples/DiscPatch/HydroStatic_1D/makeIC.py +++ b/examples/DiscPatch/HydroStatic_1D/makeIC.py @@ -168,10 +168,6 @@ grp.attrs["MassTable"] = [0.0, 0.0, 0.0, 0.0, 0.0, 0.0] grp.attrs["Flag_Entropy_ICs"] = [0, 0, 0, 0, 0, 0] grp.attrs["Dimension"] = 1 -#Runtime parameters -grp = file.create_group("/RuntimePars") -grp.attrs["PeriodicBoundariesOn"] = 1 - # write gas particles grp0 = file.create_group("/PartType0") diff --git a/examples/DiscPatch/HydroStatic_1D/run.sh b/examples/DiscPatch/HydroStatic_1D/run.sh index e9d073a6cc7a06ec9ebd9fdb556c44778d32c7f4..3e22727adab274a58110e43f127343b9bdcb0c82 100755 --- a/examples/DiscPatch/HydroStatic_1D/run.sh +++ b/examples/DiscPatch/HydroStatic_1D/run.sh @@ -8,6 +8,6 @@ then fi # Run SWIFT -../../swift -g -s -t 4 disc-patch-icc.yml 2>&1 | tee output.log +../../swift --external-gravity --hydro --threads=4 disc-patch-icc.yml 2>&1 | tee output.log python plotSolution.py diff --git a/examples/DwarfGalaxy/README b/examples/DwarfGalaxy/README new file mode 100644 index 0000000000000000000000000000000000000000..7a9167694a24c088997316180233b28b9126f298 --- /dev/null +++ b/examples/DwarfGalaxy/README @@ -0,0 +1,7 @@ +This example is a galaxy extracted from the example "ZoomIn". It allows +to test SWIFT on a smaller problem. See the README in "ZoomIn" for more +information. 
+ + +MD5 check-sum of the ICS: +ae2af84d88f30011b6a8af3f37d140cf dwarf_galaxy.hdf5 \ No newline at end of file diff --git a/examples/DwarfGalaxy/dwarf_galaxy.yml b/examples/DwarfGalaxy/dwarf_galaxy.yml new file mode 100644 index 0000000000000000000000000000000000000000..0d815a99c42249bcbbdaf21dbaa34a55f61731aa --- /dev/null +++ b/examples/DwarfGalaxy/dwarf_galaxy.yml @@ -0,0 +1,72 @@ +# Define the system of units to use internally. +InternalUnitSystem: + UnitMass_in_cgs: 1.98848e43 # 10^10 M_sun in grams + UnitLength_in_cgs: 3.08567758e21 # kpc in centimeters + UnitVelocity_in_cgs: 1e5 # km/s in centimeters per second + UnitCurrent_in_cgs: 1 # Amperes + UnitTemp_in_cgs: 1 # Kelvin + +# Structure finding options +StructureFinding: + config_file_name: stf_input.cfg # Name of the STF config file. + basename: ./stf # Common part of the name of output files. + output_time_format: 0 # Specifies the frequency format of structure finding. 0 for simulation steps (delta_step) and 1 for simulation time intervals (delta_time). + scale_factor_first: 0.92 # Scale-factor of the first snaphot (cosmological run) + time_first: 0.01 # Time of the first structure finding output (in internal units). + delta_step: 1000 # Time difference between consecutive structure finding outputs (in internal units) in simulation steps. + delta_time: 1.10 # Time difference between consecutive structure finding outputs (in internal units) in simulation time intervals. + +# Cosmological parameters +Cosmology: + h: 0.673 # Reduced Hubble constant + a_begin: 0.9873046739 # Initial scale-factor of the simulation + a_end: 1.0 # Final scale factor of the simulation + Omega_m: 0.315 # Matter density parameter + Omega_lambda: 0.685 # Dark-energy density parameter + Omega_b: 0.0486 # Baryon density parameter + +Scheduler: + max_top_level_cells: 8 + cell_split_size: 400 # (Optional) Maximal number of particles per cell (this is the default value). 
+ +# Parameters governing the time integration +TimeIntegration: + time_begin: 0. # The starting time of the simulation (in internal units). + time_end: 1. # The end time of the simulation (in internal units). + dt_min: 1e-10 # The minimal time-step size of the simulation (in internal units). + dt_max: 1e-3 # The maximal time-step size of the simulation (in internal units). + +# Parameters governing the snapshots +Snapshots: + basename: dwarf_galaxy # Common part of the name of output files + time_first: 0. # Time of the first output (non-cosmological run) (in internal units) + delta_time: 0.02 # Time difference between consecutive outputs (in internal units) + compression: 1 + +# Parameters governing the conserved quantities statistics +Statistics: + scale_factor_first: 0.987345 # Scale-factor of the first stat dump (cosmological run) + time_first: 0.01 # Time of the first stat dump (non-cosmological run) (in internal units) + delta_time: 1.05 # Time between statistics output + +# Parameters for the self-gravity scheme +Gravity: + eta: 0.025 # Constant dimensionless multiplier for time integration. + theta: 0.7 # Opening angle (Multipole acceptance criterion) + comoving_softening: 0.05 # Comoving softening length (in internal units). + max_physical_softening: 0.01 # Physical softening length (in internal units). + mesh_side_length: 16 + +# Parameters for the hydrodynamics scheme +SPH: + resolution_eta: 1.2348 # Target smoothing length in units of the mean inter-particle separation (1.2348 == 48Ngbs with the cubic spline kernel). + CFL_condition: 0.1 # Courant-Friedrich-Levy condition for time integration. 
+ minimal_temperature: 100 # (internal units) + +# Parameters related to the initial conditions +InitialConditions: + file_name: ./dwarf_galaxy.hdf5 # The file to read + periodic: 1 + cleanup_h_factors: 1 # Remove the h-factors inherited from Gadget + cleanup_velocity_factors: 1 # Remove the sqrt(a) factor in the velocities inherited from Gadget + diff --git a/examples/DwarfGalaxy/getIC.sh b/examples/DwarfGalaxy/getIC.sh new file mode 100755 index 0000000000000000000000000000000000000000..92f4cd3939845d57a61683e95135163b8639371f --- /dev/null +++ b/examples/DwarfGalaxy/getIC.sh @@ -0,0 +1,2 @@ +#!/bin/bash +wget https://obswww.unige.ch/~lhausamm/swift/IC/DwarfGalaxy/dwarf_galaxy.hdf5 diff --git a/examples/DwarfGalaxy/run.sh b/examples/DwarfGalaxy/run.sh new file mode 100755 index 0000000000000000000000000000000000000000..17c0ac0c7e3456c3b43ade953ed94052edb37ac8 --- /dev/null +++ b/examples/DwarfGalaxy/run.sh @@ -0,0 +1,11 @@ +#!/bin/bash + + # Generate the initial conditions if they are not present. +if [ ! -e dwarf_galaxy.hdf5 ] +then + echo "Fetching initial conditions for the dwarf galaxy example..." + ./getIC.sh +fi + +../swift --feedback --self-gravity --hydro --stars --threads=8 $@ dwarf_galaxy.yml 2>&1 | tee output.log + diff --git a/examples/EAGLE_100/eagle_100.yml b/examples/EAGLE_100/eagle_100.yml index 439bb7eb6dc5d460752771addc83c89e27f69b7f..3bcda091bdac5b740f3568de9c0814cc84c3b846 100644 --- a/examples/EAGLE_100/eagle_100.yml +++ b/examples/EAGLE_100/eagle_100.yml @@ -23,7 +23,7 @@ TimeIntegration: dt_max: 1e-3 # The maximal time-step size of the simulation (in internal units). Scheduler: - max_top_level_cells: 80 + max_top_level_cells: 32 # Parameters governing the snapshots Snapshots: @@ -44,6 +44,7 @@ Gravity: theta: 0.85 # Opening angle (Multipole acceptance criterion) comoving_softening: 0.0026994 # Comoving softening length (in internal units). max_physical_softening: 0.0007 # Physical softening length (in internal units). 
+ mesh_side_length: 256 # Parameters for the hydrodynamics scheme SPH: @@ -54,6 +55,7 @@ SPH: # Parameters related to the initial conditions InitialConditions: file_name: ./EAGLE_ICs_100.hdf5 # The file to read + periodic: 1 cleanup_h_factors: 1 # Remove the h-factors inherited from Gadget cleanup_velocity_factors: 1 # Remove the sqrt(a) factor in the velocities inherited from Gadget diff --git a/examples/EAGLE_100/run.sh b/examples/EAGLE_100/run.sh index 9c990a902a6350eff348aad40c482723d1ba954c..12b6ff2a6e3486f45a9a66a4cca3346336160a3d 100755 --- a/examples/EAGLE_100/run.sh +++ b/examples/EAGLE_100/run.sh @@ -7,5 +7,5 @@ then ./getIC.sh fi -../swift -c -s -G -S -t 16 eagle_100.yml 2>&1 | tee output.log +../swift --cosmology --hydro --self-gravity --stars --threads=16 eagle_100.yml 2>&1 | tee output.log diff --git a/examples/EAGLE_12/eagle_12.yml b/examples/EAGLE_12/eagle_12.yml index 8ebe29fb0216e16aeaafcdc086085d8c9879fc5f..90b546f311be7e617929eb53146aa7b3daf8114c 100644 --- a/examples/EAGLE_12/eagle_12.yml +++ b/examples/EAGLE_12/eagle_12.yml @@ -20,11 +20,8 @@ TimeIntegration: time_begin: 0. # The starting time of the simulation (in internal units). time_end: 1e-2 # The end time of the simulation (in internal units). dt_min: 1e-10 # The minimal time-step size of the simulation (in internal units). - dt_max: 1e-3 # The maximal time-step size of the simulation (in internal units). + dt_max: 1e-2 # The maximal time-step size of the simulation (in internal units). 
-Scheduler: - max_top_level_cells: 8 - # Parameters governing the snapshots Snapshots: basename: eagle # Common part of the name of output files @@ -56,6 +53,28 @@ SPH: # Parameters related to the initial conditions InitialConditions: file_name: ./EAGLE_ICs_12.hdf5 # The file to read + periodic: 1 cleanup_h_factors: 1 # Remove the h-factors inherited from Gadget cleanup_velocity_factors: 1 # Remove the sqrt(a) factor in the velocities inherited from Gadget +EAGLEChemistry: # Solar abundances + InitMetallicity: 0.014 + InitAbundance_Hydrogen: 0.70649785 + InitAbundance_Helium: 0.28055534 + InitAbundance_Carbon: 2.0665436e-3 + InitAbundance_Nitrogen: 8.3562563e-4 + InitAbundance_Oxygen: 5.4926244e-3 + InitAbundance_Neon: 1.4144605e-3 + InitAbundance_Magnesium: 5.907064e-4 + InitAbundance_Silicon: 6.825874e-4 + InitAbundance_Iron: 1.1032152e-3 + CalciumOverSilicon: 0.0941736 + SulphurOverSilicon: 0.6054160 + +EagleCooling: + filename: /cosma5/data/Eagle/BG_Tables/CoolingTables/ + reionisation_redshift: 11.5 + he_reion_z_center: 3.5 + he_reion_z_sigma: 0.5 + he_reion_ev_pH: 2.0 + diff --git a/examples/EAGLE_12/run.sh b/examples/EAGLE_12/run.sh index 67f1c24a1ead927823b9240cdeb718b35580d573..90128c4f6c291a86f90e794d452641b08248fbdc 100755 --- a/examples/EAGLE_12/run.sh +++ b/examples/EAGLE_12/run.sh @@ -7,5 +7,5 @@ then ./getIC.sh fi -../swift -c -s -G -S -t 16 eagle_12.yml 2>&1 | tee output.log +../swift --cosmology --hydro --self-gravity --stars --threads=16 eagle_12.yml 2>&1 | tee output.log diff --git a/examples/EAGLE_25/eagle_25.yml b/examples/EAGLE_25/eagle_25.yml index d6f9ad2474cb4fc207145c73a1c1c694f2f11386..bd74473d13acd235a703d7391d187495fc33204f 100644 --- a/examples/EAGLE_25/eagle_25.yml +++ b/examples/EAGLE_25/eagle_25.yml @@ -30,10 +30,7 @@ TimeIntegration: time_begin: 0. # The starting time of the simulation (in internal units). time_end: 1e-2 # The end time of the simulation (in internal units). 
dt_min: 1e-10 # The minimal time-step size of the simulation (in internal units). - dt_max: 1e-3 # The maximal time-step size of the simulation (in internal units). - -Scheduler: - max_top_level_cells: 16 + dt_max: 1e-2 # The maximal time-step size of the simulation (in internal units). # Parameters governing the snapshots Snapshots: @@ -54,7 +51,7 @@ Gravity: theta: 0.7 # Opening angle (Multipole acceptance criterion) comoving_softening: 0.0026994 # Comoving softening length (in internal units). max_physical_softening: 0.0007 # Physical softening length (in internal units). - mesh_side_length: 32 + mesh_side_length: 64 # Parameters for the hydrodynamics scheme SPH: @@ -65,7 +62,28 @@ SPH: # Parameters related to the initial conditions InitialConditions: file_name: ./EAGLE_ICs_25.hdf5 # The file to read + periodic: 1 cleanup_h_factors: 1 # Remove the h-factors inherited from Gadget cleanup_velocity_factors: 1 # Remove the sqrt(a) factor in the velocities inherited from Gadget +EAGLEChemistry: # Solar abundances + InitMetallicity: 0.014 + InitAbundance_Hydrogen: 0.70649785 + InitAbundance_Helium: 0.28055534 + InitAbundance_Carbon: 2.0665436e-3 + InitAbundance_Nitrogen: 8.3562563e-4 + InitAbundance_Oxygen: 5.4926244e-3 + InitAbundance_Neon: 1.4144605e-3 + InitAbundance_Magnesium: 5.907064e-4 + InitAbundance_Silicon: 6.825874e-4 + InitAbundance_Iron: 1.1032152e-3 + CalciumOverSilicon: 0.0941736 + SulphurOverSilicon: 0.6054160 + +EagleCooling: + filename: /cosma5/data/Eagle/BG_Tables/CoolingTables/ + reionisation_redshift: 11.5 + he_reion_z_center: 3.5 + he_reion_z_sigma: 0.5 + he_reion_ev_pH: 2.0 diff --git a/examples/EAGLE_25/run.sh b/examples/EAGLE_25/run.sh index 0b6cf77d7b2461864fc24055811ee00c7dd00613..af1218f70729663d8efe337c312f6ef2fe8d6620 100755 --- a/examples/EAGLE_25/run.sh +++ b/examples/EAGLE_25/run.sh @@ -7,5 +7,5 @@ then ./getIC.sh fi -../swift -c -s -G -S -t 16 eagle_25.yml 2>&1 | tee output.log +../swift --cosmology --hydro --self-gravity 
--stars --threads=16 eagle_25.yml 2>&1 | tee output.log diff --git a/examples/EAGLE_50/eagle_50.yml b/examples/EAGLE_50/eagle_50.yml index 04c157fa86fc25f90a952e0c216285aa2235cb72..b86a3d87ddc5561002a5dc3adf2e82d47fb1b02f 100644 --- a/examples/EAGLE_50/eagle_50.yml +++ b/examples/EAGLE_50/eagle_50.yml @@ -20,10 +20,10 @@ TimeIntegration: time_begin: 0. # The starting time of the simulation (in internal units). time_end: 1e-2 # The end time of the simulation (in internal units). dt_min: 1e-10 # The minimal time-step size of the simulation (in internal units). - dt_max: 1e-3 # The maximal time-step size of the simulation (in internal units). + dt_max: 1e-2 # The maximal time-step size of the simulation (in internal units). Scheduler: - max_top_level_cells: 20 + max_top_level_cells: 16 # Parameters governing the snapshots Snapshots: @@ -44,7 +44,7 @@ Gravity: theta: 0.7 # Opening angle (Multipole acceptance criterion) comoving_softening: 0.0026994 # Comoving softening length (in internal units). max_physical_softening: 0.0007 # Physical softening length (in internal units).
- mesh_side_length: 64 + mesh_side_length: 128 # Parameters for the hydrodynamics scheme SPH: @@ -55,7 +55,27 @@ SPH: # Parameters related to the initial conditions InitialConditions: file_name: ./EAGLE_ICs_50.hdf5 # The file to read + periodic: 1 cleanup_h_factors: 1 # Remove the h-factors inherited from Gadget cleanup_velocity_factors: 1 # Remove the sqrt(a) factor in the velocities inherited from Gadget +EAGLEChemistry: # Solar abundances + InitMetallicity: 0.014 + InitAbundance_Hydrogen: 0.70649785 + InitAbundance_Helium: 0.28055534 + InitAbundance_Carbon: 2.0665436e-3 + InitAbundance_Nitrogen: 8.3562563e-4 + InitAbundance_Oxygen: 5.4926244e-3 + InitAbundance_Neon: 1.4144605e-3 + InitAbundance_Magnesium: 5.907064e-4 + InitAbundance_Silicon: 6.825874e-4 + InitAbundance_Iron: 1.1032152e-3 + CalciumOverSilicon: 0.0941736 + SulphurOverSilicon: 0.6054160 +EagleCooling: + filename: /cosma5/data/Eagle/BG_Tables/CoolingTables/ + reionisation_redshift: 11.5 + he_reion_z_center: 3.5 + he_reion_z_sigma: 0.5 + he_reion_ev_pH: 2.0 diff --git a/examples/EAGLE_50/run.sh b/examples/EAGLE_50/run.sh index a0d5dee11dc58e8d19d4d0e551c5ad8eceb90548..a08e3125b1a8c1d65013ebd537f033abf6de5c3e 100755 --- a/examples/EAGLE_50/run.sh +++ b/examples/EAGLE_50/run.sh @@ -7,5 +7,5 @@ then ./getIC.sh fi -../swift -c -s -G -S -t 16 eagle_50.yml 2>&1 | tee output.log +../swift --cosmology --hydro --self-gravity --stars --threads=16 eagle_50.yml 2>&1 | tee output.log diff --git a/examples/EAGLE_6/eagle_6.yml b/examples/EAGLE_6/eagle_6.yml index eb374df964e8b021ef2b7d90caf8a1824cf3a833..494f48b833f124ffcdc816f170b6b077c4c59857 100644 --- a/examples/EAGLE_6/eagle_6.yml +++ b/examples/EAGLE_6/eagle_6.yml @@ -66,7 +66,27 @@ SPH: # Parameters related to the initial conditions InitialConditions: file_name: ./EAGLE_ICs_6.hdf5 # The file to read + periodic: 1 cleanup_h_factors: 1 # Remove the h-factors inherited from Gadget cleanup_velocity_factors: 1 # Remove the sqrt(a) factor in the velocities 
inherited from Gadget +EAGLEChemistry: # Solar abundances + InitMetallicity: 0.014 + InitAbundance_Hydrogen: 0.70649785 + InitAbundance_Helium: 0.28055534 + InitAbundance_Carbon: 2.0665436e-3 + InitAbundance_Nitrogen: 8.3562563e-4 + InitAbundance_Oxygen: 5.4926244e-3 + InitAbundance_Neon: 1.4144605e-3 + InitAbundance_Magnesium: 5.907064e-4 + InitAbundance_Silicon: 6.825874e-4 + InitAbundance_Iron: 1.1032152e-3 + CalciumOverSilicon: 0.0941736 + SulphurOverSilicon: 0.6054160 +EagleCooling: + filename: /cosma5/data/Eagle/BG_Tables/CoolingTables/ + reionisation_redshift: 11.5 + he_reion_z_center: 3.5 + he_reion_z_sigma: 0.5 + he_reion_ev_pH: 2.0 diff --git a/examples/EAGLE_6/run.sh b/examples/EAGLE_6/run.sh index 7ef3fc2abdd1bb3fed1a228bf993bf09fb13f42c..6df5902a057434ad1e7b5145b235655fa981e40e 100755 --- a/examples/EAGLE_6/run.sh +++ b/examples/EAGLE_6/run.sh @@ -7,5 +7,5 @@ then ./getIC.sh fi -../swift -c -s -G -S -t 16 eagle_6.yml 2>&1 | tee output.log +../swift --cosmology --hydro --self-gravity --stars --threads=16 eagle_6.yml 2>&1 | tee output.log diff --git a/examples/EAGLE_6/testVELOCIraptor.sh b/examples/EAGLE_6/testVELOCIraptor.sh index 14ec30487006f0b7e86356837c9a801950c15c83..3f0ae1d6f0da9736b867f53b898752efbfd50324 100755 --- a/examples/EAGLE_6/testVELOCIraptor.sh +++ b/examples/EAGLE_6/testVELOCIraptor.sh @@ -36,8 +36,8 @@ if [ "$RUN_DM" = "1" ]; then rm $VEL_OUTPUT/vel_$TEST* # Run test using SWIFT + VELOCIraptor - echo "Running: mpirun -np $NUM_MPI_PROC ../swift_mpi -G -t 8 eagle_6.yml -x -n 5 -P StructureFinding:basename:./$OUTPUT/stf -P StructureFinding:config_file_name:./stf_input_$TEST.cfg -P Snapshots:basename:./eagle_dmonly" - mpirun -np $NUM_MPI_PROC ../swift_mpi -G -t 8 eagle_6.yml -x -n 5 -P StructureFinding:basename:./$OUTPUT/stf -P StructureFinding:config_file_name:./stf_input_$TEST.cfg -P Snapshots:basename:./eagle_dmonly + echo "Running: mpirun -np $NUM_MPI_PROC ../swift_mpi --self-gravity --threads=8 eagle_6.yml --velociraptor --steps=5 -P 
StructureFinding:basename:./$OUTPUT/stf -P StructureFinding:config_file_name:./stf_input_$TEST.cfg -P Snapshots:basename:./eagle_dmonly" + mpirun -np $NUM_MPI_PROC ../swift_mpi --self-gravity --threads=8 eagle_6.yml --velociraptor --steps=5 -P StructureFinding:basename:./$OUTPUT/stf -P StructureFinding:config_file_name:./stf_input_$TEST.cfg -P Snapshots:basename:./eagle_dmonly # Run test using VELOCIraptor echo "Running: mpirun -np $NUM_MPI_PROC $VELOCIRAPTOR_PATH/bin/stf-gas -I 2 -i eagle_dmonly_0000 -C $VELOCIRAPTOR_PATH/stf_input_$TEST.cfg -o ./$VEL_OUTPUT/vel_$TEST" @@ -80,8 +80,8 @@ if [ "$RUN_GAS" = "1" ]; then rm $VEL_OUTPUT/vel_$TEST* # Run test using SWIFT + VELOCIraptor - echo "Running: mpirun -np $NUM_MPI_PROC ../swift_mpi -s -G -t 8 eagle_6.yml -x -n 5 -P StructureFinding:basename:./$OUTPUT/stf -P StructureFinding:config_file_name:./stf_input_$TEST.cfg -P Snapshots:basename:./eagle_gas" - mpirun -np $NUM_MPI_PROC ../swift_mpi -s -G -t 8 eagle_6.yml -x -n 5 -P StructureFinding:basename:./$OUTPUT/stf -P StructureFinding:config_file_name:./stf_input_$TEST.cfg -P Snapshots:basename:./eagle_gas + echo "Running: mpirun -np $NUM_MPI_PROC ../swift_mpi --hydro --self-gravity --threads=8 eagle_6.yml --velociraptor --steps=5 -P StructureFinding:basename:./$OUTPUT/stf -P StructureFinding:config_file_name:./stf_input_$TEST.cfg -P Snapshots:basename:./eagle_gas" + mpirun -np $NUM_MPI_PROC ../swift_mpi --hydro --self-gravity --threads=8 eagle_6.yml --velociraptor --steps=5 -P StructureFinding:basename:./$OUTPUT/stf -P StructureFinding:config_file_name:./stf_input_$TEST.cfg -P Snapshots:basename:./eagle_gas # Run test using VELOCIraptor echo "Running: mpirun -np $NUM_MPI_PROC $VELOCIRAPTOR_PATH/bin/stf-gas -I 2 -i eagle_gas_0000 -C ./stf_input_$TEST.cfg -o ./$VEL_OUTPUT/vel_$TEST" diff --git a/examples/EAGLE_DMO_100/eagle_100.yml b/examples/EAGLE_DMO_100/eagle_100.yml index f04c32c8d08b5548c2c710cf8782b39a59c3821e..5a3066195647b79eeb6a6d67d037d15ce8370c39 100644 --- 
a/examples/EAGLE_DMO_100/eagle_100.yml +++ b/examples/EAGLE_DMO_100/eagle_100.yml @@ -49,6 +49,7 @@ Gravity: # Parameters related to the initial conditions InitialConditions: file_name: EAGLE_DMO_ICs_100.hdf5 + periodic: 1 cleanup_h_factors: 1 # Remove the h-factors inherited from Gadget cleanup_velocity_factors: 1 # Remove the sqrt(a) factor in the velocities inherited from Gadget diff --git a/examples/EAGLE_DMO_100/run.sh b/examples/EAGLE_DMO_100/run.sh index 642c9247cf4aefa299e8f11c9674d737f4770296..6a3b3a00d69c6f3ff78159290d3e41c7d534b8ae 100755 --- a/examples/EAGLE_DMO_100/run.sh +++ b/examples/EAGLE_DMO_100/run.sh @@ -7,5 +7,5 @@ then ./getIC.sh fi -../swift -c -G -t 16 eagle_100.yml 2>&1 | tee output.log +../swift --cosmology --self-gravity --threads=16 eagle_100.yml 2>&1 | tee output.log diff --git a/examples/EAGLE_DMO_12/eagle_12.yml b/examples/EAGLE_DMO_12/eagle_12.yml index 2354216a5b0dcefe139d6e39699b4c67035a4173..0660d98e87adfae62a2d795efec7ad6509cc1354 100644 --- a/examples/EAGLE_DMO_12/eagle_12.yml +++ b/examples/EAGLE_DMO_12/eagle_12.yml @@ -51,6 +51,7 @@ Gravity: # Parameters related to the initial conditions InitialConditions: file_name: EAGLE_DMO_ICs_12.hdf5 + periodic: 1 cleanup_h_factors: 1 # Remove the h-factors inherited from Gadget cleanup_velocity_factors: 1 # Remove the sqrt(a) factor in the velocities inherited from Gadget diff --git a/examples/EAGLE_DMO_12/run.sh b/examples/EAGLE_DMO_12/run.sh index ebf24ee6a5c873d595c58e74a31838eb2d013d92..f5e7e729470d632aab9a539f50d3bb922b7ae88f 100755 --- a/examples/EAGLE_DMO_12/run.sh +++ b/examples/EAGLE_DMO_12/run.sh @@ -7,5 +7,5 @@ then ./getIC.sh fi -../swift -c -G -t 16 eagle_12.yml 2>&1 | tee output.log +../swift --cosmology --self-gravity --threads=16 eagle_12.yml 2>&1 | tee output.log diff --git a/examples/EAGLE_DMO_25/eagle_25.yml b/examples/EAGLE_DMO_25/eagle_25.yml index b02f9742a597687d2742b7c2d9eddf836258b06a..558c68ffaad204ebbe1d5781f945f0d95108d227 100644 --- 
a/examples/EAGLE_DMO_25/eagle_25.yml +++ b/examples/EAGLE_DMO_25/eagle_25.yml @@ -50,6 +50,7 @@ Gravity: # Parameters related to the initial conditions InitialConditions: file_name: EAGLE_DMO_ICs_25.hdf5 + periodic: 1 cleanup_h_factors: 1 # Remove the h-factors inherited from Gadget cleanup_velocity_factors: 1 # Remove the sqrt(a) factor in the velocities inherited from Gadget diff --git a/examples/EAGLE_DMO_25/run.sh b/examples/EAGLE_DMO_25/run.sh index ae0a6d3c49b89239da973c7417530204b4751729..6387472050b46d84932d27739b9490b727d4d31f 100755 --- a/examples/EAGLE_DMO_25/run.sh +++ b/examples/EAGLE_DMO_25/run.sh @@ -7,5 +7,5 @@ then ./getIC.sh fi -../swift -c -G -t 16 eagle_25.yml 2>&1 | tee output.log +../swift --cosmology --self-gravity --threads=16 eagle_25.yml 2>&1 | tee output.log diff --git a/examples/EAGLE_DMO_50/eagle_50.yml b/examples/EAGLE_DMO_50/eagle_50.yml index 97299df063cd1f611f59a56ccd9b091b1217bef3..3cab2b1dc869b5187cf647caa7893281b783591a 100644 --- a/examples/EAGLE_DMO_50/eagle_50.yml +++ b/examples/EAGLE_DMO_50/eagle_50.yml @@ -49,6 +49,7 @@ Gravity: # Parameters related to the initial conditions InitialConditions: file_name: EAGLE_DMO_ICs_50.hdf5 + periodic: 1 cleanup_h_factors: 1 # Remove the h-factors inherited from Gadget cleanup_velocity_factors: 1 # Remove the sqrt(a) factor in the velocities inherited from Gadget diff --git a/examples/EAGLE_DMO_50/run.sh b/examples/EAGLE_DMO_50/run.sh index 31980a5a883e62c972b27a41bbdebe06c7c71539..486f7f33f7a2062563d63873e823c8623f10cf0d 100755 --- a/examples/EAGLE_DMO_50/run.sh +++ b/examples/EAGLE_DMO_50/run.sh @@ -7,5 +7,5 @@ then ./getIC.sh fi -../swift -c -G -t 16 eagle_50.yml 2>&1 | tee output.log +../swift --cosmology --self-gravity --threads=16 eagle_50.yml 2>&1 | tee output.log diff --git a/examples/EvrardCollapse_3D/evrard.yml b/examples/EvrardCollapse_3D/evrard.yml index f9a4e69f72e6bb19b818cb985ef92122b1a10b2a..c14f9151b5a4ba6af60307a689d5b2530068deb3 100644 --- 
a/examples/EvrardCollapse_3D/evrard.yml +++ b/examples/EvrardCollapse_3D/evrard.yml @@ -39,6 +39,7 @@ Gravity: # Parameters related to the initial conditions InitialConditions: file_name: ./evrard.hdf5 # The file to read - + periodic: 0 + PhysicalConstants: G: 1. diff --git a/examples/EvrardCollapse_3D/makeIC.py b/examples/EvrardCollapse_3D/makeIC.py index f4d3c4c5bf7f91e5f79cfcd4e9ae23388932144e..29c4acd69ebf0638edf1273efc0f411766aebb6d 100644 --- a/examples/EvrardCollapse_3D/makeIC.py +++ b/examples/EvrardCollapse_3D/makeIC.py @@ -86,10 +86,6 @@ grp.attrs["MassTable"] = [0.0, 0.0, 0.0, 0.0, 0.0, 0.0] grp.attrs["Flag_Entropy_ICs"] = 0 grp.attrs["Dimension"] = 3 -#Runtime parameters -grp = file.create_group("/RuntimePars") -grp.attrs["PeriodicBoundariesOn"] = 0 - #Units grp = file.create_group("/Units") grp.attrs["Unit length in cgs (U_L)"] = 1. diff --git a/examples/EvrardCollapse_3D/run.sh b/examples/EvrardCollapse_3D/run.sh index abb7614f66fc877aa670db9b0e1335fbfe2e85d2..d9005a7214e297fc79053b5c2ca97b2cafc856a6 100755 --- a/examples/EvrardCollapse_3D/run.sh +++ b/examples/EvrardCollapse_3D/run.sh @@ -8,7 +8,7 @@ then fi # Run SWIFT -../swift -s -G -t 4 evrard.yml 2>&1 | tee output.log +../swift --hydro --self-gravity --threads=4 evrard.yml 2>&1 | tee output.log # Get the high resolution 1D reference result if not present. if [ ! 
-e evrardCollapse3D_exact.txt ] diff --git a/examples/ExternalPointMass/energy_plot.py b/examples/ExternalPointMass/energy_plot.py index 1863305614c226f64faac3d86fa2f809d49b9d74..5644e48f8bd954800526369cc152da7024d069dd 100644 --- a/examples/ExternalPointMass/energy_plot.py +++ b/examples/ExternalPointMass/energy_plot.py @@ -91,8 +91,8 @@ for i in range(402): E_tot_snap[i] = E_kin_snap[i] + E_pot_snap[i] Lz_snap[i] = np.sum(Lz) -print "Starting energy:", E_kin_stats[0], E_pot_stats[0], E_tot_stats[0] -print "Ending energy:", E_kin_stats[-1], E_pot_stats[-1], E_tot_stats[-1] +print("Starting energy:", E_kin_stats[0], E_pot_stats[0], E_tot_stats[0]) +print("Ending energy:", E_kin_stats[-1], E_pot_stats[-1], E_tot_stats[-1]) # Plot energy evolution figure() diff --git a/examples/ExternalPointMass/externalPointMass.yml b/examples/ExternalPointMass/externalPointMass.yml index de05a9ff3c10afa7871ebeafbf4d8d272056d39f..c9b1ef34d618eddfc2ba410785deb4919ed1b835 100644 --- a/examples/ExternalPointMass/externalPointMass.yml +++ b/examples/ExternalPointMass/externalPointMass.yml @@ -31,11 +31,13 @@ SPH: # Parameters related to the initial conditions InitialConditions: file_name: PointMass.hdf5 # The file to read + periodic: 0 shift: [50.,50.,50.] # A shift to apply to all particles read from the ICs (in internal units). # External potential parameters PointMassPotential: position: [50.,50.,50.] 
# location of external point mass in internal units + useabspos: 1 # Position is absolute mass: 1e10 # mass of external point mass in internal units timestep_mult: 0.03 # controls time step diff --git a/examples/ExternalPointMass/makeIC.py b/examples/ExternalPointMass/makeIC.py index fdc5b1fd67ffcbd85beae3a9d6d1274d3d48c279..6780430d22e39350e7efeb52190708c78141bd4f 100644 --- a/examples/ExternalPointMass/makeIC.py +++ b/examples/ExternalPointMass/makeIC.py @@ -36,16 +36,16 @@ const_unit_length_in_cgs = (1000*PARSEC_IN_CGS) const_unit_mass_in_cgs = (SOLAR_MASS_IN_CGS) const_unit_velocity_in_cgs = (1e5) -print "UnitMass_in_cgs: ", const_unit_mass_in_cgs -print "UnitLength_in_cgs: ", const_unit_length_in_cgs -print "UnitVelocity_in_cgs: ", const_unit_velocity_in_cgs -print "UnitTime_in_cgs: ", const_unit_length_in_cgs / const_unit_velocity_in_cgs +print("UnitMass_in_cgs: ", const_unit_mass_in_cgs) +print("UnitLength_in_cgs: ", const_unit_length_in_cgs) +print("UnitVelocity_in_cgs: ", const_unit_velocity_in_cgs) +print("UnitTime_in_cgs: ", const_unit_length_in_cgs / const_unit_velocity_in_cgs) # derived units const_unit_time_in_cgs = (const_unit_length_in_cgs / const_unit_velocity_in_cgs) const_G = ((NEWTON_GRAVITY_CGS*const_unit_mass_in_cgs*const_unit_time_in_cgs*const_unit_time_in_cgs/(const_unit_length_in_cgs*const_unit_length_in_cgs*const_unit_length_in_cgs))) -print '---------------------' -print 'G in internal units: ', const_G +print('---------------------') +print('G in internal units: ', const_G) # Parameters @@ -53,7 +53,7 @@ periodic = 1 # 1 For periodic box boxSize = 100. # max_radius = boxSize / 4. # maximum radius of particles Mass = 1e10 -print "Mass at the centre: ", Mass +print("Mass at the centre: ", Mass) numPart = int(sys.argv[1]) # Number of particles mass = 1. 
@@ -79,9 +79,6 @@ grp.attrs["MassTable"] = [0.0, 0.0, 0.0, 0.0, 0.0, 0.0] grp.attrs["Flag_Entropy_ICs"] = [0, 0, 0, 0, 0, 0] grp.attrs["Dimension"] = 3 -#Runtime parameters -grp = file.create_group("/RuntimePars") -grp.attrs["PeriodicBoundariesOn"] = periodic #Units grp = file.create_group("/Units") @@ -96,9 +93,9 @@ grp1 = file.create_group("/PartType1") #generate particle positions radius = max_radius * (numpy.random.rand(numPart))**(1./3.) -print '---------------------' -print 'Radius: minimum = ',min(radius) -print 'Radius: maximum = ',max(radius) +print('---------------------') +print('Radius: minimum = ',min(radius)) +print('Radius: maximum = ',max(radius)) radius = numpy.sort(radius) r = numpy.zeros((numPart, 3)) r[:,0] = radius @@ -107,9 +104,9 @@ r[:,0] = radius speed = numpy.sqrt(const_G * Mass / radius) omega = speed / radius period = 2.*math.pi/omega -print '---------------------' -print 'Period: minimum = ',min(period) -print 'Period: maximum = ',max(period) +print('---------------------') +print('Period: minimum = ',min(period)) +print('Period: maximum = ',max(period)) v = numpy.zeros((numPart, 3)) v[:,0] = -omega * r[:,1] diff --git a/examples/ExternalPointMass/run.sh b/examples/ExternalPointMass/run.sh index e074c384c4e002a161c7d8258e9068663204099f..fa3a40e20c8101a3b5cdb5f0a443ac937cd680c7 100755 --- a/examples/ExternalPointMass/run.sh +++ b/examples/ExternalPointMass/run.sh @@ -8,6 +8,6 @@ then fi rm -rf pointMass_*.hdf5 -../swift -g -t 1 externalPointMass.yml 2>&1 | tee output.log +../swift --external-gravity --threads=1 externalPointMass.yml 2>&1 | tee output.log python energy_plot.py diff --git a/examples/Gradients/gradientsCartesian.yml b/examples/Gradients/gradientsCartesian.yml index b2131bdd4d3a9242d30ff0f32b7bf3395cb433a8..0264e9ced8652f45feeba79573d3143e6b0086bb 100644 --- a/examples/Gradients/gradientsCartesian.yml +++ b/examples/Gradients/gradientsCartesian.yml @@ -31,4 +31,4 @@ SPH: # Parameters related to the initial conditions 
InitialConditions: file_name: ./Gradients_cartesian.hdf5 # The file to read - + periodic: 1 diff --git a/examples/Gradients/gradientsRandom.yml b/examples/Gradients/gradientsRandom.yml index 57ae849898bf8ccd63ccd7a5d685f9690403403d..1c6fcc1d077e0fd260b42e7de77490d58fb5aea9 100644 --- a/examples/Gradients/gradientsRandom.yml +++ b/examples/Gradients/gradientsRandom.yml @@ -31,4 +31,4 @@ SPH: # Parameters related to the initial conditions InitialConditions: file_name: ./Gradients_random.hdf5 # The file to read - + periodic: 1 diff --git a/examples/Gradients/makeICs.py b/examples/Gradients/makeICs.py index 38d035d2ad2dd3dd6daacfd6f58d824e9daf6742..be70a9e614e8bc32db0c0979c42ab892ef7d068f 100644 --- a/examples/Gradients/makeICs.py +++ b/examples/Gradients/makeICs.py @@ -26,7 +26,6 @@ import sys # reconstruction # Parameters -periodic= 1 # 1 For periodic box gamma = 5./3. # Gas adiabatic index gridtype = "cartesian" if len(sys.argv) > 1: @@ -153,10 +152,6 @@ grp.attrs["NumFilesPerSnapshot"] = 1 grp.attrs["MassTable"] = [0.0, 0.0, 0.0, 0.0, 0.0, 0.0] grp.attrs["Flag_Entropy_ICs"] = [0, 0, 0, 0, 0, 0] -#Runtime parameters -grp = file.create_group("/RuntimePars") -grp.attrs["PeriodicBoundariesOn"] = periodic - #Particle group grp = file.create_group("/PartType0") ds = grp.create_dataset('Coordinates', (npart, 3), 'd') diff --git a/examples/Gradients/run.sh b/examples/Gradients/run.sh index 44c25ac5695175c40483d9f8b3bbd160b2fcbc0a..4a0a80a3aef3a37c54108d1489abe80cb0fd3f10 100755 --- a/examples/Gradients/run.sh +++ b/examples/Gradients/run.sh @@ -1,13 +1,13 @@ #! 
/bin/bash python makeICs.py stretched -../swift -s -t 2 gradientsStretched.yml +../swift --hydro --threads=2 gradientsStretched.yml python plot.py gradients_stretched_0001.hdf5 stretched python makeICs.py cartesian -../swift -s -t 2 gradientsCartesian.yml +../swift --hydro --threads=2 gradientsCartesian.yml python plot.py gradients_cartesian_0001.hdf5 cartesian python makeICs.py random -../swift -s -t 2 gradientsRandom.yml +../swift --hydro --threads=2 gradientsRandom.yml python plot.py gradients_random_0001.hdf5 random diff --git a/examples/Gravity_glass/README b/examples/Gravity_glass/README new file mode 100644 index 0000000000000000000000000000000000000000..2df2eb1e72cd2979750f9232936a6e183e8636da --- /dev/null +++ b/examples/Gravity_glass/README @@ -0,0 +1,8 @@ +This example can be used to generate a glass file for gravity calculation. +The makeIC.py script will generate a uniform Poisson distribution of particles +in a cubic box with zero initial velocities. + +By running the code with the SWIFT configuration option --enable-glass-making +the code will run with gravity as a repulsive force and the particles will +move towards a state of minimal energy. These glass files can be used to then +start simulations with a minimal level of noise. diff --git a/examples/UniformDMBox/makeIC.py b/examples/Gravity_glass/makeIC.py similarity index 74% rename from examples/UniformDMBox/makeIC.py rename to examples/Gravity_glass/makeIC.py index 8f3cd943b3cf19c4ae231d125c5ef97d076e0e8e..f573c79b19a5e3655d4f55f761ef20a6468342de 100644 --- a/examples/UniformDMBox/makeIC.py +++ b/examples/Gravity_glass/makeIC.py @@ -20,7 +20,7 @@ import h5py import sys -from numpy import * +import numpy as np # Generates a swift IC file containing a cartesian distribution of DM particles # with a density of 1 @@ -30,7 +30,7 @@ periodic= 1 # 1 For periodic box boxSize = 1. rho = 1.
L = int(sys.argv[1]) # Number of particles along one axis -fileName = "uniformDMBox_%d.hdf5"%L +fileName = "uniform_DM_box.hdf5" #--------------------------------------------------- numPart = L**3 @@ -53,10 +53,6 @@ grp.attrs["MassTable"] = [0.0, mass, 0.0, 0.0, 0.0, 0.0] grp.attrs["Flag_Entropy_ICs"] = 0 grp.attrs["Dimension"] = 3 -#Runtime parameters -grp = file.create_group("/RuntimePars") -grp.attrs["PeriodicBoundariesOn"] = periodic - #Units grp = file.create_group("/Units") grp.attrs["Unit length in cgs (U_L)"] = 1. @@ -69,27 +65,16 @@ grp.attrs["Unit temperature in cgs (U_T)"] = 1. #Particle group grp = file.create_group("/PartType1") -v = zeros((numPart, 3)) -ds = grp.create_dataset('Velocities', (numPart, 3), 'f') -ds[()] = v -v = zeros(1) +v = np.zeros((numPart, 3)) +ds = grp.create_dataset('Velocities', (numPart, 3), 'f', data=v) -m = full((numPart, 1), mass) -ds = grp.create_dataset('Masses', (numPart,1), 'f') -ds[()] = m -m = zeros(1) +m = np.full((numPart, 1), mass) +ds = grp.create_dataset('Masses', (numPart,1), 'f', data=m) -ids = linspace(0, numPart, numPart, endpoint=False).reshape((numPart,1)) +ids = np.linspace(0, numPart, numPart, endpoint=False).reshape((numPart,1)) ds = grp.create_dataset('ParticleIDs', (numPart, 1), 'L') ds[()] = ids + 1 -x = ids % L; -y = ((ids - x) / L) % L; -z = (ids - x - L * y) / L**2; -coords = zeros((numPart, 3)) -coords[:,0] = z[:,0] * boxSize / L + boxSize / (2*L) -coords[:,1] = y[:,0] * boxSize / L + boxSize / (2*L) -coords[:,2] = x[:,0] * boxSize / L + boxSize / (2*L) -ds = grp.create_dataset('Coordinates', (numPart, 3), 'd') -ds[()] = coords +coords = np.random.rand(numPart, 3) * boxSize +ds = grp.create_dataset('Coordinates', (numPart, 3), 'd', data=coords) file.close() diff --git a/examples/Gravity_glass/uniform_DM_box.yml b/examples/Gravity_glass/uniform_DM_box.yml new file mode 100644 index 0000000000000000000000000000000000000000..00a5864cdb6ff0897501248437b3cc00be0f7acf --- /dev/null +++ 
b/examples/Gravity_glass/uniform_DM_box.yml @@ -0,0 +1,45 @@ +# Define the system of units to use internally. +InternalUnitSystem: + UnitMass_in_cgs: 1 # Grams + UnitLength_in_cgs: 1 # Centimeters + UnitVelocity_in_cgs: 1 # Centimeters per second + UnitCurrent_in_cgs: 1 # Amperes + UnitTemp_in_cgs: 1 # Kelvin + +# Let's overwrite G to make this more effective +PhysicalConstants: + G: 1. + +# Parameters governing the time integration +TimeIntegration: + time_begin: 0. + time_end: 100. + dt_min: 1e-6 + dt_max: 1. + +Scheduler: + max_top_level_cells: 8 + +# Parameters governing the snapshots +Snapshots: + basename: uniform_DM_box + time_first: 0. + delta_time: 1. + compression: 4 + +# Parameters for the self-gravity scheme +Gravity: + eta: 0.025 + theta: 0.3 + mesh_side_length: 32 + comoving_softening: 0.001 + max_physical_softening: 0.001 + +# Parameters governing the conserved quantities statistics +Statistics: + delta_time: 0.1 + +# Parameters related to the initial conditions +InitialConditions: + file_name: ./uniform_DM_box.hdf5 + periodic: 1 \ No newline at end of file diff --git a/examples/GreshoVortex_2D/gresho.yml b/examples/GreshoVortex_2D/gresho.yml index df941450196a7de6cd1471e1d258756ca8c36fb1..2006bb451179ce646ec2cc41cb3aa5603489dc29 100644 --- a/examples/GreshoVortex_2D/gresho.yml +++ b/examples/GreshoVortex_2D/gresho.yml @@ -34,3 +34,4 @@ SPH: # Parameters related to the initial conditions InitialConditions: file_name: ./greshoVortex.hdf5 # The file to read + periodic: 1 \ No newline at end of file diff --git a/examples/GreshoVortex_2D/makeIC.py b/examples/GreshoVortex_2D/makeIC.py index 4f4ec3407b04971882fbf3d7d7479e74bf56c762..4fb382925e41a1d00463b369bc8d95c4bc6b0aa1 100644 --- a/examples/GreshoVortex_2D/makeIC.py +++ b/examples/GreshoVortex_2D/makeIC.py @@ -89,10 +89,6 @@ grp.attrs["MassTable"] = [0.0, 0.0, 0.0, 0.0, 0.0, 0.0] grp.attrs["Flag_Entropy_ICs"] = [0, 0, 0, 0, 0, 0] grp.attrs["Dimension"] = 2 -#Runtime parameters -grp = 
fileOutput.create_group("/RuntimePars") -grp.attrs["PeriodicBoundariesOn"] = 1 - #Units grp = fileOutput.create_group("/Units") grp.attrs["Unit length in cgs (U_L)"] = 1. diff --git a/examples/GreshoVortex_2D/run.sh b/examples/GreshoVortex_2D/run.sh index 6d537bcc96c68385434f685bd551a2d423f469e0..855d528bdff2351cebb0602115f48a23bf4f1bc5 100755 --- a/examples/GreshoVortex_2D/run.sh +++ b/examples/GreshoVortex_2D/run.sh @@ -13,7 +13,7 @@ then fi # Run SWIFT -../swift -s -t 1 gresho.yml 2>&1 | tee output.log +../swift --hydro --threads=1 gresho.yml 2>&1 | tee output.log # Plot the solution python plotSolution.py 11 diff --git a/examples/GreshoVortex_3D/gresho.yml b/examples/GreshoVortex_3D/gresho.yml index 113c03b9bd0e411bf04f29c70937ac7fab3708f3..a95a0eae3255b87337fc838f1eabe5469a724a09 100644 --- a/examples/GreshoVortex_3D/gresho.yml +++ b/examples/GreshoVortex_3D/gresho.yml @@ -35,3 +35,4 @@ SPH: # Parameters related to the initial conditions InitialConditions: file_name: ./greshoVortex.hdf5 # The file to read + periodic: 1 \ No newline at end of file diff --git a/examples/GreshoVortex_3D/makeIC.py b/examples/GreshoVortex_3D/makeIC.py index cba2158016bc86f58b6e89f83cbfb473798e1cf7..03f99df1082928bd57779ff2c0e7e85f112b4f1f 100644 --- a/examples/GreshoVortex_3D/makeIC.py +++ b/examples/GreshoVortex_3D/makeIC.py @@ -90,10 +90,6 @@ grp.attrs["MassTable"] = [0.0, 0.0, 0.0, 0.0, 0.0, 0.0] grp.attrs["Flag_Entropy_ICs"] = [0, 0, 0, 0, 0, 0] grp.attrs["Dimension"] = 3 -#Runtime parameters -grp = fileOutput.create_group("/RuntimePars") -grp.attrs["PeriodicBoundariesOn"] = 1 - #Units grp = fileOutput.create_group("/Units") grp.attrs["Unit length in cgs (U_L)"] = 1. 
diff --git a/examples/GreshoVortex_3D/run.sh b/examples/GreshoVortex_3D/run.sh index da7d6cee111aebcfd2fcb0f3508af80ef73cbeb0..1967a77ba70bfb30191df8fef6e1e5e7bd68e4eb 100755 --- a/examples/GreshoVortex_3D/run.sh +++ b/examples/GreshoVortex_3D/run.sh @@ -13,7 +13,7 @@ then fi # Run SWIFT -../swift -s -t 4 gresho.yml 2>&1 | tee output.log +../swift --hydro --threads=4 gresho.yml 2>&1 | tee output.log # Plot the solution python plotSolution.py 11 diff --git a/examples/Hernquist_circularorbit/hernquistcirc.yml b/examples/Hernquist_circularorbit/hernquistcirc.yml new file mode 100755 index 0000000000000000000000000000000000000000..5e81d180003283ecb74209b19e1ff3db8097b08f --- /dev/null +++ b/examples/Hernquist_circularorbit/hernquistcirc.yml @@ -0,0 +1,38 @@ +# Define the system of units to use internally. +InternalUnitSystem: + UnitMass_in_cgs: 1.988e+33 # Grams + UnitLength_in_cgs: 3.086e+21 # Centimeters + UnitVelocity_in_cgs: 1e5 # Centimeters per second + UnitCurrent_in_cgs: 1 # Amperes + UnitTemp_in_cgs: 1 # Kelvin + +# Parameters governing the time integration (Set dt_min and dt_max to the same value for a fixed time-step run.) +TimeIntegration: + time_begin: 0. # The starting time of the simulation (in internal units). + time_end: 2.0 # The end time of the simulation (in internal units). + dt_min: 1e-10 # The minimal time-step size of the simulation (in internal units). + dt_max: 1e0 # The maximal time-step size of the simulation (in internal units). + +# Parameters governing the snapshots +Snapshots: + basename: output # Common part of the name of output files + time_first: 0. 
# Time of the first output (in internal units) + delta_time: 1e-3 # Time difference between consecutive outputs (in internal units) + +# Parameters governing the conserved quantities statistics +Statistics: + delta_time: 1e0 # Time between statistics output + +# Parameters related to the initial conditions +InitialConditions: + file_name: circularorbitshernquist.hdf5 # The file to read + periodic: 0 + +# Hernquist potential parameters +HernquistPotential: + useabspos: 0 # 0 -> positions based on centre, 1 -> absolute positions + position: [0.,0.,0.] # Location of centre of isothermal potential with respect to centre of the box (if 0) otherwise absolute (if 1) (internal units) + mass: 2e12 # Mass of the Hernquist potential + scalelength: 10.0 # Scale length of the potential + timestep_mult: 0.005 # Dimensionless pre-factor for the time-step condition + epsilon: 0.1 # Softening size (internal units) diff --git a/examples/Hernquist_circularorbit/makeIC.py b/examples/Hernquist_circularorbit/makeIC.py new file mode 100755 index 0000000000000000000000000000000000000000..474450f0e23704bfc43730872a978107f28704e9 --- /dev/null +++ b/examples/Hernquist_circularorbit/makeIC.py @@ -0,0 +1,81 @@ +############################################################################### +# This file is part of SWIFT. +# Copyright (c) 2018 Folkert Nobels (nobels@strw.leidenuniv.nl) +# +# This program is free software: you can redistribute it and/or modify +# it under the terms of the GNU Lesser General Public License as published +# by the Free Software Foundation, either version 3 of the License, or +# (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU Lesser General Public License +# along with this program. 
If not, see <http://www.gnu.org/licenses/>. +# +################################################################################ +from galpy.potential import NFWPotential +from galpy.orbit import Orbit +from galpy.util import bovy_conversion +import numpy as np +import matplotlib.pyplot as plt +from astropy import units +import h5py as h5 + +C = 8.0 +M_200 = 2.0 +N_PARTICLES = 3 +print("Initial conditions written to 'test_nfw.hdf5'") + +pos = np.zeros((3, 3)) +pos[0, 2] = 50.0 +pos[1, 2] = 10.0 +pos[2, 2] = 2.0 +pos = pos + 500.0 +vel = np.zeros((3, 3)) +vel[0, 1] = 348.0 +vel[1, 1] = 466.9 +vel[2, 1] = 348.0 +ids = np.array([1.0, 2.0, 3.0]) +mass = np.array([1.0, 1.0, 1.0]) + +# File +file = h5.File("circularorbitshernquist.hdf5", "w") + +# Units +grp = file.create_group("/Units") +grp.attrs["Unit length in cgs (U_L)"] = 3.086e21 +grp.attrs["Unit mass in cgs (U_M)"] = 1.988e33 +grp.attrs["Unit time in cgs (U_t)"] = 3.086e16 +grp.attrs["Unit current in cgs (U_I)"] = 1.0 +grp.attrs["Unit temperature in cgs (U_T)"] = 1.0 + +# Header +grp = file.create_group("/Header") +grp.attrs["BoxSize"] = 1000.0 +grp.attrs["NumPart_Total"] = [0, N_PARTICLES, 0, 0, 0, 0] +grp.attrs["NumPart_Total_HighWord"] = [0, 0, 0, 0, 0, 0] +grp.attrs["NumPart_ThisFile"] = [0, N_PARTICLES, 0, 0, 0, 0] +grp.attrs["Time"] = 0.0 +grp.attrs["NumFilesPerSnapshot"] = 1 +grp.attrs["MassTable"] = [0.0, 0.0, 0.0, 0.0, 0.0, 0.0] +grp.attrs["Flag_Entropy_ICs"] = [0, 0, 0, 0, 0, 0] +grp.attrs["Dimension"] = 3 + +# Runtime parameters +grp = file.create_group("/RuntimePars") +grp.attrs["PeriodicBoundariesOn"] = 1 + +# Particle group +grp1 = file.create_group("/PartType1") +ds = grp1.create_dataset("Velocities", (N_PARTICLES, 3), "f", data=vel) + +ds = grp1.create_dataset("Masses", (N_PARTICLES,), "f", data=mass) + +ds = grp1.create_dataset("ParticleIDs", (N_PARTICLES,), "L", data=ids) + +ds = grp1.create_dataset("Coordinates", (N_PARTICLES, 3), "d", data=pos) + +file.close() diff --git 
a/examples/Hernquist_circularorbit/plotprog.py b/examples/Hernquist_circularorbit/plotprog.py new file mode 100755 index 0000000000000000000000000000000000000000..a19c66e7f30e0e4012a23a4d38dd23045deea6e2 --- /dev/null +++ b/examples/Hernquist_circularorbit/plotprog.py @@ -0,0 +1,76 @@ +#!/usr/bin/env python +############################################################################### +# This file is part of SWIFT. +# Copyright (c) 2018 Folkert Nobels (nobels@strw.leidenuniv.nl) +# +# This program is free software: you can redistribute it and/or modify +# it under the terms of the GNU Lesser General Public License as published +# by the Free Software Foundation, either version 3 of the License, or +# (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU Lesser General Public License +# along with this program. If not, see <http://www.gnu.org/licenses/>. 
+# +################################################################################ +import numpy as np +import h5py +import matplotlib.pyplot as plt +from scipy.integrate import odeint + +t = np.linspace(0, 40, 1e5) +y0 = [0, 10] +a = 30.0 +G = 4.300927e-06 +M = 1e15 +GM = G * M + + +lengthrun = 2001 +numbpar = 3 + +radius = np.zeros((numbpar, lengthrun)) +xx = np.zeros((numbpar, lengthrun)) +yy = np.zeros((numbpar, lengthrun)) +zz = np.zeros((numbpar, lengthrun)) +time = np.zeros(lengthrun) +for i in range(0, lengthrun): + Data = h5py.File("output_%04d.hdf5" % i, "r") + header = Data["Header"] + time[i] = header.attrs["Time"] + particles = Data["PartType1"] + positions = particles["Coordinates"] + xx[:, i] = positions[:, 0] - 500.0 + yy[:, i] = positions[:, 1] - 500.0 + zz[:, i] = positions[:, 2] - 500.0 + +col = ["b", "r", "c", "y", "k"] + +for i in range(0, numbpar): + plt.plot(xx[i, :], yy[i, :], col[i]) + +plt.ylabel("y (kpc)") +plt.xlabel("x (kpc)") +plt.savefig("xyplot.png") +plt.close() + + +for i in range(0, numbpar): + plt.plot(xx[i, :], zz[i, :], col[i]) + +plt.ylabel("z (kpc)") +plt.xlabel("x (kpc)") +plt.savefig("xzplot.png") +plt.close() + +for i in range(0, numbpar): + plt.plot(yy[i, :], zz[i, :], col[i]) + +plt.ylabel("z (kpc)") +plt.xlabel("y (kpc)") +plt.savefig("yzplot.png") +plt.close() diff --git a/examples/Hernquist_circularorbit/run.sh b/examples/Hernquist_circularorbit/run.sh new file mode 100755 index 0000000000000000000000000000000000000000..fe2eb0bd0780c3aa66d2b52b10e76b146fb9dde2 --- /dev/null +++ b/examples/Hernquist_circularorbit/run.sh @@ -0,0 +1,23 @@ +#!/bin/bash + +if [ ! 
-e circularorbitshernquist.hdf5 ] +then + echo "Generate initial conditions for circular orbits" + if command -v python3 &>/dev/null; then + python3 makeIC.py + else + python makeIC.py + fi + +fi + +# self gravity G, external potential g, hydro s, threads t and high verbosity v +../swift --external-gravity --threads=6 hernquistcirc.yml 2>&1 | tee output.log + + +echo "Save plots of the circular orbits" +if command -v python3 &>/dev/null; then + python3 plotprog.py +else + python plotprog.py +fi diff --git a/examples/Hernquist_radialinfall/README b/examples/Hernquist_radialinfall/README new file mode 100644 index 0000000000000000000000000000000000000000..be22a1a11a5b1e0538723781607aa374644a4e0f --- /dev/null +++ b/examples/Hernquist_radialinfall/README @@ -0,0 +1,3 @@ +This example generates 5 particles at radii of 10, 20, 30, 40 and 50 kpc +without velocity and follows the evolution of these particles in a Hernquist +potential as they are free falling. diff --git a/examples/Hernquist_radialinfall/hernquist.yml b/examples/Hernquist_radialinfall/hernquist.yml new file mode 100644 index 0000000000000000000000000000000000000000..adea54ed9a33ee889b39bb519c8098917b33ef9f --- /dev/null +++ b/examples/Hernquist_radialinfall/hernquist.yml @@ -0,0 +1,39 @@ +# Define the system of units to use internally. +InternalUnitSystem: + UnitMass_in_cgs: 1.98848e33 # M_sun + UnitLength_in_cgs: 3.08567758e21 # kpc + UnitVelocity_in_cgs: 1e5 # km/s + UnitCurrent_in_cgs: 1 # Amperes + UnitTemp_in_cgs: 1 # Kelvin + +# Parameters governing the time integration +TimeIntegration: + time_begin: 0. # The starting time of the simulation (in internal units). + time_end: 40. # The end time of the simulation (in internal units). + dt_min: 9e-10 # The minimal time-step size of the simulation (in internal units). + dt_max: 1e-2 # The maximal time-step size of the simulation (in internal units).
+ +# Parameters governing the conserved quantities statistics +Statistics: + delta_time: 1e-3 # Time between statistics output + +# Parameters governing the snapshots +Snapshots: + basename: hernquist # Common part of the name of output files + time_first: 0. # Time of the first output (in internal units) + delta_time: 0.02 # Time difference between consecutive outputs (in internal units) + +# Parameters related to the initial conditions +InitialConditions: + file_name: Hernquist.hdf5 # The file to read + periodic: 1 + shift: [200.,200.,200.] # Shift all particles to be in the potential + +# External potential parameters +HernquistPotential: + useabspos: 0 # Whether to use absolute position (1) or relative potential to centre of box (0) + position: [0.,0.,0.] + mass: 1e9 + scalelength: 1.0 + timestep_mult: 0.01 # controls time step + epsilon: 2.0 # No softening at the centre of the halo diff --git a/examples/Hernquist_radialinfall/makeIC.py b/examples/Hernquist_radialinfall/makeIC.py new file mode 100644 index 0000000000000000000000000000000000000000..567e15a95302bc8848c1d026b82dc5be54c7a0c6 --- /dev/null +++ b/examples/Hernquist_radialinfall/makeIC.py @@ -0,0 +1,167 @@ +############################################################################### +# This file is part of SWIFT. +# Copyright (c) 2018 Folkert Nobels (nobels@strw.leidenuniv.nl) +# +# This program is free software: you can redistribute it and/or modify +# it under the terms of the GNU Lesser General Public License as published +# by the Free Software Foundation, either version 3 of the License, or +# (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU Lesser General Public License +# along with this program. 
If not, see <http://www.gnu.org/licenses/>. +# +############################################################################## + +import h5py +import sys +import numpy +import math +import random +import numpy as np + +# Generates N particles in a spherical distribution centred on [0,0,0], to be moved in an isothermal potential +# usage: python makeIC.py 1000 0 : generate 1000 particles on circular orbits +# python makeIC.py 1000 1 : generate 1000 particles with Lz/L uniform in [0,1] +# all particles move in the xy plane, and start at y=0 + +# physical constants in cgs +NEWTON_GRAVITY_CGS = 6.67408e-8 +SOLAR_MASS_IN_CGS = 1.98848e33 +PARSEC_IN_CGS = 3.08567758e18 +YEAR_IN_CGS = 3.15569252e7 + +# choice of units +const_unit_length_in_cgs = 1000 * PARSEC_IN_CGS +const_unit_mass_in_cgs = SOLAR_MASS_IN_CGS +const_unit_velocity_in_cgs = 1e5 + + +# Properties of the Hernquist potential +Mass = 1e15 +scaleLength = 30.0 # kpc + + +# derived units +const_unit_time_in_cgs = const_unit_length_in_cgs / const_unit_velocity_in_cgs +const_G = ( + NEWTON_GRAVITY_CGS + * const_unit_mass_in_cgs + * const_unit_time_in_cgs + * const_unit_time_in_cgs + / (const_unit_length_in_cgs * const_unit_length_in_cgs * const_unit_length_in_cgs) +) +print("G=", const_G) + + +def hernquistcircvel(r, M, a): + """ Function that calculates the circular velocity in a + Hernquist potential. 
+ @param r: radius from centre of potential + @param M: mass of the Hernquist potential + @param a: Scale length of the potential + @return: circular velocity + """ + return (const_G * M * r) ** 0.5 / (r + a) + + +# Parameters +periodic = 1 # 1 For periodic box +boxSize = 400.0 # [kpc] +Radius = 100.0 # maximum radius of particles [kpc] +G = const_G + +N = 5 +L = N ** (1.0 / 3.0) + +fileName = "Hernquist.hdf5" + + +# --------------------------------------------------- +numPart = N +mass = 1 + +# -------------------------------------------------- + +# File +file = h5py.File(fileName, "w") + +# Units +grp = file.create_group("/Units") +grp.attrs["Unit length in cgs (U_L)"] = const_unit_length_in_cgs +grp.attrs["Unit mass in cgs (U_M)"] = const_unit_mass_in_cgs +grp.attrs["Unit time in cgs (U_t)"] = ( + const_unit_length_in_cgs / const_unit_velocity_in_cgs +) +grp.attrs["Unit current in cgs (U_I)"] = 1.0 +grp.attrs["Unit temperature in cgs (U_T)"] = 1.0 + +# Header +grp = file.create_group("/Header") +grp.attrs["BoxSize"] = boxSize +grp.attrs["NumPart_Total"] = [0, numPart, 0, 0, 0, 0] +grp.attrs["NumPart_Total_HighWord"] = [0, 0, 0, 0, 0, 0] +grp.attrs["NumPart_ThisFile"] = [0, numPart, 0, 0, 0, 0] +grp.attrs["Time"] = 0.0 +grp.attrs["NumFilesPerSnapshot"] = 1 +grp.attrs["MassTable"] = [0.0, 0.0, 0.0, 0.0, 0.0, 0.0] +grp.attrs["Flag_Entropy_ICs"] = [0, 0, 0, 0, 0, 0] +grp.attrs["Dimension"] = 3 + +# set seed for random number +numpy.random.seed(1234) + +# Particle group +grp1 = file.create_group("/PartType1") +# generate particle positions +# radius = Radius * (numpy.random.rand(N))**(1./3.) + 10. +radius = np.zeros(N) +radius[0] = 10 +radius[1] = 20 +radius[2] = 30 +radius[3] = 40 +radius[4] = 50 +# this part is not even used: +# ctheta = -1. 
+ 2 * numpy.random.rand(N) +# stheta = numpy.sqrt(1.-ctheta**2) +# phi = 2 * math.pi * numpy.random.rand(N) +# end +r = numpy.zeros((numPart, 3)) +r[:, 0] = radius + +# import matplotlib.pyplot as plt +# plt.plot(r[:,0],'.') +# plt.show() + +# print('Mass = ', Mass) +# print('radius = ', radius) +# print('scaleLength = ',scaleLength) +# +v = numpy.zeros((numPart, 3)) +# v[:,0] = hernquistcircvel(radius,Mass,scaleLength) +omega = v[:, 0] / radius +period = 2.0 * math.pi / omega +print("period = minimum = ", min(period), " maximum = ", max(period)) +print("Circular velocity = minimum =", min(v[:, 0]), " maximum = ", max(v[:, 0])) + +omegav = omega + +v[:, 0] = -omegav * r[:, 1] +v[:, 1] = omegav * r[:, 0] + +ds = grp1.create_dataset("Velocities", (numPart, 3), "f", data=v) + +m = numpy.full((numPart,), mass, dtype="f") +ds = grp1.create_dataset("Masses", (numPart,), "f", data=m) + +ids = 1 + numpy.linspace(0, numPart, numPart, endpoint=False) +ds = grp1.create_dataset("ParticleIDs", (numPart,), "L", data=ids) + +ds = grp1.create_dataset("Coordinates", (numPart, 3), "d", data=r) + + +file.close() diff --git a/examples/Hernquist_radialinfall/plotprog.py b/examples/Hernquist_radialinfall/plotprog.py new file mode 100755 index 0000000000000000000000000000000000000000..d8de00a6b694bb33bf96ef7065c972aa6bb3f6cb --- /dev/null +++ b/examples/Hernquist_radialinfall/plotprog.py @@ -0,0 +1,50 @@ +#!/usr/bin/env python +############################################################################### +# This file is part of SWIFT. +# Copyright (c) 2018 Folkert Nobels (nobels@strw.leidenuniv.nl) +# +# This program is free software: you can redistribute it and/or modify +# it under the terms of the GNU Lesser General Public License as published +# by the Free Software Foundation, either version 3 of the License, or +# (at your option) any later version. 
+# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU Lesser General Public License +# along with this program. If not, see <http://www.gnu.org/licenses/>. +# +################################################################################ +import numpy as np +import h5py +import matplotlib.pyplot as plt +from scipy.integrate import odeint + + +lengthrun = 2001 +numbpar = 5 + +radius = np.zeros((numbpar, lengthrun)) +time = np.zeros(lengthrun) +for i in range(0, lengthrun): + Data = h5py.File("hernquist_%04d.hdf5" % i, "r") + header = Data["Header"] + time[i] = header.attrs["Time"] + particles = Data["PartType1"] + positions = particles["Coordinates"] + radius[:, i] = positions[:, 0] - 200.0 + +col = ["b", "r", "c", "y", "k"] + +for i in range(0, numbpar): + plt.plot(time, radius[i, :], col[i]) + plt.axhline(np.max(radius[i, :]), color=col[i], linestyle="--") + plt.axhline(-np.max(radius[i, :]), color=col[i], linestyle="--") + + +plt.ylabel("Radial distance (kpc)") +plt.xlabel("Simulation time (internal units)") +plt.savefig("radial_infall.png") +plt.close() diff --git a/examples/Hernquist_radialinfall/run.sh b/examples/Hernquist_radialinfall/run.sh new file mode 100755 index 0000000000000000000000000000000000000000..0ad64e2dd2c3c0bfd66f2da3d8bee1edb30fb5f2 --- /dev/null +++ b/examples/Hernquist_radialinfall/run.sh @@ -0,0 +1,24 @@ +#!/bin/bash + +# Generate the initial conditions if they are not present. +if [ ! 
-e Hernquist.hdf5 ] +then + echo "Generate initial conditions for radial orbits" + if command -v python3 &>/dev/null; then + python3 makeIC.py + else + python makeIC.py + fi +fi + +rm -rf hernquist_*.hdf5 +../swift --external-gravity --threads=1 hernquist.yml 2>&1 | tee output.log + + + +echo "Make plots of the radially free falling particles" +if command -v python3 &>/dev/null; then + python3 plotprog.py +else + python plotprog.py +fi diff --git a/examples/HydrostaticHalo/hydrostatic.yml b/examples/HydrostaticHalo/hydrostatic.yml index 0cc11d0d8708b518b8b0b3a8df1374b6a5ead7e2..874d6344cf5787bb310b6a1b730acb3455a8b6a6 100644 --- a/examples/HydrostaticHalo/hydrostatic.yml +++ b/examples/HydrostaticHalo/hydrostatic.yml @@ -31,7 +31,8 @@ SPH: # Parameters related to the initial conditions InitialConditions: file_name: Hydrostatic.hdf5 # The file to read - + periodic: 1 + # External potential parameters IsothermalPotential: vrot: 200. # rotation speed of isothermal potential in internal units diff --git a/examples/HydrostaticHalo/makeIC.py b/examples/HydrostaticHalo/makeIC.py index d5081ac84473edc87857c6872278b4d0ca6389b1..b8a4036b77c430866f700047fd06bf2c8de490e7 100644 --- a/examples/HydrostaticHalo/makeIC.py +++ b/examples/HydrostaticHalo/makeIC.py @@ -91,10 +91,6 @@ grp.attrs["Unit current in cgs (U_I)"] = 1. grp.attrs["Unit temperature in cgs (U_T)"] = 1. 
-# Runtime parameters -grp = file.create_group("/RuntimePars") -grp.attrs["PeriodicBoundariesOn"] = periodic - # set seed for random number np.random.seed(1234) diff --git a/examples/HydrostaticHalo/run.sh b/examples/HydrostaticHalo/run.sh index 82584282559c1fceb0492aada671ff83fb74c924..cb67be62aa014627c021ef784e68495cc48f637b 100755 --- a/examples/HydrostaticHalo/run.sh +++ b/examples/HydrostaticHalo/run.sh @@ -8,7 +8,7 @@ then fi # Run for 10 dynamical times -../swift -g -s -t 1 hydrostatic.yml 2>&1 | tee output.log +../swift --external-gravity --hydro --threads=1 hydrostatic.yml 2>&1 | tee output.log echo "Plotting density profiles" mkdir plots diff --git a/examples/InteractingBlastWaves_1D/interactingBlastWaves.yml b/examples/InteractingBlastWaves_1D/interactingBlastWaves.yml index e845599730828fd7b9880ae9aca11420ba50026c..c4960dfa2c07b6b08cd6559b1de49f27b518bf94 100644 --- a/examples/InteractingBlastWaves_1D/interactingBlastWaves.yml +++ b/examples/InteractingBlastWaves_1D/interactingBlastWaves.yml @@ -31,3 +31,4 @@ SPH: # Parameters related to the initial conditions InitialConditions: file_name: ./interactingBlastWaves.hdf5 # The file to read + periodic: 1 diff --git a/examples/InteractingBlastWaves_1D/makeIC.py b/examples/InteractingBlastWaves_1D/makeIC.py index bed0e20c833ccbe54ed571b954cad03ab93f4c0c..3a47bf7c42e1359dc1a9aa151e360ad0f93d2d32 100644 --- a/examples/InteractingBlastWaves_1D/makeIC.py +++ b/examples/InteractingBlastWaves_1D/makeIC.py @@ -62,10 +62,6 @@ grp.attrs["MassTable"] = [0.0, 0.0, 0.0, 0.0, 0.0, 0.0] grp.attrs["Flag_Entropy_ICs"] = 0 grp.attrs["Dimension"] = 1 -#Runtime parameters -grp = file.create_group("/RuntimePars") -grp.attrs["PeriodicBoundariesOn"] = 1 - #Units grp = file.create_group("/Units") grp.attrs["Unit length in cgs (U_L)"] = 1. 
diff --git a/examples/InteractingBlastWaves_1D/run.sh b/examples/InteractingBlastWaves_1D/run.sh index 31717bd806ddd6c98c24dfc1def6f79dddff42ff..cb8519e7f924357c19271dfb8edd902f629a4649 100755 --- a/examples/InteractingBlastWaves_1D/run.sh +++ b/examples/InteractingBlastWaves_1D/run.sh @@ -8,7 +8,7 @@ then fi # Run SWIFT -../swift -s -t 1 interactingBlastWaves.yml 2>&1 | tee output.log +../swift --hydro --threads=1 interactingBlastWaves.yml 2>&1 | tee output.log # Get the high resolution reference solution if not present. if [ ! -e interactingBlastWaves1D_exact.txt ] diff --git a/examples/IsolatedGalaxy_dmparticles/angularmomentum.py b/examples/IsolatedGalaxy_dmparticles/angularmomentum.py new file mode 100755 index 0000000000000000000000000000000000000000..4398dfeb8b079143886c5565e7667f72fc0bdcef --- /dev/null +++ b/examples/IsolatedGalaxy_dmparticles/angularmomentum.py @@ -0,0 +1,88 @@ +#!/usr/bin/env python +############################################################################### +# This file is part of SWIFT. +# Copyright (c) 2018 Folkert Nobels (nobels@strw.leidenuniv.nl) +# +# This program is free software: you can redistribute it and/or modify +# it under the terms of the GNU Lesser General Public License as published +# by the Free Software Foundation, either version 3 of the License, or +# (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU Lesser General Public License +# along with this program. If not, see <http://www.gnu.org/licenses/>. 
+# +################################################################################ +import numpy as np +import h5py +import matplotlib.pyplot as plt +import scipy.optimize as sco + + +Nmax = 2001 +steps = 10 +angmomcomp = False + +iterarray = np.arange(0, Nmax + 1, steps) +Lxtot = np.zeros(len(iterarray)) +Lytot = np.zeros(len(iterarray)) +Lztot = np.zeros(len(iterarray)) +Ltot = np.zeros(len(iterarray)) +time_array = np.zeros(len(iterarray)) + + +for i in iterarray: + f = h5py.File("output_%04d.hdf5" % i, "r") + + boxsize = f["Header"].attrs["BoxSize"] / 2.0 + + time_array[int(i / steps)] = f["Header"].attrs["Time"] + + particles = f["PartType4"] + coordinates = particles["Coordinates"][:, :] + velocities = particles["Velocities"][:, :] + masses = particles["Masses"][:] + + R = ( + (coordinates[:, 0] - boxsize[0]) ** 2 + (coordinates[:, 1] - boxsize[1]) ** 2 + ) ** 0.5 + X = np.abs(coordinates[:, 0] - boxsize[0]) + Y = np.abs(coordinates[:, 1] - boxsize[1]) + Z = np.abs(coordinates[:, 2] - boxsize[2]) + + vx = velocities[:, 0] + vy = velocities[:, 1] + vz = velocities[:, 2] + + Lx = (Y * vz - Z * vy) * masses + Ly = (Z * vx - X * vz) * masses + Lz = (X * vy - Y * vx) * masses + + L = (Lx ** 2 + Ly ** 2 + Lz ** 2) ** 0.5 + + Lxtot[int(i / steps)] = np.sum(Lx) + Lytot[int(i / steps)] = np.sum(Ly) + Lztot[int(i / steps)] = np.sum(Lz) + Ltot[int(i / steps)] = np.sum(L) + +time_array[-1] = 2.0 +if angmomcomp: + plt.plot(time_array, Lxtot / Lxtot[0] - 1, label="Lx total") + plt.plot(time_array, Lytot / Lytot[0] - 1, label="Ly total") + plt.plot(time_array, Lztot / Lztot[0] - 1, label="Lz total") +plt.plot(time_array, Ltot / Ltot[0] - 1, label="L total") +plt.xlabel("Time") +plt.ylabel("ratio between current and zero angular momentum") +plt.legend() +plt.show() + +plt.semilogy(time_array, np.absolute(Ltot / Ltot[0] - 1)) +plt.xlabel("Time (Gyr)") +plt.ylabel("Fractional change of total angular momentum") +plt.savefig("Total_angular_momentum.png") +plt.show() 
+plt.close() diff --git a/examples/IsolatedGalaxy_dmparticles/getIC.sh b/examples/IsolatedGalaxy_dmparticles/getIC.sh new file mode 100755 index 0000000000000000000000000000000000000000..39e62d6fc3b7c83648aa66541c9cba2cdf758ac3 --- /dev/null +++ b/examples/IsolatedGalaxy_dmparticles/getIC.sh @@ -0,0 +1,2 @@ +#!/bin/bash +wget https://home.strw.leidenuniv.nl/~nobels/data/3e11-star-only-DM-halo-galaxy.hdf5 diff --git a/examples/IsolatedGalaxy_dmparticles/isolated_galaxy.yml b/examples/IsolatedGalaxy_dmparticles/isolated_galaxy.yml new file mode 100644 index 0000000000000000000000000000000000000000..dccfb28a3f1c888d2a83b5e28b759a30a6928754 --- /dev/null +++ b/examples/IsolatedGalaxy_dmparticles/isolated_galaxy.yml @@ -0,0 +1,43 @@ +# Define the system of units to use internally. +InternalUnitSystem: + UnitMass_in_cgs: 1.9891E43 # 10^10 solar masses + UnitLength_in_cgs: 3.08567758E21 # 1 kpc + UnitVelocity_in_cgs: 1E5 # km/s + UnitCurrent_in_cgs: 1 # Amperes + UnitTemp_in_cgs: 1 # Kelvin + +# Parameters for the self-gravity scheme +Gravity: + mesh_side_length: 32 # Number of cells along each axis for the periodic gravity mesh. + eta: 0.025 # Constant dimensionless multiplier for time integration. + theta: 0.7 # Opening angle (Multipole acceptance criterion). + comoving_softening: 0.0026994 # Comoving softening length (in internal units). + max_physical_softening: 0.0007 # Physical softening length (in internal units). + +# Parameters governing the time integration (Set dt_min and dt_max to the same value for a fixed time-step run.) +TimeIntegration: + time_begin: 0. # The starting time of the simulation (in internal units). + time_end: 1. # The end time of the simulation (in internal units). + dt_min: 1e-6 # The minimal time-step size of the simulation (in internal units). + dt_max: 1e-2 # The maximal time-step size of the simulation (in internal units). 
+ +# Parameters governing the snapshots +Snapshots: + basename: output # Common part of the name of output files + time_first: 0. # (Optional) Time of the first output if non-cosmological time-integration (in internal units) + delta_time: 0.001 # Time difference between consecutive outputs (in internal units) + +Scheduler: + max_top_level_cells: 96 + +# Parameters governing the conserved quantities statistics +Statistics: + delta_time: 1e-2 # Time between statistics output + time_first: 0. # (Optional) Time of the first stats output if non-cosmological time-integration (in internal units) + +# Parameters related to the initial conditions +InitialConditions: + file_name: 3e11-star-only-DM-halo-galaxy.hdf5 # The file to read + periodic: 0 # Are we running with periodic ICs? + + diff --git a/examples/IsolatedGalaxy_dmparticles/profilefit.py b/examples/IsolatedGalaxy_dmparticles/profilefit.py new file mode 100755 index 0000000000000000000000000000000000000000..e7755062ea45de4f42716b14c5896b0da676f001 --- /dev/null +++ b/examples/IsolatedGalaxy_dmparticles/profilefit.py @@ -0,0 +1,241 @@ +#!/usr/bin/env python +############################################################################### +# This file is part of SWIFT. +# Copyright (c) 2018 Folkert Nobels (nobels@strw.leidenuniv.nl) +# +# This program is free software: you can redistribute it and/or modify +# it under the terms of the GNU Lesser General Public License as published +# by the Free Software Foundation, either version 3 of the License, or +# (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU Lesser General Public License +# along with this program. If not, see <http://www.gnu.org/licenses/>. 
+# +################################################################################ +import numpy as np +import h5py +import matplotlib.pyplot as plt +from matplotlib.colors import BoundaryNorm +from matplotlib.ticker import MaxNLocator +import scipy.optimize as sco +import os + + +def linearfunc(x, a, b): + return a * x + b + + +def radialfunc(r, h, A): + return A * np.exp(-r / h) * r + + +def verticalfunc(z, A, z0, zoff): + return 2 * A * np.exp(-(z - zoff) / z0) + + +def verticalfunc2(z, A, z0): + return 2 * A * np.exp(-(z) / z0) + + +def verticalfunc3(z, A, z0, zoff, b): + return 2 * A * np.exp(-(z - zoff) / z0) + b + + +Nmax = 2001 +steps = 10 +storefits = False +logfit = True +normalfit = False + +# if the user wants to store the indivudal fits +if storefits: + if not os.path.exists("radial"): + os.mkdir("radial") + os.mkdir("vertical") + os.mkdir("histsnap") + + +# Initialize the arrays +R_ideal = np.linspace(0, 40, 100) +Z_ideal = np.linspace(0, 10, 100) + +iterarray = np.arange(0, Nmax + 1, steps) + +Z0t = np.zeros(len(iterarray)) +Z0terr = np.zeros(len(iterarray)) +h0t = np.zeros(len(iterarray)) +h0terr = np.zeros(len(iterarray)) +Ar = np.zeros(len(iterarray)) +Arerr = np.zeros(len(iterarray)) +Az = np.zeros(len(iterarray)) +Azerr = np.zeros(len(iterarray)) +time_array = np.zeros(len(iterarray)) + +ar = np.zeros(len(iterarray)) +arerr = np.zeros(len(iterarray)) +br = np.zeros(len(iterarray)) +brerr = np.zeros(len(iterarray)) +az = np.zeros(len(iterarray)) +azerr = np.zeros(len(iterarray)) +bz = np.zeros(len(iterarray)) +bzerr = np.zeros(len(iterarray)) +eps = 1e-6 + + +for i in iterarray: + # Getting the data from the snapshots + f = h5py.File("output_%04d.hdf5" % i, "r") + + boxsize = f["Header"].attrs["BoxSize"] / 2.0 + + time_array[int(i / steps)] = f["Header"].attrs["Time"] + + particles = f["PartType4"] + coordinates = particles["Coordinates"][:, :] + masses = particles["Masses"][:] + + R = ( + (coordinates[:, 0] - boxsize[0]) ** 2 + (coordinates[:, 
1] - boxsize[1]) ** 2 + ) ** 0.5 + Z = np.abs(coordinates[:, 1] - boxsize[1]) + + # Bin the coordinates to make them suitable for fitting + Rhist = np.histogram(R, bins=100, range=[0, 40], normed=True) + Zhist = np.histogram(Z, bins=100, range=[0, 10.0], normed=True) + + # Create correct variables for fitting + Ry = Rhist[0] + Rx = (Rhist[1][1:] + Rhist[1][: len(Rhist[0])]) / 2.0 + + Zy = Zhist[0] + Zx = (Zhist[1][1:] + Zhist[1][: len(Zhist[0])]) / 2.0 + + # Fit with two methods: non-linear LSQ and linear LSQ in log space + bestsolR = sco.curve_fit(radialfunc, Rx[10:], Ry[10:], p0=[2.0, 0.2]) + bestsolZ = sco.curve_fit(verticalfunc, Zx[40:], Zy[40:]) + bestsolRlog = sco.curve_fit(linearfunc, Rx[10:], np.log10(Ry[10:] + eps)) + bestsolZlog = sco.curve_fit(linearfunc, Zx[40:], np.log10(Zy[40:] + eps)) + + # Store variables + h0t[int(i / steps)] = bestsolR[0][0] + Z0t[int(i / steps)] = bestsolZ[0][1] + Ar[int(i / steps)] = bestsolR[0][1] + Az[int(i / steps)] = bestsolZ[0][0] + Z0terr[int(i / steps)] = (bestsolZ[1][1, 1]) ** 0.5 + h0terr[int(i / steps)] = (bestsolR[1][0, 0]) ** 0.5 + Arerr[int(i / steps)] = (bestsolR[1][1, 1]) ** 0.5 + Azerr[int(i / steps)] = (bestsolZ[1][0, 0]) ** 0.5 + + ar[int(i / steps)] = bestsolRlog[0][0] + arerr[int(i / steps)] = (bestsolRlog[1][0, 0]) ** 0.5 + br[int(i / steps)] = bestsolRlog[0][1] + brerr[int(i / steps)] = (bestsolRlog[1][1, 1]) ** 0.5 + az[int(i / steps)] = bestsolZlog[0][0] + azerr[int(i / steps)] = (bestsolZlog[1][0, 0]) ** 0.5 + bz[int(i / steps)] = bestsolZlog[0][1] + bzerr[int(i / steps)] = (bestsolZlog[1][1, 1]) ** 0.5 + + if storefits: + plt.step(Rx, Ry) + plt.plot( + R_ideal, + radialfunc(R_ideal, bestsolR[0][0], bestsolR[0][1]), + label="Non linear LSQ", + ) + plt.plot( + R_ideal, + 10 ** (linearfunc(R_ideal, bestsolRlog[0][0], bestsolRlog[0][1])), + label="Linear LSQ", + ) + plt.xlim(0, 40) + plt.ylim(0, 0.25) + plt.xlabel("R (kpc)") + plt.ylabel("Probability") + plt.savefig("./radial/radialsnap%04d.png" % i) + 
plt.close() + + plt.step(Zx, Zy) + plt.plot( + Z_ideal, + verticalfunc(Z_ideal, bestsolZ[0][0], bestsolZ[0][1], bestsolZ[0][2]), + label="Non linear LSQ", + ) + plt.plot( + Z_ideal, + 10 ** (linearfunc(Z_ideal, bestsolZlog[0][0], bestsolZlog[0][1])), + label="Linear LSQ", + ) + plt.xlim(0, 10.0) + plt.ylim(0, 0.6) + plt.xlabel("z (kpc)") + plt.ylabel("Probability") + plt.savefig("./vertical/verticalsnap%04d.png" % i) + plt.close() + +time_array[-1] = 2.0 + +ax = plt.subplot(111) +ax.set_yscale("log") +if logfit: + plt.errorbar( + time_array, + np.absolute(az / (az[0]) - 1), + yerr=azerr / (az[0]), + label="z0 scale height (Log space)", + ) + plt.errorbar( + time_array, + np.absolute(ar / (ar[0]) - 1), + yerr=arerr / (ar[0]), + label="h scale lenght (Log space)", + ) +if normalfit: + plt.errorbar( + time_array, + np.absolute(Z0t / (Z0t[0]) - 1), + yerr=Z0terr / (Z0t[0]), + label="z0 scale height (normal space)", + ) + plt.errorbar( + time_array, + np.absolute(h0t / (h0t[0]) - 1), + yerr=h0terr / (h0t[0]), + label="h scale height (normal space)", + ) +ax.set_xlabel("Time (Gyr)") +ax.set_ylabel("Fractional difference") +plt.legend() +plt.savefig("Fitdifference-witherror.pdf") +plt.close() + + +ax = plt.subplot(111) +ax.set_yscale("log") +if logfit: + plt.plot( + time_array, np.absolute(az / (az[0]) - 1), label="z0 scale height (Log space)" + ) + plt.plot( + time_array, np.absolute(ar / (ar[0]) - 1), label="h scale lenght (Log space)" + ) +if normalfit: + plt.plot( + time_array, + np.absolute(Z0t / (Z0t[0]) - 1), + label="z0 scale height (normal space)", + ) + plt.plot( + time_array, + np.absolute(h0t / (h0t[0]) - 1), + label="h scale height (normal space)", + ) +ax.set_xlabel("Time (Gyr)") +ax.set_ylabel("Fractional difference") +plt.legend() +plt.savefig("Fitdifference.pdf") +plt.show() diff --git a/examples/IsolatedGalaxy_dmparticles/run.sh b/examples/IsolatedGalaxy_dmparticles/run.sh new file mode 100755 index 
0000000000000000000000000000000000000000..5a4d5b6ce44150983fda6daf62cf0c77854e1845 --- /dev/null +++ b/examples/IsolatedGalaxy_dmparticles/run.sh @@ -0,0 +1,24 @@ +#!/bin/bash + +if [ ! -e reddeathgalaxywithDM.hdf5 ] +then + echo "Fetching initial conditons for the isolated galaxy with an external potential ..." + ./getIC.sh +fi + +../swift --external-gravity --self-gravity --stars --threads=16 isolated_galaxy.yml 2>&1 | tee output.log + + +echo "Make plots of conservation of total angular momentum" +if command -v python3 &>/dev/null; then + python3 angularmomentum.py +else + python angularmomentum.py +fi + +echo "Make plots of change of vertical and radial profile" +if command -v python3 &>/dev/null; then + python3 profilefit.py +else + python profilefit.py +fi diff --git a/examples/IsolatedGalaxy_potential/angularmomentum.py b/examples/IsolatedGalaxy_potential/angularmomentum.py new file mode 100755 index 0000000000000000000000000000000000000000..4398dfeb8b079143886c5565e7667f72fc0bdcef --- /dev/null +++ b/examples/IsolatedGalaxy_potential/angularmomentum.py @@ -0,0 +1,88 @@ +#!/usr/bin/env python +############################################################################### +# This file is part of SWIFT. +# Copyright (c) 2018 Folkert Nobels (nobels@strw.leidenuniv.nl) +# +# This program is free software: you can redistribute it and/or modify +# it under the terms of the GNU Lesser General Public License as published +# by the Free Software Foundation, either version 3 of the License, or +# (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU Lesser General Public License +# along with this program. If not, see <http://www.gnu.org/licenses/>. 
+# +################################################################################ +import numpy as np +import h5py +import matplotlib.pyplot as plt +import scipy.optimize as sco + + +Nmax = 2001 +steps = 10 +angmomcomp = False + +iterarray = np.arange(0, Nmax + 1, steps) +Lxtot = np.zeros(len(iterarray)) +Lytot = np.zeros(len(iterarray)) +Lztot = np.zeros(len(iterarray)) +Ltot = np.zeros(len(iterarray)) +time_array = np.zeros(len(iterarray)) + + +for i in iterarray: + f = h5py.File("output_%04d.hdf5" % i, "r") + + boxsize = f["Header"].attrs["BoxSize"] / 2.0 + + time_array[int(i / steps)] = f["Header"].attrs["Time"] + + particles = f["PartType4"] + coordinates = particles["Coordinates"][:, :] + velocities = particles["Velocities"][:, :] + masses = particles["Masses"][:] + + R = ( + (coordinates[:, 0] - boxsize[0]) ** 2 + (coordinates[:, 1] - boxsize[1]) ** 2 + ) ** 0.5 + X = np.abs(coordinates[:, 0] - boxsize[0]) + Y = np.abs(coordinates[:, 1] - boxsize[1]) + Z = np.abs(coordinates[:, 2] - boxsize[2]) + + vx = velocities[:, 0] + vy = velocities[:, 1] + vz = velocities[:, 2] + + Lx = (Y * vz - Z * vy) * masses + Ly = (Z * vx - X * vz) * masses + Lz = (X * vy - Y * vx) * masses + + L = (Lx ** 2 + Ly ** 2 + Lz ** 2) ** 0.5 + + Lxtot[int(i / steps)] = np.sum(Lx) + Lytot[int(i / steps)] = np.sum(Ly) + Lztot[int(i / steps)] = np.sum(Lz) + Ltot[int(i / steps)] = np.sum(L) + +time_array[-1] = 2.0 +if angmomcomp: + plt.plot(time_array, Lxtot / Lxtot[0] - 1, label="Lx total") + plt.plot(time_array, Lytot / Lytot[0] - 1, label="Ly total") + plt.plot(time_array, Lztot / Lztot[0] - 1, label="Lz total") +plt.plot(time_array, Ltot / Ltot[0] - 1, label="L total") +plt.xlabel("Time") +plt.ylabel("ratio between current and zero angular momentum") +plt.legend() +plt.show() + +plt.semilogy(time_array, np.absolute(Ltot / Ltot[0] - 1)) +plt.xlabel("Time (Gyr)") +plt.ylabel("Fractional change of total angular momentum") +plt.savefig("Total_angular_momentum.png") +plt.show() 
+plt.close() diff --git a/examples/IsolatedGalaxy_potential/getIC.sh b/examples/IsolatedGalaxy_potential/getIC.sh new file mode 100755 index 0000000000000000000000000000000000000000..0e621c990792d5ee86c75c91141457f402319b72 --- /dev/null +++ b/examples/IsolatedGalaxy_potential/getIC.sh @@ -0,0 +1,3 @@ +#!/bin/bash +wget https://home.strw.leidenuniv.nl/~nobels/data/3e11-star-only-static-potential-galaxy.hdf5 + diff --git a/examples/IsolatedGalaxy_potential/isolated_galaxy.yml b/examples/IsolatedGalaxy_potential/isolated_galaxy.yml new file mode 100644 index 0000000000000000000000000000000000000000..deee132ee38ae5e04397839a21a677f4851e6bac --- /dev/null +++ b/examples/IsolatedGalaxy_potential/isolated_galaxy.yml @@ -0,0 +1,56 @@ +# Define the system of units to use internally. +InternalUnitSystem: + UnitMass_in_cgs: 1.9891E43 # 10^10 solar masses + UnitLength_in_cgs: 3.08567758E21 # 1 kpc + UnitVelocity_in_cgs: 1E5 # km/s + UnitCurrent_in_cgs: 1 # Amperes + UnitTemp_in_cgs: 1 # Kelvin + +# Parameters for the self-gravity scheme +Gravity: + mesh_side_length: 32 # Number of cells along each axis for the periodic gravity mesh. + eta: 0.025 # Constant dimensionless multiplier for time integration. + theta: 0.7 # Opening angle (Multipole acceptance criterion). + comoving_softening: 0.0300 # Comoving softening length (in internal units). + max_physical_softening: 0.0300 # Physical softening length (in internal units). + +# Parameters governing the time integration (Set dt_min and dt_max to the same value for a fixed time-step run.) +TimeIntegration: + time_begin: 0. # The starting time of the simulation (in internal units). + time_end: 2. # The end time of the simulation (in internal units). + dt_min: 1e-6 # The minimal time-step size of the simulation (in internal units). + dt_max: 1e-2 # The maximal time-step size of the simulation (in internal units). 
+ +# Parameters governing the snapshots +Snapshots: + basename: output # Common part of the name of output files + time_first: 0. # (Optional) Time of the first output if non-cosmological time-integration (in internal units) + delta_time: 0.001 # Time difference between consecutive outputs (in internal units) + + +# Parameters governing the conserved quantities statistics +Statistics: + delta_time: 1e-2 # Time between statistics output + time_first: 0. # (Optional) Time of the first stats output if non-cosmological time-integration (in internal units) + +Scheduler: + max_top_level_cells: 96 + +# Parameters related to the initial conditions +InitialConditions: + file_name: 3e11-star-only-static-potential-galaxy.hdf5 # The file to read + periodic: 0 # Are we running with periodic ICs? + +# Hernquist potential parameters +HernquistPotential: + useabspos: 0 # 0 -> positions based on centre, 1 -> absolute positions + position: [0.,0.,0.] # Location of centre of isothermal potential with respect to centre of the box (if 0) otherwise absolute (if 1) (internal units) + idealizeddisk: 1 # Run with an idealized galaxy disk + M200: 30.0 # M200 of the galaxy disk + h: 0.704 # reduced Hubble constant (value does not specify the used units!) 
+ concentration: 7.1 # concentration of the Halo + diskfraction: 0.0434370991372 # Disk mass fraction + bulgefraction: 0.00705852860979 # Bulge mass fraction + timestep_mult: 0.01 # Dimensionless pre-factor for the time-step condition, basically determines the fraction of the orbital time we use to do the time integration + epsilon: 0.030 # Softening size (internal units) + diff --git a/examples/IsolatedGalaxy_potential/profilefit.py b/examples/IsolatedGalaxy_potential/profilefit.py new file mode 100755 index 0000000000000000000000000000000000000000..e7755062ea45de4f42716b14c5896b0da676f001 --- /dev/null +++ b/examples/IsolatedGalaxy_potential/profilefit.py @@ -0,0 +1,241 @@ +#!/usr/bin/env python +############################################################################### +# This file is part of SWIFT. +# Copyright (c) 2018 Folkert Nobels (nobels@strw.leidenuniv.nl) +# +# This program is free software: you can redistribute it and/or modify +# it under the terms of the GNU Lesser General Public License as published +# by the Free Software Foundation, either version 3 of the License, or +# (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU Lesser General Public License +# along with this program. If not, see <http://www.gnu.org/licenses/>. 
+# +################################################################################ +import numpy as np +import h5py +import matplotlib.pyplot as plt +from matplotlib.colors import BoundaryNorm +from matplotlib.ticker import MaxNLocator +import scipy.optimize as sco +import os + + +def linearfunc(x, a, b): + return a * x + b + + +def radialfunc(r, h, A): + return A * np.exp(-r / h) * r + + +def verticalfunc(z, A, z0, zoff): + return 2 * A * np.exp(-(z - zoff) / z0) + + +def verticalfunc2(z, A, z0): + return 2 * A * np.exp(-(z) / z0) + + +def verticalfunc3(z, A, z0, zoff, b): + return 2 * A * np.exp(-(z - zoff) / z0) + b + + +Nmax = 2001 +steps = 10 +storefits = False +logfit = True +normalfit = False + +# if the user wants to store the indivudal fits +if storefits: + if not os.path.exists("radial"): + os.mkdir("radial") + os.mkdir("vertical") + os.mkdir("histsnap") + + +# Initialize the arrays +R_ideal = np.linspace(0, 40, 100) +Z_ideal = np.linspace(0, 10, 100) + +iterarray = np.arange(0, Nmax + 1, steps) + +Z0t = np.zeros(len(iterarray)) +Z0terr = np.zeros(len(iterarray)) +h0t = np.zeros(len(iterarray)) +h0terr = np.zeros(len(iterarray)) +Ar = np.zeros(len(iterarray)) +Arerr = np.zeros(len(iterarray)) +Az = np.zeros(len(iterarray)) +Azerr = np.zeros(len(iterarray)) +time_array = np.zeros(len(iterarray)) + +ar = np.zeros(len(iterarray)) +arerr = np.zeros(len(iterarray)) +br = np.zeros(len(iterarray)) +brerr = np.zeros(len(iterarray)) +az = np.zeros(len(iterarray)) +azerr = np.zeros(len(iterarray)) +bz = np.zeros(len(iterarray)) +bzerr = np.zeros(len(iterarray)) +eps = 1e-6 + + +for i in iterarray: + # Getting the data from the snapshots + f = h5py.File("output_%04d.hdf5" % i, "r") + + boxsize = f["Header"].attrs["BoxSize"] / 2.0 + + time_array[int(i / steps)] = f["Header"].attrs["Time"] + + particles = f["PartType4"] + coordinates = particles["Coordinates"][:, :] + masses = particles["Masses"][:] + + R = ( + (coordinates[:, 0] - boxsize[0]) ** 2 + (coordinates[:, 
1] - boxsize[1]) ** 2 + ) ** 0.5 + Z = np.abs(coordinates[:, 1] - boxsize[1]) + + # Bin the coordinates to make them suitable for fitting + Rhist = np.histogram(R, bins=100, range=[0, 40], normed=True) + Zhist = np.histogram(Z, bins=100, range=[0, 10.0], normed=True) + + # Create correct variables for fitting + Ry = Rhist[0] + Rx = (Rhist[1][1:] + Rhist[1][: len(Rhist[0])]) / 2.0 + + Zy = Zhist[0] + Zx = (Zhist[1][1:] + Zhist[1][: len(Zhist[0])]) / 2.0 + + # Fit with two methods: non-linear LSQ and linear LSQ in log space + bestsolR = sco.curve_fit(radialfunc, Rx[10:], Ry[10:], p0=[2.0, 0.2]) + bestsolZ = sco.curve_fit(verticalfunc, Zx[40:], Zy[40:]) + bestsolRlog = sco.curve_fit(linearfunc, Rx[10:], np.log10(Ry[10:] + eps)) + bestsolZlog = sco.curve_fit(linearfunc, Zx[40:], np.log10(Zy[40:] + eps)) + + # Store variables + h0t[int(i / steps)] = bestsolR[0][0] + Z0t[int(i / steps)] = bestsolZ[0][1] + Ar[int(i / steps)] = bestsolR[0][1] + Az[int(i / steps)] = bestsolZ[0][0] + Z0terr[int(i / steps)] = (bestsolZ[1][1, 1]) ** 0.5 + h0terr[int(i / steps)] = (bestsolR[1][0, 0]) ** 0.5 + Arerr[int(i / steps)] = (bestsolR[1][1, 1]) ** 0.5 + Azerr[int(i / steps)] = (bestsolZ[1][0, 0]) ** 0.5 + + ar[int(i / steps)] = bestsolRlog[0][0] + arerr[int(i / steps)] = (bestsolRlog[1][0, 0]) ** 0.5 + br[int(i / steps)] = bestsolRlog[0][1] + brerr[int(i / steps)] = (bestsolRlog[1][1, 1]) ** 0.5 + az[int(i / steps)] = bestsolZlog[0][0] + azerr[int(i / steps)] = (bestsolZlog[1][0, 0]) ** 0.5 + bz[int(i / steps)] = bestsolZlog[0][1] + bzerr[int(i / steps)] = (bestsolZlog[1][1, 1]) ** 0.5 + + if storefits: + plt.step(Rx, Ry) + plt.plot( + R_ideal, + radialfunc(R_ideal, bestsolR[0][0], bestsolR[0][1]), + label="Non linear LSQ", + ) + plt.plot( + R_ideal, + 10 ** (linearfunc(R_ideal, bestsolRlog[0][0], bestsolRlog[0][1])), + label="Linear LSQ", + ) + plt.xlim(0, 40) + plt.ylim(0, 0.25) + plt.xlabel("R (kpc)") + plt.ylabel("Probability") + plt.savefig("./radial/radialsnap%04d.png" % i) + 
plt.close() + + plt.step(Zx, Zy) + plt.plot( + Z_ideal, + verticalfunc(Z_ideal, bestsolZ[0][0], bestsolZ[0][1], bestsolZ[0][2]), + label="Non linear LSQ", + ) + plt.plot( + Z_ideal, + 10 ** (linearfunc(Z_ideal, bestsolZlog[0][0], bestsolZlog[0][1])), + label="Linear LSQ", + ) + plt.xlim(0, 10.0) + plt.ylim(0, 0.6) + plt.xlabel("z (kpc)") + plt.ylabel("Probability") + plt.savefig("./vertical/verticalsnap%04d.png" % i) + plt.close() + +time_array[-1] = 2.0 + +ax = plt.subplot(111) +ax.set_yscale("log") +if logfit: + plt.errorbar( + time_array, + np.absolute(az / (az[0]) - 1), + yerr=azerr / (az[0]), + label="z0 scale height (Log space)", + ) + plt.errorbar( + time_array, + np.absolute(ar / (ar[0]) - 1), + yerr=arerr / (ar[0]), + label="h scale lenght (Log space)", + ) +if normalfit: + plt.errorbar( + time_array, + np.absolute(Z0t / (Z0t[0]) - 1), + yerr=Z0terr / (Z0t[0]), + label="z0 scale height (normal space)", + ) + plt.errorbar( + time_array, + np.absolute(h0t / (h0t[0]) - 1), + yerr=h0terr / (h0t[0]), + label="h scale height (normal space)", + ) +ax.set_xlabel("Time (Gyr)") +ax.set_ylabel("Fractional difference") +plt.legend() +plt.savefig("Fitdifference-witherror.pdf") +plt.close() + + +ax = plt.subplot(111) +ax.set_yscale("log") +if logfit: + plt.plot( + time_array, np.absolute(az / (az[0]) - 1), label="z0 scale height (Log space)" + ) + plt.plot( + time_array, np.absolute(ar / (ar[0]) - 1), label="h scale lenght (Log space)" + ) +if normalfit: + plt.plot( + time_array, + np.absolute(Z0t / (Z0t[0]) - 1), + label="z0 scale height (normal space)", + ) + plt.plot( + time_array, + np.absolute(h0t / (h0t[0]) - 1), + label="h scale height (normal space)", + ) +ax.set_xlabel("Time (Gyr)") +ax.set_ylabel("Fractional difference") +plt.legend() +plt.savefig("Fitdifference.pdf") +plt.show() diff --git a/examples/IsolatedGalaxy_potential/run.sh b/examples/IsolatedGalaxy_potential/run.sh new file mode 100755 index 
0000000000000000000000000000000000000000..e93b43b55bdb233282da05fb5098347969279f13 --- /dev/null +++ b/examples/IsolatedGalaxy_potential/run.sh @@ -0,0 +1,24 @@ +#!/bin/bash + +if [ ! -e reddeathgalaxy.hdf5 ] +then + echo "Fetching initial conditons for the isolated galaxy with an external potential ..." + ./getIC.sh +fi + +../swift --external-gravity --self-gravity --stars --threads=16 isolated_galaxy.yml 2>&1 | tee output.log + + +echo "Make plots of conservation of total angular momentum" +if command -v python3 &>/dev/null; then + python3 angularmomentum.py +else + python angularmomentum.py +fi + +echo "Make plots of change of vertical and radial profile" +if command -v python3 &>/dev/null; then + python3 profilefit.py +else + python profilefit.py +fi diff --git a/examples/IsothermalPotential/energy_plot.py b/examples/IsothermalPotential/energy_plot.py index dab30715fbdaa0393f62c764ba552bbe4106325d..d157e4233cae2221f23d37f6bdf0c30a2486f972 100644 --- a/examples/IsothermalPotential/energy_plot.py +++ b/examples/IsothermalPotential/energy_plot.py @@ -86,7 +86,7 @@ for i in range(402): time_snap[i] = f["Header"].attrs["Time"] E_kin_snap[i] = np.sum(0.5 * mass * (vel_x[:]**2 + vel_y[:]**2 + vel_z[:]**2)) - E_pot_snap[i] = np.sum(-mass * Vrot**2 * log(r)) + E_pot_snap[i] = np.sum(mass * Vrot**2 * log(r)) E_tot_snap[i] = E_kin_snap[i] + E_pot_snap[i] Lz_snap[i] = np.sum(Lz) diff --git a/examples/IsothermalPotential/isothermal.yml b/examples/IsothermalPotential/isothermal.yml index 5f626ff72e979ad0f3d404e01002be6b6018c758..4f8d98a1f7615659ddb3c922b149fc2db04415c6 100644 --- a/examples/IsothermalPotential/isothermal.yml +++ b/examples/IsothermalPotential/isothermal.yml @@ -26,10 +26,13 @@ Snapshots: # Parameters related to the initial conditions InitialConditions: file_name: Isothermal.hdf5 # The file to read + periodic: 1 shift: [200.,200.,200.] 
# Shift all particles to be in the potential # External potential parameters IsothermalPotential: + useabspos: 0 # Whether to use absolute position (1) or relative potential to centre of box (0) + position: [0.,0.,0.] vrot: 200. # rotation speed of isothermal potential in internal units timestep_mult: 0.01 # controls time step epsilon: 0. # No softening at the centre of the halo diff --git a/examples/IsothermalPotential/makeIC.py b/examples/IsothermalPotential/makeIC.py index eab16d21e6a4abd077dc0f4a015a4577427a3591..ebcbb6dda11f1a2d88dfcfb717578f114f3512e9 100644 --- a/examples/IsothermalPotential/makeIC.py +++ b/examples/IsothermalPotential/makeIC.py @@ -97,10 +97,6 @@ grp.attrs["MassTable"] = [0.0, 0.0, 0.0, 0.0, 0.0, 0.0] grp.attrs["Flag_Entropy_ICs"] = [0, 0, 0, 0, 0, 0] grp.attrs["Dimension"] = 3 -#Runtime parameters -grp = file.create_group("/RuntimePars") -grp.attrs["PeriodicBoundariesOn"] = periodic - # set seed for random number numpy.random.seed(1234) diff --git a/examples/IsothermalPotential/run.sh b/examples/IsothermalPotential/run.sh index a5f03f32f82e27660d0a950335d731cf0ff7401d..bc44b2bd1f05b84354f171801b9c2d6c28ea3641 100755 --- a/examples/IsothermalPotential/run.sh +++ b/examples/IsothermalPotential/run.sh @@ -8,6 +8,6 @@ then fi rm -rf Isothermal_*.hdf5 -../swift -g -t 1 isothermal.yml 2>&1 | tee output.log +../swift --external-gravity --threads=1 isothermal.yml 2>&1 | tee output.log python energy_plot.py diff --git a/examples/KelvinHelmholtzGrowthRate_2D/kelvinHelmholtzGrowthRate.yml b/examples/KelvinHelmholtzGrowthRate_2D/kelvinHelmholtzGrowthRate.yml index 380dc2ab3a530e89b952aa41f425e50709d73ee9..e5a46cca1aa0c8972a5427126d2ce57a26d1b262 100644 --- a/examples/KelvinHelmholtzGrowthRate_2D/kelvinHelmholtzGrowthRate.yml +++ b/examples/KelvinHelmholtzGrowthRate_2D/kelvinHelmholtzGrowthRate.yml @@ -31,3 +31,4 @@ SPH: # Parameters related to the initial conditions InitialConditions: file_name: ./kelvinHelmholtzGrowthRate.hdf5 # The file to read + 
periodic: 1 diff --git a/examples/KelvinHelmholtzGrowthRate_2D/makeIC.py b/examples/KelvinHelmholtzGrowthRate_2D/makeIC.py index f21d0c0abf9b15f8253f627bcb1da43ae276fb35..25ef65fd758e0dd97d45732a2da6d2aa19f793bc 100644 --- a/examples/KelvinHelmholtzGrowthRate_2D/makeIC.py +++ b/examples/KelvinHelmholtzGrowthRate_2D/makeIC.py @@ -76,10 +76,6 @@ grp.attrs["MassTable"] = [0.0, 0.0, 0.0, 0.0, 0.0, 0.0] grp.attrs["Flag_Entropy_ICs"] = [0, 0, 0, 0, 0, 0] grp.attrs["Dimension"] = 2 -#Runtime parameters -grp = fileOutput.create_group("/RuntimePars") -grp.attrs["PeriodicBoundariesOn"] = 1 - #Units grp = fileOutput.create_group("/Units") grp.attrs["Unit length in cgs (U_L)"] = 1. diff --git a/examples/KelvinHelmholtzGrowthRate_2D/makeIC_regular.py b/examples/KelvinHelmholtzGrowthRate_2D/makeIC_regular.py index 5029165a6a328b6c706d37b632b14cbcd51501d0..55cd17823a1101164191c89810029370dee21e26 100644 --- a/examples/KelvinHelmholtzGrowthRate_2D/makeIC_regular.py +++ b/examples/KelvinHelmholtzGrowthRate_2D/makeIC_regular.py @@ -82,10 +82,6 @@ grp.attrs["MassTable"] = [0.0, 0.0, 0.0, 0.0, 0.0, 0.0] grp.attrs["Flag_Entropy_ICs"] = [0, 0, 0, 0, 0, 0] grp.attrs["Dimension"] = 2 -#Runtime parameters -grp = fileOutput.create_group("/RuntimePars") -grp.attrs["PeriodicBoundariesOn"] = 1 - #Units grp = fileOutput.create_group("/Units") grp.attrs["Unit length in cgs (U_L)"] = 1. 
diff --git a/examples/KelvinHelmholtzGrowthRate_2D/run.sh b/examples/KelvinHelmholtzGrowthRate_2D/run.sh index 3e6e026f66b14846a5c6e8e9daf99797dc3ff87a..505d64498463bfa414ed30b25abb91ccd1b7f0df 100755 --- a/examples/KelvinHelmholtzGrowthRate_2D/run.sh +++ b/examples/KelvinHelmholtzGrowthRate_2D/run.sh @@ -9,7 +9,7 @@ then fi # Run SWIFT -../swift -s -t 1 kelvinHelmholtzGrowthRate.yml 2>&1 | tee output.log +../swift --hydro --threads=1 kelvinHelmholtzGrowthRate.yml 2>&1 | tee output.log # Plot the solution python plotSolution.py 100 diff --git a/examples/KelvinHelmholtzGrowthRate_3D/kelvinHelmholtzGrowthRate.yml b/examples/KelvinHelmholtzGrowthRate_3D/kelvinHelmholtzGrowthRate.yml index e39c01645b766ae585558452683dc8e1bdf425a8..f5f7157f7d3252e8fe256b7bfc4ba83cb09ef03e 100644 --- a/examples/KelvinHelmholtzGrowthRate_3D/kelvinHelmholtzGrowthRate.yml +++ b/examples/KelvinHelmholtzGrowthRate_3D/kelvinHelmholtzGrowthRate.yml @@ -32,3 +32,4 @@ SPH: # Parameters related to the initial conditions InitialConditions: file_name: ./kelvinHelmholtzGrowthRate.hdf5 # The file to read + periodic: 1 diff --git a/examples/KelvinHelmholtzGrowthRate_3D/makeIC.py b/examples/KelvinHelmholtzGrowthRate_3D/makeIC.py index a9bc20559b9fbb5da400ba5de2563cd715f473d5..d28f3617214193eca6159a7220263d36500dd1aa 100644 --- a/examples/KelvinHelmholtzGrowthRate_3D/makeIC.py +++ b/examples/KelvinHelmholtzGrowthRate_3D/makeIC.py @@ -76,10 +76,6 @@ grp.attrs["MassTable"] = [0.0, 0.0, 0.0, 0.0, 0.0, 0.0] grp.attrs["Flag_Entropy_ICs"] = [0, 0, 0, 0, 0, 0] grp.attrs["Dimension"] = 3 -#Runtime parameters -grp = fileOutput.create_group("/RuntimePars") -grp.attrs["PeriodicBoundariesOn"] = 1 - #Units grp = fileOutput.create_group("/Units") grp.attrs["Unit length in cgs (U_L)"] = 1. 
diff --git a/examples/KelvinHelmholtzGrowthRate_3D/makeIC_regular.py b/examples/KelvinHelmholtzGrowthRate_3D/makeIC_regular.py index aa7dd8f214f8ece1c1d142bf02bd653cd35f9973..51ab694f387d380c83a0b646696fd23111b3f98c 100644 --- a/examples/KelvinHelmholtzGrowthRate_3D/makeIC_regular.py +++ b/examples/KelvinHelmholtzGrowthRate_3D/makeIC_regular.py @@ -84,10 +84,6 @@ grp.attrs["MassTable"] = [0.0, 0.0, 0.0, 0.0, 0.0, 0.0] grp.attrs["Flag_Entropy_ICs"] = [0, 0, 0, 0, 0, 0] grp.attrs["Dimension"] = 3 -#Runtime parameters -grp = fileOutput.create_group("/RuntimePars") -grp.attrs["PeriodicBoundariesOn"] = 1 - #Units grp = fileOutput.create_group("/Units") grp.attrs["Unit length in cgs (U_L)"] = 1. diff --git a/examples/KelvinHelmholtzGrowthRate_3D/run.sh b/examples/KelvinHelmholtzGrowthRate_3D/run.sh index 3e6e026f66b14846a5c6e8e9daf99797dc3ff87a..505d64498463bfa414ed30b25abb91ccd1b7f0df 100755 --- a/examples/KelvinHelmholtzGrowthRate_3D/run.sh +++ b/examples/KelvinHelmholtzGrowthRate_3D/run.sh @@ -9,7 +9,7 @@ then fi # Run SWIFT -../swift -s -t 1 kelvinHelmholtzGrowthRate.yml 2>&1 | tee output.log +../swift --hydro --threads=1 kelvinHelmholtzGrowthRate.yml 2>&1 | tee output.log # Plot the solution python plotSolution.py 100 diff --git a/examples/KelvinHelmholtz_2D/kelvinHelmholtz.yml b/examples/KelvinHelmholtz_2D/kelvinHelmholtz.yml index ccc7526b391374a4da0883f6615a65c7b93a0948..6e4e2bd43cfa3def8386b85c84570e9b9a48fbcf 100644 --- a/examples/KelvinHelmholtz_2D/kelvinHelmholtz.yml +++ b/examples/KelvinHelmholtz_2D/kelvinHelmholtz.yml @@ -31,3 +31,4 @@ SPH: # Parameters related to the initial conditions InitialConditions: file_name: ./kelvinHelmholtz.hdf5 # The file to read + periodic: 1 diff --git a/examples/KelvinHelmholtz_2D/makeIC.py b/examples/KelvinHelmholtz_2D/makeIC.py index 744b39de8260720521ae8e77ed5d0a12161f2b6a..919066955c519dbac4e78e8e2a0eece842c40ab3 100644 --- a/examples/KelvinHelmholtz_2D/makeIC.py +++ b/examples/KelvinHelmholtz_2D/makeIC.py @@ -122,10 
+122,6 @@ grp.attrs["MassTable"] = [0.0, 0.0, 0.0, 0.0, 0.0, 0.0] grp.attrs["Flag_Entropy_ICs"] = [0, 0, 0, 0, 0, 0] grp.attrs["Dimension"] = 2 -#Runtime parameters -grp = fileOutput.create_group("/RuntimePars") -grp.attrs["PeriodicBoundariesOn"] = 1 - #Units grp = fileOutput.create_group("/Units") grp.attrs["Unit length in cgs (U_L)"] = 1. diff --git a/examples/KelvinHelmholtz_2D/makeMovie.py b/examples/KelvinHelmholtz_2D/makeMovie.py index 84fe99106bf607830e89b6aa663135b48b6c0744..a52784891ab4689dcd59dc27945e573e602785f3 100644 --- a/examples/KelvinHelmholtz_2D/makeMovie.py +++ b/examples/KelvinHelmholtz_2D/makeMovie.py @@ -91,6 +91,7 @@ if __name__ == "__main__": # Creation of first frame fig, ax = plt.subplots(1, 1, figsize=(1, 1), frameon=False) + ax.axis("off") # Remove annoying black frame. data_x, data_y, density = load_and_extract("kelvinhelmholtz_0000.hdf5") diff --git a/examples/KelvinHelmholtz_2D/run.sh b/examples/KelvinHelmholtz_2D/run.sh index dbb39caf383279dbc71c2baa125499d115538654..ef823cb85b88ff1b228b6906835fac8383db8b8c 100755 --- a/examples/KelvinHelmholtz_2D/run.sh +++ b/examples/KelvinHelmholtz_2D/run.sh @@ -8,7 +8,7 @@ then fi # Run SWIFT -../swift -s -t 4 kelvinHelmholtz.yml 2>&1 | tee output.log +../swift --hydro --threads=4 kelvinHelmholtz.yml 2>&1 | tee output.log # Plot the solution python plotSolution.py 6 diff --git a/examples/KeplerianRing/README.md b/examples/KeplerianRing/README.md index 1c361f275d60ef1ca46d696e2e9507bb749e531c..1cb2e2119d0f0bb093abf194ab18da91dd587d32 100644 --- a/examples/KeplerianRing/README.md +++ b/examples/KeplerianRing/README.md @@ -69,7 +69,7 @@ Plotting Once you have ran swift (we suggest that you use the following) - ../swift -g -S -s -t 16 keplerian_ring.yml 2>&1 | tee output.log + ../swift --external-gravity --stars --hydro --threads=16 keplerian_ring.yml 2>&1 | tee output.log there will be around 350 ```.hdf5``` files in your directory. 
To check out the results of the example use the plotting script: diff --git a/examples/KeplerianRing/keplerian_ring.yml b/examples/KeplerianRing/keplerian_ring.yml index cc5db2a06adbe9678207454c6504a6fa315675cf..2195acfb55121ff595c471ad146b40752d9aa84e 100644 --- a/examples/KeplerianRing/keplerian_ring.yml +++ b/examples/KeplerianRing/keplerian_ring.yml @@ -32,7 +32,8 @@ SPH: # Parameters related to the initial conditions InitialConditions: file_name: initial_conditions.hdf5 # The file to read - + periodic: 1 + # External potential parameters PointMassPotential: position: [5.,5.,5.] # location of external point mass in internal units diff --git a/examples/KeplerianRing/run.sh b/examples/KeplerianRing/run.sh index 0195846a8839a27083594c20569b1fd4d49f4c16..236f512d1c9f5b5d660fc7d9f540c085ad3e29ab 100755 --- a/examples/KeplerianRing/run.sh +++ b/examples/KeplerianRing/run.sh @@ -9,7 +9,7 @@ then fi rm -rf keplerian_ring_*.hdf5 -../swift -g -s -t 1 -v 1 keplerian_ring.yml 2>&1 | tee output.log +../swift --external-gravity --hydro --threads=1 --verbose=1 keplerian_ring.yml 2>&1 | tee output.log echo echo diff --git a/examples/Makefile.am b/examples/Makefile.am index e4a12fc1a90c0f52c767efd378f4bea646c0d425..fae4e132cdf1ac24f293060d6e3a293729d109f4 100644 --- a/examples/Makefile.am +++ b/examples/Makefile.am @@ -1,4 +1,4 @@ -# tHIS FIle is part of SWIFT. +# This file is part of SWIFT. # Copyright (c) 2012 Pedro Gonnet (pedro.gonnet@durham.ac.uk), # Matthieu Schaller (matthieu.schaller@durham.ac.uk). # @@ -19,16 +19,19 @@ MYFLAGS = # Add the source directory and the non-standard paths to the included library headers to CFLAGS -AM_CFLAGS = -I$(top_srcdir)/src $(HDF5_CPPFLAGS) $(GSL_INCS) $(FFTW_INCS) +AM_CFLAGS = -I$(top_srcdir)/src -I$(top_srcdir)/argparse $(HDF5_CPPFLAGS) \ + $(GSL_INCS) $(FFTW_INCS) $(GRACKLE_INCS) AM_LDFLAGS = $(HDF5_LDFLAGS) # Extra libraries. 
-EXTRA_LIBS = $(HDF5_LIBS) $(FFTW_LIBS) $(PROFILER_LIBS) $(TCMALLOC_LIBS) $(JEMALLOC_LIBS) $(TBBMALLOC_LIBS) $(GRACKLE_LIBS) $(VELOCIRAPTOR_LIBS) $(GSL_LIBS) +EXTRA_LIBS = $(HDF5_LIBS) $(FFTW_LIBS) $(PROFILER_LIBS) $(TCMALLOC_LIBS) \ + $(JEMALLOC_LIBS) $(TBBMALLOC_LIBS) $(GRACKLE_LIBS) \ + $(VELOCIRAPTOR_LIBS) $(GSL_LIBS) # MPI libraries. -MPI_LIBS = $(METIS_LIBS) $(MPI_THREAD_LIBS) -MPI_FLAGS = -DWITH_MPI $(METIS_INCS) +MPI_LIBS = $(PARMETIS_LIBS) $(METIS_LIBS) $(MPI_THREAD_LIBS) +MPI_FLAGS = -DWITH_MPI $(PARMETIS_INCS) $(METIS_INCS) # Programs. bin_PROGRAMS = swift @@ -48,15 +51,16 @@ endif # Sources for swift swift_SOURCES = main.c swift_CFLAGS = $(MYFLAGS) $(AM_CFLAGS) -DENGINE_POLICY="engine_policy_keep $(ENGINE_POLICY_SETAFFINITY)" -swift_LDADD = ../src/.libs/libswiftsim.a $(EXTRA_LIBS) +swift_LDADD = ../src/.libs/libswiftsim.a ../argparse/.libs/libargparse.a $(EXTRA_LIBS) # Sources for swift_mpi, do we need an affinity policy for MPI? swift_mpi_SOURCES = main.c swift_mpi_CFLAGS = $(MYFLAGS) $(AM_CFLAGS) $(MPI_FLAGS) -DENGINE_POLICY="engine_policy_keep $(ENGINE_POLICY_SETAFFINITY)" -swift_mpi_LDADD = ../src/.libs/libswiftsim_mpi.a $(MPI_LIBS) $(EXTRA_LIBS) +swift_mpi_LDADD = ../src/.libs/libswiftsim_mpi.a ../argparse/.libs/libargparse.a $(MPI_LIBS) $(EXTRA_LIBS) # Scripts to generate ICs EXTRA_DIST = CoolingBox/coolingBox.yml CoolingBox/energy_plot.py CoolingBox/makeIC.py CoolingBox/run.sh \ + ConstantCosmoVolume/run.sh ConstantCosmoVolume/makeIC.py ConstantCosmoVolume/plotSolution.py ConstantCosmoVolume/constant_volume.yml \ EAGLE_6/eagle_6.yml EAGLE_6/getIC.sh EAGLE_6/README EAGLE_6/run.sh \ EAGLE_12/eagle_12.yml EAGLE_12/getIC.sh EAGLE_12/README EAGLE_12/run.sh \ EAGLE_25/eagle_25.yml EAGLE_25/getIC.sh EAGLE_25/README EAGLE_25/run.sh \ @@ -81,36 +85,26 @@ EXTRA_DIST = CoolingBox/coolingBox.yml CoolingBox/energy_plot.py CoolingBox/make Noh_3D/makeIC.py Noh_3D/noh.yml Noh_3D/plotSolution.py Noh_3D/run.sh Noh_3D/getGlass.sh \ PerturbedBox_2D/makeIC.py 
PerturbedBox_2D/perturbedPlane.yml \ PerturbedBox_3D/makeIC.py PerturbedBox_3D/perturbedBox.yml PerturbedBox_3D/run.sh \ + PMillennium-384/p-mill-384.yml \ + PMillennium-768/p-mill-768.yml \ + SantaBarbara/README SantaBarbara/getIC.sh SantaBarbara/santa_barbara.yml SantaBarbara/run.sh \ + SantaBarbara_low/README SantaBarbara_low/getIC.sh SantaBarbara_low/santa_barbara.yml SantaBarbara_low/run.sh \ SedovBlast_1D/makeIC.py SedovBlast_1D/plotSolution.py SedovBlast_1D/run.sh SedovBlast_1D/sedov.yml \ SedovBlast_2D/getGlass.sh SedovBlast_2D/makeIC.py SedovBlast_2D/plotSolution.py SedovBlast_2D/run.sh SedovBlast_2D/sedov.yml \ SedovBlast_3D/getGlass.sh SedovBlast_3D/makeIC.py SedovBlast_3D/plotSolution.py SedovBlast_3D/run.sh SedovBlast_3D/sedov.yml \ SineWavePotential_1D/makeIC.py SineWavePotential_1D/plotSolution.py SineWavePotential_1D/run.sh SineWavePotential_1D/sineWavePotential.yml \ SineWavePotential_2D/makeIC.py SineWavePotential_2D/plotSolution.py SineWavePotential_2D/run.sh SineWavePotential_2D/sineWavePotential.yml \ SineWavePotential_3D/makeIC.py SineWavePotential_3D/plotSolution.py SineWavePotential_3D/run.sh SineWavePotential_3D/sineWavePotential.yml \ - SmallCosmoVolume/README SmallCosmoVolume/getIC.sh SmallCosmoVolume/run.sh SmallCosmoVolume/small_cosmo_volume.yml \ + SmallCosmoVolume/README SmallCosmoVolume/getIC.sh SmallCosmoVolume/run.sh SmallCosmoVolume/small_cosmo_volume.yml SmallCosmoVolume/plotTempEvolution.py \ + SmallCosmoVolume_DM/README SmallCosmoVolume_DM/getIC.sh SmallCosmoVolume_DM/run.sh SmallCosmoVolume_DM/small_cosmo_volume_dm.yml SmallCosmoVolume_DM/stf_input_6dfof_dmonly_sub.cfg \ SodShock_1D/makeIC.py SodShock_1D/plotSolution.py SodShock_1D/run.sh SodShock_1D/sodShock.yml \ SodShock_2D/getGlass.sh SodShock_2D/makeIC.py SodShock_2D/plotSolution.py SodShock_2D/run.sh SodShock_2D/sodShock.yml \ SodShock_3D/getGlass.sh SodShock_3D/makeIC.py SodShock_3D/plotSolution.py SodShock_3D/run.sh SodShock_3D/sodShock.yml \ SquareTest_2D/makeIC.py 
SquareTest_2D/plotSolution.py SquareTest_2D/run.sh SquareTest_2D/square.yml \ UniformBox_2D/makeIC.py UniformBox_2D/run.sh UniformBox_2D/uniformPlane.yml \ UniformBox_3D/makeICbig.py UniformBox_3D/makeIC.py UniformBox_3D/run.sh UniformBox_3D/uniformBox.yml \ - UniformDMBox/makeIC.py \ + Gravity_glass/makeIC.py Gravity_glass/README Gravity_glass/uniform_DM_box.yml \ ZeldovichPancake_3D/makeIC.py ZeldovichPancake_3D/zeldovichPancake.yml ZeldovichPancake_3D/run.sh ZeldovichPancake_3D/plotSolution.py # Default parameter file EXTRA_DIST += parameter_example.yml - -# Scripts to plot task graphs -EXTRA_DIST += plot_tasks.py analyse_tasks.py process_plot_tasks_MPI process_plot_tasks - -# Scripts to plot threadpool 'task' graphs -EXTRA_DIST += analyse_threadpool_tasks.py \ - plot_threadpool.py \ - process_plot_threadpool - -# Script for scaling plot -EXTRA_DIST += plot_scaling_results.py \ - plot_scaling_results_breakdown.py - -# Script for gravity accuracy -EXTRA_DIST += plot_gravity_checks.py diff --git a/examples/MoonFormingImpact/README.md b/examples/MoonFormingImpact/README.md deleted file mode 100644 index 97a84f67c6aeeff4176a1385381f1cfe9e340c91..0000000000000000000000000000000000000000 --- a/examples/MoonFormingImpact/README.md +++ /dev/null @@ -1,34 +0,0 @@ -Canonical Moon-Forming Giant Impact -=================================== - -NOTE: This doesn't really work because the EOS are different to Canup (2004) so -the impactor just glances then flies away! - -A version of the canonical moon-forming giant impact of Theia onto the early -Earth (Canup 2004; Barr 2016). Both bodies are here made of a (Tillotson) iron -core and granite mantle. Only ~10,000 particles are used for a quick and crude -simulation. 
- -Setup ------ - -In `swiftsim/`: - -`$ ./configure --with-hydro=minimal-multi-mat --with-equation-of-state=planetary` -`$ make` - -In `swiftsim/examples/MoonFormingImpact/`: - -`$ ./get_init_cond.sh` - -Run ---- - -`$ ./run.sh` - -Output ------- - -`$ python plot.py` -`$ mplayer anim.mpg` - diff --git a/examples/MoonFormingImpact/get_init_cond.sh b/examples/MoonFormingImpact/get_init_cond.sh deleted file mode 100755 index 7d63943c2c5dc3bd4ab88e63a2abba62cc3f04a5..0000000000000000000000000000000000000000 --- a/examples/MoonFormingImpact/get_init_cond.sh +++ /dev/null @@ -1,2 +0,0 @@ -#!/bin/bash -wget http://virgodb.cosma.dur.ac.uk/swift-webstorage/ICs/moon_forming_impact.hdf5 diff --git a/examples/MoonFormingImpact/moon_forming_impact.yml b/examples/MoonFormingImpact/moon_forming_impact.yml deleted file mode 100644 index 323adf7f3ac73f41b45b50eaa76a95033dca35d7..0000000000000000000000000000000000000000 --- a/examples/MoonFormingImpact/moon_forming_impact.yml +++ /dev/null @@ -1,48 +0,0 @@ -# Define the system of units to use internally. -InternalUnitSystem: - UnitMass_in_cgs: 5.9724e27 # Grams - UnitLength_in_cgs: 6.371e8 # Centimeters - UnitVelocity_in_cgs: 6.371e8 # Centimeters per second - UnitCurrent_in_cgs: 1 # Amperes - UnitTemp_in_cgs: 1 # Kelvin - -# Parameters governing the time integration -TimeIntegration: - time_begin: 0 # The starting time of the simulation (in internal units). - time_end: 100000 # The end time of the simulation (in internal units). - dt_min: 0.001 # The minimal time-step size of the simulation (in internal units). - dt_max: 100 # The maximal time-step size of the simulation (in internal units). 
- -# Parameters governing the snapshots -Snapshots: - # Common part of the name of output files - basename: snapshots/moon_forming_impact - time_first: 0 # Time of the first output (in internal units) - delta_time: 100 # Time difference between consecutive outputs (in internal units) - label_delta: 100 # Integer increment between snapshot output labels - -# Parameters governing the conserved quantities statistics -Statistics: - delta_time: 500 # Time between statistics output - -# Parameters for the hydrodynamics scheme -SPH: - resolution_eta: 1.2348 # Target smoothing length in units of the mean inter-particle separation (1.2348 == 48Ngbs with the cubic spline kernel). - delta_neighbours: 0.1 # The tolerance for the targetted number of neighbours. - CFL_condition: 0.2 # Courant-Friedrich-Levy condition for time integration. - -# Parameters for the self-gravity scheme -Gravity: - eta: 0.025 # Constant dimensionless multiplier for time integration. - theta: 0.7 # Opening angle (Multipole acceptance criterion) - comoving_softening: 0.005 # Comoving softening length (in internal units). - max_physical_softening: 0.005 # Physical softening length (in internal units). - -# Parameters related to the initial conditions -InitialConditions: - # The initial conditions file to read - file_name: moon_forming_impact.hdf5 - -# Parameters related to the equation of state -EoS: - planetary_use_Til: 1 # Whether to prepare the Tillotson EOS diff --git a/examples/MoonFormingImpact/plot.py b/examples/MoonFormingImpact/plot.py deleted file mode 100644 index aa0d64a5d0d06709d51b1db231c507e22861f36c..0000000000000000000000000000000000000000 --- a/examples/MoonFormingImpact/plot.py +++ /dev/null @@ -1,285 +0,0 @@ -""" -############################################################################### -# This file is part of SWIFT. 
-# Copyright (c) 2018 Jacob Kegerreis (jacob.kegerreis@durham.ac.uk) -# -# This program is free software: you can redistribute it and/or modify -# it under the terms of the GNU Lesser General Public License as published -# by the Free Software Foundation, either version 3 of the License, or -# (at your option) any later version. -# -# This program is distributed in the hope that it will be useful, -# but WITHOUT ANY WARRANTY; without even the implied warranty of -# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -# GNU General Public License for more details. -# -# You should have received a copy of the GNU Lesser General Public License -# along with this program. If not, see <http://www.gnu.org/licenses/>. -# -############################################################################### - -Plotting script for the Canonical Moon-Forming Giant Impact example. - -Save a figure for each snapshot in `./plots/` then make them into a simple -animation with ffmpeg in `./`. - -Usage: - `$ python plot.py time_end delta_time` - - Sys args: - + `time_end` | (opt) int | The time of the last snapshot to plot. - Default = 100000 - + `delta_time` | (opt) int | The time between successive snapshots. 
- Default = 100 -""" - -from __future__ import division -import numpy as np -import matplotlib -import matplotlib.pyplot as plt -import h5py -import sys -import subprocess - -# Particle array fields -dtype_picle = [ - ('m', float), ('x', float), ('y', float), ('z', float), ('v_x', float), - ('v_y', float), ('v_z', float), ('ID', int), ('rho', float), ('u', float), - ('phi', float), ('P', float), ('h', float), ('mat_ID', int), ('r', float) - ] - -s_to_hour = 1 / 60**2 - -# Snapshot info -file_snap = "./snapshots/moon_forming_impact_" -file_plot = "./plots/moon_forming_impact_" -# Number of particles in the target body -num_target = 9496 - -# Material types (copied from src/equation_of_state/planetary/equation_of_state.h) -type_factor = 100 -Di_type = { - 'Til' : 1, - 'HM80' : 2, - 'ANEOS' : 3, - 'SESAME' : 4, -} -Di_material = { - # Tillotson - 'Til_iron' : Di_type['Til']*type_factor, - 'Til_granite' : Di_type['Til']*type_factor + 1, - 'Til_water' : Di_type['Til']*type_factor + 2, - # Hubbard & MacFarlane (1980) Uranus/Neptune - 'HM80_HHe' : Di_type['HM80']*type_factor, # Hydrogen-helium atmosphere - 'HM80_ice' : Di_type['HM80']*type_factor + 1, # H20-CH4-NH3 ice mix - 'HM80_rock' : Di_type['HM80']*type_factor + 2, # SiO2-MgO-FeS-FeO rock mix - # ANEOS - 'ANEOS_iron' : Di_type['ANEOS']*type_factor, - 'MANEOS_forsterite' : Di_type['ANEOS']*type_factor + 1, - # SESAME - 'SESAME_iron' : Di_type['SESAME']*type_factor, -} - -# Material offset for impactor particles -ID_imp = 10000 -# Material colours -Di_mat_colour = { - # Target - Di_material['Til_iron'] : 'orange', - Di_material['Til_granite'] : '#FFF0E0', - # Impactor - Di_material['Til_iron'] + ID_imp : 'dodgerblue', - Di_material['Til_granite'] + ID_imp : '#A080D0', - } - - -def load_snapshot(filename): - """ Load the hdf5 snapshot file and return the structured particle array. 
- """ - # Add extension if needed - if (filename[-5:] != ".hdf5"): - filename += ".hdf5" - - # Load the hdf5 file - with h5py.File(filename, 'r') as f: - header = f['Header'].attrs - A2_pos = f['PartType0/Coordinates'].value - A2_vel = f['PartType0/Velocities'].value - - # Structured array of all particle data - A2_picle = np.empty(header['NumPart_Total'][0], - dtype=dtype_picle) - - A2_picle['x'] = A2_pos[:, 0] - A2_picle['y'] = A2_pos[:, 1] - A2_picle['z'] = A2_pos[:, 2] - A2_picle['v_x'] = A2_vel[:, 0] - A2_picle['v_y'] = A2_vel[:, 1] - A2_picle['v_z'] = A2_vel[:, 2] - A2_picle['m'] = f['PartType0/Masses'].value - A2_picle['ID'] = f['PartType0/ParticleIDs'].value - A2_picle['rho'] = f['PartType0/Density'].value - A2_picle['u'] = f['PartType0/InternalEnergy'].value - A2_picle['phi'] = f['PartType0/Potential'].value - A2_picle['P'] = f['PartType0/Pressure'].value - A2_picle['h'] = f['PartType0/SmoothingLength'].value - A2_picle['mat_ID'] = f['PartType0/MaterialID'].value - - return A2_picle - - -def process_particles(A2_picle, num_target): - """ Modify things like particle units, material IDs, and coordinate origins. 
- """ - # Offset material IDs for impactor particles - A2_picle['mat_ID'][A2_picle['ID'] >= num_target] += ID_imp - - # Shift coordinates to the centre of the target's core's mass and momentum - sel_tar = np.where(A2_picle['mat_ID'] == Di_material['Til_iron'])[0] - - # Centre of mass - m_tot = np.sum(A2_picle[sel_tar]['m']) - x_com = np.sum(A2_picle[sel_tar]['m'] * A2_picle[sel_tar]['x']) / m_tot - y_com = np.sum(A2_picle[sel_tar]['m'] * A2_picle[sel_tar]['y']) / m_tot - z_com = np.sum(A2_picle[sel_tar]['m'] * A2_picle[sel_tar]['z']) / m_tot - - # Change origin to the centre-of-mass - A2_picle['x'] -= x_com - A2_picle['y'] -= y_com - A2_picle['z'] -= z_com - A2_picle['r'] = np.sqrt( - A2_picle['x']**2 + A2_picle['y']**2 + A2_picle['z']**2 - ) - - # Centre of momentum - v_x_com = np.sum(A2_picle[sel_tar]['m'] * A2_picle[sel_tar]['v_x']) / m_tot - v_y_com = np.sum(A2_picle[sel_tar]['m'] * A2_picle[sel_tar]['v_y']) / m_tot - v_z_com = np.sum(A2_picle[sel_tar]['m'] * A2_picle[sel_tar]['v_z']) / m_tot - - # Change to the centre-of-momentum frame of reference - A2_picle['v_x'] -= v_x_com - A2_picle['v_y'] -= v_y_com - A2_picle['v_z'] -= v_z_com - - return A2_picle - - -def plot_snapshot(A2_picle, filename, time, ax_lim=100, dz=0.1): - """ Plot the snapshot particles and save the figure. - """ - # Add extension if needed - if (filename[-5:] != ".png"): - filename += ".png" - - fig = plt.figure(figsize=(9, 9)) - ax = fig.add_subplot(111, aspect='equal') - - # Plot slices in z below zero - for z in np.arange(-ax_lim, 0, dz): - sel_z = np.where((z < A2_picle['z']) & (A2_picle['z'] < z+dz))[0] - A2_picle_z = A2_picle[sel_z] - - # Plot each material - for mat_ID, colour in Di_mat_colour.iteritems(): - sel_col = np.where(A2_picle_z['mat_ID'] == mat_ID)[0] - - ax.scatter( - A2_picle_z[sel_col]['x'], A2_picle_z[sel_col]['y'], - c=colour, edgecolors='none', marker='.', s=50, alpha=0.7 - ) - - # Axes etc. 
- ax.set_axis_bgcolor('k') - - ax.set_xlabel("x Position ($R_\oplus$)") - ax.set_ylabel("y Position ($R_\oplus$)") - - ax.set_xlim(-ax_lim, ax_lim) - ax.set_ylim(-ax_lim, ax_lim) - - plt.text( - -0.92*ax_lim, 0.85*ax_lim, "%.1f h" % (time*s_to_hour), fontsize=20, - color='w' - ) - - # Font sizes - for item in ( - [ax.title, ax.xaxis.label, ax.yaxis.label] + ax.get_xticklabels() + - ax.get_yticklabels() - ): - item.set_fontsize(20) - - plt.tight_layout() - - plt.savefig(filename) - plt.close() - - -if __name__ == '__main__': - # Sys args - try: - time_end = int(sys.argv[1]) - - try: - delta_time = int(sys.argv[2]) - except IndexError: - delta_time = 100 - except IndexError: - time_end = 100000 - delta_time = 100 - - # Load and plot each snapshot - for i_snap in range(int(time_end/delta_time) + 1): - snap_time = i_snap * delta_time - print "\rPlotting snapshot %06d (%d of %d)" % ( - snap_time, i_snap+1, int(time_end/delta_time) - ), - sys.stdout.flush() - - # Load particle data - filename = "%s%06d" % (file_snap, snap_time) - A2_picle = load_snapshot(filename) - - # Process particle data - A2_picle = process_particles(A2_picle, num_target) - - # Plot particles - filename = "%s%06d" % (file_plot, snap_time) - plot_snapshot(A2_picle, filename, snap_time) - - # Animation - command = ( - "ffmpeg -framerate 12 -i plots/moon_forming_impact_%*.png -r 25 " - "anim.mpg -y" - ) - print "\n%s\n" % command - subprocess.check_output(command, shell=True) - - - - - - - - - - - - - - - - - - - - - - - - - - - - - diff --git a/examples/MoonFormingImpact/run.sh b/examples/MoonFormingImpact/run.sh deleted file mode 100755 index 165dae3a24a9c30960959fbb37aa6e1da2eb851f..0000000000000000000000000000000000000000 --- a/examples/MoonFormingImpact/run.sh +++ /dev/null @@ -1,2 +0,0 @@ -#!/bin/bash -../swift -G -s -t 8 moon_forming_impact.yml diff --git a/examples/MultiTypes/makeIC.py b/examples/MultiTypes/makeIC.py index 
41a5ef5f2ffc4073ef8a4e93a130b43fcbe2c1f5..80d49c762b1fe13bbfafd05c6818d3f202e5b033 100644 --- a/examples/MultiTypes/makeIC.py +++ b/examples/MultiTypes/makeIC.py @@ -93,10 +93,6 @@ for n in range(num_files): grp.attrs["MassTable"] = [0.0, massDM, 0.0, 0.0, 0.0, 0.0] grp.attrs["Flag_Entropy_ICs"] = 0 grp.attrs["Dimension"] = 3 - - #Runtime parameters - grp = file.create_group("/RuntimePars") - grp.attrs["PeriodicBoundariesOn"] = periodic #Units grp = file.create_group("/Units") diff --git a/examples/MultiTypes/multiTypes.yml b/examples/MultiTypes/multiTypes.yml index 04647f0f00e69f5baf2560aca0feeb14a26cc50a..121a15b0837df19e4d2e9e64a56107c24fbde066 100644 --- a/examples/MultiTypes/multiTypes.yml +++ b/examples/MultiTypes/multiTypes.yml @@ -31,6 +31,7 @@ SPH: # Parameters related to the initial conditions InitialConditions: file_name: ./multiTypes.hdf5 # The file to read + periodic: 1 replicate: 2 # Replicate all particles twice along each axis # External potential parameters diff --git a/examples/MultiTypes/run.sh b/examples/MultiTypes/run.sh index 38cba70393861539f18bf9fa360d51f46dd3367d..b0a3953e3223ecf5f686e74b2c0681ae31b708d8 100755 --- a/examples/MultiTypes/run.sh +++ b/examples/MultiTypes/run.sh @@ -7,4 +7,4 @@ then python makeIC.py 9 13 7 1 fi -../swift -s -g -S -t 1 multiTypes.yml 2>&1 | tee output.log +../swift --hydro --external-gravity --stars --threads=1 multiTypes.yml 2>&1 | tee output.log diff --git a/examples/NFW_Halo/README b/examples/NFW_Halo/README new file mode 100755 index 0000000000000000000000000000000000000000..059d35c9a94d7851233dd0fa423abca3a1d7cddf --- /dev/null +++ b/examples/NFW_Halo/README @@ -0,0 +1,5 @@ +This just provides a test that the NFW potential is giving the correct orbit +for an elliptical orbit as calculated by Jo Bovy's galpy package. 
If +galpy is not installed on your system you can install it by using: +pip install galpy --user + diff --git a/examples/NFW_Halo/makeIC.py b/examples/NFW_Halo/makeIC.py new file mode 100755 index 0000000000000000000000000000000000000000..68d8108f84aa759fe16956226122d53765c5ed1d --- /dev/null +++ b/examples/NFW_Halo/makeIC.py @@ -0,0 +1,75 @@ +################################################################################ +# This file is part of SWIFT. +# Copyright (c) 2018 Ashley Kelly () +# Folkert Nobels (nobels@strw.leidenuniv.nl) +# +# This program is free software: you can redistribute it and/or modify +# it under the terms of the GNU Lesser General Public License as published +# by the Free Software Foundation, either version 3 of the License, or +# (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU Lesser General Public License +# along with this program. If not, see <http://www.gnu.org/licenses/>. 
+# +################################################################################ + +import numpy as np +import matplotlib.pyplot as plt +from astropy import units +import h5py as h5 + +C = 8.0 +M_200 = 2.0 +N_PARTICLES = 1 + + +print("\nInitial conditions written to 'test_nfw.hdf5'") + +pos = np.array([8.0, 0.0, 0.0]) + 500.0 +vel = np.array([0.0, 240.0, 5.0]) +ids = np.array([1.0]) +mass = np.array([1.0]) + +# File +file = h5.File("test_nfw.hdf5", "w") + +# Units +grp = file.create_group("/Units") +grp.attrs["Unit length in cgs (U_L)"] = 3.086e21 +grp.attrs["Unit mass in cgs (U_M)"] = 1.988e33 +grp.attrs["Unit time in cgs (U_t)"] = 3.086e16 +grp.attrs["Unit current in cgs (U_I)"] = 1.0 +grp.attrs["Unit temperature in cgs (U_T)"] = 1.0 + +# Header +grp = file.create_group("/Header") +grp.attrs["BoxSize"] = 1000.0 +grp.attrs["NumPart_Total"] = [0, N_PARTICLES, 0, 0, 0, 0] +grp.attrs["NumPart_Total_HighWord"] = [0, 0, 0, 0, 0, 0] +grp.attrs["NumPart_ThisFile"] = [0, N_PARTICLES, 0, 0, 0, 0] +grp.attrs["Time"] = 0.0 +grp.attrs["NumFilesPerSnapshot"] = 1 +grp.attrs["MassTable"] = [0.0, 0.0, 0.0, 0.0, 0.0, 0.0] +grp.attrs["Flag_Entropy_ICs"] = [0, 0, 0, 0, 0, 0] +grp.attrs["Dimension"] = 3 + +# Runtime parameters +grp = file.create_group("/RuntimePars") +grp.attrs["PeriodicBoundariesOn"] = 1 + +# Particle group +grp1 = file.create_group("/PartType1") +ds = grp1.create_dataset("Velocities", (N_PARTICLES, 3), "f", data=vel) + +ds = grp1.create_dataset("Masses", (N_PARTICLES,), "f", data=mass) + +ds = grp1.create_dataset("ParticleIDs", (N_PARTICLES,), "L", data=ids) + +ds = grp1.create_dataset("Coordinates", (N_PARTICLES, 3), "d", data=pos) + +file.close() diff --git a/examples/NFW_Halo/makePlots.py b/examples/NFW_Halo/makePlots.py new file mode 100755 index 0000000000000000000000000000000000000000..5e6f24d7a72dafe47d26ccb1b2d33b136affad98 --- /dev/null +++ b/examples/NFW_Halo/makePlots.py @@ -0,0 +1,73 @@ 
+################################################################################ +# This file is part of SWIFT. +# Copyright (c) 2018 Ashley Kelly () +# Folkert Nobels (nobels@strw.leidenuniv.nl) +# +# This program is free software: you can redistribute it and/or modify +# it under the terms of the GNU Lesser General Public License as published +# by the Free Software Foundation, either version 3 of the License, or +# (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU Lesser General Public License +# along with this program. If not, see <http://www.gnu.org/licenses/>. +# +################################################################################ +from galpy.potential import NFWPotential +from galpy.orbit import Orbit +import numpy as np +import matplotlib.pyplot as plt +from astropy import units +import h5py as h5 + +C = 8.0 +M_200 = 2.0 + + +def read_data(): + R = np.array([]) + z = np.array([]) + for frame in range(0, 599, 1): + try: + sim = h5.File("output_%04d.hdf5" % frame, "r") + except IOError: + break + + boxSize = sim["/Header"].attrs["BoxSize"][0] + pos = sim["/PartType1/Coordinates"][:, :] - boxSize / 2.0 + R = np.append(R, np.sqrt(pos[0, 0] ** 2 + pos[0, 1] ** 2)) + z = np.append(z, pos[0, 2]) + return (R, z) + + +def galpy_nfw_orbit(): + # Setting up the potential + nfw = NFWPotential(conc=C, mvir=M_200, H=70.0, wrtcrit=True, overdens=200) + nfw.turn_physical_on() + vxvv = [ + 8.0 * units.kpc, + 0.0 * units.km / units.s, + 240.0 * units.km / units.s, + 0.0 * units.pc, + 5.0 * units.km / units.s, + ] + + # Calculating the orbit + ts = np.linspace(0.0, 0.58, 1000) * units.Gyr + o = Orbit(vxvv=vxvv) + o.integrate(ts, nfw, method="odeint") + + return o + + +o = 
galpy_nfw_orbit() +(R, z) = read_data() + +o.plot() +plt.scatter(R, z, s=1, color="black", marker="x") +plt.savefig("comparison.png") +plt.close() diff --git a/examples/NFW_Halo/run.sh b/examples/NFW_Halo/run.sh new file mode 100755 index 0000000000000000000000000000000000000000..3a21c9678487a070f568a67cccb989089e59d66a --- /dev/null +++ b/examples/NFW_Halo/run.sh @@ -0,0 +1,20 @@ +#!/bin/bash + +if [ ! -e test_nfw.hdf5 ] +then + echo "Generate initial conditions for NFW example" + if command -v python3 &>/dev/null; then + python3 makeIC.py + else + python makeIC.py + fi +fi + +# self gravity G, external potential g, hydro s, threads t and high verbosity v +../swift --external-gravity --threads=6 test.yml 2>&1 | tee output.log + +if command -v python3 &>/dev/null; then + python3 makePlots.py +else + python makePlots.py +fi diff --git a/examples/NFW_Halo/test.yml b/examples/NFW_Halo/test.yml new file mode 100755 index 0000000000000000000000000000000000000000..73831af30769942bd7aa1c89bd7464025d2ddc85 --- /dev/null +++ b/examples/NFW_Halo/test.yml @@ -0,0 +1,41 @@ +# Define the system of units to use internally. +InternalUnitSystem: + UnitMass_in_cgs: 1.988e+33 # Solar mass + UnitLength_in_cgs: 3.086e+21 # kpc + UnitVelocity_in_cgs: 1e5 # km / s + UnitCurrent_in_cgs: 1 # Amperes + UnitTemp_in_cgs: 1 # Kelvin + +# Parameters governing the time integration (Set dt_min and dt_max to the same value for a fixed time-step run.) +TimeIntegration: + time_begin: 0. # The starting time of the simulation (in internal units). + time_end: 0.6 # The end time of the simulation (in internal units). + dt_min: 1e-8 # The minimal time-step size of the simulation (in internal units). + dt_max: 1e-1 # The maximal time-step size of the simulation (in internal units). + +# Parameters governing the snapshots +Snapshots: + basename: output # Common part of the name of output files + time_first: 0. 
# Time of the first output (in internal units) + delta_time: 1e-3 # Time difference between consecutive outputs (in internal units) + +# Parameters governing the conserved quantities statistics +Statistics: + delta_time: 1e-3 # Time between statistics output + +# Parameters related to the initial conditions +InitialConditions: + file_name: test_nfw.hdf5 # The file to read + shift_x: 0. # (Optional) A shift to apply to all particles read from the ICs (in internal units). + shift_y: 0. + shift_z: 0. + periodic: 0 + +# Isothermal potential parameters +NFWPotential: + useabspos: 0 + position: [0.0,0.0,0.0] # Location of centre of potential with respect to centre of the box (internal units) + concentration: 8. + M_200: 2.0e+12 # Virial mass (internal units) + critical_density: 140 # Critical density (internal units) + timestep_mult: 0.01 # Dimensionless pre-factor for the time-step condition diff --git a/examples/Noh_1D/makeIC.py b/examples/Noh_1D/makeIC.py index 176f3517455db7a8b0994ac7d1e65fb9cb7419d4..9d9a5e5b62edeedd8f5b2732c240b9ea2878c92d 100644 --- a/examples/Noh_1D/makeIC.py +++ b/examples/Noh_1D/makeIC.py @@ -66,10 +66,6 @@ grp.attrs["MassTable"] = [0.0, 0.0, 0.0, 0.0, 0.0, 0.0] grp.attrs["Flag_Entropy_ICs"] = 0 grp.attrs["Dimension"] = 1 -#Runtime parameters -grp = file.create_group("/RuntimePars") -grp.attrs["PeriodicBoundariesOn"] = 1 - #Units grp = file.create_group("/Units") grp.attrs["Unit length in cgs (U_L)"] = 1. 
diff --git a/examples/Noh_1D/noh.yml b/examples/Noh_1D/noh.yml index 1d126f19babd0c9fe28afff907b3fe8259467a24..58e13ddda8939c8fc5fa4360a498a87f1c5b189a 100644 --- a/examples/Noh_1D/noh.yml +++ b/examples/Noh_1D/noh.yml @@ -31,4 +31,6 @@ SPH: # Parameters related to the initial conditions InitialConditions: file_name: ./noh.hdf5 # The file to read + periodic: 1 + \ No newline at end of file diff --git a/examples/Noh_1D/run.sh b/examples/Noh_1D/run.sh index 77788bfa8429e2fbf0502068baa70598acaaa791..15efd56e7b5b41dedab31a8f1cbdf11e89a2704d 100755 --- a/examples/Noh_1D/run.sh +++ b/examples/Noh_1D/run.sh @@ -8,7 +8,7 @@ then fi # Run SWIFT -../swift -s -t 1 noh.yml 2>&1 | tee output.log +../swift --hydro --threads=1 noh.yml 2>&1 | tee output.log # Plot the solution python plotSolution.py 12 diff --git a/examples/Noh_2D/makeIC.py b/examples/Noh_2D/makeIC.py index f7239fa3cd188637df929f86451d20a9978bd1f5..83bb1ac6773074d0c10d3eb425b34c082a971fd8 100644 --- a/examples/Noh_2D/makeIC.py +++ b/examples/Noh_2D/makeIC.py @@ -73,10 +73,6 @@ grp.attrs["MassTable"] = [0.0, 0.0, 0.0, 0.0, 0.0, 0.0] grp.attrs["Flag_Entropy_ICs"] = 0 grp.attrs["Dimension"] = 2 -#Runtime parameters -grp = file.create_group("/RuntimePars") -grp.attrs["PeriodicBoundariesOn"] = 1 - #Units grp = file.create_group("/Units") grp.attrs["Unit length in cgs (U_L)"] = 1. 
diff --git a/examples/Noh_2D/noh.yml b/examples/Noh_2D/noh.yml index 1d126f19babd0c9fe28afff907b3fe8259467a24..eaf991631854e9a9781f0fcee50d996f8af949cd 100644 --- a/examples/Noh_2D/noh.yml +++ b/examples/Noh_2D/noh.yml @@ -31,4 +31,5 @@ SPH: # Parameters related to the initial conditions InitialConditions: file_name: ./noh.hdf5 # The file to read + periodic: 1 diff --git a/examples/Noh_2D/run.sh b/examples/Noh_2D/run.sh index cff200801018e04ea560bd2c3fbd84057aec2d7c..5595ad8439eac02fd32dbb426e4affb41215d41a 100755 --- a/examples/Noh_2D/run.sh +++ b/examples/Noh_2D/run.sh @@ -13,7 +13,7 @@ then fi # Run SWIFT -../swift -s -t 2 noh.yml 2>&1 | tee output.log +../swift --hydro --threads=2 noh.yml 2>&1 | tee output.log # Plot the solution python plotSolution.py 12 diff --git a/examples/Noh_3D/makeIC.py b/examples/Noh_3D/makeIC.py index 0c25a5c8b3e967185cf16bae4b1f21c215266def..2d560a1e869c6c12e557c82402d6e8629ecf661c 100644 --- a/examples/Noh_3D/makeIC.py +++ b/examples/Noh_3D/makeIC.py @@ -75,10 +75,6 @@ grp.attrs["MassTable"] = [0.0, 0.0, 0.0, 0.0, 0.0, 0.0] grp.attrs["Flag_Entropy_ICs"] = 0 grp.attrs["Dimension"] = 3 -#Runtime parameters -grp = file.create_group("/RuntimePars") -grp.attrs["PeriodicBoundariesOn"] = 1 - #Units grp = file.create_group("/Units") grp.attrs["Unit length in cgs (U_L)"] = 1. 
diff --git a/examples/Noh_3D/noh.yml b/examples/Noh_3D/noh.yml index cc15af7ec190cd2c10cdff3a3ccb3f0beaf7e177..e005d394a6d3645ca33950af625b0267a62ca7d7 100644 --- a/examples/Noh_3D/noh.yml +++ b/examples/Noh_3D/noh.yml @@ -32,4 +32,5 @@ SPH: # Parameters related to the initial conditions InitialConditions: file_name: ./noh.hdf5 # The file to read + periodic: 1 diff --git a/examples/Noh_3D/run.sh b/examples/Noh_3D/run.sh index b9e4fb145b2465433aa2bc0362aba19cc1267461..7ff7b084f4b6297c76d3d60f4bde4ac7d12e944e 100755 --- a/examples/Noh_3D/run.sh +++ b/examples/Noh_3D/run.sh @@ -13,7 +13,7 @@ then fi # Run SWIFT -../swift -s -t 2 noh.yml 2>&1 | tee output.log +../swift --hydro --threads=2 noh.yml 2>&1 | tee output.log # Plot the solution python plotSolution.py 12 diff --git a/examples/PMillennium-384/p-mill-384.yml b/examples/PMillennium-384/p-mill-384.yml new file mode 100644 index 0000000000000000000000000000000000000000..4aede77c0c1a8a6818c95c318364150ede919a01 --- /dev/null +++ b/examples/PMillennium-384/p-mill-384.yml @@ -0,0 +1,51 @@ +# Define the system of units to use internally. 
+InternalUnitSystem: + UnitMass_in_cgs: 1.98848e43 # 10^10 M_sun + UnitLength_in_cgs: 3.08567758e24 # 1 Mpc + UnitVelocity_in_cgs: 1e5 # 1 km/s + UnitCurrent_in_cgs: 1 # Amperes + UnitTemp_in_cgs: 1 # Kelvin + +# Planck-13 cosmology +Cosmology: + h: 0.6777 + a_begin: 0.02 # z_ini = 49 + a_end: 1.0 # z_end = 0 + Omega_m: 0.307 + Omega_lambda: 0.693 + Omega_b: 0.0455 + +# Parameters governing the time integration +TimeIntegration: + dt_min: 1e-6 + dt_max: 1e-2 + +Scheduler: + max_top_level_cells: 16 + cell_split_size: 100 + +# Parameters governing the snapshots +Snapshots: + basename: PMill + delta_time: 1.02 + scale_factor_first: 0.02 + +# Parameters governing the conserved quantities statistics +Statistics: + delta_time: 1.02 + scale_factor_first: 0.02 + +# Parameters for the self-gravity scheme +Gravity: + eta: 0.025 + theta: 0.5 + comoving_softening: 0.08333 # 83.333 kpc = 1/25 mean inter-particle separation + max_physical_softening: 0.08333 # 83.333 kpc = 1/25 mean inter-particle separation + mesh_side_length: 128 + +# Parameters related to the initial conditions +InitialConditions: + file_name: ics.hdf5 + periodic: 1 + cleanup_h_factors: 1 + cleanup_velocity_factors: 1 diff --git a/examples/PMillennium-768/p-mill-768.yml b/examples/PMillennium-768/p-mill-768.yml new file mode 100644 index 0000000000000000000000000000000000000000..a70c9c70831af9c237a466165b25b6300df69336 --- /dev/null +++ b/examples/PMillennium-768/p-mill-768.yml @@ -0,0 +1,51 @@ +# Define the system of units to use internally. 
+InternalUnitSystem: + UnitMass_in_cgs: 1.98848e43 # 10^10 M_sun + UnitLength_in_cgs: 3.08567758e24 # 1 Mpc + UnitVelocity_in_cgs: 1e5 # 1 km/s + UnitCurrent_in_cgs: 1 # Amperes + UnitTemp_in_cgs: 1 # Kelvin + +# Planck-13 cosmology +Cosmology: + h: 0.6777 + a_begin: 0.02 # z_ini = 49 + a_end: 1.0 # z_end = 0 + Omega_m: 0.307 + Omega_lambda: 0.693 + Omega_b: 0.0455 + +# Parameters governing the time integration +TimeIntegration: + dt_min: 1e-6 + dt_max: 1e-2 + +Scheduler: + max_top_level_cells: 16 + cell_split_size: 100 + +# Parameters governing the snapshots +Snapshots: + basename: PMill + delta_time: 1.02 + scale_factor_first: 0.02 + +# Parameters governing the conserved quantities statistics +Statistics: + delta_time: 1.02 + scale_factor_first: 0.02 + +# Parameters for the self-gravity scheme +Gravity: + eta: 0.025 + theta: 0.5 + comoving_softening: 0.041666 # 41.6666 kpc = 1/25 mean inter-particle separation + max_physical_softening: 0.041666 # 41.6666 kpc = 1/25 mean inter-particle separation + mesh_side_length: 256 + +# Parameters related to the initial conditions +InitialConditions: + file_name: ics.hdf5 + periodic: 1 + cleanup_h_factors: 1 + cleanup_velocity_factors: 1 diff --git a/examples/PerturbedBox_2D/makeIC.py b/examples/PerturbedBox_2D/makeIC.py index 87a41517772570870e04c79d3694c115a909e214..7f52525bdf508603a23f93c0fc7d8cda7f8f13cb 100644 --- a/examples/PerturbedBox_2D/makeIC.py +++ b/examples/PerturbedBox_2D/makeIC.py @@ -86,10 +86,6 @@ grp.attrs["Flag_Entropy_ICs"] = [0, 0, 0, 0, 0, 0] grp.attrs["NumPart_Total"] = numPart grp.attrs["Dimension"] = 2 -#Runtime parameters -grp = file.create_group("/RuntimePars") -grp.attrs["PeriodicBoundariesOn"] = periodic - #Units grp = file.create_group("/Units") grp.attrs["Unit length in cgs (U_L)"] = 1. 
diff --git a/examples/PerturbedBox_2D/perturbedPlane.yml b/examples/PerturbedBox_2D/perturbedPlane.yml index a0c6b6d9dbc7a677002dbce5abc6e5d268b56e97..4d03b30398bec34414636803caf6bf3bdc99251d 100644 --- a/examples/PerturbedBox_2D/perturbedPlane.yml +++ b/examples/PerturbedBox_2D/perturbedPlane.yml @@ -31,3 +31,4 @@ SPH: # Parameters related to the initial conditions InitialConditions: file_name: ./perturbedPlane.hdf5 # The file to read + periodic: 1 diff --git a/examples/PerturbedBox_3D/makeIC.py b/examples/PerturbedBox_3D/makeIC.py index 1b0fc284e4c40b51fca45f117b92175a0ea45f31..f2d8357f2f96a4aa6efaa14822c442a884415b56 100644 --- a/examples/PerturbedBox_3D/makeIC.py +++ b/examples/PerturbedBox_3D/makeIC.py @@ -88,10 +88,6 @@ grp.attrs["Flag_Entropy_ICs"] = [0, 0, 0, 0, 0, 0] grp.attrs["NumPart_Total"] = numPart grp.attrs["Dimension"] = 3 -#Runtime parameters -grp = file.create_group("/RuntimePars") -grp.attrs["PeriodicBoundariesOn"] = periodic - #Units grp = file.create_group("/Units") grp.attrs["Unit length in cgs (U_L)"] = 1. 
diff --git a/examples/PerturbedBox_3D/perturbedBox.yml b/examples/PerturbedBox_3D/perturbedBox.yml index 3148510979d0e349c0d6242bf11e1a0db94f9e1f..6010cf457b2b67c0fce0332a0216aa9359673e3b 100644 --- a/examples/PerturbedBox_3D/perturbedBox.yml +++ b/examples/PerturbedBox_3D/perturbedBox.yml @@ -31,3 +31,4 @@ SPH: # Parameters related to the initial conditions InitialConditions: file_name: ./perturbedBox.hdf5 # The file to read + periodic: 1 diff --git a/examples/PerturbedBox_3D/run.sh b/examples/PerturbedBox_3D/run.sh index e20bff52d18322ce377fb589900fd9e13eefe64d..ab771ea1f01e6cca3b9a3dc2cf02a4d596a64006 100755 --- a/examples/PerturbedBox_3D/run.sh +++ b/examples/PerturbedBox_3D/run.sh @@ -7,4 +7,4 @@ then python makeIC.py 50 fi -../swift -s -t 16 perturbedBox.yml 2>&1 | tee output.log +../swift --hydro --threads=16 perturbedBox.yml 2>&1 | tee output.log diff --git a/examples/SantaBarbara/README b/examples/SantaBarbara/README new file mode 100644 index 0000000000000000000000000000000000000000..e5bba3752215c438c01ff35931d22901c3a9d0d3 --- /dev/null +++ b/examples/SantaBarbara/README @@ -0,0 +1,21 @@ +Initial conditions for the Santa-Barbara cluster comparison project. +These have been regenerated from the original Frenk et al. 1999 paper. + +The cosmology is Omega_m = 1, Omega_b = 0.1, h = 0.5 and sigma_8 = 0.9. + +The ICs are 256^3 particles in a 64^3 Mpc^3 volume. This is about 10x +higher resolution than in the original paper. The ICs have been +created for Gadget and the positions and box size are hence expressed +in h-full units (e.g. box size of 32 / h Mpc). Similarly, the peculiar +velocities contain an extra sqrt(a) factor. + +We will use SWIFT to cancel the h- and a-factors from the ICs. Gas +particles will be generated at startup. + +MD5 check-sum of the ICS: +ba9ab4f00a70d39fa601a4a59984b343 SantaBarbara.hdf5 + +You can use the script run_velociraptor.sh to also run a basic 3D FoF +with VELOCIraptor on your output data. 
You will need to set the +VELOCIRAPTOR_PATH environment variable to tell us where the stf-gas +binary lives. diff --git a/examples/SantaBarbara/getIC.sh b/examples/SantaBarbara/getIC.sh new file mode 100755 index 0000000000000000000000000000000000000000..a3073631ceedea47c8ac218a5e62529efee6fc56 --- /dev/null +++ b/examples/SantaBarbara/getIC.sh @@ -0,0 +1,2 @@ +#!/bin/bash +wget http://virgodb.cosma.dur.ac.uk/swift-webstorage/ICs/SantaBarbara.hdf5 diff --git a/examples/SantaBarbara/makeImage.py b/examples/SantaBarbara/makeImage.py new file mode 100644 index 0000000000000000000000000000000000000000..db6416010447952b3edd6b235237d045b16bdefd --- /dev/null +++ b/examples/SantaBarbara/makeImage.py @@ -0,0 +1,268 @@ +""" +Makes an image of the Santa Barbara cluster. + +Requires py-sphviewer. + +Invoke as follows: + +python3 makeImage.py <name of hdf5 file> \ + <number of particle type (i.e. 0 or 1)> \ + <colour map to use (default viridis)> \ + <text color (default white)> \ + <image resolution (default 2048x2048)> +""" + +import numpy as np +import matplotlib.pyplot as plt +import h5py +import matplotlib + +from sphviewer.tools import QuickView +from matplotlib.patches import Rectangle + +from typing import Tuple +from collections import namedtuple + + +# Set up our simulation data collection to keep stuff together +SimulationData = namedtuple( + "SimulationData", + ["coordinates", "masses", "sph_name", "dark_matter_mass", "swift_name", "boxsize"], +) + + +def latex_float(f): + """ + Taken from: + https://stackoverflow.com/questions/13490292/format-number-using-latex-notation-in-python. + + Formats a float to LaTeX style. + """ + + float_str = "{0:.2g}".format(f) + if "e" in float_str: + base, exponent = float_str.split("e") + return r"{0} \times 10^{{{1}}}".format(base, int(exponent)) + else: + return float_str + + +def read_data_from_file(filename: str, part_type=0) -> SimulationData: + """ + Reads the relevant data from the HDF5 file. 
+ """ + part_type_name = f"PartType{part_type}" + + with h5py.File(filename, "r") as file: + coordinates, boxsize = move_box(file[f"{part_type_name}/Coordinates"][...].T) + masses = file[f"{part_type_name}/Masses"][...] + + sph_name = file["HydroScheme"].attrs["Scheme"].decode("utf-8") + unit_mass = ( + float(file["Units"].attrs["Unit mass in cgs (U_M)"]) / 2e33 + ) # in M_sun + + dark_matter_mass = float(file["PartType1/Masses"][0]) * unit_mass + + code_revision = file["Code"].attrs["Git Revision"].decode("utf-8") + swift_name = f"SWIFT {code_revision}" + + data = SimulationData( + coordinates=coordinates, + masses=masses, + sph_name=sph_name, + dark_matter_mass=dark_matter_mass, + swift_name=swift_name, + boxsize=boxsize, + ) + + return data + + +def move_box(coordinates: np.ndarray) -> np.ndarray: + """ + Takes the coordinates and moves them in the x-y plane. This moves them 20 + code units to the left/right to ensure that the zoomed-out version of the + cluster image is nicely shown + """ + + boxsize = np.max(coordinates[0]) + coordinates[0] -= 20 + coordinates[1] -= 20 + coordinates[0] %= boxsize + coordinates[1] %= boxsize + + return coordinates, boxsize + + +def generate_views(data: SimulationData, res=2048) -> Tuple[np.ndarray]: + """ + Generates the views on the data from py-sphviewer. + + Returns the overall image for the whole box and then a zoomed region. 
+ """ + + qv_all = QuickView( + data.coordinates, + data.masses, + r="infinity", + plot=False, + xsize=res, + ysize=res, + logscale=False, + p=0, + np=48, + ) + zoomed_res = (res * 6) // 10 + mask = np.logical_and( + np.logical_and( + data.coordinates[0] > (data.boxsize/2-4-20), + data.coordinates[0] < (data.boxsize/2+6-20) + ), + np.logical_and( + data.coordinates[1] > (data.boxsize/2-3.5-20), + data.coordinates[1] < (data.boxsize/2+6.5-20) + ) + ) + qv_zoomed = QuickView( + data.coordinates.T[mask].T, + data.masses[mask], + r="infinity", + plot=False, + xsize=zoomed_res, + ysize=zoomed_res, + logscale=False, + np=48, + ) + + return qv_all.get_image(), qv_zoomed.get_image() + + +def create_plot(data: SimulationData, res=2048, cmap="viridis", text_color="white"): + """ + Creates a figure and axes object and returns them for you to do with what you wish. + """ + + img_all, img_zoomed = generate_views(data, res) + + fig, ax = plt.subplots(figsize=(8, 8)) + + # Set up in "image" mode + ax.axis("off") + fig.subplots_adjust(0, 0, 1, 1) + + ax.imshow( + np.log10(img_all + np.min(img_all[img_all != 0])), + origin="lower", + extent=[-1, 1, -1, 1], + cmap=cmap, + ) + + lower_left = [(-24 / (0.5 * data.boxsize)), (-23.5 / (0.5 * data.boxsize))] + zoom_rect = Rectangle( + lower_left, + 10 / (0.5 * data.boxsize), + 10 / (0.5 * data.boxsize), + linewidth=2, + edgecolor=text_color, + facecolor="none", + ) + ax.add_patch(zoom_rect) + + # Remove ticks as we want "image mode" + ax2 = fig.add_axes([0.35, 0.35, 0.6, 0.6], frame_on=True, xticks=[], yticks=[]) + + ax2.imshow( + np.log10(img_zoomed + np.min(img_zoomed[img_zoomed != 0])), + origin="lower", + extent=[-1, 1, -1, 1], + cmap=cmap, + ) + + # This ugly hack sets the box around the subfigure to be white + for child in ax2.get_children(): + if isinstance(child, matplotlib.spines.Spine): + child.set_color(text_color) + child.set_linewidth(2) + + # Draw lines between boxes + + # Bottom Right + ax.plot( + [(-14 / (0.5 * 
data.boxsize)), 0.9], + [(-23.5 / (0.5 * data.boxsize)), -0.3], + lw=2, + color=text_color, + ) + # Top Left + ax.plot( + [(-24 / (0.5 * data.boxsize)), -0.3], + [(-13.5 / (0.5 * data.boxsize)), 0.9], + lw=2, + color=text_color, + ) + + ax.text(0.95, -0.95, data.swift_name, color=text_color, ha="right") + formatted_dark_matter_mass = latex_float(data.dark_matter_mass) + ax.text( + -0.95, + 0.95, + rf"M$_{{\rm DM}} = {formatted_dark_matter_mass}$ M$_\odot$", + color=text_color, + va="top", + ) + ax.text( + -0.95, + -0.95, + data.sph_name + "\n" + r"Santa Barbara Cluster (re-ran from Frenk+ 1999)", + color=text_color, + ) + + return fig, ax + + +if __name__ == "__main__": + import sys + + try: + filename = sys.argv[1] + except IndexError: + filename = "santabarbara_0153.hdf5" + + try: + part_type = int(sys.argv[2]) + except IndexError: + part_type = 0 + + try: + cmap = sys.argv[3] + except IndexError: + cmap = "viridis" + + try: + text_color = sys.argv[4] + except IndexError: + text_color = "white" + + try: + res = int(sys.argv[5]) + except IndexError: + res = 2048 + + # Read in the data from file + + try: + data = read_data_from_file(filename, part_type) + except IndexError: + # Must be a dark matter only run + part_type = 1 + data = read_data_from_file(filename, part_type) + + # Make the plot + + fig, ax = create_plot(data, res, cmap, text_color) + + fig.savefig( + f"SantaBarbara_{data.sph_name[:8]}_{cmap}_PartType{part_type}_res{res}.png", + dpi=res // 8, + ) diff --git a/examples/SantaBarbara/plotSolution.py b/examples/SantaBarbara/plotSolution.py new file mode 100644 index 0000000000000000000000000000000000000000..a23aa2089a0f82a9dad989134d1ebf11a97af2fe --- /dev/null +++ b/examples/SantaBarbara/plotSolution.py @@ -0,0 +1,391 @@ +""" +Plots the "solution" (i.e. some profiles) for the Santa Barbara cluster. 
+ +Invoke as follows: + +python3 plotSolution.py <snapshot number> <catalogue directory> <number of bins (optional)> +""" + +import matplotlib.pyplot as plt +import numpy as np + +import h5py + +from collections import namedtuple +from typing import Tuple + +try: + import makeImage + + create_images = True +except: + create_images = False + +# Simulation data +SimulationParticleData = namedtuple( + "SimulationData", ["gas", "dark_matter", "metadata"] +) +ParticleData = namedtuple( + "ParticleData", ["coordinates", "radii", "masses", "densities", "energies"] +) +MetaData = namedtuple("MetaData", ["header", "code", "hydroscheme"]) +HaloData = namedtuple("HaloData", ["c", "Rvir", "Mvir", "center"]) + + +def get_energies(handle: h5py.File): + """ + Gets the energies with the correct units. + """ + u = handle["PartType0/InternalEnergy"][:] + unit_length_in_cgs = handle["/Units"].attrs["Unit length in cgs (U_L)"] + unit_mass_in_cgs = handle["/Units"].attrs["Unit mass in cgs (U_M)"] + unit_time_in_cgs = handle["/Units"].attrs["Unit time in cgs (U_t)"] + gas_gamma = handle["/HydroScheme"].attrs["Adiabatic index"][0] + a = handle["/Cosmology"].attrs["Scale-factor"][0] + + unit_length_in_si = 0.01 * unit_length_in_cgs + unit_mass_in_si = 0.001 * unit_mass_in_cgs + unit_time_in_si = unit_time_in_cgs + + u *= unit_length_in_si ** 2 / unit_time_in_si ** 2 + u /= a ** (3 * (gas_gamma - 1.)) + + return u + + +def load_data(filename: str, center: np.array) -> SimulationParticleData: + """ + Loads the relevant data for making the profiles, as well as some metadata + for the plot. + + Center is the center of the SB cluster and is used to calculate the radial + distances to the particles. 
+ """ + + with h5py.File(filename, "r") as file: + gas_handle = file["PartType0"] + dm_handle = file["PartType1"] + + gas_data = ParticleData( + coordinates=gas_handle["Coordinates"][...], + radii=get_radial_distances(gas_handle["Coordinates"][...], center), + masses=gas_handle["Masses"][...], + energies=get_energies(file), + densities=gas_handle["Density"][...], + ) + + dm_data = ParticleData( + coordinates=dm_handle["Coordinates"][...], + radii=get_radial_distances(dm_handle["Coordinates"][...], center), + masses=dm_handle["Masses"][...], + energies=None, + densities=None, + ) + + metadata = MetaData( + header=dict(file["Header"].attrs), + code=dict(file["Code"].attrs), + hydroscheme=dict(file["HydroScheme"].attrs), + ) + + simulation_data = SimulationParticleData( + gas=gas_data, dark_matter=dm_data, metadata=metadata + ) + + return simulation_data + + +def get_halo_data(catalogue_filename: str) -> HaloData: + """ + Gets the halo center of the largest halo (i.e. the SB cluster). + + You will want the .properties file, probably + + halo/santabarbara.properties + + that is given by VELOCIraptor. + """ + + with h5py.File(catalogue_filename, "r") as file: + x = file["Xc"][0] + y = file["Yc"][0] + z = file["Zc"][0] + Mvir = file["Mass_200crit"][0] + Rvir = file["R_200crit"][0] + c = file["cNFW"][0] + + return HaloData(c=c, Rvir=Rvir, Mvir=Mvir, center=np.array([x, y, z])) + + +def get_radial_distances(coordinates: np.ndarray, center: np.array) -> np.array: + """ + Gets the radial distances for all particles. + """ + dx = coordinates - center + + return np.sqrt(np.sum(dx * dx, axis=1)) + + +def get_radial_density_profile(radii, masses, bins: int) -> Tuple[np.ndarray]: + """ + Gets the radial gas density profile, after generating similar bins to those + used in similar works. + """ + + bins = np.logspace(-2, 1, bins) + + histogram, bin_edges = np.histogram(a=radii, weights=masses, bins=bins) + + volumes = np.array( + [ + (4. * np.pi / 3.) 
* (r_outer ** 3 - r_inner ** 3) + for r_outer, r_inner in zip(bin_edges[1:], bin_edges[:-1]) + ] + ) + + return histogram / volumes, bin_edges # densities + + +def mu(T, H_frac, T_trans): + """ + Get the molecular weight as a function of temperature. + """ + if T > T_trans: + return 4. / (8. - 5. * (1. - H_frac)) + else: + return 4. / (1. + 3. * H_frac) + + +def T(u, metadata: MetaData): + """ + Temperature of primordial gas. + """ + + gas_gamma = metadata.hydroscheme["Adiabatic index"][0] + H_frac = metadata.hydroscheme["Hydrogen mass fraction"][0] + T_trans = metadata.hydroscheme["Hydrogen ionization transition temperature"][0] + + k_in_J_K = 1.38064852e-23 + mH_in_kg = 1.6737236e-27 + + T_over_mu = (gas_gamma - 1.) * u * mH_in_kg / k_in_J_K + ret = np.ones(np.size(u)) * T_trans + + # Enough energy to be ionized? + mask_ionized = T_over_mu > (T_trans + 1) / mu(T_trans + 1, H_frac, T_trans) + if np.sum(mask_ionized) > 0: + ret[mask_ionized] = T_over_mu[mask_ionized] * mu(T_trans * 10, H_frac, T_trans) + + # Neutral gas? + mask_neutral = T_over_mu < (T_trans - 1) / mu((T_trans - 1), H_frac, T_trans) + if np.sum(mask_neutral) > 0: + ret[mask_neutral] = T_over_mu[mask_neutral] * mu(0, H_frac, T_trans) + + return ret + + +def get_radial_temperature_profile( + data: SimulationParticleData, bins: int +) -> np.ndarray: + """ + Gets the radial gas temperature profile, after generating similar bins to those + used in similar works. + """ + + temperatures = T(data.gas.energies, data.metadata) + radii = data.gas.radii + + bins = np.logspace(-2, 1, bins) + + histogram, _ = np.histogram(a=radii, weights=temperatures, bins=bins) + + counts, _ = np.histogram(a=radii, weights=np.ones_like(radii), bins=bins) + + return histogram / counts # need to get mean value in bin + + +def get_radial_entropy_profile(data: SimulationParticleData, bins: int) -> np.ndarray: + """ + Gets the radial gas entropy profile, after generating similar bins to those + used in similar works. 
+ """ + + gas_gamma = data.metadata.hydroscheme["Adiabatic index"][0] + gamma_minus_one = gas_gamma - 1.0 + + entropies = ( + data.gas.energies * (gamma_minus_one) / data.gas.densities ** gamma_minus_one + ) + print("Warning: Current entropy profile assumes all gas is ionised") + radii = data.gas.radii + + bins = np.logspace(-2, 1, bins) + + histogram, _ = np.histogram(a=radii, weights=entropies, bins=bins) + + counts, _ = np.histogram(a=radii, weights=np.ones_like(radii), bins=bins) + + return histogram / counts # need to get mean value in bin + + +def nfw(R, halo_data: HaloData): + """ + NFW profile at radius R. + """ + + R_s = halo_data.Rvir / halo_data.c + rho_0 = (4 * np.pi * R_s ** 3) / (halo_data.Mvir) + rho_0 *= np.log(1 + halo_data.c) - halo_data.c / (halo_data.c + 1) + rho_0 = 1.0 / rho_0 + + ratio = R / R_s + + return rho_0 / (ratio * (1 + ratio) ** 2) + + +def create_plot( + data: SimulationParticleData, + halo_data: HaloData, + bins: int, + create_images: bool, + image_data: np.ndarray, +): + """ + Creates the figure and axes objects and plots the data on them. 
+ """ + + fig, axes = plt.subplots(2, 3, figsize=(12, 8)) + + gas_density, bin_edges = get_radial_density_profile( + data.gas.radii, data.gas.masses, bins=bins + ) + dm_density, _ = get_radial_density_profile( + data.dark_matter.radii, data.dark_matter.masses, bins=bins + ) + temperature = get_radial_temperature_profile(data, bins=bins) + entropy = get_radial_entropy_profile(data, bins=bins) + + bin_centers = [0.5 * (x + y) for x, y in zip(bin_edges[:-1], bin_edges[1:])] + nfw_R = np.logspace(-2, 1, bins * 100) + nfw_rho = nfw(nfw_R, halo_data) + + axes[0][0].loglog() + axes[0][0].plot(nfw_R, 0.1 * nfw_rho, ls="dashed", color="grey") + axes[0][0].scatter(bin_centers, gas_density) + axes[0][0].set_ylabel(r"$\rho_{\rm gas} (R)$ [$10^{10}$ M$_\odot$ Mpc$^{-3}$]") + axes[0][0].set_xlabel(r"R [Mpc]") + axes[0][0].set_xlim(0.01, 10) + + axes[0][1].semilogx() + axes[0][1].scatter(bin_centers, np.log(entropy)) + axes[0][1].set_ylabel( + r"Entropy $\log(A$ [K ($10^{10}$ M$_\odot$)$^{2/3}$ Mpc$^{-2}$])" + ) + axes[0][1].set_xlabel(r"R [Mpc]") + axes[0][1].set_xlim(0.01, 10) + + if create_images: + axes[0][2].imshow(np.log10(image_data)) + + axes[0][2].set_xticks([]) + axes[0][2].set_yticks([]) + + axes[1][0].loglog() + axes[1][0].scatter(bin_centers, temperature) + axes[1][0].set_ylabel(r"$T_{\rm gas} (R)$ [K]") + axes[1][0].set_xlabel(r"R [Mpc]") + axes[1][0].set_xlim(0.01, 10) + + axes[1][1].loglog() + axes[1][1].scatter(bin_centers, dm_density) + axes[1][1].plot(nfw_R, 0.9 * nfw_rho, ls="dashed", color="grey") + axes[1][1].set_ylabel(r"$\rho_{\rm DM} (R)$ [$10^{10}$ M$_\odot$ Mpc$^{-3}$]") + axes[1][1].set_xlabel(r"R [Mpc]") + axes[1][1].set_xlim(0.01, 10) + axes[1][1].text( + 0.02, + 5, + "$c_{{vir}} = {:2.2f}$\n$R_{{vir}} = {:2.2f}$ Mpc\n$M_{{vir}} = {:2.2f}$ $10^{{10}}$ M$_\odot$".format( + halo_data.c, halo_data.Rvir, halo_data.Mvir + ), + va="bottom", + ha="left", + ) + + axes[1][2].text( + -0.49, + 0.7, + "Santa Barbara with $\\gamma={:2.2f}$ in 3D".format( + 
data.metadata.hydroscheme["Adiabatic index"][0] + ), + ) + + scheme_list = data.metadata.hydroscheme["Scheme"].decode("utf-8").split(" ") + i = 4 + while i < len(scheme_list): + scheme_list.insert(i, "\n") + i += 4 + 1 + wrapped_scheme = " ".join(scheme_list) + wrapped_scheme.replace("\n ", "\n") + + axes[1][2].text(-0.49, 0.8, wrapped_scheme) + + axes[1][2].plot([-0.49, 0.1], [0.62, 0.62], "k-", lw=1) + + axes[1][2].text( + -0.49, 0.5, f"SWIFT {data.metadata.code['Git Revision'].decode('utf-8')}" + ) + + axes[1][2].text( + -0.49, + 0.3, + data.metadata.hydroscheme["Kernel function"].decode("utf-8"), + fontsize=10, + ) + axes[1][2].text( + -0.49, + 0.2, + "{:2.3f} neighbours ($\\eta={:3.3f}$)".format( + data.metadata.hydroscheme["Kernel target N_ngb"][0], + data.metadata.hydroscheme["Kernel eta"][0], + ), + ) + axes[1][2].set_xlim(-0.5, 0.5) + axes[1][2].set_ylim(0, 1) + axes[1][2].axis("off") + + fig.tight_layout() + + return fig, axes + + +if __name__ == "__main__": + import sys + + filename = "santabarbara_{:04d}.hdf5".format(int(sys.argv[1])) + catalogue_filename = f"{sys.argv[2]}/santabarbara.properties" + + try: + bins = int(sys.argv[3]) + except: + bins = 25 + + halo_data = get_halo_data(catalogue_filename) + simulation_data = load_data(filename, halo_data.center) + + if create_images: + data = makeImage.read_data_from_file(filename, part_type=0) + _, image_data = makeImage.generate_views(data) + del data + else: + image_data = None + + fig, _ = create_plot( + data=simulation_data, + halo_data=halo_data, + bins=bins, + create_images=create_images, + image_data=image_data, + ) + + fig.savefig("santabarbara.png", dpi=300) diff --git a/examples/SantaBarbara/run.sh b/examples/SantaBarbara/run.sh new file mode 100755 index 0000000000000000000000000000000000000000..206ba7bad03e0fff12e683ee10bbe2434a73a500 --- /dev/null +++ b/examples/SantaBarbara/run.sh @@ -0,0 +1,4 @@ +#!/bin/bash +# Run SWIFT +../swift --cosmology --hydro --self-gravity --threads=28 
santa_barbara.yml + diff --git a/examples/SantaBarbara/run_velociraptor.sh b/examples/SantaBarbara/run_velociraptor.sh new file mode 100644 index 0000000000000000000000000000000000000000..3b7ca06ec8125d05066762f20c7324f9faa42348 --- /dev/null +++ b/examples/SantaBarbara/run_velociraptor.sh @@ -0,0 +1,2 @@ +mkdir halo +${VELOCIRAPTOR_PATH} -I 2 -i santabarbara_0153 -C velociraptor_cfg.cfg -o ./halo/santabarbara diff --git a/examples/SantaBarbara/santa_barbara.yml b/examples/SantaBarbara/santa_barbara.yml new file mode 100644 index 0000000000000000000000000000000000000000..f217fc6a67db92eb9ea5e4d541343e7bf89c5860 --- /dev/null +++ b/examples/SantaBarbara/santa_barbara.yml @@ -0,0 +1,82 @@ +# Define the system of units to use internally. +InternalUnitSystem: + UnitMass_in_cgs: 1.98848e43 # 10^10 Msun + UnitLength_in_cgs: 3.08567758e24 # 1 Mpc + UnitVelocity_in_cgs: 1e5 # 1 km/s + UnitCurrent_in_cgs: 1 # Amperes + UnitTemp_in_cgs: 1 # Kelvin + +# Cosmological parameters +Cosmology: + h: 0.5 + a_begin: 0.047619048 # z_ini = 20 + a_end: 1.0 # z_end = 0 + Omega_m: 1.0 + Omega_lambda: 0.0 + Omega_b: 0.1 + +# Parameters governing the time integration +TimeIntegration: + dt_max: 0.01 + dt_min: 1e-10 + +Scheduler: + max_top_level_cells: 16 + cell_split_size: 100 + +# Parameters governing the snapshots +Snapshots: + basename: santabarbara + scale_factor_first: 0.05 + delta_time: 1.02 + +# Parameters governing the conserved quantities statistics +Statistics: + delta_time: 1.02 + scale_factor_first: 0.05 + +# Parameters for the self-gravity scheme +Gravity: + eta: 0.025 + theta: 0.5 + comoving_softening: 0.01 # 10 kpc = 1/25 mean inter-particle separation + max_physical_softening: 0.00263 # 10 ckpc = 2.63 pkpc at z=2.8 (EAGLE-like evolution of softening). + mesh_side_length: 128 + +# Parameters of the hydro scheme +SPH: + resolution_eta: 1.2348 # "48 Ngb" with the cubic spline kernel + CFL_condition: 0.1 + initial_temperature: 1200. 
# (1 + z_ini)^2 * 2.72K + minimal_temperature: 100. + +# Parameters related to the initial conditions +InitialConditions: + file_name: ./SantaBarbara.hdf5 + periodic: 1 + cleanup_h_factors: 1 # ICs were generated for Gadget, we need to get rid of h-factors + cleanup_velocity_factors: 1 # ICs were generated for Gadget, we need to get rid of sqrt(a) factors in the velocity + generate_gas_in_ics: 1 # Generate gas particles from the DM-only ICs + cleanup_smoothing_lengths: 1 # Since we generate gas, make use of the (expensive) cleaning-up procedure. + +EAGLEChemistry: + InitMetallicity: 0.0 + InitAbundance_Hydrogen: 0.752 + InitAbundance_Helium: 0.248 + InitAbundance_Carbon: 0.0 + InitAbundance_Nitrogen: 0.0 + InitAbundance_Oxygen: 0.0 + InitAbundance_Neon: 0.0 + InitAbundance_Magnesium: 0.0 + InitAbundance_Silicon: 0.0 + InitAbundance_Iron: 0.0 + CalciumOverSilicon: 0.0 + SulphurOverSilicon: 0.0 + +EagleCooling: + filename: /cosma5/data/Eagle/BG_Tables/CoolingTables/ + reionisation_redshift: 8.898 + he_reion_z_center: 3.5 + he_reion_z_sigma: 0.5 + he_reion_ev_pH: 2.0 + diff --git a/examples/SantaBarbara/velociraptor_cfg.cfg b/examples/SantaBarbara/velociraptor_cfg.cfg new file mode 100644 index 0000000000000000000000000000000000000000..4b9b441b71cc65a85a9731018d38ffc2f003c0ff --- /dev/null +++ b/examples/SantaBarbara/velociraptor_cfg.cfg @@ -0,0 +1,135 @@ +#configuration file. +#It is suggested that you alter this file as necessary as not all options will be desired and some conflict. +#This file is simply meant to show options available. + +################################ +#input related +################################ +#input is from a cosmological so can use parameters like box size, h, Omega_m to calculate length and density scales +Cosmological_input=1 + +#Type of snapshot to read. Ignored when using within SWIFT. 
+HDF_name_convention=6 # ILLUSTRIS 0, GADGETX 1, EAGLE 2, GIZMO 3, SIMBA 4, MUFASA 5, SWIFTEAGLE 6 + +Particle_search_type=2 #search all particles, see allvars for other types +Baryon_searchflag=0 #if 1 search for baryons separately using phase-space search when identifying substructures, 2 allows special treatment in field FOF linking and phase-space substructure search, 0 treat the same as dark matter particles +Search_for_substructure=0 #if 0, end search once field objects are found +FoF_Field_search_type=5 #5 3DFOF search for field halos, 4 for 6DFOF clean up of field halos, 3 for 6DFOF with velocity scale distinct for each halo +Unbind_flag=0 #run unbinding +Halo_core_search=0 +Significance_level=1.0 #how significant a substructure is relative to Poisson noise. Values >= 1 are fine. + +################################ +# unit options, should always be provided +################################ + +# This is only for i/o. Specifies what units the code was running in. +# These should be set to whatever internal units we use. +# They have no impact on the way the code runs. +Length_unit_to_kpc=1000. #conversion of output length units to kpc +Velocity_to_kms=1.0 #conversion of output velocity units to km/s +Mass_to_solarmass=1e+10 #conversion of output mass units to solar masses + +# units conversion from input to desired internal unit. +# These should be set to 1 unless a conversion is expected. +Length_unit=1.0 #default length unit +Velocity_unit=1.0 #default velocity unit +Mass_unit=1.0 #default mass unit + +# These are ignored when running within SWIFT. +# When using standalone code, G and H must match the value used in the run. +Gravity=4.300927e+01 # In internal units (here 10^10 Msun, km/s, Mpc) +Hubble_unit=100.0 # This is H0 / h in internal units. 
+ +################################ +#search related options +################################ + +#how to search a simulation +# searches for separate 6dfof cores in field haloes, and then more than just flags halo as merging, assigns particles to each merging "halo". 2 is full separation, 1 is flagging, 0 is off +#also useful for zoom simulations or simulations of individual objects, setting this flag means no field structure search is run +Singlehalo_search=0 #if file is single halo in which one wishes to search for substructure +#additional option for field haloes +Keep_FOF=0 #if field 6DFOF search is done, allows to keep structures found in 3DFOF (can be interpreted as the inter halo stellar mass when only stellar search is used).\n + +#minimum size for structures +Minimum_size=256 #min 20 particles +Minimum_halo_size=-1 #if field halos have different minimum sizes, otherwise set to -1. + +#for field fof halo search +Halo_linking_length_factor=2.0 #factor by which Physical_linking_length is changed when searching for field halos. Typical values are ~2 when using iterative substructure search. +Halo_velocity_linking_length_factor=5.0 #for 6d fof halo search increase ellv from substructure search + +#for mean field estimates and local velocity density distribution function estimator related quantities, rarely need to change this +Cell_fraction = 0.01 #fraction of field fof halo used to determine mean velocity distribution function. Typical values are ~0.005-0.02 +Grid_type=1 #normal entropy based grid, shouldn't have to change +Nsearch_velocity=32 #number of velocity neighbours used to calculate local velocity distribution function. Typical values are ~32 +Nsearch_physical=256 #number of physical neighbours from which the nearest velocity neighbour set is based. Typical values are 128-512 + +#for substructure search, rarely ever need to change this +FoF_search_type=1 #default phase-space FOF search.
Don't really need to change +Iterative_searchflag=1 #iterative substructure search, for substructure find initial candidate substructures with smaller linking lengths then expand search region +Outlier_threshold=2.5 #outlier threshold for a particle to be considered residing in substructure, that is how dynamically distinct a particle is. Typical values are >2 +Velocity_ratio=2.0 #ratio of speeds used in phase-space FOF +Velocity_opening_angle=0.10 #angle between velocities. 18 degrees here, typical values are ~10-30 +Physical_linking_length=0.10 #physical linking length. IF reading periodic volumes in gadget/hdf/ramses, in units of the effective inter-particle spacing. Otherwise in user defined code units. Here set to 0.10 as iterative flag one, values of 0.1-0.3 are typical. +Velocity_linking_length=0.20 #where scaled by structure dispersion + +#for iterative substructure search, rarely ever need to change this +Iterative_threshold_factor=1.0 #change in threshold value when using iterative search. Here no increase in threshold if iterative or not +Iterative_linking_length_factor=2.0 #increase in final linking final iterative substructure search will be sqrt(2.25)*this factor +Iterative_Vratio_factor=1.0 #change in Vratio when using iterative search. no change in vratio +Iterative_ThetaOp_factor=1.0 #change in velocity opening angle. no change in velocity opening angle + +#for checking for halo merger remnants, which are defined as large, well separated phase-space density maxima + +#if searching for cores, linking lengths. 
likely does not need to change much +Use_adaptive_core_search=2 #calculate dispersions in configuration & vel space to determine linking lengths +Halo_core_ellx_fac=1.0 #how linking lengths are changed when searching for local 6DFOF cores, +Halo_core_ellv_fac=1.0 #how velocity lengths based on dispersions are changed when searching for local 6DFOF cores +Halo_core_ncellfac=0.05 #fraction of total halo particle number setting min size of a local 6DFOF core +Halo_core_adaptive_sigma_fac=2.0 #used when running fully adaptive core search with phase-space tensors, specifies the width of the physical linking length in configuration space dispersion (think of this as how many sigma to include). Typical values are 2 +Halo_core_num_loops=3 #allows the core search to iterate, shrinking the velocity linking length used until the number of cores identified decreases or this limit is reached. Allows adaptive search with larger linking length to be robust. Typical values are 3-5 +Halo_core_loop_ellv_fac=0.75 #Factor by which velocity linking length is decreased when running loops for core search. Typical values are 0.75 + +################################ +#Unbinding options (VELOCIraptor is able to accurately identify tidal debris so particles need not be bound to a structure) +################################ + +#unbinding related items + +Min_bound_mass_frac=0.2 #minimum bound mass fraction, not yet implemented +#alpha factor used to determine whether particle is "bound" alpha*T+W<0. For standard subhalo catalogues use >0.9 but if interested in tidal debris 0.2-0.5 +Allowed_kinetic_potential_ratio=0.2 +#run unbinding of field structures, aka halos +Bound_halos=0 +#simple Plummer softening length when calculating gravitational energy.
If cosmological simulation with period, is fraction of interparticle spacing +Softening_length=0.00263 +#don't keep background potential when unbinding +Keep_background_potential=0 + +################################ +#Calculation of properties related options +################################ +#when calculating properties, for field objects calculate inclusive masses +Inclusive_halo_masses=1 #calculate inclusive masses +#ensures that output is comoving distances per little h +Comoving_units=0 + +################################ +#output related +################################ + +Write_group_array_file=0 #write a group array file +Separate_output_files=0 #separate output into field and substructure files similar to subfind +Binary_output=2 #binary output 1, ascii 0, and HDF 2 + +#halo ids are adjusted by this value * 1000000000000 (or 1000000 if code compiled with the LONGINTS option turned off) +#to ensure that halo ids are temporally unique. So if you had 100 snapshots, for snap 100 set this to 100 and 100*1000000000000 will +#be added to the halo id as set for this snapshot, so halo 1 becomes halo 100*1000000000000+1 and halo 1 of snap 0 would just have ID=1 +Snapshot_value=1 + +################################ +#other options +################################ +Verbose=0 #how talkative do you want the code to be, 0 not much, 1 a lot, 2 chatterbox diff --git a/examples/SantaBarbara_low/README b/examples/SantaBarbara_low/README new file mode 100644 index 0000000000000000000000000000000000000000..f86f1a4a4e1d16c3f4011c9e3ed8f35f643bd47e --- /dev/null +++ b/examples/SantaBarbara_low/README @@ -0,0 +1,21 @@ +Initial conditions for the Santa-Barbara cluster comparison project. +These have been regenerated from the original Frenk et al. 1999 paper. + +The cosmology is Omega_m = 1, Omega_b = 0.1, h = 0.5 and sigma_8 = 0.9. + +The ICs are 128^3 particles in a 64^3 Mpc^3 volume. This is about 10x +higher resolution than in the original paper. 
The ICs have been +created for Gadget and the positions and box size are hence expressed +in h-full units (e.g. box size of 32 / h Mpc). Similarly, the peculiar +velocities contain an extra sqrt(a) factor. + +We will use SWIFT to cancel the h- and a-factors from the ICs. Gas +particles will be generated at startup. + +MD5 check-sum of the ICs: +1a1600b41002789b6057b1fa6333f3f0 SantaBarbara_128.hdf5 + +You can use the script run_velociraptor.sh to also run a basic 3D FoF +with VELOCIraptor on your output data. You will need to set the +VELOCIRAPTOR_PATH environment variable to tell us where the stf-gas +binary lives. diff --git a/examples/SantaBarbara_low/getIC.sh b/examples/SantaBarbara_low/getIC.sh new file mode 100755 index 0000000000000000000000000000000000000000..759cef50dcfc346b389b1400054fe38358793fdd --- /dev/null +++ b/examples/SantaBarbara_low/getIC.sh @@ -0,0 +1,2 @@ +#!/bin/bash +wget http://virgodb.cosma.dur.ac.uk/swift-webstorage/ICs/SantaBarbara_128.hdf5 diff --git a/examples/SantaBarbara_low/run.sh b/examples/SantaBarbara_low/run.sh new file mode 100755 index 0000000000000000000000000000000000000000..206ba7bad03e0fff12e683ee10bbe2434a73a500 --- /dev/null +++ b/examples/SantaBarbara_low/run.sh @@ -0,0 +1,4 @@ +#!/bin/bash +# Run SWIFT +../swift --cosmology --hydro --self-gravity --threads=28 santa_barbara.yml + diff --git a/examples/SantaBarbara_low/santa_barbara.yml b/examples/SantaBarbara_low/santa_barbara.yml new file mode 100644 index 0000000000000000000000000000000000000000..0e3c66b1a1c1e04bf6fad30e806327b83f03737e --- /dev/null +++ b/examples/SantaBarbara_low/santa_barbara.yml @@ -0,0 +1,60 @@ +# Define the system of units to use internally.
+InternalUnitSystem: + UnitMass_in_cgs: 1.98848e43 # 10^10 M_sun in grams + UnitLength_in_cgs: 3.08567758e24 # Mpc in centimeters + UnitVelocity_in_cgs: 1e5 # 1 km/s + UnitCurrent_in_cgs: 1 # Amperes + UnitTemp_in_cgs: 1 # Kelvin + +# Cosmological parameters +Cosmology: + h: 0.5 + a_begin: 0.047619048 # z_ini = 20 + a_end: 1.0 # z_end = 0 + Omega_m: 1.0 + Omega_lambda: 0.0 + Omega_b: 0.1 + +# Parameters governing the time integration +TimeIntegration: + dt_max: 0.01 + dt_min: 1e-10 + +Scheduler: + max_top_level_cells: 16 + cell_split_size: 100 + +# Parameters governing the snapshots +Snapshots: + basename: santabarbara_low + scale_factor_first: 0.05 + delta_time: 1.02 + +# Parameters governing the conserved quantities statistics +Statistics: + delta_time: 1.02 + scale_factor_first: 0.05 + +# Parameters for the self-gravity scheme +Gravity: + eta: 0.025 + theta: 0.5 + comoving_softening: 0.02 # 20 kpc = 1/25 mean inter-particle separation + max_physical_softening: 0.00526 # 20 ckpc = 5.26 pkpc at z=2.8 (EAGLE-like evolution of softening). + mesh_side_length: 64 + +# Parameters of the hydro scheme +SPH: + resolution_eta: 1.2348 # "48 Ngb" with the cubic spline kernel + CFL_condition: 0.1 + initial_temperature: 1200. # (1 + z_ini)^2 * 2.72K + minimal_temperature: 100. + +# Parameters related to the initial conditions +InitialConditions: + file_name: ./SantaBarbara_128.hdf5 + periodic: 1 + cleanup_h_factors: 1 # ICs were generated for Gadget, we need to get rid of h-factors + cleanup_velocity_factors: 1 # ICs were generated for Gadget, we need to get rid of sqrt(a) factors in the velocity + generate_gas_in_ics: 1 # Generate gas particles from the DM-only ICs + cleanup_smoothing_lengths: 1 # Since we generate gas, make use of the (expensive) cleaning-up procedure. 
\ No newline at end of file diff --git a/examples/SedovBlast_1D/makeIC.py b/examples/SedovBlast_1D/makeIC.py index 7177f3a7670aa054e3d7341a11a7359b3d855837..28b9c4bfd69395b94628bda3cfc3e59166460c79 100644 --- a/examples/SedovBlast_1D/makeIC.py +++ b/examples/SedovBlast_1D/makeIC.py @@ -72,10 +72,6 @@ grp.attrs["MassTable"] = [0.0, 0.0, 0.0, 0.0, 0.0, 0.0] grp.attrs["Flag_Entropy_ICs"] = 0 grp.attrs["Dimension"] = 1 -#Runtime parameters -grp = file.create_group("/RuntimePars") -grp.attrs["PeriodicBoundariesOn"] = 1 - #Units grp = file.create_group("/Units") grp.attrs["Unit length in cgs (U_L)"] = 1. diff --git a/examples/SedovBlast_1D/run.sh b/examples/SedovBlast_1D/run.sh index 4b9a84f069673bd6def3b96faec71b9d4fdd0dda..ba479214961c5957a2b19d6aa118e0f0e7ee0f63 100755 --- a/examples/SedovBlast_1D/run.sh +++ b/examples/SedovBlast_1D/run.sh @@ -8,7 +8,7 @@ then fi # Run SWIFT -../swift -s -t 1 sedov.yml 2>&1 | tee output.log +../swift --hydro --threads=1 sedov.yml 2>&1 | tee output.log # Plot the solution python plotSolution.py 5 diff --git a/examples/SedovBlast_1D/sedov.yml b/examples/SedovBlast_1D/sedov.yml index 5ef105b06c23ba577129f29a817c058457e7387f..b4912a95e797440dc6eb0c9f48806a5954adbc41 100644 --- a/examples/SedovBlast_1D/sedov.yml +++ b/examples/SedovBlast_1D/sedov.yml @@ -31,4 +31,4 @@ SPH: # Parameters related to the initial conditions InitialConditions: file_name: ./sedov.hdf5 # The file to read - + periodic: 1 diff --git a/examples/SedovBlast_2D/makeIC.py b/examples/SedovBlast_2D/makeIC.py index 0e83c7b19b9ac9bd69e20950a64e8a49dd8d0df9..cd1e433c104fd013a71c5a501c166194a7f3f50f 100644 --- a/examples/SedovBlast_2D/makeIC.py +++ b/examples/SedovBlast_2D/makeIC.py @@ -72,10 +72,6 @@ grp.attrs["MassTable"] = [0.0, 0.0, 0.0, 0.0, 0.0, 0.0] grp.attrs["Flag_Entropy_ICs"] = 0 grp.attrs["Dimension"] = 2 -#Runtime parameters -grp = file.create_group("/RuntimePars") -grp.attrs["PeriodicBoundariesOn"] = 1 - #Units grp = file.create_group("/Units") grp.attrs["Unit 
length in cgs (U_L)"] = 1. diff --git a/examples/SedovBlast_2D/run.sh b/examples/SedovBlast_2D/run.sh index a32c8f0d6f3116d5486fe1bd086bf8df49d06020..b481d4555241c17015452a2139c04c541ccf1cdc 100755 --- a/examples/SedovBlast_2D/run.sh +++ b/examples/SedovBlast_2D/run.sh @@ -13,7 +13,7 @@ then fi # Run SWIFT -../swift -s -t 1 sedov.yml 2>&1 | tee output.log +../swift --hydro --threads=1 sedov.yml 2>&1 | tee output.log # Plot the solution python plotSolution.py 5 diff --git a/examples/SedovBlast_2D/sedov.yml b/examples/SedovBlast_2D/sedov.yml index 098ca7a0d6264f016727709723aafdfb1224d460..84177ece31ef98ec55c41513276c9c0158e69bcf 100644 --- a/examples/SedovBlast_2D/sedov.yml +++ b/examples/SedovBlast_2D/sedov.yml @@ -31,4 +31,4 @@ SPH: # Parameters related to the initial conditions InitialConditions: file_name: ./sedov.hdf5 # The file to read - + periodic: 1 diff --git a/examples/SedovBlast_3D/makeIC.py b/examples/SedovBlast_3D/makeIC.py index e1b743c6cdcd8dcc2f8da14d1d5589fb9ed111f0..30e0e31927db6343e58549bc9c7754bc274f51ce 100644 --- a/examples/SedovBlast_3D/makeIC.py +++ b/examples/SedovBlast_3D/makeIC.py @@ -72,10 +72,6 @@ grp.attrs["MassTable"] = [0.0, 0.0, 0.0, 0.0, 0.0, 0.0] grp.attrs["Flag_Entropy_ICs"] = 0 grp.attrs["Dimension"] = 3 -#Runtime parameters -grp = file.create_group("/RuntimePars") -grp.attrs["PeriodicBoundariesOn"] = 1 - #Units grp = file.create_group("/Units") grp.attrs["Unit length in cgs (U_L)"] = 1. 
diff --git a/examples/SedovBlast_3D/run.sh b/examples/SedovBlast_3D/run.sh index 00d5e5b91c31e64f824a3d2a28c8e1a126684a74..88aec36a7b96b5fd2a7fde41f0e0c9dc7185f1e8 100755 --- a/examples/SedovBlast_3D/run.sh +++ b/examples/SedovBlast_3D/run.sh @@ -13,7 +13,7 @@ then fi # Run SWIFT -../swift -s -t 4 sedov.yml 2>&1 | tee output.log +../swift --hydro --threads=4 sedov.yml 2>&1 | tee output.log # Plot the solution python plotSolution.py 5 diff --git a/examples/SedovBlast_3D/sedov.yml b/examples/SedovBlast_3D/sedov.yml index 75849e33c0c644a18cd7357f901699d0d682c160..6cf5b02427b8004787b646e6bcdd4bacaa25bc06 100644 --- a/examples/SedovBlast_3D/sedov.yml +++ b/examples/SedovBlast_3D/sedov.yml @@ -32,5 +32,5 @@ SPH: # Parameters related to the initial conditions InitialConditions: file_name: ./sedov.hdf5 + periodic: 1 smoothing_length_scaling: 3.33 - diff --git a/examples/SineWavePotential_1D/makeIC.py b/examples/SineWavePotential_1D/makeIC.py index afbf1bc0fa47a27677cb9c5645d439432bd9fd9a..39a78393650c7a8c0c01814fa10f514cc277e685 100644 --- a/examples/SineWavePotential_1D/makeIC.py +++ b/examples/SineWavePotential_1D/makeIC.py @@ -74,10 +74,6 @@ grp.attrs["MassTable"] = [0.0, 0.0, 0.0, 0.0, 0.0, 0.0] grp.attrs["Flag_Entropy_ICs"] = 0 grp.attrs["Dimension"] = 1 -#Runtime parameters -grp = file.create_group("/RuntimePars") -grp.attrs["PeriodicBoundariesOn"] = 1 - #Units grp = file.create_group("/Units") grp.attrs["Unit length in cgs (U_L)"] = 1. 
diff --git a/examples/SineWavePotential_1D/run.sh b/examples/SineWavePotential_1D/run.sh index 077cf1c0cc64ef7a85cfd0e67f8f490b0de4ba37..a725a88a175ff39c20b958c07f2e7e84e388d417 100755 --- a/examples/SineWavePotential_1D/run.sh +++ b/examples/SineWavePotential_1D/run.sh @@ -6,7 +6,7 @@ then python makeIC.py fi -../swift -g -s -t 2 sineWavePotential.yml 2>&1 | tee output.log +../swift --external-gravity --hydro --threads=2 sineWavePotential.yml 2>&1 | tee output.log for f in sineWavePotential_*.hdf5 do diff --git a/examples/SineWavePotential_1D/sineWavePotential.yml b/examples/SineWavePotential_1D/sineWavePotential.yml index e6285785099f10902ea60b21334a0ad26c0593de..a21a0b5936ab0a62a7b1f29c56145bed79ba73c4 100644 --- a/examples/SineWavePotential_1D/sineWavePotential.yml +++ b/examples/SineWavePotential_1D/sineWavePotential.yml @@ -31,7 +31,8 @@ SPH: # Parameters related to the initial conditions InitialConditions: file_name: sineWavePotential.hdf5 # The file to read - + periodic: 1 + # External potential parameters SineWavePotential: amplitude: 10. diff --git a/examples/SineWavePotential_2D/makeIC.py b/examples/SineWavePotential_2D/makeIC.py index 62ae89f8f52bff9c0db37cd537f286ab817da3fe..057760502e561b5ec5d98e716b79119e3637ef57 100644 --- a/examples/SineWavePotential_2D/makeIC.py +++ b/examples/SineWavePotential_2D/makeIC.py @@ -70,10 +70,6 @@ grp.attrs["MassTable"] = [0.0, 0.0, 0.0, 0.0, 0.0, 0.0] grp.attrs["Flag_Entropy_ICs"] = 0 grp.attrs["Dimension"] = 2 -#Runtime parameters -grp = file.create_group("/RuntimePars") -grp.attrs["PeriodicBoundariesOn"] = 1 - #Units grp = file.create_group("/Units") grp.attrs["Unit length in cgs (U_L)"] = 1. 
diff --git a/examples/SineWavePotential_2D/run.sh b/examples/SineWavePotential_2D/run.sh index 077cf1c0cc64ef7a85cfd0e67f8f490b0de4ba37..a725a88a175ff39c20b958c07f2e7e84e388d417 100755 --- a/examples/SineWavePotential_2D/run.sh +++ b/examples/SineWavePotential_2D/run.sh @@ -6,7 +6,7 @@ then python makeIC.py fi -../swift -g -s -t 2 sineWavePotential.yml 2>&1 | tee output.log +../swift --external-gravity --hydro --threads=2 sineWavePotential.yml 2>&1 | tee output.log for f in sineWavePotential_*.hdf5 do diff --git a/examples/SineWavePotential_2D/sineWavePotential.yml b/examples/SineWavePotential_2D/sineWavePotential.yml index 9107652f65c343d68fc92e699d45710265d65308..63d575e7e2486cf4428bb8b11e1ba16da6e08d99 100644 --- a/examples/SineWavePotential_2D/sineWavePotential.yml +++ b/examples/SineWavePotential_2D/sineWavePotential.yml @@ -31,7 +31,8 @@ SPH: # Parameters related to the initial conditions InitialConditions: file_name: sineWavePotential.hdf5 # The file to read - + periodic: 1 + # External potential parameters SineWavePotential: amplitude: 10. diff --git a/examples/SineWavePotential_3D/makeIC.py b/examples/SineWavePotential_3D/makeIC.py index 4833ec1b055e27b63751136f0491e972fb9e492a..a4f39238ba40bf6769e0fb44fe8da706730fe45b 100644 --- a/examples/SineWavePotential_3D/makeIC.py +++ b/examples/SineWavePotential_3D/makeIC.py @@ -81,10 +81,6 @@ grp.attrs["MassTable"] = [0.0, 0.0, 0.0, 0.0, 0.0, 0.0] grp.attrs["Flag_Entropy_ICs"] = 0 grp.attrs["Dimension"] = 3 -#Runtime parameters -grp = file.create_group("/RuntimePars") -grp.attrs["PeriodicBoundariesOn"] = 1 - #Units grp = file.create_group("/Units") grp.attrs["Unit length in cgs (U_L)"] = 1. 
diff --git a/examples/SineWavePotential_3D/run.sh b/examples/SineWavePotential_3D/run.sh index 077cf1c0cc64ef7a85cfd0e67f8f490b0de4ba37..a725a88a175ff39c20b958c07f2e7e84e388d417 100755 --- a/examples/SineWavePotential_3D/run.sh +++ b/examples/SineWavePotential_3D/run.sh @@ -6,7 +6,7 @@ then python makeIC.py fi -../swift -g -s -t 2 sineWavePotential.yml 2>&1 | tee output.log +../swift --external-gravity --hydro --threads=2 sineWavePotential.yml 2>&1 | tee output.log for f in sineWavePotential_*.hdf5 do diff --git a/examples/SineWavePotential_3D/sineWavePotential.yml b/examples/SineWavePotential_3D/sineWavePotential.yml index 8a49d8bc40eb662d62b2b6550b70fe380a7564f5..5b91feae0ecf8ad2f4f257374900a01f031acff1 100644 --- a/examples/SineWavePotential_3D/sineWavePotential.yml +++ b/examples/SineWavePotential_3D/sineWavePotential.yml @@ -31,7 +31,8 @@ SPH: # Parameters related to the initial conditions InitialConditions: file_name: sineWavePotential.hdf5 # The file to read - + periodic: 1 + # External potential parameters SineWavePotential: amplitude: 10. diff --git a/examples/SmallCosmoVolume/README b/examples/SmallCosmoVolume/README index 68c137aee30c08bb476b760c75dceaa5e1ede87e..a0abad5f814f87133dccc31d414bdc546609df88 100644 --- a/examples/SmallCosmoVolume/README +++ b/examples/SmallCosmoVolume/README @@ -4,6 +4,11 @@ We use a softening length of 1/25th of the mean inter-particle separation. The ICs have been generated to run with Gadget-2 so we need to switch on the options to cancel the h-factors and a-factors at reading time. +We generate gas from the ICs using SWIFT's internal mechanism and set the +temperature to the expected gas temperature at this redshift. + +The 'plotTempEvolution.py' plots the temperature evolution of the gas +in the simulated volume. 
MD5 checksum of the ICs: -2a9c603ffb1f6d29f3d98a3ecb9d3238 small_cosmo_volume.hdf5 +08736c3101fd738e22f5159f78e6022b small_cosmo_volume.hdf5 diff --git a/examples/SmallCosmoVolume/plotTempEvolution.py b/examples/SmallCosmoVolume/plotTempEvolution.py new file mode 100644 index 0000000000000000000000000000000000000000..aa6c5df5fe5ff5c7d0944a45bb11344f70c57844 --- /dev/null +++ b/examples/SmallCosmoVolume/plotTempEvolution.py @@ -0,0 +1,182 @@ +################################################################################ +# This file is part of SWIFT. +# Copyright (c) 2018 Matthieu Schaller (matthieu.schaller@durham.ac.uk) +# +# This program is free software: you can redistribute it and/or modify +# it under the terms of the GNU Lesser General Public License as published +# by the Free Software Foundation, either version 3 of the License, or +# (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU Lesser General Public License +# along with this program. If not, see <http://www.gnu.org/licenses/>. 
+# +################################################################################ + +# Computes the temperature evolution of the gas in a cosmological box + +# Physical constants needed for internal energy to temperature conversion +k_in_J_K = 1.38064852e-23 +mH_in_kg = 1.6737236e-27 + +# Number of snapshots generated +n_snapshots = 200 + +import matplotlib +matplotlib.use("Agg") +from pylab import * +import h5py +import os.path + +# Plot parameters +params = {'axes.labelsize': 10, +'axes.titlesize': 10, +'font.size': 9, +'legend.fontsize': 9, +'xtick.labelsize': 10, +'ytick.labelsize': 10, +'text.usetex': True, + 'figure.figsize' : (3.15,3.15), +'figure.subplot.left' : 0.14, +'figure.subplot.right' : 0.99, +'figure.subplot.bottom' : 0.12, +'figure.subplot.top' : 0.99, +'figure.subplot.wspace' : 0.15, +'figure.subplot.hspace' : 0.12, +'lines.markersize' : 6, +'lines.linewidth' : 2., +'text.latex.unicode': True +} +rcParams.update(params) +rc('font',**{'family':'sans-serif','sans-serif':['Times']}) + +# Read the simulation data +sim = h5py.File("snap_0000.hdf5", "r") +boxSize = sim["/Header"].attrs["BoxSize"][0] +time = sim["/Header"].attrs["Time"][0] +scheme = sim["/HydroScheme"].attrs["Scheme"][0] +kernel = sim["/HydroScheme"].attrs["Kernel function"][0] +neighbours = sim["/HydroScheme"].attrs["Kernel target N_ngb"][0] +eta = sim["/HydroScheme"].attrs["Kernel eta"][0] +alpha = sim["/HydroScheme"].attrs["Alpha viscosity"][0] +H_mass_fraction = sim["/HydroScheme"].attrs["Hydrogen mass fraction"][0] +H_transition_temp = sim["/HydroScheme"].attrs["Hydrogen ionization transition temperature"][0] +T_initial = sim["/HydroScheme"].attrs["Initial temperature"][0] +T_minimal = sim["/HydroScheme"].attrs["Minimal temperature"][0] +git = sim["Code"].attrs["Git Revision"] + +# Cosmological parameters +H_0 = sim["/Cosmology"].attrs["H0 [internal units]"][0] +gas_gamma = sim["/HydroScheme"].attrs["Adiabatic index"][0] + +unit_length_in_cgs = sim["/Units"].attrs["Unit length in 
cgs (U_L)"] +unit_mass_in_cgs = sim["/Units"].attrs["Unit mass in cgs (U_M)"] +unit_time_in_cgs = sim["/Units"].attrs["Unit time in cgs (U_t)"] + +unit_length_in_si = 0.01 * unit_length_in_cgs +unit_mass_in_si = 0.001 * unit_mass_in_cgs +unit_time_in_si = unit_time_in_cgs + +# Primordial mean molecular weight as a function of temperature +def mu(T, H_frac=H_mass_fraction, T_trans=H_transition_temp): + if T > T_trans: + return 4. / (8. - 5. * (1. - H_frac)) + else: + return 4. / (1. + 3. * H_frac) + +# Temperature of some primordial gas with a given internal energy +def T(u, H_frac=H_mass_fraction, T_trans=H_transition_temp): + T_over_mu = (gas_gamma - 1.) * u * mH_in_kg / k_in_J_K + ret = np.ones(np.size(u)) * T_trans + + # Enough energy to be ionized? + mask_ionized = (T_over_mu > (T_trans+1) / mu(T_trans+1, H_frac, T_trans)) + if np.sum(mask_ionized) > 0: + ret[mask_ionized] = T_over_mu[mask_ionized] * mu(T_trans*10, H_frac, T_trans) + + # Neutral gas? + mask_neutral = (T_over_mu < (T_trans-1) / mu((T_trans-1), H_frac, T_trans)) + if np.sum(mask_neutral) > 0: + ret[mask_neutral] = T_over_mu[mask_neutral] * mu(0, H_frac, T_trans) + + return ret + + +z = np.zeros(n_snapshots) +a = np.zeros(n_snapshots) +T_mean = np.zeros(n_snapshots) +T_std = np.zeros(n_snapshots) +T_log_mean = np.zeros(n_snapshots) +T_log_std = np.zeros(n_snapshots) +T_median = np.zeros(n_snapshots) +T_min = np.zeros(n_snapshots) +T_max = np.zeros(n_snapshots) + +# Loop over all the snapshots +for i in range(n_snapshots): + sim = h5py.File("snap_%04d.hdf5"%i, "r") + + z[i] = sim["/Cosmology"].attrs["Redshift"][0] + a[i] = sim["/Cosmology"].attrs["Scale-factor"][0] + + u = sim["/PartType0/InternalEnergy"][:] + + # Compute the temperature + u *= (unit_length_in_si**2 / unit_time_in_si**2) + u /= a[i]**(3 * (gas_gamma - 1.)) + Temp = T(u) + + # Gather statistics + T_median[i] = np.median(Temp) + T_mean[i] = Temp.mean() + T_std[i] = Temp.std() + T_log_mean[i] = np.log10(Temp).mean() + T_log_std[i] =
np.log10(Temp).std() + T_min[i] = Temp.min() + T_max[i] = Temp.max() + +# CMB evolution +a_evol = np.logspace(-3, 0, 60) +T_cmb = (1. / a_evol)**2 * 2.72 + +# Plot the interesting quantities +figure() +subplot(111, xscale="log", yscale="log") + +fill_between(a, T_mean-T_std, T_mean+T_std, color='C0', alpha=0.1) +plot(a, T_max, ls='-.', color='C0', lw=1., label="${\\rm max}~T$") +plot(a, T_min, ls=':', color='C0', lw=1., label="${\\rm min}~T$") +plot(a, T_mean, color='C0', label="${\\rm mean}~T$", lw=1.5) +fill_between(a, 10**(T_log_mean-T_log_std), 10**(T_log_mean+T_log_std), color='C1', alpha=0.1) +plot(a, 10**T_log_mean, color='C1', label="${\\rm mean}~{\\rm log} T$", lw=1.5) +plot(a, T_median, color='C2', label="${\\rm median}~T$", lw=1.5) + +legend(loc="upper left", frameon=False, handlelength=1.5) + +# Expected lines +plot([1e-10, 1e10], [H_transition_temp, H_transition_temp], 'k--', lw=0.5, alpha=0.7) +text(2.5e-2, H_transition_temp*1.07, "$T_{\\rm HII\\rightarrow HI}$", va="bottom", alpha=0.7, fontsize=8) +plot([1e-10, 1e10], [T_minimal, T_minimal], 'k--', lw=0.5, alpha=0.7) +text(1e-2, T_minimal*0.8, "$T_{\\rm min}$", va="top", alpha=0.7, fontsize=8) +plot(a_evol, T_cmb, 'k--', lw=0.5, alpha=0.7) +text(a_evol[20], T_cmb[20]*0.55, "$(1+z)^2\\times T_{\\rm CMB,0}$", rotation=-34, alpha=0.7, fontsize=8, va="top", bbox=dict(facecolor='w', edgecolor='none', pad=1.0, alpha=0.9)) + + +redshift_ticks = np.array([0., 1., 2., 5., 10., 20., 50., 100.]) +redshift_labels = ["$0$", "$1$", "$2$", "$5$", "$10$", "$20$", "$50$", "$100$"] +a_ticks = 1. / (redshift_ticks + 1.) 
+ +xticks(a_ticks, redshift_labels) +minorticks_off() + +xlabel("${\\rm Redshift}~z$", labelpad=0) +ylabel("${\\rm Temperature}~T~[{\\rm K}]$", labelpad=0) +xlim(9e-3, 1.1) +ylim(20, 2.5e7) + +savefig("Temperature_evolution.png", dpi=200) + diff --git a/examples/SmallCosmoVolume/run.sh b/examples/SmallCosmoVolume/run.sh index fe67706d7512d6f4ff1537ce008ce3a52a6ce6a6..12c98d1b636ef4ce324f861f6db236738785f856 100755 --- a/examples/SmallCosmoVolume/run.sh +++ b/examples/SmallCosmoVolume/run.sh @@ -7,5 +7,8 @@ then ./getIC.sh fi -../swift -c -G -t 8 small_cosmo_volume.yml 2>&1 | tee output.log +# Run SWIFT +../swift --cosmology --hydro --self-gravity --threads=8 small_cosmo_volume.yml 2>&1 | tee output.log +# Plot the temperature evolution +python plotTempEvolution.py diff --git a/examples/SmallCosmoVolume/small_cosmo_volume.yml b/examples/SmallCosmoVolume/small_cosmo_volume.yml index 32ec15db6be35fed4eb0c0168f52f0ba919158ea..a6ce1f28198a99422ea1c80178fc8000b66d777e 100644 --- a/examples/SmallCosmoVolume/small_cosmo_volume.yml +++ b/examples/SmallCosmoVolume/small_cosmo_volume.yml @@ -1,29 +1,18 @@ # Define the system of units to use internally. InternalUnitSystem: - UnitMass_in_cgs: 1.98848e43 # 10^10 M_sun in grams - UnitLength_in_cgs: 3.08567758e24 # Mpc in centimeters - UnitVelocity_in_cgs: 1e5 # km/s in centimeters per second + UnitMass_in_cgs: 1.98848e43 # 10^10 M_sun + UnitLength_in_cgs: 3.08567758e24 # 1 Mpc + UnitVelocity_in_cgs: 1e5 # 1 km/s UnitCurrent_in_cgs: 1 # Amperes UnitTemp_in_cgs: 1 # Kelvin -# Structure finding options -StructureFinding: - config_file_name: stf_input_6dfof_dmonly_sub.cfg # Name of the STF config file. - basename: ./stf # Common part of the name of output files. - output_time_format: 0 # Specifies the frequency format of structure finding. 0 for simulation steps (delta_step) and 1 for simulation time intervals (delta_time). 
- scale_factor_first: 0.92 # Scale-factor of the first snaphot (cosmological run) - time_first: 0.01 # Time of the first structure finding output (in internal units). - delta_step: 1000 # Time difference between consecutive structure finding outputs (in internal units) in simulation steps. - delta_time: 1.02 # Time difference between consecutive structure finding outputs (in internal units) in simulation time intervals. - -# WMAP9 cosmology -Cosmology: +Cosmology: # WMAP9 cosmology Omega_m: 0.276 Omega_lambda: 0.724 Omega_b: 0.0455 h: 0.703 - a_begin: 0.0196078 - a_end: 1.0 + a_begin: 0.019607843 # z_ini = 50. + a_end: 1.0 # z_end = 0. # Parameters governing the time integration TimeIntegration: @@ -34,10 +23,17 @@ TimeIntegration: Gravity: eta: 0.025 theta: 0.3 - comoving_softening: 0.08 - max_physical_softening: 0.08 - mesh_side_length: 32 - + comoving_softening: 0.0889 # 1/25th of the mean inter-particle separation: 88.9 kpc + max_physical_softening: 0.0889 # 1/25th of the mean inter-particle separation: 88.9 kpc + mesh_side_length: 64 + +# Parameters of the hydro scheme +SPH: + resolution_eta: 1.2348 # "48 Ngb" with the cubic spline kernel + CFL_condition: 0.1 + initial_temperature: 7075. # (1 + z_ini)^2 * 2.72K + minimal_temperature: 100. + # Parameters governing the snapshots Snapshots: basename: snap @@ -56,5 +52,8 @@ Scheduler: # Parameters related to the initial conditions InitialConditions: file_name: small_cosmo_volume.hdf5 + periodic: 1 cleanup_h_factors: 1 cleanup_velocity_factors: 1 + generate_gas_in_ics: 1 # Generate gas particles from the DM-only ICs + cleanup_smoothing_lengths: 1 # Since we generate gas, make use of the (expensive) cleaning-up procedure. 
diff --git a/examples/SmallCosmoVolume_DM/Gadget2/README b/examples/SmallCosmoVolume_DM/Gadget2/README new file mode 100644 index 0000000000000000000000000000000000000000..8063a5da1e68b608759d35373e6006d17bf5047e --- /dev/null +++ b/examples/SmallCosmoVolume_DM/Gadget2/README @@ -0,0 +1,6 @@ +This parameter file can be used to run the exact same example +with the Gadget-2 code. + +The Gadget code has to be compiled with at least the following options: + - PERIODIC + - HAVE_HDF5 diff --git a/examples/SmallCosmoVolume_DM/Gadget2/small_cosmo_volume_dm.param b/examples/SmallCosmoVolume_DM/Gadget2/small_cosmo_volume_dm.param new file mode 100644 index 0000000000000000000000000000000000000000..4eaaab4cb124db898928c75e7a7a03bb850c5a9f --- /dev/null +++ b/examples/SmallCosmoVolume_DM/Gadget2/small_cosmo_volume_dm.param @@ -0,0 +1,137 @@ + +% System of units + +UnitLength_in_cm 3.08567758e24 % 1.0 Mpc +UnitMass_in_g 1.98848e43 % 1.0e10 solar masses +UnitVelocity_in_cm_per_s 1e5 % 1 km/sec +GravityConstantInternal 4.300927e+01 % Same value as SWIFT + +% Relevant files +InitCondFile small_cosmo_volume +OutputDir data/ + +EnergyFile energy.txt +InfoFile info.txt +TimingsFile timings.txt +CpuFile cpu.txt + +RestartFile restart +SnapshotFileBase box + +OutputListFilename dummy + +% CPU time -limit + +TimeLimitCPU 360000 % = 10 hours +ResubmitOn 0 +ResubmitCommand my-scriptfile + + +% Code options + +ICFormat 3 +SnapFormat 3 +ComovingIntegrationOn 1 +TypeOfTimestepCriterion 0 +OutputListOn 0 +PeriodicBoundariesOn 1 + +% Characteristics of run + +TimeBegin 0.019607843 % z = 50. +TimeMax 1. % z = 0. + +Omega0 0.276 +OmegaLambda 0.724 +OmegaBaryon 0.0455 +HubbleParam 0.703 +BoxSize 100.
% Mpc / h + +% Output frequency + +TimeBetSnapshot 1.02 +TimeOfFirstSnapshot 0.02 + +CpuTimeBetRestartFile 36000.0 ; here in seconds +TimeBetStatistics 0.02 + +NumFilesPerSnapshot 1 +NumFilesWrittenInParallel 1 + +% Accuracy of time integration + +ErrTolIntAccuracy 0.025 +MaxRMSDisplacementFac 0.25 +CourantFac 0.1 +MaxSizeTimestep 0.01 +MinSizeTimestep 1e-6 + + +% Tree algorithm, force accuracy, domain update frequency + +ErrTolTheta 0.3 +TypeOfOpeningCriterion 1 +ErrTolForceAcc 0.005 +TreeDomainUpdateFrequency 0.01 + +% Further parameters of SPH + +DesNumNgb 48 +MaxNumNgbDeviation 1. +ArtBulkViscConst 0.8 +InitGasTemp 0. +MinGasTemp 0. + +% Memory allocation + +PartAllocFactor 1.6 +TreeAllocFactor 0.8 +BufferSize 30 + +% Softening lengths + +MinGasHsmlFractional 0.001 + +SofteningGas 0 +SofteningHalo 0.0625 # 62.5 kpc / h = 1/25 of mean inter-particle separation +SofteningDisk 0 +SofteningBulge 0 +SofteningStars 0 +SofteningBndry 0 + +SofteningGasMaxPhys 0 +SofteningHaloMaxPhys 0.0625 # 62.5 kpc / h = 1/25 of mean inter-particle separation +SofteningDiskMaxPhys 0 +SofteningBulgeMaxPhys 0 +SofteningStarsMaxPhys 0 +SofteningBndryMaxPhys 0 + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/examples/SmallCosmoVolume_DM/README b/examples/SmallCosmoVolume_DM/README new file mode 100644 index 0000000000000000000000000000000000000000..14a289cf4a1d638c18f421f23ca8bcf0ced68d1b --- /dev/null +++ b/examples/SmallCosmoVolume_DM/README @@ -0,0 +1,9 @@ +Small LCDM cosmological simulation generated by C. Power. Cosmology +is WMAP9 and the box is 100Mpc/h in size with 64^3 particles. +We use a softening length of 1/25th of the mean inter-particle separation. + +The ICs have been generated to run with Gadget-2 so we need to switch +on the options to cancel the h-factors and a-factors at reading time. 
+ +MD5 checksum of the ICs: +08736c3101fd738e22f5159f78e6022b small_cosmo_volume.hdf5 diff --git a/examples/SmallCosmoVolume_DM/getIC.sh b/examples/SmallCosmoVolume_DM/getIC.sh new file mode 100755 index 0000000000000000000000000000000000000000..3b8136cc5aca00a25792655c6c505cfeeb0f2bc9 --- /dev/null +++ b/examples/SmallCosmoVolume_DM/getIC.sh @@ -0,0 +1,3 @@ +#!/bin/bash +wget http://virgodb.cosma.dur.ac.uk/swift-webstorage/ICs/small_cosmo_volume.hdf5 + diff --git a/examples/SmallCosmoVolume_DM/run.sh b/examples/SmallCosmoVolume_DM/run.sh new file mode 100755 index 0000000000000000000000000000000000000000..9c002f4643ad3cad073e6c4a39066bc20cf7b52a --- /dev/null +++ b/examples/SmallCosmoVolume_DM/run.sh @@ -0,0 +1,12 @@ +#!/bin/bash + + # Generate the initial conditions if they are not present. +if [ ! -e small_cosmo_volume.hdf5 ] +then + echo "Fetching initial conditions for the small cosmological volume example..." + ./getIC.sh +fi + +# Run SWIFT +../swift --cosmology --self-gravity --threads=8 small_cosmo_volume_dm.yml 2>&1 | tee output.log + diff --git a/examples/SmallCosmoVolume_DM/small_cosmo_volume_dm.yml b/examples/SmallCosmoVolume_DM/small_cosmo_volume_dm.yml new file mode 100644 index 0000000000000000000000000000000000000000..910137edc442c994a9f31a8c62e16818ca4ae97d --- /dev/null +++ b/examples/SmallCosmoVolume_DM/small_cosmo_volume_dm.yml @@ -0,0 +1,58 @@ +# Define the system of units to use internally. +InternalUnitSystem: + UnitMass_in_cgs: 1.98848e43 # 10^10 M_sun + UnitLength_in_cgs: 3.08567758e24 # 1 Mpc + UnitVelocity_in_cgs: 1e5 # 1 km/s + UnitCurrent_in_cgs: 1 # Amperes + UnitTemp_in_cgs: 1 # Kelvin + +# Structure finding options +StructureFinding: + config_file_name: stf_input_6dfof_dmonly_sub.cfg + basename: ./stf + output_time_format: 1 + scale_factor_first: 0.02 + delta_time: 1.02 + +Cosmology: # WMAP9 cosmology + Omega_m: 0.276 + Omega_lambda: 0.724 + Omega_b: 0.0455 + h: 0.703 + a_begin: 0.019607843 # z_ini = 50. + a_end: 1.0 # z_end = 0. 
+ +# Parameters governing the time integration +TimeIntegration: + dt_min: 1e-6 + dt_max: 1e-2 + +# Parameters for the self-gravity scheme +Gravity: + eta: 0.025 + theta: 0.3 + comoving_softening: 0.0889 # 1/25th of the mean inter-particle separation: 88.9 kpc + max_physical_softening: 0.0889 # 1/25th of the mean inter-particle separation: 88.9 kpc + mesh_side_length: 64 + +# Parameters governing the snapshots +Snapshots: + basename: snap + delta_time: 1.02 + scale_factor_first: 0.02 + +# Parameters governing the conserved quantities statistics +Statistics: + delta_time: 1.02 + scale_factor_first: 0.02 + +Scheduler: + max_top_level_cells: 8 + cell_split_size: 50 + +# Parameters related to the initial conditions +InitialConditions: + file_name: small_cosmo_volume.hdf5 + periodic: 1 + cleanup_h_factors: 1 + cleanup_velocity_factors: 1 diff --git a/examples/SmallCosmoVolume/stf_input_6dfof_dmonly_sub.cfg b/examples/SmallCosmoVolume_DM/stf_input_6dfof_dmonly_sub.cfg similarity index 99% rename from examples/SmallCosmoVolume/stf_input_6dfof_dmonly_sub.cfg rename to examples/SmallCosmoVolume_DM/stf_input_6dfof_dmonly_sub.cfg index 872e0ad6f44d8092ce1da6ac030a949dc4dba5d5..7368e5654204ad600192eff3defdd5f96e986ce5 100644 --- a/examples/SmallCosmoVolume/stf_input_6dfof_dmonly_sub.cfg +++ b/examples/SmallCosmoVolume_DM/stf_input_6dfof_dmonly_sub.cfg @@ -104,7 +104,7 @@ Allowed_kinetic_potential_ratio=0.2 #run unbinding of field structures, aka halos Bound_halos=0 #simple Plummer softening length when calculating gravitational energy. If cosmological simulation with period, is fraction of interparticle spacing -Softening_length=0. 
+Softening_length=0.04 #don't keep background potential when unbinding Keep_background_potential=0 diff --git a/examples/SmallCosmoVolume_VELOCIraptor/README b/examples/SmallCosmoVolume_VELOCIraptor/README new file mode 100644 index 0000000000000000000000000000000000000000..76eab82c0c434ceab334f82be8bd52e0d2dd4d08 --- /dev/null +++ b/examples/SmallCosmoVolume_VELOCIraptor/README @@ -0,0 +1,16 @@ +Small LCDM cosmological simulation generated by C. Power. Cosmology +is WMAP9 and the box is 100Mpc/h in size with 64^3 particles. +We use a softening length of 1/25th of the mean inter-particle separation. + +The ICs have been generated to run with Gadget-2 so we need to switch +on the options to cancel the h-factors and a-factors at reading time. +We generate gas from the ICs using SWIFT's internal mechanism and set the +temperature to the expected gas temperature at this redshift. + +This example runs with Hydrodynamics and a halo finder, the halo finder +is run while running the simulation. At the end it is possible to +calculate the halo mass function of the halos in the simulated +volume, this is done by using haloevol.py. + +MD5 checksum of the ICs: +08736c3101fd738e22f5159f78e6022b small_cosmo_volume.hdf5 diff --git a/examples/SmallCosmoVolume_VELOCIraptor/getHMF.py b/examples/SmallCosmoVolume_VELOCIraptor/getHMF.py new file mode 100755 index 0000000000000000000000000000000000000000..e56df323b004dfcfcd2c75c427fa6f3ecbe37a29 --- /dev/null +++ b/examples/SmallCosmoVolume_VELOCIraptor/getHMF.py @@ -0,0 +1,65 @@ +#!/usr/bin/env python +############################################################################### +# This file is part of SWIFT. +# Copyright (c) 2018 Folkert Nobels (nobels@strw.leidenuniv.nl) +# +# This program is free software: you can redistribute it and/or modify +# it under the terms of the GNU Lesser General Public License as published +# by the Free Software Foundation, either version 3 of the License, or +# (at your option) any later version. 
+# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU Lesser General Public License +# along with this program. If not, see <http://www.gnu.org/licenses/>. +# +################################################################################ +import numpy as np +from hmf import MassFunction +import hmf +from astropy.cosmology import FlatLambdaCDM + + + +def getHMFz(z, H0=70.3, Om0=0.276, Ob0=0.0455, Tcmb0=2.725, Mmin=1e10, Mmax=1e15): + """ Fast function to call the HMF from hmf, this function only has + 7 variables and will return the dn/d(log10 M) and M array. + z: redshift + H0: Hubble constant + Om0: Matter density + Ob0: Baryon density + Tcmb0: CMB temperature at z=0 + Mmin: minimum mass (solar masses) + Mmax: Maximum mass (solar masses) + """ + new_model = FlatLambdaCDM(H0=H0, Om0=Om0, Ob0=Ob0, Tcmb0=Tcmb0) + hmff = MassFunction( + cosmo_model=new_model, + Mmax=np.log10(Mmax), + Mmin=np.log10(Mmin), + z=z, + hmf_model="ST", + ) + return hmff.m, hmff.dndlog10m + + +def getHMFztinker(z, H0=70.3, Om0=0.276, Ob0=0.0455, Tcmb0=2.725, Mmin=1e10, Mmax=1e15): + """ Fast function to call the HMF from hmf, this function only has + 6 variables and will return the dn/d(log10 M) and M array. 
+ H0: Hubble constant + Om0: Matter density + Ob0: Baryon density + Tcmb0: CMB temperature at z=0 + Mmin: minimum mass (solar masses) + Mmax: Maximum mass (solar masses) + """ + new_model = FlatLambdaCDM(H0=H0, Om0=Om0, Ob0=Ob0, Tcmb0=Tcmb0) + hmff = MassFunction( + cosmo_model=new_model, Mmax=np.log10(Mmax), Mmin=np.log10(Mmin), z=z + ) + return hmff.m, hmff.dndlog10m + + diff --git a/examples/SmallCosmoVolume_VELOCIraptor/getIC.sh b/examples/SmallCosmoVolume_VELOCIraptor/getIC.sh new file mode 100755 index 0000000000000000000000000000000000000000..3b8136cc5aca00a25792655c6c505cfeeb0f2bc9 --- /dev/null +++ b/examples/SmallCosmoVolume_VELOCIraptor/getIC.sh @@ -0,0 +1,3 @@ +#!/bin/bash +wget http://virgodb.cosma.dur.ac.uk/swift-webstorage/ICs/small_cosmo_volume.hdf5 + diff --git a/examples/SmallCosmoVolume_VELOCIraptor/haloevol.py b/examples/SmallCosmoVolume_VELOCIraptor/haloevol.py new file mode 100755 index 0000000000000000000000000000000000000000..94e206cdf686ef5d2d3676d6fc36d6dfe8aea558 --- /dev/null +++ b/examples/SmallCosmoVolume_VELOCIraptor/haloevol.py @@ -0,0 +1,86 @@ +#!/usr/bin/env python +############################################################################### +# This file is part of SWIFT. +# Copyright (c) 2018 Folkert Nobels (nobels@strw.leidenuniv.nl) +# +# This program is free software: you can redistribute it and/or modify +# it under the terms of the GNU Lesser General Public License as published +# by the Free Software Foundation, either version 3 of the License, or +# (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU Lesser General Public License +# along with this program. If not, see <http://www.gnu.org/licenses/>. 
+# +################################################################################ +import numpy as np +import h5py +import matplotlib.pyplot as plt +from getHMF import getHMFz, getHMFztinker + +dlogm = 0.2 +bins = 10 ** (np.arange(12, 15.2, dlogm)) +V = 142.0 ** 3 + +itervalues = np.array([175, 185, 192, 198]) + +for j in itervalues: + # Load the data + g = h5py.File("stf_%04d.VELOCIraptor.properties" % j, "r") + mass = g["Mass_200crit"][:] * 1e10 # convert to the correct unit + binnedmass, massrange = np.histogram(mass, bins=bins) + + massnlarger = np.zeros(len(binnedmass)) + for i in range(0, len(massnlarger)): + massnlarger[i] = np.sum(binnedmass[i:]) + + f = h5py.File("snap_%04d.hdf5" % (j + 1)) + cosmo = f["Cosmology"] + redshift = cosmo.attrs["Redshift"][0] + a = cosmo.attrs["Scale-factor"][0] + + # Determine the HMF + errormassn = massnlarger ** 0.5 + numbden = massnlarger / V / a ** 3 + numbdenerr = errormassn / V / a ** 3 + massplot = (massrange[0:15] + massrange[1:16]) / 2 + dernumbden = -np.diff(numbden) / np.diff(np.log10(massplot)) + dererr = 2 ** 0.5 / dlogm * (numbdenerr[0:14] + numbdenerr[1:15]) / 2 + + plt.plot( + (massplot[0:14] + massplot[1:15]) / 2, dernumbden, label="SWIFT - SPH $64^3$" + ) + plt.fill_between( + (massplot[0:14] + massplot[1:15]) / 2, + dernumbden - dererr, + dernumbden + dererr, + alpha=0.4, + ) + plt.xscale("log") + plt.ylim(1e-6, 1e-1) + plt.xlim(10 ** 11, 10 ** 15.5) + + xplace = 10 ** 14.5 + plt.text(xplace, 10 ** -2.3, "$\Omega_m=0.276$") + plt.text(xplace, 10 ** -2.6, "$\Omega_b=0.0455$") + plt.text(xplace, 10 ** -2.9, "$\Omega_\Lambda=0.724$") + plt.text(xplace, 10 ** -3.2, "$h=0.703$") + plt.text(xplace, 10 ** -3.5, "$z=%2.2f$" % redshift) + + m, dndlogm = getHMFz(redshift) + plt.plot(m / 0.7, dndlogm * 0.7 ** 3, label="Sheth et al. 2001") + + m, dndlogm = getHMFztinker(redshift) + plt.plot(m / 0.7, dndlogm * 0.7 ** 3, label="Tinker et al. 
2008") + + plt.xlabel("M${}_{200}$ ($M_\odot$)") + plt.ylabel("dn/d($\log$10(M${}_{200}$) ($Mpc^{-3}$)") + plt.axvline(x=32 * 3.5e11, linestyle="--", color="k") + plt.yscale("log") + plt.legend() + plt.savefig("./HMF_%04d.png" % j) + plt.close() diff --git a/examples/SmallCosmoVolume_VELOCIraptor/run.sh b/examples/SmallCosmoVolume_VELOCIraptor/run.sh new file mode 100755 index 0000000000000000000000000000000000000000..cde550f0a52186b8284427606ff3b71aa718cd34 --- /dev/null +++ b/examples/SmallCosmoVolume_VELOCIraptor/run.sh @@ -0,0 +1,18 @@ +#!/bin/bash + + # Generate the initial conditions if they are not present. +if [ ! -e small_cosmo_volume.hdf5 ] +then + echo "Fetching initial conditions for the small cosmological volume example..." + ./getIC.sh +fi + +# Run SWIFT +../swift --cosmology --hydro --self-gravity --velociraptor --threads=8 small_cosmo_volume.yml 2>&1 | tee output.log + +echo "Make a plot of the HMF" +if command -v python3 &>/dev/null; then + python3 haloevol.py +else + python haloevol.py +fi diff --git a/examples/SmallCosmoVolume_VELOCIraptor/small_cosmo_volume.yml b/examples/SmallCosmoVolume_VELOCIraptor/small_cosmo_volume.yml new file mode 100644 index 0000000000000000000000000000000000000000..c8157a7a0e0065b1f58667fb8437b9e3883eda75 --- /dev/null +++ b/examples/SmallCosmoVolume_VELOCIraptor/small_cosmo_volume.yml @@ -0,0 +1,67 @@ +# Define the system of units to use internally. +InternalUnitSystem: + UnitMass_in_cgs: 1.98848e43 # 10^10 M_sun + UnitLength_in_cgs: 3.08567758e24 # 1 Mpc + UnitVelocity_in_cgs: 1e5 # 1 km/s + UnitCurrent_in_cgs: 1 # Amperes + UnitTemp_in_cgs: 1 # Kelvin + +Cosmology: # WMAP9 cosmology + Omega_m: 0.276 + Omega_lambda: 0.724 + Omega_b: 0.0455 + h: 0.703 + a_begin: 0.019607843 # z_ini = 50. + a_end: 1.0 # z_end = 0. 
+ +# Parameters governing the time integration +TimeIntegration: + dt_min: 1e-6 + dt_max: 1e-2 + +# Parameters for the self-gravity scheme +Gravity: + eta: 0.025 + theta: 0.3 + comoving_softening: 0.0889 # 1/25th of the mean inter-particle separation: 88.9 kpc + max_physical_softening: 0.0889 # 1/25th of the mean inter-particle separation: 88.9 kpc + mesh_side_length: 64 + +# Parameters of the hydro scheme +SPH: + resolution_eta: 1.2348 # "48 Ngb" with the cubic spline kernel + CFL_condition: 0.1 + initial_temperature: 7075. # (1 + z_ini)^2 * 2.72K + minimal_temperature: 100. + +# Parameters governing the snapshots +Snapshots: + basename: snap + delta_time: 1.02 + scale_factor_first: 0.02 + +# Parameters governing the conserved quantities statistics +Statistics: + delta_time: 1.02 + scale_factor_first: 0.02 + +Scheduler: + max_top_level_cells: 8 + cell_split_size: 50 + +# Parameters related to the initial conditions +InitialConditions: + file_name: small_cosmo_volume.hdf5 + cleanup_h_factors: 1 + cleanup_velocity_factors: 1 + generate_gas_in_ics: 1 # Generate gas particles from the DM-only ICs + cleanup_smoothing_lengths: 1 # Since we generate gas, make use of the (expensive) cleaning-up procedure. 
+ +# Structure finding options (requires velociraptor) +StructureFinding: + config_file_name: stfconfig_input.cfg + basename: ./stf + output_time_format: 1 + scale_factor_first: 0.02 + delta_time: 1.02 + diff --git a/examples/SmallCosmoVolume_VELOCIraptor/stfconfig_input.cfg b/examples/SmallCosmoVolume_VELOCIraptor/stfconfig_input.cfg new file mode 100644 index 0000000000000000000000000000000000000000..4306bae3d23aab924ce8fa3a5c50e839823fbc2f --- /dev/null +++ b/examples/SmallCosmoVolume_VELOCIraptor/stfconfig_input.cfg @@ -0,0 +1,165 @@ +#suggested configuration file for hydro run and subhalo (and galaxy ie: associated baryons) catalog + +################################ +#input related +################################ +#input is from a cosmological simulation so can use parameters like box size, h, Omega_m to calculate length and density scales +Cosmological_input=1 + +#sets the total buffer size in bytes used to store temporary particle information +#of mpi read threads before they are broadcast to the appropriate waiting non-read threads +#if not set, default value is equivalent to 1e6 particles per mpi process, quite large +#but significantly minimises the number of send/receives +#in this example the buffer size is roughly that for a send/receive of 10000 particles +#for 100 mpi processes +MPI_particle_total_buf_size=100000000 + +#gadget input related +#NSPH_extra_blocks=0 #read extra sph blocks +#NStar_extra_blocks=0 #read extra star blocks +#NBH_extra_blocks=0 #read extra black hole blocks + +#HDF related input +#Set the HDF name convention, 0 is illustris, 1 is gadget x, 2 is Eagle, 3 is gizmo +HDF_name_convention=0 +#whether star particles are present in the input +Input_includes_star_particle=1 +#bhs present +Input_includes_bh_particle=1 +#no wind present +Input_includes_wind_particle=0 +#no tracers present +Input_includes_tracer_particle=0 +#no low res/extra dm particle types present +Input_includes_extradm_particle=0 + +################################ +#unit
options, should always be provided +################################ +#EDIT THIS SECTION!!!! +#conversion of output length units to kpc +Length_unit_to_kpc=1.0 +#conversion of output velocity units to km/s +Velocity_to_kms=1.0 +#conversion of output mass units to solar masses +Mass_to_solarmass=1.0 +#units conversion from input input to desired internal unit +Length_unit=1.0 #default code unit, +Velocity_unit=1.0 #default velocity unit, +Mass_unit=1.0 #default mass unit, +Gravity=43.0211349 #for 1e10 Msun, km/s and Mpc +Hubble_unit=100.0 # assuming units are km/s and Mpc, then value of Hubble in km/s/Mpc + +################################ +#search related options +################################ + +#how to search a simulation +Particle_search_type=1 #search all particles, see allvars for other types +#for baryon search +Baryon_searchflag=2 #if 1 search for baryons separately using phase-space search when identifying substructures, 2 allows special treatment in field FOF linking and phase-space substructure search, 0 treat the same as dark matter particles +#for search for substruture +Search_for_substructure=1 #if 0, end search once field objects are found +#also useful for zoom simulations or simulations of individual objects, setting this flag means no field structure search is run +Singlehalo_search=0 #if file is single halo in which one wishes to search for substructure +#additional option for field haloes +Keep_FOF=0 #if field 6DFOF search is done, allows to keep structures found in 3DFOF (can be interpreted as the inter halo stellar mass when only stellar search is used).\n + +#minimum size for structures +Minimum_size=20 #min 20 particles +Minimum_halo_size=-1 #if field halos have different minimum sizes, otherwise set to -1. 
+
+#for field fof halo search
+FoF_Field_search_type=3 #5 3DFOF search for field halos, 4 for 6DFOF clean up of field halos, 3 for 6DFOF with velocity scale distinct for each halo
+Halo_linking_length_factor=2.0 #factor by which Physical_linking_length is changed when searching for field halos. Typical values are ~2 when using iterative substructure search.
+Halo_velocity_linking_length_factor=5.0 #for 6d fof halo search increase ellv from substructure search
+
+#for mean field estimates and local velocity density distribution function estimator related quantities, rarely need to change this
+Cell_fraction = 0.01 #fraction of field fof halo used to determine mean velocity distribution function. Typical values are ~0.005-0.02
+Grid_type=1 #normal entropy based grid, shouldn't have to change
+Nsearch_velocity=32 #number of velocity neighbours used to calculate local velocity distribution function. Typical values are ~32
+Nsearch_physical=256 #number of physical neighbours from which the nearest velocity neighbour set is based. Typical values are 128-512
+
+#for substructure search, rarely ever need to change this
+FoF_search_type=1 #default phase-space FOF search. Don't really need to change
+Iterative_searchflag=1 #iterative substructure search, for substructure find initial candidate substructures with smaller linking lengths then expand search region
+Outlier_threshold=2.5 #outlier threshold for a particle to be considered residing in substructure, that is how dynamically distinct a particle is. Typical values are >2
+Velocity_ratio=2.0 #ratio of speeds used in phase-space FOF
+Velocity_opening_angle=0.10 #angle between velocities. 18 degrees here, typical values are ~10-30
+Physical_linking_length=0.10 #physical linking length. IF reading periodic volumes in gadget/hdf/ramses, in units of the effective inter-particle spacing. Otherwise in user defined code units. Here set to 0.10 as iterative flag one, values of 0.1-0.3 are typical.
+Velocity_linking_length=0.20 #where scaled by structure dispersion +Significance_level=1.0 #how significant a substructure is relative to Poisson noise. Values >= 1 are fine. + +#for iterative substructure search, rarely ever need to change this +Iterative_threshold_factor=1.0 #change in threshold value when using iterative search. Here no increase in threshold if iterative or not +Iterative_linking_length_factor=2.0 #increase in final linking final iterative substructure search will be sqrt(2.25)*this factor +Iterative_Vratio_factor=1.0 #change in Vratio when using iterative search. no change in vratio +Iterative_ThetaOp_factor=1.0 #change in velocity opening angle. no change in velocity opening angle + +#for checking for halo merger remnants, which are defined as large, well separated phase-space density maxima +Halo_core_search=2 # searches for separate 6dfof cores in field haloes, and then more than just flags halo as merging, assigns particles to each merging "halo". 2 is full separation, 1 is flagging, 0 is off +#if searching for cores, linking lengths. 
likely does not need to change much
+Use_adaptive_core_search=0 #calculate dispersions in configuration & vel space to determine linking lengths
+Use_phase_tensor_core_growth=2 #use full stepped phase-space tensor assignment
+Halo_core_ellx_fac=0.7 #how linking lengths are changed when searching for local 6DFOF cores,
+Halo_core_ellv_fac=2.0 #how velocity lengths based on dispersions are changed when searching for local 6DFOF cores
+Halo_core_ncellfac=0.005 #fraction of total halo particle number setting min size of a local 6DFOF core
+Halo_core_num_loops=8 #number of loops to iteratively search for cores
+Halo_core_loop_ellx_fac=0.75 #how much to change the configuration space linking per iteration
+Halo_core_loop_ellv_fac=1.0 #how much to change the velocity space linking per iteration
+Halo_core_loop_elln_fac=1.2 #how much to change the min number of particles per iteration
+Halo_core_phase_significance=2.0 #how significant a core must be in terms of dispersions (sigma) significance
+
+################################
+#Unbinding options (VELOCIraptor is able to accurately identify tidal debris so particles need not be bound to a structure)
+################################
+
+#unbinding related items
+Unbind_flag=1 #run unbinding
+#alpha factor used to determine whether particle is "bound" alpha*T+W<0. For standard subhalo catalogues use >0.9 but if interested in tidal debris 0.2-0.5
+Allowed_kinetic_potential_ratio=0.95
+#run unbinding of field structures, aka halos
+Bound_halos=0
+#simple Plummer softening length when calculating gravitational energy. If cosmological simulation with period, is fraction of interparticle spacing
+Softening_length=0.
+#don't keep background potential when unbinding +Keep_background_potential=0 + +################################ +#Cosmological parameters +#this is typically overwritten by information in the gadget/hdf header if those input file types are read +################################ +h_val=1.0 +Omega_m=0.3 +Omega_Lambda=0.7 +Critical_density=1.0 +Virial_density=200 #so-called virial overdensity value +Omega_b=0. #no baryons + +################################ +#Calculation of properties related options +################################ +#when calculating properties, for field objects calculate inclusive masses +Inclusive_halo_masses=1 #calculate inclusive masses +#ensures that output is comoving distances per little h +Comoving_units=0 + +################################ +#output related +################################ + +Write_group_array_file=0 #write a group array file +Separate_output_files=0 #separate output into field and substructure files similar to subfind +Binary_output=2 #binary output 1, ascii 0, and HDF 2 + +#halo ids are adjusted by this value * 1000000000000 (or 1000000 if code compiled with the LONGINTS option turned off) +#to ensure that halo ids are temporally unique. 
So if you had 100 snapshots, for snap 100 set this to 100 and 100*1000000000000 will
+#be added to the halo id as set for this snapshot, so halo 1 becomes halo 100*1000000000000+1 and halo 1 of snap 0 would just have ID=1
+
+#ALTER THIS as part of a script to get temporally unique ids
+Snapshot_value=SNAP
+
+################################
+#other options
+################################
+Verbose=0 #how talkative do you want the code to be, 0 not much, 1 a lot, 2 chatterbox
diff --git a/examples/SmallCosmoVolume_cooling/README b/examples/SmallCosmoVolume_cooling/README
new file mode 100644
index 0000000000000000000000000000000000000000..357250f79e5e2b5d5408b3685c95767838f4bb70
--- /dev/null
+++ b/examples/SmallCosmoVolume_cooling/README
@@ -0,0 +1,22 @@
+Small LCDM cosmological simulation generated by C. Power. Cosmology
+is WMAP9 and the box is 100Mpc/h in size with 64^3 particles.
+We use a softening length of 1/25th of the mean inter-particle separation.
+
+The ICs have been generated to run with Gadget-2 so we need to switch
+on the options to cancel the h-factors and a-factors at reading time.
+We generate gas from the ICs using SWIFT's internal mechanism and set the
+temperature to the expected gas temperature at this redshift.
+
+This example runs with cooling switched on. Depending on the cooling
+model chosen at the time SWIFT was configured, the answer will be
+different. Interesting cases to compare to the no-cooling case are
+a constant cooling rate or Compton cooling.
+
+The 'plotTempEvolution.py' script plots the temperature evolution of
+the gas in the simulated volume.
+
+The 'plotRhoT.py' script plots the phase-space diagram for a given
+snapshot.
+ +MD5 checksum of the ICs: +08736c3101fd738e22f5159f78e6022b small_cosmo_volume.hdf5 diff --git a/examples/SmallCosmoVolume_cooling/getCoolingTables.sh b/examples/SmallCosmoVolume_cooling/getCoolingTables.sh new file mode 100755 index 0000000000000000000000000000000000000000..ecd581fd3dd44a13af1218d7dee6af72a25a324a --- /dev/null +++ b/examples/SmallCosmoVolume_cooling/getCoolingTables.sh @@ -0,0 +1,3 @@ +#!/bin/bash +wget http://virgodb.cosma.dur.ac.uk/swift-webstorage/CoolingTables/EAGLE/coolingtables.tar.gz +tar -xvzf coolingtables.tar.gz diff --git a/examples/SmallCosmoVolume_cooling/getIC.sh b/examples/SmallCosmoVolume_cooling/getIC.sh new file mode 100755 index 0000000000000000000000000000000000000000..3b8136cc5aca00a25792655c6c505cfeeb0f2bc9 --- /dev/null +++ b/examples/SmallCosmoVolume_cooling/getIC.sh @@ -0,0 +1,3 @@ +#!/bin/bash +wget http://virgodb.cosma.dur.ac.uk/swift-webstorage/ICs/small_cosmo_volume.hdf5 + diff --git a/examples/SmallCosmoVolume_cooling/plotRhoT.py b/examples/SmallCosmoVolume_cooling/plotRhoT.py new file mode 100644 index 0000000000000000000000000000000000000000..4ba8ad66daca1d9614be8917a77407dd99209dea --- /dev/null +++ b/examples/SmallCosmoVolume_cooling/plotRhoT.py @@ -0,0 +1,163 @@ +################################################################################ +# This file is part of SWIFT. +# Copyright (c) 2018 Matthieu Schaller (matthieu.schaller@durham.ac.uk) +# +# This program is free software: you can redistribute it and/or modify +# it under the terms of the GNU Lesser General Public License as published +# by the Free Software Foundation, either version 3 of the License, or +# (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. 
+# +# You should have received a copy of the GNU Lesser General Public License +# along with this program. If not, see <http://www.gnu.org/licenses/>. +# +################################################################################ + +# Computes the temperature evolution of the gas in a cosmological box + +# Physical constants needed for internal energy to temperature conversion +k_in_J_K = 1.38064852e-23 +mH_in_kg = 1.6737236e-27 + +import matplotlib + +matplotlib.use("Agg") +from pylab import * +import h5py +import os.path + +# Plot parameters +params = { + "axes.labelsize": 10, + "axes.titlesize": 10, + "font.size": 9, + "legend.fontsize": 9, + "xtick.labelsize": 10, + "ytick.labelsize": 10, + "text.usetex": True, + "figure.figsize": (3.15, 3.15), + "figure.subplot.left": 0.15, + "figure.subplot.right": 0.99, + "figure.subplot.bottom": 0.13, + "figure.subplot.top": 0.99, + "figure.subplot.wspace": 0.15, + "figure.subplot.hspace": 0.12, + "lines.markersize": 6, + "lines.linewidth": 2.0, + "text.latex.unicode": True, +} +rcParams.update(params) +rc("font", **{"family": "sans-serif", "sans-serif": ["Times"]}) + +snap = int(sys.argv[1]) + +# Read the simulation data +sim = h5py.File("snap_%04d.hdf5" % snap, "r") +boxSize = sim["/Header"].attrs["BoxSize"][0] +time = sim["/Header"].attrs["Time"][0] +z = sim["/Cosmology"].attrs["Redshift"][0] +a = sim["/Cosmology"].attrs["Scale-factor"][0] +scheme = sim["/HydroScheme"].attrs["Scheme"][0] +kernel = sim["/HydroScheme"].attrs["Kernel function"][0] +neighbours = sim["/HydroScheme"].attrs["Kernel target N_ngb"][0] +eta = sim["/HydroScheme"].attrs["Kernel eta"][0] +alpha = sim["/HydroScheme"].attrs["Alpha viscosity"][0] +H_mass_fraction = sim["/HydroScheme"].attrs["Hydrogen mass fraction"][0] +H_transition_temp = sim["/HydroScheme"].attrs[ + "Hydrogen ionization transition temperature" +][0] +T_initial = sim["/HydroScheme"].attrs["Initial temperature"][0] +T_minimal = sim["/HydroScheme"].attrs["Minimal temperature"][0] 
+git = sim["Code"].attrs["Git Revision"]
+
+# Cosmological parameters
+H_0 = sim["/Cosmology"].attrs["H0 [internal units]"][0]
+gas_gamma = sim["/HydroScheme"].attrs["Adiabatic index"][0]
+
+unit_length_in_cgs = sim["/Units"].attrs["Unit length in cgs (U_L)"]
+unit_mass_in_cgs = sim["/Units"].attrs["Unit mass in cgs (U_M)"]
+unit_time_in_cgs = sim["/Units"].attrs["Unit time in cgs (U_t)"]
+
+unit_length_in_si = 0.01 * unit_length_in_cgs
+unit_mass_in_si = 0.001 * unit_mass_in_cgs
+unit_time_in_si = unit_time_in_cgs
+
+# Primordial mean molecular weight as a function of temperature
+def mu(T, H_frac=H_mass_fraction, T_trans=H_transition_temp):
+    if T > T_trans:
+        return 4.0 / (8.0 - 5.0 * (1.0 - H_frac))
+    else:
+        return 4.0 / (1.0 + 3.0 * H_frac)
+
+
+# Temperature of some primordial gas with a given internal energy
+def T(u, H_frac=H_mass_fraction, T_trans=H_transition_temp):
+    T_over_mu = (gas_gamma - 1.0) * u * mH_in_kg / k_in_J_K
+    ret = np.ones(np.size(u)) * T_trans
+
+    # Enough energy to be ionized?
+    mask_ionized = T_over_mu > (T_trans + 1) / mu(T_trans + 1, H_frac, T_trans)
+    if np.sum(mask_ionized) > 0:
+        ret[mask_ionized] = T_over_mu[mask_ionized] * mu(T_trans * 10, H_frac, T_trans)
+
+    # Neutral gas?
+ mask_neutral = T_over_mu < (T_trans - 1) / mu((T_trans - 1), H_frac, T_trans) + if np.sum(mask_neutral) > 0: + ret[mask_neutral] = T_over_mu[mask_neutral] * mu(0, H_frac, T_trans) + + return ret + + +rho = sim["/PartType0/Density"][:] +u = sim["/PartType0/InternalEnergy"][:] + +# Compute the temperature +u *= unit_length_in_si ** 2 / unit_time_in_si ** 2 +u /= a ** (3 * (gas_gamma - 1.0)) +Temp = T(u) + +# Compute the physical density +rho *= unit_mass_in_cgs / unit_length_in_cgs ** 3 +rho /= a ** 3 +rho /= mH_in_kg + +# Life is better in log-space +log_T = np.log10(Temp) +log_rho = np.log10(rho) + + +# Make a 2D histogram +log_rho_min = -6 +log_rho_max = 3 +log_T_min = 1 +log_T_max = 8 + +bins_x = np.linspace(log_rho_min, log_rho_max, 54) +bins_y = np.linspace(log_T_min, log_T_max, 54) +H, _, _ = histogram2d(log_rho, log_T, bins=[bins_x, bins_y], normed=True) + + +# Plot the interesting quantities +figure() + +pcolormesh(bins_x, bins_y, np.log10(H).T) + +text(-5, 8.0, "$z=%.2f$" % z) + +xticks( + [-5, -4, -3, -2, -1, 0, 1, 2, 3], + ["", "$10^{-4}$", "", "$10^{-2}$", "", "$10^0$", "", "$10^2$", ""], +) +yticks( + [2, 3, 4, 5, 6, 7, 8], ["$10^{2}$", "", "$10^{4}$", "", "$10^{6}$", "", "$10^8$"] +) +xlabel("${\\rm Density}~n_{\\rm H}~[{\\rm cm^{-3}}]$", labelpad=0) +ylabel("${\\rm Temperature}~T~[{\\rm K}]$", labelpad=2) +xlim(-5.2, 3.2) +ylim(1, 8.5) + +savefig("rhoT_%04d.png" % snap, dpi=200) diff --git a/examples/SmallCosmoVolume_cooling/plotTempEvolution.py b/examples/SmallCosmoVolume_cooling/plotTempEvolution.py new file mode 100644 index 0000000000000000000000000000000000000000..988ea36163203a50928cc7fd8f9c81f4d3a377ff --- /dev/null +++ b/examples/SmallCosmoVolume_cooling/plotTempEvolution.py @@ -0,0 +1,193 @@ +################################################################################ +# This file is part of SWIFT. 
+# Copyright (c) 2018 Matthieu Schaller (matthieu.schaller@durham.ac.uk) +# +# This program is free software: you can redistribute it and/or modify +# it under the terms of the GNU Lesser General Public License as published +# by the Free Software Foundation, either version 3 of the License, or +# (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU Lesser General Public License +# along with this program. If not, see <http://www.gnu.org/licenses/>. +# +################################################################################ + +# Computes the temperature evolution of the gas in a cosmological box + +# Physical constants needed for internal energy to temperature conversion +k_in_J_K = 1.38064852e-23 +mH_in_kg = 1.6737236e-27 + +# Number of snapshots generated +n_snapshots = 200 + +import matplotlib +matplotlib.use("Agg") +from pylab import * +import h5py +import os.path + +# Plot parameters +params = {'axes.labelsize': 10, +'axes.titlesize': 10, +'font.size': 9, +'legend.fontsize': 9, +'xtick.labelsize': 10, +'ytick.labelsize': 10, +'text.usetex': True, + 'figure.figsize' : (3.15,3.15), +'figure.subplot.left' : 0.14, +'figure.subplot.right' : 0.99, +'figure.subplot.bottom' : 0.12, +'figure.subplot.top' : 0.99, +'figure.subplot.wspace' : 0.15, +'figure.subplot.hspace' : 0.12, +'lines.markersize' : 6, +'lines.linewidth' : 2., +'text.latex.unicode': True +} +rcParams.update(params) +rc('font',**{'family':'sans-serif','sans-serif':['Times']}) + +# Read the simulation data +sim = h5py.File("snap_0000.hdf5", "r") +boxSize = sim["/Header"].attrs["BoxSize"][0] +time = sim["/Header"].attrs["Time"][0] +scheme = sim["/HydroScheme"].attrs["Scheme"][0] +kernel = sim["/HydroScheme"].attrs["Kernel 
function"][0] +neighbours = sim["/HydroScheme"].attrs["Kernel target N_ngb"][0] +eta = sim["/HydroScheme"].attrs["Kernel eta"][0] +alpha = sim["/HydroScheme"].attrs["Alpha viscosity"][0] +H_mass_fraction = sim["/HydroScheme"].attrs["Hydrogen mass fraction"][0] +H_transition_temp = sim["/HydroScheme"].attrs["Hydrogen ionization transition temperature"][0] +T_initial = sim["/HydroScheme"].attrs["Initial temperature"][0] +T_minimal = sim["/HydroScheme"].attrs["Minimal temperature"][0] +git = sim["Code"].attrs["Git Revision"] +cooling_model = sim["/SubgridScheme"].attrs["Cooling Model"] + +if cooling_model == "Constant Lambda": + Lambda = sim["/SubgridScheme"].attrs["Lambda/n_H^2 [cgs]"][0] + +# Cosmological parameters +H_0 = sim["/Cosmology"].attrs["H0 [internal units]"][0] +gas_gamma = sim["/HydroScheme"].attrs["Adiabatic index"][0] + +unit_length_in_cgs = sim["/Units"].attrs["Unit length in cgs (U_L)"] +unit_mass_in_cgs = sim["/Units"].attrs["Unit mass in cgs (U_M)"] +unit_time_in_cgs = sim["/Units"].attrs["Unit time in cgs (U_t)"] + +unit_length_in_si = 0.01 * unit_length_in_cgs +unit_mass_in_si = 0.001 * unit_mass_in_cgs +unit_time_in_si = unit_time_in_cgs + +# Primoridal mean molecular weight as a function of temperature +def mu(T, H_frac=H_mass_fraction, T_trans=H_transition_temp): + if T > T_trans: + return 4. / (8. - 5. * (1. - H_frac)) + else: + return 4. / (1. + 3. * H_frac) + +# Temperature of some primoridal gas with a given internal energy +def T(u, H_frac=H_mass_fraction, T_trans=H_transition_temp): + T_over_mu = (gas_gamma - 1.) * u * mH_in_kg / k_in_J_K + ret = np.ones(np.size(u)) * T_trans + + # Enough energy to be ionized? + mask_ionized = (T_over_mu > (T_trans+1) / mu(T_trans+1, H_frac, T_trans)) + if np.sum(mask_ionized) > 0: + ret[mask_ionized] = T_over_mu[mask_ionized] * mu(T_trans*10, H_frac, T_trans) + + # Neutral gas? 
+ mask_neutral = (T_over_mu < (T_trans-1) / mu((T_trans-1), H_frac, T_trans)) + if np.sum(mask_neutral) > 0: + ret[mask_neutral] = T_over_mu[mask_neutral] * mu(0, H_frac, T_trans) + + return ret + +z = np.zeros(n_snapshots) +a = np.zeros(n_snapshots) +T_mean = np.zeros(n_snapshots) +T_std = np.zeros(n_snapshots) +T_log_mean = np.zeros(n_snapshots) +T_log_std = np.zeros(n_snapshots) +T_median = np.zeros(n_snapshots) +T_min = np.zeros(n_snapshots) +T_max = np.zeros(n_snapshots) + +# Loop over all the snapshots +for i in range(n_snapshots): + sim = h5py.File("snap_%04d.hdf5"%i, "r") + + z[i] = sim["/Cosmology"].attrs["Redshift"][0] + a[i] = sim["/Cosmology"].attrs["Scale-factor"][0] + + u = sim["/PartType0/InternalEnergy"][:] + + # Compute the temperature + u *= (unit_length_in_si**2 / unit_time_in_si**2) + u /= a[i]**(3 * (gas_gamma - 1.)) + Temp = T(u) + + # Gather statistics + T_median[i] = np.median(Temp) + T_mean[i] = Temp.mean() + T_std[i] = Temp.std() + T_log_mean[i] = np.log10(Temp).mean() + T_log_std[i] = np.log10(Temp).std() + T_min[i] = Temp.min() + T_max[i] = Temp.max() + +# CMB evolution +a_evol = np.logspace(-3, 0, 60) +T_cmb = (1. 
/ a_evol)**2 * 2.72 + +# Plot the interesting quantities +figure() +subplot(111, xscale="log", yscale="log") + +fill_between(a, T_mean-T_std, T_mean+T_std, color='C0', alpha=0.1) +plot(a, T_max, ls='-.', color='C0', lw=1., label="${\\rm max}~T$") +plot(a, T_min, ls=':', color='C0', lw=1., label="${\\rm min}~T$") +plot(a, T_mean, color='C0', label="${\\rm mean}~T$", lw=1.5) +fill_between(a, 10**(T_log_mean-T_log_std), 10**(T_log_mean+T_log_std), color='C1', alpha=0.1) +plot(a, 10**T_log_mean, color='C1', label="${\\rm mean}~{\\rm log} T$", lw=1.5) +plot(a, T_median, color='C2', label="${\\rm median}~T$", lw=1.5) + +legend(loc="upper left", frameon=False, handlelength=1.5) + +# Cooling model +if cooling_model == "Constant Lambda": + text(1e-2, 6e4, "$\Lambda_{\\rm const}/n_{\\rm H}^2 = %.1f\\times10^{%d}~[\\rm{cgs}]$"%(Lambda/10.**(int(log10(Lambda))), log10(Lambda)), fontsize=7) +elif cooling_model == "EAGLE": + text(1e-2, 6e4, "EAGLE (Wiersma et al. (2009)") +else: + text(1e-2, 6e4, "No cooling") + +# Expected lines +plot([1e-10, 1e10], [H_transition_temp, H_transition_temp], 'k--', lw=0.5, alpha=0.7) +text(2.5e-2, H_transition_temp*1.07, "$T_{\\rm HII\\rightarrow HI}$", va="bottom", alpha=0.7, fontsize=8) +plot([1e-10, 1e10], [T_minimal, T_minimal], 'k--', lw=0.5, alpha=0.7) +text(1e-2, T_minimal*0.8, "$T_{\\rm min}$", va="top", alpha=0.7, fontsize=8) +plot(a_evol, T_cmb, 'k--', lw=0.5, alpha=0.7) +text(a_evol[20], T_cmb[20]*0.55, "$(1+z)^2\\times T_{\\rm CMB,0}$", rotation=-34, alpha=0.7, fontsize=8, va="top", bbox=dict(facecolor='w', edgecolor='none', pad=1.0, alpha=0.9)) + + +redshift_ticks = np.array([0., 1., 2., 5., 10., 20., 50., 100.]) +redshift_labels = ["$0$", "$1$", "$2$", "$5$", "$10$", "$20$", "$50$", "$100$"] +a_ticks = 1. / (redshift_ticks + 1.) 
+ +xticks(a_ticks, redshift_labels) +minorticks_off() + +xlabel("${\\rm Redshift}~z$", labelpad=0) +ylabel("${\\rm Temperature}~T~[{\\rm K}]$", labelpad=0) +xlim(9e-3, 1.1) +ylim(20, 2.5e7) + +savefig("Temperature_evolution.png", dpi=200) + diff --git a/examples/SmallCosmoVolume_cooling/run.sh b/examples/SmallCosmoVolume_cooling/run.sh new file mode 100755 index 0000000000000000000000000000000000000000..5a2c67c77e2e2ec99d0c9659f6efac8434a818ed --- /dev/null +++ b/examples/SmallCosmoVolume_cooling/run.sh @@ -0,0 +1,20 @@ +#!/bin/bash + + # Generate the initial conditions if they are not present. +if [ ! -e small_cosmo_volume.hdf5 ] +then + echo "Fetching initial conditions for the small cosmological volume example..." + ./getIC.sh +fi + +if [ ! -e coolingtables ] +then + echo "Fetching cooling tables for the small cosmological volume example..." + ./getCoolingTables.sh +fi + +# Run SWIFT +../swift --cosmology --hydro --self-gravity --cooling --threads=8 small_cosmo_volume.yml 2>&1 | tee output.log + +# Plot the temperature evolution +python plotTempEvolution.py diff --git a/examples/SmallCosmoVolume_cooling/small_cosmo_volume.yml b/examples/SmallCosmoVolume_cooling/small_cosmo_volume.yml new file mode 100644 index 0000000000000000000000000000000000000000..8ad9ae074f4d6c3f00ad95ec5dfb11255045d01a --- /dev/null +++ b/examples/SmallCosmoVolume_cooling/small_cosmo_volume.yml @@ -0,0 +1,87 @@ +# Define the system of units to use internally. +InternalUnitSystem: + UnitMass_in_cgs: 1.98848e43 # 10^10 M_sun + UnitLength_in_cgs: 3.08567758e24 # 1 Mpc + UnitVelocity_in_cgs: 1e5 # 1 km/s + UnitCurrent_in_cgs: 1 # Amperes + UnitTemp_in_cgs: 1 # Kelvin + +Cosmology: # WMAP9 cosmology + Omega_m: 0.276 + Omega_lambda: 0.724 + Omega_b: 0.0455 + h: 0.703 + a_begin: 0.019607843 # z_ini = 50. + a_end: 1.0 # z_end = 0. 
+ +# Parameters governing the time integration +TimeIntegration: + dt_min: 1e-6 + dt_max: 1e-2 + +# Parameters for the self-gravity scheme +Gravity: + eta: 0.025 + theta: 0.3 + comoving_softening: 0.0889 # 1/25th of the mean inter-particle separation: 88.9 kpc + max_physical_softening: 0.0889 # 1/25th of the mean inter-particle separation: 88.9 kpc + mesh_side_length: 64 + +# Parameters of the hydro scheme +SPH: + resolution_eta: 1.2348 # "48 Ngb" with the cubic spline kernel + CFL_condition: 0.1 + initial_temperature: 7075. # (1 + z_ini)^2 * 2.72K + minimal_temperature: 100. + +# Parameters governing the snapshots +Snapshots: + basename: snap + delta_time: 1.02 + scale_factor_first: 0.02 + +# Parameters governing the conserved quantities statistics +Statistics: + delta_time: 1.02 + scale_factor_first: 0.02 + +Scheduler: + max_top_level_cells: 8 + cell_split_size: 50 + +# Parameters related to the initial conditions +InitialConditions: + file_name: small_cosmo_volume.hdf5 + periodic: 1 + cleanup_h_factors: 1 + cleanup_velocity_factors: 1 + generate_gas_in_ics: 1 # Generate gas particles from the DM-only ICs + cleanup_smoothing_lengths: 1 # Since we generate gas, make use of the (expensive) cleaning-up procedure. + +# Constant lambda cooling function +LambdaCooling: + lambda_nH2_cgs: 1e-26 # Cooling rate divided by square Hydrogen number density (in cgs units [erg * s^-1 * cm^3]) + +# EAGLE cooling function +EagleCooling: + filename: ./coolingtables/ + reionisation_redshift: 8.5 + he_reion: 1 + he_reion_z_center: 3.5 + he_reion_z_sigma: 0.5 + he_reion_ev_pH: 2.0 + +# Impose primoridal metallicity +EAGLEChemistry: + InitMetallicity: 0.0 + InitAbundance_Hydrogen: 0.755 + InitAbundance_Helium: 0.245 + InitAbundance_Carbon: 0. + InitAbundance_Nitrogen: 0. + InitAbundance_Oxygen: 0. + InitAbundance_Neon: 0. + InitAbundance_Magnesium: 0. + InitAbundance_Silicon: 0. + InitAbundance_Iron: 0. + CalciumOverSilicon: 0. + SulphurOverSilicon: 0. 
diff --git a/examples/SmoothedMetallicity/makeIC.py b/examples/SmoothedMetallicity/makeIC.py index 86679d5efe897b9dfae7db94b36d74bb047661e6..542b4c5911c942015d16595f42e73ca8978d20da 100644 --- a/examples/SmoothedMetallicity/makeIC.py +++ b/examples/SmoothedMetallicity/makeIC.py @@ -84,10 +84,6 @@ grp.attrs["MassTable"] = [0.0, 0.0, 0.0, 0.0, 0.0, 0.0] grp.attrs["Flag_Entropy_ICs"] = 0 grp.attrs["Dimension"] = 3 -# Runtime parameters -grp = file.create_group("/RuntimePars") -grp.attrs["PeriodicBoundariesOn"] = 1 - # Units grp = file.create_group("/Units") grp.attrs["Unit length in cgs (U_L)"] = 1. diff --git a/examples/SmoothedMetallicity/run.sh b/examples/SmoothedMetallicity/run.sh index de8c55d678bcb611934af450940d8ed8e6c15d6b..9f31eac3692be4ce4ad6bb554d6a95c11d21e791 100755 --- a/examples/SmoothedMetallicity/run.sh +++ b/examples/SmoothedMetallicity/run.sh @@ -13,7 +13,7 @@ then fi # Run SWIFT -../swift -n 1 -s -t 4 smoothed_metallicity.yml 2>&1 | tee output.log +../swift --steps=1 --hydro --threads=4 smoothed_metallicity.yml 2>&1 | tee output.log # Plot the solution python plotSolution.py 1 diff --git a/examples/SmoothedMetallicity/smoothed_metallicity.yml b/examples/SmoothedMetallicity/smoothed_metallicity.yml index 2e37695392b12c545bbbdbe7fd94748d5b3b9ff8..f6841c6bd0744b4bbeacbe136a126b4ed5631f6f 100644 --- a/examples/SmoothedMetallicity/smoothed_metallicity.yml +++ b/examples/SmoothedMetallicity/smoothed_metallicity.yml @@ -31,4 +31,5 @@ SPH: # Parameters related to the initial conditions InitialConditions: file_name: ./smoothed_metallicity.hdf5 # The file to read + periodic: 1 diff --git a/examples/SodShockSpherical_2D/makeIC.py b/examples/SodShockSpherical_2D/makeIC.py index ac9f6e193769d7466f5b8e41a408da2350777be6..bc2c7ed1dcae5adfbfdcaf01c6b5a36bf5669e9e 100644 --- a/examples/SodShockSpherical_2D/makeIC.py +++ b/examples/SodShockSpherical_2D/makeIC.py @@ -100,10 +100,6 @@ grp.attrs["MassTable"] = [0.0, 0.0, 0.0, 0.0, 0.0, 0.0] 
grp.attrs["Flag_Entropy_ICs"] = 0 grp.attrs["Dimension"] = 2 -#Runtime parameters -grp = file.create_group("/RuntimePars") -grp.attrs["PeriodicBoundariesOn"] = 1 - #Units grp = file.create_group("/Units") grp.attrs["Unit length in cgs (U_L)"] = 1. diff --git a/examples/SodShockSpherical_2D/run.sh b/examples/SodShockSpherical_2D/run.sh index d662d20f40ef9e221285d5820e867607804e9dbe..90be0cc83c53bee0c53d3e522ce989d829549579 100755 --- a/examples/SodShockSpherical_2D/run.sh +++ b/examples/SodShockSpherical_2D/run.sh @@ -13,7 +13,7 @@ then fi # Run SWIFT -../swift -s -t 1 sodShock.yml 2>&1 | tee output.log +../swift --hydro --threads=1 sodShock.yml 2>&1 | tee output.log # Get the high resolution 1D reference solution if not present. if [ ! -e sodShockSpherical2D_exact.txt ] diff --git a/examples/SodShockSpherical_2D/sodShock.yml b/examples/SodShockSpherical_2D/sodShock.yml index a26ab95b21c782ce83310038432ac08df0e024c3..4ef13c26ccf55163f9276b6e095c351efd9ecb35 100644 --- a/examples/SodShockSpherical_2D/sodShock.yml +++ b/examples/SodShockSpherical_2D/sodShock.yml @@ -31,4 +31,5 @@ SPH: # Parameters related to the initial conditions InitialConditions: file_name: ./sodShock.hdf5 # The file to read + periodic: 1 diff --git a/examples/SodShockSpherical_3D/makeIC.py b/examples/SodShockSpherical_3D/makeIC.py index be8f9b61a1beef00f49786860ce94287b30e2ab3..3884fc29280209d465b721230ae19b474a42f6a0 100644 --- a/examples/SodShockSpherical_3D/makeIC.py +++ b/examples/SodShockSpherical_3D/makeIC.py @@ -102,10 +102,6 @@ grp.attrs["MassTable"] = [0.0, 0.0, 0.0, 0.0, 0.0, 0.0] grp.attrs["Flag_Entropy_ICs"] = 0 grp.attrs["Dimension"] = 3 -#Runtime parameters -grp = file.create_group("/RuntimePars") -grp.attrs["PeriodicBoundariesOn"] = 1 - #Units grp = file.create_group("/Units") grp.attrs["Unit length in cgs (U_L)"] = 1. 
diff --git a/examples/SodShockSpherical_3D/run.sh b/examples/SodShockSpherical_3D/run.sh index faf979869e175172ce31db6ac5021daf1758f3b0..b28b69e0aa33d3a6657aae0db819298c66e26860 100755 --- a/examples/SodShockSpherical_3D/run.sh +++ b/examples/SodShockSpherical_3D/run.sh @@ -13,7 +13,7 @@ then fi # Run SWIFT -../swift -s -t 4 sodShock.yml 2>&1 | tee output.log +../swift --hydro --threads=4 sodShock.yml 2>&1 | tee output.log # Get the high resolution 1D reference solution if not present. if [ ! -e sodShockSpherical3D_exact.txt ] diff --git a/examples/SodShockSpherical_3D/sodShock.yml b/examples/SodShockSpherical_3D/sodShock.yml index 3fc4a1fb2b8cc5f6a603abf4c87ac99c7647b9bd..16d3bd313cf8a365fb82d3142ba1ac4fd065d193 100644 --- a/examples/SodShockSpherical_3D/sodShock.yml +++ b/examples/SodShockSpherical_3D/sodShock.yml @@ -32,4 +32,5 @@ SPH: # Parameters related to the initial conditions InitialConditions: file_name: ./sodShock.hdf5 # The file to read + periodic: 1 diff --git a/examples/SodShock_1D/makeIC.py b/examples/SodShock_1D/makeIC.py index a5c7f03b24d10e81057dbe25855f33f795218f19..d26bbbb4dbf71c1d6a63ad3c7900edfabe0fb9ec 100644 --- a/examples/SodShock_1D/makeIC.py +++ b/examples/SodShock_1D/makeIC.py @@ -92,10 +92,6 @@ grp.attrs["MassTable"] = [0.0, 0.0, 0.0, 0.0, 0.0, 0.0] grp.attrs["Flag_Entropy_ICs"] = 0 grp.attrs["Dimension"] = 1 -#Runtime parameters -grp = file.create_group("/RuntimePars") -grp.attrs["PeriodicBoundariesOn"] = 1 - #Units grp = file.create_group("/Units") grp.attrs["Unit length in cgs (U_L)"] = 1. 
diff --git a/examples/SodShock_1D/plotSolution.py b/examples/SodShock_1D/plotSolution.py index e001a8d87a03cb246be63ab10d245f95eb1a7ce7..0149d66a0c28c777a4265da10d86ed160086995d 100644 --- a/examples/SodShock_1D/plotSolution.py +++ b/examples/SodShock_1D/plotSolution.py @@ -70,11 +70,11 @@ snap = int(sys.argv[1]) sim = h5py.File("sodShock_%04d.hdf5"%snap, "r") boxSize = sim["/Header"].attrs["BoxSize"][0] time = sim["/Header"].attrs["Time"][0] -scheme = sim["/HydroScheme"].attrs["Scheme"] -kernel = sim["/HydroScheme"].attrs["Kernel function"] +scheme = str(sim["/HydroScheme"].attrs["Scheme"]) +kernel = str(sim["/HydroScheme"].attrs["Kernel function"]) neighbours = sim["/HydroScheme"].attrs["Kernel target N_ngb"] eta = sim["/HydroScheme"].attrs["Kernel eta"] -git = sim["Code"].attrs["Git Revision"] +git = str(sim["Code"].attrs["Git Revision"]) x = sim["/PartType0/Coordinates"][:,0] v = sim["/PartType0/Velocities"][:,0] @@ -82,6 +82,11 @@ u = sim["/PartType0/InternalEnergy"][:] S = sim["/PartType0/Entropy"][:] P = sim["/PartType0/Pressure"][:] rho = sim["/PartType0/Density"][:] +try: + alpha = sim["/PartType0/Viscosity"][:] + plot_alpha = True +except: + plot_alpha = False N = 1000 # Number of points x_min = -1. 
@@ -259,14 +264,23 @@ ylabel("${\\rm{Internal~Energy}}~u$", labelpad=0) xlim(-0.5, 0.5) ylim(0.8, 2.2) -# Entropy profile --------------------------------- +# Entropy/alpha profile --------------------------------- subplot(235) -plot(x, S, '.', color='r', ms=4.0) -plot(x_s, s_s, '--', color='k', alpha=0.8, lw=1.2) + +if plot_alpha: + plot(x, alpha, '.', color='r', ms=4.0) + ylabel(r"${\rm{Viscosity}}~\alpha$", labelpad=0) + # Show location of shock + plot([x_56, x_56], [-100, 100], color="k", alpha=0.5, ls="dashed", lw=1.2) + ylim(0, 1) +else: + plot(x, S, '.', color='r', ms=4.0) + plot(x_s, s_s, '--', color='k', alpha=0.8, lw=1.2) + ylabel("${\\rm{Entropy}}~S$", labelpad=0) + ylim(0.8, 3.8) + xlabel("${\\rm{Position}}~x$", labelpad=0) -ylabel("${\\rm{Entropy}}~S$", labelpad=0) xlim(-0.5, 0.5) -ylim(0.8, 3.8) # Information ------------------------------------- subplot(236, frameon=False) @@ -284,5 +298,6 @@ ylim(0, 1) xticks([]) yticks([]) +tight_layout() savefig("SodShock.png", dpi=200) diff --git a/examples/SodShock_1D/run.sh b/examples/SodShock_1D/run.sh index 4be4254baa4a87b105a5f3c1bfbf9059348a1e9e..fb2ba77daca880a36c58949983cf19fd631cb102 100755 --- a/examples/SodShock_1D/run.sh +++ b/examples/SodShock_1D/run.sh @@ -8,7 +8,7 @@ then fi # Run SWIFT -../swift -s -t 1 sodShock.yml 2>&1 | tee output.log +../swift --hydro --threads=1 sodShock.yml 2>&1 | tee output.log # Plot the result python plotSolution.py 1 diff --git a/examples/SodShock_1D/sodShock.yml b/examples/SodShock_1D/sodShock.yml index e827edadb9c287975d83214249d4fdd7734a5f6c..69554b4db733166fc5dbb6d198966fd8f9b8d49c 100644 --- a/examples/SodShock_1D/sodShock.yml +++ b/examples/SodShock_1D/sodShock.yml @@ -31,4 +31,4 @@ SPH: # Parameters related to the initial conditions InitialConditions: file_name: ./sodShock.hdf5 # The file to read - + periodic: 1 diff --git a/examples/SodShock_2D/makeIC.py b/examples/SodShock_2D/makeIC.py index 
850ca24f54c39990a9b0c54c0d2f361a2aa01e95..2d3bd75fcc41e0fee6dd7cfde62873980bbc7143 100644 --- a/examples/SodShock_2D/makeIC.py +++ b/examples/SodShock_2D/makeIC.py @@ -98,10 +98,6 @@ grp.attrs["MassTable"] = [0.0, 0.0, 0.0, 0.0, 0.0, 0.0] grp.attrs["Flag_Entropy_ICs"] = 0 grp.attrs["Dimension"] = 2 -#Runtime parameters -grp = file.create_group("/RuntimePars") -grp.attrs["PeriodicBoundariesOn"] = 1 - #Units grp = file.create_group("/Units") grp.attrs["Unit length in cgs (U_L)"] = 1. diff --git a/examples/SodShock_2D/run.sh b/examples/SodShock_2D/run.sh index 9e6bbfdf1c0a7c206ce6966fdca7b20a28047dd8..730cabcb9d51c0c870b38512ad1ce6ed80253ebd 100755 --- a/examples/SodShock_2D/run.sh +++ b/examples/SodShock_2D/run.sh @@ -13,6 +13,6 @@ then fi # Run SWIFT -../swift -s -t 1 sodShock.yml 2>&1 | tee output.log +../swift --hydro --threads=1 sodShock.yml 2>&1 | tee output.log python plotSolution.py 1 diff --git a/examples/SodShock_2D/sodShock.yml b/examples/SodShock_2D/sodShock.yml index 51a188b6d4537d490cb837a03dab15f74c3b083c..b831dd78278fea619d75e2db8806cf00d8faf575 100644 --- a/examples/SodShock_2D/sodShock.yml +++ b/examples/SodShock_2D/sodShock.yml @@ -31,4 +31,5 @@ SPH: # Parameters related to the initial conditions InitialConditions: file_name: ./sodShock.hdf5 # The file to read + periodic: 1 diff --git a/examples/SodShock_3D/makeIC.py b/examples/SodShock_3D/makeIC.py index c71c07c6c97bb715c580f747cf8d39ddf08445c3..69f1bc506680d3f2f149c0fd7b75b069f9b00b64 100644 --- a/examples/SodShock_3D/makeIC.py +++ b/examples/SodShock_3D/makeIC.py @@ -98,10 +98,6 @@ grp.attrs["MassTable"] = [0.0, 0.0, 0.0, 0.0, 0.0, 0.0] grp.attrs["Flag_Entropy_ICs"] = 0 grp.attrs["Dimension"] = 3 -#Runtime parameters -grp = file.create_group("/RuntimePars") -grp.attrs["PeriodicBoundariesOn"] = 1 - #Units grp = file.create_group("/Units") grp.attrs["Unit length in cgs (U_L)"] = 1. 
diff --git a/examples/SodShock_3D/run.sh b/examples/SodShock_3D/run.sh index 8ed85baf73425b75f402c491a3c66785f6c6fce0..fee82ce3e1bbbe600fb170f6249f9063ab5aec70 100755 --- a/examples/SodShock_3D/run.sh +++ b/examples/SodShock_3D/run.sh @@ -13,6 +13,6 @@ then fi # Run SWIFT -../swift -s -t 4 sodShock.yml 2>&1 | tee output.log +../swift --hydro --threads=4 sodShock.yml 2>&1 | tee output.log python plotSolution.py 1 diff --git a/examples/SodShock_3D/sodShock.yml b/examples/SodShock_3D/sodShock.yml index 6042c8090d00fef5467a7fed3d6f5a104c626f43..b2d783cd74d66a8eaa3cbbf4b08fc686b0298244 100644 --- a/examples/SodShock_3D/sodShock.yml +++ b/examples/SodShock_3D/sodShock.yml @@ -32,4 +32,5 @@ SPH: # Parameters related to the initial conditions InitialConditions: file_name: ./sodShock.hdf5 # The file to read + periodic: 1 diff --git a/examples/SquareTest_2D/makeIC.py b/examples/SquareTest_2D/makeIC.py index 186e653124a6ff62a964c37cf0fb2220f1152a0e..12a394873edf42f7ecfdf07c9795b62e3ad89745 100644 --- a/examples/SquareTest_2D/makeIC.py +++ b/examples/SquareTest_2D/makeIC.py @@ -96,10 +96,6 @@ grp.attrs["MassTable"] = [0.0, 0.0, 0.0, 0.0, 0.0, 0.0] grp.attrs["Flag_Entropy_ICs"] = [0, 0, 0, 0, 0, 0] grp.attrs["Dimension"] = 2 -#Runtime parameters -grp = fileOutput.create_group("/RuntimePars") -grp.attrs["PeriodicBoundariesOn"] = 1 - #Units grp = fileOutput.create_group("/Units") grp.attrs["Unit length in cgs (U_L)"] = 1. 
diff --git a/examples/SquareTest_2D/run.sh b/examples/SquareTest_2D/run.sh index 7d77e9c5bd89732970b47feb3a297ef92b345a01..f96bd4c0e19958f91c631616321a7280fa090c40 100755 --- a/examples/SquareTest_2D/run.sh +++ b/examples/SquareTest_2D/run.sh @@ -8,7 +8,7 @@ then fi # Run SWIFT -../swift -s -t 1 square.yml 2>&1 | tee output.log +../swift --hydro --threads=1 square.yml 2>&1 | tee output.log # Plot the solution python plotSolution.py 5 diff --git a/examples/SquareTest_2D/square.yml b/examples/SquareTest_2D/square.yml index b700c441a619ef8faac52656909567c944e344c3..54e0effa676cd5b1233ae7c38aded18d089f0ef2 100644 --- a/examples/SquareTest_2D/square.yml +++ b/examples/SquareTest_2D/square.yml @@ -31,3 +31,4 @@ SPH: # Parameters related to the initial conditions InitialConditions: file_name: ./square.hdf5 # The file to read + periodic: 1 diff --git a/examples/SupernovaeFeedback/SN_feedback.yml b/examples/SupernovaeFeedback/SN_feedback.yml new file mode 100644 index 0000000000000000000000000000000000000000..a59ae302ff4052a3daf0535e93a0c2cd5e9904f5 --- /dev/null +++ b/examples/SupernovaeFeedback/SN_feedback.yml @@ -0,0 +1,44 @@ +# Define the system of units to use internally. +InternalUnitSystem: + UnitMass_in_cgs: 1 # Grams + UnitLength_in_cgs: 1 # Centimeters + UnitVelocity_in_cgs: 1 # Centimeters per second + UnitCurrent_in_cgs: 1 # Amperes + UnitTemp_in_cgs: 1 # Kelvin + +# Values of some physical constants +PhysicalConstants: + G: 0 # (Optional) Overwrite the value of Newton's constant used internally by the code. + +# Parameters governing the time integration +TimeIntegration: + time_begin: 0. # The starting time of the simulation (in internal units). + time_end: 5e-2 # The end time of the simulation (in internal units). + dt_min: 1e-7 # The minimal time-step size of the simulation (in internal units). + dt_max: 1e-4 # The maximal time-step size of the simulation (in internal units). 
+ +# Parameters governing the snapshots +Snapshots: + basename: SN_feedback # Common part of the name of output files + time_first: 0. # Time of the first output (in internal units) + delta_time: 1e-2 # Time difference between consecutive outputs (in internal units) + compression: 1 + +# Parameters governing the conserved quantities statistics +Statistics: + delta_time: 1e-3 # Time between statistics output + +# Parameters for the hydrodynamics scheme +SPH: + resolution_eta: 1.2348 # Target smoothing length in units of the mean inter-particle separation (1.2348 == 48Ngbs with the cubic spline kernel). + CFL_condition: 0.1 # Courant-Friedrich-Levy condition for time integration. + +# Parameters related to the initial conditions +InitialConditions: + file_name: ./SN_feedback.hdf5 + smoothing_length_scaling: 1. + periodic: 1 # Are we running with periodic ICs? + +# Parameters for the stellar models +Stars: + resolution_eta: 1.2348 # Target smoothing length in units of the mean inter-particle separation (1.2348 == 48Ngbs with the cubic spline kernel). diff --git a/examples/SupernovaeFeedback/getGlass.sh b/examples/SupernovaeFeedback/getGlass.sh new file mode 100644 index 0000000000000000000000000000000000000000..d5c5f590ac37c9c9431d626a2ea61b0c12c1513c --- /dev/null +++ b/examples/SupernovaeFeedback/getGlass.sh @@ -0,0 +1,2 @@ +#!/bin/bash +wget http://virgodb.cosma.dur.ac.uk/swift-webstorage/ICs/glassCube_64.hdf5 diff --git a/examples/SupernovaeFeedback/makeIC.py b/examples/SupernovaeFeedback/makeIC.py new file mode 100644 index 0000000000000000000000000000000000000000..8a0fca2bfd21f2cf1d5052660581d97184705480 --- /dev/null +++ b/examples/SupernovaeFeedback/makeIC.py @@ -0,0 +1,117 @@ +############################################################################### + # This file is part of SWIFT. 
+ # Copyright (c) 2016 Matthieu Schaller (matthieu.schaller@durham.ac.uk) + # + # This program is free software: you can redistribute it and/or modify + # it under the terms of the GNU Lesser General Public License as published + # by the Free Software Foundation, either version 3 of the License, or + # (at your option) any later version. + # + # This program is distributed in the hope that it will be useful, + # but WITHOUT ANY WARRANTY; without even the implied warranty of + # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + # GNU General Public License for more details. + # + # You should have received a copy of the GNU Lesser General Public License + # along with this program. If not, see <http://www.gnu.org/licenses/>. + # + ############################################################################## + +import h5py +from numpy import * + +# Generates a swift IC file for the Sedov blast test in a periodic cubic box + +# Parameters +gamma = 5./3. # Gas adiabatic index +rho0 = 1. # Background density +P0 = 1.e-6 # Background pressure +E0= 1. # Energy of the explosion +N_inject = 15 # Number of particles in which to inject energy +fileName = "SN_feedback.hdf5" + +#--------------------------------------------------- +glass = h5py.File("glassCube_64.hdf5", "r") + +# Read particle positions and h from the glass +pos = glass["/PartType0/Coordinates"][:,:] +eps = 1e-6 +pos = (pos - pos.min()) / (pos.max() - pos.min() + eps) +h = glass["/PartType0/SmoothingLength"][:] * 0.3 * 3.3 + +numPart = size(h) +vol = 1. +Boxsize = 1. 
+ +# Generate extra arrays +v = zeros((numPart, 3)) +ids = linspace(1, numPart, numPart) +m = zeros(numPart) +u = zeros(numPart) +r = zeros(numPart) + +r = sqrt((pos[:,0] - 0.5)**2 + (pos[:,1] - 0.5)**2 + (pos[:,2] - 0.5)**2) +m[:] = rho0 * vol / numPart +u[:] = P0 / (rho0 * (gamma - 1)) + +#-------------------------------------------------- + +star_pos = zeros((1, 3)) +star_pos[:,:] = 0.5 * Boxsize + +star_v = zeros((1, 3)) +star_v[:,:] = 0. + +# increase mass to keep it at center +star_m = 1e3 * array([rho0 * vol / numPart]) +star_ids = array([numPart + 1]) +star_h = array([h.max()]) + +#-------------------------------------------------- + +#File +file = h5py.File(fileName, 'w') + +# Header +grp = file.create_group("/Header") +grp.attrs["BoxSize"] = [Boxsize]*3 +grp.attrs["NumPart_Total"] = [numPart, 0, 0, 0, 1, 0] +grp.attrs["NumPart_Total_HighWord"] = [0, 0, 0, 0, 0, 0] +grp.attrs["NumPart_ThisFile"] = [numPart, 0, 0, 0, 1, 0] +grp.attrs["Time"] = 0.0 +grp.attrs["NumFilesPerSnapshot"] = 1 +grp.attrs["MassTable"] = [0.0, 0.0, 0.0, 0.0, 0.0, 0.0] +grp.attrs["Flag_Entropy_ICs"] = 0 +grp.attrs["Dimension"] = 3 + +#Runtime parameters +grp = file.create_group("/RuntimePars") +grp.attrs["PeriodicBoundariesOn"] = 0 + +#Units +grp = file.create_group("/Units") +grp.attrs["Unit length in cgs (U_L)"] = 1. +grp.attrs["Unit mass in cgs (U_M)"] = 1. +grp.attrs["Unit time in cgs (U_t)"] = 1. +grp.attrs["Unit current in cgs (U_I)"] = 1. +grp.attrs["Unit temperature in cgs (U_T)"] = 1. 
+ +#Particle group +grp = file.create_group("/PartType0") +grp.create_dataset('Coordinates', data=pos, dtype='d') +grp.create_dataset('Velocities', data=v, dtype='f') +grp.create_dataset('Masses', data=m, dtype='f') +grp.create_dataset('SmoothingLength', data=h, dtype='f') +grp.create_dataset('InternalEnergy', data=u, dtype='f') +grp.create_dataset('ParticleIDs', data=ids, dtype='L') + +# stellar group +grp = file.create_group("/PartType4") +grp.create_dataset("Coordinates", data=star_pos, dtype="d") +grp.create_dataset('Velocities', data=star_v, dtype='f') +grp.create_dataset('Masses', data=star_m, dtype='f') +grp.create_dataset('SmoothingLength', data=star_h, dtype='f') +grp.create_dataset('ParticleIDs', data=star_ids, dtype='L') + + +file.close() diff --git a/examples/SupernovaeFeedback/run.sh b/examples/SupernovaeFeedback/run.sh new file mode 100644 index 0000000000000000000000000000000000000000..984cee2b415f2273f951ec997f4c81d65f1a4d4a --- /dev/null +++ b/examples/SupernovaeFeedback/run.sh @@ -0,0 +1,19 @@ +#!/bin/bash + + # Generate the initial conditions if they are not present. +if [ ! -e glassCube_64.hdf5 ] +then + echo "Fetching initial glass file for the Supernovae feedback example..." + ./getGlass.sh +fi +if [ ! -e SN_feedback.hdf5 ] +then + echo "Generating initial conditions for the Supernovae feedback example..." 
+ python makeIC.py +fi + +# Run SWIFT +../swift --external-gravity --feedback --hydro --stars --threads=4 SN_feedback.yml 2>&1 | tee output.log + +# Plot the solution +# TODO diff --git a/examples/UniformBox_2D/makeIC.py b/examples/UniformBox_2D/makeIC.py index 642896c6ec406a5a75127e024d19775ea4a8e09b..36bb1ba6118a31db3251a1cd7f332f01b2ba3df1 100644 --- a/examples/UniformBox_2D/makeIC.py +++ b/examples/UniformBox_2D/makeIC.py @@ -85,10 +85,6 @@ grp.attrs["Flag_Entropy_ICs"] = [0, 0, 0, 0, 0, 0] grp.attrs["NumPart_Total"] = numPart grp.attrs["Dimension"] = 2 -#Runtime parameters -grp = file.create_group("/RuntimePars") -grp.attrs["PeriodicBoundariesOn"] = periodic - #Units grp = file.create_group("/Units") grp.attrs["Unit length in cgs (U_L)"] = 1. diff --git a/examples/UniformBox_2D/run.sh b/examples/UniformBox_2D/run.sh index ee3ef109968a65e2437ea17b42013266195d3314..3c146d8b48d1e191b043a2ead52715cd49edf9e5 100755 --- a/examples/UniformBox_2D/run.sh +++ b/examples/UniformBox_2D/run.sh @@ -7,4 +7,4 @@ then python makeIC.py 100 fi -../swift -s -t 16 uniformPlane.yml 2>&1 | tee output.log +../swift --hydro --threads=16 uniformPlane.yml 2>&1 | tee output.log diff --git a/examples/UniformBox_2D/uniformPlane.yml b/examples/UniformBox_2D/uniformPlane.yml index 58fe0d50557db0c0624fe89cbde888d2c92775e5..77f53d59c497b10b1c95ce5dcb763fa8bffcd5ca 100644 --- a/examples/UniformBox_2D/uniformPlane.yml +++ b/examples/UniformBox_2D/uniformPlane.yml @@ -31,3 +31,4 @@ SPH: # Parameters related to the initial conditions InitialConditions: file_name: ./uniformPlane.hdf5 # The file to read + periodic: 1 diff --git a/examples/UniformBox_3D/makeIC.py b/examples/UniformBox_3D/makeIC.py index 01e37c67b6e2eec2984d62f4ffd503b23b5bd9ec..8311aae951f921b4c7f759ba09cc8fe73cf4a9f1 100644 --- a/examples/UniformBox_3D/makeIC.py +++ b/examples/UniformBox_3D/makeIC.py @@ -57,10 +57,6 @@ grp.attrs["MassTable"] = [0.0, 0.0, 0.0, 0.0, 0.0, 0.0] grp.attrs["Flag_Entropy_ICs"] = 0 grp.attrs["Dimension"] = 
3 -#Runtime parameters -grp = file.create_group("/RuntimePars") -grp.attrs["PeriodicBoundariesOn"] = periodic - #Units grp = file.create_group("/Units") grp.attrs["Unit length in cgs (U_L)"] = 1. diff --git a/examples/UniformBox_3D/run.sh b/examples/UniformBox_3D/run.sh index 08891cdd08fccf8f43089951e94dddb33e162030..eb2648cc5277f830436dadf5902df5531985a3f0 100755 --- a/examples/UniformBox_3D/run.sh +++ b/examples/UniformBox_3D/run.sh @@ -7,4 +7,4 @@ then python makeIC.py 100 fi -../swift -s -t 16 uniformBox.yml 2>&1 | tee output.log +../swift --hydro --threads=16 uniformBox.yml 2>&1 | tee output.log diff --git a/examples/UniformBox_3D/uniformBox.yml b/examples/UniformBox_3D/uniformBox.yml index 17dd5632edd345802402cb9c6d1dcf184e909806..202ff8298fe763a8c194ab4570b1252fe352dccc 100644 --- a/examples/UniformBox_3D/uniformBox.yml +++ b/examples/UniformBox_3D/uniformBox.yml @@ -31,3 +31,4 @@ SPH: # Parameters related to the initial conditions InitialConditions: file_name: ./uniformBox.hdf5 # The file to read + periodic: 1 diff --git a/examples/UniformDMBox/plot_gravity_checks.py b/examples/UniformDMBox/plot_gravity_checks.py deleted file mode 100644 index 5efd5847ca9749fffaee48e586c0a1976fbac9d5..0000000000000000000000000000000000000000 --- a/examples/UniformDMBox/plot_gravity_checks.py +++ /dev/null @@ -1,219 +0,0 @@ -#!/usr/bin/env python - -import sys -import glob -import re -import numpy as np -import matplotlib.pyplot as plt - -params = {'axes.labelsize': 14, -'axes.titlesize': 18, -'font.size': 12, -'legend.fontsize': 12, -'xtick.labelsize': 14, -'ytick.labelsize': 14, -'text.usetex': True, -'figure.figsize': (10, 10), -'figure.subplot.left' : 0.06, -'figure.subplot.right' : 0.99 , -'figure.subplot.bottom' : 0.06 , -'figure.subplot.top' : 0.985 , -'figure.subplot.wspace' : 0.14 , -'figure.subplot.hspace' : 0.14 , -'lines.markersize' : 6, -'lines.linewidth' : 3., -'text.latex.unicode': True -} -plt.rcParams.update(params) 
-plt.rc('font',**{'family':'sans-serif','sans-serif':['Times']}) - -min_error = 1e-6 -max_error = 1e-1 -num_bins = 51 - -# Construct the bins -bin_edges = np.linspace(np.log10(min_error), np.log10(max_error), num_bins + 1) -bin_size = (np.log10(max_error) - np.log10(min_error)) / num_bins -bins = 0.5*(bin_edges[1:] + bin_edges[:-1]) -bin_edges = 10**bin_edges -bins = 10**bins - -# Colours -cols = ['b', 'g', 'r', 'm'] - -# Time-step to plot -step = int(sys.argv[1]) - -# Find the files for the different expansion orders -order_list = glob.glob("gravity_checks_step%d_order*.dat"%step) -num_order = len(order_list) - -# Get the multipole orders -order = np.zeros(num_order) -for i in range(num_order): - order[i] = int(order_list[i][26]) - -# Start the plot -plt.figure() - -# Get the Gadget-2 data if existing -gadget2_file_list = glob.glob("forcetest_gadget2.txt") -if len(gadget2_file_list) != 0: - - gadget2_data = np.loadtxt(gadget2_file_list[0]) - gadget2_ids = gadget2_data[:,0] - gadget2_pos = gadget2_data[:,1:4] - gadget2_a_exact = gadget2_data[:,4:7] - gadget2_a_grav = gadget2_data[:, 7:10] - - # Sort stuff - sort_index = np.argsort(gadget2_ids) - gadget2_ids = gadget2_ids[sort_index] - gadget2_pos = gadget2_pos[sort_index, :] - gadget2_a_exact = gadget2_a_exact[sort_index, :] - gadget2_a_grav = gadget2_a_grav[sort_index, :] - - # Compute the error norm - diff = gadget2_a_exact - gadget2_a_grav - - norm_diff = np.sqrt(diff[:,0]**2 + diff[:,1]**2 + diff[:,2]**2) - norm_a = np.sqrt(gadget2_a_exact[:,0]**2 + gadget2_a_exact[:,1]**2 + gadget2_a_exact[:,2]**2) - - norm_error = norm_diff / norm_a - error_x = abs(diff[:,0]) / norm_a - error_y = abs(diff[:,1]) / norm_a - error_z = abs(diff[:,2]) / norm_a - - # Bin the error - norm_error_hist,_ = np.histogram(norm_error, bins=bin_edges, density=False) / (np.size(norm_error) * bin_size) - error_x_hist,_ = np.histogram(error_x, bins=bin_edges, density=False) / (np.size(norm_error) * bin_size) - error_y_hist,_ = 
np.histogram(error_y, bins=bin_edges, density=False) / (np.size(norm_error) * bin_size) - error_z_hist,_ = np.histogram(error_z, bins=bin_edges, density=False) / (np.size(norm_error) * bin_size) - - norm_median = np.median(norm_error) - median_x = np.median(error_x) - median_y = np.median(error_y) - median_z = np.median(error_z) - - norm_per95 = np.percentile(norm_error,95) - per95_x = np.percentile(error_x,95) - per95_y = np.percentile(error_y,95) - per95_z = np.percentile(error_z,95) - - plt.subplot(221) - plt.semilogx(bins, norm_error_hist, 'k--', label="Gadget-2") - plt.plot([norm_median, norm_median], [2.7, 3], 'k-', lw=1) - plt.plot([norm_per95, norm_per95], [2.7, 3], 'k:', lw=1) - plt.subplot(222) - plt.semilogx(bins, error_x_hist, 'k--', label="Gadget-2") - plt.plot([median_x, median_x], [1.8, 2], 'k-', lw=1) - plt.plot([per95_x, per95_x], [1.8, 2], 'k:', lw=1) - plt.subplot(223) - plt.semilogx(bins, error_y_hist, 'k--', label="Gadget-2") - plt.plot([median_y, median_y], [1.8, 2], 'k-', lw=1) - plt.plot([per95_y, per95_y], [1.8, 2], 'k:', lw=1) - plt.subplot(224) - plt.semilogx(bins, error_z_hist, 'k--', label="Gadget-2") - plt.plot([median_z, median_z], [1.8, 2], 'k-', lw=1) - plt.plot([per95_z, per95_z], [1.8, 2], 'k:', lw=1) - - -# Plot the different histograms -for i in range(num_order-1, -1, -1): - data = np.loadtxt(order_list[i]) - ids = data[:,0] - pos = data[:,1:4] - a_exact = data[:,4:7] - a_grav = data[:, 7:10] - - # Sort stuff - sort_index = np.argsort(ids) - ids = ids[sort_index] - pos = pos[sort_index, :] - a_exact = a_exact[sort_index, :] - a_grav = a_grav[sort_index, :] - - # Cross-checks - if not np.array_equal(ids, gadget2_ids): - print "Comparing different IDs !" - - if not np.array_equal(pos, gadget2_pos): - print "Comparing different positions ! max difference:", np.max(pos - gadget2_pos) - - if not np.array_equal(a_exact, gadget2_a_exact): - print "Comparing different exact accelerations ! 
max difference:", np.max(a_exact - gadget2_a_exact) - - - # Compute the error norm - diff = a_exact - a_grav - - norm_diff = np.sqrt(diff[:,0]**2 + diff[:,1]**2 + diff[:,2]**2) - norm_a = np.sqrt(a_exact[:,0]**2 + a_exact[:,1]**2 + a_exact[:,2]**2) - - norm_error = norm_diff / norm_a - error_x = abs(diff[:,0]) / norm_a - error_y = abs(diff[:,1]) / norm_a - error_z = abs(diff[:,2]) / norm_a - - # Bin the error - norm_error_hist,_ = np.histogram(norm_error, bins=bin_edges, density=False) / (np.size(norm_error) * bin_size) - error_x_hist,_ = np.histogram(error_x, bins=bin_edges, density=False) / (np.size(norm_error) * bin_size) - error_y_hist,_ = np.histogram(error_y, bins=bin_edges, density=False) / (np.size(norm_error) * bin_size) - error_z_hist,_ = np.histogram(error_z, bins=bin_edges, density=False) / (np.size(norm_error) * bin_size) - - norm_median = np.median(norm_error) - median_x = np.median(error_x) - median_y = np.median(error_y) - median_z = np.median(error_z) - - norm_per95 = np.percentile(norm_error,95) - per95_x = np.percentile(error_x,95) - per95_y = np.percentile(error_y,95) - per95_z = np.percentile(error_z,95) - - plt.subplot(221) - plt.semilogx(bins, norm_error_hist, color=cols[i],label="SWIFT m-poles order %d"%order[i]) - plt.plot([norm_median, norm_median], [2.7, 3],'-', color=cols[i], lw=1) - plt.plot([norm_per95, norm_per95], [2.7, 3],':', color=cols[i], lw=1) - plt.subplot(222) - plt.semilogx(bins, error_x_hist, color=cols[i],label="SWIFT m-poles order %d"%order[i]) - plt.plot([median_x, median_x], [1.8, 2],'-', color=cols[i], lw=1) - plt.plot([per95_x, per95_x], [1.8, 2],':', color=cols[i], lw=1) - plt.subplot(223) - plt.semilogx(bins, error_y_hist, color=cols[i],label="SWIFT m-poles order %d"%order[i]) - plt.plot([median_y, median_y], [1.8, 2],'-', color=cols[i], lw=1) - plt.plot([per95_y, per95_y], [1.8, 2],':', color=cols[i], lw=1) - plt.subplot(224) - plt.semilogx(bins, error_z_hist, color=cols[i],label="SWIFT m-poles order %d"%order[i]) - 
plt.plot([median_z, median_z], [1.8, 2],'-', color=cols[i], lw=1) - plt.plot([per95_z, per95_z], [1.8, 2],':', color=cols[i], lw=1) - - -plt.subplot(221) -plt.xlabel("$|\delta \overrightarrow{a}|/|\overrightarrow{a}_{exact}|$") -plt.ylabel("Density") -plt.xlim(min_error, 2*max_error) -plt.ylim(0,3) -plt.legend(loc="upper left") -plt.subplot(222) -plt.xlabel("$\delta a_x/|\overrightarrow{a}_{exact}|$") -plt.ylabel("Density") -plt.xlim(min_error, 2*max_error) -plt.ylim(0,2) -#plt.legend(loc="center left") -plt.subplot(223) -plt.xlabel("$\delta a_y/|\overrightarrow{a}_{exact}|$") -plt.ylabel("Density") -plt.xlim(min_error, 2*max_error) -plt.ylim(0,2) -#plt.legend(loc="center left") -plt.subplot(224) -plt.xlabel("$\delta a_z/|\overrightarrow{a}_{exact}|$") -plt.ylabel("Density") -plt.xlim(min_error, 2*max_error) -plt.ylim(0,2) -#plt.legend(loc="center left") - - - -plt.savefig("gravity_checks_step%d.png"%step) diff --git a/examples/UniformDMBox/uniformBox.yml b/examples/UniformDMBox/uniformBox.yml deleted file mode 100644 index 1abb256671f1cc8c87daa711bd63f7ea6abdbbab..0000000000000000000000000000000000000000 --- a/examples/UniformDMBox/uniformBox.yml +++ /dev/null @@ -1,38 +0,0 @@ -# Define the system of units to use internally. -InternalUnitSystem: - UnitMass_in_cgs: 1 # Grams - UnitLength_in_cgs: 1 # Centimeters - UnitVelocity_in_cgs: 1 # Centimeters per second - UnitCurrent_in_cgs: 1 # Amperes - UnitTemp_in_cgs: 1 # Kelvin - -# Parameters governing the time integration -TimeIntegration: - time_begin: 0. # The starting time of the simulation (in internal units). - time_end: 100. # The end time of the simulation (in internal units). - dt_min: 1e-6 # The minimal time-step size of the simulation (in internal units). - dt_max: 1. # The maximal time-step size of the simulation (in internal units). 
- -Scheduler: - max_top_level_cells: 8 - cell_split_size: 50 - -# Parameters governing the snapshots -Snapshots: - basename: uniformDMBox # Common part of the name of output files - time_first: 0. # Time of the first output (in internal units) - delta_time: 10. # Time difference between consecutive outputs (in internal units) - -# Parameters for the self-gravity scheme -Gravity: - eta: 0.025 # Constant dimensionless multiplier for time integration. - theta: 0.8 # Opening angle (Multipole acceptance criterion) - epsilon: 0.01 # Softening length (in internal units). - -# Parameters governing the conserved quantities statistics -Statistics: - delta_time: 5. # Time between statistics output - -# Parameters related to the initial conditions -InitialConditions: - file_name: ./uniformDMBox_16.hdf5 # The file to read diff --git a/examples/UranusImpact/README.md b/examples/UranusImpact/README.md deleted file mode 100644 index 178a3937ecbe527df8e8e82a0d8fd8bcbf9dbef7..0000000000000000000000000000000000000000 --- a/examples/UranusImpact/README.md +++ /dev/null @@ -1,40 +0,0 @@ -Uranus Giant Impact -=================== - -A simple version of the low angular momentum impact onto the early Uranus shown -in Kegerreis et al. (2018), Fig. 2; with only ~10,000 particles for a quick and -crude simulation. - -The collision of a 2 Earth mass impactor onto a proto-Uranus that can explain -the spin of the present-day planet, with an angular momentum of 2e36 kg m^2 s^-1 -and velocity at infinity of 5 km s^-1 for a relatively head-on impact. - -Both bodies have a rocky core and icy mantle, with a hydrogen-helium atmosphere -on the target as well. Although with this low number of particles it cannot be -modelled in any detail. 
- -Setup ------ - -In `swiftsim/`: - -`$ ./configure --with-hydro=minimal-multi-mat --with-equation-of-state=planetary` - -`$ make` - -In `swiftsim/examples/UranusImpact/`: - -`$ ./get_init_cond.sh` - -Run ---- - -`$ ./run.sh` - -Analysis --------- - -`$ python plot.py` - -`$ mplayer anim.mpg` - diff --git a/examples/UranusImpact/get_init_cond.sh b/examples/UranusImpact/get_init_cond.sh deleted file mode 100755 index e12e009adfbd727cb2452ac21c477b3ecd77b9c9..0000000000000000000000000000000000000000 --- a/examples/UranusImpact/get_init_cond.sh +++ /dev/null @@ -1,2 +0,0 @@ -#!/bin/bash -wget http://virgodb.cosma.dur.ac.uk/swift-webstorage/ICs/uranus_impact.hdf5 diff --git a/examples/UranusImpact/plot.py b/examples/UranusImpact/plot.py deleted file mode 100644 index 3db3bf21bb15862ec524a069c38e47564b48df1d..0000000000000000000000000000000000000000 --- a/examples/UranusImpact/plot.py +++ /dev/null @@ -1,291 +0,0 @@ -""" -############################################################################### -# This file is part of SWIFT. -# Copyright (c) 2018 Jacob Kegerreis (jacob.kegerreis@durham.ac.uk) -# -# This program is free software: you can redistribute it and/or modify -# it under the terms of the GNU Lesser General Public License as published -# by the Free Software Foundation, either version 3 of the License, or -# (at your option) any later version. -# -# This program is distributed in the hope that it will be useful, -# but WITHOUT ANY WARRANTY; without even the implied warranty of -# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -# GNU General Public License for more details. -# -# You should have received a copy of the GNU Lesser General Public License -# along with this program. If not, see <http://www.gnu.org/licenses/>. -# -############################################################################### - -Plotting script for the Uranus Giant Impact example. 
- -Save a figure for each snapshot in `./plots/` then make them into a simple -animation with ffmpeg in `./`. - -The snapshot plots show all particles with z < 0, coloured by their material. - -Usage: - `$ python plot.py time_end delta_time` - - Sys args: - + `time_end` | (opt) int | The time of the last snapshot to plot. - Default = 100000 - + `delta_time` | (opt) int | The time between successive snapshots. - Default = 500 -""" - -from __future__ import division -import numpy as np -import matplotlib -import matplotlib.pyplot as plt -import h5py -import sys -import subprocess - -# Particle array fields -dtype_picle = [ - ('m', float), ('x', float), ('y', float), ('z', float), ('v_x', float), - ('v_y', float), ('v_z', float), ('ID', int), ('rho', float), ('u', float), - ('phi', float), ('P', float), ('h', float), ('mat_ID', int), ('r', float) - ] - -s_to_hour = 1 / 60**2 -R_Ea = 6.371e6 - -# Default sys args -time_end_default = 100000 -delta_time_default = 500 - -# Snapshot info -file_snap = "./snapshots/uranus_impact_" -file_plot = "./plots/uranus_impact_" - -# Number of particles in the target body -num_target = 8992 - -# Material types (copied from src/equation_of_state/planetary/equation_of_state.h) -type_factor = 100 -Di_type = { - 'Til' : 1, - 'HM80' : 2, - 'ANEOS' : 3, - 'SESAME' : 4, -} -Di_material = { - # Tillotson - 'Til_iron' : Di_type['Til']*type_factor, - 'Til_granite' : Di_type['Til']*type_factor + 1, - 'Til_water' : Di_type['Til']*type_factor + 2, - # Hubbard & MacFarlane (1980) Uranus/Neptune - 'HM80_HHe' : Di_type['HM80']*type_factor, # Hydrogen-helium atmosphere - 'HM80_ice' : Di_type['HM80']*type_factor + 1, # H20-CH4-NH3 ice mix - 'HM80_rock' : Di_type['HM80']*type_factor + 2, # SiO2-MgO-FeS-FeO rock mix - # ANEOS - 'ANEOS_iron' : Di_type['ANEOS']*type_factor, - 'MANEOS_forsterite' : Di_type['ANEOS']*type_factor + 1, - # SESAME - 'SESAME_iron' : Di_type['SESAME']*type_factor, -} - -# Material offset for impactor particles -ID_imp = 10000 -# 
Material colours -Di_mat_colour = { - # Target - Di_material['HM80_HHe'] : '#33DDFF', - Di_material['HM80_ice'] : 'lightsteelblue', - Di_material['HM80_rock'] : 'slategrey', - # Impactor - Di_material['HM80_ice'] + ID_imp : '#A080D0', - Di_material['HM80_rock'] + ID_imp : '#706050', - } - - -def load_snapshot(filename): - """ Load the hdf5 snapshot file and return the structured particle array. - """ - # Add extension if needed - if (filename[-5:] != ".hdf5"): - filename += ".hdf5" - - # Load the hdf5 file - with h5py.File(filename, 'r') as f: - header = f['Header'].attrs - A2_pos = f['PartType0/Coordinates'].value - A2_vel = f['PartType0/Velocities'].value - - # Structured array of all particle data - A2_picle = np.empty(header['NumPart_Total'][0], - dtype=dtype_picle) - - A2_picle['x'] = A2_pos[:, 0] - A2_picle['y'] = A2_pos[:, 1] - A2_picle['z'] = A2_pos[:, 2] - A2_picle['v_x'] = A2_vel[:, 0] - A2_picle['v_y'] = A2_vel[:, 1] - A2_picle['v_z'] = A2_vel[:, 2] - A2_picle['m'] = f['PartType0/Masses'].value - A2_picle['ID'] = f['PartType0/ParticleIDs'].value - A2_picle['rho'] = f['PartType0/Density'].value - A2_picle['u'] = f['PartType0/InternalEnergy'].value - A2_picle['phi'] = f['PartType0/Potential'].value - A2_picle['P'] = f['PartType0/Pressure'].value - A2_picle['h'] = f['PartType0/SmoothingLength'].value - A2_picle['mat_ID'] = f['PartType0/MaterialID'].value - - return A2_picle - - -def process_particles(A2_picle, num_target): - """ Modify things like particle units, material IDs, and coordinate origins. 
- """ - # Offset material IDs for impactor particles - A2_picle['mat_ID'][A2_picle['ID'] >= num_target] += ID_imp - - # Shift coordinates to the centre of the target's core's mass and momentum - sel_tar = np.where(A2_picle['mat_ID'] == Di_material['HM80_rock'])[0] - - # Centre of mass - m_tot = np.sum(A2_picle[sel_tar]['m']) - x_com = np.sum(A2_picle[sel_tar]['m'] * A2_picle[sel_tar]['x']) / m_tot - y_com = np.sum(A2_picle[sel_tar]['m'] * A2_picle[sel_tar]['y']) / m_tot - z_com = np.sum(A2_picle[sel_tar]['m'] * A2_picle[sel_tar]['z']) / m_tot - - # Change origin to the centre-of-mass - A2_picle['x'] -= x_com - A2_picle['y'] -= y_com - A2_picle['z'] -= z_com - A2_picle['r'] = np.sqrt( - A2_picle['x']**2 + A2_picle['y']**2 + A2_picle['z']**2 - ) - - # Centre of momentum - v_x_com = np.sum(A2_picle[sel_tar]['m'] * A2_picle[sel_tar]['v_x']) / m_tot - v_y_com = np.sum(A2_picle[sel_tar]['m'] * A2_picle[sel_tar]['v_y']) / m_tot - v_z_com = np.sum(A2_picle[sel_tar]['m'] * A2_picle[sel_tar]['v_z']) / m_tot - - # Change to the centre-of-momentum frame of reference - A2_picle['v_x'] -= v_x_com - A2_picle['v_y'] -= v_y_com - A2_picle['v_z'] -= v_z_com - - return A2_picle - - -def plot_snapshot(A2_picle, filename, time, ax_lim=13, dz=0.1): - """ Plot the snapshot particles and save the figure. - """ - # Add extension if needed - if (filename[-5:] != ".png"): - filename += ".png" - - fig = plt.figure(figsize=(9, 9)) - ax = fig.add_subplot(111, aspect='equal') - - # Plot slices in z below zero - for z in np.arange(-ax_lim, 0, dz): - sel_z = np.where((z < A2_picle['z']) & (A2_picle['z'] < z+dz))[0] - A2_picle_z = A2_picle[sel_z] - - # Plot each material - for mat_ID, colour in Di_mat_colour.iteritems(): - sel_col = np.where(A2_picle_z['mat_ID'] == mat_ID)[0] - - ax.scatter( - A2_picle_z[sel_col]['x'], A2_picle_z[sel_col]['y'], - c=colour, edgecolors='none', marker='.', s=50, alpha=0.7 - ) - - # Axes etc. 
- ax.set_axis_bgcolor('k') - - ax.set_xlabel("x Position ($R_\oplus$)") - ax.set_ylabel("y Position ($R_\oplus$)") - - ax.set_xlim(-ax_lim, ax_lim) - ax.set_ylim(-ax_lim, ax_lim) - - plt.text( - -0.92*ax_lim, 0.85*ax_lim, "%.1f h" % (time*s_to_hour), fontsize=20, - color='w' - ) - - # Font sizes - for item in ( - [ax.title, ax.xaxis.label, ax.yaxis.label] + ax.get_xticklabels() + - ax.get_yticklabels() - ): - item.set_fontsize(20) - - plt.tight_layout() - - plt.savefig(filename) - plt.close() - - -if __name__ == '__main__': - # Sys args - try: - time_end = int(sys.argv[1]) - try: - delta_time = int(sys.argv[2]) - except IndexError: - delta_time = delta_time_default - except IndexError: - time_end = time_end_default - delta_time = delta_time_default - - # Load and plot each snapshot - for i_snap in range(int(time_end/delta_time) + 1): - snap_time = i_snap * delta_time - print "\rPlotting snapshot %06d (%d of %d)" % ( - snap_time, i_snap+1, int(time_end/delta_time) - ), - sys.stdout.flush() - - # Load particle data - filename = "%s%06d" % (file_snap, snap_time) - A2_picle = load_snapshot(filename) - - # Process particle data - A2_picle = process_particles(A2_picle, num_target) - - # Plot particles - filename = "%s%06d" % (file_plot, snap_time) - plot_snapshot(A2_picle, filename, snap_time) - - # Animation - command = ( - "ffmpeg -framerate 10 -i plots/uranus_impact_%*.png -r 25 anim.mpg -y" - ) - print "\n$ %s\n" % command - subprocess.call(command, shell=True) - - - - - - - - - - - - - - - - - - - - - - - - - - - - diff --git a/examples/UranusImpact/run.sh b/examples/UranusImpact/run.sh deleted file mode 100755 index c6773b7e40fff3fa312dfcb5ba4ada9d9e4b1b8d..0000000000000000000000000000000000000000 --- a/examples/UranusImpact/run.sh +++ /dev/null @@ -1,2 +0,0 @@ -#!/bin/bash -../swift -G -s -t 8 uranus_impact.yml diff --git a/examples/UranusImpact/uranus_impact.yml b/examples/UranusImpact/uranus_impact.yml deleted file mode 100644 index 
fabddca00f80fcdd79ff6114ff0544cd251046f4..0000000000000000000000000000000000000000 --- a/examples/UranusImpact/uranus_impact.yml +++ /dev/null @@ -1,51 +0,0 @@ -# Define the system of units to use internally. -InternalUnitSystem: - UnitMass_in_cgs: 5.9724e27 # Grams - UnitLength_in_cgs: 6.371e8 # Centimeters - UnitVelocity_in_cgs: 6.371e8 # Centimeters per second - UnitCurrent_in_cgs: 1 # Amperes - UnitTemp_in_cgs: 1 # Kelvin - -# Parameters governing the time integration -TimeIntegration: - time_begin: 0 # The starting time of the simulation (in internal units). - time_end: 100000 # The end time of the simulation (in internal units). - dt_min: 0.001 # The minimal time-step size of the simulation (in internal units). - dt_max: 100 # The maximal time-step size of the simulation (in internal units). - -# Parameters governing the snapshots -Snapshots: - # Common part of the name of output files - basename: snapshots/uranus_impact - time_first: 0 # Time of the first output (in internal units) - delta_time: 500 # Time difference between consecutive outputs (in internal units) - label_delta: 500 # Integer increment between snapshot output labels - -# Parameters governing the conserved quantities statistics -Statistics: - delta_time: 1000 # Time between statistics output - -# Parameters for the hydrodynamics scheme -SPH: - resolution_eta: 1.2348 # Target smoothing length in units of the mean inter-particle separation (1.2348 == 48Ngbs with the cubic spline kernel). - delta_neighbours: 0.1 # The tolerance for the targetted number of neighbours. - CFL_condition: 0.2 # Courant-Friedrich-Levy condition for time integration. - -# Parameters for the self-gravity scheme -Gravity: - eta: 0.025 # Constant dimensionless multiplier for time integration. - theta: 0.7 # Opening angle (Multipole acceptance criterion) - comoving_softening: 0.01 # Comoving softening length (in internal units). - max_physical_softening: 0.01 # Physical softening length (in internal units). 
- -# Parameters related to the initial conditions -InitialConditions: - file_name: uranus_impact.hdf5 # The initial conditions file to read - -# Parameters related to the equation of state -EoS: - planetary_use_HM80: 1 # Whether to prepare the Hubbard & MacFarlane (1980) EOS - # Table file paths - planetary_HM80_HHe_table_file: /gpfs/data/dc-kege1/gihr_data/P_rho_u_HHe.txt - planetary_HM80_ice_table_file: /gpfs/data/dc-kege1/gihr_data/P_rho_u_ice.txt - planetary_HM80_rock_table_file: /gpfs/data/dc-kege1/gihr_data/P_rho_u_roc.txt diff --git a/examples/VacuumSpherical_2D/makeIC.py b/examples/VacuumSpherical_2D/makeIC.py index 498f1b5bc5277188d8ff8d34a5ec24cd314332d4..05f0d8414cfa88755ecceb2be757e24ca3cefdde 100644 --- a/examples/VacuumSpherical_2D/makeIC.py +++ b/examples/VacuumSpherical_2D/makeIC.py @@ -77,10 +77,6 @@ grp.attrs["MassTable"] = [0.0, 0.0, 0.0, 0.0, 0.0, 0.0] grp.attrs["Flag_Entropy_ICs"] = 0 grp.attrs["Dimension"] = 2 -#Runtime parameters -grp = file.create_group("/RuntimePars") -grp.attrs["PeriodicBoundariesOn"] = 1 - #Units grp = file.create_group("/Units") grp.attrs["Unit length in cgs (U_L)"] = 1. diff --git a/examples/VacuumSpherical_2D/run.sh b/examples/VacuumSpherical_2D/run.sh index 51d32b4de679877741b7ecd74238fecb785579e7..c88095abf3a94d4f9e9c007722896a8ac2303bb4 100755 --- a/examples/VacuumSpherical_2D/run.sh +++ b/examples/VacuumSpherical_2D/run.sh @@ -13,7 +13,7 @@ then fi # Run SWIFT -../swift -s -t 4 vacuum.yml 2>&1 | tee output.log +../swift --hydro --threads=4 vacuum.yml 2>&1 | tee output.log # Get the 1D high resolution reference result if not present. if [ ! 
-e vacuumSpherical2D_exact.txt ] diff --git a/examples/VacuumSpherical_2D/vacuum.yml b/examples/VacuumSpherical_2D/vacuum.yml index 881b155b62c7f1f2af12a1d013ff5c05f1c16a88..1d5642d5c1b645808229c5c6b99fb6d319351880 100644 --- a/examples/VacuumSpherical_2D/vacuum.yml +++ b/examples/VacuumSpherical_2D/vacuum.yml @@ -31,4 +31,4 @@ SPH: # Parameters related to the initial conditions InitialConditions: file_name: ./vacuum.hdf5 # The file to read - + periodic: 1 diff --git a/examples/VacuumSpherical_3D/makeIC.py b/examples/VacuumSpherical_3D/makeIC.py index d67a30707a904268a09641210a6a3bfcbf305dad..dd4ddd7e8a8d6335e4d3d3b383c54bf301a06f1d 100644 --- a/examples/VacuumSpherical_3D/makeIC.py +++ b/examples/VacuumSpherical_3D/makeIC.py @@ -80,10 +80,6 @@ grp.attrs["MassTable"] = [0.0, 0.0, 0.0, 0.0, 0.0, 0.0] grp.attrs["Flag_Entropy_ICs"] = 0 grp.attrs["Dimension"] = 3 -#Runtime parameters -grp = file.create_group("/RuntimePars") -grp.attrs["PeriodicBoundariesOn"] = 1 - #Units grp = file.create_group("/Units") grp.attrs["Unit length in cgs (U_L)"] = 1. diff --git a/examples/VacuumSpherical_3D/run.sh b/examples/VacuumSpherical_3D/run.sh index a136929678f745f6a3d0859ba146e1bc1c6c43d0..ef6a84be13b3f4612b2d79237e9caa47fa0bd665 100755 --- a/examples/VacuumSpherical_3D/run.sh +++ b/examples/VacuumSpherical_3D/run.sh @@ -13,7 +13,7 @@ then fi # Run SWIFT -../swift -s -t 16 vacuum.yml 2>&1 | tee output.log +../swift --hydro --threads=16 vacuum.yml 2>&1 | tee output.log # Get the reference solution if it is not present. if [ ! 
-e vacuumSpherical3D_exact.txt ] diff --git a/examples/VacuumSpherical_3D/vacuum.yml b/examples/VacuumSpherical_3D/vacuum.yml index 8792f029d97f413882ae0ea6c8603d64efaddbfa..851abf74441a48a58eac551bd0526f1d4b6e4ce0 100644 --- a/examples/VacuumSpherical_3D/vacuum.yml +++ b/examples/VacuumSpherical_3D/vacuum.yml @@ -32,4 +32,6 @@ SPH: # Parameters related to the initial conditions InitialConditions: file_name: ./vacuum.hdf5 # The file to read + periodic: 1 + \ No newline at end of file diff --git a/examples/Vacuum_1D/makeIC.py b/examples/Vacuum_1D/makeIC.py index 067304ec951182da862cf2812cdc68a51a56d23b..5b057b340cbfa9718fb230ab1af839bc63678032 100644 --- a/examples/Vacuum_1D/makeIC.py +++ b/examples/Vacuum_1D/makeIC.py @@ -63,10 +63,6 @@ grp.attrs["MassTable"] = [0.0, 0.0, 0.0, 0.0, 0.0, 0.0] grp.attrs["Flag_Entropy_ICs"] = 0 grp.attrs["Dimension"] = 1 -#Runtime parameters -grp = file.create_group("/RuntimePars") -grp.attrs["PeriodicBoundariesOn"] = 1 - #Units grp = file.create_group("/Units") grp.attrs["Unit length in cgs (U_L)"] = 1. 
diff --git a/examples/Vacuum_1D/run.sh b/examples/Vacuum_1D/run.sh index b141f91f877c5b553281e53cdf02fbea948b0a97..a840f7c1ba6ef4b938b53839f057072f1867a8b6 100755 --- a/examples/Vacuum_1D/run.sh +++ b/examples/Vacuum_1D/run.sh @@ -8,7 +8,7 @@ then fi # Run SWIFT -../swift -s -t 1 vacuum.yml 2>&1 | tee output.log +../swift --hydro --threads=1 vacuum.yml 2>&1 | tee output.log # Plot the result python plotSolution.py 1 diff --git a/examples/Vacuum_1D/vacuum.yml b/examples/Vacuum_1D/vacuum.yml index 5ef5ce3da68febb086a14ad1a2207711f680d9ff..0be6427e50e1f674f7f59d4b865f2c4f9605a378 100644 --- a/examples/Vacuum_1D/vacuum.yml +++ b/examples/Vacuum_1D/vacuum.yml @@ -31,4 +31,5 @@ SPH: # Parameters related to the initial conditions InitialConditions: file_name: ./vacuum.hdf5 # The file to read + periodic: 1 diff --git a/examples/Vacuum_2D/makeIC.py b/examples/Vacuum_2D/makeIC.py index ef267c092cafdb95457d5adad1e6858df0e14bd3..4d9181b83c0e383d0e3fb0dc6ca79dbda6f88891 100644 --- a/examples/Vacuum_2D/makeIC.py +++ b/examples/Vacuum_2D/makeIC.py @@ -71,10 +71,6 @@ grp.attrs["MassTable"] = [0.0, 0.0, 0.0, 0.0, 0.0, 0.0] grp.attrs["Flag_Entropy_ICs"] = 0 grp.attrs["Dimension"] = 2 -#Runtime parameters -grp = file.create_group("/RuntimePars") -grp.attrs["PeriodicBoundariesOn"] = 1 - #Units grp = file.create_group("/Units") grp.attrs["Unit length in cgs (U_L)"] = 1. 
diff --git a/examples/Vacuum_2D/run.sh b/examples/Vacuum_2D/run.sh index 5c0b2ca5e19e33e813b7ff478ed4494752c0a2a5..5a526bb5062fd6e5bf1dbe442689e915d4f20352 100755 --- a/examples/Vacuum_2D/run.sh +++ b/examples/Vacuum_2D/run.sh @@ -13,7 +13,7 @@ then fi # Run SWIFT -../swift -s -t 4 vacuum.yml 2>&1 | tee output.log +../swift --hydro --threads=4 vacuum.yml 2>&1 | tee output.log # Plot the result python plotSolution.py 1 diff --git a/examples/Vacuum_2D/vacuum.yml b/examples/Vacuum_2D/vacuum.yml index 5ef5ce3da68febb086a14ad1a2207711f680d9ff..0be6427e50e1f674f7f59d4b865f2c4f9605a378 100644 --- a/examples/Vacuum_2D/vacuum.yml +++ b/examples/Vacuum_2D/vacuum.yml @@ -31,4 +31,5 @@ SPH: # Parameters related to the initial conditions InitialConditions: file_name: ./vacuum.hdf5 # The file to read + periodic: 1 diff --git a/examples/Vacuum_3D/makeIC.py b/examples/Vacuum_3D/makeIC.py index 09c3cb4d6f5525d54fab59643ab4a7d0540a2a92..cee2d28d5190305a3536315001453e7595b7c7f2 100644 --- a/examples/Vacuum_3D/makeIC.py +++ b/examples/Vacuum_3D/makeIC.py @@ -73,10 +73,6 @@ grp.attrs["MassTable"] = [0.0, 0.0, 0.0, 0.0, 0.0, 0.0] grp.attrs["Flag_Entropy_ICs"] = 0 grp.attrs["Dimension"] = 3 -#Runtime parameters -grp = file.create_group("/RuntimePars") -grp.attrs["PeriodicBoundariesOn"] = 1 - #Units grp = file.create_group("/Units") grp.attrs["Unit length in cgs (U_L)"] = 1. 
diff --git a/examples/Vacuum_3D/run.sh b/examples/Vacuum_3D/run.sh index 5029626f67659bba1f22600bb5bd38859dd805ce..eaf7a32d4f833af43cdea74829b768b53fb2f5e3 100755 --- a/examples/Vacuum_3D/run.sh +++ b/examples/Vacuum_3D/run.sh @@ -13,7 +13,7 @@ then fi # Run SWIFT -../swift -s -t 16 vacuum.yml 2>&1 | tee output.log +../swift --hydro --threads=16 vacuum.yml 2>&1 | tee output.log # Plot the result python plotSolution.py 1 diff --git a/examples/Vacuum_3D/vacuum.yml b/examples/Vacuum_3D/vacuum.yml index cf44d2441f5009d2fc75084a2c872e3618e40912..49bd9747d677bfdf64009bd1e02a86bc52a8db9c 100644 --- a/examples/Vacuum_3D/vacuum.yml +++ b/examples/Vacuum_3D/vacuum.yml @@ -32,4 +32,5 @@ SPH: # Parameters related to the initial conditions InitialConditions: file_name: ./vacuum.hdf5 # The file to read + periodic: 1 diff --git a/examples/ZeldovichPancake_3D/makeIC.py b/examples/ZeldovichPancake_3D/makeIC.py index 15fb8bdef95f6e830e78f4d1b2c419051a6f00af..efce60f128cacd04e153912d97e0d94b4ab15785 100644 --- a/examples/ZeldovichPancake_3D/makeIC.py +++ b/examples/ZeldovichPancake_3D/makeIC.py @@ -28,29 +28,34 @@ z_c = 1. # Redshift of caustic formation (non-linear collapse) z_i = 100. # Initial redshift gamma = 5./3. # Gas adiabatic index numPart_1D = 32 # Number of particles along each dimension +fileName = "zeldovichPancake.hdf5" # Some units -Mpc_in_m = 3.085678e22 -Msol_in_kg = 1.989e30 -Gyr_in_s = 3.085678e19 +Mpc_in_m = 3.08567758e22 +Msol_in_kg = 1.98848e30 +Gyr_in_s = 3.08567758e19 mH_in_kg = 1.6737236e-27 -k_in_J_K = 1.38064852e-23 -# Parameters -rho_0 = 1.8788e-26 # h^2 kg m^-3 -H_0 = 1. / Mpc_in_m * 10**5 # s^-1 +# Some constants +kB_in_SI = 1.38064852e-23 +G_in_SI = 6.67408e-11 + +# Some useful variables in h-full units +H_0 = 1. / Mpc_in_m * 10**5 # h s^-1 +rho_0 = 3. * H_0**2 / (8* math.pi * G_in_SI) # h^2 kg m^-3 lambda_i = 64. 
/ H_0 * 10**5 # h^-1 m (= 64 h^-1 Mpc) x_min = -0.5 * lambda_i x_max = 0.5 * lambda_i -fileName = "zeldovichPancake.hdf5" +# SI system of units unit_l_in_si = Mpc_in_m unit_m_in_si = Msol_in_kg * 1.e10 unit_t_in_si = Gyr_in_s unit_v_in_si = unit_l_in_si / unit_t_in_si unit_u_in_si = unit_v_in_si**2 +# Total number of particles numPart = numPart_1D**3 #--------------------------------------------------- @@ -87,7 +92,7 @@ for i in range(numPart_1D): coords[index,1] = (j + 0.5) * delta_x coords[index,2] = (k + 0.5) * delta_x T = T_i * (1. / (1. - zfac * cos(k_i * q)))**(2. / 3.) - u[index] = k_in_J_K * T / (gamma - 1.) / mH_in_kg + u[index] = kB_in_SI * T / (gamma - 1.) / mH_in_kg h[index] = 1.2348 * delta_x m[index] = m_i v[index,0] = -H_0 * (1. + z_c) / sqrt(1. + z_i) * sin(k_i * q) / k_i @@ -118,10 +123,6 @@ grp.attrs["MassTable"] = [0.0, 0.0, 0.0, 0.0, 0.0, 0.0] grp.attrs["Flag_Entropy_ICs"] = 0 grp.attrs["Dimension"] = 3 -#Runtime parameters -grp = file.create_group("/RuntimePars") -grp.attrs["PeriodicBoundariesOn"] = 1 - #Units grp = file.create_group("/Units") grp.attrs["Unit length in cgs (U_L)"] = 100. 
* unit_l_in_si @@ -141,7 +142,7 @@ grp.create_dataset('ParticleIDs', data=ids, dtype='L') file.close() -import pylab as pl +#import pylab as pl -pl.plot(coords[:,0], v[:,0], "k.") -pl.show() +#pl.plot(coords[:,0], v[:,0], "k.") +#pl.show() diff --git a/examples/ZeldovichPancake_3D/plotSolution.py b/examples/ZeldovichPancake_3D/plotSolution.py index 163e61eb9d3738c39912c8c42c0b6c6bb9990cac..eef247fb761e75f8dde8e8abe84075efbd7cb46a 100644 --- a/examples/ZeldovichPancake_3D/plotSolution.py +++ b/examples/ZeldovichPancake_3D/plotSolution.py @@ -69,6 +69,7 @@ scheme = sim["/HydroScheme"].attrs["Scheme"] kernel = sim["/HydroScheme"].attrs["Kernel function"] neighbours = sim["/HydroScheme"].attrs["Kernel target N_ngb"] eta = sim["/HydroScheme"].attrs["Kernel eta"] +alpha = sim["/HydroScheme"].attrs["Alpha viscosity"] git = sim["Code"].attrs["Git Revision"] # Cosmological parameters @@ -82,7 +83,12 @@ S = sim["/PartType0/Entropy"][:] P = sim["/PartType0/Pressure"][:] rho = sim["/PartType0/Density"][:] m = sim["/PartType0/Masses"][:] -phi = sim["/PartType0/Potential"][:] +try: + phi = sim["/PartType0/Potential"][:] +except KeyError: + # We didn't write the potential, try to go on without + print("Couldn't find potential in your output file") + phi = np.zeros_like(m) x -= 0.5 * boxSize @@ -96,7 +102,7 @@ if os.path.exists(filename_g): rho_g = sim_g["/PartType0/Density"][:] phi_g = sim_g["/PartType0/Potential"][:] a_g = sim_g["/Header"].attrs["Time"] - print "Gadget Scale-factor:", a_g, "redshift:", 1/a_g - 1. + print("Gadget Scale-factor:", a_g, "redshift:", 1/a_g - 1.) 
x_g -= 0.5 * boxSize else: @@ -138,8 +144,8 @@ if np.size(x_g) > 1: plot(x_g, v_g, 's', color='g', alpha=0.8, lw=1.2, ms=4) plot(x, v, '.', color='r', ms=4.0) plot(x_s, v_s, '--', color='k', alpha=0.8, lw=1.2) -xlabel("${\\rm{Comoving Position}}~x$", labelpad=0) -ylabel("${\\rm{Peculiar Velocity}}~v_x$", labelpad=0) +xlabel("${\\rm{Comoving~position}}~x$", labelpad=0) +ylabel("${\\rm{Peculiar~velocity}}~v_x$", labelpad=0) # Density profile -------------------------------- @@ -148,7 +154,7 @@ if np.size(x_g) > 1: plot(x_g, rho_g/rho_0, 's', color='g', alpha=0.8, lw=1.2, ms=4) plot(x, rho/rho_0, '.', color='r', ms=4.0) plot(x_s, rho_s/rho_0, '--', color='k', alpha=0.8, lw=1.2) -xlabel("${\\rm{Comoving Position}}~x$", labelpad=0) +xlabel("${\\rm{Comoving~position}}~x$", labelpad=0) ylabel("${\\rm{Density}}~\\rho / \\rho_0$", labelpad=0) # Potential profile -------------------------------- @@ -156,7 +162,7 @@ subplot(233) if np.size(x_g) > 1: plot(x_g, phi_g, 's', color='g', alpha=0.8, lw=1.2, ms=4) plot(x, phi, '.', color='r', ms=4.0) -xlabel("${\\rm{Comoving Position}}~x$", labelpad=0) +xlabel("${\\rm{Comoving~position}}~x$", labelpad=0) ylabel("${\\rm{Potential}}~\\phi$", labelpad=0) # Temperature profile ------------------------- @@ -167,19 +173,19 @@ u /= a**(3 * (gas_gamma - 1.)) u_g /= a**(3 * (gas_gamma - 1.)) T = (gas_gamma - 1.) * u * mH_in_kg / k_in_J_K T_g = (gas_gamma - 1.) 
* u_g * mH_in_kg / k_in_J_K -print "z = {0:.2f}, T_avg = {1:.2f}".format(redshift, T.mean()) +print("z = {0:.2f}, T_avg = {1:.2f}".format(redshift, T.mean())) if np.size(x_g) > 1: plot(x_g, T_g, 's', color='g', alpha=0.8, lw=1.2, ms=4) plot(x, T, '.', color='r', ms=4.0) plot(x_s, T_s, '--', color='k', alpha=0.8, lw=1.2) -xlabel("${\\rm{Comoving Position}}~x$", labelpad=0) +xlabel("${\\rm{Comoving~position}}~x$", labelpad=0) ylabel("${\\rm{Temperature}}~T$", labelpad=0) # Information ------------------------------------- subplot(236, frameon=False) -text(-0.49, 0.9, "Zeldovich pancake with $\\gamma=%.3f$ in 1D at $t=%.2f$"%(gas_gamma,time), fontsize=10) -text(-0.49, 0.8, "$z={0:.2f}$".format(redshift)) +text(-0.49, 0.9, "Zeldovich pancake at z=%.2f "%(redshift), fontsize=10) +text(-0.49, 0.8, "adiabatic index $\\gamma=%.2f$, viscosity $\\alpha=%.2f$"%(gas_gamma, alpha), fontsize=10) plot([-0.49, 0.1], [0.62, 0.62], 'k-', lw=1) text(-0.49, 0.5, "$\\textsc{Swift}$ %s"%git, fontsize=10) text(-0.49, 0.4, scheme, fontsize=10) diff --git a/examples/ZeldovichPancake_3D/run.sh b/examples/ZeldovichPancake_3D/run.sh index 9b6b8166ac0d084898b96e2de6b0fc1ef378aecd..4153f2598f65af54b80b066cf97f76e64c06fce2 100755 --- a/examples/ZeldovichPancake_3D/run.sh +++ b/examples/ZeldovichPancake_3D/run.sh @@ -8,7 +8,7 @@ then fi # Run SWIFT -../swift -a -s -c -G -t 8 zeldovichPancake.yml 2>&1 | tee output.log +../swift --hydro --cosmology --self-gravity --threads=8 zeldovichPancake.yml 2>&1 | tee output.log # Plot the result for i in {0..119} diff --git a/examples/ZeldovichPancake_3D/zeldovichPancake.yml b/examples/ZeldovichPancake_3D/zeldovichPancake.yml index 481432d5875470aa464f69d5aa47fb76328cde7d..a1d2342b56d6816c3cfbe7da70220ab244104fbd 100644 --- a/examples/ZeldovichPancake_3D/zeldovichPancake.yml +++ b/examples/ZeldovichPancake_3D/zeldovichPancake.yml @@ -17,14 +17,15 @@ Cosmology: # Parameters governing the time integration TimeIntegration: dt_min: 1e-7 # The minimal time-step 
size of the simulation (in internal units). - dt_max: 1e-3 # The maximal time-step size of the simulation (in internal units). + dt_max: 4e-3 # The maximal time-step size of the simulation (in internal units). # Parameters governing the snapshots Snapshots: basename: zeldovichPancake # Common part of the name of output files time_first: 0. # Time of the first output (in internal units) delta_time: 1.04 # Time difference between consecutive outputs (in internal units) - scale_factor_first: 0.00991 + scale_factor_first: 0.00991 + compression: 4 # Parameters governing the conserved quantities statistics Statistics: @@ -38,14 +39,14 @@ SPH: # Parameters related to the initial conditions InitialConditions: file_name: ./zeldovichPancake.hdf5 # The file to read - + periodic: 1 + Scheduler: max_top_level_cells: 8 cell_split_size: 50 - tasks_per_cell: 125 Gravity: - mesh_side_length: 16 + mesh_side_length: 32 eta: 0.025 theta: 0.3 r_cut_max: 5. diff --git a/examples/ZoomIn/README b/examples/ZoomIn/README new file mode 100644 index 0000000000000000000000000000000000000000..cffc275f2ae1046156d392f8725a7b542c80471a --- /dev/null +++ b/examples/ZoomIn/README @@ -0,0 +1,16 @@ +Initial conditions for a zoom in cosmological simulation of dwarf +galaxies. These have been generated by MUSIC and ran up to z=0 with +GEAR (see Revaz and Jablonka 2018 for more details on the simulation). + +The cosmology is taken from Planck 2015. + +The initial conditions have been cleaned to contain only the required +fields. The ICs have been created for Gadget and the positions and box +size are hence expressed in h-full units (e.g. box size of 32 / h Mpc). +Similarly, the peculiar velocitites contain an extra sqrt(a) factor. + +We will use SWIFT to cancel the h- and a-factors from the ICs. Gas +particles will be generated at startup. 
+ +MD5 check-sum of the ICS: +9aafe154438478ed435e88664c1c5dba zoom_in.hdf5 diff --git a/examples/ZoomIn/getIC.sh b/examples/ZoomIn/getIC.sh new file mode 100755 index 0000000000000000000000000000000000000000..6cdfaec981af515249578faa72798c53448e7ecb --- /dev/null +++ b/examples/ZoomIn/getIC.sh @@ -0,0 +1,2 @@ +#!/bin/bash +wget https://obswww.unige.ch/~lhausamm/swift/IC/ZoomIn/zoom_in.hdf5 diff --git a/examples/ZoomIn/run.sh b/examples/ZoomIn/run.sh new file mode 100755 index 0000000000000000000000000000000000000000..d9650dd7783ba90f732c222543c49d13256b4958 --- /dev/null +++ b/examples/ZoomIn/run.sh @@ -0,0 +1,11 @@ +#!/bin/bash + + # Generate the initial conditions if they are not present. +if [ ! -e zoom_in.hdf5 ] +then + echo "Fetching initial conditions for the zoom in example..." + ./getIC.sh +fi + +../swift --feedback --cosmology --self-gravity --hydro --stars --threads=8 zoom_in.yml 2>&1 | tee output.log + diff --git a/examples/ZoomIn/zoom_in.yml b/examples/ZoomIn/zoom_in.yml new file mode 100644 index 0000000000000000000000000000000000000000..8e5763c4af700b7fd95beb6188ed886198b559b3 --- /dev/null +++ b/examples/ZoomIn/zoom_in.yml @@ -0,0 +1,62 @@ +# Define the system of units to use internally. +InternalUnitSystem: + UnitMass_in_cgs: 1.98848e43 # 10^10 M_sun in grams + UnitLength_in_cgs: 3.08567758e21 # kpc in centimeters + UnitVelocity_in_cgs: 1e5 # km/s in centimeters per second + UnitCurrent_in_cgs: 1 # Amperes + UnitTemp_in_cgs: 1 # Kelvin + +# Cosmological parameters +Cosmology: + h: 0.673 # Reduced Hubble constant + a_begin: 0.9873046739 # Initial scale-factor of the simulation + a_end: 1.0 # Final scale factor of the simulation + Omega_m: 0.315 # Matter density parameter + Omega_lambda: 0.685 # Dark-energy density parameter + Omega_b: 0.0486 # Baryon density parameter + +Scheduler: + max_top_level_cells: 8 + +# Parameters governing the time integration +TimeIntegration: + time_begin: 0. # The starting time of the simulation (in internal units). 
+ time_end: 1e-2 # The end time of the simulation (in internal units). + dt_min: 1e-10 # The minimal time-step size of the simulation (in internal units). + dt_max: 1e-3 # The maximal time-step size of the simulation (in internal units). + +# Parameters governing the snapshots +Snapshots: + basename: zoom_in # Common part of the name of output files + scale_factor_first: 0.987345 # Scale-factor of the first snaphot (cosmological run) + time_first: 0.01 # Time of the first output (non-cosmological run) (in internal units) + delta_time: 1.01 # Time difference between consecutive outputs (in internal units) + compression: 1 + +# Parameters governing the conserved quantities statistics +Statistics: + scale_factor_first: 0.987345 # Scale-factor of the first stat dump (cosmological run) + time_first: 0.01 # Time of the first stat dump (non-cosmological run) (in internal units) + delta_time: 1.05 # Time between statistics output + +# Parameters for the self-gravity scheme +Gravity: + eta: 0.025 # Constant dimensionless multiplier for time integration. + theta: 0.7 # Opening angle (Multipole acceptance criterion) + comoving_softening: 0.05 # Comoving softening length (in internal units). + max_physical_softening: 0.01 # Physical softening length (in internal units). + mesh_side_length: 16 + +# Parameters for the hydrodynamics scheme +SPH: + resolution_eta: 1.2348 # Target smoothing length in units of the mean inter-particle separation (1.2348 == 48Ngbs with the cubic spline kernel). + CFL_condition: 0.1 # Courant-Friedrich-Levy condition for time integration. 
+ minimal_temperature: 100 # (internal units) + +# Parameters related to the initial conditions +InitialConditions: + file_name: ./zoom_in.hdf5 # The file to read + periodic: 1 + cleanup_h_factors: 1 # Remove the h-factors inherited from Gadget + cleanup_velocity_factors: 1 # Remove the sqrt(a) factor in the velocities inherited from Gadget + diff --git a/examples/analyse_tasks.py b/examples/analyse_tasks.py deleted file mode 100755 index a72ee0ce637b6ac2da4b8b95dac5bacab3d40a99..0000000000000000000000000000000000000000 --- a/examples/analyse_tasks.py +++ /dev/null @@ -1,373 +0,0 @@ -#!/usr/bin/env python -""" -Usage: - analyse_tasks.py [options] input.dat - -where input.dat is a thread info file for a step (MPI or non-MPI). Use the -'-y interval' flag of the swift and swift_mpi commands to create these -(you will also need to configure with the --enable-task-debugging option). - -The output is an analysis of the task timings, including deadtime per thread -and step, total amount of time spent for each task type, for the whole step -and per thread and the minimum and maximum times spent per task type. - -This file is part of SWIFT. -Copyright (c) 2017 Peter W. Draper (p.w.draper@durham.ac.uk) - -This program is free software: you can redistribute it and/or modify -it under the terms of the GNU Lesser General Public License as published -by the Free Software Foundation, either version 3 of the License, or -(at your option) any later version. - -This program is distributed in the hope that it will be useful, -but WITHOUT ANY WARRANTY; without even the implied warranty of -MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -GNU General Public License for more details. - -You should have received a copy of the GNU Lesser General Public License -along with this program. If not, see <http://www.gnu.org/licenses/>. 
-""" - -import matplotlib -matplotlib.use("Agg") -import matplotlib.collections as collections -import matplotlib.ticker as plticker -import pylab as pl -import sys -import argparse - -# Handle the command line. -parser = argparse.ArgumentParser(description="Analyse task dumps") - -parser.add_argument("input", help="Thread data file (-y output)") -parser.add_argument("-v", "--verbose", dest="verbose", - help="Verbose output (default: False)", - default=False, action="store_true") -parser.add_argument("-r", "--rank", dest="rank", - help="Rank to process (default: all)", - default="all", action="store") - -args = parser.parse_args() -infile = args.input - -# Tasks and subtypes. Indexed as in tasks.h. -TASKTYPES = ["none", "sort", "self", "pair", "sub_self", "sub_pair", - "init_grav", "init_grav_out", "ghost_in", "ghost", "ghost_out", "extra_ghost", "drift_part", "drift_gpart", - "end_force", "kick1", "kick2", "timestep", "send", "recv", "grav_long_range", "grav_mm", "grav_down_in", - "grav_down", "grav_mesh", "cooling", "sourceterms", "count"] - -SUBTYPES = ["none", "density", "gradient", "force", "grav", "external_grav", - "tend", "xv", "rho", "gpart", "multipole", "spart", "count"] - -SIDS = ["(-1,-1,-1)", "(-1,-1, 0)", "(-1,-1, 1)", "(-1, 0,-1)", - "(-1, 0, 0)", "(-1, 0, 1)", "(-1, 1,-1)", "(-1, 1, 0)", - "(-1, 1, 1)", "( 0,-1,-1)", "( 0,-1, 0)", "( 0,-1, 1)", - "( 0, 0,-1)"] - -# Read input. -data = pl.loadtxt( infile ) -full_step = data[0,:] - -# Do we have an MPI file? 
-full_step = data[0,:] -if full_step.size == 13: - print "# MPI mode" - mpimode = True - nranks = int(max(data[:,0])) + 1 - print "# Number of ranks:", nranks - rankcol = 0 - threadscol = 1 - taskcol = 2 - subtaskcol = 3 - ticcol = 5 - toccol = 6 - updates = int(full_step[7]) - g_updates = int(full_step[8]) - s_updates = int(full_step[9]) -else: - print "# non MPI mode" - nranks = 1 - mpimode = False - rankcol = -1 - threadscol = 0 - taskcol = 1 - subtaskcol = 2 - ticcol = 4 - toccol = 5 - updates = int(full_step[6]) - g_updates = int(full_step[7]) - s_updates = int(full_step[8]) - -# Get the CPU clock to convert ticks into milliseconds. -CPU_CLOCK = float(full_step[-1]) / 1000.0 -if args.verbose: - print "# CPU frequency:", CPU_CLOCK * 1000.0 -print "# updates:", updates -print "# g_updates:", g_updates -print "# s_updates:", s_updates - -if mpimode: - if args.rank == "all": - ranks = range(nranks) - else: - ranks = [int(args.rank)] - if ranks[0] >= nranks: - print "Error: maximum rank is " + str(nranks - 1) - sys.exit(1) -else: - ranks = [1] - -maxthread = int(max(data[:,threadscol])) + 1 -print "# Maximum thread id:", maxthread - -# Avoid start and end times of zero. -sdata = data[data[:,ticcol] != 0] -sdata = data[data[:,toccol] != 0] - -# Now we process the required ranks. -for rank in ranks: - if mpimode: - print "# Rank", rank - data = sdata[sdata[:,rankcol] == rank] - full_step = data[0,:] - else: - data = sdata - - # Recover the start and end time - tic_step = int(full_step[ticcol]) - toc_step = int(full_step[toccol]) - data = data[1:,:] - - # Avoid start and end times of zero. - data = data[data[:,ticcol] != 0] - data = data[data[:,toccol] != 0] - - # Calculate the time range. - total_t = (toc_step - tic_step)/ CPU_CLOCK - print "# Data range: ", total_t, "ms" - print - - # Correct times to relative values. 
- start_t = float(tic_step) - data[:,ticcol] -= start_t - data[:,toccol] -= start_t - end_t = (toc_step - start_t) / CPU_CLOCK - - tasks = {} - tasks[-1] = [] - for i in range(maxthread): - tasks[i] = [] - - # Gather into by thread data. - num_lines = pl.shape(data)[0] - for line in range(num_lines): - thread = int(data[line,threadscol]) - tic = int(data[line,ticcol]) / CPU_CLOCK - toc = int(data[line,toccol]) / CPU_CLOCK - tasktype = int(data[line,taskcol]) - subtype = int(data[line,subtaskcol]) - sid = int(data[line, -1]) - - tasks[thread].append([tic,toc,tasktype,subtype, sid]) - - # Sort by tic and gather used threads. - threadids = [] - for i in range(maxthread): - tasks[i] = sorted(tasks[i], key=lambda task: task[0]) - threadids.append(i) - - # Times per task. - print "# Task times:" - print "# -----------" - print "# {0:<17s}: {1:>7s} {2:>9s} {3:>9s} {4:>9s} {5:>9s} {6:>9s}"\ - .format("type/subtype", "count","minimum", "maximum", - "sum", "mean", "percent") - - alltasktimes = {} - sidtimes = {} - for i in threadids: - tasktimes = {} - for task in tasks[i]: - key = TASKTYPES[task[2]] + "/" + SUBTYPES[task[3]] - dt = task[1] - task[0] - if not key in tasktimes: - tasktimes[key] = [] - tasktimes[key].append(dt) - - if not key in alltasktimes: - alltasktimes[key] = [] - alltasktimes[key].append(dt) - - my_sid = task[4] - if my_sid > -1: - if not my_sid in sidtimes: - sidtimes[my_sid] = [] - sidtimes[my_sid].append(dt) - - print "# Thread : ", i - for key in sorted(tasktimes.keys()): - taskmin = min(tasktimes[key]) - taskmax = max(tasktimes[key]) - tasksum = sum(tasktimes[key]) - print "{0:19s}: {1:7d} {2:9.4f} {3:9.4f} {4:9.4f} {5:9.4f} {6:9.2f}"\ - .format(key, len(tasktimes[key]), taskmin, taskmax, tasksum, - tasksum / len(tasktimes[key]), tasksum / total_t * 100.0) - print - - print "# All threads : " - for key in sorted(alltasktimes.keys()): - taskmin = min(alltasktimes[key]) - taskmax = max(alltasktimes[key]) - tasksum = sum(alltasktimes[key]) - print 
"{0:18s}: {1:7d} {2:9.4f} {3:9.4f} {4:9.4f} {5:9.4f} {6:9.2f}"\ - .format(key, len(alltasktimes[key]), taskmin, taskmax, tasksum, - tasksum / len(alltasktimes[key]), - tasksum / (len(threadids) * total_t) * 100.0) - print - - # For pairs, show stuff sorted by SID - print "# By SID (all threads): " - print "# {0:<17s}: {1:>7s} {2:>9s} {3:>9s} {4:>9s} {5:>9s} {6:>9s}"\ - .format("Pair/Sub-pair SID", "count","minimum", "maximum", - "sum", "mean", "percent") - - for sid in range(0,13): - if sid in sidtimes: - sidmin = min(sidtimes[sid]) - sidmax = max(sidtimes[sid]) - sidsum = sum(sidtimes[sid]) - sidcount = len(sidtimes[sid]) - sidmean = sidsum / sidcount - else: - sidmin = 0. - sidmax = 0. - sidsum = 0. - sidcount = 0 - sidmean = 0. - print "{0:3d} {1:15s}: {2:7d} {3:9.4f} {4:9.4f} {5:9.4f} {6:9.4f} {7:9.2f}"\ - .format(sid, SIDS[sid], sidcount, sidmin, sidmax, sidsum, - sidmean, sidsum / (len(threadids) * total_t) * 100.0) - print - - # Dead times. - print "# Times not in tasks (deadtimes)" - print "# ------------------------------" - print "# Time before first task:" - print "# no. : {0:>9s} {1:>9s}".format("value", "percent") - predeadtimes = [] - for i in threadids: - if len(tasks[i]) > 0: - predeadtime = tasks[i][0][0] - print "thread {0:2d}: {1:9.4f} {2:9.4f}"\ - .format(i, predeadtime, predeadtime / total_t * 100.0) - predeadtimes.append(predeadtime) - else: - predeadtimes.append(0.0) - - predeadmin = min(predeadtimes) - predeadmax = max(predeadtimes) - predeadsum = sum(predeadtimes) - print "# : {0:>9s} {1:>9s} {2:>9s} {3:>9s} {4:>9s} {5:>9s}"\ - .format("count", "minimum", "maximum", "sum", "mean", "percent") - print "all : {0:9d} {1:9.4f} {2:9.4f} {3:9.4f} {4:9.4f} {5:9.2f}"\ - .format(len(predeadtimes), predeadmin, predeadmax, predeadsum, - predeadsum / len(predeadtimes), - predeadsum / (len(threadids) * total_t ) * 100.0) - print - - print "# Time after last task:" - print "# no. 
: {0:>9s} {1:>9s}".format("value", "percent") - postdeadtimes = [] - for i in threadids: - if len(tasks[i]) > 0: - postdeadtime = total_t - tasks[i][-1][1] - print "thread {0:2d}: {1:9.4f} {2:9.4f}"\ - .format(i, postdeadtime, postdeadtime / total_t * 100.0) - postdeadtimes.append(postdeadtime) - else: - postdeadtimes.append(0.0) - - postdeadmin = min(postdeadtimes) - postdeadmax = max(postdeadtimes) - postdeadsum = sum(postdeadtimes) - print "# : {0:>9s} {1:>9s} {2:>9s} {3:>9s} {4:>9s} {5:>9s}"\ - .format("count", "minimum", "maximum", "sum", "mean", "percent") - print "all : {0:9d} {1:9.4f} {2:9.4f} {3:9.4f} {4:9.4f} {5:9.2f}"\ - .format(len(postdeadtimes), postdeadmin, postdeadmax, postdeadsum, - postdeadsum / len(postdeadtimes), - postdeadsum / (len(threadids) * total_t ) * 100.0) - print - - # Time in engine, i.e. from first to last tasks. - print "# Time between tasks (engine deadtime):" - print "# no. : {0:>9s} {1:>9s} {2:>9s} {3:>9s} {4:>9s} {5:>9s}"\ - .format("count", "minimum", "maximum", "sum", "mean", "percent") - enginedeadtimes = [] - for i in threadids: - deadtimes = [] - if len(tasks[i]) > 0: - last = tasks[i][0][0] - else: - last = 0.0 - for task in tasks[i]: - dt = task[0] - last - deadtimes.append(dt) - last = task[1] - - # Drop first value, last value already gone. - if len(deadtimes) > 1: - deadtimes = deadtimes[1:] - else: - # Only one or fewer tasks, so no deadtime by definition. 
- deadtimes = [0.0] - - deadmin = min(deadtimes) - deadmax = max(deadtimes) - deadsum = sum(deadtimes) - print "thread {0:2d}: {1:9d} {2:9.4f} {3:9.4f} {4:9.4f} {5:9.4f} {6:9.2f}"\ - .format(i, len(deadtimes), deadmin, deadmax, deadsum, - deadsum / len(deadtimes), deadsum / total_t * 100.0) - enginedeadtimes.extend(deadtimes) - - deadmin = min(enginedeadtimes) - deadmax = max(enginedeadtimes) - deadsum = sum(enginedeadtimes) - print "all : {0:9d} {1:9.4f} {2:9.4f} {3:9.4f} {4:9.4f} {5:9.2f}"\ - .format(len(enginedeadtimes), deadmin, deadmax, deadsum, - deadsum / len(enginedeadtimes), - deadsum / (len(threadids) * total_t ) * 100.0) - print - - # All times in step. - print "# All deadtimes:" - print "# no. : {0:>9s} {1:>9s} {2:>9s} {3:>9s} {4:>9s} {5:>9s}"\ - .format("count", "minimum", "maximum", "sum", "mean", "percent") - alldeadtimes = [] - for i in threadids: - deadtimes = [] - last = 0 - for task in tasks[i]: - dt = task[0] - last - deadtimes.append(dt) - last = task[1] - dt = total_t - last - deadtimes.append(dt) - - deadmin = min(deadtimes) - deadmax = max(deadtimes) - deadsum = sum(deadtimes) - print "thread {0:2d}: {1:9d} {2:9.4f} {3:9.4f} {4:9.4f} {5:9.4f} {6:9.2f}"\ - .format(i, len(deadtimes), deadmin, deadmax, deadsum, - deadsum / len(deadtimes), deadsum / total_t * 100.0) - alldeadtimes.extend(deadtimes) - - deadmin = min(alldeadtimes) - deadmax = max(alldeadtimes) - deadsum = sum(alldeadtimes) - print "all : {0:9d} {1:9.4f} {2:9.4f} {3:9.4f} {4:9.4f} {5:9.2f}"\ - .format(len(alldeadtimes), deadmin, deadmax, deadsum, - deadsum / len(alldeadtimes), - deadsum / (len(threadids) * total_t ) * 100.0) - print - -sys.exit(0) diff --git a/examples/check_ngbs.py b/examples/check_ngbs.py deleted file mode 100644 index a4a07ce7bd6ffb817e8106b74d9895a0edbceca7..0000000000000000000000000000000000000000 --- a/examples/check_ngbs.py +++ /dev/null @@ -1,321 +0,0 @@ -import h5py as h -import numpy as np -import matplotlib -matplotlib.use("Agg") -from pylab import * 
-import os.path - -kernel_gamma = 1.825742 -kernel_gamma2 = kernel_gamma * kernel_gamma -kernel_gamma_dim = np.power(kernel_gamma,3) -hydro_dimension_unit_sphere = 4. * np.pi / 3. -kernel_norm = hydro_dimension_unit_sphere * kernel_gamma_dim -error = False - -inputFile1 = "" -inputFile2 = "" - -# Compare the values of two floats -def isclose(a, b, rel_tol=1e-09, abs_tol=0.0): - return abs(a-b) <= max(rel_tol * max(abs(a), abs(b)), abs_tol) - -# Check list of density neighbours and check that they are correct. -def check_density_neighbours(pids, ngb_ids_naive, ngb_ids_sort, mask, pos, - h_naive, h_sort, num_invalid, acc): - - for k in range(0,num_invalid): - - # Filter neighbour lists for valid particle ids - filter_neigh_naive = [i for i in ngb_ids_naive[mask][k] if i > -1] - filter_neigh_sort = [i for i in ngb_ids_sort[mask][k] if i > -1] - - # Check neighbour lists for differences - id_list = set(filter_neigh_naive).symmetric_difference(set(filter_neigh_sort)) - - # Check for duplicate IDs - duplicate_check_naive = len(filter_neigh_naive) != len(set(filter_neigh_naive)) - duplicate_check_sort = len(filter_neigh_sort) != len(set(filter_neigh_sort)) - - if duplicate_check_naive: - print "Duplicate neighbour ID found in: ", inputFile1 - print filter_neigh_naive - return True - - if duplicate_check_sort: - print "Duplicate neighbour ID found in: ", inputFile2 - print filter_neigh_sort - return True - - pid = pids[mask][k] - - # Loop over discrepancies and check if they are actually neighbours - for pjd in id_list: - pi_pos = pos[np.where(pids == pid)] - pj_pos = pos[np.where(pids == pjd)] - - hi = h_naive[np.where(pids == pid)] - - dx = pi_pos[0][0] - pj_pos[0][0] - dy = pi_pos[0][1] - pj_pos[0][1] - dz = pi_pos[0][2] - pj_pos[0][2] - - # Correct for BCs - dx = nearest(dx) - dy = nearest(dy) - dz = nearest(dz) - - r2 = dx*dx + dy*dy + dz*dz - - hig2 = hi*hi*kernel_gamma2 - - diff = abs(r2 - hig2) - - print "Particle {} is missing {}, hig2: {}, r2: {}, |r2 - hig2|: 
{}".format(pid,pjd,hig2, r2, diff) - - if diff < acc * hig2: - print "Missing interaction due to precision issue will be ignored." - else: - hi_2 = h_sort[np.where(pids == pid)] - - # If a neigbour is missing and the particle has the same h throw - # an error. - if(isclose(hi,hi_2)): - print "Missing interaction found but particle has the same smoothing length (hi_1: %e, hi_2: %e)."%(hi, hi_2) - return True - else: - print "Missing interaction due to different smoothing lengths will be ignored (hi_1: %e, hi_2: %e)."%(hi, hi_2) - - return False - -# Check list of force neighbours and check that they are correct. -def check_force_neighbours(pids, ngb_ids_naive, ngb_ids_sort, mask, pos, - h_naive, h_sort, num_invalid, acc): - - error_val = False - - for k in range(0,num_invalid): - - # Filter neighbour lists for valid particle ids - filter_neigh_naive = [i for i in ngb_ids_naive[mask][k] if i > -1] - filter_neigh_sort = [i for i in ngb_ids_sort[mask][k] if i > -1] - - # Check neighbour lists for differences - id_list = set(filter_neigh_naive).symmetric_difference(set(filter_neigh_sort)) - - pid = pids[mask][k] - - # Loop over discrepancies and check if they are actually neighbours - for pjd in id_list: - pi_pos = pos[np.where(pids == pid)] - pj_pos = pos[np.where(pids == pjd)] - - hi = h_naive[np.where(pids == pid)] - hj = h_naive[np.where(pids == pjd)] - - dx = pi_pos[0][0] - pj_pos[0][0] - dy = pi_pos[0][1] - pj_pos[0][1] - dz = pi_pos[0][2] - pj_pos[0][2] - - # Correct for BCs - dx = nearest(dx) - dy = nearest(dy) - dz = nearest(dz) - - r2 = dx*dx + dy*dy + dz*dz - - hig2 = hi*hi*kernel_gamma2 - hjg2 = hj*hj*kernel_gamma2 - - diff = abs(r2 - max(hig2, hjg2)) - - print "Particle {} is missing {}, hig2: {}, hjg2: {}, r2: {}, |r2 - max(hig2,hjg2)|: {}".format(pid,pjd,hig2, hjg2, r2, diff) - - if diff < acc * max(hig2,hjg2): - print "Missing interaction due to precision issue will be ignored." 
- else: - hi_2 = h_sort[np.where(pids == pid)] - if(isclose(hi,hi_2)): - print "Missing interaction due to the same smoothing lengths will not be ignored (hi_1: %e, hi_2: %e)."%(hi, hi_2) - error_val = True - else: - print "Missing interaction due to different smoothing lengths will be ignored (hi_1: %e, hi_2: %e)."%(hi, hi_2) - - return error_val - -def nearest(dx): - if(dx > 0.5 * box_size): - return dx - box_size - elif(dx < -0.5 * box_size): - return dx + box_size - else: - return dx - -# Parse command line arguments -if len(sys.argv) < 3: - print "Error: pass input files as arguments" - sys.exit() -else: - inputFile1 = sys.argv[1] - inputFile2 = sys.argv[2] - if os.path.exists(inputFile1) != 1: - print "\n{} does not exist!\n".format(inputFile1) - sys.exit() - if os.path.exists(inputFile2) != 1: - print "\n{} does not exist!\n".format(inputFile2) - sys.exit() - -# Open input files -file_naive = h.File(inputFile1, "r") -file_sort = h.File(inputFile2, "r") - -box_size = file_naive["/Header"].attrs["BoxSize"][0] - -# Read input file fields -ids_naive = file_naive["/PartType0/ParticleIDs"][:] -ids_sort = file_sort["/PartType0/ParticleIDs"][:] - -h_naive = file_naive["/PartType0/SmoothingLength"][:] -h_sort = file_sort["/PartType0/SmoothingLength"][:] - -pos_naive = file_naive["/PartType0/Coordinates"][:,:] -#pos_sort = file_sort["/PartType0/Coordinates"][:,:] - -num_density_naive = file_naive["/PartType0/Num_ngb_density"][:] -num_density_sort = file_sort["/PartType0/Num_ngb_density"][:] - -num_force_naive = file_naive["/PartType0/Num_ngb_force"][:] -num_force_sort = file_sort["/PartType0/Num_ngb_force"][:] - -neighbour_ids_density_naive = file_naive["/PartType0/Ids_ngb_density"][:] -neighbour_ids_density_sort = file_sort["/PartType0/Ids_ngb_density"][:] - -neighbour_ids_force_naive = file_naive["/PartType0/Ids_ngb_force"][:] -neighbour_ids_force_sort = file_sort["/PartType0/Ids_ngb_force"][:] - - -#wcount_naive = file_naive["/PartType0/Wcount"][:] -#wcount_sort = 
file_sort["/PartType0/Wcount"][:] -# -#wcount_naive = wcount_naive * np.power(h_naive,3) * kernel_norm -#wcount_sort = wcount_sort * np.power(h_sort,3) * kernel_norm - -# Cross check -max_density_ngbs_naive = np.max(num_density_naive) -max_density_ngbs_sort = np.max(num_density_sort) -max_force_ngbs_naive = np.max(num_force_naive) -max_force_ngbs_sort = np.max(num_force_sort) - -print " Min Mean Max " -print " ---------------------" -print "Ngbs density naiv: ", np.min(num_density_naive), np.mean(num_density_naive), max_density_ngbs_naive -print "Ngbs density sort: ", np.min(num_density_sort), np.mean(num_density_sort), max_density_ngbs_sort -print "Ngbs force naiv: ", np.min(num_force_naive), np.mean(num_force_naive), max_force_ngbs_naive -print "Ngbs force sort: ", np.min(num_force_sort), np.mean(num_force_sort), max_force_ngbs_sort -#print "Wcount naiv: ", np.min(wcount_naive), np.mean(wcount_naive), np.max(wcount_naive) -#print "Wcount sort: ", np.min(wcount_sort), np.mean(wcount_sort), np.max(wcount_sort) - -# Sort -index_naive = np.argsort(ids_naive) -index_sort = np.argsort(ids_sort) - -num_density_naive = num_density_naive[index_naive] -num_density_sort = num_density_sort[index_sort] -num_force_naive = num_force_naive[index_naive] -num_force_sort = num_force_sort[index_sort] -ids_naive = ids_naive[index_naive] -ids_sort = ids_sort[index_sort] -neighbour_ids_density_naive = neighbour_ids_density_naive[index_naive] -neighbour_ids_density_sort = neighbour_ids_density_sort[index_sort] -neighbour_ids_force_naive = neighbour_ids_force_naive[index_naive] -neighbour_ids_force_sort = neighbour_ids_force_sort[index_sort] -#wcount_naive = wcount_naive[index_naive] -#wcount_sort = wcount_sort[index_sort] -h_naive = h_naive[index_naive] -h_sort = h_sort[index_sort] -pos_naive = pos_naive[index_naive] -#pos_sort = pos_sort[index_sort] - -neighbour_length_naive = len(neighbour_ids_density_naive[0]) -neighbour_length_sort = len(neighbour_ids_density_sort[0]) - -# Check 
that input files are logging the same number of neighbours -if neighbour_length_naive != neighbour_length_sort: - print "Input files have logged different numbers of neighbour lengths!" - print "{} has logged: {} neighbours".format(inputFile1, neighbour_length_naive) - print "{} has logged: {} neighbours".format(inputFile2, neighbour_length_sort) - exit(1) - -if (max_density_ngbs_naive > neighbour_length_naive or max_force_ngbs_naive > neighbour_length_naive or - max_density_ngbs_sort > neighbour_length_sort or max_force_ngbs_sort > neighbour_length_sort): - print "The number of neighbours has exceeded the number of neighbours logged." - print "Modify NUM_OF_NEIGHBOURS in hydro_part.h to log more neighbours." - print "The highest neighbour count is: ", max(max_density_ngbs_naive,max_force_ngbs_naive, max_density_ngbs_sort,max_force_ngbs_sort) - exit(1) - -# First check -print "\n Min Max" -print " ----------" -print "Differences for density: ", min(num_density_naive - num_density_sort), max(num_density_naive - num_density_sort) -print "Differences for force: ", min(num_force_naive - num_force_sort), max(num_force_naive - num_force_sort) - -# Get the IDs that are different -mask_density = num_density_naive != num_density_sort -mask_force = num_force_naive != num_force_sort -num_invalid_density = np.sum(mask_density) -num_invalid_force = np.sum(mask_force) - -print "\nNum non-zero density: ", num_invalid_density -print "Num non-zero force: ", num_invalid_force - -print "\nParticle IDs with incorrect densities" -print "----------------------------------------" -print ids_naive[mask_density] - -# Check density neighbour lists -error += check_density_neighbours(ids_naive, neighbour_ids_density_naive, - neighbour_ids_density_sort, mask_density, pos_naive, h_naive, h_sort, - num_invalid_density, 2e-6) - -print "Num of density interactions", inputFile1 -print num_density_naive[mask_density] - -print "Num of density interactions", inputFile2 -print 
num_density_sort[mask_density] - -print "\nParticle IDs with incorrect forces" -print "------------------------------------" -print ids_naive[mask_force] - -# Check force neighbour lists -error += check_force_neighbours(ids_naive, neighbour_ids_force_naive, - neighbour_ids_force_sort, mask_force, pos_naive, h_naive, h_sort, - num_invalid_force, 2e-6) - -print "Num of force interactions", inputFile1 -print num_force_naive[mask_force] - -#print "Smoothing lengths", inputFile1 -#print h_naive[mask_force] - -print "Num of force interactions", inputFile2 -print num_force_sort[mask_force] - -#print "Smoothing lengths", inputFile2 -#print h_sort[mask_force] - -# Statistics of h difference -h_relative = (h_naive - h_sort) / h_naive -print "h statistics: {} {} (Min, 1st Percentile)".format(np.min(h_relative), np.percentile(h_relative,1)) -print "h statistics: {} {} (Mean, Median)".format(np.mean(h_relative), np.median(h_relative)) -print "h statistics: {} {} (Max, 99th Percentile)".format(np.max(h_relative), np.percentile(h_relative, 99)) - -if error: - print "\n------------------" - print "Differences found." - print "------------------" - exit(1) -else: - print "\n---------------------" - print "No differences found." - print "---------------------" - exit(0) diff --git a/examples/getCoolingTable.sh b/examples/getCoolingTable.sh old mode 100644 new mode 100755 diff --git a/examples/main.c b/examples/main.c index 773accc461fa26c819f0b603e7de689fa50e6cc6..93074a637e7427ac7bec53c51d7e6e608b6cdff2 100644 --- a/examples/main.c +++ b/examples/main.c @@ -41,6 +41,7 @@ #endif /* Local headers. */ +#include "argparse.h" #include "swift.h" /* Engine policy flags. */ @@ -51,69 +52,27 @@ /* Global profiler. */ struct profiler prof; -/** - * @brief Help messages for the command line parameters. - */ -void print_help_message(void) { - - printf("\nUsage: swift [OPTION]... PARAMFILE\n"); - printf(" swift_mpi [OPTION]... 
PARAMFILE\n\n"); - - printf("Valid options are:\n"); - printf(" %2s %14s %s\n", "-a", "", "Pin runners using processor affinity."); - printf(" %2s %14s %s\n", "-c", "", - "Run with cosmological time integration."); - printf(" %2s %14s %s\n", "-C", "", "Run with cooling."); - printf( - " %2s %14s %s\n", "-d", "", - "Dry run. Read the parameter file, allocate memory but does not read "); - printf( - " %2s %14s %s\n", "", "", - "the particles from ICs and exit before the start of time integration."); - printf(" %2s %14s %s\n", "", "", - "Allows user to check validy of parameter and IC files as well as " - "memory limits."); - printf(" %2s %14s %s\n", "-D", "", - "Always drift all particles even the ones far from active particles. " - "This emulates"); - printf(" %2s %14s %s\n", "", "", - "Gadget-[23] and GIZMO's default behaviours."); - printf(" %2s %14s %s\n", "-e", "", - "Enable floating-point exceptions (debugging mode)."); - printf(" %2s %14s %s\n", "-f", "{int}", - "Overwrite the CPU frequency (Hz) to be used for time measurements."); - printf(" %2s %14s %s\n", "-g", "", - "Run with an external gravitational potential."); - printf(" %2s %14s %s\n", "-G", "", "Run with self-gravity."); - printf(" %2s %14s %s\n", "-M", "", - "Reconstruct the multipoles every time-step."); - printf(" %2s %14s %s\n", "-n", "{int}", - "Execute a fixed number of time steps. When unset use the time_end " - "parameter to stop."); - printf(" %2s %14s %s\n", "-o", "{str}", - "Generate a default output parameter file."); - printf(" %2s %14s %s\n", "-P", "{sec:par:val}", - "Set parameter value and overwrites values read from the parameters " - "file. Can be used more than once."); - printf(" %2s %14s %s\n", "-r", "", "Continue using restart files."); - printf(" %2s %14s %s\n", "-s", "", "Run with hydrodynamics."); - printf(" %2s %14s %s\n", "-S", "", "Run with stars."); - printf(" %2s %14s %s\n", "-t", "{int}", - "The number of threads to use on each MPI rank. 
Defaults to 1 if not " - "specified."); - printf(" %2s %14s %s\n", "-T", "", "Print timers every time-step."); - printf(" %2s %14s %s\n", "-v", "[12]", "Increase the level of verbosity:"); - printf(" %2s %14s %s\n", "", "", "1: MPI-rank 0 writes,"); - printf(" %2s %14s %s\n", "", "", "2: All MPI-ranks write."); - printf(" %2s %14s %s\n", "-x", "", "Run with structure finding."); - printf(" %2s %14s %s\n", "-y", "{int}", - "Time-step frequency at which task graphs are dumped."); - printf(" %2s %14s %s\n", "-Y", "{int}", - "Time-step frequency at which threadpool tasks are dumped."); - printf(" %2s %14s %s\n", "-h", "", "Print this help message and exit."); - printf( - "\nSee the file parameter_example.yml for an example of " - "parameter file.\n"); +// Usage string. +static const char *const swift_usage[] = { + "swift [options] [[--] param-file]", + "swift [options] param-file", + "swift_mpi [options] [[--] param-file]", + "swift_mpi [options] param-file", + NULL, +}; + +// Function to handle multiple -P arguments. 
+struct cmdparams { + const char *param[PARSER_MAX_NO_OF_PARAMS]; + int nparam; +}; + +static int handle_cmdparam(struct argparse *self, + const struct argparse_option *opt) { + struct cmdparams *cmdps = (struct cmdparams *)opt->data; + cmdps->param[cmdps->nparam] = *(char **)opt->value; + cmdps->nparam++; + return 1; } /** @@ -135,6 +94,7 @@ int main(int argc, char *argv[]) { struct gpart *gparts = NULL; struct gravity_props gravity_properties; struct hydro_props hydro_properties; + struct stars_props stars_properties; struct part *parts = NULL; struct phys_const prog_const; struct sourceterms sourceterms; @@ -192,6 +152,7 @@ int main(int argc, char *argv[]) { int with_self_gravity = 0; int with_hydro = 0; int with_stars = 0; + int with_feedback = 0; int with_fp_exceptions = 0; int with_drift_all = 0; int with_mpole_reconstruction = 0; @@ -199,188 +160,185 @@ int main(int argc, char *argv[]) { int verbose = 0; int nr_threads = 1; int with_verbose_timers = 0; - int nparams = 0; - char output_parameters_filename[200] = ""; - char *cmdparams[PARSER_MAX_NO_OF_PARAMS]; - char paramFileName[200] = ""; + char *output_parameters_filename = NULL; + char *cpufreqarg = NULL; + char *param_filename = NULL; char restart_file[200] = ""; unsigned long long cpufreq = 0; + struct cmdparams cmdps; + cmdps.nparam = 0; + cmdps.param[0] = NULL; + char *buffer = NULL; + + /* Parse the command-line parameters. */ + struct argparse_option options[] = { + OPT_HELP(), + + OPT_GROUP(" Simulation options:"), + OPT_BOOLEAN('b', "feedback", &with_feedback, "Run with stars feedback", + NULL, 0, 0), + OPT_BOOLEAN('c', "cosmology", &with_cosmology, + "Run with cosmological time integration.", NULL, 0, 0), + OPT_BOOLEAN('C', "cooling", &with_cooling, "Run with cooling", NULL, 0, + 0), + OPT_BOOLEAN('D', "drift-all", &with_drift_all, + "Always drift all particles even the ones far from active " + "particles. 
This emulates Gadget-[23] and GIZMO's default " + "behaviours.", + NULL, 0, 0), + OPT_BOOLEAN('F', "sourceterms", &with_sourceterms, "", NULL, 0, 0), + OPT_BOOLEAN('g', "external-gravity", &with_external_gravity, + "Run with an external gravitational potential.", NULL, 0, 0), + OPT_BOOLEAN('G', "self-gravity", &with_self_gravity, + "Run with self-gravity.", NULL, 0, 0), + OPT_BOOLEAN('M', "multipole-reconstruction", &with_mpole_reconstruction, + "Reconstruct the multipoles every time-step.", NULL, 0, 0), + OPT_BOOLEAN('s', "hydro", &with_hydro, "Run with hydrodynamics.", NULL, 0, + 0), + OPT_BOOLEAN('S', "stars", &with_stars, "Run with stars", NULL, 0, 0), + OPT_BOOLEAN('x', "velociraptor", &with_structure_finding, + "Run with structure finding", NULL, 0, 0), + + OPT_GROUP(" Control options:"), + OPT_BOOLEAN('a', "pin", &with_aff, + "Pin runners using processor affinity.", NULL, 0, 0), + OPT_BOOLEAN('d', "dry-run", &dry_run, + "Dry run. Read the parameter file, allocates memory but does " + "not read the particles from ICs. Exits before the start of " + "time integration. Checks the validity of parameters and IC " + "files as well as memory limits.", + NULL, 0, 0), + OPT_BOOLEAN('e', "fpe", &with_fp_exceptions, + "Enable floating-point exceptions (debugging mode).", NULL, 0, + 0), + OPT_STRING('f', "cpu-frequency", &cpufreqarg, + "Overwrite the CPU " + "frequency (Hz) to be used for time measurements.", + NULL, 0, 0), + OPT_INTEGER('n', "steps", &nsteps, + "Execute a fixed number of time steps. When unset use the " + "time_end parameter to stop.", + NULL, 0, 0), + OPT_STRING('o', "output-params", &output_parameters_filename, + "Generate a default output parameter file.", NULL, 0, 0), + OPT_STRING('P', "param", &buffer, + "Set parameter value, overiding the value read from the " + "parameter file. 
Can be used more than once {sec:par:value}.", + handle_cmdparam, (intptr_t)&cmdps, 0), + OPT_BOOLEAN('r', "restart", &restart, "Continue using restart files.", + NULL, 0, 0), + OPT_INTEGER('t', "threads", &nr_threads, + "The number of threads to use on each MPI rank. Defaults to " + "1 if not specified.", + NULL, 0, 0), + OPT_INTEGER('T', "timers", &with_verbose_timers, + "Print timers every time-step.", NULL, 0, 0), + OPT_INTEGER('v', "verbose", &verbose, + "Run in verbose mode, in MPI mode 2 outputs from all ranks.", + NULL, 0, 0), + OPT_INTEGER('y', "task-dumps", &dump_tasks, + "Time-step frequency at which task graphs are dumped.", NULL, + 0, 0), + OPT_INTEGER('Y', "threadpool-dumps", &dump_threadpool, + "Time-step frequency at which threadpool tasks are dumped.", + NULL, 0, 0), + OPT_END(), + }; + struct argparse argparse; + argparse_init(&argparse, options, swift_usage, 0); + argparse_describe(&argparse, "\nParameters:", + "\nSee the file examples/parameter_example.yml for an " + "example of parameter file."); + int nargs = argparse_parse(&argparse, argc, (const char **)argv); + + /* Need a parameter file. */ + if (nargs != 1) { + if (myrank == 0) argparse_usage(&argparse); + printf("\nError: no parameter file was supplied.\n"); + return 1; + } + param_filename = argv[0]; - /* Parse the parameters */ - int c; - while ((c = getopt(argc, argv, "acCdDef:FgGhMn:o:P:rsSt:Tv:xy:Y:")) != -1) - switch (c) { - case 'a': -#if defined(HAVE_SETAFFINITY) && defined(HAVE_LIBNUMA) - with_aff = 1; -#else - error("Need NUMA support for thread affinity"); + /* Checks of options. 
*/ +#if !defined(HAVE_SETAFFINITY) || !defined(HAVE_LIBNUMA) + if (with_aff) { + printf("Error: no NUMA support for thread affinity\n"); + return 1; + } #endif - break; - case 'c': - with_cosmology = 1; - break; - case 'C': - with_cooling = 1; - break; - case 'd': - dry_run = 1; - break; - case 'D': - with_drift_all = 1; - break; - case 'e': -#ifdef HAVE_FE_ENABLE_EXCEPT - with_fp_exceptions = 1; -#else - error("Need support for floating point exception on this platform"); + +#ifndef HAVE_FE_ENABLE_EXCEPT + if (with_fp_exceptions) { + printf("Error: no support for floating point exceptions\n"); + return 1; + } #endif - break; - case 'f': - if (sscanf(optarg, "%llu", &cpufreq) != 1) { - if (myrank == 0) printf("Error parsing CPU frequency (-f).\n"); - if (myrank == 0) print_help_message(); - return 1; - } - break; - case 'F': - with_sourceterms = 1; - break; - case 'g': - with_external_gravity = 1; - break; - case 'G': - with_self_gravity = 1; - break; - case 'h': - if (myrank == 0) print_help_message(); - return 0; - case 'M': - with_mpole_reconstruction = 1; - break; - case 'n': - if (sscanf(optarg, "%d", &nsteps) != 1) { - if (myrank == 0) printf("Error parsing fixed number of steps.\n"); - if (myrank == 0) print_help_message(); - return 1; - } - break; - case 'o': - if (sscanf(optarg, "%s", output_parameters_filename) != 1) { - if (myrank == 0) { - printf("Error parsing output fields filename"); - print_help_message(); - } - return 1; - } - break; - case 'P': - cmdparams[nparams] = optarg; - nparams++; - break; - case 'r': - restart = 1; - break; - case 's': - with_hydro = 1; - break; - case 'S': - with_stars = 1; - break; - case 't': - if (sscanf(optarg, "%d", &nr_threads) != 1) { - if (myrank == 0) - printf("Error parsing the number of threads (-t).\n"); - if (myrank == 0) print_help_message(); - return 1; - } - break; - case 'T': - with_verbose_timers = 1; - break; - case 'v': - if (sscanf(optarg, "%d", &verbose) != 1) { - if (myrank == 0) printf("Error 
parsing verbosity level (-v).\n"); - if (myrank == 0) print_help_message(); - return 1; - } - break; - case 'x': -#ifdef HAVE_VELOCIRAPTOR - with_structure_finding = 1; -#else - error( - "Error: (-x) needs to have the code compiled with VELOCIraptor " - "linked in."); + +#ifndef HAVE_VELOCIRAPTOR + if (with_structure_finding) { + printf("Error: VELOCIraptor is not available\n"); + return 1; + } #endif - break; - case 'y': - if (sscanf(optarg, "%d", &dump_tasks) != 1) { - if (myrank == 0) printf("Error parsing dump_tasks (-y). \n"); - if (myrank == 0) print_help_message(); - return 1; - } + #ifndef SWIFT_DEBUG_TASKS - if (dump_tasks) { - error( - "Task dumping is only possible if SWIFT was configured with the " - "--enable-task-debugging option."); - } + if (dump_tasks) { + printf( + "Error: task dumping is only possible if SWIFT was configured" + " with the --enable-task-debugging option.\n"); + return 1; + } #endif - break; - case 'Y': - if (sscanf(optarg, "%d", &dump_threadpool) != 1) { - if (myrank == 0) printf("Error parsing dump_threadpool (-Y). \n"); - if (myrank == 0) print_help_message(); - return 1; - } + #ifndef SWIFT_DEBUG_THREADPOOL - if (dump_threadpool) { - error( - "Threadpool dumping is only possible if SWIFT was configured " - "with the " - "--enable-threadpool-debugging option."); - } + if (dump_threadpool) { + printf( + "Error: threadpool dumping is only possible if SWIFT was " + "configured with the --enable-threadpool-debugging option.\n"); + return 1; + } #endif - break; - case '?': - if (myrank == 0) print_help_message(); - return 1; - break; + + /* The CPU frequency is a long long, so we need to parse that ourselves. 
*/ + if (cpufreqarg != NULL) { + if (sscanf(cpufreqarg, "%llu", &cpufreq) != 1) { + if (myrank == 0) + printf("Error parsing CPU frequency (%s).\n", cpufreqarg); + return 1; } + } /* Write output parameter file */ - if (myrank == 0 && strcmp(output_parameters_filename, "") != 0) { + if (myrank == 0 && output_parameters_filename != NULL) { io_write_output_field_parameter(output_parameters_filename); printf("End of run.\n"); return 0; } - /* check inputs */ - if (optind == argc - 1) { - if (!strcpy(paramFileName, argv[optind++])) - error("Error reading parameter file name."); - } else if (optind > argc - 1) { - if (myrank == 0) printf("Error: A parameter file name must be provided\n"); - if (myrank == 0) print_help_message(); - return 1; - } else { - if (myrank == 0) printf("Error: Too many parameters given\n"); - if (myrank == 0) print_help_message(); - return 1; - } if (!with_self_gravity && !with_hydro && !with_external_gravity) { - if (myrank == 0) - printf("Error: At least one of -s, -g or -G must be chosen.\n"); - if (myrank == 0) print_help_message(); + if (myrank == 0) { + argparse_usage(&argparse); + printf("\nError: At least one of -s, -g or -G must be chosen.\n"); + } return 1; } if (with_stars && !with_external_gravity && !with_self_gravity) { - if (myrank == 0) + if (myrank == 0) { + argparse_usage(&argparse); printf( - "Error: Cannot process stars without gravity, -g or -G must be " + "\nError: Cannot process stars without gravity, -g or -G " + "must be chosen.\n"); + } + return 1; + } + + if (!with_stars && with_feedback) { + if (myrank == 0) { + argparse_usage(&argparse); + printf( + "\nError: Cannot process feedback without stars, -S must be " "chosen.\n"); - if (myrank == 0) print_help_message(); + } return 1; } @@ -470,15 +428,16 @@ int main(int argc, char *argv[]) { (struct swift_params *)malloc(sizeof(struct swift_params)); if (params == NULL) error("Error allocating memory for the parameter file."); if (myrank == 0) { - message("Reading runtime 
parameters from file '%s'", paramFileName); - parser_read_file(paramFileName, params); + message("Reading runtime parameters from file '%s'", param_filename); + parser_read_file(param_filename, params); /* Handle any command-line overrides. */ - if (nparams > 0) { + if (cmdps.nparam > 0) { message( "Overwriting values read from the YAML file with command-line " "values."); - for (int k = 0; k < nparams; k++) parser_set_param(params, cmdparams[k]); + for (int k = 0; k < cmdps.nparam; k++) + parser_set_param(params, cmdps.param[k]); } } #ifdef WITH_MPI @@ -486,6 +445,21 @@ int main(int argc, char *argv[]) { MPI_Bcast(params, sizeof(struct swift_params), MPI_BYTE, 0, MPI_COMM_WORLD); #endif + /* Temporary early aborts for modes not supported over MPI. */ +#ifdef WITH_MPI + if (with_mpole_reconstruction && nr_nodes > 1) + error("Cannot reconstruct m-poles every step over MPI (yet)."); +#endif + +#ifdef WITH_MPI + if (with_feedback) error("Can't run with feedback over MPI (yet)."); +#endif + +#if defined(WITH_MPI) && defined(HAVE_VELOCIRAPTOR) + if (with_structure_finding && nr_nodes > 1) + error("VEOCIraptor not yet enabled over MPI."); +#endif + /* Check that we can write the snapshots by testing if the output * directory exists and is searchable and writable. 
*/ char basename[PARSER_MAX_LINE_SIZE]; @@ -516,12 +490,20 @@ int main(int argc, char *argv[]) { /* Let's report what we did */ if (myrank == 0) { - message("Using initial partition %s", +#if defined(HAVE_PARMETIS) + if (reparttype.usemetis) + message("Using METIS serial partitioning:"); + else + message("Using ParMETIS partitioning:"); +#else + message("Using METIS serial partitioning:"); +#endif + message(" initial partitioning: %s", initial_partition_name[initial_partition.type]); if (initial_partition.type == INITPART_GRID) - message("grid set to [ %i %i %i ].", initial_partition.grid[0], + message(" grid set to [ %i %i %i ].", initial_partition.grid[0], initial_partition.grid[1], initial_partition.grid[2]); - message("Using %s repartitioning", repartition_name[reparttype.type]); + message(" repartitioning: %s", repartition_name[reparttype.type]); } #endif @@ -555,9 +537,23 @@ int main(int argc, char *argv[]) { /* How often to check for the stop file and dump restarts and exit the * application. */ - int restart_stop_steps = + const int restart_stop_steps = parser_get_opt_param_int(params, "Restarts:stop_steps", 100); + /* Get the maximal wall-clock time of this run */ + const float restart_max_hours_runtime = + parser_get_opt_param_float(params, "Restarts:max_run_time", FLT_MAX); + + /* Do we want to resubmit when we hit the limit? */ + const int resubmit_after_max_hours = + parser_get_opt_param_int(params, "Restarts:resubmit_on_exit", 0); + + /* What command should we run to resubmit at the end? */ + char resubmit_command[PARSER_MAX_LINE_SIZE]; + if (resubmit_after_max_hours) + parser_get_param_string(params, "Restarts:resubmit_command", + resubmit_command); + /* If restarting, look for the restart files. 
*/ if (restart) { @@ -634,7 +630,7 @@ int main(int argc, char *argv[]) { /* Initialize unit system and constants */ units_init_from_params(&us, params, "InternalUnitSystem"); phys_const_init(&us, params, &prog_const); - if (myrank == 0 && verbose > 0) { + if (myrank == 0) { message("Internal unit system: U_M = %e g.", us.UnitMass_in_cgs); message("Internal unit system: U_L = %e cm.", us.UnitLength_in_cgs); message("Internal unit system: U_t = %e s.", us.UnitTime_in_cgs); @@ -643,25 +639,11 @@ int main(int argc, char *argv[]) { phys_const_print(&prog_const); } - /* Initialise the cosmology */ - if (with_cosmology) - cosmology_init(params, &us, &prog_const, &cosmo); - else - cosmology_init_no_cosmo(&cosmo); - if (myrank == 0 && with_cosmology) cosmology_print(&cosmo); - - /* Initialise the hydro properties */ - if (with_hydro) - hydro_props_init(&hydro_properties, &prog_const, &us, params); - if (with_hydro) eos_init(&eos, &prog_const, &us, params); - - /* Initialise the gravity properties */ - if (with_self_gravity) - gravity_props_init(&gravity_properties, params, &cosmo, with_cosmology); - - /* Read particles and space information from (GADGET) ICs */ + /* Read particles and space information from ICs */ char ICfileName[200] = ""; parser_get_param_string(params, "InitialConditions:file_name", ICfileName); + const int periodic = + parser_get_param_int(params, "InitialConditions:periodic"); const int replicate = parser_get_opt_param_int(params, "InitialConditions:replicate", 1); clean_smoothing_length_values = parser_get_opt_param_int( @@ -672,10 +654,47 @@ int main(int argc, char *argv[]) { params, "InitialConditions:cleanup_velocity_factors", 0); const int generate_gas_in_ics = parser_get_opt_param_int( params, "InitialConditions:generate_gas_in_ics", 0); + + /* Some checks that we are not doing something stupid */ if (generate_gas_in_ics && flag_entropy_ICs) error("Can't generate gas if the entropy flag is set in the ICs."); if (generate_gas_in_ics && 
!with_cosmology) error("Can't generate gas if the run is not cosmological."); + + /* Initialise the cosmology */ + if (with_cosmology) + cosmology_init(params, &us, &prog_const, &cosmo); + else + cosmology_init_no_cosmo(&cosmo); + if (myrank == 0 && with_cosmology) cosmology_print(&cosmo); + + /* Initialise the hydro properties */ + if (with_hydro) + hydro_props_init(&hydro_properties, &prog_const, &us, params); + else + bzero(&hydro_properties, sizeof(struct hydro_props)); + + /* Initialise the equation of state */ + if (with_hydro) + eos_init(&eos, &prog_const, &us, params); + else + bzero(&eos, sizeof(struct eos_parameters)); + + /* Initialise the stars properties */ + if (with_stars) + stars_props_init(&stars_properties, &prog_const, &us, params, + &hydro_properties); + else + bzero(&stars_properties, sizeof(struct stars_props)); + + /* Initialise the gravity properties */ + if (with_self_gravity) + gravity_props_init(&gravity_properties, params, &cosmo, with_cosmology, + periodic); + else + bzero(&gravity_properties, sizeof(struct gravity_props)); + + /* Be verbose about what happens next */ if (myrank == 0) message("Reading ICs from file '%s'", ICfileName); if (myrank == 0 && cleanup_h) message("Cleaning up h-factors (h=%f)", cosmo.h); @@ -686,20 +705,19 @@ int main(int argc, char *argv[]) { /* Get ready to read particles of all kinds */ size_t Ngas = 0, Ngpart = 0, Nspart = 0; double dim[3] = {0., 0., 0.}; - int periodic = 0; if (myrank == 0) clocks_gettime(&tic); #if defined(HAVE_HDF5) #if defined(WITH_MPI) #if defined(HAVE_PARALLEL_HDF5) read_ic_parallel(ICfileName, &us, dim, &parts, &gparts, &sparts, &Ngas, - &Ngpart, &Nspart, &periodic, &flag_entropy_ICs, with_hydro, + &Ngpart, &Nspart, &flag_entropy_ICs, with_hydro, (with_external_gravity || with_self_gravity), with_stars, cleanup_h, cleanup_sqrt_a, cosmo.h, cosmo.a, myrank, nr_nodes, MPI_COMM_WORLD, MPI_INFO_NULL, nr_threads, dry_run); #else read_ic_serial(ICfileName, &us, dim, &parts, &gparts, 
&sparts, &Ngas, - &Ngpart, &Nspart, &periodic, &flag_entropy_ICs, with_hydro, + &Ngpart, &Nspart, &flag_entropy_ICs, with_hydro, (with_external_gravity || with_self_gravity), with_stars, cleanup_h, cleanup_sqrt_a, cosmo.h, cosmo.a, myrank, nr_nodes, MPI_COMM_WORLD, MPI_INFO_NULL, nr_threads, @@ -707,7 +725,7 @@ int main(int argc, char *argv[]) { #endif #else read_ic_single(ICfileName, &us, dim, &parts, &gparts, &sparts, &Ngas, - &Ngpart, &Nspart, &periodic, &flag_entropy_ICs, with_hydro, + &Ngpart, &Nspart, &flag_entropy_ICs, with_hydro, (with_external_gravity || with_self_gravity), with_stars, cleanup_h, cleanup_sqrt_a, cosmo.h, cosmo.a, nr_threads, dry_run); @@ -720,20 +738,11 @@ int main(int argc, char *argv[]) { fflush(stdout); } -#ifdef WITH_MPI - if (periodic && with_self_gravity) - error("Periodic self-gravity over MPI temporarily disabled."); -#endif - -#if defined(WITH_MPI) && defined(HAVE_VELOCIRAPTOR) - if (with_structure_finding) error("VEOCIraptor not yet enabled over MPI."); -#endif - #ifdef SWIFT_DEBUG_CHECKS /* Check once and for all that we don't have unwanted links */ if (!with_stars && !dry_run) { for (size_t k = 0; k < Ngpart; ++k) - if (gparts[k].type == swift_type_star) error("Linking problem"); + if (gparts[k].type == swift_type_stars) error("Linking problem"); } if (!with_hydro && !dry_run) { for (size_t k = 0; k < Ngpart; ++k) @@ -759,31 +768,17 @@ int main(int argc, char *argv[]) { if (myrank == 0) message( - "Read %lld gas particles, %lld star particles and %lld gparts from " - "the " - "ICs.", + "Read %lld gas particles, %lld stars particles and %lld gparts from " + "the ICs.", N_total[0], N_total[2], N_total[1]); /* Verify that the fields to dump actually exist */ if (myrank == 0) io_check_output_fields(params, N_total); - /* Initialise the long-range gravity mesh */ - if (with_self_gravity && periodic) { -#ifdef HAVE_FFTW - pm_mesh_init(&mesh, &gravity_properties, dim); -#else - /* Need the FFTW library if periodic and self gravity. 
*/ - error( - "No FFTW library found. Cannot compute periodic long-range forces."); -#endif - } else { - pm_mesh_init_no_mesh(&mesh, dim); - } - /* Initialize the space with these data. */ if (myrank == 0) clocks_gettime(&tic); space_init(&s, params, &cosmo, dim, parts, gparts, sparts, Ngas, Ngpart, - Nspart, periodic, replicate, generate_gas_in_ics, + Nspart, periodic, replicate, generate_gas_in_ics, with_hydro, with_self_gravity, talking, dry_run); if (myrank == 0) { @@ -793,6 +788,19 @@ int main(int argc, char *argv[]) { fflush(stdout); } + /* Initialise the long-range gravity mesh */ + if (with_self_gravity && periodic) { +#ifdef HAVE_FFTW + pm_mesh_init(&mesh, &gravity_properties, s.dim, nr_threads); +#else + /* Need the FFTW library if periodic and self gravity. */ + error( + "No FFTW library found. Cannot compute periodic long-range forces."); +#endif + } else { + pm_mesh_init_no_mesh(&mesh, s.dim); + } + /* Check that the matter content matches the cosmology given in the * parameter file. */ if (with_cosmology && with_self_gravity && !dry_run) @@ -869,15 +877,19 @@ int main(int argc, char *argv[]) { if (with_cooling) engine_policies |= engine_policy_cooling; if (with_sourceterms) engine_policies |= engine_policy_sourceterms; if (with_stars) engine_policies |= engine_policy_stars; + if (with_feedback) engine_policies |= engine_policy_feedback; if (with_structure_finding) engine_policies |= engine_policy_structure_finding; + // MATTHIEU: Temporary star formation law + // engine_policies |= engine_policy_star_formation; + /* Initialize the engine with the space and policies. 
*/ if (myrank == 0) clocks_gettime(&tic); engine_init(&e, &s, params, N_total[0], N_total[1], N_total[2], engine_policies, talking, &reparttype, &us, &prog_const, &cosmo, - &hydro_properties, &gravity_properties, &mesh, &potential, - &cooling_func, &chemistry, &sourceterms); + &hydro_properties, &gravity_properties, &stars_properties, + &mesh, &potential, &cooling_func, &chemistry, &sourceterms); engine_config(0, &e, params, nr_nodes, myrank, nr_threads, with_aff, talking, restart_file); @@ -892,15 +904,14 @@ int main(int argc, char *argv[]) { if (myrank == 0) { long long N_DM = N_total[1] - N_total[2] - N_total[0]; message( - "Running on %lld gas particles, %lld star particles and %lld DM " + "Running on %lld gas particles, %lld stars particles and %lld DM " "particles (%lld gravity particles)", N_total[0], N_total[2], N_total[1] > 0 ? N_DM : 0, N_total[1]); message( - "from t=%.3e until t=%.3e with %d threads and %d queues " - "(dt_min=%.3e, " - "dt_max=%.3e)...", - e.time_begin, e.time_end, e.nr_threads, e.sched.nr_queues, e.dt_min, - e.dt_max); + "from t=%.3e until t=%.3e with %d ranks, %d threads / rank and %d " + "task queues / rank (dt_min=%.3e, dt_max=%.3e)...", + e.time_begin, e.time_end, nr_nodes, e.nr_threads, e.sched.nr_queues, + e.dt_min, e.dt_max); fflush(stdout); } } @@ -940,9 +951,16 @@ int main(int argc, char *argv[]) { engine_init_particles(&e, flag_entropy_ICs, clean_smoothing_length_values); /* Write the state of the system before starting time integration. */ +#ifdef WITH_LOGGER + logger_log_all(e.logger, &e); + engine_dump_index(&e); +#endif engine_dump_snapshot(&e); engine_print_stats(&e); + /* Is there a dump before the end of the first time-step? */ + engine_check_for_dumps(&e); + #ifdef HAVE_VELOCIRAPTOR /* Call VELOCIraptor for the first time after the first snapshot dump. 
*/ // if (e.policy & engine_policy_structure_finding) { @@ -954,7 +972,7 @@ int main(int argc, char *argv[]) { /* Legend */ if (myrank == 0) { - printf("# %6s %14s %14s %10s %14s %9s %12s %12s %12s %16s [%s] %6s\n", + printf("# %6s %14s %12s %12s %14s %9s %12s %12s %12s %16s [%s] %6s\n", "Step", "Time", "Scale-factor", "Redshift", "Time-step", "Time-bins", "Updates", "g-Updates", "s-Updates", "Wall-clock time", clocks_getunit(), "Props"); @@ -978,7 +996,7 @@ int main(int argc, char *argv[]) { /* Main simulation loop */ /* ==================== */ - int force_stop = 0; + int force_stop = 0, resubmit = 0; for (int j = 0; !engine_is_done(&e) && e.step - 1 != nsteps && !force_stop; j++) { @@ -999,6 +1017,13 @@ int main(int argc, char *argv[]) { message("Forcing application exit, dumping restart files..."); } + /* Did we exceed the maximal runtime? */ + if (clocks_get_hours_since_start() > restart_max_hours_runtime) { + force_stop = 1; + message("Runtime limit reached, dumping restart files..."); + if (resubmit_after_max_hours) resubmit = 1; + } + /* Also if using nsteps to exit, will not have saved any restarts on exit, * make sure we do that (useful in testing only). */ if (force_stop || (e.restart_onexit && e.step - 1 == nsteps)) @@ -1010,8 +1035,8 @@ int main(int argc, char *argv[]) { #ifdef WITH_MPI /* Make sure output file is empty, only on one rank. 
*/ - char dumpfile[30]; - snprintf(dumpfile, 30, "thread_info_MPI-step%d.dat", j + 1); + char dumpfile[35]; + snprintf(dumpfile, sizeof(dumpfile), "thread_info_MPI-step%d.dat", j + 1); FILE *file_thread; if (myrank == 0) { file_thread = fopen(dumpfile, "w"); @@ -1037,21 +1062,24 @@ int main(int argc, char *argv[]) { int count = 0; for (int l = 0; l < e.sched.nr_tasks; l++) { if (!e.sched.tasks[l].implicit && e.sched.tasks[l].toc != 0) { - fprintf( - file_thread, - " %03i %i %i %i %i %lli %lli %i %i %i %i %i %i\n", myrank, - e.sched.tasks[l].rid, e.sched.tasks[l].type, - e.sched.tasks[l].subtype, (e.sched.tasks[l].cj == NULL), - e.sched.tasks[l].tic, e.sched.tasks[l].toc, - (e.sched.tasks[l].ci != NULL) ? e.sched.tasks[l].ci->count - : 0, - (e.sched.tasks[l].cj != NULL) ? e.sched.tasks[l].cj->count - : 0, - (e.sched.tasks[l].ci != NULL) ? e.sched.tasks[l].ci->gcount - : 0, - (e.sched.tasks[l].cj != NULL) ? e.sched.tasks[l].cj->gcount - : 0, - e.sched.tasks[l].flags, e.sched.tasks[l].sid); + fprintf(file_thread, + " %03i %i %i %i %i %lli %lli %i %i %i %i %lli %i\n", + myrank, e.sched.tasks[l].rid, e.sched.tasks[l].type, + e.sched.tasks[l].subtype, (e.sched.tasks[l].cj == NULL), + e.sched.tasks[l].tic, e.sched.tasks[l].toc, + (e.sched.tasks[l].ci != NULL) + ? e.sched.tasks[l].ci->hydro.count + : 0, + (e.sched.tasks[l].cj != NULL) + ? e.sched.tasks[l].cj->hydro.count + : 0, + (e.sched.tasks[l].ci != NULL) + ? e.sched.tasks[l].ci->grav.count + : 0, + (e.sched.tasks[l].cj != NULL) + ? 
e.sched.tasks[l].cj->grav.count + : 0, + e.sched.tasks[l].flags, e.sched.tasks[l].sid); } fflush(stdout); count++; @@ -1064,8 +1092,8 @@ int main(int argc, char *argv[]) { } #else - char dumpfile[30]; - snprintf(dumpfile, 30, "thread_info-step%d.dat", j + 1); + char dumpfile[32]; + snprintf(dumpfile, sizeof(dumpfile), "thread_info-step%d.dat", j + 1); FILE *file_thread; file_thread = fopen(dumpfile, "w"); /* Add some information to help with the plots */ @@ -1079,10 +1107,14 @@ int main(int argc, char *argv[]) { e.sched.tasks[l].rid, e.sched.tasks[l].type, e.sched.tasks[l].subtype, (e.sched.tasks[l].cj == NULL), e.sched.tasks[l].tic, e.sched.tasks[l].toc, - (e.sched.tasks[l].ci == NULL) ? 0 : e.sched.tasks[l].ci->count, - (e.sched.tasks[l].cj == NULL) ? 0 : e.sched.tasks[l].cj->count, - (e.sched.tasks[l].ci == NULL) ? 0 : e.sched.tasks[l].ci->gcount, - (e.sched.tasks[l].cj == NULL) ? 0 : e.sched.tasks[l].cj->gcount, + (e.sched.tasks[l].ci == NULL) ? 0 + : e.sched.tasks[l].ci->hydro.count, + (e.sched.tasks[l].cj == NULL) ? 0 + : e.sched.tasks[l].cj->hydro.count, + (e.sched.tasks[l].ci == NULL) ? 0 + : e.sched.tasks[l].ci->grav.count, + (e.sched.tasks[l].cj == NULL) ? 
0 + : e.sched.tasks[l].cj->grav.count, e.sched.tasks[l].sid); } } @@ -1124,33 +1156,41 @@ int main(int argc, char *argv[]) { /* Print some information to the screen */ printf( - " %6d %14e %14e %10.5f %14e %4d %4d %12lld %12lld %12lld %21.3f %6d\n", + " %6d %14e %12.7f %12.7f %14e %4d %4d %12lld %12lld %12lld %21.3f " + "%6d\n", e.step, e.time, e.cosmology->a, e.cosmology->z, e.time_step, e.min_active_bin, e.max_active_bin, e.updates, e.g_updates, e.s_updates, e.wallclock_time, e.step_props); fflush(stdout); - fprintf( - e.file_timesteps, - " %6d %14e %14e %10.5f %14e %4d %4d %12lld %12lld %12lld %21.3f %6d\n", - e.step, e.time, e.cosmology->a, e.cosmology->z, e.time_step, - e.min_active_bin, e.max_active_bin, e.updates, e.g_updates, e.s_updates, - e.wallclock_time, e.step_props); + fprintf(e.file_timesteps, + " %6d %14e %12.7f %12.7f %14e %4d %4d %12lld %12lld %12lld %21.3f " + "%6d\n", + e.step, e.time, e.cosmology->a, e.cosmology->z, e.time_step, + e.min_active_bin, e.max_active_bin, e.updates, e.g_updates, + e.s_updates, e.wallclock_time, e.step_props); fflush(e.file_timesteps); } /* Write final output. */ - engine_drift_all(&e); - engine_print_stats(&e); - engine_dump_snapshot(&e); + if (!force_stop) { + engine_drift_all(&e, /*drift_mpole=*/0); + engine_print_stats(&e); +#ifdef WITH_LOGGER + logger_log_all(e.logger, &e); + engine_dump_index(&e); +#endif + // write a final snapshot with logger, in order to facilitate a restart + engine_dump_snapshot(&e); #ifdef HAVE_VELOCIRAPTOR - /* Call VELOCIraptor at the end of the run to find groups. */ - if (e.policy & engine_policy_structure_finding) { - velociraptor_init(&e); - velociraptor_invoke(&e); - } + /* Call VELOCIraptor at the end of the run to find groups. 
*/ + if (e.policy & engine_policy_structure_finding) { + velociraptor_init(&e); + velociraptor_invoke(&e); + } #endif + } #ifdef WITH_MPI if ((res = MPI_Finalize()) != MPI_SUCCESS) @@ -1161,10 +1201,20 @@ int main(int argc, char *argv[]) { * stop file if normal exit happened first. */ if (myrank == 0) force_stop = restart_stop_now(restart_dir, 1); + /* Did we want to run a re-submission command just before dying? */ + if (myrank == 0 && resubmit) { + message("Running the resubmission command:"); + restart_resubmit(resubmit_command); + fflush(stdout); + fflush(stderr); + message("resubmission command completed."); + } + /* Clean everything */ if (with_verbose_timers) timers_close_file(); - if (with_cosmology) cosmology_clean(&cosmo); - if (with_self_gravity) pm_mesh_clean(&mesh); + if (with_cosmology) cosmology_clean(e.cosmology); + if (with_self_gravity) pm_mesh_clean(e.mesh); + if (with_cooling) cooling_clean(&cooling_func); engine_clean(&e); free(params); diff --git a/examples/parameter_example.yml b/examples/parameter_example.yml index ddb71c594122a3e8d6ddbd7c5b73e0474b404a75..6adccf2963dbeff67755bdac946e7bfb10d4a897 100644 --- a/examples/parameter_example.yml +++ b/examples/parameter_example.yml @@ -32,7 +32,12 @@ SPH: max_ghost_iterations: 30 # (Optional) Maximal number of iterations allowed to converge towards the smoothing length. initial_temperature: 0 # (Optional) Initial temperature (in internal units) to set the gas particles at start-up. Value is ignored if set to 0. minimal_temperature: 0 # (Optional) Minimal temperature (in internal units) allowed for the gas particles. Value is ignored if set to 0. - H_mass_fraction: 0.76 # (Optional) Hydrogen mass fraction used for initial conversion from temp to internal energy. + H_mass_fraction: 0.755 # (Optional) Hydrogen mass fraction used for initial conversion from temp to internal energy. Default value is derived from the physical constants. 
+ H_ionization_temperature: 1e4 # (Optional) Temperature of the transition from neutral to ionized Hydrogen for primoridal gas. + viscosity_alpha: 0.8 # (Optional) Override for the initial value of the artificial viscosity. In schemes that have a fixed AV, this remains as alpha throughout the run. + viscosity_alpha_max: 2.0 # (Optional) Maximal value for the artificial viscosity in schemes that allow alpha to vary + viscosity_alpha_min: 0.1 # (Optional) Minimal value for the artificial viscosity in schemes that allow alpha to vary + viscosity_length: 0.1 # (Optional) Decay length for the artificial viscosity in schemes that allow alpha to vary # Parameters for the self-gravity scheme Gravity: @@ -54,8 +59,10 @@ Scheduler: cell_sub_size_self_hydro: 32000 # (Optional) Maximal number of interactions per sub-self hydro task (this is the default value). cell_sub_size_pair_grav: 256000000 # (Optional) Maximal number of interactions per sub-pair gravity task (this is the default value). cell_sub_size_self_grav: 32000 # (Optional) Maximal number of interactions per sub-self gravity task (this is the default value). + cell_sub_size_pair_stars: 256000000 # (Optional) Maximal number of interactions per sub-pair stars task (this is the default value). + cell_sub_size_self_stars: 32000 # (Optional) Maximal number of interactions per sub-self stars task (this is the default value). cell_split_size: 400 # (Optional) Maximal number of particles per cell (this is the default value). - cell_subdepth_grav: 2 # (Optional) Maximal depth the gravity tasks can be pushed down (this is the default value). + cell_subdepth_diff_grav: 4 # (Optional) Maximal depth difference between leaves and a cell that gravity tasks can be pushed down to (this is the default value). max_top_level_cells: 12 # (Optional) Maximal number of top-level cells in any dimension. The number of top-level cells will be the cube of this (this is the default value). 
tasks_per_cell: 0 # (Optional) The average number of tasks per cell. If not large enough the simulation will fail (means guess...). mpi_message_limit: 4096 # (Optional) Maximum MPI task message size to send non-buffered, KB. @@ -75,7 +82,7 @@ Snapshots: time_first: 0. # (Optional) Time of the first output if non-cosmological time-integration (in internal units) delta_time: 0.01 # Time difference between consecutive outputs (in internal units) compression: 0 # (Optional) Set the level of compression of the HDF5 datasets [0-9]. 0 does no compression. - label_delta: 1 # (Optional) Set the integer increment between snapshot output labels + int_time_label_on: 0 # (Optional) Enable to label the snapshots using the time rounded to an integer (in internal units) UnitMass_in_cgs: 1 # (Optional) Unit system for the outputs (Grams) UnitLength_in_cgs: 1 # (Optional) Unit system for the outputs (Centimeters) UnitVelocity_in_cgs: 1 # (Optional) Unit system for the outputs (Centimeters per second) @@ -84,6 +91,13 @@ Snapshots: output_list_on: 0 # (Optional) Enable the output list output_list: snaplist.txt # (Optional) File containing the output times (see documentation in "Parameter File" section) +# Parameters governing the logger snapshot system +Logger: + delta_step: 10 # Update the particle log every this many updates + initial_buffer_size: 1 # buffer size in GB + buffer_scale: 10 # (Optional) When buffer size is too small, update it with required memory times buffer_scale + basename: index # Common part of the filenames + # Parameters governing the conserved quantities statistics Statistics: delta_time: 1e-2 # Time between statistics output @@ -97,6 +111,7 @@ Statistics: # Parameters related to the initial conditions InitialConditions: file_name: SedovBlast/sedov.hdf5 # The file to read + periodic: 1 # Are we running with periodic ICs? generate_gas_in_ics: 0 # (Optional) Generate gas particles from the DM-only ICs (e.g. from panphasia). 
cleanup_h_factors: 0 # (Optional) Clean up the h-factors used in the ICs (e.g. in Gadget files). cleanup_velocity_factors: 0 # (Optional) Clean up the scale-factors used in the definition of the velocity variable in the ICs (e.g. in Gadget files). @@ -107,30 +122,36 @@ InitialConditions: # Parameters controlling restarts Restarts: - enable: 1 # (Optional) whether to enable dumping restarts at fixed intervals. - save: 1 # (Optional) whether to save copies of the previous set of restart files (named .prev) - onexit: 0 # (Optional) whether to dump restarts on exit (*needs enable*) - subdir: restart # (Optional) name of subdirectory for restart files. - basename: swift # (Optional) prefix used in naming restart files. - delta_hours: 6.0 # (Optional) decimal hours between dumps of restart files. - stop_steps: 100 # (Optional) how many steps to process before checking if the <subdir>/stop file exists. When present the application will attempt to exit early, dumping restart files first. + enable: 1 # (Optional) whether to enable dumping restarts at fixed intervals. + save: 1 # (Optional) whether to save copies of the previous set of restart files (named .prev) + onexit: 0 # (Optional) whether to dump restarts on exit (*needs enable*) + subdir: restart # (Optional) name of subdirectory for restart files. + basename: swift # (Optional) prefix used in naming restart files. + delta_hours: 6.0 # (Optional) decimal hours between dumps of restart files. + stop_steps: 100 # (Optional) how many steps to process before checking if the <subdir>/stop file exists. When present the application will attempt to exit early, dumping restart files first. + max_run_time: 24.0 # (optional) Maximal wall-clock time in hours. The application will exit when this limit is reached. + resubmit_on_exit: 0 # (Optional) whether to run a command when exiting after the time limit has been reached. + resubmit_command: ./resub.sh # (Optional) Command to run when time limit is reached. 
Compulsory if resubmit_on_exit is switched on. Note potentially unsafe. # Parameters governing domain decomposition DomainDecomposition: - initial_type: simple_metis # (Optional) The initial decomposition strategy: "grid", - # "simple_metis", "weighted_metis", or "vectorized". - initial_grid: [10,10,10] # (Optional) Grid sizes if the "grid" strategy is chosen. + initial_type: memory # (Optional) The initial decomposition strategy: "grid", + # "region", "memory", or "vectorized". + initial_grid: [10,10,10] # (Optional) Grid sizes if the "grid" strategy is chosen. repartition_type: costs/costs # (Optional) The re-decomposition strategy, one of: - # "none/none", "costs/costs", "counts/none", "none/costs", "counts/costs", - # "costs/time" or "none/time". - # These are vertex/edge weights with "costs" as task timing, "counts" as - # sum of particles and "time" as the expected time of the next updates + # "none/none", "costs/costs", "none/costs", "costs/none" or "costs/time". + # These are vertex/edge weights with "costs" as task timing + # and "time" as the expected time of the next updates trigger: 0.05 # (Optional) Fractional (<1) CPU time difference between MPI ranks required to trigger a # new decomposition, or number of steps (>1) between decompositions minfrac: 0.9 # (Optional) Fractional of all particles that should be updated in previous step when # using CPU time trigger + usemetis: 0 # Use serial METIS when ParMETIS is also available. + adaptive: 1 # Use adaptive repartition when ParMETIS is available, otherwise simple refinement. + itr: 100 # When adaptive defines the ratio of inter node communication time to data redistribution time, in the range 0.00001 to 10000000.0. + # Lower values give less data movement during redistributions, at the cost of global balance which may require more communication. 
# Parameters related to the equation of state ------------------------------------------ @@ -142,14 +163,19 @@ EoS: planetary_use_ANEOS: 0 # (Optional) Whether to prepare the ANEOS EOS planetary_use_SESAME: 0 # (Optional) Whether to prepare the SESAME EOS # (Optional) Table file paths - planetary_HM80_HHe_table_file: HM80_HHe.txt - planetary_HM80_ice_table_file: HM80_ice.txt - planetary_HM80_rock_table_file: HM80_rock.txt + planetary_HM80_HHe_table_file: ./equation_of_state/planetary_HM80_HHe.txt + planetary_HM80_ice_table_file: ./equation_of_state/planetary_HM80_ice.txt + planetary_HM80_rock_table_file: ./equation_of_state/planetary_HM80_rock.txt + planetary_SESAME_iron_table_file: ./equation_of_state/planetary_SESAME_iron_2140.txt + planetary_SESAME_basalt_table_file: ./equation_of_state/planetary_SESAME_basalt_7530.txt + planetary_SESAME_water_table_file: ./equation_of_state/planetary_SESAME_water_7154.txt + planetary_SS08_water_table_file: ./equation_of_state/planetary_SS08_water.txt # Parameters related to external potentials -------------------------------------------- # Point mass external potentials PointMassPotential: + useabspos: 0 # 0 -> positions based on centre, 1 -> absolute positions position: [50.,50.0,50.] # location of external point mass (internal units) mass: 1e10 # mass of external point mass (internal units) timestep_mult: 0.03 # Dimensionless pre-factor for the time-step condition @@ -157,10 +183,38 @@ PointMassPotential: # Isothermal potential parameters IsothermalPotential: + useabspos: 0 # 0 -> positions based on centre, 1 -> absolute positions position: [100.,100.,100.] # Location of centre of isothermal potential with respect to centre of the box (internal units) vrot: 200. 
# Rotation speed of isothermal potential (internal units) timestep_mult: 0.03 # Dimensionless pre-factor for the time-step condition epsilon: 0.1 # Softening size (internal units) + +# Hernquist potential parameters +HernquistPotential: + useabspos: 0 # 0 -> positions based on centre, 1 -> absolute positions + position: [100.,100.,100.] # Location of centre of isothermal potential with respect to centre of the box (if 0) otherwise absolute (if 1) (internal units) + idealizeddisk: 0 # (Optional) Whether to run with idealizeddisk or without, 0 used the mass and scalelength as mandatory parameters, while 1 uses more advanced disk dependent paramters + mass: 1e10 # (Optional 0) default parameter, Mass of the Hernquist potential + scalelength: 10.0 # (Optional 0) default parameter, Scale length of the potential + # If multiple X200 values are given, only one is used, in the order M200 > V200 > R200. + M200: 3e11 # (Optional 1a) M200 of the galaxy+halo (when used V200 and R200 are not used) + V200: 100. # (Optional 1b) V200 of the galaxy+halo (when used M200 and R200 are not used, if M200 is given M200 is used) + R200: 10. 
# (Optional 1c) R200 of the galaxy+halo (when used M200 and V200 are not used, if M200 or V200 are given they are used) + h: 0.704 # (Optional 1) reduced Hubble constant + concentration: 7.1 # (Optional 1) concentration of the Halo + diskfraction: 0.0434370991372 # (Optional 1) Disk mass fraction (equal to MD in MakeNewDisk and GalIC) + bulgefraction: 0.00705852860979 # (Optional 1) Bulge mass fraction (equal to MB in MakeNewDisk and GalIC) + timestep_mult: 0.01 # Dimensionless pre-factor for the time-step condition, basically determines the fraction of the orbital time we use to do the time integration + epsilon: 0.1 # Softening size (internal units) + +# Isothermal potential parameters +NFWPotential: + useabspos: 0 + position: [0.0,0.0,0.0] # Location of centre of isothermal potential with respect to centre of the box (internal units) if useabspos=0 otherwise with respect to the 0,0,0, coordinates. + concentration: 8. # Concentration of the halo + M_200: 2.0e+12 # Mass of the halo (M_200 in internal units) + critical_density: 127.4 # Critical density (internal units). + timestep_mult: 0.01 # Dimensionless pre-factor for the time-step condition, basically determines fraction of orbital time we need to do an integration step # Disk-patch potential parameters DiscPatchPotential: @@ -188,11 +242,8 @@ ConstCooling: # Constant lambda cooling function LambdaCooling: - lambda: 2.0 # Cooling rate (in cgs units) - minimum_temperature: 1.0e4 # Minimal temperature (Kelvin) - mean_molecular_weight: 0.59 # Mean molecular weight - hydrogen_mass_abundance: 0.75 # Hydrogen mass abundance (dimensionless) - cooling_tstep_mult: 1.0 # Dimensionless pre-factor for the time-step condition + lambda_nH2_cgs: 1e-22 # Cooling rate divided by square Hydrogen number density (in cgs units [erg * s^-1 * cm^3]) + cooling_tstep_mult: 1.0 # (Optional) Dimensionless pre-factor for the time-step condition. 
# Cooling with Grackle 3.0 GrackleCooling: diff --git a/examples/plot_gravity_checks.py b/examples/plot_gravity_checks.py deleted file mode 100755 index 23866ac2a6952ff918dbc80533269c0d2e9bcbc5..0000000000000000000000000000000000000000 --- a/examples/plot_gravity_checks.py +++ /dev/null @@ -1,307 +0,0 @@ -#!/usr/bin/env python - -import sys -import glob -import re -import numpy as np -import matplotlib.pyplot as plt - -params = {'axes.labelsize': 14, -'axes.titlesize': 18, -'font.size': 12, -'legend.fontsize': 12, -'xtick.labelsize': 14, -'ytick.labelsize': 14, -'text.usetex': True, -'figure.figsize': (12, 10), -'figure.subplot.left' : 0.06, -'figure.subplot.right' : 0.99 , -'figure.subplot.bottom' : 0.06 , -'figure.subplot.top' : 0.99 , -'figure.subplot.wspace' : 0.14 , -'figure.subplot.hspace' : 0.14 , -'lines.markersize' : 6, -'lines.linewidth' : 3., -'text.latex.unicode': True -} -plt.rcParams.update(params) -plt.rc('font',**{'family':'sans-serif','sans-serif':['Times']}) - -min_error = 1e-7 -max_error = 3e-1 -num_bins = 64 - -# Construct the bins -bin_edges = np.linspace(np.log10(min_error), np.log10(max_error), num_bins + 1) -bin_size = (np.log10(max_error) - np.log10(min_error)) / num_bins -bins = 0.5*(bin_edges[1:] + bin_edges[:-1]) -bin_edges = 10**bin_edges -bins = 10**bins - -# Colours -cols = ['#332288', '#88CCEE', '#117733', '#DDCC77', '#CC6677'] - -# Time-step to plot -step = int(sys.argv[1]) -periodic = int(sys.argv[2]) - -# Find the files for the different expansion orders -order_list = glob.glob("gravity_checks_swift_step%.4d_order*.dat"%step) -num_order = len(order_list) - -# Get the multipole orders -order = np.zeros(num_order) -for i in range(num_order): - order[i] = int(order_list[i][35]) -order = sorted(order) -order_list = sorted(order_list) - -# Read the exact accelerations first -if periodic: - data = np.loadtxt('gravity_checks_exact_periodic_step%.4d.dat'%step) -else: - data = np.loadtxt('gravity_checks_exact_step%.4d.dat'%step) -exact_ids 
= data[:,0] -exact_pos = data[:,1:4] -exact_a = data[:,4:7] -exact_pot = data[:,7] -# Sort stuff -sort_index = np.argsort(exact_ids) -exact_ids = exact_ids[sort_index] -exact_pos = exact_pos[sort_index, :] -exact_a = exact_a[sort_index, :] -exact_pot = exact_pot[sort_index] -exact_a_norm = np.sqrt(exact_a[:,0]**2 + exact_a[:,1]**2 + exact_a[:,2]**2) - -print "Number of particles tested:", np.size(exact_ids) - -# Start the plot -plt.figure() - -count = 0 - -# Get the Gadget-2 data if existing -if periodic: - gadget2_file_list = glob.glob("forcetest_gadget2_periodic.txt") -else: - gadget2_file_list = glob.glob("forcetest_gadget2.txt") -if len(gadget2_file_list) != 0: - - gadget2_data = np.loadtxt(gadget2_file_list[0]) - gadget2_ids = gadget2_data[:,0] - gadget2_pos = gadget2_data[:,1:4] - gadget2_a_exact = gadget2_data[:,4:7] - gadget2_a_grav = gadget2_data[:, 7:10] - - # Sort stuff - sort_index = np.argsort(gadget2_ids) - gadget2_ids = gadget2_ids[sort_index] - gadget2_pos = gadget2_pos[sort_index, :] - gadget2_a_exact = gadget2_a_exact[sort_index, :] - gadget2_exact_a_norm = np.sqrt(gadget2_a_exact[:,0]**2 + gadget2_a_exact[:,1]**2 + gadget2_a_exact[:,2]**2) - gadget2_a_grav = gadget2_a_grav[sort_index, :] - - # Cross-checks - if not np.array_equal(exact_ids, gadget2_ids): - print "Comparing different IDs !" - - if np.max(np.abs(exact_pos - gadget2_pos)/np.abs(gadget2_pos)) > 1e-6: - print "Comparing different positions ! max difference:" - index = np.argmax(exact_pos[:,0]**2 + exact_pos[:,1]**2 + exact_pos[:,2]**2 - gadget2_pos[:,0]**2 - gadget2_pos[:,1]**2 - gadget2_pos[:,2]**2) - print "Gadget2 (id=%d):"%gadget2_ids[index], gadget2_pos[index,:], "exact (id=%d):"%exact_ids[index], exact_pos[index,:], "\n" - - diff = np.abs(exact_a_norm - gadget2_exact_a_norm) / np.abs(gadget2_exact_a_norm) - max_diff = np.max(diff) - if max_diff > 2e-6: - print "Comparing different exact accelerations !" 
- print "Median=", np.median(diff), "Mean=", np.mean(diff), "99%=", np.percentile(diff, 99) - print "max difference ( relative diff =", max_diff, "):" - #index = np.argmax(exact_a[:,0]**2 + exact_a[:,1]**2 + exact_a[:,2]**2 - gadget2_a_exact[:,0]**2 - gadget2_a_exact[:,1]**2 - gadget2_a_exact[:,2]**2) - index = np.argmax(diff) - print "a_exact --- Gadget2:", gadget2_a_exact[index,:], "exact:", exact_a[index,:] - print "pos --- Gadget2: (id=%d):"%gadget2_ids[index], gadget2_pos[index,:], "exact (id=%d):"%gadget2_ids[index], gadget2_pos[index,:],"\n" - - - # Compute the error norm - diff = gadget2_a_exact - gadget2_a_grav - - norm_diff = np.sqrt(diff[:,0]**2 + diff[:,1]**2 + diff[:,2]**2) - norm_a = np.sqrt(gadget2_a_exact[:,0]**2 + gadget2_a_exact[:,1]**2 + gadget2_a_exact[:,2]**2) - - norm_error = norm_diff / norm_a - error_x = abs(diff[:,0]) / norm_a - error_y = abs(diff[:,1]) / norm_a - error_z = abs(diff[:,2]) / norm_a - - # Bin the error - norm_error_hist,_ = np.histogram(norm_error, bins=bin_edges, density=False) / (np.size(norm_error) * bin_size) - error_x_hist,_ = np.histogram(error_x, bins=bin_edges, density=False) / (np.size(norm_error) * bin_size) - error_y_hist,_ = np.histogram(error_y, bins=bin_edges, density=False) / (np.size(norm_error) * bin_size) - error_z_hist,_ = np.histogram(error_z, bins=bin_edges, density=False) / (np.size(norm_error) * bin_size) - - norm_median = np.median(norm_error) - median_x = np.median(error_x) - median_y = np.median(error_y) - median_z = np.median(error_z) - - norm_per99 = np.percentile(norm_error,99) - per99_x = np.percentile(error_x,99) - per99_y = np.percentile(error_y,99) - per99_z = np.percentile(error_z,99) - - norm_max = np.max(norm_error) - max_x = np.max(error_x) - max_y = np.max(error_y) - max_z = np.max(error_z) - - print "Gadget-2 ---- " - print "Norm: median= %f 99%%= %f max= %f"%(norm_median, norm_per99, norm_max) - print "X : median= %f 99%%= %f max= %f"%(median_x, per99_x, max_x) - print "Y : median= %f 
99%%= %f max= %f"%(median_y, per99_y, max_y) - print "Z : median= %f 99%%= %f max= %f"%(median_z, per99_z, max_z) - print "" - - plt.subplot(231) - plt.text(min_error * 1.5, 1.55, "$50\\%%\\rightarrow%.4f~~ 99\\%%\\rightarrow%.4f$"%(norm_median, norm_per99), ha="left", va="top", alpha=0.8) - plt.semilogx(bins, norm_error_hist, 'k--', label="Gadget-2", alpha=0.8) - plt.subplot(232) - plt.semilogx(bins, error_x_hist, 'k--', label="Gadget-2", alpha=0.8) - plt.text(min_error * 1.5, 1.55, "$50\\%%\\rightarrow%.4f~~ 99\\%%\\rightarrow%.4f$"%(median_x, per99_x), ha="left", va="top", alpha=0.8) - plt.subplot(233) - plt.semilogx(bins, error_y_hist, 'k--', label="Gadget-2", alpha=0.8) - plt.text(min_error * 1.5, 1.55, "$50\\%%\\rightarrow%.4f~~ 99\\%%\\rightarrow%.4f$"%(median_y, per99_y), ha="left", va="top", alpha=0.8) - plt.subplot(234) - plt.semilogx(bins, error_z_hist, 'k--', label="Gadget-2", alpha=0.8) - plt.text(min_error * 1.5, 1.55, "$50\\%%\\rightarrow%.4f~~ 99\\%%\\rightarrow%.4f$"%(median_z, per99_z), ha="left", va="top", alpha=0.8) - - count += 1 - - -# Plot the different histograms -for i in range(num_order): - data = np.loadtxt(order_list[i]) - ids = data[:,0] - pos = data[:,1:4] - a_grav = data[:, 4:7] - pot = data[:, 7] - - # Sort stuff - sort_index = np.argsort(ids) - ids = ids[sort_index] - pos = pos[sort_index, :] - a_grav = a_grav[sort_index, :] - pot = pot[sort_index] - - # Cross-checks - if not np.array_equal(exact_ids, ids): - print "Comparing different IDs !" - - if np.max(np.abs(exact_pos - pos)/np.abs(pos)) > 1e-6: - print "Comparing different positions ! 
max difference:" - index = np.argmax(exact_pos[:,0]**2 + exact_pos[:,1]**2 + exact_pos[:,2]**2 - pos[:,0]**2 - pos[:,1]**2 - pos[:,2]**2) - print "SWIFT (id=%d):"%ids[index], pos[index,:], "exact (id=%d):"%exact_ids[index], exact_pos[index,:], "\n" - - # Compute the error norm - diff = exact_a - a_grav - diff_pot = exact_pot - pot - - # Correct for different normalization of potential - print "Difference in normalization of potential:", np.mean(diff_pot), - print "std_dev=", np.std(diff_pot), "99-percentile:", np.percentile(diff_pot, 99)-np.median(diff_pot), "1-percentile:", np.median(diff_pot) - np.percentile(diff_pot, 1) - - exact_pot -= np.mean(diff_pot) - diff_pot = exact_pot - pot - - norm_diff = np.sqrt(diff[:,0]**2 + diff[:,1]**2 + diff[:,2]**2) - - norm_error = norm_diff / exact_a_norm - error_x = abs(diff[:,0]) / exact_a_norm - error_y = abs(diff[:,1]) / exact_a_norm - error_z = abs(diff[:,2]) / exact_a_norm - error_pot = abs(diff_pot) / abs(exact_pot) - - # Bin the error - norm_error_hist,_ = np.histogram(norm_error, bins=bin_edges, density=False) / (np.size(norm_error) * bin_size) - error_x_hist,_ = np.histogram(error_x, bins=bin_edges, density=False) / (np.size(norm_error) * bin_size) - error_y_hist,_ = np.histogram(error_y, bins=bin_edges, density=False) / (np.size(norm_error) * bin_size) - error_z_hist,_ = np.histogram(error_z, bins=bin_edges, density=False) / (np.size(norm_error) * bin_size) - error_pot_hist,_ = np.histogram(error_pot, bins=bin_edges, density=False) / (np.size(norm_error) * bin_size) - - norm_median = np.median(norm_error) - median_x = np.median(error_x) - median_y = np.median(error_y) - median_z = np.median(error_z) - median_pot = np.median(error_pot) - - norm_per99 = np.percentile(norm_error,99) - per99_x = np.percentile(error_x,99) - per99_y = np.percentile(error_y,99) - per99_z = np.percentile(error_z,99) - per99_pot = np.percentile(error_pot, 99) - - norm_max = np.max(norm_error) - max_x = np.max(error_x) - max_y = 
np.max(error_y) - max_z = np.max(error_z) - max_pot = np.max(error_pot) - - print "Order %d ---- "%order[i] - print "Norm: median= %f 99%%= %f max= %f"%(norm_median, norm_per99, norm_max) - print "X : median= %f 99%%= %f max= %f"%(median_x, per99_x, max_x) - print "Y : median= %f 99%%= %f max= %f"%(median_y, per99_y, max_y) - print "Z : median= %f 99%%= %f max= %f"%(median_z, per99_z, max_z) - print "Pot : median= %f 99%%= %f max= %f"%(median_pot, per99_pot, max_pot) - print "" - - plt.subplot(231) - plt.semilogx(bins, error_x_hist, color=cols[i],label="SWIFT m-poles order %d"%order[i]) - plt.text(min_error * 1.5, 1.5 - count/10., "$50\\%%\\rightarrow%.4f~~ 99\\%%\\rightarrow%.4f$"%(median_x, per99_x), ha="left", va="top", color=cols[i]) - plt.subplot(232) - plt.semilogx(bins, error_y_hist, color=cols[i],label="SWIFT m-poles order %d"%order[i]) - plt.text(min_error * 1.5, 1.5 - count/10., "$50\\%%\\rightarrow%.4f~~ 99\\%%\\rightarrow%.4f$"%(median_y, per99_y), ha="left", va="top", color=cols[i]) - plt.subplot(233) - plt.semilogx(bins, error_z_hist, color=cols[i],label="SWIFT m-poles order %d"%order[i]) - plt.text(min_error * 1.5, 1.5 - count/10., "$50\\%%\\rightarrow%.4f~~ 99\\%%\\rightarrow%.4f$"%(median_z, per99_z), ha="left", va="top", color=cols[i]) - plt.subplot(234) - plt.semilogx(bins, norm_error_hist, color=cols[i],label="SWIFT m-poles order %d"%order[i]) - plt.text(min_error * 1.5, 1.5 - count/10., "$50\\%%\\rightarrow%.4f~~ 99\\%%\\rightarrow%.4f$"%(norm_median, norm_per99), ha="left", va="top", color=cols[i]) - plt.subplot(235) - plt.semilogx(bins, error_pot_hist, color=cols[i],label="SWIFT m-poles order %d"%order[i]) - plt.text(min_error * 1.5, 1.5 - count/10., "$50\\%%\\rightarrow%.4f~~ 99\\%%\\rightarrow%.4f$"%(median_pot, per99_pot), ha="left", va="top", color=cols[i]) - - count += 1 - -plt.subplot(231) -plt.xlabel("$\delta a_x/|\overrightarrow{a}_{exact}|$") -#plt.ylabel("Density") -plt.xlim(min_error, max_error) -plt.ylim(0,1.75) 
-#plt.legend(loc="center left") -plt.subplot(232) -plt.xlabel("$\delta a_y/|\overrightarrow{a}_{exact}|$") -#plt.ylabel("Density") -plt.xlim(min_error, max_error) -plt.ylim(0,1.75) -#plt.legend(loc="center left") -plt.subplot(233) -plt.xlabel("$\delta a_z/|\overrightarrow{a}_{exact}|$") -#plt.ylabel("Density") -plt.xlim(min_error, max_error) -plt.ylim(0,1.75) -plt.subplot(234) -plt.xlabel("$|\delta \overrightarrow{a}|/|\overrightarrow{a}_{exact}|$") -#plt.ylabel("Density") -plt.xlim(min_error, max_error) -plt.ylim(0,2.5) -plt.legend(loc="upper left") -plt.subplot(235) -plt.xlabel("$\delta \phi/\phi_{exact}$") -#plt.ylabel("Density") -plt.xlim(min_error, max_error) -plt.ylim(0,1.75) -#plt.legend(loc="center left") - - - -plt.savefig("gravity_checks_step%.4d.png"%step, dpi=200) -plt.savefig("gravity_checks_step%.4d.pdf"%step, dpi=200) diff --git a/examples/plot_scaling_results.py b/examples/plot_scaling_results.py deleted file mode 100755 index e39f0d2d0c00eecf7680b2f090bd2c0aa29ed8bb..0000000000000000000000000000000000000000 --- a/examples/plot_scaling_results.py +++ /dev/null @@ -1,280 +0,0 @@ -#!/usr/bin/env python -# -# Usage: -# python plot_scaling_results.py input-file1-ext input-file2-ext ... -# -# Description: -# Plots speed up, parallel efficiency and time to solution given a "timesteps" output file generated by SWIFT. 
-# -# Example: -# python plot_scaling_results.py _hreads_cosma_stdout.txt _threads_knl_stdout.txt -# -# The working directory should contain files 1_threads_cosma_stdout.txt - 64_threads_cosma_stdout.txt and 1_threads_knl_stdout.txt - 64_threads_knl_stdout.txt, i.e wall clock time for each run using a given number of threads - -import sys -import glob -import re -import numpy as np -import matplotlib.pyplot as plt -import scipy.stats -import ntpath - -params = {'axes.labelsize': 14, -'axes.titlesize': 18, -'font.size': 12, -'legend.fontsize': 12, -'xtick.labelsize': 14, -'ytick.labelsize': 14, -'text.usetex': True, -'figure.subplot.left' : 0.055, -'figure.subplot.right' : 0.98 , -'figure.subplot.bottom' : 0.05 , -'figure.subplot.top' : 0.95 , -'figure.subplot.wspace' : 0.14 , -'figure.subplot.hspace' : 0.12 , -'lines.markersize' : 6, -'lines.linewidth' : 3., -'text.latex.unicode': True -} -plt.rcParams.update(params) -plt.rc('font',**{'family':'sans-serif','sans-serif':['Times']}) - -version = [] -branch = [] -revision = [] -hydro_scheme = [] -hydro_kernel = [] -hydro_neighbours = [] -hydro_eta = [] -threadList = [] -hexcols = ['#332288', '#88CCEE', '#44AA99', '#117733', '#999933', '#DDCC77', - '#CC6677', '#882255', '#AA4499', '#661100', '#6699CC', '#AA4466', - '#4477AA'] -linestyle = (hexcols[0],hexcols[1],hexcols[3],hexcols[5],hexcols[6],hexcols[8],hexcols[2],hexcols[4],hexcols[7],hexcols[9]) -numTimesteps = 0 -legendTitle = ' ' - -inputFileNames = [] - -# Work out how many data series there are -if len(sys.argv) == 1: - print "Please specify an input file in the arguments." 
- sys.exit() -else: - for fileName in sys.argv[1:]: - inputFileNames.append(fileName) - numOfSeries = int(len(sys.argv) - 1) - -# Get the names of the branch, Git revision, hydro scheme and hydro kernel -def parse_header(inputFile): - with open(inputFile, 'r') as f: - found_end = False - for line in f: - if 'Branch:' in line: - s = line.split() - line = s[2:] - branch.append(" ".join(line)) - elif 'Revision:' in line: - s = line.split() - revision.append(s[2]) - elif 'Hydrodynamic scheme:' in line: - line = line[2:-1] - s = line.split() - line = s[2:] - hydro_scheme.append(" ".join(line)) - elif 'Hydrodynamic kernel:' in line: - line = line[2:-1] - s = line.split() - line = s[2:5] - hydro_kernel.append(" ".join(line)) - elif 'neighbours:' in line: - s = line.split() - hydro_neighbours.append(s[4]) - elif 'Eta:' in line: - s = line.split() - hydro_eta.append(s[2]) - return - -# Parse file and return total time taken, speed up and parallel efficiency -def parse_files(): - - totalTime = [] - sumTotal = [] - speedUp = [] - parallelEff = [] - - for i in range(0,numOfSeries): # Loop over each data series - - # Get path to set of files - path, name = ntpath.split(inputFileNames[i]) - - # Get each file that starts with the cmd line arg - file_list = glob.glob(inputFileNames[i] + "*") - - threadList.append([]) - - # Remove path from file names - for j in range(0,len(file_list)): - p, filename = ntpath.split(file_list[j]) - file_list[j] = filename - - # Create a list of threads using the list of files - for fileName in file_list: - s = re.split(r'[_.]+',fileName) - threadList[i].append(int(s[1])) - - # Re-add path once each file has been found - if len(path) != 0: - for j in range(0,len(file_list)): - file_list[j] = path + '/' + file_list[j] - - # Sort the thread list in ascending order and save the indices - sorted_indices = np.argsort(threadList[i]) - threadList[i].sort() - - # Sort the file list in ascending order acording to the thread number - file_list = [ file_list[j] 
for j in sorted_indices] - - parse_header(file_list[0]) - - branch[i] = branch[i].replace("_", "\\_") - - #version.append("$\\textrm{%s}$"%str(branch[i]))# + " " + revision[i])# + "\n" + hydro_scheme[i] + -# "\n" + hydro_kernel[i] + r", $N_{ngb}=%d$"%float(hydro_neighbours[i]) + -# r", $\eta=%.3f$"%float(hydro_eta[i])) - totalTime.append([]) - speedUp.append([]) - parallelEff.append([]) - - # Loop over all files for a given series and load the times - for j in range(0,len(file_list)): - times = np.loadtxt(file_list[j],usecols=(9,)) - updates = np.loadtxt(file_list[j],usecols=(6,)) - totalTime[i].append(np.sum(times)) - - sumTotal.append(np.sum(totalTime[i])) - - # Sort the total times in descending order - sorted_indices = np.argsort(sumTotal)[::-1] - - totalTime = [ totalTime[j] for j in sorted_indices] - branchNew = [ branch[j] for j in sorted_indices] - - for i in range(0,numOfSeries): - version.append("$\\textrm{%s}$"%str(branchNew[i])) - - global numTimesteps - numTimesteps = len(times) - - # Find speed-up and parallel efficiency - for i in range(0,numOfSeries): - for j in range(0,len(file_list)): - speedUp[i].append(totalTime[i][0] / totalTime[i][j]) - parallelEff[i].append(speedUp[i][j] / threadList[i][j]) - - return (totalTime,speedUp,parallelEff) - -def print_results(totalTime,parallelEff,version): - - for i in range(0,numOfSeries): - print " " - print "------------------------------------" - print version[i] - print "------------------------------------" - print "Wall clock time for: {} time steps".format(numTimesteps) - print "------------------------------------" - - for j in range(0,len(threadList[i])): - print str(threadList[i][j]) + " threads: {}".format(totalTime[i][j]) - - print " " - print "------------------------------------" - print "Parallel Efficiency for: {} time steps".format(numTimesteps) - print "------------------------------------" - - for j in range(0,len(threadList[i])): - print str(threadList[i][j]) + " threads: 
{}".format(parallelEff[i][j]) - - return - -# Returns a lighter/darker version of the colour -def color_variant(hex_color, brightness_offset=1): - - rgb_hex = [hex_color[x:x+2] for x in [1, 3, 5]] - new_rgb_int = [int(hex_value, 16) + brightness_offset for hex_value in rgb_hex] - new_rgb_int = [min([255, max([0, i])]) for i in new_rgb_int] # make sure new values are between 0 and 255 - # hex() produces "0x88", we want just "88" - - return "#" + "".join([hex(i)[2:] for i in new_rgb_int]) - -def plot_results(totalTime,speedUp,parallelEff,numSeries): - - fig, axarr = plt.subplots(2, 2, figsize=(10,10), frameon=True) - speedUpPlot = axarr[0, 0] - parallelEffPlot = axarr[0, 1] - totalTimePlot = axarr[1, 0] - emptyPlot = axarr[1, 1] - - # Plot speed up - speedUpPlot.plot(threadList[0],threadList[0], linestyle='--', lw=1.5, color='0.2') - for i in range(0,numSeries): - speedUpPlot.plot(threadList[0],speedUp[i],linestyle[i],label=version[i]) - - speedUpPlot.set_ylabel("${\\rm Speed\\textendash up}$", labelpad=0.) - speedUpPlot.set_xlabel("${\\rm Threads}$", labelpad=0.) - speedUpPlot.set_xlim([0.7,threadList[0][-1]+1]) - speedUpPlot.set_ylim([0.7,threadList[0][-1]+1]) - - # Plot parallel efficiency - parallelEffPlot.plot([threadList[0][0], 10**np.floor(np.log10(threadList[0][-1])+1)], [1,1], 'k--', lw=1.5, color='0.2') - parallelEffPlot.plot([threadList[0][0], 10**np.floor(np.log10(threadList[0][-1])+1)], [0.9,0.9], 'k--', lw=1.5, color='0.2') - parallelEffPlot.plot([threadList[0][0], 10**np.floor(np.log10(threadList[0][-1])+1)], [0.75,0.75], 'k--', lw=1.5, color='0.2') - parallelEffPlot.plot([threadList[0][0], 10**np.floor(np.log10(threadList[0][-1])+1)], [0.5,0.5], 'k--', lw=1.5, color='0.2') - for i in range(0,numSeries): - parallelEffPlot.plot(threadList[0],parallelEff[i],linestyle[i]) - - parallelEffPlot.set_xscale('log') - parallelEffPlot.set_ylabel("${\\rm Parallel~efficiency}$", labelpad=0.) - parallelEffPlot.set_xlabel("${\\rm Threads}$", labelpad=0.) 
- parallelEffPlot.set_ylim([0,1.1]) - parallelEffPlot.set_xlim([0.9,10**(np.floor(np.log10(threadList[0][-1]))+0.5)]) - - # Plot time to solution - for i in range(0,numOfSeries): - pts = [1, 10**np.floor(np.log10(threadList[i][-1])+1)] - totalTimePlot.loglog(pts,totalTime[i][0]/pts, 'k--', lw=1., color='0.2') - totalTimePlot.loglog(threadList[i],totalTime[i],linestyle[i],label=version[i]) - - y_min = 10**np.floor(np.log10(np.min(totalTime[:][0])*0.6)) - y_max = 1.0*10**np.floor(np.log10(np.max(totalTime[:][0]) * 1.5)+1) - totalTimePlot.set_xscale('log') - totalTimePlot.set_xlabel("${\\rm Threads}$", labelpad=0.) - totalTimePlot.set_ylabel("${\\rm Time~to~solution}~[{\\rm ms}]$", labelpad=0.) - totalTimePlot.set_xlim([0.9, 10**(np.floor(np.log10(threadList[0][-1]))+0.5)]) - totalTimePlot.set_ylim(y_min, y_max) - - totalTimePlot.legend(bbox_to_anchor=(1.21, 0.97), loc=2, borderaxespad=0.,prop={'size':12}, frameon=False,title=legendTitle) - emptyPlot.axis('off') - - for i, txt in enumerate(threadList[0]): - if 2**np.floor(np.log2(threadList[0][i])) == threadList[0][i]: # only powers of 2 - speedUpPlot.annotate("$%s$"%txt, (threadList[0][i],speedUp[0][i]), (threadList[0][i],speedUp[0][i] + 0.3), color=hexcols[0]) - parallelEffPlot.annotate("$%s$"%txt, (threadList[0][i],parallelEff[0][i]), (threadList[0][i], parallelEff[0][i]+0.02), color=hexcols[0]) - totalTimePlot.annotate("$%s$"%txt, (threadList[0][i],totalTime[0][i]), (threadList[0][i], totalTime[0][i]*1.1), color=hexcols[0]) - - #fig.suptitle("Thread Speed Up, Parallel Efficiency and Time To Solution for {} Time Steps of Cosmo Volume\n Cmd Line: {}, Platform: {}".format(numTimesteps),cmdLine,platform)) - fig.suptitle("${\\rm Speed\\textendash up,~parallel~efficiency~and~time~to~solution~for}~%d~{\\rm time\\textendash steps}$"%numTimesteps, fontsize=16) - - return - -# Calculate results -(totalTime,speedUp,parallelEff) = parse_files() - -legendTitle = version[0] - 
-plot_results(totalTime,speedUp,parallelEff,numOfSeries) - -print_results(totalTime,parallelEff,version) - -# And plot -plt.show() diff --git a/examples/plot_scaling_results_breakdown.py b/examples/plot_scaling_results_breakdown.py deleted file mode 100755 index 6a87e42bcd393d543187e768e31a15bc56f1ae6a..0000000000000000000000000000000000000000 --- a/examples/plot_scaling_results_breakdown.py +++ /dev/null @@ -1,289 +0,0 @@ -#!/usr/bin/env python -# -# Usage: -# python plot_scaling_results.py input-file1-ext input-file2-ext ... -# -# Description: -# Plots speed up, parallel efficiency and time to solution given a "timesteps" output file generated by SWIFT. -# -# Example: -# python plot_scaling_results.py _hreads_cosma_stdout.txt _threads_knl_stdout.txt -# -# The working directory should contain files 1_threads_cosma_stdout.txt - 64_threads_cosma_stdout.txt and 1_threads_knl_stdout.txt - 64_threads_knl_stdout.txt, i.e wall clock time for each run using a given number of threads - -import sys -import glob -import re -import numpy as np -import matplotlib.pyplot as plt -import scipy.stats -import ntpath - -params = {'axes.labelsize': 14, -'axes.titlesize': 18, -'font.size': 12, -'legend.fontsize': 12, -'xtick.labelsize': 14, -'ytick.labelsize': 14, -'text.usetex': True, -'figure.subplot.left' : 0.055, -'figure.subplot.right' : 0.98 , -'figure.subplot.bottom' : 0.05 , -'figure.subplot.top' : 0.95 , -'figure.subplot.wspace' : 0.14 , -'figure.subplot.hspace' : 0.12 , -'lines.markersize' : 6, -'lines.linewidth' : 3., -'text.latex.unicode': True -} -plt.rcParams.update(params) -plt.rc('font',**{'family':'sans-serif','sans-serif':['Times']}) - -version = [] -branch = [] -revision = [] -hydro_scheme = [] -hydro_kernel = [] -hydro_neighbours = [] -hydro_eta = [] -threadList = [] -hexcols = ['#332288', '#88CCEE', '#44AA99', '#117733', '#999933', '#DDCC77', - '#CC6677', '#882255', '#AA4499', '#661100', '#6699CC', '#AA4466', - '#4477AA'] -linestyle = 
(hexcols[0],hexcols[1],hexcols[3],hexcols[5],hexcols[6],hexcols[8],hexcols[2],hexcols[4],hexcols[7],hexcols[9]) -numTimesteps = 0 -legendTitle = ' ' - -inputFileNames = [] - -# Work out how many data series there are -if len(sys.argv) == 1: - print "Please specify an input file in the arguments." - sys.exit() -else: - for fileName in sys.argv[1:]: - inputFileNames.append(fileName) - numOfSeries = int(len(sys.argv) - 1) - -# Get the names of the branch, Git revision, hydro scheme and hydro kernel -def parse_header(inputFile): - with open(inputFile, 'r') as f: - found_end = False - for line in f: - if 'Branch:' in line: - s = line.split() - line = s[2:] - branch.append(" ".join(line)) - elif 'Revision:' in line: - s = line.split() - revision.append(s[2]) - elif 'Hydrodynamic scheme:' in line: - line = line[2:-1] - s = line.split() - line = s[2:] - hydro_scheme.append(" ".join(line)) - elif 'Hydrodynamic kernel:' in line: - line = line[2:-1] - s = line.split() - line = s[2:5] - hydro_kernel.append(" ".join(line)) - elif 'neighbours:' in line: - s = line.split() - hydro_neighbours.append(s[4]) - elif 'Eta:' in line: - s = line.split() - hydro_eta.append(s[2]) - return - -# Parse file and return total time taken, speed up and parallel efficiency -def parse_files(): - - totalTime = [] - sumTotal = [] - speedUp = [] - parallelEff = [] - - for i in range(0,numOfSeries): # Loop over each data series - - # Get path to set of files - path, name = ntpath.split(inputFileNames[i]) - - # Get each file that starts with the cmd line arg - file_list = glob.glob(inputFileNames[i] + "*") - - threadList.append([]) - - # Remove path from file names - for j in range(0,len(file_list)): - p, filename = ntpath.split(file_list[j]) - file_list[j] = filename - - # Create a list of threads using the list of files - for fileName in file_list: - s = re.split(r'[_.]+',fileName) - threadList[i].append(int(s[1])) - - # Re-add path once each file has been found - if len(path) != 0: - for j in 
range(0,len(file_list)): - file_list[j] = path + '/' + file_list[j] - - # Sort the thread list in ascending order and save the indices - sorted_indices = np.argsort(threadList[i]) - threadList[i].sort() - - # Sort the file list in ascending order acording to the thread number - file_list = [ file_list[j] for j in sorted_indices] - - parse_header(file_list[0]) - - branch[i] = branch[i].replace("_", "\\_") - - - #version.append("$\\textrm{%s}$"%str(branch[i]))# + " " + revision[i])# + "\n" + hydro_scheme[i] + -# "\n" + hydro_kernel[i] + r", $N_{ngb}=%d$"%float(hydro_neighbours[i]) + -# r", $\eta=%.3f$"%float(hydro_eta[i])) - totalTime.append([]) - speedUp.append([]) - parallelEff.append([]) - - # Loop over all files for a given series and load the times - for j in range(0,len(file_list)): - times = np.loadtxt(file_list[j],usecols=(9,)) - updates = np.loadtxt(file_list[j],usecols=(6,)) - totalTime[i].append(np.sum(times)) - - sumTotal.append(np.sum(totalTime[i])) - - # Sort the total times in descending order - sorted_indices = np.argsort(sumTotal)[::-1] - - totalTime = [ totalTime[j] for j in sorted_indices] - branchNew = [ branch[j] for j in sorted_indices] - - for i in range(0,numOfSeries): - version.append("$\\textrm{%s}$"%str(branchNew[i])) - - global numTimesteps - numTimesteps = len(times) - - # Find speed-up and parallel efficiency - for i in range(0,numOfSeries): - for j in range(0,len(file_list)): - speedUp[i].append(totalTime[i][0] / totalTime[i][j]) - parallelEff[i].append(speedUp[i][j] / threadList[i][j]) - - return (totalTime,speedUp,parallelEff) - -def print_results(totalTime,parallelEff,version): - - for i in range(0,numOfSeries): - print " " - print "------------------------------------" - print version[i] - print "------------------------------------" - print "Wall clock time for: {} time steps".format(numTimesteps) - print "------------------------------------" - - for j in range(0,len(threadList[i])): - print str(threadList[i][j]) + " threads: 
{}".format(totalTime[i][j]) - - print " " - print "------------------------------------" - print "Parallel Efficiency for: {} time steps".format(numTimesteps) - print "------------------------------------" - - for j in range(0,len(threadList[i])): - print str(threadList[i][j]) + " threads: {}".format(parallelEff[i][j]) - - return - -# Returns a lighter/darker version of the colour -def color_variant(hex_color, brightness_offset=1): - - rgb_hex = [hex_color[x:x+2] for x in [1, 3, 5]] - new_rgb_int = [int(hex_value, 16) + brightness_offset for hex_value in rgb_hex] - new_rgb_int = [min([255, max([0, i])]) for i in new_rgb_int] # make sure new values are between 0 and 255 - # hex() produces "0x88", we want just "88" - - return "#" + "".join([hex(i)[2:] for i in new_rgb_int]) - -def plot_results(totalTime,speedUp,parallelEff,numSeries): - - fig, axarr = plt.subplots(2, 2, figsize=(10,10), frameon=True) - speedUpPlot = axarr[0, 0] - parallelEffPlot = axarr[0, 1] - totalTimePlot = axarr[1, 0] - emptyPlot = axarr[1, 1] - - # Plot speed up - speedUpPlot.plot(threadList[0],threadList[0], linestyle='--', lw=1.5, color='0.2') - for i in range(0,numSeries): - speedUpPlot.plot(threadList[0],speedUp[i],linestyle[i],label=version[i]) - - speedUpPlot.set_ylabel("${\\rm Speed\\textendash up}$", labelpad=0.) - speedUpPlot.set_xlabel("${\\rm Threads}$", labelpad=0.) 
- speedUpPlot.set_xlim([0.7,threadList[0][-1]+1]) - speedUpPlot.set_ylim([0.7,threadList[0][-1]+1]) - - # Plot parallel efficiency - parallelEffPlot.plot([threadList[0][0], 10**np.floor(np.log10(threadList[0][-1])+1)], [1,1], 'k--', lw=1.5, color='0.2') - parallelEffPlot.plot([threadList[0][0], 10**np.floor(np.log10(threadList[0][-1])+1)], [0.9,0.9], 'k--', lw=1.5, color='0.2') - parallelEffPlot.plot([threadList[0][0], 10**np.floor(np.log10(threadList[0][-1])+1)], [0.75,0.75], 'k--', lw=1.5, color='0.2') - parallelEffPlot.plot([threadList[0][0], 10**np.floor(np.log10(threadList[0][-1])+1)], [0.5,0.5], 'k--', lw=1.5, color='0.2') - for i in range(0,numSeries): - parallelEffPlot.plot(threadList[0],parallelEff[i],linestyle[i]) - - parallelEffPlot.set_xscale('log') - parallelEffPlot.set_ylabel("${\\rm Parallel~efficiency}$", labelpad=0.) - parallelEffPlot.set_xlabel("${\\rm Threads}$", labelpad=0.) - parallelEffPlot.set_ylim([0,1.1]) - parallelEffPlot.set_xlim([0.9,10**(np.floor(np.log10(threadList[0][-1]))+0.5)]) - - # Plot time to solution - for i in range(0,numSeries): - for j in range(0,len(threadList[0])): - totalTime[i][j] = totalTime[i][j] * threadList[i][j] - if i > 1: - totalTime[i][j] = totalTime[i][j] + totalTime[i-1][j] - totalTimePlot.plot(threadList[0],totalTime[i],linestyle[i],label=version[i]) - - if i > 1: - colour = color_variant(linestyle[i],100) - totalTimePlot.fill_between(threadList[0],np.array(totalTime[i]),np.array(totalTime[i-1]), facecolor=colour) - elif i==1: - colour = color_variant(linestyle[i],100) - totalTimePlot.fill_between(threadList[0], totalTime[i],facecolor=colour) - - totalTimePlot.set_xscale('log') - totalTimePlot.ticklabel_format(style='sci', axis='y', scilimits=(0,0)) - totalTimePlot.set_xlabel("${\\rm Threads}$", labelpad=0.) - totalTimePlot.set_ylabel("${\\rm Time~to~solution~x~No.~of~cores}~[{\\rm ms}]$", labelpad=0.) 
- totalTimePlot.set_xlim([0.9, 10**(np.floor(np.log10(threadList[0][-1]))+0.5)]) - #totalTimePlot.set_ylim(y_min, y_max) - - totalTimePlot.legend(bbox_to_anchor=(1.21, 0.97), loc=2, borderaxespad=0.,prop={'size':12}, frameon=False,title=legendTitle) - emptyPlot.axis('off') - - for i, txt in enumerate(threadList[0]): - if 2**np.floor(np.log2(threadList[0][i])) == threadList[0][i]: # only powers of 2 - speedUpPlot.annotate("$%s$"%txt, (threadList[0][i],speedUp[0][i]), (threadList[0][i],speedUp[0][i] + 0.3), color=hexcols[0]) - parallelEffPlot.annotate("$%s$"%txt, (threadList[0][i],parallelEff[0][i]), (threadList[0][i], parallelEff[0][i]+0.02), color=hexcols[0]) - totalTimePlot.annotate("$%s$"%txt, (threadList[0][i],totalTime[0][i]), (threadList[0][i], totalTime[0][i]*1.1), color=hexcols[0]) - - #fig.suptitle("Thread Speed Up, Parallel Efficiency and Time To Solution for {} Time Steps of Cosmo Volume\n Cmd Line: {}, Platform: {}".format(numTimesteps),cmdLine,platform)) - fig.suptitle("${\\rm Speed\\textendash up,~parallel~efficiency~and~time~to~solution~x~no.~of~cores~for}~%d~{\\rm time\\textendash steps}$"%numTimesteps, fontsize=16) - - return - -# Calculate results -(totalTime,speedUp,parallelEff) = parse_files() - -legendTitle = version[0] - -plot_results(totalTime,speedUp,parallelEff,numOfSeries) - -print_results(totalTime,parallelEff,version) - -# And plot -plt.show() diff --git a/format.sh b/format.sh index 9fea13bf363b1513f0e4356a67b2c9d1166771d1..91346334c9b2eaf9fbb343aba44f8a02d866d1ef 100755 --- a/format.sh +++ b/format.sh @@ -1,3 +1,80 @@ #!/bin/bash -clang-format-5.0 -style=file -i src/*.[ch] src/*/*.[ch] src/*/*/*.[ch] examples/main.c tests/*.[ch] +# Clang format command, can be overridden using CLANG_FORMAT_CMD. +# We currrently use version 5.0 so any overrides should provide that. 
+clang=${CLANG_FORMAT_CMD:="clang-format-5.0"} + +# Formatting command +cmd="$clang -style=file $(git ls-files | grep '\.[ch]$')" + +# Test if `clang-format-5.0` works +command -v $clang > /dev/null +if [[ $? -ne 0 ]] +then + echo "ERROR: cannot find $clang" + exit 1 +fi + +# Print the help +function show_help { + echo -e "This script formats SWIFT according to Google style" + echo -e " -h, --help \t Show this help" + echo -e " -t, --test \t Test if SWIFT is well formatted" +} + +# Parse arguments (based on https://stackoverflow.com/questions/192249) +TEST=0 +while [[ $# -gt 0 ]] +do + key="$1" + + case $key in + # print the help and exit + -h|--help) + show_help + exit + ;; + # check if the code is well formatted + -t|--test) + TEST=1 + shift + ;; + # unknown option + *) + echo "Argument '$1' not implemented" + show_help + exit + ;; + esac +done + +# Run the required commands +if [[ $TEST -eq 1 ]] +then + # Note trapping the exit status from both commands in the pipe. Also note + # do not use -q in grep as that closes the pipe on first match and we get + # a SIGPIPE error. + echo "Testing if SWIFT is correctly formatted" + $cmd -output-replacements-xml | grep "<replacement " > /dev/null + status=("${PIPESTATUS[@]}") + + # Trap if first command failed. Note 141 is SIGPIPE, that happens when no + # output + if [[ ${status[0]} -ne 0 ]] + then + echo "ERROR: $clang command failed" + exit 1 + fi + + # Check formatting + if [[ ${status[1]} -eq 0 ]] + then + echo "ERROR: needs formatting" + exit 1 + else + echo "...is correctly formatted" + fi +else + echo "Formatting SWIFT" + $cmd -i +fi diff --git a/m4/ax_gcc_archflag.m4 b/m4/ax_gcc_archflag.m4 index b91c9e8f4003ce7ee70a3f587b89df754f7302d5..ef8e7c199da1622354a029ec142386b7f1f9e442 100644 --- a/m4/ax_gcc_archflag.m4 +++ b/m4/ax_gcc_archflag.m4 @@ -65,7 +65,7 @@ # modified version of the Autoconf Macro, you may extend this special # exception to the GPL to apply to your modified version as well. 
-#serial 21 (modified for SWIFT) +#serial 22 (modified for SWIFT) AC_DEFUN([AX_GCC_ARCHFLAG], [AC_REQUIRE([AC_PROG_CC]) @@ -109,7 +109,7 @@ case $host_cpu in *3?6[[ae]]?:*:*:*) ax_gcc_arch="ivybridge core-avx-i corei7-avx corei7 core2 pentium-m pentium3 pentiumpro" ;; *3?6[[cf]]?:*:*:*|*4?6[[56]]?:*:*:*) ax_gcc_arch="haswell core-avx2 core-avx-i corei7-avx corei7 core2 pentium-m pentium3 pentiumpro" ;; *3?6d?:*:*:*|*4?6[[7f]]?:*:*:*|*5?66?:*:*:*) ax_gcc_arch="broadwell core-avx2 core-avx-i corei7-avx corei7 core2 pentium-m pentium3 pentiumpro" ;; - *4?6[[de]]?:*:*:*) ax_gcc_arch="skylake haswell sandybridge core-avx2 core-avx-i corei7-avx corei7 core2 pentium-m pentium3 pentiumpro" ;; + *4?6[[de]]?:*:*:*|*5?6[[de]]?:*:*:*) ax_gcc_arch="skylake haswell sandybridge core-avx2 core-avx-i corei7-avx corei7 core2 pentium-m pentium3 pentiumpro" ;; *5?6[[56]]?:*:*:*) ax_gcc_arch="skylake-avx512 skylake haswell sandybridge core-avx2 core-avx-i corei7-avx corei7 core2 pentium-m pentium3 pentiumpro" ;; *8?6[[de]]?:*:*:*|*9?6[[de]]?:*:*:*) ax_gcc_arch="kabylake skylake broadwell haswell sandybridge core-avx2 core-avx-i corei7-avx corei7 core2 pentium-m pentium3 pentiumpro" ;; *1?6c?:*:*:*|*2?6[[67]]?:*:*:*|*3?6[[56]]?:*:*:*) ax_gcc_arch="bonnell atom core2 pentium-m pentium3 pentiumpro" ;; @@ -201,6 +201,10 @@ case $host_cpu in *POWER4*|*power4*|*gq*) ax_gcc_arch="power4 970";; *POWER5*|*power5*|*gr*|*gs*) ax_gcc_arch="power5 power4 970";; 603ev|8240) ax_gcc_arch="$cputype 603e 603";; + *POWER7*) ax_gcc_arch="power7";; + *POWER8*) ax_gcc_arch="power8";; + *POWER9*) ax_gcc_arch="power9";; + *POWER10*) ax_gcc_arch="power10";; *) ax_gcc_arch=$cputype ;; esac ax_gcc_arch="$ax_gcc_arch powerpc" diff --git a/src/Makefile.am b/src/Makefile.am index 0f61fb108d8d8acbd420c994266f4803a3f69d3e..78531f1f03205374231b78df4de4bc697fed3178 100644 --- a/src/Makefile.am +++ b/src/Makefile.am @@ -16,7 +16,7 @@ # along with this program. If not, see <http://www.gnu.org/licenses/>. 
# Add the non-standard paths to the included library headers -AM_CFLAGS = $(HDF5_CPPFLAGS) $(GSL_INCS) $(FFTW_INCS) +AM_CFLAGS = $(HDF5_CPPFLAGS) $(GSL_INCS) $(FFTW_INCS) $(GRACKLE_INCS) # Assign a "safe" version number AM_LDFLAGS = $(HDF5_LDFLAGS) $(FFTW_LIBS) -version-info 0:0:0 @@ -25,11 +25,11 @@ AM_LDFLAGS = $(HDF5_LDFLAGS) $(FFTW_LIBS) -version-info 0:0:0 GIT_CMD = @GIT_CMD@ # Additional dependencies for shared libraries. -EXTRA_LIBS = $(HDF5_LIBS) $(FFTW_LIBS) $(PROFILER_LIBS) $(TCMALLOC_LIBS) $(JEMALLOC_LIBS) $(TBBMALLOC_LIBS) $(GRACKLE_LIB) $(GSL_LIBS) +EXTRA_LIBS = $(HDF5_LIBS) $(FFTW_LIBS) $(PROFILER_LIBS) $(TCMALLOC_LIBS) $(JEMALLOC_LIBS) $(TBBMALLOC_LIBS) $(GRACKLE_LIBS) $(GSL_LIBS) # MPI libraries. -MPI_LIBS = $(METIS_LIBS) $(MPI_THREAD_LIBS) -MPI_FLAGS = -DWITH_MPI $(METIS_INCS) +MPI_LIBS = $(PARMETIS_LIBS) $(METIS_LIBS) $(MPI_THREAD_LIBS) +MPI_FLAGS = -DWITH_MPI $(PARMETIS_INCS) $(METIS_INCS) # Build the libswiftsim library lib_LTLIBRARIES = libswiftsim.la @@ -48,12 +48,19 @@ include_HEADERS = space.h runner.h queue.h task.h lock.h cell.h part.h const.h \ dump.h logger.h active.h timeline.h xmf.h gravity_properties.h gravity_derivatives.h \ gravity_softened_derivatives.h vector_power.h collectgroup.h hydro_space.h sort_part.h \ chemistry.h chemistry_io.h chemistry_struct.h cosmology.h restart.h space_getsid.h utilities.h \ - mesh_gravity.h cbrt.h velociraptor_interface.h swift_velociraptor_part.h outputlist.h + mesh_gravity.h cbrt.h velociraptor_interface.h swift_velociraptor_part.h outputlist.h \ + logger_io.h + +# source files for EAGLE cooling +EAGLE_COOLING_SOURCES = +if HAVEEAGLECOOLING +EAGLE_COOLING_SOURCES += cooling/EAGLE/cooling.c cooling/EAGLE/cooling_tables.c +endif # Common source files -AM_SOURCES = space.c runner.c queue.c task.c cell.c engine.c \ - serial_io.c timers.c debug.c scheduler.c proxy.c parallel_io.c \ - units.c common_io.c single_io.c multipole.c version.c map.c \ +AM_SOURCES = space.c runner.c queue.c task.c cell.c 
engine.c engine_maketasks.c \ + engine_marktasks.c engine_drift.c serial_io.c timers.c debug.c scheduler.c \ + proxy.c parallel_io.c units.c common_io.c single_io.c multipole.c version.c map.c \ kernel_hydro.c tools.c part.c partition.c clocks.c parser.c \ physical_constants.c potential.c hydro_properties.c \ threadpool.c cooling.c sourceterms.c \ @@ -61,13 +68,14 @@ AM_SOURCES = space.c runner.c queue.c task.c cell.c engine.c \ part_type.c xmf.c gravity_properties.c gravity.c \ collectgroup.c hydro_space.c equation_of_state.c \ chemistry.c cosmology.c restart.c mesh_gravity.c velociraptor_interface.c \ - outputlist.c + outputlist.c velociraptor_dummy.c logger_io.c $(EAGLE_COOLING_SOURCES) # Include files for distribution, not installation. nobase_noinst_HEADERS = align.h approx_math.h atomic.h barrier.h cycle.h error.h inline.h kernel_hydro.h kernel_gravity.h \ gravity_iact.h kernel_long_gravity.h vector.h cache.h runner_doiact.h runner_doiact_vec.h runner_doiact_grav.h \ - runner_doiact_nosort.h units.h intrinsics.h minmax.h kick.h timestep.h drift.h adiabatic_index.h io_properties.h \ - dimension.h part_type.h periodic.h memswap.h dump.h logger.h sign.h \ + runner_doiact_nosort.h runner_doiact_stars.h units.h intrinsics.h minmax.h kick.h timestep.h drift.h \ + adiabatic_index.h io_properties.h dimension.h part_type.h periodic.h memswap.h dump.h logger.h sign.h \ + logger_io.h \ gravity.h gravity_io.h gravity_cache.h \ gravity/Default/gravity.h gravity/Default/gravity_iact.h gravity/Default/gravity_io.h \ gravity/Default/gravity_debug.h gravity/Default/gravity_part.h \ @@ -126,13 +134,15 @@ nobase_noinst_HEADERS = align.h approx_math.h atomic.h barrier.h cycle.h error.h riemann/riemann_exact.h riemann/riemann_vacuum.h \ riemann/riemann_checks.h \ stars.h stars_io.h \ - stars/Default/star.h stars/Default/star_iact.h stars/Default/star_io.h \ - stars/Default/star_debug.h stars/Default/star_part.h \ + stars/Default/stars.h stars/Default/stars_iact.h 
stars/Default/stars_io.h \ + stars/Default/stars_debug.h stars/Default/stars_part.h \ potential/none/potential.h potential/point_mass/potential.h \ potential/isothermal/potential.h potential/disc_patch/potential.h \ potential/sine_wave/potential.h \ cooling/none/cooling.h cooling/none/cooling_struct.h \ cooling/none/cooling_io.h \ + cooling/Compton/cooling.h cooling/Compton/cooling_struct.h \ + cooling/Compton/cooling_io.h \ cooling/const_du/cooling.h cooling/const_du/cooling_struct.h \ cooling/const_du/cooling_io.h \ cooling/const_lambda/cooling.h cooling/const_lambda/cooling_struct.h \ @@ -140,7 +150,7 @@ nobase_noinst_HEADERS = align.h approx_math.h atomic.h barrier.h cycle.h error.h cooling/grackle/cooling.h cooling/grackle/cooling_struct.h \ cooling/grackle/cooling_io.h \ cooling/EAGLE/cooling.h cooling/EAGLE/cooling_struct.h \ - cooling/EAGLE/cooling_io.h \ + cooling/EAGLE/cooling_io.h cooling/EAGLE/interpolate.h cooling/EAGLE/cooling_rates.h \ chemistry/none/chemistry.h \ chemistry/none/chemistry_io.h \ chemistry/none/chemistry_struct.h \ diff --git a/src/active.h b/src/active.h index 3fe52a86b373ff0b33b88eca0dac9b7c6b58a216..5bbbd3803cb09e7aa05ddb15e2e5c2a15b27602c 100644 --- a/src/active.h +++ b/src/active.h @@ -39,15 +39,16 @@ __attribute__((always_inline)) INLINE static int cell_are_part_drifted( const struct cell *c, const struct engine *e) { #ifdef SWIFT_DEBUG_CHECKS - if (c->ti_old_part > e->ti_current) + if (c->hydro.ti_old_part > e->ti_current) error( - "Cell has been drifted too far forward in time! c->ti_old=%lld (t=%e) " + "Cell has been drifted too far forward in time! 
c->ti_old_part=%lld " + "(t=%e) " "and e->ti_current=%lld (t=%e, a=%e)", - c->ti_old_part, c->ti_old_part * e->time_base, e->ti_current, - e->ti_current * e->time_base, e->cosmology->a); + c->hydro.ti_old_part, c->hydro.ti_old_part * e->time_base, + e->ti_current, e->ti_current * e->time_base, e->cosmology->a); #endif - return (c->ti_old_part == e->ti_current); + return (c->hydro.ti_old_part == e->ti_current); } /** @@ -62,15 +63,31 @@ __attribute__((always_inline)) INLINE static int cell_are_gpart_drifted( const struct cell *c, const struct engine *e) { #ifdef SWIFT_DEBUG_CHECKS - if (c->ti_old_gpart > e->ti_current) + if (c->grav.ti_old_part > e->ti_current) error( "Cell has been drifted too far forward in time! c->ti_old=%lld (t=%e) " "and e->ti_current=%lld (t=%e)", - c->ti_old_gpart, c->ti_old_gpart * e->time_base, e->ti_current, + c->grav.ti_old_part, c->grav.ti_old_part * e->time_base, e->ti_current, e->ti_current * e->time_base); #endif - return (c->ti_old_gpart == e->ti_current); + return (c->grav.ti_old_part == e->ti_current); +} + +/** + * @brief Check that the #spart in a #cell have been drifted to the current + * time. + * + * @param c The #cell. + * @param e The #engine containing information about the current time. + * @return 1 if the #cell has been drifted to the current time, 0 otherwise. + */ +__attribute__((always_inline)) INLINE static int cell_are_spart_drifted( + const struct cell *c, const struct engine *e) { + + /* Currently just use the gpart drift + * This function is just for clarity */ + return cell_are_gpart_drifted(c, e); } /* Are cells / particles active for regular tasks ? */ @@ -86,15 +103,15 @@ __attribute__((always_inline)) INLINE static int cell_is_active_hydro( const struct cell *c, const struct engine *e) { #ifdef SWIFT_DEBUG_CHECKS - if (c->ti_hydro_end_min < e->ti_current) + if (c->hydro.ti_end_min < e->ti_current) error( "cell in an impossible time-zone! 
c->ti_end_min=%lld (t=%e) and " "e->ti_current=%lld (t=%e, a=%e)", - c->ti_hydro_end_min, c->ti_hydro_end_min * e->time_base, e->ti_current, + c->hydro.ti_end_min, c->hydro.ti_end_min * e->time_base, e->ti_current, e->ti_current * e->time_base, e->cosmology->a); #endif - return (c->ti_hydro_end_min == e->ti_current); + return (c->hydro.ti_end_min == e->ti_current); } /** @@ -108,14 +125,14 @@ __attribute__((always_inline)) INLINE static int cell_is_all_active_hydro( const struct cell *c, const struct engine *e) { #ifdef SWIFT_DEBUG_CHECKS - if (c->ti_hydro_end_max < e->ti_current) + if (c->hydro.ti_end_max < e->ti_current) error( "cell in an impossible time-zone! c->ti_end_max=%lld " "e->ti_current=%lld", - c->ti_hydro_end_max, e->ti_current); + c->hydro.ti_end_max, e->ti_current); #endif - return (c->ti_hydro_end_max == e->ti_current); + return (c->hydro.ti_end_max == e->ti_current); } /** @@ -129,15 +146,28 @@ __attribute__((always_inline)) INLINE static int cell_is_active_gravity( const struct cell *c, const struct engine *e) { #ifdef SWIFT_DEBUG_CHECKS - if (c->ti_gravity_end_min < e->ti_current) + if (c->grav.ti_end_min < e->ti_current) error( "cell in an impossible time-zone! c->ti_end_min=%lld (t=%e) and " "e->ti_current=%lld (t=%e, a=%e)", - c->ti_gravity_end_min, c->ti_gravity_end_min * e->time_base, - e->ti_current, e->ti_current * e->time_base, e->cosmology->a); + c->grav.ti_end_min, c->grav.ti_end_min * e->time_base, e->ti_current, + e->ti_current * e->time_base, e->cosmology->a); #endif - return (c->ti_gravity_end_min == e->ti_current); + return (c->grav.ti_end_min == e->ti_current); +} + +/** + * @brief Does a cell contain any multipole requiring calculation ? + * + * @param c The #cell. + * @param e The #engine containing information about the current time. + * @return 1 if the #cell contains at least an active particle, 0 otherwise. 
+ */ +__attribute__((always_inline)) INLINE static int cell_is_active_gravity_mm( + const struct cell *c, const struct engine *e) { + + return (c->grav.ti_end_min == e->ti_current); } /** @@ -151,14 +181,36 @@ __attribute__((always_inline)) INLINE static int cell_is_all_active_gravity( const struct cell *c, const struct engine *e) { #ifdef SWIFT_DEBUG_CHECKS - if (c->ti_gravity_end_max < e->ti_current) + if (c->grav.ti_end_max < e->ti_current) error( "cell in an impossible time-zone! c->ti_end_max=%lld " "e->ti_current=%lld", - c->ti_gravity_end_max, e->ti_current); + c->grav.ti_end_max, e->ti_current); +#endif + + return (c->grav.ti_end_max == e->ti_current); +} + +/** + * @brief Does a cell contain any s-particle finishing their time-step now ? + * + * @param c The #cell. + * @param e The #engine containing information about the current time. + * @return 1 if the #cell contains at least an active particle, 0 otherwise. + */ +__attribute__((always_inline)) INLINE static int cell_is_active_stars( + const struct cell *c, const struct engine *e) { + +#ifdef SWIFT_DEBUG_CHECKS + if (c->stars.ti_end_min < e->ti_current) + error( + "cell in an impossible time-zone! c->ti_end_min=%lld (t=%e) and " + "e->ti_current=%lld (t=%e, a=%e)", + c->stars.ti_end_min, c->stars.ti_end_min * e->time_base, e->ti_current, + e->ti_current * e->time_base, e->cosmology->a); #endif - return (c->ti_gravity_end_max == e->ti_current); + return (c->stars.ti_end_min == e->ti_current); } /** @@ -249,6 +301,42 @@ __attribute__((always_inline)) INLINE static int spart_is_active( return (spart_bin <= max_active_bin); } +/** + * @brief Has this particle been inhibited? + * + * @param p The #part. + * @param e The #engine containing information about the current time. + * @return 1 if the #part is inhibited, 0 otherwise. 
+ */ +__attribute__((always_inline)) INLINE static int part_is_inhibited( + const struct part *p, const struct engine *e) { + return p->time_bin == time_bin_inhibited; +} + +/** + * @brief Has this gravity particle been inhibited? + * + * @param gp The #gpart. + * @param e The #engine containing information about the current time. + * @return 1 if the #part is inhibited, 0 otherwise. + */ +__attribute__((always_inline)) INLINE static int gpart_is_inhibited( + const struct gpart *gp, const struct engine *e) { + return gp->time_bin == time_bin_inhibited; +} + +/** + * @brief Has this star particle been inhibited? + * + * @param sp The #spart. + * @param e The #engine containing information about the current time. + * @return 1 if the #part is inhibited, 0 otherwise. + */ +__attribute__((always_inline)) INLINE static int spart_is_inhibited( + const struct spart *sp, const struct engine *e) { + return sp->time_bin == time_bin_inhibited; +} + /* Are cells / particles active for kick1 tasks ? */ /** @@ -262,15 +350,15 @@ __attribute__((always_inline)) INLINE static int cell_is_starting_hydro( const struct cell *c, const struct engine *e) { #ifdef SWIFT_DEBUG_CHECKS - if (c->ti_hydro_beg_max > e->ti_current) + if (c->hydro.ti_beg_max > e->ti_current) error( "cell in an impossible time-zone! c->ti_beg_max=%lld (t=%e) and " "e->ti_current=%lld (t=%e, a=%e)", - c->ti_hydro_beg_max, c->ti_hydro_beg_max * e->time_base, e->ti_current, + c->hydro.ti_beg_max, c->hydro.ti_beg_max * e->time_base, e->ti_current, e->ti_current * e->time_base, e->cosmology->a); #endif - return (c->ti_hydro_beg_max == e->ti_current); + return (c->hydro.ti_beg_max == e->ti_current); } /** @@ -284,15 +372,15 @@ __attribute__((always_inline)) INLINE static int cell_is_starting_gravity( const struct cell *c, const struct engine *e) { #ifdef SWIFT_DEBUG_CHECKS - if (c->ti_gravity_beg_max > e->ti_current) + if (c->grav.ti_beg_max > e->ti_current) error( "cell in an impossible time-zone! 
c->ti_beg_max=%lld (t=%e) and " "e->ti_current=%lld (t=%e, a=%e)", - c->ti_gravity_beg_max, c->ti_gravity_beg_max * e->time_base, - e->ti_current, e->ti_current * e->time_base, e->cosmology->a); + c->grav.ti_beg_max, c->grav.ti_beg_max * e->time_base, e->ti_current, + e->ti_current * e->time_base, e->cosmology->a); #endif - return (c->ti_gravity_beg_max == e->ti_current); + return (c->grav.ti_beg_max == e->ti_current); } /** @@ -378,4 +466,5 @@ __attribute__((always_inline)) INLINE static int spart_is_starting( return (spart_bin <= max_active_bin); } + #endif /* SWIFT_ACTIVE_H */ diff --git a/src/align.h b/src/align.h index 6d329ae7983d68aee096f6f9e65990d5fed6a0f2..24ff0828b09855f31c187b655b1d751e78af8769 100644 --- a/src/align.h +++ b/src/align.h @@ -44,6 +44,8 @@ * alignment. * * Note that this turns into a no-op but gives information to the compiler. + * For GCC versions older than 4.6 this is ignored as the builtin does not + * exist. * * @param type The type of the array. * @param array The array. @@ -52,11 +54,11 @@ #if defined(__ICC) #define swift_align_information(type, array, alignment) \ __assume_aligned(array, alignment); -#elif defined(__GNUC__) +#elif (__GNUC__ > 4) || (__GNUC__ == 4 && __GNUC_MINOR__ > 6) #define swift_align_information(type, array, alignment) \ array = (type *)__builtin_assume_aligned(array, alignment); #else -#define swift_align_information(array, alignment) ; +#define swift_align_information(type, array, alignment) ; #endif /** diff --git a/src/approx_math.h b/src/approx_math.h index 90ea4eb997c71311e0c1ce854bbdd0a0ba7396ce..f347bab44790d1e3120675bcbd6e7a457ca09821 100644 --- a/src/approx_math.h +++ b/src/approx_math.h @@ -21,6 +21,31 @@ #include "inline.h" +/** + * @brief Approximate version of the complementay error function erfcf(x). + * + * This is based on eq. 7.1.27 of Abramowitz & Stegun, 1972. + * The absolute error is < 4.7*10^-4 over the range 0 < x < infinity. + * + * Returns garbage for x < 0. 
+ * @param x The number to compute erfc for. + */ +__attribute__((always_inline, const)) INLINE static float approx_erfcf( + float x) { + + /* 1 + 0.278393*x + 0.230389*x^2 + 0.000972*x^3 + 0.078108*x^4 */ + float arg = 0.078108f; + arg = x * arg + 0.000972f; + arg = x * arg + 0.230389f; + arg = x * arg + 0.278393f; + arg = x * arg + 1.f; + + /* 1 / arg^4 */ + const float arg2 = arg * arg; + const float arg4 = arg2 * arg2; + return 1.f / arg4; +} + /** * @brief Approximate version of expf(x) using a 4th order Taylor expansion * diff --git a/src/atomic.h b/src/atomic.h index 69df59e9fba965422eaf9a3b3de9d28ab9f09dad..10548c6a20249b4b0c362c5e6ab78ea5d85b2091 100644 --- a/src/atomic.h +++ b/src/atomic.h @@ -127,4 +127,36 @@ __attribute__((always_inline)) INLINE static void atomic_add_f( } while (test_val.as_int != old_val.as_int); } +/** + * @brief Atomic add operation on doubles. + * + * This is a text-book implementation based on an atomic CAS. + * + * We create a temporary union to cope with the int-only atomic CAS + * and the floating-point add that we want. + * + * @param address The address to update. + * @param y The value to update the address with. 
+ */ +__attribute__((always_inline)) INLINE static void atomic_add_d( + volatile double *const address, const double y) { + + long long *const long_long_ptr = (long long *)address; + + typedef union { + double as_double; + long long as_long_long; + } cast_type; + + cast_type test_val, old_val, new_val; + old_val.as_double = *address; + + do { + test_val.as_long_long = old_val.as_long_long; + new_val.as_double = old_val.as_double + y; + old_val.as_long_long = + atomic_cas(long_long_ptr, test_val.as_long_long, new_val.as_long_long); + } while (test_val.as_long_long != old_val.as_long_long); +} + #endif /* SWIFT_ATOMIC_H */ diff --git a/src/cache.h b/src/cache.h index c41e11c34246ef0de93bb1ae7500277aab555b9e..5dd8164b1dc80795a8593cc2af42c2c9e7e68885 100644 --- a/src/cache.h +++ b/src/cache.h @@ -197,12 +197,12 @@ __attribute__((always_inline)) INLINE void cache_read_particles( swift_declare_aligned_ptr(float, vy, ci_cache->vy, SWIFT_CACHE_ALIGNMENT); swift_declare_aligned_ptr(float, vz, ci_cache->vz, SWIFT_CACHE_ALIGNMENT); - const struct part *restrict parts = ci->parts; + const struct part *restrict parts = ci->hydro.parts; const double loc[3] = {ci->loc[0], ci->loc[1], ci->loc[2]}; /* Shift the particles positions to a local frame so single precision can be * used instead of double precision. */ - for (int i = 0; i < ci->count; i++) { + for (int i = 0; i < ci->hydro.count; i++) { x[i] = (float)(parts[i].x[0] - loc[0]); y[i] = (float)(parts[i].x[1] - loc[1]); z[i] = (float)(parts[i].x[2] - loc[2]); @@ -248,7 +248,7 @@ __attribute__((always_inline)) INLINE void cache_read_particles_subset( swift_declare_aligned_ptr(float, vy, ci_cache->vy, SWIFT_CACHE_ALIGNMENT); swift_declare_aligned_ptr(float, vz, ci_cache->vz, SWIFT_CACHE_ALIGNMENT); - const struct part *restrict parts = ci->parts; + const struct part *restrict parts = ci->hydro.parts; /* The cell is on the right so read the particles * into the cache from the start of the cell. 
*/ @@ -258,7 +258,7 @@ __attribute__((always_inline)) INLINE void cache_read_particles_subset( const int pad = VEC_SIZE - rem; /* Increase last_pi if there are particles in the cell left to read. */ - if (*last_pi + pad < ci->count) *last_pi += pad; + if (*last_pi + pad < ci->hydro.count) *last_pi += pad; } /* Shift the particles positions to a local frame so single precision can be @@ -278,11 +278,11 @@ __attribute__((always_inline)) INLINE void cache_read_particles_subset( /* Pad cache with fake particles that exist outside the cell so will not * interact. We use values of the same magnitude (but negative!) as the real * particles to avoid overflow problems. */ - const double max_dx = ci->dx_max_part; + const double max_dx = ci->hydro.dx_max_part; const float pos_padded[3] = {-(2. * ci->width[0] + max_dx), -(2. * ci->width[1] + max_dx), -(2. * ci->width[2] + max_dx)}; - const float h_padded = ci->parts[0].h; + const float h_padded = ci->hydro.parts[0].h; for (int i = *last_pi; i < *last_pi + VEC_SIZE; i++) { x[i] = pos_padded[0]; @@ -299,7 +299,7 @@ __attribute__((always_inline)) INLINE void cache_read_particles_subset( /* The cell is on the left so read the particles * into the cache from the end of the cell. */ else { - const int rem = (ci->count - *first_pi) % VEC_SIZE; + const int rem = (ci->hydro.count - *first_pi) % VEC_SIZE; if (rem != 0) { const int pad = VEC_SIZE - rem; @@ -307,7 +307,7 @@ __attribute__((always_inline)) INLINE void cache_read_particles_subset( if (*first_pi - pad >= 0) *first_pi -= pad; } - const int ci_cache_count = ci->count - *first_pi; + const int ci_cache_count = ci->hydro.count - *first_pi; /* Shift the particles positions to a local frame so single precision can be * used instead of double precision. */ @@ -326,14 +326,14 @@ __attribute__((always_inline)) INLINE void cache_read_particles_subset( /* Pad cache with fake particles that exist outside the cell so will not * interact. We use values of the same magnitude (but negative!) 
as the real * particles to avoid overflow problems. */ - const double max_dx = ci->dx_max_part; + const double max_dx = ci->hydro.dx_max_part; const float pos_padded[3] = {-(2. * ci->width[0] + max_dx), -(2. * ci->width[1] + max_dx), -(2. * ci->width[2] + max_dx)}; - const float h_padded = ci->parts[0].h; + const float h_padded = ci->hydro.parts[0].h; - for (int i = ci->count - *first_pi; i < ci->count - *first_pi + VEC_SIZE; - i++) { + for (int i = ci->hydro.count - *first_pi; + i < ci->hydro.count - *first_pi + VEC_SIZE; i++) { x[i] = pos_padded[0]; y[i] = pos_padded[1]; z[i] = pos_padded[2]; @@ -382,12 +382,12 @@ __attribute__((always_inline)) INLINE void cache_read_force_particles( swift_declare_aligned_ptr(float, soundspeed, ci_cache->soundspeed, SWIFT_CACHE_ALIGNMENT); - const struct part *restrict parts = ci->parts; + const struct part *restrict parts = ci->hydro.parts; const double loc[3] = {ci->loc[0], ci->loc[1], ci->loc[2]}; /* Shift the particles positions to a local frame so single precision can be * used instead of double precision. */ - for (int i = 0; i < ci->count; i++) { + for (int i = 0; i < ci->hydro.count; i++) { x[i] = (float)(parts[i].x[0] - loc[0]); y[i] = (float)(parts[i].x[1] - loc[1]); z[i] = (float)(parts[i].x[2] - loc[2]); @@ -433,7 +433,7 @@ __attribute__((always_inline)) INLINE void cache_read_two_partial_cells_sorted( * cache. */ /* Is the number of particles to read a multiple of the vector size? */ - int rem = (ci->count - *first_pi) % VEC_SIZE; + int rem = (ci->hydro.count - *first_pi) % VEC_SIZE; if (rem != 0) { int pad = VEC_SIZE - rem; @@ -446,14 +446,14 @@ __attribute__((always_inline)) INLINE void cache_read_two_partial_cells_sorted( int pad = VEC_SIZE - rem; /* Increase last_pj if there are particles in the cell left to read. 
*/ - if (*last_pj + pad < cj->count) *last_pj += pad; + if (*last_pj + pad < cj->hydro.count) *last_pj += pad; } /* Get some local pointers */ const int first_pi_align = *first_pi; const int last_pj_align = *last_pj; - const struct part *restrict parts_i = ci->parts; - const struct part *restrict parts_j = cj->parts; + const struct part *restrict parts_i = ci->hydro.parts; + const struct part *restrict parts_j = cj->hydro.parts; /* Shift particles to the local frame and account for boundary conditions.*/ const double total_ci_shift[3] = { @@ -471,7 +471,7 @@ __attribute__((always_inline)) INLINE void cache_read_two_partial_cells_sorted( swift_declare_aligned_ptr(float, vy, ci_cache->vy, SWIFT_CACHE_ALIGNMENT); swift_declare_aligned_ptr(float, vz, ci_cache->vz, SWIFT_CACHE_ALIGNMENT); - int ci_cache_count = ci->count - first_pi_align; + int ci_cache_count = ci->hydro.count - first_pi_align; /* Shift the particles positions to a local frame (ci frame) so single * precision can be used instead of double precision. */ @@ -491,11 +491,14 @@ __attribute__((always_inline)) INLINE void cache_read_two_partial_cells_sorted( #ifdef SWIFT_DEBUG_CHECKS const float shift_threshold_x = - 2. * ci->width[0] + 2. * max(ci->dx_max_part, cj->dx_max_part); + 2. * ci->width[0] + + 2. * max(ci->hydro.dx_max_part, cj->hydro.dx_max_part); const float shift_threshold_y = - 2. * ci->width[1] + 2. * max(ci->dx_max_part, cj->dx_max_part); + 2. * ci->width[1] + + 2. * max(ci->hydro.dx_max_part, cj->hydro.dx_max_part); const float shift_threshold_z = - 2. * ci->width[2] + 2. * max(ci->dx_max_part, cj->dx_max_part); + 2. * ci->width[2] + + 2. * max(ci->hydro.dx_max_part, cj->hydro.dx_max_part); /* Make sure that particle positions have been shifted correctly. */ for (int i = 0; i < ci_cache_count; i++) { @@ -529,14 +532,14 @@ __attribute__((always_inline)) INLINE void cache_read_two_partial_cells_sorted( /* Pad cache with fake particles that exist outside the cell so will not * interact. 
We use values of the same magnitude (but negative!) as the real * particles to avoid overflow problems. */ - const double max_dx = max(ci->dx_max_part, cj->dx_max_part); + const double max_dx = max(ci->hydro.dx_max_part, cj->hydro.dx_max_part); const float pos_padded[3] = {-(2. * ci->width[0] + max_dx), -(2. * ci->width[1] + max_dx), -(2. * ci->width[2] + max_dx)}; - const float h_padded = ci->parts[0].h; + const float h_padded = ci->hydro.parts[0].h; - for (int i = ci->count - first_pi_align; - i < ci->count - first_pi_align + VEC_SIZE; i++) { + for (int i = ci->hydro.count - first_pi_align; + i < ci->hydro.count - first_pi_align + VEC_SIZE; i++) { x[i] = pos_padded[0]; y[i] = pos_padded[1]; z[i] = pos_padded[2]; @@ -609,7 +612,7 @@ __attribute__((always_inline)) INLINE void cache_read_two_partial_cells_sorted( const float pos_padded_j[3] = {-(2. * cj->width[0] + max_dx), -(2. * cj->width[1] + max_dx), -(2. * cj->width[2] + max_dx)}; - const float h_padded_j = cj->parts[0].h; + const float h_padded_j = cj->hydro.parts[0].h; for (int i = last_pj_align + 1; i < last_pj_align + 1 + VEC_SIZE; i++) { xj[i] = pos_padded_j[0]; @@ -650,7 +653,7 @@ cache_read_two_partial_cells_sorted_force( * cache. */ /* Is the number of particles to read a multiple of the vector size? */ - int rem = (ci->count - *first_pi) % VEC_SIZE; + int rem = (ci->hydro.count - *first_pi) % VEC_SIZE; if (rem != 0) { int pad = VEC_SIZE - rem; @@ -663,14 +666,14 @@ cache_read_two_partial_cells_sorted_force( int pad = VEC_SIZE - rem; /* Increase last_pj if there are particles in the cell left to read. 
*/ - if (*last_pj + pad < cj->count) *last_pj += pad; + if (*last_pj + pad < cj->hydro.count) *last_pj += pad; } /* Get some local pointers */ const int first_pi_align = *first_pi; const int last_pj_align = *last_pj; - const struct part *restrict parts_i = ci->parts; - const struct part *restrict parts_j = cj->parts; + const struct part *restrict parts_i = ci->hydro.parts; + const struct part *restrict parts_j = cj->hydro.parts; /* Shift particles to the local frame and account for boundary conditions.*/ const double total_ci_shift[3] = { @@ -697,7 +700,7 @@ cache_read_two_partial_cells_sorted_force( swift_declare_aligned_ptr(float, soundspeed, ci_cache->soundspeed, SWIFT_CACHE_ALIGNMENT); - int ci_cache_count = ci->count - first_pi_align; + int ci_cache_count = ci->hydro.count - first_pi_align; /* Shift the particles positions to a local frame (ci frame) so single * precision can be used instead of double precision. */ for (int i = 0; i < ci_cache_count; i++) { @@ -723,14 +726,14 @@ cache_read_two_partial_cells_sorted_force( /* Pad cache with fake particles that exist outside the cell so will not * interact. We use values of the same magnitude (but negative!) as the real * particles to avoid overflow problems. */ - const double max_dx = max(ci->dx_max_part, cj->dx_max_part); + const double max_dx = max(ci->hydro.dx_max_part, cj->hydro.dx_max_part); const float pos_padded[3] = {-(2. * ci->width[0] + max_dx), -(2. * ci->width[1] + max_dx), -(2. * ci->width[2] + max_dx)}; - const float h_padded = ci->parts[0].h; + const float h_padded = ci->hydro.parts[0].h; - for (int i = ci->count - first_pi_align; - i < ci->count - first_pi_align + VEC_SIZE; i++) { + for (int i = ci->hydro.count - first_pi_align; + i < ci->hydro.count - first_pi_align + VEC_SIZE; i++) { x[i] = pos_padded[0]; y[i] = pos_padded[1]; z[i] = pos_padded[2]; @@ -791,7 +794,7 @@ cache_read_two_partial_cells_sorted_force( const float pos_padded_j[3] = {-(2. * cj->width[0] + max_dx), -(2. 
* cj->width[1] + max_dx), -(2. * cj->width[2] + max_dx)}; - const float h_padded_j = cj->parts[0].h; + const float h_padded_j = cj->hydro.parts[0].h; for (int i = last_pj_align + 1; i < last_pj_align + 1 + VEC_SIZE; i++) { xj[i] = pos_padded_j[0]; @@ -831,6 +834,7 @@ static INLINE void cache_clean(struct cache *c) { free(c->balsara); free(c->soundspeed); } + c->count = 0; } #endif /* WITH_VECTORIZATION */ diff --git a/src/cell.c b/src/cell.c index 85f8531c261ed8878bff4e32ba2419616b754372..3fe5e21e7c888f2358395a27e13710db460fd74c 100644 --- a/src/cell.c +++ b/src/cell.c @@ -61,7 +61,9 @@ #include "scheduler.h" #include "space.h" #include "space_getsid.h" +#include "stars.h" #include "timers.h" +#include "tools.h" /* Global variables. */ int cell_next_tag = 0; @@ -95,7 +97,7 @@ int cell_getsize(struct cell *c) { */ int cell_link_parts(struct cell *c, struct part *parts) { - c->parts = parts; + c->hydro.parts = parts; /* Fill the progeny recursively, depth-first. */ if (c->split) { @@ -107,7 +109,7 @@ int cell_link_parts(struct cell *c, struct part *parts) { } /* Return the total number of linked particles. */ - return c->count; + return c->hydro.count; } /** @@ -120,7 +122,7 @@ int cell_link_parts(struct cell *c, struct part *parts) { */ int cell_link_gparts(struct cell *c, struct gpart *gparts) { - c->gparts = gparts; + c->grav.parts = gparts; /* Fill the progeny recursively, depth-first. */ if (c->split) { @@ -132,7 +134,7 @@ int cell_link_gparts(struct cell *c, struct gpart *gparts) { } /* Return the total number of linked particles. */ - return c->gcount; + return c->grav.count; } /** @@ -145,7 +147,7 @@ int cell_link_gparts(struct cell *c, struct gpart *gparts) { */ int cell_link_sparts(struct cell *c, struct spart *sparts) { - c->sparts = sparts; + c->stars.parts = sparts; /* Fill the progeny recursively, depth-first. 
*/ if (c->split) { @@ -157,7 +159,7 @@ int cell_link_sparts(struct cell *c, struct spart *sparts) { } /* Return the total number of linked particles. */ - return c->scount; + return c->stars.count; } /** @@ -166,26 +168,46 @@ int cell_link_sparts(struct cell *c, struct spart *sparts) { * @param c The #cell. * @param pc Pointer to an array of packed cells in which the * cells will be packed. + * @param with_gravity Are we running with gravity and hence need + * to exchange multipoles? * * @return The number of packed cells. */ -int cell_pack(struct cell *restrict c, struct pcell *restrict pc) { +int cell_pack(struct cell *restrict c, struct pcell *restrict pc, + const int with_gravity) { #ifdef WITH_MPI /* Start by packing the data of the current cell. */ - pc->h_max = c->h_max; - pc->ti_hydro_end_min = c->ti_hydro_end_min; - pc->ti_hydro_end_max = c->ti_hydro_end_max; - pc->ti_gravity_end_min = c->ti_gravity_end_min; - pc->ti_gravity_end_max = c->ti_gravity_end_max; - pc->ti_old_part = c->ti_old_part; - pc->ti_old_gpart = c->ti_old_gpart; - pc->ti_old_multipole = c->ti_old_multipole; - pc->count = c->count; - pc->gcount = c->gcount; - pc->scount = c->scount; - c->tag = pc->tag = atomic_inc(&cell_next_tag) % cell_max_tag; + pc->hydro.h_max = c->hydro.h_max; + pc->hydro.ti_end_min = c->hydro.ti_end_min; + pc->hydro.ti_end_max = c->hydro.ti_end_max; + pc->grav.ti_end_min = c->grav.ti_end_min; + pc->grav.ti_end_max = c->grav.ti_end_max; + pc->stars.ti_end_min = c->stars.ti_end_min; + pc->hydro.ti_old_part = c->hydro.ti_old_part; + pc->grav.ti_old_part = c->grav.ti_old_part; + pc->grav.ti_old_multipole = c->grav.ti_old_multipole; + pc->hydro.count = c->hydro.count; + pc->grav.count = c->grav.count; + pc->stars.count = c->stars.count; + pc->maxdepth = c->maxdepth; + + /* Copy the Multipole related information */ + if (with_gravity) { + const struct gravity_tensors *mp = c->grav.multipole; + + pc->grav.m_pole = mp->m_pole; + pc->grav.CoM[0] = mp->CoM[0]; + pc->grav.CoM[1] 
= mp->CoM[1]; + pc->grav.CoM[2] = mp->CoM[2]; + pc->grav.CoM_rebuild[0] = mp->CoM_rebuild[0]; + pc->grav.CoM_rebuild[1] = mp->CoM_rebuild[1]; + pc->grav.CoM_rebuild[2] = mp->CoM_rebuild[2]; + pc->grav.r_max = mp->r_max; + pc->grav.r_max_rebuild = mp->r_max_rebuild; + } + #ifdef SWIFT_DEBUG_CHECKS pc->cellID = c->cellID; #endif @@ -195,12 +217,47 @@ int cell_pack(struct cell *restrict c, struct pcell *restrict pc) { for (int k = 0; k < 8; k++) if (c->progeny[k] != NULL) { pc->progeny[k] = count; - count += cell_pack(c->progeny[k], &pc[count]); - } else + count += cell_pack(c->progeny[k], &pc[count], with_gravity); + } else { pc->progeny[k] = -1; + } /* Return the number of packed cells used. */ - c->pcell_size = count; + c->mpi.pcell_size = count; + return count; + +#else + error("SWIFT was not compiled with MPI support."); + return 0; +#endif +} + +/** + * @brief Pack the tag of the given cell and all it's sub-cells. + * + * @param c The #cell. + * @param tags Pointer to an array of packed tags. + * + * @return The number of packed tags. + */ +int cell_pack_tags(const struct cell *c, int *tags) { + +#ifdef WITH_MPI + + /* Start by packing the data of the current cell. */ + tags[0] = c->mpi.tag; + + /* Fill in the progeny, depth-first recursion. */ + int count = 1; + for (int k = 0; k < 8; k++) + if (c->progeny[k] != NULL) + count += cell_pack_tags(c->progeny[k], &tags[count]); + +#ifdef SWIFT_DEBUG_CHECKS + if (c->mpi.pcell_size != count) error("Inconsistent tag and pcell count!"); +#endif // SWIFT_DEBUG_CHECKS + + /* Return the number of packed tags used. */ return count; #else @@ -215,42 +272,63 @@ int cell_pack(struct cell *restrict c, struct pcell *restrict pc) { * @param pc An array of packed #pcell. * @param c The #cell in which to unpack the #pcell. * @param s The #space in which the cells are created. + * @param with_gravity Are we running with gravity and hence need + * to exchange multipoles? * * @return The number of cells created. 
*/ int cell_unpack(struct pcell *restrict pc, struct cell *restrict c, - struct space *restrict s) { + struct space *restrict s, const int with_gravity) { #ifdef WITH_MPI /* Unpack the current pcell. */ - c->h_max = pc->h_max; - c->ti_hydro_end_min = pc->ti_hydro_end_min; - c->ti_hydro_end_max = pc->ti_hydro_end_max; - c->ti_gravity_end_min = pc->ti_gravity_end_min; - c->ti_gravity_end_max = pc->ti_gravity_end_max; - c->ti_old_part = pc->ti_old_part; - c->ti_old_gpart = pc->ti_old_gpart; - c->ti_old_multipole = pc->ti_old_multipole; - c->count = pc->count; - c->gcount = pc->gcount; - c->scount = pc->scount; - c->tag = pc->tag; + c->hydro.h_max = pc->hydro.h_max; + c->hydro.ti_end_min = pc->hydro.ti_end_min; + c->hydro.ti_end_max = pc->hydro.ti_end_max; + c->grav.ti_end_min = pc->grav.ti_end_min; + c->grav.ti_end_max = pc->grav.ti_end_max; + c->stars.ti_end_min = pc->stars.ti_end_min; + c->hydro.ti_old_part = pc->hydro.ti_old_part; + c->grav.ti_old_part = pc->grav.ti_old_part; + c->grav.ti_old_multipole = pc->grav.ti_old_multipole; + c->hydro.count = pc->hydro.count; + c->grav.count = pc->grav.count; + c->stars.count = pc->stars.count; + c->maxdepth = pc->maxdepth; + #ifdef SWIFT_DEBUG_CHECKS c->cellID = pc->cellID; #endif + /* Copy the Multipole related information */ + if (with_gravity) { + + struct gravity_tensors *mp = c->grav.multipole; + + mp->m_pole = pc->grav.m_pole; + mp->CoM[0] = pc->grav.CoM[0]; + mp->CoM[1] = pc->grav.CoM[1]; + mp->CoM[2] = pc->grav.CoM[2]; + mp->CoM_rebuild[0] = pc->grav.CoM_rebuild[0]; + mp->CoM_rebuild[1] = pc->grav.CoM_rebuild[1]; + mp->CoM_rebuild[2] = pc->grav.CoM_rebuild[2]; + mp->r_max = pc->grav.r_max; + mp->r_max_rebuild = pc->grav.r_max_rebuild; + } + /* Number of new cells created. */ int count = 1; /* Fill the progeny recursively, depth-first. 
*/ + c->split = 0; for (int k = 0; k < 8; k++) if (pc->progeny[k] >= 0) { struct cell *temp; space_getcells(s, 1, &temp); - temp->count = 0; - temp->gcount = 0; - temp->scount = 0; + temp->hydro.count = 0; + temp->grav.count = 0; + temp->stars.count = 0; temp->loc[0] = c->loc[0]; temp->loc[1] = c->loc[1]; temp->loc[2] = c->loc[2]; @@ -263,17 +341,56 @@ int cell_unpack(struct pcell *restrict pc, struct cell *restrict c, if (k & 1) temp->loc[2] += temp->width[2]; temp->depth = c->depth + 1; temp->split = 0; - temp->dx_max_part = 0.f; - temp->dx_max_sort = 0.f; + temp->hydro.dx_max_part = 0.f; + temp->hydro.dx_max_sort = 0.f; + temp->stars.dx_max_part = 0.f; + temp->stars.dx_max_sort = 0.f; temp->nodeID = c->nodeID; temp->parent = c; c->progeny[k] = temp; c->split = 1; - count += cell_unpack(&pc[pc->progeny[k]], temp, s); + count += cell_unpack(&pc[pc->progeny[k]], temp, s, with_gravity); } /* Return the total number of unpacked cells. */ - c->pcell_size = count; + c->mpi.pcell_size = count; + return count; + +#else + error("SWIFT was not compiled with MPI support."); + return 0; +#endif +} + +/** + * @brief Unpack the tags of a given cell and its sub-cells. + * + * @param tags An array of tags. + * @param c The #cell in which to unpack the tags. + * + * @return The number of tags created. + */ +int cell_unpack_tags(const int *tags, struct cell *restrict c) { + +#ifdef WITH_MPI + + /* Unpack the current pcell. */ + c->mpi.tag = tags[0]; + + /* Number of new cells created. */ + int count = 1; + + /* Fill the progeny recursively, depth-first. */ + for (int k = 0; k < 8; k++) + if (c->progeny[k] != NULL) { + count += cell_unpack_tags(&tags[count], c->progeny[k]); + } + +#ifdef SWIFT_DEBUG_CHECKS + if (c->mpi.pcell_size != count) error("Inconsistent tag and pcell count!"); +#endif // SWIFT_DEBUG_CHECKS + + /* Return the total number of unpacked tags. 
*/ return count; #else @@ -296,11 +413,13 @@ int cell_pack_end_step(struct cell *restrict c, #ifdef WITH_MPI /* Pack this cell's data. */ - pcells[0].ti_hydro_end_min = c->ti_hydro_end_min; - pcells[0].ti_hydro_end_max = c->ti_hydro_end_max; - pcells[0].ti_gravity_end_min = c->ti_gravity_end_min; - pcells[0].ti_gravity_end_max = c->ti_gravity_end_max; - pcells[0].dx_max_part = c->dx_max_part; + pcells[0].hydro.ti_end_min = c->hydro.ti_end_min; + pcells[0].hydro.ti_end_max = c->hydro.ti_end_max; + pcells[0].grav.ti_end_min = c->grav.ti_end_min; + pcells[0].grav.ti_end_max = c->grav.ti_end_max; + pcells[0].stars.ti_end_min = c->stars.ti_end_min; + pcells[0].hydro.dx_max_part = c->hydro.dx_max_part; + pcells[0].stars.dx_max_part = c->stars.dx_max_part; /* Fill in the progeny, depth-first recursion. */ int count = 1; @@ -332,11 +451,13 @@ int cell_unpack_end_step(struct cell *restrict c, #ifdef WITH_MPI /* Unpack this cell's data. */ - c->ti_hydro_end_min = pcells[0].ti_hydro_end_min; - c->ti_hydro_end_max = pcells[0].ti_hydro_end_max; - c->ti_gravity_end_min = pcells[0].ti_gravity_end_min; - c->ti_gravity_end_max = pcells[0].ti_gravity_end_max; - c->dx_max_part = pcells[0].dx_max_part; + c->hydro.ti_end_min = pcells[0].hydro.ti_end_min; + c->hydro.ti_end_max = pcells[0].hydro.ti_end_max; + c->grav.ti_end_min = pcells[0].grav.ti_end_min; + c->grav.ti_end_max = pcells[0].grav.ti_end_max; + c->stars.ti_end_min = pcells[0].stars.ti_end_min; + c->hydro.dx_max_part = pcells[0].hydro.dx_max_part; + c->stars.dx_max_part = pcells[0].stars.dx_max_part; /* Fill in the progeny, depth-first recursion. */ int count = 1; @@ -369,7 +490,7 @@ int cell_pack_multipoles(struct cell *restrict c, #ifdef WITH_MPI /* Pack this cell's data. */ - pcells[0] = *c->multipole; + pcells[0] = *c->grav.multipole; /* Fill in the progeny, depth-first recursion. */ int count = 1; @@ -401,7 +522,7 @@ int cell_unpack_multipoles(struct cell *restrict c, #ifdef WITH_MPI /* Unpack this cell's data. 
*/ - *c->multipole = pcells[0]; + *c->grav.multipole = pcells[0]; /* Fill in the progeny, depth-first recursion. */ int count = 1; @@ -430,16 +551,16 @@ int cell_locktree(struct cell *c) { TIMER_TIC /* First of all, try to lock this cell. */ - if (c->hold || lock_trylock(&c->lock) != 0) { + if (c->hydro.hold || lock_trylock(&c->hydro.lock) != 0) { TIMER_TOC(timer_locktree); return 1; } /* Did somebody hold this cell in the meantime? */ - if (c->hold) { + if (c->hydro.hold) { /* Unlock this cell. */ - if (lock_unlock(&c->lock) != 0) error("Failed to unlock cell."); + if (lock_unlock(&c->hydro.lock) != 0) error("Failed to unlock cell."); /* Admit defeat. */ TIMER_TOC(timer_locktree); @@ -451,13 +572,13 @@ int cell_locktree(struct cell *c) { for (finger = c->parent; finger != NULL; finger = finger->parent) { /* Lock this cell. */ - if (lock_trylock(&finger->lock) != 0) break; + if (lock_trylock(&finger->hydro.lock) != 0) break; /* Increment the hold. */ - atomic_inc(&finger->hold); + atomic_inc(&finger->hydro.hold); /* Unlock the cell. */ - if (lock_unlock(&finger->lock) != 0) error("Failed to unlock cell."); + if (lock_unlock(&finger->hydro.lock) != 0) error("Failed to unlock cell."); } /* If we reached the top of the tree, we're done. */ @@ -472,10 +593,10 @@ int cell_locktree(struct cell *c) { /* Undo the holds up to finger. */ for (struct cell *finger2 = c->parent; finger2 != finger; finger2 = finger2->parent) - atomic_dec(&finger2->hold); + atomic_dec(&finger2->hydro.hold); /* Unlock this cell. */ - if (lock_unlock(&c->lock) != 0) error("Failed to unlock cell."); + if (lock_unlock(&c->hydro.lock) != 0) error("Failed to unlock cell."); /* Admit defeat. */ TIMER_TOC(timer_locktree); @@ -494,16 +615,16 @@ int cell_glocktree(struct cell *c) { TIMER_TIC /* First of all, try to lock this cell. 
*/ - if (c->ghold || lock_trylock(&c->glock) != 0) { + if (c->grav.phold || lock_trylock(&c->grav.plock) != 0) { TIMER_TOC(timer_locktree); return 1; } /* Did somebody hold this cell in the meantime? */ - if (c->ghold) { + if (c->grav.phold) { /* Unlock this cell. */ - if (lock_unlock(&c->glock) != 0) error("Failed to unlock cell."); + if (lock_unlock(&c->grav.plock) != 0) error("Failed to unlock cell."); /* Admit defeat. */ TIMER_TOC(timer_locktree); @@ -515,13 +636,13 @@ int cell_glocktree(struct cell *c) { for (finger = c->parent; finger != NULL; finger = finger->parent) { /* Lock this cell. */ - if (lock_trylock(&finger->glock) != 0) break; + if (lock_trylock(&finger->grav.plock) != 0) break; /* Increment the hold. */ - atomic_inc(&finger->ghold); + atomic_inc(&finger->grav.phold); /* Unlock the cell. */ - if (lock_unlock(&finger->glock) != 0) error("Failed to unlock cell."); + if (lock_unlock(&finger->grav.plock) != 0) error("Failed to unlock cell."); } /* If we reached the top of the tree, we're done. */ @@ -536,10 +657,10 @@ int cell_glocktree(struct cell *c) { /* Undo the holds up to finger. */ for (struct cell *finger2 = c->parent; finger2 != finger; finger2 = finger2->parent) - atomic_dec(&finger2->ghold); + atomic_dec(&finger2->grav.phold); /* Unlock this cell. */ - if (lock_unlock(&c->glock) != 0) error("Failed to unlock cell."); + if (lock_unlock(&c->grav.plock) != 0) error("Failed to unlock cell."); /* Admit defeat. */ TIMER_TOC(timer_locktree); @@ -558,16 +679,16 @@ int cell_mlocktree(struct cell *c) { TIMER_TIC /* First of all, try to lock this cell. */ - if (c->mhold || lock_trylock(&c->mlock) != 0) { + if (c->grav.mhold || lock_trylock(&c->grav.mlock) != 0) { TIMER_TOC(timer_locktree); return 1; } /* Did somebody hold this cell in the meantime? */ - if (c->mhold) { + if (c->grav.mhold) { /* Unlock this cell. 
*/ - if (lock_unlock(&c->mlock) != 0) error("Failed to unlock cell."); + if (lock_unlock(&c->grav.mlock) != 0) error("Failed to unlock cell."); /* Admit defeat. */ TIMER_TOC(timer_locktree); @@ -579,13 +700,13 @@ int cell_mlocktree(struct cell *c) { for (finger = c->parent; finger != NULL; finger = finger->parent) { /* Lock this cell. */ - if (lock_trylock(&finger->mlock) != 0) break; + if (lock_trylock(&finger->grav.mlock) != 0) break; /* Increment the hold. */ - atomic_inc(&finger->mhold); + atomic_inc(&finger->grav.mhold); /* Unlock the cell. */ - if (lock_unlock(&finger->mlock) != 0) error("Failed to unlock cell."); + if (lock_unlock(&finger->grav.mlock) != 0) error("Failed to unlock cell."); } /* If we reached the top of the tree, we're done. */ @@ -600,10 +721,10 @@ int cell_mlocktree(struct cell *c) { /* Undo the holds up to finger. */ for (struct cell *finger2 = c->parent; finger2 != finger; finger2 = finger2->parent) - atomic_dec(&finger2->mhold); + atomic_dec(&finger2->grav.mhold); /* Unlock this cell. */ - if (lock_unlock(&c->mlock) != 0) error("Failed to unlock cell."); + if (lock_unlock(&c->grav.mlock) != 0) error("Failed to unlock cell."); /* Admit defeat. */ TIMER_TOC(timer_locktree); @@ -622,16 +743,16 @@ int cell_slocktree(struct cell *c) { TIMER_TIC /* First of all, try to lock this cell. */ - if (c->shold || lock_trylock(&c->slock) != 0) { + if (c->stars.hold || lock_trylock(&c->stars.lock) != 0) { TIMER_TOC(timer_locktree); return 1; } /* Did somebody hold this cell in the meantime? */ - if (c->shold) { + if (c->stars.hold) { /* Unlock this cell. */ - if (lock_unlock(&c->slock) != 0) error("Failed to unlock cell."); + if (lock_unlock(&c->stars.lock) != 0) error("Failed to unlock cell."); /* Admit defeat. */ TIMER_TOC(timer_locktree); @@ -643,13 +764,13 @@ int cell_slocktree(struct cell *c) { for (finger = c->parent; finger != NULL; finger = finger->parent) { /* Lock this cell. 
*/ - if (lock_trylock(&finger->slock) != 0) break; + if (lock_trylock(&finger->stars.lock) != 0) break; /* Increment the hold. */ - atomic_inc(&finger->shold); + atomic_inc(&finger->stars.hold); /* Unlock the cell. */ - if (lock_unlock(&finger->slock) != 0) error("Failed to unlock cell."); + if (lock_unlock(&finger->stars.lock) != 0) error("Failed to unlock cell."); } /* If we reached the top of the tree, we're done. */ @@ -664,10 +785,10 @@ int cell_slocktree(struct cell *c) { /* Undo the holds up to finger. */ for (struct cell *finger2 = c->parent; finger2 != finger; finger2 = finger2->parent) - atomic_dec(&finger2->shold); + atomic_dec(&finger2->stars.hold); /* Unlock this cell. */ - if (lock_unlock(&c->slock) != 0) error("Failed to unlock cell."); + if (lock_unlock(&c->stars.lock) != 0) error("Failed to unlock cell."); /* Admit defeat. */ TIMER_TOC(timer_locktree); @@ -685,11 +806,11 @@ void cell_unlocktree(struct cell *c) { TIMER_TIC /* First of all, try to unlock this cell. */ - if (lock_unlock(&c->lock) != 0) error("Failed to unlock cell."); + if (lock_unlock(&c->hydro.lock) != 0) error("Failed to unlock cell."); /* Climb up the tree and unhold the parents. */ for (struct cell *finger = c->parent; finger != NULL; finger = finger->parent) - atomic_dec(&finger->hold); + atomic_dec(&finger->hydro.hold); TIMER_TOC(timer_locktree); } @@ -704,11 +825,11 @@ void cell_gunlocktree(struct cell *c) { TIMER_TIC /* First of all, try to unlock this cell. */ - if (lock_unlock(&c->glock) != 0) error("Failed to unlock cell."); + if (lock_unlock(&c->grav.plock) != 0) error("Failed to unlock cell."); /* Climb up the tree and unhold the parents. */ for (struct cell *finger = c->parent; finger != NULL; finger = finger->parent) - atomic_dec(&finger->ghold); + atomic_dec(&finger->grav.phold); TIMER_TOC(timer_locktree); } @@ -723,11 +844,11 @@ void cell_munlocktree(struct cell *c) { TIMER_TIC /* First of all, try to unlock this cell. 
*/ - if (lock_unlock(&c->mlock) != 0) error("Failed to unlock cell."); + if (lock_unlock(&c->grav.mlock) != 0) error("Failed to unlock cell."); /* Climb up the tree and unhold the parents. */ for (struct cell *finger = c->parent; finger != NULL; finger = finger->parent) - atomic_dec(&finger->mhold); + atomic_dec(&finger->grav.mhold); TIMER_TOC(timer_locktree); } @@ -742,11 +863,11 @@ void cell_sunlocktree(struct cell *c) { TIMER_TIC /* First of all, try to unlock this cell. */ - if (lock_unlock(&c->slock) != 0) error("Failed to unlock cell."); + if (lock_unlock(&c->stars.lock) != 0) error("Failed to unlock cell."); /* Climb up the tree and unhold the parents. */ for (struct cell *finger = c->parent; finger != NULL; finger = finger->parent) - atomic_dec(&finger->shold); + atomic_dec(&finger->stars.hold); TIMER_TOC(timer_locktree); } @@ -756,25 +877,26 @@ void cell_sunlocktree(struct cell *c) { * * @param c The #cell array to be sorted. * @param parts_offset Offset of the cell parts array relative to the - * space's parts array, i.e. c->parts - s->parts. + * space's parts array, i.e. c->hydro.parts - s->parts. * @param sparts_offset Offset of the cell sparts array relative to the - * space's sparts array, i.e. c->sparts - s->sparts. - * @param buff A buffer with at least max(c->count, c->gcount) entries, - * used for sorting indices. - * @param sbuff A buffer with at least max(c->scount, c->gcount) entries, - * used for sorting indices for the sparts. - * @param gbuff A buffer with at least max(c->count, c->gcount) entries, - * used for sorting indices for the gparts. + * space's sparts array, i.e. c->stars.parts - s->stars.parts. + * @param buff A buffer with at least max(c->hydro.count, c->grav.count) + * entries, used for sorting indices. + * @param sbuff A buffer with at least max(c->stars.count, c->grav.count) + * entries, used for sorting indices for the sparts. 
+ * @param gbuff A buffer with at least max(c->hydro.count, c->grav.count) + * entries, used for sorting indices for the gparts. */ void cell_split(struct cell *c, ptrdiff_t parts_offset, ptrdiff_t sparts_offset, struct cell_buff *buff, struct cell_buff *sbuff, struct cell_buff *gbuff) { - const int count = c->count, gcount = c->gcount, scount = c->scount; - struct part *parts = c->parts; - struct xpart *xparts = c->xparts; - struct gpart *gparts = c->gparts; - struct spart *sparts = c->sparts; + const int count = c->hydro.count, gcount = c->grav.count, + scount = c->stars.count; + struct part *parts = c->hydro.parts; + struct xpart *xparts = c->hydro.xparts; + struct gpart *gparts = c->grav.parts; + struct spart *sparts = c->stars.parts; const double pivot[3] = {c->loc[0] + c->width[0] / 2, c->loc[1] + c->width[1] / 2, c->loc[2] + c->width[2] / 2}; @@ -849,9 +971,9 @@ void cell_split(struct cell *c, ptrdiff_t parts_offset, ptrdiff_t sparts_offset, /* Store the counts and offsets. */ for (int k = 0; k < 8; k++) { - c->progeny[k]->count = bucket_count[k]; - c->progeny[k]->parts = &c->parts[bucket_offset[k]]; - c->progeny[k]->xparts = &c->xparts[bucket_offset[k]]; + c->progeny[k]->hydro.count = bucket_count[k]; + c->progeny[k]->hydro.parts = &c->hydro.parts[bucket_offset[k]]; + c->progeny[k]->hydro.xparts = &c->hydro.xparts[bucket_offset[k]]; } #ifdef SWIFT_DEBUG_CHECKS @@ -865,54 +987,55 @@ void cell_split(struct cell *c, ptrdiff_t parts_offset, ptrdiff_t sparts_offset, /* Verify that _all_ the parts have been assigned to a cell. 
*/ for (int k = 1; k < 8; k++) - if (&c->progeny[k - 1]->parts[c->progeny[k - 1]->count] != - c->progeny[k]->parts) + if (&c->progeny[k - 1]->hydro.parts[c->progeny[k - 1]->hydro.count] != + c->progeny[k]->hydro.parts) error("Particle sorting failed (internal consistency)."); - if (c->progeny[0]->parts != c->parts) + if (c->progeny[0]->hydro.parts != c->hydro.parts) error("Particle sorting failed (left edge)."); - if (&c->progeny[7]->parts[c->progeny[7]->count] != &c->parts[count]) + if (&c->progeny[7]->hydro.parts[c->progeny[7]->hydro.count] != + &c->hydro.parts[count]) error("Particle sorting failed (right edge)."); /* Verify a few sub-cells. */ - for (int k = 0; k < c->progeny[0]->count; k++) - if (c->progeny[0]->parts[k].x[0] >= pivot[0] || - c->progeny[0]->parts[k].x[1] >= pivot[1] || - c->progeny[0]->parts[k].x[2] >= pivot[2]) + for (int k = 0; k < c->progeny[0]->hydro.count; k++) + if (c->progeny[0]->hydro.parts[k].x[0] >= pivot[0] || + c->progeny[0]->hydro.parts[k].x[1] >= pivot[1] || + c->progeny[0]->hydro.parts[k].x[2] >= pivot[2]) error("Sorting failed (progeny=0)."); - for (int k = 0; k < c->progeny[1]->count; k++) - if (c->progeny[1]->parts[k].x[0] >= pivot[0] || - c->progeny[1]->parts[k].x[1] >= pivot[1] || - c->progeny[1]->parts[k].x[2] < pivot[2]) + for (int k = 0; k < c->progeny[1]->hydro.count; k++) + if (c->progeny[1]->hydro.parts[k].x[0] >= pivot[0] || + c->progeny[1]->hydro.parts[k].x[1] >= pivot[1] || + c->progeny[1]->hydro.parts[k].x[2] < pivot[2]) error("Sorting failed (progeny=1)."); - for (int k = 0; k < c->progeny[2]->count; k++) - if (c->progeny[2]->parts[k].x[0] >= pivot[0] || - c->progeny[2]->parts[k].x[1] < pivot[1] || - c->progeny[2]->parts[k].x[2] >= pivot[2]) + for (int k = 0; k < c->progeny[2]->hydro.count; k++) + if (c->progeny[2]->hydro.parts[k].x[0] >= pivot[0] || + c->progeny[2]->hydro.parts[k].x[1] < pivot[1] || + c->progeny[2]->hydro.parts[k].x[2] >= pivot[2]) error("Sorting failed (progeny=2)."); - for (int k = 0; k < 
c->progeny[3]->count; k++) - if (c->progeny[3]->parts[k].x[0] >= pivot[0] || - c->progeny[3]->parts[k].x[1] < pivot[1] || - c->progeny[3]->parts[k].x[2] < pivot[2]) + for (int k = 0; k < c->progeny[3]->hydro.count; k++) + if (c->progeny[3]->hydro.parts[k].x[0] >= pivot[0] || + c->progeny[3]->hydro.parts[k].x[1] < pivot[1] || + c->progeny[3]->hydro.parts[k].x[2] < pivot[2]) error("Sorting failed (progeny=3)."); - for (int k = 0; k < c->progeny[4]->count; k++) - if (c->progeny[4]->parts[k].x[0] < pivot[0] || - c->progeny[4]->parts[k].x[1] >= pivot[1] || - c->progeny[4]->parts[k].x[2] >= pivot[2]) + for (int k = 0; k < c->progeny[4]->hydro.count; k++) + if (c->progeny[4]->hydro.parts[k].x[0] < pivot[0] || + c->progeny[4]->hydro.parts[k].x[1] >= pivot[1] || + c->progeny[4]->hydro.parts[k].x[2] >= pivot[2]) error("Sorting failed (progeny=4)."); - for (int k = 0; k < c->progeny[5]->count; k++) - if (c->progeny[5]->parts[k].x[0] < pivot[0] || - c->progeny[5]->parts[k].x[1] >= pivot[1] || - c->progeny[5]->parts[k].x[2] < pivot[2]) + for (int k = 0; k < c->progeny[5]->hydro.count; k++) + if (c->progeny[5]->hydro.parts[k].x[0] < pivot[0] || + c->progeny[5]->hydro.parts[k].x[1] >= pivot[1] || + c->progeny[5]->hydro.parts[k].x[2] < pivot[2]) error("Sorting failed (progeny=5)."); - for (int k = 0; k < c->progeny[6]->count; k++) - if (c->progeny[6]->parts[k].x[0] < pivot[0] || - c->progeny[6]->parts[k].x[1] < pivot[1] || - c->progeny[6]->parts[k].x[2] >= pivot[2]) + for (int k = 0; k < c->progeny[6]->hydro.count; k++) + if (c->progeny[6]->hydro.parts[k].x[0] < pivot[0] || + c->progeny[6]->hydro.parts[k].x[1] < pivot[1] || + c->progeny[6]->hydro.parts[k].x[2] >= pivot[2]) error("Sorting failed (progeny=6)."); - for (int k = 0; k < c->progeny[7]->count; k++) - if (c->progeny[7]->parts[k].x[0] < pivot[0] || - c->progeny[7]->parts[k].x[1] < pivot[1] || - c->progeny[7]->parts[k].x[2] < pivot[2]) + for (int k = 0; k < c->progeny[7]->hydro.count; k++) + if 
(c->progeny[7]->hydro.parts[k].x[0] < pivot[0] || + c->progeny[7]->hydro.parts[k].x[1] < pivot[1] || + c->progeny[7]->hydro.parts[k].x[2] < pivot[2]) error("Sorting failed (progeny=7)."); #endif @@ -965,8 +1088,8 @@ void cell_split(struct cell *c, ptrdiff_t parts_offset, ptrdiff_t sparts_offset, /* Store the counts and offsets. */ for (int k = 0; k < 8; k++) { - c->progeny[k]->scount = bucket_count[k]; - c->progeny[k]->sparts = &c->sparts[bucket_offset[k]]; + c->progeny[k]->stars.count = bucket_count[k]; + c->progeny[k]->stars.parts = &c->stars.parts[bucket_offset[k]]; } /* Finally, do the same song and dance for the gparts. */ @@ -1006,7 +1129,7 @@ void cell_split(struct cell *c, ptrdiff_t parts_offset, ptrdiff_t sparts_offset, if (gparts[j].type == swift_type_gas) { parts[-gparts[j].id_or_neg_offset - parts_offset].gpart = &gparts[j]; - } else if (gparts[j].type == swift_type_star) { + } else if (gparts[j].type == swift_type_stars) { sparts[-gparts[j].id_or_neg_offset - sparts_offset].gpart = &gparts[j]; } @@ -1016,7 +1139,7 @@ void cell_split(struct cell *c, ptrdiff_t parts_offset, ptrdiff_t sparts_offset, gbuff[k] = temp_buff; if (gparts[k].type == swift_type_gas) { parts[-gparts[k].id_or_neg_offset - parts_offset].gpart = &gparts[k]; - } else if (gparts[k].type == swift_type_star) { + } else if (gparts[k].type == swift_type_stars) { sparts[-gparts[k].id_or_neg_offset - sparts_offset].gpart = &gparts[k]; } @@ -1027,8 +1150,8 @@ void cell_split(struct cell *c, ptrdiff_t parts_offset, ptrdiff_t sparts_offset, /* Store the counts and offsets. 
*/ for (int k = 0; k < 8; k++) { - c->progeny[k]->gcount = bucket_count[k]; - c->progeny[k]->gparts = &c->gparts[bucket_offset[k]]; + c->progeny[k]->grav.count = bucket_count[k]; + c->progeny[k]->grav.parts = &c->grav.parts[bucket_offset[k]]; } } @@ -1044,9 +1167,12 @@ void cell_split(struct cell *c, ptrdiff_t parts_offset, ptrdiff_t sparts_offset, */ void cell_sanitize(struct cell *c, int treated) { - const int count = c->count; - struct part *parts = c->parts; + const int count = c->hydro.count; + const int scount = c->stars.count; + struct part *parts = c->hydro.parts; + struct spart *sparts = c->stars.parts; float h_max = 0.f; + float stars_h_max = 0.f; /* Treat cells will <1000 particles */ if (count < 1000 && !treated) { @@ -1059,6 +1185,10 @@ void cell_sanitize(struct cell *c, int treated) { if (parts[i].h == 0.f || parts[i].h > upper_h_max) parts[i].h = upper_h_max; } + for (int i = 0; i < scount; ++i) { + if (sparts[i].h == 0.f || sparts[i].h > upper_h_max) + sparts[i].h = upper_h_max; + } } /* Recurse and gather the new h_max values */ @@ -1071,17 +1201,21 @@ void cell_sanitize(struct cell *c, int treated) { cell_sanitize(c->progeny[k], (count < 1000)); /* And collect */ - h_max = max(h_max, c->progeny[k]->h_max); + h_max = max(h_max, c->progeny[k]->hydro.h_max); + stars_h_max = max(stars_h_max, c->progeny[k]->stars.h_max); } } } else { /* Get the new value of h_max */ for (int i = 0; i < count; ++i) h_max = max(h_max, parts[i].h); + for (int i = 0; i < scount; ++i) + stars_h_max = max(stars_h_max, sparts[i].h); } /* Record the change */ - c->h_max = h_max; + c->hydro.h_max = h_max; + c->stars.h_max = stars_h_max; } /** @@ -1091,10 +1225,11 @@ void cell_sanitize(struct cell *c, int treated) { * @param data Unused parameter */ void cell_clean_links(struct cell *c, void *data) { - c->density = NULL; - c->gradient = NULL; - c->force = NULL; - c->grav = NULL; + c->hydro.density = NULL; + c->hydro.gradient = NULL; + c->hydro.force = NULL; + c->grav.grav = 
NULL; + c->grav.mm = NULL; } /** @@ -1115,14 +1250,18 @@ void cell_check_part_drift_point(struct cell *c, void *data) { /* Only check local cells */ if (c->nodeID != engine_rank) return; - if (c->ti_old_part != ti_drift) - error("Cell in an incorrect time-zone! c->ti_old_part=%lld ti_drift=%lld", - c->ti_old_part, ti_drift); + /* Only check cells with content */ + if (c->hydro.count == 0) return; + + if (c->hydro.ti_old_part != ti_drift) + error("Cell in an incorrect time-zone! c->hydro.ti_old=%lld ti_drift=%lld", + c->hydro.ti_old_part, ti_drift); - for (int i = 0; i < c->count; ++i) - if (c->parts[i].ti_drift != ti_drift) + for (int i = 0; i < c->hydro.count; ++i) + if (c->hydro.parts[i].ti_drift != ti_drift && + c->hydro.parts[i].time_bin != time_bin_inhibited) error("part in an incorrect time-zone! p->ti_drift=%lld ti_drift=%lld", - c->parts[i].ti_drift, ti_drift); + c->hydro.parts[i].ti_drift, ti_drift); #else error("Calling debugging code without debugging flag activated."); #endif @@ -1146,19 +1285,26 @@ void cell_check_gpart_drift_point(struct cell *c, void *data) { /* Only check local cells */ if (c->nodeID != engine_rank) return; - if (c->ti_old_gpart != ti_drift) - error("Cell in an incorrect time-zone! c->ti_old_gpart=%lld ti_drift=%lld", - c->ti_old_gpart, ti_drift); + /* Only check cells with content */ + if (c->grav.count == 0) return; + + if (c->grav.ti_old_part != ti_drift) + error( + "Cell in an incorrect time-zone! c->grav.ti_old_part=%lld " + "ti_drift=%lld", + c->grav.ti_old_part, ti_drift); - for (int i = 0; i < c->gcount; ++i) - if (c->gparts[i].ti_drift != ti_drift) + for (int i = 0; i < c->grav.count; ++i) + if (c->grav.parts[i].ti_drift != ti_drift && + c->grav.parts[i].time_bin != time_bin_inhibited) error("g-part in an incorrect time-zone! 
gp->ti_drift=%lld ti_drift=%lld", - c->gparts[i].ti_drift, ti_drift); + c->grav.parts[i].ti_drift, ti_drift); - for (int i = 0; i < c->scount; ++i) - if (c->sparts[i].ti_drift != ti_drift) + for (int i = 0; i < c->stars.count; ++i) + if (c->stars.parts[i].ti_drift != ti_drift && + c->stars.parts[i].time_bin != time_bin_inhibited) error("s-part in an incorrect time-zone! sp->ti_drift=%lld ti_drift=%lld", - c->sparts[i].ti_drift, ti_drift); + c->stars.parts[i].ti_drift, ti_drift); #else error("Calling debugging code without debugging flag activated."); #endif @@ -1178,11 +1324,18 @@ void cell_check_multipole_drift_point(struct cell *c, void *data) { const integertime_t ti_drift = *(integertime_t *)data; - if (c->ti_old_multipole != ti_drift) + /* Only check local cells */ + if (c->nodeID != engine_rank) return; + + /* Only check cells with content */ + if (c->grav.count == 0) return; + + if (c->grav.ti_old_multipole != ti_drift) error( - "Cell multipole in an incorrect time-zone! c->ti_old_multipole=%lld " - "ti_drift=%lld (depth=%d)", - c->ti_old_multipole, ti_drift, c->depth); + "Cell multipole in an incorrect time-zone! 
" + "c->grav.ti_old_multipole=%lld " + "ti_drift=%lld (depth=%d, node=%d)", + c->grav.ti_old_multipole, ti_drift, c->depth, c->nodeID); #else error("Calling debugging code without debugging flag activated."); @@ -1215,7 +1368,7 @@ void cell_reset_task_counters(struct cell *c) { void cell_make_multipoles(struct cell *c, integertime_t ti_current) { /* Reset everything */ - gravity_reset(c->multipole); + gravity_reset(c->grav.multipole); if (c->split) { @@ -1231,7 +1384,7 @@ void cell_make_multipoles(struct cell *c, integertime_t ti_current) { for (int k = 0; k < 8; ++k) { if (c->progeny[k] != NULL) { - const struct gravity_tensors *m = c->progeny[k]->multipole; + const struct gravity_tensors *m = c->progeny[k]->grav.multipole; CoM[0] += m->CoM[0] * m->m_pole.M_000; CoM[1] += m->CoM[1] * m->m_pole.M_000; CoM[2] += m->CoM[2] * m->m_pole.M_000; @@ -1240,9 +1393,9 @@ void cell_make_multipoles(struct cell *c, integertime_t ti_current) { } const double mass_inv = 1. / mass; - c->multipole->CoM[0] = CoM[0] * mass_inv; - c->multipole->CoM[1] = CoM[1] * mass_inv; - c->multipole->CoM[2] = CoM[2] * mass_inv; + c->grav.multipole->CoM[0] = CoM[0] * mass_inv; + c->grav.multipole->CoM[1] = CoM[1] * mass_inv; + c->grav.multipole->CoM[2] = CoM[2] * mass_inv; /* Now shift progeny multipoles and add them up */ struct multipole temp; @@ -1250,64 +1403,112 @@ void cell_make_multipoles(struct cell *c, integertime_t ti_current) { for (int k = 0; k < 8; ++k) { if (c->progeny[k] != NULL) { const struct cell *cp = c->progeny[k]; - const struct multipole *m = &cp->multipole->m_pole; + const struct multipole *m = &cp->grav.multipole->m_pole; /* Contribution to multipole */ - gravity_M2M(&temp, m, c->multipole->CoM, cp->multipole->CoM); - gravity_multipole_add(&c->multipole->m_pole, &temp); + gravity_M2M(&temp, m, c->grav.multipole->CoM, cp->grav.multipole->CoM); + gravity_multipole_add(&c->grav.multipole->m_pole, &temp); /* Upper limit of max CoM<->gpart distance */ - const double dx = 
c->multipole->CoM[0] - cp->multipole->CoM[0]; - const double dy = c->multipole->CoM[1] - cp->multipole->CoM[1]; - const double dz = c->multipole->CoM[2] - cp->multipole->CoM[2]; + const double dx = + c->grav.multipole->CoM[0] - cp->grav.multipole->CoM[0]; + const double dy = + c->grav.multipole->CoM[1] - cp->grav.multipole->CoM[1]; + const double dz = + c->grav.multipole->CoM[2] - cp->grav.multipole->CoM[2]; const double r2 = dx * dx + dy * dy + dz * dz; - r_max = max(r_max, cp->multipole->r_max + sqrt(r2)); + r_max = max(r_max, cp->grav.multipole->r_max + sqrt(r2)); } } /* Alternative upper limit of max CoM<->gpart distance */ - const double dx = c->multipole->CoM[0] > c->loc[0] + c->width[0] * 0.5 - ? c->multipole->CoM[0] - c->loc[0] - : c->loc[0] + c->width[0] - c->multipole->CoM[0]; - const double dy = c->multipole->CoM[1] > c->loc[1] + c->width[1] * 0.5 - ? c->multipole->CoM[1] - c->loc[1] - : c->loc[1] + c->width[1] - c->multipole->CoM[1]; - const double dz = c->multipole->CoM[2] > c->loc[2] + c->width[2] * 0.5 - ? c->multipole->CoM[2] - c->loc[2] - : c->loc[2] + c->width[2] - c->multipole->CoM[2]; + const double dx = c->grav.multipole->CoM[0] > c->loc[0] + c->width[0] * 0.5 + ? c->grav.multipole->CoM[0] - c->loc[0] + : c->loc[0] + c->width[0] - c->grav.multipole->CoM[0]; + const double dy = c->grav.multipole->CoM[1] > c->loc[1] + c->width[1] * 0.5 + ? c->grav.multipole->CoM[1] - c->loc[1] + : c->loc[1] + c->width[1] - c->grav.multipole->CoM[1]; + const double dz = c->grav.multipole->CoM[2] > c->loc[2] + c->width[2] * 0.5 + ? c->grav.multipole->CoM[2] - c->loc[2] + : c->loc[2] + c->width[2] - c->grav.multipole->CoM[2]; /* Take minimum of both limits */ - c->multipole->r_max = min(r_max, sqrt(dx * dx + dy * dy + dz * dz)); + c->grav.multipole->r_max = min(r_max, sqrt(dx * dx + dy * dy + dz * dz)); } else { - if (c->gcount > 0) { - gravity_P2M(c->multipole, c->gparts, c->gcount); - const double dx = c->multipole->CoM[0] > c->loc[0] + c->width[0] * 0.5 - ? 
c->multipole->CoM[0] - c->loc[0] - : c->loc[0] + c->width[0] - c->multipole->CoM[0]; - const double dy = c->multipole->CoM[1] > c->loc[1] + c->width[1] * 0.5 - ? c->multipole->CoM[1] - c->loc[1] - : c->loc[1] + c->width[1] - c->multipole->CoM[1]; - const double dz = c->multipole->CoM[2] > c->loc[2] + c->width[2] * 0.5 - ? c->multipole->CoM[2] - c->loc[2] - : c->loc[2] + c->width[2] - c->multipole->CoM[2]; - c->multipole->r_max = sqrt(dx * dx + dy * dy + dz * dz); + if (c->grav.count > 0) { + gravity_P2M(c->grav.multipole, c->grav.parts, c->grav.count); + const double dx = + c->grav.multipole->CoM[0] > c->loc[0] + c->width[0] * 0.5 + ? c->grav.multipole->CoM[0] - c->loc[0] + : c->loc[0] + c->width[0] - c->grav.multipole->CoM[0]; + const double dy = + c->grav.multipole->CoM[1] > c->loc[1] + c->width[1] * 0.5 + ? c->grav.multipole->CoM[1] - c->loc[1] + : c->loc[1] + c->width[1] - c->grav.multipole->CoM[1]; + const double dz = + c->grav.multipole->CoM[2] > c->loc[2] + c->width[2] * 0.5 + ? c->grav.multipole->CoM[2] - c->loc[2] + : c->loc[2] + c->width[2] - c->grav.multipole->CoM[2]; + c->grav.multipole->r_max = sqrt(dx * dx + dy * dy + dz * dz); } else { - gravity_multipole_init(&c->multipole->m_pole); - c->multipole->CoM[0] = c->loc[0] + c->width[0] * 0.5; - c->multipole->CoM[1] = c->loc[1] + c->width[1] * 0.5; - c->multipole->CoM[2] = c->loc[2] + c->width[2] * 0.5; - c->multipole->r_max = 0.; + gravity_multipole_init(&c->grav.multipole->m_pole); + c->grav.multipole->CoM[0] = c->loc[0] + c->width[0] * 0.5; + c->grav.multipole->CoM[1] = c->loc[1] + c->width[1] * 0.5; + c->grav.multipole->CoM[2] = c->loc[2] + c->width[2] * 0.5; + c->grav.multipole->r_max = 0.; } } /* Also update the values at rebuild time */ - c->multipole->r_max_rebuild = c->multipole->r_max; - c->multipole->CoM_rebuild[0] = c->multipole->CoM[0]; - c->multipole->CoM_rebuild[1] = c->multipole->CoM[1]; - c->multipole->CoM_rebuild[2] = c->multipole->CoM[2]; + c->grav.multipole->r_max_rebuild = 
c->grav.multipole->r_max; + c->grav.multipole->CoM_rebuild[0] = c->grav.multipole->CoM[0]; + c->grav.multipole->CoM_rebuild[1] = c->grav.multipole->CoM[1]; + c->grav.multipole->CoM_rebuild[2] = c->grav.multipole->CoM[2]; + + c->grav.ti_old_multipole = ti_current; +} + +/** + * @brief Recursively verify that the multipoles are the sum of their progenies. + * + * This function does not check whether the multipoles match the particle + * content as we may not have received the particles. + * + * @param c The #cell to recursively search and verify. + */ +void cell_check_foreign_multipole(const struct cell *c) { + +#ifdef SWIFT_DEBUG_CHECKS + + if (c->split) { + + double M_000 = 0.; + long long num_gpart = 0; - c->ti_old_multipole = ti_current; + for (int k = 0; k < 8; k++) { + const struct cell *cp = c->progeny[k]; + + if (cp != NULL) { + + /* Check the mass */ + M_000 += cp->grav.multipole->m_pole.M_000; + + /* Check the number of particles */ + num_gpart += cp->grav.multipole->m_pole.num_gpart; + + /* Now recurse */ + cell_check_foreign_multipole(cp); + } + } + + if (num_gpart != c->grav.multipole->m_pole.num_gpart) + error("Sum of particles in progenies does not match"); + } + +#else + error("Calling debugging code without debugging flag activated."); +#endif } /** @@ -1315,44 +1516,41 @@ void cell_make_multipoles(struct cell *c, integertime_t ti_current) { * recursively computed one. 
* * @param c Cell to act upon - * @param data Unused parameter */ -void cell_check_multipole(struct cell *c, void *data) { +void cell_check_multipole(struct cell *c) { #ifdef SWIFT_DEBUG_CHECKS struct gravity_tensors ma; const double tolerance = 1e-3; /* Relative */ - return; - /* First recurse */ if (c->split) for (int k = 0; k < 8; k++) - if (c->progeny[k] != NULL) cell_check_multipole(c->progeny[k], NULL); + if (c->progeny[k] != NULL) cell_check_multipole(c->progeny[k]); - if (c->gcount > 0) { + if (c->grav.count > 0) { /* Brute-force calculation */ - gravity_P2M(&ma, c->gparts, c->gcount); + gravity_P2M(&ma, c->grav.parts, c->grav.count); /* Now compare the multipole expansion */ - if (!gravity_multipole_equal(&ma, c->multipole, tolerance)) { + if (!gravity_multipole_equal(&ma, c->grav.multipole, tolerance)) { message("Multipoles are not equal at depth=%d! tol=%f", c->depth, tolerance); message("Correct answer:"); gravity_multipole_print(&ma.m_pole); message("Recursive multipole:"); - gravity_multipole_print(&c->multipole->m_pole); + gravity_multipole_print(&c->grav.multipole->m_pole); error("Aborting"); } /* Check that the upper limit of r_max is good enough */ - if (!(c->multipole->r_max >= ma.r_max)) { + if (!(1.1 * c->grav.multipole->r_max >= ma.r_max)) { error("Upper-limit r_max=%e too small. Should be >=%e.", - c->multipole->r_max, ma.r_max); - } else if (c->multipole->r_max * c->multipole->r_max > + c->grav.multipole->r_max, ma.r_max); + } else if (c->grav.multipole->r_max * c->grav.multipole->r_max > 3. * c->width[0] * c->width[0]) { - error("r_max=%e larger than cell diagonal %e.", c->multipole->r_max, + error("r_max=%e larger than cell diagonal %e.", c->grav.multipole->r_max, sqrt(3. 
* c->width[0] * c->width[0])); } } @@ -1368,10 +1566,18 @@ void cell_check_multipole(struct cell *c, void *data) { */ void cell_clean(struct cell *c) { + /* Hydro */ + for (int i = 0; i < 13; i++) + if (c->hydro.sort[i] != NULL) { + free(c->hydro.sort[i]); + c->hydro.sort[i] = NULL; + } + + /* Stars */ for (int i = 0; i < 13; i++) - if (c->sort[i] != NULL) { - free(c->sort[i]); - c->sort[i] = NULL; + if (c->stars.sort[i] != NULL) { + free(c->stars.sort[i]); + c->stars.sort[i] = NULL; } /* Recurse */ @@ -1383,10 +1589,10 @@ void cell_clean(struct cell *c) { * @brief Clear the drift flags on the given cell. */ void cell_clear_drift_flags(struct cell *c, void *data) { - c->do_drift = 0; - c->do_sub_drift = 0; - c->do_grav_drift = 0; - c->do_grav_sub_drift = 0; + c->hydro.do_drift = 0; + c->hydro.do_sub_drift = 0; + c->grav.do_drift = 0; + c->grav.do_sub_drift = 0; } /** @@ -1395,29 +1601,30 @@ void cell_clear_drift_flags(struct cell *c, void *data) { void cell_activate_drift_part(struct cell *c, struct scheduler *s) { /* If this cell is already marked for drift, quit early. */ - if (c->do_drift) return; + if (c->hydro.do_drift) return; /* Mark this cell for drifting. */ - c->do_drift = 1; + c->hydro.do_drift = 1; /* Set the do_sub_drifts all the way up and activate the super drift if this has not yet been done. 
*/ - if (c == c->super_hydro) { + if (c == c->hydro.super) { #ifdef SWIFT_DEBUG_CHECKS - if (c->drift_part == NULL) - error("Trying to activate un-existing c->drift_part"); + if (c->hydro.drift == NULL) + error("Trying to activate un-existing c->hydro.drift"); #endif - scheduler_activate(s, c->drift_part); + scheduler_activate(s, c->hydro.drift); } else { for (struct cell *parent = c->parent; - parent != NULL && !parent->do_sub_drift; parent = parent->parent) { - parent->do_sub_drift = 1; - if (parent == c->super_hydro) { + parent != NULL && !parent->hydro.do_sub_drift; + parent = parent->parent) { + parent->hydro.do_sub_drift = 1; + if (parent == c->hydro.super) { #ifdef SWIFT_DEBUG_CHECKS - if (parent->drift_part == NULL) - error("Trying to activate un-existing parent->drift_part"); + if (parent->hydro.drift == NULL) + error("Trying to activate un-existing parent->hydro.drift"); #endif - scheduler_activate(s, parent->drift_part); + scheduler_activate(s, parent->hydro.drift); break; } } @@ -1430,58 +1637,75 @@ void cell_activate_drift_part(struct cell *c, struct scheduler *s) { void cell_activate_drift_gpart(struct cell *c, struct scheduler *s) { /* If this cell is already marked for drift, quit early. */ - if (c->do_grav_drift) return; + if (c->grav.do_drift) return; /* Mark this cell for drifting. */ - c->do_grav_drift = 1; + c->grav.do_drift = 1; + + if (c->grav.drift_out != NULL) scheduler_activate(s, c->grav.drift_out); /* Set the do_grav_sub_drifts all the way up and activate the super drift if this has not yet been done. 
*/ - if (c == c->super_gravity) { + if (c == c->grav.super) { #ifdef SWIFT_DEBUG_CHECKS - if (c->drift_gpart == NULL) - error("Trying to activate un-existing c->drift_gpart"); + if (c->grav.drift == NULL) + error("Trying to activate un-existing c->grav.drift"); #endif - scheduler_activate(s, c->drift_gpart); + scheduler_activate(s, c->grav.drift); } else { for (struct cell *parent = c->parent; - parent != NULL && !parent->do_grav_sub_drift; + parent != NULL && !parent->grav.do_sub_drift; parent = parent->parent) { - parent->do_grav_sub_drift = 1; - if (parent == c->super_gravity) { + parent->grav.do_sub_drift = 1; + + if (parent->grav.drift_out) { + scheduler_activate(s, parent->grav.drift_out); + } + + if (parent == c->grav.super) { #ifdef SWIFT_DEBUG_CHECKS - if (parent->drift_gpart == NULL) - error("Trying to activate un-existing parent->drift_gpart"); + if (parent->grav.drift == NULL) + error("Trying to activate un-existing parent->grav.drift"); #endif - scheduler_activate(s, parent->drift_gpart); + scheduler_activate(s, parent->grav.drift); break; } } } } +/** + * @brief Activate the #spart drifts on the given cell. + */ +void cell_activate_drift_spart(struct cell *c, struct scheduler *s) { + // MATTHIEU: This will need changing + cell_activate_drift_gpart(c, s); +} + /** * @brief Activate the sorts up a cell hierarchy. 
*/ -void cell_activate_sorts_up(struct cell *c, struct scheduler *s) { +void cell_activate_hydro_sorts_up(struct cell *c, struct scheduler *s) { - if (c == c->super_hydro) { + if (c == c->hydro.super) { #ifdef SWIFT_DEBUG_CHECKS - if (c->sorts == NULL) error("Trying to activate un-existing c->sorts"); + if (c->hydro.sorts == NULL) + error("Trying to activate un-existing c->hydro.sorts"); #endif - scheduler_activate(s, c->sorts); + scheduler_activate(s, c->hydro.sorts); if (c->nodeID == engine_rank) cell_activate_drift_part(c, s); } else { for (struct cell *parent = c->parent; - parent != NULL && !parent->do_sub_sort; parent = parent->parent) { - parent->do_sub_sort = 1; - if (parent == c->super_hydro) { + parent != NULL && !parent->hydro.do_sub_sort; + parent = parent->parent) { + parent->hydro.do_sub_sort = 1; + if (parent == c->hydro.super) { #ifdef SWIFT_DEBUG_CHECKS - if (parent->sorts == NULL) - error("Trying to activate un-existing parents->sorts"); + if (parent->hydro.sorts == NULL) + error("Trying to activate un-existing parents->hydro.sorts"); #endif - scheduler_activate(s, parent->sorts); + scheduler_activate(s, parent->hydro.sorts); if (parent->nodeID == engine_rank) cell_activate_drift_part(parent, s); break; } @@ -1492,32 +1716,94 @@ void cell_activate_sorts_up(struct cell *c, struct scheduler *s) { /** * @brief Activate the sorts on a given cell, if needed. */ -void cell_activate_sorts(struct cell *c, int sid, struct scheduler *s) { +void cell_activate_hydro_sorts(struct cell *c, int sid, struct scheduler *s) { + + /* Do we need to re-sort? 
*/ + if (c->hydro.dx_max_sort > space_maxreldx * c->dmin) { + + /* Climb up the tree to active the sorts in that direction */ + for (struct cell *finger = c; finger != NULL; finger = finger->parent) { + if (finger->hydro.requires_sorts) { + atomic_or(&finger->hydro.do_sort, finger->hydro.requires_sorts); + cell_activate_hydro_sorts_up(finger, s); + } + finger->hydro.sorted = 0; + } + } + + /* Has this cell been sorted at all for the given sid? */ + if (!(c->hydro.sorted & (1 << sid)) || c->nodeID != engine_rank) { + atomic_or(&c->hydro.do_sort, (1 << sid)); + cell_activate_hydro_sorts_up(c, s); + } +} + +/** + * @brief Activate the sorts up a cell hierarchy. + */ +void cell_activate_stars_sorts_up(struct cell *c, struct scheduler *s) { + + if (c == c->super) { +#ifdef SWIFT_DEBUG_CHECKS + if (c->stars.sorts == NULL) + error("Trying to activate un-existing c->stars.sorts"); +#endif + scheduler_activate(s, c->stars.sorts); + if (c->nodeID == engine_rank) { + // MATTHIEU: to do: do we actually need both drifts here? + cell_activate_drift_part(c, s); + cell_activate_drift_spart(c, s); + } + } else { + + for (struct cell *parent = c->parent; + parent != NULL && !parent->stars.do_sub_sort; + parent = parent->parent) { + parent->stars.do_sub_sort = 1; + if (parent == c->super) { +#ifdef SWIFT_DEBUG_CHECKS + if (parent->stars.sorts == NULL) + error("Trying to activate un-existing parents->stars.sorts"); +#endif + scheduler_activate(s, parent->stars.sorts); + if (parent->nodeID == engine_rank) { + cell_activate_drift_part(parent, s); + cell_activate_drift_spart(parent, s); + } + break; + } + } + } +} + +/** + * @brief Activate the sorts on a given cell, if needed. + */ +void cell_activate_stars_sorts(struct cell *c, int sid, struct scheduler *s) { /* Do we need to re-sort? 
*/ - if (c->dx_max_sort > space_maxreldx * c->dmin) { + if (c->stars.dx_max_sort > space_maxreldx * c->dmin) { /* Climb up the tree to active the sorts in that direction */ for (struct cell *finger = c; finger != NULL; finger = finger->parent) { - if (finger->requires_sorts) { - atomic_or(&finger->do_sort, finger->requires_sorts); - cell_activate_sorts_up(finger, s); + if (finger->stars.requires_sorts) { + atomic_or(&finger->stars.do_sort, finger->stars.requires_sorts); + cell_activate_stars_sorts_up(finger, s); } - finger->sorted = 0; + finger->stars.sorted = 0; } } /* Has this cell been sorted at all for the given sid? */ - if (!(c->sorted & (1 << sid)) || c->nodeID != engine_rank) { - atomic_or(&c->do_sort, (1 << sid)); - cell_activate_sorts_up(c, s); + if (!(c->stars.sorted & (1 << sid)) || c->nodeID != engine_rank) { + atomic_or(&c->stars.do_sort, (1 << sid)); + cell_activate_stars_sorts_up(c, s); } } /** * @brief Traverse a sub-cell task and activate the hydro drift tasks that are - * required - * by a hydro task + * required by a hydro task * * @param ci The first #cell we recurse in. * @param cj The second #cell we recurse in. @@ -1528,18 +1814,19 @@ void cell_activate_subcell_hydro_tasks(struct cell *ci, struct cell *cj, const struct engine *e = s->space->e; /* Store the current dx_max and h_max values. */ - ci->dx_max_old = ci->dx_max_part; - ci->h_max_old = ci->h_max; + ci->hydro.dx_max_part_old = ci->hydro.dx_max_part; + ci->hydro.h_max_old = ci->hydro.h_max; + if (cj != NULL) { - cj->dx_max_old = cj->dx_max_part; - cj->h_max_old = cj->h_max; + cj->hydro.dx_max_part_old = cj->hydro.dx_max_part; + cj->hydro.h_max_old = cj->hydro.h_max; } /* Self interaction? */ if (cj == NULL) { /* Do anything? */ - if (ci->count == 0 || !cell_is_active_hydro(ci, e)) return; + if (ci->hydro.count == 0 || !cell_is_active_hydro(ci, e)) return; /* Recurse? 
*/ if (cell_can_recurse_in_self_hydro_task(ci)) { @@ -1566,7 +1853,7 @@ void cell_activate_subcell_hydro_tasks(struct cell *ci, struct cell *cj, /* Should we even bother? */ if (!cell_is_active_hydro(ci, e) && !cell_is_active_hydro(cj, e)) return; - if (ci->count == 0 || cj->count == 0) return; + if (ci->hydro.count == 0 || cj->hydro.count == 0) return; /* Get the orientation of the pair. */ double shift[3]; @@ -1854,145 +2141,509 @@ void cell_activate_subcell_hydro_tasks(struct cell *ci, struct cell *cj, else if (cell_is_active_hydro(ci, e) || cell_is_active_hydro(cj, e)) { /* We are going to interact this pair, so store some values. */ - atomic_or(&ci->requires_sorts, 1 << sid); - atomic_or(&cj->requires_sorts, 1 << sid); - ci->dx_max_sort_old = ci->dx_max_sort; - cj->dx_max_sort_old = cj->dx_max_sort; + atomic_or(&ci->hydro.requires_sorts, 1 << sid); + atomic_or(&cj->hydro.requires_sorts, 1 << sid); + ci->hydro.dx_max_sort_old = ci->hydro.dx_max_sort; + cj->hydro.dx_max_sort_old = cj->hydro.dx_max_sort; /* Activate the drifts if the cells are local. */ if (ci->nodeID == engine_rank) cell_activate_drift_part(ci, s); if (cj->nodeID == engine_rank) cell_activate_drift_part(cj, s); /* Do we need to sort the cells? */ - cell_activate_sorts(ci, sid, s); - cell_activate_sorts(cj, sid, s); + cell_activate_hydro_sorts(ci, sid, s); + cell_activate_hydro_sorts(cj, sid, s); } } /* Otherwise, pair interation */ } -void cell_activate_grav_mm_task(struct cell *ci, struct cell *cj, - struct scheduler *s) { - /* Some constants */ - const struct engine *e = s->space->e; - - /* Anything to do here? 
*/ - if (!cell_is_active_gravity(ci, e) && !cell_is_active_gravity(cj, e)) - error("Inactive MM task being activated"); - - /* Atomically drift the multipole in ci */ - lock_lock(&ci->mlock); - if (ci->ti_old_multipole < e->ti_current) cell_drift_multipole(ci, e); - if (lock_unlock(&ci->mlock) != 0) error("Impossible to unlock m-pole"); - - /* Atomically drift the multipole in cj */ - lock_lock(&cj->mlock); - if (cj->ti_old_multipole < e->ti_current) cell_drift_multipole(cj, e); - if (lock_unlock(&cj->mlock) != 0) error("Impossible to unlock m-pole"); -} - /** - * @brief Traverse a sub-cell task and activate the gravity drift tasks that - * are required by a self gravity task. + * @brief Traverse a sub-cell task and activate the stars drift tasks that are + * required by a stars task * * @param ci The first #cell we recurse in. * @param cj The second #cell we recurse in. * @param s The task #scheduler. */ -void cell_activate_subcell_grav_tasks(struct cell *ci, struct cell *cj, - struct scheduler *s) { - /* Some constants */ - const struct space *sp = s->space; - const struct engine *e = sp->e; +void cell_activate_subcell_stars_tasks(struct cell *ci, struct cell *cj, + struct scheduler *s) { + const struct engine *e = s->space->e; + + /* Store the current dx_max and h_max values. */ + ci->stars.dx_max_part_old = ci->stars.dx_max_part; + ci->stars.h_max_old = ci->stars.h_max; + + if (cj != NULL) { + cj->stars.dx_max_part_old = cj->stars.dx_max_part; + cj->stars.h_max_old = cj->stars.h_max; + } /* Self interaction? */ if (cj == NULL) { /* Do anything? */ - if (ci->gcount == 0 || !cell_is_active_gravity(ci, e)) return; + if (!cell_is_active_stars(ci, e) || ci->hydro.count == 0 || + ci->stars.count == 0) + return; /* Recurse? 
*/ - if (ci->split) { + if (cell_can_recurse_in_self_stars_task(ci)) { /* Loop over all progenies and pairs of progenies */ for (int j = 0; j < 8; j++) { if (ci->progeny[j] != NULL) { - cell_activate_subcell_grav_tasks(ci->progeny[j], NULL, s); + cell_activate_subcell_stars_tasks(ci->progeny[j], NULL, s); for (int k = j + 1; k < 8; k++) if (ci->progeny[k] != NULL) - cell_activate_subcell_grav_tasks(ci->progeny[j], ci->progeny[k], - s); + cell_activate_subcell_stars_tasks(ci->progeny[j], ci->progeny[k], + s); } } } else { - /* We have reached the bottom of the tree: activate gpart drift */ - cell_activate_drift_gpart(ci, s); + /* We have reached the bottom of the tree: activate drift */ + cell_activate_drift_spart(ci, s); + cell_activate_drift_part(ci, s); } } - /* Pair interaction */ + /* Otherwise, pair interation */ else { - /* Anything to do here? */ - if (!cell_is_active_gravity(ci, e) && !cell_is_active_gravity(cj, e)) - return; - if (ci->gcount == 0 || cj->gcount == 0) return; - - /* Atomically drift the multipole in ci */ - lock_lock(&ci->mlock); - if (ci->ti_old_multipole < e->ti_current) cell_drift_multipole(ci, e); - if (lock_unlock(&ci->mlock) != 0) error("Impossible to unlock m-pole"); - - /* Atomically drift the multipole in cj */ - lock_lock(&cj->mlock); - if (cj->ti_old_multipole < e->ti_current) cell_drift_multipole(cj, e); - if (lock_unlock(&cj->mlock) != 0) error("Impossible to unlock m-pole"); + /* Should we even bother? */ + if (!cell_is_active_stars(ci, e) && !cell_is_active_stars(cj, e)) return; - /* Can we use multipoles ? */ - if (cell_can_use_pair_mm(ci, cj, e, sp)) { + int should_do = ci->stars.count != 0 && cj->hydro.count != 0; + should_do |= cj->stars.count != 0 && ci->hydro.count != 0; + if (!should_do) return; - /* Ok, no need to drift anything */ - return; - } - /* Otherwise, activate the gpart drifts if we are at the bottom. */ - else if (!ci->split && !cj->split) { + /* Get the orientation of the pair. 
*/ + double shift[3]; + int sid = space_getsid(s->space, &ci, &cj, shift); - /* Activate the drifts if the cells are local. */ - if (cell_is_active_gravity(ci, e) || cell_is_active_gravity(cj, e)) { - if (ci->nodeID == engine_rank) cell_activate_drift_gpart(ci, s); - if (cj->nodeID == engine_rank) cell_activate_drift_gpart(cj, s); - } - } - /* Ok, we can still recurse */ - else { + /* recurse? */ + if (cell_can_recurse_in_pair_stars_task(ci) && + cell_can_recurse_in_pair_stars_task(cj)) { - /* Recover the multipole information */ - struct gravity_tensors *const multi_i = ci->multipole; - struct gravity_tensors *const multi_j = cj->multipole; - const double ri_max = multi_i->r_max; - const double rj_max = multi_j->r_max; + /* Different types of flags. */ + switch (sid) { - if (ri_max > rj_max) { - if (ci->split) { + /* Regular sub-cell interactions of a single cell. */ + case 0: /* ( 1 , 1 , 1 ) */ + if (ci->progeny[7] != NULL && cj->progeny[0] != NULL) + cell_activate_subcell_stars_tasks(ci->progeny[7], cj->progeny[0], + s); + break; - /* Loop over ci's children */ - for (int k = 0; k < 8; k++) { - if (ci->progeny[k] != NULL) - cell_activate_subcell_grav_tasks(ci->progeny[k], cj, s); - } + case 1: /* ( 1 , 1 , 0 ) */ + if (ci->progeny[6] != NULL && cj->progeny[0] != NULL) + cell_activate_subcell_stars_tasks(ci->progeny[6], cj->progeny[0], + s); + if (ci->progeny[6] != NULL && cj->progeny[1] != NULL) + cell_activate_subcell_stars_tasks(ci->progeny[6], cj->progeny[1], + s); + if (ci->progeny[7] != NULL && cj->progeny[0] != NULL) + cell_activate_subcell_stars_tasks(ci->progeny[7], cj->progeny[0], + s); + if (ci->progeny[7] != NULL && cj->progeny[1] != NULL) + cell_activate_subcell_stars_tasks(ci->progeny[7], cj->progeny[1], + s); + break; - } else if (cj->split) { + case 2: /* ( 1 , 1 , -1 ) */ + if (ci->progeny[6] != NULL && cj->progeny[1] != NULL) + cell_activate_subcell_stars_tasks(ci->progeny[6], cj->progeny[1], + s); + break; - /* Loop over cj's children */ - for 
(int k = 0; k < 8; k++) { - if (cj->progeny[k] != NULL) - cell_activate_subcell_grav_tasks(ci, cj->progeny[k], s); - } + case 3: /* ( 1 , 0 , 1 ) */ + if (ci->progeny[5] != NULL && cj->progeny[0] != NULL) + cell_activate_subcell_stars_tasks(ci->progeny[5], cj->progeny[0], + s); + if (ci->progeny[5] != NULL && cj->progeny[2] != NULL) + cell_activate_subcell_stars_tasks(ci->progeny[5], cj->progeny[2], + s); + if (ci->progeny[7] != NULL && cj->progeny[0] != NULL) + cell_activate_subcell_stars_tasks(ci->progeny[7], cj->progeny[0], + s); + if (ci->progeny[7] != NULL && cj->progeny[2] != NULL) + cell_activate_subcell_stars_tasks(ci->progeny[7], cj->progeny[2], + s); + break; - } else { - error("Fundamental error in the logic"); - } - } else if (rj_max >= ri_max) { + case 4: /* ( 1 , 0 , 0 ) */ + if (ci->progeny[4] != NULL && cj->progeny[0] != NULL) + cell_activate_subcell_stars_tasks(ci->progeny[4], cj->progeny[0], + s); + if (ci->progeny[4] != NULL && cj->progeny[1] != NULL) + cell_activate_subcell_stars_tasks(ci->progeny[4], cj->progeny[1], + s); + if (ci->progeny[4] != NULL && cj->progeny[2] != NULL) + cell_activate_subcell_stars_tasks(ci->progeny[4], cj->progeny[2], + s); + if (ci->progeny[4] != NULL && cj->progeny[3] != NULL) + cell_activate_subcell_stars_tasks(ci->progeny[4], cj->progeny[3], + s); + if (ci->progeny[5] != NULL && cj->progeny[0] != NULL) + cell_activate_subcell_stars_tasks(ci->progeny[5], cj->progeny[0], + s); + if (ci->progeny[5] != NULL && cj->progeny[1] != NULL) + cell_activate_subcell_stars_tasks(ci->progeny[5], cj->progeny[1], + s); + if (ci->progeny[5] != NULL && cj->progeny[2] != NULL) + cell_activate_subcell_stars_tasks(ci->progeny[5], cj->progeny[2], + s); + if (ci->progeny[5] != NULL && cj->progeny[3] != NULL) + cell_activate_subcell_stars_tasks(ci->progeny[5], cj->progeny[3], + s); + if (ci->progeny[6] != NULL && cj->progeny[0] != NULL) + cell_activate_subcell_stars_tasks(ci->progeny[6], cj->progeny[0], + s); + if (ci->progeny[6] != NULL 
&& cj->progeny[1] != NULL) + cell_activate_subcell_stars_tasks(ci->progeny[6], cj->progeny[1], + s); + if (ci->progeny[6] != NULL && cj->progeny[2] != NULL) + cell_activate_subcell_stars_tasks(ci->progeny[6], cj->progeny[2], + s); + if (ci->progeny[6] != NULL && cj->progeny[3] != NULL) + cell_activate_subcell_stars_tasks(ci->progeny[6], cj->progeny[3], + s); + if (ci->progeny[7] != NULL && cj->progeny[0] != NULL) + cell_activate_subcell_stars_tasks(ci->progeny[7], cj->progeny[0], + s); + if (ci->progeny[7] != NULL && cj->progeny[1] != NULL) + cell_activate_subcell_stars_tasks(ci->progeny[7], cj->progeny[1], + s); + if (ci->progeny[7] != NULL && cj->progeny[2] != NULL) + cell_activate_subcell_stars_tasks(ci->progeny[7], cj->progeny[2], + s); + if (ci->progeny[7] != NULL && cj->progeny[3] != NULL) + cell_activate_subcell_stars_tasks(ci->progeny[7], cj->progeny[3], + s); + break; + + case 5: /* ( 1 , 0 , -1 ) */ + if (ci->progeny[4] != NULL && cj->progeny[1] != NULL) + cell_activate_subcell_stars_tasks(ci->progeny[4], cj->progeny[1], + s); + if (ci->progeny[4] != NULL && cj->progeny[3] != NULL) + cell_activate_subcell_stars_tasks(ci->progeny[4], cj->progeny[3], + s); + if (ci->progeny[6] != NULL && cj->progeny[1] != NULL) + cell_activate_subcell_stars_tasks(ci->progeny[6], cj->progeny[1], + s); + if (ci->progeny[6] != NULL && cj->progeny[3] != NULL) + cell_activate_subcell_stars_tasks(ci->progeny[6], cj->progeny[3], + s); + break; + + case 6: /* ( 1 , -1 , 1 ) */ + if (ci->progeny[5] != NULL && cj->progeny[2] != NULL) + cell_activate_subcell_stars_tasks(ci->progeny[5], cj->progeny[2], + s); + break; + + case 7: /* ( 1 , -1 , 0 ) */ + if (ci->progeny[4] != NULL && cj->progeny[2] != NULL) + cell_activate_subcell_stars_tasks(ci->progeny[4], cj->progeny[2], + s); + if (ci->progeny[4] != NULL && cj->progeny[3] != NULL) + cell_activate_subcell_stars_tasks(ci->progeny[4], cj->progeny[3], + s); + if (ci->progeny[5] != NULL && cj->progeny[2] != NULL) + 
cell_activate_subcell_stars_tasks(ci->progeny[5], cj->progeny[2], + s); + if (ci->progeny[5] != NULL && cj->progeny[3] != NULL) + cell_activate_subcell_stars_tasks(ci->progeny[5], cj->progeny[3], + s); + break; + + case 8: /* ( 1 , -1 , -1 ) */ + if (ci->progeny[4] != NULL && cj->progeny[3] != NULL) + cell_activate_subcell_stars_tasks(ci->progeny[4], cj->progeny[3], + s); + break; + + case 9: /* ( 0 , 1 , 1 ) */ + if (ci->progeny[3] != NULL && cj->progeny[0] != NULL) + cell_activate_subcell_stars_tasks(ci->progeny[3], cj->progeny[0], + s); + if (ci->progeny[3] != NULL && cj->progeny[4] != NULL) + cell_activate_subcell_stars_tasks(ci->progeny[3], cj->progeny[4], + s); + if (ci->progeny[7] != NULL && cj->progeny[0] != NULL) + cell_activate_subcell_stars_tasks(ci->progeny[7], cj->progeny[0], + s); + if (ci->progeny[7] != NULL && cj->progeny[4] != NULL) + cell_activate_subcell_stars_tasks(ci->progeny[7], cj->progeny[4], + s); + break; + + case 10: /* ( 0 , 1 , 0 ) */ + if (ci->progeny[2] != NULL && cj->progeny[0] != NULL) + cell_activate_subcell_stars_tasks(ci->progeny[2], cj->progeny[0], + s); + if (ci->progeny[2] != NULL && cj->progeny[1] != NULL) + cell_activate_subcell_stars_tasks(ci->progeny[2], cj->progeny[1], + s); + if (ci->progeny[2] != NULL && cj->progeny[4] != NULL) + cell_activate_subcell_stars_tasks(ci->progeny[2], cj->progeny[4], + s); + if (ci->progeny[2] != NULL && cj->progeny[5] != NULL) + cell_activate_subcell_stars_tasks(ci->progeny[2], cj->progeny[5], + s); + if (ci->progeny[3] != NULL && cj->progeny[0] != NULL) + cell_activate_subcell_stars_tasks(ci->progeny[3], cj->progeny[0], + s); + if (ci->progeny[3] != NULL && cj->progeny[1] != NULL) + cell_activate_subcell_stars_tasks(ci->progeny[3], cj->progeny[1], + s); + if (ci->progeny[3] != NULL && cj->progeny[4] != NULL) + cell_activate_subcell_stars_tasks(ci->progeny[3], cj->progeny[4], + s); + if (ci->progeny[3] != NULL && cj->progeny[5] != NULL) + cell_activate_subcell_stars_tasks(ci->progeny[3], 
cj->progeny[5], + s); + if (ci->progeny[6] != NULL && cj->progeny[0] != NULL) + cell_activate_subcell_stars_tasks(ci->progeny[6], cj->progeny[0], + s); + if (ci->progeny[6] != NULL && cj->progeny[1] != NULL) + cell_activate_subcell_stars_tasks(ci->progeny[6], cj->progeny[1], + s); + if (ci->progeny[6] != NULL && cj->progeny[4] != NULL) + cell_activate_subcell_stars_tasks(ci->progeny[6], cj->progeny[4], + s); + if (ci->progeny[6] != NULL && cj->progeny[5] != NULL) + cell_activate_subcell_stars_tasks(ci->progeny[6], cj->progeny[5], + s); + if (ci->progeny[7] != NULL && cj->progeny[0] != NULL) + cell_activate_subcell_stars_tasks(ci->progeny[7], cj->progeny[0], + s); + if (ci->progeny[7] != NULL && cj->progeny[1] != NULL) + cell_activate_subcell_stars_tasks(ci->progeny[7], cj->progeny[1], + s); + if (ci->progeny[7] != NULL && cj->progeny[4] != NULL) + cell_activate_subcell_stars_tasks(ci->progeny[7], cj->progeny[4], + s); + if (ci->progeny[7] != NULL && cj->progeny[5] != NULL) + cell_activate_subcell_stars_tasks(ci->progeny[7], cj->progeny[5], + s); + break; + + case 11: /* ( 0 , 1 , -1 ) */ + if (ci->progeny[2] != NULL && cj->progeny[1] != NULL) + cell_activate_subcell_stars_tasks(ci->progeny[2], cj->progeny[1], + s); + if (ci->progeny[2] != NULL && cj->progeny[5] != NULL) + cell_activate_subcell_stars_tasks(ci->progeny[2], cj->progeny[5], + s); + if (ci->progeny[6] != NULL && cj->progeny[1] != NULL) + cell_activate_subcell_stars_tasks(ci->progeny[6], cj->progeny[1], + s); + if (ci->progeny[6] != NULL && cj->progeny[5] != NULL) + cell_activate_subcell_stars_tasks(ci->progeny[6], cj->progeny[5], + s); + break; + + case 12: /* ( 0 , 0 , 1 ) */ + if (ci->progeny[1] != NULL && cj->progeny[0] != NULL) + cell_activate_subcell_stars_tasks(ci->progeny[1], cj->progeny[0], + s); + if (ci->progeny[1] != NULL && cj->progeny[2] != NULL) + cell_activate_subcell_stars_tasks(ci->progeny[1], cj->progeny[2], + s); + if (ci->progeny[1] != NULL && cj->progeny[4] != NULL) + 
cell_activate_subcell_stars_tasks(ci->progeny[1], cj->progeny[4], + s); + if (ci->progeny[1] != NULL && cj->progeny[6] != NULL) + cell_activate_subcell_stars_tasks(ci->progeny[1], cj->progeny[6], + s); + if (ci->progeny[3] != NULL && cj->progeny[0] != NULL) + cell_activate_subcell_stars_tasks(ci->progeny[3], cj->progeny[0], + s); + if (ci->progeny[3] != NULL && cj->progeny[2] != NULL) + cell_activate_subcell_stars_tasks(ci->progeny[3], cj->progeny[2], + s); + if (ci->progeny[3] != NULL && cj->progeny[4] != NULL) + cell_activate_subcell_stars_tasks(ci->progeny[3], cj->progeny[4], + s); + if (ci->progeny[3] != NULL && cj->progeny[6] != NULL) + cell_activate_subcell_stars_tasks(ci->progeny[3], cj->progeny[6], + s); + if (ci->progeny[5] != NULL && cj->progeny[0] != NULL) + cell_activate_subcell_stars_tasks(ci->progeny[5], cj->progeny[0], + s); + if (ci->progeny[5] != NULL && cj->progeny[2] != NULL) + cell_activate_subcell_stars_tasks(ci->progeny[5], cj->progeny[2], + s); + if (ci->progeny[5] != NULL && cj->progeny[4] != NULL) + cell_activate_subcell_stars_tasks(ci->progeny[5], cj->progeny[4], + s); + if (ci->progeny[5] != NULL && cj->progeny[6] != NULL) + cell_activate_subcell_stars_tasks(ci->progeny[5], cj->progeny[6], + s); + if (ci->progeny[7] != NULL && cj->progeny[0] != NULL) + cell_activate_subcell_stars_tasks(ci->progeny[7], cj->progeny[0], + s); + if (ci->progeny[7] != NULL && cj->progeny[2] != NULL) + cell_activate_subcell_stars_tasks(ci->progeny[7], cj->progeny[2], + s); + if (ci->progeny[7] != NULL && cj->progeny[4] != NULL) + cell_activate_subcell_stars_tasks(ci->progeny[7], cj->progeny[4], + s); + if (ci->progeny[7] != NULL && cj->progeny[6] != NULL) + cell_activate_subcell_stars_tasks(ci->progeny[7], cj->progeny[6], + s); + break; + } + + } + + /* Otherwise, activate the sorts and drifts. */ + else { + + if (cell_is_active_stars(ci, e) && cj->hydro.count != 0 && + ci->stars.count != 0) { + /* We are going to interact this pair, so store some values. 
*/ + atomic_or(&cj->hydro.requires_sorts, 1 << sid); + atomic_or(&ci->stars.requires_sorts, 1 << sid); + + cj->hydro.dx_max_sort_old = cj->hydro.dx_max_sort; + ci->stars.dx_max_sort_old = ci->stars.dx_max_sort; + + /* Activate the drifts if the cells are local. */ + if (ci->nodeID == engine_rank) cell_activate_drift_spart(ci, s); + if (cj->nodeID == engine_rank) cell_activate_drift_part(cj, s); + + /* Do we need to sort the cells? */ + cell_activate_hydro_sorts(cj, sid, s); + cell_activate_stars_sorts(ci, sid, s); + } + + if (cell_is_active_stars(cj, e) && ci->hydro.count != 0 && + cj->stars.count != 0) { + /* We are going to interact this pair, so store some values. */ + atomic_or(&cj->stars.requires_sorts, 1 << sid); + atomic_or(&ci->hydro.requires_sorts, 1 << sid); + + ci->hydro.dx_max_sort_old = ci->hydro.dx_max_sort; + cj->stars.dx_max_sort_old = cj->stars.dx_max_sort; + + /* Activate the drifts if the cells are local. */ + if (ci->nodeID == engine_rank) cell_activate_drift_part(ci, s); + if (cj->nodeID == engine_rank) cell_activate_drift_spart(cj, s); + + /* Do we need to sort the cells? */ + cell_activate_hydro_sorts(ci, sid, s); + cell_activate_stars_sorts(cj, sid, s); + } + } + } /* Otherwise, pair interation */ +} + +/** + * @brief Traverse a sub-cell task and activate the gravity drift tasks that + * are required by a self gravity task. + * + * @param ci The first #cell we recurse in. + * @param cj The second #cell we recurse in. + * @param s The task #scheduler. + */ +void cell_activate_subcell_grav_tasks(struct cell *ci, struct cell *cj, + struct scheduler *s) { + /* Some constants */ + const struct space *sp = s->space; + const struct engine *e = sp->e; + + /* Self interaction? */ + if (cj == NULL) { + + /* Do anything? */ + if (ci->grav.count == 0 || !cell_is_active_gravity(ci, e)) return; + + /* Recurse? 
*/ + if (ci->split) { + + /* Loop over all progenies and pairs of progenies */ + for (int j = 0; j < 8; j++) { + if (ci->progeny[j] != NULL) { + cell_activate_subcell_grav_tasks(ci->progeny[j], NULL, s); + for (int k = j + 1; k < 8; k++) + if (ci->progeny[k] != NULL) + cell_activate_subcell_grav_tasks(ci->progeny[j], ci->progeny[k], + s); + } + } + } else { + + /* We have reached the bottom of the tree: activate gpart drift */ + cell_activate_drift_gpart(ci, s); + } + } + + /* Pair interaction */ + else { + + /* Anything to do here? */ + if (!cell_is_active_gravity(ci, e) && !cell_is_active_gravity(cj, e)) + return; + if (ci->grav.count == 0 || cj->grav.count == 0) return; + + /* Atomically drift the multipole in ci */ + lock_lock(&ci->grav.mlock); + if (ci->grav.ti_old_multipole < e->ti_current) cell_drift_multipole(ci, e); + if (lock_unlock(&ci->grav.mlock) != 0) error("Impossible to unlock m-pole"); + + /* Atomically drift the multipole in cj */ + lock_lock(&cj->grav.mlock); + if (cj->grav.ti_old_multipole < e->ti_current) cell_drift_multipole(cj, e); + if (lock_unlock(&cj->grav.mlock) != 0) error("Impossible to unlock m-pole"); + + /* Can we use multipoles ? */ + if (cell_can_use_pair_mm(ci, cj, e, sp)) { + + /* Ok, no need to drift anything */ + return; + } + /* Otherwise, activate the gpart drifts if we are at the bottom. */ + else if (!ci->split && !cj->split) { + + /* Activate the drifts if the cells are local. 
*/ + if (cell_is_active_gravity(ci, e) || cell_is_active_gravity(cj, e)) { + if (ci->nodeID == engine_rank) cell_activate_drift_gpart(ci, s); + if (cj->nodeID == engine_rank) cell_activate_drift_gpart(cj, s); + } + } + /* Ok, we can still recurse */ + else { + + /* Recover the multipole information */ + const struct gravity_tensors *const multi_i = ci->grav.multipole; + const struct gravity_tensors *const multi_j = cj->grav.multipole; + const double ri_max = multi_i->r_max; + const double rj_max = multi_j->r_max; + + if (ri_max > rj_max) { + if (ci->split) { + + /* Loop over ci's children */ + for (int k = 0; k < 8; k++) { + if (ci->progeny[k] != NULL) + cell_activate_subcell_grav_tasks(ci->progeny[k], cj, s); + } + + } else if (cj->split) { + + /* Loop over cj's children */ + for (int k = 0; k < 8; k++) { + if (cj->progeny[k] != NULL) + cell_activate_subcell_grav_tasks(ci, cj->progeny[k], s); + } + + } else { + error("Fundamental error in the logic"); + } + } else if (rj_max >= ri_max) { if (cj->split) { /* Loop over cj's children */ @@ -2066,38 +2717,45 @@ int cell_unskip_hydro_tasks(struct cell *c, struct scheduler *s) { int rebuild = 0; /* Un-skip the density tasks involved with this cell. */ - for (struct link *l = c->density; l != NULL; l = l->next) { + for (struct link *l = c->hydro.density; l != NULL; l = l->next) { struct task *t = l->t; struct cell *ci = t->ci; struct cell *cj = t->cj; const int ci_active = cell_is_active_hydro(ci, e); const int cj_active = (cj != NULL) ? cell_is_active_hydro(cj, e) : 0; +#ifdef WITH_MPI + const int ci_nodeID = ci->nodeID; + const int cj_nodeID = (cj != NULL) ? cj->nodeID : -1; +#else + const int ci_nodeID = nodeID; + const int cj_nodeID = nodeID; +#endif /* Only activate tasks that involve a local active cell. 
*/ - if ((ci_active && ci->nodeID == nodeID) || - (cj_active && cj->nodeID == nodeID)) { + if ((ci_active && ci_nodeID == nodeID) || + (cj_active && cj_nodeID == nodeID)) { scheduler_activate(s, t); /* Activate hydro drift */ if (t->type == task_type_self) { - if (ci->nodeID == nodeID) cell_activate_drift_part(ci, s); + if (ci_nodeID == nodeID) cell_activate_drift_part(ci, s); } /* Set the correct sorting flags and activate hydro drifts */ else if (t->type == task_type_pair) { /* Store some values. */ - atomic_or(&ci->requires_sorts, 1 << t->flags); - atomic_or(&cj->requires_sorts, 1 << t->flags); - ci->dx_max_sort_old = ci->dx_max_sort; - cj->dx_max_sort_old = cj->dx_max_sort; + atomic_or(&ci->hydro.requires_sorts, 1 << t->flags); + atomic_or(&cj->hydro.requires_sorts, 1 << t->flags); + ci->hydro.dx_max_sort_old = ci->hydro.dx_max_sort; + cj->hydro.dx_max_sort_old = cj->hydro.dx_max_sort; /* Activate the drift tasks. */ - if (ci->nodeID == nodeID) cell_activate_drift_part(ci, s); - if (cj->nodeID == nodeID) cell_activate_drift_part(cj, s); + if (ci_nodeID == nodeID) cell_activate_drift_part(ci, s); + if (cj_nodeID == nodeID) cell_activate_drift_part(cj, s); /* Check the sorts and activate them if needed. */ - cell_activate_sorts(ci, t->flags, s); - cell_activate_sorts(cj, t->flags, s); + cell_activate_hydro_sorts(ci, t->flags, s); + cell_activate_hydro_sorts(cj, t->flags, s); } /* Store current values of dx_max and h_max. */ else if (t->type == task_type_sub_pair || t->type == task_type_sub_self) { @@ -2114,27 +2772,27 @@ int cell_unskip_hydro_tasks(struct cell *c, struct scheduler *s) { #ifdef WITH_MPI /* Activate the send/recv tasks. */ - if (ci->nodeID != nodeID) { + if (ci_nodeID != nodeID) { /* If the local cell is active, receive data from the foreign cell. 
*/ if (cj_active) { - scheduler_activate(s, ci->recv_xv); + scheduler_activate(s, ci->mpi.hydro.recv_xv); if (ci_active) { - scheduler_activate(s, ci->recv_rho); + scheduler_activate(s, ci->mpi.hydro.recv_rho); #ifdef EXTRA_HYDRO_LOOP - scheduler_activate(s, ci->recv_gradient); + scheduler_activate(s, ci->mpi.hydro.recv_gradient); #endif } } /* If the foreign cell is active, we want its ti_end values. */ - if (ci_active) scheduler_activate(s, ci->recv_ti); + if (ci_active) scheduler_activate(s, ci->mpi.recv_ti); /* Is the foreign cell active and will need stuff from us? */ if (ci_active) { - scheduler_activate_send(s, cj->send_xv, ci->nodeID); + scheduler_activate_send(s, cj->mpi.hydro.send_xv, ci_nodeID); /* Drift the cell which will be sent; note that not all sent particles will be drifted, only those that are needed. */ @@ -2142,38 +2800,38 @@ int cell_unskip_hydro_tasks(struct cell *c, struct scheduler *s) { /* If the local cell is also active, more stuff will be needed. */ if (cj_active) { - scheduler_activate_send(s, cj->send_rho, ci->nodeID); + scheduler_activate_send(s, cj->mpi.hydro.send_rho, ci_nodeID); #ifdef EXTRA_HYDRO_LOOP - scheduler_activate_send(s, cj->send_gradient, ci->nodeID); + scheduler_activate_send(s, cj->mpi.hydro.send_gradient, ci_nodeID); #endif } } /* If the local cell is active, send its ti_end values. */ - if (cj_active) scheduler_activate_send(s, cj->send_ti, ci->nodeID); + if (cj_active) scheduler_activate_send(s, cj->mpi.send_ti, ci_nodeID); - } else if (cj->nodeID != nodeID) { + } else if (cj_nodeID != nodeID) { /* If the local cell is active, receive data from the foreign cell. 
*/ if (ci_active) { - scheduler_activate(s, cj->recv_xv); + scheduler_activate(s, cj->mpi.hydro.recv_xv); if (cj_active) { - scheduler_activate(s, cj->recv_rho); + scheduler_activate(s, cj->mpi.hydro.recv_rho); #ifdef EXTRA_HYDRO_LOOP - scheduler_activate(s, cj->recv_gradient); + scheduler_activate(s, cj->mpi.hydro.recv_gradient); #endif } } /* If the foreign cell is active, we want its ti_end values. */ - if (cj_active) scheduler_activate(s, cj->recv_ti); + if (cj_active) scheduler_activate(s, cj->mpi.recv_ti); /* Is the foreign cell active and will need stuff from us? */ if (cj_active) { - scheduler_activate_send(s, ci->send_xv, cj->nodeID); + scheduler_activate_send(s, ci->mpi.hydro.send_xv, cj_nodeID); /* Drift the cell which will be sent; note that not all sent particles will be drifted, only those that are needed. */ @@ -2182,16 +2840,16 @@ int cell_unskip_hydro_tasks(struct cell *c, struct scheduler *s) { /* If the local cell is also active, more stuff will be needed. */ if (ci_active) { - scheduler_activate_send(s, ci->send_rho, cj->nodeID); + scheduler_activate_send(s, ci->mpi.hydro.send_rho, cj_nodeID); #ifdef EXTRA_HYDRO_LOOP - scheduler_activate_send(s, ci->send_gradient, cj->nodeID); + scheduler_activate_send(s, ci->mpi.hydro.send_gradient, cj_nodeID); #endif } } /* If the local cell is active, send its ti_end values. */ - if (ci_active) scheduler_activate_send(s, ci->send_ti, cj->nodeID); + if (ci_active) scheduler_activate_send(s, ci->mpi.send_ti, cj_nodeID); } #endif } @@ -2200,28 +2858,188 @@ int cell_unskip_hydro_tasks(struct cell *c, struct scheduler *s) { /* Unskip all the other task types. 
*/ if (c->nodeID == nodeID && cell_is_active_hydro(c, e)) { - for (struct link *l = c->gradient; l != NULL; l = l->next) + for (struct link *l = c->hydro.gradient; l != NULL; l = l->next) scheduler_activate(s, l->t); - for (struct link *l = c->force; l != NULL; l = l->next) + for (struct link *l = c->hydro.force; l != NULL; l = l->next) scheduler_activate(s, l->t); - if (c->extra_ghost != NULL) scheduler_activate(s, c->extra_ghost); - if (c->ghost_in != NULL) scheduler_activate(s, c->ghost_in); - if (c->ghost_out != NULL) scheduler_activate(s, c->ghost_out); - if (c->ghost != NULL) scheduler_activate(s, c->ghost); + if (c->hydro.extra_ghost != NULL) + scheduler_activate(s, c->hydro.extra_ghost); + if (c->hydro.ghost_in != NULL) scheduler_activate(s, c->hydro.ghost_in); + if (c->hydro.ghost_out != NULL) scheduler_activate(s, c->hydro.ghost_out); + if (c->hydro.ghost != NULL) scheduler_activate(s, c->hydro.ghost); + if (c->kick1 != NULL) scheduler_activate(s, c->kick1); + if (c->kick2 != NULL) scheduler_activate(s, c->kick2); + if (c->timestep != NULL) scheduler_activate(s, c->timestep); + if (c->end_force != NULL) scheduler_activate(s, c->end_force); + if (c->hydro.cooling != NULL) scheduler_activate(s, c->hydro.cooling); + if (c->hydro.star_formation != NULL) + scheduler_activate(s, c->hydro.star_formation); + if (c->sourceterms != NULL) scheduler_activate(s, c->sourceterms); + if (c->logger != NULL) scheduler_activate(s, c->logger); + } + + return rebuild; +} + +/** + * @brief Un-skips all the gravity tasks associated with a given cell and checks + * if the space needs to be rebuilt. + * + * @param c the #cell. + * @param s the #scheduler. + * + * @return 1 If the space needs rebuilding. 0 otherwise. + */ +int cell_unskip_gravity_tasks(struct cell *c, struct scheduler *s) { + + struct engine *e = s->space->e; + const int nodeID = e->nodeID; + int rebuild = 0; + + /* Un-skip the gravity tasks involved with this cell. 
*/ + for (struct link *l = c->grav.grav; l != NULL; l = l->next) { + struct task *t = l->t; + struct cell *ci = t->ci; + struct cell *cj = t->cj; + const int ci_active = cell_is_active_gravity(ci, e); + const int cj_active = (cj != NULL) ? cell_is_active_gravity(cj, e) : 0; +#ifdef WITH_MPI + const int ci_nodeID = ci->nodeID; + const int cj_nodeID = (cj != NULL) ? cj->nodeID : -1; +#else + const int ci_nodeID = nodeID; + const int cj_nodeID = nodeID; +#endif + + /* Only activate tasks that involve a local active cell. */ + if ((ci_active && ci_nodeID == nodeID) || + (cj_active && cj_nodeID == nodeID)) { + + scheduler_activate(s, t); + + /* Set the drifting flags */ + if (t->type == task_type_self && + t->subtype == task_subtype_external_grav) { + cell_activate_subcell_external_grav_tasks(ci, s); + } else if (t->type == task_type_self && t->subtype == task_subtype_grav) { + cell_activate_subcell_grav_tasks(ci, NULL, s); + } else if (t->type == task_type_pair) { + cell_activate_subcell_grav_tasks(ci, cj, s); + } else if (t->type == task_type_grav_mm) { +#ifdef SWIFT_DEBUG_CHECKS + error("Incorrectly linked M-M task!"); +#endif + } + } + + if (t->type == task_type_pair) { + +#ifdef WITH_MPI + /* Activate the send/recv tasks. */ + if (ci_nodeID != nodeID) { + + /* If the local cell is active, receive data from the foreign cell. */ + if (cj_active) scheduler_activate(s, ci->mpi.grav.recv); + + /* If the foreign cell is active, we want its ti_end values. */ + if (ci_active) scheduler_activate(s, ci->mpi.recv_ti); + + /* Is the foreign cell active and will need stuff from us? */ + if (ci_active) { + + scheduler_activate_send(s, cj->mpi.grav.send, ci_nodeID); + + /* Drift the cell which will be sent at the level at which it is + sent, i.e. drift the cell specified in the send task (l->t) + itself. */ + cell_activate_drift_gpart(cj, s); + } + + /* If the local cell is active, send its ti_end values. 
*/ + if (cj_active) scheduler_activate_send(s, cj->mpi.send_ti, ci_nodeID); + + } else if (cj_nodeID != nodeID) { + + /* If the local cell is active, receive data from the foreign cell. */ + if (ci_active) scheduler_activate(s, cj->mpi.grav.recv); + + /* If the foreign cell is active, we want its ti_end values. */ + if (cj_active) scheduler_activate(s, cj->mpi.recv_ti); + + /* Is the foreign cell active and will need stuff from us? */ + if (cj_active) { + + scheduler_activate_send(s, ci->mpi.grav.send, cj_nodeID); + + /* Drift the cell which will be sent at the level at which it is + sent, i.e. drift the cell specified in the send task (l->t) + itself. */ + cell_activate_drift_gpart(ci, s); + } + + /* If the local cell is active, send its ti_end values. */ + if (ci_active) scheduler_activate_send(s, ci->mpi.send_ti, cj_nodeID); + } +#endif + } + } + + for (struct link *l = c->grav.mm; l != NULL; l = l->next) { + + struct task *t = l->t; + struct cell *ci = t->ci; + struct cell *cj = t->cj; + const int ci_active = cell_is_active_gravity_mm(ci, e); + const int cj_active = cell_is_active_gravity_mm(cj, e); +#ifdef WITH_MPI + const int ci_nodeID = ci->nodeID; + const int cj_nodeID = (cj != NULL) ? cj->nodeID : -1; +#else + const int ci_nodeID = nodeID; + const int cj_nodeID = nodeID; +#endif + +#ifdef SWIFT_DEBUG_CHECKS + if (t->type != task_type_grav_mm) error("Incorrectly linked gravity task!"); +#endif + + /* Only activate tasks that involve a local active cell. */ + if ((ci_active && ci_nodeID == nodeID) || + (cj_active && cj_nodeID == nodeID)) { + + scheduler_activate(s, t); + } + } + + /* Unskip all the other task types. 
*/ + if (c->nodeID == nodeID && cell_is_active_gravity(c, e)) { + + if (c->grav.init != NULL) scheduler_activate(s, c->grav.init); + if (c->grav.init_out != NULL) scheduler_activate(s, c->grav.init_out); if (c->kick1 != NULL) scheduler_activate(s, c->kick1); if (c->kick2 != NULL) scheduler_activate(s, c->kick2); if (c->timestep != NULL) scheduler_activate(s, c->timestep); if (c->end_force != NULL) scheduler_activate(s, c->end_force); - if (c->cooling != NULL) scheduler_activate(s, c->cooling); - if (c->sourceterms != NULL) scheduler_activate(s, c->sourceterms); + if (c->grav.down != NULL) scheduler_activate(s, c->grav.down); + if (c->grav.down_in != NULL) scheduler_activate(s, c->grav.down_in); + if (c->grav.mesh != NULL) scheduler_activate(s, c->grav.mesh); + if (c->grav.long_range != NULL) scheduler_activate(s, c->grav.long_range); + if (c->logger != NULL) scheduler_activate(s, c->logger); + + /* Subgrid tasks */ + if ((e->policy & engine_policy_cooling) && c->hydro.cooling != NULL) + scheduler_activate(s, c->hydro.cooling); + if ((e->policy & engine_policy_star_formation) && + c->hydro.star_formation != NULL) + scheduler_activate(s, c->hydro.star_formation); } return rebuild; } /** - * @brief Un-skips all the gravity tasks associated with a given cell and checks + * @brief Un-skips all the stars tasks associated with a given cell and checks * if the space needs to be rebuilt. * * @param c the #cell. @@ -2229,109 +3047,172 @@ int cell_unskip_hydro_tasks(struct cell *c, struct scheduler *s) { * * @return 1 If the space needs rebuilding. 0 otherwise. */ -int cell_unskip_gravity_tasks(struct cell *c, struct scheduler *s) { +int cell_unskip_stars_tasks(struct cell *c, struct scheduler *s) { struct engine *e = s->space->e; const int nodeID = e->nodeID; int rebuild = 0; - /* Un-skip the gravity tasks involved with this cell. */ - for (struct link *l = c->grav; l != NULL; l = l->next) { + /* Un-skip the density tasks involved with this cell. 
*/ + for (struct link *l = c->stars.density; l != NULL; l = l->next) { struct task *t = l->t; struct cell *ci = t->ci; struct cell *cj = t->cj; - const int ci_nodeID = ci->nodeID; - const int cj_nodeID = (cj != NULL) ? cj->nodeID : -1; - const int ci_active = cell_is_active_gravity(ci, e); - const int cj_active = (cj != NULL) ? cell_is_active_gravity(cj, e) : 0; + const int ci_active = cell_is_active_stars(ci, e); + const int cj_active = (cj != NULL) ? cell_is_active_stars(cj, e) : 0; /* Only activate tasks that involve a local active cell. */ - if ((ci_active && ci_nodeID == nodeID) || - (cj_active && cj_nodeID == nodeID)) { + if ((ci_active && ci->nodeID == nodeID) || + (cj_active && cj->nodeID == nodeID)) { scheduler_activate(s, t); - /* Set the drifting flags */ - if (t->type == task_type_self && - t->subtype == task_subtype_external_grav) { - cell_activate_subcell_external_grav_tasks(ci, s); - } else if (t->type == task_type_self && t->subtype == task_subtype_grav) { - cell_activate_subcell_grav_tasks(ci, NULL, s); - } else if (t->type == task_type_pair) { - cell_activate_subcell_grav_tasks(ci, cj, s); - } else if (t->type == task_type_grav_mm) { - cell_activate_grav_mm_task(ci, cj, s); + /* Activate drifts */ + if (t->type == task_type_self) { + if (ci->nodeID == nodeID) { + cell_activate_drift_part(ci, s); + cell_activate_drift_spart(ci, s); + } + } + + /* Set the correct sorting flags and activate hydro drifts */ + else if (t->type == task_type_pair) { + /* Do ci */ + /* stars for ci */ + atomic_or(&ci->stars.requires_sorts, 1 << t->flags); + ci->stars.dx_max_sort_old = ci->stars.dx_max_sort; + + /* hydro for cj */ + atomic_or(&cj->hydro.requires_sorts, 1 << t->flags); + cj->hydro.dx_max_sort_old = cj->hydro.dx_max_sort; + + /* Activate the drift tasks. */ + if (ci->nodeID == nodeID) cell_activate_drift_spart(ci, s); + if (cj->nodeID == nodeID) cell_activate_drift_part(cj, s); + + /* Check the sorts and activate them if needed. 
*/ + cell_activate_stars_sorts(ci, t->flags, s); + cell_activate_hydro_sorts(cj, t->flags, s); + + /* Do cj */ + /* hydro for ci */ + atomic_or(&ci->hydro.requires_sorts, 1 << t->flags); + ci->hydro.dx_max_sort_old = ci->hydro.dx_max_sort; + + /* stars for cj */ + atomic_or(&cj->stars.requires_sorts, 1 << t->flags); + cj->stars.dx_max_sort_old = cj->stars.dx_max_sort; + + /* Activate the drift tasks. */ + if (cj->nodeID == nodeID) cell_activate_drift_spart(cj, s); + if (ci->nodeID == nodeID) cell_activate_drift_part(ci, s); + + /* Check the sorts and activate them if needed. */ + cell_activate_hydro_sorts(ci, t->flags, s); + cell_activate_stars_sorts(cj, t->flags, s); + + } + /* Store current values of dx_max and h_max. */ + else if (t->type == task_type_sub_pair || t->type == task_type_sub_self) { + cell_activate_subcell_stars_tasks(t->ci, t->cj, s); } } - if (t->type == task_type_pair) { + /* Only interested in pair interactions as of here. */ + if (t->type == task_type_pair || t->type == task_type_sub_pair) { + + /* Check whether there was too much particle motion, i.e. the + cell neighbour conditions were violated. */ + if (cell_need_rebuild_for_pair(ci, cj)) rebuild = 1; #ifdef WITH_MPI - /* Activate the send/recv tasks. */ - if (ci_nodeID != nodeID) { + error("MPI with stars not implemented"); + /* /\* Activate the send/recv tasks. *\/ */ + /* if (ci->nodeID != nodeID) { */ - /* If the local cell is active, receive data from the foreign cell. */ - if (cj_active) { - scheduler_activate(s, ci->recv_grav); - } + /* /\* If the local cell is active, receive data from the foreign cell. + * *\/ */ + /* if (cj_active) { */ + /* scheduler_activate(s, ci->hydro.recv_xv); */ + /* if (ci_active) { */ + /* scheduler_activate(s, ci->hydro.recv_rho); */ - /* If the foreign cell is active, we want its ti_end values. */ - if (ci_active) scheduler_activate(s, ci->recv_ti); + /* } */ + /* } */ - /* Is the foreign cell active and will need stuff from us? 
*/ - if (ci_active) { + /* /\* If the foreign cell is active, we want its ti_end values. *\/ */ + /* if (ci_active) scheduler_activate(s, ci->mpi.recv_ti); */ - scheduler_activate_send(s, cj->send_grav, ci_nodeID); + /* /\* Is the foreign cell active and will need stuff from us? *\/ */ + /* if (ci_active) { */ - /* Drift the cell which will be sent at the level at which it is - sent, i.e. drift the cell specified in the send task (l->t) - itself. */ - cell_activate_drift_gpart(cj, s); - } + /* scheduler_activate_send(s, cj->hydro.send_xv, ci->nodeID); */ - /* If the local cell is active, send its ti_end values. */ - if (cj_active) scheduler_activate_send(s, cj->send_ti, ci_nodeID); + /* /\* Drift the cell which will be sent; note that not all sent */ + /* particles will be drifted, only those that are needed. *\/ */ + /* cell_activate_drift_part(cj, s); */ - } else if (cj_nodeID != nodeID) { + /* /\* If the local cell is also active, more stuff will be needed. + * *\/ */ + /* if (cj_active) { */ + /* scheduler_activate_send(s, cj->hydro.send_rho, ci->nodeID); */ - /* If the local cell is active, receive data from the foreign cell. */ - if (ci_active) { - scheduler_activate(s, cj->recv_grav); - } + /* } */ + /* } */ - /* If the foreign cell is active, we want its ti_end values. */ - if (cj_active) scheduler_activate(s, cj->recv_ti); + /* /\* If the local cell is active, send its ti_end values. *\/ */ + /* if (cj_active) scheduler_activate_send(s, cj->mpi.send_ti, + * ci->nodeID); + */ - /* Is the foreign cell active and will need stuff from us? */ - if (cj_active) { + /* } else if (cj->nodeID != nodeID) { */ - scheduler_activate_send(s, ci->send_grav, cj_nodeID); + /* /\* If the local cell is active, receive data from the foreign cell. 
+ * *\/ */ + /* if (ci_active) { */ + /* scheduler_activate(s, cj->hydro.recv_xv); */ + /* if (cj_active) { */ + /* scheduler_activate(s, cj->hydro.recv_rho); */ - /* Drift the cell which will be sent at the level at which it is - sent, i.e. drift the cell specified in the send task (l->t) - itself. */ - cell_activate_drift_gpart(ci, s); - } + /* } */ + /* } */ - /* If the local cell is active, send its ti_end values. */ - if (ci_active) scheduler_activate_send(s, ci->send_ti, cj_nodeID); - } + /* /\* If the foreign cell is active, we want its ti_end values. *\/ */ + /* if (cj_active) scheduler_activate(s, cj->mpi.recv_ti); */ + + /* /\* Is the foreign cell active and will need stuff from us? *\/ */ + /* if (cj_active) { */ + + /* scheduler_activate_send(s, ci->hydro.send_xv, cj->nodeID); */ + + /* /\* Drift the cell which will be sent; note that not all sent */ + /* particles will be drifted, only those that are needed. *\/ */ + /* cell_activate_drift_part(ci, s); */ + + /* /\* If the local cell is also active, more stuff will be needed. + * *\/ */ + /* if (ci_active) { */ + + /* scheduler_activate_send(s, ci->hydro.send_rho, cj->nodeID); */ + + /* } */ + /* } */ + + /* /\* If the local cell is active, send its ti_end values. *\/ */ + /* if (ci_active) scheduler_activate_send(s, ci->mpi.send_ti, + * cj->nodeID); + */ + /* } */ #endif } } /* Unskip all the other task types. 
*/ - if (c->nodeID == nodeID && cell_is_active_gravity(c, e)) { + if (c->nodeID == nodeID && cell_is_active_stars(c, e)) { - if (c->init_grav != NULL) scheduler_activate(s, c->init_grav); - if (c->init_grav_out != NULL) scheduler_activate(s, c->init_grav_out); - if (c->kick1 != NULL) scheduler_activate(s, c->kick1); - if (c->kick2 != NULL) scheduler_activate(s, c->kick2); - if (c->timestep != NULL) scheduler_activate(s, c->timestep); - if (c->end_force != NULL) scheduler_activate(s, c->end_force); - if (c->grav_down != NULL) scheduler_activate(s, c->grav_down); - if (c->grav_down_in != NULL) scheduler_activate(s, c->grav_down_in); - if (c->grav_mesh != NULL) scheduler_activate(s, c->grav_mesh); - if (c->grav_long_range != NULL) scheduler_activate(s, c->grav_long_range); + if (c->stars.ghost_in != NULL) scheduler_activate(s, c->stars.ghost_in); + if (c->stars.ghost_out != NULL) scheduler_activate(s, c->stars.ghost_out); + if (c->stars.ghost != NULL) scheduler_activate(s, c->stars.ghost); + if (c->logger != NULL) scheduler_activate(s, c->logger); } return rebuild; @@ -2346,7 +3227,7 @@ int cell_unskip_gravity_tasks(struct cell *c, struct scheduler *s) { void cell_set_super(struct cell *c, struct cell *super) { /* Are we in a cell with some kind of self/pair task ? */ - if (super == NULL && c->nr_tasks > 0) super = c; + if (super == NULL && (c->nr_tasks > 0 || c->grav.nr_mm_tasks > 0)) super = c; /* Set the super-cell */ c->super = super; @@ -2367,10 +3248,10 @@ void cell_set_super(struct cell *c, struct cell *super) { void cell_set_super_hydro(struct cell *c, struct cell *super_hydro) { /* Are we in a cell with some kind of self/pair task ? 
*/ - if (super_hydro == NULL && c->density != NULL) super_hydro = c; + if (super_hydro == NULL && c->hydro.density != NULL) super_hydro = c; /* Set the super-cell */ - c->super_hydro = super_hydro; + c->hydro.super = super_hydro; /* Recurse */ if (c->split) @@ -2389,10 +3270,11 @@ void cell_set_super_hydro(struct cell *c, struct cell *super_hydro) { void cell_set_super_gravity(struct cell *c, struct cell *super_gravity) { /* Are we in a cell with some kind of self/pair task ? */ - if (super_gravity == NULL && c->grav != NULL) super_gravity = c; + if (super_gravity == NULL && (c->grav.grav != NULL || c->grav.mm != NULL)) + super_gravity = c; /* Set the super-cell */ - c->super_gravity = super_gravity; + c->grav.super = super_gravity; /* Recurse */ if (c->split) @@ -2415,6 +3297,11 @@ void cell_set_super_mapper(void *map_data, int num_elements, void *extra_data) { for (int ind = 0; ind < num_elements; ind++) { struct cell *c = &((struct cell *)map_data)[ind]; + /* All top-level cells get an MPI tag. 
*/ +#ifdef WITH_MPI + cell_ensure_tagged(c); +#endif + /* Super-pointer for hydro */ if (e->policy & engine_policy_hydro) cell_set_super_hydro(c, NULL); @@ -2439,7 +3326,7 @@ void cell_set_super_mapper(void *map_data, int num_elements, void *extra_data) { int cell_has_tasks(struct cell *c) { #ifdef WITH_MPI - if (c->timestep != NULL || c->recv_ti != NULL) return 1; + if (c->timestep != NULL || c->mpi.recv_ti != NULL) return 1; #else if (c->timestep != NULL) return 1; #endif @@ -2464,17 +3351,17 @@ int cell_has_tasks(struct cell *c) { void cell_drift_part(struct cell *c, const struct engine *e, int force) { const float hydro_h_max = e->hydro_properties->h_max; - const integertime_t ti_old_part = c->ti_old_part; + const integertime_t ti_old_part = c->hydro.ti_old_part; const integertime_t ti_current = e->ti_current; - struct part *const parts = c->parts; - struct xpart *const xparts = c->xparts; + struct part *const parts = c->hydro.parts; + struct xpart *const xparts = c->hydro.xparts; float dx_max = 0.f, dx2_max = 0.f; float dx_max_sort = 0.0f, dx2_max_sort = 0.f; float cell_h_max = 0.f; /* Drift irrespective of cell flags? */ - force |= c->do_drift; + force |= c->hydro.do_drift; #ifdef SWIFT_DEBUG_CHECKS /* Check that we only drift local cells. */ @@ -2484,8 +3371,23 @@ void cell_drift_part(struct cell *c, const struct engine *e, int force) { if (ti_current < ti_old_part) error("Attempt to drift to the past"); #endif + /* Early abort? */ + if (c->hydro.count == 0) { + + /* Clear the drift flags. */ + c->hydro.do_drift = 0; + c->hydro.do_sub_drift = 0; + + /* Update the time of the last drift */ + c->hydro.ti_old_part = ti_current; + + return; + } + + /* Ok, we have some particles somewhere in the hierarchy to drift */ + /* Are we not in a leaf ? */ - if (c->split && (force || c->do_sub_drift)) { + if (c->split && (force || c->hydro.do_sub_drift)) { /* Loop over the progeny and collect their data. 
*/ for (int k = 0; k < 8; k++) { @@ -2496,19 +3398,19 @@ void cell_drift_part(struct cell *c, const struct engine *e, int force) { cell_drift_part(cp, e, force); /* Update */ - dx_max = max(dx_max, cp->dx_max_part); - dx_max_sort = max(dx_max_sort, cp->dx_max_sort); - cell_h_max = max(cell_h_max, cp->h_max); + dx_max = max(dx_max, cp->hydro.dx_max_part); + dx_max_sort = max(dx_max_sort, cp->hydro.dx_max_sort); + cell_h_max = max(cell_h_max, cp->hydro.h_max); } } /* Store the values */ - c->h_max = cell_h_max; - c->dx_max_part = dx_max; - c->dx_max_sort = dx_max_sort; + c->hydro.h_max = cell_h_max; + c->hydro.dx_max_part = dx_max; + c->hydro.dx_max_sort = dx_max_sort; /* Update the time of the last drift */ - c->ti_old_part = ti_current; + c->hydro.ti_old_part = ti_current; } else if (!c->split && force && ti_current > ti_old_part) { @@ -2531,26 +3433,59 @@ void cell_drift_part(struct cell *c, const struct engine *e, int force) { } /* Loop over all the gas particles in the cell */ - const size_t nr_parts = c->count; + const size_t nr_parts = c->hydro.count; for (size_t k = 0; k < nr_parts; k++) { /* Get a handle on the part. */ struct part *const p = &parts[k]; struct xpart *const xp = &xparts[k]; + /* Ignore inhibited particles */ + if (part_is_inhibited(p, e)) continue; + /* Drift... */ drift_part(p, xp, dt_drift, dt_kick_hydro, dt_kick_grav, dt_therm, ti_old_part, ti_current); #ifdef SWIFT_DEBUG_CHECKS /* Make sure the particle does not drift by more than a box length. 
*/ - if (fabsf(xp->v_full[0] * dt_drift) > e->s->dim[0] || - fabsf(xp->v_full[1] * dt_drift) > e->s->dim[1] || - fabsf(xp->v_full[2] * dt_drift) > e->s->dim[2]) { + if (fabs(xp->v_full[0] * dt_drift) > e->s->dim[0] || + fabs(xp->v_full[1] * dt_drift) > e->s->dim[1] || + fabs(xp->v_full[2] * dt_drift) > e->s->dim[2]) { error("Particle drifts by more than a box length!"); } #endif +#ifdef PLANETARY_SPH + /* Remove particles that cross the non-periodic box edge */ + if (!(e->s->periodic)) { + for (int i = 0; i < 3; i++) { + if ((p->x[i] - xp->v_full[i] * dt_drift > e->s->dim[i]) || + (p->x[i] - xp->v_full[i] * dt_drift < 0.f) || + ((p->mass != 0.f) && ((p->x[i] < 0.01f * e->s->dim[i]) || + (p->x[i] > 0.99f * e->s->dim[i])))) { + /* (TEMPORARY) Crudely stop the particle manually */ + message( + "Particle %lld hit a box edge. \n" + " pos=%.4e %.4e %.4e vel=%.2e %.2e %.2e", + p->id, p->x[0], p->x[1], p->x[2], p->v[0], p->v[1], p->v[2]); + for (int j = 0; j < 3; j++) { + p->v[j] = 0.f; + p->gpart->v_full[j] = 0.f; + xp->v_full[j] = 0.f; + } + p->h = hydro_h_max; + p->time_bin = time_bin_inhibited; + p->gpart->time_bin = time_bin_inhibited; + hydro_part_has_no_neighbours(p, xp, e->cosmology); + p->mass = 0.f; + p->gpart->mass = 0.f; + break; + } + } + } +#endif + /* Limit h to within the allowed range */ p->h = min(p->h, hydro_h_max); @@ -2579,17 +3514,17 @@ void cell_drift_part(struct cell *c, const struct engine *e, int force) { dx_max_sort = sqrtf(dx2_max_sort); /* Store the values */ - c->h_max = cell_h_max; - c->dx_max_part = dx_max; - c->dx_max_sort = dx_max_sort; + c->hydro.h_max = cell_h_max; + c->hydro.dx_max_part = dx_max; + c->hydro.dx_max_sort = dx_max_sort; /* Update the time of the last drift */ - c->ti_old_part = ti_current; + c->hydro.ti_old_part = ti_current; } /* Clear the drift flags. 
*/ - c->do_drift = 0; - c->do_sub_drift = 0; + c->hydro.do_drift = 0; + c->hydro.do_sub_drift = 0; } /** @@ -2601,13 +3536,18 @@ void cell_drift_part(struct cell *c, const struct engine *e, int force) { */ void cell_drift_gpart(struct cell *c, const struct engine *e, int force) { - const integertime_t ti_old_gpart = c->ti_old_gpart; + const float stars_h_max = e->stars_properties->h_max; + const integertime_t ti_old_gpart = c->grav.ti_old_part; const integertime_t ti_current = e->ti_current; - struct gpart *const gparts = c->gparts; - struct spart *const sparts = c->sparts; + struct gpart *const gparts = c->grav.parts; + struct spart *const sparts = c->stars.parts; + + float dx_max = 0.f, dx2_max = 0.f; + float dx_max_sort = 0.0f, dx2_max_sort = 0.f; + float cell_h_max = 0.f; /* Drift irrespective of cell flags? */ - force |= c->do_grav_drift; + force |= c->grav.do_drift; #ifdef SWIFT_DEBUG_CHECKS /* Check that we only drift local cells. */ @@ -2617,8 +3557,23 @@ void cell_drift_gpart(struct cell *c, const struct engine *e, int force) { if (ti_current < ti_old_gpart) error("Attempt to drift to the past"); #endif + /* Early abort? */ + if (c->grav.count == 0) { + + /* Clear the drift flags. */ + c->grav.do_drift = 0; + c->grav.do_sub_drift = 0; + + /* Update the time of the last drift */ + c->grav.ti_old_part = ti_current; + + return; + } + + /* Ok, we have some particles somewhere in the hierarchy to drift */ + /* Are we not in a leaf ? */ - if (c->split && (force || c->do_grav_sub_drift)) { + if (c->split && (force || c->grav.do_sub_drift)) { /* Loop over the progeny and collect their data. 
*/ for (int k = 0; k < 8; k++) { @@ -2627,11 +3582,21 @@ void cell_drift_gpart(struct cell *c, const struct engine *e, int force) { /* Recurse */ cell_drift_gpart(cp, e, force); + + /* Update */ + dx_max = max(dx_max, cp->stars.dx_max_part); + dx_max_sort = max(dx_max_sort, cp->stars.dx_max_sort); + cell_h_max = max(cell_h_max, cp->stars.h_max); } } + /* Store the values */ + c->stars.h_max = cell_h_max; + c->stars.dx_max_part = dx_max; + c->stars.dx_max_sort = dx_max_sort; + /* Update the time of the last drift */ - c->ti_old_gpart = ti_current; + c->grav.ti_old_part = ti_current; } else if (!c->split && force && ti_current > ti_old_gpart) { @@ -2644,15 +3609,47 @@ void cell_drift_gpart(struct cell *c, const struct engine *e, int force) { dt_drift = (ti_current - ti_old_gpart) * e->time_base; /* Loop over all the g-particles in the cell */ - const size_t nr_gparts = c->gcount; + const size_t nr_gparts = c->grav.count; for (size_t k = 0; k < nr_gparts; k++) { /* Get a handle on the gpart. */ struct gpart *const gp = &gparts[k]; + /* Ignore inhibited particles */ + if (gpart_is_inhibited(gp, e)) continue; + /* Drift... */ drift_gpart(gp, dt_drift, ti_old_gpart, ti_current); +#ifdef SWIFT_DEBUG_CHECKS + /* Make sure the particle does not drift by more than a box length. 
*/ + if (fabs(gp->v_full[0] * dt_drift) > e->s->dim[0] || + fabs(gp->v_full[1] * dt_drift) > e->s->dim[1] || + fabs(gp->v_full[2] * dt_drift) > e->s->dim[2]) { + error("Particle drifts by more than a box length!"); + } +#endif + +#ifdef PLANETARY_SPH + /* Remove particles that cross the non-periodic box edge */ + if (!(e->s->periodic)) { + for (int i = 0; i < 3; i++) { + if ((gp->x[i] - gp->v_full[i] * dt_drift > e->s->dim[i]) || + (gp->x[i] - gp->v_full[i] * dt_drift < 0.f) || + ((gp->mass != 0.f) && ((gp->x[i] < 0.01f * e->s->dim[i]) || + (gp->x[i] > 0.99f * e->s->dim[i])))) { + /* (TEMPORARY) Crudely stop the particle manually */ + for (int j = 0; j < 3; j++) { + gp->v_full[j] = 0.f; + } + gp->time_bin = time_bin_inhibited; + gp->mass = 0.f; + break; + } + } + } +#endif + /* Init gravity force fields. */ if (gpart_is_active(gp, e)) { gravity_init_gpart(gp); @@ -2660,25 +3657,63 @@ void cell_drift_gpart(struct cell *c, const struct engine *e, int force) { } /* Loop over all the star particles in the cell */ - const size_t nr_sparts = c->scount; + const size_t nr_sparts = c->stars.count; for (size_t k = 0; k < nr_sparts; k++) { /* Get a handle on the spart. */ struct spart *const sp = &sparts[k]; + /* Ignore inhibited particles */ + if (spart_is_inhibited(sp, e)) continue; + /* Drift... */ drift_spart(sp, dt_drift, ti_old_gpart, ti_current); - /* Note: no need to compute dx_max as all spart have a gpart */ - } +#ifdef SWIFT_DEBUG_CHECKS + /* Make sure the particle does not drift by more than a box length. 
*/ + if (fabs(sp->v[0] * dt_drift) > e->s->dim[0] || + fabs(sp->v[1] * dt_drift) > e->s->dim[1] || + fabs(sp->v[2] * dt_drift) > e->s->dim[2]) { + error("Particle drifts by more than a box length!"); + } +#endif + + /* Limit h to within the allowed range */ + sp->h = min(sp->h, stars_h_max); + + /* Compute (square of) motion since last cell construction */ + const float dx2 = sp->x_diff[0] * sp->x_diff[0] + + sp->x_diff[1] * sp->x_diff[1] + + sp->x_diff[2] * sp->x_diff[2]; + dx2_max = max(dx2_max, dx2); + + const float dx2_sort = sp->x_diff_sort[0] * sp->x_diff_sort[0] + + sp->x_diff_sort[1] * sp->x_diff_sort[1] + + sp->x_diff_sort[2] * sp->x_diff_sort[2]; + + dx2_max_sort = max(dx2_max_sort, dx2_sort); + + /* Maximal smoothing length */ + cell_h_max = max(cell_h_max, sp->h); + + } /* Note: no need to compute dx_max as all spart have a gpart */ + + /* Now, get the maximal particle motion from its square */ + dx_max = sqrtf(dx2_max); + dx_max_sort = sqrtf(dx2_max_sort); + + /* Store the values */ + c->stars.h_max = cell_h_max; + c->stars.dx_max_part = dx_max; + c->stars.dx_max_sort = dx_max_sort; /* Update the time of the last drift */ - c->ti_old_gpart = ti_current; + c->grav.ti_old_part = ti_current; } /* Clear the drift flags. 
*/ - c->do_grav_drift = 0; - c->do_grav_sub_drift = 0; + c->grav.do_drift = 0; + c->grav.do_sub_drift = 0; } /** @@ -2689,7 +3724,7 @@ void cell_drift_gpart(struct cell *c, const struct engine *e, int force) { */ void cell_drift_all_multipoles(struct cell *c, const struct engine *e) { - const integertime_t ti_old_multipole = c->ti_old_multipole; + const integertime_t ti_old_multipole = c->grav.ti_old_multipole; const integertime_t ti_current = e->ti_current; #ifdef SWIFT_DEBUG_CHECKS @@ -2706,7 +3741,7 @@ void cell_drift_all_multipoles(struct cell *c, const struct engine *e) { dt_drift = (ti_current - ti_old_multipole) * e->time_base; /* Drift the multipole */ - if (ti_current > ti_old_multipole) gravity_drift(c->multipole, dt_drift); + if (ti_current > ti_old_multipole) gravity_drift(c->grav.multipole, dt_drift); /* Are we not in a leaf ? */ if (c->split) { @@ -2717,7 +3752,7 @@ void cell_drift_all_multipoles(struct cell *c, const struct engine *e) { } /* Update the time of the last drift */ - c->ti_old_multipole = ti_current; + c->grav.ti_old_multipole = ti_current; } /** @@ -2731,7 +3766,7 @@ void cell_drift_all_multipoles(struct cell *c, const struct engine *e) { */ void cell_drift_multipole(struct cell *c, const struct engine *e) { - const integertime_t ti_old_multipole = c->ti_old_multipole; + const integertime_t ti_old_multipole = c->grav.ti_old_multipole; const integertime_t ti_current = e->ti_current; #ifdef SWIFT_DEBUG_CHECKS @@ -2747,10 +3782,10 @@ void cell_drift_multipole(struct cell *c, const struct engine *e) { else dt_drift = (ti_current - ti_old_multipole) * e->time_base; - if (ti_current > ti_old_multipole) gravity_drift(c->multipole, dt_drift); + if (ti_current > ti_old_multipole) gravity_drift(c->grav.multipole, dt_drift); /* Update the time of the last drift */ - c->ti_old_multipole = ti_current; + c->grav.ti_old_multipole = ti_current; } /** @@ -2759,7 +3794,8 @@ void cell_drift_multipole(struct cell *c, const struct engine *e) { void 
cell_check_timesteps(struct cell *c) { #ifdef SWIFT_DEBUG_CHECKS - if (c->ti_hydro_end_min == 0 && c->ti_gravity_end_min == 0 && c->nr_tasks > 0) + if (c->hydro.ti_end_min == 0 && c->grav.ti_end_min == 0 && + c->stars.ti_end_min == 0 && c->nr_tasks > 0) error("Cell without assigned time-step"); if (c->split) { @@ -2768,8 +3804,8 @@ void cell_check_timesteps(struct cell *c) { } else { if (c->nodeID == engine_rank) - for (int i = 0; i < c->count; ++i) - if (c->parts[i].time_bin == 0) + for (int i = 0; i < c->hydro.count; ++i) + if (c->hydro.parts[i].time_bin == 0) error("Particle without assigned time-bin"); } #else @@ -2777,6 +3813,165 @@ void cell_check_timesteps(struct cell *c) { #endif } +/** + * @brief "Remove" a gas particle from the calculation. + * + * The particle is inhibited and will officially be removed at the next rebuild. + * + * @param e The #engine running on this node. + * @param c The #cell from which to remove the particle. + * @param p The #part to remove. + * @param xp The extended data of the particle to remove. + */ +void cell_remove_part(const struct engine *e, struct cell *c, struct part *p, + struct xpart *xp) { + + /* Quick cross-check */ + if (c->nodeID != e->nodeID) + error("Can't remove a particle in a foreign cell."); + + /* Mark the particle as inhibited */ + p->time_bin = time_bin_inhibited; + + /* Mark the gpart as inhibited and stand-alone */ + if (p->gpart) { + p->gpart->time_bin = time_bin_inhibited; + p->gpart->id_or_neg_offset = p->id; + p->gpart->type = swift_type_dark_matter; + } + + /* Un-link the part */ + p->gpart = NULL; +} + +/** + * @brief "Remove" a gravity particle from the calculation. + * + * The particle is inhibited and will officially be removed at the next rebuild. + * + * @param e The #engine running on this node. + * @param c The #cell from which to remove the particle. + * @param gp The #gpart to remove. 
+ */ +void cell_remove_gpart(const struct engine *e, struct cell *c, + struct gpart *gp) { + + /* Quick cross-check */ + if (c->nodeID != e->nodeID) + error("Can't remove a particle in a foreign cell."); + + if (gp->type != swift_type_dark_matter) + error("Trying to remove a non-dark matter gpart."); + + /* Mark the particle as inhibited */ + gp->time_bin = time_bin_inhibited; +} + +/** + * @brief "Remove" a star particle from the calculation. + * + * The particle is inhibited and will officially be removed at the next rebuild. + * + * @param e The #engine running on this node. + * @param c The #cell from which to remove the particle. + * @param sp The #spart to remove. + */ +void cell_remove_spart(const struct engine *e, struct cell *c, + struct spart *sp) { + + /* Quick cross-check */ + if (c->nodeID != e->nodeID) + error("Can't remove a particle in a foreign cell."); + + /* Mark the particle as inhibited and stand-alone */ + sp->time_bin = time_bin_inhibited; + if (sp->gpart) { + sp->gpart->time_bin = time_bin_inhibited; + sp->gpart->id_or_neg_offset = sp->id; + sp->gpart->type = swift_type_dark_matter; + } + + /* Un-link the spart */ + sp->gpart = NULL; +} + +/** + * @brief "Remove" a gas particle from the calculation and convert its gpart + * friend to a dark matter particle. + * + * The particle is inhibited and will officially be removed at the next rebuild. + * + * @param e The #engine running on this node. + * @param c The #cell from which to remove the particle. + * @param p The #part to remove. + * @param xp The extended data of the particle to remove. 
+ */ +void cell_convert_part_to_gpart(const struct engine *e, struct cell *c, + struct part *p, struct xpart *xp) { + + /* Quick cross-checks */ + if (c->nodeID != e->nodeID) + error("Can't remove a particle in a foreign cell."); + + if (p->gpart == NULL) + error("Trying to convert part without gpart friend to dark matter!"); + + /* Get a handle */ + struct gpart *gp = p->gpart; + + /* Mark the particle as inhibited */ + p->time_bin = time_bin_inhibited; + + /* Un-link the part */ + p->gpart = NULL; + + /* Mark the gpart as dark matter */ + gp->type = swift_type_dark_matter; + gp->id_or_neg_offset = p->id; + +#ifdef SWIFT_DEBUG_CHECKS + gp->ti_kick = p->ti_kick; +#endif +} + +/** + * @brief "Remove" a spart particle from the calculation and convert its gpart + * friend to a dark matter particle. + * + * The particle is inhibited and will officially be removed at the next rebuild. + * + * @param e The #engine running on this node. + * @param c The #cell from which to remove the particle. + * @param sp The #spart to remove. + */ +void cell_convert_spart_to_gpart(const struct engine *e, struct cell *c, + struct spart *sp) { + + /* Quick cross-check */ + if (c->nodeID != e->nodeID) + error("Can't remove a particle in a foreign cell."); + + if (sp->gpart == NULL) + error("Trying to convert spart without gpart friend to dark matter!"); + + /* Get a handle */ + struct gpart *gp = sp->gpart; + + /* Mark the particle as inhibited */ + sp->time_bin = time_bin_inhibited; + + /* Un-link the spart */ + sp->gpart = NULL; + + /* Mark the gpart as dark matter */ + gp->type = swift_type_dark_matter; + gp->id_or_neg_offset = sp->id; + +#ifdef SWIFT_DEBUG_CHECKS + gp->ti_kick = sp->ti_kick; +#endif +} + /** * @brief Can we use the MM interactions fo a given pair of cells? 
* @@ -2793,8 +3988,8 @@ int cell_can_use_pair_mm(const struct cell *ci, const struct cell *cj, const double dim[3] = {s->dim[0], s->dim[1], s->dim[2]}; /* Recover the multipole information */ - const struct gravity_tensors *const multi_i = ci->multipole; - const struct gravity_tensors *const multi_j = cj->multipole; + const struct gravity_tensors *const multi_i = ci->grav.multipole; + const struct gravity_tensors *const multi_j = cj->grav.multipole; /* Get the distance between the CoMs */ double dx = multi_i->CoM[0] - multi_j->CoM[0]; @@ -2811,3 +4006,67 @@ int cell_can_use_pair_mm(const struct cell *ci, const struct cell *cj, return gravity_M2L_accept(multi_i->r_max, multi_j->r_max, theta_crit2, r2); } + +/** + * @brief Can we use the MM interactions for a given pair of cells? + * + * This function uses the information gathered in the multipole at rebuild + * time and not the current position and radius of the multipole. + * + * @param ci The first #cell. + * @param cj The second #cell. + * @param e The #engine. + * @param s The #space.
+ */ +int cell_can_use_pair_mm_rebuild(const struct cell *ci, const struct cell *cj, + const struct engine *e, + const struct space *s) { + + const double theta_crit2 = e->gravity_properties->theta_crit2; + const int periodic = s->periodic; + const double dim[3] = {s->dim[0], s->dim[1], s->dim[2]}; + + /* Recover the multipole information */ + const struct gravity_tensors *const multi_i = ci->grav.multipole; + const struct gravity_tensors *const multi_j = cj->grav.multipole; + +#ifdef SWIFT_DEBUG_CHECKS + + if (multi_i->CoM_rebuild[0] < ci->loc[0] || + multi_i->CoM_rebuild[0] > ci->loc[0] + ci->width[0]) + error("Invalid multipole position ci"); + if (multi_i->CoM_rebuild[1] < ci->loc[1] || + multi_i->CoM_rebuild[1] > ci->loc[1] + ci->width[1]) + error("Invalid multipole position ci"); + if (multi_i->CoM_rebuild[2] < ci->loc[2] || + multi_i->CoM_rebuild[2] > ci->loc[2] + ci->width[2]) + error("Invalid multipole position ci"); + + if (multi_j->CoM_rebuild[0] < cj->loc[0] || + multi_j->CoM_rebuild[0] > cj->loc[0] + cj->width[0]) + error("Invalid multipole position cj"); + if (multi_j->CoM_rebuild[1] < cj->loc[1] || + multi_j->CoM_rebuild[1] > cj->loc[1] + cj->width[1]) + error("Invalid multipole position cj"); + if (multi_j->CoM_rebuild[2] < cj->loc[2] || + multi_j->CoM_rebuild[2] > cj->loc[2] + cj->width[2]) + error("Invalid multipole position cj"); + +#endif + + /* Get the distance between the CoMs */ + double dx = multi_i->CoM_rebuild[0] - multi_j->CoM_rebuild[0]; + double dy = multi_i->CoM_rebuild[1] - multi_j->CoM_rebuild[1]; + double dz = multi_i->CoM_rebuild[2] - multi_j->CoM_rebuild[2]; + + /* Apply BC */ + if (periodic) { + dx = nearest(dx, dim[0]); + dy = nearest(dy, dim[1]); + dz = nearest(dz, dim[2]); + } + const double r2 = dx * dx + dy * dy + dz * dz; + + return gravity_M2L_accept(multi_i->r_max_rebuild, multi_j->r_max_rebuild, + theta_crit2, r2); +} diff --git a/src/cell.h b/src/cell.h index 
31d525b02b49463563b21bf5aa904a8b4301f989..97ca22e584c67de20ca0826425f60523b8158ffa 100644 --- a/src/cell.h +++ b/src/cell.h @@ -77,47 +77,83 @@ struct link { */ struct pcell { - /*! Maximal smoothing length. */ - double h_max; + /*! Hydro variables */ + struct { - /*! Minimal integer end-of-timestep in this cell for hydro tasks */ - integertime_t ti_hydro_end_min; + /*! Maximal smoothing length. */ + double h_max; - /*! Maximal integer end-of-timestep in this cell for hydro tasks */ - integertime_t ti_hydro_end_max; + /*! Minimal integer end-of-timestep in this cell for hydro tasks */ + integertime_t ti_end_min; - /*! Maximal integer beginning-of-timestep in this cell for hydro tasks */ - integertime_t ti_hydro_beg_max; + /*! Maximal integer end-of-timestep in this cell for hydro tasks */ + integertime_t ti_end_max; - /*! Minimal integer end-of-timestep in this cell for gravity tasks */ - integertime_t ti_gravity_end_min; + /*! Maximal integer beginning-of-timestep in this cell for hydro tasks */ + integertime_t ti_beg_max; - /*! Maximal integer end-of-timestep in this cell for gravity tasks */ - integertime_t ti_gravity_end_max; + /*! Integer time of the last drift of the #part in this cell */ + integertime_t ti_old_part; - /*! Maximal integer beginning-of-timestep in this cell for gravity tasks */ - integertime_t ti_gravity_beg_max; + /*! Number of #part in this cell. */ + int count; - /*! Integer time of the last drift of the #part in this cell */ - integertime_t ti_old_part; + } hydro; - /*! Integer time of the last drift of the #gpart in this cell */ - integertime_t ti_old_gpart; + /*! Gravity variables */ + struct { - /*! Integer time of the last drift of the #multipole in this cell */ - integertime_t ti_old_multipole; + /*! This cell's gravity-related tensors */ + struct multipole m_pole; - /*! Number of #part in this cell. */ - int count; + /*! Centre of mass. */ + double CoM[3]; - /*! Number of #gpart in this cell. */ - int gcount; + /*! 
Centre of mass at rebuild time. */ + double CoM_rebuild[3]; - /*! Number of #gpart in this cell. */ - int gcount; + /*! Upper limit of the CoM<->gpart distance. */ + double r_max; - /*! Number of #spart in this cell. */ - int scount; + /*! Upper limit of the CoM<->gpart distance at last rebuild. */ + double r_max_rebuild; - /*! tag used for MPI communication. */ - int tag; + + /*! Minimal integer end-of-timestep in this cell for gravity tasks */ + integertime_t ti_end_min; + + /*! Maximal integer end-of-timestep in this cell for gravity tasks */ + integertime_t ti_end_max; + + /*! Maximal integer beginning-of-timestep in this cell for gravity tasks */ + integertime_t ti_beg_max; + + /*! Integer time of the last drift of the #gpart in this cell */ + integertime_t ti_old_part; + + /*! Integer time of the last drift of the #multipole in this cell */ + integertime_t ti_old_multipole; + + /*! Number of #gpart in this cell. */ + int count; + + } grav; + + /*! Stars variables */ + struct { + + /*! Number of #spart in this cell. */ + int count; + + /*! Maximal smoothing length. */ + double h_max; + + /*! Minimal integer end-of-timestep in this cell for stars tasks */ + integertime_t ti_end_min; + + } stars; + + /*! Maximal depth in that part of the tree */ + int maxdepth; /*! Relative indices of the cell's progeny. */ int progeny[8]; @@ -134,20 +170,41 @@ struct pcell { */ struct pcell_step { - /*! Minimal integer end-of-timestep in this cell (hydro) */ - integertime_t ti_hydro_end_min; + /*! Hydro variables */ + struct { + + /*! Minimal integer end-of-timestep in this cell (hydro) */ + integertime_t ti_end_min; - /*! Minimal integer end-of-timestep in this cell (hydro) */ - integertime_t ti_hydro_end_max; + /*! Maximal integer end-of-timestep in this cell (hydro) */ + integertime_t ti_end_max; - /*! Minimal integer end-of-timestep in this cell (gravity) */ - integertime_t ti_gravity_end_min; + /*! Maximal distance any #part has travelled since last rebuild */ + float dx_max_part; - /*!
Minimal integer end-of-timestep in this cell (gravity) */ - integertime_t ti_gravity_end_max; + } hydro; - /*! Maximal distance any #part has travelled since last rebuild */ - float dx_max_part; + /*! Grav variables */ + struct { + + /*! Minimal integer end-of-timestep in this cell (gravity) */ + integertime_t ti_end_min; + + /*! Maximal integer end-of-timestep in this cell (gravity) */ + integertime_t ti_end_max; + + } grav; + + /*! Stars variables */ + struct { + + /*! Maximal distance any #part has travelled since last rebuild */ + float dx_max_part; + + /*! Minimal integer end-of-timestep in this cell (stars) */ + integertime_t ti_end_min; + + } stars; }; /** @@ -163,30 +220,9 @@ struct cell { /*! The cell dimensions. */ double width[3]; - /*! Max smoothing length in this cell. */ - double h_max; - - /*! This cell's multipole. */ - struct gravity_tensors *multipole; - /*! Linking pointer for "memory management". */ struct cell *next; - /*! Pointer to the #part data. */ - struct part *parts; - - /*! Pointer to the #xpart data. */ - struct xpart *xparts; - - /*! Pointer to the #gpart data. */ - struct gpart *gparts; - - /*! Pointer to the #spart data. */ - struct spart *sparts; - - /*! Pointer for the sorted indices. */ - struct entry *sort[13]; - /*! Pointers to the next level of cells. */ struct cell *progeny[8]; @@ -196,231 +232,379 @@ struct cell { /*! Super cell, i.e. the highest-level parent cell with *any* task */ struct cell *super; - /*! Super cell, i.e. the highest-level parent cell that has a hydro pair/self - * tasks */ - struct cell *super_hydro; + /*! Hydro variables */ + struct { - /*! Super cell, i.e. the highest-level parent cell that has a grav pair/self - * tasks */ - struct cell *super_gravity; + /*! Pointer to the #part data. */ + struct part *parts; - /*! Linked list of the tasks computing this cell's hydro density. */ - struct link *density; + /*! Pointer to the #xpart data.
*/ + struct xpart *xparts; - /* Linked list of the tasks computing this cell's hydro gradients. */ - struct link *gradient; + /*! Pointer for the sorted indices. */ + struct entry *sort[13]; - /*! Linked list of the tasks computing this cell's hydro forces. */ - struct link *force; + /*! Super cell, i.e. the highest-level parent cell that has a hydro + * pair/self tasks */ + struct cell *super; - /*! Linked list of the tasks computing this cell's gravity forces. */ - struct link *grav; + /*! Last (integer) time the cell's part were drifted forward in time. */ + integertime_t ti_old_part; - /*! The task computing this cell's sorts. */ - struct task *sorts; + /*! Maximum part movement in this cell since last construction. */ + float dx_max_part; - /*! The multipole initialistation task */ - struct task *init_grav; + /*! Maximum particle movement in this cell since the last sort. */ + float dx_max_sort; - /*! Implicit task for the gravity initialisation */ - struct task *init_grav_out; + /*! Max smoothing length in this cell. */ + double h_max; - /*! Dependency implicit task for the ghost (in->ghost->out)*/ - struct task *ghost_in; + /*! Minimum end of (integer) time step in this cell for hydro tasks. */ + integertime_t ti_end_min; - /*! Dependency implicit task for the ghost (in->ghost->out)*/ - struct task *ghost_out; + /*! Maximum end of (integer) time step in this cell for hydro tasks. */ + integertime_t ti_end_max; - /*! The ghost task itself */ - struct task *ghost; + /*! Maximum beginning of (integer) time step in this cell for hydro tasks. + */ + integertime_t ti_beg_max; - /*! The extra ghost task for complex hydro schemes */ - struct task *extra_ghost; + /*! Nr of #part in this cell. */ + int count; - /*! The drift task for parts */ - struct task *drift_part; + /*! Spin lock for various uses (#part case). */ + swift_lock_type lock; - /*! The drift task for gparts */ - struct task *drift_gpart; + /*! Number of #part updated in this cell. 
*/ + int updated; - /*! The first kick task */ - struct task *kick1; + /*! Number of #part inhibited in this cell. */ + int inhibited; - /*! The second kick task */ - struct task *kick2; + /*! Is the #part data of this cell being used in a sub-cell? */ + int hold; - /*! The task to end the force calculation */ - struct task *end_force; + /*! Values of h_max before the drifts, used for sub-cell tasks. */ + float h_max_old; - /*! The task to compute time-steps */ - struct task *timestep; - - /*! Task computing long range non-periodic gravity interactions */ - struct task *grav_long_range; + /*! Values of dx_max before the drifts, used for sub-cell tasks. */ + float dx_max_part_old; - /*! Implicit task for the down propagation */ - struct task *grav_down_in; + /*! Values of dx_max_sort before the drifts, used for sub-cell tasks. */ + float dx_max_sort_old; - /*! Task propagating the mesh forces to the particles */ - struct task *grav_mesh; + /*! Bit mask of sort directions that will be needed in the next timestep. */ + unsigned int requires_sorts; - /*! Task propagating the multipole to the particles */ - struct task *grav_down; + /*! Bit mask of sorts that need to be computed for this cell. */ + unsigned int do_sort; - /*! Task for cooling */ - struct task *cooling; + /*! Does this cell need to be drifted (hydro)? */ + char do_drift; - /*! Task for source terms */ - struct task *sourceterms; + /*! Do any of this cell's sub-cells need to be drifted (hydro)? */ + char do_sub_drift; -#ifdef WITH_MPI + /*! Do any of this cell's sub-cells need to be sorted? */ + char do_sub_sort; - /* Task receiving hydro data (positions). */ - struct task *recv_xv; + /*! Bit-mask indicating the sorted directions */ + unsigned int sorted; - /* Task receiving hydro data (density). */ - struct task *recv_rho; + /*! The task computing this cell's sorts. */ + struct task *sorts; - /* Task receiving hydro data (gradient). */ - struct task *recv_gradient; + /*! 
The drift task for parts */ + struct task *drift; - /* Task receiving gpart data. */ - struct task *recv_grav; + /*! Linked list of the tasks computing this cell's hydro density. */ + struct link *density; - /* Task receiving data (time-step). */ - struct task *recv_ti; + /* Linked list of the tasks computing this cell's hydro gradients. */ + struct link *gradient; - /* Linked list for sending hydro data (positions). */ - struct link *send_xv; + /*! Linked list of the tasks computing this cell's hydro forces. */ + struct link *force; - /* Linked list for sending hydro data (density). */ - struct link *send_rho; + /*! Dependency implicit task for the ghost (in->ghost->out)*/ + struct task *ghost_in; - /* Linked list for sending hydro data (gradient). */ - struct link *send_gradient; + /*! Dependency implicit task for the ghost (in->ghost->out)*/ + struct task *ghost_out; - /* Linked list for sending gpart data. */ - struct link *send_grav; + /*! The ghost task itself */ + struct task *ghost; - /* Linked list for sending data (time-step). */ - struct link *send_ti; + /*! The extra ghost task for complex hydro schemes */ + struct task *extra_ghost; - /*! Bit mask of the proxies this cell is registered with. */ - unsigned long long int sendto; + /*! Task for cooling */ + struct task *cooling; - /*! Pointer to this cell's packed representation. */ - struct pcell *pcell; + /*! Task for star formation */ + struct task *star_formation; - /*! Size of the packed representation */ - int pcell_size; +#ifdef SWIFT_DEBUG_CHECKS - /*! MPI tag associated with this cell */ - int tag; + /*! Last (integer) time the cell's sort arrays were updated. */ + integertime_t ti_sort; #endif - /*! Minimum end of (integer) time step in this cell for hydro tasks. */ - integertime_t ti_hydro_end_min; + } hydro; - /*! Maximum end of (integer) time step in this cell for hydro tasks. */ - integertime_t ti_hydro_end_max; + /*! Grav variables */ + struct { - /*! 
Maximum beginning of (integer) time step in this cell for hydro tasks. */ - integertime_t ti_hydro_beg_max; + /*! Pointer to the #gpart data. */ + struct gpart *parts; - /*! Minimum end of (integer) time step in this cell for gravity tasks. */ - integertime_t ti_gravity_end_min; + /*! This cell's multipole. */ + struct gravity_tensors *multipole; - /*! Maximum end of (integer) time step in this cell for gravity tasks. */ - integertime_t ti_gravity_end_max; + /*! Super cell, i.e. the highest-level parent cell that has a grav pair/self + * tasks */ + struct cell *super; - /*! Maximum beginning of (integer) time step in this cell for gravity tasks. - */ - integertime_t ti_gravity_beg_max; + /*! Minimum end of (integer) time step in this cell for gravity tasks. */ + integertime_t ti_end_min; - /*! Last (integer) time the cell's part were drifted forward in time. */ - integertime_t ti_old_part; + /*! Maximum end of (integer) time step in this cell for gravity tasks. */ + integertime_t ti_end_max; - /*! Last (integer) time the cell's gpart were drifted forward in time. */ - integertime_t ti_old_gpart; + /*! Maximum beginning of (integer) time step in this cell for gravity tasks. + */ + integertime_t ti_beg_max; - /*! Last (integer) time the cell's multipole was drifted forward in time. */ - integertime_t ti_old_multipole; + /*! Last (integer) time the cell's gpart were drifted forward in time. */ + integertime_t ti_old_part; - /*! Minimum dimension, i.e. smallest edge of this cell (min(width)). */ - float dmin; + /*! Last (integer) time the cell's multipole was drifted forward in time. */ + integertime_t ti_old_multipole; - /*! Maximum particle movement in this cell since the last sort. */ - float dx_max_sort; + /*! Nr of #gpart in this cell. */ + int count; - /*! Maximum part movement in this cell since last construction. */ - float dx_max_part; + /*! Spin lock for various uses (#gpart case). */ + swift_lock_type plock; - /*! Nr of #part in this cell. 
*/ - int count; + /*! Spin lock for various uses (#multipole case). */ + swift_lock_type mlock; - /*! Nr of #gpart in this cell. */ - int gcount; + /*! Number of #gpart updated in this cell. */ + int updated; - /*! Nr of #spart in this cell. */ - int scount; + /*! Number of #gpart inhibited in this cell. */ + int inhibited; - /*! Bit-mask indicating the sorted directions */ - unsigned int sorted; + /*! Is the #gpart data of this cell being used in a sub-cell? */ + int phold; - /*! Spin lock for various uses (#part case). */ - swift_lock_type lock; + /*! Is the #multipole data of this cell being used in a sub-cell? */ + int mhold; - /*! Spin lock for various uses (#gpart case). */ - swift_lock_type glock; + /*! Does this cell need to be drifted (gravity)? */ + char do_drift; - /*! Spin lock for various uses (#multipole case). */ - swift_lock_type mlock; + /*! Do any of this cell's sub-cells need to be drifted (gravity)? */ + char do_sub_drift; - /*! Spin lock for various uses (#spart case). */ - swift_lock_type slock; + /*! The drift task for gparts */ + struct task *drift; - /*! ID of the previous owner, e.g. runner. */ - int owner; + /*! Implicit task (going up- and down the tree) for the #gpart drifts */ + struct task *drift_out; - /*! Number of #part updated in this cell. */ - int updated; + /*! Linked list of the tasks computing this cell's gravity forces. */ + struct link *grav; - /*! Number of #gpart updated in this cell. */ - int g_updated; + /*! Linked list of the tasks computing this cell's gravity M-M forces. */ + struct link *mm; - /*! Number of #spart updated in this cell. */ - int s_updated; + /*! The multipole initialisation task */ + struct task *init; - /*! ID of the node this cell lives on. */ - int nodeID; + /*! Implicit task for the gravity initialisation */ + struct task *init_out; + + /*! Task computing long range non-periodic gravity interactions */ + struct task *long_range; + + /*!
Implicit task for the down propagation */ + struct task *down_in; + + /*! Task propagating the mesh forces to the particles */ + struct task *mesh; + + /*! Task propagating the multipole to the particles */ + struct task *down; + + /*! Number of M-M tasks that are associated with this cell. */ + short int nr_mm_tasks; + + } grav; - /*! Is the #part data of this cell being used in a sub-cell? */ - int hold; + /*! Stars variables */ + struct { - /*! Is the #gpart data of this cell being used in a sub-cell? */ - int ghold; + /*! Pointer to the #spart data. */ + struct spart *parts; + + /*! Nr of #spart in this cell. */ + int count; + + /*! Max smoothing length in this cell. */ + double h_max; + + /*! Values of h_max before the drifts, used for sub-cell tasks. */ + float h_max_old; + + /*! Maximum part movement in this cell since last construction. */ + float dx_max_part; + + /*! Values of dx_max before the drifts, used for sub-cell tasks. */ + float dx_max_part_old; + + /*! Maximum particle movement in this cell since the last sort. */ + float dx_max_sort; + + /*! Values of dx_max_sort before the drifts, used for sub-cell tasks. */ + float dx_max_sort_old; + + /*! Bit mask of sort directions that will be needed in the next timestep. */ + unsigned int requires_sorts; + + /*! Pointer for the sorted indices. */ + struct entry *sort[13]; + + /*! Bit-mask indicating the sorted directions */ + unsigned int sorted; + + /*! Bit mask of sorts that need to be computed for this cell. */ + unsigned int do_sort; + + /*! Do any of this cell's sub-cells need to be sorted? */ + char do_sub_sort; + + /*! Minimum end of (integer) time step in this cell for stars tasks. */ + integertime_t ti_end_min; + + /*! Dependency implicit task for the star ghost (in->ghost->out)*/ + struct task *ghost_in; + + /*! Dependency implicit task for the star ghost (in->ghost->out)*/ + struct task *ghost_out; + + /*! The star ghost task itself */ + struct task *ghost; + + /*!
The task computing this cell's sorts. */ + struct task *sorts; + + /*! Linked list of the tasks computing this cell's star density. */ + struct link *density; + + /*! Number of #spart updated in this cell. */ + int updated; + + /*! Number of #spart inhibited in this cell. */ + int inhibited; + + /*! Is the #spart data of this cell being used in a sub-cell? */ + int hold; + + /*! Spin lock for various uses (#spart case). */ + swift_lock_type lock; + +#ifdef SWIFT_DEBUG_CHECKS + /*! Last (integer) time the cell's sort arrays were updated. */ + integertime_t ti_sort; +#endif + + } stars; + +#ifdef WITH_MPI + /*! MPI variables */ + struct { + + struct { + /* Task receiving hydro data (positions). */ + struct task *recv_xv; + + /* Task receiving hydro data (density). */ + struct task *recv_rho; + + /* Task receiving hydro data (gradient). */ + struct task *recv_gradient; + + /* Linked list for sending hydro data (positions). */ + struct link *send_xv; + + /* Linked list for sending hydro data (density). */ + struct link *send_rho; + + /* Linked list for sending hydro data (gradient). */ + struct link *send_gradient; + + } hydro; + + struct { + + /* Task receiving gpart data. */ + struct task *recv; + + /* Linked list for sending gpart data. */ + struct link *send; + } grav; + + /* Task receiving data (time-step). */ + struct task *recv_ti; + + /* Linked list for sending data (time-step). */ + struct link *send_ti; + + /*! Bit mask of the proxies this cell is registered with. */ + unsigned long long int sendto; + + /*! Pointer to this cell's packed representation. */ + struct pcell *pcell; + + /*! Size of the packed representation */ + int pcell_size; + + /*! MPI tag associated with this cell */ + int tag; + + } mpi; +#endif + + /*! The task to end the force calculation */ + struct task *end_force; + + /*! The first kick task */ + struct task *kick1; - /*! Is the #multipole data of this cell being used in a sub-cell? */ - int mhold; + /*! 
The second kick task */ + struct task *kick2; - /*! Is the #spart data of this cell being used in a sub-cell? */ - int shold; + /*! The task to compute time-steps */ + struct task *timestep; - /*! Values of dx_max before the drifts, used for sub-cell tasks. */ - float dx_max_old; + /*! Task for source terms */ + struct task *sourceterms; - /*! Values of h_max before the drifts, used for sub-cell tasks. */ - float h_max_old; + /*! The logger task */ + struct task *logger; - /*! Values of dx_max_sort before the drifts, used for sub-cell tasks. */ - float dx_max_sort_old; + /*! Minimum dimension, i.e. smallest edge of this cell (min(width)). */ + float dmin; - /*! Bit mask of sort directions that will be needed in the next timestep. */ - unsigned int requires_sorts; + /*! ID of the previous owner, e.g. runner. */ + int owner; - /*! Bit mask of sorts that need to be computed for this cell. */ - unsigned int do_sort; + /*! ID of the node this cell lives on. */ + int nodeID; /*! Number of tasks that are associated with this cell. */ short int nr_tasks; @@ -434,28 +618,10 @@ struct cell { /*! The maximal depth of this cell and its progenies */ char maxdepth; - /*! Does this cell need to be drifted (hydro)? */ - char do_drift; - - /*! Do any of this cell's sub-cells need to be drifted (hydro)? */ - char do_sub_drift; - - /*! Does this cell need to be drifted (gravity)? */ - char do_grav_drift; - - /*! Do any of this cell's sub-cells need to be drifted (gravity)? */ - char do_grav_sub_drift; - - /*! Do any of this cell's sub-cells need to be sorted? */ - char do_sub_sort; - #ifdef SWIFT_DEBUG_CHECKS /* Cell ID (for debugging) */ int cellID; - /*! Last (integer) time the cell's sort arrays were updated. */ - integertime_t ti_sort; - /*! 
The list of tasks that have been executed on this cell */ char tasks_executed[64]; @@ -482,8 +648,11 @@ int cell_mlocktree(struct cell *c); void cell_munlocktree(struct cell *c); int cell_slocktree(struct cell *c); void cell_sunlocktree(struct cell *c); -int cell_pack(struct cell *c, struct pcell *pc); -int cell_unpack(struct pcell *pc, struct cell *c, struct space *s); +int cell_pack(struct cell *c, struct pcell *pc, const int with_gravity); +int cell_unpack(struct pcell *pc, struct cell *c, struct space *s, + const int with_gravity); +int cell_pack_tags(const struct cell *c, int *tags); +int cell_unpack_tags(const int *tags, struct cell *c); int cell_pack_end_step(struct cell *c, struct pcell_step *pcell); int cell_unpack_end_step(struct cell *c, struct pcell_step *pcell); int cell_pack_multipoles(struct cell *c, struct gravity_tensors *m); @@ -494,13 +663,15 @@ int cell_link_gparts(struct cell *c, struct gpart *gparts); int cell_link_sparts(struct cell *c, struct spart *sparts); void cell_clean_links(struct cell *c, void *data); void cell_make_multipoles(struct cell *c, integertime_t ti_current); -void cell_check_multipole(struct cell *c, void *data); +void cell_check_multipole(struct cell *c); +void cell_check_foreign_multipole(const struct cell *c); void cell_clean(struct cell *c); void cell_check_part_drift_point(struct cell *c, void *data); void cell_check_gpart_drift_point(struct cell *c, void *data); void cell_check_multipole_drift_point(struct cell *c, void *data); void cell_reset_task_counters(struct cell *c); int cell_unskip_hydro_tasks(struct cell *c, struct scheduler *s); +int cell_unskip_stars_tasks(struct cell *c, struct scheduler *s); int cell_unskip_gravity_tasks(struct cell *c, struct scheduler *s); void cell_set_super(struct cell *c, struct cell *super); void cell_drift_part(struct cell *c, const struct engine *e, int force); @@ -513,16 +684,94 @@ void cell_activate_subcell_hydro_tasks(struct cell *ci, struct cell *cj, struct scheduler *s); void 
cell_activate_subcell_grav_tasks(struct cell *ci, struct cell *cj, struct scheduler *s); +void cell_activate_subcell_stars_tasks(struct cell *ci, struct cell *cj, + struct scheduler *s); void cell_activate_subcell_external_grav_tasks(struct cell *ci, struct scheduler *s); void cell_activate_drift_part(struct cell *c, struct scheduler *s); void cell_activate_drift_gpart(struct cell *c, struct scheduler *s); -void cell_activate_sorts(struct cell *c, int sid, struct scheduler *s); +void cell_activate_drift_spart(struct cell *c, struct scheduler *s); +void cell_activate_hydro_sorts(struct cell *c, int sid, struct scheduler *s); +void cell_activate_stars_sorts(struct cell *c, int sid, struct scheduler *s); void cell_clear_drift_flags(struct cell *c, void *data); void cell_set_super_mapper(void *map_data, int num_elements, void *extra_data); int cell_has_tasks(struct cell *c); +void cell_remove_part(const struct engine *e, struct cell *c, struct part *p, + struct xpart *xp); +void cell_remove_gpart(const struct engine *e, struct cell *c, + struct gpart *gp); +void cell_remove_spart(const struct engine *e, struct cell *c, + struct spart *sp); +void cell_convert_part_to_gpart(const struct engine *e, struct cell *c, + struct part *p, struct xpart *xp); +void cell_convert_spart_to_gpart(const struct engine *e, struct cell *c, + struct spart *sp); int cell_can_use_pair_mm(const struct cell *ci, const struct cell *cj, const struct engine *e, const struct space *s); +int cell_can_use_pair_mm_rebuild(const struct cell *ci, const struct cell *cj, + const struct engine *e, const struct space *s); + +/** + * @brief Compute the square of the minimal distance between any two points in + * two cells of the same size + * + * @param ci The first #cell. + * @param cj The second #cell. + * @param periodic Are we using periodic BCs? 
+ * @param dim The dimensions of the simulation volume + */ +__attribute__((always_inline)) INLINE static double cell_min_dist2_same_size( + const struct cell *restrict ci, const struct cell *restrict cj, + const int periodic, const double dim[3]) { + +#ifdef SWIFT_DEBUG_CHECKS + if (ci->width[0] != cj->width[0]) error("Cells of different size!"); + if (ci->width[1] != cj->width[1]) error("Cells of different size!"); + if (ci->width[2] != cj->width[2]) error("Cells of different size!"); +#endif + + const double cix_min = ci->loc[0]; + const double ciy_min = ci->loc[1]; + const double ciz_min = ci->loc[2]; + const double cjx_min = cj->loc[0]; + const double cjy_min = cj->loc[1]; + const double cjz_min = cj->loc[2]; + + const double cix_max = ci->loc[0] + ci->width[0]; + const double ciy_max = ci->loc[1] + ci->width[1]; + const double ciz_max = ci->loc[2] + ci->width[2]; + const double cjx_max = cj->loc[0] + cj->width[0]; + const double cjy_max = cj->loc[1] + cj->width[1]; + const double cjz_max = cj->loc[2] + cj->width[2]; + + if (periodic) { + + const double dx = min4(fabs(nearest(cix_min - cjx_min, dim[0])), + fabs(nearest(cix_min - cjx_max, dim[0])), + fabs(nearest(cix_max - cjx_min, dim[0])), + fabs(nearest(cix_max - cjx_max, dim[0]))); + + const double dy = min4(fabs(nearest(ciy_min - cjy_min, dim[1])), + fabs(nearest(ciy_min - cjy_max, dim[1])), + fabs(nearest(ciy_max - cjy_min, dim[1])), + fabs(nearest(ciy_max - cjy_max, dim[1]))); + + const double dz = min4(fabs(nearest(ciz_min - cjz_min, dim[2])), + fabs(nearest(ciz_min - cjz_max, dim[2])), + fabs(nearest(ciz_max - cjz_min, dim[2])), + fabs(nearest(ciz_max - cjz_max, dim[2]))); + + return dx * dx + dy * dy + dz * dz; + + } else { + + const double dx = min(fabs(cix_max - cjx_min), fabs(cix_min - cjx_max)); + const double dy = min(fabs(ciy_max - cjy_min), fabs(ciy_min - cjy_max)); + const double dz = min(fabs(ciz_max - cjz_min), fabs(ciz_min - cjz_max)); + + return dx * dx + dy * dy + dz * dz; + } +} /* 
Inlined functions (for speed). */ @@ -539,8 +788,8 @@ cell_can_recurse_in_pair_hydro_task(const struct cell *c) { /* If so, is the cut-off radius plus the max distance the parts have moved */ /* smaller than the sub-cell sizes ? */ /* Note: We use the _old values as these might have been updated by a drift */ - return c->split && - ((kernel_gamma * c->h_max_old + c->dx_max_old) < 0.5f * c->dmin); + return c->split && ((kernel_gamma * c->hydro.h_max_old + + c->hydro.dx_max_part_old) < 0.5f * c->dmin); } /** @@ -553,7 +802,37 @@ __attribute__((always_inline)) INLINE static int cell_can_recurse_in_self_hydro_task(const struct cell *c) { /* Is the cell split and not smaller than the smoothing length? */ - return c->split && (kernel_gamma * c->h_max_old < 0.5f * c->dmin); + return c->split && (kernel_gamma * c->hydro.h_max_old < 0.5f * c->dmin); +} + +/** + * @brief Can a sub-pair star task recurse to a lower level based + * on the status of the particles in the cell. + * + * @param c The #cell. + */ +__attribute__((always_inline)) INLINE static int +cell_can_recurse_in_pair_stars_task(const struct cell *c) { + + /* Is the cell split ? */ + /* If so, is the cut-off radius plus the max distance the parts have moved */ + /* smaller than the sub-cell sizes ? */ + /* Note: We use the _old values as these might have been updated by a drift */ + return c->split && ((kernel_gamma * c->stars.h_max_old + + c->stars.dx_max_part_old) < 0.5f * c->dmin); +} + +/** + * @brief Can a sub-self stars task recurse to a lower level based + * on the status of the particles in the cell. + * + * @param c The #cell. + */ +__attribute__((always_inline)) INLINE static int +cell_can_recurse_in_self_stars_task(const struct cell *c) { + + /* Is the cell split and not smaller than the smoothing length? 
*/ + return c->split && (kernel_gamma * c->stars.h_max_old < 0.5f * c->dmin); } /** @@ -570,7 +849,8 @@ __attribute__((always_inline)) INLINE static int cell_can_split_pair_hydro_task( /* the sub-cell sizes ? */ /* Note that since tasks are create after a rebuild no need to take */ /* into account any part motion (i.e. dx_max == 0 here) */ - return c->split && (space_stretch * kernel_gamma * c->h_max < 0.5f * c->dmin); + return c->split && + (space_stretch * kernel_gamma * c->hydro.h_max < 0.5f * c->dmin); } /** @@ -587,7 +867,44 @@ __attribute__((always_inline)) INLINE static int cell_can_split_self_hydro_task( /* the sub-cell sizes ? */ /* Note: No need for more checks here as all the sub-pairs and sub-self */ /* tasks will be created. So no need to check for h_max */ - return c->split && (space_stretch * kernel_gamma * c->h_max < 0.5f * c->dmin); + return c->split && + (space_stretch * kernel_gamma * c->hydro.h_max < 0.5f * c->dmin); +} + +/** + * @brief Can a pair stars task associated with a cell be split into smaller + * sub-tasks. + * + * @param c The #cell. + */ +__attribute__((always_inline)) INLINE static int cell_can_split_pair_stars_task( + const struct cell *c) { + + /* Is the cell split ? */ + /* If so, is the cut-off radius with some leeway smaller than */ + /* the sub-cell sizes ? */ + /* Note that since tasks are create after a rebuild no need to take */ + /* into account any part motion (i.e. dx_max == 0 here) */ + return c->split && + (space_stretch * kernel_gamma * c->stars.h_max < 0.5f * c->dmin); +} + +/** + * @brief Can a self stars task associated with a cell be split into smaller + * sub-tasks. + * + * @param c The #cell. + */ +__attribute__((always_inline)) INLINE static int cell_can_split_self_stars_task( + const struct cell *c) { + + /* Is the cell split ? */ + /* If so, is the cut-off radius with some leeway smaller than */ + /* the sub-cell sizes ? 
*/ + /* Note: No need for more checks here as all the sub-pairs and sub-self */ + /* tasks will be created. So no need to check for h_max */ + return c->split && + (space_stretch * kernel_gamma * c->stars.h_max < 0.5f * c->dmin); } /** @@ -599,8 +916,8 @@ __attribute__((always_inline)) INLINE static int cell_can_split_self_hydro_task( __attribute__((always_inline)) INLINE static int cell_can_split_pair_gravity_task(const struct cell *c) { - /* Is the cell split ? */ - return c->split && c->depth < space_subdepth_grav; + /* Is the cell split and still far from the leaves ? */ + return c->split && ((c->maxdepth - c->depth) > space_subdepth_diff_grav); } /** @@ -612,8 +929,8 @@ cell_can_split_pair_gravity_task(const struct cell *c) { __attribute__((always_inline)) INLINE static int cell_can_split_self_gravity_task(const struct cell *c) { - /* Is the cell split ? */ - return c->split && c->depth < space_subdepth_grav; + /* Is the cell split and still far from the leaves ? */ + return c->split && ((c->maxdepth - c->depth) > space_subdepth_diff_grav); } /** @@ -629,9 +946,32 @@ __attribute__((always_inline)) INLINE static int cell_need_rebuild_for_pair( /* Is the cut-off radius plus the max distance the parts in both cells have */ /* moved larger than the cell size ? */ /* Note ci->dmin == cj->dmin */ - return (kernel_gamma * max(ci->h_max, cj->h_max) + ci->dx_max_part + - cj->dx_max_part > + return (kernel_gamma * max(ci->hydro.h_max, cj->hydro.h_max) + + ci->hydro.dx_max_part + cj->hydro.dx_max_part > cj->dmin); } +/** + * @brief Add a unique tag to a cell, mostly for MPI communications. + * + * This function locks the cell so that tags can be added concurrently. + * + * @param c The #cell to tag. 
+ */ +__attribute__((always_inline)) INLINE static void cell_ensure_tagged( + struct cell *c) { +#ifdef WITH_MPI + + lock_lock(&c->hydro.lock); + if (c->mpi.tag < 0 && + (c->mpi.tag = atomic_inc(&cell_next_tag)) > cell_max_tag) + error("Ran out of cell tags."); + if (lock_unlock(&c->hydro.lock) != 0) { + error("Failed to unlock cell."); + } +#else + error("SWIFT was not compiled with MPI enabled."); +#endif // WITH_MPI +} + #endif /* SWIFT_CELL_H */ diff --git a/src/chemistry/EAGLE/chemistry.h b/src/chemistry/EAGLE/chemistry.h index 7f8a672669e1c5b1f8997ecf5971c63efee7522f..41a62c2d9f1387e58fe9027c4ca7ce0dee144514 100644 --- a/src/chemistry/EAGLE/chemistry.h +++ b/src/chemistry/EAGLE/chemistry.h @@ -60,7 +60,17 @@ chemistry_get_element_name(enum chemistry_element elem) { * @param cd #chemistry_global_data containing chemistry informations. */ __attribute__((always_inline)) INLINE static void chemistry_init_part( - struct part* restrict p, const struct chemistry_global_data* cd) {} + struct part* restrict p, const struct chemistry_global_data* cd) { + + struct chemistry_part_data* cpd = &p->chemistry_data; + + for (int i = 0; i < chemistry_element_count; i++) { + cpd->smoothed_metal_mass_fraction[i] = 0.f; + } + + cpd->smoothed_metal_mass_fraction_total = 0.f; + cpd->smoothed_iron_mass_fraction_from_SNIa = 0.f; +} /** * @brief Finishes the smooth metal calculation. @@ -76,7 +86,35 @@ __attribute__((always_inline)) INLINE static void chemistry_init_part( */ __attribute__((always_inline)) INLINE static void chemistry_end_density( struct part* restrict p, const struct chemistry_global_data* cd, - const struct cosmology* cosmo) {} + const struct cosmology* cosmo) { + + /* Some smoothing length multiples. 
*/ + const float h = p->h; + const float h_inv = 1.0f / h; /* 1/h */ + const float factor = pow_dimension(h_inv) / p->rho; /* 1 / h^d * rho */ + const float m = p->mass; + + struct chemistry_part_data* cpd = &p->chemistry_data; + + for (int i = 0; i < chemistry_element_count; i++) { + /* Final operation on the density (add self-contribution). */ + cpd->smoothed_metal_mass_fraction[i] += + m * cpd->metal_mass_fraction[i] * kernel_root; + + /* Finish the calculation by inserting the missing h-factors */ + cpd->smoothed_metal_mass_fraction[i] *= factor; + } + + /* Smooth mass fraction of all metals */ + cpd->smoothed_metal_mass_fraction_total += + m * cpd->metal_mass_fraction_total * kernel_root; + cpd->smoothed_metal_mass_fraction_total *= factor; + + /* Smooth iron mass fraction from SNIa */ + cpd->smoothed_iron_mass_fraction_from_SNIa += + m * cpd->iron_mass_fraction_from_SNIa * kernel_root; + cpd->smoothed_iron_mass_fraction_from_SNIa *= factor; +} /** * @brief Sets all particle fields to sensible values when the #part has 0 ngbs. @@ -112,11 +150,15 @@ __attribute__((always_inline)) INLINE static void chemistry_first_init_part( const struct chemistry_global_data* data, struct part* restrict p, struct xpart* restrict xp) { - p->chemistry_data.metal_mass_fraction_total = - data->initial_metal_mass_fraction_total; - for (int elem = 0; elem < chemistry_element_count; ++elem) - p->chemistry_data.metal_mass_fraction[elem] = - data->initial_metal_mass_fraction[elem]; + // Add initialization of all other fields in chemistry_part_data struct. 
+ if (data->initial_metal_mass_fraction_total != -1) { + p->chemistry_data.metal_mass_fraction_total = + data->initial_metal_mass_fraction_total; + for (int elem = 0; elem < chemistry_element_count; ++elem) + p->chemistry_data.metal_mass_fraction[elem] = + data->initial_metal_mass_fraction[elem]; + } + chemistry_init_part(p, data); } /** @@ -133,24 +175,26 @@ static INLINE void chemistry_init_backend(struct swift_params* parameter_file, struct chemistry_global_data* data) { /* Read the total metallicity */ - data->initial_metal_mass_fraction_total = - parser_get_param_float(parameter_file, "EAGLEChemistry:InitMetallicity"); - - /* Read the individual mass fractions */ - for (int elem = 0; elem < chemistry_element_count; ++elem) { - char buffer[50]; - sprintf(buffer, "EAGLEChemistry:InitAbundance_%s", - chemistry_get_element_name((enum chemistry_element)elem)); - - data->initial_metal_mass_fraction[elem] = - parser_get_param_float(parameter_file, buffer); + data->initial_metal_mass_fraction_total = parser_get_opt_param_float( + parameter_file, "EAGLEChemistry:InitMetallicity", -1); + + if (data->initial_metal_mass_fraction_total != -1) { + /* Read the individual mass fractions */ + for (int elem = 0; elem < chemistry_element_count; ++elem) { + char buffer[50]; + sprintf(buffer, "EAGLEChemistry:InitAbundance_%s", + chemistry_get_element_name((enum chemistry_element)elem)); + + data->initial_metal_mass_fraction[elem] = + parser_get_param_float(parameter_file, buffer); + } + + /* Read the constant ratios */ + data->calcium_over_silicon_ratio = parser_get_param_float( + parameter_file, "EAGLEChemistry:CalciumOverSilicon"); + data->sulphur_over_silicon_ratio = parser_get_param_float( + parameter_file, "EAGLEChemistry:SulphurOverSilicon"); } - - /* Read the constant ratios */ - data->calcium_over_silicon_ratio = parser_get_param_float( - parameter_file, "EAGLEChemistry:CalciumOverSilicon"); - data->sulphur_over_silicon_ratio = parser_get_param_float( - parameter_file, 
"EAGLEChemistry:SulphurOverSilicon"); } /** diff --git a/src/chemistry/EAGLE/chemistry_iact.h b/src/chemistry/EAGLE/chemistry_iact.h index bdbb8ac9bf7d260e29468b8bee0a84416b668d6a..5200facdc4e1d5783f2ad545fc239d5bd03b9012 100644 --- a/src/chemistry/EAGLE/chemistry_iact.h +++ b/src/chemistry/EAGLE/chemistry_iact.h @@ -39,7 +39,49 @@ */ __attribute__((always_inline)) INLINE static void runner_iact_chemistry( float r2, const float *dx, float hi, float hj, struct part *restrict pi, - struct part *restrict pj, float a, float H) {} + struct part *restrict pj, float a, float H) { + + struct chemistry_part_data *chi = &pi->chemistry_data; + struct chemistry_part_data *chj = &pj->chemistry_data; + + float wi, wi_dx; + float wj, wj_dx; + + /* Get the masses. */ + const float mi = pi->mass; + const float mj = pj->mass; + + /* Get r */ + const float r = sqrtf(r2); + + /* Compute the kernel function for pi */ + const float ui = r / hi; + kernel_deval(ui, &wi, &wi_dx); + + /* Compute the kernel function for pj */ + const float uj = r / hj; + kernel_deval(uj, &wj, &wj_dx); + + /* Compute contribution to the smooth metallicity */ + for (int i = 0; i < chemistry_element_count; i++) { + chi->smoothed_metal_mass_fraction[i] += + mj * chj->metal_mass_fraction[i] * wi; + chj->smoothed_metal_mass_fraction[i] += + mi * chi->metal_mass_fraction[i] * wj; + } + + // Smooth metal mass fraction of all metals + chi->smoothed_metal_mass_fraction_total += + mj * chj->metal_mass_fraction_total * wi; + chj->smoothed_metal_mass_fraction_total += + mi * chi->metal_mass_fraction_total * wj; + + // Smooth iron mass fraction from SNIa + chi->smoothed_iron_mass_fraction_from_SNIa += + mj * chj->iron_mass_fraction_from_SNIa * wi; + chj->smoothed_iron_mass_fraction_from_SNIa += + mi * chi->iron_mass_fraction_from_SNIa * wj; +} /** * @brief do chemistry computation after the runner_iact_density (non symmetric @@ -56,6 +98,36 @@ __attribute__((always_inline)) INLINE static void runner_iact_chemistry( */ 
__attribute__((always_inline)) INLINE static void runner_iact_nonsym_chemistry( float r2, const float *dx, float hi, float hj, struct part *restrict pi, - const struct part *restrict pj, float a, float H) {} + const struct part *restrict pj, float a, float H) { + + struct chemistry_part_data *chi = &pi->chemistry_data; + const struct chemistry_part_data *chj = &pj->chemistry_data; + + float wi, wi_dx; + + /* Get the masses. */ + const float mj = pj->mass; + + /* Get r */ + const float r = sqrtf(r2); + + /* Compute the kernel function for pi */ + const float ui = r / hi; + kernel_deval(ui, &wi, &wi_dx); + + /* Compute contribution to the smooth metallicity */ + for (int i = 0; i < chemistry_element_count; i++) { + chi->smoothed_metal_mass_fraction[i] += + mj * chj->metal_mass_fraction[i] * wi; + } + + // Smooth metal mass fraction of all metals + chi->smoothed_metal_mass_fraction_total += + mj * chj->metal_mass_fraction_total * wi; + + // Smooth iron mass fraction from SNIa + chi->smoothed_iron_mass_fraction_from_SNIa += + mj * chj->iron_mass_fraction_from_SNIa * wi; +} #endif /* SWIFT_EAGLE_CHEMISTRY_IACT_H */ diff --git a/src/chemistry/EAGLE/chemistry_io.h b/src/chemistry/EAGLE/chemistry_io.h index d78a5f19a52e92426d5eb1f8575abe2b564e32ac..269c47204ccfe6c0a7eaad2343cd054cfbbd7050 100644 --- a/src/chemistry/EAGLE/chemistry_io.h +++ b/src/chemistry/EAGLE/chemistry_io.h @@ -32,9 +32,18 @@ */ INLINE static int chemistry_read_particles(struct part* parts, struct io_props* list) { - - /* Nothing to read */ - return 0; + /* List what we want to read */ + list[0] = io_make_input_field( + "ElementAbundance", FLOAT, chemistry_element_count, OPTIONAL, + UNIT_CONV_NO_UNITS, parts, chemistry_data.metal_mass_fraction); + list[1] = + io_make_input_field("Z", FLOAT, 1, OPTIONAL, UNIT_CONV_NO_UNITS, parts, + chemistry_data.metal_mass_fraction_total); + list[2] = io_make_input_field("IronFromSNIa", FLOAT, 1, OPTIONAL, + UNIT_CONV_NO_UNITS, parts, + 
chemistry_data.iron_mass_fraction_from_SNIa); + + return 3; } /** diff --git a/src/clocks.c b/src/clocks.c index cac0131acade08e41ee7ed4a22fabde49e197060..49297f5db1cc10a3d9f4537c5900610dded7ffba 100644 --- a/src/clocks.c +++ b/src/clocks.c @@ -29,6 +29,7 @@ #include "../config.h" /* Standard headers. */ +#include <limits.h> #include <stdio.h> #include <unistd.h> @@ -262,6 +263,17 @@ const char *clocks_get_timesincestart(void) { return buffer; } +/** + * Returns the wall-clock time since the start of execution in hours. + * + * Need to call clocks_set_cpufreq() to mark the start of execution. + * + * @result the time since the start of the execution + */ +double clocks_get_hours_since_start(void) { + return clocks_diff_ticks(getticks(), clocks_start) / (3600. * 1000.0); +} + /** * @brief return the cpu time used. * @@ -280,3 +292,23 @@ double clocks_get_cputime_used(void) { times(&tmstic); return (double)(tmstic.tms_utime + tmstic.tms_cutime); } + +/** + * @brief Return an integer based on the current time. + * + * Normally this will be the remainder of the current number of nanoseconds + * so not very dissimilar in the most significant figures unless the time + * between calls is greater than INT_MAX nanoseconds. For faster calls use + * fewer figures, if that matters. + * + * @result an integer. + */ +int clocks_random_seed(void) { +#ifdef HAVE_CLOCK_GETTIME + struct timespec timespec; + clock_gettime(CLOCK_REALTIME, ×pec); + return (timespec.tv_nsec % INT_MAX); +#else + return (getticks() % INT_MAX); +#endif +} diff --git a/src/clocks.h b/src/clocks.h index f3901584774c7586d6a68b4415d6b443cb53c466..ce08167bd504d47a76542870791057881c6d2f17 100644 --- a/src/clocks.h +++ b/src/clocks.h @@ -19,8 +19,13 @@ #ifndef SWIFT_CLOCKS_H #define SWIFT_CLOCKS_H +/* Config parameters. */ +#include "../config.h" + +/* System includes. */ #include <sys/times.h> -#include <time.h> + +/* Local includes */ #include "cycle.h" /* Struct to record a time for the clocks functions. 
*/ @@ -42,7 +47,9 @@ double clocks_from_ticks(ticks tics); ticks clocks_to_ticks(double interval); double clocks_diff_ticks(ticks tic, ticks toc); const char *clocks_get_timesincestart(void); +double clocks_get_hours_since_start(void); double clocks_get_cputime_used(void); +int clocks_random_seed(void); #endif /* SWIFT_CLOCKS_H */ diff --git a/src/collectgroup.c b/src/collectgroup.c index c83d7bef3f03e672e8b5c9036e5daaab26b5d190..0b7b419b565612149fd2b295116b37aa65aa01e9 100644 --- a/src/collectgroup.c +++ b/src/collectgroup.c @@ -36,7 +36,8 @@ /* Local collections for MPI reduces. */ struct mpicollectgroup1 { - long long updates, g_updates, s_updates; + long long updated, g_updated, s_updated; + long long inhibited, g_inhibited, s_inhibited; integertime_t ti_hydro_end_min; integertime_t ti_gravity_end_min; int forcerebuild; @@ -85,9 +86,12 @@ void collectgroup1_apply(struct collectgroup1 *grp1, struct engine *e) { e->ti_end_min = min(e->ti_hydro_end_min, e->ti_gravity_end_min); e->ti_end_max = max(e->ti_hydro_end_max, e->ti_gravity_end_max); e->ti_beg_max = max(e->ti_hydro_beg_max, e->ti_gravity_beg_max); - e->updates = grp1->updates; - e->g_updates = grp1->g_updates; - e->s_updates = grp1->s_updates; + e->updates = grp1->updated; + e->g_updates = grp1->g_updated; + e->s_updates = grp1->s_updated; + e->nr_inhibited_parts = grp1->inhibited; + e->nr_inhibited_gparts = grp1->g_inhibited; + e->nr_inhibited_sparts = grp1->s_inhibited; e->forcerebuild = grp1->forcerebuild; } @@ -95,10 +99,16 @@ void collectgroup1_apply(struct collectgroup1 *grp1, struct engine *e) { * @brief Initialises a collectgroup1 struct ready for processing. * * @param grp1 The #collectgroup1 to initialise - * @param updates the number of updated hydro particles on this node this step. - * @param g_updates the number of updated gravity particles on this node this + * @param updated the number of updated hydro particles on this node this step. 
+ * @param g_updated the number of updated gravity particles on this node this + * step. + * @param s_updated the number of updated star particles on this node this step. + * @param inhibited the number of inhibited hydro particles on this node this + * step. + * @param g_inhibited the number of inhibited gravity particles on this node + * this step. + * @param s_inhibited the number of inhibited star particles on this node this * step. - * @param s_updates the number of updated star particles on this node this step. * @param ti_hydro_end_min the minimum end time for next hydro time step after * this step. * @param ti_hydro_end_max the maximum end time for next hydro time step after @@ -113,17 +123,22 @@ void collectgroup1_apply(struct collectgroup1 *grp1, struct engine *e) { * after this step. * @param forcerebuild whether a rebuild is required after this step. */ -void collectgroup1_init(struct collectgroup1 *grp1, size_t updates, - size_t g_updates, size_t s_updates, +void collectgroup1_init(struct collectgroup1 *grp1, size_t updated, + size_t g_updated, size_t s_updated, size_t inhibited, + size_t g_inhibited, size_t s_inhibited, integertime_t ti_hydro_end_min, integertime_t ti_hydro_end_max, integertime_t ti_hydro_beg_max, integertime_t ti_gravity_end_min, integertime_t ti_gravity_end_max, integertime_t ti_gravity_beg_max, int forcerebuild) { - grp1->updates = updates; - grp1->g_updates = g_updates; - grp1->s_updates = s_updates; + + grp1->updated = updated; + grp1->g_updated = g_updated; + grp1->s_updated = s_updated; + grp1->inhibited = inhibited; + grp1->g_inhibited = g_inhibited; + grp1->s_inhibited = s_inhibited; grp1->ti_hydro_end_min = ti_hydro_end_min; grp1->ti_hydro_end_max = ti_hydro_end_max; grp1->ti_hydro_beg_max = ti_hydro_beg_max; @@ -147,9 +162,12 @@ void collectgroup1_reduce(struct collectgroup1 *grp1) { /* Populate an MPI group struct and reduce this across all nodes. 
*/ struct mpicollectgroup1 mpigrp11; - mpigrp11.updates = grp1->updates; - mpigrp11.g_updates = grp1->g_updates; - mpigrp11.s_updates = grp1->s_updates; + mpigrp11.updated = grp1->updated; + mpigrp11.g_updated = grp1->g_updated; + mpigrp11.s_updated = grp1->s_updated; + mpigrp11.inhibited = grp1->inhibited; + mpigrp11.g_inhibited = grp1->g_inhibited; + mpigrp11.s_inhibited = grp1->s_inhibited; mpigrp11.ti_hydro_end_min = grp1->ti_hydro_end_min; mpigrp11.ti_gravity_end_min = grp1->ti_gravity_end_min; mpigrp11.forcerebuild = grp1->forcerebuild; @@ -160,9 +178,12 @@ void collectgroup1_reduce(struct collectgroup1 *grp1) { error("Failed to reduce mpicollection1."); /* And update. */ - grp1->updates = mpigrp12.updates; - grp1->g_updates = mpigrp12.g_updates; - grp1->s_updates = mpigrp12.s_updates; + grp1->updated = mpigrp12.updated; + grp1->g_updated = mpigrp12.g_updated; + grp1->s_updated = mpigrp12.s_updated; + grp1->inhibited = mpigrp12.inhibited; + grp1->g_inhibited = mpigrp12.g_inhibited; + grp1->s_inhibited = mpigrp12.s_inhibited; grp1->ti_hydro_end_min = mpigrp12.ti_hydro_end_min; grp1->ti_gravity_end_min = mpigrp12.ti_gravity_end_min; grp1->forcerebuild = mpigrp12.forcerebuild; @@ -182,9 +203,14 @@ static void doreduce1(struct mpicollectgroup1 *mpigrp11, /* Do what is needed for each part of the collection. */ /* Sum of updates. */ - mpigrp11->updates += mpigrp12->updates; - mpigrp11->g_updates += mpigrp12->g_updates; - mpigrp11->s_updates += mpigrp12->s_updates; + mpigrp11->updated += mpigrp12->updated; + mpigrp11->g_updated += mpigrp12->g_updated; + mpigrp11->s_updated += mpigrp12->s_updated; + + /* Sum of inhibited */ + mpigrp11->inhibited += mpigrp12->inhibited; + mpigrp11->g_inhibited += mpigrp12->g_inhibited; + mpigrp11->s_inhibited += mpigrp12->s_inhibited; /* Minimum end time. 
*/ mpigrp11->ti_hydro_end_min = @@ -204,7 +230,7 @@ static void mpicollectgroup1_reduce(void *in, void *inout, int *len, MPI_Datatype *datatype) { for (int i = 0; i < *len; ++i) - doreduce1(&((struct mpicollectgroup1 *)inout)[0], + doreduce1(&((struct mpicollectgroup1 *)inout)[i], &((const struct mpicollectgroup1 *)in)[i]); } diff --git a/src/collectgroup.h b/src/collectgroup.h index 8bf8a9d1b75f9a5ddb3f19fa9cdb4103e044ea59..b6e8769ac993cc023ae402cdfc4b0169406f6181 100644 --- a/src/collectgroup.h +++ b/src/collectgroup.h @@ -35,7 +35,10 @@ struct engine; struct collectgroup1 { /* Number of particles updated */ - long long updates, g_updates, s_updates; + long long updated, g_updated, s_updated; + + /* Number of particles inhibited */ + long long inhibited, g_inhibited, s_inhibited; /* Times for the time-step */ integertime_t ti_hydro_end_min, ti_hydro_end_max, ti_hydro_beg_max; @@ -47,8 +50,9 @@ struct collectgroup1 { void collectgroup_init(void); void collectgroup1_apply(struct collectgroup1 *grp1, struct engine *e); -void collectgroup1_init(struct collectgroup1 *grp1, size_t updates, - size_t g_updates, size_t s_updates, +void collectgroup1_init(struct collectgroup1 *grp1, size_t updated, + size_t g_updated, size_t s_updated, size_t inhibited, + size_t g_inhibited, size_t s_inhibited, integertime_t ti_hydro_end_min, integertime_t ti_hydro_end_max, integertime_t ti_hydro_beg_max, diff --git a/src/common_io.c b/src/common_io.c index 68311107575a89ce8a2990a8e0f7a8eeb5d2d644..087697b489269d97a268966d341093dd666dd9c9 100644 --- a/src/common_io.c +++ b/src/common_io.c @@ -358,6 +358,10 @@ void io_write_code_description(hid_t h_file) { #ifdef HAVE_METIS io_write_attribute_s(h_grpcode, "METIS library version", metis_version()); #endif +#ifdef HAVE_PARMETIS + io_write_attribute_s(h_grpcode, "ParMETIS library version", + parmetis_version()); +#endif #else io_write_attribute_s(h_grpcode, "MPI library", "Non-MPI version of SWIFT"); #endif @@ -374,7 +378,7 @@ void 
io_write_engine_policy(hid_t h_file, const struct engine* e) { const hid_t h_grp = H5Gcreate1(h_file, "/Policy", 0); if (h_grp < 0) error("Error while creating policy group"); - for (int i = 1; i <= engine_maxpolicy; ++i) + for (int i = 1; i < engine_maxpolicy; ++i) if (e->policy & (1 << i)) io_write_attribute_i(h_grp, engine_policy_names[i + 1], 1); else @@ -518,6 +522,46 @@ void io_convert_gpart_d_mapper(void* restrict temp, int N, props.convert_gpart_d(e, gparts + delta + i, &temp_d[i * dim]); } +/** + * @brief Mapper function to copy #spart into a buffer of floats using a + * conversion function. + */ +void io_convert_spart_f_mapper(void* restrict temp, int N, + void* restrict extra_data) { + + const struct io_props props = *((const struct io_props*)extra_data); + const struct spart* restrict sparts = props.sparts; + const struct engine* e = props.e; + const size_t dim = props.dimension; + + /* How far are we with this chunk? */ + float* restrict temp_f = (float*)temp; + const ptrdiff_t delta = (temp_f - props.start_temp_f) / dim; + + for (int i = 0; i < N; i++) + props.convert_spart_f(e, sparts + delta + i, &temp_f[i * dim]); +} + +/** + * @brief Mapper function to copy #spart into a buffer of doubles using a + * conversion function. + */ +void io_convert_spart_d_mapper(void* restrict temp, int N, + void* restrict extra_data) { + + const struct io_props props = *((const struct io_props*)extra_data); + const struct spart* restrict sparts = props.sparts; + const struct engine* e = props.e; + const size_t dim = props.dimension; + + /* How far are we with this chunk? */ + double* restrict temp_d = (double*)temp; + const ptrdiff_t delta = (temp_d - props.start_temp_d) / dim; + + for (int i = 0; i < N; i++) + props.convert_spart_d(e, sparts + delta + i, &temp_d[i * dim]); +} + /** * @brief Copy the particle data into a temporary buffer ready for i/o. 
* @@ -599,6 +643,30 @@ void io_copy_temp_buffer(void* temp, const struct engine* e, io_convert_gpart_d_mapper, temp_d, N, copySize, 0, (void*)&props); + } else if (props.convert_spart_f != NULL) { + + /* Prepare some parameters */ + float* temp_f = (float*)temp; + props.start_temp_f = (float*)temp; + props.e = e; + + /* Copy the whole thing into a buffer */ + threadpool_map((struct threadpool*)&e->threadpool, + io_convert_spart_f_mapper, temp_f, N, copySize, 0, + (void*)&props); + + } else if (props.convert_spart_d != NULL) { + + /* Prepare some parameters */ + double* temp_d = (double*)temp; + props.start_temp_d = (double*)temp; + props.e = e; + + /* Copy the whole thing into a buffer */ + threadpool_map((struct threadpool*)&e->threadpool, + io_convert_spart_d_mapper, temp_d, N, copySize, 0, + (void*)&props); + } else { error("Missing conversion function"); } @@ -630,9 +698,9 @@ void io_prepare_dm_gparts_mapper(void* restrict data, int Ndm, void* dummy) { /* Let's give all these gparts a negative id */ for (int i = 0; i < Ndm; ++i) { - /* 0 or negative ids are not allowed */ - if (gparts[i].id_or_neg_offset <= 0) - error("0 or negative ID for DM particle %i: ID=%lld", i, + /* Negative ids are not allowed */ + if (gparts[i].id_or_neg_offset < 0) + error("Negative ID for DM particle %i: ID=%lld", i, gparts[i].id_or_neg_offset); /* Set gpart type */ @@ -747,7 +815,7 @@ void io_duplicate_hydro_sparts_mapper(void* restrict data, int Nstars, gparts[i + Ndm].mass = sparts[i].mass; /* Set gpart type */ - gparts[i + Ndm].type = swift_type_star; + gparts[i + Ndm].type = swift_type_stars; /* Link the particles */ gparts[i + Ndm].id_or_neg_offset = -(long long)(offset + i); @@ -768,9 +836,10 @@ void io_duplicate_hydro_sparts_mapper(void* restrict data, int Nstars, * @param Nstars The number of stars particles read in. * @param Ndm The number of DM and gas particles read in. 
*/ -void io_duplicate_star_gparts(struct threadpool* tp, struct spart* const sparts, - struct gpart* const gparts, size_t Nstars, - size_t Ndm) { +void io_duplicate_stars_gparts(struct threadpool* tp, + struct spart* const sparts, + struct gpart* const gparts, size_t Nstars, + size_t Ndm) { struct duplication_data data; data.gparts = gparts; @@ -782,35 +851,109 @@ void io_duplicate_star_gparts(struct threadpool* tp, struct spart* const sparts, } /** - * @brief Copy every DM #gpart into the dmparts array. + * @brief Copy every non-inhibited #part into the parts_written array. + * + * @param parts The array of #part containing all particles. + * @param xparts The array of #xpart containing all particles. + * @param parts_written The array of #part to fill with particles we want to + * write. + * @param xparts_written The array of #xpart to fill with particles we want to + * write. + * @param Nparts The total number of #part. + * @param Nparts_written The total number of #part to write. + */ +void io_collect_parts_to_write(const struct part* restrict parts, + const struct xpart* restrict xparts, + struct part* restrict parts_written, + struct xpart* restrict xparts_written, + const size_t Nparts, + const size_t Nparts_written) { + + size_t count = 0; + + /* Loop over all parts */ + for (size_t i = 0; i < Nparts; ++i) { + + /* And collect the ones that have not been removed */ + if (parts[i].time_bin != time_bin_inhibited) { + + parts_written[count] = parts[i]; + xparts_written[count] = xparts[i]; + count++; + } + } + + /* Check that everything is fine */ + if (count != Nparts_written) + error("Collected the wrong number of particles (%zu vs. %zu expected)", + count, Nparts_written); +} + +/** + * @brief Copy every non-inhibited #spart into the sparts_written array. + * + * @param sparts The array of #spart containing all particles. + * @param sparts_written The array of #spart to fill with particles we want to + * write. + * @param Nsparts The total number of #part. 
+ * @param Nsparts_written The total number of #part to write. + */ +void io_collect_sparts_to_write(const struct spart* restrict sparts, + struct spart* restrict sparts_written, + const size_t Nsparts, + const size_t Nsparts_written) { + + size_t count = 0; + + /* Loop over all parts */ + for (size_t i = 0; i < Nsparts; ++i) { + + /* And collect the ones that have not been removed */ + if (sparts[i].time_bin != time_bin_inhibited) { + + sparts_written[count] = sparts[i]; + count++; + } + } + + /* Check that everything is fine */ + if (count != Nsparts_written) + error("Collected the wrong number of s-particles (%zu vs. %zu expected)", + count, Nsparts_written); +} + +/** + * @brief Copy every non-inhibited DM #gpart into the gparts_written array. * * @param gparts The array of #gpart containing all particles. - * @param Ntot The number of #gpart. - * @param dmparts The array of #gpart containg DM particles to be filled. - * @param Ndm The number of DM particles. + * @param gparts_written The array of #gpart to fill with particles we want to + * write. + * @param Ngparts The total number of #part. + * @param Ngparts_written The total number of #part to write. 
*/ -void io_collect_dm_gparts(const struct gpart* const gparts, size_t Ntot, - struct gpart* const dmparts, size_t Ndm) { +void io_collect_gparts_to_write(const struct gpart* restrict gparts, + struct gpart* restrict gparts_written, + const size_t Ngparts, + const size_t Ngparts_written) { size_t count = 0; - /* Loop over all gparts */ - for (size_t i = 0; i < Ntot; ++i) { + /* Loop over all parts */ + for (size_t i = 0; i < Ngparts; ++i) { - /* message("i=%zd count=%zd id=%lld part=%p", i, count, gparts[i].id, - * gparts[i].part); */ + /* And collect the ones that have not been removed */ + if ((gparts[i].time_bin != time_bin_inhibited) && + (gparts[i].type == swift_type_dark_matter)) { - /* And collect the DM ones */ - if (gparts[i].type == swift_type_dark_matter) { - dmparts[count] = gparts[i]; + gparts_written[count] = gparts[i]; count++; } } /* Check that everything is fine */ - if (count != Ndm) - error("Collected the wrong number of dm particles (%zu vs. %zu expected)", - count, Ndm); + if (count != Ngparts_written) + error("Collected the wrong number of s-particles (%zu vs. 
%zu expected)", + count, Ngparts_written); } /** @@ -853,8 +996,8 @@ void io_check_output_fields(const struct swift_params* params, darkmatter_write_particles(&gp, list, &num_fields); break; - case swift_type_star: - star_write_particles(&sp, list, &num_fields); + case swift_type_stars: + stars_write_particles(&sp, list, &num_fields); break; default: @@ -939,8 +1082,8 @@ void io_write_output_field_parameter(const char* filename) { darkmatter_write_particles(NULL, list, &num_fields); break; - case swift_type_star: - star_write_particles(NULL, list, &num_fields); + case swift_type_stars: + stars_write_particles(NULL, list, &num_fields); break; default: diff --git a/src/common_io.h b/src/common_io.h index 152b40a8d7c931b3398f4f04d3a61e9cf7f1836c..016c5138e18ae8636834c35d659e07d8fcd46e36 100644 --- a/src/common_io.h +++ b/src/common_io.h @@ -35,6 +35,7 @@ struct part; struct gpart; struct spart; +struct xpart; struct io_props; struct engine; struct threadpool; @@ -56,12 +57,6 @@ enum IO_DATA_TYPE { CHAR }; -/** - * @brief The different formats for when to run structure finding. 
- * - */ -enum IO_STF_OUTPUT_FORMAT { STEPS = 0, TIME }; - #if defined(HAVE_HDF5) hid_t io_hdf5_type(enum IO_DATA_TYPE type); @@ -97,16 +92,29 @@ void io_copy_temp_buffer(void* temp, const struct engine* e, size_t io_sizeof_type(enum IO_DATA_TYPE type); int io_is_double_precision(enum IO_DATA_TYPE type); -void io_collect_dm_gparts(const struct gpart* const gparts, size_t Ntot, - struct gpart* const dmparts, size_t Ndm); +void io_collect_parts_to_write(const struct part* restrict parts, + const struct xpart* restrict xparts, + struct part* restrict parts_written, + struct xpart* restrict xparts_written, + const size_t Nparts, + const size_t Nparts_written); +void io_collect_sparts_to_write(const struct spart* restrict sparts, + struct spart* restrict sparts_written, + const size_t Nsparts, + const size_t Nsparts_written); +void io_collect_gparts_to_write(const struct gpart* restrict gparts, + struct gpart* restrict gparts_written, + const size_t Ngparts, + const size_t Ngparts_written); void io_prepare_dm_gparts(struct threadpool* tp, struct gpart* const gparts, size_t Ndm); void io_duplicate_hydro_gparts(struct threadpool* tp, struct part* const parts, struct gpart* const gparts, size_t Ngas, size_t Ndm); -void io_duplicate_star_gparts(struct threadpool* tp, struct spart* const sparts, - struct gpart* const gparts, size_t Nstars, - size_t Ndm); +void io_duplicate_stars_gparts(struct threadpool* tp, + struct spart* const sparts, + struct gpart* const gparts, size_t Nstars, + size_t Ndm); void io_check_output_fields(const struct swift_params* params, const long long N_total[3]); diff --git a/src/const.h b/src/const.h index 6c5b5299c08efb7935b046ecfd0b3d67b7dc4c7a..e417b8ca3827ef87396706c56df36bb9bd3aed75 100644 --- a/src/const.h +++ b/src/const.h @@ -21,13 +21,10 @@ #define SWIFT_CONST_H /* SPH Viscosity constants. 
*/ -#define const_viscosity_alpha 0.8f -#define const_viscosity_alpha_min \ - 0.1f /* Values taken from (Price,2004), not used in legacy gadget mode */ -#define const_viscosity_alpha_max \ - 2.0f /* Values taken from (Price,2004), not used in legacy gadget mode */ -#define const_viscosity_length \ - 0.1f /* Values taken from (Price,2004), not used in legacy gadget mode */ +/* Cosmology default beta=3.0. Planetary default beta=4.0 + * Alpha can be set in the parameter file. + * Beta is defined as in e.g. Price (2010) Eqn (103) */ +#define const_viscosity_beta 3.0f /* SPH Thermal conductivity constants. */ #define const_conductivity_alpha \ diff --git a/src/cooling.c b/src/cooling.c index 154b859f74402d9e9a8adf1fb6c796b5195b8cd1..376373ad80e1d784f183eecafbe51d20a80b3159 100644 --- a/src/cooling.c +++ b/src/cooling.c @@ -72,9 +72,12 @@ void cooling_struct_dump(const struct cooling_function_data* cooling, * * @param cooling the struct * @param stream the file stream + * @param cosmo #cosmology structure */ -void cooling_struct_restore(const struct cooling_function_data* cooling, - FILE* stream) { +void cooling_struct_restore(struct cooling_function_data* cooling, FILE* stream, + const struct cosmology* cosmo) { restart_read_blocks((void*)cooling, sizeof(struct cooling_function_data), 1, stream, NULL, "cooling function"); + + cooling_restore_tables(cooling, cosmo); } diff --git a/src/cooling.h b/src/cooling.h index 0fb04b9e484d989e746a254fc1934dc20033fb09..c1a78e256fdd77fcb1f5cde074f843bd16d412ec 100644 --- a/src/cooling.h +++ b/src/cooling.h @@ -34,6 +34,8 @@ #include "./cooling/const_du/cooling.h" #elif defined(COOLING_CONST_LAMBDA) #include "./cooling/const_lambda/cooling.h" +#elif defined(COOLING_COMPTON) +#include "./cooling/Compton/cooling.h" #elif defined(COOLING_GRACKLE) #include "./cooling/grackle/cooling.h" #elif defined(COOLING_EAGLE) @@ -53,7 +55,7 @@ void cooling_print(const struct cooling_function_data* cooling); /* Dump/restore. 
*/ void cooling_struct_dump(const struct cooling_function_data* cooling, FILE* stream); -void cooling_struct_restore(const struct cooling_function_data* cooling, - FILE* stream); +void cooling_struct_restore(struct cooling_function_data* cooling, FILE* stream, + const struct cosmology* cosmo); #endif /* SWIFT_COOLING_H */ diff --git a/src/cooling/Compton/cooling.h b/src/cooling/Compton/cooling.h new file mode 100644 index 0000000000000000000000000000000000000000..f440cd03455c07d2eeb64c37189aed36efe78e09 --- /dev/null +++ b/src/cooling/Compton/cooling.h @@ -0,0 +1,366 @@ +/******************************************************************************* + * This file is part of SWIFT. + * Copyright (c) 2018 Matthieu Schaller (matthieu.schaller@durham.ac.uk) + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published + * by the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public License + * along with this program. If not, see <http://www.gnu.org/licenses/>. + * + ******************************************************************************/ +#ifndef SWIFT_COOLING_COMPTON_H +#define SWIFT_COOLING_COMPTON_H + +/* Config parameters. */ +#include "../config.h" + +/* Some standard headers. */ +#include <float.h> +#include <math.h> + +/* Local includes. */ +#include "const.h" +#include "error.h" +#include "hydro.h" +#include "parser.h" +#include "part.h" +#include "physical_constants.h" +#include "units.h" + +/** + * @brief Common operations performed on the cooling function at a + * given time-step or redshift. 
+ * + * @param cosmo The current cosmological model. + * @param cooling The #cooling_function_data used in the run. + * @param restart_flag Are we calling this directly after a restart? + */ +INLINE static void cooling_update(const struct cosmology* cosmo, + struct cooling_function_data* cooling, + const int restart_flag) { + // Add content if required. +} + +/** + * @brief Compute the mean molecular weight as a function of temperature for + * primordial gas. + * + * @param T The temperature of the gas [K]. + * @param H_mass_fraction The hydrogen mass fraction of the gas. + * @param T_transition The temperature of the transition from HII to HI [K]. + */ +__attribute__((always_inline, const)) INLINE static double +mean_molecular_weight(const double T, const double H_mass_fraction, + const double T_transition) { + + if (T > T_transition) + return 4. / (8. - 5. * (1. - H_mass_fraction)); + else + return 4. / (1. + 3. * H_mass_fraction); +} + +/** + * @brief Compute the temperature for a given internal energy per unit mass + * assuming primordial gas. + * + * @param u_cgs The internal energy per unit mass of the gas [erg * g^-1]. + * @param H_mass_fraction The hydrogen mass fraction of the gas. + * @param T_transition The temperature of the transition from HII to HI [K]. + * @param m_H_cgs The mass of the Hydorgen atom [g]. 
+ * @param k_B_cgs The Boltzmann constant in cgs units [erg * K^-1] + * @return The temperature of the gas [K] + */ +__attribute__((always_inline, const)) INLINE static double +temperature_from_internal_energy(const double u_cgs, + const double H_mass_fraction, + const double T_transition, + const double m_H_cgs, const double k_B_cgs) { + + const double T_over_mu = hydro_gamma_minus_one * u_cgs * m_H_cgs / k_B_cgs; + + const double mu_high = + mean_molecular_weight(T_transition + 1., H_mass_fraction, T_transition); + const double mu_low = + mean_molecular_weight(T_transition - 1., H_mass_fraction, T_transition); + + if (T_over_mu > (T_transition + 1.) / mu_high) + return T_over_mu * mu_high; + else if (T_over_mu < (T_transition - 1.) / mu_low) + return T_over_mu * mu_low; + else + return T_transition; +} + +/** + * @brief Calculates du/dt in CGS units for a particle. + * + * + * @param cosmo The current cosmological model. + * @param hydro_props The properties of the hydro scheme. + * @param cooling The #cooling_function_data used in the run. + * @param z The current redshift. + * @param u The current internal energy in internal units. + * @param p Pointer to the particle data. + * @return The change in energy per unit mass due to cooling for this particle + * in cgs units [erg * g^-1 * s^-1]. 
+ */ +__attribute__((always_inline)) INLINE static double Compton_cooling_rate_cgs( + const struct cosmology* cosmo, const struct hydro_props* hydro_props, + const struct cooling_function_data* cooling, const double z, const double u, + const struct part* p) { + + /* Get particle density */ + const double rho = hydro_get_physical_density(p, cosmo); + const double rho_cgs = rho * cooling->conv_factor_density_to_cgs; + + /* Powers of (1 + z) */ + const double zp1 = z + 1.; + const double zp1p2 = zp1 * zp1; + const double zp1p4 = zp1p2 * zp1p2; /* (1 + z)^4 */ + + /* CMB temperature at this redshift */ + const double T_CMB = cooling->const_T_CMB_0 * zp1; + + /* Gas properties */ + const double H_mass_fraction = hydro_props->hydrogen_mass_fraction; + const double T_transition = hydro_props->hydrogen_ionization_temperature; + + /* Particle temperature */ + const double u_cgs = u * cooling->conv_factor_energy_to_cgs; + const double T = temperature_from_internal_energy(u_cgs, H_mass_fraction, + T_transition, 1., 1.); + // MATTHIEU: to do: get H mass in cgs and k_B in cgs. + + /* Electron abundance */ + double electron_abundance = 0.; // MATTHIEU: To do: compute X_e + + /* Temperature difference with the CMB */ + const double delta_T = T - T_CMB; + + /* Electron density */ + const double electron_density_cgs = + rho_cgs * electron_abundance * cooling->proton_mass_cgs_inv; + + /* Compton formula */ + return cooling->const_Compton_rate_cgs * delta_T * zp1p4 * + electron_density_cgs / rho_cgs; +} + +/** + * @brief Apply the cooling function to a particle. + * + * @param phys_const The physical constants in internal units. + * @param us The internal system of units. + * @param cosmo The current cosmological model. + * @param hydro_props The properties of the hydro scheme. + * @param cooling The #cooling_function_data used in the run. + * @param p Pointer to the particle data. + * @param xp Pointer to the particle' extended data. + * @param dt The time-step of this particle. 
+ * @param dt_therm The time-step operator used for thermal quantities. + */ +__attribute__((always_inline)) INLINE static void cooling_cool_part( + const struct phys_const* restrict phys_const, + const struct unit_system* restrict us, + const struct cosmology* restrict cosmo, + const struct hydro_props* hydro_props, + const struct cooling_function_data* restrict cooling, + struct part* restrict p, struct xpart* restrict xp, const float dt, + const float dt_therm) { + + /* Nothing to do here? */ + if (dt == 0.) return; + + /* Internal energy floor */ + const float u_floor = hydro_props->minimal_internal_energy; + + /* Current energy */ + const float u_old = hydro_get_physical_internal_energy(p, xp, cosmo); + + /* Current du_dt in physical coordinates (internal units) */ + const float hydro_du_dt = hydro_get_physical_internal_energy_dt(p, cosmo); + + /* Calculate cooling du_dt (in cgs units) */ + const double cooling_du_dt_cgs = + Compton_cooling_rate_cgs(cosmo, hydro_props, cooling, cosmo->z, u_old, p); + + /* Convert to internal units */ + float cooling_du_dt = + cooling_du_dt_cgs * cooling->conv_factor_energy_rate_from_cgs; + + /* Add cosmological term */ + cooling_du_dt *= cosmo->a * cosmo->a; + + float total_du_dt = hydro_du_dt + cooling_du_dt; + + /* We now need to check that we are not going to go below any of the limits */ + + /* First, check whether we may end up below the minimal energy after + * this step 1/2 kick + another 1/2 kick that could potentially be for + * a time-step twice as big. We hence check for 1.5 delta_t. */ + if (u_old + total_du_dt * 1.5 * dt_therm < u_floor) { + total_du_dt = (u_floor - u_old) / (1.5f * dt_therm); + } + + /* Second, check whether the energy used in the prediction could get negative. + * We need to check for the 1/2 dt kick followed by a full time-step drift + * that could potentially be for a time-step twice as big. 
We hence check + * for 2.5 delta_t but this time against 0 energy not the minimum */ + if (u_old + total_du_dt * 2.5 * dt_therm < 0.) { + total_du_dt = -u_old / ((2.5f + 0.0001f) * dt_therm); + } + + /* Update the internal energy time derivative */ + hydro_set_physical_internal_energy_dt(p, cosmo, total_du_dt); + + /* Store the radiated energy (assuming dt will not change) */ + xp->cooling_data.radiated_energy += + -hydro_get_mass(p) * (total_du_dt - hydro_du_dt) * dt_therm; +} + +/** + * @brief Computes the time-step due to cooling for this particle. + * + * We impose no time-step limit. + * + * @param cooling The #cooling_function_data used in the run. + * @param phys_const The physical constants in internal units. + * @param cosmo The current cosmological model. + * @param hydro_props The properties of the hydro scheme. + * @param us The internal system of units. + * @param p Pointer to the particle data. + * @param xp Pointer to the extended data of the particle. + */ +__attribute__((always_inline)) INLINE static float cooling_timestep( + const struct cooling_function_data* restrict cooling, + const struct phys_const* restrict phys_const, + const struct cosmology* restrict cosmo, + const struct unit_system* restrict us, + const struct hydro_props* hydro_props, const struct part* restrict p, + const struct xpart* restrict xp) { + + return FLT_MAX; +} + +/** + * @brief Sets the cooling properties of the (x-)particles to a valid start + * state. + * + * Nothing to do here. Just set the radiated energy counter to 0. + * + * @param phys_const The physical constants in internal units. + * @param cooling The properties of the cooling function. + * @param us The internal system of units. + * @param cosmo The current cosmological model. + * @param p Pointer to the particle data. + * @param xp Pointer to the extended particle data. 
+ */ +__attribute__((always_inline)) INLINE static void cooling_first_init_part( + const struct phys_const* restrict phys_const, + const struct unit_system* restrict us, + const struct cosmology* restrict cosmo, + const struct cooling_function_data* restrict cooling, + const struct part* restrict p, struct xpart* restrict xp) { + + xp->cooling_data.radiated_energy = 0.f; +} + +/** + * @brief Returns the total radiated energy by this particle. + * + * @param xp The extended particle data + */ +__attribute__((always_inline)) INLINE static float cooling_get_radiated_energy( + const struct xpart* restrict xp) { + + return xp->cooling_data.radiated_energy; +} + +/** + * @brief Initialises the cooling properties. + * + * @param parameter_file The parsed parameter file. + * @param us The current internal system of units. + * @param phys_const The physical constants in internal units. + * @param cooling The cooling properties to initialize + */ +static INLINE void cooling_init_backend(struct swift_params* parameter_file, + const struct unit_system* us, + const struct phys_const* phys_const, + struct cooling_function_data* cooling) { + + /* Some useful conversion values */ + cooling->conv_factor_density_to_cgs = + units_cgs_conversion_factor(us, UNIT_CONV_DENSITY); + cooling->conv_factor_energy_to_cgs = + units_cgs_conversion_factor(us, UNIT_CONV_ENERGY_PER_UNIT_MASS); + cooling->conv_factor_energy_rate_from_cgs = + units_cgs_conversion_factor(us, UNIT_CONV_TIME) / + units_cgs_conversion_factor(us, UNIT_CONV_ENERGY_PER_UNIT_MASS); + + /* Useful constants */ + cooling->proton_mass_cgs_inv = + 1. 
/ (phys_const->const_proton_mass * + units_cgs_conversion_factor(us, UNIT_CONV_MASS)); + + /* Temperature of the CMB in CGS */ + const double T_CMB_0 = phys_const->const_T_CMB_0 * + units_cgs_conversion_factor(us, UNIT_CONV_TEMPERATURE); + cooling->const_T_CMB_0 = T_CMB_0; /* [K] */ + + /* Compute the coefficient at the front of the Compton cooling expression */ + const double radiation_constant = + 4. * phys_const->const_stefan_boltzmann / phys_const->const_speed_light_c; + const double compton_coefficient = + 4. * radiation_constant * phys_const->const_thomson_cross_section * + phys_const->const_boltzmann_k / + (phys_const->const_electron_mass * phys_const->const_speed_light_c); + const float dimension_coefficient[5] = {1, 2, -3, 0, -5}; + + /* This should be ~1.0178085e-37 [g cm^2 s^-3 K^-5] */ + const double compton_coefficient_cgs = + compton_coefficient * + units_general_cgs_conversion_factor(us, dimension_coefficient); + + /* And now the Compton rate [g cm^2 s^-3 K^-1] == [erg s^-1 K^-1]*/ + cooling->const_Compton_rate_cgs = + compton_coefficient_cgs * T_CMB_0 * T_CMB_0 * T_CMB_0 * T_CMB_0; +} + +/** + * @brief Restore cooling tables (if applicable) after + * restart + * + * @param cooling the cooling_function_data structure + * @param cosmo cosmology structure + */ +static INLINE void cooling_restore_tables(struct cooling_function_data* cooling, + const struct cosmology* cosmo) {} + +/** + * @brief Prints the properties of the cooling model to stdout. + * + * @param cooling The properties of the cooling function. + */ +static INLINE void cooling_print_backend( + const struct cooling_function_data* cooling) { + + message("Cooling function is 'Compton cooling'."); +} + +/** + * @brief Clean-up the memory allocated for the cooling routines + * + * @param cooling the cooling data structure. 
+ */ +static INLINE void cooling_clean(struct cooling_function_data* cooling) {} + +#endif /* SWIFT_COOLING_COMPTON_H */ diff --git a/src/cooling/Compton/cooling_io.h b/src/cooling/Compton/cooling_io.h new file mode 100644 index 0000000000000000000000000000000000000000..d020587c920f781450a5183954bc6c429e461512 --- /dev/null +++ b/src/cooling/Compton/cooling_io.h @@ -0,0 +1,62 @@ +/******************************************************************************* + * This file is part of SWIFT. + * Copyright (c) 2018 Matthieu Schaller (matthieu.schaller@durham.ac.uk) + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published + * by the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public License + * along with this program. If not, see <http://www.gnu.org/licenses/>. + * + ******************************************************************************/ +#ifndef SWIFT_COOLING_IO_COMPTON_H +#define SWIFT_COOLING_IO_COMPTON_H + +/* Config parameters. */ +#include "../config.h" + +/* Local includes */ +#include "io_properties.h" + +#ifdef HAVE_HDF5 + +/** + * @brief Writes the current model of cooling to the file + * @param h_grp The HDF5 group in which to write + * @param cooling the parameters of the cooling function. 
+ */ +__attribute__((always_inline)) INLINE static void cooling_write_flavour( + hid_t h_grp, const struct cooling_function_data* cooling) { + + io_write_attribute_s(h_grp, "Cooling Model", "Compton cooling"); + io_write_attribute_d(h_grp, "Compton rate [erg s^-1 K^-1]", + cooling->const_Compton_rate_cgs); +} +#endif + +/** + * @brief Specifies which particle fields to write to a dataset + * + * Nothing to write for this scheme. + * + * @param xparts The extended particle array. + * @param list The list of i/o properties to write. + * @param cooling The #cooling_function_data + * + * @return Returns the number of fields to write. + */ +__attribute__((always_inline)) INLINE static int cooling_write_particles( + const struct xpart* xparts, struct io_props* list, + const struct cooling_function_data* cooling) { + + return 0; +} + +#endif /* SWIFT_COOLING_IO_COMPTON_H */ diff --git a/src/cooling/Compton/cooling_struct.h b/src/cooling/Compton/cooling_struct.h new file mode 100644 index 0000000000000000000000000000000000000000..1e09d492b1c8f6eb92f9d4f5faa00998dc2daec9 --- /dev/null +++ b/src/cooling/Compton/cooling_struct.h @@ -0,0 +1,56 @@ +/******************************************************************************* + * This file is part of SWIFT. + * Copyright (c) 2018 Matthieu Schaller (matthieu.schaller@durham.ac.uk) + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published + * by the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public License + * along with this program. 
If not, see <http://www.gnu.org/licenses/>. + * + ******************************************************************************/ +#ifndef SWIFT_COOLING_STRUCT_COMPTON_H +#define SWIFT_COOLING_STRUCT_COMPTON_H + +/** + * @brief Properties of the cooling function. + */ +struct cooling_function_data { + + /*! Compton rate in cgs [g cm^2 s^-3 K^-1] */ + double const_Compton_rate_cgs; + + /*! Temperature of the CMB at redshift 0 in cgs [K] */ + double const_T_CMB_0; + + /*! Conversion factor from internal units to cgs for density */ + double conv_factor_density_to_cgs; + + /*! Conversion factor from internal units to cgs for internal energy */ + double conv_factor_energy_to_cgs; + + /*! Conversion factor from internal units from cgs for internal energy + * derivative */ + double conv_factor_energy_rate_from_cgs; + + /*! Inverse of the proton mass in cgs units [g^-1] */ + double proton_mass_cgs_inv; +}; + +/** + * @brief Properties of the cooling stored in the particle data. + */ +struct cooling_xpart_data { + + /*! Energy radiated away by this particle since the start of the run */ + float radiated_energy; +}; + +#endif /* SWIFT_COOLING_STRUCT_COMPTON_H */ diff --git a/src/cooling/EAGLE/cooling.c b/src/cooling/EAGLE/cooling.c new file mode 100644 index 0000000000000000000000000000000000000000..8dcef4035f633954906a8539f0e92e1ea4e89ca0 --- /dev/null +++ b/src/cooling/EAGLE/cooling.c @@ -0,0 +1,786 @@ +/******************************************************************************* + * This file is part of SWIFT. + * Copyright (c) 2017 Matthieu Schaller (matthieu.schaller@durham.ac.uk) + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published + * by the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. 
+ * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public License + * along with this program. If not, see <http://www.gnu.org/licenses/>. + * + ******************************************************************************/ +/** + * @file src/cooling/EAGLE/cooling.c + * @brief EAGLE cooling functions + */ + +/* Config parameters. */ +#include "../config.h" + +/* Some standard headers. */ +#include <float.h> +#include <hdf5.h> +#include <math.h> +#include <time.h> + +/* Local includes. */ +#include "chemistry.h" +#include "cooling.h" +#include "cooling_rates.h" +#include "cooling_struct.h" +#include "cooling_tables.h" +#include "error.h" +#include "hydro.h" +#include "interpolate.h" +#include "io_properties.h" +#include "parser.h" +#include "part.h" +#include "physical_constants.h" +#include "units.h" + +/* Maximum number of iterations for newton + * and bisection integration schemes */ +static const int newton_max_iterations = 15; +static const int bisection_max_iterations = 150; + +/* Tolerances for termination criteria. */ +static const float explicit_tolerance = 0.05; +static const float newton_tolerance = 1.0e-4; +static const float bisection_tolerance = 1.0e-6; +static const float rounding_tolerance = 1.0e-4; +static const double bracket_factor = 1.0488088481701515; /* sqrt(1.1) */ +static const double newton_log_u_guess_cgs = 1.414213562e6; /* log10(2e12) */ + +/** + * @brief Find the index of the current redshift along the redshift dimension + * of the cooling tables. 
+ * + * Since the redshift table is not evenly spaced, compare z with each + * table value in decreasing order starting with the previous redshift index + * + * The returned difference is expressed in units of the table separation. This + * means dx = (x - table[i]) / (table[i+1] - table[i]). It is always between + * 0 and 1. + * + * @param z Redshift we are searching for. + * @param z_index (return) Index of the redshift in the table. + * @param dz (return) Difference in redshift between z and table[z_index]. + * @param cooling #cooling_function_data structure containing redshift table. + */ +__attribute__((always_inline)) INLINE void get_redshift_index( + float z, int *z_index, float *dz, + struct cooling_function_data *restrict cooling) { + + /* before the earliest redshift or before hydrogen reionization, flag for + * collisional cooling */ + if (z > cooling->H_reion_z) { + *z_index = eagle_cooling_N_redshifts; + *dz = 0.0; + } + /* from reionization use the cooling tables */ + else if (z > cooling->Redshifts[eagle_cooling_N_redshifts - 1] && + z <= cooling->H_reion_z) { + *z_index = eagle_cooling_N_redshifts + 1; + *dz = 0.0; + } + /* at the end, just use the last value */ + else if (z <= cooling->Redshifts[0]) { + *z_index = 0; + *dz = 0.0; + } else { + + /* start at the previous index and search */ + for (int iz = cooling->previous_z_index; iz >= 0; iz--) { + if (z > cooling->Redshifts[iz]) { + + *z_index = iz; + cooling->previous_z_index = iz; + *dz = (z - cooling->Redshifts[iz]) / + (cooling->Redshifts[iz + 1] - cooling->Redshifts[iz]); + break; + } + } + } +} + +/** + * @brief Common operations performed on the cooling function at a + * given time-step or redshift. Predominantly used to read cooling tables + * above and below the current redshift, if not already read in. + * + * @param cosmo The current cosmological model. + * @param cooling The #cooling_function_data used in the run. + * @param restart_flag Flag indicating restarted run. 
+ */ +void cooling_update(const struct cosmology *cosmo, + struct cooling_function_data *cooling, + const int restart_flag) { + + /* Current redshift */ + const float redshift = cosmo->z; + + /* Get index along the redshift index of the tables */ + int z_index = -1; + float dz = 0.f; + if (redshift > cooling->H_reion_z) { + z_index = -2; + } else if (redshift > cooling->Redshifts[eagle_cooling_N_redshifts - 1]) { + z_index = -1; + } else { + get_redshift_index(redshift, &z_index, &dz, cooling); + } + cooling->z_index = z_index; + cooling->dz = dz; + + eagle_check_cooling_tables(cooling, restart_flag); +} + +/** + * @brief Newton Raphson integration scheme to calculate particle cooling over + * timestep. This replaces bisection scheme used in EAGLE to minimize the + * number of array accesses. Integration defaults to bisection scheme (see + * function bisection_iter) if this function does not converge within a + * specified number of steps + * + * @param logu_init Initial guess for log(internal energy) + * @param u_ini Internal energy at beginning of hydro step + * @param n_h_i Particle hydrogen number density index + * @param d_n_h Particle hydrogen number density offset + * @param He_i Particle helium fraction index + * @param d_He Particle helium fraction offset + * @param He_reion_heat Heating due to helium reionization + * (only depends on redshift, so passed as parameter) + * @param p #part structure + * @param cosmo #cosmology structure + * @param cooling #cooling_function_data structure + * @param phys_const #phys_const data structure + * @param abundance_ratio Array of ratios of metal abundance to solar + * @param dt timestep + * @param bisection_flag Flag to identify if scheme failed to converge + */ +INLINE static float newton_iter( + float logu_init, double u_ini, int n_h_i, float d_n_h, int He_i, float d_He, + float He_reion_heat, struct part *restrict p, + const struct cosmology *restrict cosmo, + const struct cooling_function_data *restrict cooling, + 
const struct phys_const *restrict phys_const, + const float abundance_ratio[chemistry_element_count + 2], float dt, + int *bisection_flag) { + + double logu, logu_old; + double dLambdaNet_du = 0.0, LambdaNet; + + /* table bounds */ + const float log_table_bound_high = + (cooling->Therm[eagle_cooling_N_temperature - 1] - 0.05) / M_LOG10E; + const float log_table_bound_low = (cooling->Therm[0] + 0.05) / M_LOG10E; + + /* convert Hydrogen mass fraction in Hydrogen number density */ + const float XH = p->chemistry_data.metal_mass_fraction[chemistry_element_H]; + const double n_H = + hydro_get_physical_density(p, cosmo) * XH / phys_const->const_proton_mass; + const double n_H_cgs = n_H * cooling->number_density_to_cgs; + + /* compute ratefact = n_H * n_H / rho; Might lead to round-off error: + * replaced by equivalent expression below */ + const double ratefact_cgs = n_H_cgs * XH * cooling->inv_proton_mass_cgs; + + logu_old = logu_init; + logu = logu_old; + int i = 0; + + float LambdaNet_old = 0; + LambdaNet = 0; + do /* iterate to convergence */ + { + logu_old = logu; + LambdaNet_old = LambdaNet; + LambdaNet = + (He_reion_heat / (dt * ratefact_cgs)) + + eagle_cooling_rate(logu_old, cosmo->z, n_H_cgs, abundance_ratio, n_h_i, + d_n_h, He_i, d_He, cooling, &dLambdaNet_du); + + /* Newton iteration. 
For details on how the cooling equation is integrated + * see documentation in theory/Cooling/ */ + logu = logu_old - (1.0 - u_ini * exp(-logu_old) - + LambdaNet * ratefact_cgs * dt * exp(-logu_old)) / + (1.0 - dLambdaNet_du * ratefact_cgs * dt); + /* Check if first step passes over equilibrium solution, if it does adjust + * next guess */ + if (i == 1 && LambdaNet_old * LambdaNet < 0) logu = newton_log_u_guess_cgs; + + /* check whether iterations go within about 10% of the table bounds, + * if they do default to bisection method */ + if (logu > log_table_bound_high) { + i = newton_max_iterations; + break; + } else if (logu < log_table_bound_low) { + i = newton_max_iterations; + break; + } + + i++; + } while (fabs(logu - logu_old) > newton_tolerance && + i < newton_max_iterations); + if (i >= newton_max_iterations) { + /* flag to trigger bisection scheme */ + *bisection_flag = 1; + } + + return logu; +} + +/** + * @brief Bisection integration scheme + * + * @param u_ini_cgs Internal energy at beginning of hydro step in CGS. + * @param n_H_cgs Hydrogen number density in CGS. + * @param redshift Current redshift. + * @param n_h_i Particle hydrogen number density index. + * @param d_n_h Particle hydrogen number density offset. + * @param He_i Particle helium fraction index. + * @param d_He Particle helium fraction offset. + * @param Lambda_He_reion_cgs Cooling rate coming from He reionization. + * @param ratefact_cgs Multiplication factor to get a cooling rate. + * @param cooling #cooling_function_data structure. + * @param abundance_ratio Array of ratios of metal abundance to solar. + * @param dt_cgs timestep in CGS. + * @param ID ID of the particle (for debugging). 
+ */ +INLINE static double bisection_iter( + const double u_ini_cgs, const double n_H_cgs, const double redshift, + int n_h_i, float d_n_h, int He_i, float d_He, double Lambda_He_reion_cgs, + double ratefact_cgs, const struct cooling_function_data *restrict cooling, + const float abundance_ratio[chemistry_element_count + 2], double dt_cgs, + long long ID) { + + /* Bracketing */ + double u_lower_cgs = u_ini_cgs; + double u_upper_cgs = u_ini_cgs; + + /*************************************/ + /* Let's get a first guess */ + /*************************************/ + + double LambdaNet_cgs = + Lambda_He_reion_cgs + eagle_cooling_rate(log(u_ini_cgs), redshift, + n_H_cgs, abundance_ratio, n_h_i, + d_n_h, He_i, d_He, cooling, + /*dLambdaNet_du=*/NULL); + + /*************************************/ + /* Let's try to bracket the solution */ + /*************************************/ + + if (LambdaNet_cgs < 0) { + + /* we're cooling! */ + u_lower_cgs /= bracket_factor; + u_upper_cgs *= bracket_factor; + + /* Compute a new rate */ + LambdaNet_cgs = + Lambda_He_reion_cgs + + eagle_cooling_rate(log(u_lower_cgs), redshift, n_H_cgs, abundance_ratio, + n_h_i, d_n_h, He_i, d_He, cooling, + /*dLambdaNet_du=*/NULL); + + int i = 0; + while (u_lower_cgs - u_ini_cgs - LambdaNet_cgs * ratefact_cgs * dt_cgs > + 0 && + i < bisection_max_iterations) { + + u_lower_cgs /= bracket_factor; + u_upper_cgs /= bracket_factor; + + /* Compute a new rate */ + LambdaNet_cgs = + Lambda_He_reion_cgs + + eagle_cooling_rate(log(u_lower_cgs), redshift, n_H_cgs, + abundance_ratio, n_h_i, d_n_h, He_i, d_He, cooling, + /*dLambdaNet_du=*/NULL); + i++; + } + + if (i >= bisection_max_iterations) { + error( + "particle %llu exceeded max iterations searching for bounds when " + "cooling", + ID); + } + } else { + + /* we are heating! 
*/ + u_lower_cgs /= bracket_factor; + u_upper_cgs *= bracket_factor; + + /* Compute a new rate */ + LambdaNet_cgs = + Lambda_He_reion_cgs + + eagle_cooling_rate(log(u_upper_cgs), redshift, n_H_cgs, abundance_ratio, + n_h_i, d_n_h, He_i, d_He, cooling, + /*dLambdaNet_du=*/NULL); + + int i = 0; + while (u_upper_cgs - u_ini_cgs - LambdaNet_cgs * ratefact_cgs * dt_cgs < + 0 && + i < bisection_max_iterations) { + + u_lower_cgs *= bracket_factor; + u_upper_cgs *= bracket_factor; + + /* Compute a new rate */ + LambdaNet_cgs = + Lambda_He_reion_cgs + + eagle_cooling_rate(log(u_upper_cgs), redshift, n_H_cgs, + abundance_ratio, n_h_i, d_n_h, He_i, d_He, cooling, + /*dLambdaNet_du=*/NULL); + i++; + } + + if (i >= bisection_max_iterations) { + error( + "particle %llu exceeded max iterations searching for bounds when " + "heating", + ID); + } + } + + /********************************************/ + /* We now have an upper and lower bound. */ + /* Let's iterate by reducing the bracketing */ + /********************************************/ + + /* bisection iteration */ + int i = 0; + double u_next_cgs; + + do { + + /* New guess */ + u_next_cgs = 0.5 * (u_lower_cgs + u_upper_cgs); + + /* New rate */ + LambdaNet_cgs = + Lambda_He_reion_cgs + + eagle_cooling_rate(log(u_next_cgs), redshift, n_H_cgs, abundance_ratio, + n_h_i, d_n_h, He_i, d_He, cooling, + /*dLambdaNet_du=*/NULL); + + /* Where do we go next? */ + if (u_next_cgs - u_ini_cgs - LambdaNet_cgs * ratefact_cgs * dt_cgs > 0.0) { + u_upper_cgs = u_next_cgs; + } else { + u_lower_cgs = u_next_cgs; + } + + i++; + } while (fabs(u_upper_cgs - u_lower_cgs) / u_next_cgs > bisection_tolerance && + i < bisection_max_iterations); + + if (i >= bisection_max_iterations) + error("Particle id %llu failed to converge", ID); + + return u_upper_cgs; +} + +/** + * @brief Apply the cooling function to a particle. 
+ * + * We want to compute u_new such that u_new = u_old + dt * du/dt(u_new, X), + * where X stands for the metallicity, density and redshift. These are + * kept constant. + * + * We first compute du/dt(u_old). If dt * du/dt(u_old) is small enough, we + * use an explicit integration and use this as our solution. + * + * Otherwise, we try to find a solution to the implicit time-integration + * problem. This leads to the root-finding problem: + * + * f(u_new) = u_new - u_old - dt * du/dt(u_new, X) = 0 + * + * We first try a few Newton-Raphson iteration if it does not converge, we + * revert to a bisection scheme. + * + * This is done by first bracketing the solution and then iterating + * towards the solution by reducing the window down to a certain tolerance. + * Note there is always at least one solution since + * f(+inf) is < 0 and f(-inf) is > 0. + * + * @param phys_const The physical constants in internal units. + * @param us The internal system of units. + * @param cosmo The current cosmological model. + * @param hydro_properties the hydro_props struct + * @param cooling The #cooling_function_data used in the run. + * @param p Pointer to the particle data. + * @param xp Pointer to the extended particle data. + * @param dt The cooling time-step of this particle. + * @param dt_therm The hydro time-step of this particle. + */ +void cooling_cool_part(const struct phys_const *restrict phys_const, + const struct unit_system *restrict us, + const struct cosmology *restrict cosmo, + const struct hydro_props *restrict hydro_properties, + const struct cooling_function_data *restrict cooling, + struct part *restrict p, struct xpart *restrict xp, + const float dt, const float dt_therm) { + + /* No cooling happens over zero time */ + if (dt == 0.) 
return; + + /* Get internal energy at the last kick step */ + const float u_start = hydro_get_physical_internal_energy(p, xp, cosmo); + + /* Get the change in internal energy due to hydro forces */ + const float hydro_du_dt = hydro_get_physical_internal_energy_dt(p, cosmo); + + /* Get internal energy at the end of the next kick step (assuming dt does not + * increase) */ + double u_0 = (u_start + hydro_du_dt * dt_therm); + + /* Check for minimal energy */ + u_0 = max(u_0, hydro_properties->minimal_internal_energy); + + /* Convert to CGS units */ + const double u_start_cgs = u_start * cooling->internal_energy_to_cgs; + const double u_0_cgs = u_0 * cooling->internal_energy_to_cgs; + const double dt_cgs = dt * units_cgs_conversion_factor(us, UNIT_CONV_TIME); + + /* Get this particle's abundance ratios */ + float abundance_ratio[chemistry_element_count + 2]; + abundance_ratio_to_solar(p, cooling, abundance_ratio); + + /* Get the H and He mass fractions */ + const float XH = p->chemistry_data.metal_mass_fraction[chemistry_element_H]; + const float HeFrac = + p->chemistry_data.metal_mass_fraction[chemistry_element_He] / + (XH + p->chemistry_data.metal_mass_fraction[chemistry_element_He]); + + /* convert Hydrogen mass fraction into Hydrogen number density */ + const double n_H = + hydro_get_physical_density(p, cosmo) * XH / phys_const->const_proton_mass; + const double n_H_cgs = n_H * cooling->number_density_to_cgs; + + /* ratefact = n_H * n_H / rho; Might lead to round-off error: replaced by + * equivalent expression below */ + const double ratefact_cgs = n_H_cgs * (XH * cooling->inv_proton_mass_cgs); + + /* Get helium and hydrogen reheating term */ + const double Helium_reion_heat_cgs = eagle_helium_reionization_extraheat( + cooling->z_index, -dt * cosmo->H * cosmo->a_inv, cooling); + + /* Convert this into a rate */ + const double Lambda_He_reion_cgs = + Helium_reion_heat_cgs / (dt_cgs * ratefact_cgs); + + /* compute hydrogen number density and helium fraction table 
indices and + * offsets (These are fixed for of u, so no need to recompute them) */ + int He_i, n_h_i; + float d_He, d_n_h; + get_index_1d(cooling->HeFrac, eagle_cooling_N_He_frac, HeFrac, &He_i, &d_He); + get_index_1d(cooling->nH, eagle_cooling_N_density, log10(n_H_cgs), &n_h_i, + &d_n_h); + + /* Let's compute the internal energy at the end of the step */ + double u_final_cgs; + + /* First try an explicit integration (note we ignore the derivative) */ + const double LambdaNet_cgs = + Lambda_He_reion_cgs + eagle_cooling_rate(log(u_0_cgs), cosmo->z, n_H_cgs, + abundance_ratio, n_h_i, d_n_h, + He_i, d_He, cooling, + /*dLambdaNet_du=*/NULL); + + /* if cooling rate is small, take the explicit solution */ + if (fabs(ratefact_cgs * LambdaNet_cgs * dt_cgs) < + explicit_tolerance * u_0_cgs) { + + u_final_cgs = u_0_cgs + ratefact_cgs * LambdaNet_cgs * dt_cgs; + + } else { + + int bisection_flag = 1; + +#ifdef TO_BE_DONE + if (cooling->newton_flag) { + /* Ok, try a Newton-Raphson scheme instead */ + log_u_final_cgs = newton_iter( + log(u_0_cgs), u_0_cgs, n_h_i, d_n_h, He_i, d_He, LambdaTune, p, cosmo, + cooling, phys_const, abundance_ratio, dt_cgs, &bisection_flag); + + /* Check if newton scheme sent us to a higher energy despite being in + a + * cooling regime If it did try newton scheme with a better guess. + (Guess + * internal energy near equilibrium solution). 
*/ + if (LambdaNet < 0 && log_u_final_cgs > log(u_0_cgs)) { + bisection_flag = 0; + log_u_final_cgs = + newton_iter(newton_log_u_guess_cgs, u_0_cgs, n_h_i, d_n_h, He_i, + d_He, LambdaTune, p, cosmo, cooling, phys_const, + abundance_ratio, dt_cgs, &bisection_flag); + } + } +#endif + + /* Alright, all else failed, let's bisect */ + if (bisection_flag || !(cooling->newton_flag)) { + u_final_cgs = + bisection_iter(u_0_cgs, n_H_cgs, cosmo->z, n_h_i, d_n_h, He_i, d_He, + Lambda_He_reion_cgs, ratefact_cgs, cooling, + abundance_ratio, dt_cgs, p->id); + } + } + + /* Expected change in energy over the next kick step + (assuming no change in dt) */ + const double delta_u_cgs = u_final_cgs - u_start_cgs; + + /* Convert back to internal units */ + double delta_u = delta_u_cgs * cooling->internal_energy_from_cgs; + + /* We now need to check that we are not going to go below any of the limits */ + + /* First, check whether we may end up below the minimal energy after + * this step 1/2 kick + another 1/2 kick that could potentially be for + * a time-step twice as big. We hence check for 1.5 delta_u. */ + if (u_start + 1.5 * delta_u < hydro_properties->minimal_internal_energy) { + delta_u = (hydro_properties->minimal_internal_energy - u_start) / 1.5; + } + + /* Second, check whether the energy used in the prediction could get negative. + * We need to check for the 1/2 dt kick followed by a full time-step drift + * that could potentially be for a time-step twice as big. We hence check + * for 2.5 delta_u but this time against 0 energy not the minimum. + * To avoid numerical rounding bringing us below 0., we add a tiny tolerance. + */ + if (u_start + 2.5 * delta_u < 0.) 
{ + delta_u = -u_start / (2.5 + rounding_tolerance); + } + + /* Turn this into a rate of change (including cosmology term) */ + const float cooling_du_dt = delta_u / dt_therm; + + /* Update the internal energy time derivative */ + hydro_set_physical_internal_energy_dt(p, cosmo, cooling_du_dt); + + /* Store the radiated energy */ + xp->cooling_data.radiated_energy -= hydro_get_mass(p) * cooling_du_dt * dt; +} + +/** + * @brief Computes the cooling time-step. + * + * The time-step is not set by the properties of cooling. + * + * @param cooling The #cooling_function_data used in the run. + * @param phys_const #phys_const data struct. + * @param us The internal system of units. + * @param cosmo #cosmology struct. + * @param hydro_props the properties of the hydro scheme. + * @param p #part data. + * @param xp extended particle data. + */ +__attribute__((always_inline)) INLINE float cooling_timestep( + const struct cooling_function_data *restrict cooling, + const struct phys_const *restrict phys_const, + const struct cosmology *restrict cosmo, + const struct unit_system *restrict us, + const struct hydro_props *hydro_props, const struct part *restrict p, + const struct xpart *restrict xp) { + + return FLT_MAX; +} + +/** + * @brief Sets the cooling properties of the (x-)particles to a valid start + * state. + * + * @param phys_const #phys_const data structure. + * @param us The internal system of units. + * @param cosmo #cosmology data structure. + * @param cooling #cooling_function_data struct. + * @param p #part data. + * @param xp Pointer to the #xpart data. 
+ */ +__attribute__((always_inline)) INLINE void cooling_first_init_part( + const struct phys_const *restrict phys_const, + const struct unit_system *restrict us, + const struct cosmology *restrict cosmo, + const struct cooling_function_data *restrict cooling, + const struct part *restrict p, struct xpart *restrict xp) { + + xp->cooling_data.radiated_energy = 0.f; +} + +/** + * @brief Returns the total radiated energy by this particle. + * + * @param xp #xpart data struct + */ +__attribute__((always_inline)) INLINE float cooling_get_radiated_energy( + const struct xpart *restrict xp) { + + return xp->cooling_data.radiated_energy; +} + +/** + * @brief Initialises properties stored in the cooling_function_data struct + * + * @param parameter_file The parsed parameter file + * @param us Internal system of units data structure + * @param phys_const #phys_const data structure + * @param cooling #cooling_function_data struct to initialize + */ +void cooling_init_backend(struct swift_params *parameter_file, + const struct unit_system *us, + const struct phys_const *phys_const, + struct cooling_function_data *cooling) { + + /* read some parameters */ + parser_get_param_string(parameter_file, "EagleCooling:filename", + cooling->cooling_table_path); + cooling->H_reion_z = parser_get_param_float( + parameter_file, "EagleCooling:reionisation_redshift"); + cooling->calcium_over_silicon_ratio = parser_get_param_float( + parameter_file, "EAGLEChemistry:CalciumOverSilicon"); + cooling->sulphur_over_silicon_ratio = parser_get_param_float( + parameter_file, "EAGLEChemistry:SulphurOverSilicon"); + cooling->He_reion_z_centre = + parser_get_param_float(parameter_file, "EagleCooling:He_reion_z_centre"); + cooling->He_reion_z_sigma = + parser_get_param_float(parameter_file, "EagleCooling:He_reion_z_sigma"); + cooling->He_reion_heat_cgs = + parser_get_param_float(parameter_file, "EagleCooling:He_reion_ev_pH"); + + /* convert to cgs */ + cooling->He_reion_heat_cgs *= + 
phys_const->const_electron_volt * + units_cgs_conversion_factor(us, UNIT_CONV_ENERGY); + + /* read in cooling table header */ + get_cooling_redshifts(cooling); + char fname[eagle_table_path_name_length + 12]; + sprintf(fname, "%sz_0.000.hdf5", cooling->cooling_table_path); + read_cooling_header(fname, cooling); + + /* Allocate space for cooling tables */ + allocate_cooling_tables(cooling); + + /* compute conversion factors */ + cooling->internal_energy_to_cgs = + units_cgs_conversion_factor(us, UNIT_CONV_ENERGY_PER_UNIT_MASS); + cooling->internal_energy_from_cgs = 1. / cooling->internal_energy_to_cgs; + cooling->number_density_to_cgs = + units_cgs_conversion_factor(us, UNIT_CONV_NUMBER_DENSITY); + + /* Store some constants in CGS units */ + const double proton_mass_cgs = + phys_const->const_proton_mass * + units_cgs_conversion_factor(us, UNIT_CONV_MASS); + cooling->inv_proton_mass_cgs = 1. / proton_mass_cgs; + cooling->T_CMB_0 = phys_const->const_T_CMB_0 * + units_cgs_conversion_factor(us, UNIT_CONV_TEMPERATURE); + + /* Compute the coefficient at the front of the Compton cooling expression */ + const double radiation_constant = + 4. * phys_const->const_stefan_boltzmann / phys_const->const_speed_light_c; + const double compton_coefficient = + 4. 
* radiation_constant * phys_const->const_thomson_cross_section * + phys_const->const_boltzmann_k / + (phys_const->const_electron_mass * phys_const->const_speed_light_c); + const float dimension_coefficient[5] = {1, 2, -3, 0, -5}; + + /* This should be ~1.0178085e-37 g cm^2 s^-3 K^-5 */ + const double compton_coefficient_cgs = + compton_coefficient * + units_general_cgs_conversion_factor(us, dimension_coefficient); + +#ifdef SWIFT_DEBUG_CHECKS + const double expected_compton_coefficient_cgs = 1.0178085e-37; + if (fabs(compton_coefficient_cgs - expected_compton_coefficient_cgs) / + expected_compton_coefficient_cgs > + 0.01) + error("compton coefficient incorrect."); +#endif + + /* And now the Compton rate */ + cooling->compton_rate_cgs = compton_coefficient_cgs * cooling->T_CMB_0 * + cooling->T_CMB_0 * cooling->T_CMB_0 * + cooling->T_CMB_0; + + /* set low_z_index to -10 to indicate we haven't read any tables yet */ + cooling->low_z_index = -10; + /* set previous_z_index and to last value of redshift table*/ + cooling->previous_z_index = eagle_cooling_N_redshifts - 2; + + /* Check if we are running with the newton scheme */ + cooling->newton_flag = parser_get_opt_param_int( + parameter_file, "EagleCooling:newton_integration", 0); +} + +/** + * @brief Restore cooling tables (if applicable) after + * restart + * + * @param cooling the #cooling_function_data structure + * @param cosmo #cosmology structure + */ +void cooling_restore_tables(struct cooling_function_data *cooling, + const struct cosmology *cosmo) { + + /* Read redshifts */ + get_cooling_redshifts(cooling); + + /* Read cooling header */ + char fname[eagle_table_path_name_length + 12]; + sprintf(fname, "%sz_0.000.hdf5", cooling->cooling_table_path); + read_cooling_header(fname, cooling); + + /* Read relevant cooling tables. 
+ * Third variable in cooling_update flag to mark restart*/ + allocate_cooling_tables(cooling); + cooling_update(cosmo, cooling, /*restart=*/1); +} + +/** + * @brief Prints the properties of the cooling model to stdout. + * + * @param cooling #cooling_function_data struct. + */ +void cooling_print_backend(const struct cooling_function_data *cooling) { + + message("Cooling function is 'EAGLE'."); +} + +/** + * @brief Clean-up the memory allocated for the cooling routines + * + * We simply free all the arrays. + * + * @param cooling the cooling data structure. + */ +void cooling_clean(struct cooling_function_data *cooling) { + + /* Free the side arrays */ + free(cooling->Redshifts); + free(cooling->nH); + free(cooling->Temp); + free(cooling->HeFrac); + free(cooling->Therm); + free(cooling->SolarAbundances); + + /* Free the tables */ + free(cooling->table.metal_heating); + free(cooling->table.electron_abundance); + free(cooling->table.temperature); + free(cooling->table.H_plus_He_heating); + free(cooling->table.H_plus_He_electron_abundance); +} diff --git a/src/cooling/EAGLE/cooling.h b/src/cooling/EAGLE/cooling.h index 1c56572856a88d763d5ef7ca77e14d378891a264..5685692c379c508bc8d6a4ab8d22bb89ba1f4a17 100644 --- a/src/cooling/EAGLE/cooling.h +++ b/src/cooling/EAGLE/cooling.h @@ -21,112 +21,56 @@ /** * @file src/cooling/EAGLE/cooling.h - * @brief EAGLE cooling function + * @brief EAGLE cooling function declarations */ -/* Config parameters. */ -#include "../config.h" - -/* Some standard headers. */ -#include <float.h> -#include <math.h> - /* Local includes. */ -#include "error.h" -#include "hydro.h" -#include "parser.h" +#include "cooling_struct.h" +#include "cosmology.h" +#include "hydro_properties.h" #include "part.h" #include "physical_constants.h" #include "units.h" -/** - * @brief Apply the cooling function to a particle. - * - * @param phys_const The physical constants in internal units. - * @param us The internal system of units. 
- * @param cosmo The current cosmological model. - * @param cooling The #cooling_function_data used in the run. - * @param p Pointer to the particle data. - * @param xp Pointer to the extended particle data. - * @param dt The time-step of this particle. - */ -__attribute__((always_inline)) INLINE static void cooling_cool_part( - const struct phys_const* restrict phys_const, - const struct unit_system* restrict us, - const struct cosmology* restrict cosmo, - const struct cooling_function_data* restrict cooling, - struct part* restrict p, struct xpart* restrict xp, float dt) {} +void cooling_update(const struct cosmology *cosmo, + struct cooling_function_data *cooling, + const int restart_flag); -/** - * @brief Computes the cooling time-step. - * - * @param cooling The #cooling_function_data used in the run. - * @param phys_const The physical constants in internal units. - * @param us The internal system of units. - * @param cosmo The current cosmological model. - * @param p Pointer to the particle data. 
- */ -__attribute__((always_inline)) INLINE static float cooling_timestep( - const struct cooling_function_data* restrict cooling, - const struct phys_const* restrict phys_const, - const struct cosmology* restrict cosmo, - const struct unit_system* restrict us, const struct part* restrict p) { +void cooling_cool_part(const struct phys_const *restrict phys_const, + const struct unit_system *restrict us, + const struct cosmology *restrict cosmo, + const struct hydro_props *restrict hydro_properties, + const struct cooling_function_data *restrict cooling, + struct part *restrict p, struct xpart *restrict xp, + const float dt, const float dt_therm); - return FLT_MAX; -} +float cooling_timestep(const struct cooling_function_data *restrict cooling, + const struct phys_const *restrict phys_const, + const struct cosmology *restrict cosmo, + const struct unit_system *restrict us, + const struct hydro_props *hydro_props, + const struct part *restrict p, + const struct xpart *restrict xp); -/** - * @brief Sets the cooling properties of the (x-)particles to a valid start - * state. - * - * @param phys_const The physical constants in internal units. - * @param us The internal system of units. - * @param cosmo The current cosmological model. - * @param cooling The properties of the cooling function. - * @param p Pointer to the particle data. - * @param xp Pointer to the extended particle data. 
- */ -__attribute__((always_inline)) INLINE static void cooling_first_init_part( - const struct phys_const* restrict phys_const, - const struct unit_system* restrict us, - const struct cosmology* restrict cosmo, - const struct cooling_function_data* restrict cooling, - const struct part* restrict p, struct xpart* restrict xp) {} +void cooling_first_init_part( + const struct phys_const *restrict phys_const, + const struct unit_system *restrict us, + const struct cosmology *restrict cosmo, + const struct cooling_function_data *restrict cooling, + const struct part *restrict p, struct xpart *restrict xp); -/** - * @brief Returns the total radiated energy by this particle. - * - * @param xp The extended particle data - */ -__attribute__((always_inline)) INLINE static float cooling_get_radiated_energy( - const struct xpart* restrict xp) { +float cooling_get_radiated_energy(const struct xpart *restrict xp); - return 0.f; -} +void cooling_init_backend(struct swift_params *parameter_file, + const struct unit_system *us, + const struct phys_const *phys_const, + struct cooling_function_data *cooling); -/** - * @brief Initialises the cooling properties. - * - * @param parameter_file The parsed parameter file. - * @param us The current internal system of units. - * @param phys_const The physical constants in internal units. - * @param cooling The cooling properties to initialize - */ -static INLINE void cooling_init_backend(struct swift_params* parameter_file, - const struct unit_system* us, - const struct phys_const* phys_const, - struct cooling_function_data* cooling) { -} +void cooling_restore_tables(struct cooling_function_data *cooling, + const struct cosmology *cosmo); -/** - * @brief Prints the properties of the cooling model to stdout. - * - * @param cooling The properties of the cooling function. 
- */ -static INLINE void cooling_print_backend( - const struct cooling_function_data* cooling) { +void cooling_print_backend(const struct cooling_function_data *cooling); - message("Cooling function is 'EAGLE'."); -} +void cooling_clean(struct cooling_function_data *data); #endif /* SWIFT_COOLING_EAGLE_H */ diff --git a/src/cooling/EAGLE/cooling_io.h b/src/cooling/EAGLE/cooling_io.h index f98539605de5c231a821758e9bd8fdb89bd19a59..48c845c254b41f02d8b4ea39ae43a990b0436ac8 100644 --- a/src/cooling/EAGLE/cooling_io.h +++ b/src/cooling/EAGLE/cooling_io.h @@ -28,13 +28,15 @@ #ifdef HAVE_HDF5 /** - * @brief Writes the current model of SPH to the file - * @param h_grpsph The HDF5 group in which to write + * @brief Writes the current model of cooling to the file. + * + * @param h_grp The HDF5 group in which to write + * @param cooling The #cooling_function_data */ __attribute__((always_inline)) INLINE static void cooling_write_flavour( - hid_t h_grpsph) { + hid_t h_grp, const struct cooling_function_data* cooling) { - io_write_attribute_s(h_grpsph, "Cooling Model", "EAGLE"); + io_write_attribute_s(h_grp, "Cooling Model", "EAGLE"); } #endif diff --git a/src/cooling/EAGLE/cooling_rates.h b/src/cooling/EAGLE/cooling_rates.h new file mode 100644 index 0000000000000000000000000000000000000000..621f7c7b0781ba9806b4b5922843947079941fee --- /dev/null +++ b/src/cooling/EAGLE/cooling_rates.h @@ -0,0 +1,722 @@ +/******************************************************************************* + * This file is part of SWIFT. + * Copyright (c) 2017 Matthieu Schaller (matthieu.schaller@durham.ac.uk) + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published + * by the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. 
+ * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public License + * along with this program. If not, see <http://www.gnu.org/licenses/>. + * + ******************************************************************************/ + +/* Config parameters. */ +#ifndef SWIFT_EAGLE_COOLING_RATES_H +#define SWIFT_EAGLE_COOLING_RATES_H + +#include "../config.h" + +/* Local includes. */ +#include "cooling_tables.h" +#include "interpolate.h" + +/** + * @brief Calculate ratio of particle element abundances + * to solar abundance. + + * Multiple if statements are necessary because order of elements + * in tables is different from chemistry_element enum. + * Tables: H, He, C, N, O, Ne, Mg, Si, S, Ca, Fe + * Enum: H, He, C, N, O, Ne, Mg, Si, Fe + * The order in ratio_solar is: + * H, He, C, N, O, Ne, Mg, Si, Fe, S, Ca + * Hence Fe, S, Ca need to be treated separately to be put in the + * correct place in the output array. 
+ * + * @param p Pointer to #part struct + * @param cooling #cooling_function_data struct + * @param ratio_solar Pointer to array of ratios + */ +__attribute__((always_inline)) INLINE void abundance_ratio_to_solar( + const struct part *p, const struct cooling_function_data *cooling, + float ratio_solar[chemistry_element_count + 2]) { + + /* compute ratios for all elements */ + for (enum chemistry_element elem = chemistry_element_H; + elem < chemistry_element_count; elem++) { + if (elem == chemistry_element_Fe) { + /* NOTE: solar abundances have iron last with calcium and sulphur directly + * before, hence +2 */ + ratio_solar[elem] = p->chemistry_data.metal_mass_fraction[elem] / + cooling->SolarAbundances[elem + 2]; + } else { + ratio_solar[elem] = p->chemistry_data.metal_mass_fraction[elem] / + cooling->SolarAbundances[elem]; + } + } + + /* assign ratios for Ca and S, note positions of these elements occur before + * Fe */ + ratio_solar[chemistry_element_count] = + p->chemistry_data.metal_mass_fraction[chemistry_element_Si] * + cooling->sulphur_over_silicon_ratio / + cooling->SolarAbundances[chemistry_element_count - 1]; + ratio_solar[chemistry_element_count + 1] = + p->chemistry_data.metal_mass_fraction[chemistry_element_Si] * + cooling->calcium_over_silicon_ratio / + cooling->SolarAbundances[chemistry_element_count]; +} + +/** + * @brief Computes the extra heat from Helium reionisation at a given redshift. + * + * We follow the implementation of Wiersma et al. 2009, MNRAS, 399, 574-600, + * Section 2. The calculation returns energy in CGS. + * + * Note that delta_z is negative. + * + * @param z The current redshift. + * @param delta_z The change in redshift over the course of this time-step. + * @param cooling The #cooling_function_data used in the run. + * @return Helium reionization energy in CGS units. 
+ */ +__attribute__((always_inline)) INLINE double +eagle_helium_reionization_extraheat( + double z, double delta_z, const struct cooling_function_data *cooling) { + +#ifdef SWIFT_DEBUG_CHECKS + if (delta_z > 0.f) error("Invalid value for delta_z. Should be negative."); +#endif + + /* Recover the values we need */ + const double z_centre = cooling->He_reion_z_centre; + const double z_sigma = cooling->He_reion_z_sigma; + const double heat_cgs = cooling->He_reion_heat_cgs; + + double extra_heat = 0.; + + /* Integral of the Gaussian between z and z - delta_z */ + extra_heat += erf((z - delta_z - z_centre) / (M_SQRT2 * z_sigma)); + extra_heat -= erf((z - z_centre) / (M_SQRT2 * z_sigma)); + + /* Multiply by the normalisation factor */ + extra_heat *= heat_cgs * 0.5; + + return extra_heat; +} + +/** + * @brief Computes the log_10 of the temperature corresponding to a given + * internal energy, hydrogen number density, Helium fraction and redshift. + * + * Note that the redshift is implicitly passed in via the currently loaded + * tables in the #cooling_function_data. + * + * For the low-z case, we interpolate the flattened 4D table 'u_to_temp' that + * is arranged in the following way: + * - 1st dim: redshift, length = eagle_cooling_N_loaded_redshifts + * - 2nd dim: Hydrogen density, length = eagle_cooling_N_density + * - 3rd dim: Helium fraction, length = eagle_cooling_N_He_frac + * - 4th dim: Internal energy, length = eagle_cooling_N_temperature + * + * For the high-z case, we interpolate the flattened 3D table 'u_to_temp' that + * is arranged in the following way: + * - 1st dim: Hydrogen density, length = eagle_cooling_N_density + * - 2nd dim: Helium fraction, length = eagle_cooling_N_He_frac + * - 3rd dim: Internal energy, length = eagle_cooling_N_temperature + * + * @param log_10_u_cgs Log base 10 of internal energy in cgs. + * @param redshift Current redshift. + * @param n_H_index Index along the Hydrogen density dimension. 
+ * @param He_index Index along the Helium fraction dimension. + * @param d_n_H Offset between Hydrogen density and table[n_H_index]. + * @param d_He Offset between helium fraction and table[He_index]. + * @param cooling #cooling_function_data structure. + * + * @param compute_dT_du Do we want to compute dT/du ? + * @param dT_du (return) The value of dT/du + * + * @return log_10 of the temperature. + */ +__attribute__((always_inline)) INLINE double eagle_convert_u_to_temp( + const double log_10_u_cgs, const float redshift, const int compute_dT_du, + float *dT_du, int n_H_index, int He_index, float d_n_H, float d_He, + const struct cooling_function_data *restrict cooling) { + + /* Get index of u along the internal energy axis */ + int u_index; + float d_u; + get_index_1d(cooling->Therm, eagle_cooling_N_temperature, log_10_u_cgs, + &u_index, &d_u); + + /* Interpolate temperature table to return temperature for current + * internal energy (use 3D interpolation for high redshift table, + * otherwise 4D) */ + float log_10_T; + if (redshift > cooling->Redshifts[eagle_cooling_N_redshifts - 1]) { + + log_10_T = interpolation_3d(cooling->table.temperature, /* */ + n_H_index, He_index, u_index, /* */ + d_n_H, d_He, d_u, /* */ + eagle_cooling_N_density, /* */ + eagle_cooling_N_He_frac, /* */ + eagle_cooling_N_temperature); /* */ + } else { + + log_10_T = + interpolation_4d(cooling->table.temperature, /* */ + /*z_index=*/0, n_H_index, He_index, u_index, /* */ + cooling->dz, d_n_H, d_He, d_u, /* */ + eagle_cooling_N_loaded_redshifts, /* */ + eagle_cooling_N_density, /* */ + eagle_cooling_N_He_frac, /* */ + eagle_cooling_N_temperature); /* */ + } + + if (compute_dT_du) { + + float log_10_T_high, log_10_T_low; + + /* Interpolate temperature table to return temperature for internal energy + * at grid point above current internal energy for computing dT_du used for + * calculation of dlambda_du in cooling.c (use 3D interpolation for high + * redshift table, otherwise 4D) */ + if 
(redshift > cooling->Redshifts[eagle_cooling_N_redshifts - 1]) { + + log_10_T_high = interpolation_3d(cooling->table.temperature, /* */ + n_H_index, He_index, u_index, /* */ + d_n_H, d_He, /*delta_u=*/1.f, /* */ + eagle_cooling_N_density, /* */ + eagle_cooling_N_He_frac, /* */ + eagle_cooling_N_temperature); /* */ + + } else { + + log_10_T_high = + interpolation_4d(cooling->table.temperature, /* */ + /*z_index=*/0, n_H_index, He_index, u_index, /* */ + cooling->dz, d_n_H, d_He, /*delta_u=*/1.f, /* */ + eagle_cooling_N_loaded_redshifts, /* */ + eagle_cooling_N_density, /* */ + eagle_cooling_N_He_frac, /* */ + eagle_cooling_N_temperature); /* */ + } + + /* Interpolate temperature table to return temperature for internal energy + * at grid point below current internal energy for computing dT_du used for + * calculation of dlambda_du in cooling.c (use 3D interpolation for high + * redshift table, otherwise 4D) */ + if (redshift > cooling->Redshifts[eagle_cooling_N_redshifts - 1]) { + + log_10_T_low = interpolation_3d(cooling->table.temperature, /* */ + n_H_index, He_index, u_index, /* */ + d_n_H, d_He, /*delta_u=*/0.f, /* */ + eagle_cooling_N_density, /* */ + eagle_cooling_N_He_frac, /* */ + eagle_cooling_N_temperature); /* */ + + } else { + + log_10_T_low = + interpolation_4d(cooling->table.temperature, /* */ + /*z_index=*/0, n_H_index, He_index, u_index, /* */ + cooling->dz, d_n_H, d_He, /*delta_u=*/0.f, /* */ + eagle_cooling_N_loaded_redshifts, /* */ + eagle_cooling_N_density, /* */ + eagle_cooling_N_He_frac, /* */ + eagle_cooling_N_temperature); /* */ + } + + /* Calculate dT/du */ + const float delta_u = exp(cooling->Therm[u_index + 1] * M_LN10) - + exp(cooling->Therm[u_index] * M_LN10); + *dT_du = + (exp(M_LN10 * log_10_T_high) - exp(M_LN10 * log_10_T_low)) / delta_u; + } + + return log_10_T; +} + +/** + * @brief Compute the Compton cooling rate from the CMB at a given + * redshift, electron abundance, temperature and Hydrogen density. 
+ * + * Uses an analytic formula. + * + * @param cooling The #cooling_function_data used in the run. + * @param redshift The current redshift. + * @param n_H_cgs The Hydrogen number density in CGS units. + * @param temperature The temperature. + * @param electron_abundance The electron abundance. + */ +__attribute__((always_inline)) INLINE double eagle_Compton_cooling_rate( + const struct cooling_function_data *cooling, const double redshift, + const double n_H_cgs, const double temperature, + const double electron_abundance) { + + const double zp1 = 1. + redshift; + const double zp1p2 = zp1 * zp1; + const double zp1p4 = zp1p2 * zp1p2; + + /* CMB temperature at this redshift */ + const double T_CMB = cooling->T_CMB_0 * zp1; + + /* Compton cooling rate */ + return cooling->compton_rate_cgs * (temperature - T_CMB) * zp1p4 * + electron_abundance / n_H_cgs; +} + +/** + * @brief Computes the cooling rate corresponding to a given internal energy, + * hydrogen number density, Helium fraction, redshift and metallicity from + * all the possible channels. + * + * 1) Metal-free cooling: + * We interpolate the flattened 4D table 'H_and_He_net_heating' that is + * arranged in the following way: + * - 1st dim: redshift, length = eagle_cooling_N_loaded_redshifts + * - 2nd dim: Hydrogen density, length = eagle_cooling_N_density + * - 3rd dim: Helium fraction, length = eagle_cooling_N_He_frac + * - 4th dim: Internal energy, length = eagle_cooling_N_temperature + * + * 2) Electron abundance + * We compute the electron abundance by interpolating the flattened 4d table + * 'H_and_He_electron_abundance' that is arranged in the following way: + * - 1st dim: redshift, length = eagle_cooling_N_loaded_redshifts + * - 2nd dim: Hydrogen density, length = eagle_cooling_N_density + * - 3rd dim: Helium fraction, length = eagle_cooling_N_He_frac + * - 4th dim: Internal energy, length = eagle_cooling_N_temperature + * + * 3) Compton cooling is applied via the analytic formula. 
+ * + * 4) Solar electron abundance + * We compute the solar electron abundance by interpolating the flattened 3d + * table 'solar_electron_abundance' that is arranged in the following way: + * - 1st dim: redshift, length = eagle_cooling_N_loaded_redshifts + * - 2nd dim: Hydrogen density, length = eagle_cooling_N_density + * - 3rd dim: Internal energy, length = eagle_cooling_N_temperature + * + * 5) Metal-line cooling + * For each tracked element we interpolate the flattened 4D table + * 'table_metals_net_heating' that is arranged in the following way: + * - 1st dim: element, length = eagle_cooling_N_metal + * - 2nd dim: redshift, length = eagle_cooling_N_loaded_redshifts + * - 3rd dim: Hydrogen density, length = eagle_cooling_N_density + * - 4th dim: Internal energy, length = eagle_cooling_N_temperature + * + * Note that this is a fake 4D interpolation as we do not interpolate + * along the 1st dimension. We just do this once per element. + * + * Since only the temperature changes when cooling a given particle, + * the redshift, hydrogen number density and helium fraction indices + * and offsets are passed in. + * + * If the argument dlambda_du is non-NULL, the routine also + * calculates derivative of cooling rate with respect to internal + * energy. + * + * If the argument element_lambda is non-NULL, the routine also + * returns the cooling rate per element in the array. + * + * @param log10_u_cgs Log base 10 of internal energy per unit mass in CGS units. + * @param redshift The current redshift + * @param n_H_cgs The Hydrogen number density in CGS units. 
+ * @param solar_ratio Array of ratios of particle metal abundances + * to solar metal abundances + * + * @param n_H_index Particle hydrogen number density index + * @param d_n_h Particle hydrogen number density offset + * @param He_index Particle helium fraction index + * @param d_He Particle helium fraction offset + * @param cooling Cooling data structure + * + * @param dlambda_du (return) Derivative of the cooling rate with respect to u. + * @param element_lambda (return) Cooling rate from each element + * + * @return The cooling rate + */ +INLINE static double eagle_metal_cooling_rate( + double log10_u_cgs, double redshift, double n_H_cgs, + const float solar_ratio[chemistry_element_count + 2], int n_H_index, + float d_n_h, int He_index, float d_He, + const struct cooling_function_data *restrict cooling, double *dlambda_du, + double *element_lambda) { + +#ifdef TO_BE_DONE + /* used for calculating dlambda_du */ + double temp_lambda_high = 0, temp_lambda_low = 0; + double h_plus_he_electron_abundance_high = 0; + double h_plus_he_electron_abundance_low = 0; + double solar_electron_abundance_high = 0; + double solar_electron_abundance_low = 0; + double elem_cool_low = 0, elem_cool_high = 0; +#endif + + /* We only need dT_du if dLambda_du is non-NULL */ + const int compute_dT_du = (dlambda_du != NULL) ? 
1 : 0; + + /* Temperature */ + float dT_du = -1.f; + const double T = + eagle_convert_u_to_temp(log10_u_cgs, redshift, compute_dT_du, &dT_du, + n_H_index, He_index, d_n_h, d_He, cooling); + + /* Get index along temperature dimension of the tables */ + int T_index; + float d_T; + get_index_1d(cooling->Temp, eagle_cooling_N_temperature, T, &T_index, &d_T); + +#ifdef TO_BE_DONE + /* Difference between entries on the temperature table around u */ + const float delta_T = exp(M_LN10 * cooling->Temp[T_index + 1]) - + exp(M_LN10 * cooling->Temp[T_index]); +#endif + + /**********************/ + /* Metal-free cooling */ + /**********************/ + + double Lambda_free; + + if (redshift > cooling->Redshifts[eagle_cooling_N_redshifts - 1]) { + + /* If we're using the high redshift tables then we don't interpolate + * in redshift */ + Lambda_free = interpolation_3d(cooling->table.H_plus_He_heating, /* */ + n_H_index, He_index, T_index, /* */ + d_n_h, d_He, d_T, /* */ + eagle_cooling_N_density, /* */ + eagle_cooling_N_He_frac, /* */ + eagle_cooling_N_temperature); /* */ + +#ifdef TO_BE_DONE + /* compute values at temperature gridpoints above and below input + * temperature for calculation of dlambda_du. 
Pass in NULL pointer for + * dlambda_du in order to skip */ + if (dlambda_du != NULL) { + temp_lambda_high = interpolation_3d( + cooling->table.H_plus_He_heating, n_H_index, He_index, T_index, d_n_h, + d_He, 1.f, cooling->N_nH, cooling->N_He, cooling->N_Temp); + temp_lambda_low = interpolation_3d( + cooling->table.H_plus_He_heating, n_H_index, He_index, T_index, d_n_h, + d_He, 0.f, cooling->N_nH, cooling->N_He, cooling->N_Temp); + } +#endif + + } else { + + /* Using normal tables, have to interpolate in redshift */ + Lambda_free = + interpolation_4d(cooling->table.H_plus_He_heating, /* */ + /*z_index=*/0, n_H_index, He_index, T_index, /* */ + cooling->dz, d_n_h, d_He, d_T, /* */ + eagle_cooling_N_loaded_redshifts, /* */ + eagle_cooling_N_density, /* */ + eagle_cooling_N_He_frac, /* */ + eagle_cooling_N_temperature); /* */ + +#ifdef TO_BE_DONE + /* compute values at temperature gridpoints above and below input + * temperature for calculation of dlambda_du */ + if (dlambda_du != NULL) { + temp_lambda_high = + interpolation_4d(cooling->table.H_plus_He_heating, 0, n_H_index, + He_index, T_index, cooling->dz, d_n_h, d_He, 1.f, 2, + cooling->N_nH, cooling->N_He, cooling->N_Temp); + temp_lambda_low = + interpolation_4d(cooling->table.H_plus_He_heating, 0, n_H_index, + He_index, T_index, cooling->dz, d_n_h, d_He, 0.f, 2, + cooling->N_nH, cooling->N_He, cooling->N_Temp); + } +#endif + } + +#ifdef TO_BE_DONE + if (dlambda_du != NULL) { + *dlambda_du += (temp_lambda_high - temp_lambda_low) / delta_T * dT_du; + } +#endif + + /* If we're testing cooling rate contributions write to array */ + if (element_lambda != NULL) { + element_lambda[0] = Lambda_free; + } + + /**********************/ + /* Electron abundance */ + /**********************/ + + double H_plus_He_electron_abundance; + + if (redshift > cooling->Redshifts[eagle_cooling_N_redshifts - 1]) { + + H_plus_He_electron_abundance = + interpolation_3d(cooling->table.H_plus_He_electron_abundance, /* */ + n_H_index, He_index, 
T_index, /* */ + d_n_h, d_He, d_T, /* */ + eagle_cooling_N_density, /* */ + eagle_cooling_N_He_frac, /* */ + eagle_cooling_N_temperature); /* */ +#ifdef TO_BE_DONE + /* compute values at temperature gridpoints above and below input + * temperature for calculation of dlambda_du. Pass in NULL pointer for + * dlambda_du in order to skip */ + + h_plus_he_electron_abundance_high = + interpolation_3d(cooling->table.H_plus_He_electron_abundance, n_H_index, + He_index, T_index, d_n_h, d_He, 1.f, cooling->N_nH, + cooling->N_He, cooling->N_Temp); + h_plus_he_electron_abundance_low = + interpolation_3d(cooling->table.H_plus_He_electron_abundance, n_H_index, + He_index, T_index, d_n_h, d_He, 0.f, cooling->N_nH, + cooling->N_He, cooling->N_Temp); + +#endif + + } else { + + H_plus_He_electron_abundance = + interpolation_4d(cooling->table.H_plus_He_electron_abundance, /* */ + /*z_index=*/0, n_H_index, He_index, T_index, /* */ + cooling->dz, d_n_h, d_He, d_T, /* */ + eagle_cooling_N_loaded_redshifts, /* */ + eagle_cooling_N_density, /* */ + eagle_cooling_N_He_frac, /* */ + eagle_cooling_N_temperature); /* */ + +#ifdef TO_BE_DONE + /* compute values at temperature gridpoints above and below input + * temperature for calculation of dlambda_du */ + h_plus_he_electron_abundance_high = + interpolation_4d(cooling->table.H_plus_He_electron_abundance, 0, + n_H_index, He_index, T_index, cooling->dz, d_n_h, d_He, + 1.f, 2, cooling->N_nH, cooling->N_He, cooling->N_Temp); + h_plus_he_electron_abundance_low = + interpolation_4d(cooling->table.H_plus_He_electron_abundance, 0, + n_H_index, He_index, T_index, cooling->dz, d_n_h, d_He, + 0.f, 2, cooling->N_nH, cooling->N_He, cooling->N_Temp); +#endif + } + + /**********************/ + /* Compton cooling */ + /**********************/ + + double Lambda_Compton = 0.; + + /* Do we need to add the inverse Compton cooling? 
*/ + /* It is *not* stored in the tables before re-ionisation */ + if ((redshift > cooling->Redshifts[eagle_cooling_N_redshifts - 1]) || + (redshift > cooling->H_reion_z)) { + + /* Note the minus sign */ + Lambda_Compton -= eagle_Compton_cooling_rate(cooling, redshift, n_H_cgs, T, + H_plus_He_electron_abundance); + } + + /* If we're testing cooling rate contributions write to array */ + if (element_lambda != NULL) { + element_lambda[1] = Lambda_Compton; + } + + /*******************************/ + /* Solar electron abundance */ + /*******************************/ + + double solar_electron_abundance; + + if (redshift > cooling->Redshifts[eagle_cooling_N_redshifts - 1]) { + + /* If we're using the high redshift tables then we don't interpolate + * in redshift */ + solar_electron_abundance = + interpolation_2d(cooling->table.electron_abundance, /* */ + n_H_index, T_index, /* */ + d_n_h, d_T, /* */ + eagle_cooling_N_density, /* */ + eagle_cooling_N_temperature); /* */ + +#ifdef TO_BE_DONE + /* compute values at temperature gridpoints above and below input + * temperature for calculation of dlambda_du */ + if (dlambda_du != NULL) { + solar_electron_abundance_high = + interpolation_2d(cooling->table.electron_abundance, n_H_index, + T_index, d_n_h, 1.f, cooling->N_nH, cooling->N_Temp); + solar_electron_abundance_low = + interpolation_2d(cooling->table.electron_abundance, n_H_index, + T_index, d_n_h, 0.f, cooling->N_nH, cooling->N_Temp); + } +#endif + + } else { + + /* Using normal tables, have to interpolate in redshift */ + solar_electron_abundance = + interpolation_3d(cooling->table.electron_abundance, /* */ + /*z_index=*/0, n_H_index, T_index, /* */ + cooling->dz, d_n_h, d_T, /* */ + eagle_cooling_N_loaded_redshifts, /* */ + eagle_cooling_N_density, /* */ + eagle_cooling_N_temperature); /* */ + +#ifdef TO_BE_DONE + /* compute values at temperature gridpoints above and below input + * temperature for calculation of dlambda_du */ + if (dlambda_du != NULL) { + 
solar_electron_abundance_high = interpolation_3d( + cooling->table.electron_abundance, 0, n_H_index, T_index, cooling->dz, + d_n_h, 1.f, 2, cooling->N_nH, cooling->N_Temp); + solar_electron_abundance_low = interpolation_3d( + cooling->table.electron_abundance, 0, n_H_index, T_index, cooling->dz, + d_n_h, 0.f, 2, cooling->N_nH, cooling->N_Temp); + } +#endif + } + + const double abundance_ratio = + H_plus_He_electron_abundance / solar_electron_abundance; + + /**********************/ + /* Metal-line cooling */ + /**********************/ + + /* for each element the cooling rate is multiplied by the ratio of H, He + * electron abundance to solar electron abundance then by the ratio of the + * particle metal abundance to solar metal abundance. */ + + double lambda_metal[eagle_cooling_N_metal]; + + if (redshift > cooling->Redshifts[eagle_cooling_N_redshifts - 1]) { + + for (int elem = 0; elem < eagle_cooling_N_metal; elem++) { + + lambda_metal[elem] = + interpolation_3d_no_x(cooling->table.metal_heating, /* */ + elem, n_H_index, T_index, /* */ + /*delta_elem=*/0.f, d_n_h, d_T, /* */ + eagle_cooling_N_metal, /* */ + eagle_cooling_N_density, /* */ + eagle_cooling_N_temperature); /* */ + + lambda_metal[elem] *= abundance_ratio; + lambda_metal[elem] *= solar_ratio[elem + 2]; + +#ifdef TO_BE_DONE + /* compute values at temperature gridpoints above and below input + * temperature for calculation of dlambda_du */ + if (dlambda_du != NULL) { + elem_cool_high = interpolation_3d_no_x( + cooling->table.metal_heating, elem, n_H_index, T_index, 0.f, d_n_h, + 1.f, cooling->N_Elements, cooling->N_nH, cooling->N_Temp); + + elem_cool_low = interpolation_3d_no_x( + cooling->table.metal_heating, elem, n_H_index, T_index, 0.f, d_n_h, + 0.f, cooling->N_nH, cooling->N_Temp, cooling->N_Elements); + + *dlambda_du += (elem_cool_high * h_plus_he_electron_abundance_high / + solar_electron_abundance_high - + elem_cool_low * h_plus_he_electron_abundance_low / + solar_electron_abundance_low) / + 
delta_T * dT_du * solar_ratio[elem + 2]; + } +#endif + } + + } else { + + for (int elem = 0; elem < eagle_cooling_N_metal; elem++) { + + lambda_metal[elem] = interpolation_4d_no_x( + cooling->table.metal_heating, /* */ + elem, /*z_index=*/0, n_H_index, T_index, /* */ + /*delta_elem=*/0.f, cooling->dz, d_n_h, d_T, /* */ + eagle_cooling_N_metal, /* */ + eagle_cooling_N_loaded_redshifts, /* */ + eagle_cooling_N_density, /* */ + eagle_cooling_N_temperature); /* */ + + lambda_metal[elem] *= abundance_ratio; + lambda_metal[elem] *= solar_ratio[elem + 2]; + +#ifdef TO_BE_DONE + /* compute values at temperature gridpoints above and below input + * temperature for calculation of dlambda_du */ + if (dlambda_du != NULL) { + elem_cool_high = interpolation_4d_no_x( + cooling->table.metal_heating, elem, 0, n_H_index, T_index, 0., + cooling->dz, d_n_h, 1.f, cooling->N_Elements, 2, cooling->N_nH, + cooling->N_Temp); + + elem_cool_low = interpolation_4d_no_x( + cooling->table.metal_heating, elem, 0, n_H_index, T_index, 0., + cooling->dz, d_n_h, 0.f, cooling->N_Elements, 2, cooling->N_nH, + cooling->N_Temp); + + *dlambda_du += (elem_cool_high * h_plus_he_electron_abundance_high / + solar_electron_abundance_high - + elem_cool_low * h_plus_he_electron_abundance_low / + solar_electron_abundance_low) / + delta_T * dT_du * solar_ratio[elem + 2]; + } +#endif + } + } + + if (element_lambda != NULL) { + for (int elem = 0; elem < eagle_cooling_N_metal; ++elem) { + element_lambda[elem + 2] = lambda_metal[elem]; + } + } + + /* Sum up all the contributions */ + double Lambda_net = Lambda_free + Lambda_Compton; + for (int elem = 0; elem < eagle_cooling_N_metal; ++elem) { + Lambda_net += lambda_metal[elem]; + } + + return Lambda_net; +} + +/** + * @brief Wrapper function used to calculate cooling rate and dLambda_du. + * Table indices and offsets for redshift, hydrogen number density and + * helium fraction are passed it so as to compute them only once per particle. 
+ * + * @param log_u_cgs Natural log of internal energy per unit mass in CGS units. + * @param redshift The current redshift. + * @param n_H_cgs Hydrogen number density in CGS units. + * @param abundance_ratio Ratio of element abundance to solar. + * + * @param n_H_index Particle hydrogen number density index + * @param d_n_h Particle hydrogen number density offset + * @param He_index Particle helium fraction index + * @param d_He Particle helium fraction offset + * @param cooling #cooling_function_data structure + * + * @param dLambdaNet_du (return) Derivative of the cooling rate with respect to + * u. + * + * @return The cooling rate + */ +INLINE static double eagle_cooling_rate( + double log_u_cgs, double redshift, double n_H_cgs, + const float abundance_ratio[chemistry_element_count + 2], int n_H_index, + float d_n_h, int He_index, float d_He, + const struct cooling_function_data *restrict cooling, + double *dLambdaNet_du) { + + return eagle_metal_cooling_rate(log_u_cgs / M_LN10, redshift, n_H_cgs, + abundance_ratio, n_H_index, d_n_h, He_index, + d_He, cooling, dLambdaNet_du, + /*element_lambda=*/NULL); +} + +#endif /* SWIFT_EAGLE_COOLING_RATES_H */ diff --git a/src/cooling/EAGLE/cooling_struct.h b/src/cooling/EAGLE/cooling_struct.h index 24c8b2088bf5b54134fde7a4a76ab3d2ae61c6ba..09a16e8c10e5e0a1f03ea3455e3ffff8c942a982 100644 --- a/src/cooling/EAGLE/cooling_struct.h +++ b/src/cooling/EAGLE/cooling_struct.h @@ -19,14 +19,122 @@ #ifndef SWIFT_COOLING_STRUCT_EAGLE_H #define SWIFT_COOLING_STRUCT_EAGLE_H +#define eagle_table_path_name_length 500 + +/** + * @brief struct containing cooling tables + */ +struct cooling_tables { + + /* array of heating rates due to metals */ + float *metal_heating; + + /* array of heating rates due to hydrogen and helium */ + float *H_plus_He_heating; + + /* array of electron abundances due to hydrogen and helium */ + float *H_plus_He_electron_abundance; + + /* array of temperatures */ + float *temperature; + + /* array of electron 
abundances due to metals */ + float *electron_abundance; +}; + /** * @brief Properties of the cooling function. */ -struct cooling_function_data {}; +struct cooling_function_data { + + /*! Cooling tables */ + struct cooling_tables table; + + /*! Redshift bins */ + float *Redshifts; + + /*! Hydrogen number density bins */ + float *nH; + + /*! Temperature bins */ + float *Temp; + + /*! Helium fraction bins */ + float *HeFrac; + + /*! Internal energy bins */ + float *Therm; + + /*! Solar mass fractions */ + float *SolarAbundances; + + /*! Filepath to the directory containing the HDF5 cooling tables */ + char cooling_table_path[eagle_table_path_name_length]; + + /*! Redshift of H reionization */ + float H_reion_z; + + /*! Ca over Si abundance ratio */ + float calcium_over_silicon_ratio; + + /*! S over Si abundance ratio */ + float sulphur_over_silicon_ratio; + + /*! Redshift of He reionization */ + float He_reion_z_centre; + + /*! Spread of the He reionization */ + float He_reion_z_sigma; + + /*! He reionization energy in CGS units */ + float He_reion_heat_cgs; + + /*! Internal energy conversion from internal units to CGS (for quick access) + */ + double internal_energy_to_cgs; + + /*! Internal energy conversion from CGS to internal units (for quick access) + */ + double internal_energy_from_cgs; + + /*! Number density conversion from internal units to CGS (for quick access) */ + double number_density_to_cgs; + + /*! Inverse of proton mass in cgs (for quick access) */ + double inv_proton_mass_cgs; + + /*! Temperature of the CMB at present day (for quick access) */ + double T_CMB_0; + + /*! Compton rate in cgs units */ + double compton_rate_cgs; + + /*! Index of the current redshift along the redshift index of the tables */ + int z_index; + + /*! Index of the previous tables along the redshift index of the tables */ + int previous_z_index; + + /*! Distance between the current redshift and the table[z_index] */ + float dz; + + /*! 
Index of the table below current redshift */ + int low_z_index; + + /*! Index of the table above current redshift */ + int high_z_index; + + /*! Are we doing Newton-Raphson iterations? */ + int newton_flag; +}; /** * @brief Properties of the cooling stored in the extended particle data. */ -struct cooling_xpart_data {}; +struct cooling_xpart_data { + + /*! Cumulative energy radiated by the particle */ + float radiated_energy; +}; #endif /* SWIFT_COOLING_STRUCT_EAGLE_H */ diff --git a/src/cooling/EAGLE/cooling_tables.c b/src/cooling/EAGLE/cooling_tables.c new file mode 100644 index 0000000000000000000000000000000000000000..4f3aed05746ee0f7f6559936ad151d8227bbb90c --- /dev/null +++ b/src/cooling/EAGLE/cooling_tables.c @@ -0,0 +1,774 @@ +/******************************************************************************* + * This file is part of SWIFT. + * Copyright (c) 2017 Matthieu Schaller (matthieu.schaller@durham.ac.uk) + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published + * by the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public License + * along with this program. If not, see <http://www.gnu.org/licenses/>. + * + ******************************************************************************/ + +/** + * @file src/cooling/EAGLE/cooling_tables.c + * @brief Functions to read EAGLE tables + */ + +/* Config parameters. */ +#include "../config.h" + +#include <hdf5.h> +#include <math.h> +#include <stdlib.h> +#include <string.h> + +/* Local includes. 
*/ +#include "cooling_struct.h" +#include "cooling_tables.h" +#include "error.h" +#include "interpolate.h" + +/** + * @brief Names of the elements in the order they are stored in the files + */ +static const char *eagle_tables_element_names[9] = { + "Carbon", "Nitrogen", "Oxygen", "Neon", "Magnesium", + "Silicon", "Sulphur", "Calcium", "Iron"}; + +/*! Number of elements in a z-slice of the H+He cooling rate tables */ +static const size_t num_elements_cooling_rate = + eagle_cooling_N_temperature * eagle_cooling_N_density; + +/*! Number of elements in a z-slice of the metal cooling rate tables */ +static const size_t num_elements_metal_heating = eagle_cooling_N_metal * + eagle_cooling_N_temperature * + eagle_cooling_N_density; + +/*! Number of elements in a z-slice of the metal electron abundance tables */ +static const size_t num_elements_electron_abundance = + eagle_cooling_N_temperature * eagle_cooling_N_density; + +/*! Number of elements in a z-slice of the temperature tables */ +static const size_t num_elements_temperature = eagle_cooling_N_He_frac * + eagle_cooling_N_temperature * + eagle_cooling_N_density; + +/*! Number of elements in a z-slice of the H+He cooling rate tables */ +static const size_t num_elements_HpHe_heating = eagle_cooling_N_He_frac * + eagle_cooling_N_temperature * + eagle_cooling_N_density; + +/*! 
Number of elements in a z-slice of the H+He electron abundance tables */ +static const size_t num_elements_HpHe_electron_abundance = + eagle_cooling_N_He_frac * eagle_cooling_N_temperature * + eagle_cooling_N_density; + +/** + * @brief Reads in EAGLE table of redshift values + * + * @param cooling #cooling_function_data structure + */ +void get_cooling_redshifts(struct cooling_function_data *cooling) { + + /* Read the list of table redshifts */ + char redshift_filename[eagle_table_path_name_length + 16]; + sprintf(redshift_filename, "%s/redshifts.dat", cooling->cooling_table_path); + + FILE *infile = fopen(redshift_filename, "r"); + if (infile == NULL) { + error("Cannot open the list of cooling table redshifts (%s)", + redshift_filename); + } + + int N_Redshifts = -1; + + /* Read the file */ + if (!feof(infile)) { + + char buffer[50]; + + /* Read the number of redshifts (1st line in the file) */ + if (fgets(buffer, 50, infile) != NULL) + N_Redshifts = atoi(buffer); + else + error("Impossible to read the number of redshifts"); + + /* Be verbose about it */ + message("Found cooling tables at %d redhsifts", N_Redshifts); + + /* Check value */ + if (N_Redshifts != eagle_cooling_N_redshifts) + error("Invalid redshift lenght array."); + + /* Allocate the list of redshifts */ + if (posix_memalign((void **)&cooling->Redshifts, SWIFT_STRUCT_ALIGNMENT, + eagle_cooling_N_redshifts * sizeof(float)) != 0) + error("Failed to allocate redshift table"); + + /* Read all the redshift values */ + int count = 0; + while (!feof(infile)) { + if (fgets(buffer, 50, infile) != NULL) { + cooling->Redshifts[count] = atof(buffer); + count++; + } + } + + /* Verify that the file was self-consistent */ + if (count != N_Redshifts) { + error( + "Redshift file (%s) does not contain the correct number of redshifts " + "(%d vs. 
%d)", + redshift_filename, count, N_Redshifts); + } + } else { + error("Redshift file (%s) is empty!", redshift_filename); + } + + /* We are done with this file */ + fclose(infile); + + /* EAGLE cooling assumes cooling->Redshifts table is in increasing order. Test + * this. */ + for (int i = 0; i < N_Redshifts - 1; i++) { + if (cooling->Redshifts[i + 1] < cooling->Redshifts[i]) { + error("table should be in increasing order\n"); + } + } +} + +/** + * @brief Reads in EAGLE cooling table header. Consists of tables + * of values for temperature, hydrogen number density, helium fraction + * solar element abundances, and elements used to index the cooling tables. + * + * @param fname Filepath for cooling table from which to read header + * @param cooling Cooling data structure + */ +void read_cooling_header(const char *fname, + struct cooling_function_data *cooling) { + +#ifdef HAVE_HDF5 + + int N_Temp, N_nH, N_He, N_SolarAbundances, N_Elements; + + /* read sizes of array dimensions */ + hid_t tempfile_id = H5Fopen(fname, H5F_ACC_RDONLY, H5P_DEFAULT); + if (tempfile_id < 0) error("unable to open file %s\n", fname); + + /* read size of each table of values */ + hid_t dataset = + H5Dopen(tempfile_id, "/Header/Number_of_temperature_bins", H5P_DEFAULT); + herr_t status = + H5Dread(dataset, H5T_NATIVE_INT, H5S_ALL, H5S_ALL, H5P_DEFAULT, &N_Temp); + if (status < 0) error("error reading number of temperature bins"); + status = H5Dclose(dataset); + if (status < 0) error("error closing cooling dataset"); + + /* Check value */ + if (N_Temp != eagle_cooling_N_temperature) + error("Invalid temperature array length."); + + dataset = H5Dopen(tempfile_id, "/Header/Number_of_density_bins", H5P_DEFAULT); + status = + H5Dread(dataset, H5T_NATIVE_INT, H5S_ALL, H5S_ALL, H5P_DEFAULT, &N_nH); + if (status < 0) error("error reading number of density bins"); + status = H5Dclose(dataset); + if (status < 0) error("error closing cooling dataset"); + + /* Check value */ + if (N_nH != 
eagle_cooling_N_density) error("Invalid density array length."); + + dataset = + H5Dopen(tempfile_id, "/Header/Number_of_helium_fractions", H5P_DEFAULT); + status = + H5Dread(dataset, H5T_NATIVE_INT, H5S_ALL, H5S_ALL, H5P_DEFAULT, &N_He); + if (status < 0) error("error reading number of He fraction bins"); + status = H5Dclose(dataset); + if (status < 0) error("error closing cooling dataset"); + + /* Check value */ + if (N_He != eagle_cooling_N_He_frac) + error("Invalid Helium fraction array length."); + + dataset = H5Dopen(tempfile_id, "/Header/Abundances/Number_of_abundances", + H5P_DEFAULT); + status = H5Dread(dataset, H5T_NATIVE_INT, H5S_ALL, H5S_ALL, H5P_DEFAULT, + &N_SolarAbundances); + if (status < 0) error("error reading number of solar abundance bins"); + status = H5Dclose(dataset); + if (status < 0) error("error closing cooling dataset"); + + /* Check value */ + if (N_SolarAbundances != eagle_cooling_N_abundances) + error("Invalid solar abundances array length."); + + dataset = H5Dopen(tempfile_id, "/Header/Number_of_metals", H5P_DEFAULT); + status = H5Dread(dataset, H5T_NATIVE_INT, H5S_ALL, H5S_ALL, H5P_DEFAULT, + &N_Elements); + if (status < 0) error("error reading number of metal bins"); + status = H5Dclose(dataset); + if (status < 0) error("error closing cooling dataset"); + + /* Check value */ + if (N_Elements != eagle_cooling_N_metal) error("Invalid metal array length."); + + /* allocate arrays of values for each of the above quantities */ + if (posix_memalign((void **)&cooling->Temp, SWIFT_STRUCT_ALIGNMENT, + N_Temp * sizeof(float)) != 0) + error("Failed to allocate temperature table"); + if (posix_memalign((void **)&cooling->Therm, SWIFT_STRUCT_ALIGNMENT, + N_Temp * sizeof(float)) != 0) + error("Failed to allocate internal energy table"); + if (posix_memalign((void **)&cooling->nH, SWIFT_STRUCT_ALIGNMENT, + N_nH * sizeof(float)) != 0) + error("Failed to allocate nH table"); + if (posix_memalign((void **)&cooling->HeFrac, SWIFT_STRUCT_ALIGNMENT, + 
N_He * sizeof(float)) != 0) + error("Failed to allocate HeFrac table"); + if (posix_memalign((void **)&cooling->SolarAbundances, SWIFT_STRUCT_ALIGNMENT, + N_SolarAbundances * sizeof(float)) != 0) + error("Failed to allocate Solar abundances table"); + + /* read in values for each of the arrays */ + dataset = H5Dopen(tempfile_id, "/Solar/Temperature_bins", H5P_DEFAULT); + status = H5Dread(dataset, H5T_NATIVE_FLOAT, H5S_ALL, H5S_ALL, H5P_DEFAULT, + cooling->Temp); + if (status < 0) error("error reading temperature bins"); + status = H5Dclose(dataset); + if (status < 0) error("error closing cooling dataset"); + + dataset = H5Dopen(tempfile_id, "/Solar/Hydrogen_density_bins", H5P_DEFAULT); + status = H5Dread(dataset, H5T_NATIVE_FLOAT, H5S_ALL, H5S_ALL, H5P_DEFAULT, + cooling->nH); + if (status < 0) error("error reading H density bins"); + status = H5Dclose(dataset); + if (status < 0) error("error closing cooling dataset"); + + dataset = H5Dopen(tempfile_id, "/Metal_free/Helium_mass_fraction_bins", + H5P_DEFAULT); + status = H5Dread(dataset, H5T_NATIVE_FLOAT, H5S_ALL, H5S_ALL, H5P_DEFAULT, + cooling->HeFrac); + if (status < 0) error("error reading He fraction bins"); + status = H5Dclose(dataset); + if (status < 0) error("error closing cooling dataset"); + + dataset = H5Dopen(tempfile_id, "/Header/Abundances/Solar_mass_fractions", + H5P_DEFAULT); + status = H5Dread(dataset, H5T_NATIVE_FLOAT, H5S_ALL, H5S_ALL, H5P_DEFAULT, + cooling->SolarAbundances); + if (status < 0) error("error reading solar mass fraction bins"); + status = H5Dclose(dataset); + if (status < 0) error("error closing cooling dataset"); + + dataset = H5Dopen(tempfile_id, "/Metal_free/Temperature/Energy_density_bins", + H5P_DEFAULT); + status = H5Dread(dataset, H5T_NATIVE_FLOAT, H5S_ALL, H5S_ALL, H5P_DEFAULT, + cooling->Therm); + if (status < 0) error("error reading internal energy bins"); + status = H5Dclose(dataset); + if (status < 0) error("error closing cooling dataset"); + + /* Convert to temperature, 
density and internal energy arrays to log10 */ + for (int i = 0; i < N_Temp; i++) { + cooling->Temp[i] = log10(cooling->Temp[i]); + cooling->Therm[i] = log10(cooling->Therm[i]); + } + for (int i = 0; i < N_nH; i++) { + cooling->nH[i] = log10(cooling->nH[i]); + } + +#else + error("Need HDF5 to read cooling tables"); +#endif +} + +/** + * @brief Allocate space for cooling tables. + * + * @param cooling #cooling_function_data structure + */ +void allocate_cooling_tables(struct cooling_function_data *restrict cooling) { + + /* Allocate arrays to store cooling tables. Arrays contain two tables of + * cooling rates with one table being for the redshift above current redshift + * and one below. */ + + if (posix_memalign((void **)&cooling->table.metal_heating, + SWIFT_STRUCT_ALIGNMENT, + eagle_cooling_N_loaded_redshifts * + num_elements_metal_heating * sizeof(float)) != 0) + error("Failed to allocate metal_heating array"); + + if (posix_memalign((void **)&cooling->table.electron_abundance, + SWIFT_STRUCT_ALIGNMENT, + eagle_cooling_N_loaded_redshifts * + num_elements_electron_abundance * sizeof(float)) != 0) + error("Failed to allocate electron_abundance array"); + + if (posix_memalign((void **)&cooling->table.temperature, + SWIFT_STRUCT_ALIGNMENT, + eagle_cooling_N_loaded_redshifts * + num_elements_temperature * sizeof(float)) != 0) + error("Failed to allocate temperature array"); + + if (posix_memalign((void **)&cooling->table.H_plus_He_heating, + SWIFT_STRUCT_ALIGNMENT, + eagle_cooling_N_loaded_redshifts * + num_elements_HpHe_heating * sizeof(float)) != 0) + error("Failed to allocate H_plus_He_heating array"); + + if (posix_memalign((void **)&cooling->table.H_plus_He_electron_abundance, + SWIFT_STRUCT_ALIGNMENT, + eagle_cooling_N_loaded_redshifts * + num_elements_HpHe_electron_abundance * + sizeof(float)) != 0) + error("Failed to allocate H_plus_He_electron_abundance array"); +} + +/** + * @brief Get the redshift invariant table of cooling rates (before reionization + * 
at redshift ~9) Reads in table of cooling rates and electron abundances due + * to metals (depending on temperature, hydrogen number density), cooling rates + * and electron abundances due to hydrogen and helium (depending on temperature, + * hydrogen number density and helium fraction), and temperatures (depending on + * internal energy, hydrogen number density and helium fraction; note: this is + * distinct from table of temperatures read in ReadCoolingHeader, as that table + * is used to index the cooling, electron abundance tables, whereas this one is + * used to obtain temperature of particle) + * + * @param cooling #cooling_function_data structure + */ +static void get_redshift_invariant_table( + struct cooling_function_data *restrict cooling) { +#ifdef HAVE_HDF5 + + /* Temporary tables */ + float *net_cooling_rate = NULL; + float *electron_abundance = NULL; + float *temperature = NULL; + float *he_net_cooling_rate = NULL; + float *he_electron_abundance = NULL; + + /* Allocate arrays for reading in cooling tables. 
*/ + if (posix_memalign((void **)&net_cooling_rate, SWIFT_STRUCT_ALIGNMENT, + num_elements_cooling_rate * sizeof(float)) != 0) + error("Failed to allocate net_cooling_rate array"); + if (posix_memalign((void **)&electron_abundance, SWIFT_STRUCT_ALIGNMENT, + num_elements_electron_abundance * sizeof(float)) != 0) + error("Failed to allocate electron_abundance array"); + if (posix_memalign((void **)&temperature, SWIFT_STRUCT_ALIGNMENT, + num_elements_temperature * sizeof(float)) != 0) + error("Failed to allocate temperature array"); + if (posix_memalign((void **)&he_net_cooling_rate, SWIFT_STRUCT_ALIGNMENT, + num_elements_HpHe_heating * sizeof(float)) != 0) + error("Failed to allocate he_net_cooling_rate array"); + if (posix_memalign((void **)&he_electron_abundance, SWIFT_STRUCT_ALIGNMENT, + num_elements_HpHe_electron_abundance * sizeof(float)) != 0) + error("Failed to allocate he_electron_abundance array"); + + /* Decide which high redshift table to read. Indices set in cooling_update */ + char filename[eagle_table_path_name_length + 21]; + if (cooling->low_z_index == -1) { + sprintf(filename, "%sz_8.989nocompton.hdf5", cooling->cooling_table_path); + } else if (cooling->low_z_index == -2) { + sprintf(filename, "%sz_photodis.hdf5", cooling->cooling_table_path); + } + + hid_t file_id = H5Fopen(filename, H5F_ACC_RDONLY, H5P_DEFAULT); + if (file_id < 0) error("unable to open file %s\n", filename); + + char set_name[64]; + + /* read in cooling rates due to metals */ + for (int specs = 0; specs < eagle_cooling_N_metal; specs++) { + + /* Read in the cooling rate for this metal */ + sprintf(set_name, "/%s/Net_Cooling", eagle_tables_element_names[specs]); + hid_t dataset = H5Dopen(file_id, set_name, H5P_DEFAULT); + herr_t status = H5Dread(dataset, H5T_NATIVE_FLOAT, H5S_ALL, H5S_ALL, + H5P_DEFAULT, net_cooling_rate); + if (status < 0) error("error reading metal cooling rate table"); + status = H5Dclose(dataset); + if (status < 0) error("error closing cooling dataset"); + + /* 
Transpose from order tables are stored in (temperature, nH) + * to (metal species, nH, temperature) where fastest + * varying index is on right. Tables contain cooling rates but we + * want rate of change of internal energy, hence minus sign. */ + for (int j = 0; j < eagle_cooling_N_temperature; j++) { + for (int k = 0; k < eagle_cooling_N_density; k++) { + + /* Index in the HDF5 table */ + const int hdf5_index = row_major_index_2d( + j, k, eagle_cooling_N_temperature, eagle_cooling_N_density); + + /* Index in the internal table */ + const int internal_index = row_major_index_3d( + specs, k, j, eagle_cooling_N_metal, eagle_cooling_N_density, + eagle_cooling_N_temperature); + + /* Change the sign and transpose */ + cooling->table.metal_heating[internal_index] = + -net_cooling_rate[hdf5_index]; + } + } + } + + /* read in cooling rates due to H + He */ + strcpy(set_name, "/Metal_free/Net_Cooling"); + hid_t dataset = H5Dopen(file_id, set_name, H5P_DEFAULT); + herr_t status = H5Dread(dataset, H5T_NATIVE_FLOAT, H5S_ALL, H5S_ALL, + H5P_DEFAULT, he_net_cooling_rate); + if (status < 0) error("error reading metal free cooling rate table"); + status = H5Dclose(dataset); + if (status < 0) error("error closing cooling dataset"); + + /* read in Temperatures */ + strcpy(set_name, "/Metal_free/Temperature/Temperature"); + dataset = H5Dopen(file_id, set_name, H5P_DEFAULT); + status = H5Dread(dataset, H5T_NATIVE_FLOAT, H5S_ALL, H5S_ALL, H5P_DEFAULT, + temperature); + if (status < 0) error("error reading temperature table"); + status = H5Dclose(dataset); + if (status < 0) error("error closing cooling dataset"); + + /* read in H + He electron abundances */ + strcpy(set_name, "/Metal_free/Electron_density_over_n_h"); + dataset = H5Dopen(file_id, set_name, H5P_DEFAULT); + status = H5Dread(dataset, H5T_NATIVE_FLOAT, H5S_ALL, H5S_ALL, H5P_DEFAULT, + he_electron_abundance); + if (status < 0) error("error reading electron density table"); + status = H5Dclose(dataset); + if (status < 0) 
error("error closing cooling dataset"); + + /* Transpose from order tables are stored in (helium fraction, temperature, + * nH) to (nH, helium fraction, temperature) where fastest + * varying index is on right. Tables contain cooling rates but we + * want rate of change of internal energy, hence minus sign. */ + for (int i = 0; i < eagle_cooling_N_He_frac; i++) { + for (int j = 0; j < eagle_cooling_N_temperature; j++) { + for (int k = 0; k < eagle_cooling_N_density; k++) { + + /* Index in the HDF5 table */ + const int hdf5_index = row_major_index_3d( + i, j, k, eagle_cooling_N_He_frac, eagle_cooling_N_temperature, + eagle_cooling_N_density); + + /* Index in the internal table */ + const int internal_index = row_major_index_3d( + k, i, j, eagle_cooling_N_density, eagle_cooling_N_He_frac, + eagle_cooling_N_temperature); + + /* Change the sign and transpose */ + cooling->table.H_plus_He_heating[internal_index] = + -he_net_cooling_rate[hdf5_index]; + + /* Convert to log T and transpose */ + cooling->table.temperature[internal_index] = + log10(temperature[hdf5_index]); + + /* Just transpose */ + cooling->table.H_plus_He_electron_abundance[internal_index] = + he_electron_abundance[hdf5_index]; + } + } + } + + /* read in electron densities due to metals */ + strcpy(set_name, "/Solar/Electron_density_over_n_h"); + dataset = H5Dopen(file_id, set_name, H5P_DEFAULT); + status = H5Dread(dataset, H5T_NATIVE_FLOAT, H5S_ALL, H5S_ALL, H5P_DEFAULT, + electron_abundance); + if (status < 0) error("error reading solar electron density table"); + status = H5Dclose(dataset); + if (status < 0) error("error closing cooling dataset"); + + /* Transpose from order tables are stored in (temperature, nH) to + * (nH, temperature) where fastest varying index is on right. 
*/ + for (int i = 0; i < eagle_cooling_N_temperature; i++) { + for (int j = 0; j < eagle_cooling_N_density; j++) { + + /* Index in the HDF5 table */ + const int hdf5_index = row_major_index_2d( + i, j, eagle_cooling_N_temperature, eagle_cooling_N_density); + + /* Index in the internal table */ + const int internal_index = row_major_index_2d( + j, i, eagle_cooling_N_density, eagle_cooling_N_temperature); + + /* Just transpose */ + cooling->table.electron_abundance[internal_index] = + electron_abundance[hdf5_index]; + } + } + + status = H5Fclose(file_id); + if (status < 0) error("error closing file"); + + free(net_cooling_rate); + free(electron_abundance); + free(temperature); + free(he_net_cooling_rate); + free(he_electron_abundance); + +#ifdef SWIFT_DEBUG_CHECKS + message("done reading in redshift invariant table"); +#endif + +#else + error("Need HDF5 to read cooling tables"); +#endif +} + +/** + * @brief Get redshift dependent table of cooling rates. + * Reads in table of cooling rates and electron abundances due to + * metals (depending on temperature, hydrogen number density), cooling rates and + * electron abundances due to hydrogen and helium (depending on temperature, + * hydrogen number density and helium fraction), and temperatures (depending on + * internal energy, hydrogen number density and helium fraction; note: this is + * distinct from table of temperatures read in ReadCoolingHeader, as that table + * is used to index the cooling, electron abundance tables, whereas this one is + * used to obtain temperature of particle) + * + * @param cooling #cooling_function_data structure + */ +static void get_cooling_table(struct cooling_function_data *restrict cooling) { + +#ifdef HAVE_HDF5 + + /* Temporary tables */ + float *net_cooling_rate = NULL; + float *electron_abundance = NULL; + float *temperature = NULL; + float *he_net_cooling_rate = NULL; + float *he_electron_abundance = NULL; + + /* Allocate arrays for reading in cooling tables. 
*/ + if (posix_memalign((void **)&net_cooling_rate, SWIFT_STRUCT_ALIGNMENT, + num_elements_cooling_rate * sizeof(float)) != 0) + error("Failed to allocate net_cooling_rate array"); + if (posix_memalign((void **)&electron_abundance, SWIFT_STRUCT_ALIGNMENT, + num_elements_electron_abundance * sizeof(float)) != 0) + error("Failed to allocate electron_abundance array"); + if (posix_memalign((void **)&temperature, SWIFT_STRUCT_ALIGNMENT, + num_elements_temperature * sizeof(float)) != 0) + error("Failed to allocate temperature array"); + if (posix_memalign((void **)&he_net_cooling_rate, SWIFT_STRUCT_ALIGNMENT, + num_elements_HpHe_heating * sizeof(float)) != 0) + error("Failed to allocate he_net_cooling_rate array"); + if (posix_memalign((void **)&he_electron_abundance, SWIFT_STRUCT_ALIGNMENT, + num_elements_HpHe_electron_abundance * sizeof(float)) != 0) + error("Failed to allocate he_electron_abundance array"); + + /* Read in tables, transpose so that values for indices which vary most are + * adjacent. Repeat for redshift above and redshift below current value. 
*/ + for (int z_index = cooling->low_z_index; z_index <= cooling->high_z_index; + z_index++) { + + /* Index along redshift dimension for the subset of tables we read */ + const int local_z_index = z_index - cooling->low_z_index; + +#ifdef SWIFT_DEBUG_CHECKS + if (local_z_index >= eagle_cooling_N_loaded_redshifts) + error("Reading invalid number of tables along z axis."); +#endif + + /* Open table for this redshift index */ + char fname[eagle_table_path_name_length + 32]; + sprintf(fname, "%sz_%1.3f.hdf5", cooling->cooling_table_path, + cooling->Redshifts[z_index]); + hid_t file_id = H5Fopen(fname, H5F_ACC_RDONLY, H5P_DEFAULT); + if (file_id < 0) error("unable to open file %s", fname); + + char set_name[64]; + + /* read in cooling rates due to metals */ + for (int specs = 0; specs < eagle_cooling_N_metal; specs++) { + + sprintf(set_name, "/%s/Net_Cooling", eagle_tables_element_names[specs]); + hid_t dataset = H5Dopen(file_id, set_name, H5P_DEFAULT); + herr_t status = H5Dread(dataset, H5T_NATIVE_FLOAT, H5S_ALL, H5S_ALL, + H5P_DEFAULT, net_cooling_rate); + if (status < 0) error("error reading metal cooling rate table"); + status = H5Dclose(dataset); + if (status < 0) error("error closing cooling dataset"); + + /* Transpose from order tables are stored in (temperature, nH) + * to (metal species, redshift, nH, temperature) where fastest + * varying index is on right. Tables contain cooling rates but we + * want rate of change of internal energy, hence minus sign. 
*/ + for (int i = 0; i < eagle_cooling_N_density; i++) { + for (int j = 0; j < eagle_cooling_N_temperature; j++) { + + /* Index in the HDF5 table */ + const int hdf5_index = row_major_index_2d( + j, i, eagle_cooling_N_temperature, eagle_cooling_N_density); + + /* Index in the internal table */ + const int internal_index = row_major_index_4d( + specs, local_z_index, i, j, eagle_cooling_N_metal, + eagle_cooling_N_loaded_redshifts, eagle_cooling_N_density, + eagle_cooling_N_temperature); + + /* Change the sign and transpose */ + cooling->table.metal_heating[internal_index] = + -net_cooling_rate[hdf5_index]; + } + } + } + + /* read in cooling rates due to H + He */ + strcpy(set_name, "/Metal_free/Net_Cooling"); + hid_t dataset = H5Dopen(file_id, set_name, H5P_DEFAULT); + herr_t status = H5Dread(dataset, H5T_NATIVE_FLOAT, H5S_ALL, H5S_ALL, + H5P_DEFAULT, he_net_cooling_rate); + if (status < 0) error("error reading metal free cooling rate table"); + status = H5Dclose(dataset); + if (status < 0) error("error closing cooling dataset"); + + /* read in Temperature */ + strcpy(set_name, "/Metal_free/Temperature/Temperature"); + dataset = H5Dopen(file_id, set_name, H5P_DEFAULT); + status = H5Dread(dataset, H5T_NATIVE_FLOAT, H5S_ALL, H5S_ALL, H5P_DEFAULT, + temperature); + if (status < 0) error("error reading temperature table"); + status = H5Dclose(dataset); + if (status < 0) error("error closing cooling dataset"); + + /* Read in H + He electron abundance */ + strcpy(set_name, "/Metal_free/Electron_density_over_n_h"); + dataset = H5Dopen(file_id, set_name, H5P_DEFAULT); + status = H5Dread(dataset, H5T_NATIVE_FLOAT, H5S_ALL, H5S_ALL, H5P_DEFAULT, + he_electron_abundance); + if (status < 0) error("error reading electron density table"); + status = H5Dclose(dataset); + if (status < 0) error("error closing cooling dataset"); + + /* Transpose from order tables are stored in (helium fraction, temperature, + * nH) to (redshift, nH, helium fraction, temperature) where fastest + * 
varying index is on right. */ + for (int i = 0; i < eagle_cooling_N_He_frac; i++) { + for (int j = 0; j < eagle_cooling_N_temperature; j++) { + for (int k = 0; k < eagle_cooling_N_density; k++) { + + /* Index in the HDF5 table */ + const int hdf5_index = row_major_index_3d( + i, j, k, eagle_cooling_N_He_frac, eagle_cooling_N_temperature, + eagle_cooling_N_density); + + /* Index in the internal table */ + const int internal_index = row_major_index_4d( + local_z_index, k, i, j, eagle_cooling_N_loaded_redshifts, + eagle_cooling_N_density, eagle_cooling_N_He_frac, + eagle_cooling_N_temperature); + + /* Change the sign and transpose */ + cooling->table.H_plus_He_heating[internal_index] = + -he_net_cooling_rate[hdf5_index]; + + /* Convert to log T and transpose */ + cooling->table.temperature[internal_index] = + log10(temperature[hdf5_index]); + + /* Just transpose */ + cooling->table.H_plus_He_electron_abundance[internal_index] = + he_electron_abundance[hdf5_index]; + } + } + } + + /* read in electron densities due to metals */ + strcpy(set_name, "/Solar/Electron_density_over_n_h"); + dataset = H5Dopen(file_id, set_name, H5P_DEFAULT); + status = H5Dread(dataset, H5T_NATIVE_FLOAT, H5S_ALL, H5S_ALL, H5P_DEFAULT, + electron_abundance); + if (status < 0) error("error reading solar electron density table"); + status = H5Dclose(dataset); + if (status < 0) error("error closing cooling dataset"); + + /* Transpose from order tables are stored in (temperature, nH) to + * (redshift, nH, temperature) where fastest varying index is on right. 
*/ + for (int i = 0; i < eagle_cooling_N_temperature; i++) { + for (int j = 0; j < eagle_cooling_N_density; j++) { + + /* Index in the HDF5 table */ + const int hdf5_index = row_major_index_2d( + i, j, eagle_cooling_N_temperature, eagle_cooling_N_density); + + /* Index in the internal table */ + const int internal_index = row_major_index_3d( + local_z_index, j, i, eagle_cooling_N_loaded_redshifts, + eagle_cooling_N_density, eagle_cooling_N_temperature); + + /* Just transpose */ + cooling->table.electron_abundance[internal_index] = + electron_abundance[hdf5_index]; + } + } + + status = H5Fclose(file_id); + if (status < 0) error("error closing file"); + } + + free(net_cooling_rate); + free(electron_abundance); + free(temperature); + free(he_net_cooling_rate); + free(he_electron_abundance); + +#ifdef SWIFT_DEBUG_CHECKS + message("done reading in general cooling table"); +#endif + +#else + error("Need HDF5 to read cooling tables"); +#endif +} + +/** + * @brief Constructs the data structure containing the relevant cooling tables + * for the redshift index (set in cooling_update) + * + * @param cooling #cooling_function_data structure + */ +static void eagle_readtable(struct cooling_function_data *restrict cooling) { + + if (cooling->z_index < 0) { + /* z_index is set to < 0 in cooling_update if need + * to read any of the high redshift tables */ + get_redshift_invariant_table(cooling); + } else { + get_cooling_table(cooling); + } +} + +/** + * @brief Checks the tables that are currently loaded in memory and read + * new ones if necessary. + * + * @param cooling The #cooling_function_data we play with. + * @param restart_flag Flag indicating if we are restarting a run + */ +void eagle_check_cooling_tables(struct cooling_function_data *restrict cooling, + const int restart_flag) { + + /* Do we already have the right table in memory? 
*/ + if (cooling->low_z_index == cooling->z_index && restart_flag != 0) return; + + /* Record the table indices */ + cooling->low_z_index = cooling->z_index; + cooling->high_z_index = cooling->z_index + 1; + + /* Load the damn thing */ + eagle_readtable(cooling); +} diff --git a/src/cooling/EAGLE/cooling_tables.h b/src/cooling/EAGLE/cooling_tables.h new file mode 100644 index 0000000000000000000000000000000000000000..5c04f1e2f8ef93508cc388d0a8e796244b1e7a7e --- /dev/null +++ b/src/cooling/EAGLE/cooling_tables.h @@ -0,0 +1,62 @@ +/******************************************************************************* + * This file is part of SWIFT. + * Copyright (c) 2017 Matthieu Schaller (matthieu.schaller@durham.ac.uk) + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published + * by the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public License + * along with this program. If not, see <http://www.gnu.org/licenses/>. + * + ******************************************************************************/ +#ifndef SWIFT_EAGLE_COOL_TABLES_H +#define SWIFT_EAGLE_COOL_TABLES_H + +/** + * @file src/cooling/EAGLE/cooling_tables.h + * @brief EAGLE cooling function + */ + +/* Config parameters. */ +#include "../config.h" + +#include "cooling_struct.h" + +/*! Number of different bins along the redshift axis of the tables */ +#define eagle_cooling_N_redshifts 49 + +/*! Number of redshift bins loaded at any given point in time */ +#define eagle_cooling_N_loaded_redshifts 2 + +/*! 
Number of different bins along the temperature axis of the tables */ +#define eagle_cooling_N_temperature 176 + +/*! Number of different bins along the density axis of the tables */ +#define eagle_cooling_N_density 41 + +/*! Number of different bins along the metal axis of the tables */ +#define eagle_cooling_N_metal 9 + +/*! Number of different bins along the helium fraction axis of the tables */ +#define eagle_cooling_N_He_frac 7 + +/*! Number of different bins along the abundances axis of the tables */ +#define eagle_cooling_N_abundances 11 + +void get_cooling_redshifts(struct cooling_function_data *cooling); + +void read_cooling_header(const char *fname, + struct cooling_function_data *cooling); + +void allocate_cooling_tables(struct cooling_function_data *restrict cooling); + +void eagle_check_cooling_tables(struct cooling_function_data *restrict cooling, + const int restart_flag); +#endif diff --git a/src/cooling/EAGLE/interpolate.h b/src/cooling/EAGLE/interpolate.h new file mode 100644 index 0000000000000000000000000000000000000000..8d49be96c68be367fa4101ef1de9ff00294f19b9 --- /dev/null +++ b/src/cooling/EAGLE/interpolate.h @@ -0,0 +1,491 @@ +/******************************************************************************* + * This file is part of SWIFT. + * Copyright (c) 2017 Matthieu Schaller (matthieu.schaller@durham.ac.uk) + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published + * by the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public License + * along with this program. 
If not, see <http://www.gnu.org/licenses/>. + * + ******************************************************************************/ +#ifndef SWIFT_INTERPOL_EAGLE_H +#define SWIFT_INTERPOL_EAGLE_H + +/** + * @file src/cooling/EAGLE/interpolate.h + * @brief Interpolation functions for EAGLE tables + */ + +/* Config parameters. */ +#include "../config.h" + +/* Local includes. */ +#include "align.h" +#include "error.h" +#include "inline.h" + +/** + * @brief Returns the 1d index of element with 2d indices x,y + * from a flattened 2d array in row major order + * + * @param x, y Indices of element of interest + * @param Nx, Ny Sizes of array dimensions + */ +__attribute__((always_inline)) INLINE int row_major_index_2d(const int x, + const int y, + const int Nx, + const int Ny) { +#ifdef SWIFT_DEBUG_CHECKS + assert(x < Nx); + assert(y < Ny); +#endif + return x * Ny + y; +} + +/** + * @brief Returns the 1d index of element with 3d indices x,y,z + * from a flattened 3d array in row major order + * + * @param x, y, z Indices of element of interest + * @param Nx, Ny, Nz Sizes of array dimensions + */ +__attribute__((always_inline)) INLINE int row_major_index_3d( + const int x, const int y, const int z, const int Nx, const int Ny, + const int Nz) { +#ifdef SWIFT_DEBUG_CHECKS + assert(x < Nx); + assert(y < Ny); + assert(z < Nz); +#endif + return x * Ny * Nz + y * Nz + z; +} + +/** + * @brief Returns the 1d index of element with 4d indices x,y,z,w + * from a flattened 4d array in row major order + * + * @param x, y, z, w Indices of element of interest + * @param Nx, Ny, Nz, Nw Sizes of array dimensions + */ +__attribute__((always_inline)) INLINE int row_major_index_4d( + const int x, const int y, const int z, const int w, const int Nx, + const int Ny, const int Nz, const int Nw) { +#ifdef SWIFT_DEBUG_CHECKS + assert(x < Nx); + assert(y < Ny); + assert(z < Nz); + assert(w < Nw); +#endif + return x * Ny * Nz * Nw + y * Nz * Nw + z * Nw + w; +} + +/** + * @brief Finds the index of a 
value in a table and compute delta to nearest + * element. + * + * This function assumes the table is monotonically increasing with a constant + * difference between adjacent values. + * + * The returned difference is expressed in units of the table separation. This + * means dx = (x - table[i]) / (table[i+1] - table[i]). It is always between + * 0 and 1. + * + * @param table The table to search in. + * @param size The number of elements in the table. + * @param x The value to search for. + * @param i (return) The index in the table of the element. + * @param *dx (return) The difference between x and table[i] + */ +__attribute__((always_inline)) INLINE void get_index_1d( + const float *restrict table, const int size, const float x, int *i, + float *restrict dx) { + + const float delta = (size - 1) / (table[size - 1] - table[0]); + + /* Indicate that the whole array is aligned on boundaries */ + swift_align_information(float, table, SWIFT_STRUCT_ALIGNMENT); + + if (x < table[0]) { + /* We are below the first element */ + *i = 0; + *dx = 0.f; + } else if (x < table[size - 1]) { + /* Normal case */ + *i = (x - table[0]) * delta; + +#ifdef SWIFT_DEBUG_CHECKS + if (*i > size || *i < 0) { + error( + "trying to get index for value outside table range. Table size: %d, " + "calculated index: %d, value: %.5e, table[0]: %.5e, grid size: %.5e", + size, *i, x, table[0], delta); + } +#endif + + *dx = (x - table[*i]) * delta; + } else { + /* We are after the last element */ + *i = size - 2; + *dx = 1.f; + } + +#ifdef SWIFT_DEBUG_CHECKS + if (*dx < -0.001f || *dx > 1.001f) error("Invalid distance found dx=%e", *dx); +#endif +} + +/** + * @brief Interpolate a flattened 2D table at a given position. + * + * This function uses linear interpolation along each axis. It also + * assumes that the table is aligned on SWIFT_STRUCT_ALIGNMENT. + * + * @param table The 2D table to interpolate. + * @param xi, yi Indices of element of interest. + * @param Nx, Ny Sizes of array dimensions. 
+ * @param dx, dy Distance between the point and the index in units of + * the grid spacing. + */ +__attribute__((always_inline)) INLINE float interpolation_2d( + const float *table, const int xi, const int yi, const float dx, + const float dy, const int Nx, const int Ny) { + +#ifdef SWIFT_DEBUG_CHECKS + if (dx < -0.001f || dx > 1.001f) error("Invalid dx=%e", dx); + if (dy < -0.001f || dy > 1.001f) error("Invalid dy=%e", dy); +#endif + + const float tx = 1.f - dx; + const float ty = 1.f - dy; + + /* Indicate that the whole array is aligned on boundaries */ + swift_align_information(float, table, SWIFT_STRUCT_ALIGNMENT); + + /* Linear interpolation along each axis. We read the table 2^2=4 times */ + float result = tx * ty * table[row_major_index_2d(xi + 0, yi + 0, Nx, Ny)]; + + result += tx * dy * table[row_major_index_2d(xi + 0, yi + 1, Nx, Ny)]; + result += dx * ty * table[row_major_index_2d(xi + 1, yi + 0, Nx, Ny)]; + + result += dx * dy * table[row_major_index_2d(xi + 1, yi + 1, Nx, Ny)]; + + return result; +} + +/** + * @brief Interpolate a flattened 3D table at a given position. + * + * This function uses linear interpolation along each axis. It also + * assumes that the table is aligned on SWIFT_STRUCT_ALIGNMENT. + * + * @param table The 3D table to interpolate. + * @param xi, yi, zi Indices of element of interest. + * @param Nx, Ny, Nz Sizes of array dimensions. + * @param dx, dy, dz Distance between the point and the index in units of + * the grid spacing. 
+ */ +__attribute__((always_inline)) INLINE float interpolation_3d( + const float *table, const int xi, const int yi, const int zi, + const float dx, const float dy, const float dz, const int Nx, const int Ny, + const int Nz) { + +#ifdef SWIFT_DEBUG_CHECKS + if (dx < -0.001f || dx > 1.001f) error("Invalid dx=%e", dx); + if (dy < -0.001f || dy > 1.001f) error("Invalid dy=%e", dy); + if (dz < -0.001f || dz > 1.001f) error("Invalid dz=%e", dz); +#endif + + const float tx = 1.f - dx; + const float ty = 1.f - dy; + const float tz = 1.f - dz; + + /* Indicate that the whole array is aligned on page boundaries */ + swift_align_information(float, table, SWIFT_STRUCT_ALIGNMENT); + + /* Linear interpolation along each axis. We read the table 2^3=8 times */ + float result = tx * ty * tz * + table[row_major_index_3d(xi + 0, yi + 0, zi + 0, Nx, Ny, Nz)]; + + result += tx * ty * dz * + table[row_major_index_3d(xi + 0, yi + 0, zi + 1, Nx, Ny, Nz)]; + result += tx * dy * tz * + table[row_major_index_3d(xi + 0, yi + 1, zi + 0, Nx, Ny, Nz)]; + result += dx * ty * tz * + table[row_major_index_3d(xi + 1, yi + 0, zi + 0, Nx, Ny, Nz)]; + + result += tx * dy * dz * + table[row_major_index_3d(xi + 0, yi + 1, zi + 1, Nx, Ny, Nz)]; + result += dx * ty * dz * + table[row_major_index_3d(xi + 1, yi + 0, zi + 1, Nx, Ny, Nz)]; + result += dx * dy * tz * + table[row_major_index_3d(xi + 1, yi + 1, zi + 0, Nx, Ny, Nz)]; + + result += dx * dy * dz * + table[row_major_index_3d(xi + 1, yi + 1, zi + 1, Nx, Ny, Nz)]; + + return result; +} + +/** + * @brief Interpolate a flattened 3D table at a given position but avoid the + * x-dimension. + * + * This function uses linear interpolation along each axis. + * We look at the xi coordinate but do not interpolate around it. We just + * interpolate the remaining 2 dimensions. + * The function also assumes that the table is aligned on + * SWIFT_STRUCT_ALIGNMENT. + * + * @param table The 3D table to interpolate. 
+ * @param xi, yi, zi Indices of element of interest. + * @param Nx, Ny, Nz Sizes of array dimensions. + * @param dx, dy, dz Distance between the point and the index in units of + * the grid spacing. + */ +__attribute__((always_inline)) INLINE float interpolation_3d_no_x( + const float *table, const int xi, const int yi, const int zi, + const float dx, const float dy, const float dz, const int Nx, const int Ny, + const int Nz) { + +#ifdef SWIFT_DEBUG_CHECKS + if (dx != 0.f) error("Attempting to interpolate along x!"); + if (dy < -0.001f || dy > 1.001f) error("Invalid dy=%e", dy); + if (dz < -0.001f || dz > 1.001f) error("Invalid dz=%e", dz); +#endif + + const float tx = 1.f; + const float ty = 1.f - dy; + const float tz = 1.f - dz; + + /* Indicate that the whole array is aligned on page boundaries */ + swift_align_information(float, table, SWIFT_STRUCT_ALIGNMENT); + + /* Linear interpolation along each axis. We read the table 2^2=4 times */ + /* Note that we intentionally kept the table access along the axis where */ + /* we do not interpolate as comments in the code to allow readers to */ + /* understand what is going on. 
*/ + float result = tx * ty * tz * + table[row_major_index_3d(xi + 0, yi + 0, zi + 0, Nx, Ny, Nz)]; + + result += tx * ty * dz * + table[row_major_index_3d(xi + 0, yi + 0, zi + 1, Nx, Ny, Nz)]; + result += tx * dy * tz * + table[row_major_index_3d(xi + 0, yi + 1, zi + 0, Nx, Ny, Nz)]; + /* result += dx * ty * tz * */ + /* table[row_major_index_3d(xi + 1, yi + 0, zi + 0, Nx, Ny, Nz)]; */ + + result += tx * dy * dz * + table[row_major_index_3d(xi + 0, yi + 1, zi + 1, Nx, Ny, Nz)]; + /* result += dx * ty * dz * */ + /* table[row_major_index_3d(xi + 1, yi + 0, zi + 1, Nx, Ny, Nz)]; */ + /* result += dx * dy * tz * */ + /* table[row_major_index_3d(xi + 1, yi + 1, zi + 0, Nx, Ny, Nz)]; */ + + /* result += dx * dy * dz * */ + /* table[row_major_index_3d(xi + 1, yi + 1, zi + 1, Nx, Ny, Nz)]; */ + + return result; +} + +/** + * @brief Interpolate a flattened 4D table at a given position. + * + * This function uses linear interpolation along each axis. It also + * assumes that the table is aligned on SWIFT_STRUCT_ALIGNMENT. + * + * @param table The 4D table to interpolate. + * @param xi, yi, zi, wi Indices of element of interest. + * @param Nx, Ny, Nz, Nw Sizes of array dimensions. + * @param dx, dy, dz, dw Distance between the point and the index in units of + * the grid spacing. 
+ */ +__attribute__((always_inline)) INLINE float interpolation_4d( + const float *table, const int xi, const int yi, const int zi, const int wi, + const float dx, const float dy, const float dz, const float dw, + const int Nx, const int Ny, const int Nz, const int Nw) { + +#ifdef SWIFT_DEBUG_CHECKS + if (dx < -0.001f || dx > 1.001f) error("Invalid dx=%e", dx); + if (dy < -0.001f || dy > 1.001f) error("Invalid dy=%e", dy); + if (dz < -0.001f || dz > 1.001f) error("Invalid dz=%e", dz); + if (dw < -0.001f || dw > 1.001f) error("Invalid dw=%e", dw); +#endif + + const float tx = 1.f - dx; + const float ty = 1.f - dy; + const float tz = 1.f - dz; + const float tw = 1.f - dw; + + /* Indicate that the whole array is aligned on page boundaries */ + swift_align_information(float, table, SWIFT_STRUCT_ALIGNMENT); + + /* Linear interpolation along each axis. We read the table 2^4=16 times */ + float result = + tx * ty * tz * tw * + table[row_major_index_4d(xi + 0, yi + 0, zi + 0, wi + 0, Nx, Ny, Nz, Nw)]; + + result += + tx * ty * tz * dw * + table[row_major_index_4d(xi + 0, yi + 0, zi + 0, wi + 1, Nx, Ny, Nz, Nw)]; + result += + tx * ty * dz * tw * + table[row_major_index_4d(xi + 0, yi + 0, zi + 1, wi + 0, Nx, Ny, Nz, Nw)]; + result += + tx * dy * tz * tw * + table[row_major_index_4d(xi + 0, yi + 1, zi + 0, wi + 0, Nx, Ny, Nz, Nw)]; + result += + dx * ty * tz * tw * + table[row_major_index_4d(xi + 1, yi + 0, zi + 0, wi + 0, Nx, Ny, Nz, Nw)]; + + result += + tx * ty * dz * dw * + table[row_major_index_4d(xi + 0, yi + 0, zi + 1, wi + 1, Nx, Ny, Nz, Nw)]; + result += + tx * dy * tz * dw * + table[row_major_index_4d(xi + 0, yi + 1, zi + 0, wi + 1, Nx, Ny, Nz, Nw)]; + result += + dx * ty * tz * dw * + table[row_major_index_4d(xi + 1, yi + 0, zi + 0, wi + 1, Nx, Ny, Nz, Nw)]; + result += + tx * dy * dz * tw * + table[row_major_index_4d(xi + 0, yi + 1, zi + 1, wi + 0, Nx, Ny, Nz, Nw)]; + result += + dx * ty * dz * tw * + table[row_major_index_4d(xi + 1, yi + 0, zi + 1, wi + 0, Nx, 
Ny, Nz, Nw)]; + result += + dx * dy * tz * tw * + table[row_major_index_4d(xi + 1, yi + 1, zi + 0, wi + 0, Nx, Ny, Nz, Nw)]; + + result += + dx * dy * dz * tw * + table[row_major_index_4d(xi + 1, yi + 1, zi + 1, wi + 0, Nx, Ny, Nz, Nw)]; + result += + dx * dy * tz * dw * + table[row_major_index_4d(xi + 1, yi + 1, zi + 0, wi + 1, Nx, Ny, Nz, Nw)]; + result += + dx * ty * dz * dw * + table[row_major_index_4d(xi + 1, yi + 0, zi + 1, wi + 1, Nx, Ny, Nz, Nw)]; + result += + tx * dy * dz * dw * + table[row_major_index_4d(xi + 0, yi + 1, zi + 1, wi + 1, Nx, Ny, Nz, Nw)]; + + result += + dx * dy * dz * dw * + table[row_major_index_4d(xi + 1, yi + 1, zi + 1, wi + 1, Nx, Ny, Nz, Nw)]; + + return result; +} + +/** + * @brief Interpolate a flattened 4D table at a given position but avoid the + * x-dimension. + * + * This function uses linear interpolation along each axis. + * We look at the xi coordinate but do not interpolate around it. We just + * interpolate the remaining 3 dimensions. + * The function also assumes that the table is aligned on + * SWIFT_STRUCT_ALIGNMENT. + * + * @param table The 4D table to interpolate. + * @param xi, yi, zi, wi Indices of element of interest. + * @param Nx, Ny, Nz, Nw Sizes of array dimensions. + * @param dx, dy, dz, dw Distance between the point and the index in units of + * the grid spacing. 
+ */ +__attribute__((always_inline)) INLINE float interpolation_4d_no_x( + const float *table, const int xi, const int yi, const int zi, const int wi, + const float dx, const float dy, const float dz, const float dw, + const int Nx, const int Ny, const int Nz, const int Nw) { + +#ifdef SWIFT_DEBUG_CHECKS + if (dx != 0.f) error("Attempting to interpolate along x!"); + if (dy < -0.001f || dy > 1.001f) error("Invalid dy=%e", dy); + if (dz < -0.001f || dz > 1.001f) error("Invalid dz=%e", dz); + if (dw < -0.001f || dw > 1.001f) error("Invalid dw=%e", dw); +#endif + + const float tx = 1.f; + const float ty = 1.f - dy; + const float tz = 1.f - dz; + const float tw = 1.f - dw; + + /* Indicate that the whole array is aligned on boundaries */ + swift_align_information(float, table, SWIFT_STRUCT_ALIGNMENT); + + /* Linear interpolation along each axis. We read the table 2^3=8 times */ + /* Note that we intentionally kept the table access along the axis where */ + /* we do not interpolate as comments in the code to allow readers to */ + /* understand what is going on. 
*/ + float result = + tx * ty * tz * tw * + table[row_major_index_4d(xi + 0, yi + 0, zi + 0, wi + 0, Nx, Ny, Nz, Nw)]; + + result += + tx * ty * tz * dw * + table[row_major_index_4d(xi + 0, yi + 0, zi + 0, wi + 1, Nx, Ny, Nz, Nw)]; + result += + tx * ty * dz * tw * + table[row_major_index_4d(xi + 0, yi + 0, zi + 1, wi + 0, Nx, Ny, Nz, Nw)]; + result += + tx * dy * tz * tw * + table[row_major_index_4d(xi + 0, yi + 1, zi + 0, wi + 0, Nx, Ny, Nz, Nw)]; + /* result += */ + /* dx * ty * tz * tw * */ + /* table[row_major_index_4d(xi + 1, yi + 0, zi + 0, wi + 0, Nx, Ny, Nz, + * Nw)]; */ + + result += + tx * ty * dz * dw * + table[row_major_index_4d(xi + 0, yi + 0, zi + 1, wi + 1, Nx, Ny, Nz, Nw)]; + result += + tx * dy * tz * dw * + table[row_major_index_4d(xi + 0, yi + 1, zi + 0, wi + 1, Nx, Ny, Nz, Nw)]; + /* result += */ + /* dx * ty * tz * dw * */ + /* table[row_major_index_4d(xi + 1, yi + 0, zi + 0, wi + 1, Nx, Ny, Nz, + * Nw)]; */ + result += + tx * dy * dz * tw * + table[row_major_index_4d(xi + 0, yi + 1, zi + 1, wi + 0, Nx, Ny, Nz, Nw)]; + /* result += */ + /* dx * ty * dz * tw * */ + /* table[row_major_index_4d(xi + 1, yi + 0, zi + 1, wi + 0, Nx, Ny, Nz, + * Nw)]; */ + /* result += */ + /* dx * dy * tz * tw * */ + /* table[row_major_index_4d(xi + 1, yi + 1, zi + 0, wi + 0, Nx, Ny, Nz, */ + /* Nw)]; */ + + /* result += */ + /* dx * dy * dz * tw * */ + /* table[row_major_index_4d(xi + 1, yi + 1, zi + 1, wi + 0, Nx, Ny, Nz, */ + /* Nw)]; */ + /* result += */ + /* dx * dy * tz * dw * */ + /* table[row_major_index_4d(xi + 1, yi + 1, zi + 0, wi + 1, Nx, Ny, Nz, */ + /* Nw)]; */ + /* result += */ + /* dx * ty * dz * dw * */ + /* table[row_major_index_4d(xi + 1, yi + 0, zi + 1, wi + 1, Nx, Ny, Nz, + * Nw)]; */ + result += + tx * dy * dz * dw * + table[row_major_index_4d(xi + 0, yi + 1, zi + 1, wi + 1, Nx, Ny, Nz, Nw)]; + + /* result += */ + /* dx * dy * dz * dw * */ + /* table[row_major_index_4d(xi + 1, yi + 1, zi + 1, wi + 1, Nx, Ny, Nz, */ + /* Nw)]; */ + + return 
result; +} + +#endif diff --git a/src/cooling/const_du/cooling.h b/src/cooling/const_du/cooling.h index b6fea7eea7b0fb208c4bffece425ec836d5df0c0..dac92f09837cbb40cc49f1e8dc5d4c627ce7023a 100644 --- a/src/cooling/const_du/cooling.h +++ b/src/cooling/const_du/cooling.h @@ -25,9 +25,9 @@ * @file src/cooling/const_du/cooling.h * @brief Routines related to the "constant cooling" cooling function. * - * This is the simplest possible cooling function. A constant cooling rate with - * a minimal energy floor is applied. Should be used as a template for more - * realistic functions. + * This is the simplest possible cooling function. A constant cooling rate + * (du/dt) with a minimal energy floor is applied. Should be used as a template + * for more realistic functions. */ /* Config parameters. */ @@ -45,6 +45,20 @@ #include "physical_constants.h" #include "units.h" +/** + * @brief Common operations performed on the cooling function at a + * given time-step or redshift. + * + * @param cosmo The current cosmological model. + * @param cooling The #cooling_function_data used in the run. + * @param restart_flag Are we calling this directly after a restart? + */ +INLINE static void cooling_update(const struct cosmology* cosmo, + struct cooling_function_data* cooling, + const int restart_flag) { + // Add content if required. +} + /** * @brief Apply the cooling function to a particle. * @@ -54,26 +68,30 @@ * @param phys_const The physical constants in internal units. * @param us The internal system of units. * @param cosmo The current cosmological model. + * @param hydro_props The properties of the hydro scheme. * @param cooling The #cooling_function_data used in the run. * @param p Pointer to the particle data. * @param xp Pointer to the extended particle data. * @param dt The time-step of this particle. + * @param dt_therm The time-step operator used for thermal quantities. 
*/ __attribute__((always_inline)) INLINE static void cooling_cool_part( const struct phys_const* restrict phys_const, const struct unit_system* restrict us, const struct cosmology* restrict cosmo, + const struct hydro_props* hydro_props, const struct cooling_function_data* restrict cooling, - struct part* restrict p, struct xpart* restrict xp, float dt) { + struct part* restrict p, struct xpart* restrict xp, const float dt, + const float dt_therm) { /* Internal energy floor */ const float u_floor = cooling->min_energy; /* Get current internal energy */ - const float u_old = hydro_get_physical_internal_energy(p, cosmo); + const float u_old = hydro_get_physical_internal_energy(p, xp, cosmo); /* Current du_dt */ - const float hydro_du_dt = hydro_get_internal_energy_dt(p); + const float hydro_du_dt = hydro_get_physical_internal_energy_dt(p, cosmo); /* Get cooling function properties */ float cooling_du_dt = -cooling->cooling_rate; @@ -86,7 +104,7 @@ __attribute__((always_inline)) INLINE static void cooling_cool_part( } /* Update the internal energy time derivative */ - hydro_set_internal_energy_dt(p, hydro_du_dt + cooling_du_dt); + hydro_set_physical_internal_energy_dt(p, cosmo, hydro_du_dt + cooling_du_dt); /* Store the radiated energy */ xp->cooling_data.radiated_energy += -hydro_get_mass(p) * cooling_du_dt * dt; @@ -103,16 +121,21 @@ __attribute__((always_inline)) INLINE static float cooling_timestep( * @param phys_const The physical constants in internal units. * @param cosmo The current cosmological model. * @param us The internal system of units. + * @param hydro_props The properties of the hydro scheme. * @param p Pointer to the particle data. + * @param xp Pointer to the extended particle data. 
*/ __attribute__((always_inline)) INLINE static float cooling_timestep( const struct cooling_function_data* restrict cooling, const struct phys_const* restrict phys_const, const struct cosmology* restrict cosmo, - const struct unit_system* restrict us, const struct part* restrict p) { + const struct unit_system* restrict us, + const struct hydro_props* hydro_props, const struct part* restrict p, + const struct xpart* xp) { const float cooling_rate = cooling->cooling_rate; - const float internal_energy = hydro_get_physical_internal_energy(p, cosmo); + const float internal_energy = + hydro_get_physical_internal_energy(p, xp, cosmo); return cooling->cooling_tstep_mult * internal_energy / fabsf(cooling_rate); } @@ -126,7 +149,10 @@ __attribute__((always_inline)) INLINE static float cooling_timestep( * * @param p Pointer to the particle data. * @param xp Pointer to the extended particle data. + * @param phys_const The physical constants in internal units. * @param cooling The properties of the cooling function. + * @param us The internal system of units. + * @param cosmo The current cosmological model. */ __attribute__((always_inline)) INLINE static void cooling_first_init_part( const struct phys_const* restrict phys_const, @@ -176,6 +202,18 @@ static INLINE void cooling_init_backend(struct swift_params* parameter_file, parameter_file, "ConstCooling:cooling_tstep_mult"); } +/** + * @brief Restore cooling tables (if applicable) after + * restart + * + * Nothing to do here + * + * @param cooling the cooling_function_data structure + * @param cosmo cosmology structure + */ +static INLINE void cooling_restore_tables(struct cooling_function_data* cooling, + const struct cosmology* cosmo) {} + /** * @brief Prints the properties of the cooling model to stdout. 
* @@ -188,4 +226,11 @@ static INLINE void cooling_print_backend( cooling->cooling_rate, cooling->min_energy); } +/** + * @brief Clean-up the memory allocated for the cooling routines + * + * @param cooling the cooling data structure. + */ +static INLINE void cooling_clean(struct cooling_function_data* cooling) {} + #endif /* SWIFT_COOLING_CONST_DU_H */ diff --git a/src/cooling/const_du/cooling_io.h b/src/cooling/const_du/cooling_io.h index 52a943aca86e51665fd1841d7bcb8a100b046ed8..f4a327f14ec071bc62c4cf57bb118df71bab2b3e 100644 --- a/src/cooling/const_du/cooling_io.h +++ b/src/cooling/const_du/cooling_io.h @@ -21,6 +21,15 @@ #ifndef SWIFT_COOLING_CONST_DU_IO_H #define SWIFT_COOLING_CONST_DU_IO_H +/** + * @file src/cooling/const_du/cooling_io.h + * @brief i/o routines related to the "constant cooling" cooling function. + * + * This is the simplest possible cooling function. A constant cooling rate + * (du/dt) with a minimal energy floor is applied. Should be used as a template + * for more realistic functions. + */ + /* Config parameters. */ #include "../config.h" @@ -31,19 +40,20 @@ /** * @brief Writes the current model of SPH to the file - * @param h_grpsph The HDF5 group in which to write + * @param h_grp The HDF5 group in which to write + * @param cooling the parameters of the cooling function. */ __attribute__((always_inline)) INLINE static void cooling_write_flavour( - hid_t h_grpsph) { + hid_t h_grp, const struct cooling_function_data* cooling) { - io_write_attribute_s(h_grpsph, "Cooling Model", "Constant du/dt"); + io_write_attribute_s(h_grp, "Cooling Model", "Constant du/dt"); } #endif /** * @brief Specifies which particle fields to write to a dataset * - * @param parts The particle array. + * @param xparts The extended particle data array. * @param list The list of i/o properties to write. 
* @param cooling The #cooling_function_data * diff --git a/src/cooling/const_du/cooling_struct.h b/src/cooling/const_du/cooling_struct.h index cc00b001cf6b576266de02dac885f87d089bd8e4..94db6b6542cacda6dbdc43c6db6b9c2cac7961d6 100644 --- a/src/cooling/const_du/cooling_struct.h +++ b/src/cooling/const_du/cooling_struct.h @@ -23,11 +23,11 @@ /** * @file src/cooling/const_du/cooling_struct.h - * @brief Structure related to the "constant cooling" cooling function. + * @brief Structures related to the "constant cooling" cooling function. * - * This is the simplest possible cooling function. A constant cooling rate with - * a minimal energy floor is applied. Should be used as a template for more - * realistic functions. + * This is the simplest possible cooling function. A constant cooling rate + * (du/dt) with a minimal energy floor is applied. Should be used as a template + * for more realistic functions. */ /** diff --git a/src/cooling/const_lambda/cooling.h b/src/cooling/const_lambda/cooling.h index f1a7abdbe14a39d98bbd01eb36ba870c8af0ee1a..3c336060bdadae9b0cd0034bc0ccb1e9e9266aff 100644 --- a/src/cooling/const_lambda/cooling.h +++ b/src/cooling/const_lambda/cooling.h @@ -1,8 +1,6 @@ /******************************************************************************* * This file is part of SWIFT. - * Copyright (c) 2016 Tom Theuns (tom.theuns@durham.ac.uk) - * Matthieu Schaller (matthieu.schaller@durham.ac.uk) - * Richard Bower (r.g.bower@durham.ac.uk) + * Copyright (c) 2018 Matthieu Schaller (matthieu.schaller@durham.ac.uk) * Stefan Arridge (stefan.arridge@durham.ac.uk) * * This program is free software: you can redistribute it and/or modify @@ -19,10 +17,17 @@ * along with this program. If not, see <http://www.gnu.org/licenses/>. 
* ******************************************************************************/ - #ifndef SWIFT_COOLING_CONST_LAMBDA_H #define SWIFT_COOLING_CONST_LAMBDA_H +/** + * @file src/cooling/const_lambda/cooling.h + * @brief Routines related to the "constant lambda" cooling function. + * + * This model assumes a constant cooling rate Lambda irrespective of redshift + * or density. + */ + /* Config parameters. */ #include "../config.h" @@ -40,30 +45,53 @@ #include "units.h" /** - * @brief Calculates du/dt in code units for a particle. + * @brief Common operations performed on the cooling function at a + * given time-step or redshift. * - * @param phys_const The physical constants in internal units. - * @param us The internal system of units. * @param cosmo The current cosmological model. * @param cooling The #cooling_function_data used in the run. - * @param p Pointer to the particle data.. + * @param restart_flag Are we calling this directly after a restart? */ -__attribute__((always_inline)) INLINE static float cooling_rate( - const struct phys_const* const phys_const, const struct unit_system* us, - const struct cosmology* restrict cosmo, +INLINE static void cooling_update(const struct cosmology* cosmo, + struct cooling_function_data* cooling, + const int restart_flag) { + // Add content if required. +} + +/** + * @brief Calculates du/dt in CGS units for a particle. + * + * The cooling rate is \f$\frac{du}{dt} = -\frac{\Lambda}{n_H^2} + * \frac{n_H^2}{\rho} \f$, where \f$ \frac{\Lambda}{n_H^2} \f$ is a constant in + * this model (lambda_nH2_cgs in #cooling_function_data). + * The returned value is in physical [erg * g^-1 * s^-1]. + * + * @param cosmo The current cosmological model. + * @param hydro_props The properties of the hydro scheme. + * @param cooling The #cooling_function_data used in the run. + * @param p Pointer to the particle data. + * @return The change in energy per unit mass due to cooling for this particle + * in cgs units [erg * g^-1 * s^-1]. 
+ */ +__attribute__((always_inline)) INLINE static double cooling_rate_cgs( + const struct cosmology* cosmo, const struct hydro_props* hydro_props, const struct cooling_function_data* cooling, const struct part* p) { - /* Get particle density */ - const float rho = hydro_get_physical_density(p, cosmo); + /* Get particle density [g * cm^-3] */ + const double rho = hydro_get_physical_density(p, cosmo); + const double rho_cgs = rho * cooling->conv_factor_density_to_cgs; + + /* Get Hydrogen mass fraction */ + const double X_H = hydro_props->hydrogen_mass_fraction; + + /* Hydrogen number density (X_H * rho / m_p) [cm^-3] */ + const double n_H_cgs = X_H * rho_cgs * cooling->proton_mass_cgs_inv; - /* Get cooling function properties */ - const float X_H = cooling->hydrogen_mass_abundance; + /* Calculate du_dt ((Lambda / n_H^2) * n_H^2 / rho) */ + const double du_dt_cgs = + -cooling->lambda_nH2_cgs * n_H_cgs * n_H_cgs / rho_cgs; - /* Calculate du_dt */ - const float du_dt = -cooling->lambda * - (X_H * rho / phys_const->const_proton_mass) * - (X_H * rho / phys_const->const_proton_mass) / rho; - return du_dt; + return du_dt_cgs; } /** @@ -72,75 +100,125 @@ __attribute__((always_inline)) INLINE static float cooling_rate( * @param phys_const The physical constants in internal units. * @param us The internal system of units. * @param cosmo The current cosmological model. + * @param hydro_props The properties of the hydro scheme. * @param cooling The #cooling_function_data used in the run. * @param p Pointer to the particle data. + * @param xp Pointer to the particle' extended data. * @param dt The time-step of this particle. + * @param dt_therm The time-step operator used for thermal quantities. 
*/ __attribute__((always_inline)) INLINE static void cooling_cool_part( const struct phys_const* restrict phys_const, const struct unit_system* restrict us, const struct cosmology* restrict cosmo, + const struct hydro_props* hydro_props, const struct cooling_function_data* restrict cooling, - struct part* restrict p, struct xpart* restrict xp, float dt) { + struct part* restrict p, struct xpart* restrict xp, const float dt, + const float dt_therm) { + + /* Nothing to do here? */ + if (dt == 0.) return; /* Internal energy floor */ - const float u_floor = cooling->min_energy; + const float u_floor = hydro_props->minimal_internal_energy; /* Current energy */ - const float u_old = hydro_get_physical_internal_energy(p, cosmo); + const float u_old = hydro_get_physical_internal_energy(p, xp, cosmo); + + /* Current du_dt in physical coordinates (internal units) */ + const float hydro_du_dt = hydro_get_physical_internal_energy_dt(p, cosmo); + + /* Calculate cooling du_dt (in cgs units) */ + const double cooling_du_dt_cgs = + cooling_rate_cgs(cosmo, hydro_props, cooling, p); + + /* Convert to internal units */ + float cooling_du_dt = + cooling_du_dt_cgs * cooling->conv_factor_energy_rate_from_cgs; + + /* Add cosmological term */ + cooling_du_dt *= cosmo->a * cosmo->a; + + float total_du_dt = hydro_du_dt + cooling_du_dt; - /* Current du_dt */ - const float hydro_du_dt = hydro_get_internal_energy_dt(p); + /* We now need to check that we are not going to go below any of the limits */ - /* Calculate cooling du_dt */ - float cooling_du_dt = cooling_rate(phys_const, us, cosmo, cooling, p); + /* First, check whether we may end up below the minimal energy after + * this step 1/2 kick + another 1/2 kick that could potentially be for + * a time-step twice as big. We hence check for 1.5 delta_t. 
*/ + if (u_old + total_du_dt * 1.5 * dt_therm < u_floor) { + total_du_dt = (u_floor - u_old) / (1.5f * dt_therm); + } - /* Integrate cooling equation to enforce energy floor */ - /* Factor of 1.5 included since timestep could potentially double */ - if (u_old + (hydro_du_dt + cooling_du_dt) * 1.5f * dt < u_floor) { - cooling_du_dt = -(u_old + 1.5f * dt * hydro_du_dt - u_floor) / (1.5f * dt); + /* Second, check whether the energy used in the prediction could get negative. + * We need to check for the 1/2 dt kick followed by a full time-step drift + * that could potentially be for a time-step twice as big. We hence check + * for 2.5 delta_t but this time against 0 energy not the minimum */ + if (u_old + total_du_dt * 2.5 * dt_therm < 0.) { + total_du_dt = -u_old / ((2.5f + 0.0001f) * dt_therm); } /* Update the internal energy time derivative */ - hydro_set_internal_energy_dt(p, hydro_du_dt + cooling_du_dt); + hydro_set_physical_internal_energy_dt(p, cosmo, total_du_dt); - /* Store the radiated energy */ - xp->cooling_data.radiated_energy += -hydro_get_mass(p) * cooling_du_dt * dt; + /* Store the radiated energy (assuming dt will not change) */ + xp->cooling_data.radiated_energy += + -hydro_get_mass(p) * (total_du_dt - hydro_du_dt) * dt_therm; } /** - * @brief Computes the time-step due to cooling + * @brief Computes the time-step due to cooling for this particle. + * + * We compute a time-step \f$ \alpha \frac{u}{du/dt} \f$ in physical + * coordinates. \f$\alpha\f$ is a parameter of the cooling function. * * @param cooling The #cooling_function_data used in the run. * @param phys_const The physical constants in internal units. * @param cosmo The current cosmological model. + * @param hydro_props The properties of the hydro scheme. * @param us The internal system of units. * @param p Pointer to the particle data. + * @param xp Pointer to the extended data of the particle. 
*/ __attribute__((always_inline)) INLINE static float cooling_timestep( const struct cooling_function_data* restrict cooling, const struct phys_const* restrict phys_const, const struct cosmology* restrict cosmo, - const struct unit_system* restrict us, const struct part* restrict p) { + const struct unit_system* restrict us, + const struct hydro_props* hydro_props, const struct part* restrict p, + const struct xpart* restrict xp) { - /* Get current internal energy */ - const float u = hydro_get_physical_internal_energy(p, cosmo); - const float du_dt = cooling_rate(phys_const, us, cosmo, cooling, p); + /* Start with the case where there is no limit */ + if (cooling->cooling_tstep_mult == FLT_MAX) return FLT_MAX; - /* If we are close to (or below) the energy floor, we ignore the condition */ - if (u < 1.01f * cooling->min_energy) + /* Get current internal energy and cooling rate */ + const float u = hydro_get_physical_internal_energy(p, xp, cosmo); + const double cooling_du_dt_cgs = + cooling_rate_cgs(cosmo, hydro_props, cooling, p); + + /* Convert to internal units */ + const float cooling_du_dt = + cooling_du_dt_cgs * cooling->conv_factor_energy_rate_from_cgs; + + /* If we are close to (or below) the limit, we ignore the condition */ + if (u < 1.01f * hydro_props->minimal_internal_energy || cooling_du_dt == 0.f) return FLT_MAX; else - return cooling->cooling_tstep_mult * u / fabsf(du_dt); + return cooling->cooling_tstep_mult * u / fabsf(cooling_du_dt); } /** * @brief Sets the cooling properties of the (x-)particles to a valid start * state. * + * Nothing to do here. Just set the radiated energy counter to 0. + * + * @param phys_const The physical constants in internal units. + * @param cooling The properties of the cooling function. + * @param us The internal system of units. + * @param cosmo The current cosmological model. * @param p Pointer to the particle data. * @param xp Pointer to the extended particle data. 
- * @param cooling The properties of the cooling function. */ __attribute__((always_inline)) INLINE static void cooling_first_init_part( const struct phys_const* restrict phys_const, @@ -176,32 +254,37 @@ static INLINE void cooling_init_backend(struct swift_params* parameter_file, const struct phys_const* phys_const, struct cooling_function_data* cooling) { - const double lambda_cgs = - parser_get_param_double(parameter_file, "LambdaCooling:lambda_cgs"); - const float min_temperature = parser_get_param_double( - parameter_file, "LambdaCooling:minimum_temperature"); - cooling->hydrogen_mass_abundance = parser_get_param_double( - parameter_file, "LambdaCooling:hydrogen_mass_abundance"); - cooling->mean_molecular_weight = parser_get_param_double( - parameter_file, "LambdaCooling:mean_molecular_weight"); - cooling->cooling_tstep_mult = parser_get_param_double( - parameter_file, "LambdaCooling:cooling_tstep_mult"); - - /* convert minimum temperature into minimum internal energy */ - const float u_floor = - phys_const->const_boltzmann_k * min_temperature / - (hydro_gamma_minus_one * cooling->mean_molecular_weight * - phys_const->const_proton_mass); - - cooling->min_energy = u_floor; - - /* convert lambda to code units */ - cooling->lambda = lambda_cgs * - units_cgs_conversion_factor(us, UNIT_CONV_TIME) / - (units_cgs_conversion_factor(us, UNIT_CONV_ENERGY) * - units_cgs_conversion_factor(us, UNIT_CONV_VOLUME)); + /* Read in the cooling parameters */ + cooling->lambda_nH2_cgs = + parser_get_param_double(parameter_file, "LambdaCooling:lambda_nH2_cgs"); + cooling->cooling_tstep_mult = parser_get_opt_param_float( + parameter_file, "LambdaCooling:cooling_tstep_mult", FLT_MAX); + + /* Some useful conversion values */ + cooling->conv_factor_density_to_cgs = + units_cgs_conversion_factor(us, UNIT_CONV_DENSITY); + cooling->conv_factor_energy_rate_from_cgs = + units_cgs_conversion_factor(us, UNIT_CONV_TIME) / + units_cgs_conversion_factor(us, UNIT_CONV_ENERGY_PER_UNIT_MASS); + + 
/* Useful constants */ + cooling->proton_mass_cgs_inv = + 1. / (phys_const->const_proton_mass * + units_cgs_conversion_factor(us, UNIT_CONV_MASS)); } +/** + * @brief Restore cooling tables (if applicable) after + * restart + * + * Nothing to do here + * + * @param cooling the cooling_function_data structure + * @param cosmo cosmology structure + */ +static INLINE void cooling_restore_tables(struct cooling_function_data* cooling, + const struct cosmology* cosmo) {} + /** * @brief Prints the properties of the cooling model to stdout. * @@ -211,11 +294,23 @@ static INLINE void cooling_print_backend( const struct cooling_function_data* cooling) { message( - "Cooling function is 'Constant lambda' with " - "(lambda,min_energy,hydrogen_mass_abundance,mean_molecular_weight) " - "= (%g,%g,%g,%g)", - cooling->lambda, cooling->min_energy, cooling->hydrogen_mass_abundance, - cooling->mean_molecular_weight); + "Cooling function is 'Constant lambda' with Lambda/n_H^2=%g [erg * s^-1 " + "* " + "cm^3]", + cooling->lambda_nH2_cgs); + + if (cooling->cooling_tstep_mult == FLT_MAX) + message("Cooling function time-step size is unlimited"); + else + message("Cooling function time-step size limited to %f of u/(du/dt)", + cooling->cooling_tstep_mult); } +/** + * @brief Clean-up the memory allocated for the cooling routines + * + * @param cooling the cooling data structure. + */ +static INLINE void cooling_clean(struct cooling_function_data* cooling) {} + #endif /* SWIFT_COOLING_CONST_LAMBDA_H */ diff --git a/src/cooling/const_lambda/cooling_io.h b/src/cooling/const_lambda/cooling_io.h index 89c9471a291a4a6a5740a8c6c816913cbc6316a0..0dca5011ebe5bc6c2a4866387e9cf1ac0ba3447a 100644 --- a/src/cooling/const_lambda/cooling_io.h +++ b/src/cooling/const_lambda/cooling_io.h @@ -1,8 +1,6 @@ /******************************************************************************* * This file is part of SWIFT. 
- * Copyright (c) 2016 Tom Theuns (tom.theuns@durham.ac.uk) - * Matthieu Schaller (matthieu.schaller@durham.ac.uk) - * Richard Bower (r.g.bower@durham.ac.uk) + * Copyright (c) 2018 Matthieu Schaller (matthieu.schaller@durham.ac.uk) * Stefan Arridge (stefan.arridge@durham.ac.uk) * * This program is free software: you can redistribute it and/or modify @@ -22,6 +20,14 @@ #ifndef SWIFT_COOLING_CONST_LAMBDA_IO_H #define SWIFT_COOLING_CONST_LAMBDA_IO_H +/** + * @file src/cooling/const_lambda/cooling_io.h + * @brief i/o routines related to the "constant lambda" cooling function. + * + * This model assumes a constant cooling rate Lambda irrespective of redshift + * or density. + */ + /* Config parameters. */ #include "../config.h" @@ -31,20 +37,24 @@ #ifdef HAVE_HDF5 /** - * @brief Writes the current model of SPH to the file - * @param h_grpsph The HDF5 group in which to write + * @brief Writes the current model of cooling to the file + * @param h_grp The HDF5 group in which to write + * @param cooling the parameters of the cooling function. */ __attribute__((always_inline)) INLINE static void cooling_write_flavour( - hid_t h_grpsph) { + hid_t h_grp, const struct cooling_function_data* cooling) { - io_write_attribute_s(h_grpsph, "Cooling Model", "Constant Lambda"); + io_write_attribute_s(h_grp, "Cooling Model", "Constant Lambda"); + io_write_attribute_d(h_grp, "Lambda/n_H^2 [cgs]", cooling->lambda_nH2_cgs); } #endif /** * @brief Specifies which particle fields to write to a dataset * - * @param parts The particle array. + * Nothing to write for this scheme. + * + * @param xparts The extended particle array. * @param list The list of i/o properties to write. 
* @param cooling The #cooling_function_data * @@ -53,6 +63,7 @@ __attribute__((always_inline)) INLINE static void cooling_write_flavour( __attribute__((always_inline)) INLINE static int cooling_write_particles( const struct xpart* xparts, struct io_props* list, const struct cooling_function_data* cooling) { + return 0; } diff --git a/src/cooling/const_lambda/cooling_struct.h b/src/cooling/const_lambda/cooling_struct.h index 30d4e5e4af9c7bd139337709897d8111f88d2aa8..cc671a857887af90bda630e757af1b044b479e49 100644 --- a/src/cooling/const_lambda/cooling_struct.h +++ b/src/cooling/const_lambda/cooling_struct.h @@ -1,8 +1,6 @@ /******************************************************************************* * This file is part of SWIFT. - * Copyright (c) 2016 Tom Theuns (tom.theuns@durham.ac.uk) - * Matthieu Schaller (matthieu.schaller@durham.ac.uk) - * Richard Bower (r.g.bower@durham.ac.uk) + * Copyright (c) 2018 Matthieu Schaller (matthieu.schaller@durham.ac.uk) * Stefan Arridge (stefan.arridge@durham.ac.uk) * * This program is free software: you can redistribute it and/or modify @@ -19,26 +17,34 @@ * along with this program. If not, see <http://www.gnu.org/licenses/>. * ******************************************************************************/ - #ifndef SWIFT_COOLING_STRUCT_CONST_LAMBDA_H #define SWIFT_COOLING_STRUCT_CONST_LAMBDA_H +/** + * @file src/cooling/const_lambda/cooling_struct.h + * @brief Structures related to the "constant lambda" cooling function. + * + * This model assumes a constant cooling rate Lambda irrespective of redshift + * or density. + */ + /** * @brief Properties of the cooling function. */ struct cooling_function_data { - /*! Cooling rate in internal units */ - double lambda; + /*! Cooling rate / nH^2 in physical cgs units [erg * s^-1 * cm^3] */ + double lambda_nH2_cgs; - /*! Fraction of gas mass that is Hydrogen. Used to calculate n_H */ - float hydrogen_mass_abundance; + /*! 
Conversion factor from internal units to cgs for density */ + double conv_factor_density_to_cgs; - /*! 'mu', used to convert min_temperature to min_internal energy */ - float mean_molecular_weight; + /*! Conversion factor from internal units from cgs for internal energy + * derivative */ + double conv_factor_energy_rate_from_cgs; - /*! Minimally allowed internal energy of all the particles */ - float min_energy; + /*! Inverse of the proton mass in cgs units [g^-1] */ + double proton_mass_cgs_inv; /*! Constant multiplication factor for time-step criterion */ float cooling_tstep_mult; diff --git a/src/cooling/grackle/cooling.h b/src/cooling/grackle/cooling.h index cb77b63294aacee425b917c1900eefd7ebfa5f34..03f3f1f14dc008b6ee9feedb09893470a5ccde6f 100644 --- a/src/cooling/grackle/cooling.h +++ b/src/cooling/grackle/cooling.h @@ -36,6 +36,7 @@ /* Local includes. */ #include "chemistry.h" +#include "cooling_io.h" #include "error.h" #include "hydro.h" #include "parser.h" @@ -47,6 +48,20 @@ #define GRACKLE_NPART 1 #define GRACKLE_RANK 3 +/** + * @brief Common operations performed on the cooling function at a + * given time-step or redshift. + * + * @param cosmo The current cosmological model. + * @param cooling The #cooling_function_data used in the run. + * @param restart_flag Are we calling this directly after a restart? + */ +INLINE static void cooling_update(const struct cosmology* cosmo, + struct cooling_function_data* cooling, + const int restart_flag) { + // Add content if required. +} + /* prototypes */ static gr_float cooling_time( const struct phys_const* restrict phys_const, @@ -491,6 +506,12 @@ __attribute__((always_inline)) INLINE static gr_float cooling_rate( const struct cooling_function_data* restrict cooling, const struct part* restrict p, struct xpart* restrict xp, double dt) { + if (cosmo->Omega_m != 0. || cosmo->Omega_r != 0. || cosmo->Omega_k != 0. || + cosmo->Omega_lambda != 0. || cosmo->Omega_b != 0.) + error( + "Check cosmology factors (physical vs. 
co-moving and drifted vs. " + "un-drifted)!"); + /* set current time */ code_units units = cooling->units; if (cooling->redshift == -1) @@ -515,7 +536,8 @@ __attribute__((always_inline)) INLINE static gr_float cooling_rate( /* general particle data */ gr_float density = hydro_get_physical_density(p, cosmo); - const double energy_before = hydro_get_physical_internal_energy(p, cosmo); + const double energy_before = + hydro_get_drifted_physical_internal_energy(p, cosmo); gr_float energy = energy_before; /* initialize density */ @@ -534,22 +556,11 @@ __attribute__((always_inline)) INLINE static gr_float cooling_rate( /* solve chemistry */ chemistry_data chemistry_grackle = cooling->chemistry; - chemistry_data_storage my_rates = grackle_rates; - int error_code = _solve_chemistry( - &chemistry_grackle, &my_rates, &units, dt, data.grid_dx, data.grid_rank, - data.grid_dimension, data.grid_start, data.grid_end, data.density, - data.internal_energy, data.x_velocity, data.y_velocity, data.z_velocity, - data.HI_density, data.HII_density, data.HM_density, data.HeI_density, - data.HeII_density, data.HeIII_density, data.H2I_density, - data.H2II_density, data.DI_density, data.DII_density, data.HDI_density, - data.e_density, data.metal_density, data.volumetric_heating_rate, - data.specific_heating_rate, data.RT_heating_rate, - data.RT_HI_ionization_rate, data.RT_HeI_ionization_rate, - data.RT_HeII_ionization_rate, data.RT_H2_dissociation_rate, NULL); - if (error_code == 0) error("Error in solve_chemistry."); - // if (solve_chemistry(&units, &data, dt) == 0) { - // error("Error in solve_chemistry."); - //} + chemistry_data_storage chemistry_rates = grackle_rates; + if (local_solve_chemistry(&chemistry_grackle, &chemistry_rates, &units, &data, + dt) == 0) { + error("Error in solve_chemistry."); + } /* copy from grackle data to particle */ cooling_copy_from_grackle(data, p, xp, density); @@ -596,7 +607,8 @@ __attribute__((always_inline)) INLINE static gr_float cooling_time( 
data.grid_end = grid_end; /* general particle data */ - const gr_float energy_before = hydro_get_physical_internal_energy(p, cosmo); + const gr_float energy_before = + hydro_get_drifted_physical_internal_energy(p, cosmo); gr_float density = hydro_get_physical_density(p, cosmo); gr_float energy = energy_before; @@ -616,7 +628,10 @@ __attribute__((always_inline)) INLINE static gr_float cooling_time( /* Compute cooling time */ gr_float cooling_time; - if (calculate_cooling_time(&units, &data, &cooling_time) == 0) { + chemistry_data chemistry_grackle = cooling->chemistry; + chemistry_data_storage chemistry_rates = grackle_rates; + if (local_calculate_cooling_time(&chemistry_grackle, &chemistry_rates, &units, + &data, &cooling_time) == 0) { error("Error in calculate_cooling_time."); } @@ -636,18 +651,29 @@ __attribute__((always_inline)) INLINE static gr_float cooling_time( * @param cooling The #cooling_function_data used in the run. * @param p Pointer to the particle data. * @param dt The time-step of this particle. + * @param hydro_properties the hydro_props struct, used for + * getting the minimal internal energy allowed in by SWIFT. + * Read from yml file into engine struct. */ __attribute__((always_inline)) INLINE static void cooling_cool_part( const struct phys_const* restrict phys_const, const struct unit_system* restrict us, const struct cosmology* restrict cosmo, + const struct hydro_props* hydro_props, const struct cooling_function_data* restrict cooling, - struct part* restrict p, struct xpart* restrict xp, double dt) { + struct part* restrict p, struct xpart* restrict xp, double dt, + double dt_therm) { + + if (cosmo->Omega_m != 0. || cosmo->Omega_r != 0. || cosmo->Omega_k != 0. || + cosmo->Omega_lambda != 0. || cosmo->Omega_b != 0.) + error( + "Check cosmology factors (physical vs. co-moving and drifted vs. " + "un-drifted)!"); if (dt == 0.) 
return; /* Current du_dt */ - const float hydro_du_dt = hydro_get_internal_energy_dt(p); + const float hydro_du_dt = hydro_get_physical_internal_energy_dt(p, cosmo); /* compute cooling rate */ const float du_dt = cooling_rate(phys_const, us, cosmo, cooling, p, xp, dt); @@ -656,7 +682,7 @@ __attribute__((always_inline)) INLINE static void cooling_cool_part( xp->cooling_data.radiated_energy += -du_dt * dt * hydro_get_mass(p); /* Update the internal energy */ - hydro_set_internal_energy_dt(p, hydro_du_dt + du_dt); + hydro_set_physical_internal_energy_dt(p, cosmo, hydro_du_dt + du_dt); } /** @@ -674,7 +700,9 @@ __attribute__((always_inline)) INLINE static float cooling_timestep( const struct cooling_function_data* restrict cooling, const struct phys_const* restrict phys_const, const struct cosmology* restrict cosmo, - const struct unit_system* restrict us, const struct part* restrict p) { + const struct unit_system* restrict us, + const struct hydro_props* hydro_props, const struct part* restrict p, + const struct xpart* restrict xp) { return FLT_MAX; } @@ -787,4 +815,23 @@ __attribute__((always_inline)) INLINE static void cooling_init_backend( cooling_init_grackle(cooling); } +/** + * @brief Restore cooling tables (if applicable) after + * restart + * + * @param cooling the cooling_function_data structure + * @param cosmo cosmology structure + */ +static INLINE void cooling_restore_tables(struct cooling_function_data* cooling, + const struct cosmology* cosmo) {} +/** + * @brief Clean-up the memory allocated for the cooling routines + * + * @param cooling the cooling data structure. 
+ */ +static INLINE void cooling_clean(struct cooling_function_data* cooling) { + + // MATTHIEU: To do: free stuff here +} + #endif /* SWIFT_COOLING_GRACKLE_H */ diff --git a/src/cooling/grackle/cooling_io.h b/src/cooling/grackle/cooling_io.h index faf84cf97d8449d54f2727ec26b16a9d81d117c6..684ab347e19c8ea5f1897a54f21951256ef5f50b 100644 --- a/src/cooling/grackle/cooling_io.h +++ b/src/cooling/grackle/cooling_io.h @@ -29,20 +29,20 @@ #ifdef HAVE_HDF5 /** - * @brief Writes the current model of SPH to the file - * @param h_grpsph The HDF5 group in which to write + * @brief Writes the current model of cooling to the file + * @param h_grp The HDF5 group in which to write */ __attribute__((always_inline)) INLINE static void cooling_write_flavour( - hid_t h_grpsph) { + hid_t h_grp, const struct cooling_function_data* cooling) { #if COOLING_GRACKLE_MODE == 0 - io_write_attribute_s(h_grpsph, "Cooling Model", "Grackle"); + io_write_attribute_s(h_grp, "Cooling Model", "Grackle"); #elif COOLING_GRACKLE_MODE == 1 - io_write_attribute_s(h_grpsph, "Cooling Model", "Grackle1"); + io_write_attribute_s(h_grp, "Cooling Model", "Grackle1"); #elif COOLING_GRACKLE_MODE == 2 - io_write_attribute_s(h_grpsph, "Cooling Model", "Grackle2"); + io_write_attribute_s(h_grp, "Cooling Model", "Grackle2"); #elif COOLING_GRACKLE_MODE == 3 - io_write_attribute_s(h_grpsph, "Cooling Model", "Grackle3"); + io_write_attribute_s(h_grp, "Cooling Model", "Grackle3"); #else error("This function should be called only with one of the Grackle cooling."); #endif diff --git a/src/cooling/grackle/cooling_struct.h b/src/cooling/grackle/cooling_struct.h index b714690ce4688268723748b29506e458cccc4be9..6d4b37a6240446d580818e16ff887c8079e319a6 100644 --- a/src/cooling/grackle/cooling_struct.h +++ b/src/cooling/grackle/cooling_struct.h @@ -16,14 +16,14 @@ * along with this program. If not, see <http://www.gnu.org/licenses/>. 
* ******************************************************************************/ -#ifndef SWIFT_COOLING_STRUCT_NONE_H -#define SWIFT_COOLING_STRUCT_NONE_H +#ifndef SWIFT_COOLING_STRUCT_GRACKLE_H +#define SWIFT_COOLING_STRUCT_GRACKLE_H + +#include "../config.h" /* include grackle */ #include <grackle.h> -#include "../config.h" - /** * @file src/cooling/none/cooling_struct.h * @brief Empty infrastructure for the cases without cooling function @@ -113,4 +113,4 @@ struct cooling_xpart_data { float metal_frac; }; -#endif /* SWIFT_COOLING_STRUCT_NONE_H */ +#endif /* SWIFT_COOLING_STRUCT_GRACKLE_H */ diff --git a/src/cooling/none/cooling.h b/src/cooling/none/cooling.h index 0cc465adcdad8fe19afe4a9867e5d68a22ed9119..868bfad7fc12c2f89d54949642bd5e9d902b42b6 100644 --- a/src/cooling/none/cooling.h +++ b/src/cooling/none/cooling.h @@ -36,6 +36,21 @@ #include "physical_constants.h" #include "units.h" +/** + * @brief Common operations performed on the cooling function at a + * given time-step or redshift. + * + * @param phys_const The physical constants in internal units. + * @param us The internal system of units. + * @param cosmo The current cosmological model. + * @param cooling The #cooling_function_data used in the run. + */ +INLINE static void cooling_update(const struct cosmology* cosmo, + struct cooling_function_data* cooling, + const int restart_flag) { + // Add content if required. +} + /** * @brief Apply the cooling function to a particle. * @@ -44,17 +59,21 @@ * @param phys_const The physical constants in internal units. * @param us The internal system of units. * @param cosmo The current cosmological model. + * @param hydro_props The properties of the hydro scheme. * @param cooling The #cooling_function_data used in the run. * @param p Pointer to the particle data. * @param xp Pointer to the extended particle data. * @param dt The time-step of this particle. + * @param dt_therm The time-step operator used for thermal quantities. 
*/ __attribute__((always_inline)) INLINE static void cooling_cool_part( const struct phys_const* restrict phys_const, const struct unit_system* restrict us, const struct cosmology* restrict cosmo, + const struct hydro_props* hydro_props, const struct cooling_function_data* restrict cooling, - struct part* restrict p, struct xpart* restrict xp, float dt) {} + struct part* restrict p, struct xpart* restrict xp, const float dt, + const float dt_therm) {} /** * @brief Computes the cooling time-step. @@ -64,14 +83,18 @@ __attribute__((always_inline)) INLINE static void cooling_cool_part( * @param cooling The #cooling_function_data used in the run. * @param phys_const The physical constants in internal units. * @param cosmo The current cosmological model. + * @param hydro_props The properties of the hydro scheme. * @param us The internal system of units. * @param p Pointer to the particle data. + * @param xp Pointer to the extended data of the particle. */ __attribute__((always_inline)) INLINE static float cooling_timestep( const struct cooling_function_data* restrict cooling, const struct phys_const* restrict phys_const, const struct cosmology* restrict cosmo, - const struct unit_system* restrict us, const struct part* restrict p) { + const struct unit_system* restrict us, + const struct hydro_props* hydro_props, const struct part* restrict p, + const struct xpart* restrict xp) { return FLT_MAX; } @@ -125,6 +148,18 @@ static INLINE void cooling_init_backend(struct swift_params* parameter_file, struct cooling_function_data* cooling) { } +/** + * @brief Restore cooling tables (if applicable) after + * restart + * + * Nothing to do here + * + * @param cooling the cooling_function_data structure + * @param cosmo cosmology structure + */ +static INLINE void cooling_restore_tables(struct cooling_function_data* cooling, + const struct cosmology* cosmo) {} + /** * @brief Prints the properties of the cooling model to stdout. 
* @@ -136,4 +171,11 @@ static INLINE void cooling_print_backend( message("Cooling function is 'No cooling'."); } +/** + * @brief Clean-up the memory allocated for the cooling routines + * + * @param cooling the cooling data structure. + */ +static INLINE void cooling_clean(struct cooling_function_data* cooling) {} + #endif /* SWIFT_COOLING_NONE_H */ diff --git a/src/cooling/none/cooling_io.h b/src/cooling/none/cooling_io.h index e4c84f506bcd31ff95ededb5be889fbf9a27261b..518c166480a0b81f6856c8a39e2a64d34369dc84 100644 --- a/src/cooling/none/cooling_io.h +++ b/src/cooling/none/cooling_io.h @@ -29,12 +29,13 @@ /** * @brief Writes the current model of SPH to the file - * @param h_grpsph The HDF5 group in which to write + * @param h_grp The HDF5 group in which to write + * @param cooling the parameters of the cooling function. */ __attribute__((always_inline)) INLINE static void cooling_write_flavour( - hid_t h_grpsph) { + hid_t h_grp, const struct cooling_function_data* cooling) { - io_write_attribute_s(h_grpsph, "Cooling Model", "None"); + io_write_attribute_s(h_grp, "Cooling Model", "None"); } #endif diff --git a/src/cooling_io.h b/src/cooling_io.h index 88eeae2cabdaa8a0909977b84a7dbcf03145d988..1ced353d7ff8320a48731545300274c654a20744 100644 --- a/src/cooling_io.h +++ b/src/cooling_io.h @@ -29,6 +29,8 @@ #include "./cooling/const_du/cooling_io.h" #elif defined(COOLING_CONST_LAMBDA) #include "./cooling/const_lambda/cooling_io.h" +#elif defined(COOLING_COMPTON) +#include "./cooling/Compton/cooling_io.h" #elif defined(COOLING_GRACKLE) #include "./cooling/grackle/cooling_io.h" #elif defined(COOLING_EAGLE) diff --git a/src/cooling_struct.h b/src/cooling_struct.h index 9c187d596e714fddaf60ae61323624569196ba70..93de8d1b7a0bcfd56d2b1a503aea1e8339bc8016 100644 --- a/src/cooling_struct.h +++ b/src/cooling_struct.h @@ -34,6 +34,8 @@ #include "./cooling/const_du/cooling_struct.h" #elif defined(COOLING_CONST_LAMBDA) #include "./cooling/const_lambda/cooling_struct.h" +#elif 
defined(COOLING_COMPTON) +#include "./cooling/Compton/cooling_struct.h" #elif defined(COOLING_GRACKLE) #include "./cooling/grackle/cooling_struct.h" #elif defined(COOLING_EAGLE) diff --git a/src/cosmology.c b/src/cosmology.c index a39efc6055330292d879450fbfe0f3fb61585610..4718ed5b316e514476e3ec38dd8771136f3a2f69 100644 --- a/src/cosmology.c +++ b/src/cosmology.c @@ -157,6 +157,8 @@ void cosmology_update(struct cosmology *c, const struct phys_const *phys_const, pow(a, -3. * hydro_gamma + 2.); /* 1 / a^(3*gamma - 2) */ c->a_factor_mu = pow(a, 0.5 * (3. * hydro_gamma - 5.)); /* a^{(3*gamma - 5) / 2} */ + c->a_factor_Balsara_eps = + pow(a, 0.5 * (1. - 3. * hydro_gamma)); /* a^{(1 - 3*gamma) / 2} */ /* Redshift */ c->z = a_inv - 1.; @@ -265,6 +267,29 @@ double hydro_kick_integrand(double a, void *param) { return (1. / H) * pow(a_inv, 3. * hydro_gamma_minus_one) * a_inv; } +/** + * @brief Computes \f$a dt\f$ for the current cosmology. + * + * @param a The scale-factor of interest. + * @param param The current #cosmology. + */ +double hydro_kick_corr_integrand(double a, void *param) { + + const struct cosmology *c = (const struct cosmology *)param; + const double Omega_r = c->Omega_r; + const double Omega_m = c->Omega_m; + const double Omega_k = c->Omega_k; + const double Omega_l = c->Omega_lambda; + const double w_0 = c->w_0; + const double w_a = c->w_a; + const double H0 = c->H0; + + const double E_z = E(Omega_r, Omega_m, Omega_k, Omega_l, w_0, w_a, a); + const double H = H0 * E_z; + + return 1. / H; +} + /** * @brief Computes \f$ dt \f$ for the current cosmology. 
* @@ -306,6 +331,8 @@ void cosmology_init_tables(struct cosmology *c) { (double *)malloc(cosmology_table_length * sizeof(double)); c->hydro_kick_fac_interp_table = (double *)malloc(cosmology_table_length * sizeof(double)); + c->hydro_kick_corr_interp_table = + (double *)malloc(cosmology_table_length * sizeof(double)); c->time_interp_table = (double *)malloc(cosmology_table_length * sizeof(double)); c->scale_factor_interp_table = @@ -354,6 +381,16 @@ void cosmology_init_tables(struct cosmology *c) { c->hydro_kick_fac_interp_table[i] = result; } + /* Integrate the kick correction factor \int_{a_begin}^{a_table[i]} a dt */ + F.function = &hydro_kick_corr_integrand; + for (int i = 0; i < cosmology_table_length; i++) { + gsl_integration_qag(&F, a_begin, a_table[i], 0, 1.0e-10, GSL_workspace_size, + GSL_INTEG_GAUSS61, space, &result, &abserr); + + /* Store result */ + c->hydro_kick_corr_interp_table[i] = result; + } + /* Integrate the time \int_{a_begin}^{a_table[i]} dt */ F.function = &time_integrand; for (int i = 0; i < cosmology_table_length; i++) { @@ -374,31 +411,43 @@ void cosmology_init_tables(struct cosmology *c) { GSL_INTEG_GAUSS61, space, &result, &abserr); c->universe_age_at_present_day = result; - /* Inverse t(a) */ - const double time_init = c->time_interp_table_offset; - const double delta_t = - (c->universe_age_at_present_day - time_init) / cosmology_table_length; + /* Update the times */ + c->time_begin = cosmology_get_time_since_big_bang(c, c->a_begin); + c->time_end = cosmology_get_time_since_big_bang(c, c->a_end); - int i_prev = 0; - for (int i = 0; i < cosmology_table_length; i++) { - /* Current time */ - double time_interp = delta_t * i; + /* + * Inverse t(a) + */ + + const double delta_t = (c->time_end - c->time_begin) / cosmology_table_length; + + /* index in the time_interp_table */ + int i_a = 0; + + for (int i_time = 0; i_time < cosmology_table_length; i_time++) { + /* Current time + * time_interp_table = \int_a_begin^a => no need of time_begin 
*/ + double time_interp = delta_t * (i_time + 1); /* Find next time in time_interp_table */ - while (i_prev < cosmology_table_length && - c->time_interp_table[i_prev] <= time_interp) { - i_prev++; + while (i_a < cosmology_table_length && + c->time_interp_table[i_a] <= time_interp) { + i_a++; } /* Find linear interpolation scaling */ - double scale = time_interp - c->time_interp_table[i_prev - 1]; - scale /= c->time_interp_table[i_prev] - c->time_interp_table[i_prev - 1]; - scale += i_prev; + double scale = 0; + if (i_a != cosmology_table_length) { + scale = time_interp - c->time_interp_table[i_a - 1]; + scale /= c->time_interp_table[i_a] - c->time_interp_table[i_a - 1]; + } + + scale += i_a; /* Compute interpolated scale factor */ double log_a = c->log_a_begin + scale * (c->log_a_end - c->log_a_begin) / cosmology_table_length; - c->scale_factor_interp_table[i] = exp(log_a) - c->a_begin; + c->scale_factor_interp_table[i_time] = exp(log_a) - c->a_begin; } /* Free the workspace and temp array */ @@ -440,6 +489,11 @@ void cosmology_init(struct swift_params *params, const struct unit_system *us, c->time_base = (c->log_a_end - c->log_a_begin) / max_nr_timesteps; c->time_base_inv = 1. / c->time_base; + /* If a_begin == a_end we hang */ + + if (c->a_begin >= c->a_end) + error("a_begin must be strictly before (and not equal to) a_end"); + /* Construct derived quantities */ /* Curvature density (for closure) */ @@ -455,6 +509,10 @@ void cosmology_init(struct swift_params *params, const struct unit_system *us, c->H0 = H0_cgs * units_cgs_conversion_factor(us, UNIT_CONV_TIME); c->Hubble_time = 1. / c->H0; + /* Critical density at present day */ + c->critical_density_0 = + 3. * c->H0 * c->H0 / (8. 
* M_PI * phys_const->const_newton_G); + /* Initialise the interpolation tables */ c->drift_fac_interp_table = NULL; c->grav_kick_fac_interp_table = NULL; @@ -488,7 +546,7 @@ void cosmology_init_no_cosmo(struct cosmology *c) { c->w_0 = 0.; c->w_a = 0.; c->h = 1.; - c->w = 0.; + c->w = -1.; c->a_begin = 1.; c->a_end = 1.; @@ -496,7 +554,9 @@ void cosmology_init_no_cosmo(struct cosmology *c) { c->log_a_end = 0.; c->H = 0.; + c->H0 = 0.; c->a = 1.; + c->z = 0.; c->a_inv = 1.; c->a2_inv = 1.; c->a3_inv = 1.; @@ -504,10 +564,12 @@ void cosmology_init_no_cosmo(struct cosmology *c) { c->a_factor_pressure = 1.; c->a_factor_sound_speed = 1.; c->a_factor_mu = 1.; + c->a_factor_Balsara_eps = 1.; c->a_factor_hydro_accel = 1.; c->a_factor_grav_accel = 1.; c->critical_density = 0.; + c->critical_density_0 = 0.; c->time_step_factor = 1.; @@ -519,8 +581,10 @@ void cosmology_init_no_cosmo(struct cosmology *c) { c->drift_fac_interp_table = NULL; c->grav_kick_fac_interp_table = NULL; c->hydro_kick_fac_interp_table = NULL; + c->hydro_kick_corr_interp_table = NULL; c->time_interp_table = NULL; c->time_interp_table_offset = 0.; + c->scale_factor_interp_table = NULL; c->time_begin = 0.; c->time_end = 0.; @@ -585,7 +649,8 @@ double cosmology_get_grav_kick_factor(const struct cosmology *c, /** * @brief Computes the cosmology factor that enters the hydro kick operator. * - * Computes \f$ \int_{a_start}^{a_end} dt/a \f$ using the interpolation table. + * Computes \f$ \int_{a_start}^{a_end} dt/a^{3(gamma - 1)} \f$ using the + * interpolation table. * * @param c The current #cosmology. * @param ti_start the (integer) time of the start of the drift. 
@@ -602,9 +667,38 @@ double cosmology_get_hydro_kick_factor(const struct cosmology *c, const double a_start = c->log_a_begin + ti_start * c->time_base; const double a_end = c->log_a_begin + ti_end * c->time_base; - const double int_start = interp_table(c->drift_fac_interp_table, a_start, + const double int_start = interp_table(c->hydro_kick_fac_interp_table, a_start, c->log_a_begin, c->log_a_end); - const double int_end = interp_table(c->drift_fac_interp_table, a_end, + const double int_end = interp_table(c->hydro_kick_fac_interp_table, a_end, + c->log_a_begin, c->log_a_end); + + return int_end - int_start; +} + +/** + * @brief Computes the cosmology factor that enters the hydro kick correction + * operator for the meshless schemes (GIZMO-MFV). + * + * Computes \f$ \int_{a_start}^{a_end} a dt \f$ using the interpolation table. + * + * @param c The current #cosmology. + * @param ti_start the (integer) time of the start of the drift. + * @param ti_end the (integer) time of the end of the drift. 
+ */ +double cosmology_get_corr_kick_factor(const struct cosmology *c, + integertime_t ti_start, + integertime_t ti_end) { + +#ifdef SWIFT_DEBUG_CHECKS + if (ti_end < ti_start) error("ti_end must be >= ti_start"); +#endif + + const double a_start = c->log_a_begin + ti_start * c->time_base; + const double a_end = c->log_a_begin + ti_end * c->time_base; + + const double int_start = interp_table(c->hydro_kick_corr_interp_table, + a_start, c->log_a_begin, c->log_a_end); + const double int_end = interp_table(c->hydro_kick_corr_interp_table, a_end, c->log_a_begin, c->log_a_end); return int_end - int_start; @@ -631,9 +725,9 @@ double cosmology_get_therm_kick_factor(const struct cosmology *c, const double a_start = c->log_a_begin + ti_start * c->time_base; const double a_end = c->log_a_begin + ti_end * c->time_base; - const double int_start = interp_table(c->hydro_kick_fac_interp_table, a_start, + const double int_start = interp_table(c->drift_fac_interp_table, a_start, c->log_a_begin, c->log_a_end); - const double int_end = interp_table(c->hydro_kick_fac_interp_table, a_end, + const double int_end = interp_table(c->drift_fac_interp_table, a_end, c->log_a_begin, c->log_a_end); return int_end - int_start; @@ -671,9 +765,6 @@ double cosmology_get_delta_time(const struct cosmology *c, /** * @brief Compute scale factor from time since big bang (in internal units). * - * WARNING: This method has a low accuracy at high redshift. - * The relative error is around 1e-3 (testCosmology.c is measuring it). - * * @param c The current #cosmology. * @param t time since the big bang * @return The scale factor. 
@@ -706,6 +797,7 @@ void cosmology_clean(struct cosmology *c) { free(c->drift_fac_interp_table); free(c->grav_kick_fac_interp_table); free(c->hydro_kick_fac_interp_table); + free(c->hydro_kick_corr_interp_table); free(c->time_interp_table); free(c->scale_factor_interp_table); } diff --git a/src/cosmology.h b/src/cosmology.h index 7136b65667195953971060b76ddfd447a5fdf500..d6b7dfbdc854a66f89c5511a5076c4fb4a7a5d3f 100644 --- a/src/cosmology.h +++ b/src/cosmology.h @@ -54,10 +54,12 @@ struct cosmology { /*! Power of the scale-factor used for sound-speed conversion to physical */ double a_factor_sound_speed; - /*! Power of the scale-factor used for relative velocities in viscosity term - */ + /*! Power of the scale-factor used for relative velocities in visc. terms */ double a_factor_mu; + /*! Power of the scale-factor used for epsilon term in the Balsara switch */ + double a_factor_Balsara_eps; + /*! Power of the scale-factor used for gravity accelerations */ double a_factor_grav_accel; @@ -73,6 +75,9 @@ struct cosmology { /*! The critical density at the current redshift (in internal units) */ double critical_density; + /*! The critical density at redshift 0 (in internal units) */ + double critical_density_0; + /*! Conversion factor from internal time-step size to cosmological step */ double time_step_factor; @@ -153,6 +158,9 @@ struct cosmology { /*! Kick factor (hydro) interpolation table */ double *hydro_kick_fac_interp_table; + /*! Kick factor (hydro correction) interpolation table (GIZMO-MFV only) */ + double *hydro_kick_corr_interp_table; + /*! 
Time interpolation table */ double *time_interp_table; @@ -180,6 +188,9 @@ double cosmology_get_hydro_kick_factor(const struct cosmology *cosmo, double cosmology_get_therm_kick_factor(const struct cosmology *cosmo, integertime_t ti_start, integertime_t ti_end); +double cosmology_get_corr_kick_factor(const struct cosmology *cosmo, + integertime_t ti_start, + integertime_t ti_end); double cosmology_get_delta_time(const struct cosmology *c, integertime_t ti_start, integertime_t ti_end); diff --git a/src/cycle.h b/src/cycle.h index 842510e066e2f6f94e736851bf636c9a73e4f25f..65fe3bee17173dd1efdc98f5d90f8cc4fe51f007 100644 --- a/src/cycle.h +++ b/src/cycle.h @@ -80,6 +80,7 @@ intrinsic.])], [rtc_ok=no]) /***************************************************************************/ +#include <stdint.h> #if TIME_WITH_SYS_TIME #include <sys/time.h> #include <time.h> @@ -531,7 +532,7 @@ INLINE_ELAPSED(inline) #endif #if defined(__aarch64__) && defined(HAVE_ARMV8_CNTVCT_EL0) && \ - !defined(HAVE_ARMV8_PMCCNTR_EL0) + !defined(HAVE_TICK_COUNTER) typedef uint64_t ticks; static inline ticks getticks(void) { uint64_t Rt; diff --git a/src/debug.c b/src/debug.c index da8ef0e118b57a6aa94577898b03bcf7c56b006a..809d7048c45888eb41bd277bdb4971d469a98dc3 100644 --- a/src/debug.c +++ b/src/debug.c @@ -48,6 +48,8 @@ #include "./hydro/PressureEntropy/hydro_debug.h" #elif defined(HOPKINS_PU_SPH) #include "./hydro/PressureEnergy/hydro_debug.h" +#elif defined(HOPKINS_PU_SPH_MONAGHAN) +#include "./hydro/PressureEnergyMorrisMonaghanAV/hydro_debug.h" #elif defined(DEFAULT_SPH) #include "./hydro/Default/hydro_debug.h" #elif defined(GIZMO_MFV_SPH) @@ -56,8 +58,8 @@ #include "./hydro/GizmoMFM/hydro_debug.h" #elif defined(SHADOWFAX_SPH) #include "./hydro/Shadowswift/hydro_debug.h" -#elif defined(MINIMAL_MULTI_MAT_SPH) -#include "./hydro/MinimalMultiMat/hydro_debug.h" +#elif defined(PLANETARY_SPH) +#include "./hydro/Planetary/hydro_debug.h" #else #error "Invalid choice of SPH variant" #endif @@ -172,8 
+174,8 @@ int checkSpacehmax(struct space *s) { float cell_h_max = 0.0f; for (int k = 0; k < s->nr_cells; k++) { if (s->cells_top[k].nodeID == s->e->nodeID && - s->cells_top[k].h_max > cell_h_max) { - cell_h_max = s->cells_top[k].h_max; + s->cells_top[k].hydro.h_max > cell_h_max) { + cell_h_max = s->cells_top[k].hydro.h_max; } } @@ -191,9 +193,9 @@ int checkSpacehmax(struct space *s) { /* There is a problem. Hunt it down. */ for (int k = 0; k < s->nr_cells; k++) { if (s->cells_top[k].nodeID == s->e->nodeID) { - if (s->cells_top[k].h_max > part_h_max) { - message("cell %d is inconsistent (%f > %f)", k, s->cells_top[k].h_max, - part_h_max); + if (s->cells_top[k].hydro.h_max > part_h_max) { + message("cell %d is inconsistent (%f > %f)", k, + s->cells_top[k].hydro.h_max, part_h_max); } } } @@ -229,9 +231,9 @@ int checkCellhdxmax(const struct cell *c, int *depth) { const double loc_max[3] = {c->loc[0] + c->width[0], c->loc[1] + c->width[1], c->loc[2] + c->width[2]}; - const size_t nr_parts = c->count; - struct part *parts = c->parts; - struct xpart *xparts = c->xparts; + const size_t nr_parts = c->hydro.count; + struct part *parts = c->hydro.parts; + struct xpart *xparts = c->hydro.xparts; for (size_t k = 0; k < nr_parts; k++) { struct part *const p = &parts[k]; @@ -268,14 +270,15 @@ int checkCellhdxmax(const struct cell *c, int *depth) { } /* Check. 
*/ - if (c->h_max != h_max) { - message("%d Inconsistent h_max: cell %f != parts %f", *depth, c->h_max, - h_max); + if (c->hydro.h_max != h_max) { + message("%d Inconsistent h_max: cell %f != parts %f", *depth, + c->hydro.h_max, h_max); message("location: %f %f %f", c->loc[0], c->loc[1], c->loc[2]); result = 0; } - if (c->dx_max_part != dx_max) { - message("%d Inconsistent dx_max: %f != %f", *depth, c->dx_max_part, dx_max); + if (c->hydro.dx_max_part != dx_max) { + message("%d Inconsistent dx_max: %f != %f", *depth, c->hydro.dx_max_part, + dx_max); message("location: %f %f %f", c->loc[0], c->loc[1], c->loc[2]); result = 0; } @@ -316,13 +319,13 @@ static void dumpCells_map(struct cell *c, void *data) { #endif /* Only cells with particles are dumped. */ - if (c->count > 0 || c->gcount > 0 || c->scount > 0) { + if (c->hydro.count > 0 || c->grav.count > 0 || c->stars.count > 0) { /* In MPI mode we may only output cells with foreign partners. * These define the edges of the partitions. */ int ismpiactive = 0; #if WITH_MPI - ismpiactive = (c->send_xv != NULL); + ismpiactive = (c->mpi.hydro.send_xv != NULL); if (mpiactive) mpiactive = ismpiactive; else @@ -337,7 +340,8 @@ static void dumpCells_map(struct cell *c, void *data) { else active = 1; - /* So output local super cells that are active and have MPI + /* So output local super cells or top-level cells that are active and have + * MPI * tasks as requested. */ if (c->nodeID == e->nodeID && (!super || ((super && c->super == c) || (c->parent == NULL))) && @@ -346,14 +350,14 @@ static void dumpCells_map(struct cell *c, void *data) { /* If requested we work out how many particles are active in this cell. 
*/ int pactcount = 0; if (pactive) { - const struct part *parts = c->parts; - for (int k = 0; k < c->count; k++) + const struct part *parts = c->hydro.parts; + for (int k = 0; k < c->hydro.count; k++) if (part_is_active(&parts[k], e)) pactcount++; - struct gpart *gparts = c->gparts; - for (int k = 0; k < c->gcount; k++) + struct gpart *gparts = c->grav.parts; + for (int k = 0; k < c->grav.count; k++) if (gpart_is_active(&gparts[k], e)) pactcount++; - struct spart *sparts = c->sparts; - for (int k = 0; k < c->scount; k++) + struct spart *sparts = c->stars.parts; + for (int k = 0; k < c->stars.count; k++) if (spart_is_active(&sparts[k], e)) pactcount++; } @@ -361,9 +365,9 @@ static void dumpCells_map(struct cell *c, void *data) { " %6.3f %6.3f %6.3f %6.3f %6.3f %6.3f %6d %6d %6d %6d %6d %6d " "%6.1f %20lld %6d %6d %6d %6d %6d %6d %6d\n", c->loc[0], c->loc[1], c->loc[2], c->width[0], c->width[1], - c->width[2], e->step, c->count, c->gcount, c->scount, pactcount, - c->depth, ntasks, c->ti_hydro_end_min, - get_time_bin(c->ti_hydro_end_min), (c->super == c), + c->width[2], e->step, c->hydro.count, c->grav.count, + c->stars.count, pactcount, c->depth, ntasks, c->hydro.ti_end_min, + get_time_bin(c->hydro.ti_end_min), (c->super == c), (c->parent == NULL), cell_is_active_hydro(c, e), c->nodeID, c->nodeID == e->nodeID, ismpiactive); } @@ -414,13 +418,13 @@ void dumpCells(const char *prefix, int super, int active, int mpiactive, fclose(file); } -#if defined(WITH_MPI) && defined(HAVE_METIS) +#if defined(WITH_MPI) && (defined(HAVE_METIS) || defined(HAVE_PARMETIS)) /** - * @brief Dump the METIS graph in standard format, simple format and weights + * @brief Dump a graph in METIS standard format, simple format and weights * only, to a file. * - * The standard format output can be read into the METIS + * The standard format output can be read into the METIS and some ParMETIS * command-line tools. 
The simple format is just the cell connectivity (this * should not change between calls). The weights format is the standard one, * minus the cell connectivity. @@ -552,7 +556,7 @@ void dumpMETISGraph(const char *prefix, idx_t nvertices, idx_t nvertexweights, } } -#endif /* HAVE_METIS */ +#endif /* HAVE_METIS || HAVE_PARMETIS */ #ifdef HAVE_MPI /** diff --git a/src/debug.h b/src/debug.h index c9d65ad06cf5307a5fd8596c9c5b6c8b83cb6d9e..ec3807c3ba911c6a553aa42d3f8a017662217001 100644 --- a/src/debug.h +++ b/src/debug.h @@ -39,7 +39,7 @@ int checkCellhdxmax(const struct cell *c, int *depth); void dumpCells(const char *prefix, int super, int active, int mpiactive, int pactive, struct space *s, int rank, int step); -#if defined(WITH_MPI) && defined(HAVE_METIS) +#if defined(WITH_MPI) && (defined(HAVE_METIS) || defined(HAVE_PARMETIS)) #include "metis.h" void dumpMETISGraph(const char *prefix, idx_t nvtxs, idx_t ncon, idx_t *xadj, idx_t *adjncy, idx_t *vwgt, idx_t *vsize, idx_t *adjwgt); diff --git a/src/drift.h b/src/drift.h index ff0fea744012b7143afed2a05b286d4646cdd69a..a4bdf9be74aade4fe0f1349544cf472363c81c99 100644 --- a/src/drift.h +++ b/src/drift.h @@ -137,6 +137,13 @@ __attribute__((always_inline)) INLINE static void drift_spart( sp->x[0] += sp->v[0] * dt_drift; sp->x[1] += sp->v[1] * dt_drift; sp->x[2] += sp->v[2] * dt_drift; + + /* Compute offsets since last cell construction */ + for (int k = 0; k < 3; k++) { + const float dx = sp->v[k] * dt_drift; + sp->x_diff[k] -= dx; + sp->x_diff_sort[k] -= dx; + } } #endif /* SWIFT_DRIFT_H */ diff --git a/src/dump.c b/src/dump.c index ba50b517a72e71ab0ca5e791319c6336925762cb..9c519c2130b2612309e623b8234e3369214b52e2 100644 --- a/src/dump.c +++ b/src/dump.c @@ -48,17 +48,24 @@ */ void *dump_get(struct dump *d, size_t count, size_t *offset) { size_t local_offset = atomic_add(&d->count, count); +#ifdef SWIFT_DEBUG_CHECKS + if (d->count > d->size) error("Dump file is too small."); +#endif *offset = local_offset + d->file_offset; 
return (char *)d->data + local_offset; } /** * @brief Ensure that at least size bytes are available in the #dump. + * + * @param d The #dump. + * @param required_size The required size for the #dump + * @param increase_size If not enough size, increase by this amount */ -void dump_ensure(struct dump *d, size_t size) { +void dump_ensure(struct dump *d, size_t required_size, size_t increase_size) { /* If we have enough space already, just bail. */ - if (d->size - d->count > size) return; + if (d->size - d->count > required_size) return; /* Unmap the current data. */ if (munmap(d->data, d->size) != 0) { @@ -70,7 +77,7 @@ void dump_ensure(struct dump *d, size_t size) { const size_t trunc_count = d->count & d->page_mask; d->file_offset += trunc_count; d->count -= trunc_count; - d->size = (size * dump_grow_ensure_factor + ~d->page_mask) & d->page_mask; + d->size = (d->count + increase_size + ~d->page_mask) & d->page_mask; /* Re-allocate the file size. */ if (posix_fallocate(d->fd, d->file_offset, d->size) != 0) { @@ -121,7 +128,9 @@ void dump_close(struct dump *d) { */ void dump_init(struct dump *d, const char *filename, size_t size) { - /* Create the output file. */ + /* Create the output file. + The option O_RDWR seems to be required by mmap. + */ if ((d->fd = open(filename, O_CREAT | O_RDWR, 0660)) == -1) { error("Failed to create dump file '%s' (%s).", filename, strerror(errno)); } diff --git a/src/dump.h b/src/dump.h index 6857aa3a008a27e0e8ed23854d84f848ee0ca2be..021bc1e1dc22c178a893e42384c91fafdcf63112 100644 --- a/src/dump.h +++ b/src/dump.h @@ -27,9 +27,6 @@ /* Standard headers */ #include <stdlib.h> -/* Some constants. */ -#define dump_grow_ensure_factor 10 - /** The dump struct. */ struct dump { @@ -54,7 +51,7 @@ struct dump { /* Function prototypes. 
*/ void dump_init(struct dump *d, const char *filename, size_t size); -void dump_ensure(struct dump *d, size_t size); +void dump_ensure(struct dump *d, size_t required_size, size_t increase_size); void dump_sync(struct dump *d); void dump_close(struct dump *d); void *dump_get(struct dump *d, size_t count, size_t *offset); diff --git a/src/engine.c b/src/engine.c index 8ba089295da4b52d8d094c7740faeb701bb61783..73b7d9b64e744e08e19145a574a3a2b896ab3995 100644 --- a/src/engine.c +++ b/src/engine.c @@ -62,10 +62,13 @@ #include "cosmology.h" #include "cycle.h" #include "debug.h" +#include "equation_of_state.h" #include "error.h" #include "gravity.h" #include "gravity_cache.h" #include "hydro.h" +#include "logger.h" +#include "logger_io.h" #include "map.h" #include "memswap.h" #include "minmax.h" @@ -81,6 +84,7 @@ #include "single_io.h" #include "sort_part.h" #include "sourceterms.h" +#include "stars_io.h" #include "statistics.h" #include "timers.h" #include "tools.h" @@ -108,7 +112,9 @@ const char *engine_policy_names[] = {"none", "cooling", "sourceterms", "stars", - "structure finding"}; + "structure finding", + "star formation", + "feedback"}; /** The rank of the engine as a global variable (for messages). */ int engine_rank; @@ -118,9 +124,11 @@ int engine_rank; */ struct end_of_step_data { - size_t updates, g_updates, s_updates; + size_t updated, g_updated, s_updated; + size_t inhibited, g_inhibited, s_inhibited; integertime_t ti_hydro_end_min, ti_hydro_end_max, ti_hydro_beg_max; integertime_t ti_gravity_end_min, ti_gravity_end_max, ti_gravity_beg_max; + integertime_t ti_stars_end_min; struct engine *e; }; @@ -147,267 +155,6 @@ void engine_addlink(struct engine *e, struct link **l, struct task *t) { res->next = atomic_swap(l, res); } -/** - * @brief Recursively add non-implicit ghost tasks to a cell hierarchy. 
- */ -void engine_add_ghosts(struct engine *e, struct cell *c, struct task *ghost_in, - struct task *ghost_out) { - - /* If we have reached the leaf OR have to few particles to play with*/ - if (!c->split || c->count < engine_max_parts_per_ghost) { - - /* Add the ghost task and its dependencies */ - struct scheduler *s = &e->sched; - c->ghost = - scheduler_addtask(s, task_type_ghost, task_subtype_none, 0, 0, c, NULL); - scheduler_addunlock(s, ghost_in, c->ghost); - scheduler_addunlock(s, c->ghost, ghost_out); - } else { - /* Keep recursing */ - for (int k = 0; k < 8; k++) - if (c->progeny[k] != NULL) - engine_add_ghosts(e, c->progeny[k], ghost_in, ghost_out); - } -} - -/** - * @brief Generate the hydro hierarchical tasks for a hierarchy of cells - - * i.e. all the O(Npart) tasks -- timestep version - * - * Tasks are only created here. The dependencies will be added later on. - * - * Note that there is no need to recurse below the super-cell. Note also - * that we only add tasks if the relevant particles are present in the cell. - * - * @param e The #engine. - * @param c The #cell. - */ -void engine_make_hierarchical_tasks_common(struct engine *e, struct cell *c) { - - struct scheduler *s = &e->sched; - const int is_with_cooling = (e->policy & engine_policy_cooling); - - /* Are we in a super-cell ? */ - if (c->super == c) { - - /* Local tasks only... 
*/ - if (c->nodeID == e->nodeID) { - - /* Add the two half kicks */ - c->kick1 = scheduler_addtask(s, task_type_kick1, task_subtype_none, 0, 0, - c, NULL); - - c->kick2 = scheduler_addtask(s, task_type_kick2, task_subtype_none, 0, 0, - c, NULL); - - /* Add the time-step calculation task and its dependency */ - c->timestep = scheduler_addtask(s, task_type_timestep, task_subtype_none, - 0, 0, c, NULL); - - /* Add the task finishing the force calculation */ - c->end_force = scheduler_addtask(s, task_type_end_force, - task_subtype_none, 0, 0, c, NULL); - - if (!is_with_cooling) scheduler_addunlock(s, c->end_force, c->kick2); - scheduler_addunlock(s, c->kick2, c->timestep); - scheduler_addunlock(s, c->timestep, c->kick1); - } - - } else { /* We are above the super-cell so need to go deeper */ - - /* Recurse. */ - if (c->split) - for (int k = 0; k < 8; k++) - if (c->progeny[k] != NULL) - engine_make_hierarchical_tasks_common(e, c->progeny[k]); - } -} - -/** - * @brief Generate the hydro hierarchical tasks for a hierarchy of cells - - * i.e. all the O(Npart) tasks -- hydro version - * - * Tasks are only created here. The dependencies will be added later on. - * - * Note that there is no need to recurse below the super-cell. Note also - * that we only add tasks if the relevant particles are present in the cell. - * - * @param e The #engine. - * @param c The #cell. - */ -void engine_make_hierarchical_tasks_hydro(struct engine *e, struct cell *c) { - - struct scheduler *s = &e->sched; - const int is_with_cooling = (e->policy & engine_policy_cooling); - const int is_with_sourceterms = (e->policy & engine_policy_sourceterms); - - /* Are we in a super-cell ? */ - if (c->super_hydro == c) { - - /* Add the sort task. */ - c->sorts = - scheduler_addtask(s, task_type_sort, task_subtype_none, 0, 0, c, NULL); - - /* Local tasks only... */ - if (c->nodeID == e->nodeID) { - - /* Add the drift task. 
*/ - c->drift_part = scheduler_addtask(s, task_type_drift_part, - task_subtype_none, 0, 0, c, NULL); - - /* Generate the ghost tasks. */ - c->ghost_in = - scheduler_addtask(s, task_type_ghost_in, task_subtype_none, 0, - /* implicit = */ 1, c, NULL); - c->ghost_out = - scheduler_addtask(s, task_type_ghost_out, task_subtype_none, 0, - /* implicit = */ 1, c, NULL); - engine_add_ghosts(e, c, c->ghost_in, c->ghost_out); - -#ifdef EXTRA_HYDRO_LOOP - /* Generate the extra ghost task. */ - c->extra_ghost = scheduler_addtask(s, task_type_extra_ghost, - task_subtype_none, 0, 0, c, NULL); -#endif - - /* Cooling task */ - if (is_with_cooling) { - c->cooling = scheduler_addtask(s, task_type_cooling, task_subtype_none, - 0, 0, c, NULL); - - scheduler_addunlock(s, c->super->end_force, c->cooling); - scheduler_addunlock(s, c->cooling, c->super->kick2); - } - - /* add source terms */ - if (is_with_sourceterms) { - c->sourceterms = scheduler_addtask(s, task_type_sourceterms, - task_subtype_none, 0, 0, c, NULL); - } - } - - } else { /* We are above the super-cell so need to go deeper */ - - /* Recurse. */ - if (c->split) - for (int k = 0; k < 8; k++) - if (c->progeny[k] != NULL) - engine_make_hierarchical_tasks_hydro(e, c->progeny[k]); - } -} - -/** - * @brief Generate the hydro hierarchical tasks for a hierarchy of cells - - * i.e. all the O(Npart) tasks -- gravity version - * - * Tasks are only created here. The dependencies will be added later on. - * - * Note that there is no need to recurse below the super-cell. Note also - * that we only add tasks if the relevant particles are present in the cell. - * - * @param e The #engine. - * @param c The #cell. - */ -void engine_make_hierarchical_tasks_gravity(struct engine *e, struct cell *c) { - - struct scheduler *s = &e->sched; - const int periodic = e->s->periodic; - const int is_self_gravity = (e->policy & engine_policy_self_gravity); - - /* Are we in a super-cell ? */ - if (c->super_gravity == c) { - - /* Local tasks only... 
*/ - if (c->nodeID == e->nodeID) { - - c->drift_gpart = scheduler_addtask(s, task_type_drift_gpart, - task_subtype_none, 0, 0, c, NULL); - - if (is_self_gravity) { - - /* Initialisation of the multipoles */ - c->init_grav = scheduler_addtask(s, task_type_init_grav, - task_subtype_none, 0, 0, c, NULL); - - /* Gravity non-neighbouring pm calculations */ - c->grav_long_range = scheduler_addtask( - s, task_type_grav_long_range, task_subtype_none, 0, 0, c, NULL); - - /* Gravity recursive down-pass */ - c->grav_down = scheduler_addtask(s, task_type_grav_down, - task_subtype_none, 0, 0, c, NULL); - - /* Implicit tasks for the up and down passes */ - c->init_grav_out = scheduler_addtask(s, task_type_init_grav_out, - task_subtype_none, 0, 1, c, NULL); - c->grav_down_in = scheduler_addtask(s, task_type_grav_down_in, - task_subtype_none, 0, 1, c, NULL); - - /* Gravity mesh force propagation */ - if (periodic) - c->grav_mesh = scheduler_addtask(s, task_type_grav_mesh, - task_subtype_none, 0, 0, c, NULL); - - if (periodic) scheduler_addunlock(s, c->drift_gpart, c->grav_mesh); - if (periodic) scheduler_addunlock(s, c->grav_mesh, c->grav_down); - scheduler_addunlock(s, c->init_grav, c->grav_long_range); - scheduler_addunlock(s, c->grav_long_range, c->grav_down); - scheduler_addunlock(s, c->grav_down, c->super->end_force); - - /* Link in the implicit tasks */ - scheduler_addunlock(s, c->init_grav, c->init_grav_out); - scheduler_addunlock(s, c->grav_down_in, c->grav_down); - } - } - } - - /* We are below the super-cell but not below the maximal splitting depth */ - else if (c->super_gravity != NULL && c->depth <= space_subdepth_grav) { - - /* Local tasks only... 
*/ - if (c->nodeID == e->nodeID) { - - if (is_self_gravity) { - - c->init_grav_out = scheduler_addtask(s, task_type_init_grav_out, - task_subtype_none, 0, 1, c, NULL); - - c->grav_down_in = scheduler_addtask(s, task_type_grav_down_in, - task_subtype_none, 0, 1, c, NULL); - - scheduler_addunlock(s, c->parent->init_grav_out, c->init_grav_out); - scheduler_addunlock(s, c->grav_down_in, c->parent->grav_down_in); - } - } - } - - /* Recurse but not below the maximal splitting depth */ - if (c->split && c->depth <= space_subdepth_grav) - for (int k = 0; k < 8; k++) - if (c->progeny[k] != NULL) - engine_make_hierarchical_tasks_gravity(e, c->progeny[k]); -} - -void engine_make_hierarchical_tasks_mapper(void *map_data, int num_elements, - void *extra_data) { - struct engine *e = (struct engine *)extra_data; - const int is_with_hydro = (e->policy & engine_policy_hydro); - const int is_with_self_gravity = (e->policy & engine_policy_self_gravity); - const int is_with_external_gravity = - (e->policy & engine_policy_external_gravity); - - for (int ind = 0; ind < num_elements; ind++) { - struct cell *c = &((struct cell *)map_data)[ind]; - /* Make the common tasks (time integration) */ - engine_make_hierarchical_tasks_common(e, c); - /* Add the hydro stuff */ - if (is_with_hydro) engine_make_hierarchical_tasks_hydro(e, c); - /* And the gravity stuff */ - if (is_with_self_gravity || is_with_external_gravity) - engine_make_hierarchical_tasks_gravity(e, c); - } -} - #ifdef WITH_MPI /** * Do the exchange of one type of particles with all the other nodes. 
@@ -650,7 +397,7 @@ struct savelink_mapper_data { for (int k = 0; k < counts[nodeID * nr_nodes + node]; k++) { \ if (parts[k + offset].gpart != NULL) { \ if (CHECKS) \ - if (parts[k].gpart->id_or_neg_offset > 0) \ + if (parts[k + offset].gpart->id_or_neg_offset > 0) \ error("Trying to link a partnerless " #TYPE "!"); \ parts[k + offset].gpart->id_or_neg_offset = -count; \ count++; \ @@ -748,7 +495,7 @@ static void engine_redistribute_relink_mapper(void *map_data, int num_elements, } /* Does this gpart have a star partner ? */ - else if (s->gparts[k].type == swift_type_star) { + else if (s->gparts[k].type == swift_type_stars) { const ptrdiff_t partner_index = offset_sparts - s->gparts[k].id_or_neg_offset; @@ -788,11 +535,87 @@ void engine_redistribute(struct engine *e) { struct space *s = e->s; struct cell *cells = s->cells_top; const int nr_cells = s->nr_cells; + struct xpart *xparts = s->xparts; struct part *parts = s->parts; struct gpart *gparts = s->gparts; struct spart *sparts = s->sparts; ticks tic = getticks(); + size_t nr_parts = s->nr_parts; + size_t nr_gparts = s->nr_gparts; + size_t nr_sparts = s->nr_sparts; + + /* Start by moving inhibited particles to the end of the arrays */ + for (size_t k = 0; k < nr_parts; /* void */) { + if (parts[k].time_bin == time_bin_inhibited) { + nr_parts -= 1; + + /* Swap the particle */ + memswap(&parts[k], &parts[nr_parts], sizeof(struct part)); + + /* Swap the xpart */ + memswap(&xparts[k], &xparts[nr_parts], sizeof(struct xpart)); + + /* Swap the link with the gpart */ + if (parts[k].gpart != NULL) { + parts[k].gpart->id_or_neg_offset = -k; + } + if (parts[nr_parts].gpart != NULL) { + parts[nr_parts].gpart->id_or_neg_offset = -nr_parts; + } + } else { + k++; + } + } + + /* Now move inhibited star particles to the end of the arrays */ + for (size_t k = 0; k < nr_sparts; /* void */) { + if (sparts[k].time_bin == time_bin_inhibited) { + nr_sparts -= 1; + + /* Swap the particle */ + memswap(&s->sparts[k], 
&s->sparts[nr_sparts], sizeof(struct spart)); + + /* Swap the link with the gpart */ + if (s->sparts[k].gpart != NULL) { + s->sparts[k].gpart->id_or_neg_offset = -k; + } + if (s->sparts[nr_sparts].gpart != NULL) { + s->sparts[nr_sparts].gpart->id_or_neg_offset = -nr_sparts; + } + } else { + k++; + } + } + + /* Finally do the same with the gravity particles */ + for (size_t k = 0; k < nr_gparts; /* void */) { + if (gparts[k].time_bin == time_bin_inhibited) { + nr_gparts -= 1; + + /* Swap the particle */ + memswap(&s->gparts[k], &s->gparts[nr_gparts], sizeof(struct gpart)); + + /* Swap the link with part/spart */ + if (s->gparts[k].type == swift_type_gas) { + s->parts[-s->gparts[k].id_or_neg_offset].gpart = &s->gparts[k]; + } else if (s->gparts[k].type == swift_type_stars) { + s->sparts[-s->gparts[k].id_or_neg_offset].gpart = &s->gparts[k]; + } + if (s->gparts[nr_gparts].type == swift_type_gas) { + s->parts[-s->gparts[nr_gparts].id_or_neg_offset].gpart = + &s->gparts[nr_gparts]; + } else if (s->gparts[nr_gparts].type == swift_type_stars) { + s->sparts[-s->gparts[nr_gparts].id_or_neg_offset].gpart = + &s->gparts[nr_gparts]; + } + } else { + k++; + } + } + + /* Now we are ready to deal with real particles and can start the exchange. */ + /* Allocate temporary arrays to store the counts of particles to be sent * and the destination of each particle */ int *counts; @@ -800,7 +623,7 @@ void engine_redistribute(struct engine *e) { error("Failed to allocate counts temporary buffer."); int *dest; - if ((dest = (int *)malloc(sizeof(int) * s->nr_parts)) == NULL) + if ((dest = (int *)malloc(sizeof(int) * nr_parts)) == NULL) error("Failed to allocate dest temporary buffer."); /* Simple index of node IDs, used for mappers over nodes. 
*/ @@ -820,16 +643,16 @@ void engine_redistribute(struct engine *e) { redist_data.base = (void *)parts; threadpool_map(&e->threadpool, engine_redistribute_dest_mapper_part, parts, - s->nr_parts, sizeof(struct part), 0, &redist_data); + nr_parts, sizeof(struct part), 0, &redist_data); /* Sort the particles according to their cell index. */ - if (s->nr_parts > 0) + if (nr_parts > 0) space_parts_sort(s->parts, s->xparts, dest, &counts[nodeID * nr_nodes], nr_nodes, 0); #ifdef SWIFT_DEBUG_CHECKS /* Verify that the part have been sorted correctly. */ - for (size_t k = 0; k < s->nr_parts; k++) { + for (size_t k = 0; k < nr_parts; k++) { const struct part *p = &s->parts[k]; /* New cell index */ @@ -853,7 +676,7 @@ void engine_redistribute(struct engine *e) { /* We will need to re-link the gpart partners of parts, so save their * relative positions in the sent lists. */ - if (s->nr_parts > 0 && s->nr_gparts > 0) { + if (nr_parts > 0 && nr_gparts > 0) { struct savelink_mapper_data savelink_data; savelink_data.nr_nodes = nr_nodes; @@ -871,7 +694,7 @@ void engine_redistribute(struct engine *e) { error("Failed to allocate s_counts temporary buffer."); int *s_dest; - if ((s_dest = (int *)malloc(sizeof(int) * s->nr_sparts)) == NULL) + if ((s_dest = (int *)malloc(sizeof(int) * nr_sparts)) == NULL) error("Failed to allocate s_dest temporary buffer."); redist_data.counts = s_counts; @@ -879,16 +702,16 @@ void engine_redistribute(struct engine *e) { redist_data.base = (void *)sparts; threadpool_map(&e->threadpool, engine_redistribute_dest_mapper_spart, sparts, - s->nr_sparts, sizeof(struct spart), 0, &redist_data); + nr_sparts, sizeof(struct spart), 0, &redist_data); /* Sort the particles according to their cell index. */ - if (s->nr_sparts > 0) + if (nr_sparts > 0) space_sparts_sort(s->sparts, s_dest, &s_counts[nodeID * nr_nodes], nr_nodes, 0); #ifdef SWIFT_DEBUG_CHECKS /* Verify that the spart have been sorted correctly. 
*/ - for (size_t k = 0; k < s->nr_sparts; k++) { + for (size_t k = 0; k < nr_sparts; k++) { const struct spart *sp = &s->sparts[k]; /* New cell index */ @@ -911,7 +734,7 @@ void engine_redistribute(struct engine *e) { #endif /* We need to re-link the gpart partners of sparts. */ - if (s->nr_sparts > 0) { + if (nr_sparts > 0) { struct savelink_mapper_data savelink_data; savelink_data.nr_nodes = nr_nodes; @@ -929,7 +752,7 @@ void engine_redistribute(struct engine *e) { error("Failed to allocate g_gcount temporary buffer."); int *g_dest; - if ((g_dest = (int *)malloc(sizeof(int) * s->nr_gparts)) == NULL) + if ((g_dest = (int *)malloc(sizeof(int) * nr_gparts)) == NULL) error("Failed to allocate g_dest temporary buffer."); redist_data.counts = g_counts; @@ -937,16 +760,16 @@ void engine_redistribute(struct engine *e) { redist_data.base = (void *)gparts; threadpool_map(&e->threadpool, engine_redistribute_dest_mapper_gpart, gparts, - s->nr_gparts, sizeof(struct gpart), 0, &redist_data); + nr_gparts, sizeof(struct gpart), 0, &redist_data); /* Sort the gparticles according to their cell index. */ - if (s->nr_gparts > 0) + if (nr_gparts > 0) space_gparts_sort(s->gparts, s->parts, s->sparts, g_dest, &g_counts[nodeID * nr_nodes], nr_nodes); #ifdef SWIFT_DEBUG_CHECKS /* Verify that the gpart have been sorted correctly. */ - for (size_t k = 0; k < s->nr_gparts; k++) { + for (size_t k = 0; k < nr_gparts; k++) { const struct gpart *gp = &s->gparts[k]; /* New cell index */ @@ -1021,49 +844,50 @@ void engine_redistribute(struct engine *e) { /* Now each node knows how many parts, sparts and gparts will be transferred * to every other node. * Get the new numbers of particles for this node. 
*/ - size_t nr_parts = 0, nr_gparts = 0, nr_sparts = 0; - for (int k = 0; k < nr_nodes; k++) nr_parts += counts[k * nr_nodes + nodeID]; + size_t nr_parts_new = 0, nr_gparts_new = 0, nr_sparts_new = 0; for (int k = 0; k < nr_nodes; k++) - nr_gparts += g_counts[k * nr_nodes + nodeID]; + nr_parts_new += counts[k * nr_nodes + nodeID]; for (int k = 0; k < nr_nodes; k++) - nr_sparts += s_counts[k * nr_nodes + nodeID]; + nr_gparts_new += g_counts[k * nr_nodes + nodeID]; + for (int k = 0; k < nr_nodes; k++) + nr_sparts_new += s_counts[k * nr_nodes + nodeID]; /* Now exchange the particles, type by type to keep the memory required * under control. */ /* SPH particles. */ - void *new_parts = engine_do_redistribute(counts, (char *)s->parts, nr_parts, - sizeof(struct part), part_align, - part_mpi_type, nr_nodes, nodeID); + void *new_parts = engine_do_redistribute( + counts, (char *)s->parts, nr_parts_new, sizeof(struct part), part_align, + part_mpi_type, nr_nodes, nodeID); free(s->parts); s->parts = (struct part *)new_parts; - s->nr_parts = nr_parts; - s->size_parts = engine_redistribute_alloc_margin * nr_parts; + s->nr_parts = nr_parts_new; + s->size_parts = engine_redistribute_alloc_margin * nr_parts_new; /* Extra SPH particle properties. */ - new_parts = engine_do_redistribute(counts, (char *)s->xparts, nr_parts, + new_parts = engine_do_redistribute(counts, (char *)s->xparts, nr_parts_new, sizeof(struct xpart), xpart_align, xpart_mpi_type, nr_nodes, nodeID); free(s->xparts); s->xparts = (struct xpart *)new_parts; /* Gravity particles. 
*/ - new_parts = engine_do_redistribute(g_counts, (char *)s->gparts, nr_gparts, + new_parts = engine_do_redistribute(g_counts, (char *)s->gparts, nr_gparts_new, sizeof(struct gpart), gpart_align, gpart_mpi_type, nr_nodes, nodeID); free(s->gparts); s->gparts = (struct gpart *)new_parts; - s->nr_gparts = nr_gparts; - s->size_gparts = engine_redistribute_alloc_margin * nr_gparts; + s->nr_gparts = nr_gparts_new; + s->size_gparts = engine_redistribute_alloc_margin * nr_gparts_new; /* Star particles. */ - new_parts = engine_do_redistribute(s_counts, (char *)s->sparts, nr_sparts, + new_parts = engine_do_redistribute(s_counts, (char *)s->sparts, nr_sparts_new, sizeof(struct spart), spart_align, spart_mpi_type, nr_nodes, nodeID); free(s->sparts); s->sparts = (struct spart *)new_parts; - s->nr_sparts = nr_sparts; - s->size_sparts = engine_redistribute_alloc_margin * nr_sparts; + s->nr_sparts = nr_sparts_new; + s->size_sparts = engine_redistribute_alloc_margin * nr_sparts_new; /* All particles have now arrived. Time for some final operations on the stuff we just received */ @@ -1090,7 +914,7 @@ void engine_redistribute(struct engine *e) { #ifdef SWIFT_DEBUG_CHECKS /* Verify that all parts are in the right place. 
*/ - for (size_t k = 0; k < nr_parts; k++) { + for (size_t k = 0; k < nr_parts_new; k++) { const int cid = cell_getid(s->cdim, s->parts[k].x[0] * s->iwidth[0], s->parts[k].x[1] * s->iwidth[1], s->parts[k].x[2] * s->iwidth[2]); @@ -1098,7 +922,7 @@ void engine_redistribute(struct engine *e) { error("Received particle (%zu) that does not belong here (nodeID=%i).", k, cells[cid].nodeID); } - for (size_t k = 0; k < nr_gparts; k++) { + for (size_t k = 0; k < nr_gparts_new; k++) { const int cid = cell_getid(s->cdim, s->gparts[k].x[0] * s->iwidth[0], s->gparts[k].x[1] * s->iwidth[1], s->gparts[k].x[2] * s->iwidth[2]); @@ -1106,7 +930,7 @@ void engine_redistribute(struct engine *e) { error("Received g-particle (%zu) that does not belong here (nodeID=%i).", k, cells[cid].nodeID); } - for (size_t k = 0; k < nr_sparts; k++) { + for (size_t k = 0; k < nr_sparts_new; k++) { const int cid = cell_getid(s->cdim, s->sparts[k].x[0] * s->iwidth[0], s->sparts[k].x[1] * s->iwidth[1], s->sparts[k].x[2] * s->iwidth[2]); @@ -1116,8 +940,8 @@ void engine_redistribute(struct engine *e) { } /* Verify that the links are correct */ - part_verify_links(s->parts, s->gparts, s->sparts, nr_parts, nr_gparts, - nr_sparts, e->verbose); + part_verify_links(s->parts, s->gparts, s->sparts, nr_parts_new, nr_gparts_new, + nr_sparts_new, e->verbose); #endif /* Be verbose about what just happened. 
*/ @@ -1126,7 +950,7 @@ void engine_redistribute(struct engine *e) { for (int k = 0; k < nr_cells; k++) if (cells[k].nodeID == nodeID) my_cells += 1; message("node %i now has %zu parts, %zu sparts and %zu gparts in %i cells.", - nodeID, nr_parts, nr_sparts, nr_gparts, my_cells); + nodeID, nr_parts_new, nr_sparts_new, nr_gparts_new, my_cells); } /* Flag that a redistribute has taken place */ @@ -1147,7 +971,7 @@ void engine_redistribute(struct engine *e) { */ void engine_repartition(struct engine *e) { -#if defined(WITH_MPI) && defined(HAVE_METIS) +#if defined(WITH_MPI) && (defined(HAVE_PARMETIS) || defined(HAVE_METIS)) ticks tic = getticks(); @@ -1157,8 +981,7 @@ void engine_repartition(struct engine *e) { fflush(stdout); /* Check that all cells have been drifted to the current time */ - space_check_drift_point(e->s, e->ti_current, - e->policy & engine_policy_self_gravity); + space_check_drift_point(e->s, e->ti_current, /*check_multipoles=*/0); #endif /* Clear the repartition flag. */ @@ -1205,7 +1028,7 @@ void engine_repartition(struct engine *e) { clocks_getunit()); #else if (e->reparttype->type != REPART_NONE) - error("SWIFT was not compiled with MPI and METIS support."); + error("SWIFT was not compiled with MPI and METIS or ParMETIS support."); /* Clear the repartition flag. */ e->forcerepart = 0; @@ -1221,6 +1044,8 @@ void engine_repartition_trigger(struct engine *e) { #ifdef WITH_MPI + const ticks tic = getticks(); + /* Do nothing if there have not been enough steps since the last * repartition, don't want to repeat this too often or immediately after * a repartition step. Also nothing to do when requested. */ @@ -1289,95 +1114,103 @@ void engine_repartition_trigger(struct engine *e) { /* We always reset CPU time for next check, unless it will not be used. 
*/ if (e->reparttype->type != REPART_NONE) e->cputime_last_step = clocks_get_cputime_used(); + + if (e->verbose) + message("took %.3f %s", clocks_from_ticks(getticks() - tic), + clocks_getunit()); #endif } /** - * @brief Add send tasks for the hydro pairs to a hierarchy of cells. + * @brief Exchange cell structures with other nodes. * * @param e The #engine. - * @param ci The sending #cell. - * @param cj Dummy cell containing the nodeID of the receiving node. - * @param t_xv The send_xv #task, if it has already been created. - * @param t_rho The send_rho #task, if it has already been created. - * @param t_gradient The send_gradient #task, if already created. */ -void engine_addtasks_send_hydro(struct engine *e, struct cell *ci, - struct cell *cj, struct task *t_xv, - struct task *t_rho, struct task *t_gradient) { +void engine_exchange_cells(struct engine *e) { #ifdef WITH_MPI - struct link *l = NULL; - struct scheduler *s = &e->sched; - const int nodeID = cj->nodeID; - - /* Check if any of the density tasks are for the target node. */ - for (l = ci->density; l != NULL; l = l->next) - if (l->t->ci->nodeID == nodeID || - (l->t->cj != NULL && l->t->cj->nodeID == nodeID)) - break; - - /* If so, attach send tasks. */ - if (l != NULL) { - - /* Create the tasks and their dependencies? 
*/ - if (t_xv == NULL) { - - t_xv = scheduler_addtask(s, task_type_send, task_subtype_xv, - 6 * ci->tag + 0, 0, ci, cj); - t_rho = scheduler_addtask(s, task_type_send, task_subtype_rho, - 6 * ci->tag + 1, 0, ci, cj); -#ifdef EXTRA_HYDRO_LOOP - t_gradient = scheduler_addtask(s, task_type_send, task_subtype_gradient, - 6 * ci->tag + 3, 0, ci, cj); -#endif - -#ifdef EXTRA_HYDRO_LOOP - scheduler_addunlock(s, t_gradient, ci->super->kick2); + struct space *s = e->s; + const int nr_proxies = e->nr_proxies; + const int with_gravity = e->policy & engine_policy_self_gravity; + const ticks tic = getticks(); - scheduler_addunlock(s, ci->super_hydro->extra_ghost, t_gradient); + /* Exchange the cell structure with neighbouring ranks. */ + proxy_cells_exchange(e->proxies, e->nr_proxies, e->s, with_gravity); - /* The send_rho task should unlock the super_hydro-cell's extra_ghost - * task. */ - scheduler_addunlock(s, t_rho, ci->super_hydro->extra_ghost); + ticks tic2 = getticks(); - /* The send_rho task depends on the cell's ghost task. */ - scheduler_addunlock(s, ci->super_hydro->ghost_out, t_rho); + /* Count the number of particles we need to import and re-allocate + the buffer if needed. 
*/ + size_t count_parts_in = 0, count_gparts_in = 0, count_sparts_in = 0; + for (int k = 0; k < nr_proxies; k++) + for (int j = 0; j < e->proxies[k].nr_cells_in; j++) { + if (e->proxies[k].cells_in_type[j] & proxy_cell_type_hydro) + count_parts_in += e->proxies[k].cells_in[j]->hydro.count; + if (e->proxies[k].cells_in_type[j] & proxy_cell_type_gravity) + count_gparts_in += e->proxies[k].cells_in[j]->grav.count; + count_sparts_in += e->proxies[k].cells_in[j]->stars.count; + } + if (count_parts_in > s->size_parts_foreign) { + if (s->parts_foreign != NULL) free(s->parts_foreign); + s->size_parts_foreign = 1.1 * count_parts_in; + if (posix_memalign((void **)&s->parts_foreign, part_align, + sizeof(struct part) * s->size_parts_foreign) != 0) + error("Failed to allocate foreign part data."); + } + if (count_gparts_in > s->size_gparts_foreign) { + if (s->gparts_foreign != NULL) free(s->gparts_foreign); + s->size_gparts_foreign = 1.1 * count_gparts_in; + if (posix_memalign((void **)&s->gparts_foreign, gpart_align, + sizeof(struct gpart) * s->size_gparts_foreign) != 0) + error("Failed to allocate foreign gpart data."); + } + if (count_sparts_in > s->size_sparts_foreign) { + if (s->sparts_foreign != NULL) free(s->sparts_foreign); + s->size_sparts_foreign = 1.1 * count_sparts_in; + if (posix_memalign((void **)&s->sparts_foreign, spart_align, + sizeof(struct spart) * s->size_sparts_foreign) != 0) + error("Failed to allocate foreign spart data."); + } - /* The send_xv task should unlock the super_hydro-cell's ghost task. */ - scheduler_addunlock(s, t_xv, ci->super_hydro->ghost_in); + if (e->verbose) + message("Counting and allocating arrays took %.3f %s.", + clocks_from_ticks(getticks() - tic2), clocks_getunit()); -#else - /* The send_rho task should unlock the super_hydro-cell's kick task. */ - scheduler_addunlock(s, t_rho, ci->super->end_force); + tic2 = getticks(); - /* The send_rho task depends on the cell's ghost task. 
*/ - scheduler_addunlock(s, ci->super_hydro->ghost_out, t_rho); + /* Unpack the cells and link to the particle data. */ + struct part *parts = s->parts_foreign; + struct gpart *gparts = s->gparts_foreign; + struct spart *sparts = s->sparts_foreign; + for (int k = 0; k < nr_proxies; k++) { + for (int j = 0; j < e->proxies[k].nr_cells_in; j++) { - /* The send_xv task should unlock the super_hydro-cell's ghost task. */ - scheduler_addunlock(s, t_xv, ci->super_hydro->ghost_in); + if (e->proxies[k].cells_in_type[j] & proxy_cell_type_hydro) { + cell_link_parts(e->proxies[k].cells_in[j], parts); + parts = &parts[e->proxies[k].cells_in[j]->hydro.count]; + } -#endif + if (e->proxies[k].cells_in_type[j] & proxy_cell_type_gravity) { + cell_link_gparts(e->proxies[k].cells_in[j], gparts); + gparts = &gparts[e->proxies[k].cells_in[j]->grav.count]; + } - /* Drift before you send */ - scheduler_addunlock(s, ci->super_hydro->drift_part, t_xv); + cell_link_sparts(e->proxies[k].cells_in[j], sparts); + sparts = &sparts[e->proxies[k].cells_in[j]->stars.count]; } - - /* Add them to the local cell. */ - engine_addlink(e, &ci->send_xv, t_xv); - engine_addlink(e, &ci->send_rho, t_rho); -#ifdef EXTRA_HYDRO_LOOP - engine_addlink(e, &ci->send_gradient, t_gradient); -#endif } + s->nr_parts_foreign = parts - s->parts_foreign; + s->nr_gparts_foreign = gparts - s->gparts_foreign; + s->nr_sparts_foreign = sparts - s->sparts_foreign; + + if (e->verbose) + message("Recursively linking arrays took %.3f %s.", + clocks_from_ticks(getticks() - tic2), clocks_getunit()); - /* Recurse? 
*/ - if (ci->split) - for (int k = 0; k < 8; k++) - if (ci->progeny[k] != NULL) - engine_addtasks_send_hydro(e, ci->progeny[k], cj, t_xv, t_rho, - t_gradient); + if (e->verbose) + message("took %.3f %s.", clocks_from_ticks(getticks() - tic), + clocks_getunit()); #else error("SWIFT was not compiled with MPI support."); @@ -1385,615 +1218,246 @@ void engine_addtasks_send_hydro(struct engine *e, struct cell *ci, } /** - * @brief Add send tasks for the gravity pairs to a hierarchy of cells. + * @brief Exchange straying particles with other nodes. * * @param e The #engine. - * @param ci The sending #cell. - * @param cj Dummy cell containing the nodeID of the receiving node. - * @param t_grav The send_grav #task, if it has already been created. + * @param offset_parts The index in the parts array as of which the foreign + * parts reside (i.e. the current number of local #part). + * @param ind_part The foreign #cell ID of each part. + * @param Npart The number of stray parts, contains the number of parts received + * on return. + * @param offset_gparts The index in the gparts array as of which the foreign + * parts reside (i.e. the current number of local #gpart). + * @param ind_gpart The foreign #cell ID of each gpart. + * @param Ngpart The number of stray gparts, contains the number of gparts + * received on return. + * @param offset_sparts The index in the sparts array as of which the foreign + * parts reside (i.e. the current number of local #spart). + * @param ind_spart The foreign #cell ID of each spart. + * @param Nspart The number of stray sparts, contains the number of sparts + * received on return. + * + * Note that this function does not mess-up the linkage between parts and + * gparts, i.e. the received particles have correct linkeage. 
*/ -void engine_addtasks_send_gravity(struct engine *e, struct cell *ci, - struct cell *cj, struct task *t_grav) { +void engine_exchange_strays(struct engine *e, const size_t offset_parts, + const int *ind_part, size_t *Npart, + const size_t offset_gparts, const int *ind_gpart, + size_t *Ngpart, const size_t offset_sparts, + const int *ind_spart, size_t *Nspart) { #ifdef WITH_MPI - struct link *l = NULL; - struct scheduler *s = &e->sched; - const int nodeID = cj->nodeID; - /* Check if any of the gravity tasks are for the target node. */ - for (l = ci->grav; l != NULL; l = l->next) - if (l->t->ci->nodeID == nodeID || - (l->t->cj != NULL && l->t->cj->nodeID == nodeID)) - break; - - /* If so, attach send tasks. */ - if (l != NULL) { + struct space *s = e->s; + ticks tic = getticks(); - /* Create the tasks and their dependencies? */ - if (t_grav == NULL) { + /* Re-set the proxies. */ + for (int k = 0; k < e->nr_proxies; k++) { + e->proxies[k].nr_parts_out = 0; + e->proxies[k].nr_gparts_out = 0; + e->proxies[k].nr_sparts_out = 0; + } - t_grav = scheduler_addtask(s, task_type_send, task_subtype_gpart, - 6 * ci->tag + 4, 0, ci, cj); + /* Put the parts into the corresponding proxies. */ + for (size_t k = 0; k < *Npart; k++) { - /* The sends should unlock the down pass. */ - scheduler_addunlock(s, t_grav, ci->super_gravity->grav_down); + /* Ignore the particles we want to get rid of (inhibited, ...). */ + if (ind_part[k] == -1) continue; - /* Drift before you send */ - scheduler_addunlock(s, ci->super_gravity->drift_gpart, t_grav); + /* Get the target node and proxy ID. 
*/ + const int node_id = e->s->cells_top[ind_part[k]].nodeID; + if (node_id < 0 || node_id >= e->nr_nodes) + error("Bad node ID %i.", node_id); + const int pid = e->proxy_ind[node_id]; + if (pid < 0) { + error( + "Do not have a proxy for the requested nodeID %i for part with " + "id=%lld, x=[%e,%e,%e].", + node_id, s->parts[offset_parts + k].id, + s->parts[offset_parts + k].x[0], s->parts[offset_parts + k].x[1], + s->parts[offset_parts + k].x[2]); } - /* Add them to the local cell. */ - engine_addlink(e, &ci->send_grav, t_grav); - } + /* Re-link the associated gpart with the buffer offset of the part. */ + if (s->parts[offset_parts + k].gpart != NULL) { + s->parts[offset_parts + k].gpart->id_or_neg_offset = + -e->proxies[pid].nr_parts_out; + } - /* Recurse? */ - if (ci->split) - for (int k = 0; k < 8; k++) - if (ci->progeny[k] != NULL) - engine_addtasks_send_gravity(e, ci->progeny[k], cj, t_grav); +#ifdef SWIFT_DEBUG_CHECKS + if (s->parts[offset_parts + k].time_bin == time_bin_inhibited) + error("Attempting to exchange an inhibited particle"); +#endif -#else - error("SWIFT was not compiled with MPI support."); -#endif -} - -/** - * @brief Add send tasks for the time-step to a hierarchy of cells. - * - * @param e The #engine. - * @param ci The sending #cell. - * @param cj Dummy cell containing the nodeID of the receiving node. - * @param t_ti The send_ti #task, if it has already been created. - */ -void engine_addtasks_send_timestep(struct engine *e, struct cell *ci, - struct cell *cj, struct task *t_ti) { - -#ifdef WITH_MPI - struct link *l = NULL; - struct scheduler *s = &e->sched; - const int nodeID = cj->nodeID; - - /* Check if any of the gravity tasks are for the target node. */ - for (l = ci->grav; l != NULL; l = l->next) - if (l->t->ci->nodeID == nodeID || - (l->t->cj != NULL && l->t->cj->nodeID == nodeID)) - break; - - /* Check whether instead any of the hydro tasks are for the target node. 
*/ - if (l == NULL) - for (l = ci->density; l != NULL; l = l->next) - if (l->t->ci->nodeID == nodeID || - (l->t->cj != NULL && l->t->cj->nodeID == nodeID)) - break; - - /* If found anything, attach send tasks. */ - if (l != NULL) { - - /* Create the tasks and their dependencies? */ - if (t_ti == NULL) { - - t_ti = scheduler_addtask(s, task_type_send, task_subtype_tend, - 6 * ci->tag + 2, 0, ci, cj); - - /* The super-cell's timestep task should unlock the send_ti task. */ - scheduler_addunlock(s, ci->super->timestep, t_ti); - } - - /* Add them to the local cell. */ - engine_addlink(e, &ci->send_ti, t_ti); - } - - /* Recurse? */ - if (ci->split) - for (int k = 0; k < 8; k++) - if (ci->progeny[k] != NULL) - engine_addtasks_send_timestep(e, ci->progeny[k], cj, t_ti); - -#else - error("SWIFT was not compiled with MPI support."); -#endif -} - -/** - * @brief Add recv tasks for hydro pairs to a hierarchy of cells. - * - * @param e The #engine. - * @param c The foreign #cell. - * @param t_xv The recv_xv #task, if it has already been created. - * @param t_rho The recv_rho #task, if it has already been created. - * @param t_gradient The recv_gradient #task, if it has already been created. - */ -void engine_addtasks_recv_hydro(struct engine *e, struct cell *c, - struct task *t_xv, struct task *t_rho, - struct task *t_gradient) { - -#ifdef WITH_MPI - struct scheduler *s = &e->sched; - - /* Have we reached a level where there are any hydro tasks ? */ - if (t_xv == NULL && c->density != NULL) { - - /* Create the tasks. */ - t_xv = scheduler_addtask(s, task_type_recv, task_subtype_xv, 6 * c->tag + 0, - 0, c, NULL); - t_rho = scheduler_addtask(s, task_type_recv, task_subtype_rho, - 6 * c->tag + 1, 0, c, NULL); -#ifdef EXTRA_HYDRO_LOOP - t_gradient = scheduler_addtask(s, task_type_recv, task_subtype_gradient, - 6 * c->tag + 3, 0, c, NULL); -#endif - } - - c->recv_xv = t_xv; - c->recv_rho = t_rho; - c->recv_gradient = t_gradient; - - /* Add dependencies. 
*/ - if (c->sorts != NULL) scheduler_addunlock(s, t_xv, c->sorts); - - for (struct link *l = c->density; l != NULL; l = l->next) { - scheduler_addunlock(s, t_xv, l->t); - scheduler_addunlock(s, l->t, t_rho); - } -#ifdef EXTRA_HYDRO_LOOP - for (struct link *l = c->gradient; l != NULL; l = l->next) { - scheduler_addunlock(s, t_rho, l->t); - scheduler_addunlock(s, l->t, t_gradient); + /* Load the part and xpart into the proxy. */ + proxy_parts_load(&e->proxies[pid], &s->parts[offset_parts + k], + &s->xparts[offset_parts + k], 1); } - for (struct link *l = c->force; l != NULL; l = l->next) - scheduler_addunlock(s, t_gradient, l->t); -#else - for (struct link *l = c->force; l != NULL; l = l->next) - scheduler_addunlock(s, t_rho, l->t); -#endif - - /* Recurse? */ - if (c->split) - for (int k = 0; k < 8; k++) - if (c->progeny[k] != NULL) - engine_addtasks_recv_hydro(e, c->progeny[k], t_xv, t_rho, t_gradient); - -#else - error("SWIFT was not compiled with MPI support."); -#endif -} - -/** - * @brief Add recv tasks for gravity pairs to a hierarchy of cells. - * - * @param e The #engine. - * @param c The foreign #cell. - * @param t_grav The recv_gpart #task, if it has already been created. - */ -void engine_addtasks_recv_gravity(struct engine *e, struct cell *c, - struct task *t_grav) { -#ifdef WITH_MPI - struct scheduler *s = &e->sched; - - /* Have we reached a level where there are any gravity tasks ? */ - if (t_grav == NULL && c->grav != NULL) { - - /* Create the tasks. */ - t_grav = scheduler_addtask(s, task_type_recv, task_subtype_gpart, - 6 * c->tag + 4, 0, c, NULL); - } + /* Put the sparts into the corresponding proxies. */ + for (size_t k = 0; k < *Nspart; k++) { - c->recv_grav = t_grav; + /* Ignore the particles we want to get rid of (inhibited, ...). */ + if (ind_spart[k] == -1) continue; - for (struct link *l = c->grav; l != NULL; l = l->next) - scheduler_addunlock(s, t_grav, l->t); + /* Get the target node and proxy ID. 
*/ + const int node_id = e->s->cells_top[ind_spart[k]].nodeID; + if (node_id < 0 || node_id >= e->nr_nodes) + error("Bad node ID %i.", node_id); + const int pid = e->proxy_ind[node_id]; + if (pid < 0) { + error( + "Do not have a proxy for the requested nodeID %i for part with " + "id=%lld, x=[%e,%e,%e].", + node_id, s->sparts[offset_sparts + k].id, + s->sparts[offset_sparts + k].x[0], s->sparts[offset_sparts + k].x[1], + s->sparts[offset_sparts + k].x[2]); + } - /* Recurse? */ - if (c->split) - for (int k = 0; k < 8; k++) - if (c->progeny[k] != NULL) - engine_addtasks_recv_gravity(e, c->progeny[k], t_grav); + /* Re-link the associated gpart with the buffer offset of the spart. */ + if (s->sparts[offset_sparts + k].gpart != NULL) { + s->sparts[offset_sparts + k].gpart->id_or_neg_offset = + -e->proxies[pid].nr_sparts_out; + } -#else - error("SWIFT was not compiled with MPI support."); +#ifdef SWIFT_DEBUG_CHECKS + if (s->sparts[offset_sparts + k].time_bin == time_bin_inhibited) + error("Attempting to exchange an inhibited particle"); #endif -} -/** - * @brief Add recv tasks for gravity pairs to a hierarchy of cells. - * - * @param e The #engine. - * @param c The foreign #cell. - * @param t_ti The recv_ti #task, if already been created. - */ -void engine_addtasks_recv_timestep(struct engine *e, struct cell *c, - struct task *t_ti) { - -#ifdef WITH_MPI - struct scheduler *s = &e->sched; - - /* Have we reached a level where there are any self/pair tasks ? */ - if (t_ti == NULL && (c->grav != NULL || c->density != NULL)) { - - t_ti = scheduler_addtask(s, task_type_recv, task_subtype_tend, - 6 * c->tag + 2, 0, c, NULL); + /* Load the spart into the proxy */ + proxy_sparts_load(&e->proxies[pid], &s->sparts[offset_sparts + k], 1); } - c->recv_ti = t_ti; - - for (struct link *l = c->grav; l != NULL; l = l->next) - scheduler_addunlock(s, l->t, t_ti); + /* Put the gparts into the corresponding proxies. 
*/ + for (size_t k = 0; k < *Ngpart; k++) { - for (struct link *l = c->force; l != NULL; l = l->next) - scheduler_addunlock(s, l->t, t_ti); + /* Ignore the particles we want to get rid of (inhibited, ...). */ + if (ind_gpart[k] == -1) continue; - /* Recurse? */ - if (c->split) - for (int k = 0; k < 8; k++) - if (c->progeny[k] != NULL) - engine_addtasks_recv_timestep(e, c->progeny[k], t_ti); + /* Get the target node and proxy ID. */ + const int node_id = e->s->cells_top[ind_gpart[k]].nodeID; + if (node_id < 0 || node_id >= e->nr_nodes) + error("Bad node ID %i.", node_id); + const int pid = e->proxy_ind[node_id]; + if (pid < 0) { + error( + "Do not have a proxy for the requested nodeID %i for part with " + "id=%lli, x=[%e,%e,%e].", + node_id, s->gparts[offset_gparts + k].id_or_neg_offset, + s->gparts[offset_gparts + k].x[0], s->gparts[offset_gparts + k].x[1], + s->gparts[offset_gparts + k].x[2]); + } -#else - error("SWIFT was not compiled with MPI support."); +#ifdef SWIFT_DEBUG_CHECKS + if (s->gparts[offset_gparts + k].time_bin == time_bin_inhibited) + error("Attempting to exchange an inhibited particle"); #endif -} - -/** - * @brief Exchange cell structures with other nodes. - * - * @param e The #engine. - */ -void engine_exchange_cells(struct engine *e) { - -#ifdef WITH_MPI - - struct space *s = e->s; - struct cell *cells = s->cells_top; - const int nr_cells = s->nr_cells; - const int nr_proxies = e->nr_proxies; - int offset[nr_cells]; - MPI_Request reqs_in[engine_maxproxies]; - MPI_Request reqs_out[engine_maxproxies]; - MPI_Status status; - const ticks tic = getticks(); - /* Run through the cells and get the size of the ones that will be sent off. - */ - int count_out = 0; - for (int k = 0; k < nr_cells; k++) { - offset[k] = count_out; - if (cells[k].sendto) - count_out += (cells[k].pcell_size = cell_getsize(&cells[k])); + /* Load the gpart into the proxy */ + proxy_gparts_load(&e->proxies[pid], &s->gparts[offset_gparts + k], 1); } - /* Allocate the pcells. 
*/ - struct pcell *pcells = NULL; - if (posix_memalign((void **)&pcells, SWIFT_CACHE_ALIGNMENT, - sizeof(struct pcell) * count_out) != 0) - error("Failed to allocate pcell buffer."); - - /* Pack the cells. */ - cell_next_tag = 0; - for (int k = 0; k < nr_cells; k++) - if (cells[k].sendto) { - cell_pack(&cells[k], &pcells[offset[k]]); - cells[k].pcell = &pcells[offset[k]]; - } - /* Launch the proxies. */ - for (int k = 0; k < nr_proxies; k++) { - proxy_cells_exch1(&e->proxies[k]); - reqs_in[k] = e->proxies[k].req_cells_count_in; - reqs_out[k] = e->proxies[k].req_cells_count_out; + MPI_Request reqs_in[4 * engine_maxproxies]; + MPI_Request reqs_out[4 * engine_maxproxies]; + for (int k = 0; k < e->nr_proxies; k++) { + proxy_parts_exchange_first(&e->proxies[k]); + reqs_in[k] = e->proxies[k].req_parts_count_in; + reqs_out[k] = e->proxies[k].req_parts_count_out; } /* Wait for each count to come in and start the recv. */ - for (int k = 0; k < nr_proxies; k++) { + for (int k = 0; k < e->nr_proxies; k++) { int pid = MPI_UNDEFINED; - if (MPI_Waitany(nr_proxies, reqs_in, &pid, &status) != MPI_SUCCESS || + if (MPI_Waitany(e->nr_proxies, reqs_in, &pid, MPI_STATUS_IGNORE) != + MPI_SUCCESS || pid == MPI_UNDEFINED) error("MPI_Waitany failed."); // message( "request from proxy %i has arrived." , pid ); - proxy_cells_exch2(&e->proxies[pid]); + proxy_parts_exchange_second(&e->proxies[pid]); } /* Wait for all the sends to have finished too. */ - if (MPI_Waitall(nr_proxies, reqs_out, MPI_STATUSES_IGNORE) != MPI_SUCCESS) + if (MPI_Waitall(e->nr_proxies, reqs_out, MPI_STATUSES_IGNORE) != MPI_SUCCESS) error("MPI_Waitall on sends failed."); - /* Set the requests for the cells. */ - for (int k = 0; k < nr_proxies; k++) { - reqs_in[k] = e->proxies[k].req_cells_in; - reqs_out[k] = e->proxies[k].req_cells_out; + /* Count the total number of incoming particles and make sure we have + enough space to accommodate them. 
*/ + int count_parts_in = 0; + int count_gparts_in = 0; + int count_sparts_in = 0; + for (int k = 0; k < e->nr_proxies; k++) { + count_parts_in += e->proxies[k].nr_parts_in; + count_gparts_in += e->proxies[k].nr_gparts_in; + count_sparts_in += e->proxies[k].nr_sparts_in; } - - /* Wait for each pcell array to come in from the proxies. */ - for (int k = 0; k < nr_proxies; k++) { - int pid = MPI_UNDEFINED; - if (MPI_Waitany(nr_proxies, reqs_in, &pid, &status) != MPI_SUCCESS || - pid == MPI_UNDEFINED) - error("MPI_Waitany failed."); - // message( "cell data from proxy %i has arrived." , pid ); - for (int count = 0, j = 0; j < e->proxies[pid].nr_cells_in; j++) - count += cell_unpack(&e->proxies[pid].pcells_in[count], - e->proxies[pid].cells_in[j], e->s); + if (e->verbose) { + message("sent out %zu/%zu/%zu parts/gparts/sparts, got %i/%i/%i back.", + *Npart, *Ngpart, *Nspart, count_parts_in, count_gparts_in, + count_sparts_in); } - /* Wait for all the sends to have finished too. */ - if (MPI_Waitall(nr_proxies, reqs_out, MPI_STATUSES_IGNORE) != MPI_SUCCESS) - error("MPI_Waitall on sends failed."); + /* Reallocate the particle arrays if necessary */ + if (offset_parts + count_parts_in > s->size_parts) { + message("re-allocating parts array."); + s->size_parts = (offset_parts + count_parts_in) * engine_parts_size_grow; + struct part *parts_new = NULL; + struct xpart *xparts_new = NULL; + if (posix_memalign((void **)&parts_new, part_align, + sizeof(struct part) * s->size_parts) != 0 || + posix_memalign((void **)&xparts_new, xpart_align, + sizeof(struct xpart) * s->size_parts) != 0) + error("Failed to allocate new part data."); + memcpy(parts_new, s->parts, sizeof(struct part) * offset_parts); + memcpy(xparts_new, s->xparts, sizeof(struct xpart) * offset_parts); + free(s->parts); + free(s->xparts); + s->parts = parts_new; + s->xparts = xparts_new; - /* Count the number of particles we need to import and re-allocate - the buffer if needed. 
*/ - size_t count_parts_in = 0, count_gparts_in = 0, count_sparts_in = 0; - for (int k = 0; k < nr_proxies; k++) - for (int j = 0; j < e->proxies[k].nr_cells_in; j++) { - if (e->proxies[k].cells_in_type[j] & proxy_cell_type_hydro) - count_parts_in += e->proxies[k].cells_in[j]->count; - if (e->proxies[k].cells_in_type[j] & proxy_cell_type_gravity) - count_gparts_in += e->proxies[k].cells_in[j]->gcount; - count_sparts_in += e->proxies[k].cells_in[j]->scount; + /* Reset the links */ + for (size_t k = 0; k < offset_parts; k++) { + if (s->parts[k].gpart != NULL) { + s->parts[k].gpart->id_or_neg_offset = -k; + } } - if (count_parts_in > s->size_parts_foreign) { - if (s->parts_foreign != NULL) free(s->parts_foreign); - s->size_parts_foreign = 1.1 * count_parts_in; - if (posix_memalign((void **)&s->parts_foreign, part_align, - sizeof(struct part) * s->size_parts_foreign) != 0) - error("Failed to allocate foreign part data."); - } - if (count_gparts_in > s->size_gparts_foreign) { - if (s->gparts_foreign != NULL) free(s->gparts_foreign); - s->size_gparts_foreign = 1.1 * count_gparts_in; - if (posix_memalign((void **)&s->gparts_foreign, gpart_align, - sizeof(struct gpart) * s->size_gparts_foreign) != 0) - error("Failed to allocate foreign gpart data."); - } - if (count_sparts_in > s->size_sparts_foreign) { - if (s->sparts_foreign != NULL) free(s->sparts_foreign); - s->size_sparts_foreign = 1.1 * count_sparts_in; - if (posix_memalign((void **)&s->sparts_foreign, spart_align, - sizeof(struct spart) * s->size_sparts_foreign) != 0) - error("Failed to allocate foreign spart data."); } + if (offset_sparts + count_sparts_in > s->size_sparts) { + message("re-allocating sparts array."); + s->size_sparts = (offset_sparts + count_sparts_in) * engine_parts_size_grow; + struct spart *sparts_new = NULL; + if (posix_memalign((void **)&sparts_new, spart_align, + sizeof(struct spart) * s->size_sparts) != 0) + error("Failed to allocate new spart data."); + memcpy(sparts_new, s->sparts, 
sizeof(struct spart) * offset_sparts); + free(s->sparts); + s->sparts = sparts_new; - /* Unpack the cells and link to the particle data. */ - struct part *parts = s->parts_foreign; - struct gpart *gparts = s->gparts_foreign; - struct spart *sparts = s->sparts_foreign; - for (int k = 0; k < nr_proxies; k++) { - for (int j = 0; j < e->proxies[k].nr_cells_in; j++) { - - if (e->proxies[k].cells_in_type[j] & proxy_cell_type_hydro) { - cell_link_parts(e->proxies[k].cells_in[j], parts); - parts = &parts[e->proxies[k].cells_in[j]->count]; + /* Reset the links */ + for (size_t k = 0; k < offset_sparts; k++) { + if (s->sparts[k].gpart != NULL) { + s->sparts[k].gpart->id_or_neg_offset = -k; } + } + } + if (offset_gparts + count_gparts_in > s->size_gparts) { + message("re-allocating gparts array."); + s->size_gparts = (offset_gparts + count_gparts_in) * engine_parts_size_grow; + struct gpart *gparts_new = NULL; + if (posix_memalign((void **)&gparts_new, gpart_align, + sizeof(struct gpart) * s->size_gparts) != 0) + error("Failed to allocate new gpart data."); + memcpy(gparts_new, s->gparts, sizeof(struct gpart) * offset_gparts); + free(s->gparts); + s->gparts = gparts_new; - if (e->proxies[k].cells_in_type[j] & proxy_cell_type_gravity) { - cell_link_gparts(e->proxies[k].cells_in[j], gparts); - gparts = &gparts[e->proxies[k].cells_in[j]->gcount]; - } - - cell_link_sparts(e->proxies[k].cells_in[j], sparts); - sparts = &sparts[e->proxies[k].cells_in[j]->scount]; - } - } - s->nr_parts_foreign = parts - s->parts_foreign; - s->nr_gparts_foreign = gparts - s->gparts_foreign; - s->nr_sparts_foreign = sparts - s->sparts_foreign; - - /* Free the pcell buffer. */ - free(pcells); - - if (e->verbose) - message("took %.3f %s.", clocks_from_ticks(getticks() - tic), - clocks_getunit()); - -#else - error("SWIFT was not compiled with MPI support."); -#endif -} - -/** - * @brief Exchange straying particles with other nodes. - * - * @param e The #engine. 
- * @param offset_parts The index in the parts array as of which the foreign - * parts reside. - * @param ind_part The foreign #cell ID of each part. - * @param Npart The number of stray parts, contains the number of parts received - * on return. - * @param offset_gparts The index in the gparts array as of which the foreign - * parts reside. - * @param ind_gpart The foreign #cell ID of each gpart. - * @param Ngpart The number of stray gparts, contains the number of gparts - * received on return. - * @param offset_sparts The index in the sparts array as of which the foreign - * parts reside. - * @param ind_spart The foreign #cell ID of each spart. - * @param Nspart The number of stray sparts, contains the number of sparts - * received on return. - * - * Note that this function does not mess-up the linkage between parts and - * gparts, i.e. the received particles have correct linkeage. - */ -void engine_exchange_strays(struct engine *e, size_t offset_parts, - int *ind_part, size_t *Npart, size_t offset_gparts, - int *ind_gpart, size_t *Ngpart, - size_t offset_sparts, int *ind_spart, - size_t *Nspart) { - -#ifdef WITH_MPI - - struct space *s = e->s; - ticks tic = getticks(); - - /* Re-set the proxies. */ - for (int k = 0; k < e->nr_proxies; k++) { - e->proxies[k].nr_parts_out = 0; - e->proxies[k].nr_gparts_out = 0; - e->proxies[k].nr_sparts_out = 0; - } - - /* Put the parts into the corresponding proxies. */ - for (size_t k = 0; k < *Npart; k++) { - /* Get the target node and proxy ID. 
*/ - const int node_id = e->s->cells_top[ind_part[k]].nodeID; - if (node_id < 0 || node_id >= e->nr_nodes) - error("Bad node ID %i.", node_id); - const int pid = e->proxy_ind[node_id]; - if (pid < 0) { - error( - "Do not have a proxy for the requested nodeID %i for part with " - "id=%lld, x=[%e,%e,%e].", - node_id, s->parts[offset_parts + k].id, - s->parts[offset_parts + k].x[0], s->parts[offset_parts + k].x[1], - s->parts[offset_parts + k].x[2]); - } - - /* Re-link the associated gpart with the buffer offset of the part. */ - if (s->parts[offset_parts + k].gpart != NULL) { - s->parts[offset_parts + k].gpart->id_or_neg_offset = - -e->proxies[pid].nr_parts_out; - } - - /* Load the part and xpart into the proxy. */ - proxy_parts_load(&e->proxies[pid], &s->parts[offset_parts + k], - &s->xparts[offset_parts + k], 1); - } - - /* Put the sparts into the corresponding proxies. */ - for (size_t k = 0; k < *Nspart; k++) { - const int node_id = e->s->cells_top[ind_spart[k]].nodeID; - if (node_id < 0 || node_id >= e->nr_nodes) - error("Bad node ID %i.", node_id); - const int pid = e->proxy_ind[node_id]; - if (pid < 0) - error( - "Do not have a proxy for the requested nodeID %i for part with " - "id=%lld, x=[%e,%e,%e].", - node_id, s->sparts[offset_sparts + k].id, - s->sparts[offset_sparts + k].x[0], s->sparts[offset_sparts + k].x[1], - s->sparts[offset_sparts + k].x[2]); - - /* Re-link the associated gpart with the buffer offset of the spart. */ - if (s->sparts[offset_sparts + k].gpart != NULL) { - s->sparts[offset_sparts + k].gpart->id_or_neg_offset = - -e->proxies[pid].nr_sparts_out; - } - - /* Load the spart into the proxy */ - proxy_sparts_load(&e->proxies[pid], &s->sparts[offset_sparts + k], 1); - } - - /* Put the gparts into the corresponding proxies. 
*/ - for (size_t k = 0; k < *Ngpart; k++) { - const int node_id = e->s->cells_top[ind_gpart[k]].nodeID; - if (node_id < 0 || node_id >= e->nr_nodes) - error("Bad node ID %i.", node_id); - const int pid = e->proxy_ind[node_id]; - if (pid < 0) - error( - "Do not have a proxy for the requested nodeID %i for part with " - "id=%lli, x=[%e,%e,%e].", - node_id, s->gparts[offset_gparts + k].id_or_neg_offset, - s->gparts[offset_gparts + k].x[0], s->gparts[offset_gparts + k].x[1], - s->gparts[offset_gparts + k].x[2]); - - /* Load the gpart into the proxy */ - proxy_gparts_load(&e->proxies[pid], &s->gparts[offset_gparts + k], 1); - } - - /* Launch the proxies. */ - MPI_Request reqs_in[4 * engine_maxproxies]; - MPI_Request reqs_out[4 * engine_maxproxies]; - for (int k = 0; k < e->nr_proxies; k++) { - proxy_parts_exch1(&e->proxies[k]); - reqs_in[k] = e->proxies[k].req_parts_count_in; - reqs_out[k] = e->proxies[k].req_parts_count_out; - } - - /* Wait for each count to come in and start the recv. */ - for (int k = 0; k < e->nr_proxies; k++) { - int pid = MPI_UNDEFINED; - if (MPI_Waitany(e->nr_proxies, reqs_in, &pid, MPI_STATUS_IGNORE) != - MPI_SUCCESS || - pid == MPI_UNDEFINED) - error("MPI_Waitany failed."); - // message( "request from proxy %i has arrived." , pid ); - proxy_parts_exch2(&e->proxies[pid]); - } - - /* Wait for all the sends to have finished too. */ - if (MPI_Waitall(e->nr_proxies, reqs_out, MPI_STATUSES_IGNORE) != MPI_SUCCESS) - error("MPI_Waitall on sends failed."); - - /* Count the total number of incoming particles and make sure we have - enough space to accommodate them. 
*/ - int count_parts_in = 0; - int count_gparts_in = 0; - int count_sparts_in = 0; - for (int k = 0; k < e->nr_proxies; k++) { - count_parts_in += e->proxies[k].nr_parts_in; - count_gparts_in += e->proxies[k].nr_gparts_in; - count_sparts_in += e->proxies[k].nr_sparts_in; - } - if (e->verbose) { - message("sent out %zu/%zu/%zu parts/gparts/sparts, got %i/%i/%i back.", - *Npart, *Ngpart, *Nspart, count_parts_in, count_gparts_in, - count_sparts_in); - } - - /* Reallocate the particle arrays if necessary */ - if (offset_parts + count_parts_in > s->size_parts) { - message("re-allocating parts array."); - s->size_parts = (offset_parts + count_parts_in) * engine_parts_size_grow; - struct part *parts_new = NULL; - struct xpart *xparts_new = NULL; - if (posix_memalign((void **)&parts_new, part_align, - sizeof(struct part) * s->size_parts) != 0 || - posix_memalign((void **)&xparts_new, xpart_align, - sizeof(struct xpart) * s->size_parts) != 0) - error("Failed to allocate new part data."); - memcpy(parts_new, s->parts, sizeof(struct part) * offset_parts); - memcpy(xparts_new, s->xparts, sizeof(struct xpart) * offset_parts); - free(s->parts); - free(s->xparts); - s->parts = parts_new; - s->xparts = xparts_new; - for (size_t k = 0; k < offset_parts; k++) { - if (s->parts[k].gpart != NULL) { - s->parts[k].gpart->id_or_neg_offset = -k; - } - } - } - if (offset_sparts + count_sparts_in > s->size_sparts) { - message("re-allocating sparts array."); - s->size_sparts = (offset_sparts + count_sparts_in) * engine_parts_size_grow; - struct spart *sparts_new = NULL; - if (posix_memalign((void **)&sparts_new, spart_align, - sizeof(struct spart) * s->size_sparts) != 0) - error("Failed to allocate new spart data."); - memcpy(sparts_new, s->sparts, sizeof(struct spart) * offset_sparts); - free(s->sparts); - s->sparts = sparts_new; - for (size_t k = 0; k < offset_sparts; k++) { - if (s->sparts[k].gpart != NULL) { - s->sparts[k].gpart->id_or_neg_offset = -k; - } - } - } - if (offset_gparts + 
count_gparts_in > s->size_gparts) { - message("re-allocating gparts array."); - s->size_gparts = (offset_gparts + count_gparts_in) * engine_parts_size_grow; - struct gpart *gparts_new = NULL; - if (posix_memalign((void **)&gparts_new, gpart_align, - sizeof(struct gpart) * s->size_gparts) != 0) - error("Failed to allocate new gpart data."); - memcpy(gparts_new, s->gparts, sizeof(struct gpart) * offset_gparts); - free(s->gparts); - s->gparts = gparts_new; - - for (size_t k = 0; k < offset_gparts; k++) { - if (s->gparts[k].type == swift_type_gas) { - s->parts[-s->gparts[k].id_or_neg_offset].gpart = &s->gparts[k]; - } else if (s->gparts[k].type == swift_type_star) { - s->sparts[-s->gparts[k].id_or_neg_offset].gpart = &s->gparts[k]; + /* Reset the links */ + for (size_t k = 0; k < offset_gparts; k++) { + if (s->gparts[k].type == swift_type_gas) { + s->parts[-s->gparts[k].id_or_neg_offset].gpart = &s->gparts[k]; + } else if (s->gparts[k].type == swift_type_stars) { + s->sparts[-s->gparts[k].id_or_neg_offset].gpart = &s->gparts[k]; } } } @@ -2088,7 +1552,7 @@ void engine_exchange_strays(struct engine *e, size_t offset_parts, &s->parts[offset_parts + count_parts - gp->id_or_neg_offset]; gp->id_or_neg_offset = s->parts - p; p->gpart = gp; - } else if (gp->type == swift_type_star) { + } else if (gp->type == swift_type_stars) { struct spart *sp = &s->sparts[offset_sparts + count_sparts - gp->id_or_neg_offset]; gp->id_or_neg_offset = s->sparts - sp; @@ -2133,6 +1597,8 @@ void engine_exchange_top_multipoles(struct engine *e) { #ifdef WITH_MPI + ticks tic = getticks(); + #ifdef SWIFT_DEBUG_CHECKS for (int i = 0; i < e->s->nr_cells; ++i) { const struct gravity_tensors *m = &e->s->multipoles_top[i]; @@ -2159,19 +1625,16 @@ void engine_exchange_top_multipoles(struct engine *e) { /* Each node (space) has constructed its own top-level multipoles. * We now need to make sure every other node has a copy of everything. * - * WARNING: Adult stuff ahead: don't do this at home! 
- * - * Since all nodes have their top-level multi-poles computed - * and all foreign ones set to 0 (all bytes), we can gather all the m-poles - * by doing a bit-wise OR reduction across all the nodes directly in - * place inside the multi-poles_top array. - * This only works if the foreign m-poles on every nodes are zeroed and no - * multi-pole is present on more than one node (two things guaranteed by the - * domain decomposition). + * We use our home-made reduction operation that simply performs a XOR + * operation on the multipoles. Since only local multipoles are non-zero and + * each multipole is only present once, the bit-by-bit XOR will + * create the desired result. */ - MPI_Allreduce(MPI_IN_PLACE, e->s->multipoles_top, - e->s->nr_cells * sizeof(struct gravity_tensors), MPI_BYTE, - MPI_BOR, MPI_COMM_WORLD); + int err = MPI_Allreduce(MPI_IN_PLACE, e->s->multipoles_top, e->s->nr_cells, + multipole_mpi_type, multipole_mpi_reduce_op, + MPI_COMM_WORLD); + if (err != MPI_SUCCESS) + mpi_error(err, "Failed to all-reduce the top-level multipoles."); #ifdef SWIFT_DEBUG_CHECKS long long counter = 0; @@ -2180,6 +1643,9 @@ void engine_exchange_top_multipoles(struct engine *e) { for (int i = 0; i < e->s->nr_cells; ++i) { const struct gravity_tensors *m = &e->s->multipoles_top[i]; counter += m->m_pole.num_gpart; + if (m->m_pole.num_gpart < 0) { + error("m->m_pole.num_gpart is negative: %lld", m->m_pole.num_gpart); + } if (m->m_pole.M_000 > 0.) { if (m->CoM[0] < 0. 
|| m->CoM[0] > e->s->dim[0]) error("Invalid multipole position in X"); @@ -2190,1600 +1656,170 @@ void engine_exchange_top_multipoles(struct engine *e) { } } if (counter != e->total_nr_gparts) - error("Total particles in multipoles inconsistent with engine"); + error( + "Total particles in multipoles inconsistent with engine.\n " + " counter = %lld, nr_gparts = %lld", + counter, e->total_nr_gparts); #endif + if (e->verbose) + message("took %.3f %s.", clocks_from_ticks(getticks() - tic), + clocks_getunit()); #else error("SWIFT was not compiled with MPI support."); -#endif -} - -void engine_exchange_proxy_multipoles(struct engine *e) { - -#ifdef WITH_MPI - - const ticks tic = getticks(); - - /* Start by counting the number of cells to send and receive */ - int count_send = 0; - int count_recv = 0; - int count_send_requests = 0; - int count_recv_requests = 0; - - /* Loop over the proxies. */ - for (int pid = 0; pid < e->nr_proxies; pid++) { - - /* Get a handle on the proxy. */ - const struct proxy *p = &e->proxies[pid]; - - /* Now collect the number of requests associated */ - count_recv_requests += p->nr_cells_in; - count_send_requests += p->nr_cells_out; - - /* And the actual number of things we are going to ship */ - for (int k = 0; k < p->nr_cells_in; k++) - count_recv += p->cells_in[k]->pcell_size; - - for (int k = 0; k < p->nr_cells_out; k++) - count_send += p->cells_out[k]->pcell_size; - } - - /* Allocate the buffers for the packed data */ - struct gravity_tensors *buffer_send = NULL; - if (posix_memalign((void **)&buffer_send, SWIFT_CACHE_ALIGNMENT, - count_send * sizeof(struct gravity_tensors)) != 0) - error("Unable to allocate memory for multipole transactions"); - - struct gravity_tensors *buffer_recv = NULL; - if (posix_memalign((void **)&buffer_recv, SWIFT_CACHE_ALIGNMENT, - count_recv * sizeof(struct gravity_tensors)) != 0) - error("Unable to allocate memory for multipole transactions"); - - /* Also allocate the MPI requests */ - const int count_requests 
= count_send_requests + count_recv_requests; - MPI_Request *requests = - (MPI_Request *)malloc(sizeof(MPI_Request) * count_requests); - if (requests == NULL) error("Unable to allocate memory for MPI requests"); - - int this_request = 0; - int this_recv = 0; - int this_send = 0; - - /* Loop over the proxies to issue the receives. */ - for (int pid = 0; pid < e->nr_proxies; pid++) { - - /* Get a handle on the proxy. */ - const struct proxy *p = &e->proxies[pid]; - - for (int k = 0; k < p->nr_cells_in; k++) { - - const int num_elements = p->cells_in[k]->pcell_size; - - /* Receive everything */ - MPI_Irecv(&buffer_recv[this_recv], - num_elements * sizeof(struct gravity_tensors), MPI_BYTE, - p->cells_in[k]->nodeID, p->cells_in[k]->tag, MPI_COMM_WORLD, - &requests[this_request]); - - /* Move to the next slot in the buffers */ - this_recv += num_elements; - this_request++; - } - - /* Loop over the proxies to issue the sends. */ - for (int k = 0; k < p->nr_cells_out; k++) { - - /* Number of multipoles in this cell hierarchy */ - const int num_elements = p->cells_out[k]->pcell_size; - - /* Let's pack everything recursively */ - cell_pack_multipoles(p->cells_out[k], &buffer_send[this_send]); - - /* Send everything (note the use of cells_in[0] to get the correct node - * ID. 
*/ - MPI_Isend(&buffer_send[this_send], - num_elements * sizeof(struct gravity_tensors), MPI_BYTE, - p->cells_in[0]->nodeID, p->cells_out[k]->tag, MPI_COMM_WORLD, - &requests[this_request]); - - /* Move to the next slot in the buffers */ - this_send += num_elements; - this_request++; - } - } - - /* Wait for all the requests to arrive home */ - MPI_Status *stats = (MPI_Status *)malloc(count_requests * sizeof(MPI_Status)); - int res; - if ((res = MPI_Waitall(count_requests, requests, stats)) != MPI_SUCCESS) { - for (int k = 0; k < count_requests; ++k) { - char buff[MPI_MAX_ERROR_STRING]; - MPI_Error_string(stats[k].MPI_ERROR, buff, &res); - message("request from source %i, tag %i has error '%s'.", - stats[k].MPI_SOURCE, stats[k].MPI_TAG, buff); - } - error("Failed during waitall for multipole data."); - } - - /* Let's now unpack the multipoles at the right place */ - this_recv = 0; - for (int pid = 0; pid < e->nr_proxies; pid++) { - - /* Get a handle on the proxy. */ - const struct proxy *p = &e->proxies[pid]; - - for (int k = 0; k < p->nr_cells_in; k++) { - - const int num_elements = p->cells_in[k]->pcell_size; - -#ifdef SWIFT_DEBUG_CHECKS - - /* Check that the first element (top-level cell's multipole) matches what - * we received */ - if (p->cells_in[k]->multipole->m_pole.num_gpart != - buffer_recv[this_recv].m_pole.num_gpart) - error("Current: M_000=%e num_gpart=%lld\n New: M_000=%e num_gpart=%lld", - p->cells_in[k]->multipole->m_pole.M_000, - p->cells_in[k]->multipole->m_pole.num_gpart, - buffer_recv[this_recv].m_pole.M_000, - buffer_recv[this_recv].m_pole.num_gpart); -#endif - - /* Unpack recursively */ - cell_unpack_multipoles(p->cells_in[k], &buffer_recv[this_recv]); - - /* Move to the next slot in the buffers */ - this_recv += num_elements; - } - } - - /* Free everything */ - free(stats); - free(buffer_send); - free(buffer_recv); - free(requests); - - /* How much time did this take? 
*/ - if (e->verbose) - message("took %.3f %s.", clocks_from_ticks(getticks() - tic), - clocks_getunit()); -#else - error("SWIFT was not compiled with MPI support."); -#endif -} - -/** - * @brief Constructs the top-level tasks for the short-range gravity - * and long-range gravity interactions. - * - * - All top-cells get a self task. - * - All pairs within range according to the multipole acceptance - * criterion get a pair task. - */ -void engine_make_self_gravity_tasks_mapper(void *map_data, int num_elements, - void *extra_data) { - - struct engine *e = ((struct engine **)extra_data)[0]; - struct space *s = e->s; - struct scheduler *sched = &e->sched; - const int nodeID = e->nodeID; - const int periodic = s->periodic; - const double dim[3] = {s->dim[0], s->dim[1], s->dim[2]}; - const int cdim[3] = {s->cdim[0], s->cdim[1], s->cdim[2]}; - struct cell *cells = s->cells_top; - const double theta_crit = e->gravity_properties->theta_crit; - const double max_distance = e->mesh->r_cut_max; - - /* Compute how many cells away we need to walk */ - const double distance = 2.5 * cells[0].width[0] / theta_crit; - int delta = (int)(distance / cells[0].width[0]) + 1; - int delta_m = delta; - int delta_p = delta; - - /* Special case where every cell is in range of every other one */ - if (delta >= cdim[0] / 2) { - if (cdim[0] % 2 == 0) { - delta_m = cdim[0] / 2; - delta_p = cdim[0] / 2 - 1; - } else { - delta_m = cdim[0] / 2; - delta_p = cdim[0] / 2; - } - } - - /* Loop through the elements, which are just byte offsets from NULL. */ - for (int ind = 0; ind < num_elements; ind++) { - - /* Get the cell index. 
*/ - const int cid = (size_t)(map_data) + ind; - - /* Integer indices of the cell in the top-level grid */ - const int i = cid / (cdim[1] * cdim[2]); - const int j = (cid / cdim[2]) % cdim[1]; - const int k = cid % cdim[2]; - - /* Get the cell */ - struct cell *ci = &cells[cid]; - - /* Skip cells without gravity particles */ - if (ci->gcount == 0) continue; - - /* Is that cell local ? */ - if (ci->nodeID != nodeID) continue; - - /* If the cells is local build a self-interaction */ - scheduler_addtask(sched, task_type_self, task_subtype_grav, 0, 0, ci, NULL); - - /* Recover the multipole information */ - const struct gravity_tensors *const multi_i = ci->multipole; - const double CoM_i[3] = {multi_i->CoM[0], multi_i->CoM[1], multi_i->CoM[2]}; - -#ifdef SWIFT_DEBUG_CHECKS - if (cell_getid(cdim, i, j, k) != cid) - error("Incorrect calculation of indices (i,j,k)=(%d,%d,%d) cid=%d", i, j, - k, cid); - - if (multi_i->r_max != multi_i->r_max_rebuild) - error( - "Multipole size not equal ot it's size after rebuild. 
But we just " - "rebuilt..."); -#endif - - /* Loop over every other cell within (Manhattan) range delta */ - for (int x = -delta_m; x <= delta_p; x++) { - int ii = i + x; - if (ii >= cdim[0]) - ii -= cdim[0]; - else if (ii < 0) - ii += cdim[0]; - for (int y = -delta_m; y <= delta_p; y++) { - int jj = j + y; - if (jj >= cdim[1]) - jj -= cdim[1]; - else if (jj < 0) - jj += cdim[1]; - for (int z = -delta_m; z <= delta_p; z++) { - int kk = k + z; - if (kk >= cdim[2]) - kk -= cdim[2]; - else if (kk < 0) - kk += cdim[2]; - - /* Get the cell */ - const int cjd = cell_getid(cdim, ii, jj, kk); - struct cell *cj = &cells[cjd]; - -#ifdef SWIFT_DEBUG_CHECKS - const int iii = cjd / (cdim[1] * cdim[2]); - const int jjj = (cjd / cdim[2]) % cdim[1]; - const int kkk = cjd % cdim[2]; - - if (ii != iii || jj != jjj || kk != kkk) - error( - "Incorrect calculation of indices (iii,jjj,kkk)=(%d,%d,%d) " - "cjd=%d", - iii, jjj, kkk, cjd); -#endif - - /* Avoid duplicates of local pairs*/ - if (cid <= cjd && cj->nodeID == nodeID) continue; - - /* Skip cells without gravity particles */ - if (cj->gcount == 0) continue; - - /* Recover the multipole information */ - const struct gravity_tensors *const multi_j = cj->multipole; - - /* Get the distance between the CoMs */ - double dx = CoM_i[0] - multi_j->CoM[0]; - double dy = CoM_i[1] - multi_j->CoM[1]; - double dz = CoM_i[2] - multi_j->CoM[2]; - - /* Apply BC */ - if (periodic) { - dx = nearest(dx, dim[0]); - dy = nearest(dy, dim[1]); - dz = nearest(dz, dim[2]); - } - const double r2 = dx * dx + dy * dy + dz * dz; - - /* Minimal distance between any pair of particles */ - const double min_radius = - sqrt(r2) - (multi_i->r_max + multi_j->r_max); - - /* Are we beyond the distance where the truncated forces are 0 ?*/ - if (periodic && min_radius > max_distance) continue; - - /* Are the cells too close for a MM interaction ? 
*/ - if (!cell_can_use_pair_mm(ci, cj, e, s)) { - - /* Ok, we need to add a direct pair calculation */ - scheduler_addtask(sched, task_type_pair, task_subtype_grav, 0, 0, - ci, cj); - } - } - } - } - } -} - -/** - * @brief Constructs the top-level tasks for the short-range gravity - * interactions (master function). - * - * - Create the FFT task and the array of gravity ghosts. - * - Call the mapper function to create the other tasks. - * - * @param e The #engine. - */ -void engine_make_self_gravity_tasks(struct engine *e) { - - struct space *s = e->s; - struct task **ghosts = NULL; - - /* Create the multipole self and pair tasks. */ - void *extra_data[2] = {e, ghosts}; - threadpool_map(&e->threadpool, engine_make_self_gravity_tasks_mapper, NULL, - s->nr_cells, 1, 0, extra_data); -} - -/** - * @brief Constructs the top-level tasks for the external gravity. - * - * @param e The #engine. - */ -void engine_make_external_gravity_tasks(struct engine *e) { - - struct space *s = e->s; - struct scheduler *sched = &e->sched; - const int nodeID = e->nodeID; - struct cell *cells = s->cells_top; - const int nr_cells = s->nr_cells; - - for (int cid = 0; cid < nr_cells; ++cid) { - - struct cell *ci = &cells[cid]; - - /* Skip cells without gravity particles */ - if (ci->gcount == 0) continue; - - /* Is that neighbour local ? */ - if (ci->nodeID != nodeID) continue; - - /* If the cell is local, build a self-interaction */ - scheduler_addtask(sched, task_type_self, task_subtype_external_grav, 0, 0, - ci, NULL); - } -} - -/** - * @brief Constructs the top-level pair tasks for the first hydro loop over - * neighbours - * - * Here we construct all the tasks for all possible neighbouring non-empty - * local cells in the hierarchy. No dependencies are being added thus far. - * Additional loop over neighbours can later be added by simply duplicating - * all the tasks created by this function. - * - * @param map_data Offset of first two indices disguised as a pointer. 
- * @param num_elements Number of cells to traverse. - * @param extra_data The #engine. - */ -void engine_make_hydroloop_tasks_mapper(void *map_data, int num_elements, - void *extra_data) { - - /* Extract the engine pointer. */ - struct engine *e = (struct engine *)extra_data; - - struct space *s = e->s; - struct scheduler *sched = &e->sched; - const int nodeID = e->nodeID; - const int *cdim = s->cdim; - struct cell *cells = s->cells_top; - - /* Loop through the elements, which are just byte offsets from NULL. */ - for (int ind = 0; ind < num_elements; ind++) { - - /* Get the cell index. */ - const int cid = (size_t)(map_data) + ind; - const int i = cid / (cdim[1] * cdim[2]); - const int j = (cid / cdim[2]) % cdim[1]; - const int k = cid % cdim[2]; - - /* Get the cell */ - struct cell *ci = &cells[cid]; - - /* Skip cells without hydro particles */ - if (ci->count == 0) continue; - - /* If the cells is local build a self-interaction */ - if (ci->nodeID == nodeID) - scheduler_addtask(sched, task_type_self, task_subtype_density, 0, 0, ci, - NULL); - - /* Now loop over all the neighbours of this cell */ - for (int ii = -1; ii < 2; ii++) { - int iii = i + ii; - if (!s->periodic && (iii < 0 || iii >= cdim[0])) continue; - iii = (iii + cdim[0]) % cdim[0]; - for (int jj = -1; jj < 2; jj++) { - int jjj = j + jj; - if (!s->periodic && (jjj < 0 || jjj >= cdim[1])) continue; - jjj = (jjj + cdim[1]) % cdim[1]; - for (int kk = -1; kk < 2; kk++) { - int kkk = k + kk; - if (!s->periodic && (kkk < 0 || kkk >= cdim[2])) continue; - kkk = (kkk + cdim[2]) % cdim[2]; - - /* Get the neighbouring cell */ - const int cjd = cell_getid(cdim, iii, jjj, kkk); - struct cell *cj = &cells[cjd]; - - /* Is that neighbour local and does it have particles ? 
*/ - if (cid >= cjd || cj->count == 0 || - (ci->nodeID != nodeID && cj->nodeID != nodeID)) - continue; - - /* Construct the pair task */ - const int sid = sortlistID[(kk + 1) + 3 * ((jj + 1) + 3 * (ii + 1))]; - scheduler_addtask(sched, task_type_pair, task_subtype_density, sid, 0, - ci, cj); - } - } - } - } -} - -/** - * @brief Counts the tasks associated with one cell and constructs the links - * - * For each hydrodynamic and gravity task, construct the links with - * the corresponding cell. Similarly, construct the dependencies for - * all the sorting tasks. - */ -void engine_count_and_link_tasks_mapper(void *map_data, int num_elements, - void *extra_data) { - - struct engine *e = (struct engine *)extra_data; - struct scheduler *const sched = &e->sched; - - for (int ind = 0; ind < num_elements; ind++) { - struct task *t = &((struct task *)map_data)[ind]; - - struct cell *ci = t->ci; - struct cell *cj = t->cj; - const enum task_types t_type = t->type; - const enum task_subtypes t_subtype = t->subtype; - - /* Link sort tasks to all the higher sort task. */ - if (t_type == task_type_sort) { - for (struct cell *finger = t->ci->parent; finger != NULL; - finger = finger->parent) - if (finger->sorts != NULL) scheduler_addunlock(sched, t, finger->sorts); - } - - /* Link self tasks to cells. */ - else if (t_type == task_type_self) { - atomic_inc(&ci->nr_tasks); - - if (t_subtype == task_subtype_density) { - engine_addlink(e, &ci->density, t); - } else if (t_subtype == task_subtype_grav) { - engine_addlink(e, &ci->grav, t); - } else if (t_subtype == task_subtype_external_grav) { - engine_addlink(e, &ci->grav, t); - } - - /* Link pair tasks to cells. 
*/ - } else if (t_type == task_type_pair) { - atomic_inc(&ci->nr_tasks); - atomic_inc(&cj->nr_tasks); - - if (t_subtype == task_subtype_density) { - engine_addlink(e, &ci->density, t); - engine_addlink(e, &cj->density, t); - } else if (t_subtype == task_subtype_grav) { - engine_addlink(e, &ci->grav, t); - engine_addlink(e, &cj->grav, t); - } -#ifdef SWIFT_DEBUG_CHECKS - else if (t_subtype == task_subtype_external_grav) { - error("Found a pair/external-gravity task..."); - } -#endif - - /* Link sub-self tasks to cells. */ - } else if (t_type == task_type_sub_self) { - atomic_inc(&ci->nr_tasks); - - if (t_subtype == task_subtype_density) { - engine_addlink(e, &ci->density, t); - } else if (t_subtype == task_subtype_grav) { - engine_addlink(e, &ci->grav, t); - } else if (t_subtype == task_subtype_external_grav) { - engine_addlink(e, &ci->grav, t); - } - - /* Link sub-pair tasks to cells. */ - } else if (t_type == task_type_sub_pair) { - atomic_inc(&ci->nr_tasks); - atomic_inc(&cj->nr_tasks); - - if (t_subtype == task_subtype_density) { - engine_addlink(e, &ci->density, t); - engine_addlink(e, &cj->density, t); - } else if (t_subtype == task_subtype_grav) { - engine_addlink(e, &ci->grav, t); - engine_addlink(e, &cj->grav, t); - } -#ifdef SWIFT_DEBUG_CHECKS - else if (t_subtype == task_subtype_external_grav) { - error("Found a sub-pair/external-gravity task..."); - } -#endif - - /* Note that we do not need to link the M-M tasks */ - /* since we already did so when splitting the gravity */ - /* tasks. */ - } - } -} - -/** - * @brief Creates all the task dependencies for the gravity - * - * @param e The #engine - */ -void engine_link_gravity_tasks(struct engine *e) { - - struct scheduler *sched = &e->sched; - const int nodeID = e->nodeID; - const int nr_tasks = sched->nr_tasks; - - for (int k = 0; k < nr_tasks; k++) { - - /* Get a pointer to the task. 
*/ - struct task *t = &sched->tasks[k]; - - /* Get the cells we act on */ - struct cell *ci = t->ci; - struct cell *cj = t->cj; - const enum task_types t_type = t->type; - const enum task_subtypes t_subtype = t->subtype; - -/* Node ID (if running with MPI) */ -#ifdef WITH_MPI - const int ci_nodeID = ci->nodeID; - const int cj_nodeID = (cj != NULL) ? cj->nodeID : -1; -#else - const int ci_nodeID = nodeID; - const int cj_nodeID = nodeID; -#endif - - /* Self-interaction for self-gravity? */ - if (t_type == task_type_self && t_subtype == task_subtype_grav) { - -#ifdef SWIFT_DEBUG_CHECKS - if (ci_nodeID != nodeID) error("Non-local self task"); -#endif - - /* drift ---+-> gravity --> grav_down */ - /* init --/ */ - scheduler_addunlock(sched, ci->super_gravity->drift_gpart, t); - scheduler_addunlock(sched, ci->init_grav_out, t); - scheduler_addunlock(sched, t, ci->grav_down_in); - } - - /* Self-interaction for external gravity ? */ - if (t_type == task_type_self && t_subtype == task_subtype_external_grav) { - -#ifdef SWIFT_DEBUG_CHECKS - if (ci_nodeID != nodeID) error("Non-local self task"); -#endif - - /* drift -----> gravity --> end_force */ - scheduler_addunlock(sched, ci->super_gravity->drift_gpart, t); - scheduler_addunlock(sched, t, ci->end_force); - } - - /* Otherwise, pair interaction? 
*/ - else if (t_type == task_type_pair && t_subtype == task_subtype_grav) { - - if (ci_nodeID == nodeID) { - - /* drift ---+-> gravity --> grav_down */ - /* init --/ */ - scheduler_addunlock(sched, ci->super_gravity->drift_gpart, t); - scheduler_addunlock(sched, ci->init_grav_out, t); - scheduler_addunlock(sched, t, ci->grav_down_in); - } - if (cj_nodeID == nodeID) { - - /* drift ---+-> gravity --> grav_down */ - /* init --/ */ - if (ci->super_gravity != cj->super_gravity) /* Avoid double unlock */ - scheduler_addunlock(sched, cj->super_gravity->drift_gpart, t); - scheduler_addunlock(sched, cj->init_grav_out, t); - scheduler_addunlock(sched, t, cj->grav_down_in); - } - } - - /* Otherwise, sub-self interaction? */ - else if (t_type == task_type_sub_self && t_subtype == task_subtype_grav) { - -#ifdef SWIFT_DEBUG_CHECKS - if (ci_nodeID != nodeID) error("Non-local sub-self task"); -#endif - /* drift ---+-> gravity --> grav_down */ - /* init --/ */ - scheduler_addunlock(sched, ci->super_gravity->drift_gpart, t); - scheduler_addunlock(sched, ci->init_grav_out, t); - scheduler_addunlock(sched, t, ci->grav_down_in); - } - - /* Sub-self-interaction for external gravity ? */ - else if (t_type == task_type_sub_self && - t_subtype == task_subtype_external_grav) { - -#ifdef SWIFT_DEBUG_CHECKS - if (ci_nodeID != nodeID) error("Non-local sub-self task"); -#endif - - /* drift -----> gravity --> end_force */ - scheduler_addunlock(sched, ci->super_gravity->drift_gpart, t); - scheduler_addunlock(sched, t, ci->end_force); - } - - /* Otherwise, sub-pair interaction? 
*/ - else if (t_type == task_type_sub_pair && t_subtype == task_subtype_grav) { - - if (ci_nodeID == nodeID) { - - /* drift ---+-> gravity --> grav_down */ - /* init --/ */ - scheduler_addunlock(sched, ci->super_gravity->drift_gpart, t); - scheduler_addunlock(sched, ci->init_grav_out, t); - scheduler_addunlock(sched, t, ci->grav_down_in); - } - if (cj_nodeID == nodeID) { - - /* drift ---+-> gravity --> grav_down */ - /* init --/ */ - if (ci->super_gravity != cj->super_gravity) /* Avoid double unlock */ - scheduler_addunlock(sched, cj->super_gravity->drift_gpart, t); - scheduler_addunlock(sched, cj->init_grav_out, t); - scheduler_addunlock(sched, t, cj->grav_down_in); - } - } - - /* Otherwise M-M interaction? */ - else if (t_type == task_type_grav_mm) { - - if (ci_nodeID == nodeID) { - - /* init -----> gravity --> grav_down */ - scheduler_addunlock(sched, ci->init_grav_out, t); - scheduler_addunlock(sched, t, ci->grav_down_in); - } - if (cj_nodeID == nodeID) { - - /* init -----> gravity --> grav_down */ - scheduler_addunlock(sched, cj->init_grav_out, t); - scheduler_addunlock(sched, t, cj->grav_down_in); - } - } - } -} - -#ifdef EXTRA_HYDRO_LOOP - -/** - * @brief Creates the dependency network for the hydro tasks of a given cell. - * - * @param sched The #scheduler. - * @param density The density task to link. - * @param gradient The gradient task to link. - * @param force The force task to link. - * @param c The cell. - * @param with_cooling Do we have a cooling task ? 
- */ -static inline void engine_make_hydro_loops_dependencies( - struct scheduler *sched, struct task *density, struct task *gradient, - struct task *force, struct cell *c, int with_cooling) { - - /* density loop --> ghost --> gradient loop --> extra_ghost */ - /* extra_ghost --> force loop */ - scheduler_addunlock(sched, density, c->super_hydro->ghost_in); - scheduler_addunlock(sched, c->super_hydro->ghost_out, gradient); - scheduler_addunlock(sched, gradient, c->super_hydro->extra_ghost); - scheduler_addunlock(sched, c->super_hydro->extra_ghost, force); -} - -#else - -/** - * @brief Creates the dependency network for the hydro tasks of a given cell. - * - * @param sched The #scheduler. - * @param density The density task to link. - * @param force The force task to link. - * @param c The cell. - * @param with_cooling Are we running with cooling switched on ? - */ -static inline void engine_make_hydro_loops_dependencies(struct scheduler *sched, - struct task *density, - struct task *force, - struct cell *c, - int with_cooling) { - /* density loop --> ghost --> force loop */ - scheduler_addunlock(sched, density, c->super_hydro->ghost_in); - scheduler_addunlock(sched, c->super_hydro->ghost_out, force); -} - -#endif -/** - * @brief Duplicates the first hydro loop and construct all the - * dependencies for the hydro part - * - * This is done by looping over all the previously constructed tasks - * and adding another task involving the same cells but this time - * corresponding to the second hydro loop over neighbours. - * With all the relevant tasks for a given cell available, we construct - * all the dependencies for that cell. 
- */ -void engine_make_extra_hydroloop_tasks_mapper(void *map_data, int num_elements, - void *extra_data) { - - struct engine *e = (struct engine *)extra_data; - struct scheduler *sched = &e->sched; - const int nodeID = e->nodeID; - const int with_cooling = (e->policy & engine_policy_cooling); - - for (int ind = 0; ind < num_elements; ind++) { - struct task *t = &((struct task *)map_data)[ind]; - - /* Sort tasks depend on the drift of the cell. */ - if (t->type == task_type_sort && t->ci->nodeID == engine_rank) { - scheduler_addunlock(sched, t->ci->super_hydro->drift_part, t); - } - - /* Self-interaction? */ - else if (t->type == task_type_self && t->subtype == task_subtype_density) { - - /* Make the self-density tasks depend on the drift only. */ - scheduler_addunlock(sched, t->ci->super_hydro->drift_part, t); - -#ifdef EXTRA_HYDRO_LOOP - /* Start by constructing the task for the second and third hydro loop. */ - struct task *t2 = scheduler_addtask( - sched, task_type_self, task_subtype_gradient, 0, 0, t->ci, NULL); - struct task *t3 = scheduler_addtask( - sched, task_type_self, task_subtype_force, 0, 0, t->ci, NULL); - - /* Add the link between the new loops and the cell */ - engine_addlink(e, &t->ci->gradient, t2); - engine_addlink(e, &t->ci->force, t3); - - /* Now, build all the dependencies for the hydro */ - engine_make_hydro_loops_dependencies(sched, t, t2, t3, t->ci, - with_cooling); - scheduler_addunlock(sched, t3, t->ci->super->end_force); -#else - - /* Start by constructing the task for the second hydro loop */ - struct task *t2 = scheduler_addtask( - sched, task_type_self, task_subtype_force, 0, 0, t->ci, NULL); - - /* Add the link between the new loop and the cell */ - engine_addlink(e, &t->ci->force, t2); - - /* Now, build all the dependencies for the hydro */ - engine_make_hydro_loops_dependencies(sched, t, t2, t->ci, with_cooling); - scheduler_addunlock(sched, t2, t->ci->super->end_force); -#endif - } - - /* Otherwise, pair interaction? 
*/ - else if (t->type == task_type_pair && t->subtype == task_subtype_density) { - - /* Make all density tasks depend on the drift and the sorts. */ - if (t->ci->nodeID == engine_rank) - scheduler_addunlock(sched, t->ci->super_hydro->drift_part, t); - scheduler_addunlock(sched, t->ci->super_hydro->sorts, t); - if (t->ci->super_hydro != t->cj->super_hydro) { - if (t->cj->nodeID == engine_rank) - scheduler_addunlock(sched, t->cj->super_hydro->drift_part, t); - scheduler_addunlock(sched, t->cj->super_hydro->sorts, t); - } - -#ifdef EXTRA_HYDRO_LOOP - /* Start by constructing the task for the second and third hydro loop */ - struct task *t2 = scheduler_addtask( - sched, task_type_pair, task_subtype_gradient, 0, 0, t->ci, t->cj); - struct task *t3 = scheduler_addtask( - sched, task_type_pair, task_subtype_force, 0, 0, t->ci, t->cj); - - /* Add the link between the new loop and both cells */ - engine_addlink(e, &t->ci->gradient, t2); - engine_addlink(e, &t->cj->gradient, t2); - engine_addlink(e, &t->ci->force, t3); - engine_addlink(e, &t->cj->force, t3); - - /* Now, build all the dependencies for the hydro for the cells */ - /* that are local and are not descendant of the same super_hydro-cells */ - if (t->ci->nodeID == nodeID) { - engine_make_hydro_loops_dependencies(sched, t, t2, t3, t->ci, - with_cooling); - scheduler_addunlock(sched, t3, t->ci->super->end_force); - } - if (t->cj->nodeID == nodeID) { - if (t->ci->super_hydro != t->cj->super_hydro) - engine_make_hydro_loops_dependencies(sched, t, t2, t3, t->cj, - with_cooling); - if (t->ci->super != t->cj->super) - scheduler_addunlock(sched, t3, t->cj->super->end_force); - } - -#else - - /* Start by constructing the task for the second hydro loop */ - struct task *t2 = scheduler_addtask( - sched, task_type_pair, task_subtype_force, 0, 0, t->ci, t->cj); - - /* Add the link between the new loop and both cells */ - engine_addlink(e, &t->ci->force, t2); - engine_addlink(e, &t->cj->force, t2); - - /* Now, build all the 
dependencies for the hydro for the cells */ - /* that are local and are not descendant of the same super_hydro-cells */ - if (t->ci->nodeID == nodeID) { - engine_make_hydro_loops_dependencies(sched, t, t2, t->ci, with_cooling); - scheduler_addunlock(sched, t2, t->ci->super->end_force); - } - if (t->cj->nodeID == nodeID) { - if (t->ci->super_hydro != t->cj->super_hydro) - engine_make_hydro_loops_dependencies(sched, t, t2, t->cj, - with_cooling); - if (t->ci->super != t->cj->super) - scheduler_addunlock(sched, t2, t->cj->super->end_force); - } - -#endif - - } - - /* Otherwise, sub-self interaction? */ - else if (t->type == task_type_sub_self && - t->subtype == task_subtype_density) { - - /* Make all density tasks depend on the drift and sorts. */ - scheduler_addunlock(sched, t->ci->super_hydro->drift_part, t); - scheduler_addunlock(sched, t->ci->super_hydro->sorts, t); - -#ifdef EXTRA_HYDRO_LOOP - - /* Start by constructing the task for the second and third hydro loop */ - struct task *t2 = - scheduler_addtask(sched, task_type_sub_self, task_subtype_gradient, - t->flags, 0, t->ci, t->cj); - struct task *t3 = - scheduler_addtask(sched, task_type_sub_self, task_subtype_force, - t->flags, 0, t->ci, t->cj); - - /* Add the link between the new loop and the cell */ - engine_addlink(e, &t->ci->gradient, t2); - engine_addlink(e, &t->ci->force, t3); - - /* Now, build all the dependencies for the hydro for the cells */ - /* that are local and are not descendant of the same super_hydro-cells */ - if (t->ci->nodeID == nodeID) { - engine_make_hydro_loops_dependencies(sched, t, t2, t3, t->ci, - with_cooling); - scheduler_addunlock(sched, t3, t->ci->super->end_force); - } - -#else - /* Start by constructing the task for the second hydro loop */ - struct task *t2 = - scheduler_addtask(sched, task_type_sub_self, task_subtype_force, - t->flags, 0, t->ci, t->cj); - - /* Add the link between the new loop and the cell */ - engine_addlink(e, &t->ci->force, t2); - - /* Now, build all the 
dependencies for the hydro for the cells */ - /* that are local and are not descendant of the same super_hydro-cells */ - if (t->ci->nodeID == nodeID) { - engine_make_hydro_loops_dependencies(sched, t, t2, t->ci, with_cooling); - scheduler_addunlock(sched, t2, t->ci->super->end_force); - } -#endif - } - - /* Otherwise, sub-pair interaction? */ - else if (t->type == task_type_sub_pair && - t->subtype == task_subtype_density) { - - /* Make all density tasks depend on the drift. */ - if (t->ci->nodeID == engine_rank) - scheduler_addunlock(sched, t->ci->super_hydro->drift_part, t); - scheduler_addunlock(sched, t->ci->super_hydro->sorts, t); - if (t->ci->super_hydro != t->cj->super_hydro) { - if (t->cj->nodeID == engine_rank) - scheduler_addunlock(sched, t->cj->super_hydro->drift_part, t); - scheduler_addunlock(sched, t->cj->super_hydro->sorts, t); - } - -#ifdef EXTRA_HYDRO_LOOP - - /* Start by constructing the task for the second and third hydro loop */ - struct task *t2 = - scheduler_addtask(sched, task_type_sub_pair, task_subtype_gradient, - t->flags, 0, t->ci, t->cj); - struct task *t3 = - scheduler_addtask(sched, task_type_sub_pair, task_subtype_force, - t->flags, 0, t->ci, t->cj); - - /* Add the link between the new loop and both cells */ - engine_addlink(e, &t->ci->gradient, t2); - engine_addlink(e, &t->cj->gradient, t2); - engine_addlink(e, &t->ci->force, t3); - engine_addlink(e, &t->cj->force, t3); - - /* Now, build all the dependencies for the hydro for the cells */ - /* that are local and are not descendant of the same super_hydro-cells */ - if (t->ci->nodeID == nodeID) { - engine_make_hydro_loops_dependencies(sched, t, t2, t3, t->ci, - with_cooling); - scheduler_addunlock(sched, t3, t->ci->super->end_force); - } - if (t->cj->nodeID == nodeID) { - if (t->ci->super_hydro != t->cj->super_hydro) - engine_make_hydro_loops_dependencies(sched, t, t2, t3, t->cj, - with_cooling); - if (t->ci->super != t->cj->super) - scheduler_addunlock(sched, t3, 
t->cj->super->end_force); - } - -#else - /* Start by constructing the task for the second hydro loop */ - struct task *t2 = - scheduler_addtask(sched, task_type_sub_pair, task_subtype_force, - t->flags, 0, t->ci, t->cj); - - /* Add the link between the new loop and both cells */ - engine_addlink(e, &t->ci->force, t2); - engine_addlink(e, &t->cj->force, t2); - - /* Now, build all the dependencies for the hydro for the cells */ - /* that are local and are not descendant of the same super_hydro-cells */ - if (t->ci->nodeID == nodeID) { - engine_make_hydro_loops_dependencies(sched, t, t2, t->ci, with_cooling); - scheduler_addunlock(sched, t2, t->ci->super->end_force); - } - if (t->cj->nodeID == nodeID) { - if (t->ci->super_hydro != t->cj->super_hydro) - engine_make_hydro_loops_dependencies(sched, t, t2, t->cj, - with_cooling); - if (t->ci->super != t->cj->super) - scheduler_addunlock(sched, t2, t->cj->super->end_force); - } -#endif - } - } -} - -/** - * @brief Fill the #space's task list. - * - * @param e The #engine we are working with. - */ -void engine_maketasks(struct engine *e) { - - struct space *s = e->s; - struct scheduler *sched = &e->sched; - struct cell *cells = s->cells_top; - const int nr_cells = s->nr_cells; - const ticks tic = getticks(); - - /* Re-set the scheduler. */ - scheduler_reset(sched, engine_estimate_nr_tasks(e)); - - ticks tic2 = getticks(); - - /* Construct the firt hydro loop over neighbours */ - if (e->policy & engine_policy_hydro) - threadpool_map(&e->threadpool, engine_make_hydroloop_tasks_mapper, NULL, - s->nr_cells, 1, 0, e); - - if (e->verbose) - message("Making hydro tasks took %.3f %s.", - clocks_from_ticks(getticks() - tic2), clocks_getunit()); - - tic2 = getticks(); - - /* Add the self gravity tasks. 
*/ - if (e->policy & engine_policy_self_gravity) engine_make_self_gravity_tasks(e); - - if (e->verbose) - message("Making gravity tasks took %.3f %s.", - clocks_from_ticks(getticks() - tic2), clocks_getunit()); - - /* Add the external gravity tasks. */ - if (e->policy & engine_policy_external_gravity) - engine_make_external_gravity_tasks(e); - - if (e->sched.nr_tasks == 0 && (s->nr_gparts > 0 || s->nr_parts > 0)) - error("We have particles but no hydro or gravity tasks were created."); - - /* Free the old list of cell-task links. */ - if (e->links != NULL) free(e->links); - e->size_links = 0; - -/* The maximum number of links is the - * number of cells (s->tot_cells) times the number of neighbours (26) times - * the number of interaction types, so 26 * 2 (density, force) pairs - * and 2 (density, force) self. - */ -#ifdef EXTRA_HYDRO_LOOP - const size_t hydro_tasks_per_cell = 27 * 3; -#else - const size_t hydro_tasks_per_cell = 27 * 2; -#endif - const size_t self_grav_tasks_per_cell = 125; - const size_t ext_grav_tasks_per_cell = 1; - - if (e->policy & engine_policy_hydro) - e->size_links += s->tot_cells * hydro_tasks_per_cell; - if (e->policy & engine_policy_external_gravity) - e->size_links += s->tot_cells * ext_grav_tasks_per_cell; - if (e->policy & engine_policy_self_gravity) - e->size_links += s->tot_cells * self_grav_tasks_per_cell; - - /* Allocate the new link list */ - if ((e->links = (struct link *)malloc(sizeof(struct link) * e->size_links)) == - NULL) - error("Failed to allocate cell-task links."); - e->nr_links = 0; - - tic2 = getticks(); - - /* Split the tasks. 
*/ - scheduler_splittasks(sched); - - if (e->verbose) - message("Splitting tasks took %.3f %s.", - clocks_from_ticks(getticks() - tic2), clocks_getunit()); - -#ifdef SWIFT_DEBUG_CHECKS - /* Verify that we are not left with invalid tasks */ - for (int i = 0; i < e->sched.nr_tasks; ++i) { - const struct task *t = &e->sched.tasks[i]; - if (t->ci == NULL && t->cj != NULL && !t->skip) error("Invalid task"); - } -#endif - - tic2 = getticks(); - - /* Count the number of tasks associated with each cell and - store the density tasks in each cell, and make each sort - depend on the sorts of its progeny. */ - threadpool_map(&e->threadpool, engine_count_and_link_tasks_mapper, - sched->tasks, sched->nr_tasks, sizeof(struct task), 0, e); - - if (e->verbose) - message("Counting and linking tasks took %.3f %s.", - clocks_from_ticks(getticks() - tic2), clocks_getunit()); - - tic2 = getticks(); - - /* Now that the self/pair tasks are at the right level, set the super - * pointers. */ - threadpool_map(&e->threadpool, cell_set_super_mapper, cells, nr_cells, - sizeof(struct cell), 0, e); - - if (e->verbose) - message("Setting super-pointers took %.3f %s.", - clocks_from_ticks(getticks() - tic2), clocks_getunit()); - - /* Append hierarchical tasks to each cell. */ - threadpool_map(&e->threadpool, engine_make_hierarchical_tasks_mapper, cells, - nr_cells, sizeof(struct cell), 0, e); - - tic2 = getticks(); - - /* Run through the tasks and make force tasks for each density task. - Each force task depends on the cell ghosts and unlocks the kick task - of its super-cell. 
*/ - if (e->policy & engine_policy_hydro) - threadpool_map(&e->threadpool, engine_make_extra_hydroloop_tasks_mapper, - sched->tasks, sched->nr_tasks, sizeof(struct task), 0, e); - - if (e->verbose) - message("Making extra hydroloop tasks took %.3f %s.", - clocks_from_ticks(getticks() - tic2), clocks_getunit()); - - tic2 = getticks(); - - /* Add the dependencies for the gravity stuff */ - if (e->policy & (engine_policy_self_gravity | engine_policy_external_gravity)) - engine_link_gravity_tasks(e); - - if (e->verbose) - message("Linking gravity tasks took %.3f %s.", - clocks_from_ticks(getticks() - tic2), clocks_getunit()); - -#ifdef WITH_MPI - - /* Add the communication tasks if MPI is being used. */ - if (e->policy & engine_policy_mpi) { - - /* Loop over the proxies. */ - for (int pid = 0; pid < e->nr_proxies; pid++) { - - /* Get a handle on the proxy. */ - struct proxy *p = &e->proxies[pid]; - - for (int k = 0; k < p->nr_cells_in; k++) - engine_addtasks_recv_timestep(e, p->cells_in[k], NULL); - - for (int k = 0; k < p->nr_cells_out; k++) - engine_addtasks_send_timestep(e, p->cells_out[k], p->cells_in[0], NULL); - - /* Loop through the proxy's incoming cells and add the - recv tasks for the cells in the proxy that have a hydro connection. */ - if (e->policy & engine_policy_hydro) - for (int k = 0; k < p->nr_cells_in; k++) - if (p->cells_in_type[k] & proxy_cell_type_hydro) - engine_addtasks_recv_hydro(e, p->cells_in[k], NULL, NULL, NULL); - - /* Loop through the proxy's incoming cells and add the - recv tasks for the cells in the proxy that have a gravity connection. - */ - if (e->policy & engine_policy_self_gravity) - for (int k = 0; k < p->nr_cells_in; k++) - if (p->cells_in_type[k] & proxy_cell_type_gravity) - engine_addtasks_recv_gravity(e, p->cells_in[k], NULL); - - /* Loop through the proxy's outgoing cells and add the - send tasks for the cells in the proxy that have a hydro connection. 
*/ - if (e->policy & engine_policy_hydro) - for (int k = 0; k < p->nr_cells_out; k++) - if (p->cells_out_type[k] & proxy_cell_type_hydro) - engine_addtasks_send_hydro(e, p->cells_out[k], p->cells_in[0], NULL, - NULL, NULL); - - /* Loop through the proxy's outgoing cells and add the - send tasks for the cells in the proxy that have a gravity connection. - */ - if (e->policy & engine_policy_self_gravity) - for (int k = 0; k < p->nr_cells_out; k++) - if (p->cells_out_type[k] & proxy_cell_type_gravity) - engine_addtasks_send_gravity(e, p->cells_out[k], p->cells_in[0], - NULL); - } - } -#endif - - tic2 = getticks(); - - /* Set the unlocks per task. */ - scheduler_set_unlocks(sched); - - if (e->verbose) - message("Setting unlocks took %.3f %s.", - clocks_from_ticks(getticks() - tic2), clocks_getunit()); - - tic2 = getticks(); - - /* Rank the tasks. */ - scheduler_ranktasks(sched); - - if (e->verbose) - message("Ranking the tasks took %.3f %s.", - clocks_from_ticks(getticks() - tic2), clocks_getunit()); - - /* Weight the tasks. */ - scheduler_reweight(sched, e->verbose); - - /* Set the tasks age. */ - e->tasks_age = 0; - - if (e->verbose) - message("took %.3f %s (including reweight).", - clocks_from_ticks(getticks() - tic), clocks_getunit()); -} - -/** - * @brief Mark tasks to be un-skipped and set the sort flags accordingly. - * Threadpool mapper function. - * - * @param map_data pointer to the tasks - * @param num_elements number of tasks - * @param extra_data pointer to int that will define if a rebuild is needed. - */ -void engine_marktasks_mapper(void *map_data, int num_elements, - void *extra_data) { - /* Unpack the arguments. 
*/ - struct task *tasks = (struct task *)map_data; - size_t *rebuild_space = &((size_t *)extra_data)[1]; - struct scheduler *s = (struct scheduler *)(((size_t *)extra_data)[2]); - struct engine *e = (struct engine *)((size_t *)extra_data)[0]; - - for (int ind = 0; ind < num_elements; ind++) { - struct task *t = &tasks[ind]; - - /* Single-cell task? */ - if (t->type == task_type_self || t->type == task_type_sub_self) { - - /* Local pointer. */ - struct cell *ci = t->ci; - - if (ci->nodeID != engine_rank) error("Non-local self task found"); - - /* Activate the hydro drift */ - if (t->type == task_type_self && t->subtype == task_subtype_density) { - if (cell_is_active_hydro(ci, e)) { - scheduler_activate(s, t); - cell_activate_drift_part(ci, s); - } - } - - /* Store current values of dx_max and h_max. */ - else if (t->type == task_type_sub_self && - t->subtype == task_subtype_density) { - if (cell_is_active_hydro(ci, e)) { - scheduler_activate(s, t); - cell_activate_subcell_hydro_tasks(ci, NULL, s); - } - } - - else if (t->type == task_type_self && t->subtype == task_subtype_force) { - if (cell_is_active_hydro(ci, e)) scheduler_activate(s, t); - } - - else if (t->type == task_type_sub_self && - t->subtype == task_subtype_force) { - if (cell_is_active_hydro(ci, e)) scheduler_activate(s, t); - } - -#ifdef EXTRA_HYDRO_LOOP - else if (t->type == task_type_self && - t->subtype == task_subtype_gradient) { - if (cell_is_active_hydro(ci, e)) scheduler_activate(s, t); - } - - else if (t->type == task_type_sub_self && - t->subtype == task_subtype_gradient) { - if (cell_is_active_hydro(ci, e)) scheduler_activate(s, t); - } -#endif - - /* Activate the gravity drift */ - else if (t->type == task_type_self && t->subtype == task_subtype_grav) { - if (cell_is_active_gravity(ci, e)) { - scheduler_activate(s, t); - cell_activate_subcell_grav_tasks(t->ci, NULL, s); - } - } - - /* Activate the gravity drift */ - else if (t->type == task_type_self && - t->subtype == 
task_subtype_external_grav) { - if (cell_is_active_gravity(ci, e)) { - scheduler_activate(s, t); - cell_activate_drift_gpart(t->ci, s); - } - } - -#ifdef SWIFT_DEBUG_CHECKS - else { - error("Invalid task type / sub-type encountered"); - } -#endif - } - - /* Pair? */ - else if (t->type == task_type_pair || t->type == task_type_sub_pair) { - - /* Local pointers. */ - struct cell *ci = t->ci; - struct cell *cj = t->cj; - const int ci_active_hydro = cell_is_active_hydro(ci, e); - const int cj_active_hydro = cell_is_active_hydro(cj, e); - const int ci_active_gravity = cell_is_active_gravity(ci, e); - const int cj_active_gravity = cell_is_active_gravity(cj, e); - - /* Only activate tasks that involve a local active cell. */ - if ((t->subtype == task_subtype_density || - t->subtype == task_subtype_gradient || - t->subtype == task_subtype_force) && - ((ci_active_hydro && ci->nodeID == engine_rank) || - (cj_active_hydro && cj->nodeID == engine_rank))) { - - scheduler_activate(s, t); - - /* Set the correct sorting flags */ - if (t->type == task_type_pair && t->subtype == task_subtype_density) { - - /* Store some values. */ - atomic_or(&ci->requires_sorts, 1 << t->flags); - atomic_or(&cj->requires_sorts, 1 << t->flags); - ci->dx_max_sort_old = ci->dx_max_sort; - cj->dx_max_sort_old = cj->dx_max_sort; - - /* Activate the hydro drift tasks. */ - if (ci->nodeID == engine_rank) cell_activate_drift_part(ci, s); - if (cj->nodeID == engine_rank) cell_activate_drift_part(cj, s); - - /* Check the sorts and activate them if needed. */ - cell_activate_sorts(ci, t->flags, s); - cell_activate_sorts(cj, t->flags, s); - - } - - /* Store current values of dx_max and h_max. 
*/ - else if (t->type == task_type_sub_pair && - t->subtype == task_subtype_density) { - cell_activate_subcell_hydro_tasks(t->ci, t->cj, s); - } - } - - if ((t->subtype == task_subtype_grav) && - ((ci_active_gravity && ci->nodeID == engine_rank) || - (cj_active_gravity && cj->nodeID == engine_rank))) { - - scheduler_activate(s, t); - - if (t->type == task_type_pair && t->subtype == task_subtype_grav) { - /* Activate the gravity drift */ - cell_activate_subcell_grav_tasks(t->ci, t->cj, s); - } - - else if (t->type == task_type_sub_pair && - t->subtype == task_subtype_grav) { - error("Invalid task sub-type encountered"); - } - } - - /* Only interested in density tasks as of here. */ - if (t->subtype == task_subtype_density) { - - /* Too much particle movement? */ - if (cell_need_rebuild_for_pair(ci, cj)) *rebuild_space = 1; - -#ifdef WITH_MPI - /* Activate the send/recv tasks. */ - if (ci->nodeID != engine_rank) { - - /* If the local cell is active, receive data from the foreign cell. */ - if (cj_active_hydro) { - scheduler_activate(s, ci->recv_xv); - if (ci_active_hydro) { - scheduler_activate(s, ci->recv_rho); -#ifdef EXTRA_HYDRO_LOOP - scheduler_activate(s, ci->recv_gradient); -#endif - } - } - - /* If the foreign cell is active, we want its ti_end values. */ - if (ci_active_hydro) scheduler_activate(s, ci->recv_ti); - - /* Is the foreign cell active and will need stuff from us? */ - if (ci_active_hydro) { - - struct link *l = - scheduler_activate_send(s, cj->send_xv, ci->nodeID); - - /* Drift the cell which will be sent at the level at which it is - sent, i.e. drift the cell specified in the send task (l->t) - itself. */ - cell_activate_drift_part(l->t->ci, s); - - /* If the local cell is also active, more stuff will be needed. 
*/ - if (cj_active_hydro) { - scheduler_activate_send(s, cj->send_rho, ci->nodeID); - -#ifdef EXTRA_HYDRO_LOOP - scheduler_activate_send(s, cj->send_gradient, ci->nodeID); -#endif - } - } - - /* If the local cell is active, send its ti_end values. */ - if (cj_active_hydro) - scheduler_activate_send(s, cj->send_ti, ci->nodeID); - - } else if (cj->nodeID != engine_rank) { - - /* If the local cell is active, receive data from the foreign cell. */ - if (ci_active_hydro) { - scheduler_activate(s, cj->recv_xv); - if (cj_active_hydro) { - scheduler_activate(s, cj->recv_rho); -#ifdef EXTRA_HYDRO_LOOP - scheduler_activate(s, cj->recv_gradient); -#endif - } - } - - /* If the foreign cell is active, we want its ti_end values. */ - if (cj_active_hydro) scheduler_activate(s, cj->recv_ti); +#endif +} - /* Is the foreign cell active and will need stuff from us? */ - if (cj_active_hydro) { +void engine_exchange_proxy_multipoles(struct engine *e) { - struct link *l = - scheduler_activate_send(s, ci->send_xv, cj->nodeID); +#ifdef WITH_MPI - /* Drift the cell which will be sent at the level at which it is - sent, i.e. drift the cell specified in the send task (l->t) - itself. */ - cell_activate_drift_part(l->t->ci, s); + const ticks tic = getticks(); - /* If the local cell is also active, more stuff will be needed. */ - if (ci_active_hydro) { + /* Start by counting the number of cells to send and receive */ + int count_send_cells = 0; + int count_recv_cells = 0; + int count_send_requests = 0; + int count_recv_requests = 0; - scheduler_activate_send(s, ci->send_rho, cj->nodeID); + /* Loop over the proxies. */ + for (int pid = 0; pid < e->nr_proxies; pid++) { -#ifdef EXTRA_HYDRO_LOOP - scheduler_activate_send(s, ci->send_gradient, cj->nodeID); -#endif - } - } + /* Get a handle on the proxy. */ + const struct proxy *p = &e->proxies[pid]; - /* If the local cell is active, send its ti_end values. 
*/ - if (ci_active_hydro) - scheduler_activate_send(s, ci->send_ti, cj->nodeID); - } -#endif - } + /* Now collect the number of requests associated */ + count_recv_requests += p->nr_cells_in; + count_send_requests += p->nr_cells_out; - /* Only interested in gravity tasks as of here. */ - if (t->subtype == task_subtype_grav) { + /* And the actual number of things we are going to ship */ + for (int k = 0; k < p->nr_cells_in; k++) + count_recv_cells += p->cells_in[k]->mpi.pcell_size; -#ifdef WITH_MPI - /* Activate the send/recv tasks. */ - if (ci->nodeID != engine_rank) { + for (int k = 0; k < p->nr_cells_out; k++) + count_send_cells += p->cells_out[k]->mpi.pcell_size; + } - /* If the local cell is active, receive data from the foreign cell. */ - if (cj_active_gravity) { - scheduler_activate(s, ci->recv_grav); - } + /* Allocate the buffers for the packed data */ + struct gravity_tensors *buffer_send = NULL; + if (posix_memalign((void **)&buffer_send, SWIFT_CACHE_ALIGNMENT, + count_send_cells * sizeof(struct gravity_tensors)) != 0) + error("Unable to allocate memory for multipole transactions"); - /* If the foreign cell is active, we want its ti_end values. */ - if (ci_active_gravity) scheduler_activate(s, ci->recv_ti); + struct gravity_tensors *buffer_recv = NULL; + if (posix_memalign((void **)&buffer_recv, SWIFT_CACHE_ALIGNMENT, + count_recv_cells * sizeof(struct gravity_tensors)) != 0) + error("Unable to allocate memory for multipole transactions"); - /* Is the foreign cell active and will need stuff from us? 
*/ - if (ci_active_gravity) { + /* Also allocate the MPI requests */ + const int count_requests = count_send_requests + count_recv_requests; + MPI_Request *requests = + (MPI_Request *)malloc(sizeof(MPI_Request) * count_requests); + if (requests == NULL) error("Unable to allocate memory for MPI requests"); - struct link *l = - scheduler_activate_send(s, cj->send_grav, ci->nodeID); + int this_request = 0; + int this_recv = 0; + int this_send = 0; - /* Drift the cell which will be sent at the level at which it is - sent, i.e. drift the cell specified in the send task (l->t) - itself. */ - cell_activate_drift_gpart(l->t->ci, s); - } + /* Loop over the proxies to issue the receives. */ + for (int pid = 0; pid < e->nr_proxies; pid++) { - /* If the local cell is active, send its ti_end values. */ - if (cj_active_gravity) - scheduler_activate_send(s, cj->send_ti, ci->nodeID); + /* Get a handle on the proxy. */ + const struct proxy *p = &e->proxies[pid]; - } else if (cj->nodeID != engine_rank) { + for (int k = 0; k < p->nr_cells_in; k++) { - /* If the local cell is active, receive data from the foreign cell. */ - if (ci_active_gravity) { - scheduler_activate(s, cj->recv_grav); - } + const int num_elements = p->cells_in[k]->mpi.pcell_size; - /* If the foreign cell is active, we want its ti_end values. */ - if (cj_active_gravity) scheduler_activate(s, cj->recv_ti); + /* Receive everything */ + MPI_Irecv(&buffer_recv[this_recv], num_elements, multipole_mpi_type, + p->cells_in[k]->nodeID, p->cells_in[k]->mpi.tag, MPI_COMM_WORLD, + &requests[this_request]); - /* Is the foreign cell active and will need stuff from us? */ - if (cj_active_gravity) { + /* Move to the next slot in the buffers */ + this_recv += num_elements; + this_request++; + } - struct link *l = - scheduler_activate_send(s, ci->send_grav, cj->nodeID); + /* Loop over the proxies to issue the sends. 
*/ + for (int k = 0; k < p->nr_cells_out; k++) { - /* Drift the cell which will be sent at the level at which it is - sent, i.e. drift the cell specified in the send task (l->t) - itself. */ - cell_activate_drift_gpart(l->t->ci, s); - } + /* Number of multipoles in this cell hierarchy */ + const int num_elements = p->cells_out[k]->mpi.pcell_size; - /* If the local cell is active, send its ti_end values. */ - if (ci_active_gravity) - scheduler_activate_send(s, ci->send_ti, cj->nodeID); - } -#endif - } - } + /* Let's pack everything recursively */ + cell_pack_multipoles(p->cells_out[k], &buffer_send[this_send]); - /* End force ? */ - else if (t->type == task_type_end_force) { + /* Send everything (note the use of cells_in[0] to get the correct node + * ID. */ + MPI_Isend(&buffer_send[this_send], num_elements, multipole_mpi_type, + p->cells_in[0]->nodeID, p->cells_out[k]->mpi.tag, + MPI_COMM_WORLD, &requests[this_request]); - if (cell_is_active_hydro(t->ci, e) || cell_is_active_gravity(t->ci, e)) - scheduler_activate(s, t); + /* Move to the next slot in the buffers */ + this_send += num_elements; + this_request++; } + } - /* Kick ? */ - else if (t->type == task_type_kick1 || t->type == task_type_kick2) { - - if (cell_is_active_hydro(t->ci, e) || cell_is_active_gravity(t->ci, e)) - scheduler_activate(s, t); + /* Wait for all the requests to arrive home */ + MPI_Status *stats = (MPI_Status *)malloc(count_requests * sizeof(MPI_Status)); + int res; + if ((res = MPI_Waitall(count_requests, requests, stats)) != MPI_SUCCESS) { + for (int k = 0; k < count_requests; ++k) { + char buff[MPI_MAX_ERROR_STRING]; + MPI_Error_string(stats[k].MPI_ERROR, buff, &res); + message("request from source %i, tag %i has error '%s'.", + stats[k].MPI_SOURCE, stats[k].MPI_TAG, buff); } + error("Failed during waitall for multipole data."); + } - /* Hydro ghost tasks ? 
*/ - else if (t->type == task_type_ghost || t->type == task_type_extra_ghost || - t->type == task_type_ghost_in || t->type == task_type_ghost_out) { - if (cell_is_active_hydro(t->ci, e)) scheduler_activate(s, t); - } + /* Let's now unpack the multipoles at the right place */ + this_recv = 0; + for (int pid = 0; pid < e->nr_proxies; pid++) { - /* Gravity stuff ? */ - else if (t->type == task_type_grav_down || t->type == task_type_grav_mesh || - t->type == task_type_grav_long_range || - t->type == task_type_init_grav || - t->type == task_type_init_grav_out || - t->type == task_type_grav_down_in) { - if (cell_is_active_gravity(t->ci, e)) scheduler_activate(s, t); - } + /* Get a handle on the proxy. */ + const struct proxy *p = &e->proxies[pid]; - else if (t->type == task_type_grav_mm) { + for (int k = 0; k < p->nr_cells_in; k++) { - /* Local pointers. */ - const struct cell *ci = t->ci; - const struct cell *cj = t->cj; - const int ci_nodeID = ci->nodeID; - const int cj_nodeID = cj->nodeID; - const int ci_active_gravity = cell_is_active_gravity(ci, e); - const int cj_active_gravity = cell_is_active_gravity(cj, e); + const int num_elements = p->cells_in[k]->mpi.pcell_size; - if ((ci_active_gravity && ci_nodeID == engine_rank) || - (cj_active_gravity && cj_nodeID == engine_rank)) - scheduler_activate(s, t); - } +#ifdef SWIFT_DEBUG_CHECKS - /* Time-step? 
*/ - else if (t->type == task_type_timestep) { - t->ci->updated = 0; - t->ci->g_updated = 0; - t->ci->s_updated = 0; - if (cell_is_active_hydro(t->ci, e) || cell_is_active_gravity(t->ci, e)) - scheduler_activate(s, t); - } + /* Check that the first element (top-level cell's multipole) matches what + * we received */ + if (p->cells_in[k]->grav.multipole->m_pole.num_gpart != + buffer_recv[this_recv].m_pole.num_gpart) + error("Current: M_000=%e num_gpart=%lld\n New: M_000=%e num_gpart=%lld", + p->cells_in[k]->grav.multipole->m_pole.M_000, + p->cells_in[k]->grav.multipole->m_pole.num_gpart, + buffer_recv[this_recv].m_pole.M_000, + buffer_recv[this_recv].m_pole.num_gpart); +#endif + + /* Unpack recursively */ + cell_unpack_multipoles(p->cells_in[k], &buffer_recv[this_recv]); - /* Subgrid tasks */ - else if (t->type == task_type_cooling || t->type == task_type_sourceterms) { - if (cell_is_active_hydro(t->ci, e)) scheduler_activate(s, t); + /* Move to the next slot in the buffers */ + this_recv += num_elements; } } -} - -/** - * @brief Mark tasks to be un-skipped and set the sort flags accordingly. - * - * @return 1 if the space has to be rebuilt, 0 otherwise. - */ -int engine_marktasks(struct engine *e) { - struct scheduler *s = &e->sched; - const ticks tic = getticks(); - int rebuild_space = 0; - - /* Run through the tasks and mark as skip or not. */ - size_t extra_data[3] = {(size_t)e, (size_t)rebuild_space, (size_t)&e->sched}; - threadpool_map(&e->threadpool, engine_marktasks_mapper, s->tasks, s->nr_tasks, - sizeof(struct task), 0, extra_data); - rebuild_space = extra_data[1]; + /* Free everything */ + free(stats); + free(buffer_send); + free(buffer_recv); + free(requests); + /* How much time did this take? */ if (e->verbose) message("took %.3f %s.", clocks_from_ticks(getticks() - tic), clocks_getunit()); - - /* All is well... 
*/ - return rebuild_space; +#else + error("SWIFT was not compiled with MPI support."); +#endif } /** @@ -3855,6 +1891,10 @@ int engine_estimate_nr_tasks(struct engine *e) { int n1 = 0; int n2 = 0; if (e->policy & engine_policy_hydro) { + /* 2 self (density, force), 1 sort, 26/2 density pairs + 26/2 force pairs, 1 drift, 3 ghosts, 2 kicks, 1 time-step, + 1 end_force, 2 extra space + */ n1 += 37; n2 += 2; #ifdef WITH_MPI @@ -3882,14 +1922,27 @@ int engine_estimate_nr_tasks(struct engine *e) { n1 += 2; } if (e->policy & engine_policy_cooling) { + /* Cooling task + extra space */ n1 += 2; } if (e->policy & engine_policy_sourceterms) { n1 += 2; } if (e->policy & engine_policy_stars) { - n1 += 2; + /* 2 self (density, feedback), 1 sort, 26/2 density pairs + 26/2 feedback pairs, 1 drift, 3 ghosts, 2 kicks, 1 time-step, + 1 end_force, 2 extra space + */ + n1 += 37; + n2 += 2; +#ifdef WITH_MPI + n1 += 6; +#endif } +#if defined(WITH_LOGGER) + /* each cell logs its particles */ + n1 += 1; +#endif #ifdef WITH_MPI @@ -3903,7 +1956,7 @@ int engine_estimate_nr_tasks(struct engine *e) { struct cell *c = &e->s->cells_top[k]; /* Any cells with particles will have tasks (local & foreign). */ - int nparts = c->count + c->gcount + c->scount; + int nparts = c->hydro.count + c->grav.count + c->stars.count; if (nparts > 0) { ntop++; ncells++; @@ -3944,10 +1997,12 @@ int engine_estimate_nr_tasks(struct engine *e) { * @brief Rebuild the space and tasks. * * @param e The #engine. + * @param repartitioned Did we just redistribute? * @param clean_smoothing_length_values Are we cleaning up the values of * the smoothing lengths before building the tasks ? */ -void engine_rebuild(struct engine *e, int clean_smoothing_length_values) { +void engine_rebuild(struct engine *e, int repartitioned, + int clean_smoothing_length_values) { const ticks tic = getticks(); @@ -3956,11 +2011,34 @@ void engine_rebuild(struct engine *e, int clean_smoothing_length_values) { e->restarting = 0; /* Re-build the space. 
*/ - space_rebuild(e->s, e->verbose); + space_rebuild(e->s, repartitioned, e->verbose); + + const ticks tic2 = getticks(); + + /* Update the global counters of particles */ + long long num_particles[3] = {(long long)e->s->nr_parts, + (long long)e->s->nr_gparts, + (long long)e->s->nr_sparts}; +#ifdef WITH_MPI + MPI_Allreduce(MPI_IN_PLACE, num_particles, 3, MPI_LONG_LONG, MPI_SUM, + MPI_COMM_WORLD); +#endif + e->total_nr_parts = num_particles[0]; + e->total_nr_gparts = num_particles[1]; + e->total_nr_sparts = num_particles[2]; + + /* Flag that there are no inhibited particles */ + e->nr_inhibited_parts = 0; + e->nr_inhibited_gparts = 0; + e->nr_inhibited_sparts = 0; + + if (e->verbose) + message("updating particle counts took %.3f %s.", + clocks_from_ticks(getticks() - tic2), clocks_getunit()); /* Re-compute the mesh forces */ if ((e->policy & engine_policy_self_gravity) && e->s->periodic) - pm_mesh_compute_potential(e->mesh, e->s, e->verbose); + pm_mesh_compute_potential(e->mesh, e->s, &e->threadpool, e->verbose); /* Re-compute the maximal RMS displacement constraint */ if (e->policy & engine_policy_cosmology) @@ -3977,19 +2055,31 @@ void engine_rebuild(struct engine *e, int clean_smoothing_length_values) { /* If in parallel, exchange the cell structure, top-level and neighbouring * multipoles. 
*/ #ifdef WITH_MPI + if (e->policy & engine_policy_self_gravity) engine_exchange_top_multipoles(e); + engine_exchange_cells(e); +#endif - if (e->policy & engine_policy_self_gravity) engine_exchange_top_multipoles(e); +#ifdef SWIFT_DEBUG_CHECKS - if (e->policy & engine_policy_self_gravity) - engine_exchange_proxy_multipoles(e); + /* Let's check that what we received makes sense */ + if (e->policy & engine_policy_self_gravity) { + long long counter = 0; + + for (int i = 0; i < e->s->nr_cells; ++i) { + const struct gravity_tensors *m = &e->s->multipoles_top[i]; + counter += m->m_pole.num_gpart; + } + if (counter != e->total_nr_gparts) + error("Total particles in multipoles inconsistent with engine"); + } #endif /* Re-build the tasks. */ engine_maketasks(e); /* Make the list of top-level cells that have tasks */ - space_list_cells_with_tasks(e->s); + space_list_useful_top_level_cells(e->s); #ifdef SWIFT_DEBUG_CHECKS /* Check that all cells have been drifted to the current time. @@ -3997,6 +2087,11 @@ void engine_rebuild(struct engine *e, int clean_smoothing_length_values) { * previously been active on this rank. */ space_check_drift_point(e->s, e->ti_current, e->policy & engine_policy_self_gravity); + + if (e->policy & engine_policy_self_gravity) { + for (int k = 0; k < e->s->nr_local_cells; k++) + cell_check_foreign_multipole(&e->s->cells_top[e->s->local_cells_top[k]]); + } #endif /* Run through the tasks and mark as skip or not. 
*/ @@ -4030,34 +2125,42 @@ void engine_prepare(struct engine *e) { const ticks tic = getticks(); int drifted_all = 0; + int repartitioned = 0; /* Unskip active tasks and check for rebuild */ if (!e->forcerebuild && !e->forcerepart && !e->restarting) engine_unskip(e); + const ticks tic3 = getticks(); + #ifdef WITH_MPI MPI_Allreduce(MPI_IN_PLACE, &e->forcerebuild, 1, MPI_INT, MPI_MAX, MPI_COMM_WORLD); #endif + if (e->verbose) + message("Communicating rebuild flag took %.3f %s.", + clocks_from_ticks(getticks() - tic3), clocks_getunit()); + /* Do we need repartitioning ? */ if (e->forcerepart) { /* Let's start by drifting everybody to the current time */ - engine_drift_all(e); + engine_drift_all(e, /*drift_mpole=*/0); drifted_all = 1; /* And repartition */ engine_repartition(e); + repartitioned = 1; } /* Do we need rebuilding ? */ if (e->forcerebuild) { /* Let's start by drifting everybody to the current time */ - if (!e->restarting && !drifted_all) engine_drift_all(e); + if (!e->restarting && !drifted_all) engine_drift_all(e, /*drift_mpole=*/0); /* And rebuild */ - engine_rebuild(e, 0); + engine_rebuild(e, repartitioned, 0); } #ifdef SWIFT_DEBUG_CHECKS @@ -4099,121 +2202,178 @@ void engine_barrier(struct engine *e) { } /** - * @brief Mapping function to collect the data from the kick. + * @brief Recursive function gathering end-of-step data. * - * @param c A super-cell. + * We recurse until we encounter a timestep or time-step MPI recv task + * as the values will have been set at that level. We then bring these + * values upwards. + * + * @param c The #cell to recurse into. + * @param e The #engine. 
*/ -void engine_collect_end_of_step_recurse(struct cell *c) { +void engine_collect_end_of_step_recurse(struct cell *c, + const struct engine *e) { /* Skip super-cells (Their values are already set) */ #ifdef WITH_MPI - if (c->timestep != NULL || c->recv_ti != NULL) return; + if (c->timestep != NULL || c->mpi.recv_ti != NULL) return; #else if (c->timestep != NULL) return; #endif /* WITH_MPI */ /* Counters for the different quantities. */ size_t updated = 0, g_updated = 0, s_updated = 0; + size_t inhibited = 0, g_inhibited = 0, s_inhibited = 0; integertime_t ti_hydro_end_min = max_nr_timesteps, ti_hydro_end_max = 0, ti_hydro_beg_max = 0; integertime_t ti_gravity_end_min = max_nr_timesteps, ti_gravity_end_max = 0, ti_gravity_beg_max = 0; + integertime_t ti_stars_end_min = max_nr_timesteps; /* Collect the values from the progeny. */ for (int k = 0; k < 8; k++) { struct cell *cp = c->progeny[k]; - if (cp != NULL && (cp->count > 0 || cp->gcount > 0 || cp->scount > 0)) { + if (cp != NULL && + (cp->hydro.count > 0 || cp->grav.count > 0 || cp->stars.count > 0)) { /* Recurse */ - engine_collect_end_of_step_recurse(cp); + engine_collect_end_of_step_recurse(cp, e); /* And update */ - ti_hydro_end_min = min(ti_hydro_end_min, cp->ti_hydro_end_min); - ti_hydro_end_max = max(ti_hydro_end_max, cp->ti_hydro_end_max); - ti_hydro_beg_max = max(ti_hydro_beg_max, cp->ti_hydro_beg_max); - ti_gravity_end_min = min(ti_gravity_end_min, cp->ti_gravity_end_min); - ti_gravity_end_max = max(ti_gravity_end_max, cp->ti_gravity_end_max); - ti_gravity_beg_max = max(ti_gravity_beg_max, cp->ti_gravity_beg_max); - updated += cp->updated; - g_updated += cp->g_updated; - s_updated += cp->s_updated; + ti_hydro_end_min = min(ti_hydro_end_min, cp->hydro.ti_end_min); + ti_hydro_end_max = max(ti_hydro_end_max, cp->hydro.ti_end_max); + ti_hydro_beg_max = max(ti_hydro_beg_max, cp->hydro.ti_beg_max); + + ti_gravity_end_min = min(ti_gravity_end_min, cp->grav.ti_end_min); + ti_gravity_end_max = 
max(ti_gravity_end_max, cp->grav.ti_end_max); + ti_gravity_beg_max = max(ti_gravity_beg_max, cp->grav.ti_beg_max); + + ti_stars_end_min = min(ti_stars_end_min, cp->stars.ti_end_min); + + updated += cp->hydro.updated; + g_updated += cp->grav.updated; + s_updated += cp->stars.updated; + + inhibited += cp->hydro.inhibited; + g_inhibited += cp->grav.inhibited; + s_inhibited += cp->stars.inhibited; /* Collected, so clear for next time. */ - cp->updated = 0; - cp->g_updated = 0; - cp->s_updated = 0; + cp->hydro.updated = 0; + cp->grav.updated = 0; + cp->stars.updated = 0; } } /* Store the collected values in the cell. */ - c->ti_hydro_end_min = ti_hydro_end_min; - c->ti_hydro_end_max = ti_hydro_end_max; - c->ti_hydro_beg_max = ti_hydro_beg_max; - c->ti_gravity_end_min = ti_gravity_end_min; - c->ti_gravity_end_max = ti_gravity_end_max; - c->ti_gravity_beg_max = ti_gravity_beg_max; - c->updated = updated; - c->g_updated = g_updated; - c->s_updated = s_updated; + c->hydro.ti_end_min = ti_hydro_end_min; + c->hydro.ti_end_max = ti_hydro_end_max; + c->hydro.ti_beg_max = ti_hydro_beg_max; + c->grav.ti_end_min = ti_gravity_end_min; + c->grav.ti_end_max = ti_gravity_end_max; + c->grav.ti_beg_max = ti_gravity_beg_max; + c->stars.ti_end_min = ti_stars_end_min; + c->hydro.updated = updated; + c->grav.updated = g_updated; + c->stars.updated = s_updated; + c->hydro.inhibited = inhibited; + c->grav.inhibited = g_inhibited; + c->stars.inhibited = s_inhibited; } +/** + * @brief Mapping function to collect the data from the end of the step + * + * This function will call a recursive function on all the top-level cells + * to collect the information we are after. + * + * @param map_data The list of cells with tasks on this node. + * @param num_elements The number of elements in the list this thread will work + * on. + * @param extra_data The #engine. 
+ */ void engine_collect_end_of_step_mapper(void *map_data, int num_elements, void *extra_data) { struct end_of_step_data *data = (struct end_of_step_data *)extra_data; - struct engine *e = data->e; + const struct engine *e = data->e; struct space *s = e->s; int *local_cells = (int *)map_data; /* Local collectible */ - size_t updates = 0, g_updates = 0, s_updates = 0; + size_t updated = 0, g_updated = 0, s_updated = 0; + size_t inhibited = 0, g_inhibited = 0, s_inhibited = 0; integertime_t ti_hydro_end_min = max_nr_timesteps, ti_hydro_end_max = 0, ti_hydro_beg_max = 0; integertime_t ti_gravity_end_min = max_nr_timesteps, ti_gravity_end_max = 0, ti_gravity_beg_max = 0; + integertime_t ti_stars_end_min = max_nr_timesteps; for (int ind = 0; ind < num_elements; ind++) { struct cell *c = &s->cells_top[local_cells[ind]]; - if (c->count > 0 || c->gcount > 0 || c->scount > 0) { + if (c->hydro.count > 0 || c->grav.count > 0 || c->stars.count > 0) { /* Make the top-cells recurse */ - engine_collect_end_of_step_recurse(c); + engine_collect_end_of_step_recurse(c, e); /* And aggregate */ - ti_hydro_end_min = min(ti_hydro_end_min, c->ti_hydro_end_min); - ti_hydro_end_max = max(ti_hydro_end_max, c->ti_hydro_end_max); - ti_hydro_beg_max = max(ti_hydro_beg_max, c->ti_hydro_beg_max); - ti_gravity_end_min = min(ti_gravity_end_min, c->ti_gravity_end_min); - ti_gravity_end_max = max(ti_gravity_end_max, c->ti_gravity_end_max); - ti_gravity_beg_max = max(ti_gravity_beg_max, c->ti_gravity_beg_max); - updates += c->updated; - g_updates += c->g_updated; - s_updates += c->s_updated; + if (c->hydro.ti_end_min > e->ti_current) + ti_hydro_end_min = min(ti_hydro_end_min, c->hydro.ti_end_min); + ti_hydro_end_max = max(ti_hydro_end_max, c->hydro.ti_end_max); + ti_hydro_beg_max = max(ti_hydro_beg_max, c->hydro.ti_beg_max); + + if (c->grav.ti_end_min > e->ti_current) + ti_gravity_end_min = min(ti_gravity_end_min, c->grav.ti_end_min); + ti_gravity_end_max = max(ti_gravity_end_max, 
c->grav.ti_end_max); + ti_gravity_beg_max = max(ti_gravity_beg_max, c->grav.ti_beg_max); + + if (c->stars.ti_end_min > e->ti_current) + ti_stars_end_min = min(ti_stars_end_min, c->stars.ti_end_min); + + updated += c->hydro.updated; + g_updated += c->grav.updated; + s_updated += c->stars.updated; + + inhibited += c->hydro.inhibited; + g_inhibited += c->grav.inhibited; + s_inhibited += c->stars.inhibited; /* Collected, so clear for next time. */ - c->updated = 0; - c->g_updated = 0; - c->s_updated = 0; + c->hydro.updated = 0; + c->grav.updated = 0; + c->stars.updated = 0; } } /* Let's write back to the global data. * We use the space lock to garanty single access*/ if (lock_lock(&s->lock) == 0) { - data->updates += updates; - data->g_updates += g_updates; - data->s_updates += s_updates; - data->ti_hydro_end_min = min(ti_hydro_end_min, data->ti_hydro_end_min); + data->updated += updated; + data->g_updated += g_updated; + data->s_updated += s_updated; + + data->inhibited += inhibited; + data->g_inhibited += g_inhibited; + data->s_inhibited += s_inhibited; + + if (ti_hydro_end_min > e->ti_current) + data->ti_hydro_end_min = min(ti_hydro_end_min, data->ti_hydro_end_min); data->ti_hydro_end_max = max(ti_hydro_end_max, data->ti_hydro_end_max); data->ti_hydro_beg_max = max(ti_hydro_beg_max, data->ti_hydro_beg_max); - data->ti_gravity_end_min = - min(ti_gravity_end_min, data->ti_gravity_end_min); + + if (ti_gravity_end_min > e->ti_current) + data->ti_gravity_end_min = + min(ti_gravity_end_min, data->ti_gravity_end_min); data->ti_gravity_end_max = max(ti_gravity_end_max, data->ti_gravity_end_max); data->ti_gravity_beg_max = max(ti_gravity_beg_max, data->ti_gravity_beg_max); + + if (ti_stars_end_min > e->ti_current) + data->ti_stars_end_min = min(ti_stars_end_min, data->ti_stars_end_min); } + if (lock_unlock(&s->lock) != 0) error("Failed to unlock the space"); } @@ -4237,9 +2397,10 @@ void engine_collect_end_of_step_mapper(void *map_data, int num_elements, void 
engine_collect_end_of_step(struct engine *e, int apply) { const ticks tic = getticks(); - const struct space *s = e->s; + struct space *s = e->s; struct end_of_step_data data; - data.updates = 0, data.g_updates = 0, data.s_updates = 0; + data.updated = 0, data.g_updated = 0, data.s_updated = 0; + data.inhibited = 0, data.g_inhibited = 0, data.s_inhibited = 0; data.ti_hydro_end_min = max_nr_timesteps, data.ti_hydro_end_max = 0, data.ti_hydro_beg_max = 0; data.ti_gravity_end_min = max_nr_timesteps, data.ti_gravity_end_max = 0, @@ -4248,14 +2409,20 @@ void engine_collect_end_of_step(struct engine *e, int apply) { /* Collect information from the local top-level cells */ threadpool_map(&e->threadpool, engine_collect_end_of_step_mapper, - s->local_cells_top, s->nr_local_cells, sizeof(int), 0, &data); + s->local_cells_with_tasks_top, s->nr_local_cells_with_tasks, + sizeof(int), 0, &data); + + /* Store the local number of inhibited particles */ + s->nr_inhibited_parts = data.inhibited; + s->nr_inhibited_gparts = data.g_inhibited; + s->nr_inhibited_sparts = data.s_inhibited; /* Store these in the temporary collection group. */ - collectgroup1_init(&e->collect_group1, data.updates, data.g_updates, - data.s_updates, data.ti_hydro_end_min, - data.ti_hydro_end_max, data.ti_hydro_beg_max, - data.ti_gravity_end_min, data.ti_gravity_end_max, - data.ti_gravity_beg_max, e->forcerebuild); + collectgroup1_init( + &e->collect_group1, data.updated, data.g_updated, data.s_updated, + data.inhibited, data.g_inhibited, data.s_inhibited, data.ti_hydro_end_min, + data.ti_hydro_end_max, data.ti_hydro_beg_max, data.ti_gravity_end_min, + data.ti_gravity_end_max, data.ti_gravity_beg_max, e->forcerebuild); /* Aggregate collective data from the different nodes for this step. 
*/ #ifdef WITH_MPI @@ -4280,21 +2447,37 @@ void engine_collect_end_of_step(struct engine *e, int apply) { in_i[1], e->collect_group1.ti_gravity_end_min); long long in_ll[3], out_ll[3]; - out_ll[0] = data.updates; - out_ll[1] = data.g_updates; - out_ll[2] = data.s_updates; + out_ll[0] = data.updated; + out_ll[1] = data.g_updated; + out_ll[2] = data.s_updated; + if (MPI_Allreduce(out_ll, in_ll, 3, MPI_LONG_LONG_INT, MPI_SUM, + MPI_COMM_WORLD) != MPI_SUCCESS) + error("Failed to aggregate particle counts."); + if (in_ll[0] != (long long)e->collect_group1.updated) + error("Failed to get same updated, is %lld, should be %lld", in_ll[0], + e->collect_group1.updated); + if (in_ll[1] != (long long)e->collect_group1.g_updated) + error("Failed to get same g_updated, is %lld, should be %lld", in_ll[1], + e->collect_group1.g_updated); + if (in_ll[2] != (long long)e->collect_group1.s_updated) + error("Failed to get same s_updated, is %lld, should be %lld", in_ll[2], + e->collect_group1.s_updated); + + out_ll[0] = data.inhibited; + out_ll[1] = data.g_inhibited; + out_ll[2] = data.s_inhibited; if (MPI_Allreduce(out_ll, in_ll, 3, MPI_LONG_LONG_INT, MPI_SUM, MPI_COMM_WORLD) != MPI_SUCCESS) error("Failed to aggregate particle counts."); - if (in_ll[0] != (long long)e->collect_group1.updates) - error("Failed to get same updates, is %lld, should be %lld", in_ll[0], - e->collect_group1.updates); - if (in_ll[1] != (long long)e->collect_group1.g_updates) - error("Failed to get same g_updates, is %lld, should be %lld", in_ll[1], - e->collect_group1.g_updates); - if (in_ll[2] != (long long)e->collect_group1.s_updates) - error("Failed to get same s_updates, is %lld, should be %lld", in_ll[2], - e->collect_group1.s_updates); + if (in_ll[0] != (long long)e->collect_group1.inhibited) + error("Failed to get same inhibited, is %lld, should be %lld", in_ll[0], + e->collect_group1.inhibited); + if (in_ll[1] != (long long)e->collect_group1.g_inhibited) + error("Failed to get same g_inhibited, is 
%lld, should be %lld", in_ll[1], + e->collect_group1.g_inhibited); + if (in_ll[2] != (long long)e->collect_group1.s_inhibited) + error("Failed to get same s_inhibited, is %lld, should be %lld", in_ll[2], + e->collect_group1.s_inhibited); int buff = 0; if (MPI_Allreduce(&e->forcerebuild, &buff, 1, MPI_INT, MPI_MAX, @@ -4330,8 +2513,7 @@ void engine_print_stats(struct engine *e) { /* Check that all cells have been drifted to the current time. * That can include cells that have not * previously been active on this rank. */ - space_check_drift_point(e->s, e->ti_current, - e->policy & engine_policy_self_gravity); + space_check_drift_point(e->s, e->ti_current, /*chek_mpoles=*/0); /* Be verbose about this */ if (e->nodeID == 0) { @@ -4523,7 +2705,7 @@ void engine_init_particles(struct engine *e, int flag_entropy_ICs, if (e->nodeID == 0) message("Computing initial gas densities."); /* Construct all cells and tasks to start everything */ - engine_rebuild(e, clean_h_values); + engine_rebuild(e, 0, clean_h_values); /* No time integration. We just want the density and ghosts */ engine_skip_force_and_kick(e); @@ -4534,6 +2716,20 @@ void engine_init_particles(struct engine *e, int flag_entropy_ICs, /* Init the particle data (by hand). */ space_init_parts(s, e->verbose); space_init_gparts(s, e->verbose); + space_init_sparts(s, e->verbose); + + /* Update the cooling function */ + if (e->policy & engine_policy_cooling) + cooling_update(e->cosmology, e->cooling_func, /*restart_flag=*/0); + +#ifdef WITH_LOGGER + /* Mark the first time step in the particle logger file. */ + logger_log_timestamp(e->logger, e->ti_current, e->time, + &e->logger->timestamp_offset); + /* Make sure that we have enough space in the particle logger file + * to store the particles in current time step. 
*/ + logger_ensure_size(e->logger, e->total_nr_parts, e->total_nr_gparts, 0); +#endif /* Now, launch the calculation */ TIMER_TIC; @@ -4560,7 +2756,7 @@ void engine_init_particles(struct engine *e, int flag_entropy_ICs, long long num_gpart_mpole = 0; if (e->policy & engine_policy_self_gravity) { for (int i = 0; i < e->s->nr_cells; ++i) - num_gpart_mpole += e->s->cells_top[i].multipole->m_pole.num_gpart; + num_gpart_mpole += e->s->cells_top[i].grav.multipole->m_pole.num_gpart; if (num_gpart_mpole != e->total_nr_gparts) error( "Top-level multipoles don't contain the total number of gpart " @@ -4582,6 +2778,7 @@ void engine_init_particles(struct engine *e, int flag_entropy_ICs, /* Init the particle data (by hand). */ space_init_parts(e->s, e->verbose); space_init_gparts(e->s, e->verbose); + space_init_sparts(e->s, e->verbose); /* Print the number of active tasks ? */ if (e->verbose) engine_print_task_counts(e); @@ -4593,6 +2790,7 @@ void engine_init_particles(struct engine *e, int flag_entropy_ICs, #endif if (e->nodeID == 0) scheduler_write_dependencies(&e->sched, e->verbose); + if (e->nodeID == 0) scheduler_write_task_level(&e->sched); /* Run the 0th time-step */ TIMER_TIC2; @@ -4663,12 +2861,27 @@ void engine_init_particles(struct engine *e, int flag_entropy_ICs, if (s->cells_top != NULL && s->nr_parts > 0) { for (int i = 0; i < s->nr_cells; i++) { struct cell *c = &s->cells_top[i]; - if (c->nodeID == engine_rank && c->count > 0) { - float part_h_max = c->parts[0].h; - for (int k = 1; k < c->count; k++) { - if (c->parts[k].h > part_h_max) part_h_max = c->parts[k].h; + if (c->nodeID == engine_rank && c->hydro.count > 0) { + float part_h_max = c->hydro.parts[0].h; + for (int k = 1; k < c->hydro.count; k++) { + if (c->hydro.parts[k].h > part_h_max) + part_h_max = c->hydro.parts[k].h; + } + c->hydro.h_max = max(part_h_max, c->hydro.h_max); + } + } + } + + if (s->cells_top != NULL && s->nr_sparts > 0) { + for (int i = 0; i < s->nr_cells; i++) { + struct cell *c = 
&s->cells_top[i]; + if (c->nodeID == engine_rank && c->stars.count > 0) { + float spart_h_max = c->stars.parts[0].h; + for (int k = 1; k < c->stars.count; k++) { + if (c->stars.parts[k].h > spart_h_max) + spart_h_max = c->stars.parts[k].h; } - c->h_max = max(part_h_max, c->h_max); + c->stars.h_max = max(spart_h_max, c->stars.h_max); } } } @@ -4709,24 +2922,26 @@ void engine_step(struct engine *e) { /* Print some information to the screen */ printf( - " %6d %14e %14e %10.5f %14e %4d %4d %12lld %12lld %12lld %21.3f %6d\n", + " %6d %14e %12.7f %12.7f %14e %4d %4d %12lld %12lld %12lld %21.3f " + "%6d\n", e->step, e->time, e->cosmology->a, e->cosmology->z, e->time_step, e->min_active_bin, e->max_active_bin, e->updates, e->g_updates, e->s_updates, e->wallclock_time, e->step_props); fflush(stdout); if (!e->restarting) - fprintf(e->file_timesteps, - " %6d %14e %14e %10.5f %14e %4d %4d %12lld %12lld %12lld %21.3f " - "%6d\n", - e->step, e->time, e->cosmology->a, e->cosmology->z, e->time_step, - e->min_active_bin, e->max_active_bin, e->updates, e->g_updates, - e->s_updates, e->wallclock_time, e->step_props); + fprintf( + e->file_timesteps, + " %6d %14e %12.7f %12.7f %14e %4d %4d %12lld %12lld %12lld %21.3f " + "%6d\n", + e->step, e->time, e->cosmology->a, e->cosmology->z, e->time_step, + e->min_active_bin, e->max_active_bin, e->updates, e->g_updates, + e->s_updates, e->wallclock_time, e->step_props); fflush(e->file_timesteps); } /* We need some cells to exist but not the whole task stuff. */ - if (e->restarting) space_rebuild(e->s, e->verbose); + if (e->restarting) space_rebuild(e->s, 0, e->verbose); /* Move forward in time */ e->ti_old = e->ti_current; @@ -4737,7 +2952,7 @@ void engine_step(struct engine *e) { e->step_props = engine_step_prop_none; /* When restarting, move everyone to the current time. 
*/ - if (e->restarting) engine_drift_all(e); + if (e->restarting) engine_drift_all(e, /*drift_mpole=*/1); /* Get the physical value of the time and time-step size */ if (e->policy & engine_policy_cosmology) { @@ -4751,6 +2966,10 @@ void engine_step(struct engine *e) { e->time_step = (e->ti_current - e->ti_old) * e->time_base; } + /* Update the cooling function */ + if (e->policy & engine_policy_cooling) + cooling_update(e->cosmology, e->cooling_func, /*restart_flag=*/0); + /*****************************************************/ /* OK, we now know what the next end of time-step is */ /*****************************************************/ @@ -4765,9 +2984,18 @@ void engine_step(struct engine *e) { ((double)e->total_nr_gparts) * e->gravity_properties->rebuild_frequency)) e->forcerebuild = 1; +#ifdef WITH_LOGGER + /* Mark the current time step in the particle logger file. */ + logger_log_timestamp(e->logger, e->ti_current, e->time, + &e->logger->timestamp_offset); + /* Make sure that we have enough space in the particle logger file + * to store the particles in current time step. */ + logger_ensure_size(e->logger, e->total_nr_parts, e->total_nr_gparts, 0); +#endif + /* Are we drifting everything (a la Gadget/GIZMO) ? */ if (e->policy & engine_policy_drift_all && !e->forcerebuild) - engine_drift_all(e); + engine_drift_all(e, /*drift_mpole=*/1); /* Are we reconstructing the multipoles or drifting them ?*/ if ((e->policy & engine_policy_self_gravity) && !e->forcerebuild) { @@ -4790,14 +3018,14 @@ void engine_step(struct engine *e) { if (e->verbose) engine_print_task_counts(e); /* Dump local cells and active particle counts. 
*/ - /* dumpCells("cells", 0, 0, 0, 0, e->s, e->nodeID, e->step); */ + // dumpCells("cells", 1, 0, 0, 0, e->s, e->nodeID, e->step); #ifdef SWIFT_DEBUG_CHECKS /* Check that we have the correct total mass in the top-level multipoles */ long long num_gpart_mpole = 0; if (e->policy & engine_policy_self_gravity) { for (int i = 0; i < e->s->nr_cells; ++i) - num_gpart_mpole += e->s->cells_top[i].multipole->m_pole.num_gpart; + num_gpart_mpole += e->s->cells_top[i].grav.multipole->m_pole.num_gpart; if (num_gpart_mpole != e->total_nr_gparts) error( "Multipoles don't contain the total number of gpart mpoles=%lld " @@ -4824,14 +3052,16 @@ void engine_step(struct engine *e) { #endif /* Collect information about the next time-step */ - engine_collect_end_of_step(e, 0); + engine_collect_end_of_step(e, 1); e->forcerebuild = e->collect_group1.forcerebuild; + e->updates_since_rebuild += e->collect_group1.updated; + e->g_updates_since_rebuild += e->collect_group1.g_updated; + e->s_updates_since_rebuild += e->collect_group1.s_updated; - /* Now apply all the collected time step updates and particle counts. */ - collectgroup1_apply(&e->collect_group1, e); - e->updates_since_rebuild += e->collect_group1.updates; - e->g_updates_since_rebuild += e->collect_group1.g_updates; - e->s_updates_since_rebuild += e->collect_group1.s_updates; +#ifdef SWIFT_DEBUG_CHECKS + if (e->ti_end_min == e->ti_current && e->ti_end_min < max_nr_timesteps) + error("Obtained a time-step of size 0"); +#endif /********************************************************/ /* OK, we are done with the regular stuff. Time for i/o */ @@ -4840,6 +3070,33 @@ void engine_step(struct engine *e) { /* Create a restart file if needed. */ engine_dump_restarts(e, 0, e->restart_onexit && engine_is_done(e)); + engine_check_for_dumps(e); + + TIMER_TOC2(timer_step); + + clocks_gettime(&time2); + e->wallclock_time = (float)clocks_diff(&time1, &time2); + +#ifdef SWIFT_DEBUG_TASKS + /* Time in ticks at the end of this step. 
*/ + e->toc_step = getticks(); +#endif +} + +/** + * @brief Check whether any kind of i/o has to be performed during this + * step. + * + * This includes snapshots, stats and halo finder. We also handle the case + * of multiple outputs between two steps. + * + * @param e The #engine. + */ +void engine_check_for_dumps(struct engine *e) { + + const int with_stf = (e->policy & engine_policy_structure_finding); + const int stf_time_output = (e->stf_output_freq_format == io_stf_time); + /* Save some statistics ? */ int save_stats = 0; if (e->ti_end_min > e->ti_next_stats && e->ti_next_stats > 0) save_stats = 1; @@ -4851,12 +3108,11 @@ void engine_step(struct engine *e) { /* Do we want to perform structure finding? */ int run_stf = 0; - if ((e->policy & engine_policy_structure_finding)) { - if (e->stf_output_freq_format == STEPS && e->step % e->deltaStepSTF == 0) - run_stf = 1; - else if (e->stf_output_freq_format == TIME && - e->ti_end_min > e->ti_nextSTF && e->ti_nextSTF > 0) - run_stf = 1; + if (with_stf && stf_time_output) { + if (e->ti_end_min > e->ti_next_stf && e->ti_next_stf > 0) run_stf = 1; + } + if (with_stf && !stf_time_output) { + if (e->step % e->delta_step_stf == 0) run_stf = 1; } /* Store information before attempting extra dump-related drifts */ @@ -4864,145 +3120,216 @@ void engine_step(struct engine *e) { timebin_t max_active_bin = e->max_active_bin; double time = e->time; - /* Write some form of output */ - if (dump_snapshot && save_stats) { + while (save_stats || dump_snapshot || run_stf) { - /* If both, need to figure out which one occurs first */ - if (e->ti_next_stats == e->ti_next_snapshot) { + /* Write some form of output */ + if (dump_snapshot && save_stats) { - /* Let's fake that we are at the common dump time */ - e->ti_current = e->ti_next_snapshot; - e->max_active_bin = 0; - if (!(e->policy & engine_policy_cosmology)) - e->time = e->ti_next_snapshot * e->time_base + e->time_begin; + /* If both, need to figure out which one occurs first */ + 
if (e->ti_next_stats == e->ti_next_snapshot) { - /* Drift everyone */ - engine_drift_all(e); + /* Let's fake that we are at the common dump time */ + e->ti_current = e->ti_next_snapshot; + e->max_active_bin = 0; + if ((e->policy & engine_policy_cosmology)) { + cosmology_update(e->cosmology, e->physical_constants, e->ti_current); + e->time = e->cosmology->time; + } else { + e->time = e->ti_next_stats * e->time_base + e->time_begin; + } - /* Dump everything */ - engine_print_stats(e); - engine_dump_snapshot(e); + /* Drift everyone */ + engine_drift_all(e, /*drift_mpole=*/0); - } else if (e->ti_next_stats < e->ti_next_snapshot) { + /* Dump everything */ + engine_print_stats(e); +#ifdef WITH_LOGGER + /* Write a file containing the offsets in the particle logger. */ + engine_dump_index(e); +#else + engine_dump_snapshot(e); +#endif - /* Let's fake that we are at the stats dump time */ - e->ti_current = e->ti_next_stats; - e->max_active_bin = 0; - if (!(e->policy & engine_policy_cosmology)) - e->time = e->ti_next_stats * e->time_base + e->time_begin; + } else if (e->ti_next_stats < e->ti_next_snapshot) { - /* Drift everyone */ - engine_drift_all(e); + /* Let's fake that we are at the stats dump time */ + e->ti_current = e->ti_next_stats; + e->max_active_bin = 0; + if ((e->policy & engine_policy_cosmology)) { + cosmology_update(e->cosmology, e->physical_constants, e->ti_current); + e->time = e->cosmology->time; + } else { + e->time = e->ti_next_stats * e->time_base + e->time_begin; + } - /* Dump stats */ - engine_print_stats(e); + /* Drift everyone */ + engine_drift_all(e, /*drift_mpole=*/0); + + /* Dump stats */ + engine_print_stats(e); + + /* Let's fake that we are at the snapshot dump time */ + e->ti_current = e->ti_next_snapshot; + e->max_active_bin = 0; + if (!(e->policy & engine_policy_cosmology)) + e->time = e->ti_next_snapshot * e->time_base + e->time_begin; + + /* Drift everyone */ + engine_drift_all(e, /*drift_mpole=*/0); + + /* Dump snapshot */ +#ifdef 
WITH_LOGGER + /* Write a file containing the offsets in the particle logger. */ + engine_dump_index(e); +#else + engine_dump_snapshot(e); +#endif + + } else if (e->ti_next_stats > e->ti_next_snapshot) { + + /* Let's fake that we are at the snapshot dump time */ + e->ti_current = e->ti_next_snapshot; + e->max_active_bin = 0; + if ((e->policy & engine_policy_cosmology)) { + cosmology_update(e->cosmology, e->physical_constants, e->ti_current); + e->time = e->cosmology->time; + } else { + e->time = e->ti_next_stats * e->time_base + e->time_begin; + } + + /* Drift everyone */ + engine_drift_all(e, /*drift_mpole=*/0); + + /* Dump snapshot */ +#ifdef WITH_LOGGER + /* Write a file containing the offsets in the particle logger. */ + engine_dump_index(e); +#else + engine_dump_snapshot(e); +#endif + + /* Let's fake that we are at the stats dump time */ + e->ti_current = e->ti_next_stats; + e->max_active_bin = 0; + if (!(e->policy & engine_policy_cosmology)) + e->time = e->ti_next_stats * e->time_base + e->time_begin; - /* Let's fake that we are at the snapshot dump time */ - e->ti_current = e->ti_next_snapshot; - e->max_active_bin = 0; - if (!(e->policy & engine_policy_cosmology)) - e->time = e->ti_next_snapshot * e->time_base + e->time_begin; + /* Drift everyone */ + engine_drift_all(e, /*drift_mpole=*/0); - /* Drift everyone */ - engine_drift_all(e); + /* Dump stats */ + engine_print_stats(e); + } - /* Dump snapshot */ - engine_dump_snapshot(e); + /* Let's compute the time of the next outputs */ + engine_compute_next_snapshot_time(e); + engine_compute_next_statistics_time(e); - } else if (e->ti_next_stats > e->ti_next_snapshot) { + } else if (dump_snapshot) { /* Let's fake that we are at the snapshot dump time */ e->ti_current = e->ti_next_snapshot; e->max_active_bin = 0; - if (!(e->policy & engine_policy_cosmology)) - e->time = e->ti_next_snapshot * e->time_base + e->time_begin; + if ((e->policy & engine_policy_cosmology)) { + cosmology_update(e->cosmology, 
e->physical_constants, e->ti_current); + e->time = e->cosmology->time; + } else { + e->time = e->ti_next_stats * e->time_base + e->time_begin; + } /* Drift everyone */ - engine_drift_all(e); + engine_drift_all(e, /*drift_mpole=*/0); - /* Dump snapshot */ + /* Dump... */ +#ifdef WITH_LOGGER + /* Write a file containing the offsets in the particle logger. */ + engine_dump_index(e); +#else engine_dump_snapshot(e); +#endif + + /* ... and find the next output time */ + engine_compute_next_snapshot_time(e); + + } else if (save_stats) { /* Let's fake that we are at the stats dump time */ e->ti_current = e->ti_next_stats; e->max_active_bin = 0; - if (!(e->policy & engine_policy_cosmology)) + if ((e->policy & engine_policy_cosmology)) { + cosmology_update(e->cosmology, e->physical_constants, e->ti_current); + e->time = e->cosmology->time; + } else { e->time = e->ti_next_stats * e->time_base + e->time_begin; + } /* Drift everyone */ - engine_drift_all(e); + engine_drift_all(e, /*drift_mpole=*/0); - /* Dump stats */ + /* Dump */ engine_print_stats(e); - } - /* Let's compute the time of the next outputs */ - engine_compute_next_snapshot_time(e); - engine_compute_next_statistics_time(e); - - } else if (dump_snapshot) { - - /* Let's fake that we are at the snapshot dump time */ - e->ti_current = e->ti_next_snapshot; - e->max_active_bin = 0; - if (!(e->policy & engine_policy_cosmology)) - e->time = e->ti_next_snapshot * e->time_base + e->time_begin; + /* and move on */ + engine_compute_next_statistics_time(e); + } - /* Drift everyone */ - engine_drift_all(e); + /* Perform structure finding? */ + if (run_stf) { - /* Dump... */ - engine_dump_snapshot(e); +#ifdef HAVE_VELOCIRAPTOR - /* ... and find the next output time */ - engine_compute_next_snapshot_time(e); - } else if (save_stats) { + // MATTHIEU: Check the order with the other i/o options. 
+ if (!dump_snapshot && !save_stats) { - /* Let's fake that we are at the stats dump time */ - e->ti_current = e->ti_next_stats; - e->max_active_bin = 0; - if (!(e->policy & engine_policy_cosmology)) - e->time = e->ti_next_stats * e->time_base + e->time_begin; + /* Let's fake that we are at the stats dump time */ + e->ti_current = e->ti_next_stf; + e->max_active_bin = 0; + if ((e->policy & engine_policy_cosmology)) { + cosmology_update(e->cosmology, e->physical_constants, e->ti_current); + e->time = e->cosmology->time; + } else { + e->time = e->ti_next_stats * e->time_base + e->time_begin; + } - /* Drift everyone */ - engine_drift_all(e); + /* Drift everyone */ + engine_drift_all(e, /*drift_mpole=*/0); + } - /* Dump */ - engine_print_stats(e); + velociraptor_init(e); + velociraptor_invoke(e); - /* and move on */ - engine_compute_next_statistics_time(e); - } + /* ... and find the next output time */ + if (e->stf_output_freq_format == io_stf_time) + engine_compute_next_stf_time(e); +#endif + } - /* Perform structure finding? */ - if (run_stf) { + /* We need to see whether whether we are in the pathological case + * where there can be another dump before the next step. */ - // MATTHIEU: Add a drift_all here. And check the order with the order i/o - // options. + /* Save some statistics ? */ + save_stats = 0; + if (e->ti_end_min > e->ti_next_stats && e->ti_next_stats > 0) + save_stats = 1; -#ifdef HAVE_VELOCIRAPTOR - velociraptor_init(e); - velociraptor_invoke(e); + /* Do we want a snapshot? */ + dump_snapshot = 0; + if (e->ti_end_min > e->ti_next_snapshot && e->ti_next_snapshot > 0) + dump_snapshot = 1; - /* ... and find the next output time */ - if (e->stf_output_freq_format == TIME) engine_compute_next_stf_time(e); -#endif + /* Do we want to perform structure finding? 
*/ + run_stf = 0; + if (with_stf && stf_time_output) { + if (e->ti_end_min > e->ti_next_stf && e->ti_next_stf > 0) run_stf = 1; + } } /* Restore the information we stored */ e->ti_current = ti_current; + if (e->policy & engine_policy_cosmology) + cosmology_update(e->cosmology, e->physical_constants, e->ti_current); e->max_active_bin = max_active_bin; e->time = time; - - TIMER_TOC2(timer_step); - - clocks_gettime(&time2); - e->wallclock_time = (float)clocks_diff(&time1, &time2); - -#ifdef SWIFT_DEBUG_TASKS - /* Time in ticks at the end of this step. */ - e->toc_step = getticks(); -#endif } /** @@ -5034,7 +3361,7 @@ void engine_dump_restarts(struct engine *e, int drifted_all, int force) { restart_remove_previous(e->restart_file); /* Drift all particles first (may have just been done). */ - if (!drifted_all) engine_drift_all(e); + if (!drifted_all) engine_drift_all(e, /*drift_mpole=*/1); restart_write(e, e->restart_file); if (e->verbose) @@ -5075,14 +3402,17 @@ void engine_unskip(struct engine *e) { #endif // WITH_PROFILER /* Move the active local cells to the top of the list. */ - int *local_cells = e->s->local_cells_top; + int *local_cells = e->s->local_cells_with_tasks_top; int num_active_cells = 0; - for (int k = 0; k < s->nr_local_cells; k++) { + for (int k = 0; k < s->nr_local_cells_with_tasks; k++) { struct cell *c = &s->cells_top[local_cells[k]]; + if ((e->policy & engine_policy_hydro && cell_is_active_hydro(c, e)) || - (e->policy & - (engine_policy_self_gravity | engine_policy_external_gravity) && + (e->policy & engine_policy_self_gravity && + cell_is_active_gravity(c, e)) || + (e->policy & engine_policy_external_gravity && cell_is_active_gravity(c, e))) { + if (num_active_cells != k) memswap(&local_cells[k], &local_cells[num_active_cells], sizeof(int)); num_active_cells += 1; @@ -5102,123 +3432,6 @@ void engine_unskip(struct engine *e) { clocks_getunit()); } -/** - * @brief Mapper function to drift *all* particle types and multipoles forward - * in time. 
- * - * @param map_data An array of #cell%s. - * @param num_elements Chunk size. - * @param extra_data Pointer to an #engine. - */ -void engine_do_drift_all_mapper(void *map_data, int num_elements, - void *extra_data) { - - struct engine *e = (struct engine *)extra_data; - struct cell *cells = (struct cell *)map_data; - - for (int ind = 0; ind < num_elements; ind++) { - struct cell *c = &cells[ind]; - if (c != NULL && c->nodeID == e->nodeID) { - /* Drift all the particles */ - cell_drift_part(c, e, 1); - - /* Drift all the g-particles */ - cell_drift_gpart(c, e, 1); - } - - /* Drift the multipoles */ - if (e->policy & engine_policy_self_gravity) { - cell_drift_all_multipoles(c, e); - } - } -} - -/** - * @brief Drift *all* particles and multipoles at all levels - * forward to the current time. - * - * @param e The #engine. - */ -void engine_drift_all(struct engine *e) { - - const ticks tic = getticks(); - -#ifdef SWIFT_DEBUG_CHECKS - if (e->nodeID == 0) { - if (e->policy & engine_policy_cosmology) - message("Drifting all to a=%e", - exp(e->ti_current * e->time_base) * e->cosmology->a_begin); - else - message("Drifting all to t=%e", - e->ti_current * e->time_base + e->time_begin); - } -#endif - - threadpool_map(&e->threadpool, engine_do_drift_all_mapper, e->s->cells_top, - e->s->nr_cells, sizeof(struct cell), 0, e); - - /* Synchronize particle positions */ - space_synchronize_particle_positions(e->s); - -#ifdef SWIFT_DEBUG_CHECKS - /* Check that all cells have been drifted to the current time. */ - space_check_drift_point(e->s, e->ti_current, - e->policy & engine_policy_self_gravity); - part_verify_links(e->s->parts, e->s->gparts, e->s->sparts, e->s->nr_parts, - e->s->nr_gparts, e->s->nr_sparts, e->verbose); -#endif - - if (e->verbose) - message("took %.3f %s.", clocks_from_ticks(getticks() - tic), - clocks_getunit()); -} - -/** - * @brief Mapper function to drift *all* top-level multipoles forward in - * time. - * - * @param map_data An array of #cell%s. 
- * @param num_elements Chunk size. - * @param extra_data Pointer to an #engine. - */ -void engine_do_drift_top_multipoles_mapper(void *map_data, int num_elements, - void *extra_data) { - - struct engine *e = (struct engine *)extra_data; - struct cell *cells = (struct cell *)map_data; - - for (int ind = 0; ind < num_elements; ind++) { - struct cell *c = &cells[ind]; - if (c != NULL) { - - /* Drift the multipole at this level only */ - if (c->ti_old_multipole != e->ti_current) cell_drift_multipole(c, e); - } - } -} - -/** - * @brief Drift *all* top-level multipoles forward to the current time. - * - * @param e The #engine. - */ -void engine_drift_top_multipoles(struct engine *e) { - - const ticks tic = getticks(); - - threadpool_map(&e->threadpool, engine_do_drift_top_multipoles_mapper, - e->s->cells_top, e->s->nr_cells, sizeof(struct cell), 0, e); - -#ifdef SWIFT_DEBUG_CHECKS - /* Check that all cells have been drifted to the current time. */ - space_check_top_multipoles_drift_point(e->s, e->ti_current); -#endif - - if (e->verbose) - message("took %.3f %s.", clocks_from_ticks(getticks() - tic), - clocks_getunit()); -} - void engine_do_reconstruct_multipoles_mapper(void *map_data, int num_elements, void *extra_data) { @@ -5271,24 +3484,43 @@ void engine_reconstruct_multipoles(struct engine *e) { void engine_makeproxies(struct engine *e) { #ifdef WITH_MPI + /* Let's time this */ + const ticks tic = getticks(); + + /* Useful local information */ const int nodeID = e->nodeID; const struct space *s = e->s; - const int *cdim = s->cdim; + + /* Handle on the cells and proxies */ + struct cell *cells = s->cells_top; + struct proxy *proxies = e->proxies; + + /* Some info about the domain */ + const int cdim[3] = {s->cdim[0], s->cdim[1], s->cdim[2]}; + const double dim[3] = {s->dim[0], s->dim[1], s->dim[2]}; const int periodic = s->periodic; + const double cell_width[3] = {cells[0].width[0], cells[0].width[1], + cells[0].width[2]}; /* Get some info about the physics */ - 
const double *dim = s->dim; - const struct gravity_props *props = e->gravity_properties; - const double theta_crit2 = props->theta_crit2; const int with_hydro = (e->policy & engine_policy_hydro); const int with_gravity = (e->policy & engine_policy_self_gravity); + const double theta_crit_inv = e->gravity_properties->theta_crit_inv; + const double theta_crit2 = e->gravity_properties->theta_crit2; + const double max_mesh_dist = e->mesh->r_cut_max; + const double max_mesh_dist2 = max_mesh_dist * max_mesh_dist; - /* Handle on the cells and proxies */ - struct cell *cells = s->cells_top; - struct proxy *proxies = e->proxies; + /* Distance between centre of the cell and corners */ + const double r_diag2 = cell_width[0] * cell_width[0] + + cell_width[1] * cell_width[1] + + cell_width[2] * cell_width[2]; + const double r_diag = 0.5 * sqrt(r_diag2); - /* Let's time this */ - const ticks tic = getticks(); + /* Maximal distance from a shifted CoM to centre of cell */ + const double delta_CoM = engine_max_proxy_centre_frac * r_diag; + + /* Maximal distance from shifted CoM to any corner */ + const double r_max = r_diag + 2. * delta_CoM; /* Prepare the proxies and the proxy index. */ if (e->proxy_ind == NULL) @@ -5298,63 +3530,63 @@ void engine_makeproxies(struct engine *e) { e->nr_proxies = 0; /* Compute how many cells away we need to walk */ - int delta = 1; /*hydro case */ + int delta_cells = 1; /*hydro case */ + + /* Gravity needs to take the opening angle into account */ if (with_gravity) { - const double distance = 2.5 * cells[0].width[0] / props->theta_crit; - delta = (int)(distance / cells[0].width[0]) + 1; + const double distance = 2. 
* r_max * theta_crit_inv; + delta_cells = (int)(distance / cells[0].dmin) + 1; + } + + /* Turn this into upper and lower bounds for loops */ + int delta_m = delta_cells; + int delta_p = delta_cells; + + /* Special case where every cell is in range of every other one */ + if (delta_cells >= cdim[0] / 2) { + if (cdim[0] % 2 == 0) { + delta_m = cdim[0] / 2; + delta_p = cdim[0] / 2 - 1; + } else { + delta_m = cdim[0] / 2; + delta_p = cdim[0] / 2; + } } /* Let's be verbose about this choice */ if (e->verbose) - message("Looking for proxies up to %d top-level cells away", delta); + message( + "Looking for proxies up to %d top-level cells away (delta_m=%d " + "delta_p=%d)", + delta_cells, delta_m, delta_p); /* Loop over each cell in the space. */ - int ind[3]; - for (ind[0] = 0; ind[0] < cdim[0]; ind[0]++) { - for (ind[1] = 0; ind[1] < cdim[1]; ind[1]++) { - for (ind[2] = 0; ind[2] < cdim[2]; ind[2]++) { + for (int i = 0; i < cdim[0]; i++) { + for (int j = 0; j < cdim[1]; j++) { + for (int k = 0; k < cdim[2]; k++) { /* Get the cell ID. */ - const int cid = cell_getid(cdim, ind[0], ind[1], ind[2]); - - double CoM_i[3] = {0., 0., 0.}; - double r_max_i = 0.; - - if (with_gravity) { - - /* Get ci's multipole */ - const struct gravity_tensors *multi_i = cells[cid].multipole; - CoM_i[0] = multi_i->CoM[0]; - CoM_i[1] = multi_i->CoM[1]; - CoM_i[2] = multi_i->CoM[2]; - r_max_i = multi_i->r_max; - } - - /* Loop over all its neighbours (periodic). */ - for (int i = -delta; i <= delta; i++) { - int ii = ind[0] + i; - if (ii >= cdim[0]) - ii -= cdim[0]; - else if (ii < 0) - ii += cdim[0]; - for (int j = -delta; j <= delta; j++) { - int jj = ind[1] + j; - if (jj >= cdim[1]) - jj -= cdim[1]; - else if (jj < 0) - jj += cdim[1]; - for (int k = -delta; k <= delta; k++) { - int kk = ind[2] + k; - if (kk >= cdim[2]) - kk -= cdim[2]; - else if (kk < 0) - kk += cdim[2]; + const int cid = cell_getid(cdim, i, j, k); + + /* Loop over all its neighbours neighbours in range. 
*/ + for (int ii = -delta_m; ii <= delta_p; ii++) { + int iii = i + ii; + if (!periodic && (iii < 0 || iii >= cdim[0])) continue; + iii = (iii + cdim[0]) % cdim[0]; + for (int jj = -delta_m; jj <= delta_p; jj++) { + int jjj = j + jj; + if (!periodic && (jjj < 0 || jjj >= cdim[1])) continue; + jjj = (jjj + cdim[1]) % cdim[1]; + for (int kk = -delta_m; kk <= delta_p; kk++) { + int kkk = k + kk; + if (!periodic && (kkk < 0 || kkk >= cdim[2])) continue; + kkk = (kkk + cdim[2]) % cdim[2]; /* Get the cell ID. */ - const int cjd = cell_getid(cdim, ii, jj, kk); + const int cjd = cell_getid(cdim, iii, jjj, kkk); - /* Early abort (same cell) */ - if (cid == cjd) continue; + /* Early abort */ + if (cid >= cjd) continue; /* Early abort (both same node) */ if (cells[cid].nodeID == nodeID && cells[cjd].nodeID == nodeID) @@ -5366,48 +3598,74 @@ void engine_makeproxies(struct engine *e) { int proxy_type = 0; - /* In the hydro case, only care about neighbours */ + /* In the hydro case, only care about direct neighbours */ if (with_hydro) { + // MATTHIEU: to do: Write a better expression for the + // non-periodic case. + /* This is super-ugly but checks for direct neighbours */ /* with periodic BC */ - if (((abs(ind[0] - ii) <= 1 || - abs(ind[0] - ii - cdim[0]) <= 1 || - abs(ind[0] - ii + cdim[0]) <= 1) && - (abs(ind[1] - jj) <= 1 || - abs(ind[1] - jj - cdim[1]) <= 1 || - abs(ind[1] - jj + cdim[1]) <= 1) && - (abs(ind[2] - kk) <= 1 || - abs(ind[2] - kk - cdim[2]) <= 1 || - abs(ind[2] - kk + cdim[2]) <= 1))) + if (((abs(i - iii) <= 1 || abs(i - iii - cdim[0]) <= 1 || + abs(i - iii + cdim[0]) <= 1) && + (abs(j - jjj) <= 1 || abs(j - jjj - cdim[1]) <= 1 || + abs(j - jjj + cdim[1]) <= 1) && + (abs(k - kkk) <= 1 || abs(k - kkk - cdim[2]) <= 1 || + abs(k - kkk + cdim[2]) <= 1))) proxy_type |= (int)proxy_cell_type_hydro; } /* In the gravity case, check distances using the MAC. 
*/ if (with_gravity) { - /* Get cj's multipole */ - const struct gravity_tensors *multi_j = cells[cjd].multipole; - const double CoM_j[3] = {multi_j->CoM[0], multi_j->CoM[1], - multi_j->CoM[2]}; - const double r_max_j = multi_j->r_max; - - /* Let's compute the current distance between the cell pair*/ - double dx = CoM_i[0] - CoM_j[0]; - double dy = CoM_i[1] - CoM_j[1]; - double dz = CoM_i[2] - CoM_j[2]; - - /* Apply BC */ - if (periodic) { - dx = nearest(dx, dim[0]); - dy = nearest(dy, dim[1]); - dz = nearest(dz, dim[2]); - } - const double r2 = dx * dx + dy * dy + dz * dz; + /* First just add the direct neighbours. Then look for + some further out if the opening angle demands it */ + + /* This is super-ugly but checks for direct neighbours */ + /* with periodic BC */ + if (((abs(i - iii) <= 1 || abs(i - iii - cdim[0]) <= 1 || + abs(i - iii + cdim[0]) <= 1) && + (abs(j - jjj) <= 1 || abs(j - jjj - cdim[1]) <= 1 || + abs(j - jjj + cdim[1]) <= 1) && + (abs(k - kkk) <= 1 || abs(k - kkk - cdim[2]) <= 1 || + abs(k - kkk + cdim[2]) <= 1))) { - /* Are we too close for M2L? */ - if (!gravity_M2L_accept(r_max_i, r_max_j, theta_crit2, r2)) proxy_type |= (int)proxy_cell_type_gravity; + } else { + + /* We don't have multipoles yet (or there CoMs) so we will + have to cook up something based on cell locations only. We + hence need an upper limit on the distance that the CoMs in + those cells could have. We then can decide whether we are + too close for an M2L interaction and hence require a proxy + as this pair of cells cannot rely on just an M2L + calculation. */ + + /* Minimal distance between any two points in the cells */ + const double min_dist_centres2 = cell_min_dist2_same_size( + &cells[cid], &cells[cjd], periodic, dim); + + /* Let's now assume the CoMs will shift a bit */ + const double min_dist_CoM = + sqrt(min_dist_centres2) - 2. 
* delta_CoM; + const double min_dist_CoM2 = min_dist_CoM * min_dist_CoM; + + /* Are we beyond the distance where the truncated forces are 0 + * but not too far such that M2L can be used? */ + if (periodic) { + + if ((min_dist_CoM2 < max_mesh_dist2) && + (!gravity_M2L_accept(r_max, r_max, theta_crit2, + min_dist_CoM2))) + proxy_type |= (int)proxy_cell_type_gravity; + + } else { + + if (!gravity_M2L_accept(r_max, r_max, theta_crit2, + min_dist_CoM2)) + proxy_type |= (int)proxy_cell_type_gravity; + } + } } /* Abort if not in range at all */ @@ -5417,8 +3675,8 @@ void engine_makeproxies(struct engine *e) { if (cells[cid].nodeID == nodeID && cells[cjd].nodeID != nodeID) { /* Do we already have a relationship with this node? */ - int pid = e->proxy_ind[cells[cjd].nodeID]; - if (pid < 0) { + int proxy_id = e->proxy_ind[cells[cjd].nodeID]; + if (proxy_id < 0) { if (e->nr_proxies == engine_maxproxies) error("Maximum number of proxies exceeded."); @@ -5428,24 +3686,31 @@ void engine_makeproxies(struct engine *e) { /* Store the information */ e->proxy_ind[cells[cjd].nodeID] = e->nr_proxies; - pid = e->nr_proxies; + proxy_id = e->nr_proxies; e->nr_proxies += 1; + + /* Check the maximal proxy limit */ + if ((size_t)proxy_id > 8 * sizeof(long long)) + error( + "Created more than %zd proxies. cell.mpi.sendto will " + "overflow.", + 8 * sizeof(long long)); } /* Add the cell to the proxy */ - proxy_addcell_in(&proxies[pid], &cells[cjd], proxy_type); - proxy_addcell_out(&proxies[pid], &cells[cid], proxy_type); + proxy_addcell_in(&proxies[proxy_id], &cells[cjd], proxy_type); + proxy_addcell_out(&proxies[proxy_id], &cells[cid], proxy_type); /* Store info about where to send the cell */ - cells[cid].sendto |= (1ULL << pid); + cells[cid].mpi.sendto |= (1ULL << proxy_id); } /* Same for the symmetric case? */ if (cells[cjd].nodeID == nodeID && cells[cid].nodeID != nodeID) { /* Do we already have a relationship with this node? 
*/ - int pid = e->proxy_ind[cells[cid].nodeID]; - if (pid < 0) { + int proxy_id = e->proxy_ind[cells[cid].nodeID]; + if (proxy_id < 0) { if (e->nr_proxies == engine_maxproxies) error("Maximum number of proxies exceeded."); @@ -5455,16 +3720,23 @@ void engine_makeproxies(struct engine *e) { /* Store the information */ e->proxy_ind[cells[cid].nodeID] = e->nr_proxies; - pid = e->nr_proxies; + proxy_id = e->nr_proxies; e->nr_proxies += 1; + + /* Check the maximal proxy limit */ + if ((size_t)proxy_id > 8 * sizeof(long long)) + error( + "Created more than %zd proxies. cell.mpi.sendto will " + "overflow.", + 8 * sizeof(long long)); } /* Add the cell to the proxy */ - proxy_addcell_in(&proxies[pid], &cells[cid], proxy_type); - proxy_addcell_out(&proxies[pid], &cells[cjd], proxy_type); + proxy_addcell_in(&proxies[proxy_id], &cells[cid], proxy_type); + proxy_addcell_out(&proxies[proxy_id], &cells[cjd], proxy_type); /* Store info about where to send the cell */ - cells[cjd].sendto |= (1ULL << pid); + cells[cjd].mpi.sendto |= (1ULL << proxy_id); } } } @@ -5491,6 +3763,8 @@ void engine_makeproxies(struct engine *e) { void engine_split(struct engine *e, struct partition *initial_partition) { #ifdef WITH_MPI + const ticks tic = getticks(); + struct space *s = e->s; /* Do the initial partition of the cells. */ @@ -5571,6 +3845,10 @@ void engine_split(struct engine *e, struct partition *initial_partition) { s->nr_sparts, e->verbose); #endif + if (e->verbose) + message("took %.3f %s.", clocks_from_ticks(getticks() - tic), + clocks_getunit()); + #else error("SWIFT was not compiled with MPI support."); #endif @@ -5590,8 +3868,7 @@ void engine_dump_snapshot(struct engine *e) { /* Check that all cells have been drifted to the current time. * That can include cells that have not * previously been active on this rank. 
*/ - space_check_drift_point(e->s, e->ti_current, - e->policy & engine_policy_self_gravity); + space_check_drift_point(e->s, e->ti_current, /* check_mpole=*/0); /* Be verbose about this */ if (e->nodeID == 0) { @@ -5640,6 +3917,42 @@ void engine_dump_snapshot(struct engine *e) { (float)clocks_diff(&time1, &time2), clocks_getunit()); } +/** + * @brief Writes an index file with the current state of the engine + * + * @param e The #engine. + */ +void engine_dump_index(struct engine *e) { + +#if defined(WITH_LOGGER) + struct clocks_time time1, time2; + clocks_gettime(&time1); + + if (e->verbose) { + if (e->policy & engine_policy_cosmology) + message("Writing index at a=%e", + exp(e->ti_current * e->time_base) * e->cosmology->a_begin); + else + message("Writing index at t=%e", + e->ti_current * e->time_base + e->time_begin); + } + + /* Dump... */ + write_index_single(e, e->logger->base_name, e->internal_units, + e->snapshot_units); + + /* Flag that we dumped a snapshot */ + e->step_props |= engine_step_prop_logger_index; + + clocks_gettime(&time2); + if (e->verbose) + message("writing particle indices took %.3f %s.", + (float)clocks_diff(&time1, &time2), clocks_getunit()); +#else + error("SWIFT was not compiled with the logger"); +#endif +} + #ifdef HAVE_SETAFFINITY /** * @brief Returns the initial affinity the main thread is using. @@ -5716,6 +4029,7 @@ void engine_unpin(void) { * @param cosmo The #cosmology used for this run. * @param hydro The #hydro_props used for this run. * @param gravity The #gravity_props used for this run. + * @param stars The #stars_props used for this run. * @param mesh The #pm_mesh used for the long-range periodic forces. * @param potential The properties of the external potential. * @param cooling_func The properties of the cooling function. 
@@ -5728,9 +4042,10 @@ void engine_init(struct engine *e, struct space *s, struct swift_params *params, const struct unit_system *internal_units, const struct phys_const *physical_constants, struct cosmology *cosmo, const struct hydro_props *hydro, - struct gravity_props *gravity, struct pm_mesh *mesh, + struct gravity_props *gravity, const struct stars_props *stars, + struct pm_mesh *mesh, const struct external_potential *potential, - const struct cooling_function_data *cooling_func, + struct cooling_function_data *cooling_func, const struct chemistry_global_data *chemistry, struct sourceterms *sourceterms) { @@ -5767,8 +4082,8 @@ void engine_init(struct engine *e, struct space *s, struct swift_params *params, parser_get_param_string(params, "Snapshots:basename", e->snapshot_base_name); e->snapshot_compression = parser_get_opt_param_int(params, "Snapshots:compression", 0); - e->snapshot_label_delta = - parser_get_opt_param_int(params, "Snapshots:label_delta", 1); + e->snapshot_int_time_label_on = + parser_get_opt_param_int(params, "Snapshots:int_time_label_on", 0); e->snapshot_units = (struct unit_system *)malloc(sizeof(struct unit_system)); units_init_default(e->snapshot_units, params, "Snapshots", internal_units); e->snapshot_output_count = 0; @@ -5790,6 +4105,7 @@ void engine_init(struct engine *e, struct space *s, struct swift_params *params, e->cosmology = cosmo; e->hydro_properties = hydro; e->gravity_properties = gravity; + e->stars_properties = stars; e->mesh = mesh; e->external_potential = potential; e->cooling_func = cooling_func; @@ -5802,6 +4118,11 @@ void engine_init(struct engine *e, struct space *s, struct swift_params *params, e->last_repartition = 0; #endif +#if defined(WITH_LOGGER) + e->logger = (struct logger *)malloc(sizeof(struct logger)); + logger_init(e->logger, params); +#endif + /* Make the space link back to the engine. 
*/ s->e = e; @@ -5829,6 +4150,33 @@ void engine_init(struct engine *e, struct space *s, struct swift_params *params, e->ti_current = 0; } + /* Initialise VELOCIraptor output. */ + if (e->policy & engine_policy_structure_finding) { + parser_get_param_string(params, "StructureFinding:basename", + e->stfBaseName); + e->time_first_stf_output = + parser_get_opt_param_double(params, "StructureFinding:time_first", 0.); + e->a_first_stf_output = parser_get_opt_param_double( + params, "StructureFinding:scale_factor_first", 0.1); + e->stf_output_freq_format = (enum io_stf_output_format)parser_get_param_int( + params, "StructureFinding:output_time_format"); + + if (e->stf_output_freq_format == io_stf_steps) { + e->delta_step_stf = + parser_get_param_int(params, "StructureFinding:delta_step"); + } else if (e->stf_output_freq_format == io_stf_time) { + e->delta_time_stf = + parser_get_param_double(params, "StructureFinding:delta_time"); + } else { + error( + "Invalid flag (%d) set for output time format of structure finding.", + e->stf_output_freq_format); + } + + /* overwrite input if outputlist */ + if (e->output_list_stf) e->stf_output_freq_format = io_stf_time; + } + engine_init_output_lists(e, params); } @@ -5879,34 +4227,8 @@ void engine_config(int restart, struct engine *e, struct swift_params *params, e->restart_file = restart_file; e->restart_next = 0; e->restart_dt = 0; - e->timeFirstSTFOutput = 0; engine_rank = nodeID; - /* Initialise VELOCIraptor. 
*/ - if (e->policy & engine_policy_structure_finding) { - parser_get_param_string(params, "StructureFinding:basename", - e->stfBaseName); - e->timeFirstSTFOutput = - parser_get_param_double(params, "StructureFinding:time_first"); - e->a_first_stf = parser_get_opt_param_double( - params, "StructureFinding:scale_factor_first", 0.1); - e->stf_output_freq_format = - parser_get_param_int(params, "StructureFinding:output_time_format"); - if (e->stf_output_freq_format == STEPS) { - e->deltaStepSTF = - parser_get_param_int(params, "StructureFinding:delta_step"); - } else if (e->stf_output_freq_format == TIME) { - e->deltaTimeSTF = - parser_get_param_double(params, "StructureFinding:delta_time"); - } else - error( - "Invalid flag (%d) set for output time format of structure finding.", - e->stf_output_freq_format); - - /* overwrite input if outputlist */ - if (e->output_list_stf) e->stf_output_freq_format = TIME; - } - /* Get the number of queues */ int nr_queues = parser_get_opt_param_int(params, "Scheduler:nr_queues", nr_threads); @@ -6104,7 +4426,7 @@ void engine_config(int restart, struct engine *e, struct swift_params *params, engine_step_prop_snapshot, engine_step_prop_restarts); fprintf(e->file_timesteps, - "# %6s %14s %14s %10s %14s %9s %12s %12s %12s %16s [%s] %6s\n", + "# %6s %14s %12s %12s %14s %9s %12s %12s %12s %16s [%s] %6s\n", "Step", "Time", "Scale-factor", "Redshift", "Time-step", "Time-bins", "Updates", "g-Updates", "s-Updates", "Wall-clock time", clocks_getunit(), "Props"); @@ -6123,6 +4445,9 @@ void engine_config(int restart, struct engine *e, struct swift_params *params, if (e->policy & engine_policy_self_gravity) if (e->nodeID == 0) gravity_props_print(e->gravity_properties); + if (e->policy & engine_policy_stars) + if (e->nodeID == 0) stars_props_print(e->stars_properties); + /* Check we have sensible time bounds */ if (e->time_begin >= e->time_end) error( @@ -6130,6 +4455,10 @@ void engine_config(int restart, struct engine *e, struct swift_params 
*params, "(t_beg = %e)", e->time_end, e->time_begin); + /* Check we don't have inappropriate time labels */ + if ((e->snapshot_int_time_label_on == 1) && (e->time_end <= 1.f)) + error("Snapshot integer time labels enabled but end time <= 1"); + /* Check we have sensible time-step values */ if (e->dt_min > e->dt_max) error( @@ -6186,17 +4515,17 @@ void engine_config(int restart, struct engine *e, struct swift_params *params, e->a_first_statistics, e->cosmology->a_begin); if ((e->policy & engine_policy_structure_finding) && - (e->stf_output_freq_format == TIME)) { + (e->stf_output_freq_format == io_stf_time)) { - if (e->deltaTimeSTF <= 1.) - error("Time between STF (%e) must be > 1.", e->deltaTimeSTF); + if (e->delta_time_stf <= 1.) + error("Time between STF (%e) must be > 1.", e->delta_time_stf); - if (e->a_first_stf < e->cosmology->a_begin) + if (e->a_first_stf_output < e->cosmology->a_begin) error( "Scale-factor of first stf output (%e) must be after the " "simulation " "start a=%e.", - e->a_first_stf, e->cosmology->a_begin); + e->a_first_stf_output, e->cosmology->a_begin); } } else { @@ -6222,23 +4551,21 @@ void engine_config(int restart, struct engine *e, struct swift_params *params, e->time_first_statistics, e->time_begin); if ((e->policy & engine_policy_structure_finding) && - (e->stf_output_freq_format == TIME)) { + (e->stf_output_freq_format == io_stf_time)) { - if (e->deltaTimeSTF <= 0.) - error("Time between STF (%e) must be positive.", e->deltaTimeSTF); + if (e->delta_time_stf <= 0.) 
+ error("Time between STF (%e) must be positive.", e->delta_time_stf); - if (e->timeFirstSTFOutput < e->time_begin) + if (e->time_first_stf_output < e->time_begin) error("Time of first STF (%e) must be after the simulation start t=%e.", - e->timeFirstSTFOutput, e->time_begin); + e->time_first_stf_output, e->time_begin); } } if (e->policy & engine_policy_structure_finding) { /* Find the time of the first stf output */ - if (e->stf_output_freq_format == TIME) { + if (e->stf_output_freq_format == io_stf_time) engine_compute_next_stf_time(e); - message("Next STF step will be: %lld", e->ti_nextSTF); - } } /* Get the total mass */ @@ -6252,7 +4579,14 @@ void engine_config(int restart, struct engine *e, struct swift_params *params, MPI_COMM_WORLD); #endif - /* Find the time of the first snapshot output */ +#if defined(WITH_LOGGER) + if (e->nodeID == 0) + message( + "WARNING: There is currently no way of predicting the output " + "size, please use it carefully"); +#endif + + /* Find the time of the first snapshot output */ engine_compute_next_snapshot_time(e); /* Find the time of the first statistics output */ @@ -6292,7 +4626,10 @@ void engine_config(int restart, struct engine *e, struct swift_params *params, /* Construct types for MPI communications */ #ifdef WITH_MPI part_create_mpi_types(); - stats_create_MPI_type(); + multipole_create_mpi_types(); + stats_create_mpi_type(); + proxy_create_mpi_type(); + task_create_mpi_comms(); #endif /* Initialise the collection group. 
*/ @@ -6394,7 +4731,12 @@ void engine_config(int restart, struct engine *e, struct swift_params *params, } } -/* Free the affinity stuff */ +#ifdef WITH_LOGGER + /* Write the particle logger header */ + logger_write_file_header(e->logger, e); +#endif + + /* Free the affinity stuff */ #if defined(HAVE_SETAFFINITY) if (with_aff) { free(cpuid); @@ -6570,54 +4912,56 @@ void engine_compute_next_statistics_time(struct engine *e) { void engine_compute_next_stf_time(struct engine *e) { /* Do output_list file case */ if (e->output_list_stf) { - output_list_read_next_time(e->output_list_stf, e, "stf", &e->ti_nextSTF); + output_list_read_next_time(e->output_list_stf, e, "stf", &e->ti_next_stf); return; } /* Find upper-bound on last output */ double time_end; if (e->policy & engine_policy_cosmology) - time_end = e->cosmology->a_end * e->deltaTimeSTF; + time_end = e->cosmology->a_end * e->delta_time_stf; else - time_end = e->time_end + e->deltaTimeSTF; + time_end = e->time_end + e->delta_time_stf; /* Find next snasphot above current time */ - double time = e->timeFirstSTFOutput; - + double time; + if (e->policy & engine_policy_cosmology) + time = e->a_first_stf_output; + else + time = e->time_first_stf_output; while (time < time_end) { /* Output time on the integer timeline */ if (e->policy & engine_policy_cosmology) - e->ti_nextSTF = log(time / e->cosmology->a_begin) / e->time_base; + e->ti_next_stf = log(time / e->cosmology->a_begin) / e->time_base; else - e->ti_nextSTF = (time - e->time_begin) / e->time_base; + e->ti_next_stf = (time - e->time_begin) / e->time_base; /* Found it? 
*/ - if (e->ti_nextSTF > e->ti_current) break; + if (e->ti_next_stf > e->ti_current) break; if (e->policy & engine_policy_cosmology) - time *= e->deltaTimeSTF; + time *= e->delta_time_stf; else - time += e->deltaTimeSTF; + time += e->delta_time_stf; } /* Deal with last snapshot */ - if (e->ti_nextSTF >= max_nr_timesteps) { - e->ti_nextSTF = -1; + if (e->ti_next_stf >= max_nr_timesteps) { + e->ti_next_stf = -1; if (e->verbose) message("No further output time."); } else { /* Be nice, talk... */ if (e->policy & engine_policy_cosmology) { - const float next_snapshot_time = - exp(e->ti_nextSTF * e->time_base) * e->cosmology->a_begin; + const float next_stf_time = + exp(e->ti_next_stf * e->time_base) * e->cosmology->a_begin; if (e->verbose) - message("Next output time set to a=%e.", next_snapshot_time); + message("Next VELOCIraptor time set to a=%e.", next_stf_time); } else { - const float next_snapshot_time = - e->ti_nextSTF * e->time_base + e->time_begin; + const float next_stf_time = e->ti_next_stf * e->time_base + e->time_begin; if (e->verbose) - message("Next output time set to t=%e.", next_snapshot_time); + message("Next VELOCIraptor time set to t=%e.", next_stf_time); } } } @@ -6658,14 +5002,14 @@ void engine_init_output_lists(struct engine *e, struct swift_params *params) { /* Deal with stf */ double stf_time_first; e->output_list_stf = NULL; - output_list_init(&e->output_list_stf, e, "StructureFinding", &e->deltaTimeSTF, - &stf_time_first); + output_list_init(&e->output_list_stf, e, "StructureFinding", + &e->delta_time_stf, &stf_time_first); if (e->output_list_stf) { if (e->policy & engine_policy_cosmology) - e->a_first_stf = stf_time_first; + e->a_first_stf_output = stf_time_first; else - e->timeFirstSTFOutput = stf_time_first; + e->time_first_stf_output = stf_time_first; } } @@ -6677,12 +5021,16 @@ void engine_init_output_lists(struct engine *e, struct swift_params *params) { */ void engine_recompute_displacement_constraint(struct engine *e) { + const ticks 
tic = getticks(); + /* Get the cosmological information */ const struct cosmology *cosmo = e->cosmology; const float Om = cosmo->Omega_m; const float Ob = cosmo->Omega_b; - const float rho_crit = cosmo->critical_density; + const float H0 = cosmo->H0; const float a = cosmo->a; + const float G_newton = e->physical_constants->const_newton_G; + const float rho_crit0 = 3.f * H0 * H0 / (8.f * M_PI * G_newton); /* Start by reducing the minimal mass of each particle type */ float min_mass[swift_type_count] = {e->s->min_part_mass, @@ -6752,7 +5100,7 @@ void engine_recompute_displacement_constraint(struct engine *e) { const float min_mass_dm = min_mass[1]; /* Inter-particle sepration for the DM */ - const float d_dm = cbrtf(min_mass_dm / ((Om - Ob) * rho_crit)); + const float d_dm = cbrtf(min_mass_dm / ((Om - Ob) * rho_crit0)); /* RMS peculiar motion for the DM */ const float rms_vel_dm = vel_norm_dm / N_dm; @@ -6768,7 +5116,7 @@ void engine_recompute_displacement_constraint(struct engine *e) { const float min_mass_b = min(min_mass[0], min_mass[4]); /* Inter-particle sepration for the baryons */ - const float d_b = cbrtf(min_mass_b / (Ob * rho_crit)); + const float d_b = cbrtf(min_mass_b / (Ob * rho_crit0)); /* RMS peculiar motion for the baryons */ const float rms_vel_b = vel_norm_b / N_b; @@ -6785,6 +5133,10 @@ void engine_recompute_displacement_constraint(struct engine *e) { if (e->verbose) message("max_dt_RMS_displacement = %e", e->dt_max_RMS_displacement); + + if (e->verbose) + message("took %.3f %s.", clocks_from_ticks(getticks() - tic), + clocks_getunit()); } /** @@ -6802,20 +5154,17 @@ void engine_clean(struct engine *e) { } free(e->runners); free(e->snapshot_units); - if (e->output_list_snapshots) { - output_list_clean(e->output_list_snapshots); - free(e->output_list_snapshots); - } - if (e->output_list_stats) { - output_list_clean(e->output_list_stats); - free(e->output_list_stats); - } - if (e->output_list_stf) { - output_list_clean(e->output_list_stf); - 
free(e->output_list_stf); - } + + output_list_clean(&e->output_list_snapshots); + output_list_clean(&e->output_list_stats); + output_list_clean(&e->output_list_stf); + free(e->links); free(e->cell_loc); +#if defined(WITH_LOGGER) + logger_clean(e->logger); + free(e->logger); +#endif scheduler_clean(&e->sched); space_clean(e->s); threadpool_clean(&e->threadpool); @@ -6850,6 +5199,7 @@ void engine_struct_dump(struct engine *e, FILE *stream) { phys_const_struct_dump(e->physical_constants, stream); hydro_props_struct_dump(e->hydro_properties, stream); gravity_props_struct_dump(e->gravity_properties, stream); + stars_props_struct_dump(e->stars_properties, stream); pm_mesh_struct_dump(e->mesh, stream); potential_struct_dump(e->external_potential, stream); cooling_struct_dump(e->cooling_func, stream); @@ -6925,6 +5275,11 @@ void engine_struct_restore(struct engine *e, FILE *stream) { gravity_props_struct_restore(gravity_properties, stream); e->gravity_properties = gravity_properties; + struct stars_props *stars_properties = + (struct stars_props *)malloc(sizeof(struct stars_props)); + stars_props_struct_restore(stars_properties, stream); + e->stars_properties = stars_properties; + struct pm_mesh *mesh = (struct pm_mesh *)malloc(sizeof(struct pm_mesh)); pm_mesh_struct_restore(mesh, stream); e->mesh = mesh; @@ -6937,7 +5292,7 @@ void engine_struct_restore(struct engine *e, FILE *stream) { struct cooling_function_data *cooling_func = (struct cooling_function_data *)malloc( sizeof(struct cooling_function_data)); - cooling_struct_restore(cooling_func, stream); + cooling_struct_restore(cooling_func, stream, e->cosmology); e->cooling_func = cooling_func; struct chemistry_global_data *chemistry = @@ -6977,6 +5332,10 @@ void engine_struct_restore(struct engine *e, FILE *stream) { e->output_list_stf = output_list_stf; } +#ifdef EOS_PLANETARY + eos_init(&eos, e->physical_constants, e->snapshot_units, e->parameter_file); +#endif + /* Want to force a rebuild before using this engine. 
Wait to repartition.*/ e->forcerebuild = 1; e->forcerepart = 0; diff --git a/src/engine.h b/src/engine.h index aeb57c65ac36ff5ddbf4b74185adeb94f3d460da..9c39ea15a85d19cf26dfd94a9c42897251c6b42a 100644 --- a/src/engine.h +++ b/src/engine.h @@ -38,6 +38,7 @@ #include "clocks.h" #include "collectgroup.h" #include "cooling_struct.h" +#include "dump.h" #include "gravity_properties.h" #include "mesh_gravity.h" #include "parser.h" @@ -49,6 +50,7 @@ #include "space.h" #include "task.h" #include "units.h" +#include "velociraptor_interface.h" /** * @brief The different policies the #engine can follow. @@ -71,10 +73,12 @@ enum engine_policy { engine_policy_cooling = (1 << 13), engine_policy_sourceterms = (1 << 14), engine_policy_stars = (1 << 15), - engine_policy_structure_finding = (1 << 16) + engine_policy_structure_finding = (1 << 16), + engine_policy_star_formation = (1 << 17), + engine_policy_feedback = (1 << 18) }; -#define engine_maxpolicy 16 -extern const char *engine_policy_names[]; +#define engine_maxpolicy 19 +extern const char *engine_policy_names[engine_maxpolicy + 1]; /** * @brief The different unusual events that can take place in a time-step. @@ -86,17 +90,20 @@ enum engine_step_properties { engine_step_prop_repartition = (1 << 2), engine_step_prop_statistics = (1 << 3), engine_step_prop_snapshot = (1 << 4), - engine_step_prop_restarts = (1 << 5) + engine_step_prop_restarts = (1 << 5), + engine_step_prop_logger_index = (1 << 6) }; /* Some constants */ #define engine_maxproxies 64 #define engine_tasksreweight 1 #define engine_parts_size_grow 1.05 +#define engine_max_proxy_centre_frac 0.2 #define engine_redistribute_alloc_margin 1.2 #define engine_default_energy_file_name "energy" #define engine_default_timesteps_file_name "timesteps" #define engine_max_parts_per_ghost 1000 +#define engine_max_sparts_per_ghost 1000 /** * @brief The rank of the engine as a global variable (for messages). 
@@ -201,6 +208,15 @@ struct engine { /* Total numbers of particles in the system. */ long long total_nr_parts, total_nr_gparts, total_nr_sparts; + /* The total number of inhibted particles in the system. */ + long long nr_inhibited_parts, nr_inhibited_gparts, nr_inhibited_sparts; + +#ifdef SWIFT_DEBUG_CHECKS + /* Total number of particles removed from the system since the last rebuild */ + long long count_inhibited_parts, count_inhibited_gparts, + count_inhibited_sparts; +#endif + /* Total mass in the simulation */ double total_mass; @@ -223,22 +239,22 @@ struct engine { char snapshot_base_name[PARSER_MAX_LINE_SIZE]; int snapshot_compression; - int snapshot_label_delta; + int snapshot_int_time_label_on; struct unit_system *snapshot_units; int snapshot_output_count; /* Structure finding information */ - int stf_output_freq_format; - double a_first_stf; - double timeFirstSTFOutput; - double deltaTimeSTF; - int deltaStepSTF; + enum io_stf_output_format stf_output_freq_format; + int delta_step_stf; + double a_first_stf_output; + double time_first_stf_output; + double delta_time_stf; /* Output_List for the structure finding */ struct output_list *output_list_stf; /* Integer time of the next stf output */ - integertime_t ti_nextSTF; + integertime_t ti_next_stf; char stfBaseName[PARSER_MAX_LINE_SIZE]; @@ -299,6 +315,10 @@ struct engine { int forcerepart; struct repartition *reparttype; +#ifdef WITH_LOGGER + struct logger *logger; +#endif + /* How many steps have we done with the same set of tasks? 
*/ int tasks_age; @@ -322,6 +342,9 @@ struct engine { /* Properties of the hydro scheme */ const struct hydro_props *hydro_properties; + /* Properties of the star model */ + const struct stars_props *stars_properties; + /* Properties of the self-gravity scheme */ struct gravity_props *gravity_properties; @@ -332,7 +355,7 @@ struct engine { const struct external_potential *external_potential; /* Properties of the cooling scheme */ - const struct cooling_function_data *cooling_func; + struct cooling_function_data *cooling_func; /* Properties of the chemistry model */ const struct chemistry_global_data *chemistry; @@ -369,7 +392,7 @@ struct engine { int restart_max_tasks; }; -/* Function prototypes. */ +/* Function prototypes, engine.c. */ void engine_addlink(struct engine *e, struct link **l, struct task *t); void engine_barrier(struct engine *e); void engine_compute_next_snapshot_time(struct engine *e); @@ -377,10 +400,11 @@ void engine_compute_next_stf_time(struct engine *e); void engine_compute_next_statistics_time(struct engine *e); void engine_recompute_displacement_constraint(struct engine *e); void engine_unskip(struct engine *e); -void engine_drift_all(struct engine *e); +void engine_drift_all(struct engine *e, const int drift_mpoles); void engine_drift_top_multipoles(struct engine *e); void engine_reconstruct_multipoles(struct engine *e); void engine_print_stats(struct engine *e); +void engine_check_for_dumps(struct engine *e); void engine_dump_snapshot(struct engine *e); void engine_init_output_lists(struct engine *e, struct swift_params *params); void engine_init(struct engine *e, struct space *s, struct swift_params *params, @@ -389,27 +413,28 @@ void engine_init(struct engine *e, struct space *s, struct swift_params *params, const struct unit_system *internal_units, const struct phys_const *physical_constants, struct cosmology *cosmo, const struct hydro_props *hydro, - struct gravity_props *gravity, struct pm_mesh *mesh, + struct gravity_props *gravity, 
const struct stars_props *stars, + struct pm_mesh *mesh, const struct external_potential *potential, - const struct cooling_function_data *cooling_func, + struct cooling_function_data *cooling_func, const struct chemistry_global_data *chemistry, struct sourceterms *sourceterms); void engine_config(int restart, struct engine *e, struct swift_params *params, int nr_nodes, int nodeID, int nr_threads, int with_aff, int verbose, const char *restart_file); +void engine_dump_index(struct engine *e); void engine_launch(struct engine *e); void engine_prepare(struct engine *e); void engine_init_particles(struct engine *e, int flag_entropy_ICs, int clean_h_values); void engine_step(struct engine *e); -void engine_maketasks(struct engine *e); void engine_split(struct engine *e, struct partition *initial_partition); -void engine_exchange_strays(struct engine *e, size_t offset_parts, - int *ind_part, size_t *Npart, size_t offset_gparts, - int *ind_gpart, size_t *Ngpart, - size_t offset_sparts, int *ind_spart, - size_t *Nspart); -void engine_rebuild(struct engine *e, int clean_h_values); +void engine_exchange_strays(struct engine *e, const size_t offset_parts, + const int *ind_part, size_t *Npart, + const size_t offset_gparts, const int *ind_gpart, + size_t *Ngpart, const size_t offset_sparts, + const int *ind_spart, size_t *Nspart); +void engine_rebuild(struct engine *e, int redistributed, int clean_h_values); void engine_repartition(struct engine *e); void engine_repartition_trigger(struct engine *e); void engine_makeproxies(struct engine *e); @@ -421,6 +446,12 @@ void engine_unpin(void); void engine_clean(struct engine *e); int engine_estimate_nr_tasks(struct engine *e); +/* Function prototypes, engine_maketasks.c. */ +void engine_maketasks(struct engine *e); + +/* Function prototypes, engine_marktasks.c. 
*/ +int engine_marktasks(struct engine *e); + #ifdef HAVE_SETAFFINITY cpu_set_t *engine_entry_affinity(void); #endif diff --git a/src/engine_drift.c b/src/engine_drift.c new file mode 100644 index 0000000000000000000000000000000000000000..7a842068b57813575c33dd670172059abb1e8fc0 --- /dev/null +++ b/src/engine_drift.c @@ -0,0 +1,297 @@ +/******************************************************************************* + * This file is part of SWIFT. + * Copyright (c) 2012 Pedro Gonnet (pedro.gonnet@durham.ac.uk) + * Matthieu Schaller (matthieu.schaller@durham.ac.uk) + * 2015 Peter W. Draper (p.w.draper@durham.ac.uk) + * Angus Lepper (angus.lepper@ed.ac.uk) + * 2016 John A. Regan (john.a.regan@durham.ac.uk) + * Tom Theuns (tom.theuns@durham.ac.uk) + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published + * by the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public License + * along with this program. If not, see <http://www.gnu.org/licenses/>. + * + ******************************************************************************/ + +/* Config parameters. */ +#include "../config.h" + +/* This object's header. */ +#include "engine.h" + +/** + * @brief Mapper function to drift *all* the #part to the current time. + * + * @param map_data An array of #cell%s. + * @param num_elements Chunk size. + * @param extra_data Pointer to an #engine. 
+ */ +void engine_do_drift_all_part_mapper(void *map_data, int num_elements, + void *extra_data) { + + const struct engine *e = (const struct engine *)extra_data; + const int restarting = e->restarting; + struct space *s = e->s; + struct cell *cells_top; + int *local_cells_top; + + if (restarting) { + + /* When restarting, we loop over all top-level cells */ + cells_top = (struct cell *)map_data; + local_cells_top = NULL; + + } else { + + /* In any other case, we use the list of local cells with tasks */ + cells_top = s->cells_top; + local_cells_top = (int *)map_data; + } + + for (int ind = 0; ind < num_elements; ind++) { + + struct cell *c; + + /* When restarting, the list of local cells does not + yet exist. We use the raw list of top-level cells instead */ + if (restarting) + c = &cells_top[ind]; + else + c = &cells_top[local_cells_top[ind]]; + + if (c->nodeID == e->nodeID) { + + /* Drift all the particles */ + cell_drift_part(c, e, /* force the drift=*/1); + } + } +} + +/** + * @brief Mapper function to drift *all* the #gpart to the current time. + * + * @param map_data An array of #cell%s. + * @param num_elements Chunk size. + * @param extra_data Pointer to an #engine. + */ +void engine_do_drift_all_gpart_mapper(void *map_data, int num_elements, + void *extra_data) { + + const struct engine *e = (const struct engine *)extra_data; + const int restarting = e->restarting; + struct space *s = e->s; + struct cell *cells_top; + int *local_cells_top; + + if (restarting) { + + /* When restarting, we loop over all top-level cells */ + cells_top = (struct cell *)map_data; + local_cells_top = NULL; + + } else { + + /* In any other case, we use the list of local cells with tasks */ + cells_top = s->cells_top; + local_cells_top = (int *)map_data; + } + + for (int ind = 0; ind < num_elements; ind++) { + + struct cell *c; + + /* When restarting, the list of local cells does not + yet exist. 
We use the raw list of top-level cells instead */ + if (restarting) + c = &cells_top[ind]; + else + c = &cells_top[local_cells_top[ind]]; + + if (c->nodeID == e->nodeID) { + + /* Drift all the particles */ + cell_drift_gpart(c, e, /* force the drift=*/1); + } + } +} + +/** + * @brief Mapper function to drift *all* the multipoles to the current time. + * + * @param map_data An array of #cell%s. + * @param num_elements Chunk size. + * @param extra_data Pointer to an #engine. + */ +void engine_do_drift_all_multipole_mapper(void *map_data, int num_elements, + void *extra_data) { + + const struct engine *e = (const struct engine *)extra_data; + const int restarting = e->restarting; + struct space *s = e->s; + struct cell *cells_top; + int *local_cells_with_tasks_top; + + if (restarting) { + + /* When restarting, we loop over all top-level cells */ + cells_top = (struct cell *)map_data; + local_cells_with_tasks_top = NULL; + + } else { + + /* In any other case, we use the list of local cells with tasks */ + cells_top = s->cells_top; + local_cells_with_tasks_top = (int *)map_data; + } + + for (int ind = 0; ind < num_elements; ind++) { + + struct cell *c; + + /* When restarting, the list of local cells does not + yet exist. We use the raw list of top-level cells instead */ + if (restarting) + c = &cells_top[ind]; + else + c = &cells_top[local_cells_with_tasks_top[ind]]; + + cell_drift_all_multipoles(c, e); + } +} + +/** + * @brief Drift *all* particles and multipoles at all levels + * forward to the current time. + * + * @param e The #engine. + * @param drift_mpoles Do we want to drift all the multipoles as well? 
+ */ +void engine_drift_all(struct engine *e, const int drift_mpoles) { + + const ticks tic = getticks(); + +#ifdef SWIFT_DEBUG_CHECKS + if (e->nodeID == 0) { + if (e->policy & engine_policy_cosmology) + message("Drifting all to a=%e", + exp(e->ti_current * e->time_base) * e->cosmology->a_begin); + else + message("Drifting all to t=%e", + e->ti_current * e->time_base + e->time_begin); + } +#endif + + if (!e->restarting) { + + /* Normal case: We have a list of local cells with tasks to play with */ + + if (e->s->nr_parts > 0) { + threadpool_map(&e->threadpool, engine_do_drift_all_part_mapper, + e->s->local_cells_top, e->s->nr_local_cells, sizeof(int), + /* default chunk */ 0, e); + } + if (e->s->nr_gparts > 0) { + threadpool_map(&e->threadpool, engine_do_drift_all_gpart_mapper, + e->s->local_cells_top, e->s->nr_local_cells, sizeof(int), + /* default chunk */ 0, e); + } + if (drift_mpoles && (e->policy & engine_policy_self_gravity)) { + threadpool_map(&e->threadpool, engine_do_drift_all_multipole_mapper, + e->s->local_cells_with_tasks_top, + e->s->nr_local_cells_with_tasks, sizeof(int), + /* default chunk */ 0, e); + } + + } else { + + /* When restarting, the list of local cells with tasks does not yet + exist. 
We use the raw list of top-level cells instead */ + + if (e->s->nr_parts > 0) { + threadpool_map(&e->threadpool, engine_do_drift_all_part_mapper, + e->s->cells_top, e->s->nr_cells, sizeof(struct cell), + /* default chunk */ 0, e); + } + if (e->s->nr_gparts > 0) { + threadpool_map(&e->threadpool, engine_do_drift_all_gpart_mapper, + e->s->cells_top, e->s->nr_cells, sizeof(struct cell), + /* default chunk */ 0, e); + } + if (e->policy & engine_policy_self_gravity) { + threadpool_map(&e->threadpool, engine_do_drift_all_multipole_mapper, + e->s->cells_top, e->s->nr_cells, sizeof(struct cell), + /* default chunk */ 0, e); + } + } + + /* Synchronize particle positions */ + space_synchronize_particle_positions(e->s); + +#ifdef SWIFT_DEBUG_CHECKS + /* Check that all cells have been drifted to the current time. */ + space_check_drift_point( + e->s, e->ti_current, + drift_mpoles && (e->policy & engine_policy_self_gravity)); + part_verify_links(e->s->parts, e->s->gparts, e->s->sparts, e->s->nr_parts, + e->s->nr_gparts, e->s->nr_sparts, e->verbose); +#endif + + if (e->verbose) + message("took %.3f %s.", clocks_from_ticks(getticks() - tic), + clocks_getunit()); +} + +/** + * @brief Mapper function to drift *all* top-level multipoles forward in + * time. + * + * @param map_data An array of #cell%s. + * @param num_elements Chunk size. + * @param extra_data Pointer to an #engine. + */ +void engine_do_drift_top_multipoles_mapper(void *map_data, int num_elements, + void *extra_data) { + + struct engine *e = (struct engine *)extra_data; + struct cell *cells = (struct cell *)map_data; + + for (int ind = 0; ind < num_elements; ind++) { + struct cell *c = &cells[ind]; + if (c != NULL) { + + /* Drift the multipole at this level only */ + if (c->grav.ti_old_multipole != e->ti_current) cell_drift_multipole(c, e); + } + } +} + +/** + * @brief Drift *all* top-level multipoles forward to the current time. + * + * @param e The #engine. 
+ */ +void engine_drift_top_multipoles(struct engine *e) { + + const ticks tic = getticks(); + + threadpool_map(&e->threadpool, engine_do_drift_top_multipoles_mapper, + e->s->cells_top, e->s->nr_cells, sizeof(struct cell), 0, e); + +#ifdef SWIFT_DEBUG_CHECKS + /* Check that all cells have been drifted to the current time. */ + space_check_top_multipoles_drift_point(e->s, e->ti_current); +#endif + + if (e->verbose) + message("took %.3f %s.", clocks_from_ticks(getticks() - tic), + clocks_getunit()); +} diff --git a/src/engine_maketasks.c b/src/engine_maketasks.c new file mode 100644 index 0000000000000000000000000000000000000000..68841aa5999441e6a2621f867038a44e9f52794c --- /dev/null +++ b/src/engine_maketasks.c @@ -0,0 +1,2244 @@ +/******************************************************************************* + * This file is part of SWIFT. + * Copyright (c) 2012 Pedro Gonnet (pedro.gonnet@durham.ac.uk) + * Matthieu Schaller (matthieu.schaller@durham.ac.uk) + * 2015 Peter W. Draper (p.w.draper@durham.ac.uk) + * Angus Lepper (angus.lepper@ed.ac.uk) + * 2016 John A. Regan (john.a.regan@durham.ac.uk) + * Tom Theuns (tom.theuns@durham.ac.uk) + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published + * by the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public License + * along with this program. If not, see <http://www.gnu.org/licenses/>. + * + ******************************************************************************/ + +/* Config parameters. */ +#include "../config.h" + +/* Some standard headers. 
*/ +#include <stdlib.h> +#include <unistd.h> + +/* MPI headers. */ +#ifdef WITH_MPI +#include <mpi.h> +#endif + +/* Load the profiler header, if needed. */ +#ifdef WITH_PROFILER +#include <gperftools/profiler.h> +#endif + +/* This object's header. */ +#include "engine.h" + +/* Local headers. */ +#include "atomic.h" +#include "cell.h" +#include "clocks.h" +#include "cycle.h" +#include "debug.h" +#include "error.h" +#include "proxy.h" +#include "timers.h" + +/** + * @brief Add send tasks for the gravity pairs to a hierarchy of cells. + * + * @param e The #engine. + * @param ci The sending #cell. + * @param cj Dummy cell containing the nodeID of the receiving node. + * @param t_grav The send_grav #task, if it has already been created. + */ +void engine_addtasks_send_gravity(struct engine *e, struct cell *ci, + struct cell *cj, struct task *t_grav) { + +#ifdef WITH_MPI + struct link *l = NULL; + struct scheduler *s = &e->sched; + const int nodeID = cj->nodeID; + + /* Check if any of the gravity tasks are for the target node. */ + for (l = ci->grav.grav; l != NULL; l = l->next) + if (l->t->ci->nodeID == nodeID || + (l->t->cj != NULL && l->t->cj->nodeID == nodeID)) + break; + + /* If so, attach send tasks. */ + if (l != NULL) { + + /* Create the tasks and their dependencies? */ + if (t_grav == NULL) { + + /* Make sure this cell is tagged. */ + cell_ensure_tagged(ci); + + t_grav = scheduler_addtask(s, task_type_send, task_subtype_gpart, + ci->mpi.tag, 0, ci, cj); + + /* The sends should unlock the down pass. */ + scheduler_addunlock(s, t_grav, ci->grav.super->grav.down); + + /* Drift before you send */ + scheduler_addunlock(s, ci->grav.super->grav.drift, t_grav); + } + + /* Add them to the local cell. */ + engine_addlink(e, &ci->mpi.grav.send, t_grav); + } + + /* Recurse? 
*/ + if (ci->split) + for (int k = 0; k < 8; k++) + if (ci->progeny[k] != NULL) + engine_addtasks_send_gravity(e, ci->progeny[k], cj, t_grav); + +#else + error("SWIFT was not compiled with MPI support."); +#endif +} + +/** + * @brief Add send tasks for the hydro pairs to a hierarchy of cells. + * + * @param e The #engine. + * @param ci The sending #cell. + * @param cj Dummy cell containing the nodeID of the receiving node. + * @param t_xv The send_xv #task, if it has already been created. + * @param t_rho The send_rho #task, if it has already been created. + * @param t_gradient The send_gradient #task, if already created. + */ +void engine_addtasks_send_hydro(struct engine *e, struct cell *ci, + struct cell *cj, struct task *t_xv, + struct task *t_rho, struct task *t_gradient) { + +#ifdef WITH_MPI + struct link *l = NULL; + struct scheduler *s = &e->sched; + const int nodeID = cj->nodeID; + + /* Check if any of the density tasks are for the target node. */ + for (l = ci->hydro.density; l != NULL; l = l->next) + if (l->t->ci->nodeID == nodeID || + (l->t->cj != NULL && l->t->cj->nodeID == nodeID)) + break; + + /* If so, attach send tasks. */ + if (l != NULL) { + + /* Create the tasks and their dependencies? */ + if (t_xv == NULL) { + + /* Make sure this cell is tagged. */ + cell_ensure_tagged(ci); + + t_xv = scheduler_addtask(s, task_type_send, task_subtype_xv, ci->mpi.tag, + 0, ci, cj); + t_rho = scheduler_addtask(s, task_type_send, task_subtype_rho, + ci->mpi.tag, 0, ci, cj); +#ifdef EXTRA_HYDRO_LOOP + t_gradient = scheduler_addtask(s, task_type_send, task_subtype_gradient, + ci->mpi.tag, 0, ci, cj); +#endif + +#ifdef EXTRA_HYDRO_LOOP + + scheduler_addunlock(s, t_gradient, ci->super->kick2); + + scheduler_addunlock(s, ci->hydro.super->hydro.extra_ghost, t_gradient); + + /* The send_rho task should unlock the super_hydro-cell's extra_ghost + * task. 
*/ + scheduler_addunlock(s, t_rho, ci->hydro.super->hydro.extra_ghost); + + /* The send_rho task depends on the cell's ghost task. */ + scheduler_addunlock(s, ci->hydro.super->hydro.ghost_out, t_rho); + + /* The send_xv task should unlock the super_hydro-cell's ghost task. */ + scheduler_addunlock(s, t_xv, ci->hydro.super->hydro.ghost_in); + +#else + /* The send_rho task should unlock the super_hydro-cell's kick task. */ + scheduler_addunlock(s, t_rho, ci->super->end_force); + + /* The send_rho task depends on the cell's ghost task. */ + scheduler_addunlock(s, ci->hydro.super->hydro.ghost_out, t_rho); + + /* The send_xv task should unlock the super_hydro-cell's ghost task. */ + scheduler_addunlock(s, t_xv, ci->hydro.super->hydro.ghost_in); + +#endif + + /* Drift before you send */ + scheduler_addunlock(s, ci->hydro.super->hydro.drift, t_xv); + } + + /* Add them to the local cell. */ + engine_addlink(e, &ci->mpi.hydro.send_xv, t_xv); + engine_addlink(e, &ci->mpi.hydro.send_rho, t_rho); +#ifdef EXTRA_HYDRO_LOOP + engine_addlink(e, &ci->mpi.hydro.send_gradient, t_gradient); +#endif + } + + /* Recurse? */ + if (ci->split) + for (int k = 0; k < 8; k++) + if (ci->progeny[k] != NULL) + engine_addtasks_send_hydro(e, ci->progeny[k], cj, t_xv, t_rho, + t_gradient); + +#else + error("SWIFT was not compiled with MPI support."); +#endif +} + +/** + * @brief Add send tasks for the time-step to a hierarchy of cells. + * + * @param e The #engine. + * @param ci The sending #cell. + * @param cj Dummy cell containing the nodeID of the receiving node. + * @param t_ti The send_ti #task, if it has already been created. + */ +void engine_addtasks_send_timestep(struct engine *e, struct cell *ci, + struct cell *cj, struct task *t_ti) { + +#ifdef WITH_MPI + struct link *l = NULL; + struct scheduler *s = &e->sched; + const int nodeID = cj->nodeID; + + /* Check if any of the gravity tasks are for the target node. 
*/ + for (l = ci->grav.grav; l != NULL; l = l->next) + if (l->t->ci->nodeID == nodeID || + (l->t->cj != NULL && l->t->cj->nodeID == nodeID)) + break; + + /* Check whether instead any of the hydro tasks are for the target node. */ + if (l == NULL) + for (l = ci->hydro.density; l != NULL; l = l->next) + if (l->t->ci->nodeID == nodeID || + (l->t->cj != NULL && l->t->cj->nodeID == nodeID)) + break; + + /* If found anything, attach send tasks. */ + if (l != NULL) { + + /* Create the tasks and their dependencies? */ + if (t_ti == NULL) { + + /* Make sure this cell is tagged. */ + cell_ensure_tagged(ci); + + t_ti = scheduler_addtask(s, task_type_send, task_subtype_tend, + ci->mpi.tag, 0, ci, cj); + + /* The super-cell's timestep task should unlock the send_ti task. */ + scheduler_addunlock(s, ci->super->timestep, t_ti); + } + + /* Add them to the local cell. */ + engine_addlink(e, &ci->mpi.send_ti, t_ti); + } + + /* Recurse? */ + if (ci->split) + for (int k = 0; k < 8; k++) + if (ci->progeny[k] != NULL) + engine_addtasks_send_timestep(e, ci->progeny[k], cj, t_ti); + +#else + error("SWIFT was not compiled with MPI support."); +#endif +} + +/** + * @brief Add recv tasks for hydro pairs to a hierarchy of cells. + * + * @param e The #engine. + * @param c The foreign #cell. + * @param t_xv The recv_xv #task, if it has already been created. + * @param t_rho The recv_rho #task, if it has already been created. + * @param t_gradient The recv_gradient #task, if it has already been created. + */ +void engine_addtasks_recv_hydro(struct engine *e, struct cell *c, + struct task *t_xv, struct task *t_rho, + struct task *t_gradient) { + +#ifdef WITH_MPI + struct scheduler *s = &e->sched; + + /* Have we reached a level where there are any hydro tasks ? */ + if (t_xv == NULL && c->hydro.density != NULL) { + +#ifdef SWIFT_DEBUG_CHECKS + /* Make sure this cell has a valid tag. 
*/ + if (c->mpi.tag < 0) error("Trying to receive from untagged cell."); +#endif // SWIFT_DEBUG_CHECKS + + /* Create the tasks. */ + t_xv = scheduler_addtask(s, task_type_recv, task_subtype_xv, c->mpi.tag, 0, + c, NULL); + t_rho = scheduler_addtask(s, task_type_recv, task_subtype_rho, c->mpi.tag, + 0, c, NULL); +#ifdef EXTRA_HYDRO_LOOP + t_gradient = scheduler_addtask(s, task_type_recv, task_subtype_gradient, + c->mpi.tag, 0, c, NULL); +#endif + } + + c->mpi.hydro.recv_xv = t_xv; + c->mpi.hydro.recv_rho = t_rho; + c->mpi.hydro.recv_gradient = t_gradient; + + /* Add dependencies. */ + if (c->hydro.sorts != NULL) scheduler_addunlock(s, t_xv, c->hydro.sorts); + + for (struct link *l = c->hydro.density; l != NULL; l = l->next) { + scheduler_addunlock(s, t_xv, l->t); + scheduler_addunlock(s, l->t, t_rho); + } +#ifdef EXTRA_HYDRO_LOOP + for (struct link *l = c->hydro.gradient; l != NULL; l = l->next) { + scheduler_addunlock(s, t_rho, l->t); + scheduler_addunlock(s, l->t, t_gradient); + } + for (struct link *l = c->hydro.force; l != NULL; l = l->next) + scheduler_addunlock(s, t_gradient, l->t); +#else + for (struct link *l = c->hydro.force; l != NULL; l = l->next) + scheduler_addunlock(s, t_rho, l->t); +#endif + + /* Recurse? */ + if (c->split) + for (int k = 0; k < 8; k++) + if (c->progeny[k] != NULL) + engine_addtasks_recv_hydro(e, c->progeny[k], t_xv, t_rho, t_gradient); + +#else + error("SWIFT was not compiled with MPI support."); +#endif +} + +/** + * @brief Add recv tasks for gravity pairs to a hierarchy of cells. + * + * @param e The #engine. + * @param c The foreign #cell. + * @param t_grav The recv_gpart #task, if it has already been created. + */ +void engine_addtasks_recv_gravity(struct engine *e, struct cell *c, + struct task *t_grav) { + +#ifdef WITH_MPI + struct scheduler *s = &e->sched; + + /* Have we reached a level where there are any gravity tasks ? 
*/ + if (t_grav == NULL && c->grav.grav != NULL) { + +#ifdef SWIFT_DEBUG_CHECKS + /* Make sure this cell has a valid tag. */ + if (c->mpi.tag < 0) error("Trying to receive from untagged cell."); +#endif // SWIFT_DEBUG_CHECKS + + /* Create the tasks. */ + t_grav = scheduler_addtask(s, task_type_recv, task_subtype_gpart, + c->mpi.tag, 0, c, NULL); + } + + c->mpi.grav.recv = t_grav; + + for (struct link *l = c->grav.grav; l != NULL; l = l->next) + scheduler_addunlock(s, t_grav, l->t); + + /* Recurse? */ + if (c->split) + for (int k = 0; k < 8; k++) + if (c->progeny[k] != NULL) + engine_addtasks_recv_gravity(e, c->progeny[k], t_grav); + +#else + error("SWIFT was not compiled with MPI support."); +#endif +} + +/** + * @brief Add recv tasks for gravity pairs to a hierarchy of cells. + * + * @param e The #engine. + * @param c The foreign #cell. + * @param t_ti The recv_ti #task, if already been created. + */ +void engine_addtasks_recv_timestep(struct engine *e, struct cell *c, + struct task *t_ti) { + +#ifdef WITH_MPI + struct scheduler *s = &e->sched; + + /* Have we reached a level where there are any self/pair tasks ? */ + if (t_ti == NULL && (c->grav.grav != NULL || c->hydro.density != NULL)) { + +#ifdef SWIFT_DEBUG_CHECKS + /* Make sure this cell has a valid tag. */ + if (c->mpi.tag < 0) error("Trying to receive from untagged cell."); +#endif // SWIFT_DEBUG_CHECKS + + t_ti = scheduler_addtask(s, task_type_recv, task_subtype_tend, c->mpi.tag, + 0, c, NULL); + } + + c->mpi.recv_ti = t_ti; + + for (struct link *l = c->grav.grav; l != NULL; l = l->next) + scheduler_addunlock(s, l->t, t_ti); + + for (struct link *l = c->hydro.force; l != NULL; l = l->next) + scheduler_addunlock(s, l->t, t_ti); + + /* Recurse? 
*/ + if (c->split) + for (int k = 0; k < 8; k++) + if (c->progeny[k] != NULL) + engine_addtasks_recv_timestep(e, c->progeny[k], t_ti); + +#else + error("SWIFT was not compiled with MPI support."); +#endif +} + +/** + * @brief Generate the hydro hierarchical tasks for a hierarchy of cells - + * i.e. all the O(Npart) tasks -- timestep version + * + * Tasks are only created here. The dependencies will be added later on. + * + * Note that there is no need to recurse below the super-cell. Note also + * that we only add tasks if the relevant particles are present in the cell. + * + * @param e The #engine. + * @param c The #cell. + */ +void engine_make_hierarchical_tasks_common(struct engine *e, struct cell *c) { + + struct scheduler *s = &e->sched; + const int is_with_cooling = (e->policy & engine_policy_cooling); + const int is_with_star_formation = (e->policy & engine_policy_star_formation); + + /* Are we in a super-cell ? */ + if (c->super == c) { + + /* Local tasks only... */ + if (c->nodeID == e->nodeID) { + + /* Add the two half kicks */ + c->kick1 = scheduler_addtask(s, task_type_kick1, task_subtype_none, 0, 0, + c, NULL); + +#if defined(WITH_LOGGER) + c->logger = scheduler_addtask(s, task_type_logger, task_subtype_none, 0, + 0, c, NULL); +#endif + + c->kick2 = scheduler_addtask(s, task_type_kick2, task_subtype_none, 0, 0, + c, NULL); + + /* Add the time-step calculation task and its dependency */ + c->timestep = scheduler_addtask(s, task_type_timestep, task_subtype_none, + 0, 0, c, NULL); + + /* Add the task finishing the force calculation */ + c->end_force = scheduler_addtask(s, task_type_end_force, + task_subtype_none, 0, 0, c, NULL); + + /* Subgrid tasks */ + if (is_with_cooling) { + + c->hydro.cooling = scheduler_addtask(s, task_type_cooling, + task_subtype_none, 0, 0, c, NULL); + + scheduler_addunlock(s, c->end_force, c->hydro.cooling); + scheduler_addunlock(s, c->hydro.cooling, c->kick2); + + } else { + scheduler_addunlock(s, c->end_force, c->kick2); + } + 
+ if (is_with_star_formation) { + + c->hydro.star_formation = scheduler_addtask( + s, task_type_star_formation, task_subtype_none, 0, 0, c, NULL); + + scheduler_addunlock(s, c->kick2, c->hydro.star_formation); + scheduler_addunlock(s, c->hydro.star_formation, c->timestep); + + } else { + scheduler_addunlock(s, c->kick2, c->timestep); + } + + scheduler_addunlock(s, c->timestep, c->kick1); + +#if defined(WITH_LOGGER) + scheduler_addunlock(s, c->kick1, c->logger); +#endif + } + } else { /* We are above the super-cell so need to go deeper */ + + /* Recurse. */ + if (c->split) + for (int k = 0; k < 8; k++) + if (c->progeny[k] != NULL) + engine_make_hierarchical_tasks_common(e, c->progeny[k]); + } +} + +/** + * @brief Generate the hydro hierarchical tasks for a hierarchy of cells - + * i.e. all the O(Npart) tasks -- gravity version + * + * Tasks are only created here. The dependencies will be added later on. + * + * Note that there is no need to recurse below the super-cell. Note also + * that we only add tasks if the relevant particles are present in the cell. + * + * @param e The #engine. + * @param c The #cell. + */ +void engine_make_hierarchical_tasks_gravity(struct engine *e, struct cell *c) { + + struct scheduler *s = &e->sched; + const int periodic = e->s->periodic; + const int is_self_gravity = (e->policy & engine_policy_self_gravity); + + /* Are we in a super-cell ? */ + if (c->grav.super == c) { + + /* Local tasks only... 
*/ + if (c->nodeID == e->nodeID) { + + c->grav.drift = scheduler_addtask(s, task_type_drift_gpart, + task_subtype_none, 0, 0, c, NULL); + + if (is_self_gravity) { + + /* Initialisation of the multipoles */ + c->grav.init = scheduler_addtask(s, task_type_init_grav, + task_subtype_none, 0, 0, c, NULL); + + /* Gravity non-neighbouring pm calculations */ + c->grav.long_range = scheduler_addtask( + s, task_type_grav_long_range, task_subtype_none, 0, 0, c, NULL); + + /* Gravity recursive down-pass */ + c->grav.down = scheduler_addtask(s, task_type_grav_down, + task_subtype_none, 0, 0, c, NULL); + + /* Implicit tasks for the up and down passes */ + c->grav.drift_out = scheduler_addtask(s, task_type_drift_gpart_out, + task_subtype_none, 0, 1, c, NULL); + c->grav.init_out = scheduler_addtask(s, task_type_init_grav_out, + task_subtype_none, 0, 1, c, NULL); + c->grav.down_in = scheduler_addtask(s, task_type_grav_down_in, + task_subtype_none, 0, 1, c, NULL); + + /* Gravity mesh force propagation */ + if (periodic) + c->grav.mesh = scheduler_addtask(s, task_type_grav_mesh, + task_subtype_none, 0, 0, c, NULL); + + if (periodic) scheduler_addunlock(s, c->grav.drift, c->grav.mesh); + if (periodic) scheduler_addunlock(s, c->grav.mesh, c->grav.down); + scheduler_addunlock(s, c->grav.init, c->grav.long_range); + scheduler_addunlock(s, c->grav.long_range, c->grav.down); + scheduler_addunlock(s, c->grav.down, c->super->end_force); + + /* Link in the implicit tasks */ + scheduler_addunlock(s, c->grav.init, c->grav.init_out); + scheduler_addunlock(s, c->grav.drift, c->grav.drift_out); + scheduler_addunlock(s, c->grav.down_in, c->grav.down); + } + } + } + + /* We are below the super-cell but not below the maximal splitting depth */ + else if ((c->grav.super != NULL) && + ((c->maxdepth - c->depth) >= space_subdepth_diff_grav)) { + + /* Local tasks only... 
*/ + if (c->nodeID == e->nodeID) { + + if (is_self_gravity) { + + c->grav.drift_out = scheduler_addtask(s, task_type_drift_gpart_out, + task_subtype_none, 0, 1, c, NULL); + + c->grav.init_out = scheduler_addtask(s, task_type_init_grav_out, + task_subtype_none, 0, 1, c, NULL); + + c->grav.down_in = scheduler_addtask(s, task_type_grav_down_in, + task_subtype_none, 0, 1, c, NULL); + + scheduler_addunlock(s, c->parent->grav.init_out, c->grav.init_out); + scheduler_addunlock(s, c->parent->grav.drift_out, c->grav.drift_out); + scheduler_addunlock(s, c->grav.down_in, c->parent->grav.down_in); + } + } + } + + /* Recurse but not below the maximal splitting depth */ + if (c->split && ((c->maxdepth - c->depth) >= space_subdepth_diff_grav)) + for (int k = 0; k < 8; k++) + if (c->progeny[k] != NULL) + engine_make_hierarchical_tasks_gravity(e, c->progeny[k]); +} + +/** + * @brief Recursively add non-implicit star ghost tasks to a cell hierarchy. + */ +void engine_add_stars_ghosts(struct engine *e, struct cell *c, + struct task *stars_ghost_in, + struct task *stars_ghost_out) { + + /* If we have reached the leaf OR have too few particles to play with*/ + if (!c->split || c->stars.count < engine_max_sparts_per_ghost) { + + /* Add the ghost task and its dependencies */ + struct scheduler *s = &e->sched; + c->stars.ghost = scheduler_addtask(s, task_type_stars_ghost, + task_subtype_none, 0, 0, c, NULL); + scheduler_addunlock(s, stars_ghost_in, c->stars.ghost); + scheduler_addunlock(s, c->stars.ghost, stars_ghost_out); + } else { + /* Keep recursing */ + for (int k = 0; k < 8; k++) + if (c->progeny[k] != NULL) + engine_add_stars_ghosts(e, c->progeny[k], stars_ghost_in, + stars_ghost_out); + } +} + +/** + * @brief Recursively add non-implicit ghost tasks to a cell hierarchy. 
+ */ +void engine_add_ghosts(struct engine *e, struct cell *c, struct task *ghost_in, + struct task *ghost_out) { + + /* If we have reached the leaf OR have too few particles to play with*/ + if (!c->split || c->hydro.count < engine_max_parts_per_ghost) { + + /* Add the ghost task and its dependencies */ + struct scheduler *s = &e->sched; + c->hydro.ghost = + scheduler_addtask(s, task_type_ghost, task_subtype_none, 0, 0, c, NULL); + scheduler_addunlock(s, ghost_in, c->hydro.ghost); + scheduler_addunlock(s, c->hydro.ghost, ghost_out); + } else { + /* Keep recursing */ + for (int k = 0; k < 8; k++) + if (c->progeny[k] != NULL) + engine_add_ghosts(e, c->progeny[k], ghost_in, ghost_out); + } +} + +/** + * @brief Generate the hydro hierarchical tasks for a hierarchy of cells - + * i.e. all the O(Npart) tasks -- hydro version + * + * Tasks are only created here. The dependencies will be added later on. + * + * Note that there is no need to recurse below the super-cell. Note also + * that we only add tasks if the relevant particles are present in the cell. + * + * @param e The #engine. + * @param c The #cell. + */ +void engine_make_hierarchical_tasks_hydro(struct engine *e, struct cell *c) { + + struct scheduler *s = &e->sched; + const int is_with_sourceterms = (e->policy & engine_policy_sourceterms); + + /* Are we in a super-cell ? */ + if (c->hydro.super == c) { + + /* Add the sort task. */ + c->hydro.sorts = + scheduler_addtask(s, task_type_sort, task_subtype_none, 0, 0, c, NULL); + + /* Local tasks only... */ + if (c->nodeID == e->nodeID) { + + /* Add the drift task. */ + c->hydro.drift = scheduler_addtask(s, task_type_drift_part, + task_subtype_none, 0, 0, c, NULL); + + /* Generate the ghost tasks. 
*/ + c->hydro.ghost_in = + scheduler_addtask(s, task_type_ghost_in, task_subtype_none, 0, + /* implicit = */ 1, c, NULL); + c->hydro.ghost_out = + scheduler_addtask(s, task_type_ghost_out, task_subtype_none, 0, + /* implicit = */ 1, c, NULL); + engine_add_ghosts(e, c, c->hydro.ghost_in, c->hydro.ghost_out); + +#ifdef EXTRA_HYDRO_LOOP + /* Generate the extra ghost task. */ + c->hydro.extra_ghost = scheduler_addtask( + s, task_type_extra_ghost, task_subtype_none, 0, 0, c, NULL); +#endif + + /* add source terms */ + if (is_with_sourceterms) { + c->sourceterms = scheduler_addtask(s, task_type_sourceterms, + task_subtype_none, 0, 0, c, NULL); + } + } + + } else { /* We are above the super-cell so need to go deeper */ + + /* Recurse. */ + if (c->split) + for (int k = 0; k < 8; k++) + if (c->progeny[k] != NULL) + engine_make_hierarchical_tasks_hydro(e, c->progeny[k]); + } +} + +/** + * @brief Generate the stars hierarchical tasks for a hierarchy of cells - + * i.e. all the O(Npart) tasks -- star version + * + * Tasks are only created here. The dependencies will be added later on. + * + * Note that there is no need to recurse below the super-cell. Note also + * that we only add tasks if the relevant particles are present in the cell. + * + * @param e The #engine. + * @param c The #cell. + */ +void engine_make_hierarchical_tasks_stars(struct engine *e, struct cell *c) { + + struct scheduler *s = &e->sched; + + /* Are we in a super-cell ? */ + if (c->super == c) { + + /* Add the sort task. */ + c->stars.sorts = scheduler_addtask(s, task_type_stars_sort, + task_subtype_none, 0, 0, c, NULL); + + /* Local tasks only... */ + if (c->nodeID == e->nodeID) { + + /* Generate the ghost tasks. 
*/ + c->stars.ghost_in = + scheduler_addtask(s, task_type_stars_ghost_in, task_subtype_none, 0, + /* implicit = */ 1, c, NULL); + c->stars.ghost_out = + scheduler_addtask(s, task_type_stars_ghost_out, task_subtype_none, 0, + /* implicit = */ 1, c, NULL); + engine_add_stars_ghosts(e, c, c->stars.ghost_in, c->stars.ghost_out); + } + } else { /* We are above the super-cell so need to go deeper */ + + /* Recurse. */ + if (c->split) + for (int k = 0; k < 8; k++) + if (c->progeny[k] != NULL) + engine_make_hierarchical_tasks_stars(e, c->progeny[k]); + } +} + +/** + * @brief Constructs the top-level tasks for the short-range gravity + * and long-range gravity interactions. + * + * - All top-cells get a self task. + * - All pairs within range according to the multipole acceptance + * criterion get a pair task. + */ +void engine_make_self_gravity_tasks_mapper(void *map_data, int num_elements, + void *extra_data) { + + struct engine *e = (struct engine *)extra_data; + struct space *s = e->s; + struct scheduler *sched = &e->sched; + const int nodeID = e->nodeID; + const int periodic = s->periodic; + const double dim[3] = {s->dim[0], s->dim[1], s->dim[2]}; + const int cdim[3] = {s->cdim[0], s->cdim[1], s->cdim[2]}; + struct cell *cells = s->cells_top; + const double theta_crit = e->gravity_properties->theta_crit; + const double max_distance = e->mesh->r_cut_max; + const double max_distance2 = max_distance * max_distance; + + /* Compute how many cells away we need to walk */ + const double distance = 2.5 * cells[0].width[0] / theta_crit; + int delta = (int)(distance / cells[0].width[0]) + 1; + int delta_m = delta; + int delta_p = delta; + + /* Special case where every cell is in range of every other one */ + if (delta >= cdim[0] / 2) { + if (cdim[0] % 2 == 0) { + delta_m = cdim[0] / 2; + delta_p = cdim[0] / 2 - 1; + } else { + delta_m = cdim[0] / 2; + delta_p = cdim[0] / 2; + } + } + + /* Loop through the elements, which are just byte offsets from NULL. 
*/ + for (int ind = 0; ind < num_elements; ind++) { + + /* Get the cell index. */ + const int cid = (size_t)(map_data) + ind; + + /* Integer indices of the cell in the top-level grid */ + const int i = cid / (cdim[1] * cdim[2]); + const int j = (cid / cdim[2]) % cdim[1]; + const int k = cid % cdim[2]; + + /* Get the cell */ + struct cell *ci = &cells[cid]; + + /* Skip cells without gravity particles */ + if (ci->grav.count == 0) continue; + + /* If the cell is local build a self-interaction */ + if (ci->nodeID == nodeID) { + scheduler_addtask(sched, task_type_self, task_subtype_grav, 0, 0, ci, + NULL); + } + + /* Loop over every other cell within (Manhattan) range delta */ + for (int ii = -delta_m; ii <= delta_p; ii++) { + int iii = i + ii; + if (!periodic && (iii < 0 || iii >= cdim[0])) continue; + iii = (iii + cdim[0]) % cdim[0]; + for (int jj = -delta_m; jj <= delta_p; jj++) { + int jjj = j + jj; + if (!periodic && (jjj < 0 || jjj >= cdim[1])) continue; + jjj = (jjj + cdim[1]) % cdim[1]; + for (int kk = -delta_m; kk <= delta_p; kk++) { + int kkk = k + kk; + if (!periodic && (kkk < 0 || kkk >= cdim[2])) continue; + kkk = (kkk + cdim[2]) % cdim[2]; + + /* Get the cell */ + const int cjd = cell_getid(cdim, iii, jjj, kkk); + struct cell *cj = &cells[cjd]; + + /* Avoid duplicates, empty cells and completely foreign pairs */ + if (cid >= cjd || cj->grav.count == 0 || + (ci->nodeID != nodeID && cj->nodeID != nodeID)) + continue; + + /* Recover the multipole information */ + const struct gravity_tensors *multi_i = ci->grav.multipole; + const struct gravity_tensors *multi_j = cj->grav.multipole; + + if (multi_i == NULL && ci->nodeID != nodeID) + error("Multipole of ci was not exchanged properly via the proxies"); + if (multi_j == NULL && cj->nodeID != nodeID) + error("Multipole of cj was not exchanged properly via the proxies"); + + /* Minimal distance between any pair of particles */ + const double min_radius2 = + cell_min_dist2_same_size(ci, cj, periodic, dim); + + /* 
Are we beyond the distance where the truncated forces are 0 ?*/ + if (periodic && min_radius2 > max_distance2) continue; + + /* Are the cells too close for a MM interaction ? */ + if (!cell_can_use_pair_mm_rebuild(ci, cj, e, s)) { + + /* Ok, we need to add a direct pair calculation */ + scheduler_addtask(sched, task_type_pair, task_subtype_grav, 0, 0, + ci, cj); + +#ifdef SWIFT_DEBUG_CHECKS +#ifdef WITH_MPI + + /* Let's cross-check that we had a proxy for that cell */ + if (ci->nodeID == nodeID && cj->nodeID != engine_rank) { + + /* Find the proxy for this node */ + const int proxy_id = e->proxy_ind[cj->nodeID]; + if (proxy_id < 0) + error("No proxy exists for that foreign node %d!", cj->nodeID); + + const struct proxy *p = &e->proxies[proxy_id]; + + /* Check whether the cell exists in the proxy */ + int n = 0; + for (; n < p->nr_cells_in; n++) + if (p->cells_in[n] == cj) { + break; + } + if (n == p->nr_cells_in) + error( + "Cell %d not found in the proxy but trying to construct " + "grav task!", + cjd); + } else if (cj->nodeID == nodeID && ci->nodeID != engine_rank) { + + /* Find the proxy for this node */ + const int proxy_id = e->proxy_ind[ci->nodeID]; + if (proxy_id < 0) + error("No proxy exists for that foreign node %d!", ci->nodeID); + + const struct proxy *p = &e->proxies[proxy_id]; + + /* Check whether the cell exists in the proxy */ + int n = 0; + for (; n < p->nr_cells_in; n++) + if (p->cells_in[n] == ci) { + break; + } + if (n == p->nr_cells_in) + error( + "Cell %d not found in the proxy but trying to construct " + "grav task!", + cid); + } +#endif /* WITH_MPI */ +#endif /* SWIFT_DEBUG_CHECKS */ + } + } + } + } + } +} + +void engine_make_hierarchical_tasks_mapper(void *map_data, int num_elements, + void *extra_data) { + struct engine *e = (struct engine *)extra_data; + const int is_with_hydro = (e->policy & engine_policy_hydro); + const int is_with_self_gravity = (e->policy & engine_policy_self_gravity); + const int is_with_external_gravity = + 
(e->policy & engine_policy_external_gravity); + const int is_with_feedback = (e->policy & engine_policy_feedback); + + for (int ind = 0; ind < num_elements; ind++) { + struct cell *c = &((struct cell *)map_data)[ind]; + /* Make the common tasks (time integration) */ + engine_make_hierarchical_tasks_common(e, c); + /* Add the hydro stuff */ + if (is_with_hydro) engine_make_hierarchical_tasks_hydro(e, c); + /* And the gravity stuff */ + if (is_with_self_gravity || is_with_external_gravity) + engine_make_hierarchical_tasks_gravity(e, c); + if (is_with_feedback) engine_make_hierarchical_tasks_stars(e, c); + } +} + +/** + * @brief Constructs the top-level tasks for the external gravity. + * + * @param e The #engine. + */ +void engine_make_external_gravity_tasks(struct engine *e) { + + struct space *s = e->s; + struct scheduler *sched = &e->sched; + const int nodeID = e->nodeID; + struct cell *cells = s->cells_top; + const int nr_cells = s->nr_cells; + + for (int cid = 0; cid < nr_cells; ++cid) { + + struct cell *ci = &cells[cid]; + + /* Skip cells without gravity particles */ + if (ci->grav.count == 0) continue; + + /* Is that neighbour local ? */ + if (ci->nodeID != nodeID) continue; + + /* If the cell is local, build a self-interaction */ + scheduler_addtask(sched, task_type_self, task_subtype_external_grav, 0, 0, + ci, NULL); + } +} + +/** + * @brief Counts the tasks associated with one cell and constructs the links + * + * For each hydrodynamic and gravity task, construct the links with + * the corresponding cell. Similarly, construct the dependencies for + * all the sorting tasks. 
+ */ +void engine_count_and_link_tasks_mapper(void *map_data, int num_elements, + void *extra_data) { + + struct engine *e = (struct engine *)extra_data; + struct scheduler *const sched = &e->sched; + + for (int ind = 0; ind < num_elements; ind++) { + struct task *t = &((struct task *)map_data)[ind]; + + struct cell *ci = t->ci; + struct cell *cj = t->cj; + const enum task_types t_type = t->type; + const enum task_subtypes t_subtype = t->subtype; + + /* Link sort tasks to all the higher sort task. */ + if (t_type == task_type_sort) { + for (struct cell *finger = t->ci->parent; finger != NULL; + finger = finger->parent) + if (finger->hydro.sorts != NULL) + scheduler_addunlock(sched, t, finger->hydro.sorts); + } + + /* Link stars sort tasks to all the higher sort task. */ + if (t_type == task_type_stars_sort) { + for (struct cell *finger = t->ci->parent; finger != NULL; + finger = finger->parent) + if (finger->stars.sorts != NULL) + scheduler_addunlock(sched, t, finger->stars.sorts); + } + + /* Link self tasks to cells. */ + else if (t_type == task_type_self) { + atomic_inc(&ci->nr_tasks); + + if (t_subtype == task_subtype_density) { + engine_addlink(e, &ci->hydro.density, t); + } else if (t_subtype == task_subtype_grav) { + engine_addlink(e, &ci->grav.grav, t); + } else if (t_subtype == task_subtype_external_grav) { + engine_addlink(e, &ci->grav.grav, t); + } else if (t->subtype == task_subtype_stars_density) { + engine_addlink(e, &ci->stars.density, t); + } + + /* Link pair tasks to cells. 
*/ + } else if (t_type == task_type_pair) { + atomic_inc(&ci->nr_tasks); + atomic_inc(&cj->nr_tasks); + + if (t_subtype == task_subtype_density) { + engine_addlink(e, &ci->hydro.density, t); + engine_addlink(e, &cj->hydro.density, t); + } else if (t_subtype == task_subtype_grav) { + engine_addlink(e, &ci->grav.grav, t); + engine_addlink(e, &cj->grav.grav, t); + } else if (t->subtype == task_subtype_stars_density) { + engine_addlink(e, &ci->stars.density, t); + engine_addlink(e, &cj->stars.density, t); + } +#ifdef SWIFT_DEBUG_CHECKS + else if (t_subtype == task_subtype_external_grav) { + error("Found a pair/external-gravity task..."); + } +#endif + + /* Link sub-self tasks to cells. */ + } else if (t_type == task_type_sub_self) { + atomic_inc(&ci->nr_tasks); + + if (t_subtype == task_subtype_density) { + engine_addlink(e, &ci->hydro.density, t); + } else if (t_subtype == task_subtype_grav) { + engine_addlink(e, &ci->grav.grav, t); + } else if (t_subtype == task_subtype_external_grav) { + engine_addlink(e, &ci->grav.grav, t); + } else if (t->subtype == task_subtype_stars_density) { + engine_addlink(e, &ci->stars.density, t); + } + + /* Link sub-pair tasks to cells. 
*/ + } else if (t_type == task_type_sub_pair) { + atomic_inc(&ci->nr_tasks); + atomic_inc(&cj->nr_tasks); + + if (t_subtype == task_subtype_density) { + engine_addlink(e, &ci->hydro.density, t); + engine_addlink(e, &cj->hydro.density, t); + } else if (t_subtype == task_subtype_grav) { + engine_addlink(e, &ci->grav.grav, t); + engine_addlink(e, &cj->grav.grav, t); + } else if (t->subtype == task_subtype_stars_density) { + engine_addlink(e, &ci->stars.density, t); + engine_addlink(e, &cj->stars.density, t); + } +#ifdef SWIFT_DEBUG_CHECKS + else if (t_subtype == task_subtype_external_grav) { + error("Found a sub-pair/external-gravity task..."); + } +#endif + + /* Multipole-multipole interaction of progenies */ + } else if (t_type == task_type_grav_mm) { + + atomic_inc(&ci->grav.nr_mm_tasks); + atomic_inc(&cj->grav.nr_mm_tasks); + engine_addlink(e, &ci->grav.mm, t); + engine_addlink(e, &cj->grav.mm, t); + } + } +} + +/** + * @brief Creates all the task dependencies for the gravity + * + * @param e The #engine + */ +void engine_link_gravity_tasks(struct engine *e) { + + struct scheduler *sched = &e->sched; + const int nodeID = e->nodeID; + const int nr_tasks = sched->nr_tasks; + + for (int k = 0; k < nr_tasks; k++) { + + /* Get a pointer to the task. */ + struct task *t = &sched->tasks[k]; + + if (t->type == task_type_none) continue; + + /* Get the cells we act on */ + struct cell *ci = t->ci; + struct cell *cj = t->cj; + const enum task_types t_type = t->type; + const enum task_subtypes t_subtype = t->subtype; + + /* Pointers to the parent cells for tasks going up and down the tree + * In the case where we are at the super-level we don't + * want the parent as no tasks are defined above that level. 
*/ + struct cell *ci_parent, *cj_parent; + if (ci->parent != NULL && ci->grav.super != ci) + ci_parent = ci->parent; + else + ci_parent = ci; + + if (cj != NULL && cj->parent != NULL && cj->grav.super != cj) + cj_parent = cj->parent; + else + cj_parent = cj; + +/* Node ID (if running with MPI) */ +#ifdef WITH_MPI + const int ci_nodeID = ci->nodeID; + const int cj_nodeID = (cj != NULL) ? cj->nodeID : -1; +#else + const int ci_nodeID = nodeID; + const int cj_nodeID = nodeID; +#endif + + /* Self-interaction for self-gravity? */ + if (t_type == task_type_self && t_subtype == task_subtype_grav) { + +#ifdef SWIFT_DEBUG_CHECKS + if (ci_nodeID != nodeID) error("Non-local self task"); +#endif + + /* drift ---+-> gravity --> grav_down */ + /* init --/ */ + scheduler_addunlock(sched, ci_parent->grav.drift_out, t); + scheduler_addunlock(sched, ci_parent->grav.init_out, t); + scheduler_addunlock(sched, t, ci_parent->grav.down_in); + } + + /* Self-interaction for external gravity ? */ + if (t_type == task_type_self && t_subtype == task_subtype_external_grav) { + +#ifdef SWIFT_DEBUG_CHECKS + if (ci_nodeID != nodeID) error("Non-local self task"); +#endif + + /* drift -----> gravity --> end_force */ + scheduler_addunlock(sched, ci->grav.super->grav.drift, t); + scheduler_addunlock(sched, t, ci->super->end_force); + } + + /* Otherwise, pair interaction? 
*/ + else if (t_type == task_type_pair && t_subtype == task_subtype_grav) { + + if (ci_nodeID == nodeID) { + + /* drift ---+-> gravity --> grav_down */ + /* init --/ */ + scheduler_addunlock(sched, ci_parent->grav.drift_out, t); + scheduler_addunlock(sched, ci_parent->grav.init_out, t); + scheduler_addunlock(sched, t, ci_parent->grav.down_in); + } + if (cj_nodeID == nodeID) { + + /* drift ---+-> gravity --> grav_down */ + /* init --/ */ + if (ci_parent != cj_parent) { /* Avoid double unlock */ + scheduler_addunlock(sched, cj_parent->grav.drift_out, t); + scheduler_addunlock(sched, cj_parent->grav.init_out, t); + scheduler_addunlock(sched, t, cj_parent->grav.down_in); + } + } + } + + /* Otherwise, sub-self interaction? */ + else if (t_type == task_type_sub_self && t_subtype == task_subtype_grav) { + +#ifdef SWIFT_DEBUG_CHECKS + if (ci_nodeID != nodeID) error("Non-local sub-self task"); +#endif + /* drift ---+-> gravity --> grav_down */ + /* init --/ */ + scheduler_addunlock(sched, ci_parent->grav.drift_out, t); + scheduler_addunlock(sched, ci_parent->grav.init_out, t); + scheduler_addunlock(sched, t, ci_parent->grav.down_in); + } + + /* Sub-self-interaction for external gravity ? */ + else if (t_type == task_type_sub_self && + t_subtype == task_subtype_external_grav) { + +#ifdef SWIFT_DEBUG_CHECKS + if (ci_nodeID != nodeID) error("Non-local sub-self task"); +#endif + + /* drift -----> gravity --> end_force */ + scheduler_addunlock(sched, ci->grav.super->grav.drift, t); + scheduler_addunlock(sched, t, ci->super->end_force); + } + + /* Otherwise, sub-pair interaction? 
*/ + else if (t_type == task_type_sub_pair && t_subtype == task_subtype_grav) { + + if (ci_nodeID == nodeID) { + + /* drift ---+-> gravity --> grav_down */ + /* init --/ */ + scheduler_addunlock(sched, ci_parent->grav.drift_out, t); + scheduler_addunlock(sched, ci_parent->grav.init_out, t); + scheduler_addunlock(sched, t, ci_parent->grav.down_in); + } + if (cj_nodeID == nodeID) { + + /* drift ---+-> gravity --> grav_down */ + /* init --/ */ + if (ci_parent != cj_parent) { /* Avoid double unlock */ + scheduler_addunlock(sched, cj_parent->grav.drift_out, t); + scheduler_addunlock(sched, cj_parent->grav.init_out, t); + scheduler_addunlock(sched, t, cj_parent->grav.down_in); + } + } + } + + /* Otherwise M-M interaction? */ + else if (t_type == task_type_grav_mm) { + + if (ci_nodeID == nodeID) { + + /* init -----> gravity --> grav_down */ + scheduler_addunlock(sched, ci_parent->grav.init_out, t); + scheduler_addunlock(sched, t, ci_parent->grav.down_in); + } + if (cj_nodeID == nodeID) { + + /* init -----> gravity --> grav_down */ + if (ci_parent != cj_parent) { /* Avoid double unlock */ + scheduler_addunlock(sched, cj_parent->grav.init_out, t); + scheduler_addunlock(sched, t, cj_parent->grav.down_in); + } + } + } + } +} + +#ifdef EXTRA_HYDRO_LOOP + +/** + * @brief Creates the dependency network for the hydro tasks of a given cell. + * + * @param sched The #scheduler. + * @param density The density task to link. + * @param gradient The gradient task to link. + * @param force The force task to link. + * @param c The cell. + * @param with_cooling Do we have a cooling task ? 
+ */ +static inline void engine_make_hydro_loops_dependencies( + struct scheduler *sched, struct task *density, struct task *gradient, + struct task *force, struct cell *c, int with_cooling) { + + /* density loop --> ghost --> gradient loop --> extra_ghost */ + /* extra_ghost --> force loop */ + scheduler_addunlock(sched, density, c->hydro.super->hydro.ghost_in); + scheduler_addunlock(sched, c->hydro.super->hydro.ghost_out, gradient); + scheduler_addunlock(sched, gradient, c->hydro.super->hydro.extra_ghost); + scheduler_addunlock(sched, c->hydro.super->hydro.extra_ghost, force); +} + +#else + +/** + * @brief Creates the dependency network for the hydro tasks of a given cell. + * + * @param sched The #scheduler. + * @param density The density task to link. + * @param force The force task to link. + * @param c The cell. + * @param with_cooling Are we running with cooling switched on ? + */ +static inline void engine_make_hydro_loops_dependencies(struct scheduler *sched, + struct task *density, + struct task *force, + struct cell *c, + int with_cooling) { + /* density loop --> ghost --> force loop */ + scheduler_addunlock(sched, density, c->hydro.super->hydro.ghost_in); + scheduler_addunlock(sched, c->hydro.super->hydro.ghost_out, force); +} + +#endif +/** + * @brief Creates the dependency network for the stars tasks of a given cell. + * + * @param sched The #scheduler. + * @param density The density task to link. + * @param c The cell. + */ +static inline void engine_make_stars_loops_dependencies(struct scheduler *sched, + struct task *density, + struct cell *c) { + /* density loop --> ghost */ + scheduler_addunlock(sched, density, c->super->stars.ghost_in); +} + +/** + * @brief Duplicates the first hydro loop and construct all the + * dependencies for the hydro part + * + * This is done by looping over all the previously constructed tasks + * and adding another task involving the same cells but this time + * corresponding to the second hydro loop over neighbours. 
+ * With all the relevant tasks for a given cell available, we construct + * all the dependencies for that cell. + */ +void engine_make_extra_hydroloop_tasks_mapper(void *map_data, int num_elements, + void *extra_data) { + + struct engine *e = (struct engine *)extra_data; + struct scheduler *sched = &e->sched; + const int nodeID = e->nodeID; + const int with_cooling = (e->policy & engine_policy_cooling); + + for (int ind = 0; ind < num_elements; ind++) { + struct task *t = &((struct task *)map_data)[ind]; + + /* Sort tasks depend on the drift of the cell. */ + if (t->type == task_type_sort && t->ci->nodeID == engine_rank) { + scheduler_addunlock(sched, t->ci->hydro.super->hydro.drift, t); + } + + /* Self-interaction? */ + else if (t->type == task_type_self && t->subtype == task_subtype_density) { + + /* Make the self-density tasks depend on the drift only. */ + scheduler_addunlock(sched, t->ci->hydro.super->hydro.drift, t); + +#ifdef EXTRA_HYDRO_LOOP + /* Start by constructing the task for the second and third hydro loop. 
*/ + struct task *t2 = scheduler_addtask( + sched, task_type_self, task_subtype_gradient, 0, 0, t->ci, NULL); + struct task *t3 = scheduler_addtask( + sched, task_type_self, task_subtype_force, 0, 0, t->ci, NULL); + + /* Add the link between the new loops and the cell */ + engine_addlink(e, &t->ci->hydro.gradient, t2); + engine_addlink(e, &t->ci->hydro.force, t3); + + /* Now, build all the dependencies for the hydro */ + engine_make_hydro_loops_dependencies(sched, t, t2, t3, t->ci, + with_cooling); + scheduler_addunlock(sched, t3, t->ci->super->end_force); +#else + + /* Start by constructing the task for the second hydro loop */ + struct task *t2 = scheduler_addtask( + sched, task_type_self, task_subtype_force, 0, 0, t->ci, NULL); + + /* Add the link between the new loop and the cell */ + engine_addlink(e, &t->ci->hydro.force, t2); + + /* Now, build all the dependencies for the hydro */ + engine_make_hydro_loops_dependencies(sched, t, t2, t->ci, with_cooling); + scheduler_addunlock(sched, t2, t->ci->super->end_force); +#endif + } + + /* Otherwise, pair interaction? */ + else if (t->type == task_type_pair && t->subtype == task_subtype_density) { + + /* Make all density tasks depend on the drift and the sorts. 
*/ + if (t->ci->nodeID == engine_rank) + scheduler_addunlock(sched, t->ci->hydro.super->hydro.drift, t); + scheduler_addunlock(sched, t->ci->hydro.super->hydro.sorts, t); + if (t->ci->hydro.super != t->cj->hydro.super) { + if (t->cj->nodeID == engine_rank) + scheduler_addunlock(sched, t->cj->hydro.super->hydro.drift, t); + scheduler_addunlock(sched, t->cj->hydro.super->hydro.sorts, t); + } + +#ifdef EXTRA_HYDRO_LOOP + /* Start by constructing the task for the second and third hydro loop */ + struct task *t2 = scheduler_addtask( + sched, task_type_pair, task_subtype_gradient, 0, 0, t->ci, t->cj); + struct task *t3 = scheduler_addtask( + sched, task_type_pair, task_subtype_force, 0, 0, t->ci, t->cj); + + /* Add the link between the new loop and both cells */ + engine_addlink(e, &t->ci->hydro.gradient, t2); + engine_addlink(e, &t->cj->hydro.gradient, t2); + engine_addlink(e, &t->ci->hydro.force, t3); + engine_addlink(e, &t->cj->hydro.force, t3); + + /* Now, build all the dependencies for the hydro for the cells */ + /* that are local and are not descendant of the same super_hydro-cells */ + if (t->ci->nodeID == nodeID) { + engine_make_hydro_loops_dependencies(sched, t, t2, t3, t->ci, + with_cooling); + scheduler_addunlock(sched, t3, t->ci->super->end_force); + } + if (t->cj->nodeID == nodeID) { + if (t->ci->hydro.super != t->cj->hydro.super) + engine_make_hydro_loops_dependencies(sched, t, t2, t3, t->cj, + with_cooling); + if (t->ci->super != t->cj->super) + scheduler_addunlock(sched, t3, t->cj->super->end_force); + } + +#else + + /* Start by constructing the task for the second hydro loop */ + struct task *t2 = scheduler_addtask( + sched, task_type_pair, task_subtype_force, 0, 0, t->ci, t->cj); + + /* Add the link between the new loop and both cells */ + engine_addlink(e, &t->ci->hydro.force, t2); + engine_addlink(e, &t->cj->hydro.force, t2); + + /* Now, build all the dependencies for the hydro for the cells */ + /* that are local and are not descendant of the same 
super_hydro-cells */ + if (t->ci->nodeID == nodeID) { + engine_make_hydro_loops_dependencies(sched, t, t2, t->ci, with_cooling); + scheduler_addunlock(sched, t2, t->ci->super->end_force); + } + if (t->cj->nodeID == nodeID) { + if (t->ci->hydro.super != t->cj->hydro.super) + engine_make_hydro_loops_dependencies(sched, t, t2, t->cj, + with_cooling); + if (t->ci->super != t->cj->super) + scheduler_addunlock(sched, t2, t->cj->super->end_force); + } + +#endif + + } + + /* Otherwise, sub-self interaction? */ + else if (t->type == task_type_sub_self && + t->subtype == task_subtype_density) { + + /* Make all density tasks depend on the drift and sorts. */ + scheduler_addunlock(sched, t->ci->hydro.super->hydro.drift, t); + scheduler_addunlock(sched, t->ci->hydro.super->hydro.sorts, t); + +#ifdef EXTRA_HYDRO_LOOP + + /* Start by constructing the task for the second and third hydro loop */ + struct task *t2 = + scheduler_addtask(sched, task_type_sub_self, task_subtype_gradient, + t->flags, 0, t->ci, t->cj); + struct task *t3 = + scheduler_addtask(sched, task_type_sub_self, task_subtype_force, + t->flags, 0, t->ci, t->cj); + + /* Add the link between the new loop and the cell */ + engine_addlink(e, &t->ci->hydro.gradient, t2); + engine_addlink(e, &t->ci->hydro.force, t3); + + /* Now, build all the dependencies for the hydro for the cells */ + /* that are local and are not descendant of the same super_hydro-cells */ + if (t->ci->nodeID == nodeID) { + engine_make_hydro_loops_dependencies(sched, t, t2, t3, t->ci, + with_cooling); + scheduler_addunlock(sched, t3, t->ci->super->end_force); + } + +#else + /* Start by constructing the task for the second hydro loop */ + struct task *t2 = + scheduler_addtask(sched, task_type_sub_self, task_subtype_force, + t->flags, 0, t->ci, t->cj); + + /* Add the link between the new loop and the cell */ + engine_addlink(e, &t->ci->hydro.force, t2); + + /* Now, build all the dependencies for the hydro for the cells */ + /* that are local and are not 
descendant of the same super_hydro-cells */ + if (t->ci->nodeID == nodeID) { + engine_make_hydro_loops_dependencies(sched, t, t2, t->ci, with_cooling); + scheduler_addunlock(sched, t2, t->ci->super->end_force); + } +#endif + } + + /* Otherwise, sub-pair interaction? */ + else if (t->type == task_type_sub_pair && + t->subtype == task_subtype_density) { + + /* Make all density tasks depend on the drift. */ + if (t->ci->nodeID == engine_rank) + scheduler_addunlock(sched, t->ci->hydro.super->hydro.drift, t); + scheduler_addunlock(sched, t->ci->hydro.super->hydro.sorts, t); + if (t->ci->hydro.super != t->cj->hydro.super) { + if (t->cj->nodeID == engine_rank) + scheduler_addunlock(sched, t->cj->hydro.super->hydro.drift, t); + scheduler_addunlock(sched, t->cj->hydro.super->hydro.sorts, t); + } + +#ifdef EXTRA_HYDRO_LOOP + + /* Start by constructing the task for the second and third hydro loop */ + struct task *t2 = + scheduler_addtask(sched, task_type_sub_pair, task_subtype_gradient, + t->flags, 0, t->ci, t->cj); + struct task *t3 = + scheduler_addtask(sched, task_type_sub_pair, task_subtype_force, + t->flags, 0, t->ci, t->cj); + + /* Add the link between the new loop and both cells */ + engine_addlink(e, &t->ci->hydro.gradient, t2); + engine_addlink(e, &t->cj->hydro.gradient, t2); + engine_addlink(e, &t->ci->hydro.force, t3); + engine_addlink(e, &t->cj->hydro.force, t3); + + /* Now, build all the dependencies for the hydro for the cells */ + /* that are local and are not descendant of the same super_hydro-cells */ + if (t->ci->nodeID == nodeID) { + engine_make_hydro_loops_dependencies(sched, t, t2, t3, t->ci, + with_cooling); + scheduler_addunlock(sched, t3, t->ci->super->end_force); + } + if (t->cj->nodeID == nodeID) { + if (t->ci->hydro.super != t->cj->hydro.super) + engine_make_hydro_loops_dependencies(sched, t, t2, t3, t->cj, + with_cooling); + if (t->ci->super != t->cj->super) + scheduler_addunlock(sched, t3, t->cj->super->end_force); + } + +#else + /* Start by 
constructing the task for the second hydro loop */ + struct task *t2 = + scheduler_addtask(sched, task_type_sub_pair, task_subtype_force, + t->flags, 0, t->ci, t->cj); + + /* Add the link between the new loop and both cells */ + engine_addlink(e, &t->ci->hydro.force, t2); + engine_addlink(e, &t->cj->hydro.force, t2); + + /* Now, build all the dependencies for the hydro for the cells */ + /* that are local and are not descendant of the same super_hydro-cells */ + if (t->ci->nodeID == nodeID) { + engine_make_hydro_loops_dependencies(sched, t, t2, t->ci, with_cooling); + scheduler_addunlock(sched, t2, t->ci->super->end_force); + } + if (t->cj->nodeID == nodeID) { + if (t->ci->hydro.super != t->cj->hydro.super) + engine_make_hydro_loops_dependencies(sched, t, t2, t->cj, + with_cooling); + if (t->ci->super != t->cj->super) + scheduler_addunlock(sched, t2, t->cj->super->end_force); + } +#endif + } + } +} + +/** + * @brief Creates all the task dependencies for the stars + * + * @param map_data The tasks + * @param num_elements number of tasks + * @param extra_data The #engine + */ +void engine_link_stars_tasks_mapper(void *map_data, int num_elements, + void *extra_data) { + + struct engine *e = (struct engine *)extra_data; + struct scheduler *sched = &e->sched; + const int nodeID = e->nodeID; + + for (int ind = 0; ind < num_elements; ind++) { + struct task *t = &((struct task *)map_data)[ind]; + + /* Sort tasks depend on the drift of the cell. */ + if (t->type == task_type_stars_sort && t->ci->nodeID == engine_rank) { + scheduler_addunlock(sched, t->ci->super->grav.drift, t); + } + + /* Self-interaction? */ + if (t->type == task_type_self && t->subtype == task_subtype_stars_density) { + + /* Make the self-density tasks depend on the drifts. 
*/ + scheduler_addunlock(sched, t->ci->super->hydro.drift, t); + + scheduler_addunlock(sched, t->ci->super->grav.drift, t); + + /* Now, build all the dependencies for the stars */ + engine_make_stars_loops_dependencies(sched, t, t->ci); + if (t->ci == t->ci->super) + scheduler_addunlock(sched, t->ci->super->stars.ghost_out, + t->ci->super->end_force); + } + + /* Otherwise, pair interaction? */ + else if (t->type == task_type_pair && + t->subtype == task_subtype_stars_density) { + + /* Make all density tasks depend on the drift and the sorts. */ + if (t->cj->nodeID == engine_rank) + scheduler_addunlock(sched, t->cj->super->hydro.drift, t); + scheduler_addunlock(sched, t->cj->super->hydro.sorts, t); + + if (t->cj->nodeID == engine_rank) + scheduler_addunlock(sched, t->cj->super->grav.drift, t); + scheduler_addunlock(sched, t->ci->super->stars.sorts, t); + + if (t->ci->super != t->cj->super) { + if (t->ci->nodeID == engine_rank) + scheduler_addunlock(sched, t->ci->super->hydro.drift, t); + scheduler_addunlock(sched, t->ci->super->hydro.sorts, t); + + if (t->ci->nodeID == engine_rank) + scheduler_addunlock(sched, t->ci->super->grav.drift, t); + scheduler_addunlock(sched, t->cj->super->stars.sorts, t); + } + + /* Now, build all the dependencies for the stars for the cells */ + /* that are local and are not descendant of the same super-cells */ + if (t->ci->nodeID == nodeID) { + engine_make_stars_loops_dependencies(sched, t, t->ci); + } + if (t->cj->nodeID == nodeID) { + if (t->ci->super != t->cj->super) + engine_make_stars_loops_dependencies(sched, t, t->cj); + } + + } + + /* Otherwise, sub-self interaction? */ + else if (t->type == task_type_sub_self && + t->subtype == task_subtype_stars_density) { + + /* Make all density tasks depend on the drift and sorts. 
*/ + scheduler_addunlock(sched, t->ci->super->hydro.drift, t); + scheduler_addunlock(sched, t->ci->super->hydro.sorts, t); + scheduler_addunlock(sched, t->ci->super->grav.drift, t); + scheduler_addunlock(sched, t->ci->super->stars.sorts, t); + + /* Now, build all the dependencies for the stars for the cells */ + /* that are local and are not descendant of the same super-cells */ + if (t->ci->nodeID == nodeID) { + engine_make_stars_loops_dependencies(sched, t, t->ci); + } else + error("oo"); + } + + /* Otherwise, sub-pair interaction? */ + else if (t->type == task_type_sub_pair && + t->subtype == task_subtype_stars_density) { + + /* Make all density tasks depend on the drift. */ + if (t->cj->nodeID == engine_rank) + scheduler_addunlock(sched, t->cj->super->hydro.drift, t); + scheduler_addunlock(sched, t->cj->super->hydro.sorts, t); + + if (t->cj->nodeID == engine_rank) + scheduler_addunlock(sched, t->cj->super->grav.drift, t); + scheduler_addunlock(sched, t->ci->super->stars.sorts, t); + + if (t->ci->super != t->cj->super) { + if (t->ci->nodeID == engine_rank) + scheduler_addunlock(sched, t->ci->super->hydro.drift, t); + scheduler_addunlock(sched, t->ci->super->hydro.sorts, t); + + if (t->ci->nodeID == engine_rank) + scheduler_addunlock(sched, t->ci->super->grav.drift, t); + scheduler_addunlock(sched, t->cj->super->stars.sorts, t); + } + + /* Now, build all the dependencies for the stars for the cells */ + /* that are local and are not descendant of the same super-cells */ + if (t->ci->nodeID == nodeID) { + engine_make_stars_loops_dependencies(sched, t, t->ci); + } + if (t->cj->nodeID == nodeID) { + if (t->ci->super != t->cj->super) + engine_make_stars_loops_dependencies(sched, t, t->cj); + } + } + } +} + +/** + * @brief Constructs the top-level pair tasks for the star loop over + * neighbours + * + * Here we construct all the tasks for all possible neighbouring non-empty + * local cells in the hierarchy. No dependencies are being added thus far. 
+ * Additional loop over neighbours can later be added by simply duplicating + * all the tasks created by this function. + * + * @param map_data Offset of first two indices disguised as a pointer. + * @param num_elements Number of cells to traverse. + * @param extra_data The #engine. + */ +void engine_make_starsloop_tasks_mapper(void *map_data, int num_elements, + void *extra_data) { + + /* Extract the engine pointer. */ + struct engine *e = (struct engine *)extra_data; + + struct space *s = e->s; + struct scheduler *sched = &e->sched; + const int nodeID = e->nodeID; + const int *cdim = s->cdim; + struct cell *cells = s->cells_top; + + /* Loop through the elements, which are just byte offsets from NULL. */ + for (int ind = 0; ind < num_elements; ind++) { + + /* Get the cell index. */ + const int cid = (size_t)(map_data) + ind; + const int i = cid / (cdim[1] * cdim[2]); + const int j = (cid / cdim[2]) % cdim[1]; + const int k = cid % cdim[2]; + + /* Get the cell */ + struct cell *ci = &cells[cid]; + + /* Skip cells without particles */ + if (ci->stars.count == 0 && ci->hydro.count == 0) continue; + + /* If the cells is local build a self-interaction */ + if (ci->nodeID == nodeID) + scheduler_addtask(sched, task_type_self, task_subtype_stars_density, 0, 0, + ci, NULL); + + /* Now loop over all the neighbours of this cell */ + for (int ii = -1; ii < 2; ii++) { + int iii = i + ii; + if (!s->periodic && (iii < 0 || iii >= cdim[0])) continue; + iii = (iii + cdim[0]) % cdim[0]; + for (int jj = -1; jj < 2; jj++) { + int jjj = j + jj; + if (!s->periodic && (jjj < 0 || jjj >= cdim[1])) continue; + jjj = (jjj + cdim[1]) % cdim[1]; + for (int kk = -1; kk < 2; kk++) { + int kkk = k + kk; + if (!s->periodic && (kkk < 0 || kkk >= cdim[2])) continue; + kkk = (kkk + cdim[2]) % cdim[2]; + + /* Get the neighbouring cell */ + const int cjd = cell_getid(cdim, iii, jjj, kkk); + struct cell *cj = &cells[cjd]; + + /* Is that neighbour local and does it have particles ? 
*/ + if (cid >= cjd || (cj->stars.count == 0 && cj->hydro.count == 0) || + (ci->nodeID != nodeID && cj->nodeID != nodeID)) + continue; + + /* Construct the pair task */ + const int sid = sortlistID[(kk + 1) + 3 * ((jj + 1) + 3 * (ii + 1))]; + scheduler_addtask(sched, task_type_pair, task_subtype_stars_density, + sid, 0, ci, cj); + } + } + } + } +} + +/** + * @brief Constructs the top-level pair tasks for the first hydro loop over + * neighbours + * + * Here we construct all the tasks for all possible neighbouring non-empty + * local cells in the hierarchy. No dependencies are being added thus far. + * Additional loop over neighbours can later be added by simply duplicating + * all the tasks created by this function. + * + * @param map_data Offset of first two indices disguised as a pointer. + * @param num_elements Number of cells to traverse. + * @param extra_data The #engine. + */ +void engine_make_hydroloop_tasks_mapper(void *map_data, int num_elements, + void *extra_data) { + + /* Extract the engine pointer. */ + struct engine *e = (struct engine *)extra_data; + + struct space *s = e->s; + struct scheduler *sched = &e->sched; + const int nodeID = e->nodeID; + const int *cdim = s->cdim; + struct cell *cells = s->cells_top; + + /* Loop through the elements, which are just byte offsets from NULL. */ + for (int ind = 0; ind < num_elements; ind++) { + + /* Get the cell index. 
*/ + const int cid = (size_t)(map_data) + ind; + + /* Integer indices of the cell in the top-level grid */ + const int i = cid / (cdim[1] * cdim[2]); + const int j = (cid / cdim[2]) % cdim[1]; + const int k = cid % cdim[2]; + + /* Get the cell */ + struct cell *ci = &cells[cid]; + + /* Skip cells without hydro particles */ + if (ci->hydro.count == 0) continue; + + /* If the cell is local build a self-interaction */ + if (ci->nodeID == nodeID) { + scheduler_addtask(sched, task_type_self, task_subtype_density, 0, 0, ci, + NULL); + } + + /* Now loop over all the neighbours of this cell */ + for (int ii = -1; ii < 2; ii++) { + int iii = i + ii; + if (!s->periodic && (iii < 0 || iii >= cdim[0])) continue; + iii = (iii + cdim[0]) % cdim[0]; + for (int jj = -1; jj < 2; jj++) { + int jjj = j + jj; + if (!s->periodic && (jjj < 0 || jjj >= cdim[1])) continue; + jjj = (jjj + cdim[1]) % cdim[1]; + for (int kk = -1; kk < 2; kk++) { + int kkk = k + kk; + if (!s->periodic && (kkk < 0 || kkk >= cdim[2])) continue; + kkk = (kkk + cdim[2]) % cdim[2]; + + /* Get the neighbouring cell */ + const int cjd = cell_getid(cdim, iii, jjj, kkk); + struct cell *cj = &cells[cjd]; + + /* Is that neighbour local and does it have particles ? 
*/ + if (cid >= cjd || cj->hydro.count == 0 || + (ci->nodeID != nodeID && cj->nodeID != nodeID)) + continue; + + /* Construct the pair task */ + const int sid = sortlistID[(kk + 1) + 3 * ((jj + 1) + 3 * (ii + 1))]; + scheduler_addtask(sched, task_type_pair, task_subtype_density, sid, 0, + ci, cj); + +#ifdef SWIFT_DEBUG_CHECKS +#ifdef WITH_MPI + + /* Let's cross-check that we had a proxy for that cell */ + if (ci->nodeID == nodeID && cj->nodeID != engine_rank) { + + /* Find the proxy for this node */ + const int proxy_id = e->proxy_ind[cj->nodeID]; + if (proxy_id < 0) + error("No proxy exists for that foreign node %d!", cj->nodeID); + + const struct proxy *p = &e->proxies[proxy_id]; + + /* Check whether the cell exists in the proxy */ + int n = 0; + for (n = 0; n < p->nr_cells_in; n++) + if (p->cells_in[n] == cj) break; + if (n == p->nr_cells_in) + error( + "Cell %d not found in the proxy but trying to construct " + "hydro task!", + cjd); + } else if (cj->nodeID == nodeID && ci->nodeID != engine_rank) { + + /* Find the proxy for this node */ + const int proxy_id = e->proxy_ind[ci->nodeID]; + if (proxy_id < 0) + error("No proxy exists for that foreign node %d!", ci->nodeID); + + const struct proxy *p = &e->proxies[proxy_id]; + + /* Check whether the cell exists in the proxy */ + int n = 0; + for (n = 0; n < p->nr_cells_in; n++) + if (p->cells_in[n] == ci) break; + if (n == p->nr_cells_in) + error( + "Cell %d not found in the proxy but trying to construct " + "hydro task!", + cid); + } +#endif /* WITH_MPI */ +#endif /* SWIFT_DEBUG_CHECKS */ + } + } + } + } +} + +struct cell_type_pair { + struct cell *ci, *cj; + int type; +}; + +void engine_addtasks_send_mapper(void *map_data, int num_elements, + void *extra_data) { + struct engine *e = (struct engine *)extra_data; + struct cell_type_pair *cell_type_pairs = (struct cell_type_pair *)map_data; + + for (int k = 0; k < num_elements; k++) { + struct cell *ci = cell_type_pairs[k].ci; + struct cell *cj = 
cell_type_pairs[k].cj; + const int type = cell_type_pairs[k].type; + + /* Add the send task for the particle timesteps. */ + engine_addtasks_send_timestep(e, ci, cj, NULL); + + /* Add the send tasks for the cells in the proxy that have a hydro + * connection. */ + if ((e->policy & engine_policy_hydro) && (type & proxy_cell_type_hydro)) + engine_addtasks_send_hydro(e, ci, cj, /*t_xv=*/NULL, + /*t_rho=*/NULL, /*t_gradient=*/NULL); + + /* Add the send tasks for the cells in the proxy that have a gravity + * connection. */ + if ((e->policy & engine_policy_self_gravity) && + (type & proxy_cell_type_gravity)) + engine_addtasks_send_gravity(e, ci, cj, NULL); + } +} + +void engine_addtasks_recv_mapper(void *map_data, int num_elements, + void *extra_data) { + struct engine *e = (struct engine *)extra_data; + struct cell_type_pair *cell_type_pairs = (struct cell_type_pair *)map_data; + + for (int k = 0; k < num_elements; k++) { + struct cell *ci = cell_type_pairs[k].ci; + const int type = cell_type_pairs[k].type; + + /* Add the recv task for the particle timesteps. */ + engine_addtasks_recv_timestep(e, ci, NULL); + + /* Add the recv tasks for the cells in the proxy that have a hydro + * connection. */ + if ((e->policy & engine_policy_hydro) && (type & proxy_cell_type_hydro)) + engine_addtasks_recv_hydro(e, ci, NULL, NULL, NULL); + + /* Add the recv tasks for the cells in the proxy that have a gravity + * connection. */ + if ((e->policy & engine_policy_self_gravity) && + (type & proxy_cell_type_gravity)) + engine_addtasks_recv_gravity(e, ci, NULL); + } +} + +/** + * @brief Fill the #space's task list. + * + * @param e The #engine we are working with. + */ +void engine_maketasks(struct engine *e) { + + struct space *s = e->s; + struct scheduler *sched = &e->sched; + struct cell *cells = s->cells_top; + const int nr_cells = s->nr_cells; + const ticks tic = getticks(); + + /* Re-set the scheduler. 
*/ + scheduler_reset(sched, engine_estimate_nr_tasks(e)); + + ticks tic2 = getticks(); + + /* Construct the first hydro loop over neighbours */ + if (e->policy & engine_policy_hydro) + threadpool_map(&e->threadpool, engine_make_hydroloop_tasks_mapper, NULL, + s->nr_cells, 1, 0, e); + + if (e->verbose) + message("Making hydro tasks took %.3f %s.", + clocks_from_ticks(getticks() - tic2), clocks_getunit()); + + tic2 = getticks(); + + /* Construct the stars hydro loop over neighbours */ + if (e->policy & engine_policy_feedback) { + threadpool_map(&e->threadpool, engine_make_starsloop_tasks_mapper, NULL, + s->nr_cells, 1, 0, e); + } + + if (e->verbose) + message("Making stellar feedback tasks took %.3f %s.", + clocks_from_ticks(getticks() - tic2), clocks_getunit()); + + tic2 = getticks(); + + /* Add the self gravity tasks. */ + if (e->policy & engine_policy_self_gravity) { + threadpool_map(&e->threadpool, engine_make_self_gravity_tasks_mapper, NULL, + s->nr_cells, 1, 0, e); + } + + if (e->verbose) + message("Making gravity tasks took %.3f %s.", + clocks_from_ticks(getticks() - tic2), clocks_getunit()); + + /* Add the external gravity tasks. */ + if (e->policy & engine_policy_external_gravity) + engine_make_external_gravity_tasks(e); + + if (e->sched.nr_tasks == 0 && (s->nr_gparts > 0 || s->nr_parts > 0)) + error("We have particles but no hydro or gravity tasks were created."); + + /* Free the old list of cell-task links. */ + if (e->links != NULL) free(e->links); + e->size_links = 0; + +/* The maximum number of links is the + * number of cells (s->tot_cells) times the number of neighbours (26) times + * the number of interaction types, so 26 * 2 (density, force) pairs + * and 2 (density, force) self. 
+ */ +#ifdef EXTRA_HYDRO_LOOP + const size_t hydro_tasks_per_cell = 27 * 3; +#else + const size_t hydro_tasks_per_cell = 27 * 2; +#endif + const size_t self_grav_tasks_per_cell = 125; + const size_t ext_grav_tasks_per_cell = 1; + const size_t stars_tasks_per_cell = 27; + + if (e->policy & engine_policy_hydro) + e->size_links += s->tot_cells * hydro_tasks_per_cell; + if (e->policy & engine_policy_external_gravity) + e->size_links += s->tot_cells * ext_grav_tasks_per_cell; + if (e->policy & engine_policy_self_gravity) + e->size_links += s->tot_cells * self_grav_tasks_per_cell; + if (e->policy & engine_policy_stars) + e->size_links += s->tot_cells * stars_tasks_per_cell; + + /* Allocate the new link list */ + if ((e->links = (struct link *)malloc(sizeof(struct link) * e->size_links)) == + NULL) + error("Failed to allocate cell-task links."); + e->nr_links = 0; + + tic2 = getticks(); + + /* Split the tasks. */ + scheduler_splittasks(sched); + + if (e->verbose) + message("Splitting tasks took %.3f %s.", + clocks_from_ticks(getticks() - tic2), clocks_getunit()); + +#ifdef SWIFT_DEBUG_CHECKS + /* Verify that we are not left with invalid tasks */ + for (int i = 0; i < e->sched.nr_tasks; ++i) { + const struct task *t = &e->sched.tasks[i]; + if (t->ci == NULL && t->cj != NULL && !t->skip) error("Invalid task"); + } +#endif + + tic2 = getticks(); + + /* Count the number of tasks associated with each cell and + store the density tasks in each cell, and make each sort + depend on the sorts of its progeny. */ + threadpool_map(&e->threadpool, engine_count_and_link_tasks_mapper, + sched->tasks, sched->nr_tasks, sizeof(struct task), 0, e); + + if (e->verbose) + message("Counting and linking tasks took %.3f %s.", + clocks_from_ticks(getticks() - tic2), clocks_getunit()); + + tic2 = getticks(); + + /* Re-set the tag counter. MPI tags are defined for top-level cells in + * cell_set_super_mapper. 
*/ +#ifdef WITH_MPI + cell_next_tag = 0; +#endif + + /* Now that the self/pair tasks are at the right level, set the super + * pointers. */ + threadpool_map(&e->threadpool, cell_set_super_mapper, cells, nr_cells, + sizeof(struct cell), 0, e); + + if (e->verbose) + message("Setting super-pointers took %.3f %s.", + clocks_from_ticks(getticks() - tic2), clocks_getunit()); + + /* Append hierarchical tasks to each cell. */ + threadpool_map(&e->threadpool, engine_make_hierarchical_tasks_mapper, cells, + nr_cells, sizeof(struct cell), 0, e); + + tic2 = getticks(); + + /* Run through the tasks and make force tasks for each density task. + Each force task depends on the cell ghosts and unlocks the kick task + of its super-cell. */ + if (e->policy & engine_policy_hydro) + threadpool_map(&e->threadpool, engine_make_extra_hydroloop_tasks_mapper, + sched->tasks, sched->nr_tasks, sizeof(struct task), 0, e); + + if (e->verbose) + message("Making extra hydroloop tasks took %.3f %s.", + clocks_from_ticks(getticks() - tic2), clocks_getunit()); + + tic2 = getticks(); + + /* Add the dependencies for the gravity stuff */ + if (e->policy & (engine_policy_self_gravity | engine_policy_external_gravity)) + engine_link_gravity_tasks(e); + + if (e->verbose) + message("Linking gravity tasks took %.3f %s.", + clocks_from_ticks(getticks() - tic2), clocks_getunit()); + + tic2 = getticks(); + + if (e->policy & engine_policy_stars) + threadpool_map(&e->threadpool, engine_link_stars_tasks_mapper, sched->tasks, + sched->nr_tasks, sizeof(struct task), 0, e); + + if (e->verbose) + message("Linking stars tasks took %.3f %s.", + clocks_from_ticks(getticks() - tic2), clocks_getunit()); + +#ifdef WITH_MPI + if (e->policy & engine_policy_feedback) + error("Cannot run stellar feedback with MPI (yet)."); + + /* Add the communication tasks if MPI is being used. 
*/ + if (e->policy & engine_policy_mpi) { + + tic2 = getticks(); + + /* Loop over the proxies and add the send tasks, which also generates the + * cell tags for super-cells. */ + int max_num_send_cells = 0; + for (int pid = 0; pid < e->nr_proxies; pid++) + max_num_send_cells += e->proxies[pid].nr_cells_out; + struct cell_type_pair *send_cell_type_pairs = NULL; + if ((send_cell_type_pairs = (struct cell_type_pair *)malloc( + sizeof(struct cell_type_pair) * max_num_send_cells)) == NULL) + error("Failed to allocate temporary cell pointer list."); + int num_send_cells = 0; + + for (int pid = 0; pid < e->nr_proxies; pid++) { + + /* Get a handle on the proxy. */ + struct proxy *p = &e->proxies[pid]; + + for (int k = 0; k < p->nr_cells_out; k++) { + send_cell_type_pairs[num_send_cells].ci = p->cells_out[k]; + send_cell_type_pairs[num_send_cells].cj = p->cells_in[0]; + send_cell_type_pairs[num_send_cells++].type = p->cells_out_type[k]; + } + } + + threadpool_map(&e->threadpool, engine_addtasks_send_mapper, + send_cell_type_pairs, num_send_cells, + sizeof(struct cell_type_pair), + /*chunk=*/0, e); + + free(send_cell_type_pairs); + + if (e->verbose) + message("Creating send tasks took %.3f %s.", + clocks_from_ticks(getticks() - tic2), clocks_getunit()); + + tic2 = getticks(); + + /* Exchange the cell tags. */ + proxy_tags_exchange(e->proxies, e->nr_proxies, s); + + if (e->verbose) + message("Exchanging cell tags took %.3f %s.", + clocks_from_ticks(getticks() - tic2), clocks_getunit()); + + tic2 = getticks(); + + /* Loop over the proxies and add the recv tasks, which relies on having the + * cell tags. 
*/ + int max_num_recv_cells = 0; + for (int pid = 0; pid < e->nr_proxies; pid++) + max_num_recv_cells += e->proxies[pid].nr_cells_in; + struct cell_type_pair *recv_cell_type_pairs = NULL; + if ((recv_cell_type_pairs = (struct cell_type_pair *)malloc( + sizeof(struct cell_type_pair) * max_num_recv_cells)) == NULL) + error("Failed to allocate temporary cell pointer list."); + int num_recv_cells = 0; + for (int pid = 0; pid < e->nr_proxies; pid++) { + + /* Get a handle on the proxy. */ + struct proxy *p = &e->proxies[pid]; + for (int k = 0; k < p->nr_cells_in; k++) { + recv_cell_type_pairs[num_recv_cells].ci = p->cells_in[k]; + recv_cell_type_pairs[num_recv_cells++].type = p->cells_in_type[k]; + } + } + threadpool_map(&e->threadpool, engine_addtasks_recv_mapper, + recv_cell_type_pairs, num_recv_cells, + sizeof(struct cell_type_pair), + /*chunk=*/0, e); + free(recv_cell_type_pairs); + + if (e->verbose) + message("Creating recv tasks took %.3f %s.", + clocks_from_ticks(getticks() - tic2), clocks_getunit()); + } +#endif + + tic2 = getticks(); + + /* Set the unlocks per task. */ + scheduler_set_unlocks(sched); + + if (e->verbose) + message("Setting unlocks took %.3f %s.", + clocks_from_ticks(getticks() - tic2), clocks_getunit()); + + tic2 = getticks(); + + /* Rank the tasks. */ + scheduler_ranktasks(sched); + + if (e->verbose) + message("Ranking the tasks took %.3f %s.", + clocks_from_ticks(getticks() - tic2), clocks_getunit()); + + /* Weight the tasks. */ + scheduler_reweight(sched, e->verbose); + + /* Set the tasks age. 
*/ + e->tasks_age = 0; + + if (e->verbose) + message("took %.3f %s (including reweight).", + clocks_from_ticks(getticks() - tic), clocks_getunit()); +} diff --git a/src/engine_marktasks.c b/src/engine_marktasks.c new file mode 100644 index 0000000000000000000000000000000000000000..ad36c532da9b582e9f3cdb3287e8cbd121642b67 --- /dev/null +++ b/src/engine_marktasks.c @@ -0,0 +1,572 @@ +/******************************************************************************* + * This file is part of SWIFT. + * Copyright (c) 2012 Pedro Gonnet (pedro.gonnet@durham.ac.uk) + * Matthieu Schaller (matthieu.schaller@durham.ac.uk) + * 2015 Peter W. Draper (p.w.draper@durham.ac.uk) + * Angus Lepper (angus.lepper@ed.ac.uk) + * 2016 John A. Regan (john.a.regan@durham.ac.uk) + * Tom Theuns (tom.theuns@durham.ac.uk) + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published + * by the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public License + * along with this program. If not, see <http://www.gnu.org/licenses/>. + * + ******************************************************************************/ + +/* Config parameters. */ +#include "../config.h" + +/* Some standard headers. */ +#include <stdlib.h> +#include <unistd.h> + +/* MPI headers. */ +#ifdef WITH_MPI +#include <mpi.h> +#endif + +/* Load the profiler header, if needed. */ +#ifdef WITH_PROFILER +#include <gperftools/profiler.h> +#endif + +/* This object's header. */ +#include "engine.h" + +/* Local headers. 
*/ +#include "active.h" +#include "atomic.h" +#include "cell.h" +#include "clocks.h" +#include "cycle.h" +#include "debug.h" +#include "error.h" +#include "proxy.h" +#include "timers.h" + +/** + * @brief Mark tasks to be un-skipped and set the sort flags accordingly. + * Threadpool mapper function. + * + * @param map_data pointer to the tasks + * @param num_elements number of tasks + * @param extra_data pointer to int that will define if a rebuild is needed. + */ +void engine_marktasks_mapper(void *map_data, int num_elements, + void *extra_data) { + /* Unpack the arguments. */ + struct task *tasks = (struct task *)map_data; + size_t *rebuild_space = &((size_t *)extra_data)[1]; + struct scheduler *s = (struct scheduler *)(((size_t *)extra_data)[2]); + struct engine *e = (struct engine *)((size_t *)extra_data)[0]; + const int nodeID = e->nodeID; + + for (int ind = 0; ind < num_elements; ind++) { + + /* Get basic task information */ + struct task *t = &tasks[ind]; + const enum task_types t_type = t->type; + const enum task_subtypes t_subtype = t->subtype; + + /* Single-cell task? */ + if (t_type == task_type_self || t_type == task_type_sub_self) { + + /* Local pointer. */ + struct cell *ci = t->ci; + + if (ci->nodeID != engine_rank) error("Non-local self task found"); + + /* Activate the hydro drift */ + if (t_type == task_type_self && t_subtype == task_subtype_density) { + if (cell_is_active_hydro(ci, e)) { + scheduler_activate(s, t); + cell_activate_drift_part(ci, s); + } + } + + /* Store current values of dx_max and h_max. 
*/ + else if (t_type == task_type_sub_self && + t_subtype == task_subtype_density) { + if (cell_is_active_hydro(ci, e)) { + scheduler_activate(s, t); + cell_activate_subcell_hydro_tasks(ci, NULL, s); + } + } + + else if (t_type == task_type_self && t_subtype == task_subtype_force) { + if (cell_is_active_hydro(ci, e)) scheduler_activate(s, t); + } + + else if (t_type == task_type_sub_self && + t_subtype == task_subtype_force) { + if (cell_is_active_hydro(ci, e)) scheduler_activate(s, t); + } + +#ifdef EXTRA_HYDRO_LOOP + else if (t_type == task_type_self && t_subtype == task_subtype_gradient) { + if (cell_is_active_hydro(ci, e)) scheduler_activate(s, t); + } + + else if (t_type == task_type_sub_self && + t_subtype == task_subtype_gradient) { + if (cell_is_active_hydro(ci, e)) scheduler_activate(s, t); + } +#endif + + /* Activate the star density */ + else if (t_type == task_type_self && + t_subtype == task_subtype_stars_density) { + if (cell_is_active_stars(ci, e)) { + scheduler_activate(s, t); + cell_activate_drift_part(ci, s); + cell_activate_drift_spart(ci, s); + } + } + + /* Store current values of dx_max and h_max. */ + else if (t_type == task_type_sub_self && + t_subtype == task_subtype_stars_density) { + if (cell_is_active_stars(ci, e)) { + scheduler_activate(s, t); + cell_activate_subcell_stars_tasks(ci, NULL, s); + } + } + + /* Activate the gravity drift */ + else if (t_type == task_type_self && t_subtype == task_subtype_grav) { + if (cell_is_active_gravity(ci, e)) { + scheduler_activate(s, t); + cell_activate_subcell_grav_tasks(t->ci, NULL, s); + } + } + + /* Activate the gravity drift */ + else if (t_type == task_type_self && + t_subtype == task_subtype_external_grav) { + if (cell_is_active_gravity(ci, e)) { + scheduler_activate(s, t); + cell_activate_drift_gpart(t->ci, s); + } + } + +#ifdef SWIFT_DEBUG_CHECKS + else { + error("Invalid task type / sub-type encountered"); + } +#endif + } + + /* Pair? 
*/ + else if (t_type == task_type_pair || t_type == task_type_sub_pair) { + + /* Local pointers. */ + struct cell *ci = t->ci; + struct cell *cj = t->cj; +#ifdef WITH_MPI + const int ci_nodeID = ci->nodeID; + const int cj_nodeID = cj->nodeID; +#else + const int ci_nodeID = nodeID; + const int cj_nodeID = nodeID; +#endif + const int ci_active_hydro = cell_is_active_hydro(ci, e); + const int cj_active_hydro = cell_is_active_hydro(cj, e); + + const int ci_active_gravity = cell_is_active_gravity(ci, e); + const int cj_active_gravity = cell_is_active_gravity(cj, e); + + const int ci_active_stars = cell_is_active_stars(ci, e); + const int cj_active_stars = cell_is_active_stars(cj, e); + + /* Only activate tasks that involve a local active cell. */ + if ((t_subtype == task_subtype_density || + t_subtype == task_subtype_gradient || + t_subtype == task_subtype_force) && + ((ci_active_hydro && ci_nodeID == nodeID) || + (cj_active_hydro && cj_nodeID == nodeID))) { + + scheduler_activate(s, t); + + /* Set the correct sorting flags */ + if (t_type == task_type_pair && t_subtype == task_subtype_density) { + + /* Store some values. */ + atomic_or(&ci->hydro.requires_sorts, 1 << t->flags); + atomic_or(&cj->hydro.requires_sorts, 1 << t->flags); + ci->hydro.dx_max_sort_old = ci->hydro.dx_max_sort; + cj->hydro.dx_max_sort_old = cj->hydro.dx_max_sort; + + /* Activate the hydro drift tasks. */ + if (ci_nodeID == nodeID) cell_activate_drift_part(ci, s); + if (cj_nodeID == nodeID) cell_activate_drift_part(cj, s); + + /* Check the sorts and activate them if needed. */ + cell_activate_hydro_sorts(ci, t->flags, s); + cell_activate_hydro_sorts(cj, t->flags, s); + + } + + /* Store current values of dx_max and h_max. 
*/ + else if (t_type == task_type_sub_pair && + t_subtype == task_subtype_density) { + cell_activate_subcell_hydro_tasks(t->ci, t->cj, s); + } + } + + /* Stars */ + if (t_subtype == task_subtype_stars_density && + ((ci_active_stars && ci->nodeID == engine_rank) || + (cj_active_stars && cj->nodeID == engine_rank))) { + + // MATTHIEU: The logic here can be improved. + // If ci is active for stars but not cj, then we can only drift the + // stars in ci and parts in cj. (and vice-versa). The same logic can be + // applied in cell_unskip_stars(). + + scheduler_activate(s, t); + + /* Set the correct sorting flags */ + if (t_type == task_type_pair) { + + /* Do ci */ + /* Store some values. */ + atomic_or(&cj->hydro.requires_sorts, 1 << t->flags); + atomic_or(&ci->stars.requires_sorts, 1 << t->flags); + + cj->hydro.dx_max_sort_old = cj->hydro.dx_max_sort; + ci->stars.dx_max_sort_old = ci->stars.dx_max_sort; + + /* Activate the hydro drift tasks. */ + if (ci_nodeID == nodeID) cell_activate_drift_spart(ci, s); + + if (cj_nodeID == nodeID) cell_activate_drift_part(cj, s); + + /* Check the sorts and activate them if needed. */ + cell_activate_hydro_sorts(cj, t->flags, s); + + cell_activate_stars_sorts(ci, t->flags, s); + + /* Do cj */ + /* Store some values. */ + atomic_or(&ci->hydro.requires_sorts, 1 << t->flags); + atomic_or(&cj->stars.requires_sorts, 1 << t->flags); + + ci->hydro.dx_max_sort_old = ci->hydro.dx_max_sort; + cj->stars.dx_max_sort_old = cj->stars.dx_max_sort; + + /* Activate the hydro drift tasks. */ + if (ci_nodeID == nodeID) cell_activate_drift_part(ci, s); + + if (cj_nodeID == nodeID) cell_activate_drift_spart(cj, s); + + /* Check the sorts and activate them if needed. */ + cell_activate_hydro_sorts(ci, t->flags, s); + cell_activate_stars_sorts(cj, t->flags, s); + } + + /* Store current values of dx_max and h_max. 
*/ + else if (t_type == task_type_sub_pair) { + cell_activate_subcell_stars_tasks(t->ci, t->cj, s); + } + } + + /* Gravity */ + if ((t_subtype == task_subtype_grav) && + ((ci_active_gravity && ci_nodeID == nodeID) || + (cj_active_gravity && cj_nodeID == nodeID))) { + + scheduler_activate(s, t); + + if (t_type == task_type_pair && t_subtype == task_subtype_grav) { + /* Activate the gravity drift */ + cell_activate_subcell_grav_tasks(t->ci, t->cj, s); + } + +#ifdef SWIFT_DEBUG_CHECKS + else if (t_type == task_type_sub_pair && + t_subtype == task_subtype_grav) { + error("Invalid task sub-type encountered"); + } +#endif + } + + /* Only interested in density tasks as of here. */ + if (t_subtype == task_subtype_density) { + + /* Too much particle movement? */ + if (cell_need_rebuild_for_pair(ci, cj)) *rebuild_space = 1; + +#ifdef WITH_MPI + /* Activate the send/recv tasks. */ + if (ci_nodeID != nodeID) { + + /* If the local cell is active, receive data from the foreign cell. */ + if (cj_active_hydro) { + scheduler_activate(s, ci->mpi.hydro.recv_xv); + if (ci_active_hydro) { + scheduler_activate(s, ci->mpi.hydro.recv_rho); +#ifdef EXTRA_HYDRO_LOOP + scheduler_activate(s, ci->mpi.hydro.recv_gradient); +#endif + } + } + + /* If the foreign cell is active, we want its ti_end values. */ + if (ci_active_hydro) scheduler_activate(s, ci->mpi.recv_ti); + + /* Is the foreign cell active and will need stuff from us? */ + if (ci_active_hydro) { + + struct link *l = + scheduler_activate_send(s, cj->mpi.hydro.send_xv, ci_nodeID); + + /* Drift the cell which will be sent at the level at which it is + sent, i.e. drift the cell specified in the send task (l->t) + itself. */ + cell_activate_drift_part(l->t->ci, s); + + /* If the local cell is also active, more stuff will be needed. 
*/ + if (cj_active_hydro) { + scheduler_activate_send(s, cj->mpi.hydro.send_rho, ci_nodeID); + +#ifdef EXTRA_HYDRO_LOOP + scheduler_activate_send(s, cj->mpi.hydro.send_gradient, + ci_nodeID); +#endif + } + } + + /* If the local cell is active, send its ti_end values. */ + if (cj_active_hydro) + scheduler_activate_send(s, cj->mpi.send_ti, ci_nodeID); + + } else if (cj_nodeID != nodeID) { + + /* If the local cell is active, receive data from the foreign cell. */ + if (ci_active_hydro) { + scheduler_activate(s, cj->mpi.hydro.recv_xv); + if (cj_active_hydro) { + scheduler_activate(s, cj->mpi.hydro.recv_rho); +#ifdef EXTRA_HYDRO_LOOP + scheduler_activate(s, cj->mpi.hydro.recv_gradient); +#endif + } + } + + /* If the foreign cell is active, we want its ti_end values. */ + if (cj_active_hydro) scheduler_activate(s, cj->mpi.recv_ti); + + /* Is the foreign cell active and will need stuff from us? */ + if (cj_active_hydro) { + + struct link *l = + scheduler_activate_send(s, ci->mpi.hydro.send_xv, cj_nodeID); + + /* Drift the cell which will be sent at the level at which it is + sent, i.e. drift the cell specified in the send task (l->t) + itself. */ + cell_activate_drift_part(l->t->ci, s); + + /* If the local cell is also active, more stuff will be needed. */ + if (ci_active_hydro) { + + scheduler_activate_send(s, ci->mpi.hydro.send_rho, cj_nodeID); + +#ifdef EXTRA_HYDRO_LOOP + scheduler_activate_send(s, ci->mpi.hydro.send_gradient, + cj_nodeID); +#endif + } + } + + /* If the local cell is active, send its ti_end values. */ + if (ci_active_hydro) + scheduler_activate_send(s, ci->mpi.send_ti, cj_nodeID); + } +#endif + } + + /* Only interested in stars_density tasks as of here. */ + if (t->subtype == task_subtype_stars_density) { + + /* Too much particle movement? */ + if (cell_need_rebuild_for_pair(ci, cj)) *rebuild_space = 1; + + // LOIC: Need implementing MPI case + } + + /* Only interested in gravity tasks as of here. 
*/ + if (t_subtype == task_subtype_grav) { + +#ifdef WITH_MPI + /* Activate the send/recv tasks. */ + if (ci_nodeID != nodeID) { + + /* If the local cell is active, receive data from the foreign cell. */ + if (cj_active_gravity) scheduler_activate(s, ci->mpi.grav.recv); + + /* If the foreign cell is active, we want its ti_end values. */ + if (ci_active_gravity) scheduler_activate(s, ci->mpi.recv_ti); + + /* Is the foreign cell active and will need stuff from us? */ + if (ci_active_gravity) { + + struct link *l = + scheduler_activate_send(s, cj->mpi.grav.send, ci_nodeID); + + /* Drift the cell which will be sent at the level at which it is + sent, i.e. drift the cell specified in the send task (l->t) + itself. */ + cell_activate_drift_gpart(l->t->ci, s); + } + + /* If the local cell is active, send its ti_end values. */ + if (cj_active_gravity) + scheduler_activate_send(s, cj->mpi.send_ti, ci_nodeID); + + } else if (cj_nodeID != nodeID) { + + /* If the local cell is active, receive data from the foreign cell. */ + if (ci_active_gravity) scheduler_activate(s, cj->mpi.grav.recv); + + /* If the foreign cell is active, we want its ti_end values. */ + if (cj_active_gravity) scheduler_activate(s, cj->mpi.recv_ti); + + /* Is the foreign cell active and will need stuff from us? */ + if (cj_active_gravity) { + + struct link *l = + scheduler_activate_send(s, ci->mpi.grav.send, cj_nodeID); + + /* Drift the cell which will be sent at the level at which it is + sent, i.e. drift the cell specified in the send task (l->t) + itself. */ + cell_activate_drift_gpart(l->t->ci, s); + } + + /* If the local cell is active, send its ti_end values. */ + if (ci_active_gravity) + scheduler_activate_send(s, ci->mpi.send_ti, cj_nodeID); + } +#endif + } + } + + /* End force ? */ + else if (t_type == task_type_end_force) { + + if (cell_is_active_hydro(t->ci, e) || cell_is_active_gravity(t->ci, e)) + scheduler_activate(s, t); + } + + /* Kick ? 
*/ + else if (t_type == task_type_kick1 || t_type == task_type_kick2) { + + if (cell_is_active_hydro(t->ci, e) || cell_is_active_gravity(t->ci, e)) + scheduler_activate(s, t); + } + + /* Hydro ghost tasks ? */ + else if (t_type == task_type_ghost || t_type == task_type_extra_ghost || + t_type == task_type_ghost_in || t_type == task_type_ghost_out) { + if (cell_is_active_hydro(t->ci, e)) scheduler_activate(s, t); + } + + /* logger tasks ? */ + else if (t->type == task_type_logger) { + if (cell_is_active_hydro(t->ci, e) || cell_is_active_gravity(t->ci, e) || + cell_is_active_stars(t->ci, e)) + scheduler_activate(s, t); + } + + /* Gravity stuff ? */ + else if (t_type == task_type_grav_down || t_type == task_type_grav_mesh || + t_type == task_type_grav_long_range || + t_type == task_type_init_grav || + t_type == task_type_init_grav_out || + t_type == task_type_drift_gpart_out || + t_type == task_type_grav_down_in) { + if (cell_is_active_gravity(t->ci, e)) scheduler_activate(s, t); + } + + /* Multipole - Multipole interaction task */ + else if (t_type == task_type_grav_mm) { + + /* Local pointers. */ + const struct cell *ci = t->ci; + const struct cell *cj = t->cj; +#ifdef WITH_MPI + const int ci_nodeID = ci->nodeID; + const int cj_nodeID = (cj != NULL) ? cj->nodeID : -1; +#else + const int ci_nodeID = nodeID; + const int cj_nodeID = nodeID; +#endif + const int ci_active_gravity = cell_is_active_gravity_mm(ci, e); + const int cj_active_gravity = cell_is_active_gravity_mm(cj, e); + + if ((ci_active_gravity && ci_nodeID == nodeID) || + (cj_active_gravity && cj_nodeID == nodeID)) + scheduler_activate(s, t); + } + + /* Star ghost tasks ? */ + else if (t_type == task_type_stars_ghost || + t_type == task_type_stars_ghost_in || + t_type == task_type_stars_ghost_out) { + if (cell_is_active_stars(t->ci, e)) scheduler_activate(s, t); + } + + /* Time-step? 
*/ + else if (t_type == task_type_timestep) { + t->ci->hydro.updated = 0; + t->ci->grav.updated = 0; + t->ci->stars.updated = 0; + if (cell_is_active_hydro(t->ci, e) || cell_is_active_gravity(t->ci, e)) + scheduler_activate(s, t); + } + + /* Subgrid tasks */ + else if (t_type == task_type_cooling) { + if (cell_is_active_hydro(t->ci, e) || cell_is_active_gravity(t->ci, e)) + scheduler_activate(s, t); + } else if (t_type == task_type_star_formation) { + if (cell_is_active_hydro(t->ci, e) || cell_is_active_gravity(t->ci, e)) + scheduler_activate(s, t); + } + } +} + +/** + * @brief Mark tasks to be un-skipped and set the sort flags accordingly. + * + * @return 1 if the space has to be rebuilt, 0 otherwise. + */ +int engine_marktasks(struct engine *e) { + + struct scheduler *s = &e->sched; + const ticks tic = getticks(); + int rebuild_space = 0; + + /* Run through the tasks and mark as skip or not. */ + size_t extra_data[3] = {(size_t)e, (size_t)rebuild_space, (size_t)&e->sched}; + threadpool_map(&e->threadpool, engine_marktasks_mapper, s->tasks, s->nr_tasks, + sizeof(struct task), 0, extra_data); + rebuild_space = extra_data[1]; + + if (e->verbose) + message("took %.3f %s.", clocks_from_ticks(getticks() - tic), + clocks_getunit()); + + /* All is well... */ + return rebuild_space; +} diff --git a/src/equation_of_state/planetary/aneos.h b/src/equation_of_state/planetary/aneos.h deleted file mode 100644 index 904288b2fdf3ba825cdc7d114ebb61cd42de198d..0000000000000000000000000000000000000000 --- a/src/equation_of_state/planetary/aneos.h +++ /dev/null @@ -1,144 +0,0 @@ -/******************************************************************************* - * This file is part of SWIFT. - * Copyright (c) 2016 Matthieu Schaller (matthieu.schaller@durham.ac.uk). - * 2018 Jacob Kegerreis (jacob.kegerreis@durham.ac.uk). 
- * - * This program is free software: you can redistribute it and/or modify - * it under the terms of the GNU Lesser General Public License as published - * by the Free Software Foundation, either version 3 of the License, or - * (at your option) any later version. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU Lesser General Public License - * along with this program. If not, see <http://www.gnu.org/licenses/>. - * - ******************************************************************************/ -#ifndef SWIFT_ANEOS_EQUATION_OF_STATE_H -#define SWIFT_ANEOS_EQUATION_OF_STATE_H - -/** - * @file equation_of_state/planetary/aneos.h - * - * Contains the (M)ANEOS EOS functions for - * equation_of_state/planetary/equation_of_state.h - * - * Adapted from the implementation in Gadget 2 of Cuk & Stewart (2012) - * - */ - -/* Some standard headers. */ -#include <math.h> - -/* Local headers. 
*/ -#include "adiabatic_index.h" -#include "common_io.h" -#include "equation_of_state.h" -#include "inline.h" -#include "physical_constants.h" -#include "units.h" - -// ANEOS parameters -struct ANEOS_params { - enum eos_planetary_material_id mat_id; -}; - -// Parameter values for each material (cgs units) -INLINE static void set_ANEOS_iron(struct ANEOS_params *mat, - enum eos_planetary_material_id mat_id) { - mat->mat_id = mat_id; -} -INLINE static void set_MANEOS_forsterite( - struct ANEOS_params *mat, enum eos_planetary_material_id mat_id) { - mat->mat_id = mat_id; -} - -// Convert from cgs to internal units -INLINE static void convert_units_ANEOS(struct ANEOS_params *mat, - const struct unit_system *us) {} - -// gas_internal_energy_from_entropy -INLINE static float ANEOS_internal_energy_from_entropy( - float density, float entropy, const struct ANEOS_params *mat) { - - error("This EOS function is not yet implemented!"); - - return 0; -} - -// gas_pressure_from_entropy -INLINE static float ANEOS_pressure_from_entropy( - float density, float entropy, const struct ANEOS_params *mat) { - - error("This EOS function is not yet implemented!"); - - return 0; -} - -// gas_entropy_from_pressure -INLINE static float ANEOS_entropy_from_pressure( - float density, float pressure, const struct ANEOS_params *mat) { - - error("This EOS function is not yet implemented!"); - - return 0; -} - -// gas_soundspeed_from_entropy -INLINE static float ANEOS_soundspeed_from_entropy( - float density, float entropy, const struct ANEOS_params *mat) { - - error("This EOS function is not yet implemented!"); - - return 0; -} - -// gas_entropy_from_internal_energy -INLINE static float ANEOS_entropy_from_internal_energy( - float density, float u, const struct ANEOS_params *mat) { - - error("This EOS function is not yet implemented!"); - - return 0; -} - -// gas_pressure_from_internal_energy -INLINE static float ANEOS_pressure_from_internal_energy( - float density, float u, const struct 
ANEOS_params *mat) { - - error("This EOS function is not yet implemented!"); - - return 0; -} - -// gas_internal_energy_from_pressure -INLINE static float ANEOS_internal_energy_from_pressure( - float density, float P, const struct ANEOS_params *mat) { - - error("This EOS function is not yet implemented!"); - - return 0; -} - -// gas_soundspeed_from_internal_energy -INLINE static float ANEOS_soundspeed_from_internal_energy( - float density, float u, const struct ANEOS_params *mat) { - - error("This EOS function is not yet implemented!"); - - return 0; -} - -// gas_soundspeed_from_pressure -INLINE static float ANEOS_soundspeed_from_pressure( - float density, float P, const struct ANEOS_params *mat) { - - error("This EOS function is not yet implemented!"); - - return 0; -} - -#endif /* SWIFT_ANEOS_EQUATION_OF_STATE_H */ diff --git a/src/equation_of_state/planetary/equation_of_state.h b/src/equation_of_state/planetary/equation_of_state.h index 61e23dc0b4eb82e9ae5c0869f7a10dfff97fc45e..644167bb4795a2a3d0fefe130cba93c64f29941b 100644 --- a/src/equation_of_state/planetary/equation_of_state.h +++ b/src/equation_of_state/planetary/equation_of_state.h @@ -39,6 +39,7 @@ #include "common_io.h" #include "inline.h" #include "physical_constants.h" +#include "restart.h" #include "units.h" extern struct eos_parameters eos; @@ -50,10 +51,15 @@ extern struct eos_parameters eos; * @brief Master type for the planetary equation of state. */ enum eos_planetary_type_id { + + /*! Tillotson */ eos_planetary_type_Til = 1, + + /*! Hubbard & MacFarlane (1980) Uranus/Neptune */ eos_planetary_type_HM80 = 2, - eos_planetary_type_ANEOS = 3, - eos_planetary_type_SESAME = 4, + + /*! SESAME */ + eos_planetary_type_SESAME = 3, }; /** @@ -89,25 +95,26 @@ enum eos_planetary_material_id { eos_planetary_id_HM80_rock = eos_planetary_type_HM80 * eos_planetary_type_factor + 2, - /* ANEOS */ - - /*! ANEOS iron */ - eos_planetary_id_ANEOS_iron = - eos_planetary_type_ANEOS * eos_planetary_type_factor, - - /*! 
MANEOS forsterite */ - eos_planetary_id_MANEOS_forsterite = - eos_planetary_type_ANEOS * eos_planetary_type_factor + 1, - /* SESAME */ - /*! SESAME iron */ + /*! SESAME iron 2140 */ eos_planetary_id_SESAME_iron = eos_planetary_type_SESAME * eos_planetary_type_factor, + + /*! SESAME basalt 7530 */ + eos_planetary_id_SESAME_basalt = + eos_planetary_type_SESAME * eos_planetary_type_factor + 1, + + /*! SESAME water 7154 */ + eos_planetary_id_SESAME_water = + eos_planetary_type_SESAME * eos_planetary_type_factor + 2, + + /*! Senft & Stewart (2008) SESAME-like water */ + eos_planetary_id_SS08_water = + eos_planetary_type_SESAME * eos_planetary_type_factor + 3, }; /* Individual EOS function headers. */ -#include "aneos.h" #include "hm80.h" #include "sesame.h" #include "tillotson.h" @@ -118,8 +125,7 @@ enum eos_planetary_material_id { struct eos_parameters { struct Til_params Til_iron, Til_granite, Til_water; struct HM80_params HM80_HHe, HM80_ice, HM80_rock; - struct ANEOS_params ANEOS_iron, MANEOS_forsterite; - struct SESAME_params SESAME_iron; + struct SESAME_params SESAME_iron, SESAME_basalt, SESAME_water, SS08_water; }; /** @@ -190,35 +196,29 @@ gas_internal_energy_from_entropy(float density, float entropy, }; break; - /* ANEOS EoS */ - case eos_planetary_type_ANEOS: + /* SESAME EoS */ + case eos_planetary_type_SESAME:; /* Select the material */ switch (mat_id) { - case eos_planetary_id_ANEOS_iron: - return ANEOS_internal_energy_from_entropy(density, entropy, - &eos.ANEOS_iron); + case eos_planetary_id_SESAME_iron: + return SESAME_internal_energy_from_entropy(density, entropy, + &eos.SESAME_iron); break; - case eos_planetary_id_MANEOS_forsterite: - return ANEOS_internal_energy_from_entropy(density, entropy, - &eos.MANEOS_forsterite); + case eos_planetary_id_SESAME_basalt: + return SESAME_internal_energy_from_entropy(density, entropy, + &eos.SESAME_basalt); break; - default: - error("Unknown material ID! 
mat_id = %d", mat_id); - return 0.f; - }; - break; - - /* SESAME EoS */ - case eos_planetary_type_SESAME:; + case eos_planetary_id_SESAME_water: + return SESAME_internal_energy_from_entropy(density, entropy, + &eos.SESAME_water); + break; - /* Select the material */ - switch (mat_id) { - case eos_planetary_id_SESAME_iron: + case eos_planetary_id_SS08_water: return SESAME_internal_energy_from_entropy(density, entropy, - &eos.SESAME_iron); + &eos.SS08_water); break; default: @@ -294,34 +294,29 @@ __attribute__((always_inline)) INLINE static float gas_pressure_from_entropy( }; break; - /* ANEOS EoS */ - case eos_planetary_type_ANEOS: + /* SESAME EoS */ + case eos_planetary_type_SESAME:; /* Select the material */ switch (mat_id) { - case eos_planetary_id_ANEOS_iron: - return ANEOS_pressure_from_entropy(density, entropy, &eos.ANEOS_iron); + case eos_planetary_id_SESAME_iron: + return SESAME_pressure_from_entropy(density, entropy, + &eos.SESAME_iron); break; - case eos_planetary_id_MANEOS_forsterite: - return ANEOS_pressure_from_entropy(density, entropy, - &eos.MANEOS_forsterite); + case eos_planetary_id_SESAME_basalt: + return SESAME_pressure_from_entropy(density, entropy, + &eos.SESAME_basalt); break; - default: - error("Unknown material ID! 
mat_id = %d", mat_id); - return 0.f; - }; - break; - - /* SESAME EoS */ - case eos_planetary_type_SESAME:; + case eos_planetary_id_SESAME_water: + return SESAME_pressure_from_entropy(density, entropy, + &eos.SESAME_water); - /* Select the material */ - switch (mat_id) { - case eos_planetary_id_SESAME_iron: + case eos_planetary_id_SS08_water: return SESAME_pressure_from_entropy(density, entropy, - &eos.SESAME_iron); + &eos.SS08_water); + break; break; default: @@ -398,33 +393,25 @@ __attribute__((always_inline)) INLINE static float gas_entropy_from_pressure( }; break; - /* ANEOS EoS */ - case eos_planetary_type_ANEOS: + /* SESAME EoS */ + case eos_planetary_type_SESAME:; /* Select the material */ switch (mat_id) { - case eos_planetary_id_ANEOS_iron: - return ANEOS_entropy_from_pressure(density, P, &eos.ANEOS_iron); + case eos_planetary_id_SESAME_iron: + return SESAME_entropy_from_pressure(density, P, &eos.SESAME_iron); break; - case eos_planetary_id_MANEOS_forsterite: - return ANEOS_entropy_from_pressure(density, P, - &eos.MANEOS_forsterite); + case eos_planetary_id_SESAME_basalt: + return SESAME_entropy_from_pressure(density, P, &eos.SESAME_basalt); break; - default: - error("Unknown material ID! 
mat_id = %d", mat_id); - return 0.f; - }; - break; - - /* SESAME EoS */ - case eos_planetary_type_SESAME:; + case eos_planetary_id_SESAME_water: + return SESAME_entropy_from_pressure(density, P, &eos.SESAME_water); + break; - /* Select the material */ - switch (mat_id) { - case eos_planetary_id_SESAME_iron: - return SESAME_entropy_from_pressure(density, P, &eos.SESAME_iron); + case eos_planetary_id_SS08_water: + return SESAME_entropy_from_pressure(density, P, &eos.SS08_water); break; default: @@ -501,35 +488,29 @@ __attribute__((always_inline)) INLINE static float gas_soundspeed_from_entropy( }; break; - /* ANEOS EoS */ - case eos_planetary_type_ANEOS: + /* SESAME EoS */ + case eos_planetary_type_SESAME:; /* Select the material */ switch (mat_id) { - case eos_planetary_id_ANEOS_iron: - return ANEOS_soundspeed_from_entropy(density, entropy, - &eos.ANEOS_iron); + case eos_planetary_id_SESAME_iron: + return SESAME_soundspeed_from_entropy(density, entropy, + &eos.SESAME_iron); break; - case eos_planetary_id_MANEOS_forsterite: - return ANEOS_soundspeed_from_entropy(density, entropy, - &eos.MANEOS_forsterite); + case eos_planetary_id_SESAME_basalt: + return SESAME_soundspeed_from_entropy(density, entropy, + &eos.SESAME_basalt); break; - default: - error("Unknown material ID! 
mat_id = %d", mat_id); - return 0.f; - }; - break; - - /* SESAME EoS */ - case eos_planetary_type_SESAME:; + case eos_planetary_id_SESAME_water: + return SESAME_soundspeed_from_entropy(density, entropy, + &eos.SESAME_water); + break; - /* Select the material */ - switch (mat_id) { - case eos_planetary_id_SESAME_iron: + case eos_planetary_id_SS08_water: return SESAME_soundspeed_from_entropy(density, entropy, - &eos.SESAME_iron); + &eos.SS08_water); break; default: @@ -605,35 +586,29 @@ gas_entropy_from_internal_energy(float density, float u, }; break; - /* ANEOS EoS */ - case eos_planetary_type_ANEOS: + /* SESAME EoS */ + case eos_planetary_type_SESAME:; /* Select the material */ switch (mat_id) { - case eos_planetary_id_ANEOS_iron: - return ANEOS_entropy_from_internal_energy(density, u, - &eos.ANEOS_iron); + case eos_planetary_id_SESAME_iron: + return SESAME_entropy_from_internal_energy(density, u, + &eos.SESAME_iron); break; - case eos_planetary_id_MANEOS_forsterite: - return ANEOS_entropy_from_internal_energy(density, u, - &eos.MANEOS_forsterite); + case eos_planetary_id_SESAME_basalt: + return SESAME_entropy_from_internal_energy(density, u, + &eos.SESAME_basalt); break; - default: - error("Unknown material ID! 
mat_id = %d", mat_id); - return 0.f; - }; - break; - - /* SESAME EoS */ - case eos_planetary_type_SESAME:; + case eos_planetary_id_SESAME_water: + return SESAME_entropy_from_internal_energy(density, u, + &eos.SESAME_water); + break; - /* Select the material */ - switch (mat_id) { - case eos_planetary_id_SESAME_iron: + case eos_planetary_id_SS08_water: return SESAME_entropy_from_internal_energy(density, u, - &eos.SESAME_iron); + &eos.SS08_water); break; default: @@ -711,35 +686,29 @@ gas_pressure_from_internal_energy(float density, float u, }; break; - /* ANEOS EoS */ - case eos_planetary_type_ANEOS: + /* SESAME EoS */ + case eos_planetary_type_SESAME:; /* Select the material */ switch (mat_id) { - case eos_planetary_id_ANEOS_iron: - return ANEOS_pressure_from_internal_energy(density, u, - &eos.ANEOS_iron); + case eos_planetary_id_SESAME_iron: + return SESAME_pressure_from_internal_energy(density, u, + &eos.SESAME_iron); break; - case eos_planetary_id_MANEOS_forsterite: - return ANEOS_pressure_from_internal_energy(density, u, - &eos.MANEOS_forsterite); + case eos_planetary_id_SESAME_basalt: + return SESAME_pressure_from_internal_energy(density, u, + &eos.SESAME_basalt); break; - default: - error("Unknown material ID! 
mat_id = %d", mat_id); - return 0.f; - }; - break; - - /* SESAME EoS */ - case eos_planetary_type_SESAME:; + case eos_planetary_id_SESAME_water: + return SESAME_pressure_from_internal_energy(density, u, + &eos.SESAME_water); + break; - /* Select the material */ - switch (mat_id) { - case eos_planetary_id_SESAME_iron: + case eos_planetary_id_SS08_water: return SESAME_pressure_from_internal_energy(density, u, - &eos.SESAME_iron); + &eos.SS08_water); break; default: @@ -820,35 +789,29 @@ gas_internal_energy_from_pressure(float density, float P, }; break; - /* ANEOS EoS */ - case eos_planetary_type_ANEOS: + /* SESAME EoS */ + case eos_planetary_type_SESAME:; /* Select the material */ switch (mat_id) { - case eos_planetary_id_ANEOS_iron: - return ANEOS_internal_energy_from_pressure(density, P, - &eos.ANEOS_iron); + case eos_planetary_id_SESAME_iron: + return SESAME_internal_energy_from_pressure(density, P, + &eos.SESAME_iron); break; - case eos_planetary_id_MANEOS_forsterite: - return ANEOS_internal_energy_from_pressure(density, P, - &eos.MANEOS_forsterite); + case eos_planetary_id_SESAME_basalt: + return SESAME_internal_energy_from_pressure(density, P, + &eos.SESAME_basalt); break; - default: - error("Unknown material ID! 
mat_id = %d", mat_id); - return 0.f; - }; - break; - - /* SESAME EoS */ - case eos_planetary_type_SESAME:; + case eos_planetary_id_SESAME_water: + return SESAME_internal_energy_from_pressure(density, P, + &eos.SESAME_water); + break; - /* Select the material */ - switch (mat_id) { - case eos_planetary_id_SESAME_iron: + case eos_planetary_id_SS08_water: return SESAME_internal_energy_from_pressure(density, P, - &eos.SESAME_iron); + &eos.SS08_water); break; default: @@ -930,35 +893,29 @@ gas_soundspeed_from_internal_energy(float density, float u, }; break; - /* ANEOS EoS */ - case eos_planetary_type_ANEOS: + /* SESAME EoS */ + case eos_planetary_type_SESAME:; /* Select the material */ switch (mat_id) { - case eos_planetary_id_ANEOS_iron: - return ANEOS_soundspeed_from_internal_energy(density, u, - &eos.ANEOS_iron); + case eos_planetary_id_SESAME_iron: + return SESAME_soundspeed_from_internal_energy(density, u, + &eos.SESAME_iron); break; - case eos_planetary_id_MANEOS_forsterite: - return ANEOS_soundspeed_from_internal_energy(density, u, - &eos.MANEOS_forsterite); + case eos_planetary_id_SESAME_basalt: + return SESAME_soundspeed_from_internal_energy(density, u, + &eos.SESAME_basalt); break; - default: - error("Unknown material ID! 
mat_id = %d", mat_id); - return 0.f; - }; - break; - - /* SESAME EoS */ - case eos_planetary_type_SESAME:; + case eos_planetary_id_SESAME_water: + return SESAME_soundspeed_from_internal_energy(density, u, + &eos.SESAME_water); + break; - /* Select the material */ - switch (mat_id) { - case eos_planetary_id_SESAME_iron: + case eos_planetary_id_SS08_water: return SESAME_soundspeed_from_internal_energy(density, u, - &eos.SESAME_iron); + &eos.SS08_water); break; default: @@ -1034,33 +991,26 @@ __attribute__((always_inline)) INLINE static float gas_soundspeed_from_pressure( }; break; - /* ANEOS EoS */ - case eos_planetary_type_ANEOS: + /* SESAME EoS */ + case eos_planetary_type_SESAME:; /* Select the material */ switch (mat_id) { - case eos_planetary_id_ANEOS_iron: - return ANEOS_soundspeed_from_pressure(density, P, &eos.ANEOS_iron); + case eos_planetary_id_SESAME_iron: + return SESAME_soundspeed_from_pressure(density, P, &eos.SESAME_iron); break; - case eos_planetary_id_MANEOS_forsterite: - return ANEOS_soundspeed_from_pressure(density, P, - &eos.MANEOS_forsterite); + case eos_planetary_id_SESAME_basalt: + return SESAME_soundspeed_from_pressure(density, P, + &eos.SESAME_basalt); break; - default: - error("Unknown material ID! 
mat_id = %d", mat_id); - return 0.f; - }; - break; - - /* SESAME EoS */ - case eos_planetary_type_SESAME:; + case eos_planetary_id_SESAME_water: + return SESAME_soundspeed_from_pressure(density, P, &eos.SESAME_water); + break; - /* Select the material */ - switch (mat_id) { - case eos_planetary_id_SESAME_iron: - return SESAME_soundspeed_from_pressure(density, P, &eos.SESAME_iron); + case eos_planetary_id_SS08_water: + return SESAME_soundspeed_from_pressure(density, P, &eos.SS08_water); break; default: @@ -1089,6 +1039,10 @@ __attribute__((always_inline)) INLINE static void eos_init( char HM80_HHe_table_file[PARSER_MAX_LINE_SIZE]; char HM80_ice_table_file[PARSER_MAX_LINE_SIZE]; char HM80_rock_table_file[PARSER_MAX_LINE_SIZE]; + char SESAME_iron_table_file[PARSER_MAX_LINE_SIZE]; + char SESAME_basalt_table_file[PARSER_MAX_LINE_SIZE]; + char SESAME_water_table_file[PARSER_MAX_LINE_SIZE]; + char SS08_water_table_file[PARSER_MAX_LINE_SIZE]; // Set the parameters and material IDs, load tables, etc. 
for each material // and convert to internal units @@ -1116,30 +1070,49 @@ __attribute__((always_inline)) INLINE static void eos_init( parser_get_param_string(params, "EoS:planetary_HM80_rock_table_file", HM80_rock_table_file); - load_HM80_table(&e->HM80_HHe, HM80_HHe_table_file); - load_HM80_table(&e->HM80_ice, HM80_ice_table_file); - load_HM80_table(&e->HM80_rock, HM80_rock_table_file); + load_table_HM80(&e->HM80_HHe, HM80_HHe_table_file); + load_table_HM80(&e->HM80_ice, HM80_ice_table_file); + load_table_HM80(&e->HM80_rock, HM80_rock_table_file); + + prepare_table_HM80(&e->HM80_HHe); + prepare_table_HM80(&e->HM80_ice); + prepare_table_HM80(&e->HM80_rock); convert_units_HM80(&e->HM80_HHe, us); convert_units_HM80(&e->HM80_ice, us); convert_units_HM80(&e->HM80_rock, us); } - // ANEOS - if (parser_get_opt_param_int(params, "EoS:planetary_use_ANEOS", 0)) { - set_ANEOS_iron(&e->ANEOS_iron, eos_planetary_id_ANEOS_iron); - set_MANEOS_forsterite(&e->MANEOS_forsterite, - eos_planetary_id_MANEOS_forsterite); - - convert_units_ANEOS(&e->ANEOS_iron, us); - convert_units_ANEOS(&e->MANEOS_forsterite, us); - } - // SESAME if (parser_get_opt_param_int(params, "EoS:planetary_use_SESAME", 0)) { set_SESAME_iron(&e->SESAME_iron, eos_planetary_id_SESAME_iron); + set_SESAME_basalt(&e->SESAME_basalt, eos_planetary_id_SESAME_basalt); + set_SESAME_water(&e->SESAME_water, eos_planetary_id_SESAME_water); + set_SS08_water(&e->SESAME_water, eos_planetary_id_SS08_water); + + parser_get_param_string(params, "EoS:planetary_SESAME_iron_table_file", + SESAME_iron_table_file); + parser_get_param_string(params, "EoS:planetary_SESAME_basalt_table_file", + SESAME_basalt_table_file); + parser_get_param_string(params, "EoS:planetary_SESAME_water_table_file", + SESAME_water_table_file); + parser_get_param_string(params, "EoS:planetary_SS08_water_table_file", + SS08_water_table_file); + + load_table_SESAME(&e->SESAME_iron, SESAME_iron_table_file); + load_table_SESAME(&e->SESAME_basalt, 
SESAME_basalt_table_file); + load_table_SESAME(&e->SESAME_water, SESAME_water_table_file); + load_table_SESAME(&e->SS08_water, SS08_water_table_file); + + prepare_table_SESAME(&e->SESAME_iron); + prepare_table_SESAME(&e->SESAME_basalt); + prepare_table_SESAME(&e->SESAME_water); + prepare_table_SESAME(&e->SS08_water); convert_units_SESAME(&e->SESAME_iron, us); + convert_units_SESAME(&e->SESAME_basalt, us); + convert_units_SESAME(&e->SESAME_water, us); + convert_units_SESAME(&e->SS08_water, us); } } diff --git a/src/equation_of_state/planetary/get_eos_tables.sh b/src/equation_of_state/planetary/get_eos_tables.sh new file mode 100755 index 0000000000000000000000000000000000000000..c0a751252bb060341a01ac70320a16251069a84e --- /dev/null +++ b/src/equation_of_state/planetary/get_eos_tables.sh @@ -0,0 +1,10 @@ +#!/bin/bash + +# Download the tables of the publicly available planetary equations of state +wget http://virgodb.cosma.dur.ac.uk/swift-webstorage/EoS/planetary_HM80_HHe.txt +wget http://virgodb.cosma.dur.ac.uk/swift-webstorage/EoS/planetary_HM80_ice.txt +wget http://virgodb.cosma.dur.ac.uk/swift-webstorage/EoS/planetary_HM80_rock.txt + +mv planetary_HM80_HHe.txt ../../../examples/ +mv planetary_HM80_ice.txt ../../../examples/ +mv planetary_HM80_rock.txt ../../../examples/ diff --git a/src/equation_of_state/planetary/hm80.h b/src/equation_of_state/planetary/hm80.h index 0131bab6c447e5a8898e29e13dc3f8f6e1c897c6..38e2c9e4022387ee5ab79fafbedc6fc0dc47f49d 100644 --- a/src/equation_of_state/planetary/hm80.h +++ b/src/equation_of_state/planetary/hm80.h @@ -41,110 +41,127 @@ // Hubbard & MacFarlane (1980) parameters struct HM80_params { - float *table_P_rho_u; + float *table_log_P_rho_u; int num_rho, num_u; float log_rho_min, log_rho_max, log_rho_step, inv_log_rho_step, log_u_min, - log_u_max, log_u_step, inv_log_u_step, bulk_mod; + log_u_max, log_u_step, inv_log_u_step, bulk_mod, P_min_for_c_min; enum eos_planetary_material_id mat_id; }; -// Parameter values for each 
material (cgs units) +// Parameter values for each material (SI units) INLINE static void set_HM80_HHe(struct HM80_params *mat, enum eos_planetary_material_id mat_id) { mat->mat_id = mat_id; - mat->num_rho = 100; - mat->num_u = 100; - mat->log_rho_min = -9.2103404f; - mat->log_rho_max = 1.6094379f; - mat->log_rho_step = 0.1092907f; - mat->log_u_min = 9.2103404f; - mat->log_u_max = 22.3327037f; - mat->log_u_step = 0.1325491f; - mat->bulk_mod = 0; - - mat->inv_log_rho_step = 1.f / mat->log_rho_step; - mat->inv_log_u_step = 1.f / mat->log_u_step; + mat->bulk_mod = 0.f; + mat->P_min_for_c_min = 1e3f; } INLINE static void set_HM80_ice(struct HM80_params *mat, enum eos_planetary_material_id mat_id) { mat->mat_id = mat_id; - mat->num_rho = 200; - mat->num_u = 200; - mat->log_rho_min = -6.9077553f; - mat->log_rho_max = 2.7080502f; - mat->log_rho_step = 0.0483206f; - mat->log_u_min = 6.9077553f; - mat->log_u_max = 22.3327037f; - mat->log_u_step = 0.0775123f; - mat->bulk_mod = 2.0e10f; - - mat->inv_log_rho_step = 1.f / mat->log_rho_step; - mat->inv_log_u_step = 1.f / mat->log_u_step; + mat->bulk_mod = 2.0e9f; + mat->P_min_for_c_min = 0.f; } INLINE static void set_HM80_rock(struct HM80_params *mat, enum eos_planetary_material_id mat_id) { mat->mat_id = mat_id; - mat->num_rho = 100; - mat->num_u = 100; - mat->log_rho_min = -6.9077553f; - mat->log_rho_max = 2.9957323f; - mat->log_rho_step = 0.1000352f; - mat->log_u_min = 9.2103404f; - mat->log_u_max = 20.7232658f; - mat->log_u_step = 0.1162922f; - mat->bulk_mod = 3.49e11f; - - mat->inv_log_rho_step = 1.f / mat->log_rho_step; - mat->inv_log_u_step = 1.f / mat->log_u_step; + mat->bulk_mod = 3.49e10f; + mat->P_min_for_c_min = 0.f; } // Read the table from file -INLINE static void load_HM80_table(struct HM80_params *mat, char *table_file) { - // Allocate table memory - mat->table_P_rho_u = - (float *)malloc(mat->num_rho * mat->num_u * sizeof(float *)); +INLINE static void load_table_HM80(struct HM80_params *mat, char *table_file) { 
+ + /* File contents: + header (four lines) + log_rho_min log_rho_max num_rho log_u_min log_u_max num_u (SI) + P_0_0 P_0_1 ... P_0_num_u # Array of pressures (Pa) + P_1_0 ... ... P_1_num_u + ... ... ... ... + P_num_rho_0 ... P_num_rho_num_u + T_0_0 T_0_1 ... T_0_num_u # Array of temperatures (K) + T_1_0 ... ... T_1_num_u + ... ... ... ... + T_num_rho_0 ... T_num_rho_num_u + */ // Load table contents from file FILE *f = fopen(table_file, "r"); - int c; - for (int i = 0; i < mat->num_rho; i++) { - for (int j = 0; j < mat->num_u; j++) { - c = fscanf(f, "%f", &mat->table_P_rho_u[i * mat->num_rho + j]); - if (c != 1) { - error("Failed to read EOS table"); - } + if (f == NULL) error("Failed to open the HM80 EoS file '%s'", table_file); + + // Ignore header lines + char buffer[100]; + for (int i = 0; i < 4; i++) { + if (fgets(buffer, 100, f) == NULL) + error("Failed to read the HM80 EoS file header %s", table_file); + } + + // Table properties + int c = fscanf(f, "%f %f %d %f %f %d", &mat->log_rho_min, &mat->log_rho_max, + &mat->num_rho, &mat->log_u_min, &mat->log_u_max, &mat->num_u); + if (c != 6) error("Failed to read the HM80 EoS table %s", table_file); + mat->log_rho_step = + (mat->log_rho_max - mat->log_rho_min) / (mat->num_rho - 1); + mat->log_u_step = (mat->log_u_max - mat->log_u_min) / (mat->num_u - 1); + mat->inv_log_rho_step = 1.f / mat->log_rho_step; + mat->inv_log_u_step = 1.f / mat->log_u_step; + + // Allocate table memory + mat->table_log_P_rho_u = + (float *)malloc(mat->num_rho * mat->num_u * sizeof(float)); + + // Pressures (not log yet) + for (int i_rho = 0; i_rho < mat->num_rho; i_rho++) { + for (int i_u = 0; i_u < mat->num_u; i_u++) { + c = fscanf(f, "%f", &mat->table_log_P_rho_u[i_rho * mat->num_u + i_u]); + if (c != 1) error("Failed to read the HM80 EoS table %s", table_file); } } fclose(f); } -// Convert from cgs to internal units +// Misc. 
modifications +INLINE static void prepare_table_HM80(struct HM80_params *mat) { + + // Convert pressures to log(pressure) + for (int i_rho = 0; i_rho < mat->num_rho; i_rho++) { + for (int i_u = 0; i_u < mat->num_u; i_u++) { + mat->table_log_P_rho_u[i_rho * mat->num_u + i_u] = + logf(mat->table_log_P_rho_u[i_rho * mat->num_u + i_u]); + } + } +} + +// Convert to internal units INLINE static void convert_units_HM80(struct HM80_params *mat, const struct unit_system *us) { - const float Mbar_to_Ba = 1e12f; // Convert Megabar to Barye - const float J_kg_to_erg_g = 1e4f; // Convert J/kg to erg/g + struct unit_system si; + units_init_si(&si); - // Table densities in cgs - mat->log_rho_min -= logf(units_cgs_conversion_factor(us, UNIT_CONV_DENSITY)); - mat->log_rho_max -= logf(units_cgs_conversion_factor(us, UNIT_CONV_DENSITY)); + // All table values in SI + mat->log_rho_min += logf(units_cgs_conversion_factor(&si, UNIT_CONV_DENSITY) / + units_cgs_conversion_factor(us, UNIT_CONV_DENSITY)); + mat->log_rho_max += logf(units_cgs_conversion_factor(&si, UNIT_CONV_DENSITY) / + units_cgs_conversion_factor(us, UNIT_CONV_DENSITY)); - // Table energies in SI mat->log_u_min += - logf(J_kg_to_erg_g / + logf(units_cgs_conversion_factor(&si, UNIT_CONV_ENERGY_PER_UNIT_MASS) / units_cgs_conversion_factor(us, UNIT_CONV_ENERGY_PER_UNIT_MASS)); mat->log_u_max += - logf(J_kg_to_erg_g / + logf(units_cgs_conversion_factor(&si, UNIT_CONV_ENERGY_PER_UNIT_MASS) / units_cgs_conversion_factor(us, UNIT_CONV_ENERGY_PER_UNIT_MASS)); - // Table Pressures in Mbar - for (int i = 0; i < mat->num_rho; i++) { - for (int j = 0; j < mat->num_u; j++) { - mat->table_P_rho_u[i * mat->num_rho + j] *= - Mbar_to_Ba / units_cgs_conversion_factor(us, UNIT_CONV_PRESSURE); + for (int i_rho = 0; i_rho < mat->num_rho; i_rho++) { + for (int i_u = 0; i_u < mat->num_u; i_u++) { + mat->table_log_P_rho_u[i_rho * mat->num_u + i_u] += + logf(units_cgs_conversion_factor(&si, UNIT_CONV_PRESSURE) / + units_cgs_conversion_factor(us, 
UNIT_CONV_PRESSURE)); } } - mat->bulk_mod /= units_cgs_conversion_factor(us, UNIT_CONV_PRESSURE); + mat->bulk_mod *= units_cgs_conversion_factor(&si, UNIT_CONV_PRESSURE) / + units_cgs_conversion_factor(us, UNIT_CONV_PRESSURE); + mat->P_min_for_c_min *= units_cgs_conversion_factor(&si, UNIT_CONV_PRESSURE) / + units_cgs_conversion_factor(us, UNIT_CONV_PRESSURE); } // gas_internal_energy_from_entropy @@ -153,7 +170,7 @@ INLINE static float HM80_internal_energy_from_entropy( error("This EOS function is not yet implemented!"); - return 0; + return 0.f; } // gas_pressure_from_entropy @@ -162,7 +179,7 @@ INLINE static float HM80_pressure_from_entropy(float density, float entropy, error("This EOS function is not yet implemented!"); - return 0; + return 0.f; } // gas_entropy_from_pressure @@ -171,7 +188,7 @@ INLINE static float HM80_entropy_from_pressure(float density, float pressure, error("This EOS function is not yet implemented!"); - return 0; + return 0.f; } // gas_soundspeed_from_entropy @@ -180,75 +197,62 @@ INLINE static float HM80_soundspeed_from_entropy( error("This EOS function is not yet implemented!"); - return 0; + return 0.f; } // gas_entropy_from_internal_energy INLINE static float HM80_entropy_from_internal_energy( float density, float u, const struct HM80_params *mat) { - return 0; + return 0.f; } // gas_pressure_from_internal_energy INLINE static float HM80_pressure_from_internal_energy( float density, float u, const struct HM80_params *mat) { - float P; + float log_P, log_P_1, log_P_2, log_P_3, log_P_4; if (u <= 0.f) { return 0.f; } - int rho_idx, u_idx; + int idx_rho, idx_u; float intp_rho, intp_u; const float log_rho = logf(density); const float log_u = logf(u); - // 2D interpolation (linear in log(rho), log(u)) to find P(rho, u) - rho_idx = floorf((log_rho - mat->log_rho_min) * mat->inv_log_rho_step); - u_idx = floorf((log_u - mat->log_u_min) * mat->inv_log_u_step); + // 2D interpolation (bilinear with log(rho), log(u)) to find P(rho, u) + idx_rho = 
floor((log_rho - mat->log_rho_min) * mat->inv_log_rho_step); + idx_u = floor((log_u - mat->log_u_min) * mat->inv_log_u_step); - intp_rho = (log_rho - mat->log_rho_min - rho_idx * mat->log_rho_step) * - mat->inv_log_rho_step; - intp_u = - (log_u - mat->log_u_min - u_idx * mat->log_u_step) * mat->inv_log_u_step; - - // Return zero pressure if below the table minimum/a - // Extrapolate the pressure for low densities - if (rho_idx < 0) { // Too-low rho - P = expf(logf((1 - intp_u) * mat->table_P_rho_u[u_idx] + - intp_u * mat->table_P_rho_u[u_idx + 1]) + - log_rho - mat->log_rho_min); - if (u_idx < 0) { // and too-low u - P = 0.f; - } - } else if (u_idx < 0) { // Too-low u - P = 0.f; - } - // Return an edge value if above the table maximum/a - else if (rho_idx >= mat->num_rho - 1) { // Too-high rho - if (u_idx >= mat->num_u - 1) { // and too-high u - P = mat->table_P_rho_u[(mat->num_rho - 1) * mat->num_u + mat->num_u - 1]; - } else { - P = mat->table_P_rho_u[(mat->num_rho - 1) * mat->num_u + u_idx]; - } - } else if (u_idx >= mat->num_u - 1) { // Too-high u - P = mat->table_P_rho_u[rho_idx * mat->num_u + mat->num_u - 1]; + // If outside the table then extrapolate from the edge and edge-but-one values + if (idx_rho <= -1) { + idx_rho = 0; + } else if (idx_rho >= mat->num_rho - 1) { + idx_rho = mat->num_rho - 2; } - // Normal interpolation within the table - else { - P = (1.f - intp_rho) * - ((1.f - intp_u) * mat->table_P_rho_u[rho_idx * mat->num_u + u_idx] + - intp_u * mat->table_P_rho_u[rho_idx * mat->num_u + u_idx + 1]) + - intp_rho * - ((1 - intp_u) * - mat->table_P_rho_u[(rho_idx + 1) * mat->num_u + u_idx] + - intp_u * - mat->table_P_rho_u[(rho_idx + 1) * mat->num_u + u_idx + 1]); + if (idx_u <= -1) { + idx_u = 0; + } else if (idx_u >= mat->num_u - 1) { + idx_u = mat->num_u - 2; } - return P; + intp_rho = (log_rho - mat->log_rho_min - idx_rho * mat->log_rho_step) * + mat->inv_log_rho_step; + intp_u = + (log_u - mat->log_u_min - idx_u * mat->log_u_step) * 
mat->inv_log_u_step; + + // Table values + log_P_1 = mat->table_log_P_rho_u[idx_rho * mat->num_u + idx_u]; + log_P_2 = mat->table_log_P_rho_u[idx_rho * mat->num_u + idx_u + 1]; + log_P_3 = mat->table_log_P_rho_u[(idx_rho + 1) * mat->num_u + idx_u]; + log_P_4 = mat->table_log_P_rho_u[(idx_rho + 1) * mat->num_u + idx_u + 1]; + + log_P = (1.f - intp_rho) * ((1.f - intp_u) * log_P_1 + intp_u * log_P_2) + + intp_rho * ((1.f - intp_u) * log_P_3 + intp_u * log_P_4); + + return expf(log_P); } // gas_internal_energy_from_pressure @@ -257,7 +261,7 @@ INLINE static float HM80_internal_energy_from_pressure( error("This EOS function is not yet implemented!"); - return 0; + return 0.f; } // gas_soundspeed_from_internal_energy @@ -274,6 +278,10 @@ INLINE static float HM80_soundspeed_from_internal_energy( else { P = HM80_pressure_from_internal_energy(density, u, mat); c = sqrtf(hydro_gamma * P / density); + + if (c <= 0) { + c = sqrtf(hydro_gamma * mat->P_min_for_c_min / density); + } } return c; @@ -283,18 +291,9 @@ INLINE static float HM80_soundspeed_from_internal_energy( INLINE static float HM80_soundspeed_from_pressure( float density, float P, const struct HM80_params *mat) { - float c; - - // Bulk modulus - if (mat->bulk_mod != 0) { - c = sqrtf(mat->bulk_mod / density); - } - // Ideal gas - else { - c = sqrtf(hydro_gamma * P / density); - } + error("This EOS function is not yet implemented!"); - return c; + return 0.f; } #endif /* SWIFT_HUBBARD_MACFARLANE_EQUATION_OF_STATE_H */ diff --git a/src/equation_of_state/planetary/sesame.h b/src/equation_of_state/planetary/sesame.h index 76574c2ad00282a82649705cd8a2b5a1b428d867..11c16964602b28c0d1a080b6c262ff20c1f5b9cb 100644 --- a/src/equation_of_state/planetary/sesame.h +++ b/src/equation_of_state/planetary/sesame.h @@ -40,21 +40,217 @@ #include "inline.h" #include "physical_constants.h" #include "units.h" +#include "utilities.h" // SESAME parameters struct SESAME_params { + float *table_log_rho; + float *table_log_u_rho_T; + float 
*table_P_rho_T; + float *table_c_rho_T; + float *table_s_rho_T; + int num_rho, num_T; + float P_tiny, c_tiny; enum eos_planetary_material_id mat_id; }; // Parameter values for each material (cgs units) INLINE static void set_SESAME_iron(struct SESAME_params *mat, enum eos_planetary_material_id mat_id) { + // SESAME 2140 mat->mat_id = mat_id; } +INLINE static void set_SESAME_basalt(struct SESAME_params *mat, + enum eos_planetary_material_id mat_id) { + // SESAME 7530 + mat->mat_id = mat_id; +} +INLINE static void set_SESAME_water(struct SESAME_params *mat, + enum eos_planetary_material_id mat_id) { + // SESAME 7154 + mat->mat_id = mat_id; +} +INLINE static void set_SS08_water(struct SESAME_params *mat, + enum eos_planetary_material_id mat_id) { + // Senft & Stewart (2008) + mat->mat_id = mat_id; +} + +// Read the tables from file +INLINE static void load_table_SESAME(struct SESAME_params *mat, + char *table_file) { + + // Load table contents from file + FILE *f = fopen(table_file, "r"); + if (f == NULL) error("Failed to open the SESAME EoS file '%s'", table_file); + + // Ignore header lines + char buffer[100]; + for (int i = 0; i < 5; i++) { + if (fgets(buffer, 100, f) == NULL) + error("Failed to read the SESAME EoS file header %s", table_file); + } + float ignore; + + // Table properties + int c = fscanf(f, "%d %d", &mat->num_rho, &mat->num_T); + if (c != 2) error("Failed to read the SESAME EoS table %s", table_file); + + // Ignore the first elements of rho = 0, T = 0 + mat->num_rho--; + mat->num_T--; + + // Allocate table memory + mat->table_log_rho = (float *)malloc(mat->num_rho * sizeof(float)); + mat->table_log_u_rho_T = + (float *)malloc(mat->num_rho * mat->num_T * sizeof(float)); + mat->table_P_rho_T = + (float *)malloc(mat->num_rho * mat->num_T * sizeof(float)); + mat->table_c_rho_T = + (float *)malloc(mat->num_rho * mat->num_T * sizeof(float)); + mat->table_s_rho_T = + (float *)malloc(mat->num_rho * mat->num_T * sizeof(float)); + + // Densities (not log 
yet) + for (int i_rho = -1; i_rho < mat->num_rho; i_rho++) { + // Ignore the first elements of rho = 0, T = 0 + if (i_rho == -1) { + c = fscanf(f, "%f", &ignore); + if (c != 1) error("Failed to read the SESAME EoS table %s", table_file); + } else { + c = fscanf(f, "%f", &mat->table_log_rho[i_rho]); + if (c != 1) error("Failed to read the SESAME EoS table %s", table_file); + } + } + + // Temperatures (ignored) + for (int i_T = -1; i_T < mat->num_T; i_T++) { + c = fscanf(f, "%f", &ignore); + if (c != 1) error("Failed to read the SESAME EoS table %s", table_file); + } + + // Sp. int. energies (not log yet), pressures, sound speeds, and entropies + for (int i_T = -1; i_T < mat->num_T; i_T++) { + for (int i_rho = -1; i_rho < mat->num_rho; i_rho++) { + // Ignore the first elements of rho = 0, T = 0 + if ((i_T == -1) || (i_rho == -1)) { + c = fscanf(f, "%f %f %f %f", &ignore, &ignore, &ignore, &ignore); + if (c != 4) error("Failed to read the SESAME EoS table %s", table_file); + } else { + c = fscanf(f, "%f %f %f %f", + &mat->table_log_u_rho_T[i_rho * mat->num_T + i_T], + &mat->table_P_rho_T[i_rho * mat->num_T + i_T], + &mat->table_c_rho_T[i_rho * mat->num_T + i_T], + &mat->table_s_rho_T[i_rho * mat->num_T + i_T]); + if (c != 4) error("Failed to read the SESAME EoS table %s", table_file); + } + } + } + + fclose(f); +} -// Convert from cgs to internal units +// Misc. modifications +INLINE static void prepare_table_SESAME(struct SESAME_params *mat) { + + // Convert densities to log(density) + for (int i_rho = 0; i_rho < mat->num_rho; i_rho++) { + mat->table_log_rho[i_rho] = logf(mat->table_log_rho[i_rho]); + } + + // Convert sp. int. energies to log(sp. int. 
energy) + for (int i_rho = 0; i_rho < mat->num_rho; i_rho++) { + for (int i_T = 0; i_T < mat->num_T; i_T++) { + // If not positive then set very small for the log + if (mat->table_log_u_rho_T[i_rho * mat->num_T + i_T] <= 0) { + mat->table_log_u_rho_T[i_rho * mat->num_T + i_T] = 1.f; + } + + mat->table_log_u_rho_T[i_rho * mat->num_T + i_T] = + logf(mat->table_log_u_rho_T[i_rho * mat->num_T + i_T]); + } + } + + // Tiny pressure and soundspeed, initialise in the middle + mat->P_tiny = + mat->table_P_rho_T[mat->num_rho / 2 * mat->num_T + mat->num_T / 2]; + mat->c_tiny = + mat->table_c_rho_T[mat->num_rho / 2 * mat->num_T + mat->num_T / 2]; + + // Enforce that the 1D arrays of u (at each rho) are monotonic + // This is necessary because, for some high-density u slices at very low T, + // u decreases (very slightly) with T, which makes the interpolation fail + for (int i_rho = 0; i_rho < mat->num_rho; i_rho++) { + for (int i_T = mat->num_T - 1; i_T > 0; i_T--) { + + // If the one-lower-T u is greater than this u + if (mat->table_log_u_rho_T[i_rho * mat->num_T + i_T] < + mat->table_log_u_rho_T[i_rho * mat->num_T + i_T - 1]) { + + // Replace it and all elements below it with that value + for (int j_u = 0; j_u < i_T; j_u++) { + mat->table_log_u_rho_T[i_rho * mat->num_T + j_u] = + mat->table_log_u_rho_T[i_rho * mat->num_T + i_T]; + } + break; + } + + // Smallest positive pressure and sound speed + if ((mat->table_P_rho_T[i_rho * mat->num_T + i_T] < mat->P_tiny) && + (mat->table_P_rho_T[i_rho * mat->num_T + i_T] > 0)) { + mat->P_tiny = mat->table_P_rho_T[i_rho * mat->num_T + i_T]; + } + if ((mat->table_c_rho_T[i_rho * mat->num_T + i_T] < mat->c_tiny) && + (mat->table_c_rho_T[i_rho * mat->num_T + i_T] > 0)) { + mat->c_tiny = mat->table_c_rho_T[i_rho * mat->num_T + i_T]; + } + } + } + + // Tiny pressure to allow interpolation near non-positive values + mat->P_tiny *= 1e-3f; + mat->c_tiny *= 1e-3f; +} + +// Convert to internal units INLINE static void convert_units_SESAME(struct 
SESAME_params *mat, - const struct unit_system *us) {} + const struct unit_system *us) { + + struct unit_system si; + units_init_si(&si); + + // All table values in SI + // Densities (log) + for (int i_rho = 0; i_rho < mat->num_rho; i_rho++) { + mat->table_log_rho[i_rho] += + logf(units_cgs_conversion_factor(&si, UNIT_CONV_DENSITY) / + units_cgs_conversion_factor(us, UNIT_CONV_DENSITY)); + } + + // Sp. Int. Energies (log), pressures, and sound speeds + for (int i_rho = 0; i_rho < mat->num_rho; i_rho++) { + for (int i_T = 0; i_T < mat->num_T; i_T++) { + mat->table_log_u_rho_T[i_rho * mat->num_T + i_T] += logf( + units_cgs_conversion_factor(&si, UNIT_CONV_ENERGY_PER_UNIT_MASS) / + units_cgs_conversion_factor(us, UNIT_CONV_ENERGY_PER_UNIT_MASS)); + mat->table_P_rho_T[i_rho * mat->num_T + i_T] *= + units_cgs_conversion_factor(&si, UNIT_CONV_PRESSURE) / + units_cgs_conversion_factor(us, UNIT_CONV_PRESSURE); + mat->table_c_rho_T[i_rho * mat->num_T + i_T] *= + units_cgs_conversion_factor(&si, UNIT_CONV_SPEED) / + units_cgs_conversion_factor(us, UNIT_CONV_SPEED); + mat->table_s_rho_T[i_rho * mat->num_T + i_T] *= + units_cgs_conversion_factor(&si, UNIT_CONV_ENERGY_PER_UNIT_MASS) / + units_cgs_conversion_factor(us, UNIT_CONV_ENTROPY); + } + } + + // Tiny pressure and sound speed + mat->P_tiny *= units_cgs_conversion_factor(&si, UNIT_CONV_PRESSURE) / + units_cgs_conversion_factor(us, UNIT_CONV_PRESSURE); + mat->c_tiny *= units_cgs_conversion_factor(&si, UNIT_CONV_SPEED) / + units_cgs_conversion_factor(us, UNIT_CONV_SPEED); +} // gas_internal_energy_from_entropy INLINE static float SESAME_internal_energy_from_entropy( @@ -62,7 +258,7 @@ INLINE static float SESAME_internal_energy_from_entropy( error("This EOS function is not yet implemented!"); - return 0; + return 0.f; } // gas_pressure_from_entropy @@ -71,7 +267,7 @@ INLINE static float SESAME_pressure_from_entropy( error("This EOS function is not yet implemented!"); - return 0; + return 0.f; } // gas_entropy_from_pressure @@ 
-80,7 +276,7 @@ INLINE static float SESAME_entropy_from_pressure( error("This EOS function is not yet implemented!"); - return 0; + return 0.f; } // gas_soundspeed_from_entropy @@ -89,25 +285,109 @@ INLINE static float SESAME_soundspeed_from_entropy( error("This EOS function is not yet implemented!"); - return 0; + return 0.f; } // gas_entropy_from_internal_energy INLINE static float SESAME_entropy_from_internal_energy( float density, float u, const struct SESAME_params *mat) { - error("This EOS function is not yet implemented!"); - - return 0; + return 0.f; } // gas_pressure_from_internal_energy INLINE static float SESAME_pressure_from_internal_energy( float density, float u, const struct SESAME_params *mat) { - error("This EOS function is not yet implemented!"); - - return 0; + float P, P_1, P_2, P_3, P_4; + + if (u <= 0.f) { + return 0.f; + } + + int idx_rho, idx_u_1, idx_u_2; + float intp_rho, intp_u_1, intp_u_2; + const float log_rho = logf(density); + const float log_u = logf(u); + + // 2D interpolation (bilinear with log(rho), log(u)) to find P(rho, u) + // Density index + idx_rho = + find_value_in_monot_incr_array(log_rho, mat->table_log_rho, mat->num_rho); + + // Sp. int. 
energy at this and the next density (in relevant slice of u array) + idx_u_1 = find_value_in_monot_incr_array( + log_u, mat->table_log_u_rho_T + idx_rho * mat->num_T, mat->num_T); + idx_u_2 = find_value_in_monot_incr_array( + log_u, mat->table_log_u_rho_T + (idx_rho + 1) * mat->num_T, mat->num_T); + + // If outside the table then extrapolate from the edge and edge-but-one values + if (idx_rho <= -1) { + idx_rho = 0; + } else if (idx_rho >= mat->num_rho) { + idx_rho = mat->num_rho - 2; + } + if (idx_u_1 <= -1) { + idx_u_1 = 0; + } else if (idx_u_1 >= mat->num_T) { + idx_u_1 = mat->num_T - 2; + } + if (idx_u_2 <= -1) { + idx_u_2 = 0; + } else if (idx_u_2 >= mat->num_T) { + idx_u_2 = mat->num_T - 2; + } + + intp_rho = (log_rho - mat->table_log_rho[idx_rho]) / + (mat->table_log_rho[idx_rho + 1] - mat->table_log_rho[idx_rho]); + intp_u_1 = (log_u - mat->table_log_u_rho_T[idx_rho * mat->num_T + idx_u_1]) / + (mat->table_log_u_rho_T[idx_rho * mat->num_T + (idx_u_1 + 1)] - + mat->table_log_u_rho_T[idx_rho * mat->num_T + idx_u_1]); + intp_u_2 = + (log_u - mat->table_log_u_rho_T[(idx_rho + 1) * mat->num_T + idx_u_2]) / + (mat->table_log_u_rho_T[(idx_rho + 1) * mat->num_T + (idx_u_2 + 1)] - + mat->table_log_u_rho_T[(idx_rho + 1) * mat->num_T + idx_u_2]); + + // Table values + P_1 = mat->table_P_rho_T[idx_rho * mat->num_T + idx_u_1]; + P_2 = mat->table_P_rho_T[idx_rho * mat->num_T + idx_u_1 + 1]; + P_3 = mat->table_P_rho_T[(idx_rho + 1) * mat->num_T + idx_u_2]; + P_4 = mat->table_P_rho_T[(idx_rho + 1) * mat->num_T + idx_u_2 + 1]; + + // If more than two table values are non-positive then return zero + int num_non_pos = 0; + if (P_1 <= 0.f) num_non_pos++; + if (P_2 <= 0.f) num_non_pos++; + if (P_3 <= 0.f) num_non_pos++; + if (P_4 <= 0.f) num_non_pos++; + if (num_non_pos > 2) { + return 0.f; + } + // If just one or two are non-positive then replace them with a tiny value + else if (num_non_pos > 0) { + // Unless already trying to extrapolate in which case return zero + if 
((intp_rho < 0.f) || (intp_u_1 < 0.f) || (intp_u_2 < 0.f)) { + return 0.f; + } + if (P_1 <= 0.f) P_1 = mat->P_tiny; + if (P_2 <= 0.f) P_2 = mat->P_tiny; + if (P_3 <= 0.f) P_3 = mat->P_tiny; + if (P_4 <= 0.f) P_4 = mat->P_tiny; + } + + // Interpolate with the log values + P_1 = logf(P_1); + P_2 = logf(P_2); + P_3 = logf(P_3); + P_4 = logf(P_4); + + P = (1.f - intp_rho) * ((1.f - intp_u_1) * P_1 + intp_u_1 * P_2) + + intp_rho * ((1.f - intp_u_2) * P_3 + intp_u_2 * P_4); + + // Convert back from log + P = expf(P); + + return P; } // gas_internal_energy_from_pressure @@ -116,16 +396,102 @@ INLINE static float SESAME_internal_energy_from_pressure( error("This EOS function is not yet implemented!"); - return 0; + return 0.f; } // gas_soundspeed_from_internal_energy INLINE static float SESAME_soundspeed_from_internal_energy( float density, float u, const struct SESAME_params *mat) { - error("This EOS function is not yet implemented!"); - - return 0; + float c, c_1, c_2, c_3, c_4; + + if (u <= 0.f) { + return 0.f; + } + + int idx_rho, idx_u_1, idx_u_2; + float intp_rho, intp_u_1, intp_u_2; + const float log_rho = logf(density); + const float log_u = logf(u); + + // 2D interpolation (bilinear with log(rho), log(u)) to find c(rho, u) + // Density index + idx_rho = + find_value_in_monot_incr_array(log_rho, mat->table_log_rho, mat->num_rho); + + // Sp. int. 
energy at this and the next density (in relevant slice of u array) + idx_u_1 = find_value_in_monot_incr_array( + log_u, mat->table_log_u_rho_T + idx_rho * mat->num_T, mat->num_T); + idx_u_2 = find_value_in_monot_incr_array( + log_u, mat->table_log_u_rho_T + (idx_rho + 1) * mat->num_T, mat->num_T); + + // If outside the table then extrapolate from the edge and edge-but-one values + if (idx_rho <= -1) { + idx_rho = 0; + } else if (idx_rho >= mat->num_rho) { + idx_rho = mat->num_rho - 2; + } + if (idx_u_1 <= -1) { + idx_u_1 = 0; + } else if (idx_u_1 >= mat->num_T) { + idx_u_1 = mat->num_T - 2; + } + if (idx_u_2 <= -1) { + idx_u_2 = 0; + } else if (idx_u_2 >= mat->num_T) { + idx_u_2 = mat->num_T - 2; + } + + intp_rho = (log_rho - mat->table_log_rho[idx_rho]) / + (mat->table_log_rho[idx_rho + 1] - mat->table_log_rho[idx_rho]); + intp_u_1 = (log_u - mat->table_log_u_rho_T[idx_rho * mat->num_T + idx_u_1]) / + (mat->table_log_u_rho_T[idx_rho * mat->num_T + (idx_u_1 + 1)] - + mat->table_log_u_rho_T[idx_rho * mat->num_T + idx_u_1]); + intp_u_2 = + (log_u - mat->table_log_u_rho_T[(idx_rho + 1) * mat->num_T + idx_u_2]) / + (mat->table_log_u_rho_T[(idx_rho + 1) * mat->num_T + (idx_u_2 + 1)] - + mat->table_log_u_rho_T[(idx_rho + 1) * mat->num_T + idx_u_2]); + + // Table values + c_1 = mat->table_c_rho_T[idx_rho * mat->num_T + idx_u_1]; + c_2 = mat->table_c_rho_T[idx_rho * mat->num_T + idx_u_1 + 1]; + c_3 = mat->table_c_rho_T[(idx_rho + 1) * mat->num_T + idx_u_2]; + c_4 = mat->table_c_rho_T[(idx_rho + 1) * mat->num_T + idx_u_2 + 1]; + + // If more than two table values are non-positive then return zero + int num_non_pos = 0; + if (c_1 <= 0.f) num_non_pos++; + if (c_2 <= 0.f) num_non_pos++; + if (c_3 <= 0.f) num_non_pos++; + if (c_4 <= 0.f) num_non_pos++; + if (num_non_pos > 2) { + return mat->c_tiny; + } + // If just one or two are non-positive then replace them with a tiny value + else if (num_non_pos > 0) { + // Unless already trying to extrapolate in which case return zero + 
if ((intp_rho < 0.f) || (intp_u_1 < 0.f) || (intp_u_2 < 0.f)) { + return mat->c_tiny; + } + if (c_1 <= 0.f) c_1 = mat->c_tiny; + if (c_2 <= 0.f) c_2 = mat->c_tiny; + if (c_3 <= 0.f) c_3 = mat->c_tiny; + if (c_4 <= 0.f) c_4 = mat->c_tiny; + } + + // Interpolate with the log values + c_1 = logf(c_1); + c_2 = logf(c_2); + c_3 = logf(c_3); + c_4 = logf(c_4); + + c = (1.f - intp_rho) * ((1.f - intp_u_1) * c_1 + intp_u_1 * c_2) + + intp_rho * ((1.f - intp_u_2) * c_3 + intp_u_2 * c_4); + + // Convert back from log + c = expf(c); + + return c; } // gas_soundspeed_from_pressure @@ -134,7 +500,7 @@ INLINE static float SESAME_soundspeed_from_pressure( error("This EOS function is not yet implemented!"); - return 0; + return 0.f; } #endif /* SWIFT_SESAME_EQUATION_OF_STATE_H */ diff --git a/src/equation_of_state/planetary/tillotson.h b/src/equation_of_state/planetary/tillotson.h index d5b6d5c35d5edf9e114fe7f010c4f5b1e2327a83..1a4210699380b3b0398506dde7fce6ca8055e4dc 100644 --- a/src/equation_of_state/planetary/tillotson.h +++ b/src/equation_of_state/planetary/tillotson.h @@ -41,22 +41,22 @@ // Tillotson parameters struct Til_params { - float rho_0, a, b, A, B, E_0, E_iv, E_cv, alpha, beta, eta_min, P_min; + float rho_0, a, b, A, B, u_0, u_iv, u_cv, alpha, beta, eta_min, P_min; enum eos_planetary_material_id mat_id; }; -// Parameter values for each material (cgs units) +// Parameter values for each material (SI units) INLINE static void set_Til_iron(struct Til_params *mat, enum eos_planetary_material_id mat_id) { mat->mat_id = mat_id; - mat->rho_0 = 7.800f; + mat->rho_0 = 7800.0f; mat->a = 0.5f; mat->b = 1.5f; - mat->A = 1.28e12f; - mat->B = 1.05e12f; - mat->E_0 = 9.5e10f; - mat->E_iv = 2.4e10f; - mat->E_cv = 8.67e10f; + mat->A = 1.28e11f; + mat->B = 1.05e11f; + mat->u_0 = 9.5e9f; + mat->u_iv = 2.4e9f; + mat->u_cv = 8.67e9f; mat->alpha = 5.0f; mat->beta = 5.0f; mat->eta_min = 0.0f; @@ -65,14 +65,14 @@ INLINE static void set_Til_iron(struct Til_params *mat, INLINE static void 
set_Til_granite(struct Til_params *mat, enum eos_planetary_material_id mat_id) { mat->mat_id = mat_id; - mat->rho_0 = 2.680f; + mat->rho_0 = 2680.0f; mat->a = 0.5f; mat->b = 1.3f; - mat->A = 1.8e11f; - mat->B = 1.8e11f; - mat->E_0 = 1.6e11f; - mat->E_iv = 3.5e10f; - mat->E_cv = 1.8e11f; + mat->A = 1.8e10f; + mat->B = 1.8e10f; + mat->u_0 = 1.6e10f; + mat->u_iv = 3.5e9f; + mat->u_cv = 1.8e10f; mat->alpha = 5.0f; mat->beta = 5.0f; mat->eta_min = 0.0f; @@ -81,30 +81,43 @@ INLINE static void set_Til_granite(struct Til_params *mat, INLINE static void set_Til_water(struct Til_params *mat, enum eos_planetary_material_id mat_id) { mat->mat_id = mat_id; - mat->rho_0 = 0.998f; + mat->rho_0 = 998.0f; mat->a = 0.7f; mat->b = 0.15f; - mat->A = 2.18e10f; - mat->B = 1.325e11f; - mat->E_0 = 7.0e10f; - mat->E_iv = 4.19e9f; - mat->E_cv = 2.69e10f; + mat->A = 2.18e9f; + mat->B = 1.325e10f; + mat->u_0 = 7.0e9f; + mat->u_iv = 4.19e8f; + mat->u_cv = 2.69e9f; mat->alpha = 10.0f; mat->beta = 5.0f; - mat->eta_min = 0.915f; + mat->eta_min = 0.9f; mat->P_min = 0.0f; } -// Convert from cgs to internal units +// Convert to internal units INLINE static void convert_units_Til(struct Til_params *mat, const struct unit_system *us) { + struct unit_system si; + units_init_si(&si); + + // SI to cgs + mat->rho_0 *= units_cgs_conversion_factor(&si, UNIT_CONV_DENSITY); + mat->A *= units_cgs_conversion_factor(&si, UNIT_CONV_PRESSURE); + mat->B *= units_cgs_conversion_factor(&si, UNIT_CONV_PRESSURE); + mat->u_0 *= units_cgs_conversion_factor(&si, UNIT_CONV_ENERGY_PER_UNIT_MASS); + mat->u_iv *= units_cgs_conversion_factor(&si, UNIT_CONV_ENERGY_PER_UNIT_MASS); + mat->u_cv *= units_cgs_conversion_factor(&si, UNIT_CONV_ENERGY_PER_UNIT_MASS); + mat->P_min *= units_cgs_conversion_factor(&si, UNIT_CONV_PRESSURE); + + // cgs to internal mat->rho_0 /= units_cgs_conversion_factor(us, UNIT_CONV_DENSITY); mat->A /= units_cgs_conversion_factor(us, UNIT_CONV_PRESSURE); mat->B /= units_cgs_conversion_factor(us, 
UNIT_CONV_PRESSURE); - mat->E_0 /= units_cgs_conversion_factor(us, UNIT_CONV_ENERGY_PER_UNIT_MASS); - mat->E_iv /= units_cgs_conversion_factor(us, UNIT_CONV_ENERGY_PER_UNIT_MASS); - mat->E_cv /= units_cgs_conversion_factor(us, UNIT_CONV_ENERGY_PER_UNIT_MASS); + mat->u_0 /= units_cgs_conversion_factor(us, UNIT_CONV_ENERGY_PER_UNIT_MASS); + mat->u_iv /= units_cgs_conversion_factor(us, UNIT_CONV_ENERGY_PER_UNIT_MASS); + mat->u_cv /= units_cgs_conversion_factor(us, UNIT_CONV_ENERGY_PER_UNIT_MASS); mat->P_min /= units_cgs_conversion_factor(us, UNIT_CONV_PRESSURE); } @@ -114,7 +127,7 @@ INLINE static float Til_internal_energy_from_entropy( error("This EOS function is not yet implemented!"); - return 0; + return 0.f; } // gas_pressure_from_entropy @@ -123,7 +136,7 @@ INLINE static float Til_pressure_from_entropy(float density, float entropy, error("This EOS function is not yet implemented!"); - return 0; + return 0.f; } // gas_entropy_from_pressure @@ -132,7 +145,7 @@ INLINE static float Til_entropy_from_pressure(float density, float pressure, error("This EOS function is not yet implemented!"); - return 0; + return 0.f; } // gas_soundspeed_from_entropy @@ -141,14 +154,14 @@ INLINE static float Til_soundspeed_from_entropy(float density, float entropy, error("This EOS function is not yet implemented!"); - return 0; + return 0.f; } // gas_entropy_from_internal_energy INLINE static float Til_entropy_from_internal_energy( float density, float u, const struct Til_params *mat) { - return 0; + return 0.f; } // gas_pressure_from_internal_energy @@ -156,35 +169,37 @@ INLINE static float Til_pressure_from_internal_energy( float density, float u, const struct Til_params *mat) { const float eta = density / mat->rho_0; + const float eta_sq = eta * eta; const float mu = eta - 1.f; const float nu = 1.f / eta - 1.f; + const float w = u / (mat->u_0 * eta_sq) + 1.f; + const float w_inv = 1.f / w; float P_c, P_e, P; // Condensed or cold if (eta < mat->eta_min) { P_c = 0.f; } else { - P_c = 
(mat->a + mat->b / (u / (mat->E_0 * eta * eta) + 1.f)) * density * u + - mat->A * mu + mat->B * mu * mu; + P_c = (mat->a + mat->b * w_inv) * density * u + mat->A * mu + + mat->B * mu * mu; } // Expanded and hot P_e = mat->a * density * u + - (mat->b * density * u / (u / (mat->E_0 * eta * eta) + 1.f) + - mat->A * mu * expf(-mat->beta * nu)) * + (mat->b * density * u * w_inv + mat->A * mu * expf(-mat->beta * nu)) * expf(-mat->alpha * nu * nu); // Condensed or cold state - if ((1.f < eta) || (u < mat->E_iv)) { + if ((1.f < eta) || (u < mat->u_iv)) { P = P_c; } // Expanded and hot state - else if ((eta < 1.f) && (mat->E_cv < u)) { + else if ((eta < 1.f) && (mat->u_cv < u)) { P = P_e; } // Hybrid state else { - P = ((u - mat->E_iv) * P_e + (mat->E_cv - u) * P_c) / - (mat->E_cv - mat->E_iv); + P = ((u - mat->u_iv) * P_e + (mat->u_cv - u) * P_c) / + (mat->u_cv - mat->u_iv); } // Minimum pressure @@ -201,81 +216,78 @@ INLINE static float Til_internal_energy_from_pressure( error("This EOS function is not yet implemented!"); - return 0; + return 0.f; } // gas_soundspeed_from_internal_energy INLINE static float Til_soundspeed_from_internal_energy( float density, float u, const struct Til_params *mat) { - // const float eta = density / mat->rho_0; - // const float mu = eta - 1.f; - // const float nu = 1.f/eta - 1.f; - // float P_c, P_e, P, c_c, c_e, c; - // - // // Condensed or cold - // if (eta < mat->eta_min) { - // P_c = 0.f; - // } - // else { - // P_c = (mat->a + mat->b / (u / (mat->E_0 * eta*eta) + 1.f)) * density - // * u - // + mat->A * mu + mat->B * mu*mu; - // } - // c_c = mat->a*u + mat->b*u / ((u / (mat->E_0*eta*eta)+1.f) * - // (u / (mat->E_0*eta*eta)+1.f)) * - // (3.f*(u / (mat->E_0*eta*eta)+1.f) - 2.f) + - // (mat->A + 2.f*mat->B*mu) / mat->rho_0 + P_c / (rho*rho) * - // (mat->a*rho + mat->b*rho / ((u / (mat->E_0*eta*eta)+1.f) * - // (u / (mat->E_0*eta*eta)+1.f))); - // - // c_c = max(c_c, mat->A / mat->rho_0); - // - // // Expanded and hot - // P_e = 
mat->a*density*u + ( - // mat->b * density * u / (u / (mat->E_0 * eta*eta) + 1.f) - // + mat->A*mu * expf(-mat->beta * nu) - // ) * expf(-mat->alpha * nu*nu); - // - // c_e = (mat->a + mat->b / (u / (mat->E_0*eta*eta)+1.f) * - // expf(-mat->beta*((1.f - eta)/eta)*((1.f - eta)/eta)) - // + 1.f)*P_e/rho + mat->A/mat->rho_0 - // *expf(-(mat->alpha*((1.f - eta)/eta)+mat->beta * - // ((1.f - eta)/eta)*((1.f - eta)/eta)))*(1.f+mu/(eta*eta) - // *(mat->alpha+2.f*mat->beta*((1.f - eta)/eta)-eta)) + - // mat->b*rho*u/((u / (mat->E_0*eta*eta)+1.f)* - // (u / (mat->E_0*eta*eta)+1.f)*eta*eta)* - // expf(-mat->beta*((1.f - eta)/eta)*((1.f - eta)/eta))* - // (2.f*mat->beta*((1.f - eta)/eta)*(u / (mat->E_0*eta*eta)+1.f) / - // mat->rho_0 + 1.f/(mat->E_0*rho)*(2.f*u-P_e/rho)); - // - // // Condensed or cold state - // if ((1.f < eta) || (u < mat->E_iv)) { - // c = c_c; - // } - // // Expanded and hot state - // else if ((eta < 1.f) && (mat->E_cv < u)) { - // c = c_e; - // } - // // Hybrid state - // else { - // c = ((u - mat->E_iv)*c_e + (mat->E_cv - u)*c_c) / - // (mat->E_cv - mat->E_iv); - // - // c = max(c_c, mat->A / mat->rho0); - // } - float c = sqrtf(mat->A / mat->rho_0); - - return c; + const float rho_0_inv = 1.f / mat->rho_0; + const float eta = density * rho_0_inv; + const float rho_inv = 1.f / density; + const float eta_sq = eta * eta; + const float mu = eta - 1.f; + const float nu = 1.f / eta - 1.f; + const float w = u / (mat->u_0 * eta_sq) + 1.f; + const float w_inv = 1.f / w; + const float w_inv_sq = w_inv * w_inv; + const float exp_beta = expf(-mat->beta * nu); + const float exp_alpha = expf(-mat->alpha * nu * nu); + float P_c, P_e, c_sq_c, c_sq_e, c_sq; + + // Condensed or cold + if (eta < mat->eta_min) { + P_c = 0.f; + } else { + P_c = (mat->a + mat->b * w_inv) * density * u + mat->A * mu + + mat->B * mu * mu; + } + c_sq_c = P_c * rho_inv * (1.f - mat->a - mat->b * w_inv) + + mat->b * (w - 1.f) * w_inv_sq * (2 * u + P_c * rho_inv) + + rho_inv * (mat->A + mat->B * 
(eta_sq - 1.f)); + + c_sq_c = fmax(c_sq_c, mat->A * rho_0_inv); + + // Expanded and hot + P_e = mat->a * density * u + + (mat->b * density * u * w_inv + mat->A * mu * exp_beta) * exp_alpha; + + c_sq_e = P_e * rho_inv * (1.f - mat->a) + + (mat->b * density * u / (w * w * eta_sq) * + (rho_inv / mat->u_0 * (2 * u - P_e * rho_inv * eta_sq) + + 2.f * mat->alpha * nu * rho_0_inv) + + mat->A * rho_0_inv * + (1 + mu / eta_sq * (mat->beta + 2.f * mat->alpha * nu - eta)) * + exp_beta) * + exp_alpha; + + // Condensed or cold state + if ((1.f < eta) || (u < mat->u_iv)) { + c_sq = c_sq_c; + } + // Expanded and hot state + else if ((eta < 1.f) && (mat->u_cv < u)) { + c_sq = c_sq_e; + } + // Hybrid state + else { + c_sq = ((u - mat->u_iv) * c_sq_e + (mat->u_cv - u) * c_sq_c) / + (mat->u_cv - mat->u_iv); + + c_sq = fmax(c_sq_c, mat->A * rho_0_inv); + } + + return sqrtf(c_sq); } // gas_soundspeed_from_pressure INLINE static float Til_soundspeed_from_pressure(float density, float P, const struct Til_params *mat) { - float c = sqrtf(mat->A / mat->rho_0); + error("This EOS function is not yet implemented!"); - return c; + return 0.f; } #endif /* SWIFT_TILLOTSON_EQUATION_OF_STATE_H */ diff --git a/src/gravity.c b/src/gravity.c index 1f88490b57d944fc69e7b2e07dcad39294dba732..53ab6b816f964e4e2b071df1e3192d972c5567de 100644 --- a/src/gravity.c +++ b/src/gravity.c @@ -487,7 +487,7 @@ void gravity_exact_force_compute_mapper(void *map_data, int nr_gparts, long long id = 0; if (gpi->type == swift_type_gas) id = parts[-gpi->id_or_neg_offset].id; - else if (gpi->type == swift_type_star) + else if (gpi->type == swift_type_stars) id = sparts[-gpi->id_or_neg_offset].id; else if (gpi->type == swift_type_black_hole) error("Unexisting type"); @@ -676,7 +676,7 @@ void gravity_exact_force_check(struct space *s, const struct engine *e, long long id = 0; if (gpi->type == swift_type_gas) id = parts[-gpi->id_or_neg_offset].id; - else if (gpi->type == swift_type_star) + else if (gpi->type == 
swift_type_stars) id = sparts[-gpi->id_or_neg_offset].id; else if (gpi->type == swift_type_black_hole) error("Unexisting type"); @@ -730,7 +730,7 @@ void gravity_exact_force_check(struct space *s, const struct engine *e, long long id = 0; if (gpi->type == swift_type_gas) id = parts[-gpi->id_or_neg_offset].id; - else if (gpi->type == swift_type_star) + else if (gpi->type == swift_type_stars) id = sparts[-gpi->id_or_neg_offset].id; else if (gpi->type == swift_type_black_hole) error("Unexisting type"); diff --git a/src/gravity/Default/gravity.h b/src/gravity/Default/gravity.h index 2713c9ee7affca4f06b369d038916f76b8c2ee48..d446844e8ffc862fd3be0688302ebb3a2efab8fa 100644 --- a/src/gravity/Default/gravity.h +++ b/src/gravity/Default/gravity.h @@ -22,6 +22,7 @@ #include <float.h> +/* Local includes. */ #include "cosmology.h" #include "gravity_properties.h" #include "kernel_gravity.h" @@ -155,6 +156,7 @@ __attribute__((always_inline)) INLINE static void gravity_init_gpart( #ifdef SWIFT_DEBUG_CHECKS gp->num_interacted = 0; + gp->initialised = 1; #endif } @@ -187,6 +189,10 @@ __attribute__((always_inline)) INLINE static void gravity_end_force( gp->a_grav_PM[1] *= const_G; gp->a_grav_PM[2] *= const_G; #endif + +#ifdef SWIFT_DEBUG_CHECKS + gp->initialised = 0; /* Ready for next step */ +#endif } /** diff --git a/src/gravity/Default/gravity_iact.h b/src/gravity/Default/gravity_iact.h index 71e5007a49bda25a8b65d4a5d3733d0027aa2682..6fce3ddd512018e9ea4be21111c75904c77cb925 100644 --- a/src/gravity/Default/gravity_iact.h +++ b/src/gravity/Default/gravity_iact.h @@ -166,7 +166,7 @@ __attribute__((always_inline)) INLINE static void runner_iact_grav_pm_full( /* Compute the derivatives of the potential */ struct potential_derivatives_M2P d; - compute_potential_derivatives_M2P(r_x, r_y, r_z, r2, r_inv, h, h_inv, 0, 0.f, + potential_derivatives_compute_M2P(r_x, r_y, r_z, r2, r_inv, h, h_inv, 0, 0.f, &d); /* 0th order contributions */ @@ -271,7 +271,7 @@ __attribute__((always_inline)) 
INLINE static void runner_iact_grav_pm_truncated( /* Compute the derivatives of the potential */ struct potential_derivatives_M2P d; - compute_potential_derivatives_M2P(r_x, r_y, r_z, r2, r_inv, h, h_inv, 1, + potential_derivatives_compute_M2P(r_x, r_y, r_z, r2, r_inv, h, h_inv, 1, r_s_inv, &d); /* 0th order contributions */ diff --git a/src/gravity/Default/gravity_part.h b/src/gravity/Default/gravity_part.h index bd73c56da82877415f5abc9edf41ede1c551f16f..f065e6d3a2994ff1e522fc3ae9a38fcf591d92af 100644 --- a/src/gravity/Default/gravity_part.h +++ b/src/gravity/Default/gravity_part.h @@ -55,6 +55,9 @@ struct gpart { /* Time of the last kick */ integertime_t ti_kick; + /* Has this particle been initialised? */ + int initialised; + #endif #ifdef SWIFT_GRAVITY_FORCE_CHECKS diff --git a/src/gravity/Potential/gravity.h b/src/gravity/Potential/gravity.h index 3a6c0fba18856b57911d49bcee6915f5003e2e68..10628dcea91786b0c7483134b8f7f844d6359e49 100644 --- a/src/gravity/Potential/gravity.h +++ b/src/gravity/Potential/gravity.h @@ -151,6 +151,7 @@ __attribute__((always_inline)) INLINE static void gravity_init_gpart( #ifdef SWIFT_DEBUG_CHECKS gp->num_interacted = 0; + gp->initialised = 1; #endif } @@ -183,6 +184,10 @@ __attribute__((always_inline)) INLINE static void gravity_end_force( gp->a_grav_PM[1] *= const_G; gp->a_grav_PM[2] *= const_G; #endif + +#ifdef SWIFT_DEBUG_CHECKS + gp->initialised = 0; /* Ready for next step */ +#endif } /** diff --git a/src/gravity/Potential/gravity_iact.h b/src/gravity/Potential/gravity_iact.h index fdc8c17da1576b85026c3e551dd70d27bc186612..f2094f6ecd5b31b94ebfe7a64f42fbd289a0c81c 100644 --- a/src/gravity/Potential/gravity_iact.h +++ b/src/gravity/Potential/gravity_iact.h @@ -169,7 +169,7 @@ __attribute__((always_inline)) INLINE static void runner_iact_grav_pm_full( /* Compute the derivatives of the potential */ struct potential_derivatives_M2P d; - compute_potential_derivatives_M2P(r_x, r_y, r_z, r2, r_inv, h, h_inv, 0, 0.f, + 
potential_derivatives_compute_M2P(r_x, r_y, r_z, r2, r_inv, h, h_inv, 0, 0.f, &d); /* 0th order contributions */ @@ -281,7 +281,7 @@ __attribute__((always_inline)) INLINE static void runner_iact_grav_pm_truncated( /* Compute the derivatives of the potential */ struct potential_derivatives_M2P d; - compute_potential_derivatives_M2P(r_x, r_y, r_z, r2, r_inv, h, h_inv, 1, + potential_derivatives_compute_M2P(r_x, r_y, r_z, r2, r_inv, h, h_inv, 1, r_s_inv, &d); /* 0th order contributions */ diff --git a/src/gravity/Potential/gravity_part.h b/src/gravity/Potential/gravity_part.h index 252c18a4dc63c9cea4211ed8ab23eb692f064f00..229d8011088d4a4a70ff9f287597a0ba463ca951 100644 --- a/src/gravity/Potential/gravity_part.h +++ b/src/gravity/Potential/gravity_part.h @@ -58,6 +58,9 @@ struct gpart { /* Time of the last kick */ integertime_t ti_kick; + /* Has this particle been initialised? */ + int initialised; + #endif #ifdef SWIFT_GRAVITY_FORCE_CHECKS diff --git a/src/gravity_cache.h b/src/gravity_cache.h index 821f044429b445c28ff8ae39b8dc65304dd2b42d..6453d1eb92814f0e20cf25fa5996b920e523812d 100644 --- a/src/gravity_cache.h +++ b/src/gravity_cache.h @@ -208,12 +208,20 @@ __attribute__((always_inline)) INLINE static void gravity_cache_populate( /* Fill the input caches */ for (int i = 0; i < gcount; ++i) { + x[i] = (float)(gparts[i].x[0] - shift[0]); y[i] = (float)(gparts[i].x[1] - shift[1]); z[i] = (float)(gparts[i].x[2] - shift[2]); epsilon[i] = gravity_get_softening(&gparts[i], grav_props); - m[i] = gparts[i].mass; - active[i] = (int)(gparts[i].time_bin <= max_active_bin); + + /* Make a dummy particle out of the inhibted ones */ + if (gparts[i].time_bin == time_bin_inhibited) { + m[i] = 0.f; + active[i] = 0; + } else { + m[i] = gparts[i].mass; + active[i] = (int)(gparts[i].time_bin <= max_active_bin); + } /* Distance to the CoM of the other cell. 
*/ float dx = x[i] - CoM[0]; @@ -294,8 +302,15 @@ gravity_cache_populate_no_mpole(const timebin_t max_active_bin, y[i] = (float)(gparts[i].x[1] - shift[1]); z[i] = (float)(gparts[i].x[2] - shift[2]); epsilon[i] = gravity_get_softening(&gparts[i], grav_props); - m[i] = gparts[i].mass; - active[i] = (int)(gparts[i].time_bin <= max_active_bin); + + /* Make a dummy particle out of the inhibted ones */ + if (gparts[i].time_bin == time_bin_inhibited) { + m[i] = 0.f; + active[i] = 0; + } else { + m[i] = gparts[i].mass; + active[i] = (int)(gparts[i].time_bin <= max_active_bin); + } } #ifdef SWIFT_DEBUG_CHECKS diff --git a/src/gravity_derivatives.h b/src/gravity_derivatives.h index 756fb7af66d4cb695ba014452e424843b1c7c25b..3dcffe1cc04c5e10d3b2353b7e21c532747c1475 100644 --- a/src/gravity_derivatives.h +++ b/src/gravity_derivatives.h @@ -125,6 +125,65 @@ struct potential_derivatives_M2P { #endif }; +/** + * @brief Converts the derivatives from a distance vector to its opposite. + * + * From a series of tensors D_xxx(r), compute D_xxx(-r). + * This can be computed efficiently by flipping the sign of all the odd + * derivative terms. + * + * @param pot The derivatives of the potential. 
+ */ +__attribute__((always_inline)) INLINE static void +potential_derivatives_flip_signs(struct potential_derivatives_M2L *pot) { + +#if SELF_GRAVITY_MULTIPOLE_ORDER > 0 + /* 1st order terms */ + pot->D_100 = -pot->D_100; + pot->D_010 = -pot->D_010; + pot->D_001 = -pot->D_001; +#endif + +#if SELF_GRAVITY_MULTIPOLE_ORDER > 2 + /* 3rd order terms */ + pot->D_300 = -pot->D_300; + pot->D_030 = -pot->D_030; + pot->D_003 = -pot->D_003; + pot->D_210 = -pot->D_210; + pot->D_201 = -pot->D_201; + pot->D_021 = -pot->D_021; + pot->D_120 = -pot->D_120; + pot->D_012 = -pot->D_012; + pot->D_102 = -pot->D_102; + pot->D_111 = -pot->D_111; +#endif + +#if SELF_GRAVITY_MULTIPOLE_ORDER > 4 + /* 5th order terms */ + pot->D_500 = -pot->D_500; + pot->D_050 = -pot->D_050; + pot->D_005 = -pot->D_005; + pot->D_410 = -pot->D_410; + pot->D_401 = -pot->D_401; + pot->D_041 = -pot->D_041; + pot->D_140 = -pot->D_140; + pot->D_014 = -pot->D_014; + pot->D_104 = -pot->D_104; + pot->D_320 = -pot->D_320; + pot->D_302 = -pot->D_302; + pot->D_032 = -pot->D_032; + pot->D_230 = -pot->D_230; + pot->D_023 = -pot->D_023; + pot->D_203 = -pot->D_203; + pot->D_311 = -pot->D_311; + pot->D_131 = -pot->D_131; + pot->D_113 = -pot->D_113; + pot->D_122 = -pot->D_122; + pot->D_212 = -pot->D_212; + pot->D_221 = -pot->D_221; +#endif +} + /** * @brief Compute all the relevent derivatives of the softened and truncated * gravitational potential for the M2L kernel. @@ -141,7 +200,7 @@ struct potential_derivatives_M2P { * @param pot (return) The structure containing all the derivatives. 
*/ __attribute__((always_inline)) INLINE static void -compute_potential_derivatives_M2L(const float r_x, const float r_y, +potential_derivatives_compute_M2L(const float r_x, const float r_y, const float r_z, const float r2, const float r_inv, const float eps, const float eps_inv, const int periodic, @@ -397,7 +456,7 @@ compute_potential_derivatives_M2L(const float r_x, const float r_y, * @param pot (return) The structure containing all the derivatives. */ __attribute__((always_inline)) INLINE static void -compute_potential_derivatives_M2P(const float r_x, const float r_y, +potential_derivatives_compute_M2P(const float r_x, const float r_y, const float r_z, const float r2, const float r_inv, const float eps, const float eps_inv, const int periodic, diff --git a/src/gravity_properties.c b/src/gravity_properties.c index fc1ce1d62e02c32d44667d602448fc4eb3a65344..fffbf22ec187f179f0e80b7121beaa3a96de0260 100644 --- a/src/gravity_properties.c +++ b/src/gravity_properties.c @@ -39,7 +39,8 @@ #define gravity_props_default_rebuild_frequency 0.01f void gravity_props_init(struct gravity_props *p, struct swift_params *params, - const struct cosmology *cosmo, int with_cosmology) { + const struct cosmology *cosmo, int with_cosmology, + int periodic) { /* Tree updates */ p->rebuild_frequency = @@ -50,19 +51,31 @@ void gravity_props_init(struct gravity_props *p, struct swift_params *params, error("Invalid tree rebuild frequency. 
Must be in [0., 1.]"); /* Tree-PM parameters */ - p->mesh_size = parser_get_param_int(params, "Gravity:mesh_side_length"); - p->a_smooth = parser_get_opt_param_float(params, "Gravity:a_smooth", - gravity_props_default_a_smooth); - p->r_cut_max_ratio = parser_get_opt_param_float( - params, "Gravity:r_cut_max", gravity_props_default_r_cut_max); - p->r_cut_min_ratio = parser_get_opt_param_float( - params, "Gravity:r_cut_min", gravity_props_default_r_cut_min); - - if (p->mesh_size % 2 != 0) - error("The mesh side-length must be an even number."); - - if (p->a_smooth <= 0.) - error("The mesh smoothing scale 'a_smooth' must be > 0."); + if (periodic) { + p->mesh_size = parser_get_param_int(params, "Gravity:mesh_side_length"); + p->a_smooth = parser_get_opt_param_float(params, "Gravity:a_smooth", + gravity_props_default_a_smooth); + p->r_cut_max_ratio = parser_get_opt_param_float( + params, "Gravity:r_cut_max", gravity_props_default_r_cut_max); + p->r_cut_min_ratio = parser_get_opt_param_float( + params, "Gravity:r_cut_min", gravity_props_default_r_cut_min); + + /* Some basic checks of what we read */ + if (p->mesh_size % 2 != 0) + error("The mesh side-length must be an even number."); + + if (p->a_smooth <= 0.) + error("The mesh smoothing scale 'a_smooth' must be > 0."); + + if (2. * p->a_smooth * p->r_cut_max_ratio > p->mesh_size) + error("Mesh too small given r_cut_max. Should be at least %d cells wide.", + (int)(2. 
* p->a_smooth * p->r_cut_max_ratio) + 1); + } else { + p->mesh_size = 0; + p->a_smooth = 0.f; + p->r_cut_min_ratio = 0.f; + p->r_cut_max_ratio = 0.f; + } /* Time integration */ p->eta = parser_get_param_float(params, "Gravity:eta"); diff --git a/src/gravity_properties.h b/src/gravity_properties.h index 62dbab3605fb2dcfc4ae65e54c0b5f913d714c16..0cabd9958efa2bb23524d03632f90fdd1f1c8306 100644 --- a/src/gravity_properties.h +++ b/src/gravity_properties.h @@ -88,7 +88,8 @@ struct gravity_props { void gravity_props_print(const struct gravity_props *p); void gravity_props_init(struct gravity_props *p, struct swift_params *params, - const struct cosmology *cosmo, int with_cosmology); + const struct cosmology *cosmo, int with_cosmology, + int periodic); void gravity_update(struct gravity_props *p, const struct cosmology *cosmo); #if defined(HAVE_HDF5) diff --git a/src/hydro.h b/src/hydro.h index 950f63526a1590fa0fdcf2bfb5e650a2dfe14431..15c45c1dcfa1217d842904dfc1303acea607e3ab 100644 --- a/src/hydro.h +++ b/src/hydro.h @@ -45,6 +45,12 @@ #include "./hydro/PressureEnergy/hydro.h" #include "./hydro/PressureEnergy/hydro_iact.h" #define SPH_IMPLEMENTATION "Pressure-Energy SPH (Hopkins 2013)" +#elif defined(HOPKINS_PU_SPH_MONAGHAN) +#include "./hydro/PressureEnergyMorrisMonaghanAV/hydro.h" +#include "./hydro/PressureEnergyMorrisMonaghanAV/hydro_iact.h" +#define SPH_IMPLEMENTATION \ + "Pressure-Energy SPH (Hopkins 2013) with a Morris and Monaghan (1997) " \ + "variable artificial viscosity." 
#elif defined(DEFAULT_SPH) #include "./hydro/Default/hydro.h" #include "./hydro/Default/hydro_iact.h" @@ -62,9 +68,9 @@ #include "./hydro/Shadowswift/hydro_iact.h" #define SPH_IMPLEMENTATION \ "Shadowfax moving mesh (Vandenbroucke and De Rijcke 2016)" -#elif defined(MINIMAL_MULTI_MAT_SPH) -#include "./hydro/MinimalMultiMat/hydro.h" -#include "./hydro/MinimalMultiMat/hydro_iact.h" +#elif defined(PLANETARY_SPH) +#include "./hydro/Planetary/hydro.h" +#include "./hydro/Planetary/hydro_iact.h" #define SPH_IMPLEMENTATION "Minimal version of SPH with multiple materials" #else #error "Invalid choice of SPH variant" diff --git a/src/hydro/Default/hydro.h b/src/hydro/Default/hydro.h index 2c3a9c46f0500fb20aa3cfa2e5feb682b3dcec63..4252f2787aefcec058b8fa956eaa0351b8f41d57 100644 --- a/src/hydro/Default/hydro.h +++ b/src/hydro/Default/hydro.h @@ -32,27 +32,59 @@ #include <float.h> /** - * @brief Returns the comoving internal energy of a particle + * @brief Returns the comoving internal energy of a particle at the last + * time the particle was kicked. * * @param p The particle of interest + * @param xp The extended data of the particle of interest. */ __attribute__((always_inline)) INLINE static float -hydro_get_comoving_internal_energy(const struct part *restrict p) { +hydro_get_comoving_internal_energy(const struct part *restrict p, + const struct xpart *restrict xp) { - return p->u; + return xp->u_full; } /** - * @brief Returns the physical internal energy of a particle + * @brief Returns the physical internal energy of a particle at the last + * time the particle was kicked. * - * @param p The particle of interest + * @param p The particle of interest. + * @param xp The extended data of the particle of interest. * @param cosmo The cosmological model. 
*/ __attribute__((always_inline)) INLINE static float hydro_get_physical_internal_energy(const struct part *restrict p, + const struct xpart *restrict xp, const struct cosmology *cosmo) { - return cosmo->a_factor_internal_energy * p->u; + return xp->u_full * cosmo->a_factor_internal_energy; +} + +/** + * @brief Returns the comoving internal energy of a particle drifted to the + * current time. + * + * @param p The particle of interest + */ +__attribute__((always_inline)) INLINE static float +hydro_get_drifted_comoving_internal_energy(const struct part *restrict p) { + + return p->u; +} + +/** + * @brief Returns the physical internal energy of a particle drifted to the + * current time. + * + * @param p The particle of interest. + * @param cosmo The cosmological model. + */ +__attribute__((always_inline)) INLINE static float +hydro_get_drifted_physical_internal_energy(const struct part *restrict p, + const struct cosmology *cosmo) { + + return p->u * cosmo->a_factor_internal_energy; } /** @@ -80,24 +112,57 @@ __attribute__((always_inline)) INLINE static float hydro_get_physical_pressure( } /** - * @brief Returns the comoving entropy of a particle + * @brief Returns the comoving entropy of a particle at the last + * time the particle was kicked. * - * @param p The particle of interest + * @param p The particle of interest. + * @param xp The extended data of the particle of interest. */ __attribute__((always_inline)) INLINE static float hydro_get_comoving_entropy( - const struct part *restrict p) { + const struct part *restrict p, const struct xpart *restrict xp) { - return gas_entropy_from_internal_energy(p->rho, p->u); + return gas_entropy_from_internal_energy(p->rho, xp->u_full); } /** - * @brief Returns the physical entropy of a particle + * @brief Returns the physical entropy of a particle at the last + * time the particle was kicked. * - * @param p The particle of interest + * @param p The particle of interest. 
+ * @param xp The extended data of the particle of interest. * @param cosmo The cosmological model. */ __attribute__((always_inline)) INLINE static float hydro_get_physical_entropy( - const struct part *restrict p, const struct cosmology *cosmo) { + const struct part *restrict p, const struct xpart *restrict xp, + const struct cosmology *cosmo) { + + /* Note: no cosmological conversion required here with our choice of + * coordinates. */ + return gas_entropy_from_internal_energy(p->rho, xp->u_full); +} + +/** + * @brief Returns the comoving entropy of a particle drifted to the + * current time. + * + * @param p The particle of interest. + */ +__attribute__((always_inline)) INLINE static float +hydro_get_drifted_comoving_entropy(const struct part *restrict p) { + + return gas_entropy_from_internal_energy(p->rho, p->u); +} + +/** + * @brief Returns the physical entropy of a particle drifted to the + * current time. + * + * @param p The particle of interest. + * @param cosmo The cosmological model. + */ +__attribute__((always_inline)) INLINE static float +hydro_get_drifted_physical_entropy(const struct part *restrict p, + const struct cosmology *cosmo) { /* Note: no cosmological conversion required here with our choice of * coordinates. */ @@ -201,12 +266,27 @@ __attribute__((always_inline)) INLINE static void hydro_get_drifted_velocities( * * @param p The particle of interest */ -__attribute__((always_inline)) INLINE static float hydro_get_internal_energy_dt( - const struct part *restrict p) { +__attribute__((always_inline)) INLINE static float +hydro_get_comoving_internal_energy_dt(const struct part *restrict p) { return p->force.u_dt; } +/** + * @brief Returns the time derivative of internal energy of a particle + * + * We assume a constant density. 
+ * + * @param p The particle of interest + * @param cosmo Cosmology data structure + */ +__attribute__((always_inline)) INLINE static float +hydro_get_physical_internal_energy_dt(const struct part *restrict p, + const struct cosmology *cosmo) { + + return p->force.u_dt * cosmo->a_factor_internal_energy; +} + /** * @brief Returns the time derivative of internal energy of a particle * @@ -215,12 +295,29 @@ __attribute__((always_inline)) INLINE static float hydro_get_internal_energy_dt( * @param p The particle of interest. * @param du_dt The new time derivative of the internal energy. */ -__attribute__((always_inline)) INLINE static void hydro_set_internal_energy_dt( - struct part *restrict p, float du_dt) { +__attribute__((always_inline)) INLINE static void +hydro_set_comoving_internal_energy_dt(struct part *restrict p, float du_dt) { p->force.u_dt = du_dt; } +/** + * @brief Returns the time derivative of internal energy of a particle + * + * We assume a constant density. + * + * @param p The particle of interest. + * @param cosmo Cosmology data structure + * @param du_dt The new time derivative of the internal energy. + */ +__attribute__((always_inline)) INLINE static void +hydro_set_physical_internal_energy_dt(struct part *restrict p, + const struct cosmology *cosmo, + float du_dt) { + + p->force.u_dt = du_dt * cosmo->a_factor_internal_energy; +} + /** * @brief Computes the hydro time-step of a given particle * @@ -351,16 +448,24 @@ __attribute__((always_inline)) INLINE static void hydro_part_has_no_neighbours( /** * @brief Prepare a particle for the force calculation. * - * Computes viscosity term, conduction term and smoothing length gradient terms. + * This function is called in the ghost task to convert some quantities coming + * from the density loop over neighbours into quantities ready to be used in the + * force loop over neighbours. Quantities are typically read from the density + * sub-structure and written to the force sub-structure. 
+ * Examples of calculations done here include the calculation of viscosity term + * constants, thermal conduction terms, hydro conversions, etc. * * @param p The particle to act upon * @param xp The extended particle data to act upon * @param cosmo The current cosmological model. + * @param hydro_props Hydrodynamic properties. + * @param dt_alpha The time-step used to evolve non-cosmological quantities such + * as the artificial viscosity. */ __attribute__((always_inline)) INLINE static void hydro_prepare_force( struct part *restrict p, struct xpart *restrict xp, - const struct cosmology *cosmo) { - + const struct cosmology *cosmo, const struct hydro_props *hydro_props, + const float dt_alpha) { const float fac_mu = cosmo->a_factor_mu; /* Some smoothing length multiples. */ @@ -389,6 +494,9 @@ __attribute__((always_inline)) INLINE static void hydro_prepare_force( p->force.balsara = normDiv_v / (normDiv_v + normRot_v + 0.0001f * fac_mu * fc * h_inv); + /* Set the AV property */ + p->alpha = hydro_props->viscosity.alpha; + /* Viscosity parameter decay time */ /* const float tau = h / (2.f * const_viscosity_length * p->force.soundspeed); */ @@ -397,8 +505,10 @@ __attribute__((always_inline)) INLINE static void hydro_prepare_force( /* const float S = max(-normDiv_v, 0.f); */ /* Compute the particle's viscosity parameter time derivative */ - /* const float alpha_dot = (const_viscosity_alpha_min - p->alpha) / tau + */ - /* (const_viscosity_alpha_max - p->alpha) * S; */ + /* const float alpha_dot = (hydro_props->viscosity.alpha_max) - p->alpha) / + * tau + */ + /* (hydro_props->viscosity.alpha_max - p->alpha) * S; + */ /* Update particle's viscosity paramter */ /* p->alpha += alpha_dot * (p->ti_end - p->ti_begin) * timeBase; */ // MATTHIEU @@ -506,6 +616,7 @@ __attribute__((always_inline)) INLINE static void hydro_end_force( */ __attribute__((always_inline)) INLINE static void hydro_kick_extra( struct part *restrict p, struct xpart *restrict xp, float dt_therm, + float 
dt_grav, float dt_hydro, float dt_kick_corr, const struct cosmology *cosmo, const struct hydro_props *hydro_props) {} /** @@ -519,7 +630,7 @@ __attribute__((always_inline)) INLINE static void hydro_kick_extra( */ __attribute__((always_inline)) INLINE static void hydro_convert_quantities( struct part *restrict p, struct xpart *restrict xp, - const struct cosmology *cosmo) {} + const struct cosmology *cosmo, const struct hydro_props *hydro_props) {} /** * @brief Initialises the particles for the first time diff --git a/src/hydro/Default/hydro_iact.h b/src/hydro/Default/hydro_iact.h index 658b4aba83085610a49bb9d2579d4f20c70d6c5b..72808874c3fc6b58005d0e3ad450eafea8aa4b4d 100644 --- a/src/hydro/Default/hydro_iact.h +++ b/src/hydro/Default/hydro_iact.h @@ -226,7 +226,8 @@ __attribute__((always_inline)) INLINE static void runner_iact_force( omega_ij = min(fac_mu * dvdr, 0.f); /* Compute signal velocity */ - v_sig = pi->force.soundspeed + pj->force.soundspeed - 2.0f * omega_ij; + v_sig = pi->force.soundspeed + pj->force.soundspeed - + const_viscosity_beta * omega_ij; /* Compute viscosity parameter */ alpha_ij = -0.5f * (pi->alpha + pj->alpha); @@ -335,7 +336,8 @@ __attribute__((always_inline)) INLINE static void runner_iact_nonsym_force( omega_ij = min(fac_mu * dvdr, 0.f); /* Compute signal velocity */ - v_sig = pi->force.soundspeed + pj->force.soundspeed - 2.0f * omega_ij; + v_sig = pi->force.soundspeed + pj->force.soundspeed - + const_viscosity_beta * omega_ij; /* Compute viscosity parameter */ alpha_ij = -0.5f * (pi->alpha + pj->alpha); diff --git a/src/hydro/Default/hydro_io.h b/src/hydro/Default/hydro_io.h index d47c96fbf32e1ee00346888aaf2e8afabc22abc3..69919c202223fdecc197a87178e59767c02ee16e 100644 --- a/src/hydro/Default/hydro_io.h +++ b/src/hydro/Default/hydro_io.h @@ -55,6 +55,17 @@ INLINE static void hydro_read_particles(struct part* parts, list[7] = io_make_input_field("Density", FLOAT, 1, OPTIONAL, UNIT_CONV_DENSITY, parts, rho); } +INLINE static void 
convert_S(const struct engine* e, const struct part* p, + const struct xpart* xp, float* ret) { + + ret[0] = hydro_get_comoving_entropy(p, xp); +} + +INLINE static void convert_P(const struct engine* e, const struct part* p, + const struct xpart* xp, float* ret) { + + ret[0] = hydro_get_comoving_pressure(p); +} INLINE static void convert_part_pos(const struct engine* e, const struct part* p, @@ -128,7 +139,7 @@ INLINE static void hydro_write_particles(const struct part* parts, struct io_props* list, int* num_fields) { - *num_fields = 8; + *num_fields = 10; /* List what we want to write */ list[0] = io_make_output_field_convert_part("Coordinates", DOUBLE, 3, @@ -146,7 +157,13 @@ INLINE static void hydro_write_particles(const struct part* parts, UNIT_CONV_NO_UNITS, parts, id); list[6] = io_make_output_field("Density", FLOAT, 1, UNIT_CONV_DENSITY, parts, rho); - list[7] = io_make_output_field_convert_part("Potential", FLOAT, 1, + list[7] = io_make_output_field_convert_part("Entropy", FLOAT, 1, + UNIT_CONV_ENTROPY_PER_UNIT_MASS, + parts, xparts, convert_S); + list[8] = io_make_output_field_convert_part( + "Pressure", FLOAT, 1, UNIT_CONV_PRESSURE, parts, xparts, convert_P); + + list[9] = io_make_output_field_convert_part("Potential", FLOAT, 1, + UNIT_CONV_POTENTIAL, parts, xparts, convert_part_potential); } @@ -166,13 +183,6 @@ INLINE static void hydro_write_flavour(hid_t h_grpsph) { h_grpsph, "Viscosity Model", "Morris & Monaghan (1997), Rosswog, Davies, Thielemann & " "Piran (2000) with additional Balsara (1995) switch"); - io_write_attribute_f(h_grpsph, "Viscosity alpha_min", - const_viscosity_alpha_min); - io_write_attribute_f(h_grpsph, "Viscosity alpha_max", - const_viscosity_alpha_max); - io_write_attribute_f(h_grpsph, "Viscosity beta", 2.f); - io_write_attribute_f(h_grpsph, "Viscosity decay length", - const_viscosity_length); /* Time integration properties */ io_write_attribute_f(h_grpsph, "Maximal Delta u change over dt", diff --git a/src/hydro/Gadget2/hydro.h
b/src/hydro/Gadget2/hydro.h index 26e3bf97dd1924abbe7380d1eaadce75213344df..e16d41a8b4fc50d07f5cac7dd39e8d245bed9923 100644 --- a/src/hydro/Gadget2/hydro.h +++ b/src/hydro/Gadget2/hydro.h @@ -42,26 +42,59 @@ #include "minmax.h" /** - * @brief Returns the comoving internal energy of a particle + * @brief Returns the comoving internal energy of a particle at the last + * time the particle was kicked. * * @param p The particle of interest + * @param xp The extended data of the particle of interest. */ __attribute__((always_inline)) INLINE static float -hydro_get_comoving_internal_energy(const struct part *restrict p) { +hydro_get_comoving_internal_energy(const struct part *restrict p, + const struct xpart *restrict xp) { - return gas_internal_energy_from_entropy(p->rho, p->entropy); + return gas_internal_energy_from_entropy(p->rho, xp->entropy_full); } /** - * @brief Returns the physical internal energy of a particle + * @brief Returns the physical internal energy of a particle at the last + * time the particle was kicked. * * @param p The particle of interest. + * @param xp The extended data of the particle of interest. * @param cosmo The cosmological model. */ __attribute__((always_inline)) INLINE static float hydro_get_physical_internal_energy(const struct part *restrict p, + const struct xpart *restrict xp, const struct cosmology *cosmo) { + return gas_internal_energy_from_entropy(p->rho * cosmo->a3_inv, + xp->entropy_full); +} + +/** + * @brief Returns the comoving internal energy of a particle drifted to the + * current time. + * + * @param p The particle of interest + */ +__attribute__((always_inline)) INLINE static float +hydro_get_drifted_comoving_internal_energy(const struct part *restrict p) { + + return gas_internal_energy_from_entropy(p->rho, p->entropy); +} + +/** + * @brief Returns the physical internal energy of a particle drifted to the + * current time. + * + * @param p The particle of interest. + * @param cosmo The cosmological model. 
+ */ +__attribute__((always_inline)) INLINE static float +hydro_get_drifted_physical_internal_energy(const struct part *restrict p, + const struct cosmology *cosmo) { + return gas_internal_energy_from_entropy(p->rho * cosmo->a3_inv, p->entropy); } @@ -79,7 +112,8 @@ __attribute__((always_inline)) INLINE static float hydro_get_comoving_pressure( /** * @brief Returns the physical pressure of a particle * - * @param p The particle of interest + * @param p The particle of interest. + * @param cosmo The cosmological model. */ __attribute__((always_inline)) INLINE static float hydro_get_physical_pressure( const struct part *restrict p, const struct cosmology *cosmo) { @@ -88,24 +122,57 @@ __attribute__((always_inline)) INLINE static float hydro_get_physical_pressure( } /** - * @brief Returns the comoving entropy of a particle + * @brief Returns the comoving entropy of a particle at the last + * time the particle was kicked. * * @param p The particle of interest. + * @param xp The extended data of the particle of interest. */ __attribute__((always_inline)) INLINE static float hydro_get_comoving_entropy( - const struct part *restrict p) { + const struct part *restrict p, const struct xpart *restrict xp) { - return p->entropy; + return xp->entropy_full; } /** - * @brief Returns the physical entropy of a particle + * @brief Returns the physical entropy of a particle at the last + * time the particle was kicked. * * @param p The particle of interest. + * @param xp The extended data of the particle of interest. * @param cosmo The cosmological model. */ __attribute__((always_inline)) INLINE static float hydro_get_physical_entropy( - const struct part *restrict p, const struct cosmology *cosmo) { + const struct part *restrict p, const struct xpart *restrict xp, + const struct cosmology *cosmo) { + + /* Note: no cosmological conversion required here with our choice of + * coordinates.
*/ + return xp->entropy_full; +} + +/** + * @brief Returns the comoving entropy of a particle drifted to the + * current time. + * + * @param p The particle of interest. + */ +__attribute__((always_inline)) INLINE static float +hydro_get_drifted_comoving_entropy(const struct part *restrict p) { + + return p->entropy; +} + +/** + * @brief Returns the physical entropy of a particle drifted to the + * current time. + * + * @param p The particle of interest. + * @param cosmo The cosmological model. + */ +__attribute__((always_inline)) INLINE static float +hydro_get_drifted_physical_entropy(const struct part *restrict p, + const struct cosmology *cosmo) { /* Note: no cosmological conversion required here with our choice of * coordinates. */ @@ -204,32 +271,66 @@ __attribute__((always_inline)) INLINE static void hydro_get_drifted_velocities( } /** - * @brief Returns the time derivative of internal energy of a particle + * @brief Returns the time derivative of co-moving internal energy of a particle * * We assume a constant density. * * @param p The particle of interest */ -__attribute__((always_inline)) INLINE static float hydro_get_internal_energy_dt( - const struct part *restrict p) { +__attribute__((always_inline)) INLINE static float +hydro_get_comoving_internal_energy_dt(const struct part *restrict p) { return gas_internal_energy_from_entropy(p->rho, p->entropy_dt); } /** - * @brief Returns the time derivative of internal energy of a particle + * @brief Returns the time derivative of physical internal energy of a particle * * We assume a constant density. * * @param p The particle of interest. + * @param cosmo The cosmological model. 
+ */ +__attribute__((always_inline)) INLINE static float +hydro_get_physical_internal_energy_dt(const struct part *restrict p, + const struct cosmology *cosmo) { + + return gas_internal_energy_from_entropy(p->rho * cosmo->a3_inv, + p->entropy_dt); +} + +/** + * @brief Sets the time derivative of the co-moving internal energy of a + * particle + * + * We assume a constant density for the conversion to entropy. + * + * @param p The particle of interest. * @param du_dt The new time derivative of the internal energy. */ -__attribute__((always_inline)) INLINE static void hydro_set_internal_energy_dt( - struct part *restrict p, float du_dt) { +__attribute__((always_inline)) INLINE static void +hydro_set_comoving_internal_energy_dt(struct part *restrict p, float du_dt) { p->entropy_dt = gas_entropy_from_internal_energy(p->rho, du_dt); } +/** + * @brief Sets the time derivative of the physical internal energy of a particle + * + * We assume a constant density for the conversion to entropy. + * + * @param p The particle of interest. + * @param cosmo Cosmology data structure + * @param du_dt The time derivative of the internal energy. + */ +__attribute__((always_inline)) INLINE static void +hydro_set_physical_internal_energy_dt(struct part *restrict p, + const struct cosmology *restrict cosmo, + float du_dt) { + p->entropy_dt = + gas_entropy_from_internal_energy(p->rho * cosmo->a3_inv, du_dt); +} + /** * @brief Computes the hydro time-step of a given particle * @@ -361,28 +462,41 @@ __attribute__((always_inline)) INLINE static void hydro_part_has_no_neighbours( /** * @brief Prepare a particle for the force calculation. * - * Computes viscosity term, conduction term and smoothing length gradient terms. + * This function is called in the ghost task to convert some quantities coming + * from the density loop over neighbours into quantities ready to be used in the + * force loop over neighbours. 
Quantities are typically read from the density + * sub-structure and written to the force sub-structure. + * Examples of calculations done here include the calculation of viscosity term + * constants, thermal conduction terms, hydro conversions, etc. * * @param p The particle to act upon * @param xp The extended particle data to act upon * @param cosmo The current cosmological model. + * @param hydro_props Hydrodynamic properties. + * @param dt_alpha The time-step used to evolve non-cosmological quantities such + * as the artificial viscosity. */ __attribute__((always_inline)) INLINE static void hydro_prepare_force( struct part *restrict p, struct xpart *restrict xp, - const struct cosmology *cosmo) { + const struct cosmology *cosmo, const struct hydro_props *hydro_props, + const float dt_alpha) { - const float fac_mu = cosmo->a_factor_mu; + const float fac_Balsara_eps = cosmo->a_factor_Balsara_eps; - /* Inverse of the physical density */ + /* Inverse of the co-moving density */ const float rho_inv = 1.f / p->rho; + /* Inverse of the smoothing length */ + const float h_inv = 1.f / p->h; + /* Compute the norm of the curl */ const float curl_v = sqrtf(p->density.rot_v[0] * p->density.rot_v[0] + p->density.rot_v[1] * p->density.rot_v[1] + p->density.rot_v[2] * p->density.rot_v[2]); - /* Compute the norm of div v */ - const float abs_div_v = fabsf(p->density.div_v); + /* Compute the norm of div v including the Hubble flow term */ + const float div_physical_v = p->density.div_v + 3.f * cosmo->H; + const float abs_div_physical_v = fabsf(div_physical_v); /* Compute the pressure */ const float pressure = gas_pressure_from_entropy(p->rho, p->entropy); @@ -394,8 +508,11 @@ __attribute__((always_inline)) INLINE static void hydro_prepare_force( const float P_over_rho2 = pressure * rho_inv * rho_inv; /* Compute the Balsara switch */ - const float balsara = - abs_div_v / (abs_div_v + curl_v + 0.0001f * fac_mu * soundspeed / p->h); + /* Pre-multiply in the AV factor; hydro_props 
are not passed to the iact + * functions */ + const float balsara = hydro_props->viscosity.alpha * abs_div_physical_v / + (abs_div_physical_v + curl_v + + 0.0001f * fac_Balsara_eps * soundspeed * h_inv); /* Compute the "grad h" term */ const float omega_inv = @@ -484,7 +601,14 @@ __attribute__((always_inline)) INLINE static void hydro_predict_extra( else p->rho *= expf(w2); - /* Predict the entropy */ + /* Predict the entropy */ +#ifdef SWIFT_DEBUG_CHECKS + if (p->entropy + p->entropy_dt * dt_therm <= 0) + error( + "Negative entropy for particle id %llu old entropy %.5e d_entropy %.5e " + "entropy_dt %.5e dt therm %.5e", + p->id, p->entropy, p->entropy_dt * dt_therm, p->entropy_dt, dt_therm); +#endif p->entropy += p->entropy_dt * dt_therm; /* Re-compute the pressure */ @@ -515,8 +639,8 @@ __attribute__((always_inline)) INLINE static void hydro_end_force( p->force.h_dt *= p->h * hydro_dimension_inv; - p->entropy_dt = 0.5f * cosmo->a2_inv * - gas_entropy_from_internal_energy(p->rho, p->entropy_dt); + p->entropy_dt = + 0.5f * gas_entropy_from_internal_energy(p->rho, p->entropy_dt); } /** @@ -525,11 +649,15 @@ __attribute__((always_inline)) INLINE static void hydro_end_force( * @param p The particle to act upon * @param xp The particle extended data to act upon * @param dt_therm The time-step for this kick (for thermodynamic quantities) + * @param dt_grav The time-step for this kick (for gravity forces) + * @param dt_hydro The time-step for this kick (for hydro forces) + * @param dt_kick_corr The time-step for this kick (for correction of the kick) * @param cosmo The cosmological model. 
* @param hydro_props The constants used in the scheme */ __attribute__((always_inline)) INLINE static void hydro_kick_extra( struct part *restrict p, struct xpart *restrict xp, float dt_therm, + float dt_grav, float dt_hydro, float dt_kick_corr, const struct cosmology *cosmo, const struct hydro_props *hydro_props) { /* Do not decrease the entropy by more than a factor of 2 */ @@ -539,12 +667,13 @@ __attribute__((always_inline)) INLINE static void hydro_kick_extra( xp->entropy_full += p->entropy_dt * dt_therm; /* Apply the minimal energy limit */ - const float density = p->rho * cosmo->a3_inv; - const float min_energy = hydro_props->minimal_internal_energy; - const float min_entropy = - gas_entropy_from_internal_energy(density, min_energy); - if (xp->entropy_full < min_entropy) { - xp->entropy_full = min_entropy; + const float physical_density = p->rho * cosmo->a3_inv; + const float min_physical_energy = hydro_props->minimal_internal_energy; + const float min_physical_entropy = + gas_entropy_from_internal_energy(physical_density, min_physical_energy); + const float min_comoving_entropy = min_physical_entropy; /* A' = A */ + if (xp->entropy_full < min_comoving_entropy) { + xp->entropy_full = min_comoving_entropy; p->entropy_dt = 0.f; } @@ -570,17 +699,30 @@ __attribute__((always_inline)) INLINE static void hydro_kick_extra( * @param p The particle to act upon. * @param xp The extended data. * @param cosmo The cosmological model. + * @param hydro_props The constants used in the scheme. */ __attribute__((always_inline)) INLINE static void hydro_convert_quantities( struct part *restrict p, struct xpart *restrict xp, - const struct cosmology *cosmo) { + const struct cosmology *cosmo, const struct hydro_props *hydro_props) { - /* We read u in the entropy field. We now get (comoving) S from (physical) u - * and (physical) rho. Note that comoving S == physical S */ + /* We read u in the entropy field. We now get (comoving) A from (physical) u + * and (physical) rho. 
Note that comoving A (A') == physical A */ xp->entropy_full = gas_entropy_from_internal_energy(p->rho * cosmo->a3_inv, p->entropy); p->entropy = xp->entropy_full; + /* Apply the minimal energy limit */ + const float physical_density = p->rho * cosmo->a3_inv; + const float min_physical_energy = hydro_props->minimal_internal_energy; + const float min_physical_entropy = + gas_entropy_from_internal_energy(physical_density, min_physical_energy); + const float min_comoving_entropy = min_physical_entropy; /* A' = A */ + if (xp->entropy_full < min_comoving_entropy) { + xp->entropy_full = min_comoving_entropy; + p->entropy = min_comoving_entropy; + p->entropy_dt = 0.f; + } + /* Compute the pressure */ const float pressure = gas_pressure_from_entropy(p->rho, p->entropy); diff --git a/src/hydro/Gadget2/hydro_iact.h b/src/hydro/Gadget2/hydro_iact.h index b2af8909bed1780586a5130370222c9b8157d724..746fd4778563aeaab43bafcc7904683ed5b6811c 100644 --- a/src/hydro/Gadget2/hydro_iact.h +++ b/src/hydro/Gadget2/hydro_iact.h @@ -479,23 +479,25 @@ __attribute__((always_inline)) INLINE static void runner_iact_force( /* Compute dv dot r. */ const float dvdr = (pi->v[0] - pj->v[0]) * dx[0] + (pi->v[1] - pj->v[1]) * dx[1] + - (pi->v[2] - pj->v[2]) * dx[2] + a2_Hubble * r2; + (pi->v[2] - pj->v[2]) * dx[2]; + + /* Add Hubble flow */ + const float dvdr_Hubble = dvdr + a2_Hubble * r2; /* Balsara term */ const float balsara_i = pi->force.balsara; const float balsara_j = pj->force.balsara; /* Are the particles moving towards each others ? */ - const float omega_ij = (dvdr < 0.f) ? 
dvdr : 0.f; + const float omega_ij = min(dvdr_Hubble, 0.f); const float mu_ij = fac_mu * r_inv * omega_ij; /* This is 0 or negative */ /* Signal velocity */ - const float v_sig = ci + cj - 3.f * mu_ij; + const float v_sig = ci + cj - const_viscosity_beta * mu_ij; /* Now construct the full viscosity term */ const float rho_ij = 0.5f * (rhoi + rhoj); - const float visc = -0.25f * const_viscosity_alpha * v_sig * mu_ij * - (balsara_i + balsara_j) / rho_ij; + const float visc = -0.25f * v_sig * mu_ij * (balsara_i + balsara_j) / rho_ij; /* Now, convolve with the kernel */ const float visc_term = 0.5f * visc * (wi_dr + wj_dr) * r_inv; @@ -523,8 +525,8 @@ __attribute__((always_inline)) INLINE static void runner_iact_force( pj->force.v_sig = max(pj->force.v_sig, v_sig); /* Change in entropy */ - pi->entropy_dt += mj * visc_term * dvdr; - pj->entropy_dt += mi * visc_term * dvdr; + pi->entropy_dt += mj * visc_term * dvdr_Hubble; + pj->entropy_dt += mi * visc_term * dvdr_Hubble; #ifdef DEBUG_INTERACTIONS_SPH /* Update ngb counters */ @@ -599,23 +601,25 @@ __attribute__((always_inline)) INLINE static void runner_iact_nonsym_force( /* Compute dv dot r. */ const float dvdr = (pi->v[0] - pj->v[0]) * dx[0] + (pi->v[1] - pj->v[1]) * dx[1] + - (pi->v[2] - pj->v[2]) * dx[2] + a2_Hubble * r2; + (pi->v[2] - pj->v[2]) * dx[2]; + + /* Add Hubble flow */ + const float dvdr_Hubble = dvdr + a2_Hubble * r2; /* Balsara term */ const float balsara_i = pi->force.balsara; const float balsara_j = pj->force.balsara; /* Are the particles moving towards each others ? */ - const float omega_ij = (dvdr < 0.f) ? 
dvdr : 0.f; + const float omega_ij = min(dvdr_Hubble, 0.f); const float mu_ij = fac_mu * r_inv * omega_ij; /* This is 0 or negative */ /* Signal velocity */ - const float v_sig = ci + cj - 3.f * mu_ij; + const float v_sig = ci + cj - const_viscosity_beta * mu_ij; /* Now construct the full viscosity term */ const float rho_ij = 0.5f * (rhoi + rhoj); - const float visc = -0.25f * const_viscosity_alpha * v_sig * mu_ij * - (balsara_i + balsara_j) / rho_ij; + const float visc = -0.25f * v_sig * mu_ij * (balsara_i + balsara_j) / rho_ij; /* Now, convolve with the kernel */ const float visc_term = 0.5f * visc * (wi_dr + wj_dr) * r_inv; @@ -637,7 +641,7 @@ __attribute__((always_inline)) INLINE static void runner_iact_nonsym_force( pi->force.v_sig = max(pi->force.v_sig, v_sig); /* Change in entropy */ - pi->entropy_dt += mj * visc_term * dvdr; + pi->entropy_dt += mj * visc_term * dvdr_Hubble; #ifdef DEBUG_INTERACTIONS_SPH /* Update ngb counters */ @@ -648,8 +652,6 @@ __attribute__((always_inline)) INLINE static void runner_iact_nonsym_force( } #ifdef WITH_VECTORIZATION -static const vector const_viscosity_alpha_fac = - FILL_VEC(-0.25f * const_viscosity_alpha); /** * @brief Force interaction computed using 1 vector @@ -671,7 +673,7 @@ runner_iact_nonsym_1_vec_force( vector dvx, dvy, dvz; vector xi, xj; vector hid_inv, hjd_inv; - vector wi_dx, wj_dx, wi_dr, wj_dr, dvdr; + vector wi_dx, wj_dx, wi_dr, wj_dr, dvdr, dvdr_Hubble; vector piax, piay, piaz; vector pih_dt; vector v_sig; @@ -723,25 +725,26 @@ runner_iact_nonsym_1_vec_force( dvz.v = vec_sub(viz.v, vjz.v); /* Compute dv dot r. */ - dvdr.v = - vec_fma(dvx.v, dx->v, - vec_fma(dvy.v, dy->v, - vec_fma(dvz.v, dz->v, vec_mul(v_a2_Hubble.v, r2->v)))); + dvdr.v = vec_fma(dvx.v, dx->v, vec_fma(dvy.v, dy->v, vec_mul(dvz.v, dz->v))); + + /* Add Hubble flow */ + dvdr_Hubble.v = vec_add(dvdr.v, vec_mul(v_a2_Hubble.v, r2->v)); /* Compute the relative velocity. 
(This is 0 if the particles move away from * each other and negative otherwise) */ - omega_ij.v = vec_fmin(dvdr.v, vec_setzero()); + omega_ij.v = vec_fmin(dvdr_Hubble.v, vec_setzero()); mu_ij.v = vec_mul(v_fac_mu.v, vec_mul(ri.v, omega_ij.v)); /* This is 0 or negative */ /* Compute signal velocity */ - v_sig.v = vec_fnma(vec_set1(3.f), mu_ij.v, vec_add(ci.v, cj.v)); + v_sig.v = + vec_fnma(vec_set1(const_viscosity_beta), mu_ij.v, vec_add(ci.v, cj.v)); /* Now construct the full viscosity term */ rho_ij.v = vec_mul(vec_set1(0.5f), vec_add(pirho.v, pjrho.v)); - visc.v = vec_div(vec_mul(const_viscosity_alpha_fac.v, - vec_mul(v_sig.v, vec_mul(mu_ij.v, balsara.v))), - rho_ij.v); + visc.v = vec_div( + vec_mul(vec_set1(-0.25f), vec_mul(v_sig.v, vec_mul(mu_ij.v, balsara.v))), + rho_ij.v); /* Now, convolve with the kernel */ visc_term.v = @@ -766,7 +769,7 @@ runner_iact_nonsym_1_vec_force( vec_div(vec_mul(mj.v, vec_mul(dvdr.v, vec_mul(ri.v, wi_dr.v))), pjrho.v); /* Change in entropy */ - entropy_dt.v = vec_mul(mj.v, vec_mul(visc_term.v, dvdr.v)); + entropy_dt.v = vec_mul(mj.v, vec_mul(visc_term.v, dvdr_Hubble.v)); /* Store the forces back on the particles. */ a_hydro_xSum->v = vec_mask_sub(a_hydro_xSum->v, piax.v, mask); @@ -806,7 +809,7 @@ runner_iact_nonsym_2_vec_force( vector dvx, dvy, dvz; vector ui, uj; vector hid_inv, hjd_inv; - vector wi_dx, wj_dx, wi_dr, wj_dr, dvdr; + vector wi_dx, wj_dx, wi_dr, wj_dr, dvdr, dvdr_Hubble; vector piax, piay, piaz; vector pih_dt; vector v_sig; @@ -817,7 +820,7 @@ runner_iact_nonsym_2_vec_force( vector dvx_2, dvy_2, dvz_2; vector ui_2, uj_2; vector hjd_inv_2; - vector wi_dx_2, wj_dx_2, wi_dr_2, wj_dr_2, dvdr_2; + vector wi_dx_2, wj_dx_2, wi_dr_2, wj_dr_2, dvdr_2, dvdr_Hubble_2; vector piax_2, piay_2, piaz_2; vector pih_dt_2; vector v_sig_2; @@ -903,36 +906,38 @@ runner_iact_nonsym_2_vec_force( dvz_2.v = vec_sub(viz.v, vjz_2.v); /* Compute dv dot r. 
*/ - dvdr.v = vec_fma( - dvx.v, dx.v, - vec_fma(dvy.v, dy.v, vec_fma(dvz.v, dz.v, vec_mul(v_a2_Hubble.v, r2.v)))); - dvdr_2.v = vec_fma( - dvx_2.v, dx_2.v, - vec_fma(dvy_2.v, dy_2.v, - vec_fma(dvz_2.v, dz_2.v, vec_mul(v_a2_Hubble.v, r2_2.v)))); + dvdr.v = vec_fma(dvx.v, dx.v, vec_fma(dvy.v, dy.v, vec_mul(dvz.v, dz.v))); + dvdr_2.v = vec_fma(dvx_2.v, dx_2.v, + vec_fma(dvy_2.v, dy_2.v, vec_mul(dvz_2.v, dz_2.v))); + + /* Add the Hubble flow */ + dvdr_Hubble.v = vec_add(dvdr.v, vec_mul(v_a2_Hubble.v, r2.v)); + dvdr_Hubble_2.v = vec_add(dvdr_2.v, vec_mul(v_a2_Hubble.v, r2_2.v)); /* Compute the relative velocity. (This is 0 if the particles move away from * each other and negative otherwise) */ - omega_ij.v = vec_fmin(dvdr.v, vec_setzero()); - omega_ij_2.v = vec_fmin(dvdr_2.v, vec_setzero()); + omega_ij.v = vec_fmin(dvdr_Hubble.v, vec_setzero()); + omega_ij_2.v = vec_fmin(dvdr_Hubble_2.v, vec_setzero()); mu_ij.v = vec_mul(v_fac_mu.v, vec_mul(ri.v, omega_ij.v)); /* This is 0 or negative */ mu_ij_2.v = vec_mul( v_fac_mu.v, vec_mul(ri_2.v, omega_ij_2.v)); /* This is 0 or negative */ /* Compute signal velocity */ - v_sig.v = vec_fnma(vec_set1(3.f), mu_ij.v, vec_add(ci.v, cj.v)); - v_sig_2.v = vec_fnma(vec_set1(3.f), mu_ij_2.v, vec_add(ci.v, cj_2.v)); + v_sig.v = + vec_fnma(vec_set1(const_viscosity_beta), mu_ij.v, vec_add(ci.v, cj.v)); + v_sig_2.v = vec_fnma(vec_set1(const_viscosity_beta), mu_ij_2.v, + vec_add(ci.v, cj_2.v)); /* Now construct the full viscosity term */ rho_ij.v = vec_mul(vec_set1(0.5f), vec_add(pirho.v, pjrho.v)); rho_ij_2.v = vec_mul(vec_set1(0.5f), vec_add(pirho.v, pjrho_2.v)); - visc.v = vec_div(vec_mul(const_viscosity_alpha_fac.v, - vec_mul(v_sig.v, vec_mul(mu_ij.v, balsara.v))), - rho_ij.v); + visc.v = vec_div( + vec_mul(vec_set1(-0.25f), vec_mul(v_sig.v, vec_mul(mu_ij.v, balsara.v))), + rho_ij.v); visc_2.v = - vec_div(vec_mul(const_viscosity_alpha_fac.v, + vec_div(vec_mul(vec_set1(-0.25f), vec_mul(v_sig_2.v, vec_mul(mu_ij_2.v, balsara_2.v))), 
rho_ij_2.v); @@ -976,8 +981,8 @@ runner_iact_nonsym_2_vec_force( pjrho_2.v); /* Change in entropy */ - entropy_dt.v = vec_mul(mj.v, vec_mul(visc_term.v, dvdr.v)); - entropy_dt_2.v = vec_mul(mj_2.v, vec_mul(visc_term_2.v, dvdr_2.v)); + entropy_dt.v = vec_mul(mj.v, vec_mul(visc_term.v, dvdr_Hubble.v)); + entropy_dt_2.v = vec_mul(mj_2.v, vec_mul(visc_term_2.v, dvdr_Hubble_2.v)); /* Store the forces back on the particles. */ if (mask_cond) { diff --git a/src/hydro/Gadget2/hydro_io.h b/src/hydro/Gadget2/hydro_io.h index 3f2af41dc7f0cc8f60992a15a0f09f3c90f764fe..ec7d34f7ad8697f1d639ea4951011ddb06ec8833 100644 --- a/src/hydro/Gadget2/hydro_io.h +++ b/src/hydro/Gadget2/hydro_io.h @@ -59,7 +59,7 @@ INLINE static void hydro_read_particles(struct part* parts, INLINE static void convert_part_u(const struct engine* e, const struct part* p, const struct xpart* xp, float* ret) { - ret[0] = hydro_get_comoving_internal_energy(p); + ret[0] = hydro_get_comoving_internal_energy(p, xp); } INLINE static void convert_part_P(const struct engine* e, const struct part* p, @@ -132,6 +132,7 @@ INLINE static void convert_part_potential(const struct engine* e, * @brief Specifies which particle fields to write to a dataset * * @param parts The particle array. + * @param xparts The extended particle data array. * @param list The list of i/o properties to write. * @param num_fields The number of i/o fields to write. */ @@ -199,8 +200,6 @@ INLINE static void hydro_write_flavour(hid_t h_grpsph) { io_write_attribute_s( h_grpsph, "Viscosity Model", "as in Springel (2005), i.e. 
Monaghan (1992) with Balsara (1995) switch"); - io_write_attribute_f(h_grpsph, "Viscosity alpha", const_viscosity_alpha); - io_write_attribute_f(h_grpsph, "Viscosity beta", 3.f); } /** diff --git a/src/hydro/Gadget2/hydro_part.h b/src/hydro/Gadget2/hydro_part.h index 90f73571701b37b3377601655330d8d25f862a05..bcb40243362dc57d47a3832c1d9330cb68d93fb8 100644 --- a/src/hydro/Gadget2/hydro_part.h +++ b/src/hydro/Gadget2/hydro_part.h @@ -33,6 +33,7 @@ #include "chemistry_struct.h" #include "cooling_struct.h" +#include "logger.h" /* Extra particle data not needed during the SPH loops over neighbours. */ struct xpart { @@ -55,6 +56,11 @@ struct xpart { /* Additional data used to record cooling information */ struct cooling_xpart_data cooling_data; +#ifdef WITH_LOGGER + /* Additional data for the particle logger */ + struct logger_part_data logger_data; +#endif + } SWIFT_STRUCT_ALIGN; /* Data of a single particle. */ diff --git a/src/hydro/GizmoMFM/hydro.h b/src/hydro/GizmoMFM/hydro.h index 7c38896a4bfb0c44a79c509954e11128244e350e..8e466daabb59482a1c2ebbaf80af30c64c4abdfe 100644 --- a/src/hydro/GizmoMFM/hydro.h +++ b/src/hydro/GizmoMFM/hydro.h @@ -60,10 +60,9 @@ __attribute__((always_inline)) INLINE static float hydro_compute_timestep( sqrtf(hydro_gamma * p->P / p->rho); vmax = max(vmax, p->timestepvars.vmax); - // MATTHIEU: Bert is this correct? Do we need more cosmology terms here? - const float psize = - cosmo->a * powf(p->geometry.volume / hydro_dimension_unit_sphere, - hydro_dimension_inv); + const float psize = cosmo->a * cosmo->a * + powf(p->geometry.volume / hydro_dimension_unit_sphere, + hydro_dimension_inv); float dt = FLT_MAX; if (vmax > 0.0f) { dt = psize / vmax; @@ -442,17 +441,24 @@ __attribute__((always_inline)) INLINE static void hydro_end_gradient( /** * @brief Prepare a particle for the force calculation. 
* - * This function is called in the extra_ghost task to convert some quantities - * coming from the gradient loop over neighbours into quantities ready to be - * used in the force loop over neighbours. + * This function is called in the ghost task to convert some quantities coming + * from the density loop over neighbours into quantities ready to be used in the + * force loop over neighbours. Quantities are typically read from the density + * sub-structure and written to the force sub-structure. + * Examples of calculations done here include the calculation of viscosity term + * constants, thermal conduction terms, hydro conversions, etc. * * @param p The particle to act upon * @param xp The extended particle data to act upon * @param cosmo The current cosmological model. + * @param hydro_props Hydrodynamic properties. + * @param dt_alpha The time-step used to evolve non-cosmological quantities such + * as the artificial viscosity. */ __attribute__((always_inline)) INLINE static void hydro_prepare_force( struct part* restrict p, struct xpart* restrict xp, - const struct cosmology* cosmo) { + const struct cosmology* cosmo, const struct hydro_props* hydro_props, + const float dt_alpha) { /* Initialise values that are used in the force loop */ p->flux.momentum[0] = 0.0f; @@ -502,7 +508,11 @@ __attribute__((always_inline)) INLINE static void hydro_reset_predicted_values( * @param p The particle to act upon. 
*/ __attribute__((always_inline)) INLINE static void hydro_convert_quantities( - struct part* p, struct xpart* xp, const struct cosmology* cosmo) {} + struct part* p, struct xpart* xp, const struct cosmology* cosmo, + const struct hydro_props* hydro_props) { + + p->conserved.energy /= cosmo->a_factor_internal_energy; +} /** * @brief Extra operations to be done during the drift @@ -541,23 +551,17 @@ __attribute__((always_inline)) INLINE static void hydro_predict_extra( #if !defined(EOS_ISOTHERMAL_GAS) #ifdef GIZMO_TOTAL_ENERGY - const float Etot = p->conserved.energy + p->flux.energy * dt_therm; + const float Etot = p->conserved.energy + p->flux.energy * dt_drift; const float v2 = (p->v[0] * p->v[0] + p->v[1] * p->v[1] + p->v[2] * p->v[2]); const float u = (Etot * m_inv - 0.5f * v2); #else - const float u = (p->conserved.energy + p->flux.energy * dt_therm) * m_inv; + const float u = (p->conserved.energy + p->flux.energy * dt_drift) * m_inv; #endif p->P = hydro_gamma_minus_one * u * p->rho; #endif } - /* we use a sneaky way to get the gravitational contribtuion to the - velocity update */ - p->v[0] += p->v[0] - xp->v_full[0]; - p->v[1] += p->v[1] - xp->v_full[1]; - p->v[2] += p->v[2] - xp->v_full[2]; - #ifdef SWIFT_DEBUG_CHECKS if (p->h <= 0.0f) { error("Zero or negative smoothing length (%g)!", p->h); @@ -595,29 +599,40 @@ __attribute__((always_inline)) INLINE static void hydro_end_force( /** * @brief Extra operations done during the kick * - * Not used for GIZMO. - * * @param p Particle to act upon. * @param xp Extended particle data to act upon. - * @param dt Physical time step. - * @param half_dt Half the physical time step. + * @param dt_therm Thermal energy time-step @f$\frac{dt}{a^2}@f$. + * @param dt_grav Gravity time-step @f$\frac{dt}{a}@f$. + * @param dt_hydro Hydro acceleration time-step + * @f$\frac{dt}{a^{3(\gamma{}-1)}}@f$. + * @param dt_kick_corr Gravity correction time-step @f$adt@f$. + * @param cosmo Cosmology. 
+ * @param hydro_props Additional hydro properties. */ __attribute__((always_inline)) INLINE static void hydro_kick_extra( - struct part* p, struct xpart* xp, float dt, const struct cosmology* cosmo, + struct part* p, struct xpart* xp, float dt_therm, float dt_grav, + float dt_hydro, float dt_kick_corr, const struct cosmology* cosmo, const struct hydro_props* hydro_props) { float a_grav[3]; /* Update conserved variables (note: the mass does not change). */ - p->conserved.momentum[0] += p->flux.momentum[0] * dt; - p->conserved.momentum[1] += p->flux.momentum[1] * dt; - p->conserved.momentum[2] += p->flux.momentum[2] * dt; + p->conserved.momentum[0] += p->flux.momentum[0] * dt_therm; + p->conserved.momentum[1] += p->flux.momentum[1] * dt_therm; + p->conserved.momentum[2] += p->flux.momentum[2] * dt_therm; #if defined(EOS_ISOTHERMAL_GAS) /* We use the EoS equation in a sneaky way here just to get the constant u */ p->conserved.energy = p->conserved.mass * gas_internal_energy_from_entropy(0.0f, 0.0f); #else - p->conserved.energy += p->flux.energy * dt; + p->conserved.energy += p->flux.energy * dt_therm; +#endif + +#ifndef HYDRO_GAMMA_5_3 + const float Pcorr = (dt_hydro - dt_therm) * p->geometry.volume; + p->conserved.momentum[0] -= Pcorr * p->gradients.P[0]; + p->conserved.momentum[1] -= Pcorr * p->gradients.P[1]; + p->conserved.momentum[2] -= Pcorr * p->gradients.P[2]; #endif /* Apply the minimal energy limit */ @@ -655,9 +670,9 @@ __attribute__((always_inline)) INLINE static void hydro_kick_extra( /* Kick the momentum for half a time step */ /* Note that this also affects the particle movement, as the velocity for the particles is set after this. 
*/ - p->conserved.momentum[0] += dt * p->conserved.mass * a_grav[0]; - p->conserved.momentum[1] += dt * p->conserved.mass * a_grav[1]; - p->conserved.momentum[2] += dt * p->conserved.mass * a_grav[2]; + p->conserved.momentum[0] += dt_grav * p->conserved.mass * a_grav[0]; + p->conserved.momentum[1] += dt_grav * p->conserved.mass * a_grav[1]; + p->conserved.momentum[2] += dt_grav * p->conserved.mass * a_grav[2]; } /* Set the velocities: */ @@ -717,6 +732,33 @@ hydro_get_physical_internal_energy(const struct part* restrict p, hydro_get_comoving_internal_energy(p); } +/** + * @brief Returns the comoving internal energy of a particle drifted to the + * current time. + * + * @param p The particle of interest + */ +__attribute__((always_inline)) INLINE static float +hydro_get_drifted_comoving_internal_energy(const struct part* restrict p) { + + return hydro_get_comoving_internal_energy(p); +} + +/** + * @brief Returns the physical internal energy of a particle drifted to the + * current time. + * + * @param p The particle of interest. + * @param cosmo The cosmological model. + */ +__attribute__((always_inline)) INLINE static float +hydro_get_drifted_physical_internal_energy(const struct part* restrict p, + const struct cosmology* cosmo) { + + return hydro_get_comoving_internal_energy(p) * + cosmo->a_factor_internal_energy; +} + /** * @brief Returns the comoving entropy of a particle * @@ -746,6 +788,21 @@ __attribute__((always_inline)) INLINE static float hydro_get_physical_entropy( return hydro_get_comoving_entropy(p); } +/** + * @brief Returns the physical internal energy of a particle + * + * @param p The particle of interest. + * @param cosmo The cosmological model. + */ +__attribute__((always_inline)) INLINE static float +hydro_get_drifted_physical_entropy(const struct part* restrict p, + const struct cosmology* cosmo) { + + /* Note: no cosmological conversion required here with our choice of + * coordinates. 
*/ + return hydro_get_comoving_entropy(p); +} + /** * @brief Returns the sound speed of a particle * @@ -844,7 +901,6 @@ __attribute__((always_inline)) INLINE static void hydro_get_drifted_velocities( v[2] = p->v[2]; } - // MATTHIEU: Bert is this correct? v[0] += xp->a_grav[0] * dt_kick_grav; v[1] += xp->a_grav[1] * dt_kick_grav; v[2] += xp->a_grav[2] * dt_kick_grav; diff --git a/src/hydro/GizmoMFM/hydro_iact.h b/src/hydro/GizmoMFM/hydro_iact.h index 5bed20d7f894a76d5fe3642c7438dc03195e43d6..38a97cbea39c1ed5c6926c911941e655e52362aa 100644 --- a/src/hydro/GizmoMFM/hydro_iact.h +++ b/src/hydro/GizmoMFM/hydro_iact.h @@ -267,8 +267,7 @@ __attribute__((always_inline)) INLINE static void runner_iact_fluxes_common( const float dvdotdx = min(dvdr, 0.0f); /* Get the signal velocity */ - /* the magical factor 3 also appears in Gadget2 */ - vmax -= 3.0f * dvdotdx * r_inv; + vmax -= const_viscosity_beta * dvdotdx * r_inv; /* Store the signal velocity */ pi->timestepvars.vmax = max(pi->timestepvars.vmax, vmax); diff --git a/src/hydro/GizmoMFM/hydro_slope_limiters.h b/src/hydro/GizmoMFM/hydro_slope_limiters.h index 78f2785cdae5dc2334d37e3924dd5b259cca8c05..7c9c759830a4b0ee98412d5a200700c0a148d316 100644 --- a/src/hydro/GizmoMFM/hydro_slope_limiters.h +++ b/src/hydro/GizmoMFM/hydro_slope_limiters.h @@ -47,8 +47,8 @@ * @param r Distance between particle i and particle j. 
*/ __attribute__((always_inline)) INLINE static void hydro_slope_limit_face( - float *Wi, float *Wj, float *dWi, float *dWj, float *xij_i, float *xij_j, - float r) {} + float *Wi, float *Wj, float *dWi, float *dWj, const float *xij_i, + const float *xij_j, float r) {} #endif diff --git a/src/hydro/GizmoMFV/hydro.h b/src/hydro/GizmoMFV/hydro.h index c65f2bb5899b63dbaea4db2f108cf3d914ca78f0..98a70aefed098243bbf2dfe08e752ee48a838d3e 100644 --- a/src/hydro/GizmoMFV/hydro.h +++ b/src/hydro/GizmoMFV/hydro.h @@ -68,14 +68,13 @@ __attribute__((always_inline)) INLINE static float hydro_compute_timestep( vmax = max(vmax, p->timestepvars.vmax); // MATTHIEU: Bert is this correct? Do we need more cosmology terms here? - const float psize = - cosmo->a * powf(p->geometry.volume / hydro_dimension_unit_sphere, - hydro_dimension_inv); + const float psize = powf(p->geometry.volume / hydro_dimension_unit_sphere, + hydro_dimension_inv); float dt = FLT_MAX; if (vmax > 0.) { dt = psize / vmax; } - return CFL_condition * dt; + return cosmo->a * cosmo->a * CFL_condition * dt; } /** @@ -467,17 +466,24 @@ __attribute__((always_inline)) INLINE static void hydro_end_gradient( /** * @brief Prepare a particle for the force calculation. * - * This function is called in the extra_ghost task to convert some quantities - * coming from the gradient loop over neighbours into quantities ready to be - * used in the force loop over neighbours. + * This function is called in the ghost task to convert some quantities coming + * from the density loop over neighbours into quantities ready to be used in the + * force loop over neighbours. Quantities are typically read from the density + * sub-structure and written to the force sub-structure. + * Examples of calculations done here include the calculation of viscosity term + * constants, thermal conduction terms, hydro conversions, etc. 
* * @param p The particle to act upon * @param xp The extended particle data to act upon * @param cosmo The current cosmological model. + * @param hydro_props Hydrodynamic properties. + * @param dt_alpha The time-step used to evolve non-cosmological quantities such + * as the artificial viscosity. */ __attribute__((always_inline)) INLINE static void hydro_prepare_force( struct part* restrict p, struct xpart* restrict xp, - const struct cosmology* cosmo) { + const struct cosmology* cosmo, const struct hydro_props* hydro_props, + const float dt_alpha) { /* Initialise values that are used in the force loop */ p->gravity.mflux[0] = 0.0f; @@ -544,7 +550,11 @@ __attribute__((always_inline)) INLINE static void hydro_reset_predicted_values( * @param p The particle to act upon. */ __attribute__((always_inline)) INLINE static void hydro_convert_quantities( - struct part* p, struct xpart* xp, const struct cosmology* cosmo) {} + struct part* p, struct xpart* xp, const struct cosmology* cosmo, + const struct hydro_props* hydro_props) { + + p->conserved.energy /= cosmo->a_factor_internal_energy; +} /** * @brief Extra operations to be done during the drift @@ -579,16 +589,16 @@ __attribute__((always_inline)) INLINE static void hydro_predict_extra( /* drift the primitive variables based on the old fluxes */ if (p->geometry.volume > 0.) { - p->primitives.rho += p->conserved.flux.mass * dt_drift / p->geometry.volume; + p->primitives.rho += p->conserved.flux.mass * dt_therm / p->geometry.volume; } if (p->conserved.mass > 0.) 
{ p->primitives.v[0] += - p->conserved.flux.momentum[0] * dt_drift / p->conserved.mass; + p->conserved.flux.momentum[0] * dt_therm / p->conserved.mass; p->primitives.v[1] += - p->conserved.flux.momentum[1] * dt_drift / p->conserved.mass; + p->conserved.flux.momentum[1] * dt_therm / p->conserved.mass; p->primitives.v[2] += - p->conserved.flux.momentum[2] * dt_drift / p->conserved.mass; + p->conserved.flux.momentum[2] * dt_therm / p->conserved.mass; #if !defined(EOS_ISOTHERMAL_GAS) #ifdef GIZMO_TOTAL_ENERGY @@ -607,7 +617,7 @@ __attribute__((always_inline)) INLINE static void hydro_predict_extra( #endif } - /* we use a sneaky way to get the gravitational contribtuion to the + /* we use a sneaky way to get the gravitational contribution to the velocity update */ p->primitives.v[0] += p->v[0] - xp->v_full[0]; p->primitives.v[1] += p->v[1] - xp->v_full[1]; @@ -649,15 +659,19 @@ __attribute__((always_inline)) INLINE static void hydro_end_force( /** * @brief Extra operations done during the kick * - * Not used for GIZMO. - * * @param p Particle to act upon. * @param xp Extended particle data to act upon. - * @param dt Physical time step. - * @param half_dt Half the physical time step. + * @param dt_therm Thermal energy time-step @f$\frac{dt}{a^2}@f$. + * @param dt_grav Gravity time-step @f$\frac{dt}{a}@f$. + * @param dt_hydro Hydro acceleration time-step + * @f$\frac{dt}{a^{3(\gamma{}-1)}}@f$. + * @param dt_kick_corr Gravity correction time-step @f$adt@f$. + * @param cosmo Cosmology. + * @param hydro_props Additional hydro properties. 
*/ __attribute__((always_inline)) INLINE static void hydro_kick_extra( - struct part* p, struct xpart* xp, float dt, const struct cosmology* cosmo, + struct part* p, struct xpart* xp, float dt_therm, float dt_grav, + float dt_hydro, float dt_kick_corr, const struct cosmology* cosmo, const struct hydro_props* hydro_props) { float a_grav[3]; @@ -672,35 +686,48 @@ __attribute__((always_inline)) INLINE static void hydro_kick_extra( a_grav[2] = p->gpart->a_grav[2]; #ifdef GIZMO_TOTAL_ENERGY - p->conserved.energy += dt * (p->conserved.momentum[0] * a_grav[0] + - p->conserved.momentum[1] * a_grav[1] + - p->conserved.momentum[2] * a_grav[2]); + p->conserved.energy += dt_grav * (p->conserved.momentum[0] * a_grav[0] + + p->conserved.momentum[1] * a_grav[1] + + p->conserved.momentum[2] * a_grav[2]); #endif /* Kick the momentum for half a time step */ /* Note that this also affects the particle movement, as the velocity for the particles is set after this. */ - p->conserved.momentum[0] += p->conserved.mass * a_grav[0] * dt; - p->conserved.momentum[1] += p->conserved.mass * a_grav[1] * dt; - p->conserved.momentum[2] += p->conserved.mass * a_grav[2] * dt; + p->conserved.momentum[0] += p->conserved.mass * a_grav[0] * dt_grav; + p->conserved.momentum[1] += p->conserved.mass * a_grav[1] * dt_grav; + p->conserved.momentum[2] += p->conserved.mass * a_grav[2] * dt_grav; p->conserved.energy -= - 0.5f * dt * + 0.5f * dt_kick_corr * (p->gravity.mflux[0] * a_grav[0] + p->gravity.mflux[1] * a_grav[1] + p->gravity.mflux[2] * a_grav[2]); } /* Update conserved variables. 
*/ - p->conserved.mass += p->conserved.flux.mass * dt; - p->conserved.momentum[0] += p->conserved.flux.momentum[0] * dt; - p->conserved.momentum[1] += p->conserved.flux.momentum[1] * dt; - p->conserved.momentum[2] += p->conserved.flux.momentum[2] * dt; + p->conserved.mass += p->conserved.flux.mass * dt_therm; + p->conserved.momentum[0] += p->conserved.flux.momentum[0] * dt_therm; + p->conserved.momentum[1] += p->conserved.flux.momentum[1] * dt_therm; + p->conserved.momentum[2] += p->conserved.flux.momentum[2] * dt_therm; #if defined(EOS_ISOTHERMAL_GAS) /* We use the EoS equation in a sneaky way here just to get the constant u */ p->conserved.energy = p->conserved.mass * gas_internal_energy_from_entropy(0.f, 0.f); #else - p->conserved.energy += p->conserved.flux.energy * dt; + p->conserved.energy += p->conserved.flux.energy * dt_therm; +#endif + +#ifndef HYDRO_GAMMA_5_3 + const float Pcorr = (dt_hydro - dt_therm) * p->geometry.volume; + p->conserved.momentum[0] -= Pcorr * p->primitives.gradients.P[0]; + p->conserved.momentum[1] -= Pcorr * p->primitives.gradients.P[1]; + p->conserved.momentum[2] -= Pcorr * p->primitives.gradients.P[2]; +#ifdef GIZMO_TOTAL_ENERGY + p->conserved.energy -= + Pcorr * (p->primitives.v[0] * p->primitives.gradients.P[0] + + p->primitives.v[1] * p->primitives.gradients.P[1] + + p->primitives.v[2] * p->primitives.gradients.P[2]); +#endif #endif /* Apply the minimal energy limit */ @@ -791,6 +818,19 @@ hydro_get_physical_internal_energy(const struct part* restrict p, hydro_get_comoving_internal_energy(p); } +/** + * @brief Returns the physical internal energy of a particle + * + * @param p The particle of interest. + * @param cosmo The cosmological model. 
+ */ +__attribute__((always_inline)) INLINE static float +hydro_get_drifted_physical_internal_energy(const struct part* restrict p, + const struct cosmology* cosmo) { + + return hydro_get_physical_internal_energy(p, cosmo); +} + /** * @brief Returns the comoving entropy of a particle * @@ -820,6 +860,21 @@ __attribute__((always_inline)) INLINE static float hydro_get_physical_entropy( return hydro_get_comoving_entropy(p); } +/** + * @brief Returns the physical internal energy of a particle + * + * @param p The particle of interest. + * @param cosmo The cosmological model. + */ +__attribute__((always_inline)) INLINE static float +hydro_get_drifted_physical_entropy(const struct part* restrict p, + const struct cosmology* cosmo) { + + /* Note: no cosmological conversion required here with our choice of + * coordinates. */ + return hydro_get_comoving_entropy(p); +} + /** * @brief Returns the sound speed of a particle * diff --git a/src/hydro/GizmoMFV/hydro_iact.h b/src/hydro/GizmoMFV/hydro_iact.h index c766ce3cc9048f8da8b3438c3c27e6998dd5df7e..2f73e67ea2fdcecc527de8b1af0d15731f967b9b 100644 --- a/src/hydro/GizmoMFV/hydro_iact.h +++ b/src/hydro/GizmoMFV/hydro_iact.h @@ -271,8 +271,7 @@ __attribute__((always_inline)) INLINE static void runner_iact_fluxes_common( dvdotdx = min(dvdotdx, 0.f); /* Get the signal velocity */ - /* the magical factor 3 also appears in Gadget2 */ - vmax -= 3.f * dvdotdx * r_inv; + vmax -= const_viscosity_beta * dvdotdx * r_inv; /* Store the signal velocity */ pi->timestepvars.vmax = max(pi->timestepvars.vmax, vmax); diff --git a/src/hydro/Minimal/hydro.h b/src/hydro/Minimal/hydro.h index 812f8ad72de55ad7990ee6ef88223a401780bc4b..93a2b3ec8ed4ecb6dcc73314233217d37141ba46 100644 --- a/src/hydro/Minimal/hydro.h +++ b/src/hydro/Minimal/hydro.h @@ -44,35 +44,58 @@ #include "minmax.h" /** - * @brief Returns the comoving internal energy of a particle - * - * For implementations where the main thermodynamic variable - * is not internal energy, this 
function computes the internal - * energy from the thermodynamic variable. + * @brief Returns the comoving internal energy of a particle at the last + * time the particle was kicked. * * @param p The particle of interest + * @param xp The extended data of the particle of interest. */ __attribute__((always_inline)) INLINE static float -hydro_get_comoving_internal_energy(const struct part *restrict p) { +hydro_get_comoving_internal_energy(const struct part *restrict p, + const struct xpart *restrict xp) { - return p->u; + return xp->u_full; } /** - * @brief Returns the physical internal energy of a particle - * - * For implementations where the main thermodynamic variable - * is not internal energy, this function computes the internal - * energy from the thermodynamic variable and converts it to - * physical coordinates. + * @brief Returns the physical internal energy of a particle at the last + * time the particle was kicked. * * @param p The particle of interest. + * @param xp The extended data of the particle of interest. * @param cosmo The cosmological model. */ __attribute__((always_inline)) INLINE static float hydro_get_physical_internal_energy(const struct part *restrict p, + const struct xpart *restrict xp, const struct cosmology *cosmo) { + return xp->u_full * cosmo->a_factor_internal_energy; +} + +/** + * @brief Returns the comoving internal energy of a particle drifted to the + * current time. + * + * @param p The particle of interest + */ +__attribute__((always_inline)) INLINE static float +hydro_get_drifted_comoving_internal_energy(const struct part *restrict p) { + + return p->u; +} + +/** + * @brief Returns the physical internal energy of a particle drifted to the + * current time. + * + * @param p The particle of interest. + * @param cosmo The cosmological model. 
+ */ +__attribute__((always_inline)) INLINE static float +hydro_get_drifted_physical_internal_energy(const struct part *restrict p, + const struct cosmology *cosmo) { + return p->u * cosmo->a_factor_internal_energy; } @@ -106,33 +129,57 @@ __attribute__((always_inline)) INLINE static float hydro_get_physical_pressure( } /** - * @brief Returns the comoving entropy of a particle - * - * For implementations where the main thermodynamic variable - * is not entropy, this function computes the entropy from - * the thermodynamic variable. + * @brief Returns the comoving entropy of a particle at the last + * time the particle was kicked. * - * @param p The particle of interest + * @param p The particle of interest. + * @param xp The extended data of the particle of interest. */ __attribute__((always_inline)) INLINE static float hydro_get_comoving_entropy( - const struct part *restrict p) { + const struct part *restrict p, const struct xpart *restrict xp) { - return gas_entropy_from_internal_energy(p->rho, p->u); + return gas_entropy_from_internal_energy(p->rho, xp->u_full); } /** - * @brief Returns the physical entropy of a particle - * - * For implementations where the main thermodynamic variable - * is not entropy, this function computes the entropy from - * the thermodynamic variable and converts it to - * physical coordinates. + * @brief Returns the physical entropy of a particle at the last + * time the particle was kicked. * - * @param p The particle of interest + * @param p The particle of interest. + * @param xp The extended data of the particle of interest. * @param cosmo The cosmological model. */ __attribute__((always_inline)) INLINE static float hydro_get_physical_entropy( - const struct part *restrict p, const struct cosmology *cosmo) { + const struct part *restrict p, const struct xpart *restrict xp, + const struct cosmology *cosmo) { + + /* Note: no cosmological conversion required here with our choice of + * coordinates. 
*/ + return gas_entropy_from_internal_energy(p->rho, xp->u_full); +} + +/** + * @brief Returns the comoving entropy of a particle drifted to the + * current time. + * + * @param p The particle of interest. + */ +__attribute__((always_inline)) INLINE static float +hydro_get_drifted_comoving_entropy(const struct part *restrict p) { + + return gas_entropy_from_internal_energy(p->rho, p->u); +} + +/** + * @brief Returns the physical entropy of a particle drifted to the + * current time. + * + * @param p The particle of interest. + * @param cosmo The cosmological model. + */ +__attribute__((always_inline)) INLINE static float +hydro_get_drifted_physical_entropy(const struct part *restrict p, + const struct cosmology *cosmo) { /* Note: no cosmological conversion required here with our choice of * coordinates. */ @@ -231,14 +278,14 @@ __attribute__((always_inline)) INLINE static void hydro_get_drifted_velocities( } /** - * @brief Returns the time derivative of internal energy of a particle + * @brief Returns the time derivative of co-moving internal energy of a particle * * We assume a constant density. * * @param p The particle of interest */ -__attribute__((always_inline)) INLINE static float hydro_get_internal_energy_dt( - const struct part *restrict p) { +__attribute__((always_inline)) INLINE static float +hydro_get_comoving_internal_energy_dt(const struct part *restrict p) { return p->u_dt; } @@ -248,14 +295,48 @@ __attribute__((always_inline)) INLINE static float hydro_get_internal_energy_dt( * * We assume a constant density. 
 * + * @param p The particle of interest + * @param cosmo Cosmology data structure + */ +__attribute__((always_inline)) INLINE static float +hydro_get_physical_internal_energy_dt(const struct part *restrict p, + const struct cosmology *cosmo) { + + return p->u_dt * cosmo->a_factor_internal_energy; +} + +/** + * @brief Sets the time derivative of the co-moving internal energy of a + * particle + * + * We assume a constant density for the conversion to entropy. + * * @param p The particle of interest. + * @param du_dt The new time derivative of the internal energy. */ -__attribute__((always_inline)) INLINE static void hydro_set_internal_energy_dt( - struct part *restrict p, float du_dt) { +__attribute__((always_inline)) INLINE static void +hydro_set_comoving_internal_energy_dt(struct part *restrict p, float du_dt) { p->u_dt = du_dt; } + +/** + * @brief Sets the time derivative of the physical internal energy of a particle + * + * We assume a constant density. + * + * @param p The particle of interest. + * @param cosmo Cosmology data structure + * @param du_dt The new time derivative of the internal energy. 
+ */ +__attribute__((always_inline)) INLINE static void +hydro_set_physical_internal_energy_dt(struct part *restrict p, + const struct cosmology *cosmo, + float du_dt) { + + p->u_dt = du_dt * cosmo->a_factor_internal_energy; +} + /** * @brief Computes the hydro time-step of a given particle * @@ -308,6 +389,10 @@ __attribute__((always_inline)) INLINE static void hydro_init_part( p->density.wcount_dh = 0.f; p->rho = 0.f; p->density.rho_dh = 0.f; + p->density.div_v = 0.f; + p->density.rot_v[0] = 0.f; + p->density.rot_v[1] = 0.f; + p->density.rot_v[2] = 0.f; } /** @@ -343,6 +428,17 @@ __attribute__((always_inline)) INLINE static void hydro_end_density( p->density.rho_dh *= h_inv_dim_plus_one; p->density.wcount *= h_inv_dim; p->density.wcount_dh *= h_inv_dim_plus_one; + + const float rho_inv = 1.f / p->rho; + const float a_inv2 = cosmo->a2_inv; + + /* Finish calculation of the (physical) velocity curl components */ + p->density.rot_v[0] *= h_inv_dim_plus_one * a_inv2 * rho_inv; + p->density.rot_v[1] *= h_inv_dim_plus_one * a_inv2 * rho_inv; + p->density.rot_v[2] *= h_inv_dim_plus_one * a_inv2 * rho_inv; + + /* Finish calculation of the (physical) velocity divergence */ + p->density.div_v *= h_inv_dim_plus_one * a_inv2 * rho_inv; } /** @@ -370,6 +466,10 @@ __attribute__((always_inline)) INLINE static void hydro_part_has_no_neighbours( p->density.wcount = kernel_root * h_inv_dim; p->density.rho_dh = 0.f; p->density.wcount_dh = 0.f; + p->density.div_v = 0.f; + p->density.rot_v[0] = 0.f; + p->density.rot_v[1] = 0.f; + p->density.rot_v[2] = 0.f; } /** @@ -385,10 +485,28 @@ __attribute__((always_inline)) INLINE static void hydro_part_has_no_neighbours( * @param p The particle to act upon * @param xp The extended particle data to act upon * @param cosmo The current cosmological model. + * @param hydro_props Hydrodynamic properties. + * @param dt_alpha The time-step used to evolve non-cosmological quantities such + * as the artificial viscosity. 
*/ __attribute__((always_inline)) INLINE static void hydro_prepare_force( struct part *restrict p, struct xpart *restrict xp, - const struct cosmology *cosmo) { + const struct cosmology *cosmo, const struct hydro_props *hydro_props, + const float dt_alpha) { + + const float fac_Balsara_eps = cosmo->a_factor_Balsara_eps; + + /* Inverse of the smoothing length */ + const float h_inv = 1.f / p->h; + + /* Compute the norm of the curl */ + const float curl_v = sqrtf(p->density.rot_v[0] * p->density.rot_v[0] + + p->density.rot_v[1] * p->density.rot_v[1] + + p->density.rot_v[2] * p->density.rot_v[2]); + + /* Compute the norm of div v including the Hubble flow term */ + const float div_physical_v = p->density.div_v + 3.f * cosmo->H; + const float abs_div_physical_v = fabsf(div_physical_v); /* Compute the pressure */ const float pressure = gas_pressure_from_internal_energy(p->rho, p->u); @@ -401,10 +519,18 @@ __attribute__((always_inline)) INLINE static void hydro_prepare_force( const float grad_h_term = 1.f / (1.f + hydro_dimension_inv * p->h * p->density.rho_dh * rho_inv); + /* Compute the Balsara switch */ + /* Pre-multiply in the AV factor; hydro_props are not passed to the iact + * functions */ + const float balsara = hydro_props->viscosity.alpha * abs_div_physical_v / + (abs_div_physical_v + curl_v + + 0.0001f * fac_Balsara_eps * soundspeed * h_inv); + /* Update variables. */ p->force.f = grad_h_term; p->force.pressure = pressure; p->force.soundspeed = soundspeed; + p->force.balsara = balsara; } /** @@ -522,11 +648,15 @@ __attribute__((always_inline)) INLINE static void hydro_end_force( * @param p The particle to act upon. * @param xp The particle extended data to act upon. * @param dt_therm The time-step for this kick (for thermodynamic quantities). + * @param dt_grav The time-step for this kick (for gravity quantities). + * @param dt_hydro The time-step for this kick (for hydro quantities). 
+ * @param dt_kick_corr The time-step for this kick (for gravity corrections). * @param cosmo The cosmological model. * @param hydro_props The constants used in the scheme */ __attribute__((always_inline)) INLINE static void hydro_kick_extra( struct part *restrict p, struct xpart *restrict xp, float dt_therm, + float dt_grav, float dt_hydro, float dt_kick_corr, const struct cosmology *cosmo, const struct hydro_props *hydro_props) { /* Do not decrease the energy by more than a factor of 2*/ @@ -536,10 +666,10 @@ __attribute__((always_inline)) INLINE static void hydro_kick_extra( xp->u_full += p->u_dt * dt_therm; /* Apply the minimal energy limit */ - const float min_energy = - hydro_props->minimal_internal_energy * cosmo->a_factor_internal_energy; - if (xp->u_full < min_energy) { - xp->u_full = min_energy; + const float min_comoving_energy = + hydro_props->minimal_internal_energy / cosmo->a_factor_internal_energy; + if (xp->u_full < min_comoving_energy) { + xp->u_full = min_comoving_energy; p->u_dt = 0.f; } @@ -547,7 +677,8 @@ __attribute__((always_inline)) INLINE static void hydro_kick_extra( const float pressure = gas_pressure_from_internal_energy(p->rho, xp->u_full); /* Compute the sound speed */ - const float soundspeed = gas_soundspeed_from_internal_energy(p->rho, p->u); + const float soundspeed = + gas_soundspeed_from_internal_energy(p->rho, xp->u_full); p->force.pressure = pressure; p->force.soundspeed = soundspeed; @@ -564,10 +695,26 @@ __attribute__((always_inline)) INLINE static void hydro_kick_extra( * @param p The particle to act upon * @param xp The extended particle to act upon * @param cosmo The cosmological model. + * @param hydro_props The constants used in the scheme. 
 */ __attribute__((always_inline)) INLINE static void hydro_convert_quantities( struct part *restrict p, struct xpart *restrict xp, - const struct cosmology *cosmo) { + const struct cosmology *cosmo, const struct hydro_props *hydro_props) { + + /* Convert the physical internal energy to the comoving one. */ + /* u' = a^(3(g-1)) u */ + const float factor = 1.f / cosmo->a_factor_internal_energy; + p->u *= factor; + xp->u_full = p->u; + + /* Apply the minimal energy limit */ + const float min_comoving_energy = + hydro_props->minimal_internal_energy / cosmo->a_factor_internal_energy; + if (xp->u_full < min_comoving_energy) { + xp->u_full = min_comoving_energy; + p->u = min_comoving_energy; + p->u_dt = 0.f; + } /* Compute the pressure */ const float pressure = gas_pressure_from_internal_energy(p->rho, p->u); diff --git a/src/hydro/Minimal/hydro_iact.h b/src/hydro/Minimal/hydro_iact.h index 42fd93d6062cbfcea5cf5297eeda0bb6525f3cad..e060cb3562f1b319c64d6f6523b18858662312e7 100644 --- a/src/hydro/Minimal/hydro_iact.h +++ b/src/hydro/Minimal/hydro_iact.h @@ -53,6 +53,13 @@ __attribute__((always_inline)) INLINE static void runner_iact_density( float wi, wj, wi_dx, wj_dx; +#ifdef SWIFT_DEBUG_CHECKS + if (pi->time_bin == time_bin_inhibited) + error("Inhibited pi in interaction function!"); + if (pj->time_bin == time_bin_inhibited) + error("Inhibited pj in interaction function!"); +#endif + /* Get r. 
*/ const float r_inv = 1.0f / sqrtf(r2); const float r = r2 * r_inv; @@ -80,6 +87,33 @@ __attribute__((always_inline)) INLINE static void runner_iact_density( pj->density.rho_dh -= mi * (hydro_dimension * wj + uj * wj_dx); pj->density.wcount += wj; pj->density.wcount_dh -= (hydro_dimension * wj + uj * wj_dx); + + /* Compute dv dot r */ + float dv[3], curlvr[3]; + + const float faci = mj * wi_dx * r_inv; + const float facj = mi * wj_dx * r_inv; + + dv[0] = pi->v[0] - pj->v[0]; + dv[1] = pi->v[1] - pj->v[1]; + dv[2] = pi->v[2] - pj->v[2]; + const float dvdr = dv[0] * dx[0] + dv[1] * dx[1] + dv[2] * dx[2]; + + pi->density.div_v -= faci * dvdr; + pj->density.div_v -= facj * dvdr; + + /* Compute dv cross r */ + curlvr[0] = dv[1] * dx[2] - dv[2] * dx[1]; + curlvr[1] = dv[2] * dx[0] - dv[0] * dx[2]; + curlvr[2] = dv[0] * dx[1] - dv[1] * dx[0]; + + pi->density.rot_v[0] += faci * curlvr[0]; + pi->density.rot_v[1] += faci * curlvr[1]; + pi->density.rot_v[2] += faci * curlvr[2]; + + pj->density.rot_v[0] += facj * curlvr[0]; + pj->density.rot_v[1] += facj * curlvr[1]; + pj->density.rot_v[2] += facj * curlvr[2]; } /** @@ -100,6 +134,13 @@ __attribute__((always_inline)) INLINE static void runner_iact_nonsym_density( float wi, wi_dx; +#ifdef SWIFT_DEBUG_CHECKS + if (pi->time_bin == time_bin_inhibited) + error("Inhibited pi in interaction function!"); + if (pj->time_bin == time_bin_inhibited) + error("Inhibited pj in interaction function!"); +#endif + /* Get the masses. 
*/ const float mj = pj->mass; @@ -115,6 +156,27 @@ __attribute__((always_inline)) INLINE static void runner_iact_nonsym_density( pi->density.rho_dh -= mj * (hydro_dimension * wi + ui * wi_dx); pi->density.wcount += wi; pi->density.wcount_dh -= (hydro_dimension * wi + ui * wi_dx); + + /* Compute dv dot r */ + float dv[3], curlvr[3]; + + const float faci = mj * wi_dx * r_inv; + + dv[0] = pi->v[0] - pj->v[0]; + dv[1] = pi->v[1] - pj->v[1]; + dv[2] = pi->v[2] - pj->v[2]; + const float dvdr = dv[0] * dx[0] + dv[1] * dx[1] + dv[2] * dx[2]; + + pi->density.div_v -= faci * dvdr; + + /* Compute dv cross r */ + curlvr[0] = dv[1] * dx[2] - dv[2] * dx[1]; + curlvr[1] = dv[2] * dx[0] - dv[0] * dx[2]; + curlvr[2] = dv[0] * dx[1] - dv[1] * dx[0]; + + pi->density.rot_v[0] += faci * curlvr[0]; + pi->density.rot_v[1] += faci * curlvr[1]; + pi->density.rot_v[2] += faci * curlvr[2]; } /** @@ -133,6 +195,13 @@ __attribute__((always_inline)) INLINE static void runner_iact_force( float r2, const float *dx, float hi, float hj, struct part *restrict pi, struct part *restrict pj, float a, float H) { +#ifdef SWIFT_DEBUG_CHECKS + if (pi->time_bin == time_bin_inhibited) + error("Inhibited pi in interaction function!"); + if (pj->time_bin == time_bin_inhibited) + error("Inhibited pj in interaction function!"); +#endif + /* Cosmological factors entering the EoMs */ const float fac_mu = pow_three_gamma_minus_five_over_two(a); const float a2_Hubble = a * a * H; @@ -172,20 +241,27 @@ __attribute__((always_inline)) INLINE static void runner_iact_force( /* Compute dv dot r. */ const float dvdr = (pi->v[0] - pj->v[0]) * dx[0] + (pi->v[1] - pj->v[1]) * dx[1] + - (pi->v[2] - pj->v[2]) * dx[2] + a2_Hubble * r2; + (pi->v[2] - pj->v[2]) * dx[2]; + + /* Add Hubble flow */ + const float dvdr_Hubble = dvdr + a2_Hubble * r2; /* Are the particles moving towards each others ? 
*/ - const float omega_ij = min(dvdr, 0.f); + const float omega_ij = min(dvdr_Hubble, 0.f); const float mu_ij = fac_mu * r_inv * omega_ij; /* This is 0 or negative */ /* Compute sound speeds and signal velocity */ const float ci = pi->force.soundspeed; const float cj = pj->force.soundspeed; - const float v_sig = ci + cj - 3.f * mu_ij; + const float v_sig = ci + cj - const_viscosity_beta * mu_ij; + + /* Grab balsara switches */ + const float balsara_i = pi->force.balsara; + const float balsara_j = pj->force.balsara; /* Construct the full viscosity term */ const float rho_ij = 0.5f * (rhoi + rhoj); - const float visc = -0.5f * const_viscosity_alpha * v_sig * mu_ij / rho_ij; + const float visc = -0.25f * v_sig * (balsara_i + balsara_j) * mu_ij / rho_ij; /* Convolve with the kernel */ const float visc_acc_term = 0.5f * visc * (wi_dr + wj_dr) * r_inv; @@ -211,7 +287,7 @@ __attribute__((always_inline)) INLINE static void runner_iact_force( const float sph_du_term_j = P_over_rho2_j * dvdr * r_inv * wj_dr; /* Viscosity term */ - const float visc_du_term = 0.5f * visc_acc_term * dvdr; + const float visc_du_term = 0.5f * visc_acc_term * dvdr_Hubble; /* Assemble the energy equation term */ const float du_dt_i = sph_du_term_i + visc_du_term; @@ -246,6 +322,13 @@ __attribute__((always_inline)) INLINE static void runner_iact_nonsym_force( float r2, const float *dx, float hi, float hj, struct part *restrict pi, const struct part *restrict pj, float a, float H) { +#ifdef SWIFT_DEBUG_CHECKS + if (pi->time_bin == time_bin_inhibited) + error("Inhibited pi in interaction function!"); + if (pj->time_bin == time_bin_inhibited) + error("Inhibited pj in interaction function!"); +#endif + /* Cosmological factors entering the EoMs */ const float fac_mu = pow_three_gamma_minus_five_over_two(a); const float a2_Hubble = a * a * H; @@ -285,20 +368,27 @@ __attribute__((always_inline)) INLINE static void runner_iact_nonsym_force( /* Compute dv dot r. 
*/ const float dvdr = (pi->v[0] - pj->v[0]) * dx[0] + (pi->v[1] - pj->v[1]) * dx[1] + - (pi->v[2] - pj->v[2]) * dx[2] + a2_Hubble * r2; + (pi->v[2] - pj->v[2]) * dx[2]; + + /* Add Hubble flow */ + const float dvdr_Hubble = dvdr + a2_Hubble * r2; /* Are the particles moving towards each others ? */ - const float omega_ij = min(dvdr, 0.f); + const float omega_ij = min(dvdr_Hubble, 0.f); const float mu_ij = fac_mu * r_inv * omega_ij; /* This is 0 or negative */ /* Compute sound speeds and signal velocity */ const float ci = pi->force.soundspeed; const float cj = pj->force.soundspeed; - const float v_sig = ci + cj - 3.f * mu_ij; + const float v_sig = ci + cj - const_viscosity_beta * mu_ij; + + /* Grab balsara switches */ + const float balsara_i = pi->force.balsara; + const float balsara_j = pj->force.balsara; /* Construct the full viscosity term */ const float rho_ij = 0.5f * (rhoi + rhoj); - const float visc = -0.5f * const_viscosity_alpha * v_sig * mu_ij / rho_ij; + const float visc = -0.25f * v_sig * (balsara_i + balsara_j) * mu_ij / rho_ij; /* Convolve with the kernel */ const float visc_acc_term = 0.5f * visc * (wi_dr + wj_dr) * r_inv; @@ -319,7 +409,7 @@ __attribute__((always_inline)) INLINE static void runner_iact_nonsym_force( const float sph_du_term_i = P_over_rho2_i * dvdr * r_inv * wi_dr; /* Viscosity term */ - const float visc_du_term = 0.5f * visc_acc_term * dvdr; + const float visc_du_term = 0.5f * visc_acc_term * dvdr_Hubble; /* Assemble the energy equation term */ const float du_dt_i = sph_du_term_i + visc_du_term; diff --git a/src/hydro/Minimal/hydro_io.h b/src/hydro/Minimal/hydro_io.h index 879255640fc1a1d6a06a666c80d3860c9c31ab64..1146aa9347d443833cd481103da6f6c57d21fcbf 100644 --- a/src/hydro/Minimal/hydro_io.h +++ b/src/hydro/Minimal/hydro_io.h @@ -73,7 +73,7 @@ INLINE static void hydro_read_particles(struct part* parts, INLINE static void convert_S(const struct engine* e, const struct part* p, const struct xpart* xp, float* ret) { - ret[0] = 
hydro_get_comoving_entropy(p); + ret[0] = hydro_get_comoving_entropy(p, xp); } INLINE static void convert_P(const struct engine* e, const struct part* p, diff --git a/src/hydro/Minimal/hydro_part.h b/src/hydro/Minimal/hydro_part.h index c33f1b9a214cf9839f1acb965b686d4a4962865c..1d14a94f2d91bf259df54c875a32bf3072ad33b6 100644 --- a/src/hydro/Minimal/hydro_part.h +++ b/src/hydro/Minimal/hydro_part.h @@ -124,6 +124,12 @@ struct part { /*! Derivative of density with respect to h */ float rho_dh; + /*! Velocity divergence */ + float div_v; + + /*! Velocity curl */ + float rot_v[3]; + } density; /** @@ -150,6 +156,9 @@ struct part { /*! Time derivative of smoothing length */ float h_dt; + /*! Balsara switch */ + float balsara; + } force; }; diff --git a/src/hydro/MinimalMultiMat/hydro.h b/src/hydro/Planetary/hydro.h similarity index 77% rename from src/hydro/MinimalMultiMat/hydro.h rename to src/hydro/Planetary/hydro.h index cfad6b2b2b389da9f423540cb30f1df4cebc5416..dee65a15758043d2cf526ea889b993c694d5dab4 100644 --- a/src/hydro/MinimalMultiMat/hydro.h +++ b/src/hydro/Planetary/hydro.h @@ -17,17 +17,17 @@ * along with this program. If not, see <http://www.gnu.org/licenses/>. * ******************************************************************************/ -#ifndef SWIFT_MINIMAL_MULTI_MAT_HYDRO_H -#define SWIFT_MINIMAL_MULTI_MAT_HYDRO_H +#ifndef SWIFT_PLANETARY_HYDRO_H +#define SWIFT_PLANETARY_HYDRO_H /** - * @file MinimalMultiMat/hydro.h + * @file Planetary/hydro.h * @brief Minimal conservative implementation of SPH (Non-neighbour loop * equations) with multiple materials. * * The thermal variable is the internal energy (u). Simple constant - * viscosity term without switches is implemented. No thermal conduction - * term is implemented. + * viscosity term with the Balsara (1995) switch (optional). + * No thermal conduction term is implemented. 
* * This corresponds to equations (43), (44), (45), (101), (103) and (104) with * \f$\beta=3\f$ and \f$\alpha_u=0\f$ of Price, D., Journal of Computational @@ -44,23 +44,33 @@ #include "kernel_hydro.h" #include "minmax.h" +/* + * Note: Define PLANETARY_SPH_NO_BALSARA to disable the Balsara (1995) switch + * for the artificial viscosity and use the vanilla Monaghan (1992) instead. + * i.e. compile with: make CFLAGS=-DPLANETARY_SPH_NO_BALSARA + */ + /** - * @brief Returns the comoving internal energy of a particle + * @brief Returns the comoving internal energy of a particle at the last + * time the particle was kicked. * * For implementations where the main thermodynamic variable * is not internal energy, this function computes the internal * energy from the thermodynamic variable. * * @param p The particle of interest + * @param xp The extended data of the particle of interest. */ __attribute__((always_inline)) INLINE static float -hydro_get_comoving_internal_energy(const struct part *restrict p) { +hydro_get_comoving_internal_energy(const struct part *restrict p, + const struct xpart *restrict xp) { - return p->u; + return xp->u_full; } /** - * @brief Returns the physical internal energy of a particle + * @brief Returns the physical internal energy of a particle at the last + * time the particle was kicked. * * For implementations where the main thermodynamic variable * is not internal energy, this function computes the internal @@ -68,12 +78,40 @@ hydro_get_comoving_internal_energy(const struct part *restrict p) { * physical coordinates. * * @param p The particle of interest. + * @param xp The extended data of the particle of interest. * @param cosmo The cosmological model. 
*/ __attribute__((always_inline)) INLINE static float hydro_get_physical_internal_energy(const struct part *restrict p, + const struct xpart *restrict xp, const struct cosmology *cosmo) { + return xp->u_full * cosmo->a_factor_internal_energy; +} + +/** + * @brief Returns the comoving internal energy of a particle drifted to the + * current time. + * + * @param p The particle of interest + */ +__attribute__((always_inline)) INLINE static float +hydro_get_drifted_comoving_internal_energy(const struct part *restrict p) { + + return p->u; +} + +/** + * @brief Returns the physical internal energy of a particle drifted to the + * current time. + * + * @param p The particle of interest. + * @param cosmo The cosmological model. + */ +__attribute__((always_inline)) INLINE static float +hydro_get_drifted_physical_internal_energy(const struct part *restrict p, + const struct cosmology *cosmo) { + return p->u * cosmo->a_factor_internal_energy; } @@ -114,11 +152,12 @@ __attribute__((always_inline)) INLINE static float hydro_get_physical_pressure( * the thermodynamic variable. * * @param p The particle of interest + * @param xp The extended data of the particle of interest. */ __attribute__((always_inline)) INLINE static float hydro_get_comoving_entropy( - const struct part *restrict p) { + const struct part *restrict p, const struct xpart *restrict xp) { - return gas_entropy_from_internal_energy(p->rho, p->u, p->mat_id); + return gas_entropy_from_internal_energy(p->rho, xp->u_full, p->mat_id); } /** @@ -130,10 +169,40 @@ __attribute__((always_inline)) INLINE static float hydro_get_comoving_entropy( * physical coordinates. * * @param p The particle of interest + * @param xp The extended data of the particle of interest. * @param cosmo The cosmological model. 
*/ __attribute__((always_inline)) INLINE static float hydro_get_physical_entropy( - const struct part *restrict p, const struct cosmology *cosmo) { + const struct part *restrict p, const struct xpart *restrict xp, + const struct cosmology *cosmo) { + + /* Note: no cosmological conversion required here with our choice of + * coordinates. */ + return gas_entropy_from_internal_energy(p->rho, xp->u_full, p->mat_id); +} + +/** + * @brief Returns the comoving entropy of a particle drifted to the + * current time. + * + * @param p The particle of interest. + */ +__attribute__((always_inline)) INLINE static float +hydro_get_drifted_comoving_entropy(const struct part *restrict p) { + + return gas_entropy_from_internal_energy(p->rho, p->u, p->mat_id); +} + +/** + * @brief Returns the physical entropy of a particle drifted to the + * current time. + * + * @param p The particle of interest. + * @param cosmo The cosmological model. + */ +__attribute__((always_inline)) INLINE static float +hydro_get_drifted_physical_entropy(const struct part *restrict p, + const struct cosmology *cosmo) { /* Note: no cosmological conversion required here with our choice of * coordinates. */ @@ -238,12 +307,27 @@ __attribute__((always_inline)) INLINE static void hydro_get_drifted_velocities( * * @param p The particle of interest */ -__attribute__((always_inline)) INLINE static float hydro_get_internal_energy_dt( - const struct part *restrict p) { +__attribute__((always_inline)) INLINE static float +hydro_get_comoving_internal_energy_dt(const struct part *restrict p) { return p->u_dt; } +/** + * @brief Returns the time derivative of internal energy of a particle + * + * We assume a constant density. 
+ * + * @param p The particle of interest + * @param cosmo Cosmology data structure + */ +__attribute__((always_inline)) INLINE static float +hydro_get_physical_internal_energy_dt(const struct part *restrict p, + const struct cosmology *cosmo) { + + return p->u_dt * cosmo->a_factor_internal_energy; +} + /** * @brief Returns the time derivative of internal energy of a particle * @@ -252,11 +336,29 @@ __attribute__((always_inline)) INLINE static float hydro_get_internal_energy_dt( * @param p The particle of interest. * @param du_dt The new time derivative of the internal energy. */ -__attribute__((always_inline)) INLINE static void hydro_set_internal_energy_dt( - struct part *restrict p, float du_dt) { +__attribute__((always_inline)) INLINE static void +hydro_set_comoving_internal_energy_dt(struct part *restrict p, float du_dt) { p->u_dt = du_dt; } + +/** + * @brief Returns the time derivative of internal energy of a particle + * + * We assume a constant density. + * + * @param p The particle of interest. + * @param cosmo Cosmology data structure + * @param du_dt The new time derivative of the internal energy. + */ +__attribute__((always_inline)) INLINE static void +hydro_set_physical_internal_energy_dt(struct part *restrict p, + const struct cosmology *cosmo, + float du_dt) { + + p->u_dt = du_dt * cosmo->a_factor_internal_energy; +} + /** * @brief Computes the hydro time-step of a given particle * @@ -386,10 +488,24 @@ __attribute__((always_inline)) INLINE static void hydro_part_has_no_neighbours( * @param p The particle to act upon * @param xp The extended particle data to act upon * @param cosmo The current cosmological model. + * @param hydro_props Hydrodynamic properties. + * @param dt_alpha The time-step used to evolve non-cosmological quantities such + * as the artificial viscosity. 
*/ __attribute__((always_inline)) INLINE static void hydro_prepare_force( struct part *restrict p, struct xpart *restrict xp, - const struct cosmology *cosmo) { + const struct cosmology *cosmo, const struct hydro_props *hydro_props, + const float dt_alpha) { + + const float fac_mu = cosmo->a_factor_mu; + + /* Compute the norm of the curl */ + const float curl_v = sqrtf(p->density.rot_v[0] * p->density.rot_v[0] + + p->density.rot_v[1] * p->density.rot_v[1] + + p->density.rot_v[2] * p->density.rot_v[2]); + + /* Compute the norm of div v */ + const float abs_div_v = fabsf(p->density.div_v); /* Compute the pressure */ const float pressure = @@ -397,17 +513,34 @@ __attribute__((always_inline)) INLINE static void hydro_prepare_force( /* Compute the sound speed */ const float soundspeed = - gas_soundspeed_from_pressure(p->rho, pressure, p->mat_id); + gas_soundspeed_from_internal_energy(p->rho, p->u, p->mat_id); /* Compute the "grad h" term */ const float rho_inv = 1.f / p->rho; - const float grad_h_term = - 1.f / (1.f + hydro_dimension_inv * p->h * p->density.rho_dh * rho_inv); + float grad_h_term; + const float grad_h_term_inv = + 1.f + hydro_dimension_inv * p->h * p->density.rho_dh * rho_inv; + /* Avoid 1/0 from only having one neighbour right at the edge of the kernel */ + if (grad_h_term_inv != 0.f) { + grad_h_term = 1.f / grad_h_term_inv; + } else { + grad_h_term = 0.f; + } + + /* Compute the Balsara switch */ +#ifdef PLANETARY_SPH_NO_BALSARA + const float balsara = hydro_props->viscosity.alpha; +#else + const float balsara = + hydro_props->viscosity.alpha * abs_div_v / + (abs_div_v + curl_v + 0.0001f * fac_mu * soundspeed / p->h); +#endif /* Update variables. 
*/ p->force.f = grad_h_term; p->force.pressure = pressure; p->force.soundspeed = soundspeed; + p->force.balsara = balsara; } /** @@ -494,7 +627,7 @@ __attribute__((always_inline)) INLINE static void hydro_predict_extra( /* Compute the new sound speed */ const float soundspeed = - gas_soundspeed_from_pressure(p->rho, pressure, p->mat_id); + gas_soundspeed_from_internal_energy(p->rho, p->u, p->mat_id); p->force.pressure = pressure; p->force.soundspeed = soundspeed; @@ -532,6 +665,7 @@ __attribute__((always_inline)) INLINE static void hydro_end_force( */ __attribute__((always_inline)) INLINE static void hydro_kick_extra( struct part *restrict p, struct xpart *restrict xp, float dt_therm, + float dt_grav, float dt_hydro, float dt_kick_corr, const struct cosmology *cosmo, const struct hydro_props *hydro_props) { /* Do not decrease the energy by more than a factor of 2*/ @@ -574,7 +708,7 @@ __attribute__((always_inline)) INLINE static void hydro_kick_extra( */ __attribute__((always_inline)) INLINE static void hydro_convert_quantities( struct part *restrict p, struct xpart *restrict xp, - const struct cosmology *cosmo) { + const struct cosmology *cosmo, const struct hydro_props *hydro_props) { /* Compute the pressure */ const float pressure = @@ -631,4 +765,4 @@ hydro_set_init_internal_energy(struct part *p, float u_init) { p->u = u_init; } -#endif /* SWIFT_MINIMAL_MULTI_MAT_HYDRO_H */ +#endif /* SWIFT_PLANETARY_HYDRO_H */ diff --git a/src/hydro/MinimalMultiMat/hydro_debug.h b/src/hydro/Planetary/hydro_debug.h similarity index 89% rename from src/hydro/MinimalMultiMat/hydro_debug.h rename to src/hydro/Planetary/hydro_debug.h index 17b624ad0f660152be4ba685905a3c855e1761f8..74261f3b49e2881af1c403013005560efa53a7f1 100644 --- a/src/hydro/MinimalMultiMat/hydro_debug.h +++ b/src/hydro/Planetary/hydro_debug.h @@ -17,13 +17,12 @@ * along with this program. If not, see <http://www.gnu.org/licenses/>. 
* ******************************************************************************/ -#ifndef SWIFT_MINIMAL_MULTI_MAT_HYDRO_DEBUG_H -#define SWIFT_MINIMAL_MULTI_MAT_HYDRO_DEBUG_H +#ifndef SWIFT_PLANETARY_HYDRO_DEBUG_H +#define SWIFT_PLANETARY_HYDRO_DEBUG_H /** - * @file MinimalMultiMat/hydro_debug.h - * @brief MinimalMultiMat conservative implementation of SPH (Debugging - * routines) + * @file Planetary/hydro_debug.h + * @brief Minimal conservative implementation of SPH (Debugging routines) * * The thermal variable is the internal energy (u). Simple constant * viscosity term without switches is implemented. No thermal conduction @@ -51,4 +50,4 @@ __attribute__((always_inline)) INLINE static void hydro_debug_particle( p->density.wcount, p->rho, p->density.rho_dh, p->time_bin, p->mat_id); } -#endif /* SWIFT_MINIMAL_MULTI_MAT_HYDRO_DEBUG_H */ +#endif /* SWIFT_PLANETARY_HYDRO_DEBUG_H */ diff --git a/src/hydro/MinimalMultiMat/hydro_iact.h b/src/hydro/Planetary/hydro_iact.h similarity index 90% rename from src/hydro/MinimalMultiMat/hydro_iact.h rename to src/hydro/Planetary/hydro_iact.h index 5984c1c483546d87800792ced0ffcc41e0aaa408..19ee002b85c1b0bc8ed621a029059cd02c5e670f 100644 --- a/src/hydro/MinimalMultiMat/hydro_iact.h +++ b/src/hydro/Planetary/hydro_iact.h @@ -17,17 +17,16 @@ * along with this program. If not, see <http://www.gnu.org/licenses/>. * ******************************************************************************/ -#ifndef SWIFT_MINIMAL_MULTI_MAT_HYDRO_IACT_H -#define SWIFT_MINIMAL_MULTI_MAT_HYDRO_IACT_H +#ifndef SWIFT_PLANETARY_HYDRO_IACT_H +#define SWIFT_PLANETARY_HYDRO_IACT_H /** - * @file MinimalMultiMat/hydro_iact.h - * @brief MinimalMultiMat conservative implementation of SPH (Neighbour loop - * equations) + * @file Planetary/hydro_iact.h + * @brief Minimal conservative implementation of SPH (Neighbour loop equations) * * The thermal variable is the internal energy (u). Simple constant - * viscosity term without switches is implemented. 
No thermal conduction - * term is implemented. + * viscosity term with the Balsara (1995) switch (optional). + * No thermal conduction term is implemented. * * This corresponds to equations (43), (44), (45), (101), (103) and (104) with * \f$\beta=3\f$ and \f$\alpha_u=0\f$ of Price, D., Journal of Computational @@ -177,18 +176,22 @@ __attribute__((always_inline)) INLINE static void runner_iact_force( (pi->v[1] - pj->v[1]) * dx[1] + (pi->v[2] - pj->v[2]) * dx[2] + a2_Hubble * r2; - /* Are the particles moving towards each others ? */ + /* Balsara term */ + const float balsara_i = pi->force.balsara; + const float balsara_j = pj->force.balsara; + + /* Are the particles moving towards each other? */ const float omega_ij = min(dvdr, 0.f); const float mu_ij = fac_mu * r_inv * omega_ij; /* This is 0 or negative */ /* Compute sound speeds and signal velocity */ const float ci = pi->force.soundspeed; const float cj = pj->force.soundspeed; - const float v_sig = ci + cj - 3.f * mu_ij; + const float v_sig = ci + cj - const_viscosity_beta * mu_ij; - /* Construct the full viscosity term */ + /* Now construct the full viscosity term */ const float rho_ij = 0.5f * (rhoi + rhoj); - const float visc = -0.5f * const_viscosity_alpha * v_sig * mu_ij / rho_ij; + const float visc = -0.25f * v_sig * mu_ij * (balsara_i + balsara_j) / rho_ij; /* Convolve with the kernel */ const float visc_acc_term = 0.5f * visc * (wi_dr + wj_dr) * r_inv; @@ -220,7 +223,7 @@ __attribute__((always_inline)) INLINE static void runner_iact_force( const float du_dt_i = sph_du_term_i + visc_du_term; const float du_dt_j = sph_du_term_j + visc_du_term; - /* Internal energy time derivatibe */ + /* Internal energy time derivative */ pi->u_dt += du_dt_i * mj; pj->u_dt += du_dt_j * mi; @@ -290,18 +293,24 @@ __attribute__((always_inline)) INLINE static void runner_iact_nonsym_force( (pi->v[1] - pj->v[1]) * dx[1] + (pi->v[2] - pj->v[2]) * dx[2] + a2_Hubble * r2; - /* Are the particles moving towards each others ? 
*/ + /* Balsara term */ + const float balsara_i = pi->force.balsara; + const float balsara_j = pj->force.balsara; + + /* Are the particles moving towards each other? */ const float omega_ij = min(dvdr, 0.f); const float mu_ij = fac_mu * r_inv * omega_ij; /* This is 0 or negative */ - /* Compute sound speeds and signal velocity */ + /* Compute sound speeds */ const float ci = pi->force.soundspeed; const float cj = pj->force.soundspeed; - const float v_sig = ci + cj - 3.f * mu_ij; + + /* Signal velocity */ + const float v_sig = ci + cj - const_viscosity_beta * mu_ij; /* Construct the full viscosity term */ const float rho_ij = 0.5f * (rhoi + rhoj); - const float visc = -0.5f * const_viscosity_alpha * v_sig * mu_ij / rho_ij; + const float visc = -0.25f * v_sig * mu_ij * (balsara_i + balsara_j) / rho_ij; /* Convolve with the kernel */ const float visc_acc_term = 0.5f * visc * (wi_dr + wj_dr) * r_inv; @@ -327,7 +336,7 @@ __attribute__((always_inline)) INLINE static void runner_iact_nonsym_force( /* Assemble the energy equation term */ const float du_dt_i = sph_du_term_i + visc_du_term; - /* Internal energy time derivatibe */ + /* Internal energy time derivative */ pi->u_dt += du_dt_i * mj; /* Get the time derivative for h. */ @@ -337,4 +346,4 @@ __attribute__((always_inline)) INLINE static void runner_iact_nonsym_force( pi->force.v_sig = max(pi->force.v_sig, v_sig); } -#endif /* SWIFT_MINIMAL_MULTI_MAT_HYDRO_IACT_H */ +#endif /* SWIFT_PLANETARY_HYDRO_IACT_H */ diff --git a/src/hydro/MinimalMultiMat/hydro_io.h b/src/hydro/Planetary/hydro_io.h similarity index 94% rename from src/hydro/MinimalMultiMat/hydro_io.h rename to src/hydro/Planetary/hydro_io.h index 7f41f5e227b6c8a8904b5546a2568b4700109abd..1b84f8d6db295694846ffd26a422ce158aad0c60 100644 --- a/src/hydro/MinimalMultiMat/hydro_io.h +++ b/src/hydro/Planetary/hydro_io.h @@ -17,16 +17,16 @@ * along with this program. If not, see <http://www.gnu.org/licenses/>. 
* ******************************************************************************/ -#ifndef SWIFT_MINIMAL_MULTI_MAT_HYDRO_IO_H -#define SWIFT_MINIMAL_MULTI_MAT_HYDRO_IO_H +#ifndef SWIFT_PLANETARY_HYDRO_IO_H +#define SWIFT_PLANETARY_HYDRO_IO_H /** - * @file MinimalMultiMat/hydro_io.h - * @brief MinimalMultiMat conservative implementation of SPH (i/o routines) + * @file Planetary/hydro_io.h + * @brief Minimal conservative implementation of SPH (i/o routines) * * The thermal variable is the internal energy (u). Simple constant - * viscosity term without switches is implemented. No thermal conduction - * term is implemented. + * viscosity term with the Balsara (1995) switch (optional). + * No thermal conduction term is implemented. * * This corresponds to equations (43), (44), (45), (101), (103) and (104) with * \f$\beta=3\f$ and \f$\alpha_u=0\f$ of @@ -76,7 +76,7 @@ INLINE static void hydro_read_particles(struct part* parts, INLINE static void convert_S(const struct engine* e, const struct part* p, const struct xpart* xp, float* ret) { - ret[0] = hydro_get_comoving_entropy(p); + ret[0] = hydro_get_comoving_entropy(p, xp); } INLINE static void convert_P(const struct engine* e, const struct part* p, @@ -197,8 +197,14 @@ INLINE static void hydro_write_flavour(hid_t h_grpsph) { /* Viscosity and thermal conduction */ /* Nothing in this minimal model... */ io_write_attribute_s(h_grpsph, "Thermal Conductivity Model", "No treatment"); +#ifdef PLANETARY_SPH_NO_BALSARA io_write_attribute_s(h_grpsph, "Viscosity Model", "Minimal treatment as in Monaghan (1992)"); +#else + io_write_attribute_s( + h_grpsph, "Viscosity Model", + "as in Springel (2005), i.e. 
Monaghan (1992) with Balsara (1995) switch"); +#endif /* Time integration properties */ io_write_attribute_f(h_grpsph, "Maximal Delta u change over dt", @@ -212,4 +218,4 @@ INLINE static void hydro_write_flavour(hid_t h_grpsph) { */ INLINE static int writeEntropyFlag(void) { return 0; } -#endif /* SWIFT_MINIMAL_MULTI_MAT_HYDRO_IO_H */ +#endif /* SWIFT_PLANETARY_HYDRO_IO_H */ diff --git a/src/hydro/MinimalMultiMat/hydro_part.h b/src/hydro/Planetary/hydro_part.h similarity index 90% rename from src/hydro/MinimalMultiMat/hydro_part.h rename to src/hydro/Planetary/hydro_part.h index dad13e889aa531636e34846825109086177b3dae..4087cef62e873231a556f82869a7f6d848c8d72c 100644 --- a/src/hydro/MinimalMultiMat/hydro_part.h +++ b/src/hydro/Planetary/hydro_part.h @@ -17,17 +17,16 @@ * along with this program. If not, see <http://www.gnu.org/licenses/>. * ******************************************************************************/ -#ifndef SWIFT_MINIMAL_MULTI_MAT_HYDRO_PART_H -#define SWIFT_MINIMAL_MULTI_MAT_HYDRO_PART_H +#ifndef SWIFT_PLANETARY_HYDRO_PART_H +#define SWIFT_PLANETARY_HYDRO_PART_H /** - * @file MinimalMultiMat/hydro_part.h - * @brief MinimalMultiMat conservative implementation of SPH (Particle - * definition) + * @file Planetary/hydro_part.h + * @brief Minimal conservative implementation of SPH (Particle definition) * * The thermal variable is the internal energy (u). Simple constant - * viscosity term without switches is implemented. No thermal conduction - * term is implemented. + * viscosity term with the Balsara (1995) switch (optional). + * No thermal conduction term is implemented. * * This corresponds to equations (43), (44), (45), (101), (103) and (104) with * \f$\beta=3\f$ and \f$\alpha_u=0\f$ of Price, D., Journal of Computational @@ -127,6 +126,12 @@ struct part { /*! Derivative of density with respect to h */ float rho_dh; + /*! Velocity divergence. */ + float div_v; + + /*! Velocity curl. 
*/ + float rot_v[3]; + } density; /** @@ -153,6 +158,9 @@ struct part { /*! Time derivative of smoothing length */ float h_dt; + /*! Balsara switch */ + float balsara; + } force; }; @@ -177,4 +185,4 @@ struct part { } SWIFT_STRUCT_ALIGN; -#endif /* SWIFT_MINIMAL_MULTI_MAT_HYDRO_PART_H */ +#endif /* SWIFT_PLANETARY_HYDRO_PART_H */ diff --git a/src/hydro/PressureEnergy/hydro.h b/src/hydro/PressureEnergy/hydro.h index ea086daeeb1e93d7f1476302564fb4182a6fb611..4c3cc5c1c588e19de0d4833fc867ae9c0aed1209 100644 --- a/src/hydro/PressureEnergy/hydro.h +++ b/src/hydro/PressureEnergy/hydro.h @@ -49,22 +49,26 @@ #include <float.h> /** - * @brief Returns the comoving internal energy of a particle + * @brief Returns the comoving internal energy of a particle at the last + * time the particle was kicked. * * For implementations where the main thermodynamic variable * is not internal energy, this function computes the internal * energy from the thermodynamic variable. * * @param p The particle of interest + * @param xp The extended data of the particle of interest. */ __attribute__((always_inline)) INLINE static float -hydro_get_comoving_internal_energy(const struct part *restrict p) { +hydro_get_comoving_internal_energy(const struct part *restrict p, + const struct xpart *restrict xp) { - return p->u; + return xp->u_full; } /** - * @brief Returns the physical internal energy of a particle + * @brief Returns the physical internal energy of a particle at the last + * time the particle was kicked. * * For implementations where the main thermodynamic variable * is not internal energy, this function computes the internal @@ -72,12 +76,40 @@ hydro_get_comoving_internal_energy(const struct part *restrict p) { * physical coordinates. * * @param p The particle of interest. + * @param xp The extended data of the particle of interest. * @param cosmo The cosmological model. 
*/ __attribute__((always_inline)) INLINE static float hydro_get_physical_internal_energy(const struct part *restrict p, + const struct xpart *restrict xp, const struct cosmology *cosmo) { + return xp->u_full * cosmo->a_factor_internal_energy; +} + +/** + * @brief Returns the comoving internal energy of a particle drifted to the + * current time. + * + * @param p The particle of interest + */ +__attribute__((always_inline)) INLINE static float +hydro_get_drifted_comoving_internal_energy(const struct part *restrict p) { + + return p->u; +} + +/** + * @brief Returns the physical internal energy of a particle drifted to the + * current time. + * + * @param p The particle of interest. + * @param cosmo The cosmological model. + */ +__attribute__((always_inline)) INLINE static float +hydro_get_drifted_physical_internal_energy(const struct part *restrict p, + const struct cosmology *cosmo) { + return p->u * cosmo->a_factor_internal_energy; } @@ -110,33 +142,66 @@ __attribute__((always_inline)) INLINE static float hydro_get_physical_pressure( } /** - * @brief Returns the comoving entropy of a particle + * @brief Returns the comoving entropy of a particle at the last + * time the particle was kicked. * * For implementations where the main thermodynamic variable * is not entropy, this function computes the entropy from * the thermodynamic variable. * * @param p The particle of interest + * @param xp The extended data of the particle of interest. */ __attribute__((always_inline)) INLINE static float hydro_get_comoving_entropy( - const struct part *restrict p) { + const struct part *restrict p, const struct xpart *restrict xp) { - return gas_entropy_from_internal_energy(p->rho, p->u); + return gas_entropy_from_internal_energy(p->rho, xp->u_full); } /** - * @brief Returns the physical entropy of a particle + * @brief Returns the physical entropy of a particle at the last + * time the particle was kicked. 
* * For implementations where the main thermodynamic variable * is not entropy, this function computes the entropy from * the thermodynamic variable and converts it to * physical coordinates. * - * @param p The particle of interest + * @param p The particle of interest. + * @param xp The extended data of the particle of interest. * @param cosmo The cosmological model. */ __attribute__((always_inline)) INLINE static float hydro_get_physical_entropy( - const struct part *restrict p, const struct cosmology *cosmo) { + const struct part *restrict p, const struct xpart *restrict xp, + const struct cosmology *cosmo) { + + /* Note: no cosmological conversion required here with our choice of + * coordinates. */ + return gas_entropy_from_internal_energy(p->rho, xp->u_full); +} + +/** + * @brief Returns the comoving entropy of a particle drifted to the + * current time. + * + * @param p The particle of interest. + */ +__attribute__((always_inline)) INLINE static float +hydro_get_drifted_comoving_entropy(const struct part *restrict p) { + + return gas_entropy_from_internal_energy(p->rho, p->u); +} + +/** + * @brief Returns the physical entropy of a particle drifted to the + * current time. + * + * @param p The particle of interest. + * @param cosmo The cosmological model. + */ +__attribute__((always_inline)) INLINE static float +hydro_get_drifted_physical_entropy(const struct part *restrict p, + const struct cosmology *cosmo) { /* Note: no cosmological conversion required here with our choice of * coordinates. 
*/ @@ -245,12 +310,27 @@ __attribute__((always_inline)) INLINE static void hydro_get_drifted_velocities( * * @param p The particle of interest */ -__attribute__((always_inline)) INLINE static float hydro_get_internal_energy_dt( - const struct part *restrict p) { +__attribute__((always_inline)) INLINE static float +hydro_get_comoving_internal_energy_dt(const struct part *restrict p) { return p->u_dt; } +/** + * @brief Returns the time derivative of internal energy of a particle + * + * We assume a constant density. + * + * @param p The particle of interest + * @param cosmo Cosmology data structure + */ +__attribute__((always_inline)) INLINE static float +hydro_get_physical_internal_energy_dt(const struct part *restrict p, + const struct cosmology *cosmo) { + + return p->u_dt * cosmo->a_factor_internal_energy; +} + /** * @brief Sets the time derivative of internal energy of a particle * @@ -259,12 +339,29 @@ __attribute__((always_inline)) INLINE static float hydro_get_internal_energy_dt( * @param p The particle of interest. * @param du_dt The new time derivative of the internal energy. */ -__attribute__((always_inline)) INLINE static void hydro_set_internal_energy_dt( - struct part *restrict p, float du_dt) { +__attribute__((always_inline)) INLINE static void +hydro_set_comoving_internal_energy_dt(struct part *restrict p, float du_dt) { p->u_dt = du_dt; } +/** + * @brief Returns the time derivative of internal energy of a particle + * + * We assume a constant density. + * + * @param p The particle of interest. + * @param cosmo Cosmology data structure + * @param du_dt The new time derivative of the internal energy. 
+ */ +__attribute__((always_inline)) INLINE static void +hydro_set_physical_internal_energy_dt(struct part *restrict p, + const struct cosmology *cosmo, + float du_dt) { + + p->u_dt = du_dt * cosmo->a_factor_internal_energy; +} + /** * @brief Computes the hydro time-step of a given particle * @@ -375,8 +472,10 @@ __attribute__((always_inline)) INLINE static void hydro_end_density( p->density.rot_v[1] *= h_inv_dim_plus_one * a_inv2 * rho_inv; p->density.rot_v[2] *= h_inv_dim_plus_one * a_inv2 * rho_inv; - /* Finish calculation of the velocity divergence */ - p->density.div_v *= h_inv_dim_plus_one * rho_inv * a_inv2; + /* Finish calculation of the velocity divergence, including hubble flow term + */ + p->density.div_v *= + h_inv_dim_plus_one * rho_inv * a_inv2 + cosmo->H * hydro_dimension; } /** @@ -427,10 +526,14 @@ __attribute__((always_inline)) INLINE static void hydro_part_has_no_neighbours( * @param p The particle to act upon * @param xp The extended particle data to act upon * @param cosmo The current cosmological model. + * @param hydro_props Hydrodynamic properties. + * @param dt_alpha The time-step used to evolve non-cosmological quantities such + * as the artificial viscosity. 
*/ __attribute__((always_inline)) INLINE static void hydro_prepare_force( struct part *restrict p, struct xpart *restrict xp, - const struct cosmology *cosmo) { + const struct cosmology *cosmo, const struct hydro_props *hydro_props, + const float dt_alpha) { const float fac_mu = cosmo->a_factor_mu; @@ -447,7 +550,8 @@ __attribute__((always_inline)) INLINE static void hydro_prepare_force( /* Compute the Balsara switch */ const float balsara = - abs_div_v / (abs_div_v + curl_v + 0.0001f * soundspeed * fac_mu / p->h); + hydro_props->viscosity.alpha * abs_div_v / + (abs_div_v + curl_v + 0.0001f * soundspeed * fac_mu / p->h); /* Compute the "grad h" term */ const float common_factor = p->h / (hydro_dimension * p->density.wcount); @@ -578,11 +682,16 @@ __attribute__((always_inline)) INLINE static void hydro_end_force( * @param p The particle to act upon. * @param xp The particle extended data to act upon. * @param dt_therm The time-step for this kick (for thermodynamic quantities). + * @param dt_grav The time-step for this kick (for gravity quantities). + * @param dt_hydro The time-step for this kick (for hydro quantities). + * @param dt_kick_corr The time-step for this kick (for gravity corrections). + * @param cosmo The cosmological model. + * @param hydro_props The constants used in the scheme */ __attribute__((always_inline)) INLINE static void hydro_kick_extra( struct part *restrict p, struct xpart *restrict xp, float dt_therm, - const struct cosmology *cosmo, - const struct hydro_props *restrict hydro_properties) { + float dt_grav, float dt_hydro, float dt_kick_corr, + const struct cosmology *cosmo, const struct hydro_props *hydro_props) { /* Do not decrease the energy by more than a factor of 2*/ if (dt_therm > 0. 
&& p->u_dt * dt_therm < -0.5f * xp->u_full) { @@ -590,6 +699,14 @@ __attribute__((always_inline)) INLINE static void hydro_kick_extra( } xp->u_full += p->u_dt * dt_therm; + /* Apply the minimal energy limit */ + const float min_energy = + hydro_props->minimal_internal_energy / cosmo->a_factor_internal_energy; + if (xp->u_full < min_energy) { + xp->u_full = min_energy; + p->u_dt = 0.f; + } + /* Compute the sound speed */ const float soundspeed = hydro_get_comoving_soundspeed(p); @@ -606,10 +723,31 @@ __attribute__((always_inline)) INLINE static void hydro_kick_extra( * * @param p The particle to act upon * @param xp The extended particle to act upon + * @param cosmo The cosmological model. + * @param hydro_props The constants used in the scheme. */ __attribute__((always_inline)) INLINE static void hydro_convert_quantities( struct part *restrict p, struct xpart *restrict xp, - const struct cosmology *cosmo) {} + const struct cosmology *cosmo, const struct hydro_props *hydro_props) { + + /* Convert the physcial internal energy to the comoving one. */ + /* u' = a^(3(g-1)) u */ + const float factor = 1.f / cosmo->a_factor_internal_energy; + p->u *= factor; + xp->u_full = p->u; + + /* Apply the minimal energy limit */ + const float min_energy = + hydro_props->minimal_internal_energy / cosmo->a_factor_internal_energy; + if (xp->u_full < min_energy) { + xp->u_full = min_energy; + p->u = min_energy; + p->u_dt = 0.f; + } + + /* Note that unlike Minimal the pressure and sound speed cannot be calculated + * here because they are smoothed properties in this scheme. 
*/ +} /** * @brief Initialises the particles for the first time diff --git a/src/hydro/PressureEnergy/hydro_iact.h b/src/hydro/PressureEnergy/hydro_iact.h index 65c46a55554d4a8f09b32bb6eb1deb1fdcfc932a..4146e61a53dd7ece57e263cb90308e2579aa3930 100644 --- a/src/hydro/PressureEnergy/hydro_iact.h +++ b/src/hydro/PressureEnergy/hydro_iact.h @@ -232,16 +232,19 @@ __attribute__((always_inline)) INLINE static void runner_iact_force( /* Compute dv dot r. */ const float dvdr = (pi->v[0] - pj->v[0]) * dx[0] + (pi->v[1] - pj->v[1]) * dx[1] + - (pi->v[2] - pj->v[2]) * dx[2] + a2_Hubble * r2; + (pi->v[2] - pj->v[2]) * dx[2]; + + /* Includes the hubble flow term; not used for du/dt */ + const float dvdr_Hubble = dvdr + a2_Hubble * r2; /* Are the part*icles moving towards each others ? */ - const float omega_ij = min(dvdr, 0.f); + const float omega_ij = min(dvdr_Hubble, 0.f); const float mu_ij = fac_mu * r_inv * omega_ij; /* This is 0 or negative */ /* Compute sound speeds and signal velocity */ const float ci = pi->force.soundspeed; const float cj = pj->force.soundspeed; - const float v_sig = ci + cj - 3.f * mu_ij; + const float v_sig = ci + cj - const_viscosity_beta * mu_ij; /* Balsara term */ const float balsara_i = pi->force.balsara; @@ -249,8 +252,7 @@ __attribute__((always_inline)) INLINE static void runner_iact_force( /* Construct the full viscosity term */ const float rho_ij = 0.5f * (rhoi + rhoj); - const float visc = -0.25f * const_viscosity_alpha * v_sig * mu_ij * - (balsara_i + balsara_j) / rho_ij; + const float visc = -0.25f * v_sig * mu_ij * (balsara_i + balsara_j) / rho_ij; /* Convolve with the kernel */ const float visc_acc_term = 0.5f * visc * (wi_dr + wj_dr) * r_inv; @@ -282,7 +284,7 @@ __attribute__((always_inline)) INLINE static void runner_iact_force( wj_dr * dvdr * r_inv; /* Viscosity term */ - const float visc_du_term = 0.5f * visc_acc_term * dvdr; + const float visc_du_term = 0.5f * visc_acc_term * dvdr_Hubble; /* Assemble the energy equation term */ const 
float du_dt_i = sph_du_term_i + visc_du_term; @@ -357,16 +359,19 @@ __attribute__((always_inline)) INLINE static void runner_iact_nonsym_force( /* Compute dv dot r. */ const float dvdr = (pi->v[0] - pj->v[0]) * dx[0] + (pi->v[1] - pj->v[1]) * dx[1] + - (pi->v[2] - pj->v[2]) * dx[2] + a2_Hubble * r2; + (pi->v[2] - pj->v[2]) * dx[2]; + + /* Includes the hubble flow term; not used for du/dt */ + const float dvdr_Hubble = dvdr + a2_Hubble * r2; /* Are the part*icles moving towards each others ? */ - const float omega_ij = min(dvdr, 0.f); + const float omega_ij = min(dvdr_Hubble, 0.f); const float mu_ij = fac_mu * r_inv * omega_ij; /* This is 0 or negative */ /* Compute sound speeds and signal velocity */ const float ci = pi->force.soundspeed; const float cj = pj->force.soundspeed; - const float v_sig = ci + cj - 3.f * mu_ij; + const float v_sig = ci + cj - const_viscosity_beta * mu_ij; /* Balsara term */ const float balsara_i = pi->force.balsara; @@ -374,8 +379,7 @@ __attribute__((always_inline)) INLINE static void runner_iact_nonsym_force( /* Construct the full viscosity term */ const float rho_ij = 0.5f * (rhoi + rhoj); - const float visc = -0.25f * const_viscosity_alpha * v_sig * mu_ij * - (balsara_i + balsara_j) / rho_ij; + const float visc = -0.25f * v_sig * mu_ij * (balsara_i + balsara_j) / rho_ij; /* Convolve with the kernel */ const float visc_acc_term = 0.5f * visc * (wi_dr + wj_dr) * r_inv; @@ -400,7 +404,7 @@ __attribute__((always_inline)) INLINE static void runner_iact_nonsym_force( wi_dr * dvdr * r_inv; /* Viscosity term */ - const float visc_du_term = 0.5f * visc_acc_term * dvdr; + const float visc_du_term = 0.5f * visc_acc_term * dvdr_Hubble; /* Assemble the energy equation term */ const float du_dt_i = sph_du_term_i + visc_du_term; diff --git a/src/hydro/PressureEnergy/hydro_io.h b/src/hydro/PressureEnergy/hydro_io.h index 78967faec218f0efffbb624c4e8d25af214aad94..06762c6124c2c726c4e687980455ab956a5fa79e 100644 --- a/src/hydro/PressureEnergy/hydro_io.h 
+++ b/src/hydro/PressureEnergy/hydro_io.h @@ -68,16 +68,10 @@ INLINE static void hydro_read_particles(struct part* parts, UNIT_CONV_DENSITY, parts, rho); } -INLINE static void convert_u(const struct engine* e, const struct part* p, - const struct xpart* xp, float* ret) { - - ret[0] = hydro_get_comoving_internal_energy(p); -} - INLINE static void convert_S(const struct engine* e, const struct part* p, const struct xpart* xp, float* ret) { - ret[0] = hydro_get_comoving_entropy(p); + ret[0] = hydro_get_comoving_entropy(p, xp); } INLINE static void convert_P(const struct engine* e, const struct part* p, @@ -136,6 +130,15 @@ INLINE static void convert_part_vel(const struct engine* e, ret[2] *= cosmo->a_inv; } +INLINE static void convert_part_potential(const struct engine* e, + const struct part* p, + const struct xpart* xp, float* ret) { + if (p->gpart != NULL) + ret[0] = gravity_get_comoving_potential(p->gpart); + else + ret[0] = 0.f; +} + /** * @brief Specifies which particle fields to write to a dataset * @@ -148,7 +151,7 @@ INLINE static void hydro_write_particles(const struct part* parts, struct io_props* list, int* num_fields) { - *num_fields = 9; + *num_fields = 10; /* List what we want to write */ list[0] = io_make_output_field_convert_part("Coordinates", DOUBLE, 3, @@ -160,9 +163,8 @@ INLINE static void hydro_write_particles(const struct part* parts, io_make_output_field("Masses", FLOAT, 1, UNIT_CONV_MASS, parts, mass); list[3] = io_make_output_field("SmoothingLength", FLOAT, 1, UNIT_CONV_LENGTH, parts, h); - list[4] = io_make_output_field_convert_part("InternalEnergy", FLOAT, 1, - UNIT_CONV_ENERGY_PER_UNIT_MASS, - parts, xparts, convert_u); + list[4] = io_make_output_field("InternalEnergy", FLOAT, 1, + UNIT_CONV_ENERGY_PER_UNIT_MASS, parts, u); list[5] = io_make_output_field("ParticleIDs", ULONGLONG, 1, UNIT_CONV_NO_UNITS, parts, id); list[6] = @@ -172,6 +174,9 @@ INLINE static void hydro_write_particles(const struct part* parts, list[8] = 
io_make_output_field_convert_part("Entropy", FLOAT, 1, UNIT_CONV_ENTROPY_PER_UNIT_MASS, parts, xparts, convert_S); + list[9] = io_make_output_field_convert_part("Potential", FLOAT, 1, + UNIT_CONV_POTENTIAL, parts, + xparts, convert_part_potential); } /** diff --git a/src/hydro/PressureEnergyMorrisMonaghanAV/hydro.h b/src/hydro/PressureEnergyMorrisMonaghanAV/hydro.h new file mode 100644 index 0000000000000000000000000000000000000000..060694a6afa850c4d4815899fde1450316da81f5 --- /dev/null +++ b/src/hydro/PressureEnergyMorrisMonaghanAV/hydro.h @@ -0,0 +1,766 @@ +/******************************************************************************* + * This file is part of SWIFT. + * Copyright (c) 2016 Matthieu Schaller (matthieu.schaller@durham.ac.uk) & + * Josh Borrow (joshua.borrow@durham.ac.uk) + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published + * by the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public License + * along with this program. If not, see <http://www.gnu.org/licenses/>. + * + ******************************************************************************/ +#ifndef SWIFT_PRESSURE_ENERGY_MORRIS_HYDRO_H +#define SWIFT_PRESSURE_ENERGY_MORRIS_HYDRO_H + +/** + * @file PressureEnergy/hydro.h + * @brief P-U conservative implementation of SPH (Non-neighbour loop + * equations) + * + * The thermal variable is the internal energy (u). A simple variable + * viscosity term (Morris & Monaghan 1997) with a Balsara switch is + * implemented. + * + * No thermal conduction term is implemented. 
+ * + * This implementation corresponds to the one presented in the SWIFT + * documentation and in Hopkins, "A general class of Lagrangian smoothed + * particle hydrodynamics methods and implications for fluid mixing problems", + * MNRAS, 2013. + */ + +#include "adiabatic_index.h" +#include "approx_math.h" +#include "cosmology.h" +#include "dimension.h" +#include "equation_of_state.h" +#include "hydro_properties.h" +#include "hydro_space.h" +#include "kernel_hydro.h" +#include "minmax.h" + +#include <float.h> + +/** + * @brief Returns the comoving internal energy of a particle + * + * For implementations where the main thermodynamic variable + * is not internal energy, this function computes the internal + * energy from the thermodynamic variable. + * + * @param p The particle of interest + */ +__attribute__((always_inline)) INLINE static float +hydro_get_comoving_internal_energy(const struct part *restrict p) { + + return p->u; +} + +/** + * @brief Returns the physical internal energy of a particle + * + * For implementations where the main thermodynamic variable + * is not internal energy, this function computes the internal + * energy from the thermodynamic variable and converts it to + * physical coordinates. + * + * @param p The particle of interest. + * @param cosmo The cosmological model. + */ +__attribute__((always_inline)) INLINE static float +hydro_get_physical_internal_energy(const struct part *restrict p, + const struct cosmology *cosmo) { + + return p->u * cosmo->a_factor_internal_energy; +} + +/** + * @brief Returns the comoving pressure of a particle + * + * Computes the pressure based on the particle's properties. 
+ * + * @param p The particle of interest + */ +__attribute__((always_inline)) INLINE static float hydro_get_comoving_pressure( + const struct part *restrict p) { + + return p->pressure_bar; +} + +/** + * @brief Returns the physical pressure of a particle + * + * Computes the pressure based on the particle's properties and + * convert it to physical coordinates. + * + * @param p The particle of interest + * @param cosmo The cosmological model. + */ +__attribute__((always_inline)) INLINE static float hydro_get_physical_pressure( + const struct part *restrict p, const struct cosmology *cosmo) { + + return cosmo->a_factor_pressure * p->pressure_bar; +} + +/** + * @brief Returns the comoving entropy of a particle + * + * For implementations where the main thermodynamic variable + * is not entropy, this function computes the entropy from + * the thermodynamic variable. + * + * @param p The particle of interest + */ +__attribute__((always_inline)) INLINE static float hydro_get_comoving_entropy( + const struct part *restrict p) { + + return gas_entropy_from_internal_energy(p->rho, p->u); +} + +/** + * @brief Returns the physical entropy of a particle + * + * For implementations where the main thermodynamic variable + * is not entropy, this function computes the entropy from + * the thermodynamic variable and converts it to + * physical coordinates. + * + * @param p The particle of interest + * @param cosmo The cosmological model. + */ +__attribute__((always_inline)) INLINE static float hydro_get_physical_entropy( + const struct part *restrict p, const struct cosmology *cosmo) { + + /* Note: no cosmological conversion required here with our choice of + * coordinates. 
*/ + return gas_entropy_from_internal_energy(p->rho, p->u); +} + +/** + * @brief Returns the comoving sound speed of a particle + * + * @param p The particle of interest + */ +__attribute__((always_inline)) INLINE static float +hydro_get_comoving_soundspeed(const struct part *restrict p) { + + /* Compute the sound speed -- see theory section for justification */ + /* IDEAL GAS ONLY -- P-U does not work with generic EoS. */ + const float square_rooted = sqrtf(hydro_gamma * p->pressure_bar / p->rho); + + return square_rooted; +} + +/** + * @brief Returns the physical sound speed of a particle + * + * @param p The particle of interest + * @param cosmo The cosmological model. + */ +__attribute__((always_inline)) INLINE static float +hydro_get_physical_soundspeed(const struct part *restrict p, + const struct cosmology *cosmo) { + + return cosmo->a_factor_sound_speed * p->force.soundspeed; +} + +/** + * @brief Returns the comoving internal energy of a particle drifted to the + * current time. + * + * @param p The particle of interest + */ +__attribute__((always_inline)) INLINE static float +hydro_get_drifted_comoving_internal_energy(const struct part *restrict p) { + + return p->u; +} + +/** + * @brief Returns the physical internal energy of a particle drifted to the + * current time. + * + * @param p The particle of interest. + * @param cosmo The cosmological model. + */ +__attribute__((always_inline)) INLINE static float +hydro_get_drifted_physical_internal_energy(const struct part *restrict p, + const struct cosmology *cosmo) { + + return p->u * cosmo->a_factor_internal_energy; +} + +/** + * @brief Returns the comoving entropy of a particle drifted to the + * current time. + * + * @param p The particle of interest. 
+ */ +__attribute__((always_inline)) INLINE static float +hydro_get_drifted_comoving_entropy(const struct part *restrict p) { + + return gas_entropy_from_internal_energy(p->rho, p->u); +} + +/** + * @brief Returns the physical entropy of a particle drifted to the + * current time. + * + * @param p The particle of interest. + * @param cosmo The cosmological model. + */ +__attribute__((always_inline)) INLINE static float +hydro_get_drifted_physical_entropy(const struct part *restrict p, + const struct cosmology *cosmo) { + + /* Note: no cosmological conversion required here with our choice of + * coordinates. */ + return gas_entropy_from_internal_energy(p->rho, p->u); +} + +/** + * @brief Returns the comoving density of a particle + * + * @param p The particle of interest + */ +__attribute__((always_inline)) INLINE static float hydro_get_comoving_density( + const struct part *restrict p) { + + return p->rho; +} + +/** + * @brief Returns the comoving density of a particle. + * + * @param p The particle of interest + * @param cosmo The cosmological model. + */ +__attribute__((always_inline)) INLINE static float hydro_get_physical_density( + const struct part *restrict p, const struct cosmology *cosmo) { + + return cosmo->a3_inv * p->rho; +} + +/** + * @brief Returns the mass of a particle + * + * @param p The particle of interest + */ +__attribute__((always_inline)) INLINE static float hydro_get_mass( + const struct part *restrict p) { + + return p->mass; +} + +/** + * @brief Sets the mass of a particle + * + * @param p The particle of interest + * @param m The mass to set. + */ +__attribute__((always_inline)) INLINE static void hydro_set_mass( + struct part *restrict p, float m) { + + p->mass = m; +} + +/** + * @brief Returns the velocities drifted to the current time of a particle. + * + * @param p The particle of interest + * @param xp The extended data of the particle. + * @param dt_kick_hydro The time (for hydro accelerations) since the last kick. 
+ * @param dt_kick_grav The time (for gravity accelerations) since the last kick. + * @param v (return) The velocities at the current time. + */ +__attribute__((always_inline)) INLINE static void hydro_get_drifted_velocities( + const struct part *restrict p, const struct xpart *xp, float dt_kick_hydro, + float dt_kick_grav, float v[3]) { + + v[0] = xp->v_full[0] + p->a_hydro[0] * dt_kick_hydro + + xp->a_grav[0] * dt_kick_grav; + v[1] = xp->v_full[1] + p->a_hydro[1] * dt_kick_hydro + + xp->a_grav[1] * dt_kick_grav; + v[2] = xp->v_full[2] + p->a_hydro[2] * dt_kick_hydro + + xp->a_grav[2] * dt_kick_grav; +} + +/** + * @brief Returns the time derivative of internal energy of a particle + * + * We assume a constant density. + * + * @param p The particle of interest + */ +__attribute__((always_inline)) INLINE static float hydro_get_internal_energy_dt( + const struct part *restrict p) { + + return p->u_dt; +} + +/** + * @brief Sets the time derivative of internal energy of a particle + * + * We assume a constant density. + * + * @param p The particle of interest. + * @param du_dt The new time derivative of the internal energy. + */ +__attribute__((always_inline)) INLINE static void hydro_set_internal_energy_dt( + struct part *restrict p, float du_dt) { + + p->u_dt = du_dt; +} + +/** + * @brief Computes the hydro time-step of a given particle + * + * This function returns the time-step of a particle given its hydro-dynamical + * state. A typical time-step calculation would be the use of the CFL condition. + * + * @param p Pointer to the particle data + * @param xp Pointer to the extended particle data + * @param hydro_properties The SPH parameters + * @param cosmo The cosmological model. 
+ */ +__attribute__((always_inline)) INLINE static float hydro_compute_timestep( + const struct part *restrict p, const struct xpart *restrict xp, + const struct hydro_props *restrict hydro_properties, + const struct cosmology *restrict cosmo) { + + const float CFL_condition = hydro_properties->CFL_condition; + + /* CFL condition */ + const float dt_cfl = 2.f * kernel_gamma * CFL_condition * cosmo->a * p->h / + (cosmo->a_factor_sound_speed * p->force.v_sig); + + const float dt_u_change = + (p->u_dt != 0.0f) ? fabsf(const_max_u_change * p->u / p->u_dt) : FLT_MAX; + + return fminf(dt_cfl, dt_u_change); +} + +/** + * @brief Does some extra hydro operations once the actual physical time step + * for the particle is known. + * + * @param p The particle to act upon. + * @param dt Physical time step of the particle during the next step. + */ +__attribute__((always_inline)) INLINE static void hydro_timestep_extra( + struct part *p, float dt) {} + +/** + * @brief Prepares a particle for the density calculation. + * + * Zeroes all the relevant arrays in preparation for the sums taking place in + * the various density loop over neighbours. Typically, all fields of the + * density sub-structure of a particle get zeroed in here. + * + * @param p The particle to act upon + * @param hs #hydro_space containing hydro specific space information. + */ +__attribute__((always_inline)) INLINE static void hydro_init_part( + struct part *restrict p, const struct hydro_space *hs) { + + p->density.wcount = 0.f; + p->density.wcount_dh = 0.f; + p->rho = 0.f; + p->density.rho_dh = 0.f; + p->pressure_bar = 0.f; + p->density.pressure_bar_dh = 0.f; + + p->density.div_v = 0.f; + p->density.rot_v[0] = 0.f; + p->density.rot_v[1] = 0.f; + p->density.rot_v[2] = 0.f; +} + +/** + * @brief Finishes the density calculation. + * + * Multiplies the density and number of neighbours by the appropiate constants + * and add the self-contribution term. 
+ * Additional quantities such as velocity gradients will also get the final + * terms added to them here. + * + * Also adds/multiplies the cosmological terms if need be. + * + * @param p The particle to act upon + * @param cosmo The cosmological model. + */ +__attribute__((always_inline)) INLINE static void hydro_end_density( + struct part *restrict p, const struct cosmology *cosmo) { + + /* Some smoothing length multiples. */ + const float h = p->h; + const float h_inv = 1.0f / h; /* 1/h */ + const float h_inv_dim = pow_dimension(h_inv); /* 1/h^d */ + const float h_inv_dim_plus_one = h_inv_dim * h_inv; /* 1/h^(d+1) */ + + /* Final operation on the density (add self-contribution). */ + p->rho += p->mass * kernel_root; + p->density.rho_dh -= hydro_dimension * p->mass * kernel_root; + p->pressure_bar += p->mass * p->u * kernel_root; + p->density.pressure_bar_dh -= hydro_dimension * p->mass * p->u * kernel_root; + p->density.wcount += kernel_root; + p->density.wcount_dh -= hydro_dimension * kernel_root; + + /* Finish the calculation by inserting the missing h-factors */ + p->rho *= h_inv_dim; + p->density.rho_dh *= h_inv_dim_plus_one; + p->pressure_bar *= (h_inv_dim * hydro_gamma_minus_one); + p->density.pressure_bar_dh *= (h_inv_dim_plus_one * hydro_gamma_minus_one); + p->density.wcount *= h_inv_dim; + p->density.wcount_dh *= h_inv_dim_plus_one; + + const float rho_inv = 1.f / p->rho; + const float a_inv2 = cosmo->a2_inv; + + /* Finish calculation of the velocity curl components */ + p->density.rot_v[0] *= h_inv_dim_plus_one * a_inv2 * rho_inv; + p->density.rot_v[1] *= h_inv_dim_plus_one * a_inv2 * rho_inv; + p->density.rot_v[2] *= h_inv_dim_plus_one * a_inv2 * rho_inv; + + /* Finish calculation of the velocity divergence */ + p->density.div_v *= + h_inv_dim_plus_one * rho_inv * a_inv2 + cosmo->H * hydro_dimension; +} + +/** + * @brief Sets all particle fields to sensible values when the #part has 0 ngbs. 
+ * + * In the desperate case where a particle has no neighbours (likely because + * of the h_max ceiling), set the particle fields to something sensible to avoid + * NaNs in the next calculations. + * + * @param p The particle to act upon + * @param xp The extended particle data to act upon + * @param cosmo The cosmological model. + */ +__attribute__((always_inline)) INLINE static void hydro_part_has_no_neighbours( + struct part *restrict p, struct xpart *restrict xp, + const struct cosmology *cosmo) { + + /* Some smoothing length multiples. */ + const float h = p->h; + const float h_inv = 1.0f / h; /* 1/h */ + const float h_inv_dim = pow_dimension(h_inv); /* 1/h^d */ + + /* Re-set problematic values */ + p->rho = p->mass * kernel_root * h_inv_dim; + p->pressure_bar = + p->mass * p->u * hydro_gamma_minus_one * kernel_root * h_inv_dim; + p->density.wcount = kernel_root * h_inv_dim; + p->density.rho_dh = 0.f; + p->density.wcount_dh = 0.f; + p->density.pressure_bar_dh = 0.f; + + p->density.div_v = 0.f; + p->density.rot_v[0] = 0.f; + p->density.rot_v[1] = 0.f; + p->density.rot_v[2] = 0.f; +} + +/** + * @brief Prepare a particle for the force calculation. + * + * This function is called in the ghost task to convert some quantities coming + * from the density loop over neighbours into quantities ready to be used in the + * force loop over neighbours. Quantities are typically read from the density + * sub-structure and written to the force sub-structure. + * Examples of calculations done here include the calculation of viscosity term + * constants, thermal conduction terms, hydro conversions, etc. + * + * @param p The particle to act upon + * @param xp The extended particle data to act upon + * @param cosmo The current cosmological model. + * @param hydro_props Hydrodynamic properties. + * @param dt_alpha The time-step used to evolve non-cosmological quantities such + * as the artificial viscosity. 
+ */ +__attribute__((always_inline)) INLINE static void hydro_prepare_force( + struct part *restrict p, struct xpart *restrict xp, + const struct cosmology *cosmo, const struct hydro_props *hydro_props, + const float dt_alpha) { + + const float fac_mu = cosmo->a_factor_mu; + + const float h_inv = 1.f / p->h; + + /* Compute the norm of the curl */ + const float curl_v = sqrtf(p->density.rot_v[0] * p->density.rot_v[0] + + p->density.rot_v[1] * p->density.rot_v[1] + + p->density.rot_v[2] * p->density.rot_v[2]); + + /* Compute the norm of div v */ + const float abs_div_v = fabsf(p->density.div_v); + + /* Compute the sound speed -- see theory section for justification */ + const float soundspeed = hydro_get_comoving_soundspeed(p); + + /* Compute the Balsara switch */ + const float balsara = + abs_div_v / (abs_div_v + curl_v + 0.0001f * soundspeed * fac_mu * h_inv); + + /* Compute the "grad h" term */ + const float common_factor = p->h / (hydro_dimension * p->density.wcount); + const float grad_h_term = (p->density.pressure_bar_dh * common_factor * + hydro_one_over_gamma_minus_one) / + (1.f + common_factor * p->density.wcount_dh); + + /* Artificial viscosity updates */ + + const float inverse_tau = hydro_props->viscosity.length * soundspeed * h_inv; + const float source_term = -1.f * min(p->density.div_v, 0.f); + + /* Compute da/dt */ + const float alpha_time_differential = + source_term + (hydro_props->viscosity.alpha_min - p->alpha) * inverse_tau; + + /* Update variables. */ + p->alpha += alpha_time_differential * dt_alpha; + p->force.f = grad_h_term; + p->force.soundspeed = soundspeed; + p->force.balsara = balsara; +} + +/** + * @brief Reset acceleration fields of a particle + * + * Resets all hydro acceleration and time derivative fields in preparation + * for the sums taking place in the various force tasks. 
+ * + * @param p The particle to act upon + */ +__attribute__((always_inline)) INLINE static void hydro_reset_acceleration( + struct part *restrict p) { + + /* Reset the acceleration. */ + p->a_hydro[0] = 0.0f; + p->a_hydro[1] = 0.0f; + p->a_hydro[2] = 0.0f; + + /* Reset the time derivatives. */ + p->u_dt = 0.0f; + p->force.h_dt = 0.0f; + p->force.v_sig = p->force.soundspeed; +} + +/** + * @brief Sets the values to be predicted in the drifts to their values at a + * kick time + * + * @param p The particle. + * @param xp The extended data of this particle. + */ +__attribute__((always_inline)) INLINE static void hydro_reset_predicted_values( + struct part *restrict p, const struct xpart *restrict xp) { + + /* Re-set the predicted velocities */ + p->v[0] = xp->v_full[0]; + p->v[1] = xp->v_full[1]; + p->v[2] = xp->v_full[2]; + + /* Re-set the entropy */ + p->u = xp->u_full; +} + +/** + * @brief Predict additional particle fields forward in time when drifting + * + * Additional hydrodynamic quantites are drifted forward in time here. These + * include thermal quantities (thermal energy or total energy or entropy, ...). + * + * Note the different time-step sizes used for the different quantities as they + * include cosmological factors. + * + * @param p The particle. + * @param xp The extended data of the particle. + * @param dt_drift The drift time-step for positions. + * @param dt_therm The drift time-step for thermal quantities. 
+ */ +__attribute__((always_inline)) INLINE static void hydro_predict_extra( + struct part *restrict p, const struct xpart *restrict xp, float dt_drift, + float dt_therm) { + + const float h_inv = 1.f / p->h; + + /* Predict smoothing length */ + const float w1 = p->force.h_dt * h_inv * dt_drift; + if (fabsf(w1) < 0.2f) + p->h *= approx_expf(w1); /* 4th order expansion of exp(w) */ + else + p->h *= expf(w1); + + /* Predict density and weighted pressure */ + const float w2 = -hydro_dimension * w1; + if (fabsf(w2) < 0.2f) { + const float expf_approx = + approx_expf(w2); /* 4th order expansion of exp(w) */ + p->rho *= expf_approx; + p->pressure_bar *= expf_approx; + } else { + const float expf_exact = expf(w2); + p->rho *= expf_exact; + p->pressure_bar *= expf_exact; + } + + /* Predict the internal energy */ + p->u += p->u_dt * dt_therm; + + /* Compute the new sound speed */ + const float soundspeed = hydro_get_comoving_soundspeed(p); + + p->force.soundspeed = soundspeed; +} + +/** + * @brief Finishes the force calculation. + * + * Multiplies the force and accelerations by the appropiate constants + * and add the self-contribution term. In most cases, there is little + * to do here. + * + * Cosmological terms are also added/multiplied here. + * + * @param p The particle to act upon + * @param cosmo The current cosmological model. + */ +__attribute__((always_inline)) INLINE static void hydro_end_force( + struct part *restrict p, const struct cosmology *cosmo) { + + p->force.h_dt *= p->h * hydro_dimension_inv; +} + +/** + * @brief Kick the additional variables + * + * Additional hydrodynamic quantites are kicked forward in time here. These + * include thermal quantities (thermal energy or total energy or entropy, ...). + * + * @param p The particle to act upon. + * @param xp The particle extended data to act upon. + * @param dt_therm The time-step for this kick (for thermodynamic quantities). + * @param dt_grav The time-step for this kick (for gravity quantities). 
+ * @param dt_hydro The time-step for this kick (for hydro quantities). + * @param dt_kick_corr The time-step for this kick (for gravity corrections). + * @param cosmo The cosmological model. + * @param hydro_props The constants used in the scheme + */ +__attribute__((always_inline)) INLINE static void hydro_kick_extra( + struct part *restrict p, struct xpart *restrict xp, float dt_therm, + float dt_grav, float dt_hydro, float dt_kick_corr, + const struct cosmology *cosmo, const struct hydro_props *hydro_props) { + + /* Do not decrease the energy by more than a factor of 2*/ + if (dt_therm > 0. && p->u_dt * dt_therm < -0.5f * xp->u_full) { + p->u_dt = -0.5f * xp->u_full / dt_therm; + } + xp->u_full += p->u_dt * dt_therm; + + /* Apply the minimal energy limit */ + const float min_energy = + hydro_props->minimal_internal_energy / cosmo->a_factor_internal_energy; + if (xp->u_full < min_energy) { + xp->u_full = min_energy; + p->u_dt = 0.f; + } + + /* Compute the sound speed */ + const float soundspeed = hydro_get_comoving_soundspeed(p); + + p->force.soundspeed = soundspeed; +} + +/** + * @brief Converts hydro quantity of a particle at the start of a run + * + * This function is called once at the end of the engine_init_particle() + * routine (at the start of a calculation) after the densities of + * particles have been computed. + * This can be used to convert internal energy into entropy for instance. + * + * @param p The particle to act upon + * @param xp The extended particle to act upon + * @param cosmo The cosmological model. + * @param hydro_props The constants used in the scheme. + */ +__attribute__((always_inline)) INLINE static void hydro_convert_quantities( + struct part *restrict p, struct xpart *restrict xp, + const struct cosmology *cosmo, const struct hydro_props *hydro_props) { + + /* Convert the physcial internal energy to the comoving one. 
*/ + /* u' = a^(3(g-1)) u */ + const float factor = 1.f / cosmo->a_factor_internal_energy; + p->u *= factor; + xp->u_full = p->u; + + /* Apply the minimal energy limit */ + const float min_energy = + hydro_props->minimal_internal_energy / cosmo->a_factor_internal_energy; + if (xp->u_full < min_energy) { + xp->u_full = min_energy; + p->u = min_energy; + p->u_dt = 0.f; + } + + /* Start out with a 'regular' AV for comparison to other schemes */ + p->alpha = hydro_props->viscosity.alpha; + + /* Note that unlike Minimal the pressure and sound speed cannot be calculated + * here because they are smoothed properties in this scheme. */ +} + +/** + * @brief Initialises the particles for the first time + * + * This function is called only once just after the ICs have been + * read in to do some conversions or assignments between the particle + * and extended particle fields. + * + * @param p The particle to act upon + * @param xp The extended particle data to act upon + */ +__attribute__((always_inline)) INLINE static void hydro_first_init_part( + struct part *restrict p, struct xpart *restrict xp) { + + p->time_bin = 0; + xp->v_full[0] = p->v[0]; + xp->v_full[1] = p->v[1]; + xp->v_full[2] = p->v[2]; + xp->a_grav[0] = 0.f; + xp->a_grav[1] = 0.f; + xp->a_grav[2] = 0.f; + xp->u_full = p->u; + + hydro_reset_acceleration(p); + hydro_init_part(p, NULL); +} + +/** + * @brief Overwrite the initial internal energy of a particle. + * + * Note that in the cases where the thermodynamic variable is not + * internal energy but gets converted later, we must overwrite that + * field. The conversion to the actual variable happens later after + * the initial fake time-step. + * + * @param p The #part to write to. + * @param u_init The new initial internal energy. 
+ */ +__attribute__((always_inline)) INLINE static void +hydro_set_init_internal_energy(struct part *p, float u_init) { + + p->u = u_init; +} + +#endif /* SWIFT_PRESSURE_ENERGY_MORRIS_HYDRO_H */ diff --git a/src/hydro/PressureEnergyMorrisMonaghanAV/hydro_debug.h b/src/hydro/PressureEnergyMorrisMonaghanAV/hydro_debug.h new file mode 100644 index 0000000000000000000000000000000000000000..ead5fcc0c842d8018f784a1084941bdb9ebcb6ca --- /dev/null +++ b/src/hydro/PressureEnergyMorrisMonaghanAV/hydro_debug.h @@ -0,0 +1,47 @@ +/******************************************************************************* + * This file is part of SWIFT. + * Copyright (c) 2016 Matthieu Schaller (matthieu.schaller@durham.ac.uk) & + * Josh Borrow (joshua.borrow@durham.ac.uk) + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published + * by the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public License + * along with this program. If not, see <http://www.gnu.org/licenses/>. + * + ******************************************************************************/ +#ifndef SWIFT_PRESSURE_ENERGY_MORRIS_HYDRO_DEBUG_H +#define SWIFT_PRESSURE_ENERGY_MORRIS_HYDRO_DEBUG_H +/** + * @file PressureEnergy/hydro_debug.h + * @brief P-U conservative implementation of SPH (Debugging routines) + * + * The thermal variable is the internal energy (u). A simple variable + * viscosity term (Morris & Monaghan 1997) with a Balsara switch is + * implemented. 
+ */ + +__attribute__((always_inline)) INLINE static void hydro_debug_particle( + const struct part* p, const struct xpart* xp) { + printf( + "x=[%.3e,%.3e,%.3e], " + "v=[%.3e,%.3e,%.3e],v_full=[%.3e,%.3e,%.3e] \n a=[%.3e,%.3e,%.3e], " + "u=%.3e, du/dt=%.3e v_sig=%.3e, P=%.3e\n" + "h=%.3e, dh/dt=%.3e wcount=%d, m=%.3e, dh_drho=%.3e, rho=%.3e, \n" + "p_dh=%.3e, p_bar=%.3e \n" + "time_bin=%d, alpha=%.3e\n", + p->x[0], p->x[1], p->x[2], p->v[0], p->v[1], p->v[2], xp->v_full[0], + xp->v_full[1], xp->v_full[2], p->a_hydro[0], p->a_hydro[1], p->a_hydro[2], + p->u, p->u_dt, p->force.v_sig, hydro_get_comoving_pressure(p), p->h, + p->force.h_dt, (int)p->density.wcount, p->mass, p->density.rho_dh, p->rho, + p->density.pressure_bar_dh, p->pressure_bar, p->time_bin, p->alpha); +} + +#endif /* SWIFT_PRESSURE_ENERGY_MORRIS_HYDRO_DEBUG_H */ diff --git a/src/hydro/PressureEnergyMorrisMonaghanAV/hydro_iact.h b/src/hydro/PressureEnergyMorrisMonaghanAV/hydro_iact.h new file mode 100644 index 0000000000000000000000000000000000000000..747fca714ce20d9c2b018e14ac24a6492c51a75f --- /dev/null +++ b/src/hydro/PressureEnergyMorrisMonaghanAV/hydro_iact.h @@ -0,0 +1,427 @@ +/******************************************************************************* + * This file is part* of SWIFT. + * Copyright (c) 2016 Matthieu Schaller (matthieu.schaller@durham.ac.uk) & + * Josh Borrow (joshua.borrow@durham.ac.uk) + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published + * by the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. 
+ * + * You should have received a copy of the GNU Lesser General Public License + * along with this program. If not, see <http://www.gnu.org/licenses/>. + * + ******************************************************************************/ +#ifndef SWIFT_PRESSURE_ENERGY_MORRIS_HYDRO_IACT_H +#define SWIFT_PRESSURE_ENERGY_MORRIS_HYDRO_IACT_H + +/** + * @file PressureEnergy/hydro_iact.h + * @brief P-U implementation of SPH (Neighbour loop equations) + * + * The thermal variable is the internal energy (u). A simple variable + * viscosity term (Morris & Monaghan 1997) with a Balsara switch is + * implemented. + * + * No thermal conduction term is implemented. + * + * See PressureEnergy/hydro.h for references. + */ + +#include "adiabatic_index.h" +#include "minmax.h" + +/** + * @brief Density interaction between two part*icles. + * + * @param r2 Comoving square distance between the two part*icles. + * @param dx Comoving vector separating both part*icles (pi - pj). + * @param hi Comoving smoothing-length of part*icle i. + * @param hj Comoving smoothing-length of part*icle j. + * @param pi First part*icle. + * @param pj Second part*icle. + * @param a Current scale factor. + * @param H Current Hubble parameter. + */ +__attribute__((always_inline)) INLINE static void runner_iact_density( + float r2, const float* dx, float hi, float hj, struct part* pi, + struct part* pj, float a, float H) { + + float wi, wj, wi_dx, wj_dx; + float dv[3], curlvr[3]; + + const float r = sqrtf(r2); + + /* Get the masses. */ + const float mi = pi->mass; + const float mj = pj->mass; + + /* Compute density of pi. 
*/ + const float hi_inv = 1.f / hi; + const float ui = r * hi_inv; + + kernel_deval(ui, &wi, &wi_dx); + + pi->rho += mj * wi; + pi->density.rho_dh -= mj * (hydro_dimension * wi + ui * wi_dx); + + pi->pressure_bar += mj * wi * pj->u; + pi->density.pressure_bar_dh -= + mj * pj->u * (hydro_dimension * wi + ui * wi_dx); + pi->density.wcount += wi; + pi->density.wcount_dh -= (hydro_dimension * wi + ui * wi_dx); + + /* Compute density of pj. */ + const float hj_inv = 1.f / hj; + const float uj = r * hj_inv; + kernel_deval(uj, &wj, &wj_dx); + + pj->rho += mi * wj; + pj->density.rho_dh -= mi * (hydro_dimension * wj + uj * wj_dx); + pj->pressure_bar += mi * wj * pi->u; + pj->density.pressure_bar_dh -= + mi * pi->u * (hydro_dimension * wj + uj * wj_dx); + pj->density.wcount += wj; + pj->density.wcount_dh -= (hydro_dimension * wj + uj * wj_dx); + + /* Now we need to compute the div terms */ + const float r_inv = 1.f / r; + const float faci = mj * wi_dx * r_inv; + const float facj = mi * wj_dx * r_inv; + + /* Compute dv dot r */ + dv[0] = pi->v[0] - pj->v[0]; + dv[1] = pi->v[1] - pj->v[1]; + dv[2] = pi->v[2] - pj->v[2]; + const float dvdr = dv[0] * dx[0] + dv[1] * dx[1] + dv[2] * dx[2]; + + pi->density.div_v -= faci * dvdr; + pj->density.div_v -= facj * dvdr; + + /* Compute dv cross r */ + curlvr[0] = dv[1] * dx[2] - dv[2] * dx[1]; + curlvr[1] = dv[2] * dx[0] - dv[0] * dx[2]; + curlvr[2] = dv[0] * dx[1] - dv[1] * dx[0]; + + pi->density.rot_v[0] += faci * curlvr[0]; + pi->density.rot_v[1] += faci * curlvr[1]; + pi->density.rot_v[2] += faci * curlvr[2]; + + /* Negative because of the change in sign of dx & dv. */ + pj->density.rot_v[0] += facj * curlvr[0]; + pj->density.rot_v[1] += facj * curlvr[1]; + pj->density.rot_v[2] += facj * curlvr[2]; +} + +/** + * @brief Density interaction between two part*icles (non-symmetric). + * + * @param r2 Comoving square distance between the two part*icles. + * @param dx Comoving vector separating both part*icles (pi - pj). 
+ * @param hi Comoving smoothing-length of part*icle i. + * @param hj Comoving smoothing-length of part*icle j. + * @param pi First part*icle. + * @param pj Second part*icle (not updated). + * @param a Current scale factor. + * @param H Current Hubble parameter. + */ +__attribute__((always_inline)) INLINE static void runner_iact_nonsym_density( + float r2, const float* dx, float hi, float hj, struct part* pi, + const struct part* pj, float a, float H) { + + float wi, wi_dx; + float dv[3], curlvr[3]; + + /* Get the masses. */ + const float mj = pj->mass; + + /* Get r and r inverse. */ + const float r = sqrtf(r2); + + const float h_inv = 1.f / hi; + const float ui = r * h_inv; + kernel_deval(ui, &wi, &wi_dx); + + pi->rho += mj * wi; + pi->density.rho_dh -= mj * (hydro_dimension * wi + ui * wi_dx); + + pi->pressure_bar += mj * wi * pj->u; + + pi->density.pressure_bar_dh -= + mj * pj->u * (hydro_dimension * wi + ui * wi_dx); + pi->density.wcount += wi; + pi->density.wcount_dh -= (hydro_dimension * wi + ui * wi_dx); + + const float r_inv = 1.f / r; + const float faci = mj * wi_dx * r_inv; + + /* Compute dv dot r */ + dv[0] = pi->v[0] - pj->v[0]; + dv[1] = pi->v[1] - pj->v[1]; + dv[2] = pi->v[2] - pj->v[2]; + const float dvdr = dv[0] * dx[0] + dv[1] * dx[1] + dv[2] * dx[2]; + + pi->density.div_v -= faci * dvdr; + + /* Compute dv cross r */ + curlvr[0] = dv[1] * dx[2] - dv[2] * dx[1]; + curlvr[1] = dv[2] * dx[0] - dv[0] * dx[2]; + curlvr[2] = dv[0] * dx[1] - dv[1] * dx[0]; + + pi->density.rot_v[0] += faci * curlvr[0]; + pi->density.rot_v[1] += faci * curlvr[1]; + pi->density.rot_v[2] += faci * curlvr[2]; +} + +/** + * @brief Force interaction between two part*icles. + * + * @param r2 Comoving square distance between the two part*icles. + * @param dx Comoving vector separating both part*icles (pi - pj). + * @param hi Comoving smoothing-length of part*icle i. + * @param hj Comoving smoothing-length of part*icle j. + * @param pi First part*icle. 
+ * @param pj Second part*icle. + * @param a Current scale factor. + * @param H Current Hubble parameter. + */ +__attribute__((always_inline)) INLINE static void runner_iact_force( + float r2, const float* dx, float hi, float hj, struct part* pi, + struct part* pj, float a, float H) { + + /* Cosmological factors entering the EoMs */ + const float fac_mu = pow_three_gamma_minus_five_over_two(a); + const float a2_Hubble = a * a * H; + + const float r = sqrtf(r2); + const float r_inv = 1.0f / r; + + /* Recover some data */ + const float mj = pj->mass; + const float mi = pi->mass; + + const float miui = mi * pi->u; + const float mjuj = mj * pj->u; + + const float rhoi = pi->rho; + const float rhoj = pj->rho; + /* Compute gradient terms */ + const float f_ij = 1.f - (pi->force.f / mjuj); + const float f_ji = 1.f - (pj->force.f / miui); + + /* Get the kernel for hi. */ + const float hi_inv = 1.0f / hi; + const float hid_inv = pow_dimension_plus_one(hi_inv); /* 1/h^(d+1) */ + const float xi = r * hi_inv; + float wi, wi_dx; + kernel_deval(xi, &wi, &wi_dx); + const float wi_dr = hid_inv * wi_dx; + + /* Get the kernel for hj. */ + const float hj_inv = 1.0f / hj; + const float hjd_inv = pow_dimension_plus_one(hj_inv); /* 1/h^(d+1) */ + const float xj = r * hj_inv; + float wj, wj_dx; + kernel_deval(xj, &wj, &wj_dx); + const float wj_dr = hjd_inv * wj_dx; + + /* Compute dv dot r. */ + const float dvdr = (pi->v[0] - pj->v[0]) * dx[0] + + (pi->v[1] - pj->v[1]) * dx[1] + + (pi->v[2] - pj->v[2]) * dx[2]; + + /* Includes the hubble flow term; not used for du/dt */ + const float dvdr_Hubble = dvdr + a2_Hubble * r2; + + /* Are the part*icles moving towards each others ? 
*/ + const float omega_ij = min(dvdr_Hubble, 0.f); + const float mu_ij = fac_mu * r_inv * omega_ij; /* This is 0 or negative */ + + /* Compute sound speeds and signal velocity */ + const float ci = pi->force.soundspeed; + const float cj = pj->force.soundspeed; + const float v_sig = ci + cj - 3.f * mu_ij; + + /* Balsara term */ + const float balsara_i = pi->force.balsara; + const float balsara_j = pj->force.balsara; + + /* Construct the full viscosity term */ + const float rho_ij = 0.5f * (rhoi + rhoj); + const float alpha = 0.5f * (pi->alpha + pj->alpha); + const float visc = + -0.25f * alpha * v_sig * mu_ij * (balsara_i + balsara_j) / rho_ij; + + /* Convolve with the kernel */ + const float visc_acc_term = 0.5f * visc * (wi_dr + wj_dr) * r_inv; + + /* SPH acceleration term */ + const float sph_acc_term = + pj->u * pi->u * hydro_gamma_minus_one * hydro_gamma_minus_one * + ((f_ij / pi->pressure_bar) * wi_dr + (f_ji / pj->pressure_bar) * wj_dr) * + r_inv; + + /* Assemble the acceleration */ + const float acc = sph_acc_term + visc_acc_term; + + /* Use the force Luke ! */ + pi->a_hydro[0] -= mj * acc * dx[0]; + pi->a_hydro[1] -= mj * acc * dx[1]; + pi->a_hydro[2] -= mj * acc * dx[2]; + + pj->a_hydro[0] += mi * acc * dx[0]; + pj->a_hydro[1] += mi * acc * dx[1]; + pj->a_hydro[2] += mi * acc * dx[2]; + + /* Get the time derivative for u. 
*/ + const float sph_du_term_i = hydro_gamma_minus_one * hydro_gamma_minus_one * + pj->u * pi->u * (f_ij / pi->pressure_bar) * + wi_dr * dvdr * r_inv; + const float sph_du_term_j = hydro_gamma_minus_one * hydro_gamma_minus_one * + pi->u * pj->u * (f_ji / pj->pressure_bar) * + wj_dr * dvdr * r_inv; + + /* Viscosity term */ + const float visc_du_term = 0.5f * visc_acc_term * dvdr_Hubble; + + /* Assemble the energy equation term */ + const float du_dt_i = sph_du_term_i + visc_du_term; + const float du_dt_j = sph_du_term_j + visc_du_term; + + /* Internal energy time derivative */ + pi->u_dt += du_dt_i * mj; + pj->u_dt += du_dt_j * mi; + + /* Get the time derivative for h. */ + pi->force.h_dt -= mj * dvdr * r_inv / rhoj * wi_dr; + pj->force.h_dt -= mi * dvdr * r_inv / rhoi * wj_dr; + + /* Update the signal velocity. */ + pi->force.v_sig = max(pi->force.v_sig, v_sig); + pj->force.v_sig = max(pj->force.v_sig, v_sig); +} + +/** + * @brief Force interaction between two part*icles (non-symmetric). + * + * @param r2 Comoving square distance between the two part*icles. + * @param dx Comoving vector separating both part*icles (pi - pj). + * @param hi Comoving smoothing-length of part*icle i. + * @param hj Comoving smoothing-length of part*icle j. + * @param pi First part*icle. + * @param pj Second part*icle (not updated). + * @param a Current scale factor. + * @param H Current Hubble parameter. 
+ */ +__attribute__((always_inline)) INLINE static void runner_iact_nonsym_force( + float r2, const float* dx, float hi, float hj, struct part* pi, + const struct part* pj, float a, float H) { + + /* Cosmological factors entering the EoMs */ + const float fac_mu = pow_three_gamma_minus_five_over_two(a); + const float a2_Hubble = a * a * H; + + const float r = sqrtf(r2); + const float r_inv = 1.0f / r; + + /* Recover some data */ + // const float mi = pi->mass; + const float mj = pj->mass; + const float mi = pi->mass; + + const float miui = mi * pi->u; + const float mjuj = mj * pj->u; + + const float rhoi = pi->rho; + const float rhoj = pj->rho; + /* Compute gradient terms */ + const float f_ij = 1.f - (pi->force.f / mjuj); + const float f_ji = 1.f - (pj->force.f / miui); + + /* Get the kernel for hi. */ + const float hi_inv = 1.0f / hi; + const float hid_inv = pow_dimension_plus_one(hi_inv); /* 1/h^(d+1) */ + const float xi = r * hi_inv; + float wi, wi_dx; + kernel_deval(xi, &wi, &wi_dx); + const float wi_dr = hid_inv * wi_dx; + + /* Get the kernel for hj. */ + const float hj_inv = 1.0f / hj; + const float hjd_inv = pow_dimension_plus_one(hj_inv); /* 1/h^(d+1) */ + const float xj = r * hj_inv; + float wj, wj_dx; + kernel_deval(xj, &wj, &wj_dx); + const float wj_dr = hjd_inv * wj_dx; + + /* Compute dv dot r. */ + const float dvdr = (pi->v[0] - pj->v[0]) * dx[0] + + (pi->v[1] - pj->v[1]) * dx[1] + + (pi->v[2] - pj->v[2]) * dx[2]; + + /* Includes the hubble flow term; not used for du/dt */ + const float dvdr_Hubble = dvdr + a2_Hubble * r2; + + /* Are the part*icles moving towards each others ? 
*/ + const float omega_ij = min(dvdr_Hubble, 0.f); + const float mu_ij = fac_mu * r_inv * omega_ij; /* This is 0 or negative */ + + /* Compute sound speeds and signal velocity */ + const float ci = pi->force.soundspeed; + const float cj = pj->force.soundspeed; + const float v_sig = ci + cj - 3.f * mu_ij; + + /* Balsara term */ + const float balsara_i = pi->force.balsara; + const float balsara_j = pj->force.balsara; + + /* Construct the full viscosity term */ + const float rho_ij = 0.5f * (rhoi + rhoj); + const float alpha = 0.5f * (pi->alpha + pj->alpha); + const float visc = + -0.25f * alpha * v_sig * mu_ij * (balsara_i + balsara_j) / rho_ij; + + /* Convolve with the kernel */ + const float visc_acc_term = 0.5f * visc * (wi_dr + wj_dr) * r_inv; + + /* SPH acceleration term */ + const float sph_acc_term = + pj->u * pi->u * hydro_gamma_minus_one * hydro_gamma_minus_one * + ((f_ij / pi->pressure_bar) * wi_dr + (f_ji / pj->pressure_bar) * wj_dr) * + r_inv; + + /* Assemble the acceleration */ + const float acc = sph_acc_term + visc_acc_term; + + /* Use the force Luke ! */ + pi->a_hydro[0] -= mj * acc * dx[0]; + pi->a_hydro[1] -= mj * acc * dx[1]; + pi->a_hydro[2] -= mj * acc * dx[2]; + + /* Get the time derivative for u. */ + const float sph_du_term_i = hydro_gamma_minus_one * hydro_gamma_minus_one * + pj->u * pi->u * (f_ij / pi->pressure_bar) * + wi_dr * dvdr * r_inv; + + /* Viscosity term */ + const float visc_du_term = 0.5f * visc_acc_term * dvdr_Hubble; + + /* Assemble the energy equation term */ + const float du_dt_i = sph_du_term_i + visc_du_term; + + /* Internal energy time derivative */ + pi->u_dt += du_dt_i * mj; + + /* Get the time derivative for h. */ + pi->force.h_dt -= mj * dvdr * r_inv / rhoj * wi_dr; + + /* Update the signal velocity. 
+ */ + pi->force.v_sig = max(pi->force.v_sig, v_sig); +} + +#endif /* SWIFT_PRESSURE_ENERGY_MORRIS_HYDRO_IACT_H */ diff --git a/src/hydro/PressureEnergyMorrisMonaghanAV/hydro_io.h b/src/hydro/PressureEnergyMorrisMonaghanAV/hydro_io.h new file mode 100644 index 0000000000000000000000000000000000000000..1600679bc2e840d0b3b958531c279f5f29293b48 --- /dev/null +++ b/src/hydro/PressureEnergyMorrisMonaghanAV/hydro_io.h @@ -0,0 +1,216 @@ +/******************************************************************************* + * This file is part of SWIFT. + * Copyright (c) 2016 Matthieu Schaller (matthieu.schaller@durham.ac.uk) & + * Josh Borrow (joshua.borrow@durham.ac.uk) + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published + * by the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public License + * along with this program. If not, see <http://www.gnu.org/licenses/>. + * + ******************************************************************************/ +#ifndef SWIFT_PRESSURE_ENERGY_MORRIS_HYDRO_IO_H +#define SWIFT_PRESSURE_ENERGY_MORRIS_HYDRO_IO_H +/** + * @file PressureEnergy/hydro_io.h + * @brief P-U implementation of SPH (i/o routines) + * + * The thermal variable is the internal energy (u). A simple variable + * viscosity term (Morris & Monaghan 1997) with a Balsara switch is + * implemented. + * + * No thermal conduction term is implemented. + * + * See PressureEnergy/hydro.h for references. 
+ */ + +#include "adiabatic_index.h" +#include "hydro.h" +#include "io_properties.h" +#include "kernel_hydro.h" + +/** + * @brief Specifies which particle fields to read from a dataset + * + * @param parts The particle array. + * @param list The list of i/o properties to read. + * @param num_fields The number of i/o fields to read. + */ +INLINE static void hydro_read_particles(struct part* parts, + struct io_props* list, + int* num_fields) { + + *num_fields = 8; + + /* List what we want to read */ + list[0] = io_make_input_field("Coordinates", DOUBLE, 3, COMPULSORY, + UNIT_CONV_LENGTH, parts, x); + list[1] = io_make_input_field("Velocities", FLOAT, 3, COMPULSORY, + UNIT_CONV_SPEED, parts, v); + list[2] = io_make_input_field("Masses", FLOAT, 1, COMPULSORY, UNIT_CONV_MASS, + parts, mass); + list[3] = io_make_input_field("SmoothingLength", FLOAT, 1, COMPULSORY, + UNIT_CONV_LENGTH, parts, h); + list[4] = io_make_input_field("InternalEnergy", FLOAT, 1, COMPULSORY, + UNIT_CONV_ENERGY_PER_UNIT_MASS, parts, u); + list[5] = io_make_input_field("ParticleIDs", ULONGLONG, 1, COMPULSORY, + UNIT_CONV_NO_UNITS, parts, id); + list[6] = io_make_input_field("Accelerations", FLOAT, 3, OPTIONAL, + UNIT_CONV_ACCELERATION, parts, a_hydro); + list[7] = io_make_input_field("Density", FLOAT, 1, OPTIONAL, + UNIT_CONV_DENSITY, parts, rho); +} + +INLINE static void convert_u(const struct engine* e, const struct part* p, + const struct xpart* xp, float* ret) { + + ret[0] = hydro_get_comoving_internal_energy(p); +} + +INLINE static void convert_S(const struct engine* e, const struct part* p, + const struct xpart* xp, float* ret) { + + ret[0] = hydro_get_comoving_entropy(p); +} + +INLINE static void convert_P(const struct engine* e, const struct part* p, + const struct xpart* xp, float* ret) { + + ret[0] = hydro_get_comoving_pressure(p); +} + +INLINE static void convert_part_pos(const struct engine* e, + const struct part* p, + const struct xpart* xp, double* ret) { + + if (e->s->periodic) { + 
ret[0] = box_wrap(p->x[0], 0.0, e->s->dim[0]); + ret[1] = box_wrap(p->x[1], 0.0, e->s->dim[1]); + ret[2] = box_wrap(p->x[2], 0.0, e->s->dim[2]); + } else { + ret[0] = p->x[0]; + ret[1] = p->x[1]; + ret[2] = p->x[2]; + } +} + +INLINE static void convert_part_vel(const struct engine* e, + const struct part* p, + const struct xpart* xp, float* ret) { + + const int with_cosmology = (e->policy & engine_policy_cosmology); + const struct cosmology* cosmo = e->cosmology; + const integertime_t ti_current = e->ti_current; + const double time_base = e->time_base; + + const integertime_t ti_beg = get_integer_time_begin(ti_current, p->time_bin); + const integertime_t ti_end = get_integer_time_end(ti_current, p->time_bin); + + /* Get time-step since the last kick */ + float dt_kick_grav, dt_kick_hydro; + if (with_cosmology) { + dt_kick_grav = cosmology_get_grav_kick_factor(cosmo, ti_beg, ti_current); + dt_kick_grav -= + cosmology_get_grav_kick_factor(cosmo, ti_beg, (ti_beg + ti_end) / 2); + dt_kick_hydro = cosmology_get_hydro_kick_factor(cosmo, ti_beg, ti_current); + dt_kick_hydro -= + cosmology_get_hydro_kick_factor(cosmo, ti_beg, (ti_beg + ti_end) / 2); + } else { + dt_kick_grav = (ti_current - ((ti_beg + ti_end) / 2)) * time_base; + dt_kick_hydro = (ti_current - ((ti_beg + ti_end) / 2)) * time_base; + } + + /* Extrapolate the velocites to the current time */ + hydro_get_drifted_velocities(p, xp, dt_kick_hydro, dt_kick_grav, ret); + + /* Conversion from internal units to peculiar velocities */ + ret[0] *= cosmo->a_inv; + ret[1] *= cosmo->a_inv; + ret[2] *= cosmo->a_inv; +} + +INLINE static void convert_part_potential(const struct engine* e, + const struct part* p, + const struct xpart* xp, float* ret) { + if (p->gpart != NULL) + ret[0] = gravity_get_comoving_potential(p->gpart); + else + ret[0] = 0.f; +} + +/** + * @brief Specifies which particle fields to write to a dataset + * + * @param parts The particle array. + * @param list The list of i/o properties to write. 
+ * @param num_fields The number of i/o fields to write. + */ +INLINE static void hydro_write_particles(const struct part* parts, + const struct xpart* xparts, + struct io_props* list, + int* num_fields) { + + *num_fields = 11; + + /* List what we want to write */ + list[0] = io_make_output_field_convert_part("Coordinates", DOUBLE, 3, + UNIT_CONV_LENGTH, parts, xparts, + convert_part_pos); + list[1] = io_make_output_field_convert_part( + "Velocities", FLOAT, 3, UNIT_CONV_SPEED, parts, xparts, convert_part_vel); + list[2] = + io_make_output_field("Masses", FLOAT, 1, UNIT_CONV_MASS, parts, mass); + list[3] = io_make_output_field("SmoothingLength", FLOAT, 1, UNIT_CONV_LENGTH, + parts, h); + list[4] = io_make_output_field_convert_part("InternalEnergy", FLOAT, 1, + UNIT_CONV_ENERGY_PER_UNIT_MASS, + parts, xparts, convert_u); + list[5] = io_make_output_field("ParticleIDs", ULONGLONG, 1, + UNIT_CONV_NO_UNITS, parts, id); + list[6] = + io_make_output_field("Density", FLOAT, 1, UNIT_CONV_DENSITY, parts, rho); + list[7] = io_make_output_field("Pressure", FLOAT, 1, UNIT_CONV_PRESSURE, + parts, pressure_bar); + list[8] = io_make_output_field_convert_part("Entropy", FLOAT, 1, + UNIT_CONV_ENTROPY_PER_UNIT_MASS, + parts, xparts, convert_S); + list[9] = io_make_output_field("Viscosity", FLOAT, 1, UNIT_CONV_NO_UNITS, + parts, alpha); + list[10] = io_make_output_field_convert_part("Potential", FLOAT, 1, + UNIT_CONV_POTENTIAL, parts, + xparts, convert_part_potential); +} + +/** + * @brief Writes the current model of SPH to the file + * @param h_grpsph The HDF5 group in which to write + */ +INLINE static void hydro_write_flavour(hid_t h_grpsph) { + + /* Viscosity and thermal conduction */ + /* Nothing in this minimal model... 
*/ + io_write_attribute_s(h_grpsph, "Thermal Conductivity Model", "No treatment"); + io_write_attribute_s(h_grpsph, "Viscosity Model", + "Variable viscosity as in Morris and Monaghan (1997)"); + + /* Time integration properties */ + io_write_attribute_f(h_grpsph, "Maximal Delta u change over dt", + const_max_u_change); +} + +/** + * @brief Are we writing entropy in the internal energy field ? + * + * @return 1 if entropy is in 'internal energy', 0 otherwise. + */ +INLINE static int writeEntropyFlag(void) { return 0; } + +#endif /* SWIFT_PRESSURE_ENERGY_MORRIS_HYDRO_IO_H */ diff --git a/src/hydro/PressureEnergyMorrisMonaghanAV/hydro_part.h b/src/hydro/PressureEnergyMorrisMonaghanAV/hydro_part.h new file mode 100644 index 0000000000000000000000000000000000000000..da6391236811e2a907281c3db05462bb57602fe0 --- /dev/null +++ b/src/hydro/PressureEnergyMorrisMonaghanAV/hydro_part.h @@ -0,0 +1,187 @@ +/******************************************************************************* + * This file is part of SWIFT. + * Copyright (c) 2016 Matthieu Schaller (matthieu.schaller@durham.ac.uk) & + * Josh Borrow (joshua.borrow@durham.ac.uk) + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published + * by the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public License + * along with this program. If not, see <http://www.gnu.org/licenses/>. 
+ * + ******************************************************************************/ +#ifndef SWIFT_PRESSURE_ENERGY_MORRIS_HYDRO_PART_H +#define SWIFT_PRESSURE_ENERGY_MORRIS_HYDRO_PART_H +/** + * @file PressureEnergy/hydro_part.h + * @brief P-U implementation of SPH (Particle definition) + * + * The thermal variable is the internal energy (u). A simple variable + * viscosity term (Morris & Monaghan 1997) with a Balsara switch is + * implemented. + * + * No thermal conduction term is implemented. + * + * See PressureEnergy/hydro.h for references. + */ + +#include "chemistry_struct.h" +#include "cooling_struct.h" + +/** + * @brief Particle fields not needed during the SPH loops over neighbours. + * + * This structure contains the particle fields that are not used in the + * density or force loops. Quantities should be used in the kick, drift and + * potentially ghost tasks only. + */ +struct xpart { + + /*! Offset between current position and position at last tree rebuild. */ + float x_diff[3]; + + /*! Offset between the current position and position at the last sort. */ + float x_diff_sort[3]; + + /*! Velocity at the last full step. */ + float v_full[3]; + + /*! Gravitational acceleration at the last full step. */ + float a_grav[3]; + + /*! Internal energy at the last full step. */ + float u_full; + + /*! Additional data used to record cooling information */ + struct cooling_xpart_data cooling_data; + +} SWIFT_STRUCT_ALIGN; + +/** + * @brief Particle fields for the SPH particles + * + * The density and force substructures are used to contain variables only used + * within the density and force loops over neighbours. All more permanent + * variables should be declared in the main part of the part structure, + */ +struct part { + + /*! Particle unique ID. */ + long long id; + + /*! Pointer to corresponding gravity part. */ + struct gpart* gpart; + + /*! Particle position. */ + double x[3]; + + /*! Particle predicted velocity. */ + float v[3]; + + /*! 
Particle acceleration. */ + float a_hydro[3]; + + /*! Particle mass. */ + float mass; + + /*! Particle smoothing length. */ + float h; + + /*! Particle internal energy. */ + float u; + + /*! Time derivative of the internal energy. */ + float u_dt; + + /*! Particle density. */ + float rho; + + /*! Particle pressure (weighted) */ + float pressure_bar; + + /*! Artificial viscosity */ + float alpha; + + /* Store density/force specific stuff. */ + union { + + /** + * @brief Structure for the variables only used in the density loop over + * neighbours. + * + * Quantities in this sub-structure should only be accessed in the density + * loop over neighbours and the ghost task. + */ + struct { + + /*! Neighbour number count. */ + float wcount; + + /*! Derivative of the neighbour number with respect to h. */ + float wcount_dh; + + /*! Derivative of density with respect to h */ + float rho_dh; + + /*! Derivative of the weighted pressure with respect to h */ + float pressure_bar_dh; + + /*! Particle velocity curl. */ + float rot_v[3]; + + /*! Particle velocity divergence. */ + float div_v; + } density; + + /** + * @brief Structure for the variables only used in the force loop over + * neighbours. + * + * Quantities in this sub-structure should only be accessed in the force + * loop over neighbours and the ghost, drift and kick tasks. + */ + struct { + + /*! "Grad h" term -- only partial in P-U */ + float f; + + /*! Particle soundspeed. */ + float soundspeed; + + /*! Particle signal velocity */ + float v_sig; + + /*! Time derivative of smoothing length */ + float h_dt; + + /*! Balsara switch */ + float balsara; + } force; + }; + + /* Chemistry information */ + struct chemistry_part_data chemistry_data; + + /*! 
Time-step length */ + timebin_t time_bin; + +#ifdef SWIFT_DEBUG_CHECKS + + /* Time of the last drift */ + integertime_t ti_drift; + + /* Time of the last kick */ + integertime_t ti_kick; + +#endif + +} SWIFT_STRUCT_ALIGN; + +#endif /* SWIFT_PRESSURE_ENERGY_MORRIS_HYDRO_PART_H */ diff --git a/src/hydro/PressureEntropy/hydro.h b/src/hydro/PressureEntropy/hydro.h index e4b7cf06e083638a94526cc1f9e7212cf19dfad4..b16d24cfcee9407c8213b1e17465005884da6617 100644 --- a/src/hydro/PressureEntropy/hydro.h +++ b/src/hydro/PressureEntropy/hydro.h @@ -42,26 +42,58 @@ #include "minmax.h" /** - * @brief Returns the comoving internal energy of a particle + * @brief Returns the comoving internal energy of a particle at the last + * time the particle was kicked. * * @param p The particle of interest + * @param xp The extended data of the particle of interest. */ __attribute__((always_inline)) INLINE static float -hydro_get_comoving_internal_energy(const struct part *restrict p) { +hydro_get_comoving_internal_energy(const struct part *restrict p, + const struct xpart *restrict xp) { - return gas_internal_energy_from_entropy(p->rho_bar, p->entropy); + return gas_internal_energy_from_entropy(p->rho_bar, xp->entropy_full); } /** - * @brief Returns the physical internal energy of a particle + * @brief Returns the physical internal energy of a particle at the last + * time the particle was kicked. * - * @param p The particle of interest + * @param p The particle of interest. + * @param xp The extended data of the particle of interest. * @param cosmo The cosmological model. */ __attribute__((always_inline)) INLINE static float hydro_get_physical_internal_energy(const struct part *restrict p, + const struct xpart *restrict xp, const struct cosmology *cosmo) { + return gas_internal_energy_from_entropy(p->rho_bar * cosmo->a3_inv, + xp->entropy_full); +} +/** + * @brief Returns the comoving internal energy of a particle drifted to the + * current time. 
+ * + * @param p The particle of interest + */ +__attribute__((always_inline)) INLINE static float +hydro_get_drifted_comoving_internal_energy(const struct part *restrict p) { + + return gas_internal_energy_from_entropy(p->rho_bar, p->entropy); +} + +/** + * @brief Returns the physical internal energy of a particle drifted to the + * current time. + * + * @param p The particle of interest. + * @param cosmo The cosmological model. + */ +__attribute__((always_inline)) INLINE static float +hydro_get_drifted_physical_internal_energy(const struct part *restrict p, + const struct cosmology *cosmo) { + return gas_internal_energy_from_entropy(p->rho_bar * cosmo->a3_inv, p->entropy); } @@ -89,24 +121,57 @@ __attribute__((always_inline)) INLINE static float hydro_get_physical_pressure( } /** - * @brief Returns the comoving entropy of a particle + * @brief Returns the comoving entropy of a particle at the last + * time the particle was kicked. * - * @param p The particle of interest + * @param p The particle of interest. + * @param xp The extended data of the particle of interest. */ __attribute__((always_inline)) INLINE static float hydro_get_comoving_entropy( - const struct part *restrict p) { + const struct part *restrict p, const struct xpart *restrict xp) { - return p->entropy; + return xp->entropy_full; } /** - * @brief Returns the physical entropy of a particle + * @brief Returns the physical entropy of a particl at the last + * time the particle was kicked. * * @param p The particle of interest. + * @param xp The extended data of the particle of interest. * @param cosmo The cosmological model. */ __attribute__((always_inline)) INLINE static float hydro_get_physical_entropy( - const struct part *restrict p, const struct cosmology *cosmo) { + const struct part *restrict p, const struct xpart *restrict xp, + const struct cosmology *cosmo) { + + /* Note: no cosmological conversion required here with our choice of + * coordinates. 
*/ + return xp->entropy_full; +} + +/** + * @brief Returns the comoving entropy of a particle drifted to the + * current time. + * + * @param p The particle of interest. + */ +__attribute__((always_inline)) INLINE static float +hydro_get_drifted_comoving_entropy(const struct part *restrict p) { + + return p->entropy; +} + +/** + * @brief Returns the physical entropy of a particle drifted to the + * current time. + * + * @param p The particle of interest. + * @param cosmo The cosmological model. + */ +__attribute__((always_inline)) INLINE static float +hydro_get_drifted_physical_entropy(const struct part *restrict p, + const struct cosmology *cosmo) { /* Note: no cosmological conversion required here with our choice of * coordinates. */ @@ -210,12 +275,28 @@ __attribute__((always_inline)) INLINE static void hydro_get_drifted_velocities( * * @param p The particle of interest */ -__attribute__((always_inline)) INLINE static float hydro_get_internal_energy_dt( - const struct part *restrict p) { +__attribute__((always_inline)) INLINE static float +hydro_get_comoving_internal_energy_dt(const struct part *restrict p) { return gas_internal_energy_from_entropy(p->rho_bar, p->entropy_dt); } +/** + * @brief Returns the time derivative of physical internal energy of a particle + * + * We assume a constant density. + * + * @param p The particle of interest. + * @param cosmo The cosmological model. + */ +__attribute__((always_inline)) INLINE static float +hydro_get_physical_internal_energy_dt(const struct part *restrict p, + const struct cosmology *cosmo) { + + return gas_internal_energy_from_entropy(p->rho_bar * cosmo->a3_inv, + p->entropy_dt); +} + /** * @brief Returns the time derivative of internal energy of a particle * @@ -224,12 +305,29 @@ __attribute__((always_inline)) INLINE static float hydro_get_internal_energy_dt( * @param p The particle of interest. * @param du_dt The new time derivative of the internal energy. 
*/ -__attribute__((always_inline)) INLINE static void hydro_set_internal_energy_dt( - struct part *restrict p, float du_dt) { +__attribute__((always_inline)) INLINE static void +hydro_set_comoving_internal_energy_dt(struct part *restrict p, float du_dt) { p->entropy_dt = gas_entropy_from_internal_energy(p->rho_bar, du_dt); } +/** + * @brief Sets the time derivative of the physical internal energy of a particle + * + * We assume a constant density for the conversion to entropy. + * + * @param p The particle of interest. + * @param cosmo Cosmology data structure + * @param du_dt The time derivative of the internal energy. + */ +__attribute__((always_inline)) INLINE static void +hydro_set_physical_internal_energy_dt(struct part *restrict p, + const struct cosmology *restrict cosmo, + float du_dt) { + p->entropy_dt = + gas_entropy_from_internal_energy(p->rho_bar * cosmo->a3_inv, du_dt); +} + /** * @brief Computes the hydro time-step of a given particle * @@ -370,15 +468,24 @@ __attribute__((always_inline)) INLINE static void hydro_part_has_no_neighbours( /** * @brief Prepare a particle for the force calculation. * - * Computes viscosity term, conduction term and smoothing length gradient terms. + * This function is called in the ghost task to convert some quantities coming + * from the density loop over neighbours into quantities ready to be used in the + * force loop over neighbours. Quantities are typically read from the density + * sub-structure and written to the force sub-structure. + * Examples of calculations done here include the calculation of viscosity term + * constants, thermal conduction terms, hydro conversions, etc. * * @param p The particle to act upon * @param xp The extended particle data to act upon * @param cosmo The current cosmological model. + * @param hydro_props Hydrodynamic properties. + * @param dt_alpha The time-step used to evolve non-cosmological quantities such + * as the artificial viscosity. 
*/ __attribute__((always_inline)) INLINE static void hydro_prepare_force( struct part *restrict p, struct xpart *restrict xp, - const struct cosmology *cosmo) { + const struct cosmology *cosmo, const struct hydro_props *hydro_props, + const float dt_alpha) { const float fac_mu = cosmo->a_factor_mu; @@ -398,7 +505,8 @@ __attribute__((always_inline)) INLINE static void hydro_prepare_force( /* Compute the Balsara switch */ const float balsara = - abs_div_v / (abs_div_v + curl_v + 0.0001f * soundspeed * fac_mu / p->h); + hydro_props->viscosity.alpha * abs_div_v / + (abs_div_v + curl_v + 0.0001f * soundspeed * fac_mu / p->h); /* Divide the pressure by the density squared to get the SPH term */ const float rho_bar_inv = 1.f / p->rho_bar; @@ -541,6 +649,7 @@ __attribute__((always_inline)) INLINE static void hydro_end_force( */ __attribute__((always_inline)) INLINE static void hydro_kick_extra( struct part *restrict p, struct xpart *restrict xp, float dt_therm, + float dt_grav, float dt_hydro, float dt_kick_corr, const struct cosmology *cosmo, const struct hydro_props *hydro_props) { /* Do not decrease the entropy (temperature) by more than a factor of 2*/ @@ -586,7 +695,7 @@ __attribute__((always_inline)) INLINE static void hydro_kick_extra( */ __attribute__((always_inline)) INLINE static void hydro_convert_quantities( struct part *restrict p, struct xpart *restrict xp, - const struct cosmology *cosmo) { + const struct cosmology *cosmo, const struct hydro_props *hydro_props) { /* We read u in the entropy field. 
We now get S from u */ xp->entropy_full = diff --git a/src/hydro/PressureEntropy/hydro_iact.h b/src/hydro/PressureEntropy/hydro_iact.h index b8f8c1983a3b1fb67781f7228194deb770273988..a018b39a99be5ed691485d93bd8dfd1735378bda 100644 --- a/src/hydro/PressureEntropy/hydro_iact.h +++ b/src/hydro/PressureEntropy/hydro_iact.h @@ -259,12 +259,11 @@ __attribute__((always_inline)) INLINE static void runner_iact_force( const float mu_ij = fac_mu * r_inv * omega_ij; /* This is 0 or negative */ /* Signal velocity */ - const float v_sig = ci + cj - 3.f * mu_ij; + const float v_sig = ci + cj - const_viscosity_beta * mu_ij; /* Now construct the full viscosity term */ const float rho_ij = 0.5f * (rhoi + rhoj); - const float visc = -0.25f * const_viscosity_alpha * v_sig * mu_ij * - (balsara_i + balsara_j) / rho_ij; + const float visc = -0.25f * v_sig * mu_ij * (balsara_i + balsara_j) / rho_ij; /* Now, convolve with the kernel */ const float visc_term = 0.5f * visc * (wi_dr + wj_dr); @@ -373,12 +372,11 @@ __attribute__((always_inline)) INLINE static void runner_iact_nonsym_force( const float mu_ij = fac_mu * r_inv * omega_ij; /* This is 0 or negative */ /* Signal velocity */ - const float v_sig = ci + cj - 3.f * mu_ij; + const float v_sig = ci + cj - const_viscosity_beta * mu_ij; /* Now construct the full viscosity term */ const float rho_ij = 0.5f * (rhoi + rhoj); - const float visc = -0.25f * const_viscosity_alpha * v_sig * mu_ij * - (balsara_i + balsara_j) / rho_ij; + const float visc = -0.25f * v_sig * mu_ij * (balsara_i + balsara_j) / rho_ij; /* Now, convolve with the kernel */ const float visc_term = 0.5f * visc * (wi_dr + wj_dr); diff --git a/src/hydro/PressureEntropy/hydro_io.h b/src/hydro/PressureEntropy/hydro_io.h index 8c11bf6e334e18b10217e90f6573a42e40880955..e9397bf6108b8bc16658157e424055274f05f23c 100644 --- a/src/hydro/PressureEntropy/hydro_io.h +++ b/src/hydro/PressureEntropy/hydro_io.h @@ -71,7 +71,7 @@ INLINE static void hydro_read_particles(struct part* parts, 
INLINE static void convert_u(const struct engine* e, const struct part* p, const struct xpart* xp, float* ret) { - ret[0] = hydro_get_comoving_internal_energy(p); + ret[0] = hydro_get_comoving_internal_energy(p, xp); } INLINE static void convert_P(const struct engine* e, const struct part* p, @@ -194,8 +194,6 @@ INLINE static void hydro_write_flavour(hid_t h_grpsph) { io_write_attribute_s( h_grpsph, "Viscosity Model", "as in Springel (2005), i.e. Monaghan (1992) with Balsara (1995) switch"); - io_write_attribute_f(h_grpsph, "Viscosity alpha", const_viscosity_alpha); - io_write_attribute_f(h_grpsph, "Viscosity beta", 3.f); /* Time integration properties */ io_write_attribute_f(h_grpsph, "Maximal Delta u change over dt", diff --git a/src/hydro/Shadowswift/hydro.h b/src/hydro/Shadowswift/hydro.h index d70d58c6ba508ba4282ac9dd32565478afb40692..7e38aa6b57f383564e96d9fea24730926c0ac70b 100644 --- a/src/hydro/Shadowswift/hydro.h +++ b/src/hydro/Shadowswift/hydro.h @@ -283,22 +283,26 @@ __attribute__((always_inline)) INLINE static void hydro_part_has_no_neighbours( } /** - * @brief Prepare a particle for the gradient calculation. - * - * The name of this method is confusing, as this method is really called after - * the density loop and before the gradient loop. + * @brief Prepare a particle for the force calculation. * - * We use it to set the physical timestep for the particle and to copy the - * actual velocities, which we need to boost our interfaces during the flux - * calculation. We also initialize the variables used for the time step - * calculation. + * This function is called in the ghost task to convert some quantities coming + * from the density loop over neighbours into quantities ready to be used in the + * force loop over neighbours. Quantities are typically read from the density + * sub-structure and written to the force sub-structure. 
+ * Examples of calculations done here include the calculation of viscosity term + * constants, thermal conduction terms, hydro conversions, etc. * - * @param p The particle to act upon. - * @param xp The extended particle data to act upon. + * @param p The particle to act upon + * @param xp The extended particle data to act upon + * @param cosmo The current cosmological model. + * @param hydro_props Hydrodynamic properties. + * @param dt_alpha The time-step used to evolve non-cosmological quantities such + * as the artificial viscosity. */ __attribute__((always_inline)) INLINE static void hydro_prepare_force( struct part* restrict p, struct xpart* restrict xp, - const struct cosmology* cosmo) { + const struct cosmology* cosmo, const struct hydro_props* hydro_props, + const float dt_alpha) { /* Initialize time step criterion variables */ p->timestepvars.vmax = 0.0f; @@ -411,7 +415,8 @@ __attribute__((always_inline)) INLINE static void hydro_reset_predicted_values( * @param xp The extended particle data to act upon. */ __attribute__((always_inline)) INLINE static void hydro_convert_quantities( - struct part* p, struct xpart* xp, const struct cosmology* cosmo) {} + struct part* p, struct xpart* xp, const struct cosmology* cosmo, + const struct hydro_props* hydro_props) {} /** * @brief Extra operations to be done during the drift @@ -443,7 +448,8 @@ __attribute__((always_inline)) INLINE static void hydro_end_force( * @param dt Physical time step. */ __attribute__((always_inline)) INLINE static void hydro_kick_extra( - struct part* p, struct xpart* xp, float dt, const struct cosmology* cosmo, + struct part* p, struct xpart* xp, float dt, float dt_grav, float dt_hydro, + float dt_kick_corr, const struct cosmology* cosmo, const struct hydro_props* hydro_props) { /* Update the conserved variables. 
We do this here and not in the kick, diff --git a/src/hydro_io.h b/src/hydro_io.h index d752bb8bc03f619fe759fc8f5de32a01b3a61abe..1a2d6319b7caf6c09b9af406cbdd323f27607791 100644 --- a/src/hydro_io.h +++ b/src/hydro_io.h @@ -31,6 +31,8 @@ #include "./hydro/PressureEntropy/hydro_io.h" #elif defined(HOPKINS_PU_SPH) #include "./hydro/PressureEnergy/hydro_io.h" +#elif defined(HOPKINS_PU_SPH_MONAGHAN) +#include "./hydro/PressureEnergyMorrisMonaghanAV/hydro_io.h" #elif defined(DEFAULT_SPH) #include "./hydro/Default/hydro_io.h" #elif defined(GIZMO_MFV_SPH) @@ -39,8 +41,8 @@ #include "./hydro/GizmoMFM/hydro_io.h" #elif defined(SHADOWFAX_SPH) #include "./hydro/Shadowswift/hydro_io.h" -#elif defined(MINIMAL_MULTI_MAT_SPH) -#include "./hydro/MinimalMultiMat/hydro_io.h" +#elif defined(PLANETARY_SPH) +#include "./hydro/Planetary/hydro_io.h" #else #error "Invalid choice of SPH variant" #endif diff --git a/src/hydro_properties.c b/src/hydro_properties.c index f79fd832248fba8fbc55bd9fcec57e645be93159..2b1cd42055c66768e943241c75298e53e0bf75a8 100644 --- a/src/hydro_properties.c +++ b/src/hydro_properties.c @@ -39,7 +39,14 @@ #define hydro_props_default_h_tolerance 1e-4 #define hydro_props_default_init_temp 0.f #define hydro_props_default_min_temp 0.f -#define hydro_props_default_H_fraction 0.76 +#define hydro_props_default_H_ionization_temperature 1e4 +#define hydro_props_default_viscosity_alpha 0.8f +#define hydro_props_default_viscosity_alpha_min \ + 0.1f /* Values taken from (Price,2004), not used in legacy gadget mode */ +#define hydro_props_default_viscosity_alpha_max \ + 2.0f /* Values taken from (Price,2004), not used in legacy gadget mode */ +#define hydro_props_default_viscosity_length \ + 0.1f /* Values taken from (Price,2004), not used in legacy gadget mode */ /** * @brief Initialize the global properties of the hydro scheme. 
@@ -93,7 +100,7 @@ void hydro_props_init(struct hydro_props *p, p->initial_temperature = parser_get_opt_param_float( params, "SPH:initial_temperature", hydro_props_default_init_temp); - /* Initial temperature */ + /* Minimal temperature */ p->minimal_temperature = parser_get_opt_param_float( params, "SPH:minimal_temperature", hydro_props_default_min_temp); @@ -101,20 +108,41 @@ void hydro_props_init(struct hydro_props *p, (p->initial_temperature < p->minimal_temperature)) error("Initial temperature lower than minimal allowed temperature!"); + /* Neutral to ionized Hydrogen transition temperature */ + p->hydrogen_ionization_temperature = + parser_get_opt_param_double(params, "SPH:H_ionization_temperature", + hydro_props_default_H_ionization_temperature); + /* Hydrogen mass fraction */ + const float default_H_fraction = + 1. - phys_const->const_primordial_He_fraction; p->hydrogen_mass_fraction = parser_get_opt_param_double( - params, "SPH:H_mass_fraction", hydro_props_default_H_fraction); + params, "SPH:H_mass_fraction", default_H_fraction); + + /* Read the artificial viscosity parameters from the file, if they exist */ + p->viscosity.alpha = parser_get_opt_param_float( + params, "SPH:viscosity_alpha", hydro_props_default_viscosity_alpha); + + p->viscosity.alpha_max = + parser_get_opt_param_float(params, "SPH:viscosity_alpha_max", + hydro_props_default_viscosity_alpha_max); + + p->viscosity.alpha_min = + parser_get_opt_param_float(params, "SPH:viscosity_alpha_min", + hydro_props_default_viscosity_alpha_min); + + p->viscosity.length = parser_get_opt_param_float( + params, "SPH:viscosity_length", hydro_props_default_viscosity_length); /* Compute the initial energy (Note the temp. 
read is in internal units) */ + /* u_init = k_B T_init / (mu m_p (gamma - 1)) */ double u_init = phys_const->const_boltzmann_k / phys_const->const_proton_mass; u_init *= p->initial_temperature; u_init *= hydro_one_over_gamma_minus_one; - /* Correct for hydrogen mass fraction */ + /* Correct for hydrogen mass fraction (mu) */ double mean_molecular_weight; - if (p->initial_temperature * - units_cgs_conversion_factor(us, UNIT_CONV_TEMPERATURE) > - 1e4) + if (p->initial_temperature > p->hydrogen_ionization_temperature) mean_molecular_weight = 4. / (8. - 5. * (1. - p->hydrogen_mass_fraction)); else mean_molecular_weight = 4. / (1. + 3. * p->hydrogen_mass_fraction); @@ -122,14 +150,13 @@ void hydro_props_init(struct hydro_props *p, p->initial_internal_energy = u_init / mean_molecular_weight; /* Compute the minimal energy (Note the temp. read is in internal units) */ + /* u_min = k_B T_min / (mu m_p (gamma - 1)) */ double u_min = phys_const->const_boltzmann_k / phys_const->const_proton_mass; u_min *= p->minimal_temperature; u_min *= hydro_one_over_gamma_minus_one; - /* Correct for hydrogen mass fraction */ - if (p->minimal_temperature * - units_cgs_conversion_factor(us, UNIT_CONV_TEMPERATURE) > - 1e4) + /* Correct for hydrogen mass fraction (mu) */ + if (p->minimal_temperature > p->hydrogen_ionization_temperature) mean_molecular_weight = 4. / (8. - 5. * (1. - p->hydrogen_mass_fraction)); else mean_molecular_weight = 4. / (1. + 3. 
* p->hydrogen_mass_fraction); @@ -159,6 +186,12 @@ void hydro_props_print(const struct hydro_props *p) { message("Hydrodynamic integration: CFL parameter: %.4f.", p->CFL_condition); + message( + "Artificial viscosity parameters set to alpha: %.3f, max: %.3f, " + "min: %.3f, length: %.3f.", + p->viscosity.alpha, p->viscosity.alpha_max, p->viscosity.alpha_min, + p->viscosity.length); + message( "Hydrodynamic integration: Max change of volume: %.2f " "(max|dlog(h)/dt|=%f).", @@ -176,6 +209,16 @@ void hydro_props_print(const struct hydro_props *p) { if (p->minimal_temperature != hydro_props_default_min_temp) message("Minimal gas temperature set to %f", p->minimal_temperature); + + // Matthieu: Temporary location for this i/o business. + +#ifdef PLANETARY_SPH +#ifdef PLANETARY_SPH_NO_BALSARA + message("Planetary SPH: Balsara switch DISABLED"); +#else + message("Planetary SPH: Balsara switch ENABLED"); +#endif +#endif } #if defined(HAVE_HDF5) @@ -204,9 +247,56 @@ void hydro_props_print_snapshot(hid_t h_grpsph, const struct hydro_props *p) { p->initial_internal_energy); io_write_attribute_f(h_grpsph, "Hydrogen mass fraction", p->hydrogen_mass_fraction); + io_write_attribute_f(h_grpsph, "Hydrogen ionization transition temperature", + p->hydrogen_ionization_temperature); + io_write_attribute_f(h_grpsph, "Alpha viscosity", p->viscosity.alpha); + io_write_attribute_f(h_grpsph, "Alpha viscosity (max)", + p->viscosity.alpha_max); + io_write_attribute_f(h_grpsph, "Alpha viscosity (min)", + p->viscosity.alpha_min); + io_write_attribute_f(h_grpsph, "Viscosity decay length", p->viscosity.length); + io_write_attribute_f(h_grpsph, "Beta viscosity", const_viscosity_beta); } #endif +/** + * @brief Initialises a hydro_props struct with somewhat useful values for + * the automated test suite. This is not intended for production use, + * but rather to fill for the purposes of mocking. 
+ * + * @param p the struct + */ +void hydro_props_init_no_hydro(struct hydro_props *p) { + p->eta_neighbours = 1.2348; + p->h_tolerance = hydro_props_default_h_tolerance; + p->target_neighbours = pow_dimension(p->eta_neighbours) * kernel_norm; + const float delta_eta = p->eta_neighbours * (1.f + p->h_tolerance); + p->delta_neighbours = + (pow_dimension(delta_eta) - pow_dimension(p->eta_neighbours)) * + kernel_norm; + p->h_max = hydro_props_default_h_max; + p->max_smoothing_iterations = hydro_props_default_max_iterations; + p->CFL_condition = 0.1; + p->log_max_h_change = logf(powf(1.4, hydro_dimension_inv)); + + /* These values are inconsistent and in a production run would probably lead + to a crash. Again, this function is intended for mocking use in unit tests + and is _not_ to be used otherwise! */ + p->minimal_temperature = hydro_props_default_min_temp; + p->minimal_internal_energy = 0.f; + p->initial_temperature = hydro_props_default_init_temp; + p->initial_internal_energy = 0.f; + + p->hydrogen_mass_fraction = 0.755; + p->hydrogen_ionization_temperature = + hydro_props_default_H_ionization_temperature; + + p->viscosity.alpha = hydro_props_default_viscosity_alpha; + p->viscosity.alpha_max = hydro_props_default_viscosity_alpha_max; + p->viscosity.alpha_min = hydro_props_default_viscosity_alpha_min; + p->viscosity.length = hydro_props_default_viscosity_length; +} + /** * @brief Write a hydro_props struct to the given FILE as a stream of bytes. * diff --git a/src/hydro_properties.h b/src/hydro_properties.h index 64a840692db677704b8617e962d7883505983cc0..b45b93192e7db7b1bdca49557f8563322f09aae9 100644 --- a/src/hydro_properties.h +++ b/src/hydro_properties.h @@ -69,17 +69,36 @@ struct hydro_props { /*! Minimal temperature allowed */ float minimal_temperature; - /*! Minimal internal energy per unit mass */ + /*! Minimal physical internal energy per unit mass */ float minimal_internal_energy; /*! Initial temperature */ float initial_temperature; - /*! 
Initial internal energy per unit mass */ + /*! Initial physical internal energy per unit mass */ float initial_internal_energy; - /*! Primoridal hydrogen mass fraction for initial energy conversion */ + /*! Primordial hydrogen mass fraction for initial energy conversion */ float hydrogen_mass_fraction; + + /*! Temperature of the neutral to ionized transition of Hydrogen */ + float hydrogen_ionization_temperature; + + /*! Artificial viscosity parameters */ + struct { + /*! For the fixed, simple case. Also used to set the initial AV + coefficient for variable schemes. */ + float alpha; + + /*! Artificial viscosity (max) for the variable case (e.g. M&M) */ + float alpha_max; + + /*! Artificial viscosity (min) for the variable case (e.g. M&M) */ + float alpha_min; + + /*! The decay length of the artificial viscosity (used in M&M, etc.) */ + float length; + } viscosity; }; void hydro_props_print(const struct hydro_props *p); @@ -96,4 +115,7 @@ void hydro_props_print_snapshot(hid_t h_grpsph, const struct hydro_props *p); void hydro_props_struct_dump(const struct hydro_props *p, FILE *stream); void hydro_props_struct_restore(const struct hydro_props *p, FILE *stream); +/* Setup for tests */ +void hydro_props_init_no_hydro(struct hydro_props *p); + #endif /* SWIFT_HYDRO_PROPERTIES */ diff --git a/src/intrinsics.h b/src/intrinsics.h index 7a4f0870b9d758ed6613e88b6b48a3c93887cd1c..7e3b9108248ddd43303a9103394d818384a9b664 100644 --- a/src/intrinsics.h +++ b/src/intrinsics.h @@ -32,7 +32,7 @@ * This is a wrapper for the GNU intrinsic with an implementation (from * Hacker's Delight) if the compiler intrinsics are not available. 
*/ -__attribute__((always_inline)) INLINE static int intrinsics_clz( +__attribute__((always_inline, const)) INLINE static int intrinsics_clz( unsigned int x) { #ifdef __GNUC__ @@ -70,9 +70,10 @@ __attribute__((always_inline)) INLINE static int intrinsics_clz( * @brief Returns the number of leading 0-bits in x, starting at the most * significant bit position. If x is 0, the result is undefined. * - * This is a wrapper for the GNU intrinsic with an implementation. + * This is a wrapper for the GNU intrinsic with a place-holder for a future + * version in cases where the compiler intrinsic is not available. */ -__attribute__((always_inline)) INLINE static int intrinsics_clzll( +__attribute__((always_inline, const)) INLINE static int intrinsics_clzll( unsigned long long x) { #ifdef __GNUC__ @@ -89,7 +90,7 @@ __attribute__((always_inline)) INLINE static int intrinsics_clzll( * This is a wrapper for the GNU intrinsic with an implementation (from * Hacker's Delight) if the compiler intrinsics are not available. */ -__attribute__((always_inline)) INLINE static int intrinsics_popcount( +__attribute__((always_inline, const)) INLINE static int intrinsics_popcount( unsigned int x) { #ifdef __GNUC__ @@ -111,7 +112,7 @@ __attribute__((always_inline)) INLINE static int intrinsics_popcount( * This is a wrapper for the GNU intrinsic with an implementation (from * Hacker's Delight) if the compiler intrinsics are not available. 
*/ -__attribute__((always_inline)) INLINE static int intrinsics_popcountll( +__attribute__((always_inline, const)) INLINE static int intrinsics_popcountll( unsigned long long x) { #ifdef __GNUC__ diff --git a/src/io_properties.h b/src/io_properties.h index 037d32338f015975489f6cbca4f7dfafac413e5f..9e948fc3991b0178d06fdd5d83fa900a98f84d2a 100644 --- a/src/io_properties.h +++ b/src/io_properties.h @@ -47,6 +47,10 @@ typedef void (*conversion_func_gpart_float)(const struct engine*, const struct gpart*, float*); typedef void (*conversion_func_gpart_double)(const struct engine*, const struct gpart*, double*); +typedef void (*conversion_func_spart_float)(const struct engine*, + const struct spart*, float*); +typedef void (*conversion_func_spart_double)(const struct engine*, + const struct spart*, double*); /** * @brief The properties of a given dataset for i/o @@ -86,6 +90,7 @@ struct io_props { const struct part* parts; const struct xpart* xparts; const struct gpart* gparts; + const struct spart* sparts; /* Are we converting? 
*/ int conversion; @@ -97,6 +102,10 @@ struct io_props { /* Conversion function for gpart */ conversion_func_gpart_float convert_gpart_f; conversion_func_gpart_double convert_gpart_d; + + /* Conversion function for spart */ + conversion_func_spart_float convert_spart_f; + conversion_func_spart_double convert_spart_d; }; /** @@ -134,11 +143,14 @@ INLINE static struct io_props io_make_input_field_( r.parts = NULL; r.xparts = NULL; r.gparts = NULL; + r.sparts = NULL; r.conversion = 0; r.convert_part_f = NULL; r.convert_part_d = NULL; r.convert_gpart_f = NULL; r.convert_gpart_d = NULL; + r.convert_spart_f = NULL; + r.convert_spart_d = NULL; return r; } @@ -175,11 +187,14 @@ INLINE static struct io_props io_make_output_field_( r.partSize = partSize; r.parts = NULL; r.gparts = NULL; + r.sparts = NULL; r.conversion = 0; r.convert_part_f = NULL; r.convert_part_d = NULL; r.convert_gpart_f = NULL; r.convert_gpart_d = NULL; + r.convert_spart_f = NULL; + r.convert_spart_d = NULL; return r; } @@ -223,11 +238,14 @@ INLINE static struct io_props io_make_output_field_convert_part_FLOAT( r.parts = parts; r.xparts = xparts; r.gparts = NULL; + r.sparts = NULL; r.conversion = 1; r.convert_part_f = functionPtr; r.convert_part_d = NULL; r.convert_gpart_f = NULL; r.convert_gpart_d = NULL; + r.convert_spart_f = NULL; + r.convert_spart_d = NULL; return r; } @@ -242,7 +260,7 @@ INLINE static struct io_props io_make_output_field_convert_part_FLOAT( * @param partSize The size in byte of the particle * @param parts The particle array * @param xparts The xparticle array - * @param functionPtr The function used to convert a particle to a float + * @param functionPtr The function used to convert a particle to a double * * Do not call this function directly. Use the macro defined above. 
*/ @@ -263,11 +281,14 @@ INLINE static struct io_props io_make_output_field_convert_part_DOUBLE( r.parts = parts; r.xparts = xparts; r.gparts = NULL; + r.sparts = NULL; r.conversion = 1; r.convert_part_f = NULL; r.convert_part_d = functionPtr; r.convert_gpart_f = NULL; r.convert_gpart_d = NULL; + r.convert_spart_f = NULL; + r.convert_spart_d = NULL; return r; } @@ -309,11 +330,14 @@ INLINE static struct io_props io_make_output_field_convert_gpart_FLOAT( r.parts = NULL; r.xparts = NULL; r.gparts = gparts; + r.sparts = NULL; r.conversion = 1; r.convert_part_f = NULL; r.convert_part_d = NULL; r.convert_gpart_f = functionPtr; r.convert_gpart_d = NULL; + r.convert_spart_f = NULL; + r.convert_spart_d = NULL; return r; } @@ -327,7 +351,7 @@ INLINE static struct io_props io_make_output_field_convert_gpart_FLOAT( * @param units The units of the dataset * @param gpartSize The size in byte of the particle * @param gparts The particle array - * @param functionPtr The function used to convert a g-particle to a float + * @param functionPtr The function used to convert a g-particle to a double * * Do not call this function directly. Use the macro defined above. 
*/ @@ -347,11 +371,104 @@ INLINE static struct io_props io_make_output_field_convert_gpart_DOUBLE( r.parts = NULL; r.xparts = NULL; r.gparts = gparts; + r.sparts = NULL; r.conversion = 1; r.convert_part_f = NULL; r.convert_part_d = NULL; r.convert_gpart_f = NULL; r.convert_gpart_d = functionPtr; + r.convert_spart_f = NULL; + r.convert_spart_d = NULL; + + return r; +} + +/** + * @brief Constructs an #io_props (with conversion) from its parameters + */ +#define io_make_output_field_convert_spart(name, type, dim, units, spart, \ + convert) \ + io_make_output_field_convert_spart_##type(name, type, dim, units, \ + sizeof(spart[0]), spart, convert) + +/** + * @brief Construct an #io_props from its parameters + * + * @param name Name of the field to read + * @param type The type of the data + * @param dimension Dataset dimension (1D, 3D, ...) + * @param units The units of the dataset + * @param spartSize The size in byte of the particle + * @param sparts The particle array + * @param functionPtr The function used to convert a g-particle to a float + * + * Do not call this function directly. Use the macro defined above. 
+ */ +INLINE static struct io_props io_make_output_field_convert_spart_FLOAT( + const char name[FIELD_BUFFER_SIZE], enum IO_DATA_TYPE type, int dimension, + enum unit_conversion_factor units, size_t spartSize, + const struct spart* sparts, conversion_func_spart_float functionPtr) { + + struct io_props r; + strcpy(r.name, name); + r.type = type; + r.dimension = dimension; + r.importance = UNUSED; + r.units = units; + r.field = NULL; + r.partSize = spartSize; + r.parts = NULL; + r.xparts = NULL; + r.gparts = NULL; + r.sparts = sparts; + r.conversion = 1; + r.convert_part_f = NULL; + r.convert_part_d = NULL; + r.convert_gpart_f = NULL; + r.convert_gpart_d = NULL; + r.convert_spart_f = functionPtr; + r.convert_spart_d = NULL; + + return r; +} + +/** + * @brief Construct an #io_props from its parameters + * + * @param name Name of the field to read + * @param type The type of the data + * @param dimension Dataset dimension (1D, 3D, ...) + * @param units The units of the dataset + * @param spartSize The size in byte of the particle + * @param sparts The particle array + * @param functionPtr The function used to convert a s-particle to a double + * + * Do not call this function directly. Use the macro defined above. 
+ */ +INLINE static struct io_props io_make_output_field_convert_spart_DOUBLE( + const char name[FIELD_BUFFER_SIZE], enum IO_DATA_TYPE type, int dimension, + enum unit_conversion_factor units, size_t spartSize, + const struct spart* sparts, conversion_func_spart_double functionPtr) { + + struct io_props r; + strcpy(r.name, name); + r.type = type; + r.dimension = dimension; + r.importance = UNUSED; + r.units = units; + r.field = NULL; + r.partSize = spartSize; + r.parts = NULL; + r.xparts = NULL; + r.gparts = NULL; + r.sparts = sparts; + r.conversion = 1; + r.convert_part_f = NULL; + r.convert_part_d = NULL; + r.convert_gpart_f = NULL; + r.convert_gpart_d = NULL; + r.convert_spart_f = NULL; + r.convert_spart_d = functionPtr; return r; } diff --git a/src/kernel_long_gravity.h b/src/kernel_long_gravity.h index 1744f2cd046a90499563a182ca68212e43f4a252..f6580f8f72b9eb6a2b49a4d2d54a0e4d0593fcbf 100644 --- a/src/kernel_long_gravity.h +++ b/src/kernel_long_gravity.h @@ -90,7 +90,7 @@ __attribute__((always_inline)) INLINE static void kernel_long_grav_derivatives( const float r_s_inv5 = r_s_inv4 * r_s_inv; /* Derivatives of \chi */ - derivs->chi_0 = erfcf(u); + derivs->chi_0 = approx_erfcf(u); derivs->chi_1 = -r_s_inv; derivs->chi_2 = r_s_inv2 * u; derivs->chi_3 = -r_s_inv3 * (u2 - 0.5f); @@ -158,7 +158,7 @@ __attribute__((always_inline)) INLINE static void kernel_long_grav_pot_eval( #ifdef GADGET2_LONG_RANGE_CORRECTION const float arg1 = u * 0.5f; - const float term1 = erfcf(arg1); + const float term1 = approx_erfcf(arg1); *W = term1; #else @@ -190,7 +190,7 @@ __attribute__((always_inline)) INLINE static void kernel_long_grav_force_eval( const float arg1 = u * 0.5f; const float arg2 = -arg1 * arg1; - const float term1 = erfcf(arg1); + const float term1 = approx_erfcf(arg1); const float term2 = u * one_over_sqrt_pi * expf(arg2); *W = term1 + term2; diff --git a/src/kick.h b/src/kick.h index 9d10f1e78d3934c4277c14217cbbc46514e87033..e85c9de40d2084304bde108e6f5fa9c776fd3e8f 
100644 --- a/src/kick.h +++ b/src/kick.h @@ -45,8 +45,8 @@ __attribute__((always_inline)) INLINE static void kick_gpart( if (gp->ti_kick != ti_start) error( "g-particle has not been kicked to the current time gp->ti_kick=%lld, " - "ti_start=%lld, ti_end=%lld", - gp->ti_kick, ti_start, ti_end); + "ti_start=%lld, ti_end=%lld id=%lld", + gp->ti_kick, ti_start, ti_end, gp->id_or_neg_offset); gp->ti_kick = ti_end; #endif @@ -68,6 +68,7 @@ __attribute__((always_inline)) INLINE static void kick_gpart( * @param dt_kick_hydro The kick time-step for hydro accelerations. * @param dt_kick_grav The kick time-step for gravity accelerations. * @param dt_kick_therm The kick time-step for changes in thermal state. + * @param dt_kick_corr The kick time-step for the gizmo-mfv gravity correction. * @param cosmo The cosmological model. * @param hydro_props The constants used in the scheme * @param ti_start The starting (integer) time of the kick (for debugging @@ -76,16 +77,16 @@ __attribute__((always_inline)) INLINE static void kick_gpart( */ __attribute__((always_inline)) INLINE static void kick_part( struct part *restrict p, struct xpart *restrict xp, double dt_kick_hydro, - double dt_kick_grav, double dt_kick_therm, const struct cosmology *cosmo, - const struct hydro_props *hydro_props, integertime_t ti_start, - integertime_t ti_end) { + double dt_kick_grav, double dt_kick_therm, double dt_kick_corr, + const struct cosmology *cosmo, const struct hydro_props *hydro_props, + integertime_t ti_start, integertime_t ti_end) { #ifdef SWIFT_DEBUG_CHECKS if (p->ti_kick != ti_start) error( "particle has not been kicked to the current time p->ti_kick=%lld, " - "ti_start=%lld, ti_end=%lld", - p->ti_kick, ti_start, ti_end); + "ti_start=%lld, ti_end=%lld id=%lld", + p->ti_kick, ti_start, ti_end, p->id); p->ti_kick = ti_end; #endif @@ -110,7 +111,8 @@ __attribute__((always_inline)) INLINE static void kick_part( } /* Extra kick work */ - hydro_kick_extra(p, xp, dt_kick_therm, cosmo, hydro_props); 
+ hydro_kick_extra(p, xp, dt_kick_therm, dt_kick_grav, dt_kick_hydro, + dt_kick_corr, cosmo, hydro_props); if (p->gpart != NULL) gravity_kick_extra(p->gpart, dt_kick_grav); } @@ -131,8 +133,8 @@ __attribute__((always_inline)) INLINE static void kick_spart( if (sp->ti_kick != ti_start) error( "s-particle has not been kicked to the current time sp->ti_kick=%lld, " - "ti_start=%lld, ti_end=%lld", - sp->ti_kick, ti_start, ti_end); + "ti_start=%lld, ti_end=%lld id=%lld", + sp->ti_kick, ti_start, ti_end, sp->id); sp->ti_kick = ti_end; #endif @@ -148,7 +150,7 @@ __attribute__((always_inline)) INLINE static void kick_spart( sp->gpart->v_full[2] = sp->v[2]; /* Kick extra variables */ - star_kick_extra(sp, dt_kick_grav); + stars_kick_extra(sp, dt_kick_grav); } #endif /* SWIFT_KICK_H */ diff --git a/src/lock.h b/src/lock.h index b2dd2eac9d0ca5d7807907e31cf3fa31894f9aed..39601b0c52e414dad1a507b406c54640a254df30 100644 --- a/src/lock.h +++ b/src/lock.h @@ -34,6 +34,7 @@ #define lock_trylock(l) (pthread_spin_lock(l) != 0) #define lock_unlock(l) (pthread_spin_unlock(l) != 0) #define lock_unlock_blind(l) pthread_spin_unlock(l) +#define lock_static_initializer ((pthread_spinlock_t)0) #elif defined(PTHREAD_LOCK) #include <pthread.h> @@ -44,6 +45,7 @@ #define lock_trylock(l) (pthread_mutex_trylock(l) != 0) #define lock_unlock(l) (pthread_mutex_unlock(l) != 0) #define lock_unlock_blind(l) pthread_mutex_unlock(l) +#define lock_static_initializer PTHREAD_MUTEX_INITIALIZER #else #define swift_lock_type volatile int @@ -52,12 +54,12 @@ INLINE static int lock_lock(volatile int *l) { while (atomic_cas(l, 0, 1) != 0) ; - // while( *l ); return 0; } #define lock_trylock(l) ((*(l)) ? 
1 : atomic_cas(l, 0, 1)) #define lock_unlock(l) (atomic_cas(l, 1, 0) != 1) #define lock_unlock_blind(l) atomic_cas(l, 1, 0) +#define lock_static_initializer 0 #endif #endif /* SWIFT_LOCK_H */ diff --git a/src/logger.c b/src/logger.c index 5fd4145aa1b042ed806dd3fe5487d094600b66c4..8be521b27f949ea0d496a5207335f1ec68208489 100644 --- a/src/logger.c +++ b/src/logger.c @@ -21,8 +21,11 @@ #include "../config.h" #ifdef HAVE_POSIX_FALLOCATE /* Are we on a sensible platform? */ +#ifdef WITH_LOGGER /* Some standard headers. */ +#include <hdf5.h> +#include <math.h> #include <stdint.h> #include <stdlib.h> #include <string.h> @@ -33,8 +36,100 @@ /* Local headers. */ #include "atomic.h" #include "dump.h" +#include "engine.h" #include "error.h" #include "part.h" +#include "units.h" + +/* + * Those are definitions from the format and therefore should not be changed! + */ +/* number of bytes for a mask */ +// TODO change this to number of bits +#define logger_mask_size 1 + +/* number of bits for chunk header */ +#define logger_header_bytes 8 + +/* number of bytes for an offset */ +#define logger_offset_size logger_header_bytes - logger_mask_size + +/* number of bytes for the version information */ +#define logger_version_size 20 + +/* number of bytes for the labels in the header */ +#define logger_label_size 20 + +/* number of bytes for the number in the header */ +#define logger_number_size 4 + +char logger_version[logger_version_size] = "0.1"; + +const struct mask_data logger_mask_data[logger_count_mask] = { + /* Particle's position */ + {3 * sizeof(double), 1 << logger_x, "positions"}, + /* Particle's velocity */ + {3 * sizeof(float), 1 << logger_v, "velocities"}, + /* Particle's acceleration */ + {3 * sizeof(float), 1 << logger_a, "accelerations"}, + /* Particle's entropy */ + {sizeof(float), 1 << logger_u, "entropy"}, + /* Particle's smoothing length */ + {sizeof(float), 1 << logger_h, "smoothing length"}, + /* Particle's density */ + {sizeof(float), 1 << logger_rho, "density"}, 
+ /* Particle's constants: mass (float) and ID (long long) */ + {sizeof(float) + sizeof(long long), 1 << logger_consts, "consts"}, + /* Simulation time stamp: integertime and double time (e.g. scale + factor or time) */ + {sizeof(integertime_t) + sizeof(double), 1 << logger_timestamp, + "timestamp"}}; + +/** + * @brief Write the header of a chunk (offset + mask). + * + * This is maybe broken for big(?) endian. + * + * @param buff The writing buffer + * @param mask The mask to write + * @param offset The old offset + * @param offset_new The new offset + * + * @return updated buff + */ +char *logger_write_chunk_header(char *buff, const unsigned int *mask, + const size_t *offset, const size_t offset_new) { + /* write mask */ + memcpy(buff, mask, logger_mask_size); + buff += logger_mask_size; + + /* write offset */ + size_t diff_offset = offset_new - *offset; + memcpy(buff, &diff_offset, logger_offset_size); + buff += logger_offset_size; + + return buff; +} + +/** + * @brief Write to the dump + * + * @param d #dump file + * @param offset (return) offset of the data + * @param size number of bytes to write + * @param p pointer to the data + */ +void logger_write_data(struct dump *d, size_t *offset, size_t size, + const void *p) { + /* get buffer */ + char *buff = dump_get(d, size, offset); + + /* write data to the buffer */ + memcpy(buff, p, size); + + /* Update offset to end of chunk */ + *offset += size; +} /** * @brief Compute the size of a message given its mask. @@ -43,119 +138,136 @@ * * @return The size of the logger message in bytes. */ -int logger_size(unsigned int mask) { +int logger_compute_chunk_size(unsigned int mask) { /* Start with 8 bytes for the header. */ - int size = 8; + int size = logger_mask_size + logger_offset_size; /* Is this a particle or a timestep? */ - if (mask & logger_mask_timestamp) { + if (mask & logger_mask_data[logger_timestamp].mask) { /* The timestamp should not contain any other bits. 
*/ - if (mask != logger_mask_timestamp) + if (mask != logger_mask_data[logger_timestamp].mask) error("Timestamps should not include any other data."); /* A timestamp consists of an unsigned long long int. */ - size += sizeof(unsigned long long int); + size += logger_mask_data[logger_timestamp].size; } else { - /* Particle position as three doubles. */ - if (mask & logger_mask_x) size += 3 * sizeof(double); - - /* Particle velocity as three floats. */ - if (mask & logger_mask_v) size += 3 * sizeof(float); - - /* Particle accelleration as three floats. */ - if (mask & logger_mask_a) size += 3 * sizeof(float); + for (int i = 0; i < logger_count_mask; i++) { + if (mask & logger_mask_data[i].mask) { + size += logger_mask_data[i].size; + } + } + } - /* Particle internal energy as a single float. */ - if (mask & logger_mask_u) size += sizeof(float); + return size; +} - /* Particle smoothing length as a single float. */ - if (mask & logger_mask_h) size += sizeof(float); +/** + * @brief log all particles in the engine. + * + * @param log The #logger + * @param e The #engine + */ +void logger_log_all(struct logger *log, const struct engine *e) { - /* Particle density as a single float. */ - if (mask & logger_mask_rho) size += sizeof(float); + /* Ensure that enough space is available */ + logger_ensure_size(log, e->total_nr_parts, e->total_nr_gparts, 0); +#ifdef SWIFT_DEBUG_CHECKS + message("Need to implement stars"); +#endif - /* Particle constants, which is a bit more complicated. 
*/ - if (mask & logger_mask_rho) { - size += sizeof(float) + // mass - sizeof(long long); // id - } + /* some constants */ + const struct space *s = e->s; + const unsigned int mask = + logger_mask_data[logger_x].mask | logger_mask_data[logger_v].mask | + logger_mask_data[logger_a].mask | logger_mask_data[logger_u].mask | + logger_mask_data[logger_h].mask | logger_mask_data[logger_rho].mask | + logger_mask_data[logger_consts].mask; + + /* loop over all parts */ + for (long long i = 0; i < e->total_nr_parts; i++) { + logger_log_part(log, &s->parts[i], mask, + &s->xparts[i].logger_data.last_offset); + s->xparts[i].logger_data.steps_since_last_output = 0; } - return size; + /* loop over all gparts */ + if (e->total_nr_gparts > 0) error("Not implemented"); + + /* loop over all sparts */ + // TODO } /** * @brief Dump a #part to the log. * + * @param log The #logger * @param p The #part to dump. * @param mask The mask of the data to dump. - * @param offset Pointer to the offset of the previous log of this particle. - * @param dump The #dump in which to log the particle data. + * @param offset Pointer to the offset of the previous log of this particle; + * (return) offset of this log. */ -void logger_log_part(struct part *p, unsigned int mask, size_t *offset, - struct dump *dump) { +void logger_log_part(struct logger *log, const struct part *p, + unsigned int mask, size_t *offset) { /* Make sure we're not writing a timestamp. */ - if (mask & logger_mask_timestamp) + if (mask & logger_mask_data[logger_timestamp].mask) error("You should not log particles as timestamps."); /* Start by computing the size of the message. */ - const int size = logger_size(mask); + const int size = logger_compute_chunk_size(mask); /* Allocate a chunk of memory in the dump of the right size. */ size_t offset_new; - char *buff = (char *)dump_get(dump, size, &offset_new); + char *buff = (char *)dump_get(&log->dump, size, &offset_new); /* Write the header. 
*/ - uint64_t temp = (((uint64_t)(offset_new - *offset)) & 0xffffffffffffffULL) | - ((uint64_t)mask << 56); - memcpy(buff, &temp, 8); - buff += 8; + buff = logger_write_chunk_header(buff, &mask, offset, offset_new); /* Particle position as three doubles. */ - if (mask & logger_mask_x) { - memcpy(buff, p->x, 3 * sizeof(double)); - buff += 3 * sizeof(double); + if (mask & logger_mask_data[logger_x].mask) { + memcpy(buff, p->x, logger_mask_data[logger_x].size); + buff += logger_mask_data[logger_x].size; } /* Particle velocity as three floats. */ - if (mask & logger_mask_v) { - memcpy(buff, p->v, 3 * sizeof(float)); - buff += 3 * sizeof(float); + if (mask & logger_mask_data[logger_v].mask) { + memcpy(buff, p->v, logger_mask_data[logger_v].size); + buff += logger_mask_data[logger_v].size; } /* Particle accelleration as three floats. */ - if (mask & logger_mask_a) { - memcpy(buff, p->a_hydro, 3 * sizeof(float)); - buff += 3 * sizeof(float); + if (mask & logger_mask_data[logger_a].mask) { + memcpy(buff, p->a_hydro, logger_mask_data[logger_a].size); + buff += logger_mask_data[logger_a].size; } #if defined(GADGET2_SPH) /* Particle internal energy as a single float. */ - if (mask & logger_mask_u) { - memcpy(buff, &p->entropy, sizeof(float)); - buff += sizeof(float); + if (mask & logger_mask_data[logger_u].mask) { + memcpy(buff, &p->entropy, logger_mask_data[logger_u].size); + buff += logger_mask_data[logger_u].size; } /* Particle smoothing length as a single float. */ - if (mask & logger_mask_h) { - memcpy(buff, &p->h, sizeof(float)); - buff += sizeof(float); + if (mask & logger_mask_data[logger_h].mask) { + memcpy(buff, &p->h, logger_mask_data[logger_h].size); + buff += logger_mask_data[logger_h].size; } /* Particle density as a single float. 
*/ - if (mask & logger_mask_rho) { - memcpy(buff, &p->rho, sizeof(float)); - buff += sizeof(float); + if (mask & logger_mask_data[logger_rho].mask) { + memcpy(buff, &p->rho, logger_mask_data[logger_rho].size); + buff += logger_mask_data[logger_rho].size; } /* Particle constants, which is a bit more complicated. */ - if (mask & logger_mask_rho) { + if (mask & logger_mask_data[logger_consts].mask) { + // TODO make it dependent of logger_mask_data memcpy(buff, &p->mass, sizeof(float)); buff += sizeof(float); memcpy(buff, &p->id, sizeof(long long)); @@ -171,55 +283,55 @@ void logger_log_part(struct part *p, unsigned int mask, size_t *offset, /** * @brief Dump a #gpart to the log. * + * @param log The #logger * @param p The #gpart to dump. * @param mask The mask of the data to dump. - * @param offset Pointer to the offset of the previous log of this particle. - * @param dump The #dump in which to log the particle data. + * @param offset Pointer to the offset of the previous log of this particle; + * (return) offset of this log. */ -void logger_log_gpart(struct gpart *p, unsigned int mask, size_t *offset, - struct dump *dump) { +void logger_log_gpart(struct logger *log, const struct gpart *p, + unsigned int mask, size_t *offset) { /* Make sure we're not writing a timestamp. */ - if (mask & logger_mask_timestamp) + if (mask & logger_mask_data[logger_timestamp].mask) error("You should not log particles as timestamps."); /* Make sure we're not looging fields not supported by gparts. */ - if (mask & (logger_mask_u | logger_mask_rho)) + if (mask & + (logger_mask_data[logger_u].mask | logger_mask_data[logger_rho].mask)) error("Can't log SPH quantities for gparts."); /* Start by computing the size of the message. */ - const int size = logger_size(mask); + const int size = logger_compute_chunk_size(mask); /* Allocate a chunk of memory in the dump of the right size. 
*/ size_t offset_new; - char *buff = (char *)dump_get(dump, size, &offset_new); + char *buff = (char *)dump_get(&log->dump, size, &offset_new); /* Write the header. */ - uint64_t temp = (((uint64_t)(offset_new - *offset)) & 0xffffffffffffffULL) | - ((uint64_t)mask << 56); - memcpy(buff, &temp, 8); - buff += 8; + buff = logger_write_chunk_header(buff, &mask, offset, offset_new); /* Particle position as three doubles. */ - if (mask & logger_mask_x) { - memcpy(buff, p->x, 3 * sizeof(double)); - buff += 3 * sizeof(double); + if (mask & logger_mask_data[logger_x].mask) { + memcpy(buff, p->x, logger_mask_data[logger_x].size); + buff += logger_mask_data[logger_x].size; } /* Particle velocity as three floats. */ - if (mask & logger_mask_v) { - memcpy(buff, p->v_full, 3 * sizeof(float)); - buff += 3 * sizeof(float); + if (mask & logger_mask_data[logger_v].mask) { + memcpy(buff, p->v_full, logger_mask_data[logger_v].size); + buff += logger_mask_data[logger_v].size; } /* Particle accelleration as three floats. */ - if (mask & logger_mask_a) { - memcpy(buff, p->a_grav, 3 * sizeof(float)); - buff += 3 * sizeof(float); + if (mask & logger_mask_data[logger_a].mask) { + memcpy(buff, p->a_grav, logger_mask_data[logger_a].size); + buff += logger_mask_data[logger_a].size; } /* Particle constants, which is a bit more complicated. 
*/ - if (mask & logger_mask_rho) { + if (mask & logger_mask_data[logger_consts].mask) { + // TODO make it dependent of logger_mask_data memcpy(buff, &p->mass, sizeof(float)); buff += sizeof(float); memcpy(buff, &p->id_or_neg_offset, sizeof(long long)); @@ -230,29 +342,191 @@ void logger_log_gpart(struct gpart *p, unsigned int mask, size_t *offset, *offset = offset_new; } -void logger_log_timestamp(unsigned long long int timestamp, size_t *offset, - struct dump *dump) { +/** + * @brief write a timestamp + * + * @param log The #logger + * @param timestamp time to write + * @param time time or scale factor + * @param offset Pointer to the offset of the previous log of this particle; + * (return) offset of this log. + */ +void logger_log_timestamp(struct logger *log, integertime_t timestamp, + double time, size_t *offset) { + struct dump *dump = &log->dump; /* Start by computing the size of the message. */ - const int size = logger_size(logger_mask_timestamp); + const int size = + logger_compute_chunk_size(logger_mask_data[logger_timestamp].mask); /* Allocate a chunk of memory in the dump of the right size. */ size_t offset_new; char *buff = (char *)dump_get(dump, size, &offset_new); /* Write the header. */ - uint64_t temp = (((uint64_t)(offset_new - *offset)) & 0xffffffffffffffULL) | - ((uint64_t)logger_mask_timestamp << 56); - memcpy(buff, &temp, 8); - buff += 8; + unsigned int mask = logger_mask_data[logger_timestamp].mask; + buff = logger_write_chunk_header(buff, &mask, offset, offset_new); /* Store the timestamp. */ - memcpy(buff, &timestamp, sizeof(unsigned long long int)); + // TODO make it dependent of logger_mask_data + memcpy(buff, &timestamp, sizeof(integertime_t)); + buff += sizeof(integertime_t); + + /* Store the time */ + memcpy(buff, &time, sizeof(double)); /* Update the log message offset. */ *offset = offset_new; } +/** + * @brief Ensure that the buffer is large enough for a step. 
+ * + * Check if logger parameters are large enough to write all particles + * and ensure that enough space is available in the buffer. + * + * @param log The #logger + * @param total_nr_parts total number of part + * @param total_nr_gparts total number of gpart + * @param total_nr_sparts total number of spart + */ +void logger_ensure_size(struct logger *log, size_t total_nr_parts, + size_t total_nr_gparts, size_t total_nr_sparts) { + + /* count part memory */ + size_t limit = log->max_chunk_size; + + limit *= total_nr_parts; + + /* count gpart memory */ + if (total_nr_gparts > 0) error("Not implemented"); + + /* count spart memory */ + if (total_nr_sparts > 0) error("Not implemented"); + + /* ensure enough space in dump */ + dump_ensure(&log->dump, limit, log->buffer_scale * limit); +} + +/** + * @brief initialize the logger structure + * + * @param log The #logger + * @param params The #swift_params + */ +void logger_init(struct logger *log, struct swift_params *params) { + /* read parameters */ + log->delta_step = parser_get_param_int(params, "Logger:delta_step"); + size_t buffer_size = + parser_get_opt_param_float(params, "Logger:initial_buffer_size", 0.5) * + 1e9; + log->buffer_scale = + parser_get_opt_param_float(params, "Logger:buffer_scale", 10); + parser_get_param_string(params, "Logger:basename", log->base_name); + + /* set initial value of parameters */ + log->timestamp_offset = 0; + + /* generate dump filename */ + char logger_name_file[PARSER_MAX_LINE_SIZE]; + strcpy(logger_name_file, log->base_name); + strcat(logger_name_file, ".dump"); + + /* Compute max size for a particle chunk */ + int max_size = logger_offset_size + logger_mask_size; + + /* Loop over all fields except timestamp */ + for (int i = 0; i < logger_count_mask - 1; i++) { + max_size += logger_mask_data[i].size; + } + log->max_chunk_size = max_size; + + /* init dump */ + dump_init(&log->dump, logger_name_file, buffer_size); +} + +/** + * @brief Close dump file and deallocate memory + 
* @param log The #logger + */ +void logger_clean(struct logger *log) { dump_close(&log->dump); } + +/** + * @brief Write a file header to a logger file + * + * @param log The #logger + * @param dump The #dump in which to log the particle data. + * + */ +void logger_write_file_header(struct logger *log, const struct engine *e) { + + /* get required variables */ + struct dump *dump = &log->dump; + + size_t file_offset = dump->file_offset; + + if (file_offset != 0) + error( + "The logger is not empty." + "This function should be called before writing anything in the logger"); + + /* Write version information */ + logger_write_data(dump, &file_offset, logger_version_size, &logger_version); + + /* write offset direction */ + const int reversed = 0; + logger_write_data(dump, &file_offset, logger_number_size, &reversed); + + /* placeholder to write the offset of the first log here */ + char *skip_header = dump_get(dump, logger_offset_size, &file_offset); + + /* write number of bytes used for names */ + const int label_size = logger_label_size; + logger_write_data(dump, &file_offset, logger_number_size, &label_size); + + /* write number of masks */ + int count_mask = logger_count_mask; + logger_write_data(dump, &file_offset, logger_number_size, &count_mask); + + /* write masks */ + // loop over all mask type + for (int i = 0; i < logger_count_mask; i++) { + // mask name + logger_write_data(dump, &file_offset, logger_label_size, + &logger_mask_data[i].name); + + // mask size + logger_write_data(dump, &file_offset, logger_number_size, + &logger_mask_data[i].size); + } + + /* last step: write first offset */ + memcpy(skip_header, &file_offset, logger_offset_size); +} + +/** + * @brief read chunk header + * + * @param buff The reading buffer + * @param mask The mask to read + * @param offset (return) the offset pointed by this chunk (absolute) + * @param offset_cur The current chunk offset + * + * @return Number of bytes read + */ +__attribute__((always_inline)) INLINE static 
int logger_read_chunk_header( + const char *buff, unsigned int *mask, size_t *offset, size_t cur_offset) { + memcpy(mask, buff, logger_mask_size); + buff += logger_mask_size; + + *offset = 0; + memcpy(offset, buff, logger_offset_size); + *offset = cur_offset - *offset; + + return logger_mask_size + logger_offset_size; +} + /** * @brief Read a logger message and store the data in a #part. * @@ -269,56 +543,55 @@ int logger_read_part(struct part *p, size_t *offset, const char *buff) { buff = &buff[*offset]; /* Start by reading the logger mask for this entry. */ - uint64_t temp; - memcpy(&temp, buff, 8); - const int mask = temp >> 56; - *offset -= temp & 0xffffffffffffffULL; - buff += 8; + const size_t cur_offset = *offset; + unsigned int mask = 0; + buff += logger_read_chunk_header(buff, &mask, offset, cur_offset); /* We are only interested in particle data. */ - if (mask & logger_mask_timestamp) + if (mask & logger_mask_data[logger_timestamp].mask) error("Trying to read timestamp as particle."); /* Particle position as three doubles. */ - if (mask & logger_mask_x) { - memcpy(p->x, buff, 3 * sizeof(double)); - buff += 3 * sizeof(double); + if (mask & logger_mask_data[logger_x].mask) { + memcpy(p->x, buff, logger_mask_data[logger_x].size); + buff += logger_mask_data[logger_x].size; } /* Particle velocity as three floats. */ - if (mask & logger_mask_v) { - memcpy(p->v, buff, 3 * sizeof(float)); - buff += 3 * sizeof(float); + if (mask & logger_mask_data[logger_v].mask) { + memcpy(p->v, buff, logger_mask_data[logger_v].size); + buff += logger_mask_data[logger_v].size; } /* Particle accelleration as three floats. */ - if (mask & logger_mask_a) { - memcpy(p->a_hydro, buff, 3 * sizeof(float)); - buff += 3 * sizeof(float); + if (mask & logger_mask_data[logger_a].mask) { + memcpy(p->a_hydro, buff, logger_mask_data[logger_a].size); + buff += logger_mask_data[logger_a].size; } #if defined(GADGET2_SPH) /* Particle internal energy as a single float. 
*/ - if (mask & logger_mask_u) { - memcpy(&p->entropy, buff, sizeof(float)); - buff += sizeof(float); + if (mask & logger_mask_data[logger_u].mask) { + memcpy(&p->entropy, buff, logger_mask_data[logger_u].size); + buff += logger_mask_data[logger_u].size; } /* Particle smoothing length as a single float. */ - if (mask & logger_mask_h) { - memcpy(&p->h, buff, sizeof(float)); - buff += sizeof(float); + if (mask & logger_mask_data[logger_h].mask) { + memcpy(&p->h, buff, logger_mask_data[logger_h].size); + buff += logger_mask_data[logger_h].size; } /* Particle density as a single float. */ - if (mask & logger_mask_rho) { - memcpy(&p->rho, buff, sizeof(float)); - buff += sizeof(float); + if (mask & logger_mask_data[logger_rho].mask) { + memcpy(&p->rho, buff, logger_mask_data[logger_rho].size); + buff += logger_mask_data[logger_rho].size; } /* Particle constants, which is a bit more complicated. */ - if (mask & logger_mask_rho) { + if (mask & logger_mask_data[logger_rho].mask) { + // TODO make it dependent of logger_mask_data memcpy(&p->mass, buff, sizeof(float)); buff += sizeof(float); memcpy(&p->id, buff, sizeof(long long)); @@ -347,40 +620,40 @@ int logger_read_gpart(struct gpart *p, size_t *offset, const char *buff) { buff = &buff[*offset]; /* Start by reading the logger mask for this entry. */ - uint64_t temp; - memcpy(&temp, buff, 8); - const int mask = temp >> 56; - *offset -= temp & 0xffffffffffffffULL; - buff += 8; + const size_t cur_offset = *offset; + unsigned int mask = 0; + buff += logger_read_chunk_header(buff, &mask, offset, cur_offset); /* We are only interested in particle data. */ - if (mask & logger_mask_timestamp) + if (mask & logger_mask_data[logger_timestamp].mask) error("Trying to read timestamp as particle."); /* We can't store all part fields in a gpart. 
*/ - if (mask & (logger_mask_u | logger_mask_rho)) + if (mask & + (logger_mask_data[logger_u].mask | logger_mask_data[logger_rho].mask)) error("Trying to read SPH quantities into a gpart."); /* Particle position as three doubles. */ - if (mask & logger_mask_x) { - memcpy(p->x, buff, 3 * sizeof(double)); - buff += 3 * sizeof(double); + if (mask & logger_mask_data[logger_x].mask) { + memcpy(p->x, buff, logger_mask_data[logger_x].size); + buff += logger_mask_data[logger_x].size; } /* Particle velocity as three floats. */ - if (mask & logger_mask_v) { - memcpy(p->v_full, buff, 3 * sizeof(float)); - buff += 3 * sizeof(float); + if (mask & logger_mask_data[logger_v].mask) { + memcpy(p->v_full, buff, logger_mask_data[logger_v].size); + buff += logger_mask_data[logger_v].size; } /* Particle accelleration as three floats. */ - if (mask & logger_mask_a) { - memcpy(p->a_grav, buff, 3 * sizeof(float)); - buff += 3 * sizeof(float); + if (mask & logger_mask_data[logger_a].mask) { + memcpy(p->a_grav, buff, logger_mask_data[logger_a].size); + buff += logger_mask_data[logger_a].size; } /* Particle constants, which is a bit more complicated. */ - if (mask & logger_mask_rho) { + if (mask & logger_mask_data[logger_rho].mask) { + // TODO make it dependent of logger_mask_data memcpy(&p->mass, buff, sizeof(float)); buff += sizeof(float); memcpy(&p->id_or_neg_offset, buff, sizeof(long long)); @@ -401,32 +674,37 @@ int logger_read_gpart(struct gpart *p, size_t *offset, const char *buff) { * * @return The mask containing the values read. */ -int logger_read_timestamp(unsigned long long int *t, size_t *offset, - const char *buff) { +int logger_read_timestamp(unsigned long long int *t, double *time, + size_t *offset, const char *buff) { /* Jump to the offset. */ buff = &buff[*offset]; /* Start by reading the logger mask for this entry. 
*/ - uint64_t temp; - memcpy(&temp, buff, 8); - const int mask = temp >> 56; - *offset -= temp & 0xffffffffffffffULL; - buff += 8; + const size_t cur_offset = *offset; + unsigned int mask = 0; + buff += logger_read_chunk_header(buff, &mask, offset, cur_offset); /* We are only interested in timestamps. */ - if (!(mask & logger_mask_timestamp)) + if (!(mask & logger_mask_data[logger_timestamp].mask)) error("Trying to read timestamp from a particle."); /* Make sure we don't have extra fields. */ - if (mask != logger_mask_timestamp) + if (mask != logger_mask_data[logger_timestamp].mask) error("Timestamp message contains extra fields."); /* Copy the timestamp value from the buffer. */ + // TODO make it dependent of logger_mask_data memcpy(t, buff, sizeof(unsigned long long int)); + buff += sizeof(unsigned long long int); + + /* Copy the timestamp value from the buffer. */ + memcpy(time, buff, sizeof(double)); /* Finally, return the mask of the values we just read. */ return mask; } +#endif /* WITH_LOGGER */ + #endif /* HAVE_POSIX_FALLOCATE */ diff --git a/src/logger.h b/src/logger.h index 596c0903750404d0934e0d3843a5461523700e9e..56e2c8ab94c66b24df1800877bb9cfb129c3e645 100644 --- a/src/logger.h +++ b/src/logger.h @@ -19,11 +19,21 @@ #ifndef SWIFT_LOGGER_H #define SWIFT_LOGGER_H +#ifdef WITH_LOGGER + /* Includes. */ -#include "part.h" +#include "common_io.h" +#include "dump.h" +#include "inline.h" +#include "timeline.h" +#include "units.h" /* Forward declaration */ struct dump; +struct gpart; +struct part; +/* TODO remove dependency */ +struct engine; /** * Logger entries contain messages representing the particle data at a given @@ -59,31 +69,111 @@ struct dump; * The offset refers to the relative location of the previous message for the * same particle or for the previous timestamp (if mask bit 7 is set). I.e. * the previous log entry will be at the address of the current mask byte minus - * the unsigned value stored in the offset. 
An offset of zero indicates that - * this is the first message for the given particle/timestamp. + * the unsigned value stored in the offset. An offset equal to the chunk offset + * indicated that this is the first message for the given particle/timestamp. */ /* Some constants. */ -#define logger_mask_x 1 -#define logger_mask_v 2 -#define logger_mask_a 4 -#define logger_mask_u 8 -#define logger_mask_h 16 -#define logger_mask_rho 32 -#define logger_mask_consts 64 -#define logger_mask_timestamp 128 +enum logger_masks_number { + logger_x = 0, + logger_v = 1, + logger_a = 2, + logger_u = 3, + logger_h = 4, + logger_rho = 5, + logger_consts = 6, + logger_timestamp = 7, /* expect it to be before count */ + logger_count_mask = 8, /* Need to be the last */ +} __attribute__((packed)); + +struct mask_data { + /* Number of bytes for a mask */ + int size; + /* Mask value */ + unsigned int mask; + /* name of the mask */ + char name[100]; +}; + +extern const struct mask_data logger_mask_data[logger_count_mask]; + +/* Size of the strings. */ +#define logger_string_length 200 + +/* structure containing global data */ +struct logger { + /* Number of particle steps between dumping a chunk of data */ + short int delta_step; + + /* Logger basename */ + char base_name[logger_string_length]; + + /* Dump file */ + struct dump dump; + + /* timestamp offset for logger*/ + size_t timestamp_offset; + + /* scaling factor when buffer is too small */ + float buffer_scale; + + /* Size of a chunk if every mask are activated */ + int max_chunk_size; + +} SWIFT_STRUCT_ALIGN; + +/* required structure for each particle type */ +struct logger_part_data { + /* Number of particle updates since last output */ + int steps_since_last_output; + + /* offset of last particle log entry */ + size_t last_offset; +}; /* Function prototypes. 
*/ -int logger_size(unsigned int mask); -void logger_log_part(struct part *p, unsigned int mask, size_t *offset, - struct dump *dump); -void logger_log_gpart(struct gpart *p, unsigned int mask, size_t *offset, - struct dump *dump); -void logger_log_timestamp(unsigned long long int t, size_t *offset, - struct dump *dump); +int logger_compute_chunk_size(unsigned int mask); +void logger_log_all(struct logger *log, const struct engine *e); +void logger_log_part(struct logger *log, const struct part *p, + unsigned int mask, size_t *offset); +void logger_log_gpart(struct logger *log, const struct gpart *p, + unsigned int mask, size_t *offset); +void logger_init(struct logger *log, struct swift_params *params); +void logger_clean(struct logger *log); +void logger_log_timestamp(struct logger *log, integertime_t t, double time, + size_t *offset); +void logger_ensure_size(struct logger *log, size_t total_nr_parts, + size_t total_nr_gparts, size_t total_nr_sparts); +void logger_write_file_header(struct logger *log, const struct engine *e); + int logger_read_part(struct part *p, size_t *offset, const char *buff); int logger_read_gpart(struct gpart *p, size_t *offset, const char *buff); -int logger_read_timestamp(unsigned long long int *t, size_t *offset, - const char *buff); +int logger_read_timestamp(unsigned long long int *t, double *time, + size_t *offset, const char *buff); + +/** + * @brief Initialize the logger data for a particle. + * + * @param logger The #logger_part_data. + */ +INLINE static void logger_part_data_init(struct logger_part_data *logger) { + logger->last_offset = 0; + logger->steps_since_last_output = INT_MAX; +} + +/** + * @brief Should this particle write its data now ? + * + * @param xp The #xpart. + * @param e The #engine containing information about the current time. + * @return 1 if the #part should write, 0 otherwise. 
+ */ +__attribute__((always_inline)) INLINE static int logger_should_write( + const struct logger_part_data *logger_data, const struct logger *log) { + + return (logger_data->steps_since_last_output > log->delta_step); +} + +#endif /* WITH_LOGGER */ #endif /* SWIFT_LOGGER_H */ diff --git a/src/logger_io.c b/src/logger_io.c new file mode 100644 index 0000000000000000000000000000000000000000..a0a5ba1db85aa4eb96ee140966a47393ba5a3b68 --- /dev/null +++ b/src/logger_io.c @@ -0,0 +1,299 @@ +/******************************************************************************* + * This file is part of SWIFT. + * Copyright (c) 2012 Pedro Gonnet (pedro.gonnet@durham.ac.uk), + * Matthieu Schaller (matthieu.schaller@durham.ac.uk). + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published + * by the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public License + * along with this program. If not, see <http://www.gnu.org/licenses/>. + * + ******************************************************************************/ + +/* Config parameters. */ +#include "../config.h" + +#ifdef WITH_LOGGER + +/* Some standard headers. */ +#include <hdf5.h> +#include <math.h> +#include <stddef.h> +#include <stdio.h> +#include <stdlib.h> +#include <string.h> + +/* This object's header. */ +#include "logger_io.h" + +/* Local includes. 
*/ +#include "chemistry_io.h" +#include "common_io.h" +#include "cooling.h" +#include "dimension.h" +#include "engine.h" +#include "error.h" +#include "gravity_io.h" +#include "gravity_properties.h" +#include "hydro_io.h" +#include "hydro_properties.h" +#include "io_properties.h" +#include "kernel_hydro.h" +#include "parallel_io.h" +#include "part.h" +#include "serial_io.h" +#include "single_io.h" +#include "stars_io.h" +#include "units.h" +#include "xmf.h" + +/** + * @brief Writes an HDF5 index file + * + * @param e The engine containing all the system. + * @param baseName The common part of the snapshot file name. + * @param internal_units The #unit_system used internally + * @param snapshot_units The #unit_system used in the snapshots + * + * Creates an HDF5 output file and writes the offset and id of particles + * contained in the engine. If such a file already exists, it is erased and + * replaced by the new one. + * + * Calls #error() if an error occurs. + * + */ +void write_index_single(struct engine* e, const char* baseName, + const struct unit_system* internal_units, + const struct unit_system* snapshot_units) { + + hid_t h_file = 0, h_grp = 0; + const size_t Ngas = e->s->nr_parts; + const size_t Nstars = e->s->nr_sparts; + const size_t Ntot = e->s->nr_gparts; + int periodic = e->s->periodic; + int numFiles = 1; + struct part* parts = e->s->parts; + struct xpart* xparts = e->s->xparts; + // struct gpart* gparts = e->s->gparts; + struct gpart* dmparts = NULL; + // struct spart* sparts = e->s->sparts; + static int outputCount = 0; + + struct logger* log = e->logger; + + /* Number of unassociated gparts */ + const size_t Ndm = Ntot > 0 ? 
Ntot - (Ngas + Nstars) : 0; + + long long N_total[swift_type_count] = {Ngas, Ndm, 0, 0, Nstars, 0}; + + /* File name */ + char fileName[FILENAME_BUFFER_SIZE]; + snprintf(fileName, FILENAME_BUFFER_SIZE, "%s_%04i.hdf5", baseName, + outputCount); + + /* Open file */ + /* message("Opening file '%s'.", fileName); */ + h_file = H5Fcreate(fileName, H5F_ACC_TRUNC, H5P_DEFAULT, H5P_DEFAULT); + if (h_file < 0) { + error("Error while opening file '%s'.", fileName); + } + + /* Open header to write simulation properties */ + /* message("Writing runtime parameters..."); */ + h_grp = + H5Gcreate(h_file, "/RuntimePars", H5P_DEFAULT, H5P_DEFAULT, H5P_DEFAULT); + if (h_grp < 0) error("Error while creating runtime parameters group\n"); + + /* Write the relevant information */ + io_write_attribute(h_grp, "PeriodicBoundariesOn", INT, &periodic, 1); + + /* Close runtime parameters */ + H5Gclose(h_grp); + + /* Open header to write simulation properties */ + /* message("Writing file header..."); */ + h_grp = H5Gcreate(h_file, "/Header", H5P_DEFAULT, H5P_DEFAULT, H5P_DEFAULT); + if (h_grp < 0) error("Error while creating file header\n"); + + /* Print the relevant information and print status */ + io_write_attribute(h_grp, "BoxSize", DOUBLE, e->s->dim, 3); + double dblTime = e->time; + io_write_attribute(h_grp, "Time", DOUBLE, &dblTime, 1); + io_write_attribute(h_grp, "Time Offset", UINT, &log->timestamp_offset, 1); + int dimension = (int)hydro_dimension; + io_write_attribute(h_grp, "Dimension", INT, &dimension, 1); + + /* GADGET-2 legacy values */ + /* Number of particles of each type */ + unsigned int numParticles[swift_type_count] = {0}; + unsigned int numParticlesHighWord[swift_type_count] = {0}; + for (int ptype = 0; ptype < swift_type_count; ++ptype) { + numParticles[ptype] = (unsigned int)N_total[ptype]; + numParticlesHighWord[ptype] = (unsigned int)(N_total[ptype] >> 32); + } + io_write_attribute(h_grp, "NumPart_ThisFile", LONGLONG, N_total, + swift_type_count); + 
io_write_attribute(h_grp, "NumPart_Total", UINT, numParticles, + swift_type_count); + io_write_attribute(h_grp, "NumPart_Total_HighWord", UINT, + numParticlesHighWord, swift_type_count); + double MassTable[swift_type_count] = {0}; + io_write_attribute(h_grp, "MassTable", DOUBLE, MassTable, swift_type_count); + unsigned int flagEntropy[swift_type_count] = {0}; + flagEntropy[0] = writeEntropyFlag(); + io_write_attribute(h_grp, "Flag_Entropy_ICs", UINT, flagEntropy, + swift_type_count); + io_write_attribute(h_grp, "NumFilesPerSnapshot", INT, &numFiles, 1); + + /* Close header */ + H5Gclose(h_grp); + + /* Print the code version */ + io_write_code_description(h_file); + + /* Print the SPH parameters */ + if (e->policy & engine_policy_hydro) { + h_grp = H5Gcreate(h_file, "/HydroScheme", H5P_DEFAULT, H5P_DEFAULT, + H5P_DEFAULT); + if (h_grp < 0) error("Error while creating SPH group"); + hydro_props_print_snapshot(h_grp, e->hydro_properties); + hydro_write_flavour(h_grp); + H5Gclose(h_grp); + } + + /* Print the gravity parameters */ + if (e->policy & engine_policy_self_gravity) { + h_grp = H5Gcreate(h_file, "/GravityScheme", H5P_DEFAULT, H5P_DEFAULT, + H5P_DEFAULT); + if (h_grp < 0) error("Error while creating gravity group"); + gravity_props_print_snapshot(h_grp, e->gravity_properties); + H5Gclose(h_grp); + } + + /* Print the runtime parameters */ + h_grp = + H5Gcreate(h_file, "/Parameters", H5P_DEFAULT, H5P_DEFAULT, H5P_DEFAULT); + if (h_grp < 0) error("Error while creating parameters group"); + parser_write_params_to_hdf5(e->parameter_file, h_grp, 1); + H5Gclose(h_grp); + + /* Print the runtime unused parameters */ + h_grp = H5Gcreate(h_file, "/UnusedParameters", H5P_DEFAULT, H5P_DEFAULT, + H5P_DEFAULT); + if (h_grp < 0) error("Error while creating parameters group"); + parser_write_params_to_hdf5(e->parameter_file, h_grp, 0); + H5Gclose(h_grp); + + /* Print the system of Units used in the spashot */ + io_write_unit_system(h_file, snapshot_units, "Units"); + + /* Print 
the system of Units used internally */ + io_write_unit_system(h_file, internal_units, "InternalCodeUnits"); + + /* Tell the user if a conversion will be needed */ + if (e->verbose) { + if (units_are_equal(snapshot_units, internal_units)) { + + message("Snapshot and internal units match. No conversion needed."); + + } else { + + message("Conversion needed from:"); + message("(Snapshot) Unit system: U_M = %e g.", + snapshot_units->UnitMass_in_cgs); + message("(Snapshot) Unit system: U_L = %e cm.", + snapshot_units->UnitLength_in_cgs); + message("(Snapshot) Unit system: U_t = %e s.", + snapshot_units->UnitTime_in_cgs); + message("(Snapshot) Unit system: U_I = %e A.", + snapshot_units->UnitCurrent_in_cgs); + message("(Snapshot) Unit system: U_T = %e K.", + snapshot_units->UnitTemperature_in_cgs); + message("to:"); + message("(internal) Unit system: U_M = %e g.", + internal_units->UnitMass_in_cgs); + message("(internal) Unit system: U_L = %e cm.", + internal_units->UnitLength_in_cgs); + message("(internal) Unit system: U_t = %e s.", + internal_units->UnitTime_in_cgs); + message("(internal) Unit system: U_I = %e A.", + internal_units->UnitCurrent_in_cgs); + message("(internal) Unit system: U_T = %e K.", + internal_units->UnitTemperature_in_cgs); + } + } + + /* Loop over all particle types */ + for (int ptype = 0; ptype < swift_type_count; ptype++) { + + /* Don't do anything if no particle of this kind */ + if (numParticles[ptype] == 0) continue; + + /* Open the particle group in the file */ + char partTypeGroupName[PARTICLE_GROUP_BUFFER_SIZE]; + snprintf(partTypeGroupName, PARTICLE_GROUP_BUFFER_SIZE, "/PartType%d", + ptype); + h_grp = H5Gcreate(h_file, partTypeGroupName, H5P_DEFAULT, H5P_DEFAULT, + H5P_DEFAULT); + if (h_grp < 0) { + error("Error while creating particle group.\n"); + } + + int num_fields = 0; + struct io_props list[100]; + size_t N = 0; + + /* Write particle fields from the particle structure */ + switch (ptype) { + + case swift_type_gas: + N = Ngas; + 
hydro_write_index(parts, xparts, list, &num_fields); + break; + + case swift_type_dark_matter: + error("TODO"); + break; + + case swift_type_stars: + N = Nstars; + error("TODO"); + // star_write_index(sparts, list, &num_fields); + break; + + default: + error("Particle Type %d not yet supported. Aborting", ptype); + } + + /* Write everything */ + for (int i = 0; i < num_fields; ++i) + writeArray(e, h_grp, fileName, NULL, partTypeGroupName, list[i], N, + internal_units, snapshot_units); + + /* Free temporary array */ + if (dmparts) { + free(dmparts); + dmparts = NULL; + } + + /* Close particle group */ + H5Gclose(h_grp); + } + + /* message("Done writing particles..."); */ + + /* Close file */ + H5Fclose(h_file); + + ++outputCount; +} + +#endif /* HAVE_HDF5 */ diff --git a/src/logger_io.h b/src/logger_io.h new file mode 100644 index 0000000000000000000000000000000000000000..f5b1274fb7b957d5b48bc8425bf784c586ac6a08 --- /dev/null +++ b/src/logger_io.h @@ -0,0 +1,61 @@ +/******************************************************************************* + * This file is part of SWIFT. + * Copyright (c) 2012 Matthieu Schaller (matthieu.schaller@durham.ac.uk). + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published + * by the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public License + * along with this program. If not, see <http://www.gnu.org/licenses/>. 
+ * + ******************************************************************************/ +#ifndef SWIFT_LOGGER_IO_H +#define SWIFT_LOGGER_IO_H + +/* Config parameters. */ +#include "../config.h" + +#ifdef WITH_LOGGER + +/* Includes. */ +#include "engine.h" +#include "io_properties.h" +#include "part.h" +#include "units.h" + +void write_index_single(struct engine* e, const char* baseName, + const struct unit_system* internal_units, + const struct unit_system* snapshot_units); + +/** + * @brief Specifies which particle fields to write to a dataset + * + * @param parts The particle array. + * @param list The list of i/o properties to write. + * @param num_fields The number of i/o fields to write. + * + * In this version, we only want the ids and the offset. + */ +__attribute__((always_inline)) INLINE static void hydro_write_index( + const struct part* parts, const struct xpart* xparts, struct io_props* list, + int* num_fields) { + + *num_fields = 2; + + /* List what we want to write */ + list[0] = io_make_output_field("ParticleIDs", ULONGLONG, 1, + UNIT_CONV_NO_UNITS, parts, id); + + list[1] = io_make_output_field("Offset", ULONGLONG, 1, UNIT_CONV_NO_UNITS, + xparts, logger_data.last_offset); +} +#endif + +#endif /* SWIFT_LOGGER_IO_H */ diff --git a/src/map.c b/src/map.c index b0a3117388cb77f4311cf7ee3c5d62a0937da655..68c3618fcdb10a618a97e5d1a2565d58db677cdb 100644 --- a/src/map.c +++ b/src/map.c @@ -73,9 +73,9 @@ void map_cells_plot(struct cell *c, void *data) { printf("%.16e %.16e %.16e\n\n\n", l[0] + h[0], l[1] + h[1], l[2]); if (!c->split) { - for (int k = 0; k < c->count; k++) - printf("0 0 0 %.16e %.16e %.16e\n", c->parts[k].x[0], c->parts[k].x[1], - c->parts[k].x[2]); + for (int k = 0; k < c->hydro.count; k++) + printf("0 0 0 %.16e %.16e %.16e\n", c->hydro.parts[k].x[0], + c->hydro.parts[k].x[1], c->hydro.parts[k].x[2]); printf("\n\n"); } /* else @@ -102,11 +102,11 @@ void map_check(struct part *p, struct cell *c, void *data) { void map_cellcheck(struct cell *c, 
void *data) { int *count = (int *)data; - atomic_add(count, c->count); + atomic_add(count, c->hydro.count); /* Loop over all parts and check if they are in the cell. */ - for (int k = 0; k < c->count; k++) { - struct part *p = &c->parts[k]; + for (int k = 0; k < c->hydro.count; k++) { + struct part *p = &c->hydro.parts[k]; if (p->x[0] < c->loc[0] || p->x[1] < c->loc[1] || p->x[2] < c->loc[2] || p->x[0] > c->loc[0] + c->width[0] || p->x[1] > c->loc[1] + c->width[1] || @@ -122,8 +122,8 @@ void map_cellcheck(struct cell *c, void *data) { } /* Loop over all gparts and check if they are in the cell. */ - for (int k = 0; k < c->gcount; k++) { - struct gpart *p = &c->gparts[k]; + for (int k = 0; k < c->grav.count; k++) { + struct gpart *p = &c->grav.parts[k]; if (p->x[0] < c->loc[0] || p->x[1] < c->loc[1] || p->x[2] < c->loc[2] || p->x[0] > c->loc[0] + c->width[0] || p->x[1] > c->loc[1] + c->width[1] || @@ -191,6 +191,13 @@ void map_h_max(struct part *p, struct cell *c, void *data) { if (p->h > (*p2)->h) *p2 = p; } +void map_stars_h_max(struct spart *p, struct cell *c, void *data) { + + struct spart **p2 = (struct spart **)data; + + if (p->h > (*p2)->h) *p2 = p; +} + /** * @brief Mapping function for neighbour count. 
*/ diff --git a/src/map.h b/src/map.h index 950a5fd96ebdc7177b41912b1565163f33de8701..6ad05e30df0644e1ee37b1b912bc11681ccf837c 100644 --- a/src/map.h +++ b/src/map.h @@ -34,6 +34,7 @@ void map_wcount_min(struct part *p, struct cell *c, void *data); void map_wcount_max(struct part *p, struct cell *c, void *data); void map_h_min(struct part *p, struct cell *c, void *data); void map_h_max(struct part *p, struct cell *c, void *data); +void map_stars_h_max(struct spart *p, struct cell *c, void *data); void map_icount(struct part *p, struct cell *c, void *data); void map_dump(struct part *p, struct cell *c, void *data); diff --git a/src/mesh_gravity.c b/src/mesh_gravity.c index 2359b8a9cdf785bce719a1d0379d177d00328b9e..e7005b083c94e20f5218923e443f71464ab383e1 100644 --- a/src/mesh_gravity.c +++ b/src/mesh_gravity.c @@ -110,14 +110,22 @@ __attribute__((always_inline)) INLINE static void CIC_set( double dx, double dy, double dz, double value) { /* Classic CIC interpolation */ - mesh[row_major_id_periodic(i + 0, j + 0, k + 0, N)] += value * tx * ty * tz; - mesh[row_major_id_periodic(i + 0, j + 0, k + 1, N)] += value * tx * ty * dz; - mesh[row_major_id_periodic(i + 0, j + 1, k + 0, N)] += value * tx * dy * tz; - mesh[row_major_id_periodic(i + 0, j + 1, k + 1, N)] += value * tx * dy * dz; - mesh[row_major_id_periodic(i + 1, j + 0, k + 0, N)] += value * dx * ty * tz; - mesh[row_major_id_periodic(i + 1, j + 0, k + 1, N)] += value * dx * ty * dz; - mesh[row_major_id_periodic(i + 1, j + 1, k + 0, N)] += value * dx * dy * tz; - mesh[row_major_id_periodic(i + 1, j + 1, k + 1, N)] += value * dx * dy * dz; + atomic_add_d(&mesh[row_major_id_periodic(i + 0, j + 0, k + 0, N)], + value * tx * ty * tz); + atomic_add_d(&mesh[row_major_id_periodic(i + 0, j + 0, k + 1, N)], + value * tx * ty * dz); + atomic_add_d(&mesh[row_major_id_periodic(i + 0, j + 1, k + 0, N)], + value * tx * dy * tz); + atomic_add_d(&mesh[row_major_id_periodic(i + 0, j + 1, k + 1, N)], + value * tx * dy * dz); + 
atomic_add_d(&mesh[row_major_id_periodic(i + 1, j + 0, k + 0, N)], + value * dx * ty * tz); + atomic_add_d(&mesh[row_major_id_periodic(i + 1, j + 0, k + 1, N)], + value * dx * ty * dz); + atomic_add_d(&mesh[row_major_id_periodic(i + 1, j + 1, k + 0, N)], + value * dx * dy * tz); + atomic_add_d(&mesh[row_major_id_periodic(i + 1, j + 1, k + 1, N)], + value * dx * dy * dz); } /** @@ -165,6 +173,74 @@ INLINE static void gpart_to_mesh_CIC(const struct gpart* gp, double* rho, int N, CIC_set(rho, N, i, j, k, tx, ty, tz, dx, dy, dz, mass); } +/** + * @brief Assigns all the #gpart of a #cell to a density mesh using the CIC + * method. + * + * @param c The #cell. + * @param rho The density mesh. + * @param N the size of the mesh along one axis. + * @param fac The width of a mesh cell. + * @param dim The dimensions of the simulation box. + */ +void cell_gpart_to_mesh_CIC(const struct cell* c, double* rho, int N, + double fac, const double dim[3]) { + const int gcount = c->grav.count; + const struct gpart* gparts = c->grav.parts; + + /* Assign all the gpart of that cell to the mesh */ + for (int i = 0; i < gcount; ++i) + gpart_to_mesh_CIC(&gparts[i], rho, N, fac, dim); +} + +/** + * @brief Shared information about the mesh to be used by all the threads in the + * pool. + */ +struct cic_mapper_data { + const struct cell* cells; + double* rho; + int N; + double fac; + double dim[3]; +}; + +/** + * @brief Threadpool mapper function for the mesh CIC assignment of a cell. + * + * @param map_data A chunk of the list of local cells. + * @param num The number of cells in the chunk. + * @param extra The information about the mesh and cells. 
+ */ +void cell_gpart_to_mesh_CIC_mapper(void* map_data, int num, void* extra) { + + /* Unpack the shared information */ + const struct cic_mapper_data* data = (struct cic_mapper_data*)extra; + const struct cell* cells = data->cells; + double* rho = data->rho; + const int N = data->N; + const double fac = data->fac; + const double dim[3] = {data->dim[0], data->dim[1], data->dim[2]}; + + /* Pointer to the chunk to be processed */ + int* local_cells = (int*)map_data; + + // MATTHIEU: This could in principle be improved by creating a local mesh + // with just the extent required for the cell. Assignment can + // then be done without atomics. That local mesh is then added + // atomically to the global one. + + /* Loop over the elements assigned to this thread */ + for (int i = 0; i < num; ++i) { + + /* Pointer to local cell */ + const struct cell* c = &cells[local_cells[i]]; + + /* Assign this cell's content to the mesh */ + cell_gpart_to_mesh_CIC(c, rho, N, fac, dim); + } +} + /** * @brief Computes the potential on a gpart from a given mesh using the CIC * method. @@ -279,18 +355,24 @@ void mesh_to_gparts_CIC(struct gpart* gp, const double* pot, int N, double fac, * * @param mesh The #pm_mesh used to store the potential. * @param s The #space containing the particles. + * @param tp The #threadpool object used for parallelisation. * @param verbose Are we talkative? */ void pm_mesh_compute_potential(struct pm_mesh* mesh, const struct space* s, - int verbose) { + struct threadpool* tp, int verbose) { #ifdef HAVE_FFTW const double r_s = mesh->r_s; const double box_size = s->dim[0]; const double dim[3] = {s->dim[0], s->dim[1], s->dim[2]}; + const int* local_cells = s->local_cells_top; + const int nr_local_cells = s->nr_local_cells; if (r_s <= 0.) 
error("Invalid value of a_smooth"); + if (mesh->dim[0] != dim[0] || mesh->dim[1] != dim[1] || + mesh->dim[2] != dim[2]) + error("Domain size does not match the value stored in the space."); /* Some useful constants */ const int N = mesh->N; @@ -314,30 +396,61 @@ void pm_mesh_compute_potential(struct pm_mesh* mesh, const struct space* s, fftw_plan inverse_plan = fftw_plan_dft_c2r_3d( N, N, N, frho, rho, FFTW_ESTIMATE | FFTW_DESTROY_INPUT); - const ticks tic = getticks(); + ticks tic = getticks(); /* Zero everything */ bzero(rho, N * N * N * sizeof(double)); - /* Do a CIC mesh assignment of the gparts */ - for (size_t i = 0; i < s->nr_gparts; ++i) - gpart_to_mesh_CIC(&s->gparts[i], rho, N, cell_fac, dim); + /* Gather the mesh shared information to be used by the threads */ + struct cic_mapper_data data; + data.cells = s->cells_top; + data.rho = rho; + data.N = N; + data.fac = cell_fac; + data.dim[0] = dim[0]; + data.dim[1] = dim[1]; + data.dim[2] = dim[2]; + + /* Do a parallel CIC mesh assignment of the gparts but only using + the local top-level cells */ + threadpool_map(tp, cell_gpart_to_mesh_CIC_mapper, (void*)local_cells, + nr_local_cells, sizeof(int), 0, (void*)&data); if (verbose) - message("gpart assignment took %.3f %s.", + message("Gpart assignment took %.3f %s.", clocks_from_ticks(getticks() - tic), clocks_getunit()); +#ifdef WITH_MPI + + MPI_Barrier(MPI_COMM_WORLD); + tic = getticks(); + + /* Merge everybody's share of the density mesh */ + MPI_Allreduce(MPI_IN_PLACE, rho, N * N * N, MPI_DOUBLE, MPI_SUM, + MPI_COMM_WORLD); + + if (verbose) + message("Mesh comunication took %.3f %s.", + clocks_from_ticks(getticks() - tic), clocks_getunit()); +#endif + /* message("\n\n\n DENSITY"); */ /* print_array(rho, N); */ - const ticks tic2 = getticks(); + tic = getticks(); /* Fourier transform to go to magic-land */ fftw_execute(forward_plan); + if (verbose) + message("Forward Fourier transform took %.3f %s.", + clocks_from_ticks(getticks() - tic), clocks_getunit()); 
+ /* frho now contains the Fourier transform of the density field */ /* frho contains NxNx(N/2+1) complex numbers */ + tic = getticks(); + /* Some common factors */ const double green_fac = -1. / (M_PI * box_size); const double a_smooth2 = 4. * M_PI * M_PI * r_s * r_s / (box_size * box_size); @@ -399,18 +512,25 @@ void pm_mesh_compute_potential(struct pm_mesh* mesh, const struct space* s, frho[0][0] = 0.; frho[0][1] = 0.; + if (verbose) + message("Applying Green function took %.3f %s.", + clocks_from_ticks(getticks() - tic), clocks_getunit()); + + tic = getticks(); + /* Fourier transform to come back from magic-land */ fftw_execute(inverse_plan); + if (verbose) + message("Backwards Fourier transform took %.3f %s.", + clocks_from_ticks(getticks() - tic), clocks_getunit()); + /* rho now contains the potential */ /* This array is now again NxNxN real numbers */ + /* Let's store it in the structure */ mesh->potential = rho; - if (verbose) - message("Fourier-space PM took %.3f %s.", - clocks_from_ticks(getticks() - tic2), clocks_getunit()); - /* message("\n\n\n POTENTIAL"); */ /* print_array(potential, N); */ @@ -450,8 +570,20 @@ void pm_mesh_interpolate_forces(const struct pm_mesh* mesh, for (int i = 0; i < gcount; ++i) { struct gpart* gp = &gparts[i]; - if (gpart_is_active(gp, e)) + if (gpart_is_active(gp, e)) { + +#ifdef SWIFT_DEBUG_CHECKS + /* Check that particles have been drifted to the current time */ + if (gp->ti_drift != e->ti_current) + error("gpart not drifted to current time"); + + /* Check that the particle was initialised */ + if (gp->initialised == 0) + error("Adding forces to an un-initialised gpart."); +#endif + mesh_to_gparts_CIC(gp, potential, N, cell_fac, dim); + } } #else error("No FFTW library found. Cannot compute periodic long-range forces."); @@ -464,18 +596,20 @@ void pm_mesh_interpolate_forces(const struct pm_mesh* mesh, * @param mesh The #pm_mesh to initialise. * @param props The propoerties of the gravity scheme. 
* @param dim The (comoving) side-lengths of the simulation volume. + * @param nr_threads The number of threads on this MPI rank. */ void pm_mesh_init(struct pm_mesh* mesh, const struct gravity_props* props, - double dim[3]) { + double dim[3], int nr_threads) { #ifdef HAVE_FFTW - if (dim[0] != dim[1] || dim[0] != dim[1]) + if (dim[0] != dim[1] || dim[0] != dim[2]) error("Doing mesh-gravity on a non-cubic domain"); const int N = props->mesh_size; const double box_size = dim[0]; + mesh->nr_threads = nr_threads; mesh->periodic = 1; mesh->N = N; mesh->dim[0] = dim[0]; @@ -487,6 +621,17 @@ void pm_mesh_init(struct pm_mesh* mesh, const struct gravity_props* props, mesh->r_cut_max = mesh->r_s * props->r_cut_max_ratio; mesh->r_cut_min = mesh->r_s * props->r_cut_min_ratio; + if (2. * mesh->r_cut_max > box_size) + error("Mesh too small or r_cut_max too big for this box size"); + +#ifdef HAVE_THREADED_FFTW + /* Initialise the thread-parallel FFTW version */ + if (N >= 64) { + fftw_init_threads(); + fftw_plan_with_nthreads(nr_threads); + } +#endif + /* Allocate the memory for the combined density and potential array */ mesh->potential = (double*)fftw_malloc(sizeof(double) * N * N * N); if (mesh->potential == NULL) @@ -525,6 +670,10 @@ void pm_mesh_init_no_mesh(struct pm_mesh* mesh, double dim[3]) { */ void pm_mesh_clean(struct pm_mesh* mesh) { +#ifdef HAVE_THREADED_FFTW + fftw_cleanup_threads(); +#endif + if (mesh->potential) free(mesh->potential); mesh->potential = 0; } @@ -551,15 +700,26 @@ void pm_mesh_struct_restore(struct pm_mesh* mesh, FILE* stream) { restart_read_blocks((void*)mesh, sizeof(struct pm_mesh), 1, stream, NULL, "gravity props"); + + if (mesh->periodic) { + #ifdef HAVE_FFTW - const int N = mesh->N; + const int N = mesh->N; - /* Allocate the memory for the combined density and potential array */ - mesh->potential = (double*)fftw_malloc(sizeof(double) * N * N * N); - if (mesh->potential == NULL) - error("Error allocating memory for the long-range gravity 
mesh."); +#ifdef HAVE_THREADED_FFTW + /* Initialise the thread-parallel FFTW version */ + if (N >= 64) { + fftw_init_threads(); + fftw_plan_with_nthreads(mesh->nr_threads); + } +#endif + /* Allocate the memory for the combined density and potential array */ + mesh->potential = (double*)fftw_malloc(sizeof(double) * N * N * N); + if (mesh->potential == NULL) + error("Error allocating memory for the long-range gravity mesh."); #else - error("No FFTW library found. Cannot compute periodic long-range forces."); + error("No FFTW library found. Cannot compute periodic long-range forces."); #endif + } } diff --git a/src/mesh_gravity.h b/src/mesh_gravity.h index c512a53ca349816caf4c666c6f504dd4b717bcb7..1b2d997398ee6f3f665340cedb790c241e641cfa 100644 --- a/src/mesh_gravity.h +++ b/src/mesh_gravity.h @@ -29,6 +29,7 @@ /* Forward declarations */ struct space; struct gpart; +struct threadpool; /** * @brief Data structure for the long-range periodic forces using a mesh @@ -38,6 +39,9 @@ struct pm_mesh { /*! Is the calculation using periodic BCs? */ int periodic; + /*! The number of threads used by the FFTW library */ + int nr_threads; + /*! Side-length of the mesh */ int N; @@ -64,10 +68,10 @@ struct pm_mesh { }; void pm_mesh_init(struct pm_mesh *mesh, const struct gravity_props *props, - double dim[3]); + double dim[3], int nr_threads); void pm_mesh_init_no_mesh(struct pm_mesh *mesh, double dim[3]); void pm_mesh_compute_potential(struct pm_mesh *mesh, const struct space *s, - int verbose); + struct threadpool *tp, int verbose); void pm_mesh_interpolate_forces(const struct pm_mesh *mesh, const struct engine *e, struct gpart *gparts, int gcount); diff --git a/src/minmax.h b/src/minmax.h index 90dd87968a94d9601a87fd3b826000c166a98966..e4d7c8788ea1e43d1c296a212193049a94347949 100644 --- a/src/minmax.h +++ b/src/minmax.h @@ -71,4 +71,36 @@ max(_temp, _z); \ }) +/** + * @brief Minimum of four numbers + * + * This macro evaluates its arguments exactly once. 
+ */ +#define min4(x, y, z, w) \ + ({ \ + const __typeof__(x) _x = (x); \ + const __typeof__(y) _y = (y); \ + const __typeof__(z) _z = (z); \ + const __typeof__(w) _w = (w); \ + const __typeof__(x) _temp1 = min(_x, _y); \ + const __typeof__(x) _temp2 = min(_z, _w); \ + min(_temp1, _temp2); \ + }) + +/** + * @brief Maximum of four numbers + * + * This macro evaluates its arguments exactly once. + */ +#define max4(x, y, z, w) \ + ({ \ + const __typeof__(x) _x = (x); \ + const __typeof__(y) _y = (y); \ + const __typeof__(z) _z = (z); \ + const __typeof__(w) _w = (w); \ + const __typeof__(x) _temp1 = max(_x, _y); \ + const __typeof__(x) _temp2 = max(_z, _w); \ + max(_temp1, _temp2); \ + }) + #endif /* SWIFT_MINMAX_H */ diff --git a/src/multipole.c b/src/multipole.c index bd5c6d6546fa0546108dcd53d7fe4060293c37a7..a77e6fce297802fb4118b7ac3d4c6a9bf4ecfd22 100644 --- a/src/multipole.c +++ b/src/multipole.c @@ -20,3 +20,70 @@ /* Config parameters. */ #include "../config.h" + +/* This object's header. */ +#include "multipole.h" + +/* MPI headers. */ +#ifdef WITH_MPI +#include <mpi.h> +#endif + +#ifdef WITH_MPI + +/* MPI data type for the multipole transfer and reduction */ +MPI_Datatype multipole_mpi_type; +MPI_Op multipole_mpi_reduce_op; + +/** + * @brief Apply a bit-by-bit XOR operattion on #gravity_tensors (i.e. does + * a^=b). + * + * @param a The #gravity_tensors to add to. + * @param b The #gravity_tensors to add. + */ +void gravity_binary_xor(struct gravity_tensors *a, + const struct gravity_tensors *b) { + + char *aa = (char *)a; + const char *bb = (const char *)b; + + for (size_t i = 0; i < sizeof(struct gravity_tensors); ++i) { + aa[i] ^= bb[i]; + } +} + +/** + * @brief MPI reduction function for the #gravity_tensors. + * + * @param invec Array of #gravity_tensors to read. + * @param inoutvec Array of #gravity_tensors to read and do the reduction into. + * @param len The length of the array. + * @param datatype The MPI type this function acts upon (unused). 
+ */ +void gravity_tensors_mpi_reduce(void *invec, void *inoutvec, int *len, + MPI_Datatype *datatype) { + + for (int i = 0; i < *len; ++i) { + gravity_binary_xor(&((struct gravity_tensors *)inoutvec)[i], + &((const struct gravity_tensors *)invec)[i]); + } +} + +void multipole_create_mpi_types(void) { + + /* Create the datatype for multipoles */ + /* We just consider each structure to be a byte field disregarding their */ + /* detailed content */ + if (MPI_Type_contiguous( + sizeof(struct gravity_tensors) / sizeof(unsigned char), MPI_BYTE, + &multipole_mpi_type) != MPI_SUCCESS || + MPI_Type_commit(&multipole_mpi_type) != MPI_SUCCESS) { + error("Failed to create MPI type for multipole."); + } + + /* And the reduction operator */ + MPI_Op_create(gravity_tensors_mpi_reduce, 1, &multipole_mpi_reduce_op); +} + +#endif diff --git a/src/multipole.h b/src/multipole.h index e0e6da32a2950d7fce164b2abc422302b7c7de5e..8139dc0548bb94b108d6e32da4b19808998f48d3 100644 --- a/src/multipole.h +++ b/src/multipole.h @@ -186,12 +186,12 @@ struct gravity_tensors { /*! The actual content */ struct { - /*! Multipole mass */ - struct multipole m_pole; - /*! Field tensor for the potential */ struct grav_tensor pot; + /*! Multipole mass */ + struct multipole m_pole; + /*! Centre of mass of the matter dsitribution */ double CoM[3]; @@ -207,6 +207,13 @@ struct gravity_tensors { }; } SWIFT_STRUCT_ALIGN; +#ifdef WITH_MPI +/* MPI datatypes for transfers */ +extern MPI_Datatype multipole_mpi_type; +extern MPI_Op multipole_mpi_reduce_op; +void multipole_create_mpi_types(void); +#endif + /** * @brief Reset the data of a #multipole. 
* @@ -240,17 +247,17 @@ INLINE static void gravity_drift(struct gravity_tensors *m, double dt) { m->CoM[2] += dz; #ifdef SWIFT_DEBUG_CHECKS - if (m->m_pole.vel[0] > m->m_pole.max_delta_vel[0]) + if (m->m_pole.vel[0] > m->m_pole.max_delta_vel[0] * 1.1) error("Invalid maximal velocity"); - if (m->m_pole.vel[0] < m->m_pole.min_delta_vel[0]) + if (m->m_pole.vel[0] < m->m_pole.min_delta_vel[0] * 1.1) error("Invalid minimal velocity"); - if (m->m_pole.vel[1] > m->m_pole.max_delta_vel[1]) + if (m->m_pole.vel[1] > m->m_pole.max_delta_vel[1] * 1.1) error("Invalid maximal velocity"); - if (m->m_pole.vel[1] < m->m_pole.min_delta_vel[1]) + if (m->m_pole.vel[1] < m->m_pole.min_delta_vel[1] * 1.1) error("Invalid minimal velocity"); - if (m->m_pole.vel[2] > m->m_pole.max_delta_vel[2]) + if (m->m_pole.vel[2] > m->m_pole.max_delta_vel[2] * 1.1) error("Invalid maximal velocity"); - if (m->m_pole.vel[2] < m->m_pole.min_delta_vel[2]) + if (m->m_pole.vel[2] < m->m_pole.min_delta_vel[2] * 1.1) error("Invalid minimal velocity"); #endif @@ -293,8 +300,8 @@ INLINE static void gravity_field_tensors_init(struct grav_tensor *l, * @param la The gravity tensors to add to. * @param lb The gravity tensors to add. */ -INLINE static void gravity_field_tensors_add(struct grav_tensor *la, - const struct grav_tensor *lb) { +INLINE static void gravity_field_tensors_add( + struct grav_tensor *restrict la, const struct grav_tensor *restrict lb) { #ifdef SWIFT_DEBUG_CHECKS if (lb->num_interacted == 0) error("Adding tensors that did not interact"); la->num_interacted += lb->num_interacted; @@ -502,8 +509,8 @@ INLINE static void gravity_multipole_print(const struct multipole *m) { * @param ma The multipole to add to. * @param mb The multipole to add. 
*/ -INLINE static void gravity_multipole_add(struct multipole *ma, - const struct multipole *mb) { +INLINE static void gravity_multipole_add(struct multipole *restrict ma, + const struct multipole *restrict mb) { /* Add 0th order term */ ma->M_000 += mb->M_000; @@ -1028,6 +1035,11 @@ INLINE static void gravity_P2M(struct gravity_tensors *multi, for (int k = 0; k < gcount; k++) { const double m = gparts[k].mass; +#ifdef SWIFT_DEBUG_CHECKS + if (gparts[k].time_bin == time_bin_inhibited) + error("Inhibited particle in P2M. Should have been removed earlier."); +#endif + mass += m; com[0] += gparts[k].x[0] * m; com[1] += gparts[k].x[1] * m; @@ -1037,6 +1049,13 @@ INLINE static void gravity_P2M(struct gravity_tensors *multi, vel[2] += gparts[k].v_full[2] * m; } +#ifdef PLANETARY_SPH + /* Prevent FPE from zero mass with the temporary outside-the-box particles */ + if (mass == 0.f) { + mass = FLT_MIN; + } +#endif // PLANETARY_SPH + /* Final operation on CoM */ const double imass = 1.0 / mass; com[0] *= imass; @@ -1300,8 +1319,8 @@ INLINE static void gravity_P2M(struct gravity_tensors *multi, * @param pos_a The position to which m_b will be shifted. * @param pos_b The current postion of the multipole to shift. */ -INLINE static void gravity_M2M(struct multipole *m_a, - const struct multipole *m_b, +INLINE static void gravity_M2M(struct multipole *restrict m_a, + const struct multipole *restrict m_b, const double pos_a[3], const double pos_b[3]) { /* Shift 0th order term */ @@ -1551,43 +1570,11 @@ INLINE static void gravity_M2M(struct multipole *m_a, * * @param l_b The field tensor to compute. * @param m_a The multipole creating the field. - * @param pos_b The position of the field tensor. - * @param pos_a The position of the multipole. - * @param props The #gravity_props of this calculation. - * @param periodic Is the calculation periodic ? - * @param dim The size of the simulation box. - * @param rs_inv The inverse of the gravity mesh-smoothing scale. 
+ * @param pot The derivatives of the potential. */ -INLINE static void gravity_M2L(struct grav_tensor *l_b, - const struct multipole *m_a, - const double pos_b[3], const double pos_a[3], - const struct gravity_props *props, int periodic, - const double dim[3], float rs_inv) { - - /* Recover some constants */ - const float eps = props->epsilon_cur; - const float eps_inv = props->epsilon_cur_inv; - - /* Compute distance vector */ - float dx = (float)(pos_b[0] - pos_a[0]); - float dy = (float)(pos_b[1] - pos_a[1]); - float dz = (float)(pos_b[2] - pos_a[2]); - - /* Apply BC */ - if (periodic) { - dx = nearest(dx, dim[0]); - dy = nearest(dy, dim[1]); - dz = nearest(dz, dim[2]); - } - - /* Compute distance */ - const float r2 = dx * dx + dy * dy + dz * dz; - const float r_inv = 1. / sqrtf(r2); - - /* Compute all derivatives */ - struct potential_derivatives_M2L pot; - compute_potential_derivatives_M2L(dx, dy, dz, r2, r_inv, eps, eps_inv, - periodic, rs_inv, &pot); +INLINE static void gravity_M2L_apply( + struct grav_tensor *restrict l_b, const struct multipole *restrict m_a, + const struct potential_derivatives_M2L *pot) { #ifdef SWIFT_DEBUG_CHECKS /* Count interactions */ @@ -1597,330 +1584,368 @@ INLINE static void gravity_M2L(struct grav_tensor *l_b, /* Record that this tensor has received contributions */ l_b->interacted = 1; + const float M_000 = m_a->M_000; + const float D_000 = pot->D_000; + /* 0th order term */ - l_b->F_000 += m_a->M_000 * pot.D_000; + l_b->F_000 += M_000 * D_000; #if SELF_GRAVITY_MULTIPOLE_ORDER > 0 + + /* The dipole term is zero when using the CoM */ + /* The compiler will optimize out the terms in the equations */ + /* below. We keep them written to maintain the logical structure. 
*/ + const float M_100 = 0.f; + const float M_010 = 0.f; + const float M_001 = 0.f; + + const float D_100 = pot->D_100; + const float D_010 = pot->D_010; + const float D_001 = pot->D_001; + /* 1st order multipole term (addition to rank 0)*/ - l_b->F_000 += - m_a->M_100 * pot.D_100 + m_a->M_010 * pot.D_010 + m_a->M_001 * pot.D_001; + l_b->F_000 += M_100 * D_100 + M_010 * D_010 + M_001 * D_001; /* 1st order multipole term (addition to rank 1)*/ - l_b->F_100 += m_a->M_000 * pot.D_100; - l_b->F_010 += m_a->M_000 * pot.D_010; - l_b->F_001 += m_a->M_000 * pot.D_001; + l_b->F_100 += M_000 * D_100; + l_b->F_010 += M_000 * D_010; + l_b->F_001 += M_000 * D_001; #endif #if SELF_GRAVITY_MULTIPOLE_ORDER > 1 + const float M_200 = m_a->M_200; + const float M_020 = m_a->M_020; + const float M_002 = m_a->M_002; + const float M_110 = m_a->M_110; + const float M_101 = m_a->M_101; + const float M_011 = m_a->M_011; + + const float D_200 = pot->D_200; + const float D_020 = pot->D_020; + const float D_002 = pot->D_002; + const float D_110 = pot->D_110; + const float D_101 = pot->D_101; + const float D_011 = pot->D_011; + /* 2nd order multipole term (addition to rank 0)*/ - l_b->F_000 += - m_a->M_200 * pot.D_200 + m_a->M_020 * pot.D_020 + m_a->M_002 * pot.D_002; - l_b->F_000 += - m_a->M_110 * pot.D_110 + m_a->M_101 * pot.D_101 + m_a->M_011 * pot.D_011; + l_b->F_000 += M_200 * D_200 + M_020 * D_020 + M_002 * D_002; + l_b->F_000 += M_110 * D_110 + M_101 * D_101 + M_011 * D_011; /* 2nd order multipole term (addition to rank 1)*/ - l_b->F_100 += - m_a->M_100 * pot.D_200 + m_a->M_010 * pot.D_110 + m_a->M_001 * pot.D_101; - l_b->F_010 += - m_a->M_100 * pot.D_110 + m_a->M_010 * pot.D_020 + m_a->M_001 * pot.D_011; - l_b->F_001 += - m_a->M_100 * pot.D_101 + m_a->M_010 * pot.D_011 + m_a->M_001 * pot.D_002; + l_b->F_100 += M_100 * D_200 + M_010 * D_110 + M_001 * D_101; + l_b->F_010 += M_100 * D_110 + M_010 * D_020 + M_001 * D_011; + l_b->F_001 += M_100 * D_101 + M_010 * D_011 + M_001 * D_002; /* 2nd 
order multipole term (addition to rank 2)*/ - l_b->F_200 += m_a->M_000 * pot.D_200; - l_b->F_020 += m_a->M_000 * pot.D_020; - l_b->F_002 += m_a->M_000 * pot.D_002; - l_b->F_110 += m_a->M_000 * pot.D_110; - l_b->F_101 += m_a->M_000 * pot.D_101; - l_b->F_011 += m_a->M_000 * pot.D_011; + l_b->F_200 += M_000 * D_200; + l_b->F_020 += M_000 * D_020; + l_b->F_002 += M_000 * D_002; + l_b->F_110 += M_000 * D_110; + l_b->F_101 += M_000 * D_101; + l_b->F_011 += M_000 * D_011; #endif #if SELF_GRAVITY_MULTIPOLE_ORDER > 2 + const float M_300 = m_a->M_300; + const float M_030 = m_a->M_030; + const float M_003 = m_a->M_003; + const float M_210 = m_a->M_210; + const float M_201 = m_a->M_201; + const float M_021 = m_a->M_021; + const float M_120 = m_a->M_120; + const float M_012 = m_a->M_012; + const float M_102 = m_a->M_102; + const float M_111 = m_a->M_111; + + const float D_300 = pot->D_300; + const float D_030 = pot->D_030; + const float D_003 = pot->D_003; + const float D_210 = pot->D_210; + const float D_201 = pot->D_201; + const float D_021 = pot->D_021; + const float D_120 = pot->D_120; + const float D_012 = pot->D_012; + const float D_102 = pot->D_102; + const float D_111 = pot->D_111; + /* 3rd order multipole term (addition to rank 0)*/ - l_b->F_000 += - m_a->M_300 * pot.D_300 + m_a->M_030 * pot.D_030 + m_a->M_003 * pot.D_003; - l_b->F_000 += - m_a->M_210 * pot.D_210 + m_a->M_201 * pot.D_201 + m_a->M_120 * pot.D_120; - l_b->F_000 += - m_a->M_021 * pot.D_021 + m_a->M_102 * pot.D_102 + m_a->M_012 * pot.D_012; - l_b->F_000 += m_a->M_111 * pot.D_111; + l_b->F_000 += M_300 * D_300 + M_030 * D_030 + M_003 * D_003; + l_b->F_000 += M_210 * D_210 + M_201 * D_201 + M_120 * D_120; + l_b->F_000 += M_021 * D_021 + M_102 * D_102 + M_012 * D_012; + l_b->F_000 += M_111 * D_111; /* 3rd order multipole term (addition to rank 1)*/ - l_b->F_100 += - m_a->M_200 * pot.D_300 + m_a->M_020 * pot.D_120 + m_a->M_002 * pot.D_102; - l_b->F_100 += - m_a->M_110 * pot.D_210 + m_a->M_101 * pot.D_201 + 
m_a->M_011 * pot.D_111; - l_b->F_010 += - m_a->M_200 * pot.D_210 + m_a->M_020 * pot.D_030 + m_a->M_002 * pot.D_012; - l_b->F_010 += - m_a->M_110 * pot.D_120 + m_a->M_101 * pot.D_111 + m_a->M_011 * pot.D_021; - l_b->F_001 += - m_a->M_200 * pot.D_201 + m_a->M_020 * pot.D_021 + m_a->M_002 * pot.D_003; - l_b->F_001 += - m_a->M_110 * pot.D_111 + m_a->M_101 * pot.D_102 + m_a->M_011 * pot.D_012; + l_b->F_100 += M_200 * D_300 + M_020 * D_120 + M_002 * D_102; + l_b->F_100 += M_110 * D_210 + M_101 * D_201 + M_011 * D_111; + l_b->F_010 += M_200 * D_210 + M_020 * D_030 + M_002 * D_012; + l_b->F_010 += M_110 * D_120 + M_101 * D_111 + M_011 * D_021; + l_b->F_001 += M_200 * D_201 + M_020 * D_021 + M_002 * D_003; + l_b->F_001 += M_110 * D_111 + M_101 * D_102 + M_011 * D_012; /* 3rd order multipole term (addition to rank 2)*/ - l_b->F_200 += - m_a->M_100 * pot.D_300 + m_a->M_010 * pot.D_210 + m_a->M_001 * pot.D_201; - l_b->F_020 += - m_a->M_100 * pot.D_120 + m_a->M_010 * pot.D_030 + m_a->M_001 * pot.D_021; - l_b->F_002 += - m_a->M_100 * pot.D_102 + m_a->M_010 * pot.D_012 + m_a->M_001 * pot.D_003; - l_b->F_110 += - m_a->M_100 * pot.D_210 + m_a->M_010 * pot.D_120 + m_a->M_001 * pot.D_111; - l_b->F_101 += - m_a->M_100 * pot.D_201 + m_a->M_010 * pot.D_111 + m_a->M_001 * pot.D_102; - l_b->F_011 += - m_a->M_100 * pot.D_111 + m_a->M_010 * pot.D_021 + m_a->M_001 * pot.D_012; + l_b->F_200 += M_100 * D_300 + M_010 * D_210 + M_001 * D_201; + l_b->F_020 += M_100 * D_120 + M_010 * D_030 + M_001 * D_021; + l_b->F_002 += M_100 * D_102 + M_010 * D_012 + M_001 * D_003; + l_b->F_110 += M_100 * D_210 + M_010 * D_120 + M_001 * D_111; + l_b->F_101 += M_100 * D_201 + M_010 * D_111 + M_001 * D_102; + l_b->F_011 += M_100 * D_111 + M_010 * D_021 + M_001 * D_012; /* 3rd order multipole term (addition to rank 3)*/ - l_b->F_300 += m_a->M_000 * pot.D_300; - l_b->F_030 += m_a->M_000 * pot.D_030; - l_b->F_003 += m_a->M_000 * pot.D_003; - l_b->F_210 += m_a->M_000 * pot.D_210; - l_b->F_201 += m_a->M_000 * 
pot.D_201; - l_b->F_120 += m_a->M_000 * pot.D_120; - l_b->F_021 += m_a->M_000 * pot.D_021; - l_b->F_102 += m_a->M_000 * pot.D_102; - l_b->F_012 += m_a->M_000 * pot.D_012; - l_b->F_111 += m_a->M_000 * pot.D_111; + l_b->F_300 += M_000 * D_300; + l_b->F_030 += M_000 * D_030; + l_b->F_003 += M_000 * D_003; + l_b->F_210 += M_000 * D_210; + l_b->F_201 += M_000 * D_201; + l_b->F_120 += M_000 * D_120; + l_b->F_021 += M_000 * D_021; + l_b->F_102 += M_000 * D_102; + l_b->F_012 += M_000 * D_012; + l_b->F_111 += M_000 * D_111; #endif #if SELF_GRAVITY_MULTIPOLE_ORDER > 3 + + const float M_400 = m_a->M_400; + const float M_040 = m_a->M_040; + const float M_004 = m_a->M_004; + const float M_310 = m_a->M_310; + const float M_301 = m_a->M_301; + const float M_031 = m_a->M_031; + const float M_130 = m_a->M_130; + const float M_013 = m_a->M_013; + const float M_103 = m_a->M_103; + const float M_220 = m_a->M_220; + const float M_202 = m_a->M_202; + const float M_022 = m_a->M_022; + const float M_211 = m_a->M_211; + const float M_121 = m_a->M_121; + const float M_112 = m_a->M_112; + + const float D_400 = pot->D_400; + const float D_040 = pot->D_040; + const float D_004 = pot->D_004; + const float D_310 = pot->D_310; + const float D_301 = pot->D_301; + const float D_031 = pot->D_031; + const float D_130 = pot->D_130; + const float D_013 = pot->D_013; + const float D_103 = pot->D_103; + const float D_220 = pot->D_220; + const float D_202 = pot->D_202; + const float D_022 = pot->D_022; + const float D_211 = pot->D_211; + const float D_121 = pot->D_121; + const float D_112 = pot->D_112; + /* Compute 4th order field tensor terms (addition to rank 0) */ - l_b->F_000 += - m_a->M_004 * pot.D_004 + m_a->M_013 * pot.D_013 + m_a->M_022 * pot.D_022 + - m_a->M_031 * pot.D_031 + m_a->M_040 * pot.D_040 + m_a->M_103 * pot.D_103 + - m_a->M_112 * pot.D_112 + m_a->M_121 * pot.D_121 + m_a->M_130 * pot.D_130 + - m_a->M_202 * pot.D_202 + m_a->M_211 * pot.D_211 + m_a->M_220 * pot.D_220 + - m_a->M_301 * 
pot.D_301 + m_a->M_310 * pot.D_310 + m_a->M_400 * pot.D_400; + l_b->F_000 += M_004 * D_004 + M_013 * D_013 + M_022 * D_022 + M_031 * D_031 + + M_040 * D_040 + M_103 * D_103 + M_112 * D_112 + M_121 * D_121 + + M_130 * D_130 + M_202 * D_202 + M_211 * D_211 + M_220 * D_220 + + M_301 * D_301 + M_310 * D_310 + M_400 * D_400; /* Compute 4th order field tensor terms (addition to rank 1) */ - l_b->F_001 += m_a->M_003 * pot.D_004 + m_a->M_012 * pot.D_013 + - m_a->M_021 * pot.D_022 + m_a->M_030 * pot.D_031 + - m_a->M_102 * pot.D_103 + m_a->M_111 * pot.D_112 + - m_a->M_120 * pot.D_121 + m_a->M_201 * pot.D_202 + - m_a->M_210 * pot.D_211 + m_a->M_300 * pot.D_301; - l_b->F_010 += m_a->M_003 * pot.D_013 + m_a->M_012 * pot.D_022 + - m_a->M_021 * pot.D_031 + m_a->M_030 * pot.D_040 + - m_a->M_102 * pot.D_112 + m_a->M_111 * pot.D_121 + - m_a->M_120 * pot.D_130 + m_a->M_201 * pot.D_211 + - m_a->M_210 * pot.D_220 + m_a->M_300 * pot.D_310; - l_b->F_100 += m_a->M_003 * pot.D_103 + m_a->M_012 * pot.D_112 + - m_a->M_021 * pot.D_121 + m_a->M_030 * pot.D_130 + - m_a->M_102 * pot.D_202 + m_a->M_111 * pot.D_211 + - m_a->M_120 * pot.D_220 + m_a->M_201 * pot.D_301 + - m_a->M_210 * pot.D_310 + m_a->M_300 * pot.D_400; + l_b->F_001 += M_003 * D_004 + M_012 * D_013 + M_021 * D_022 + M_030 * D_031 + + M_102 * D_103 + M_111 * D_112 + M_120 * D_121 + M_201 * D_202 + + M_210 * D_211 + M_300 * D_301; + l_b->F_010 += M_003 * D_013 + M_012 * D_022 + M_021 * D_031 + M_030 * D_040 + + M_102 * D_112 + M_111 * D_121 + M_120 * D_130 + M_201 * D_211 + + M_210 * D_220 + M_300 * D_310; + l_b->F_100 += M_003 * D_103 + M_012 * D_112 + M_021 * D_121 + M_030 * D_130 + + M_102 * D_202 + M_111 * D_211 + M_120 * D_220 + M_201 * D_301 + + M_210 * D_310 + M_300 * D_400; /* Compute 4th order field tensor terms (addition to rank 2) */ - l_b->F_002 += m_a->M_002 * pot.D_004 + m_a->M_011 * pot.D_013 + - m_a->M_020 * pot.D_022 + m_a->M_101 * pot.D_103 + - m_a->M_110 * pot.D_112 + m_a->M_200 * pot.D_202; - l_b->F_011 += 
m_a->M_002 * pot.D_013 + m_a->M_011 * pot.D_022 + - m_a->M_020 * pot.D_031 + m_a->M_101 * pot.D_112 + - m_a->M_110 * pot.D_121 + m_a->M_200 * pot.D_211; - l_b->F_020 += m_a->M_002 * pot.D_022 + m_a->M_011 * pot.D_031 + - m_a->M_020 * pot.D_040 + m_a->M_101 * pot.D_121 + - m_a->M_110 * pot.D_130 + m_a->M_200 * pot.D_220; - l_b->F_101 += m_a->M_002 * pot.D_103 + m_a->M_011 * pot.D_112 + - m_a->M_020 * pot.D_121 + m_a->M_101 * pot.D_202 + - m_a->M_110 * pot.D_211 + m_a->M_200 * pot.D_301; - l_b->F_110 += m_a->M_002 * pot.D_112 + m_a->M_011 * pot.D_121 + - m_a->M_020 * pot.D_130 + m_a->M_101 * pot.D_211 + - m_a->M_110 * pot.D_220 + m_a->M_200 * pot.D_310; - l_b->F_200 += m_a->M_002 * pot.D_202 + m_a->M_011 * pot.D_211 + - m_a->M_020 * pot.D_220 + m_a->M_101 * pot.D_301 + - m_a->M_110 * pot.D_310 + m_a->M_200 * pot.D_400; + l_b->F_002 += M_002 * D_004 + M_011 * D_013 + M_020 * D_022 + M_101 * D_103 + + M_110 * D_112 + M_200 * D_202; + l_b->F_011 += M_002 * D_013 + M_011 * D_022 + M_020 * D_031 + M_101 * D_112 + + M_110 * D_121 + M_200 * D_211; + l_b->F_020 += M_002 * D_022 + M_011 * D_031 + M_020 * D_040 + M_101 * D_121 + + M_110 * D_130 + M_200 * D_220; + l_b->F_101 += M_002 * D_103 + M_011 * D_112 + M_020 * D_121 + M_101 * D_202 + + M_110 * D_211 + M_200 * D_301; + l_b->F_110 += M_002 * D_112 + M_011 * D_121 + M_020 * D_130 + M_101 * D_211 + + M_110 * D_220 + M_200 * D_310; + l_b->F_200 += M_002 * D_202 + M_011 * D_211 + M_020 * D_220 + M_101 * D_301 + + M_110 * D_310 + M_200 * D_400; /* Compute 4th order field tensor terms (addition to rank 3) */ - l_b->F_003 += - m_a->M_001 * pot.D_004 + m_a->M_010 * pot.D_013 + m_a->M_100 * pot.D_103; - l_b->F_012 += - m_a->M_001 * pot.D_013 + m_a->M_010 * pot.D_022 + m_a->M_100 * pot.D_112; - l_b->F_021 += - m_a->M_001 * pot.D_022 + m_a->M_010 * pot.D_031 + m_a->M_100 * pot.D_121; - l_b->F_030 += - m_a->M_001 * pot.D_031 + m_a->M_010 * pot.D_040 + m_a->M_100 * pot.D_130; - l_b->F_102 += - m_a->M_001 * pot.D_103 + m_a->M_010 * 
pot.D_112 + m_a->M_100 * pot.D_202; - l_b->F_111 += - m_a->M_001 * pot.D_112 + m_a->M_010 * pot.D_121 + m_a->M_100 * pot.D_211; - l_b->F_120 += - m_a->M_001 * pot.D_121 + m_a->M_010 * pot.D_130 + m_a->M_100 * pot.D_220; - l_b->F_201 += - m_a->M_001 * pot.D_202 + m_a->M_010 * pot.D_211 + m_a->M_100 * pot.D_301; - l_b->F_210 += - m_a->M_001 * pot.D_211 + m_a->M_010 * pot.D_220 + m_a->M_100 * pot.D_310; - l_b->F_300 += - m_a->M_001 * pot.D_301 + m_a->M_010 * pot.D_310 + m_a->M_100 * pot.D_400; + l_b->F_003 += M_001 * D_004 + M_010 * D_013 + M_100 * D_103; + l_b->F_012 += M_001 * D_013 + M_010 * D_022 + M_100 * D_112; + l_b->F_021 += M_001 * D_022 + M_010 * D_031 + M_100 * D_121; + l_b->F_030 += M_001 * D_031 + M_010 * D_040 + M_100 * D_130; + l_b->F_102 += M_001 * D_103 + M_010 * D_112 + M_100 * D_202; + l_b->F_111 += M_001 * D_112 + M_010 * D_121 + M_100 * D_211; + l_b->F_120 += M_001 * D_121 + M_010 * D_130 + M_100 * D_220; + l_b->F_201 += M_001 * D_202 + M_010 * D_211 + M_100 * D_301; + l_b->F_210 += M_001 * D_211 + M_010 * D_220 + M_100 * D_310; + l_b->F_300 += M_001 * D_301 + M_010 * D_310 + M_100 * D_400; /* Compute 4th order field tensor terms (addition to rank 4) */ - l_b->F_004 += m_a->M_000 * pot.D_004; - l_b->F_013 += m_a->M_000 * pot.D_013; - l_b->F_022 += m_a->M_000 * pot.D_022; - l_b->F_031 += m_a->M_000 * pot.D_031; - l_b->F_040 += m_a->M_000 * pot.D_040; - l_b->F_103 += m_a->M_000 * pot.D_103; - l_b->F_112 += m_a->M_000 * pot.D_112; - l_b->F_121 += m_a->M_000 * pot.D_121; - l_b->F_130 += m_a->M_000 * pot.D_130; - l_b->F_202 += m_a->M_000 * pot.D_202; - l_b->F_211 += m_a->M_000 * pot.D_211; - l_b->F_220 += m_a->M_000 * pot.D_220; - l_b->F_301 += m_a->M_000 * pot.D_301; - l_b->F_310 += m_a->M_000 * pot.D_310; - l_b->F_400 += m_a->M_000 * pot.D_400; + l_b->F_004 += M_000 * D_004; + l_b->F_013 += M_000 * D_013; + l_b->F_022 += M_000 * D_022; + l_b->F_031 += M_000 * D_031; + l_b->F_040 += M_000 * D_040; + l_b->F_103 += M_000 * D_103; + l_b->F_112 += M_000 * 
D_112; + l_b->F_121 += M_000 * D_121; + l_b->F_130 += M_000 * D_130; + l_b->F_202 += M_000 * D_202; + l_b->F_211 += M_000 * D_211; + l_b->F_220 += M_000 * D_220; + l_b->F_301 += M_000 * D_301; + l_b->F_310 += M_000 * D_310; + l_b->F_400 += M_000 * D_400; #endif #if SELF_GRAVITY_MULTIPOLE_ORDER > 4 + const float M_500 = m_a->M_500; + const float M_050 = m_a->M_050; + const float M_005 = m_a->M_005; + const float M_410 = m_a->M_410; + const float M_401 = m_a->M_401; + const float M_041 = m_a->M_041; + const float M_140 = m_a->M_140; + const float M_014 = m_a->M_014; + const float M_104 = m_a->M_104; + const float M_320 = m_a->M_320; + const float M_302 = m_a->M_302; + const float M_230 = m_a->M_230; + const float M_032 = m_a->M_032; + const float M_203 = m_a->M_203; + const float M_023 = m_a->M_023; + const float M_122 = m_a->M_122; + const float M_212 = m_a->M_212; + const float M_221 = m_a->M_221; + const float M_311 = m_a->M_311; + const float M_131 = m_a->M_131; + const float M_113 = m_a->M_113; + + const float D_500 = pot->D_500; + const float D_050 = pot->D_050; + const float D_005 = pot->D_005; + const float D_410 = pot->D_410; + const float D_401 = pot->D_401; + const float D_041 = pot->D_041; + const float D_140 = pot->D_140; + const float D_014 = pot->D_014; + const float D_104 = pot->D_104; + const float D_320 = pot->D_320; + const float D_302 = pot->D_302; + const float D_230 = pot->D_230; + const float D_032 = pot->D_032; + const float D_203 = pot->D_203; + const float D_023 = pot->D_023; + const float D_122 = pot->D_122; + const float D_212 = pot->D_212; + const float D_221 = pot->D_221; + const float D_311 = pot->D_311; + const float D_131 = pot->D_131; + const float D_113 = pot->D_113; + /* Compute 5th order field tensor terms (addition to rank 0) */ - l_b->F_000 += - m_a->M_005 * pot.D_005 + m_a->M_014 * pot.D_014 + m_a->M_023 * pot.D_023 + - m_a->M_032 * pot.D_032 + m_a->M_041 * pot.D_041 + m_a->M_050 * pot.D_050 + - m_a->M_104 * pot.D_104 + 
m_a->M_113 * pot.D_113 + m_a->M_122 * pot.D_122 + - m_a->M_131 * pot.D_131 + m_a->M_140 * pot.D_140 + m_a->M_203 * pot.D_203 + - m_a->M_212 * pot.D_212 + m_a->M_221 * pot.D_221 + m_a->M_230 * pot.D_230 + - m_a->M_302 * pot.D_302 + m_a->M_311 * pot.D_311 + m_a->M_320 * pot.D_320 + - m_a->M_401 * pot.D_401 + m_a->M_410 * pot.D_410 + m_a->M_500 * pot.D_500; + l_b->F_000 += M_005 * D_005 + M_014 * D_014 + M_023 * D_023 + M_032 * D_032 + + M_041 * D_041 + M_050 * D_050 + M_104 * D_104 + M_113 * D_113 + + M_122 * D_122 + M_131 * D_131 + M_140 * D_140 + M_203 * D_203 + + M_212 * D_212 + M_221 * D_221 + M_230 * D_230 + M_302 * D_302 + + M_311 * D_311 + M_320 * D_320 + M_401 * D_401 + M_410 * D_410 + + M_500 * D_500; /* Compute 5th order field tensor terms (addition to rank 1) */ - l_b->F_001 += - m_a->M_004 * pot.D_005 + m_a->M_013 * pot.D_014 + m_a->M_022 * pot.D_023 + - m_a->M_031 * pot.D_032 + m_a->M_040 * pot.D_041 + m_a->M_103 * pot.D_104 + - m_a->M_112 * pot.D_113 + m_a->M_121 * pot.D_122 + m_a->M_130 * pot.D_131 + - m_a->M_202 * pot.D_203 + m_a->M_211 * pot.D_212 + m_a->M_220 * pot.D_221 + - m_a->M_301 * pot.D_302 + m_a->M_310 * pot.D_311 + m_a->M_400 * pot.D_401; - l_b->F_010 += - m_a->M_004 * pot.D_014 + m_a->M_013 * pot.D_023 + m_a->M_022 * pot.D_032 + - m_a->M_031 * pot.D_041 + m_a->M_040 * pot.D_050 + m_a->M_103 * pot.D_113 + - m_a->M_112 * pot.D_122 + m_a->M_121 * pot.D_131 + m_a->M_130 * pot.D_140 + - m_a->M_202 * pot.D_212 + m_a->M_211 * pot.D_221 + m_a->M_220 * pot.D_230 + - m_a->M_301 * pot.D_311 + m_a->M_310 * pot.D_320 + m_a->M_400 * pot.D_410; - l_b->F_100 += - m_a->M_004 * pot.D_104 + m_a->M_013 * pot.D_113 + m_a->M_022 * pot.D_122 + - m_a->M_031 * pot.D_131 + m_a->M_040 * pot.D_140 + m_a->M_103 * pot.D_203 + - m_a->M_112 * pot.D_212 + m_a->M_121 * pot.D_221 + m_a->M_130 * pot.D_230 + - m_a->M_202 * pot.D_302 + m_a->M_211 * pot.D_311 + m_a->M_220 * pot.D_320 + - m_a->M_301 * pot.D_401 + m_a->M_310 * pot.D_410 + m_a->M_400 * pot.D_500; + l_b->F_001 += 
M_004 * D_005 + M_013 * D_014 + M_022 * D_023 + M_031 * D_032 + + M_040 * D_041 + M_103 * D_104 + M_112 * D_113 + M_121 * D_122 + + M_130 * D_131 + M_202 * D_203 + M_211 * D_212 + M_220 * D_221 + + M_301 * D_302 + M_310 * D_311 + M_400 * D_401; + l_b->F_010 += M_004 * D_014 + M_013 * D_023 + M_022 * D_032 + M_031 * D_041 + + M_040 * D_050 + M_103 * D_113 + M_112 * D_122 + M_121 * D_131 + + M_130 * D_140 + M_202 * D_212 + M_211 * D_221 + M_220 * D_230 + + M_301 * D_311 + M_310 * D_320 + M_400 * D_410; + l_b->F_100 += M_004 * D_104 + M_013 * D_113 + M_022 * D_122 + M_031 * D_131 + + M_040 * D_140 + M_103 * D_203 + M_112 * D_212 + M_121 * D_221 + + M_130 * D_230 + M_202 * D_302 + M_211 * D_311 + M_220 * D_320 + + M_301 * D_401 + M_310 * D_410 + M_400 * D_500; /* Compute 5th order field tensor terms (addition to rank 2) */ - l_b->F_002 += m_a->M_003 * pot.D_005 + m_a->M_012 * pot.D_014 + - m_a->M_021 * pot.D_023 + m_a->M_030 * pot.D_032 + - m_a->M_102 * pot.D_104 + m_a->M_111 * pot.D_113 + - m_a->M_120 * pot.D_122 + m_a->M_201 * pot.D_203 + - m_a->M_210 * pot.D_212 + m_a->M_300 * pot.D_302; - l_b->F_011 += m_a->M_003 * pot.D_014 + m_a->M_012 * pot.D_023 + - m_a->M_021 * pot.D_032 + m_a->M_030 * pot.D_041 + - m_a->M_102 * pot.D_113 + m_a->M_111 * pot.D_122 + - m_a->M_120 * pot.D_131 + m_a->M_201 * pot.D_212 + - m_a->M_210 * pot.D_221 + m_a->M_300 * pot.D_311; - l_b->F_020 += m_a->M_003 * pot.D_023 + m_a->M_012 * pot.D_032 + - m_a->M_021 * pot.D_041 + m_a->M_030 * pot.D_050 + - m_a->M_102 * pot.D_122 + m_a->M_111 * pot.D_131 + - m_a->M_120 * pot.D_140 + m_a->M_201 * pot.D_221 + - m_a->M_210 * pot.D_230 + m_a->M_300 * pot.D_320; - l_b->F_101 += m_a->M_003 * pot.D_104 + m_a->M_012 * pot.D_113 + - m_a->M_021 * pot.D_122 + m_a->M_030 * pot.D_131 + - m_a->M_102 * pot.D_203 + m_a->M_111 * pot.D_212 + - m_a->M_120 * pot.D_221 + m_a->M_201 * pot.D_302 + - m_a->M_210 * pot.D_311 + m_a->M_300 * pot.D_401; - l_b->F_110 += m_a->M_003 * pot.D_113 + m_a->M_012 * pot.D_122 + - 
m_a->M_021 * pot.D_131 + m_a->M_030 * pot.D_140 + - m_a->M_102 * pot.D_212 + m_a->M_111 * pot.D_221 + - m_a->M_120 * pot.D_230 + m_a->M_201 * pot.D_311 + - m_a->M_210 * pot.D_320 + m_a->M_300 * pot.D_410; - l_b->F_200 += m_a->M_003 * pot.D_203 + m_a->M_012 * pot.D_212 + - m_a->M_021 * pot.D_221 + m_a->M_030 * pot.D_230 + - m_a->M_102 * pot.D_302 + m_a->M_111 * pot.D_311 + - m_a->M_120 * pot.D_320 + m_a->M_201 * pot.D_401 + - m_a->M_210 * pot.D_410 + m_a->M_300 * pot.D_500; + l_b->F_002 += M_003 * D_005 + M_012 * D_014 + M_021 * D_023 + M_030 * D_032 + + M_102 * D_104 + M_111 * D_113 + M_120 * D_122 + M_201 * D_203 + + M_210 * D_212 + M_300 * D_302; + l_b->F_011 += M_003 * D_014 + M_012 * D_023 + M_021 * D_032 + M_030 * D_041 + + M_102 * D_113 + M_111 * D_122 + M_120 * D_131 + M_201 * D_212 + + M_210 * D_221 + M_300 * D_311; + l_b->F_020 += M_003 * D_023 + M_012 * D_032 + M_021 * D_041 + M_030 * D_050 + + M_102 * D_122 + M_111 * D_131 + M_120 * D_140 + M_201 * D_221 + + M_210 * D_230 + M_300 * D_320; + l_b->F_101 += M_003 * D_104 + M_012 * D_113 + M_021 * D_122 + M_030 * D_131 + + M_102 * D_203 + M_111 * D_212 + M_120 * D_221 + M_201 * D_302 + + M_210 * D_311 + M_300 * D_401; + l_b->F_110 += M_003 * D_113 + M_012 * D_122 + M_021 * D_131 + M_030 * D_140 + + M_102 * D_212 + M_111 * D_221 + M_120 * D_230 + M_201 * D_311 + + M_210 * D_320 + M_300 * D_410; + l_b->F_200 += M_003 * D_203 + M_012 * D_212 + M_021 * D_221 + M_030 * D_230 + + M_102 * D_302 + M_111 * D_311 + M_120 * D_320 + M_201 * D_401 + + M_210 * D_410 + M_300 * D_500; /* Compute 5th order field tensor terms (addition to rank 3) */ - l_b->F_003 += m_a->M_002 * pot.D_005 + m_a->M_011 * pot.D_014 + - m_a->M_020 * pot.D_023 + m_a->M_101 * pot.D_104 + - m_a->M_110 * pot.D_113 + m_a->M_200 * pot.D_203; - l_b->F_012 += m_a->M_002 * pot.D_014 + m_a->M_011 * pot.D_023 + - m_a->M_020 * pot.D_032 + m_a->M_101 * pot.D_113 + - m_a->M_110 * pot.D_122 + m_a->M_200 * pot.D_212; - l_b->F_021 += m_a->M_002 * pot.D_023 + 
m_a->M_011 * pot.D_032 + - m_a->M_020 * pot.D_041 + m_a->M_101 * pot.D_122 + - m_a->M_110 * pot.D_131 + m_a->M_200 * pot.D_221; - l_b->F_030 += m_a->M_002 * pot.D_032 + m_a->M_011 * pot.D_041 + - m_a->M_020 * pot.D_050 + m_a->M_101 * pot.D_131 + - m_a->M_110 * pot.D_140 + m_a->M_200 * pot.D_230; - l_b->F_102 += m_a->M_002 * pot.D_104 + m_a->M_011 * pot.D_113 + - m_a->M_020 * pot.D_122 + m_a->M_101 * pot.D_203 + - m_a->M_110 * pot.D_212 + m_a->M_200 * pot.D_302; - l_b->F_111 += m_a->M_002 * pot.D_113 + m_a->M_011 * pot.D_122 + - m_a->M_020 * pot.D_131 + m_a->M_101 * pot.D_212 + - m_a->M_110 * pot.D_221 + m_a->M_200 * pot.D_311; - l_b->F_120 += m_a->M_002 * pot.D_122 + m_a->M_011 * pot.D_131 + - m_a->M_020 * pot.D_140 + m_a->M_101 * pot.D_221 + - m_a->M_110 * pot.D_230 + m_a->M_200 * pot.D_320; - l_b->F_201 += m_a->M_002 * pot.D_203 + m_a->M_011 * pot.D_212 + - m_a->M_020 * pot.D_221 + m_a->M_101 * pot.D_302 + - m_a->M_110 * pot.D_311 + m_a->M_200 * pot.D_401; - l_b->F_210 += m_a->M_002 * pot.D_212 + m_a->M_011 * pot.D_221 + - m_a->M_020 * pot.D_230 + m_a->M_101 * pot.D_311 + - m_a->M_110 * pot.D_320 + m_a->M_200 * pot.D_410; - l_b->F_300 += m_a->M_002 * pot.D_302 + m_a->M_011 * pot.D_311 + - m_a->M_020 * pot.D_320 + m_a->M_101 * pot.D_401 + - m_a->M_110 * pot.D_410 + m_a->M_200 * pot.D_500; + l_b->F_003 += M_002 * D_005 + M_011 * D_014 + M_020 * D_023 + M_101 * D_104 + + M_110 * D_113 + M_200 * D_203; + l_b->F_012 += M_002 * D_014 + M_011 * D_023 + M_020 * D_032 + M_101 * D_113 + + M_110 * D_122 + M_200 * D_212; + l_b->F_021 += M_002 * D_023 + M_011 * D_032 + M_020 * D_041 + M_101 * D_122 + + M_110 * D_131 + M_200 * D_221; + l_b->F_030 += M_002 * D_032 + M_011 * D_041 + M_020 * D_050 + M_101 * D_131 + + M_110 * D_140 + M_200 * D_230; + l_b->F_102 += M_002 * D_104 + M_011 * D_113 + M_020 * D_122 + M_101 * D_203 + + M_110 * D_212 + M_200 * D_302; + l_b->F_111 += M_002 * D_113 + M_011 * D_122 + M_020 * D_131 + M_101 * D_212 + + M_110 * D_221 + M_200 * D_311; + 
l_b->F_120 += M_002 * D_122 + M_011 * D_131 + M_020 * D_140 + M_101 * D_221 + + M_110 * D_230 + M_200 * D_320; + l_b->F_201 += M_002 * D_203 + M_011 * D_212 + M_020 * D_221 + M_101 * D_302 + + M_110 * D_311 + M_200 * D_401; + l_b->F_210 += M_002 * D_212 + M_011 * D_221 + M_020 * D_230 + M_101 * D_311 + + M_110 * D_320 + M_200 * D_410; + l_b->F_300 += M_002 * D_302 + M_011 * D_311 + M_020 * D_320 + M_101 * D_401 + + M_110 * D_410 + M_200 * D_500; /* Compute 5th order field tensor terms (addition to rank 4) */ - l_b->F_004 += - m_a->M_001 * pot.D_005 + m_a->M_010 * pot.D_014 + m_a->M_100 * pot.D_104; - l_b->F_013 += - m_a->M_001 * pot.D_014 + m_a->M_010 * pot.D_023 + m_a->M_100 * pot.D_113; - l_b->F_022 += - m_a->M_001 * pot.D_023 + m_a->M_010 * pot.D_032 + m_a->M_100 * pot.D_122; - l_b->F_031 += - m_a->M_001 * pot.D_032 + m_a->M_010 * pot.D_041 + m_a->M_100 * pot.D_131; - l_b->F_040 += - m_a->M_001 * pot.D_041 + m_a->M_010 * pot.D_050 + m_a->M_100 * pot.D_140; - l_b->F_103 += - m_a->M_001 * pot.D_104 + m_a->M_010 * pot.D_113 + m_a->M_100 * pot.D_203; - l_b->F_112 += - m_a->M_001 * pot.D_113 + m_a->M_010 * pot.D_122 + m_a->M_100 * pot.D_212; - l_b->F_121 += - m_a->M_001 * pot.D_122 + m_a->M_010 * pot.D_131 + m_a->M_100 * pot.D_221; - l_b->F_130 += - m_a->M_001 * pot.D_131 + m_a->M_010 * pot.D_140 + m_a->M_100 * pot.D_230; - l_b->F_202 += - m_a->M_001 * pot.D_203 + m_a->M_010 * pot.D_212 + m_a->M_100 * pot.D_302; - l_b->F_211 += - m_a->M_001 * pot.D_212 + m_a->M_010 * pot.D_221 + m_a->M_100 * pot.D_311; - l_b->F_220 += - m_a->M_001 * pot.D_221 + m_a->M_010 * pot.D_230 + m_a->M_100 * pot.D_320; - l_b->F_301 += - m_a->M_001 * pot.D_302 + m_a->M_010 * pot.D_311 + m_a->M_100 * pot.D_401; - l_b->F_310 += - m_a->M_001 * pot.D_311 + m_a->M_010 * pot.D_320 + m_a->M_100 * pot.D_410; - l_b->F_400 += - m_a->M_001 * pot.D_401 + m_a->M_010 * pot.D_410 + m_a->M_100 * pot.D_500; + l_b->F_004 += M_001 * D_005 + M_010 * D_014 + M_100 * D_104; + l_b->F_013 += M_001 * D_014 + M_010 * 
D_023 + M_100 * D_113; + l_b->F_022 += M_001 * D_023 + M_010 * D_032 + M_100 * D_122; + l_b->F_031 += M_001 * D_032 + M_010 * D_041 + M_100 * D_131; + l_b->F_040 += M_001 * D_041 + M_010 * D_050 + M_100 * D_140; + l_b->F_103 += M_001 * D_104 + M_010 * D_113 + M_100 * D_203; + l_b->F_112 += M_001 * D_113 + M_010 * D_122 + M_100 * D_212; + l_b->F_121 += M_001 * D_122 + M_010 * D_131 + M_100 * D_221; + l_b->F_130 += M_001 * D_131 + M_010 * D_140 + M_100 * D_230; + l_b->F_202 += M_001 * D_203 + M_010 * D_212 + M_100 * D_302; + l_b->F_211 += M_001 * D_212 + M_010 * D_221 + M_100 * D_311; + l_b->F_220 += M_001 * D_221 + M_010 * D_230 + M_100 * D_320; + l_b->F_301 += M_001 * D_302 + M_010 * D_311 + M_100 * D_401; + l_b->F_310 += M_001 * D_311 + M_010 * D_320 + M_100 * D_410; + l_b->F_400 += M_001 * D_401 + M_010 * D_410 + M_100 * D_500; /* Compute 5th order field tensor terms (addition to rank 5) */ - l_b->F_005 += m_a->M_000 * pot.D_005; - l_b->F_014 += m_a->M_000 * pot.D_014; - l_b->F_023 += m_a->M_000 * pot.D_023; - l_b->F_032 += m_a->M_000 * pot.D_032; - l_b->F_041 += m_a->M_000 * pot.D_041; - l_b->F_050 += m_a->M_000 * pot.D_050; - l_b->F_104 += m_a->M_000 * pot.D_104; - l_b->F_113 += m_a->M_000 * pot.D_113; - l_b->F_122 += m_a->M_000 * pot.D_122; - l_b->F_131 += m_a->M_000 * pot.D_131; - l_b->F_140 += m_a->M_000 * pot.D_140; - l_b->F_203 += m_a->M_000 * pot.D_203; - l_b->F_212 += m_a->M_000 * pot.D_212; - l_b->F_221 += m_a->M_000 * pot.D_221; - l_b->F_230 += m_a->M_000 * pot.D_230; - l_b->F_302 += m_a->M_000 * pot.D_302; - l_b->F_311 += m_a->M_000 * pot.D_311; - l_b->F_320 += m_a->M_000 * pot.D_320; - l_b->F_401 += m_a->M_000 * pot.D_401; - l_b->F_410 += m_a->M_000 * pot.D_410; - l_b->F_500 += m_a->M_000 * pot.D_500; + l_b->F_005 += M_000 * D_005; + l_b->F_014 += M_000 * D_014; + l_b->F_023 += M_000 * D_023; + l_b->F_032 += M_000 * D_032; + l_b->F_041 += M_000 * D_041; + l_b->F_050 += M_000 * D_050; + l_b->F_104 += M_000 * D_104; + l_b->F_113 += M_000 * D_113; + 
l_b->F_122 += M_000 * D_122; + l_b->F_131 += M_000 * D_131; + l_b->F_140 += M_000 * D_140; + l_b->F_203 += M_000 * D_203; + l_b->F_212 += M_000 * D_212; + l_b->F_221 += M_000 * D_221; + l_b->F_230 += M_000 * D_230; + l_b->F_302 += M_000 * D_302; + l_b->F_311 += M_000 * D_311; + l_b->F_320 += M_000 * D_320; + l_b->F_401 += M_000 * D_401; + l_b->F_410 += M_000 * D_410; + l_b->F_500 += M_000 * D_500; #endif #if SELF_GRAVITY_MULTIPOLE_ORDER > 5 @@ -1928,6 +1953,109 @@ INLINE static void gravity_M2L(struct grav_tensor *l_b, #endif } +/** + * @brief Compute the field tensor due to a multipole. + * + * @param l_b The field tensor to compute. + * @param m_a The multipole. + * @param pos_b The position of the field tensor. + * @param pos_a The position of the multipole. + * @param props The #gravity_props of this calculation. + * @param periodic Is the calculation periodic ? + * @param dim The size of the simulation box. + * @param rs_inv The inverse of the gravity mesh-smoothing scale. + */ +INLINE static void gravity_M2L_nonsym( + struct grav_tensor *l_b, const struct multipole *m_a, const double pos_b[3], + const double pos_a[3], const struct gravity_props *props, + const int periodic, const double dim[3], const float rs_inv) { + + /* Recover some constants */ + const float eps = props->epsilon_cur; + const float eps_inv = props->epsilon_cur_inv; + + /* Compute distance vector */ + float dx = (float)(pos_b[0] - pos_a[0]); + float dy = (float)(pos_b[1] - pos_a[1]); + float dz = (float)(pos_b[2] - pos_a[2]); + + /* Apply BC */ + if (periodic) { + dx = nearest(dx, dim[0]); + dy = nearest(dy, dim[1]); + dz = nearest(dz, dim[2]); + } + + /* Compute distance */ + const float r2 = dx * dx + dy * dy + dz * dz; + const float r_inv = 1. 
/ sqrtf(r2); + + /* Compute all derivatives */ + struct potential_derivatives_M2L pot; + potential_derivatives_compute_M2L(dx, dy, dz, r2, r_inv, eps, eps_inv, + periodic, rs_inv, &pot); + + /* Do the M2L tensor multiplication */ + gravity_M2L_apply(l_b, m_a, &pot); +} + +/** + * @brief Compute the field tensor due to a multipole and the symmetric + * equivalent. + * + * @param l_a The first field tensor to compute. + * @param l_b The second field tensor to compute. + * @param m_a The first multipole. + * @param m_b The second multipole. + * @param pos_a The position of the first m-pole and field tensor. + * @param pos_b The position of the second m-pole and field tensor. + * @param props The #gravity_props of this calculation. + * @param periodic Is the calculation periodic ? + * @param dim The size of the simulation box. + * @param rs_inv The inverse of the gravity mesh-smoothing scale. + */ +INLINE static void gravity_M2L_symmetric( + struct grav_tensor *restrict l_a, struct grav_tensor *restrict l_b, + const struct multipole *restrict m_a, const struct multipole *restrict m_b, + const double pos_a[3], const double pos_b[3], + const struct gravity_props *props, const int periodic, const double dim[3], + const float rs_inv) { + + /* Recover some constants */ + const float eps = props->epsilon_cur; + const float eps_inv = props->epsilon_cur_inv; + + /* Compute distance vector */ + float dx = (float)(pos_b[0] - pos_a[0]); + float dy = (float)(pos_b[1] - pos_a[1]); + float dz = (float)(pos_b[2] - pos_a[2]); + + /* Apply BC */ + if (periodic) { + dx = nearest(dx, dim[0]); + dy = nearest(dy, dim[1]); + dz = nearest(dz, dim[2]); + } + + /* Compute distance */ + const float r2 = dx * dx + dy * dy + dz * dz; + const float r_inv = 1. 
/ sqrtf(r2); + + /* Compute all derivatives */ + struct potential_derivatives_M2L pot; + potential_derivatives_compute_M2L(dx, dy, dz, r2, r_inv, eps, eps_inv, + periodic, rs_inv, &pot); + + /* Do the first M2L tensor multiplication */ + gravity_M2L_apply(l_b, m_a, &pot); + + /* Flip the signs of odd derivatives */ + potential_derivatives_flip_signs(&pot); + + /* Do the second M2L tensor multiplication */ + gravity_M2L_apply(l_a, m_b, &pot); +} + /** * @brief Creates a copy of #grav_tensor shifted to a new location. * @@ -1938,8 +2066,8 @@ INLINE static void gravity_M2L(struct grav_tensor *l_b, * @param pos_a The position to which m_b will be shifted. * @param pos_b The current postion of the multipole to shift. */ -INLINE static void gravity_L2L(struct grav_tensor *la, - const struct grav_tensor *lb, +INLINE static void gravity_L2L(struct grav_tensor *restrict la, + const struct grav_tensor *restrict lb, const double pos_a[3], const double pos_b[3]) { /* Initialise everything to zero */ diff --git a/src/outputlist.c b/src/outputlist.c index 782bdeb3eb53aeb1c259ca0283c8ccaa15d68949..2ab904d4fd0b7008b324f3c37a5cab6c6b337520 100644 --- a/src/outputlist.c +++ b/src/outputlist.c @@ -112,6 +112,9 @@ void output_list_read_file(struct output_list *outputlist, const char *filename, ind += 1; } + /* Cleanup */ + free(line); + if (ind != outputlist->size) error("Did not read the correct number of output times."); @@ -208,7 +211,8 @@ void output_list_read_next_time(struct output_list *t, const struct engine *e, * time) */ void output_list_init(struct output_list **list, const struct engine *e, - const char *name, double *delta_time, double *time_first) { + const char *name, double *delta_time, + double *time_first) { struct swift_params *params = e->parameter_file; /* get cosmo */ @@ -265,8 +269,12 @@ void output_list_print(const struct output_list *outputlist) { /** * @brief Clean an #output_list */ -void output_list_clean(struct output_list *outputlist) { - 
free(outputlist->times); +void output_list_clean(struct output_list **outputlist) { + if (*outputlist) { + free((*outputlist)->times); + free(*outputlist); + *outputlist = NULL; + } } /** diff --git a/src/outputlist.h b/src/outputlist.h index 6045d75ea29f0aab44252835147502f3df0de20c..b7b12ca32f469c70f716553b30a15f48198f8e5e 100644 --- a/src/outputlist.h +++ b/src/outputlist.h @@ -58,7 +58,7 @@ void output_list_read_next_time(struct output_list *t, const struct engine *e, void output_list_init(struct output_list **list, const struct engine *e, const char *name, double *delta_time, double *time_first); void output_list_print(const struct output_list *outputlist); -void output_list_clean(struct output_list *outputlist); +void output_list_clean(struct output_list **outputlist); void output_list_struct_dump(struct output_list *list, FILE *stream); void output_list_struct_restore(struct output_list *list, FILE *stream); diff --git a/src/parallel_io.c b/src/parallel_io.c index b82443b33ba767cfe2050cf300535db924bfb537..febbb4a7db6796fd751cb1eacc174a42936d19a2 100644 --- a/src/parallel_io.c +++ b/src/parallel_io.c @@ -31,6 +31,7 @@ #include <stdio.h> #include <stdlib.h> #include <string.h> +#include <time.h> /* This object's header. 
*/ #include "parallel_io.h" @@ -137,7 +138,37 @@ void readArray_chunk(hid_t h_data, hid_t h_plist_id, for (size_t i = 0; i < num_elements; ++i) temp_d[i] *= factor; } else { float* temp_f = (float*)temp; - for (size_t i = 0; i < num_elements; ++i) temp_f[i] *= factor; + +#ifdef SWIFT_DEBUG_CHECKS + float maximum = 0.; + float minimum = FLT_MAX; +#endif + + /* Loop that converts the Units */ + for (size_t i = 0; i < num_elements; ++i) { + +#ifdef SWIFT_DEBUG_CHECKS + /* Find the absolute minimum and maximum values */ + const float abstemp_f = fabsf(temp_f[i]); + if (abstemp_f != 0.f) { + maximum = max(maximum, abstemp_f); + minimum = min(minimum, abstemp_f); + } +#endif + + /* Convert the float units */ + temp_f[i] *= factor; + } + +#ifdef SWIFT_DEBUG_CHECKS + /* The two possible errors: larger than float or smaller + * than float precission. */ + if (factor * maximum > FLT_MAX) { + error("Unit conversion results in numbers larger than floats"); + } else if (factor * minimum < FLT_MIN) { + error("Numbers smaller than float precision"); + } +#endif } } @@ -302,6 +333,8 @@ void readArray(hid_t grp, struct io_props props, size_t N, long long N_total, N -= max_chunk_size; props.field += max_chunk_size * props.partSize; /* char* on the field */ props.parts += max_chunk_size; /* part* on the part */ + props.xparts += max_chunk_size; /* xpart* on the xpart */ + props.gparts += max_chunk_size; /* gpart* on the gpart */ offset += max_chunk_size; redo = 1; } else { @@ -351,13 +384,13 @@ void prepareArray(struct engine* e, hid_t grp, char* fileName, FILE* xmfFile, rank = 2; shape[0] = N_total; shape[1] = props.dimension; - chunk_shape[0] = 1 << 16; /* Just a guess...*/ + chunk_shape[0] = 1 << 20; /* Just a guess...*/ chunk_shape[1] = props.dimension; } else { rank = 1; shape[0] = N_total; shape[1] = 0; - chunk_shape[0] = 1 << 16; /* Just a guess...*/ + chunk_shape[0] = 1 << 20; /* Just a guess...*/ chunk_shape[1] = 0; } @@ -398,8 +431,9 @@ void prepareArray(struct engine* e, 
hid_t grp, char* fileName, FILE* xmfFile, io_write_attribute_s(h_data, "Conversion factor", buffer); /* Add a line to the XMF */ - xmf_write_line(xmfFile, fileName, partTypeGroupName, props.name, N_total, - props.dimension, props.type); + if (xmfFile != NULL) + xmf_write_line(xmfFile, fileName, partTypeGroupName, props.name, N_total, + props.dimension, props.type); /* Close everything */ H5Pclose(h_plist_id); @@ -574,6 +608,8 @@ void writeArray(struct engine* e, hid_t grp, char* fileName, N -= max_chunk_size; props.field += max_chunk_size * props.partSize; /* char* on the field */ props.parts += max_chunk_size; /* part* on the part */ + props.xparts += max_chunk_size; /* xpart* on the xpart */ + props.gparts += max_chunk_size; /* gpart* on the gpart */ offset += max_chunk_size; redo = 1; } else { @@ -613,7 +649,6 @@ void writeArray(struct engine* e, hid_t grp, char* fileName, * @param Ngas (output) The number of particles read from the file. * @param Ngparts (output) The number of particles read from the file. * @param Nstars (output) The number of particles read from the file. - * @param periodic (output) 1 if the volume is periodic, 0 if not. * @param flag_entropy (output) 1 if the ICs contained Entropy in the * InternalEnergy field * @param with_hydro Are we running with hydro ? 
@@ -635,11 +670,11 @@ void writeArray(struct engine* e, hid_t grp, char* fileName, void read_ic_parallel(char* fileName, const struct unit_system* internal_units, double dim[3], struct part** parts, struct gpart** gparts, struct spart** sparts, size_t* Ngas, size_t* Ngparts, - size_t* Nstars, int* periodic, int* flag_entropy, - int with_hydro, int with_gravity, int with_stars, - int cleanup_h, int cleanup_sqrt_a, double h, double a, - int mpi_rank, int mpi_size, MPI_Comm comm, MPI_Info info, - int n_threads, int dry_run) { + size_t* Nstars, int* flag_entropy, int with_hydro, + int with_gravity, int with_stars, int cleanup_h, + int cleanup_sqrt_a, double h, double a, int mpi_rank, + int mpi_size, MPI_Comm comm, MPI_Info info, int n_threads, + int dry_run) { hid_t h_file = 0, h_grp = 0; /* GADGET has only cubic boxes (in cosmological mode) */ @@ -659,17 +694,6 @@ void read_ic_parallel(char* fileName, const struct unit_system* internal_units, h_file = H5Fopen(fileName, H5F_ACC_RDONLY, h_plist_id); if (h_file < 0) error("Error while opening file '%s'.", fileName); - /* Open header to read simulation properties */ - /* message("Reading runtime parameters..."); */ - h_grp = H5Gopen(h_file, "/RuntimePars", H5P_DEFAULT); - if (h_grp < 0) error("Error while opening runtime parameters\n"); - - /* Read the relevant information */ - io_read_attribute(h_grp, "PeriodicBoundariesOn", INT, periodic); - - /* Close runtime parameters */ - H5Gclose(h_grp); - /* Open header to read simulation properties */ /* message("Reading file header..."); */ h_grp = H5Gopen(h_file, "/Header", H5P_DEFAULT); @@ -684,6 +708,21 @@ void read_ic_parallel(char* fileName, const struct unit_system* internal_units, error("ICs dimensionality (%dD) does not match code dimensionality (%dD)", dimension, (int)hydro_dimension); + /* Check whether the number of files is specified (if the info exists) */ + const hid_t hid_files = H5Aexists(h_grp, "NumFilesPerSnapshot"); + int num_files = 1; + if (hid_files < 0) + 
error( + "Error while testing the existance of 'NumFilesPerSnapshot' attribute"); + if (hid_files > 0) + io_read_attribute(h_grp, "NumFilesPerSnapshot", INT, &num_files); + if (num_files != 1) + error( + "ICs are split over multiples files (%d). SWIFT cannot handle this " + "case. The script /tools/combine_ics.py is availalbe in the repository " + "to combine files into a valid input file.", + num_files); + /* Read the relevant information and print status */ int flag_entropy_temp[6]; io_read_attribute(h_grp, "Flag_Entropy_ICs", INT, flag_entropy_temp); @@ -778,12 +817,12 @@ void read_ic_parallel(char* fileName, const struct unit_system* internal_units, bzero(*parts, *Ngas * sizeof(struct part)); } - /* Allocate memory to store star particles */ + /* Allocate memory to store stars particles */ if (with_stars) { - *Nstars = N[swift_type_star]; + *Nstars = N[swift_type_stars]; if (posix_memalign((void**)sparts, spart_align, *Nstars * sizeof(struct spart)) != 0) - error("Error while allocating memory for star particles"); + error("Error while allocating memory for stars particles"); bzero(*sparts, *Nstars * sizeof(struct spart)); } @@ -792,7 +831,7 @@ void read_ic_parallel(char* fileName, const struct unit_system* internal_units, Ndm = N[1]; *Ngparts = (with_hydro ? N[swift_type_gas] : 0) + N[swift_type_dark_matter] + - (with_stars ? N[swift_type_star] : 0); + (with_stars ? 
N[swift_type_stars] : 0); if (posix_memalign((void**)gparts, gpart_align, *Ngparts * sizeof(struct gpart)) != 0) error("Error while allocating memory for gravity particles"); @@ -841,10 +880,10 @@ void read_ic_parallel(char* fileName, const struct unit_system* internal_units, } break; - case swift_type_star: + case swift_type_stars: if (with_stars) { Nparticles = *Nstars; - star_read_particles(*sparts, list, &num_fields); + stars_read_particles(*sparts, list, &num_fields); } break; @@ -877,9 +916,9 @@ void read_ic_parallel(char* fileName, const struct unit_system* internal_units, /* Duplicate the hydro particles into gparts */ if (with_hydro) io_duplicate_hydro_gparts(&tp, *parts, *gparts, *Ngas, Ndm); - /* Duplicate the star particles into gparts */ + /* Duplicate the stars particles into gparts */ if (with_stars) - io_duplicate_star_gparts(&tp, *sparts, *gparts, *Nstars, Ndm + *Ngas); + io_duplicate_stars_gparts(&tp, *sparts, *gparts, *Nstars, Ndm + *Ngas); threadpool_clean(&tp); } @@ -926,12 +965,12 @@ void prepare_file(struct engine* e, const char* baseName, long long N_total[6], /* HDF5 File name */ char fileName[FILENAME_BUFFER_SIZE]; - if (e->snapshot_label_delta == 1) + if (e->snapshot_int_time_label_on) + snprintf(fileName, FILENAME_BUFFER_SIZE, "%s_%06i.hdf5", baseName, + (int)round(e->time)); + else snprintf(fileName, FILENAME_BUFFER_SIZE, "%s_%04i.hdf5", baseName, e->snapshot_output_count); - else - snprintf(fileName, FILENAME_BUFFER_SIZE, "%s_%06i.hdf5", baseName, - e->snapshot_output_count * e->snapshot_label_delta); /* Open HDF5 file with the chosen parameters */ hid_t h_file = H5Fcreate(fileName, H5F_ACC_TRUNC, H5P_DEFAULT, H5P_DEFAULT); @@ -967,6 +1006,8 @@ void prepare_file(struct engine* e, const char* baseName, long long N_total[6], io_write_attribute(h_grp, "Redshift", DOUBLE, &e->cosmology->z, 1); io_write_attribute(h_grp, "Scale-factor", DOUBLE, &e->cosmology->a, 1); io_write_attribute_s(h_grp, "Code", "SWIFT"); + time_t tm = time(NULL); + 
io_write_attribute_s(h_grp, "Snapshot date", ctime(&tm)); /* GADGET-2 legacy values */ /* Number of particles of each type */ @@ -1013,7 +1054,7 @@ void prepare_file(struct engine* e, const char* baseName, long long N_total[6], h_grp = H5Gcreate(h_file, "/SubgridScheme", H5P_DEFAULT, H5P_DEFAULT, H5P_DEFAULT); if (h_grp < 0) error("Error while creating subgrid group"); - cooling_write_flavour(h_grp); + cooling_write_flavour(h_grp, e->cooling_func); chemistry_write_flavour(h_grp); H5Gclose(h_grp); @@ -1026,7 +1067,16 @@ void prepare_file(struct engine* e, const char* baseName, long long N_total[6], H5Gclose(h_grp); } - /* Print the gravity parameters */ + /* Print the stellar parameters */ + if (e->policy & engine_policy_stars) { + h_grp = H5Gcreate(h_file, "/StarsScheme", H5P_DEFAULT, H5P_DEFAULT, + H5P_DEFAULT); + if (h_grp < 0) error("Error while creating stars group"); + stars_props_print_snapshot(h_grp, e->stars_properties); + H5Gclose(h_grp); + } + + /* Print the cosmological parameters */ h_grp = H5Gcreate(h_file, "/Cosmology", H5P_DEFAULT, H5P_DEFAULT, H5P_DEFAULT); if (h_grp < 0) error("Error while creating cosmology group"); @@ -1092,8 +1142,8 @@ void prepare_file(struct engine* e, const char* baseName, long long N_total[6], darkmatter_write_particles(gparts, list, &num_fields); break; - case swift_type_star: - star_write_particles(sparts, list, &num_fields); + case swift_type_stars: + stars_write_particles(sparts, list, &num_fields); break; default: @@ -1155,22 +1205,30 @@ void write_output_parallel(struct engine* e, const char* baseName, int mpi_rank, int mpi_size, MPI_Comm comm, MPI_Info info) { - const size_t Ngas = e->s->nr_parts; - const size_t Nstars = e->s->nr_sparts; - const size_t Ntot = e->s->nr_gparts; const struct part* parts = e->s->parts; const struct xpart* xparts = e->s->xparts; const struct gpart* gparts = e->s->gparts; - struct gpart* dmparts = NULL; const struct spart* sparts = e->s->sparts; - const struct cooling_function_data* cooling 
= e->cooling_func; struct swift_params* params = e->parameter_file; - /* Number of unassociated gparts */ - const size_t Ndm = Ntot > 0 ? Ntot - (Ngas + Nstars) : 0; + /* Number of particles currently in the arrays */ + const size_t Ntot = e->s->nr_gparts; + const size_t Ngas = e->s->nr_parts; + const size_t Nstars = e->s->nr_sparts; + // const size_t Nbaryons = Ngas + Nstars; + // const size_t Ndm = Ntot > 0 ? Ntot - Nbaryons : 0; + + /* Number of particles that we will write */ + const size_t Ntot_written = e->s->nr_gparts - e->s->nr_inhibited_sparts; + const size_t Ngas_written = e->s->nr_parts - e->s->nr_inhibited_parts; + const size_t Nstars_written = e->s->nr_sparts - e->s->nr_inhibited_gparts; + const size_t Nbaryons_written = Ngas_written + Nstars_written; + const size_t Ndm_written = + Ntot_written > 0 ? Ntot_written - Nbaryons_written : 0; /* Compute offset in the file and total number of particles */ - size_t N[swift_type_count] = {Ngas, Ndm, 0, 0, Nstars, 0}; + size_t N[swift_type_count] = { + Ngas_written, Ndm_written, 0, 0, Nstars_written, 0}; long long N_total[swift_type_count] = {0}; long long offset[swift_type_count] = {0}; MPI_Exscan(&N, &offset, swift_type_count, MPI_LONG_LONG_INT, MPI_SUM, comm); @@ -1204,8 +1262,12 @@ void write_output_parallel(struct engine* e, const char* baseName, /* HDF5 File name */ char fileName[FILENAME_BUFFER_SIZE]; - snprintf(fileName, FILENAME_BUFFER_SIZE, "%s_%04i.hdf5", baseName, - e->snapshot_output_count); + if (e->snapshot_int_time_label_on) + snprintf(fileName, FILENAME_BUFFER_SIZE, "%s_%06i.hdf5", baseName, + (int)round(e->time)); + else + snprintf(fileName, FILENAME_BUFFER_SIZE, "%s_%04i.hdf5", baseName, + e->snapshot_output_count); /* Prepare some file-access properties */ hid_t plist_id = H5Pcreate(H5P_FILE_ACCESS); @@ -1240,8 +1302,8 @@ void write_output_parallel(struct engine* e, const char* baseName, #if H5_VERSION_GE(1, 10, 0) h_err = H5Pset_all_coll_metadata_ops(plist_id, 1); if (h_err < 0) error("Error 
setting collective meta-data on all ops"); - h_err = H5Pset_coll_metadata_write(plist_id, 1); - if (h_err < 0) error("Error setting collective meta-data writes"); + // h_err = H5Pset_coll_metadata_write(plist_id, 1); + // if (h_err < 0) error("Error setting collective meta-data writes"); #endif #ifdef IO_SPEED_MEASUREMENT @@ -1317,38 +1379,98 @@ void write_output_parallel(struct engine* e, const char* baseName, struct io_props list[100]; size_t Nparticles = 0; + struct part* parts_written = NULL; + struct xpart* xparts_written = NULL; + struct gpart* gparts_written = NULL; + struct spart* sparts_written = NULL; + /* Write particle fields from the particle structure */ switch (ptype) { - case swift_type_gas: - Nparticles = Ngas; - hydro_write_particles(parts, xparts, list, &num_fields); - num_fields += chemistry_write_particles(parts, list + num_fields); - num_fields += - cooling_write_particles(xparts, list + num_fields, cooling); - break; + case swift_type_gas: { + if (Ngas == Ngas_written) { + + /* No inhibted particles: easy case */ + Nparticles = Ngas; + hydro_write_particles(parts, xparts, list, &num_fields); + num_fields += chemistry_write_particles(parts, list + num_fields); + num_fields += cooling_write_particles(xparts, list + num_fields, + e->cooling_func); + } else { + + /* Ok, we need to fish out the particles we want */ + Nparticles = Ngas_written; + + /* Allocate temporary arrays */ + if (posix_memalign((void**)&parts_written, part_align, + Ngas_written * sizeof(struct part)) != 0) + error("Error while allocating temporart memory for parts"); + if (posix_memalign((void**)&xparts_written, xpart_align, + Ngas_written * sizeof(struct xpart)) != 0) + error("Error while allocating temporart memory for xparts"); + + /* Collect the particles we want to write */ + io_collect_parts_to_write(parts, xparts, parts_written, + xparts_written, Ngas, Ngas_written); + + /* Select the fields to write */ + hydro_write_particles(parts_written, xparts_written, list, + 
&num_fields); + num_fields += + chemistry_write_particles(parts_written, list + num_fields); + num_fields += cooling_write_particles( + xparts_written, list + num_fields, e->cooling_func); + } + } break; - case swift_type_dark_matter: - /* Allocate temporary array */ - if (posix_memalign((void**)&dmparts, gpart_align, - Ndm * sizeof(struct gpart)) != 0) - error( - "Error while allocating temporart memory for " - "DM particles"); - bzero(dmparts, Ndm * sizeof(struct gpart)); - - /* Collect the DM particles from gpart */ - io_collect_dm_gparts(gparts, Ntot, dmparts, Ndm); - - /* Write DM particles */ - Nparticles = Ndm; - darkmatter_write_particles(dmparts, list, &num_fields); - break; + case swift_type_dark_matter: { + if (Ntot == Ndm_written) { - case swift_type_star: - Nparticles = Nstars; - star_write_particles(sparts, list, &num_fields); - break; + /* This is a DM-only run without inhibited particles */ + Nparticles = Ntot; + darkmatter_write_particles(gparts, list, &num_fields); + } else { + + /* Ok, we need to fish out the particles we want */ + Nparticles = Ndm_written; + + /* Allocate temporary array */ + if (posix_memalign((void**)&gparts_written, gpart_align, + Ndm_written * sizeof(struct gpart)) != 0) + error("Error while allocating temporart memory for gparts"); + + /* Collect the non-inhibited DM particles from gpart */ + io_collect_gparts_to_write(gparts, gparts_written, Ntot, Ndm_written); + + /* Write DM particles */ + darkmatter_write_particles(gparts_written, list, &num_fields); + } + } break; + + case swift_type_stars: { + if (Nstars == Nstars_written) { + + /* No inhibted particles: easy case */ + Nparticles = Nstars; + stars_write_particles(sparts, list, &num_fields); + } else { + + /* Ok, we need to fish out the particles we want */ + Nparticles = Nstars_written; + + /* Allocate temporary arrays */ + if (posix_memalign((void**)&sparts_written, spart_align, + Nstars_written * sizeof(struct spart)) != 0) + error("Error while allocating temporart 
memory for sparts"); + + /* Collect the particles we want to write */ + io_collect_sparts_to_write(sparts, sparts_written, Nstars, + Nstars_written); + + /* Select the fields to write */ + stars_write_particles(sparts_written, list, &num_fields); + } + } break; default: error("Particle Type %d not yet supported. Aborting", ptype); @@ -1370,10 +1492,10 @@ void write_output_parallel(struct engine* e, const char* baseName, } /* Free temporary array */ - if (dmparts) { - free(dmparts); - dmparts = 0; - } + if (parts_written) free(parts_written); + if (xparts_written) free(xparts_written); + if (gparts_written) free(gparts_written); + if (sparts_written) free(sparts_written); #ifdef IO_SPEED_MEASUREMENT MPI_Barrier(MPI_COMM_WORLD); diff --git a/src/parallel_io.h b/src/parallel_io.h index 668b6f83443fe4c39ddf3269c8d2236e72588e32..9cd775347f0d5fbb3bc1b17664e0d5dba734d795 100644 --- a/src/parallel_io.h +++ b/src/parallel_io.h @@ -25,22 +25,21 @@ #if defined(HAVE_HDF5) && defined(WITH_MPI) && defined(HAVE_PARALLEL_HDF5) /* MPI headers. */ -#ifdef WITH_MPI #include <mpi.h> -#endif /* Includes. 
*/ #include "engine.h" +#include "io_properties.h" #include "part.h" #include "units.h" void read_ic_parallel(char* fileName, const struct unit_system* internal_units, double dim[3], struct part** parts, struct gpart** gparts, struct spart** sparts, size_t* Ngas, size_t* Ngparts, - size_t* Nsparts, int* periodic, int* flag_entropy, - int with_hydro, int with_gravity, int with_stars, - int cleanup_h, int cleanup_sqrt_a, double h, double a, - int mpi_rank, int mpi_size, MPI_Comm comm, MPI_Info info, + size_t* Nsparts, int* flag_entropy, int with_hydro, + int with_gravity, int with_stars, int cleanup_h, + int cleanup_sqrt_a, double h, double a, int mpi_rank, + int mpi_size, MPI_Comm comm, MPI_Info info, int nr_threads, int dry_run); void write_output_parallel(struct engine* e, const char* baseName, @@ -48,6 +47,13 @@ void write_output_parallel(struct engine* e, const char* baseName, const struct unit_system* snapshot_units, int mpi_rank, int mpi_size, MPI_Comm comm, MPI_Info info); + +void writeArray(struct engine* e, hid_t grp, char* fileName, + char* partTypeGroupName, struct io_props props, size_t N, + long long N_total, int mpi_rank, long long offset, + const struct unit_system* internal_units, + const struct unit_system* snapshot_units); + #endif #endif /* SWIFT_PARALLEL_IO_H */ diff --git a/src/parser.c b/src/parser.c index d804be507e81ca265b31a6a2699d4f0b998f7c3b..57592d57abb78100d113b91710af68f7b1c3e32d 100644 --- a/src/parser.c +++ b/src/parser.c @@ -35,6 +35,7 @@ #include "error.h" #include "restart.h" #include "tools.h" +#include "version.h" #define PARSER_COMMENT_STRING "#" #define PARSER_COMMENT_CHAR '#' @@ -363,7 +364,12 @@ static void parse_line(char *line, struct swift_params *params) { /* Check if the line contains a value and parse it. */ if (strchr(trim_line, PARSER_VALUE_CHAR)) { - parse_value(trim_line, params); + + /* Trim trailing space before parsing line for a value. 
*/ + char no_space_line[PARSER_MAX_LINE_SIZE]; + strcpy(no_space_line, trim_trailing(trim_line)); + + parse_value(no_space_line, params); } /* Check for invalid lines,not including the start and end of file. */ else if (strcmp(trim_line, PARSER_START_OF_FILE) && @@ -1153,7 +1159,13 @@ void parser_write_params_to_file(const struct swift_params *params, char *token; /* Start of file identifier in YAML. */ - fprintf(file, "%s\n", PARSER_START_OF_FILE); + fprintf(file, "%s\n\n", PARSER_START_OF_FILE); + + fprintf(file, "# SWIFT used parameter file\n"); + fprintf(file, "# Code version: %s\n", package_version()); + fprintf(file, "# git revision: %s\n", git_revision()); + fprintf(file, "# git branch: %s\n", git_branch()); + fprintf(file, "# git date: %s\n", git_date()); /* Flags to track which parameters are written. */ int *written = (int *)calloc(params->paramCount, sizeof(int)); diff --git a/src/part.c b/src/part.c index 050e10e9cdd0ab56adcd34ba3e6f2d35c274f14a..3a626e652cf28f0376cadc1d9a40ab85b752e6c1 100644 --- a/src/part.c +++ b/src/part.c @@ -26,8 +26,10 @@ #endif /* This object's header. 
*/ -#include "error.h" #include "multipole.h" + +/* Local headers */ +#include "error.h" #include "part.h" /** @@ -88,7 +90,7 @@ void part_relink_parts_to_gparts(struct gpart *gparts, size_t N, void part_relink_sparts_to_gparts(struct gpart *gparts, size_t N, struct spart *sparts) { for (size_t k = 0; k < N; k++) { - if (gparts[k].type == swift_type_star) { + if (gparts[k].type == swift_type_stars) { sparts[-gparts[k].id_or_neg_offset].gpart = &gparts[k]; } } @@ -108,7 +110,7 @@ void part_relink_all_parts_to_gparts(struct gpart *gparts, size_t N, for (size_t k = 0; k < N; k++) { if (gparts[k].type == swift_type_gas) { parts[-gparts[k].id_or_neg_offset].gpart = &gparts[k]; - } else if (gparts[k].type == swift_type_star) { + } else if (gparts[k].type == swift_type_stars) { sparts[-gparts[k].id_or_neg_offset].gpart = &gparts[k]; } } @@ -133,6 +135,8 @@ void part_verify_links(struct part *parts, struct gpart *gparts, struct spart *sparts, size_t nr_parts, size_t nr_gparts, size_t nr_sparts, int verbose) { + ticks tic = getticks(); + for (size_t k = 0; k < nr_gparts; ++k) { /* We have a DM particle */ @@ -171,11 +175,11 @@ void part_verify_links(struct part *parts, struct gpart *gparts, error("Linked particles are not at the same time !"); } - else if (gparts[k].type == swift_type_star) { + else if (gparts[k].type == swift_type_stars) { /* Check that it is linked */ if (gparts[k].id_or_neg_offset > 0) - error("Star gpart not linked to anything !"); + error("Stars gpart not linked to anything !"); /* Find its link */ const struct spart *spart = &sparts[-gparts[k].id_or_neg_offset]; @@ -246,6 +250,9 @@ void part_verify_links(struct part *parts, struct gpart *gparts, } if (verbose) message("All links OK"); + if (verbose) + message("took %.3f %s.", clocks_from_ticks(getticks() - tic), + clocks_getunit()); } #ifdef WITH_MPI @@ -254,7 +261,6 @@ MPI_Datatype part_mpi_type; MPI_Datatype xpart_mpi_type; MPI_Datatype gpart_mpi_type; MPI_Datatype spart_mpi_type; -MPI_Datatype 
multipole_mpi_type; /** * @brief Registers MPI particle types. @@ -287,11 +293,5 @@ void part_create_mpi_types(void) { MPI_Type_commit(&spart_mpi_type) != MPI_SUCCESS) { error("Failed to create MPI type for sparts."); } - if (MPI_Type_contiguous( - sizeof(struct gravity_tensors) / sizeof(unsigned char), MPI_BYTE, - &multipole_mpi_type) != MPI_SUCCESS || - MPI_Type_commit(&multipole_mpi_type) != MPI_SUCCESS) { - error("Failed to create MPI type for multipole."); - } } #endif diff --git a/src/part.h b/src/part.h index bca84cc0212e79e15ffbeeeb0bbcfc714d5481be..64babf4a37696d7cb49b4804ee77773b4e1981fc 100644 --- a/src/part.h +++ b/src/part.h @@ -54,6 +54,9 @@ #elif defined(HOPKINS_PU_SPH) #include "./hydro/PressureEnergy/hydro_part.h" #define hydro_need_extra_init_loop 0 +#elif defined(HOPKINS_PU_SPH_MONAGHAN) +#include "./hydro/PressureEnergyMorrisMonaghanAV/hydro_part.h" +#define hydro_need_extra_init_loop 0 #elif defined(DEFAULT_SPH) #include "./hydro/Default/hydro_part.h" #define hydro_need_extra_init_loop 0 @@ -69,8 +72,8 @@ #include "./hydro/Shadowswift/hydro_part.h" #define hydro_need_extra_init_loop 0 #define EXTRA_HYDRO_LOOP -#elif defined(MINIMAL_MULTI_MAT_SPH) -#include "./hydro/MinimalMultiMat/hydro_part.h" +#elif defined(PLANETARY_SPH) +#include "./hydro/Planetary/hydro_part.h" #define hydro_need_extra_init_loop 0 #else #error "Invalid choice of SPH variant" @@ -86,7 +89,7 @@ #endif /* Import the right star particle definition */ -#include "./stars/Default/star_part.h" +#include "./stars/Default/stars_part.h" void part_relink_gparts_to_parts(struct part *parts, size_t N, ptrdiff_t offset); @@ -108,7 +111,6 @@ extern MPI_Datatype part_mpi_type; extern MPI_Datatype xpart_mpi_type; extern MPI_Datatype gpart_mpi_type; extern MPI_Datatype spart_mpi_type; -extern MPI_Datatype multipole_mpi_type; void part_create_mpi_types(void); #endif diff --git a/src/part_type.c b/src/part_type.c index 
af97bd34aaace93a9faa953c0c9345d83ca3bc34..1f96d4ef1db4b35a92d133e91498ea10ce472c70 100644 --- a/src/part_type.c +++ b/src/part_type.c @@ -20,5 +20,5 @@ /* This object's header. */ #include "part_type.h" -const char* part_type_names[swift_type_count] = {"Gas", "DM", "Dummy", - "Dummy", "Star", "BH"}; +const char* part_type_names[swift_type_count] = {"Gas", "DM", "Dummy", + "Dummy", "Stars", "BH"}; diff --git a/src/part_type.h b/src/part_type.h index fbe2b2aeaea37503635372b0f09f8edde4578721..901f47193fa0e72b362c8dce5199a1d0a20526c9 100644 --- a/src/part_type.h +++ b/src/part_type.h @@ -27,7 +27,7 @@ enum part_type { swift_type_gas = 0, swift_type_dark_matter = 1, - swift_type_star = 4, + swift_type_stars = 4, swift_type_black_hole = 5, swift_type_count } __attribute__((packed)); diff --git a/src/partition.c b/src/partition.c index 98e3e7b670ddd9d849834f1e7f86fa94c2cd335f..bbd7454dd63be6ab5192558fb4a2e3399ea03cfc 100644 --- a/src/partition.c +++ b/src/partition.c @@ -24,7 +24,7 @@ * a grid of cells into geometrically connected regions and distributing * these around a number of MPI nodes. * - * Currently supported partitioning types: grid, vectorise and METIS. + * Currently supported partitioning types: grid, vectorise and METIS/ParMETIS. */ /* Config parameters. */ @@ -37,10 +37,17 @@ #include <stdlib.h> #include <strings.h> +/* Include int min and max values. Define these limits in C++ as well. */ +#define __STDC_LIMIT_MACROS +#include <stdint.h> + /* MPI headers. */ #ifdef WITH_MPI #include <mpi.h> -/* METIS headers only used when MPI is also available. */ +/* METIS/ParMETIS headers only used when MPI is also available. */ +#ifdef HAVE_PARMETIS +#include <parmetis.h> +#endif #ifdef HAVE_METIS #include <metis.h> #endif @@ -55,25 +62,17 @@ #include "space.h" #include "tools.h" -/* Maximum weight used for METIS. */ -#define metis_maxweight 10000.0f - /* Simple descriptions of initial partition types for reports. 
*/ const char *initial_partition_name[] = { - "gridded cells", "vectorized point associated cells", - "METIS particle weighted cells", "METIS unweighted cells"}; + "axis aligned grids of cells", "vectorized point associated cells", + "memory balanced, using particle weighted cells", + "similar sized regions, using unweighted cells"}; /* Simple descriptions of repartition types for reports. */ const char *repartition_name[] = { - "no", - "METIS edge and vertex task cost weights", - "METIS particle count vertex weights", - "METIS task cost edge weights", - "METIS particle count vertex and task cost edge weights", - "METIS vertex task costs and edge delta timebin weights", - "METIS particle count vertex and edge delta timebin weights", - "METIS edge delta timebin weights", -}; + "none", "edge and vertex task cost weights", "task cost edge weights", + "task cost vertex weights", + "vertex task costs and edge delta timebin weights"}; /* Local functions, if needed. */ static int check_complete(struct space *s, int verbose, int nregions); @@ -156,34 +155,39 @@ static void split_vector(struct space *s, int nregions, int *samplecells) { } #endif - /* METIS support - * ============= + /* METIS/ParMETIS support (optional) + * ================================= * - * METIS partitions using a multi-level k-way scheme. We support using this in - * a unweighted scheme, which works well and seems to be guaranteed, and a - * weighted by the number of particles scheme. Note METIS is optional. + * METIS/ParMETIS partitions using a multi-level k-way scheme. We support + * using this in a unweighted scheme, which works well and seems to be + * guaranteed, and a weighted by the number of particles scheme. * - * Repartitioning is based on METIS and uses weights determined from the times - * that cell tasks have taken. These weight the graph edges and vertices, or - * just the edges, with vertex weights from the particle counts or none. 
+ * Repartitioning is based on ParMETIS and uses weights determined from the + * estimated costs that a cells tasks will take or the relative time bins of + * the cells next updates. */ -#if defined(WITH_MPI) && defined(HAVE_METIS) +#if defined(WITH_MPI) && (defined(HAVE_METIS) || defined(HAVE_PARMETIS)) /** - * @brief Fill the METIS xadj and adjncy arrays defining the graph of cells - * in a space. + * @brief Fill the adjncy array defining the graph of cells in a space. * - * See the METIS manual if you want to understand this format. The cell graph - * consists of all nodes as vertices with edges as the connections to all - * neighbours, so we have 26 per vertex. + * See the ParMETIS and METIS manuals if you want to understand this + * format. The cell graph consists of all nodes as vertices with edges as the + * connections to all neighbours, so we have 26 per vertex. Note you will + * also need an xadj array, for METIS that would be: + * + * xadj[0] = 0; + * for (int k = 0; k < s->nr_cells; k++) xadj[k + 1] = xadj[k] + 26; + * + * but each rank needs a different xadj when using ParMETIS. * * @param s the space of cells. - * @param adjncy the METIS adjncy array to fill, must be of size - * 26 * the number of cells in the space. + * @param adjncy the adjncy array to fill, must be of size 26 * the number of + * cells in the space. * @param xadj the METIS xadj array to fill, must be of size * number of cells in space + 1. NULL for not used. */ -static void graph_init_metis(struct space *s, idx_t *adjncy, idx_t *xadj) { +static void graph_init(struct space *s, idx_t *adjncy, idx_t *xadj) { /* Loop over all cells in the space. */ int cid = 0; @@ -227,7 +231,7 @@ static void graph_init_metis(struct space *s, idx_t *adjncy, idx_t *xadj) { } } - /* If given set xadj. */ + /* If given set METIS xadj. 
*/ if (xadj != NULL) { xadj[0] = 0; for (int k = 0; k < s->nr_cells; k++) xadj[k + 1] = xadj[k] + 26; @@ -235,44 +239,129 @@ static void graph_init_metis(struct space *s, idx_t *adjncy, idx_t *xadj) { } #endif -#if defined(WITH_MPI) && defined(HAVE_METIS) +#if defined(WITH_MPI) && (defined(HAVE_METIS) || defined(HAVE_PARMETIS)) +struct counts_mapper_data { + double *counts; + size_t size; + struct space *s; +}; + +/* Generic function for accumulating sized counts for TYPE parts. Note uses + * local memory to reduce contention, the amount of memory required is + * precalculated by an additional loop determining the range of cell IDs. */ +#define ACCUMULATE_SIZES_MAPPER(TYPE) \ + accumulate_sizes_mapper_##TYPE(void *map_data, int num_elements, \ + void *extra_data) { \ + struct TYPE *parts = (struct TYPE *)map_data; \ + struct counts_mapper_data *mydata = \ + (struct counts_mapper_data *)extra_data; \ + double size = mydata->size; \ + int *cdim = mydata->s->cdim; \ + double iwidth[3] = {mydata->s->iwidth[0], mydata->s->iwidth[1], \ + mydata->s->iwidth[2]}; \ + double dim[3] = {mydata->s->dim[0], mydata->s->dim[1], mydata->s->dim[2]}; \ + double *lcounts = NULL; \ + int lcid = mydata->s->nr_cells; \ + int ucid = 0; \ + for (int k = 0; k < num_elements; k++) { \ + for (int j = 0; j < 3; j++) { \ + if (parts[k].x[j] < 0.0) \ + parts[k].x[j] += dim[j]; \ + else if (parts[k].x[j] >= dim[j]) \ + parts[k].x[j] -= dim[j]; \ + } \ + const int cid = \ + cell_getid(cdim, parts[k].x[0] * iwidth[0], \ + parts[k].x[1] * iwidth[1], parts[k].x[2] * iwidth[2]); \ + if (cid > ucid) ucid = cid; \ + if (cid < lcid) lcid = cid; \ + } \ + int nused = ucid - lcid + 1; \ + if ((lcounts = (double *)calloc(sizeof(double), nused)) == NULL) \ + error("Failed to allocate counts thread-specific buffer"); \ + for (int k = 0; k < num_elements; k++) { \ + const int cid = \ + cell_getid(cdim, parts[k].x[0] * iwidth[0], \ + parts[k].x[1] * iwidth[1], parts[k].x[2] * iwidth[2]); \ + lcounts[cid - lcid] 
+= size; \ + } \ + for (int k = 0; k < nused; k++) \ + atomic_add_d(&mydata->counts[k + lcid], lcounts[k]); \ + free(lcounts); \ + } + /** - * @brief Accumulate the counts of particles per cell. + * @brief Accumulate the sized counts of particles per cell. + * Threadpool helper for accumulating the counts of particles per cell. * - * @param s the space containing the cells. - * @param counts the number of particles per cell. Should be - * allocated as size s->nr_parts. + * part version. + */ +static void ACCUMULATE_SIZES_MAPPER(part); + +/** + * @brief Accumulate the sized counts of particles per cell. + * Threadpool helper for accumulating the counts of particles per cell. + * + * gpart version. + */ +static void ACCUMULATE_SIZES_MAPPER(gpart); + +/** + * @brief Accumulate the sized counts of particles per cell. + * Threadpool helper for accumulating the counts of particles per cell. + * + * spart version. */ -static void accumulate_counts(struct space *s, double *counts) { +static void ACCUMULATE_SIZES_MAPPER(spart); - struct part *parts = s->parts; - int *cdim = s->cdim; - double iwidth[3] = {s->iwidth[0], s->iwidth[1], s->iwidth[2]}; - double dim[3] = {s->dim[0], s->dim[1], s->dim[2]}; +/** + * @brief Accumulate total memory size in particles per cell. + * + * @param s the space containing the cells. + * @param counts the number of bytes in particles per cell. Should be + * allocated as size s->nr_cells. 
+ */ +static void accumulate_sizes(struct space *s, double *counts) { bzero(counts, sizeof(double) * s->nr_cells); - for (size_t k = 0; k < s->nr_parts; k++) { - for (int j = 0; j < 3; j++) { - if (parts[k].x[j] < 0.0) - parts[k].x[j] += dim[j]; - else if (parts[k].x[j] >= dim[j]) - parts[k].x[j] -= dim[j]; - } - const int cid = - cell_getid(cdim, parts[k].x[0] * iwidth[0], parts[k].x[1] * iwidth[1], - parts[k].x[2] * iwidth[2]); - counts[cid]++; + struct counts_mapper_data mapper_data; + mapper_data.counts = counts; + mapper_data.s = s; + + double hsize = (double)sizeof(struct part); + mapper_data.size = hsize; + threadpool_map(&s->e->threadpool, accumulate_sizes_mapper_part, s->parts, + s->nr_parts, sizeof(struct part), space_splitsize, + &mapper_data); + + double gsize = (double)sizeof(struct gpart); + mapper_data.size = gsize; + threadpool_map(&s->e->threadpool, accumulate_sizes_mapper_gpart, s->gparts, + s->nr_gparts, sizeof(struct gpart), space_splitsize, + &mapper_data); + + double ssize = (double)sizeof(struct spart); + mapper_data.size = ssize; + threadpool_map(&s->e->threadpool, accumulate_sizes_mapper_spart, s->sparts, + s->nr_sparts, sizeof(struct spart), space_splitsize, + &mapper_data); + + /* Keep the sum of particles across all ranks in the range of IDX_MAX. */ + if ((s->e->total_nr_parts * hsize + s->e->total_nr_gparts * gsize + + s->e->total_nr_sparts * ssize) > (double)IDX_MAX) { + double vscale = + (double)(IDX_MAX - 1000) / + (double)(s->e->total_nr_parts * hsize + s->e->total_nr_gparts * gsize + + s->e->total_nr_sparts * ssize); + for (int k = 0; k < s->nr_cells; k++) counts[k] *= vscale; } } #endif -#if defined(WITH_MPI) && defined(HAVE_METIS) +#if defined(WITH_MPI) && (defined(HAVE_METIS) || defined(HAVE_PARMETIS)) /** - * @brief Apply METIS cell list partitioning to a cell structure. - * - * Uses the results of part_metis_pick to assign each cell's nodeID to the - * picked region index, thus partitioning the space into regions. 
+ * @brief Apply METIS cell-list partitioning to a cell structure. * * @param s the space containing the cells to split into regions. * @param nregions number of regions. @@ -283,16 +372,18 @@ static void split_metis(struct space *s, int nregions, int *celllist) { for (int i = 0; i < s->nr_cells; i++) s->cells_top[i].nodeID = celllist[i]; /* To check or visualise the partition dump all the cells. */ - /* dumpCellRanks("metis_partition", s->cells_top, s->nr_cells); */ + /*dumpCellRanks("metis_partition", s->cells_top, s->nr_cells);*/ } #endif -#if defined(WITH_MPI) && defined(HAVE_METIS) +#if defined(WITH_MPI) && (defined(HAVE_METIS) || defined(HAVE_PARMETIS)) /* qsort support. */ struct indexval { int index; int count; + int old_val; + int new_val; }; static int indexvalcmp(const void *p1, const void *p2) { const struct indexval *iv1 = (const struct indexval *)p1; @@ -301,12 +392,107 @@ static int indexvalcmp(const void *p1, const void *p2) { } /** - * @brief Partition the given space into a number of connected regions. + * @brief Check if there is a permutation of the region indices of our cells + * that will reduce the amount of particle movement and return it. + * + * @param newlist the new list of regions for our cells. + * @param oldlist the old list of regions for our cells. + * @param nregions the number of regions. + * @param ncells the number of cells. + * @param permlist the permutation of the newlist. + */ +void permute_regions(int *newlist, int *oldlist, int nregions, int ncells, + int *permlist) { + + /* We want a solution in which the current region assignments of the cells + * are preserved when possible, to avoid unneccesary particle movement. So + * create a 2d-array of counts of cells that are common to all pairs of old + * and new lists. Each element of the array has a count of cells and an + * unique index so we can sort into decreasing counts. 
+ */ + int indmax = nregions * nregions; + struct indexval *ivs = NULL; + if ((ivs = (struct indexval *)malloc(sizeof(struct indexval) * indmax)) == + NULL) + error("Failed to allocate ivs structs"); + bzero(ivs, sizeof(struct indexval) * indmax); + + for (int k = 0; k < ncells; k++) { + int index = newlist[k] + nregions * oldlist[k]; + ivs[index].count++; + ivs[index].index = index; + ivs[index].old_val = oldlist[k]; + ivs[index].new_val = newlist[k]; + } + qsort(ivs, indmax, sizeof(struct indexval), indexvalcmp); + + /* Go through the ivs using the largest counts first, these are the + * regions with the most cells in common, old partition to new. If not + * returning the permutation, avoid the associated work. */ + int *oldmap = NULL; + int *newmap = NULL; + oldmap = permlist; /* Reuse this */ + if ((newmap = (int *)malloc(sizeof(int) * nregions)) == NULL) + error("Failed to allocate newmap array"); + + for (int k = 0; k < nregions; k++) { + oldmap[k] = -1; + newmap[k] = -1; + } + + for (int k = 0; k < indmax; k++) { + + /* Stop when all regions with common cells have been considered. */ + if (ivs[k].count == 0) break; + + /* Store old and new IDs, if not already used. */ + if (newmap[ivs[k].new_val] == -1 && oldmap[ivs[k].old_val] == -1) { + newmap[ivs[k].new_val] = ivs[k].old_val; + oldmap[ivs[k].old_val] = ivs[k].new_val; + } + } + + /* Handle any regions that did not get selected by picking an unused rank + * from oldmap and assigning to newmap. */ + int spare = 0; + for (int k = 0; k < nregions; k++) { + if (newmap[k] == -1) { + for (int j = spare; j < nregions; j++) { + if (oldmap[j] == -1) { + newmap[k] = j; + oldmap[j] = j; + spare = j; + break; + } + } + } + } + + /* Permute the newlist into this order. 
*/ + for (int k = 0; k < ncells; k++) { + permlist[k] = newmap[newlist[k]]; + } + free(newmap); + free(ivs); +} +#endif + +#if defined(WITH_MPI) && defined(HAVE_PARMETIS) +/** + * @brief Partition the given space into a number of connected regions using + * ParMETIS. * - * Split the space using METIS to derive a partitions using the + * Split the space using PARMETIS to derive a partitions using the * given edge and vertex weights. If no weights are given then an - * unweighted partition is performed. + * unweighted partition is performed. If refine is set then an existing + * partition is assumed to be present from the last call to this routine + * in the celllist argument, that will get a refined partition, not a new + * one. * + * Assumes MPI is up and running and the number of ranks is the same as the + * number of regions. + * + * @param nodeID our nodeID. * @param s the space of cells to partition. * @param nregions the number of regions required in the partition. * @param vertexw weights for the cells, sizeof number of cells if used, @@ -315,250 +501,698 @@ static int indexvalcmp(const void *p1, const void *p2) { * of cells * 26 if used, NULL for unit weights. Need to be packed * in CSR format, so same as adjncy array. Need to be in the range of * idx_t. + * @param refine whether to refine an existing partition, or create a new one. + * @param adaptive whether to use an adaptive reparitition of an existing + * partition or simple refinement. Adaptive repartition is controlled + * by the itr parameter. + * @param itr the ratio of inter-process communication time to data + * redistribution time. Used to weight repartitioning edge cuts + * when refine and adaptive are true. * @param celllist on exit this contains the ids of the selected regions, - * sizeof number of cells. + * size of number of cells. If refine is 1, then this should contain + * the old partition on entry. 
*/ -static void pick_metis(struct space *s, int nregions, double *vertexw, - double *edgew, int *celllist) { +static void pick_parmetis(int nodeID, struct space *s, int nregions, + double *vertexw, double *edgew, int refine, + int adaptive, float itr, int *celllist) { + + int res; + MPI_Comm comm; + MPI_Comm_dup(MPI_COMM_WORLD, &comm); /* Total number of cells. */ int ncells = s->cdim[0] * s->cdim[1] * s->cdim[2]; - /* Nothing much to do if only using a single partition. Also avoids METIS - * bug that doesn't handle this case well. */ + /* Nothing much to do if only using a single MPI rank. */ if (nregions == 1) { for (int i = 0; i < ncells; i++) celllist[i] = 0; return; } - /* Allocate weights and adjacency arrays . */ - idx_t *xadj; - if ((xadj = (idx_t *)malloc(sizeof(idx_t) * (ncells + 1))) == NULL) + /* We all get one of these with the same content. It defines the ranges of + * vertices that are found on each rank. This contiguity constraint seems to + * stop efficient local processing, since our cell distributions do not + * meet this requirement. That means the graph and related information needs + * to be all brought to one node and redistributed for processing in + * approproiate batches. */ + idx_t *vtxdist; + if ((vtxdist = (idx_t *)malloc(sizeof(idx_t) * (nregions + 1))) == NULL) + error("Failed to allocate vtxdist buffer."); + + if (nodeID == 0) { + + /* Construct vtxdist and send it to all ranks. Each rank gets an equal + * number of vertices. 
*/ + vtxdist[0] = 0; + int k = ncells; + for (int i = 0; i < nregions; i++) { + int l = k / (nregions - i); + vtxdist[i + 1] = vtxdist[i] + l; + k -= l; + } + res = MPI_Bcast((void *)vtxdist, nregions + 1, IDX_T, 0, comm); + if (res != MPI_SUCCESS) mpi_error(res, "Failed to broadcast vtxdist."); + + } else { + res = MPI_Bcast((void *)vtxdist, nregions + 1, IDX_T, 0, comm); + if (res != MPI_SUCCESS) mpi_error(res, "Failed to broadcast vtxdist."); + } + + /* Number of cells on this node and space for the expected arrays. */ + int nverts = vtxdist[nodeID + 1] - vtxdist[nodeID]; + + idx_t *xadj = NULL; + if ((xadj = (idx_t *)malloc(sizeof(idx_t) * (nverts + 1))) == NULL) error("Failed to allocate xadj buffer."); - idx_t *adjncy; - if ((adjncy = (idx_t *)malloc(sizeof(idx_t) * 26 * ncells)) == NULL) + + idx_t *adjncy = NULL; + if ((adjncy = (idx_t *)malloc(sizeof(idx_t) * 26 * nverts)) == NULL) error("Failed to allocate adjncy array."); + idx_t *weights_v = NULL; if (vertexw != NULL) - if ((weights_v = (idx_t *)malloc(sizeof(idx_t) * ncells)) == NULL) + if ((weights_v = (idx_t *)malloc(sizeof(idx_t) * nverts)) == NULL) error("Failed to allocate vertex weights array"); + idx_t *weights_e = NULL; if (edgew != NULL) - if ((weights_e = (idx_t *)malloc(26 * sizeof(idx_t) * ncells)) == NULL) + if ((weights_e = (idx_t *)malloc(26 * sizeof(idx_t) * nverts)) == NULL) error("Failed to allocate edge weights array"); - idx_t *regionid; - if ((regionid = (idx_t *)malloc(sizeof(idx_t) * ncells)) == NULL) + + idx_t *regionid = NULL; + if ((regionid = (idx_t *)malloc(sizeof(idx_t) * (nverts + 1))) == NULL) error("Failed to allocate regionid array"); - /* Define the cell graph. 
*/ - graph_init_metis(s, adjncy, xadj); + /* Prepare MPI requests for the asynchronous communications */ + MPI_Request *reqs; + if ((reqs = (MPI_Request *)malloc(sizeof(MPI_Request) * 5 * nregions)) == + NULL) + error("Failed to allocate MPI request list."); + for (int k = 0; k < 5 * nregions; k++) reqs[k] = MPI_REQUEST_NULL; - /* Init the vertex weights array. */ - if (vertexw != NULL) { - for (int k = 0; k < ncells; k++) { - if (vertexw[k] > 1) { - weights_v[k] = vertexw[k]; - } else { - weights_v[k] = 1; + MPI_Status *stats; + if ((stats = (MPI_Status *)malloc(sizeof(MPI_Status) * 5 * nregions)) == NULL) + error("Failed to allocate MPI status list."); + + /* Only use one rank to organize everything. */ + if (nodeID == 0) { + + /* Space for largest lists. */ + idx_t *full_xadj = NULL; + if ((full_xadj = + (idx_t *)malloc(sizeof(idx_t) * (ncells + nregions + 1))) == NULL) + error("Failed to allocate xadj buffer."); + idx_t *full_adjncy = NULL; + if ((full_adjncy = (idx_t *)malloc(sizeof(idx_t) * 26 * ncells)) == NULL) + error("Failed to allocate adjncy array."); + idx_t *full_weights_v = NULL; + if (weights_v != NULL) + if ((full_weights_v = (idx_t *)malloc(sizeof(idx_t) * ncells)) == NULL) + error("Failed to allocate vertex weights array"); + idx_t *full_weights_e = NULL; + if (weights_e != NULL) + if ((full_weights_e = (idx_t *)malloc(26 * sizeof(idx_t) * ncells)) == + NULL) + error("Failed to allocate edge weights array"); + + idx_t *full_regionid = NULL; + if (refine) { + if ((full_regionid = (idx_t *)malloc(sizeof(idx_t) * ncells)) == NULL) + error("Failed to allocate regionid array"); + } + + /* Define the cell graph. */ + graph_init(s, full_adjncy, NULL); + + /* xadj is set for each rank, different to serial version in that each + * rank starts with 0 */ + for (int rank = 0, j = 0; rank < nregions; rank++) { + + /* Number of vertices for this rank. */ + int nvt = vtxdist[rank + 1] - vtxdist[rank]; + + /* Start from 0, and step forward 26 edges each value. 
*/ + full_xadj[j] = 0; + for (int k = 0; k <= nvt; k++) { + full_xadj[j + 1] = full_xadj[j] + 26; + j++; } } -#ifdef SWIFT_DEBUG_CHECKS - /* Check weights are all in range. */ - int failed = 0; - for (int k = 0; k < ncells; k++) { - if ((idx_t)vertexw[k] < 0) { - message("Input vertex weight out of range: %ld", (long)vertexw[k]); - failed++; + /* Init the vertex weights array. */ + if (vertexw != NULL) { + for (int k = 0; k < ncells; k++) { + if (vertexw[k] > 1) { + full_weights_v[k] = vertexw[k]; + } else { + full_weights_v[k] = 1; + } } - if (weights_v[k] < 1) { - message("Used vertex weight out of range: %" PRIDX, weights_v[k]); - failed++; + +#ifdef SWIFT_DEBUG_CHECKS + /* Check weights are all in range. */ + int failed = 0; + for (int k = 0; k < ncells; k++) { + if ((idx_t)vertexw[k] < 0) { + message("Input vertex weight out of range: %ld", (long)vertexw[k]); + failed++; + } + if (full_weights_v[k] < 1) { + message("Used vertex weight out of range: %" PRIDX, + full_weights_v[k]); + failed++; + } } + if (failed > 0) error("%d vertex weights are out of range", failed); +#endif } - if (failed > 0) error("%d vertex weights are out of range", failed); + + /* Init the edges weights array. */ + if (edgew != NULL) { + for (int k = 0; k < 26 * ncells; k++) { + if (edgew[k] > 1) { + full_weights_e[k] = edgew[k]; + } else { + full_weights_e[k] = 1; + } + } + +#ifdef SWIFT_DEBUG_CHECKS + /* Check weights are all in range. */ + int failed = 0; + for (int k = 0; k < 26 * ncells; k++) { + + if ((idx_t)edgew[k] < 0) { + message("Input edge weight out of range: %ld", (long)edgew[k]); + failed++; + } + if (full_weights_e[k] < 1) { + message("Used edge weight out of range: %" PRIDX, full_weights_e[k]); + failed++; + } + } + if (failed > 0) error("%d edge weights are out of range", failed); #endif - } + } + + /* Dump graphs to disk files for testing. ParMETIS xadj isn't right for + * a dump, so make a serial-like version. 
*/ + /*{ + idx_t *tmp_xadj = + (idx_t *)malloc(sizeof(idx_t) * (ncells + nregions + 1)); + tmp_xadj[0] = 0; + for (int k = 0; k < ncells; k++) tmp_xadj[k + 1] = tmp_xadj[k] + 26; + dumpMETISGraph("parmetis_graph", ncells, 1, tmp_xadj, full_adjncy, + full_weights_v, NULL, full_weights_e); + free(tmp_xadj); + }*/ + + /* Send ranges to the other ranks and keep our own. */ + for (int rank = 0, j1 = 0, j2 = 0, j3 = 0; rank < nregions; rank++) { + int nvt = vtxdist[rank + 1] - vtxdist[rank]; + + if (refine) + for (int i = 0; i < nvt; i++) full_regionid[j3 + i] = celllist[j3 + i]; + + if (rank == 0) { + memcpy(xadj, &full_xadj[j1], sizeof(idx_t) * (nvt + 1)); + memcpy(adjncy, &full_adjncy[j2], sizeof(idx_t) * nvt * 26); + if (weights_e != NULL) + memcpy(weights_e, &full_weights_e[j2], sizeof(idx_t) * nvt * 26); + if (weights_v != NULL) + memcpy(weights_v, &full_weights_v[j3], sizeof(idx_t) * nvt); + if (refine) memcpy(regionid, full_regionid, sizeof(idx_t) * nvt); - /* Init the edges weights array. */ - if (edgew != NULL) { - for (int k = 0; k < 26 * ncells; k++) { - if (edgew[k] > 1) { - weights_e[k] = edgew[k]; } else { - weights_e[k] = 1; + res = MPI_Isend(&full_xadj[j1], nvt + 1, IDX_T, rank, 0, comm, + &reqs[5 * rank + 0]); + if (res == MPI_SUCCESS) + res = MPI_Isend(&full_adjncy[j2], nvt * 26, IDX_T, rank, 1, comm, + &reqs[5 * rank + 1]); + if (res == MPI_SUCCESS && weights_e != NULL) + res = MPI_Isend(&full_weights_e[j2], nvt * 26, IDX_T, rank, 2, comm, + &reqs[5 * rank + 2]); + if (res == MPI_SUCCESS && weights_v != NULL) + res = MPI_Isend(&full_weights_v[j3], nvt, IDX_T, rank, 3, comm, + &reqs[5 * rank + 3]); + if (refine && res == MPI_SUCCESS) + res = MPI_Isend(&full_regionid[j3], nvt, IDX_T, rank, 4, comm, + &reqs[5 * rank + 4]); + if (res != MPI_SUCCESS) mpi_error(res, "Failed to send graph data"); } + j1 += nvt + 1; + j2 += nvt * 26; + j3 += nvt; } -#ifdef SWIFT_DEBUG_CHECKS - /* Check weights are all in range. 
*/ - int failed = 0; - for (int k = 0; k < 26 * ncells; k++) { - - if ((idx_t)edgew[k] < 0) { - message("Input edge weight out of range: %ld", (long)edgew[k]); - failed++; + /* Wait for all sends to complete. */ + int result; + if ((result = MPI_Waitall(5 * nregions, reqs, stats)) != MPI_SUCCESS) { + for (int k = 0; k < 5 * nregions; k++) { + char buff[MPI_MAX_ERROR_STRING]; + MPI_Error_string(stats[k].MPI_ERROR, buff, &result); + message("send request from source %i, tag %i has error '%s'.", + stats[k].MPI_SOURCE, stats[k].MPI_TAG, buff); } - if (weights_e[k] < 1) { - message("Used edge weight out of range: %" PRIDX, weights_e[k]); - failed++; + error("Failed during waitall sending repartition data."); + } + + /* Clean up. */ + if (weights_v != NULL) free(full_weights_v); + if (weights_e != NULL) free(full_weights_e); + free(full_xadj); + free(full_adjncy); + if (refine) free(full_regionid); + + } else { + + /* Receive stuff from rank 0. */ + res = MPI_Irecv(xadj, nverts + 1, IDX_T, 0, 0, comm, &reqs[0]); + if (res == MPI_SUCCESS) + res = MPI_Irecv(adjncy, nverts * 26, IDX_T, 0, 1, comm, &reqs[1]); + if (res == MPI_SUCCESS && weights_e != NULL) + res = MPI_Irecv(weights_e, nverts * 26, IDX_T, 0, 2, comm, &reqs[2]); + if (res == MPI_SUCCESS && weights_v != NULL) + res = MPI_Irecv(weights_v, nverts, IDX_T, 0, 3, comm, &reqs[3]); + if (refine && res == MPI_SUCCESS) + res += MPI_Irecv((void *)regionid, nverts, IDX_T, 0, 4, comm, &reqs[4]); + if (res != MPI_SUCCESS) mpi_error(res, "Failed to receive graph data"); + + /* Wait for all recvs to complete. 
*/ + int result; + if ((result = MPI_Waitall(5, reqs, stats)) != MPI_SUCCESS) { + for (int k = 0; k < 5; k++) { + char buff[MPI_MAX_ERROR_STRING]; + MPI_Error_string(stats[k].MPI_ERROR, buff, &result); + message("recv request from source %i, tag %i has error '%s'.", + stats[k].MPI_SOURCE, stats[k].MPI_TAG, buff); } + error("Failed during waitall receiving repartition data."); } - if (failed > 0) error("%d edge weights are out of range", failed); -#endif } - /* Set the METIS options. */ - idx_t options[METIS_NOPTIONS]; - METIS_SetDefaultOptions(options); - options[METIS_OPTION_OBJTYPE] = METIS_OBJTYPE_CUT; - options[METIS_OPTION_NUMBERING] = 0; - options[METIS_OPTION_CONTIG] = 1; - options[METIS_OPTION_NCUTS] = 10; - options[METIS_OPTION_NITER] = 20; - - /* Call METIS. */ - idx_t one = 1; - idx_t idx_ncells = ncells; - idx_t idx_nregions = nregions; - idx_t objval; - - /* Dump graph in METIS format */ - /*dumpMETISGraph("metis_graph", idx_ncells, one, xadj, adjncy, - * weights_v, NULL, weights_e); - */ - if (METIS_PartGraphKway(&idx_ncells, &one, xadj, adjncy, weights_v, NULL, - weights_e, &idx_nregions, NULL, NULL, options, - &objval, regionid) != METIS_OK) - error("Call to METIS_PartGraphKway failed."); - - /* Check that the regionids are ok. */ - for (int k = 0; k < ncells; k++) - if (regionid[k] < 0 || regionid[k] >= nregions) - error("Got bad nodeID %" PRIDX " for cell %i.", regionid[k], k); - - /* We want a solution in which the current regions of the space are - * preserved when possible, to avoid unneccesary particle movement. - * So create a 2d-array of cells counts that are common to all pairs - * of old and new ranks. Each element of the array has a cell count and - * an unique index so we can sort into decreasing counts. 
*/ - int indmax = nregions * nregions; - struct indexval *ivs = - (struct indexval *)malloc(sizeof(struct indexval) * indmax); - bzero(ivs, sizeof(struct indexval) * indmax); - for (int k = 0; k < ncells; k++) { - int index = regionid[k] + nregions * s->cells_top[k].nodeID; - ivs[index].count++; - ivs[index].index = index; - } - qsort(ivs, indmax, sizeof(struct indexval), indexvalcmp); + /* Set up the tpwgts array. This is just 1/nregions. */ + real_t *tpwgts; + if ((tpwgts = (real_t *)malloc(sizeof(real_t) * nregions)) == NULL) + error("Failed to allocate tpwgts array"); + for (int i = 0; i < nregions; i++) tpwgts[i] = 1.0 / (real_t)nregions; + + /* Common parameters. */ + idx_t options[4]; + options[0] = 1; + options[1] = 0; + + idx_t edgecut; + idx_t ncon = 1; + idx_t nparts = nregions; + idx_t numflag = 0; + idx_t wgtflag = 0; + if (edgew != NULL) wgtflag += 1; + if (vertexw != NULL) wgtflag += 2; + + real_t ubvec[1]; + ubvec[0] = 1.001; + + if (refine) { + /* Refine an existing partition, uncouple as we do not have the cells + * present on their expected ranks. */ + options[3] = PARMETIS_PSR_UNCOUPLED; + + /* Seed for randoms. */ + options[2] = clocks_random_seed(); + + /* Choice is whether to use an adaptive repartition or a simple + * refinement. */ + if (adaptive) { + + /* Balance between cuts and movement. 
*/ + real_t itr_real_t = itr; + if (ParMETIS_V3_AdaptiveRepart( + vtxdist, xadj, adjncy, weights_v, NULL, weights_e, &wgtflag, + &numflag, &ncon, &nparts, tpwgts, ubvec, &itr_real_t, options, + &edgecut, regionid, &comm) != METIS_OK) + error("Call to ParMETIS_V3_AdaptiveRepart failed."); + } else { + if (ParMETIS_V3_RefineKway(vtxdist, xadj, adjncy, weights_v, weights_e, + &wgtflag, &numflag, &ncon, &nparts, tpwgts, + ubvec, options, &edgecut, regionid, + &comm) != METIS_OK) + error("Call to ParMETIS_V3_RefineKway failed."); + } + } else { - /* Go through the ivs using the largest counts first, these are the - * regions with the most cells in common, old partition to new. */ - int *oldmap = (int *)malloc(sizeof(int) * nregions); - int *newmap = (int *)malloc(sizeof(int) * nregions); - for (int k = 0; k < nregions; k++) { - oldmap[k] = -1; - newmap[k] = -1; + /* Create a new partition. Use a number of guesses as that is similar to + * the way that serial METIS works (serial METIS usually gives the best + * quality partitions). */ + idx_t best_edgecut = 0; + idx_t *best_regionid = NULL; + if ((best_regionid = (idx_t *)malloc(sizeof(idx_t) * (nverts + 1))) == NULL) + error("Failed to allocate best_regionid array"); + + for (int i = 0; i < 10; i++) { + options[2] = clocks_random_seed(); + + if (ParMETIS_V3_PartKway(vtxdist, xadj, adjncy, weights_v, weights_e, + &wgtflag, &numflag, &ncon, &nparts, tpwgts, + ubvec, options, &edgecut, regionid, + &comm) != METIS_OK) + error("Call to ParMETIS_V3_PartKway failed."); + + if (i == 0 || (best_edgecut > edgecut)) { + best_edgecut = edgecut; + memcpy(best_regionid, regionid, sizeof(idx_t) * (nverts + 1)); + } + } + + /* Keep the best edgecut. */ + memcpy(regionid, best_regionid, sizeof(idx_t) * (nverts + 1)); + free(best_regionid); } - for (int k = 0; k < indmax; k++) { - /* Stop when all regions with common cells have been considered. */ - if (ivs[k].count == 0) break; + /* Need to gather all the regionid arrays from the ranks. 
*/ + for (int k = 0; k < nregions; k++) reqs[k] = MPI_REQUEST_NULL; - /* Store old and new IDs, if not already used. */ - int oldregion = ivs[k].index / nregions; - int newregion = ivs[k].index - oldregion * nregions; - if (newmap[newregion] == -1 && oldmap[oldregion] == -1) { - newmap[newregion] = oldregion; - oldmap[oldregion] = newregion; + if (nodeID != 0) { + + /* Send our regions to node 0. */ + res = MPI_Isend(regionid, vtxdist[nodeID + 1] - vtxdist[nodeID], IDX_T, 0, + 1, comm, &reqs[0]); + if (res != MPI_SUCCESS) mpi_error(res, "Failed to send new regionids"); + + /* Wait for send to complete. */ + int err; + if ((err = MPI_Wait(reqs, stats)) != MPI_SUCCESS) { + mpi_error(err, "Failed during wait sending regionids."); } - } - /* Handle any regions that did not get selected by picking an unused rank - * from oldmap and assigning to newmap. */ - int spare = 0; - for (int k = 0; k < nregions; k++) { - if (newmap[k] == -1) { - for (int j = spare; j < nregions; j++) { - if (oldmap[j] == -1) { - newmap[k] = j; - oldmap[j] = j; - spare = j; - break; - } + } else { + + /* Node 0 */ + idx_t *remoteids = NULL; + if ((remoteids = (idx_t *)malloc(sizeof(idx_t) * ncells)) == NULL) + error("Failed to allocate remoteids buffer"); + + int nvt = vtxdist[1] - vtxdist[0]; + memcpy(remoteids, regionid, sizeof(idx_t) * nvt); + + /* Receive from other ranks. 
*/ + for (int rank = 1, j = nvt; rank < nregions; rank++) { + nvt = vtxdist[rank + 1] - vtxdist[rank]; + res = MPI_Irecv((void *)&remoteids[j], nvt, IDX_T, rank, 1, comm, + &reqs[rank]); + if (res != MPI_SUCCESS) mpi_error(res, "Failed to receive new regionids"); + j += nvt; + } + + int err; + if ((err = MPI_Waitall(nregions, reqs, stats)) != MPI_SUCCESS) { + for (int k = 0; k < 5; k++) { + char buff[MPI_MAX_ERROR_STRING]; + MPI_Error_string(stats[k].MPI_ERROR, buff, &err); + message("recv request from source %i, tag %i has error '%s'.", + stats[k].MPI_SOURCE, stats[k].MPI_TAG, buff); } + error("Failed during waitall receiving regionid data."); } - } - /* Set the cell list to the region index. */ - for (int k = 0; k < ncells; k++) { - celllist[k] = newmap[regionid[k]]; + /* Copy: idx_t -> int. */ + int *newcelllist = NULL; + if ((newcelllist = (int *)malloc(sizeof(int) * ncells)) == NULL) + error("Failed to allocate new celllist"); + for (int k = 0; k < ncells; k++) newcelllist[k] = remoteids[k]; + free(remoteids); + + /* Check that the region ids are all good. */ + int bad = 0; + for (int k = 0; k < ncells; k++) { + if (newcelllist[k] < 0 || newcelllist[k] >= nregions) { + message("Got bad nodeID %" PRIDX " for cell %i.", newcelllist[k], k); + bad++; + } + } + if (bad) error("Bad node IDs located"); + + /* Now check the similarity to the old partition and permute if necessary. + * Checks show that refinement can return a permutation of the partition, + * we need to check that and correct as necessary. */ + int permute = 1; + if (!refine) { + + /* No old partition was given, so we need to construct the existing + * partition from the cells, if one existed. */ + int nsum = 0; + for (int i = 0; i < s->nr_cells; i++) { + celllist[i] = s->cells_top[i].nodeID; + nsum += celllist[i]; + } + + /* If no previous partition then all nodeIDs will be set to 0. 
*/ + if (nsum == 0) permute = 0; + } + + if (permute) { + int *permcelllist = NULL; + if ((permcelllist = (int *)malloc(sizeof(int) * ncells)) == NULL) + error("Failed to allocate perm celllist array"); + permute_regions(newcelllist, celllist, nregions, ncells, permcelllist); + + /* And keep. */ + memcpy(celllist, permcelllist, sizeof(int) * ncells); + free(permcelllist); + + } else { + memcpy(celllist, newcelllist, sizeof(int) * ncells); + } + free(newcelllist); } + /* And everyone gets a copy. */ + res = MPI_Bcast(celllist, s->nr_cells, MPI_INT, 0, MPI_COMM_WORLD); + if (res != MPI_SUCCESS) mpi_error(res, "Failed to broadcast new celllist"); + /* Clean up. */ + free(reqs); + free(stats); if (weights_v != NULL) free(weights_v); if (weights_e != NULL) free(weights_e); - free(ivs); - free(oldmap); - free(newmap); + free(vtxdist); + free(tpwgts); free(xadj); free(adjncy); free(regionid); } #endif -#if defined(WITH_MPI) && defined(HAVE_METIS) +#if defined(WITH_MPI) && (defined(HAVE_METIS) || defined(HAVE_PARMETIS)) /** - * @brief Repartition the cells amongst the nodes using task costs - * as edge weights and vertex weights also from task costs - * or particle cells counts. + * @brief Partition the given space into a number of connected regions. * - * @param partweights whether particle counts will be used as vertex weights. - * @param bothweights whether vertex and edge weights will be used, otherwise - * only edge weights will be used. - * @param timebins use timebins as edge weights. - * @param nodeID our nodeID. - * @param nr_nodes the number of nodes. - * @param s the space of cells holding our local particles. - * @param tasks the completed tasks from the last engine step for our node. - * @param nr_tasks the number of tasks. + * Split the space using METIS to derive a partitions using the given edge and + * vertex weights. If no weights are given then an unweighted partition is + * performed. + * + * @param nodeID the rank of our node. 
+ * @param s the space of cells to partition. + * @param nregions the number of regions required in the partition. + * @param vertexw weights for the cells, sizeof number of cells if used, + * NULL for unit weights. Need to be in the range of idx_t. + * @param edgew weights for the graph edges between all cells, sizeof number + * of cells * 26 if used, NULL for unit weights. Need to be packed + * in CSR format, so same as adjncy array. Need to be in the range of + * idx_t. + * @param celllist on exit this contains the ids of the selected regions, + * sizeof number of cells. */ -static void repart_edge_metis(int partweights, int bothweights, int timebins, - int nodeID, int nr_nodes, struct space *s, - struct task *tasks, int nr_tasks) { +static void pick_metis(int nodeID, struct space *s, int nregions, + double *vertexw, double *edgew, int *celllist) { - /* Create weight arrays using task ticks for vertices and edges (edges - * assume the same graph structure as used in the part_ calls). */ - int nr_cells = s->nr_cells; - struct cell *cells = s->cells_top; + /* Total number of cells. */ + int ncells = s->cdim[0] * s->cdim[1] * s->cdim[2]; - /* Allocate and fill the adjncy indexing array defining the graph of - * cells. */ - idx_t *inds; - if ((inds = (idx_t *)malloc(sizeof(idx_t) * 26 * nr_cells)) == NULL) - error("Failed to allocate the inds array"); - graph_init_metis(s, inds, NULL); + /* Nothing much to do if only using a single partition. Also avoids METIS + * bug that doesn't handle this case well. */ + if (nregions == 1) { + for (int i = 0; i < ncells; i++) celllist[i] = 0; + return; + } - /* Allocate and init weights. */ - double *weights_v = NULL; - double *weights_e = NULL; - if (bothweights) { - if ((weights_v = (double *)malloc(sizeof(double) * nr_cells)) == NULL) - error("Failed to allocate vertex weights arrays."); - bzero(weights_v, sizeof(double) * nr_cells); + /* Only one node needs to calculate this. 
*/ + if (nodeID == 0) { + + /* Allocate weights and adjacency arrays . */ + idx_t *xadj; + if ((xadj = (idx_t *)malloc(sizeof(idx_t) * (ncells + 1))) == NULL) + error("Failed to allocate xadj buffer."); + idx_t *adjncy; + if ((adjncy = (idx_t *)malloc(sizeof(idx_t) * 26 * ncells)) == NULL) + error("Failed to allocate adjncy array."); + idx_t *weights_v = NULL; + if (vertexw != NULL) + if ((weights_v = (idx_t *)malloc(sizeof(idx_t) * ncells)) == NULL) + error("Failed to allocate vertex weights array"); + idx_t *weights_e = NULL; + if (edgew != NULL) + if ((weights_e = (idx_t *)malloc(26 * sizeof(idx_t) * ncells)) == NULL) + error("Failed to allocate edge weights array"); + idx_t *regionid; + if ((regionid = (idx_t *)malloc(sizeof(idx_t) * ncells)) == NULL) + error("Failed to allocate regionid array"); + + /* Define the cell graph. */ + graph_init(s, adjncy, xadj); + + /* Init the vertex weights array. */ + if (vertexw != NULL) { + for (int k = 0; k < ncells; k++) { + if (vertexw[k] > 1) { + weights_v[k] = vertexw[k]; + } else { + weights_v[k] = 1; + } + } + +#ifdef SWIFT_DEBUG_CHECKS + /* Check weights are all in range. */ + int failed = 0; + for (int k = 0; k < ncells; k++) { + if ((idx_t)vertexw[k] < 0) { + message("Input vertex weight out of range: %ld", (long)vertexw[k]); + failed++; + } + if (weights_v[k] < 1) { + message("Used vertex weight out of range: %" PRIDX, weights_v[k]); + failed++; + } + } + if (failed > 0) error("%d vertex weights are out of range", failed); +#endif + } + + /* Init the edges weights array. */ + + if (edgew != NULL) { + for (int k = 0; k < 26 * ncells; k++) { + if (edgew[k] > 1) { + weights_e[k] = edgew[k]; + } else { + weights_e[k] = 1; + } + } + +#ifdef SWIFT_DEBUG_CHECKS + /* Check weights are all in range. 
*/ + int failed = 0; + for (int k = 0; k < 26 * ncells; k++) { + + if ((idx_t)edgew[k] < 0) { + message("Input edge weight out of range: %ld", (long)edgew[k]); + failed++; + } + if (weights_e[k] < 1) { + message("Used edge weight out of range: %" PRIDX, weights_e[k]); + failed++; + } + } + if (failed > 0) error("%d edge weights are out of range", failed); +#endif + } + + /* Set the METIS options. */ + idx_t options[METIS_NOPTIONS]; + METIS_SetDefaultOptions(options); + options[METIS_OPTION_OBJTYPE] = METIS_OBJTYPE_CUT; + options[METIS_OPTION_NUMBERING] = 0; + options[METIS_OPTION_CONTIG] = 1; + options[METIS_OPTION_NCUTS] = 10; + options[METIS_OPTION_NITER] = 20; + + /* Call METIS. */ + idx_t one = 1; + idx_t idx_ncells = ncells; + idx_t idx_nregions = nregions; + idx_t objval; + + /* Dump graph in METIS format */ + /*dumpMETISGraph("metis_graph", idx_ncells, one, xadj, adjncy, weights_v, + NULL, weights_e);*/ + + if (METIS_PartGraphKway(&idx_ncells, &one, xadj, adjncy, weights_v, NULL, + weights_e, &idx_nregions, NULL, NULL, options, + &objval, regionid) != METIS_OK) + error("Call to METIS_PartGraphKway failed."); + + /* Check that the regionids are ok. */ + for (int k = 0; k < ncells; k++) { + if (regionid[k] < 0 || regionid[k] >= nregions) + error("Got bad nodeID %" PRIDX " for cell %i.", regionid[k], k); + + /* And keep. */ + celllist[k] = regionid[k]; + } + + /* Clean up. */ + if (weights_v != NULL) free(weights_v); + if (weights_e != NULL) free(weights_e); + free(xadj); + free(adjncy); + free(regionid); } - if ((weights_e = (double *)malloc(sizeof(double) * 26 * nr_cells)) == NULL) - error("Failed to allocate edge weights arrays."); - bzero(weights_e, sizeof(double) * 26 * nr_cells); - /* Generate task weights for vertices. */ - int taskvweights = (bothweights && !partweights); + /* Calculations all done, now everyone gets a copy. 
*/ + int res = MPI_Bcast(celllist, ncells, MPI_INT, 0, MPI_COMM_WORLD); + if (res != MPI_SUCCESS) mpi_error(res, "Failed to broadcast new celllist"); +} +#endif + +#if defined(WITH_MPI) && (defined(HAVE_METIS) || defined(HAVE_PARMETIS)) + +/* Helper struct for partition_gather weights. */ +struct weights_mapper_data { + double *weights_e; + double *weights_v; + idx_t *inds; + int eweights; + int nodeID; + int timebins; + int vweights; + int nr_cells; + struct cell *cells; +}; + +#ifdef SWIFT_DEBUG_CHECKS +static void check_weights(struct task *tasks, int nr_tasks, + struct weights_mapper_data *weights_data, + double *weights_v, double *weights_e); +#endif + +/** + * @brief Threadpool mapper function to gather cell edge and vertex weights + * from the associated tasks. + * + * @param map_data part of the data to process in this mapper. + * @param num_elements the number of data elements to process. + * @param extra_data additional data for the mapper context. + */ +void partition_gather_weights(void *map_data, int num_elements, + void *extra_data) { + + struct task *tasks = (struct task *)map_data; + struct weights_mapper_data *mydata = (struct weights_mapper_data *)extra_data; + + double *weights_e = mydata->weights_e; + double *weights_v = mydata->weights_v; + idx_t *inds = mydata->inds; + int eweights = mydata->eweights; + int nodeID = mydata->nodeID; + int nr_cells = mydata->nr_cells; + int timebins = mydata->timebins; + int vweights = mydata->vweights; + + struct cell *cells = mydata->cells; /* Loop over the tasks... */ - for (int j = 0; j < nr_tasks; j++) { - /* Get a pointer to the kth task. */ - struct task *t = &tasks[j]; + for (int i = 0; i < num_elements; i++) { + struct task *t = &tasks[i]; /* Skip un-interesting tasks. */ if (t->cost == 0.f) continue; @@ -589,7 +1223,7 @@ static void repart_edge_metis(int partweights, int bothweights, int timebins, t->type == task_type_grav_long_range) { /* Particle updates add only to vertex weight. 
*/ - if (taskvweights) weights_v[cid] += w; + if (vweights) atomic_add_d(&weights_v[cid], w); } /* Self interaction? */ @@ -597,7 +1231,7 @@ static void repart_edge_metis(int partweights, int bothweights, int timebins, (t->type == task_type_sub_self && cj == NULL && ci->nodeID == nodeID)) { /* Self interactions add only to vertex weight. */ - if (taskvweights) weights_v[cid] += w; + if (vweights) atomic_add_d(&weights_v[cid], w); } @@ -606,7 +1240,7 @@ static void repart_edge_metis(int partweights, int bothweights, int timebins, /* In-cell pair? */ if (ci == cj) { /* Add weight to vertex for ci. */ - if (taskvweights) weights_v[cid] += w; + if (vweights) atomic_add_d(&weights_v[cid], w); } @@ -616,236 +1250,260 @@ static void repart_edge_metis(int partweights, int bothweights, int timebins, int cjd = cj - cells; /* Local cells add weight to vertices. */ - if (taskvweights && ci->nodeID == nodeID) { - weights_v[cid] += 0.5 * w; - if (cj->nodeID == nodeID) weights_v[cjd] += 0.5 * w; + if (vweights && ci->nodeID == nodeID) { + atomic_add_d(&weights_v[cid], 0.5 * w); + if (cj->nodeID == nodeID) atomic_add_d(&weights_v[cjd], 0.5 * w); } - /* Find indices of ci/cj neighbours. Note with gravity these cells may - * not be neighbours, in that case we ignore any edge weight for that - * pair. */ - int ik = -1; - for (int k = 26 * cid; k < 26 * nr_cells; k++) { - if (inds[k] == cjd) { - ik = k; - break; + if (eweights) { + + /* Find indices of ci/cj neighbours. Note with gravity these cells may + * not be neighbours, in that case we ignore any edge weight for that + * pair. 
*/ + int ik = -1; + for (int k = 26 * cid; k < 26 * nr_cells; k++) { + if (inds[k] == cjd) { + ik = k; + break; + } } - } - /* cj */ - int jk = -1; - for (int k = 26 * cjd; k < 26 * nr_cells; k++) { - if (inds[k] == cid) { - jk = k; - break; + /* cj */ + int jk = -1; + for (int k = 26 * cjd; k < 26 * nr_cells; k++) { + if (inds[k] == cid) { + jk = k; + break; + } } - } - if (ik != -1 && jk != -1) { - - if (timebins) { - /* Add weights to edge for all cells based on the expected - * interaction time (calculated as the time to the last expected - * time) as we want to avoid having active cells on the edges, so - * we cut for that. Note that weight is added to the local and - * remote cells, as we want to keep both away from any cuts, this - * can overflow int, so take care. */ - int dti = num_time_bins - get_time_bin(ci->ti_hydro_end_min); - int dtj = num_time_bins - get_time_bin(cj->ti_hydro_end_min); - double dt = (double)(1 << dti) + (double)(1 << dtj); - weights_e[ik] += dt; - weights_e[jk] += dt; - - } else { - - /* Add weights from task costs to the edge. */ - weights_e[ik] += w; - weights_e[jk] += w; + if (ik != -1 && jk != -1) { + + if (timebins) { + /* Add weights to edge for all cells based on the expected + * interaction time (calculated as the time to the last expected + * time) as we want to avoid having active cells on the edges, so + * we cut for that. Note that weight is added to the local and + * remote cells, as we want to keep both away from any cuts, this + * can overflow int, so take care. */ + int dti = num_time_bins - get_time_bin(ci->hydro.ti_end_min); + int dtj = num_time_bins - get_time_bin(cj->hydro.ti_end_min); + double dt = (double)(1 << dti) + (double)(1 << dtj); + atomic_add_d(&weights_e[ik], dt); + atomic_add_d(&weights_e[jk], dt); + + } else { + + /* Add weights from task costs to the edge. 
*/ + atomic_add_d(&weights_e[ik], w); + atomic_add_d(&weights_e[jk], w); + } } } } } } +} + +/** + * @brief Repartition the cells amongst the nodes using weights of + * various kinds. + * + * @param vweights whether vertex weights will be used. + * @param eweights whether weights will be used. + * @param timebins use timebins as the edge weights. + * @param repartition the partition struct of the local engine. + * @param nodeID our nodeID. + * @param nr_nodes the number of nodes. + * @param s the space of cells holding our local particles. + * @param tasks the completed tasks from the last engine step for our node. + * @param nr_tasks the number of tasks. + */ +static void repart_edge_metis(int vweights, int eweights, int timebins, + struct repartition *repartition, int nodeID, + int nr_nodes, struct space *s, struct task *tasks, + int nr_tasks) { + + /* Create weight arrays using task ticks for vertices and edges (edges + * assume the same graph structure as used in the part_ calls). */ + int nr_cells = s->nr_cells; + struct cell *cells = s->cells_top; - /* Re-calculate the vertices if using particle counts. */ - if (partweights && bothweights) accumulate_counts(s, weights_v); + /* Allocate and fill the adjncy indexing array defining the graph of + * cells. */ + idx_t *inds; + if ((inds = (idx_t *)malloc(sizeof(idx_t) * 26 * nr_cells)) == NULL) + error("Failed to allocate the inds array"); + graph_init(s, inds, NULL); + + /* Allocate and init weights. */ + double *weights_v = NULL; + double *weights_e = NULL; + if (vweights) { + if ((weights_v = (double *)malloc(sizeof(double) * nr_cells)) == NULL) + error("Failed to allocate vertex weights arrays."); + bzero(weights_v, sizeof(double) * nr_cells); + } + if (eweights) { + if ((weights_e = (double *)malloc(sizeof(double) * 26 * nr_cells)) == NULL) + error("Failed to allocate edge weights arrays."); + bzero(weights_e, sizeof(double) * 26 * nr_cells); + } + + /* Gather weights. 
*/ + struct weights_mapper_data weights_data; + + weights_data.cells = cells; + weights_data.eweights = eweights; + weights_data.inds = inds; + weights_data.nodeID = nodeID; + weights_data.nr_cells = nr_cells; + weights_data.timebins = timebins; + weights_data.vweights = vweights; + weights_data.weights_e = weights_e; + weights_data.weights_v = weights_v; + + ticks tic = getticks(); + + threadpool_map(&s->e->threadpool, partition_gather_weights, tasks, nr_tasks, + sizeof(struct task), 0, &weights_data); + if (s->e->verbose) + message("weight mapper took %.3f %s.", clocks_from_ticks(getticks() - tic), + clocks_getunit()); + +#ifdef SWIFT_DEBUG_CHECKS + check_weights(tasks, nr_tasks, &weights_data, weights_v, weights_e); +#endif /* Merge the weights arrays across all nodes. */ int res; - if (bothweights) { - if ((res = MPI_Reduce((nodeID == 0) ? MPI_IN_PLACE : weights_v, weights_v, - nr_cells, MPI_DOUBLE, MPI_SUM, 0, MPI_COMM_WORLD)) != - MPI_SUCCESS) + if (vweights) { + res = MPI_Allreduce(MPI_IN_PLACE, weights_v, nr_cells, MPI_DOUBLE, MPI_SUM, + MPI_COMM_WORLD); + if (res != MPI_SUCCESS) mpi_error(res, "Failed to allreduce vertex weights."); } - if ((res = MPI_Reduce((nodeID == 0) ? MPI_IN_PLACE : weights_e, weights_e, - 26 * nr_cells, MPI_DOUBLE, MPI_SUM, 0, - MPI_COMM_WORLD)) != MPI_SUCCESS) - mpi_error(res, "Failed to allreduce edge weights."); - - /* Allocate cell list for the partition. */ - int *celllist = (int *)malloc(sizeof(int) * s->nr_cells); - if (celllist == NULL) error("Failed to allocate celllist"); - - /* As of here, only one node needs to compute the partition. */ - if (nodeID == 0) { + if (eweights) { + res = MPI_Allreduce(MPI_IN_PLACE, weights_e, 26 * nr_cells, MPI_DOUBLE, + MPI_SUM, MPI_COMM_WORLD); + if (res != MPI_SUCCESS) mpi_error(res, "Failed to allreduce edge weights."); + } - /* We need to rescale the weights into the range of an integer for METIS - * (really range of idx_t). 
Also we would like the range of vertex and - * edges weights to be similar so they balance. */ - double wminv = 0.0; - double wmaxv = 0.0; - if (bothweights) { - wminv = weights_v[0]; - wmaxv = weights_v[0]; - for (int k = 0; k < nr_cells; k++) { - wmaxv = weights_v[k] > wmaxv ? weights_v[k] : wmaxv; - wminv = weights_v[k] < wminv ? weights_v[k] : wminv; - } - } + /* Allocate cell list for the partition. If not already done. */ +#ifdef HAVE_PARMETIS + int refine = 1; +#endif + if (repartition->ncelllist != nr_cells) { +#ifdef HAVE_PARMETIS + refine = 0; +#endif + free(repartition->celllist); + repartition->ncelllist = 0; + if ((repartition->celllist = (int *)malloc(sizeof(int) * nr_cells)) == NULL) + error("Failed to allocate celllist"); + repartition->ncelllist = nr_cells; + } - double wmine = weights_e[0]; - double wmaxe = weights_e[0]; - for (int k = 0; k < 26 * nr_cells; k++) { - wmaxe = weights_e[k] > wmaxe ? weights_e[k] : wmaxe; - wmine = weights_e[k] < wmine ? weights_e[k] : wmine; + /* We need to rescale the sum of the weights so that the sums of the two + * types of weights are less than IDX_MAX, that is the range of idx_t. Also + * we would like to balance edges and vertices when the edge weights are + * timebins, as these have no reason to have equivalent scales, so we use an + * equipartition. */ + double vsum = 0.0; + if (vweights) + for (int k = 0; k < nr_cells; k++) vsum += weights_v[k]; + double esum = 0.0; + if (eweights) + for (int k = 0; k < 26 * nr_cells; k++) esum += weights_e[k]; + + double vscale = 1.0; + double escale = 1.0; + if (timebins && eweights) { + /* Make sums the same. */ + if (vsum > esum) { + escale = vsum / esum; + esum = vsum; + } else { + vscale = esum / vsum; + vsum = esum; } + } - if (bothweights) { + /* Now make sure sum of weights are in the range of idx_t. */ + if (vweights) { + if (vsum > (double)IDX_MAX) { + vscale = (double)(IDX_MAX - 1000) / vsum; - /* Make range the same in both weights systems. 
*/ - if ((wmaxv - wminv) > (wmaxe - wmine)) { - double wscale = 1.0; - if ((wmaxe - wmine) > 0.0) { - wscale = (wmaxv - wminv) / (wmaxe - wmine); - } + if (!timebins && eweights) { + /* Keep edge weights in proportion. */ + esum = 0.0; for (int k = 0; k < 26 * nr_cells; k++) { - weights_e[k] = (weights_e[k] - wmine) * wscale + wminv; - } - wmine = wminv; - wmaxe = wmaxv; - - } else { - double wscale = 1.0; - if ((wmaxv - wminv) > 0.0) { - wscale = (wmaxe - wmine) / (wmaxv - wminv); + weights_e[k] *= vscale; + esum += weights_e[k]; } - for (int k = 0; k < nr_cells; k++) { - weights_v[k] = (weights_v[k] - wminv) * wscale + wmine; - } - wminv = wmine; - wmaxv = wmaxe; - } - - /* Scale to the METIS range. */ - double wscale = 1.0; - if ((wmaxv - wminv) > 0.0) { - wscale = (metis_maxweight - 1.0) / (wmaxv - wminv); - } - for (int k = 0; k < nr_cells; k++) { - weights_v[k] = (weights_v[k] - wminv) * wscale + 1.0; } } + if (vscale != 1.0) + for (int k = 0; k < nr_cells; k++) weights_v[k] *= vscale; + } - /* Scale to the METIS range. */ - double wscale = 1.0; - if ((wmaxe - wmine) > 0.0) { - wscale = (metis_maxweight - 1.0) / (wmaxe - wmine); - } - for (int k = 0; k < 26 * nr_cells; k++) { - weights_e[k] = (weights_e[k] - wmine) * wscale + 1.0; - } + if (eweights) { + if (esum > (double)IDX_MAX) escale = (double)(IDX_MAX - 1000) / esum; + if (escale != 1.0) + for (int k = 0; k < 26 * nr_cells; k++) weights_e[k] *= escale; + } - /* And partition, use both weights or not as requested. */ - if (bothweights) - pick_metis(s, nr_nodes, weights_v, weights_e, celllist); - else - pick_metis(s, nr_nodes, NULL, weights_e, celllist); + /* And repartition/ partition, using both weights or not as requested. 
*/ +#ifdef HAVE_PARMETIS + if (repartition->usemetis) { + pick_metis(nodeID, s, nr_nodes, weights_v, weights_e, + repartition->celllist); + } else { + pick_parmetis(nodeID, s, nr_nodes, weights_v, weights_e, refine, + repartition->adaptive, repartition->itr, + repartition->celllist); + } +#else + pick_metis(nodeID, s, nr_nodes, weights_v, weights_e, repartition->celllist); +#endif - /* Check that all cells have good values. */ + /* Check that all cells have good values. All nodes have same copy, so just + * check on one. */ + if (nodeID == 0) { for (int k = 0; k < nr_cells; k++) - if (celllist[k] < 0 || celllist[k] >= nr_nodes) - error("Got bad nodeID %d for cell %i.", celllist[k], k); - - /* Check that the partition is complete and all nodes have some work. */ - int present[nr_nodes]; - int failed = 0; - for (int i = 0; i < nr_nodes; i++) present[i] = 0; - for (int i = 0; i < nr_cells; i++) present[celllist[i]]++; - for (int i = 0; i < nr_nodes; i++) { - if (!present[i]) { - failed = 1; - message("Node %d is not present after repartition", i); - } - } + if (repartition->celllist[k] < 0 || repartition->celllist[k] >= nr_nodes) + error("Got bad nodeID %d for cell %i.", repartition->celllist[k], k); + } - /* If partition failed continue with the current one, but make this - * clear. */ - if (failed) { - message( - "WARNING: METIS repartition has failed, continuing with " - "the current partition, load balance will not be optimal"); - for (int k = 0; k < nr_cells; k++) celllist[k] = cells[k].nodeID; + /* Check that the partition is complete and all nodes have some work. */ + int present[nr_nodes]; + int failed = 0; + for (int i = 0; i < nr_nodes; i++) present[i] = 0; + for (int i = 0; i < nr_cells; i++) present[repartition->celllist[i]]++; + for (int i = 0; i < nr_nodes; i++) { + if (!present[i]) { + failed = 1; + if (nodeID == 0) message("Node %d is not present after repartition", i); } } - /* Distribute the celllist partition and apply. 
*/ - if ((res = MPI_Bcast(celllist, s->nr_cells, MPI_INT, 0, MPI_COMM_WORLD)) != - MPI_SUCCESS) - mpi_error(res, "Failed to bcast the cell list"); + /* If partition failed continue with the current one, but make this clear. */ + if (failed) { + if (nodeID == 0) + message( + "WARNING: repartition has failed, continuing with the current" + " partition, load balance will not be optimal"); + for (int k = 0; k < nr_cells; k++) + repartition->celllist[k] = cells[k].nodeID; + } /* And apply to our cells */ - split_metis(s, nr_nodes, celllist); + split_metis(s, nr_nodes, repartition->celllist); /* Clean up. */ free(inds); - if (bothweights) free(weights_v); - free(weights_e); - free(celllist); -} -#endif - -/** - * @brief Repartition the cells amongst the nodes using vertex weights - * - * @param s The space containing the local cells. - * @param nodeID our MPI node id. - * @param nr_nodes number of MPI nodes. - */ -#if defined(WITH_MPI) && defined(HAVE_METIS) -static void repart_vertex_metis(struct space *s, int nodeID, int nr_nodes) { - - /* Use particle counts as vertex weights. */ - /* Space for particles per cell counts, which will be used as weights. */ - double *weights = NULL; - if ((weights = (double *)malloc(sizeof(double) * s->nr_cells)) == NULL) - error("Failed to allocate weights buffer."); - - /* Check each particle and accumulate the counts per cell. */ - accumulate_counts(s, weights); - - /* Get all the counts from all the nodes. */ - int res; - if ((res = MPI_Allreduce(MPI_IN_PLACE, weights, s->nr_cells, MPI_DOUBLE, - MPI_SUM, MPI_COMM_WORLD)) != MPI_SUCCESS) - mpi_error(res, "Failed to allreduce particle cell weights."); - - /* Main node does the partition calculation. */ - int *celllist = (int *)malloc(sizeof(int) * s->nr_cells); - if (celllist == NULL) error("Failed to allocate celllist"); - - if (nodeID == 0) pick_metis(s, nr_nodes, weights, NULL, celllist); - - /* Distribute the celllist partition and apply. 
*/ - if ((res = MPI_Bcast(celllist, s->nr_cells, MPI_INT, 0, MPI_COMM_WORLD)) != - MPI_SUCCESS) - mpi_error(res, "Failed to bcast the cell list"); - - /* And apply to our cells */ - split_metis(s, nr_nodes, celllist); - - free(weights); - free(celllist); + if (vweights) free(weights_v); + if (eweights) free(weights_e); } #endif @@ -866,28 +1524,25 @@ void partition_repartition(struct repartition *reparttype, int nodeID, int nr_nodes, struct space *s, struct task *tasks, int nr_tasks) { -#if defined(WITH_MPI) && defined(HAVE_METIS) +#if defined(WITH_MPI) && (defined(HAVE_METIS) || defined(HAVE_PARMETIS)) - if (reparttype->type == REPART_METIS_VERTEX_COSTS_EDGE_COSTS) { - repart_edge_metis(0, 1, 0, nodeID, nr_nodes, s, tasks, nr_tasks); + ticks tic = getticks(); - } else if (reparttype->type == REPART_METIS_EDGE_COSTS) { - repart_edge_metis(0, 0, 0, nodeID, nr_nodes, s, tasks, nr_tasks); - - } else if (reparttype->type == REPART_METIS_VERTEX_COUNTS_EDGE_COSTS) { - repart_edge_metis(1, 1, 0, nodeID, nr_nodes, s, tasks, nr_tasks); + if (reparttype->type == REPART_METIS_VERTEX_EDGE_COSTS) { + repart_edge_metis(1, 1, 0, reparttype, nodeID, nr_nodes, s, tasks, + nr_tasks); - } else if (reparttype->type == REPART_METIS_VERTEX_COSTS_EDGE_TIMEBINS) { - repart_edge_metis(0, 1, 1, nodeID, nr_nodes, s, tasks, nr_tasks); - - } else if (reparttype->type == REPART_METIS_VERTEX_COUNTS_EDGE_TIMEBINS) { - repart_edge_metis(1, 1, 1, nodeID, nr_nodes, s, tasks, nr_tasks); + } else if (reparttype->type == REPART_METIS_EDGE_COSTS) { + repart_edge_metis(0, 1, 0, reparttype, nodeID, nr_nodes, s, tasks, + nr_tasks); - } else if (reparttype->type == REPART_METIS_EDGE_TIMEBINS) { - repart_edge_metis(0, 0, 1, nodeID, nr_nodes, s, tasks, nr_tasks); + } else if (reparttype->type == REPART_METIS_VERTEX_COSTS) { + repart_edge_metis(1, 0, 0, reparttype, nodeID, nr_nodes, s, tasks, + nr_tasks); - } else if (reparttype->type == REPART_METIS_VERTEX_COUNTS) { - repart_vertex_metis(s, nodeID, nr_nodes); 
+ } else if (reparttype->type == REPART_METIS_VERTEX_COSTS_TIMEBINS) { + repart_edge_metis(1, 1, 1, reparttype, nodeID, nr_nodes, s, tasks, + nr_tasks); } else if (reparttype->type == REPART_NONE) { /* Doing nothing. */ @@ -895,8 +1550,12 @@ void partition_repartition(struct repartition *reparttype, int nodeID, } else { error("Impossible repartition type"); } + + if (s->e->verbose) + message("took %.3f %s.", clocks_from_ticks(getticks() - tic), + clocks_getunit()); #else - error("SWIFT was not compiled with METIS support."); + error("SWIFT was not compiled with METIS or ParMETIS support."); #endif } @@ -918,6 +1577,7 @@ void partition_repartition(struct repartition *reparttype, int nodeID, */ void partition_initial_partition(struct partition *initial_partition, int nodeID, int nr_nodes, struct space *s) { + ticks tic = getticks(); /* Geometric grid partitioning. */ if (initial_partition->type == INITPART_GRID) { @@ -953,21 +1613,21 @@ void partition_initial_partition(struct partition *initial_partition, } else if (initial_partition->type == INITPART_METIS_WEIGHT || initial_partition->type == INITPART_METIS_NOWEIGHT) { -#if defined(WITH_MPI) && defined(HAVE_METIS) - /* Simple k-way partition selected by METIS using cell particle counts as - * weights or not. Should be best when starting with a inhomogeneous dist. +#if defined(WITH_MPI) && (defined(HAVE_METIS) || defined(HAVE_PARMETIS)) + /* Simple k-way partition selected by METIS using cell particle + * counts as weights or not. Should be best when starting with a + * inhomogeneous dist. */ - /* Space for particles per cell counts, which will be used as weights or - * not. */ + /* Space for particles sizes per cell, which will be used as weights. 
*/ double *weights = NULL; if (initial_partition->type == INITPART_METIS_WEIGHT) { if ((weights = (double *)malloc(sizeof(double) * s->nr_cells)) == NULL) error("Failed to allocate weights buffer."); bzero(weights, sizeof(double) * s->nr_cells); - /* Check each particle and accumilate the counts per cell. */ - accumulate_counts(s, weights); + /* Check each particle and accumilate the sizes per cell. */ + accumulate_sizes(s, weights); /* Get all the counts from all the nodes. */ if (MPI_Allreduce(MPI_IN_PLACE, weights, s->nr_cells, MPI_DOUBLE, MPI_SUM, @@ -975,14 +1635,19 @@ void partition_initial_partition(struct partition *initial_partition, error("Failed to allreduce particle cell weights."); } - /* Main node does the partition calculation. */ - int *celllist = (int *)malloc(sizeof(int) * s->nr_cells); - if (celllist == NULL) error("Failed to allocate celllist"); - if (nodeID == 0) pick_metis(s, nr_nodes, weights, NULL, celllist); - - /* Distribute the celllist partition and apply. */ - int res = MPI_Bcast(celllist, s->nr_cells, MPI_INT, 0, MPI_COMM_WORLD); - if (res != MPI_SUCCESS) mpi_error(res, "Failed to bcast the cell list"); + /* Do the calculation. 
*/ + int *celllist = NULL; + if ((celllist = (int *)malloc(sizeof(int) * s->nr_cells)) == NULL) + error("Failed to allocate celllist"); +#ifdef HAVE_PARMETIS + if (initial_partition->usemetis) { + pick_metis(nodeID, s, nr_nodes, weights, NULL, celllist); + } else { + pick_parmetis(nodeID, s, nr_nodes, weights, NULL, 0, 0, 0.0f, celllist); + } +#else + pick_metis(nodeID, s, nr_nodes, weights, NULL, celllist); +#endif /* And apply to our cells */ split_metis(s, nr_nodes, celllist); @@ -999,7 +1664,7 @@ void partition_initial_partition(struct partition *initial_partition, if (weights != NULL) free(weights); free(celllist); #else - error("SWIFT was not compiled with METIS support"); + error("SWIFT was not compiled with METIS or ParMETIS support"); #endif } else if (initial_partition->type == INITPART_VECTORIZE) { @@ -1007,8 +1672,9 @@ void partition_initial_partition(struct partition *initial_partition, #if defined(WITH_MPI) /* Vectorised selection, guaranteed to work for samples less than the * number of cells, but not very clumpy in the selection of regions. 
*/ - int *samplecells = (int *)malloc(sizeof(int) * nr_nodes * 3); - if (samplecells == NULL) error("Failed to allocate samplecells"); + int *samplecells = NULL; + if ((samplecells = (int *)malloc(sizeof(int) * nr_nodes * 3)) == NULL) + error("Failed to allocate samplecells"); if (nodeID == 0) { pick_vector(s, nr_nodes, samplecells); @@ -1026,6 +1692,10 @@ void partition_initial_partition(struct partition *initial_partition, error("SWIFT was not compiled with MPI support"); #endif } + + if (s->e->verbose) + message("took %.3f %s.", clocks_from_ticks(getticks() - tic), + clocks_getunit()); } /** @@ -1044,9 +1714,9 @@ void partition_init(struct partition *partition, #ifdef WITH_MPI /* Defaults make use of METIS if available */ -#ifdef HAVE_METIS +#if defined(HAVE_METIS) || defined(HAVE_PARMETIS) const char *default_repart = "costs/costs"; - const char *default_part = "simple_metis"; + const char *default_part = "memory"; #else const char *default_repart = "none/none"; const char *default_part = "grid"; @@ -1070,24 +1740,24 @@ void partition_init(struct partition *partition, case 'v': partition->type = INITPART_VECTORIZE; break; -#ifdef HAVE_METIS - case 's': +#if defined(HAVE_METIS) || defined(HAVE_PARMETIS) + case 'r': partition->type = INITPART_METIS_NOWEIGHT; break; - case 'w': + case 'm': partition->type = INITPART_METIS_WEIGHT; break; default: message("Invalid choice of initial partition type '%s'.", part_type); error( - "Permitted values are: 'grid', 'simple_metis', 'weighted_metis'" - " or 'vectorized'"); + "Permitted values are: 'grid', 'region', 'memory' or " + "'vectorized'"); #else default: message("Invalid choice of initial partition type '%s'.", part_type); error( "Permitted values are: 'grid' or 'vectorized' when compiled " - "without METIS."); + "without METIS or ParMETIS."); #endif } @@ -1104,37 +1774,30 @@ void partition_init(struct partition *partition, if (strcmp("none/none", part_type) == 0) { repartition->type = REPART_NONE; -#ifdef HAVE_METIS 
+#if defined(HAVE_METIS) || defined(HAVE_PARMETIS) } else if (strcmp("costs/costs", part_type) == 0) { - repartition->type = REPART_METIS_VERTEX_COSTS_EDGE_COSTS; - - } else if (strcmp("counts/none", part_type) == 0) { - repartition->type = REPART_METIS_VERTEX_COUNTS; + repartition->type = REPART_METIS_VERTEX_EDGE_COSTS; } else if (strcmp("none/costs", part_type) == 0) { repartition->type = REPART_METIS_EDGE_COSTS; - } else if (strcmp("counts/costs", part_type) == 0) { - repartition->type = REPART_METIS_VERTEX_COUNTS_EDGE_COSTS; + } else if (strcmp("costs/none", part_type) == 0) { + repartition->type = REPART_METIS_VERTEX_COSTS; } else if (strcmp("costs/time", part_type) == 0) { - repartition->type = REPART_METIS_VERTEX_COSTS_EDGE_TIMEBINS; - - } else if (strcmp("counts/time", part_type) == 0) { - repartition->type = REPART_METIS_VERTEX_COUNTS_EDGE_TIMEBINS; + repartition->type = REPART_METIS_VERTEX_COSTS_TIMEBINS; - } else if (strcmp("none/time", part_type) == 0) { - repartition->type = REPART_METIS_EDGE_TIMEBINS; } else { message("Invalid choice of re-partition type '%s'.", part_type); error( - "Permitted values are: 'none/none', 'costs/costs'," - "'counts/none', 'none/costs', 'counts/costs', " - "'costs/time', 'counts/time' or 'none/time'"); + "Permitted values are: 'none/none', 'costs/costs', 'none/costs' " + "'costs/none' or 'costs/time'"); #else } else { message("Invalid choice of re-partition type '%s'.", part_type); - error("Permitted values are: 'none/none' when compiled without METIS."); + error( + "Permitted values are: 'none/none' when compiled without " + "METIS or ParMETIS."); #endif } @@ -1158,6 +1821,19 @@ void partition_init(struct partition *partition, "Invalid DomainDecomposition:minfrac, must be greater than 0 and less " "than equal to 1"); + /* Use METIS or ParMETIS when ParMETIS is also available. 
*/ + repartition->usemetis = + parser_get_opt_param_int(params, "DomainDecomposition:usemetis", 0); + partition->usemetis = repartition->usemetis; + + /* Use adaptive or simple refinement when repartitioning. */ + repartition->adaptive = + parser_get_opt_param_int(params, "DomainDecomposition:adaptive", 1); + + /* Ratio of interprocess communication time to data redistribution time. */ + repartition->itr = + parser_get_opt_param_float(params, "DomainDecomposition:itr", 100.0f); + /* Clear the celllist for use. */ repartition->ncelllist = 0; repartition->celllist = NULL; @@ -1181,8 +1857,9 @@ void partition_init(struct partition *partition, */ static int check_complete(struct space *s, int verbose, int nregions) { - int *present = (int *)malloc(sizeof(int) * nregions); - if (present == NULL) error("Failed to allocate present array"); + int *present = NULL; + if ((present = (int *)malloc(sizeof(int) * nregions)) == NULL) + error("Failed to allocate present array"); int failed = 0; for (int i = 0; i < nregions; i++) present[i] = 0; @@ -1203,6 +1880,185 @@ static int check_complete(struct space *s, int verbose, int nregions) { return (!failed); } +#if defined(WITH_MPI) && (defined(HAVE_METIS) || defined(HAVE_PARMETIS)) +#ifdef SWIFT_DEBUG_CHECKS +/** + * @brief Check that the threadpool version of the weights construction is + * correct by comparing to the old serial code. 
+ * + * @param tasks the list of tasks + * @param nr_tasks number of tasks + * @param mydata additional values as passed to threadpool + * @param ref_weights_v vertex weights to check + * @param ref_weights_e edge weights to check + */ +static void check_weights(struct task *tasks, int nr_tasks, + struct weights_mapper_data *mydata, + double *ref_weights_v, double *ref_weights_e) { + + idx_t *inds = mydata->inds; + int eweights = mydata->eweights; + int nodeID = mydata->nodeID; + int nr_cells = mydata->nr_cells; + int timebins = mydata->timebins; + int vweights = mydata->vweights; + + struct cell *cells = mydata->cells; + + /* Allocate and init weights. */ + double *weights_v = NULL; + double *weights_e = NULL; + if (vweights) { + if ((weights_v = (double *)malloc(sizeof(double) * nr_cells)) == NULL) + error("Failed to allocate vertex weights arrays."); + bzero(weights_v, sizeof(double) * nr_cells); + } + if (eweights) { + if ((weights_e = (double *)malloc(sizeof(double) * 26 * nr_cells)) == NULL) + error("Failed to allocate edge weights arrays."); + bzero(weights_e, sizeof(double) * 26 * nr_cells); + } + + /* Loop over the tasks... */ + for (int j = 0; j < nr_tasks; j++) { + + /* Get a pointer to the kth task. */ + struct task *t = &tasks[j]; + + /* Skip un-interesting tasks. */ + if (t->cost == 0.f) continue; + + /* Get the task weight based on costs. */ + double w = (double)t->cost; + + /* Get the top-level cells involved. */ + struct cell *ci, *cj; + for (ci = t->ci; ci->parent != NULL; ci = ci->parent) + ; + if (t->cj != NULL) + for (cj = t->cj; cj->parent != NULL; cj = cj->parent) + ; + else + cj = NULL; + + /* Get the cell IDs. */ + int cid = ci - cells; + + /* Different weights for different tasks. 
*/ + if (t->type == task_type_drift_part || t->type == task_type_drift_gpart || + t->type == task_type_ghost || t->type == task_type_extra_ghost || + t->type == task_type_kick1 || t->type == task_type_kick2 || + t->type == task_type_end_force || t->type == task_type_cooling || + t->type == task_type_timestep || t->type == task_type_init_grav || + t->type == task_type_grav_down || + t->type == task_type_grav_long_range) { + + /* Particle updates add only to vertex weight. */ + if (vweights) weights_v[cid] += w; + } + + /* Self interaction? */ + else if ((t->type == task_type_self && ci->nodeID == nodeID) || + (t->type == task_type_sub_self && cj == NULL && + ci->nodeID == nodeID)) { + /* Self interactions add only to vertex weight. */ + if (vweights) weights_v[cid] += w; + + } + + /* Pair? */ + else if (t->type == task_type_pair || (t->type == task_type_sub_pair)) { + /* In-cell pair? */ + if (ci == cj) { + /* Add weight to vertex for ci. */ + if (vweights) weights_v[cid] += w; + + } + + /* Distinct cells. */ + else { + /* Index of the jth cell. */ + int cjd = cj - cells; + + /* Local cells add weight to vertices. */ + if (vweights && ci->nodeID == nodeID) { + weights_v[cid] += 0.5 * w; + if (cj->nodeID == nodeID) weights_v[cjd] += 0.5 * w; + } + + if (eweights) { + + /* Find indices of ci/cj neighbours. Note with gravity these cells may + * not be neighbours, in that case we ignore any edge weight for that + * pair. */ + int ik = -1; + for (int k = 26 * cid; k < 26 * nr_cells; k++) { + if (inds[k] == cjd) { + ik = k; + break; + } + } + + /* cj */ + int jk = -1; + for (int k = 26 * cjd; k < 26 * nr_cells; k++) { + if (inds[k] == cid) { + jk = k; + break; + } + } + if (ik != -1 && jk != -1) { + + if (timebins) { + /* Add weights to edge for all cells based on the expected + * interaction time (calculated as the time to the last expected + * time) as we want to avoid having active cells on the edges, so + * we cut for that. 
Note that weight is added to the local and + * remote cells, as we want to keep both away from any cuts, this + * can overflow int, so take care. */ + int dti = num_time_bins - get_time_bin(ci->hydro.ti_end_min); + int dtj = num_time_bins - get_time_bin(cj->hydro.ti_end_min); + double dt = (double)(1 << dti) + (double)(1 << dtj); + weights_e[ik] += dt; + weights_e[jk] += dt; + + } else { + + /* Add weights from task costs to the edge. */ + weights_e[ik] += w; + weights_e[jk] += w; + } + } + } + } + } + } + + /* Now do the comparisons. */ + double refsum = 0.0; + double sum = 0.0; + for (int k = 0; k < nr_cells; k++) { + refsum += ref_weights_v[k]; + sum += weights_v[k]; + } + if (fabs(sum - refsum) > 1.0) { + error("vertex partition weights are not consistent (%f!=%f)", sum, refsum); + } else { + refsum = 0.0; + sum = 0.0; + for (int k = 0; k < 26 * nr_cells; k++) { + refsum += ref_weights_e[k]; + sum += weights_e[k]; + } + if (fabs(sum - refsum) > 1.0) { + error("edge partition weights are not consistent (%f!=%f)", sum, refsum); + } + } + message("partition weights checked successfully"); +} +#endif +#endif + /** * @brief Partition a space of cells based on another space of cells. 
* @@ -1262,10 +2118,13 @@ int partition_space_to_space(double *oldh, double *oldcdim, int *oldnodeIDs, * */ void partition_store_celllist(struct space *s, struct repartition *reparttype) { - if (reparttype->celllist != NULL) free(reparttype->celllist); - reparttype->celllist = (int *)malloc(sizeof(int) * s->nr_cells); - reparttype->ncelllist = s->nr_cells; - if (reparttype->celllist == NULL) error("Failed to allocate celllist"); + if (reparttype->ncelllist != s->nr_cells) { + free(reparttype->celllist); + if ((reparttype->celllist = (int *)malloc(sizeof(int) * s->nr_cells)) == + NULL) + error("Failed to allocate celllist"); + reparttype->ncelllist = s->nr_cells; + } for (int i = 0; i < s->nr_cells; i++) { reparttype->celllist[i] = s->cells_top[i].nodeID; @@ -1292,8 +2151,9 @@ void partition_restore_celllist(struct space *s, } } else { error( - "Cannot apply the saved partition celllist as the number of" - "top-level cells (%d) is different to the saved number (%d)", + "Cannot apply the saved partition celllist as the " + "number of top-level cells (%d) is different to the " + "saved number (%d)", s->nr_cells, reparttype->ncelllist); } } @@ -1329,8 +2189,9 @@ void partition_struct_restore(struct repartition *reparttype, FILE *stream) { /* Also restore the celllist, if we have one. 
*/ if (reparttype->ncelllist > 0) { - reparttype->celllist = (int *)malloc(sizeof(int) * reparttype->ncelllist); - if (reparttype->celllist == NULL) error("Failed to allocate celllist"); + if ((reparttype->celllist = + (int *)malloc(sizeof(int) * reparttype->ncelllist)) == NULL) + error("Failed to allocate celllist"); restart_read_blocks(reparttype->celllist, sizeof(int) * reparttype->ncelllist, 1, stream, NULL, "repartition celllist"); diff --git a/src/partition.h b/src/partition.h index ec7d670a43537c4717090b857b6e6ba9186b8f1c..1202a1d19ff18f83ed26464bade088990ed51db6 100644 --- a/src/partition.h +++ b/src/partition.h @@ -38,18 +38,16 @@ extern const char *initial_partition_name[]; struct partition { enum partition_type type; int grid[3]; + int usemetis; }; /* Repartition type to use. */ enum repartition_type { REPART_NONE = 0, - REPART_METIS_VERTEX_COSTS_EDGE_COSTS, - REPART_METIS_VERTEX_COUNTS, + REPART_METIS_VERTEX_EDGE_COSTS, REPART_METIS_EDGE_COSTS, - REPART_METIS_VERTEX_COUNTS_EDGE_COSTS, - REPART_METIS_VERTEX_COSTS_EDGE_TIMEBINS, - REPART_METIS_VERTEX_COUNTS_EDGE_TIMEBINS, - REPART_METIS_EDGE_TIMEBINS + REPART_METIS_VERTEX_COSTS, + REPART_METIS_VERTEX_COSTS_TIMEBINS }; /* Repartition preferences. */ @@ -57,8 +55,11 @@ struct repartition { enum repartition_type type; float trigger; float minfrac; + float itr; + int usemetis; + int adaptive; - /* The partition as a cell list, if used. */ + /* The partition as a cell-list. 
*/ int ncelllist; int *celllist; }; diff --git a/src/physical_constants.c b/src/physical_constants.c index 3936d07f4207263a4c391715ab0a8dd9ded6fa6d..7752f4d3130b7174863d520b3d4d3c6a3e8eb433 100644 --- a/src/physical_constants.c +++ b/src/physical_constants.c @@ -126,6 +126,16 @@ void phys_const_init(const struct unit_system *us, struct swift_params *params, internal_const->const_T_CMB_0 = const_T_CMB_0_cgs / units_general_cgs_conversion_factor(us, dimension_temperature); + + const float dimension_Yp[5] = {0, 0, 0, 0, 0}; /* [ - ] */ + internal_const->const_primordial_He_fraction = + const_primordial_He_fraction_cgs / + units_general_cgs_conversion_factor(us, dimension_Yp); + + const float dimension_reduced_hubble[5] = {0, 0, -1, 0, 0}; /* [s^-1] */ + internal_const->const_reduced_hubble = + const_reduced_hubble_cgs / + units_general_cgs_conversion_factor(us, dimension_reduced_hubble); } /** @@ -148,6 +158,7 @@ void phys_const_print(const struct phys_const *internal_const) { internal_const->const_astronomical_unit); message("%25s = %e", "Parsec", internal_const->const_parsec); message("%25s = %e", "Solar mass", internal_const->const_solar_mass); + message("%25s = %e", "km/s/Mpc", internal_const->const_reduced_hubble); } /** diff --git a/src/physical_constants.h b/src/physical_constants.h index 16628bfd6894699608e167d4b309fa5636209219..97da4b322a8bca1f978b43a4cabda2ff1cc1e517 100644 --- a/src/physical_constants.h +++ b/src/physical_constants.h @@ -93,6 +93,12 @@ struct phys_const { /*! Temperature of the CMB at present day */ double const_T_CMB_0; + + /*! Primordial Helium fraction */ + double const_primordial_He_fraction; + + /*! Reduced hubble constant units (i.e. 
H_0 / h) */ + double const_reduced_hubble; }; void phys_const_init(const struct unit_system* us, struct swift_params* params, diff --git a/src/physical_constants_cgs.h b/src/physical_constants_cgs.h index 40eef2c992e819e01980cbcbd7ea7f05721e93cf..4d1a54f68ba557c74fb489a9343eaf3846c481f4 100644 --- a/src/physical_constants_cgs.h +++ b/src/physical_constants_cgs.h @@ -95,4 +95,11 @@ const double const_earth_mass_cgs = 5.9724e27; /*! Temperature of the CMB at present day [K] */ const double const_T_CMB_0_cgs = 2.7255; +/*! Primordial Helium fraction [-] */ +const double const_primordial_He_fraction_cgs = 0.245; + +/*! Reduced Hubble constant units (i.e. H_0 / h == 100 km / s / Mpc in CGS) + * [s^-1] */ +const double const_reduced_hubble_cgs = 3.2407792894458e-18; + #endif /* SWIFT_PHYSICAL_CONSTANTS_CGS_H */ diff --git a/src/potential.h b/src/potential.h index 814b83c69180631db21e392704c0279808a6f03e..59567fe92296068f838c39a3eb5ff55c14005d48 100644 --- a/src/potential.h +++ b/src/potential.h @@ -34,6 +34,10 @@ #include "./potential/point_mass/potential.h" #elif defined(EXTERNAL_POTENTIAL_ISOTHERMAL) #include "./potential/isothermal/potential.h" +#elif defined(EXTERNAL_POTENTIAL_HERNQUIST) +#include "./potential/hernquist/potential.h" +#elif defined(EXTERNAL_POTENTIAL_NFW) +#include "./potential/nfw/potential.h" #elif defined(EXTERNAL_POTENTIAL_DISC_PATCH) #include "./potential/disc_patch/potential.h" #elif defined(EXTERNAL_POTENTIAL_SINE_WAVE) diff --git a/src/potential/hernquist/potential.h b/src/potential/hernquist/potential.h new file mode 100644 index 0000000000000000000000000000000000000000..b98f45ff7ab4aeffd94f47f4931d3dd6c80d5642 --- /dev/null +++ b/src/potential/hernquist/potential.h @@ -0,0 +1,309 @@ +/******************************************************************************* + * This file is part of SWIFT. 
+ * Copyright (c) 2018 Folkert Nobels (nobels@strw.leidenuniv.nl) + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published + * by the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public License + * along with this program. If not, see <http://www.gnu.org/licenses/>. + * + ******************************************************************************/ +#ifndef SWIFT_POTENTIAL_HERNQUIST_H +#define SWIFT_POTENTIAL_HERNQUIST_H + +/* Config parameters. */ +#include "../config.h" + +/* Some standard headers. */ +#include <math.h> + +/* Local includes. */ +#include "error.h" +#include "parser.h" +#include "part.h" +#include "physical_constants.h" +#include "space.h" +#include "units.h" + +/** + * @brief External Potential Properties - Hernquist potential + */ +struct external_potential { + + /*! Position of the centre of potential */ + double x[3]; + + /*! Mass of the halo */ + double mass; + + /*! Scale length (often as a, to prevent confusion with the cosmological + * scale-factor we use al) */ + double al; + + /*! Square of the softening length. Acceleration tends to zero within this + * distance from the origin */ + double epsilon2; + + /* Minimum timestep of the potential given by the timestep multiple + * times the orbital time at the softening length */ + double mintime; + + /*! 
Time-step condition pre-factor, is multiplied times the circular orbital + * time to get the time steps */ + double timestep_mult; +}; + +/** + * @brief Computes the time-step in a Hernquist potential based on a + * fraction of the circular orbital time + * + * @param time The current time. + * @param potential The #external_potential used in the run. + * @param phys_const The physical constants in internal units. + * @param g Pointer to the g-particle data. + */ +__attribute__((always_inline)) INLINE static float external_gravity_timestep( + double time, const struct external_potential* restrict potential, + const struct phys_const* restrict phys_const, + const struct gpart* restrict g) { + + const float G_newton = phys_const->const_newton_G; + + /* Calculate the relative potential with respect to the centre of the + * potential */ + const float dx = g->x[0] - potential->x[0]; + const float dy = g->x[1] - potential->x[1]; + const float dz = g->x[2] - potential->x[2]; + + /* calculate the radius */ + const float r = sqrtf(dx * dx + dy * dy + dz * dz + potential->epsilon2); + const float sqrtgm_inv = 1.f / sqrtf(G_newton * potential->mass); + + /* Calculate the circular orbital period */ + const float period = 2.f * M_PI * sqrtf(r) * potential->al * + (1 + r / potential->al) * sqrtgm_inv; + + /* Time-step as a fraction of the circular orbital time */ + const float time_step = potential->timestep_mult * period; + + return max(time_step, potential->mintime); +} + +/** + * @brief Computes the gravitational acceleration from a Hernquist potential. + * + * Note that the accelerations are multiplied by Newton's G constant + * later on. + * + * a_x = - GM / (a+r)^2 * x/r + * a_y = - GM / (a+r)^2 * y/r + * a_z = - GM / (a+r)^2 * z/r + * + * @param time The current time. + * @param potential The #external_potential used in the run. + * @param phys_const The physical constants in internal units. + * @param g Pointer to the g-particle data.
+ */ +__attribute__((always_inline)) INLINE static void external_gravity_acceleration( + double time, const struct external_potential* potential, + const struct phys_const* const phys_const, struct gpart* g) { + + /* Determine the position relative to the centre of the potential */ + const float dx = g->x[0] - potential->x[0]; + const float dy = g->x[1] - potential->x[1]; + const float dz = g->x[2] - potential->x[2]; + + /* Calculate the acceleration */ + const float r = sqrtf(dx * dx + dy * dy + dz * dz + potential->epsilon2); + const float r_plus_a_inv = 1.f / (r + potential->al); + const float r_plus_a_inv2 = r_plus_a_inv * r_plus_a_inv; + const float term = -potential->mass * r_plus_a_inv2 / r; + + g->a_grav[0] += term * dx; + g->a_grav[1] += term * dy; + g->a_grav[2] += term * dz; +} + +/** + * @brief Computes the gravitational potential energy of a particle in an + * Hernquist potential. + * + * phi = - GM/(r+a) + * + * @param time The current time (unused here). + * @param potential The #external_potential used in the run. + * @param phys_const Physical constants in internal units. + * @param g Pointer to the particle data. + */ +__attribute__((always_inline)) INLINE static float +external_gravity_get_potential_energy( + double time, const struct external_potential* potential, + const struct phys_const* const phys_const, const struct gpart* g) { + + const float dx = g->x[0] - potential->x[0]; + const float dy = g->x[1] - potential->x[1]; + const float dz = g->x[2] - potential->x[2]; + const float r = sqrtf(dx * dx + dy * dy + dz * dz); + const float r_plus_alinv = 1.f / (r + potential->al); + return -phys_const->const_newton_G * potential->mass * r_plus_alinv; +} + +/** + * @brief Initialises the external potential properties in the internal system + * of units. 
+ * + * @param parameter_file The parsed parameter file + * @param phys_const Physical constants in internal units + * @param us The current internal system of units + * @param potential The external potential properties to initialize + */ +static INLINE void potential_init_backend( + struct swift_params* parameter_file, const struct phys_const* phys_const, + const struct unit_system* us, const struct space* s, + struct external_potential* potential) { + + /* Define the default value */ + static const int idealized_disk_default = 0; + static const double M200_default = 0.; + static const double V200_default = 0.; + static const double R200_default = 0.; + + /* Read in the position of the centre of potential */ + parser_get_param_double_array(parameter_file, "HernquistPotential:position", + 3, potential->x); + + /* Is the position absolute or relative to the centre of the box? */ + const int useabspos = + parser_get_param_int(parameter_file, "HernquistPotential:useabspos"); + + if (!useabspos) { + potential->x[0] += s->dim[0] / 2.; + potential->x[1] += s->dim[1] / 2.; + potential->x[2] += s->dim[2] / 2.; + } + + /* check whether we use the more advanced idealized disk setting */ + const int usedisk = parser_get_opt_param_int( + parameter_file, "HernquistPotential:idealizeddisk", + idealized_disk_default); + + if (!usedisk) { + /* Read the parameters of the model in the case of the simple + * potential form \f$ \Phi = - \frac{GM}{r+a} \f$ */ + potential->mass = + parser_get_param_double(parameter_file, "HernquistPotential:mass"); + potential->al = parser_get_param_double(parameter_file, + "HernquistPotential:scalelength"); + } else { + + /* Read the parameters in the case of a idealized disk + * There are 3 different possible input parameters M200, V200 and R200 + * First read in the mandatory parameters in this case */ + + const float G_newton = phys_const->const_newton_G; + const float kmoversoverMpc = phys_const->const_reduced_hubble; + + /* Initialize the 
variables */ + double M200 = parser_get_opt_param_double( + parameter_file, "HernquistPotential:M200", M200_default); + double V200 = parser_get_opt_param_double( + parameter_file, "HernquistPotential:V200", V200_default); + double R200 = parser_get_opt_param_double( + parameter_file, "HernquistPotential:R200", R200_default); + const double h = + parser_get_param_double(parameter_file, "HernquistPotential:h"); + + /* Hubble constant assumed for halo masses conversion */ + const double H0 = h * kmoversoverMpc; + + /* There are 3 legit runs possible with use disk, + * with a known M200, V200 or R200 */ + if (M200 != 0.0) { + /* Calculate V200 and R200 from M200 */ + V200 = cbrt(10. * M200 * G_newton * H0); + R200 = V200 / (10 * H0); + + } else if (V200 != 0.0) { + + /* Calculate M200 and R200 from V200 */ + M200 = V200 * V200 * V200 / (10. * G_newton * H0); + R200 = V200 / (10 * H0); + } else if (R200 != 0.0) { + + /* Calculate M200 and V200 from R200 */ + V200 = 10. * H0 * R200; + M200 = V200 * V200 * V200 / (10. * G_newton * H0); + } else { + error("Please specify one of the 3 variables M200, V200 or R200"); + } + + /* message("M200 = %g, R200 = %g, V200 = %g", M200, R200, V200); */ + /* message("H0 = %g", H0); */ + + /* get the concentration from the parameter file */ + const double concentration = parser_get_param_double( + parameter_file, "HernquistPotential:concentration"); + + /* Calculate the Scale radius using the NFW definition */ + const double RS = R200 / concentration; + + /* Calculate the Hernquist equivalent scale length */ + potential->al = RS * sqrt(1. * (log(1. + concentration) - + concentration / (1. 
+ concentration))); + + /* Depending on the disk mass and the bulge mass the halo + * gets a different mass, because of this we read the fractions + * from the parameter file and calculate the absolute mass */ + const double diskfraction = parser_get_param_double( + parameter_file, "HernquistPotential:diskfraction"); + const double bulgefraction = parser_get_param_double( + parameter_file, "HernquistPotential:bulgefraction"); + /* Calculate the mass of the bulge and disk from the parameters */ + const double Mdisk = M200 * diskfraction; + const double Mbulge = M200 * bulgefraction; + + /* Store the mass of the DM halo */ + potential->mass = M200 - Mdisk - Mbulge; + } + + /* Retrieve the timestep and softening of the potential */ + potential->timestep_mult = parser_get_param_float( + parameter_file, "HernquistPotential:timestep_mult"); + const float epsilon = + parser_get_param_double(parameter_file, "HernquistPotential:epsilon"); + potential->epsilon2 = epsilon * epsilon; + + /* Compute the minimal time-step. */ + /* This is the circular orbital time at the softened radius */ + const float sqrtgm = sqrtf(phys_const->const_newton_G * potential->mass); + potential->mintime = 2.f * sqrtf(epsilon) * potential->al * M_PI * + (1. + epsilon / potential->al) / sqrtgm * + potential->timestep_mult; +} + +/** + * @brief prints the properties of the external potential to stdout. + * + * @param potential the external potential properties.
+ */ +static inline void potential_print_backend( + const struct external_potential* potential) { + + message( + "external potential is 'hernquist' with properties are (x,y,z) = (%e, " + "%e, %e), mass = %e " + "scale length = %e , minimum time = %e " + "timestep multiplier = %e", + potential->x[0], potential->x[1], potential->x[2], potential->mass, + potential->al, potential->mintime, potential->timestep_mult); +} + +#endif /* SWIFT_POTENTIAL_HERNQUIST_H */ diff --git a/src/potential/isothermal/potential.h b/src/potential/isothermal/potential.h index b5f8d7c39738bfe1895c73e6e59ae1279c0f74fa..160372210e41036f2737c10a4aa3d2ddac1077f2 100644 --- a/src/potential/isothermal/potential.h +++ b/src/potential/isothermal/potential.h @@ -148,7 +148,7 @@ external_gravity_get_potential_energy( const float dy = g->x[1] - potential->x[1]; const float dz = g->x[2] - potential->x[2]; - return -0.5f * potential->vrot * potential->vrot * + return 0.5f * potential->vrot * potential->vrot * logf(dx * dx + dy * dy + dz * dz + potential->epsilon2); } @@ -166,11 +166,19 @@ static INLINE void potential_init_backend( const struct unit_system* us, const struct space* s, struct external_potential* potential) { + /* Read in the position of the centre of potential */ parser_get_param_double_array(parameter_file, "IsothermalPotential:position", 3, potential->x); - potential->x[0] += s->dim[0] / 2.; - potential->x[1] += s->dim[1] / 2.; - potential->x[2] += s->dim[2] / 2.; + + /* Is the position absolute or relative to the centre of the box? 
*/ + const int useabspos = + parser_get_param_int(parameter_file, "IsothermalPotential:useabspos"); + + if (!useabspos) { + potential->x[0] += s->dim[0] / 2.; + potential->x[1] += s->dim[1] / 2.; + potential->x[2] += s->dim[2] / 2.; + } potential->vrot = parser_get_param_double(parameter_file, "IsothermalPotential:vrot"); diff --git a/src/potential/nfw/potential.h b/src/potential/nfw/potential.h new file mode 100644 index 0000000000000000000000000000000000000000..28bafd439a36a41f2feecdc7169f8628fbed47f4 --- /dev/null +++ b/src/potential/nfw/potential.h @@ -0,0 +1,260 @@ +/******************************************************************************* + * This file is part of SWIFT. + * Copyright (c) 2018 Ashley Kelly () + * Folkert Nobels (nobels@strw.leidenuniv.nl) + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published + * by the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public License + * along with this program. If not, see <http://www.gnu.org/licenses/>. + * + ******************************************************************************/ +#ifndef SWIFT_POTENTIAL_NFW_H +#define SWIFT_POTENTIAL_NFW_H + +/* Config parameters. */ +#include "../config.h" + +/* Some standard headers. */ +#include <float.h> +#include <math.h> + +/* Local includes. 
*/ +#include "error.h" +#include "parser.h" +#include "part.h" +#include "physical_constants.h" +#include "space.h" +#include "units.h" + +/** + * @brief External Potential Properties - NFW Potential + rho(r) = rho_0 / ( (r/R_s)*(1+r/R_s)^2 ) + + We however parameterise this in terms of c and virial_mass + */ +struct external_potential { + + /*! Position of the centre of potential */ + double x[3]; + + /*! The scale radius of the NFW potential */ + double r_s; + + /*! The pre-factor \f$ 4 \pi G \rho_0 \r_s^3 \f$ */ + double pre_factor; + + /*! The critical density of the universe */ + double rho_c; + + /*! The concentration parameter */ + double c_200; + + /*! The virial mass */ + double M_200; + + /*! Time-step condition pre_factor, this factor is used to multiply times the + * orbital time, so in the case of 0.01 we take 1% of the orbital time as + * the time integration steps */ + double timestep_mult; + + /*! Minimum time step based on the orbital time at the softening times + * the timestep_mult */ + double mintime; + + /*! Common log term \f$ \ln(1+c_{200}) - \frac{c_{200}}{1 + c_{200}} \f$ */ + double log_c200_term; + + /*! Softening length */ + double eps; +}; + +/** + * @brief Computes the time-step due to the acceleration from the NFW potential + * as a fraction (timestep_mult) of the circular orbital time of that + * particle. + * + * @param time The current time. + * @param potential The #external_potential used in the run. + * @param phys_const The physical constants in internal units. + * @param g Pointer to the g-particle data. 
+ */ +__attribute__((always_inline)) INLINE static float external_gravity_timestep( + double time, const struct external_potential* restrict potential, + const struct phys_const* restrict phys_const, + const struct gpart* restrict g) { + + const float dx = g->x[0] - potential->x[0]; + const float dy = g->x[1] - potential->x[1]; + const float dz = g->x[2] - potential->x[2]; + + const float r = + sqrtf(dx * dx + dy * dy + dz * dz + potential->eps * potential->eps); + + const float mr = potential->M_200 * + (logf(1.f + r / potential->r_s) - r / (r + potential->r_s)) / + potential->log_c200_term; + + const float period = + 2 * M_PI * r * sqrtf(r / (phys_const->const_newton_G * mr)); + + /* Time-step as a fraction of the circular period */ + const float time_step = potential->timestep_mult * period; + + return max(time_step, potential->mintime); +} + +/** + * @brief Computes the gravitational acceleration from an NFW Halo potential. + * + * Note that the accelerations are multiplied by Newton's G constant + * later on. + * + * a_x = 4 pi \rho_0 r_s^3 ( 1/((r+rs)*r^2) - log(1+r/rs)/r^3) * x + * a_y = 4 pi \rho_0 r_s^3 ( 1/((r+rs)*r^2) - log(1+r/rs)/r^3) * y + * a_z = 4 pi \rho_0 r_s^3 ( 1/((r+rs)*r^2) - log(1+r/rs)/r^3) * z + * + * @param time The current time. + * @param potential The #external_potential used in the run. + * @param phys_const The physical constants in internal units. + * @param g Pointer to the g-particle data. 
+ */ +__attribute__((always_inline)) INLINE static void external_gravity_acceleration( + double time, const struct external_potential* restrict potential, + const struct phys_const* restrict phys_const, struct gpart* restrict g) { + + const float dx = g->x[0] - potential->x[0]; + const float dy = g->x[1] - potential->x[1]; + const float dz = g->x[2] - potential->x[2]; + + const float r = + sqrtf(dx * dx + dy * dy + dz * dz + potential->eps * potential->eps); + const float term1 = potential->pre_factor; + const float term2 = (1.0f / ((r + potential->r_s) * r * r) - + logf(1.0f + r / potential->r_s) / (r * r * r)); + + g->a_grav[0] += term1 * term2 * dx; + g->a_grav[1] += term1 * term2 * dy; + g->a_grav[2] += term1 * term2 * dz; +} + +/** + * @brief Computes the gravitational potential energy of a particle in an + * NFW potential. + * + * phi = -4 * pi * G * rho_0 * r_s^3 * ln(1+r/r_s) + * + * @param time The current time (unused here). + * @param potential The #external_potential used in the run. + * @param phys_const Physical constants in internal units. + * @param g Pointer to the particle data. + */ +__attribute__((always_inline)) INLINE static float +external_gravity_get_potential_energy( + double time, const struct external_potential* potential, + const struct phys_const* const phys_const, const struct gpart* g) { + + const float dx = g->x[0] - potential->x[0]; + const float dy = g->x[1] - potential->x[1]; + const float dz = g->x[2] - potential->x[2]; + + const float r = + sqrtf(dx * dx + dy * dy + dz * dz + potential->eps * potential->eps); + const float term1 = -potential->pre_factor / r; + const float term2 = logf(1.0f + r / potential->r_s); + + return term1 * term2; +} + +/** + * @brief Initialises the external potential properties in the internal system + * of units. 
+ * + * @param parameter_file The parsed parameter file + * @param phys_const Physical constants in internal units + * @param us The current internal system of units + * @param potential The external potential properties to initialize + */ +static INLINE void potential_init_backend( + struct swift_params* parameter_file, const struct phys_const* phys_const, + const struct unit_system* us, const struct space* s, + struct external_potential* potential) { + + /* Read in the position of the centre of potential */ + parser_get_param_double_array(parameter_file, "NFWPotential:position", 3, + potential->x); + + /* Is the position absolute or relative to the centre of the box? */ + const int useabspos = + parser_get_param_int(parameter_file, "NFWPotential:useabspos"); + + if (!useabspos) { + potential->x[0] += s->dim[0] / 2.; + potential->x[1] += s->dim[1] / 2.; + potential->x[2] += s->dim[2] / 2.; + } + + /* Read the other parameters of the model */ + potential->timestep_mult = + parser_get_param_double(parameter_file, "NFWPotential:timestep_mult"); + potential->c_200 = + parser_get_param_double(parameter_file, "NFWPotential:concentration"); + potential->M_200 = + parser_get_param_double(parameter_file, "NFWPotential:M_200"); + potential->rho_c = + parser_get_param_double(parameter_file, "NFWPotential:critical_density"); + potential->eps = 0.05; + + /* Compute R_200 */ + const double R_200 = + cbrtf(3.0 * potential->M_200 / (4. * M_PI * 200.0 * potential->rho_c)); + + /* NFW scale-radius */ + potential->r_s = R_200 / potential->c_200; + const double r_s3 = potential->r_s * potential->r_s * potential->r_s; + + /* Log(c_200) term appearing in many expressions */ + potential->log_c200_term = + log(1. + potential->c_200) - potential->c_200 / (1. 
+ potential->c_200); + + const double rho_0 = + potential->M_200 / (4.f * M_PI * r_s3 * potential->log_c200_term); + + /* Pre-factor for the accelerations (note G is multiplied in later on) */ + potential->pre_factor = 4.0f * M_PI * rho_0 * r_s3; + + /* Compute the orbital time at the softening radius */ + const double sqrtgm = sqrt(phys_const->const_newton_G * potential->M_200); + const double epslnthing = log(1.f + potential->eps / potential->r_s) - + potential->eps / (potential->eps + potential->r_s); + + potential->mintime = 2. * M_PI * potential->eps * sqrtf(potential->eps) * + sqrtf(potential->log_c200_term / epslnthing) / sqrtgm * + potential->timestep_mult; +} + +/** + * @brief Prints the properties of the external potential to stdout. + * + * @param potential The external potential properties. + */ +static INLINE void potential_print_backend( + const struct external_potential* potential) { + + message( + "External potential is 'NFW' with properties are (x,y,z) = (%e, " + "%e, %e), scale radius = %e " + "timestep multiplier = %e, mintime = %e", + potential->x[0], potential->x[1], potential->x[2], potential->r_s, + potential->timestep_mult, potential->mintime); +} + +#endif /* SWIFT_POTENTIAL_NFW_H */ diff --git a/src/potential/point_mass/potential.h b/src/potential/point_mass/potential.h index f9d56a1ff165f2331c91ea828b5ffe0e0db76c2f..5ae03f8637708d75800a6a7fb283b98bdb42cec2 100644 --- a/src/potential/point_mass/potential.h +++ b/src/potential/point_mass/potential.h @@ -137,7 +137,7 @@ external_gravity_get_potential_energy( const float dx = g->x[0] - potential->x[0]; const float dy = g->x[1] - potential->x[1]; const float dz = g->x[2] - potential->x[2]; - const float rinv = 1. 
/ sqrtf(dx * dx + dy * dy + dz * dz); + const float rinv = 1.f / sqrtf(dx * dx + dy * dy + dz * dz); return -phys_const->const_newton_G * potential->mass * rinv; } @@ -156,8 +156,21 @@ static INLINE void potential_init_backend( const struct unit_system* us, const struct space* s, struct external_potential* potential) { + /* Read in the position of the centre of potential */ parser_get_param_double_array(parameter_file, "PointMassPotential:position", 3, potential->x); + + /* Is the position absolute or relative to the centre of the box? */ + const int useabspos = + parser_get_param_int(parameter_file, "PointMassPotential:useabspos"); + + if (!useabspos) { + potential->x[0] += s->dim[0] / 2.; + potential->x[1] += s->dim[1] / 2.; + potential->x[2] += s->dim[2] / 2.; + } + + /* Read the other parameters of the model */ potential->mass = parser_get_param_double(parameter_file, "PointMassPotential:mass"); potential->timestep_mult = parser_get_param_float( diff --git a/src/potential/point_mass_softened/potential.h b/src/potential/point_mass_softened/potential.h index 0e35e7bb9870c7954b47316a3cc30bb68cde5fc4..050bc1a00c98da4c350e59cf1ef8ef855094e552 100644 --- a/src/potential/point_mass_softened/potential.h +++ b/src/potential/point_mass_softened/potential.h @@ -183,8 +183,21 @@ static INLINE void potential_init_backend( const struct unit_system* us, const struct space* s, struct external_potential* potential) { + /* Read in the position of the centre of potential */ parser_get_param_double_array(parameter_file, "PointMassPotential:position", 3, potential->x); + + /* Is the position absolute or relative to the centre of the box? 
*/ + const int useabspos = + parser_get_param_int(parameter_file, "PointMassPotential:useabspos"); + + if (!useabspos) { + potential->x[0] += s->dim[0] / 2.; + potential->x[1] += s->dim[1] / 2.; + potential->x[2] += s->dim[2] / 2.; + } + + /* Read the other parameters of the model */ potential->mass = parser_get_param_double(parameter_file, "PointMassPotential:mass"); potential->timestep_mult = parser_get_param_float( diff --git a/src/profiler.c b/src/profiler.c index 58fd279d312d3c752d65ccaceab803ace66fddac..6fed108ea2b1359238ee47d37cd90380086ec6bc 100644 --- a/src/profiler.c +++ b/src/profiler.c @@ -21,6 +21,7 @@ #include "../config.h" /* Some standard headers. */ +#include <math.h> #include <string.h> /* This object's header. */ diff --git a/src/proxy.c b/src/proxy.c index 9d170a517f6f24c907b10330c6d1e33215bcce1b..4a67b4b3584c43b2df63f17303eba9ec5e742cb0 100644 --- a/src/proxy.c +++ b/src/proxy.c @@ -38,21 +38,162 @@ #include "proxy.h" /* Local headers. */ +#include "cell.h" +#include "engine.h" #include "error.h" +#include "space.h" + +#ifdef WITH_MPI +/* MPI data type for the communications */ +MPI_Datatype pcell_mpi_type; +#endif /** - * @brief Exchange cells with a remote node. + * @brief Exchange tags between nodes. + * + * Note that this function assumes that the cell structures have already + * been exchanged, e.g. via #proxy_cells_exchange. + * + * @param proxies The list of #proxy that will send/recv tags + * @param num_proxies The number of proxies. + * @param s The space into which the tags will be unpacked. + */ +void proxy_tags_exchange(struct proxy *proxies, int num_proxies, + struct space *s) { + +#ifdef WITH_MPI + + ticks tic2 = getticks(); + + /* Run through the cells and get the size of the tags that will be sent off. 
+ */ + int count_out = 0; + int offset_out[s->nr_cells]; + for (int k = 0; k < s->nr_cells; k++) { + offset_out[k] = count_out; + if (s->cells_top[k].mpi.sendto) { + count_out += s->cells_top[k].mpi.pcell_size; + } + } + + /* Run through the proxies and get the count of incoming tags. */ + int count_in = 0; + int offset_in[s->nr_cells]; + for (int k = 0; k < num_proxies; k++) { + for (int j = 0; j < proxies[k].nr_cells_in; j++) { + offset_in[proxies[k].cells_in[j] - s->cells_top] = count_in; + count_in += proxies[k].cells_in[j]->mpi.pcell_size; + } + } + + /* Allocate the tags. */ + int *tags_in = NULL; + int *tags_out = NULL; + if (posix_memalign((void **)&tags_in, SWIFT_CACHE_ALIGNMENT, + sizeof(int) * count_in) != 0 || + posix_memalign((void **)&tags_out, SWIFT_CACHE_ALIGNMENT, + sizeof(int) * count_out) != 0) + error("Failed to allocate tags buffers."); + + /* Pack the local tags. */ + for (int k = 0; k < s->nr_cells; k++) { + if (s->cells_top[k].mpi.sendto) { + cell_pack_tags(&s->cells_top[k], &tags_out[offset_out[k]]); + } + } + + if (s->e->verbose) + message("Cell pack tags took %.3f %s.", + clocks_from_ticks(getticks() - tic2), clocks_getunit()); + + /* Allocate the incoming and outgoing request handles. */ + int num_reqs_out = 0; + int num_reqs_in = 0; + for (int k = 0; k < num_proxies; k++) { + num_reqs_in += proxies[k].nr_cells_in; + num_reqs_out += proxies[k].nr_cells_out; + } + MPI_Request *reqs_in = NULL; + int *cids_in = NULL; + if ((reqs_in = (MPI_Request *)malloc(sizeof(MPI_Request) * + (num_reqs_in + num_reqs_out))) == NULL || + (cids_in = (int *)malloc(sizeof(int) * (num_reqs_in + num_reqs_out))) == + NULL) + error("Failed to allocate MPI_Request arrays."); + MPI_Request *reqs_out = &reqs_in[num_reqs_in]; + int *cids_out = &cids_in[num_reqs_in]; + + /* Emit the sends and recvs. 
*/ + for (int send_rid = 0, recv_rid = 0, k = 0; k < num_proxies; k++) { + for (int j = 0; j < proxies[k].nr_cells_in; j++) { + const int cid = proxies[k].cells_in[j] - s->cells_top; + cids_in[recv_rid] = cid; + int err = MPI_Irecv( + &tags_in[offset_in[cid]], proxies[k].cells_in[j]->mpi.pcell_size, + MPI_INT, proxies[k].nodeID, cid, MPI_COMM_WORLD, &reqs_in[recv_rid]); + if (err != MPI_SUCCESS) mpi_error(err, "Failed to irecv tags."); + recv_rid += 1; + } + for (int j = 0; j < proxies[k].nr_cells_out; j++) { + const int cid = proxies[k].cells_out[j] - s->cells_top; + cids_out[send_rid] = cid; + int err = MPI_Isend( + &tags_out[offset_out[cid]], proxies[k].cells_out[j]->mpi.pcell_size, + MPI_INT, proxies[k].nodeID, cid, MPI_COMM_WORLD, &reqs_out[send_rid]); + if (err != MPI_SUCCESS) mpi_error(err, "Failed to isend tags."); + send_rid += 1; + } + } + + tic2 = getticks(); + + /* Wait for each recv and unpack the tags into the local cells. */ + for (int k = 0; k < num_reqs_in; k++) { + int pid = MPI_UNDEFINED; + MPI_Status status; + if (MPI_Waitany(num_reqs_in, reqs_in, &pid, &status) != MPI_SUCCESS || + pid == MPI_UNDEFINED) + error("MPI_Waitany failed."); + const int cid = cids_in[pid]; + cell_unpack_tags(&tags_in[offset_in[cid]], &s->cells_top[cid]); + } + + if (s->e->verbose) + message("Cell unpack tags took %.3f %s.", + clocks_from_ticks(getticks() - tic2), clocks_getunit()); + + /* Wait for all the sends to have completed. */ + if (MPI_Waitall(num_reqs_out, reqs_out, MPI_STATUSES_IGNORE) != MPI_SUCCESS) + error("MPI_Waitall on sends failed."); + + /* Clean up. */ + free(tags_in); + free(tags_out); + free(reqs_in); + free(cids_in); + +#else + error("SWIFT was not compiled with MPI support."); +#endif +} + +/** + * @brief Exchange cells with a remote node, first part. + * + * The first part of the transaction sends the local cell count and the packed + * #pcell array to the destination node, and enqueues an @c MPI_Irecv for + * the foreign cell counts. 
* * @param p The #proxy. */ -void proxy_cells_exch1(struct proxy *p) { +void proxy_cells_exchange_first(struct proxy *p) { #ifdef WITH_MPI /* Get the number of pcells we will need to send. */ p->size_pcells_out = 0; for (int k = 0; k < p->nr_cells_out; k++) - p->size_pcells_out += p->cells_out[k]->pcell_size; + p->size_pcells_out += p->cells_out[k]->mpi.pcell_size; /* Send the number of pcells. */ int err = MPI_Isend(&p->size_pcells_out, 1, MPI_INT, p->nodeID, @@ -68,14 +209,13 @@ void proxy_cells_exch1(struct proxy *p) { sizeof(struct pcell) * p->size_pcells_out) != 0) error("Failed to allocate pcell_out buffer."); for (int ind = 0, k = 0; k < p->nr_cells_out; k++) { - memcpy(&p->pcells_out[ind], p->cells_out[k]->pcell, - sizeof(struct pcell) * p->cells_out[k]->pcell_size); - ind += p->cells_out[k]->pcell_size; + memcpy(&p->pcells_out[ind], p->cells_out[k]->mpi.pcell, + sizeof(struct pcell) * p->cells_out[k]->mpi.pcell_size); + ind += p->cells_out[k]->mpi.pcell_size; } /* Send the pcell buffer. */ - err = MPI_Isend(p->pcells_out, sizeof(struct pcell) * p->size_pcells_out, - MPI_BYTE, p->nodeID, + err = MPI_Isend(p->pcells_out, p->size_pcells_out, pcell_mpi_type, p->nodeID, p->mynodeID * proxy_tag_shift + proxy_tag_cells, MPI_COMM_WORLD, &p->req_cells_out); @@ -96,7 +236,16 @@ void proxy_cells_exch1(struct proxy *p) { #endif } -void proxy_cells_exch2(struct proxy *p) { +/** + * @brief Exchange cells with a remote node, second part. + * + * Once the incoming cell count has been received, allocate a buffer + * for the foreign packed #pcell array and emit the @c MPI_Irecv for + * it. + * + * @param p The #proxy. + */ +void proxy_cells_exchange_second(struct proxy *p) { #ifdef WITH_MPI @@ -107,9 +256,8 @@ void proxy_cells_exch2(struct proxy *p) { error("Failed to allocate pcell_in buffer."); /* Receive the particle buffers. 
*/ - int err = MPI_Irecv(p->pcells_in, sizeof(struct pcell) * p->size_pcells_in, - MPI_BYTE, p->nodeID, - p->nodeID * proxy_tag_shift + proxy_tag_cells, + int err = MPI_Irecv(p->pcells_in, p->size_pcells_in, pcell_mpi_type, + p->nodeID, p->nodeID * proxy_tag_shift + proxy_tag_cells, MPI_COMM_WORLD, &p->req_cells_in); if (err != MPI_SUCCESS) mpi_error(err, "Failed to irecv part data."); @@ -121,6 +269,211 @@ void proxy_cells_exch2(struct proxy *p) { #endif } +#ifdef WITH_MPI + +void proxy_cells_count_mapper(void *map_data, int num_elements, + void *extra_data) { + struct cell *cells = (struct cell *)map_data; + + for (int k = 0; k < num_elements; k++) { + if (cells[k].mpi.sendto) cells[k].mpi.pcell_size = cell_getsize(&cells[k]); + } +} + +struct pack_mapper_data { + struct space *s; + int *offset; + struct pcell *pcells; + int with_gravity; +}; + +void proxy_cells_pack_mapper(void *map_data, int num_elements, + void *extra_data) { + struct cell *cells = (struct cell *)map_data; + struct pack_mapper_data *data = (struct pack_mapper_data *)extra_data; + + for (int k = 0; k < num_elements; k++) { + if (cells[k].mpi.sendto) { + ptrdiff_t ind = &cells[k] - data->s->cells_top; + cells[k].mpi.pcell = &data->pcells[data->offset[ind]]; + cell_pack(&cells[k], cells[k].mpi.pcell, data->with_gravity); + } + } +} + +void proxy_cells_exchange_first_mapper(void *map_data, int num_elements, + void *extra_data) { + struct proxy *proxies = (struct proxy *)map_data; + + for (int k = 0; k < num_elements; k++) { + proxy_cells_exchange_first(&proxies[k]); + } +} + +struct wait_and_unpack_mapper_data { + struct space *s; + int num_proxies; + MPI_Request *reqs_in; + struct proxy *proxies; + int with_gravity; + swift_lock_type lock; +}; + +void proxy_cells_wait_and_unpack_mapper(void *unused_map_data, int num_elements, + void *extra_data) { + + // MATTHIEU: This is currently unused. Scalar (non-threadpool) version is + // faster but we still need to explore why this happens. 
+ + struct wait_and_unpack_mapper_data *data = + (struct wait_and_unpack_mapper_data *)extra_data; + + for (int k = 0; k < num_elements; k++) { + int pid = MPI_UNDEFINED; + MPI_Status status; + int res; + + /* We need a lock to prevent concurrent calls to MPI_Waitany on + the same array of requests since this is not supported in the MPI + standard (v3.1). This is not really a problem since the threads + would block inside MPI_Waitany anyway. */ + lock_lock(&data->lock); + if ((res = MPI_Waitany(data->num_proxies, data->reqs_in, &pid, &status)) != + MPI_SUCCESS || + pid == MPI_UNDEFINED) + mpi_error(res, "MPI_Waitany failed."); + if (lock_unlock(&data->lock) != 0) { + error("Failed to release lock."); + } + + // message( "cell data from proxy %i has arrived." , pid ); + for (int count = 0, j = 0; j < data->proxies[pid].nr_cells_in; j++) + count += cell_unpack(&data->proxies[pid].pcells_in[count], + data->proxies[pid].cells_in[j], data->s, + data->with_gravity); + } +} + +#endif // WITH_MPI + +/** + * @brief Exchange the cell structures with all proxies. + * + * @param proxies The list of #proxy that will send/recv cells. + * @param num_proxies The number of proxies. + * @param s The space into which the particles will be unpacked. + * @param with_gravity Are we running with gravity and hence need + * to exchange multipoles? + */ +void proxy_cells_exchange(struct proxy *proxies, int num_proxies, + struct space *s, const int with_gravity) { + +#ifdef WITH_MPI + + MPI_Request *reqs; + if ((reqs = (MPI_Request *)malloc(sizeof(MPI_Request) * 2 * num_proxies)) == + NULL) + error("Failed to allocate request buffers."); + MPI_Request *reqs_in = reqs; + MPI_Request *reqs_out = &reqs[num_proxies]; + + ticks tic2 = getticks(); + + /* Run through the cells and get the size of the ones that will be sent off. 
+ */ + threadpool_map(&s->e->threadpool, proxy_cells_count_mapper, s->cells_top, + s->nr_cells, sizeof(struct cell), /*chunk=*/0, + /*extra_data=*/NULL); + int count_out = 0; + int offset[s->nr_cells]; + for (int k = 0; k < s->nr_cells; k++) { + offset[k] = count_out; + if (s->cells_top[k].mpi.sendto) count_out += s->cells_top[k].mpi.pcell_size; + } + + if (s->e->verbose) + message("Counting cells to send took %.3f %s.", + clocks_from_ticks(getticks() - tic2), clocks_getunit()); + + /* Allocate the pcells. */ + struct pcell *pcells = NULL; + if (posix_memalign((void **)&pcells, SWIFT_CACHE_ALIGNMENT, + sizeof(struct pcell) * count_out) != 0) + error("Failed to allocate pcell buffer."); + + tic2 = getticks(); + + /* Pack the cells. */ + struct pack_mapper_data data = {s, offset, pcells, with_gravity}; + threadpool_map(&s->e->threadpool, proxy_cells_pack_mapper, s->cells_top, + s->nr_cells, sizeof(struct cell), /*chunk=*/0, &data); + + if (s->e->verbose) + message("Packing cells took %.3f %s.", clocks_from_ticks(getticks() - tic2), + clocks_getunit()); + + /* Launch the first part of the exchange. */ + threadpool_map(&s->e->threadpool, proxy_cells_exchange_first_mapper, proxies, + num_proxies, sizeof(struct proxy), /*chunk=*/0, + /*extra_data=*/NULL); + for (int k = 0; k < num_proxies; k++) { + reqs_in[k] = proxies[k].req_cells_count_in; + reqs_out[k] = proxies[k].req_cells_count_out; + } + + /* Wait for each count to come in and start the recv. */ + for (int k = 0; k < num_proxies; k++) { + int pid = MPI_UNDEFINED; + MPI_Status status; + if (MPI_Waitany(num_proxies, reqs_in, &pid, &status) != MPI_SUCCESS || + pid == MPI_UNDEFINED) + error("MPI_Waitany failed."); + // message( "request from proxy %i has arrived." , pid ); + proxy_cells_exchange_second(&proxies[pid]); + } + + /* Wait for all the sends to have finished too. 
*/ + if (MPI_Waitall(num_proxies, reqs_out, MPI_STATUSES_IGNORE) != MPI_SUCCESS) + error("MPI_Waitall on sends failed."); + + /* Set the requests for the cells. */ + for (int k = 0; k < num_proxies; k++) { + reqs_in[k] = proxies[k].req_cells_in; + reqs_out[k] = proxies[k].req_cells_out; + } + + tic2 = getticks(); + + /* Wait for each pcell array to come in from the proxies. */ + for (int k = 0; k < num_proxies; k++) { + int pid = MPI_UNDEFINED; + MPI_Status status; + if (MPI_Waitany(num_proxies, reqs_in, &pid, &status) != MPI_SUCCESS || + pid == MPI_UNDEFINED) + error("MPI_Waitany failed."); + // message( "cell data from proxy %i has arrived." , pid ); + for (int count = 0, j = 0; j < proxies[pid].nr_cells_in; j++) + count += cell_unpack(&proxies[pid].pcells_in[count], + proxies[pid].cells_in[j], s, with_gravity); + } + + if (s->e->verbose) + message("Un-packing cells took %.3f %s.", + clocks_from_ticks(getticks() - tic2), clocks_getunit()); + + /* Wait for all the sends to have finished too. */ + if (MPI_Waitall(num_proxies, reqs_out, MPI_STATUSES_IGNORE) != MPI_SUCCESS) + error("MPI_Waitall on sends failed."); + + /* Clean up. */ + free(reqs); + free(pcells); + +#else + error("SWIFT was not compiled with MPI support."); +#endif +} + /** * @brief Add a cell to the given proxy's input list. * @@ -219,7 +572,7 @@ void proxy_addcell_out(struct proxy *p, struct cell *c, int type) { * * @param p The #proxy. */ -void proxy_parts_exch1(struct proxy *p) { +void proxy_parts_exchange_first(struct proxy *p) { #ifdef WITH_MPI @@ -279,7 +632,7 @@ void proxy_parts_exch1(struct proxy *p) { #endif } -void proxy_parts_exch2(struct proxy *p) { +void proxy_parts_exchange_second(struct proxy *p) { #ifdef WITH_MPI @@ -540,3 +893,19 @@ void proxy_init(struct proxy *p, int mynodeID, int nodeID) { } p->nr_sparts_out = 0; } + +/** + * @brief Registers the MPI types for the proxy cells. 
+ */ +void proxy_create_mpi_type(void) { + +#ifdef WITH_MPI + if (MPI_Type_contiguous(sizeof(struct pcell) / sizeof(unsigned char), + MPI_BYTE, &pcell_mpi_type) != MPI_SUCCESS || + MPI_Type_commit(&pcell_mpi_type) != MPI_SUCCESS) { + error("Failed to create MPI type for parts."); + } +#else + error("SWIFT was not compiled with MPI support."); +#endif +} diff --git a/src/proxy.h b/src/proxy.h index b45f6fcca86b0320a49b5c2b879539cbf8c73116..2e3f350333d9e6fdb09161f852cf3a143c60e7ce 100644 --- a/src/proxy.h +++ b/src/proxy.h @@ -22,6 +22,7 @@ /* Includes. */ #include "cell.h" #include "part.h" +#include "space.h" /* Some constants. */ #define proxy_buffgrow 1.5 @@ -96,11 +97,14 @@ void proxy_parts_load(struct proxy *p, const struct part *parts, const struct xpart *xparts, int N); void proxy_gparts_load(struct proxy *p, const struct gpart *gparts, int N); void proxy_sparts_load(struct proxy *p, const struct spart *sparts, int N); -void proxy_parts_exch1(struct proxy *p); -void proxy_parts_exch2(struct proxy *p); +void proxy_parts_exchange_first(struct proxy *p); +void proxy_parts_exchange_second(struct proxy *p); void proxy_addcell_in(struct proxy *p, struct cell *c, int type); void proxy_addcell_out(struct proxy *p, struct cell *c, int type); -void proxy_cells_exch1(struct proxy *p); -void proxy_cells_exch2(struct proxy *p); +void proxy_cells_exchange(struct proxy *proxies, int num_proxies, + struct space *s, int with_gravity); +void proxy_tags_exchange(struct proxy *proxies, int num_proxies, + struct space *s); +void proxy_create_mpi_type(void); #endif /* SWIFT_PROXY_H */ diff --git a/src/restart.c b/src/restart.c index c412c8477d9f93e7c085e13c9e3fe72cd0cab9df..54a098413d7a393ac88a7ef5d7300d912c99f845 100644 --- a/src/restart.c +++ b/src/restart.c @@ -334,3 +334,17 @@ void restart_remove_previous(const char *filename) { } } } + +/** + * @brief Run a given command, usually to resubmit a job. + * + * No check is done on the command being run. 
+ * + * @param command The command to run in the system's shell. + */ +void restart_resubmit(const char *command) { + + /* Let's trust the user's command... */ + const int result = system(command); + if (result != 0) message("Command returned error code %d", result); +} diff --git a/src/restart.h b/src/restart.h index 49d127492255364cbf0f48653c560494e83a2920..b9380201659dacf05fcedad8c9fcb29e7bd89be2 100644 --- a/src/restart.h +++ b/src/restart.h @@ -41,4 +41,6 @@ int restart_stop_now(const char *dir, int cleanup); void restart_save_previous(const char *filename); void restart_remove_previous(const char *filename); +void restart_resubmit(const char *command); + #endif /* SWIFT_RESTART_H */ diff --git a/src/runner.c b/src/runner.c index 7771e247a07866297576856e4f5099fa8a1e55da..f14f8d8bc2721d0edfaca542b68af645bd5ac1a0 100644 --- a/src/runner.c +++ b/src/runner.c @@ -53,6 +53,7 @@ #include "hydro.h" #include "hydro_properties.h" #include "kick.h" +#include "logger.h" #include "minmax.h" #include "runner_doiact_vec.h" #include "scheduler.h" @@ -96,6 +97,16 @@ /* Import the gravity loop functions. */ #include "runner_doiact_grav.h" +/* Import the stars density loop functions. */ +#define FUNCTION density +#include "runner_doiact_stars.h" +#undef FUNCTION + +/* Import the stars feedback loop functions. */ +#define FUNCTION feedback +#include "runner_doiact_stars.h" +#undef FUNCTION + /** * @brief Perform source terms * @@ -104,7 +115,7 @@ * @param timer 1 if the time is to be recorded. 
*/ void runner_do_sourceterms(struct runner *r, struct cell *c, int timer) { - const int count = c->count; + const int count = c->hydro.count; const double cell_min[3] = {c->loc[0], c->loc[1], c->loc[2]}; const double cell_width[3] = {c->width[0], c->width[1], c->width[2]}; struct sourceterms *sourceterms = r->e->sourceterms; @@ -132,6 +143,210 @@ void runner_do_sourceterms(struct runner *r, struct cell *c, int timer) { if (timer) TIMER_TOC(timer_dosource); } +/** + * @brief Intermediate task after the density to check that the smoothing + * lengths are correct. + * + * @param r The runner thread. + * @param c The cell. + * @param timer Are we timing this ? + */ +void runner_do_stars_ghost(struct runner *r, struct cell *c, int timer) { + + struct spart *restrict sparts = c->stars.parts; + const struct engine *e = r->e; + const struct cosmology *cosmo = e->cosmology; + const struct stars_props *stars_properties = e->stars_properties; + const float stars_h_max = stars_properties->h_max; + const float eps = stars_properties->h_tolerance; + const float stars_eta_dim = pow_dimension(stars_properties->eta_neighbours); + const int max_smoothing_iter = stars_properties->max_smoothing_iterations; + int redo = 0, scount = 0; + + TIMER_TIC; + + /* Anything to do here? */ + if (!cell_is_active_stars(c, e)) return; + + /* Recurse? */ + if (c->split) { + for (int k = 0; k < 8; k++) + if (c->progeny[k] != NULL) runner_do_stars_ghost(r, c->progeny[k], 0); + } else { + + /* Init the list of active particles that have to be updated. */ + int *sid = NULL; + if ((sid = (int *)malloc(sizeof(int) * c->stars.count)) == NULL) + error("Can't allocate memory for sid."); + for (int k = 0; k < c->stars.count; k++) + if (spart_is_active(&sparts[k], e)) { + sid[scount] = k; + ++scount; + } + + /* While there are particles that need to be updated... */ + for (int num_reruns = 0; scount > 0 && num_reruns < max_smoothing_iter; + num_reruns++) { + + /* Reset the redo-count. 
*/ + redo = 0; + + /* Loop over the remaining active parts in this cell. */ + for (int i = 0; i < scount; i++) { + + /* Get a direct pointer on the part. */ + struct spart *sp = &sparts[sid[i]]; + +#ifdef SWIFT_DEBUG_CHECKS + /* Is this part within the timestep? */ + if (!spart_is_active(sp, e)) + error("Ghost applied to inactive particle"); +#endif + + /* Get some useful values */ + const float h_old = sp->h; + const float h_old_dim = pow_dimension(h_old); + const float h_old_dim_minus_one = pow_dimension_minus_one(h_old); + float h_new; + int has_no_neighbours = 0; + + if (sp->density.wcount == 0.f) { /* No neighbours case */ + + /* Flag that there were no neighbours */ + has_no_neighbours = 1; + + /* Double h and try again */ + h_new = 2.f * h_old; + } else { + + /* Finish the density calculation */ + stars_end_density(sp, cosmo); + + /* Compute one step of the Newton-Raphson scheme */ + const float n_sum = sp->density.wcount * h_old_dim; + const float n_target = stars_eta_dim; + const float f = n_sum - n_target; + const float f_prime = + sp->density.wcount_dh * h_old_dim + + hydro_dimension * sp->density.wcount * h_old_dim_minus_one; + + /* Avoid floating point exception from f_prime = 0 */ + h_new = h_old - f / (f_prime + FLT_MIN); +#ifdef SWIFT_DEBUG_CHECKS + if ((f > 0.f && h_new > h_old) || (f < 0.f && h_new < h_old)) + error( + "Smoothing length correction not going in the right direction"); +#endif + + /* Safety check: truncate to the range [ h_old/2 , 2h_old ]. */ + h_new = min(h_new, 2.f * h_old); + h_new = max(h_new, 0.5f * h_old); + } + + /* Check whether the particle has an inappropriate smoothing length */ + if (fabsf(h_new - h_old) > eps * h_old) { + + /* Ok, correct then */ + sp->h = h_new; + + /* If below the absolute maximum, try again */ + if (sp->h < stars_h_max) { + + /* Flag for another round of fun */ + sid[redo] = sid[i]; + redo += 1; + + /* Re-initialise everything */ + stars_init_spart(sp); + + /* Off we go ! 
*/ + continue; + + } else { + + /* Ok, this particle is a lost cause... */ + sp->h = stars_h_max; + + /* Do some damage control if no neighbours at all were found */ + if (has_no_neighbours) { + stars_spart_has_no_neighbours(sp, cosmo); + } + } + } + + /* We now have a particle whose smoothing length has converged */ + + /* Compute the stellar evolution */ + stars_evolve_spart(sp, stars_properties, cosmo); + } + + /* We now need to treat the particles whose smoothing length had not + * converged again */ + + /* Re-set the counter for the next loop (potentially). */ + scount = redo; + if (scount > 0) { + + /* Climb up the cell hierarchy. */ + for (struct cell *finger = c; finger != NULL; finger = finger->parent) { + + /* Run through this cell's density interactions. */ + for (struct link *l = finger->stars.density; l != NULL; l = l->next) { + +#ifdef SWIFT_DEBUG_CHECKS + if (l->t->ti_run < r->e->ti_current) + error("Density task should have been run."); +#endif + + /* Self-interaction? */ + if (l->t->type == task_type_self) + runner_doself_subset_branch_stars_density(r, finger, sparts, sid, + scount); + + /* Otherwise, pair interaction? */ + else if (l->t->type == task_type_pair) { + + /* Left or right? */ + if (l->t->ci == finger) + runner_dopair_subset_branch_stars_density( + r, finger, sparts, sid, scount, l->t->cj); + else + runner_dopair_subset_branch_stars_density( + r, finger, sparts, sid, scount, l->t->ci); + } + + /* Otherwise, sub-self interaction? */ + else if (l->t->type == task_type_sub_self) + runner_dosub_subset_stars_density(r, finger, sparts, sid, scount, + NULL, -1, 1); + + /* Otherwise, sub-pair interaction? */ + else if (l->t->type == task_type_sub_pair) { + + /* Left or right? 
*/ + if (l->t->ci == finger) + runner_dosub_subset_stars_density(r, finger, sparts, sid, + scount, l->t->cj, -1, 1); + else + runner_dosub_subset_stars_density(r, finger, sparts, sid, + scount, l->t->ci, -1, 1); + } + } + } + } + } + + if (scount) { + error("Smoothing length failed to converge on %i particles.", scount); + } + + /* Be clean */ + free(sid); + } + + if (timer) TIMER_TOC(timer_dostars_ghost); +} + /** * @brief Calculate gravity acceleration from external potential * @@ -141,8 +356,8 @@ void runner_do_sourceterms(struct runner *r, struct cell *c, int timer) { */ void runner_do_grav_external(struct runner *r, struct cell *c, int timer) { - struct gpart *restrict gparts = c->gparts; - const int gcount = c->gcount; + struct gpart *restrict gparts = c->grav.parts; + const int gcount = c->grav.count; const struct engine *e = r->e; const struct external_potential *potential = e->external_potential; const struct phys_const *constants = e->physical_constants; @@ -184,8 +399,8 @@ void runner_do_grav_external(struct runner *r, struct cell *c, int timer) { */ void runner_do_grav_mesh(struct runner *r, struct cell *c, int timer) { - struct gpart *restrict gparts = c->gparts; - const int gcount = c->gcount; + struct gpart *restrict gparts = c->grav.parts; + const int gcount = c->grav.count; const struct engine *e = r->e; #ifdef SWIFT_DEBUG_CHECKS @@ -226,11 +441,12 @@ void runner_do_cooling(struct runner *r, struct cell *c, int timer) { const struct cooling_function_data *cooling_func = e->cooling_func; const struct phys_const *constants = e->physical_constants; const struct unit_system *us = e->internal_units; + const struct hydro_props *hydro_props = e->hydro_properties; const double time_base = e->time_base; const integertime_t ti_current = e->ti_current; - struct part *restrict parts = c->parts; - struct xpart *restrict xparts = c->xparts; - const int count = c->count; + struct part *restrict parts = c->hydro.parts; + struct xpart *restrict xparts = 
c->hydro.xparts; + const int count = c->hydro.count; TIMER_TIC; @@ -252,19 +468,25 @@ void runner_do_cooling(struct runner *r, struct cell *c, int timer) { if (part_is_active(p, e)) { - double dt_cool; + double dt_cool, dt_therm; if (with_cosmology) { const integertime_t ti_step = get_integer_timestep(p->time_bin); const integertime_t ti_begin = - get_integer_time_begin(ti_current + 1, p->time_bin); + get_integer_time_begin(ti_current - 1, p->time_bin); + dt_cool = cosmology_get_delta_time(cosmo, ti_begin, ti_begin + ti_step); + dt_therm = cosmology_get_therm_kick_factor(e->cosmology, ti_begin, + ti_begin + ti_step); + } else { dt_cool = get_timestep(p->time_bin, time_base); + dt_therm = get_timestep(p->time_bin, time_base); } /* Let's cool ! */ - cooling_cool_part(constants, us, cosmo, cooling_func, p, xp, dt_cool); + cooling_cool_part(constants, us, cosmo, hydro_props, cooling_func, p, + xp, dt_cool, dt_therm); } } } @@ -272,6 +494,52 @@ void runner_do_cooling(struct runner *r, struct cell *c, int timer) { if (timer) TIMER_TOC(timer_do_cooling); } +/** + * + */ +void runner_do_star_formation(struct runner *r, struct cell *c, int timer) { + + const struct engine *e = r->e; + const struct cosmology *cosmo = e->cosmology; + const int count = c->hydro.count; + struct part *restrict parts = c->hydro.parts; + struct xpart *restrict xparts = c->hydro.xparts; + + TIMER_TIC; + + /* Anything to do here? */ + if (!cell_is_active_hydro(c, e)) return; + + /* Recurse? */ + if (c->split) { + for (int k = 0; k < 8; k++) + if (c->progeny[k] != NULL) runner_do_star_formation(r, c->progeny[k], 0); + } else { + + /* Loop over the gas particles in this cell. */ + for (int k = 0; k < count; k++) { + + /* Get a handle on the part. */ + struct part *restrict p = &parts[k]; + struct xpart *restrict xp = &xparts[k]; + + if (part_is_active(p, e)) { + + const float rho = hydro_get_physical_density(p, cosmo); + + // MATTHIEU: Temporary star-formation law + // Do not use this at home. 
+ if (rho > 1.5e7 && e->step > 2) { + message("Removing particle id=%lld rho=%e", p->id, rho); + cell_convert_part_to_gpart(e, c, p, xp); + } + } + } + } + + if (timer) TIMER_TOC(timer_do_star_formation); +} + /** * @brief Sort the entries in ascending order using QuickSort. * @@ -350,26 +618,33 @@ void runner_do_sort_ascending(struct entry *sort, int N) { } } +#ifdef SWIFT_DEBUG_CHECKS /** * @brief Recursively checks that the flags are consistent in a cell hierarchy. * - * Debugging function. + * Debugging function. Exists in two flavours: hydro & stars. * * @param c The #cell to check. * @param flags The sorting flags to check. */ -void runner_check_sorts(struct cell *c, int flags) { - -#ifdef SWIFT_DEBUG_CHECKS - if (flags & ~c->sorted) error("Inconsistent sort flags (downward)!"); - if (c->split) - for (int k = 0; k < 8; k++) - if (c->progeny[k] != NULL && c->progeny[k]->count > 0) - runner_check_sorts(c->progeny[k], c->sorted); +#define RUNNER_CHECK_SORTS(TYPE) \ + void runner_check_sorts_##TYPE(struct cell *c, int flags) { \ + \ + if (flags & ~c->TYPE.sorted) error("Inconsistent sort flags (downward)!"); \ + if (c->split) \ + for (int k = 0; k < 8; k++) \ + if (c->progeny[k] != NULL && c->progeny[k]->TYPE.count > 0) \ + runner_check_sorts_##TYPE(c->progeny[k], c->TYPE.sorted); \ + } #else - error("Calling debugging code without debugging flag activated."); +#define RUNNER_CHECK_SORTS(TYPE) \ + void runner_check_sorts_##TYPE(struct cell *c, int flags) { \ + error("Calling debugging code without debugging flag activated."); \ + } #endif -} + +RUNNER_CHECK_SORTS(hydro) +RUNNER_CHECK_SORTS(stars) /** * @brief Sort the particles in the given cell along all cardinal directions. @@ -382,25 +657,25 @@ void runner_check_sorts(struct cell *c, int flags) { * @param clock Flag indicating whether to record the timing or not, needed * for recursive calls. 
*/ -void runner_do_sort(struct runner *r, struct cell *c, int flags, int cleanup, - int clock) { +void runner_do_hydro_sort(struct runner *r, struct cell *c, int flags, + int cleanup, int clock) { struct entry *fingers[8]; - const int count = c->count; - const struct part *parts = c->parts; - struct xpart *xparts = c->xparts; + const int count = c->hydro.count; + const struct part *parts = c->hydro.parts; + struct xpart *xparts = c->hydro.xparts; float buff[8]; TIMER_TIC; /* We need to do the local sorts plus whatever was requested further up. */ - flags |= c->do_sort; + flags |= c->hydro.do_sort; if (cleanup) { - c->sorted = 0; + c->hydro.sorted = 0; } else { - flags &= ~c->sorted; + flags &= ~c->hydro.sorted; } - if (flags == 0 && !c->do_sub_sort) return; + if (flags == 0 && !c->hydro.do_sub_sort) return; /* Check that the particles have been moved to the current time */ if (flags && !cell_are_part_drifted(c, r->e)) @@ -408,24 +683,25 @@ void runner_do_sort(struct runner *r, struct cell *c, int flags, int cleanup, #ifdef SWIFT_DEBUG_CHECKS /* Make sure the sort flags are consistent (downward). */ - runner_check_sorts(c, c->sorted); + runner_check_sorts_hydro(c, c->hydro.sorted); /* Make sure the sort flags are consistent (upard). */ for (struct cell *finger = c->parent; finger != NULL; finger = finger->parent) { - if (finger->sorted & ~c->sorted) error("Inconsistent sort flags (upward)."); + if (finger->hydro.sorted & ~c->hydro.sorted) + error("Inconsistent sort flags (upward)."); } /* Update the sort timer which represents the last time the sorts were re-set. */ - if (c->sorted == 0) c->ti_sort = r->e->ti_current; + if (c->hydro.sorted == 0) c->hydro.ti_sort = r->e->ti_current; #endif /* start by allocating the entry arrays in the requested dimensions. 
*/ for (int j = 0; j < 13; j++) { - if ((flags & (1 << j)) && c->sort[j] == NULL) { - if ((c->sort[j] = (struct entry *)malloc(sizeof(struct entry) * - (count + 1))) == NULL) + if ((flags & (1 << j)) && c->hydro.sort[j] == NULL) { + if ((c->hydro.sort[j] = (struct entry *)malloc(sizeof(struct entry) * + (count + 1))) == NULL) error("Failed to allocate sort memory."); } } @@ -437,18 +713,19 @@ void runner_do_sort(struct runner *r, struct cell *c, int flags, int cleanup, float dx_max_sort = 0.0f; float dx_max_sort_old = 0.0f; for (int k = 0; k < 8; k++) { - if (c->progeny[k] != NULL && c->progeny[k]->count > 0) { + if (c->progeny[k] != NULL && c->progeny[k]->hydro.count > 0) { /* Only propagate cleanup if the progeny is stale. */ - runner_do_sort(r, c->progeny[k], flags, - cleanup && (c->progeny[k]->dx_max_sort > - space_maxreldx * c->progeny[k]->dmin), - 0); - dx_max_sort = max(dx_max_sort, c->progeny[k]->dx_max_sort); - dx_max_sort_old = max(dx_max_sort_old, c->progeny[k]->dx_max_sort_old); + runner_do_hydro_sort(r, c->progeny[k], flags, + cleanup && (c->progeny[k]->hydro.dx_max_sort_old > + space_maxreldx * c->progeny[k]->dmin), + 0); + dx_max_sort = max(dx_max_sort, c->progeny[k]->hydro.dx_max_sort); + dx_max_sort_old = + max(dx_max_sort_old, c->progeny[k]->hydro.dx_max_sort_old); } } - c->dx_max_sort = dx_max_sort; - c->dx_max_sort_old = dx_max_sort_old; + c->hydro.dx_max_sort = dx_max_sort; + c->hydro.dx_max_sort_old = dx_max_sort_old; /* Loop over the 13 different sort arrays. 
*/ for (int j = 0; j < 13; j++) { @@ -461,7 +738,7 @@ void runner_do_sort(struct runner *r, struct cell *c, int flags, int cleanup, off[0] = 0; for (int k = 1; k < 8; k++) if (c->progeny[k - 1] != NULL) - off[k] = off[k - 1] + c->progeny[k - 1]->count; + off[k] = off[k - 1] + c->progeny[k - 1]->hydro.count; else off[k] = off[k - 1]; @@ -469,8 +746,8 @@ void runner_do_sort(struct runner *r, struct cell *c, int flags, int cleanup, int inds[8]; for (int k = 0; k < 8; k++) { inds[k] = k; - if (c->progeny[k] != NULL && c->progeny[k]->count > 0) { - fingers[k] = c->progeny[k]->sort[j]; + if (c->progeny[k] != NULL && c->progeny[k]->hydro.count > 0) { + fingers[k] = c->progeny[k]->hydro.sort[j]; buff[k] = fingers[k]->d; off[k] = off[k]; } else @@ -487,7 +764,7 @@ void runner_do_sort(struct runner *r, struct cell *c, int flags, int cleanup, } /* For each entry in the new sort list. */ - struct entry *finger = c->sort[j]; + struct entry *finger = c->hydro.sort[j]; for (int ind = 0; ind < count; ind++) { /* Copy the minimum into the new sort array. */ @@ -508,11 +785,11 @@ void runner_do_sort(struct runner *r, struct cell *c, int flags, int cleanup, } /* Merge. */ /* Add a sentinel. */ - c->sort[j][count].d = FLT_MAX; - c->sort[j][count].i = 0; + c->hydro.sort[j][count].d = FLT_MAX; + c->hydro.sort[j][count].i = 0; /* Mark as sorted. */ - atomic_or(&c->sorted, 1 << j); + atomic_or(&c->hydro.sorted, 1 << j); } /* loop over sort arrays. 
*/ @@ -522,7 +799,7 @@ void runner_do_sort(struct runner *r, struct cell *c, int flags, int cleanup, else { /* Reset the sort distance */ - if (c->sorted == 0) { + if (c->hydro.sorted == 0) { #ifdef SWIFT_DEBUG_CHECKS if (xparts != NULL && c->nodeID != engine_rank) error("Have non-NULL xparts in foreign cell"); @@ -536,8 +813,8 @@ void runner_do_sort(struct runner *r, struct cell *c, int flags, int cleanup, xparts[k].x_diff_sort[2] = 0.0f; } } - c->dx_max_sort_old = 0.f; - c->dx_max_sort = 0.f; + c->hydro.dx_max_sort_old = 0.f; + c->hydro.dx_max_sort = 0.f; } /* Fill the sort array. */ @@ -545,20 +822,20 @@ void runner_do_sort(struct runner *r, struct cell *c, int flags, int cleanup, const double px[3] = {parts[k].x[0], parts[k].x[1], parts[k].x[2]}; for (int j = 0; j < 13; j++) if (flags & (1 << j)) { - c->sort[j][k].i = k; - c->sort[j][k].d = px[0] * runner_shift[j][0] + - px[1] * runner_shift[j][1] + - px[2] * runner_shift[j][2]; + c->hydro.sort[j][k].i = k; + c->hydro.sort[j][k].d = px[0] * runner_shift[j][0] + + px[1] * runner_shift[j][1] + + px[2] * runner_shift[j][2]; } } /* Add the sentinel and sort. */ for (int j = 0; j < 13; j++) if (flags & (1 << j)) { - c->sort[j][count].d = FLT_MAX; - c->sort[j][count].i = 0; - runner_do_sort_ascending(c->sort[j], count); - atomic_or(&c->sorted, 1 << j); + c->hydro.sort[j][count].d = FLT_MAX; + c->hydro.sort[j][count].i = 0; + runner_do_sort_ascending(c->hydro.sort[j], count); + atomic_or(&c->hydro.sorted, 1 << j); } } @@ -566,7 +843,7 @@ void runner_do_sort(struct runner *r, struct cell *c, int flags, int cleanup, /* Verify the sorting. 
*/ for (int j = 0; j < 13; j++) { if (!(flags & (1 << j))) continue; - struct entry *finger = c->sort[j]; + struct entry *finger = c->hydro.sort[j]; for (int k = 1; k < count; k++) { if (finger[k].d < finger[k - 1].d) error("Sorting failed, ascending array."); @@ -575,23 +852,242 @@ void runner_do_sort(struct runner *r, struct cell *c, int flags, int cleanup, } /* Make sure the sort flags are consistent (downward). */ - runner_check_sorts(c, flags); + runner_check_sorts_hydro(c, flags); /* Make sure the sort flags are consistent (upward). */ for (struct cell *finger = c->parent; finger != NULL; finger = finger->parent) { - if (finger->sorted & ~c->sorted) error("Inconsistent sort flags."); + if (finger->hydro.sorted & ~c->hydro.sorted) + error("Inconsistent sort flags."); } #endif /* Clear the cell's sort flags. */ - c->do_sort = 0; - c->do_sub_sort = 0; - c->requires_sorts = 0; + c->hydro.do_sort = 0; + c->hydro.do_sub_sort = 0; + c->hydro.requires_sorts = 0; if (clock) TIMER_TOC(timer_dosort); } +/** + * @brief Sort the stars particles in the given cell along all cardinal + * directions. + * + * @param r The #runner. + * @param c The #cell. + * @param flags Cell flag. + * @param cleanup If true, re-build the sorts for the selected flags instead + * of just adding them. + * @param clock Flag indicating whether to record the timing or not, needed + * for recursive calls. + */ +void runner_do_stars_sort(struct runner *r, struct cell *c, int flags, + int cleanup, int clock) { + + struct entry *fingers[8]; + const int count = c->stars.count; + struct spart *sparts = c->stars.parts; + float buff[8]; + + TIMER_TIC; + + /* We need to do the local sorts plus whatever was requested further up. 
*/ + flags |= c->stars.do_sort; + if (cleanup) { + c->stars.sorted = 0; + } else { + flags &= ~c->stars.sorted; + } + if (flags == 0 && !c->stars.do_sub_sort) return; + + /* Check that the particles have been moved to the current time */ + if (flags && !cell_are_spart_drifted(c, r->e)) + error("Sorting un-drifted cell c->nodeID=%d", c->nodeID); + +#ifdef SWIFT_DEBUG_CHECKS + /* Make sure the sort flags are consistent (downward). */ + runner_check_sorts_stars(c, c->stars.sorted); + + /* Make sure the sort flags are consistent (upward). */ + for (struct cell *finger = c->parent; finger != NULL; + finger = finger->parent) { + if (finger->stars.sorted & ~c->stars.sorted) + error("Inconsistent sort flags (upward)."); + } + + /* Update the sort timer which represents the last time the sorts + were re-set. */ + if (c->stars.sorted == 0) c->stars.ti_sort = r->e->ti_current; +#endif + + /* start by allocating the entry arrays in the requested dimensions. */ + for (int j = 0; j < 13; j++) { + if ((flags & (1 << j)) && c->stars.sort[j] == NULL) { + if ((c->stars.sort[j] = (struct entry *)malloc(sizeof(struct entry) * + (count + 1))) == NULL) + error("Failed to allocate sort memory."); + } + } + + /* Does this cell have any progeny? */ + if (c->split) { + + /* Fill in the gaps within the progeny. */ + float dx_max_sort = 0.0f; + float dx_max_sort_old = 0.0f; + for (int k = 0; k < 8; k++) { + if (c->progeny[k] != NULL && c->progeny[k]->stars.count > 0) { + /* Only propagate cleanup if the progeny is stale. */ + runner_do_stars_sort(r, c->progeny[k], flags, + cleanup && (c->progeny[k]->stars.dx_max_sort_old > + space_maxreldx * c->progeny[k]->dmin), + 0); + dx_max_sort = max(dx_max_sort, c->progeny[k]->stars.dx_max_sort); + dx_max_sort_old = + max(dx_max_sort_old, c->progeny[k]->stars.dx_max_sort_old); + } + } + c->stars.dx_max_sort = dx_max_sort; + c->stars.dx_max_sort_old = dx_max_sort_old; + + /* Loop over the 13 different sort arrays. 
*/ + for (int j = 0; j < 13; j++) { + + /* Has this sort array been flagged? */ + if (!(flags & (1 << j))) continue; + + /* Init the particle index offsets. */ + int off[8]; + off[0] = 0; + for (int k = 1; k < 8; k++) + if (c->progeny[k - 1] != NULL) + off[k] = off[k - 1] + c->progeny[k - 1]->stars.count; + else + off[k] = off[k - 1]; + + /* Init the entries and indices. */ + int inds[8]; + for (int k = 0; k < 8; k++) { + inds[k] = k; + if (c->progeny[k] != NULL && c->progeny[k]->stars.count > 0) { + fingers[k] = c->progeny[k]->stars.sort[j]; + buff[k] = fingers[k]->d; + off[k] = off[k]; + } else + buff[k] = FLT_MAX; + } + + /* Sort the buffer. */ + for (int i = 0; i < 7; i++) + for (int k = i + 1; k < 8; k++) + if (buff[inds[k]] < buff[inds[i]]) { + int temp_i = inds[i]; + inds[i] = inds[k]; + inds[k] = temp_i; + } + + /* For each entry in the new sort list. */ + struct entry *finger = c->stars.sort[j]; + for (int ind = 0; ind < count; ind++) { + + /* Copy the minimum into the new sort array. */ + finger[ind].d = buff[inds[0]]; + finger[ind].i = fingers[inds[0]]->i + off[inds[0]]; + + /* Update the buffer. */ + fingers[inds[0]] += 1; + buff[inds[0]] = fingers[inds[0]]->d; + + /* Find the smallest entry. */ + for (int k = 1; k < 8 && buff[inds[k]] < buff[inds[k - 1]]; k++) { + int temp_i = inds[k - 1]; + inds[k - 1] = inds[k]; + inds[k] = temp_i; + } + + } /* Merge. */ + + /* Add a sentinel. */ + c->stars.sort[j][count].d = FLT_MAX; + c->stars.sort[j][count].i = 0; + + /* Mark as sorted. */ + atomic_or(&c->stars.sorted, 1 << j); + + } /* loop over sort arrays. */ + + } /* progeny? */ + + /* Otherwise, just sort. 
*/ + else { + + /* Reset the sort distance */ + if (c->stars.sorted == 0) { + + /* And the individual sort distances if we are a local cell */ + for (int k = 0; k < count; k++) { + sparts[k].x_diff_sort[0] = 0.0f; + sparts[k].x_diff_sort[1] = 0.0f; + sparts[k].x_diff_sort[2] = 0.0f; + } + c->stars.dx_max_sort_old = 0.f; + c->stars.dx_max_sort = 0.f; + } + + /* Fill the sort array. */ + for (int k = 0; k < count; k++) { + const double px[3] = {sparts[k].x[0], sparts[k].x[1], sparts[k].x[2]}; + for (int j = 0; j < 13; j++) + if (flags & (1 << j)) { + c->stars.sort[j][k].i = k; + c->stars.sort[j][k].d = px[0] * runner_shift[j][0] + + px[1] * runner_shift[j][1] + + px[2] * runner_shift[j][2]; + } + } + + /* Add the sentinel and sort. */ + for (int j = 0; j < 13; j++) + if (flags & (1 << j)) { + c->stars.sort[j][count].d = FLT_MAX; + c->stars.sort[j][count].i = 0; + runner_do_sort_ascending(c->stars.sort[j], count); + atomic_or(&c->stars.sorted, 1 << j); + } + } + +#ifdef SWIFT_DEBUG_CHECKS + /* Verify the sorting. */ + for (int j = 0; j < 13; j++) { + if (!(flags & (1 << j))) continue; + struct entry *finger = c->stars.sort[j]; + for (int k = 1; k < count; k++) { + if (finger[k].d < finger[k - 1].d) + error("Sorting failed, ascending array."); + if (finger[k].i >= count) error("Sorting failed, indices borked."); + } + } + + /* Make sure the sort flags are consistent (downward). */ + runner_check_sorts_stars(c, flags); + + /* Make sure the sort flags are consistent (upward). */ + for (struct cell *finger = c->parent; finger != NULL; + finger = finger->parent) { + if (finger->stars.sorted & ~c->stars.sorted) + error("Inconsistent sort flags."); + } +#endif + + /* Clear the cell's sort flags. */ + c->stars.do_sort = 0; + c->stars.do_sub_sort = 0; + c->stars.requires_sorts = 0; + + if (clock) TIMER_TOC(timer_do_stars_sort); +} + /** * @brief Initialize the multipoles before the gravity calculation. 
* @@ -614,7 +1110,7 @@ void runner_do_init_grav(struct runner *r, struct cell *c, int timer) { if (!cell_is_active_gravity(c, e)) return; /* Reset the gravity acceleration tensors */ - gravity_field_tensors_init(&c->multipole->pot, e->ti_current); + gravity_field_tensors_init(&c->grav.multipole->pot, e->ti_current); /* Recurse? */ if (c->split) { @@ -638,11 +1134,15 @@ void runner_do_extra_ghost(struct runner *r, struct cell *c, int timer) { #ifdef EXTRA_HYDRO_LOOP - struct part *restrict parts = c->parts; - struct xpart *restrict xparts = c->xparts; - const int count = c->count; + struct part *restrict parts = c->hydro.parts; + struct xpart *restrict xparts = c->hydro.xparts; + const int count = c->hydro.count; const struct engine *e = r->e; + const integertime_t ti_end = e->ti_current; + const int with_cosmology = (e->policy & engine_policy_cosmology); + const double time_base = e->time_base; const struct cosmology *cosmo = e->cosmology; + const struct hydro_props *hydro_props = e->hydro_properties; TIMER_TIC; @@ -669,8 +1169,19 @@ void runner_do_extra_ghost(struct runner *r, struct cell *c, int timer) { /* As of here, particle force variables will be set. */ + /* Calculate the time-step for passing to hydro_prepare_force. + * This is the physical time between the start and end of the time-step + * without any scale-factor powers. */ + double dt_alpha; + if (with_cosmology) { + const integertime_t ti_step = get_integer_timestep(p->time_bin); + dt_alpha = cosmology_get_delta_time(cosmo, ti_end - ti_step, ti_end); + } else { + dt_alpha = get_timestep(p->time_bin, time_base); + } + /* Compute variables required for the force loop */ - hydro_prepare_force(p, xp, cosmo); + hydro_prepare_force(p, xp, cosmo, hydro_props, dt_alpha); /* The particle force values are now set. Do _NOT_ try to read any particle density variables! 
*/ @@ -698,8 +1209,8 @@ void runner_do_extra_ghost(struct runner *r, struct cell *c, int timer) { */ void runner_do_ghost(struct runner *r, struct cell *c, int timer) { - struct part *restrict parts = c->parts; - struct xpart *restrict xparts = c->xparts; + struct part *restrict parts = c->hydro.parts; + struct xpart *restrict xparts = c->hydro.xparts; const struct engine *e = r->e; const struct space *s = e->s; const struct hydro_space *hs = &s->hs; @@ -725,9 +1236,9 @@ void runner_do_ghost(struct runner *r, struct cell *c, int timer) { /* Init the list of active particles that have to be updated. */ int *pid = NULL; - if ((pid = (int *)malloc(sizeof(int) * c->count)) == NULL) + if ((pid = (int *)malloc(sizeof(int) * c->hydro.count)) == NULL) error("Can't allocate memory for pid."); - for (int k = 0; k < c->count; k++) + for (int k = 0; k < c->hydro.count; k++) if (part_is_active(&parts[k], e)) { pid[count] = k; ++count; @@ -766,6 +1277,7 @@ void runner_do_ghost(struct runner *r, struct cell *c, int timer) { /* Double h and try again */ h_new = 2.f * h_old; + } else { /* Finish the density calculation */ @@ -780,6 +1292,65 @@ void runner_do_ghost(struct runner *r, struct cell *c, int timer) { p->density.wcount_dh * h_old_dim + hydro_dimension * p->density.wcount * h_old_dim_minus_one; + /* Skip if h is already h_max and we don't have enough neighbours */ + if ((p->h >= hydro_h_max) && (f < 0.f)) { + + /* We have a particle whose smoothing length is already set (wants + * to be larger but has already hit the maximum). So, just tidy up + * as if the smoothing length had converged correctly */ + +#ifdef EXTRA_HYDRO_LOOP + + /* As of here, particle gradient variables will be set. */ + /* The force variables are set in the extra ghost. */ + + /* Compute variables required for the gradient loop */ + hydro_prepare_gradient(p, xp, cosmo); + + /* The particle gradient values are now set. Do _NOT_ + try to read any particle density variables! 
*/ + + /* Prepare the particle for the gradient loop over neighbours */ + hydro_reset_gradient(p); + +#else + const struct hydro_props *hydro_props = e->hydro_properties; + + /* Calculate the time-step for passing to hydro_prepare_force, used + * for the evolution of alpha factors (i.e. those involved in the + * artificial viscosity and thermal conduction terms) */ + const int with_cosmology = (e->policy & engine_policy_cosmology); + const double time_base = e->time_base; + const integertime_t ti_end = e->ti_current; + double dt_alpha; + + if (with_cosmology) { + const integertime_t ti_step = get_integer_timestep(p->time_bin); + dt_alpha = + cosmology_get_delta_time(cosmo, ti_end - ti_step, ti_end); + } else { + dt_alpha = get_timestep(p->time_bin, time_base); + } + + /* As of here, particle force variables will be set. */ + + /* Compute variables required for the force loop */ + hydro_prepare_force(p, xp, cosmo, hydro_props, dt_alpha); + + /* The particle force values are now set. Do _NOT_ + try to read any particle density variables! */ + + /* Prepare the particle for the force loop over neighbours */ + hydro_reset_acceleration(p); + +#endif /* EXTRA_HYDRO_LOOP */ + + /* Ok, we are done with this particle */ + continue; + } + + /* Normal case: Use Newton-Raphson to get a better value of h */ + /* Avoid floating point exception from f_prime = 0 */ h_new = h_old - f / (f_prime + FLT_MIN); @@ -844,10 +1415,27 @@ void runner_do_ghost(struct runner *r, struct cell *c, int timer) { hydro_reset_gradient(p); #else + const struct hydro_props *hydro_props = e->hydro_properties; + + /* Calculate the time-step for passing to hydro_prepare_force, used for + * the evolution of alpha factors (i.e. 
those involved in the artificial + * viscosity and thermal conduction terms) */ + const int with_cosmology = (e->policy & engine_policy_cosmology); + const integertime_t ti_end = e->ti_current; + const double time_base = e->time_base; + double dt_alpha; + + if (with_cosmology) { + const integertime_t ti_step = get_integer_timestep(p->time_bin); + dt_alpha = cosmology_get_delta_time(cosmo, ti_end - ti_step, ti_end); + } else { + dt_alpha = get_timestep(p->time_bin, time_base); + } + /* As of here, particle force variables will be set. */ /* Compute variables required for the force loop */ - hydro_prepare_force(p, xp, cosmo); + hydro_prepare_force(p, xp, cosmo, hydro_props, dt_alpha); /* The particle force values are now set. Do _NOT_ try to read any particle density variables! */ @@ -869,7 +1457,7 @@ void runner_do_ghost(struct runner *r, struct cell *c, int timer) { for (struct cell *finger = c; finger != NULL; finger = finger->parent) { /* Run through this cell's density interactions. */ - for (struct link *l = finger->density; l != NULL; l = l->next) { + for (struct link *l = finger->hydro.density; l != NULL; l = l->next) { #ifdef SWIFT_DEBUG_CHECKS if (l->t->ti_run < r->e->ti_current) @@ -933,7 +1521,7 @@ void runner_do_ghost(struct runner *r, struct cell *c, int timer) { static void runner_do_unskip_hydro(struct cell *c, struct engine *e) { /* Ignore empty cells. */ - if (c->count == 0) return; + if (c->hydro.count == 0) return; /* Skip inactive cells. */ if (!cell_is_active_hydro(c, e)) return; @@ -953,6 +1541,35 @@ static void runner_do_unskip_hydro(struct cell *c, struct engine *e) { if (forcerebuild) atomic_inc(&e->forcerebuild); } +/** + * @brief Unskip any stars tasks associated with active cells. + * + * @param c The cell. + * @param e The engine. + */ +static void runner_do_unskip_stars(struct cell *c, struct engine *e) { + + /* Ignore empty cells. */ + if (c->stars.count == 0) return; + + /* Skip inactive cells. 
*/ + if (!cell_is_active_stars(c, e)) return; + + /* Recurse */ + if (c->split) { + for (int k = 0; k < 8; k++) { + if (c->progeny[k] != NULL) { + struct cell *cp = c->progeny[k]; + runner_do_unskip_stars(cp, e); + } + } + } + + /* Unskip any active tasks. */ + const int forcerebuild = cell_unskip_stars_tasks(c, &e->sched); + if (forcerebuild) atomic_inc(&e->forcerebuild); +} + /** * @brief Unskip any gravity tasks associated with active cells. * @@ -962,13 +1579,13 @@ static void runner_do_unskip_hydro(struct cell *c, struct engine *e) { static void runner_do_unskip_gravity(struct cell *c, struct engine *e) { /* Ignore empty cells. */ - if (c->gcount == 0) return; + if (c->grav.count == 0) return; /* Skip inactive cells. */ if (!cell_is_active_gravity(c, e)) return; /* Recurse */ - if (c->split && c->depth < space_subdepth_grav) { + if (c->split && ((c->maxdepth - c->depth) >= space_subdepth_diff_grav)) { for (int k = 0; k < 8; k++) { if (c->progeny[k] != NULL) { struct cell *cp = c->progeny[k]; @@ -1006,6 +1623,9 @@ void runner_do_unskip_mapper(void *map_data, int num_elements, if ((e->policy & engine_policy_self_gravity) || (e->policy & engine_policy_external_gravity)) runner_do_unskip_gravity(c, e); + + /* Stars tasks */ + if (e->policy & engine_policy_stars) runner_do_unskip_stars(c, e); } } } @@ -1054,13 +1674,13 @@ void runner_do_kick1(struct runner *r, struct cell *c, int timer) { const struct cosmology *cosmo = e->cosmology; const struct hydro_props *hydro_props = e->hydro_properties; const int with_cosmology = (e->policy & engine_policy_cosmology); - struct part *restrict parts = c->parts; - struct xpart *restrict xparts = c->xparts; - struct gpart *restrict gparts = c->gparts; - struct spart *restrict sparts = c->sparts; - const int count = c->count; - const int gcount = c->gcount; - const int scount = c->scount; + struct part *restrict parts = c->hydro.parts; + struct xpart *restrict xparts = c->hydro.xparts; + struct gpart *restrict gparts = 
c->grav.parts; + struct spart *restrict sparts = c->stars.parts; + const int count = c->hydro.count; + const int gcount = c->grav.count; + const int scount = c->stars.count; const integertime_t ti_current = e->ti_current; const double time_base = e->time_base; @@ -1101,7 +1721,7 @@ void runner_do_kick1(struct runner *r, struct cell *c, int timer) { #endif /* Time interval for this half-kick */ - double dt_kick_grav, dt_kick_hydro, dt_kick_therm; + double dt_kick_grav, dt_kick_hydro, dt_kick_therm, dt_kick_corr; if (with_cosmology) { dt_kick_hydro = cosmology_get_hydro_kick_factor( cosmo, ti_begin, ti_begin + ti_step / 2); @@ -1109,15 +1729,19 @@ void runner_do_kick1(struct runner *r, struct cell *c, int timer) { ti_begin + ti_step / 2); dt_kick_therm = cosmology_get_therm_kick_factor( cosmo, ti_begin, ti_begin + ti_step / 2); + dt_kick_corr = cosmology_get_corr_kick_factor(cosmo, ti_begin, + ti_begin + ti_step / 2); } else { dt_kick_hydro = (ti_step / 2) * time_base; dt_kick_grav = (ti_step / 2) * time_base; dt_kick_therm = (ti_step / 2) * time_base; + dt_kick_corr = (ti_step / 2) * time_base; } /* do the kick */ - kick_part(p, xp, dt_kick_hydro, dt_kick_grav, dt_kick_therm, cosmo, - hydro_props, ti_begin, ti_begin + ti_step / 2); + kick_part(p, xp, dt_kick_hydro, dt_kick_grav, dt_kick_therm, + dt_kick_corr, cosmo, hydro_props, ti_begin, + ti_begin + ti_step / 2); /* Update the accelerations to be used in the drift for hydro */ if (p->gpart != NULL) { @@ -1167,7 +1791,7 @@ void runner_do_kick1(struct runner *r, struct cell *c, int timer) { } } - /* Loop over the star particles in this cell. */ + /* Loop over the stars particles in this cell. */ for (int k = 0; k < scount; k++) { /* Get a handle on the s-part. 
*/ @@ -1224,13 +1848,13 @@ void runner_do_kick2(struct runner *r, struct cell *c, int timer) { const struct cosmology *cosmo = e->cosmology; const struct hydro_props *hydro_props = e->hydro_properties; const int with_cosmology = (e->policy & engine_policy_cosmology); - const int count = c->count; - const int gcount = c->gcount; - const int scount = c->scount; - struct part *restrict parts = c->parts; - struct xpart *restrict xparts = c->xparts; - struct gpart *restrict gparts = c->gparts; - struct spart *restrict sparts = c->sparts; + const int count = c->hydro.count; + const int gcount = c->grav.count; + const int scount = c->stars.count; + struct part *restrict parts = c->hydro.parts; + struct xpart *restrict xparts = c->hydro.xparts; + struct gpart *restrict gparts = c->grav.parts; + struct spart *restrict sparts = c->stars.parts; const integertime_t ti_current = e->ti_current; const double time_base = e->time_base; @@ -1267,7 +1891,7 @@ void runner_do_kick2(struct runner *r, struct cell *c, int timer) { ti_begin, ti_step, p->time_bin, ti_current); #endif /* Time interval for this half-kick */ - double dt_kick_grav, dt_kick_hydro, dt_kick_therm; + double dt_kick_grav, dt_kick_hydro, dt_kick_therm, dt_kick_corr; if (with_cosmology) { dt_kick_hydro = cosmology_get_hydro_kick_factor( cosmo, ti_begin + ti_step / 2, ti_begin + ti_step); @@ -1275,15 +1899,19 @@ void runner_do_kick2(struct runner *r, struct cell *c, int timer) { cosmo, ti_begin + ti_step / 2, ti_begin + ti_step); dt_kick_therm = cosmology_get_therm_kick_factor( cosmo, ti_begin + ti_step / 2, ti_begin + ti_step); + dt_kick_corr = cosmology_get_corr_kick_factor( + cosmo, ti_begin + ti_step / 2, ti_begin + ti_step); } else { dt_kick_hydro = (ti_step / 2) * time_base; dt_kick_grav = (ti_step / 2) * time_base; dt_kick_therm = (ti_step / 2) * time_base; + dt_kick_corr = (ti_step / 2) * time_base; } /* Finish the time-step with a second half-kick */ - kick_part(p, xp, dt_kick_hydro, dt_kick_grav, 
dt_kick_therm, cosmo, - hydro_props, ti_begin + ti_step / 2, ti_begin + ti_step); + kick_part(p, xp, dt_kick_hydro, dt_kick_grav, dt_kick_therm, + dt_kick_corr, cosmo, hydro_props, ti_begin + ti_step / 2, + ti_begin + ti_step); #ifdef SWIFT_DEBUG_CHECKS /* Check that kick and the drift are synchronized */ @@ -1375,7 +2003,7 @@ void runner_do_kick2(struct runner *r, struct cell *c, int timer) { #endif /* Prepare the values to be drifted */ - star_reset_predicted_values(sp); + stars_reset_predicted_values(sp); } } } @@ -1394,29 +2022,31 @@ void runner_do_timestep(struct runner *r, struct cell *c, int timer) { const struct engine *e = r->e; const integertime_t ti_current = e->ti_current; - const int count = c->count; - const int gcount = c->gcount; - const int scount = c->scount; - struct part *restrict parts = c->parts; - struct xpart *restrict xparts = c->xparts; - struct gpart *restrict gparts = c->gparts; - struct spart *restrict sparts = c->sparts; + const int count = c->hydro.count; + const int gcount = c->grav.count; + const int scount = c->stars.count; + struct part *restrict parts = c->hydro.parts; + struct xpart *restrict xparts = c->hydro.xparts; + struct gpart *restrict gparts = c->grav.parts; + struct spart *restrict sparts = c->stars.parts; TIMER_TIC; /* Anything to do here? */ if (!cell_is_active_hydro(c, e) && !cell_is_active_gravity(c, e)) { - c->updated = 0; - c->g_updated = 0; - c->s_updated = 0; + c->hydro.updated = 0; + c->grav.updated = 0; + c->stars.updated = 0; return; } int updated = 0, g_updated = 0, s_updated = 0; + int inhibited = 0, g_inhibited = 0, s_inhibited = 0; integertime_t ti_hydro_end_min = max_nr_timesteps, ti_hydro_end_max = 0, ti_hydro_beg_max = 0; integertime_t ti_gravity_end_min = max_nr_timesteps, ti_gravity_end_max = 0, ti_gravity_beg_max = 0; + integertime_t ti_stars_end_min = max_nr_timesteps; /* No children? 
*/ if (!c->split) { @@ -1473,6 +2103,9 @@ void runner_do_timestep(struct runner *r, struct cell *c, int timer) { else { /* part is inactive */ + /* Count the number of inhibited particles */ + if (part_is_inhibited(p, e)) inhibited++; + const integertime_t ti_end = get_integer_time_end(ti_current, p->time_bin); @@ -1539,6 +2172,9 @@ void runner_do_timestep(struct runner *r, struct cell *c, int timer) { } else { /* gpart is inactive */ + /* Count the number of inhibited particles */ + if (gpart_is_inhibited(gp, e)) g_inhibited++; + const integertime_t ti_end = get_integer_time_end(ti_current, gp->time_bin); @@ -1587,10 +2223,16 @@ void runner_do_timestep(struct runner *r, struct cell *c, int timer) { ti_gravity_end_min = min(ti_current + ti_new_step, ti_gravity_end_min); ti_gravity_end_max = max(ti_current + ti_new_step, ti_gravity_end_max); + ti_stars_end_min = min(ti_current + ti_new_step, ti_stars_end_min); + /* What is the next starting point for this cell ? */ ti_gravity_beg_max = max(ti_current, ti_gravity_beg_max); - } else { /* star particle is inactive */ + /* star particle is inactive but not inhibited */ + } else { + + /* Count the number of inhibited particles */ + if (spart_is_inhibited(sp, e)) ++s_inhibited; const integertime_t ti_end = get_integer_time_end(ti_current, sp->time_bin); @@ -1599,6 +2241,8 @@ void runner_do_timestep(struct runner *r, struct cell *c, int timer) { ti_gravity_end_min = min(ti_end, ti_gravity_end_min); ti_gravity_end_max = max(ti_end, ti_gravity_end_max); + ti_stars_end_min = min(ti_end, ti_stars_end_min); + const integertime_t ti_beg = get_integer_time_begin(ti_current + 1, sp->time_bin); @@ -1617,28 +2261,48 @@ void runner_do_timestep(struct runner *r, struct cell *c, int timer) { runner_do_timestep(r, cp, 0); /* And aggregate */ - updated += cp->updated; - g_updated += cp->g_updated; - s_updated += cp->s_updated; - ti_hydro_end_min = min(cp->ti_hydro_end_min, ti_hydro_end_min); - ti_hydro_end_max = max(cp->ti_hydro_end_max, 
ti_hydro_end_max); - ti_hydro_beg_max = max(cp->ti_hydro_beg_max, ti_hydro_beg_max); - ti_gravity_end_min = min(cp->ti_gravity_end_min, ti_gravity_end_min); - ti_gravity_end_max = max(cp->ti_gravity_end_max, ti_gravity_end_max); - ti_gravity_beg_max = max(cp->ti_gravity_beg_max, ti_gravity_beg_max); + updated += cp->hydro.updated; + g_updated += cp->grav.updated; + s_updated += cp->stars.updated; + inhibited += cp->hydro.inhibited; + g_inhibited += cp->grav.inhibited; + s_inhibited += cp->stars.inhibited; + ti_hydro_end_min = min(cp->hydro.ti_end_min, ti_hydro_end_min); + ti_hydro_end_max = max(cp->hydro.ti_end_max, ti_hydro_end_max); + ti_hydro_beg_max = max(cp->hydro.ti_beg_max, ti_hydro_beg_max); + ti_gravity_end_min = min(cp->grav.ti_end_min, ti_gravity_end_min); + ti_gravity_end_max = max(cp->grav.ti_end_max, ti_gravity_end_max); + ti_gravity_beg_max = max(cp->grav.ti_beg_max, ti_gravity_beg_max); + ti_stars_end_min = min(cp->stars.ti_end_min, ti_stars_end_min); } } /* Store the values. 
*/ - c->updated = updated; - c->g_updated = g_updated; - c->s_updated = s_updated; - c->ti_hydro_end_min = ti_hydro_end_min; - c->ti_hydro_end_max = ti_hydro_end_max; - c->ti_hydro_beg_max = ti_hydro_beg_max; - c->ti_gravity_end_min = ti_gravity_end_min; - c->ti_gravity_end_max = ti_gravity_end_max; - c->ti_gravity_beg_max = ti_gravity_beg_max; + c->hydro.updated = updated; + c->grav.updated = g_updated; + c->stars.updated = s_updated; + c->hydro.inhibited = inhibited; + c->grav.inhibited = g_inhibited; + c->stars.inhibited = s_inhibited; + c->hydro.ti_end_min = ti_hydro_end_min; + c->hydro.ti_end_max = ti_hydro_end_max; + c->hydro.ti_beg_max = ti_hydro_beg_max; + c->grav.ti_end_min = ti_gravity_end_min; + c->grav.ti_end_max = ti_gravity_end_max; + c->grav.ti_beg_max = ti_gravity_beg_max; + c->stars.ti_end_min = ti_stars_end_min; + +#ifdef SWIFT_DEBUG_CHECKS + if (c->hydro.ti_end_min == e->ti_current && + c->hydro.ti_end_min < max_nr_timesteps) + error("End of next hydro step is current time!"); + if (c->grav.ti_end_min == e->ti_current && + c->grav.ti_end_min < max_nr_timesteps) + error("End of next gravity step is current time!"); + if (c->stars.ti_end_min == e->ti_current && + c->stars.ti_end_min < max_nr_timesteps) + error("End of next stars step is current time!"); +#endif if (timer) TIMER_TOC(timer_timestep); } @@ -1656,12 +2320,12 @@ void runner_do_end_force(struct runner *r, struct cell *c, int timer) { const struct engine *e = r->e; const struct space *s = e->s; const struct cosmology *cosmo = e->cosmology; - const int count = c->count; - const int gcount = c->gcount; - const int scount = c->scount; - struct part *restrict parts = c->parts; - struct gpart *restrict gparts = c->gparts; - struct spart *restrict sparts = c->sparts; + const int count = c->hydro.count; + const int gcount = c->grav.count; + const int scount = c->stars.count; + struct part *restrict parts = c->hydro.parts; + struct gpart *restrict gparts = c->grav.parts; + struct spart *restrict 
sparts = c->stars.parts; const int periodic = s->periodic; const float G_newton = e->physical_constants->const_newton_G; @@ -1708,13 +2372,21 @@ void runner_do_end_force(struct runner *r, struct cell *c, int timer) { /* Finish the force calculation */ gravity_end_force(gp, G_newton, potential_normalisation, periodic); +#ifdef SWIFT_MAKE_GRAVITY_GLASS + + /* Negate the gravity forces */ + gp->a_grav[0] *= -1.f; + gp->a_grav[1] *= -1.f; + gp->a_grav[2] *= -1.f; +#endif + #ifdef SWIFT_NO_GRAVITY_BELOW_ID /* Get the ID of the gpart */ long long id = 0; if (gp->type == swift_type_gas) id = e->s->parts[-gp->id_or_neg_offset].id; - else if (gp->type == swift_type_star) + else if (gp->type == swift_type_stars) id = e->s->sparts[-gp->id_or_neg_offset].id; else if (gp->type == swift_type_black_hole) error("Unexisting type"); @@ -1739,13 +2411,14 @@ void runner_do_end_force(struct runner *r, struct cell *c, int timer) { /* Check that this gpart has interacted with all the other * particles (via direct or multipoles) in the box */ - if (gp->num_interacted != e->total_nr_gparts) { + if (gp->num_interacted != + e->total_nr_gparts - e->count_inhibited_gparts) { /* Get the ID of the gpart */ long long my_id = 0; if (gp->type == swift_type_gas) my_id = e->s->parts[-gp->id_or_neg_offset].id; - else if (gp->type == swift_type_star) + else if (gp->type == swift_type_stars) my_id = e->s->sparts[-gp->id_or_neg_offset].id; else if (gp->type == swift_type_black_hole) error("Unexisting type"); @@ -1756,16 +2429,16 @@ void runner_do_end_force(struct runner *r, struct cell *c, int timer) { "g-particle (id=%lld, type=%s) did not interact " "gravitationally with all other gparts " "gp->num_interacted=%lld, total_gparts=%lld (local " - "num_gparts=%zd)", + "num_gparts=%zd inhibited_gparts=%lld)", my_id, part_type_names[gp->type], gp->num_interacted, - e->total_nr_gparts, e->s->nr_gparts); + e->total_nr_gparts, e->s->nr_gparts, e->count_inhibited_gparts); } } #endif } } - /* Loop over the star 
particles in this cell. */ + /* Loop over the stars particles in this cell. */ for (int k = 0; k < scount; k++) { /* Get a handle on the spart. */ @@ -1773,7 +2446,7 @@ void runner_do_end_force(struct runner *r, struct cell *c, int timer) { if (spart_is_active(sp, e)) { /* Finish the force loop */ - star_end_force(sp); + stars_end_force(sp); } } } @@ -1794,8 +2467,8 @@ void runner_do_recv_part(struct runner *r, struct cell *c, int clear_sorts, #ifdef WITH_MPI - const struct part *restrict parts = c->parts; - const size_t nr_parts = c->count; + const struct part *restrict parts = c->hydro.parts; + const size_t nr_parts = c->hydro.count; const integertime_t ti_current = r->e->ti_current; TIMER_TIC; @@ -1811,7 +2484,7 @@ void runner_do_recv_part(struct runner *r, struct cell *c, int clear_sorts, #endif /* Clear this cell's sorted mask. */ - if (clear_sorts) c->sorted = 0; + if (clear_sorts) c->hydro.sorted = 0; /* If this cell is a leaf, collect the particle data. */ if (!c->split) { @@ -1832,13 +2505,13 @@ void runner_do_recv_part(struct runner *r, struct cell *c, int clear_sorts, /* Otherwise, recurse and collect. */ else { for (int k = 0; k < 8; k++) { - if (c->progeny[k] != NULL && c->progeny[k]->count > 0) { + if (c->progeny[k] != NULL && c->progeny[k]->hydro.count > 0) { runner_do_recv_part(r, c->progeny[k], clear_sorts, 0); ti_hydro_end_min = - min(ti_hydro_end_min, c->progeny[k]->ti_hydro_end_min); + min(ti_hydro_end_min, c->progeny[k]->hydro.ti_end_min); ti_hydro_end_max = - max(ti_hydro_end_max, c->progeny[k]->ti_hydro_end_max); - h_max = max(h_max, c->progeny[k]->h_max); + max(ti_hydro_end_max, c->progeny[k]->hydro.ti_end_max); + h_max = max(h_max, c->progeny[k]->hydro.h_max); } } } @@ -1852,10 +2525,10 @@ void runner_do_recv_part(struct runner *r, struct cell *c, int clear_sorts, #endif /* ... and store. 
*/ - // c->ti_hydro_end_min = ti_hydro_end_min; - // c->ti_hydro_end_max = ti_hydro_end_max; - c->ti_old_part = ti_current; - c->h_max = h_max; + // c->hydro.ti_end_min = ti_hydro_end_min; + // c->hydro.ti_end_max = ti_hydro_end_max; + c->hydro.ti_old_part = ti_current; + c->hydro.h_max = h_max; if (timer) TIMER_TOC(timer_dorecv_part); @@ -1875,8 +2548,8 @@ void runner_do_recv_gpart(struct runner *r, struct cell *c, int timer) { #ifdef WITH_MPI - const struct gpart *restrict gparts = c->gparts; - const size_t nr_gparts = c->gcount; + const struct gpart *restrict gparts = c->grav.parts; + const size_t nr_gparts = c->grav.count; const integertime_t ti_current = r->e->ti_current; TIMER_TIC; @@ -1898,11 +2571,6 @@ void runner_do_recv_gpart(struct runner *r, struct cell *c, int timer) { if (gparts[k].time_bin == time_bin_inhibited) continue; time_bin_min = min(time_bin_min, gparts[k].time_bin); time_bin_max = max(time_bin_max, gparts[k].time_bin); - -#ifdef SWIFT_DEBUG_CHECKS - if (gparts[k].ti_drift != ti_current) - error("Received un-drifted g-particle !"); -#endif } /* Convert into a time */ @@ -1913,12 +2581,12 @@ void runner_do_recv_gpart(struct runner *r, struct cell *c, int timer) { /* Otherwise, recurse and collect. */ else { for (int k = 0; k < 8; k++) { - if (c->progeny[k] != NULL && c->progeny[k]->gcount > 0) { + if (c->progeny[k] != NULL && c->progeny[k]->grav.count > 0) { runner_do_recv_gpart(r, c->progeny[k], 0); ti_gravity_end_min = - min(ti_gravity_end_min, c->progeny[k]->ti_gravity_end_min); + min(ti_gravity_end_min, c->progeny[k]->grav.ti_end_min); ti_gravity_end_max = - max(ti_gravity_end_max, c->progeny[k]->ti_gravity_end_max); + max(ti_gravity_end_max, c->progeny[k]->grav.ti_end_max); } } } @@ -1932,9 +2600,9 @@ void runner_do_recv_gpart(struct runner *r, struct cell *c, int timer) { #endif /* ... and store. 
*/ - // c->ti_gravity_end_min = ti_gravity_end_min; - // c->ti_gravity_end_max = ti_gravity_end_max; - c->ti_old_gpart = ti_current; + // c->grav.ti_end_min = ti_gravity_end_min; + // c->grav.ti_end_max = ti_gravity_end_max; + c->grav.ti_old_part = ti_current; if (timer) TIMER_TOC(timer_dorecv_gpart); @@ -1954,10 +2622,12 @@ void runner_do_recv_spart(struct runner *r, struct cell *c, int timer) { #ifdef WITH_MPI - const struct spart *restrict sparts = c->sparts; - const size_t nr_sparts = c->scount; + const struct spart *restrict sparts = c->stars.parts; + const size_t nr_sparts = c->stars.count; const integertime_t ti_current = r->e->ti_current; + error("Need to add h_max computation"); + TIMER_TIC; integertime_t ti_gravity_end_min = max_nr_timesteps; @@ -1977,11 +2647,6 @@ void runner_do_recv_spart(struct runner *r, struct cell *c, int timer) { if (sparts[k].time_bin == time_bin_inhibited) continue; time_bin_min = min(time_bin_min, sparts[k].time_bin); time_bin_max = max(time_bin_max, sparts[k].time_bin); - -#ifdef SWIFT_DEBUG_CHECKS - if (sparts[k].ti_drift != ti_current) - error("Received un-drifted s-particle !"); -#endif } /* Convert into a time */ @@ -1992,12 +2657,12 @@ void runner_do_recv_spart(struct runner *r, struct cell *c, int timer) { /* Otherwise, recurse and collect. */ else { for (int k = 0; k < 8; k++) { - if (c->progeny[k] != NULL && c->progeny[k]->scount > 0) { + if (c->progeny[k] != NULL && c->progeny[k]->stars.count > 0) { runner_do_recv_spart(r, c->progeny[k], 0); ti_gravity_end_min = - min(ti_gravity_end_min, c->progeny[k]->ti_gravity_end_min); + min(ti_gravity_end_min, c->progeny[k]->grav.ti_end_min); ti_gravity_end_max = - max(ti_gravity_end_max, c->progeny[k]->ti_gravity_end_max); + max(ti_gravity_end_max, c->progeny[k]->grav.ti_end_max); } } } @@ -2011,9 +2676,9 @@ void runner_do_recv_spart(struct runner *r, struct cell *c, int timer) { #endif /* ... and store. 
*/ - c->ti_gravity_end_min = ti_gravity_end_min; - c->ti_gravity_end_max = ti_gravity_end_max; - c->ti_old_gpart = ti_current; + // c->grav.ti_end_min = ti_gravity_end_min; + // c->grav.ti_end_max = ti_gravity_end_max; + c->grav.ti_old_part = ti_current; if (timer) TIMER_TOC(timer_dorecv_spart); @@ -2098,6 +2763,8 @@ void *runner_main(void *data) { runner_doself_recursive_grav(r, ci, 1); else if (t->subtype == task_subtype_external_grav) runner_do_grav_external(r, ci, 1); + else if (t->subtype == task_subtype_stars_density) + runner_doself_stars_density(r, ci, 1); else error("Unknown/invalid task subtype (%d).", t->subtype); break; @@ -2113,6 +2780,8 @@ void *runner_main(void *data) { runner_dopair2_branch_force(r, ci, cj); else if (t->subtype == task_subtype_grav) runner_dopair_recursive_grav(r, ci, cj, 1); + else if (t->subtype == task_subtype_stars_density) + runner_dopair_stars_density(r, ci, cj, 1); else error("Unknown/invalid task subtype (%d).", t->subtype); break; @@ -2126,6 +2795,8 @@ void *runner_main(void *data) { #endif else if (t->subtype == task_subtype_force) runner_dosub_self2_force(r, ci, 1); + else if (t->subtype == task_subtype_stars_density) + runner_dosub_self_stars_density(r, ci, 1); else error("Unknown/invalid task subtype (%d).", t->subtype); break; @@ -2139,14 +2810,25 @@ void *runner_main(void *data) { #endif else if (t->subtype == task_subtype_force) runner_dosub_pair2_force(r, ci, cj, t->flags, 1); + else if (t->subtype == task_subtype_stars_density) + runner_dosub_pair_stars_density(r, ci, cj, t->flags, 1); else error("Unknown/invalid task subtype (%d).", t->subtype); break; case task_type_sort: /* Cleanup only if any of the indices went stale. */ - runner_do_sort(r, ci, t->flags, - ci->dx_max_sort_old > space_maxreldx * ci->dmin, 1); + runner_do_hydro_sort( + r, ci, t->flags, + ci->hydro.dx_max_sort_old > space_maxreldx * ci->dmin, 1); + /* Reset the sort flags as our work here is done. 
*/ + t->flags = 0; + break; + case task_type_stars_sort: + /* Cleanup only if any of the indices went stale. */ + runner_do_stars_sort( + r, ci, t->flags, + ci->stars.dx_max_sort_old > space_maxreldx * ci->dmin, 1); /* Reset the sort flags as our work here is done. */ t->flags = 0; break; @@ -2161,6 +2843,9 @@ void *runner_main(void *data) { runner_do_extra_ghost(r, ci, 1); break; #endif + case task_type_stars_ghost: + runner_do_stars_ghost(r, ci, 1); + break; case task_type_drift_part: runner_do_drift_part(r, ci, 1); break; @@ -2176,6 +2861,9 @@ void *runner_main(void *data) { case task_type_end_force: runner_do_end_force(r, ci, 1); break; + case task_type_logger: + runner_do_logger(r, ci, 1); + break; case task_type_timestep: runner_do_timestep(r, ci, 1); break; @@ -2217,11 +2905,14 @@ void *runner_main(void *data) { runner_do_grav_long_range(r, t->ci, 1); break; case task_type_grav_mm: - runner_dopair_grav_mm_symmetric(r, t->ci, t->cj); + runner_dopair_grav_mm_progenies(r, t->flags, t->ci, t->cj); break; case task_type_cooling: runner_do_cooling(r, t->ci, 1); break; + case task_type_star_formation: + runner_do_star_formation(r, t->ci, 1); + break; case task_type_sourceterms: runner_do_sourceterms(r, t->ci, 1); break; @@ -2251,3 +2942,74 @@ void *runner_main(void *data) { /* Be kind, rewind. */ return NULL; } + +/** + * @brief Write the required particles through the logger. + * + * @param r The runner thread. + * @param c The cell. + * @param timer Are we timing this ? + */ +void runner_do_logger(struct runner *r, struct cell *c, int timer) { + +#ifdef WITH_LOGGER + TIMER_TIC; + + const struct engine *e = r->e; + struct part *restrict parts = c->hydro.parts; + struct xpart *restrict xparts = c->hydro.xparts; + const int count = c->hydro.count; + + /* Anything to do here? */ + if (!cell_is_starting_hydro(c, e) && !cell_is_starting_gravity(c, e)) return; + + /* Recurse? Avoid spending too much time in useless cells. 
*/ + if (c->split) { + for (int k = 0; k < 8; k++) + if (c->progeny[k] != NULL) runner_do_logger(r, c->progeny[k], 0); + } else { + + /* Loop over the parts in this cell. */ + for (int k = 0; k < count; k++) { + + /* Get a handle on the part. */ + struct part *restrict p = &parts[k]; + struct xpart *restrict xp = &xparts[k]; + + /* If particle needs to be log */ + /* This is the same function than part_is_active, except for + * debugging checks */ + if (part_is_starting(p, e)) { + + if (logger_should_write(&xp->logger_data, e->logger)) { + /* Write particle */ + /* Currently writing everything, should adapt it through time */ + logger_log_part(e->logger, p, + logger_mask_data[logger_x].mask | + logger_mask_data[logger_v].mask | + logger_mask_data[logger_a].mask | + logger_mask_data[logger_u].mask | + logger_mask_data[logger_h].mask | + logger_mask_data[logger_rho].mask | + logger_mask_data[logger_consts].mask, + &xp->logger_data.last_offset); + + /* Set counter back to zero */ + xp->logger_data.steps_since_last_output = 0; + } else + /* Update counter */ + xp->logger_data.steps_since_last_output += 1; + } + } + } + + if (c->grav.count > 0) error("gparts not implemented"); + + if (c->stars.count > 0) error("sparts not implemented"); + + if (timer) TIMER_TOC(timer_logger); + +#else + error("Logger disabled, please enable it during configuration"); +#endif +} diff --git a/src/runner.h b/src/runner.h index e33a3e380e6097a67258d116d617483caca35086..6af0cd227374afd616b3329a8dbd527634902922 100644 --- a/src/runner.h +++ b/src/runner.h @@ -69,8 +69,10 @@ struct runner { /* Function prototypes. 
*/ void runner_do_ghost(struct runner *r, struct cell *c, int timer); void runner_do_extra_ghost(struct runner *r, struct cell *c, int timer); -void runner_do_sort(struct runner *r, struct cell *c, int flag, int cleanup, - int clock); +void runner_do_hydro_sort(struct runner *r, struct cell *c, int flag, + int cleanup, int clock); +void runner_do_stars_sort(struct runner *r, struct cell *c, int flag, + int cleanup, int clock); void runner_do_drift_part(struct runner *r, struct cell *c, int timer); void runner_do_drift_gpart(struct runner *r, struct cell *c, int timer); void runner_do_kick1(struct runner *r, struct cell *c, int timer); @@ -80,6 +82,7 @@ void runner_do_init(struct runner *r, struct cell *c, int timer); void runner_do_cooling(struct runner *r, struct cell *c, int timer); void runner_do_grav_external(struct runner *r, struct cell *c, int timer); void runner_do_grav_fft(struct runner *r, int timer); +void runner_do_logger(struct runner *r, struct cell *c, int timer); void *runner_main(void *data); void runner_do_unskip_mapper(void *map_data, int num_elements, void *extra_data); diff --git a/src/runner_doiact.h b/src/runner_doiact.h index 79c7760b86a7e8cc92bc7aef5d0bab093464033c..53cf51ed400f82d0e195e38dd08fcc5af16f1ad7 100644 --- a/src/runner_doiact.h +++ b/src/runner_doiact.h @@ -145,10 +145,10 @@ void DOPAIR1_NAIVE(struct runner *r, struct cell *restrict ci, /* Anything to do here? 
*/ if (!cell_is_active_hydro(ci, e) && !cell_is_active_hydro(cj, e)) return; - const int count_i = ci->count; - const int count_j = cj->count; - struct part *restrict parts_i = ci->parts; - struct part *restrict parts_j = cj->parts; + const int count_i = ci->hydro.count; + const int count_j = cj->hydro.count; + struct part *restrict parts_i = ci->hydro.parts; + struct part *restrict parts_j = cj->hydro.parts; /* Cosmological terms */ const float a = cosmo->a; @@ -169,6 +169,7 @@ void DOPAIR1_NAIVE(struct runner *r, struct cell *restrict ci, /* Get a hold of the ith part in ci. */ struct part *restrict pi = &parts_i[pid]; const int pi_active = part_is_active(pi, e); + const int pi_inhibited = part_is_inhibited(pi, e); const float hi = pi->h; const float hig2 = hi * hi * kernel_gamma2; const float pix[3] = {(float)(pi->x[0] - (cj->loc[0] + shift[0])), @@ -183,6 +184,7 @@ void DOPAIR1_NAIVE(struct runner *r, struct cell *restrict ci, const float hj = pj->h; const float hjg2 = hj * hj * kernel_gamma2; const int pj_active = part_is_active(pj, e); + const int pj_inhibited = part_is_inhibited(pj, e); /* Compute the pairwise distance. */ const float pjx[3] = {(float)(pj->x[0] - cj->loc[0]), @@ -193,21 +195,21 @@ void DOPAIR1_NAIVE(struct runner *r, struct cell *restrict ci, #ifdef SWIFT_DEBUG_CHECKS /* Check that particles have been drifted to the current time */ - if (pi->ti_drift != e->ti_current) + if (pi->ti_drift != e->ti_current && !pi_inhibited) error("Particle pi not drifted to current time"); - if (pj->ti_drift != e->ti_current) + if (pj->ti_drift != e->ti_current && !pj_inhibited) error("Particle pj not drifted to current time"); #endif /* Hit or miss? 
*/ - if (r2 < hig2 && pi_active) { + if (r2 < hig2 && pi_active && !pj_inhibited) { IACT_NONSYM(r2, dx, hi, hj, pi, pj, a, H); #if (FUNCTION_TASK_LOOP == TASK_LOOP_DENSITY) runner_iact_nonsym_chemistry(r2, dx, hi, hj, pi, pj, a, H); #endif } - if (r2 < hjg2 && pj_active) { + if (r2 < hjg2 && pj_active && !pi_inhibited) { dx[0] = -dx[0]; dx[1] = -dx[1]; @@ -245,10 +247,10 @@ void DOPAIR2_NAIVE(struct runner *r, struct cell *restrict ci, /* Anything to do here? */ if (!cell_is_active_hydro(ci, e) && !cell_is_active_hydro(cj, e)) return; - const int count_i = ci->count; - const int count_j = cj->count; - struct part *restrict parts_i = ci->parts; - struct part *restrict parts_j = cj->parts; + const int count_i = ci->hydro.count; + const int count_j = cj->hydro.count; + struct part *restrict parts_i = ci->hydro.parts; + struct part *restrict parts_j = cj->hydro.parts; /* Cosmological terms */ const float a = cosmo->a; @@ -269,6 +271,7 @@ void DOPAIR2_NAIVE(struct runner *r, struct cell *restrict ci, /* Get a hold of the ith part in ci. */ struct part *restrict pi = &parts_i[pid]; const int pi_active = part_is_active(pi, e); + const int pi_inhibited = part_is_inhibited(pi, e); const float hi = pi->h; const float hig2 = hi * hi * kernel_gamma2; const float pix[3] = {(float)(pi->x[0] - (cj->loc[0] + shift[0])), @@ -281,6 +284,7 @@ void DOPAIR2_NAIVE(struct runner *r, struct cell *restrict ci, /* Get a pointer to the jth particle. 
*/ struct part *restrict pj = &parts_j[pjd]; const int pj_active = part_is_active(pj, e); + const int pj_inhibited = part_is_inhibited(pj, e); const float hj = pj->h; const float hjg2 = hj * hj * kernel_gamma2; @@ -293,28 +297,28 @@ void DOPAIR2_NAIVE(struct runner *r, struct cell *restrict ci, #ifdef SWIFT_DEBUG_CHECKS /* Check that particles have been drifted to the current time */ - if (pi->ti_drift != e->ti_current) + if (pi->ti_drift != e->ti_current && !pj_inhibited) error("Particle pi not drifted to current time"); - if (pj->ti_drift != e->ti_current) + if (pj->ti_drift != e->ti_current && !pi_inhibited) error("Particle pj not drifted to current time"); #endif /* Hit or miss? */ if (r2 < hig2 || r2 < hjg2) { - if (pi_active && pj_active) { + if (pi_active && pj_active && !pi_inhibited && !pj_inhibited) { IACT(r2, dx, hi, hj, pi, pj, a, H); #if (FUNCTION_TASK_LOOP == TASK_LOOP_DENSITY) runner_iact_chemistry(r2, dx, hi, hj, pi, pj, a, H); #endif - } else if (pi_active) { + } else if (pi_active && !pj_inhibited) { IACT_NONSYM(r2, dx, hi, hj, pi, pj, a, H); #if (FUNCTION_TASK_LOOP == TASK_LOOP_DENSITY) runner_iact_nonsym_chemistry(r2, dx, hi, hj, pi, pj, a, H); #endif - } else if (pj_active) { + } else if (pj_active && !pi_inhibited) { dx[0] = -dx[0]; dx[1] = -dx[1]; @@ -354,8 +358,8 @@ void DOSELF1_NAIVE(struct runner *r, struct cell *restrict c) { const float a = cosmo->a; const float H = cosmo->H; - const int count = c->count; - struct part *restrict parts = c->parts; + const int count = c->hydro.count; + struct part *restrict parts = c->hydro.parts; /* Loop over the parts in ci. */ for (int pid = 0; pid < count; pid++) { @@ -363,6 +367,7 @@ void DOSELF1_NAIVE(struct runner *r, struct cell *restrict c) { /* Get a hold of the ith part in ci. 
*/ struct part *restrict pi = &parts[pid]; const int pi_active = part_is_active(pi, e); + const int pi_inhibited = part_is_inhibited(pi, e); const float hi = pi->h; const float hig2 = hi * hi * kernel_gamma2; const float pix[3] = {(float)(pi->x[0] - c->loc[0]), @@ -377,6 +382,7 @@ void DOSELF1_NAIVE(struct runner *r, struct cell *restrict c) { const float hj = pj->h; const float hjg2 = hj * hj * kernel_gamma2; const int pj_active = part_is_active(pj, e); + const int pj_inhibited = part_is_inhibited(pj, e); /* Compute the pairwise distance. */ const float pjx[3] = {(float)(pj->x[0] - c->loc[0]), @@ -385,14 +391,14 @@ void DOSELF1_NAIVE(struct runner *r, struct cell *restrict c) { float dx[3] = {pix[0] - pjx[0], pix[1] - pjx[1], pix[2] - pjx[2]}; const float r2 = dx[0] * dx[0] + dx[1] * dx[1] + dx[2] * dx[2]; - const int doi = pi_active && (r2 < hig2); - const int doj = pj_active && (r2 < hjg2); + const int doi = pi_active && (r2 < hig2) && !pj_inhibited; + const int doj = pj_active && (r2 < hjg2) && !pi_inhibited; #ifdef SWIFT_DEBUG_CHECKS /* Check that particles have been drifted to the current time */ - if (pi->ti_drift != e->ti_current) + if (pi->ti_drift != e->ti_current && !pi_inhibited) error("Particle pi not drifted to current time"); - if (pj->ti_drift != e->ti_current) + if (pj->ti_drift != e->ti_current && !pj_inhibited) error("Particle pj not drifted to current time"); #endif @@ -448,8 +454,8 @@ void DOSELF2_NAIVE(struct runner *r, struct cell *restrict c) { const float a = cosmo->a; const float H = cosmo->H; - const int count = c->count; - struct part *restrict parts = c->parts; + const int count = c->hydro.count; + struct part *restrict parts = c->hydro.parts; /* Loop over the parts in ci. */ for (int pid = 0; pid < count; pid++) { @@ -457,6 +463,7 @@ void DOSELF2_NAIVE(struct runner *r, struct cell *restrict c) { /* Get a hold of the ith part in ci. 
*/ struct part *restrict pi = &parts[pid]; const int pi_active = part_is_active(pi, e); + const int pi_inhibited = part_is_inhibited(pi, e); const float hi = pi->h; const float hig2 = hi * hi * kernel_gamma2; const float pix[3] = {(float)(pi->x[0] - c->loc[0]), @@ -471,6 +478,7 @@ void DOSELF2_NAIVE(struct runner *r, struct cell *restrict c) { const float hj = pj->h; const float hjg2 = hj * hj * kernel_gamma2; const int pj_active = part_is_active(pj, e); + const int pj_inhibited = part_is_inhibited(pj, e); /* Compute the pairwise distance. */ const float pjx[3] = {(float)(pj->x[0] - c->loc[0]), @@ -479,14 +487,16 @@ void DOSELF2_NAIVE(struct runner *r, struct cell *restrict c) { float dx[3] = {pix[0] - pjx[0], pix[1] - pjx[1], pix[2] - pjx[2]}; const float r2 = dx[0] * dx[0] + dx[1] * dx[1] + dx[2] * dx[2]; - const int doi = pi_active && ((r2 < hig2) || (r2 < hjg2)); - const int doj = pj_active && ((r2 < hig2) || (r2 < hjg2)); + const int doi = + pi_active && ((r2 < hig2) || (r2 < hjg2)) && !pj_inhibited; + const int doj = + pj_active && ((r2 < hig2) || (r2 < hjg2)) && !pi_inhibited; #ifdef SWIFT_DEBUG_CHECKS /* Check that particles have been drifted to the current time */ - if (pi->ti_drift != e->ti_current) + if (pi->ti_drift != e->ti_current && !pi_inhibited) error("Particle pi not drifted to current time"); - if (pj->ti_drift != e->ti_current) + if (pj->ti_drift != e->ti_current && !pj_inhibited) error("Particle pj not drifted to current time"); #endif @@ -544,8 +554,8 @@ void DOPAIR_SUBSET_NAIVE(struct runner *r, struct cell *restrict ci, TIMER_TIC; - const int count_j = cj->count; - struct part *restrict parts_j = cj->parts; + const int count_j = cj->hydro.count; + struct part *restrict parts_j = cj->hydro.parts; /* Cosmological terms */ const float a = cosmo->a; @@ -571,6 +581,7 @@ void DOPAIR_SUBSET_NAIVE(struct runner *r, struct cell *restrict ci, /* Get a pointer to the jth particle. 
*/ struct part *restrict pj = &parts_j[pjd]; + const int pj_inhibited = part_is_inhibited(pj, e); /* Compute the pairwise distance. */ float r2 = 0.0f; @@ -584,12 +595,12 @@ void DOPAIR_SUBSET_NAIVE(struct runner *r, struct cell *restrict ci, /* Check that particles have been drifted to the current time */ if (pi->ti_drift != e->ti_current) error("Particle pi not drifted to current time"); - if (pj->ti_drift != e->ti_current) + if (pj->ti_drift != e->ti_current && !pj_inhibited) error("Particle pj not drifted to current time"); #endif /* Hit or miss? */ - if (r2 < hig2) { + if (r2 < hig2 && !pj_inhibited) { IACT_NONSYM(r2, dx, hi, pj->h, pi, pj, a, H); #if (FUNCTION_TASK_LOOP == TASK_LOOP_DENSITY) @@ -626,16 +637,16 @@ void DOPAIR_SUBSET(struct runner *r, struct cell *restrict ci, TIMER_TIC; - const int count_j = cj->count; - struct part *restrict parts_j = cj->parts; + const int count_j = cj->hydro.count; + struct part *restrict parts_j = cj->hydro.parts; /* Cosmological terms */ const float a = cosmo->a; const float H = cosmo->H; /* Pick-out the sorted lists. */ - const struct entry *restrict sort_j = cj->sort[sid]; - const float dxj = cj->dx_max_sort; + const struct entry *restrict sort_j = cj->hydro.sort[sid]; + const float dxj = cj->hydro.dx_max_sort; /* Parts are on the left? */ if (!flipped) { @@ -658,6 +669,7 @@ void DOPAIR_SUBSET(struct runner *r, struct cell *restrict ci, /* Get a pointer to the jth particle. 
*/ struct part *restrict pj = &parts_j[sort_j[pjd].i]; + const int pj_inhibited = part_is_inhibited(pj, e); const float hj = pj->h; const double pjx = pj->x[0]; const double pjy = pj->x[1]; @@ -672,12 +684,12 @@ void DOPAIR_SUBSET(struct runner *r, struct cell *restrict ci, /* Check that particles have been drifted to the current time */ if (pi->ti_drift != e->ti_current) error("Particle pi not drifted to current time"); - if (pj->ti_drift != e->ti_current) + if (pj->ti_drift != e->ti_current && !pj_inhibited) error("Particle pj not drifted to current time"); #endif /* Hit or miss? */ - if (r2 < hig2) { + if (r2 < hig2 && !pj_inhibited) { IACT_NONSYM(r2, dx, hi, hj, pi, pj, a, H); #if (FUNCTION_TASK_LOOP == TASK_LOOP_DENSITY) @@ -709,6 +721,7 @@ void DOPAIR_SUBSET(struct runner *r, struct cell *restrict ci, /* Get a pointer to the jth particle. */ struct part *restrict pj = &parts_j[sort_j[pjd].i]; + const int pj_inhibited = part_is_inhibited(pj, e); const float hj = pj->h; const double pjx = pj->x[0]; const double pjy = pj->x[1]; @@ -723,12 +736,12 @@ void DOPAIR_SUBSET(struct runner *r, struct cell *restrict ci, /* Check that particles have been drifted to the current time */ if (pi->ti_drift != e->ti_current) error("Particle pi not drifted to current time"); - if (pj->ti_drift != e->ti_current) + if (pj->ti_drift != e->ti_current && !pj_inhibited) error("Particle pj not drifted to current time"); #endif /* Hit or miss? */ - if (r2 < hig2) { + if (r2 < hig2 && !pj_inhibited) { IACT_NONSYM(r2, dx, hi, hj, pi, pj, a, H); #if (FUNCTION_TASK_LOOP == TASK_LOOP_DENSITY) @@ -782,8 +795,8 @@ void DOPAIR_SUBSET_BRANCH(struct runner *r, struct cell *restrict ci, sid = sortlistID[sid]; /* Has the cell cj been sorted? 
*/ - if (!(cj->sorted & (1 << sid)) || - cj->dx_max_sort_old > space_maxreldx * cj->dmin) + if (!(cj->hydro.sorted & (1 << sid)) || + cj->hydro.dx_max_sort_old > space_maxreldx * cj->dmin) error("Interacting unsorted cells."); #endif @@ -822,8 +835,8 @@ void DOSELF_SUBSET(struct runner *r, struct cell *restrict ci, const float a = cosmo->a; const float H = cosmo->H; - const int count_i = ci->count; - struct part *restrict parts_j = ci->parts; + const int count_i = ci->hydro.count; + struct part *restrict parts_j = ci->hydro.parts; /* Loop over the parts in ci. */ for (int pid = 0; pid < count; pid++) { @@ -845,6 +858,7 @@ void DOSELF_SUBSET(struct runner *r, struct cell *restrict ci, /* Get a pointer to the jth particle. */ struct part *restrict pj = &parts_j[pjd]; + const int pj_inhibited = part_is_inhibited(pj, e); const float hj = pj->h; /* Compute the pairwise distance. */ @@ -858,12 +872,12 @@ void DOSELF_SUBSET(struct runner *r, struct cell *restrict ci, /* Check that particles have been drifted to the current time */ if (pi->ti_drift != e->ti_current) error("Particle pi not drifted to current time"); - if (pj->ti_drift != e->ti_current) + if (pj->ti_drift != e->ti_current && !pj_inhibited) error("Particle pj not drifted to current time"); #endif /* Hit or miss? */ - if (r2 > 0.f && r2 < hig2) { + if (r2 > 0.f && r2 < hig2 && !pj_inhibited) { IACT_NONSYM(r2, dx, hi, hj, pi, pj, a, H); #if (FUNCTION_TASK_LOOP == TASK_LOOP_DENSITY) @@ -919,29 +933,32 @@ void DOPAIR1(struct runner *r, struct cell *ci, struct cell *cj, const int sid, for (int k = 0; k < 3; k++) rshift += shift[k] * runner_shift[sid][k]; /* Pick-out the sorted lists. 
*/ - const struct entry *restrict sort_i = ci->sort[sid]; - const struct entry *restrict sort_j = cj->sort[sid]; + const struct entry *restrict sort_i = ci->hydro.sort[sid]; + const struct entry *restrict sort_j = cj->hydro.sort[sid]; #ifdef SWIFT_DEBUG_CHECKS /* Some constants used to checks that the parts are in the right frame */ const float shift_threshold_x = - 2. * ci->width[0] + 2. * max(ci->dx_max_part, cj->dx_max_part); + 2. * ci->width[0] + + 2. * max(ci->hydro.dx_max_part, cj->hydro.dx_max_part); const float shift_threshold_y = - 2. * ci->width[1] + 2. * max(ci->dx_max_part, cj->dx_max_part); + 2. * ci->width[1] + + 2. * max(ci->hydro.dx_max_part, cj->hydro.dx_max_part); const float shift_threshold_z = - 2. * ci->width[2] + 2. * max(ci->dx_max_part, cj->dx_max_part); + 2. * ci->width[2] + + 2. * max(ci->hydro.dx_max_part, cj->hydro.dx_max_part); #endif /* SWIFT_DEBUG_CHECKS */ /* Get some other useful values. */ - const double hi_max = ci->h_max * kernel_gamma - rshift; - const double hj_max = cj->h_max * kernel_gamma; - const int count_i = ci->count; - const int count_j = cj->count; - struct part *restrict parts_i = ci->parts; - struct part *restrict parts_j = cj->parts; + const double hi_max = ci->hydro.h_max * kernel_gamma - rshift; + const double hj_max = cj->hydro.h_max * kernel_gamma; + const int count_i = ci->hydro.count; + const int count_j = cj->hydro.count; + struct part *restrict parts_i = ci->hydro.parts; + struct part *restrict parts_j = cj->hydro.parts; const double di_max = sort_i[count_i - 1].d - rshift; const double dj_min = sort_j[0].d; - const float dx_max = (ci->dx_max_sort + cj->dx_max_sort); + const float dx_max = (ci->hydro.dx_max_sort + cj->hydro.dx_max_sort); /* Cosmological terms */ const float a = cosmo->a; @@ -975,6 +992,7 @@ void DOPAIR1(struct runner *r, struct cell *ci, struct cell *cj, const int sid, /* Recover pj */ struct part *pj = &parts_j[sort_j[pjd].i]; + const int pj_inhibited = part_is_inhibited(pj, e); const float 
hj = pj->h; const float pjx = pj->x[0] - cj->loc[0]; const float pjy = pj->x[1] - cj->loc[1]; @@ -1014,12 +1032,12 @@ void DOPAIR1(struct runner *r, struct cell *ci, struct cell *cj, const int sid, /* Check that particles have been drifted to the current time */ if (pi->ti_drift != e->ti_current) error("Particle pi not drifted to current time"); - if (pj->ti_drift != e->ti_current) + if (pj->ti_drift != e->ti_current && !pj_inhibited) error("Particle pj not drifted to current time"); #endif /* Hit or miss? */ - if (r2 < hig2) { + if (r2 < hig2 && !pj_inhibited) { IACT_NONSYM(r2, dx, hi, hj, pi, pj, a, H); #if (FUNCTION_TASK_LOOP == TASK_LOOP_DENSITY) @@ -1058,6 +1076,7 @@ void DOPAIR1(struct runner *r, struct cell *ci, struct cell *cj, const int sid, /* Recover pi */ struct part *pi = &parts_i[sort_i[pid].i]; + const int pi_inhibited = part_is_inhibited(pi, e); const float hi = pi->h; const float pix = pi->x[0] - (cj->loc[0] + shift[0]); const float piy = pi->x[1] - (cj->loc[1] + shift[1]); @@ -1095,14 +1114,14 @@ void DOPAIR1(struct runner *r, struct cell *ci, struct cell *cj, const int sid, pjz, ci->width[2]); /* Check that particles have been drifted to the current time */ - if (pi->ti_drift != e->ti_current) + if (pi->ti_drift != e->ti_current && !pi_inhibited) error("Particle pi not drifted to current time"); if (pj->ti_drift != e->ti_current) error("Particle pj not drifted to current time"); #endif /* Hit or miss? */ - if (r2 < hjg2) { + if (r2 < hjg2 && !pi_inhibited) { IACT_NONSYM(r2, dx, hj, hi, pj, pi, a, H); #if (FUNCTION_TASK_LOOP == TASK_LOOP_DENSITY) @@ -1141,49 +1160,55 @@ void DOPAIR1_BRANCH(struct runner *r, struct cell *ci, struct cell *cj) { const int sid = space_getsid(e->s, &ci, &cj, shift); /* Have the cells been sorted? 
*/ - if (!(ci->sorted & (1 << sid)) || - ci->dx_max_sort_old > space_maxreldx * ci->dmin) + if (!(ci->hydro.sorted & (1 << sid)) || + ci->hydro.dx_max_sort_old > space_maxreldx * ci->dmin) error("Interacting unsorted cells."); - if (!(cj->sorted & (1 << sid)) || - cj->dx_max_sort_old > space_maxreldx * cj->dmin) + if (!(cj->hydro.sorted & (1 << sid)) || + cj->hydro.dx_max_sort_old > space_maxreldx * cj->dmin) error("Interacting unsorted cells."); #ifdef SWIFT_DEBUG_CHECKS /* Pick-out the sorted lists. */ - const struct entry *restrict sort_i = ci->sort[sid]; - const struct entry *restrict sort_j = cj->sort[sid]; + const struct entry *restrict sort_i = ci->hydro.sort[sid]; + const struct entry *restrict sort_j = cj->hydro.sort[sid]; /* Check that the dx_max_sort values in the cell are indeed an upper bound on particle movement. */ - for (int pid = 0; pid < ci->count; pid++) { - const struct part *p = &ci->parts[sort_i[pid].i]; + for (int pid = 0; pid < ci->hydro.count; pid++) { + const struct part *p = &ci->hydro.parts[sort_i[pid].i]; + if (part_is_inhibited(p, e)) continue; + const float d = p->x[0] * runner_shift[sid][0] + p->x[1] * runner_shift[sid][1] + p->x[2] * runner_shift[sid][2]; - if (fabsf(d - sort_i[pid].d) - ci->dx_max_sort > - 1.0e-4 * max(fabsf(d), ci->dx_max_sort_old) && - fabsf(d - sort_i[pid].d) - ci->dx_max_sort > ci->width[0] * 1.0e-10) + if (fabsf(d - sort_i[pid].d) - ci->hydro.dx_max_sort > + 1.0e-4 * max(fabsf(d), ci->hydro.dx_max_sort_old) && + fabsf(d - sort_i[pid].d) - ci->hydro.dx_max_sort > + ci->width[0] * 1.0e-10) error( "particle shift diff exceeds dx_max_sort in cell ci. 
ci->nodeID=%d " - "cj->nodeID=%d d=%e sort_i[pid].d=%e ci->dx_max_sort=%e " - "ci->dx_max_sort_old=%e", - ci->nodeID, cj->nodeID, d, sort_i[pid].d, ci->dx_max_sort, - ci->dx_max_sort_old); + "cj->nodeID=%d d=%e sort_i[pid].d=%e ci->hydro.dx_max_sort=%e " + "ci->hydro.dx_max_sort_old=%e", + ci->nodeID, cj->nodeID, d, sort_i[pid].d, ci->hydro.dx_max_sort, + ci->hydro.dx_max_sort_old); } - for (int pjd = 0; pjd < cj->count; pjd++) { - const struct part *p = &cj->parts[sort_j[pjd].i]; + for (int pjd = 0; pjd < cj->hydro.count; pjd++) { + const struct part *p = &cj->hydro.parts[sort_j[pjd].i]; + if (part_is_inhibited(p, e)) continue; + const float d = p->x[0] * runner_shift[sid][0] + p->x[1] * runner_shift[sid][1] + p->x[2] * runner_shift[sid][2]; - if ((fabsf(d - sort_j[pjd].d) - cj->dx_max_sort) > - 1.0e-4 * max(fabsf(d), cj->dx_max_sort_old) && - (fabsf(d - sort_j[pjd].d) - cj->dx_max_sort) > cj->width[0] * 1.0e-10) + if ((fabsf(d - sort_j[pjd].d) - cj->hydro.dx_max_sort) > + 1.0e-4 * max(fabsf(d), cj->hydro.dx_max_sort_old) && + (fabsf(d - sort_j[pjd].d) - cj->hydro.dx_max_sort) > + cj->width[0] * 1.0e-10) error( "particle shift diff exceeds dx_max_sort in cell cj. cj->nodeID=%d " - "ci->nodeID=%d d=%e sort_j[pjd].d=%e cj->dx_max_sort=%e " - "cj->dx_max_sort_old=%e", - cj->nodeID, ci->nodeID, d, sort_j[pjd].d, cj->dx_max_sort, - cj->dx_max_sort_old); + "ci->nodeID=%d d=%e sort_j[pjd].d=%e cj->hydro.dx_max_sort=%e " + "cj->hydro.dx_max_sort_old=%e", + cj->nodeID, ci->nodeID, d, sort_j[pjd].d, cj->hydro.dx_max_sort, + cj->hydro.dx_max_sort_old); } #endif /* SWIFT_DEBUG_CHECKS */ @@ -1222,33 +1247,36 @@ void DOPAIR2(struct runner *r, struct cell *ci, struct cell *cj, const int sid, for (int k = 0; k < 3; k++) rshift += shift[k] * runner_shift[sid][k]; /* Pick-out the sorted lists. 
*/ - struct entry *restrict sort_i = ci->sort[sid]; - struct entry *restrict sort_j = cj->sort[sid]; + struct entry *restrict sort_i = ci->hydro.sort[sid]; + struct entry *restrict sort_j = cj->hydro.sort[sid]; #ifdef SWIFT_DEBUG_CHECKS /* Some constants used to checks that the parts are in the right frame */ const float shift_threshold_x = - 2. * ci->width[0] + 2. * max(ci->dx_max_part, cj->dx_max_part); + 2. * ci->width[0] + + 2. * max(ci->hydro.dx_max_part, cj->hydro.dx_max_part); const float shift_threshold_y = - 2. * ci->width[1] + 2. * max(ci->dx_max_part, cj->dx_max_part); + 2. * ci->width[1] + + 2. * max(ci->hydro.dx_max_part, cj->hydro.dx_max_part); const float shift_threshold_z = - 2. * ci->width[2] + 2. * max(ci->dx_max_part, cj->dx_max_part); + 2. * ci->width[2] + + 2. * max(ci->hydro.dx_max_part, cj->hydro.dx_max_part); #endif /* SWIFT_DEBUG_CHECKS */ /* Get some other useful values. */ - const double hi_max = ci->h_max; - const double hj_max = cj->h_max; - const int count_i = ci->count; - const int count_j = cj->count; - struct part *restrict parts_i = ci->parts; - struct part *restrict parts_j = cj->parts; + const double hi_max = ci->hydro.h_max; + const double hj_max = cj->hydro.h_max; + const int count_i = ci->hydro.count; + const int count_j = cj->hydro.count; + struct part *restrict parts_i = ci->hydro.parts; + struct part *restrict parts_j = cj->hydro.parts; /* Cosmological terms */ const float a = cosmo->a; const float H = cosmo->H; /* Maximal displacement since last rebuild */ - const double dx_max = (ci->dx_max_sort + cj->dx_max_sort); + const double dx_max = (ci->hydro.dx_max_sort + cj->hydro.dx_max_sort); /* Position on the axis of the particles closest to the interface */ const double di_max = sort_i[count_i - 1].d; @@ -1307,6 +1335,7 @@ void DOPAIR2(struct runner *r, struct cell *ci, struct cell *cj, const int sid, /* Get a hold of the ith part in ci. 
*/ struct part *pi = &parts_i[sort_i[pid].i]; + const int pi_inhibited = part_is_inhibited(pi, e); const float hi = pi->h; /* Is there anything we need to interact with (for this specific hi) ? */ @@ -1337,7 +1366,7 @@ void DOPAIR2(struct runner *r, struct cell *ci, struct cell *cj, const int sid, const float pjz = pj->x[2] - shift_j[2]; /* Compute the pairwise distance. */ - float dx[3] = {pjx - pix, pjy - piy, pjz - piz}; + const float dx[3] = {pjx - pix, pjy - piy, pjz - piz}; const float r2 = dx[0] * dx[0] + dx[1] * dx[1] + dx[2] * dx[2]; #ifdef SWIFT_DEBUG_CHECKS @@ -1368,7 +1397,7 @@ void DOPAIR2(struct runner *r, struct cell *ci, struct cell *cj, const int sid, pjz, ci->width[2]); /* Check that particles have been drifted to the current time */ - if (pi->ti_drift != e->ti_current) + if (pi->ti_drift != e->ti_current && !pi_inhibited) error("Particle pi not drifted to current time"); if (pj->ti_drift != e->ti_current) error("Particle pj not drifted to current time"); @@ -1376,7 +1405,7 @@ void DOPAIR2(struct runner *r, struct cell *ci, struct cell *cj, const int sid, /* Hit or miss? (note that we will do the other condition in the reverse loop) */ - if (r2 < hig2) { + if (r2 < hig2 && !pi_inhibited) { IACT_NONSYM(r2, dx, hj, hi, pj, pi, a, H); #if (FUNCTION_TASK_LOOP == TASK_LOOP_DENSITY) runner_iact_nonsym_chemistry(r2, dx, hj, hi, pj, pi, a, H); @@ -1392,6 +1421,7 @@ void DOPAIR2(struct runner *r, struct cell *ci, struct cell *cj, const int sid, /* Recover pj */ struct part *pj = &parts_j[sort_j[pjd].i]; + const int pj_inhibited = part_is_inhibited(pj, e); const float hj = pj->h; /* Get the position of pj in the right frame */ @@ -1400,7 +1430,7 @@ void DOPAIR2(struct runner *r, struct cell *ci, struct cell *cj, const int sid, const float pjz = pj->x[2] - shift_j[2]; /* Compute the pairwise distance. 
*/ - float dx[3] = {pix - pjx, piy - pjy, piz - pjz}; + const float dx[3] = {pix - pjx, piy - pjy, piz - pjz}; const float r2 = dx[0] * dx[0] + dx[1] * dx[1] + dx[2] * dx[2]; #ifdef SWIFT_DEBUG_CHECKS @@ -1431,14 +1461,14 @@ void DOPAIR2(struct runner *r, struct cell *ci, struct cell *cj, const int sid, pjz, ci->width[2]); /* Check that particles have been drifted to the current time */ - if (pi->ti_drift != e->ti_current) + if (pi->ti_drift != e->ti_current && !pi_inhibited) error("Particle pi not drifted to current time"); - if (pj->ti_drift != e->ti_current) + if (pj->ti_drift != e->ti_current && !pj_inhibited) error("Particle pj not drifted to current time"); #endif /* Hit or miss? (note that we will do the other condition in the reverse loop) */ - if (r2 < hig2) { + if (r2 < hig2 && !pj_inhibited) { /* Does pj need to be updated too? */ if (part_is_active(pj, e)) { @@ -1466,6 +1496,7 @@ void DOPAIR2(struct runner *r, struct cell *ci, struct cell *cj, const int sid, /* Get a hold of the jth part in cj. */ struct part *pj = &parts_j[sort_j[pjd].i]; + const int pj_inhibited = part_is_inhibited(pj, e); const float hj = pj->h; /* Is there anything we need to interact with (for this specific hj) ? */ @@ -1497,7 +1528,7 @@ void DOPAIR2(struct runner *r, struct cell *ci, struct cell *cj, const int sid, const float piz = pi->x[2] - shift_i[2]; /* Compute the pairwise distance. 
*/ - float dx[3] = {pix - pjx, piy - pjy, piz - pjz}; + const float dx[3] = {pix - pjx, piy - pjy, piz - pjz}; const float r2 = dx[0] * dx[0] + dx[1] * dx[1] + dx[2] * dx[2]; #ifdef SWIFT_DEBUG_CHECKS @@ -1530,13 +1561,13 @@ void DOPAIR2(struct runner *r, struct cell *ci, struct cell *cj, const int sid, /* Check that particles have been drifted to the current time */ if (pi->ti_drift != e->ti_current) error("Particle pi not drifted to current time"); - if (pj->ti_drift != e->ti_current) + if (pj->ti_drift != e->ti_current && !pj_inhibited) error("Particle pj not drifted to current time"); #endif /* Hit or miss? (note that we must avoid the r2 < hig2 cases we already processed) */ - if (r2 < hjg2 && r2 >= hig2) { + if (r2 < hjg2 && r2 >= hig2 && !pj_inhibited) { IACT_NONSYM(r2, dx, hi, hj, pi, pj, a, H); #if (FUNCTION_TASK_LOOP == TASK_LOOP_DENSITY) runner_iact_nonsym_chemistry(r2, dx, hi, hj, pi, pj, a, H); @@ -1553,6 +1584,7 @@ void DOPAIR2(struct runner *r, struct cell *ci, struct cell *cj, const int sid, /* Recover pi */ struct part *pi = &parts_i[sort_i[pid].i]; + const int pi_inhibited = part_is_inhibited(pi, e); const float hi = pi->h; const float hig2 = hi * hi * kernel_gamma2; @@ -1562,7 +1594,7 @@ void DOPAIR2(struct runner *r, struct cell *ci, struct cell *cj, const int sid, const float piz = pi->x[2] - shift_i[2]; /* Compute the pairwise distance. 
*/ - float dx[3] = {pjx - pix, pjy - piy, pjz - piz}; + const float dx[3] = {pjx - pix, pjy - piy, pjz - piz}; const float r2 = dx[0] * dx[0] + dx[1] * dx[1] + dx[2] * dx[2]; #ifdef SWIFT_DEBUG_CHECKS @@ -1593,15 +1625,15 @@ void DOPAIR2(struct runner *r, struct cell *ci, struct cell *cj, const int sid, pjz, ci->width[2]); /* Check that particles have been drifted to the current time */ - if (pi->ti_drift != e->ti_current) + if (pi->ti_drift != e->ti_current && !pi_inhibited) error("Particle pi not drifted to current time"); - if (pj->ti_drift != e->ti_current) + if (pj->ti_drift != e->ti_current && !pj_inhibited) error("Particle pj not drifted to current time"); #endif /* Hit or miss? (note that we must avoid the r2 < hig2 cases we already processed) */ - if (r2 < hjg2 && r2 >= hig2) { + if (r2 < hjg2 && r2 >= hig2 && !pi_inhibited) { /* Does pi need to be updated too? */ if (part_is_active(pi, e)) { @@ -1654,49 +1686,55 @@ void DOPAIR2_BRANCH(struct runner *r, struct cell *ci, struct cell *cj) { const int sid = space_getsid(e->s, &ci, &cj, shift); /* Have the cells been sorted? */ - if (!(ci->sorted & (1 << sid)) || - ci->dx_max_sort_old > space_maxreldx * ci->dmin) + if (!(ci->hydro.sorted & (1 << sid)) || + ci->hydro.dx_max_sort_old > space_maxreldx * ci->dmin) error("Interacting unsorted cells."); - if (!(cj->sorted & (1 << sid)) || - cj->dx_max_sort_old > space_maxreldx * cj->dmin) + if (!(cj->hydro.sorted & (1 << sid)) || + cj->hydro.dx_max_sort_old > space_maxreldx * cj->dmin) error("Interacting unsorted cells."); #ifdef SWIFT_DEBUG_CHECKS /* Pick-out the sorted lists. */ - const struct entry *restrict sort_i = ci->sort[sid]; - const struct entry *restrict sort_j = cj->sort[sid]; + const struct entry *restrict sort_i = ci->hydro.sort[sid]; + const struct entry *restrict sort_j = cj->hydro.sort[sid]; /* Check that the dx_max_sort values in the cell are indeed an upper bound on particle movement. 
*/ - for (int pid = 0; pid < ci->count; pid++) { - const struct part *p = &ci->parts[sort_i[pid].i]; + for (int pid = 0; pid < ci->hydro.count; pid++) { + const struct part *p = &ci->hydro.parts[sort_i[pid].i]; + if (part_is_inhibited(p, e)) continue; + const float d = p->x[0] * runner_shift[sid][0] + p->x[1] * runner_shift[sid][1] + p->x[2] * runner_shift[sid][2]; - if (fabsf(d - sort_i[pid].d) - ci->dx_max_sort > - 1.0e-4 * max(fabsf(d), ci->dx_max_sort_old) && - fabsf(d - sort_i[pid].d) - ci->dx_max_sort > ci->width[0] * 1.0e-10) + if (fabsf(d - sort_i[pid].d) - ci->hydro.dx_max_sort > + 1.0e-4 * max(fabsf(d), ci->hydro.dx_max_sort_old) && + fabsf(d - sort_i[pid].d) - ci->hydro.dx_max_sort > + ci->width[0] * 1.0e-10) error( "particle shift diff exceeds dx_max_sort in cell ci. ci->nodeID=%d " - "cj->nodeID=%d d=%e sort_i[pid].d=%e ci->dx_max_sort=%e " - "ci->dx_max_sort_old=%e", - ci->nodeID, cj->nodeID, d, sort_i[pid].d, ci->dx_max_sort, - ci->dx_max_sort_old); + "cj->nodeID=%d d=%e sort_i[pid].d=%e ci->hydro.dx_max_sort=%e " + "ci->hydro.dx_max_sort_old=%e", + ci->nodeID, cj->nodeID, d, sort_i[pid].d, ci->hydro.dx_max_sort, + ci->hydro.dx_max_sort_old); } - for (int pjd = 0; pjd < cj->count; pjd++) { - const struct part *p = &cj->parts[sort_j[pjd].i]; + for (int pjd = 0; pjd < cj->hydro.count; pjd++) { + const struct part *p = &cj->hydro.parts[sort_j[pjd].i]; + if (part_is_inhibited(p, e)) continue; + const float d = p->x[0] * runner_shift[sid][0] + p->x[1] * runner_shift[sid][1] + p->x[2] * runner_shift[sid][2]; - if (fabsf(d - sort_j[pjd].d) - cj->dx_max_sort > - 1.0e-4 * max(fabsf(d), cj->dx_max_sort_old) && - fabsf(d - sort_j[pjd].d) - cj->dx_max_sort > cj->width[0] * 1.0e-10) + if (fabsf(d - sort_j[pjd].d) - cj->hydro.dx_max_sort > + 1.0e-4 * max(fabsf(d), cj->hydro.dx_max_sort_old) && + fabsf(d - sort_j[pjd].d) - cj->hydro.dx_max_sort > + cj->width[0] * 1.0e-10) error( "particle shift diff exceeds dx_max_sort in cell cj. 
cj->nodeID=%d " - "ci->nodeID=%d d=%e sort_j[pjd].d=%e cj->dx_max_sort=%e " - "cj->dx_max_sort_old=%e", - cj->nodeID, ci->nodeID, d, sort_j[pjd].d, cj->dx_max_sort, - cj->dx_max_sort_old); + "ci->nodeID=%d d=%e sort_j[pjd].d=%e cj->hydro.dx_max_sort=%e " + "cj->hydro.dx_max_sort_old=%e", + cj->nodeID, ci->nodeID, d, sort_j[pjd].d, cj->hydro.dx_max_sort, + cj->hydro.dx_max_sort_old); } #endif /* SWIFT_DEBUG_CHECKS */ @@ -1726,8 +1764,8 @@ void DOSELF1(struct runner *r, struct cell *restrict c) { TIMER_TIC; - struct part *restrict parts = c->parts; - const int count = c->count; + struct part *restrict parts = c->hydro.parts; + const int count = c->hydro.count; /* Set up indt. */ int *indt = NULL; @@ -1750,6 +1788,7 @@ void DOSELF1(struct runner *r, struct cell *restrict c) { /* Get a pointer to the ith particle. */ struct part *restrict pi = &parts[pid]; + const int pi_inhibited = part_is_inhibited(pi, e); /* Get the particle position and radius. */ double pix[3]; @@ -1769,7 +1808,7 @@ void DOSELF1(struct runner *r, struct cell *restrict c) { #ifdef SWIFT_DEBUG_CHECKS /* Check that particles have been drifted to the current time */ - if (pi->ti_drift != e->ti_current) + if (pi->ti_drift != e->ti_current && !pi_inhibited) error("Particle pi not drifted to current time"); if (pj->ti_drift != e->ti_current) error("Particle pj not drifted to current time"); @@ -1784,7 +1823,7 @@ void DOSELF1(struct runner *r, struct cell *restrict c) { } /* Hit or miss? */ - if (r2 < hj * hj * kernel_gamma2) { + if (r2 < hj * hj * kernel_gamma2 && !pi_inhibited) { IACT_NONSYM(r2, dx, hj, hi, pj, pi, a, H); #if (FUNCTION_TASK_LOOP == TASK_LOOP_DENSITY) @@ -1805,6 +1844,7 @@ void DOSELF1(struct runner *r, struct cell *restrict c) { /* Get a pointer to the jth particle. */ struct part *restrict pj = &parts[pjd]; + const int pj_inhibited = part_is_inhibited(pj, e); const float hj = pj->h; /* Compute the pairwise distance. 
*/ @@ -1817,29 +1857,34 @@ void DOSELF1(struct runner *r, struct cell *restrict c) { const int doj = (part_is_active(pj, e)) && (r2 < hj * hj * kernel_gamma2); + const int doi = (r2 < hig2); + #ifdef SWIFT_DEBUG_CHECKS /* Check that particles have been drifted to the current time */ - if (pi->ti_drift != e->ti_current) + if (pi->ti_drift != e->ti_current && !pi_inhibited) error("Particle pi not drifted to current time"); - if (pj->ti_drift != e->ti_current) + if (pj->ti_drift != e->ti_current && !pj_inhibited) error("Particle pj not drifted to current time"); #endif /* Hit or miss? */ - if (r2 < hig2 || doj) { + if (doi || doj) { /* Which parts need to be updated? */ - if (r2 < hig2 && doj) { + if (doi && doj) { + IACT(r2, dx, hi, hj, pi, pj, a, H); #if (FUNCTION_TASK_LOOP == TASK_LOOP_DENSITY) runner_iact_chemistry(r2, dx, hi, hj, pi, pj, a, H); #endif - } else if (!doj) { + } else if (doi && !pj_inhibited) { + IACT_NONSYM(r2, dx, hi, hj, pi, pj, a, H); #if (FUNCTION_TASK_LOOP == TASK_LOOP_DENSITY) runner_iact_nonsym_chemistry(r2, dx, hi, hj, pi, pj, a, H); #endif - } else { + } else if (doj && !pi_inhibited) { + dx[0] = -dx[0]; dx[1] = -dx[1]; dx[2] = -dx[2]; @@ -1874,7 +1919,7 @@ void DOSELF1_BRANCH(struct runner *r, struct cell *c) { if (!cell_is_active_hydro(c, e)) return; /* Did we mess up the recursion? */ - if (c->h_max_old * kernel_gamma > c->dmin) + if (c->hydro.h_max_old * kernel_gamma > c->dmin) error("Cell smaller than smoothing length"); /* Check that cells are drifted. */ @@ -1903,8 +1948,8 @@ void DOSELF2(struct runner *r, struct cell *restrict c) { TIMER_TIC; - struct part *restrict parts = c->parts; - const int count = c->count; + struct part *restrict parts = c->hydro.parts; + const int count = c->hydro.count; /* Set up indt. */ int *indt = NULL; @@ -1927,6 +1972,7 @@ void DOSELF2(struct runner *r, struct cell *restrict c) { /* Get a pointer to the ith particle. 
*/ struct part *restrict pi = &parts[pid]; + const int pi_inhibited = part_is_inhibited(pi, e); /* Get the particle position and radius. */ double pix[3]; @@ -1954,14 +2000,14 @@ void DOSELF2(struct runner *r, struct cell *restrict c) { #ifdef SWIFT_DEBUG_CHECKS /* Check that particles have been drifted to the current time */ - if (pi->ti_drift != e->ti_current) + if (pi->ti_drift != e->ti_current && !pi_inhibited) error("Particle pi not drifted to current time"); if (pj->ti_drift != e->ti_current) error("Particle pj not drifted to current time"); #endif /* Hit or miss? */ - if (r2 < hig2 || r2 < hj * hj * kernel_gamma2) { + if ((r2 < hig2 || r2 < hj * hj * kernel_gamma2) && !pi_inhibited) { IACT_NONSYM(r2, dx, hj, hi, pj, pi, a, H); #if (FUNCTION_TASK_LOOP == TASK_LOOP_DENSITY) @@ -1982,6 +2028,7 @@ void DOSELF2(struct runner *r, struct cell *restrict c) { /* Get a pointer to the jth particle. */ struct part *restrict pj = &parts[pjd]; + const int pj_inhibited = part_is_inhibited(pj, e); const float hj = pj->h; /* Compute the pairwise distance. */ @@ -1994,14 +2041,14 @@ void DOSELF2(struct runner *r, struct cell *restrict c) { #ifdef SWIFT_DEBUG_CHECKS /* Check that particles have been drifted to the current time */ - if (pi->ti_drift != e->ti_current) + if (pi->ti_drift != e->ti_current && !pi_inhibited) error("Particle pi not drifted to current time"); - if (pj->ti_drift != e->ti_current) + if (pj->ti_drift != e->ti_current && !pj_inhibited) error("Particle pj not drifted to current time"); #endif /* Hit or miss? */ - if (r2 < hig2 || r2 < hj * hj * kernel_gamma2) { + if ((r2 < hig2 || r2 < hj * hj * kernel_gamma2) && !pj_inhibited) { /* Does pj need to be updated too? */ if (part_is_active(pj, e)) { @@ -2041,7 +2088,7 @@ void DOSELF2_BRANCH(struct runner *r, struct cell *c) { if (!cell_is_active_hydro(c, e)) return; /* Did we mess up the recursion? 
*/ - if (c->h_max_old * kernel_gamma > c->dmin) + if (c->hydro.h_max_old * kernel_gamma > c->dmin) error("Cell smaller than smoothing length"); /* Check that cells are drifted. */ @@ -2079,7 +2126,7 @@ void DOSUB_PAIR1(struct runner *r, struct cell *ci, struct cell *cj, int sid, /* Should we even bother? */ if (!cell_is_active_hydro(ci, e) && !cell_is_active_hydro(cj, e)) return; - if (ci->count == 0 || cj->count == 0) return; + if (ci->hydro.count == 0 || cj->hydro.count == 0) return; /* Get the type of pair if not specified explicitly. */ double shift[3]; @@ -2295,12 +2342,18 @@ void DOSUB_PAIR1(struct runner *r, struct cell *ci, struct cell *cj, int sid, error("Interacting undrifted cells."); /* Do any of the cells need to be sorted first? */ - if (!(ci->sorted & (1 << sid)) || - ci->dx_max_sort_old > ci->dmin * space_maxreldx) - error("Interacting unsorted cell."); - if (!(cj->sorted & (1 << sid)) || - cj->dx_max_sort_old > cj->dmin * space_maxreldx) - error("Interacting unsorted cell."); + if (!(ci->hydro.sorted & (1 << sid)) || + ci->hydro.dx_max_sort_old > ci->dmin * space_maxreldx) + error( + "Interacting unsorted cell. ci->hydro.dx_max_sort_old=%e ci->dmin=%e " + "ci->sorted=%d sid=%d", + ci->hydro.dx_max_sort_old, ci->dmin, ci->hydro.sorted, sid); + if (!(cj->hydro.sorted & (1 << sid)) || + cj->hydro.dx_max_sort_old > cj->dmin * space_maxreldx) + error( + "Interacting unsorted cell. cj->hydro.dx_max_sort_old=%e cj->dmin=%e " + "cj->sorted=%d sid=%d", + cj->hydro.dx_max_sort_old, cj->dmin, cj->hydro.sorted, sid); /* Compute the interactions. */ DOPAIR1_BRANCH(r, ci, cj); @@ -2321,7 +2374,7 @@ void DOSUB_SELF1(struct runner *r, struct cell *ci, int gettimer) { TIMER_TIC; /* Should we even bother? */ - if (ci->count == 0 || !cell_is_active_hydro(ci, r->e)) return; + if (ci->hydro.count == 0 || !cell_is_active_hydro(ci, r->e)) return; /* Recurse? 
*/ if (cell_can_recurse_in_self_hydro_task(ci)) { @@ -2370,7 +2423,7 @@ void DOSUB_PAIR2(struct runner *r, struct cell *ci, struct cell *cj, int sid, /* Should we even bother? */ if (!cell_is_active_hydro(ci, e) && !cell_is_active_hydro(cj, e)) return; - if (ci->count == 0 || cj->count == 0) return; + if (ci->hydro.count == 0 || cj->hydro.count == 0) return; /* Get the type of pair if not specified explicitly. */ double shift[3]; @@ -2586,12 +2639,18 @@ void DOSUB_PAIR2(struct runner *r, struct cell *ci, struct cell *cj, int sid, error("Interacting undrifted cells."); /* Do any of the cells need to be sorted first? */ - if (!(ci->sorted & (1 << sid)) || - ci->dx_max_sort_old > ci->dmin * space_maxreldx) - error("Interacting unsorted cells."); - if (!(cj->sorted & (1 << sid)) || - cj->dx_max_sort_old > cj->dmin * space_maxreldx) - error("Interacting unsorted cells."); + if (!(ci->hydro.sorted & (1 << sid)) || + ci->hydro.dx_max_sort_old > ci->dmin * space_maxreldx) + error( + "Interacting unsorted cell. ci->hydro.dx_max_sort_old=%e ci->dmin=%e " + "ci->sorted=%d sid=%d", + ci->hydro.dx_max_sort_old, ci->dmin, ci->hydro.sorted, sid); + if (!(cj->hydro.sorted & (1 << sid)) || + cj->hydro.dx_max_sort_old > cj->dmin * space_maxreldx) + error( + "Interacting unsorted cell. cj->hydro.dx_max_sort_old=%e cj->dmin=%e " + "cj->sorted=%d sid=%d", + cj->hydro.dx_max_sort_old, cj->dmin, cj->hydro.sorted, sid); /* Compute the interactions. */ DOPAIR2_BRANCH(r, ci, cj); @@ -2612,7 +2671,7 @@ void DOSUB_SELF2(struct runner *r, struct cell *ci, int gettimer) { TIMER_TIC; /* Should we even bother? */ - if (ci->count == 0 || !cell_is_active_hydro(ci, r->e)) return; + if (ci->hydro.count == 0 || !cell_is_active_hydro(ci, r->e)) return; /* Recurse? 
*/ if (cell_can_recurse_in_self_hydro_task(ci)) { @@ -2647,15 +2706,16 @@ void DOSUB_SUBSET(struct runner *r, struct cell *ci, struct part *parts, if (!cell_is_active_hydro(ci, e) && (cj == NULL || !cell_is_active_hydro(cj, e))) return; - if (ci->count == 0 || (cj != NULL && cj->count == 0)) return; + if (ci->hydro.count == 0 || (cj != NULL && cj->hydro.count == 0)) return; /* Find out in which sub-cell of ci the parts are. */ struct cell *sub = NULL; if (ci->split) { for (int k = 0; k < 8; k++) { if (ci->progeny[k] != NULL) { - if (&parts[ind[0]] >= &ci->progeny[k]->parts[0] && - &parts[ind[0]] < &ci->progeny[k]->parts[ci->progeny[k]->count]) { + if (&parts[ind[0]] >= &ci->progeny[k]->hydro.parts[0] && + &parts[ind[0]] < + &ci->progeny[k]->hydro.parts[ci->progeny[k]->hydro.count]) { sub = ci->progeny[k]; break; } diff --git a/src/runner_doiact_grav.h b/src/runner_doiact_grav.h index dcc3265f34e405a1a414085851ce8d30b9cd3d3d..c6885746a29fd7b6bd828496316f8dad01c1b7da 100644 --- a/src/runner_doiact_grav.h +++ b/src/runner_doiact_grav.h @@ -48,8 +48,9 @@ static INLINE void runner_do_grav_down(struct runner *r, struct cell *c, TIMER_TIC; #ifdef SWIFT_DEBUG_CHECKS - if (c->ti_old_multipole != e->ti_current) error("c->multipole not drifted."); - if (c->multipole->pot.ti_init != e->ti_current) + if (c->grav.ti_old_multipole != e->ti_current) + error("c->multipole not drifted."); + if (c->grav.multipole->pot.ti_init != e->ti_current) error("c->field tensor not initialised"); #endif @@ -65,22 +66,22 @@ static INLINE void runner_do_grav_down(struct runner *r, struct cell *c, if (cp != NULL && cell_is_active_gravity(cp, e)) { #ifdef SWIFT_DEBUG_CHECKS - if (cp->ti_old_multipole != e->ti_current) + if (cp->grav.ti_old_multipole != e->ti_current) error("cp->multipole not drifted."); - if (cp->multipole->pot.ti_init != e->ti_current) + if (cp->grav.multipole->pot.ti_init != e->ti_current) error("cp->field tensor not initialised"); #endif /* If the tensor received any 
contribution, push it down */ - if (c->multipole->pot.interacted) { + if (c->grav.multipole->pot.interacted) { struct grav_tensor shifted_tensor; /* Shift the field tensor */ - gravity_L2L(&shifted_tensor, &c->multipole->pot, cp->multipole->CoM, - c->multipole->CoM); + gravity_L2L(&shifted_tensor, &c->grav.multipole->pot, + cp->grav.multipole->CoM, c->grav.multipole->CoM); /* Add it to this level's tensor */ - gravity_field_tensors_add(&cp->multipole->pot, &shifted_tensor); + gravity_field_tensors_add(&cp->grav.multipole->pot, &shifted_tensor); } /* Recurse */ @@ -93,16 +94,16 @@ static INLINE void runner_do_grav_down(struct runner *r, struct cell *c, /* Leaf case */ /* We can abort early if no interactions via multipole happened */ - if (!c->multipole->pot.interacted) return; + if (!c->grav.multipole->pot.interacted) return; if (!cell_are_gpart_drifted(c, e)) error("Un-drifted gparts"); /* Cell properties */ - struct gpart *gparts = c->gparts; - const int gcount = c->gcount; - const struct grav_tensor *pot = &c->multipole->pot; - const double CoM[3] = {c->multipole->CoM[0], c->multipole->CoM[1], - c->multipole->CoM[2]}; + struct gpart *gparts = c->grav.parts; + const int gcount = c->grav.count; + const struct grav_tensor *pot = &c->grav.multipole->pot; + const double CoM[3] = {c->grav.multipole->CoM[0], c->grav.multipole->CoM[1], + c->grav.multipole->CoM[2]}; /* Apply accelerations to the particles */ for (int i = 0; i < gcount; ++i) { @@ -117,8 +118,15 @@ static INLINE void runner_do_grav_down(struct runner *r, struct cell *c, /* Check that particles have been drifted to the current time */ if (gp->ti_drift != e->ti_current) error("gpart not drifted to current time"); - if (c->multipole->pot.ti_init != e->ti_current) + if (c->grav.multipole->pot.ti_init != e->ti_current) error("c->field tensor not initialised"); + + /* Check that we are not updating an inhibited particle */ + if (gpart_is_inhibited(gp, e)) error("Updating an inhibited particle!"); + + /* Check 
that the particle was initialised */ + if (gp->initialised == 0) + error("Adding forces to an un-initialised gpart."); #endif /* Apply the kernel */ gravity_L2P(pot, CoM, gp); @@ -221,8 +229,22 @@ static INLINE void runner_dopair_grav_pp_full( /* Check that particles have been drifted to the current time */ if (gparts_i[pid].ti_drift != e->ti_current) error("gpi not drifted to current time"); - if (pjd < gcount_j && gparts_j[pjd].ti_drift != e->ti_current) + if (pjd < gcount_j && gparts_j[pjd].ti_drift != e->ti_current && + !gpart_is_inhibited(&gparts_j[pjd], e)) error("gpj not drifted to current time"); + + /* Check that we are not updating an inhibited particle */ + if (gpart_is_inhibited(&gparts_i[pid], e)) + error("Updating an inhibited particle!"); + + /* Check that the particle we interact with was not inhibited */ + if (pjd < gcount_j && gpart_is_inhibited(&gparts_j[pjd], e) && + mass_j != 0.f) + error("Inhibited particle used as gravity source."); + + /* Check that the particle was initialised */ + if (gparts_i[pid].initialised == 0) + error("Adding forces to an un-initialised gpart."); #endif /* Interact! 
*/ @@ -238,7 +260,8 @@ static INLINE void runner_dopair_grav_pp_full( #ifdef SWIFT_DEBUG_CHECKS /* Update the interaction counter if it's not a padded gpart */ - if (pjd < gcount_j) gparts_i[pid].num_interacted++; + if (pjd < gcount_j && !gpart_is_inhibited(&gparts_j[pjd], e)) + gparts_i[pid].num_interacted++; #endif } @@ -347,8 +370,22 @@ static INLINE void runner_dopair_grav_pp_truncated( /* Check that particles have been drifted to the current time */ if (gparts_i[pid].ti_drift != e->ti_current) error("gpi not drifted to current time"); - if (pjd < gcount_j && gparts_j[pjd].ti_drift != e->ti_current) + if (pjd < gcount_j && gparts_j[pjd].ti_drift != e->ti_current && + !gpart_is_inhibited(&gparts_j[pjd], e)) error("gpj not drifted to current time"); + + /* Check that we are not updating an inhibited particle */ + if (gpart_is_inhibited(&gparts_i[pid], e)) + error("Updating an inhibited particle!"); + + /* Check that the particle we interact with was not inhibited */ + if (pjd < gcount_j && gpart_is_inhibited(&gparts_j[pjd], e) && + mass_j != 0.f) + error("Inhibited particle used as gravity source."); + + /* Check that the particle was initialised */ + if (gparts_i[pid].initialised == 0) + error("Adding forces to an un-initialised gpart."); #endif /* Interact! 
*/ @@ -364,7 +401,8 @@ static INLINE void runner_dopair_grav_pp_truncated( #ifdef SWIFT_DEBUG_CHECKS /* Update the interaction counter if it's not a padded gpart */ - if (pjd < gcount_j) gparts_i[pid].num_interacted++; + if (pjd < gcount_j && !gpart_is_inhibited(&gparts_j[pjd], e)) + gparts_i[pid].num_interacted++; #endif } @@ -433,6 +471,18 @@ static INLINE void runner_dopair_grav_pm_full( if (pid < gcount_i && !gpart_is_active(&gparts_i[pid], e)) error("Active particle went through the cache"); + /* Check that particles have been drifted to the current time */ + if (gparts_i[pid].ti_drift != e->ti_current) + error("gpi not drifted to current time"); + + /* Check that we are not updating an inhibited particle */ + if (gpart_is_inhibited(&gparts_i[pid], e)) + error("Updating an inhibited particle!"); + + /* Check that the particle was initialised */ + if (gparts_i[pid].initialised == 0) + error("Adding forces to an un-initialised gpart."); + if (pid >= gcount_i) error("Adding forces to padded particle"); #endif @@ -458,13 +508,13 @@ static INLINE void runner_dopair_grav_pm_full( const float r2 = dx * dx + dy * dy + dz * dz; -#ifdef SWIFT_DEBUG_CHECKSa - const float r_max_j = cj->multipole->r_max; +#ifdef SWIFT_DEBUG_CHECKS + const float r_max_j = cj->grav.multipole->r_max; const float r_max2 = r_max_j * r_max_j; const float theta_crit2 = e->gravity_properties->theta_crit2; - /* 1.01 to avoid FP rounding false-positives */ - if (!gravity_M2P_accept(r_max2, theta_crit2 * 1.01, r2)) + /* Note: 1.1 to avoid FP rounding false-positives */ + if (!gravity_M2P_accept(r_max2, theta_crit2 * 1.1, r2)) error( "use_mpole[i] set when M2P accept fails CoM=[%e %e %e] pos=[%e %e " "%e], rmax=%e", @@ -485,7 +535,7 @@ static INLINE void runner_dopair_grav_pm_full( #ifdef SWIFT_DEBUG_CHECKS /* Update the interaction counter */ if (pid < gcount_i) - gparts_i[pid].num_interacted += cj->multipole->m_pole.num_gpart; + gparts_i[pid].num_interacted += cj->grav.multipole->m_pole.num_gpart; 
#endif } } @@ -554,6 +604,18 @@ static INLINE void runner_dopair_grav_pm_truncated( if (pid < gcount_i && !gpart_is_active(&gparts_i[pid], e)) error("Active particle went through the cache"); + /* Check that particles have been drifted to the current time */ + if (gparts_i[pid].ti_drift != e->ti_current) + error("gpi not drifted to current time"); + + /* Check that we are not updating an inhibited particle */ + if (gpart_is_inhibited(&gparts_i[pid], e)) + error("Updating an inhibited particle!"); + + /* Check that the particle was initialised */ + if (gparts_i[pid].initialised == 0) + error("Adding forces to an un-initialised gpart."); + if (pid >= gcount_i) error("Adding forces to padded particle"); #endif @@ -578,12 +640,12 @@ static INLINE void runner_dopair_grav_pm_truncated( const float r2 = dx * dx + dy * dy + dz * dz; #ifdef SWIFT_DEBUG_CHECKS - const float r_max_j = cj->multipole->r_max; + const float r_max_j = cj->grav.multipole->r_max; const float r_max2 = r_max_j * r_max_j; const float theta_crit2 = e->gravity_properties->theta_crit2; - /* 1.01 to avoid FP rounding false-positives */ - if (!gravity_M2P_accept(r_max2, theta_crit2 * 1.01, r2)) + /* 1.1 to avoid FP rounding false-positives */ + if (!gravity_M2P_accept(r_max2, theta_crit2 * 1.1, r2)) error( "use_mpole[i] set when M2P accept fails CoM=[%e %e %e] pos=[%e %e " "%e], rmax=%e", @@ -604,7 +666,7 @@ static INLINE void runner_dopair_grav_pm_truncated( #ifdef SWIFT_DEBUG_CHECKS /* Update the interaction counter */ if (pid < gcount_i) - gparts_i[pid].num_interacted += cj->multipole->m_pole.num_gpart; + gparts_i[pid].num_interacted += cj->grav.multipole->m_pole.num_gpart; #endif } } @@ -635,15 +697,18 @@ static INLINE void runner_dopair_grav_pp(struct runner *r, struct cell *ci, /* Recover some useful constants */ const struct engine *e = r->e; const int periodic = e->mesh->periodic; - const float dim[3] = {e->mesh->dim[0], e->mesh->dim[1], e->mesh->dim[2]}; + const float dim[3] = 
{(float)e->mesh->dim[0], (float)e->mesh->dim[1], + (float)e->mesh->dim[2]}; const float r_s_inv = e->mesh->r_s_inv; const double min_trunc = e->mesh->r_cut_min; TIMER_TIC; /* Record activity status */ - const int ci_active = cell_is_active_gravity(ci, e); - const int cj_active = cell_is_active_gravity(cj, e); + const int ci_active = + cell_is_active_gravity(ci, e) && (ci->nodeID == e->nodeID); + const int cj_active = + cell_is_active_gravity(cj, e) && (cj->nodeID == e->nodeID); /* Anything to do here? */ if (!ci_active && !cj_active) return; @@ -655,9 +720,9 @@ static INLINE void runner_dopair_grav_pp(struct runner *r, struct cell *ci, /* Let's start by checking things are drifted */ if (!cell_are_gpart_drifted(ci, e)) error("Un-drifted gparts"); if (!cell_are_gpart_drifted(cj, e)) error("Un-drifted gparts"); - if (cj_active && ci->ti_old_multipole != e->ti_current) + if (cj_active && ci->grav.ti_old_multipole != e->ti_current) error("Un-drifted multipole"); - if (ci_active && cj->ti_old_multipole != e->ti_current) + if (ci_active && cj->grav.ti_old_multipole != e->ti_current) error("Un-drifted multipole"); /* Caches to play with */ @@ -669,24 +734,24 @@ static INLINE void runner_dopair_grav_pp(struct runner *r, struct cell *ci, const double shift_j[3] = {0., 0., 0.}; /* Recover the multipole info and shift the CoM locations */ - const float rmax_i = ci->multipole->r_max; - const float rmax_j = cj->multipole->r_max; + const float rmax_i = ci->grav.multipole->r_max; + const float rmax_j = cj->grav.multipole->r_max; const float rmax2_i = rmax_i * rmax_i; const float rmax2_j = rmax_j * rmax_j; - const struct multipole *multi_i = &ci->multipole->m_pole; - const struct multipole *multi_j = &cj->multipole->m_pole; - const float CoM_i[3] = {(float)(ci->multipole->CoM[0] - shift_i[0]), - (float)(ci->multipole->CoM[1] - shift_i[1]), - (float)(ci->multipole->CoM[2] - shift_i[2])}; - const float CoM_j[3] = {(float)(cj->multipole->CoM[0] - shift_j[0]), - 
(float)(cj->multipole->CoM[1] - shift_j[1]), - (float)(cj->multipole->CoM[2] - shift_j[2])}; + const struct multipole *multi_i = &ci->grav.multipole->m_pole; + const struct multipole *multi_j = &cj->grav.multipole->m_pole; + const float CoM_i[3] = {(float)(ci->grav.multipole->CoM[0] - shift_i[0]), + (float)(ci->grav.multipole->CoM[1] - shift_i[1]), + (float)(ci->grav.multipole->CoM[2] - shift_i[2])}; + const float CoM_j[3] = {(float)(cj->grav.multipole->CoM[0] - shift_j[0]), + (float)(cj->grav.multipole->CoM[1] - shift_j[1]), + (float)(cj->grav.multipole->CoM[2] - shift_j[2])}; /* Start by constructing particle caches */ /* Computed the padded counts */ - const int gcount_i = ci->gcount; - const int gcount_j = cj->gcount; + const int gcount_i = ci->grav.count; + const int gcount_j = cj->grav.count; const int gcount_padded_i = gcount_i - (gcount_i % VEC_SIZE) + VEC_SIZE; const int gcount_padded_j = gcount_j - (gcount_j % VEC_SIZE) + VEC_SIZE; @@ -699,10 +764,10 @@ static INLINE void runner_dopair_grav_pp(struct runner *r, struct cell *ci, /* Fill the caches */ gravity_cache_populate(e->max_active_bin, allow_mpole, periodic, dim, - ci_cache, ci->gparts, gcount_i, gcount_padded_i, + ci_cache, ci->grav.parts, gcount_i, gcount_padded_i, shift_i, CoM_j, rmax2_j, ci, e->gravity_properties); gravity_cache_populate(e->max_active_bin, allow_mpole, periodic, dim, - cj_cache, cj->gparts, gcount_j, gcount_padded_j, + cj_cache, cj->grav.parts, gcount_j, gcount_padded_j, shift_j, CoM_i, rmax2_i, cj, e->gravity_properties); /* Can we use the Newtonian version or do we need the truncated one ? 
*/ @@ -715,25 +780,27 @@ static INLINE void runner_dopair_grav_pp(struct runner *r, struct cell *ci, /* First the P2P */ runner_dopair_grav_pp_full(ci_cache, cj_cache, gcount_i, gcount_j, - gcount_padded_j, periodic, dim, e, ci->gparts, - cj->gparts); + gcount_padded_j, periodic, dim, e, + ci->grav.parts, cj->grav.parts); /* Then the M2P */ if (allow_mpole) runner_dopair_grav_pm_full(ci_cache, gcount_padded_i, CoM_j, multi_j, - periodic, dim, e, ci->gparts, gcount_i, cj); + periodic, dim, e, ci->grav.parts, gcount_i, + cj); } if (cj_active && symmetric) { /* First the P2P */ runner_dopair_grav_pp_full(cj_cache, ci_cache, gcount_j, gcount_i, - gcount_padded_i, periodic, dim, e, cj->gparts, - ci->gparts); + gcount_padded_i, periodic, dim, e, + cj->grav.parts, ci->grav.parts); /* Then the M2P */ if (allow_mpole) runner_dopair_grav_pm_full(cj_cache, gcount_padded_j, CoM_i, multi_i, - periodic, dim, e, cj->gparts, gcount_j, ci); + periodic, dim, e, cj->grav.parts, gcount_j, + ci); } } else { /* Periodic BC */ @@ -757,26 +824,26 @@ static INLINE void runner_dopair_grav_pp(struct runner *r, struct cell *ci, /* First the (truncated) P2P */ runner_dopair_grav_pp_truncated(ci_cache, cj_cache, gcount_i, gcount_j, gcount_padded_j, dim, r_s_inv, e, - ci->gparts, cj->gparts); + ci->grav.parts, cj->grav.parts); /* Then the M2P */ if (allow_mpole) runner_dopair_grav_pm_truncated(ci_cache, gcount_padded_i, CoM_j, - multi_j, dim, r_s_inv, e, ci->gparts, - gcount_i, cj); + multi_j, dim, r_s_inv, e, + ci->grav.parts, gcount_i, cj); } if (cj_active && symmetric) { /* First the (truncated) P2P */ runner_dopair_grav_pp_truncated(cj_cache, ci_cache, gcount_j, gcount_i, gcount_padded_i, dim, r_s_inv, e, - cj->gparts, ci->gparts); + cj->grav.parts, ci->grav.parts); /* Then the M2P */ if (allow_mpole) runner_dopair_grav_pm_truncated(cj_cache, gcount_padded_j, CoM_i, - multi_i, dim, r_s_inv, e, cj->gparts, - gcount_j, ci); + multi_i, dim, r_s_inv, e, + cj->grav.parts, gcount_j, ci); } } else 
{ @@ -789,12 +856,12 @@ static INLINE void runner_dopair_grav_pp(struct runner *r, struct cell *ci, /* First the (Newtonian) P2P */ runner_dopair_grav_pp_full(ci_cache, cj_cache, gcount_i, gcount_j, gcount_padded_j, periodic, dim, e, - ci->gparts, cj->gparts); + ci->grav.parts, cj->grav.parts); /* Then the M2P */ if (allow_mpole) runner_dopair_grav_pm_full(ci_cache, gcount_padded_i, CoM_j, multi_j, - periodic, dim, e, ci->gparts, gcount_i, + periodic, dim, e, ci->grav.parts, gcount_i, cj); } if (cj_active && symmetric) { @@ -802,21 +869,21 @@ static INLINE void runner_dopair_grav_pp(struct runner *r, struct cell *ci, /* First the (Newtonian) P2P */ runner_dopair_grav_pp_full(cj_cache, ci_cache, gcount_j, gcount_i, gcount_padded_i, periodic, dim, e, - cj->gparts, ci->gparts); + cj->grav.parts, ci->grav.parts); /* Then the M2P */ if (allow_mpole) runner_dopair_grav_pm_full(cj_cache, gcount_padded_j, CoM_i, multi_i, - periodic, dim, e, cj->gparts, gcount_j, + periodic, dim, e, cj->grav.parts, gcount_j, ci); } } } /* Write back to the particles */ - if (ci_active) gravity_cache_write_back(ci_cache, ci->gparts, gcount_i); + if (ci_active) gravity_cache_write_back(ci_cache, ci->grav.parts, gcount_i); if (cj_active && symmetric) - gravity_cache_write_back(cj_cache, cj->gparts, gcount_j); + gravity_cache_write_back(cj_cache, cj->grav.parts, gcount_j); TIMER_TOC(timer_dopair_grav_pp); } @@ -892,8 +959,21 @@ static INLINE void runner_doself_grav_pp_full( /* Check that particles have been drifted to the current time */ if (gparts[pid].ti_drift != e->ti_current) error("gpi not drifted to current time"); - if (pjd < gcount && gparts[pjd].ti_drift != e->ti_current) + if (pjd < gcount && gparts[pjd].ti_drift != e->ti_current && + !gpart_is_inhibited(&gparts[pjd], e)) error("gpj not drifted to current time"); + + /* Check that we are not updating an inhibited particle */ + if (gpart_is_inhibited(&gparts[pid], e)) + error("Updating an inhibited particle!"); + + /* Check that the 
particle we interact with was not inhibited */ + if (pjd < gcount && gpart_is_inhibited(&gparts[pjd], e) && mass_j != 0.f) + error("Inhibited particle used as gravity source."); + + /* Check that the particle was initialised */ + if (gparts[pid].initialised == 0) + error("Adding forces to an un-initialised gpart."); #endif /* Interact! */ @@ -909,7 +989,8 @@ static INLINE void runner_doself_grav_pp_full( #ifdef SWIFT_DEBUG_CHECKS /* Update the interaction counter if it's not a padded gpart */ - if (pjd < gcount) gparts[pid].num_interacted++; + if (pjd < gcount && !gpart_is_inhibited(&gparts[pjd], e)) + gparts[pid].num_interacted++; #endif } @@ -1002,8 +1083,21 @@ static INLINE void runner_doself_grav_pp_truncated( /* Check that particles have been drifted to the current time */ if (gparts[pid].ti_drift != e->ti_current) error("gpi not drifted to current time"); - if (pjd < gcount && gparts[pjd].ti_drift != e->ti_current) + if (pjd < gcount && gparts[pjd].ti_drift != e->ti_current && + !gpart_is_inhibited(&gparts[pjd], e)) error("gpj not drifted to current time"); + + /* Check that we are not updating an inhibited particle */ + if (gpart_is_inhibited(&gparts[pid], e)) + error("Updating an inhibited particle!"); + + /* Check that the particle we interact with was not inhibited */ + if (pjd < gcount && gpart_is_inhibited(&gparts[pjd], e) && mass_j != 0.f) + error("Inhibited particle used as gravity source."); + + /* Check that the particle was initialised */ + if (gparts[pid].initialised == 0) + error("Adding forces to an un-initialised gpart."); #endif /* Interact! 
*/ @@ -1019,7 +1113,8 @@ static INLINE void runner_doself_grav_pp_truncated( #ifdef SWIFT_DEBUG_CHECKS /* Update the interaction counter if it's not a padded gpart */ - if (pjd < gcount) gparts[pid].num_interacted++; + if (pjd < gcount && !gpart_is_inhibited(&gparts[pjd], e)) + gparts[pid].num_interacted++; #endif } @@ -1056,7 +1151,7 @@ static INLINE void runner_doself_grav_pp(struct runner *r, struct cell *c) { TIMER_TIC; #ifdef SWIFT_DEBUG_CHECKS - if (c->gcount == 0) error("Doing self gravity on an empty cell !"); + if (c->grav.count == 0) error("Doing self gravity on an empty cell !"); #endif /* Anything to do here? */ @@ -1077,7 +1172,7 @@ static INLINE void runner_doself_grav_pp(struct runner *r, struct cell *c) { c->loc[2] + 0.5 * c->width[2]}; /* Computed the padded counts */ - const int gcount = c->gcount; + const int gcount = c->grav.count; const int gcount_padded = gcount - (gcount % VEC_SIZE) + VEC_SIZE; #ifdef SWIFT_DEBUG_CHECKS @@ -1087,7 +1182,7 @@ static INLINE void runner_doself_grav_pp(struct runner *r, struct cell *c) { #endif /* Fill the cache */ - gravity_cache_populate_no_mpole(e->max_active_bin, ci_cache, c->gparts, + gravity_cache_populate_no_mpole(e->max_active_bin, ci_cache, c->grav.parts, gcount, gcount_padded, loc, c, e->gravity_properties); @@ -1095,33 +1190,101 @@ static INLINE void runner_doself_grav_pp(struct runner *r, struct cell *c) { if (!periodic) { /* Not periodic -> Can always use Newtonian potential */ - runner_doself_grav_pp_full(ci_cache, gcount, gcount_padded, e, c->gparts); + runner_doself_grav_pp_full(ci_cache, gcount, gcount_padded, e, + c->grav.parts); } else { /* Get the maximal distance between any two particles */ - const double max_r = 2. * c->multipole->r_max; + const double max_r = 2. * c->grav.multipole->r_max; /* Do we need to use the truncated interactions ? 
*/ if (max_r > min_trunc) { /* Periodic but far-away cells must use the truncated potential */ runner_doself_grav_pp_truncated(ci_cache, gcount, gcount_padded, r_s_inv, - e, c->gparts); + e, c->grav.parts); } else { /* Periodic but close-by cells can use the full Newtonian potential */ - runner_doself_grav_pp_full(ci_cache, gcount, gcount_padded, e, c->gparts); + runner_doself_grav_pp_full(ci_cache, gcount, gcount_padded, e, + c->grav.parts); } } /* Write back to the particles */ - gravity_cache_write_back(ci_cache, c->gparts, gcount); + gravity_cache_write_back(ci_cache, c->grav.parts, gcount); TIMER_TOC(timer_doself_grav_pp); } +/** + * @brief Computes the interaction of the field tensor and multipole + * of two cells symmetrically. + * + * @param r The #runner. + * @param ci The first #cell. + * @param cj The second #cell. + */ +static INLINE void runner_dopair_grav_mm_symmetric(struct runner *r, + struct cell *restrict ci, + struct cell *restrict cj) { + + /* Some constants */ + const struct engine *e = r->e; + const struct gravity_props *props = e->gravity_properties; + const int periodic = e->mesh->periodic; + const double dim[3] = {e->mesh->dim[0], e->mesh->dim[1], e->mesh->dim[2]}; + const float r_s_inv = e->mesh->r_s_inv; + + TIMER_TIC; + + /* Anything to do here? 
*/ + if ((!cell_is_active_gravity_mm(ci, e) || ci->nodeID != engine_rank) || + (!cell_is_active_gravity_mm(cj, e) || cj->nodeID != engine_rank)) + error("Invalid state in symmetric M-M calculation!"); + + /* Short-cut to the multipole */ + const struct multipole *multi_i = &ci->grav.multipole->m_pole; + const struct multipole *multi_j = &cj->grav.multipole->m_pole; + +#ifdef SWIFT_DEBUG_CHECKS + if (ci == cj) error("Interacting a cell with itself using M2L"); + + if (multi_i->num_gpart == 0) + error("Multipole i does not seem to have been set."); + + if (multi_j->num_gpart == 0) + error("Multipole j does not seem to have been set."); + + if (ci->grav.multipole->pot.ti_init != e->ti_current) + error("ci->grav tensor not initialised."); + + if (cj->grav.multipole->pot.ti_init != e->ti_current) + error("cj->grav tensor not initialised."); + + if (ci->grav.ti_old_multipole != e->ti_current) + error( + "Undrifted multipole ci->grav.ti_old_multipole=%lld ci->nodeID=%d " + "cj->nodeID=%d e->ti_current=%lld", + ci->grav.ti_old_multipole, ci->nodeID, cj->nodeID, e->ti_current); + + if (cj->grav.ti_old_multipole != e->ti_current) + error( + "Undrifted multipole cj->grav.ti_old_multipole=%lld cj->nodeID=%d " + "ci->nodeID=%d e->ti_current=%lld", + cj->grav.ti_old_multipole, cj->nodeID, ci->nodeID, e->ti_current); +#endif + + /* Let's interact at this level */ + gravity_M2L_symmetric(&ci->grav.multipole->pot, &cj->grav.multipole->pot, + multi_i, multi_j, ci->grav.multipole->CoM, + cj->grav.multipole->CoM, props, periodic, dim, r_s_inv); + + TIMER_TOC(timer_dopair_grav_mm); +} + /** * @brief Computes the interaction of the field tensor in a cell with the * multipole of another cell. @@ -1130,9 +1293,9 @@ static INLINE void runner_doself_grav_pp(struct runner *r, struct cell *c) { * @param ci The #cell with field tensor to interact. * @param cj The #cell with the multipole. 
*/ -static INLINE void runner_dopair_grav_mm(struct runner *r, - struct cell *restrict ci, - struct cell *restrict cj) { +static INLINE void runner_dopair_grav_mm_nonsym( + struct runner *r, struct cell *restrict ci, + const struct cell *restrict cj) { /* Some constants */ const struct engine *e = r->e; @@ -1144,10 +1307,10 @@ static INLINE void runner_dopair_grav_mm(struct runner *r, TIMER_TIC; /* Anything to do here? */ - if (!cell_is_active_gravity(ci, e) || ci->nodeID != engine_rank) return; + if (!cell_is_active_gravity_mm(ci, e) || ci->nodeID != engine_rank) return; /* Short-cut to the multipole */ - const struct multipole *multi_j = &cj->multipole->m_pole; + const struct multipole *multi_j = &cj->grav.multipole->m_pole; #ifdef SWIFT_DEBUG_CHECKS if (ci == cj) error("Interacting a cell with itself using M2L"); @@ -1155,31 +1318,97 @@ static INLINE void runner_dopair_grav_mm(struct runner *r, if (multi_j->num_gpart == 0) error("Multipole does not seem to have been set."); - if (ci->multipole->pot.ti_init != e->ti_current) + if (ci->grav.multipole->pot.ti_init != e->ti_current) error("ci->grav tensor not initialised."); -#endif - /* Do we need to drift the multipole ? 
*/ - if (cj->ti_old_multipole != e->ti_current) + if (cj->grav.ti_old_multipole != e->ti_current) error( - "Undrifted multipole cj->ti_old_multipole=%lld cj->nodeID=%d " + "Undrifted multipole cj->grav.ti_old_multipole=%lld cj->nodeID=%d " "ci->nodeID=%d e->ti_current=%lld", - cj->ti_old_multipole, cj->nodeID, ci->nodeID, e->ti_current); + cj->grav.ti_old_multipole, cj->nodeID, ci->nodeID, e->ti_current); +#endif /* Let's interact at this level */ - gravity_M2L(&ci->multipole->pot, multi_j, ci->multipole->CoM, - cj->multipole->CoM, props, periodic, dim, r_s_inv); + gravity_M2L_nonsym(&ci->grav.multipole->pot, multi_j, ci->grav.multipole->CoM, + cj->grav.multipole->CoM, props, periodic, dim, r_s_inv); TIMER_TOC(timer_dopair_grav_mm); } +/** + * @brief Call the M-M calculation on two cells if active. + * + * @param r The #runner object. + * @param ci The first #cell. + * @param cj The second #cell. + */ +static INLINE void runner_dopair_grav_mm(struct runner *r, + struct cell *restrict ci, + struct cell *restrict cj) { + + const struct engine *e = r->e; + + /* What do we need to do? */ + const int do_i = + cell_is_active_gravity_mm(ci, e) && (ci->nodeID == e->nodeID); + const int do_j = + cell_is_active_gravity_mm(cj, e) && (cj->nodeID == e->nodeID); + + /* Do we need drifting first? */ + if (ci->grav.ti_old_multipole < e->ti_current) cell_drift_multipole(ci, e); + if (cj->grav.ti_old_multipole < e->ti_current) cell_drift_multipole(cj, e); + + /* Interact! */ + if (do_i && do_j) + runner_dopair_grav_mm_symmetric(r, ci, cj); + else if (do_i) + runner_dopair_grav_mm_nonsym(r, ci, cj); + else if (do_j) + runner_dopair_grav_mm_nonsym(r, cj, ci); +} + +/** + * @brief Computes all the M-M interactions between all the well-separated (at + * rebuild) pairs of progenies of the two cells. + * + * @param r The #runner thread. + * @param flags The task flag containing the list of well-separated pairs as a + * bit-field. + * @param ci The first #cell. 
+ * @param cj The second #cell. + */ +static INLINE void runner_dopair_grav_mm_progenies(struct runner *r, + const long long flags, + struct cell *restrict ci, + struct cell *restrict cj) { + + /* Loop over all pairs of progenies */ + for (int i = 0; i < 8; i++) { + if (ci->progeny[i] != NULL) { + for (int j = 0; j < 8; j++) { + if (cj->progeny[j] != NULL) { + + struct cell *cpi = ci->progeny[i]; + struct cell *cpj = cj->progeny[j]; + + const int flag = i * 8 + j; + + /* Did we agree to use an M-M interaction here at the last rebuild? */ + if (flags & (1ULL << flag)) runner_dopair_grav_mm(r, cpi, cpj); + } + } + } + } +} + static INLINE void runner_dopair_recursive_grav_pm(struct runner *r, struct cell *ci, const struct cell *cj) { /* Some constants */ const struct engine *e = r->e; const int periodic = e->mesh->periodic; - const float dim[3] = {e->mesh->dim[0], e->mesh->dim[1], e->mesh->dim[2]}; + const float dim[3] = {(float)e->mesh->dim[0], (float)e->mesh->dim[1], + (float)e->mesh->dim[2]}; const float r_s_inv = e->mesh->r_s_inv; /* Anything to do here? */ @@ -1187,14 +1416,14 @@ static INLINE void runner_dopair_recursive_grav_pm(struct runner *r, #ifdef SWIFT_DEBUG_CHECKS /* Early abort? */ - if (ci->gcount == 0 || cj->gcount == 0) + if (ci->grav.count == 0 || cj->grav.count == 0) error("Doing pair gravity on an empty cell !"); /* Sanity check */ if (ci == cj) error("Pair interaction between a cell and itself."); - if (cj->ti_old_multipole != e->ti_current) - error("cj->multipole not drifted."); + if (cj->grav.ti_old_multipole != e->ti_current) + error("cj->grav.multipole not drifted."); #endif /* Can we recurse further? 
*/ @@ -1215,7 +1444,7 @@ static INLINE void runner_dopair_recursive_grav_pm(struct runner *r, struct gravity_cache *const ci_cache = &r->ci_gravity_cache; /* Computed the padded counts */ - const int gcount_i = ci->gcount; + const int gcount_i = ci->grav.count; const int gcount_padded_i = gcount_i - (gcount_i % VEC_SIZE) + VEC_SIZE; #ifdef SWIFT_DEBUG_CHECKS @@ -1225,32 +1454,33 @@ static INLINE void runner_dopair_recursive_grav_pm(struct runner *r, #endif /* Recover the multipole info and the CoM locations */ - const struct multipole *multi_j = &cj->multipole->m_pole; - const float r_max = cj->multipole->r_max; - const float CoM_j[3] = {(float)(cj->multipole->CoM[0]), - (float)(cj->multipole->CoM[1]), - (float)(cj->multipole->CoM[2])}; + const struct multipole *multi_j = &cj->grav.multipole->m_pole; + const float r_max = cj->grav.multipole->r_max; + const float CoM_j[3] = {(float)(cj->grav.multipole->CoM[0]), + (float)(cj->grav.multipole->CoM[1]), + (float)(cj->grav.multipole->CoM[2])}; /* Fill the cache */ gravity_cache_populate_all_mpole( - e->max_active_bin, periodic, dim, ci_cache, ci->gparts, gcount_i, + e->max_active_bin, periodic, dim, ci_cache, ci->grav.parts, gcount_i, gcount_padded_i, ci, CoM_j, r_max * r_max, e->gravity_properties); /* Can we use the Newtonian version or do we need the truncated one ? 
*/ if (!periodic) { runner_dopair_grav_pm_full(ci_cache, gcount_padded_i, CoM_j, multi_j, - periodic, dim, e, ci->gparts, gcount_i, cj); + periodic, dim, e, ci->grav.parts, gcount_i, + cj); } else { runner_dopair_grav_pm_truncated(ci_cache, gcount_padded_i, CoM_j, multi_j, - dim, r_s_inv, e, ci->gparts, gcount_i, + dim, r_s_inv, e, ci->grav.parts, gcount_i, cj); } /* Write back to the particles */ - gravity_cache_write_back(ci_cache, ci->gparts, gcount_i); + gravity_cache_write_back(ci_cache, ci->grav.parts, gcount_i); } } @@ -1288,8 +1518,8 @@ static INLINE void runner_dopair_recursive_grav(struct runner *r, #ifdef SWIFT_DEBUG_CHECKS - const int gcount_i = ci->gcount; - const int gcount_j = cj->gcount; + const int gcount_i = ci->grav.count; + const int gcount_j = cj->grav.count; /* Early abort? */ if (gcount_i == 0 || gcount_j == 0) @@ -1298,17 +1528,19 @@ static INLINE void runner_dopair_recursive_grav(struct runner *r, /* Sanity check */ if (ci == cj) error("Pair interaction between a cell and itself."); - if (cell_is_active_gravity(ci, e) && ci->ti_old_multipole != e->ti_current) - error("ci->multipole not drifted."); - if (cell_is_active_gravity(cj, e) && cj->ti_old_multipole != e->ti_current) - error("cj->multipole not drifted."); + if (cell_is_active_gravity(ci, e) && + ci->grav.ti_old_multipole != e->ti_current) + error("ci->grav.multipole not drifted."); + if (cell_is_active_gravity(cj, e) && + cj->grav.ti_old_multipole != e->ti_current) + error("cj->grav.multipole not drifted."); #endif TIMER_TIC; /* Recover the multipole information */ - struct gravity_tensors *const multi_i = ci->multipole; - struct gravity_tensors *const multi_j = cj->multipole; + struct gravity_tensors *const multi_i = ci->grav.multipole; + struct gravity_tensors *const multi_j = cj->grav.multipole; /* Get the distance between the CoMs */ double dx = multi_i->CoM[0] - multi_j->CoM[0]; @@ -1345,9 +1577,8 @@ static INLINE void runner_dopair_recursive_grav(struct runner *r, /* Can we use 
M-M interactions ? */ if (gravity_M2L_accept(multi_i->r_max, multi_j->r_max, theta_crit2, r2)) { - /* MATTHIEU: make a symmetric M-M interaction function ! */ + /* Go M-M */ runner_dopair_grav_mm(r, ci, cj); - runner_dopair_grav_mm(r, cj, ci); } else if (!ci->split && !cj->split) { @@ -1431,7 +1662,7 @@ static INLINE void runner_doself_recursive_grav(struct runner *r, #ifdef SWIFT_DEBUG_CHECKS /* Early abort? */ - if (c->gcount == 0) error("Doing self gravity on an empty cell !"); + if (c->grav.count == 0) error("Doing self gravity on an empty cell !"); #endif TIMER_TIC; @@ -1467,28 +1698,6 @@ static INLINE void runner_doself_recursive_grav(struct runner *r, if (gettimer) TIMER_TOC(timer_dosub_self_grav); } -/** - * @brief Call the non-symmetric M-M calculation on two cells if active. - * - * @param r The #runner object. - * @param ci The first #cell. - * @param cj The second #cell. - */ -static INLINE void runner_dopair_grav_mm_symmetric(struct runner *r, - struct cell *ci, - struct cell *cj) { - - const struct engine *e = r->e; - -#ifdef SWIFT_DEBUG_CHECKS - if (!cell_is_active_gravity(ci, e) && !cell_is_active_gravity(cj, e)) - error("Running M-M task with two inactive cells."); -#endif - - if (cell_is_active_gravity(ci, e)) runner_dopair_grav_mm(r, ci, cj); - if (cell_is_active_gravity(cj, e)) runner_dopair_grav_mm(r, cj, ci); -} - /** * @brief Performs all M-M interactions between a given top-level cell and all * the other top-levels that are far enough. 
@@ -1505,13 +1714,14 @@ static INLINE void runner_do_grav_long_range(struct runner *r, struct cell *ci, const int periodic = e->mesh->periodic; const double dim[3] = {e->mesh->dim[0], e->mesh->dim[1], e->mesh->dim[2]}; const double theta_crit2 = e->gravity_properties->theta_crit2; - const double max_distance = e->mesh->r_cut_max; + const double max_distance2 = e->mesh->r_cut_max * e->mesh->r_cut_max; TIMER_TIC; /* Recover the list of top-level cells */ struct cell *cells = e->s->cells_top; - const int nr_cells = e->s->nr_cells; + int *cells_with_particles = e->s->cells_with_particles_top; + const int nr_cells_with_particles = e->s->nr_cells_with_particles; /* Anything to do here? */ if (!cell_is_active_gravity(ci, e)) return; @@ -1520,30 +1730,28 @@ static INLINE void runner_do_grav_long_range(struct runner *r, struct cell *ci, error("Non-local cell in long-range gravity task!"); /* Check multipole has been drifted */ - if (ci->ti_old_multipole != e->ti_current) - error("Interacting un-drifted multipole"); + if (ci->grav.ti_old_multipole < e->ti_current) cell_drift_multipole(ci, e); + + /* Get this cell's multipole information */ + struct gravity_tensors *const multi_i = ci->grav.multipole; /* Find this cell's top-level (great-)parent */ struct cell *top = ci; while (top->parent != NULL) top = top->parent; - /* Flag that contributions will be recieved */ - struct gravity_tensors *const multi_i = ci->multipole; - multi_i->pot.interacted = 1; - /* Recover the top-level multipole (for distance checks) */ - struct gravity_tensors *const multi_top = top->multipole; + struct gravity_tensors *const multi_top = top->grav.multipole; const double CoM_rebuild_top[3] = {multi_top->CoM_rebuild[0], multi_top->CoM_rebuild[1], multi_top->CoM_rebuild[2]}; /* Loop over all the top-level cells and go for a M-M interaction if * well-separated */ - for (int n = 0; n < nr_cells; ++n) { + for (int n = 0; n < nr_cells_with_particles; ++n) { /* Handle on the top-level cell and it's gravity 
business*/ - struct cell *cj = &cells[n]; - const struct gravity_tensors *const multi_j = cj->multipole; + const struct cell *cj = &cells[cells_with_particles[n]]; + const struct gravity_tensors *const multi_j = cj->grav.multipole; /* Avoid self contributions */ if (top == cj) continue; @@ -1551,6 +1759,29 @@ static INLINE void runner_do_grav_long_range(struct runner *r, struct cell *ci, /* Skip empty cells */ if (multi_j->m_pole.M_000 == 0.f) continue; + /* Can we escape early in the periodic BC case? */ + if (periodic) { + + /* Minimal distance between any pair of particles */ + const double min_radius2 = + cell_min_dist2_same_size(top, cj, periodic, dim); + + /* Are we beyond the distance where the truncated forces are 0 ?*/ + if (min_radius2 > max_distance2) { + +#ifdef SWIFT_DEBUG_CHECKS + /* Need to account for the interactions we missed */ + multi_i->pot.num_interacted += multi_j->m_pole.num_gpart; +#endif + + /* Record that this multipole received a contribution */ + multi_i->pot.interacted = 1; + + /* We are done here. */ + continue; + } + } + /* Get the distance between the CoMs at the last rebuild*/ double dx_r = CoM_rebuild_top[0] - multi_j->CoM_rebuild[0]; double dy_r = CoM_rebuild_top[1] - multi_j->CoM_rebuild[1]; @@ -1564,27 +1795,17 @@ static INLINE void runner_do_grav_long_range(struct runner *r, struct cell *ci, } const double r2_rebuild = dx_r * dx_r + dy_r * dy_r + dz_r * dz_r; - const double max_radius = - sqrt(r2_rebuild) - (multi_top->r_max_rebuild + multi_j->r_max_rebuild); - - /* Are we beyond the distance where the truncated forces are 0 ?*/ - if (periodic && max_radius > max_distance) { - -#ifdef SWIFT_DEBUG_CHECKS - /* Need to account for the interactions we missed */ - multi_i->pot.num_interacted += multi_j->m_pole.num_gpart; -#endif - continue; - } - /* Are we in charge of this cell pair? 
*/ if (gravity_M2L_accept(multi_top->r_max_rebuild, multi_j->r_max_rebuild, theta_crit2, r2_rebuild)) { /* Call the PM interaction fucntion on the active sub-cells of ci */ - runner_dopair_grav_mm(r, ci, cj); + runner_dopair_grav_mm_nonsym(r, ci, cj); // runner_dopair_recursive_grav_pm(r, ci, cj); + /* Record that this multipole received a contribution */ + multi_i->pot.interacted = 1; + } /* We are in charge of this pair */ } /* Loop over top-level cells */ diff --git a/src/runner_doiact_stars.h b/src/runner_doiact_stars.h new file mode 100644 index 0000000000000000000000000000000000000000..e816d80399a0fef85645c914168dd4038f55988c --- /dev/null +++ b/src/runner_doiact_stars.h @@ -0,0 +1,1399 @@ +/******************************************************************************* + * This file is part of SWIFT. + * Copyright (c) 2016 Matthieu Schaller (matthieu.schaller@durham.ac.uk) + * 2018 Loic Hausammann (loic.hausammann@epfl.ch) + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published + * by the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public License + * along with this program. If not, see <http://www.gnu.org/licenses/>. + * + ******************************************************************************/ + +/* Before including this file, define FUNCTION, which is the + name of the interaction function. This creates the interaction functions + runner_dopair_FUNCTION, runner_doself_FUNCTION and runner_dosub_FUNCTION + calling the pairwise interaction function runner_iact_FUNCTION. 
*/ + +#define PASTE(x, y) x##_##y + +#define _DOSELF1_STARS(f) PASTE(runner_doself_stars, f) +#define DOSELF1_STARS _DOSELF1_STARS(FUNCTION) + +#define _DO_NONSYM_PAIR1_STARS(f) PASTE(runner_do_nonsym_pair_stars, f) +#define DO_NONSYM_PAIR1_STARS _DO_NONSYM_PAIR1_STARS(FUNCTION) + +#define _DOPAIR1_STARS(f) PASTE(runner_dopair_stars, f) +#define DOPAIR1_STARS _DOPAIR1_STARS(FUNCTION) + +#define _DOPAIR1_SUBSET_STARS(f) PASTE(runner_dopair_subset_stars, f) +#define DOPAIR1_SUBSET_STARS _DOPAIR1_SUBSET_STARS(FUNCTION) + +#define _DOSELF1_SUBSET_STARS(f) PASTE(runner_doself_subset_stars, f) +#define DOSELF1_SUBSET_STARS _DOSELF1_SUBSET_STARS(FUNCTION) + +#define _DOSELF1_SUBSET_BRANCH_STARS(f) \ + PASTE(runner_doself_subset_branch_stars, f) +#define DOSELF1_SUBSET_BRANCH_STARS _DOSELF1_SUBSET_BRANCH_STARS(FUNCTION) + +#define _DOPAIR1_SUBSET_BRANCH_STARS(f) \ + PASTE(runner_dopair_subset_branch_stars, f) +#define DOPAIR1_SUBSET_BRANCH_STARS _DOPAIR1_SUBSET_BRANCH_STARS(FUNCTION) + +#define _DOSUB_SUBSET_STARS(f) PASTE(runner_dosub_subset_stars, f) +#define DOSUB_SUBSET_STARS _DOSUB_SUBSET_STARS(FUNCTION) + +#define _DOSELF1_BRANCH_STARS(f) PASTE(runner_doself_branch_stars, f) +#define DOSELF1_BRANCH_STARS _DOSELF1_BRANCH_STARS(FUNCTION) + +#define _DOPAIR1_BRANCH_STARS(f) PASTE(runner_dopair_branch_stars, f) +#define DOPAIR1_BRANCH_STARS _DOPAIR1_BRANCH_STARS(FUNCTION) + +#define _DOSUB_PAIR1_STARS(f) PASTE(runner_dosub_pair_stars, f) +#define DOSUB_PAIR1_STARS _DOSUB_PAIR1_STARS(FUNCTION) + +#define _DOSUB_SELF1_STARS(f) PASTE(runner_dosub_self_stars, f) +#define DOSUB_SELF1_STARS _DOSUB_SELF1_STARS(FUNCTION) + +#define _IACT_STARS(f) PASTE(runner_iact_nonsym_stars, f) +#define IACT_STARS _IACT_STARS(FUNCTION) + +/** + * @brief Calculate the number density of #part around the #spart + * + * @param r runner task + * @param c cell + * @param timer 1 if the time is to be recorded. 
+ */ +void DOSELF1_STARS(struct runner *r, struct cell *c, int timer) { + const struct engine *e = r->e; + const struct cosmology *cosmo = e->cosmology; + + /* Anything to do here? */ + if (!cell_is_active_stars(c, e)) return; + if (c->hydro.count == 0 && c->stars.count == 0) return; + + /* Cosmological terms */ + const float a = cosmo->a; + const float H = cosmo->H; + + const int scount = c->stars.count; + const int count = c->hydro.count; + struct spart *restrict sparts = c->stars.parts; + struct part *restrict parts = c->hydro.parts; + + /* Loop over the sparts in ci. */ + for (int sid = 0; sid < scount; sid++) { + + /* Get a hold of the ith spart in ci. */ + struct spart *restrict si = &sparts[sid]; + const float hi = si->h; + const float hig2 = hi * hi * kernel_gamma2; + const float six[3] = {(float)(si->x[0] - c->loc[0]), + (float)(si->x[1] - c->loc[1]), + (float)(si->x[2] - c->loc[2])}; + + /* Loop over the parts in cj. */ + for (int pjd = 0; pjd < count; pjd++) { + + /* Get a pointer to the jth particle. */ + struct part *restrict pj = &parts[pjd]; + const float hj = pj->h; + + /* Compute the pairwise distance. */ + const float pjx[3] = {(float)(pj->x[0] - c->loc[0]), + (float)(pj->x[1] - c->loc[1]), + (float)(pj->x[2] - c->loc[2])}; + float dx[3] = {six[0] - pjx[0], six[1] - pjx[1], six[2] - pjx[2]}; + const float r2 = dx[0] * dx[0] + dx[1] * dx[1] + dx[2] * dx[2]; + +#ifdef SWIFT_DEBUG_CHECKS + /* Check that particles have been drifted to the current time */ + if (pj->ti_drift != e->ti_current) + error("Particle pj not drifted to current time"); +#endif + + if (r2 > 0.f && r2 < hig2) { + IACT_STARS(r2, dx, hi, hj, si, pj, a, H); + } + } /* loop over the parts in ci. */ + } /* loop over the sparts in ci. 
*/ +} + +/** + * @brief Calculate the number density of cj #part around the ci #spart + * + * @param r runner task + * @param ci The first #cell + * @param cj The second #cell + */ +void DO_NONSYM_PAIR1_STARS(struct runner *r, struct cell *restrict ci, + struct cell *restrict cj) { + + const struct engine *e = r->e; + const struct cosmology *cosmo = e->cosmology; + + /* Anything to do here? */ + if (!cell_is_active_stars(ci, e)) return; + + const int scount_i = ci->stars.count; + const int count_j = cj->hydro.count; + struct spart *restrict sparts_i = ci->stars.parts; + struct part *restrict parts_j = cj->hydro.parts; + + /* Cosmological terms */ + const float a = cosmo->a; + const float H = cosmo->H; + + /* Get the relative distance between the pairs, wrapping. */ + double shift[3] = {0.0, 0.0, 0.0}; + for (int k = 0; k < 3; k++) { + if (cj->loc[k] - ci->loc[k] < -e->s->dim[k] / 2) + shift[k] = e->s->dim[k]; + else if (cj->loc[k] - ci->loc[k] > e->s->dim[k] / 2) + shift[k] = -e->s->dim[k]; + } + + /* Loop over the sparts in ci. */ + for (int sid = 0; sid < scount_i; sid++) { + + /* Get a hold of the ith spart in ci. */ + struct spart *restrict si = &sparts_i[sid]; + const float hi = si->h; + const float hig2 = hi * hi * kernel_gamma2; + const float six[3] = {(float)(si->x[0] - (cj->loc[0] + shift[0])), + (float)(si->x[1] - (cj->loc[1] + shift[1])), + (float)(si->x[2] - (cj->loc[2] + shift[2]))}; + + /* Loop over the parts in cj. */ + for (int pjd = 0; pjd < count_j; pjd++) { + + /* Get a pointer to the jth particle. */ + struct part *restrict pj = &parts_j[pjd]; + const float hj = pj->h; + + /* Compute the pairwise distance. 
*/ + const float pjx[3] = {(float)(pj->x[0] - cj->loc[0]), + (float)(pj->x[1] - cj->loc[1]), + (float)(pj->x[2] - cj->loc[2])}; + float dx[3] = {six[0] - pjx[0], six[1] - pjx[1], six[2] - pjx[2]}; + const float r2 = dx[0] * dx[0] + dx[1] * dx[1] + dx[2] * dx[2]; + +#ifdef SWIFT_DEBUG_CHECKS + /* Check that particles have been drifted to the current time */ + if (pj->ti_drift != e->ti_current) + error("Particle pj not drifted to current time"); +#endif + + if (r2 < hig2) IACT_STARS(r2, dx, hi, hj, si, pj, a, H); + + } /* loop over the parts in cj. */ + } /* loop over the parts in ci. */ +} + +void DOPAIR1_STARS(struct runner *r, struct cell *restrict ci, + struct cell *restrict cj, int timer) { + + if (ci->stars.count != 0 && cj->hydro.count != 0) + DO_NONSYM_PAIR1_STARS(r, ci, cj); + if (cj->stars.count != 0 && ci->hydro.count != 0) + DO_NONSYM_PAIR1_STARS(r, cj, ci); +} + +/** + * @brief Compute the interactions between a cell pair, but only for the + * given indices in ci. + * + * Version using a brute-force algorithm. + * + * @param r The #runner. + * @param ci The first #cell. + * @param sparts_i The #spart to interact with @c cj. + * @param ind The list of indices of particles in @c ci to interact with. + * @param scount The number of particles in @c ind. + * @param cj The second #cell. + * @param shift The shift vector to apply to the particles in ci. + */ +void DOPAIR1_SUBSET_STARS(struct runner *r, struct cell *restrict ci, + struct spart *restrict sparts_i, int *restrict ind, + int scount, struct cell *restrict cj, + const double *shift) { + + const struct engine *e = r->e; + const struct cosmology *cosmo = e->cosmology; + + const int count_j = cj->hydro.count; + struct part *restrict parts_j = cj->hydro.parts; + + /* Cosmological terms */ + const float a = cosmo->a; + const float H = cosmo->H; + + /* Loop over the parts_i. */ + for (int pid = 0; pid < scount; pid++) { + + /* Get a hold of the ith part in ci. 
*/ + struct spart *restrict spi = &sparts_i[ind[pid]]; + double spix[3]; + for (int k = 0; k < 3; k++) spix[k] = spi->x[k] - shift[k]; + const float hi = spi->h; + const float hig2 = hi * hi * kernel_gamma2; + +#ifdef SWIFT_DEBUG_CHECKS + if (!spart_is_active(spi, e)) + error("Trying to correct smoothing length of inactive particle !"); +#endif + + /* Loop over the parts in cj. */ + for (int pjd = 0; pjd < count_j; pjd++) { + + /* Get a pointer to the jth particle. */ + struct part *restrict pj = &parts_j[pjd]; + + /* Compute the pairwise distance. */ + float r2 = 0.0f; + float dx[3]; + for (int k = 0; k < 3; k++) { + dx[k] = spix[k] - pj->x[k]; + r2 += dx[k] * dx[k]; + } + +#ifdef SWIFT_DEBUG_CHECKS + /* Check that particles have been drifted to the current time */ + if (pj->ti_drift != e->ti_current) + error("Particle pj not drifted to current time"); +#endif + /* Hit or miss? */ + if (r2 < hig2) { + IACT_STARS(r2, dx, hi, pj->h, spi, pj, a, H); + } + } /* loop over the parts in cj. */ + } /* loop over the parts in ci. */ +} + +/** + * @brief Compute the interactions between a cell pair, but only for the + * given indices in ci. + * + * @param r The #runner. + * @param ci The first #cell. + * @param sparts The #spart to interact. + * @param ind The list of indices of particles in @c ci to interact with. + * @param scount The number of particles in @c ind. + */ +void DOSELF1_SUBSET_STARS(struct runner *r, struct cell *restrict ci, + struct spart *restrict sparts, int *restrict ind, + int scount) { + + const struct engine *e = r->e; + const struct cosmology *cosmo = e->cosmology; + + /* Cosmological terms */ + const float a = cosmo->a; + const float H = cosmo->H; + + const int count_i = ci->hydro.count; + struct part *restrict parts_j = ci->hydro.parts; + + /* Loop over the parts in ci. */ + for (int spid = 0; spid < scount; spid++) { + + /* Get a hold of the ith part in ci. 
*/ + struct spart *spi = &sparts[ind[spid]]; + const float spix[3] = {(float)(spi->x[0] - ci->loc[0]), + (float)(spi->x[1] - ci->loc[1]), + (float)(spi->x[2] - ci->loc[2])}; + const float hi = spi->h; + const float hig2 = hi * hi * kernel_gamma2; + +#ifdef SWIFT_DEBUG_CHECKS + if (!spart_is_active(spi, e)) + error("Inactive particle in subset function!"); +#endif + + /* Loop over the parts in cj. */ + for (int pjd = 0; pjd < count_i; pjd++) { + + /* Get a pointer to the jth particle. */ + struct part *restrict pj = &parts_j[pjd]; + const float hj = pj->h; + + /* Compute the pairwise distance. */ + const float pjx[3] = {(float)(pj->x[0] - ci->loc[0]), + (float)(pj->x[1] - ci->loc[1]), + (float)(pj->x[2] - ci->loc[2])}; + float dx[3] = {spix[0] - pjx[0], spix[1] - pjx[1], spix[2] - pjx[2]}; + const float r2 = dx[0] * dx[0] + dx[1] * dx[1] + dx[2] * dx[2]; + +#ifdef SWIFT_DEBUG_CHECKS + /* Check that particles have been drifted to the current time */ + if (pj->ti_drift != e->ti_current) + error("Particle pj not drifted to current time"); +#endif + + /* Hit or miss? */ + if (r2 > 0.f && r2 < hig2) { + IACT_STARS(r2, dx, hi, hj, spi, pj, a, H); + } + } /* loop over the parts in cj. */ + } /* loop over the parts in ci. */ +} + +/** + * @brief Determine which version of DOSELF1_SUBSET_STARS needs to be called + * depending on the optimisation level. + * + * @param r The #runner. + * @param ci The first #cell. + * @param sparts The #spart to interact. + * @param ind The list of indices of particles in @c ci to interact with. + * @param scount The number of particles in @c ind. + */ +void DOSELF1_SUBSET_BRANCH_STARS(struct runner *r, struct cell *restrict ci, + struct spart *restrict sparts, + int *restrict ind, int scount) { + + DOSELF1_SUBSET_STARS(r, ci, sparts, ind, scount); +} + +/** + * @brief Determine which version of DOPAIR1_SUBSET_STARS needs to be called + * depending on the orientation of the cells or whether DOPAIR1_SUBSET_STARS + * needs to be called at all. 
+ * + * @param r The #runner. + * @param ci The first #cell. + * @param sparts_i The #spart to interact with @c cj. + * @param ind The list of indices of particles in @c ci to interact with. + * @param scount The number of particles in @c ind. + * @param cj The second #cell. + */ +void DOPAIR1_SUBSET_BRANCH_STARS(struct runner *r, struct cell *restrict ci, + struct spart *restrict sparts_i, + int *restrict ind, int scount, + struct cell *restrict cj) { + + const struct engine *e = r->e; + + /* Get the relative distance between the pairs, wrapping. */ + double shift[3] = {0.0, 0.0, 0.0}; + for (int k = 0; k < 3; k++) { + if (cj->loc[k] - ci->loc[k] < -e->s->dim[k] / 2) + shift[k] = e->s->dim[k]; + else if (cj->loc[k] - ci->loc[k] > e->s->dim[k] / 2) + shift[k] = -e->s->dim[k]; + } + + DOPAIR1_SUBSET_STARS(r, ci, sparts_i, ind, scount, cj, shift); +} + +void DOSUB_SUBSET_STARS(struct runner *r, struct cell *ci, struct spart *sparts, + int *ind, int scount, struct cell *cj, int sid, + int gettimer) { + + const struct engine *e = r->e; + struct space *s = e->s; + + /* Should we even bother? */ + if (!cell_is_active_stars(ci, e) && + (cj == NULL || !cell_is_active_stars(cj, e))) + return; + + /* Find out in which sub-cell of ci the parts are. */ + struct cell *sub = NULL; + if (ci->split) { + for (int k = 0; k < 8; k++) { + if (ci->progeny[k] != NULL) { + if (&sparts[ind[0]] >= &ci->progeny[k]->stars.parts[0] && + &sparts[ind[0]] < + &ci->progeny[k]->stars.parts[ci->progeny[k]->stars.count]) { + sub = ci->progeny[k]; + break; + } + } + } + } + + /* Is this a single cell? */ + if (cj == NULL) { + + /* Recurse? */ + if (cell_can_recurse_in_self_stars_task(ci)) { + + /* Loop over all progeny. */ + DOSUB_SUBSET_STARS(r, sub, sparts, ind, scount, NULL, -1, 0); + for (int j = 0; j < 8; j++) + if (ci->progeny[j] != sub && ci->progeny[j] != NULL) + DOSUB_SUBSET_STARS(r, sub, sparts, ind, scount, ci->progeny[j], -1, + 0); + + } + + /* Otherwise, compute self-interaction. 
*/ + else + DOSELF1_SUBSET_BRANCH_STARS(r, ci, sparts, ind, scount); + } /* self-interaction. */ + + /* Otherwise, it's a pair interaction. */ + else { + + /* Recurse? */ + if (cell_can_recurse_in_pair_stars_task(ci) && + cell_can_recurse_in_pair_stars_task(cj)) { + + /* Get the type of pair if not specified explicitly. */ + double shift[3] = {0.0, 0.0, 0.0}; + sid = space_getsid(s, &ci, &cj, shift); + + /* Different types of flags. */ + switch (sid) { + + /* Regular sub-cell interactions of a single cell. */ + case 0: /* ( 1 , 1 , 1 ) */ + if (ci->progeny[7] == sub && cj->progeny[0] != NULL) + DOSUB_SUBSET_STARS(r, ci->progeny[7], sparts, ind, scount, + cj->progeny[0], -1, 0); + if (ci->progeny[7] != NULL && cj->progeny[0] == sub) + DOSUB_SUBSET_STARS(r, cj->progeny[0], sparts, ind, scount, + ci->progeny[7], -1, 0); + break; + + case 1: /* ( 1 , 1 , 0 ) */ + if (ci->progeny[6] == sub && cj->progeny[0] != NULL) + DOSUB_SUBSET_STARS(r, ci->progeny[6], sparts, ind, scount, + cj->progeny[0], -1, 0); + if (ci->progeny[6] != NULL && cj->progeny[0] == sub) + DOSUB_SUBSET_STARS(r, cj->progeny[0], sparts, ind, scount, + ci->progeny[6], -1, 0); + if (ci->progeny[6] == sub && cj->progeny[1] != NULL) + DOSUB_SUBSET_STARS(r, ci->progeny[6], sparts, ind, scount, + cj->progeny[1], -1, 0); + if (ci->progeny[6] != NULL && cj->progeny[1] == sub) + DOSUB_SUBSET_STARS(r, cj->progeny[1], sparts, ind, scount, + ci->progeny[6], -1, 0); + if (ci->progeny[7] == sub && cj->progeny[0] != NULL) + DOSUB_SUBSET_STARS(r, ci->progeny[7], sparts, ind, scount, + cj->progeny[0], -1, 0); + if (ci->progeny[7] != NULL && cj->progeny[0] == sub) + DOSUB_SUBSET_STARS(r, cj->progeny[0], sparts, ind, scount, + ci->progeny[7], -1, 0); + if (ci->progeny[7] == sub && cj->progeny[1] != NULL) + DOSUB_SUBSET_STARS(r, ci->progeny[7], sparts, ind, scount, + cj->progeny[1], -1, 0); + if (ci->progeny[7] != NULL && cj->progeny[1] == sub) + DOSUB_SUBSET_STARS(r, cj->progeny[1], sparts, ind, scount, + ci->progeny[7], 
-1, 0); + break; + + case 2: /* ( 1 , 1 , -1 ) */ + if (ci->progeny[6] == sub && cj->progeny[1] != NULL) + DOSUB_SUBSET_STARS(r, ci->progeny[6], sparts, ind, scount, + cj->progeny[1], -1, 0); + if (ci->progeny[6] != NULL && cj->progeny[1] == sub) + DOSUB_SUBSET_STARS(r, cj->progeny[1], sparts, ind, scount, + ci->progeny[6], -1, 0); + break; + + case 3: /* ( 1 , 0 , 1 ) */ + if (ci->progeny[5] == sub && cj->progeny[0] != NULL) + DOSUB_SUBSET_STARS(r, ci->progeny[5], sparts, ind, scount, + cj->progeny[0], -1, 0); + if (ci->progeny[5] != NULL && cj->progeny[0] == sub) + DOSUB_SUBSET_STARS(r, cj->progeny[0], sparts, ind, scount, + ci->progeny[5], -1, 0); + if (ci->progeny[5] == sub && cj->progeny[2] != NULL) + DOSUB_SUBSET_STARS(r, ci->progeny[5], sparts, ind, scount, + cj->progeny[2], -1, 0); + if (ci->progeny[5] != NULL && cj->progeny[2] == sub) + DOSUB_SUBSET_STARS(r, cj->progeny[2], sparts, ind, scount, + ci->progeny[5], -1, 0); + if (ci->progeny[7] == sub && cj->progeny[0] != NULL) + DOSUB_SUBSET_STARS(r, ci->progeny[7], sparts, ind, scount, + cj->progeny[0], -1, 0); + if (ci->progeny[7] != NULL && cj->progeny[0] == sub) + DOSUB_SUBSET_STARS(r, cj->progeny[0], sparts, ind, scount, + ci->progeny[7], -1, 0); + if (ci->progeny[7] == sub && cj->progeny[2] != NULL) + DOSUB_SUBSET_STARS(r, ci->progeny[7], sparts, ind, scount, + cj->progeny[2], -1, 0); + if (ci->progeny[7] != NULL && cj->progeny[2] == sub) + DOSUB_SUBSET_STARS(r, cj->progeny[2], sparts, ind, scount, + ci->progeny[7], -1, 0); + break; + + case 4: /* ( 1 , 0 , 0 ) */ + if (ci->progeny[4] == sub && cj->progeny[0] != NULL) + DOSUB_SUBSET_STARS(r, ci->progeny[4], sparts, ind, scount, + cj->progeny[0], -1, 0); + if (ci->progeny[4] != NULL && cj->progeny[0] == sub) + DOSUB_SUBSET_STARS(r, cj->progeny[0], sparts, ind, scount, + ci->progeny[4], -1, 0); + if (ci->progeny[4] == sub && cj->progeny[1] != NULL) + DOSUB_SUBSET_STARS(r, ci->progeny[4], sparts, ind, scount, + cj->progeny[1], -1, 0); + if (ci->progeny[4] 
!= NULL && cj->progeny[1] == sub) + DOSUB_SUBSET_STARS(r, cj->progeny[1], sparts, ind, scount, + ci->progeny[4], -1, 0); + if (ci->progeny[4] == sub && cj->progeny[2] != NULL) + DOSUB_SUBSET_STARS(r, ci->progeny[4], sparts, ind, scount, + cj->progeny[2], -1, 0); + if (ci->progeny[4] != NULL && cj->progeny[2] == sub) + DOSUB_SUBSET_STARS(r, cj->progeny[2], sparts, ind, scount, + ci->progeny[4], -1, 0); + if (ci->progeny[4] == sub && cj->progeny[3] != NULL) + DOSUB_SUBSET_STARS(r, ci->progeny[4], sparts, ind, scount, + cj->progeny[3], -1, 0); + if (ci->progeny[4] != NULL && cj->progeny[3] == sub) + DOSUB_SUBSET_STARS(r, cj->progeny[3], sparts, ind, scount, + ci->progeny[4], -1, 0); + if (ci->progeny[5] == sub && cj->progeny[0] != NULL) + DOSUB_SUBSET_STARS(r, ci->progeny[5], sparts, ind, scount, + cj->progeny[0], -1, 0); + if (ci->progeny[5] != NULL && cj->progeny[0] == sub) + DOSUB_SUBSET_STARS(r, cj->progeny[0], sparts, ind, scount, + ci->progeny[5], -1, 0); + if (ci->progeny[5] == sub && cj->progeny[1] != NULL) + DOSUB_SUBSET_STARS(r, ci->progeny[5], sparts, ind, scount, + cj->progeny[1], -1, 0); + if (ci->progeny[5] != NULL && cj->progeny[1] == sub) + DOSUB_SUBSET_STARS(r, cj->progeny[1], sparts, ind, scount, + ci->progeny[5], -1, 0); + if (ci->progeny[5] == sub && cj->progeny[2] != NULL) + DOSUB_SUBSET_STARS(r, ci->progeny[5], sparts, ind, scount, + cj->progeny[2], -1, 0); + if (ci->progeny[5] != NULL && cj->progeny[2] == sub) + DOSUB_SUBSET_STARS(r, cj->progeny[2], sparts, ind, scount, + ci->progeny[5], -1, 0); + if (ci->progeny[5] == sub && cj->progeny[3] != NULL) + DOSUB_SUBSET_STARS(r, ci->progeny[5], sparts, ind, scount, + cj->progeny[3], -1, 0); + if (ci->progeny[5] != NULL && cj->progeny[3] == sub) + DOSUB_SUBSET_STARS(r, cj->progeny[3], sparts, ind, scount, + ci->progeny[5], -1, 0); + if (ci->progeny[6] == sub && cj->progeny[0] != NULL) + DOSUB_SUBSET_STARS(r, ci->progeny[6], sparts, ind, scount, + cj->progeny[0], -1, 0); + if (ci->progeny[6] != NULL && 
cj->progeny[0] == sub) + DOSUB_SUBSET_STARS(r, cj->progeny[0], sparts, ind, scount, + ci->progeny[6], -1, 0); + if (ci->progeny[6] == sub && cj->progeny[1] != NULL) + DOSUB_SUBSET_STARS(r, ci->progeny[6], sparts, ind, scount, + cj->progeny[1], -1, 0); + if (ci->progeny[6] != NULL && cj->progeny[1] == sub) + DOSUB_SUBSET_STARS(r, cj->progeny[1], sparts, ind, scount, + ci->progeny[6], -1, 0); + if (ci->progeny[6] == sub && cj->progeny[2] != NULL) + DOSUB_SUBSET_STARS(r, ci->progeny[6], sparts, ind, scount, + cj->progeny[2], -1, 0); + if (ci->progeny[6] != NULL && cj->progeny[2] == sub) + DOSUB_SUBSET_STARS(r, cj->progeny[2], sparts, ind, scount, + ci->progeny[6], -1, 0); + if (ci->progeny[6] == sub && cj->progeny[3] != NULL) + DOSUB_SUBSET_STARS(r, ci->progeny[6], sparts, ind, scount, + cj->progeny[3], -1, 0); + if (ci->progeny[6] != NULL && cj->progeny[3] == sub) + DOSUB_SUBSET_STARS(r, cj->progeny[3], sparts, ind, scount, + ci->progeny[6], -1, 0); + if (ci->progeny[7] == sub && cj->progeny[0] != NULL) + DOSUB_SUBSET_STARS(r, ci->progeny[7], sparts, ind, scount, + cj->progeny[0], -1, 0); + if (ci->progeny[7] != NULL && cj->progeny[0] == sub) + DOSUB_SUBSET_STARS(r, cj->progeny[0], sparts, ind, scount, + ci->progeny[7], -1, 0); + if (ci->progeny[7] == sub && cj->progeny[1] != NULL) + DOSUB_SUBSET_STARS(r, ci->progeny[7], sparts, ind, scount, + cj->progeny[1], -1, 0); + if (ci->progeny[7] != NULL && cj->progeny[1] == sub) + DOSUB_SUBSET_STARS(r, cj->progeny[1], sparts, ind, scount, + ci->progeny[7], -1, 0); + if (ci->progeny[7] == sub && cj->progeny[2] != NULL) + DOSUB_SUBSET_STARS(r, ci->progeny[7], sparts, ind, scount, + cj->progeny[2], -1, 0); + if (ci->progeny[7] != NULL && cj->progeny[2] == sub) + DOSUB_SUBSET_STARS(r, cj->progeny[2], sparts, ind, scount, + ci->progeny[7], -1, 0); + if (ci->progeny[7] == sub && cj->progeny[3] != NULL) + DOSUB_SUBSET_STARS(r, ci->progeny[7], sparts, ind, scount, + cj->progeny[3], -1, 0); + if (ci->progeny[7] != NULL && 
cj->progeny[3] == sub) + DOSUB_SUBSET_STARS(r, cj->progeny[3], sparts, ind, scount, + ci->progeny[7], -1, 0); + break; + + case 5: /* ( 1 , 0 , -1 ) */ + if (ci->progeny[4] == sub && cj->progeny[1] != NULL) + DOSUB_SUBSET_STARS(r, ci->progeny[4], sparts, ind, scount, + cj->progeny[1], -1, 0); + if (ci->progeny[4] != NULL && cj->progeny[1] == sub) + DOSUB_SUBSET_STARS(r, cj->progeny[1], sparts, ind, scount, + ci->progeny[4], -1, 0); + if (ci->progeny[4] == sub && cj->progeny[3] != NULL) + DOSUB_SUBSET_STARS(r, ci->progeny[4], sparts, ind, scount, + cj->progeny[3], -1, 0); + if (ci->progeny[4] != NULL && cj->progeny[3] == sub) + DOSUB_SUBSET_STARS(r, cj->progeny[3], sparts, ind, scount, + ci->progeny[4], -1, 0); + if (ci->progeny[6] == sub && cj->progeny[1] != NULL) + DOSUB_SUBSET_STARS(r, ci->progeny[6], sparts, ind, scount, + cj->progeny[1], -1, 0); + if (ci->progeny[6] != NULL && cj->progeny[1] == sub) + DOSUB_SUBSET_STARS(r, cj->progeny[1], sparts, ind, scount, + ci->progeny[6], -1, 0); + if (ci->progeny[6] == sub && cj->progeny[3] != NULL) + DOSUB_SUBSET_STARS(r, ci->progeny[6], sparts, ind, scount, + cj->progeny[3], -1, 0); + if (ci->progeny[6] != NULL && cj->progeny[3] == sub) + DOSUB_SUBSET_STARS(r, cj->progeny[3], sparts, ind, scount, + ci->progeny[6], -1, 0); + break; + + case 6: /* ( 1 , -1 , 1 ) */ + if (ci->progeny[5] == sub && cj->progeny[2] != NULL) + DOSUB_SUBSET_STARS(r, ci->progeny[5], sparts, ind, scount, + cj->progeny[2], -1, 0); + if (ci->progeny[5] != NULL && cj->progeny[2] == sub) + DOSUB_SUBSET_STARS(r, cj->progeny[2], sparts, ind, scount, + ci->progeny[5], -1, 0); + break; + + case 7: /* ( 1 , -1 , 0 ) */ + if (ci->progeny[4] == sub && cj->progeny[2] != NULL) + DOSUB_SUBSET_STARS(r, ci->progeny[4], sparts, ind, scount, + cj->progeny[2], -1, 0); + if (ci->progeny[4] != NULL && cj->progeny[2] == sub) + DOSUB_SUBSET_STARS(r, cj->progeny[2], sparts, ind, scount, + ci->progeny[4], -1, 0); + if (ci->progeny[4] == sub && cj->progeny[3] != NULL) + 
DOSUB_SUBSET_STARS(r, ci->progeny[4], sparts, ind, scount, + cj->progeny[3], -1, 0); + if (ci->progeny[4] != NULL && cj->progeny[3] == sub) + DOSUB_SUBSET_STARS(r, cj->progeny[3], sparts, ind, scount, + ci->progeny[4], -1, 0); + if (ci->progeny[5] == sub && cj->progeny[2] != NULL) + DOSUB_SUBSET_STARS(r, ci->progeny[5], sparts, ind, scount, + cj->progeny[2], -1, 0); + if (ci->progeny[5] != NULL && cj->progeny[2] == sub) + DOSUB_SUBSET_STARS(r, cj->progeny[2], sparts, ind, scount, + ci->progeny[5], -1, 0); + if (ci->progeny[5] == sub && cj->progeny[3] != NULL) + DOSUB_SUBSET_STARS(r, ci->progeny[5], sparts, ind, scount, + cj->progeny[3], -1, 0); + if (ci->progeny[5] != NULL && cj->progeny[3] == sub) + DOSUB_SUBSET_STARS(r, cj->progeny[3], sparts, ind, scount, + ci->progeny[5], -1, 0); + break; + + case 8: /* ( 1 , -1 , -1 ) */ + if (ci->progeny[4] == sub && cj->progeny[3] != NULL) + DOSUB_SUBSET_STARS(r, ci->progeny[4], sparts, ind, scount, + cj->progeny[3], -1, 0); + if (ci->progeny[4] != NULL && cj->progeny[3] == sub) + DOSUB_SUBSET_STARS(r, cj->progeny[3], sparts, ind, scount, + ci->progeny[4], -1, 0); + break; + + case 9: /* ( 0 , 1 , 1 ) */ + if (ci->progeny[3] == sub && cj->progeny[0] != NULL) + DOSUB_SUBSET_STARS(r, ci->progeny[3], sparts, ind, scount, + cj->progeny[0], -1, 0); + if (ci->progeny[3] != NULL && cj->progeny[0] == sub) + DOSUB_SUBSET_STARS(r, cj->progeny[0], sparts, ind, scount, + ci->progeny[3], -1, 0); + if (ci->progeny[3] == sub && cj->progeny[4] != NULL) + DOSUB_SUBSET_STARS(r, ci->progeny[3], sparts, ind, scount, + cj->progeny[4], -1, 0); + if (ci->progeny[3] != NULL && cj->progeny[4] == sub) + DOSUB_SUBSET_STARS(r, cj->progeny[4], sparts, ind, scount, + ci->progeny[3], -1, 0); + if (ci->progeny[7] == sub && cj->progeny[0] != NULL) + DOSUB_SUBSET_STARS(r, ci->progeny[7], sparts, ind, scount, + cj->progeny[0], -1, 0); + if (ci->progeny[7] != NULL && cj->progeny[0] == sub) + DOSUB_SUBSET_STARS(r, cj->progeny[0], sparts, ind, scount, + 
ci->progeny[7], -1, 0); + if (ci->progeny[7] == sub && cj->progeny[4] != NULL) + DOSUB_SUBSET_STARS(r, ci->progeny[7], sparts, ind, scount, + cj->progeny[4], -1, 0); + if (ci->progeny[7] != NULL && cj->progeny[4] == sub) + DOSUB_SUBSET_STARS(r, cj->progeny[4], sparts, ind, scount, + ci->progeny[7], -1, 0); + break; + + case 10: /* ( 0 , 1 , 0 ) */ + if (ci->progeny[2] == sub && cj->progeny[0] != NULL) + DOSUB_SUBSET_STARS(r, ci->progeny[2], sparts, ind, scount, + cj->progeny[0], -1, 0); + if (ci->progeny[2] != NULL && cj->progeny[0] == sub) + DOSUB_SUBSET_STARS(r, cj->progeny[0], sparts, ind, scount, + ci->progeny[2], -1, 0); + if (ci->progeny[2] == sub && cj->progeny[1] != NULL) + DOSUB_SUBSET_STARS(r, ci->progeny[2], sparts, ind, scount, + cj->progeny[1], -1, 0); + if (ci->progeny[2] != NULL && cj->progeny[1] == sub) + DOSUB_SUBSET_STARS(r, cj->progeny[1], sparts, ind, scount, + ci->progeny[2], -1, 0); + if (ci->progeny[2] == sub && cj->progeny[4] != NULL) + DOSUB_SUBSET_STARS(r, ci->progeny[2], sparts, ind, scount, + cj->progeny[4], -1, 0); + if (ci->progeny[2] != NULL && cj->progeny[4] == sub) + DOSUB_SUBSET_STARS(r, cj->progeny[4], sparts, ind, scount, + ci->progeny[2], -1, 0); + if (ci->progeny[2] == sub && cj->progeny[5] != NULL) + DOSUB_SUBSET_STARS(r, ci->progeny[2], sparts, ind, scount, + cj->progeny[5], -1, 0); + if (ci->progeny[2] != NULL && cj->progeny[5] == sub) + DOSUB_SUBSET_STARS(r, cj->progeny[5], sparts, ind, scount, + ci->progeny[2], -1, 0); + if (ci->progeny[3] == sub && cj->progeny[0] != NULL) + DOSUB_SUBSET_STARS(r, ci->progeny[3], sparts, ind, scount, + cj->progeny[0], -1, 0); + if (ci->progeny[3] != NULL && cj->progeny[0] == sub) + DOSUB_SUBSET_STARS(r, cj->progeny[0], sparts, ind, scount, + ci->progeny[3], -1, 0); + if (ci->progeny[3] == sub && cj->progeny[1] != NULL) + DOSUB_SUBSET_STARS(r, ci->progeny[3], sparts, ind, scount, + cj->progeny[1], -1, 0); + if (ci->progeny[3] != NULL && cj->progeny[1] == sub) + DOSUB_SUBSET_STARS(r, 
cj->progeny[1], sparts, ind, scount, + ci->progeny[3], -1, 0); + if (ci->progeny[3] == sub && cj->progeny[4] != NULL) + DOSUB_SUBSET_STARS(r, ci->progeny[3], sparts, ind, scount, + cj->progeny[4], -1, 0); + if (ci->progeny[3] != NULL && cj->progeny[4] == sub) + DOSUB_SUBSET_STARS(r, cj->progeny[4], sparts, ind, scount, + ci->progeny[3], -1, 0); + if (ci->progeny[3] == sub && cj->progeny[5] != NULL) + DOSUB_SUBSET_STARS(r, ci->progeny[3], sparts, ind, scount, + cj->progeny[5], -1, 0); + if (ci->progeny[3] != NULL && cj->progeny[5] == sub) + DOSUB_SUBSET_STARS(r, cj->progeny[5], sparts, ind, scount, + ci->progeny[3], -1, 0); + if (ci->progeny[6] == sub && cj->progeny[0] != NULL) + DOSUB_SUBSET_STARS(r, ci->progeny[6], sparts, ind, scount, + cj->progeny[0], -1, 0); + if (ci->progeny[6] != NULL && cj->progeny[0] == sub) + DOSUB_SUBSET_STARS(r, cj->progeny[0], sparts, ind, scount, + ci->progeny[6], -1, 0); + if (ci->progeny[6] == sub && cj->progeny[1] != NULL) + DOSUB_SUBSET_STARS(r, ci->progeny[6], sparts, ind, scount, + cj->progeny[1], -1, 0); + if (ci->progeny[6] != NULL && cj->progeny[1] == sub) + DOSUB_SUBSET_STARS(r, cj->progeny[1], sparts, ind, scount, + ci->progeny[6], -1, 0); + if (ci->progeny[6] == sub && cj->progeny[4] != NULL) + DOSUB_SUBSET_STARS(r, ci->progeny[6], sparts, ind, scount, + cj->progeny[4], -1, 0); + if (ci->progeny[6] != NULL && cj->progeny[4] == sub) + DOSUB_SUBSET_STARS(r, cj->progeny[4], sparts, ind, scount, + ci->progeny[6], -1, 0); + if (ci->progeny[6] == sub && cj->progeny[5] != NULL) + DOSUB_SUBSET_STARS(r, ci->progeny[6], sparts, ind, scount, + cj->progeny[5], -1, 0); + if (ci->progeny[6] != NULL && cj->progeny[5] == sub) + DOSUB_SUBSET_STARS(r, cj->progeny[5], sparts, ind, scount, + ci->progeny[6], -1, 0); + if (ci->progeny[7] == sub && cj->progeny[0] != NULL) + DOSUB_SUBSET_STARS(r, ci->progeny[7], sparts, ind, scount, + cj->progeny[0], -1, 0); + if (ci->progeny[7] != NULL && cj->progeny[0] == sub) + DOSUB_SUBSET_STARS(r, 
cj->progeny[0], sparts, ind, scount, + ci->progeny[7], -1, 0); + if (ci->progeny[7] == sub && cj->progeny[1] != NULL) + DOSUB_SUBSET_STARS(r, ci->progeny[7], sparts, ind, scount, + cj->progeny[1], -1, 0); + if (ci->progeny[7] != NULL && cj->progeny[1] == sub) + DOSUB_SUBSET_STARS(r, cj->progeny[1], sparts, ind, scount, + ci->progeny[7], -1, 0); + if (ci->progeny[7] == sub && cj->progeny[4] != NULL) + DOSUB_SUBSET_STARS(r, ci->progeny[7], sparts, ind, scount, + cj->progeny[4], -1, 0); + if (ci->progeny[7] != NULL && cj->progeny[4] == sub) + DOSUB_SUBSET_STARS(r, cj->progeny[4], sparts, ind, scount, + ci->progeny[7], -1, 0); + if (ci->progeny[7] == sub && cj->progeny[5] != NULL) + DOSUB_SUBSET_STARS(r, ci->progeny[7], sparts, ind, scount, + cj->progeny[5], -1, 0); + if (ci->progeny[7] != NULL && cj->progeny[5] == sub) + DOSUB_SUBSET_STARS(r, cj->progeny[5], sparts, ind, scount, + ci->progeny[7], -1, 0); + break; + + case 11: /* ( 0 , 1 , -1 ) */ + if (ci->progeny[2] == sub && cj->progeny[1] != NULL) + DOSUB_SUBSET_STARS(r, ci->progeny[2], sparts, ind, scount, + cj->progeny[1], -1, 0); + if (ci->progeny[2] != NULL && cj->progeny[1] == sub) + DOSUB_SUBSET_STARS(r, cj->progeny[1], sparts, ind, scount, + ci->progeny[2], -1, 0); + if (ci->progeny[2] == sub && cj->progeny[5] != NULL) + DOSUB_SUBSET_STARS(r, ci->progeny[2], sparts, ind, scount, + cj->progeny[5], -1, 0); + if (ci->progeny[2] != NULL && cj->progeny[5] == sub) + DOSUB_SUBSET_STARS(r, cj->progeny[5], sparts, ind, scount, + ci->progeny[2], -1, 0); + if (ci->progeny[6] == sub && cj->progeny[1] != NULL) + DOSUB_SUBSET_STARS(r, ci->progeny[6], sparts, ind, scount, + cj->progeny[1], -1, 0); + if (ci->progeny[6] != NULL && cj->progeny[1] == sub) + DOSUB_SUBSET_STARS(r, cj->progeny[1], sparts, ind, scount, + ci->progeny[6], -1, 0); + if (ci->progeny[6] == sub && cj->progeny[5] != NULL) + DOSUB_SUBSET_STARS(r, ci->progeny[6], sparts, ind, scount, + cj->progeny[5], -1, 0); + if (ci->progeny[6] != NULL && cj->progeny[5] 
== sub) + DOSUB_SUBSET_STARS(r, cj->progeny[5], sparts, ind, scount, + ci->progeny[6], -1, 0); + break; + + case 12: /* ( 0 , 0 , 1 ) */ + if (ci->progeny[1] == sub && cj->progeny[0] != NULL) + DOSUB_SUBSET_STARS(r, ci->progeny[1], sparts, ind, scount, + cj->progeny[0], -1, 0); + if (ci->progeny[1] != NULL && cj->progeny[0] == sub) + DOSUB_SUBSET_STARS(r, cj->progeny[0], sparts, ind, scount, + ci->progeny[1], -1, 0); + if (ci->progeny[1] == sub && cj->progeny[2] != NULL) + DOSUB_SUBSET_STARS(r, ci->progeny[1], sparts, ind, scount, + cj->progeny[2], -1, 0); + if (ci->progeny[1] != NULL && cj->progeny[2] == sub) + DOSUB_SUBSET_STARS(r, cj->progeny[2], sparts, ind, scount, + ci->progeny[1], -1, 0); + if (ci->progeny[1] == sub && cj->progeny[4] != NULL) + DOSUB_SUBSET_STARS(r, ci->progeny[1], sparts, ind, scount, + cj->progeny[4], -1, 0); + if (ci->progeny[1] != NULL && cj->progeny[4] == sub) + DOSUB_SUBSET_STARS(r, cj->progeny[4], sparts, ind, scount, + ci->progeny[1], -1, 0); + if (ci->progeny[1] == sub && cj->progeny[6] != NULL) + DOSUB_SUBSET_STARS(r, ci->progeny[1], sparts, ind, scount, + cj->progeny[6], -1, 0); + if (ci->progeny[1] != NULL && cj->progeny[6] == sub) + DOSUB_SUBSET_STARS(r, cj->progeny[6], sparts, ind, scount, + ci->progeny[1], -1, 0); + if (ci->progeny[3] == sub && cj->progeny[0] != NULL) + DOSUB_SUBSET_STARS(r, ci->progeny[3], sparts, ind, scount, + cj->progeny[0], -1, 0); + if (ci->progeny[3] != NULL && cj->progeny[0] == sub) + DOSUB_SUBSET_STARS(r, cj->progeny[0], sparts, ind, scount, + ci->progeny[3], -1, 0); + if (ci->progeny[3] == sub && cj->progeny[2] != NULL) + DOSUB_SUBSET_STARS(r, ci->progeny[3], sparts, ind, scount, + cj->progeny[2], -1, 0); + if (ci->progeny[3] != NULL && cj->progeny[2] == sub) + DOSUB_SUBSET_STARS(r, cj->progeny[2], sparts, ind, scount, + ci->progeny[3], -1, 0); + if (ci->progeny[3] == sub && cj->progeny[4] != NULL) + DOSUB_SUBSET_STARS(r, ci->progeny[3], sparts, ind, scount, + cj->progeny[4], -1, 0); + if 
(ci->progeny[3] != NULL && cj->progeny[4] == sub) + DOSUB_SUBSET_STARS(r, cj->progeny[4], sparts, ind, scount, + ci->progeny[3], -1, 0); + if (ci->progeny[3] == sub && cj->progeny[6] != NULL) + DOSUB_SUBSET_STARS(r, ci->progeny[3], sparts, ind, scount, + cj->progeny[6], -1, 0); + if (ci->progeny[3] != NULL && cj->progeny[6] == sub) + DOSUB_SUBSET_STARS(r, cj->progeny[6], sparts, ind, scount, + ci->progeny[3], -1, 0); + if (ci->progeny[5] == sub && cj->progeny[0] != NULL) + DOSUB_SUBSET_STARS(r, ci->progeny[5], sparts, ind, scount, + cj->progeny[0], -1, 0); + if (ci->progeny[5] != NULL && cj->progeny[0] == sub) + DOSUB_SUBSET_STARS(r, cj->progeny[0], sparts, ind, scount, + ci->progeny[5], -1, 0); + if (ci->progeny[5] == sub && cj->progeny[2] != NULL) + DOSUB_SUBSET_STARS(r, ci->progeny[5], sparts, ind, scount, + cj->progeny[2], -1, 0); + if (ci->progeny[5] != NULL && cj->progeny[2] == sub) + DOSUB_SUBSET_STARS(r, cj->progeny[2], sparts, ind, scount, + ci->progeny[5], -1, 0); + if (ci->progeny[5] == sub && cj->progeny[4] != NULL) + DOSUB_SUBSET_STARS(r, ci->progeny[5], sparts, ind, scount, + cj->progeny[4], -1, 0); + if (ci->progeny[5] != NULL && cj->progeny[4] == sub) + DOSUB_SUBSET_STARS(r, cj->progeny[4], sparts, ind, scount, + ci->progeny[5], -1, 0); + if (ci->progeny[5] == sub && cj->progeny[6] != NULL) + DOSUB_SUBSET_STARS(r, ci->progeny[5], sparts, ind, scount, + cj->progeny[6], -1, 0); + if (ci->progeny[5] != NULL && cj->progeny[6] == sub) + DOSUB_SUBSET_STARS(r, cj->progeny[6], sparts, ind, scount, + ci->progeny[5], -1, 0); + if (ci->progeny[7] == sub && cj->progeny[0] != NULL) + DOSUB_SUBSET_STARS(r, ci->progeny[7], sparts, ind, scount, + cj->progeny[0], -1, 0); + if (ci->progeny[7] != NULL && cj->progeny[0] == sub) + DOSUB_SUBSET_STARS(r, cj->progeny[0], sparts, ind, scount, + ci->progeny[7], -1, 0); + if (ci->progeny[7] == sub && cj->progeny[2] != NULL) + DOSUB_SUBSET_STARS(r, ci->progeny[7], sparts, ind, scount, + cj->progeny[2], -1, 0); + if 
(ci->progeny[7] != NULL && cj->progeny[2] == sub) + DOSUB_SUBSET_STARS(r, cj->progeny[2], sparts, ind, scount, + ci->progeny[7], -1, 0); + if (ci->progeny[7] == sub && cj->progeny[4] != NULL) + DOSUB_SUBSET_STARS(r, ci->progeny[7], sparts, ind, scount, + cj->progeny[4], -1, 0); + if (ci->progeny[7] != NULL && cj->progeny[4] == sub) + DOSUB_SUBSET_STARS(r, cj->progeny[4], sparts, ind, scount, + ci->progeny[7], -1, 0); + if (ci->progeny[7] == sub && cj->progeny[6] != NULL) + DOSUB_SUBSET_STARS(r, ci->progeny[7], sparts, ind, scount, + cj->progeny[6], -1, 0); + if (ci->progeny[7] != NULL && cj->progeny[6] == sub) + DOSUB_SUBSET_STARS(r, cj->progeny[6], sparts, ind, scount, + ci->progeny[7], -1, 0); + break; + } + + } + + /* Otherwise, compute the pair directly. */ + else if (cell_is_active_stars(ci, e) || cell_is_active_stars(cj, e)) { + + /* Do any of the cells need to be drifted first? */ + if (!cell_are_part_drifted(cj, e)) error("Cell should be drifted!"); + + DOPAIR1_SUBSET_BRANCH_STARS(r, ci, sparts, ind, scount, cj); + } + + } /* otherwise, pair interaction. */ +} + +/** + * @brief Determine which version of DOSELF1_STARS needs to be called depending + * on the optimisation level. + * + * @param r #runner + * @param c #cell c + * + */ +void DOSELF1_BRANCH_STARS(struct runner *r, struct cell *c) { + + const struct engine *restrict e = r->e; + + /* Anything to do here? */ + if (!cell_is_active_stars(c, e)) return; + + /* Did we mess up the recursion? 
*/ + if (c->stars.h_max_old * kernel_gamma > c->dmin) + error("Cell smaller than smoothing length"); + + DOSELF1_STARS(r, c, 1); +} + +#define RUNNER_CHECK_SORT(TYPE, PART, cj, ci, sid) \ + ({ \ + const struct entry *restrict sort_j = cj->TYPE.sort[sid]; \ + \ + for (int pjd = 0; pjd < cj->TYPE.count; pjd++) { \ + const struct PART *p = &cj->TYPE.parts[sort_j[pjd].i]; \ + const float d = p->x[0] * runner_shift[sid][0] + \ + p->x[1] * runner_shift[sid][1] + \ + p->x[2] * runner_shift[sid][2]; \ + if ((fabsf(d - sort_j[pjd].d) - cj->TYPE.dx_max_sort) > \ + 1.0e-4 * max(fabsf(d), cj->TYPE.dx_max_sort_old) && \ + (fabsf(d - sort_j[pjd].d) - cj->TYPE.dx_max_sort) > \ + cj->width[0] * 1.0e-10) \ + error( \ + "particle shift diff exceeds dx_max_sort in cell cj. " \ + "cj->nodeID=%d " \ + "ci->nodeID=%d d=%e sort_j[pjd].d=%e cj->" #TYPE \ + ".dx_max_sort=%e " \ + "cj->" #TYPE ".dx_max_sort_old=%e", \ + cj->nodeID, ci->nodeID, d, sort_j[pjd].d, cj->TYPE.dx_max_sort, \ + cj->TYPE.dx_max_sort_old); \ + } \ + }) + +/** + * @brief Determine which version of DOPAIR1_STARS needs to be called depending + * on the orientation of the cells or whether DOPAIR1_STARS needs to be called + * at all. + * + * @param r #runner + * @param ci #cell ci + * @param cj #cell cj + * + */ +void DOPAIR1_BRANCH_STARS(struct runner *r, struct cell *ci, struct cell *cj) { + + const struct engine *restrict e = r->e; + const int ci_active = cell_is_active_stars(ci, e); + const int cj_active = cell_is_active_stars(cj, e); + const int do_ci = (ci->stars.count != 0 && cj->hydro.count != 0 && ci_active); + const int do_cj = (cj->stars.count != 0 && ci->hydro.count != 0 && cj_active); + + /* Anything to do here? */ + if (!do_ci && !do_cj) return; + + /* Get the sort ID. */ + double shift[3] = {0.0, 0.0, 0.0}; + const int sid = space_getsid(e->s, &ci, &cj, shift); + + /* Check that cells are drifted. 
*/ + if (do_ci && + (!cell_are_spart_drifted(ci, e) || !cell_are_part_drifted(cj, e))) + error("Interacting undrifted cells."); + + /* Have the cells been sorted? */ + if (do_ci && (!(ci->stars.sorted & (1 << sid)) || + ci->stars.dx_max_sort_old > space_maxreldx * ci->dmin)) + error("Interacting unsorted cells."); + + if (do_ci && (!(cj->hydro.sorted & (1 << sid)) || + cj->hydro.dx_max_sort_old > space_maxreldx * cj->dmin)) + error("Interacting unsorted cells."); + + if (do_cj && + (!cell_are_part_drifted(ci, e) || !cell_are_spart_drifted(cj, e))) + error("Interacting undrifted cells."); + + /* Have the cells been sorted? */ + if (do_cj && (!(ci->hydro.sorted & (1 << sid)) || + ci->hydro.dx_max_sort_old > space_maxreldx * ci->dmin)) + error("Interacting unsorted cells."); + + if (do_cj && (!(cj->stars.sorted & (1 << sid)) || + cj->stars.dx_max_sort_old > space_maxreldx * cj->dmin)) + error("Interacting unsorted cells."); + +#ifdef SWIFT_DEBUG_CHECKS + if (do_ci) { + RUNNER_CHECK_SORT(hydro, part, cj, ci, sid); + RUNNER_CHECK_SORT(stars, spart, ci, cj, sid); + } + + if (do_cj) { + RUNNER_CHECK_SORT(hydro, part, ci, cj, sid); + RUNNER_CHECK_SORT(stars, spart, cj, ci, sid); + } +#endif /* SWIFT_DEBUG_CHECKS */ + + DOPAIR1_STARS(r, ci, cj, 1); +} + +/** + * @brief Compute grouped sub-cell interactions for pairs + * + * @param r The #runner. + * @param ci The first #cell. + * @param cj The second #cell. + * @param sid The direction linking the cells + * @param gettimer Do we have a timer ? + * + * @todo Hard-code the sid on the recursive calls to avoid the + * redundant computations to find the sid on-the-fly. + */ +void DOSUB_PAIR1_STARS(struct runner *r, struct cell *ci, struct cell *cj, + int sid, int gettimer) { + + struct space *s = r->e->s; + const struct engine *e = r->e; + + /* Should we even bother? 
*/ + int should_do = ci->stars.count != 0 && cj->hydro.count != 0 && + cell_is_active_stars(ci, e); + should_do |= cj->stars.count != 0 && ci->hydro.count != 0 && + cell_is_active_stars(cj, e); + if (!should_do) return; + + /* Get the type of pair if not specified explicitly. */ + double shift[3]; + sid = space_getsid(s, &ci, &cj, shift); + + /* Recurse? */ + if (cell_can_recurse_in_pair_stars_task(ci) && + cell_can_recurse_in_pair_stars_task(cj)) { + + /* Different types of flags. */ + switch (sid) { + + /* Regular sub-cell interactions of a single cell. */ + case 0: /* ( 1 , 1 , 1 ) */ + if (ci->progeny[7] != NULL && cj->progeny[0] != NULL) + DOSUB_PAIR1_STARS(r, ci->progeny[7], cj->progeny[0], -1, 0); + break; + + case 1: /* ( 1 , 1 , 0 ) */ + if (ci->progeny[6] != NULL && cj->progeny[0] != NULL) + DOSUB_PAIR1_STARS(r, ci->progeny[6], cj->progeny[0], -1, 0); + if (ci->progeny[6] != NULL && cj->progeny[1] != NULL) + DOSUB_PAIR1_STARS(r, ci->progeny[6], cj->progeny[1], -1, 0); + if (ci->progeny[7] != NULL && cj->progeny[0] != NULL) + DOSUB_PAIR1_STARS(r, ci->progeny[7], cj->progeny[0], -1, 0); + if (ci->progeny[7] != NULL && cj->progeny[1] != NULL) + DOSUB_PAIR1_STARS(r, ci->progeny[7], cj->progeny[1], -1, 0); + break; + + case 2: /* ( 1 , 1 , -1 ) */ + if (ci->progeny[6] != NULL && cj->progeny[1] != NULL) + DOSUB_PAIR1_STARS(r, ci->progeny[6], cj->progeny[1], -1, 0); + break; + + case 3: /* ( 1 , 0 , 1 ) */ + if (ci->progeny[5] != NULL && cj->progeny[0] != NULL) + DOSUB_PAIR1_STARS(r, ci->progeny[5], cj->progeny[0], -1, 0); + if (ci->progeny[5] != NULL && cj->progeny[2] != NULL) + DOSUB_PAIR1_STARS(r, ci->progeny[5], cj->progeny[2], -1, 0); + if (ci->progeny[7] != NULL && cj->progeny[0] != NULL) + DOSUB_PAIR1_STARS(r, ci->progeny[7], cj->progeny[0], -1, 0); + if (ci->progeny[7] != NULL && cj->progeny[2] != NULL) + DOSUB_PAIR1_STARS(r, ci->progeny[7], cj->progeny[2], -1, 0); + break; + + case 4: /* ( 1 , 0 , 0 ) */ + if (ci->progeny[4] != NULL && cj->progeny[0] != 
NULL) + DOSUB_PAIR1_STARS(r, ci->progeny[4], cj->progeny[0], -1, 0); + if (ci->progeny[4] != NULL && cj->progeny[1] != NULL) + DOSUB_PAIR1_STARS(r, ci->progeny[4], cj->progeny[1], -1, 0); + if (ci->progeny[4] != NULL && cj->progeny[2] != NULL) + DOSUB_PAIR1_STARS(r, ci->progeny[4], cj->progeny[2], -1, 0); + if (ci->progeny[4] != NULL && cj->progeny[3] != NULL) + DOSUB_PAIR1_STARS(r, ci->progeny[4], cj->progeny[3], -1, 0); + if (ci->progeny[5] != NULL && cj->progeny[0] != NULL) + DOSUB_PAIR1_STARS(r, ci->progeny[5], cj->progeny[0], -1, 0); + if (ci->progeny[5] != NULL && cj->progeny[1] != NULL) + DOSUB_PAIR1_STARS(r, ci->progeny[5], cj->progeny[1], -1, 0); + if (ci->progeny[5] != NULL && cj->progeny[2] != NULL) + DOSUB_PAIR1_STARS(r, ci->progeny[5], cj->progeny[2], -1, 0); + if (ci->progeny[5] != NULL && cj->progeny[3] != NULL) + DOSUB_PAIR1_STARS(r, ci->progeny[5], cj->progeny[3], -1, 0); + if (ci->progeny[6] != NULL && cj->progeny[0] != NULL) + DOSUB_PAIR1_STARS(r, ci->progeny[6], cj->progeny[0], -1, 0); + if (ci->progeny[6] != NULL && cj->progeny[1] != NULL) + DOSUB_PAIR1_STARS(r, ci->progeny[6], cj->progeny[1], -1, 0); + if (ci->progeny[6] != NULL && cj->progeny[2] != NULL) + DOSUB_PAIR1_STARS(r, ci->progeny[6], cj->progeny[2], -1, 0); + if (ci->progeny[6] != NULL && cj->progeny[3] != NULL) + DOSUB_PAIR1_STARS(r, ci->progeny[6], cj->progeny[3], -1, 0); + if (ci->progeny[7] != NULL && cj->progeny[0] != NULL) + DOSUB_PAIR1_STARS(r, ci->progeny[7], cj->progeny[0], -1, 0); + if (ci->progeny[7] != NULL && cj->progeny[1] != NULL) + DOSUB_PAIR1_STARS(r, ci->progeny[7], cj->progeny[1], -1, 0); + if (ci->progeny[7] != NULL && cj->progeny[2] != NULL) + DOSUB_PAIR1_STARS(r, ci->progeny[7], cj->progeny[2], -1, 0); + if (ci->progeny[7] != NULL && cj->progeny[3] != NULL) + DOSUB_PAIR1_STARS(r, ci->progeny[7], cj->progeny[3], -1, 0); + break; + + case 5: /* ( 1 , 0 , -1 ) */ + if (ci->progeny[4] != NULL && cj->progeny[1] != NULL) + DOSUB_PAIR1_STARS(r, ci->progeny[4], 
cj->progeny[1], -1, 0); + if (ci->progeny[4] != NULL && cj->progeny[3] != NULL) + DOSUB_PAIR1_STARS(r, ci->progeny[4], cj->progeny[3], -1, 0); + if (ci->progeny[6] != NULL && cj->progeny[1] != NULL) + DOSUB_PAIR1_STARS(r, ci->progeny[6], cj->progeny[1], -1, 0); + if (ci->progeny[6] != NULL && cj->progeny[3] != NULL) + DOSUB_PAIR1_STARS(r, ci->progeny[6], cj->progeny[3], -1, 0); + break; + + case 6: /* ( 1 , -1 , 1 ) */ + if (ci->progeny[5] != NULL && cj->progeny[2] != NULL) + DOSUB_PAIR1_STARS(r, ci->progeny[5], cj->progeny[2], -1, 0); + break; + + case 7: /* ( 1 , -1 , 0 ) */ + if (ci->progeny[4] != NULL && cj->progeny[2] != NULL) + DOSUB_PAIR1_STARS(r, ci->progeny[4], cj->progeny[2], -1, 0); + if (ci->progeny[4] != NULL && cj->progeny[3] != NULL) + DOSUB_PAIR1_STARS(r, ci->progeny[4], cj->progeny[3], -1, 0); + if (ci->progeny[5] != NULL && cj->progeny[2] != NULL) + DOSUB_PAIR1_STARS(r, ci->progeny[5], cj->progeny[2], -1, 0); + if (ci->progeny[5] != NULL && cj->progeny[3] != NULL) + DOSUB_PAIR1_STARS(r, ci->progeny[5], cj->progeny[3], -1, 0); + break; + + case 8: /* ( 1 , -1 , -1 ) */ + if (ci->progeny[4] != NULL && cj->progeny[3] != NULL) + DOSUB_PAIR1_STARS(r, ci->progeny[4], cj->progeny[3], -1, 0); + break; + + case 9: /* ( 0 , 1 , 1 ) */ + if (ci->progeny[3] != NULL && cj->progeny[0] != NULL) + DOSUB_PAIR1_STARS(r, ci->progeny[3], cj->progeny[0], -1, 0); + if (ci->progeny[3] != NULL && cj->progeny[4] != NULL) + DOSUB_PAIR1_STARS(r, ci->progeny[3], cj->progeny[4], -1, 0); + if (ci->progeny[7] != NULL && cj->progeny[0] != NULL) + DOSUB_PAIR1_STARS(r, ci->progeny[7], cj->progeny[0], -1, 0); + if (ci->progeny[7] != NULL && cj->progeny[4] != NULL) + DOSUB_PAIR1_STARS(r, ci->progeny[7], cj->progeny[4], -1, 0); + break; + + case 10: /* ( 0 , 1 , 0 ) */ + if (ci->progeny[2] != NULL && cj->progeny[0] != NULL) + DOSUB_PAIR1_STARS(r, ci->progeny[2], cj->progeny[0], -1, 0); + if (ci->progeny[2] != NULL && cj->progeny[1] != NULL) + DOSUB_PAIR1_STARS(r, ci->progeny[2], 
cj->progeny[1], -1, 0); + if (ci->progeny[2] != NULL && cj->progeny[4] != NULL) + DOSUB_PAIR1_STARS(r, ci->progeny[2], cj->progeny[4], -1, 0); + if (ci->progeny[2] != NULL && cj->progeny[5] != NULL) + DOSUB_PAIR1_STARS(r, ci->progeny[2], cj->progeny[5], -1, 0); + if (ci->progeny[3] != NULL && cj->progeny[0] != NULL) + DOSUB_PAIR1_STARS(r, ci->progeny[3], cj->progeny[0], -1, 0); + if (ci->progeny[3] != NULL && cj->progeny[1] != NULL) + DOSUB_PAIR1_STARS(r, ci->progeny[3], cj->progeny[1], -1, 0); + if (ci->progeny[3] != NULL && cj->progeny[4] != NULL) + DOSUB_PAIR1_STARS(r, ci->progeny[3], cj->progeny[4], -1, 0); + if (ci->progeny[3] != NULL && cj->progeny[5] != NULL) + DOSUB_PAIR1_STARS(r, ci->progeny[3], cj->progeny[5], -1, 0); + if (ci->progeny[6] != NULL && cj->progeny[0] != NULL) + DOSUB_PAIR1_STARS(r, ci->progeny[6], cj->progeny[0], -1, 0); + if (ci->progeny[6] != NULL && cj->progeny[1] != NULL) + DOSUB_PAIR1_STARS(r, ci->progeny[6], cj->progeny[1], -1, 0); + if (ci->progeny[6] != NULL && cj->progeny[4] != NULL) + DOSUB_PAIR1_STARS(r, ci->progeny[6], cj->progeny[4], -1, 0); + if (ci->progeny[6] != NULL && cj->progeny[5] != NULL) + DOSUB_PAIR1_STARS(r, ci->progeny[6], cj->progeny[5], -1, 0); + if (ci->progeny[7] != NULL && cj->progeny[0] != NULL) + DOSUB_PAIR1_STARS(r, ci->progeny[7], cj->progeny[0], -1, 0); + if (ci->progeny[7] != NULL && cj->progeny[1] != NULL) + DOSUB_PAIR1_STARS(r, ci->progeny[7], cj->progeny[1], -1, 0); + if (ci->progeny[7] != NULL && cj->progeny[4] != NULL) + DOSUB_PAIR1_STARS(r, ci->progeny[7], cj->progeny[4], -1, 0); + if (ci->progeny[7] != NULL && cj->progeny[5] != NULL) + DOSUB_PAIR1_STARS(r, ci->progeny[7], cj->progeny[5], -1, 0); + break; + + case 11: /* ( 0 , 1 , -1 ) */ + if (ci->progeny[2] != NULL && cj->progeny[1] != NULL) + DOSUB_PAIR1_STARS(r, ci->progeny[2], cj->progeny[1], -1, 0); + if (ci->progeny[2] != NULL && cj->progeny[5] != NULL) + DOSUB_PAIR1_STARS(r, ci->progeny[2], cj->progeny[5], -1, 0); + if (ci->progeny[6] != NULL 
&& cj->progeny[1] != NULL) + DOSUB_PAIR1_STARS(r, ci->progeny[6], cj->progeny[1], -1, 0); + if (ci->progeny[6] != NULL && cj->progeny[5] != NULL) + DOSUB_PAIR1_STARS(r, ci->progeny[6], cj->progeny[5], -1, 0); + break; + + case 12: /* ( 0 , 0 , 1 ) */ + if (ci->progeny[1] != NULL && cj->progeny[0] != NULL) + DOSUB_PAIR1_STARS(r, ci->progeny[1], cj->progeny[0], -1, 0); + if (ci->progeny[1] != NULL && cj->progeny[2] != NULL) + DOSUB_PAIR1_STARS(r, ci->progeny[1], cj->progeny[2], -1, 0); + if (ci->progeny[1] != NULL && cj->progeny[4] != NULL) + DOSUB_PAIR1_STARS(r, ci->progeny[1], cj->progeny[4], -1, 0); + if (ci->progeny[1] != NULL && cj->progeny[6] != NULL) + DOSUB_PAIR1_STARS(r, ci->progeny[1], cj->progeny[6], -1, 0); + if (ci->progeny[3] != NULL && cj->progeny[0] != NULL) + DOSUB_PAIR1_STARS(r, ci->progeny[3], cj->progeny[0], -1, 0); + if (ci->progeny[3] != NULL && cj->progeny[2] != NULL) + DOSUB_PAIR1_STARS(r, ci->progeny[3], cj->progeny[2], -1, 0); + if (ci->progeny[3] != NULL && cj->progeny[4] != NULL) + DOSUB_PAIR1_STARS(r, ci->progeny[3], cj->progeny[4], -1, 0); + if (ci->progeny[3] != NULL && cj->progeny[6] != NULL) + DOSUB_PAIR1_STARS(r, ci->progeny[3], cj->progeny[6], -1, 0); + if (ci->progeny[5] != NULL && cj->progeny[0] != NULL) + DOSUB_PAIR1_STARS(r, ci->progeny[5], cj->progeny[0], -1, 0); + if (ci->progeny[5] != NULL && cj->progeny[2] != NULL) + DOSUB_PAIR1_STARS(r, ci->progeny[5], cj->progeny[2], -1, 0); + if (ci->progeny[5] != NULL && cj->progeny[4] != NULL) + DOSUB_PAIR1_STARS(r, ci->progeny[5], cj->progeny[4], -1, 0); + if (ci->progeny[5] != NULL && cj->progeny[6] != NULL) + DOSUB_PAIR1_STARS(r, ci->progeny[5], cj->progeny[6], -1, 0); + if (ci->progeny[7] != NULL && cj->progeny[0] != NULL) + DOSUB_PAIR1_STARS(r, ci->progeny[7], cj->progeny[0], -1, 0); + if (ci->progeny[7] != NULL && cj->progeny[2] != NULL) + DOSUB_PAIR1_STARS(r, ci->progeny[7], cj->progeny[2], -1, 0); + if (ci->progeny[7] != NULL && cj->progeny[4] != NULL) + DOSUB_PAIR1_STARS(r, 
ci->progeny[7], cj->progeny[4], -1, 0); + if (ci->progeny[7] != NULL && cj->progeny[6] != NULL) + DOSUB_PAIR1_STARS(r, ci->progeny[7], cj->progeny[6], -1, 0); + break; + } + + } + + /* Otherwise, compute the pair directly. */ + else { + + const int do_ci = ci->stars.count != 0 && cj->hydro.count != 0 && + cell_is_active_stars(ci, e); + const int do_cj = cj->stars.count != 0 && ci->hydro.count != 0 && + cell_is_active_stars(cj, e); + + if (do_ci) { + + /* Make sure both cells are drifted to the current timestep. */ + if (!cell_are_spart_drifted(ci, e)) + error("Interacting undrifted cells (sparts)."); + + if (!cell_are_part_drifted(cj, e)) + error("Interacting undrifted cells (parts)."); + + /* Do any of the cells need to be sorted first? */ + if (!(ci->stars.sorted & (1 << sid)) || + ci->stars.dx_max_sort_old > ci->dmin * space_maxreldx) + error("Interacting unsorted cell (sparts)."); + + if (!(cj->hydro.sorted & (1 << sid)) || + cj->hydro.dx_max_sort_old > cj->dmin * space_maxreldx) + error("Interacting unsorted cell (parts)."); + } + + if (do_cj) { + + /* Make sure both cells are drifted to the current timestep. */ + if (!cell_are_part_drifted(ci, e)) + error("Interacting undrifted cells (parts)."); + + if (!cell_are_spart_drifted(cj, e)) + error("Interacting undrifted cells (sparts)."); + + /* Do any of the cells need to be sorted first? */ + if (!(ci->hydro.sorted & (1 << sid)) || + ci->hydro.dx_max_sort_old > ci->dmin * space_maxreldx) + error("Interacting unsorted cell (parts)."); + + if (!(cj->stars.sorted & (1 << sid)) || + cj->stars.dx_max_sort_old > cj->dmin * space_maxreldx) + error("Interacting unsorted cell (sparts)."); + } + + if (do_ci || do_cj) DOPAIR1_BRANCH_STARS(r, ci, cj); + } +} + +/** + * @brief Compute grouped sub-cell interactions for self tasks + * + * @param r The #runner. + * @param ci The first #cell. + * @param gettimer Do we have a timer ? 
+ */ +void DOSUB_SELF1_STARS(struct runner *r, struct cell *ci, int gettimer) { + + /* Should we even bother? */ + if (ci->hydro.count == 0 || ci->stars.count == 0 || + !cell_is_active_stars(ci, r->e)) + return; + + /* Recurse? */ + if (cell_can_recurse_in_self_stars_task(ci)) { + + /* Loop over all progeny. */ + for (int k = 0; k < 8; k++) + if (ci->progeny[k] != NULL) { + DOSUB_SELF1_STARS(r, ci->progeny[k], 0); + for (int j = k + 1; j < 8; j++) + if (ci->progeny[j] != NULL) + DOSUB_PAIR1_STARS(r, ci->progeny[k], ci->progeny[j], -1, 0); + } + } + + /* Otherwise, compute self-interaction. */ + else { + + /* Drift the cell to the current timestep if needed. */ + if (!cell_are_spart_drifted(ci, r->e)) error("Interacting undrifted cell."); + + DOSELF1_BRANCH_STARS(r, ci); + } +} diff --git a/src/runner_doiact_vec.c b/src/runner_doiact_vec.c index 2e86280d64491ee1750f41c2cd22ab01c08e30b8..c74fa7c8f53576f2e80578488fdf3378c59c0400 100644 --- a/src/runner_doiact_vec.c +++ b/src/runner_doiact_vec.c @@ -270,10 +270,10 @@ __attribute__((always_inline)) INLINE static void populate_max_index_density( int *max_index_i, int *max_index_j, int *init_pi, int *init_pj, const timebin_t max_active_bin, const int active_ci, const int active_cj) { - const struct part *restrict parts_i = ci->parts; - const struct part *restrict parts_j = cj->parts; + const struct part *restrict parts_i = ci->hydro.parts; + const struct part *restrict parts_j = cj->hydro.parts; - int first_pi = 0, last_pj = cj->count - 1; + int first_pi = 0, last_pj = cj->hydro.count - 1; int temp, active_id; /* Only populate max_index array for local actve cells. */ @@ -281,7 +281,7 @@ __attribute__((always_inline)) INLINE static void populate_max_index_density( /* Find the leftmost active particle in cell i that interacts with any * particle in cell j. 
*/ - first_pi = ci->count; + first_pi = ci->hydro.count; active_id = first_pi - 1; while (first_pi > 0 && sort_i[first_pi - 1].d + dx_max + hi_max > dj_min) { first_pi--; @@ -295,7 +295,7 @@ __attribute__((always_inline)) INLINE static void populate_max_index_density( /* Find the maximum index into cell j for each particle in range in cell i. */ - if (first_pi < ci->count) { + if (first_pi < ci->hydro.count) { /* Start from the first particle in cell j. */ temp = 0; @@ -305,33 +305,33 @@ __attribute__((always_inline)) INLINE static void populate_max_index_density( sort_i[first_pi].d + pi->h * kernel_gamma + dx_max - rshift; /* Loop through particles in cell j until they are not in range of pi. - * Make sure that temp stays between 0 and cj->count - 1.*/ - while (temp < cj->count - 1 && first_di > sort_j[temp].d) temp++; + * Make sure that temp stays between 0 and cj->hydro.count - 1.*/ + while (temp < cj->hydro.count - 1 && first_di > sort_j[temp].d) temp++; max_index_i[first_pi] = temp; /* Populate max_index_i for remaining particles that are within range. */ - for (int i = first_pi + 1; i < ci->count; i++) { + for (int i = first_pi + 1; i < ci->hydro.count; i++) { temp = max_index_i[i - 1]; pi = &parts_i[sort_i[i].i]; const float di = sort_i[i].d + pi->h * kernel_gamma + dx_max - rshift; - /* Make sure that temp stays between 0 and cj->count - 1.*/ - while (temp < cj->count - 1 && di > sort_j[temp].d) temp++; + /* Make sure that temp stays between 0 and cj->hydro.count - 1.*/ + while (temp < cj->hydro.count - 1 && di > sort_j[temp].d) temp++; max_index_i[i] = temp; } } else { /* Make sure that max index is set to first particle in cj.*/ - max_index_i[ci->count - 1] = 0; + max_index_i[ci->hydro.count - 1] = 0; } } else { /* Make sure that foreign cells are only read into the cache if the local * cell requires it. * Also ensure that it does not require any particles from cj. 
*/ - first_pi = ci->count - 1; - max_index_i[ci->count - 1] = 0; + first_pi = ci->hydro.count - 1; + max_index_i[ci->hydro.count - 1] = 0; } /* Only populate max_index array for local actve cells. */ @@ -340,7 +340,7 @@ __attribute__((always_inline)) INLINE static void populate_max_index_density( * particle in cell i. */ last_pj = -1; active_id = last_pj; - while (last_pj < cj->count && + while (last_pj < cj->hydro.count && sort_j[last_pj + 1].d - hj_max - dx_max < di_max) { last_pj++; /* Store the index of the particle if it is active. */ @@ -356,7 +356,7 @@ __attribute__((always_inline)) INLINE static void populate_max_index_density( if (last_pj >= 0) { /* Start from the last particle in cell i. */ - temp = ci->count - 1; + temp = ci->hydro.count - 1; const struct part *pj = &parts_j[sort_j[last_pj].i]; const float last_dj = @@ -379,14 +379,14 @@ __attribute__((always_inline)) INLINE static void populate_max_index_density( } } else { /* Make sure that max index is set to last particle in ci.*/ - max_index_j[0] = ci->count - 1; + max_index_j[0] = ci->hydro.count - 1; } } else { /* Make sure that foreign cells are only read into the cache if the local * cell requires it. * Also ensure that it does not require any particles from ci. */ last_pj = 0; - max_index_j[0] = ci->count - 1; + max_index_j[0] = ci->hydro.count - 1; } *init_pi = first_pi; @@ -430,10 +430,10 @@ __attribute__((always_inline)) INLINE static void populate_max_index_force( int *init_pj, const timebin_t max_active_bin, const int active_ci, const int active_cj) { - const struct part *restrict parts_i = ci->parts; - const struct part *restrict parts_j = cj->parts; + const struct part *restrict parts_i = ci->hydro.parts; + const struct part *restrict parts_j = cj->hydro.parts; - int first_pi = 0, last_pj = cj->count - 1; + int first_pi = 0, last_pj = cj->hydro.count - 1; int temp, active_id; /* Only populate max_index array for local actve cells. 
*/ @@ -441,7 +441,7 @@ __attribute__((always_inline)) INLINE static void populate_max_index_force( /* Find the leftmost active particle in cell i that interacts with any * particle in cell j. */ - first_pi = ci->count; + first_pi = ci->hydro.count; active_id = first_pi - 1; while (first_pi > 0 && sort_i[first_pi - 1].d + dx_max + h_max > dj_min) { first_pi--; @@ -455,7 +455,7 @@ __attribute__((always_inline)) INLINE static void populate_max_index_force( /* Find the maximum index into cell j for each particle in range in cell i. */ - if (first_pi < ci->count) { + if (first_pi < ci->hydro.count) { /* Start from the first particle in cell j. */ temp = 0; @@ -466,34 +466,34 @@ __attribute__((always_inline)) INLINE static void populate_max_index_force( rshift; /* Loop through particles in cell j until they are not in range of pi. - * Make sure that temp stays between 0 and cj->count - 1.*/ - while (temp < cj->count - 1 && first_di > sort_j[temp].d) temp++; + * Make sure that temp stays between 0 and cj->hydro.count - 1.*/ + while (temp < cj->hydro.count - 1 && first_di > sort_j[temp].d) temp++; max_index_i[first_pi] = temp; /* Populate max_index_i for remaining particles that are within range. */ - for (int i = first_pi + 1; i < ci->count; i++) { + for (int i = first_pi + 1; i < ci->hydro.count; i++) { temp = max_index_i[i - 1]; pi = &parts_i[sort_i[i].i]; const float di = sort_i[i].d + max(pi->h, hj_max_raw) * kernel_gamma + dx_max - rshift; - /* Make sure that temp stays between 0 and cj->count - 1.*/ - while (temp < cj->count - 1 && di > sort_j[temp].d) temp++; + /* Make sure that temp stays between 0 and cj->hydro.count - 1.*/ + while (temp < cj->hydro.count - 1 && di > sort_j[temp].d) temp++; max_index_i[i] = temp; } } else { /* Make sure that max index is set to first particle in cj.*/ - max_index_i[ci->count - 1] = 0; + max_index_i[ci->hydro.count - 1] = 0; } } else { /* Make sure that foreign cells are only read into the cache if the local * cell requires it. 
* Also ensure that it does not require any particles from cj. */ - first_pi = ci->count - 1; - max_index_i[ci->count - 1] = 0; + first_pi = ci->hydro.count - 1; + max_index_i[ci->hydro.count - 1] = 0; } /* Only populate max_index array for local actve cells. */ @@ -502,7 +502,7 @@ __attribute__((always_inline)) INLINE static void populate_max_index_force( * particle in cell i. */ last_pj = -1; active_id = last_pj; - while (last_pj < cj->count && + while (last_pj < cj->hydro.count && sort_j[last_pj + 1].d - h_max - dx_max < di_max) { last_pj++; /* Store the index of the particle if it is active. */ @@ -518,7 +518,7 @@ __attribute__((always_inline)) INLINE static void populate_max_index_force( if (last_pj >= 0) { /* Start from the last particle in cell i. */ - temp = ci->count - 1; + temp = ci->hydro.count - 1; const struct part *pj = &parts_j[sort_j[last_pj].i]; const float last_dj = sort_j[last_pj].d - dx_max - @@ -543,14 +543,14 @@ __attribute__((always_inline)) INLINE static void populate_max_index_force( } } else { /* Make sure that max index is set to last particle in ci.*/ - max_index_j[0] = ci->count - 1; + max_index_j[0] = ci->hydro.count - 1; } } else { /* Make sure that foreign cells are only read into the cache if the local * cell requires it. * Also ensure that it does not require any particles from ci. 
*/ last_pj = 0; - max_index_j[0] = ci->count - 1; + max_index_j[0] = ci->hydro.count - 1; } *init_pi = first_pi; @@ -655,8 +655,8 @@ void runner_doself1_density_vec(struct runner *r, struct cell *restrict c) { /* Get some local variables */ const struct engine *e = r->e; const timebin_t max_active_bin = e->max_active_bin; - struct part *restrict parts = c->parts; - const int count = c->count; + struct part *restrict parts = c->hydro.parts; + const int count = c->hydro.count; TIMER_TIC; @@ -888,7 +888,7 @@ void runner_doself_subset_density_vec(struct runner *r, struct cell *restrict c, #if defined(WITH_VECTORIZATION) && defined(GADGET2_SPH) - const int count = c->count; + const int count = c->hydro.count; TIMER_TIC; @@ -1016,7 +1016,7 @@ void runner_doself_subset_density_vec(struct runner *r, struct cell *restrict c, vec_is_mask_true(v_doi_mask2_self_check); #ifdef DEBUG_INTERACTIONS_SPH - struct part *restrict parts_i = c->parts; + struct part *restrict parts_i = c->hydro.parts; for (int bit_index = 0; bit_index < VEC_SIZE; bit_index++) { if (doi_mask & (1 << bit_index)) { if (pi->num_ngb_density < MAX_NUM_OF_NEIGHBOURS) @@ -1113,8 +1113,8 @@ void runner_doself2_force_vec(struct runner *r, struct cell *restrict c) { const struct engine *e = r->e; const struct cosmology *restrict cosmo = e->cosmology; const timebin_t max_active_bin = e->max_active_bin; - struct part *restrict parts = c->parts; - const int count = c->count; + struct part *restrict parts = c->hydro.parts; + const int count = c->hydro.count; TIMER_TIC; @@ -1322,19 +1322,19 @@ void runner_dopair1_density_vec(struct runner *r, struct cell *ci, for (int k = 0; k < 3; k++) rshift += shift[k] * runner_shift[sid][k]; /* Pick-out the sorted lists. 
*/ - const struct entry *restrict sort_i = ci->sort[sid]; - const struct entry *restrict sort_j = cj->sort[sid]; + const struct entry *restrict sort_i = ci->hydro.sort[sid]; + const struct entry *restrict sort_j = cj->hydro.sort[sid]; /* Get some other useful values. */ - const int count_i = ci->count; - const int count_j = cj->count; - const double hi_max = ci->h_max * kernel_gamma - rshift; - const double hj_max = cj->h_max * kernel_gamma; - struct part *restrict parts_i = ci->parts; - struct part *restrict parts_j = cj->parts; + const int count_i = ci->hydro.count; + const int count_j = cj->hydro.count; + const double hi_max = ci->hydro.h_max * kernel_gamma - rshift; + const double hj_max = cj->hydro.h_max * kernel_gamma; + struct part *restrict parts_i = ci->hydro.parts; + struct part *restrict parts_j = cj->hydro.parts; const double di_max = sort_i[count_i - 1].d - rshift; const double dj_min = sort_j[0].d; - const float dx_max = (ci->dx_max_sort + cj->dx_max_sort); + const float dx_max = (ci->hydro.dx_max_sort + cj->hydro.dx_max_sort); const int active_ci = cell_is_active_hydro(ci, e) && ci_local; const int active_cj = cell_is_active_hydro(cj, e) && cj_local; @@ -1693,11 +1693,11 @@ void runner_dopair_subset_density_vec(struct runner *r, TIMER_TIC; - const int count_j = cj->count; + const int count_j = cj->hydro.count; /* Pick-out the sorted lists. */ - const struct entry *restrict sort_j = cj->sort[sid]; - const float dxj = cj->dx_max_sort; + const struct entry *restrict sort_j = cj->hydro.sort[sid]; + const float dxj = cj->hydro.dx_max_sort; /* Get both particle caches from the runner and re-allocate * them if they are not big enough for the cells. 
*/ @@ -1806,7 +1806,7 @@ void runner_dopair_subset_density_vec(struct runner *r, vec_create_mask(v_doi_mask, vec_cmp_lt(v_r2.v, v_hig2.v)); #ifdef DEBUG_INTERACTIONS_SPH - struct part *restrict parts_j = cj->parts; + struct part *restrict parts_j = cj->hydro.parts; for (int bit_index = 0; bit_index < VEC_SIZE; bit_index++) { if (vec_is_mask_true(v_doi_mask) & (1 << bit_index)) { if (pi->num_ngb_density < MAX_NUM_OF_NEIGHBOURS) { @@ -1935,7 +1935,7 @@ void runner_dopair_subset_density_vec(struct runner *r, vec_create_mask(v_doi_mask, vec_cmp_lt(v_r2.v, v_hig2.v)); #ifdef DEBUG_INTERACTIONS_SPH - struct part *restrict parts_j = cj->parts; + struct part *restrict parts_j = cj->hydro.parts; for (int bit_index = 0; bit_index < VEC_SIZE; bit_index++) { if (vec_is_mask_true(v_doi_mask) & (1 << bit_index)) { if (pi->num_ngb_density < MAX_NUM_OF_NEIGHBOURS) { @@ -2007,21 +2007,21 @@ void runner_dopair2_force_vec(struct runner *r, struct cell *ci, for (int k = 0; k < 3; k++) rshift += shift[k] * runner_shift[sid][k]; /* Pick-out the sorted lists. */ - const struct entry *restrict sort_i = ci->sort[sid]; - const struct entry *restrict sort_j = cj->sort[sid]; + const struct entry *restrict sort_i = ci->hydro.sort[sid]; + const struct entry *restrict sort_j = cj->hydro.sort[sid]; /* Get some other useful values. 
*/ - const int count_i = ci->count; - const int count_j = cj->count; - const double hi_max = ci->h_max * kernel_gamma; - const double hj_max = cj->h_max * kernel_gamma; - const double hi_max_raw = ci->h_max; - const double hj_max_raw = cj->h_max; - struct part *restrict parts_i = ci->parts; - struct part *restrict parts_j = cj->parts; + const int count_i = ci->hydro.count; + const int count_j = cj->hydro.count; + const double hi_max = ci->hydro.h_max * kernel_gamma; + const double hj_max = cj->hydro.h_max * kernel_gamma; + const double hi_max_raw = ci->hydro.h_max; + const double hj_max_raw = cj->hydro.h_max; + struct part *restrict parts_i = ci->hydro.parts; + struct part *restrict parts_j = cj->hydro.parts; const double di_max = sort_i[count_i - 1].d - rshift; const double dj_min = sort_j[0].d; - const float dx_max = (ci->dx_max_sort + cj->dx_max_sort); + const float dx_max = (ci->hydro.dx_max_sort + cj->hydro.dx_max_sort); const int active_ci = cell_is_active_hydro(ci, e) && ci_local; const int active_cj = cell_is_active_hydro(cj, e) && cj_local; diff --git a/src/scheduler.c b/src/scheduler.c index 4974884651b02db57d851493a4fc8fe343483a05..2ae8f6785434af021b52dd2d6586b4e2dc5d68bb 100644 --- a/src/scheduler.c +++ b/src/scheduler.c @@ -113,6 +113,32 @@ void scheduler_addunlock(struct scheduler *s, struct task *ta, atomic_inc(&s->completed_unlock_writes); } +/** + * @brief generate the dependency name for the tasks + * + * @param ta_type The #task type. + * @param ta_subtype The #task type. 
+ * @param ta_name (return) The formatted string + */ +void scheduler_task_dependency_name(int ta_type, int ta_subtype, + char *ta_name) { + + /* Check input */ + if ((ta_type < 0) || (ta_type >= task_type_count)) + error("Unknown task type %i", ta_type); + + if ((ta_subtype < 0) || (ta_subtype >= task_subtype_count)) + error("Unknown task subtype %i with type %s", ta_subtype, + taskID_names[ta_type]); + + /* construct line */ + if (ta_subtype == task_subtype_none) + sprintf(ta_name, "%s", taskID_names[ta_type]); + else + sprintf(ta_name, "\"%s %s\"", taskID_names[ta_type], + subtaskID_names[ta_subtype]); +} + /** * @brief Write a dot file with the task dependencies. * @@ -139,10 +165,15 @@ void scheduler_write_dependencies(struct scheduler *s, int verbose) { * task_subtype */ int *table = (int *)malloc(nber_relation * sizeof(int)); if (table == NULL) - error("Error allocating memory for task-dependency graph."); + error("Error allocating memory for task-dependency graph (table)."); + + int *count_rel = (int *)malloc(nber_relation * sizeof(int) / 2); + if (count_rel == NULL) + error("Error allocating memory for task-dependency graph (count_rel)."); /* Reset everything */ for (int i = 0; i < nber_relation; i++) table[i] = -1; + for (int i = 0; i < nber_relation / 2; i++) count_rel[i] = 0; /* Create file */ char filename[200] = "dependency_graph.dot"; @@ -189,13 +220,16 @@ void scheduler_write_dependencies(struct scheduler *s, int verbose) { } k += 1; - cur = &cur[3]; + cur = &cur[2]; } /* max_nber_dep is too small */ if (k == max_nber_dep) error("Not enough memory, please increase max_nber_dep"); + /* Increase counter of relation */ + count_rel[ind / 2 + k] += 1; + /* Not written yet => write it */ if (!written) { @@ -204,20 +238,8 @@ void scheduler_write_dependencies(struct scheduler *s, int verbose) { char tb_name[200]; /* construct line */ - if (ta->subtype == task_subtype_none) - sprintf(ta_name, "%s", taskID_names[ta->type]); - else - sprintf(ta_name, "\"%s 
%s\"", taskID_names[ta->type], - subtaskID_names[ta->subtype]); - - if (tb->subtype == task_subtype_none) - sprintf(tb_name, "%s", taskID_names[tb->type]); - else - sprintf(tb_name, "\"%s %s\"", taskID_names[tb->type], - subtaskID_names[tb->subtype]); - - /* Write to the ffile */ - fprintf(f, "\t %s->%s;\n", ta_name, tb_name); + scheduler_task_dependency_name(ta->type, ta->subtype, ta_name); + scheduler_task_dependency_name(tb->type, tb->subtype, tb_name); /* Change colour of implicit tasks */ if (ta->implicit) @@ -242,6 +264,7 @@ void scheduler_write_dependencies(struct scheduler *s, int verbose) { int gradient_cluster[4] = {0}; int force_cluster[4] = {0}; int gravity_cluster[5] = {0}; + int stars_density_cluster[4] = {0}; /* Check whether we need to construct a group of tasks */ for (int type = 0; type < task_type_count; ++type) { @@ -262,6 +285,9 @@ void scheduler_write_dependencies(struct scheduler *s, int verbose) { force_cluster[k] = 1; if (type == task_type_self + k && subtype == task_subtype_grav) gravity_cluster[k] = 1; + if (type == task_type_self + k && + subtype == task_subtype_stars_density) + stars_density_cluster[k] = 1; } if (type == task_type_grav_mesh) gravity_cluster[2] = 1; if (type == task_type_grav_long_range) gravity_cluster[3] = 1; @@ -312,10 +338,55 @@ void scheduler_write_dependencies(struct scheduler *s, int verbose) { fprintf(f, "\t\t %s;\n", taskID_names[task_type_grav_mm]); fprintf(f, "\t};\n"); - /* Be clean */ + /* Make a cluster for the density tasks */ + fprintf(f, "\t subgraph cluster4{\n"); + fprintf(f, "\t\t label=\"\";\n"); + for (int k = 0; k < 4; ++k) + if (stars_density_cluster[k]) + fprintf(f, "\t\t \"%s %s\";\n", taskID_names[task_type_self + k], + subtaskID_names[task_subtype_stars_density]); + fprintf(f, "\t};\n"); + + /* Write down the number of relation */ + for (int ta_type = 0; ta_type < task_type_count; ta_type++) { + + for (int ta_subtype = 0; ta_subtype < task_subtype_count; ta_subtype++) { + + /* Get task indice 
*/ + const int ind = + (ta_type * task_subtype_count + ta_subtype) * max_nber_dep; + + /* Loop over dependencies */ + for (int k = 0; k < max_nber_dep; k++) { + + if (count_rel[ind + k] == 0) continue; + + /* Get task type */ + const int i = 2 * (ind + k); + int tb_type = table[i]; + int tb_subtype = table[i + 1]; + + /* Get names */ + char ta_name[200]; + char tb_name[200]; + + scheduler_task_dependency_name(ta_type, ta_subtype, ta_name); + scheduler_task_dependency_name(tb_type, tb_subtype, tb_name); + + /* Write to the fle */ + fprintf(f, "\t %s->%s[label=%i];\n", ta_name, tb_name, + count_rel[ind + k]); + } + } + } + + /* Close the file */ fprintf(f, "}"); fclose(f); + + /* Be clean */ free(table); + free(count_rel); if (verbose) message("Printing task graph took %.3f %s.", @@ -337,9 +408,9 @@ static void scheduler_splittask_hydro(struct task *t, struct scheduler *s) { /* Reset the redo flag. */ redo = 0; - /* Non-splittable task? */ + /* Empty task? */ if ((t->ci == NULL) || (t->type == task_type_pair && t->cj == NULL) || - t->ci->count == 0 || (t->cj != NULL && t->cj->count == 0)) { + t->ci->hydro.count == 0 || (t->cj != NULL && t->cj->hydro.count == 0)) { t->type = task_type_none; t->subtype = task_subtype_none; t->cj = NULL; @@ -363,7 +434,7 @@ static void scheduler_splittask_hydro(struct task *t, struct scheduler *s) { if (cell_can_split_self_hydro_task(ci)) { /* Make a sub? */ - if (scheduler_dosub && ci->count < space_subsize_self_hydro) { + if (scheduler_dosub && ci->hydro.count < space_subsize_self_hydro) { /* convert to a self-subtask. 
*/ t->type = task_type_sub_self; @@ -379,7 +450,7 @@ static void scheduler_splittask_hydro(struct task *t, struct scheduler *s) { while (ci->progeny[first_child] == NULL) first_child++; t->ci = ci->progeny[first_child]; for (int k = first_child + 1; k < 8; k++) - if (ci->progeny[k] != NULL && ci->progeny[k]->count) + if (ci->progeny[k] != NULL && ci->progeny[k]->hydro.count) scheduler_splittask_hydro( scheduler_addtask(s, task_type_self, t->subtype, 0, 0, ci->progeny[k], NULL), @@ -387,9 +458,9 @@ static void scheduler_splittask_hydro(struct task *t, struct scheduler *s) { /* Make a task for each pair of progeny */ for (int j = 0; j < 8; j++) - if (ci->progeny[j] != NULL && ci->progeny[j]->count) + if (ci->progeny[j] != NULL && ci->progeny[j]->hydro.count) for (int k = j + 1; k < 8; k++) - if (ci->progeny[k] != NULL && ci->progeny[k]->count) + if (ci->progeny[k] != NULL && ci->progeny[k]->hydro.count) scheduler_splittask_hydro( scheduler_addtask(s, task_type_pair, t->subtype, sub_sid_flag[j][k], 0, ci->progeny[j], @@ -418,13 +489,20 @@ static void scheduler_splittask_hydro(struct task *t, struct scheduler *s) { double shift[3]; const int sid = space_getsid(s->space, &ci, &cj, shift); +#ifdef SWIFT_DEBUG_CHECKS + if (sid != t->flags) + error("Got pair task with incorrect flags: sid=%d flags=%lld", sid, + t->flags); +#endif + /* Should this task be split-up? */ if (cell_can_split_pair_hydro_task(ci) && cell_can_split_pair_hydro_task(cj)) { /* Replace by a single sub-task? */ if (scheduler_dosub && /* Use division to avoid integer overflow. */ - ci->count * sid_scale[sid] < space_subsize_pair_hydro / cj->count && + ci->hydro.count * sid_scale[sid] < + space_subsize_pair_hydro / cj->hydro.count && !sort_is_corner(sid)) { /* Make this task a sub task. */ @@ -773,18 +851,18 @@ static void scheduler_splittask_hydro(struct task *t, struct scheduler *s) { /* Otherwise, break it up if it is too large? 
*/ } else if (scheduler_doforcesplit && ci->split && cj->split && - (ci->count > space_maxsize / cj->count)) { + (ci->hydro.count > space_maxsize / cj->hydro.count)) { - // message( "force splitting pair with %i and %i parts." , ci->count , - // cj->count ); + // message( "force splitting pair with %i and %i parts." , + // ci->hydro.count , cj->hydro.count ); /* Replace the current task. */ t->type = task_type_none; for (int j = 0; j < 8; j++) - if (ci->progeny[j] != NULL && ci->progeny[j]->count) + if (ci->progeny[j] != NULL && ci->progeny[j]->hydro.count) for (int k = 0; k < 8; k++) - if (cj->progeny[k] != NULL && cj->progeny[k]->count) { + if (cj->progeny[k] != NULL && cj->progeny[k]->hydro.count) { struct task *tl = scheduler_addtask(s, task_type_pair, t->subtype, 0, 0, ci->progeny[j], cj->progeny[k]); @@ -796,6 +874,478 @@ static void scheduler_splittask_hydro(struct task *t, struct scheduler *s) { } /* iterate over the current task. */ } +/** + * @brief Split a stars task if too large. + * + * @param t The #task + * @param s The #scheduler we are working in. + */ +static void scheduler_splittask_stars(struct task *t, struct scheduler *s) { + + /* Iterate on this task until we're done with it. */ + int redo = 1; + while (redo) { + + /* Reset the redo flag. */ + redo = 0; + + /* Empty task? */ + if ((t->ci == NULL) || (t->type == task_type_pair && t->cj == NULL) || + t->ci->stars.count == 0 || (t->cj != NULL && t->cj->stars.count == 0)) { + t->type = task_type_none; + t->subtype = task_subtype_none; + t->cj = NULL; + t->skip = 1; + break; + } + + /* Self-interaction? */ + if (t->type == task_type_self) { + + /* Get a handle on the cell involved. */ + struct cell *ci = t->ci; + + /* Foreign task? */ + if (ci->nodeID != s->nodeID) { + t->skip = 1; + break; + } + + /* Is this cell even split and the task does not violate h ? */ + if (cell_can_split_self_stars_task(ci)) { + + /* Make a sub? 
*/ + if (scheduler_dosub && ci->stars.count < space_subsize_self_stars) { + + /* convert to a self-subtask. */ + t->type = task_type_sub_self; + + /* Otherwise, make tasks explicitly. */ + } else { + + /* Take a step back (we're going to recycle the current task)... */ + redo = 1; + + /* Add the self tasks. */ + int first_child = 0; + while (ci->progeny[first_child] == NULL) first_child++; + t->ci = ci->progeny[first_child]; + for (int k = first_child + 1; k < 8; k++) + if (ci->progeny[k] != NULL && ci->progeny[k]->stars.count) + scheduler_splittask_stars( + scheduler_addtask(s, task_type_self, t->subtype, 0, 0, + ci->progeny[k], NULL), + s); + + /* Make a task for each pair of progeny */ + for (int j = 0; j < 8; j++) + if (ci->progeny[j] != NULL && ci->progeny[j]->stars.count) + for (int k = j + 1; k < 8; k++) + if (ci->progeny[k] != NULL && ci->progeny[k]->stars.count) + scheduler_splittask_stars( + scheduler_addtask(s, task_type_pair, t->subtype, + sub_sid_flag[j][k], 0, ci->progeny[j], + ci->progeny[k]), + s); + } + } /* Cell is split */ + + } /* Self interaction */ + + /* Pair interaction? */ + else if (t->type == task_type_pair) { + + /* Get a handle on the cells involved. */ + struct cell *ci = t->ci; + struct cell *cj = t->cj; + + /* Foreign task? */ + if (ci->nodeID != s->nodeID && cj->nodeID != s->nodeID) { + t->skip = 1; + break; + } + + /* Get the sort ID, use space_getsid and not t->flags + to make sure we get ci and cj swapped if needed. */ + double shift[3]; + const int sid = space_getsid(s->space, &ci, &cj, shift); + + /* Should this task be split-up? */ + if (cell_can_split_pair_stars_task(ci) && + cell_can_split_pair_stars_task(cj)) { + + /* Replace by a single sub-task? */ + if (scheduler_dosub && /* Use division to avoid integer overflow. */ + ci->stars.count * sid_scale[sid] < + space_subsize_pair_stars / cj->stars.count && + !sort_is_corner(sid)) { + + /* Make this task a sub task. */ + t->type = task_type_sub_pair; + + /* Otherwise, split it. 
*/ + } else { + + /* Take a step back (we're going to recycle the current task)... */ + redo = 1; + + /* For each different sorting type... */ + switch (sid) { + + case 0: /* ( 1 , 1 , 1 ) */ + t->ci = ci->progeny[7]; + t->cj = cj->progeny[0]; + t->flags = 0; + break; + + case 1: /* ( 1 , 1 , 0 ) */ + t->ci = ci->progeny[6]; + t->cj = cj->progeny[0]; + t->flags = 1; + scheduler_splittask_stars( + scheduler_addtask(s, task_type_pair, t->subtype, 1, 0, + ci->progeny[7], cj->progeny[1]), + s); + scheduler_splittask_stars( + scheduler_addtask(s, task_type_pair, t->subtype, 0, 0, + ci->progeny[6], cj->progeny[1]), + s); + scheduler_splittask_stars( + scheduler_addtask(s, task_type_pair, t->subtype, 2, 0, + ci->progeny[7], cj->progeny[0]), + s); + break; + + case 2: /* ( 1 , 1 , -1 ) */ + t->ci = ci->progeny[6]; + t->cj = cj->progeny[1]; + t->flags = 2; + break; + + case 3: /* ( 1 , 0 , 1 ) */ + t->ci = ci->progeny[5]; + t->cj = cj->progeny[0]; + t->flags = 3; + scheduler_splittask_stars( + scheduler_addtask(s, task_type_pair, t->subtype, 3, 0, + ci->progeny[7], cj->progeny[2]), + s); + scheduler_splittask_stars( + scheduler_addtask(s, task_type_pair, t->subtype, 0, 0, + ci->progeny[5], cj->progeny[2]), + s); + scheduler_splittask_stars( + scheduler_addtask(s, task_type_pair, t->subtype, 6, 0, + ci->progeny[7], cj->progeny[0]), + s); + break; + + case 4: /* ( 1 , 0 , 0 ) */ + t->ci = ci->progeny[4]; + t->cj = cj->progeny[0]; + t->flags = 4; + scheduler_splittask_stars( + scheduler_addtask(s, task_type_pair, t->subtype, 5, 0, + ci->progeny[5], cj->progeny[0]), + s); + scheduler_splittask_stars( + scheduler_addtask(s, task_type_pair, t->subtype, 7, 0, + ci->progeny[6], cj->progeny[0]), + s); + scheduler_splittask_stars( + scheduler_addtask(s, task_type_pair, t->subtype, 8, 0, + ci->progeny[7], cj->progeny[0]), + s); + scheduler_splittask_stars( + scheduler_addtask(s, task_type_pair, t->subtype, 3, 0, + ci->progeny[4], cj->progeny[1]), + s); + scheduler_splittask_stars( + 
scheduler_addtask(s, task_type_pair, t->subtype, 4, 0, + ci->progeny[5], cj->progeny[1]), + s); + scheduler_splittask_stars( + scheduler_addtask(s, task_type_pair, t->subtype, 6, 0, + ci->progeny[6], cj->progeny[1]), + s); + scheduler_splittask_stars( + scheduler_addtask(s, task_type_pair, t->subtype, 7, 0, + ci->progeny[7], cj->progeny[1]), + s); + scheduler_splittask_stars( + scheduler_addtask(s, task_type_pair, t->subtype, 1, 0, + ci->progeny[4], cj->progeny[2]), + s); + scheduler_splittask_stars( + scheduler_addtask(s, task_type_pair, t->subtype, 2, 0, + ci->progeny[5], cj->progeny[2]), + s); + scheduler_splittask_stars( + scheduler_addtask(s, task_type_pair, t->subtype, 4, 0, + ci->progeny[6], cj->progeny[2]), + s); + scheduler_splittask_stars( + scheduler_addtask(s, task_type_pair, t->subtype, 5, 0, + ci->progeny[7], cj->progeny[2]), + s); + scheduler_splittask_stars( + scheduler_addtask(s, task_type_pair, t->subtype, 0, 0, + ci->progeny[4], cj->progeny[3]), + s); + scheduler_splittask_stars( + scheduler_addtask(s, task_type_pair, t->subtype, 1, 0, + ci->progeny[5], cj->progeny[3]), + s); + scheduler_splittask_stars( + scheduler_addtask(s, task_type_pair, t->subtype, 3, 0, + ci->progeny[6], cj->progeny[3]), + s); + scheduler_splittask_stars( + scheduler_addtask(s, task_type_pair, t->subtype, 4, 0, + ci->progeny[7], cj->progeny[3]), + s); + break; + + case 5: /* ( 1 , 0 , -1 ) */ + t->ci = ci->progeny[4]; + t->cj = cj->progeny[1]; + t->flags = 5; + scheduler_splittask_stars( + scheduler_addtask(s, task_type_pair, t->subtype, 5, 0, + ci->progeny[6], cj->progeny[3]), + s); + scheduler_splittask_stars( + scheduler_addtask(s, task_type_pair, t->subtype, 2, 0, + ci->progeny[4], cj->progeny[3]), + s); + scheduler_splittask_stars( + scheduler_addtask(s, task_type_pair, t->subtype, 8, 0, + ci->progeny[6], cj->progeny[1]), + s); + break; + + case 6: /* ( 1 , -1 , 1 ) */ + t->ci = ci->progeny[5]; + t->cj = cj->progeny[2]; + t->flags = 6; + break; + + case 7: /* ( 1 , -1 
, 0 ) */ + t->ci = ci->progeny[4]; + t->cj = cj->progeny[3]; + t->flags = 6; + scheduler_splittask_stars( + scheduler_addtask(s, task_type_pair, t->subtype, 8, 0, + ci->progeny[5], cj->progeny[2]), + s); + scheduler_splittask_stars( + scheduler_addtask(s, task_type_pair, t->subtype, 7, 0, + ci->progeny[4], cj->progeny[2]), + s); + scheduler_splittask_stars( + scheduler_addtask(s, task_type_pair, t->subtype, 7, 0, + ci->progeny[5], cj->progeny[3]), + s); + break; + + case 8: /* ( 1 , -1 , -1 ) */ + t->ci = ci->progeny[4]; + t->cj = cj->progeny[3]; + t->flags = 8; + break; + + case 9: /* ( 0 , 1 , 1 ) */ + t->ci = ci->progeny[3]; + t->cj = cj->progeny[0]; + t->flags = 9; + scheduler_splittask_stars( + scheduler_addtask(s, task_type_pair, t->subtype, 9, 0, + ci->progeny[7], cj->progeny[4]), + s); + scheduler_splittask_stars( + scheduler_addtask(s, task_type_pair, t->subtype, 0, 0, + ci->progeny[3], cj->progeny[4]), + s); + scheduler_splittask_stars( + scheduler_addtask(s, task_type_pair, t->subtype, 8, 0, + ci->progeny[7], cj->progeny[0]), + s); + break; + + case 10: /* ( 0 , 1 , 0 ) */ + t->ci = ci->progeny[2]; + t->cj = cj->progeny[0]; + t->flags = 10; + scheduler_splittask_stars( + scheduler_addtask(s, task_type_pair, t->subtype, 11, 0, + ci->progeny[3], cj->progeny[0]), + s); + scheduler_splittask_stars( + scheduler_addtask(s, task_type_pair, t->subtype, 7, 0, + ci->progeny[6], cj->progeny[0]), + s); + scheduler_splittask_stars( + scheduler_addtask(s, task_type_pair, t->subtype, 6, 0, + ci->progeny[7], cj->progeny[0]), + s); + scheduler_splittask_stars( + scheduler_addtask(s, task_type_pair, t->subtype, 9, 0, + ci->progeny[2], cj->progeny[1]), + s); + scheduler_splittask_stars( + scheduler_addtask(s, task_type_pair, t->subtype, 10, 0, + ci->progeny[3], cj->progeny[1]), + s); + scheduler_splittask_stars( + scheduler_addtask(s, task_type_pair, t->subtype, 8, 0, + ci->progeny[6], cj->progeny[1]), + s); + scheduler_splittask_stars( + scheduler_addtask(s, 
task_type_pair, t->subtype, 7, 0, + ci->progeny[7], cj->progeny[1]), + s); + scheduler_splittask_stars( + scheduler_addtask(s, task_type_pair, t->subtype, 1, 0, + ci->progeny[2], cj->progeny[4]), + s); + scheduler_splittask_stars( + scheduler_addtask(s, task_type_pair, t->subtype, 2, 0, + ci->progeny[3], cj->progeny[4]), + s); + scheduler_splittask_stars( + scheduler_addtask(s, task_type_pair, t->subtype, 10, 0, + ci->progeny[6], cj->progeny[4]), + s); + scheduler_splittask_stars( + scheduler_addtask(s, task_type_pair, t->subtype, 11, 0, + ci->progeny[7], cj->progeny[4]), + s); + scheduler_splittask_stars( + scheduler_addtask(s, task_type_pair, t->subtype, 0, 0, + ci->progeny[2], cj->progeny[5]), + s); + scheduler_splittask_stars( + scheduler_addtask(s, task_type_pair, t->subtype, 1, 0, + ci->progeny[3], cj->progeny[5]), + s); + scheduler_splittask_stars( + scheduler_addtask(s, task_type_pair, t->subtype, 9, 0, + ci->progeny[6], cj->progeny[5]), + s); + scheduler_splittask_stars( + scheduler_addtask(s, task_type_pair, t->subtype, 10, 0, + ci->progeny[7], cj->progeny[5]), + s); + break; + + case 11: /* ( 0 , 1 , -1 ) */ + t->ci = ci->progeny[2]; + t->cj = cj->progeny[1]; + t->flags = 11; + scheduler_splittask_stars( + scheduler_addtask(s, task_type_pair, t->subtype, 11, 0, + ci->progeny[6], cj->progeny[5]), + s); + scheduler_splittask_stars( + scheduler_addtask(s, task_type_pair, t->subtype, 2, 0, + ci->progeny[2], cj->progeny[5]), + s); + scheduler_splittask_stars( + scheduler_addtask(s, task_type_pair, t->subtype, 6, 0, + ci->progeny[6], cj->progeny[1]), + s); + break; + + case 12: /* ( 0 , 0 , 1 ) */ + t->ci = ci->progeny[1]; + t->cj = cj->progeny[0]; + t->flags = 12; + scheduler_splittask_stars( + scheduler_addtask(s, task_type_pair, t->subtype, 11, 0, + ci->progeny[3], cj->progeny[0]), + s); + scheduler_splittask_stars( + scheduler_addtask(s, task_type_pair, t->subtype, 5, 0, + ci->progeny[5], cj->progeny[0]), + s); + scheduler_splittask_stars( + 
scheduler_addtask(s, task_type_pair, t->subtype, 2, 0, + ci->progeny[7], cj->progeny[0]), + s); + scheduler_splittask_stars( + scheduler_addtask(s, task_type_pair, t->subtype, 9, 0, + ci->progeny[1], cj->progeny[2]), + s); + scheduler_splittask_stars( + scheduler_addtask(s, task_type_pair, t->subtype, 12, 0, + ci->progeny[3], cj->progeny[2]), + s); + scheduler_splittask_stars( + scheduler_addtask(s, task_type_pair, t->subtype, 8, 0, + ci->progeny[5], cj->progeny[2]), + s); + scheduler_splittask_stars( + scheduler_addtask(s, task_type_pair, t->subtype, 5, 0, + ci->progeny[7], cj->progeny[2]), + s); + scheduler_splittask_stars( + scheduler_addtask(s, task_type_pair, t->subtype, 3, 0, + ci->progeny[1], cj->progeny[4]), + s); + scheduler_splittask_stars( + scheduler_addtask(s, task_type_pair, t->subtype, 6, 0, + ci->progeny[3], cj->progeny[4]), + s); + scheduler_splittask_stars( + scheduler_addtask(s, task_type_pair, t->subtype, 12, 0, + ci->progeny[5], cj->progeny[4]), + s); + scheduler_splittask_stars( + scheduler_addtask(s, task_type_pair, t->subtype, 11, 0, + ci->progeny[7], cj->progeny[4]), + s); + scheduler_splittask_stars( + scheduler_addtask(s, task_type_pair, t->subtype, 0, 0, + ci->progeny[1], cj->progeny[6]), + s); + scheduler_splittask_stars( + scheduler_addtask(s, task_type_pair, t->subtype, 3, 0, + ci->progeny[3], cj->progeny[6]), + s); + scheduler_splittask_stars( + scheduler_addtask(s, task_type_pair, t->subtype, 9, 0, + ci->progeny[5], cj->progeny[6]), + s); + scheduler_splittask_stars( + scheduler_addtask(s, task_type_pair, t->subtype, 12, 0, + ci->progeny[7], cj->progeny[6]), + s); + break; + } /* switch(sid) */ + } + + /* Otherwise, break it up if it is too large? */ + } else if (scheduler_doforcesplit && ci->split && cj->split && + (ci->stars.count > space_maxsize / cj->stars.count)) { + + /* Replace the current task. 
*/ + t->type = task_type_none; + + for (int j = 0; j < 8; j++) + if (ci->progeny[j] != NULL && ci->progeny[j]->stars.count) + for (int k = 0; k < 8; k++) + if (cj->progeny[k] != NULL && cj->progeny[k]->stars.count) { + struct task *tl = + scheduler_addtask(s, task_type_pair, t->subtype, 0, 0, + ci->progeny[j], cj->progeny[k]); + scheduler_splittask_stars(tl, s); + tl->flags = space_getsid(s->space, &t->ci, &t->cj, shift); + } + } + } /* pair interaction? */ + } /* iterate over the current task. */ +} + /** * @brief Split a gravity task if too large. * @@ -804,11 +1354,8 @@ static void scheduler_splittask_hydro(struct task *t, struct scheduler *s) { */ static void scheduler_splittask_gravity(struct task *t, struct scheduler *s) { -/* Temporarily prevent MPI here */ -#ifndef WITH_MPI const struct space *sp = s->space; struct engine *e = sp->e; -#endif /* Iterate on this task until we're done with it. */ int redo = 1; @@ -838,13 +1385,10 @@ static void scheduler_splittask_gravity(struct task *t, struct scheduler *s) { break; } -/* Temporarily prevent MPI here */ -#ifndef WITH_MPI - /* Should we split this task? */ if (cell_can_split_self_gravity_task(ci)) { - if (scheduler_dosub && ci->gcount < space_subsize_self_grav) { + if (scheduler_dosub && ci->grav.count < space_subsize_self_grav) { /* Otherwise, split it. */ } else { @@ -879,7 +1423,6 @@ static void scheduler_splittask_gravity(struct task *t, struct scheduler *s) { } /* Self-gravity only */ } /* Make tasks explicitly */ } /* Cell is split */ -#endif /* WITH_MPI */ } /* Self interaction */ /* Pair interaction? */ @@ -895,68 +1438,70 @@ static void scheduler_splittask_gravity(struct task *t, struct scheduler *s) { break; } -/* Temporarily prevent MPI here */ -#ifndef WITH_MPI - - /* Should we replace it with an M-M task? 
*/ - if (cell_can_use_pair_mm(ci, cj, e, sp)) { - - t->type = task_type_grav_mm; - t->subtype = task_subtype_none; - - /* Since this task will not be split, we can already link it */ - atomic_inc(&ci->nr_tasks); - atomic_inc(&cj->nr_tasks); - engine_addlink(e, &ci->grav, t); - engine_addlink(e, &cj->grav, t); - break; - } - /* Should this task be split-up? */ if (cell_can_split_pair_gravity_task(ci) && cell_can_split_pair_gravity_task(cj)) { + const long long gcount_i = ci->grav.count; + const long long gcount_j = cj->grav.count; + /* Replace by a single sub-task? */ - if (scheduler_dosub && /* Use division to avoid integer overflow. */ - ci->gcount < space_subsize_pair_grav / cj->gcount) { + if (scheduler_dosub && + gcount_i * gcount_j < ((long long)space_subsize_pair_grav)) { /* Otherwise, split it. */ } else { - /* Take a step back (we're going to recycle the current task)... */ - redo = 1; - - /* Find the first non-empty childrens of the cells */ - int first_ci_child = 0, first_cj_child = 0; - while (ci->progeny[first_ci_child] == NULL) first_ci_child++; - while (cj->progeny[first_cj_child] == NULL) first_cj_child++; - - /* Recycle the current pair */ - t->ci = ci->progeny[first_ci_child]; - t->cj = cj->progeny[first_cj_child]; + /* Turn the task into a M-M task that will take care of all the + * progeny pairs */ + t->type = task_type_grav_mm; + t->subtype = task_subtype_none; + t->flags = 0; /* Make a task for every other pair of progeny */ - for (int i = first_ci_child; i < 8; i++) { + for (int i = 0; i < 8; i++) { if (ci->progeny[i] != NULL) { - for (int j = first_cj_child; j < 8; j++) { + for (int j = 0; j < 8; j++) { if (cj->progeny[j] != NULL) { - /* Skip the recycled pair */ - if (i == first_ci_child && j == first_cj_child) continue; + /* Can we use a M-M interaction here? 
*/ + if (cell_can_use_pair_mm_rebuild(ci->progeny[i], + cj->progeny[j], e, sp)) { - scheduler_splittask_gravity( - scheduler_addtask(s, task_type_pair, t->subtype, 0, 0, - ci->progeny[i], cj->progeny[j]), - s); + /* Flag this pair as being treated by the M-M task. + * We use the 64 bits in the task->flags field to store + * this information. The corresponding taks will unpack + * the information and operate according to the choices + * made here. */ + const int flag = i * 8 + j; + t->flags |= (1ULL << flag); + + } else { + + /* Ok, we actually have to create a task */ + scheduler_splittask_gravity( + scheduler_addtask(s, task_type_pair, task_subtype_grav, + 0, 0, ci->progeny[i], cj->progeny[j]), + s); + } } } } } + + /* Can none of the progenies use M-M calculations? */ + if (t->flags == 0) { + t->type = task_type_none; + t->subtype = task_subtype_none; + t->ci = NULL; + t->cj = NULL; + t->skip = 1; + } + } /* Split the pair */ } -#endif /* WITH_MPI */ - } /* pair interaction? */ - } /* iterate over the current task. */ + } /* pair interaction? */ + } /* iterate over the current task. 
*/ } /** @@ -985,6 +1530,8 @@ void scheduler_splittasks_mapper(void *map_data, int num_elements, scheduler_splittask_gravity(t, s); } else if (t->type == task_type_grav_mesh) { /* For future use */ + } else if (t->subtype == task_subtype_stars_density) { + scheduler_splittask_stars(t, s); } else { #ifdef SWIFT_DEBUG_CHECKS error("Unexpected task sub-type"); @@ -1270,20 +1817,33 @@ void scheduler_reweight(struct scheduler *s, int verbose) { for (int k = nr_tasks - 1; k >= 0; k--) { struct task *t = &tasks[tid[k]]; t->weight = 0.f; +#if defined(WITH_MPI) && (defined(HAVE_PARMETIS) || defined(HAVE_METIS)) + t->cost = 0.f; +#endif for (int j = 0; j < t->nr_unlock_tasks; j++) if (t->unlock_tasks[j]->weight > t->weight) t->weight = t->unlock_tasks[j]->weight; float cost = 0.f; +#if defined(WITH_MPI) && (defined(HAVE_PARMETIS) || defined(HAVE_METIS)) + int partcost = 1; +#endif - const float count_i = (t->ci != NULL) ? t->ci->count : 0.f; - const float count_j = (t->cj != NULL) ? t->cj->count : 0.f; - const float gcount_i = (t->ci != NULL) ? t->ci->gcount : 0.f; - const float gcount_j = (t->cj != NULL) ? t->cj->gcount : 0.f; + const float count_i = (t->ci != NULL) ? t->ci->hydro.count : 0.f; + const float count_j = (t->cj != NULL) ? t->cj->hydro.count : 0.f; + const float gcount_i = (t->ci != NULL) ? t->ci->grav.count : 0.f; + const float gcount_j = (t->cj != NULL) ? t->cj->grav.count : 0.f; + const float scount_i = (t->ci != NULL) ? t->ci->stars.count : 0.f; + const float scount_j = (t->cj != NULL) ? 
t->cj->stars.count : 0.f; switch (t->type) { case task_type_sort: cost = wscale * intrinsics_popcount(t->flags) * count_i * - (sizeof(int) * 8 - intrinsics_clz(t->ci->count)); + (sizeof(int) * 8 - intrinsics_clz(t->ci->hydro.count)); + break; + + case task_type_stars_sort: + cost = wscale * intrinsics_popcount(t->flags) * scount_i * + (sizeof(int) * 8 - intrinsics_clz(t->ci->stars.count)); break; case task_type_self: @@ -1291,6 +1851,8 @@ void scheduler_reweight(struct scheduler *s, int verbose) { cost = 1.f * (wscale * gcount_i) * gcount_i; else if (t->subtype == task_subtype_external_grav) cost = 1.f * wscale * gcount_i; + else if (t->subtype == task_subtype_stars_density) + cost = 1.f * wscale * scount_i * count_i; else cost = 1.f * (wscale * count_i) * count_i; break; @@ -1301,6 +1863,14 @@ void scheduler_reweight(struct scheduler *s, int verbose) { cost = 3.f * (wscale * gcount_i) * gcount_j; else cost = 2.f * (wscale * gcount_i) * gcount_j; + } else if (t->subtype == task_subtype_stars_density) { + if (t->ci->nodeID != nodeID) + cost = 3.f * wscale * count_i * scount_j * sid_scale[t->flags]; + else if (t->cj->nodeID != nodeID) + cost = 3.f * wscale * scount_i * count_j * sid_scale[t->flags]; + else + cost = 2.f * wscale * (scount_i * count_j + scount_j * count_i) * + sid_scale[t->flags]; } else { if (t->ci->nodeID != nodeID || t->cj->nodeID != nodeID) cost = 3.f * (wscale * count_i) * count_j * sid_scale[t->flags]; @@ -1310,27 +1880,43 @@ void scheduler_reweight(struct scheduler *s, int verbose) { break; case task_type_sub_pair: - if (t->ci->nodeID != nodeID || t->cj->nodeID != nodeID) { - if (t->flags < 0) - cost = 3.f * (wscale * count_i) * count_j; - else - cost = 3.f * (wscale * count_i) * count_j * sid_scale[t->flags]; +#ifdef SWIFT_DEBUG_CHECKS + if (t->flags < 0) error("Negative flag value!"); +#endif + if (t->subtype == task_subtype_stars_density) { + if (t->ci->nodeID != nodeID) { + cost = 3.f * (wscale * count_i) * scount_j * sid_scale[t->flags]; + 
} else if (t->cj->nodeID != nodeID) { + cost = 3.f * (wscale * scount_i) * count_j * sid_scale[t->flags]; + } else { + cost = 2.f * wscale * (scount_i * count_j + scount_j * count_i) * + sid_scale[t->flags]; + } + } else { - if (t->flags < 0) - cost = 2.f * (wscale * count_i) * count_j; - else + if (t->ci->nodeID != nodeID || t->cj->nodeID != nodeID) { + cost = 3.f * (wscale * count_i) * count_j * sid_scale[t->flags]; + } else { cost = 2.f * (wscale * count_i) * count_j * sid_scale[t->flags]; + } } break; case task_type_sub_self: - cost = 1.f * (wscale * count_i) * count_i; + if (t->subtype == task_subtype_stars_density) { + cost = 1.f * (wscale * scount_i) * count_i; + } else { + cost = 1.f * (wscale * count_i) * count_i; + } break; case task_type_ghost: - if (t->ci == t->ci->super_hydro) cost = wscale * count_i; + if (t->ci == t->ci->hydro.super) cost = wscale * count_i; break; case task_type_extra_ghost: - if (t->ci == t->ci->super_hydro) cost = wscale * count_i; + if (t->ci == t->ci->hydro.super) cost = wscale * count_i; + break; + case task_type_stars_ghost: + if (t->ci == t->ci->hydro.super) cost = wscale * scount_i; break; case task_type_drift_part: cost = wscale * count_i; @@ -1363,12 +1949,18 @@ void scheduler_reweight(struct scheduler *s, int verbose) { cost = wscale * count_i + wscale * gcount_i; break; case task_type_send: +#if defined(WITH_MPI) && (defined(HAVE_PARMETIS) || defined(HAVE_METIS)) + partcost = 0; +#endif if (count_i < 1e5) cost = 10.f * (wscale * count_i) * count_i; else cost = 2e9; break; case task_type_recv: +#if defined(WITH_MPI) && (defined(HAVE_PARMETIS) || defined(HAVE_METIS)) + partcost = 0; +#endif if (count_i < 1e5) cost = 5.f * (wscale * count_i) * count_i; else @@ -1379,8 +1971,8 @@ void scheduler_reweight(struct scheduler *s, int verbose) { break; } -#if defined(WITH_MPI) && defined(HAVE_METIS) - t->cost = cost; +#if defined(WITH_MPI) && (defined(HAVE_PARMETIS) || defined(HAVE_METIS)) + if (partcost) t->cost = cost; #endif 
t->weight += cost; } @@ -1527,20 +2119,23 @@ void scheduler_enqueue(struct scheduler *s, struct task *t) { case task_type_sub_self: if (t->subtype == task_subtype_grav || t->subtype == task_subtype_external_grav) - qid = t->ci->super_gravity->owner; + qid = t->ci->grav.super->owner; else - qid = t->ci->super_hydro->owner; + qid = t->ci->hydro.super->owner; break; case task_type_sort: case task_type_ghost: case task_type_drift_part: - qid = t->ci->super_hydro->owner; + qid = t->ci->hydro.super->owner; break; case task_type_drift_gpart: - qid = t->ci->super_gravity->owner; + qid = t->ci->grav.super->owner; break; case task_type_kick1: case task_type_kick2: + case task_type_stars_ghost: + case task_type_logger: + case task_type_stars_sort: case task_type_timestep: qid = t->ci->super->owner; break; @@ -1555,30 +2150,34 @@ void scheduler_enqueue(struct scheduler *s, struct task *t) { #ifdef WITH_MPI if (t->subtype == task_subtype_tend) { t->buff = (struct pcell_step *)malloc(sizeof(struct pcell_step) * - t->ci->pcell_size); - err = MPI_Irecv( - t->buff, t->ci->pcell_size * sizeof(struct pcell_step), MPI_BYTE, - t->ci->nodeID, t->flags, MPI_COMM_WORLD, &t->req); + t->ci->mpi.pcell_size); + err = MPI_Irecv(t->buff, + t->ci->mpi.pcell_size * sizeof(struct pcell_step), + MPI_BYTE, t->ci->nodeID, t->flags, + subtaskMPI_comms[t->subtype], &t->req); } else if (t->subtype == task_subtype_xv || t->subtype == task_subtype_rho || t->subtype == task_subtype_gradient) { - err = MPI_Irecv(t->ci->parts, t->ci->count, part_mpi_type, - t->ci->nodeID, t->flags, MPI_COMM_WORLD, &t->req); + err = MPI_Irecv(t->ci->hydro.parts, t->ci->hydro.count, part_mpi_type, + t->ci->nodeID, t->flags, subtaskMPI_comms[t->subtype], + &t->req); // message( "receiving %i parts with tag=%i from %i to %i." 
, - // t->ci->count , t->flags , t->ci->nodeID , s->nodeID ); + // t->ci->hydro.count , t->flags , t->ci->nodeID , s->nodeID ); // fflush(stdout); } else if (t->subtype == task_subtype_gpart) { - err = MPI_Irecv(t->ci->gparts, t->ci->gcount, gpart_mpi_type, - t->ci->nodeID, t->flags, MPI_COMM_WORLD, &t->req); + err = MPI_Irecv(t->ci->grav.parts, t->ci->grav.count, gpart_mpi_type, + t->ci->nodeID, t->flags, subtaskMPI_comms[t->subtype], + &t->req); } else if (t->subtype == task_subtype_spart) { - err = MPI_Irecv(t->ci->sparts, t->ci->scount, spart_mpi_type, - t->ci->nodeID, t->flags, MPI_COMM_WORLD, &t->req); + err = MPI_Irecv(t->ci->stars.parts, t->ci->stars.count, + spart_mpi_type, t->ci->nodeID, t->flags, + subtaskMPI_comms[t->subtype], &t->req); } else if (t->subtype == task_subtype_multipole) { t->buff = (struct gravity_tensors *)malloc( - sizeof(struct gravity_tensors) * t->ci->pcell_size); - err = MPI_Irecv( - t->buff, sizeof(struct gravity_tensors) * t->ci->pcell_size, - MPI_BYTE, t->ci->nodeID, t->flags, MPI_COMM_WORLD, &t->req); + sizeof(struct gravity_tensors) * t->ci->mpi.pcell_size); + err = MPI_Irecv(t->buff, t->ci->mpi.pcell_size, multipole_mpi_type, + t->ci->nodeID, t->flags, subtaskMPI_comms[t->subtype], + &t->req); } else { error("Unknown communication sub-type"); } @@ -1594,50 +2193,59 @@ void scheduler_enqueue(struct scheduler *s, struct task *t) { #ifdef WITH_MPI if (t->subtype == task_subtype_tend) { t->buff = (struct pcell_step *)malloc(sizeof(struct pcell_step) * - t->ci->pcell_size); + t->ci->mpi.pcell_size); cell_pack_end_step(t->ci, (struct pcell_step *)t->buff); - if ((t->ci->pcell_size * sizeof(struct pcell_step)) > + if ((t->ci->mpi.pcell_size * sizeof(struct pcell_step)) > s->mpi_message_limit) - err = MPI_Isend( - t->buff, t->ci->pcell_size * sizeof(struct pcell_step), - MPI_BYTE, t->cj->nodeID, t->flags, MPI_COMM_WORLD, &t->req); + err = MPI_Isend(t->buff, + t->ci->mpi.pcell_size * sizeof(struct pcell_step), + MPI_BYTE, 
t->cj->nodeID, t->flags, + subtaskMPI_comms[t->subtype], &t->req); else - err = MPI_Issend( - t->buff, t->ci->pcell_size * sizeof(struct pcell_step), - MPI_BYTE, t->cj->nodeID, t->flags, MPI_COMM_WORLD, &t->req); + err = MPI_Issend(t->buff, + t->ci->mpi.pcell_size * sizeof(struct pcell_step), + MPI_BYTE, t->cj->nodeID, t->flags, + subtaskMPI_comms[t->subtype], &t->req); } else if (t->subtype == task_subtype_xv || t->subtype == task_subtype_rho || t->subtype == task_subtype_gradient) { - if ((t->ci->count * sizeof(struct part)) > s->mpi_message_limit) - err = MPI_Isend(t->ci->parts, t->ci->count, part_mpi_type, - t->cj->nodeID, t->flags, MPI_COMM_WORLD, &t->req); + if ((t->ci->hydro.count * sizeof(struct part)) > s->mpi_message_limit) + err = MPI_Isend(t->ci->hydro.parts, t->ci->hydro.count, + part_mpi_type, t->cj->nodeID, t->flags, + subtaskMPI_comms[t->subtype], &t->req); else - err = MPI_Issend(t->ci->parts, t->ci->count, part_mpi_type, - t->cj->nodeID, t->flags, MPI_COMM_WORLD, &t->req); + err = MPI_Issend(t->ci->hydro.parts, t->ci->hydro.count, + part_mpi_type, t->cj->nodeID, t->flags, + subtaskMPI_comms[t->subtype], &t->req); // message( "sending %i parts with tag=%i from %i to %i." 
, - // t->ci->count , t->flags , s->nodeID , t->cj->nodeID ); + // t->ci->hydro.count , t->flags , s->nodeID , t->cj->nodeID ); // fflush(stdout); } else if (t->subtype == task_subtype_gpart) { - if ((t->ci->gcount * sizeof(struct gpart)) > s->mpi_message_limit) - err = MPI_Isend(t->ci->gparts, t->ci->gcount, gpart_mpi_type, - t->cj->nodeID, t->flags, MPI_COMM_WORLD, &t->req); + if ((t->ci->grav.count * sizeof(struct gpart)) > s->mpi_message_limit) + err = MPI_Isend(t->ci->grav.parts, t->ci->grav.count, + gpart_mpi_type, t->cj->nodeID, t->flags, + subtaskMPI_comms[t->subtype], &t->req); else - err = MPI_Issend(t->ci->gparts, t->ci->gcount, gpart_mpi_type, - t->cj->nodeID, t->flags, MPI_COMM_WORLD, &t->req); + err = MPI_Issend(t->ci->grav.parts, t->ci->grav.count, + gpart_mpi_type, t->cj->nodeID, t->flags, + subtaskMPI_comms[t->subtype], &t->req); } else if (t->subtype == task_subtype_spart) { - if ((t->ci->scount * sizeof(struct spart)) > s->mpi_message_limit) - err = MPI_Isend(t->ci->sparts, t->ci->scount, spart_mpi_type, - t->cj->nodeID, t->flags, MPI_COMM_WORLD, &t->req); + if ((t->ci->stars.count * sizeof(struct spart)) > + s->mpi_message_limit) + err = MPI_Isend(t->ci->stars.parts, t->ci->stars.count, + spart_mpi_type, t->cj->nodeID, t->flags, + subtaskMPI_comms[t->subtype], &t->req); else - err = MPI_Issend(t->ci->sparts, t->ci->scount, spart_mpi_type, - t->cj->nodeID, t->flags, MPI_COMM_WORLD, &t->req); + err = MPI_Issend(t->ci->stars.parts, t->ci->stars.count, + spart_mpi_type, t->cj->nodeID, t->flags, + subtaskMPI_comms[t->subtype], &t->req); } else if (t->subtype == task_subtype_multipole) { t->buff = (struct gravity_tensors *)malloc( - sizeof(struct gravity_tensors) * t->ci->pcell_size); + sizeof(struct gravity_tensors) * t->ci->mpi.pcell_size); cell_pack_multipoles(t->ci, (struct gravity_tensors *)t->buff); - err = MPI_Isend( - t->buff, t->ci->pcell_size * sizeof(struct gravity_tensors), - MPI_BYTE, t->cj->nodeID, t->flags, MPI_COMM_WORLD, &t->req); + 
err = MPI_Isend(t->buff, t->ci->mpi.pcell_size, multipole_mpi_type, + t->cj->nodeID, t->flags, subtaskMPI_comms[t->subtype], + &t->req); } else { error("Unknown communication sub-type"); } @@ -1949,3 +2557,61 @@ void scheduler_free_tasks(struct scheduler *s) { } s->size = 0; } + +/** + * @brief write down each task level + */ +void scheduler_write_task_level(const struct scheduler *s) { + /* init */ + const int max_depth = 30; + const struct task *tasks = s->tasks; + int nr_tasks = s->nr_tasks; + + /* Init counter */ + int size = task_type_count * task_subtype_count * max_depth; + int *count = (int *)malloc(size * sizeof(int)); + if (count == NULL) error("Failed to allocate memory"); + + for (int i = 0; i < size; i++) count[i] = 0; + + /* Count tasks */ + for (int i = 0; i < nr_tasks; i++) { + const struct task *t = &tasks[i]; + if (t->ci) { + + if ((int)t->ci->depth >= max_depth) + error("Cell is too deep, you need to increase max_depth"); + + int ind = t->type * task_subtype_count * max_depth; + ind += t->subtype * max_depth; + ind += (int)t->ci->depth; + + count[ind] += 1; + } + } + + /* Open file */ + char filename[200] = "task_level.txt"; + FILE *f = fopen(filename, "w"); + if (f == NULL) error("Error opening task level file."); + + /* Print header */ + fprintf(f, "# task_type, task_subtype, depth, count\n"); + + /* Print tasks level */ + for (int i = 0; i < size; i++) { + if (count[i] == 0) continue; + + int type = i / (task_subtype_count * max_depth); + int subtype = i - task_subtype_count * max_depth * type; + subtype /= max_depth; + int depth = i - task_subtype_count * max_depth * type; + depth -= subtype * max_depth; + fprintf(f, "%s %s %i %i\n", taskID_names[type], subtaskID_names[subtype], + depth, count[i]); + } + + /* clean up */ + fclose(f); + free(count); +} diff --git a/src/scheduler.h b/src/scheduler.h index 1a75544de12b8402e553e3ae2b84e2d8a65c56e8..f1e130c6ce2a8538b0126e86ee0cbd79cf5a0e0d 100644 --- a/src/scheduler.h +++ b/src/scheduler.h @@ 
-173,5 +173,6 @@ void scheduler_print_tasks(const struct scheduler *s, const char *fileName); void scheduler_clean(struct scheduler *s); void scheduler_free_tasks(struct scheduler *s); void scheduler_write_dependencies(struct scheduler *s, int verbose); +void scheduler_write_task_level(const struct scheduler *s); #endif /* SWIFT_SCHEDULER_H */ diff --git a/src/serial_io.c b/src/serial_io.c index dafa75ab0baacb1b5ddeee34020c9773893bced7..059318df180e0d06e446f9d3f839b16439dd1b34 100644 --- a/src/serial_io.c +++ b/src/serial_io.c @@ -31,6 +31,7 @@ #include <stdio.h> #include <stdlib.h> #include <string.h> +#include <time.h> /* This object's header. */ #include "serial_io.h" @@ -148,11 +149,41 @@ void readArray(hid_t grp, const struct io_props props, size_t N, /* message("Converting ! factor=%e", factor); */ if (io_is_double_precision(props.type)) { - double* temp_d = temp; + double* temp_d = (double*)temp; for (size_t i = 0; i < num_elements; ++i) temp_d[i] *= factor; } else { - float* temp_f = temp; - for (size_t i = 0; i < num_elements; ++i) temp_f[i] *= factor; + float* temp_f = (float*)temp; + +#ifdef SWIFT_DEBUG_CHECKS + float maximum = 0.f; + float minimum = FLT_MAX; +#endif + + /* Loop that converts the Units */ + for (size_t i = 0; i < num_elements; ++i) { + +#ifdef SWIFT_DEBUG_CHECKS + /* Find the absolute minimum and maximum values */ + const float abstemp_f = fabsf(temp_f[i]); + if (abstemp_f != 0.f) { + maximum = max(maximum, abstemp_f); + minimum = min(minimum, abstemp_f); + } +#endif + + /* Convert the float units */ + temp_f[i] *= factor; + } + +#ifdef SWIFT_DEBUG_CHECKS + /* The two possible errors: larger than float or smaller + * than float precision. 
*/ + if (factor * maximum > FLT_MAX) { + error("Unit conversion results in numbers larger than floats"); + } else if (factor * minimum < FLT_MIN) { + error("Numbers smaller than float precision"); + } +#endif } } @@ -189,7 +220,7 @@ void readArray(hid_t grp, const struct io_props props, size_t N, } /* Copy temporary buffer to particle data */ - char* temp_c = temp; + char* temp_c = (char*)temp; for (size_t i = 0; i < N; ++i) memcpy(props.field + i * props.partSize, &temp_c[i * copySize], copySize); @@ -269,8 +300,9 @@ void prepareArray(const struct engine* e, hid_t grp, char* fileName, if (h_data < 0) error("Error while creating dataspace '%s'.", props.name); /* Write XMF description for this data set */ - xmf_write_line(xmfFile, fileName, partTypeGroupName, props.name, N_total, - props.dimension, props.type); + if (xmfFile != NULL) + xmf_write_line(xmfFile, fileName, partTypeGroupName, props.name, N_total, + props.dimension, props.type); /* Write unit conversion factors for this data set */ char buffer[FIELD_BUFFER_SIZE]; @@ -397,7 +429,6 @@ void writeArray(const struct engine* e, hid_t grp, char* fileName, * @param Ngas (output) The number of #part read from the file on that node. * @param Ngparts (output) The number of #gpart read from the file on that node. * @param Nstars (output) The number of #spart read from the file on that node. - * @param periodic (output) 1 if the volume is periodic, 0 if not. * @param flag_entropy (output) 1 if the ICs contained Entropy in the * InternalEnergy field * @param with_hydro Are we reading gas particles ? 
@@ -426,11 +457,11 @@ void writeArray(const struct engine* e, hid_t grp, char* fileName, void read_ic_serial(char* fileName, const struct unit_system* internal_units, double dim[3], struct part** parts, struct gpart** gparts, struct spart** sparts, size_t* Ngas, size_t* Ngparts, - size_t* Nstars, int* periodic, int* flag_entropy, - int with_hydro, int with_gravity, int with_stars, - int cleanup_h, int cleanup_sqrt_a, double h, double a, - int mpi_rank, int mpi_size, MPI_Comm comm, MPI_Info info, - int n_threads, int dry_run) { + size_t* Nstars, int* flag_entropy, int with_hydro, + int with_gravity, int with_stars, int cleanup_h, + int cleanup_sqrt_a, double h, double a, int mpi_rank, + int mpi_size, MPI_Comm comm, MPI_Info info, int n_threads, + int dry_run) { hid_t h_file = 0, h_grp = 0; /* GADGET has only cubic boxes (in cosmological mode) */ @@ -443,7 +474,8 @@ void read_ic_serial(char* fileName, const struct unit_system* internal_units, long long offset[swift_type_count] = {0}; int dimension = 3; /* Assume 3D if nothing is specified */ size_t Ndm = 0; - struct unit_system* ic_units = malloc(sizeof(struct unit_system)); + struct unit_system* ic_units = + (struct unit_system*)malloc(sizeof(struct unit_system)); /* First read some information about the content */ if (mpi_rank == 0) { @@ -454,17 +486,6 @@ void read_ic_serial(char* fileName, const struct unit_system* internal_units, if (h_file < 0) error("Error while opening file '%s' for initial read.", fileName); - /* Open header to read simulation properties */ - /* message("Reading runtime parameters..."); */ - h_grp = H5Gopen(h_file, "/RuntimePars", H5P_DEFAULT); - if (h_grp < 0) error("Error while opening runtime parameters\n"); - - /* Read the relevant information */ - io_read_attribute(h_grp, "PeriodicBoundariesOn", INT, periodic); - - /* Close runtime parameters */ - H5Gclose(h_grp); - /* Open header to read simulation properties */ /* message("Reading file header..."); */ h_grp = H5Gopen(h_file, "/Header", 
H5P_DEFAULT); @@ -479,6 +500,23 @@ void read_ic_serial(char* fileName, const struct unit_system* internal_units, error("ICs dimensionality (%dD) does not match code dimensionality (%dD)", dimension, (int)hydro_dimension); + /* Check whether the number of files is specified (if the info exists) */ + const hid_t hid_files = H5Aexists(h_grp, "NumFilesPerSnapshot"); + int num_files = 1; + if (hid_files < 0) + error( + "Error while testing the existance of 'NumFilesPerSnapshot' " + "attribute"); + if (hid_files > 0) + io_read_attribute(h_grp, "NumFilesPerSnapshot", INT, &num_files); + if (num_files != 1) + error( + "ICs are split over multiples files (%d). SWIFT cannot handle this " + "case. The script /tools/combine_ics.py is availalbe in the " + "repository " + "to combine files into a valid input file.", + num_files); + /* Read the relevant information and print status */ int flag_entropy_temp[6]; io_read_attribute(h_grp, "Flag_Entropy_ICs", INT, flag_entropy_temp); @@ -559,7 +597,6 @@ void read_ic_serial(char* fileName, const struct unit_system* internal_units, /* Now need to broadcast that information to all ranks. 
*/ MPI_Bcast(flag_entropy, 1, MPI_INT, 0, comm); - MPI_Bcast(periodic, 1, MPI_INT, 0, comm); MPI_Bcast(&N_total, swift_type_count, MPI_LONG_LONG_INT, 0, comm); MPI_Bcast(dim, 3, MPI_DOUBLE, 0, comm); MPI_Bcast(ic_units, sizeof(struct unit_system), MPI_BYTE, 0, comm); @@ -573,18 +610,18 @@ void read_ic_serial(char* fileName, const struct unit_system* internal_units, /* Allocate memory to store SPH particles */ if (with_hydro) { *Ngas = N[0]; - if (posix_memalign((void*)parts, part_align, *Ngas * sizeof(struct part)) != - 0) + if (posix_memalign((void**)parts, part_align, + *Ngas * sizeof(struct part)) != 0) error("Error while allocating memory for SPH particles"); bzero(*parts, *Ngas * sizeof(struct part)); } - /* Allocate memory to store star particles */ + /* Allocate memory to store stars particles */ if (with_stars) { - *Nstars = N[swift_type_star]; - if (posix_memalign((void*)sparts, spart_align, + *Nstars = N[swift_type_stars]; + if (posix_memalign((void**)sparts, spart_align, *Nstars * sizeof(struct spart)) != 0) - error("Error while allocating memory for star particles"); + error("Error while allocating memory for stars particles"); bzero(*sparts, *Nstars * sizeof(struct spart)); } @@ -593,8 +630,8 @@ void read_ic_serial(char* fileName, const struct unit_system* internal_units, Ndm = N[1]; *Ngparts = (with_hydro ? N[swift_type_gas] : 0) + N[swift_type_dark_matter] + - (with_stars ? N[swift_type_star] : 0); - if (posix_memalign((void*)gparts, gpart_align, + (with_stars ? 
N[swift_type_stars] : 0); + if (posix_memalign((void**)gparts, gpart_align, *Ngparts * sizeof(struct gpart)) != 0) error("Error while allocating memory for gravity particles"); bzero(*gparts, *Ngparts * sizeof(struct gpart)); @@ -654,10 +691,10 @@ void read_ic_serial(char* fileName, const struct unit_system* internal_units, } break; - case swift_type_star: + case swift_type_stars: if (with_stars) { Nparticles = *Nstars; - star_read_particles(*sparts, list, &num_fields); + stars_read_particles(*sparts, list, &num_fields); } break; @@ -699,9 +736,9 @@ void read_ic_serial(char* fileName, const struct unit_system* internal_units, /* Duplicate the hydro particles into gparts */ if (with_hydro) io_duplicate_hydro_gparts(&tp, *parts, *gparts, *Ngas, Ndm); - /* Duplicate the star particles into gparts */ + /* Duplicate the stars particles into gparts */ if (with_stars) - io_duplicate_star_gparts(&tp, *sparts, *gparts, *Nstars, Ndm + *Ngas); + io_duplicate_stars_gparts(&tp, *sparts, *gparts, *Nstars, Ndm + *Ngas); threadpool_clean(&tp); } @@ -738,34 +775,42 @@ void write_output_serial(struct engine* e, const char* baseName, int mpi_size, MPI_Comm comm, MPI_Info info) { hid_t h_file = 0, h_grp = 0; - const size_t Ngas = e->s->nr_parts; - const size_t Nstars = e->s->nr_sparts; - const size_t Ntot = e->s->nr_gparts; int periodic = e->s->periodic; int numFiles = 1; const struct part* parts = e->s->parts; const struct xpart* xparts = e->s->xparts; const struct gpart* gparts = e->s->gparts; - struct gpart* dmparts = NULL; const struct spart* sparts = e->s->sparts; - const struct cooling_function_data* cooling = e->cooling_func; struct swift_params* params = e->parameter_file; FILE* xmfFile = 0; - /* Number of unassociated gparts */ - const size_t Ndm = Ntot > 0 ? 
Ntot - (Ngas + Nstars) : 0; + /* Number of particles currently in the arrays */ + const size_t Ntot = e->s->nr_gparts; + const size_t Ngas = e->s->nr_parts; + const size_t Nstars = e->s->nr_sparts; + // const size_t Nbaryons = Ngas + Nstars; + // const size_t Ndm = Ntot > 0 ? Ntot - Nbaryons : 0; + + /* Number of particles that we will write */ + const size_t Ntot_written = e->s->nr_gparts - e->s->nr_inhibited_sparts; + const size_t Ngas_written = e->s->nr_parts - e->s->nr_inhibited_parts; + const size_t Nstars_written = e->s->nr_sparts - e->s->nr_inhibited_gparts; + const size_t Nbaryons_written = Ngas_written + Nstars_written; + const size_t Ndm_written = + Ntot_written > 0 ? Ntot_written - Nbaryons_written : 0; /* File name */ char fileName[FILENAME_BUFFER_SIZE]; - if (e->snapshot_label_delta == 1) + if (e->snapshot_int_time_label_on) + snprintf(fileName, FILENAME_BUFFER_SIZE, "%s_%06i.hdf5", baseName, + (int)round(e->time)); + else snprintf(fileName, FILENAME_BUFFER_SIZE, "%s_%04i.hdf5", baseName, e->snapshot_output_count); - else - snprintf(fileName, FILENAME_BUFFER_SIZE, "%s_%06i.hdf5", baseName, - e->snapshot_output_count * e->snapshot_label_delta); /* Compute offset in the file and total number of particles */ - size_t N[swift_type_count] = {Ngas, Ndm, 0, 0, Nstars, 0}; + size_t N[swift_type_count] = { + Ngas_written, Ndm_written, 0, 0, Nstars_written, 0}; long long N_total[swift_type_count] = {0}; long long offset[swift_type_count] = {0}; MPI_Exscan(&N, &offset, swift_type_count, MPI_LONG_LONG_INT, MPI_SUM, comm); @@ -821,6 +866,8 @@ void write_output_serial(struct engine* e, const char* baseName, io_write_attribute(h_grp, "Redshift", DOUBLE, &e->cosmology->z, 1); io_write_attribute(h_grp, "Scale-factor", DOUBLE, &e->cosmology->a, 1); io_write_attribute_s(h_grp, "Code", "SWIFT"); + time_t tm = time(NULL); + io_write_attribute_s(h_grp, "Snapshot date", ctime(&tm)); /* GADGET-2 legacy values */ /* Number of particles of each type */ @@ -867,7 +914,7 @@ void 
write_output_serial(struct engine* e, const char* baseName, h_grp = H5Gcreate(h_file, "/SubgridScheme", H5P_DEFAULT, H5P_DEFAULT, H5P_DEFAULT); if (h_grp < 0) error("Error while creating subgrid group"); - cooling_write_flavour(h_grp); + cooling_write_flavour(h_grp, e->cooling_func); chemistry_write_flavour(h_grp); H5Gclose(h_grp); @@ -880,6 +927,15 @@ void write_output_serial(struct engine* e, const char* baseName, H5Gclose(h_grp); } + /* Print the stellar parameters */ + if (e->policy & engine_policy_stars) { + h_grp = H5Gcreate(h_file, "/StarsScheme", H5P_DEFAULT, H5P_DEFAULT, + H5P_DEFAULT); + if (h_grp < 0) error("Error while creating stars group"); + stars_props_print_snapshot(h_grp, e->stars_properties); + H5Gclose(h_grp); + } + /* Print the cosmological model */ h_grp = H5Gcreate(h_file, "/Cosmology", H5P_DEFAULT, H5P_DEFAULT, H5P_DEFAULT); @@ -1000,36 +1056,99 @@ void write_output_serial(struct engine* e, const char* baseName, struct io_props list[100]; size_t Nparticles = 0; + struct part* parts_written = NULL; + struct xpart* xparts_written = NULL; + struct gpart* gparts_written = NULL; + struct spart* sparts_written = NULL; + /* Write particle fields from the particle structure */ switch (ptype) { - case swift_type_gas: - Nparticles = Ngas; - hydro_write_particles(parts, xparts, list, &num_fields); - num_fields += chemistry_write_particles(parts, list + num_fields); - num_fields += - cooling_write_particles(xparts, list + num_fields, cooling); - break; + case swift_type_gas: { + if (Ngas == Ngas_written) { + + /* No inhibted particles: easy case */ + Nparticles = Ngas; + hydro_write_particles(parts, xparts, list, &num_fields); + num_fields += chemistry_write_particles(parts, list + num_fields); + num_fields += cooling_write_particles(xparts, list + num_fields, + e->cooling_func); + } else { + + /* Ok, we need to fish out the particles we want */ + Nparticles = Ngas_written; + + /* Allocate temporary arrays */ + if (posix_memalign((void**)&parts_written, 
part_align, + Ngas_written * sizeof(struct part)) != 0) + error("Error while allocating temporart memory for parts"); + if (posix_memalign((void**)&xparts_written, xpart_align, + Ngas_written * sizeof(struct xpart)) != 0) + error("Error while allocating temporart memory for xparts"); + + /* Collect the particles we want to write */ + io_collect_parts_to_write(parts, xparts, parts_written, + xparts_written, Ngas, Ngas_written); + + /* Select the fields to write */ + hydro_write_particles(parts_written, xparts_written, list, + &num_fields); + num_fields += + chemistry_write_particles(parts_written, list + num_fields); + num_fields += cooling_write_particles( + xparts_written, list + num_fields, e->cooling_func); + } + } break; - case swift_type_dark_matter: - /* Allocate temporary array */ - if (posix_memalign((void*)&dmparts, gpart_align, - Ndm * sizeof(struct gpart)) != 0) - error("Error while allocating temporart memory for DM particles"); - bzero(dmparts, Ndm * sizeof(struct gpart)); - - /* Collect the DM particles from gpart */ - io_collect_dm_gparts(gparts, Ntot, dmparts, Ndm); - - /* Write DM particles */ - Nparticles = Ndm; - darkmatter_write_particles(dmparts, list, &num_fields); - break; + case swift_type_dark_matter: { + if (Ntot == Ndm_written) { - case swift_type_star: - Nparticles = Nstars; - star_write_particles(sparts, list, &num_fields); - break; + /* This is a DM-only run without inhibited particles */ + Nparticles = Ntot; + darkmatter_write_particles(gparts, list, &num_fields); + } else { + + /* Ok, we need to fish out the particles we want */ + Nparticles = Ndm_written; + + /* Allocate temporary array */ + if (posix_memalign((void**)&gparts_written, gpart_align, + Ndm_written * sizeof(struct gpart)) != 0) + error("Error while allocating temporart memory for gparts"); + + /* Collect the non-inhibited DM particles from gpart */ + io_collect_gparts_to_write(gparts, gparts_written, Ntot, + Ndm_written); + + /* Write DM particles */ + 
darkmatter_write_particles(gparts_written, list, &num_fields); + } + } break; + + case swift_type_stars: { + if (Nstars == Nstars_written) { + + /* No inhibted particles: easy case */ + Nparticles = Nstars; + stars_write_particles(sparts, list, &num_fields); + } else { + + /* Ok, we need to fish out the particles we want */ + Nparticles = Nstars_written; + + /* Allocate temporary arrays */ + if (posix_memalign((void**)&sparts_written, spart_align, + Nstars_written * sizeof(struct spart)) != 0) + error("Error while allocating temporart memory for sparts"); + + /* Collect the particles we want to write */ + io_collect_sparts_to_write(sparts, sparts_written, Nstars, + Nstars_written); + + /* Select the fields to write */ + stars_write_particles(sparts_written, list, &num_fields); + } + } break; default: error("Particle Type %d not yet supported. Aborting", ptype); @@ -1051,10 +1170,10 @@ void write_output_serial(struct engine* e, const char* baseName, } /* Free temporary array */ - if (dmparts) { - free(dmparts); - dmparts = 0; - } + if (parts_written) free(parts_written); + if (xparts_written) free(xparts_written); + if (gparts_written) free(gparts_written); + if (sparts_written) free(sparts_written); /* Close particle group */ H5Gclose(h_grp); diff --git a/src/serial_io.h b/src/serial_io.h index 6644e34bb32bcbd63250f25502563155eda0a293..07df76fe869fa0612bba5cf953faadd8bc63f29e 100644 --- a/src/serial_io.h +++ b/src/serial_io.h @@ -31,23 +31,30 @@ /* Includes. 
*/ #include "engine.h" +#include "io_properties.h" #include "part.h" #include "units.h" void read_ic_serial(char* fileName, const struct unit_system* internal_units, double dim[3], struct part** parts, struct gpart** gparts, struct spart** sparts, size_t* Ngas, size_t* Ngparts, - size_t* Nstars, int* periodic, int* flag_entropy, - int with_hydro, int with_gravity, int with_stars, - int cleanup_h, int cleanup_sqrt_a, double h, double a, - int mpi_rank, int mpi_size, MPI_Comm comm, MPI_Info info, - int nr_threads, int dry_run); + size_t* Nstars, int* flag_entropy, int with_hydro, + int with_gravity, int with_stars, int cleanup_h, + int cleanup_sqrt_a, double h, double a, int mpi_rank, + int mpi_size, MPI_Comm comm, MPI_Info info, int nr_threads, + int dry_run); void write_output_serial(struct engine* e, const char* baseName, const struct unit_system* internal_units, const struct unit_system* snapshot_units, int mpi_rank, int mpi_size, MPI_Comm comm, MPI_Info info); +void writeArray(const struct engine* e, hid_t grp, char* fileName, + FILE* xmfFile, char* partTypeGroupName, + const struct io_props props, size_t N, long long N_total, + int mpi_rank, long long offset, + const struct unit_system* internal_units, + const struct unit_system* snapshot_units); #endif #endif /* SWIFT_SERIAL_IO_H */ diff --git a/src/single_io.c b/src/single_io.c index a0f02878b52c89beca94d15c09ef7d456ce0a4eb..833c4a80cb2f43455d34ad3cd694255b7b19038c 100644 --- a/src/single_io.c +++ b/src/single_io.c @@ -30,6 +30,7 @@ #include <stdio.h> #include <stdlib.h> #include <string.h> +#include <time.h> /* This object's header. 
*/ #include "single_io.h" @@ -126,9 +127,40 @@ void readArray(hid_t h_grp, const struct io_props props, size_t N, if (io_is_double_precision(props.type)) { double* temp_d = (double*)temp; for (size_t i = 0; i < num_elements; ++i) temp_d[i] *= unit_factor; + } else { float* temp_f = (float*)temp; - for (size_t i = 0; i < num_elements; ++i) temp_f[i] *= unit_factor; + +#ifdef SWIFT_DEBUG_CHECKS + float maximum = 0.f; + float minimum = FLT_MAX; +#endif + + /* Loop that converts the Units */ + for (size_t i = 0; i < num_elements; ++i) { + +#ifdef SWIFT_DEBUG_CHECKS + /* Find the absolute minimum and maximum values */ + const float abstemp_f = fabsf(temp_f[i]); + if (abstemp_f != 0.f) { + maximum = max(maximum, abstemp_f); + minimum = min(minimum, abstemp_f); + } +#endif + + /* Convert the float units */ + temp_f[i] *= unit_factor; + } + +#ifdef SWIFT_DEBUG_CHECKS + /* The two possible errors: larger than float or smaller + * than float precision. */ + if (unit_factor * maximum > FLT_MAX) { + error("Unit conversion results in numbers larger than floats"); + } else if (unit_factor * minimum < FLT_MIN) { + error("Numbers smaller than float precision"); + } +#endif } } @@ -280,8 +312,9 @@ void writeArray(const struct engine* e, hid_t grp, char* fileName, if (h_err < 0) error("Error while writing data array '%s'.", props.name); /* Write XMF description for this data set */ - xmf_write_line(xmfFile, fileName, partTypeGroupName, props.name, N, - props.dimension, props.type); + if (xmfFile != NULL) + xmf_write_line(xmfFile, fileName, partTypeGroupName, props.name, N, + props.dimension, props.type); /* Write unit conversion factors for this data set */ char buffer[FIELD_BUFFER_SIZE]; @@ -313,7 +346,6 @@ void writeArray(const struct engine* e, hid_t grp, char* fileName, * @param Ngas (output) number of Gas particles read. * @param Ngparts (output) The number of #gpart read. * @param Nstars (output) The number of #spart read. 
- * @param periodic (output) 1 if the volume is periodic, 0 if not. * @param flag_entropy (output) 1 if the ICs contained Entropy in the * InternalEnergy field * @param with_hydro Are we reading gas particles ? @@ -338,10 +370,10 @@ void read_ic_single(const char* fileName, const struct unit_system* internal_units, double dim[3], struct part** parts, struct gpart** gparts, struct spart** sparts, size_t* Ngas, size_t* Ngparts, - size_t* Nstars, int* periodic, int* flag_entropy, - int with_hydro, int with_gravity, int with_stars, - int cleanup_h, int cleanup_sqrt_a, double h, double a, - int n_threads, int dry_run) { + size_t* Nstars, int* flag_entropy, int with_hydro, + int with_gravity, int with_stars, int cleanup_h, + int cleanup_sqrt_a, double h, double a, int n_threads, + int dry_run) { hid_t h_file = 0, h_grp = 0; /* GADGET has only cubic boxes (in cosmological mode) */ @@ -358,17 +390,6 @@ void read_ic_single(const char* fileName, h_file = H5Fopen(fileName, H5F_ACC_RDONLY, H5P_DEFAULT); if (h_file < 0) error("Error while opening file '%s'.", fileName); - /* Open header to read simulation properties */ - /* message("Reading runtime parameters..."); */ - h_grp = H5Gopen(h_file, "/RuntimePars", H5P_DEFAULT); - if (h_grp < 0) error("Error while opening runtime parameters\n"); - - /* Read the relevant information */ - io_read_attribute(h_grp, "PeriodicBoundariesOn", INT, periodic); - - /* Close runtime parameters */ - H5Gclose(h_grp); - /* Open header to read simulation properties */ /* message("Reading file header..."); */ h_grp = H5Gopen(h_file, "/Header", H5P_DEFAULT); @@ -383,6 +404,21 @@ void read_ic_single(const char* fileName, error("ICs dimensionality (%dD) does not match code dimensionality (%dD)", dimension, (int)hydro_dimension); + /* Check whether the number of files is specified (if the info exists) */ + const hid_t hid_files = H5Aexists(h_grp, "NumFilesPerSnapshot"); + int num_files = 1; + if (hid_files < 0) + error( + "Error while testing the 
existance of 'NumFilesPerSnapshot' attribute"); + if (hid_files > 0) + io_read_attribute(h_grp, "NumFilesPerSnapshot", INT, &num_files); + if (num_files != 1) + error( + "ICs are split over multiples files (%d). SWIFT cannot handle this " + "case. The script /tools/combine_ics.py is availalbe in the repository " + "to combine files into a valid input file.", + num_files); + /* Read the relevant information and print status */ int flag_entropy_temp[6]; io_read_attribute(h_grp, "Flag_Entropy_ICs", INT, flag_entropy_temp); @@ -470,10 +506,10 @@ void read_ic_single(const char* fileName, /* Allocate memory to store star particles */ if (with_stars) { - *Nstars = N[swift_type_star]; + *Nstars = N[swift_type_stars]; if (posix_memalign((void**)sparts, spart_align, *Nstars * sizeof(struct spart)) != 0) - error("Error while allocating memory for star particles"); + error("Error while allocating memory for stars particles"); bzero(*sparts, *Nstars * sizeof(struct spart)); } @@ -482,7 +518,7 @@ void read_ic_single(const char* fileName, Ndm = N[swift_type_dark_matter]; *Ngparts = (with_hydro ? N[swift_type_gas] : 0) + N[swift_type_dark_matter] + - (with_stars ? N[swift_type_star] : 0); + (with_stars ? 
N[swift_type_stars] : 0); if (posix_memalign((void**)gparts, gpart_align, *Ngparts * sizeof(struct gpart)) != 0) error("Error while allocating memory for gravity particles"); @@ -531,10 +567,10 @@ void read_ic_single(const char* fileName, } break; - case swift_type_star: + case swift_type_stars: if (with_stars) { Nparticles = *Nstars; - star_read_particles(*sparts, list, &num_fields); + stars_read_particles(*sparts, list, &num_fields); } break; @@ -567,7 +603,7 @@ void read_ic_single(const char* fileName, /* Duplicate the star particles into gparts */ if (with_stars) - io_duplicate_star_gparts(&tp, *sparts, *gparts, *Nstars, Ndm + *Ngas); + io_duplicate_stars_gparts(&tp, *sparts, *gparts, *Nstars, Ndm + *Ngas); threadpool_clean(&tp); } @@ -602,33 +638,45 @@ void write_output_single(struct engine* e, const char* baseName, const struct unit_system* snapshot_units) { hid_t h_file = 0, h_grp = 0; - const size_t Ngas = e->s->nr_parts; - const size_t Nstars = e->s->nr_sparts; - const size_t Ntot = e->s->nr_gparts; int periodic = e->s->periodic; int numFiles = 1; const struct part* parts = e->s->parts; const struct xpart* xparts = e->s->xparts; const struct gpart* gparts = e->s->gparts; - struct gpart* dmparts = NULL; const struct spart* sparts = e->s->sparts; - const struct cooling_function_data* cooling = e->cooling_func; struct swift_params* params = e->parameter_file; - /* Number of unassociated gparts */ - const size_t Ndm = Ntot > 0 ? Ntot - (Ngas + Nstars) : 0; - - long long N_total[swift_type_count] = { - (long long)Ngas, (long long)Ndm, 0, 0, (long long)Nstars, 0}; + /* Number of particles currently in the arrays */ + const size_t Ntot = e->s->nr_gparts; + const size_t Ngas = e->s->nr_parts; + const size_t Nstars = e->s->nr_sparts; + // const size_t Nbaryons = Ngas + Nstars; + // const size_t Ndm = Ntot > 0 ? 
Ntot - Nbaryons : 0; + + /* Number of particles that we will write */ + const size_t Ntot_written = e->s->nr_gparts - e->s->nr_inhibited_sparts; + const size_t Ngas_written = e->s->nr_parts - e->s->nr_inhibited_parts; + const size_t Nstars_written = e->s->nr_sparts - e->s->nr_inhibited_gparts; + const size_t Nbaryons_written = Ngas_written + Nstars_written; + const size_t Ndm_written = + Ntot_written > 0 ? Ntot_written - Nbaryons_written : 0; + + /* Format things in a Gadget-friendly array */ + long long N_total[swift_type_count] = {(long long)Ngas_written, + (long long)Ndm_written, + 0, + 0, + (long long)Nstars_written, + 0}; /* File name */ char fileName[FILENAME_BUFFER_SIZE]; - if (e->snapshot_label_delta == 1) + if (e->snapshot_int_time_label_on) + snprintf(fileName, FILENAME_BUFFER_SIZE, "%s_%06i.hdf5", baseName, + (int)round(e->time)); + else snprintf(fileName, FILENAME_BUFFER_SIZE, "%s_%04i.hdf5", baseName, e->snapshot_output_count); - else - snprintf(fileName, FILENAME_BUFFER_SIZE, "%s_%06i.hdf5", baseName, - e->snapshot_output_count * e->snapshot_label_delta); /* First time, we need to create the XMF file */ if (e->snapshot_output_count == 0) xmf_create_file(baseName); @@ -671,6 +719,8 @@ void write_output_single(struct engine* e, const char* baseName, io_write_attribute(h_grp, "Redshift", DOUBLE, &e->cosmology->z, 1); io_write_attribute(h_grp, "Scale-factor", DOUBLE, &e->cosmology->a, 1); io_write_attribute_s(h_grp, "Code", "SWIFT"); + time_t tm = time(NULL); + io_write_attribute_s(h_grp, "Snapshot date", ctime(&tm)); /* GADGET-2 legacy values */ /* Number of particles of each type */ @@ -717,7 +767,7 @@ void write_output_single(struct engine* e, const char* baseName, h_grp = H5Gcreate(h_file, "/SubgridScheme", H5P_DEFAULT, H5P_DEFAULT, H5P_DEFAULT); if (h_grp < 0) error("Error while creating subgrid group"); - cooling_write_flavour(h_grp); + cooling_write_flavour(h_grp, e->cooling_func); chemistry_write_flavour(h_grp); H5Gclose(h_grp); @@ -730,6 +780,15 
@@ void write_output_single(struct engine* e, const char* baseName, H5Gclose(h_grp); } + /* Print the stellar parameters */ + if (e->policy & engine_policy_stars) { + h_grp = H5Gcreate(h_file, "/StarsScheme", H5P_DEFAULT, H5P_DEFAULT, + H5P_DEFAULT); + if (h_grp < 0) error("Error while creating stars group"); + stars_props_print_snapshot(h_grp, e->stars_properties); + H5Gclose(h_grp); + } + /* Print the cosmological model */ h_grp = H5Gcreate(h_file, "/Cosmology", H5P_DEFAULT, H5P_DEFAULT, H5P_DEFAULT); @@ -816,36 +875,98 @@ void write_output_single(struct engine* e, const char* baseName, struct io_props list[100]; size_t N = 0; + struct part* parts_written = NULL; + struct xpart* xparts_written = NULL; + struct gpart* gparts_written = NULL; + struct spart* sparts_written = NULL; + /* Write particle fields from the particle structure */ switch (ptype) { - case swift_type_gas: - N = Ngas; - hydro_write_particles(parts, xparts, list, &num_fields); - num_fields += chemistry_write_particles(parts, list + num_fields); - num_fields += - cooling_write_particles(xparts, list + num_fields, cooling); - break; + case swift_type_gas: { + if (Ngas == Ngas_written) { + + /* No inhibted particles: easy case */ + N = Ngas; + hydro_write_particles(parts, xparts, list, &num_fields); + num_fields += chemistry_write_particles(parts, list + num_fields); + num_fields += cooling_write_particles(xparts, list + num_fields, + e->cooling_func); + } else { + + /* Ok, we need to fish out the particles we want */ + N = Ngas_written; + + /* Allocate temporary arrays */ + if (posix_memalign((void**)&parts_written, part_align, + Ngas_written * sizeof(struct part)) != 0) + error("Error while allocating temporart memory for parts"); + if (posix_memalign((void**)&xparts_written, xpart_align, + Ngas_written * sizeof(struct xpart)) != 0) + error("Error while allocating temporart memory for xparts"); + + /* Collect the particles we want to write */ + io_collect_parts_to_write(parts, xparts, 
parts_written, + xparts_written, Ngas, Ngas_written); + + /* Select the fields to write */ + hydro_write_particles(parts_written, xparts_written, list, + &num_fields); + num_fields += + chemistry_write_particles(parts_written, list + num_fields); + num_fields += cooling_write_particles( + xparts_written, list + num_fields, e->cooling_func); + } + } break; - case swift_type_dark_matter: - /* Allocate temporary array */ - if (posix_memalign((void**)&dmparts, gpart_align, - Ndm * sizeof(struct gpart)) != 0) - error("Error while allocating temporart memory for DM particles"); - bzero(dmparts, Ndm * sizeof(struct gpart)); - - /* Collect the DM particles from gpart */ - io_collect_dm_gparts(gparts, Ntot, dmparts, Ndm); - - /* Write DM particles */ - N = Ndm; - darkmatter_write_particles(dmparts, list, &num_fields); - break; + case swift_type_dark_matter: { + if (Ntot == Ndm_written) { - case swift_type_star: - N = Nstars; - star_write_particles(sparts, list, &num_fields); - break; + /* This is a DM-only run without inhibited particles */ + N = Ntot; + darkmatter_write_particles(gparts, list, &num_fields); + } else { + + /* Ok, we need to fish out the particles we want */ + N = Ndm_written; + + /* Allocate temporary array */ + if (posix_memalign((void**)&gparts_written, gpart_align, + Ndm_written * sizeof(struct gpart)) != 0) + error("Error while allocating temporart memory for gparts"); + + /* Collect the non-inhibited DM particles from gpart */ + io_collect_gparts_to_write(gparts, gparts_written, Ntot, Ndm_written); + + /* Write DM particles */ + darkmatter_write_particles(gparts_written, list, &num_fields); + } + } break; + + case swift_type_stars: { + if (Nstars == Nstars_written) { + + /* No inhibted particles: easy case */ + N = Nstars; + stars_write_particles(sparts, list, &num_fields); + } else { + + /* Ok, we need to fish out the particles we want */ + N = Nstars_written; + + /* Allocate temporary arrays */ + if (posix_memalign((void**)&sparts_written, 
spart_align, + Nstars_written * sizeof(struct spart)) != 0) + error("Error while allocating temporart memory for sparts"); + + /* Collect the particles we want to write */ + io_collect_sparts_to_write(sparts, sparts_written, Nstars, + Nstars_written); + + /* Select the fields to write */ + stars_write_particles(sparts_written, list, &num_fields); + } + } break; default: error("Particle Type %d not yet supported. Aborting", ptype); @@ -865,11 +986,11 @@ void write_output_single(struct engine* e, const char* baseName, internal_units, snapshot_units); } - /* Free temporary array */ - if (dmparts) { - free(dmparts); - dmparts = NULL; - } + /* Free temporary arrays */ + if (parts_written) free(parts_written); + if (xparts_written) free(xparts_written); + if (gparts_written) free(gparts_written); + if (sparts_written) free(sparts_written); /* Close particle group */ H5Gclose(h_grp); diff --git a/src/single_io.h b/src/single_io.h index a0ce8370dfa1009f28e7c399b3f1db345c23de49..62285c3da210243e76347f33780146604673656f 100644 --- a/src/single_io.h +++ b/src/single_io.h @@ -26,6 +26,7 @@ /* Includes. 
*/ #include "engine.h" +#include "io_properties.h" #include "part.h" #include "units.h" @@ -33,15 +34,21 @@ void read_ic_single(const char* fileName, const struct unit_system* internal_units, double dim[3], struct part** parts, struct gpart** gparts, struct spart** sparts, size_t* Ngas, size_t* Ndm, - size_t* Nstars, int* periodic, int* flag_entropy, - int with_hydro, int with_gravity, int with_stars, - int cleanup_h, int cleanup_sqrt_a, double h, double a, - int nr_threads, int dry_run); + size_t* Nstars, int* flag_entropy, int with_hydro, + int with_gravity, int with_stars, int cleanup_h, + int cleanup_sqrt_a, double h, double a, int nr_threads, + int dry_run); void write_output_single(struct engine* e, const char* baseName, const struct unit_system* internal_units, const struct unit_system* snapshot_units); +void writeArray(const struct engine* e, hid_t grp, char* fileName, + FILE* xmfFile, char* partTypeGroupName, + const struct io_props props, size_t N, + const struct unit_system* internal_units, + const struct unit_system* snapshot_units); + #endif /* HAVE_HDF5 && !WITH_MPI */ #endif /* SWIFT_SINGLE_IO_H */ diff --git a/src/space.c b/src/space.c index 6f98e788e9625c1cc872f59c58a8bf87b7b2cfa8..82f369a501bc1be13d27d8096acc8a57a004a580 100644 --- a/src/space.c +++ b/src/space.c @@ -66,7 +66,9 @@ int space_subsize_pair_hydro = space_subsize_pair_hydro_default; int space_subsize_self_hydro = space_subsize_self_hydro_default; int space_subsize_pair_grav = space_subsize_pair_grav_default; int space_subsize_self_grav = space_subsize_self_grav_default; -int space_subdepth_grav = space_subdepth_grav_default; +int space_subsize_pair_stars = space_subsize_pair_stars_default; +int space_subsize_self_stars = space_subsize_self_stars_default; +int space_subdepth_diff_grav = space_subdepth_diff_grav_default; int space_maxsize = space_maxsize_default; #ifdef SWIFT_DEBUG_CHECKS int last_cell_id; @@ -100,9 +102,11 @@ struct parallel_sort { */ struct index_data { struct space 
*s; - struct cell *cells; int *ind; int *cell_counts; + int count_inhibited_part; + int count_inhibited_gpart; + int count_inhibited_spart; }; /** @@ -133,15 +137,15 @@ void space_rebuild_recycle_rec(struct space *s, struct cell *c, *cell_rec_begin = c->progeny[k]; if (s->gravity) { - c->progeny[k]->multipole->next = *multipole_rec_begin; - *multipole_rec_begin = c->progeny[k]->multipole; + c->progeny[k]->grav.multipole->next = *multipole_rec_begin; + *multipole_rec_begin = c->progeny[k]->grav.multipole; } if (*cell_rec_end == NULL) *cell_rec_end = *cell_rec_begin; if (s->gravity && *multipole_rec_end == NULL) *multipole_rec_end = *multipole_rec_begin; - c->progeny[k]->multipole = NULL; + c->progeny[k]->grav.multipole = NULL; c->progeny[k] = NULL; } } @@ -162,64 +166,97 @@ void space_rebuild_recycle_mapper(void *map_data, int num_elements, if (cell_rec_begin != NULL) space_recycle_list(s, cell_rec_begin, cell_rec_end, multipole_rec_begin, multipole_rec_end); - c->sorts = NULL; + c->hydro.sorts = NULL; + c->stars.sorts = NULL; c->nr_tasks = 0; - c->density = NULL; - c->gradient = NULL; - c->force = NULL; - c->grav = NULL; - c->dx_max_part = 0.0f; - c->dx_max_sort = 0.0f; - c->sorted = 0; - c->count = 0; - c->gcount = 0; - c->scount = 0; - c->init_grav = NULL; - c->init_grav_out = NULL; - c->extra_ghost = NULL; - c->ghost_in = NULL; - c->ghost_out = NULL; - c->ghost = NULL; + c->grav.nr_mm_tasks = 0; + c->hydro.density = NULL; + c->hydro.gradient = NULL; + c->hydro.force = NULL; + c->grav.grav = NULL; + c->grav.mm = NULL; + c->hydro.dx_max_part = 0.0f; + c->hydro.dx_max_sort = 0.0f; + c->stars.dx_max_part = 0.f; + c->stars.dx_max_sort = 0.f; + c->hydro.sorted = 0; + c->stars.sorted = 0; + c->hydro.count = 0; + c->hydro.updated = 0; + c->hydro.inhibited = 0; + c->grav.count = 0; + c->grav.updated = 0; + c->grav.inhibited = 0; + c->stars.count = 0; + c->stars.updated = 0; + c->stars.inhibited = 0; + c->grav.init = NULL; + c->grav.init_out = NULL; + c->hydro.extra_ghost 
= NULL; + c->hydro.ghost_in = NULL; + c->hydro.ghost_out = NULL; + c->hydro.ghost = NULL; + c->stars.ghost_in = NULL; + c->stars.ghost_out = NULL; + c->stars.ghost = NULL; + c->stars.density = NULL; c->kick1 = NULL; c->kick2 = NULL; c->timestep = NULL; c->end_force = NULL; - c->drift_part = NULL; - c->drift_gpart = NULL; - c->cooling = NULL; + c->hydro.drift = NULL; + c->grav.drift = NULL; + c->grav.drift_out = NULL; + c->hydro.cooling = NULL; c->sourceterms = NULL; - c->grav_long_range = NULL; - c->grav_down_in = NULL; - c->grav_down = NULL; - c->grav_mesh = NULL; + c->grav.long_range = NULL; + c->grav.down_in = NULL; + c->grav.down = NULL; + c->grav.mesh = NULL; c->super = c; - c->super_hydro = c; - c->super_gravity = c; - c->parts = NULL; - c->xparts = NULL; - c->gparts = NULL; - c->sparts = NULL; - c->do_sub_sort = 0; - c->do_grav_sub_drift = 0; - c->do_sub_drift = 0; - if (s->gravity) bzero(c->multipole, sizeof(struct gravity_tensors)); - for (int i = 0; i < 13; i++) - if (c->sort[i] != NULL) { - free(c->sort[i]); - c->sort[i] = NULL; + c->hydro.super = c; + c->grav.super = c; + c->hydro.parts = NULL; + c->hydro.xparts = NULL; + c->grav.parts = NULL; + c->stars.parts = NULL; + c->hydro.do_sub_sort = 0; + c->stars.do_sub_sort = 0; + c->grav.do_sub_drift = 0; + c->hydro.do_sub_drift = 0; + c->hydro.ti_end_min = -1; + c->hydro.ti_end_max = -1; + c->grav.ti_end_min = -1; + c->grav.ti_end_max = -1; + c->stars.ti_end_min = -1; +#ifdef SWIFT_DEBUG_CHECKS + c->cellID = 0; +#endif + if (s->gravity) bzero(c->grav.multipole, sizeof(struct gravity_tensors)); + for (int i = 0; i < 13; i++) { + if (c->hydro.sort[i] != NULL) { + free(c->hydro.sort[i]); + c->hydro.sort[i] = NULL; + } + if (c->stars.sort[i] != NULL) { + free(c->stars.sort[i]); + c->stars.sort[i] = NULL; } + } #if WITH_MPI - c->recv_xv = NULL; - c->recv_rho = NULL; - c->recv_gradient = NULL; - c->recv_grav = NULL; - c->recv_ti = NULL; - - c->send_xv = NULL; - c->send_rho = NULL; - c->send_gradient = NULL; - 
c->send_grav = NULL; - c->send_ti = NULL; + c->mpi.tag = -1; + + c->mpi.hydro.recv_xv = NULL; + c->mpi.hydro.recv_rho = NULL; + c->mpi.hydro.recv_gradient = NULL; + c->mpi.grav.recv = NULL; + c->mpi.recv_ti = NULL; + + c->mpi.hydro.send_xv = NULL; + c->mpi.hydro.send_rho = NULL; + c->mpi.hydro.send_gradient = NULL; + c->mpi.grav.send = NULL; + c->mpi.send_ti = NULL; #endif } } @@ -228,9 +265,16 @@ void space_rebuild_recycle_mapper(void *map_data, int num_elements, * @brief Free up any allocated cells. */ void space_free_cells(struct space *s) { + + ticks tic = getticks(); + threadpool_map(&s->e->threadpool, space_rebuild_recycle_mapper, s->cells_top, s->nr_cells, sizeof(struct cell), 0, s); s->maxdepth = 0; + + if (s->e->verbose) + message("took %.3f %s.", clocks_from_ticks(getticks() - tic), + clocks_getunit()); } /** @@ -242,6 +286,7 @@ void space_free_cells(struct space *s) { void space_regrid(struct space *s, int verbose) { const size_t nr_parts = s->nr_parts; + const size_t nr_sparts = s->nr_sparts; const ticks tic = getticks(); const integertime_t ti_current = (s->e != NULL) ? s->e->ti_current : 0; @@ -249,17 +294,40 @@ void space_regrid(struct space *s, int verbose) { // tic = getticks(); float h_max = s->cell_min / kernel_gamma / space_stretch; if (nr_parts > 0) { - if (s->cells_top != NULL) { + + /* Can we use the list of local non-empty top-level cells? */ + if (s->local_cells_with_particles_top != NULL) { + for (int k = 0; k < s->nr_local_cells_with_particles; ++k) { + const struct cell *c = + &s->cells_top[s->local_cells_with_particles_top[k]]; + if (c->hydro.h_max > h_max) { + h_max = c->hydro.h_max; + } + if (c->stars.h_max > h_max) { + h_max = c->stars.h_max; + } + } + + /* Can we instead use all the top-level cells? 
*/ + } else if (s->cells_top != NULL) { for (int k = 0; k < s->nr_cells; k++) { - if (s->cells_top[k].nodeID == engine_rank && - s->cells_top[k].h_max > h_max) { - h_max = s->cells_top[k].h_max; + const struct cell *c = &s->cells_top[k]; + if (c->nodeID == engine_rank && c->hydro.h_max > h_max) { + h_max = c->hydro.h_max; + } + if (c->nodeID == engine_rank && c->stars.h_max > h_max) { + h_max = c->stars.h_max; } } + + /* Last option: run through the particles */ } else { for (size_t k = 0; k < nr_parts; k++) { if (s->parts[k].h > h_max) h_max = s->parts[k].h; } + for (size_t k = 0; k < nr_sparts; k++) { + if (s->sparts[k].h > h_max) h_max = s->sparts[k].h; + } } } @@ -332,7 +400,7 @@ void space_regrid(struct space *s, int verbose) { /* Are we about to allocate new top level cells without a regrid? * Can happen when restarting the application. */ - int no_regrid = (s->cells_top == NULL && oldnodeIDs == NULL); + const int no_regrid = (s->cells_top == NULL && oldnodeIDs == NULL); #endif /* Do we need to re-build the upper-level cells? */ @@ -349,7 +417,10 @@ void space_regrid(struct space *s, int verbose) { /* Free the old cells, if they were allocated. 
*/ if (s->cells_top != NULL) { space_free_cells(s); + free(s->local_cells_with_tasks_top); free(s->local_cells_top); + free(s->cells_with_particles_top); + free(s->local_cells_with_particles_top); free(s->cells_top); free(s->multipoles_top); } @@ -387,15 +458,35 @@ void space_regrid(struct space *s, int verbose) { error("Failed to allocate indices of local top-level cells."); bzero(s->local_cells_top, s->nr_cells * sizeof(int)); + /* Allocate the indices of local cells with tasks */ + if (posix_memalign((void **)&s->local_cells_with_tasks_top, + SWIFT_STRUCT_ALIGNMENT, s->nr_cells * sizeof(int)) != 0) + error("Failed to allocate indices of local top-level cells with tasks."); + bzero(s->local_cells_with_tasks_top, s->nr_cells * sizeof(int)); + + /* Allocate the indices of cells with particles */ + if (posix_memalign((void **)&s->cells_with_particles_top, + SWIFT_STRUCT_ALIGNMENT, s->nr_cells * sizeof(int)) != 0) + error("Failed to allocate indices of top-level cells with particles."); + bzero(s->cells_with_particles_top, s->nr_cells * sizeof(int)); + + /* Allocate the indices of local cells with particles */ + if (posix_memalign((void **)&s->local_cells_with_particles_top, + SWIFT_STRUCT_ALIGNMENT, s->nr_cells * sizeof(int)) != 0) + error( + "Failed to allocate indices of local top-level cells with " + "particles."); + bzero(s->local_cells_with_particles_top, s->nr_cells * sizeof(int)); + /* Set the cells' locks */ for (int k = 0; k < s->nr_cells; k++) { - if (lock_init(&s->cells_top[k].lock) != 0) + if (lock_init(&s->cells_top[k].hydro.lock) != 0) error("Failed to init spinlock for hydro."); - if (lock_init(&s->cells_top[k].glock) != 0) + if (lock_init(&s->cells_top[k].grav.plock) != 0) error("Failed to init spinlock for gravity."); - if (lock_init(&s->cells_top[k].mlock) != 0) + if (lock_init(&s->cells_top[k].grav.mlock) != 0) error("Failed to init spinlock for multipoles."); - if (lock_init(&s->cells_top[k].slock) != 0) + if 
(lock_init(&s->cells_top[k].stars.lock) != 0) error("Failed to init spinlock for stars."); } @@ -413,16 +504,32 @@ void space_regrid(struct space *s, int verbose) { c->width[2] = s->width[2]; c->dmin = dmin; c->depth = 0; - c->count = 0; - c->gcount = 0; - c->scount = 0; + c->split = 0; + c->hydro.count = 0; + c->grav.count = 0; + c->stars.count = 0; c->super = c; - c->super_hydro = c; - c->super_gravity = c; - c->ti_old_part = ti_current; - c->ti_old_gpart = ti_current; - c->ti_old_multipole = ti_current; - if (s->gravity) c->multipole = &s->multipoles_top[cid]; + c->hydro.super = c; + c->grav.super = c; + c->hydro.ti_old_part = ti_current; + c->grav.ti_old_part = ti_current; + c->grav.ti_old_multipole = ti_current; +#ifdef WITH_MPI + c->mpi.tag = -1; + c->mpi.hydro.recv_xv = NULL; + c->mpi.hydro.recv_rho = NULL; + c->mpi.hydro.recv_gradient = NULL; + c->mpi.hydro.send_xv = NULL; + c->mpi.hydro.send_rho = NULL; + c->mpi.hydro.send_gradient = NULL; + c->mpi.grav.recv = NULL; + c->mpi.grav.send = NULL; +#endif // WITH_MPI + if (s->gravity) c->grav.multipole = &s->multipoles_top[cid]; +#ifdef SWIFT_DEBUG_CHECKS + c->cellID = -last_cell_id; + last_cell_id++; +#endif } /* Be verbose about the change. */ @@ -445,7 +552,7 @@ void space_regrid(struct space *s, int verbose) { /* Failed, try another technique that requires no settings. */ message("Failed to get a new partition, trying less optimal method"); struct partition initial_partition; -#ifdef HAVE_METIS +#if defined(HAVE_PARMETIS) || defined(HAVE_METIS) initial_partition.type = INITPART_METIS_NOWEIGHT; #else initial_partition.type = INITPART_VECTORIZE; @@ -499,10 +606,10 @@ void space_regrid(struct space *s, int verbose) { * @brief Re-build the cells as well as the tasks. * * @param s The #space in which to update the cells. + * @param repartitioned Did we just repartition? 
* @param verbose Print messages to stdout or not - * */ -void space_rebuild(struct space *s, int verbose) { +void space_rebuild(struct space *s, int repartitioned, int verbose) { const ticks tic = getticks(); @@ -518,6 +625,9 @@ void space_rebuild(struct space *s, int verbose) { size_t nr_parts = s->nr_parts; size_t nr_gparts = s->nr_gparts; size_t nr_sparts = s->nr_sparts; + int count_inhibited_parts = 0; + int count_inhibited_gparts = 0; + int count_inhibited_sparts = 0; struct cell *restrict cells_top = s->cells_top; const integertime_t ti_current = (s->e != NULL) ? s->e->ti_current : 0; @@ -531,7 +641,8 @@ void space_rebuild(struct space *s, int verbose) { if (cell_part_counts == NULL) error("Failed to allocate cell part count buffer."); if (s->size_parts > 0) - space_parts_get_cell_index(s, ind, cell_part_counts, cells_top, verbose); + space_parts_get_cell_index(s, ind, cell_part_counts, &count_inhibited_parts, + verbose); /* Run through the gravity particles and get their cell index. */ const size_t gind_size = s->size_gparts + 100; @@ -541,7 +652,8 @@ void space_rebuild(struct space *s, int verbose) { if (cell_gpart_counts == NULL) error("Failed to allocate cell gpart count buffer."); if (s->size_gparts > 0) - space_gparts_get_cell_index(s, gind, cell_gpart_counts, cells_top, verbose); + space_gparts_get_cell_index(s, gind, cell_gpart_counts, + &count_inhibited_gparts, verbose); /* Run through the star particles and get their cell index. 
*/ const size_t sind_size = s->size_sparts + 100; @@ -551,137 +663,202 @@ void space_rebuild(struct space *s, int verbose) { if (cell_spart_counts == NULL) error("Failed to allocate cell gpart count buffer."); if (s->size_sparts > 0) - space_sparts_get_cell_index(s, sind, cell_spart_counts, cells_top, verbose); + space_sparts_get_cell_index(s, sind, cell_spart_counts, + &count_inhibited_sparts, verbose); + +#ifdef SWIFT_DEBUG_CHECKS + if (repartitioned && count_inhibited_parts) + error("We just repartitioned but still found inhibited parts."); + if (repartitioned && count_inhibited_sparts) + error("We just repartitioned but still found inhibited sparts."); + if (repartitioned && count_inhibited_gparts) + error("We just repartitioned but still found inhibited gparts."); +#endif -#ifdef WITH_MPI const int local_nodeID = s->e->nodeID; - /* Move non-local parts to the end of the list. */ - for (size_t k = 0; k < nr_parts;) { - if (cells_top[ind[k]].nodeID != local_nodeID) { - nr_parts -= 1; - /* Swap the particle */ - memswap(&s->parts[k], &s->parts[nr_parts], sizeof(struct part)); - /* Swap the link with the gpart */ - if (s->parts[k].gpart != NULL) { - s->parts[k].gpart->id_or_neg_offset = -k; - } - if (s->parts[nr_parts].gpart != NULL) { - s->parts[nr_parts].gpart->id_or_neg_offset = -nr_parts; + /* Move non-local parts and inhibited parts to the end of the list. 
*/ + if (!repartitioned && (s->e->nr_nodes > 1 || count_inhibited_parts > 0)) { + for (size_t k = 0; k < nr_parts; /* void */) { + + /* Inhibited particle or foreign particle */ + if (ind[k] == -1 || cells_top[ind[k]].nodeID != local_nodeID) { + + /* One fewer particle */ + nr_parts -= 1; + + /* Swap the particle */ + memswap(&s->parts[k], &s->parts[nr_parts], sizeof(struct part)); + + /* Swap the link with the gpart */ + if (s->parts[k].gpart != NULL) { + s->parts[k].gpart->id_or_neg_offset = -k; + } + if (s->parts[nr_parts].gpart != NULL) { + s->parts[nr_parts].gpart->id_or_neg_offset = -nr_parts; + } + + /* Swap the xpart */ + memswap(&s->xparts[k], &s->xparts[nr_parts], sizeof(struct xpart)); + /* Swap the index */ + memswap(&ind[k], &ind[nr_parts], sizeof(int)); + + } else { + /* Increment when not exchanging otherwise we need to retest "k".*/ + k++; } - /* Swap the xpart */ - memswap(&s->xparts[k], &s->xparts[nr_parts], sizeof(struct xpart)); - /* Swap the index */ - memswap(&ind[k], &ind[nr_parts], sizeof(int)); - } else { - /* Increment when not exchanging otherwise we need to retest "k".*/ - k++; } } #ifdef SWIFT_DEBUG_CHECKS /* Check that all parts are in the correct places. */ + int check_count_inhibited_part = 0; for (size_t k = 0; k < nr_parts; k++) { - if (cells_top[ind[k]].nodeID != local_nodeID) { + if (ind[k] == -1 || cells_top[ind[k]].nodeID != local_nodeID) { error("Failed to move all non-local parts to send list"); } } for (size_t k = nr_parts; k < s->nr_parts; k++) { - if (cells_top[ind[k]].nodeID == local_nodeID) { + if (ind[k] != -1 && cells_top[ind[k]].nodeID == local_nodeID) { error("Failed to remove local parts from send list"); } + if (ind[k] == -1) ++check_count_inhibited_part; } -#endif + if (check_count_inhibited_part != count_inhibited_parts) + error("Counts of inhibited particles do not match!"); +#endif /* SWIFT_DEBUG_CHECKS */ - /* Move non-local sparts to the end of the list. 
*/ - for (size_t k = 0; k < nr_sparts;) { - if (cells_top[sind[k]].nodeID != local_nodeID) { - nr_sparts -= 1; - /* Swap the particle */ - memswap(&s->sparts[k], &s->sparts[nr_sparts], sizeof(struct spart)); - /* Swap the link with the gpart */ - if (s->sparts[k].gpart != NULL) { - s->sparts[k].gpart->id_or_neg_offset = -k; - } - if (s->sparts[nr_sparts].gpart != NULL) { - s->sparts[nr_sparts].gpart->id_or_neg_offset = -nr_sparts; + /* Move non-local sparts and inhibited sparts to the end of the list. */ + if (!repartitioned && (s->e->nr_nodes > 1 || count_inhibited_sparts > 0)) { + for (size_t k = 0; k < nr_sparts; /* void */) { + + /* Inhibited particle or foreign particle */ + if (sind[k] == -1 || cells_top[sind[k]].nodeID != local_nodeID) { + + /* One fewer particle */ + nr_sparts -= 1; + + /* Swap the particle */ + memswap(&s->sparts[k], &s->sparts[nr_sparts], sizeof(struct spart)); + + /* Swap the link with the gpart */ + if (s->sparts[k].gpart != NULL) { + s->sparts[k].gpart->id_or_neg_offset = -k; + } + if (s->sparts[nr_sparts].gpart != NULL) { + s->sparts[nr_sparts].gpart->id_or_neg_offset = -nr_sparts; + } + + /* Swap the index */ + memswap(&sind[k], &sind[nr_sparts], sizeof(int)); + + } else { + /* Increment when not exchanging otherwise we need to retest "k".*/ + k++; } - /* Swap the index */ - memswap(&sind[k], &sind[nr_sparts], sizeof(int)); - } else { - /* Increment when not exchanging otherwise we need to retest "k".*/ - k++; } } #ifdef SWIFT_DEBUG_CHECKS - /* Check that all sparts are in the correct place (untested). */ + /* Check that all sparts are in the correct place. 
*/ + int check_count_inhibited_spart = 0; for (size_t k = 0; k < nr_sparts; k++) { - if (cells_top[sind[k]].nodeID != local_nodeID) { + if (sind[k] == -1 || cells_top[sind[k]].nodeID != local_nodeID) { error("Failed to move all non-local sparts to send list"); } } for (size_t k = nr_sparts; k < s->nr_sparts; k++) { - if (cells_top[sind[k]].nodeID == local_nodeID) { + if (sind[k] != -1 && cells_top[sind[k]].nodeID == local_nodeID) { error("Failed to remove local sparts from send list"); } + if (sind[k] == -1) ++check_count_inhibited_spart; } -#endif + if (check_count_inhibited_spart != count_inhibited_sparts) + error("Counts of inhibited s-particles do not match!"); +#endif /* SWIFT_DEBUG_CHECKS */ - /* Move non-local gparts to the end of the list. */ - for (size_t k = 0; k < nr_gparts;) { - if (cells_top[gind[k]].nodeID != local_nodeID) { - nr_gparts -= 1; - /* Swap the particle */ - memswap(&s->gparts[k], &s->gparts[nr_gparts], sizeof(struct gpart)); - /* Swap the link with part/spart */ - if (s->gparts[k].type == swift_type_gas) { - s->parts[-s->gparts[k].id_or_neg_offset].gpart = &s->gparts[k]; - } else if (s->gparts[k].type == swift_type_star) { - s->sparts[-s->gparts[k].id_or_neg_offset].gpart = &s->gparts[k]; - } - if (s->gparts[nr_gparts].type == swift_type_gas) { - s->parts[-s->gparts[nr_gparts].id_or_neg_offset].gpart = - &s->gparts[nr_gparts]; - } else if (s->gparts[nr_gparts].type == swift_type_star) { - s->sparts[-s->gparts[nr_gparts].id_or_neg_offset].gpart = - &s->gparts[nr_gparts]; + /* Move non-local gparts and inhibited parts to the end of the list. 
*/ + if (!repartitioned && (s->e->nr_nodes > 1 || count_inhibited_gparts > 0)) { + for (size_t k = 0; k < nr_gparts; /* void */) { + + /* Inhibited particle or foreign particle */ + if (gind[k] == -1 || cells_top[gind[k]].nodeID != local_nodeID) { + + /* One fewer particle */ + nr_gparts -= 1; + + /* Swap the particle */ + memswap(&s->gparts[k], &s->gparts[nr_gparts], sizeof(struct gpart)); + + /* Swap the link with part/spart */ + if (s->gparts[k].type == swift_type_gas) { + s->parts[-s->gparts[k].id_or_neg_offset].gpart = &s->gparts[k]; + } else if (s->gparts[k].type == swift_type_stars) { + s->sparts[-s->gparts[k].id_or_neg_offset].gpart = &s->gparts[k]; + } + if (s->gparts[nr_gparts].type == swift_type_gas) { + s->parts[-s->gparts[nr_gparts].id_or_neg_offset].gpart = + &s->gparts[nr_gparts]; + } else if (s->gparts[nr_gparts].type == swift_type_stars) { + s->sparts[-s->gparts[nr_gparts].id_or_neg_offset].gpart = + &s->gparts[nr_gparts]; + } + + /* Swap the index */ + memswap(&gind[k], &gind[nr_gparts], sizeof(int)); + } else { + /* Increment when not exchanging otherwise we need to retest "k".*/ + k++; } - /* Swap the index */ - memswap(&gind[k], &gind[nr_gparts], sizeof(int)); - } else { - /* Increment when not exchanging otherwise we need to retest "k".*/ - k++; } } #ifdef SWIFT_DEBUG_CHECKS - /* Check that all gparts are in the correct place (untested). */ + /* Check that all gparts are in the correct place. 
*/ + int check_count_inhibited_gpart = 0; for (size_t k = 0; k < nr_gparts; k++) { - if (cells_top[gind[k]].nodeID != local_nodeID) { + if (gind[k] == -1 || cells_top[gind[k]].nodeID != local_nodeID) { error("Failed to move all non-local gparts to send list"); } } for (size_t k = nr_gparts; k < s->nr_gparts; k++) { - if (cells_top[gind[k]].nodeID == local_nodeID) { + if (gind[k] != -1 && cells_top[gind[k]].nodeID == local_nodeID) { error("Failed to remove local gparts from send list"); } + if (gind[k] == -1) ++check_count_inhibited_gpart; } -#endif + if (check_count_inhibited_gpart != count_inhibited_gparts) + error("Counts of inhibited g-particles do not match!"); +#endif /* SWIFT_DEBUG_CHECKS */ + +#ifdef WITH_MPI /* Exchange the strays, note that this potentially re-allocates - the parts arrays. */ - size_t nr_parts_exchanged = s->nr_parts - nr_parts; - size_t nr_gparts_exchanged = s->nr_gparts - nr_gparts; - size_t nr_sparts_exchanged = s->nr_sparts - nr_sparts; - engine_exchange_strays(s->e, nr_parts, &ind[nr_parts], &nr_parts_exchanged, - nr_gparts, &gind[nr_gparts], &nr_gparts_exchanged, - nr_sparts, &sind[nr_sparts], &nr_sparts_exchanged); - - /* Set the new particle counts. */ - s->nr_parts = nr_parts + nr_parts_exchanged; - s->nr_gparts = nr_gparts + nr_gparts_exchanged; - s->nr_sparts = nr_sparts + nr_sparts_exchanged; + the parts arrays. This can be skipped if we just repartitioned aspace + there should be no strays */ + if (!repartitioned) { + + size_t nr_parts_exchanged = s->nr_parts - nr_parts; + size_t nr_gparts_exchanged = s->nr_gparts - nr_gparts; + size_t nr_sparts_exchanged = s->nr_sparts - nr_sparts; + engine_exchange_strays(s->e, nr_parts, &ind[nr_parts], &nr_parts_exchanged, + nr_gparts, &gind[nr_gparts], &nr_gparts_exchanged, + nr_sparts, &sind[nr_sparts], &nr_sparts_exchanged); + + /* Set the new particle counts. 
*/ + s->nr_parts = nr_parts + nr_parts_exchanged; + s->nr_gparts = nr_gparts + nr_gparts_exchanged; + s->nr_sparts = nr_sparts + nr_sparts_exchanged; + } else { +#ifdef SWIFT_DEBUG_CHECKS + if (s->nr_parts != nr_parts) + error("Number of parts changing after repartition"); + if (s->nr_sparts != nr_sparts) + error("Number of sparts changing after repartition"); + if (s->nr_gparts != nr_gparts) + error("Number of gparts changing after repartition"); +#endif + } /* Clear non-local cell counts. */ for (int k = 0; k < s->nr_cells; k++) { @@ -743,6 +920,12 @@ void space_rebuild(struct space *s, int verbose) { } nr_sparts = s->nr_sparts; +#else /* WITH_MPI */ + + /* Update the part and spart counters */ + s->nr_parts = nr_parts; + s->nr_sparts = nr_sparts; + #endif /* WITH_MPI */ /* Sort the parts according to their cells. */ @@ -755,6 +938,9 @@ void space_rebuild(struct space *s, int verbose) { for (size_t k = 0; k < nr_parts; k++) { const struct part *p = &s->parts[k]; + if (p->time_bin == time_bin_inhibited) + error("Inhibited particle sorted into a cell!"); + /* New cell index */ const int new_ind = cell_getid(s->cdim, p->x[0] * s->iwidth[0], p->x[1] * s->iwidth[1], @@ -771,7 +957,7 @@ void space_rebuild(struct space *s, int verbose) { p->x[2] < c->loc[2] || p->x[2] > c->loc[2] + c->width[2]) error("part not sorted into the right top-level cell!"); } -#endif +#endif /* SWIFT_DEBUG_CHECKS */ /* Sort the sparts according to their cells. 
*/ if (nr_sparts > 0) @@ -782,6 +968,9 @@ void space_rebuild(struct space *s, int verbose) { for (size_t k = 0; k < nr_sparts; k++) { const struct spart *sp = &s->sparts[k]; + if (sp->time_bin == time_bin_inhibited) + error("Inhibited particle sorted into a cell!"); + /* New cell index */ const int new_sind = cell_getid(s->cdim, sp->x[0] * s->iwidth[0], sp->x[1] * s->iwidth[1], @@ -798,14 +987,14 @@ void space_rebuild(struct space *s, int verbose) { sp->x[2] < c->loc[2] || sp->x[2] > c->loc[2] + c->width[2]) error("spart not sorted into the right top-level cell!"); } -#endif +#endif /* SWIFT_DEBUG_CHECKS */ /* Extract the cell counts from the sorted indices. */ size_t last_index = 0; ind[nr_parts] = s->nr_cells; // sentinel. for (size_t k = 0; k < nr_parts; k++) { if (ind[k] < ind[k + 1]) { - cells_top[ind[k]].count = k - last_index + 1; + cells_top[ind[k]].hydro.count = k - last_index + 1; last_index = k + 1; } } @@ -815,7 +1004,7 @@ void space_rebuild(struct space *s, int verbose) { sind[nr_sparts] = s->nr_cells; // sentinel. for (size_t k = 0; k < nr_sparts; k++) { if (sind[k] < sind[k + 1]) { - cells_top[sind[k]].scount = k - last_sindex + 1; + cells_top[sind[k]].stars.count = k - last_sindex + 1; last_sindex = k + 1; } } @@ -852,8 +1041,18 @@ void space_rebuild(struct space *s, int verbose) { } nr_gparts = s->nr_gparts; +#else /* WITH_MPI */ + + /* Update the gpart counter */ + s->nr_gparts = nr_gparts; + #endif /* WITH_MPI */ + /* Mark that there are no inhibited particles left */ + s->nr_inhibited_parts = 0; + s->nr_inhibited_gparts = 0; + s->nr_inhibited_sparts = 0; + /* Sort the gparts according to their cells. 
*/ if (nr_gparts > 0) space_gparts_sort(s->gparts, s->parts, s->sparts, gind, cell_gpart_counts, @@ -864,6 +1063,9 @@ void space_rebuild(struct space *s, int verbose) { for (size_t k = 0; k < nr_gparts; k++) { const struct gpart *gp = &s->gparts[k]; + if (gp->time_bin == time_bin_inhibited) + error("Inhibited particle sorted into a cell!"); + /* New cell index */ const int new_gind = cell_getid(s->cdim, gp->x[0] * s->iwidth[0], gp->x[1] * s->iwidth[1], @@ -880,14 +1082,14 @@ void space_rebuild(struct space *s, int verbose) { gp->x[2] < c->loc[2] || gp->x[2] > c->loc[2] + c->width[2]) error("gpart not sorted into the right top-level cell!"); } -#endif +#endif /* SWIFT_DEBUG_CHECKS */ /* Extract the cell counts from the sorted indices. */ size_t last_gindex = 0; gind[nr_gparts] = s->nr_cells; for (size_t k = 0; k < nr_gparts; k++) { if (gind[k] < gind[k + 1]) { - cells_top[gind[k]].gcount = k - last_gindex + 1; + cells_top[gind[k]].grav.count = k - last_gindex + 1; last_gindex = k + 1; } } @@ -903,40 +1105,70 @@ void space_rebuild(struct space *s, int verbose) { nr_sparts, verbose); #endif - /* Hook the cells up to the parts. */ - // tic = getticks(); + /* Hook the cells up to the parts. 
Make list of local and non-empty cells */ + ticks tic2 = getticks(); struct part *finger = s->parts; struct xpart *xfinger = s->xparts; struct gpart *gfinger = s->gparts; struct spart *sfinger = s->sparts; + s->nr_cells_with_particles = 0; + s->nr_local_cells_with_particles = 0; + s->nr_local_cells = 0; for (int k = 0; k < s->nr_cells; k++) { struct cell *restrict c = &cells_top[k]; - c->ti_old_part = ti_current; - c->ti_old_gpart = ti_current; - c->ti_old_multipole = ti_current; - if (c->nodeID == engine_rank) { - c->parts = finger; - c->xparts = xfinger; - c->gparts = gfinger; - c->sparts = sfinger; - finger = &finger[c->count]; - xfinger = &xfinger[c->count]; - gfinger = &gfinger[c->gcount]; - sfinger = &sfinger[c->scount]; + c->hydro.ti_old_part = ti_current; + c->grav.ti_old_part = ti_current; + c->grav.ti_old_multipole = ti_current; + +#ifdef SWIFT_DEBUG_CHECKS + c->cellID = -last_cell_id; + last_cell_id++; +#endif + + const int is_local = (c->nodeID == engine_rank); + const int has_particles = + (c->hydro.count > 0) || (c->grav.count > 0) || (c->stars.count > 0); + + if (is_local) { + c->hydro.parts = finger; + c->hydro.xparts = xfinger; + c->grav.parts = gfinger; + c->stars.parts = sfinger; + finger = &finger[c->hydro.count]; + xfinger = &xfinger[c->hydro.count]; + gfinger = &gfinger[c->grav.count]; + sfinger = &sfinger[c->stars.count]; + + /* Add this cell to the list of local cells */ + s->local_cells_top[s->nr_local_cells] = k; + s->nr_local_cells++; + } + + if (is_local && has_particles) { + + /* Add this cell to the list of non-empty cells */ + s->local_cells_with_particles_top[s->nr_local_cells_with_particles] = k; + s->nr_local_cells_with_particles++; } } - // message( "hooking up cells took %.3f %s." 
, - // clocks_from_ticks(getticks() - tic), clocks_getunit()); + if (verbose) { + message("Have %d local top-level cells with particles (total=%d)", + s->nr_local_cells_with_particles, s->nr_cells); + message("Have %d local top-level cells (total=%d)", s->nr_local_cells, + s->nr_cells); + message("hooking up cells took %.3f %s.", + clocks_from_ticks(getticks() - tic2), clocks_getunit()); + } /* At this point, we have the upper-level cells, old or new. Now make sure that the parts in each cell are ok. */ - space_split(s, cells_top, s->nr_cells, verbose); + space_split(s, verbose); #ifdef SWIFT_DEBUG_CHECKS /* Check that the multipole construction went OK */ if (s->gravity) for (int k = 0; k < s->nr_cells; k++) - cell_check_multipole(&s->cells_top[k], NULL); + cell_check_multipole(&s->cells_top[k]); #endif /* Clean up any stray sort indices in the cell buffer. */ @@ -948,22 +1180,21 @@ void space_rebuild(struct space *s, int verbose) { } /** - * @brief Split particles between cells of a hierarchy + * @brief Split particles between cells of a hierarchy. * * This is done in parallel using threads in the #threadpool. + * Only do this for the local non-empty top-level cells. * * @param s The #space. - * @param cells The cell hierarchy. - * @param nr_cells The number of cells. * @param verbose Are we talkative ? 
*/ -void space_split(struct space *s, struct cell *cells, int nr_cells, - int verbose) { +void space_split(struct space *s, int verbose) { const ticks tic = getticks(); - threadpool_map(&s->e->threadpool, space_split_mapper, cells, nr_cells, - sizeof(struct cell), 0, s); + threadpool_map(&s->e->threadpool, space_split_mapper, + s->local_cells_with_particles_top, + s->nr_local_cells_with_particles, sizeof(int), 0, s); if (verbose) message("took %.3f %s.", clocks_from_ticks(getticks() - tic), @@ -1033,6 +1264,7 @@ void space_parts_get_cell_index_mapper(void *map_data, int nr_parts, /* Init the local collectors */ float min_mass = FLT_MAX; float sum_vel_norm = 0.f; + int count_inhibited_part = 0; /* Loop over the parts. */ for (int k = 0; k < nr_parts; k++) { @@ -1064,8 +1296,15 @@ void space_parts_get_cell_index_mapper(void *map_data, int nr_parts, pos_z); #endif - ind[k] = index; - cell_counts[index]++; + /* Is this particle to be removed? */ + if (p->time_bin == time_bin_inhibited) { + ind[k] = -1; + ++count_inhibited_part; + } else { + /* List its top-level cell index */ + ind[k] = index; + cell_counts[index]++; + } /* Compute minimal mass */ min_mass = min(min_mass, hydro_get_mass(p)); @@ -1084,6 +1323,9 @@ void space_parts_get_cell_index_mapper(void *map_data, int nr_parts, if (cell_counts[k]) atomic_add(&data->cell_counts[k], cell_counts[k]); free(cell_counts); + /* Write the count of inhibited parts */ + atomic_add(&data->count_inhibited_part, count_inhibited_part); + /* Write back the minimal part mass and velocity sum */ atomic_min_f(&s->min_part_mass, min_mass); atomic_add_f(&s->sum_part_vel_norm, sum_vel_norm); @@ -1122,6 +1364,7 @@ void space_gparts_get_cell_index_mapper(void *map_data, int nr_gparts, /* Init the local collectors */ float min_mass = FLT_MAX; float sum_vel_norm = 0.f; + int count_inhibited_gpart = 0; for (int k = 0; k < nr_gparts; k++) { @@ -1152,12 +1395,22 @@ void space_gparts_get_cell_index_mapper(void *map_data, int nr_gparts, pos_z); 
#endif - ind[k] = index; - cell_counts[index]++; + /* Is this particle to be removed? */ + if (gp->time_bin == time_bin_inhibited) { + ind[k] = -1; + ++count_inhibited_gpart; + } else { + /* List its top-level cell index */ + ind[k] = index; + cell_counts[index]++; + } - /* Compute minimal mass */ if (gp->type == swift_type_dark_matter) { + + /* Compute minimal mass */ min_mass = min(min_mass, gp->mass); + + /* Compute sum of velocity norm */ sum_vel_norm += gp->v_full[0] * gp->v_full[0] + gp->v_full[1] * gp->v_full[1] + gp->v_full[2] * gp->v_full[2]; @@ -1174,6 +1427,9 @@ void space_gparts_get_cell_index_mapper(void *map_data, int nr_gparts, if (cell_counts[k]) atomic_add(&data->cell_counts[k], cell_counts[k]); free(cell_counts); + /* Write the count of inhibited gparts */ + atomic_add(&data->count_inhibited_gpart, count_inhibited_gpart); + /* Write back the minimal part mass and velocity sum */ atomic_min_f(&s->min_gpart_mass, min_mass); atomic_add_f(&s->sum_gpart_vel_norm, sum_vel_norm); @@ -1212,6 +1468,7 @@ void space_sparts_get_cell_index_mapper(void *map_data, int nr_sparts, /* Init the local collectors */ float min_mass = FLT_MAX; float sum_vel_norm = 0.f; + int count_inhibited_spart = 0; for (int k = 0; k < nr_sparts; k++) { @@ -1242,8 +1499,15 @@ void space_sparts_get_cell_index_mapper(void *map_data, int nr_sparts, pos_z); #endif - ind[k] = index; - cell_counts[index]++; + /* Is this particle to be removed? 
*/ + if (sp->time_bin == time_bin_inhibited) { + ind[k] = -1; + ++count_inhibited_spart; + } else { + /* List its top-level cell index */ + ind[k] = index; + cell_counts[index]++; + } /* Compute minimal mass */ min_mass = min(min_mass, sp->mass); @@ -1263,6 +1527,9 @@ void space_sparts_get_cell_index_mapper(void *map_data, int nr_sparts, if (cell_counts[k]) atomic_add(&data->cell_counts[k], cell_counts[k]); free(cell_counts); + /* Write the count of inhibited parts */ + atomic_add(&data->count_inhibited_spart, count_inhibited_spart); + /* Write back the minimal part mass and velocity sum */ atomic_min_f(&s->min_spart_mass, min_mass); atomic_add_f(&s->sum_spart_vel_norm, sum_vel_norm); @@ -1276,11 +1543,11 @@ void space_sparts_get_cell_index_mapper(void *map_data, int nr_sparts, * @param s The #space. * @param ind The array of indices to fill. * @param cell_counts The cell counters to update. - * @param cells The array of #cell to update. + * @param count_inhibited_parts (return) The number of #part to remove. * @param verbose Are we talkative ? */ void space_parts_get_cell_index(struct space *s, int *ind, int *cell_counts, - struct cell *cells, int verbose) { + int *count_inhibited_parts, int verbose) { const ticks tic = getticks(); @@ -1291,13 +1558,17 @@ void space_parts_get_cell_index(struct space *s, int *ind, int *cell_counts, /* Pack the extra information */ struct index_data data; data.s = s; - data.cells = cells; data.ind = ind; data.cell_counts = cell_counts; + data.count_inhibited_part = 0; + data.count_inhibited_gpart = 0; + data.count_inhibited_spart = 0; threadpool_map(&s->e->threadpool, space_parts_get_cell_index_mapper, s->parts, s->nr_parts, sizeof(struct part), 0, &data); + *count_inhibited_parts = data.count_inhibited_part; + if (verbose) message("took %.3f %s.", clocks_from_ticks(getticks() - tic), clocks_getunit()); @@ -1311,11 +1582,11 @@ void space_parts_get_cell_index(struct space *s, int *ind, int *cell_counts, * @param s The #space. 
* @param gind The array of indices to fill. * @param cell_counts The cell counters to update. - * @param cells The array of #cell to update. + * @param count_inhibited_gparts (return) The number of #gpart to remove. * @param verbose Are we talkative ? */ void space_gparts_get_cell_index(struct space *s, int *gind, int *cell_counts, - struct cell *cells, int verbose) { + int *count_inhibited_gparts, int verbose) { const ticks tic = getticks(); @@ -1326,13 +1597,17 @@ void space_gparts_get_cell_index(struct space *s, int *gind, int *cell_counts, /* Pack the extra information */ struct index_data data; data.s = s; - data.cells = cells; data.ind = gind; data.cell_counts = cell_counts; + data.count_inhibited_part = 0; + data.count_inhibited_gpart = 0; + data.count_inhibited_spart = 0; threadpool_map(&s->e->threadpool, space_gparts_get_cell_index_mapper, s->gparts, s->nr_gparts, sizeof(struct gpart), 0, &data); + *count_inhibited_gparts = data.count_inhibited_gpart; + if (verbose) message("took %.3f %s.", clocks_from_ticks(getticks() - tic), clocks_getunit()); @@ -1346,11 +1621,11 @@ void space_gparts_get_cell_index(struct space *s, int *gind, int *cell_counts, * @param s The #space. * @param sind The array of indices to fill. * @param cell_counts The cell counters to update. - * @param cells The array of #cell to update. + * @param count_inhibited_sparts (return) The number of #spart to remove. * @param verbose Are we talkative ? 
*/ void space_sparts_get_cell_index(struct space *s, int *sind, int *cell_counts, - struct cell *cells, int verbose) { + int *count_inhibited_sparts, int verbose) { const ticks tic = getticks(); @@ -1361,13 +1636,17 @@ void space_sparts_get_cell_index(struct space *s, int *sind, int *cell_counts, /* Pack the extra information */ struct index_data data; data.s = s; - data.cells = cells; data.ind = sind; data.cell_counts = cell_counts; + data.count_inhibited_part = 0; + data.count_inhibited_gpart = 0; + data.count_inhibited_spart = 0; threadpool_map(&s->e->threadpool, space_sparts_get_cell_index_mapper, s->sparts, s->nr_sparts, sizeof(struct spart), 0, &data); + *count_inhibited_sparts = data.count_inhibited_spart; + if (verbose) message("took %.3f %s.", clocks_from_ticks(getticks() - tic), clocks_getunit()); @@ -1384,8 +1663,9 @@ void space_sparts_get_cell_index(struct space *s, int *sind, int *cell_counts, * @param num_bins Total number of bins (length of count). * @param parts_offset Offset of the #part array from the global #part array. */ -void space_parts_sort(struct part *parts, struct xpart *xparts, int *ind, - int *counts, int num_bins, ptrdiff_t parts_offset) { +void space_parts_sort(struct part *parts, struct xpart *xparts, + int *restrict ind, int *restrict counts, int num_bins, + ptrdiff_t parts_offset) { /* Create the offsets array. */ size_t *offsets = NULL; if (posix_memalign((void **)&offsets, SWIFT_STRUCT_ALIGNMENT, @@ -1446,8 +1726,9 @@ void space_parts_sort(struct part *parts, struct xpart *xparts, int *ind, * @param sparts_offset Offset of the #spart array from the global #spart. * array. */ -void space_sparts_sort(struct spart *sparts, int *ind, int *counts, - int num_bins, ptrdiff_t sparts_offset) { +void space_sparts_sort(struct spart *sparts, int *restrict ind, + int *restrict counts, int num_bins, + ptrdiff_t sparts_offset) { /* Create the offsets array. 
*/ size_t *offsets = NULL; if (posix_memalign((void **)&offsets, SWIFT_STRUCT_ALIGNMENT, @@ -1506,8 +1787,8 @@ void space_sparts_sort(struct spart *sparts, int *ind, int *counts, * @param num_bins Total number of bins (length of counts). */ void space_gparts_sort(struct gpart *gparts, struct part *parts, - struct spart *sparts, int *ind, int *counts, - int num_bins) { + struct spart *sparts, int *restrict ind, + int *restrict counts, int num_bins) { /* Create the offsets array. */ size_t *offsets = NULL; if (posix_memalign((void **)&offsets, SWIFT_STRUCT_ALIGNMENT, @@ -1538,7 +1819,7 @@ void space_gparts_sort(struct gpart *gparts, struct part *parts, memswap(&ind[j], &target_cid, sizeof(int)); if (gparts[j].type == swift_type_gas) { parts[-gparts[j].id_or_neg_offset].gpart = &gparts[j]; - } else if (gparts[j].type == swift_type_star) { + } else if (gparts[j].type == swift_type_stars) { sparts[-gparts[j].id_or_neg_offset].gpart = &gparts[j]; } } @@ -1546,7 +1827,7 @@ void space_gparts_sort(struct gpart *gparts, struct part *parts, ind[k] = target_cid; if (gparts[k].type == swift_type_gas) { parts[-gparts[k].id_or_neg_offset].gpart = &gparts[k]; - } else if (gparts[k].type == swift_type_star) { + } else if (gparts[k].type == swift_type_stars) { sparts[-gparts[k].id_or_neg_offset].gpart = &gparts[k]; } } @@ -1566,11 +1847,16 @@ void space_gparts_sort(struct gpart *gparts, struct part *parts, */ void space_map_clearsort(struct cell *c, void *data) { - for (int i = 0; i < 13; i++) - if (c->sort[i] != NULL) { - free(c->sort[i]); - c->sort[i] = NULL; + for (int i = 0; i < 13; i++) { + if (c->hydro.sort[i] != NULL) { + free(c->hydro.sort[i]); + c->hydro.sort[i] = NULL; + } + if (c->stars.sort[i] != NULL) { + free(c->stars.sort[i]); + c->stars.sort[i] = NULL; } + } } /** @@ -1586,7 +1872,7 @@ static void rec_map_parts(struct cell *c, void *data) { /* No progeny? 
*/ if (!c->split) - for (int k = 0; k < c->count; k++) fun(&c->parts[k], c, data); + for (int k = 0; k < c->hydro.count; k++) fun(&c->hydro.parts[k], c, data); /* Otherwise, recurse. */ else @@ -1622,7 +1908,8 @@ static void rec_map_parts_xparts(struct cell *c, /* No progeny? */ if (!c->split) - for (int k = 0; k < c->count; k++) fun(&c->parts[k], &c->xparts[k], c); + for (int k = 0; k < c->hydro.count; k++) + fun(&c->hydro.parts[k], &c->hydro.xparts[k], c); /* Otherwise, recurse. */ else @@ -1718,31 +2005,33 @@ void space_map_cells_pre(struct space *s, int full, * @param s The #space in which the cell lives. * @param c The #cell to split recursively. * @param buff A buffer for particle sorting, should be of size at least - * c->count or @c NULL. + * c->hydro.count or @c NULL. * @param sbuff A buffer for particle sorting, should be of size at least - * c->scount or @c NULL. + * c->stars.count or @c NULL. * @param gbuff A buffer for particle sorting, should be of size at least - * c->gcount or @c NULL. + * c->grav.count or @c NULL. 
*/ void space_split_recursive(struct space *s, struct cell *c, struct cell_buff *buff, struct cell_buff *sbuff, struct cell_buff *gbuff) { - const int count = c->count; - const int gcount = c->gcount; - const int scount = c->scount; + const int count = c->hydro.count; + const int gcount = c->grav.count; + const int scount = c->stars.count; const int with_gravity = s->gravity; const int depth = c->depth; int maxdepth = 0; float h_max = 0.0f; + float stars_h_max = 0.f; integertime_t ti_hydro_end_min = max_nr_timesteps, ti_hydro_end_max = 0, ti_hydro_beg_max = 0; integertime_t ti_gravity_end_min = max_nr_timesteps, ti_gravity_end_max = 0, ti_gravity_beg_max = 0; - struct part *parts = c->parts; - struct gpart *gparts = c->gparts; - struct spart *sparts = c->sparts; - struct xpart *xparts = c->xparts; + integertime_t ti_stars_end_min = max_nr_timesteps; + struct part *parts = c->hydro.parts; + struct gpart *gparts = c->grav.parts; + struct spart *sparts = c->stars.parts; + struct xpart *xparts = c->hydro.xparts; struct engine *e = s->e; const integertime_t ti_current = e->ti_current; @@ -1754,6 +2043,10 @@ void space_split_recursive(struct space *s, struct cell *c, sizeof(struct cell_buff) * count) != 0) error("Failed to allocate temporary indices."); for (int k = 0; k < count; k++) { +#ifdef SWIFT_DEBUG_CHECKS + if (parts[k].time_bin == time_bin_inhibited) + error("Inhibited particle present in space_split()"); +#endif buff[k].x[0] = parts[k].x[0]; buff[k].x[1] = parts[k].x[1]; buff[k].x[2] = parts[k].x[2]; @@ -1764,6 +2057,10 @@ void space_split_recursive(struct space *s, struct cell *c, sizeof(struct cell_buff) * gcount) != 0) error("Failed to allocate temporary indices."); for (int k = 0; k < gcount; k++) { +#ifdef SWIFT_DEBUG_CHECKS + if (gparts[k].time_bin == time_bin_inhibited) + error("Inhibited particle present in space_split()"); +#endif gbuff[k].x[0] = gparts[k].x[0]; gbuff[k].x[1] = gparts[k].x[1]; gbuff[k].x[2] = gparts[k].x[2]; @@ -1774,6 +2071,10 @@ void 
space_split_recursive(struct space *s, struct cell *c, sizeof(struct cell_buff) * scount) != 0) error("Failed to allocate temporary indices."); for (int k = 0; k < scount; k++) { +#ifdef SWIFT_DEBUG_CHECKS + if (sparts[k].time_bin == time_bin_inhibited) + error("Inhibited particle present in space_split()"); +#endif sbuff[k].x[0] = sparts[k].x[0]; sbuff[k].x[1] = sparts[k].x[1]; sbuff[k].x[2] = sparts[k].x[2]; @@ -1804,12 +2105,12 @@ void space_split_recursive(struct space *s, struct cell *c, space_getcells(s, 8, c->progeny); for (int k = 0; k < 8; k++) { struct cell *cp = c->progeny[k]; - cp->count = 0; - cp->gcount = 0; - cp->scount = 0; - cp->ti_old_part = c->ti_old_part; - cp->ti_old_gpart = c->ti_old_gpart; - cp->ti_old_multipole = c->ti_old_multipole; + cp->hydro.count = 0; + cp->grav.count = 0; + cp->stars.count = 0; + cp->hydro.ti_old_part = c->hydro.ti_old_part; + cp->grav.ti_old_part = c->grav.ti_old_part; + cp->grav.ti_old_multipole = c->grav.ti_old_multipole; cp->loc[0] = c->loc[0]; cp->loc[1] = c->loc[1]; cp->loc[2] = c->loc[2]; @@ -1822,25 +2123,32 @@ void space_split_recursive(struct space *s, struct cell *c, if (k & 1) cp->loc[2] += cp->width[2]; cp->depth = c->depth + 1; cp->split = 0; - cp->h_max = 0.f; - cp->dx_max_part = 0.f; - cp->dx_max_sort = 0.f; + cp->hydro.h_max = 0.f; + cp->hydro.dx_max_part = 0.f; + cp->hydro.dx_max_sort = 0.f; + cp->stars.h_max = 0.f; + cp->stars.dx_max_part = 0.f; + cp->stars.dx_max_sort = 0.f; cp->nodeID = c->nodeID; cp->parent = c; cp->super = NULL; - cp->super_hydro = NULL; - cp->super_gravity = NULL; - cp->do_sub_sort = 0; - cp->do_grav_sub_drift = 0; - cp->do_sub_drift = 0; + cp->hydro.super = NULL; + cp->grav.super = NULL; + cp->hydro.do_sub_sort = 0; + cp->stars.do_sub_sort = 0; + cp->grav.do_sub_drift = 0; + cp->hydro.do_sub_drift = 0; +#ifdef WITH_MPI + cp->mpi.tag = -1; +#endif // WITH_MPI #ifdef SWIFT_DEBUG_CHECKS cp->cellID = last_cell_id++; #endif } /* Split the cell's partcle data. 
*/ - cell_split(c, c->parts - s->parts, c->sparts - s->sparts, buff, sbuff, - gbuff); + cell_split(c, c->hydro.parts - s->parts, c->stars.parts - s->sparts, buff, + sbuff, gbuff); /* Buffers for the progenitors */ struct cell_buff *progeny_buff = buff, *progeny_gbuff = gbuff, @@ -1852,7 +2160,7 @@ void space_split_recursive(struct space *s, struct cell *c, struct cell *cp = c->progeny[k]; /* Remove any progeny with zero particles. */ - if (cp->count == 0 && cp->gcount == 0 && cp->scount == 0) { + if (cp->hydro.count == 0 && cp->grav.count == 0 && cp->stars.count == 0) { space_recycle(s, cp); c->progeny[k] = NULL; @@ -1864,18 +2172,20 @@ void space_split_recursive(struct space *s, struct cell *c, progeny_gbuff); /* Update the pointers in the buffers */ - progeny_buff += cp->count; - progeny_gbuff += cp->gcount; - progeny_sbuff += cp->scount; + progeny_buff += cp->hydro.count; + progeny_gbuff += cp->grav.count; + progeny_sbuff += cp->stars.count; /* Update the cell-wide properties */ - h_max = max(h_max, cp->h_max); - ti_hydro_end_min = min(ti_hydro_end_min, cp->ti_hydro_end_min); - ti_hydro_end_max = max(ti_hydro_end_max, cp->ti_hydro_end_max); - ti_hydro_beg_max = max(ti_hydro_beg_max, cp->ti_hydro_beg_max); - ti_gravity_end_min = min(ti_gravity_end_min, cp->ti_gravity_end_min); - ti_gravity_end_max = max(ti_gravity_end_max, cp->ti_gravity_end_max); - ti_gravity_beg_max = max(ti_gravity_beg_max, cp->ti_gravity_beg_max); + h_max = max(h_max, cp->hydro.h_max); + stars_h_max = max(h_max, cp->stars.h_max); + ti_hydro_end_min = min(ti_hydro_end_min, cp->hydro.ti_end_min); + ti_hydro_end_max = max(ti_hydro_end_max, cp->hydro.ti_end_max); + ti_hydro_beg_max = max(ti_hydro_beg_max, cp->hydro.ti_beg_max); + ti_gravity_end_min = min(ti_gravity_end_min, cp->grav.ti_end_min); + ti_gravity_end_max = max(ti_gravity_end_max, cp->grav.ti_end_max); + ti_gravity_beg_max = max(ti_gravity_beg_max, cp->grav.ti_beg_max); + ti_stars_end_min = min(ti_stars_end_min, cp->stars.ti_end_min); 
/* Increase the depth */ if (cp->maxdepth > maxdepth) maxdepth = cp->maxdepth; @@ -1886,7 +2196,7 @@ void space_split_recursive(struct space *s, struct cell *c, if (s->gravity) { /* Reset everything */ - gravity_reset(c->multipole); + gravity_reset(c->grav.multipole); /* Compute CoM and bulk velocity from all progenies */ double CoM[3] = {0., 0., 0.}; @@ -1897,7 +2207,7 @@ void space_split_recursive(struct space *s, struct cell *c, for (int k = 0; k < 8; ++k) { if (c->progeny[k] != NULL) { - const struct gravity_tensors *m = c->progeny[k]->multipole; + const struct gravity_tensors *m = c->progeny[k]->grav.multipole; mass += m->m_pole.M_000; @@ -1921,20 +2231,20 @@ void space_split_recursive(struct space *s, struct cell *c, /* Final operation on the CoM and bulk velocity */ const double inv_mass = 1. / mass; - c->multipole->CoM[0] = CoM[0] * inv_mass; - c->multipole->CoM[1] = CoM[1] * inv_mass; - c->multipole->CoM[2] = CoM[2] * inv_mass; - c->multipole->m_pole.vel[0] = vel[0] * inv_mass; - c->multipole->m_pole.vel[1] = vel[1] * inv_mass; - c->multipole->m_pole.vel[2] = vel[2] * inv_mass; + c->grav.multipole->CoM[0] = CoM[0] * inv_mass; + c->grav.multipole->CoM[1] = CoM[1] * inv_mass; + c->grav.multipole->CoM[2] = CoM[2] * inv_mass; + c->grav.multipole->m_pole.vel[0] = vel[0] * inv_mass; + c->grav.multipole->m_pole.vel[1] = vel[1] * inv_mass; + c->grav.multipole->m_pole.vel[2] = vel[2] * inv_mass; /* Min max velocity along each axis */ - c->multipole->m_pole.max_delta_vel[0] = max_delta_vel[0]; - c->multipole->m_pole.max_delta_vel[1] = max_delta_vel[1]; - c->multipole->m_pole.max_delta_vel[2] = max_delta_vel[2]; - c->multipole->m_pole.min_delta_vel[0] = min_delta_vel[0]; - c->multipole->m_pole.min_delta_vel[1] = min_delta_vel[1]; - c->multipole->m_pole.min_delta_vel[2] = min_delta_vel[2]; + c->grav.multipole->m_pole.max_delta_vel[0] = max_delta_vel[0]; + c->grav.multipole->m_pole.max_delta_vel[1] = max_delta_vel[1]; + c->grav.multipole->m_pole.max_delta_vel[2] = 
max_delta_vel[2]; + c->grav.multipole->m_pole.min_delta_vel[0] = min_delta_vel[0]; + c->grav.multipole->m_pole.min_delta_vel[1] = min_delta_vel[1]; + c->grav.multipole->m_pole.min_delta_vel[2] = min_delta_vel[2]; /* Now shift progeny multipoles and add them up */ struct multipole temp; @@ -1942,45 +2252,52 @@ void space_split_recursive(struct space *s, struct cell *c, for (int k = 0; k < 8; ++k) { if (c->progeny[k] != NULL) { const struct cell *cp = c->progeny[k]; - const struct multipole *m = &cp->multipole->m_pole; + const struct multipole *m = &cp->grav.multipole->m_pole; /* Contribution to multipole */ - gravity_M2M(&temp, m, c->multipole->CoM, cp->multipole->CoM); - gravity_multipole_add(&c->multipole->m_pole, &temp); + gravity_M2M(&temp, m, c->grav.multipole->CoM, + cp->grav.multipole->CoM); + gravity_multipole_add(&c->grav.multipole->m_pole, &temp); /* Upper limit of max CoM<->gpart distance */ - const double dx = c->multipole->CoM[0] - cp->multipole->CoM[0]; - const double dy = c->multipole->CoM[1] - cp->multipole->CoM[1]; - const double dz = c->multipole->CoM[2] - cp->multipole->CoM[2]; + const double dx = + c->grav.multipole->CoM[0] - cp->grav.multipole->CoM[0]; + const double dy = + c->grav.multipole->CoM[1] - cp->grav.multipole->CoM[1]; + const double dz = + c->grav.multipole->CoM[2] - cp->grav.multipole->CoM[2]; const double r2 = dx * dx + dy * dy + dz * dz; - r_max = max(r_max, cp->multipole->r_max + sqrt(r2)); + r_max = max(r_max, cp->grav.multipole->r_max + sqrt(r2)); } } /* Alternative upper limit of max CoM<->gpart distance */ - const double dx = c->multipole->CoM[0] > c->loc[0] + c->width[0] / 2. - ? c->multipole->CoM[0] - c->loc[0] - : c->loc[0] + c->width[0] - c->multipole->CoM[0]; - const double dy = c->multipole->CoM[1] > c->loc[1] + c->width[1] / 2. - ? c->multipole->CoM[1] - c->loc[1] - : c->loc[1] + c->width[1] - c->multipole->CoM[1]; - const double dz = c->multipole->CoM[2] > c->loc[2] + c->width[2] / 2. - ? 
c->multipole->CoM[2] - c->loc[2] - : c->loc[2] + c->width[2] - c->multipole->CoM[2]; + const double dx = + c->grav.multipole->CoM[0] > c->loc[0] + c->width[0] / 2. + ? c->grav.multipole->CoM[0] - c->loc[0] + : c->loc[0] + c->width[0] - c->grav.multipole->CoM[0]; + const double dy = + c->grav.multipole->CoM[1] > c->loc[1] + c->width[1] / 2. + ? c->grav.multipole->CoM[1] - c->loc[1] + : c->loc[1] + c->width[1] - c->grav.multipole->CoM[1]; + const double dz = + c->grav.multipole->CoM[2] > c->loc[2] + c->width[2] / 2. + ? c->grav.multipole->CoM[2] - c->loc[2] + : c->loc[2] + c->width[2] - c->grav.multipole->CoM[2]; /* Take minimum of both limits */ - c->multipole->r_max = min(r_max, sqrt(dx * dx + dy * dy + dz * dz)); + c->grav.multipole->r_max = min(r_max, sqrt(dx * dx + dy * dy + dz * dz)); /* Store the value at rebuild time */ - c->multipole->r_max_rebuild = c->multipole->r_max; - c->multipole->CoM_rebuild[0] = c->multipole->CoM[0]; - c->multipole->CoM_rebuild[1] = c->multipole->CoM[1]; - c->multipole->CoM_rebuild[2] = c->multipole->CoM[2]; + c->grav.multipole->r_max_rebuild = c->grav.multipole->r_max; + c->grav.multipole->CoM_rebuild[0] = c->grav.multipole->CoM[0]; + c->grav.multipole->CoM_rebuild[1] = c->grav.multipole->CoM[1]; + c->grav.multipole->CoM_rebuild[2] = c->grav.multipole->CoM[2]; /* We know the first-order multipole (dipole) is 0. */ - c->multipole->m_pole.M_100 = 0.f; - c->multipole->m_pole.M_010 = 0.f; - c->multipole->m_pole.M_001 = 0.f; + c->grav.multipole->m_pole.M_100 = 0.f; + c->grav.multipole->m_pole.M_010 = 0.f; + c->grav.multipole->m_pole.M_001 = 0.f; } /* Deal with gravity */ } /* Split or let it be? */ @@ -1995,6 +2312,7 @@ void space_split_recursive(struct space *s, struct cell *c, timebin_t hydro_time_bin_min = num_time_bins, hydro_time_bin_max = 0; timebin_t gravity_time_bin_min = num_time_bins, gravity_time_bin_max = 0; + timebin_t stars_time_bin_min = num_time_bins; /* parts: Get dt_min/dt_max and h_max. 
*/ for (int k = 0; k < count; k++) { @@ -2032,6 +2350,14 @@ void space_split_recursive(struct space *s, struct cell *c, #endif gravity_time_bin_min = min(gravity_time_bin_min, sparts[k].time_bin); gravity_time_bin_max = max(gravity_time_bin_max, sparts[k].time_bin); + stars_time_bin_min = min(stars_time_bin_min, sparts[k].time_bin); + + stars_h_max = max(stars_h_max, sparts[k].h); + + /* Reset x_diff */ + sparts[k].x_diff[0] = 0.f; + sparts[k].x_diff[1] = 0.f; + sparts[k].x_diff[2] = 0.f; } /* Convert into integer times */ @@ -2043,55 +2369,58 @@ void space_split_recursive(struct space *s, struct cell *c, ti_gravity_end_max = get_integer_time_end(ti_current, gravity_time_bin_max); ti_gravity_beg_max = get_integer_time_begin(ti_current + 1, gravity_time_bin_max); + ti_stars_end_min = get_integer_time_end(ti_current, stars_time_bin_min); /* Construct the multipole and the centre of mass*/ if (s->gravity) { if (gcount > 0) { - gravity_P2M(c->multipole, c->gparts, c->gcount); + gravity_P2M(c->grav.multipole, c->grav.parts, c->grav.count); } else { /* No gparts in that leaf cell */ /* Set the values to something sensible */ - gravity_multipole_init(&c->multipole->m_pole); + gravity_multipole_init(&c->grav.multipole->m_pole); if (c->nodeID == engine_rank) { - c->multipole->CoM[0] = c->loc[0] + c->width[0] / 2.; - c->multipole->CoM[1] = c->loc[1] + c->width[1] / 2.; - c->multipole->CoM[2] = c->loc[2] + c->width[2] / 2.; - c->multipole->r_max = 0.; + c->grav.multipole->CoM[0] = c->loc[0] + c->width[0] / 2.; + c->grav.multipole->CoM[1] = c->loc[1] + c->width[1] / 2.; + c->grav.multipole->CoM[2] = c->loc[2] + c->width[2] / 2.; + c->grav.multipole->r_max = 0.; } } /* Store the value at rebuild time */ - c->multipole->r_max_rebuild = c->multipole->r_max; - c->multipole->CoM_rebuild[0] = c->multipole->CoM[0]; - c->multipole->CoM_rebuild[1] = c->multipole->CoM[1]; - c->multipole->CoM_rebuild[2] = c->multipole->CoM[2]; + c->grav.multipole->r_max_rebuild = 
c->grav.multipole->r_max; + c->grav.multipole->CoM_rebuild[0] = c->grav.multipole->CoM[0]; + c->grav.multipole->CoM_rebuild[1] = c->grav.multipole->CoM[1]; + c->grav.multipole->CoM_rebuild[2] = c->grav.multipole->CoM[2]; } } /* Set the values for this cell. */ - c->h_max = h_max; - c->ti_hydro_end_min = ti_hydro_end_min; - c->ti_hydro_end_max = ti_hydro_end_max; - c->ti_hydro_beg_max = ti_hydro_beg_max; - c->ti_gravity_end_min = ti_gravity_end_min; - c->ti_gravity_end_max = ti_gravity_end_max; - c->ti_gravity_beg_max = ti_gravity_beg_max; + c->hydro.h_max = h_max; + c->hydro.ti_end_min = ti_hydro_end_min; + c->hydro.ti_end_max = ti_hydro_end_max; + c->hydro.ti_beg_max = ti_hydro_beg_max; + c->grav.ti_end_min = ti_gravity_end_min; + c->grav.ti_end_max = ti_gravity_end_max; + c->grav.ti_beg_max = ti_gravity_beg_max; + c->stars.ti_end_min = ti_stars_end_min; + c->stars.h_max = stars_h_max; c->maxdepth = maxdepth; /* Set ownership according to the start of the parts array. */ if (s->nr_parts > 0) - c->owner = - ((c->parts - s->parts) % s->nr_parts) * s->nr_queues / s->nr_parts; + c->owner = ((c->hydro.parts - s->parts) % s->nr_parts) * s->nr_queues / + s->nr_parts; else if (s->nr_sparts > 0) - c->owner = - ((c->sparts - s->sparts) % s->nr_sparts) * s->nr_queues / s->nr_sparts; + c->owner = ((c->stars.parts - s->sparts) % s->nr_sparts) * s->nr_queues / + s->nr_sparts; else if (s->nr_gparts > 0) - c->owner = - ((c->gparts - s->gparts) % s->nr_gparts) * s->nr_queues / s->nr_gparts; + c->owner = ((c->grav.parts - s->gparts) % s->nr_gparts) * s->nr_queues / + s->nr_gparts; else c->owner = 0; /* Ok, there is really nothing on this rank... */ @@ -2115,10 +2444,12 @@ void space_split_mapper(void *map_data, int num_cells, void *extra_data) { /* Unpack the inputs. 
*/ struct space *s = (struct space *)extra_data; - struct cell *restrict cells_top = (struct cell *)map_data; + struct cell *cells_top = s->cells_top; + int *local_cells_with_particles = (int *)map_data; + /* Loop over the non-empty cells */ for (int ind = 0; ind < num_cells; ind++) { - struct cell *c = &cells_top[ind]; + struct cell *c = &cells_top[local_cells_with_particles[ind]]; space_split_recursive(s, c, NULL, NULL, NULL); } @@ -2126,8 +2457,8 @@ void space_split_mapper(void *map_data, int num_cells, void *extra_data) { /* All cells and particles should have consistent h_max values. */ for (int ind = 0; ind < num_cells; ind++) { int depth = 0; - if (!checkCellhdxmax(&cells_top[ind], &depth)) - message(" at cell depth %d", depth); + const struct cell *c = &cells_top[local_cells_with_particles[ind]]; + if (!checkCellhdxmax(c, &depth)) message(" at cell depth %d", depth); } #endif } @@ -2141,8 +2472,8 @@ void space_split_mapper(void *map_data, int num_cells, void *extra_data) { void space_recycle(struct space *s, struct cell *c) { /* Clear the cell. */ - if (lock_destroy(&c->lock) != 0 || lock_destroy(&c->glock) != 0 || - lock_destroy(&c->mlock) != 0 || lock_destroy(&c->slock) != 0) + if (lock_destroy(&c->lock) != 0 || lock_destroy(&c->grav.plock) != 0 || + lock_destroy(&c->mlock) != 0 || lock_destroy(&c->stars.lock) != 0) error("Failed to destroy spinlocks."); /* Lock the space. */ @@ -2150,8 +2481,8 @@ void space_recycle(struct space *s, struct cell *c) { /* Hook the multipole back in the buffer */ if (s->gravity) { - c->multipole->next = s->multipoles_sub; - s->multipoles_sub = c->multipole; + c->grav.multipole->next = s->multipoles_sub; + s->multipoles_sub = c->grav.multipole; } /* Hook this cell into the buffer. */ @@ -2190,8 +2521,8 @@ void space_recycle_list(struct space *s, struct cell *cell_list_begin, /* Clean up the list of cells. */ for (struct cell *c = cell_list_begin; c != NULL; c = c->next) { /* Clear the cell. 
*/ - if (lock_destroy(&c->lock) != 0 || lock_destroy(&c->glock) != 0 || - lock_destroy(&c->mlock) != 0 || lock_destroy(&c->slock) != 0) + if (lock_destroy(&c->lock) != 0 || lock_destroy(&c->grav.plock) != 0 || + lock_destroy(&c->mlock) != 0 || lock_destroy(&c->stars.lock) != 0) error("Failed to destroy spinlocks."); /* Count this cell. */ @@ -2270,8 +2601,8 @@ void space_getcells(struct space *s, int nr_cells, struct cell **cells) { /* Hook the multipole */ if (s->gravity) { - cells[j]->multipole = s->multipoles_sub; - s->multipoles_sub = cells[j]->multipole->next; + cells[j]->grav.multipole = s->multipoles_sub; + s->multipoles_sub = cells[j]->grav.multipole->next; } } @@ -2280,14 +2611,18 @@ void space_getcells(struct space *s, int nr_cells, struct cell **cells) { /* Init some things in the cell we just got. */ for (int j = 0; j < nr_cells; j++) { - for (int k = 0; k < 13; k++) - if (cells[j]->sort[k] != NULL) free(cells[j]->sort[k]); - struct gravity_tensors *temp = cells[j]->multipole; + for (int k = 0; k < 13; k++) { + if (cells[j]->hydro.sort[k] != NULL) free(cells[j]->hydro.sort[k]); + if (cells[j]->stars.sort[k] != NULL) free(cells[j]->stars.sort[k]); + } + struct gravity_tensors *temp = cells[j]->grav.multipole; bzero(cells[j], sizeof(struct cell)); - cells[j]->multipole = temp; + cells[j]->grav.multipole = temp; cells[j]->nodeID = -1; - if (lock_init(&cells[j]->lock) != 0 || lock_init(&cells[j]->glock) != 0 || - lock_init(&cells[j]->mlock) != 0 || lock_init(&cells[j]->slock) != 0) + if (lock_init(&cells[j]->hydro.lock) != 0 || + lock_init(&cells[j]->grav.plock) != 0 || + lock_init(&cells[j]->grav.mlock) != 0 || + lock_init(&cells[j]->stars.lock) != 0) error("Failed to initialize cell spinlocks."); } } @@ -2300,33 +2635,62 @@ void space_getcells(struct space *s, int nr_cells, struct cell **cells) { void space_free_buff_sort_indices(struct space *s) { for (struct cell *finger = s->cells_sub; finger != NULL; finger = finger->next) { - for (int k = 0; k < 13; 
k++) - if (finger->sort[k] != NULL) { - free(finger->sort[k]); - finger->sort[k] = NULL; + for (int k = 0; k < 13; k++) { + if (finger->hydro.sort[k] != NULL) { + free(finger->hydro.sort[k]); + finger->hydro.sort[k] = NULL; } + if (finger->stars.sort[k] != NULL) { + free(finger->stars.sort[k]); + finger->stars.sort[k] = NULL; + } + } } } /** * @brief Construct the list of top-level cells that have any tasks in - * their hierarchy. + * their hierarchy on this MPI rank. Also construct the list of top-level + * cells on any rank that have > 0 particles (of any kind). * * This assumes the list has been pre-allocated at a regrid. * * @param s The #space. */ -void space_list_cells_with_tasks(struct space *s) { +void space_list_useful_top_level_cells(struct space *s) { - /* Let's rebuild the list of local top-level cells */ - s->nr_local_cells = 0; - for (int i = 0; i < s->nr_cells; ++i) - if (cell_has_tasks(&s->cells_top[i])) { - s->local_cells_top[s->nr_local_cells] = i; - s->nr_local_cells++; + const ticks tic = getticks(); + + s->nr_local_cells_with_tasks = 0; + s->nr_cells_with_particles = 0; + + for (int i = 0; i < s->nr_cells; ++i) { + struct cell *c = &s->cells_top[i]; + + if (cell_has_tasks(c)) { + s->local_cells_with_tasks_top[s->nr_local_cells_with_tasks] = i; + s->nr_local_cells_with_tasks++; } + + const int has_particles = + (c->hydro.count > 0) || (c->grav.count > 0) || (c->stars.count > 0) || + (c->grav.multipole != NULL && c->grav.multipole->m_pole.M_000 > 0.f); + + if (has_particles) { + s->cells_with_particles_top[s->nr_cells_with_particles] = i; + s->nr_cells_with_particles++; + } + } + if (s->e->verbose) { + message("Have %d local top-level cells with tasks (total=%d)", + s->nr_local_cells_with_tasks, s->nr_cells); + message("Have %d top-level cells with particles (total=%d)", + s->nr_cells_with_particles, s->nr_cells); + } + if (s->e->verbose) - message("Have %d local cells (total=%d)", s->nr_local_cells, s->nr_cells); + message("took %.3f %s.", 
clocks_from_ticks(getticks() - tic), + clocks_getunit()); } void space_synchronize_particle_positions_mapper(void *map_data, int nr_gparts, @@ -2359,7 +2723,7 @@ void space_synchronize_particle_positions_mapper(void *map_data, int nr_gparts, xp->v_full[2] = gp->v_full[2]; } - else if (gp->type == swift_type_star) { + else if (gp->type == swift_type_stars) { /* Get it's stellar friend */ struct spart *sp = &s->sparts[-gp->id_or_neg_offset]; @@ -2374,11 +2738,17 @@ void space_synchronize_particle_positions_mapper(void *map_data, int nr_gparts, void space_synchronize_particle_positions(struct space *s) { + const ticks tic = getticks(); + if ((s->nr_gparts > 0 && s->nr_parts > 0) || (s->nr_gparts > 0 && s->nr_sparts > 0)) threadpool_map(&s->e->threadpool, space_synchronize_particle_positions_mapper, s->gparts, s->nr_gparts, sizeof(struct gpart), 0, (void *)s); + + if (s->e->verbose) + message("took %.3f %s.", clocks_from_ticks(getticks() - tic), + clocks_getunit()); } void space_first_init_parts_mapper(void *restrict map_data, int count, @@ -2399,7 +2769,6 @@ void space_first_init_parts_mapper(void *restrict map_data, int count, const struct hydro_props *hydro_props = s->e->hydro_properties; const float u_init = hydro_props->initial_internal_energy; - const float u_min = hydro_props->minimal_internal_energy; const struct chemistry_global_data *chemistry = e->chemistry; const struct cooling_function_data *cool_func = e->cooling_func; @@ -2425,10 +2794,12 @@ void space_first_init_parts_mapper(void *restrict map_data, int count, for (int k = 0; k < count; k++) { hydro_first_init_part(&p[k], &xp[k]); +#ifdef WITH_LOGGER + logger_part_data_init(&xp[k].logger_data); +#endif /* Overwrite the internal energy? 
*/ if (u_init > 0.f) hydro_set_init_internal_energy(&p[k], u_init); - if (u_min > 0.f) hydro_set_init_internal_energy(&p[k], u_min); /* Also initialise the chemistry */ chemistry_first_init_part(phys_const, us, cosmo, chemistry, &p[k], &xp[k]); @@ -2558,7 +2929,7 @@ void space_first_init_sparts_mapper(void *restrict map_data, int count, /* Initialise the rest */ for (int k = 0; k < count; k++) { - star_first_init_spart(&sp[k]); + stars_first_init_spart(&sp[k]); #ifdef SWIFT_DEBUG_CHECKS if (sp[k].gpart && sp[k].gpart->id_or_neg_offset != -(k + delta)) @@ -2574,7 +2945,7 @@ void space_first_init_sparts_mapper(void *restrict map_data, int count, /** * @brief Initialises all the s-particles by setting them into a valid state * - * Calls star_first_init_spart() on all the particles + * Calls stars_first_init_spart() on all the particles */ void space_first_init_sparts(struct space *s, int verbose) { const ticks tic = getticks(); @@ -2639,16 +3010,43 @@ void space_init_gparts(struct space *s, int verbose) { clocks_getunit()); } +void space_init_sparts_mapper(void *restrict map_data, int scount, + void *restrict extra_data) { + + struct spart *restrict sparts = (struct spart *)map_data; + for (int k = 0; k < scount; k++) stars_init_spart(&sparts[k]); +} + +/** + * @brief Calls the #spart initialisation function on all particles in the + * space. + * + * @param s The #space. + * @param verbose Are we talkative? 
+ */ +void space_init_sparts(struct space *s, int verbose) { + + const ticks tic = getticks(); + + if (s->nr_sparts > 0) + threadpool_map(&s->e->threadpool, space_init_sparts_mapper, s->sparts, + s->nr_sparts, sizeof(struct spart), 0, NULL); + if (verbose) + message("took %.3f %s.", clocks_from_ticks(getticks() - tic), + clocks_getunit()); +} + void space_convert_quantities_mapper(void *restrict map_data, int count, void *restrict extra_data) { struct space *s = (struct space *)extra_data; const struct cosmology *cosmo = s->e->cosmology; + const struct hydro_props *hydro_props = s->e->hydro_properties; struct part *restrict parts = (struct part *)map_data; const ptrdiff_t index = parts - s->parts; struct xpart *restrict xparts = s->xparts + index; for (int k = 0; k < count; k++) - hydro_convert_quantities(&parts[k], &xparts[k], cosmo); + hydro_convert_quantities(&parts[k], &xparts[k], cosmo, hydro_props); } /** @@ -2680,13 +3078,14 @@ void space_convert_quantities(struct space *s, int verbose) { * @param dim Spatial dimensions of the domain. * @param parts Array of Gas particles. * @param gparts Array of Gravity particles. - * @param sparts Array of star particles. + * @param sparts Array of stars particles. * @param Npart The number of Gas particles in the space. * @param Ngpart The number of Gravity particles in the space. - * @param Nspart The number of star particles in the space. + * @param Nspart The number of stars particles in the space. * @param periodic flag whether the domain is periodic or not. * @param replicate How many replications along each direction do we want? * @param generate_gas_in_ics Are we generating gas particles from the gparts? + * @param hydro flag whether we are doing hydro or not? * @param self_gravity flag whether we are doing gravity or not? * @param verbose Print messages to stdout or not. * @param dry_run If 1, just initialise stuff, don't do anything with the parts. 
@@ -2700,8 +3099,8 @@ void space_init(struct space *s, struct swift_params *params, const struct cosmology *cosmo, double dim[3], struct part *parts, struct gpart *gparts, struct spart *sparts, size_t Npart, size_t Ngpart, size_t Nspart, int periodic, - int replicate, int generate_gas_in_ics, int self_gravity, - int verbose, int dry_run) { + int replicate, int generate_gas_in_ics, int hydro, + int self_gravity, int verbose, int dry_run) { /* Clean-up everything */ bzero(s, sizeof(struct space)); @@ -2712,6 +3111,7 @@ void space_init(struct space *s, struct swift_params *params, s->dim[2] = dim[2]; s->periodic = periodic; s->gravity = self_gravity; + s->hydro = hydro; s->nr_parts = Npart; s->size_parts = Npart; s->parts = parts; @@ -2731,7 +3131,7 @@ void space_init(struct space *s, struct swift_params *params, /* Are we generating gas from the DM-only ICs? */ if (generate_gas_in_ics) { - space_generate_gas(s, cosmo, verbose); + space_generate_gas(s, cosmo, periodic, dim, verbose); parts = s->parts; gparts = s->gparts; Npart = s->nr_parts; @@ -2791,19 +3191,28 @@ void space_init(struct space *s, struct swift_params *params, space_subsize_self_grav = parser_get_opt_param_int(params, "Scheduler:cell_sub_size_self_grav", space_subsize_self_grav_default); + space_subsize_pair_stars = + parser_get_opt_param_int(params, "Scheduler:cell_sub_size_pair_stars", + space_subsize_pair_stars_default); + space_subsize_self_stars = + parser_get_opt_param_int(params, "Scheduler:cell_sub_size_self_stars", + space_subsize_self_stars_default); space_splitsize = parser_get_opt_param_int( params, "Scheduler:cell_split_size", space_splitsize_default); - space_subdepth_grav = parser_get_opt_param_int( - params, "Scheduler:cell_subdepth_grav", space_subdepth_grav_default); + space_subdepth_diff_grav = + parser_get_opt_param_int(params, "Scheduler:cell_subdepth_diff_grav", + space_subdepth_diff_grav_default); if (verbose) { message("max_size set to %d split_size set to %d", space_maxsize, 
space_splitsize); - message("subdepth_grav set to %d", space_subdepth_grav); + message("subdepth_grav set to %d", space_subdepth_diff_grav); message("sub_size_pair_hydro set to %d, sub_size_self_hydro set to %d", space_subsize_pair_hydro, space_subsize_self_hydro); message("sub_size_pair_grav set to %d, sub_size_self_grav set to %d", space_subsize_pair_grav, space_subsize_self_grav); + message("sub_size_pair_stars set to %d, sub_size_self_stars set to %d", + space_subsize_pair_stars, space_subsize_self_stars); } /* Apply h scaling */ @@ -2895,6 +3304,10 @@ void space_init(struct space *s, struct swift_params *params, /* Init the space lock. */ if (lock_init(&s->lock) != 0) error("Failed to create space spin-lock."); +#ifdef SWIFT_DEBUG_CHECKS + last_cell_id = 1; +#endif + /* Build the cells recursively. */ if (!dry_run) space_regrid(s, verbose); } @@ -3021,8 +3434,30 @@ void space_replicate(struct space *s, int replicate, int verbose) { #endif } +/** + * @brief Duplicate all the dark matter particles to create the same number + * of gas particles with mass ratios given by the cosmology. + * + * Note that this function alters the dark matter particle masses and positions. + * Velocities are unchanged. We also leave the thermodynamic properties of the + * gas un-initialised as they will be given a value from the parameter file at a + * later stage. + * + * @param s The #space to create the particles in. + * @param cosmo The current #cosmology model. + * @param periodic Are we using periodic boundary conditions? + * @param dim The size of the box (for periodic wrapping). + * @param verbose Are we talkative? + */ void space_generate_gas(struct space *s, const struct cosmology *cosmo, - int verbose) { + int periodic, const double dim[3], int verbose) { + + /* Check that this is a sensible ting to do */ + if (!s->hydro) + error( + "Cannot generate gas from ICs if we are running without " + "hydrodynamics. 
Need to run with -s and the corresponding " + "hydrodynamics parameters in the YAML file."); if (verbose) message("Generating gas particles from gparts"); @@ -3057,7 +3492,7 @@ void space_generate_gas(struct space *s, const struct cosmology *cosmo, /* Compute some constants */ const double mass_ratio = cosmo->Omega_b / cosmo->Omega_m; - const double bg_density = cosmo->Omega_m * cosmo->critical_density; + const double bg_density = cosmo->Omega_m * cosmo->critical_density_0; const double bg_density_inv = 1. / bg_density; /* Update the particle properties */ @@ -3071,9 +3506,11 @@ void space_generate_gas(struct space *s, const struct cosmology *cosmo, p->id = gp_gas->id_or_neg_offset * 2 + 1; gp_dm->id_or_neg_offset *= 2; - if (gp_dm->id_or_neg_offset <= 0) error("DM particle ID overflowd"); + if (gp_dm->id_or_neg_offset < 0) + error("DM particle ID overflowd (DM id=%lld gas id=%lld)", + gp_dm->id_or_neg_offset, p->id); - if (p->id <= 0) error("gas particle ID overflowd"); + if (p->id < 0) error("gas particle ID overflowd (id=%lld)", p->id); /* Set the links correctly */ p->gpart = gp_gas; @@ -3082,8 +3519,8 @@ void space_generate_gas(struct space *s, const struct cosmology *cosmo, /* Compute positions shift */ const double d = cbrt(gp_dm->mass * bg_density_inv); - const double shift_dm = d * mass_ratio; - const double shift_gas = d * (1. - mass_ratio); + const double shift_dm = 0.5 * d * mass_ratio; + const double shift_gas = 0.5 * d * (1. - mass_ratio); /* Set the masses */ gp_dm->mass *= (1. 
- mass_ratio); @@ -3094,20 +3531,37 @@ void space_generate_gas(struct space *s, const struct cosmology *cosmo, gp_dm->x[0] += shift_dm; gp_dm->x[1] += shift_dm; gp_dm->x[2] += shift_dm; - gp_gas->x[0] += shift_gas; - gp_gas->x[1] += shift_gas; - gp_gas->x[2] += shift_gas; + gp_gas->x[0] -= shift_gas; + gp_gas->x[1] -= shift_gas; + gp_gas->x[2] -= shift_gas; + + /* Make sure the positions are identical between linked particles */ p->x[0] = gp_gas->x[0]; p->x[1] = gp_gas->x[1]; p->x[2] = gp_gas->x[2]; + /* Box-wrap the whole thing to be safe */ + if (periodic) { + gp_dm->x[0] = box_wrap(gp_dm->x[0], 0., dim[0]); + gp_dm->x[1] = box_wrap(gp_dm->x[1], 0., dim[1]); + gp_dm->x[2] = box_wrap(gp_dm->x[2], 0., dim[2]); + gp_gas->x[0] = box_wrap(gp_gas->x[0], 0., dim[0]); + gp_gas->x[1] = box_wrap(gp_gas->x[1], 0., dim[1]); + gp_gas->x[2] = box_wrap(gp_gas->x[2], 0., dim[2]); + p->x[0] = box_wrap(p->x[0], 0., dim[0]); + p->x[1] = box_wrap(p->x[1], 0., dim[1]); + p->x[2] = box_wrap(p->x[2], 0., dim[2]); + } + /* Also copy the velocities */ p->v[0] = gp_gas->v_full[0]; p->v[1] = gp_gas->v_full[1]; p->v[2] = gp_gas->v_full[2]; /* Set the smoothing length to the mean inter-particle separation */ - p->h = 30. * d; + p->h = d; + + /* Note that the thermodynamic properties (u, S, ...) 
will be set later */ } /* Replace the content of the space */ @@ -3256,6 +3710,9 @@ void space_clean(struct space *s) { free(s->cells_top); free(s->multipoles_top); free(s->local_cells_top); + free(s->local_cells_with_tasks_top); + free(s->cells_with_particles_top); + free(s->local_cells_with_particles_top); free(s->parts); free(s->xparts); free(s->gparts); @@ -3307,7 +3764,12 @@ void space_struct_restore(struct space *s, FILE *stream) { s->multipoles_top = NULL; s->multipoles_sub = NULL; s->local_cells_top = NULL; + s->local_cells_with_tasks_top = NULL; + s->cells_with_particles_top = NULL; + s->local_cells_with_particles_top = NULL; s->grav_top_level = NULL; + s->nr_local_cells_with_tasks = 0; + s->nr_cells_with_particles = 0; #ifdef WITH_MPI s->parts_foreign = NULL; s->size_parts_foreign = 0; @@ -3355,7 +3817,7 @@ void space_struct_restore(struct space *s, FILE *stream) { NULL, "sparts"); } - /* Need to reconnect the gravity parts to their hydro and star particles. */ + /* Need to reconnect the gravity parts to their hydro and stars particles. */ /* Re-link the parts. 
*/ if (s->nr_parts > 0 && s->nr_gparts > 0) part_relink_parts_to_gparts(s->gparts, s->nr_gparts, s->parts); diff --git a/src/space.h b/src/space.h index e3173ece1e2749a3afb8072b179150587a100a82..e6d774200be1a31d622419dceafb16b3826ce177 100644 --- a/src/space.h +++ b/src/space.h @@ -48,7 +48,9 @@ struct cosmology; #define space_subsize_self_hydro_default 32000 #define space_subsize_pair_grav_default 256000000 #define space_subsize_self_grav_default 32000 -#define space_subdepth_grav_default 2 +#define space_subsize_pair_stars_default 256000000 +#define space_subsize_self_stars_default 32000 +#define space_subdepth_diff_grav_default 4 #define space_max_top_level_cells_default 12 #define space_stretch 1.10f #define space_maxreldx 0.1f @@ -63,7 +65,9 @@ extern int space_subsize_pair_hydro; extern int space_subsize_self_hydro; extern int space_subsize_pair_grav; extern int space_subsize_self_grav; -extern int space_subdepth_grav; +extern int space_subsize_pair_stars; +extern int space_subsize_self_stars; +extern int space_subdepth_diff_grav; /** * @brief The space in which the cells and particles reside. @@ -79,6 +83,9 @@ struct space { /*! Extra space information needed for some hydro schemes. */ struct hydro_space hs; + /*! Are we doing hydrodynamics? */ + int hydro; + /*! Are we doing gravity? */ int gravity; @@ -106,9 +113,18 @@ struct space { /*! Total number of cells (top- and sub-) */ int tot_cells; - /*! Number of *local* top-level cells with tasks */ + /*! Number of *local* top-level cells */ int nr_local_cells; + /*! Number of *local* top-level cells with tasks */ + int nr_local_cells_with_tasks; + + /*! Number of top-level cells that have >0 particle (of any kind) */ + int nr_cells_with_particles; + + /*! Number of top-level cells that have >0 particle (of any kind) */ + int nr_local_cells_with_particles; + /*! The (level 0) cells themselves. */ struct cell *cells_top; @@ -121,9 +137,18 @@ struct space { /*! Buffer of unused multipoles for the sub-cells. 
*/ struct gravity_tensors *multipoles_sub; - /*! The indices of the *local* top-level cells with tasks */ + /*! The indices of the *local* top-level cells */ int *local_cells_top; + /*! The indices of the *local* top-level cells with tasks */ + int *local_cells_with_tasks_top; + + /*! The indices of the top-level cells that have >0 particles (of any kind) */ + int *cells_with_particles_top; + + /*! The indices of the top-level cells that have >0 particles (of any kind) */ + int *local_cells_with_particles_top; + /*! The total number of parts in the space. */ size_t nr_parts, size_parts; @@ -133,6 +158,15 @@ struct space { /*! The total number of g-parts in the space. */ size_t nr_sparts, size_sparts; + /*! Number of inhibted gas particles in the space */ + size_t nr_inhibited_parts; + + /*! Number of inhibted gravity particles in the space */ + size_t nr_inhibited_gparts; + + /*! Number of inhibted star particles in the space */ + size_t nr_inhibited_sparts; + /*! The particle data (cells have pointers to this). */ struct part *parts; @@ -192,7 +226,7 @@ struct space { #endif }; -/* function prototypes. */ +/* Function prototypes. 
*/ void space_free_buff_sort_indices(struct space *s); void space_parts_sort(struct part *parts, struct xpart *xparts, int *ind, int *counts, int num_bins, ptrdiff_t parts_offset); @@ -206,7 +240,7 @@ void space_init(struct space *s, struct swift_params *params, const struct cosmology *cosmo, double dim[3], struct part *parts, struct gpart *gparts, struct spart *sparts, size_t Npart, size_t Ngpart, size_t Nspart, int periodic, - int replicate, int generate_gas_in_ics, int gravity, + int replicate, int generate_gas_in_ics, int hydro, int gravity, int verbose, int dry_run); void space_sanitize(struct space *s); void space_map_cells_pre(struct space *s, int full, @@ -219,22 +253,21 @@ void space_map_parts_xparts(struct space *s, struct cell *c)); void space_map_cells_post(struct space *s, int full, void (*fun)(struct cell *c, void *data), void *data); -void space_rebuild(struct space *s, int verbose); +void space_rebuild(struct space *s, int repartitioned, int verbose); void space_recycle(struct space *s, struct cell *c); void space_recycle_list(struct space *s, struct cell *cell_list_begin, struct cell *cell_list_end, struct gravity_tensors *multipole_list_begin, struct gravity_tensors *multipole_list_end); -void space_split(struct space *s, struct cell *cells, int nr_cells, - int verbose); +void space_split(struct space *s, int verbose); void space_split_mapper(void *map_data, int num_elements, void *extra_data); -void space_list_cells_with_tasks(struct space *s); +void space_list_useful_top_level_cells(struct space *s); void space_parts_get_cell_index(struct space *s, int *ind, int *cell_counts, - struct cell *cells, int verbose); + int *count_inibibited_parts, int verbose); void space_gparts_get_cell_index(struct space *s, int *gind, int *cell_counts, - struct cell *cells, int verbose); + int *count_inibibited_gparts, int verbose); void space_sparts_get_cell_index(struct space *s, int *sind, int *cell_counts, - struct cell *cells, int verbose); + int 
*count_inibibited_sparts, int verbose); void space_synchronize_particle_positions(struct space *s); void space_do_parts_sort(void); void space_do_gparts_sort(void); @@ -244,6 +277,7 @@ void space_first_init_gparts(struct space *s, int verbose); void space_first_init_sparts(struct space *s, int verbose); void space_init_parts(struct space *s, int verbose); void space_init_gparts(struct space *s, int verbose); +void space_init_sparts(struct space *s, int verbose); void space_convert_quantities(struct space *s, int verbose); void space_link_cleanup(struct space *s); void space_check_drift_point(struct space *s, integertime_t ti_drift, @@ -253,7 +287,7 @@ void space_check_top_multipoles_drift_point(struct space *s, void space_check_timesteps(struct space *s); void space_replicate(struct space *s, int replicate, int verbose); void space_generate_gas(struct space *s, const struct cosmology *cosmo, - int verbose); + int periodic, const double dim[3], int verbose); void space_check_cosmology(struct space *s, const struct cosmology *cosmo, int rank); void space_reset_task_counters(struct space *s); diff --git a/src/stars.h b/src/stars.h index ade47ff57298c13bf205e991548945576a802293..3e921239a29d862aba998c138623eb1cb81a37b9 100644 --- a/src/stars.h +++ b/src/stars.h @@ -16,15 +16,15 @@ * along with this program. If not, see <http://www.gnu.org/licenses/>. * ******************************************************************************/ -#ifndef SWIFT_STAR_H -#define SWIFT_STAR_H +#ifndef SWIFT_STARS_H +#define SWIFT_STARS_H /* Config parameters. 
*/ #include "../config.h" /* So far only one model here */ /* Straight-forward import */ -#include "./stars/Default/star.h" -#include "./stars/Default/star_iact.h" +#include "./stars/Default/stars.h" +#include "./stars/Default/stars_iact.h" #endif diff --git a/src/stars/Default/star.h b/src/stars/Default/star.h deleted file mode 100644 index 61ae4aeb5c51e18e39c3f4c6855d7c6ddfe05abb..0000000000000000000000000000000000000000 --- a/src/stars/Default/star.h +++ /dev/null @@ -1,86 +0,0 @@ -/******************************************************************************* - * This file is part of SWIFT. - * Coypright (c) 2016 Matthieu Schaller (matthieu.schaller@durham.ac.uk) - * - * This program is free software: you can redistribute it and/or modify - * it under the terms of the GNU Lesser General Public License as published - * by the Free Software Foundation, either version 3 of the License, or - * (at your option) any later version. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU Lesser General Public License - * along with this program. If not, see <http://www.gnu.org/licenses/>. - * - ******************************************************************************/ -#ifndef SWIFT_DEFAULT_STAR_H -#define SWIFT_DEFAULT_STAR_H - -#include <float.h> -#include "minmax.h" - -/** - * @brief Computes the gravity time-step of a given star particle. - * - * @param sp Pointer to the s-particle data. - */ -__attribute__((always_inline)) INLINE static float star_compute_timestep( - const struct spart* const sp) { - - return FLT_MAX; -} - -/** - * @brief Initialises the s-particles for the first time - * - * This function is called only once just after the ICs have been - * read in to do some conversions. 
- * - * @param sp The particle to act upon - */ -__attribute__((always_inline)) INLINE static void star_first_init_spart( - struct spart* sp) { - - sp->time_bin = 0; -} - -/** - * @brief Prepares a s-particle for its interactions - * - * @param sp The particle to act upon - */ -__attribute__((always_inline)) INLINE static void star_init_spart( - struct spart* sp) {} - -/** - * @brief Sets the values to be predicted in the drifts to their values at a - * kick time - * - * @param sp The particle. - */ -__attribute__((always_inline)) INLINE static void star_reset_predicted_values( - struct spart* restrict sp) {} - -/** - * @brief Finishes the calculation of (non-gravity) forces acting on stars - * - * Multiplies the forces and accelerations by the appropiate constants - * - * @param sp The particle to act upon - */ -__attribute__((always_inline)) INLINE static void star_end_force( - struct spart* sp) {} - -/** - * @brief Kick the additional variables - * - * @param sp The particle to act upon - * @param dt The time-step for this kick - */ -__attribute__((always_inline)) INLINE static void star_kick_extra( - struct spart* sp, float dt) {} - -#endif /* SWIFT_DEFAULT_STAR_H */ diff --git a/src/stars/Default/star_iact.h b/src/stars/Default/star_iact.h deleted file mode 100644 index e69de29bb2d1d6434b8b29ae775ad8c2e48c5391..0000000000000000000000000000000000000000 diff --git a/src/stars/Default/star_io.h b/src/stars/Default/star_io.h deleted file mode 100644 index 7ad29f0a935c002b1337c2a75d6f987c05c9bb43..0000000000000000000000000000000000000000 --- a/src/stars/Default/star_io.h +++ /dev/null @@ -1,73 +0,0 @@ -/******************************************************************************* - * This file is part of SWIFT. 
- * Coypright (c) 2016 Matthieu Schaller (matthieu.schaller@durham.ac.uk) - * - * This program is free software: you can redistribute it and/or modify - * it under the terms of the GNU Lesser General Public License as published - * by the Free Software Foundation, either version 3 of the License, or - * (at your option) any later version. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU Lesser General Public License - * along with this program. If not, see <http://www.gnu.org/licenses/>. - * - ******************************************************************************/ -#ifndef SWIFT_DEFAULT_STAR_IO_H -#define SWIFT_DEFAULT_STAR_IO_H - -#include "io_properties.h" - -/** - * @brief Specifies which s-particle fields to read from a dataset - * - * @param sparts The s-particle array. - * @param list The list of i/o properties to read. - * @param num_fields The number of i/o fields to read. - */ -INLINE static void star_read_particles(struct spart* sparts, - struct io_props* list, int* num_fields) { - - /* Say how much we want to read */ - *num_fields = 4; - - /* List what we want to read */ - list[0] = io_make_input_field("Coordinates", DOUBLE, 3, COMPULSORY, - UNIT_CONV_LENGTH, sparts, x); - list[1] = io_make_input_field("Velocities", FLOAT, 3, COMPULSORY, - UNIT_CONV_SPEED, sparts, v); - list[2] = io_make_input_field("Masses", FLOAT, 1, COMPULSORY, UNIT_CONV_MASS, - sparts, mass); - list[3] = io_make_input_field("ParticleIDs", LONGLONG, 1, COMPULSORY, - UNIT_CONV_NO_UNITS, sparts, id); -} - -/** - * @brief Specifies which s-particle fields to write to a dataset - * - * @param sparts The s-particle array. - * @param list The list of i/o properties to write. - * @param num_fields The number of i/o fields to write. 
- */ -INLINE static void star_write_particles(const struct spart* sparts, - struct io_props* list, - int* num_fields) { - - /* Say how much we want to read */ - *num_fields = 4; - - /* List what we want to read */ - list[0] = io_make_output_field("Coordinates", DOUBLE, 3, UNIT_CONV_LENGTH, - sparts, x); - list[1] = - io_make_output_field("Velocities", FLOAT, 3, UNIT_CONV_SPEED, sparts, v); - list[2] = - io_make_output_field("Masses", FLOAT, 1, UNIT_CONV_MASS, sparts, mass); - list[3] = io_make_output_field("ParticleIDs", LONGLONG, 1, UNIT_CONV_NO_UNITS, - sparts, id); -} - -#endif /* SWIFT_DEFAULT_STAR_IO_H */ diff --git a/src/stars/Default/stars.h b/src/stars/Default/stars.h new file mode 100644 index 0000000000000000000000000000000000000000..ab4c6c7013d47a5731440bc953ad6a1101c7d2a4 --- /dev/null +++ b/src/stars/Default/stars.h @@ -0,0 +1,150 @@ +/******************************************************************************* + * This file is part of SWIFT. + * Coypright (c) 2016 Matthieu Schaller (matthieu.schaller@durham.ac.uk) + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published + * by the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public License + * along with this program. If not, see <http://www.gnu.org/licenses/>. + * + ******************************************************************************/ +#ifndef SWIFT_DEFAULT_STARS_H +#define SWIFT_DEFAULT_STARS_H + +#include <float.h> +#include "minmax.h" + +/** + * @brief Computes the gravity time-step of a given star particle. 
+ * + * @param sp Pointer to the s-particle data. + */ +__attribute__((always_inline)) INLINE static float stars_compute_timestep( + const struct spart* const sp) { + + return FLT_MAX; +} + +/** + * @brief Initialises the s-particles for the first time + * + * This function is called only once just after the ICs have been + * read in to do some conversions. + * + * @param sp The particle to act upon + */ +__attribute__((always_inline)) INLINE static void stars_first_init_spart( + struct spart* sp) { + + sp->time_bin = 0; +} + +/** + * @brief Prepares a s-particle for its interactions + * + * @param sp The particle to act upon + */ +__attribute__((always_inline)) INLINE static void stars_init_spart( + struct spart* sp) { + +#ifdef DEBUG_INTERACTIONS_STARS + for (int i = 0; i < MAX_NUM_OF_NEIGHBOURS_STARS; ++i) + sp->ids_ngbs_density[i] = -1; + sp->num_ngb_density = 0; +#endif + + sp->density.wcount = 0.f; + sp->density.wcount_dh = 0.f; +} + +/** + * @brief Sets the values to be predicted in the drifts to their values at a + * kick time + * + * @param sp The particle. + */ +__attribute__((always_inline)) INLINE static void stars_reset_predicted_values( + struct spart* restrict sp) {} + +/** + * @brief Finishes the calculation of (non-gravity) forces acting on stars + * + * Multiplies the forces and accelerations by the appropiate constants + * + * @param sp The particle to act upon + */ +__attribute__((always_inline)) INLINE static void stars_end_force( + struct spart* sp) {} + +/** + * @brief Kick the additional variables + * + * @param sp The particle to act upon + * @param dt The time-step for this kick + */ +__attribute__((always_inline)) INLINE static void stars_kick_extra( + struct spart* sp, float dt) {} + +/** + * @brief Finishes the calculation of density on stars + * + * @param sp The particle to act upon + * @param cosmo The current cosmological model. 
+ */ +__attribute__((always_inline)) INLINE static void stars_end_density( + struct spart* sp, const struct cosmology* cosmo) { + + /* Some smoothing length multiples. */ + const float h = sp->h; + const float h_inv = 1.0f / h; /* 1/h */ + const float h_inv_dim = pow_dimension(h_inv); /* 1/h^d */ + const float h_inv_dim_plus_one = h_inv_dim * h_inv; /* 1/h^(d+1) */ + + /* Finish the calculation by inserting the missing h-factors */ + sp->density.wcount *= h_inv_dim; + sp->density.wcount_dh *= h_inv_dim_plus_one; +} + +/** + * @brief Sets all particle fields to sensible values when the #spart has 0 + * ngbs. + * + * @param sp The particle to act upon + * @param cosmo The current cosmological model. + */ +__attribute__((always_inline)) INLINE static void stars_spart_has_no_neighbours( + struct spart* restrict sp, const struct cosmology* cosmo) { + + /* Some smoothing length multiples. */ + const float h = sp->h; + const float h_inv = 1.0f / h; /* 1/h */ + const float h_inv_dim = pow_dimension(h_inv); /* 1/h^d */ + + /* Re-set problematic values */ + sp->density.wcount = kernel_root * h_inv_dim; + sp->density.wcount_dh = 0.f; +} + +/** + * @brief Evolve the stellar properties of a #spart. + * + * This function allows for example to compute the SN rate before sending + * this information to a different MPI rank. + * + * @param sp The particle to act upon + * @param cosmo The current cosmological model. 
+ * @param stars_properties The #stars_props + */ +__attribute__((always_inline)) INLINE static void stars_evolve_spart( + struct spart* restrict sp, const struct stars_props* stars_properties, + const struct cosmology* cosmo) {} + +#endif /* SWIFT_DEFAULT_STARS_H */ diff --git a/src/stars/Default/star_debug.h b/src/stars/Default/stars_debug.h similarity index 86% rename from src/stars/Default/star_debug.h rename to src/stars/Default/stars_debug.h index d940afac2eb67c97481f48a4bda6fa56085166d5..39ae754ddf60910ae07b3252e151c1f619588161 100644 --- a/src/stars/Default/star_debug.h +++ b/src/stars/Default/stars_debug.h @@ -16,10 +16,10 @@ * along with this program. If not, see <http://www.gnu.org/licenses/>. * ******************************************************************************/ -#ifndef SWIFT_DEFAULT_STAR_DEBUG_H -#define SWIFT_DEFAULT_STAR_DEBUG_H +#ifndef SWIFT_DEFAULT_STARS_DEBUG_H +#define SWIFT_DEFAULT_STARS_DEBUG_H -__attribute__((always_inline)) INLINE static void star_debug_particle( +__attribute__((always_inline)) INLINE static void stars_debug_particle( const struct spart* p) { printf( "x=[%.3e,%.3e,%.3e], " @@ -28,4 +28,4 @@ __attribute__((always_inline)) INLINE static void star_debug_particle( p->mass, p->ti_begin, p->ti_end); } -#endif /* SWIFT_DEFAULT_STAR_DEBUG_H */ +#endif /* SWIFT_DEFAULT_STARS_DEBUG_H */ diff --git a/src/stars/Default/stars_iact.h b/src/stars/Default/stars_iact.h new file mode 100644 index 0000000000000000000000000000000000000000..9e27f86028245a230cfd777dfc46da7b7d2f3915 --- /dev/null +++ b/src/stars/Default/stars_iact.h @@ -0,0 +1,57 @@ +/** + * @brief Density interaction between two particles (non-symmetric). + * + * @param r2 Comoving square distance between the two particles. + * @param dx Comoving vector separating both particles (pi - pj). + * @param hi Comoving smoothing-length of particle i. + * @param hj Comoving smoothing-length of particle j. + * @param si First sparticle. 
+ * @param pj Second particle (not updated). + * @param a Current scale factor. + * @param H Current Hubble parameter. + */ +__attribute__((always_inline)) INLINE static void +runner_iact_nonsym_stars_density(float r2, const float *dx, float hi, float hj, + struct spart *restrict si, + const struct part *restrict pj, float a, + float H) { + + float wi, wi_dx; + + /* Get r and 1/r. */ + const float r_inv = 1.0f / sqrtf(r2); + const float r = r2 * r_inv; + + /* Compute the kernel function */ + const float hi_inv = 1.0f / hi; + const float ui = r * hi_inv; + kernel_deval(ui, &wi, &wi_dx); + + /* Compute contribution to the number of neighbours */ + si->density.wcount += wi; + si->density.wcount_dh -= (hydro_dimension * wi + ui * wi_dx); + +#ifdef DEBUG_INTERACTIONS_STARS + /* Update ngb counters */ + if (si->num_ngb_density < MAX_NUM_OF_NEIGHBOURS_STARS) + si->ids_ngbs_density[si->num_ngb_density] = pj->id; + ++si->num_ngb_density; +#endif +} + +/** + * @brief Feedback interaction between two particles (non-symmetric). + * + * @param r2 Comoving square distance between the two particles. + * @param dx Comoving vector separating both particles (pi - pj). + * @param hi Comoving smoothing-length of particle i. + * @param hj Comoving smoothing-length of particle j. + * @param si First sparticle. + * @param pj Second particle (not updated). + * @param a Current scale factor. + * @param H Current Hubble parameter. 
+ */ +__attribute__((always_inline)) INLINE static void +runner_iact_nonsym_stars_feedback(float r2, const float *dx, float hi, float hj, + struct spart *restrict si, + struct part *restrict pj, float a, float H) {} diff --git a/src/stars/Default/stars_io.h b/src/stars/Default/stars_io.h new file mode 100644 index 0000000000000000000000000000000000000000..a6c2768f715e3dc6e870ee92e7d8a5e9458a5d11 --- /dev/null +++ b/src/stars/Default/stars_io.h @@ -0,0 +1,199 @@ +/******************************************************************************* + * This file is part of SWIFT. + * Coypright (c) 2016 Matthieu Schaller (matthieu.schaller@durham.ac.uk) + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published + * by the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public License + * along with this program. If not, see <http://www.gnu.org/licenses/>. + * + ******************************************************************************/ +#ifndef SWIFT_DEFAULT_STARS_IO_H +#define SWIFT_DEFAULT_STARS_IO_H + +#include "io_properties.h" +#include "stars_part.h" + +/** + * @brief Specifies which s-particle fields to read from a dataset + * + * @param sparts The s-particle array. + * @param list The list of i/o properties to read. + * @param num_fields The number of i/o fields to read. 
+ */ +INLINE static void stars_read_particles(struct spart *sparts, + struct io_props *list, + int *num_fields) { + + /* Say how much we want to read */ + *num_fields = 5; + + /* List what we want to read */ + list[0] = io_make_input_field("Coordinates", DOUBLE, 3, COMPULSORY, + UNIT_CONV_LENGTH, sparts, x); + list[1] = io_make_input_field("Velocities", FLOAT, 3, COMPULSORY, + UNIT_CONV_SPEED, sparts, v); + list[2] = io_make_input_field("Masses", FLOAT, 1, COMPULSORY, UNIT_CONV_MASS, + sparts, mass); + list[3] = io_make_input_field("ParticleIDs", LONGLONG, 1, COMPULSORY, + UNIT_CONV_NO_UNITS, sparts, id); + list[4] = io_make_input_field("SmoothingLength", FLOAT, 1, OPTIONAL, + UNIT_CONV_LENGTH, sparts, h); +} + +/** + * @brief Specifies which s-particle fields to write to a dataset + * + * @param sparts The s-particle array. + * @param list The list of i/o properties to write. + * @param num_fields The number of i/o fields to write. + */ +INLINE static void stars_write_particles(const struct spart *sparts, + struct io_props *list, + int *num_fields) { + + /* Say how much we want to read */ + *num_fields = 5; + + /* List what we want to read */ + list[0] = io_make_output_field("Coordinates", DOUBLE, 3, UNIT_CONV_LENGTH, + sparts, x); + list[1] = + io_make_output_field("Velocities", FLOAT, 3, UNIT_CONV_SPEED, sparts, v); + list[2] = + io_make_output_field("Masses", FLOAT, 1, UNIT_CONV_MASS, sparts, mass); + list[3] = io_make_output_field("ParticleIDs", LONGLONG, 1, UNIT_CONV_NO_UNITS, + sparts, id); + list[4] = io_make_output_field("SmoothingLength", FLOAT, 1, UNIT_CONV_LENGTH, + sparts, h); +} + +/** + * @brief Initialize the global properties of the stars scheme. + * + * By default, takes the values provided by the hydro. + * + * @param sp The #stars_props. + * @param phys_const The physical constants in the internal unit system. + * @param us The internal unit system. + * @param params The parsed parameters. 
+ * @param p The already read-in properties of the hydro scheme. + */ +INLINE static void stars_props_init(struct stars_props *sp, + const struct phys_const *phys_const, + const struct unit_system *us, + struct swift_params *params, + const struct hydro_props *p) { + + /* Kernel properties */ + sp->eta_neighbours = parser_get_opt_param_float( + params, "Stars:resolution_eta", p->eta_neighbours); + + /* Tolerance for the smoothing length Newton-Raphson scheme */ + sp->h_tolerance = + parser_get_opt_param_float(params, "Stars:h_tolerance", p->h_tolerance); + + /* Get derived properties */ + sp->target_neighbours = pow_dimension(sp->eta_neighbours) * kernel_norm; + const float delta_eta = sp->eta_neighbours * (1.f + sp->h_tolerance); + sp->delta_neighbours = + (pow_dimension(delta_eta) - pow_dimension(sp->eta_neighbours)) * + kernel_norm; + + /* Maximal smoothing length */ + sp->h_max = parser_get_opt_param_float(params, "Stars:h_max", p->h_max); + + /* Number of iterations to converge h */ + sp->max_smoothing_iterations = parser_get_opt_param_int( + params, "Stars:max_ghost_iterations", p->max_smoothing_iterations); + + /* Time integration properties */ + const float max_volume_change = + parser_get_opt_param_float(params, "Stars:max_volume_change", -1); + if (max_volume_change == -1) + sp->log_max_h_change = p->log_max_h_change; + else + sp->log_max_h_change = logf(powf(max_volume_change, hydro_dimension_inv)); +} + +/** + * @brief Print the global properties of the stars scheme. + * + * @param sp The #stars_props. 
+ */ +INLINE static void stars_props_print(const struct stars_props *sp) { + + /* Now stars */ + message("Stars kernel: %s with eta=%f (%.2f neighbours).", kernel_name, + sp->eta_neighbours, sp->target_neighbours); + + message("Stars relative tolerance in h: %.5f (+/- %.4f neighbours).", + sp->h_tolerance, sp->delta_neighbours); + + message( + "Stars integration: Max change of volume: %.2f " + "(max|dlog(h)/dt|=%f).", + pow_dimension(expf(sp->log_max_h_change)), sp->log_max_h_change); + + if (sp->h_max != FLT_MAX) + message("Maximal smoothing length allowed: %.4f", sp->h_max); + + message("Maximal iterations in ghost task set to %d", + sp->max_smoothing_iterations); +} + +#if defined(HAVE_HDF5) +INLINE static void stars_props_print_snapshot(hid_t h_grpstars, + const struct stars_props *sp) { + + io_write_attribute_s(h_grpstars, "Kernel function", kernel_name); + io_write_attribute_f(h_grpstars, "Kernel target N_ngb", + sp->target_neighbours); + io_write_attribute_f(h_grpstars, "Kernel delta N_ngb", sp->delta_neighbours); + io_write_attribute_f(h_grpstars, "Kernel eta", sp->eta_neighbours); + io_write_attribute_f(h_grpstars, "Smoothing length tolerance", + sp->h_tolerance); + io_write_attribute_f(h_grpstars, "Maximal smoothing length", sp->h_max); + io_write_attribute_f(h_grpstars, "Volume log(max(delta h))", + sp->log_max_h_change); + io_write_attribute_f(h_grpstars, "Volume max change time-step", + pow_dimension(expf(sp->log_max_h_change))); + io_write_attribute_i(h_grpstars, "Max ghost iterations", + sp->max_smoothing_iterations); +} +#endif + +/** + * @brief Write a #stars_props struct to the given FILE as a stream of bytes. 
+ * + * @param p the struct + * @param stream the file stream + */ +INLINE static void stars_props_struct_dump(const struct stars_props *p, + FILE *stream) { + restart_write_blocks((void *)p, sizeof(struct stars_props), 1, stream, + "starsprops", "stars props"); +} + +/** + * @brief Restore a stars_props struct from the given FILE as a stream of + * bytes. + * + * @param p the struct + * @param stream the file stream + */ +INLINE static void stars_props_struct_restore(const struct stars_props *p, + FILE *stream) { + restart_read_blocks((void *)p, sizeof(struct stars_props), 1, stream, NULL, + "stars props"); +} + +#endif /* SWIFT_DEFAULT_STAR_IO_H */ diff --git a/src/stars/Default/star_part.h b/src/stars/Default/stars_part.h similarity index 57% rename from src/stars/Default/star_part.h rename to src/stars/Default/stars_part.h index 68dd4869c257e35b3be7dc21f36e6dcdb725dc17..32006a15b67b9e97be7aac1a86ec45d369bcc1a0 100644 --- a/src/stars/Default/star_part.h +++ b/src/stars/Default/stars_part.h @@ -38,15 +38,33 @@ struct spart { /*! Particle position. */ double x[3]; + /* Offset between current position and position at last tree rebuild. */ + float x_diff[3]; + + /* Offset between current position and position at last tree rebuild. */ + float x_diff_sort[3]; + /*! Particle velocity. */ float v[3]; /*! Star mass */ float mass; + /* Particle cutoff radius. */ + float h; + /*! Particle time bin */ timebin_t time_bin; + struct { + /* Number of neighbours. */ + float wcount; + + /* Number of neighbours spatial derivative. */ + float wcount_dh; + + } density; + #ifdef SWIFT_DEBUG_CHECKS /* Time of the last drift */ @@ -57,6 +75,41 @@ struct spart { #endif +#ifdef DEBUG_INTERACTIONS_STARS + /*! List of interacting particles in the density SELF and PAIR */ + long long ids_ngbs_density[MAX_NUM_OF_NEIGHBOURS_STARS]; + + /*! 
Number of interactions in the density SELF and PAIR */ + int num_ngb_density; +#endif + } SWIFT_STRUCT_ALIGN; +/** + * @brief Contains all the constants and parameters of the stars scheme + */ +struct stars_props { + + /*! Resolution parameter */ + float eta_neighbours; + + /*! Target weightd number of neighbours (for info only)*/ + float target_neighbours; + + /*! Smoothing length tolerance */ + float h_tolerance; + + /*! Tolerance on neighbour number (for info only)*/ + float delta_neighbours; + + /*! Maximal smoothing length */ + float h_max; + + /*! Maximal number of iterations to converge h */ + int max_smoothing_iterations; + + /*! Maximal change of h over one time-step */ + float log_max_h_change; +}; + #endif /* SWIFT_DEFAULT_STAR_PART_H */ diff --git a/src/stars_io.h b/src/stars_io.h index 18a13ec19163008f1c8e9f64cf544ddf812db655..046e90ee7570430ea25632539bc2cd642d4b52c0 100644 --- a/src/stars_io.h +++ b/src/stars_io.h @@ -16,11 +16,11 @@ * along with this program. If not, see <http://www.gnu.org/licenses/>. * ******************************************************************************/ -#ifndef SWIFT_STAR_IO_H -#define SWIFT_STAR_IO_H +#ifndef SWIFT_STARS_IO_H +#define SWIFT_STARS_IO_H #include "./const.h" -#include "./stars/Default/star_io.h" +#include "./stars/Default/stars_io.h" -#endif /* SWIFT_STAR_IO_H */ +#endif /* SWIFT_STARS_IO_H */ diff --git a/src/statistics.c b/src/statistics.c index bdca6cfb4ef84bb64aa4776bfc600b0727e0d606..8866f345a4d0ccad8d3a50f30f6b07ff7787dbbd 100644 --- a/src/statistics.c +++ b/src/statistics.c @@ -21,6 +21,7 @@ #include "../config.h" /* Some standard headers. */ +#include <math.h> #include <string.h> /* MPI headers. */ @@ -136,7 +137,7 @@ void stats_collect_part_mapper(void *map_data, int nr_parts, void *extra_data) { /* Get the particle */ const struct part *p = &parts[k]; const struct xpart *xp = &xparts[k]; - const struct gpart *gp = (p->gpart != NULL) ? 
gp = p->gpart : NULL; + const struct gpart *gp = p->gpart; /* Get useful time variables */ const integertime_t ti_beg = @@ -166,8 +167,8 @@ void stats_collect_part_mapper(void *map_data, int nr_parts, void *extra_data) { hydro_get_drifted_velocities(p, xp, dt_kick_hydro, dt_kick_grav, v); const double x[3] = {p->x[0], p->x[1], p->x[2]}; const float m = hydro_get_mass(p); - const float entropy = hydro_get_physical_entropy(p, cosmo); - const float u_inter = hydro_get_physical_internal_energy(p, cosmo); + const float entropy = hydro_get_drifted_physical_entropy(p, cosmo); + const float u_inter = hydro_get_drifted_physical_internal_energy(p, cosmo); /* Collect mass */ stats.mass += m; @@ -386,7 +387,7 @@ MPI_Op statistics_mpi_reduce_op; /** * @brief MPI reduce operator for #statistics structures. */ -void stats_add_MPI(void *in, void *inout, int *len, MPI_Datatype *datatype) { +void stats_add_mpi(void *in, void *inout, int *len, MPI_Datatype *datatype) { for (int i = 0; i < *len; ++i) stats_add(&((struct statistics *)inout)[0], @@ -396,7 +397,7 @@ void stats_add_MPI(void *in, void *inout, int *len, MPI_Datatype *datatype) { /** * @brief Registers MPI #statistics type and reduction function. */ -void stats_create_MPI_type(void) { +void stats_create_mpi_type(void) { /* This is not the recommended way of doing this. 
One should define the structure field by field @@ -411,6 +412,6 @@ void stats_create_MPI_type(void) { } /* Create the reduction operation */ - MPI_Op_create(stats_add_MPI, 1, &statistics_mpi_reduce_op); + MPI_Op_create(stats_add_mpi, 1, &statistics_mpi_reduce_op); } #endif diff --git a/src/statistics.h b/src/statistics.h index adc9f5b6a24a093419b7dd644404a68ef736a685..b741eac3d406d767f5652234b9a16d82464cc456 100644 --- a/src/statistics.h +++ b/src/statistics.h @@ -76,8 +76,7 @@ void stats_finalize(struct statistics* s); extern MPI_Datatype statistics_mpi_type; extern MPI_Op statistics_mpi_reduce_op; -void stats_add_MPI(void* in, void* out, int* len, MPI_Datatype* datatype); -void stats_create_MPI_type(void); +void stats_create_mpi_type(void); #endif #endif /* SWIFT_STATISTICS_H */ diff --git a/src/swift.h b/src/swift.h index e10938addb99956c202b3e4dd2b0592b580fa948..153c4ae0d4440d083f1b0c9850e1f2649c0df6fb 100644 --- a/src/swift.h +++ b/src/swift.h @@ -46,6 +46,7 @@ #include "hydro_properties.h" #include "lock.h" #include "logger.h" +#include "logger_io.h" #include "map.h" #include "mesh_gravity.h" #include "multipole.h" @@ -66,6 +67,8 @@ #include "single_io.h" #include "sourceterms.h" #include "space.h" +#include "stars.h" +#include "stars_io.h" #include "task.h" #include "threadpool.h" #include "timeline.h" diff --git a/src/swift_velociraptor_part.h b/src/swift_velociraptor_part.h index 80ee94ba612299dbe8b451cf1ef9d0ee45f8bf53..adae884c2f930c44edf4d48f47f168475bc65885 100644 --- a/src/swift_velociraptor_part.h +++ b/src/swift_velociraptor_part.h @@ -19,6 +19,8 @@ #ifndef SWIFT_VELOCIRAPTOR_PART_H #define SWIFT_VELOCIRAPTOR_PART_H +#include "part_type.h" + /* SWIFT/VELOCIraptor particle. */ struct swift_vel_part { diff --git a/src/task.c b/src/task.c index 2782dabfc1369dedd43e9b42855a8b43acf2f1b7..3918dad3b713c6c226e5dacf3e38756910c1dd27 100644 --- a/src/task.c +++ b/src/task.c @@ -47,65 +47,80 @@ #include "lock.h" /* Task type names. 
*/ -const char *taskID_names[task_type_count] = { - "none", "sort", "self", - "pair", "sub_self", "sub_pair", - "init_grav", "init_grav_out", "ghost_in", - "ghost", "ghost_out", "extra_ghost", - "drift_part", "drift_gpart", "end_force", - "kick1", "kick2", "timestep", - "send", "recv", "grav_long_range", - "grav_mm", "grav_down_in", "grav_down", - "grav_mesh", "cooling", "sourceterms"}; +const char *taskID_names[task_type_count] = {"none", + "sort", + "self", + "pair", + "sub_self", + "sub_pair", + "init_grav", + "init_grav_out", + "ghost_in", + "ghost", + "ghost_out", + "extra_ghost", + "drift_part", + "drift_gpart", + "drift_gpart_out", + "end_force", + "kick1", + "kick2", + "timestep", + "send", + "recv", + "grav_long_range", + "grav_mm", + "grav_down_in", + "grav_down", + "grav_mesh", + "cooling", + "star_formation", + "sourceterms", + "logger", + "stars_ghost_in", + "stars_ghost", + "stars_ghost_out", + "stars_sort"}; /* Sub-task type names. */ const char *subtaskID_names[task_subtype_count] = { - "none", "density", "gradient", "force", "grav", "external_grav", - "tend", "xv", "rho", "gpart", "multipole", "spart"}; + "none", "density", "gradient", "force", "grav", + "external_grav", "tend", "xv", "rho", "gpart", + "multipole", "spart", "stars_density"}; -/** - * @brief Computes the overlap between the parts array of two given cells. - * - * @param ci The first #cell. - * @param cj The second #cell. - */ -__attribute__((always_inline)) INLINE static size_t task_cell_overlap_part( - const struct cell *restrict ci, const struct cell *restrict cj) { - - if (ci == NULL || cj == NULL) return 0; - - if (ci->parts <= cj->parts && - ci->parts + ci->count >= cj->parts + cj->count) { - return cj->count; - } else if (cj->parts <= ci->parts && - cj->parts + cj->count >= ci->parts + ci->count) { - return ci->count; - } - - return 0; -} +#ifdef WITH_MPI +/* MPI communicators for the subtypes. 
*/ +MPI_Comm subtaskMPI_comms[task_subtype_count]; +#endif /** - * @brief Computes the overlap between the gparts array of two given cells. + * @brief Computes the overlap between the parts array of two given cells. * - * @param ci The first #cell. - * @param cj The second #cell. + * @param TYPE is the type of parts (e.g. #part, #gpart, #spart) + * @param ARRAY is the array of this specific type. + * @param COUNT is the number of elements in the array. */ -__attribute__((always_inline)) INLINE static size_t task_cell_overlap_gpart( - const struct cell *restrict ci, const struct cell *restrict cj) { - - if (ci == NULL || cj == NULL) return 0; - - if (ci->gparts <= cj->gparts && - ci->gparts + ci->gcount >= cj->gparts + cj->gcount) { - return cj->gcount; - } else if (cj->gparts <= ci->gparts && - cj->gparts + cj->gcount >= ci->gparts + ci->gcount) { - return ci->gcount; +#define TASK_CELL_OVERLAP(TYPE, ARRAY, COUNT) \ + __attribute__((always_inline)) \ + INLINE static size_t task_cell_overlap_##TYPE( \ + const struct cell *restrict ci, const struct cell *restrict cj) { \ + \ + if (ci == NULL || cj == NULL) return 0; \ + \ + if (ci->ARRAY <= cj->ARRAY && \ + ci->ARRAY + ci->COUNT >= cj->ARRAY + cj->COUNT) { \ + return cj->COUNT; \ + } else if (cj->ARRAY <= ci->ARRAY && \ + cj->ARRAY + cj->COUNT >= ci->ARRAY + ci->COUNT) { \ + return ci->COUNT; \ + } \ + \ + return 0; \ } - return 0; -} +TASK_CELL_OVERLAP(part, hydro.parts, hydro.count); +TASK_CELL_OVERLAP(gpart, grav.parts, grav.count); +TASK_CELL_OVERLAP(spart, stars.parts, stars.count); /** * @brief Returns the #task_actions for a given task. 
@@ -130,6 +145,14 @@ __attribute__((always_inline)) INLINE static enum task_actions task_acts_on( return task_action_part; break; + case task_type_star_formation: + return task_action_all; + + case task_type_stars_ghost: + case task_type_stars_sort: + return task_action_spart; + break; + case task_type_self: case task_type_pair: case task_type_sub_self: @@ -142,6 +165,10 @@ __attribute__((always_inline)) INLINE static enum task_actions task_acts_on( return task_action_part; break; + case task_subtype_stars_density: + return task_action_all; + break; + case task_subtype_grav: case task_subtype_external_grav: return task_action_gpart; @@ -157,14 +184,15 @@ __attribute__((always_inline)) INLINE static enum task_actions task_acts_on( case task_type_end_force: case task_type_kick1: case task_type_kick2: + case task_type_logger: case task_type_timestep: case task_type_send: case task_type_recv: - if (t->ci->count > 0 && t->ci->gcount > 0) + if (t->ci->hydro.count > 0 && t->ci->grav.count > 0) return task_action_all; - else if (t->ci->count > 0) + else if (t->ci->hydro.count > 0) return task_action_part; - else if (t->ci->gcount > 0) + else if (t->ci->grav.count > 0) return task_action_gpart; else error("Task without particles"); @@ -172,13 +200,13 @@ __attribute__((always_inline)) INLINE static enum task_actions task_acts_on( case task_type_init_grav: case task_type_grav_mm: + case task_type_grav_long_range: return task_action_multipole; break; case task_type_drift_gpart: case task_type_grav_down: case task_type_grav_mesh: - case task_type_grav_long_range: return task_action_gpart; break; @@ -215,19 +243,23 @@ float task_overlap(const struct task *restrict ta, const int ta_part = (ta_act == task_action_part || ta_act == task_action_all); const int ta_gpart = (ta_act == task_action_gpart || ta_act == task_action_all); + const int ta_spart = + (ta_act == task_action_spart || ta_act == task_action_all); const int tb_part = (tb_act == task_action_part || tb_act == 
task_action_all); const int tb_gpart = (tb_act == task_action_gpart || tb_act == task_action_all); + const int tb_spart = + (tb_act == task_action_spart || tb_act == task_action_all); /* In the case where both tasks act on parts */ if (ta_part && tb_part) { /* Compute the union of the cell data. */ size_t size_union = 0; - if (ta->ci != NULL) size_union += ta->ci->count; - if (ta->cj != NULL) size_union += ta->cj->count; - if (tb->ci != NULL) size_union += tb->ci->count; - if (tb->cj != NULL) size_union += tb->cj->count; + if (ta->ci != NULL) size_union += ta->ci->hydro.count; + if (ta->cj != NULL) size_union += ta->cj->hydro.count; + if (tb->ci != NULL) size_union += tb->ci->hydro.count; + if (tb->cj != NULL) size_union += tb->cj->hydro.count; /* Compute the intersection of the cell data. */ const size_t size_intersect = task_cell_overlap_part(ta->ci, tb->ci) + @@ -243,10 +275,10 @@ float task_overlap(const struct task *restrict ta, /* Compute the union of the cell data. */ size_t size_union = 0; - if (ta->ci != NULL) size_union += ta->ci->gcount; - if (ta->cj != NULL) size_union += ta->cj->gcount; - if (tb->ci != NULL) size_union += tb->ci->gcount; - if (tb->cj != NULL) size_union += tb->cj->gcount; + if (ta->ci != NULL) size_union += ta->ci->grav.count; + if (ta->cj != NULL) size_union += ta->cj->grav.count; + if (tb->ci != NULL) size_union += tb->ci->grav.count; + if (tb->cj != NULL) size_union += tb->cj->grav.count; /* Compute the intersection of the cell data. */ const size_t size_intersect = task_cell_overlap_gpart(ta->ci, tb->ci) + @@ -257,6 +289,25 @@ float task_overlap(const struct task *restrict ta, return ((float)size_intersect) / (size_union - size_intersect); } + /* In the case where both tasks act on sparts */ + else if (ta_spart && tb_spart) { + + /* Compute the union of the cell data. 
*/ + size_t size_union = 0; + if (ta->ci != NULL) size_union += ta->ci->stars.count; + if (ta->cj != NULL) size_union += ta->cj->stars.count; + if (tb->ci != NULL) size_union += tb->ci->stars.count; + if (tb->cj != NULL) size_union += tb->cj->stars.count; + + /* Compute the intersection of the cell data. */ + const size_t size_intersect = task_cell_overlap_spart(ta->ci, tb->ci) + + task_cell_overlap_spart(ta->ci, tb->cj) + + task_cell_overlap_spart(ta->cj, tb->ci) + + task_cell_overlap_spart(ta->cj, tb->cj); + + return ((float)size_intersect) / (size_union - size_intersect); + } + /* Else, no overlap */ return 0.f; } @@ -278,6 +329,7 @@ void task_unlock(struct task *t) { case task_type_end_force: case task_type_kick1: case task_type_kick2: + case task_type_logger: case task_type_timestep: cell_unlocktree(ci); cell_gunlocktree(ci); @@ -293,11 +345,17 @@ void task_unlock(struct task *t) { cell_gunlocktree(ci); break; + case task_type_stars_sort: + cell_sunlocktree(ci); + break; + case task_type_self: case task_type_sub_self: if (subtype == task_subtype_grav) { cell_gunlocktree(ci); cell_munlocktree(ci); + } else if (subtype == task_subtype_stars_density) { + cell_sunlocktree(ci); } else { cell_unlocktree(ci); } @@ -310,6 +368,9 @@ void task_unlock(struct task *t) { cell_gunlocktree(cj); cell_munlocktree(ci); cell_munlocktree(cj); + } else if (subtype == task_subtype_stars_density) { + cell_sunlocktree(ci); + cell_sunlocktree(cj); } else { cell_unlocktree(ci); cell_unlocktree(cj); @@ -361,8 +422,10 @@ int task_lock(struct task *t) { char buff[MPI_MAX_ERROR_STRING]; int len; MPI_Error_string(err, buff, &len); - error("Failed to test request on send/recv task (tag=%i, %s).", - t->flags, buff); + error( + "Failed to test request on send/recv task (type=%s/%s tag=%lld, " + "%s).", + taskID_names[t->type], subtaskID_names[t->subtype], t->flags, buff); } return res; #else @@ -373,8 +436,9 @@ int task_lock(struct task *t) { case task_type_end_force: case task_type_kick1: 
case task_type_kick2: + case task_type_logger: case task_type_timestep: - if (ci->hold || ci->ghold) return 0; + if (ci->hydro.hold || ci->grav.phold) return 0; if (cell_locktree(ci) != 0) return 0; if (cell_glocktree(ci) != 0) { cell_unlocktree(ci); @@ -384,13 +448,18 @@ int task_lock(struct task *t) { case task_type_drift_part: case task_type_sort: - if (ci->hold) return 0; + if (ci->hydro.hold) return 0; if (cell_locktree(ci) != 0) return 0; break; + case task_type_stars_sort: + if (ci->stars.hold) return 0; + if (cell_slocktree(ci) != 0) return 0; + break; + case task_type_drift_gpart: case task_type_grav_mesh: - if (ci->ghold) return 0; + if (ci->grav.phold) return 0; if (cell_glocktree(ci) != 0) return 0; break; @@ -398,14 +467,18 @@ int task_lock(struct task *t) { case task_type_sub_self: if (subtype == task_subtype_grav) { /* Lock the gparts and the m-pole */ - if (ci->ghold || ci->mhold) return 0; + if (ci->grav.phold || ci->grav.mhold) return 0; if (cell_glocktree(ci) != 0) return 0; else if (cell_mlocktree(ci) != 0) { cell_gunlocktree(ci); return 0; } + } else if (subtype == task_subtype_stars_density) { + if (ci->stars.hold) return 0; + if (cell_slocktree(ci) != 0) return 0; } else { + if (ci->hydro.hold) return 0; if (cell_locktree(ci) != 0) return 0; } break; @@ -414,7 +487,7 @@ int task_lock(struct task *t) { case task_type_sub_pair: if (subtype == task_subtype_grav) { /* Lock the gparts and the m-pole in both cells */ - if (ci->ghold || cj->ghold) return 0; + if (ci->grav.phold || cj->grav.phold) return 0; if (cell_glocktree(ci) != 0) return 0; if (cell_glocktree(cj) != 0) { cell_gunlocktree(ci); @@ -429,9 +502,16 @@ int task_lock(struct task *t) { cell_munlocktree(ci); return 0; } + } else if (subtype == task_subtype_stars_density) { + if (ci->stars.hold || cj->stars.hold) return 0; + if (cell_slocktree(ci) != 0) return 0; + if (cell_slocktree(cj) != 0) { + cell_sunlocktree(ci); + return 0; + } } else { /* Lock the parts in both cells */ - if 
(ci->hold || cj->hold) return 0; + if (ci->hydro.hold || cj->hydro.hold) return 0; if (cell_locktree(ci) != 0) return 0; if (cell_locktree(cj) != 0) { cell_unlocktree(ci); @@ -442,7 +522,7 @@ int task_lock(struct task *t) { case task_type_grav_down: /* Lock the gparts and the m-poles */ - if (ci->ghold || ci->mhold) return 0; + if (ci->grav.phold || ci->grav.mhold) return 0; if (cell_glocktree(ci) != 0) return 0; else if (cell_mlocktree(ci) != 0) { @@ -453,13 +533,13 @@ int task_lock(struct task *t) { case task_type_grav_long_range: /* Lock the m-poles */ - if (ci->mhold) return 0; + if (ci->grav.mhold) return 0; if (cell_mlocktree(ci) != 0) return 0; break; case task_type_grav_mm: /* Lock both m-poles */ - if (ci->mhold || cj->mhold) return 0; + if (ci->grav.mhold || cj->grav.mhold) return 0; if (cell_mlocktree(ci) != 0) return 0; if (cell_mlocktree(cj) != 0) { cell_munlocktree(ci); @@ -485,3 +565,14 @@ void task_print(const struct task *t) { taskID_names[t->type], subtaskID_names[t->subtype], t->wait, t->nr_unlock_tasks, t->skip); } + +#ifdef WITH_MPI +/** + * @brief Create global communicators for each of the subtasks. 
+ */ +void task_create_mpi_comms(void) { + for (int i = 0; i < task_subtype_count; i++) { + MPI_Comm_dup(MPI_COMM_WORLD, &subtaskMPI_comms[i]); + } +} +#endif diff --git a/src/task.h b/src/task.h index 072d3979ce04990aaef46c5cc5eb0b8c62fdc860..994b2b14c05965b71e877feac5cb9827a1d1b4bb 100644 --- a/src/task.h +++ b/src/task.h @@ -53,6 +53,7 @@ enum task_types { task_type_extra_ghost, task_type_drift_part, task_type_drift_gpart, + task_type_drift_gpart_out, /* Implicit */ task_type_end_force, task_type_kick1, task_type_kick2, @@ -65,7 +66,13 @@ enum task_types { task_type_grav_down, task_type_grav_mesh, task_type_cooling, + task_type_star_formation, task_type_sourceterms, + task_type_logger, + task_type_stars_ghost_in, + task_type_stars_ghost, + task_type_stars_ghost_out, + task_type_stars_sort, task_type_count } __attribute__((packed)); @@ -85,6 +92,7 @@ enum task_subtypes { task_subtype_gpart, task_subtype_multipole, task_subtype_spart, + task_subtype_stars_density, task_subtype_count } __attribute__((packed)); @@ -95,6 +103,7 @@ enum task_actions { task_action_none, task_action_part, task_action_gpart, + task_action_spart, task_action_all, task_action_multipole, task_action_count @@ -110,6 +119,13 @@ extern const char *taskID_names[]; */ extern const char *subtaskID_names[]; +/** + * @brief The MPI communicators for the different subtypes. + */ +#ifdef WITH_MPI +extern MPI_Comm subtaskMPI_comms[task_subtype_count]; +#endif + /** * @brief A task to be run by the #scheduler. */ @@ -121,6 +137,9 @@ struct task { /*! List of tasks unlocked by this one */ struct task **unlock_tasks; + /*! Flags used to carry additional information (e.g. sort directions) */ + long long flags; + #ifdef WITH_MPI /*! Buffer for this task's communications */ @@ -131,16 +150,13 @@ struct task { #endif - /*! Flags used to carry additional information (e.g. sort directions) */ - int flags; - /*! Rank of a task in the order */ int rank; /*! 
Weight of the task */ float weight; -#if defined(WITH_MPI) && defined(HAVE_METIS) +#if defined(WITH_MPI) && (defined(HAVE_METIS) || defined(HAVE_PARMETIS)) /*! Individual cost estimate for this task. */ float cost; #endif @@ -187,5 +203,7 @@ float task_overlap(const struct task *ta, const struct task *tb); int task_lock(struct task *t); void task_do_rewait(struct task *t); void task_print(const struct task *t); - +#ifdef WITH_MPI +void task_create_mpi_comms(void); +#endif #endif /* SWIFT_TASK_H */ diff --git a/src/timeline.h b/src/timeline.h index 4078a904c3e9205b8ea6ae7090534bd3d3d0784f..38727def50b5a81c073ab23375f0c548ca096b66 100644 --- a/src/timeline.h +++ b/src/timeline.h @@ -62,24 +62,30 @@ get_integer_timestep(timebin_t bin) { * @brief Returns the time bin corresponding to a given time_step size. * * Assumes that integertime_t maps to an unsigned long long. + * Given our definitions, this is log_2 of the time_step rounded down minus one. + * + * We use a fast (but exact for any non-zero value) logarithm in base 2 + * calculation based on the bit representation of the number: + * log_2(x) = (number of bits in the type) - (number of leading 0-bits in x) - 1 */ __attribute__((const)) static INLINE timebin_t get_time_bin(integertime_t time_step) { /* ((int) log_2(time_step)) - 1 */ - return (timebin_t)(62 - intrinsics_clzll(time_step)); + return (timebin_t)((8 * sizeof(integertime_t) - 2) - + intrinsics_clzll((unsigned long long)time_step)); } /** * @brief Returns the physical time interval corresponding to a time bin. * * @param bin The time bin of interest. - * @param timeBase the minimal time-step size of the simulation. + * @param time_base the minimal time-step size of the simulation. 
*/ __attribute__((const)) static INLINE double get_timestep(timebin_t bin, - double timeBase) { + double time_base) { - return get_integer_timestep(bin) * timeBase; + return get_integer_timestep(bin) * time_base; } /** @@ -142,7 +148,7 @@ __attribute__((const)) static INLINE timebin_t get_min_active_bin(integertime_t ti_current, integertime_t ti_old) { const timebin_t min_bin = get_max_active_bin(ti_current - ti_old); - return (ti_old > 0) ? min_bin : (min_bin - 1); + return min_bin; } #endif /* SWIFT_TIMELINE_H */ diff --git a/src/timers.c b/src/timers.c index e3fbfdb01249e98e46d2c60d45bd98adb0a34241..d7523edaa42570f8e5f3c01516075267988dfd9c 100644 --- a/src/timers.c +++ b/src/timers.c @@ -80,12 +80,16 @@ const char* timers_names[timer_count] = { "dorecv_gpart", "dorecv_spart", "do_cooling", + "do_star_formation", "gettask", "qget", "qsteal", "locktree", "runners", "step", + "do_stars_ghost", + "logger", + "do_stars_sort", }; /* File to store the timers */ diff --git a/src/timers.h b/src/timers.h index 91d26c1c0d781f725b4c55a7ed3b6cfe956651df..412f36d7de547040e29efdede9cc6826358929bc 100644 --- a/src/timers.h +++ b/src/timers.h @@ -81,12 +81,16 @@ enum { timer_dorecv_gpart, timer_dorecv_spart, timer_do_cooling, + timer_do_star_formation, timer_gettask, timer_qget, timer_qsteal, timer_locktree, timer_runners, timer_step, + timer_dostars_ghost, + timer_logger, + timer_do_stars_sort, timer_count, }; diff --git a/src/timestep.h b/src/timestep.h index d065df4c444cb880a74688be97245c285a391817..e9943a41a0536b65944f0256c827d43386aadd88 100644 --- a/src/timestep.h +++ b/src/timestep.h @@ -58,6 +58,11 @@ make_integer_timestep(float new_dt, timebin_t old_bin, integertime_t ti_current, if (new_dti > current_dti) { if ((max_nr_timesteps - ti_end) % new_dti > 0) new_dti = current_dti; } + +#ifdef SWIFT_DEBUG_CHECKS + if (new_dti == 0) error("Computed an integer time-step of size 0"); +#endif + return new_dti; } @@ -121,8 +126,9 @@ __attribute__((always_inline)) INLINE static 
integertime_t get_part_timestep( /* Compute the next timestep (cooling condition) */ float new_dt_cooling = FLT_MAX; if (e->policy & engine_policy_cooling) - new_dt_cooling = cooling_timestep(e->cooling_func, e->physical_constants, - e->cosmology, e->internal_units, p); + new_dt_cooling = + cooling_timestep(e->cooling_func, e->physical_constants, e->cosmology, + e->internal_units, e->hydro_properties, p, xp); /* Compute the next timestep (gravity condition) */ float new_dt_grav = FLT_MAX, new_dt_self_grav = FLT_MAX, @@ -181,7 +187,7 @@ __attribute__((always_inline)) INLINE static integertime_t get_spart_timestep( const struct spart *restrict sp, const struct engine *restrict e) { /* Stellar time-step */ - float new_dt_star = star_compute_timestep(sp); + float new_dt_stars = stars_compute_timestep(sp); /* Gravity time-step */ float new_dt_self = FLT_MAX, new_dt_ext = FLT_MAX; @@ -196,7 +202,7 @@ __attribute__((always_inline)) INLINE static integertime_t get_spart_timestep( sp->gpart, a_hydro, e->gravity_properties, e->cosmology); /* Take the minimum of all */ - float new_dt = min3(new_dt_star, new_dt_self, new_dt_ext); + float new_dt = min3(new_dt_stars, new_dt_self, new_dt_ext); /* Apply the maximal displacement constraint (FLT_MAX if non-cosmological)*/ new_dt = min(new_dt, e->dt_max_RMS_displacement); diff --git a/src/tools.c b/src/tools.c index 9c0df6012737872eef8d97521b3a7532ceb42720..c0400aa7b42322fce276a5e788af7bcb9e7f3625 100644 --- a/src/tools.c +++ b/src/tools.c @@ -45,6 +45,7 @@ #include "part.h" #include "periodic.h" #include "runner.h" +#include "stars.h" /** * Factorize a given integer, attempts to keep larger pair of factors. 
@@ -194,23 +195,23 @@ void pairs_all_density(struct runner *r, struct cell *ci, struct cell *cj) { const float H = cosmo->H; /* Implements a double-for loop and checks every interaction */ - for (int i = 0; i < ci->count; ++i) { + for (int i = 0; i < ci->hydro.count; ++i) { - pi = &ci->parts[i]; + pi = &ci->hydro.parts[i]; hi = pi->h; hig2 = hi * hi * kernel_gamma2; /* Skip inactive particles. */ if (!part_is_active(pi, e)) continue; - for (int j = 0; j < cj->count; ++j) { + for (int j = 0; j < cj->hydro.count; ++j) { - pj = &cj->parts[j]; + pj = &cj->hydro.parts[j]; /* Pairwise distance */ r2 = 0.0f; for (int k = 0; k < 3; k++) { - dx[k] = ci->parts[i].x[k] - cj->parts[j].x[k]; + dx[k] = ci->hydro.parts[i].x[k] - cj->hydro.parts[j].x[k]; dx[k] = nearest(dx[k], dim[k]); r2 += dx[k] * dx[k]; } @@ -226,23 +227,23 @@ void pairs_all_density(struct runner *r, struct cell *ci, struct cell *cj) { } /* Reverse double-for loop and checks every interaction */ - for (int j = 0; j < cj->count; ++j) { + for (int j = 0; j < cj->hydro.count; ++j) { - pj = &cj->parts[j]; + pj = &cj->hydro.parts[j]; hj = pj->h; hjg2 = hj * hj * kernel_gamma2; /* Skip inactive particles. */ if (!part_is_active(pj, e)) continue; - for (int i = 0; i < ci->count; ++i) { + for (int i = 0; i < ci->hydro.count; ++i) { - pi = &ci->parts[i]; + pi = &ci->hydro.parts[i]; /* Pairwise distance */ r2 = 0.0f; for (int k = 0; k < 3; k++) { - dx[k] = cj->parts[j].x[k] - ci->parts[i].x[k]; + dx[k] = cj->hydro.parts[j].x[k] - ci->hydro.parts[i].x[k]; dx[k] = nearest(dx[k], dim[k]); r2 += dx[k] * dx[k]; } @@ -269,25 +270,25 @@ void pairs_all_force(struct runner *r, struct cell *ci, struct cell *cj) { const float H = cosmo->H; /* Implements a double-for loop and checks every interaction */ - for (int i = 0; i < ci->count; ++i) { + for (int i = 0; i < ci->hydro.count; ++i) { - pi = &ci->parts[i]; + pi = &ci->hydro.parts[i]; hi = pi->h; hig2 = hi * hi * kernel_gamma2; /* Skip inactive particles. 
*/ if (!part_is_active(pi, e)) continue; - for (int j = 0; j < cj->count; ++j) { + for (int j = 0; j < cj->hydro.count; ++j) { - pj = &cj->parts[j]; + pj = &cj->hydro.parts[j]; hj = pj->h; hjg2 = hj * hj * kernel_gamma2; /* Pairwise distance */ r2 = 0.0f; for (int k = 0; k < 3; k++) { - dx[k] = ci->parts[i].x[k] - cj->parts[j].x[k]; + dx[k] = ci->hydro.parts[i].x[k] - cj->hydro.parts[j].x[k]; dx[k] = nearest(dx[k], dim[k]); r2 += dx[k] * dx[k]; } @@ -302,25 +303,25 @@ void pairs_all_force(struct runner *r, struct cell *ci, struct cell *cj) { } /* Reverse double-for loop and checks every interaction */ - for (int j = 0; j < cj->count; ++j) { + for (int j = 0; j < cj->hydro.count; ++j) { - pj = &cj->parts[j]; + pj = &cj->hydro.parts[j]; hj = pj->h; hjg2 = hj * hj * kernel_gamma2; /* Skip inactive particles. */ if (!part_is_active(pj, e)) continue; - for (int i = 0; i < ci->count; ++i) { + for (int i = 0; i < ci->hydro.count; ++i) { - pi = &ci->parts[i]; + pi = &ci->hydro.parts[i]; hi = pi->h; hig2 = hi * hi * kernel_gamma2; /* Pairwise distance */ r2 = 0.0f; for (int k = 0; k < 3; k++) { - dx[k] = cj->parts[j].x[k] - ci->parts[i].x[k]; + dx[k] = cj->hydro.parts[j].x[k] - ci->hydro.parts[i].x[k]; dx[k] = nearest(dx[k], dim[k]); r2 += dx[k] * dx[k]; } @@ -335,6 +336,77 @@ void pairs_all_force(struct runner *r, struct cell *ci, struct cell *cj) { } } +void pairs_all_stars_density(struct runner *r, struct cell *ci, + struct cell *cj) { + + float r2, dx[3]; + const double dim[3] = {r->e->s->dim[0], r->e->s->dim[1], r->e->s->dim[2]}; + const struct engine *e = r->e; + const struct cosmology *cosmo = e->cosmology; + const float a = cosmo->a; + const float H = cosmo->H; + + /* Implements a double-for loop and checks every interaction */ + for (int i = 0; i < ci->stars.count; ++i) { + struct spart *spi = &ci->stars.parts[i]; + + float hi = spi->h; + float hig2 = hi * hi * kernel_gamma2; + + /* Skip inactive particles. 
*/ + if (!spart_is_active(spi, e)) continue; + + for (int j = 0; j < cj->hydro.count; ++j) { + + struct part *pj = &cj->hydro.parts[j]; + + /* Pairwise distance */ + r2 = 0.0f; + for (int k = 0; k < 3; k++) { + dx[k] = spi->x[k] - pj->x[k]; + dx[k] = nearest(dx[k], dim[k]); + r2 += dx[k] * dx[k]; + } + + /* Hit or miss? */ + if (r2 < hig2) { + /* Interact */ + runner_iact_nonsym_stars_density(r2, dx, hi, pj->h, spi, pj, a, H); + } + } + } + + /* Reverse double-for loop and checks every interaction */ + for (int j = 0; j < cj->stars.count; ++j) { + + struct spart *spj = &cj->stars.parts[j]; + float hj = spj->h; + float hjg2 = hj * hj * kernel_gamma2; + + /* Skip inactive particles. */ + if (!spart_is_active(spj, e)) continue; + + for (int i = 0; i < ci->hydro.count; ++i) { + + struct part *pi = &ci->hydro.parts[i]; + + /* Pairwise distance */ + r2 = 0.0f; + for (int k = 0; k < 3; k++) { + dx[k] = spj->x[k] - pi->x[k]; + dx[k] = nearest(dx[k], dim[k]); + r2 += dx[k] * dx[k]; + } + + /* Hit or miss? 
*/ + if (r2 < hjg2) { + /* Interact */ + runner_iact_nonsym_stars_density(r2, dx, hj, pi->h, spj, pi, a, H); + } + } + } +} + void self_all_density(struct runner *r, struct cell *ci) { float r2, hi, hj, hig2, hjg2, dxi[3]; //, dxj[3]; struct part *pi, *pj; @@ -344,15 +416,15 @@ void self_all_density(struct runner *r, struct cell *ci) { const float H = cosmo->H; /* Implements a double-for loop and checks every interaction */ - for (int i = 0; i < ci->count; ++i) { + for (int i = 0; i < ci->hydro.count; ++i) { - pi = &ci->parts[i]; + pi = &ci->hydro.parts[i]; hi = pi->h; hig2 = hi * hi * kernel_gamma2; - for (int j = i + 1; j < ci->count; ++j) { + for (int j = i + 1; j < ci->hydro.count; ++j) { - pj = &ci->parts[j]; + pj = &ci->hydro.parts[j]; hj = pj->h; hjg2 = hj * hj * kernel_gamma2; @@ -361,7 +433,7 @@ void self_all_density(struct runner *r, struct cell *ci) { /* Pairwise distance */ r2 = 0.0f; for (int k = 0; k < 3; k++) { - dxi[k] = ci->parts[i].x[k] - ci->parts[j].x[k]; + dxi[k] = ci->hydro.parts[i].x[k] - ci->hydro.parts[j].x[k]; r2 += dxi[k] * dxi[k]; } @@ -397,15 +469,15 @@ void self_all_force(struct runner *r, struct cell *ci) { const float H = cosmo->H; /* Implements a double-for loop and checks every interaction */ - for (int i = 0; i < ci->count; ++i) { + for (int i = 0; i < ci->hydro.count; ++i) { - pi = &ci->parts[i]; + pi = &ci->hydro.parts[i]; hi = pi->h; hig2 = hi * hi * kernel_gamma2; - for (int j = i + 1; j < ci->count; ++j) { + for (int j = i + 1; j < ci->hydro.count; ++j) { - pj = &ci->parts[j]; + pj = &ci->hydro.parts[j]; hj = pj->h; hjg2 = hj * hj * kernel_gamma2; @@ -414,7 +486,7 @@ void self_all_force(struct runner *r, struct cell *ci) { /* Pairwise distance */ r2 = 0.0f; for (int k = 0; k < 3; k++) { - dxi[k] = ci->parts[i].x[k] - ci->parts[j].x[k]; + dxi[k] = ci->hydro.parts[i].x[k] - ci->hydro.parts[j].x[k]; r2 += dxi[k] * dxi[k]; } @@ -428,6 +500,45 @@ void self_all_force(struct runner *r, struct cell *ci) { } } +void 
self_all_stars_density(struct runner *r, struct cell *ci) { + float r2, hi, hj, hig2, dxi[3]; + struct spart *spi; + struct part *pj; + const struct engine *e = r->e; + const struct cosmology *cosmo = e->cosmology; + const float a = cosmo->a; + const float H = cosmo->H; + + /* Implements a double-for loop and checks every interaction */ + for (int i = 0; i < ci->stars.count; ++i) { + + spi = &ci->stars.parts[i]; + hi = spi->h; + hig2 = hi * hi * kernel_gamma2; + + if (!spart_is_active(spi, e)) continue; + + for (int j = 0; j < ci->hydro.count; ++j) { + + pj = &ci->hydro.parts[j]; + hj = pj->h; + + /* Pairwise distance */ + r2 = 0.0f; + for (int k = 0; k < 3; k++) { + dxi[k] = spi->x[k] - pj->x[k]; + r2 += dxi[k] * dxi[k]; + } + + /* Hit or miss? */ + if (r2 > 0.f && r2 < hig2) { + /* Interact */ + runner_iact_nonsym_stars_density(r2, dxi, hi, hj, spi, pj, a, H); + } + } + } +} + /** * @brief Compute the force on a single particle brute-force. */ @@ -544,6 +655,23 @@ void shuffle_particles(struct part *parts, const int count) { } } +/** + * @brief Randomly shuffle an array of sparticles. 
+ */ +void shuffle_sparticles(struct spart *sparts, const int scount) { + if (scount > 1) { + for (int i = 0; i < scount - 1; i++) { + int j = i + random_uniform(0., (double)(scount - 1 - i)); + + struct spart sparticle = sparts[j]; + + sparts[j] = sparts[i]; + + sparts[i] = sparticle; + } + } +} + /** * @brief Compares two values based on their relative difference: |a - b|/|a + * b| @@ -586,7 +714,7 @@ int compare_values(double a, double b, double threshold, double *absDiff, * * @return 1 if difference found, 0 otherwise */ -int compare_particles(struct part a, struct part b, double threshold) { +int compare_particles(struct part *a, struct part *b, double threshold) { #ifdef GADGET2_SPH @@ -594,117 +722,117 @@ int compare_particles(struct part a, struct part b, double threshold) { double absDiff = 0.0, absSum = 0.0, relDiff = 0.0; for (int k = 0; k < 3; k++) { - if (compare_values(a.x[k], b.x[k], threshold, &absDiff, &absSum, + if (compare_values(a->x[k], b->x[k], threshold, &absDiff, &absSum, &relDiff)) { message( "Relative difference (%e) larger than tolerance (%e) for x[%d] of " "particle %lld.", - relDiff, threshold, k, a.id); - message("a = %e, b = %e", a.x[k], b.x[k]); + relDiff, threshold, k, a->id); + message("a = %e, b = %e", a->x[k], b->x[k]); result = 1; } } for (int k = 0; k < 3; k++) { - if (compare_values(a.v[k], b.v[k], threshold, &absDiff, &absSum, + if (compare_values(a->v[k], b->v[k], threshold, &absDiff, &absSum, &relDiff)) { message( "Relative difference (%e) larger than tolerance (%e) for v[%d] of " "particle %lld.", - relDiff, threshold, k, a.id); - message("a = %e, b = %e", a.v[k], b.v[k]); + relDiff, threshold, k, a->id); + message("a = %e, b = %e", a->v[k], b->v[k]); result = 1; } } for (int k = 0; k < 3; k++) { - if (compare_values(a.a_hydro[k], b.a_hydro[k], threshold, &absDiff, &absSum, - &relDiff)) { + if (compare_values(a->a_hydro[k], b->a_hydro[k], threshold, &absDiff, + &absSum, &relDiff)) { message( "Relative difference (%e) 
larger than tolerance (%e) for a_hydro[%d] " "of particle %lld.", - relDiff, threshold, k, a.id); - message("a = %e, b = %e", a.a_hydro[k], b.a_hydro[k]); + relDiff, threshold, k, a->id); + message("a = %e, b = %e", a->a_hydro[k], b->a_hydro[k]); result = 1; } } - if (compare_values(a.rho, b.rho, threshold, &absDiff, &absSum, &relDiff)) { + if (compare_values(a->rho, b->rho, threshold, &absDiff, &absSum, &relDiff)) { message( "Relative difference (%e) larger than tolerance (%e) for rho of " "particle %lld.", - relDiff, threshold, a.id); - message("a = %e, b = %e", a.rho, b.rho); + relDiff, threshold, a->id); + message("a = %e, b = %e", a->rho, b->rho); result = 1; } - if (compare_values(a.density.rho_dh, b.density.rho_dh, threshold, &absDiff, + if (compare_values(a->density.rho_dh, b->density.rho_dh, threshold, &absDiff, &absSum, &relDiff)) { message( "Relative difference (%e) larger than tolerance (%e) for rho_dh of " "particle %lld.", - relDiff, threshold, a.id); - message("a = %e, b = %e", a.density.rho_dh, b.density.rho_dh); + relDiff, threshold, a->id); + message("a = %e, b = %e", a->density.rho_dh, b->density.rho_dh); result = 1; } - if (compare_values(a.density.wcount, b.density.wcount, threshold, &absDiff, + if (compare_values(a->density.wcount, b->density.wcount, threshold, &absDiff, &absSum, &relDiff)) { message( "Relative difference (%e) larger than tolerance (%e) for wcount of " "particle %lld.", - relDiff, threshold, a.id); - message("a = %e, b = %e", a.density.wcount, b.density.wcount); + relDiff, threshold, a->id); + message("a = %e, b = %e", a->density.wcount, b->density.wcount); result = 1; } - if (compare_values(a.density.wcount_dh, b.density.wcount_dh, threshold, + if (compare_values(a->density.wcount_dh, b->density.wcount_dh, threshold, &absDiff, &absSum, &relDiff)) { message( "Relative difference (%e) larger than tolerance (%e) for wcount_dh of " "particle %lld.", - relDiff, threshold, a.id); - message("a = %e, b = %e", a.density.wcount_dh, 
b.density.wcount_dh); + relDiff, threshold, a->id); + message("a = %e, b = %e", a->density.wcount_dh, b->density.wcount_dh); result = 1; } - if (compare_values(a.force.h_dt, b.force.h_dt, threshold, &absDiff, &absSum, + if (compare_values(a->force.h_dt, b->force.h_dt, threshold, &absDiff, &absSum, &relDiff)) { message( "Relative difference (%e) larger than tolerance (%e) for h_dt of " "particle %lld.", - relDiff, threshold, a.id); - message("a = %e, b = %e", a.force.h_dt, b.force.h_dt); + relDiff, threshold, a->id); + message("a = %e, b = %e", a->force.h_dt, b->force.h_dt); result = 1; } - if (compare_values(a.force.v_sig, b.force.v_sig, threshold, &absDiff, &absSum, - &relDiff)) { + if (compare_values(a->force.v_sig, b->force.v_sig, threshold, &absDiff, + &absSum, &relDiff)) { message( "Relative difference (%e) larger than tolerance (%e) for v_sig of " "particle %lld.", - relDiff, threshold, a.id); - message("a = %e, b = %e", a.force.v_sig, b.force.v_sig); + relDiff, threshold, a->id); + message("a = %e, b = %e", a->force.v_sig, b->force.v_sig); result = 1; } - if (compare_values(a.entropy_dt, b.entropy_dt, threshold, &absDiff, &absSum, + if (compare_values(a->entropy_dt, b->entropy_dt, threshold, &absDiff, &absSum, &relDiff)) { message( "Relative difference (%e) larger than tolerance (%e) for entropy_dt of " "particle %lld.", - relDiff, threshold, a.id); - message("a = %e, b = %e", a.entropy_dt, b.entropy_dt); + relDiff, threshold, a->id); + message("a = %e, b = %e", a->entropy_dt, b->entropy_dt); result = 1; } - if (compare_values(a.density.div_v, b.density.div_v, threshold, &absDiff, + if (compare_values(a->density.div_v, b->density.div_v, threshold, &absDiff, &absSum, &relDiff)) { message( "Relative difference (%e) larger than tolerance (%e) for div_v of " "particle %lld.", - relDiff, threshold, a.id); - message("a = %e, b = %e", a.density.div_v, b.density.div_v); + relDiff, threshold, a->id); + message("a = %e, b = %e", a->density.div_v, b->density.div_v); 
result = 1; } for (int k = 0; k < 3; k++) { - if (compare_values(a.density.rot_v[k], b.density.rot_v[k], threshold, + if (compare_values(a->density.rot_v[k], b->density.rot_v[k], threshold, &absDiff, &absSum, &relDiff)) { message( "Relative difference (%e) larger than tolerance (%e) for rot_v[%d] " "of particle %lld.", - relDiff, threshold, k, a.id); - message("a = %e, b = %e", a.density.rot_v[k], b.density.rot_v[k]); + relDiff, threshold, k, a->id); + message("a = %e, b = %e", a->density.rot_v[k], b->density.rot_v[k]); result = 1; } } diff --git a/src/tools.h b/src/tools.h index 25d024679174eabbe89908c0254651e4bbc69e15..22eba1519ebf80673cb2d8540791e6b4d092bab0 100644 --- a/src/tools.h +++ b/src/tools.h @@ -40,17 +40,21 @@ void pairs_all_density(struct runner *r, struct cell *ci, struct cell *cj); void self_all_density(struct runner *r, struct cell *ci); void pairs_all_force(struct runner *r, struct cell *ci, struct cell *cj); void self_all_force(struct runner *r, struct cell *ci); +void pairs_all_stars_density(struct runner *r, struct cell *ci, + struct cell *cj); +void self_all_stars_density(struct runner *r, struct cell *ci); void pairs_n2(double *dim, struct part *restrict parts, int N, int periodic); double random_uniform(double a, double b); void shuffle_particles(struct part *parts, const int count); +void shuffle_sparticles(struct spart *sparts, const int scount); void gravity_n2(struct gpart *gparts, const int gcount, const struct phys_const *constants, const struct gravity_props *gravity_properties, float rlr); int compare_values(double a, double b, double threshold, double *absDiff, double *absSum, double *relDiff); -int compare_particles(struct part a, struct part b, double threshold); +int compare_particles(struct part *a, struct part *b, double threshold); long get_maxrss(void); diff --git a/src/units.c b/src/units.c index 04e74bc4d7040ed1bde73184b125eec5d8a7fe97..066e524da0765840130065b626db63de2793c330 100644 --- a/src/units.c +++ b/src/units.c @@ 
-253,6 +253,7 @@ void units_get_base_unit_exponants_array(float baseUnitsExp[5], break; case UNIT_CONV_SPEED: + case UNIT_CONV_VELOCITY: baseUnitsExp[UNIT_LENGTH] = 1.f; baseUnitsExp[UNIT_TIME] = -1.f; break; @@ -370,6 +371,7 @@ void units_get_base_unit_exponants_array(float baseUnitsExp[5], break; case UNIT_CONV_INV_VOLUME: + case UNIT_CONV_NUMBER_DENSITY: baseUnitsExp[UNIT_LENGTH] = -3.f; break; @@ -490,6 +492,10 @@ float units_general_a_factor(const struct unit_system* us, /** * @brief Returns a string containing the exponents of the base units making up * the conversion factors (expressed in terms of the 5 fundamental units) + * + * Note that in accordance with the SWIFT philosophy, there are no h-factors + * in any units and hence in the string returned here. + * * @param buffer The buffer in which to write (The buffer must be long enough, * 140 chars at most) * @param us The UnitsSystem in use. @@ -501,7 +507,7 @@ void units_general_cgs_conversion_string(char* buffer, const float baseUnitsExponants[5]) { char temp[20]; const double a_exp = units_general_a_factor(us, baseUnitsExponants); - const double h_exp = units_general_h_factor(us, baseUnitsExponants); + const double h_exp = 0.; /* There are no h-factors in SWIFT outputs. 
*/ /* Check whether we are unitless or not */ char isAllNonZero = 1; diff --git a/src/units.h b/src/units.h index 08b738c5303db8b40dfbe51799d67da8df3936ce..259cf73e1024c8ca605ced136e27a82cbb8a7a5e 100644 --- a/src/units.h +++ b/src/units.h @@ -71,7 +71,9 @@ enum unit_conversion_factor { UNIT_CONV_LENGTH, UNIT_CONV_TIME, UNIT_CONV_DENSITY, + UNIT_CONV_NUMBER_DENSITY, UNIT_CONV_SPEED, + UNIT_CONV_VELOCITY, UNIT_CONV_ACCELERATION, UNIT_CONV_POTENTIAL, UNIT_CONV_FORCE, diff --git a/src/velociraptor_dummy.c b/src/velociraptor_dummy.c new file mode 100644 index 0000000000000000000000000000000000000000..8f14a3230d341993122f09f2bccf3d8232550fd9 --- /dev/null +++ b/src/velociraptor_dummy.c @@ -0,0 +1,54 @@ +/******************************************************************************* + * This file is part of SWIFT. + * Copyright (c) 2018 James Willis (james.s.willis@durham.ac.uk) + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published + * by the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public License + * along with this program. If not, see <http://www.gnu.org/licenses/>. + * + ******************************************************************************/ + +/* Config parameters. */ +#include "../config.h" + +/* Some standard headers. */ +#include <stddef.h> + +/* Local includes. */ +#include "error.h" +#include "swift_velociraptor_part.h" +#include "velociraptor_interface.h" + +/* Dummy VELOCIraptor interface for testing compilation without linking the + * actual VELOCIraptor library. 
*/ +#ifdef HAVE_DUMMY_VELOCIRAPTOR +struct cosmoinfo {}; +struct unitinfo {}; +struct cell_loc {}; +struct siminfo {}; + +int InitVelociraptor(char *config_name, char *output_name, + struct cosmoinfo cosmo_info, struct unitinfo unit_info, + struct siminfo sim_info) { + + error("This is only a dummy. Call the real one!"); + return 0; +} +int InvokeVelociraptor(const size_t num_gravity_parts, + const size_t num_hydro_parts, + struct swift_vel_part *swift_parts, + const int *cell_node_ids, char *output_name) { + + error("This is only a dummy. Call the real one!"); + return 0; +} +#endif /* HAVE_DUMMY_VELOCIRAPTOR */ diff --git a/src/velociraptor_interface.c b/src/velociraptor_interface.c index d7331ce49f102f52adafff1364dce173fc247586..7756fe4b937986c108d223c56183f7d31cdfaa98 100644 --- a/src/velociraptor_interface.c +++ b/src/velociraptor_interface.c @@ -35,6 +35,79 @@ #ifdef HAVE_VELOCIRAPTOR +/* Structure for passing cosmological information to VELOCIraptor. */ +struct cosmoinfo { + + /*! Current expansion factor of the Universe. (cosmology.a) */ + double atime; + + /*! Reduced Hubble constant (H0 / (100km/s/Mpc) (cosmology.h) */ + double littleh; + + /*! Matter density parameter (cosmology.Omega_m) */ + double Omega_m; + + /*! Baryon density parameter (cosmology.Omega_b) */ + double Omega_b; + + /*! Radiation constant density parameter (cosmology.Omega_lambda) */ + double Omega_Lambda; + + /*! Dark matter density parameter (cosmology.Omega_m - cosmology.Omega_b) */ + double Omega_cdm; + + /*! Dark-energy equation of state at the current time (cosmology.w)*/ + double w_de; +}; + +/* Structure for passing unit information to VELOCIraptor. */ +struct unitinfo { + + /* Length conversion factor to kpc. */ + double lengthtokpc; + + /* Velocity conversion factor to km/s. */ + double velocitytokms; + + /* Mass conversion factor to solar masses. */ + double masstosolarmass; + + /* Potential conversion factor. */ + double energyperunitmass; + + /*! 
Newton's gravitational constant (phys_const.const_newton_G)*/ + double gravity; + + /*! Hubble constant at the current redshift (cosmology.H) */ + double hubbleunit; +}; + +/* Structure to hold the location of a top-level cell. */ +struct cell_loc { + + /* Coordinates x,y,z */ + double loc[3]; +}; + +/* Structure for passing simulation information to VELOCIraptor. */ +struct siminfo { + double period, zoomhigresolutionmass, interparticlespacing, spacedimension[3]; + + /* Number of top-cells. */ + int numcells; + + /*! Locations of top-level cells. */ + struct cell_loc *cell_loc; + + /*! Top-level cell width. */ + double cellwidth[3]; + + /*! Inverse of the top-level cell width. */ + double icellwidth[3]; + + int icosmologicalsim; +}; + /* VELOCIraptor interface. */ int InitVelociraptor(char *config_name, char *output_name, struct cosmoinfo cosmo_info, struct unitinfo unit_info, @@ -185,10 +258,12 @@ void velociraptor_invoke(struct engine *e) { struct space *s = e->s; struct gpart *gparts = s->gparts; struct part *parts = s->parts; + struct xpart *xparts = s->xparts; const size_t nr_gparts = s->nr_gparts; const size_t nr_hydro_parts = s->nr_parts; const int nr_cells = s->nr_cells; int *cell_node_ids = NULL; + static int stf_output_count = 0; /* Allow thread to run on any core for the duration of the call to * VELOCIraptor so that @@ -218,12 +293,12 @@ void velociraptor_invoke(struct engine *e) { /* Append base name with either the step number or time depending on what * format is specified in the parameter file. 
*/ char outputFileName[PARSER_MAX_LINE_SIZE + 128]; - if (e->stf_output_freq_format == STEPS) { + if (e->stf_output_freq_format == io_stf_steps) { snprintf(outputFileName, PARSER_MAX_LINE_SIZE + 128, "%s_%04i.VELOCIraptor", e->stfBaseName, e->step); - } else if (e->stf_output_freq_format == TIME) { - snprintf(outputFileName, PARSER_MAX_LINE_SIZE + 128, "%s_%04e.VELOCIraptor", - e->stfBaseName, e->time); + } else if (e->stf_output_freq_format == io_stf_time) { + snprintf(outputFileName, PARSER_MAX_LINE_SIZE + 128, "%s_%04i.VELOCIraptor", + e->stfBaseName, stf_output_count); } /* Allocate and populate an array of swift_vel_parts to be passed to @@ -260,7 +335,8 @@ void velociraptor_invoke(struct engine *e) { swift_parts[i].id = parts[-gparts[i].id_or_neg_offset].id; swift_parts[i].u = hydro_get_physical_internal_energy( - &parts[-gparts[i].id_or_neg_offset], e->cosmology) * + &parts[-gparts[i].id_or_neg_offset], + &xparts[-gparts[i].id_or_neg_offset], e->cosmology) * energy_scale; } else if (gparts[i].type == swift_type_dark_matter) { swift_parts[i].id = gparts[i].id_or_neg_offset; @@ -282,6 +358,8 @@ void velociraptor_invoke(struct engine *e) { free(cell_node_ids); free(swift_parts); + stf_output_count++; + message("VELOCIraptor took %.3f %s on rank %d.", clocks_from_ticks(getticks() - tic), clocks_getunit(), engine_rank); #else diff --git a/src/velociraptor_interface.h b/src/velociraptor_interface.h index 0f6b8d339471f4bb1409baae62475a74e68cb5b1..1f29be11c9dd8e267c87201b0a438979fec3775b 100644 --- a/src/velociraptor_interface.h +++ b/src/velociraptor_interface.h @@ -22,81 +22,16 @@ /* Config parameters. */ #include "../config.h" -/* Forward declaration */ -struct engine; - -/* Structure for passing cosmological information to VELOCIraptor. */ -struct cosmoinfo { - - /*! Current expansion factor of the Universe. (cosmology.a) */ - double atime; - - /*! Reduced Hubble constant (H0 / (100km/s/Mpc) (cosmology.h) */ - double littleh; - - /*! 
Matter density parameter (cosmology.Omega_m) */ - double Omega_m; - - /*! Baryon density parameter (cosmology.Omega_b) */ - double Omega_b; - - /*! Radiation constant density parameter (cosmology.Omega_lambda) */ - double Omega_Lambda; - - /*! Dark matter density parameter (cosmology.Omega_m - cosmology.Omega_b) */ - double Omega_cdm; - - /*! Dark-energy equation of state at the current time (cosmology.w)*/ - double w_de; +/** + * @brief The different formats for when to run structure finding. + */ +enum io_stf_output_format { + io_stf_steps = 0, /*!< Output every N steps */ + io_stf_time /*!< Output at fixed time intervals */ }; -/* Structure for passing unit information to VELOCIraptor. */ -struct unitinfo { - - /* Length conversion factor to kpc. */ - double lengthtokpc; - - /* Velocity conversion factor to km/s. */ - double velocitytokms; - - /* Mass conversion factor to solar masses. */ - double masstosolarmass; - - /* Potential conversion factor. */ - double energyperunitmass; - - /*! Newton's gravitationl constant (phys_const.const_newton_G)*/ - double gravity; - - /*! Hubble constant at the current redshift (cosmology.H) */ - double hubbleunit; -}; - -/* Structure to hold the location of a top-level cell. */ -struct cell_loc { - - /* Coordinates x,y,z */ - double loc[3]; -}; - -/* Structure for passing simulation information to VELOCIraptor. */ -struct siminfo { - double period, zoomhigresolutionmass, interparticlespacing, spacedimension[3]; - - /* Number of top-cells. */ - int numcells; - - /*! Locations of top-level cells. */ - struct cell_loc *cell_loc; - - /*! Top-level cell width. */ - double cellwidth[3]; - - /*! Inverse of the top-level cell width. */ - double icellwidth[3]; - - int icosmologicalsim; -}; +/* Forward declaration */ +struct engine; /* VELOCIraptor wrapper functions. 
*/ void velociraptor_init(struct engine *e); diff --git a/src/version.c b/src/version.c index 69f70b9aec3549c061c162f2ce183f8fafcc2e9f..6fe8c38fc22f3c06fae42adbba83a65aff208bb9 100644 --- a/src/version.c +++ b/src/version.c @@ -27,6 +27,9 @@ #ifdef HAVE_METIS #include <metis.h> #endif +#ifdef HAVE_PARMETIS +#include <parmetis.h> +#endif #endif #ifdef HAVE_HDF5 @@ -320,6 +323,23 @@ const char *metis_version(void) { return version; } +/** + * @brief return the ParMETIS version used when SWIFT was built. + * + * @result description of the ParMETIS version. + */ +const char *parmetis_version(void) { + + static char version[256] = {0}; +#if defined(WITH_MPI) && defined(HAVE_PARMETIS) + sprintf(version, "%i.%i.%i", PARMETIS_MAJOR_VERSION, PARMETIS_MINOR_VERSION, + PARMETIS_SUBMINOR_VERSION); +#else + sprintf(version, "Unknown version"); +#endif + return version; +} + /** * @brief return the FFTW version used when SWIFT was built. * @@ -424,6 +444,9 @@ void greetings(void) { #ifdef HAVE_METIS printf(" METIS library version: %s\n", metis_version()); #endif +#ifdef HAVE_PARMETIS + printf(" ParMETIS library version: %s\n", parmetis_version()); +#endif #endif printf("\n"); } diff --git a/src/version.h b/src/version.h index 44119b6a3bbdf57c3f0195bae5ff329d05c61fd5..75a371bd9e47b19c1887b556accd486da50d9cea 100644 --- a/src/version.h +++ b/src/version.h @@ -32,6 +32,7 @@ const char* compiler_name(void); const char* compiler_version(void); const char* mpi_version(void); const char* metis_version(void); +const char* parmetis_version(void); const char* hdf5_version(void); const char* fftw3_version(void); const char* libgsl_version(void); diff --git a/tests/Makefile.am b/tests/Makefile.am index ae234581228b2ea6035af845292e9cc22d6bcaa8..7a0242437b8496d8c8756b1bccd2abb4d991262f 100644 --- a/tests/Makefile.am +++ b/tests/Makefile.am @@ -20,7 +20,7 @@ AM_CFLAGS = -I$(top_srcdir)/src $(HDF5_CPPFLAGS) $(GSL_INCS) $(FFTW_INCS) AM_LDFLAGS = ../src/.libs/libswiftsim.a $(HDF5_LDFLAGS) 
$(HDF5_LIBS) $(FFTW_LIBS) $(TCMALLOC_LIBS) $(JEMALLOC_LIBS) $(TBBMALLOC_LIBS) $(GRACKLE_LIBS) $(GSL_LIBS) $(PROFILER_LIBS) # List of programs and scripts to run in the test suite -TESTS = testGreetings testMaths testReading.sh testSingle testKernel testSymmetry \ +TESTS = testGreetings testMaths testReading.sh testSingle testKernel \ testActivePair.sh test27cells.sh test27cellsPerturbed.sh \ testParser.sh testSPHStep test125cells.sh test125cellsPerturbed.sh testFFT \ testAdiabaticIndex \ @@ -28,7 +28,8 @@ TESTS = testGreetings testMaths testReading.sh testSingle testKernel testSymmetr testVoronoi1D testVoronoi2D testVoronoi3D testGravityDerivatives \ testPeriodicBC.sh testPeriodicBCPerturbed.sh testPotentialSelf \ testPotentialPair testEOS testUtilities testSelectOutput.sh \ - testCbrt testCosmology testOutputList + testCbrt testCosmology testOutputList testFormat.sh \ + test27cellsStars.sh test27cellsStarsPerturbed.sh # List of test programs to compile check_PROGRAMS = testGreetings testReading testSingle testTimeIntegration \ @@ -39,7 +40,8 @@ check_PROGRAMS = testGreetings testReading testSingle testTimeIntegration \ testRiemannHLLC testMatrixInversion testDump testLogger \ testVoronoi1D testVoronoi2D testVoronoi3D testPeriodicBC \ testGravityDerivatives testPotentialSelf testPotentialPair testEOS testUtilities \ - testSelectOutput testCbrt testCosmology testOutputList + testSelectOutput testCbrt testCosmology testOutputList test27cellsStars \ + test27cellsStars_subset testCooling # Rebuild tests when SWIFT is updated. 
$(check_PROGRAMS): ../src/.libs/libswiftsim.a @@ -76,6 +78,12 @@ test27cells_subset_SOURCES = test27cells.c test27cells_subset_CFLAGS = $(AM_CFLAGS) -DTEST_DOSELF_SUBSET -DTEST_DOPAIR_SUBSET +test27cellsStars_SOURCES = test27cellsStars.c + +test27cellsStars_subset_SOURCES = test27cellsStars.c + +test27cellsStars_subset_CFLAGS = $(AM_CFLAGS) -DTEST_DOSELF_SUBSET -DTEST_DOPAIR_SUBSET + testPeriodicBC_SOURCES = testPeriodicBC.c test125cells_SOURCES = test125cells.c @@ -120,6 +128,8 @@ testEOS_SOURCES = testEOS.c testUtilities_SOURCES = testUtilities.c +testCooling_SOURCES = testCooling.c + # Files necessary for distribution EXTRA_DIST = testReading.sh makeInput.py testActivePair.sh \ test27cells.sh test27cellsPerturbed.sh testParser.sh testPeriodicBC.sh \ @@ -130,4 +140,6 @@ EXTRA_DIST = testReading.sh makeInput.py testActivePair.sh \ fft_params.yml tolerance_periodic_BC_normal.dat tolerance_periodic_BC_perturbed.dat \ testEOS.sh testEOS_plot.sh testSelectOutput.sh selectOutput.yml \ output_list_params.yml output_list_time.txt output_list_redshift.txt \ - output_list_scale_factor.txt + output_list_scale_factor.txt testEOS.sh testEOS_plot.sh \ + test27cellsStars.sh test27cellsStarsPerturbed.sh star_tolerance_27_normal.dat \ + star_tolerance_27_perturbed.dat star_tolerance_27_perturbed_h.dat star_tolerance_27_perturbed_h2.dat diff --git a/tests/logger.yml b/tests/logger.yml new file mode 100644 index 0000000000000000000000000000000000000000..eaf8731f0e09df40b891c7b57be35cd9e14fc5cc --- /dev/null +++ b/tests/logger.yml @@ -0,0 +1,5 @@ +# Parameters governing the logger snapshot system +Logger: + delta_step: 10 # (Optional) Update the particle log every this many updates + initial_buffer_size: .1 # buffer size in GB + basename: indice # Common part of the filenames diff --git a/tests/star_tolerance_27_normal.dat b/tests/star_tolerance_27_normal.dat new file mode 100644 index 0000000000000000000000000000000000000000..c243da2bcd9f5177ab471b2b3e622bdb1ee677d4 --- /dev/null 
+++ b/tests/star_tolerance_27_normal.dat @@ -0,0 +1,4 @@ +# ID pos_x pos_y pos_z wcount wcount_dh + 0 1e-6 1e-6 1e-6 4e-4 1.2e-2 + 0 1e-6 1e-6 1e-6 1e-4 2e-3 + 0 1e-6 1e-6 1e-6 1e-6 1e-6 diff --git a/tests/star_tolerance_27_perturbed.dat b/tests/star_tolerance_27_perturbed.dat new file mode 100644 index 0000000000000000000000000000000000000000..9e6886834e9a793d37dfe77c9713697cc7f6f606 --- /dev/null +++ b/tests/star_tolerance_27_perturbed.dat @@ -0,0 +1,4 @@ +# ID pos_x pos_y pos_z wcount wcount_dh + 0 1e-6 1e-6 1e-6 2e-4 1e-2 + 0 1e-6 1e-6 1e-6 1e-5 2.4e-3 + 0 1e-6 1e-6 1e-6 1e-6 1e-2 diff --git a/tests/star_tolerance_27_perturbed_h.dat b/tests/star_tolerance_27_perturbed_h.dat new file mode 100644 index 0000000000000000000000000000000000000000..20367e6f09ac171cad17ab5418304bd5674e78d6 --- /dev/null +++ b/tests/star_tolerance_27_perturbed_h.dat @@ -0,0 +1,4 @@ +# ID pos_x pos_y pos_z wcount wcount_dh + 0 1e-6 1e-6 1e-6 5e-4 1.4e-2 + 0 1e-6 1e-6 1e-6 1e-5 4e-3 + 0 1e-6 1e-6 1e-6 1e-6 1e0 diff --git a/tests/star_tolerance_27_perturbed_h2.dat b/tests/star_tolerance_27_perturbed_h2.dat new file mode 100644 index 0000000000000000000000000000000000000000..fe89f21dd2fe37360bc0e3a2c5431528075bf2e5 --- /dev/null +++ b/tests/star_tolerance_27_perturbed_h2.dat @@ -0,0 +1,4 @@ +# ID pos_x pos_y pos_z wcount wcount_dh + 0 1e-6 1e-6 1e-6 5e-4 1.5e-2 + 0 1e-6 1e-6 1e-6 1e-5 5.86e-3 + 0 1e-6 1e-6 1e-6 1e-6 1e0 diff --git a/tests/test125cells.c b/tests/test125cells.c index 70af9dfb19aeee812fdc90733a604f795b7f478f..5a9c4ea9511b5d75a3098f7997b83607cdcbd715 100644 --- a/tests/test125cells.c +++ b/tests/test125cells.c @@ -31,19 +31,12 @@ #include "swift.h" #if defined(WITH_VECTORIZATION) -#define DOSELF2 runner_doself2_force_vec -#define DOPAIR2 runner_dopair2_branch_force #define DOSELF2_NAME "runner_doself2_force_vec" #define DOPAIR2_NAME "runner_dopair2_force_vec" #endif -#ifndef DOSELF2 -#define DOSELF2 runner_doself2_force +#ifndef DOSELF2_NAME #define DOSELF2_NAME 
"runner_doself2_density" -#endif - -#ifndef DOPAIR2 -#define DOPAIR2 runner_dopair2_branch_force #define DOPAIR2_NAME "runner_dopair2_force" #endif @@ -118,9 +111,10 @@ void set_energy_state(struct part *part, enum pressure_field press, float size, part->entropy = pressure / pow_gamma(density); #elif defined(DEFAULT_SPH) part->u = pressure / (hydro_gamma_minus_one * density); -#elif defined(MINIMAL_SPH) || defined(HOPKINS_PU_SPH) +#elif defined(MINIMAL_SPH) || defined(HOPKINS_PU_SPH) || \ + defined(HOPKINS_PU_SPH_MONAGHAN) part->u = pressure / (hydro_gamma_minus_one * density); -#elif defined(MINIMAL_MULTI_MAT_SPH) +#elif defined(PLANETARY_SPH) part->u = pressure / (hydro_gamma_minus_one * density); #elif defined(GIZMO_MFV_SPH) || defined(SHADOWFAX_SPH) part->primitives.P = pressure; @@ -152,19 +146,19 @@ void get_solution(const struct cell *main_cell, struct solution_part *solution, float density, enum velocity_field vel, enum pressure_field press, float size) { - for (int i = 0; i < main_cell->count; ++i) { + for (int i = 0; i < main_cell->hydro.count; ++i) { - solution[i].id = main_cell->parts[i].id; + solution[i].id = main_cell->hydro.parts[i].id; - solution[i].x[0] = main_cell->parts[i].x[0]; - solution[i].x[1] = main_cell->parts[i].x[1]; - solution[i].x[2] = main_cell->parts[i].x[2]; + solution[i].x[0] = main_cell->hydro.parts[i].x[0]; + solution[i].x[1] = main_cell->hydro.parts[i].x[1]; + solution[i].x[2] = main_cell->hydro.parts[i].x[2]; - solution[i].v[0] = main_cell->parts[i].v[0]; - solution[i].v[1] = main_cell->parts[i].v[1]; - solution[i].v[2] = main_cell->parts[i].v[2]; + solution[i].v[0] = main_cell->hydro.parts[i].v[0]; + solution[i].v[1] = main_cell->hydro.parts[i].v[1]; + solution[i].v[2] = main_cell->hydro.parts[i].v[2]; - solution[i].h = main_cell->parts[i].h; + solution[i].h = main_cell->hydro.parts[i].h; solution[i].rho = density; @@ -213,9 +207,9 @@ void reset_particles(struct cell *c, struct hydro_space *hs, enum velocity_field vel, enum 
pressure_field press, float size, float density) { - for (int i = 0; i < c->count; ++i) { + for (int i = 0; i < c->hydro.count; ++i) { - struct part *p = &c->parts[i]; + struct part *p = &c->hydro.parts[i]; set_velocity(p, vel, size); set_energy_state(p, press, size, density); @@ -272,20 +266,20 @@ struct cell *make_cell(size_t n, const double offset[3], double size, double h, struct cell *cell = (struct cell *)malloc(sizeof(struct cell)); bzero(cell, sizeof(struct cell)); - if (posix_memalign((void **)&cell->parts, part_align, + if (posix_memalign((void **)&cell->hydro.parts, part_align, count * sizeof(struct part)) != 0) error("couldn't allocate particles, no. of particles: %d", (int)count); - if (posix_memalign((void **)&cell->xparts, xpart_align, + if (posix_memalign((void **)&cell->hydro.xparts, xpart_align, count * sizeof(struct xpart)) != 0) error("couldn't allocate particles, no. of x-particles: %d", (int)count); - bzero(cell->parts, count * sizeof(struct part)); - bzero(cell->xparts, count * sizeof(struct xpart)); + bzero(cell->hydro.parts, count * sizeof(struct part)); + bzero(cell->hydro.xparts, count * sizeof(struct xpart)); float h_max = 0.f; /* Construct the parts */ - struct part *part = cell->parts; - struct xpart *xpart = cell->xparts; + struct part *part = cell->hydro.parts; + struct xpart *xpart = cell->hydro.xparts; for (size_t x = 0; x < n; ++x) { for (size_t y = 0; y < n; ++y) { for (size_t z = 0; z < n; ++z) { @@ -346,11 +340,11 @@ struct cell *make_cell(size_t n, const double offset[3], double size, double h, /* Cell properties */ cell->split = 0; - cell->h_max = h_max; - cell->count = count; - cell->gcount = 0; - cell->dx_max_part = 0.; - cell->dx_max_sort = 0.; + cell->hydro.h_max = h_max; + cell->hydro.count = count; + cell->grav.count = 0; + cell->hydro.dx_max_part = 0.; + cell->hydro.dx_max_sort = 0.; cell->width[0] = size; cell->width[1] = size; cell->width[2] = size; @@ -358,24 +352,24 @@ struct cell *make_cell(size_t n, const double 
offset[3], double size, double h, cell->loc[1] = offset[1]; cell->loc[2] = offset[2]; - cell->ti_old_part = 8; - cell->ti_hydro_end_min = 8; - cell->ti_hydro_end_max = 8; + cell->hydro.ti_old_part = 8; + cell->hydro.ti_end_min = 8; + cell->hydro.ti_end_max = 8; cell->nodeID = NODE_ID; - // shuffle_particles(cell->parts, cell->count); + // shuffle_particles(cell->hydro.parts, cell->hydro.count); - cell->sorted = 0; - for (int k = 0; k < 13; k++) cell->sort[k] = NULL; + cell->hydro.sorted = 0; + for (int k = 0; k < 13; k++) cell->hydro.sort[k] = NULL; return cell; } void clean_up(struct cell *ci) { - free(ci->parts); - free(ci->xparts); + free(ci->hydro.parts); + free(ci->hydro.xparts); for (int k = 0; k < 13; k++) - if (ci->sort[k] != NULL) free(ci->sort[k]); + if (ci->hydro.sort[k] != NULL) free(ci->hydro.sort[k]); free(ci); } @@ -397,37 +391,42 @@ void dump_particle_fields(char *fileName, struct cell *main_cell, fprintf(file, "# Main cell --------------------------------------------\n"); /* Write main cell */ - for (int pid = 0; pid < main_cell->count; pid++) { + for (int pid = 0; pid < main_cell->hydro.count; pid++) { fprintf(file, "%6llu %8.5f %8.5f %8.5f %8.5f %8.5f %8.5f %8.5f %8.5f %8.5f %8.5f " "%8.5f " "%8.5f %8.5f %13e %13e %13e %13e %13e %8.5f %8.5f\n", - main_cell->parts[pid].id, main_cell->parts[pid].x[0], - main_cell->parts[pid].x[1], main_cell->parts[pid].x[2], - main_cell->parts[pid].v[0], main_cell->parts[pid].v[1], - main_cell->parts[pid].v[2], main_cell->parts[pid].h, - hydro_get_comoving_density(&main_cell->parts[pid]), -#if defined(MINIMAL_SPH) || defined(MINIMAL_MULTI_MAT_SPH) || \ - defined(GIZMO_MFV_SPH) || defined(SHADOWFAX_SPH) || \ - defined(HOPKINS_PU_SPH) + main_cell->hydro.parts[pid].id, main_cell->hydro.parts[pid].x[0], + main_cell->hydro.parts[pid].x[1], main_cell->hydro.parts[pid].x[2], + main_cell->hydro.parts[pid].v[0], main_cell->hydro.parts[pid].v[1], + main_cell->hydro.parts[pid].v[2], main_cell->hydro.parts[pid].h, + 
hydro_get_comoving_density(&main_cell->hydro.parts[pid]), +#if defined(MINIMAL_SPH) || defined(PLANETARY_SPH) || \ + defined(GIZMO_MFV_SPH) || defined(SHADOWFAX_SPH) || \ + defined(HOPKINS_PU_SPH) || defined(HOPKINS_PU_SPH_MONAGHAN) 0.f, #else - main_cell->parts[pid].density.div_v, + main_cell->hydro.parts[pid].density.div_v, #endif - hydro_get_comoving_entropy(&main_cell->parts[pid]), - hydro_get_comoving_internal_energy(&main_cell->parts[pid]), - hydro_get_comoving_pressure(&main_cell->parts[pid]), - hydro_get_comoving_soundspeed(&main_cell->parts[pid]), - main_cell->parts[pid].a_hydro[0], main_cell->parts[pid].a_hydro[1], - main_cell->parts[pid].a_hydro[2], main_cell->parts[pid].force.h_dt, + hydro_get_drifted_comoving_entropy(&main_cell->hydro.parts[pid]), + hydro_get_drifted_comoving_internal_energy( + &main_cell->hydro.parts[pid]), + hydro_get_comoving_pressure(&main_cell->hydro.parts[pid]), + hydro_get_comoving_soundspeed(&main_cell->hydro.parts[pid]), + main_cell->hydro.parts[pid].a_hydro[0], + main_cell->hydro.parts[pid].a_hydro[1], + main_cell->hydro.parts[pid].a_hydro[2], + main_cell->hydro.parts[pid].force.h_dt, #if defined(GADGET2_SPH) - main_cell->parts[pid].force.v_sig, main_cell->parts[pid].entropy_dt, - 0.f + main_cell->hydro.parts[pid].force.v_sig, + main_cell->hydro.parts[pid].entropy_dt, 0.f #elif defined(DEFAULT_SPH) - main_cell->parts[pid].force.v_sig, 0.f, - main_cell->parts[pid].force.u_dt -#elif defined(MINIMAL_SPH) || defined(HOPKINS_PU_SPH) - main_cell->parts[pid].force.v_sig, 0.f, main_cell->parts[pid].u_dt + main_cell->hydro.parts[pid].force.v_sig, 0.f, + main_cell->hydro.parts[pid].force.u_dt +#elif defined(MINIMAL_SPH) || defined(HOPKINS_PU_SPH) || \ + defined(HOPKINS_PU_SPH_MONAGHAN) + main_cell->hydro.parts[pid].force.v_sig, 0.f, + main_cell->hydro.parts[pid].u_dt #else 0.f, 0.f, 0.f #endif @@ -438,7 +437,7 @@ void dump_particle_fields(char *fileName, struct cell *main_cell, fprintf(file, "# Solution 
---------------------------------------------\n"); - for (int pid = 0; pid < main_cell->count; pid++) { + for (int pid = 0; pid < main_cell->hydro.count; pid++) { fprintf(file, "%6llu %8.5f %8.5f %8.5f %8.5f %8.5f %8.5f %8.5f %8.5f %8.5f " "%8.5f %8.5f " @@ -586,6 +585,7 @@ int main(int argc, char *argv[]) { prog_const.const_newton_G = 1.f; struct hydro_props hp; + hydro_props_init_no_hydro(&hp); hp.eta_neighbours = h; hp.h_tolerance = 1e0; hp.h_max = FLT_MAX; @@ -640,7 +640,7 @@ int main(int argc, char *argv[]) { /* Construct the real solution */ struct solution_part *solution = (struct solution_part *)malloc( - main_cell->count * sizeof(struct solution_part)); + main_cell->hydro.count * sizeof(struct solution_part)); get_solution(main_cell, solution, rho, vel, press, size); ticks timings[27]; @@ -657,22 +657,21 @@ int main(int argc, char *argv[]) { /* Reset particles. */ for (int i = 0; i < 125; ++i) { - for (int pid = 0; pid < cells[i]->count; ++pid) - hydro_init_part(&cells[i]->parts[pid], &space.hs); + for (int pid = 0; pid < cells[i]->hydro.count; ++pid) + hydro_init_part(&cells[i]->hydro.parts[pid], &space.hs); } /* First, sort stuff */ for (int j = 0; j < 125; ++j) - runner_do_sort(&runner, cells[j], 0x1FFF, 0, 0); + runner_do_hydro_sort(&runner, cells[j], 0x1FFF, 0, 0); -/* Do the density calculation */ -#if !(defined(MINIMAL_SPH) && defined(WITH_VECTORIZATION)) + /* Do the density calculation */ /* Initialise the particle cache. 
*/ #ifdef WITH_VECTORIZATION runner.ci_cache.count = 0; - cache_init(&runner.ci_cache, 512); runner.cj_cache.count = 0; + cache_init(&runner.ci_cache, 512); cache_init(&runner.cj_cache, 512); #endif @@ -710,18 +709,15 @@ int main(int argc, char *argv[]) { for (int j = 0; j < 27; ++j) runner_doself1_density(&runner, inner_cells[j]); -#endif - /* Ghost to finish everything on the central cells */ for (int j = 0; j < 27; ++j) runner_do_ghost(&runner, inner_cells[j], 0); -/* Do the force calculation */ -#if !(defined(MINIMAL_SPH) && defined(WITH_VECTORIZATION)) + /* Do the force calculation */ #ifdef WITH_VECTORIZATION /* Initialise the cache. */ - runner.ci_cache.count = 0; - runner.cj_cache.count = 0; + cache_clean(&runner.ci_cache); + cache_clean(&runner.cj_cache); cache_init(&runner.ci_cache, 512); cache_init(&runner.cj_cache, 512); #endif @@ -738,7 +734,7 @@ int main(int argc, char *argv[]) { const ticks sub_tic = getticks(); - DOPAIR2(&runner, main_cell, cj); + runner_dopair2_branch_force(&runner, main_cell, cj); timings[ctr++] += getticks() - sub_tic; } @@ -749,10 +745,9 @@ int main(int argc, char *argv[]) { ticks self_tic = getticks(); /* And now the self-interaction for the main cell */ - DOSELF2(&runner, main_cell); + runner_doself2_force(&runner, main_cell); timings[26] += getticks() - self_tic; -#endif /* Finally, give a gentle kick */ runner_do_end_force(&runner, main_cell, 0); @@ -767,8 +762,8 @@ int main(int argc, char *argv[]) { } for (int i = 0; i < 125; ++i) { - for (int pid = 0; pid < cells[i]->count; ++pid) - hydro_init_part(&cells[i]->parts[pid], &space.hs); + for (int pid = 0; pid < cells[i]->hydro.count; ++pid) + hydro_init_part(&cells[i]->hydro.parts[pid], &space.hs); } } @@ -798,18 +793,17 @@ int main(int argc, char *argv[]) { const ticks tic = getticks(); -/* Kick the central cell */ -// runner_do_kick1(&runner, main_cell, 0); + /* Kick the central cell */ + // runner_do_kick1(&runner, main_cell, 0); -/* And drift it */ -// 
runner_do_drift_particles(&runner, main_cell, 0); + /* And drift it */ + // runner_do_drift_particles(&runner, main_cell, 0); -/* Initialise the particles */ -// for (int j = 0; j < 125; ++j) runner_do_drift_particles(&runner, cells[j], -// 0); + /* Initialise the particles */ + // for (int j = 0; j < 125; ++j) runner_do_drift_particles(&runner, cells[j], + // 0); -/* Do the density calculation */ -#if !(defined(MINIMAL_SPH) && defined(WITH_VECTORIZATION)) + /* Do the density calculation */ /* Run all the pairs (only once !)*/ for (int i = 0; i < 5; i++) { @@ -844,13 +838,10 @@ int main(int argc, char *argv[]) { /* And now the self-interaction for the central cells*/ for (int j = 0; j < 27; ++j) self_all_density(&runner, inner_cells[j]); -#endif - /* Ghost to finish everything on the central cells */ for (int j = 0; j < 27; ++j) runner_do_ghost(&runner, inner_cells[j], 0); -/* Do the force calculation */ -#if !(defined(MINIMAL_SPH) && defined(WITH_VECTORIZATION)) + /* Do the force calculation */ /* Do the pairs (for the central 27 cells) */ for (int i = 1; i < 4; i++) { @@ -867,8 +858,6 @@ int main(int argc, char *argv[]) { /* And now the self-interaction for the main cell */ self_all_force(&runner, main_cell); -#endif - /* Finally, give a gentle kick */ runner_do_end_force(&runner, main_cell, 0); // runner_do_kick2(&runner, main_cell, 0); @@ -886,5 +875,10 @@ int main(int argc, char *argv[]) { for (int i = 0; i < 125; ++i) clean_up(cells[i]); free(solution); +#ifdef WITH_VECTORIZATION + cache_clean(&runner.ci_cache); + cache_clean(&runner.cj_cache); +#endif + return 0; } diff --git a/tests/test27cells.c b/tests/test27cells.c index 1ca6b2c54d901943b0cc748a2241a3a2f9ae9244..d100e2e30f0bb1452b3366ddde51dbae0575d67a 100644 --- a/tests/test27cells.c +++ b/tests/test27cells.c @@ -101,14 +101,14 @@ struct cell *make_cell(size_t n, double *offset, double size, double h, struct cell *cell = (struct cell *)malloc(sizeof(struct cell)); bzero(cell, sizeof(struct cell)); - if 
(posix_memalign((void **)&cell->parts, part_align, + if (posix_memalign((void **)&cell->hydro.parts, part_align, count * sizeof(struct part)) != 0) { error("couldn't allocate particles, no. of particles: %d", (int)count); } - bzero(cell->parts, count * sizeof(struct part)); + bzero(cell->hydro.parts, count * sizeof(struct part)); /* Construct the parts */ - struct part *part = cell->parts; + struct part *part = cell->hydro.parts; for (size_t x = 0; x < n; ++x) { for (size_t y = 0; y < n; ++y) { for (size_t z = 0; z < n; ++z) { @@ -182,10 +182,10 @@ struct cell *make_cell(size_t n, double *offset, double size, double h, /* Cell properties */ cell->split = 0; - cell->h_max = h_max; - cell->count = count; - cell->dx_max_part = 0.; - cell->dx_max_sort = 0.; + cell->hydro.h_max = h_max; + cell->hydro.count = count; + cell->hydro.dx_max_part = 0.; + cell->hydro.dx_max_sort = 0.; cell->width[0] = size; cell->width[1] = size; cell->width[2] = size; @@ -193,23 +193,23 @@ struct cell *make_cell(size_t n, double *offset, double size, double h, cell->loc[1] = offset[1]; cell->loc[2] = offset[2]; - cell->ti_old_part = 8; - cell->ti_hydro_end_min = 8; - cell->ti_hydro_end_max = 8; + cell->hydro.ti_old_part = 8; + cell->hydro.ti_end_min = 8; + cell->hydro.ti_end_max = 8; cell->nodeID = NODE_ID; - shuffle_particles(cell->parts, cell->count); + shuffle_particles(cell->hydro.parts, cell->hydro.count); - cell->sorted = 0; - for (int k = 0; k < 13; k++) cell->sort[k] = NULL; + cell->hydro.sorted = 0; + for (int k = 0; k < 13; k++) cell->hydro.sort[k] = NULL; return cell; } void clean_up(struct cell *ci) { - free(ci->parts); + free(ci->hydro.parts); for (int k = 0; k < 13; k++) - if (ci->sort[k] != NULL) free(ci->sort[k]); + if (ci->hydro.sort[k] != NULL) free(ci->hydro.sort[k]); free(ci); } @@ -229,8 +229,8 @@ void zero_particle_fields(struct cell *c) { #else struct hydro_space *hspointer = NULL; #endif - for (int pid = 0; pid < c->count; pid++) { - hydro_init_part(&c->parts[pid], 
hspointer); + for (int pid = 0; pid < c->hydro.count; pid++) { + hydro_init_part(&c->hydro.parts[pid], hspointer); } } @@ -238,12 +238,12 @@ void zero_particle_fields(struct cell *c) { * @brief Ends the loop by adding the appropriate coefficients */ void end_calculation(struct cell *c, const struct cosmology *cosmo) { - for (int pid = 0; pid < c->count; pid++) { - hydro_end_density(&c->parts[pid], cosmo); + for (int pid = 0; pid < c->hydro.count; pid++) { + hydro_end_density(&c->hydro.parts[pid], cosmo); /* Recover the common "Neighbour number" definition */ - c->parts[pid].density.wcount *= pow_dimension(c->parts[pid].h); - c->parts[pid].density.wcount *= kernel_norm; + c->hydro.parts[pid].density.wcount *= pow_dimension(c->hydro.parts[pid].h); + c->hydro.parts[pid].density.wcount *= kernel_norm; } } @@ -264,30 +264,30 @@ void dump_particle_fields(char *fileName, struct cell *main_cell, fprintf(file, "# Main cell --------------------------------------------\n"); /* Write main cell */ - for (int pid = 0; pid < main_cell->count; pid++) { + for (int pid = 0; pid < main_cell->hydro.count; pid++) { fprintf(file, "%6llu %10f %10f %10f %10f %10f %10f %13e %13e %13e %13e %13e " "%13e %13e %13e\n", - main_cell->parts[pid].id, main_cell->parts[pid].x[0], - main_cell->parts[pid].x[1], main_cell->parts[pid].x[2], - main_cell->parts[pid].v[0], main_cell->parts[pid].v[1], - main_cell->parts[pid].v[2], - hydro_get_comoving_density(&main_cell->parts[pid]), + main_cell->hydro.parts[pid].id, main_cell->hydro.parts[pid].x[0], + main_cell->hydro.parts[pid].x[1], main_cell->hydro.parts[pid].x[2], + main_cell->hydro.parts[pid].v[0], main_cell->hydro.parts[pid].v[1], + main_cell->hydro.parts[pid].v[2], + hydro_get_comoving_density(&main_cell->hydro.parts[pid]), #if defined(GIZMO_MFV_SPH) || defined(SHADOWFAX_SPH) 0.f, -#elif defined(HOPKINS_PU_SPH) - main_cell->parts[pid].density.pressure_bar_dh, +#elif defined(HOPKINS_PU_SPH) || defined(HOPKINS_PU_SPH_MONAGHAN) + 
main_cell->hydro.parts[pid].density.pressure_bar_dh, #else - main_cell->parts[pid].density.rho_dh, + main_cell->hydro.parts[pid].density.rho_dh, #endif - main_cell->parts[pid].density.wcount, - main_cell->parts[pid].density.wcount_dh, + main_cell->hydro.parts[pid].density.wcount, + main_cell->hydro.parts[pid].density.wcount_dh, #if defined(GADGET2_SPH) || defined(DEFAULT_SPH) || defined(HOPKINS_PE_SPH) || \ - defined(HOPKINS_PU_SPH) - main_cell->parts[pid].density.div_v, - main_cell->parts[pid].density.rot_v[0], - main_cell->parts[pid].density.rot_v[1], - main_cell->parts[pid].density.rot_v[2] + defined(HOPKINS_PU_SPH) || defined(HOPKINS_PU_SPH_MONAGHAN) + main_cell->hydro.parts[pid].density.div_v, + main_cell->hydro.parts[pid].density.rot_v[0], + main_cell->hydro.parts[pid].density.rot_v[1], + main_cell->hydro.parts[pid].density.rot_v[2] #else 0., 0., 0., 0. #endif @@ -305,23 +305,28 @@ void dump_particle_fields(char *fileName, struct cell *main_cell, "# Offset: [%2d %2d %2d] -----------------------------------\n", i - 1, j - 1, k - 1); - for (int pjd = 0; pjd < cj->count; pjd++) { + for (int pjd = 0; pjd < cj->hydro.count; pjd++) { fprintf( file, "%6llu %10f %10f %10f %10f %10f %10f %13e %13e %13e %13e %13e " "%13e %13e %13e\n", - cj->parts[pjd].id, cj->parts[pjd].x[0], cj->parts[pjd].x[1], - cj->parts[pjd].x[2], cj->parts[pjd].v[0], cj->parts[pjd].v[1], - cj->parts[pjd].v[2], hydro_get_comoving_density(&cj->parts[pjd]), + cj->hydro.parts[pjd].id, cj->hydro.parts[pjd].x[0], + cj->hydro.parts[pjd].x[1], cj->hydro.parts[pjd].x[2], + cj->hydro.parts[pjd].v[0], cj->hydro.parts[pjd].v[1], + cj->hydro.parts[pjd].v[2], + hydro_get_comoving_density(&cj->hydro.parts[pjd]), #if defined(GIZMO_MFV_SPH) || defined(SHADOWFAX_SPH) 0.f, #else - main_cell->parts[pjd].density.rho_dh, + main_cell->hydro.parts[pjd].density.rho_dh, #endif - cj->parts[pjd].density.wcount, cj->parts[pjd].density.wcount_dh, + cj->hydro.parts[pjd].density.wcount, + cj->hydro.parts[pjd].density.wcount_dh, 
#if defined(GADGET2_SPH) || defined(DEFAULT_SPH) || defined(HOPKINS_PE_SPH) - cj->parts[pjd].density.div_v, cj->parts[pjd].density.rot_v[0], - cj->parts[pjd].density.rot_v[1], cj->parts[pjd].density.rot_v[2] + cj->hydro.parts[pjd].density.div_v, + cj->hydro.parts[pjd].density.rot_v[0], + cj->hydro.parts[pjd].density.rot_v[1], + cj->hydro.parts[pjd].density.rot_v[2] #else 0., 0., 0., 0. #endif @@ -451,6 +456,7 @@ int main(int argc, char *argv[]) { space.dim[2] = 3.; struct hydro_props hp; + hydro_props_init_no_hydro(&hp); hp.eta_neighbours = h; hp.h_tolerance = 1e0; hp.h_max = FLT_MAX; @@ -486,7 +492,7 @@ int main(int argc, char *argv[]) { runner_do_drift_part(&runner, cells[i * 9 + j * 3 + k], 0); - runner_do_sort(&runner, cells[i * 9 + j * 3 + k], 0x1FFF, 0, 0); + runner_do_hydro_sort(&runner, cells[i * 9 + j * 3 + k], 0x1FFF, 0, 0); } } } @@ -514,10 +520,10 @@ int main(int argc, char *argv[]) { #if defined(TEST_DOSELF_SUBSET) || defined(TEST_DOPAIR_SUBSET) int *pid = NULL; int count = 0; - if ((pid = (int *)malloc(sizeof(int) * main_cell->count)) == NULL) + if ((pid = (int *)malloc(sizeof(int) * main_cell->hydro.count)) == NULL) error("Can't allocate memory for pid."); - for (int k = 0; k < main_cell->count; k++) - if (part_is_active(&main_cell->parts[k], &engine)) { + for (int k = 0; k < main_cell->hydro.count; k++) + if (part_is_active(&main_cell->hydro.parts[k], &engine)) { pid[count] = k; ++count; } @@ -529,7 +535,7 @@ int main(int argc, char *argv[]) { const ticks sub_tic = getticks(); #ifdef TEST_DOPAIR_SUBSET - DOPAIR1_SUBSET(&runner, main_cell, main_cell->parts, pid, count, + DOPAIR1_SUBSET(&runner, main_cell, main_cell->hydro.parts, pid, count, cells[j]); #else DOPAIR1(&runner, main_cell, cells[j]); @@ -543,7 +549,7 @@ int main(int argc, char *argv[]) { const ticks self_tic = getticks(); #ifdef TEST_DOSELF_SUBSET - DOSELF1_SUBSET(&runner, main_cell, main_cell->parts, pid, count); + DOSELF1_SUBSET(&runner, main_cell, main_cell->hydro.parts, pid, count); 
#else DOSELF1(&runner, main_cell); #endif @@ -610,5 +616,10 @@ int main(int argc, char *argv[]) { /* Clean things to make the sanitizer happy ... */ for (int i = 0; i < 27; ++i) clean_up(cells[i]); +#ifdef WITH_VECTORIZATION + cache_clean(&runner.ci_cache); + cache_clean(&runner.cj_cache); +#endif + return 0; } diff --git a/tests/test27cellsStars.c b/tests/test27cellsStars.c new file mode 100644 index 0000000000000000000000000000000000000000..0377fc49edfedc8b1d9ce0630821622117187c9b --- /dev/null +++ b/tests/test27cellsStars.c @@ -0,0 +1,541 @@ +/******************************************************************************* + * This file is part of SWIFT. + * Copyright (C) 2015 Matthieu Schaller (matthieu.schaller@durham.ac.uk). + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published + * by the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public License + * along with this program. If not, see <http://www.gnu.org/licenses/>. + * + ******************************************************************************/ + +/* Config parameters. */ +#include "../config.h" + +/* Some standard headers. */ +#include <fenv.h> +#include <stdio.h> +#include <stdlib.h> +#include <string.h> +#include <unistd.h> + +/* Local headers. 
*/ +#include "swift.h" + +#define DOSELF1 runner_doself_branch_stars_density +#define DOSELF1_SUBSET runner_doself_subset_branch_stars_density +#ifdef TEST_DOSELF_SUBSET +#define DOSELF1_NAME "runner_doself_subset_branch_stars_density" +#else +#define DOSELF1_NAME "runner_doself1_branch_stars_density" +#endif + +#define DOPAIR1 runner_dopair_branch_stars_density +#define DOPAIR1_SUBSET runner_dopair_subset_branch_stars_density +#ifdef TEST_DOPAIR_SUBSET +#define DOPAIR1_NAME "runner_dopair_subset_branch_stars_density" +#else +#define DOPAIR1_NAME "runner_dopair_branch_stars_density" +#endif + +#define NODE_ID 0 + +/** + * @brief Constructs a cell and all of its particle in a valid state prior to + * a DOPAIR or DOSELF calcuation. + * + * @param n The cube root of the number of particles. + * @param n_stars The cube root of the number of star particles. + * @param offset The position of the cell offset from (0,0,0). + * @param size The cell size. + * @param h The smoothing length of the particles in units of the inter-particle + * separation. + * @param partId The running counter of IDs for gas. + * @param spartId The running counter of IDs for stars. + * @param pert The perturbation to apply to the particles in the cell in units + * of the inter-particle separation. + * @param h_pert The perturbation to apply to the smoothing length. + */ +struct cell *make_cell(size_t n, size_t n_stars, double *offset, double size, + double h, long long *partId, long long *spartId, + double pert, double h_pert) { + const size_t count = n * n * n; + const size_t scount = n_stars * n_stars * n_stars; + float h_max = 0.f; + struct cell *cell = (struct cell *)malloc(sizeof(struct cell)); + bzero(cell, sizeof(struct cell)); + + if (posix_memalign((void **)&cell->hydro.parts, part_align, + count * sizeof(struct part)) != 0) { + error("couldn't allocate particles, no. 
of particles: %d", (int)count); + } + bzero(cell->hydro.parts, count * sizeof(struct part)); + + /* Construct the parts */ + struct part *part = cell->hydro.parts; + for (size_t x = 0; x < n; ++x) { + for (size_t y = 0; y < n; ++y) { + for (size_t z = 0; z < n; ++z) { + part->x[0] = + offset[0] + + size * (x + 0.5 + random_uniform(-0.5, 0.5) * pert) / (float)n; + part->x[1] = + offset[1] + + size * (y + 0.5 + random_uniform(-0.5, 0.5) * pert) / (float)n; + part->x[2] = + offset[2] + + size * (z + 0.5 + random_uniform(-0.5, 0.5) * pert) / (float)n; + + part->v[0] = 0; + part->v[1] = 0; + part->v[2] = 0; + if (h_pert) + part->h = size * h * random_uniform(1.f, h_pert) / (float)n; + else + part->h = size * h / (float)n; + h_max = fmaxf(h_max, part->h); + part->id = ++(*partId); + + part->time_bin = 1; + +#ifdef SWIFT_DEBUG_CHECKS + part->ti_drift = 8; + part->ti_kick = 8; +#endif + ++part; + } + } + } + + /* Construct the sparts */ + if (posix_memalign((void **)&cell->stars.parts, spart_align, + scount * sizeof(struct spart)) != 0) { + error("couldn't allocate particles, no. 
of particles: %d", (int)scount); + } + bzero(cell->stars.parts, scount * sizeof(struct spart)); + + struct spart *spart = cell->stars.parts; + for (size_t x = 0; x < n_stars; ++x) { + for (size_t y = 0; y < n_stars; ++y) { + for (size_t z = 0; z < n_stars; ++z) { + spart->x[0] = + offset[0] + size * (x + 0.5 + random_uniform(-0.5, 0.5) * pert) / + (float)n_stars; + spart->x[1] = + offset[1] + size * (y + 0.5 + random_uniform(-0.5, 0.5) * pert) / + (float)n_stars; + spart->x[2] = + offset[2] + size * (z + 0.5 + random_uniform(-0.5, 0.5) * pert) / + (float)n_stars; + + spart->v[0] = 0; + spart->v[1] = 0; + spart->v[2] = 0; + if (h_pert) + spart->h = size * h * random_uniform(1.f, h_pert) / (float)n_stars; + else + spart->h = size * h / (float)n_stars; + h_max = fmaxf(h_max, spart->h); + spart->id = ++(*spartId); + + spart->time_bin = 1; + +#ifdef SWIFT_DEBUG_CHECKS + spart->ti_drift = 8; + spart->ti_kick = 8; +#endif + ++spart; + } + } + } + + /* Cell properties */ + cell->split = 0; + cell->hydro.h_max = h_max; + cell->hydro.count = count; + cell->stars.count = scount; + cell->hydro.dx_max_part = 0.; + cell->hydro.dx_max_sort = 0.; + cell->stars.dx_max_sort = 0.; + cell->width[0] = size; + cell->width[1] = size; + cell->width[2] = size; + cell->loc[0] = offset[0]; + cell->loc[1] = offset[1]; + cell->loc[2] = offset[2]; + + cell->hydro.ti_old_part = 8; + cell->hydro.ti_end_min = 8; + cell->hydro.ti_end_max = 8; + cell->grav.ti_old_part = 8; + cell->grav.ti_end_min = 8; + cell->grav.ti_end_max = 8; + cell->stars.ti_end_min = 8; + cell->nodeID = NODE_ID; + + shuffle_particles(cell->hydro.parts, cell->hydro.count); + shuffle_sparticles(cell->stars.parts, cell->stars.count); + + cell->hydro.sorted = 0; + for (int k = 0; k < 13; k++) cell->hydro.sort[k] = NULL; + + return cell; +} + +void clean_up(struct cell *ci) { + free(ci->hydro.parts); + free(ci->stars.parts); + for (int k = 0; k < 13; k++) + if (ci->hydro.sort[k] != NULL) free(ci->hydro.sort[k]); + free(ci); +} + 
+/** + * @brief Initializes all particles field to be ready for a density calculation + */ +void zero_particle_fields(struct cell *c) { + for (int pid = 0; pid < c->stars.count; pid++) { + stars_init_spart(&c->stars.parts[pid]); + } +} + +/** + * @brief Ends the loop by adding the appropriate coefficients + */ +void end_calculation(struct cell *c, const struct cosmology *cosmo) { + for (int pid = 0; pid < c->stars.count; pid++) { + stars_end_density(&c->stars.parts[pid], cosmo); + + /* Recover the common "Neighbour number" definition */ + c->stars.parts[pid].density.wcount *= pow_dimension(c->stars.parts[pid].h); + c->stars.parts[pid].density.wcount *= kernel_norm; + } +} + +/** + * @brief Dump all the particles to a file + */ +void dump_particle_fields(char *fileName, struct cell *main_cell, + struct cell **cells) { + FILE *file = fopen(fileName, "w"); + + /* Write header */ + fprintf(file, "# %4s %10s %10s %10s %13s %13s\n", "ID", "pos_x", "pos_y", + "pos_z", "wcount", "wcount_dh"); + + fprintf(file, "# Main cell --------------------------------------------\n"); + + /* Write main cell */ + for (int pid = 0; pid < main_cell->stars.count; pid++) { + fprintf(file, "%6llu %10f %10f %10f %13e %13e\n", + main_cell->stars.parts[pid].id, main_cell->stars.parts[pid].x[0], + main_cell->stars.parts[pid].x[1], main_cell->stars.parts[pid].x[2], + main_cell->stars.parts[pid].density.wcount, + main_cell->stars.parts[pid].density.wcount_dh); + } + + /* Write all other cells */ + for (int i = 0; i < 3; ++i) { + for (int j = 0; j < 3; ++j) { + for (int k = 0; k < 3; ++k) { + struct cell *cj = cells[i * 9 + j * 3 + k]; + if (cj == main_cell) continue; + + fprintf(file, + "# Offset: [%2d %2d %2d] -----------------------------------\n", + i - 1, j - 1, k - 1); + + for (int pjd = 0; pjd < cj->stars.count; pjd++) { + fprintf(file, "%6llu %10f %10f %10f %13e %13e\n", + cj->stars.parts[pjd].id, cj->stars.parts[pjd].x[0], + cj->stars.parts[pjd].x[1], cj->stars.parts[pjd].x[2], + 
cj->stars.parts[pjd].density.wcount, + cj->stars.parts[pjd].density.wcount_dh); + } + } + } + } + fclose(file); +} + +/* Just a forward declaration... */ +void runner_dopair_branch_stars_density(struct runner *r, struct cell *ci, + struct cell *cj); +void runner_doself_branch_stars_density(struct runner *r, struct cell *c); +void runner_dopair_subset_branch_stars_density(struct runner *r, + struct cell *restrict ci, + struct spart *restrict sparts_i, + int *restrict ind, int scount, + struct cell *restrict cj); +void runner_doself_subset_branch_stars_density(struct runner *r, + struct cell *restrict ci, + struct spart *restrict sparts, + int *restrict ind, int scount); + +/* And go... */ +int main(int argc, char *argv[]) { + +#ifdef HAVE_SETAFFINITY + engine_pin(); +#endif + + size_t runs = 0, particles = 0; + size_t sparticles = 0; + double h = 1.23485, size = 1.; + double perturbation = 0., h_pert = 0.; + char outputFileNameExtension[100] = ""; + char outputFileName[200] = ""; + + /* Initialize CPU frequency, this also starts time. 
*/ + unsigned long long cpufreq = 0; + clocks_set_cpufreq(cpufreq); + +/* Choke on FP-exceptions */ +#ifdef HAVE_FE_ENABLE_EXCEPT + feenableexcept(FE_DIVBYZERO | FE_INVALID | FE_OVERFLOW); +#endif + + /* Get some randomness going */ + srand(0); + + char c; + while ((c = getopt(argc, argv, "s:h:p:n:N:r:t:d:f:")) != -1) { + switch (c) { + case 'h': + sscanf(optarg, "%lf", &h); + break; + case 'p': + sscanf(optarg, "%lf", &h_pert); + break; + case 's': + sscanf(optarg, "%lf", &size); + break; + case 'n': + sscanf(optarg, "%zu", &particles); + break; + case 'N': + sscanf(optarg, "%zu", &sparticles); + break; + case 'r': + sscanf(optarg, "%zu", &runs); + break; + case 'd': + sscanf(optarg, "%lf", &perturbation); + break; + case 'f': + strcpy(outputFileNameExtension, optarg); + break; + case '?': + error("Unknown option."); + break; + } + } + + if (h < 0 || particles == 0 || runs == 0 || sparticles == 0) { + printf( + "\nUsage: %s -n PARTICLES_PER_AXIS -N SPARTICLES_PER_AXIS -r " + "NUMBER_OF_RUNS [OPTIONS...]\n" + "\nGenerates 27 cells, filled with particles on a Cartesian grid." + "\nThese are then interacted using runner_dopair_stars_density() and " + "runner_doself_stars_density()." + "\n\nOptions:" + "\n-h DISTANCE=1.2348 - Smoothing length in units of <x>" + "\n-p - Random fractional change in h, h=h*random(1,p)" + "\n-s size - Physical size of the cell" + "\n-d pert - Perturbation to apply to the particles [0,1[" + "\n-f fileName - Part of the file name used to save the dumps\n", + argv[0]); + exit(1); + } + + /* Help users... 
*/ + message("DOSELF1 function called: %s", DOSELF1_NAME); + message("DOPAIR1 function called: %s", DOPAIR1_NAME); + message("Smoothing length: h = %f", h * size); + message("Kernel: %s", kernel_name); + message("Neighbour target: N = %f", pow_dimension(h) * kernel_norm); + + printf("\n"); + + /* Build the infrastructure */ + struct space space; + space.periodic = 1; + space.dim[0] = 3.; + space.dim[1] = 3.; + space.dim[2] = 3.; + + struct hydro_props hp; + hp.eta_neighbours = h; + hp.h_tolerance = 1e0; + hp.h_max = FLT_MAX; + hp.max_smoothing_iterations = 1; + hp.CFL_condition = 0.1; + + struct stars_props stars_p; + stars_p.eta_neighbours = h; + stars_p.h_tolerance = 1e0; + stars_p.h_max = FLT_MAX; + stars_p.max_smoothing_iterations = 1; + + struct engine engine; + engine.s = &space; + engine.time = 0.1f; + engine.ti_current = 8; + engine.max_active_bin = num_time_bins; + engine.hydro_properties = &hp; + engine.stars_properties = &stars_p; + engine.nodeID = NODE_ID; + + struct cosmology cosmo; + cosmology_init_no_cosmo(&cosmo); + engine.cosmology = &cosmo; + + struct runner runner; + runner.e = &engine; + + /* Construct some cells */ + struct cell *cells[27]; + struct cell *main_cell; + static long long partId = 0; + long long spartId = particles * particles * particles * 27; + + for (int i = 0; i < 3; ++i) { + for (int j = 0; j < 3; ++j) { + for (int k = 0; k < 3; ++k) { + double offset[3] = {i * size, j * size, k * size}; + cells[i * 9 + j * 3 + k] = + make_cell(particles, sparticles, offset, size, h, &partId, &spartId, + perturbation, h_pert); + + runner_do_drift_part(&runner, cells[i * 9 + j * 3 + k], 0); + + runner_do_hydro_sort(&runner, cells[i * 9 + j * 3 + k], 0x1FFF, 0, 0); + runner_do_stars_sort(&runner, cells[i * 9 + j * 3 + k], 0x1FFF, 0, 0); + } + } + } + + /* Store the main cell for future use */ + main_cell = cells[13]; + + ticks timings[27]; + for (int i = 0; i < 27; i++) timings[i] = 0; + + ticks time = 0; + for (size_t i = 0; i < runs; ++i) { + 
/* Zero the fields */ + for (int j = 0; j < 27; ++j) zero_particle_fields(cells[j]); + + const ticks tic = getticks(); + +#if defined(TEST_DOSELF_SUBSET) || defined(TEST_DOPAIR_SUBSET) + int *pid = NULL; + int scount = 0; + if ((pid = (int *)malloc(sizeof(int) * main_cell->stars.count)) == NULL) + error("Can't allocate memory for pid."); + for (int k = 0; k < main_cell->stars.count; k++) + if (spart_is_active(&main_cell->stars.parts[k], &engine)) { + pid[scount] = k; + ++scount; + } +#endif + + /* Run all the pairs */ + for (int j = 0; j < 27; ++j) { + if (cells[j] != main_cell) { + const ticks sub_tic = getticks(); + +#ifdef TEST_DOPAIR_SUBSET + DOPAIR1_SUBSET(&runner, main_cell, main_cell->stars.parts, pid, scount, + cells[j]); +#else + DOPAIR1(&runner, main_cell, cells[j]); +#endif + + timings[j] += getticks() - sub_tic; + } + } + + /* And now the self-interaction */ + const ticks self_tic = getticks(); + +#ifdef TEST_DOSELF_SUBSET + DOSELF1_SUBSET(&runner, main_cell, main_cell->stars.parts, pid, scount); +#else + DOSELF1(&runner, main_cell); +#endif + + timings[13] += getticks() - self_tic; + + const ticks toc = getticks(); + time += toc - tic; + + /* Let's get physical ! 
*/ + end_calculation(main_cell, &cosmo); + + /* Dump if necessary */ + if (i % 50 == 0) { + sprintf(outputFileName, "swift_star_dopair_27_%.150s.dat", + outputFileNameExtension); + dump_particle_fields(outputFileName, main_cell, cells); + } + } + + /* Output timing */ + ticks corner_time = timings[0] + timings[2] + timings[6] + timings[8] + + timings[18] + timings[20] + timings[24] + timings[26]; + + ticks edge_time = timings[1] + timings[3] + timings[5] + timings[7] + + timings[9] + timings[11] + timings[15] + timings[17] + + timings[19] + timings[21] + timings[23] + timings[25]; + + ticks face_time = timings[4] + timings[10] + timings[12] + timings[14] + + timings[16] + timings[22]; + + message("Corner calculations took : %15lli ticks.", corner_time / runs); + message("Edge calculations took : %15lli ticks.", edge_time / runs); + message("Face calculations took : %15lli ticks.", face_time / runs); + message("Self calculations took : %15lli ticks.", timings[13] / runs); + message("SWIFT calculation took : %15lli ticks.", time / runs); + + /* Now perform a brute-force version for accuracy tests */ + + /* Zero the fields */ + for (int i = 0; i < 27; ++i) zero_particle_fields(cells[i]); + + const ticks tic = getticks(); + + /* Run all the brute-force pairs */ + for (int j = 0; j < 27; ++j) + if (cells[j] != main_cell) + pairs_all_stars_density(&runner, main_cell, cells[j]); + + /* And now the self-interaction */ + self_all_stars_density(&runner, main_cell); + + const ticks toc = getticks(); + + /* Let's get physical ! */ + end_calculation(main_cell, &cosmo); + + /* Dump */ + sprintf(outputFileName, "star_brute_force_27_%.150s.dat", + outputFileNameExtension); + dump_particle_fields(outputFileName, main_cell, cells); + + /* Output timing */ + message("Brute force calculation took : %15lli ticks.", toc - tic); + + /* Clean things to make the sanitizer happy ... 
*/ + for (int i = 0; i < 27; ++i) clean_up(cells[i]); + + return 0; +} diff --git a/tests/test27cellsStars.sh.in b/tests/test27cellsStars.sh.in new file mode 100644 index 0000000000000000000000000000000000000000..5385b86fca6bcd24878f51567266eb81b7c21772 --- /dev/null +++ b/tests/test27cellsStars.sh.in @@ -0,0 +1,85 @@ +#!/bin/bash + +# List each test that should be run +declare -a TEST_LIST=(test27cellsStars test27cellsStars_subset) + +# Run same test for each executable +for TEST in "${TEST_LIST[@]}" +do + # Test for particles with the same smoothing length + echo "" + + rm -f star_brute_force_27_standard.dat swift_star_dopair_27_standard.dat + + echo "Running ./$TEST -n 6 -N 7 -r 1 -d 0 -f standard" + ./$TEST -n 6 -N 7 -r 1 -d 0 -f standard + + if [ -e star_brute_force_27_standard.dat ] + then + if python @srcdir@/difffloat.py star_brute_force_27_standard.dat swift_star_dopair_27_standard.dat @srcdir@/star_tolerance_27_normal.dat 6 + then + echo "Accuracy test passed" + else + echo "Accuracy test failed" + exit 1 + fi + else + echo "Error Missing test output file" + exit 1 + fi + + echo "------------" + + + # Test for particles with random smoothing lengths + echo "" + + rm -f star_brute_force_27_standard.dat swift_star_dopair_27_standard.dat + + echo "Running ./$TEST -n 6 -N 7 -r 1 -d 0 -f standard -p 1.1" + ./$TEST -n 6 -N 7 -r 1 -d 0 -f standard -p 1.1 + + if [ -e star_brute_force_27_standard.dat ] + then + if python @srcdir@/difffloat.py star_brute_force_27_standard.dat swift_star_dopair_27_standard.dat @srcdir@/star_tolerance_27_perturbed_h.dat 6 + then + echo "Accuracy test passed" + else + echo "Accuracy test failed" + exit 1 + fi + else + echo "Error Missing test output file" + exit 1 + fi + + echo "------------" + + + # Test for particles with random smoothing lengths + echo "" + + rm -f star_brute_force_27_standard.dat swift_star_dopair_27_standard.dat + + echo "Running ./$TEST -n 6 -N 7 -r 1 -d 0 -f standard -p 1.3" + ./$TEST -n 6 -N 7 -r 1 -d 0 -f 
standard -p 1.3 + + if [ -e star_brute_force_27_standard.dat ] + then + if python @srcdir@/difffloat.py star_brute_force_27_standard.dat swift_star_dopair_27_standard.dat @srcdir@/star_tolerance_27_perturbed_h2.dat 6 + then + echo "Accuracy test passed" + else + echo "Accuracy test failed" + exit 1 + fi + else + echo "Error Missing test output file" + exit 1 + fi + + echo "------------" + +done + +exit $? diff --git a/tests/test27cellsStarsPerturbed.sh.in b/tests/test27cellsStarsPerturbed.sh.in new file mode 100644 index 0000000000000000000000000000000000000000..ddf258fc17e6054d801ea9c73b4d0bd274cfad12 --- /dev/null +++ b/tests/test27cellsStarsPerturbed.sh.in @@ -0,0 +1,76 @@ +#!/bin/bash + +# Test for particles with the same smoothing length + echo "" + + rm -f star_brute_force_27_perturbed.dat swift_star_dopair_27_perturbed.dat + + echo "Running ./test27cellsStars -n 6 -N 7 -r 1 -d 0.1 -f perturbed" + ./test27cellsStars -n 6 -N 7 -r 1 -d 0.1 -f perturbed + + if [ -e star_brute_force_27_perturbed.dat ] + then + if python @srcdir@/difffloat.py star_brute_force_27_perturbed.dat swift_star_dopair_27_perturbed.dat @srcdir@/star_tolerance_27_perturbed.dat 6 + then + echo "Accuracy test passed" + else + echo "Accuracy test failed" + exit 1 + fi + else + echo "Error Missing test output file" + exit 1 + fi + + echo "------------" + +# Test for particles with random smoothing lengths + echo "" + + rm -f star_brute_force_27_perturbed.dat swift_star_dopair_27_perturbed.dat + + echo "Running ./test27cellsStars -n 6 -N 7 -r 1 -d 0.1 -f perturbed -p 1.1" + ./test27cellsStars -n 6 -N 7 -r 1 -d 0.1 -f perturbed -p 1.1 + + if [ -e star_brute_force_27_perturbed.dat ] + then + if python @srcdir@/difffloat.py star_brute_force_27_perturbed.dat swift_star_dopair_27_perturbed.dat @srcdir@/star_tolerance_27_perturbed_h.dat 6 + then + echo "Accuracy test passed" + else + echo "Accuracy test failed" + exit 1 + fi + else + echo "Error Missing test output file" + exit 1 + fi + + echo 
"------------" + + +# Test for particles with random smoothing lengths + echo "" + + rm -f star_brute_force_27_perturbed.dat swift_star_dopair_27_perturbed.dat + + echo "Running ./test27cellsStars -n 6 -N 7 -r 1 -d 0.1 -f perturbed -p 1.3" + ./test27cellsStars -n 6 -N 7 -r 1 -d 0.1 -f perturbed -p 1.3 + + if [ -e star_brute_force_27_perturbed.dat ] + then + if python @srcdir@/difffloat.py star_brute_force_27_perturbed.dat swift_star_dopair_27_perturbed.dat @srcdir@/star_tolerance_27_perturbed_h2.dat 6 + then + echo "Accuracy test passed" + else + echo "Accuracy test failed" + exit 1 + fi + else + echo "Error Missing test output file" + exit 1 + fi + + echo "------------" + +exit $? diff --git a/tests/testActivePair.c b/tests/testActivePair.c index 6889a18887894af0a9434f786df21dbf842e87e5..402a9a7ac416a0b2651f628eb32988d8ad62a14f 100644 --- a/tests/testActivePair.c +++ b/tests/testActivePair.c @@ -33,7 +33,8 @@ /* Typdef function pointer for interaction function. */ typedef void (*interaction_func)(struct runner *, struct cell *, struct cell *); -typedef void (*init_func)(struct cell *, const struct cosmology *); +typedef void (*init_func)(struct cell *, const struct cosmology *, + const struct hydro_props *); typedef void (*finalise_func)(struct cell *, const struct cosmology *); /** @@ -62,14 +63,14 @@ struct cell *make_cell(size_t n, double *offset, double size, double h, struct cell *cell = (struct cell *)malloc(sizeof(struct cell)); bzero(cell, sizeof(struct cell)); - if (posix_memalign((void **)&cell->parts, part_align, + if (posix_memalign((void **)&cell->hydro.parts, part_align, count * sizeof(struct part)) != 0) { error("couldn't allocate particles, no. 
of particles: %d", (int)count); } - bzero(cell->parts, count * sizeof(struct part)); + bzero(cell->hydro.parts, count * sizeof(struct part)); /* Construct the parts */ - struct part *part = cell->parts; + struct part *part = cell->hydro.parts; for (size_t x = 0; x < n; ++x) { for (size_t y = 0; y < n; ++y) { for (size_t z = 0; z < n; ++z) { @@ -110,7 +111,8 @@ struct cell *make_cell(size_t n, double *offset, double size, double h, /* Set the thermodynamic variable */ #if defined(GADGET2_SPH) part->entropy = 1.f; -#elif defined(MINIMAL_SPH) || defined(HOPKINS_PU_SPH) +#elif defined(MINIMAL_SPH) || defined(HOPKINS_PU_SPH) || \ + defined(HOPKINS_PU_SPH_MONAGHAN) part->u = 1.f; #elif defined(HOPKINS_PE_SPH) part->entropy = 1.f; @@ -135,10 +137,10 @@ struct cell *make_cell(size_t n, double *offset, double size, double h, /* Cell properties */ cell->split = 0; - cell->h_max = h_max; - cell->count = count; - cell->dx_max_part = 0.; - cell->dx_max_sort = 0.; + cell->hydro.h_max = h_max; + cell->hydro.count = count; + cell->hydro.dx_max_part = 0.; + cell->hydro.dx_max_sort = 0.; cell->width[0] = size; cell->width[1] = size; cell->width[2] = size; @@ -146,43 +148,44 @@ struct cell *make_cell(size_t n, double *offset, double size, double h, cell->loc[1] = offset[1]; cell->loc[2] = offset[2]; - cell->ti_old_part = 8; - cell->ti_hydro_end_min = 8; - cell->ti_hydro_end_max = 10; + cell->hydro.ti_old_part = 8; + cell->hydro.ti_end_min = 8; + cell->hydro.ti_end_max = 10; cell->nodeID = NODE_ID; - shuffle_particles(cell->parts, cell->count); + shuffle_particles(cell->hydro.parts, cell->hydro.count); - cell->sorted = 0; - for (int k = 0; k < 13; k++) cell->sort[k] = NULL; + cell->hydro.sorted = 0; + for (int k = 0; k < 13; k++) cell->hydro.sort[k] = NULL; return cell; } void clean_up(struct cell *ci) { - free(ci->parts); + free(ci->hydro.parts); for (int k = 0; k < 13; k++) - if (ci->sort[k] != NULL) free(ci->sort[k]); + if (ci->hydro.sort[k] != NULL) free(ci->hydro.sort[k]); 
free(ci); } /** * @brief Initializes all particles field to be ready for a density calculation */ -void zero_particle_fields_density(struct cell *c, - const struct cosmology *cosmo) { - for (int pid = 0; pid < c->count; pid++) { - hydro_init_part(&c->parts[pid], NULL); +void zero_particle_fields_density(struct cell *c, const struct cosmology *cosmo, + const struct hydro_props *hydro_props) { + for (int pid = 0; pid < c->hydro.count; pid++) { + hydro_init_part(&c->hydro.parts[pid], NULL); } } /** * @brief Initializes all particles field to be ready for a force calculation */ -void zero_particle_fields_force(struct cell *c, const struct cosmology *cosmo) { - for (int pid = 0; pid < c->count; pid++) { - struct part *p = &c->parts[pid]; - struct xpart *xp = &c->xparts[pid]; +void zero_particle_fields_force(struct cell *c, const struct cosmology *cosmo, + const struct hydro_props *hydro_props) { + for (int pid = 0; pid < c->hydro.count; pid++) { + struct part *p = &c->hydro.parts[pid]; + struct xpart *xp = &c->hydro.xparts[pid]; /* Mimic the result of a density calculation */ #ifdef GADGET2_SPH @@ -209,7 +212,7 @@ void zero_particle_fields_force(struct cell *c, const struct cosmology *cosmo) { p->density.wcount = 48.f / (kernel_norm * pow_dimension(p->h)); p->density.wcount_dh = 0.f; #endif /* PRESSURE-ENTROPY */ -#ifdef HOPKINS_PU_SPH +#if defined(HOPKINS_PU_SPH) || defined(HOPKINS_PU_SPH_MONAGHAN) p->rho = 1.f; p->pressure_bar = 0.6666666; p->density.rho_dh = 0.f; @@ -219,7 +222,7 @@ void zero_particle_fields_force(struct cell *c, const struct cosmology *cosmo) { #endif /* PRESSURE-ENERGY */ /* And prepare for a round of force tasks. 
*/ - hydro_prepare_force(p, xp, cosmo); + hydro_prepare_force(p, xp, cosmo, hydro_props, 0.); hydro_reset_acceleration(p); } } @@ -228,12 +231,12 @@ void zero_particle_fields_force(struct cell *c, const struct cosmology *cosmo) { * @brief Ends the density loop by adding the appropriate coefficients */ void end_calculation_density(struct cell *c, const struct cosmology *cosmo) { - for (int pid = 0; pid < c->count; pid++) { - hydro_end_density(&c->parts[pid], cosmo); + for (int pid = 0; pid < c->hydro.count; pid++) { + hydro_end_density(&c->hydro.parts[pid], cosmo); /* Recover the common "Neighbour number" definition */ - c->parts[pid].density.wcount *= pow_dimension(c->parts[pid].h); - c->parts[pid].density.wcount *= kernel_norm; + c->hydro.parts[pid].density.wcount *= pow_dimension(c->hydro.parts[pid].h); + c->hydro.parts[pid].density.wcount *= kernel_norm; } } @@ -241,8 +244,8 @@ void end_calculation_density(struct cell *c, const struct cosmology *cosmo) { * @brief Ends the force loop by adding the appropriate coefficients */ void end_calculation_force(struct cell *c, const struct cosmology *cosmo) { - for (int pid = 0; pid < c->count; pid++) { - hydro_end_force(&c->parts[pid], cosmo); + for (int pid = 0; pid < c->hydro.count; pid++) { + hydro_end_force(&c->hydro.parts[pid], cosmo); } } @@ -257,16 +260,18 @@ void dump_particle_fields(char *fileName, struct cell *ci, struct cell *cj) { fprintf(file, "# ci --------------------------------------------\n"); - for (int pid = 0; pid < ci->count; pid++) { - fprintf(file, "%6llu %13e %13e\n", ci->parts[pid].id, - ci->parts[pid].density.wcount, ci->parts[pid].force.h_dt); + for (int pid = 0; pid < ci->hydro.count; pid++) { + fprintf(file, "%6llu %13e %13e\n", ci->hydro.parts[pid].id, + ci->hydro.parts[pid].density.wcount, + ci->hydro.parts[pid].force.h_dt); } fprintf(file, "# cj --------------------------------------------\n"); - for (int pjd = 0; pjd < cj->count; pjd++) { - fprintf(file, "%6llu %13e %13e\n", 
cj->parts[pjd].id, - cj->parts[pjd].density.wcount, cj->parts[pjd].force.h_dt); + for (int pjd = 0; pjd < cj->hydro.count; pjd++) { + fprintf(file, "%6llu %13e %13e\n", cj->hydro.parts[pjd].id, + cj->hydro.parts[pjd].density.wcount, + cj->hydro.parts[pjd].force.h_dt); } fclose(file); @@ -293,12 +298,12 @@ void test_pair_interactions(struct runner *runner, struct cell **ci, interaction_func vec_interaction, init_func init, finalise_func finalise) { - runner_do_sort(runner, *ci, 0x1FFF, 0, 0); - runner_do_sort(runner, *cj, 0x1FFF, 0, 0); + runner_do_hydro_sort(runner, *ci, 0x1FFF, 0, 0); + runner_do_hydro_sort(runner, *cj, 0x1FFF, 0, 0); /* Zero the fields */ - init(*ci, runner->e->cosmology); - init(*cj, runner->e->cosmology); + init(*ci, runner->e->cosmology, runner->e->hydro_properties); + init(*cj, runner->e->cosmology, runner->e->hydro_properties); /* Run the test */ vec_interaction(runner, *ci, *cj); @@ -313,8 +318,8 @@ void test_pair_interactions(struct runner *runner, struct cell **ci, /* Now perform a brute-force version for accuracy tests */ /* Zero the fields */ - init(*ci, runner->e->cosmology); - init(*cj, runner->e->cosmology); + init(*ci, runner->e->cosmology, runner->e->hydro_properties); + init(*cj, runner->e->cosmology, runner->e->hydro_properties); /* Run the brute-force test */ serial_interaction(runner, *ci, *cj); @@ -485,6 +490,7 @@ int main(int argc, char *argv[]) { struct space space; struct engine engine; struct cosmology cosmo; + struct hydro_props hydro_props; struct runner *runner; char c; static long long partId = 0; @@ -569,6 +575,8 @@ int main(int argc, char *argv[]) { cosmology_init_no_cosmo(&cosmo); engine.cosmology = &cosmo; + hydro_props_init_no_hydro(&hydro_props); + engine.hydro_properties = &hydro_props; if (posix_memalign((void **)&runner, SWIFT_STRUCT_ALIGNMENT, sizeof(struct runner)) != 0) { diff --git a/tests/testAdiabaticIndex.c b/tests/testAdiabaticIndex.c index 
60ecefa264f48bed2d4df205766dc392a1a03d0f..6aa794207f0e23e6a26060f3ef98b7ee841d7a32 100644 --- a/tests/testAdiabaticIndex.c +++ b/tests/testAdiabaticIndex.c @@ -34,7 +34,8 @@ */ void check_value(float a, float b, const char* s) { if (fabsf(a - b) / fabsf(a + b) > 1.e-6f) - error("Values are inconsistent: %12.15e %12.15e (%s)!", a, b, s); + error("Values are inconsistent: %12.15e %12.15e rel=%e (%s)!", a, b, + fabsf(a - b) / fabsf(a + b), s); } /** @@ -77,36 +78,61 @@ void check_constants(void) { void check_functions(float x) { float val_a, val_b; + const double xx = x; + +#if defined(HYDRO_GAMMA_5_3) +#define hydro_gamma_d (5. / 3.) +#elif defined(HYDRO_GAMMA_7_5) +#define hydro_gamma_d (7. / 5.) +#elif defined(HYDRO_GAMMA_4_3) +#define hydro_gamma_d (4. / 3.) +#elif defined(HYDRO_GAMMA_2_1) +#define hydro_gamma_d (2. / 1.) +#else +#error "Need to choose an adiabatic index!" +#endif + + val_a = pow(xx, hydro_gamma_d); + val_b = pow_gamma(x); + check_value(val_a, val_b, "x^gamma"); + + val_a = pow(xx, hydro_gamma_d - 1.0); + val_b = pow_gamma_minus_one(x); + check_value(val_a, val_b, "x^(gamma - 1)"); + + val_a = pow(xx, -(hydro_gamma_d - 1.0)); + val_b = pow_minus_gamma_minus_one(x); + check_value(val_a, val_b, "x^(-(gamma - 1))"); - val_a = powf(x, -hydro_gamma); + val_a = pow(xx, -hydro_gamma_d); val_b = pow_minus_gamma(x); check_value(val_a, val_b, "x^(-gamma)"); - val_a = powf(x, 2.0f / (hydro_gamma - 1.0f)); + val_a = pow(xx, 2.0 / (hydro_gamma_d - 1.0)); val_b = pow_two_over_gamma_minus_one(x); check_value(val_a, val_b, "x^(2/(gamma-1))"); - val_a = powf(x, 2.0f * hydro_gamma / (hydro_gamma - 1.0f)); + val_a = pow(xx, 2.0 * hydro_gamma_d / (hydro_gamma_d - 1.0)); val_b = pow_two_gamma_over_gamma_minus_one(x); check_value(val_a, val_b, "x^((2 gamma)/(gamma-1))"); - val_a = powf(x, 0.5f * (hydro_gamma - 1.0f) / hydro_gamma); + val_a = pow(xx, (hydro_gamma_d - 1.0) / (2.0 * hydro_gamma_d)); val_b = pow_gamma_minus_one_over_two_gamma(x); check_value(val_a, val_b, 
"x^((gamma-1)/(2 gamma))"); - val_a = powf(x, -0.5f * (hydro_gamma + 1.0f) / hydro_gamma); + val_a = pow(xx, -(hydro_gamma_d + 1.0) / (2.0 * hydro_gamma_d)); val_b = pow_minus_gamma_plus_one_over_two_gamma(x); check_value(val_a, val_b, "x^(-(gamma+1)/(2 gamma))"); - val_a = powf(x, 1.0f / hydro_gamma); + val_a = pow(xx, 1.0 / hydro_gamma_d); val_b = pow_one_over_gamma(x); check_value(val_a, val_b, "x^(1/gamma)"); - val_a = powf(x, 3.f * hydro_gamma - 2.f); + val_a = pow(xx, 3. * hydro_gamma_d - 2.); val_b = pow_three_gamma_minus_two(x); check_value(val_a, val_b, "x^(3gamma - 2)"); - val_a = powf(x, (3.f * hydro_gamma - 5.f) / 2.f); + val_a = pow(xx, (3. * hydro_gamma_d - 5.) / 2.); val_b = pow_three_gamma_minus_five_over_two(x); check_value(val_a, val_b, "x^((3gamma - 5)/2)"); } diff --git a/tests/testCbrt.c b/tests/testCbrt.c index b608f9a00d619570c298f4123038f930584a245c..3663e0e19ad2a5ad35d67703e00f5c0309a3eb00 100644 --- a/tests/testCbrt.c +++ b/tests/testCbrt.c @@ -125,5 +125,6 @@ int main(int argc, char *argv[]) { message("x * icbrtf took %9.3f %s (acc = %18.11e).", clocks_from_ticks(getticks() - tic_ours), clocks_getunit(), acc_ours); + free(data); return 0; } diff --git a/tests/testCooling.c b/tests/testCooling.c new file mode 100644 index 0000000000000000000000000000000000000000..727a9638b09b871e866fe787438a5707fd43ec6b --- /dev/null +++ b/tests/testCooling.c @@ -0,0 +1,204 @@ +/******************************************************************************* + * This file is part of SWIFT. + * Copyright (C) 2015 Matthieu Schaller (matthieu.schaller@durham.ac.uk). + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published + * by the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. 
+ * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public License + * along with this program. If not, see <http://www.gnu.org/licenses/>. + * + ******************************************************************************/ +#include "../config.h" + +/* Local headers. */ +#include "swift.h" + +#if 0 + +/* + * @brief Assign particle density and entropy corresponding to the + * hydrogen number density and internal energy specified. + * + * @param p Particle data structure + * @param xp extra particle structure + * @param us unit system struct + * @param cooling Cooling function data structure + * @param cosmo Cosmology data structure + * @param phys_const Physical constants data structure + * @param nh_cgs Hydrogen number density (cgs units) + * @param u Internal energy (cgs units) + * @param ti_current integertime to set cosmo quantities + */ +void set_quantities(struct part *restrict p, struct xpart *restrict xp, + const struct unit_system *restrict us, + const struct cooling_function_data *restrict cooling, + struct cosmology *restrict cosmo, + const struct phys_const *restrict phys_const, float nh_cgs, + double u, integertime_t ti_current) { + + /* Update cosmology quantities */ + cosmology_update(cosmo, phys_const, ti_current); + + /* calculate density */ + double hydrogen_number_density = nh_cgs / cooling->number_density_scale; + p->rho = hydrogen_number_density * phys_const->const_proton_mass / + p->chemistry_data.metal_mass_fraction[chemistry_element_H] * + (cosmo->a * cosmo->a * cosmo->a); + + /* update entropy based on internal energy */ + float pressure = (u * cosmo->a * cosmo->a) / cooling->internal_energy_scale * + p->rho * (hydro_gamma_minus_one); + p->entropy = pressure * (pow(p->rho, 
-hydro_gamma)); + xp->entropy_full = p->entropy; +} + +/* + * @brief Produces contributions to cooling rates for different + * hydrogen number densities, from different metals, + * tests 1d and 4d table interpolations produce + * same results for cooling rate, dlambda/du and temperature. + */ +int main(int argc, char **argv) { + // Declare relevant structs + struct swift_params *params = malloc(sizeof(struct swift_params)); + struct unit_system us; + struct chemistry_global_data chem_data; + struct part p; + struct xpart xp; + struct phys_const phys_const; + struct cooling_function_data cooling; + struct cosmology cosmo; + char *parametersFileName = "./testCooling.yml"; + + float nh; // hydrogen number density + double u; // internal energy + + /* Number of values to test for in redshift, + * hydrogen number density and internal energy */ + const int n_z = 50; + const int n_nh = 50; + const int n_u = 50; + + /* Number of subcycles and tolerance used to compare + * subcycled and implicit solution. 
Note, high value + * of tolerance due to mismatch between explicit and + * implicit solution for large timesteps */ + const int n_subcycle = 1000; + const float integration_tolerance = 0.2; + + /* Set dt */ + const float dt_cool = 1.0e-5; + const float dt_therm = 1.0e-5; + + /* Read the parameter file */ + if (params == NULL) error("Error allocating memory for the parameter file."); + message("Reading runtime parameters from file '%s'", parametersFileName); + parser_read_file(parametersFileName, params); + + /* Init units */ + units_init_from_params(&us, params, "InternalUnitSystem"); + phys_const_init(&us, params, &phys_const); + + /* Init chemistry */ + chemistry_init(params, &us, &phys_const, &chem_data); + chemistry_first_init_part(&phys_const, &us, &cosmo, &chem_data, &p, &xp); + chemistry_print(&chem_data); + + /* Init cosmology */ + cosmology_init(params, &us, &phys_const, &cosmo); + cosmology_print(&cosmo); + + /* Init cooling */ + cooling_init(params, &us, &phys_const, &cooling); + cooling_print(&cooling); + cooling_update(&cosmo, &cooling, 0); + + /* Calculate abundance ratios */ + float *abundance_ratio; + abundance_ratio = malloc((chemistry_element_count + 2) * sizeof(float)); + abundance_ratio_to_solar(&p, &cooling, abundance_ratio); + + /* extract mass fractions, calculate table indices and offsets */ + float XH = p.chemistry_data.metal_mass_fraction[chemistry_element_H]; + float HeFrac = + p.chemistry_data.metal_mass_fraction[chemistry_element_He] / + (XH + p.chemistry_data.metal_mass_fraction[chemistry_element_He]); + int He_i; + float d_He; + get_index_1d(cooling.HeFrac, cooling.N_He, HeFrac, &He_i, &d_He); + + /* Cooling function needs to know the minimal energy. Set it to the lowest + * internal energy in the cooling table. 
*/ + struct hydro_props hydro_properties; + hydro_properties.minimal_internal_energy = + exp(M_LN10 * cooling.Therm[0]) / cooling.internal_energy_scale; + + /* calculate spacing in nh and u */ + const float delta_nh = (cooling.nH[cooling.N_nH - 1] - cooling.nH[0]) / n_nh; + const float delta_u = + (cooling.Therm[cooling.N_Temp - 1] - cooling.Therm[0]) / n_u; + + for (int z_i = 0; z_i < n_z; z_i++) { + integertime_t ti_current = max_nr_timesteps / n_z * z_i; + for (int nh_i = 0; nh_i < n_nh; nh_i++) { + nh = exp(M_LN10 * cooling.nH[0] + delta_nh * nh_i); + for (int u_i = 0; u_i < n_u; u_i++) { + u = exp(M_LN10 * cooling.Therm[0] + delta_u * u_i); + + /* update nh, u, z */ + set_quantities(&p, &xp, &us, &cooling, &cosmo, &phys_const, nh, u, + ti_current); + + /* calculate subcycled solution */ + for (int t_subcycle = 0; t_subcycle < n_subcycle; t_subcycle++) { + p.entropy_dt = 0; + cooling_cool_part(&phys_const, &us, &cosmo, &hydro_properties, + &cooling, &p, &xp, dt_cool / n_subcycle, + dt_therm / n_subcycle); + xp.entropy_full += p.entropy_dt * dt_therm / n_subcycle; + } + double u_subcycled = + hydro_get_physical_internal_energy(&p, &xp, &cosmo) * + cooling.internal_energy_scale; + + /* reset quantities to nh, u, and z that we want to test */ + set_quantities(&p, &xp, &us, &cooling, &cosmo, &phys_const, nh, u, + ti_current); + + /* compute implicit solution */ + cooling_cool_part(&phys_const, &us, &cosmo, &hydro_properties, &cooling, + &p, &xp, dt_cool, dt_therm); + double u_implicit = + hydro_get_physical_internal_energy(&p, &xp, &cosmo) * + cooling.internal_energy_scale; + + /* check if the two solutions are consistent */ + if (fabs((u_implicit - u_subcycled) / u_subcycled) > + integration_tolerance) + message( + "implicit and subcycled solutions do not match. 
z_i %d nh_i %d " + "u_i %d implicit %.5e subcycled %.5e error %.5e", + z_i, nh_i, u_i, u_implicit, u_subcycled, + fabs((u_implicit - u_subcycled) / u_subcycled)); + } + } + } + message("done test"); + + free(params); + return 0; +} + +#else + +int main(int argc, char **argv) { return 0; } + +#endif diff --git a/tests/testCooling.yml b/tests/testCooling.yml new file mode 100644 index 0000000000000000000000000000000000000000..faec32cdfec20b48af7341889c79b60bd2f6bb5b --- /dev/null +++ b/tests/testCooling.yml @@ -0,0 +1,107 @@ +# Define the system of units to use internally. +InternalUnitSystem: + UnitMass_in_cgs: 1.989e43 # 10^10 M_sun in grams + UnitLength_in_cgs: 3.085678e24 # Mpc in centimeters + UnitVelocity_in_cgs: 1e5 # km/s in centimeters per second + UnitCurrent_in_cgs: 1 # Amperes + UnitTemp_in_cgs: 1 # Kelvin + +# Cosmological parameters +Cosmology: + h: 0.6777 # Reduced Hubble constant + a_begin: 0.1 # Initial scale-factor of the simulation + a_end: 1.0 # Final scale factor of the simulation + Omega_m: 0.307 # Matter density parameter + Omega_lambda: 0.693 # Dark-energy density parameter + Omega_b: 0.0455 # Baryon density parameter + +# Parameters governing the time integration +TimeIntegration: + time_begin: 0. # The starting time of the simulation (in internal units). + time_end: 1e-2 # The end time of the simulation (in internal units). + dt_min: 1e-10 # The minimal time-step size of the simulation (in internal units). + dt_max: 1e-7 # The maximal time-step size of the simulation (in internal units). 
+ +Scheduler: + max_top_level_cells: 15 + +# Parameters governing the snapshots +Snapshots: + basename: coolingBox # Common part of the name of output files + scale_factor_first: 0.142857142857 # Scale-factor of the first snaphot (cosmological run) + time_first: 0.01 # Time of the first output (non-cosmological run) (in internal units) + delta_time: 1.00002 # Time difference between consecutive outputs (in internal units) + compression: 1 + +# Parameters governing the conserved quantities statistics +Statistics: + scale_factor_first: 0.142857142857 # Scale-factor of the first stat dump (cosmological run) + time_first: 0.01 # Time of the first stat dump (non-cosmological run) (in internal units) + delta_time: 1.00002 # Time between statistics output + +# Parameters for the self-gravity scheme +Gravity: + eta: 0.025 # Constant dimensionless multiplier for time integration. + theta: 0.85 # Opening angle (Multipole acceptance criterion) + comoving_softening: 0.0026994 # Comoving softening length (in internal units). + max_physical_softening: 0.0007 # Physical softening length (in internal units). + +# Parameters for the hydrodynamics scheme +SPH: + resolution_eta: 1.2348 # Target smoothing length in units of the mean inter-particle separation (1.2348 == 48Ngbs with the cubic spline kernel). + CFL_condition: 0.1 # Courant-Friedrich-Levy condition for time integration. + minimal_temperature: 100. 
# Kelvin + +# Parameters related to the initial conditions +InitialConditions: + file_name: ./coolingBox.hdf5 # The file to read + periodic: 1 + +# Dimensionless pre-factor for the time-step condition +LambdaCooling: + lambda_nH2_cgs: 1e-22 # Cooling rate divided by square Hydrogen number density (in cgs units [erg * s^-1 * cm^3]) + cooling_tstep_mult: 1.0 # Dimensionless pre-factor for the time-step condition + +# Dimensionless constant cooling (AB 13/02/18) +ConstCooling: + cooling_rate: 10000.0 + min_energy: 0.0 + cooling_tstep_mult: 1.0 + +# Cooling with Grackle 2.0 +GrackleCooling: + CloudyTable: CloudyData_UVB=HM2012.h5 # Name of the Cloudy Table (available on the grackle bitbucket repository) + WithUVbackground: 0 # Enable or not the UV background + Redshift: 0 # Redshift to use (-1 means time based redshift) + WithMetalCooling: 1 # Enable or not the metal cooling + ProvideVolumetricHeatingRates: 0 # User provide volumetric heating rates + ProvideSpecificHeatingRates: 0 # User provide specific heating rates + SelfShieldingMethod: 0 # Grackle (<= 3) or Gear self shielding method + OutputMode: 1 # Write in output corresponding primordial chemistry mode + MaxSteps: 1000 + ConvergenceLimit: 1e-2 + +EagleCooling: + filename: /cosma5/data/Eagle/BG_Tables/CoolingTables/ + reionisation_redshift: 8.989 + He_reion_z_centre: 3.5 + He_reion_z_sigma: 0.5 + He_reion_ev_pH: 2.0 + +EAGLEChemistry: + InitMetallicity: 0.014 + InitAbundance_Hydrogen: 0.70649785 + InitAbundance_Helium: 0.28055534 + InitAbundance_Carbon: 2.0665436e-3 + InitAbundance_Nitrogen: 8.3562563e-4 + InitAbundance_Oxygen: 5.4926244e-3 + InitAbundance_Neon: 1.4144605e-3 + InitAbundance_Magnesium: 5.907064e-4 + InitAbundance_Silicon: 6.825874e-4 + InitAbundance_Iron: 1.1032152e-3 + CalciumOverSilicon: 0.0941736 + SulphurOverSilicon: 0.6054160 + +GearChemistry: + InitialMetallicity: 0.01295 + diff --git a/tests/testCosmology.c b/tests/testCosmology.c index 
698351ad952e7d0b5f7d8e354c45a1a2dd53f968..bafad55471453f7308d1498daa15dbae3a76a6bc 100644 --- a/tests/testCosmology.c +++ b/tests/testCosmology.c @@ -24,7 +24,7 @@ #include "swift.h" #define N_CHECK 20 -#define TOLERANCE 1e-3 +#define TOLERANCE 1e-7 void test_params_init(struct swift_params *params) { parser_init("", params); @@ -72,5 +72,6 @@ int main(int argc, char *argv[]) { message("Everything seems fine with cosmology."); + cosmology_clean(&cosmo); return 0; } diff --git a/tests/testDump.c b/tests/testDump.c index f47a44256536d6ac1d9676c844f7081a6daa5ca4..878daae9cc0deddd6f9fb02857041f705110743c 100644 --- a/tests/testDump.c +++ b/tests/testDump.c @@ -73,7 +73,7 @@ int main(int argc, char *argv[]) { for (int run = 0; run < num_runs; run++) { /* Ensure capacity. */ - dump_ensure(&d, 7 * chunk_size); + dump_ensure(&d, 7 * chunk_size, 7 * chunk_size); /* Dump a few numbers. */ printf("dumping %i chunks...\n", chunk_size); diff --git a/tests/testEOS.c b/tests/testEOS.c index d090d83d77a16b2c5b4506c5f9224b3e4434d1be..4a1e666b47acc55a5ed7f1800e7199a1abb5e821 100644 --- a/tests/testEOS.c +++ b/tests/testEOS.c @@ -74,6 +74,11 @@ * P_1_0 ... ... P_1_num_u * ... ... ... ... * P_num_rho_0 ... P_num_rho_num_u + * c_0_0 c_0_1 ... c_0_num_u # Array of sound speeds, c(rho, + * u) + * c_1_0 ... ... c_1_num_u + * ... ... ... ... + * c_num_rho_0 ... c_num_rho_num_u * * Note that the values tested extend beyond the range that most EOS are * designed for (e.g. 
outside table limits), to help test the EOS in case of @@ -83,21 +88,24 @@ #ifdef EOS_PLANETARY int main(int argc, char *argv[]) { - float rho, log_rho, log_u, P; + float rho, u, log_rho, log_u, P, c; struct unit_system us; + struct swift_params *params = + (struct swift_params *)malloc(sizeof(struct swift_params)); + if (params == NULL) error("Error allocating memory for the parameter file."); const struct phys_const *phys_const = 0; // Unused placeholder - struct swift_params *params = 0; // Unused placeholder const float J_kg_to_erg_g = 1e4; // Convert J/kg to erg/g char filename[64]; // Output table params const int num_rho = 100, num_u = 100; - float log_rho_min = logf(1e-4), log_rho_max = logf(30.f), - log_u_min = logf(1e4), log_u_max = logf(1e10), - log_rho_step = (log_rho_max - log_rho_min) / (num_rho - 1.f), + float log_rho_min = logf(1e-4f), log_rho_max = logf(1e3f), // Densities (cgs) + log_u_min = logf(1e4f), + log_u_max = logf(1e13f), // Sp. int. energies (SI) + log_rho_step = (log_rho_max - log_rho_min) / (num_rho - 1.f), log_u_step = (log_u_max - log_u_min) / (num_u - 1.f); float A1_rho[num_rho], A1_u[num_u]; // Sys args - int mat_id, do_output; + int mat_id_in, do_output; // Default sys args const int mat_id_def = eos_planetary_id_HM80_ice; const int do_output_def = 0; @@ -106,34 +114,40 @@ int main(int argc, char *argv[]) { switch (argc) { case 1: // Default both - mat_id = mat_id_def; + mat_id_in = mat_id_def; do_output = do_output_def; break; case 2: // Read mat_id, default do_output - mat_id = atoi(argv[1]); + mat_id_in = atoi(argv[1]); do_output = do_output_def; break; case 3: // Read both - mat_id = atoi(argv[1]); + mat_id_in = atoi(argv[1]); do_output = atoi(argv[2]); break; default: error("Invalid number of system arguments!\n"); - mat_id = mat_id_def; // Ignored, just here to keep the compiler happy + mat_id_in = mat_id_def; // Ignored, just here to keep the compiler happy do_output = do_output_def; }; + enum eos_planetary_material_id 
mat_id = + (enum eos_planetary_material_id)mat_id_in; + /* Greeting message */ printf("This is %s\n", package_description()); // Check material ID - // Material base type - switch ((int)(mat_id / eos_planetary_type_factor)) { + const enum eos_planetary_type_id type = + (enum eos_planetary_type_id)(mat_id / eos_planetary_type_factor); + + // Select the material base type + switch (type) { // Tillotson case eos_planetary_type_Til: switch (mat_id) { @@ -174,27 +188,23 @@ int main(int argc, char *argv[]) { }; break; - // ANEOS - case eos_planetary_type_ANEOS: + // SESAME + case eos_planetary_type_SESAME: switch (mat_id) { - case eos_planetary_id_ANEOS_iron: - printf(" ANEOS iron \n"); + case eos_planetary_id_SESAME_iron: + printf(" SESAME basalt 7530 \n"); break; - case eos_planetary_id_MANEOS_forsterite: - printf(" MANEOS forsterite \n"); + case eos_planetary_id_SESAME_basalt: + printf(" SESAME basalt 7530 \n"); break; - default: - error("Unknown material ID! mat_id = %d \n", mat_id); - }; - break; + case eos_planetary_id_SESAME_water: + printf(" SESAME water 7154 \n"); + break; - // SESAME - case eos_planetary_type_SESAME: - switch (mat_id) { - case eos_planetary_id_SESAME_iron: - printf(" SESAME iron \n"); + case eos_planetary_id_SS08_water: + printf(" Senft & Stewart (2008) SESAME-like water \n"); break; default: @@ -206,8 +216,11 @@ int main(int argc, char *argv[]) { error("Unknown material type! 
mat_id = %d \n", mat_id); } - // Convert to internal units (Earth masses and radii) - units_init(&us, 5.9724e27, 6.3710e8, 1.f, 1.f, 1.f); + // Convert to internal units + // Earth masses and radii + // units_init(&us, 5.9724e27, 6.3710e8, 1.f, 1.f, 1.f); + // SI + units_init(&us, 1000.f, 100.f, 1.f, 1.f, 1.f); log_rho_min -= logf(units_cgs_conversion_factor(&us, UNIT_CONV_DENSITY)); log_rho_max -= logf(units_cgs_conversion_factor(&us, UNIT_CONV_DENSITY)); log_u_min += logf(J_kg_to_erg_g / units_cgs_conversion_factor( @@ -215,11 +228,51 @@ int main(int argc, char *argv[]) { log_u_max += logf(J_kg_to_erg_g / units_cgs_conversion_factor( &us, UNIT_CONV_ENERGY_PER_UNIT_MASS)); + // Set the input parameters + // Which EOS to initialise + parser_set_param(params, "EoS:planetary_use_Til:1"); + parser_set_param(params, "EoS:planetary_use_HM80:1"); + parser_set_param(params, "EoS:planetary_use_SESAME:1"); + // Table file names + parser_set_param(params, + "EoS:planetary_HM80_HHe_table_file:" + "../examples/planetary_HM80_HHe.txt"); + parser_set_param(params, + "EoS:planetary_HM80_ice_table_file:" + "../examples/planetary_HM80_ice.txt"); + parser_set_param(params, + "EoS:planetary_HM80_rock_table_file:" + "../examples/planetary_HM80_rock.txt"); + parser_set_param(params, + "EoS:planetary_SESAME_iron_table_file:" + "../examples/planetary_SESAME_iron_2140.txt"); + parser_set_param(params, + "EoS:planetary_SESAME_basalt_table_file:" + "../examples/planetary_SESAME_basalt_7530.txt"); + parser_set_param(params, + "EoS:planetary_SESAME_water_table_file:" + "../examples/planetary_SESAME_water_7154.txt"); + parser_set_param(params, + "EoS:planetary_SS08_water_table_file:" + "../examples/planetary_SS08_water.txt"); + // Initialise the EOS materials eos_init(&eos, phys_const, &us, params); + // Manual debug testing + if (1) { + printf("\n ### MANUAL DEBUG TESTING ### \n"); + + rho = 5960; + u = 1.7e8; + P = gas_pressure_from_internal_energy(rho, u, eos_planetary_id_HM80_ice); + 
printf("u = %.2e, rho = %.2e, P = %.2e \n", u, rho, P); + + return 0; + } + // Output file - sprintf(filename, "testEOS_rho_u_P_%d.txt", mat_id); + sprintf(filename, "testEOS_rho_u_P_c_%d.txt", mat_id); FILE *f = fopen(filename, "w"); if (f == NULL) { printf("Could not open output file!\n"); @@ -270,6 +323,21 @@ int main(int argc, char *argv[]) { if (do_output == 1) fprintf(f, "\n"); } + + // Sound speeds + for (int i = 0; i < num_rho; i++) { + rho = A1_rho[i]; + + for (int j = 0; j < num_u; j++) { + c = gas_soundspeed_from_internal_energy(rho, A1_u[j], mat_id); + + if (do_output == 1) + fprintf(f, "%.6e ", + c * units_cgs_conversion_factor(&us, UNIT_CONV_SPEED)); + } + + if (do_output == 1) fprintf(f, "\n"); + } fclose(f); return 0; diff --git a/tests/testEOS.py b/tests/testEOS.py index 363bab200b58c65fa24cc033af4b8d3c04b7b503..a2a31a248a2073a834d9543b706a6a12ba12796c 100644 --- a/tests/testEOS.py +++ b/tests/testEOS.py @@ -18,8 +18,8 @@ # ############################################################################## """ -Plot the output of testEOS to show how the equation of state pressure varies -with density and specific internal energy. +Plot the output of testEOS to show how the equation of state pressure and sound +speed varies with density and specific internal energy. Usage: python testEOS.py (mat_id) @@ -37,9 +37,13 @@ Text file contains: P_1_0 ... ... P_1_num_u ... ... ... ... P_num_rho_0 ... P_num_rho_num_u + c_0_0 c_0_1 ... c_0_num_u # Array of sound speeds, c(rho, u) + c_1_0 ... ... c_1_num_u + ... ... ... ... + c_num_rho_0 ... c_num_rho_num_u Note that the values tested extend beyond the range that most EOS are -designed for (e.g. outside table limits), to help test the EOS in case of +designed for (e.g. outside table limits), to help test the EOS in cases of unexpected particle behaviour. 
""" @@ -58,8 +62,7 @@ type_factor = 100 Di_type = { 'Til' : 1, 'HM80' : 2, - 'ANEOS' : 3, - 'SESAME' : 4, + 'SESAME' : 3, } Di_material = { # Tillotson @@ -70,11 +73,11 @@ Di_material = { 'HM80_HHe' : Di_type['HM80']*type_factor, # Hydrogen-helium atmosphere 'HM80_ice' : Di_type['HM80']*type_factor + 1, # H20-CH4-NH3 ice mix 'HM80_rock' : Di_type['HM80']*type_factor + 2, # SiO2-MgO-FeS-FeO rock mix - # ANEOS - 'ANEOS_iron' : Di_type['ANEOS']*type_factor, - 'MANEOS_forsterite' : Di_type['ANEOS']*type_factor + 1, # SESAME - 'SESAME_iron' : Di_type['SESAME']*type_factor, + 'SESAME_iron' : Di_type['SESAME']*type_factor, # 2140 + 'SESAME_basalt' : Di_type['SESAME']*type_factor + 1, # 7530 + 'SESAME_water' : Di_type['SESAME']*type_factor + 2, # 7154 + 'SS08_water' : Di_type['SESAME']*type_factor + 3, # Senft & Stewart (2008) } # Invert so the mat_id are the keys Di_mat_id = {mat_id : mat for mat, mat_id in Di_material.iteritems()} @@ -82,6 +85,7 @@ Di_mat_id = {mat_id : mat for mat, mat_id in Di_material.iteritems()} # Unit conversion Ba_to_Mbar = 1e-12 erg_g_to_J_kg = 1e-4 +cm_s_to_m_s = 1e-2 if __name__ == '__main__': # Sys args @@ -101,7 +105,7 @@ if __name__ == '__main__': for mat_id, mat in sorted(Di_mat_id.iteritems()): print " %s%s%d" % (mat, (20 - len("%s" % mat))*" ", mat_id) - filename = "testEOS_rho_u_P_%d.txt" % mat_id + filename = "testEOS_rho_u_P_c_%d.txt" % mat_id # Load the header info and density and energy arrays with open(filename) as f: @@ -110,31 +114,37 @@ if __name__ == '__main__': A1_rho = np.array(f.readline().split(), dtype=float) A1_u = np.array(f.readline().split(), dtype=float) - # Load pressure array + # Load pressure and soundspeed arrays A2_P = np.loadtxt(filename, skiprows=4) + A2_c = A2_P[num_rho:] + A2_P = A2_P[:num_rho] - # Convert pressures from cgs Barye to Mbar - A2_P *= Ba_to_Mbar # Convert energies from cgs to SI A1_u *= erg_g_to_J_kg + # Convert pressures from cgs (Barye) to Mbar + A2_P *= Ba_to_Mbar + # Convert sound speeds from 
cgs to SI + A1_u *= cm_s_to_m_s # Check that the numbers are right assert A1_rho.shape == (num_rho,) assert A1_u.shape == (num_u,) assert A2_P.shape == (num_rho, num_u) + assert A2_c.shape == (num_rho, num_u) # Plot + # Pressure: P(rho) at fixed u plt.figure(figsize=(7, 7)) ax = plt.gca() - # P(rho) at fixed u - num_u_fix = 9 - A1_idx = np.floor(np.linspace(0, num_u - 1, num_u_fix)).astype(int) - A1_colour = matplotlib.cm.rainbow(np.linspace(0, 1, num_u_fix)) + A1_colour = matplotlib.cm.rainbow(np.linspace(0, 1, num_u)) - for i, idx in enumerate(A1_idx): - plt.plot(A1_rho, A2_P[:, idx], c=A1_colour[i], - label=r"%.2e" % A1_u[idx]) + for i_u, u in enumerate(A1_u): + if i_u%10 == 0: + plt.plot(A1_rho, A2_P[:, i_u], c=A1_colour[i_u], + label=r"%.2e" % u) + else: + plt.plot(A1_rho, A2_P[:, i_u], c=A1_colour[i_u]) plt.legend(title="Sp. Int. Energy (J kg$^{-1}$)") plt.xscale('log') @@ -144,7 +154,31 @@ if __name__ == '__main__': plt.title(mat) plt.tight_layout() - plt.savefig("testEOS_%d.png" % mat_id) + plt.savefig("testEOS_P_%d.png" % mat_id) + plt.close() + + # Sound speed: c(rho) at fixed u + plt.figure(figsize=(7, 7)) + ax = plt.gca() + + A1_colour = matplotlib.cm.rainbow(np.linspace(0, 1, num_u)) + + for i_u, u in enumerate(A1_u): + if i_u%10 == 0: + plt.plot(A1_rho, A2_c[:, i_u], c=A1_colour[i_u], + label=r"%.2e" % u) + else: + plt.plot(A1_rho, A2_c[:, i_u], c=A1_colour[i_u]) + + plt.legend(title="Sp. Int. 
Energy (J kg$^{-1}$)") + plt.xscale('log') + plt.yscale('log') + plt.xlabel(r"Density (g cm$^{-3}$)") + plt.ylabel(r"Sound Speed (m s^{-1})") + plt.title(mat) + plt.tight_layout() + + plt.savefig("testEOS_c_%d.png" % mat_id) plt.close() diff --git a/tests/testEOS.sh b/tests/testEOS.sh index 411ac746be186bfe5758e03c2a852e081daefd10..bcd87eabbf15a962808843dda76d1829f2917c97 100755 --- a/tests/testEOS.sh +++ b/tests/testEOS.sh @@ -13,6 +13,10 @@ A1_mat_id=( 200 201 202 + 300 + 301 + 302 + 303 ) for mat_id in "${A1_mat_id[@]}" diff --git a/tests/testEOS_plot.sh b/tests/testEOS_plot.sh index 39108c5e19d8f4474de508e205951a1fd0aebcc9..5fd7f4976496223e467aae65b2846a8c4e1b7485 100755 --- a/tests/testEOS_plot.sh +++ b/tests/testEOS_plot.sh @@ -2,6 +2,8 @@ echo "" +rm -f testEOS*.png + echo "Plotting testEOS output for each planetary material" A1_mat_id=( @@ -11,6 +13,10 @@ A1_mat_id=( 200 201 202 + 300 + 301 + 302 + 303 ) for mat_id in "${A1_mat_id[@]}" diff --git a/tests/testFormat.sh.in b/tests/testFormat.sh.in new file mode 100644 index 0000000000000000000000000000000000000000..1d0fdeb1334ea7e9ac7b6605c23d7567a2c8c62b --- /dev/null +++ b/tests/testFormat.sh.in @@ -0,0 +1,6 @@ +#!/bin/bash + +set -e + +cd @srcdir@/.. +./format.sh --test diff --git a/tests/testGravityDerivatives.c b/tests/testGravityDerivatives.c index 184d66db623f34963dc91915c12fc58fbaa4ec4d..f31967de7075bccfb2c7fb19c1ba262aa12da54f 100644 --- a/tests/testGravityDerivatives.c +++ b/tests/testGravityDerivatives.c @@ -943,6 +943,13 @@ int main(int argc, char* argv[]) { message("Testing gravity for r=(%e %e %e)", dx, dy, dz); + const double r_s = 100. * ((double)rand() / (RAND_MAX)); + const double r_s_inv = 1. / r_s; + + const int periodic = 0; + + message("Mesh scale r_s=%e periodic=%d", r_s, periodic); + /* Compute distance */ const double r2 = dx * dx + dy * dy + dz * dz; const double r_inv = 1. 
/ sqrt(r2); @@ -952,8 +959,8 @@ int main(int argc, char* argv[]) { /* Compute all derivatives */ struct potential_derivatives_M2L pot; - compute_potential_derivatives_M2L(dx, dy, dz, r2, r_inv, eps, eps_inv, - /*periodic*/ 0, /* 1/r_s */ 0., &pot); + potential_derivatives_compute_M2L(dx, dy, dz, r2, r_inv, eps, eps_inv, + periodic, r_s_inv, &pot); /* Minimal value we care about */ const double min = 1e-9; diff --git a/tests/testInteractions.c b/tests/testInteractions.c index 0a7354f0d2a5e1853ba2c22d696dbb910de2b667..dae55a337642e1616e94119263ff8f1c2a617c89 100644 --- a/tests/testInteractions.c +++ b/tests/testInteractions.c @@ -16,12 +16,16 @@ * along with this program. If not, see <http://www.gnu.org/licenses/>. * ******************************************************************************/ +#include "../config.h" +/* Some standard headers. */ #include <fenv.h> #include <stdio.h> #include <stdlib.h> #include <string.h> #include <unistd.h> + +/* Local includes */ #include "swift.h" /* Other schemes need to be added here if they are not vectorized, otherwise @@ -107,9 +111,9 @@ struct part *make_particles(size_t count, double *offset, double spacing, */ void prepare_force(struct part *parts, size_t count) { -#if !defined(GIZMO_MFV_SPH) && !defined(SHADOWFAX_SPH) && \ - !defined(MINIMAL_SPH) && !defined(MINIMAL_MULTI_MAT_SPH) && \ - !defined(HOPKINS_PU_SPH) +#if !defined(GIZMO_MFV_SPH) && !defined(SHADOWFAX_SPH) && \ + !defined(MINIMAL_SPH) && !defined(PLANETARY_SPH) && \ + !defined(HOPKINS_PU_SPH) && !defined(HOPKINS_PU_SPH_MONAGHAN) struct part *p; for (size_t i = 0; i < count; ++i) { p = &parts[i]; @@ -136,20 +140,21 @@ void dump_indv_particle_fields(char *fileName, struct part *p) { "%8.5f %8.5f %13e %13e %13e %13e %13e %8.5f %8.5f\n", p->id, p->x[0], p->x[1], p->x[2], p->v[0], p->v[1], p->v[2], p->h, hydro_get_comoving_density(p), -#if defined(MINIMAL_SPH) || defined(MINIMAL_MULTI_MAT_SPH) || \ - defined(SHADOWFAX_SPH) +#if defined(MINIMAL_SPH) || 
defined(PLANETARY_SPH) || defined(SHADOWFAX_SPH) 0.f, #else p->density.div_v, #endif - hydro_get_comoving_entropy(p), hydro_get_comoving_internal_energy(p), + hydro_get_drifted_comoving_entropy(p), + hydro_get_drifted_comoving_internal_energy(p), hydro_get_comoving_pressure(p), hydro_get_comoving_soundspeed(p), p->a_hydro[0], p->a_hydro[1], p->a_hydro[2], p->force.h_dt, #if defined(GADGET2_SPH) p->force.v_sig, p->entropy_dt, 0.f #elif defined(DEFAULT_SPH) p->force.v_sig, 0.f, p->force.u_dt -#elif defined(MINIMAL_SPH) || defined(HOPKINS_PU_SPH) +#elif defined(MINIMAL_SPH) || defined(HOPKINS_PU_SPH) || \ + defined(HOPKINS_PU_SPH_MONAGHAN) p->force.v_sig, 0.f, p->u_dt #else 0.f, 0.f, 0.f @@ -192,10 +197,10 @@ int check_results(struct part serial_test_part, struct part *serial_parts, struct part vec_test_part, struct part *vec_parts, int count) { int result = 0; - result += compare_particles(serial_test_part, vec_test_part, ACC_THRESHOLD); + result += compare_particles(&serial_test_part, &vec_test_part, ACC_THRESHOLD); for (int i = 0; i < count; i++) - result += compare_particles(serial_parts[i], vec_parts[i], ACC_THRESHOLD); + result += compare_particles(&serial_parts[i], &vec_parts[i], ACC_THRESHOLD); return result; } @@ -553,7 +558,7 @@ void test_force_interactions(struct part test_part, struct part *parts, vizq[i] = pi_vec.v[2]; rhoiq[i] = pi_vec.rho; grad_hiq[i] = pi_vec.force.f; -#if !defined(HOPKINS_PU_SPH) +#if !defined(HOPKINS_PU_SPH) && !defined(HOPKINS_PU_SPH_MONAGHAN) pOrhoi2q[i] = pi_vec.force.P_over_rho2; #endif balsaraiq[i] = pi_vec.force.balsara; @@ -566,7 +571,7 @@ void test_force_interactions(struct part test_part, struct part *parts, vjzq[i] = pj_vec[i].v[2]; rhojq[i] = pj_vec[i].rho; grad_hjq[i] = pj_vec[i].force.f; -#if !defined(HOPKINS_PU_SPH) +#if !defined(HOPKINS_PU_SPH) && !defined(HOPKINS_PU_SPH_MONAGHAN) pOrhoj2q[i] = pj_vec[i].force.P_over_rho2; #endif balsarajq[i] = pj_vec[i].force.balsara; @@ -648,7 +653,7 @@ void 
test_force_interactions(struct part test_part, struct part *parts, VEC_HADD(a_hydro_zSum, piq[0]->a_hydro[2]); VEC_HADD(h_dtSum, piq[0]->force.h_dt); VEC_HMAX(v_sigSum, piq[0]->force.v_sig); -#if !defined(HOPKINS_PU_SPH) +#if !defined(HOPKINS_PU_SPH) && !defined(HOPKINS_PU_SPH_MONAGHAN) VEC_HADD(entropy_dtSum, piq[0]->entropy_dt); #endif diff --git a/tests/testLogger.c b/tests/testLogger.c index ee933500ab585d286c9dea7370b0d208573ca7d2..c5be0d7cc18742bdc2fa6167462579c45fd43e92 100644 --- a/tests/testLogger.c +++ b/tests/testLogger.c @@ -20,7 +20,8 @@ /* Config parameters. */ #include "../config.h" -#ifdef HAVE_POSIX_FALLOCATE /* Are we on a sensible platform? */ +#if defined(HAVE_POSIX_FALLOCATE) && \ + defined(WITH_LOGGER) /* Are we on a sensible platform? */ /* Some standard headers. */ #include <stdio.h> @@ -31,7 +32,8 @@ /* Local headers. */ #include "swift.h" -void test_log_parts(struct dump *d) { +void test_log_parts(struct logger *log) { + struct dump *d = log->dump; /* Write several copies of a part to the dump. */ struct part p; @@ -43,22 +45,22 @@ void test_log_parts(struct dump *d) { size_t offset = d->count; /* Write the full part. */ - logger_log_part(&p, + logger_log_part(log, &p, logger_mask_x | logger_mask_v | logger_mask_a | logger_mask_u | logger_mask_h | logger_mask_rho | logger_mask_consts, - &offset, d); + &offset); printf("Wrote part at offset %#016zx.\n", offset); /* Write only the position. */ p.x[0] = 2.0; - logger_log_part(&p, logger_mask_x, &offset, d); + logger_log_part(log, &p, logger_mask_x, &offset); printf("Wrote part at offset %#016zx.\n", offset); /* Write the position and velocity. */ p.x[0] = 3.0; p.v[0] = 0.3; - logger_log_part(&p, logger_mask_x | logger_mask_v, &offset, d); + logger_log_part(log, &p, logger_mask_x | logger_mask_v, &offset); printf("Wrote part at offset %#016zx.\n", offset); /* Recover the last part from the dump. 
*/ @@ -101,7 +103,8 @@ void test_log_parts(struct dump *d) { } } -void test_log_gparts(struct dump *d) { +void test_log_gparts(struct logger *log) { + struct dump *d = log->dump; /* Write several copies of a part to the dump. */ struct gpart p; @@ -113,21 +116,21 @@ void test_log_gparts(struct dump *d) { size_t offset = d->count; /* Write the full part. */ - logger_log_gpart(&p, + logger_log_gpart(log, &p, logger_mask_x | logger_mask_v | logger_mask_a | logger_mask_h | logger_mask_consts, - &offset, d); + &offset); printf("Wrote gpart at offset %#016zx.\n", offset); /* Write only the position. */ p.x[0] = 2.0; - logger_log_gpart(&p, logger_mask_x, &offset, d); + logger_log_gpart(log, &p, logger_mask_x, &offset); printf("Wrote gpart at offset %#016zx.\n", offset); /* Write the position and velocity. */ p.x[0] = 3.0; p.v_full[0] = 0.3; - logger_log_gpart(&p, logger_mask_x | logger_mask_v, &offset, d); + logger_log_gpart(log, &p, logger_mask_x | logger_mask_v, &offset); printf("Wrote gpart at offset %#016zx.\n", offset); /* Recover the last part from the dump. */ @@ -170,82 +173,100 @@ void test_log_gparts(struct dump *d) { } } -void test_log_timestamps(struct dump *d) { +void test_log_timestamps(struct logger *log) { + struct dump *d = log->dump; /* The timestamp to log. */ unsigned long long int t = 10; + double time = 0.1; /* Start with an offset at the end of the dump. */ size_t offset = d->count; /* Log three consecutive timestamps. 
*/ - logger_log_timestamp(t, &offset, d); + logger_log_timestamp(log, t, time, &offset); printf("Logged timestamp %020llu at offset %#016zx.\n", t, offset); t += 10; - logger_log_timestamp(t, &offset, d); + time = 0.2; + logger_log_timestamp(log, t, time, &offset); printf("Logged timestamp %020llu at offset %#016zx.\n", t, offset); t += 10; - logger_log_timestamp(t, &offset, d); + time = 0.3; + logger_log_timestamp(log, t, time, &offset); printf("Logged timestamp %020llu at offset %#016zx.\n", t, offset); /* Recover the three timestamps. */ size_t offset_old = offset; t = 0; - int mask = logger_read_timestamp(&t, &offset, (const char *)d->data); + time = 0; + int mask = logger_read_timestamp(&t, &time, &offset, (const char *)d->data); printf("Recovered timestamp %020llu at offset %#016zx with mask %#04x.\n", t, offset_old, mask); if (t != 30) { printf("FAIL: could not recover correct timestamp.\n"); abort(); } + if (time != 0.3) { + printf("FAIL: could not recover correct time %g.\n", time); + abort(); + } offset_old = offset; t = 0; - mask = logger_read_timestamp(&t, &offset, (const char *)d->data); + time = 0; + mask = logger_read_timestamp(&t, &time, &offset, (const char *)d->data); printf("Recovered timestamp %020llu at offset %#016zx with mask %#04x.\n", t, offset_old, mask); if (t != 20) { printf("FAIL: could not recover correct timestamp.\n"); abort(); } + if (time != 0.2) { + printf("FAIL: could not recover correct time.\n"); + abort(); + } offset_old = offset; t = 0; - mask = logger_read_timestamp(&t, &offset, (const char *)d->data); + time = 0; + mask = logger_read_timestamp(&t, &time, &offset, (const char *)d->data); printf("Recovered timestamp %020llu at offset %#016zx with mask %#04x.\n", t, offset_old, mask); if (t != 10) { printf("FAIL: could not recover correct timestamp.\n"); abort(); } + if (time != 0.1) { + printf("FAIL: could not recover correct time.\n"); + abort(); + } } int main(int argc, char *argv[]) { - /* Some constants. 
*/ - char filename[256]; - const int now = time(NULL); - sprintf(filename, "/tmp/SWIFT_logger_test_%d.out", now); - - /* Prepare a dump. */ - struct dump d; - dump_init(&d, filename, 1024 * 1024); + /* Prepare a logger. */ + struct logger log; + struct swift_params params; + parser_read_file("logger.yml", ¶ms); + logger_init(&log, ¶ms); /* Test writing/reading parts. */ - test_log_parts(&d); + test_log_parts(&log); /* Test writing/reading gparts. */ - test_log_gparts(&d); + test_log_gparts(&log); /* Test writing/reading timestamps. */ - test_log_timestamps(&d); - - /* Finalize the dump. */ - dump_close(&d); + test_log_timestamps(&log); /* Be clean */ + char filename[256]; + sprintf(filename, "%s.dump", log.base_name); remove(filename); + /* Clean the logger. */ + logger_clean(&log); + /* Return a happy number. */ return 0; } diff --git a/tests/testMatrixInversion.c b/tests/testMatrixInversion.c index a15e0dab7ec793cf4a914b6eb89c63863ab24fb0..8cd0f4e272a6b7e587619117e1aa143409976c51 100644 --- a/tests/testMatrixInversion.c +++ b/tests/testMatrixInversion.c @@ -16,9 +16,13 @@ * along with this program. If not, see <http://www.gnu.org/licenses/>. * ******************************************************************************/ +#include "../config.h" +/* Some standard headers. 
*/ #include <stdlib.h> #include <string.h> + +/* Local headers */ #include "const.h" #include "dimension.h" #include "error.h" diff --git a/tests/testOutputList.c b/tests/testOutputList.c index f3aa155577c992caeaee8516a298cb58b94ec2e1..dbc257aafdee85429d05dc517a86f496443ac0ed 100644 --- a/tests/testOutputList.c +++ b/tests/testOutputList.c @@ -29,12 +29,16 @@ /* Expected values from file */ const double time_values[Ntest] = { - 0., 10., 12., + 0., + 10., + 12., }; /* Expected values from file */ const double a_values[Ntest] = { - 0.5, 0.1, 0.01, + 0.01, + 0.1, + 0.5, }; void test_no_cosmo(struct engine *e, char *name, int with_assert) { @@ -55,10 +59,10 @@ void test_no_cosmo(struct engine *e, char *name, int with_assert) { output_list_init(&list, e, name, &delta_time, &output_time); output_list_print(list); - for(int i = 0; i < Ntest; i++) { + for (int i = 0; i < Ntest; i++) { /* Test last value */ if (with_assert) { - assert(abs(output_time - time_values[i]) < tol); + assert(fabs(output_time - time_values[i]) < tol); } /* Set current time */ @@ -69,12 +73,10 @@ void test_no_cosmo(struct engine *e, char *name, int with_assert) { integertime_t ti_next; output_list_read_next_time(list, e, name, &ti_next); - output_time = (double) (ti_next * e->time_base) + e->time_begin; + output_time = (double)(ti_next * e->time_base) + e->time_begin; } - - output_list_clean(list); - + output_list_clean(&list); }; void test_cosmo(struct engine *e, char *name, int with_assert) { @@ -93,10 +95,10 @@ void test_cosmo(struct engine *e, char *name, int with_assert) { output_list_init(&list, e, name, &delta_time, &output_time); output_list_print(list); - for(int i = 0; i < Ntest; i++) { + for (int i = 0; i < Ntest; i++) { /* Test last value */ if (with_assert) { - assert(abs(output_time - a_values[i]) < tol); + assert(fabs(output_time - a_values[i]) < tol); } /* Set current time */ @@ -107,15 +109,12 @@ void test_cosmo(struct engine *e, char *name, int with_assert) { integertime_t ti_next; 
output_list_read_next_time(list, e, name, &ti_next); - output_time = (double) exp(ti_next * e->time_base) * e->cosmology->a_begin; + output_time = (double)exp(ti_next * e->time_base) * e->cosmology->a_begin; } - output_list_clean(list); - + output_list_clean(&list); }; - - int main(int argc, char *argv[]) { /* Create a structure to read file into. */ struct swift_params params; @@ -146,12 +145,14 @@ int main(int argc, char *argv[]) { int without_assert = 0; /* Test without cosmo */ test_no_cosmo(&e, "Time", with_assert); - + /* Test with cosmo */ test_cosmo(&e, "Redshift", with_assert); test_cosmo(&e, "ScaleFactor", with_assert); test_cosmo(&e, "Time", without_assert); + cosmology_clean(&cosmo); + /* Write message and leave */ message("Test done"); return 0; diff --git a/tests/testParser.c b/tests/testParser.c index 3944e86fa19a1f623623383eabefe1094bf5addf..84ce70ff44fad0482573c740d5a174285655c08d 100644 --- a/tests/testParser.c +++ b/tests/testParser.c @@ -114,6 +114,8 @@ int main(int argc, char *argv[]) { int haveoptwords1 = parser_get_opt_param_string_array( ¶m_file, "Simulation:optwords", &nvar_result, &var_result, noptwords, optwords); + parser_free_param_string_array(nvar_result, var_result); + /* Check if we can read it again */ int haveoptwords2 = parser_get_opt_param_string_array( ¶m_file, "Simulation:optwords", &nvar_result, &var_result, noptwords, diff --git a/tests/testPeriodicBC.c b/tests/testPeriodicBC.c index de30b1af9ac8595cb081eb0702e9a7e7da13a162..be83f20a58b17f9a5fdcf967cda9a678aab5b8a9 100644 --- a/tests/testPeriodicBC.c +++ b/tests/testPeriodicBC.c @@ -81,16 +81,16 @@ struct cell *make_cell(size_t n, double *offset, double size, double h, struct cell *cell = (struct cell *)malloc(sizeof(struct cell)); bzero(cell, sizeof(struct cell)); - if (posix_memalign((void **)&cell->parts, part_align, + if (posix_memalign((void **)&cell->hydro.parts, part_align, count * sizeof(struct part)) != 0) { error("couldn't allocate particles, no. 
of particles: %d", (int)count); } - bzero(cell->parts, count * sizeof(struct part)); + bzero(cell->hydro.parts, count * sizeof(struct part)); float h_max = 0.f; /* Construct the parts */ - struct part *part = cell->parts; + struct part *part = cell->hydro.parts; for (size_t x = 0; x < n; ++x) { for (size_t y = 0; y < n; ++y) { for (size_t z = 0; z < n; ++z) { @@ -161,10 +161,10 @@ struct cell *make_cell(size_t n, double *offset, double size, double h, /* Cell properties */ cell->split = 0; - cell->h_max = h_max; - cell->count = count; - cell->dx_max_part = 0.; - cell->dx_max_sort = 0.; + cell->hydro.h_max = h_max; + cell->hydro.count = count; + cell->hydro.dx_max_part = 0.; + cell->hydro.dx_max_sort = 0.; cell->width[0] = size; cell->width[1] = size; cell->width[2] = size; @@ -172,23 +172,23 @@ struct cell *make_cell(size_t n, double *offset, double size, double h, cell->loc[1] = offset[1]; cell->loc[2] = offset[2]; - cell->ti_old_part = 8; - cell->ti_hydro_end_min = 8; - cell->ti_hydro_end_max = 8; + cell->hydro.ti_old_part = 8; + cell->hydro.ti_end_min = 8; + cell->hydro.ti_end_max = 8; cell->nodeID = NODE_ID; - shuffle_particles(cell->parts, cell->count); + shuffle_particles(cell->hydro.parts, cell->hydro.count); - cell->sorted = 0; - for (int k = 0; k < 13; k++) cell->sort[k] = NULL; + cell->hydro.sorted = 0; + for (int k = 0; k < 13; k++) cell->hydro.sort[k] = NULL; return cell; } void clean_up(struct cell *ci) { - free(ci->parts); + free(ci->hydro.parts); for (int k = 0; k < 13; k++) - if (ci->sort[k] != NULL) free(ci->sort[k]); + if (ci->hydro.sort[k] != NULL) free(ci->hydro.sort[k]); free(ci); } @@ -196,8 +196,8 @@ void clean_up(struct cell *ci) { * @brief Initializes all particles field to be ready for a density calculation */ void zero_particle_fields(struct cell *c) { - for (int pid = 0; pid < c->count; pid++) { - hydro_init_part(&c->parts[pid], NULL); + for (int pid = 0; pid < c->hydro.count; pid++) { + hydro_init_part(&c->hydro.parts[pid], NULL); } } 
@@ -205,8 +205,8 @@ void zero_particle_fields(struct cell *c) { * @brief Ends the loop by adding the appropriate coefficients */ void end_calculation(struct cell *c, const struct cosmology *cosmo) { - for (int pid = 0; pid < c->count; pid++) { - hydro_end_density(&c->parts[pid], cosmo); + for (int pid = 0; pid < c->hydro.count; pid++) { + hydro_end_density(&c->hydro.parts[pid], cosmo); } } @@ -228,27 +228,27 @@ void dump_particle_fields(char *fileName, struct cell *main_cell, int i, int j, i, j, k); /* Write main cell */ - for (int pid = 0; pid < main_cell->count; pid++) { + for (int pid = 0; pid < main_cell->hydro.count; pid++) { fprintf(file, "%6llu %10f %10f %10f %10f %10f %10f %13e %13e %13e %13e %13e " "%13e %13e %13e\n", - main_cell->parts[pid].id, main_cell->parts[pid].x[0], - main_cell->parts[pid].x[1], main_cell->parts[pid].x[2], - main_cell->parts[pid].v[0], main_cell->parts[pid].v[1], - main_cell->parts[pid].v[2], - hydro_get_comoving_density(&main_cell->parts[pid]), + main_cell->hydro.parts[pid].id, main_cell->hydro.parts[pid].x[0], + main_cell->hydro.parts[pid].x[1], main_cell->hydro.parts[pid].x[2], + main_cell->hydro.parts[pid].v[0], main_cell->hydro.parts[pid].v[1], + main_cell->hydro.parts[pid].v[2], + hydro_get_comoving_density(&main_cell->hydro.parts[pid]), #if defined(GIZMO_MFV_SPH) || defined(SHADOWFAX_SPH) 0.f, #else - main_cell->parts[pid].density.rho_dh, + main_cell->hydro.parts[pid].density.rho_dh, #endif - main_cell->parts[pid].density.wcount, - main_cell->parts[pid].density.wcount_dh, + main_cell->hydro.parts[pid].density.wcount, + main_cell->hydro.parts[pid].density.wcount_dh, #if defined(GADGET2_SPH) || defined(DEFAULT_SPH) || defined(HOPKINS_PE_SPH) - main_cell->parts[pid].density.div_v, - main_cell->parts[pid].density.rot_v[0], - main_cell->parts[pid].density.rot_v[1], - main_cell->parts[pid].density.rot_v[2] + main_cell->hydro.parts[pid].density.div_v, + main_cell->hydro.parts[pid].density.rot_v[0], + 
main_cell->hydro.parts[pid].density.rot_v[1], + main_cell->hydro.parts[pid].density.rot_v[2] #else 0., 0., 0., 0. #endif @@ -273,7 +273,7 @@ int check_results(struct part *serial_parts, struct part *vec_parts, int count, int result = 0; for (int i = 0; i < count; i++) - result += compare_particles(serial_parts[i], vec_parts[i], threshold); + result += compare_particles(&serial_parts[i], &vec_parts[i], threshold); return result; } @@ -505,8 +505,8 @@ int main(int argc, char *argv[]) { runner_do_drift_part(&runner, cells[i * (dim * dim) + j * dim + k], 0); - runner_do_sort(&runner, cells[i * (dim * dim) + j * dim + k], 0x1FFF, 0, - 0); + runner_do_hydro_sort(&runner, cells[i * (dim * dim) + j * dim + k], + 0x1FFF, 0, 0); } } } diff --git a/tests/testPotentialPair.c b/tests/testPotentialPair.c index 380d1fc979f129d46b08306d759eae3ff2739195..064c86d42f8df907d1ffaaab164b6a2f8b534b19 100644 --- a/tests/testPotentialPair.c +++ b/tests/testPotentialPair.c @@ -106,6 +106,7 @@ int main(int argc, char *argv[]) { e.time = 0.1f; e.ti_current = 8; e.time_base = 1e-10; + e.nodeID = 0; struct space s; s.periodic = 0; @@ -141,57 +142,59 @@ int main(int argc, char *argv[]) { bzero(&ci, sizeof(struct cell)); bzero(&cj, sizeof(struct cell)); + ci.nodeID = 0; ci.width[0] = 1.; ci.width[1] = 1.; ci.width[2] = 1.; ci.loc[0] = 0.; ci.loc[1] = 0.; ci.loc[2] = 0.; - ci.gcount = 1; - ci.ti_old_gpart = 8; - ci.ti_old_multipole = 8; - ci.ti_gravity_end_min = 8; - ci.ti_gravity_end_max = 8; + ci.grav.count = 1; + ci.grav.ti_old_part = 8; + ci.grav.ti_old_multipole = 8; + ci.grav.ti_end_min = 8; + ci.grav.ti_end_max = 8; + cj.nodeID = 0; cj.width[0] = 1.; cj.width[1] = 1.; cj.width[2] = 1.; cj.loc[0] = 1.; cj.loc[1] = 0.; cj.loc[2] = 0.; - cj.gcount = num_tests; - cj.ti_old_gpart = 8; - cj.ti_old_multipole = 8; - cj.ti_gravity_end_min = 8; - cj.ti_gravity_end_max = 8; + cj.grav.count = num_tests; + cj.grav.ti_old_part = 8; + cj.grav.ti_old_multipole = 8; + cj.grav.ti_end_min = 8; + 
cj.grav.ti_end_max = 8; /* Allocate multipoles */ - ci.multipole = + ci.grav.multipole = (struct gravity_tensors *)malloc(sizeof(struct gravity_tensors)); - cj.multipole = + cj.grav.multipole = (struct gravity_tensors *)malloc(sizeof(struct gravity_tensors)); - bzero(ci.multipole, sizeof(struct gravity_tensors)); - bzero(cj.multipole, sizeof(struct gravity_tensors)); + bzero(ci.grav.multipole, sizeof(struct gravity_tensors)); + bzero(cj.grav.multipole, sizeof(struct gravity_tensors)); /* Set the multipoles */ - ci.multipole->r_max = 0.1; - cj.multipole->r_max = 0.1; + ci.grav.multipole->r_max = 0.1; + cj.grav.multipole->r_max = 0.1; /* Allocate the particles */ - if (posix_memalign((void **)&ci.gparts, gpart_align, - ci.gcount * sizeof(struct gpart)) != 0) + if (posix_memalign((void **)&ci.grav.parts, gpart_align, + ci.grav.count * sizeof(struct gpart)) != 0) error("Error allocating gparts for cell ci"); - bzero(ci.gparts, ci.gcount * sizeof(struct gpart)); + bzero(ci.grav.parts, ci.grav.count * sizeof(struct gpart)); - if (posix_memalign((void **)&cj.gparts, gpart_align, - cj.gcount * sizeof(struct gpart)) != 0) + if (posix_memalign((void **)&cj.grav.parts, gpart_align, + cj.grav.count * sizeof(struct gpart)) != 0) error("Error allocating gparts for cell ci"); - bzero(cj.gparts, cj.gcount * sizeof(struct gpart)); + bzero(cj.grav.parts, cj.grav.count * sizeof(struct gpart)); /* Create the mass-less test particles */ for (int n = 0; n < num_tests; ++n) { - struct gpart *gp = &cj.gparts[n]; + struct gpart *gp = &cj.grav.parts[n]; gp->x[0] = 1. 
+ (n + 1) / ((double)num_tests); gp->x[1] = 0.5; @@ -202,6 +205,7 @@ int main(int argc, char *argv[]) { gp->id_or_neg_offset = n + 1; #ifdef SWIFT_DEBUG_CHECKS gp->ti_drift = 8; + gp->initialised = 1; #endif } @@ -210,15 +214,16 @@ int main(int argc, char *argv[]) { /***********************************************/ /* Create the massive particle */ - ci.gparts[0].x[0] = 0.; - ci.gparts[0].x[1] = 0.5; - ci.gparts[0].x[2] = 0.5; - ci.gparts[0].mass = 1.; - ci.gparts[0].time_bin = 1; - ci.gparts[0].type = swift_type_dark_matter; - ci.gparts[0].id_or_neg_offset = 1; + ci.grav.parts[0].x[0] = 0.; + ci.grav.parts[0].x[1] = 0.5; + ci.grav.parts[0].x[2] = 0.5; + ci.grav.parts[0].mass = 1.; + ci.grav.parts[0].time_bin = 1; + ci.grav.parts[0].type = swift_type_dark_matter; + ci.grav.parts[0].id_or_neg_offset = 1; #ifdef SWIFT_DEBUG_CHECKS - ci.gparts[0].ti_drift = 8; + ci.grav.parts[0].ti_drift = 8; + ci.grav.parts[0].initialised = 1; #endif /* Now compute the forces */ @@ -226,28 +231,29 @@ int main(int argc, char *argv[]) { /* Verify everything */ for (int n = 0; n < num_tests; ++n) { - const struct gpart *gp = &cj.gparts[n]; - const struct gpart *gp2 = &ci.gparts[0]; + const struct gpart *gp = &cj.grav.parts[n]; + const struct gpart *gp2 = &ci.grav.parts[0]; const double epsilon = gravity_get_softening(gp, &props); #if defined(POTENTIAL_GRAVITY) double pot_true = - potential(ci.gparts[0].mass, gp->x[0] - gp2->x[0], epsilon, rlr); + potential(ci.grav.parts[0].mass, gp->x[0] - gp2->x[0], epsilon, rlr); check_value(gp->potential, pot_true, "potential"); #endif double acc_true = - acceleration(ci.gparts[0].mass, gp->x[0] - gp2->x[0], epsilon, rlr); - check_value(gp->a_grav[0], acc_true, "acceleration"); + acceleration(ci.grav.parts[0].mass, gp->x[0] - gp2->x[0], epsilon, rlr); /* message("x=%e f=%e f_true=%e pot=%e pot_true=%e", gp->x[0] - gp2->x[0], - * gp->a_grav[0], acc_true, gp->potential, pot_true); */ + gp->a_grav[0], acc_true, gp->potential, pot_true); */ + + 
check_value(gp->a_grav[0], acc_true, "acceleration"); } message("\n\t\t P-P interactions all good\n"); /* Reset the accelerations */ - for (int n = 0; n < num_tests; ++n) gravity_init_gpart(&cj.gparts[n]); + for (int n = 0; n < num_tests; ++n) gravity_init_gpart(&cj.grav.parts[n]); /**********************************/ /* Test the basic PM interactions */ @@ -256,22 +262,22 @@ int main(int argc, char *argv[]) { /* Set an opening angle that allows P-M interactions */ props.theta_crit2 = 1.; - ci.gparts[0].mass = 0.; - ci.multipole->CoM[0] = 0.; - ci.multipole->CoM[1] = 0.5; - ci.multipole->CoM[2] = 0.5; + ci.grav.parts[0].mass = 0.; + ci.grav.multipole->CoM[0] = 0.; + ci.grav.multipole->CoM[1] = 0.5; + ci.grav.multipole->CoM[2] = 0.5; - bzero(&ci.multipole->m_pole, sizeof(struct multipole)); - bzero(&cj.multipole->m_pole, sizeof(struct multipole)); - ci.multipole->m_pole.M_000 = 1.; + bzero(&ci.grav.multipole->m_pole, sizeof(struct multipole)); + bzero(&cj.grav.multipole->m_pole, sizeof(struct multipole)); + ci.grav.multipole->m_pole.M_000 = 1.; /* Now compute the forces */ runner_dopair_grav_pp(&r, &ci, &cj, 1, 1); /* Verify everything */ for (int n = 0; n < num_tests; ++n) { - const struct gpart *gp = &cj.gparts[n]; - const struct gravity_tensors *mpole = ci.multipole; + const struct gpart *gp = &cj.grav.parts[n]; + const struct gravity_tensors *mpole = ci.grav.multipole; const double epsilon = gravity_get_softening(gp, &props); #if defined(POTENTIAL_GRAVITY) @@ -293,7 +299,7 @@ int main(int argc, char *argv[]) { #ifndef GADGET2_LONG_RANGE_CORRECTION /* Reset the accelerations */ - for (int n = 0; n < num_tests; ++n) gravity_init_gpart(&cj.gparts[n]); + for (int n = 0; n < num_tests; ++n) gravity_init_gpart(&cj.grav.parts[n]); /***************************************/ /* Test the truncated PM interactions */ @@ -310,8 +316,8 @@ int main(int argc, char *argv[]) { /* Verify everything */ for (int n = 0; n < num_tests; ++n) { - const struct gpart *gp = &cj.gparts[n]; 
- const struct gravity_tensors *mpole = ci.multipole; + const struct gpart *gp = &cj.grav.parts[n]; + const struct gravity_tensors *mpole = ci.grav.multipole; const double epsilon = gravity_get_softening(gp, &props); #if defined(POTENTIAL_GRAVITY) @@ -338,57 +344,58 @@ int main(int argc, char *argv[]) { /************************************************/ /* Reset the accelerations */ - for (int n = 0; n < num_tests; ++n) gravity_init_gpart(&cj.gparts[n]); + for (int n = 0; n < num_tests; ++n) gravity_init_gpart(&cj.grav.parts[n]); #if SELF_GRAVITY_MULTIPOLE_ORDER >= 3 /* Let's make ci more interesting */ - free(ci.gparts); - ci.gcount = 8; - if (posix_memalign((void **)&ci.gparts, gpart_align, - ci.gcount * sizeof(struct gpart)) != 0) + free(ci.grav.parts); + ci.grav.count = 8; + if (posix_memalign((void **)&ci.grav.parts, gpart_align, + ci.grav.count * sizeof(struct gpart)) != 0) error("Error allocating gparts for cell ci"); - bzero(ci.gparts, ci.gcount * sizeof(struct gpart)); + bzero(ci.grav.parts, ci.grav.count * sizeof(struct gpart)); /* Place particles on a simple cube of side-length 0.2 */ for (int n = 0; n < 8; ++n) { if (n & 1) - ci.gparts[n].x[0] = 0.0 - 0.1; + ci.grav.parts[n].x[0] = 0.0 - 0.1; else - ci.gparts[n].x[0] = 0.0 + 0.1; + ci.grav.parts[n].x[0] = 0.0 + 0.1; if (n & 2) - ci.gparts[n].x[1] = 0.5 - 0.1; + ci.grav.parts[n].x[1] = 0.5 - 0.1; else - ci.gparts[n].x[1] = 0.5 + 0.1; + ci.grav.parts[n].x[1] = 0.5 + 0.1; if (n & 2) - ci.gparts[n].x[2] = 0.5 - 0.1; + ci.grav.parts[n].x[2] = 0.5 - 0.1; else - ci.gparts[n].x[2] = 0.5 + 0.1; + ci.grav.parts[n].x[2] = 0.5 + 0.1; - ci.gparts[n].mass = 1. / 8.; + ci.grav.parts[n].mass = 1. 
/ 8.; - ci.gparts[n].time_bin = 1; - ci.gparts[n].type = swift_type_dark_matter; - ci.gparts[n].id_or_neg_offset = 1; + ci.grav.parts[n].time_bin = 1; + ci.grav.parts[n].type = swift_type_dark_matter; + ci.grav.parts[n].id_or_neg_offset = 1; #ifdef SWIFT_DEBUG_CHECKS - ci.gparts[n].ti_drift = 8; + ci.grav.parts[n].ti_drift = 8; + ci.grav.parts[n].initialised = 1; #endif } /* Now let's make a multipole out of it. */ - gravity_reset(ci.multipole); - gravity_P2M(ci.multipole, ci.gparts, ci.gcount); + gravity_reset(ci.grav.multipole); + gravity_P2M(ci.grav.multipole, ci.grav.parts, ci.grav.count); - gravity_multipole_print(&ci.multipole->m_pole); + gravity_multipole_print(&ci.grav.multipole->m_pole); /* Compute the forces */ runner_dopair_grav_pp(&r, &ci, &cj, 1, 1); /* Verify everything */ for (int n = 0; n < num_tests; ++n) { - const struct gpart *gp = &cj.gparts[n]; + const struct gpart *gp = &cj.grav.parts[n]; #if defined(POTENTIAL_GRAVITY) double pot_true = 0; @@ -396,7 +403,7 @@ int main(int argc, char *argv[]) { double acc_true[3] = {0., 0., 0.}; for (int i = 0; i < 8; ++i) { - const struct gpart *gp2 = &ci.gparts[i]; + const struct gpart *gp2 = &ci.grav.parts[i]; const double epsilon = gravity_get_softening(gp, &props); const double dx[3] = {gp2->x[0] - gp->x[0], gp2->x[1] - gp->x[1], @@ -417,7 +424,7 @@ int main(int argc, char *argv[]) { #endif check_value_backend(gp->a_grav[0], acc_true[0], "acceleration", 1e-2, 1e-6); - /* const struct gravity_tensors *mpole = ci.multipole; */ + /* const struct gravity_tensors *mpole = ci.grav.multipole; */ /* message("x=%e f=%e f_true=%e pot=%e pot_true=%e %e %e", */ /* gp->x[0] - mpole->CoM[0], gp->a_grav[0], acc_true[0], * gp->potential, */ @@ -428,9 +435,14 @@ int main(int argc, char *argv[]) { #endif - free(ci.multipole); - free(cj.multipole); - free(ci.gparts); - free(cj.gparts); + free(ci.grav.multipole); + free(cj.grav.multipole); + free(ci.grav.parts); + free(cj.grav.parts); + + /* Clean up the caches */ + 
gravity_cache_clean(&r.ci_gravity_cache); + gravity_cache_clean(&r.cj_gravity_cache); + return 0; } diff --git a/tests/testPotentialSelf.c b/tests/testPotentialSelf.c index 6bf5dbd405830f1ba1c58d8627606a67111f5fb0..10eb499570a591daaf0de2e011f2346077905e8e 100644 --- a/tests/testPotentialSelf.c +++ b/tests/testPotentialSelf.c @@ -137,32 +137,33 @@ int main(int argc, char *argv[]) { c.loc[0] = 0.; c.loc[1] = 0.; c.loc[2] = 0.; - c.gcount = 1 + num_tests; - c.ti_old_gpart = 8; - c.ti_gravity_end_min = 8; - c.ti_gravity_end_max = 8; + c.grav.count = 1 + num_tests; + c.grav.ti_old_part = 8; + c.grav.ti_end_min = 8; + c.grav.ti_end_max = 8; - if (posix_memalign((void **)&c.gparts, gpart_align, - c.gcount * sizeof(struct gpart)) != 0) + if (posix_memalign((void **)&c.grav.parts, gpart_align, + c.grav.count * sizeof(struct gpart)) != 0) error("Impossible to allocate memory for the gparts."); - bzero(c.gparts, c.gcount * sizeof(struct gpart)); + bzero(c.grav.parts, c.grav.count * sizeof(struct gpart)); /* Create the massive particle */ - c.gparts[0].x[0] = 0.; - c.gparts[0].x[1] = 0.5; - c.gparts[0].x[2] = 0.5; - c.gparts[0].mass = 1.; - c.gparts[0].time_bin = 1; - c.gparts[0].type = swift_type_dark_matter; - c.gparts[0].id_or_neg_offset = 1; + c.grav.parts[0].x[0] = 0.; + c.grav.parts[0].x[1] = 0.5; + c.grav.parts[0].x[2] = 0.5; + c.grav.parts[0].mass = 1.; + c.grav.parts[0].time_bin = 1; + c.grav.parts[0].type = swift_type_dark_matter; + c.grav.parts[0].id_or_neg_offset = 1; #ifdef SWIFT_DEBUG_CHECKS - c.gparts[0].ti_drift = 8; + c.grav.parts[0].ti_drift = 8; + c.grav.parts[0].initialised = 1; #endif /* Create the mass-less particles */ for (int n = 1; n < num_tests + 1; ++n) { - struct gpart *gp = &c.gparts[n]; + struct gpart *gp = &c.grav.parts[n]; gp->x[0] = n / ((double)num_tests); gp->x[1] = 0.5; @@ -173,6 +174,7 @@ int main(int argc, char *argv[]) { gp->id_or_neg_offset = n + 1; #ifdef SWIFT_DEBUG_CHECKS gp->ti_drift = 8; + gp->initialised = 1; #endif } @@ -181,21 
+183,27 @@ int main(int argc, char *argv[]) { /* Verify everything */ for (int n = 1; n < num_tests + 1; ++n) { - const struct gpart *gp = &c.gparts[n]; + const struct gpart *gp = &c.grav.parts[n]; const double epsilon = gravity_get_softening(gp, &props); #if defined(POTENTIAL_GRAVITY) - double pot_true = potential(c.gparts[0].mass, gp->x[0], epsilon, rlr); + double pot_true = potential(c.grav.parts[0].mass, gp->x[0], epsilon, rlr); check_value(gp->potential, pot_true, "potential"); #endif - double acc_true = acceleration(c.gparts[0].mass, gp->x[0], epsilon, rlr); + double acc_true = + acceleration(c.grav.parts[0].mass, gp->x[0], epsilon, rlr); check_value(gp->a_grav[0], acc_true, "acceleration"); // message("x=%e f=%e f_true=%e", gp->x[0], gp->a_grav[0], acc_true); } - free(c.gparts); + free(c.grav.parts); + + /* Clean up the caches */ + gravity_cache_clean(&r.ci_gravity_cache); + + /* All done! */ return 0; } diff --git a/tests/testReading.c b/tests/testReading.c index 5e6cee7f1e37f7615eb2c3b4edcaee1d4ebba319..d7d3fcbdae2f3ab744f338bb74e105644a5d88be 100644 --- a/tests/testReading.c +++ b/tests/testReading.c @@ -17,6 +17,9 @@ * ******************************************************************************/ +/* Some standard headers. */ +#include "../config.h" + /* Some standard headers. 
*/ #include <stdlib.h> @@ -26,7 +29,6 @@ int main(int argc, char *argv[]) { size_t Ngas = 0, Ngpart = 0, Nspart = 0; - int periodic = -1; int flag_entropy_ICs = -1; int i, j, k; double dim[3]; @@ -48,8 +50,8 @@ int main(int argc, char *argv[]) { /* Read data */ read_ic_single("input.hdf5", &us, dim, &parts, &gparts, &sparts, &Ngas, - &Ngpart, &Nspart, &periodic, &flag_entropy_ICs, 1, 1, 0, 0, 0, - 1., 1., 1, 0); + &Ngpart, &Nspart, &flag_entropy_ICs, 1, 1, 0, 0, 0, 1., 1., 1, + 0); /* Check global properties read are correct */ assert(dim[0] == boxSize); @@ -57,7 +59,6 @@ int main(int argc, char *argv[]) { assert(dim[2] == boxSize); assert(Ngas == L * L * L); assert(Ngpart == L * L * L); - assert(periodic == 1); /* Check particles */ for (size_t n = 0; n < Ngas; ++n) { diff --git a/tests/testRiemannTRRS.c b/tests/testRiemannTRRS.c index 2c7098367a1ca8db84f097ad01aa2e1e411c433d..e975230c61cd58ad1a077e9b66949044cb7708da 100644 --- a/tests/testRiemannTRRS.c +++ b/tests/testRiemannTRRS.c @@ -16,8 +16,12 @@ * along with this program. If not, see <http://www.gnu.org/licenses/>. * ******************************************************************************/ +#include "../config.h" +/* Local headers. 
*/ #include <string.h> + +/* Local includes */ #include "error.h" #include "riemann/riemann_trrs.h" #include "tools.h" diff --git a/tests/testSPHStep.c b/tests/testSPHStep.c index 63834d94b7696e160dd7ca487ab7e9f1e943abfb..41694872efbfc4d9611127eb1e6324b2b0fa5500 100644 --- a/tests/testSPHStep.c +++ b/tests/testSPHStep.c @@ -35,19 +35,19 @@ struct cell *make_cell(size_t N, float cellSize, int offset[3], int id_offset) { size_t x, y, z, size; size = count * sizeof(struct part); - if (posix_memalign((void **)&cell->parts, part_align, size) != 0) { + if (posix_memalign((void **)&cell->hydro.parts, part_align, size) != 0) { error("couldn't allocate particles"); } size = count * sizeof(struct xpart); - if (posix_memalign((void **)&cell->xparts, xpart_align, size) != 0) { + if (posix_memalign((void **)&cell->hydro.xparts, xpart_align, size) != 0) { error("couldn't allocate extended particles"); } h = 1.2348 * cellSize / N; - part = cell->parts; - xpart = cell->xparts; + part = cell->hydro.parts; + xpart = cell->hydro.xparts; memset(part, 0, count * sizeof(struct part)); memset(xpart, 0, count * sizeof(struct xpart)); for (x = 0; x < N; ++x) { @@ -68,20 +68,20 @@ struct cell *make_cell(size_t N, float cellSize, int offset[3], int id_offset) { } cell->split = 0; - cell->h_max = h; - cell->count = count; - cell->gcount = 0; - cell->dx_max_part = 0.; - cell->dx_max_sort = 0.; + cell->hydro.h_max = h; + cell->hydro.count = count; + cell->grav.count = 0; + cell->hydro.dx_max_part = 0.; + cell->hydro.dx_max_sort = 0.; cell->width[0] = cellSize; cell->width[1] = cellSize; cell->width[2] = cellSize; - cell->ti_hydro_end_min = 1; - cell->ti_hydro_end_max = 1; + cell->hydro.ti_end_min = 1; + cell->hydro.ti_end_max = 1; - cell->sorted = 0; - for (int k = 0; k < 13; k++) cell->sort[k] = NULL; + cell->hydro.sorted = 0; + for (int k = 0; k < 13; k++) cell->hydro.sort[k] = NULL; return cell; } @@ -128,9 +128,9 @@ int main(int argc, char *argv[]) { /* Set particle properties */ for (j = 
0; j < 27; ++j) - for (i = 0; i < cells[j]->count; ++i) { - cells[j]->parts[i].mass = dim * dim * dim * rho / (N * N * N); - cells[j]->parts[i].u = P / (hydro_gamma_minus_one * rho); + for (i = 0; i < cells[j]->hydro.count; ++i) { + cells[j]->hydro.parts[i].mass = dim * dim * dim * rho / (N * N * N); + cells[j]->hydro.parts[i].u = P / (hydro_gamma_minus_one * rho); } message("m=%f", dim * dim * dim * rho / (N * N * N)); @@ -171,7 +171,7 @@ int main(int argc, char *argv[]) { e.ti_current = 1; /* The tracked particle */ - p = &(ci->parts[N * N * N / 2 + N * N / 2 + N / 2]); + p = &(ci->hydro.parts[N * N * N / 2 + N * N / 2 + N / 2]); message("Studying particle p->id=%lld", p->id); @@ -209,10 +209,10 @@ int main(int argc, char *argv[]) { message("ti_end=%d", p->ti_end); for (int j = 0; j < 27; ++j) { - free(cells[j]->parts); - free(cells[j]->xparts); + free(cells[j]->hydro.parts); + free(cells[j]->hydro.xparts); for (int k = 0; k < 13; k++) - if (cells[j]->sort[k] != NULL) free(cells[j]->sort[k]); + if (cells[j]->hydro.sort[k] != NULL) free(cells[j]->hydro.sort[k]); free(cells[j]); } diff --git a/tests/testSelectOutput.c b/tests/testSelectOutput.c index 0b0adfa4e5a96f3431b27052bbb079f9be8838f2..01c80ce8f15f2be7d264ceecdb397950b822de35 100644 --- a/tests/testSelectOutput.c +++ b/tests/testSelectOutput.c @@ -44,7 +44,6 @@ void select_output_engine_init(struct engine *e, struct space *s, e->time = 0; e->snapshot_output_count = 0; e->snapshot_compression = 0; - e->snapshot_label_delta = 1; }; void select_output_space_init(struct space *s, double *dim, int periodic, @@ -86,8 +85,8 @@ int main(int argc, char *argv[]) { char *base_name = "testSelectOutput"; size_t Ngas = 0, Ngpart = 0, Nspart = 0; - int periodic = -1; int flag_entropy_ICs = -1; + int periodic = 1; double dim[3]; struct part *parts = NULL; struct gpart *gparts = NULL; @@ -112,8 +111,8 @@ int main(int argc, char *argv[]) { /* Read data */ message("Reading initial conditions."); read_ic_single("input.hdf5", 
&us, dim, &parts, &gparts, &sparts, &Ngas, - &Ngpart, &Nspart, &periodic, &flag_entropy_ICs, 1, 0, 0, 0, 0, - 1., 1., 1, 0); + &Ngpart, &Nspart, &flag_entropy_ICs, 1, 0, 0, 0, 0, 1., 1., 1, + 0); /* pseudo initialization of the space */ message("Initialization of the space."); diff --git a/tests/testSymmetry.c b/tests/testSymmetry.c index 886290ab984603d0afb3201377611598cd7163e4..ce1e2e9354c4d59a6e58619d43b743864ed38585 100644 --- a/tests/testSymmetry.c +++ b/tests/testSymmetry.c @@ -27,7 +27,10 @@ void print_bytes(void *p, size_t len) { printf("("); - for (size_t i = 0; i < len; ++i) printf("%02x", ((unsigned char *)p)[i]); + for (size_t i = 0; i < len; ++i) { + printf("%02x", ((unsigned char *)p)[i]); + if (i % 4 == 3) printf("|"); + } printf(")\n"); } @@ -162,8 +165,8 @@ void test(void) { if (i_not_ok) { printParticle_single(&pi, &xpi); printParticle_single(&pi2, &xpi); - print_bytes(&pj, sizeof(struct part)); - print_bytes(&pj2, sizeof(struct part)); + print_bytes(&pi, sizeof(struct part)); + print_bytes(&pi2, sizeof(struct part)); error("Particles 'pi' do not match after density (byte = %d)", i_not_ok); } if (j_not_ok) { @@ -220,17 +223,15 @@ void test(void) { j_not_ok |= c_is_d; } #else - i_not_ok = - strncmp((const char *)&pi, (const char *)&pi2, sizeof(struct part)); - j_not_ok = - strncmp((const char *)&pj, (const char *)&pj2, sizeof(struct part)); + i_not_ok = memcmp((char *)&pi, (char *)&pi2, sizeof(struct part)); + j_not_ok = memcmp((char *)&pj, (char *)&pj2, sizeof(struct part)); #endif if (i_not_ok) { printParticle_single(&pi, &xpi); printParticle_single(&pi2, &xpi); - print_bytes(&pj, sizeof(struct part)); - print_bytes(&pj2, sizeof(struct part)); + print_bytes(&pi, sizeof(struct part)); + print_bytes(&pi2, sizeof(struct part)); error("Particles 'pi' do not match after force (byte = %d)", i_not_ok); } if (j_not_ok) { diff --git a/tests/testTimeIntegration.c b/tests/testTimeIntegration.c index 
2034c402a2d626a7b503613f6cade821ec438151..b7f5201356ee52419038c8379dde14c9bab82055 100644 --- a/tests/testTimeIntegration.c +++ b/tests/testTimeIntegration.c @@ -83,9 +83,9 @@ int main(int argc, char *argv[]) { xparts[0].v_full[2] = 0.; /* Set the particle in the cell */ - c.parts = parts; - c.xparts = xparts; - c.count = 1; + c.hydro.parts = parts; + c.hydro.xparts = xparts; + c.hydro.count = 1; c.split = 0; /* Create an engine and a fake runner */ @@ -108,11 +108,13 @@ int main(int argc, char *argv[]) { eng.time += dt; /* Compute gravitational acceleration */ - float r2 = - c.parts[0].x[0] * c.parts[0].x[0] + c.parts[0].x[1] * c.parts[0].x[1]; + float r2 = c.hydro.parts[0].x[0] * c.hydro.parts[0].x[0] + + c.hydro.parts[0].x[1] * c.hydro.parts[0].x[1]; float r = sqrtf(r2); - c.parts[0].a_hydro[0] = -(G * M_sun * c.parts[0].x[0] / r * r * r); - c.parts[0].a_hydro[1] = -(G * M_sun * c.parts[0].x[1] / r * r * r); + c.hydro.parts[0].a_hydro[0] = + -(G * M_sun * c.hydro.parts[0].x[0] / r * r * r); + c.hydro.parts[0].a_hydro[1] = + -(G * M_sun * c.hydro.parts[0].x[1] / r * r * r); /* Kick... */ runner_do_kick2(&run, &c, 0); diff --git a/tests/testVoronoi2D.c b/tests/testVoronoi2D.c index 60a71624904c11a3cdb3b90906189df60bfc6956..5057278efaa3ba0e1ccec2ba6b032cd12b029ff9 100644 --- a/tests/testVoronoi2D.c +++ b/tests/testVoronoi2D.c @@ -16,6 +16,9 @@ * along with this program. If not, see <http://www.gnu.org/licenses/>. * ******************************************************************************/ +#include "../config.h" + +/* Local headers. */ #include "hydro/Shadowswift/voronoi2d_algorithm.h" #include "tools.h" diff --git a/tests/testVoronoi3D.c b/tests/testVoronoi3D.c index db5c33aa6e4ef0792373febd5d773a6d1198db29..5e0288fa9b3e13e0c6a6fb13db202e0f73f29a5b 100644 --- a/tests/testVoronoi3D.c +++ b/tests/testVoronoi3D.c @@ -16,8 +16,12 @@ * along with this program. If not, see <http://www.gnu.org/licenses/>. 
* ******************************************************************************/ +#include "../config.h" +/* Some standard headers. */ #include <stdlib.h> + +/* Local headers. */ #include "error.h" #include "hydro/Shadowswift/voronoi3d_algorithm.h" #include "part.h" diff --git a/theory/Cooling/bibliography.bib b/theory/Cooling/bibliography.bib new file mode 100644 index 0000000000000000000000000000000000000000..c0277fed06c19dbc428978517afc395d7e57d474 --- /dev/null +++ b/theory/Cooling/bibliography.bib @@ -0,0 +1,15 @@ +@ARTICLE{Wiersma2009, + author = {{Wiersma}, R.~P.~C. and {Schaye}, J. and {Smith}, B.~D.}, + title = "{The effect of photoionization on the cooling rates of enriched, astrophysical plasmas}", + journal = {\mnras}, +archivePrefix = "arXiv", + eprint = {0807.3748}, + keywords = {atomic processes , plasmas , cooling flows , galaxies: formation , intergalactic medium}, + year = 2009, + month = feb, + volume = 393, + pages = {99-107}, + doi = {10.1111/j.1365-2966.2008.14191.x}, + adsurl = {http://adsabs.harvard.edu/abs/2009MNRAS.393...99W}, + adsnote = {Provided by the SAO/NASA Astrophysics Data System} +} diff --git a/theory/Cooling/eagle_cooling.tex b/theory/Cooling/eagle_cooling.tex new file mode 100644 index 0000000000000000000000000000000000000000..db01f65f4cd4a48a66c62e640b9c0165626f4bdf --- /dev/null +++ b/theory/Cooling/eagle_cooling.tex @@ -0,0 +1,358 @@ +\documentclass[fleqn, usenatbib, useAMS, a4paper]{mnras} +\usepackage{graphicx} +\usepackage{amsmath,paralist,xcolor,xspace,amssymb} +\usepackage{times} +\usepackage{comment} +\usepackage[super]{nth} + +\newcommand{\todo}[1]{{\textcolor{red}{TODO: #1}\\}} +\newcommand{\swift}{{\sc Swift}\xspace} + +\newcommand{\D}[2]{\frac{d#1}{d#2}} +\newcommand{\LL}{\left(} +\newcommand{\RR}{\right)} + +\title{Integration scheme for cooling} +\author{Alexei Borissov, Matthieu Schaller} + +\begin{document} + +\maketitle + +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% 
+\section{Basic principles} + +\subsection{Isochoric cooling} + +\todo{MATTHIEU: Discuss the fact that we want to do cooling at constant + density.} + +\subsection{Time integration} + +We want to compute the change in internal energy of a given particle +due to the interaction of the gas with the background radiation. More +specifically we want to integrate the following equation: +\begin{equation} + u_{\rm final} \equiv u(t+\Delta t) = u(t) + \left(\frac{\partial u}{\partial t}\bigg|_{\rm + hydro} + \frac{\partial u}{\partial t}\bigg|_{\rm cooling}\right) + \times \Delta t. +\end{equation} +The first derivative is given by the SPH scheme, the second one is +what we are after here. We start by computing the internal energy the +particle would have in the absence of cooling: +\begin{equation} + u_0 \equiv u(t) + \frac{\partial u}{\partial t}\bigg|_{\rm + hydro} \times \Delta t. +\end{equation} +We then proceed to solve the implicit equation +\begin{equation}\label{implicit-eq} + u_{\rm final} = u_0 + \lambda(u_{\rm final}) \Delta t, +\end{equation} +where $\lambda$ is the cooling rate\footnote{Note this is not the + physical cooling rate $\Lambda/n_{\rm H}^2$ that is commonly + used. This is the change in energy over time $\frac{\partial + u}{\partial t}\big|_{\rm cool}$ from all the channels + including all the multiplicative factors coming in front of the + physical $\Lambda$.}, which for a given particle varies +only with respect to $u$ throughout the duration of the timestep. The +other dependencies of $\lambda$ (density, metallicity and redshift) +are kept constant over the course of $\Delta t$. Crucially, we want to +evaluate $\lambda$ at the end of the time-step. 
Once a solution to this +implicit problem has been found, we get the total cooling rate: +\begin{equation} + \frac{\partial u}{\partial t}\bigg|_{\rm total} \equiv \frac{u_{\rm final} - + u(t)}{\Delta t}, +\end{equation} +leading to the following total equation of motion for internal energy: +\begin{equation} + u(t+\Delta t) = u(t) + \frac{\partial u}{\partial t}\bigg|_{\rm + total} \times \Delta t. +\end{equation} +The time integration is then performed in the regular time integration +section of the code. Note that, as expected, if $\lambda=0$ the whole +processes reduces to a normal hydro-dynamics only time integration of +the internal energy. + +Finally, for schemes evolving entropy $A$ instead of internal energy +$u$ (or for that matter any other thermodynamic quantity), we convert +the entropy derivative coming from the hydro scheme to an internal +energy derivative, solve the implicit cooling problem using internal +energies and convert the total time derivative back to an entropy +derivative. Since we already assume that cooling is performed at +constant density, there is no loss in accuracy happening via this +conversion procedure. + +\subsubsection{Energy floor and prediction step} + +In most applications, the cooling is not allowed to bring the internal +energy below a certain value $u_{\rm min}$, usually expressed in the +form of a minimal temperature. Additionally, and even in the absence +of such a temperature floor, we must ensure that the energy does not +become negative. + +Since the time-step size is not chosen in a way to fulfil these +criteria, we have to limit the total rate of change of energy such +that the limits are not reached. In practice this means modifying +$\frac{\partial u}{\partial t}\big|_{\rm total}$ such that +\begin{equation} + u(t) + \frac{\partial u}{\partial t}\bigg|_{\rm total} \times \Delta t \geq + u_{\rm min} +\end{equation} +is true. 
In the vast majority of cases, there is no need to modify the +energy derivative but this may be necessary for some rapidly cooling +particles. + +The time integration uses a leapfrog algorithm in its +``Kick-Drift-Kick'' form. In the cases, where the time-step is +constant, the condition as written above would be sufficient, however +with variable $\Delta t$ this needs modifying. If the next time-step +of a particle decreases in size, then the condition above will remain +true. However, if the time-step size increases then we may violate the +condition and integrate the energy to a value below $u_{\rm min}$. The +time-step size is chosen in-between the application of the two kick +operators\footnote{Effectively creating the chain + ``Kick-Drift-Kick-Timestep'', where the last operation fixes the + time-step size for the next kick-drift-kick cycle.}. We hence have +to ensure that the integration of one half of the current step (the +second kick) and one half of the next step (the first kick) does not +lead to a value below the allowed minimum. In \swift, we do not allow +the time-step to increase by more than a factor of $2$. This implies +that we will at most integrate the internal energy forward in time for +$1.5\Delta t$, where $\Delta t$ is the current value of the time-step +size we used in all the equations thus far. An additional subtlety +does, however, enter the scheme. The internal energy is not just used +in the Kick operator. Because of the needs of the SPH scheme, the +particles have to carry an estimate of the entropy at various points +in time inside the step of this particle. This is especially important +for inactive particles that act as sources for neighbouring active +particles. We must hence not only protect for the next half-kick, we +must also ensure that the value we estimated will be valid over the +next drift step as well. 
This means completing the current half-kick +and the next full drift, which could in principle double in size; this +implies checking the limits over the next $2.5\Delta t$. However, for +that variable, since it is an extrapolation and not the actual +variable we integrate forward in time, we do not need to enforce the +$u_{\rm min}$ limit. We must only ensure that the energy remains +positive. Combining those two conditions, we conclude that we must +enforce two limits: +\begin{equation} + \left\lbrace + \begin{array}{ll} + \displaystyle\frac{\partial u}{\partial t}\bigg|_{\rm total} \geq + -\displaystyle\frac{u(t) - u_{\rm min} }{1.5 \Delta t}, \Bigg. \\ + \displaystyle\frac{\partial u}{\partial t}\bigg|_{\rm total} \geq + -\displaystyle\frac{u(t) - u_{\rm min} }{(2.5 + \epsilon) \Delta t}, + \end{array} + \right. +\end{equation} +where in the second equation we added a small value $\epsilon$ to +ensure that we will not get negative values because of rounding errors. + +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +\section{Solution to the implicit cooling problem} + +In this section we describe the integration scheme used to compute the +cooling rate. It consists of an explicit solver for cases where the +cooling rate is small, a solver based on the Newton-Raphson method, and +one based on the bisection method. + +\subsection{Explicit solver} + +For many particles the cooling occuring over a timestep will be small +(for example, if a particle is at the equilibrium temperature and was not +heated by other means such as shock heating). In these cases $\lambda(u_0) +\simeq \lambda(u_{final})$, so an explicit solution to compute $u_{final}$ +may be used as a faster alternative to a more complicated implicit scheme. 
+More specifically, if $\lambda(u_0) dt < \varepsilon u_0$ we set +\begin{equation} +u_{final} = u_0 + \lambda(u_0) dt, +\end{equation} +where $\varepsilon$ is a small constant, set to $0.05$ to be consistent +with the EAGLE simulations. + +In cases where $\lambda(u_0) dt > \varepsilon u_0$ one of two implicit +methods are used, either the Newton-Raphson method, which benefits from +faster convergence, however is not guaranteed to converge, or the +bisection method, which is slower but always converges. + +\subsection{Newton-Raphson method} + +Equation \ref{implicit-eq} may be rearranged so that we are trying to +find the root of +\begin{equation}\label{fu-eq} +f(u_{final}) = u_{final} - u_0 - \lambda(u_{final}) dt = 0. +\end{equation} +This may be done iteratively using the Newton-Raphson method obtaining +consecutive approximations to $u_{final}$ by +\begin{equation} +u_{n+1} = u_n - \frac{f(u_n)}{df(u_n)/du}. +\end{equation} +In some cases a negative value of $u_{n+1}$ may be calculated. To +prevent the occurrence of negative internal energies during the +calculation we introduce $x = \log (u_{final})$, so that we solve +\begin{equation}\label{fx-eq} +f(x) = e^x - u_0 - \lambda(e^x) dt = 0 +\end{equation} +instead of \ref{fu-eq}. Thus we obtain consecutive approximations of +the root of $f$ by the formula $x_{n+1} = x_n - f(x_n)/f'(x_n)$. This +leads to +\begin{equation} +x_{n+1} = x_n - \frac{1 - u_0 e^{-x_n} -\lambda(e^{x_n})e^{-x_n}dt}{1 + - \frac{d\lambda}{du}(e^{x_n}) dt}. +\end{equation} + +The tables used for EAGLE cooling in fact depend on temperature rather +than internal energy and include a separate table to convert from +internal energy to temperature.
Hence, to obtain the gradient we use +\begin{align*} + \D \lambda u &= \D \lambda T \D T u \\ + &= \frac{\lambda(T_{high,n}) + - \lambda(T_{low,n})}{T_{high,n} - T_{low,n}} + \frac{T(u_{high,n}) + - T(u_{low,n})}{u_{high,n} - u_{low,n}}, +\end{align*} +where $T_{\rm high,n}, u_{\rm high,n}$ and $T_{\rm low,n}, u_{\rm low,n}$ +are values of the temperature and internal energy grid bracketing the current +temperature and internal energy for the iteration in Newton's method +(e.g. $u_{high,n} \ge u_n \ge u_{low,n}$). + +The initial guess for the Newton-Raphson method is taken to be $x_0 = \log(u_0)$. +If in the first iteration the sign of $\lambda$ changes, we set the next +guess to correspond to the equilibrium temperature (i.e. $10^4$K). + +A particle is considered to have converged if the relative error in +the internal energy is sufficiently small. This can be formulated as +\begin{align*} +\frac{u_{n+1} - u_n}{u_{n+1}} &< C \\ +u_{n+1} - u_n &< Cu_{n+1} \\ +\LL 1-C\RR u_{n+1} &< u_n \\ +\frac{u_{n+1}}{u_n} &< \frac{1}{1-C} \\ +x_{n+1} - x_n = \log\frac{u_{n+1}}{u_n} &< -\log\LL 1-C \RR \simeq C. +\end{align*} +Since the grid spacing in the internal energy of the Eagle tables is +0.045 in $\log_{10}u$ we take $C = 10^{-2}$. + +In cases when the Newton-Raphson method doesn't converge within a specified +number of iterations we revert to the bisection method. In order to use +the Newton-Raphson method a parameter (EagleCooling:newton\_integration) in +the yaml file needs to be set to 1. + +\subsection{Bisection method} + +In order to guarantee convergence the bisection method is used to solve +equation \ref{fu-eq}. The implementation is the same as in the EAGLE +simulations, but is described here for convenience. + +First a small interval is used to bracket the solution. The interval bounds +are defined as $u_{upper} = \kappa u_0$ and $u_{lower} = \kappa^{-1} u_0$, +with $\kappa = \sqrt{1.1}$ as specified in EAGLE.
If the particle is cooling +($\lambda(u_0) < 0$) $u_{upper}$ and $u_{lower}$ are iteratively decreased +by factors of $\kappa$ until $f(u_{lower}) < 0$. Alternatively, if the +particle is initially heating ($\lambda(u_0) > 0$) the bounds are iteratively +increased by factors of $\kappa$ until $f(u_{upper}) > 0$. Once the bounds +are obtained, the bisection scheme is performed as normal. + +\section{EAGLE cooling tables} + +We use the same cooling tables as used in EAGLE, specifically those found in +\cite{Wiersma2009} and may be found at http://www.strw.leidenuniv.nl/WSS08/. +These tables contain pre-computed values of the cooling rate for a given +redshift, metallicity, hydrogen number density and temperature produced using +the package CLOUDY. When calculating the cooling rate for particles at +redshifts higher than the redshift of reionisation the tables used do not +depend on redshift, but only on metallicity, hydrogen number density and +temperature. These tables are linearly interpolated based on the particle +properties. + +Since these tables specify the cooling rate in terms of temperature, the internal +energy of a particle needs to be converted to a temperature in a way which takes +into account the ionisation state of the gas. This is done by interpolating a +pre-computed table of values of temperature depending on redshift, hydrogen number +density, helium fraction and internal energy (again, for redshifts higher than the +redshift of reionisation this table does not depend on redshift). + +Inverse Compton cooling is not accounted for in the high redshift tables, so prior +to reionisation it is taken care of by an analytical formula, +\begin{equation} +\frac{\Lambda_{compton}}{n_h^2} = -\Lambda_{0,compton} \left( T - T_{CMB}(1+z) +\right) (1+z)^4 \frac{n_e}{n_h}, +\end{equation} +which is added to the cooling rate interpolated from the tables.
Here $n_h$ is the +hydrogen number density, $T$ the temperature of the particle, $T_{CMB} = 2.7255$K +the temperature of the CMB, $z$ the redshift, $n_e$ the hydrogen and helium electron +number density, and $\Lambda_{0,compton} = 1.0178085 \times 10^{-37} g \cdot cm^2 +\cdot s^{-3} \cdot K^{-5}$. + +\section{Co-moving time integration} + +In the case of cosmological simulations, the equations need to be +slightly modified to take into account the expansion of the +Universe. The code uses the comoving internal energy $u' = +a(t)^{3(\gamma-1)}u$ or comoving entropy $A'=A$ as thermodynamic +variable. The equation of motion for the variable is then modified +and takes the following form: +\begin{equation} + \frac{\partial u'_i}{\partial t} = \frac{\partial u'_i}{\partial + t}\bigg|_{\rm hydro} = \frac{1}{a(t)^2} Y'_i(t)\big|_{\rm + hydro}, +\end{equation} +where $Y_i$ is computed from the particle itself and its neighbours +and corresponds to the change in internal energy due to hydrodynamic +forces. We then integrate the internal energy forward in time using +\begin{equation} + u'_i(t+\Delta t) = u'_i(t) + Y'_i(t)\big|_{\rm hydro} \times \underbrace{\int_t^{t+\Delta t} + \frac{1}{a(t)^2} dt}_{\Delta t_{\rm therm}}. +\end{equation} +The exact same equations apply in the case of a hydrodynamics scheme +evolving entropy (see cosmology document). We note that this is +different from the choice made in Gadget where there is no $a^{-2}$ +term as it is absorbed in the definition in $Y'_i$ itself. As a +consequence $\Delta t_{\rm therm}$ is just $\Delta t$. + +In order to compute the +cooling rate of a particle, we convert quantities to physical +coordinates. Given the appearance of scale-factors in some of these +equations, we have to be careful to remain consistent throughout.
We +start by constructing the co-moving internal energy at the end of the +time-step in the absence of cooling: +\begin{equation} + u'_0 \equiv u'(t) + Y'_i(t)\big|_{\rm hydro} \times \Delta t_{\rm therm}, +\end{equation} +which we then convert into a physical internal energy alongside the +thermal energy at the current time: +\begin{align} + u(t) &= a^{3(1-\gamma)}u'(t),\\ + u_0 &= a^{3(1-\gamma)}u'_0. +\end{align} +We can then solve the implicit cooling problem in the same way as in +the non-comoving case and obtain +\begin{equation} + u_{\rm final} = u_0 + \lambda(u_{\rm final}) \Delta t. +\end{equation} +We note that the $\Delta t$ here is the actual time between the start +and end of the step; unlike $\Delta t_{\rm therm}$ there are no +scale-factors entering that term. The solution to the implicit problem +in physical coordinates yields the definition of the total time +derivative of internal energy: +\begin{equation} + \frac{\partial u}{\partial t}\bigg|_{\rm total} \equiv \frac{u_{\rm final} - + u(t)}{\Delta t}. +\end{equation} +This allows us to construct the total evolution of co-moving energy: +\begin{equation} + Y'_i(t)\big|_{\rm total} = a^{3(\gamma-1)} \times \frac{\Delta t}{\Delta + t_{\rm therm}} \times + \frac{\partial u}{\partial t}\bigg|_{\rm total}, +\end{equation} +where the first term is the conversion from physical to co-moving +internal energy and the second term is required by our definition of +our time integration operator. The time integration routine then performs the +same calculation as in the non-cooling case: +\begin{equation} + u'_i(t+\Delta t) = u'_i(t) + Y'_i(t)\big|_{\rm total} \times {\Delta t_{\rm therm}}.
+\end{equation} + +\bibliographystyle{mnras} +\bibliography{./bibliography.bib} + +\end{document} diff --git a/theory/Cooling/run.sh b/theory/Cooling/run.sh new file mode 100755 index 0000000000000000000000000000000000000000..17ae407ba66b36b5f192f2b97f7d216a17af26a0 --- /dev/null +++ b/theory/Cooling/run.sh @@ -0,0 +1,6 @@ +#!/bin/bash +echo "Generating PDF..." +pdflatex -jobname=eagle_cooling eagle_cooling.tex +bibtex eagle_cooling.aux +pdflatex -jobname=eagle_cooling eagle_cooling.tex +pdflatex -jobname=eagle_cooling eagle_cooling.tex diff --git a/theory/Cosmology/artificialvisc.tex b/theory/Cosmology/artificialvisc.tex new file mode 100644 index 0000000000000000000000000000000000000000..55cbe2756714f875c2a9c52d7afae49499b4328b --- /dev/null +++ b/theory/Cosmology/artificialvisc.tex @@ -0,0 +1,75 @@ +\subsection{Cosmological factors for properties entering the artificial viscosity} +\label{ssec:artificialvisc} + +There are multiple properties that enter into the more complex artificial +viscosity schemes, such as those by \citet{Morris1997} (henceforth M\&M) and +\citet{Cullen2010} (henceforth C\&D). + +\subsubsection{M\&M basic scheme} +\label{sssec:mandm} + +This relies on the velocity divergence as a shock indicator, i.e. the property +$\nabla \cdot \mathbf{v}$. The interpretation of this is the velocity divergence of +the fluid overall, i.e. the physical velocity divergence. Starting with +\begin{equation} +\mathbf{v}_p = a \dot{\mathbf{r}}' + \dot{a} \mathbf{r}', \nonumber +\end{equation} +with the divergence, +\begin{equation} +\nabla \cdot \mathbf{v}_p = + \nabla \cdot \left(a \dot{\mathbf{r}}'\right) + + \nabla \cdot \left(\dot{a} \mathbf{r}'\right). \nonumber +\end{equation} +The quantity on the left is the one that we want to enter the source term for the +artificial viscosity. 
Transforming to the co-moving derivative on the right hand side +to enable it to be calculated in the code, +\begin{equation} +\nabla \cdot \mathbf{v}_p = + \nabla' \cdot \dot{\mathbf{r}}' + n_d H(a), +\label{eqn:divvwithcomovingcoordinates} +\end{equation} +with $n_d$ the number of spatial dimensions, and the final transformation +being the one to internal code velocity units, +\begin{equation} +\nabla \cdot \mathbf{v}_p = + \frac{1}{a^2} \nabla' \cdot \mathbf{v}' + n_d H(a). +\label{eqn:divvcodeunits} +\end{equation} +We note that there is no similar hubble flow term in the expression for +$\nabla \times \mathbf{v}_p$. + +In some more complex schemes, such as the one presented by \cite{Cullen2010}, +the time differential of the velocity divergence is used as a way to differentiate +the pre- and post-shock region. + +Building on the above, we take the time differential of both sides, +\begin{equation} + \frac{{\mathrm d}}{{\mathrm d} t} \nabla \cdot \mathbf{v}_p = + \frac{{\mathrm d}}{{\mathrm d} t} \left( + \frac{1}{a^2} \nabla' \cdot \mathbf{v}' + n_d H(a) + \right). + \nonumber +\end{equation} +Collecting the factors, we see +\begin{align} + \frac{{\mathrm d}}{{\mathrm d} t} \nabla \cdot \mathbf{v}_p = + \frac{1}{a^2} &\left( + \frac{{\mathrm d}}{{\mathrm d} t} \nabla ' \cdot \mathbf{v}' - + 2H(a) \nabla' \cdot \mathbf{v}' + \right) \\ + + n_d &\left( + \frac{\ddot{a}}{a} - \frac{\dot{a}}{a^2} + \right). + \label{eqn:divvdtcodeunits} +\end{align} +This looks like quite a mess, but in most cases we calculate this implicitly +from the velocity divergence itself, and so we do not actually need to take +into account these factors; i.e. we actually calculate +\begin{equation} + \frac{\mathrm d}{{\mathrm d} t} \nabla \cdot \mathbf{v}_p = + \frac{ + \nabla \cdot \mathbf{v}_p (t + {\mathrm d}t) - \nabla \cdot \mathbf{v}_p (t) + }{dt}, + \label{eqn:divvdtcodeunitsimplicit} +\end{equation} +meaning that the above is taken into account self-consistently. 
\ No newline at end of file diff --git a/theory/Cosmology/bibliography.bib b/theory/Cosmology/bibliography.bib index 550801dffb8d92f24d355677febcab0ceb39a47f..84cec263d2e8195bc831e672184b41d61479fcc2 100644 --- a/theory/Cosmology/bibliography.bib +++ b/theory/Cosmology/bibliography.bib @@ -138,6 +138,28 @@ doi = "https://doi.org/10.1006/jcph.1997.5732", url = "http://www.sciencedirect.com/science/article/pii/S0021999197957326", author = "J.J. Monaghan" } +@article{Cullen2010, +author = {Cullen, Lee and Dehnen, Walter}, +title = {{Inviscid smoothed particle hydrodynamics}}, +journal = {Monthly Notices of the Royal Astronomical Society}, +year = {2010}, +volume = {408}, +number = {2}, +pages = {669--683}, +month = oct, +annote = {14 pages (15 in arXiv), 15 figures, accepted for publication in MNRAS} +} + +@article{Morris1997, +author = {Morris, J P and Monaghan, J J}, +title = {{A Switch to Reduce SPH Viscosity}}, +journal = {Journal of Computational Physics}, +year = {1997}, +volume = {136}, +number = {1}, +pages = {41--50}, +month = sep +} @ARTICLE{Springel2010, author = {{Springel}, V.}, diff --git a/theory/Cosmology/coordinates.tex b/theory/Cosmology/coordinates.tex index 38a571aefea68fbe1bc7a8ebc3867109f1c4736e..a1dbff71c13cbd62acde83c14e9e81f0fbc41214 100644 --- a/theory/Cosmology/coordinates.tex +++ b/theory/Cosmology/coordinates.tex @@ -88,13 +88,13 @@ gravitational terms. SPH flavours that evolve the internal energy $u$ instead of entropy require the additional equation of motion describing the evolution of $u'$: \begin{equation} - \dot{u}_i' = \frac{P_i'}{\rho_i'^2}\left[3H\rho_i' + \frac{1}{a^2}f_i'\sum_jm_j\left(\mathbf{v}_i' - - \mathbf{v}_j'\right)\cdot\mathbf{\nabla}_i'W_{ij}'(h_i)\right], + \dot{u}_i' = \frac{1}{a^2}\frac{P_i'}{\rho_i'^2} f_i'\sum_jm_j\left(\mathbf{v}_i' - + \mathbf{v}_j'\right)\cdot\mathbf{\nabla}_i'W_{ij}'(h_i). 
\label{eq:cosmo_eom_u} \end{equation} -where the first term in the brackets accounts for the change in energy -due to the expansion of the Universe. The scale-factors appearing in -the equations are later absorbed in the time-integration operators + +In all these cases, the scale-factors appearing in the equations are +later absorbed in the time-integration operators (Sec.~\ref{ssec:operators}) such that the RHS of the equations of motions is identical for the primed quantities to the ones obtained in the non-cosmological case for the physical quantities. diff --git a/theory/Cosmology/cosmology_standalone.tex b/theory/Cosmology/cosmology_standalone.tex index 72c75fb1649b1a94715c7418efcd31bc605cfd31..5b5fa228fe4cd1c5cfbd64a5ddb7a7ec466fa7a7 100644 --- a/theory/Cosmology/cosmology_standalone.tex +++ b/theory/Cosmology/cosmology_standalone.tex @@ -43,6 +43,8 @@ Making cosmology great again. \input{timesteps} +\input{artificialvisc} + \input{gizmo} \bibliographystyle{mnras} diff --git a/theory/Cosmology/timesteps.tex b/theory/Cosmology/timesteps.tex index 0ad419d23bba3ecd1bd8703cd3a01e6b8985b4c1..4a1c2ef534d32c667f1b5b655e9b93ae618b8c99 100644 --- a/theory/Cosmology/timesteps.tex +++ b/theory/Cosmology/timesteps.tex @@ -9,31 +9,34 @@ hence requiring an additional conversion. 
\subsubsection{Maximal displacement} -to prevent particles from moving on trajectories that do not include the effects -of the expansion of the Universe, we compute a maximal time-step for the -particles based on their RMS peculiar motion: +to prevent particles from moving on trajectories that do not include +the effects of the expansion of the Universe, we compute a maximal +time-step for the particles based on their RMS peculiar motion and +mean inter-particle separation: \begin{equation} - \Delta t_{\rm cosmo} \equiv \mathcal{C}_{\rm RMS} \frac{a^2 d_{\rm p}}{\sqrt{\frac{1}{N_{\rm p}}\sum_i | \mathbf{v}_i' |^2}}, + \Delta t_{\rm cosmo} \equiv \mathcal{C}_{\rm RMS} \frac{a^2}{\sqrt{\frac{1}{N_{\rm p}}\sum_i | \mathbf{v}_i' |^2}} d_{\rm p}, \label{eq:dt_RMS} \end{equation} -where the sum runs over all particles of a species $p$, $\mathcal{C}_{\rm RMS}$ -is a free parameter, $N_{\rm p}$ is the number of baryonic or non-baryonic -particles, and $d_{\rm p}$ is the mean inter-particle separation for the -particle with the lowest mass $m_i$ of a given species: +where the sum runs over all particles of a species $p$, +$\mathcal{C}_{\rm RMS}$ is a free parameter, $N_{\rm p}$ is the number +of baryonic or non-baryonic particles, and $d_{\rm p}$ is the mean +inter-particle separation at redshift $0$ for the particle with the +lowest mass $m_i$ of a given species: \begin{equation} - d_{\rm baryons} \equiv \left(\frac{m_i}{\Omega_{\rm b} \rho_{\rm crit}}\right)^{1/3}, \quad d_{\rm DM} \equiv \left(\frac{m_i}{\left(\Omega_{\rm m} - \Omega_{\rm b}\right) \rho_{\rm crit}}\right)^{1/3}. + d_{\rm baryons} \equiv \sqrt[3]{\frac{m_i}{\Omega_{\rm b} \rho_{\rm crit, 0}}}, \quad d_{\rm DM} \equiv \sqrt[3]{\frac{m_i}{\left(\Omega_{\rm m} - \Omega_{\rm b}\right) \rho_{\rm crit, 0}}}. \nonumber \end{equation} -We typically use $\mathcal{C}_{\rm RMS} = 0.25$ and given the slow evolution of -this maximal time-step size, we only re-compute it every time the tree is -reconstructed. 
+We typically use $\mathcal{C}_{\rm RMS} = 0.25$ and given the slow +evolution of this maximal time-step size, we only re-compute it every +time the tree is reconstructed. -We also apply an additional criterion based on the smoothing scale of the forces -computed from the top-level mesh. In eq.~\ref{eq:dt_RMS}, we replace -$d_{\rm p}$ by $a_{\rm smooth} \frac{L_{\rm box}}{N_{\rm mesh}}$, where we used -the definition of the mesh parameters introduced earlier. Given the rather -coarse mesh usually used in \swift, this time-step condition rarely dominates -the overall time-step size calculation. +We also apply an additional criterion based on the smoothing scale of +the forces computed from the top-level mesh. In eq.~\ref{eq:dt_RMS}, +we replace $d_{\rm p}$ by +$a_{\rm smooth} \frac{L_{\rm box}}{N_{\rm mesh}}$, where we used the +definition of the mesh parameters introduced earlier. Given the rather +coarse mesh usually used in \swift, this time-step condition rarely +dominates the overall time-step size calculation. \subsubsection{Conversion from time to integer time-line} diff --git a/theory/Multipoles/bibliography.bib b/theory/Multipoles/bibliography.bib index 077525a9e4db781ea58bd46ef2ba109d6c074be0..245a5223d43aff3ed871cc7ce278fb319d88a938 100644 --- a/theory/Multipoles/bibliography.bib +++ b/theory/Multipoles/bibliography.bib @@ -275,6 +275,21 @@ keywords = "adaptive algorithms" adsnote = {Provided by the SAO/NASA Astrophysics Data System} } +@BOOK{Abramowitz1972, + author = {{Abramowitz}, M. 
and {Stegun}, I.~A.}, + title = "{Handbook of Mathematical Functions}", +booktitle = {Handbook of Mathematical Functions, New York: Dover, 1972}, + year = 1972, + adsurl = {http://cdsads.u-strasbg.fr/abs/1972hmfw.book.....A}, + adsnote = {Provided by the SAO/NASA Astrophysics Data System} +} - +@book{Hastings1955, + author = {Hastings, Cecil}, + title = {Approximations for Digital Computers}, + year = {1955}, + isbn = {0691079145}, + publisher = {Princeton University Press}, + address = {Princeton, NJ, USA}, +} diff --git a/theory/SPH/Flavours/bibliography.bib b/theory/SPH/Flavours/bibliography.bib index 2bc11dacca90fe03d05c2e847503105d80eb1317..02ebed25a407ae5adba87d9f46d3f004bf9fbae2 100644 --- a/theory/SPH/Flavours/bibliography.bib +++ b/theory/SPH/Flavours/bibliography.bib @@ -97,4 +97,17 @@ archivePrefix = "arXiv", +@article{Morris1997, +abstract = {Smoothed particle hydrodynamics is a Lagrangian particle method for fluid dynamics which simulates shocks by using an artificial viscosity. Unlike Eulerian methods it is not convenient to reduce the effects of viscosity by means of switches based on spatial gradients. In this paper we introduce the idea of time-varying coefficients which fits more naturally with a particle formulation. Each particle has a viscosity parameter which evolves according to a simple source and decay equation. The source causes the parameter to grow when the particle enters a shock and the decay term causes it to decay to a small value beyond the shock. Tests on one-dimensional shocks and a two-dimensional shock-bubble interaction confirm that the method gives good results. {\textcopyright} 1997 Academic Press.}, +author = {Morris, J. P. and Monaghan, J.
J.}, +doi = {10.1006/jcph.1997.5690}, +isbn = {0021-9991}, +issn = {00219991}, +journal = {Journal of Computational Physics}, +number = {1}, +pages = {41--50}, +title = {{A switch to reduce SPH viscosity}}, +volume = {136}, +year = {1997} +} diff --git a/theory/SPH/Flavours/sph_flavours.tex b/theory/SPH/Flavours/sph_flavours.tex index 5d62af3aab777e66f0b33b89e861d2b21e10b38c..81ac2153ed23f29f14345e0377774548420c84c9 100644 --- a/theory/SPH/Flavours/sph_flavours.tex +++ b/theory/SPH/Flavours/sph_flavours.tex @@ -590,6 +590,42 @@ both sides, such that %############################################################################## +\subsection{Variable artificial viscosity} + +Here we consider a modified version of the Pressure-Energy scheme described +above but one that uses a variable artificial viscosity. The prescription used +in this scheme was originally introduced by \citet{Morris1997} and is almost +identical to the above equations, but tracks an individual viscosity parameter +$\alpha_i$ for each particle. This viscosity is then updated each time-step to +a more appropriate value. The hope is that the artificial viscosity will be +high in regions that contain shocks, but as low as possible in regions where it +is unnecessary such as shear flows. This is already accomplished somewhat with +the inclusion of a \citet{Balsara1995} switch, but a fixed $\alpha$ still leads +to spurious transport of angular momentum and vorticity. + +The equation governing the growth of the viscosity is +\begin{align} + \frac{\mathrm{d} \alpha_i} + {\mathrm{d} t} = + - (\alpha_i - \alpha_{\rm min}) \ell \frac{c_{s, i}}{h}, + \label{eq:sph:pu:alphadt} +\end{align} +with $\alpha_{\rm min}=0.1$ the minimal artificial viscosity parameter, and +$\ell=0.1$ the viscosity ``length'' that governs how quickly the viscosity +decays. This equation is solved implicitly in a similar way to +$\mathrm{d}\mathbf{v}/ \mathrm{d}t$ and $\mathrm{d}u/\mathrm{d}t$ - i.e.
+$\alpha_{i} (t+\Delta t_i) = \alpha_{i}(t) + \dot{\alpha}_i \Delta t_i$. + +To ensure that the scheme is conservative, the viscosity coefficients must be +combined in a fully conservative way; this is performed by taking the mean +viscosity parameter of the two particles that are being interacted, such that +\begin{align} + \alpha_{ij} = \frac{\alpha_i + \alpha_j}{2}. +\end{align} +The rest of the artificial viscosity implementation, including the +\citet{Balsara1995} switch, is the same - just with $\alpha \rightarrow +\alpha_{ij}$. + \subsection{Anarchy SPH} Dalla Vecchia (\textit{in prep.}), also described in section 2.2.2 of \cite{Schaller2015}.\\ diff --git a/tools/Makefile.am b/tools/Makefile.am new file mode 100644 index 0000000000000000000000000000000000000000..5b075aa8241977ac5545bb1345adb5325a6bb6df --- /dev/null +++ b/tools/Makefile.am @@ -0,0 +1,18 @@ +# Scripts to plot task graphs +EXTRA_DIST = task_plots/plot_tasks.py task_plots/analyse_tasks.py \ + task_plots/process_plot_tasks_MPI task_plots/process_plot_tasks + +# Scripts to plot threadpool 'task' graphs +EXTRA_DIST += task_plots/analyse_threadpool_tasks.py \ + task_plots/plot_threadpool.py \ + task_plots/process_plot_threadpool + +# Script for scaling plot +EXTRA_DIST += plot_scaling_results.py \ + plot_scaling_results_breakdown.py + +# Script for gravity accuracy +EXTRA_DIST += plot_gravity_checks.py + +# Combine ICs. +EXTRA_DIST += combine_ics.py diff --git a/examples/analyse_dump_cells.py b/tools/analyse_dump_cells.py similarity index 85% rename from examples/analyse_dump_cells.py rename to tools/analyse_dump_cells.py index 2adfaf319e9c0da33f86a6158da68e6620c47361..2216b5f5fe6aa0c0d9dcc29a8abf0f263d2c3cc4 100755 --- a/examples/analyse_dump_cells.py +++ b/tools/analyse_dump_cells.py @@ -47,13 +47,13 @@ mpicol = 20 # Command-line arguments. if len(sys.argv) < 5: - print "usage: ", sys.argv[0], " nx ny nz cell1.dat cell2.dat ..." 
+ print("usage: ", sys.argv[0], " nx ny nz cell1.dat cell2.dat ...") sys.exit(1) nx = int(sys.argv[1]) ny = int(sys.argv[2]) nz = int(sys.argv[3]) -print "# x y z onedge" +print("# x y z onedge") allactives = [] onedge = 0 tcount = 0 @@ -65,28 +65,28 @@ for i in range(4, len(sys.argv)): continue # Select cells that are on the current rank and are top-level cells. - rdata = data[data[:,localcol] == 1] - tdata = rdata[rdata[:,topcol] == 1] + rdata = data[data[:, localcol] == 1] + tdata = rdata[rdata[:, topcol] == 1] # Separation of the cells is in data. - xwidth = tdata[0,xwcol] - ywidth = tdata[0,ywcol] - zwidth = tdata[0,zwcol] + xwidth = tdata[0, xwcol] + ywidth = tdata[0, ywcol] + zwidth = tdata[0, zwcol] # Fill space nx, ny,n nz with all toplevel cells and flag their active # state. - space = np.zeros((nx,ny,nz)) + space = np.zeros((nx, ny, nz)) actives = [] for line in tdata: ix = int(np.rint(line[xcol] / xwidth)) iy = int(np.rint(line[ycol] / ywidth)) iz = int(np.rint(line[zcol] / zwidth)) active = int(line[activecol]) - space[ix,iy,iz] = 1 + active + space[ix, iy, iz] = 1 + active tcount = tcount + 1 if active == 1: actives.append([ix, iy, iz, line]) - + # Report all active cells and flag any without 26 neighbours. These are # on the edge of the partition volume and will have foreign neighbour # cells. 
@@ -116,13 +116,12 @@ for i in range(4, len(sys.argv)): count = count + 1 if count < 27: onedge = onedge + 1 - print active[3][0], active[3][1], active[3][2], 1 + print(active[3][0], active[3][1], active[3][2], 1) else: - print active[3][0], active[3][1], active[3][2], 0 + print(active[3][0], active[3][1], active[3][2], 0) allactives.extend(actives) -print "# top cells: ", tcount, " active: ", len(allactives), " on edge: ", onedge +print("# top cells: ", tcount, " active: ", len(allactives), " on edge: ", onedge) sys.exit(0) - diff --git a/tools/analyse_runtime.py b/tools/analyse_runtime.py new file mode 100755 index 0000000000000000000000000000000000000000..e1b09a9903c804a20788f165ff28c90142ae38b1 --- /dev/null +++ b/tools/analyse_runtime.py @@ -0,0 +1,274 @@ +#!/usr/bin/env python + +################################################################################ +# This file is part of SWIFT. +# Copyright (c) 2018 Matthieu Schaller (matthieu.schaller@durham.ac.uk) +# +# This program is free software: you can redistribute it and/or modify +# it under the terms of the GNU Lesser General Public License as published +# by the Free Software Foundation, either version 3 of the License, or +# (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU Lesser General Public License +# along with this program. If not, see <http://www.gnu.org/licenses/>. 
+# +################################################################################ + +import re +import sys +import matplotlib + +matplotlib.use("Agg") +from pylab import * + +# Plot parameters +params = { + "axes.labelsize": 10, + "axes.titlesize": 10, + "font.size": 12, + "legend.fontsize": 12, + "xtick.labelsize": 10, + "ytick.labelsize": 10, + "text.usetex": True, + "figure.figsize": (6.45, 6.45), + "figure.subplot.left": 0.06, + "figure.subplot.right": 0.99, + "figure.subplot.bottom": 0.06, + "figure.subplot.top": 0.99, + "figure.subplot.wspace": 0.21, + "figure.subplot.hspace": 0.13, + "lines.markersize": 6, + "lines.linewidth": 3.0, + "text.latex.unicode": True, +} +rcParams.update(params) + +threshold = 0.008 + +num_files = len(sys.argv) - 1 + +labels = [ + "Gpart assignment", + "Mesh comunication", + "Forward Fourier transform", + "Green function", + "Backwards Fourier transform", + "engine_recompute_displacement_constraint:", + "engine_exchange_top_multipoles:", + "updating particle counts", + "Making gravity tasks", + "Making hydro tasks", + "Splitting tasks", + "Counting and linking tasks", + "Setting super-pointers", + "Making extra hydroloop tasks", + "Linking gravity tasks", + "Creating send tasks", + "Exchanging cell tags", + "Creating recv tasks", + "Setting unlocks", + "Ranking the tasks", + "scheduler_reweight:", + "space_list_useful_top_level_cells:", + "space_rebuild:", + "engine_drift_all:", + "engine_unskip:", + "engine_collect_end_of_step:", + "engine_launch:", + "writing particle properties", + "engine_repartition:", + "engine_exchange_cells:", + "Dumping restart files", + "engine_print_stats:", + "engine_marktasks:", + "Reading initial conditions", + "engine_print_task_counts:", + "engine_drift_top_multipoles:", + "Communicating rebuild flag", + "engine_split:", + "space_init", + "engine_init", + "engine_repartition_trigger:" +] +is_rebuild = [ + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, 
+ 1, + 1, + 1, + 1, + 0, + 0, + 0, + 0, + 0, + 0, + 1, + 0, + 0, + 1, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0 +] +times = np.zeros(len(labels)) +counts = np.zeros(len(labels)) + +cols = [ + "0.5", + "#332288", + "#88CCEE", + "#44AA99", + "#117733", + "#999933", + "#DDCC77", + "#CC6677", + "#882255", + "#AA4499", + "#661100", + "#6699CC", + "#AA4466", + "#4477AA", +] + +total_time = 0 +lastline = "" + +for i in range(num_files): + + filename = sys.argv[i + 1] + print("Analysing %s" % filename) + + # Open stdout file + file = open(filename, "r") + + # Search the different phrases + for line in file: + + # Loop over the possbile labels + for i in range(len(labels)): + + # Extract the different blocks + if re.search("%s took" % labels[i], line): + counts[i] += 1.0 + times[i] += float( + re.findall(r"[+-]?((\d+\.?\d*)|(\.\d+))", line)[-1][0] + ) + + # Find the last line with meaningful output (avoid crash report, batch system stuf....) + if re.findall(r"\[[0-9]{4}\][ ]\[*", line) or re.findall( + r"^\[[0-9]*[.][0-9]+\][ ]", line + ): + lastline = line + + # Total run time + total_time += float(re.findall(r"[+-]?([0-9]*[.])?[0-9]+", lastline)[1]) + +# Conver to seconds +times /= 1000.0 + +# Total time +total_measured_time = np.sum(times) +print("\nTotal measured time: %.3f s" % total_measured_time) + +print("Total time: %f s\n" % total_time) + +# Ratios +time_ratios = times / total_time + +# Better looking labels +for i in range(len(labels)): + labels[i] = labels[i].replace("_", " ") + labels[i] = labels[i].replace(":", "") + labels[i] = labels[i].title() + +times = np.array(times) +time_ratios = np.array(time_ratios) +is_rebuild = np.array(is_rebuild) + +# Sort in order of importance +order = np.argsort(-times) +times = times[order] +counts = counts[order] +time_ratios = time_ratios[order] +is_rebuild = is_rebuild[order] +labels = np.take(labels, order) + +# Keep only the important components +important_times = [0.0] +important_ratios = [0.0] +important_labels = ["Others 
(all below %.1f\%%)" % (threshold * 100)] +important_is_rebuild = [0] +need_print = True +print("Time spent in the different code sections:") +for i in range(len(labels)): + if time_ratios[i] > threshold: + important_times.append(times[i]) + important_ratios.append(time_ratios[i]) + important_labels.append(labels[i]) + important_is_rebuild.append(is_rebuild[i]) + else: + if need_print: + print("Elements in 'Other' category (<%.1f%%):" % (threshold * 100)) + need_print = False + important_times[0] += times[i] + important_ratios[0] += time_ratios[i] + + print(" - '%-40s' (%5d calls): %.4f%%" % (labels[i], counts[i], time_ratios[i] * 100)) + +# Anything unaccounted for? +print( + "\nUnaccounted for: %.4f%%\n" + % (100 * (total_time - total_measured_time) / total_time) +) + +important_ratios = np.array(important_ratios) +important_is_rebuild = np.array(important_is_rebuild) + +figure() + + +def func(pct): + return "$%4.2f\\%%$" % pct + + +pie, _, _ = pie( + important_ratios, + explode=important_is_rebuild * 0.2, + autopct=lambda pct: func(pct), + textprops=dict(color="0.1", fontsize=14), + labeldistance=0.7, + pctdistance=0.85, + startangle=-15, + colors=cols, +) +legend(pie, important_labels, title="SWIFT operations", loc="upper left") + +savefig("time_pie.pdf", dpi=150) diff --git a/examples/check_interactions.sh b/tools/check_interactions.sh similarity index 83% rename from examples/check_interactions.sh rename to tools/check_interactions.sh index 24a534b154313927ee4b2a108d3da7ea5f4d1f31..d688e69bb36b628905668183989d08204604c631 100755 --- a/examples/check_interactions.sh +++ b/tools/check_interactions.sh @@ -20,7 +20,7 @@ cd examples/SedovBlast_3D/ ./getGlass.sh python makeIC.py -../swift -s -t 16 -n 5 sedov.yml -P SPH:h_tolerance:10 -P Snapshots:compression:7 +../swift --hydro --threads=16 --steps=5 sedov.yml -P SPH:h_tolerance:10 -P Snapshots:compression:7 mv sedov_0000.hdf5 sedov_naive.hdf5 @@ -29,7 +29,7 @@ cd ../EAGLE_12/ # Link to ICs ln -s 
/gpfs/data/Swift/web-storage/ICs/EAGLE_ICs_12.hdf5 EAGLE_ICs_12.hdf5 -../swift -s -t 16 -n 5 eagle_12.yml -P SPH:h_tolerance:10 -P Snapshots:compression:7 +../swift --hydro --threads=16 --steps=5 eagle_12.yml -P SPH:h_tolerance:10 -P Snapshots:compression:7 mv eagle_0000.hdf5 eagle_12_naive.hdf5 @@ -45,13 +45,13 @@ make clean; make -j 6 cd examples/SedovBlast_3D/ -../swift -s -t 16 -n 5 sedov.yml -P SPH:h_tolerance:10 -P Snapshots:compression:7 +../swift --hydro --threads=16 --steps=5 sedov.yml -P SPH:h_tolerance:10 -P Snapshots:compression:7 mv sedov_0000.hdf5 sedov_serial.hdf5 cd ../EAGLE_12/ -../swift -s -t 16 -n 5 eagle_12.yml -P SPH:h_tolerance:10 -P Snapshots:compression:7 +../swift --hydro --threads=16 --steps=5 eagle_12.yml -P SPH:h_tolerance:10 -P Snapshots:compression:7 mv eagle_0000.hdf5 eagle_12_serial.hdf5 @@ -67,7 +67,7 @@ make clean; make -j 6 cd examples/SedovBlast_3D/ -../swift -s -t 16 -n 5 sedov.yml -P SPH:h_tolerance:10 -P Snapshots:compression:7 +../swift --hydro --threads=16 --steps=5 sedov.yml -P SPH:h_tolerance:10 -P Snapshots:compression:7 mv sedov_0000.hdf5 sedov_vec.hdf5 @@ -98,7 +98,7 @@ fi cd ../EAGLE_12/ -../swift -s -t 16 -n 5 eagle_12.yml -P SPH:h_tolerance:10 -P Snapshots:compression:7 +../swift --hydro --threads=16 --steps=5 eagle_12.yml -P SPH:h_tolerance:10 -P Snapshots:compression:7 mv eagle_0000.hdf5 eagle_12_vec.hdf5 @@ -145,13 +145,13 @@ make clean; make -j 6 cd examples/SedovBlast_3D/ -mpirun -np 4 ../swift_mpi -s -t 16 -n 5 sedov.yml -P SPH:h_tolerance:10 -P Snapshots:compression:7 +mpirun -np 4 ../swift_mpi --hydro --threads=16 --steps=5 sedov.yml -P SPH:h_tolerance:10 -P Snapshots:compression:7 mv sedov_0000.hdf5 sedov_naive.hdf5 cd ../EAGLE_12/ -mpirun -np 4 ../swift_mpi -s -t 16 -n 5 eagle_12.yml -P SPH:h_tolerance:10 -P Snapshots:compression:7 +mpirun -np 4 ../swift_mpi --hydro --threads=16 --steps=5 eagle_12.yml -P SPH:h_tolerance:10 -P Snapshots:compression:7 mv eagle_0000.hdf5 eagle_12_naive.hdf5 @@ -167,13 +167,13 
@@ make clean; make -j 6 cd examples/SedovBlast_3D/ -mpirun -np 4 ../swift_mpi -s -t 16 -n 5 sedov.yml -P SPH:h_tolerance:10 -P Snapshots:compression:7 +mpirun -np 4 ../swift_mpi --hydro --threads=16 --steps=5 sedov.yml -P SPH:h_tolerance:10 -P Snapshots:compression:7 mv sedov_0000.hdf5 sedov_serial.hdf5 cd ../EAGLE_12/ -mpirun -np 4 ../swift_mpi -s -t 16 -n 5 eagle_12.yml -P SPH:h_tolerance:10 -P Snapshots:compression:7 +mpirun -np 4 ../swift_mpi --hydro --threads=16 --steps=5 eagle_12.yml -P SPH:h_tolerance:10 -P Snapshots:compression:7 mv eagle_0000.hdf5 eagle_12_serial.hdf5 @@ -189,7 +189,7 @@ make clean; make -j 6 cd examples/SedovBlast_3D/ -mpirun -np 4 ../swift_mpi -s -t 16 -n 5 sedov.yml -P SPH:h_tolerance:10 -P Snapshots:compression:7 +mpirun -np 4 ../swift_mpi --hydro --threads=16 --steps=5 sedov.yml -P SPH:h_tolerance:10 -P Snapshots:compression:7 mv sedov_0000.hdf5 sedov_vec.hdf5 @@ -220,7 +220,7 @@ fi cd ../EAGLE_12/ -mpirun -np 4 ../swift_mpi -s -t 16 -n 5 eagle_12.yml -P SPH:h_tolerance:10 -P Snapshots:compression:7 +mpirun -np 4 ../swift_mpi --hydro --threads=16 --steps=5 eagle_12.yml -P SPH:h_tolerance:10 -P Snapshots:compression:7 mv eagle_0000.hdf5 eagle_12_vec.hdf5 diff --git a/tools/check_ngbs.py b/tools/check_ngbs.py new file mode 100755 index 0000000000000000000000000000000000000000..648308cb4b2c142fba3ca8e25a024113d2d082f2 --- /dev/null +++ b/tools/check_ngbs.py @@ -0,0 +1,418 @@ +#!/usr/bin/env python + +import h5py as h +import numpy as np +import matplotlib + +matplotlib.use("Agg") +from pylab import * +import os.path + +kernel_gamma = 1.825742 +kernel_gamma2 = kernel_gamma * kernel_gamma +kernel_gamma_dim = np.power(kernel_gamma, 3) +hydro_dimension_unit_sphere = 4.0 * np.pi / 3.0 +kernel_norm = hydro_dimension_unit_sphere * kernel_gamma_dim +error = False + +inputFile1 = "" +inputFile2 = "" + +# Compare the values of two floats +def isclose(a, b, rel_tol=1e-09, abs_tol=0.0): + return abs(a - b) <= max(rel_tol * max(abs(a), abs(b)), 
abs_tol) + + +# Check list of density neighbours and check that they are correct. +def check_density_neighbours( + pids, ngb_ids_naive, ngb_ids_sort, mask, pos, h_naive, h_sort, num_invalid, acc +): + + for k in range(0, num_invalid): + + # Filter neighbour lists for valid particle ids + filter_neigh_naive = [i for i in ngb_ids_naive[mask][k] if i > -1] + filter_neigh_sort = [i for i in ngb_ids_sort[mask][k] if i > -1] + + # Check neighbour lists for differences + id_list = set(filter_neigh_naive).symmetric_difference(set(filter_neigh_sort)) + + # Check for duplicate IDs + duplicate_check_naive = len(filter_neigh_naive) != len(set(filter_neigh_naive)) + duplicate_check_sort = len(filter_neigh_sort) != len(set(filter_neigh_sort)) + + if duplicate_check_naive: + print("Duplicate neighbour ID found in: ", inputFile1) + print(filter_neigh_naive) + return True + + if duplicate_check_sort: + print("Duplicate neighbour ID found in: ", inputFile2) + print(filter_neigh_sort) + return True + + pid = pids[mask][k] + + # Loop over discrepancies and check if they are actually neighbours + for pjd in id_list: + pi_pos = pos[np.where(pids == pid)] + pj_pos = pos[np.where(pids == pjd)] + + hi = h_naive[np.where(pids == pid)] + + dx = pi_pos[0][0] - pj_pos[0][0] + dy = pi_pos[0][1] - pj_pos[0][1] + dz = pi_pos[0][2] - pj_pos[0][2] + + # Correct for BCs + dx = nearest(dx) + dy = nearest(dy) + dz = nearest(dz) + + r2 = dx * dx + dy * dy + dz * dz + + hig2 = hi * hi * kernel_gamma2 + + diff = abs(r2 - hig2) + + print( + "Particle {} is missing {}, hig2: {}, r2: {}, |r2 - hig2|: {}".format( + pid, pjd, hig2, r2, diff + ) + ) + + if diff < acc * hig2: + print("Missing interaction due to precision issue will be ignored.") + else: + hi_2 = h_sort[np.where(pids == pid)] + + # If a neigbour is missing and the particle has the same h throw + # an error. + if isclose(hi, hi_2): + print( + "Missing interaction found but particle has the same smoothing length (hi_1: %e, hi_2: %e)." 
+ % (hi, hi_2) + ) + return True + else: + print( + "Missing interaction due to different smoothing lengths will be ignored (hi_1: %e, hi_2: %e)." + % (hi, hi_2) + ) + + return False + + +# Check list of force neighbours and check that they are correct. +def check_force_neighbours( + pids, ngb_ids_naive, ngb_ids_sort, mask, pos, h_naive, h_sort, num_invalid, acc +): + + error_val = False + + for k in range(0, num_invalid): + + # Filter neighbour lists for valid particle ids + filter_neigh_naive = [i for i in ngb_ids_naive[mask][k] if i > -1] + filter_neigh_sort = [i for i in ngb_ids_sort[mask][k] if i > -1] + + # Check neighbour lists for differences + id_list = set(filter_neigh_naive).symmetric_difference(set(filter_neigh_sort)) + + pid = pids[mask][k] + + # Loop over discrepancies and check if they are actually neighbours + for pjd in id_list: + pi_pos = pos[np.where(pids == pid)] + pj_pos = pos[np.where(pids == pjd)] + + hi = h_naive[np.where(pids == pid)] + hj = h_naive[np.where(pids == pjd)] + + dx = pi_pos[0][0] - pj_pos[0][0] + dy = pi_pos[0][1] - pj_pos[0][1] + dz = pi_pos[0][2] - pj_pos[0][2] + + # Correct for BCs + dx = nearest(dx) + dy = nearest(dy) + dz = nearest(dz) + + r2 = dx * dx + dy * dy + dz * dz + + hig2 = hi * hi * kernel_gamma2 + hjg2 = hj * hj * kernel_gamma2 + + diff = abs(r2 - max(hig2, hjg2)) + + print( + "Particle {} is missing {}, hig2: {}, hjg2: {}, r2: {}, |r2 - max(hig2,hjg2)|: {}".format( + pid, pjd, hig2, hjg2, r2, diff + ) + ) + + if diff < acc * max(hig2, hjg2): + print("Missing interaction due to precision issue will be ignored.") + else: + hi_2 = h_sort[np.where(pids == pid)] + if isclose(hi, hi_2): + print( + "Missing interaction due to the same smoothing lengths will not be ignored (hi_1: %e, hi_2: %e)." + % (hi, hi_2) + ) + error_val = True + else: + print( + "Missing interaction due to different smoothing lengths will be ignored (hi_1: %e, hi_2: %e)." 
+ % (hi, hi_2) + ) + + return error_val + + +def nearest(dx): + if dx > 0.5 * box_size: + return dx - box_size + elif dx < -0.5 * box_size: + return dx + box_size + else: + return dx + + +# Parse command line arguments +if len(sys.argv) < 3: + print("Error: pass input files as arguments") + sys.exit() +else: + inputFile1 = sys.argv[1] + inputFile2 = sys.argv[2] + if os.path.exists(inputFile1) != 1: + print("\n{} does not exist!\n".format(inputFile1)) + sys.exit() + if os.path.exists(inputFile2) != 1: + print("\n{} does not exist!\n".format(inputFile2)) + sys.exit() + +# Open input files +file_naive = h.File(inputFile1, "r") +file_sort = h.File(inputFile2, "r") + +box_size = file_naive["/Header"].attrs["BoxSize"][0] + +# Read input file fields +ids_naive = file_naive["/PartType0/ParticleIDs"][:] +ids_sort = file_sort["/PartType0/ParticleIDs"][:] + +h_naive = file_naive["/PartType0/SmoothingLength"][:] +h_sort = file_sort["/PartType0/SmoothingLength"][:] + +pos_naive = file_naive["/PartType0/Coordinates"][:, :] +# pos_sort = file_sort["/PartType0/Coordinates"][:,:] + +num_density_naive = file_naive["/PartType0/Num_ngb_density"][:] +num_density_sort = file_sort["/PartType0/Num_ngb_density"][:] + +num_force_naive = file_naive["/PartType0/Num_ngb_force"][:] +num_force_sort = file_sort["/PartType0/Num_ngb_force"][:] + +neighbour_ids_density_naive = file_naive["/PartType0/Ids_ngb_density"][:] +neighbour_ids_density_sort = file_sort["/PartType0/Ids_ngb_density"][:] + +neighbour_ids_force_naive = file_naive["/PartType0/Ids_ngb_force"][:] +neighbour_ids_force_sort = file_sort["/PartType0/Ids_ngb_force"][:] + + +# wcount_naive = file_naive["/PartType0/Wcount"][:] +# wcount_sort = file_sort["/PartType0/Wcount"][:] +# +# wcount_naive = wcount_naive * np.power(h_naive,3) * kernel_norm +# wcount_sort = wcount_sort * np.power(h_sort,3) * kernel_norm + +# Cross check +max_density_ngbs_naive = np.max(num_density_naive) +max_density_ngbs_sort = np.max(num_density_sort) 
+max_force_ngbs_naive = np.max(num_force_naive) +max_force_ngbs_sort = np.max(num_force_sort) + +print(" Min Mean Max ") +print(" ---------------------") +print( + "Ngbs density naiv: ", + np.min(num_density_naive), + np.mean(num_density_naive), + max_density_ngbs_naive, +) +print( + "Ngbs density sort: ", + np.min(num_density_sort), + np.mean(num_density_sort), + max_density_ngbs_sort, +) +print( + "Ngbs force naiv: ", + np.min(num_force_naive), + np.mean(num_force_naive), + max_force_ngbs_naive, +) +print( + "Ngbs force sort: ", + np.min(num_force_sort), + np.mean(num_force_sort), + max_force_ngbs_sort, +) +# print "Wcount naiv: ", np.min(wcount_naive), np.mean(wcount_naive), np.max(wcount_naive) +# print "Wcount sort: ", np.min(wcount_sort), np.mean(wcount_sort), np.max(wcount_sort) + +# Sort +index_naive = np.argsort(ids_naive) +index_sort = np.argsort(ids_sort) + +num_density_naive = num_density_naive[index_naive] +num_density_sort = num_density_sort[index_sort] +num_force_naive = num_force_naive[index_naive] +num_force_sort = num_force_sort[index_sort] +ids_naive = ids_naive[index_naive] +ids_sort = ids_sort[index_sort] +neighbour_ids_density_naive = neighbour_ids_density_naive[index_naive] +neighbour_ids_density_sort = neighbour_ids_density_sort[index_sort] +neighbour_ids_force_naive = neighbour_ids_force_naive[index_naive] +neighbour_ids_force_sort = neighbour_ids_force_sort[index_sort] +# wcount_naive = wcount_naive[index_naive] +# wcount_sort = wcount_sort[index_sort] +h_naive = h_naive[index_naive] +h_sort = h_sort[index_sort] +pos_naive = pos_naive[index_naive] +# pos_sort = pos_sort[index_sort] + +neighbour_length_naive = len(neighbour_ids_density_naive[0]) +neighbour_length_sort = len(neighbour_ids_density_sort[0]) + +# Check that input files are logging the same number of neighbours +if neighbour_length_naive != neighbour_length_sort: + print("Input files have logged different numbers of neighbour lengths!") + print("{} has logged: {} 
neighbours".format(inputFile1, neighbour_length_naive)) + print("{} has logged: {} neighbours".format(inputFile2, neighbour_length_sort)) + exit(1) + +if ( + max_density_ngbs_naive > neighbour_length_naive + or max_force_ngbs_naive > neighbour_length_naive + or max_density_ngbs_sort > neighbour_length_sort + or max_force_ngbs_sort > neighbour_length_sort +): + print("The number of neighbours has exceeded the number of neighbours logged.") + print("Modify NUM_OF_NEIGHBOURS in hydro_part.h to log more neighbours.") + print( + "The highest neighbour count is: ", + max( + max_density_ngbs_naive, + max_force_ngbs_naive, + max_density_ngbs_sort, + max_force_ngbs_sort, + ), + ) + exit(1) + +# First check +print("\n Min Max") +print(" ----------") +print( + "Differences for density: ", + min(num_density_naive - num_density_sort), + max(num_density_naive - num_density_sort), +) +print( + "Differences for force: ", + min(num_force_naive - num_force_sort), + max(num_force_naive - num_force_sort), +) + +# Get the IDs that are different +mask_density = num_density_naive != num_density_sort +mask_force = num_force_naive != num_force_sort +num_invalid_density = np.sum(mask_density) +num_invalid_force = np.sum(mask_force) + +print("\nNum non-zero density: ", num_invalid_density) +print("Num non-zero force: ", num_invalid_force) + +print("\nParticle IDs with incorrect densities") +print("----------------------------------------") +print(ids_naive[mask_density]) + +# Check density neighbour lists +error += check_density_neighbours( + ids_naive, + neighbour_ids_density_naive, + neighbour_ids_density_sort, + mask_density, + pos_naive, + h_naive, + h_sort, + num_invalid_density, + 2e-6, +) + +print("Num of density interactions", inputFile1) +print(num_density_naive[mask_density]) + +print("Num of density interactions", inputFile2) +print(num_density_sort[mask_density]) + +print("\nParticle IDs with incorrect forces") +print("------------------------------------") 
+print(ids_naive[mask_force]) + +# Check force neighbour lists +error += check_force_neighbours( + ids_naive, + neighbour_ids_force_naive, + neighbour_ids_force_sort, + mask_force, + pos_naive, + h_naive, + h_sort, + num_invalid_force, + 2e-6, +) + +print("Num of force interactions", inputFile1) +print(num_force_naive[mask_force]) + +# print "Smoothing lengths", inputFile1 +# print h_naive[mask_force] + +print("Num of force interactions", inputFile2) +print(num_force_sort[mask_force]) + +# print "Smoothing lengths", inputFile2 +# print h_sort[mask_force] + +# Statistics of h difference +h_relative = (h_naive - h_sort) / h_naive +print( + "h statistics: {} {} (Min, 1st Percentile)".format( + np.min(h_relative), np.percentile(h_relative, 1) + ) +) +print( + "h statistics: {} {} (Mean, Median)".format( + np.mean(h_relative), np.median(h_relative) + ) +) +print( + "h statistics: {} {} (Max, 99th Percentile)".format( + np.max(h_relative), np.percentile(h_relative, 99) + ) +) + +if error: + print("\n------------------") + print("Differences found.") + print("------------------") + exit(1) +else: + print("\n---------------------") + print("No differences found.") + print("---------------------") + exit(0) diff --git a/tools/combine_ics.py b/tools/combine_ics.py new file mode 100755 index 0000000000000000000000000000000000000000..ac5680f9c70e3c7f8280b14a80a09d8c3140ae99 --- /dev/null +++ b/tools/combine_ics.py @@ -0,0 +1,241 @@ +#!/usr/bin/env python +""" +Usage: + combine_ics.py input_file.0.hdf5 merged_file.hdf5 gzip_level + +This file combines Gadget-2 type 2 (i.e. hdf5) initial condition files +into a single file that can be digested by SWIFT. +This has mainly be tested for DM-only (parttype1) files but also works +smoothly for ICs including gas. The special case of a mass-table for +the DM particles is handled. No unit conversions are applied nor are +any scale-factors or h-factors changed. 
+The script applies some compression and checksum filters to the output +to save disk space. +The last argument `gzip_level` is used to specify the level of compression +to apply to all the fields in the file. Use 0 to cancel all compression. +The default value is `4`. + +This file is part of SWIFT. +Copyright (C) 2016 Matthieu Schaller (matthieu.schaller@durham.ac.uk) + +All Rights Reserved. + +This program is free software: you can redistribute it and/or modify +it under the terms of the GNU Lesser General Public License as published +by the Free Software Foundation, either version 3 of the License, or +(at your option) any later version. + +This program is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. + +You should have received a copy of the GNU Lesser General Public License +along with this program. If not, see <http://www.gnu.org/licenses/>.
+""" + +import sys +import h5py as h5 +import numpy as np + +# Store the compression level +gzip_level = 4 +if len(sys.argv) > 3: + gzip_level = int(sys.argv[3]) + +# First, we need to collect some information from the master file +main_file_name = str(sys.argv[1])[:-7] +print("Merging snapshots files with name", main_file_name) +master_file_name = main_file_name + ".0.hdf5" +print("Reading master information from", master_file_name) +master_file = h5.File(master_file_name, "r") +grp_header = master_file["/Header"] + +num_files = grp_header.attrs["NumFilesPerSnapshot"] +tot_num_parts = grp_header.attrs["NumPart_Total"] +tot_num_parts_high_word = grp_header.attrs["NumPart_Total_HighWord"] +entropy_flag = grp_header.attrs["Flag_Entropy_ICs"] +box_size = grp_header.attrs["BoxSize"] +time = grp_header.attrs["Time"] + +# Combine the low- and high-words +tot_num_parts = tot_num_parts.astype(np.int64) +for i in range(6): + tot_num_parts[i] += (np.int64(tot_num_parts_high_word[i]) << 32) + +# Some basic information +print("Reading", tot_num_parts, "particles from", num_files, "files.") + +# Check whether there is a mass table +DM_mass = 0.0 +mtable = grp_header.attrs.get("MassTable") +if mtable is not None: + DM_mass = grp_header.attrs["MassTable"][1] +if DM_mass != 0.0: + print("DM mass set to", DM_mass, "from the header mass table.") +else: + print("Reading DM mass from the particles.") + + +# Create the empty file +output_file_name = sys.argv[2] +output_file = h5.File(output_file_name, "w-") + + +# Header +grp = output_file.create_group("/Header") +grp.attrs["NumFilesPerSnapshot"] = 1 +grp.attrs["NumPart_Total"] = tot_num_parts +grp.attrs["NumPart_Total_HighWord"] = [0, 0, 0, 0, 0, 0] +grp.attrs["NumPart_ThisFile"] = tot_num_parts +grp.attrs["MassTable"] = [0.0, 0.0, 0.0, 0.0, 0.0, 0.0] +grp.attrs["BoxSize"] = box_size +grp.attrs["Flag_Entropy_ICs"] = entropy_flag +grp.attrs["Time"] = time + +# Create the particle groups +if tot_num_parts[0] > 0: + grp0 =
output_file.create_group("/PartType0") +if tot_num_parts[1] > 0: + grp1 = output_file.create_group("/PartType1") +if tot_num_parts[4] > 0: + grp4 = output_file.create_group("/PartType4") +if tot_num_parts[5] > 0: + grp5 = output_file.create_group("/PartType5") + + +# Helper function to create the datasets we need +def create_set(grp, name, size, dim, dtype): + if dim == 1: + grp.create_dataset( + name, + (size,), + dtype=dtype, + chunks=True, + compression="gzip", + compression_opts=gzip_level, + shuffle=True, + fletcher32=True, + maxshape=(size,), + ) + else: + grp.create_dataset( + name, + (size, dim), + dtype=dtype, + chunks=True, + compression="gzip", + compression_opts=gzip_level, + shuffle=True, + fletcher32=True, + maxshape=(size, dim), + ) + + +# Create the required datasets +if tot_num_parts[0] > 0: + create_set(grp0, "Coordinates", tot_num_parts[0], 3, "d") + create_set(grp0, "Velocities", tot_num_parts[0], 3, "f") + create_set(grp0, "Masses", tot_num_parts[0], 1, "f") + create_set(grp0, "ParticleIDs", tot_num_parts[0], 1, "l") + create_set(grp0, "InternalEnergy", tot_num_parts[0], 1, "f") + create_set(grp0, "SmoothingLength", tot_num_parts[0], 1, "f") + +if tot_num_parts[1] > 0: + create_set(grp1, "Coordinates", tot_num_parts[1], 3, "d") + create_set(grp1, "Velocities", tot_num_parts[1], 3, "f") + create_set(grp1, "Masses", tot_num_parts[1], 1, "f") + create_set(grp1, "ParticleIDs", tot_num_parts[1], 1, "l") + +if tot_num_parts[4] > 0: + create_set(grp4, "Coordinates", tot_num_parts[4], 3, "d") + create_set(grp4, "Velocities", tot_num_parts[4], 3, "f") + create_set(grp4, "Masses", tot_num_parts[4], 1, "f") + create_set(grp4, "ParticleIDs", tot_num_parts[4], 1, "l") + +if tot_num_parts[5] > 0: + create_set(grp5, "Coordinates", tot_num_parts[5], 3, "d") + create_set(grp5, "Velocities", tot_num_parts[5], 3, "f") + create_set(grp5, "Masses", tot_num_parts[5], 1, "f") + create_set(grp5, "ParticleIDs", tot_num_parts[5], 1, "l") + +# Heavy-lifting ahead. 
Leave a last message. +print("Datasets created in output file") + + +# Special case of the non-zero mass table +if DM_mass != 0.0: + masses = np.ones(tot_num_parts[1], dtype=np.float64) * DM_mass + grp1["Masses"][:] = masses + + +# Cumulative number of particles read/written +cumul_parts = [0, 0, 0, 0, 0, 0] + +# Loop over all the files that are part of the snapshots +for f in range(num_files): + + file_name = main_file_name + "." + str(f) + ".hdf5" + file = h5.File(file_name, "r") + file_header = file["/Header"] + num_parts = file_header.attrs["NumPart_ThisFile"] + + print( + "Copying data from file", + f, + "/", + num_files, + ": num_parts = [", + num_parts[0], + num_parts[1], + num_parts[4], + num_parts[5], + "]", + ) + sys.stdout.flush() + + # Helper function to copy data + def copy_grp(name_new, name_old, ptype): + full_name_new = "/PartType" + str(ptype) + "/" + name_new + full_name_old = "/PartType" + str(ptype) + "/" + name_old + output_file[full_name_new][ + cumul_parts[ptype] : cumul_parts[ptype] + num_parts[ptype] + ] = file[full_name_old] + + def copy_grp_same_name(name, ptype): + copy_grp(name, name, ptype) + + if num_parts[0] > 0: + copy_grp_same_name("Coordinates", 0) + copy_grp_same_name("Velocities", 0) + copy_grp_same_name("Masses", 0) + copy_grp_same_name("ParticleIDs", 0) + copy_grp_same_name("InternalEnergy", 0) + copy_grp_same_name("SmoothingLength", 0) + + if num_parts[1] > 0: + copy_grp_same_name("Coordinates", 1) + copy_grp_same_name("Velocities", 1) + copy_grp_same_name("ParticleIDs", 1) + if DM_mass == 0.0: # Do not overwrite values if there was a mass table + copy_grp_same_name("Masses", 1) + + if num_parts[4] > 0: + copy_grp_same_name("Coordinates", 4) + copy_grp_same_name("Velocities", 4) + copy_grp_same_name("Masses", 4) + copy_grp_same_name("ParticleIDs", 4) + + if num_parts[5] > 0: + copy_grp_same_name("Coordinates", 5) + copy_grp_same_name("Velocities", 5) + copy_grp_same_name("Masses", 5) + copy_grp_same_name("ParticleIDs", 5) + +
cumul_parts[0] += num_parts[0] + cumul_parts[1] += num_parts[1] + cumul_parts[4] += num_parts[4] + cumul_parts[5] += num_parts[5] + file.close() + +print("All done! SWIFT is waiting.") diff --git a/tools/plot_gravity_checks.py b/tools/plot_gravity_checks.py new file mode 100755 index 0000000000000000000000000000000000000000..cef81b86e9663bc7c9df2c9affaeb258501f57e6 --- /dev/null +++ b/tools/plot_gravity_checks.py @@ -0,0 +1,456 @@ +#!/usr/bin/env python + +import sys +import glob +import re +import numpy as np +import matplotlib.pyplot as plt + +params = { + "axes.labelsize": 14, + "axes.titlesize": 18, + "font.size": 12, + "legend.fontsize": 12, + "xtick.labelsize": 14, + "ytick.labelsize": 14, + "text.usetex": True, + "figure.figsize": (12, 10), + "figure.subplot.left": 0.06, + "figure.subplot.right": 0.99, + "figure.subplot.bottom": 0.06, + "figure.subplot.top": 0.99, + "figure.subplot.wspace": 0.14, + "figure.subplot.hspace": 0.14, + "lines.markersize": 6, + "lines.linewidth": 3.0, + "text.latex.unicode": True, +} +plt.rcParams.update(params) +plt.rc("font", **{"family": "sans-serif", "sans-serif": ["Times"]}) + +min_error = 1e-7 +max_error = 3e-1 +num_bins = 64 + +# Construct the bins +bin_edges = np.linspace(np.log10(min_error), np.log10(max_error), num_bins + 1) +bin_size = (np.log10(max_error) - np.log10(min_error)) / num_bins +bins = 0.5 * (bin_edges[1:] + bin_edges[:-1]) +bin_edges = 10 ** bin_edges +bins = 10 ** bins + +# Colours +cols = ["#332288", "#88CCEE", "#117733", "#DDCC77", "#CC6677"] + +# Time-step to plot +step = int(sys.argv[1]) +periodic = int(sys.argv[2]) + +# Find the files for the different expansion orders +order_list = glob.glob("gravity_checks_swift_step%.4d_order*.dat" % step) +num_order = len(order_list) + +# Get the multipole orders +order = np.zeros(num_order) +for i in range(num_order): + order[i] = int(order_list[i][35]) +order = sorted(order) +order_list = sorted(order_list) + +# Read the exact accelerations first +if periodic: + 
data = np.loadtxt("gravity_checks_exact_periodic_step%.4d.dat" % step) +else: + data = np.loadtxt("gravity_checks_exact_step%.4d.dat" % step) +exact_ids = data[:, 0] +exact_pos = data[:, 1:4] +exact_a = data[:, 4:7] +exact_pot = data[:, 7] +# Sort stuff +sort_index = np.argsort(exact_ids) +exact_ids = exact_ids[sort_index] +exact_pos = exact_pos[sort_index, :] +exact_a = exact_a[sort_index, :] +exact_pot = exact_pot[sort_index] +exact_a_norm = np.sqrt(exact_a[:, 0] ** 2 + exact_a[:, 1] ** 2 + exact_a[:, 2] ** 2) + +print("Number of particles tested:", np.size(exact_ids)) + +# Start the plot +plt.figure() + +count = 0 + +# Get the Gadget-2 data if existing +if periodic: + gadget2_file_list = glob.glob("forcetest_gadget2_periodic.txt") +else: + gadget2_file_list = glob.glob("forcetest_gadget2.txt") +if len(gadget2_file_list) != 0: + + gadget2_data = np.loadtxt(gadget2_file_list[0]) + gadget2_ids = gadget2_data[:, 0] + gadget2_pos = gadget2_data[:, 1:4] + gadget2_a_exact = gadget2_data[:, 4:7] + gadget2_a_grav = gadget2_data[:, 7:10] + + # Sort stuff + sort_index = np.argsort(gadget2_ids) + gadget2_ids = gadget2_ids[sort_index] + gadget2_pos = gadget2_pos[sort_index, :] + gadget2_a_exact = gadget2_a_exact[sort_index, :] + gadget2_exact_a_norm = np.sqrt( + gadget2_a_exact[:, 0] ** 2 + + gadget2_a_exact[:, 1] ** 2 + + gadget2_a_exact[:, 2] ** 2 + ) + gadget2_a_grav = gadget2_a_grav[sort_index, :] + + # Cross-checks + if not np.array_equal(exact_ids, gadget2_ids): + print("Comparing different IDs !") + + if np.max(np.abs(exact_pos - gadget2_pos) / np.abs(gadget2_pos)) > 1e-6: + print("Comparing different positions ! 
max difference:") + index = np.argmax( + exact_pos[:, 0] ** 2 + + exact_pos[:, 1] ** 2 + + exact_pos[:, 2] ** 2 + - gadget2_pos[:, 0] ** 2 + - gadget2_pos[:, 1] ** 2 + - gadget2_pos[:, 2] ** 2 + ) + print( + "Gadget2 (id=%d):" % gadget2_ids[index], + gadget2_pos[index, :], + "exact (id=%d):" % exact_ids[index], + exact_pos[index, :], + "\n", + ) + + diff = np.abs(exact_a_norm - gadget2_exact_a_norm) / np.abs(gadget2_exact_a_norm) + max_diff = np.max(diff) + if max_diff > 2e-6: + print("Comparing different exact accelerations !") + print( + "Median=", + np.median(diff), + "Mean=", + np.mean(diff), + "99%=", + np.percentile(diff, 99), + ) + print("max difference ( relative diff =", max_diff, "):") + # index = np.argmax(exact_a[:,0]**2 + exact_a[:,1]**2 + exact_a[:,2]**2 - gadget2_a_exact[:,0]**2 - gadget2_a_exact[:,1]**2 - gadget2_a_exact[:,2]**2) + index = np.argmax(diff) + print( + "a_exact --- Gadget2:", + gadget2_a_exact[index, :], + "exact:", + exact_a[index, :], + ) + print( + "pos --- Gadget2: (id=%d):" % gadget2_ids[index], + gadget2_pos[index, :], + "exact (id=%d):" % gadget2_ids[index], + gadget2_pos[index, :], + "\n", + ) + + # Compute the error norm + diff = gadget2_a_exact - gadget2_a_grav + + norm_diff = np.sqrt(diff[:, 0] ** 2 + diff[:, 1] ** 2 + diff[:, 2] ** 2) + norm_a = np.sqrt( + gadget2_a_exact[:, 0] ** 2 + + gadget2_a_exact[:, 1] ** 2 + + gadget2_a_exact[:, 2] ** 2 + ) + + norm_error = norm_diff / norm_a + error_x = abs(diff[:, 0]) / norm_a + error_y = abs(diff[:, 1]) / norm_a + error_z = abs(diff[:, 2]) / norm_a + + # Bin the error + norm_error_hist = np.histogram(norm_error, bins=bin_edges, density=False)[0] / ( + np.size(norm_error) * bin_size + ) + error_x_hist = np.histogram(error_x, bins=bin_edges, density=False)[0] / ( + np.size(norm_error) * bin_size + ) + error_y_hist = np.histogram(error_y, bins=bin_edges, density=False)[0] / ( + np.size(norm_error) * bin_size + ) + error_z_hist = np.histogram(error_z, bins=bin_edges, density=False)[0]
/ ( + np.size(norm_error) * bin_size + ) + + norm_median = np.median(norm_error) + median_x = np.median(error_x) + median_y = np.median(error_y) + median_z = np.median(error_z) + + norm_per99 = np.percentile(norm_error, 99) + per99_x = np.percentile(error_x, 99) + per99_y = np.percentile(error_y, 99) + per99_z = np.percentile(error_z, 99) + + norm_max = np.max(norm_error) + max_x = np.max(error_x) + max_y = np.max(error_y) + max_z = np.max(error_z) + + print("Gadget-2 ---- ") + print("Norm: median= %f 99%%= %f max= %f" % (norm_median, norm_per99, norm_max)) + print("X : median= %f 99%%= %f max= %f" % (median_x, per99_x, max_x)) + print("Y : median= %f 99%%= %f max= %f" % (median_y, per99_y, max_y)) + print("Z : median= %f 99%%= %f max= %f" % (median_z, per99_z, max_z)) + print("") + + plt.subplot(231) + plt.text( + min_error * 1.5, + 1.55, + "$50\\%%\\rightarrow%.4f~~ 99\\%%\\rightarrow%.4f$" % (norm_median, norm_per99), + ha="left", + va="top", + alpha=0.8, + ) + plt.semilogx(bins, norm_error_hist, "k--", label="Gadget-2", alpha=0.8) + plt.subplot(232) + plt.semilogx(bins, error_x_hist, "k--", label="Gadget-2", alpha=0.8) + plt.text( + min_error * 1.5, + 1.55, + "$50\\%%\\rightarrow%.4f~~ 99\\%%\\rightarrow%.4f$" % (median_x, per99_x), + ha="left", + va="top", + alpha=0.8, + ) + plt.subplot(233) + plt.semilogx(bins, error_y_hist, "k--", label="Gadget-2", alpha=0.8) + plt.text( + min_error * 1.5, + 1.55, + "$50\\%%\\rightarrow%.4f~~ 99\\%%\\rightarrow%.4f$" % (median_y, per99_y), + ha="left", + va="top", + alpha=0.8, + ) + plt.subplot(234) + plt.semilogx(bins, error_z_hist, "k--", label="Gadget-2", alpha=0.8) + plt.text( + min_error * 1.5, + 1.55, + "$50\\%%\\rightarrow%.4f~~ 99\\%%\\rightarrow%.4f$" % (median_z, per99_z), + ha="left", + va="top", + alpha=0.8, + ) + + count += 1 + + +# Plot the different histograms +for i in range(num_order): + data = np.loadtxt(order_list[i]) + ids = data[:, 0] + pos = data[:, 1:4] + a_grav = data[:, 4:7] + pot = data[:, 7] + + # 
Sort stuff + sort_index = np.argsort(ids) + ids = ids[sort_index] + pos = pos[sort_index, :] + a_grav = a_grav[sort_index, :] + pot = pot[sort_index] + + # Cross-checks + if not np.array_equal(exact_ids, ids): + print("Comparing different IDs !") + + if np.max(np.abs(exact_pos - pos) / np.abs(pos)) > 1e-6: + print("Comparing different positions ! max difference:") + index = np.argmax( + exact_pos[:, 0] ** 2 + + exact_pos[:, 1] ** 2 + + exact_pos[:, 2] ** 2 + - pos[:, 0] ** 2 + - pos[:, 1] ** 2 + - pos[:, 2] ** 2 + ) + print( + "SWIFT (id=%d):" % ids[index], + pos[index, :], + "exact (id=%d):" % exact_ids[index], + exact_pos[index, :], + "\n", + ) + + # Compute the error norm + diff = exact_a - a_grav + diff_pot = exact_pot - pot + + # Correct for different normalization of potential + print("Difference in normalization of potential:", np.mean(diff_pot), end=" ") + print( + "std_dev=", + np.std(diff_pot), + "99-percentile:", + np.percentile(diff_pot, 99) - np.median(diff_pot), + "1-percentile:", + np.median(diff_pot) - np.percentile(diff_pot, 1), + ) + + exact_pot -= np.mean(diff_pot) + diff_pot = exact_pot - pot + + norm_diff = np.sqrt(diff[:, 0] ** 2 + diff[:, 1] ** 2 + diff[:, 2] ** 2) + + norm_error = norm_diff / exact_a_norm + error_x = abs(diff[:, 0]) / exact_a_norm + error_y = abs(diff[:, 1]) / exact_a_norm + error_z = abs(diff[:, 2]) / exact_a_norm + error_pot = abs(diff_pot) / abs(exact_pot) + + # Bin the error + norm_error_hist = np.histogram(norm_error, bins=bin_edges, density=False)[0] / ( + np.size(norm_error) * bin_size + ) + error_x_hist = np.histogram(error_x, bins=bin_edges, density=False)[0] / ( + np.size(norm_error) * bin_size + ) + error_y_hist = np.histogram(error_y, bins=bin_edges, density=False)[0] / ( + np.size(norm_error) * bin_size + ) + error_z_hist = np.histogram(error_z, bins=bin_edges, density=False)[0] / ( + np.size(norm_error) * bin_size + ) + error_pot_hist = np.histogram(error_pot, bins=bin_edges, density=False)[0] / ( +
np.size(norm_error) * bin_size + ) + + norm_median = np.median(norm_error) + median_x = np.median(error_x) + median_y = np.median(error_y) + median_z = np.median(error_z) + median_pot = np.median(error_pot) + + norm_per99 = np.percentile(norm_error, 99) + per99_x = np.percentile(error_x, 99) + per99_y = np.percentile(error_y, 99) + per99_z = np.percentile(error_z, 99) + per99_pot = np.percentile(error_pot, 99) + + norm_max = np.max(norm_error) + max_x = np.max(error_x) + max_y = np.max(error_y) + max_z = np.max(error_z) + max_pot = np.max(error_pot) + + print("Order %d ---- " % order[i]) + print("Norm: median= %f 99%%= %f max= %f" % (norm_median, norm_per99, norm_max)) + print("X : median= %f 99%%= %f max= %f" % (median_x, per99_x, max_x)) + print("Y : median= %f 99%%= %f max= %f" % (median_y, per99_y, max_y)) + print("Z : median= %f 99%%= %f max= %f" % (median_z, per99_z, max_z)) + print("Pot : median= %f 99%%= %f max= %f" % (median_pot, per99_pot, max_pot)) + print("") + + plt.subplot(231) + plt.semilogx( + bins, error_x_hist, color=cols[i], label="SWIFT m-poles order %d" % order[i] + ) + plt.text( + min_error * 1.5, + 1.5 - count / 10.0, + "$50\\%%\\rightarrow%.4f~~ 99\\%%\\rightarrow%.4f$" % (median_x, per99_x), + ha="left", + va="top", + color=cols[i], + ) + plt.subplot(232) + plt.semilogx( + bins, error_y_hist, color=cols[i], label="SWIFT m-poles order %d" % order[i] + ) + plt.text( + min_error * 1.5, + 1.5 - count / 10.0, + "$50\\%%\\rightarrow%.4f~~ 99\\%%\\rightarrow%.4f$" % (median_y, per99_y), + ha="left", + va="top", + color=cols[i], + ) + plt.subplot(233) + plt.semilogx( + bins, error_z_hist, color=cols[i], label="SWIFT m-poles order %d" % order[i] + ) + plt.text( + min_error * 1.5, + 1.5 - count / 10.0, + "$50\\%%\\rightarrow%.4f~~ 99\\%%\\rightarrow%.4f$" % (median_z, per99_z), + ha="left", + va="top", + color=cols[i], + ) + plt.subplot(234) + plt.semilogx( + bins, norm_error_hist, color=cols[i], label="SWIFT m-poles order %d" % order[i] + ) + 
plt.text( + min_error * 1.5, + 1.5 - count / 10.0, + "$50\\%%\\rightarrow%.4f~~ 99\\%%\\rightarrow%.4f$" % (norm_median, norm_per99), + ha="left", + va="top", + color=cols[i], + ) + plt.subplot(235) + plt.semilogx( + bins, error_pot_hist, color=cols[i], label="SWIFT m-poles order %d" % order[i] + ) + plt.text( + min_error * 1.5, + 1.5 - count / 10.0, + "$50\\%%\\rightarrow%.4f~~ 99\\%%\\rightarrow%.4f$" % (median_pot, per99_pot), + ha="left", + va="top", + color=cols[i], + ) + + count += 1 + +plt.subplot(231) +plt.xlabel("$\delta a_x/|\overrightarrow{a}_{exact}|$") +# plt.ylabel("Density") +plt.xlim(min_error, max_error) +plt.ylim(0, 1.75) +# plt.legend(loc="center left") +plt.subplot(232) +plt.xlabel("$\delta a_y/|\overrightarrow{a}_{exact}|$") +# plt.ylabel("Density") +plt.xlim(min_error, max_error) +plt.ylim(0, 1.75) +# plt.legend(loc="center left") +plt.subplot(233) +plt.xlabel("$\delta a_z/|\overrightarrow{a}_{exact}|$") +# plt.ylabel("Density") +plt.xlim(min_error, max_error) +plt.ylim(0, 1.75) +plt.subplot(234) +plt.xlabel("$|\delta \overrightarrow{a}|/|\overrightarrow{a}_{exact}|$") +# plt.ylabel("Density") +plt.xlim(min_error, max_error) +plt.ylim(0, 2.5) +plt.legend(loc="upper left") +plt.subplot(235) +plt.xlabel("$\delta \phi/\phi_{exact}$") +# plt.ylabel("Density") +plt.xlim(min_error, max_error) +plt.ylim(0, 1.75) +# plt.legend(loc="center left") + + +plt.savefig("gravity_checks_step%.4d.png" % step, dpi=200) +plt.savefig("gravity_checks_step%.4d.pdf" % step, dpi=200) diff --git a/tools/plot_scaling_results.py b/tools/plot_scaling_results.py new file mode 100755 index 0000000000000000000000000000000000000000..2c29d93f88cbe4bde83f3473e9f738a526480c1c --- /dev/null +++ b/tools/plot_scaling_results.py @@ -0,0 +1,365 @@ +#!/usr/bin/env python +# +# Usage: +# python plot_scaling_results.py input-file1-ext input-file2-ext ... +# +# Description: +# Plots speed up, parallel efficiency and time to solution given a "timesteps" output file generated by SWIFT. 
+# +# Example: +# python plot_scaling_results.py _hreads_cosma_stdout.txt _threads_knl_stdout.txt +# +# The working directory should contain files 1_threads_cosma_stdout.txt - 64_threads_cosma_stdout.txt and 1_threads_knl_stdout.txt - 64_threads_knl_stdout.txt, i.e wall clock time for each run using a given number of threads + +import sys +import glob +import re +import numpy as np +import matplotlib.pyplot as plt +import scipy.stats +import ntpath + +params = { + "axes.labelsize": 14, + "axes.titlesize": 18, + "font.size": 12, + "legend.fontsize": 12, + "xtick.labelsize": 14, + "ytick.labelsize": 14, + "text.usetex": True, + "figure.subplot.left": 0.055, + "figure.subplot.right": 0.98, + "figure.subplot.bottom": 0.05, + "figure.subplot.top": 0.95, + "figure.subplot.wspace": 0.14, + "figure.subplot.hspace": 0.12, + "lines.markersize": 6, + "lines.linewidth": 3.0, + "text.latex.unicode": True, +} +plt.rcParams.update(params) +plt.rc("font", **{"family": "sans-serif", "sans-serif": ["Times"]}) + +version = [] +branch = [] +revision = [] +hydro_scheme = [] +hydro_kernel = [] +hydro_neighbours = [] +hydro_eta = [] +threadList = [] +hexcols = [ + "#332288", + "#88CCEE", + "#44AA99", + "#117733", + "#999933", + "#DDCC77", + "#CC6677", + "#882255", + "#AA4499", + "#661100", + "#6699CC", + "#AA4466", + "#4477AA", +] +linestyle = ( + hexcols[0], + hexcols[1], + hexcols[3], + hexcols[5], + hexcols[6], + hexcols[8], + hexcols[2], + hexcols[4], + hexcols[7], + hexcols[9], +) +numTimesteps = 0 +legendTitle = " " + +inputFileNames = [] + +# Work out how many data series there are +if len(sys.argv) == 1: + print("Please specify an input file in the arguments.") + sys.exit() +else: + for fileName in sys.argv[1:]: + inputFileNames.append(fileName) + numOfSeries = int(len(sys.argv) - 1) + +# Get the names of the branch, Git revision, hydro scheme and hydro kernel +def parse_header(inputFile): + with open(inputFile, "r") as f: + found_end = False + for line in f: + if "Branch:" in 
line: + s = line.split() + line = s[2:] + branch.append(" ".join(line)) + elif "Revision:" in line: + s = line.split() + revision.append(s[2]) + elif "Hydrodynamic scheme:" in line: + line = line[2:-1] + s = line.split() + line = s[2:] + hydro_scheme.append(" ".join(line)) + elif "Hydrodynamic kernel:" in line: + line = line[2:-1] + s = line.split() + line = s[2:5] + hydro_kernel.append(" ".join(line)) + elif "neighbours:" in line: + s = line.split() + hydro_neighbours.append(s[4]) + elif "Eta:" in line: + s = line.split() + hydro_eta.append(s[2]) + return + + +# Parse file and return total time taken, speed up and parallel efficiency +def parse_files(): + + totalTime = [] + sumTotal = [] + speedUp = [] + parallelEff = [] + + for i in range(0, numOfSeries): # Loop over each data series + + # Get path to set of files + path, name = ntpath.split(inputFileNames[i]) + + # Get each file that starts with the cmd line arg + file_list = glob.glob(inputFileNames[i] + "*") + + threadList.append([]) + + # Remove path from file names + for j in range(0, len(file_list)): + p, filename = ntpath.split(file_list[j]) + file_list[j] = filename + + # Create a list of threads using the list of files + for fileName in file_list: + s = re.split(r"[_.]+", fileName) + threadList[i].append(int(s[1])) + + # Re-add path once each file has been found + if len(path) != 0: + for j in range(0, len(file_list)): + file_list[j] = path + "/" + file_list[j] + + # Sort the thread list in ascending order and save the indices + sorted_indices = np.argsort(threadList[i]) + threadList[i].sort() + + # Sort the file list in ascending order acording to the thread number + file_list = [file_list[j] for j in sorted_indices] + + parse_header(file_list[0]) + + branch[i] = branch[i].replace("_", "\\_") + + # version.append("$\\textrm{%s}$"%str(branch[i]))# + " " + revision[i])# + "\n" + hydro_scheme[i] + + # "\n" + hydro_kernel[i] + r", $N_{ngb}=%d$"%float(hydro_neighbours[i]) + + # r", 
$\eta=%.3f$"%float(hydro_eta[i])) + totalTime.append([]) + speedUp.append([]) + parallelEff.append([]) + + # Loop over all files for a given series and load the times + for j in range(0, len(file_list)): + times = np.loadtxt(file_list[j], usecols=(9,)) + updates = np.loadtxt(file_list[j], usecols=(6,)) + totalTime[i].append(np.sum(times)) + + sumTotal.append(np.sum(totalTime[i])) + + # Sort the total times in descending order + sorted_indices = np.argsort(sumTotal)[::-1] + + totalTime = [totalTime[j] for j in sorted_indices] + branchNew = [branch[j] for j in sorted_indices] + + for i in range(0, numOfSeries): + version.append("$\\textrm{%s}$" % str(branchNew[i])) + + global numTimesteps + numTimesteps = len(times) + + # Find speed-up and parallel efficiency + for i in range(0, numOfSeries): + for j in range(0, len(file_list)): + speedUp[i].append(totalTime[i][0] / totalTime[i][j]) + parallelEff[i].append(speedUp[i][j] / threadList[i][j]) + + return (totalTime, speedUp, parallelEff) + + +def print_results(totalTime, parallelEff, version): + + for i in range(0, numOfSeries): + print(" ") + print("------------------------------------") + print(version[i]) + print("------------------------------------") + print("Wall clock time for: {} time steps".format(numTimesteps)) + print("------------------------------------") + + for j in range(0, len(threadList[i])): + print(str(threadList[i][j]) + " threads: {}".format(totalTime[i][j])) + + print(" ") + print("------------------------------------") + print("Parallel Efficiency for: {} time steps".format(numTimesteps)) + print("------------------------------------") + + for j in range(0, len(threadList[i])): + print(str(threadList[i][j]) + " threads: {}".format(parallelEff[i][j])) + + return + + +# Returns a lighter/darker version of the colour +def color_variant(hex_color, brightness_offset=1): + + rgb_hex = [hex_color[x : x + 2] for x in [1, 3, 5]] + new_rgb_int = [int(hex_value, 16) + brightness_offset for hex_value in 
rgb_hex] + new_rgb_int = [ + min([255, max([0, i])]) for i in new_rgb_int + ] # make sure new values are between 0 and 255 + # hex() produces "0x88", we want just "88" + + return "#" + "".join([hex(i)[2:] for i in new_rgb_int]) + + +def plot_results(totalTime, speedUp, parallelEff, numSeries): + + fig, axarr = plt.subplots(2, 2, figsize=(10, 10), frameon=True) + speedUpPlot = axarr[0, 0] + parallelEffPlot = axarr[0, 1] + totalTimePlot = axarr[1, 0] + emptyPlot = axarr[1, 1] + + # Plot speed up + speedUpPlot.plot(threadList[0], threadList[0], linestyle="--", lw=1.5, color="0.2") + for i in range(0, numSeries): + speedUpPlot.plot(threadList[0], speedUp[i], linestyle[i], label=version[i]) + + speedUpPlot.set_ylabel("${\\rm Speed\\textendash up}$", labelpad=0.0) + speedUpPlot.set_xlabel("${\\rm Threads}$", labelpad=0.0) + speedUpPlot.set_xlim([0.7, threadList[0][-1] + 1]) + speedUpPlot.set_ylim([0.7, threadList[0][-1] + 1]) + + # Plot parallel efficiency + parallelEffPlot.plot( + [threadList[0][0], 10 ** np.floor(np.log10(threadList[0][-1]) + 1)], + [1, 1], + "k--", + lw=1.5, + color="0.2", + ) + parallelEffPlot.plot( + [threadList[0][0], 10 ** np.floor(np.log10(threadList[0][-1]) + 1)], + [0.9, 0.9], + "k--", + lw=1.5, + color="0.2", + ) + parallelEffPlot.plot( + [threadList[0][0], 10 ** np.floor(np.log10(threadList[0][-1]) + 1)], + [0.75, 0.75], + "k--", + lw=1.5, + color="0.2", + ) + parallelEffPlot.plot( + [threadList[0][0], 10 ** np.floor(np.log10(threadList[0][-1]) + 1)], + [0.5, 0.5], + "k--", + lw=1.5, + color="0.2", + ) + for i in range(0, numSeries): + parallelEffPlot.plot(threadList[0], parallelEff[i], linestyle[i]) + + parallelEffPlot.set_xscale("log") + parallelEffPlot.set_ylabel("${\\rm Parallel~efficiency}$", labelpad=0.0) + parallelEffPlot.set_xlabel("${\\rm Threads}$", labelpad=0.0) + parallelEffPlot.set_ylim([0, 1.1]) + parallelEffPlot.set_xlim([0.9, 10 ** (np.floor(np.log10(threadList[0][-1])) + 0.5)]) + + # Plot time to solution + for i in range(0, 
numOfSeries): + pts = [1, 10 ** np.floor(np.log10(threadList[i][-1]) + 1)] + totalTimePlot.loglog(pts, totalTime[i][0] / pts, "k--", lw=1.0, color="0.2") + totalTimePlot.loglog( + threadList[i], totalTime[i], linestyle[i], label=version[i] + ) + + y_min = 10 ** np.floor(np.log10(np.min(totalTime[:][0]) * 0.6)) + y_max = 1.0 * 10 ** np.floor(np.log10(np.max(totalTime[:][0]) * 1.5) + 1) + totalTimePlot.set_xscale("log") + totalTimePlot.set_xlabel("${\\rm Threads}$", labelpad=0.0) + totalTimePlot.set_ylabel("${\\rm Time~to~solution}~[{\\rm ms}]$", labelpad=0.0) + totalTimePlot.set_xlim([0.9, 10 ** (np.floor(np.log10(threadList[0][-1])) + 0.5)]) + totalTimePlot.set_ylim(y_min, y_max) + + totalTimePlot.legend( + bbox_to_anchor=(1.21, 0.97), + loc=2, + borderaxespad=0.0, + prop={"size": 12}, + frameon=False, + title=legendTitle, + ) + emptyPlot.axis("off") + + for i, txt in enumerate(threadList[0]): + if ( + 2 ** np.floor(np.log2(threadList[0][i])) == threadList[0][i] + ): # only powers of 2 + speedUpPlot.annotate( + "$%s$" % txt, + (threadList[0][i], speedUp[0][i]), + (threadList[0][i], speedUp[0][i] + 0.3), + color=hexcols[0], + ) + parallelEffPlot.annotate( + "$%s$" % txt, + (threadList[0][i], parallelEff[0][i]), + (threadList[0][i], parallelEff[0][i] + 0.02), + color=hexcols[0], + ) + totalTimePlot.annotate( + "$%s$" % txt, + (threadList[0][i], totalTime[0][i]), + (threadList[0][i], totalTime[0][i] * 1.1), + color=hexcols[0], + ) + + # fig.suptitle("Thread Speed Up, Parallel Efficiency and Time To Solution for {} Time Steps of Cosmo Volume\n Cmd Line: {}, Platform: {}".format(numTimesteps),cmdLine,platform)) + fig.suptitle( + "${\\rm Speed\\textendash up,~parallel~efficiency~and~time~to~solution~for}~%d~{\\rm time\\textendash steps}$" + % numTimesteps, + fontsize=16, + ) + + return + + +# Calculate results +(totalTime, speedUp, parallelEff) = parse_files() + +legendTitle = version[0] + +plot_results(totalTime, speedUp, parallelEff, numOfSeries) + 
+print_results(totalTime, parallelEff, version) + +# And plot +plt.show() diff --git a/tools/plot_scaling_results_breakdown.py b/tools/plot_scaling_results_breakdown.py new file mode 100755 index 0000000000000000000000000000000000000000..570ec37ee908dbbe51bfc12fa2c3af59d2d8800a --- /dev/null +++ b/tools/plot_scaling_results_breakdown.py @@ -0,0 +1,378 @@ +#!/usr/bin/env python +# +# Usage: +# python plot_scaling_results.py input-file1-ext input-file2-ext ... +# +# Description: +# Plots speed up, parallel efficiency and time to solution given a "timesteps" output file generated by SWIFT. +# +# Example: +# python plot_scaling_results.py _hreads_cosma_stdout.txt _threads_knl_stdout.txt +# +# The working directory should contain files 1_threads_cosma_stdout.txt - 64_threads_cosma_stdout.txt and 1_threads_knl_stdout.txt - 64_threads_knl_stdout.txt, i.e wall clock time for each run using a given number of threads + +import sys +import glob +import re +import numpy as np +import matplotlib.pyplot as plt +import scipy.stats +import ntpath + +params = { + "axes.labelsize": 14, + "axes.titlesize": 18, + "font.size": 12, + "legend.fontsize": 12, + "xtick.labelsize": 14, + "ytick.labelsize": 14, + "text.usetex": True, + "figure.subplot.left": 0.055, + "figure.subplot.right": 0.98, + "figure.subplot.bottom": 0.05, + "figure.subplot.top": 0.95, + "figure.subplot.wspace": 0.14, + "figure.subplot.hspace": 0.12, + "lines.markersize": 6, + "lines.linewidth": 3.0, + "text.latex.unicode": True, +} +plt.rcParams.update(params) +plt.rc("font", **{"family": "sans-serif", "sans-serif": ["Times"]}) + +version = [] +branch = [] +revision = [] +hydro_scheme = [] +hydro_kernel = [] +hydro_neighbours = [] +hydro_eta = [] +threadList = [] +hexcols = [ + "#332288", + "#88CCEE", + "#44AA99", + "#117733", + "#999933", + "#DDCC77", + "#CC6677", + "#882255", + "#AA4499", + "#661100", + "#6699CC", + "#AA4466", + "#4477AA", +] +linestyle = ( + hexcols[0], + hexcols[1], + hexcols[3], + hexcols[5], + 
hexcols[6], + hexcols[8], + hexcols[2], + hexcols[4], + hexcols[7], + hexcols[9], +) +numTimesteps = 0 +legendTitle = " " + +inputFileNames = [] + +# Work out how many data series there are +if len(sys.argv) == 1: + print("Please specify an input file in the arguments.") + sys.exit() +else: + for fileName in sys.argv[1:]: + inputFileNames.append(fileName) + numOfSeries = int(len(sys.argv) - 1) + +# Get the names of the branch, Git revision, hydro scheme and hydro kernel +def parse_header(inputFile): + with open(inputFile, "r") as f: + found_end = False + for line in f: + if "Branch:" in line: + s = line.split() + line = s[2:] + branch.append(" ".join(line)) + elif "Revision:" in line: + s = line.split() + revision.append(s[2]) + elif "Hydrodynamic scheme:" in line: + line = line[2:-1] + s = line.split() + line = s[2:] + hydro_scheme.append(" ".join(line)) + elif "Hydrodynamic kernel:" in line: + line = line[2:-1] + s = line.split() + line = s[2:5] + hydro_kernel.append(" ".join(line)) + elif "neighbours:" in line: + s = line.split() + hydro_neighbours.append(s[4]) + elif "Eta:" in line: + s = line.split() + hydro_eta.append(s[2]) + return + + +# Parse file and return total time taken, speed up and parallel efficiency +def parse_files(): + + totalTime = [] + sumTotal = [] + speedUp = [] + parallelEff = [] + + for i in range(0, numOfSeries): # Loop over each data series + + # Get path to set of files + path, name = ntpath.split(inputFileNames[i]) + + # Get each file that starts with the cmd line arg + file_list = glob.glob(inputFileNames[i] + "*") + + threadList.append([]) + + # Remove path from file names + for j in range(0, len(file_list)): + p, filename = ntpath.split(file_list[j]) + file_list[j] = filename + + # Create a list of threads using the list of files + for fileName in file_list: + s = re.split(r"[_.]+", fileName) + threadList[i].append(int(s[1])) + + # Re-add path once each file has been found + if len(path) != 0: + for j in range(0, len(file_list)): + 
file_list[j] = path + "/" + file_list[j] + + # Sort the thread list in ascending order and save the indices + sorted_indices = np.argsort(threadList[i]) + threadList[i].sort() + + # Sort the file list in ascending order acording to the thread number + file_list = [file_list[j] for j in sorted_indices] + + parse_header(file_list[0]) + + branch[i] = branch[i].replace("_", "\\_") + + # version.append("$\\textrm{%s}$"%str(branch[i]))# + " " + revision[i])# + "\n" + hydro_scheme[i] + + # "\n" + hydro_kernel[i] + r", $N_{ngb}=%d$"%float(hydro_neighbours[i]) + + # r", $\eta=%.3f$"%float(hydro_eta[i])) + totalTime.append([]) + speedUp.append([]) + parallelEff.append([]) + + # Loop over all files for a given series and load the times + for j in range(0, len(file_list)): + times = np.loadtxt(file_list[j], usecols=(9,)) + updates = np.loadtxt(file_list[j], usecols=(6,)) + totalTime[i].append(np.sum(times)) + + sumTotal.append(np.sum(totalTime[i])) + + # Sort the total times in descending order + sorted_indices = np.argsort(sumTotal)[::-1] + + totalTime = [totalTime[j] for j in sorted_indices] + branchNew = [branch[j] for j in sorted_indices] + + for i in range(0, numOfSeries): + version.append("$\\textrm{%s}$" % str(branchNew[i])) + + global numTimesteps + numTimesteps = len(times) + + # Find speed-up and parallel efficiency + for i in range(0, numOfSeries): + for j in range(0, len(file_list)): + speedUp[i].append(totalTime[i][0] / totalTime[i][j]) + parallelEff[i].append(speedUp[i][j] / threadList[i][j]) + + return (totalTime, speedUp, parallelEff) + + +def print_results(totalTime, parallelEff, version): + + for i in range(0, numOfSeries): + print(" ") + print("------------------------------------") + print(version[i]) + print("------------------------------------") + print("Wall clock time for: {} time steps".format(numTimesteps)) + print("------------------------------------") + + for j in range(0, len(threadList[i])): + print(str(threadList[i][j]) + " threads: 
{}".format(totalTime[i][j])) + + print(" ") + print("------------------------------------") + print("Parallel Efficiency for: {} time steps".format(numTimesteps)) + print("------------------------------------") + + for j in range(0, len(threadList[i])): + print(str(threadList[i][j]) + " threads: {}".format(parallelEff[i][j])) + + return + + +# Returns a lighter/darker version of the colour +def color_variant(hex_color, brightness_offset=1): + + rgb_hex = [hex_color[x : x + 2] for x in [1, 3, 5]] + new_rgb_int = [int(hex_value, 16) + brightness_offset for hex_value in rgb_hex] + new_rgb_int = [ + min([255, max([0, i])]) for i in new_rgb_int + ] # make sure new values are between 0 and 255 + # hex() produces "0x88", we want just "88" + + return "#" + "".join([hex(i)[2:] for i in new_rgb_int]) + + +def plot_results(totalTime, speedUp, parallelEff, numSeries): + + fig, axarr = plt.subplots(2, 2, figsize=(10, 10), frameon=True) + speedUpPlot = axarr[0, 0] + parallelEffPlot = axarr[0, 1] + totalTimePlot = axarr[1, 0] + emptyPlot = axarr[1, 1] + + # Plot speed up + speedUpPlot.plot(threadList[0], threadList[0], linestyle="--", lw=1.5, color="0.2") + for i in range(0, numSeries): + speedUpPlot.plot(threadList[0], speedUp[i], linestyle[i], label=version[i]) + + speedUpPlot.set_ylabel("${\\rm Speed\\textendash up}$", labelpad=0.0) + speedUpPlot.set_xlabel("${\\rm Threads}$", labelpad=0.0) + speedUpPlot.set_xlim([0.7, threadList[0][-1] + 1]) + speedUpPlot.set_ylim([0.7, threadList[0][-1] + 1]) + + # Plot parallel efficiency + parallelEffPlot.plot( + [threadList[0][0], 10 ** np.floor(np.log10(threadList[0][-1]) + 1)], + [1, 1], + "k--", + lw=1.5, + color="0.2", + ) + parallelEffPlot.plot( + [threadList[0][0], 10 ** np.floor(np.log10(threadList[0][-1]) + 1)], + [0.9, 0.9], + "k--", + lw=1.5, + color="0.2", + ) + parallelEffPlot.plot( + [threadList[0][0], 10 ** np.floor(np.log10(threadList[0][-1]) + 1)], + [0.75, 0.75], + "k--", + lw=1.5, + color="0.2", + ) + 
parallelEffPlot.plot( + [threadList[0][0], 10 ** np.floor(np.log10(threadList[0][-1]) + 1)], + [0.5, 0.5], + "k--", + lw=1.5, + color="0.2", + ) + for i in range(0, numSeries): + parallelEffPlot.plot(threadList[0], parallelEff[i], linestyle[i]) + + parallelEffPlot.set_xscale("log") + parallelEffPlot.set_ylabel("${\\rm Parallel~efficiency}$", labelpad=0.0) + parallelEffPlot.set_xlabel("${\\rm Threads}$", labelpad=0.0) + parallelEffPlot.set_ylim([0, 1.1]) + parallelEffPlot.set_xlim([0.9, 10 ** (np.floor(np.log10(threadList[0][-1])) + 0.5)]) + + # Plot time to solution + for i in range(0, numSeries): + for j in range(0, len(threadList[0])): + totalTime[i][j] = totalTime[i][j] * threadList[i][j] + if i > 1: + totalTime[i][j] = totalTime[i][j] + totalTime[i - 1][j] + totalTimePlot.plot(threadList[0], totalTime[i], linestyle[i], label=version[i]) + + if i > 1: + colour = color_variant(linestyle[i], 100) + totalTimePlot.fill_between( + threadList[0], + np.array(totalTime[i]), + np.array(totalTime[i - 1]), + facecolor=colour, + ) + elif i == 1: + colour = color_variant(linestyle[i], 100) + totalTimePlot.fill_between(threadList[0], totalTime[i], facecolor=colour) + + totalTimePlot.set_xscale("log") + totalTimePlot.ticklabel_format(style="sci", axis="y", scilimits=(0, 0)) + totalTimePlot.set_xlabel("${\\rm Threads}$", labelpad=0.0) + totalTimePlot.set_ylabel( + "${\\rm Time~to~solution~x~No.~of~cores}~[{\\rm ms}]$", labelpad=0.0 + ) + totalTimePlot.set_xlim([0.9, 10 ** (np.floor(np.log10(threadList[0][-1])) + 0.5)]) + # totalTimePlot.set_ylim(y_min, y_max) + + totalTimePlot.legend( + bbox_to_anchor=(1.21, 0.97), + loc=2, + borderaxespad=0.0, + prop={"size": 12}, + frameon=False, + title=legendTitle, + ) + emptyPlot.axis("off") + + for i, txt in enumerate(threadList[0]): + if ( + 2 ** np.floor(np.log2(threadList[0][i])) == threadList[0][i] + ): # only powers of 2 + speedUpPlot.annotate( + "$%s$" % txt, + (threadList[0][i], speedUp[0][i]), + (threadList[0][i], speedUp[0][i] + 
0.3), + color=hexcols[0], + ) + parallelEffPlot.annotate( + "$%s$" % txt, + (threadList[0][i], parallelEff[0][i]), + (threadList[0][i], parallelEff[0][i] + 0.02), + color=hexcols[0], + ) + totalTimePlot.annotate( + "$%s$" % txt, + (threadList[0][i], totalTime[0][i]), + (threadList[0][i], totalTime[0][i] * 1.1), + color=hexcols[0], + ) + + # fig.suptitle("Thread Speed Up, Parallel Efficiency and Time To Solution for {} Time Steps of Cosmo Volume\n Cmd Line: {}, Platform: {}".format(numTimesteps),cmdLine,platform)) + fig.suptitle( + "${\\rm Speed\\textendash up,~parallel~efficiency~and~time~to~solution~x~no.~of~cores~for}~%d~{\\rm time\\textendash steps}$" + % numTimesteps, + fontsize=16, + ) + + return + + +# Calculate results +(totalTime, speedUp, parallelEff) = parse_files() + +legendTitle = version[0] + +plot_results(totalTime, speedUp, parallelEff, numOfSeries) + +print_results(totalTime, parallelEff, version) + +# And plot +plt.show() diff --git a/examples/plot_task_dependencies.sh b/tools/plot_task_dependencies.sh similarity index 100% rename from examples/plot_task_dependencies.sh rename to tools/plot_task_dependencies.sh diff --git a/tools/plot_task_level.py b/tools/plot_task_level.py new file mode 100755 index 0000000000000000000000000000000000000000..23e3ec878a2b8ef0f4d3c56d91ef75026e012de8 --- /dev/null +++ b/tools/plot_task_level.py @@ -0,0 +1,47 @@ +#!/usr/bin/python +""" +Usage: + ./plot_task_level.py task_level.txt + +Description: + Plot the number of tasks for each depth level and each type of task. 
+""" + + +import pandas as pd +import matplotlib.pyplot as plt +import sys + +# get filename +filename = sys.argv[-1] + +# Column names +names = ["type", "subtype", "depth", "count"] + +# read file +data = pd.read_csv(filename, sep=" ", comment="#", names=names) + +# generate color map +cmap = plt.get_cmap("hsv") +N = data["depth"].max() + 5 + +# plot data +for i in range(data["depth"].max()): + ind = data["depth"] == i + label = "depth = %i" % i + c = cmap(i / N) + plt.plot( + data["type"][ind] + "_" + data["subtype"][ind], + data["count"][ind], + ".", + label=label, + color=c, + ) + +# modify figure parameters and show it +plt.gca().set_yscale("log") +plt.xticks(rotation=45) +plt.ylabel("Number of Tasks") +plt.gcf().subplots_adjust(bottom=0.15) +plt.legend() +plt.show() diff --git a/examples/process_cells b/tools/process_cells similarity index 100% rename from examples/process_cells rename to tools/process_cells diff --git a/examples/process_cells_helper b/tools/process_cells_helper similarity index 100% rename from examples/process_cells_helper rename to tools/process_cells_helper diff --git a/tools/task_plots/analyse_tasks.py b/tools/task_plots/analyse_tasks.py new file mode 100755 index 0000000000000000000000000000000000000000..ca41970c683a1680e9d1054c9d70d6370992a05e --- /dev/null +++ b/tools/task_plots/analyse_tasks.py @@ -0,0 +1,528 @@ +#!/usr/bin/env python +""" +Usage: + analyse_tasks.py [options] input.dat + +where input.dat is a thread info file for a step (MPI or non-MPI). Use the +'-y interval' flag of the swift and swift_mpi commands to create these +(you will also need to configure with the --enable-task-debugging option). + +The output is an analysis of the task timings, including deadtime per thread +and step, total amount of time spent for each task type, for the whole step +and per thread and the minimum and maximum times spent per task type. + +This file is part of SWIFT. +Copyright (c) 2017 Peter W. 
Draper (p.w.draper@durham.ac.uk) + +This program is free software: you can redistribute it and/or modify +it under the terms of the GNU Lesser General Public License as published +by the Free Software Foundation, either version 3 of the License, or +(at your option) any later version. + +This program is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. + +You should have received a copy of the GNU Lesser General Public License +along with this program. If not, see <http://www.gnu.org/licenses/>. +""" + +import matplotlib + +matplotlib.use("Agg") +import matplotlib.collections as collections +import matplotlib.ticker as plticker +import pylab as pl +import sys +import argparse + +# Handle the command line. +parser = argparse.ArgumentParser(description="Analyse task dumps") + +parser.add_argument("input", help="Thread data file (-y output)") +parser.add_argument( + "-v", + "--verbose", + dest="verbose", + help="Verbose output (default: False)", + default=False, + action="store_true", +) +parser.add_argument( + "-r", + "--rank", + dest="rank", + help="Rank to process (default: all)", + default="all", + action="store", +) + +args = parser.parse_args() +infile = args.input + +# Tasks and subtypes. Indexed as in tasks.h. 
+TASKTYPES = [ + "none", + "sort", + "self", + "pair", + "sub_self", + "sub_pair", + "init_grav", + "init_grav_out", + "ghost_in", + "ghost", + "ghost_out", + "extra_ghost", + "drift_part", + "drift_gpart", + "drift_gpart_out", + "end_force", + "kick1", + "kick2", + "timestep", + "send", + "recv", + "grav_long_range", + "grav_mm", + "grav_down_in", + "grav_down", + "grav_mesh", + "cooling", + "star_formation", + "sourceterms", + "logger", + "stars_ghost_in", + "stars_ghost", + "stars_ghost_out", + "stars_sort", + "count", +] + +SUBTYPES = [ + "none", + "density", + "gradient", + "force", + "grav", + "external_grav", + "tend", + "xv", + "rho", + "gpart", + "multipole", + "spart", + "stars_density", + "count", +] + +SIDS = [ + "(-1,-1,-1)", + "(-1,-1, 0)", + "(-1,-1, 1)", + "(-1, 0,-1)", + "(-1, 0, 0)", + "(-1, 0, 1)", + "(-1, 1,-1)", + "(-1, 1, 0)", + "(-1, 1, 1)", + "( 0,-1,-1)", + "( 0,-1, 0)", + "( 0,-1, 1)", + "( 0, 0,-1)", +] + +# Read input. +data = pl.loadtxt(infile) +full_step = data[0, :] + +# Do we have an MPI file? +full_step = data[0, :] +if full_step.size == 13: + print("# MPI mode") + mpimode = True + nranks = int(max(data[:, 0])) + 1 + print("# Number of ranks:", nranks) + rankcol = 0 + threadscol = 1 + taskcol = 2 + subtaskcol = 3 + ticcol = 5 + toccol = 6 + updates = int(full_step[7]) + g_updates = int(full_step[8]) + s_updates = int(full_step[9]) +else: + print("# non MPI mode") + nranks = 1 + mpimode = False + rankcol = -1 + threadscol = 0 + taskcol = 1 + subtaskcol = 2 + ticcol = 4 + toccol = 5 + updates = int(full_step[6]) + g_updates = int(full_step[7]) + s_updates = int(full_step[8]) + +# Get the CPU clock to convert ticks into milliseconds. 
+CPU_CLOCK = float(full_step[-1]) / 1000.0 +if args.verbose: + print("# CPU frequency:", CPU_CLOCK * 1000.0) +print("# updates:", updates) +print("# g_updates:", g_updates) +print("# s_updates:", s_updates) + +if mpimode: + if args.rank == "all": + ranks = list(range(nranks)) + else: + ranks = [int(args.rank)] + if ranks[0] >= nranks: + print("Error: maximum rank is " + str(nranks - 1)) + sys.exit(1) +else: + ranks = [1] + +maxthread = int(max(data[:, threadscol])) + 1 +print("# Maximum thread id:", maxthread) + +# Avoid start and end times of zero. +sdata = data[data[:, ticcol] != 0] +sdata = data[data[:, toccol] != 0] + +# Now we process the required ranks. +for rank in ranks: + if mpimode: + print("# Rank", rank) + data = sdata[sdata[:, rankcol] == rank] + full_step = data[0, :] + else: + data = sdata + + # Recover the start and end time + tic_step = int(full_step[ticcol]) + toc_step = int(full_step[toccol]) + data = data[1:, :] + + # Avoid start and end times of zero. + data = data[data[:, ticcol] != 0] + data = data[data[:, toccol] != 0] + + # Calculate the time range. + total_t = (toc_step - tic_step) / CPU_CLOCK + print("# Data range: ", total_t, "ms") + print() + + # Correct times to relative values. + start_t = float(tic_step) + data[:, ticcol] -= start_t + data[:, toccol] -= start_t + end_t = (toc_step - start_t) / CPU_CLOCK + + tasks = {} + tasks[-1] = [] + for i in range(maxthread): + tasks[i] = [] + + # Gather into by thread data. + num_lines = pl.shape(data)[0] + for line in range(num_lines): + thread = int(data[line, threadscol]) + tic = int(data[line, ticcol]) / CPU_CLOCK + toc = int(data[line, toccol]) / CPU_CLOCK + tasktype = int(data[line, taskcol]) + subtype = int(data[line, subtaskcol]) + sid = int(data[line, -1]) + + tasks[thread].append([tic, toc, tasktype, subtype, sid]) + + # Sort by tic and gather used threads. 
+ threadids = [] + for i in range(maxthread): + tasks[i] = sorted(tasks[i], key=lambda task: task[0]) + threadids.append(i) + + # Times per task. + print("# Task times:") + print("# -----------") + print( + "# {0:<17s}: {1:>7s} {2:>9s} {3:>9s} {4:>9s} {5:>9s} {6:>9s}".format( + "type/subtype", "count", "minimum", "maximum", "sum", "mean", "percent" + ) + ) + + alltasktimes = {} + sidtimes = {} + for i in threadids: + tasktimes = {} + for task in tasks[i]: + key = TASKTYPES[task[2]] + "/" + SUBTYPES[task[3]] + dt = task[1] - task[0] + if not key in tasktimes: + tasktimes[key] = [] + tasktimes[key].append(dt) + + if not key in alltasktimes: + alltasktimes[key] = [] + alltasktimes[key].append(dt) + + my_sid = task[4] + if my_sid > -1: + if not my_sid in sidtimes: + sidtimes[my_sid] = [] + sidtimes[my_sid].append(dt) + + print("# Thread : ", i) + for key in sorted(tasktimes.keys()): + taskmin = min(tasktimes[key]) + taskmax = max(tasktimes[key]) + tasksum = sum(tasktimes[key]) + print( + "{0:19s}: {1:7d} {2:9.4f} {3:9.4f} {4:9.4f} {5:9.4f} {6:9.2f}".format( + key, + len(tasktimes[key]), + taskmin, + taskmax, + tasksum, + tasksum / len(tasktimes[key]), + tasksum / total_t * 100.0, + ) + ) + print() + + print("# All threads : ") + for key in sorted(alltasktimes.keys()): + taskmin = min(alltasktimes[key]) + taskmax = max(alltasktimes[key]) + tasksum = sum(alltasktimes[key]) + print( + "{0:18s}: {1:7d} {2:9.4f} {3:9.4f} {4:9.4f} {5:9.4f} {6:9.2f}".format( + key, + len(alltasktimes[key]), + taskmin, + taskmax, + tasksum, + tasksum / len(alltasktimes[key]), + tasksum / (len(threadids) * total_t) * 100.0, + ) + ) + print() + + # For pairs, show stuff sorted by SID + print("# By SID (all threads): ") + print( + "# {0:<17s}: {1:>7s} {2:>9s} {3:>9s} {4:>9s} {5:>9s} {6:>9s}".format( + "Pair/Sub-pair SID", "count", "minimum", "maximum", "sum", "mean", "percent" + ) + ) + + for sid in range(0, 13): + if sid in sidtimes: + sidmin = min(sidtimes[sid]) + sidmax = max(sidtimes[sid]) + 
sidsum = sum(sidtimes[sid]) + sidcount = len(sidtimes[sid]) + sidmean = sidsum / sidcount + else: + sidmin = 0.0 + sidmax = 0.0 + sidsum = 0.0 + sidcount = 0 + sidmean = 0.0 + print( + "{0:3d} {1:15s}: {2:7d} {3:9.4f} {4:9.4f} {5:9.4f} {6:9.4f} {7:9.2f}".format( + sid, + SIDS[sid], + sidcount, + sidmin, + sidmax, + sidsum, + sidmean, + sidsum / (len(threadids) * total_t) * 100.0, + ) + ) + print() + + # Dead times. + print("# Times not in tasks (deadtimes)") + print("# ------------------------------") + print("# Time before first task:") + print("# no. : {0:>9s} {1:>9s}".format("value", "percent")) + predeadtimes = [] + for i in threadids: + if len(tasks[i]) > 0: + predeadtime = tasks[i][0][0] + print( + "thread {0:2d}: {1:9.4f} {2:9.4f}".format( + i, predeadtime, predeadtime / total_t * 100.0 + ) + ) + predeadtimes.append(predeadtime) + else: + predeadtimes.append(0.0) + + predeadmin = min(predeadtimes) + predeadmax = max(predeadtimes) + predeadsum = sum(predeadtimes) + print( + "# : {0:>9s} {1:>9s} {2:>9s} {3:>9s} {4:>9s} {5:>9s}".format( + "count", "minimum", "maximum", "sum", "mean", "percent" + ) + ) + print( + "all : {0:9d} {1:9.4f} {2:9.4f} {3:9.4f} {4:9.4f} {5:9.2f}".format( + len(predeadtimes), + predeadmin, + predeadmax, + predeadsum, + predeadsum / len(predeadtimes), + predeadsum / (len(threadids) * total_t) * 100.0, + ) + ) + print() + + print("# Time after last task:") + print("# no. 
: {0:>9s} {1:>9s}".format("value", "percent")) + postdeadtimes = [] + for i in threadids: + if len(tasks[i]) > 0: + postdeadtime = total_t - tasks[i][-1][1] + print( + "thread {0:2d}: {1:9.4f} {2:9.4f}".format( + i, postdeadtime, postdeadtime / total_t * 100.0 + ) + ) + postdeadtimes.append(postdeadtime) + else: + postdeadtimes.append(0.0) + + postdeadmin = min(postdeadtimes) + postdeadmax = max(postdeadtimes) + postdeadsum = sum(postdeadtimes) + print( + "# : {0:>9s} {1:>9s} {2:>9s} {3:>9s} {4:>9s} {5:>9s}".format( + "count", "minimum", "maximum", "sum", "mean", "percent" + ) + ) + print( + "all : {0:9d} {1:9.4f} {2:9.4f} {3:9.4f} {4:9.4f} {5:9.2f}".format( + len(postdeadtimes), + postdeadmin, + postdeadmax, + postdeadsum, + postdeadsum / len(postdeadtimes), + postdeadsum / (len(threadids) * total_t) * 100.0, + ) + ) + print() + + # Time in engine, i.e. from first to last tasks. + print("# Time between tasks (engine deadtime):") + print( + "# no. : {0:>9s} {1:>9s} {2:>9s} {3:>9s} {4:>9s} {5:>9s}".format( + "count", "minimum", "maximum", "sum", "mean", "percent" + ) + ) + enginedeadtimes = [] + for i in threadids: + deadtimes = [] + if len(tasks[i]) > 0: + last = tasks[i][0][0] + else: + last = 0.0 + for task in tasks[i]: + dt = task[0] - last + deadtimes.append(dt) + last = task[1] + + # Drop first value, last value already gone. + if len(deadtimes) > 1: + deadtimes = deadtimes[1:] + else: + # Only one or fewer tasks, so no deadtime by definition. 
+ deadtimes = [0.0] + + deadmin = min(deadtimes) + deadmax = max(deadtimes) + deadsum = sum(deadtimes) + print( + "thread {0:2d}: {1:9d} {2:9.4f} {3:9.4f} {4:9.4f} {5:9.4f} {6:9.2f}".format( + i, + len(deadtimes), + deadmin, + deadmax, + deadsum, + deadsum / len(deadtimes), + deadsum / total_t * 100.0, + ) + ) + enginedeadtimes.extend(deadtimes) + + deadmin = min(enginedeadtimes) + deadmax = max(enginedeadtimes) + deadsum = sum(enginedeadtimes) + print( + "all : {0:9d} {1:9.4f} {2:9.4f} {3:9.4f} {4:9.4f} {5:9.2f}".format( + len(enginedeadtimes), + deadmin, + deadmax, + deadsum, + deadsum / len(enginedeadtimes), + deadsum / (len(threadids) * total_t) * 100.0, + ) + ) + print() + + # All times in step. + print("# All deadtimes:") + print( + "# no. : {0:>9s} {1:>9s} {2:>9s} {3:>9s} {4:>9s} {5:>9s}".format( + "count", "minimum", "maximum", "sum", "mean", "percent" + ) + ) + alldeadtimes = [] + for i in threadids: + deadtimes = [] + last = 0 + for task in tasks[i]: + dt = task[0] - last + deadtimes.append(dt) + last = task[1] + dt = total_t - last + deadtimes.append(dt) + + deadmin = min(deadtimes) + deadmax = max(deadtimes) + deadsum = sum(deadtimes) + print( + "thread {0:2d}: {1:9d} {2:9.4f} {3:9.4f} {4:9.4f} {5:9.4f} {6:9.2f}".format( + i, + len(deadtimes), + deadmin, + deadmax, + deadsum, + deadsum / len(deadtimes), + deadsum / total_t * 100.0, + ) + ) + alldeadtimes.extend(deadtimes) + + deadmin = min(alldeadtimes) + deadmax = max(alldeadtimes) + deadsum = sum(alldeadtimes) + print( + "all : {0:9d} {1:9.4f} {2:9.4f} {3:9.4f} {4:9.4f} {5:9.2f}".format( + len(alldeadtimes), + deadmin, + deadmax, + deadsum, + deadsum / len(alldeadtimes), + deadsum / (len(threadids) * total_t) * 100.0, + ) + ) + print() + +sys.exit(0) diff --git a/examples/analyse_threadpool_tasks.py b/tools/task_plots/analyse_threadpool_tasks.py similarity index 53% rename from examples/analyse_threadpool_tasks.py rename to tools/task_plots/analyse_threadpool_tasks.py index 
609af363b4110e010d6714bef6862d40e5acb278..af8d88dc1d4dc319fe7506d604e550de22a55a81 100755 --- a/examples/analyse_threadpool_tasks.py +++ b/tools/task_plots/analyse_threadpool_tasks.py @@ -29,6 +29,7 @@ along with this program. If not, see <http://www.gnu.org/licenses/>. """ import matplotlib + matplotlib.use("Agg") import matplotlib.collections as collections import matplotlib.ticker as plticker @@ -40,23 +41,28 @@ import argparse parser = argparse.ArgumentParser(description="Analyse task dumps") parser.add_argument("input", help="Threadpool data file (-y output)") -parser.add_argument("-v", "--verbose", dest="verbose", - help="Verbose output (default: False)", - default=False, action="store_true") +parser.add_argument( + "-v", + "--verbose", + dest="verbose", + help="Verbose output (default: False)", + default=False, + action="store_true", +) args = parser.parse_args() infile = args.input # Read header. First two lines. with open(infile) as infid: - head = [next(infid) for x in xrange(2)] + head = [next(infid) for x in range(2)] header = head[1][2:].strip() header = eval(header) -nthread = int(header['num_threads']) + 1 -CPU_CLOCK = float(header['cpufreq']) / 1000.0 -print "Number of threads: ", nthread - 1 +nthread = int(header["num_threads"]) + 1 +CPU_CLOCK = float(header["cpufreq"]) / 1000.0 +print("Number of threads: ", nthread - 1) if args.verbose: - print "CPU frequency:", CPU_CLOCK * 1000.0 + print("CPU frequency:", CPU_CLOCK * 1000.0) # Read input. data = pl.genfromtxt(infile, dtype=None, delimiter=" ") @@ -71,7 +77,7 @@ for i in data: if i[0] != "#": funcs.append(i[0].replace("_mapper", "")) if i[1] < 0: - threads.append(nthread-1) + threads.append(nthread - 1) else: threads.append(i[1]) chunks.append(i[2]) @@ -88,9 +94,9 @@ tic_step = min(tics) toc_step = max(tocs) # Calculate the time range. 
-total_t = (toc_step - tic_step)/ CPU_CLOCK -print "# Data range: ", total_t, "ms" -print +total_t = (toc_step - tic_step) / CPU_CLOCK +print("# Data range: ", total_t, "ms") +print() # Correct times to relative millisecs. start_t = float(tic_step) @@ -104,7 +110,7 @@ for i in range(nthread): # Gather into by thread data. for i in range(len(tics)): - tasks[threads[i]].append([tics[i],tocs[i],funcs[i]]) + tasks[threads[i]].append([tics[i], tocs[i], funcs[i]]) # Don't actually process the fake thread. nthread = nthread - 1 @@ -117,11 +123,13 @@ for i in range(nthread): threadids.append(i) # Times per task. -print "# Task times:" -print "# -----------" -print "# {0:<31s}: {1:>7s} {2:>9s} {3:>9s} {4:>9s} {5:>9s} {6:>9s}"\ - .format("type/subtype", "count","minimum", "maximum", - "sum", "mean", "percent") +print("# Task times:") +print("# -----------") +print( + "# {0:<31s}: {1:>7s} {2:>9s} {3:>9s} {4:>9s} {5:>9s} {6:>9s}".format( + "type/subtype", "count", "minimum", "maximum", "sum", "mean", "percent" + ) +) alltasktimes = {} sidtimes = {} for i in threadids: @@ -137,74 +145,116 @@ for i in threadids: alltasktimes[key] = [] alltasktimes[key].append(dt) - print "# Thread : ", i + print("# Thread : ", i) for key in sorted(tasktimes.keys()): taskmin = min(tasktimes[key]) taskmax = max(tasktimes[key]) tasksum = sum(tasktimes[key]) - print "{0:33s}: {1:7d} {2:9.4f} {3:9.4f} {4:9.4f} {5:9.4f} {6:9.2f}"\ - .format(key, len(tasktimes[key]), taskmin, taskmax, tasksum, - tasksum / len(tasktimes[key]), tasksum / total_t * 100.0) - print - -print "# All threads : " + print( + "{0:33s}: {1:7d} {2:9.4f} {3:9.4f} {4:9.4f} {5:9.4f} {6:9.2f}".format( + key, + len(tasktimes[key]), + taskmin, + taskmax, + tasksum, + tasksum / len(tasktimes[key]), + tasksum / total_t * 100.0, + ) + ) + print() + +print("# All threads : ") for key in sorted(alltasktimes.keys()): taskmin = min(alltasktimes[key]) taskmax = max(alltasktimes[key]) tasksum = sum(alltasktimes[key]) - print "{0:33s}: {1:7d} 
{2:9.4f} {3:9.4f} {4:9.4f} {5:9.4f} {6:9.2f}"\ - .format(key, len(alltasktimes[key]), taskmin, taskmax, tasksum, - tasksum / len(alltasktimes[key]), - tasksum / (len(threadids) * total_t) * 100.0) -print + print( + "{0:33s}: {1:7d} {2:9.4f} {3:9.4f} {4:9.4f} {5:9.4f} {6:9.2f}".format( + key, + len(alltasktimes[key]), + taskmin, + taskmax, + tasksum, + tasksum / len(alltasktimes[key]), + tasksum / (len(threadids) * total_t) * 100.0, + ) + ) +print() # Dead times. -print "# Times not in tasks (deadtimes)" -print "# ------------------------------" -print "# Time before first task:" -print "# no. : {0:>9s} {1:>9s}".format("value", "percent") +print("# Times not in tasks (deadtimes)") +print("# ------------------------------") +print("# Time before first task:") +print("# no. : {0:>9s} {1:>9s}".format("value", "percent")) predeadtimes = [] for i in threadids: predeadtime = tasks[i][0][0] - print "thread {0:2d}: {1:9.4f} {2:9.4f}"\ - .format(i, predeadtime, predeadtime / total_t * 100.0) + print( + "thread {0:2d}: {1:9.4f} {2:9.4f}".format( + i, predeadtime, predeadtime / total_t * 100.0 + ) + ) predeadtimes.append(predeadtime) predeadmin = min(predeadtimes) predeadmax = max(predeadtimes) predeadsum = sum(predeadtimes) -print "# : {0:>9s} {1:>9s} {2:>9s} {3:>9s} {4:>9s} {5:>9s}"\ - .format("count", "minimum", "maximum", "sum", "mean", "percent") -print "all : {0:9d} {1:9.4f} {2:9.4f} {3:9.4f} {4:9.4f} {5:9.2f}"\ - .format(len(predeadtimes), predeadmin, predeadmax, predeadsum, - predeadsum / len(predeadtimes), - predeadsum / (len(threadids) * total_t ) * 100.0) -print - -print "# Time after last task:" -print "# no. 
: {0:>9s} {1:>9s}".format("value", "percent") +print( + "# : {0:>9s} {1:>9s} {2:>9s} {3:>9s} {4:>9s} {5:>9s}".format( + "count", "minimum", "maximum", "sum", "mean", "percent" + ) +) +print( + "all : {0:9d} {1:9.4f} {2:9.4f} {3:9.4f} {4:9.4f} {5:9.2f}".format( + len(predeadtimes), + predeadmin, + predeadmax, + predeadsum, + predeadsum / len(predeadtimes), + predeadsum / (len(threadids) * total_t) * 100.0, + ) +) +print() + +print("# Time after last task:") +print("# no. : {0:>9s} {1:>9s}".format("value", "percent")) postdeadtimes = [] for i in threadids: postdeadtime = total_t - tasks[i][-1][1] - print "thread {0:2d}: {1:9.4f} {2:9.4f}"\ - .format(i, postdeadtime, postdeadtime / total_t * 100.0) + print( + "thread {0:2d}: {1:9.4f} {2:9.4f}".format( + i, postdeadtime, postdeadtime / total_t * 100.0 + ) + ) postdeadtimes.append(postdeadtime) postdeadmin = min(postdeadtimes) postdeadmax = max(postdeadtimes) postdeadsum = sum(postdeadtimes) -print "# : {0:>9s} {1:>9s} {2:>9s} {3:>9s} {4:>9s} {5:>9s}"\ - .format("count", "minimum", "maximum", "sum", "mean", "percent") -print "all : {0:9d} {1:9.4f} {2:9.4f} {3:9.4f} {4:9.4f} {5:9.2f}"\ - .format(len(postdeadtimes), postdeadmin, postdeadmax, postdeadsum, - postdeadsum / len(postdeadtimes), - postdeadsum / (len(threadids) * total_t ) * 100.0) -print +print( + "# : {0:>9s} {1:>9s} {2:>9s} {3:>9s} {4:>9s} {5:>9s}".format( + "count", "minimum", "maximum", "sum", "mean", "percent" + ) +) +print( + "all : {0:9d} {1:9.4f} {2:9.4f} {3:9.4f} {4:9.4f} {5:9.2f}".format( + len(postdeadtimes), + postdeadmin, + postdeadmax, + postdeadsum, + postdeadsum / len(postdeadtimes), + postdeadsum / (len(threadids) * total_t) * 100.0, + ) +) +print() # Time in threadpool, i.e. from first to last tasks. -print "# Time between tasks (threadpool deadtime):" -print "# no. 
: {0:>9s} {1:>9s} {2:>9s} {3:>9s} {4:>9s} {5:>9s}"\ - .format("count", "minimum", "maximum", "sum", "mean", "percent") +print("# Time between tasks (threadpool deadtime):") +print( + "# no. : {0:>9s} {1:>9s} {2:>9s} {3:>9s} {4:>9s} {5:>9s}".format( + "count", "minimum", "maximum", "sum", "mean", "percent" + ) +) threadpooldeadtimes = [] for i in threadids: deadtimes = [] @@ -224,24 +274,41 @@ for i in threadids: deadmin = min(deadtimes) deadmax = max(deadtimes) deadsum = sum(deadtimes) - print "thread {0:2d}: {1:9d} {2:9.4f} {3:9.4f} {4:9.4f} {5:9.4f} {6:9.2f}"\ - .format(i, len(deadtimes), deadmin, deadmax, deadsum, - deadsum / len(deadtimes), deadsum / total_t * 100.0) + print( + "thread {0:2d}: {1:9d} {2:9.4f} {3:9.4f} {4:9.4f} {5:9.4f} {6:9.2f}".format( + i, + len(deadtimes), + deadmin, + deadmax, + deadsum, + deadsum / len(deadtimes), + deadsum / total_t * 100.0, + ) + ) threadpooldeadtimes.extend(deadtimes) deadmin = min(threadpooldeadtimes) deadmax = max(threadpooldeadtimes) deadsum = sum(threadpooldeadtimes) -print "all : {0:9d} {1:9.4f} {2:9.4f} {3:9.4f} {4:9.4f} {5:9.2f}"\ - .format(len(threadpooldeadtimes), deadmin, deadmax, deadsum, - deadsum / len(threadpooldeadtimes), - deadsum / (len(threadids) * total_t ) * 100.0) -print +print( + "all : {0:9d} {1:9.4f} {2:9.4f} {3:9.4f} {4:9.4f} {5:9.2f}".format( + len(threadpooldeadtimes), + deadmin, + deadmax, + deadsum, + deadsum / len(threadpooldeadtimes), + deadsum / (len(threadids) * total_t) * 100.0, + ) +) +print() # All times in step. -print "# All deadtimes:" -print "# no. : {0:>9s} {1:>9s} {2:>9s} {3:>9s} {4:>9s} {5:>9s}"\ - .format("count", "minimum", "maximum", "sum", "mean", "percent") +print("# All deadtimes:") +print( + "# no. 
: {0:>9s} {1:>9s} {2:>9s} {3:>9s} {4:>9s} {5:>9s}".format( + "count", "minimum", "maximum", "sum", "mean", "percent" + ) +) alldeadtimes = [] for i in threadids: deadtimes = [] @@ -256,18 +323,32 @@ for i in threadids: deadmin = min(deadtimes) deadmax = max(deadtimes) deadsum = sum(deadtimes) - print "thread {0:2d}: {1:9d} {2:9.4f} {3:9.4f} {4:9.4f} {5:9.4f} {6:9.2f}"\ - .format(i, len(deadtimes), deadmin, deadmax, deadsum, - deadsum / len(deadtimes), deadsum / total_t * 100.0) + print( + "thread {0:2d}: {1:9d} {2:9.4f} {3:9.4f} {4:9.4f} {5:9.4f} {6:9.2f}".format( + i, + len(deadtimes), + deadmin, + deadmax, + deadsum, + deadsum / len(deadtimes), + deadsum / total_t * 100.0, + ) + ) alldeadtimes.extend(deadtimes) deadmin = min(alldeadtimes) deadmax = max(alldeadtimes) deadsum = sum(alldeadtimes) -print "all : {0:9d} {1:9.4f} {2:9.4f} {3:9.4f} {4:9.4f} {5:9.2f}"\ - .format(len(alldeadtimes), deadmin, deadmax, deadsum, - deadsum / len(alldeadtimes), - deadsum / (len(threadids) * total_t ) * 100.0) -print +print( + "all : {0:9d} {1:9.4f} {2:9.4f} {3:9.4f} {4:9.4f} {5:9.2f}".format( + len(alldeadtimes), + deadmin, + deadmax, + deadsum, + deadsum / len(alldeadtimes), + deadsum / (len(threadids) * total_t) * 100.0, + ) +) +print() sys.exit(0) diff --git a/examples/plot_tasks.py b/tools/task_plots/plot_tasks.py similarity index 55% rename from examples/plot_tasks.py rename to tools/task_plots/plot_tasks.py index 9eecf6f4ca15148f544ea48cb65c97cd3802a48d..1fe7bcbd11f30ff17051bc9a7ae789439df8b9e9 100755 --- a/examples/plot_tasks.py +++ b/tools/task_plots/plot_tasks.py @@ -53,30 +53,67 @@ parser = argparse.ArgumentParser(description="Plot task graphs") parser.add_argument("input", help="Thread data file (-y output)") parser.add_argument("outbase", help="Base name for output graphic files (PNG)") -parser.add_argument("-l", "--limit", dest="limit", - help="Upper time limit in millisecs (def: depends on data)", - default=0, type=float) -parser.add_argument("-e", "--expand", 
dest="expand", - help="Thread expansion factor (def: 1)", - default=1, type=int) -parser.add_argument("--height", dest="height", - help="Height of plot in inches (def: 4)", - default=4., type=float) -parser.add_argument("--width", dest="width", - help="Width of plot in inches (def: 16)", - default=16., type=float) -parser.add_argument("--nolegend", dest="nolegend", - help="Whether to show the legend (def: False)", - default=False, action="store_true") -parser.add_argument("-v", "--verbose", dest="verbose", - help="Show colour assignments and other details (def: False)", - default=False, action="store_true") -parser.add_argument("-r", "--ranks", dest="ranks", - help="Comma delimited list of ranks to process, if MPI in effect", - default=None, type=str) -parser.add_argument("-m", "--mintic", dest="mintic", - help="Value of the smallest tic (def: least in input file)", - default=-1, type=int) +parser.add_argument( + "-l", + "--limit", + dest="limit", + help="Upper time limit in millisecs (def: depends on data)", + default=0, + type=float, +) +parser.add_argument( + "-e", + "--expand", + dest="expand", + help="Thread expansion factor (def: 1)", + default=1, + type=int, +) +parser.add_argument( + "--height", + dest="height", + help="Height of plot in inches (def: 4)", + default=4.0, + type=float, +) +parser.add_argument( + "--width", + dest="width", + help="Width of plot in inches (def: 16)", + default=16.0, + type=float, +) +parser.add_argument( + "--nolegend", + dest="nolegend", + help="Whether to show the legend (def: False)", + default=False, + action="store_true", +) +parser.add_argument( + "-v", + "--verbose", + dest="verbose", + help="Show colour assignments and other details (def: False)", + default=False, + action="store_true", +) +parser.add_argument( + "-r", + "--ranks", + dest="ranks", + help="Comma delimited list of ranks to process, if MPI in effect", + default=None, + type=str, +) +parser.add_argument( + "-m", + "--mintic", + dest="mintic", + help="Value 
of the smallest tic (def: least in input file)", + default=-1, + type=int, +) args = parser.parse_args() infile = args.input @@ -85,55 +122,155 @@ delta_t = args.limit expand = args.expand mintic = args.mintic if args.ranks != None: - ranks = [int(item) for item in args.ranks.split(',')] + ranks = [int(item) for item in args.ranks.split(",")] else: ranks = None # Basic plot configuration. -PLOT_PARAMS = {"axes.labelsize": 10, - "axes.titlesize": 10, - "font.size": 12, - "legend.fontsize": 12, - "xtick.labelsize": 10, - "ytick.labelsize": 10, - "figure.figsize" : (args.width, args.height), - "figure.subplot.left" : 0.03, - "figure.subplot.right" : 0.995, - "figure.subplot.bottom" : 0.1, - "figure.subplot.top" : 0.99, - "figure.subplot.wspace" : 0., - "figure.subplot.hspace" : 0., - "lines.markersize" : 6, - "lines.linewidth" : 3. - } +PLOT_PARAMS = { + "axes.labelsize": 10, + "axes.titlesize": 10, + "font.size": 12, + "legend.fontsize": 12, + "xtick.labelsize": 10, + "ytick.labelsize": 10, + "figure.figsize": (args.width, args.height), + "figure.subplot.left": 0.03, + "figure.subplot.right": 0.995, + "figure.subplot.bottom": 0.1, + "figure.subplot.top": 0.99, + "figure.subplot.wspace": 0.0, + "figure.subplot.hspace": 0.0, + "lines.markersize": 6, + "lines.linewidth": 3.0, +} pl.rcParams.update(PLOT_PARAMS) # Tasks and subtypes. Indexed as in tasks.h. 
-TASKTYPES = ["none", "sort", "self", "pair", "sub_self", "sub_pair", - "init_grav", "init_grav_out", "ghost_in", "ghost", "ghost_out", "extra_ghost", "drift_part", "drift_gpart", - "end_force", "kick1", "kick2", "timestep", "send", "recv", "grav_long_range", "grav_mm", "grav_down_in", - "grav_down", "grav_mesh", "cooling", "sourceterms", "count"] - -SUBTYPES = ["none", "density", "gradient", "force", "grav", "external_grav", - "tend", "xv", "rho", "gpart", "multipole", "spart", "count"] +TASKTYPES = [ + "none", + "sort", + "self", + "pair", + "sub_self", + "sub_pair", + "init_grav", + "init_grav_out", + "ghost_in", + "ghost", + "ghost_out", + "extra_ghost", + "drift_part", + "drift_gpart", + "drift_gpart_out", + "end_force", + "kick1", + "kick2", + "timestep", + "send", + "recv", + "grav_long_range", + "grav_mm", + "grav_down_in", + "grav_down", + "grav_mesh", + "cooling", + "star_formation", + "sourceterms", + "logger", + "stars_ghost_in", + "stars_ghost", + "stars_ghost_out", + "stars_sort", + "count", +] + +SUBTYPES = [ + "none", + "density", + "gradient", + "force", + "grav", + "external_grav", + "tend", + "xv", + "rho", + "gpart", + "multipole", + "spart", + "stars_density", + "count", +] # Task/subtypes of interest. -FULLTYPES = ["self/force", "self/density", "self/grav", "sub_self/force", - "sub_self/density", "pair/force", "pair/density", "pair/grav", - "sub_pair/force", - "sub_pair/density", "recv/xv", "send/xv", "recv/rho", "send/rho", - "recv/tend", "send/tend", "recv/gpart", "send/gpart"] +FULLTYPES = [ + "self/force", + "self/density", + "self/grav", + "sub_self/force", + "sub_self/density", + "pair/force", + "pair/density", + "pair/grav", + "sub_pair/force", + "sub_pair/density", + "recv/xv", + "send/xv", + "recv/rho", + "send/rho", + "recv/tend", + "send/tend", + "recv/gpart", + "send/gpart", + "self/stars_density", + "pair/stars_density", + "sub_self/stars_density", + "sub_pair/stars_density", +] # A number of colours for the various types. 
Recycled when there are # more task types than colours... -colours = ["cyan", "lightgray", "darkblue", "yellow", "tan", "dodgerblue", - "sienna", "aquamarine", "bisque", "blue", "green", "lightgreen", - "brown", "purple", "moccasin", "olivedrab", "chartreuse", - "darksage", "darkgreen", "green", "mediumseagreen", - "mediumaquamarine", "darkslategrey", "mediumturquoise", - "black", "cadetblue", "skyblue", "red", "slategray", "gold", - "slateblue", "blueviolet", "mediumorchid", "firebrick", - "magenta", "hotpink", "pink", "orange", "lightgreen"] +colours = [ + "cyan", + "lightgray", + "darkblue", + "yellow", + "tan", + "dodgerblue", + "sienna", + "aquamarine", + "bisque", + "blue", + "green", + "lightgreen", + "brown", + "purple", + "moccasin", + "olivedrab", + "chartreuse", + "olive", + "darkgreen", + "green", + "mediumseagreen", + "mediumaquamarine", + "darkslategrey", + "mediumturquoise", + "black", + "cadetblue", + "skyblue", + "red", + "slategray", + "gold", + "slateblue", + "blueviolet", + "mediumorchid", + "firebrick", + "magenta", + "hotpink", + "pink", + "orange", + "lightgreen", +] maxcolours = len(colours) # Set colours of task/subtype. @@ -154,23 +291,23 @@ for task in SUBTYPES: # For fiddling with colours... if args.verbose: - print "#Selected colours:" + print("#Selected colours:") for task in sorted(TASKCOLOURS.keys()): - print "# " + task + ": " + TASKCOLOURS[task] + print(("# " + task + ": " + TASKCOLOURS[task])) for task in sorted(SUBCOLOURS.keys()): - print "# " + task + ": " + SUBCOLOURS[task] + print(("# " + task + ": " + SUBCOLOURS[task])) # Read input. -data = pl.loadtxt( infile ) +data = pl.loadtxt(infile) # Do we have an MPI file? 
-full_step = data[0,:] +full_step = data[0, :] if full_step.size == 13: - print "# MPI mode" + print("# MPI mode") mpimode = True if ranks == None: - ranks = range(int(max(data[:,0])) + 1) - print "# Number of ranks:", len(ranks) + ranks = list(range(int(max(data[:, 0])) + 1)) + print(("# Number of ranks:", len(ranks))) rankcol = 0 threadscol = 1 taskcol = 2 @@ -178,7 +315,7 @@ if full_step.size == 13: ticcol = 5 toccol = 6 else: - print "# non MPI mode" + print("# non MPI mode") ranks = [0] mpimode = False rankcol = -1 @@ -191,14 +328,14 @@ else: # Get CPU_CLOCK to convert ticks into milliseconds. CPU_CLOCK = float(full_step[-1]) / 1000.0 if args.verbose: - print "# CPU frequency:", CPU_CLOCK * 1000.0 + print(("# CPU frequency:", CPU_CLOCK * 1000.0)) -nthread = int(max(data[:,threadscol])) + 1 -print "# Number of threads:", nthread +nthread = int(max(data[:, threadscol])) + 1 +print(("# Number of threads:", nthread)) # Avoid start and end times of zero. -sdata = data[data[:,ticcol] != 0] -sdata = sdata[sdata[:,toccol] != 0] +sdata = data[data[:, ticcol] != 0] +sdata = sdata[sdata[:, toccol] != 0] # Each rank can have different clocks (compute node), but we want to use the # same delta times range for comparisons, so we suck it up and take the hit of @@ -207,8 +344,8 @@ delta_t = delta_t * CPU_CLOCK if delta_t == 0: for rank in ranks: if mpimode: - data = sdata[sdata[:,rankcol] == rank] - full_step = data[0,:] + data = sdata[sdata[:, rankcol] == rank] + full_step = data[0, :] # Start and end times for this rank. Can be changed using the mintic # option. This moves our zero time to other time. Useful for @@ -221,28 +358,31 @@ if delta_t == 0: dt = toc_step - tic_step if dt > delta_t: delta_t = dt - print "# Data range: ", delta_t / CPU_CLOCK, "ms" + print(("# Data range: ", delta_t / CPU_CLOCK, "ms")) # Once more doing the real gather and plots this time. 
for rank in ranks: - print "# Processing rank: ", rank + print(("# Processing rank: ", rank)) if mpimode: - data = sdata[sdata[:,rankcol] == rank] - full_step = data[0,:] + data = sdata[sdata[:, rankcol] == rank] + full_step = data[0, :] tic_step = int(full_step[ticcol]) toc_step = int(full_step[toccol]) - print "# Min tic = ", tic_step - data = data[1:,:] + print(("# Min tic = ", tic_step)) + data = data[1:, :] typesseen = [] nethread = 0 # Dummy image for ranks that have no tasks. if data.size == 0: - print "# Rank ", rank, " has no tasks" + print(("# Rank ", rank, " has no tasks")) fig = pl.figure() - ax = fig.add_subplot(1,1,1) + ax = fig.add_subplot(1, 1, 1) ax.set_xlim(-delta_t * 0.01 / CPU_CLOCK, delta_t * 1.01 / CPU_CLOCK) - ax.set_ylim(0, nthread*expand) + if nthread == 0: + ax.set_ylim(0, expand) + else: + ax.set_ylim(0, nthread * expand) if mintic < 0: start_t = tic_step else: @@ -254,13 +394,13 @@ for rank in ranks: start_t = float(tic_step) else: start_t = float(mintic) - data[:,ticcol] -= start_t - data[:,toccol] -= start_t + data[:, ticcol] -= start_t + data[:, toccol] -= start_t end_t = (toc_step - start_t) / CPU_CLOCK tasks = {} tasks[-1] = [] - for i in range(nthread*expand): + for i in range(nthread * expand): tasks[i] = [] # Counters for each thread when expanding. 
@@ -278,15 +418,20 @@ for rank in ranks: thread = ethread tasks[thread].append({}) - tasktype = TASKTYPES[int(data[line,taskcol])] - subtype = SUBTYPES[int(data[line,subtaskcol])] + tasktype = TASKTYPES[int(data[line, taskcol])] + subtype = SUBTYPES[int(data[line, subtaskcol])] tasks[thread][-1]["type"] = tasktype tasks[thread][-1]["subtype"] = subtype - tic = int(data[line,ticcol]) / CPU_CLOCK - toc = int(data[line,toccol]) / CPU_CLOCK + tic = int(data[line, ticcol]) / CPU_CLOCK + toc = int(data[line, toccol]) / CPU_CLOCK tasks[thread][-1]["tic"] = tic tasks[thread][-1]["toc"] = toc - if "self" in tasktype or "pair" in tasktype or "recv" in tasktype or "send" in tasktype: + if ( + "self" in tasktype + or "pair" in tasktype + or "recv" in tasktype + or "send" in tasktype + ): fulltype = tasktype + "/" + subtype if fulltype in SUBCOLOURS: tasks[thread][-1]["colour"] = SUBCOLOURS[fulltype] @@ -300,9 +445,9 @@ for rank in ranks: typesseen = [] fig = pl.figure() - ax = fig.add_subplot(1,1,1) + ax = fig.add_subplot(1, 1, 1) ax.set_xlim(-delta_t * 0.01 / CPU_CLOCK, delta_t * 1.01 / CPU_CLOCK) - ax.set_ylim(0, nethread) + ax.set_ylim(0.5, nethread + 1.0) for i in range(nethread): # Collect ranges and colours into arrays. @@ -324,46 +469,47 @@ for rank in ranks: typesseen.append(qtask) # Now plot. - ax.broken_barh(tictocs, [i+0.05,0.90], facecolors = colours, linewidth=0) - + ax.broken_barh(tictocs, [i + 0.55, 0.9], facecolors=colours, linewidth=0) # Legend and room for it. 
- nrow = len(typesseen) / 5 - ax.fill_between([0, 0], nethread+0.5, nethread + nrow + 0.5, facecolor="white") - ax.set_ylim(0, nethread + 0.5) + nrow = len(typesseen) / 8 + ax.fill_between([0, 0], nethread, nethread + nrow, facecolor="white") if data.size > 0 and not args.nolegend: - ax.fill_between([0, 0], nethread+0.5, nethread + nrow + 0.5, facecolor="white") - ax.set_ylim(0, nethread + 0.5) - ax.legend(loc=1, shadow=True, bbox_to_anchor=(0., 1.05 ,1., 0.2), mode="expand", ncol=5) - box = ax.get_position() - ax.set_position([box.x0, box.y0, box.width, box.height*0.8]) + ax.fill_between([0, 0], nethread, nethread + nrow, facecolor="white") + ax.legend( + loc="lower left", + shadow=True, + bbox_to_anchor=(0.0, 1.0, 1.0, 0.2), + mode="expand", + ncol=8, + ) # Start and end of time-step if mintic < 0: - ax.plot([0, 0], [0, nethread + nrow + 1], 'k--', linewidth=1) + ax.plot([0, 0], [0, nethread + nrow + 1], "k--", linewidth=1) else: real_start = tic_step - mintic - ax.plot([real_start, real_start], [0, nethread + nrow + 1], 'k--', linewidth=1) - ax.plot([end_t, end_t], [0, nethread + nrow + 1], 'k--', linewidth=1) + ax.plot([real_start, real_start], [0, nethread + nrow + 1], "k--", linewidth=1) + ax.plot([end_t, end_t], [0, nethread + nrow + 1], "k--", linewidth=1) ax.set_xlabel("Wall clock time [ms]") if expand == 1: - ax.set_ylabel("Thread ID" ) + ax.set_ylabel("Thread ID") else: - ax.set_ylabel("Thread ID * " + str(expand) ) - ax.set_yticks(pl.array(range(nethread)), True) + ax.set_ylabel("Thread ID * " + str(expand)) + ax.set_yticks(pl.array(list(range(nethread))), True) loc = plticker.MultipleLocator(base=expand) ax.yaxis.set_major_locator(loc) - ax.grid(True, which='major', axis="y", linestyle="-") + ax.grid(True, which="major", axis="y", linestyle="-") pl.show() if mpimode: outpng = outbase + str(rank) + ".png" else: outpng = outbase + ".png" - pl.savefig(outpng) - print "Graphics done, output written to", outpng + pl.savefig(outpng, bbox_inches="tight") + 
print(("Graphics done, output written to", outpng)) sys.exit(0) diff --git a/examples/plot_threadpool.py b/tools/task_plots/plot_threadpool.py similarity index 59% rename from examples/plot_threadpool.py rename to tools/task_plots/plot_threadpool.py index bbcc8c23e4c4e5ed6b93055d7460d793f43d91fb..2e5521c901d0571665c6c6d7ec3297b0e9e60552 100755 --- a/examples/plot_threadpool.py +++ b/tools/task_plots/plot_threadpool.py @@ -31,6 +31,7 @@ along with this program. If not, see <http://www.gnu.org/licenses/>. """ import matplotlib + matplotlib.use("Agg") import matplotlib.collections as collections import matplotlib.ticker as plticker @@ -43,27 +44,59 @@ parser = argparse.ArgumentParser(description="Plot threadpool function graphs") parser.add_argument("input", help="Threadpool data file (-Y output)") parser.add_argument("outpng", help="Name for output graphic file (PNG)") -parser.add_argument("-l", "--limit", dest="limit", - help="Upper time limit in millisecs (def: depends on data)", - default=0, type=float) -parser.add_argument("-e", "--expand", dest="expand", - help="Thread expansion factor (def: 1)", - default=1, type=int) -parser.add_argument("--height", dest="height", - help="Height of plot in inches (def: 4)", - default=4., type=float) -parser.add_argument("--width", dest="width", - help="Width of plot in inches (def: 16)", - default=16., type=float) -parser.add_argument("--nolegend", dest="nolegend", - help="Whether to show the legend (def: False)", - default=False, action="store_true") -parser.add_argument("-v", "--verbose", dest="verbose", - help="Show colour assignments and other details (def: False)", - default=False, action="store_true") -parser.add_argument("-m", "--mintic", dest="mintic", - help="Value of the smallest tic (def: least in input file)", - default=-1, type=int) +parser.add_argument( + "-l", + "--limit", + dest="limit", + help="Upper time limit in millisecs (def: depends on data)", + default=0, + type=float, +) +parser.add_argument( + "-e", + 
"--expand", + dest="expand", + help="Thread expansion factor (def: 1)", + default=1, + type=int, +) +parser.add_argument( + "--height", + dest="height", + help="Height of plot in inches (def: 4)", + default=4.0, + type=float, +) +parser.add_argument( + "--width", + dest="width", + help="Width of plot in inches (def: 16)", + default=16.0, + type=float, +) +parser.add_argument( + "--nolegend", + dest="nolegend", + help="Whether to show the legend (def: False)", + default=False, + action="store_true", +) +parser.add_argument( + "-v", + "--verbose", + dest="verbose", + help="Show colour assignments and other details (def: False)", + default=False, + action="store_true", +) +parser.add_argument( + "-m", + "--mintic", + dest="mintic", + help="Value of the smallest tic (def: least in input file)", + default=-1, + type=int, +) args = parser.parse_args() infile = args.input @@ -73,46 +106,80 @@ expand = args.expand mintic = args.mintic # Basic plot configuration. -PLOT_PARAMS = {"axes.labelsize": 10, - "axes.titlesize": 10, - "font.size": 12, - "legend.fontsize": 12, - "xtick.labelsize": 10, - "ytick.labelsize": 10, - "figure.figsize" : (args.width, args.height), - "figure.subplot.left" : 0.03, - "figure.subplot.right" : 0.995, - "figure.subplot.bottom" : 0.09, - "figure.subplot.top" : 0.99, - "figure.subplot.wspace" : 0., - "figure.subplot.hspace" : 0., - "lines.markersize" : 6, - "lines.linewidth" : 3. - } +PLOT_PARAMS = { + "axes.labelsize": 10, + "axes.titlesize": 10, + "font.size": 12, + "legend.fontsize": 12, + "xtick.labelsize": 10, + "ytick.labelsize": 10, + "figure.figsize": (args.width, args.height), + "figure.subplot.left": 0.03, + "figure.subplot.right": 0.995, + "figure.subplot.bottom": 0.09, + "figure.subplot.top": 0.99, + "figure.subplot.wspace": 0.0, + "figure.subplot.hspace": 0.0, + "lines.markersize": 6, + "lines.linewidth": 3.0, +} pl.rcParams.update(PLOT_PARAMS) # A number of colours for the various types. 
Recycled when there are # more task types than colours... -colours = ["cyan", "lightgray", "darkblue", "yellow", "tan", "dodgerblue", - "sienna", "aquamarine", "bisque", "blue", "green", "lightgreen", - "brown", "purple", "moccasin", "olivedrab", "chartreuse", - "darksage", "darkgreen", "green", "mediumseagreen", - "mediumaquamarine", "darkslategrey", "mediumturquoise", - "black", "cadetblue", "skyblue", "red", "slategray", "gold", - "slateblue", "blueviolet", "mediumorchid", "firebrick", - "magenta", "hotpink", "pink", "orange", "lightgreen"] +colours = [ + "cyan", + "lightgray", + "darkblue", + "yellow", + "tan", + "dodgerblue", + "sienna", + "aquamarine", + "bisque", + "blue", + "green", + "lightgreen", + "brown", + "purple", + "moccasin", + "olivedrab", + "chartreuse", + "olive", + "darkgreen", + "green", + "mediumseagreen", + "mediumaquamarine", + "darkslategrey", + "mediumturquoise", + "black", + "cadetblue", + "skyblue", + "red", + "slategray", + "gold", + "slateblue", + "blueviolet", + "mediumorchid", + "firebrick", + "magenta", + "hotpink", + "pink", + "orange", + "lightgreen", +] maxcolours = len(colours) # Read header. First two lines. with open(infile) as infid: - head = [next(infid) for x in xrange(2)] + head = [next(infid) for x in range(2)] header = head[1][2:].strip() header = eval(header) -nthread = int(header['num_threads']) + 1 -CPU_CLOCK = float(header['cpufreq']) / 1000.0 -print "Number of threads: ", nthread +nthread = int(header["num_threads"]) + 1 +CPU_CLOCK = float(header["cpufreq"]) / 1000.0 +print("Number of threads: ", nthread) if args.verbose: - print "CPU frequency:", CPU_CLOCK * 1000.0 + print("CPU frequency:", CPU_CLOCK * 1000.0) # Read input. 
data = pl.genfromtxt(infile, dtype=None, delimiter=" ") @@ -127,7 +194,7 @@ for i in data: if i[0] != "#": funcs.append(i[0].replace("_mapper", "")) if i[1] < 0: - threads.append(nthread-1) + threads.append(nthread - 1) else: threads.append(i[1]) chunks.append(i[2]) @@ -143,7 +210,7 @@ chunks = pl.array(chunks) mintic_step = min(tics) tic_step = mintic_step toc_step = max(tocs) -print "# Min tic = ", mintic_step +print("# Min tic = ", mintic_step) if mintic > 0: tic_step = mintic @@ -153,7 +220,7 @@ if delta_t == 0: dt = toc_step - tic_step if dt > delta_t: delta_t = dt - print "Data range: ", delta_t / CPU_CLOCK, "ms" + print("Data range: ", delta_t / CPU_CLOCK, "ms") # Once more doing the real gather and plots this time. start_t = float(tic_step) @@ -163,7 +230,7 @@ end_t = (toc_step - start_t) / CPU_CLOCK # Get all "task" names and assign colours. TASKTYPES = pl.unique(funcs) -print TASKTYPES +print(TASKTYPES) # Set colours of task/subtype. TASKCOLOURS = {} @@ -174,15 +241,15 @@ for task in TASKTYPES: # For fiddling with colours... if args.verbose: - print "#Selected colours:" + print("#Selected colours:") for task in sorted(TASKCOLOURS.keys()): - print "# " + task + ": " + TASKCOLOURS[task] + print("# " + task + ": " + TASKCOLOURS[task]) for task in sorted(SUBCOLOURS.keys()): - print "# " + task + ": " + SUBCOLOURS[task] + print("# " + task + ": " + SUBCOLOURS[task]) tasks = {} tasks[-1] = [] -for i in range(nthread*expand): +for i in range(nthread * expand): tasks[i] = [] # Counters for each thread when expanding. 
@@ -211,7 +278,7 @@ nthread = nthread * expand typesseen = [] fig = pl.figure() -ax = fig.add_subplot(1,1,1) +ax = fig.add_subplot(1, 1, 1) ax.set_xlim(-delta_t * 0.01 / CPU_CLOCK, delta_t * 1.01 / CPU_CLOCK) ax.set_ylim(0, nthread) @@ -222,7 +289,7 @@ j = 0 for task in tasks[nthread - expand]: tictocs.append((task["tic"], task["toc"] - task["tic"])) colours.append(task["colour"]) -ax.broken_barh(tictocs, [0,(nthread-1)], facecolors = colours, linewidth=0, alpha=0.15) +ax.broken_barh(tictocs, [0, (nthread - 1)], facecolors=colours, linewidth=0, alpha=0.15) # And we don't plot the fake thread. nthread = nthread - expand @@ -243,36 +310,38 @@ for i in range(nthread): typesseen.append(qtask) # Now plot. - ax.broken_barh(tictocs, [i+0.05,0.90], facecolors = colours, linewidth=0) + ax.broken_barh(tictocs, [i + 0.05, 0.90], facecolors=colours, linewidth=0) # Legend and room for it. nrow = len(typesseen) / 5 if not args.nolegend: - ax.fill_between([0, 0], nthread+0.5, nthread + nrow + 0.5, facecolor="white") + ax.fill_between([0, 0], nthread + 0.5, nthread + nrow + 0.5, facecolor="white") ax.set_ylim(0, nthread + 0.5) - ax.legend(loc=1, shadow=True, bbox_to_anchor=(0., 1.05 ,1., 0.2), mode="expand", ncol=5) + ax.legend( + loc=1, shadow=True, bbox_to_anchor=(0.0, 1.05, 1.0, 0.2), mode="expand", ncol=5 + ) box = ax.get_position() - ax.set_position([box.x0, box.y0, box.width, box.height*0.8]) - + ax.set_position([box.x0, box.y0, box.width, box.height * 0.8]) + # Start and end of time-step -real_start_t = (mintic_step - tic_step)/ CPU_CLOCK -ax.plot([real_start_t, real_start_t], [0, nthread + nrow + 1], 'k--', linewidth=1) +real_start_t = (mintic_step - tic_step) / CPU_CLOCK +ax.plot([real_start_t, real_start_t], [0, nthread + nrow + 1], "k--", linewidth=1) -ax.plot([end_t, end_t], [0, nthread + nrow + 1], 'k--', linewidth=1) +ax.plot([end_t, end_t], [0, nthread + nrow + 1], "k--", linewidth=1) -ax.set_xlabel("Wall clock time [ms]", labelpad=0.) 
+ax.set_xlabel("Wall clock time [ms]", labelpad=0.0) if expand == 1: - ax.set_ylabel("Thread ID", labelpad=0 ) + ax.set_ylabel("Thread ID", labelpad=0) else: - ax.set_ylabel("Thread ID * " + str(expand), labelpad=0 ) -ax.set_yticks(pl.array(range(nthread)), True) + ax.set_ylabel("Thread ID * " + str(expand), labelpad=0) +ax.set_yticks(pl.array(list(range(nthread))), True) loc = plticker.MultipleLocator(base=expand) ax.yaxis.set_major_locator(loc) -ax.grid(True, which='major', axis="y", linestyle="-") +ax.grid(True, which="major", axis="y", linestyle="-") pl.show() pl.savefig(outpng) -print "Graphics done, output written to", outpng +print("Graphics done, output written to", outpng) sys.exit(0) diff --git a/examples/process_plot_tasks b/tools/task_plots/process_plot_tasks similarity index 100% rename from examples/process_plot_tasks rename to tools/task_plots/process_plot_tasks diff --git a/examples/process_plot_tasks_MPI b/tools/task_plots/process_plot_tasks_MPI similarity index 89% rename from examples/process_plot_tasks_MPI rename to tools/task_plots/process_plot_tasks_MPI index 22c9a106f52ca28244f9fef60839b1125474f14c..736aad05b98aea619f79e2b2114815c8e0fbaa1c 100755 --- a/examples/process_plot_tasks_MPI +++ b/tools/task_plots/process_plot_tasks_MPI @@ -87,10 +87,22 @@ echo $list | xargs -n 3 | while read f s g; do <ul style="list-style-type:none"> <li> EOF + + cat <<EOF3 > step${s}r.html +<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01 Transitional//EN"> +<html> +<body> +EOF3 + for i in $(seq 0 $nrank); do - cat <<EOF2 >> index.html -<a href="step${s}r${i}.html"><img src="step${s}r${i}.png" width=400px/></a> -EOF2 + + cat <<EOF >> index.html +<a href="step${s}r.html"><img src="step${s}r${i}.png" width=400px/></a> +EOF + cat <<EOF3 >> step${s}r.html +<a href="step${s}r${i}.html"><img src="step${s}r${i}.png"/></a> +EOF3 + cat <<EOF2 > step${s}r${i}.html <!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01 Transitional//EN"> <html> @@ -104,7 +116,14 @@ cat <<EOF2 >> 
step${s}r${i}.html </body> </html> EOF2 + done + +cat <<EOF3 >> step${s}r.html +</body> +</html> +EOF3 + cat <<EOF >> index.html </li> </ul> diff --git a/examples/process_plot_taskthreadpools b/tools/task_plots/process_plot_taskthreadpools similarity index 100% rename from examples/process_plot_taskthreadpools rename to tools/task_plots/process_plot_taskthreadpools diff --git a/examples/process_plot_taskthreadpools_helper b/tools/task_plots/process_plot_taskthreadpools_helper similarity index 100% rename from examples/process_plot_taskthreadpools_helper rename to tools/task_plots/process_plot_taskthreadpools_helper diff --git a/examples/process_plot_threadpool b/tools/task_plots/process_plot_threadpool similarity index 100% rename from examples/process_plot_threadpool rename to tools/task_plots/process_plot_threadpool