diff --git a/.gitignore b/.gitignore index e47ab3086cfbb52be6c42d3c1b9a3d0a28fa4525..f78f64a8d8a96278ba8c5be8649095f0ef8861c7 100644 --- a/.gitignore +++ b/.gitignore @@ -63,12 +63,18 @@ tests/brute_force_perturbed.dat tests/swift_dopair_perturbed.dat tests/test27cells tests/test27cells_subset +tests/test27cellsStars +tests/test27cellsStars_subset tests/testPeriodicBC tests/test125cells tests/brute_force_27_standard.dat tests/swift_dopair_27_standard.dat tests/brute_force_27_perturbed.dat tests/swift_dopair_27_perturbed.dat +tests/star_brute_force_27_standard.dat +tests/swift_star_dopair_27_standard.dat +tests/star_brute_force_27_perturbed.dat +tests/swift_star_dopair_27_perturbed.dat tests/brute_force_125_standard.dat tests/swift_dopair_125_standard.dat tests/brute_force_125_perturbed.dat @@ -106,6 +112,8 @@ tests/testPeriodicBC.sh tests/testPeriodicBCPerturbed.sh tests/test27cells.sh tests/test27cellsPerturbed.sh +tests/test27cellsStars.sh +tests/test27cellsStarsPerturbed.sh tests/test125cells.sh tests/test125cellsPerturbed.sh tests/testParser.sh diff --git a/configure.ac b/configure.ac index aa5757a9e3c224ceb75357bae988db1b93014e96..aa2d3136078a7866598470878eaf58b886c923a6 100644 --- a/configure.ac +++ b/configure.ac @@ -1045,11 +1045,15 @@ case "$with_subgrid" in with_subgrid_cooling=grackle with_subgrid_chemistry=GEAR with_subgrid_hydro=gadget2 + with_subgrid_stars=GEAR + with_subgrid_feedback=thermal ;; EAGLE) with_subgrid_cooling=EAGLE with_subgrid_chemistry=EAGLE with_subgrid_hydro=gadget2 + with_subgrid_stars=none + with_subgrid_feedback=none ;; *) AC_MSG_ERROR([Unknown subgrid choice: $with_subgrid]) @@ -1129,6 +1133,25 @@ case "$with_hydro" in ;; esac +# Check if debugging interactions stars is switched on. +AC_ARG_ENABLE([debug-interactions-stars], + [AS_HELP_STRING([--enable-debug-interactions-stars], + [Activate interaction debugging for stars, logging a maximum of @<:@N@:>@ neighbours. Defaults to 256 if no value set.] + )], + [enable_debug_interactions_stars="$enableval"], + [enable_debug_interactions_stars="no"] +) +if test "$enable_debug_interactions_stars" != "no"; then + AC_DEFINE([DEBUG_INTERACTIONS_STARS],1,[Enable interaction debugging for stars]) + if test "$enable_debug_interactions_stars" == "yes"; then + AC_DEFINE([MAX_NUM_OF_NEIGHBOURS_STARS],256,[The maximum number of particle neighbours to be logged for stars]) + [enable_debug_interactions_stars="yes (Logging up to 256 neighbours)"] + else + AC_DEFINE_UNQUOTED([MAX_NUM_OF_NEIGHBOURS_STARS], [$enableval] ,[The maximum number of particle neighbours to be logged for stars]) + [enable_debug_interactions_stars="yes (Logging up to $enableval neighbours)"] + fi +fi + # Check if debugging interactions is switched on. AC_ARG_ENABLE([debug-interactions], [AS_HELP_STRING([--enable-debug-interactions], @@ -1152,6 +1175,7 @@ if test "$enable_debug_interactions" != "no"; then fi fi + # SPH Kernel function AC_ARG_WITH([kernel], [AS_HELP_STRING([--with-kernel=<kernel>], @@ -1401,6 +1425,64 @@ case "$with_chemistry" in ;; esac +# Stellar model. +AC_ARG_WITH([stars], + [AS_HELP_STRING([--with-stars=<model>], + [Stellar model to use @<:@none, GEAR, debug default: none@:>@] + )], + [with_stars="$withval"], + [with_stars="none"] +) + +if test "$with_subgrid" != "none"; then + if test "$with_stars" != "none"; then + AC_MSG_ERROR([Cannot provide with-subgrid and with-stars together]) + else + with_stars="$with_subgrid_stars" + fi +fi + +case "$with_stars" in + GEAR) + AC_DEFINE([STARS_GEAR], [1], [GEAR stellar model]) + ;; + none) + ;; + + *) + AC_MSG_ERROR([Unknown stellar model: $with_stars]) + ;; +esac + +# Feedback model +AC_ARG_WITH([feedback], + [AS_HELP_STRING([--with-feedback=<model>], + [Feedback model to use @<:@none, thermal, debug default: none@:>@] + )], + [with_feedback="$withval"], + [with_feedback="none"] +) + +if test "$with_subgrid" != "none"; then + if test "$with_feedback" != "none"; then + AC_MSG_ERROR([Cannot provide with-subgrid and with-feedback together]) + else + with_feedback="$with_subgrid_feedback" + fi +fi + +case "$with_feedback" in + thermal) + AC_DEFINE([FEEDBACK_THERMAL], [1], [Thermal Blastwave]) + ;; + none) + ;; + + *) + AC_MSG_ERROR([Unknown feedback model: $with_feedback]) + ;; +esac + # External potential AC_ARG_WITH([ext-potential], [AS_HELP_STRING([--with-ext-potential=<pot>], @@ -1460,6 +1542,8 @@ AC_CONFIG_FILES([tests/testReading.sh], [chmod +x tests/testReading.sh]) AC_CONFIG_FILES([tests/testActivePair.sh], [chmod +x tests/testActivePair.sh]) AC_CONFIG_FILES([tests/test27cells.sh], [chmod +x tests/test27cells.sh]) AC_CONFIG_FILES([tests/test27cellsPerturbed.sh], [chmod +x tests/test27cellsPerturbed.sh]) +AC_CONFIG_FILES([tests/test27cellsStars.sh], [chmod +x tests/test27cellsStars.sh]) +AC_CONFIG_FILES([tests/test27cellsStarsPerturbed.sh], [chmod +x tests/test27cellsStarsPerturbed.sh]) AC_CONFIG_FILES([tests/test125cells.sh], [chmod +x tests/test125cells.sh]) AC_CONFIG_FILES([tests/test125cellsPerturbed.sh], [chmod +x tests/test125cellsPerturbed.sh]) AC_CONFIG_FILES([tests/testPeriodicBC.sh], [chmod +x tests/testPeriodicBC.sh]) @@ -1521,6 +1605,8 @@ AC_MSG_RESULT([ Cooling function : $with_cooling Chemistry : $with_chemistry + Stellar model : $with_stars + Feedback model : $with_feedback Individual timers : $enable_timers Task debugging : $enable_task_debugging diff --git a/doc/RTD/source/Task/adding_your_own.rst b/doc/RTD/source/Task/adding_your_own.rst new file mode 100644 index 0000000000000000000000000000000000000000..6f6b37899b505a5bbf6a09d8757232e0b547a081 --- /dev/null +++ b/doc/RTD/source/Task/adding_your_own.rst @@ -0,0 +1,262 @@ +.. Task + Loic Hausammann 17th July 2018 + +.. _task_adding_your_own: +.. highlight:: c + +Adding a Task +============= + +First you will need to understand the dependencies between tasks +using the file ``dependency_graph.dot`` generated by swift at the beginning of any simulation and then decide where it will fit (see :ref:`task`). + +For the next paragraphs, let's assume that we want to implement the existing task ``cooling``. + +Adding it to the Task List +-------------------------- +First you will need to add it to the task list situated in ``task.h`` and ``task.c``. + +In ``task.h``, you need to provide an additional entry to the enum ``task_types`` (e.g. ``task_type_cooling``). +The last entry ``task_type_count`` should always stay at the end as it is a counter of the number of elements. +For example:: + + enum task_types { + task_type_none = 0, + task_type_sort, + task_type_self, + task_type_pair, + task_type_sub_self, + task_type_sub_pair, + task_type_ghost_in, + task_type_ghost, + task_type_ghost_out, + task_type_extra_ghost, + task_type_drift_part, + task_type_end_force, + task_type_kick1, + task_type_kick2, + task_type_timestep, + task_type_send, + task_type_recv, + task_type_cooling, + task_type_count + } __attribute__((packed)); + + +In ``task.c``, you will find an array containing the name of each task and need to add your own (e.g. ``cooling``). +Be careful with the order that should be the same than in the previous list. +For example:: + + /* Task type names. */ + const char *taskID_names[task_type_count] = { + "none", "sort", "self", "pair", "sub_self", + "sub_pair", "ghost_in", "ghost", "ghost_out", + "extra_ghost", "drift_part", "end_force", "kick1", + "kick2", "timestep", "send", "recv", + "cooling"}; + + +Adding it to the Cells +---------------------- + +Each cell contains a list to its tasks and therefore you need to provide a link for it. + +In ``cell.h``, add a pointer to a task in the structure. +In order to stay clean, please put the new task in the same group than the other tasks. +For example:: + + struct cell { + /* Lot of stuff before. */ + + /*! Task for the cooling */ + struct task *cooling; + + /*! The second kick task */ + struct task *kick2; + + /* Lot of stuff after */ + } + + +Adding a new Timer +------------------ + +As SWIFT is HPC oriented, any new task need to be optimized. +It cannot be done without timing the function. + +In ``timers.h``, you will find an enum that contains all the tasks. +You will need to add yours inside it. +For example:: + + enum { + timer_none = 0, + timer_prepare, + timer_init, + timer_drift_part, + timer_drift_gpart, + timer_kick1, + timer_kick2, + timer_timestep, + timer_endforce, + timer_dosort, + timer_doself_density, + timer_doself_gradient, + timer_doself_force, + timer_dopair_density, + timer_dopair_gradient, + timer_dopair_force, + timer_dosub_self_density, + timer_dosub_self_gradient, + timer_dosub_self_force, + timer_dosub_pair_density, + timer_dosub_pair_gradient, + timer_dosub_pair_force, + timer_doself_subset, + timer_dopair_subset, + timer_dopair_subset_naive, + timer_dosub_subset, + timer_do_ghost, + timer_do_extra_ghost, + timer_dorecv_part, + timer_do_cooling, + timer_gettask, + timer_qget, + timer_qsteal, + timer_locktree, + timer_runners, + timer_step, + timer_cooling, + timer_count, + }; + +As for ``task.h``, +you will need to give a name to your timer in ``timers.c``:: + + const char* timers_names[timer_count] = { + "none", + "prepare", + "init", + "drift_part", + "kick1", + "kick2", + "timestep", + "endforce", + "dosort", + "doself_density", + "doself_gradient", + "doself_force", + "dopair_density", + "dopair_gradient", + "dopair_force", + "dosub_self_density", + "dosub_self_gradient", + "dosub_self_force", + "dosub_pair_density", + "dosub_pair_gradient", + "dosub_pair_force", + "doself_subset", + "dopair_subset", + "dopair_subset_naive", + "dosub_subset", + "do_ghost", + "do_extra_ghost", + "dorecv_part", + "gettask", + "qget", + "qsteal", + "locktree", + "runners", + "step", + "cooling", + }; + + +You can now easily time +your functions by using:: + + TIMER_TIC; + /* Your complicated functions */ + if (timer) TIMER_TOC(timer_cooling); + + +Adding your Task to the System +------------------------------ + +Now the tricky part happens. +SWIFT is able to deal automatically with the conflicts between tasks, but unfortunately cannot understand the dependencies. + +To implement your new task in the task system, you will need to modify a few functions in ``engine.c``. + +First, you will need to add mainly two functions: ``scheduler_addtask`` and ``scheduler_addunlocks`` in the ``engine_make_hierarchical_tasks_*`` functions (depending on the type of task you implement, you will need to write it to a different function). + +In ``engine_make_hierarchical_tasks_hydro``, +we add the task through the following call:: + + /* Add the cooling task. */ + c->cooling = + scheduler_addtask(s, task_type_cooling, task_subtype_none, 0, + 0, c, NULL); + +As the ``cooling`` cannot be done before the end of the force computation +and the second kick cannot be done before the cooling:: + + scheduler_addunlock(s, c->super->end_force, c->cooling); + scheduler_addunlock(s, c->cooling, c->super->kick2); + + +The next step is to activate your task +in ``engine_marktasks_mapper``:: + + else if (t->type == task_type_cooling || t->type == task_type_sourceterms) { + if (cell_is_active_hydro(t->ci, e)) scheduler_activate(s, t); + } + +Then you will need to update the estimate for the number of tasks in ``engine_estimate_nr_tasks`` by modifying ``n1`` or ``n2``. + +Initially, the engine will need to skip the task that updates the particles. +It is the case for the cooling, therefore you will need to add it in ``engine_skip_force_and_kick``. + +Implementing your Task +---------------------- + +The last part is situated in ``runner.c``. + +You will need to implement a function ``runner_do_cooling`` +(do not forget to time it):: + + void runner_do_cooling(struct runner *r, struct cell *c, int timer) { + + TIMER_TIC; + + /* Now you can check if something is required at this time step. + * You may want to use a different cell_is_active function depending + * on your task + */ + if (!cell_is_active_hydro(c, e)) return; + + /* Recurse? */ + if (c->split) { + for (int k = 0; k < 8; k++) + if (c->progeny[k] != NULL) runner_do_cooling(r, c->progeny[k], 0); + } else { + /* Implement your cooling here */ + } + + if (timer) TIMER_TOC(timer_do_cooling); + } + + + +and add a call to this function in ``runner_main`` +in the switch:: + + case task_type_cooling: + runner_do_cooling(r, t->ci, 1); + break; + + +Finalizing your Task +-------------------- + +Now that you have done the easiest part, you can start debugging by implementing a test and/or an example. +Before creating your merge request with your new task, do not forget the most funny part that consists in writing a nice and beautiful documentation ;) diff --git a/doc/RTD/source/Task/index.rst b/doc/RTD/source/Task/index.rst new file mode 100644 index 0000000000000000000000000000000000000000..e701e924a79f9256d4c0b034b6c15651f41ff405 --- /dev/null +++ b/doc/RTD/source/Task/index.rst @@ -0,0 +1,21 @@ +.. Task + Loic Hausammann 17th July 2018 + +.. _task: + +Task System +=========== + +This section of the documentation includes information on the task system +available in SWIFT, as well as how to implement your own task. + +SWIFT produces at the beginning of each simulation a ``dot`` file (see the graphviz library for more information). +It contains the full hierarchy of tasks used in this simulation. +You can convert the ``dot`` file into a ``png`` with the following command +``dot -Tpng dependency_graph.dot -o dependency_graph.png``. + +.. toctree:: + :maxdepth: 2 + :caption: Contents: + + adding_your_own diff --git a/doc/RTD/source/index.rst b/doc/RTD/source/index.rst index 888945a5c0101bb6f59b574a30f1f736ad134079..05c15c5a081cb03ae4e53c26bf6fe4e8dd78dfd5 100644 --- a/doc/RTD/source/index.rst +++ b/doc/RTD/source/index.rst @@ -20,3 +20,4 @@ difference is the parameter file that will need to be adapted for SWIFT. Cooling/index EquationOfState/index NewOption/index + Task/index diff --git a/examples/analyse_tasks.py b/examples/analyse_tasks.py index a72ee0ce637b6ac2da4b8b95dac5bacab3d40a99..78d03ae5436a1fc56abcd6f6b99a4920011fa6db 100755 --- a/examples/analyse_tasks.py +++ b/examples/analyse_tasks.py @@ -54,10 +54,12 @@ infile = args.input TASKTYPES = ["none", "sort", "self", "pair", "sub_self", "sub_pair", "init_grav", "init_grav_out", "ghost_in", "ghost", "ghost_out", "extra_ghost", "drift_part", "drift_gpart", "end_force", "kick1", "kick2", "timestep", "send", "recv", "grav_long_range", "grav_mm", "grav_down_in", - "grav_down", "grav_mesh", "cooling", "sourceterms", "count"] + "grav_down", "grav_mesh", "cooling", "sourceterms", + "stars_ghost_in", "stars_ghost", "stars_ghost_out", + "count"] SUBTYPES = ["none", "density", "gradient", "force", "grav", "external_grav", - "tend", "xv", "rho", "gpart", "multipole", "spart", "count"] + "tend", "xv", "rho", "gpart", "multipole", "spart", "stars_density", "count"] SIDS = ["(-1,-1,-1)", "(-1,-1, 0)", "(-1,-1, 1)", "(-1, 0,-1)", "(-1, 0, 0)", "(-1, 0, 1)", "(-1, 1,-1)", "(-1, 1, 0)", diff --git a/examples/main.c b/examples/main.c index 0af9cc2160c79dc90063620c2346ede84e67ce03..a8fe484323c5db1dd9955e852122a278fec8aead 100644 --- a/examples/main.c +++ b/examples/main.c @@ -98,6 +98,7 @@ void print_help_message(void) { printf(" %2s %14s %s\n", "-r", "", "Continue using restart files."); printf(" %2s %14s %s\n", "-s", "", "Run with hydrodynamics."); printf(" %2s %14s %s\n", "-S", "", "Run with stars."); + printf(" %2s %14s %s\n", "-b", "", "Run with stars feedback."); printf(" %2s %14s %s\n", "-t", "{int}", "The number of threads to use on each MPI rank. Defaults to 1 if not " "specified."); @@ -135,6 +136,7 @@ int main(int argc, char *argv[]) { struct gpart *gparts = NULL; struct gravity_props gravity_properties; struct hydro_props hydro_properties; + struct stars_props stars_properties; struct part *parts = NULL; struct phys_const prog_const; struct sourceterms sourceterms; @@ -192,6 +194,7 @@ int main(int argc, char *argv[]) { int with_self_gravity = 0; int with_hydro = 0; int with_stars = 0; + int with_feedback = 0; int with_fp_exceptions = 0; int with_drift_all = 0; int with_mpole_reconstruction = 0; @@ -208,7 +211,7 @@ int main(int argc, char *argv[]) { /* Parse the parameters */ int c; - while ((c = getopt(argc, argv, "acCdDef:FgGhMn:o:P:rsSt:Tv:xy:Y:")) != -1) + while ((c = getopt(argc, argv, "abcCdDef:FgGhMn:o:P:rsSt:Tv:xy:Y:")) != -1) switch (c) { case 'a': #if defined(HAVE_SETAFFINITY) && defined(HAVE_LIBNUMA) @@ -217,6 +220,9 @@ int main(int argc, char *argv[]) { error("Need NUMA support for thread affinity"); #endif break; + case 'b': + with_feedback = 1; + break; case 'c': with_cosmology = 1; break; @@ -384,6 +390,15 @@ int main(int argc, char *argv[]) { return 1; } + if (!with_stars && with_feedback) { + if (myrank == 0) + printf( + "Error: Cannot process feedback without stars, -S must be " + "chosen.\n"); + if (myrank == 0) print_help_message(); + return 1; + } + /* Let's pin the main thread, now we know if affinity will be used. */ #if defined(HAVE_SETAFFINITY) && defined(HAVE_LIBNUMA) && defined(_GNU_SOURCE) if (with_aff && @@ -681,6 +696,13 @@ int main(int argc, char *argv[]) { else bzero(&eos, sizeof(struct eos_parameters)); + /* Initialise the stars properties */ + if (with_stars) + stars_props_init(&stars_properties, &prog_const, &us, params, + &hydro_properties); + else + bzero(&stars_properties, sizeof(struct stars_props)); + /* Initialise the gravity properties */ if (with_self_gravity) gravity_props_init(&gravity_properties, params, &cosmo, with_cosmology); @@ -752,7 +774,7 @@ int main(int argc, char *argv[]) { /* Check once and for all that we don't have unwanted links */ if (!with_stars && !dry_run) { for (size_t k = 0; k < Ngpart; ++k) - if (gparts[k].type == swift_type_star) error("Linking problem"); + if (gparts[k].type == swift_type_stars) error("Linking problem"); } if (!with_hydro && !dry_run) { for (size_t k = 0; k < Ngpart; ++k) @@ -778,7 +800,7 @@ int main(int argc, char *argv[]) { if (myrank == 0) message( - "Read %lld gas particles, %lld star particles and %lld gparts from " + "Read %lld gas particles, %lld stars particles and %lld gparts from " "the ICs.", N_total[0], N_total[2], N_total[1]); @@ -887,6 +909,7 @@ int main(int argc, char *argv[]) { if (with_cooling) engine_policies |= engine_policy_cooling; if (with_sourceterms) engine_policies |= engine_policy_sourceterms; if (with_stars) engine_policies |= engine_policy_stars; + if (with_feedback) engine_policies |= engine_policy_feedback; if (with_structure_finding) engine_policies |= engine_policy_structure_finding; @@ -894,8 +917,8 @@ int main(int argc, char *argv[]) { if (myrank == 0) clocks_gettime(&tic); engine_init(&e, &s, params, N_total[0], N_total[1], N_total[2], engine_policies, talking, &reparttype, &us, &prog_const, &cosmo, - &hydro_properties, &gravity_properties, &mesh, &potential, - &cooling_func, &chemistry, &sourceterms); + &hydro_properties, &gravity_properties, &stars_properties, + &mesh, &potential, &cooling_func, &chemistry, &sourceterms); engine_config(0, &e, params, nr_nodes, myrank, nr_threads, with_aff, talking, restart_file); @@ -910,7 +933,7 @@ int main(int argc, char *argv[]) { if (myrank == 0) { long long N_DM = N_total[1] - N_total[2] - N_total[0]; message( - "Running on %lld gas particles, %lld star particles and %lld DM " + "Running on %lld gas particles, %lld stars particles and %lld DM " "particles (%lld gravity particles)", N_total[0], N_total[2], N_total[1] > 0 ? N_DM : 0, N_total[1]); message( diff --git a/examples/parameter_example.yml b/examples/parameter_example.yml index 9a773fb6bbe2ee9e44eddb80be069f4ccc6e58a8..3f83a8f25b312cb167922f7e094c616f16211bf9 100644 --- a/examples/parameter_example.yml +++ b/examples/parameter_example.yml @@ -55,6 +55,8 @@ Scheduler: cell_sub_size_self_hydro: 32000 # (Optional) Maximal number of interactions per sub-self hydro task (this is the default value). cell_sub_size_pair_grav: 256000000 # (Optional) Maximal number of interactions per sub-pair gravity task (this is the default value). cell_sub_size_self_grav: 32000 # (Optional) Maximal number of interactions per sub-self gravity task (this is the default value). + cell_sub_size_pair_stars: 256000000 # (Optional) Maximal number of interactions per sub-pair stars task (this is the default value). + cell_sub_size_self_stars: 32000 # (Optional) Maximal number of interactions per sub-self stars task (this is the default value). cell_split_size: 400 # (Optional) Maximal number of particles per cell (this is the default value). cell_subdepth_grav: 2 # (Optional) Maximal depth the gravity tasks can be pushed down (this is the default value). max_top_level_cells: 12 # (Optional) Maximal number of top-level cells in any dimension. The number of top-level cells will be the cube of this (this is the default value). diff --git a/examples/plot_tasks.py b/examples/plot_tasks.py index a5c55ac9de559c5d3333282e835fa47e8af7c807..a8d8c49a7ca8676278ac1ab50c7b6cdf3755445c 100755 --- a/examples/plot_tasks.py +++ b/examples/plot_tasks.py @@ -112,17 +112,20 @@ pl.rcParams.update(PLOT_PARAMS) TASKTYPES = ["none", "sort", "self", "pair", "sub_self", "sub_pair", "init_grav", "init_grav_out", "ghost_in", "ghost", "ghost_out", "extra_ghost", "drift_part", "drift_gpart", "end_force", "kick1", "kick2", "timestep", "send", "recv", "grav_long_range", "grav_mm", "grav_down_in", - "grav_down", "grav_mesh", "cooling", "sourceterms", "count"] + "grav_down", "grav_mesh", "cooling", "sourceterms", + "stars_ghost_in", "stars_ghost", "stars_ghost_out", + "count"] SUBTYPES = ["none", "density", "gradient", "force", "grav", "external_grav", - "tend", "xv", "rho", "gpart", "multipole", "spart", "count"] + "tend", "xv", "rho", "gpart", "multipole", "spart", "stars_density", "count"] # Task/subtypes of interest. FULLTYPES = ["self/force", "self/density", "self/grav", "sub_self/force", "sub_self/density", "pair/force", "pair/density", "pair/grav", "sub_pair/force", "sub_pair/density", "recv/xv", "send/xv", "recv/rho", "send/rho", - "recv/tend", "send/tend", "recv/gpart", "send/gpart"] + "recv/tend", "send/tend", "recv/gpart", "send/gpart", "self/stars_density", + "pair/stars_density", "sub_self/stars_density", "sub_pair/stars_density"] # A number of colours for the various types. Recycled when there are # more task types than colours... diff --git a/src/Makefile.am b/src/Makefile.am index ab3215d068416bba6d74170fedecf915a2654afd..e1e66856fa3265c7f8429c377fd9bdce09347581 100644 --- a/src/Makefile.am +++ b/src/Makefile.am @@ -126,8 +126,8 @@ nobase_noinst_HEADERS = align.h approx_math.h atomic.h barrier.h cycle.h error.h riemann/riemann_exact.h riemann/riemann_vacuum.h \ riemann/riemann_checks.h \ stars.h stars_io.h \ - stars/Default/star.h stars/Default/star_iact.h stars/Default/star_io.h \ - stars/Default/star_debug.h stars/Default/star_part.h \ + stars/Default/stars.h stars/Default/stars_iact.h stars/Default/stars_io.h \ + stars/Default/stars_debug.h stars/Default/stars_part.h \ potential/none/potential.h potential/point_mass/potential.h \ potential/isothermal/potential.h potential/disc_patch/potential.h \ potential/sine_wave/potential.h \ diff --git a/src/active.h b/src/active.h index 949babf1b1f8d27c89d0a5d9c3a93f1825512fd3..5b8c2684822ebbc1d1c86b6ebb0d7d6a869959a4 100644 --- a/src/active.h +++ b/src/active.h @@ -174,6 +174,21 @@ __attribute__((always_inline)) INLINE static int cell_is_all_active_gravity( return (c->ti_gravity_end_max == e->ti_current); } +/** + * @brief Does a cell contain any s-particle finishing their time-step now ? + * + * @param c The #cell. + * @param e The #engine containing information about the current time. + * @return 1 if the #cell contains at least an active particle, 0 otherwise. + */ +__attribute__((always_inline)) INLINE static int cell_is_active_stars( + const struct cell *c, const struct engine *e) { + + // LOIC: Need star-specific implementation + + return cell_is_active_gravity(c, e); +} + /** * @brief Is this particle finishing its time-step now ? * diff --git a/src/cell.c b/src/cell.c index 06013b0f32f4d763d0fa3babd1dd1b093019d2ed..8ecaf74711434dbbfa2b0c39b8a90bdf4ccc3155 100644 --- a/src/cell.c +++ b/src/cell.c @@ -61,6 +61,7 @@ #include "scheduler.h" #include "space.h" #include "space_getsid.h" +#include "stars.h" #include "timers.h" #include "tools.h" @@ -1114,7 +1115,7 @@ void cell_split(struct cell *c, ptrdiff_t parts_offset, ptrdiff_t sparts_offset, if (gparts[j].type == swift_type_gas) { parts[-gparts[j].id_or_neg_offset - parts_offset].gpart = &gparts[j]; - } else if (gparts[j].type == swift_type_star) { + } else if (gparts[j].type == swift_type_stars) { sparts[-gparts[j].id_or_neg_offset - sparts_offset].gpart = &gparts[j]; } @@ -1124,7 +1125,7 @@ void cell_split(struct cell *c, ptrdiff_t parts_offset, ptrdiff_t sparts_offset, gbuff[k] = temp_buff; if (gparts[k].type == swift_type_gas) { parts[-gparts[k].id_or_neg_offset - parts_offset].gpart = &gparts[k]; - } else if (gparts[k].type == swift_type_star) { + } else if (gparts[k].type == swift_type_stars) { sparts[-gparts[k].id_or_neg_offset - sparts_offset].gpart = &gparts[k]; } @@ -2017,6 +2018,19 @@ void cell_activate_subcell_hydro_tasks(struct cell *ci, struct cell *cj, } /* Otherwise, pair interation */ } +/** + * @brief Traverse a sub-cell task and activate the stars drift tasks that are + * required by a stars task + * + * @param ci The first #cell we recurse in. + * @param cj The second #cell we recurse in. + * @param s The task #scheduler. + */ +void cell_activate_subcell_stars_tasks(struct cell *ci, struct cell *cj, + struct scheduler *s) { + // LOIC: to implement +} + /** * @brief Traverse a sub-cell task and activate the gravity drift tasks that * are required by a self gravity task. @@ -2504,6 +2518,159 @@ int cell_unskip_gravity_tasks(struct cell *c, struct scheduler *s) { return rebuild; } +/** + * @brief Un-skips all the stars tasks associated with a given cell and checks + * if the space needs to be rebuilt. + * + * @param c the #cell. + * @param s the #scheduler. + * + * @return 1 If the space needs rebuilding. 0 otherwise. + */ +int cell_unskip_stars_tasks(struct cell *c, struct scheduler *s) { + struct engine *e = s->space->e; + const int nodeID = e->nodeID; + int rebuild = 0; + + /* Un-skip the density tasks involved with this cell. */ + for (struct link *l = c->stars_density; l != NULL; l = l->next) { + struct task *t = l->t; + struct cell *ci = t->ci; + struct cell *cj = t->cj; + const int ci_active = cell_is_active_stars(ci, e); + const int cj_active = (cj != NULL) ? cell_is_active_stars(cj, e) : 0; + + /* Only activate tasks that involve a local active cell. */ + if ((ci_active && ci->nodeID == nodeID) || + (cj_active && cj->nodeID == nodeID)) { + scheduler_activate(s, t); + + /* Activate drifts */ + if (t->type == task_type_self) { + if (ci->nodeID == nodeID) cell_activate_drift_part(ci, s); + if (ci->nodeID == nodeID) cell_activate_drift_gpart(ci, s); + } + + /* Set the correct sorting flags and activate hydro drifts */ + else if (t->type == task_type_pair) { + /* Store some values. */ + atomic_or(&ci->requires_sorts, 1 << t->flags); + atomic_or(&cj->requires_sorts, 1 << t->flags); + ci->dx_max_sort_old = ci->dx_max_sort; + cj->dx_max_sort_old = cj->dx_max_sort; + + /* Activate the drift tasks. */ + if (ci->nodeID == nodeID) cell_activate_drift_part(ci, s); + if (cj->nodeID == nodeID) cell_activate_drift_part(cj, s); + + /* Check the sorts and activate them if needed. */ + cell_activate_sorts(ci, t->flags, s); + cell_activate_sorts(cj, t->flags, s); + } + /* Store current values of dx_max and h_max. */ + else if (t->type == task_type_sub_pair || t->type == task_type_sub_self) { + cell_activate_subcell_stars_tasks(t->ci, t->cj, s); + } + } + + /* Only interested in pair interactions as of here. */ + if (t->type == task_type_pair || t->type == task_type_sub_pair) { + + /* Check whether there was too much particle motion, i.e. the + cell neighbour conditions were violated. */ + if (cell_need_rebuild_for_pair(ci, cj)) rebuild = 1; + +#ifdef WITH_MPI + error("MPI with stars not implemented"); + /* /\* Activate the send/recv tasks. *\/ */ + /* if (ci->nodeID != nodeID) { */ + + /* /\* If the local cell is active, receive data from the foreign cell. + * *\/ */ + /* if (cj_active) { */ + /* scheduler_activate(s, ci->recv_xv); */ + /* if (ci_active) { */ + /* scheduler_activate(s, ci->recv_rho); */ + + /* } */ + /* } */ + + /* /\* If the foreign cell is active, we want its ti_end values. *\/ */ + /* if (ci_active) scheduler_activate(s, ci->recv_ti); */ + + /* /\* Is the foreign cell active and will need stuff from us? *\/ */ + /* if (ci_active) { */ + + /* scheduler_activate_send(s, cj->send_xv, ci->nodeID); */ + + /* /\* Drift the cell which will be sent; note that not all sent */ + /* particles will be drifted, only those that are needed. *\/ */ + /* cell_activate_drift_part(cj, s); */ + + /* /\* If the local cell is also active, more stuff will be needed. + * *\/ */ + /* if (cj_active) { */ + /* scheduler_activate_send(s, cj->send_rho, ci->nodeID); */ + + /* } */ + /* } */ + + /* /\* If the local cell is active, send its ti_end values. *\/ */ + /* if (cj_active) scheduler_activate_send(s, cj->send_ti, ci->nodeID); + */ + + /* } else if (cj->nodeID != nodeID) { */ + + /* /\* If the local cell is active, receive data from the foreign cell. + * *\/ */ + /* if (ci_active) { */ + /* scheduler_activate(s, cj->recv_xv); */ + /* if (cj_active) { */ + /* scheduler_activate(s, cj->recv_rho); */ + + /* } */ + /* } */ + + /* /\* If the foreign cell is active, we want its ti_end values. *\/ */ + /* if (cj_active) scheduler_activate(s, cj->recv_ti); */ + + /* /\* Is the foreign cell active and will need stuff from us? *\/ */ + /* if (cj_active) { */ + + /* scheduler_activate_send(s, ci->send_xv, cj->nodeID); */ + + /* /\* Drift the cell which will be sent; note that not all sent */ + /* particles will be drifted, only those that are needed. *\/ */ + /* cell_activate_drift_part(ci, s); */ + + /* /\* If the local cell is also active, more stuff will be needed. + * *\/ */ + /* if (ci_active) { */ + + /* scheduler_activate_send(s, ci->send_rho, cj->nodeID); */ + + /* } */ + /* } */ + + /* /\* If the local cell is active, send its ti_end values. *\/ */ + /* if (ci_active) scheduler_activate_send(s, ci->send_ti, cj->nodeID); + */ + /* } */ +#endif + } + } + + /* Unskip all the other task types. */ + if (c->nodeID == nodeID && cell_is_active_stars(c, e)) { + + if (c->stars_ghost_in != NULL) scheduler_activate(s, c->stars_ghost_in); + if (c->stars_ghost_out != NULL) scheduler_activate(s, c->stars_ghost_out); + if (c->stars_ghost != NULL) scheduler_activate(s, c->stars_ghost); + } + + return rebuild; +} + /** * @brief Set the super-cell pointers for all cells in a hierarchy. * diff --git a/src/cell.h b/src/cell.h index bbba199d5f364c9c68377e58871d1cc3d8f780b8..373a10b04a4a0e28a8735ea8089e86e022e725d2 100644 --- a/src/cell.h +++ b/src/cell.h @@ -282,6 +282,18 @@ struct cell { /*! Task propagating the multipole to the particles */ struct task *grav_down; + /*! Dependency implicit task for the star ghost (in->ghost->out)*/ + struct task *stars_ghost_in; + + /*! Dependency implicit task for the star ghost (in->ghost->out)*/ + struct task *stars_ghost_out; + + /*! The star ghost task itself */ + struct task *stars_ghost; + + /*! Linked list of the tasks computing this cell's star density. */ + struct link *stars_density; + /*! Task for cooling */ struct task *cooling; @@ -523,6 +535,7 @@ void cell_check_gpart_drift_point(struct cell *c, void *data); void cell_check_multipole_drift_point(struct cell *c, void *data); void cell_reset_task_counters(struct cell *c); int cell_unskip_hydro_tasks(struct cell *c, struct scheduler *s); +int cell_unskip_stars_tasks(struct cell *c, struct scheduler *s); int cell_unskip_gravity_tasks(struct cell *c, struct scheduler *s); void cell_set_super(struct cell *c, struct cell *super); void cell_drift_part(struct cell *c, const struct engine *e, int force); @@ -535,6 +548,8 @@ void cell_activate_subcell_hydro_tasks(struct cell *ci, struct cell *cj, struct scheduler *s); void cell_activate_subcell_grav_tasks(struct cell *ci, struct cell *cj, struct scheduler *s); +void cell_activate_subcell_stars_tasks(struct cell *ci, struct cell *cj, + struct scheduler *s); void cell_activate_subcell_external_grav_tasks(struct cell *ci, struct scheduler *s); void cell_activate_drift_part(struct cell *c, struct scheduler *s); @@ -580,6 +595,32 @@ cell_can_recurse_in_self_hydro_task(const struct cell *c) { return c->split && (kernel_gamma * c->h_max_old < 0.5f * c->dmin); } +/** + * @brief Can a sub-pair star task recurse to a lower level based + * on the status of the particles in the cell. + * + * @param c The #cell. + */ +__attribute__((always_inline)) INLINE static int +cell_can_recurse_in_pair_stars_task(const struct cell *c) { + + // LOIC: To implement + return 0; +} + +/** + * @brief Can a sub-self stars task recurse to a lower level based + * on the status of the particles in the cell. + * + * @param c The #cell. + */ +__attribute__((always_inline)) INLINE static int +cell_can_recurse_in_self_stars_task(const struct cell *c) { + + // LOIC: To implement + return 0; +} + /** * @brief Can a pair hydro task associated with a cell be split into smaller * sub-tasks. @@ -614,6 +655,32 @@ __attribute__((always_inline)) INLINE static int cell_can_split_self_hydro_task( return c->split && (space_stretch * kernel_gamma * c->h_max < 0.5f * c->dmin); } +/** + * @brief Can a pair stars task associated with a cell be split into smaller + * sub-tasks. + * + * @param c The #cell. + */ +__attribute__((always_inline)) INLINE static int cell_can_split_pair_stars_task( + const struct cell *c) { + + // LOIC: To implement + return 0; +} + +/** + * @brief Can a self stars task associated with a cell be split into smaller + * sub-tasks. + * + * @param c The #cell. + */ +__attribute__((always_inline)) INLINE static int cell_can_split_self_stars_task( + const struct cell *c) { + + // LOIC: To implement + return 0; +} + /** * @brief Can a pair gravity task associated with a cell be split into smaller * sub-tasks. diff --git a/src/common_io.c b/src/common_io.c index cf4f0132c4339985da21319b290aba2586e46c51..5c8a234c124fb95c08a4bd59d903745e03c5d92c 100644 --- a/src/common_io.c +++ b/src/common_io.c @@ -751,7 +751,7 @@ void io_duplicate_hydro_sparts_mapper(void* restrict data, int Nstars, gparts[i + Ndm].mass = sparts[i].mass; /* Set gpart type */ - gparts[i + Ndm].type = swift_type_star; + gparts[i + Ndm].type = swift_type_stars; /* Link the particles */ gparts[i + Ndm].id_or_neg_offset = -(long long)(offset + i); @@ -772,9 +772,10 @@ void io_duplicate_hydro_sparts_mapper(void* restrict data, int Nstars, * @param Nstars The number of stars particles read in. * @param Ndm The number of DM and gas particles read in. */ -void io_duplicate_star_gparts(struct threadpool* tp, struct spart* const sparts, - struct gpart* const gparts, size_t Nstars, - size_t Ndm) { +void io_duplicate_stars_gparts(struct threadpool* tp, + struct spart* const sparts, + struct gpart* const gparts, size_t Nstars, + size_t Ndm) { struct duplication_data data; data.gparts = gparts; @@ -857,8 +858,8 @@ void io_check_output_fields(const struct swift_params* params, darkmatter_write_particles(&gp, list, &num_fields); break; - case swift_type_star: - star_write_particles(&sp, list, &num_fields); + case swift_type_stars: + stars_write_particles(&sp, list, &num_fields); break; default: @@ -943,8 +944,8 @@ void io_write_output_field_parameter(const char* filename) { darkmatter_write_particles(NULL, list, &num_fields); break; - case swift_type_star: - star_write_particles(NULL, list, &num_fields); + case swift_type_stars: + stars_write_particles(NULL, list, &num_fields); break; default: diff --git a/src/common_io.h b/src/common_io.h index 66d8f69c329c8118e131085458a58ee5d16aea8c..7e0f1b74898f5cfdec5631c142231d30a0944f50 100644 --- a/src/common_io.h +++ b/src/common_io.h @@ -104,9 +104,10 @@ void io_prepare_dm_gparts(struct threadpool* tp, struct gpart* const gparts, void io_duplicate_hydro_gparts(struct threadpool* tp, struct part* const parts, struct gpart* const gparts, size_t Ngas, size_t Ndm); -void io_duplicate_star_gparts(struct threadpool* tp, struct spart* const sparts, - struct gpart* const gparts, size_t Nstars, - size_t Ndm); +void io_duplicate_stars_gparts(struct threadpool* tp, + struct spart* const sparts, + struct gpart* const gparts, size_t Nstars, + size_t Ndm); void io_check_output_fields(const struct swift_params* params, const long long N_total[3]); diff --git a/src/engine.c b/src/engine.c index 3d83f739b84b6ffd86c9414f0c75ba833fea9b8f..43047761ba545927c99a2937f37a814e0d6d639e 100644 --- a/src/engine.c +++ b/src/engine.c @@ -82,6 +82,7 @@ #include "single_io.h" #include "sort_part.h" #include "sourceterms.h" +#include "stars_io.h" #include "statistics.h" #include "timers.h" #include "tools.h" @@ -109,7 +110,8 @@ const char *engine_policy_names[] = {"none", "cooling", "sourceterms", "stars", - "structure finding"}; + "structure finding", + "feedback"}; /** The rank of the engine as a global variable (for messages). */ int engine_rank; @@ -148,6 +150,31 @@ void engine_addlink(struct engine *e, struct link **l, struct task *t) { res->next = atomic_swap(l, res); } +/** + * @brief Recursively add non-implicit star ghost tasks to a cell hierarchy. + */ +void engine_add_stars_ghosts(struct engine *e, struct cell *c, + struct task *stars_ghost_in, + struct task *stars_ghost_out) { + + /* If we have reached the leaf OR have to few particles to play with*/ + if (!c->split || c->scount < engine_max_sparts_per_ghost) { + + /* Add the ghost task and its dependencies */ + struct scheduler *s = &e->sched; + c->stars_ghost = scheduler_addtask(s, task_type_stars_ghost, + task_subtype_none, 0, 0, c, NULL); + scheduler_addunlock(s, stars_ghost_in, c->stars_ghost); + scheduler_addunlock(s, c->stars_ghost, stars_ghost_out); + } else { + /* Keep recursing */ + for (int k = 0; k < 8; k++) + if (c->progeny[k] != NULL) + engine_add_stars_ghosts(e, c->progeny[k], stars_ghost_in, + stars_ghost_out); + } +} + /** * @brief Recursively add non-implicit ghost tasks to a cell hierarchy. */ @@ -389,6 +416,47 @@ void engine_make_hierarchical_tasks_gravity(struct engine *e, struct cell *c) { engine_make_hierarchical_tasks_gravity(e, c->progeny[k]); } +/** + * @brief Generate the stars hierarchical tasks for a hierarchy of cells - + * i.e. all the O(Npart) tasks -- star version + * + * Tasks are only created here. The dependencies will be added later on. + * + * Note that there is no need to recurse below the super-cell. Note also + * that we only add tasks if the relevant particles are present in the cell. + * + * @param e The #engine. + * @param c The #cell. + */ +void engine_make_hierarchical_tasks_stars(struct engine *e, struct cell *c) { + + struct scheduler *s = &e->sched; + + /* Are we in a super-cell ? */ + if (c->super == c) { + + /* Local tasks only... */ + if (c->nodeID == e->nodeID) { + + /* Generate the ghost tasks. */ + c->stars_ghost_in = + scheduler_addtask(s, task_type_stars_ghost_in, task_subtype_none, 0, + /* implicit = */ 1, c, NULL); + c->stars_ghost_out = + scheduler_addtask(s, task_type_stars_ghost_out, task_subtype_none, 0, + /* implicit = */ 1, c, NULL); + engine_add_stars_ghosts(e, c, c->stars_ghost_in, c->stars_ghost_out); + } + } else { /* We are above the super-cell so need to go deeper */ + + /* Recurse. */ + if (c->split) + for (int k = 0; k < 8; k++) + if (c->progeny[k] != NULL) + engine_make_hierarchical_tasks_stars(e, c->progeny[k]); + } +} + void engine_make_hierarchical_tasks_mapper(void *map_data, int num_elements, void *extra_data) { struct engine *e = (struct engine *)extra_data; @@ -396,6 +464,7 @@ void engine_make_hierarchical_tasks_mapper(void *map_data, int num_elements, const int is_with_self_gravity = (e->policy & engine_policy_self_gravity); const int is_with_external_gravity = (e->policy & engine_policy_external_gravity); + const int is_with_feedback = (e->policy & engine_policy_feedback); for (int ind = 0; ind < num_elements; ind++) { struct cell *c = &((struct cell *)map_data)[ind]; @@ -406,6 +475,7 @@ void engine_make_hierarchical_tasks_mapper(void *map_data, int num_elements, /* And the gravity stuff */ if (is_with_self_gravity || is_with_external_gravity) engine_make_hierarchical_tasks_gravity(e, c); + if (is_with_feedback) engine_make_hierarchical_tasks_stars(e, c); } } @@ -749,7 +819,7 @@ static void engine_redistribute_relink_mapper(void *map_data, int num_elements, } /* Does this gpart have a star partner ? */ - else if (s->gparts[k].type == swift_type_star) { + else if (s->gparts[k].type == swift_type_stars) { const ptrdiff_t partner_index = offset_sparts - s->gparts[k].id_or_neg_offset; @@ -1958,7 +2028,7 @@ void engine_exchange_strays(struct engine *e, size_t offset_parts, for (size_t k = 0; k < offset_gparts; k++) { if (s->gparts[k].type == swift_type_gas) { s->parts[-s->gparts[k].id_or_neg_offset].gpart = &s->gparts[k]; - } else if (s->gparts[k].type == swift_type_star) { + } else if (s->gparts[k].type == swift_type_stars) { s->sparts[-s->gparts[k].id_or_neg_offset].gpart = &s->gparts[k]; } } @@ -2054,7 +2124,7 @@ void engine_exchange_strays(struct engine *e, size_t offset_parts, &s->parts[offset_parts + count_parts - gp->id_or_neg_offset]; gp->id_or_neg_offset = s->parts - p; p->gpart = gp; - } else if (gp->type == swift_type_star) { + } else if (gp->type == swift_type_stars) { struct spart *sp = &s->sparts[offset_sparts + count_sparts - gp->id_or_neg_offset]; gp->id_or_neg_offset = s->sparts - sp; @@ -2598,6 +2668,84 @@ void engine_make_hydroloop_tasks_mapper(void *map_data, int num_elements, } } +/** + * @brief Constructs the top-level pair tasks for the star loop over + * neighbours + * + * Here we construct all the tasks for all possible neighbouring non-empty + * local cells in the hierarchy. No dependencies are being added thus far. + * Additional loop over neighbours can later be added by simply duplicating + * all the tasks created by this function. + * + * @param map_data Offset of first two indices disguised as a pointer. + * @param num_elements Number of cells to traverse. + * @param extra_data The #engine. + */ +void engine_make_starsloop_tasks_mapper(void *map_data, int num_elements, + void *extra_data) { + + /* Extract the engine pointer. */ + struct engine *e = (struct engine *)extra_data; + + struct space *s = e->s; + struct scheduler *sched = &e->sched; + const int nodeID = e->nodeID; + const int *cdim = s->cdim; + struct cell *cells = s->cells_top; + + /* Loop through the elements, which are just byte offsets from NULL. */ + for (int ind = 0; ind < num_elements; ind++) { + + /* Get the cell index. */ + const int cid = (size_t)(map_data) + ind; + const int i = cid / (cdim[1] * cdim[2]); + const int j = (cid / cdim[2]) % cdim[1]; + const int k = cid % cdim[2]; + + /* Get the cell */ + struct cell *ci = &cells[cid]; + + /* Skip cells without star particles */ + if (ci->scount == 0) continue; + + /* If the cells is local build a self-interaction */ + if (ci->nodeID == nodeID) + scheduler_addtask(sched, task_type_self, task_subtype_stars_density, 0, 0, + ci, NULL); + + /* Now loop over all the neighbours of this cell */ + for (int ii = -1; ii < 2; ii++) { + int iii = i + ii; + if (!s->periodic && (iii < 0 || iii >= cdim[0])) continue; + iii = (iii + cdim[0]) % cdim[0]; + for (int jj = -1; jj < 2; jj++) { + int jjj = j + jj; + if (!s->periodic && (jjj < 0 || jjj >= cdim[1])) continue; + jjj = (jjj + cdim[1]) % cdim[1]; + for (int kk = -1; kk < 2; kk++) { + int kkk = k + kk; + if (!s->periodic && (kkk < 0 || kkk >= cdim[2])) continue; + kkk = (kkk + cdim[2]) % cdim[2]; + + /* Get the neighbouring cell */ + const int cjd = cell_getid(cdim, iii, jjj, kkk); + struct cell *cj = &cells[cjd]; + + /* Is that neighbour local and does it have particles ? */ + if (cid >= cjd || cj->count == 0 || + (ci->nodeID != nodeID && cj->nodeID != nodeID)) + continue; + + /* Construct the pair task */ + const int sid = sortlistID[(kk + 1) + 3 * ((jj + 1) + 3 * (ii + 1))]; + scheduler_addtask(sched, task_type_pair, task_subtype_stars_density, + sid, 0, ci, cj); + } + } + } + } +} + /** * @brief Counts the tasks associated with one cell and constructs the links * @@ -2636,6 +2784,8 @@ void engine_count_and_link_tasks_mapper(void *map_data, int num_elements, engine_addlink(e, &ci->grav, t); } else if (t_subtype == task_subtype_external_grav) { engine_addlink(e, &ci->grav, t); + } else if (t->subtype == task_subtype_stars_density) { + engine_addlink(e, &ci->stars_density, t); } /* Link pair tasks to cells. */ @@ -2649,6 +2799,9 @@ void engine_count_and_link_tasks_mapper(void *map_data, int num_elements, } else if (t_subtype == task_subtype_grav) { engine_addlink(e, &ci->grav, t); engine_addlink(e, &cj->grav, t); + } else if (t->subtype == task_subtype_stars_density) { + engine_addlink(e, &ci->stars_density, t); + engine_addlink(e, &cj->stars_density, t); } #ifdef SWIFT_DEBUG_CHECKS else if (t_subtype == task_subtype_external_grav) { @@ -2666,6 +2819,8 @@ void engine_count_and_link_tasks_mapper(void *map_data, int num_elements, engine_addlink(e, &ci->grav, t); } else if (t_subtype == task_subtype_external_grav) { engine_addlink(e, &ci->grav, t); + } else if (t->subtype == task_subtype_stars_density) { + engine_addlink(e, &ci->stars_density, t); } /* Link sub-pair tasks to cells. */ @@ -2679,6 +2834,9 @@ void engine_count_and_link_tasks_mapper(void *map_data, int num_elements, } else if (t_subtype == task_subtype_grav) { engine_addlink(e, &ci->grav, t); engine_addlink(e, &cj->grav, t); + } else if (t->subtype == task_subtype_stars_density) { + engine_addlink(e, &ci->stars_density, t); + engine_addlink(e, &cj->stars_density, t); } #ifdef SWIFT_DEBUG_CHECKS else if (t_subtype == task_subtype_external_grav) { @@ -2903,6 +3061,20 @@ static inline void engine_make_hydro_loops_dependencies(struct scheduler *sched, } #endif +/** + * @brief Creates the dependency network for the stars tasks of a given cell. + * + * @param sched The #scheduler. + * @param density The density task to link. + * @param c The cell. + */ +static inline void engine_make_stars_loops_dependencies(struct scheduler *sched, + struct task *density, + struct cell *c) { + /* density loop --> ghost */ + scheduler_addunlock(sched, density, c->super->stars_ghost_in); +} + /** * @brief Duplicates the first hydro loop and construct all the * dependencies for the hydro part @@ -3155,6 +3327,109 @@ void engine_make_extra_hydroloop_tasks_mapper(void *map_data, int num_elements, } } +/** + * @brief Duplicates the first hydro loop and construct all the + * dependencies for the stars + * + * This is done by looping over all the previously constructed tasks + * and adding another task involving the same cells but this time + * corresponding to the second hydro loop over neighbours. + * With all the relevant tasks for a given cell available, we construct + * all the dependencies for that cell. + */ +void engine_link_stars_tasks_mapper(void *map_data, int num_elements, + void *extra_data) { + + struct engine *e = (struct engine *)extra_data; + struct scheduler *sched = &e->sched; + const int nodeID = e->nodeID; + + for (int ind = 0; ind < num_elements; ind++) { + struct task *t = &((struct task *)map_data)[ind]; + + /* Self-interaction? */ + if (t->type == task_type_self && t->subtype == task_subtype_stars_density) { + + /* Make the self-density tasks depend on the drifts. */ + scheduler_addunlock(sched, t->ci->super->drift_part, t); + + scheduler_addunlock(sched, t->ci->super->drift_gpart, t); + + /* Now, build all the dependencies for the stars */ + engine_make_stars_loops_dependencies(sched, t, t->ci); + scheduler_addunlock(sched, t->ci->stars_ghost_out, + t->ci->super->end_force); + } + + /* Otherwise, pair interaction? */ + else if (t->type == task_type_pair && + t->subtype == task_subtype_stars_density) { + + /* Make all density tasks depend on the drift and the sorts. */ + if (t->ci->nodeID == engine_rank) + scheduler_addunlock(sched, t->ci->super->drift_part, t); + scheduler_addunlock(sched, t->ci->super->sorts, t); + if (t->ci->super != t->cj->super) { + if (t->cj->nodeID == engine_rank) + scheduler_addunlock(sched, t->cj->super->drift_part, t); + scheduler_addunlock(sched, t->cj->super->sorts, t); + } + + /* Now, build all the dependencies for the stars for the cells */ + /* that are local and are not descendant of the same super-cells */ + if (t->ci->nodeID == nodeID) { + engine_make_stars_loops_dependencies(sched, t, t->ci); + } + if (t->cj->nodeID == nodeID) { + if (t->ci->super != t->cj->super) + engine_make_stars_loops_dependencies(sched, t, t->cj); + } + + } + + /* Otherwise, sub-self interaction? */ + else if (t->type == task_type_sub_self && + t->subtype == task_subtype_stars_density) { + + /* Make all density tasks depend on the drift and sorts. */ + scheduler_addunlock(sched, t->ci->super->drift_part, t); + scheduler_addunlock(sched, t->ci->super->sorts, t); + + /* Now, build all the dependencies for the stars for the cells */ + /* that are local and are not descendant of the same super-cells */ + if (t->ci->nodeID == nodeID) { + engine_make_stars_loops_dependencies(sched, t, t->ci); + } else + error("oo"); + } + + /* Otherwise, sub-pair interaction? */ + else if (t->type == task_type_sub_pair && + t->subtype == task_subtype_stars_density) { + + /* Make all density tasks depend on the drift. */ + if (t->ci->nodeID == engine_rank) + scheduler_addunlock(sched, t->ci->super->drift_part, t); + scheduler_addunlock(sched, t->ci->super->sorts, t); + if (t->ci->super != t->cj->super) { + if (t->cj->nodeID == engine_rank) + scheduler_addunlock(sched, t->cj->super->drift_part, t); + scheduler_addunlock(sched, t->cj->super->sorts, t); + } + + /* Now, build all the dependencies for the stars for the cells */ + /* that are local and are not descendant of the same super-cells */ + if (t->ci->nodeID == nodeID) { + engine_make_stars_loops_dependencies(sched, t, t->ci); + } + if (t->cj->nodeID == nodeID) { + if (t->ci->super != t->cj->super) + engine_make_stars_loops_dependencies(sched, t, t->cj); + } + } + } +} + /** * @brief Fill the #space's task list. * @@ -3173,7 +3448,7 @@ void engine_maketasks(struct engine *e) { ticks tic2 = getticks(); - /* Construct the firt hydro loop over neighbours */ + /* Construct the first hydro loop over neighbours */ if (e->policy & engine_policy_hydro) threadpool_map(&e->threadpool, engine_make_hydroloop_tasks_mapper, NULL, s->nr_cells, 1, 0, e); @@ -3184,6 +3459,12 @@ void engine_maketasks(struct engine *e) { tic2 = getticks(); + /* Construct the stars hydro loop over neighbours */ + if (e->policy & engine_policy_feedback) { + threadpool_map(&e->threadpool, engine_make_starsloop_tasks_mapper, NULL, + s->nr_cells, 1, 0, e); + } + /* Add the self gravity tasks. */ if (e->policy & engine_policy_self_gravity) engine_make_self_gravity_tasks(e); @@ -3214,6 +3495,7 @@ void engine_maketasks(struct engine *e) { #endif const size_t self_grav_tasks_per_cell = 125; const size_t ext_grav_tasks_per_cell = 1; + const size_t stars_tasks_per_cell = 15; if (e->policy & engine_policy_hydro) e->size_links += s->tot_cells * hydro_tasks_per_cell; @@ -3221,6 +3503,8 @@ void engine_maketasks(struct engine *e) { e->size_links += s->tot_cells * ext_grav_tasks_per_cell; if (e->policy & engine_policy_self_gravity) e->size_links += s->tot_cells * self_grav_tasks_per_cell; + if (e->policy & engine_policy_stars) + e->size_links += s->tot_cells * stars_tasks_per_cell; /* Allocate the new link list */ if ((e->links = (struct link *)malloc(sizeof(struct link) * e->size_links)) == @@ -3301,7 +3585,18 @@ void engine_maketasks(struct engine *e) { message("Linking gravity tasks took %.3f %s.", clocks_from_ticks(getticks() - tic2), clocks_getunit()); + tic2 = getticks(); + + if (e->policy & engine_policy_stars) + threadpool_map(&e->threadpool, engine_link_stars_tasks_mapper, sched->tasks, + sched->nr_tasks, sizeof(struct task), 0, e); + + if (e->verbose) + message("Linking stars tasks took %.3f %s (including reweight).", + clocks_from_ticks(getticks() - tic2), clocks_getunit()); + #ifdef WITH_MPI + if (e->policy & engine_policy_stars) error("Cannot run stars with MPI"); /* Add the communication tasks if MPI is being used. */ if (e->policy & engine_policy_mpi) { @@ -3481,6 +3776,25 @@ void engine_marktasks_mapper(void *map_data, int num_elements, } #endif + /* Activate the star density */ + else if (t_type == task_type_self && + t_subtype == task_subtype_stars_density) { + if (cell_is_active_stars(ci, e)) { + scheduler_activate(s, t); + cell_activate_drift_part(ci, s); + cell_activate_drift_gpart(ci, s); + } + } + + /* Store current values of dx_max and h_max. */ + else if (t_type == task_type_sub_self && + t_subtype == task_subtype_stars_density) { + if (cell_is_active_stars(ci, e)) { + scheduler_activate(s, t); + cell_activate_subcell_stars_tasks(ci, NULL, s); + } + } + /* Activate the gravity drift */ else if (t_type == task_type_self && t_subtype == task_subtype_grav) { if (cell_is_active_gravity(ci, e)) { @@ -3522,6 +3836,8 @@ void engine_marktasks_mapper(void *map_data, int num_elements, const int cj_active_hydro = cell_is_active_hydro(cj, e); const int ci_active_gravity = cell_is_active_gravity(ci, e); const int cj_active_gravity = cell_is_active_gravity(cj, e); + const int ci_active_stars = cell_is_active_stars(ci, e); + const int cj_active_stars = cell_is_active_stars(cj, e); /* Only activate tasks that involve a local active cell. */ if ((t_subtype == task_subtype_density || @@ -3533,7 +3849,9 @@ void engine_marktasks_mapper(void *map_data, int num_elements, scheduler_activate(s, t); /* Set the correct sorting flags */ - if (t_type == task_type_pair && t_subtype == task_subtype_density) { + if (t_type == task_type_pair && + (t_subtype == task_subtype_density || + t_subtype == task_subtype_stars_density)) { /* Store some values. */ atomic_or(&ci->requires_sorts, 1 << t->flags); @@ -3553,11 +3871,50 @@ void engine_marktasks_mapper(void *map_data, int num_elements, /* Store current values of dx_max and h_max. */ else if (t_type == task_type_sub_pair && - t_subtype == task_subtype_density) { + (t_subtype == task_subtype_density || + t_subtype == task_subtype_stars_density)) { cell_activate_subcell_hydro_tasks(t->ci, t->cj, s); } } + /* Stars */ + if (t_subtype == task_subtype_stars_density && + ((ci_active_stars && ci->nodeID == engine_rank) || + (cj_active_stars && cj->nodeID == engine_rank))) { + + scheduler_activate(s, t); + + /* Set the correct sorting flags */ + if (t_type == task_type_pair) { + + /* Store some values. */ + atomic_or(&ci->requires_sorts, 1 << t->flags); + atomic_or(&cj->requires_sorts, 1 << t->flags); + ci->dx_max_sort_old = ci->dx_max_sort; + cj->dx_max_sort_old = cj->dx_max_sort; + + /* Activate the hydro drift tasks. */ + if (ci_nodeID == nodeID) { + cell_activate_drift_part(ci, s); + cell_activate_drift_gpart(ci, s); + } + if (cj_nodeID == nodeID) { + cell_activate_drift_part(cj, s); + cell_activate_drift_gpart(cj, s); + } + + /* Check the sorts and activate them if needed. */ + cell_activate_sorts(ci, t->flags, s); + cell_activate_sorts(cj, t->flags, s); + + } + + /* Store current values of dx_max and h_max. */ + else if (t_type == task_type_sub_pair) { + cell_activate_subcell_stars_tasks(t->ci, t->cj, s); + } + } + if ((t_subtype == task_subtype_grav) && ((ci_active_gravity && ci_nodeID == nodeID) || (cj_active_gravity && cj_nodeID == nodeID))) { @@ -3669,6 +4026,15 @@ void engine_marktasks_mapper(void *map_data, int num_elements, #endif } + /* Only interested in stars_density tasks as of here. */ + if (t->subtype == task_subtype_stars_density) { + + /* Too much particle movement? */ + if (cell_need_rebuild_for_pair(ci, cj)) *rebuild_space = 1; + + // LOIC: Need implementing MPI case + } + /* Only interested in gravity tasks as of here. */ if (t_subtype == task_subtype_grav) { @@ -3776,6 +4142,13 @@ void engine_marktasks_mapper(void *map_data, int num_elements, scheduler_activate(s, t); } + /* Star ghost tasks ? */ + else if (t_type == task_type_stars_ghost || + t_type == task_type_stars_ghost_in || + t_type == task_type_stars_ghost_out) { + if (cell_is_active_stars(t->ci, e)) scheduler_activate(s, t); + } + /* Time-step? */ else if (t_type == task_type_timestep) { t->ci->updated = 0; @@ -4615,6 +4988,7 @@ void engine_init_particles(struct engine *e, int flag_entropy_ICs, /* Init the particle data (by hand). */ space_init_parts(s, e->verbose); space_init_gparts(s, e->verbose); + space_init_sparts(s, e->verbose); /* Now, launch the calculation */ TIMER_TIC; @@ -4663,6 +5037,7 @@ void engine_init_particles(struct engine *e, int flag_entropy_ICs, /* Init the particle data (by hand). */ space_init_parts(e->s, e->verbose); space_init_gparts(e->s, e->verbose); + space_init_sparts(e->s, e->verbose); /* Print the number of active tasks ? */ if (e->verbose) engine_print_task_counts(e); @@ -5895,6 +6270,7 @@ void engine_unpin(void) { * @param cosmo The #cosmology used for this run. * @param hydro The #hydro_props used for this run. * @param gravity The #gravity_props used for this run. + * @param stars The #stars_props used for this run. * @param mesh The #pm_mesh used for the long-range periodic forces. * @param potential The properties of the external potential. * @param cooling_func The properties of the cooling function. @@ -5907,7 +6283,8 @@ void engine_init(struct engine *e, struct space *s, struct swift_params *params, const struct unit_system *internal_units, const struct phys_const *physical_constants, struct cosmology *cosmo, const struct hydro_props *hydro, - struct gravity_props *gravity, struct pm_mesh *mesh, + struct gravity_props *gravity, const struct stars_props *stars, + struct pm_mesh *mesh, const struct external_potential *potential, const struct cooling_function_data *cooling_func, const struct chemistry_global_data *chemistry, @@ -5969,6 +6346,7 @@ void engine_init(struct engine *e, struct space *s, struct swift_params *params, e->cosmology = cosmo; e->hydro_properties = hydro; e->gravity_properties = gravity; + e->stars_properties = stars; e->mesh = mesh; e->external_potential = potential; e->cooling_func = cooling_func; @@ -6303,6 +6681,9 @@ void engine_config(int restart, struct engine *e, struct swift_params *params, if (e->policy & engine_policy_self_gravity) if (e->nodeID == 0) gravity_props_print(e->gravity_properties); + if (e->policy & engine_policy_stars) + if (e->nodeID == 0) stars_props_print(e->stars_properties); + /* Check we have sensible time bounds */ if (e->time_begin >= e->time_end) error( @@ -7038,6 +7419,7 @@ void engine_struct_dump(struct engine *e, FILE *stream) { phys_const_struct_dump(e->physical_constants, stream); hydro_props_struct_dump(e->hydro_properties, stream); gravity_props_struct_dump(e->gravity_properties, stream); + stars_props_struct_dump(e->stars_properties, stream); pm_mesh_struct_dump(e->mesh, stream); potential_struct_dump(e->external_potential, stream); cooling_struct_dump(e->cooling_func, stream); @@ -7113,6 +7495,11 @@ void engine_struct_restore(struct engine *e, FILE *stream) { gravity_props_struct_restore(gravity_properties, stream); e->gravity_properties = gravity_properties; + struct stars_props *stars_properties = + (struct stars_props *)malloc(sizeof(struct stars_props)); + stars_props_struct_restore(stars_properties, stream); + e->stars_properties = stars_properties; + struct pm_mesh *mesh = (struct pm_mesh *)malloc(sizeof(struct pm_mesh)); pm_mesh_struct_restore(mesh, stream); e->mesh = mesh; diff --git a/src/engine.h b/src/engine.h index 88fcef1c224d0804e1ce854345d71b34f29d940f..edc89ef341a43c56479a27979f61bf8bf6b2abe5 100644 --- a/src/engine.h +++ b/src/engine.h @@ -71,9 +71,10 @@ enum engine_policy { engine_policy_cooling = (1 << 13), engine_policy_sourceterms = (1 << 14), engine_policy_stars = (1 << 15), - engine_policy_structure_finding = (1 << 16) + engine_policy_structure_finding = (1 << 16), + engine_policy_feedback = (1 << 17) }; -#define engine_maxpolicy 16 +#define engine_maxpolicy 17 extern const char *engine_policy_names[]; /** @@ -97,6 +98,7 @@ enum engine_step_properties { #define engine_default_energy_file_name "energy" #define engine_default_timesteps_file_name "timesteps" #define engine_max_parts_per_ghost 1000 +#define engine_max_sparts_per_ghost 1000 /** * @brief The rank of the engine as a global variable (for messages). @@ -322,6 +324,9 @@ struct engine { /* Properties of the hydro scheme */ const struct hydro_props *hydro_properties; + /* Properties of the star model */ + const struct stars_props *stars_properties; + /* Properties of the self-gravity scheme */ struct gravity_props *gravity_properties; @@ -390,7 +395,8 @@ void engine_init(struct engine *e, struct space *s, struct swift_params *params, const struct unit_system *internal_units, const struct phys_const *physical_constants, struct cosmology *cosmo, const struct hydro_props *hydro, - struct gravity_props *gravity, struct pm_mesh *mesh, + struct gravity_props *gravity, const struct stars_props *stars, + struct pm_mesh *mesh, const struct external_potential *potential, const struct cooling_function_data *cooling_func, const struct chemistry_global_data *chemistry, diff --git a/src/kick.h b/src/kick.h index 50ecaea498bdd401cc0ac27525ed27986a344c59..8c1c55519c311888ea1271f2285841731d7407ba 100644 --- a/src/kick.h +++ b/src/kick.h @@ -150,7 +150,7 @@ __attribute__((always_inline)) INLINE static void kick_spart( sp->gpart->v_full[2] = sp->v[2]; /* Kick extra variables */ - star_kick_extra(sp, dt_kick_grav); + stars_kick_extra(sp, dt_kick_grav); } #endif /* SWIFT_KICK_H */ diff --git a/src/parallel_io.c b/src/parallel_io.c index 64cb6ecdf12e50248741679ada96eda18ecb922d..fa1b58d4dc5f2dd8d330c42df95e3e67de8ea2b1 100644 --- a/src/parallel_io.c +++ b/src/parallel_io.c @@ -783,12 +783,12 @@ void read_ic_parallel(char* fileName, const struct unit_system* internal_units, bzero(*parts, *Ngas * sizeof(struct part)); } - /* Allocate memory to store star particles */ + /* Allocate memory to store stars particles */ if (with_stars) { - *Nstars = N[swift_type_star]; + *Nstars = N[swift_type_stars]; if (posix_memalign((void**)sparts, spart_align, *Nstars * sizeof(struct spart)) != 0) - error("Error while allocating memory for star particles"); + error("Error while allocating memory for stars particles"); bzero(*sparts, *Nstars * sizeof(struct spart)); } @@ -797,7 +797,7 @@ void read_ic_parallel(char* fileName, const struct unit_system* internal_units, Ndm = N[1]; *Ngparts = (with_hydro ? N[swift_type_gas] : 0) + N[swift_type_dark_matter] + - (with_stars ? N[swift_type_star] : 0); + (with_stars ? N[swift_type_stars] : 0); if (posix_memalign((void**)gparts, gpart_align, *Ngparts * sizeof(struct gpart)) != 0) error("Error while allocating memory for gravity particles"); @@ -846,10 +846,10 @@ void read_ic_parallel(char* fileName, const struct unit_system* internal_units, } break; - case swift_type_star: + case swift_type_stars: if (with_stars) { Nparticles = *Nstars; - star_read_particles(*sparts, list, &num_fields); + stars_read_particles(*sparts, list, &num_fields); } break; @@ -882,9 +882,9 @@ void read_ic_parallel(char* fileName, const struct unit_system* internal_units, /* Duplicate the hydro particles into gparts */ if (with_hydro) io_duplicate_hydro_gparts(&tp, *parts, *gparts, *Ngas, Ndm); - /* Duplicate the star particles into gparts */ + /* Duplicate the stars particles into gparts */ if (with_stars) - io_duplicate_star_gparts(&tp, *sparts, *gparts, *Nstars, Ndm + *Ngas); + io_duplicate_stars_gparts(&tp, *sparts, *gparts, *Nstars, Ndm + *Ngas); threadpool_clean(&tp); } @@ -1033,7 +1033,16 @@ void prepare_file(struct engine* e, const char* baseName, long long N_total[6], H5Gclose(h_grp); } - /* Print the gravity parameters */ + /* Print the stellar parameters */ + if (e->policy & engine_policy_stars) { + h_grp = H5Gcreate(h_file, "/StarsScheme", H5P_DEFAULT, H5P_DEFAULT, + H5P_DEFAULT); + if (h_grp < 0) error("Error while creating stars group"); + stars_props_print_snapshot(h_grp, e->stars_properties); + H5Gclose(h_grp); + } + + /* Print the cosmological parameters */ h_grp = H5Gcreate(h_file, "/Cosmology", H5P_DEFAULT, H5P_DEFAULT, H5P_DEFAULT); if (h_grp < 0) error("Error while creating cosmology group"); @@ -1099,8 +1108,8 @@ void prepare_file(struct engine* e, const char* baseName, long long N_total[6], darkmatter_write_particles(gparts, list, &num_fields); break; - case swift_type_star: - star_write_particles(sparts, list, &num_fields); + case swift_type_stars: + stars_write_particles(sparts, list, &num_fields); break; default: @@ -1356,9 +1365,9 @@ void write_output_parallel(struct engine* e, const char* baseName, darkmatter_write_particles(dmparts, list, &num_fields); break; - case swift_type_star: + case swift_type_stars: Nparticles = Nstars; - star_write_particles(sparts, list, &num_fields); + stars_write_particles(sparts, list, &num_fields); break; default: diff --git a/src/part.c b/src/part.c index 050e10e9cdd0ab56adcd34ba3e6f2d35c274f14a..db221dbd4bf9ff2b829b02fbae673aa1c65f339e 100644 --- a/src/part.c +++ b/src/part.c @@ -88,7 +88,7 @@ void part_relink_parts_to_gparts(struct gpart *gparts, size_t N, void part_relink_sparts_to_gparts(struct gpart *gparts, size_t N, struct spart *sparts) { for (size_t k = 0; k < N; k++) { - if (gparts[k].type == swift_type_star) { + if (gparts[k].type == swift_type_stars) { sparts[-gparts[k].id_or_neg_offset].gpart = &gparts[k]; } } @@ -108,7 +108,7 @@ void part_relink_all_parts_to_gparts(struct gpart *gparts, size_t N, for (size_t k = 0; k < N; k++) { if (gparts[k].type == swift_type_gas) { parts[-gparts[k].id_or_neg_offset].gpart = &gparts[k]; - } else if (gparts[k].type == swift_type_star) { + } else if (gparts[k].type == swift_type_stars) { sparts[-gparts[k].id_or_neg_offset].gpart = &gparts[k]; } } @@ -171,11 +171,11 @@ void part_verify_links(struct part *parts, struct gpart *gparts, error("Linked particles are not at the same time !"); } - else if (gparts[k].type == swift_type_star) { + else if (gparts[k].type == swift_type_stars) { /* Check that it is linked */ if (gparts[k].id_or_neg_offset > 0) - error("Star gpart not linked to anything !"); + error("Stars gpart not linked to anything !"); /* Find its link */ const struct spart *spart = &sparts[-gparts[k].id_or_neg_offset]; diff --git a/src/part.h b/src/part.h index 145bf2111771d8ad254affb213b93b7ac829f1a6..36d4cc5ba2051aad1952cbb1a81102d62c076c17 100644 --- a/src/part.h +++ b/src/part.h @@ -86,7 +86,7 @@ #endif /* Import the right star particle definition */ -#include "./stars/Default/star_part.h" +#include "./stars/Default/stars_part.h" void part_relink_gparts_to_parts(struct part *parts, size_t N, ptrdiff_t offset); diff --git a/src/part_type.c b/src/part_type.c index af97bd34aaace93a9faa953c0c9345d83ca3bc34..1f96d4ef1db4b35a92d133e91498ea10ce472c70 100644 --- a/src/part_type.c +++ b/src/part_type.c @@ -20,5 +20,5 @@ /* This object's header. */ #include "part_type.h" -const char* part_type_names[swift_type_count] = {"Gas", "DM", "Dummy", - "Dummy", "Star", "BH"}; +const char* part_type_names[swift_type_count] = {"Gas", "DM", "Dummy", + "Dummy", "Stars", "BH"}; diff --git a/src/part_type.h b/src/part_type.h index fbe2b2aeaea37503635372b0f09f8edde4578721..901f47193fa0e72b362c8dce5199a1d0a20526c9 100644 --- a/src/part_type.h +++ b/src/part_type.h @@ -27,7 +27,7 @@ enum part_type { swift_type_gas = 0, swift_type_dark_matter = 1, - swift_type_star = 4, + swift_type_stars = 4, swift_type_black_hole = 5, swift_type_count } __attribute__((packed)); diff --git a/src/runner.c b/src/runner.c index f62836844e05cbf5c1ae41c6277853dad80854d7..f88a91e790fac997bcc09d7323a50bc13a63e4bf 100644 --- a/src/runner.c +++ b/src/runner.c @@ -96,6 +96,9 @@ /* Import the gravity loop functions. */ #include "runner_doiact_grav.h" +/* Import the stars loop functions. */ +#include "runner_doiact_stars.h" + /** * @brief Perform source terms * @@ -132,6 +135,210 @@ void runner_do_sourceterms(struct runner *r, struct cell *c, int timer) { if (timer) TIMER_TOC(timer_dosource); } +/** + * @brief Intermediate task after the density to check that the smoothing + * lengths are correct. + * + * @param r The runner thread. + * @param c The cell. + * @param timer Are we timing this ? + */ +void runner_do_stars_ghost(struct runner *r, struct cell *c, int timer) { + + struct spart *restrict sparts = c->sparts; + const struct engine *e = r->e; + const struct cosmology *cosmo = e->cosmology; + const struct stars_props *stars_properties = e->stars_properties; + const float stars_h_max = stars_properties->h_max; + const float eps = stars_properties->h_tolerance; + const float stars_eta_dim = pow_dimension(stars_properties->eta_neighbours); + const int max_smoothing_iter = stars_properties->max_smoothing_iterations; + int redo = 0, scount = 0; + + TIMER_TIC; + + /* Anything to do here? */ + if (!cell_is_active_stars(c, e)) return; + + /* Recurse? */ + if (c->split) { + for (int k = 0; k < 8; k++) + if (c->progeny[k] != NULL) runner_do_stars_ghost(r, c->progeny[k], 0); + } else { + + /* Init the list of active particles that have to be updated. */ + int *sid = NULL; + if ((sid = (int *)malloc(sizeof(int) * c->scount)) == NULL) + error("Can't allocate memory for sid."); + for (int k = 0; k < c->scount; k++) + if (spart_is_active(&sparts[k], e)) { + sid[scount] = k; + ++scount; + } + + /* While there are particles that need to be updated... */ + for (int num_reruns = 0; scount > 0 && num_reruns < max_smoothing_iter; + num_reruns++) { + + /* Reset the redo-count. */ + redo = 0; + + /* Loop over the remaining active parts in this cell. */ + for (int i = 0; i < scount; i++) { + + /* Get a direct pointer on the part. */ + struct spart *sp = &sparts[sid[i]]; + +#ifdef SWIFT_DEBUG_CHECKS + /* Is this part within the timestep? */ + if (!spart_is_active(sp, e)) + error("Ghost applied to inactive particle"); +#endif + + /* Get some useful values */ + const float h_old = sp->h; + const float h_old_dim = pow_dimension(h_old); + const float h_old_dim_minus_one = pow_dimension_minus_one(h_old); + float h_new; + int has_no_neighbours = 0; + + if (sp->density.wcount == 0.f) { /* No neighbours case */ + + /* Flag that there were no neighbours */ + has_no_neighbours = 1; + + /* Double h and try again */ + h_new = 2.f * h_old; + } else { + + /* Finish the density calculation */ + stars_end_density(sp, cosmo); + + /* Compute one step of the Newton-Raphson scheme */ + const float n_sum = sp->density.wcount * h_old_dim; + const float n_target = stars_eta_dim; + const float f = n_sum - n_target; + const float f_prime = + sp->density.wcount_dh * h_old_dim + + hydro_dimension * sp->density.wcount * h_old_dim_minus_one; + + /* Avoid floating point exception from f_prime = 0 */ + h_new = h_old - f / (f_prime + FLT_MIN); +#ifdef SWIFT_DEBUG_CHECKS + if ((f > 0.f && h_new > h_old) || (f < 0.f && h_new < h_old)) + error( + "Smoothing length correction not going in the right direction"); +#endif + + /* Safety check: truncate to the range [ h_old/2 , 2h_old ]. */ + h_new = min(h_new, 2.f * h_old); + h_new = max(h_new, 0.5f * h_old); + } + + /* Check whether the particle has an inappropriate smoothing length */ + if (fabsf(h_new - h_old) > eps * h_old) { + + /* Ok, correct then */ + sp->h = h_new; + + /* If below the absolute maximum, try again */ + if (sp->h < stars_h_max) { + + /* Flag for another round of fun */ + sid[redo] = sid[i]; + redo += 1; + + /* Re-initialise everything */ + stars_init_spart(sp); + + /* Off we go ! */ + continue; + + } else { + + /* Ok, this particle is a lost cause... */ + sp->h = stars_h_max; + + /* Do some damage control if no neighbours at all were found */ + if (has_no_neighbours) { + stars_spart_has_no_neighbours(sp, cosmo); + } + } + } + + /* We now have a particle whose smoothing length has converged */ + + /* Compute the stellar evolution */ + stars_evolve_spart(sp, stars_properties, cosmo); + } + + /* We now need to treat the particles whose smoothing length had not + * converged again */ + + /* Re-set the counter for the next loop (potentially). */ + scount = redo; + if (scount > 0) { + + /* Climb up the cell hierarchy. */ + for (struct cell *finger = c; finger != NULL; finger = finger->parent) { + + /* Run through this cell's density interactions. */ + for (struct link *l = finger->density; l != NULL; l = l->next) { + +#ifdef SWIFT_DEBUG_CHECKS + if (l->t->ti_run < r->e->ti_current) + error("Density task should have been run."); +#endif + + /* Self-interaction? */ + if (l->t->type == task_type_self) + runner_doself_subset_branch_stars_density(r, finger, sparts, sid, + scount); + + /* Otherwise, pair interaction? */ + else if (l->t->type == task_type_pair) { + + /* Left or right? */ + if (l->t->ci == finger) + runner_dopair_subset_branch_stars_density( + r, finger, sparts, sid, scount, l->t->cj); + else + runner_dopair_subset_branch_stars_density( + r, finger, sparts, sid, scount, l->t->ci); + } + + /* Otherwise, sub-self interaction? */ + else if (l->t->type == task_type_sub_self) + runner_dosub_subset_stars_density(r, finger, sparts, sid, scount, + NULL, -1, 1); + + /* Otherwise, sub-pair interaction? */ + else if (l->t->type == task_type_sub_pair) { + + /* Left or right? */ + if (l->t->ci == finger) + runner_dosub_subset_stars_density(r, finger, sparts, sid, + scount, l->t->cj, -1, 1); + else + runner_dosub_subset_stars_density(r, finger, sparts, sid, + scount, l->t->ci, -1, 1); + } + } + } + } + } + + if (scount) { + error("Smoothing length failed to converge on %i particles.", scount); + } + + /* Be clean */ + free(sid); + } + + if (timer) TIMER_TOC(timer_do_stars_ghost); +} + /** * @brief Calculate gravity acceleration from external potential * @@ -1039,6 +1246,35 @@ static void runner_do_unskip_hydro(struct cell *c, struct engine *e) { if (forcerebuild) atomic_inc(&e->forcerebuild); } +/** + * @brief Unskip any stars tasks associated with active cells. + * + * @param c The cell. + * @param e The engine. + */ +static void runner_do_unskip_stars(struct cell *c, struct engine *e) { + + /* Ignore empty cells. */ + if (c->scount == 0) return; + + /* Skip inactive cells. */ + if (!cell_is_active_stars(c, e)) return; + + /* Recurse */ + if (c->split) { + for (int k = 0; k < 8; k++) { + if (c->progeny[k] != NULL) { + struct cell *cp = c->progeny[k]; + runner_do_unskip_stars(cp, e); + } + } + } + + /* Unskip any active tasks. */ + const int forcerebuild = cell_unskip_stars_tasks(c, &e->sched); + if (forcerebuild) atomic_inc(&e->forcerebuild); +} + /** * @brief Unskip any gravity tasks associated with active cells. * @@ -1092,6 +1328,9 @@ void runner_do_unskip_mapper(void *map_data, int num_elements, if ((e->policy & engine_policy_self_gravity) || (e->policy & engine_policy_external_gravity)) runner_do_unskip_gravity(c, e); + + /* Stars tasks */ + if (e->policy & engine_policy_stars) runner_do_unskip_stars(c, e); } } } @@ -1257,7 +1496,7 @@ void runner_do_kick1(struct runner *r, struct cell *c, int timer) { } } - /* Loop over the star particles in this cell. */ + /* Loop over the stars particles in this cell. */ for (int k = 0; k < scount; k++) { /* Get a handle on the s-part. */ @@ -1469,7 +1708,7 @@ void runner_do_kick2(struct runner *r, struct cell *c, int timer) { #endif /* Prepare the values to be drifted */ - star_reset_predicted_values(sp); + stars_reset_predicted_values(sp); } } } @@ -1684,7 +1923,7 @@ void runner_do_timestep(struct runner *r, struct cell *c, int timer) { /* What is the next starting point for this cell ? */ ti_gravity_beg_max = max(ti_current, ti_gravity_beg_max); - } else { /* star particle is inactive */ + } else { /* stars particle is inactive */ const integertime_t ti_end = get_integer_time_end(ti_current, sp->time_bin); @@ -1825,7 +2064,7 @@ void runner_do_end_force(struct runner *r, struct cell *c, int timer) { long long id = 0; if (gp->type == swift_type_gas) id = e->s->parts[-gp->id_or_neg_offset].id; - else if (gp->type == swift_type_star) + else if (gp->type == swift_type_stars) id = e->s->sparts[-gp->id_or_neg_offset].id; else if (gp->type == swift_type_black_hole) error("Unexisting type"); @@ -1856,7 +2095,7 @@ void runner_do_end_force(struct runner *r, struct cell *c, int timer) { long long my_id = 0; if (gp->type == swift_type_gas) my_id = e->s->parts[-gp->id_or_neg_offset].id; - else if (gp->type == swift_type_star) + else if (gp->type == swift_type_stars) my_id = e->s->sparts[-gp->id_or_neg_offset].id; else if (gp->type == swift_type_black_hole) error("Unexisting type"); @@ -1876,7 +2115,7 @@ void runner_do_end_force(struct runner *r, struct cell *c, int timer) { } } - /* Loop over the star particles in this cell. */ + /* Loop over the stars particles in this cell. */ for (int k = 0; k < scount; k++) { /* Get a handle on the spart. */ @@ -1884,7 +2123,7 @@ void runner_do_end_force(struct runner *r, struct cell *c, int timer) { if (spart_is_active(sp, e)) { /* Finish the force loop */ - star_end_force(sp); + stars_end_force(sp); } } } @@ -2199,6 +2438,8 @@ void *runner_main(void *data) { runner_doself_recursive_grav(r, ci, 1); else if (t->subtype == task_subtype_external_grav) runner_do_grav_external(r, ci, 1); + else if (t->subtype == task_subtype_stars_density) + runner_doself_stars_density(r, ci, 1); else error("Unknown/invalid task subtype (%d).", t->subtype); break; @@ -2214,6 +2455,8 @@ void *runner_main(void *data) { runner_dopair2_branch_force(r, ci, cj); else if (t->subtype == task_subtype_grav) runner_dopair_recursive_grav(r, ci, cj, 1); + else if (t->subtype == task_subtype_stars_density) + runner_dopair_stars_density(r, ci, cj, 1); else error("Unknown/invalid task subtype (%d).", t->subtype); break; @@ -2227,6 +2470,8 @@ void *runner_main(void *data) { #endif else if (t->subtype == task_subtype_force) runner_dosub_self2_force(r, ci, 1); + else if (t->subtype == task_subtype_stars_density) + runner_dosub_self_stars_density(r, ci, 1); else error("Unknown/invalid task subtype (%d).", t->subtype); break; @@ -2240,6 +2485,8 @@ void *runner_main(void *data) { #endif else if (t->subtype == task_subtype_force) runner_dosub_pair2_force(r, ci, cj, t->flags, 1); + else if (t->subtype == task_subtype_stars_density) + runner_dosub_pair_stars_density(r, ci, cj, t->flags, 1); else error("Unknown/invalid task subtype (%d).", t->subtype); break; @@ -2262,6 +2509,9 @@ void *runner_main(void *data) { runner_do_extra_ghost(r, ci, 1); break; #endif + case task_type_stars_ghost: + runner_do_stars_ghost(r, ci, 1); + break; case task_type_drift_part: runner_do_drift_part(r, ci, 1); break; diff --git a/src/runner_doiact_stars.h b/src/runner_doiact_stars.h new file mode 100644 index 0000000000000000000000000000000000000000..cb61b3c9b10a22f6d10ab3000a8120719e25f3bf --- /dev/null +++ b/src/runner_doiact_stars.h @@ -0,0 +1,1410 @@ +/******************************************************************************* + * This file is part of SWIFT. + * Copyright (c) 2016 Matthieu Schaller (matthieu.schaller@durham.ac.uk) + * 2018 Loic Hausammann (loic.hausammann@epfl.ch) + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published + * by the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public License + * along with this program. If not, see <http://www.gnu.org/licenses/>. + * + ******************************************************************************/ + +#include "swift.h" + +/** + * @brief Calculate the number density of #part around the #spart + * + * @param r runner task + * @param c cell + * @param timer 1 if the time is to be recorded. + */ +void runner_doself_stars_density(struct runner *r, struct cell *c, int timer) { + const struct engine *e = r->e; + const struct cosmology *cosmo = e->cosmology; + + TIMER_TIC; + + /* Anything to do here? */ + if (!cell_is_active_stars(c, e)) return; + + /* Cosmological terms */ + const float a = cosmo->a; + const float H = cosmo->H; + + const int scount = c->scount; + const int count = c->count; + struct spart *restrict sparts = c->sparts; + struct part *restrict parts = c->parts; + + /* Loop over the sparts in ci. */ + for (int sid = 0; sid < scount; sid++) { + + /* Get a hold of the ith spart in ci. */ + struct spart *restrict si = &sparts[sid]; + const float hi = si->h; + const float hig2 = hi * hi * kernel_gamma2; + const float six[3] = {(float)(si->x[0] - c->loc[0]), + (float)(si->x[1] - c->loc[1]), + (float)(si->x[2] - c->loc[2])}; + + /* Loop over the parts in cj. */ + for (int pjd = 0; pjd < count; pjd++) { + + /* Get a pointer to the jth particle. */ + struct part *restrict pj = &parts[pjd]; + const float hj = pj->h; + + /* Compute the pairwise distance. */ + const float pjx[3] = {(float)(pj->x[0] - c->loc[0]), + (float)(pj->x[1] - c->loc[1]), + (float)(pj->x[2] - c->loc[2])}; + float dx[3] = {six[0] - pjx[0], six[1] - pjx[1], six[2] - pjx[2]}; + const float r2 = dx[0] * dx[0] + dx[1] * dx[1] + dx[2] * dx[2]; + +#ifdef SWIFT_DEBUG_CHECKS + /* Check that particles have been drifted to the current time */ + if (pj->ti_drift != e->ti_current) + error("Particle pj not drifted to current time"); +#endif + + if (r2 > 0.f && r2 < hig2) { + runner_iact_nonsym_stars_density(r2, dx, hi, hj, si, pj, a, H); + } + } /* loop over the parts in ci. */ + } /* loop over the sparts in ci. */ + + TIMER_TOC(timer_doself_stars_density); +} + +/** + * @brief Calculate the number density of cj #part around the ci #spart + * + * @param r runner task + * @param c cell + * @param timer 1 if the time is to be recorded. + */ +void runner_dosubpair_stars_density(struct runner *r, struct cell *restrict ci, + struct cell *restrict cj) { + + const struct engine *e = r->e; + const struct cosmology *cosmo = e->cosmology; + + /* Anything to do here? */ + if (!cell_is_active_stars(ci, e) && !cell_is_active_stars(cj, e)) return; + + const int scount_i = ci->scount; + const int count_j = cj->count; + struct spart *restrict sparts_i = ci->sparts; + struct part *restrict parts_j = cj->parts; + + /* Cosmological terms */ + const float a = cosmo->a; + const float H = cosmo->H; + + /* Get the relative distance between the pairs, wrapping. */ + double shift[3] = {0.0, 0.0, 0.0}; + for (int k = 0; k < 3; k++) { + if (cj->loc[k] - ci->loc[k] < -e->s->dim[k] / 2) + shift[k] = e->s->dim[k]; + else if (cj->loc[k] - ci->loc[k] > e->s->dim[k] / 2) + shift[k] = -e->s->dim[k]; + } + + /* Loop over the sparts in ci. */ + for (int sid = 0; sid < scount_i; sid++) { + + /* Get a hold of the ith spart in ci. */ + struct spart *restrict si = &sparts_i[sid]; + const float hi = si->h; + const float hig2 = hi * hi * kernel_gamma2; + const float six[3] = {(float)(si->x[0] - (cj->loc[0] + shift[0])), + (float)(si->x[1] - (cj->loc[1] + shift[1])), + (float)(si->x[2] - (cj->loc[2] + shift[2]))}; + + /* Loop over the parts in cj. */ + for (int pjd = 0; pjd < count_j; pjd++) { + + /* Get a pointer to the jth particle. */ + struct part *restrict pj = &parts_j[pjd]; + const float hj = pj->h; + + /* Compute the pairwise distance. */ + const float pjx[3] = {(float)(pj->x[0] - cj->loc[0]), + (float)(pj->x[1] - cj->loc[1]), + (float)(pj->x[2] - cj->loc[2])}; + float dx[3] = {six[0] - pjx[0], six[1] - pjx[1], six[2] - pjx[2]}; + const float r2 = dx[0] * dx[0] + dx[1] * dx[1] + dx[2] * dx[2]; + +#ifdef SWIFT_DEBUG_CHECKS + /* Check that particles have been drifted to the current time */ + if (pj->ti_drift != e->ti_current) + error("Particle pj not drifted to current time"); +#endif + + if (r2 < hig2) + runner_iact_nonsym_stars_density(r2, dx, hi, hj, si, pj, a, H); + + } /* loop over the parts in cj. */ + } /* loop over the parts in ci. */ +} + +void runner_dopair_stars_density(struct runner *r, struct cell *restrict ci, + struct cell *restrict cj, int timer) { + + TIMER_TIC; + + runner_dosubpair_stars_density(r, ci, cj); + runner_dosubpair_stars_density(r, cj, ci); + + if (timer) TIMER_TOC(timer_dopair_stars_density); +} + +/** + * @brief Compute the interactions between a cell pair, but only for the + * given indices in ci. + * + * Version using a brute-force algorithm. + * + * @param r The #runner. + * @param ci The first #cell. + * @param sparts_i The #part to interact with @c cj. + * @param ind The list of indices of particles in @c ci to interact with. + * @param scount The number of particles in @c ind. + * @param cj The second #cell. + * @param shift The shift vector to apply to the particles in ci. + */ +void runner_dopair_subset_stars_density(struct runner *r, + struct cell *restrict ci, + struct spart *restrict sparts_i, + int *restrict ind, int scount, + struct cell *restrict cj, + const double *shift) { + + const struct engine *e = r->e; + const struct cosmology *cosmo = e->cosmology; + + TIMER_TIC; + + const int count_j = cj->count; + struct part *restrict parts_j = cj->parts; + + /* Cosmological terms */ + const float a = cosmo->a; + const float H = cosmo->H; + + /* Loop over the parts_i. */ + for (int pid = 0; pid < scount; pid++) { + + /* Get a hold of the ith part in ci. */ + struct spart *restrict spi = &sparts_i[ind[pid]]; + double spix[3]; + for (int k = 0; k < 3; k++) spix[k] = spi->x[k] - shift[k]; + const float hi = spi->h; + const float hig2 = hi * hi * kernel_gamma2; + +#ifdef SWIFT_DEBUG_CHECKS + if (!spart_is_active(spi, e)) + error("Trying to correct smoothing length of inactive particle !"); +#endif + + /* Loop over the parts in cj. */ + for (int pjd = 0; pjd < count_j; pjd++) { + + /* Get a pointer to the jth particle. */ + struct part *restrict pj = &parts_j[pjd]; + + /* Compute the pairwise distance. */ + float r2 = 0.0f; + float dx[3]; + for (int k = 0; k < 3; k++) { + dx[k] = spix[k] - pj->x[k]; + r2 += dx[k] * dx[k]; + } + +#ifdef SWIFT_DEBUG_CHECKS + /* Check that particles have been drifted to the current time */ + if (pj->ti_drift != e->ti_current) + error("Particle pj not drifted to current time"); +#endif + /* Hit or miss? */ + if (r2 < hig2) { + runner_iact_nonsym_stars_density(r2, dx, hi, pj->h, spi, pj, a, H); + } + } /* loop over the parts in cj. */ + } /* loop over the parts in ci. */ + + TIMER_TOC(timer_dopair_subset_naive); +} + +/** + * @brief Compute the interactions between a cell pair, but only for the + * given indices in ci. + * + * @param r The #runner. + * @param ci The first #cell. + * @param sparts The #spart to interact. + * @param ind The list of indices of particles in @c ci to interact with. + * @param scount The number of particles in @c ind. + */ +void runner_doself_subset_stars_density(struct runner *r, + struct cell *restrict ci, + struct spart *restrict sparts, + int *restrict ind, int scount) { + + const struct engine *e = r->e; + const struct cosmology *cosmo = e->cosmology; + + TIMER_TIC; + + /* Cosmological terms */ + const float a = cosmo->a; + const float H = cosmo->H; + + const int count_i = ci->count; + struct part *restrict parts_j = ci->parts; + + /* Loop over the parts in ci. */ + for (int spid = 0; spid < scount; spid++) { + + /* Get a hold of the ith part in ci. */ + struct spart *spi = &sparts[ind[spid]]; + const float spix[3] = {(float)(spi->x[0] - ci->loc[0]), + (float)(spi->x[1] - ci->loc[1]), + (float)(spi->x[2] - ci->loc[2])}; + const float hi = spi->h; + const float hig2 = hi * hi * kernel_gamma2; + +#ifdef SWIFT_DEBUG_CHECKS + if (!spart_is_active(spi, e)) + error("Inactive particle in subset function!"); +#endif + + /* Loop over the parts in cj. */ + for (int pjd = 0; pjd < count_i; pjd++) { + + /* Get a pointer to the jth particle. */ + struct part *restrict pj = &parts_j[pjd]; + const float hj = pj->h; + + /* Compute the pairwise distance. */ + const float pjx[3] = {(float)(pj->x[0] - ci->loc[0]), + (float)(pj->x[1] - ci->loc[1]), + (float)(pj->x[2] - ci->loc[2])}; + float dx[3] = {spix[0] - pjx[0], spix[1] - pjx[1], spix[2] - pjx[2]}; + const float r2 = dx[0] * dx[0] + dx[1] * dx[1] + dx[2] * dx[2]; + +#ifdef SWIFT_DEBUG_CHECKS + /* Check that particles have been drifted to the current time */ + if (pj->ti_drift != e->ti_current) + error("Particle pj not drifted to current time"); +#endif + + /* Hit or miss? */ + if (r2 > 0.f && r2 < hig2) { + runner_iact_nonsym_stars_density(r2, dx, hi, hj, spi, pj, a, H); + } + } /* loop over the parts in cj. */ + } /* loop over the parts in ci. */ + + TIMER_TOC(timer_doself_subset_stars_density); +} + +/** +* @brief Determine which version of DOSELF_SUBSET needs to be called depending +* on the optimisation level. + +* @param r The #runner. +* @param ci The first #cell. +* @param parts The #spart to interact. +* @param ind The list of indices of particles in @c ci to interact with. +* @param scount The number of particles in @c ind. +*/ +void runner_doself_subset_branch_stars_density(struct runner *r, + struct cell *restrict ci, + struct spart *restrict sparts, + int *restrict ind, int scount) { + + runner_doself_subset_stars_density(r, ci, sparts, ind, scount); +} + +/** + * @brief Determine which version of DOPAIR_SUBSET needs to be called depending + * on the + * orientation of the cells or whether DOPAIR_SUBSET needs to be called at all. + * + * @param r The #runner. + * @param ci The first #cell. + * @param sparts_i The #spart to interact with @c cj. + * @param ind The list of indices of particles in @c ci to interact with. + * @param scount The number of particles in @c ind. + * @param cj The second #cell. + */ +void runner_dopair_subset_branch_stars_density(struct runner *r, + struct cell *restrict ci, + struct spart *restrict sparts_i, + int *restrict ind, int scount, + struct cell *restrict cj) { + + const struct engine *e = r->e; + + /* Get the relative distance between the pairs, wrapping. */ + double shift[3] = {0.0, 0.0, 0.0}; + for (int k = 0; k < 3; k++) { + if (cj->loc[k] - ci->loc[k] < -e->s->dim[k] / 2) + shift[k] = e->s->dim[k]; + else if (cj->loc[k] - ci->loc[k] > e->s->dim[k] / 2) + shift[k] = -e->s->dim[k]; + } + + runner_dopair_subset_stars_density(r, ci, sparts_i, ind, scount, cj, shift); +} + +void runner_dosub_subset_stars_density(struct runner *r, struct cell *ci, + struct spart *sparts, int *ind, + int scount, struct cell *cj, int sid, + int gettimer) { + + const struct engine *e = r->e; + struct space *s = e->s; + + TIMER_TIC; + + /* Should we even bother? */ + if (!cell_is_active_stars(ci, e) && + (cj == NULL || !cell_is_active_stars(cj, e))) + return; + if (ci->scount == 0 || (cj != NULL && cj->scount == 0)) return; + + /* Find out in which sub-cell of ci the parts are. */ + struct cell *sub = NULL; + if (ci->split) { + for (int k = 0; k < 8; k++) { + if (ci->progeny[k] != NULL) { + if (&sparts[ind[0]] >= &ci->progeny[k]->sparts[0] && + &sparts[ind[0]] < &ci->progeny[k]->sparts[ci->progeny[k]->scount]) { + sub = ci->progeny[k]; + break; + } + } + } + } + + /* Is this a single cell? */ + if (cj == NULL) { + + /* Recurse? */ + if (cell_can_recurse_in_self_stars_task(ci)) { + + /* Loop over all progeny. */ + runner_dosub_subset_stars_density(r, sub, sparts, ind, scount, NULL, -1, + 0); + for (int j = 0; j < 8; j++) + if (ci->progeny[j] != sub && ci->progeny[j] != NULL) + runner_dosub_subset_stars_density(r, sub, sparts, ind, scount, + ci->progeny[j], -1, 0); + + } + + /* Otherwise, compute self-interaction. */ + else + runner_doself_subset_branch_stars_density(r, ci, sparts, ind, scount); + } /* self-interaction. */ + + /* Otherwise, it's a pair interaction. */ + else { + + /* Recurse? */ + if (cell_can_recurse_in_pair_stars_task(ci) && + cell_can_recurse_in_pair_stars_task(cj)) { + + /* Get the type of pair if not specified explicitly. */ + double shift[3] = {0.0, 0.0, 0.0}; + sid = space_getsid(s, &ci, &cj, shift); + + /* Different types of flags. */ + switch (sid) { + + /* Regular sub-cell interactions of a single cell. */ + case 0: /* ( 1 , 1 , 1 ) */ + if (ci->progeny[7] == sub && cj->progeny[0] != NULL) + runner_dosub_subset_stars_density(r, ci->progeny[7], sparts, ind, + scount, cj->progeny[0], -1, 0); + if (ci->progeny[7] != NULL && cj->progeny[0] == sub) + runner_dosub_subset_stars_density(r, cj->progeny[0], sparts, ind, + scount, ci->progeny[7], -1, 0); + break; + + case 1: /* ( 1 , 1 , 0 ) */ + if (ci->progeny[6] == sub && cj->progeny[0] != NULL) + runner_dosub_subset_stars_density(r, ci->progeny[6], sparts, ind, + scount, cj->progeny[0], -1, 0); + if (ci->progeny[6] != NULL && cj->progeny[0] == sub) + runner_dosub_subset_stars_density(r, cj->progeny[0], sparts, ind, + scount, ci->progeny[6], -1, 0); + if (ci->progeny[6] == sub && cj->progeny[1] != NULL) + runner_dosub_subset_stars_density(r, ci->progeny[6], sparts, ind, + scount, cj->progeny[1], -1, 0); + if (ci->progeny[6] != NULL && cj->progeny[1] == sub) + runner_dosub_subset_stars_density(r, cj->progeny[1], sparts, ind, + scount, ci->progeny[6], -1, 0); + if (ci->progeny[7] == sub && cj->progeny[0] != NULL) + runner_dosub_subset_stars_density(r, ci->progeny[7], sparts, ind, + scount, cj->progeny[0], -1, 0); + if (ci->progeny[7] != NULL && cj->progeny[0] == sub) + runner_dosub_subset_stars_density(r, cj->progeny[0], sparts, ind, + scount, ci->progeny[7], -1, 0); + if (ci->progeny[7] == sub && cj->progeny[1] != NULL) + runner_dosub_subset_stars_density(r, ci->progeny[7], sparts, ind, + scount, cj->progeny[1], -1, 0); + if (ci->progeny[7] != NULL && cj->progeny[1] == sub) + runner_dosub_subset_stars_density(r, cj->progeny[1], sparts, ind, + scount, ci->progeny[7], -1, 0); + break; + + case 2: /* ( 1 , 1 , -1 ) */ + if (ci->progeny[6] == sub && cj->progeny[1] != NULL) + runner_dosub_subset_stars_density(r, ci->progeny[6], sparts, ind, + scount, cj->progeny[1], -1, 0); + if (ci->progeny[6] != NULL && cj->progeny[1] == sub) + runner_dosub_subset_stars_density(r, cj->progeny[1], sparts, ind, + scount, ci->progeny[6], -1, 0); + break; + + case 3: /* ( 1 , 0 , 1 ) */ + if (ci->progeny[5] == sub && cj->progeny[0] != NULL) + runner_dosub_subset_stars_density(r, ci->progeny[5], sparts, ind, + scount, cj->progeny[0], -1, 0); + if (ci->progeny[5] != NULL && cj->progeny[0] == sub) + runner_dosub_subset_stars_density(r, cj->progeny[0], sparts, ind, + scount, ci->progeny[5], -1, 0); + if (ci->progeny[5] == sub && cj->progeny[2] != NULL) + runner_dosub_subset_stars_density(r, ci->progeny[5], sparts, ind, + scount, cj->progeny[2], -1, 0); + if (ci->progeny[5] != NULL && cj->progeny[2] == sub) + runner_dosub_subset_stars_density(r, cj->progeny[2], sparts, ind, + scount, ci->progeny[5], -1, 0); + if (ci->progeny[7] == sub && cj->progeny[0] != NULL) + runner_dosub_subset_stars_density(r, ci->progeny[7], sparts, ind, + scount, cj->progeny[0], -1, 0); + if (ci->progeny[7] != NULL && cj->progeny[0] == sub) + runner_dosub_subset_stars_density(r, cj->progeny[0], sparts, ind, + scount, ci->progeny[7], -1, 0); + if (ci->progeny[7] == sub && cj->progeny[2] != NULL) + runner_dosub_subset_stars_density(r, ci->progeny[7], sparts, ind, + scount, cj->progeny[2], -1, 0); + if (ci->progeny[7] != NULL && cj->progeny[2] == sub) + runner_dosub_subset_stars_density(r, cj->progeny[2], sparts, ind, + scount, ci->progeny[7], -1, 0); + break; + + case 4: /* ( 1 , 0 , 0 ) */ + if (ci->progeny[4] == sub && cj->progeny[0] != NULL) + runner_dosub_subset_stars_density(r, ci->progeny[4], sparts, ind, + scount, cj->progeny[0], -1, 0); + if (ci->progeny[4] != NULL && cj->progeny[0] == sub) + runner_dosub_subset_stars_density(r, cj->progeny[0], sparts, ind, + scount, ci->progeny[4], -1, 0); + if (ci->progeny[4] == sub && cj->progeny[1] != NULL) + runner_dosub_subset_stars_density(r, ci->progeny[4], sparts, ind, + scount, cj->progeny[1], -1, 0); + if (ci->progeny[4] != NULL && cj->progeny[1] == sub) + runner_dosub_subset_stars_density(r, cj->progeny[1], sparts, ind, + scount, ci->progeny[4], -1, 0); + if (ci->progeny[4] == sub && cj->progeny[2] != NULL) + runner_dosub_subset_stars_density(r, ci->progeny[4], sparts, ind, + scount, cj->progeny[2], -1, 0); + if (ci->progeny[4] != NULL && cj->progeny[2] == sub) + runner_dosub_subset_stars_density(r, cj->progeny[2], sparts, ind, + scount, ci->progeny[4], -1, 0); + if (ci->progeny[4] == sub && cj->progeny[3] != NULL) + runner_dosub_subset_stars_density(r, ci->progeny[4], sparts, ind, + scount, cj->progeny[3], -1, 0); + if (ci->progeny[4] != NULL && cj->progeny[3] == sub) + runner_dosub_subset_stars_density(r, cj->progeny[3], sparts, ind, + scount, ci->progeny[4], -1, 0); + if (ci->progeny[5] == sub && cj->progeny[0] != NULL) + runner_dosub_subset_stars_density(r, ci->progeny[5], sparts, ind, + scount, cj->progeny[0], -1, 0); + if (ci->progeny[5] != NULL && cj->progeny[0] == sub) + runner_dosub_subset_stars_density(r, cj->progeny[0], sparts, ind, + scount, ci->progeny[5], -1, 0); + if (ci->progeny[5] == sub && cj->progeny[1] != NULL) + runner_dosub_subset_stars_density(r, ci->progeny[5], sparts, ind, + scount, cj->progeny[1], -1, 0); + if (ci->progeny[5] != NULL && cj->progeny[1] == sub) + runner_dosub_subset_stars_density(r, cj->progeny[1], sparts, ind, + scount, ci->progeny[5], -1, 0); + if (ci->progeny[5] == sub && cj->progeny[2] != NULL) + runner_dosub_subset_stars_density(r, ci->progeny[5], sparts, ind, + scount, cj->progeny[2], -1, 0); + if (ci->progeny[5] != NULL && cj->progeny[2] == sub) + runner_dosub_subset_stars_density(r, cj->progeny[2], sparts, ind, + scount, ci->progeny[5], -1, 0); + if (ci->progeny[5] == sub && cj->progeny[3] != NULL) + runner_dosub_subset_stars_density(r, ci->progeny[5], sparts, ind, + scount, cj->progeny[3], -1, 0); + if (ci->progeny[5] != NULL && cj->progeny[3] == sub) + runner_dosub_subset_stars_density(r, cj->progeny[3], sparts, ind, + scount, ci->progeny[5], -1, 0); + if (ci->progeny[6] == sub && cj->progeny[0] != NULL) + runner_dosub_subset_stars_density(r, ci->progeny[6], sparts, ind, + scount, cj->progeny[0], -1, 0); + if (ci->progeny[6] != NULL && cj->progeny[0] == sub) + runner_dosub_subset_stars_density(r, cj->progeny[0], sparts, ind, + scount, ci->progeny[6], -1, 0); + if (ci->progeny[6] == sub && cj->progeny[1] != NULL) + runner_dosub_subset_stars_density(r, ci->progeny[6], sparts, ind, + scount, cj->progeny[1], -1, 0); + if (ci->progeny[6] != NULL && cj->progeny[1] == sub) + runner_dosub_subset_stars_density(r, cj->progeny[1], sparts, ind, + scount, ci->progeny[6], -1, 0); + if (ci->progeny[6] == sub && cj->progeny[2] != NULL) + runner_dosub_subset_stars_density(r, ci->progeny[6], sparts, ind, + scount, cj->progeny[2], -1, 0); + if (ci->progeny[6] != NULL && cj->progeny[2] == sub) + runner_dosub_subset_stars_density(r, cj->progeny[2], sparts, ind, + scount, ci->progeny[6], -1, 0); + if (ci->progeny[6] == sub && cj->progeny[3] != NULL) + runner_dosub_subset_stars_density(r, ci->progeny[6], sparts, ind, + scount, cj->progeny[3], -1, 0); + if (ci->progeny[6] != NULL && cj->progeny[3] == sub) + runner_dosub_subset_stars_density(r, cj->progeny[3], sparts, ind, + scount, ci->progeny[6], -1, 0); + if (ci->progeny[7] == sub && cj->progeny[0] != NULL) + runner_dosub_subset_stars_density(r, ci->progeny[7], sparts, ind, + scount, cj->progeny[0], -1, 0); + if (ci->progeny[7] != NULL && cj->progeny[0] == sub) + runner_dosub_subset_stars_density(r, cj->progeny[0], sparts, ind, + scount, ci->progeny[7], -1, 0); + if (ci->progeny[7] == sub && cj->progeny[1] != NULL) + runner_dosub_subset_stars_density(r, ci->progeny[7], sparts, ind, + scount, cj->progeny[1], -1, 0); + if (ci->progeny[7] != NULL && cj->progeny[1] == sub) + runner_dosub_subset_stars_density(r, cj->progeny[1], sparts, ind, + scount, ci->progeny[7], -1, 0); + if (ci->progeny[7] == sub && cj->progeny[2] != NULL) + runner_dosub_subset_stars_density(r, ci->progeny[7], sparts, ind, + scount, cj->progeny[2], -1, 0); + if (ci->progeny[7] != NULL && cj->progeny[2] == sub) + runner_dosub_subset_stars_density(r, cj->progeny[2], sparts, ind, + scount, ci->progeny[7], -1, 0); + if (ci->progeny[7] == sub && cj->progeny[3] != NULL) + runner_dosub_subset_stars_density(r, ci->progeny[7], sparts, ind, + scount, cj->progeny[3], -1, 0); + if (ci->progeny[7] != NULL && cj->progeny[3] == sub) + runner_dosub_subset_stars_density(r, cj->progeny[3], sparts, ind, + scount, ci->progeny[7], -1, 0); + break; + + case 5: /* ( 1 , 0 , -1 ) */ + if (ci->progeny[4] == sub && cj->progeny[1] != NULL) + runner_dosub_subset_stars_density(r, ci->progeny[4], sparts, ind, + scount, cj->progeny[1], -1, 0); + if (ci->progeny[4] != NULL && cj->progeny[1] == sub) + runner_dosub_subset_stars_density(r, cj->progeny[1], sparts, ind, + scount, ci->progeny[4], -1, 0); + if (ci->progeny[4] == sub && cj->progeny[3] != NULL) + runner_dosub_subset_stars_density(r, ci->progeny[4], sparts, ind, + scount, cj->progeny[3], -1, 0); + if (ci->progeny[4] != NULL && cj->progeny[3] == sub) + runner_dosub_subset_stars_density(r, cj->progeny[3], sparts, ind, + scount, ci->progeny[4], -1, 0); + if (ci->progeny[6] == sub && cj->progeny[1] != NULL) + runner_dosub_subset_stars_density(r, ci->progeny[6], sparts, ind, + scount, cj->progeny[1], -1, 0); + if (ci->progeny[6] != NULL && cj->progeny[1] == sub) + runner_dosub_subset_stars_density(r, cj->progeny[1], sparts, ind, + scount, ci->progeny[6], -1, 0); + if (ci->progeny[6] == sub && cj->progeny[3] != NULL) + runner_dosub_subset_stars_density(r, ci->progeny[6], sparts, ind, + scount, cj->progeny[3], -1, 0); + if (ci->progeny[6] != NULL && cj->progeny[3] == sub) + runner_dosub_subset_stars_density(r, cj->progeny[3], sparts, ind, + scount, ci->progeny[6], -1, 0); + break; + + case 6: /* ( 1 , -1 , 1 ) */ + if (ci->progeny[5] == sub && cj->progeny[2] != NULL) + runner_dosub_subset_stars_density(r, ci->progeny[5], sparts, ind, + scount, cj->progeny[2], -1, 0); + if (ci->progeny[5] != NULL && cj->progeny[2] == sub) + runner_dosub_subset_stars_density(r, cj->progeny[2], sparts, ind, + scount, ci->progeny[5], -1, 0); + break; + + case 7: /* ( 1 , -1 , 0 ) */ + if (ci->progeny[4] == sub && cj->progeny[2] != NULL) + runner_dosub_subset_stars_density(r, ci->progeny[4], sparts, ind, + scount, cj->progeny[2], -1, 0); + if (ci->progeny[4] != NULL && cj->progeny[2] == sub) + runner_dosub_subset_stars_density(r, cj->progeny[2], sparts, ind, + scount, ci->progeny[4], -1, 0); + if (ci->progeny[4] == sub && cj->progeny[3] != NULL) + runner_dosub_subset_stars_density(r, ci->progeny[4], sparts, ind, + scount, cj->progeny[3], -1, 0); + if (ci->progeny[4] != NULL && cj->progeny[3] == sub) + runner_dosub_subset_stars_density(r, cj->progeny[3], sparts, ind, + scount, ci->progeny[4], -1, 0); + if (ci->progeny[5] == sub && cj->progeny[2] != NULL) + runner_dosub_subset_stars_density(r, ci->progeny[5], sparts, ind, + scount, cj->progeny[2], -1, 0); + if (ci->progeny[5] != NULL && cj->progeny[2] == sub) + runner_dosub_subset_stars_density(r, cj->progeny[2], sparts, ind, + scount, ci->progeny[5], -1, 0); + if (ci->progeny[5] == sub && cj->progeny[3] != NULL) + runner_dosub_subset_stars_density(r, ci->progeny[5], sparts, ind, + scount, cj->progeny[3], -1, 0); + if (ci->progeny[5] != NULL && cj->progeny[3] == sub) + runner_dosub_subset_stars_density(r, cj->progeny[3], sparts, ind, + scount, ci->progeny[5], -1, 0); + break; + + case 8: /* ( 1 , -1 , -1 ) */ + if (ci->progeny[4] == sub && cj->progeny[3] != NULL) + runner_dosub_subset_stars_density(r, ci->progeny[4], sparts, ind, + scount, cj->progeny[3], -1, 0); + if (ci->progeny[4] != NULL && cj->progeny[3] == sub) + runner_dosub_subset_stars_density(r, cj->progeny[3], sparts, ind, + scount, ci->progeny[4], -1, 0); + break; + + case 9: /* ( 0 , 1 , 1 ) */ + if (ci->progeny[3] == sub && cj->progeny[0] != NULL) + runner_dosub_subset_stars_density(r, ci->progeny[3], sparts, ind, + scount, cj->progeny[0], -1, 0); + if (ci->progeny[3] != NULL && cj->progeny[0] == sub) + runner_dosub_subset_stars_density(r, cj->progeny[0], sparts, ind, + scount, ci->progeny[3], -1, 0); + if (ci->progeny[3] == sub && cj->progeny[4] != NULL) + runner_dosub_subset_stars_density(r, ci->progeny[3], sparts, ind, + scount, cj->progeny[4], -1, 0); + if (ci->progeny[3] != NULL && cj->progeny[4] == sub) + runner_dosub_subset_stars_density(r, cj->progeny[4], sparts, ind, + scount, ci->progeny[3], -1, 0); + if (ci->progeny[7] == sub && cj->progeny[0] != NULL) + runner_dosub_subset_stars_density(r, ci->progeny[7], sparts, ind, + scount, cj->progeny[0], -1, 0); + if (ci->progeny[7] != NULL && cj->progeny[0] == sub) + runner_dosub_subset_stars_density(r, cj->progeny[0], sparts, ind, + scount, ci->progeny[7], -1, 0); + if (ci->progeny[7] == sub && cj->progeny[4] != NULL) + runner_dosub_subset_stars_density(r, ci->progeny[7], sparts, ind, + scount, cj->progeny[4], -1, 0); + if (ci->progeny[7] != NULL && cj->progeny[4] == sub) + runner_dosub_subset_stars_density(r, cj->progeny[4], sparts, ind, + scount, ci->progeny[7], -1, 0); + break; + + case 10: /* ( 0 , 1 , 0 ) */ + if (ci->progeny[2] == sub && cj->progeny[0] != NULL) + runner_dosub_subset_stars_density(r, ci->progeny[2], sparts, ind, + scount, cj->progeny[0], -1, 0); + if (ci->progeny[2] != NULL && cj->progeny[0] == sub) + runner_dosub_subset_stars_density(r, cj->progeny[0], sparts, ind, + scount, ci->progeny[2], -1, 0); + if (ci->progeny[2] == sub && cj->progeny[1] != NULL) + runner_dosub_subset_stars_density(r, ci->progeny[2], sparts, ind, + scount, cj->progeny[1], -1, 0); + if (ci->progeny[2] != NULL && cj->progeny[1] == sub) + runner_dosub_subset_stars_density(r, cj->progeny[1], sparts, ind, + scount, ci->progeny[2], -1, 0); + if (ci->progeny[2] == sub && cj->progeny[4] != NULL) + runner_dosub_subset_stars_density(r, ci->progeny[2], sparts, ind, + scount, cj->progeny[4], -1, 0); + if (ci->progeny[2] != NULL && cj->progeny[4] == sub) + runner_dosub_subset_stars_density(r, cj->progeny[4], sparts, ind, + scount, ci->progeny[2], -1, 0); + if (ci->progeny[2] == sub && cj->progeny[5] != NULL) + runner_dosub_subset_stars_density(r, ci->progeny[2], sparts, ind, + scount, cj->progeny[5], -1, 0); + if (ci->progeny[2] != NULL && cj->progeny[5] == sub) + runner_dosub_subset_stars_density(r, cj->progeny[5], sparts, ind, + scount, ci->progeny[2], -1, 0); + if (ci->progeny[3] == sub && cj->progeny[0] != NULL) + runner_dosub_subset_stars_density(r, ci->progeny[3], sparts, ind, + scount, cj->progeny[0], -1, 0); + if (ci->progeny[3] != NULL && cj->progeny[0] == sub) + runner_dosub_subset_stars_density(r, cj->progeny[0], sparts, ind, + scount, ci->progeny[3], -1, 0); + if (ci->progeny[3] == sub && cj->progeny[1] != NULL) + runner_dosub_subset_stars_density(r, ci->progeny[3], sparts, ind, + scount, cj->progeny[1], -1, 0); + if (ci->progeny[3] != NULL && cj->progeny[1] == sub) + runner_dosub_subset_stars_density(r, cj->progeny[1], sparts, ind, + scount, ci->progeny[3], -1, 0); + if (ci->progeny[3] == sub && cj->progeny[4] != NULL) + runner_dosub_subset_stars_density(r, ci->progeny[3], sparts, ind, + scount, cj->progeny[4], -1, 0); + if (ci->progeny[3] != NULL && cj->progeny[4] == sub) + runner_dosub_subset_stars_density(r, cj->progeny[4], sparts, ind, + scount, ci->progeny[3], -1, 0); + if (ci->progeny[3] == sub && cj->progeny[5] != NULL) + runner_dosub_subset_stars_density(r, ci->progeny[3], sparts, ind, + scount, cj->progeny[5], -1, 0); + if (ci->progeny[3] != NULL && cj->progeny[5] == sub) + runner_dosub_subset_stars_density(r, cj->progeny[5], sparts, ind, + scount, ci->progeny[3], -1, 0); + if (ci->progeny[6] == sub && cj->progeny[0] != NULL) + runner_dosub_subset_stars_density(r, ci->progeny[6], sparts, ind, + scount, cj->progeny[0], -1, 0); + if (ci->progeny[6] != NULL && cj->progeny[0] == sub) + runner_dosub_subset_stars_density(r, cj->progeny[0], sparts, ind, + scount, ci->progeny[6], -1, 0); + if (ci->progeny[6] == sub && cj->progeny[1] != NULL) + runner_dosub_subset_stars_density(r, ci->progeny[6], sparts, ind, + scount, cj->progeny[1], -1, 0); + if (ci->progeny[6] != NULL && cj->progeny[1] == sub) + runner_dosub_subset_stars_density(r, cj->progeny[1], sparts, ind, + scount, ci->progeny[6], -1, 0); + if (ci->progeny[6] == sub && cj->progeny[4] != NULL) + runner_dosub_subset_stars_density(r, ci->progeny[6], sparts, ind, + scount, cj->progeny[4], -1, 0); + if (ci->progeny[6] != NULL && cj->progeny[4] == sub) + runner_dosub_subset_stars_density(r, cj->progeny[4], sparts, ind, + scount, ci->progeny[6], -1, 0); + if (ci->progeny[6] == sub && cj->progeny[5] != NULL) + runner_dosub_subset_stars_density(r, ci->progeny[6], sparts, ind, + scount, cj->progeny[5], -1, 0); + if (ci->progeny[6] != NULL && cj->progeny[5] == sub) + runner_dosub_subset_stars_density(r, cj->progeny[5], sparts, ind, + scount, ci->progeny[6], -1, 0); + if (ci->progeny[7] == sub && cj->progeny[0] != NULL) + runner_dosub_subset_stars_density(r, ci->progeny[7], sparts, ind, + scount, cj->progeny[0], -1, 0); + if (ci->progeny[7] != NULL && cj->progeny[0] == sub) + runner_dosub_subset_stars_density(r, cj->progeny[0], sparts, ind, + scount, ci->progeny[7], -1, 0); + if (ci->progeny[7] == sub && cj->progeny[1] != NULL) + runner_dosub_subset_stars_density(r, ci->progeny[7], sparts, ind, + scount, cj->progeny[1], -1, 0); + if (ci->progeny[7] != NULL && cj->progeny[1] == sub) + runner_dosub_subset_stars_density(r, cj->progeny[1], sparts, ind, + scount, ci->progeny[7], -1, 0); + if (ci->progeny[7] == sub && cj->progeny[4] != NULL) + runner_dosub_subset_stars_density(r, ci->progeny[7], sparts, ind, + scount, cj->progeny[4], -1, 0); + if (ci->progeny[7] != NULL && cj->progeny[4] == sub) + runner_dosub_subset_stars_density(r, cj->progeny[4], sparts, ind, + scount, ci->progeny[7], -1, 0); + if (ci->progeny[7] == sub && cj->progeny[5] != NULL) + runner_dosub_subset_stars_density(r, ci->progeny[7], sparts, ind, + scount, cj->progeny[5], -1, 0); + if (ci->progeny[7] != NULL && cj->progeny[5] == sub) + runner_dosub_subset_stars_density(r, cj->progeny[5], sparts, ind, + scount, ci->progeny[7], -1, 0); + break; + + case 11: /* ( 0 , 1 , -1 ) */ + if (ci->progeny[2] == sub && cj->progeny[1] != NULL) + runner_dosub_subset_stars_density(r, ci->progeny[2], sparts, ind, + scount, cj->progeny[1], -1, 0); + if (ci->progeny[2] != NULL && cj->progeny[1] == sub) + runner_dosub_subset_stars_density(r, cj->progeny[1], sparts, ind, + scount, ci->progeny[2], -1, 0); + if (ci->progeny[2] == sub && cj->progeny[5] != NULL) + runner_dosub_subset_stars_density(r, ci->progeny[2], sparts, ind, + scount, cj->progeny[5], -1, 0); + if (ci->progeny[2] != NULL && cj->progeny[5] == sub) + runner_dosub_subset_stars_density(r, cj->progeny[5], sparts, ind, + scount, ci->progeny[2], -1, 0); + if (ci->progeny[6] == sub && cj->progeny[1] != NULL) + runner_dosub_subset_stars_density(r, ci->progeny[6], sparts, ind, + scount, cj->progeny[1], -1, 0); + if (ci->progeny[6] != NULL && cj->progeny[1] == sub) + runner_dosub_subset_stars_density(r, cj->progeny[1], sparts, ind, + scount, ci->progeny[6], -1, 0); + if (ci->progeny[6] == sub && cj->progeny[5] != NULL) + runner_dosub_subset_stars_density(r, ci->progeny[6], sparts, ind, + scount, cj->progeny[5], -1, 0); + if (ci->progeny[6] != NULL && cj->progeny[5] == sub) + runner_dosub_subset_stars_density(r, cj->progeny[5], sparts, ind, + scount, ci->progeny[6], -1, 0); + break; + + case 12: /* ( 0 , 0 , 1 ) */ + if (ci->progeny[1] == sub && cj->progeny[0] != NULL) + runner_dosub_subset_stars_density(r, ci->progeny[1], sparts, ind, + scount, cj->progeny[0], -1, 0); + if (ci->progeny[1] != NULL && cj->progeny[0] == sub) + runner_dosub_subset_stars_density(r, cj->progeny[0], sparts, ind, + scount, ci->progeny[1], -1, 0); + if (ci->progeny[1] == sub && cj->progeny[2] != NULL) + runner_dosub_subset_stars_density(r, ci->progeny[1], sparts, ind, + scount, cj->progeny[2], -1, 0); + if (ci->progeny[1] != NULL && cj->progeny[2] == sub) + runner_dosub_subset_stars_density(r, cj->progeny[2], sparts, ind, + scount, ci->progeny[1], -1, 0); + if (ci->progeny[1] == sub && cj->progeny[4] != NULL) + runner_dosub_subset_stars_density(r, ci->progeny[1], sparts, ind, + scount, cj->progeny[4], -1, 0); + if (ci->progeny[1] != NULL && cj->progeny[4] == sub) + runner_dosub_subset_stars_density(r, cj->progeny[4], sparts, ind, + scount, ci->progeny[1], -1, 0); + if (ci->progeny[1] == sub && cj->progeny[6] != NULL) + runner_dosub_subset_stars_density(r, ci->progeny[1], sparts, ind, + scount, cj->progeny[6], -1, 0); + if (ci->progeny[1] != NULL && cj->progeny[6] == sub) + runner_dosub_subset_stars_density(r, cj->progeny[6], sparts, ind, + scount, ci->progeny[1], -1, 0); + if (ci->progeny[3] == sub && cj->progeny[0] != NULL) + runner_dosub_subset_stars_density(r, ci->progeny[3], sparts, ind, + scount, cj->progeny[0], -1, 0); + if (ci->progeny[3] != NULL && cj->progeny[0] == sub) + runner_dosub_subset_stars_density(r, cj->progeny[0], sparts, ind, + scount, ci->progeny[3], -1, 0); + if (ci->progeny[3] == sub && cj->progeny[2] != NULL) + runner_dosub_subset_stars_density(r, ci->progeny[3], sparts, ind, + scount, cj->progeny[2], -1, 0); + if (ci->progeny[3] != NULL && cj->progeny[2] == sub) + runner_dosub_subset_stars_density(r, cj->progeny[2], sparts, ind, + scount, ci->progeny[3], -1, 0); + if (ci->progeny[3] == sub && cj->progeny[4] != NULL) + runner_dosub_subset_stars_density(r, ci->progeny[3], sparts, ind, + scount, cj->progeny[4], -1, 0); + if (ci->progeny[3] != NULL && cj->progeny[4] == sub) + runner_dosub_subset_stars_density(r, cj->progeny[4], sparts, ind, + scount, ci->progeny[3], -1, 0); + if (ci->progeny[3] == sub && cj->progeny[6] != NULL) + runner_dosub_subset_stars_density(r, ci->progeny[3], sparts, ind, + scount, cj->progeny[6], -1, 0); + if (ci->progeny[3] != NULL && cj->progeny[6] == sub) + runner_dosub_subset_stars_density(r, cj->progeny[6], sparts, ind, + scount, ci->progeny[3], -1, 0); + if (ci->progeny[5] == sub && cj->progeny[0] != NULL) + runner_dosub_subset_stars_density(r, ci->progeny[5], sparts, ind, + scount, cj->progeny[0], -1, 0); + if (ci->progeny[5] != NULL && cj->progeny[0] == sub) + runner_dosub_subset_stars_density(r, cj->progeny[0], sparts, ind, + scount, ci->progeny[5], -1, 0); + if (ci->progeny[5] == sub && cj->progeny[2] != NULL) + runner_dosub_subset_stars_density(r, ci->progeny[5], sparts, ind, + scount, cj->progeny[2], -1, 0); + if (ci->progeny[5] != NULL && cj->progeny[2] == sub) + runner_dosub_subset_stars_density(r, cj->progeny[2], sparts, ind, + scount, ci->progeny[5], -1, 0); + if (ci->progeny[5] == sub && cj->progeny[4] != NULL) + runner_dosub_subset_stars_density(r, ci->progeny[5], sparts, ind, + scount, cj->progeny[4], -1, 0); + if (ci->progeny[5] != NULL && cj->progeny[4] == sub) + runner_dosub_subset_stars_density(r, cj->progeny[4], sparts, ind, + scount, ci->progeny[5], -1, 0); + if (ci->progeny[5] == sub && cj->progeny[6] != NULL) + runner_dosub_subset_stars_density(r, ci->progeny[5], sparts, ind, + scount, cj->progeny[6], -1, 0); + if (ci->progeny[5] != NULL && cj->progeny[6] == sub) + runner_dosub_subset_stars_density(r, cj->progeny[6], sparts, ind, + scount, ci->progeny[5], -1, 0); + if (ci->progeny[7] == sub && cj->progeny[0] != NULL) + runner_dosub_subset_stars_density(r, ci->progeny[7], sparts, ind, + scount, cj->progeny[0], -1, 0); + if (ci->progeny[7] != NULL && cj->progeny[0] == sub) + runner_dosub_subset_stars_density(r, cj->progeny[0], sparts, ind, + scount, ci->progeny[7], -1, 0); + if (ci->progeny[7] == sub && cj->progeny[2] != NULL) + runner_dosub_subset_stars_density(r, ci->progeny[7], sparts, ind, + scount, cj->progeny[2], -1, 0); + if (ci->progeny[7] != NULL && cj->progeny[2] == sub) + runner_dosub_subset_stars_density(r, cj->progeny[2], sparts, ind, + scount, ci->progeny[7], -1, 0); + if (ci->progeny[7] == sub && cj->progeny[4] != NULL) + runner_dosub_subset_stars_density(r, ci->progeny[7], sparts, ind, + scount, cj->progeny[4], -1, 0); + if (ci->progeny[7] != NULL && cj->progeny[4] == sub) + runner_dosub_subset_stars_density(r, cj->progeny[4], sparts, ind, + scount, ci->progeny[7], -1, 0); + if (ci->progeny[7] == sub && cj->progeny[6] != NULL) + runner_dosub_subset_stars_density(r, ci->progeny[7], sparts, ind, + scount, cj->progeny[6], -1, 0); + if (ci->progeny[7] != NULL && cj->progeny[6] == sub) + runner_dosub_subset_stars_density(r, cj->progeny[6], sparts, ind, + scount, ci->progeny[7], -1, 0); + break; + } + + } + + /* Otherwise, compute the pair directly. */ + else if (cell_is_active_stars(ci, e) || cell_is_active_stars(cj, e)) { + + /* Do any of the cells need to be drifted first? */ + if (!cell_are_part_drifted(cj, e)) error("Cell should be drifted!"); + + runner_dopair_subset_branch_stars_density(r, ci, sparts, ind, scount, cj); + } + + } /* otherwise, pair interaction. */ + + if (gettimer) TIMER_TOC(timer_dosub_subset); +} + +/** + * @brief Determine which version of runner_doself needs to be called depending + * on the optimisation level. + * + * @param r #runner + * @param c #cell c + * + */ +void runner_doself_branch_stars_density(struct runner *r, struct cell *c) { + + const struct engine *restrict e = r->e; + + /* Anything to do here? */ + if (!cell_is_active_stars(c, e)) return; + + /* Did we mess up the recursion? */ + if (c->h_max_old * kernel_gamma > c->dmin) + error("Cell smaller than smoothing length"); + + runner_doself_stars_density(r, c, 1); +} + +/** + * @brief Determine which version of DOPAIR1 needs to be called depending on the + * orientation of the cells or whether DOPAIR1 needs to be called at all. + * + * @param r #runner + * @param ci #cell ci + * @param cj #cell cj + * + */ +void runner_dopair_branch_stars_density(struct runner *r, struct cell *ci, + struct cell *cj) { + + const struct engine *restrict e = r->e; + + /* Anything to do here? */ + if (!cell_is_active_stars(ci, e) && !cell_is_active_stars(cj, e)) return; + + /* Check that cells are drifted. */ + if (!cell_are_part_drifted(ci, e) || !cell_are_part_drifted(cj, e)) + error("Interacting undrifted cells."); + + /* Get the sort ID. */ + double shift[3] = {0.0, 0.0, 0.0}; + const int sid = space_getsid(e->s, &ci, &cj, shift); + + /* Have the cells been sorted? */ + if (!(ci->sorted & (1 << sid)) || + ci->dx_max_sort_old > space_maxreldx * ci->dmin) + error("Interacting unsorted cells."); + if (!(cj->sorted & (1 << sid)) || + cj->dx_max_sort_old > space_maxreldx * cj->dmin) + error("Interacting unsorted cells."); + +#ifdef SWIFT_DEBUG_CHECKS + /* Pick-out the sorted lists. */ + const struct entry *restrict sort_i = ci->sort[sid]; + const struct entry *restrict sort_j = cj->sort[sid]; + + /* Check that the dx_max_sort values in the cell are indeed an upper + bound on particle movement. */ + for (int pid = 0; pid < ci->count; pid++) { + const struct part *p = &ci->parts[sort_i[pid].i]; + const float d = p->x[0] * runner_shift[sid][0] + + p->x[1] * runner_shift[sid][1] + + p->x[2] * runner_shift[sid][2]; + if (fabsf(d - sort_i[pid].d) - ci->dx_max_sort > + 1.0e-4 * max(fabsf(d), ci->dx_max_sort_old) && + fabsf(d - sort_i[pid].d) - ci->dx_max_sort > ci->width[0] * 1.0e-10) + error( + "particle shift diff exceeds dx_max_sort in cell ci. ci->nodeID=%d " + "cj->nodeID=%d d=%e sort_i[pid].d=%e ci->dx_max_sort=%e " + "ci->dx_max_sort_old=%e", + ci->nodeID, cj->nodeID, d, sort_i[pid].d, ci->dx_max_sort, + ci->dx_max_sort_old); + } + for (int pjd = 0; pjd < cj->count; pjd++) { + const struct part *p = &cj->parts[sort_j[pjd].i]; + const float d = p->x[0] * runner_shift[sid][0] + + p->x[1] * runner_shift[sid][1] + + p->x[2] * runner_shift[sid][2]; + if ((fabsf(d - sort_j[pjd].d) - cj->dx_max_sort) > + 1.0e-4 * max(fabsf(d), cj->dx_max_sort_old) && + (fabsf(d - sort_j[pjd].d) - cj->dx_max_sort) > cj->width[0] * 1.0e-10) + error( + "particle shift diff exceeds dx_max_sort in cell cj. cj->nodeID=%d " + "ci->nodeID=%d d=%e sort_j[pjd].d=%e cj->dx_max_sort=%e " + "cj->dx_max_sort_old=%e", + cj->nodeID, ci->nodeID, d, sort_j[pjd].d, cj->dx_max_sort, + cj->dx_max_sort_old); + } +#endif /* SWIFT_DEBUG_CHECKS */ + + runner_dopair_stars_density(r, ci, cj, 1); +} + +/** + * @brief Compute grouped sub-cell interactions for pairs + * + * @param r The #runner. + * @param ci The first #cell. + * @param cj The second #cell. + * @param sid The direction linking the cells + * @param gettimer Do we have a timer ? + * + * @todo Hard-code the sid on the recursive calls to avoid the + * redundant computations to find the sid on-the-fly. + */ +void runner_dosub_pair_stars_density(struct runner *r, struct cell *ci, + struct cell *cj, int sid, int gettimer) { + + struct space *s = r->e->s; + const struct engine *e = r->e; + + TIMER_TIC; + + /* Should we even bother? */ + if (!cell_is_active_stars(ci, e) && !cell_is_active_stars(cj, e)) return; + if (ci->scount == 0 || cj->scount == 0) return; + + /* Get the type of pair if not specified explicitly. */ + double shift[3]; + sid = space_getsid(s, &ci, &cj, shift); + + /* Recurse? */ + if (cell_can_recurse_in_pair_stars_task(ci) && + cell_can_recurse_in_pair_stars_task(cj)) { + + /* Different types of flags. */ + switch (sid) { + + /* Regular sub-cell interactions of a single cell. */ + case 0: /* ( 1 , 1 , 1 ) */ + if (ci->progeny[7] != NULL && cj->progeny[0] != NULL) + runner_dosub_pair_stars_density(r, ci->progeny[7], cj->progeny[0], -1, + 0); + break; + + case 1: /* ( 1 , 1 , 0 ) */ + if (ci->progeny[6] != NULL && cj->progeny[0] != NULL) + runner_dosub_pair_stars_density(r, ci->progeny[6], cj->progeny[0], -1, + 0); + if (ci->progeny[6] != NULL && cj->progeny[1] != NULL) + runner_dosub_pair_stars_density(r, ci->progeny[6], cj->progeny[1], -1, + 0); + if (ci->progeny[7] != NULL && cj->progeny[0] != NULL) + runner_dosub_pair_stars_density(r, ci->progeny[7], cj->progeny[0], -1, + 0); + if (ci->progeny[7] != NULL && cj->progeny[1] != NULL) + runner_dosub_pair_stars_density(r, ci->progeny[7], cj->progeny[1], -1, + 0); + break; + + case 2: /* ( 1 , 1 , -1 ) */ + if (ci->progeny[6] != NULL && cj->progeny[1] != NULL) + runner_dosub_pair_stars_density(r, ci->progeny[6], cj->progeny[1], -1, + 0); + break; + + case 3: /* ( 1 , 0 , 1 ) */ + if (ci->progeny[5] != NULL && cj->progeny[0] != NULL) + runner_dosub_pair_stars_density(r, ci->progeny[5], cj->progeny[0], -1, + 0); + if (ci->progeny[5] != NULL && cj->progeny[2] != NULL) + runner_dosub_pair_stars_density(r, ci->progeny[5], cj->progeny[2], -1, + 0); + if (ci->progeny[7] != NULL && cj->progeny[0] != NULL) + runner_dosub_pair_stars_density(r, ci->progeny[7], cj->progeny[0], -1, + 0); + if (ci->progeny[7] != NULL && cj->progeny[2] != NULL) + runner_dosub_pair_stars_density(r, ci->progeny[7], cj->progeny[2], -1, + 0); + break; + + case 4: /* ( 1 , 0 , 0 ) */ + if (ci->progeny[4] != NULL && cj->progeny[0] != NULL) + runner_dosub_pair_stars_density(r, ci->progeny[4], cj->progeny[0], -1, + 0); + if (ci->progeny[4] != NULL && cj->progeny[1] != NULL) + runner_dosub_pair_stars_density(r, ci->progeny[4], cj->progeny[1], -1, + 0); + if (ci->progeny[4] != NULL && cj->progeny[2] != NULL) + runner_dosub_pair_stars_density(r, ci->progeny[4], cj->progeny[2], -1, + 0); + if (ci->progeny[4] != NULL && cj->progeny[3] != NULL) + runner_dosub_pair_stars_density(r, ci->progeny[4], cj->progeny[3], -1, + 0); + if (ci->progeny[5] != NULL && cj->progeny[0] != NULL) + runner_dosub_pair_stars_density(r, ci->progeny[5], cj->progeny[0], -1, + 0); + if (ci->progeny[5] != NULL && cj->progeny[1] != NULL) + runner_dosub_pair_stars_density(r, ci->progeny[5], cj->progeny[1], -1, + 0); + if (ci->progeny[5] != NULL && cj->progeny[2] != NULL) + runner_dosub_pair_stars_density(r, ci->progeny[5], cj->progeny[2], -1, + 0); + if (ci->progeny[5] != NULL && cj->progeny[3] != NULL) + runner_dosub_pair_stars_density(r, ci->progeny[5], cj->progeny[3], -1, + 0); + if (ci->progeny[6] != NULL && cj->progeny[0] != NULL) + runner_dosub_pair_stars_density(r, ci->progeny[6], cj->progeny[0], -1, + 0); + if (ci->progeny[6] != NULL && cj->progeny[1] != NULL) + runner_dosub_pair_stars_density(r, ci->progeny[6], cj->progeny[1], -1, + 0); + if (ci->progeny[6] != NULL && cj->progeny[2] != NULL) + runner_dosub_pair_stars_density(r, ci->progeny[6], cj->progeny[2], -1, + 0); + if (ci->progeny[6] != NULL && cj->progeny[3] != NULL) + runner_dosub_pair_stars_density(r, ci->progeny[6], cj->progeny[3], -1, + 0); + if (ci->progeny[7] != NULL && cj->progeny[0] != NULL) + runner_dosub_pair_stars_density(r, ci->progeny[7], cj->progeny[0], -1, + 0); + if (ci->progeny[7] != NULL && cj->progeny[1] != NULL) + runner_dosub_pair_stars_density(r, ci->progeny[7], cj->progeny[1], -1, + 0); + if (ci->progeny[7] != NULL && cj->progeny[2] != NULL) + runner_dosub_pair_stars_density(r, ci->progeny[7], cj->progeny[2], -1, + 0); + if (ci->progeny[7] != NULL && cj->progeny[3] != NULL) + runner_dosub_pair_stars_density(r, ci->progeny[7], cj->progeny[3], -1, + 0); + break; + + case 5: /* ( 1 , 0 , -1 ) */ + if (ci->progeny[4] != NULL && cj->progeny[1] != NULL) + runner_dosub_pair_stars_density(r, ci->progeny[4], cj->progeny[1], -1, + 0); + if (ci->progeny[4] != NULL && cj->progeny[3] != NULL) + runner_dosub_pair_stars_density(r, ci->progeny[4], cj->progeny[3], -1, + 0); + if (ci->progeny[6] != NULL && cj->progeny[1] != NULL) + runner_dosub_pair_stars_density(r, ci->progeny[6], cj->progeny[1], -1, + 0); + if (ci->progeny[6] != NULL && cj->progeny[3] != NULL) + runner_dosub_pair_stars_density(r, ci->progeny[6], cj->progeny[3], -1, + 0); + break; + + case 6: /* ( 1 , -1 , 1 ) */ + if (ci->progeny[5] != NULL && cj->progeny[2] != NULL) + runner_dosub_pair_stars_density(r, ci->progeny[5], cj->progeny[2], -1, + 0); + break; + + case 7: /* ( 1 , -1 , 0 ) */ + if (ci->progeny[4] != NULL && cj->progeny[2] != NULL) + runner_dosub_pair_stars_density(r, ci->progeny[4], cj->progeny[2], -1, + 0); + if (ci->progeny[4] != NULL && cj->progeny[3] != NULL) + runner_dosub_pair_stars_density(r, ci->progeny[4], cj->progeny[3], -1, + 0); + if (ci->progeny[5] != NULL && cj->progeny[2] != NULL) + runner_dosub_pair_stars_density(r, ci->progeny[5], cj->progeny[2], -1, + 0); + if (ci->progeny[5] != NULL && cj->progeny[3] != NULL) + runner_dosub_pair_stars_density(r, ci->progeny[5], cj->progeny[3], -1, + 0); + break; + + case 8: /* ( 1 , -1 , -1 ) */ + if (ci->progeny[4] != NULL && cj->progeny[3] != NULL) + runner_dosub_pair_stars_density(r, ci->progeny[4], cj->progeny[3], -1, + 0); + break; + + case 9: /* ( 0 , 1 , 1 ) */ + if (ci->progeny[3] != NULL && cj->progeny[0] != NULL) + runner_dosub_pair_stars_density(r, ci->progeny[3], cj->progeny[0], -1, + 0); + if (ci->progeny[3] != NULL && cj->progeny[4] != NULL) + runner_dosub_pair_stars_density(r, ci->progeny[3], cj->progeny[4], -1, + 0); + if (ci->progeny[7] != NULL && cj->progeny[0] != NULL) + runner_dosub_pair_stars_density(r, ci->progeny[7], cj->progeny[0], -1, + 0); + if (ci->progeny[7] != NULL && cj->progeny[4] != NULL) + runner_dosub_pair_stars_density(r, ci->progeny[7], cj->progeny[4], -1, + 0); + break; + + case 10: /* ( 0 , 1 , 0 ) */ + if (ci->progeny[2] != NULL && cj->progeny[0] != NULL) + runner_dosub_pair_stars_density(r, ci->progeny[2], cj->progeny[0], -1, + 0); + if (ci->progeny[2] != NULL && cj->progeny[1] != NULL) + runner_dosub_pair_stars_density(r, ci->progeny[2], cj->progeny[1], -1, + 0); + if (ci->progeny[2] != NULL && cj->progeny[4] != NULL) + runner_dosub_pair_stars_density(r, ci->progeny[2], cj->progeny[4], -1, + 0); + if (ci->progeny[2] != NULL && cj->progeny[5] != NULL) + runner_dosub_pair_stars_density(r, ci->progeny[2], cj->progeny[5], -1, + 0); + if (ci->progeny[3] != NULL && cj->progeny[0] != NULL) + runner_dosub_pair_stars_density(r, ci->progeny[3], cj->progeny[0], -1, + 0); + if (ci->progeny[3] != NULL && cj->progeny[1] != NULL) + runner_dosub_pair_stars_density(r, ci->progeny[3], cj->progeny[1], -1, + 0); + if (ci->progeny[3] != NULL && cj->progeny[4] != NULL) + runner_dosub_pair_stars_density(r, ci->progeny[3], cj->progeny[4], -1, + 0); + if (ci->progeny[3] != NULL && cj->progeny[5] != NULL) + runner_dosub_pair_stars_density(r, ci->progeny[3], cj->progeny[5], -1, + 0); + if (ci->progeny[6] != NULL && cj->progeny[0] != NULL) + runner_dosub_pair_stars_density(r, ci->progeny[6], cj->progeny[0], -1, + 0); + if (ci->progeny[6] != NULL && cj->progeny[1] != NULL) + runner_dosub_pair_stars_density(r, ci->progeny[6], cj->progeny[1], -1, + 0); + if (ci->progeny[6] != NULL && cj->progeny[4] != NULL) + runner_dosub_pair_stars_density(r, ci->progeny[6], cj->progeny[4], -1, + 0); + if (ci->progeny[6] != NULL && cj->progeny[5] != NULL) + runner_dosub_pair_stars_density(r, ci->progeny[6], cj->progeny[5], -1, + 0); + if (ci->progeny[7] != NULL && cj->progeny[0] != NULL) + runner_dosub_pair_stars_density(r, ci->progeny[7], cj->progeny[0], -1, + 0); + if (ci->progeny[7] != NULL && cj->progeny[1] != NULL) + runner_dosub_pair_stars_density(r, ci->progeny[7], cj->progeny[1], -1, + 0); + if (ci->progeny[7] != NULL && cj->progeny[4] != NULL) + runner_dosub_pair_stars_density(r, ci->progeny[7], cj->progeny[4], -1, + 0); + if (ci->progeny[7] != NULL && cj->progeny[5] != NULL) + runner_dosub_pair_stars_density(r, ci->progeny[7], cj->progeny[5], -1, + 0); + break; + + case 11: /* ( 0 , 1 , -1 ) */ + if (ci->progeny[2] != NULL && cj->progeny[1] != NULL) + runner_dosub_pair_stars_density(r, ci->progeny[2], cj->progeny[1], -1, + 0); + if (ci->progeny[2] != NULL && cj->progeny[5] != NULL) + runner_dosub_pair_stars_density(r, ci->progeny[2], cj->progeny[5], -1, + 0); + if (ci->progeny[6] != NULL && cj->progeny[1] != NULL) + runner_dosub_pair_stars_density(r, ci->progeny[6], cj->progeny[1], -1, + 0); + if (ci->progeny[6] != NULL && cj->progeny[5] != NULL) + runner_dosub_pair_stars_density(r, ci->progeny[6], cj->progeny[5], -1, + 0); + break; + + case 12: /* ( 0 , 0 , 1 ) */ + if (ci->progeny[1] != NULL && cj->progeny[0] != NULL) + runner_dosub_pair_stars_density(r, ci->progeny[1], cj->progeny[0], -1, + 0); + if (ci->progeny[1] != NULL && cj->progeny[2] != NULL) + runner_dosub_pair_stars_density(r, ci->progeny[1], cj->progeny[2], -1, + 0); + if (ci->progeny[1] != NULL && cj->progeny[4] != NULL) + runner_dosub_pair_stars_density(r, ci->progeny[1], cj->progeny[4], -1, + 0); + if (ci->progeny[1] != NULL && cj->progeny[6] != NULL) + runner_dosub_pair_stars_density(r, ci->progeny[1], cj->progeny[6], -1, + 0); + if (ci->progeny[3] != NULL && cj->progeny[0] != NULL) + runner_dosub_pair_stars_density(r, ci->progeny[3], cj->progeny[0], -1, + 0); + if (ci->progeny[3] != NULL && cj->progeny[2] != NULL) + runner_dosub_pair_stars_density(r, ci->progeny[3], cj->progeny[2], -1, + 0); + if (ci->progeny[3] != NULL && cj->progeny[4] != NULL) + runner_dosub_pair_stars_density(r, ci->progeny[3], cj->progeny[4], -1, + 0); + if (ci->progeny[3] != NULL && cj->progeny[6] != NULL) + runner_dosub_pair_stars_density(r, ci->progeny[3], cj->progeny[6], -1, + 0); + if (ci->progeny[5] != NULL && cj->progeny[0] != NULL) + runner_dosub_pair_stars_density(r, ci->progeny[5], cj->progeny[0], -1, + 0); + if (ci->progeny[5] != NULL && cj->progeny[2] != NULL) + runner_dosub_pair_stars_density(r, ci->progeny[5], cj->progeny[2], -1, + 0); + if (ci->progeny[5] != NULL && cj->progeny[4] != NULL) + runner_dosub_pair_stars_density(r, ci->progeny[5], cj->progeny[4], -1, + 0); + if (ci->progeny[5] != NULL && cj->progeny[6] != NULL) + runner_dosub_pair_stars_density(r, ci->progeny[5], cj->progeny[6], -1, + 0); + if (ci->progeny[7] != NULL && cj->progeny[0] != NULL) + runner_dosub_pair_stars_density(r, ci->progeny[7], cj->progeny[0], -1, + 0); + if (ci->progeny[7] != NULL && cj->progeny[2] != NULL) + runner_dosub_pair_stars_density(r, ci->progeny[7], cj->progeny[2], -1, + 0); + if (ci->progeny[7] != NULL && cj->progeny[4] != NULL) + runner_dosub_pair_stars_density(r, ci->progeny[7], cj->progeny[4], -1, + 0); + if (ci->progeny[7] != NULL && cj->progeny[6] != NULL) + runner_dosub_pair_stars_density(r, ci->progeny[7], cj->progeny[6], -1, + 0); + break; + } + + } + + /* Otherwise, compute the pair directly. */ + else if (cell_is_active_stars(ci, e) || cell_is_active_stars(cj, e)) { + + /* Make sure both cells are drifted to the current timestep. */ + if (!cell_are_part_drifted(ci, e) || !cell_are_part_drifted(cj, e)) + error("Interacting undrifted cells."); + + /* Do any of the cells need to be sorted first? */ + if (!(ci->sorted & (1 << sid)) || + ci->dx_max_sort_old > ci->dmin * space_maxreldx) + error("Interacting unsorted cell."); + if (!(cj->sorted & (1 << sid)) || + cj->dx_max_sort_old > cj->dmin * space_maxreldx) + error("Interacting unsorted cell."); + + /* Compute the interactions. */ + runner_dopair_branch_stars_density(r, ci, cj); + } + + if (gettimer) TIMER_TOC(TIMER_DOSUB_PAIR); +} + +/** + * @brief Compute grouped sub-cell interactions for self tasks + * + * @param r The #runner. + * @param ci The first #cell. + * @param gettimer Do we have a timer ? + */ +void runner_dosub_self_stars_density(struct runner *r, struct cell *ci, + int gettimer) { + + TIMER_TIC; + + /* Should we even bother? */ + if (ci->scount == 0 || !cell_is_active_stars(ci, r->e)) return; + + /* Recurse? */ + if (cell_can_recurse_in_self_stars_task(ci)) { + + /* Loop over all progeny. */ + for (int k = 0; k < 8; k++) + if (ci->progeny[k] != NULL) { + runner_dosub_self_stars_density(r, ci->progeny[k], 0); + for (int j = k + 1; j < 8; j++) + if (ci->progeny[j] != NULL) + runner_dosub_pair_stars_density(r, ci->progeny[k], ci->progeny[j], + -1, 0); + } + } + + /* Otherwise, compute self-interaction. */ + else { + + runner_doself_branch_stars_density(r, ci); + } + + if (gettimer) TIMER_TOC(timer_dosub_self_stars_density); +} diff --git a/src/scheduler.c b/src/scheduler.c index a8047cc9ca18e8c330262844f060ceeb095e028b..b285b6e2a467e135a24d393312c78f5c344f14be 100644 --- a/src/scheduler.c +++ b/src/scheduler.c @@ -263,6 +263,7 @@ void scheduler_write_dependencies(struct scheduler *s, int verbose) { int gradient_cluster[4] = {0}; int force_cluster[4] = {0}; int gravity_cluster[5] = {0}; + int stars_density_cluster[4] = {0}; /* Check whether we need to construct a group of tasks */ for (int type = 0; type < task_type_count; ++type) { @@ -283,6 +284,9 @@ void scheduler_write_dependencies(struct scheduler *s, int verbose) { force_cluster[k] = 1; if (type == task_type_self + k && subtype == task_subtype_grav) gravity_cluster[k] = 1; + if (type == task_type_self + k && + subtype == task_subtype_stars_density) + stars_density_cluster[k] = 1; } if (type == task_type_grav_mesh) gravity_cluster[2] = 1; if (type == task_type_grav_long_range) gravity_cluster[3] = 1; @@ -333,6 +337,15 @@ void scheduler_write_dependencies(struct scheduler *s, int verbose) { fprintf(f, "\t\t %s;\n", taskID_names[task_type_grav_mm]); fprintf(f, "\t};\n"); + /* Make a cluster for the density tasks */ + fprintf(f, "\t subgraph cluster4{\n"); + fprintf(f, "\t\t label=\"\";\n"); + for (int k = 0; k < 4; ++k) + if (stars_density_cluster[k]) + fprintf(f, "\t\t \"%s %s\";\n", taskID_names[task_type_self + k], + subtaskID_names[task_subtype_stars_density]); + fprintf(f, "\t};\n"); + /* Write down the number of relation */ for (int ta_type = 0; ta_type < task_type_count; ta_type++) { @@ -850,6 +863,480 @@ static void scheduler_splittask_hydro(struct task *t, struct scheduler *s) { } /* iterate over the current task. */ } +/** + * @brief Split a stars task if too large. + * + * @param t The #task + * @param s The #scheduler we are working in. + */ +static void scheduler_splittask_stars(struct task *t, struct scheduler *s) { + + // LOIC: This is un-tested. Need to verify that it works. + + /* Iterate on this task until we're done with it. */ + int redo = 1; + while (redo) { + + /* Reset the redo flag. */ + redo = 0; + + /* Non-splittable task? */ + if ((t->ci == NULL) || (t->type == task_type_pair && t->cj == NULL) || + t->ci->scount == 0 || (t->cj != NULL && t->cj->scount == 0)) { + t->type = task_type_none; + t->subtype = task_subtype_none; + t->cj = NULL; + t->skip = 1; + break; + } + + /* Self-interaction? */ + if (t->type == task_type_self) { + + /* Get a handle on the cell involved. */ + struct cell *ci = t->ci; + + /* Foreign task? */ + if (ci->nodeID != s->nodeID) { + t->skip = 1; + break; + } + + /* Is this cell even split and the task does not violate h ? */ + if (cell_can_split_self_stars_task(ci)) { + + /* Make a sub? */ + if (scheduler_dosub && ci->scount < space_subsize_self_stars) { + + /* convert to a self-subtask. */ + t->type = task_type_sub_self; + + /* Otherwise, make tasks explicitly. */ + } else { + + /* Take a step back (we're going to recycle the current task)... */ + redo = 1; + + /* Add the self tasks. */ + int first_child = 0; + while (ci->progeny[first_child] == NULL) first_child++; + t->ci = ci->progeny[first_child]; + for (int k = first_child + 1; k < 8; k++) + if (ci->progeny[k] != NULL && ci->progeny[k]->scount) + scheduler_splittask_stars( + scheduler_addtask(s, task_type_self, t->subtype, 0, 0, + ci->progeny[k], NULL), + s); + + /* Make a task for each pair of progeny */ + for (int j = 0; j < 8; j++) + if (ci->progeny[j] != NULL && ci->progeny[j]->scount) + for (int k = j + 1; k < 8; k++) + if (ci->progeny[k] != NULL && ci->progeny[k]->scount) + scheduler_splittask_stars( + scheduler_addtask(s, task_type_pair, t->subtype, + sub_sid_flag[j][k], 0, ci->progeny[j], + ci->progeny[k]), + s); + } + } /* Cell is split */ + + } /* Self interaction */ + + /* Pair interaction? */ + else if (t->type == task_type_pair) { + + /* Get a handle on the cells involved. */ + struct cell *ci = t->ci; + struct cell *cj = t->cj; + + /* Foreign task? */ + if (ci->nodeID != s->nodeID && cj->nodeID != s->nodeID) { + t->skip = 1; + break; + } + + /* Get the sort ID, use space_getsid and not t->flags + to make sure we get ci and cj swapped if needed. */ + double shift[3]; + const int sid = space_getsid(s->space, &ci, &cj, shift); + + /* Should this task be split-up? */ + if (cell_can_split_pair_stars_task(ci) && + cell_can_split_pair_stars_task(cj)) { + + /* Replace by a single sub-task? */ + if (scheduler_dosub && /* Use division to avoid integer overflow. */ + ci->scount * sid_scale[sid] < + space_subsize_pair_stars / cj->scount && + !sort_is_corner(sid)) { + + /* Make this task a sub task. */ + t->type = task_type_sub_pair; + + /* Otherwise, split it. */ + } else { + + /* Take a step back (we're going to recycle the current task)... */ + redo = 1; + + /* For each different sorting type... */ + switch (sid) { + + case 0: /* ( 1 , 1 , 1 ) */ + t->ci = ci->progeny[7]; + t->cj = cj->progeny[0]; + t->flags = 0; + break; + + case 1: /* ( 1 , 1 , 0 ) */ + t->ci = ci->progeny[6]; + t->cj = cj->progeny[0]; + t->flags = 1; + scheduler_splittask_stars( + scheduler_addtask(s, task_type_pair, t->subtype, 1, 0, + ci->progeny[7], cj->progeny[1]), + s); + scheduler_splittask_stars( + scheduler_addtask(s, task_type_pair, t->subtype, 0, 0, + ci->progeny[6], cj->progeny[1]), + s); + scheduler_splittask_stars( + scheduler_addtask(s, task_type_pair, t->subtype, 2, 0, + ci->progeny[7], cj->progeny[0]), + s); + break; + + case 2: /* ( 1 , 1 , -1 ) */ + t->ci = ci->progeny[6]; + t->cj = cj->progeny[1]; + t->flags = 2; + break; + + case 3: /* ( 1 , 0 , 1 ) */ + t->ci = ci->progeny[5]; + t->cj = cj->progeny[0]; + t->flags = 3; + scheduler_splittask_stars( + scheduler_addtask(s, task_type_pair, t->subtype, 3, 0, + ci->progeny[7], cj->progeny[2]), + s); + scheduler_splittask_stars( + scheduler_addtask(s, task_type_pair, t->subtype, 0, 0, + ci->progeny[5], cj->progeny[2]), + s); + scheduler_splittask_stars( + scheduler_addtask(s, task_type_pair, t->subtype, 6, 0, + ci->progeny[7], cj->progeny[0]), + s); + break; + + case 4: /* ( 1 , 0 , 0 ) */ + t->ci = ci->progeny[4]; + t->cj = cj->progeny[0]; + t->flags = 4; + scheduler_splittask_stars( + scheduler_addtask(s, task_type_pair, t->subtype, 5, 0, + ci->progeny[5], cj->progeny[0]), + s); + scheduler_splittask_stars( + scheduler_addtask(s, task_type_pair, t->subtype, 7, 0, + ci->progeny[6], cj->progeny[0]), + s); + scheduler_splittask_stars( + scheduler_addtask(s, task_type_pair, t->subtype, 8, 0, + ci->progeny[7], cj->progeny[0]), + s); + scheduler_splittask_stars( + scheduler_addtask(s, task_type_pair, t->subtype, 3, 0, + ci->progeny[4], cj->progeny[1]), + s); + scheduler_splittask_stars( + scheduler_addtask(s, task_type_pair, t->subtype, 4, 0, + ci->progeny[5], cj->progeny[1]), + s); + scheduler_splittask_stars( + scheduler_addtask(s, task_type_pair, t->subtype, 6, 0, + ci->progeny[6], cj->progeny[1]), + s); + scheduler_splittask_stars( + scheduler_addtask(s, task_type_pair, t->subtype, 7, 0, + ci->progeny[7], cj->progeny[1]), + s); + scheduler_splittask_stars( + scheduler_addtask(s, task_type_pair, t->subtype, 1, 0, + ci->progeny[4], cj->progeny[2]), + s); + scheduler_splittask_stars( + scheduler_addtask(s, task_type_pair, t->subtype, 2, 0, + ci->progeny[5], cj->progeny[2]), + s); + scheduler_splittask_stars( + scheduler_addtask(s, task_type_pair, t->subtype, 4, 0, + ci->progeny[6], cj->progeny[2]), + s); + scheduler_splittask_stars( + scheduler_addtask(s, task_type_pair, t->subtype, 5, 0, + ci->progeny[7], cj->progeny[2]), + s); + scheduler_splittask_stars( + scheduler_addtask(s, task_type_pair, t->subtype, 0, 0, + ci->progeny[4], cj->progeny[3]), + s); + scheduler_splittask_stars( + scheduler_addtask(s, task_type_pair, t->subtype, 1, 0, + ci->progeny[5], cj->progeny[3]), + s); + scheduler_splittask_stars( + scheduler_addtask(s, task_type_pair, t->subtype, 3, 0, + ci->progeny[6], cj->progeny[3]), + s); + scheduler_splittask_stars( + scheduler_addtask(s, task_type_pair, t->subtype, 4, 0, + ci->progeny[7], cj->progeny[3]), + s); + break; + + case 5: /* ( 1 , 0 , -1 ) */ + t->ci = ci->progeny[4]; + t->cj = cj->progeny[1]; + t->flags = 5; + scheduler_splittask_stars( + scheduler_addtask(s, task_type_pair, t->subtype, 5, 0, + ci->progeny[6], cj->progeny[3]), + s); + scheduler_splittask_stars( + scheduler_addtask(s, task_type_pair, t->subtype, 2, 0, + ci->progeny[4], cj->progeny[3]), + s); + scheduler_splittask_stars( + scheduler_addtask(s, task_type_pair, t->subtype, 8, 0, + ci->progeny[6], cj->progeny[1]), + s); + break; + + case 6: /* ( 1 , -1 , 1 ) */ + t->ci = ci->progeny[5]; + t->cj = cj->progeny[2]; + t->flags = 6; + break; + + case 7: /* ( 1 , -1 , 0 ) */ + t->ci = ci->progeny[4]; + t->cj = cj->progeny[3]; + t->flags = 6; + scheduler_splittask_stars( + scheduler_addtask(s, task_type_pair, t->subtype, 8, 0, + ci->progeny[5], cj->progeny[2]), + s); + scheduler_splittask_stars( + scheduler_addtask(s, task_type_pair, t->subtype, 7, 0, + ci->progeny[4], cj->progeny[2]), + s); + scheduler_splittask_stars( + scheduler_addtask(s, task_type_pair, t->subtype, 7, 0, + ci->progeny[5], cj->progeny[3]), + s); + break; + + case 8: /* ( 1 , -1 , -1 ) */ + t->ci = ci->progeny[4]; + t->cj = cj->progeny[3]; + t->flags = 8; + break; + + case 9: /* ( 0 , 1 , 1 ) */ + t->ci = ci->progeny[3]; + t->cj = cj->progeny[0]; + t->flags = 9; + scheduler_splittask_stars( + scheduler_addtask(s, task_type_pair, t->subtype, 9, 0, + ci->progeny[7], cj->progeny[4]), + s); + scheduler_splittask_stars( + scheduler_addtask(s, task_type_pair, t->subtype, 0, 0, + ci->progeny[3], cj->progeny[4]), + s); + scheduler_splittask_stars( + scheduler_addtask(s, task_type_pair, t->subtype, 8, 0, + ci->progeny[7], cj->progeny[0]), + s); + break; + + case 10: /* ( 0 , 1 , 0 ) */ + t->ci = ci->progeny[2]; + t->cj = cj->progeny[0]; + t->flags = 10; + scheduler_splittask_stars( + scheduler_addtask(s, task_type_pair, t->subtype, 11, 0, + ci->progeny[3], cj->progeny[0]), + s); + scheduler_splittask_stars( + scheduler_addtask(s, task_type_pair, t->subtype, 7, 0, + ci->progeny[6], cj->progeny[0]), + s); + scheduler_splittask_stars( + scheduler_addtask(s, task_type_pair, t->subtype, 6, 0, + ci->progeny[7], cj->progeny[0]), + s); + scheduler_splittask_stars( + scheduler_addtask(s, task_type_pair, t->subtype, 9, 0, + ci->progeny[2], cj->progeny[1]), + s); + scheduler_splittask_stars( + scheduler_addtask(s, task_type_pair, t->subtype, 10, 0, + ci->progeny[3], cj->progeny[1]), + s); + scheduler_splittask_stars( + scheduler_addtask(s, task_type_pair, t->subtype, 8, 0, + ci->progeny[6], cj->progeny[1]), + s); + scheduler_splittask_stars( + scheduler_addtask(s, task_type_pair, t->subtype, 7, 0, + ci->progeny[7], cj->progeny[1]), + s); + scheduler_splittask_stars( + scheduler_addtask(s, task_type_pair, t->subtype, 1, 0, + ci->progeny[2], cj->progeny[4]), + s); + scheduler_splittask_stars( + scheduler_addtask(s, task_type_pair, t->subtype, 2, 0, + ci->progeny[3], cj->progeny[4]), + s); + scheduler_splittask_stars( + scheduler_addtask(s, task_type_pair, t->subtype, 10, 0, + ci->progeny[6], cj->progeny[4]), + s); + scheduler_splittask_stars( + scheduler_addtask(s, task_type_pair, t->subtype, 11, 0, + ci->progeny[7], cj->progeny[4]), + s); + scheduler_splittask_stars( + scheduler_addtask(s, task_type_pair, t->subtype, 0, 0, + ci->progeny[2], cj->progeny[5]), + s); + scheduler_splittask_stars( + scheduler_addtask(s, task_type_pair, t->subtype, 1, 0, + ci->progeny[3], cj->progeny[5]), + s); + scheduler_splittask_stars( + scheduler_addtask(s, task_type_pair, t->subtype, 9, 0, + ci->progeny[6], cj->progeny[5]), + s); + scheduler_splittask_stars( + scheduler_addtask(s, task_type_pair, t->subtype, 10, 0, + ci->progeny[7], cj->progeny[5]), + s); + break; + + case 11: /* ( 0 , 1 , -1 ) */ + t->ci = ci->progeny[2]; + t->cj = cj->progeny[1]; + t->flags = 11; + scheduler_splittask_stars( + scheduler_addtask(s, task_type_pair, t->subtype, 11, 0, + ci->progeny[6], cj->progeny[5]), + s); + scheduler_splittask_stars( + scheduler_addtask(s, task_type_pair, t->subtype, 2, 0, + ci->progeny[2], cj->progeny[5]), + s); + scheduler_splittask_stars( + scheduler_addtask(s, task_type_pair, t->subtype, 6, 0, + ci->progeny[6], cj->progeny[1]), + s); + break; + + case 12: /* ( 0 , 0 , 1 ) */ + t->ci = ci->progeny[1]; + t->cj = cj->progeny[0]; + t->flags = 12; + scheduler_splittask_stars( + scheduler_addtask(s, task_type_pair, t->subtype, 11, 0, + ci->progeny[3], cj->progeny[0]), + s); + scheduler_splittask_stars( + scheduler_addtask(s, task_type_pair, t->subtype, 5, 0, + ci->progeny[5], cj->progeny[0]), + s); + scheduler_splittask_stars( + scheduler_addtask(s, task_type_pair, t->subtype, 2, 0, + ci->progeny[7], cj->progeny[0]), + s); + scheduler_splittask_stars( + scheduler_addtask(s, task_type_pair, t->subtype, 9, 0, + ci->progeny[1], cj->progeny[2]), + s); + scheduler_splittask_stars( + scheduler_addtask(s, task_type_pair, t->subtype, 12, 0, + ci->progeny[3], cj->progeny[2]), + s); + scheduler_splittask_stars( + scheduler_addtask(s, task_type_pair, t->subtype, 8, 0, + ci->progeny[5], cj->progeny[2]), + s); + scheduler_splittask_stars( + scheduler_addtask(s, task_type_pair, t->subtype, 5, 0, + ci->progeny[7], cj->progeny[2]), + s); + scheduler_splittask_stars( + scheduler_addtask(s, task_type_pair, t->subtype, 3, 0, + ci->progeny[1], cj->progeny[4]), + s); + scheduler_splittask_stars( + scheduler_addtask(s, task_type_pair, t->subtype, 6, 0, + ci->progeny[3], cj->progeny[4]), + s); + scheduler_splittask_stars( + scheduler_addtask(s, task_type_pair, t->subtype, 12, 0, + ci->progeny[5], cj->progeny[4]), + s); + scheduler_splittask_stars( + scheduler_addtask(s, task_type_pair, t->subtype, 11, 0, + ci->progeny[7], cj->progeny[4]), + s); + scheduler_splittask_stars( + scheduler_addtask(s, task_type_pair, t->subtype, 0, 0, + ci->progeny[1], cj->progeny[6]), + s); + scheduler_splittask_stars( + scheduler_addtask(s, task_type_pair, t->subtype, 3, 0, + ci->progeny[3], cj->progeny[6]), + s); + scheduler_splittask_stars( + scheduler_addtask(s, task_type_pair, t->subtype, 9, 0, + ci->progeny[5], cj->progeny[6]), + s); + scheduler_splittask_stars( + scheduler_addtask(s, task_type_pair, t->subtype, 12, 0, + ci->progeny[7], cj->progeny[6]), + s); + break; + } /* switch(sid) */ + } + + /* Otherwise, break it up if it is too large? */ + } else if (scheduler_doforcesplit && ci->split && cj->split && + (ci->scount > space_maxsize / cj->scount)) { + + /* Replace the current task. */ + t->type = task_type_none; + + for (int j = 0; j < 8; j++) + if (ci->progeny[j] != NULL && ci->progeny[j]->scount) + for (int k = 0; k < 8; k++) + if (cj->progeny[k] != NULL && cj->progeny[k]->scount) { + struct task *tl = + scheduler_addtask(s, task_type_pair, t->subtype, 0, 0, + ci->progeny[j], cj->progeny[k]); + scheduler_splittask_stars(tl, s); + tl->flags = space_getsid(s->space, &t->ci, &t->cj, shift); + } + } + } /* pair interaction? */ + } /* iterate over the current task. */ +} + /** * @brief Split a gravity task if too large. * @@ -1034,6 +1521,8 @@ void scheduler_splittasks_mapper(void *map_data, int num_elements, scheduler_splittask_gravity(t, s); } else if (t->type == task_type_grav_mesh) { /* For future use */ + } else if (t->subtype == task_subtype_stars_density) { + scheduler_splittask_stars(t, s); } else { #ifdef SWIFT_DEBUG_CHECKS error("Unexpected task sub-type"); @@ -1334,6 +1823,7 @@ void scheduler_reweight(struct scheduler *s, int verbose) { const float count_j = (t->cj != NULL) ? t->cj->count : 0.f; const float gcount_i = (t->ci != NULL) ? t->ci->gcount : 0.f; const float gcount_j = (t->cj != NULL) ? t->cj->gcount : 0.f; + const float scount_i = (t->ci != NULL) ? t->ci->scount : 0.f; switch (t->type) { case task_type_sort: @@ -1342,6 +1832,7 @@ void scheduler_reweight(struct scheduler *s, int verbose) { break; case task_type_self: + // LOIC: Need to do something for stars here if (t->subtype == task_subtype_grav) cost = 1.f * (wscale * gcount_i) * gcount_i; else if (t->subtype == task_subtype_external_grav) @@ -1351,6 +1842,7 @@ void scheduler_reweight(struct scheduler *s, int verbose) { break; case task_type_pair: + // LOIC: Need to do something for stars here if (t->subtype == task_subtype_grav) { if (t->ci->nodeID != nodeID || t->cj->nodeID != nodeID) cost = 3.f * (wscale * gcount_i) * gcount_j; @@ -1365,6 +1857,7 @@ void scheduler_reweight(struct scheduler *s, int verbose) { break; case task_type_sub_pair: + // LOIC: Need to do something for stars here if (t->ci->nodeID != nodeID || t->cj->nodeID != nodeID) { #ifdef SWIFT_DEBUG_CHECKS if (t->flags < 0) error("Negative flag value!"); @@ -1379,6 +1872,7 @@ void scheduler_reweight(struct scheduler *s, int verbose) { break; case task_type_sub_self: + // LOIC: Need to do something for stars here cost = 1.f * (wscale * count_i) * count_i; break; case task_type_ghost: @@ -1387,6 +1881,9 @@ void scheduler_reweight(struct scheduler *s, int verbose) { case task_type_extra_ghost: if (t->ci == t->ci->super_hydro) cost = wscale * count_i; break; + case task_type_stars_ghost: + if (t->ci == t->ci->super_hydro) cost = wscale * scount_i; + break; case task_type_drift_part: cost = wscale * count_i; break; @@ -1602,6 +2099,7 @@ void scheduler_enqueue(struct scheduler *s, struct task *t) { break; case task_type_kick1: case task_type_kick2: + case task_type_stars_ghost: case task_type_timestep: qid = t->ci->super->owner; break; diff --git a/src/serial_io.c b/src/serial_io.c index 14ed5f88477f8c92b9311c47121fa82d6e402bd6..9f874ca8c56c853816c0b52a719267481f443bc4 100644 --- a/src/serial_io.c +++ b/src/serial_io.c @@ -580,12 +580,12 @@ void read_ic_serial(char* fileName, const struct unit_system* internal_units, bzero(*parts, *Ngas * sizeof(struct part)); } - /* Allocate memory to store star particles */ + /* Allocate memory to store stars particles */ if (with_stars) { - *Nstars = N[swift_type_star]; + *Nstars = N[swift_type_stars]; if (posix_memalign((void*)sparts, spart_align, *Nstars * sizeof(struct spart)) != 0) - error("Error while allocating memory for star particles"); + error("Error while allocating memory for stars particles"); bzero(*sparts, *Nstars * sizeof(struct spart)); } @@ -594,7 +594,7 @@ void read_ic_serial(char* fileName, const struct unit_system* internal_units, Ndm = N[1]; *Ngparts = (with_hydro ? N[swift_type_gas] : 0) + N[swift_type_dark_matter] + - (with_stars ? N[swift_type_star] : 0); + (with_stars ? N[swift_type_stars] : 0); if (posix_memalign((void*)gparts, gpart_align, *Ngparts * sizeof(struct gpart)) != 0) error("Error while allocating memory for gravity particles"); @@ -655,10 +655,10 @@ void read_ic_serial(char* fileName, const struct unit_system* internal_units, } break; - case swift_type_star: + case swift_type_stars: if (with_stars) { Nparticles = *Nstars; - star_read_particles(*sparts, list, &num_fields); + stars_read_particles(*sparts, list, &num_fields); } break; @@ -700,9 +700,9 @@ void read_ic_serial(char* fileName, const struct unit_system* internal_units, /* Duplicate the hydro particles into gparts */ if (with_hydro) io_duplicate_hydro_gparts(&tp, *parts, *gparts, *Ngas, Ndm); - /* Duplicate the star particles into gparts */ + /* Duplicate the stars particles into gparts */ if (with_stars) - io_duplicate_star_gparts(&tp, *sparts, *gparts, *Nstars, Ndm + *Ngas); + io_duplicate_stars_gparts(&tp, *sparts, *gparts, *Nstars, Ndm + *Ngas); threadpool_clean(&tp); } @@ -883,6 +883,15 @@ void write_output_serial(struct engine* e, const char* baseName, H5Gclose(h_grp); } + /* Print the stellar parameters */ + if (e->policy & engine_policy_stars) { + h_grp = H5Gcreate(h_file, "/StarsScheme", H5P_DEFAULT, H5P_DEFAULT, + H5P_DEFAULT); + if (h_grp < 0) error("Error while creating stars group"); + stars_props_print_snapshot(h_grp, e->stars_properties); + H5Gclose(h_grp); + } + /* Print the cosmological model */ h_grp = H5Gcreate(h_file, "/Cosmology", H5P_DEFAULT, H5P_DEFAULT, H5P_DEFAULT); @@ -1029,9 +1038,9 @@ void write_output_serial(struct engine* e, const char* baseName, darkmatter_write_particles(dmparts, list, &num_fields); break; - case swift_type_star: + case swift_type_stars: Nparticles = Nstars; - star_write_particles(sparts, list, &num_fields); + stars_write_particles(sparts, list, &num_fields); break; default: diff --git a/src/single_io.c b/src/single_io.c index 577400fd877d6267e8aa3d0d5dc23032f53dd348..33981b6cfa36bd01248b6ba87c28466cb41e0947 100644 --- a/src/single_io.c +++ b/src/single_io.c @@ -471,10 +471,10 @@ void read_ic_single(const char* fileName, /* Allocate memory to store star particles */ if (with_stars) { - *Nstars = N[swift_type_star]; + *Nstars = N[swift_type_stars]; if (posix_memalign((void**)sparts, spart_align, *Nstars * sizeof(struct spart)) != 0) - error("Error while allocating memory for star particles"); + error("Error while allocating memory for stars particles"); bzero(*sparts, *Nstars * sizeof(struct spart)); } @@ -483,7 +483,7 @@ void read_ic_single(const char* fileName, Ndm = N[swift_type_dark_matter]; *Ngparts = (with_hydro ? N[swift_type_gas] : 0) + N[swift_type_dark_matter] + - (with_stars ? N[swift_type_star] : 0); + (with_stars ? N[swift_type_stars] : 0); if (posix_memalign((void**)gparts, gpart_align, *Ngparts * sizeof(struct gpart)) != 0) error("Error while allocating memory for gravity particles"); @@ -532,10 +532,10 @@ void read_ic_single(const char* fileName, } break; - case swift_type_star: + case swift_type_stars: if (with_stars) { Nparticles = *Nstars; - star_read_particles(*sparts, list, &num_fields); + stars_read_particles(*sparts, list, &num_fields); } break; @@ -568,7 +568,7 @@ void read_ic_single(const char* fileName, /* Duplicate the star particles into gparts */ if (with_stars) - io_duplicate_star_gparts(&tp, *sparts, *gparts, *Nstars, Ndm + *Ngas); + io_duplicate_stars_gparts(&tp, *sparts, *gparts, *Nstars, Ndm + *Ngas); threadpool_clean(&tp); } @@ -733,6 +733,15 @@ void write_output_single(struct engine* e, const char* baseName, H5Gclose(h_grp); } + /* Print the stellar parameters */ + if (e->policy & engine_policy_stars) { + h_grp = H5Gcreate(h_file, "/StarsScheme", H5P_DEFAULT, H5P_DEFAULT, + H5P_DEFAULT); + if (h_grp < 0) error("Error while creating stars group"); + stars_props_print_snapshot(h_grp, e->stars_properties); + H5Gclose(h_grp); + } + /* Print the cosmological model */ h_grp = H5Gcreate(h_file, "/Cosmology", H5P_DEFAULT, H5P_DEFAULT, H5P_DEFAULT); @@ -845,9 +854,9 @@ void write_output_single(struct engine* e, const char* baseName, darkmatter_write_particles(dmparts, list, &num_fields); break; - case swift_type_star: + case swift_type_stars: N = Nstars; - star_write_particles(sparts, list, &num_fields); + stars_write_particles(sparts, list, &num_fields); break; default: diff --git a/src/space.c b/src/space.c index f751ac18c78c08d7d181a5416224218e889a0e58..3d7c56e05f9def875fe630b68cfa34e040d60a71 100644 --- a/src/space.c +++ b/src/space.c @@ -66,6 +66,8 @@ int space_subsize_pair_hydro = space_subsize_pair_hydro_default; int space_subsize_self_hydro = space_subsize_self_hydro_default; int space_subsize_pair_grav = space_subsize_pair_grav_default; int space_subsize_self_grav = space_subsize_self_grav_default; +int space_subsize_pair_stars = space_subsize_pair_stars_default; +int space_subsize_self_stars = space_subsize_self_stars_default; int space_subdepth_grav = space_subdepth_grav_default; int space_maxsize = space_maxsize_default; #ifdef SWIFT_DEBUG_CHECKS @@ -182,6 +184,10 @@ void space_rebuild_recycle_mapper(void *map_data, int num_elements, c->ghost_in = NULL; c->ghost_out = NULL; c->ghost = NULL; + c->stars_ghost_in = NULL; + c->stars_ghost_out = NULL; + c->stars_ghost = NULL; + c->stars_density = NULL; c->kick1 = NULL; c->kick2 = NULL; c->timestep = NULL; @@ -676,13 +682,13 @@ void space_rebuild(struct space *s, int verbose) { /* Swap the link with part/spart */ if (s->gparts[k].type == swift_type_gas) { s->parts[-s->gparts[k].id_or_neg_offset].gpart = &s->gparts[k]; - } else if (s->gparts[k].type == swift_type_star) { + } else if (s->gparts[k].type == swift_type_stars) { s->sparts[-s->gparts[k].id_or_neg_offset].gpart = &s->gparts[k]; } if (s->gparts[nr_gparts].type == swift_type_gas) { s->parts[-s->gparts[nr_gparts].id_or_neg_offset].gpart = &s->gparts[nr_gparts]; - } else if (s->gparts[nr_gparts].type == swift_type_star) { + } else if (s->gparts[nr_gparts].type == swift_type_stars) { s->sparts[-s->gparts[nr_gparts].id_or_neg_offset].gpart = &s->gparts[nr_gparts]; } @@ -1583,7 +1589,7 @@ void space_gparts_sort(struct gpart *gparts, struct part *parts, memswap(&ind[j], &target_cid, sizeof(int)); if (gparts[j].type == swift_type_gas) { parts[-gparts[j].id_or_neg_offset].gpart = &gparts[j]; - } else if (gparts[j].type == swift_type_star) { + } else if (gparts[j].type == swift_type_stars) { sparts[-gparts[j].id_or_neg_offset].gpart = &gparts[j]; } } @@ -1591,7 +1597,7 @@ void space_gparts_sort(struct gpart *gparts, struct part *parts, ind[k] = target_cid; if (gparts[k].type == swift_type_gas) { parts[-gparts[k].id_or_neg_offset].gpart = &gparts[k]; - } else if (gparts[k].type == swift_type_star) { + } else if (gparts[k].type == swift_type_stars) { sparts[-gparts[k].id_or_neg_offset].gpart = &gparts[k]; } } @@ -2430,7 +2436,7 @@ void space_synchronize_particle_positions_mapper(void *map_data, int nr_gparts, xp->v_full[2] = gp->v_full[2]; } - else if (gp->type == swift_type_star) { + else if (gp->type == swift_type_stars) { /* Get it's stellar friend */ struct spart *sp = &s->sparts[-gp->id_or_neg_offset]; @@ -2627,7 +2633,7 @@ void space_first_init_sparts_mapper(void *restrict map_data, int count, /* Initialise the rest */ for (int k = 0; k < count; k++) { - star_first_init_spart(&sp[k]); + stars_first_init_spart(&sp[k]); #ifdef SWIFT_DEBUG_CHECKS if (sp[k].gpart && sp[k].gpart->id_or_neg_offset != -(k + delta)) @@ -2643,7 +2649,7 @@ void space_first_init_sparts_mapper(void *restrict map_data, int count, /** * @brief Initialises all the s-particles by setting them into a valid state * - * Calls star_first_init_spart() on all the particles + * Calls stars_first_init_spart() on all the particles */ void space_first_init_sparts(struct space *s, int verbose) { const ticks tic = getticks(); @@ -2708,6 +2714,32 @@ void space_init_gparts(struct space *s, int verbose) { clocks_getunit()); } +void space_init_sparts_mapper(void *restrict map_data, int scount, + void *restrict extra_data) { + + struct spart *restrict sparts = (struct spart *)map_data; + for (int k = 0; k < scount; k++) stars_init_spart(&sparts[k]); +} + +/** + * @brief Calls the #spart initialisation function on all particles in the + * space. + * + * @param s The #space. + * @param verbose Are we talkative? + */ +void space_init_sparts(struct space *s, int verbose) { + + const ticks tic = getticks(); + + if (s->nr_sparts > 0) + threadpool_map(&s->e->threadpool, space_init_sparts_mapper, s->sparts, + s->nr_sparts, sizeof(struct spart), 0, NULL); + if (verbose) + message("took %.3f %s.", clocks_from_ticks(getticks() - tic), + clocks_getunit()); +} + void space_convert_quantities_mapper(void *restrict map_data, int count, void *restrict extra_data) { struct space *s = (struct space *)extra_data; @@ -2750,10 +2782,10 @@ void space_convert_quantities(struct space *s, int verbose) { * @param dim Spatial dimensions of the domain. * @param parts Array of Gas particles. * @param gparts Array of Gravity particles. - * @param sparts Array of star particles. + * @param sparts Array of stars particles. * @param Npart The number of Gas particles in the space. * @param Ngpart The number of Gravity particles in the space. - * @param Nspart The number of star particles in the space. + * @param Nspart The number of stars particles in the space. * @param periodic flag whether the domain is periodic or not. * @param replicate How many replications along each direction do we want? * @param generate_gas_in_ics Are we generating gas particles from the gparts? @@ -2863,6 +2895,12 @@ void space_init(struct space *s, struct swift_params *params, space_subsize_self_grav = parser_get_opt_param_int(params, "Scheduler:cell_sub_size_self_grav", space_subsize_self_grav_default); + space_subsize_pair_stars = + parser_get_opt_param_int(params, "Scheduler:cell_sub_size_pair_stars", + space_subsize_pair_stars_default); + space_subsize_self_stars = + parser_get_opt_param_int(params, "Scheduler:cell_sub_size_self_stars", + space_subsize_self_stars_default); space_splitsize = parser_get_opt_param_int( params, "Scheduler:cell_split_size", space_splitsize_default); space_subdepth_grav = parser_get_opt_param_int( @@ -2876,6 +2914,8 @@ void space_init(struct space *s, struct swift_params *params, space_subsize_pair_hydro, space_subsize_self_hydro); message("sub_size_pair_grav set to %d, sub_size_self_grav set to %d", space_subsize_pair_grav, space_subsize_self_grav); + message("sub_size_pair_stars set to %d, sub_size_self_stars set to %d", + space_subsize_pair_stars, space_subsize_self_stars); } /* Apply h scaling */ @@ -3455,7 +3495,7 @@ void space_struct_restore(struct space *s, FILE *stream) { NULL, "sparts"); } - /* Need to reconnect the gravity parts to their hydro and star particles. */ + /* Need to reconnect the gravity parts to their hydro and stars particles. */ /* Re-link the parts. */ if (s->nr_parts > 0 && s->nr_gparts > 0) part_relink_parts_to_gparts(s->gparts, s->nr_gparts, s->parts); diff --git a/src/space.h b/src/space.h index 94c7f5b38825120b7bdd922f9f60b9f25b09c26c..b2b8f7dc3aefcadd6bd4e3013e374afb3d4e5dd4 100644 --- a/src/space.h +++ b/src/space.h @@ -48,6 +48,8 @@ struct cosmology; #define space_subsize_self_hydro_default 32000 #define space_subsize_pair_grav_default 256000000 #define space_subsize_self_grav_default 32000 +#define space_subsize_pair_stars_default 256000000 +#define space_subsize_self_stars_default 32000 #define space_subdepth_grav_default 2 #define space_max_top_level_cells_default 12 #define space_stretch 1.10f @@ -63,6 +65,8 @@ extern int space_subsize_pair_hydro; extern int space_subsize_self_hydro; extern int space_subsize_pair_grav; extern int space_subsize_self_grav; +extern int space_subsize_pair_stars; +extern int space_subsize_self_stars; extern int space_subdepth_grav; /** @@ -254,6 +258,7 @@ void space_first_init_gparts(struct space *s, int verbose); void space_first_init_sparts(struct space *s, int verbose); void space_init_parts(struct space *s, int verbose); void space_init_gparts(struct space *s, int verbose); +void space_init_sparts(struct space *s, int verbose); void space_convert_quantities(struct space *s, int verbose); void space_link_cleanup(struct space *s); void space_check_drift_point(struct space *s, integertime_t ti_drift, diff --git a/src/stars.h b/src/stars.h index ade47ff57298c13bf205e991548945576a802293..3e921239a29d862aba998c138623eb1cb81a37b9 100644 --- a/src/stars.h +++ b/src/stars.h @@ -16,15 +16,15 @@ * along with this program. If not, see <http://www.gnu.org/licenses/>. * ******************************************************************************/ -#ifndef SWIFT_STAR_H -#define SWIFT_STAR_H +#ifndef SWIFT_STARS_H +#define SWIFT_STARS_H /* Config parameters. */ #include "../config.h" /* So far only one model here */ /* Straight-forward import */ -#include "./stars/Default/star.h" -#include "./stars/Default/star_iact.h" +#include "./stars/Default/stars.h" +#include "./stars/Default/stars_iact.h" #endif diff --git a/src/stars/Default/star.h b/src/stars/Default/star.h deleted file mode 100644 index 61ae4aeb5c51e18e39c3f4c6855d7c6ddfe05abb..0000000000000000000000000000000000000000 --- a/src/stars/Default/star.h +++ /dev/null @@ -1,86 +0,0 @@ -/******************************************************************************* - * This file is part of SWIFT. - * Coypright (c) 2016 Matthieu Schaller (matthieu.schaller@durham.ac.uk) - * - * This program is free software: you can redistribute it and/or modify - * it under the terms of the GNU Lesser General Public License as published - * by the Free Software Foundation, either version 3 of the License, or - * (at your option) any later version. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU Lesser General Public License - * along with this program. If not, see <http://www.gnu.org/licenses/>. - * - ******************************************************************************/ -#ifndef SWIFT_DEFAULT_STAR_H -#define SWIFT_DEFAULT_STAR_H - -#include <float.h> -#include "minmax.h" - -/** - * @brief Computes the gravity time-step of a given star particle. - * - * @param sp Pointer to the s-particle data. - */ -__attribute__((always_inline)) INLINE static float star_compute_timestep( - const struct spart* const sp) { - - return FLT_MAX; -} - -/** - * @brief Initialises the s-particles for the first time - * - * This function is called only once just after the ICs have been - * read in to do some conversions. - * - * @param sp The particle to act upon - */ -__attribute__((always_inline)) INLINE static void star_first_init_spart( - struct spart* sp) { - - sp->time_bin = 0; -} - -/** - * @brief Prepares a s-particle for its interactions - * - * @param sp The particle to act upon - */ -__attribute__((always_inline)) INLINE static void star_init_spart( - struct spart* sp) {} - -/** - * @brief Sets the values to be predicted in the drifts to their values at a - * kick time - * - * @param sp The particle. - */ -__attribute__((always_inline)) INLINE static void star_reset_predicted_values( - struct spart* restrict sp) {} - -/** - * @brief Finishes the calculation of (non-gravity) forces acting on stars - * - * Multiplies the forces and accelerations by the appropiate constants - * - * @param sp The particle to act upon - */ -__attribute__((always_inline)) INLINE static void star_end_force( - struct spart* sp) {} - -/** - * @brief Kick the additional variables - * - * @param sp The particle to act upon - * @param dt The time-step for this kick - */ -__attribute__((always_inline)) INLINE static void star_kick_extra( - struct spart* sp, float dt) {} - -#endif /* SWIFT_DEFAULT_STAR_H */ diff --git a/src/stars/Default/star_iact.h b/src/stars/Default/star_iact.h deleted file mode 100644 index e69de29bb2d1d6434b8b29ae775ad8c2e48c5391..0000000000000000000000000000000000000000 diff --git a/src/stars/Default/star_io.h b/src/stars/Default/star_io.h deleted file mode 100644 index 7ad29f0a935c002b1337c2a75d6f987c05c9bb43..0000000000000000000000000000000000000000 --- a/src/stars/Default/star_io.h +++ /dev/null @@ -1,73 +0,0 @@ -/******************************************************************************* - * This file is part of SWIFT. - * Coypright (c) 2016 Matthieu Schaller (matthieu.schaller@durham.ac.uk) - * - * This program is free software: you can redistribute it and/or modify - * it under the terms of the GNU Lesser General Public License as published - * by the Free Software Foundation, either version 3 of the License, or - * (at your option) any later version. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU Lesser General Public License - * along with this program. If not, see <http://www.gnu.org/licenses/>. - * - ******************************************************************************/ -#ifndef SWIFT_DEFAULT_STAR_IO_H -#define SWIFT_DEFAULT_STAR_IO_H - -#include "io_properties.h" - -/** - * @brief Specifies which s-particle fields to read from a dataset - * - * @param sparts The s-particle array. - * @param list The list of i/o properties to read. - * @param num_fields The number of i/o fields to read. - */ -INLINE static void star_read_particles(struct spart* sparts, - struct io_props* list, int* num_fields) { - - /* Say how much we want to read */ - *num_fields = 4; - - /* List what we want to read */ - list[0] = io_make_input_field("Coordinates", DOUBLE, 3, COMPULSORY, - UNIT_CONV_LENGTH, sparts, x); - list[1] = io_make_input_field("Velocities", FLOAT, 3, COMPULSORY, - UNIT_CONV_SPEED, sparts, v); - list[2] = io_make_input_field("Masses", FLOAT, 1, COMPULSORY, UNIT_CONV_MASS, - sparts, mass); - list[3] = io_make_input_field("ParticleIDs", LONGLONG, 1, COMPULSORY, - UNIT_CONV_NO_UNITS, sparts, id); -} - -/** - * @brief Specifies which s-particle fields to write to a dataset - * - * @param sparts The s-particle array. - * @param list The list of i/o properties to write. - * @param num_fields The number of i/o fields to write. - */ -INLINE static void star_write_particles(const struct spart* sparts, - struct io_props* list, - int* num_fields) { - - /* Say how much we want to read */ - *num_fields = 4; - - /* List what we want to read */ - list[0] = io_make_output_field("Coordinates", DOUBLE, 3, UNIT_CONV_LENGTH, - sparts, x); - list[1] = - io_make_output_field("Velocities", FLOAT, 3, UNIT_CONV_SPEED, sparts, v); - list[2] = - io_make_output_field("Masses", FLOAT, 1, UNIT_CONV_MASS, sparts, mass); - list[3] = io_make_output_field("ParticleIDs", LONGLONG, 1, UNIT_CONV_NO_UNITS, - sparts, id); -} - -#endif /* SWIFT_DEFAULT_STAR_IO_H */ diff --git a/src/stars/Default/stars.h b/src/stars/Default/stars.h new file mode 100644 index 0000000000000000000000000000000000000000..ac7ed4207dafc3e4fdccadc97e597bcdd443d489 --- /dev/null +++ b/src/stars/Default/stars.h @@ -0,0 +1,149 @@ +/******************************************************************************* + * This file is part of SWIFT. + * Coypright (c) 2016 Matthieu Schaller (matthieu.schaller@durham.ac.uk) + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published + * by the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public License + * along with this program. If not, see <http://www.gnu.org/licenses/>. + * + ******************************************************************************/ +#ifndef SWIFT_DEFAULT_STARS_H +#define SWIFT_DEFAULT_STARS_H + +#include <float.h> +#include "minmax.h" + +/** + * @brief Computes the gravity time-step of a given star particle. + * + * @param sp Pointer to the s-particle data. + */ +__attribute__((always_inline)) INLINE static float stars_compute_timestep( + const struct spart* const sp) { + + return FLT_MAX; +} + +/** + * @brief Initialises the s-particles for the first time + * + * This function is called only once just after the ICs have been + * read in to do some conversions. + * + * @param sp The particle to act upon + */ +__attribute__((always_inline)) INLINE static void stars_first_init_spart( + struct spart* sp) { + + sp->time_bin = 0; +} + +/** + * @brief Prepares a s-particle for its interactions + * + * @param sp The particle to act upon + */ +__attribute__((always_inline)) INLINE static void stars_init_spart( + struct spart* sp) { + +#ifdef DEBUG_INTERACTIONS_STARS + for (int i = 0; i < MAX_NUM_OF_NEIGHBOURS_STARS; ++i) + sp->ids_ngbs_density[i] = -1; + sp->num_ngb_density = 0; +#endif + + sp->density.wcount = 0.f; + sp->density.wcount_dh = 0.f; +} + +/** + * @brief Sets the values to be predicted in the drifts to their values at a + * kick time + * + * @param sp The particle. + */ +__attribute__((always_inline)) INLINE static void stars_reset_predicted_values( + struct spart* restrict sp) {} + +/** + * @brief Finishes the calculation of (non-gravity) forces acting on stars + * + * Multiplies the forces and accelerations by the appropiate constants + * + * @param sp The particle to act upon + */ +__attribute__((always_inline)) INLINE static void stars_end_force( + struct spart* sp) {} + +/** + * @brief Kick the additional variables + * + * @param sp The particle to act upon + * @param dt The time-step for this kick + */ +__attribute__((always_inline)) INLINE static void stars_kick_extra( + struct spart* sp, float dt) {} + +/** + * @brief Finishes the calculation of density on stars + * + * @param sp The particle to act upon + */ +__attribute__((always_inline)) INLINE static void stars_end_density( + struct spart* sp, const struct cosmology* cosmo) { + + /* Some smoothing length multiples. */ + const float h = sp->h; + const float h_inv = 1.0f / h; /* 1/h */ + const float h_inv_dim = pow_dimension(h_inv); /* 1/h^d */ + const float h_inv_dim_plus_one = h_inv_dim * h_inv; /* 1/h^(d+1) */ + + /* Finish the calculation by inserting the missing h-factors */ + sp->density.wcount *= h_inv_dim; + sp->density.wcount_dh *= h_inv_dim_plus_one; +} + +/** + * @brief Sets all particle fields to sensible values when the #spart has 0 + * ngbs. + * + * @param sp The particle to act upon + * @param cosmo The current cosmological model. + */ +__attribute__((always_inline)) INLINE static void stars_spart_has_no_neighbours( + struct spart* restrict sp, const struct cosmology* cosmo) { + + /* Some smoothing length multiples. */ + const float h = sp->h; + const float h_inv = 1.0f / h; /* 1/h */ + const float h_inv_dim = pow_dimension(h_inv); /* 1/h^d */ + + /* Re-set problematic values */ + sp->density.wcount = kernel_root * h_inv_dim; + sp->density.wcount_dh = 0.f; +} + +/** + * @brief Evolve the stellar properties of a #spart. + * + * This function allows for example to compute the SN rate before sending + * this information to a different MPI rank. + * + * @param sp The particle to act upon + * @param cosmo The current cosmological model. + * @param stars_properties The #stars_props + */ +__attribute__((always_inline)) INLINE static void stars_evolve_spart( + struct spart* restrict sp, const struct stars_props* stars_properties, + const struct cosmology* cosmo) {} + +#endif /* SWIFT_DEFAULT_STARS_H */ diff --git a/src/stars/Default/star_debug.h b/src/stars/Default/stars_debug.h similarity index 86% rename from src/stars/Default/star_debug.h rename to src/stars/Default/stars_debug.h index d940afac2eb67c97481f48a4bda6fa56085166d5..39ae754ddf60910ae07b3252e151c1f619588161 100644 --- a/src/stars/Default/star_debug.h +++ b/src/stars/Default/stars_debug.h @@ -16,10 +16,10 @@ * along with this program. If not, see <http://www.gnu.org/licenses/>. * ******************************************************************************/ -#ifndef SWIFT_DEFAULT_STAR_DEBUG_H -#define SWIFT_DEFAULT_STAR_DEBUG_H +#ifndef SWIFT_DEFAULT_STARS_DEBUG_H +#define SWIFT_DEFAULT_STARS_DEBUG_H -__attribute__((always_inline)) INLINE static void star_debug_particle( +__attribute__((always_inline)) INLINE static void stars_debug_particle( const struct spart* p) { printf( "x=[%.3e,%.3e,%.3e], " @@ -28,4 +28,4 @@ __attribute__((always_inline)) INLINE static void star_debug_particle( p->mass, p->ti_begin, p->ti_end); } -#endif /* SWIFT_DEFAULT_STAR_DEBUG_H */ +#endif /* SWIFT_DEFAULT_STARS_DEBUG_H */ diff --git a/src/stars/Default/stars_iact.h b/src/stars/Default/stars_iact.h new file mode 100644 index 0000000000000000000000000000000000000000..3b066686060bcee7583fe65c032da5c12f637081 --- /dev/null +++ b/src/stars/Default/stars_iact.h @@ -0,0 +1,40 @@ +/** + * @brief Density interaction between two particles (non-symmetric). + * + * @param r2 Comoving square distance between the two particles. + * @param dx Comoving vector separating both particles (pi - pj). + * @param hi Comoving smoothing-length of particle i. + * @param hj Comoving smoothing-length of particle j. + * @param si First sparticle. + * @param pj Second particle (not updated). + * @param a Current scale factor. + * @param H Current Hubble parameter. + */ +__attribute__((always_inline)) INLINE static void +runner_iact_nonsym_stars_density(float r2, const float *dx, float hi, float hj, + struct spart *restrict si, + const struct part *restrict pj, float a, + float H) { + + float wi, wi_dx; + + /* Get r and 1/r. */ + const float r_inv = 1.0f / sqrtf(r2); + const float r = r2 * r_inv; + + /* Compute the kernel function */ + const float hi_inv = 1.0f / hi; + const float ui = r * hi_inv; + kernel_deval(ui, &wi, &wi_dx); + + /* Compute contribution to the number of neighbours */ + si->density.wcount += wi; + si->density.wcount_dh -= (hydro_dimension * wi + ui * wi_dx); + +#ifdef DEBUG_INTERACTIONS_STARS + /* Update ngb counters */ + if (si->num_ngb_density < MAX_NUM_OF_NEIGHBOURS_STARS) + si->ids_ngbs_density[si->num_ngb_density] = pj->id; + ++si->num_ngb_density; +#endif +} diff --git a/src/stars/Default/stars_io.h b/src/stars/Default/stars_io.h new file mode 100644 index 0000000000000000000000000000000000000000..d41a13a5b84fe1b4c98592f56a06df12590d6900 --- /dev/null +++ b/src/stars/Default/stars_io.h @@ -0,0 +1,198 @@ +/******************************************************************************* + * This file is part of SWIFT. + * Coypright (c) 2016 Matthieu Schaller (matthieu.schaller@durham.ac.uk) + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published + * by the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public License + * along with this program. If not, see <http://www.gnu.org/licenses/>. + * + ******************************************************************************/ +#ifndef SWIFT_DEFAULT_STARS_IO_H +#define SWIFT_DEFAULT_STARS_IO_H + +#include "io_properties.h" +#include "stars_part.h" + +/** + * @brief Specifies which s-particle fields to read from a dataset + * + * @param sparts The s-particle array. + * @param list The list of i/o properties to read. + * @param num_fields The number of i/o fields to read. + */ +INLINE static void stars_read_particles(struct spart *sparts, + struct io_props *list, + int *num_fields) { + + /* Say how much we want to read */ + *num_fields = 5; + + /* List what we want to read */ + list[0] = io_make_input_field("Coordinates", DOUBLE, 3, COMPULSORY, + UNIT_CONV_LENGTH, sparts, x); + list[1] = io_make_input_field("Velocities", FLOAT, 3, COMPULSORY, + UNIT_CONV_SPEED, sparts, v); + list[2] = io_make_input_field("Masses", FLOAT, 1, COMPULSORY, UNIT_CONV_MASS, + sparts, mass); + list[3] = io_make_input_field("ParticleIDs", LONGLONG, 1, COMPULSORY, + UNIT_CONV_NO_UNITS, sparts, id); + list[4] = io_make_input_field("SmoothingLength", FLOAT, 1, OPTIONAL, + UNIT_CONV_LENGTH, sparts, h); +} + +/** + * @brief Specifies which s-particle fields to write to a dataset + * + * @param sparts The s-particle array. + * @param list The list of i/o properties to write. + * @param num_fields The number of i/o fields to write. + */ +INLINE static void stars_write_particles(const struct spart *sparts, + struct io_props *list, + int *num_fields) { + + /* Say how much we want to read */ + *num_fields = 5; + + /* List what we want to read */ + list[0] = io_make_output_field("Coordinates", DOUBLE, 3, UNIT_CONV_LENGTH, + sparts, x); + list[1] = + io_make_output_field("Velocities", FLOAT, 3, UNIT_CONV_SPEED, sparts, v); + list[2] = + io_make_output_field("Masses", FLOAT, 1, UNIT_CONV_MASS, sparts, mass); + list[3] = io_make_output_field("ParticleIDs", LONGLONG, 1, UNIT_CONV_NO_UNITS, + sparts, id); + list[4] = io_make_output_field("SmoothingLength", FLOAT, 1, UNIT_CONV_LENGTH, + sparts, h); +} + +/** + * @brief Initialize the global properties of the stars scheme. + * + * By default, takes the values provided by the hydro. + * + * @param p The #stars_props. + * @param phys_const The physical constants in the internal unit system. + * @param us The internal unit system. + * @param params The parsed parameters. + */ +INLINE static void stars_props_init(struct stars_props *sp, + const struct phys_const *phys_const, + const struct unit_system *us, + struct swift_params *params, + const struct hydro_props *p) { + + /* Kernel properties */ + sp->eta_neighbours = parser_get_opt_param_float( + params, "Stars:resolution_eta", p->eta_neighbours); + + /* Tolerance for the smoothing length Newton-Raphson scheme */ + sp->h_tolerance = + parser_get_opt_param_float(params, "Stars:h_tolerance", p->h_tolerance); + + /* Get derived properties */ + sp->target_neighbours = pow_dimension(sp->eta_neighbours) * kernel_norm; + const float delta_eta = sp->eta_neighbours * (1.f + sp->h_tolerance); + sp->delta_neighbours = + (pow_dimension(delta_eta) - pow_dimension(sp->eta_neighbours)) * + kernel_norm; + + /* Maximal smoothing length */ + sp->h_max = parser_get_opt_param_float(params, "Stars:h_max", p->h_max); + + /* Number of iterations to converge h */ + sp->max_smoothing_iterations = parser_get_opt_param_int( + params, "Stars:max_ghost_iterations", p->max_smoothing_iterations); + + /* Time integration properties */ + const float max_volume_change = + parser_get_opt_param_float(params, "Stars:max_volume_change", -1); + if (max_volume_change == -1) + sp->log_max_h_change = p->log_max_h_change; + else + sp->log_max_h_change = logf(powf(max_volume_change, hydro_dimension_inv)); +} + +/** + * @brief Print the global properties of the stars scheme. + * + * @param p The #stars_props. + */ +INLINE static void stars_props_print(const struct stars_props *sp) { + + /* Now stars */ + message("Stars kernel: %s with eta=%f (%.2f neighbours).", kernel_name, + sp->eta_neighbours, sp->target_neighbours); + + message("Stars relative tolerance in h: %.5f (+/- %.4f neighbours).", + sp->h_tolerance, sp->delta_neighbours); + + message( + "Stars integration: Max change of volume: %.2f " + "(max|dlog(h)/dt|=%f).", + pow_dimension(expf(sp->log_max_h_change)), sp->log_max_h_change); + + if (sp->h_max != FLT_MAX) + message("Maximal smoothing length allowed: %.4f", sp->h_max); + + message("Maximal iterations in ghost task set to %d", + sp->max_smoothing_iterations); +} + +#if defined(HAVE_HDF5) +INLINE static void stars_props_print_snapshot(hid_t h_grpstars, + const struct stars_props *sp) { + + io_write_attribute_s(h_grpstars, "Kernel function", kernel_name); + io_write_attribute_f(h_grpstars, "Kernel target N_ngb", + sp->target_neighbours); + io_write_attribute_f(h_grpstars, "Kernel delta N_ngb", sp->delta_neighbours); + io_write_attribute_f(h_grpstars, "Kernel eta", sp->eta_neighbours); + io_write_attribute_f(h_grpstars, "Smoothing length tolerance", + sp->h_tolerance); + io_write_attribute_f(h_grpstars, "Maximal smoothing length", sp->h_max); + io_write_attribute_f(h_grpstars, "Volume log(max(delta h))", + sp->log_max_h_change); + io_write_attribute_f(h_grpstars, "Volume max change time-step", + pow_dimension(expf(sp->log_max_h_change))); + io_write_attribute_i(h_grpstars, "Max ghost iterations", + sp->max_smoothing_iterations); +} +#endif + +/** + * @brief Write a #stars_props struct to the given FILE as a stream of bytes. + * + * @param p the struct + * @param stream the file stream + */ +INLINE static void stars_props_struct_dump(const struct stars_props *p, + FILE *stream) { + restart_write_blocks((void *)p, sizeof(struct stars_props), 1, stream, + "starsprops", "stars props"); +} + +/** + * @brief Restore a stars_props struct from the given FILE as a stream of + * bytes. + * + * @param p the struct + * @param stream the file stream + */ +INLINE static void stars_props_struct_restore(const struct stars_props *p, + FILE *stream) { + restart_read_blocks((void *)p, sizeof(struct stars_props), 1, stream, NULL, + "stars props"); +} + +#endif /* SWIFT_DEFAULT_STAR_IO_H */ diff --git a/src/stars/Default/star_part.h b/src/stars/Default/stars_part.h similarity index 61% rename from src/stars/Default/star_part.h rename to src/stars/Default/stars_part.h index 68dd4869c257e35b3be7dc21f36e6dcdb725dc17..fe5be3c7f6f190adea2c81ee14c0cad7a10fbd2c 100644 --- a/src/stars/Default/star_part.h +++ b/src/stars/Default/stars_part.h @@ -44,9 +44,21 @@ struct spart { /*! Star mass */ float mass; + /* Particle cutoff radius. */ + float h; + /*! Particle time bin */ timebin_t time_bin; + struct { + /* Number of neighbours. */ + float wcount; + + /* Number of neighbours spatial derivative. */ + float wcount_dh; + + } density; + #ifdef SWIFT_DEBUG_CHECKS /* Time of the last drift */ @@ -57,6 +69,41 @@ struct spart { #endif +#ifdef DEBUG_INTERACTIONS_STARS + /*! List of interacting particles in the density SELF and PAIR */ + long long ids_ngbs_density[MAX_NUM_OF_NEIGHBOURS_STARS]; + + /*! Number of interactions in the density SELF and PAIR */ + int num_ngb_density; +#endif + } SWIFT_STRUCT_ALIGN; +/** + * @brief Contains all the constants and parameters of the stars scheme + */ +struct stars_props { + + /*! Resolution parameter */ + float eta_neighbours; + + /*! Target weightd number of neighbours (for info only)*/ + float target_neighbours; + + /*! Smoothing length tolerance */ + float h_tolerance; + + /*! Tolerance on neighbour number (for info only)*/ + float delta_neighbours; + + /*! Maximal smoothing length */ + float h_max; + + /*! Maximal number of iterations to converge h */ + int max_smoothing_iterations; + + /*! Maximal change of h over one time-step */ + float log_max_h_change; +}; + #endif /* SWIFT_DEFAULT_STAR_PART_H */ diff --git a/src/stars_io.h b/src/stars_io.h index 18a13ec19163008f1c8e9f64cf544ddf812db655..046e90ee7570430ea25632539bc2cd642d4b52c0 100644 --- a/src/stars_io.h +++ b/src/stars_io.h @@ -16,11 +16,11 @@ * along with this program. If not, see <http://www.gnu.org/licenses/>. * ******************************************************************************/ -#ifndef SWIFT_STAR_IO_H -#define SWIFT_STAR_IO_H +#ifndef SWIFT_STARS_IO_H +#define SWIFT_STARS_IO_H #include "./const.h" -#include "./stars/Default/star_io.h" +#include "./stars/Default/stars_io.h" -#endif /* SWIFT_STAR_IO_H */ +#endif /* SWIFT_STARS_IO_H */ diff --git a/src/swift.h b/src/swift.h index e10938addb99956c202b3e4dd2b0592b580fa948..4398f3f69a66320e13d0ed1a6a0a63fafc7d1e52 100644 --- a/src/swift.h +++ b/src/swift.h @@ -66,6 +66,8 @@ #include "single_io.h" #include "sourceterms.h" #include "space.h" +#include "stars.h" +#include "stars_io.h" #include "task.h" #include "threadpool.h" #include "timeline.h" diff --git a/src/task.c b/src/task.c index 018a09b5c74f8dc72093f411d24e31abd767b3d0..47301e38d783c3d71b35f7e15b5c9f7793df88d4 100644 --- a/src/task.c +++ b/src/task.c @@ -48,20 +48,22 @@ /* Task type names. */ const char *taskID_names[task_type_count] = { - "none", "sort", "self", - "pair", "sub_self", "sub_pair", - "init_grav", "init_grav_out", "ghost_in", - "ghost", "ghost_out", "extra_ghost", - "drift_part", "drift_gpart", "end_force", - "kick1", "kick2", "timestep", - "send", "recv", "grav_long_range", - "grav_mm", "grav_down_in", "grav_down", - "grav_mesh", "cooling", "sourceterms"}; + "none", "sort", "self", + "pair", "sub_self", "sub_pair", + "init_grav", "init_grav_out", "ghost_in", + "ghost", "ghost_out", "extra_ghost", + "drift_part", "drift_gpart", "end_force", + "kick1", "kick2", "timestep", + "send", "recv", "grav_long_range", + "grav_mm", "grav_down_in", "grav_down", + "grav_mesh", "cooling", "sourceterms", + "stars_ghost_in", "stars_ghost", "stars_ghost_out"}; /* Sub-task type names. */ const char *subtaskID_names[task_subtype_count] = { - "none", "density", "gradient", "force", "grav", "external_grav", - "tend", "xv", "rho", "gpart", "multipole", "spart"}; + "none", "density", "gradient", "force", "grav", + "external_grav", "tend", "xv", "rho", "gpart", + "multipole", "spart", "stars_density"}; #ifdef WITH_MPI /* MPI communicators for the subtypes. */ @@ -71,46 +73,32 @@ MPI_Comm subtaskMPI_comms[task_subtype_count]; /** * @brief Computes the overlap between the parts array of two given cells. * - * @param ci The first #cell. - * @param cj The second #cell. - */ -__attribute__((always_inline)) INLINE static size_t task_cell_overlap_part( - const struct cell *restrict ci, const struct cell *restrict cj) { - - if (ci == NULL || cj == NULL) return 0; - - if (ci->parts <= cj->parts && - ci->parts + ci->count >= cj->parts + cj->count) { - return cj->count; - } else if (cj->parts <= ci->parts && - cj->parts + cj->count >= ci->parts + ci->count) { - return ci->count; - } - - return 0; -} - -/** - * @brief Computes the overlap between the gparts array of two given cells. + * TYPE is the type of parts (e.g. #part, #gpart, #spart) * * @param ci The first #cell. * @param cj The second #cell. */ -__attribute__((always_inline)) INLINE static size_t task_cell_overlap_gpart( - const struct cell *restrict ci, const struct cell *restrict cj) { - - if (ci == NULL || cj == NULL) return 0; - - if (ci->gparts <= cj->gparts && - ci->gparts + ci->gcount >= cj->gparts + cj->gcount) { - return cj->gcount; - } else if (cj->gparts <= ci->gparts && - cj->gparts + cj->gcount >= ci->gparts + ci->gcount) { - return ci->gcount; +#define TASK_CELL_OVERLAP(TYPE, ARRAY, COUNT) \ + __attribute__((always_inline)) \ + INLINE static size_t task_cell_overlap_##TYPE( \ + const struct cell *restrict ci, const struct cell *restrict cj) { \ + \ + if (ci == NULL || cj == NULL) return 0; \ + \ + if (ci->ARRAY <= cj->ARRAY && \ + ci->ARRAY + ci->COUNT >= cj->ARRAY + cj->COUNT) { \ + return cj->COUNT; \ + } else if (cj->ARRAY <= ci->ARRAY && \ + cj->ARRAY + cj->COUNT >= ci->ARRAY + ci->COUNT) { \ + return ci->COUNT; \ + } \ + \ + return 0; \ } - return 0; -} +TASK_CELL_OVERLAP(part, parts, count); +TASK_CELL_OVERLAP(gpart, gparts, gcount); +TASK_CELL_OVERLAP(spart, sparts, scount); /** * @brief Returns the #task_actions for a given task. @@ -135,6 +123,10 @@ __attribute__((always_inline)) INLINE static enum task_actions task_acts_on( return task_action_part; break; + case task_type_stars_ghost: + return task_action_spart; + break; + case task_type_self: case task_type_pair: case task_type_sub_self: @@ -147,6 +139,10 @@ __attribute__((always_inline)) INLINE static enum task_actions task_acts_on( return task_action_part; break; + case task_subtype_stars_density: + return task_action_all; + break; + case task_subtype_grav: case task_subtype_external_grav: return task_action_gpart; @@ -220,9 +216,13 @@ float task_overlap(const struct task *restrict ta, const int ta_part = (ta_act == task_action_part || ta_act == task_action_all); const int ta_gpart = (ta_act == task_action_gpart || ta_act == task_action_all); + const int ta_spart = + (ta_act == task_action_spart || ta_act == task_action_all); const int tb_part = (tb_act == task_action_part || tb_act == task_action_all); const int tb_gpart = (tb_act == task_action_gpart || tb_act == task_action_all); + const int tb_spart = + (tb_act == task_action_spart || tb_act == task_action_all); /* In the case where both tasks act on parts */ if (ta_part && tb_part) { @@ -262,6 +262,25 @@ float task_overlap(const struct task *restrict ta, return ((float)size_intersect) / (size_union - size_intersect); } + /* In the case where both tasks act on sparts */ + else if (ta_spart && tb_spart) { + + /* Compute the union of the cell data. */ + size_t size_union = 0; + if (ta->ci != NULL) size_union += ta->ci->scount; + if (ta->cj != NULL) size_union += ta->cj->scount; + if (tb->ci != NULL) size_union += tb->ci->scount; + if (tb->cj != NULL) size_union += tb->cj->scount; + + /* Compute the intersection of the cell data. */ + const size_t size_intersect = task_cell_overlap_spart(ta->ci, tb->ci) + + task_cell_overlap_spart(ta->ci, tb->cj) + + task_cell_overlap_spart(ta->cj, tb->ci) + + task_cell_overlap_spart(ta->cj, tb->cj); + + return ((float)size_intersect) / (size_union - size_intersect); + } + /* Else, no overlap */ return 0.f; } diff --git a/src/task.h b/src/task.h index 202231f778e48bba4b94c118105e77aae8541d55..866465e4997770d3e0e48818c6eebc6e9abf67a2 100644 --- a/src/task.h +++ b/src/task.h @@ -66,6 +66,9 @@ enum task_types { task_type_grav_mesh, task_type_cooling, task_type_sourceterms, + task_type_stars_ghost_in, + task_type_stars_ghost, + task_type_stars_ghost_out, task_type_count } __attribute__((packed)); @@ -85,6 +88,7 @@ enum task_subtypes { task_subtype_gpart, task_subtype_multipole, task_subtype_spart, + task_subtype_stars_density, task_subtype_count } __attribute__((packed)); @@ -95,6 +99,7 @@ enum task_actions { task_action_none, task_action_part, task_action_gpart, + task_action_spart, task_action_all, task_action_multipole, task_action_count diff --git a/src/timers.c b/src/timers.c index e3fbfdb01249e98e46d2c60d45bd98adb0a34241..51d0e5f6dc4fd9e4e0567592750b8de45ecda06b 100644 --- a/src/timers.c +++ b/src/timers.c @@ -86,6 +86,13 @@ const char* timers_names[timer_count] = { "locktree", "runners", "step", + "doself_stars_density", + "dopair_stars_density", + "do_stars_ghost", + "doself_subset_stars_density", + "dopair_subset_stars_density", + "dosubpair_stars_density", + "dosub_self_stars_density", }; /* File to store the timers */ diff --git a/src/timers.h b/src/timers.h index 91d26c1c0d781f725b4c55a7ed3b6cfe956651df..aba6ae33e3b8268c088694863967b96851715153 100644 --- a/src/timers.h +++ b/src/timers.h @@ -87,6 +87,13 @@ enum { timer_locktree, timer_runners, timer_step, + timer_doself_stars_density, + timer_dopair_stars_density, + timer_dostars_ghost, + timer_doself_subset_stars_density, + timer_dopair_subset_stars_density, + timer_dosubpair_stars_density, + timer_dosub_self_stars_density, timer_count, }; diff --git a/src/timestep.h b/src/timestep.h index 2d54db4a4b82be83d253174e63b0146763ddbacf..3b957f0c4b9d103023c77837a1a6ca6388856d22 100644 --- a/src/timestep.h +++ b/src/timestep.h @@ -186,7 +186,7 @@ __attribute__((always_inline)) INLINE static integertime_t get_spart_timestep( const struct spart *restrict sp, const struct engine *restrict e) { /* Stellar time-step */ - float new_dt_star = star_compute_timestep(sp); + float new_dt_stars = stars_compute_timestep(sp); /* Gravity time-step */ float new_dt_self = FLT_MAX, new_dt_ext = FLT_MAX; @@ -201,7 +201,7 @@ __attribute__((always_inline)) INLINE static integertime_t get_spart_timestep( sp->gpart, a_hydro, e->gravity_properties, e->cosmology); /* Take the minimum of all */ - float new_dt = min3(new_dt_star, new_dt_self, new_dt_ext); + float new_dt = min3(new_dt_stars, new_dt_self, new_dt_ext); /* Apply the maximal displacement constraint (FLT_MAX if non-cosmological)*/ new_dt = min(new_dt, e->dt_max_RMS_displacement); diff --git a/src/tools.c b/src/tools.c index 9c0df6012737872eef8d97521b3a7532ceb42720..93f0c6f970a1fa530951d1d9d8e3ef169a688a70 100644 --- a/src/tools.c +++ b/src/tools.c @@ -45,6 +45,7 @@ #include "part.h" #include "periodic.h" #include "runner.h" +#include "stars.h" /** * Factorize a given integer, attempts to keep larger pair of factors. @@ -335,6 +336,77 @@ void pairs_all_force(struct runner *r, struct cell *ci, struct cell *cj) { } } +void pairs_all_stars_density(struct runner *r, struct cell *ci, + struct cell *cj) { + + float r2, dx[3]; + const double dim[3] = {r->e->s->dim[0], r->e->s->dim[1], r->e->s->dim[2]}; + const struct engine *e = r->e; + const struct cosmology *cosmo = e->cosmology; + const float a = cosmo->a; + const float H = cosmo->H; + + /* Implements a double-for loop and checks every interaction */ + for (int i = 0; i < ci->scount; ++i) { + struct spart *spi = &ci->sparts[i]; + + float hi = spi->h; + float hig2 = hi * hi * kernel_gamma2; + + /* Skip inactive particles. */ + if (!spart_is_active(spi, e)) continue; + + for (int j = 0; j < cj->count; ++j) { + + struct part *pj = &cj->parts[j]; + + /* Pairwise distance */ + r2 = 0.0f; + for (int k = 0; k < 3; k++) { + dx[k] = spi->x[k] - pj->x[k]; + dx[k] = nearest(dx[k], dim[k]); + r2 += dx[k] * dx[k]; + } + + /* Hit or miss? */ + if (r2 < hig2) { + /* Interact */ + runner_iact_nonsym_stars_density(r2, dx, hi, pj->h, spi, pj, a, H); + } + } + } + + /* Reverse double-for loop and checks every interaction */ + for (int j = 0; j < cj->scount; ++j) { + + struct spart *spj = &cj->sparts[j]; + float hj = spj->h; + float hjg2 = hj * hj * kernel_gamma2; + + /* Skip inactive particles. */ + if (!spart_is_active(spj, e)) continue; + + for (int i = 0; i < ci->count; ++i) { + + struct part *pi = &ci->parts[i]; + + /* Pairwise distance */ + r2 = 0.0f; + for (int k = 0; k < 3; k++) { + dx[k] = spj->x[k] - pi->x[k]; + dx[k] = nearest(dx[k], dim[k]); + r2 += dx[k] * dx[k]; + } + + /* Hit or miss? */ + if (r2 < hjg2) { + /* Interact */ + runner_iact_nonsym_stars_density(r2, dx, hj, pi->h, spj, pi, a, H); + } + } + } +} + void self_all_density(struct runner *r, struct cell *ci) { float r2, hi, hj, hig2, hjg2, dxi[3]; //, dxj[3]; struct part *pi, *pj; @@ -428,6 +500,45 @@ void self_all_force(struct runner *r, struct cell *ci) { } } +void self_all_stars_density(struct runner *r, struct cell *ci) { + float r2, hi, hj, hig2, dxi[3]; + struct spart *spi; + struct part *pj; + const struct engine *e = r->e; + const struct cosmology *cosmo = e->cosmology; + const float a = cosmo->a; + const float H = cosmo->H; + + /* Implements a double-for loop and checks every interaction */ + for (int i = 0; i < ci->scount; ++i) { + + spi = &ci->sparts[i]; + hi = spi->h; + hig2 = hi * hi * kernel_gamma2; + + if (!spart_is_active(spi, e)) continue; + + for (int j = 0; j < ci->count; ++j) { + + pj = &ci->parts[j]; + hj = pj->h; + + /* Pairwise distance */ + r2 = 0.0f; + for (int k = 0; k < 3; k++) { + dxi[k] = spi->x[k] - pj->x[k]; + r2 += dxi[k] * dxi[k]; + } + + /* Hit or miss? */ + if (r2 > 0.f && r2 < hig2) { + /* Interact */ + runner_iact_nonsym_stars_density(r2, dxi, hi, hj, spi, pj, a, H); + } + } + } +} + /** * @brief Compute the force on a single particle brute-force. */ @@ -544,6 +655,23 @@ void shuffle_particles(struct part *parts, const int count) { } } +/** + * @brief Randomly shuffle an array of sparticles. + */ +void shuffle_sparticles(struct spart *sparts, const int scount) { + if (scount > 1) { + for (int i = 0; i < scount - 1; i++) { + int j = i + random_uniform(0., (double)(scount - 1 - i)); + + struct spart sparticle = sparts[j]; + + sparts[j] = sparts[i]; + + sparts[i] = sparticle; + } + } +} + /** * @brief Compares two values based on their relative difference: |a - b|/|a + * b| diff --git a/src/tools.h b/src/tools.h index 25d024679174eabbe89908c0254651e4bbc69e15..5ec2ca42810e1cb733e1aa674bd1c94ac5c569bd 100644 --- a/src/tools.h +++ b/src/tools.h @@ -40,11 +40,15 @@ void pairs_all_density(struct runner *r, struct cell *ci, struct cell *cj); void self_all_density(struct runner *r, struct cell *ci); void pairs_all_force(struct runner *r, struct cell *ci, struct cell *cj); void self_all_force(struct runner *r, struct cell *ci); +void pairs_all_stars_density(struct runner *r, struct cell *ci, + struct cell *cj); +void self_all_stars_density(struct runner *r, struct cell *ci); void pairs_n2(double *dim, struct part *restrict parts, int N, int periodic); double random_uniform(double a, double b); void shuffle_particles(struct part *parts, const int count); +void shuffle_sparticles(struct spart *sparts, const int scount); void gravity_n2(struct gpart *gparts, const int gcount, const struct phys_const *constants, const struct gravity_props *gravity_properties, float rlr); diff --git a/tests/Makefile.am b/tests/Makefile.am index d99b68f224f542dcdc60ae59fc6e2042ae20d9b7..fa3f71fd0df1f9919f89ddcc7a4dcb9ead5bbde0 100644 --- a/tests/Makefile.am +++ b/tests/Makefile.am @@ -28,7 +28,8 @@ TESTS = testGreetings testMaths testReading.sh testSingle testKernel testSymmetr testVoronoi1D testVoronoi2D testVoronoi3D testGravityDerivatives \ testPeriodicBC.sh testPeriodicBCPerturbed.sh testPotentialSelf \ testPotentialPair testEOS testUtilities testSelectOutput.sh \ - testCbrt testCosmology testOutputList testFormat.sh + testCbrt testCosmology testOutputList testFormat.sh \ + test27cellsStars.sh test27cellsStarsPerturbed.sh # List of test programs to compile check_PROGRAMS = testGreetings testReading testSingle testTimeIntegration \ @@ -39,7 +40,8 @@ check_PROGRAMS = testGreetings testReading testSingle testTimeIntegration \ testRiemannHLLC testMatrixInversion testDump testLogger \ testVoronoi1D testVoronoi2D testVoronoi3D testPeriodicBC \ testGravityDerivatives testPotentialSelf testPotentialPair testEOS testUtilities \ - testSelectOutput testCbrt testCosmology testOutputList + testSelectOutput testCbrt testCosmology testOutputList test27cellsStars \ + test27cellsStars_subset # Rebuild tests when SWIFT is updated. $(check_PROGRAMS): ../src/.libs/libswiftsim.a @@ -76,6 +78,12 @@ test27cells_subset_SOURCES = test27cells.c test27cells_subset_CFLAGS = $(AM_CFLAGS) -DTEST_DOSELF_SUBSET -DTEST_DOPAIR_SUBSET +test27cellsStars_SOURCES = test27cellsStars.c + +test27cellsStars_subset_SOURCES = test27cellsStars.c + +test27cellsStars_subset_CFLAGS = $(AM_CFLAGS) -DTEST_DOSELF_SUBSET -DTEST_DOPAIR_SUBSET + testPeriodicBC_SOURCES = testPeriodicBC.c test125cells_SOURCES = test125cells.c @@ -130,4 +138,6 @@ EXTRA_DIST = testReading.sh makeInput.py testActivePair.sh \ fft_params.yml tolerance_periodic_BC_normal.dat tolerance_periodic_BC_perturbed.dat \ testEOS.sh testEOS_plot.sh testSelectOutput.sh selectOutput.yml \ output_list_params.yml output_list_time.txt output_list_redshift.txt \ - output_list_scale_factor.txt + output_list_scale_factor.txt testEOS.sh testEOS_plot.sh \ + test27cellsStars.sh test27cellsStarsPerturbed.sh star_tolerance_27_normal.dat \ + star_tolerance_27_perturbed.dat diff --git a/tests/star_tolerance_27_normal.dat b/tests/star_tolerance_27_normal.dat new file mode 100644 index 0000000000000000000000000000000000000000..c243da2bcd9f5177ab471b2b3e622bdb1ee677d4 --- /dev/null +++ b/tests/star_tolerance_27_normal.dat @@ -0,0 +1,4 @@ +# ID pos_x pos_y pos_z wcount wcount_dh + 0 1e-6 1e-6 1e-6 4e-4 1.2e-2 + 0 1e-6 1e-6 1e-6 1e-4 2e-3 + 0 1e-6 1e-6 1e-6 1e-6 1e-6 diff --git a/tests/star_tolerance_27_perturbed.dat b/tests/star_tolerance_27_perturbed.dat new file mode 100644 index 0000000000000000000000000000000000000000..c64b5f779ed826f4fd24e02253da9aad66c9a4e1 --- /dev/null +++ b/tests/star_tolerance_27_perturbed.dat @@ -0,0 +1,4 @@ +# ID pos_x pos_y pos_z wcount wcount_dh + 0 1e-6 1e-6 1e-6 2e-4 1e-2 + 0 1e-6 1e-6 1e-6 1e-5 2e-3 + 0 1e-6 1e-6 1e-6 1e-6 1e0 diff --git a/tests/star_tolerance_27_perturbed_h.dat b/tests/star_tolerance_27_perturbed_h.dat new file mode 100644 index 0000000000000000000000000000000000000000..20367e6f09ac171cad17ab5418304bd5674e78d6 --- /dev/null +++ b/tests/star_tolerance_27_perturbed_h.dat @@ -0,0 +1,4 @@ +# ID pos_x pos_y pos_z wcount wcount_dh + 0 1e-6 1e-6 1e-6 5e-4 1.4e-2 + 0 1e-6 1e-6 1e-6 1e-5 4e-3 + 0 1e-6 1e-6 1e-6 1e-6 1e0 diff --git a/tests/star_tolerance_27_perturbed_h2.dat b/tests/star_tolerance_27_perturbed_h2.dat new file mode 100644 index 0000000000000000000000000000000000000000..fe89f21dd2fe37360bc0e3a2c5431528075bf2e5 --- /dev/null +++ b/tests/star_tolerance_27_perturbed_h2.dat @@ -0,0 +1,4 @@ +# ID pos_x pos_y pos_z wcount wcount_dh + 0 1e-6 1e-6 1e-6 5e-4 1.5e-2 + 0 1e-6 1e-6 1e-6 1e-5 5.86e-3 + 0 1e-6 1e-6 1e-6 1e-6 1e0 diff --git a/tests/test27cellsStars.c b/tests/test27cellsStars.c new file mode 100644 index 0000000000000000000000000000000000000000..f74ea1b01400e0176bc6a6ef28a7b89184f2a271 --- /dev/null +++ b/tests/test27cellsStars.c @@ -0,0 +1,534 @@ +/******************************************************************************* + * This file is part of SWIFT. + * Copyright (C) 2015 Matthieu Schaller (matthieu.schaller@durham.ac.uk). + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published + * by the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public License + * along with this program. If not, see <http://www.gnu.org/licenses/>. + * + ******************************************************************************/ + +/* Config parameters. */ +#include "../config.h" + +/* Some standard headers. */ +#include <fenv.h> +#include <stdio.h> +#include <stdlib.h> +#include <string.h> +#include <unistd.h> + +/* Local headers. */ +#include "swift.h" + +#define DOSELF1 runner_doself_branch_stars_density +#define DOSELF1_SUBSET runner_doself_subset_branch_stars_density +#ifdef TEST_DOSELF_SUBSET +#define DOSELF1_NAME "runner_doself_subset_branch_stars_density" +#else +#define DOSELF1_NAME "runner_doself1_branch_stars_density" +#endif + +#define DOPAIR1 runner_dopair_branch_stars_density +#define DOPAIR1_SUBSET runner_dopair_subset_branch_stars_density +#ifdef TEST_DOPAIR_SUBSET +#define DOPAIR1_NAME "runner_dopair_subset_branch_stars_density" +#else +#define DOPAIR1_NAME "runner_dopair_branch_stars_density" +#endif + +#define NODE_ID 0 + +/** + * @brief Constructs a cell and all of its particle in a valid state prior to + * a DOPAIR or DOSELF calcuation. + * + * @param n The cube root of the number of particles. + * @param n The cube root of the number of star particles. + * @param offset The position of the cell offset from (0,0,0). + * @param size The cell size. + * @param h The smoothing length of the particles in units of the inter-particle + * separation. + * @param partId The running counter of IDs. + * @param pert The perturbation to apply to the particles in the cell in units + * of the inter-particle separation. + * @param h_pert The perturbation to apply to the smoothing length. + */ +struct cell *make_cell(size_t n, size_t n_stars, double *offset, double size, + double h, long long *partId, long long *spartId, + double pert, double h_pert) { + const size_t count = n * n * n; + const size_t scount = n_stars * n_stars * n_stars; + float h_max = 0.f; + struct cell *cell = (struct cell *)malloc(sizeof(struct cell)); + bzero(cell, sizeof(struct cell)); + + if (posix_memalign((void **)&cell->parts, part_align, + count * sizeof(struct part)) != 0) { + error("couldn't allocate particles, no. of particles: %d", (int)count); + } + bzero(cell->parts, count * sizeof(struct part)); + + /* Construct the parts */ + struct part *part = cell->parts; + for (size_t x = 0; x < n; ++x) { + for (size_t y = 0; y < n; ++y) { + for (size_t z = 0; z < n; ++z) { + part->x[0] = + offset[0] + + size * (x + 0.5 + random_uniform(-0.5, 0.5) * pert) / (float)n; + part->x[1] = + offset[1] + + size * (y + 0.5 + random_uniform(-0.5, 0.5) * pert) / (float)n; + part->x[2] = + offset[2] + + size * (z + 0.5 + random_uniform(-0.5, 0.5) * pert) / (float)n; + + part->v[0] = 0; + part->v[1] = 0; + part->v[2] = 0; + if (h_pert) + part->h = size * h * random_uniform(1.f, h_pert) / (float)n; + else + part->h = size * h / (float)n; + h_max = fmaxf(h_max, part->h); + part->id = ++(*partId); + + part->time_bin = 1; + +#ifdef SWIFT_DEBUG_CHECKS + part->ti_drift = 8; + part->ti_kick = 8; +#endif + ++part; + } + } + } + + /* Construct the sparts */ + if (posix_memalign((void **)&cell->sparts, spart_align, + scount * sizeof(struct spart)) != 0) { + error("couldn't allocate particles, no. of particles: %d", (int)scount); + } + bzero(cell->sparts, scount * sizeof(struct spart)); + + struct spart *spart = cell->sparts; + for (size_t x = 0; x < n_stars; ++x) { + for (size_t y = 0; y < n_stars; ++y) { + for (size_t z = 0; z < n_stars; ++z) { + spart->x[0] = + offset[0] + size * (x + 0.5 + random_uniform(-0.5, 0.5) * pert) / + (float)n_stars; + spart->x[1] = + offset[1] + size * (y + 0.5 + random_uniform(-0.5, 0.5) * pert) / + (float)n_stars; + spart->x[2] = + offset[2] + size * (z + 0.5 + random_uniform(-0.5, 0.5) * pert) / + (float)n_stars; + + spart->v[0] = 0; + spart->v[1] = 0; + spart->v[2] = 0; + if (h_pert) + spart->h = size * h * random_uniform(1.f, h_pert) / (float)n_stars; + else + spart->h = size * h / (float)n_stars; + h_max = fmaxf(h_max, spart->h); + spart->id = ++(*spartId); + + spart->time_bin = 1; + +#ifdef SWIFT_DEBUG_CHECKS + spart->ti_drift = 8; + spart->ti_kick = 8; +#endif + ++spart; + } + } + } + + /* Cell properties */ + cell->split = 0; + cell->h_max = h_max; + cell->count = count; + cell->scount = scount; + cell->dx_max_part = 0.; + cell->dx_max_sort = 0.; + cell->width[0] = size; + cell->width[1] = size; + cell->width[2] = size; + cell->loc[0] = offset[0]; + cell->loc[1] = offset[1]; + cell->loc[2] = offset[2]; + + cell->ti_old_part = 8; + cell->ti_hydro_end_min = 8; + cell->ti_hydro_end_max = 8; + cell->ti_gravity_end_min = 8; + cell->ti_gravity_end_max = 8; + cell->nodeID = NODE_ID; + + shuffle_particles(cell->parts, cell->count); + shuffle_sparticles(cell->sparts, cell->scount); + + cell->sorted = 0; + for (int k = 0; k < 13; k++) cell->sort[k] = NULL; + + return cell; +} + +void clean_up(struct cell *ci) { + free(ci->parts); + free(ci->sparts); + for (int k = 0; k < 13; k++) + if (ci->sort[k] != NULL) free(ci->sort[k]); + free(ci); +} + +/** + * @brief Initializes all particles field to be ready for a density calculation + */ +void zero_particle_fields(struct cell *c) { + for (int pid = 0; pid < c->scount; pid++) { + stars_init_spart(&c->sparts[pid]); + } +} + +/** + * @brief Ends the loop by adding the appropriate coefficients + */ +void end_calculation(struct cell *c, const struct cosmology *cosmo) { + for (int pid = 0; pid < c->scount; pid++) { + stars_end_density(&c->sparts[pid], cosmo); + + /* Recover the common "Neighbour number" definition */ + c->sparts[pid].density.wcount *= pow_dimension(c->sparts[pid].h); + c->sparts[pid].density.wcount *= kernel_norm; + } +} + +/** + * @brief Dump all the particles to a file + */ +void dump_particle_fields(char *fileName, struct cell *main_cell, + struct cell **cells) { + FILE *file = fopen(fileName, "w"); + + /* Write header */ + fprintf(file, "# %4s %10s %10s %10s %13s %13s\n", "ID", "pos_x", "pos_y", + "pos_z", "wcount", "wcount_dh"); + + fprintf(file, "# Main cell --------------------------------------------\n"); + + /* Write main cell */ + for (int pid = 0; pid < main_cell->scount; pid++) { + fprintf(file, "%6llu %10f %10f %10f %13e %13e\n", main_cell->sparts[pid].id, + main_cell->sparts[pid].x[0], main_cell->sparts[pid].x[1], + main_cell->sparts[pid].x[2], main_cell->sparts[pid].density.wcount, + main_cell->sparts[pid].density.wcount_dh); + } + + /* Write all other cells */ + for (int i = 0; i < 3; ++i) { + for (int j = 0; j < 3; ++j) { + for (int k = 0; k < 3; ++k) { + struct cell *cj = cells[i * 9 + j * 3 + k]; + if (cj == main_cell) continue; + + fprintf(file, + "# Offset: [%2d %2d %2d] -----------------------------------\n", + i - 1, j - 1, k - 1); + + for (int pjd = 0; pjd < cj->scount; pjd++) { + fprintf(file, "%6llu %10f %10f %10f %13e %13e\n", cj->sparts[pjd].id, + cj->sparts[pjd].x[0], cj->sparts[pjd].x[1], + cj->sparts[pjd].x[2], cj->sparts[pjd].density.wcount, + cj->sparts[pjd].density.wcount_dh); + } + } + } + } + fclose(file); +} + +/* Just a forward declaration... */ +void runner_dopair_branch_stars_density(struct runner *r, struct cell *ci, + struct cell *cj); +void runner_doself_branch_stars_density(struct runner *r, struct cell *c); +void runner_dopair_subset_branch_stars_density(struct runner *r, + struct cell *restrict ci, + struct spart *restrict sparts_i, + int *restrict ind, int scount, + struct cell *restrict cj); +void runner_doself_subset_branch_stars_density(struct runner *r, + struct cell *restrict ci, + struct spart *restrict sparts, + int *restrict ind, int scount); + +/* And go... */ +int main(int argc, char *argv[]) { + +#ifdef HAVE_SETAFFINITY + engine_pin(); +#endif + + size_t runs = 0, particles = 0; + size_t sparticles = 0; + double h = 1.23485, size = 1.; + double perturbation = 0., h_pert = 0.; + char outputFileNameExtension[100] = ""; + char outputFileName[200] = ""; + + /* Initialize CPU frequency, this also starts time. */ + unsigned long long cpufreq = 0; + clocks_set_cpufreq(cpufreq); + +/* Choke on FP-exceptions */ +#ifdef HAVE_FE_ENABLE_EXCEPT + feenableexcept(FE_DIVBYZERO | FE_INVALID | FE_OVERFLOW); +#endif + + /* Get some randomness going */ + srand(0); + + char c; + while ((c = getopt(argc, argv, "s:h:p:n:N:r:t:d:f:")) != -1) { + switch (c) { + case 'h': + sscanf(optarg, "%lf", &h); + break; + case 'p': + sscanf(optarg, "%lf", &h_pert); + break; + case 's': + sscanf(optarg, "%lf", &size); + break; + case 'n': + sscanf(optarg, "%zu", &particles); + break; + case 'N': + sscanf(optarg, "%zu", &sparticles); + break; + case 'r': + sscanf(optarg, "%zu", &runs); + break; + case 'd': + sscanf(optarg, "%lf", &perturbation); + break; + case 'f': + strcpy(outputFileNameExtension, optarg); + break; + case '?': + error("Unknown option."); + break; + } + } + + if (h < 0 || particles == 0 || runs == 0 || sparticles == 0) { + printf( + "\nUsage: %s -n PARTICLES_PER_AXIS -N SPARTICLES_PER_AXIS -r " + "NUMBER_OF_RUNS [OPTIONS...]\n" + "\nGenerates 27 cells, filled with particles on a Cartesian grid." + "\nThese are then interacted using runner_dopair_stars_density() and " + "runner_doself_stars_density()." + "\n\nOptions:" + "\n-h DISTANCE=1.2348 - Smoothing length in units of <x>" + "\n-p - Random fractional change in h, h=h*random(1,p)" + "\n-s size - Physical size of the cell" + "\n-d pert - Perturbation to apply to the particles [0,1[" + "\n-f fileName - Part of the file name used to save the dumps\n", + argv[0]); + exit(1); + } + + /* Help users... */ + message("DOSELF1 function called: %s", DOSELF1_NAME); + message("DOPAIR1 function called: %s", DOPAIR1_NAME); + message("Smoothing length: h = %f", h * size); + message("Kernel: %s", kernel_name); + message("Neighbour target: N = %f", pow_dimension(h) * kernel_norm); + + printf("\n"); + + /* Build the infrastructure */ + struct space space; + space.periodic = 1; + space.dim[0] = 3.; + space.dim[1] = 3.; + space.dim[2] = 3.; + + struct hydro_props hp; + hp.eta_neighbours = h; + hp.h_tolerance = 1e0; + hp.h_max = FLT_MAX; + hp.max_smoothing_iterations = 1; + hp.CFL_condition = 0.1; + + struct stars_props stars_p; + stars_p.eta_neighbours = h; + stars_p.h_tolerance = 1e0; + stars_p.h_max = FLT_MAX; + stars_p.max_smoothing_iterations = 1; + + struct engine engine; + engine.s = &space; + engine.time = 0.1f; + engine.ti_current = 8; + engine.max_active_bin = num_time_bins; + engine.hydro_properties = &hp; + engine.stars_properties = &stars_p; + engine.nodeID = NODE_ID; + + struct cosmology cosmo; + cosmology_init_no_cosmo(&cosmo); + engine.cosmology = &cosmo; + + struct runner runner; + runner.e = &engine; + + /* Construct some cells */ + struct cell *cells[27]; + struct cell *main_cell; + static long long partId = 0; + long long spartId = particles * particles * particles * 27; + + for (int i = 0; i < 3; ++i) { + for (int j = 0; j < 3; ++j) { + for (int k = 0; k < 3; ++k) { + double offset[3] = {i * size, j * size, k * size}; + cells[i * 9 + j * 3 + k] = + make_cell(particles, sparticles, offset, size, h, &partId, &spartId, + perturbation, h_pert); + + runner_do_drift_part(&runner, cells[i * 9 + j * 3 + k], 0); + + runner_do_sort(&runner, cells[i * 9 + j * 3 + k], 0x1FFF, 0, 0); + } + } + } + + /* Store the main cell for future use */ + main_cell = cells[13]; + + ticks timings[27]; + for (int i = 0; i < 27; i++) timings[i] = 0; + + ticks time = 0; + for (size_t i = 0; i < runs; ++i) { + /* Zero the fields */ + for (int j = 0; j < 27; ++j) zero_particle_fields(cells[j]); + + const ticks tic = getticks(); + +#if defined(TEST_DOSELF_SUBSET) || defined(TEST_DOPAIR_SUBSET) + int *pid = NULL; + int scount = 0; + if ((pid = (int *)malloc(sizeof(int) * main_cell->scount)) == NULL) + error("Can't allocate memory for pid."); + for (int k = 0; k < main_cell->scount; k++) + if (spart_is_active(&main_cell->sparts[k], &engine)) { + pid[scount] = k; + ++scount; + } +#endif + + /* Run all the pairs */ + for (int j = 0; j < 27; ++j) { + if (cells[j] != main_cell) { + const ticks sub_tic = getticks(); + +#ifdef TEST_DOPAIR_SUBSET + DOPAIR1_SUBSET(&runner, main_cell, main_cell->sparts, pid, scount, + cells[j]); +#else + DOPAIR1(&runner, main_cell, cells[j]); +#endif + + timings[j] += getticks() - sub_tic; + } + } + + /* And now the self-interaction */ + const ticks self_tic = getticks(); + +#ifdef TEST_DOSELF_SUBSET + DOSELF1_SUBSET(&runner, main_cell, main_cell->sparts, pid, scount); +#else + DOSELF1(&runner, main_cell); +#endif + + timings[13] += getticks() - self_tic; + + const ticks toc = getticks(); + time += toc - tic; + + /* Let's get physical ! */ + end_calculation(main_cell, &cosmo); + + /* Dump if necessary */ + if (i % 50 == 0) { + sprintf(outputFileName, "swift_star_dopair_27_%.150s.dat", + outputFileNameExtension); + dump_particle_fields(outputFileName, main_cell, cells); + } + } + + /* Output timing */ + ticks corner_time = timings[0] + timings[2] + timings[6] + timings[8] + + timings[18] + timings[20] + timings[24] + timings[26]; + + ticks edge_time = timings[1] + timings[3] + timings[5] + timings[7] + + timings[9] + timings[11] + timings[15] + timings[17] + + timings[19] + timings[21] + timings[23] + timings[25]; + + ticks face_time = timings[4] + timings[10] + timings[12] + timings[14] + + timings[16] + timings[22]; + + message("Corner calculations took : %15lli ticks.", corner_time / runs); + message("Edge calculations took : %15lli ticks.", edge_time / runs); + message("Face calculations took : %15lli ticks.", face_time / runs); + message("Self calculations took : %15lli ticks.", timings[13] / runs); + message("SWIFT calculation took : %15lli ticks.", time / runs); + + /* Now perform a brute-force version for accuracy tests */ + + /* Zero the fields */ + for (int i = 0; i < 27; ++i) zero_particle_fields(cells[i]); + + const ticks tic = getticks(); + + /* Run all the brute-force pairs */ + for (int j = 0; j < 27; ++j) + if (cells[j] != main_cell) + pairs_all_stars_density(&runner, main_cell, cells[j]); + + /* And now the self-interaction */ + self_all_stars_density(&runner, main_cell); + + const ticks toc = getticks(); + + /* Let's get physical ! */ + end_calculation(main_cell, &cosmo); + + /* Dump */ + sprintf(outputFileName, "star_brute_force_27_%.150s.dat", + outputFileNameExtension); + dump_particle_fields(outputFileName, main_cell, cells); + + /* Output timing */ + message("Brute force calculation took : %15lli ticks.", toc - tic); + + /* Clean things to make the sanitizer happy ... */ + for (int i = 0; i < 27; ++i) clean_up(cells[i]); + + return 0; +} diff --git a/tests/test27cellsStars.sh.in b/tests/test27cellsStars.sh.in new file mode 100644 index 0000000000000000000000000000000000000000..5385b86fca6bcd24878f51567266eb81b7c21772 --- /dev/null +++ b/tests/test27cellsStars.sh.in @@ -0,0 +1,85 @@ +#!/bin/bash + +# List each test that should be run +declare -a TEST_LIST=(test27cellsStars test27cellsStars_subset) + +# Run same test for each executable +for TEST in "${TEST_LIST[@]}" +do + # Test for particles with the same smoothing length + echo "" + + rm -f star_brute_force_27_standard.dat swift_star_dopair_27_standard.dat + + echo "Running ./$TEST -n 6 -N 7 -r 1 -d 0 -f standard" + ./$TEST -n 6 -N 7 -r 1 -d 0 -f standard + + if [ -e star_brute_force_27_standard.dat ] + then + if python @srcdir@/difffloat.py star_brute_force_27_standard.dat swift_star_dopair_27_standard.dat @srcdir@/star_tolerance_27_normal.dat 6 + then + echo "Accuracy test passed" + else + echo "Accuracy test failed" + exit 1 + fi + else + echo "Error Missing test output file" + exit 1 + fi + + echo "------------" + + + # Test for particles with random smoothing lengths + echo "" + + rm -f star_brute_force_27_standard.dat swift_star_dopair_27_standard.dat + + echo "Running ./$TEST -n 6 -N 7 -r 1 -d 0 -f standard -p 1.1" + ./$TEST -n 6 -N 7 -r 1 -d 0 -f standard -p 1.1 + + if [ -e star_brute_force_27_standard.dat ] + then + if python @srcdir@/difffloat.py star_brute_force_27_standard.dat swift_star_dopair_27_standard.dat @srcdir@/star_tolerance_27_perturbed_h.dat 6 + then + echo "Accuracy test passed" + else + echo "Accuracy test failed" + exit 1 + fi + else + echo "Error Missing test output file" + exit 1 + fi + + echo "------------" + + + # Test for particles with random smoothing lengths + echo "" + + rm -f star_brute_force_27_standard.dat swift_star_dopair_27_standard.dat + + echo "Running ./$TEST -n 6 -N 7 -r 1 -d 0 -f standard -p 1.3" + ./$TEST -n 6 -N 7 -r 1 -d 0 -f standard -p 1.3 + + if [ -e star_brute_force_27_standard.dat ] + then + if python @srcdir@/difffloat.py star_brute_force_27_standard.dat swift_star_dopair_27_standard.dat @srcdir@/star_tolerance_27_perturbed_h2.dat 6 + then + echo "Accuracy test passed" + else + echo "Accuracy test failed" + exit 1 + fi + else + echo "Error Missing test output file" + exit 1 + fi + + echo "------------" + +done + +exit $? diff --git a/tests/test27cellsStarsPerturbed.sh.in b/tests/test27cellsStarsPerturbed.sh.in new file mode 100644 index 0000000000000000000000000000000000000000..ddf258fc17e6054d801ea9c73b4d0bd274cfad12 --- /dev/null +++ b/tests/test27cellsStarsPerturbed.sh.in @@ -0,0 +1,76 @@ +#!/bin/bash + +# Test for particles with the same smoothing length + echo "" + + rm -f star_brute_force_27_perturbed.dat swift_star_dopair_27_perturbed.dat + + echo "Running ./test27cellsStars -n 6 -N 7 -r 1 -d 0.1 -f perturbed" + ./test27cellsStars -n 6 -N 7 -r 1 -d 0.1 -f perturbed + + if [ -e star_brute_force_27_perturbed.dat ] + then + if python @srcdir@/difffloat.py star_brute_force_27_perturbed.dat swift_star_dopair_27_perturbed.dat @srcdir@/star_tolerance_27_perturbed.dat 6 + then + echo "Accuracy test passed" + else + echo "Accuracy test failed" + exit 1 + fi + else + echo "Error Missing test output file" + exit 1 + fi + + echo "------------" + +# Test for particles with random smoothing lengths + echo "" + + rm -f star_brute_force_27_perturbed.dat swift_star_dopair_27_perturbed.dat + + echo "Running ./test27cellsStars -n 6 -N 7 -r 1 -d 0.1 -f perturbed -p 1.1" + ./test27cellsStars -n 6 -N 7 -r 1 -d 0.1 -f perturbed -p 1.1 + + if [ -e star_brute_force_27_perturbed.dat ] + then + if python @srcdir@/difffloat.py star_brute_force_27_perturbed.dat swift_star_dopair_27_perturbed.dat @srcdir@/star_tolerance_27_perturbed_h.dat 6 + then + echo "Accuracy test passed" + else + echo "Accuracy test failed" + exit 1 + fi + else + echo "Error Missing test output file" + exit 1 + fi + + echo "------------" + + +# Test for particles with random smoothing lengths + echo "" + + rm -f star_brute_force_27_perturbed.dat swift_star_dopair_27_perturbed.dat + + echo "Running ./test27cellsStars -n 6 -N 7 -r 1 -d 0.1 -f perturbed -p 1.3" + ./test27cellsStars -n 6 -N 7 -r 1 -d 0.1 -f perturbed -p 1.3 + + if [ -e star_brute_force_27_perturbed.dat ] + then + if python @srcdir@/difffloat.py star_brute_force_27_perturbed.dat swift_star_dopair_27_perturbed.dat @srcdir@/star_tolerance_27_perturbed_h2.dat 6 + then + echo "Accuracy test passed" + else + echo "Accuracy test failed" + exit 1 + fi + else + echo "Error Missing test output file" + exit 1 + fi + + echo "------------" + +exit $?