From 0e464cb625a7d5f26c48702f8a355034951283cb Mon Sep 17 00:00:00 2001 From: Matthieu Schaller Date: Wed, 1 Jul 2020 23:01:48 +0200 Subject: [PATCH 1/6] Push the cooling task to a lower level to gain more parallelism in the case of GRACKLE cooling for instance. --- examples/parameter_example.yml | 1 + src/cell.c | 46 ++++++++++++++++++++++++++++++- src/cell.h | 6 ++++ src/engine.h | 1 + src/engine_maketasks.c | 42 +++++++++++++++++++++++++--- src/engine_marktasks.c | 3 +- src/space.c | 12 ++++++++ src/task.c | 2 ++ src/task.h | 2 ++ tools/task_plots/analyse_tasks.py | 2 ++ tools/task_plots/plot_tasks.py | 2 ++ 11 files changed, 113 insertions(+), 6 deletions(-) diff --git a/examples/parameter_example.yml b/examples/parameter_example.yml index 7a0d8fec4..715542250 100644 --- a/examples/parameter_example.yml +++ b/examples/parameter_example.yml @@ -119,6 +119,7 @@ Scheduler: mpi_message_limit: 4096 # (Optional) Maximum MPI task message size to send non-buffered, KB. engine_max_parts_per_ghost: 1000 # (Optional) Maximum number of parts per ghost. engine_max_sparts_per_ghost: 1000 # (Optional) Maximum number of sparts per ghost. + engine_max_parts_per_cooling: 200 # (Optional) Maximum number of parts per cooling task. # Parameters governing the time integration (Set dt_min and dt_max to the same value for a fixed time-step run.) TimeIntegration: diff --git a/src/cell.c b/src/cell.c index 2ffbed40a..641bf0db1 100644 --- a/src/cell.c +++ b/src/cell.c @@ -2613,6 +2613,50 @@ void cell_activate_hydro_ghosts(struct cell *c, struct scheduler *s, cell_recursively_activate_hydro_ghosts(c, s, e); } +/** + * @brief Recursively activate the cooling (and implicit links) in a cell + * hierarchy. + * + * @param c The #cell. + * @param s The #scheduler. + * @param e The #engine. + */ +void cell_recursively_activate_cooling(struct cell *c, struct scheduler *s, + const struct engine *e) { + /* Early abort? 
*/ + if ((c->hydro.count == 0) || !cell_is_active_hydro(c, e)) return; + + /* Is the cooling task at this level? */ + if (c->hydro.cooling != NULL) { + scheduler_activate(s, c->hydro.cooling); + } else { + +#ifdef SWIFT_DEBUG_CHECKS + if (!c->split) + error("Reached the leaf level without finding a cooling task!"); +#endif + + /* Keep recursing */ + for (int k = 0; k < 8; k++) + if (c->progeny[k] != NULL) + cell_recursively_activate_cooling(c->progeny[k], s, e); + } +} + +/** + * @brief Activate the cooling tasks (and implicit links) in a cell hierarchy. + * + * @param c The #cell. + * @param s The #scheduler. + * @param e The #engine. + */ +void cell_activate_cooling(struct cell *c, struct scheduler *s, + const struct engine *e) { + scheduler_activate(s, c->hydro.cooling_in); + scheduler_activate(s, c->hydro.cooling_out); + cell_recursively_activate_cooling(c, s, e); +} + /** * @brief Recurse down in a cell hierarchy until the hydro.super level is * reached and activate the spart drift at that level. @@ -3722,7 +3766,7 @@ int cell_unskip_hydro_tasks(struct cell *c, struct scheduler *s) { if (c->kick2 != NULL) scheduler_activate(s, c->kick2); if (c->timestep != NULL) scheduler_activate(s, c->timestep); if (c->hydro.end_force != NULL) scheduler_activate(s, c->hydro.end_force); - if (c->hydro.cooling != NULL) scheduler_activate(s, c->hydro.cooling); + if (c->hydro.cooling_in != NULL) cell_activate_cooling(c, s, e); #ifdef WITH_LOGGER if (c->logger != NULL) scheduler_activate(s, c->logger); #endif diff --git a/src/cell.h b/src/cell.h index 9666d44d1..766f3e207 100644 --- a/src/cell.h +++ b/src/cell.h @@ -390,6 +390,12 @@ struct cell { /*! The task to end the force calculation */ struct task *end_force; + /*! Dependency implicit task for cooling (in->cooling->out) */ + struct task *cooling_in; + + /*! Dependency implicit task for cooling (in->cooling->out) */ + struct task *cooling_out; + /*! 
Task for cooling */ struct task *cooling; diff --git a/src/engine.h b/src/engine.h index d10fa694e..2adbd4a21 100644 --- a/src/engine.h +++ b/src/engine.h @@ -114,6 +114,7 @@ enum engine_step_properties { #define engine_default_timesteps_file_name "timesteps" #define engine_max_parts_per_ghost_default 1000 #define engine_max_sparts_per_ghost_default 1000 +#define engine_max_parts_per_cooling_default 200 #define engine_star_resort_task_depth_default 2 #define engine_tasks_per_cell_margin 1.2 #define engine_default_stf_subdir_per_output "." diff --git a/src/engine_maketasks.c b/src/engine_maketasks.c index b73aa475c..c2fa9eef4 100644 --- a/src/engine_maketasks.c +++ b/src/engine_maketasks.c @@ -56,6 +56,7 @@ extern int engine_max_parts_per_ghost; extern int engine_max_sparts_per_ghost; extern int engine_star_resort_task_depth; +extern int engine_max_parts_per_cooling; /** * @brief Add send tasks for the gravity pairs to a hierarchy of cells. @@ -1055,6 +1056,33 @@ void engine_add_ghosts(struct engine *e, struct cell *c, struct task *ghost_in, } } +/** + * @brief Recursively add non-implicit cooling tasks to a cell hierarchy. + */ +void engine_add_cooling(struct engine *e, struct cell *c, + struct task *cooling_in, struct task *cooling_out) { + + /* Abort as there are no hydro particles here? 
*/ + if (c->hydro.count_total == 0) return; + + /* If we have reached the leaf OR have too few particles to play with */ + if (!c->split || c->hydro.count_total < engine_max_parts_per_cooling) { + + /* Add the cooling task and its dependencies */ + struct scheduler *s = &e->sched; + c->hydro.cooling = scheduler_addtask(s, task_type_cooling, + task_subtype_none, 0, 0, c, NULL); + scheduler_addunlock(s, cooling_in, c->hydro.cooling); + scheduler_addunlock(s, c->hydro.cooling, cooling_out); + + } else { + /* Keep recursing */ + for (int k = 0; k < 8; k++) + if (c->progeny[k] != NULL) + engine_add_cooling(e, c->progeny[k], cooling_in, cooling_out); + } +} + /** * @brief Generate the hydro hierarchical tasks for a hierarchy of cells - * i.e. all the O(Npart) tasks -- hydro version @@ -1163,11 +1191,17 @@ void engine_make_hierarchical_tasks_hydro(struct engine *e, struct cell *c, /* Subgrid tasks: cooling */ if (with_cooling) { - c->hydro.cooling = scheduler_addtask(s, task_type_cooling, - task_subtype_none, 0, 0, c, NULL); + c->hydro.cooling_in = + scheduler_addtask(s, task_type_cooling_in, task_subtype_none, 0, + /*implicit=*/1, c, NULL); + c->hydro.cooling_out = + scheduler_addtask(s, task_type_cooling_out, task_subtype_none, 0, + /*implicit=*/1, c, NULL); + + engine_add_cooling(e, c, c->hydro.cooling_in, c->hydro.cooling_out); - scheduler_addunlock(s, c->hydro.end_force, c->hydro.cooling); - scheduler_addunlock(s, c->hydro.cooling, c->super->kick2); + scheduler_addunlock(s, c->hydro.end_force, c->hydro.cooling_in); + scheduler_addunlock(s, c->hydro.cooling_out, c->super->kick2); } else { scheduler_addunlock(s, c->hydro.end_force, c->super->kick2); diff --git a/src/engine_marktasks.c b/src/engine_marktasks.c index 532297a02..21e85010e 100644 --- a/src/engine_marktasks.c +++ b/src/engine_marktasks.c @@ -978,7 +978,8 @@ void engine_marktasks_mapper(void *map_data, int num_elements, } /* Subgrid tasks: cooling */ - else if (t_type == task_type_cooling) { + else if (t_type 
== task_type_cooling || t_type == task_type_cooling_in || + t_type == task_type_cooling_out) { if (cell_is_active_hydro(t->ci, e)) scheduler_activate(s, t); } diff --git a/src/space.c b/src/space.c index 156d34372..60b3c3975 100644 --- a/src/space.c +++ b/src/space.c @@ -94,6 +94,7 @@ int space_extra_gparts = space_extra_gparts_default; /*! Maximum number of particles per ghost */ int engine_max_parts_per_ghost = engine_max_parts_per_ghost_default; int engine_max_sparts_per_ghost = engine_max_sparts_per_ghost_default; +int engine_max_parts_per_cooling = engine_max_parts_per_cooling_default; /*! Maximal depth at which the stars resort task can be pushed */ int engine_star_resort_task_depth = engine_star_resort_task_depth_default; @@ -252,6 +253,8 @@ void space_rebuild_recycle_mapper(void *map_data, int num_elements, c->black_holes.black_holes_out = NULL; c->grav.drift = NULL; c->grav.drift_out = NULL; + c->hydro.cooling_in = NULL; + c->hydro.cooling_out = NULL; c->hydro.cooling = NULL; c->grav.long_range = NULL; c->grav.down_in = NULL; @@ -4986,6 +4989,10 @@ void space_init(struct space *s, struct swift_params *params, parser_get_opt_param_int(params, "Scheduler:engine_max_sparts_per_ghost", engine_max_sparts_per_ghost_default); + engine_max_parts_per_cooling = + parser_get_opt_param_int(params, "Scheduler:engine_max_parts_per_cooling", + engine_max_parts_per_cooling_default); + if (verbose) { message("max_size set to %d split_size set to %d", space_maxsize, space_splitsize); @@ -5853,6 +5860,9 @@ void space_struct_dump(struct space *s, FILE *stream) { restart_write_blocks(&engine_max_sparts_per_ghost, sizeof(int), 1, stream, "engine_max_sparts_per_ghost", "engine_max_sparts_per_ghost"); + restart_write_blocks(&engine_max_parts_per_cooling, sizeof(int), 1, stream, + "engine_max_parts_per_cooling", + "engine_max_parts_per_cooling"); restart_write_blocks(&engine_star_resort_task_depth, sizeof(int), 1, stream, "engine_star_resort_task_depth", 
"engine_star_resort_task_depth"); @@ -5920,6 +5930,8 @@ void space_struct_restore(struct space *s, FILE *stream) { "engine_max_parts_per_ghost"); restart_read_blocks(&engine_max_sparts_per_ghost, sizeof(int), 1, stream, NULL, "engine_max_sparts_per_ghost"); + restart_read_blocks(&engine_max_parts_per_cooling, sizeof(int), 1, stream, + NULL, "engine_max_parts_per_cooling"); restart_read_blocks(&engine_star_resort_task_depth, sizeof(int), 1, stream, NULL, "engine_star_resort_task_depth"); diff --git a/src/task.c b/src/task.c index 3efc2fe4f..09721bcd8 100644 --- a/src/task.c +++ b/src/task.c @@ -81,6 +81,8 @@ const char *taskID_names[task_type_count] = {"none", "grav_mesh", "grav_end_force", "cooling", + "cooling_in", + "cooling_out", "star_formation", "star_formation_in", "star_formation_out", diff --git a/src/task.h b/src/task.h index 97ff8b446..c8b7e587c 100644 --- a/src/task.h +++ b/src/task.h @@ -75,6 +75,8 @@ enum task_types { task_type_grav_mesh, task_type_end_grav_force, task_type_cooling, + task_type_cooling_in, /* Implicit */ + task_type_cooling_out, /* Implicit */ task_type_star_formation, task_type_star_formation_in, /* Implicit */ task_type_star_formation_out, /* Implicit */ diff --git a/tools/task_plots/analyse_tasks.py b/tools/task_plots/analyse_tasks.py index 4a87ebad3..ffa0f6d5e 100755 --- a/tools/task_plots/analyse_tasks.py +++ b/tools/task_plots/analyse_tasks.py @@ -103,6 +103,8 @@ TASKTYPES = [ "grav_mesh", "grav_end_force", "cooling", + "cooling_in", + "cooling_out", "star_formation", "star_formation_in", "star_formation_out", diff --git a/tools/task_plots/plot_tasks.py b/tools/task_plots/plot_tasks.py index a0d58e351..a0eb356ca 100755 --- a/tools/task_plots/plot_tasks.py +++ b/tools/task_plots/plot_tasks.py @@ -180,6 +180,8 @@ TASKTYPES = [ "grav_mesh", "grav_end_force", "cooling", + "cooling_in", + "cooling_out", "star_formation", "star_formation_in", "star_formation_out", -- GitLab From a73d9fcee0e675ac13d3c46bd94d87e9da78aa46 Mon Sep 17 
00:00:00 2001 From: Matthieu Schaller Date: Fri, 3 Jul 2020 14:52:56 +0200 Subject: [PATCH 2/6] Add the missing new tasks to the interactive task plotting script --- tools/plot_task_dependencies.py | 0 tools/task_plots/iplot_tasks.py | 2 ++ 2 files changed, 2 insertions(+) mode change 100644 => 100755 tools/plot_task_dependencies.py diff --git a/tools/plot_task_dependencies.py b/tools/plot_task_dependencies.py old mode 100644 new mode 100755 diff --git a/tools/task_plots/iplot_tasks.py b/tools/task_plots/iplot_tasks.py index 1a6b501fb..faa8c534d 100755 --- a/tools/task_plots/iplot_tasks.py +++ b/tools/task_plots/iplot_tasks.py @@ -157,6 +157,8 @@ TASKTYPES = [ "grav_mesh", "grav_end_force", "cooling", + "cooling_in", + "cooling_out", "star_formation", "star_formation_in", "star_formation_out", -- GitLab From d0ebee65793aab7ea354e3c35aca9cdcfa1b875f Mon Sep 17 00:00:00 2001 From: Matthieu Schaller Date: Mon, 6 Jul 2020 17:31:15 +0200 Subject: [PATCH 3/6] Better default value for the cooling splitting for the default case where the cooling task is inexpensive --- src/engine.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/engine.h b/src/engine.h index 2adbd4a21..63492619c 100644 --- a/src/engine.h +++ b/src/engine.h @@ -114,7 +114,7 @@ enum engine_step_properties { #define engine_default_timesteps_file_name "timesteps" #define engine_max_parts_per_ghost_default 1000 #define engine_max_sparts_per_ghost_default 1000 -#define engine_max_parts_per_cooling_default 200 +#define engine_max_parts_per_cooling_default 10000 #define engine_star_resort_task_depth_default 2 #define engine_tasks_per_cell_margin 1.2 #define engine_default_stf_subdir_per_output "." 
-- GitLab From 287637156b8daff58c7bd9c222666edd341d0045 Mon Sep 17 00:00:00 2001 From: Matthieu Schaller Date: Mon, 6 Jul 2020 17:34:59 +0200 Subject: [PATCH 4/6] Update the master example YAML file to showcase the new default value --- examples/parameter_example.yml | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/examples/parameter_example.yml b/examples/parameter_example.yml index 715542250..100e8ac1a 100644 --- a/examples/parameter_example.yml +++ b/examples/parameter_example.yml @@ -117,9 +117,9 @@ Scheduler: tasks_per_cell: 0.0 # (Optional) The average number of tasks per cell. If not large enough the simulation will fail (means guess...). links_per_tasks: 25 # (Optional) The average number of links per tasks (before adding the communication tasks). If not large enough the simulation will fail (means guess...). Defaults to 10. mpi_message_limit: 4096 # (Optional) Maximum MPI task message size to send non-buffered, KB. - engine_max_parts_per_ghost: 1000 # (Optional) Maximum number of parts per ghost. - engine_max_sparts_per_ghost: 1000 # (Optional) Maximum number of sparts per ghost. - engine_max_parts_per_cooling: 200 # (Optional) Maximum number of parts per cooling task. + engine_max_parts_per_ghost: 1000 # (Optional) Maximum number of parts per ghost. + engine_max_sparts_per_ghost: 1000 # (Optional) Maximum number of sparts per ghost. + engine_max_parts_per_cooling: 10000 # (Optional) Maximum number of parts per cooling task. # Parameters governing the time integration (Set dt_min and dt_max to the same value for a fixed time-step run.) 
TimeIntegration: -- GitLab From 9edeb5acd0a870d45caa12f70fcc7f30fe62ada3 Mon Sep 17 00:00:00 2001 From: Matthieu Schaller Date: Mon, 6 Jul 2020 17:36:26 +0200 Subject: [PATCH 5/6] Added Planetary IC file that was missing from the tree --- doc/RTD/source/Planetary/index.rst | 1 + 1 file changed, 1 insertion(+) diff --git a/doc/RTD/source/Planetary/index.rst b/doc/RTD/source/Planetary/index.rst index 3bda4637e..9368d5db0 100644 --- a/doc/RTD/source/Planetary/index.rst +++ b/doc/RTD/source/Planetary/index.rst @@ -32,3 +32,4 @@ chosen from the several available equations of state. Hydro Scheme Equations of State + Initial Conditions -- GitLab From 4d5fa30be931e947ad42582479f6707ecafba22b Mon Sep 17 00:00:00 2001 From: Matthieu Schaller Date: Mon, 6 Jul 2020 17:39:47 +0200 Subject: [PATCH 6/6] Update the RTD to describe the new parameter --- .../ParameterFiles/parameter_description.rst | 15 +++++++++------ 1 file changed, 9 insertions(+), 6 deletions(-) diff --git a/doc/RTD/source/ParameterFiles/parameter_description.rst b/doc/RTD/source/ParameterFiles/parameter_description.rst index 61cd9c68b..55c24e720 100644 --- a/doc/RTD/source/ParameterFiles/parameter_description.rst +++ b/doc/RTD/source/ParameterFiles/parameter_description.rst @@ -946,15 +946,18 @@ which stops these from being done at the scale of the leaf cells, of which there can be a large number. In this case cells with gravity tasks must be at least 4 levels above the leaf cells (when possible). -To control the depth at which the ghost tasks are placed, there are -two parameters (one for the gas, one for the stars). These specify the -maximum number of particles allowed in such a task before splitting -into finer ones. These parameters are: +To control the depth at which the ghost tasks are placed, there are two +parameters (one for the gas, one for the stars). These specify the maximum +number of particles allowed in such a task before splitting into finer ones. 
A +similar parameter exists for the cooling tasks, which can be useful to tweak for +models in which the cooling operations are expensive. These three parameters +are: .. code:: YAML - engine_max_parts_per_ghost: 1000 - engine_max_sparts_per_ghost: 1000 + engine_max_parts_per_ghost: 1000 + engine_max_sparts_per_ghost: 1000 + engine_max_parts_per_cooling: 10000 Extra space is required when particles are created in the system (to the time -- GitLab