Commit ec3afbcc authored by Matthieu Schaller's avatar Matthieu Schaller Committed by Peter W. Draper
Browse files

Speed up the unskip and scheduler_start process

parent 0372d3c0
......@@ -70,7 +70,7 @@ endif
# Common source files
AM_SOURCES = space.c runner.c queue.c task.c cell.c engine.c engine_maketasks.c \
engine_marktasks.c engine_drift.c serial_io.c timers.c debug.c scheduler.c \
engine_marktasks.c engine_drift.c engine_unskip.c serial_io.c timers.c debug.c scheduler.c \
proxy.c parallel_io.c units.c common_io.c single_io.c multipole.c version.c map.c \
kernel_hydro.c tools.c part.c partition.c clocks.c parser.c \
physical_constants.c potential.c hydro_properties.c \
......
......@@ -2479,6 +2479,50 @@ void cell_activate_star_formation_tasks(struct cell *c, struct scheduler *s) {
cell_activate_star_resort_tasks(c, s);
}
/**
 * @brief Walk down a cell hierarchy and activate the hydro ghost task found
 * on each non-empty, hydro-active branch.
 *
 * The descent along a branch stops at the first level where the cell carries
 * a hydro ghost task.
 *
 * @param c The #cell to start from.
 * @param s The #scheduler in which the tasks are activated.
 * @param e The #engine (used for the activity test).
 */
void cell_recursively_activate_hydro_ghosts(struct cell *c, struct scheduler *s,
                                            const struct engine *e) {

  /* Nothing to do for cells with a zero hydro count... */
  if (c->hydro.count == 0) return;

  /* ...nor for cells that are not hydro-active. */
  if (!cell_is_active_hydro(c, e)) return;

  /* The ghost lives at this level: activate it and stop descending. */
  if (c->hydro.ghost != NULL) {
    scheduler_activate(s, c->hydro.ghost);
    return;
  }

#ifdef SWIFT_DEBUG_CHECKS
  if (!c->split)
    error("Reached the leaf level without finding a hydro ghost!");
#endif

  /* No ghost here: look for it in the progeny. */
  for (int i = 0; i < 8; i++) {
    struct cell *child = c->progeny[i];
    if (child == NULL) continue;
    cell_recursively_activate_hydro_ghosts(child, s, e);
  }
}
/**
 * @brief Activate the implicit ghost_in / ghost_out tasks of a cell and then
 * the hydro ghosts found further down its hierarchy.
 *
 * Both c->hydro.ghost_in and c->hydro.ghost_out are handed to the scheduler
 * without any NULL test here.
 *
 * @param c The #cell holding the ghost_in and ghost_out tasks.
 * @param s The #scheduler.
 * @param e The #engine.
 */
void cell_activate_hydro_ghosts(struct cell *c, struct scheduler *s,
                                const struct engine *e) {

  /* The two implicit tasks attached to this cell. */
  scheduler_activate(s, c->hydro.ghost_in);
  scheduler_activate(s, c->hydro.ghost_out);

  /* The actual ghosts, at this level or below. */
  cell_recursively_activate_hydro_ghosts(c, s, e);
}
/**
* @brief Recurse down in a cell hierarchy until the hydro.super level is
* reached and activate the spart drift at that level.
......@@ -3505,9 +3549,7 @@ int cell_unskip_hydro_tasks(struct cell *c, struct scheduler *s) {
if (c->hydro.extra_ghost != NULL)
scheduler_activate(s, c->hydro.extra_ghost);
if (c->hydro.ghost_in != NULL) scheduler_activate(s, c->hydro.ghost_in);
if (c->hydro.ghost_out != NULL) scheduler_activate(s, c->hydro.ghost_out);
if (c->hydro.ghost != NULL) scheduler_activate(s, c->hydro.ghost);
if (c->hydro.ghost_in != NULL) cell_activate_hydro_ghosts(c, s, e);
if (c->kick1 != NULL) scheduler_activate(s, c->kick1);
if (c->kick2 != NULL) scheduler_activate(s, c->kick2);
if (c->timestep != NULL) scheduler_activate(s, c->timestep);
......
......@@ -273,8 +273,10 @@ struct pcell_sf {
} stars;
};
/** Bitmasks for the cell flags. Beware when adding flags that you don't exceed
the size of the flags variable in the struct cell. */
/**
* @brief Bitmasks for the cell flags. Beware when adding flags that you don't
* exceed the size of the flags variable in the struct cell.
*/
enum cell_flags {
cell_flag_split = (1UL << 0),
cell_flag_do_hydro_drift = (1UL << 1),
......@@ -289,7 +291,8 @@ enum cell_flags {
cell_flag_do_stars_sub_drift = (1UL << 10),
cell_flag_do_bh_drift = (1UL << 11),
cell_flag_do_bh_sub_drift = (1UL << 12),
cell_flag_do_stars_resort = (1UL << 13)
cell_flag_do_stars_resort = (1UL << 13),
cell_flag_has_tasks = (1UL << 14),
};
/**
......
......@@ -44,11 +44,6 @@
#include <numa.h>
#endif
/* Load the profiler header, if needed. */
#ifdef WITH_PROFILER
#include <gperftools/profiler.h>
#endif
/* This object's header. */
#include "engine.h"
......@@ -4131,64 +4126,6 @@ int engine_is_done(struct engine *e) {
return !(e->ti_current < max_nr_timesteps);
}
/**
 * @brief Unskip the tasks acting on the top-level cells that are active at
 * the current time.
 *
 * The list of local top-level cells with tasks is first partitioned so that
 * the active ones come first; the unskip mapper is then run over that leading
 * part of the list through the thread pool.
 *
 * @param e The #engine.
 */
void engine_unskip(struct engine *e) {

  const ticks tic = getticks();

  struct space *s = e->s;
  const int nodeID = e->nodeID;

  /* Which physics modules are switched on? */
  const int with_hydro = e->policy & engine_policy_hydro;
  const int with_self_grav = e->policy & engine_policy_self_gravity;
  const int with_ext_grav = e->policy & engine_policy_external_gravity;
  const int with_stars = e->policy & engine_policy_stars;
  const int with_feedback = e->policy & engine_policy_feedback;
  const int with_black_holes = e->policy & engine_policy_black_holes;

#ifdef WITH_PROFILER
  static int count = 0;
  char filename[100];
  sprintf(filename, "/tmp/swift_runner_do_usnkip_mapper_%06i.prof", count++);
  ProfilerStart(filename);
#endif  // WITH_PROFILER

  /* Partition the index list: active cells go to the front. */
  int *local_cells = s->local_cells_with_tasks_top;
  int num_active_cells = 0;
  for (int k = 0; k < s->nr_local_cells_with_tasks; k++) {

    struct cell *c = &s->cells_top[local_cells[k]];

    /* Is this cell active for any of the enabled modules? */
    const int is_active =
        (with_hydro && cell_is_active_hydro(c, e)) ||
        (with_self_grav && cell_is_active_gravity(c, e)) ||
        (with_ext_grav && c->nodeID == nodeID &&
         cell_is_active_gravity(c, e)) ||
        (with_feedback && cell_is_active_stars(c, e)) ||
        (with_stars && c->nodeID == nodeID && cell_is_active_stars(c, e)) ||
        (with_black_holes && cell_is_active_black_holes(c, e));

    if (!is_active) continue;

    /* Swap the entry into the next free slot of the active region. */
    if (k != num_active_cells)
      memswap(&local_cells[k], &local_cells[num_active_cells], sizeof(int));
    num_active_cells++;
  }

  /* Unskip the tasks of all the active cells. */
  threadpool_map(&e->threadpool, runner_do_unskip_mapper, local_cells,
                 num_active_cells, sizeof(int), 1, e);

#ifdef WITH_PROFILER
  ProfilerStop();
#endif  // WITH_PROFILER

  if (e->verbose)
    message("took %.3f %s.", clocks_from_ticks(getticks() - tic),
            clocks_getunit());
}
void engine_do_reconstruct_multipoles_mapper(void *map_data, int num_elements,
void *extra_data) {
......
......@@ -74,6 +74,9 @@ void engine_addtasks_send_gravity(struct engine *e, struct cell *ci,
struct scheduler *s = &e->sched;
const int nodeID = cj->nodeID;
/* Early abort (are we below the level where tasks are)? */
if (!cell_get_flag(ci, cell_flag_has_tasks)) return;
/* Check if any of the gravity tasks are for the target node. */
for (l = ci->grav.grav; l != NULL; l = l->next)
if (l->t->ci->nodeID == nodeID ||
......@@ -141,6 +144,9 @@ void engine_addtasks_send_hydro(struct engine *e, struct cell *ci,
struct scheduler *s = &e->sched;
const int nodeID = cj->nodeID;
/* Early abort (are we below the level where tasks are)? */
if (!cell_get_flag(ci, cell_flag_has_tasks)) return;
/* Check if any of the density tasks are for the target node. */
for (l = ci->hydro.density; l != NULL; l = l->next)
if (l->t->ci->nodeID == nodeID ||
......@@ -248,6 +254,9 @@ void engine_addtasks_send_stars(struct engine *e, struct cell *ci,
struct scheduler *s = &e->sched;
const int nodeID = cj->nodeID;
/* Early abort (are we below the level where tasks are)? */
if (!cell_get_flag(ci, cell_flag_has_tasks)) return;
if (t_sf_counts == NULL && with_star_formation && ci->hydro.count > 0) {
#ifdef SWIFT_DEBUG_CHECKS
if (ci->depth != 0)
......@@ -339,6 +348,9 @@ void engine_addtasks_send_black_holes(struct engine *e, struct cell *ci,
struct scheduler *s = &e->sched;
const int nodeID = cj->nodeID;
/* Early abort (are we below the level where tasks are)? */
if (!cell_get_flag(ci, cell_flag_has_tasks)) return;
/* Check if any of the density tasks are for the target node. */
for (l = ci->black_holes.density; l != NULL; l = l->next)
if (l->t->ci->nodeID == nodeID ||
......@@ -434,6 +446,9 @@ void engine_addtasks_recv_hydro(struct engine *e, struct cell *c,
#ifdef WITH_MPI
struct scheduler *s = &e->sched;
/* Early abort (are we below the level where tasks are)? */
if (!cell_get_flag(c, cell_flag_has_tasks)) return;
/* Have we reached a level where there are any hydro tasks ? */
if (t_xv == NULL && c->hydro.density != NULL) {
......@@ -533,6 +548,9 @@ void engine_addtasks_recv_stars(struct engine *e, struct cell *c,
#ifdef WITH_MPI
struct scheduler *s = &e->sched;
/* Early abort (are we below the level where tasks are)? */
if (!cell_get_flag(c, cell_flag_has_tasks)) return;
if (t_sf_counts == NULL && with_star_formation && c->hydro.count > 0) {
#ifdef SWIFT_DEBUG_CHECKS
if (c->depth != 0)
......@@ -624,6 +642,9 @@ void engine_addtasks_recv_black_holes(struct engine *e, struct cell *c,
#ifdef WITH_MPI
struct scheduler *s = &e->sched;
/* Early abort (are we below the level where tasks are)? */
if (!cell_get_flag(c, cell_flag_has_tasks)) return;
/* Have we reached a level where there are any black_holes tasks ? */
if (t_rho == NULL && c->black_holes.density != NULL) {
......@@ -714,6 +735,9 @@ void engine_addtasks_recv_gravity(struct engine *e, struct cell *c,
#ifdef WITH_MPI
struct scheduler *s = &e->sched;
/* Early abort (are we below the level where tasks are)? */
if (!cell_get_flag(c, cell_flag_has_tasks)) return;
/* Have we reached a level where there are any gravity tasks ? */
if (t_grav == NULL && c->grav.grav != NULL) {
......
/*******************************************************************************
* This file is part of SWIFT.
* Copyright (c) 2012 Pedro Gonnet (pedro.gonnet@durham.ac.uk)
* Matthieu Schaller (matthieu.schaller@durham.ac.uk)
*
* This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU Lesser General Public License as published
* by the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public License
* along with this program. If not, see <http://www.gnu.org/licenses/>.
*
******************************************************************************/
/* Config parameters. */
#include "../config.h"
/* This object's header. */
#include "engine.h"
/* Local headers. */
#include "active.h"
#include "cell.h"
#include "memswap.h"
/* Load the profiler header, if needed. */
#ifdef WITH_PROFILER
#include <gperftools/profiler.h>
#endif
/**
 * @brief Coarse families of tasks.
 *
 * Every family is unskipped on its own, independently of the other ones.
 * The numbering starts at 1, so a zero-initialised entry matches none of the
 * families. The final enumerator is used to size the task_types array of
 * struct unskip_data.
 */
enum task_broad_types {
  task_broad_types_hydro = 1,
  task_broad_types_gravity = 2,
  task_broad_types_stars = 3,
  task_broad_types_black_holes = 4,
  task_broad_types_count = 5,
};
/**
 * @brief Meta-data for the unskipping
 *
 * A pointer to this structure is what engine_do_unskip_mapper() receives as
 * its extra_data argument.
 */
struct unskip_data {
/*! The #engine */
struct engine *e;
/*! Pointer to the start of the list of cells to unskip. The mapper subtracts
 * it from the address of each entry it processes to get the position of that
 * entry in the whole list. */
int *list_base;
/*! Number of times the list has been duplicated. In the mapper this is only
 * read by the debug check on the copy index. */
int multiplier;
/*! The number of active cells (without duplication). The mapper divides the
 * position of an entry by this number to find which copy the entry is in. */
int num_active_cells;
/*! The #task_broad_types corresponding to each copy of the list */
enum task_broad_types task_types[task_broad_types_count];
};
/**
 * @brief Recursively unskip the hydro tasks of an active cell and of its
 * progeny.
 *
 * The progeny are handled before the cell itself. Whenever the cell-level
 * unskip returns a non-zero value, the engine's forcerebuild counter is
 * atomically incremented.
 *
 * @param c The cell.
 * @param e The engine.
 */
static void engine_do_unskip_hydro(struct cell *c, struct engine *e) {

  /* Bail out if we are below the level where tasks are, if the hydro count
   * is zero, or if the cell is not hydro-active. */
  if (!cell_get_flag(c, cell_flag_has_tasks) || c->hydro.count == 0 ||
      !cell_is_active_hydro(c, e))
    return;

  /* Deal with the progeny first. */
  if (c->split) {
    for (int i = 0; i < 8; i++) {
      struct cell *child = c->progeny[i];
      if (child == NULL) continue;
      engine_do_unskip_hydro(child, e);
    }
  }

  /* Now the tasks of this cell; flag a rebuild if the unskip asks for one. */
  if (cell_unskip_hydro_tasks(c, &e->sched)) atomic_inc(&e->forcerebuild);
}
/**
 * @brief Recursively unskip the stars tasks of an active cell and of its
 * progeny.
 *
 * With star formation switched on, a cell also counts as non-empty when its
 * hydro count is positive, and as active when it is hydro-active.
 *
 * @param c The cell.
 * @param e The engine.
 * @param with_star_formation Are we running with star formation switched on?
 */
static void engine_do_unskip_stars(struct cell *c, struct engine *e,
                                   const int with_star_formation) {

  /* Are we below the level where tasks are? */
  if (!cell_get_flag(c, cell_flag_has_tasks)) return;

  /* Empty cell: zero stars count, and no hydro count that matters. */
  if (c->stars.count <= 0 && !(with_star_formation && c->hydro.count > 0))
    return;

  /* Inactive cell: neither stars-active nor (with SF) hydro-active. */
  if (!cell_is_active_stars(c, e) &&
      !(with_star_formation && cell_is_active_hydro(c, e)))
    return;

  /* Deal with the progeny first. */
  if (c->split) {
    for (int i = 0; i < 8; i++) {
      struct cell *child = c->progeny[i];
      if (child == NULL) continue;
      engine_do_unskip_stars(child, e, with_star_formation);
    }
  }

  /* Now the tasks of this cell; flag a rebuild if the unskip asks for one. */
  if (cell_unskip_stars_tasks(c, &e->sched, with_star_formation))
    atomic_inc(&e->forcerebuild);
}
/**
 * @brief Recursively unskip the black hole tasks of an active cell and of its
 * progeny.
 *
 * The progeny are handled before the cell itself. Whenever the cell-level
 * unskip returns a non-zero value, the engine's forcerebuild counter is
 * atomically incremented.
 *
 * @param c The cell.
 * @param e The engine.
 */
static void engine_do_unskip_black_holes(struct cell *c, struct engine *e) {

  /* Bail out if we are below the level where tasks are, if the black hole
   * count is zero, or if the cell is not active for black holes. */
  if (!cell_get_flag(c, cell_flag_has_tasks) || c->black_holes.count == 0 ||
      !cell_is_active_black_holes(c, e))
    return;

  /* Deal with the progeny first. */
  if (c->split) {
    for (int i = 0; i < 8; i++) {
      struct cell *child = c->progeny[i];
      if (child == NULL) continue;
      engine_do_unskip_black_holes(child, e);
    }
  }

  /* Now the tasks of this cell; flag a rebuild if the unskip asks for one. */
  if (cell_unskip_black_holes_tasks(c, &e->sched))
    atomic_inc(&e->forcerebuild);
}
/**
 * @brief Recursively unskip the gravity tasks of an active cell and of its
 * progeny.
 *
 * The recursion only continues into the progeny while the cell is split and
 * the remaining depth below it (maxdepth - depth) is at least
 * space_subdepth_diff_grav.
 *
 * @param c The cell.
 * @param e The engine.
 */
static void engine_do_unskip_gravity(struct cell *c, struct engine *e) {

  /* Bail out if we are below the level where tasks are, if the gravity
   * count is zero, or if the cell is not gravity-active. */
  if (!cell_get_flag(c, cell_flag_has_tasks) || c->grav.count == 0 ||
      !cell_is_active_gravity(c, e))
    return;

  /* Is there enough depth left below this cell to keep recursing? */
  const int levels_below = c->maxdepth - c->depth;
  if (c->split && levels_below >= space_subdepth_diff_grav) {
    for (int i = 0; i < 8; i++) {
      struct cell *child = c->progeny[i];
      if (child == NULL) continue;
      engine_do_unskip_gravity(child, e);
    }
  }

  /* Now the tasks of this cell. */
  cell_unskip_gravity_tasks(c, &e->sched);
}
/**
* @brief Mapper function to unskip active tasks.
*
* @param map_data An array of #cell%s.
* @param num_elements Chunk size.
* @param extra_data Pointer to an unskip_data structure.
*/
void engine_do_unskip_mapper(void *map_data, int num_elements,
void *extra_data) {
/* Unpack the meta data */
struct unskip_data *data = (struct unskip_data *)extra_data;
const int num_active_cells = data->num_active_cells;
const enum task_broad_types *const task_types = data->task_types;
const int *const list_base = data->list_base;
struct engine *e = data->e;
struct cell *const cells_top = e->s->cells_top;
/* What policies are we running? */
const int with_star_formation = e->policy & engine_policy_star_formation;
/* The current chunk of active cells */
const int *const local_cells = (int *)map_data;
/* Loop over this thread's chunk of cells to unskip */
for (int ind = 0; ind < num_elements; ind++) {
/* Handle on the cell */
struct cell *const c = &cells_top[local_cells[ind]];
/* In what copy of the global list are we?
* This gives us the broad type of task we are working on. */
const ptrdiff_t delta = &local_cells[ind] - list_base;
const int type = delta / num_active_cells;
#ifdef SWIFT_DEBUG_CHECKS
if (type >= data->multiplier) error("Invalid broad task type!");
if (c == NULL) error("Got an invalid cell index!");
#endif
/* What broad type of tasks are we unskipping? */
switch (task_types[type]) {
case task_broad_types_hydro:
#ifdef SWIFT_DEBUG_CHECKS
if (!(e->policy & engine_policy_hydro))
error("Trying to unskip hydro tasks in a non-hydro run!");
#endif
engine_do_unskip_hydro(c, e);
break;
case task_broad_types_gravity:
#ifdef SWIFT_DEBUG_CHECKS
if (!(e->policy & engine_policy_self_gravity) &&
!(e->policy & engine_policy_external_gravity))
error("Trying to unskip gravity tasks in a non-gravity run!");
#endif
engine_do_unskip_gravity(c, e);
break;
case task_broad_types_stars:
#ifdef SWIFT_DEBUG_CHECKS
if (!(e->policy & engine_policy_stars))
error("Trying to unskip star tasks in a non-stars run!");
#endif
engine_do_unskip_stars(c, e, with_star_formation);
break;
case task_broad_types_black_holes:
#ifdef SWIFT_DEBUG_CHECKS
if (!(e->policy & engine_policy_black_holes))
error("Trying to unskip black holes tasks in a non-BH run!");
#endif
engine_do_unskip_black_holes(c, e);
break;
default:
#ifdef SWIFT_DEBUG_CHECKS
error("Invalid broad task type!");
#endif
continue;
}
}
}
/**
* @brief Unskip all the tasks that act on active cells at this time.
*
* @param e The #engine.
*/
void engine_unskip(struct engine *e) {
const ticks tic = getticks();
struct space *s = e->s;
const int nodeID = e->nodeID;
const int with_hydro = e->policy & engine_policy_hydro;
const int with_self_grav = e->policy & engine_policy_self_gravity;
const int with_ext_grav = e->policy & engine_policy_external_gravity;
const int with_stars = e->policy & engine_policy_stars;
const int with_feedback = e->policy & engine_policy_feedback;
const int with_black_holes = e->policy & engine_policy_black_holes;
#ifdef WITH_PROFILER
static int count = 0;
char filename[100];
sprintf(filename, "/tmp/swift_engine_do_usnkip_mapper_%06i.prof", count++);
ProfilerStart(filename);
#endif // WITH_PROFILER
/* Move the active local cells to the top of the list. */
int *local_cells = e->s->local_cells_with_tasks_top;
int num_active_cells = 0;
for (int k = 0; k < s->nr_local_cells_with_tasks; k++) {
struct cell *c = &s->cells_top[local_cells[k]];
if ((with_hydro && cell_is_active_hydro(c, e)) ||
(with_self_grav && cell_is_active_gravity(c, e)) ||
(with_ext_grav && c->nodeID == nodeID &&
cell_is_active_gravity(c, e)) ||
(with_feedback && cell_is_active_stars(c, e)) ||
(with_stars && c->nodeID == nodeID && cell_is_active_stars(c, e)) ||
(with_black_holes && cell_is_active_black_holes(c, e))) {
if (num_active_cells != k)
memswap(&local_cells[k], &local_cells[num_active_cells], sizeof(int));
num_active_cells += 1;
}
}
/* What kind of tasks do we have? */
struct unskip_data data;
bzero(&data, sizeof(struct unskip_data));
int multiplier = 0;
if (with_hydro) {
data.task_types[multiplier] = task_broad_types_hydro;
multiplier++;
}
if (with_self_grav || with_ext_grav) {
data.task_types[multiplier] = task_broad_types_gravity;
multiplier++;
}
if (with_feedback || with_stars) {
data.task_types[multiplier] = task_broad_types_stars;
multiplier++;
}
if (with_black_holes) {
data.task_types[multiplier] = task_broad_types_black_holes;
multiplier++;
}
/* Should we duplicate the list of active cells to better parallelise the
unskip over the threads ? */
int *local_active_cells;
if (multiplier > 1) {
/* Make space for copies of the list */
local_active_cells =
(int *)malloc(multiplier * num_active_cells * sizeof(int));
if (local_active_cells == NULL)
error(
"Couldn't allocate memory for duplicated list of local active "
"cells.");
/* Make blind copies of the list */
for (int m = 0; m < multiplier; m++) {
memcpy(local_active_cells + m * num_active_cells, local_cells,
num_active_cells * sizeof(int));
}
} else {
local_active_cells = local_cells;
}