From ec3afbcce859e48c93f2e47af8900c364fbb6eec Mon Sep 17 00:00:00 2001
From: Matthieu Schaller <schaller@strw.leidenuniv.nl>
Date: Sat, 14 Sep 2019 11:17:51 +0100
Subject: [PATCH] Speed up the unskip and scheduler_start process

---
 src/Makefile.am        |   2 +-
 src/cell.c             |  48 ++++-
 src/cell.h             |   9 +-
 src/engine.c           |  63 -------
 src/engine_maketasks.c |  24 +++
 src/engine_unskip.c    | 400 +++++++++++++++++++++++++++++++++++++++++
 src/runner.c           | 163 -----------------
 src/runner.h           |   2 -
 src/scheduler.c        |  20 ++-
 src/task.c             |  11 +-
 10 files changed, 494 insertions(+), 248 deletions(-)
 create mode 100644 src/engine_unskip.c

diff --git a/src/Makefile.am b/src/Makefile.am
index 947fd1a82c..480953c6aa 100644
--- a/src/Makefile.am
+++ b/src/Makefile.am
@@ -70,7 +70,7 @@ endif
 
 # Common source files
 AM_SOURCES = space.c runner.c queue.c task.c cell.c engine.c engine_maketasks.c \
-    engine_marktasks.c engine_drift.c serial_io.c timers.c debug.c scheduler.c \
+    engine_marktasks.c engine_drift.c engine_unskip.c serial_io.c timers.c debug.c scheduler.c \
     proxy.c parallel_io.c units.c common_io.c single_io.c multipole.c version.c map.c \
     kernel_hydro.c tools.c part.c partition.c clocks.c parser.c \
     physical_constants.c potential.c hydro_properties.c \
diff --git a/src/cell.c b/src/cell.c
index 6696410533..92f53d7ca8 100644
--- a/src/cell.c
+++ b/src/cell.c
@@ -2479,6 +2479,50 @@ void cell_activate_star_formation_tasks(struct cell *c, struct scheduler *s) {
   cell_activate_star_resort_tasks(c, s);
 }
 
+/**
+ * @brief Walk down a cell hierarchy and activate the first hydro ghost task
+ * met on every branch that is hydro-active and has a non-zero hydro.count.
+ *
+ * @param c The #cell.
+ * @param s The #scheduler.
+ * @param e The #engine.
+ */
+void cell_recursively_activate_hydro_ghosts(struct cell *c, struct scheduler *s,
+                                            const struct engine *e) {
+  /* Nothing to do for empty (hydro.count == 0) or hydro-inactive cells. */
+  if ((c->hydro.count == 0) || !cell_is_active_hydro(c, e)) return;
+
+  /* A ghost task lives at this level: activate it and stop descending. */
+  if (c->hydro.ghost != NULL) {
+    scheduler_activate(s, c->hydro.ghost);
+  } else {
+
+#ifdef SWIFT_DEBUG_CHECKS
+    if (!c->split)
+      error("Reached the leaf level without finding a hydro ghost!");
+#endif
+
+    /* No ghost here: look for it in each of the existing progeny. */
+    for (int k = 0; k < 8; k++)
+      if (c->progeny[k] != NULL)
+        cell_recursively_activate_hydro_ghosts(c->progeny[k], s, e);
+  }
+}
+
+/**
+ * @brief Activate a cell's ghost_in and ghost_out, then the ghosts below it.
+ *
+ * @param c The #cell. Its ghost_in/ghost_out are activated without NULL checks.
+ * @param s The #scheduler.
+ * @param e The #engine.
+ */
+void cell_activate_hydro_ghosts(struct cell *c, struct scheduler *s,
+                                const struct engine *e) {
+  scheduler_activate(s, c->hydro.ghost_in);
+  scheduler_activate(s, c->hydro.ghost_out);
+  cell_recursively_activate_hydro_ghosts(c, s, e);
+}
+
 /**
  * @brief Recurse down in a cell hierarchy until the hydro.super level is
  * reached and activate the spart drift at that level.
@@ -3505,9 +3549,7 @@ int cell_unskip_hydro_tasks(struct cell *c, struct scheduler *s) {
 
     if (c->hydro.extra_ghost != NULL)
       scheduler_activate(s, c->hydro.extra_ghost);
-    if (c->hydro.ghost_in != NULL) scheduler_activate(s, c->hydro.ghost_in);
-    if (c->hydro.ghost_out != NULL) scheduler_activate(s, c->hydro.ghost_out);
-    if (c->hydro.ghost != NULL) scheduler_activate(s, c->hydro.ghost);
+    if (c->hydro.ghost_in != NULL) cell_activate_hydro_ghosts(c, s, e);
     if (c->kick1 != NULL) scheduler_activate(s, c->kick1);
     if (c->kick2 != NULL) scheduler_activate(s, c->kick2);
     if (c->timestep != NULL) scheduler_activate(s, c->timestep);
diff --git a/src/cell.h b/src/cell.h
index 8067a31898..10a3e2bddf 100644
--- a/src/cell.h
+++ b/src/cell.h
@@ -273,8 +273,10 @@ struct pcell_sf {
   } stars;
 };
 
-/** Bitmasks for the cell flags. Beware when adding flags that you don't exceed
-    the size of the flags variable in the struct cell. */
+/**
+ * @brief Bitmasks for the cell flags. Beware when adding flags that you don't
+ * exceed the size of the flags variable in the struct cell.
+ */
 enum cell_flags {
   cell_flag_split = (1UL << 0),
   cell_flag_do_hydro_drift = (1UL << 1),
@@ -289,7 +291,8 @@ enum cell_flags {
   cell_flag_do_stars_sub_drift = (1UL << 10),
   cell_flag_do_bh_drift = (1UL << 11),
   cell_flag_do_bh_sub_drift = (1UL << 12),
-  cell_flag_do_stars_resort = (1UL << 13)
+  cell_flag_do_stars_resort = (1UL << 13),
+  cell_flag_has_tasks = (1UL << 14),
 };
 
 /**
diff --git a/src/engine.c b/src/engine.c
index 04cfa15c48..61ba7051cd 100644
--- a/src/engine.c
+++ b/src/engine.c
@@ -44,11 +44,6 @@
 #include <numa.h>
 #endif
 
-/* Load the profiler header, if needed. */
-#ifdef WITH_PROFILER
-#include <gperftools/profiler.h>
-#endif
-
 /* This object's header. */
 #include "engine.h"
 
@@ -4131,64 +4126,6 @@ int engine_is_done(struct engine *e) {
   return !(e->ti_current < max_nr_timesteps);
 }
 
-/**
- * @brief Unskip all the tasks that act on active cells at this time.
- *
- * @param e The #engine.
- */
-void engine_unskip(struct engine *e) {
-
-  const ticks tic = getticks();
-  struct space *s = e->s;
-  const int nodeID = e->nodeID;
-
-  const int with_hydro = e->policy & engine_policy_hydro;
-  const int with_self_grav = e->policy & engine_policy_self_gravity;
-  const int with_ext_grav = e->policy & engine_policy_external_gravity;
-  const int with_stars = e->policy & engine_policy_stars;
-  const int with_feedback = e->policy & engine_policy_feedback;
-  const int with_black_holes = e->policy & engine_policy_black_holes;
-
-#ifdef WITH_PROFILER
-  static int count = 0;
-  char filename[100];
-  sprintf(filename, "/tmp/swift_runner_do_usnkip_mapper_%06i.prof", count++);
-  ProfilerStart(filename);
-#endif  // WITH_PROFILER
-
-  /* Move the active local cells to the top of the list. */
-  int *local_cells = e->s->local_cells_with_tasks_top;
-  int num_active_cells = 0;
-  for (int k = 0; k < s->nr_local_cells_with_tasks; k++) {
-    struct cell *c = &s->cells_top[local_cells[k]];
-
-    if ((with_hydro && cell_is_active_hydro(c, e)) ||
-        (with_self_grav && cell_is_active_gravity(c, e)) ||
-        (with_ext_grav && c->nodeID == nodeID &&
-         cell_is_active_gravity(c, e)) ||
-        (with_feedback && cell_is_active_stars(c, e)) ||
-        (with_stars && c->nodeID == nodeID && cell_is_active_stars(c, e)) ||
-        (with_black_holes && cell_is_active_black_holes(c, e))) {
-
-      if (num_active_cells != k)
-        memswap(&local_cells[k], &local_cells[num_active_cells], sizeof(int));
-      num_active_cells += 1;
-    }
-  }
-
-  /* Activate all the regular tasks */
-  threadpool_map(&e->threadpool, runner_do_unskip_mapper, local_cells,
-                 num_active_cells, sizeof(int), 1, e);
-
-#ifdef WITH_PROFILER
-  ProfilerStop();
-#endif  // WITH_PROFILER
-
-  if (e->verbose)
-    message("took %.3f %s.", clocks_from_ticks(getticks() - tic),
-            clocks_getunit());
-}
-
 void engine_do_reconstruct_multipoles_mapper(void *map_data, int num_elements,
                                              void *extra_data) {
 
diff --git a/src/engine_maketasks.c b/src/engine_maketasks.c
index 05bde9091d..42590cb5f4 100644
--- a/src/engine_maketasks.c
+++ b/src/engine_maketasks.c
@@ -74,6 +74,9 @@ void engine_addtasks_send_gravity(struct engine *e, struct cell *ci,
   struct scheduler *s = &e->sched;
   const int nodeID = cj->nodeID;
 
+  /* Early abort (are we below the level where tasks are)? */
+  if (!cell_get_flag(ci, cell_flag_has_tasks)) return;
+
   /* Check if any of the gravity tasks are for the target node. */
   for (l = ci->grav.grav; l != NULL; l = l->next)
     if (l->t->ci->nodeID == nodeID ||
@@ -141,6 +144,9 @@ void engine_addtasks_send_hydro(struct engine *e, struct cell *ci,
   struct scheduler *s = &e->sched;
   const int nodeID = cj->nodeID;
 
+  /* Early abort (are we below the level where tasks are)? */
+  if (!cell_get_flag(ci, cell_flag_has_tasks)) return;
+
   /* Check if any of the density tasks are for the target node. */
   for (l = ci->hydro.density; l != NULL; l = l->next)
     if (l->t->ci->nodeID == nodeID ||
@@ -248,6 +254,9 @@ void engine_addtasks_send_stars(struct engine *e, struct cell *ci,
   struct scheduler *s = &e->sched;
   const int nodeID = cj->nodeID;
 
+  /* Early abort (are we below the level where tasks are)? */
+  if (!cell_get_flag(ci, cell_flag_has_tasks)) return;
+
   if (t_sf_counts == NULL && with_star_formation && ci->hydro.count > 0) {
 #ifdef SWIFT_DEBUG_CHECKS
     if (ci->depth != 0)
@@ -339,6 +348,9 @@ void engine_addtasks_send_black_holes(struct engine *e, struct cell *ci,
   struct scheduler *s = &e->sched;
   const int nodeID = cj->nodeID;
 
+  /* Early abort (are we below the level where tasks are)? */
+  if (!cell_get_flag(ci, cell_flag_has_tasks)) return;
+
   /* Check if any of the density tasks are for the target node. */
   for (l = ci->black_holes.density; l != NULL; l = l->next)
     if (l->t->ci->nodeID == nodeID ||
@@ -434,6 +446,9 @@ void engine_addtasks_recv_hydro(struct engine *e, struct cell *c,
 #ifdef WITH_MPI
   struct scheduler *s = &e->sched;
 
+  /* Early abort (are we below the level where tasks are)? */
+  if (!cell_get_flag(c, cell_flag_has_tasks)) return;
+
   /* Have we reached a level where there are any hydro tasks ? */
   if (t_xv == NULL && c->hydro.density != NULL) {
 
@@ -533,6 +548,9 @@ void engine_addtasks_recv_stars(struct engine *e, struct cell *c,
 #ifdef WITH_MPI
   struct scheduler *s = &e->sched;
 
+  /* Early abort (are we below the level where tasks are)? */
+  if (!cell_get_flag(c, cell_flag_has_tasks)) return;
+
   if (t_sf_counts == NULL && with_star_formation && c->hydro.count > 0) {
 #ifdef SWIFT_DEBUG_CHECKS
     if (c->depth != 0)
@@ -624,6 +642,9 @@ void engine_addtasks_recv_black_holes(struct engine *e, struct cell *c,
 #ifdef WITH_MPI
   struct scheduler *s = &e->sched;
 
+  /* Early abort (are we below the level where tasks are)? */
+  if (!cell_get_flag(c, cell_flag_has_tasks)) return;
+
   /* Have we reached a level where there are any black_holes tasks ? */
   if (t_rho == NULL && c->black_holes.density != NULL) {
 
@@ -714,6 +735,9 @@ void engine_addtasks_recv_gravity(struct engine *e, struct cell *c,
 #ifdef WITH_MPI
   struct scheduler *s = &e->sched;
 
+  /* Early abort (are we below the level where tasks are)? */
+  if (!cell_get_flag(c, cell_flag_has_tasks)) return;
+
   /* Have we reached a level where there are any gravity tasks ? */
   if (t_grav == NULL && c->grav.grav != NULL) {
 
diff --git a/src/engine_unskip.c b/src/engine_unskip.c
new file mode 100644
index 0000000000..dfadfa5ca1
--- /dev/null
+++ b/src/engine_unskip.c
@@ -0,0 +1,400 @@
+/*******************************************************************************
+ * This file is part of SWIFT.
+ * Copyright (c) 2012 Pedro Gonnet (pedro.gonnet@durham.ac.uk)
+ *                    Matthieu Schaller (matthieu.schaller@durham.ac.uk)
+ *
+ * This program is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU Lesser General Public License as published
+ * by the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public License
+ * along with this program.  If not, see <http://www.gnu.org/licenses/>.
+ *
+ ******************************************************************************/
+
+/* Config parameters. */
+#include "../config.h"
+
+/* This object's header. */
+#include "engine.h"
+
+/* Local headers. */
+#include "active.h"
+#include "cell.h"
+#include "memswap.h"
+
+/* Load the profiler header, if needed. */
+#ifdef WITH_PROFILER
+#include <gperftools/profiler.h>
+#endif
+
+/**
+ * @brief Broad categories of tasks, each unskipped independently of the others.
+ *
+ * Values start at 1, so 0 (e.g. a zeroed #unskip_data slot) is not a valid
+ * category and task_broad_types_count is one more than the number of them.
+ */
+enum task_broad_types {
+  task_broad_types_hydro = 1,
+  task_broad_types_gravity,
+  task_broad_types_stars,
+  task_broad_types_black_holes,
+  task_broad_types_count,
+};
+
+/**
+ * @brief Meta-data shared by all the threads running the unskip mapper.
+ */
+struct unskip_data {
+
+  /*! The #engine */
+  struct engine *e;
+
+  /*! Pointer to the start of the list of cell indices to unskip */
+  int *list_base;
+
+  /*! Number of copies of the list (one per broad task type in use) */
+  int multiplier;
+
+  /*! The number of active cells (without duplication) */
+  int num_active_cells;
+
+  /*! The #task_broad_types corresponding to each copy of the list */
+  enum task_broad_types task_types[task_broad_types_count];
+};
+
+/**
+ * @brief Recursively unskip the hydro tasks of a cell and of its progeny.
+ *
+ * @param c The cell.
+ * @param e The engine (its forcerebuild counter may be incremented).
+ */
+static void engine_do_unskip_hydro(struct cell *c, struct engine *e) {
+
+  /* Early abort: this cell was never flagged as carrying tasks. */
+  if (!cell_get_flag(c, cell_flag_has_tasks)) return;
+
+  /* Ignore cells with a zero hydro.count. */
+  if (c->hydro.count == 0) return;
+
+  /* Skip cells that are not hydro-active. */
+  if (!cell_is_active_hydro(c, e)) return;
+
+  /* Visit the existing progeny first, then deal with this cell's own tasks. */
+  if (c->split) {
+    for (int k = 0; k < 8; k++) {
+      if (c->progeny[k] != NULL) {
+        struct cell *cp = c->progeny[k];
+        engine_do_unskip_hydro(cp, e);
+      }
+    }
+  }
+
+  /* Unskip this cell's tasks and record any request for a rebuild. */
+  const int forcerebuild = cell_unskip_hydro_tasks(c, &e->sched);
+  if (forcerebuild) atomic_inc(&e->forcerebuild);
+}
+
+/**
+ * @brief Recursively unskip the stars tasks of a cell and of its progeny.
+ *
+ * @param c The cell.
+ * @param e The engine (its forcerebuild counter may be incremented).
+ * @param with_star_formation Are we running with star formation switched on?
+ */
+static void engine_do_unskip_stars(struct cell *c, struct engine *e,
+                                   const int with_star_formation) {
+
+  /* Early abort: this cell was never flagged as carrying tasks. */
+  if (!cell_get_flag(c, cell_flag_has_tasks)) return;
+
+  const int non_empty =
+      c->stars.count > 0 || (with_star_formation && c->hydro.count > 0);
+
+  /* Ignore cells with no stars (nor hydro particles, with star formation). */
+  if (!non_empty) return;
+
+  const int ci_active = cell_is_active_stars(c, e) ||
+                        (with_star_formation && cell_is_active_hydro(c, e));
+
+  /* Skip cells that are neither stars-active nor (with SF) hydro-active. */
+  if (!ci_active) return;
+
+  /* Visit the existing progeny first, then deal with this cell's own tasks. */
+  if (c->split) {
+    for (int k = 0; k < 8; k++) {
+      if (c->progeny[k] != NULL) {
+        struct cell *cp = c->progeny[k];
+        engine_do_unskip_stars(cp, e, with_star_formation);
+      }
+    }
+  }
+
+  /* Unskip this cell's tasks and record any request for a rebuild. */
+  const int forcerebuild =
+      cell_unskip_stars_tasks(c, &e->sched, with_star_formation);
+  if (forcerebuild) atomic_inc(&e->forcerebuild);
+}
+
+/**
+ * @brief Recursively unskip the black hole tasks of a cell and of its progeny.
+ *
+ * @param c The cell.
+ * @param e The engine (its forcerebuild counter may be incremented).
+ */
+static void engine_do_unskip_black_holes(struct cell *c, struct engine *e) {
+
+  /* Early abort: this cell was never flagged as carrying tasks. */
+  if (!cell_get_flag(c, cell_flag_has_tasks)) return;
+
+  /* Ignore cells with a zero black_holes.count. */
+  if (c->black_holes.count == 0) return;
+
+  /* Skip cells that are not active for black holes. */
+  if (!cell_is_active_black_holes(c, e)) return;
+
+  /* Visit the existing progeny first, then deal with this cell's own tasks. */
+  if (c->split) {
+    for (int k = 0; k < 8; k++) {
+      if (c->progeny[k] != NULL) {
+        struct cell *cp = c->progeny[k];
+        engine_do_unskip_black_holes(cp, e);
+      }
+    }
+  }
+
+  /* Unskip this cell's tasks and record any request for a rebuild. */
+  const int forcerebuild = cell_unskip_black_holes_tasks(c, &e->sched);
+  if (forcerebuild) atomic_inc(&e->forcerebuild);
+}
+
+/**
+ * @brief Recursively unskip the gravity tasks of a cell and of its progeny.
+ *
+ * @param c The cell.
+ * @param e The engine.
+ */
+static void engine_do_unskip_gravity(struct cell *c, struct engine *e) {
+
+  /* Early abort: this cell was never flagged as carrying tasks. */
+  if (!cell_get_flag(c, cell_flag_has_tasks)) return;
+
+  /* Ignore cells with a zero grav.count. */
+  if (c->grav.count == 0) return;
+
+  /* Skip cells that are not gravity-active. */
+  if (!cell_is_active_gravity(c, e)) return;
+
+  /* Recurse only if >= space_subdepth_diff_grav levels remain below us. */
+  if (c->split && ((c->maxdepth - c->depth) >= space_subdepth_diff_grav)) {
+    for (int k = 0; k < 8; k++) {
+      if (c->progeny[k] != NULL) {
+        struct cell *cp = c->progeny[k];
+        engine_do_unskip_gravity(cp, e);
+      }
+    }
+  }
+
+  /* Unskip this cell's own tasks (no rebuild request is reported here). */
+  cell_unskip_gravity_tasks(c, &e->sched);
+}
+
+/**
+ * @brief Mapper function unskipping one broad type of tasks per list entry.
+ *
+ * @param map_data Chunk of the list of int indices into e->s->cells_top.
+ * @param num_elements Number of indices in this chunk.
+ * @param extra_data Pointer to an #unskip_data structure.
+ */
+void engine_do_unskip_mapper(void *map_data, int num_elements,
+                             void *extra_data) {
+
+  /* Unpack the meta data */
+  struct unskip_data *data = (struct unskip_data *)extra_data;
+  const int num_active_cells = data->num_active_cells;
+  const enum task_broad_types *const task_types = data->task_types;
+  const int *const list_base = data->list_base;
+  struct engine *e = data->e;
+  struct cell *const cells_top = e->s->cells_top;
+
+  /* Is star formation switched on? (forwarded to the stars unskip) */
+  const int with_star_formation = e->policy & engine_policy_star_formation;
+
+  /* The current chunk of active cells */
+  const int *const local_cells = (int *)map_data;
+
+  /* Loop over this thread's chunk of cells to unskip */
+  for (int ind = 0; ind < num_elements; ind++) {
+
+    /* Handle on the cell */
+    struct cell *const c = &cells_top[local_cells[ind]];
+
+    /* In what copy of the global list are we? The offset from the list start
+     * divided by the length of one copy selects the entry of task_types. */
+    const ptrdiff_t delta = &local_cells[ind] - list_base;
+    const int type = delta / num_active_cells;
+
+#ifdef SWIFT_DEBUG_CHECKS
+    if (type >= data->multiplier) error("Invalid broad task type!");
+    if (c == NULL) error("Got an invalid cell index!");
+#endif
+
+    /* What broad type of tasks are we unskipping? */
+    switch (task_types[type]) {
+      case task_broad_types_hydro:
+#ifdef SWIFT_DEBUG_CHECKS
+        if (!(e->policy & engine_policy_hydro))
+          error("Trying to unskip hydro tasks in a non-hydro run!");
+#endif
+        engine_do_unskip_hydro(c, e);
+        break;
+      case task_broad_types_gravity:
+#ifdef SWIFT_DEBUG_CHECKS
+        if (!(e->policy & engine_policy_self_gravity) &&
+            !(e->policy & engine_policy_external_gravity))
+          error("Trying to unskip gravity tasks in a non-gravity run!");
+#endif
+        engine_do_unskip_gravity(c, e);
+        break;
+      case task_broad_types_stars:
+#ifdef SWIFT_DEBUG_CHECKS
+        if (!(e->policy & engine_policy_stars))
+          error("Trying to unskip star tasks in a non-stars run!");
+#endif
+        engine_do_unskip_stars(c, e, with_star_formation);
+        break;
+      case task_broad_types_black_holes:
+#ifdef SWIFT_DEBUG_CHECKS
+        if (!(e->policy & engine_policy_black_holes))
+          error("Trying to unskip black holes tasks in a non-BH run!");
+#endif
+        engine_do_unskip_black_holes(c, e);
+        break;
+      default:
+#ifdef SWIFT_DEBUG_CHECKS
+        error("Invalid broad task type!");
+#endif
+        continue;
+    }
+  }
+}
+
+/**
+ * @brief Unskip, in parallel and with one copy of the cell list per broad
+ * task type, all the tasks that act on active cells at this time.
+ * @param e The #engine.
+ */
+void engine_unskip(struct engine *e) {
+
+  const ticks tic = getticks();
+  struct space *s = e->s;
+  const int nodeID = e->nodeID;
+
+  const int with_hydro = e->policy & engine_policy_hydro;
+  const int with_self_grav = e->policy & engine_policy_self_gravity;
+  const int with_ext_grav = e->policy & engine_policy_external_gravity;
+  const int with_stars = e->policy & engine_policy_stars;
+  const int with_feedback = e->policy & engine_policy_feedback;
+  const int with_black_holes = e->policy & engine_policy_black_holes;
+
+#ifdef WITH_PROFILER
+  static int count = 0;
+  char filename[100];
+  sprintf(filename, "/tmp/swift_engine_do_usnkip_mapper_%06i.prof", count++);
+  ProfilerStart(filename);
+#endif  // WITH_PROFILER
+
+  /* Move the active local cells to the top of the list. */
+  int *local_cells = e->s->local_cells_with_tasks_top;
+  int num_active_cells = 0;
+  for (int k = 0; k < s->nr_local_cells_with_tasks; k++) {
+    struct cell *c = &s->cells_top[local_cells[k]];
+
+    if ((with_hydro && cell_is_active_hydro(c, e)) ||
+        (with_self_grav && cell_is_active_gravity(c, e)) ||
+        (with_ext_grav && c->nodeID == nodeID &&
+         cell_is_active_gravity(c, e)) ||
+        (with_feedback && cell_is_active_stars(c, e)) ||
+        (with_stars && c->nodeID == nodeID && cell_is_active_stars(c, e)) ||
+        (with_black_holes && cell_is_active_black_holes(c, e))) {
+
+      if (num_active_cells != k)
+        memswap(&local_cells[k], &local_cells[num_active_cells], sizeof(int));
+      num_active_cells += 1;
+    }
+  }
+
+  /* What kind of tasks do we have? One slot of task_types per broad type. */
+  struct unskip_data data;
+  bzero(&data, sizeof(struct unskip_data));
+  int multiplier = 0;
+  if (with_hydro) {
+    data.task_types[multiplier] = task_broad_types_hydro;
+    multiplier++;
+  }
+  if (with_self_grav || with_ext_grav) {
+    data.task_types[multiplier] = task_broad_types_gravity;
+    multiplier++;
+  }
+  if (with_feedback || with_stars) {
+    data.task_types[multiplier] = task_broad_types_stars;
+    multiplier++;
+  }
+  if (with_black_holes) {
+    data.task_types[multiplier] = task_broad_types_black_holes;
+    multiplier++;
+  }
+
+  /* Should we duplicate the list of active cells to better parallelise the
+     unskip over the threads ? (Pointless for an empty list.) */
+  int *local_active_cells;
+  if (multiplier > 1 && num_active_cells > 0) {
+
+    /* Make space for the copies (non-empty, so NULL really means failure) */
+    local_active_cells =
+        (int *)malloc((size_t)multiplier * num_active_cells * sizeof(int));
+    if (local_active_cells == NULL)
+      error(
+          "Couldn't allocate memory for duplicated list of local active "
+          "cells.");
+
+    /* Make blind copies of the list */
+    for (int m = 0; m < multiplier; m++) {
+      memcpy(local_active_cells + m * num_active_cells, local_cells,
+             num_active_cells * sizeof(int));
+    }
+  } else {
+    local_active_cells = local_cells;
+  }
+
+  /* We now have a list of local active cells duplicated as many times as
+   * we have broad task types. We can now release all the threads on the list */
+
+  data.e = e;
+  data.list_base = local_active_cells;
+  data.num_active_cells = num_active_cells;
+  data.multiplier = multiplier;
+
+  /* Activate all the regular tasks */
+  threadpool_map(&e->threadpool, engine_do_unskip_mapper, local_active_cells,
+                 num_active_cells * multiplier, sizeof(int), 1, &data);
+
+#ifdef WITH_PROFILER
+  ProfilerStop();
+#endif  // WITH_PROFILER
+
+  /* Release the duplicated list, but only if we actually made one */
+  if (local_active_cells != local_cells) {
+    free(local_active_cells);
+  }
+
+  if (e->verbose)
+    message("took %.3f %s.", clocks_from_ticks(getticks() - tic),
+            clocks_getunit());
+}
diff --git a/src/runner.c b/src/runner.c
index db7e512873..38c3197155 100644
--- a/src/runner.c
+++ b/src/runner.c
@@ -2440,169 +2440,6 @@ void runner_do_ghost(struct runner *r, struct cell *c, int timer) {
   if (timer) TIMER_TOC(timer_do_ghost);
 }
 
-/**
- * @brief Unskip any hydro tasks associated with active cells.
- *
- * @param c The cell.
- * @param e The engine.
- */
-static void runner_do_unskip_hydro(struct cell *c, struct engine *e) {
-
-  /* Ignore empty cells. */
-  if (c->hydro.count == 0) return;
-
-  /* Skip inactive cells. */
-  if (!cell_is_active_hydro(c, e)) return;
-
-  /* Recurse */
-  if (c->split) {
-    for (int k = 0; k < 8; k++) {
-      if (c->progeny[k] != NULL) {
-        struct cell *cp = c->progeny[k];
-        runner_do_unskip_hydro(cp, e);
-      }
-    }
-  }
-
-  /* Unskip any active tasks. */
-  const int forcerebuild = cell_unskip_hydro_tasks(c, &e->sched);
-  if (forcerebuild) atomic_inc(&e->forcerebuild);
-}
-
-/**
- * @brief Unskip any stars tasks associated with active cells.
- *
- * @param c The cell.
- * @param e The engine.
- * @param with_star_formation Are we running with star formation switched on?
- */
-static void runner_do_unskip_stars(struct cell *c, struct engine *e,
-                                   const int with_star_formation) {
-
-  const int non_empty =
-      c->stars.count > 0 || (with_star_formation && c->hydro.count > 0);
-
-  /* Ignore empty cells. */
-  if (!non_empty) return;
-
-  const int ci_active = cell_is_active_stars(c, e) ||
-                        (with_star_formation && cell_is_active_hydro(c, e));
-
-  /* Skip inactive cells. */
-  if (!ci_active) return;
-
-  /* Recurse */
-  if (c->split) {
-    for (int k = 0; k < 8; k++) {
-      if (c->progeny[k] != NULL) {
-        struct cell *cp = c->progeny[k];
-        runner_do_unskip_stars(cp, e, with_star_formation);
-      }
-    }
-  }
-
-  /* Unskip any active tasks. */
-  const int forcerebuild =
-      cell_unskip_stars_tasks(c, &e->sched, with_star_formation);
-  if (forcerebuild) atomic_inc(&e->forcerebuild);
-}
-
-/**
- * @brief Unskip any black hole tasks associated with active cells.
- *
- * @param c The cell.
- * @param e The engine.
- */
-static void runner_do_unskip_black_holes(struct cell *c, struct engine *e) {
-
-  /* Ignore empty cells. */
-  if (c->black_holes.count == 0) return;
-
-  /* Skip inactive cells. */
-  if (!cell_is_active_black_holes(c, e)) return;
-
-  /* Recurse */
-  if (c->split) {
-    for (int k = 0; k < 8; k++) {
-      if (c->progeny[k] != NULL) {
-        struct cell *cp = c->progeny[k];
-        runner_do_unskip_black_holes(cp, e);
-      }
-    }
-  }
-
-  /* Unskip any active tasks. */
-  const int forcerebuild = cell_unskip_black_holes_tasks(c, &e->sched);
-  if (forcerebuild) atomic_inc(&e->forcerebuild);
-}
-
-/**
- * @brief Unskip any gravity tasks associated with active cells.
- *
- * @param c The cell.
- * @param e The engine.
- */
-static void runner_do_unskip_gravity(struct cell *c, struct engine *e) {
-
-  /* Ignore empty cells. */
-  if (c->grav.count == 0) return;
-
-  /* Skip inactive cells. */
-  if (!cell_is_active_gravity(c, e)) return;
-
-  /* Recurse */
-  if (c->split && ((c->maxdepth - c->depth) >= space_subdepth_diff_grav)) {
-    for (int k = 0; k < 8; k++) {
-      if (c->progeny[k] != NULL) {
-        struct cell *cp = c->progeny[k];
-        runner_do_unskip_gravity(cp, e);
-      }
-    }
-  }
-
-  /* Unskip any active tasks. */
-  cell_unskip_gravity_tasks(c, &e->sched);
-}
-
-/**
- * @brief Mapper function to unskip active tasks.
- *
- * @param map_data An array of #cell%s.
- * @param num_elements Chunk size.
- * @param extra_data Pointer to an #engine.
- */
-void runner_do_unskip_mapper(void *map_data, int num_elements,
-                             void *extra_data) {
-
-  struct engine *e = (struct engine *)extra_data;
-  const int with_star_formation = e->policy & engine_policy_star_formation;
-  const int nodeID = e->nodeID;
-  struct space *s = e->s;
-  int *local_cells = (int *)map_data;
-
-  for (int ind = 0; ind < num_elements; ind++) {
-    struct cell *c = &s->cells_top[local_cells[ind]];
-    if (c != NULL) {
-
-      /* Hydro tasks */
-      if (e->policy & engine_policy_hydro) runner_do_unskip_hydro(c, e);
-
-      /* All gravity tasks */
-      if ((e->policy & engine_policy_self_gravity) ||
-          ((e->policy & engine_policy_external_gravity) && c->nodeID == nodeID))
-        runner_do_unskip_gravity(c, e);
-
-      /* Stars tasks */
-      if (e->policy & engine_policy_stars)
-        runner_do_unskip_stars(c, e, with_star_formation);
-
-      /* Black hole tasks */
-      if (e->policy & engine_policy_black_holes)
-        runner_do_unskip_black_holes(c, e);
-    }
-  }
-}
-
 /**
  * @brief Drift all part in a cell.
  *
diff --git a/src/runner.h b/src/runner.h
index 1dc62ad6f5..be175eef42 100644
--- a/src/runner.h
+++ b/src/runner.h
@@ -96,8 +96,6 @@ void runner_do_fof_self(struct runner *r, struct cell *c, int timer);
 void runner_do_fof_pair(struct runner *r, struct cell *ci, struct cell *cj,
                         int timer);
 void *runner_main(void *data);
-void runner_do_unskip_mapper(void *map_data, int num_elements,
-                             void *extra_data);
 void runner_do_drift_all_mapper(void *map_data, int num_elements,
                                 void *extra_data);
 
diff --git a/src/scheduler.c b/src/scheduler.c
index 85c3727a1e..1fad63fd71 100644
--- a/src/scheduler.c
+++ b/src/scheduler.c
@@ -601,7 +601,10 @@ static void scheduler_splittask_hydro(struct task *t, struct scheduler *s) {
           /* Add the self tasks. */
           int first_child = 0;
           while (ci->progeny[first_child] == NULL) first_child++;
+
           t->ci = ci->progeny[first_child];
+          cell_set_flag(t->ci, cell_flag_has_tasks);
+
           for (int k = first_child + 1; k < 8; k++) {
             /* Do we have a non-empty progenitor? */
             if (ci->progeny[k] != NULL &&
@@ -711,8 +714,12 @@ static void scheduler_splittask_hydro(struct task *t, struct scheduler *s) {
           /* Loop over the sub-cell pairs for the current sid and add new tasks
            * for them. */
           struct cell_split_pair *csp = &cell_split_pairs[sid];
+
           t->ci = ci->progeny[csp->pairs[0].pid];
           t->cj = cj->progeny[csp->pairs[0].pjd];
+          cell_set_flag(t->ci, cell_flag_has_tasks);
+          cell_set_flag(t->cj, cell_flag_has_tasks);
+
           t->flags = csp->pairs[0].sid;
           for (int k = 1; k < csp->count; k++) {
             scheduler_splittask_hydro(
@@ -796,7 +803,9 @@ static void scheduler_splittask_gravity(struct task *t, struct scheduler *s) {
           /* Add the self tasks. */
           int first_child = 0;
           while (ci->progeny[first_child] == NULL) first_child++;
+
           t->ci = ci->progeny[first_child];
+          cell_set_flag(t->ci, cell_flag_has_tasks);
 
           for (int k = first_child + 1; k < 8; k++)
             if (ci->progeny[k] != NULL)
@@ -1100,6 +1109,9 @@ struct task *scheduler_addtask(struct scheduler *s, enum task_types type,
   t->tic = 0;
   t->toc = 0;
 
+  if (ci != NULL) cell_set_flag(ci, cell_flag_has_tasks);
+  if (cj != NULL) cell_set_flag(cj, cell_flag_has_tasks);
+
   /* Add an index for it. */
   // lock_lock( &s->lock );
   s->tasks_ind[atomic_inc(&s->nr_tasks)] = ind;
@@ -1589,14 +1601,6 @@ void scheduler_enqueue_mapper(void *map_data, int num_elements,
  * @param s The #scheduler.
  */
 void scheduler_start(struct scheduler *s) {
-  /* Reset all task timers. */
-  for (int i = 0; i < s->nr_tasks; ++i) {
-    s->tasks[i].tic = 0;
-    s->tasks[i].toc = 0;
-#ifdef SWIFT_DEBUG_TASKS
-    s->tasks[i].rid = -1;
-#endif
-  }
 
   /* Re-wait the tasks. */
   if (s->active_count > 1000) {
diff --git a/src/task.c b/src/task.c
index 643f084b1f..4d6cfa2482 100644
--- a/src/task.c
+++ b/src/task.c
@@ -893,7 +893,7 @@ void task_dump_all(struct engine *e, int step) {
 #ifdef SWIFT_DEBUG_TASKS
 
   /* Need this to convert ticks to seconds. */
-  unsigned long long cpufreq = clocks_get_cpufreq();
+  const unsigned long long cpufreq = clocks_get_cpufreq();
 
 #ifdef WITH_MPI
   /* Make sure output file is empty, only on one rank. */
@@ -926,7 +926,8 @@ void task_dump_all(struct engine *e, int step) {
               e->s_updates, cpufreq);
       int count = 0;
       for (int l = 0; l < e->sched.nr_tasks; l++) {
-        if (!e->sched.tasks[l].implicit && e->sched.tasks[l].toc != 0) {
+        if (!e->sched.tasks[l].implicit &&
+            e->sched.tasks[l].tic > e->tic_step) {
           fprintf(
               file_thread, " %03i %i %i %i %i %lli %lli %i %i %i %i %lli %i\n",
               engine_rank, e->sched.tasks[l].rid, e->sched.tasks[l].type,
@@ -966,7 +967,7 @@ void task_dump_all(struct engine *e, int step) {
           (unsigned long long)e->toc_step, e->updates, e->g_updates,
           e->s_updates, 0, cpufreq);
   for (int l = 0; l < e->sched.nr_tasks; l++) {
-    if (!e->sched.tasks[l].implicit && e->sched.tasks[l].toc != 0) {
+    if (!e->sched.tasks[l].implicit && e->sched.tasks[l].tic > e->tic_step) {
       fprintf(
           file_thread, " %i %i %i %i %lli %lli %i %i %i %i %i\n",
           e->sched.tasks[l].rid, e->sched.tasks[l].type,
@@ -1037,8 +1038,8 @@ void task_dump_stats(const char *dumpfile, struct engine *e, int header,
   for (int l = 0; l < e->sched.nr_tasks; l++) {
     int type = e->sched.tasks[l].type;
 
-    /* Skip implicit tasks, tasks that didn't run. */
-    if (!e->sched.tasks[l].implicit && e->sched.tasks[l].toc != 0) {
+    /* Skip implicit tasks, tasks that didn't run this step. */
+    if (!e->sched.tasks[l].implicit && e->sched.tasks[l].tic > e->tic_step) {
       int subtype = e->sched.tasks[l].subtype;
 
       double dt = e->sched.tasks[l].toc - e->sched.tasks[l].tic;
-- 
GitLab