From 7941b67cda22ce0dfe508a15fa362fb88c6a78c4 Mon Sep 17 00:00:00 2001
From: Mladen Ivkovic <mladen.ivkovic@hotmail.com>
Date: Fri, 18 Dec 2020 19:38:44 +0000
Subject: [PATCH] Give user option to dump task levels at a given time step frequency

---
 doc/RTD/source/AnalysisTools/index.rst | 20 +++++++++++-
 examples/parameter_example.yml         |  1 +
 src/engine.c                           |  7 +++-
 src/engine_config.c                    |  7 ++++
 src/scheduler.c                        | 25 ++++++++++++---
 src/scheduler.h                        |  5 ++-
 tools/plot_task_level.py               | 46 ++++++++++++++++++----------
 7 files changed, 86 insertions(+), 25 deletions(-)

diff --git a/doc/RTD/source/AnalysisTools/index.rst b/doc/RTD/source/AnalysisTools/index.rst
index e908b37835..bf827d1b4a 100644
--- a/doc/RTD/source/AnalysisTools/index.rst
+++ b/doc/RTD/source/AnalysisTools/index.rst
@@ -10,7 +10,7 @@ Analysis Tools
 Task dependencies
 -----------------
 
-At the beginning of each simulation the file ``dependency_graph.csv`` is generated and can be transformed into a ``dot`` and a ``png`` file with the script ``tools/plot_task_dependencies.py``.
+At the beginning of each simulation the file ``dependency_graph_0.csv`` is generated and can be transformed into a ``dot`` and a ``png`` file with the script ``tools/plot_task_dependencies.py``.
 It requires the ``dot`` package that is available in the library graphviz.
 This script has also the possibility to generate a list of function calls for each task with the option ``--with-calls`` (this list may be incomplete) and to describe at which level each task are run ``--with-levels`` (a larger simulation will provide more accurate levels).
 You can convert the ``dot`` file into a ``png`` with the following command
@@ -19,6 +19,24 @@ If you wish to have more dependency graphs, you can use the parameter ``Schedule
 While the initial graph is showing all the tasks/dependencies, the next ones are
 only showing the active tasks/dependencies.
 
+
+Task levels
+-----------------
+
+At the beginning of each simulation the file ``task_level_0.txt`` is generated.
+It contains the counts of all tasks at all levels (depths) in the tree.
+The depths and counts of the tasks can be plotted with the script ``tools/plot_task_level.py``.
+It will display the individual tasks on the x-axis, the number of tasks at a given level on the y-axis, and the level as the colour of the plotted point.
+Additionally, with the ``--count`` flag the script can write out, in brackets next to each task's name on the x-axis, the number of different levels on which that task exists.
+Finally, in some cases the counts for different levels of a task may be very close to each other and overlap on the plot, making them barely visible.
+This can be alleviated by using the ``--displace`` flag:
+It will displace the plotted points along the y-axis in an attempt to make them more visible; note however that the displayed counts won't be exact in that case.
+If you wish to have more task level output files, you can use the parameter ``Scheduler:task_level_output_frequency``.
+It defines how many steps are done in between two task level output dumps.
+
+
+
+
 Cell graph
 ----------
 
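The documentation above describes the task_level dumps; for orientation, one such file can also be inspected by hand with a few lines of pandas. This is a minimal sketch, not part of the patch, assuming the space-separated type/subtype/depth/count layout that tools/plot_task_level.py reads; task_level_0.txt is the step-0 dump of a non-MPI run.

# Sketch, not part of the patch: inspect one task level dump by hand,
# using the same column layout that tools/plot_task_level.py expects
# (space-separated "type subtype depth count" rows, '#' comment lines).
import pandas as pd

names = ["type", "subtype", "depth", "count"]
data = pd.read_csv("task_level_0.txt", sep=" ", comment="#", names=names)

# For example, list how many distinct levels each task/subtask pair spans.
print(data.groupby(["type", "subtype"])["depth"].nunique())
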
diff --git a/examples/parameter_example.yml b/examples/parameter_example.yml
index 5eec165ede..8933395fa6 100644
--- a/examples/parameter_example.yml
+++ b/examples/parameter_example.yml
@@ -134,6 +134,7 @@ Scheduler:
   engine_max_sparts_per_ghost:  1000     # (Optional) Maximum number of sparts per ghost.
   engine_max_parts_per_cooling: 10000    # (Optional) Maximum number of parts per cooling task.
   dependency_graph_frequency:   0        # (Optional) Dumping frequency of the dependency graph. By default, writes only at the first step.
+  task_level_output_frequency:  0        # (Optional) Dumping frequency of the task level data. By default, writes only at the first step.
 
 # Parameters governing the time integration (Set dt_min and dt_max to the same value for a fixed time-step run.)
 TimeIntegration:
diff --git a/src/engine.c b/src/engine.c
index 13ac66793f..a448bdfa49 100644
--- a/src/engine.c
+++ b/src/engine.c
@@ -1825,7 +1825,7 @@ void engine_init_particles(struct engine *e, int flag_entropy_ICs,
 #endif
 
   scheduler_write_dependencies(&e->sched, e->verbose, e->step);
-  if (e->nodeID == 0) scheduler_write_task_level(&e->sched);
+  if (e->nodeID == 0) scheduler_write_task_level(&e->sched, e->step);
 
   /* Run the 0th time-step */
   TIMER_TIC2;
@@ -2243,6 +2243,11 @@ void engine_step(struct engine *e) {
       e->step % e->sched.frequency_dependency == 0)
     scheduler_write_dependencies(&e->sched, e->verbose, e->step);
 
+  /* Write the task levels */
+  if (e->sched.frequency_task_levels != 0 &&
+      e->step % e->sched.frequency_task_levels == 0)
+    scheduler_write_task_level(&e->sched, e->step);
+
   /* Start all the tasks. */
   TIMER_TIC;
   engine_launch(e, "tasks");
diff --git a/src/engine_config.c b/src/engine_config.c
index 02257355a7..99e7b32a97 100644
--- a/src/engine_config.c
+++ b/src/engine_config.c
@@ -193,6 +193,13 @@ void engine_config(int restart, int fof, struct engine *e,
     error("Scheduler:dependency_graph_frequency should be >= 0");
   }
 
+  /* Get the frequency of the task level dumping */
+  e->sched.frequency_task_levels = parser_get_opt_param_int(
+      params, "Scheduler:task_level_output_frequency", 0);
+  if (e->sched.frequency_task_levels < 0) {
+    error("Scheduler:task_level_output_frequency should be >= 0");
+  }
+
   /* Deal with affinity. For now, just figure out the number of cores. */
 #if defined(HAVE_SETAFFINITY)
   const int nr_cores = sysconf(_SC_NPROCESSORS_ONLN);
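Taken together, the engine.c and engine_config.c changes above define when a task level file is written: always at step 0 (rank 0 only), and afterwards whenever the new Scheduler:task_level_output_frequency parameter is non-zero and divides the current step. The following Python sketch is not part of the patch and the helper name is made up; it only restates that schedule for clarity.

# Sketch, not part of the patch: the dump schedule implemented in
# engine_init_particles() / engine_step(), re-expressed in Python.
def writes_task_levels(step, frequency, node_id=0):
    if step == 0:
        # Initial dump: written unconditionally, but only by rank 0.
        return node_id == 0
    # Later steps: written (by every rank) when the frequency is non-zero
    # and the step is a multiple of it.
    return frequency != 0 and step % frequency == 0

assert writes_task_levels(step=0, frequency=0)       # default: only step 0
assert not writes_task_levels(step=7, frequency=0)
assert writes_task_levels(step=20, frequency=10)     # every 10th step
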
diff --git a/src/scheduler.c b/src/scheduler.c
index bb0c43bb3c..f107c653fc 100644
--- a/src/scheduler.c
+++ b/src/scheduler.c
@@ -369,7 +369,7 @@ void task_dependency_sum(void *in_p, void *out_p, int *len,
 /**
  * @brief Write a csv file with the task dependencies.
  *
- * Run plot_task_dependencies.sh for an example of how to use it
+ * Run plot_task_dependencies.py for an example of how to use it
  * to generate the figure.
  *
  * @param s The #scheduler we are working in.
@@ -2489,9 +2489,16 @@ void scheduler_free_tasks(struct scheduler *s) {
 }
 
 /**
- * @brief write down each task level
+ * @brief write down the levels and the number of tasks at that level.
+ *
+ * Run plot_task_level.py for an example of how to use it
+ * to generate the figure.
+ *
+ * @param s The #scheduler we are working in.
+ * @param step The current step number.
  */
-void scheduler_write_task_level(const struct scheduler *s) {
+void scheduler_write_task_level(const struct scheduler *s, int step) {
+
   /* init */
   const int max_depth = 30;
   const struct task *tasks = s->tasks;
@@ -2519,8 +2526,18 @@ void scheduler_write_task_level(const struct scheduler *s) {
     }
   }
 
+  /* Generate filename */
+  char filename[200] = "task_level_\0";
+#ifdef WITH_MPI
+  char rankstr[6];
+  sprintf(rankstr, "%04d_", s->nodeID);
+  strcat(filename, rankstr);
+#endif
+  char stepstr[100];
+  sprintf(stepstr, "%d.txt", step);
+  strcat(filename, stepstr);
+
   /* Open file */
-  char filename[200] = "task_level.txt";
   FILE *f = fopen(filename, "w");
   if (f == NULL) error("Error opening task level file.");
 
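The file name assembled by scheduler_write_task_level() above is task_level_<step>.txt, or task_level_<rank>_<step>.txt with a zero-padded rank when compiled WITH_MPI. A short Python mirror of that logic follows; it is not part of the patch and the helper name is made up for illustration.

# Sketch, not part of the patch: the output file name built in
# scheduler_write_task_level(), mirrored in Python.
def task_level_filename(step, rank=None):
    name = "task_level_"
    if rank is not None:            # corresponds to the WITH_MPI branch
        name += "%04d_" % rank      # zero-padded rank, as in sprintf("%04d_", ...)
    return name + "%d.txt" % step

print(task_level_filename(0))            # -> task_level_0.txt
print(task_level_filename(12, rank=3))   # -> task_level_0003_12.txt
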
diff --git a/src/scheduler.h b/src/scheduler.h
index 13c7395d26..d7fc9ad38a 100644
--- a/src/scheduler.h
+++ b/src/scheduler.h
@@ -114,6 +114,9 @@ struct scheduler {
 
   /* Frequency of the dependency graph dumping. */
   int frequency_dependency;
+
+  /* Frequency of the task levels dumping. */
+  int frequency_task_levels;
 };
 
 /* Inlined functions (for speed). */
@@ -207,7 +210,7 @@ void scheduler_print_tasks(const struct scheduler *s, const char *fileName);
 void scheduler_clean(struct scheduler *s);
 void scheduler_free_tasks(struct scheduler *s);
 void scheduler_write_dependencies(struct scheduler *s, int verbose, int step);
-void scheduler_write_task_level(const struct scheduler *s);
+void scheduler_write_task_level(const struct scheduler *s, int step);
 void scheduler_dump_queues(struct engine *e);
 void scheduler_report_task_times(const struct scheduler *s,
                                  const int nr_threads);
diff --git a/tools/plot_task_level.py b/tools/plot_task_level.py
index 2fbe55c025..668baaf895 100755
--- a/tools/plot_task_level.py
+++ b/tools/plot_task_level.py
@@ -3,7 +3,9 @@
 description = """
 Plot the number of tasks for each depth level and each type of task.
 
 Usage:
-  ./plot_task_level.py task_level.txt
+  ./plot_task_level.py task_level_0.txt
+  or
+  ./plot_task_level.py task_level_*_0.txt
 """
 
@@ -61,43 +63,51 @@ def parse_args():
     parser.add_argument(
         "file",
         type=str,
-        help="Required file name of .csv file(s) of the task levels "
+        nargs="+",
+        help="Required file name(s) of .txt file(s) of the task levels "
        "generated by swift.",
     )
 
     args = parser.parse_args()
-    filename = args.file
-    print(filename)
+    files = args.file
 
-    if not path.exists(filename):
-        raise FileNotFoundError("File not found:'" + filename + "'")
+    for f in files:
+        if not path.exists(f):
+            raise FileNotFoundError("File not found:'" + f + "'")
 
-    return args, filename
+    return args, files
 
 
-def read_data(filename):
+def read_data(files):
     """
-    Reads in data from the csv file.
+    Reads in data from the .txt file.
 
     Parameters
     ----------
 
-    filename: str
-        filename to be read from
+    files: list
+        list of filenames to be read from
 
 
     Returns
     -------
 
-    data: pandas dataset
-        dataset containing read in data
+    data: pandas dataframe
+        dataframe containing read in data
     """
 
     # Column names
     names = ["type", "subtype", "depth", "count"]
 
-    # read file
-    data = pd.read_csv(filename, sep=" ", comment="#", names=names)
+    alldata = None
+    for f in files:
+        # read file
+        data = pd.read_csv(f, sep=" ", comment="#", names=names)
+        if alldata is None:
+            alldata = data
+        else:
+            concat = pd.concat([alldata, data])
+            alldata = concat.groupby(["type", "subtype", "depth"], as_index=False).sum()
 
-    return data
+    return alldata
 
@@ -237,9 +247,9 @@ def add_displacement(data):
 
 if __name__ == "__main__":
 
-    args, filename = parse_args()
+    args, files = parse_args()
 
-    data = read_data(filename)
+    data = read_data(files)
     cmap = get_discrete_cmap(data["depth"].max())
 
     # are we counting the levels?
-- 
GitLab
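As a usage note on the multi-file support added to read_data() above: when several per-rank dumps of the same step are passed in, the counts of identical (type, subtype, depth) rows are summed. The worked example below is not part of the patch; the task names and counts are made up for illustration.

# Sketch, not part of the patch: what the concat/groupby in read_data() does
# when combining per-rank files - counts for the same (type, subtype, depth)
# row are summed across ranks.
import pandas as pd

rank0 = pd.DataFrame({"type": ["self", "pair"], "subtype": ["density", "density"],
                      "depth": [2, 3], "count": [10, 4]})
rank1 = pd.DataFrame({"type": ["self"], "subtype": ["density"],
                      "depth": [2], "count": [6]})

merged = pd.concat([rank0, rank1]).groupby(
    ["type", "subtype", "depth"], as_index=False).sum()
print(merged)   # the ("self", "density", 2) row now carries count 16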