From a9d6e7c6f872832dc72d11c2a606ea49d43d9dc8 Mon Sep 17 00:00:00 2001
From: "Peter W. Draper" <p.w.draper@durham.ac.uk>
Date: Fri, 1 Feb 2019 18:22:01 +0000
Subject: [PATCH] Under MPI report the maximum tasks_per_cell value in rank 0,
 that is the most applicable

We fudge this by 1.2 for some overheads and now also report the total number of tasks and cells across the whole application
---
 examples/parameter_example.yml |  2 +-
 src/collectgroup.c             | 63 ++++++++++++++++++++++++----------
 src/collectgroup.h             | 24 ++++++++-----
 src/engine.c                   | 52 +++++++++++++++++++++++-----
 src/engine.h                   | 12 +++++--
 5 files changed, 113 insertions(+), 40 deletions(-)

diff --git a/examples/parameter_example.yml b/examples/parameter_example.yml
index 4c8d3b5048..2660a72f22 100644
--- a/examples/parameter_example.yml
+++ b/examples/parameter_example.yml
@@ -72,7 +72,7 @@ Scheduler:
   cell_extra_gparts:         0         # (Optional) Number of spare gparts per top-level allocated at rebuild time for on-the-fly creation.
   cell_extra_sparts:         400       # (Optional) Number of spare sparts per top-level allocated at rebuild time for on-the-fly creation.
   max_top_level_cells:       12        # (Optional) Maximal number of top-level cells in any dimension. The number of top-level cells will be the cube of this (this is the default value).
-  tasks_per_cell:            0.0         # (Optional) The average number of tasks per cell. If not large enough the simulation will fail (means guess...).
+  tasks_per_cell:            0.0       # (Optional) The average number of tasks per cell. If not large enough the simulation will fail (means guess...).
   links_per_tasks:           10        # (Optional) The average number of links per tasks (before adding the communication tasks). If not large enough the simulation will fail (means guess...). Defaults to 10.
   mpi_message_limit:         4096      # (Optional) Maximum MPI task message size to send non-buffered, KB.
 
diff --git a/src/collectgroup.c b/src/collectgroup.c
index 0b7b419b56..ddf3e35d94 100644
--- a/src/collectgroup.c
+++ b/src/collectgroup.c
@@ -41,6 +41,9 @@ struct mpicollectgroup1 {
   integertime_t ti_hydro_end_min;
   integertime_t ti_gravity_end_min;
   int forcerebuild;
+  long long total_nr_cells;
+  long long total_nr_tasks;
+  float tasks_per_cell_max;
 };
 
 /* Forward declarations. */
@@ -93,6 +96,9 @@ void collectgroup1_apply(struct collectgroup1 *grp1, struct engine *e) {
   e->nr_inhibited_gparts = grp1->g_inhibited;
   e->nr_inhibited_sparts = grp1->s_inhibited;
   e->forcerebuild = grp1->forcerebuild;
+  e->total_nr_cells = grp1->total_nr_cells;
+  e->total_nr_tasks = grp1->total_nr_tasks;
+  e->tasks_per_cell_max = grp1->tasks_per_cell_max;
 }
 
 /**
@@ -101,37 +107,39 @@ void collectgroup1_apply(struct collectgroup1 *grp1, struct engine *e) {
  * @param grp1 The #collectgroup1 to initialise
  * @param updated the number of updated hydro particles on this node this step.
  * @param g_updated the number of updated gravity particles on this node this
- * step.
+ *                  step.
  * @param s_updated the number of updated star particles on this node this step.
  * @param inhibited the number of inhibited hydro particles on this node this
- * step.
+ *                  step.
  * @param g_inhibited the number of inhibited gravity particles on this node
- * this step.
+ *                    this step.
  * @param s_inhibited the number of inhibited star particles on this node this
- * step.
+ *                    step.
  * @param ti_hydro_end_min the minimum end time for next hydro time step after
- * this step.
+ *                         this step.
  * @param ti_hydro_end_max the maximum end time for next hydro time step after
- * this step.
+ *                         this step.
  * @param ti_hydro_beg_max the maximum begin time for next hydro time step after
- * this step.
+ *                         this step.
  * @param ti_gravity_end_min the minimum end time for next gravity time step
- * after this step.
+ *                           after this step.
  * @param ti_gravity_end_max the maximum end time for next gravity time step
- * after this step.
+ *                           after this step.
  * @param ti_gravity_beg_max the maximum begin time for next gravity time step
- * after this step.
+ *                           after this step.
  * @param forcerebuild whether a rebuild is required after this step.
+ * @param total_nr_cells total number of all cells on rank.
+ * @param total_nr_tasks total number of tasks on rank.
+ * @param tasks_per_cell the used number of tasks per cell.
  */
-void collectgroup1_init(struct collectgroup1 *grp1, size_t updated,
-                        size_t g_updated, size_t s_updated, size_t inhibited,
-                        size_t g_inhibited, size_t s_inhibited,
-                        integertime_t ti_hydro_end_min,
-                        integertime_t ti_hydro_end_max,
-                        integertime_t ti_hydro_beg_max,
-                        integertime_t ti_gravity_end_min,
-                        integertime_t ti_gravity_end_max,
-                        integertime_t ti_gravity_beg_max, int forcerebuild) {
+void collectgroup1_init(
+    struct collectgroup1 *grp1, size_t updated, size_t g_updated,
+    size_t s_updated, size_t inhibited, size_t g_inhibited, size_t s_inhibited,
+    integertime_t ti_hydro_end_min, integertime_t ti_hydro_end_max,
+    integertime_t ti_hydro_beg_max, integertime_t ti_gravity_end_min,
+    integertime_t ti_gravity_end_max, integertime_t ti_gravity_beg_max,
+    int forcerebuild, long long total_nr_cells, long long total_nr_tasks,
+    float tasks_per_cell) {
 
   grp1->updated = updated;
   grp1->g_updated = g_updated;
@@ -146,6 +154,9 @@ void collectgroup1_init(struct collectgroup1 *grp1, size_t updated,
   grp1->ti_gravity_end_max = ti_gravity_end_max;
   grp1->ti_gravity_beg_max = ti_gravity_beg_max;
   grp1->forcerebuild = forcerebuild;
+  grp1->total_nr_cells = total_nr_cells;
+  grp1->total_nr_tasks = total_nr_tasks;
+  grp1->tasks_per_cell_max = tasks_per_cell;
 }
 
 /**
@@ -171,6 +182,9 @@ void collectgroup1_reduce(struct collectgroup1 *grp1) {
   mpigrp11.ti_hydro_end_min = grp1->ti_hydro_end_min;
   mpigrp11.ti_gravity_end_min = grp1->ti_gravity_end_min;
   mpigrp11.forcerebuild = grp1->forcerebuild;
+  mpigrp11.total_nr_cells = grp1->total_nr_cells;
+  mpigrp11.total_nr_tasks = grp1->total_nr_tasks;
+  mpigrp11.tasks_per_cell_max = grp1->tasks_per_cell_max;
 
   struct mpicollectgroup1 mpigrp12;
   if (MPI_Allreduce(&mpigrp11, &mpigrp12, 1, mpicollectgroup1_type,
@@ -187,6 +201,9 @@ void collectgroup1_reduce(struct collectgroup1 *grp1) {
   grp1->ti_hydro_end_min = mpigrp12.ti_hydro_end_min;
   grp1->ti_gravity_end_min = mpigrp12.ti_gravity_end_min;
   grp1->forcerebuild = mpigrp12.forcerebuild;
+  grp1->total_nr_cells = mpigrp12.total_nr_cells;
+  grp1->total_nr_tasks = mpigrp12.total_nr_tasks;
+  grp1->tasks_per_cell_max = mpigrp12.tasks_per_cell_max;
 
 #endif
 }
@@ -221,6 +238,14 @@ static void doreduce1(struct mpicollectgroup1 *mpigrp11,
   /* Everyone must agree to not rebuild. */
   if (mpigrp11->forcerebuild || mpigrp12->forcerebuild)
     mpigrp11->forcerebuild = 1;
+
+  /* Totals of all tasks and cells. */
+  mpigrp11->total_nr_cells += mpigrp12->total_nr_cells;
+  mpigrp11->total_nr_tasks += mpigrp12->total_nr_tasks;
+
+  /* Maximum value of tasks_per_cell. */
+  mpigrp11->tasks_per_cell_max =
+      max(mpigrp11->tasks_per_cell_max, mpigrp12->tasks_per_cell_max);
 }
 
 /**
diff --git a/src/collectgroup.h b/src/collectgroup.h
index b6e8769ac9..3e430b58db 100644
--- a/src/collectgroup.h
+++ b/src/collectgroup.h
@@ -46,19 +46,25 @@ struct collectgroup1 {
 
   /* Force the engine to rebuild? */
   int forcerebuild;
+
+  /* Totals of cells and tasks. */
+  long long total_nr_cells;
+  long long total_nr_tasks;
+
+  /* Maximum value of actual tasks per cell across all ranks. */
+  float tasks_per_cell_max;
 };
 
 void collectgroup_init(void);
 void collectgroup1_apply(struct collectgroup1 *grp1, struct engine *e);
-void collectgroup1_init(struct collectgroup1 *grp1, size_t updated,
-                        size_t g_updated, size_t s_updated, size_t inhibited,
-                        size_t g_inhibited, size_t s_inhibited,
-                        integertime_t ti_hydro_end_min,
-                        integertime_t ti_hydro_end_max,
-                        integertime_t ti_hydro_beg_max,
-                        integertime_t ti_gravity_end_min,
-                        integertime_t ti_gravity_end_max,
-                        integertime_t ti_gravity_beg_max, int forcerebuild);
+void collectgroup1_init(
+    struct collectgroup1 *grp1, size_t updated, size_t g_updated,
+    size_t s_updated, size_t inhibited, size_t g_inhibited, size_t s_inhibited,
+    integertime_t ti_hydro_end_min, integertime_t ti_hydro_end_max,
+    integertime_t ti_hydro_beg_max, integertime_t ti_gravity_end_min,
+    integertime_t ti_gravity_end_max, integertime_t ti_gravity_beg_max,
+    int forcerebuild, long long total_nr_cells, long long total_nr_tasks,
+    float tasks_per_cell);
 void collectgroup1_reduce(struct collectgroup1 *grp1);
 
 #endif /* SWIFT_COLLECTGROUP_H */
diff --git a/src/engine.c b/src/engine.c
index 9b4b724368..21a14889fe 100644
--- a/src/engine.c
+++ b/src/engine.c
@@ -1902,6 +1902,35 @@ void engine_print_task_counts(const struct engine *e) {
   const int nr_tasks = sched->nr_tasks;
   const struct task *const tasks = sched->tasks;
 
+  /* Global tasks and cells when using MPI. */
+#ifdef WITH_MPI
+  if (e->nodeID == 0 && e->total_nr_tasks > 0)
+    printf(
+        "[%04i] %s engine_print_task_counts: System total: %lld,"
+        " no. cells: %lld\n",
+        e->nodeID, clocks_get_timesincestart(), e->total_nr_tasks,
+        e->total_nr_cells);
+  fflush(stdout);
+#endif
+
+  /* Report value that can be used to estimate the task_per_cells parameter. */
+  float tasks_per_cell = (float)nr_tasks / (float)e->s->tot_cells;
+
+#ifdef WITH_MPI
+  message("Total = %d (per cell = %.2f)", nr_tasks, tasks_per_cell);
+
+  /* And the system maximum on rank 0, only after first step, increase by our
+   * margin to allow for some variation in repartitioning. */
+  if (e->nodeID == 0 && e->total_nr_tasks > 0) {
+    message("Total = %d (maximum per cell = %.2f)", nr_tasks,
+            e->tasks_per_cell_max * engine_tasks_per_cell_margin);
+  }
+
+#else
+  message("Total = %d (per cell = %.2f)", nr_tasks, tasks_per_cell);
+#endif
+  fflush(stdout);
+
   /* Count and print the number of each task type. */
   int counts[task_type_count + 1];
   for (int k = 0; k <= task_type_count; k++) counts[k] = 0;
@@ -1911,8 +1940,7 @@ void engine_print_task_counts(const struct engine *e) {
     else
       counts[(int)tasks[k].type] += 1;
   }
-  message("Total = %d  (per cell = %.2f)", nr_tasks,
-          (float)nr_tasks / e->s->tot_cells);
+
 #ifdef WITH_MPI
   printf("[%04i] %s engine_print_task_counts: task counts are [ %s=%i",
          e->nodeID, clocks_get_timesincestart(), taskID_names[0], counts[0]);
@@ -1949,10 +1977,10 @@ int engine_estimate_nr_tasks(const struct engine *e) {
 
   float tasks_per_cell = e->tasks_per_cell;
   if (tasks_per_cell > 0.0f) {
-      if (e->verbose)
-          message("tasks per cell given as: %.2f, so maximum tasks: %d",
-                  e->tasks_per_cell, (int)(e->s->tot_cells * tasks_per_cell));
-      return (int)(e->s->tot_cells * tasks_per_cell);
+    if (e->verbose)
+      message("tasks per cell given as: %.2f, so maximum tasks: %d",
+              e->tasks_per_cell, (int)(e->s->tot_cells * tasks_per_cell));
+    return (int)(e->s->tot_cells * tasks_per_cell);
   }
 
   /* Our guess differs depending on the types of tasks we are using, but we
@@ -2064,9 +2092,9 @@ int engine_estimate_nr_tasks(const struct engine *e) {
   if (tasks_per_cell < 1.0f) tasks_per_cell = 1.0f;
   if (e->verbose)
     message("tasks per cell estimated as: %.2f, maximum tasks: %d",
-            tasks_per_cell, (int) (ncells * tasks_per_cell));
+            tasks_per_cell, (int)(ncells * tasks_per_cell));
 
-  return (int) (ncells * tasks_per_cell);
+  return (int)(ncells * tasks_per_cell);
 }
 
 /**
@@ -2516,7 +2544,9 @@ void engine_collect_end_of_step(struct engine *e, int apply) {
       &e->collect_group1, data.updated, data.g_updated, data.s_updated,
       data.inhibited, data.g_inhibited, data.s_inhibited, data.ti_hydro_end_min,
       data.ti_hydro_end_max, data.ti_hydro_beg_max, data.ti_gravity_end_min,
-      data.ti_gravity_end_max, data.ti_gravity_beg_max, e->forcerebuild);
+      data.ti_gravity_end_max, data.ti_gravity_beg_max, e->forcerebuild,
+      e->s->tot_cells, e->sched.nr_tasks,
+      (float)e->sched.nr_tasks / (float)e->s->tot_cells);
 
 /* Aggregate collective data from the different nodes for this step. */
 #ifdef WITH_MPI
@@ -4164,6 +4194,8 @@ void engine_init(struct engine *e, struct space *s, struct swift_params *params,
   e->cputime_last_step = 0;
   e->last_repartition = 0;
 #endif
+  e->total_nr_cells = 0;
+  e->total_nr_tasks = 0;
 
 #if defined(WITH_LOGGER)
   e->logger = (struct logger *)malloc(sizeof(struct logger));
@@ -4695,6 +4727,8 @@ void engine_config(int restart, struct engine *e, struct swift_params *params,
    * from the end of the dumped run. Can be changed on restart. */
   e->tasks_per_cell =
       parser_get_opt_param_float(params, "Scheduler:tasks_per_cell", 0.0);
+  e->tasks_per_cell_max = 0.0f;
+
   float maxtasks = 0;
   if (restart)
     maxtasks = e->restart_max_tasks;
diff --git a/src/engine.h b/src/engine.h
index ce583243e5..e73a2eda85 100644
--- a/src/engine.h
+++ b/src/engine.h
@@ -107,6 +107,7 @@ enum engine_step_properties {
 #define engine_default_timesteps_file_name "timesteps"
 #define engine_max_parts_per_ghost 1000
 #define engine_max_sparts_per_ghost 1000
+#define engine_tasks_per_cell_margin 1.2
 
 /**
  * @brief The rank of the engine as a global variable (for messages).
@@ -211,7 +212,13 @@ struct engine {
   /* Total numbers of particles in the system. */
   long long total_nr_parts, total_nr_gparts, total_nr_sparts;
 
-  /* The total number of inhibted particles in the system. */
+  /* Total numbers of cells (top-level and sub-cells) in the system. */
+  long long total_nr_cells;
+
+  /* Total numbers of tasks in the system. */
+  long long total_nr_tasks;
+
+  /* The total number of inhibited particles in the system. */
   long long nr_inhibited_parts, nr_inhibited_gparts, nr_inhibited_sparts;
 
 #ifdef SWIFT_DEBUG_CHECKS
@@ -326,8 +333,9 @@ struct engine {
   size_t nr_links, size_links;
 
   /* Average number of tasks per cell. Used to estimate the sizes
-   * of the various task arrays. */
+   * of the various task arrays. Also the maximum from all ranks. */
   float tasks_per_cell;
+  float tasks_per_cell_max;
 
   /* Average number of links per tasks. This number is used before
      the creation of communication tasks so needs to be large enough. */
-- 
GitLab