Skip to content
Snippets Groups Projects
Commit 84830f45 authored by Peter W. Draper's avatar Peter W. Draper
Browse files

Merge remote-tracking branch 'origin/repart-by-ticks' into repart-by-means

Conflicts:
	src/partition.c
	src/scheduler.c
parents e76ec80e 5fe5e550
No related branches found
No related tags found
1 merge request!707Repart by CPU ticks with optional fixed costs
......@@ -199,7 +199,7 @@ fi
# Check if task debugging is on.
AC_ARG_ENABLE([task-debugging],
[AS_HELP_STRING([--enable-task-debugging],
[Store task timing information and generate task dump files @<:@yes/no@:>@]
[Store extra information for generating task dump files @<:@yes/no@:>@]
)],
[enable_task_debugging="$enableval"],
[enable_task_debugging="no"]
......@@ -335,7 +335,7 @@ AC_ARG_ENABLE([vec],
[enable_vec="yes"]
)
# Disable hand written vectorisation. Slightly odd implementation as want
# Disable hand written vectorisation. Slightly odd implementation as want
# to describe as --disable-hand-vec, but macro is enable (there is no enable action).
AC_ARG_ENABLE([hand-vec],
[AS_HELP_STRING([--disable-hand-vec],
......
......@@ -1046,65 +1046,56 @@ void engine_repartition_trigger(struct engine *e) {
const ticks tic = getticks();
/* Do nothing if there have not been enough steps since the last
* repartition, don't want to repeat this too often or immediately after
* a repartition step. Also nothing to do when requested. */
/* Do nothing if there have not been enough steps since the last repartition
* as we don't want to repeat this too often or immediately after a
* repartition step. Also nothing to do when requested. */
if (e->step - e->last_repartition >= 2 &&
e->reparttype->type != REPART_NONE) {
/* Old style if trigger is >1 or this is the second step (want an early
* repartition following the initial repartition). */
if (e->reparttype->trigger > 1 || e->step == 2) {
if (e->reparttype->trigger > 1) {
if ((e->step % (int)e->reparttype->trigger) == 0) e->forcerepart = 1;
} else {
e->forcerepart = 1;
}
} else {
/* Use cputimes from ranks to estimate the imbalance. */
/* First check if we are going to skip this stage anyway, if so do that
* now. If is only worth checking the CPU loads when we have processed a
* significant number of all particles. */
if ((e->updates > 1 &&
e->updates >= e->total_nr_parts * e->reparttype->minfrac) ||
(e->g_updates > 1 &&
e->g_updates >= e->total_nr_gparts * e->reparttype->minfrac)) {
/* Get CPU time used since the last call to this function. */
double elapsed_cputime =
clocks_get_cputime_used() - e->cputime_last_step;
/* Gather the elapsed CPU times from all ranks for the last step. */
double elapsed_cputimes[e->nr_nodes];
MPI_Gather(&elapsed_cputime, 1, MPI_DOUBLE, elapsed_cputimes, 1,
MPI_DOUBLE, 0, MPI_COMM_WORLD);
if (e->nodeID == 0) {
/* Get the range and mean of cputimes. */
double mintime = elapsed_cputimes[0];
double maxtime = elapsed_cputimes[0];
double sum = elapsed_cputimes[0];
for (int k = 1; k < e->nr_nodes; k++) {
if (elapsed_cputimes[k] > maxtime) maxtime = elapsed_cputimes[k];
if (elapsed_cputimes[k] < mintime) mintime = elapsed_cputimes[k];
sum += elapsed_cputimes[k];
}
double mean = sum / (double)e->nr_nodes;
/* Are we out of balance? */
if (((maxtime - mintime) / mean) > e->reparttype->trigger) {
if (e->verbose)
message("trigger fraction %.3f exceeds %.3f will repartition",
(maxtime - mintime) / mintime, e->reparttype->trigger);
e->forcerepart = 1;
}
/* It is only worth checking the CPU loads when we have processed a
* significant number of all particles as we require all tasks to have
* times. */
if ((e->updates > 1 &&
e->updates >= e->total_nr_parts * e->reparttype->minfrac) ||
(e->g_updates > 1 &&
e->g_updates >= e->total_nr_gparts * e->reparttype->minfrac)) {
/* Get CPU time used since the last call to this function. */
double elapsed_cputime = clocks_get_cputime_used() - e->cputime_last_step;
/* Gather the elapsed CPU times from all ranks for the last step. */
double elapsed_cputimes[e->nr_nodes];
MPI_Gather(&elapsed_cputime, 1, MPI_DOUBLE, elapsed_cputimes, 1,
MPI_DOUBLE, 0, MPI_COMM_WORLD);
if (e->nodeID == 0) {
/* Get the range and mean of cputimes. */
double mintime = elapsed_cputimes[0];
double maxtime = elapsed_cputimes[0];
double sum = elapsed_cputimes[0];
for (int k = 1; k < e->nr_nodes; k++) {
if (elapsed_cputimes[k] > maxtime) maxtime = elapsed_cputimes[k];
if (elapsed_cputimes[k] < mintime) mintime = elapsed_cputimes[k];
sum += elapsed_cputimes[k];
}
double mean = sum / (double)e->nr_nodes;
/* Are we out of balance? */
double abs_trigger = fabs(e->reparttype->trigger);
if (((maxtime - mintime) / mean) > abs_trigger) {
// if (e->verbose)
message("trigger fraction %.3f > %.3f will repartition",
(maxtime - mintime) / mean, abs_trigger);
e->forcerepart = 1;
} else {
// if (e->verbose) {
message("trigger fraction %.3f =< %.3f will not repartition",
(maxtime - mintime) / mean, abs_trigger);
}
/* All nodes do this together. */
MPI_Bcast(&e->forcerepart, 1, MPI_INT, 0, MPI_COMM_WORLD);
}
/* All nodes do this together. */
MPI_Bcast(&e->forcerepart, 1, MPI_INT, 0, MPI_COMM_WORLD);
}
/* Remember we did this. */
......@@ -2897,9 +2888,7 @@ void engine_step(struct engine *e) {
struct clocks_time time1, time2;
clocks_gettime(&time1);
#ifdef SWIFT_DEBUG_TASKS
e->tic_step = getticks();
#endif
if (e->nodeID == 0) {
......@@ -3056,10 +3045,8 @@ void engine_step(struct engine *e) {
clocks_gettime(&time2);
e->wallclock_time = (float)clocks_diff(&time1, &time2);
#ifdef SWIFT_DEBUG_TASKS
/* Time in ticks at the end of this step. */
e->toc_step = getticks();
#endif
}
/**
......
......@@ -289,10 +289,8 @@ struct engine {
struct proxy *proxies;
int nr_proxies, *proxy_ind;
#ifdef SWIFT_DEBUG_TASKS
/* Tic/toc at the start/end of a step. */
ticks tic_step, toc_step;
#endif
#ifdef WITH_MPI
/* CPU time of the last step. */
......
......@@ -1208,7 +1208,11 @@ void partition_gather_weights(void *map_data, int num_elements,
t->type == task_type_logger || t->implicit || t->ci == NULL) continue;
/* Get the task weight based on fixed cost for this task type. */
double w = repartition_costs[t->type][t->subtype];
//double w = repartition_costs[t->type][t->subtype];
/* Get the task weight based on costs. */
double w = (double)t->toc - (double)t->tic;
if (w <= 0.0) continue;
/* Get the top-level cells involved. */
struct cell *ci, *cj;
......@@ -1824,13 +1828,13 @@ void partition_init(struct partition *partition,
" than 1");
/* Fraction of particles that should be updated before a repartition
* based on CPU time is considered. */
* based on CPU time is considered, needs to be high. */
repartition->minfrac =
parser_get_opt_param_float(params, "DomainDecomposition:minfrac", 0.9f);
if (repartition->minfrac <= 0 || repartition->minfrac > 1)
parser_get_opt_param_float(params, "DomainDecomposition:minfrac", 0.95f);
if (repartition->minfrac <= 0.5 || repartition->minfrac > 1)
error(
"Invalid DomainDecomposition:minfrac, must be greater than 0 and less "
"than equal to 1");
"Invalid DomainDecomposition:minfrac, must be greater than 0.5 "
"and less than equal to 1");
/* Use METIS or ParMETIS when ParMETIS is also available. */
repartition->usemetis =
......
......@@ -1595,9 +1595,9 @@ struct task *scheduler_addtask(struct scheduler *s, enum task_types type,
t->nr_unlock_tasks = 0;
#ifdef SWIFT_DEBUG_TASKS
t->rid = -1;
#endif
t->tic = 0;
t->toc = 0;
#endif
/* Add an index for it. */
// lock_lock( &s->lock );
......@@ -1816,6 +1816,7 @@ void scheduler_reweight(struct scheduler *s, int verbose) {
/* Run through the tasks backwards and set their weights. */
for (int k = nr_tasks - 1; k >= 0; k--) {
struct task *t = &tasks[tid[k]];
float cost;
t->weight = 0.f;
for (int j = 0; j < t->nr_unlock_tasks; j++)
......@@ -2031,14 +2032,14 @@ void scheduler_enqueue_mapper(void *map_data, int num_elements,
*/
void scheduler_start(struct scheduler *s) {
/* Reset all task debugging timers */
#ifdef SWIFT_DEBUG_TASKS
/* Reset all task timers. */
for (int i = 0; i < s->nr_tasks; ++i) {
s->tasks[i].tic = 0;
s->tasks[i].toc = 0;
#ifdef SWIFT_DEBUG_TASKS
s->tasks[i].rid = -1;
}
#endif
}
/* Re-wait the tasks. */
if (s->active_count > 1000) {
......@@ -2290,9 +2291,7 @@ struct task *scheduler_done(struct scheduler *s, struct task *t) {
/* Task definitely done, signal any sleeping runners. */
if (!t->implicit) {
#ifdef SWIFT_DEBUG_TASKS
t->toc = getticks();
#endif
pthread_mutex_lock(&s->sleep_mutex);
atomic_dec(&s->waiting);
pthread_cond_broadcast(&s->sleep_cond);
......@@ -2333,9 +2332,7 @@ struct task *scheduler_unlock(struct scheduler *s, struct task *t) {
/* Task definitely done. */
if (!t->implicit) {
#ifdef SWIFT_DEBUG_TASKS
t->toc = getticks();
#endif
pthread_mutex_lock(&s->sleep_mutex);
atomic_dec(&s->waiting);
pthread_cond_broadcast(&s->sleep_cond);
......@@ -2419,13 +2416,13 @@ struct task *scheduler_gettask(struct scheduler *s, int qid,
}
}
#ifdef SWIFT_DEBUG_TASKS
/* Start the timer on this task, if we got one. */
if (res != NULL) {
res->tic = getticks();
#ifdef SWIFT_DEBUG_TASKS
res->rid = qid;
}
#endif
}
/* No milk today. */
return res;
......
......@@ -180,10 +180,10 @@ struct task {
/*! Information about the direction of the pair task */
short int sid;
#endif
/*! Start and end time of this task */
ticks tic, toc;
#endif
#ifdef SWIFT_DEBUG_CHECKS
/* When was this task last run? */
......
Loading…
You are about to add 0 people to the discussion. Proceed with caution.
Please register or sign in to comment