diff --git a/examples/main.c b/examples/main.c
index 8b367cba14c96414d6eb5eb758a2e46f01a24774..034b800887928c049a610c27ef7c916573c71be6 100644
--- a/examples/main.c
+++ b/examples/main.c
@@ -444,8 +444,8 @@ int main(int argc, char *argv[]) {
   long long N_total[3] = {0, 0, 0};
 #if defined(WITH_MPI)
   long long N_long[3] = {Ngas, Ngpart, Nspart};
-  MPI_Reduce(&N_long, &N_total, 3, MPI_LONG_LONG_INT, MPI_SUM, 0,
-             MPI_COMM_WORLD);
+  MPI_Allreduce(&N_long, &N_total, 3, MPI_LONG_LONG_INT, MPI_SUM,
+                MPI_COMM_WORLD);
 #else
   N_total[0] = Ngas;
   N_total[1] = Ngpart;
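
The switch from MPI_Reduce to MPI_Allreduce above leaves the global particle counts on every rank rather than on rank 0 alone; the new repartition trigger needs them everywhere, since each rank compares the per-step update counts against the global totals. A minimal standalone sketch of the difference (illustrative values, not SWIFT code):

  #include <mpi.h>
  #include <stdio.h>

  int main(int argc, char *argv[]) {
    MPI_Init(&argc, &argv);
    int rank;
    MPI_Comm_rank(MPI_COMM_WORLD, &rank);

    /* Pretend this rank owns one particle of each type. */
    long long local[3] = {1, 1, 1};
    long long total[3] = {0, 0, 0};

    /* MPI_Reduce would leave the sums on rank 0 only; MPI_Allreduce
     * leaves them on every rank. */
    MPI_Allreduce(local, total, 3, MPI_LONG_LONG_INT, MPI_SUM,
                  MPI_COMM_WORLD);

    printf("rank %d: totals %lld %lld %lld\n", rank, total[0], total[1],
           total[2]);

    MPI_Finalize();
    return 0;
  }
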
diff --git a/examples/parameter_example.yml b/examples/parameter_example.yml
index bc893c6d27b67a2687e7766977c61f22df83d290..35550de3d752d91daf60bea92c16e9cd4a55fca5 100644
--- a/examples/parameter_example.yml
+++ b/examples/parameter_example.yml
@@ -76,7 +76,6 @@ DomainDecomposition:
                              # new decomposition, or number of steps (>1) between decompositions
  minfrac:           0.9     # (Optional) Fraction of all particles that should be updated in the previous step when
                             # using the CPU time trigger
-  cputime_file_name: cputime # (Optional) File name for per node CPU time estimates for each step, requires --enable-task-debugging
 
 # Parameters related to external potentials --------------------------------------------
 
diff --git a/src/engine.c b/src/engine.c
index 7f987db400b49a893a8e8e794356f02d25127251..e4b4085e4a1ffcf1ba491bc789f3587fced95215 100644
--- a/src/engine.c
+++ b/src/engine.c
@@ -882,6 +882,81 @@ void engine_repartition(struct engine *e) {
 #endif
 }
 
+/**
+ * @brief Decide whether to trigger a repartition of cells amongst the nodes.
+ *
+ * @param e The #engine.
+ */
+void engine_repartition_trigger(struct engine *e) {
+
+#ifdef WITH_MPI
+
+  /* Do nothing if there have not been enough steps since the last
+   * repartition; we don't want to repeat this too often or immediately
+   * after a repartition step. */
+  if (e->step - e->last_repartition > 2) {
+
+    /* Old-style trigger: fire if the trigger is >1, or on the second step
+     * (we want an early repartition following the initial partition). */
+    if (e->reparttype->trigger > 1 || e->step == 2) {
+      if (e->reparttype->trigger > 1) {
+        if (e->step % (int)e->reparttype->trigger == 2) e->forcerepart = 1;
+      } else {
+        e->forcerepart = 1;
+      }
+
+    } else {
+
+      /* Use the CPU times from all ranks to estimate the imbalance. */
+      /* First check whether we are going to skip this stage anyway; if so,
+       * do that now. It is only worth checking the CPU loads once we have
+       * processed a significant fraction of all particles. */
+      if ((e->updates > 1 &&
+           e->updates >= e->total_nr_parts * e->reparttype->minfrac) ||
+          (e->g_updates > 1 &&
+           e->g_updates >= e->total_nr_gparts * e->reparttype->minfrac)) {
+
+        /* Get CPU time used since the last call to this function. */
+        double elapsed_cputime = clocks_get_cputime_used() - e->cputime_last_step;
+
+        /* Gather the elapsed CPU times from all ranks for the last step. */
+        double elapsed_cputimes[e->nr_nodes];
+        MPI_Gather(&elapsed_cputime, 1, MPI_DOUBLE, elapsed_cputimes, 1,
+                   MPI_DOUBLE, 0, MPI_COMM_WORLD);
+        if (e->nodeID == 0) {
+
+          /* Get the range of cputimes. */
+          double mintime = elapsed_cputimes[0];
+          double maxtime = elapsed_cputimes[0];
+          for (int k = 1; k < e->nr_nodes; k++) {
+            if (elapsed_cputimes[k] > maxtime) maxtime = elapsed_cputimes[k];
+            if (elapsed_cputimes[k] < mintime) mintime = elapsed_cputimes[k];
+          }
+
+          /* Are we out of balance? */
+          if (((maxtime - mintime) / mintime) > e->reparttype->trigger) {
+            if (e->verbose)
+              message("trigger fraction %.3f exceeds %.3f, will repartition",
+                      (maxtime - mintime) / mintime, e->reparttype->trigger);
+            e->forcerepart = 1;
+          }
+        }
+
+        /* Broadcast the decision so that all ranks agree. */
+        MPI_Bcast(&e->forcerepart, 1, MPI_INT, 0, MPI_COMM_WORLD);
+      }
+    }
+
+    /* Remember the step at which we repartitioned. */
+    if (e->forcerepart)
+      e->last_repartition = e->step;
+  }
+
+  /* Always update the baseline CPU time for the next check. */
+  e->cputime_last_step = clocks_get_cputime_used();
+#endif
+}
+
 /**
  * @brief Add up/down gravity tasks to a cell hierarchy.
  *
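
The new engine_repartition_trigger() follows a gather, decide, broadcast pattern: every rank reports its CPU time for the step, rank 0 computes the spread (maxtime - mintime) / mintime and compares it against the trigger fraction, and the decision is broadcast so that all ranks agree. A self-contained sketch of that pattern (the per-rank times and the 5% tolerance are illustrative, not SWIFT's values):

  #include <mpi.h>
  #include <stdio.h>
  #include <stdlib.h>

  int main(int argc, char *argv[]) {
    MPI_Init(&argc, &argv);
    int rank, nranks;
    MPI_Comm_rank(MPI_COMM_WORLD, &rank);
    MPI_Comm_size(MPI_COMM_WORLD, &nranks);

    /* Stand-in for the CPU time this rank used in the last step. */
    double elapsed = 1.0 + 0.1 * rank;

    /* Gather all the per-rank times on rank 0. */
    double *times = malloc(sizeof(double) * nranks);
    MPI_Gather(&elapsed, 1, MPI_DOUBLE, times, 1, MPI_DOUBLE, 0,
               MPI_COMM_WORLD);

    int repartition = 0;
    if (rank == 0) {
      double mintime = times[0], maxtime = times[0];
      for (int k = 1; k < nranks; k++) {
        if (times[k] > maxtime) maxtime = times[k];
        if (times[k] < mintime) mintime = times[k];
      }
      /* Fire when the spread exceeds a fraction of the fastest time. */
      const double trigger = 0.05;
      if ((maxtime - mintime) / mintime > trigger) repartition = 1;
    }

    /* Broadcast the decision so that every rank takes the same branch. */
    MPI_Bcast(&repartition, 1, MPI_INT, 0, MPI_COMM_WORLD);

    if (rank == 0) printf("repartition = %d\n", repartition);
    free(times);
    MPI_Finalize();
    return 0;
  }
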
@@ -3070,102 +3145,11 @@ void engine_step(struct engine *e) {
   e->timeStep = (e->ti_current - e->ti_old) * e->timeBase;
 
   /* Prepare the tasks to be launched, rebuild or repartition if needed. */
-#ifdef WITH_MPI
-  int justrepart = (e->forcerepart != REPART_NONE);
-#endif
   engine_prepare(e);
 
-/* Repartition the space amongst the nodes? */
 #ifdef WITH_MPI
-
-  /* Old style if trigger is >1 or this is the second step, but we never
-   * repartition immediately after a repartition, those timings will not be
-   * representative. */
-  if (e->reparttype->trigger > 1 || e->step == 2 || justrepart) {
-    if (! justrepart) {
-      if (e->reparttype->trigger > 1) {
-        if (e->step % (int)e->reparttype->trigger == 2) e->forcerepart = 1;
-      } else {
-        e->forcerepart = 1;
-      }
-    }
-
-#ifdef SWIFT_DEBUG_TASKS
-    /* Capture CPU times for comparisons with other methods. */
-    double elapsed_cputime = e->cputoc_step - e->cputic_step;
-    e->cputic_step = clocks_get_cputime_used();
-    double elapsed_cputimes[e->nr_nodes];
-    MPI_Gather(&elapsed_cputime, 1, MPI_DOUBLE, elapsed_cputimes, 1, MPI_DOUBLE,
-               0, MPI_COMM_WORLD);
-    if (e->nodeID == 0) {
-      double mintime = elapsed_cputimes[0];
-      double maxtime = elapsed_cputimes[0];
-      for (int k = 1; k < e->nr_nodes; k++) {
-        if (elapsed_cputimes[k] > maxtime) maxtime = elapsed_cputimes[k];
-        if (elapsed_cputimes[k] < mintime) mintime = elapsed_cputimes[k];
-      }
-      fprintf(e->file_cputimes, "%6d ", e->step);
-      for (int k = 0; k < e->nr_nodes; k++) {
-        fprintf(e->file_cputimes, " %14.7g", elapsed_cputimes[k]);
-      }
-      fprintf(e->file_cputimes, "\n");
-      fflush(e->file_cputimes);
-    }
-#endif
-
-  } else {
-
-    /* Use cputimes from ranks to estimate the imbalance. */
-    double elapsed_cputime = e->cputoc_step - e->cputic_step;
-    e->cputic_step = clocks_get_cputime_used();
-
-    /* Gather the elapsed CPU times from all ranks for the last step. */
-    double elapsed_cputimes[e->nr_nodes];
-    MPI_Gather(&elapsed_cputime, 1, MPI_DOUBLE, elapsed_cputimes, 1, MPI_DOUBLE,
-               0, MPI_COMM_WORLD);
-
-    /* If all available particles of any type have been updated then consider
-     * if a repartition might be needed. Only worth checking when there is
-     * load on all ranks, so require that some fraction of all particles have
-     * been processed. */
-    if (e->nodeID == 0) {
-
-      /* Get the range of cputimes. */
-      double mintime = elapsed_cputimes[0];
-      double maxtime = elapsed_cputimes[0];
-      for (int k = 1; k < e->nr_nodes; k++) {
-        if (elapsed_cputimes[k] > maxtime) maxtime = elapsed_cputimes[k];
-        if (elapsed_cputimes[k] < mintime) mintime = elapsed_cputimes[k];
-      }
-
-      if ((e->updates > 1 &&
-           e->updates >= e->total_nr_parts * e->reparttype->minfrac) ||
-          (e->g_updates > 1 &&
-           e->g_updates >= e->total_nr_gparts * e->reparttype->minfrac)) {
-
-        /* Are we out of balance? */
-        if (((maxtime - mintime) / mintime) > e->reparttype->trigger) {
-          if (e->verbose)
-            message("fractionaltime %.2f > %.2f will repartition",
-                    (maxtime - mintime) / mintime, e->reparttype->trigger);
-          e->forcerepart = 1;
-        }
-      }
-
-#ifdef SWIFT_DEBUG_TASKS
-      /* Save the cputimes for analysis. */
-      fprintf(e->file_cputimes, "%6d ", e->step);
-      for (int k = 0; k < e->nr_nodes; k++) {
-        fprintf(e->file_cputimes, " %14.7g", elapsed_cputimes[k]);
-      }
-      fprintf(e->file_cputimes, "\n");
-      fflush(e->file_cputimes);
-#endif
-    }
-  }
-
-  /* All nodes do this together. */
-  MPI_Bcast(&e->forcerepart, 1, MPI_INT, 0, MPI_COMM_WORLD);
+  /* Repartition the space amongst the nodes? */
+  engine_repartition_trigger(e);
 #endif
 
   /* Are we drifting everything (a la Gadget/GIZMO) ? */
@@ -3244,11 +3228,6 @@ void engine_step(struct engine *e) {
   /* Time in ticks at the end of this step. */
   e->toc_step = getticks();
 #endif
-
-#ifdef WITH_MPI
-  /* CPU time used at the end of this step. */
-  e->cputoc_step = clocks_get_cputime_used();
-#endif
 }
 
 /**
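
The hunk above drops the cputoc_step bookkeeping at the end of each step; engine_repartition_trigger() instead records clocks_get_cputime_used() once per check and differences against that baseline. Assuming clocks_get_cputime_used() reports the cumulative CPU time of the process (an assumption about SWIFT's clocks module, made here only to show the baseline-and-difference pattern), the idea is:

  #include <stdio.h>
  #include <sys/resource.h>

  /* A plausible stand-in for clocks_get_cputime_used(): cumulative
   * user plus system CPU time of this process, in seconds. */
  static double cputime_used(void) {
    struct rusage usage;
    getrusage(RUSAGE_SELF, &usage);
    return usage.ru_utime.tv_sec + usage.ru_utime.tv_usec / 1e6 +
           usage.ru_stime.tv_sec + usage.ru_stime.tv_usec / 1e6;
  }

  int main(void) {
    /* Record a baseline, do some work, then take the difference,
     * as done with cputime_last_step in the patch. */
    double cputime_last = cputime_used();
    volatile double x = 0.;
    for (long i = 0; i < 100000000L; i++) x += 1e-9;
    printf("CPU time used this step: %.3f s (x=%g)\n",
           cputime_used() - cputime_last, x);
    return 0;
  }
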
@@ -3704,9 +3683,6 @@ void engine_init(struct engine *e, struct space *s,
   e->dt_max = parser_get_param_double(params, "TimeIntegration:dt_max");
   e->file_stats = NULL;
   e->file_timesteps = NULL;
-#if WITH_MPI
-  e->file_cputimes = NULL;
-#endif
   e->deltaTimeStatistics =
       parser_get_param_double(params, "Statistics:delta_time");
   e->timeLastStatistics = e->timeBegin - e->deltaTimeStatistics;
@@ -3720,6 +3696,10 @@ void engine_init(struct engine *e, struct space *s,
   e->cooling_func = cooling_func;
   e->sourceterms = sourceterms;
   e->parameter_file = params;
+#ifdef WITH_MPI
+  e->cputime_last_step = 0;
+  e->last_repartition = -1;
+#endif
   engine_rank = nodeID;
 
   /* Make the space link back to the engine. */
@@ -3903,15 +3883,6 @@ void engine_init(struct engine *e, struct space *s,
             "Step", "Time", "Time-step", "Updates", "g-Updates", "s-Updates",
             "Wall-clock time", clocks_getunit());
     fflush(e->file_timesteps);
-
-#if defined(SWIFT_DEBUG_TASKS) && defined(WITH_MPI)
-    char cputimefileName[200] = "";
-    parser_get_opt_param_string(params, "DomainDecomposition:cputime_file_name",
-                                cputimefileName,
-                                engine_default_cputime_file_name);
-    sprintf(cputimefileName + strlen(cputimefileName), ".txt");
-    e->file_cputimes = fopen(cputimefileName, "w");
-#endif
   }
 
   /* Print policy */
diff --git a/src/engine.h b/src/engine.h
index dba72cc97cbba93c742fea8391a5811104e4b0b6..a0e32ad15b79c364d13d19589f8462ff8705ee29 100644
--- a/src/engine.h
+++ b/src/engine.h
@@ -80,7 +80,6 @@ extern const char *engine_policy_names[];
 #define engine_redistribute_alloc_margin 1.2
 #define engine_default_energy_file_name "energy"
 #define engine_default_timesteps_file_name "timesteps"
-#define engine_default_cputime_file_name "cputime"
 
 /* The rank of the engine as a global variable (for messages). */
 extern int engine_rank;
@@ -191,11 +190,11 @@ struct engine {
 #endif
 
 #ifdef WITH_MPI
-  /* CPU times at the start/end of a step. */
-  double cputic_step, cputoc_step;
+  /* Cumulative CPU time recorded at the end of the last step. */
+  double cputime_last_step;
 
-  /* Record of these. */
-  FILE *file_cputimes;
+  /* Step of last repartition. */
+  int last_repartition;
 #endif
 
   /* Wallclock time of the last time-step */
@@ -276,6 +275,7 @@ void engine_exchange_strays(struct engine *e, size_t offset_parts,
                             size_t *Nspart);
 void engine_rebuild(struct engine *e);
 void engine_repartition(struct engine *e);
+void engine_repartition_trigger(struct engine *e);
 void engine_makeproxies(struct engine *e);
 void engine_redistribute(struct engine *e);
 void engine_print_policy(struct engine *e);
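
Finally, a note on the old-style path that engine_repartition_trigger() keeps: a trigger value above 1 is treated as a step interval, and the modulo test is offset so that the first repartition lands on step 2, just after the initial partition has settled. A tiny standalone illustration (the interval of 10 is arbitrary):

  #include <stdio.h>

  int main(void) {
    const float trigger = 10.f; /* a DomainDecomposition trigger value > 1 */
    for (int step = 0; step < 35; step++) {
      /* Same test as in the patch: fires at steps 2, 12, 22, 32, ... */
      if (step % (int)trigger == 2) printf("repartition at step %d\n", step);
    }
    return 0;
  }
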