diff --git a/examples/main.c b/examples/main.c
index 0e3cdc9b33b99aea2c8178c8469aa215c76c8fb6..6882b4753049bcab08effb981b8076b9bf1156e5 100644
--- a/examples/main.c
+++ b/examples/main.c
@@ -283,11 +283,11 @@ int main(int argc, char *argv[]) {
 
 #ifndef SWIFT_DEBUG_TASKS
   if (dump_tasks) {
-      if (myrank == 0) {
-          message("WARNING: complete task dumps are only created when "
-                  "configured with --enable-task-debugging.");
-          message("         Basic task statistics will be output.");
-      }
+    if (myrank == 0) {
+      message("WARNING: complete task dumps are only created when "
+              "configured with --enable-task-debugging.");
+      message("         Basic task statistics will be output.");
+    }
   }
 #endif
 
@@ -1037,7 +1037,9 @@ int main(int argc, char *argv[]) {
 #endif
 
       /* Generate the task statistics. */
-      task_dump_stats(&e, j + 1);
+      char dumpfile[40];
+      snprintf(dumpfile, 40, "thread_stats-step%d.dat", j + 1);
+      task_dump_stats(dumpfile, &e, /* header = */ 0, /* allranks = */ 1);
     }
 
 #ifdef SWIFT_DEBUG_THREADPOOL
diff --git a/src/Makefile.am b/src/Makefile.am
index 9b0610667bdcf1f760f6e94d4481848a8fc4d0f0..aa8972f9ee5d2f366b268d9107b4cb0c4f6c49bf 100644
--- a/src/Makefile.am
+++ b/src/Makefile.am
@@ -41,8 +41,8 @@ endif
 # List required headers
 include_HEADERS = space.h runner.h queue.h task.h lock.h cell.h part.h const.h \
     engine.h swift.h serial_io.h timers.h debug.h scheduler.h proxy.h parallel_io.h \
-    common_io.h single_io.h multipole.h map.h tools.h partition.h clocks.h parser.h \
-    physical_constants.h physical_constants_cgs.h potential.h version.h \
+    common_io.h single_io.h multipole.h map.h tools.h partition.h partition_fixed_costs.h \
+    clocks.h parser.h physical_constants.h physical_constants_cgs.h potential.h version.h \
     hydro_properties.h riemann.h threadpool.h cooling_io.h cooling.h cooling_struct.h \
     sourceterms.h sourceterms_struct.h statistics.h memswap.h cache.h runner_doiact_vec.h profiler.h \
     dump.h logger.h active.h timeline.h xmf.h gravity_properties.h gravity_derivatives.h \
diff --git a/src/engine.c b/src/engine.c
index ac9456f1c8a2ba909877d9f520a887e2883be08e..92e892a7c8c0fa0963198aa2b5eb6e12aab76e40 100644
--- a/src/engine.c
+++ b/src/engine.c
@@ -991,6 +991,11 @@ void engine_repartition(struct engine *e) {
    * bug that doesn't handle this case well. */
   if (e->nr_nodes == 1) return;
 
+  /* Generate the fixed costs include file. */
+  if (e->step > 3 && e->reparttype->trigger <= 1.f) {
+    task_dump_stats("partition_fixed_costs.h", e, /* header = */ 1, /* allranks = */ 1);
+  }
+
   /* Do the repartitioning. */
   partition_repartition(e->reparttype, e->nodeID, e->nr_nodes, e->s,
                         e->sched.tasks, e->sched.nr_tasks);
diff --git a/src/task.c b/src/task.c
index 00c796423346dc1a7d8cba64bd943a33ef57908f..0f5b0e8d54540b776bf3f4377fb061c48e9316ab 100644
--- a/src/task.c
+++ b/src/task.c
@@ -687,26 +687,26 @@ void task_dump_all(struct engine *e, int step) {
 
 /**
  * @brief Generate simple statistics about the times used by the tasks of
- *        all the engines and write these into two files, a human readable
- *        version and one intented for inclusion as the fixed costs for
- *        repartitioning.
+ *        all the engines and write these in one of two formats, a human
+ *        readable version for debugging or one intended for inclusion as the
+ *        fixed costs for repartitioning.
  *
- * Dumps the human readable information to a file "thread_stats-stepn.dat"
- * where n is the given step value. When running under MPI all the tasks are
- * summed into this single file.
+ * Note that when running under MPI the tasks of all ranks can be summed into
+ * this single file. In the fuller, human readable file, the statistics are the
+ * number of tasks of each type/subtype followed by the minimum, maximum, sum
+ * and mean times, in millisec, the percentage and then the fixed costs value.
  *
- * The fixed costs file will be called "thread_stats-stepn.h".
+ * If header is set, only the fixed costs value is written into the output
+ * file in a format that is suitable for inclusion in SWIFT (as
+ * partition_fixed_costs.h).
  *
+ * @param dumpfile name of the file for the output.
  * @param e the #engine
- * @param step the current step.
+ * @param header whether to write a header include file.
+ * @param allranks do the statistics over all ranks, otherwise just over the
+ *                 current one. Only used if header is false.
  */
-void task_dump_stats(struct engine *e, int step) {
-
-  char dumpfile[40];
-  snprintf(dumpfile, 40, "thread_stats-step%d.dat", step);
-
-  char costsfile[40];
-  snprintf(costsfile, 40, "thread_stats-step%d.h", step);
+void task_dump_stats(const char *dumpfile, struct engine *e, int header, int allranks) {
 
   /* Need arrays for sum, min and max across all types and subtypes. */
   double sum[task_type_count][task_subtype_count];
@@ -746,39 +746,43 @@ void task_dump_stats(struct engine *e, int step) {
     }
   }
 
-#ifdef WITH_MPI
-  /* Get these from all ranks for output from rank 0. Could wrap these into a
-   * single operation. */
-  size_t size = task_type_count * task_subtype_count;
-  int res = MPI_Reduce((engine_rank == 0 ? MPI_IN_PLACE : sum), sum, size,
-                       MPI_DOUBLE, MPI_SUM, 0, MPI_COMM_WORLD);
-  if (res != MPI_SUCCESS) mpi_error(res, "Failed to reduce task sums");
-
-  res = MPI_Reduce((engine_rank == 0 ? MPI_IN_PLACE : count), count, size,
-                   MPI_INT, MPI_SUM, 0, MPI_COMM_WORLD);
-  if (res != MPI_SUCCESS) mpi_error(res, "Failed to reduce task counts");
-
-  res = MPI_Reduce((engine_rank == 0 ? MPI_IN_PLACE : min), min, size,
-                   MPI_DOUBLE, MPI_MIN, 0, MPI_COMM_WORLD);
-  if (res != MPI_SUCCESS) mpi_error(res, "Failed to reduce task minima");
 
-  res = MPI_Reduce((engine_rank == 0 ? MPI_IN_PLACE : max), max, size,
-                   MPI_DOUBLE, MPI_MAX, 0, MPI_COMM_WORLD);
-  if (res != MPI_SUCCESS) mpi_error(res, "Failed to reduce task maxima");
-
-  res = MPI_Reduce((engine_rank == 0 ? MPI_IN_PLACE : total), total, 1,
-                   MPI_DOUBLE, MPI_SUM, 0, MPI_COMM_WORLD);
-  if (res != MPI_SUCCESS) mpi_error(res, "Failed to reduce task total time");
+#ifdef WITH_MPI
+  if (allranks || header) {
+    /* Get these from all ranks for output from rank 0. Could wrap these into a
+     * single operation. */
+    size_t size = task_type_count * task_subtype_count;
+    int res = MPI_Reduce((engine_rank == 0 ? MPI_IN_PLACE : sum), sum, size,
+                         MPI_DOUBLE, MPI_SUM, 0, MPI_COMM_WORLD);
+    if (res != MPI_SUCCESS) mpi_error(res, "Failed to reduce task sums");
+
+    res = MPI_Reduce((engine_rank == 0 ? MPI_IN_PLACE : count), count, size,
+                     MPI_INT, MPI_SUM, 0, MPI_COMM_WORLD);
+    if (res != MPI_SUCCESS) mpi_error(res, "Failed to reduce task counts");
+
+    res = MPI_Reduce((engine_rank == 0 ? MPI_IN_PLACE : min), min, size,
+                     MPI_DOUBLE, MPI_MIN, 0, MPI_COMM_WORLD);
+    if (res != MPI_SUCCESS) mpi_error(res, "Failed to reduce task minima");
+
+    res = MPI_Reduce((engine_rank == 0 ? MPI_IN_PLACE : max), max, size,
+                     MPI_DOUBLE, MPI_MAX, 0, MPI_COMM_WORLD);
+    if (res != MPI_SUCCESS) mpi_error(res, "Failed to reduce task maxima");
+
+    res = MPI_Reduce((engine_rank == 0 ? MPI_IN_PLACE : total), total, 1,
+                     MPI_DOUBLE, MPI_SUM, 0, MPI_COMM_WORLD);
+    if (res != MPI_SUCCESS) mpi_error(res, "Failed to reduce task total time");
+  }
 
-  if (engine_rank == 0) {
+  if (!allranks || (engine_rank == 0 && (allranks || header))) {
 #endif
 
     FILE *dfile = fopen(dumpfile, "w");
-    fprintf(dfile, "# task ntasks min max sum mean percent fixed_cost\n");
-
-    FILE *cfile = fopen(costsfile, "w");
-    fprintf(cfile, "/* use as src/partition_fixed_costs.h */\n");
-    fprintf(cfile, "#define HAVE_FIXED_COSTS 1\n");
+    if (header) {
+      fprintf(dfile, "/* use as src/partition_fixed_costs.h */\n");
+      fprintf(dfile, "#define HAVE_FIXED_COSTS 1\n");
+    } else {
+      fprintf(dfile, "# task ntasks min max sum mean percent fixed_cost\n");
+    }
 
     for (int j = 0; j < task_type_count; j++) {
       const char *taskID = taskID_names[j];
@@ -790,19 +794,21 @@ void task_dump_stats(struct engine *e, int step) {
           /* Fixed cost is in .1ns as we want to compare between runs in
            * some absolute units. */
           int fixed_cost = (int)(clocks_from_ticks(mean) * 10000.f);
-          fprintf(dfile,
-                  "%15s/%-10s %10d %14.4f %14.4f %14.4f %14.4f %14.4f %10d\n",
-                  taskID, subtaskID_names[k], count[j][k],
-                  clocks_from_ticks(min[j][k]), clocks_from_ticks(max[j][k]),
-                  clocks_from_ticks(sum[j][k]), clocks_from_ticks(mean),
-                  perc, fixed_cost);
-          fprintf(cfile, "repartition_costs[%d][%d] = %10d; /* %s/%s */\n", j,
-                  k, fixed_cost, taskID, subtaskID_names[k]);
+          if (header) {
+            fprintf(dfile, "repartition_costs[%d][%d] = %10d; /* %s/%s */\n", j,
+                    k, fixed_cost, taskID, subtaskID_names[k]);
+          } else {
+            fprintf(dfile,
+                    "%15s/%-10s %10d %14.4f %14.4f %14.4f %14.4f %14.4f %10d\n",
+                    taskID, subtaskID_names[k], count[j][k],
+                    clocks_from_ticks(min[j][k]), clocks_from_ticks(max[j][k]),
+                    clocks_from_ticks(sum[j][k]), clocks_from_ticks(mean),
+                    perc, fixed_cost);
+          }
         }
       }
     }
     fclose(dfile);
-    fclose(cfile);
 #ifdef WITH_MPI
   }
 #endif
diff --git a/src/task.h b/src/task.h
index 2122f757220f2fea6691c45370b513a913aeeac8..61af35c69449972ee235d578eec320b74a1bff3e 100644
--- a/src/task.h
+++ b/src/task.h
@@ -199,7 +199,7 @@ int task_lock(struct task *t);
 void task_do_rewait(struct task *t);
 void task_print(const struct task *t);
 void task_dump_all(struct engine *e, int step);
-void task_dump_stats(struct engine *e, int step);
+void task_dump_stats(const char *dumpfile, struct engine *e, int header, int allranks);
 #ifdef WITH_MPI
 void task_create_mpi_comms(void);
 #endif