diff --git a/examples/main.c b/examples/main.c
index fa0a7764a3af91581c367f8618d3932bdeb3b8f5..69b0fcd55c4508af1825954b7a3088c258f9353a 100644
--- a/examples/main.c
+++ b/examples/main.c
@@ -1089,6 +1089,9 @@ int main(int argc, char *argv[]) {
     /* Dump the task data using the given frequency. */
     if (dump_tasks && (dump_tasks == 1 || j % dump_tasks == 1)) {
       task_dump_all(&e, j + 1);
+
+      /* Generate the task statistics. */
+      task_dump_stats(&e, j + 1);
     }
 #endif
 
diff --git a/src/partition.c b/src/partition.c
index bbd7454dd63be6ab5192558fb4a2e3399ea03cfc..80855be2dbf72b5fde426e8861a7a19a6b5235e4 100644
--- a/src/partition.c
+++ b/src/partition.c
@@ -77,6 +77,15 @@ const char *repartition_name[] = {
 /* Local functions, if needed. */
 static int check_complete(struct space *s, int verbose, int nregions);
 
+#if defined(WITH_MPI) && (defined(HAVE_METIS) || defined(HAVE_PARMETIS))
+/*
+ * Repartition fixed costs per type/subtype. These are determined from the
+ * statistics output produced when running with task debugging enabled.
+ */
+static double repartition_costs[task_type_count][task_subtype_count];
+static void repart_init_fixed_costs(int policy);
+#endif
+
 /*  Vectorisation support */
 /*  ===================== */
 
@@ -1195,10 +1204,11 @@ void partition_gather_weights(void *map_data, int num_elements,
     struct task *t = &tasks[i];
 
     /* Skip un-interesting tasks. */
-    if (t->cost == 0.f) continue;
+    if (t->type == task_type_send || t->type == task_type_recv ||
+        t->type == task_type_logger || t->implicit) continue;
 
-    /* Get the task weight based on costs. */
-    double w = (double)t->cost;
+    /* Get the task weight based on fixed cost for this task type. */
+    double w = repartition_costs[t->type][t->subtype];
 
     /* Get the top-level cells involved. */
     struct cell *ci, *cj;
@@ -1527,6 +1537,7 @@ void partition_repartition(struct repartition *reparttype, int nodeID,
 #if defined(WITH_MPI) && (defined(HAVE_METIS) || defined(HAVE_PARMETIS))
 
   ticks tic = getticks();
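+
+  /* Set the fixed costs used to weight the tasks under the current policy. */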
+  repart_init_fixed_costs(s->e->policy);
 
   if (reparttype->type == REPART_METIS_VERTEX_EDGE_COSTS) {
     repart_edge_metis(1, 1, 0, reparttype, nodeID, nr_nodes, s, tasks,
@@ -1700,7 +1711,7 @@ void partition_initial_partition(struct partition *initial_partition,
 
 /**
  * @brief Initialises the partition and re-partition scheme from the parameter
- *        file
+ *        file.
  *
  * @param partition The #partition scheme to initialise.
  * @param repartition The #repartition scheme to initialise.
@@ -1843,6 +1854,95 @@ void partition_init(struct partition *partition,
 #endif
 }
 
+#if defined(WITH_MPI) && (defined(HAVE_METIS) || defined(HAVE_PARMETIS))
+/**
+ * @brief Set the fixed costs for repartitioning using METIS.
+ *
+ *  These costs are determined from a run with task debugging enabled, which
+ *  produces a statistical analysis condensed into a .h file. Note that some
+ *  tasks have different costs depending on the engine policies; for instance
+ *  the kicks do work for both self gravity and hydro, and we attempt to allow
+ *  for that. Finally, note this is a statistical solution, so it requires
+ *  sufficient tasks on each rank for the fixed costs to do the right thing on
+ *  average; you may like to use task ticks as weights if this isn't working.
+ *
+ *  @param policy the #engine policy.
+ */
+static void repart_init_fixed_costs(int policy) {
+
+  /* Set the default fixed cost. */
+  for (int j = 0; j < task_type_count; j++) {
+    for (int k = 0; k < task_subtype_count; k++) {
+      repartition_costs[j][k] = 1.0;
+    }
+  }
+
+  /* TODO: these may be separable, so we could have costs for each policy
+   * addition; if separated we need to take care with the relative scaling. */
+  if ((policy & engine_policy_hydro) &&
+      (policy & engine_policy_self_gravity)) {
+
+    /* EAGLE_50 -s -G -S 8 nodes 16 cores */
+    repartition_costs[1][0] = 45842;  /* sort/none */
+    repartition_costs[2][1] = 15609;  /* self/density */
+    repartition_costs[2][3] = 18830;  /* self/force */
+    repartition_costs[2][4] = 202588; /* self/grav */
+    repartition_costs[3][1] = 67;     /* pair/density */
+    repartition_costs[3][3] = 207;    /* pair/force */
+    repartition_costs[3][4] = 1507;   /* pair/grav */
+    repartition_costs[4][1] = 36268;  /* sub_self/density */
+    repartition_costs[4][3] = 52252;  /* sub_self/force */
+    repartition_costs[5][1] = 709;    /* sub_pair/density */
+    repartition_costs[5][3] = 1258;   /* sub_pair/force */
+    repartition_costs[6][0] = 1135;   /* init_grav/none */
+    repartition_costs[9][0] = 160;    /* ghost/none */
+    repartition_costs[12][0] = 7176;  /* drift_part/none */
+    repartition_costs[13][0] = 4502;  /* drift_gpart/none */
+    repartition_costs[15][0] = 8234;  /* end_force/none */
+    repartition_costs[16][0] = 15508; /* kick1/none */
+    repartition_costs[17][0] = 15780; /* kick2/none */
+    repartition_costs[18][0] = 19848; /* timestep/none */
+    repartition_costs[21][0] = 4105;  /* grav_long_range/none */
+    repartition_costs[22][0] = 68;    /* grav_mm/none */
+    repartition_costs[24][0] = 16785; /* grav_down/none */
+    repartition_costs[25][0] = 70632; /* grav_mesh/none */
+
+  } else if (policy & engine_policy_self_gravity) {
+
+    /* EAGLE_50 -G 8 nodes 16 cores, scaled to match self/grav. */
+    repartition_costs[2][4] = 202588; /* self/grav */
+    repartition_costs[3][4] = 1760;   /* pair/grav */
+    repartition_costs[6][0] = 1610;   /* init_grav/none */
+    repartition_costs[13][0] = 999;   /* drift_gpart/none */
+    repartition_costs[15][0] = 3481;  /* end_force/none */
+    repartition_costs[16][0] = 6336;  /* kick1/none */
+    repartition_costs[17][0] = 6343;  /* kick2/none */
+    repartition_costs[18][0] = 13864; /* timestep/none */
+    repartition_costs[21][0] = 1422;  /* grav_long_range/none */
+    repartition_costs[22][0] = 71;    /* grav_mm/none */
+    repartition_costs[24][0] = 16011; /* grav_down/none */
+    repartition_costs[25][0] = 60414; /* grav_mesh/none */
+  } else if (policy & engine_policy_hydro) {
+
+    /* EAGLE_50 -s 8 nodes 16 cores, not scaled, but similar. */
+    repartition_costs[1][0] = 52733; /* sort/none */
+    repartition_costs[2][1] = 15458; /* self/density */
+    repartition_costs[2][3] = 19212; /* self/force */
+    repartition_costs[3][1] = 74;    /* pair/density */
+    repartition_costs[3][3] = 242;   /* pair/force */
+    repartition_costs[4][1] = 42895; /* sub_self/density */
+    repartition_costs[4][3] = 64254; /* sub_self/force */
+    repartition_costs[5][1] = 818;   /* sub_pair/density */
+    repartition_costs[5][3] = 1443;  /* sub_pair/force */
+    repartition_costs[9][0] = 159;   /* ghost/none */
+    repartition_costs[12][0] = 6708; /* drift_part/none */
+    repartition_costs[15][0] = 6479; /* end_force/none */
+    repartition_costs[16][0] = 6609; /* kick1/none */
+    repartition_costs[17][0] = 6975; /* kick2/none */
+    repartition_costs[18][0] = 5229; /* timestep/none */
+  }
+}
+#endif
+
 /*  General support */
 /*  =============== */
 
diff --git a/src/scheduler.c b/src/scheduler.c
index 2ae8f6785434af021b52dd2d6586b4e2dc5d68bb..d40971482a273fd3b3d2d65a166d1340e7cd4c9d 100644
--- a/src/scheduler.c
+++ b/src/scheduler.c
@@ -1817,16 +1817,11 @@ void scheduler_reweight(struct scheduler *s, int verbose) {
   for (int k = nr_tasks - 1; k >= 0; k--) {
     struct task *t = &tasks[tid[k]];
     t->weight = 0.f;
-#if defined(WITH_MPI) && (defined(HAVE_PARMETIS) || defined(HAVE_METIS))
-    t->cost = 0.f;
-#endif
+
     for (int j = 0; j < t->nr_unlock_tasks; j++)
       if (t->unlock_tasks[j]->weight > t->weight)
         t->weight = t->unlock_tasks[j]->weight;
     float cost = 0.f;
-#if defined(WITH_MPI) && (defined(HAVE_PARMETIS) || defined(HAVE_METIS))
-    int partcost = 1;
-#endif
 
     const float count_i = (t->ci != NULL) ? t->ci->hydro.count : 0.f;
     const float count_j = (t->cj != NULL) ? t->cj->hydro.count : 0.f;
@@ -1847,9 +1842,9 @@ void scheduler_reweight(struct scheduler *s, int verbose) {
         break;
 
       case task_type_self:
-        if (t->subtype == task_subtype_grav)
+        if (t->subtype == task_subtype_grav) {
           cost = 1.f * (wscale * gcount_i) * gcount_i;
-        else if (t->subtype == task_subtype_external_grav)
+        } else if (t->subtype == task_subtype_external_grav)
           cost = 1.f * wscale * gcount_i;
         else if (t->subtype == task_subtype_stars_density)
           cost = 1.f * wscale * scount_i * count_i;
@@ -1949,18 +1944,12 @@ void scheduler_reweight(struct scheduler *s, int verbose) {
         cost = wscale * count_i + wscale * gcount_i;
         break;
       case task_type_send:
-#if defined(WITH_MPI) && (defined(HAVE_PARMETIS) || defined(HAVE_METIS))
-        partcost = 0;
-#endif
         if (count_i < 1e5)
           cost = 10.f * (wscale * count_i) * count_i;
         else
           cost = 2e9;
         break;
       case task_type_recv:
-#if defined(WITH_MPI) && (defined(HAVE_PARMETIS) || defined(HAVE_METIS))
-        partcost = 0;
-#endif
         if (count_i < 1e5)
           cost = 5.f * (wscale * count_i) * count_i;
         else
@@ -1970,10 +1959,6 @@ void scheduler_reweight(struct scheduler *s, int verbose) {
         cost = 0;
         break;
     }
-
-#if defined(WITH_MPI) && (defined(HAVE_PARMETIS) || defined(HAVE_METIS))
-    if (partcost) t->cost = cost;
-#endif
     t->weight += cost;
   }
 
diff --git a/src/task.c b/src/task.c
index b78ae5c465416c68021c2e49e8b25454615d9050..62e00d976582766e8ff9b1b7ebb408ca11c373a4 100644
--- a/src/task.c
+++ b/src/task.c
@@ -579,7 +579,8 @@ void task_create_mpi_comms(void) {
 #endif
 
 /**
- * @brief dump all the tasks of all the known engines into a file for postprocessing.
+ * @brief Dump all the tasks of all the known engines into a file for
+ * postprocessing.
  *
  * Dumps the information to a file "thread_info-stepn.dat" where n is the
  * given step value, or "thread_info_MPI-stepn.dat", if we are running
@@ -681,5 +682,129 @@ void task_dump_all(struct engine *e, int step) {
   }
   fclose(file_thread);
 #endif  // WITH_MPI
-#endif // SWIFT_DEBUG_TASKS
+#endif  // SWIFT_DEBUG_TASKS
+}
+
+/**
+ * @brief Generate simple statistics about the times used by the tasks of
+ *        all the engines and write these into two files: a human-readable
+ *        version and one intended for inclusion as the fixed costs for
+ *        repartitioning.
+ *
+ * Dumps the human-readable information to a file "thread_stats-stepn.dat",
+ * where n is the given step value. When running under MPI all the tasks are
+ * summed into this single file.
+ *
+ * The fixed costs file will be called "thread_stats-stepn.h".
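+ *
+ * Each line of the fixed costs file assigns one entry of the costs table,
+ * e.g. "repartition_costs[2][4] = 202588;", with the task type and subtype
+ * names given in a trailing comment, so they can be pasted into
+ * repart_init_fixed_costs().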
+ *
+ * @param e the #engine.
+ * @param step the current step.
+ */
+void task_dump_stats(struct engine *e, int step) {
+
+#ifdef SWIFT_DEBUG_TASKS
+  char dumpfile[40];
+  snprintf(dumpfile, 40, "thread_stats-step%d.dat", step);
+
+  char costsfile[40];
+  snprintf(costsfile, 40, "thread_stats-step%d.h", step);
+
+  /* Need arrays for sum, min and max across all types and subtypes. */
+  double sum[task_type_count][task_subtype_count];
+  double min[task_type_count][task_subtype_count];
+  double max[task_type_count][task_subtype_count];
+  int count[task_type_count][task_subtype_count];
+
+  for (int j = 0; j < task_type_count; j++) {
+    for (int k = 0; k < task_subtype_count; k++) {
+      sum[j][k] = 0.0;
+      count[j][k] = 0;
+      min[j][k] = DBL_MAX;
+      max[j][k] = 0.0;
+    }
+  }
+
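+  /* Grand total of the task times and the smallest single task time, kept as
+   * one-element arrays so they can be reduced in place under MPI. */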
+  double total[1] = {0.0};
+  double minmin[1] = {DBL_MAX};
+  for (int l = 0; l < e->sched.nr_tasks; l++) {
+    int type = e->sched.tasks[l].type;
+
+    /* Skip implicit tasks, tasks that didn't run and MPI send/recv as these
+     * are not interesting (or meaningfully measured). */
+    if (!e->sched.tasks[l].implicit && e->sched.tasks[l].toc != 0 &&
+        type != task_type_send && type != task_type_recv) {
+      int subtype = e->sched.tasks[l].subtype;
+
+      double dt = e->sched.tasks[l].toc - e->sched.tasks[l].tic;
+      sum[type][subtype] += dt;
+      count[type][subtype] += 1;
+      if (dt < min[type][subtype]) {
+        min[type][subtype] = dt;
+      }
+      if (dt > max[type][subtype]) {
+        max[type][subtype] = dt;
+      }
+      total[0] += dt;
+      if (dt < minmin[0]) minmin[0] = dt;
+    }
+  }
+
+#ifdef WITH_MPI
+  /* Get these from all ranks for output from rank 0. These could be wrapped
+   * into one call, but that needs a packed buffer and a user-defined MPI_Op,
+   * since the sums, minima and maxima use different reductions. */
+  int size = task_type_count * task_subtype_count;
+  int res = MPI_Reduce((engine_rank == 0 ? MPI_IN_PLACE : sum), sum, size,
+                       MPI_DOUBLE, MPI_SUM, 0, MPI_COMM_WORLD);
+  if (res != MPI_SUCCESS) mpi_error(res, "Failed to reduce task sums");
+
+  res = MPI_Reduce((engine_rank == 0 ? MPI_IN_PLACE : count), count, size,
+                   MPI_INT, MPI_SUM, 0, MPI_COMM_WORLD);
+  if (res != MPI_SUCCESS) mpi_error(res, "Failed to reduce task counts");
+
+  res = MPI_Reduce((engine_rank == 0 ? MPI_IN_PLACE : min), min, size,
+                   MPI_DOUBLE, MPI_MIN, 0, MPI_COMM_WORLD);
+  if (res != MPI_SUCCESS) mpi_error(res, "Failed to reduce task minima");
+
+  res = MPI_Reduce((engine_rank == 0 ? MPI_IN_PLACE : max), max, size,
+                   MPI_DOUBLE, MPI_MAX, 0, MPI_COMM_WORLD);
+  if (res != MPI_SUCCESS) mpi_error(res, "Failed to reduce task maxima");
+
+  res = MPI_Reduce((engine_rank == 0 ? MPI_IN_PLACE : total), total, 1,
+                   MPI_DOUBLE, MPI_SUM, 0, MPI_COMM_WORLD);
+  if (res != MPI_SUCCESS) mpi_error(res, "Failed to reduce task total time");
+
+  res = MPI_Reduce((engine_rank == 0 ? MPI_IN_PLACE : minmin), minmin, 1,
+                   MPI_DOUBLE, MPI_MIN, 0, MPI_COMM_WORLD);
+  if (res != MPI_SUCCESS) mpi_error(res, "Failed to reduce task global min");
+
+  if (engine_rank == 0) {
+#endif
+
+    FILE *dfile = fopen(dumpfile, "w");
+    FILE *cfile = fopen(costsfile, "w");
+
+    fprintf(dfile, "# task ntasks min max sum mean percent fixed_cost\n");
+    for (int j = 0; j < task_type_count; j++) {
+      const char *taskID = taskID_names[j];
+      for (int k = 0; k < task_subtype_count; k++) {
+        if (sum[j][k] > 0.0) {
+          double mean = sum[j][k] / (double)count[j][k];
+          double perc = 100.0 * sum[j][k] / total[0];
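+          /* Scale the mean by the smallest single task time seen on any
+           * rank, so the cheapest tasks get fixed costs of order unity. */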
+          int fixed_cost = (int)(mean / minmin[0]);
+          fprintf(dfile,
+                  "%15s/%-10s %10d %14.2f %14.2f %14.2f %14.2f %14.2f %10d\n",
+                  taskID, subtaskID_names[k], count[j][k], min[j][k], max[j][k],
+                  sum[j][k], mean, perc, fixed_cost);
+          fprintf(cfile, "repartition_costs[%d][%d] = %10d; /* %s/%s */\n", j,
+                  k, fixed_cost, taskID, subtaskID_names[k]);
+        }
+      }
+    }
+    fclose(dfile);
+    fclose(cfile);
+#ifdef WITH_MPI
+  }
+#endif
+
+#endif  // SWIFT_DEBUG_TASKS
 }
diff --git a/src/task.h b/src/task.h
index eda3c82fd316223459f696a066d6b37603202e93..8203c2484345017fc34dd58a78882fc069fb8e4a 100644
--- a/src/task.h
+++ b/src/task.h
@@ -156,11 +156,6 @@ struct task {
   /*! Weight of the task */
   float weight;
 
-#if defined(WITH_MPI) && (defined(HAVE_METIS) || defined(HAVE_PARMETIS))
-  /*! Individual cost estimate for this task. */
-  float cost;
-#endif
-
   /*! Number of tasks unlocked by this one */
   short int nr_unlock_tasks;
 
@@ -204,6 +199,7 @@ int task_lock(struct task *t);
 void task_do_rewait(struct task *t);
 void task_print(const struct task *t);
 void task_dump_all(struct engine *e, int step);
+void task_dump_stats(struct engine *e, int step);
 #ifdef WITH_MPI
 void task_create_mpi_comms(void);
 #endif