diff --git a/configure.ac b/configure.ac
index f6e2ea0db8e9829b719a0190eea7a1d891bfbbd6..8cb7fd1b3f09819b7c8b57ce5d70e3b38e32a637 100644
--- a/configure.ac
+++ b/configure.ac
@@ -170,6 +170,18 @@ if test "x$enable_debug" = "xyes"; then
    fi
 fi
 
+# Check if task debugging is on.
+AC_ARG_ENABLE([task-debugging],
+   [AS_HELP_STRING([--enable-task-debugging],
+     [Store task timing information and generate task dump files @<:@yes/no@:>@]
+   )],
+   [enable_task_debugging="$enableval"],
+   [enable_task_debugging="no"]
+)
+if test "$enable_task_debugging" = "yes"; then
+   AC_DEFINE([SWIFT_DEBUG_TASKS],1,[Enable task debugging])
+fi
+
 # Define HAVE_POSIX_MEMALIGN if it works.
 AX_FUNC_POSIX_MEMALIGN
 
@@ -533,6 +545,7 @@ AC_MSG_RESULT([
    libNUMA enabled : $have_numa
    Using tcmalloc  : $have_tcmalloc
    CPU profiler    : $have_profiler
+   Task debugging  : $enable_task_debugging
 ])
 
 # Generate output.
diff --git a/examples/main.c b/examples/main.c
index 8bd591ccbd1c2fd860fa8ae2321d2e34132b287f..09b6c785a6dc29ecc1202c949b8f3d03f6f1fa78 100644
--- a/examples/main.c
+++ b/examples/main.c
@@ -235,6 +235,13 @@ int main(int argc, char *argv[]) {
           if (myrank == 0) print_help_message();
           return 1;
         }
+#ifndef SWIFT_DEBUG_TASKS
+        if (dump_tasks) {
+          error(
+              "Task dumping is only possible if SWIFT was configured with the "
+              "--enable-task-debugging option.");
+        }
+#endif
         break;
       case '?':
         if (myrank == 0) print_help_message();
@@ -549,6 +556,7 @@ int main(int argc, char *argv[]) {
     /* Take a step. */
     engine_step(&e);
 
+#ifdef SWIFT_DEBUG_TASKS
     /* Dump the task data using the given frequency. */
     if (dump_tasks && (dump_tasks == 1 || j % dump_tasks == 1)) {
 #ifdef WITH_MPI
@@ -626,8 +634,9 @@ int main(int argc, char *argv[]) {
         }
       }
       fclose(file_thread);
-#endif
+#endif  // WITH_MPI
     }
+#endif  // SWIFT_DEBUG_TASKS
   }
 
 /* Print the values of the runner histogram. */
diff --git a/src/partition.c b/src/partition.c
index 8d17bedf0aaeadc64044b12ffe1bb8887b02d83e..3f5386154497d6901a5330b828007f86d87033a4 100644
--- a/src/partition.c
+++ b/src/partition.c
@@ -370,7 +370,7 @@ static void pick_metis(struct space *s, int nregions, int *vertexw, int *edgew,
 
   /* Dump graph in METIS format */
   /* dumpMETISGraph("metis_graph", idx_ncells, one, xadj, adjncy,
-   *                weights_v, weights_e, NULL);
+   *                weights_v, NULL, weights_e);
    */
 
   if (METIS_PartGraphKway(&idx_ncells, &one, xadj, adjncy, weights_v, weights_e,
@@ -420,7 +420,7 @@ static void repart_edge_metis(int partweights, int bothweights, int nodeID,
    * assume the same graph structure as used in the part_ calls). */
   int nr_cells = s->nr_cells;
   struct cell *cells = s->cells_top;
-  float wscale = 1e-3, vscale = 1e-3, wscale_buff = 0.0;
+  float wscale = 1.f, wscale_buff = 0.0;
   int wtot = 0;
   int wmax = 1e9 / nr_nodes;
   int wmin;
@@ -459,15 +459,8 @@ static void repart_edge_metis(int partweights, int bothweights, int nodeID,
         t->type != task_type_init)
       continue;
 
-    /* Get the task weight. This can be slightly negative on multiple board
-     * computers when the runners are not pinned to cores, don't stress just
-     * make a report and ignore these tasks. */
-    int w = (t->toc - t->tic) * wscale;
-    if (w < 0) {
-      message("Task toc before tic: -%.3f %s, (try using processor affinity).",
-              clocks_from_ticks(t->tic - t->toc), clocks_getunit());
-      w = 0;
-    }
+    /* Get the task weight from the scheduler's per-task cost estimate. */
+    int w = t->cost * wscale;
 
     /* Do we need to re-scale? */
     wtot += w;
@@ -616,7 +609,7 @@ static void repart_edge_metis(int partweights, int bothweights, int nodeID,
       if (weights_e[k] == 0) weights_e[k] = 1;
     if (bothweights)
       for (int k = 0; k < nr_cells; k++)
-        if ((weights_v[k] *= vscale) == 0) weights_v[k] = 1;
+        if (weights_v[k] == 0) weights_v[k] = 1;
 
     /* And partition, use both weights or not as requested. */
     if (bothweights)
diff --git a/src/runner.c b/src/runner.c
index f5efc99d492be837509e50bd2674ab6923404446..aaec28e2e6f924f7e912f8156520fac9647bccfe 100644
--- a/src/runner.c
+++ b/src/runner.c
@@ -1231,7 +1231,9 @@ void *runner_main(void *data) {
       /* Get the cells. */
       struct cell *ci = t->ci;
       struct cell *cj = t->cj;
+#ifdef SWIFT_DEBUG_TASKS
       t->rid = r->cpuid;
+#endif
 
 /* Check that we haven't scheduled an inactive task */
 #ifdef SWIFT_DEBUG_CHECKS
diff --git a/src/scheduler.c b/src/scheduler.c
index 44790fcd2fa5f67e6f325ba5849da19e35ab285a..c656470cb596c12cf93639f00e26d96c24403df8 100644
--- a/src/scheduler.c
+++ b/src/scheduler.c
@@ -708,10 +708,12 @@ struct task *scheduler_addtask(struct scheduler *s, enum task_types type,
   t->implicit = 0;
   t->weight = 0;
   t->rank = 0;
-  t->tic = 0;
-  t->toc = 0;
   t->nr_unlock_tasks = 0;
+#ifdef SWIFT_DEBUG_TASKS
   t->rid = -1;
+  t->tic = 0;
+  t->toc = 0;
+#endif
 
   /* Add an index for it. */
   // lock_lock( &s->lock );
@@ -924,55 +926,56 @@ void scheduler_reweight(struct scheduler *s, int verbose) {
     for (int j = 0; j < t->nr_unlock_tasks; j++)
       if (t->unlock_tasks[j]->weight > t->weight)
         t->weight = t->unlock_tasks[j]->weight;
-    if (!t->implicit && t->tic > 0)
-      t->weight += wscale * (t->toc - t->tic);
-    else
-      switch (t->type) {
-        case task_type_sort:
-          t->weight += wscale * intrinsics_popcount(t->flags) * t->ci->count *
-                       (sizeof(int) * 8 - intrinsics_clz(t->ci->count));
-          break;
-        case task_type_self:
-          t->weight += 1 * wscale * t->ci->count * t->ci->count;
-          break;
-        case task_type_pair:
-          if (t->ci->nodeID != nodeID || t->cj->nodeID != nodeID)
-            t->weight +=
+    int cost = 0;
+    switch (t->type) {
+      case task_type_sort:
+        cost = wscale * intrinsics_popcount(t->flags) * t->ci->count *
+               (sizeof(int) * 8 - intrinsics_clz(t->ci->count));
+        break;
+      case task_type_self:
+        cost = 1 * wscale * t->ci->count * t->ci->count;
+        break;
+      case task_type_pair:
+        if (t->ci->nodeID != nodeID || t->cj->nodeID != nodeID)
+          cost = 3 * wscale * t->ci->count * t->cj->count * sid_scale[t->flags];
+        else
+          cost = 2 * wscale * t->ci->count * t->cj->count * sid_scale[t->flags];
+        break;
+      case task_type_sub_pair:
+        if (t->ci->nodeID != nodeID || t->cj->nodeID != nodeID) {
+          if (t->flags < 0)
+            cost = 3 * wscale * t->ci->count * t->cj->count;
+          else
+            cost =
                 3 * wscale * t->ci->count * t->cj->count * sid_scale[t->flags];
+        } else {
+          if (t->flags < 0)
+            cost = 2 * wscale * t->ci->count * t->cj->count;
           else
-            t->weight +=
+            cost =
                 2 * wscale * t->ci->count * t->cj->count * sid_scale[t->flags];
-          break;
-        case task_type_sub_pair:
-          if (t->ci->nodeID != nodeID || t->cj->nodeID != nodeID) {
-            if (t->flags < 0)
-              t->weight += 3 * wscale * t->ci->count * t->cj->count;
-            else
-              t->weight += 3 * wscale * t->ci->count * t->cj->count *
-                           sid_scale[t->flags];
-          } else {
-            if (t->flags < 0)
-              t->weight += 2 * wscale * t->ci->count * t->cj->count;
-            else
-              t->weight += 2 * wscale * t->ci->count * t->cj->count *
-                           sid_scale[t->flags];
-          }
-          break;
-        case task_type_sub_self:
-          t->weight += 1 * wscale * t->ci->count * t->ci->count;
-          break;
-        case task_type_ghost:
-          if (t->ci == t->ci->super) t->weight += wscale * t->ci->count;
-          break;
-        case task_type_kick:
-          t->weight += wscale * t->ci->count;
-          break;
-        case task_type_init:
-          t->weight += wscale * t->ci->count;
-          break;
-        default:
-          break;
-      }
+        }
+        break;
+      case task_type_sub_self:
+        cost = 1 * wscale * t->ci->count * t->ci->count;
+        break;
+      case task_type_ghost:
+        if (t->ci == t->ci->super) cost = wscale * t->ci->count;
+        break;
+      case task_type_kick:
+        cost = wscale * t->ci->count;
+        break;
+      case task_type_init:
+        cost = wscale * t->ci->count;
+        break;
+      default:
+        cost = 0;
+        break;
+    }
+#if defined(WITH_MPI) && defined(HAVE_METIS)
+    t->cost = cost;
+#endif
+    t->weight += cost;
   }
 
   if (verbose)
@@ -1052,9 +1055,6 @@ void scheduler_start(struct scheduler *s, unsigned int mask,
   /* Clear all the waits, rids, and times. */
   for (int k = 0; k < s->nr_tasks; k++) {
     s->tasks[k].wait = 1;
-    s->tasks[k].rid = -1;
-    s->tasks[k].tic = 0;
-    s->tasks[k].toc = 0;
     if (((1 << s->tasks[k].type) & mask) == 0 ||
         ((1 << s->tasks[k].subtype) & s->submask) == 0)
       s->tasks[k].skip = 1;
@@ -1137,9 +1137,6 @@ void scheduler_enqueue(struct scheduler *s, struct task *t) {
   /* The target queue for this task. */
   int qid = -1;
 
-  /* Fail if this task has already been enqueued before. */
-  if (t->rid >= 0) error("Task has already been enqueued.");
-
   /* Ignore skipped tasks and tasks not in the masks. */
   if (t->skip || (1 << t->type) & ~(s->mask) ||
       (1 << t->subtype) & ~(s->submask)) {
@@ -1268,7 +1265,9 @@ struct task *scheduler_done(struct scheduler *s, struct task *t) {
 
   /* Task definitely done, signal any sleeping runners. */
   if (!t->implicit) {
+#ifdef SWIFT_DEBUG_TASKS
     t->toc = getticks();
+#endif
     pthread_mutex_lock(&s->sleep_mutex);
     atomic_dec(&s->waiting);
     pthread_cond_broadcast(&s->sleep_cond);
@@ -1310,7 +1309,9 @@ struct task *scheduler_unlock(struct scheduler *s, struct task *t) {
 
   /* Task definitely done. */
   if (!t->implicit) {
+#ifdef SWIFT_DEBUG_TASKS
     t->toc = getticks();
+#endif
     pthread_mutex_lock(&s->sleep_mutex);
     atomic_dec(&s->waiting);
     pthread_cond_broadcast(&s->sleep_cond);
@@ -1394,11 +1395,13 @@ struct task *scheduler_gettask(struct scheduler *s, int qid,
     }
   }
 
+#ifdef SWIFT_DEBUG_TASKS
   /* Start the timer on this task, if we got one. */
   if (res != NULL) {
     res->tic = getticks();
     res->rid = qid;
   }
+#endif
 
   /* No milk today. */
   return res;
diff --git a/src/task.h b/src/task.h
index f840c0b4b8e807dce28f6f13479dbdf4995ab66d..f6d75598af933adeb045973890866b54f16d84d4 100644
--- a/src/task.h
+++ b/src/task.h
@@ -105,9 +105,6 @@ struct task {
   /*! List of tasks unlocked by this one */
   struct task **unlock_tasks;
 
-  /*! Start and end time of this task */
-  ticks tic, toc;
-
 #ifdef WITH_MPI
 
   /*! Buffer for this task's communications */
@@ -127,8 +124,10 @@ struct task {
   /*! Weight of the task */
   int weight;
 
-  /*! ID of the queue or runner owning this task */
-  short int rid;
+#if defined(WITH_MPI) && defined(HAVE_METIS)
+  /*! Individual cost estimate for this task (used for repartitioning). */
+  int cost;
+#endif
 
   /*! Number of tasks unlocked by this one */
   short int nr_unlock_tasks;
@@ -151,6 +150,14 @@ struct task {
   /*! Is this task implicit (i.e. does not do anything) ? */
   char implicit;
 
+#ifdef SWIFT_DEBUG_TASKS
+  /*! ID of the queue or runner owning this task */
+  short int rid;
+
+  /*! Start and end time of this task */
+  ticks tic, toc;
+#endif
+
 } SWIFT_STRUCT_ALIGN;
 
 /* Function prototypes. */