diff --git a/src/scheduler.c b/src/scheduler.c
index 13527d97599b9cdcff18a0143d6238559a3af450..1a57d7ae267e9ddb5b9d0428b75bca94c7aeb85d 100644
--- a/src/scheduler.c
+++ b/src/scheduler.c
@@ -996,13 +996,18 @@ void scheduler_reweight(struct scheduler *s, int verbose) {
 void scheduler_rewait_mapper(void *map_data, int num_elements,
                              void *extra_data) {
 
-  struct task *tasks = (struct task *)map_data;
+  struct scheduler *s = (struct scheduler *)extra_data;
+  const int *tid = (int *)map_data;
 
   for (int ind = 0; ind < num_elements; ind++) {
-    struct task *t = &tasks[ind];
+    struct task *t = &s->tasks[tid[ind]];
 
+    /* Ignore skipped tasks. */
     if (t->skip) continue;
 
+    /* Increment the task's own wait counter for the enqueueing. */
+    atomic_inc(&t->wait);
+
     /* Skip sort tasks that have already been performed */
     if (t->type == task_type_sort && t->flags == 0) {
       error("Empty sort task encountered.");
@@ -1037,12 +1042,13 @@ void scheduler_enqueue_mapper(void *map_data, int num_elements,
  */
 void scheduler_start(struct scheduler *s) {
 
-  /* Clear all the waits. */
-  for (int k = 0; k < s->nr_tasks; k++) s->tasks[k].wait = 1;
-
   /* Re-wait the tasks. */
-  threadpool_map(s->threadpool, scheduler_rewait_mapper, s->tasks, s->nr_tasks,
-                 sizeof(struct task), 1000, NULL);
+  if (s->active_count > 1000) {
+    threadpool_map(s->threadpool, scheduler_rewait_mapper, s->tid_active,
+                   s->active_count, sizeof(int), 1000, s);
+  } else {
+    scheduler_rewait_mapper(s->tid_active, s->active_count, s);
+  }
 
 /* Check we have not missed an active task */
 #ifdef SWIFT_DEBUG_CHECKS
@@ -1091,12 +1097,11 @@ void scheduler_start(struct scheduler *s) {
 #endif
 
   /* Loop over the tasks and enqueue whoever is ready. */
-  for (int k = 0; k < s->active_count; k++) {
-    struct task *t = &s->tasks[s->tid_active[k]];
-    if (atomic_dec(&t->wait) == 1 && !t->skip) {
-      scheduler_enqueue(s, t);
-      pthread_cond_signal(&s->sleep_cond);
-    }
+  if (s->active_count > 1000) {
+    threadpool_map(s->threadpool, scheduler_enqueue_mapper, s->tid_active,
+                   s->active_count, sizeof(int), 1000, s);
+  } else {
+    scheduler_enqueue_mapper(s->tid_active, s->active_count, s);
   }
 
   /* Clear the list of active tasks. */
@@ -1232,6 +1237,7 @@ struct task *scheduler_done(struct scheduler *s, struct task *t) {
      they are ready. */
   for (int k = 0; k < t->nr_unlock_tasks; k++) {
     struct task *t2 = t->unlock_tasks[k];
+    if (t2->skip) continue;
 
     const int res = atomic_dec(&t2->wait);
     if (res < 1) {
diff --git a/src/scheduler.h b/src/scheduler.h
index 8631d22cce6cc5925f154a8f3c875dc8d38d5c8b..f2225f5f5b8d0a5db54eb8506e02d78b14f4bb88 100644
--- a/src/scheduler.h
+++ b/src/scheduler.h
@@ -114,6 +114,7 @@ struct scheduler {
 __attribute__((always_inline)) INLINE static void scheduler_activate(
     struct scheduler *s, struct task *t) {
   if (atomic_cas(&t->skip, 1, 0)) {
+    t->wait = 0;
     int ind = atomic_inc(&s->active_count);
     s->tid_active[ind] = t - s->tasks;
   }