diff --git a/src/engine.c b/src/engine.c
index a435178acb833a06db7f8432dc38c990f247833b..bfee0f2e5a6e475a4726d65c38c0e1821210ba1a 100644
--- a/src/engine.c
+++ b/src/engine.c
@@ -1165,48 +1165,54 @@ void engine_make_hydroloop_tasks(struct engine *e) {
  *
  * @param e The #engine.
  */
-void engine_count_and_link_tasks_mapper(void *map_data, void *extra_data) {
+void engine_count_and_link_tasks_mapper(void *map_data, int num_elements,
+                                        void *extra_data) {
 
   struct engine *e = (struct engine *)extra_data;
-  struct task *t = (struct task *)map_data;
+  struct task *tasks = (struct task *)map_data;
   struct scheduler *sched = &e->sched;
 
-  if (t->skip) return;
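+  /* Loop over the chunk of tasks handed to this mapper call. */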
+  for (int ind = 0; ind < num_elements; ind++) {
 
-  /* Link sort tasks together. */
-  if (t->type == task_type_sort && t->ci->split)
-    for (int j = 0; j < 8; j++)
-      if (t->ci->progeny[j] != NULL && t->ci->progeny[j]->sorts != NULL) {
-        t->ci->progeny[j]->sorts->skip = 0;
-        scheduler_addunlock(sched, t->ci->progeny[j]->sorts, t);
-      }
+    struct task *t = &tasks[ind];
 
-  /* Link density tasks to cells. */
-  if (t->type == task_type_self) {
-    atomic_inc(&t->ci->nr_tasks);
-    if (t->subtype == task_subtype_density) {
-      engine_addlink(e, &t->ci->density, t);
-      atomic_inc(&t->ci->nr_density);
-    }
-  } else if (t->type == task_type_pair) {
-    atomic_inc(&t->ci->nr_tasks);
-    atomic_inc(&t->cj->nr_tasks);
-    if (t->subtype == task_subtype_density) {
-      engine_addlink(e, &t->ci->density, t);
-      atomic_inc(&t->ci->nr_density);
-      engine_addlink(e, &t->cj->density, t);
-      atomic_inc(&t->cj->nr_density);
-    }
-  } else if (t->type == task_type_sub) {
-    atomic_inc(&t->ci->nr_tasks);
-    if (t->cj != NULL) atomic_inc(&t->cj->nr_tasks);
-    if (t->subtype == task_subtype_density) {
-      engine_addlink(e, &t->ci->density, t);
-      atomic_inc(&t->ci->nr_density);
-      if (t->cj != NULL) {
+    if (t->skip) continue;
+
+    /* Link sort tasks together. */
+    if (t->type == task_type_sort && t->ci->split)
+      for (int j = 0; j < 8; j++)
+        if (t->ci->progeny[j] != NULL && t->ci->progeny[j]->sorts != NULL) {
+          t->ci->progeny[j]->sorts->skip = 0;
+          scheduler_addunlock(sched, t->ci->progeny[j]->sorts, t);
+        }
+
+    /* Link density tasks to cells. */
+    if (t->type == task_type_self) {
+      atomic_inc(&t->ci->nr_tasks);
+      if (t->subtype == task_subtype_density) {
+        engine_addlink(e, &t->ci->density, t);
+        atomic_inc(&t->ci->nr_density);
+      }
+    } else if (t->type == task_type_pair) {
+      atomic_inc(&t->ci->nr_tasks);
+      atomic_inc(&t->cj->nr_tasks);
+      if (t->subtype == task_subtype_density) {
+        engine_addlink(e, &t->ci->density, t);
+        atomic_inc(&t->ci->nr_density);
         engine_addlink(e, &t->cj->density, t);
         atomic_inc(&t->cj->nr_density);
       }
+    } else if (t->type == task_type_sub) {
+      atomic_inc(&t->ci->nr_tasks);
+      if (t->cj != NULL) atomic_inc(&t->cj->nr_tasks);
+      if (t->subtype == task_subtype_density) {
+        engine_addlink(e, &t->ci->density, t);
+        atomic_inc(&t->ci->nr_density);
+        if (t->cj != NULL) {
+          engine_addlink(e, &t->cj->density, t);
+          atomic_inc(&t->cj->nr_density);
+        }
+      }
     }
   }
 }
@@ -1246,89 +1252,94 @@ static inline void engine_make_hydro_loops_dependencies(struct scheduler *sched,
  *
  * @param e The #engine.
  */
-void engine_make_extra_hydroloop_tasks_mapper(void *map_data,
+void engine_make_extra_hydroloop_tasks_mapper(void *map_data, int num_elements,
                                               void *extra_data) {
 
   struct engine *e = (struct engine *)extra_data;
   struct scheduler *sched = &e->sched;
   const int nodeID = e->nodeID;
-  struct task *t = (struct task *)map_data;
+  struct task *tasks = (struct task *)map_data;
 
-  /* Skip? */
-  if (t->skip) return;
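+  /* For each task in the chunk, construct the second (force) hydro loop */
+  /* where needed and wire up its dependencies. */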
+  for (int ind = 0; ind < num_elements; ind++) {
+    struct task *t = &tasks[ind];
 
-  /* Self-interaction? */
-  if (t->type == task_type_self && t->subtype == task_subtype_density) {
+    /* Skip? */
+    if (t->skip) continue;
 
-    /* Start by constructing the task for the second hydro loop */
-    struct task *t2 = scheduler_addtask(
-        sched, task_type_self, task_subtype_force, 0, 0, t->ci, NULL, 0);
+    /* Self-interaction? */
+    if (t->type == task_type_self && t->subtype == task_subtype_density) {
 
-    /* Add the link between the new loop and the cell */
-    engine_addlink(e, &t->ci->force, t2);
-    atomic_inc(&t->ci->nr_force);
+      /* Start by constructing the task for the second hydro loop */
+      struct task *t2 = scheduler_addtask(
+          sched, task_type_self, task_subtype_force, 0, 0, t->ci, NULL, 0);
 
-    /* Now, build all the dependencies for the hydro */
-    engine_make_hydro_loops_dependencies(sched, t, t2, t->ci);
-  }
+      /* Add the link between the new loop and the cell */
+      engine_addlink(e, &t->ci->force, t2);
+      atomic_inc(&t->ci->nr_force);
 
-  /* Otherwise, pair interaction? */
-  else if (t->type == task_type_pair && t->subtype == task_subtype_density) {
-
-    /* Start by constructing the task for the second hydro loop */
-    struct task *t2 = scheduler_addtask(
-        sched, task_type_pair, task_subtype_force, 0, 0, t->ci, t->cj, 0);
-
-    /* Add the link between the new loop and both cells */
-    engine_addlink(e, &t->ci->force, t2);
-    atomic_inc(&t->ci->nr_force);
-    engine_addlink(e, &t->cj->force, t2);
-    atomic_inc(&t->cj->nr_force);
-
-    /* Now, build all the dependencies for the hydro for the cells */
-    /* that are local and are not descendant of the same super-cells */
-    if (t->ci->nodeID == nodeID) {
+      /* Now, build all the dependencies for the hydro */
       engine_make_hydro_loops_dependencies(sched, t, t2, t->ci);
     }
-    if (t->cj->nodeID == nodeID && t->ci->super != t->cj->super) {
-      engine_make_hydro_loops_dependencies(sched, t, t2, t->cj);
-    }
-  }
 
-  /* Otherwise, sub interaction? */
-  else if (t->type == task_type_sub && t->subtype == task_subtype_density) {
+    /* Otherwise, pair interaction? */
+    else if (t->type == task_type_pair && t->subtype == task_subtype_density) {
 
-    /* Start by constructing the task for the second hydro loop */
-    struct task *t2 = scheduler_addtask(
-        sched, task_type_sub, task_subtype_force, t->flags, 0, t->ci, t->cj, 0);
+      /* Start by constructing the task for the second hydro loop */
+      struct task *t2 = scheduler_addtask(
+          sched, task_type_pair, task_subtype_force, 0, 0, t->ci, t->cj, 0);
 
-    /* Add the link between the new loop and both cells */
-    engine_addlink(e, &t->ci->force, t2);
-    atomic_inc(&t->ci->nr_force);
-    if (t->cj != NULL) {
+      /* Add the link between the new loop and both cells */
+      engine_addlink(e, &t->ci->force, t2);
+      atomic_inc(&t->ci->nr_force);
       engine_addlink(e, &t->cj->force, t2);
       atomic_inc(&t->cj->nr_force);
-    }
 
-    /* Now, build all the dependencies for the hydro for the cells */
-    /* that are local and are not descendant of the same super-cells */
-    if (t->ci->nodeID == nodeID) {
-      engine_make_hydro_loops_dependencies(sched, t, t2, t->ci);
+      /* Now, build all the dependencies for the hydro for the cells */
+      /* that are local and are not descendants of the same super-cell */
+      if (t->ci->nodeID == nodeID) {
+        engine_make_hydro_loops_dependencies(sched, t, t2, t->ci);
+      }
+      if (t->cj->nodeID == nodeID && t->ci->super != t->cj->super) {
+        engine_make_hydro_loops_dependencies(sched, t, t2, t->cj);
+      }
     }
-    if (t->cj != NULL && t->cj->nodeID == nodeID &&
-        t->ci->super != t->cj->super) {
-      engine_make_hydro_loops_dependencies(sched, t, t2, t->cj);
+
+    /* Otherwise, sub interaction? */
+    else if (t->type == task_type_sub && t->subtype == task_subtype_density) {
+
+      /* Start by constructing the task for the second hydro loop */
+      struct task *t2 =
+          scheduler_addtask(sched, task_type_sub, task_subtype_force, t->flags,
+                            0, t->ci, t->cj, 0);
+
+      /* Add the link between the new loop and both cells */
+      engine_addlink(e, &t->ci->force, t2);
+      atomic_inc(&t->ci->nr_force);
+      if (t->cj != NULL) {
+        engine_addlink(e, &t->cj->force, t2);
+        atomic_inc(&t->cj->nr_force);
+      }
+
+      /* Now, build all the dependencies for the hydro for the cells */
+      /* that are local and are not descendants of the same super-cell */
+      if (t->ci->nodeID == nodeID) {
+        engine_make_hydro_loops_dependencies(sched, t, t2, t->ci);
+      }
+      if (t->cj != NULL && t->cj->nodeID == nodeID &&
+          t->ci->super != t->cj->super) {
+        engine_make_hydro_loops_dependencies(sched, t, t2, t->cj);
+      }
     }
-  }
 
-  /* /\* Kick tasks should rely on the grav_down tasks of their cell. *\/ */
-  /* else if (t->type == task_type_kick && t->ci->grav_down != NULL) */
-  /*   scheduler_addunlock(sched, t->ci->grav_down, t); */
+    /* /\* Kick tasks should rely on the grav_down tasks of their cell. *\/ */
+    /* else if (t->type == task_type_kick && t->ci->grav_down != NULL) */
+    /*   scheduler_addunlock(sched, t->ci->grav_down, t); */
 
-  /* External gravity tasks should depend on init and unlock the kick */
-  else if (t->type == task_type_grav_external) {
-    scheduler_addunlock(sched, t->ci->init, t);
-    scheduler_addunlock(sched, t, t->ci->kick);
+    /* External gravity tasks should depend on init and unlock the kick */
+    else if (t->type == task_type_grav_external) {
+      scheduler_addunlock(sched, t->ci->init, t);
+      scheduler_addunlock(sched, t, t->ci->kick);
+    }
   }
 }
 
@@ -1445,7 +1456,7 @@ void engine_maketasks(struct engine *e) {
      store the density tasks in each cell, and make each sort
      depend on the sorts of its progeny. */
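+  /* The tasks are handed to the mapper in chunks (of 1000 tasks here). */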
   threadpool_map(&e->threadpool, engine_count_and_link_tasks_mapper,
-                 sched->tasks, sched->nr_tasks, sizeof(struct task), e);
+                 sched->tasks, sched->nr_tasks, sizeof(struct task), 1000, e);
 
   /* Append hierarchical tasks to each cell */
   for (int k = 0; k < nr_cells; k++)
@@ -1455,7 +1466,7 @@ void engine_maketasks(struct engine *e) {
      Each force task depends on the cell ghosts and unlocks the kick task
      of its super-cell. */
   threadpool_map(&e->threadpool, engine_make_extra_hydroloop_tasks_mapper,
-                 sched->tasks, sched->nr_tasks, sizeof(struct task), e);
+                 sched->tasks, sched->nr_tasks, sizeof(struct task), 1000, e);
 
   /* Add the communication tasks if MPI is being used. */
   if (e->policy & engine_policy_mpi) {
@@ -1507,140 +1518,67 @@ void engine_maketasks(struct engine *e) {
  * @return 1 if the space has to be rebuilt, 0 otherwise.
  */
 
-void engine_marktasks_fixdt_mapper(void *map_data, void *extra_data) {
+void engine_marktasks_fixdt_mapper(void *map_data, int num_elements,
+                                   void *extra_data) {
   /* Unpack the arguments. */
-  struct task *t = (struct task *)map_data;
+  struct task *tasks = (struct task *)map_data;
   int *rebuild_space = (int *)extra_data;
 
-  /* Pair? */
-  if (t->type == task_type_pair ||
-      (t->type == task_type_sub && t->cj != NULL)) {
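+  /* Run through the chunk: flag a rebuild if particles have moved too much */
+  /* and make sort tasks implicit once all their sorts are done. */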
+  for (int ind = 0; ind < num_elements; ind++) {
+    struct task *t = &tasks[ind];
 
-    /* Local pointers. */
-    const struct cell *ci = t->ci;
-    const struct cell *cj = t->cj;
+    /* Pair? */
+    if (t->type == task_type_pair ||
+        (t->type == task_type_sub && t->cj != NULL)) {
 
-    /* Too much particle movement? */
-    if (t->tight &&
-        (fmaxf(ci->h_max, cj->h_max) + ci->dx_max + cj->dx_max > cj->dmin ||
-         ci->dx_max > space_maxreldx * ci->h_max ||
-         cj->dx_max > space_maxreldx * cj->h_max))
-      *rebuild_space = 1;
+      /* Local pointers. */
+      const struct cell *ci = t->ci;
+      const struct cell *cj = t->cj;
 
-  }
+      /* Too much particle movement? */
+      if (t->tight &&
+          (fmaxf(ci->h_max, cj->h_max) + ci->dx_max + cj->dx_max > cj->dmin ||
+           ci->dx_max > space_maxreldx * ci->h_max ||
+           cj->dx_max > space_maxreldx * cj->h_max))
+        *rebuild_space = 1;
 
-  /* Sort? */
-  else if (t->type == task_type_sort) {
+    }
 
-    /* If all the sorts have been done, make this task implicit. */
-    if (!(t->flags & (t->flags ^ t->ci->sorted))) t->implicit = 1;
+    /* Sort? */
+    else if (t->type == task_type_sort) {
+
+      /* If all the sorts have been done, make this task implicit. */
+      if (!(t->flags & (t->flags ^ t->ci->sorted))) t->implicit = 1;
+    }
   }
 }
 
-void engine_marktasks_sorts_mapper(void *map_data, void *extra_data) {
+void engine_marktasks_sorts_mapper(void *map_data, int num_elements,
+                                   void *extra_data) {
   /* Unpack the arguments. */
-  struct task *t = (struct task *)map_data;
-  if (t->type == task_type_sort) {
-    t->flags = 0;
-    t->skip = 1;
+  struct task *tasks = (struct task *)map_data;
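+  /* Reset and skip every sort task in this chunk. */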
+  for (int ind = 0; ind < num_elements; ind++) {
+    struct task *t = &tasks[ind];
+    if (t->type == task_type_sort) {
+      t->flags = 0;
+      t->skip = 1;
+    }
   }
 }
 
-void engine_marktasks_mapper(void *map_data, void *extra_data) {
+void engine_marktasks_mapper(void *map_data, int num_elements,
+                             void *extra_data) {
   /* Unpack the arguments. */
-  struct task *t = (struct task *)map_data;
+  struct task *tasks = (struct task *)map_data;
   const int ti_end = ((int *)extra_data)[0];
   int *rebuild_space = &((int *)extra_data)[1];
 
-  /* Single-cell task? */
-  if (t->type == task_type_self || t->type == task_type_ghost ||
-      (t->type == task_type_sub && t->cj == NULL)) {
-
-    /* Set this task's skip. */
-    t->skip = (t->ci->ti_end_min > ti_end);
-  }
-
-  /* Pair? */
-  else if (t->type == task_type_pair ||
-           (t->type == task_type_sub && t->cj != NULL)) {
-
-    /* Local pointers. */
-    const struct cell *ci = t->ci;
-    const struct cell *cj = t->cj;
-
-    /* Set this task's skip. */
-    t->skip = (ci->ti_end_min > ti_end && cj->ti_end_min > ti_end);
-
-    /* Too much particle movement? */
-    if (t->tight &&
-        (fmaxf(ci->h_max, cj->h_max) + ci->dx_max + cj->dx_max > cj->dmin ||
-         ci->dx_max > space_maxreldx * ci->h_max ||
-         cj->dx_max > space_maxreldx * cj->h_max))
-      *rebuild_space = 1;
-
-    /* Set the sort flags. */
-    if (!t->skip && t->type == task_type_pair) {
-      if (!(ci->sorted & (1 << t->flags))) {
-        atomic_or(&ci->sorts->flags, (1 << t->flags));
-        ci->sorts->skip = 0;
-      }
-      if (!(cj->sorted & (1 << t->flags))) {
-        atomic_or(&cj->sorts->flags, (1 << t->flags));
-        cj->sorts->skip = 0;
-      }
-    }
-
-  }
-
-  /* Kick? */
-  else if (t->type == task_type_kick) {
-    t->skip = (t->ci->ti_end_min > ti_end);
-    t->ci->updated = 0;
-    t->ci->g_updated = 0;
-  }
-
-  /* Drift? */
-  else if (t->type == task_type_drift)
-    t->skip = 0;
-
-  /* Init? */
-  else if (t->type == task_type_init) {
-    /* Set this task's skip. */
-    t->skip = (t->ci->ti_end_min > ti_end);
-  }
-
-  /* None? */
-  else if (t->type == task_type_none)
-    t->skip = 1;
-}
-
-int engine_marktasks_serial(struct engine *e) {
-
-  struct scheduler *s = &e->sched;
-  const int ti_end = e->ti_current;
-  const int nr_tasks = s->nr_tasks;
-  const int *const ind = s->tasks_ind;
-  struct task *tasks = s->tasks;
-
-  /* Run through the tasks and mark as skip or not. */
-  for (int k = 0; k < nr_tasks; k++) {
-
-    /* Get a handle on the kth task. */
-    struct task *t = &tasks[ind[k]];
-
-    /* Sort-task? Note that due to the task ranking, the sorts
-       will all come before the pairs. */
-    if (t->type == task_type_sort) {
-
-      /* Re-set the flags. */
-      t->flags = 0;
-      t->skip = 1;
-
-    }
+  for (int ind = 0; ind < num_elements; ind++) {
+    struct task *t = &tasks[ind];
 
     /* Single-cell task? */
-    else if (t->type == task_type_self || t->type == task_type_ghost ||
-             (t->type == task_type_sub && t->cj == NULL)) {
+    if (t->type == task_type_self || t->type == task_type_ghost ||
+        (t->type == task_type_sub && t->cj == NULL)) {
 
       /* Set this task's skip. */
       t->skip = (t->ci->ti_end_min > ti_end);
@@ -1662,16 +1600,16 @@ int engine_marktasks_serial(struct engine *e) {
           (fmaxf(ci->h_max, cj->h_max) + ci->dx_max + cj->dx_max > cj->dmin ||
            ci->dx_max > space_maxreldx * ci->h_max ||
            cj->dx_max > space_maxreldx * cj->h_max))
-        return 1;
+        *rebuild_space = 1;
 
       /* Set the sort flags. */
       if (!t->skip && t->type == task_type_pair) {
         if (!(ci->sorted & (1 << t->flags))) {
-          ci->sorts->flags |= (1 << t->flags);
+          atomic_or(&ci->sorts->flags, (1 << t->flags));
           ci->sorts->skip = 0;
         }
         if (!(cj->sorted & (1 << t->flags))) {
-          cj->sorts->flags |= (1 << t->flags);
+          atomic_or(&cj->sorts->flags, (1 << t->flags));
           cj->sorts->skip = 0;
         }
       }
@@ -1699,9 +1637,6 @@ int engine_marktasks_serial(struct engine *e) {
     else if (t->type == task_type_none)
       t->skip = 1;
   }
-
-  /* All is well... */
-  return 0;
 }
 
 int engine_marktasks(struct engine *e) {
@@ -1715,20 +1650,19 @@ int engine_marktasks(struct engine *e) {
 
     /* Run through the tasks and mark as skip or not. */
     threadpool_map(&e->threadpool, engine_marktasks_fixdt_mapper, s->tasks,
-                   s->nr_tasks, sizeof(struct task), &rebuild_space);
+                   s->nr_tasks, sizeof(struct task), 1000, &rebuild_space);
     return rebuild_space;
 
     /* Multiple-timestep case */
   } else {
 
     /* Run through the tasks and mark as skip or not. */
-    /* int extra_data[2] = {e->ti_current, rebuild_space};
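+    /* Pack the current time and the rebuild flag for the mapper to use. */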
+    int extra_data[2] = {e->ti_current, rebuild_space};
     threadpool_map(&e->threadpool, engine_marktasks_sorts_mapper, s->tasks,
-                   s->nr_tasks, sizeof(struct task), NULL);
+                   s->nr_tasks, sizeof(struct task), 1000, NULL);
     threadpool_map(&e->threadpool, engine_marktasks_mapper, s->tasks,
-                   s->nr_tasks, sizeof(struct task), extra_data);
-    rebuild_space = extra_data[1]; */
-    rebuild_space = engine_marktasks_serial(e);
+                   s->nr_tasks, sizeof(struct task), 1000, extra_data);
+    rebuild_space = extra_data[1];
   }
 
   if (e->verbose)
diff --git a/src/scheduler.c b/src/scheduler.c
index fde7cd0e8e0816b7be899205ec5f160c3e8fb295..09e9ad6a6d559f09325c56fadda15955c10d87c3 100644
--- a/src/scheduler.c
+++ b/src/scheduler.c
@@ -103,7 +103,8 @@ void scheduler_addunlock(struct scheduler *s, struct task *ta,
  * @param s The #scheduler we are working in.
  */
 
-void scheduler_splittasks_mapper(void *map_data, void *extra_data) {
+void scheduler_splittasks_mapper(void *map_data, int num_elements,
+                                 void *extra_data) {
 
   /* Static constants. */
   const static int pts[7][8] = {{-1, 12, 10, 9, 4, 3, 1, 0},
@@ -119,517 +120,521 @@ void scheduler_splittasks_mapper(void *map_data, void *extra_data) {
 
   /* Extract the parameters. */
   struct scheduler *s = (struct scheduler *)extra_data;
-  struct task *t = (struct task *)map_data;
-
-  /* Iterate on this task until we're done with it. */
-  int redo = 1;
-  while (redo) {
-
-    /* Reset the redo flag. */
-    redo = 0;
-
-    /* Non-splittable task? */
-    if ((t->ci == NULL || (t->type == task_type_pair && t->cj == NULL)) ||
-        ((t->type == task_type_kick) && t->ci->nodeID != s->nodeID) ||
-        ((t->type == task_type_drift) && t->ci->nodeID != s->nodeID) ||
-        ((t->type == task_type_init) && t->ci->nodeID != s->nodeID)) {
-      t->type = task_type_none;
-      t->skip = 1;
-      return;
-    }
+  struct task *tasks = (struct task *)map_data;
+
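+  /* Loop over the chunk of tasks; tasks created while splitting are split */
+  /* in turn via recursive calls to this mapper with a single element. */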
+  for (int ind = 0; ind < num_elements; ind++) {
+    struct task *t = &tasks[ind];
 
-    /* Self-interaction? */
-    if (t->type == task_type_self) {
+    /* Iterate on this task until we're done with it. */
+    int redo = 1;
+    while (redo) {
 
-      /* Get a handle on the cell involved. */
-      struct cell *ci = t->ci;
+      /* Reset the redo flag. */
+      redo = 0;
 
-      /* Foreign task? */
-      if (ci->nodeID != s->nodeID) {
+      /* Non-splittable task? */
+      if ((t->ci == NULL || (t->type == task_type_pair && t->cj == NULL)) ||
+          ((t->type == task_type_kick) && t->ci->nodeID != s->nodeID) ||
+          ((t->type == task_type_drift) && t->ci->nodeID != s->nodeID) ||
+          ((t->type == task_type_init) && t->ci->nodeID != s->nodeID)) {
+        t->type = task_type_none;
         t->skip = 1;
-        return;
+        break;
       }
 
-      /* Is this cell even split? */
-      if (ci->split) {
+      /* Self-interaction? */
+      if (t->type == task_type_self) {
 
-        /* Make a sub? */
-        if (scheduler_dosub && ci->count < space_subsize / ci->count) {
+        /* Get a handle on the cell involved. */
+        struct cell *ci = t->ci;
 
-          /* convert to a self-subtask. */
-          t->type = task_type_sub;
-
-          /* Otherwise, make tasks explicitly. */
-        } else {
-
-          /* Take a step back (we're going to recycle the current task)... */
-          redo = 1;
-
-          /* Add the self tasks. */
-          int first_child = 0;
-          while (ci->progeny[first_child] == NULL) first_child++;
-          t->ci = ci->progeny[first_child];
-          for (int k = first_child + 1; k < 8; k++)
-            if (ci->progeny[k] != NULL)
-              scheduler_splittasks_mapper(
-                  scheduler_addtask(s, task_type_self, t->subtype, 0, 0,
-                                    ci->progeny[k], NULL, 0),
-                  s);
-
-          /* Make a task for each pair of progeny. */
-          for (int j = 0; j < 8; j++)
-            if (ci->progeny[j] != NULL)
-              for (int k = j + 1; k < 8; k++)
-                if (ci->progeny[k] != NULL)
-                  scheduler_splittasks_mapper(
-                      scheduler_addtask(s, task_type_pair, t->subtype,
-                                        pts[j][k], 0, ci->progeny[j],
-                                        ci->progeny[k], 0),
-                      s);
+        /* Foreign task? */
+        if (ci->nodeID != s->nodeID) {
+          t->skip = 1;
+          break;
         }
-      }
 
-      /* Pair interaction? */
-    } else if (t->type == task_type_pair) {
+        /* Is this cell even split? */
+        if (ci->split) {
 
-      /* Get a handle on the cells involved. */
-      struct cell *ci = t->ci;
-      struct cell *cj = t->cj;
-      const double hi = ci->dmin;
-      const double hj = cj->dmin;
+          /* Make a sub? */
+          if (scheduler_dosub && ci->count < space_subsize / ci->count) {
 
-      /* Foreign task? */
-      if (ci->nodeID != s->nodeID && cj->nodeID != s->nodeID) {
-        t->skip = 1;
-        return;
-      }
+            /* convert to a self-subtask. */
+            t->type = task_type_sub;
 
-      /* Get the sort ID, use space_getsid and not t->flags
-         to make sure we get ci and cj swapped if needed. */
-      double shift[3];
-      int sid = space_getsid(s->space, &ci, &cj, shift);
+            /* Otherwise, make tasks explicitly. */
+          } else {
 
-      /* Should this task be split-up? */
-      if (ci->split && cj->split &&
-          ci->h_max * kernel_gamma * space_stretch < hi / 2 &&
-          cj->h_max * kernel_gamma * space_stretch < hj / 2) {
+            /* Take a step back (we're going to recycle the current task)... */
+            redo = 1;
 
-        /* Replace by a single sub-task? */
-        if (scheduler_dosub &&
-            ci->count * sid_scale[sid] < space_subsize / cj->count &&
-            sid != 0 && sid != 2 && sid != 6 && sid != 8) {
+            /* Add the self tasks. */
+            int first_child = 0;
+            while (ci->progeny[first_child] == NULL) first_child++;
+            t->ci = ci->progeny[first_child];
+            for (int k = first_child + 1; k < 8; k++)
+              if (ci->progeny[k] != NULL)
+                scheduler_splittasks_mapper(
+                    scheduler_addtask(s, task_type_self, t->subtype, 0, 0,
+                                      ci->progeny[k], NULL, 0),
+                    1, s);
+
+            /* Make a task for each pair of progeny. */
+            for (int j = 0; j < 8; j++)
+              if (ci->progeny[j] != NULL)
+                for (int k = j + 1; k < 8; k++)
+                  if (ci->progeny[k] != NULL)
+                    scheduler_splittasks_mapper(
+                        scheduler_addtask(s, task_type_pair, t->subtype,
+                                          pts[j][k], 0, ci->progeny[j],
+                                          ci->progeny[k], 0),
+                        1, s);
+          }
+        }
 
-          /* Make this task a sub task. */
-          t->type = task_type_sub;
+        /* Pair interaction? */
+      } else if (t->type == task_type_pair) {
 
-          /* Otherwise, split it. */
-        } else {
+        /* Get a handle on the cells involved. */
+        struct cell *ci = t->ci;
+        struct cell *cj = t->cj;
+        const double hi = ci->dmin;
+        const double hj = cj->dmin;
 
-          /* Take a step back (we're going to recycle the current task)... */
-          redo = 1;
-
-          /* For each different sorting type... */
-          switch (sid) {
-
-            case 0: /* (  1 ,  1 ,  1 ) */
-              t->ci = ci->progeny[7];
-              t->cj = cj->progeny[0];
-              t->flags = 0;
-              break;
-
-            case 1: /* (  1 ,  1 ,  0 ) */
-              t->ci = ci->progeny[6];
-              t->cj = cj->progeny[0];
-              t->flags = 1;
-              t->tight = 1;
-              scheduler_splittasks_mapper(
-                  scheduler_addtask(s, task_type_pair, t->subtype, 1, 0,
-                                    ci->progeny[7], cj->progeny[1], 1),
-                  s);
-              scheduler_splittasks_mapper(
-                  scheduler_addtask(s, task_type_pair, t->subtype, 0, 0,
-                                    ci->progeny[6], cj->progeny[1], 1),
-                  s);
-              scheduler_splittasks_mapper(
-                  scheduler_addtask(s, task_type_pair, t->subtype, 2, 0,
-                                    ci->progeny[7], cj->progeny[0], 1),
-                  s);
-              break;
-
-            case 2: /* (  1 ,  1 , -1 ) */
-              t->ci = ci->progeny[6];
-              t->cj = cj->progeny[1];
-              t->flags = 2;
-              t->tight = 1;
-              break;
-
-            case 3: /* (  1 ,  0 ,  1 ) */
-              t->ci = ci->progeny[5];
-              t->cj = cj->progeny[0];
-              t->flags = 3;
-              t->tight = 1;
-              scheduler_splittasks_mapper(
-                  scheduler_addtask(s, task_type_pair, t->subtype, 3, 0,
-                                    ci->progeny[7], cj->progeny[2], 1),
-                  s);
-              scheduler_splittasks_mapper(
-                  scheduler_addtask(s, task_type_pair, t->subtype, 0, 0,
-                                    ci->progeny[5], cj->progeny[2], 1),
-                  s);
-              scheduler_splittasks_mapper(
-                  scheduler_addtask(s, task_type_pair, t->subtype, 6, 0,
-                                    ci->progeny[7], cj->progeny[0], 1),
-                  s);
-              break;
-
-            case 4: /* (  1 ,  0 ,  0 ) */
-              t->ci = ci->progeny[4];
-              t->cj = cj->progeny[0];
-              t->flags = 4;
-              t->tight = 1;
-              scheduler_splittasks_mapper(
-                  scheduler_addtask(s, task_type_pair, t->subtype, 5, 0,
-                                    ci->progeny[5], cj->progeny[0], 1),
-                  s);
-              scheduler_splittasks_mapper(
-                  scheduler_addtask(s, task_type_pair, t->subtype, 7, 0,
-                                    ci->progeny[6], cj->progeny[0], 1),
-                  s);
-              scheduler_splittasks_mapper(
-                  scheduler_addtask(s, task_type_pair, t->subtype, 8, 0,
-                                    ci->progeny[7], cj->progeny[0], 1),
-                  s);
-              scheduler_splittasks_mapper(
-                  scheduler_addtask(s, task_type_pair, t->subtype, 3, 0,
-                                    ci->progeny[4], cj->progeny[1], 1),
-                  s);
-              scheduler_splittasks_mapper(
-                  scheduler_addtask(s, task_type_pair, t->subtype, 4, 0,
-                                    ci->progeny[5], cj->progeny[1], 1),
-                  s);
-              scheduler_splittasks_mapper(
-                  scheduler_addtask(s, task_type_pair, t->subtype, 6, 0,
-                                    ci->progeny[6], cj->progeny[1], 1),
-                  s);
-              scheduler_splittasks_mapper(
-                  scheduler_addtask(s, task_type_pair, t->subtype, 7, 0,
-                                    ci->progeny[7], cj->progeny[1], 1),
-                  s);
-              scheduler_splittasks_mapper(
-                  scheduler_addtask(s, task_type_pair, t->subtype, 1, 0,
-                                    ci->progeny[4], cj->progeny[2], 1),
-                  s);
-              scheduler_splittasks_mapper(
-                  scheduler_addtask(s, task_type_pair, t->subtype, 2, 0,
-                                    ci->progeny[5], cj->progeny[2], 1),
-                  s);
-              scheduler_splittasks_mapper(
-                  scheduler_addtask(s, task_type_pair, t->subtype, 4, 0,
-                                    ci->progeny[6], cj->progeny[2], 1),
-                  s);
-              scheduler_splittasks_mapper(
-                  scheduler_addtask(s, task_type_pair, t->subtype, 5, 0,
-                                    ci->progeny[7], cj->progeny[2], 1),
-                  s);
-              scheduler_splittasks_mapper(
-                  scheduler_addtask(s, task_type_pair, t->subtype, 0, 0,
-                                    ci->progeny[4], cj->progeny[3], 1),
-                  s);
-              scheduler_splittasks_mapper(
-                  scheduler_addtask(s, task_type_pair, t->subtype, 1, 0,
-                                    ci->progeny[5], cj->progeny[3], 1),
-                  s);
-              scheduler_splittasks_mapper(
-                  scheduler_addtask(s, task_type_pair, t->subtype, 3, 0,
-                                    ci->progeny[6], cj->progeny[3], 1),
-                  s);
-              scheduler_splittasks_mapper(
-                  scheduler_addtask(s, task_type_pair, t->subtype, 4, 0,
-                                    ci->progeny[7], cj->progeny[3], 1),
-                  s);
-              break;
-
-            case 5: /* (  1 ,  0 , -1 ) */
-              t->ci = ci->progeny[4];
-              t->cj = cj->progeny[1];
-              t->flags = 5;
-              t->tight = 1;
-              scheduler_splittasks_mapper(
-                  scheduler_addtask(s, task_type_pair, t->subtype, 5, 0,
-                                    ci->progeny[6], cj->progeny[3], 1),
-                  s);
-              scheduler_splittasks_mapper(
-                  scheduler_addtask(s, task_type_pair, t->subtype, 2, 0,
-                                    ci->progeny[4], cj->progeny[3], 1),
-                  s);
-              scheduler_splittasks_mapper(
-                  scheduler_addtask(s, task_type_pair, t->subtype, 8, 0,
-                                    ci->progeny[6], cj->progeny[1], 1),
-                  s);
-              break;
-
-            case 6: /* (  1 , -1 ,  1 ) */
-              t->ci = ci->progeny[5];
-              t->cj = cj->progeny[2];
-              t->flags = 6;
-              t->tight = 1;
-              break;
-
-            case 7: /* (  1 , -1 ,  0 ) */
-              t->ci = ci->progeny[4];
-              t->cj = cj->progeny[3];
-              t->flags = 6;
-              t->tight = 1;
-              scheduler_splittasks_mapper(
-                  scheduler_addtask(s, task_type_pair, t->subtype, 8, 0,
-                                    ci->progeny[5], cj->progeny[2], 1),
-                  s);
-              scheduler_splittasks_mapper(
-                  scheduler_addtask(s, task_type_pair, t->subtype, 7, 0,
-                                    ci->progeny[4], cj->progeny[2], 1),
-                  s);
-              scheduler_splittasks_mapper(
-                  scheduler_addtask(s, task_type_pair, t->subtype, 7, 0,
-                                    ci->progeny[5], cj->progeny[3], 1),
-                  s);
-              break;
-
-            case 8: /* (  1 , -1 , -1 ) */
-              t->ci = ci->progeny[4];
-              t->cj = cj->progeny[3];
-              t->flags = 8;
-              t->tight = 1;
-              break;
-
-            case 9: /* (  0 ,  1 ,  1 ) */
-              t->ci = ci->progeny[3];
-              t->cj = cj->progeny[0];
-              t->flags = 9;
-              t->tight = 1;
-              scheduler_splittasks_mapper(
-                  scheduler_addtask(s, task_type_pair, t->subtype, 9, 0,
-                                    ci->progeny[7], cj->progeny[4], 1),
-                  s);
-              scheduler_splittasks_mapper(
-                  scheduler_addtask(s, task_type_pair, t->subtype, 0, 0,
-                                    ci->progeny[3], cj->progeny[4], 1),
-                  s);
-              scheduler_splittasks_mapper(
-                  scheduler_addtask(s, task_type_pair, t->subtype, 8, 0,
-                                    ci->progeny[7], cj->progeny[0], 1),
-                  s);
-              break;
-
-            case 10: /* (  0 ,  1 ,  0 ) */
-              t->ci = ci->progeny[2];
-              t->cj = cj->progeny[0];
-              t->flags = 10;
-              t->tight = 1;
-              scheduler_splittasks_mapper(
-                  scheduler_addtask(s, task_type_pair, t->subtype, 11, 0,
-                                    ci->progeny[3], cj->progeny[0], 1),
-                  s);
-              scheduler_splittasks_mapper(
-                  scheduler_addtask(s, task_type_pair, t->subtype, 7, 0,
-                                    ci->progeny[6], cj->progeny[0], 1),
-                  s);
-              scheduler_splittasks_mapper(
-                  scheduler_addtask(s, task_type_pair, t->subtype, 6, 0,
-                                    ci->progeny[7], cj->progeny[0], 1),
-                  s);
-              scheduler_splittasks_mapper(
-                  scheduler_addtask(s, task_type_pair, t->subtype, 9, 0,
-                                    ci->progeny[2], cj->progeny[1], 1),
-                  s);
-              scheduler_splittasks_mapper(
-                  scheduler_addtask(s, task_type_pair, t->subtype, 10, 0,
-                                    ci->progeny[3], cj->progeny[1], 1),
-                  s);
-              scheduler_splittasks_mapper(
-                  scheduler_addtask(s, task_type_pair, t->subtype, 8, 0,
-                                    ci->progeny[6], cj->progeny[1], 1),
-                  s);
-              scheduler_splittasks_mapper(
-                  scheduler_addtask(s, task_type_pair, t->subtype, 7, 0,
-                                    ci->progeny[7], cj->progeny[1], 1),
-                  s);
-              scheduler_splittasks_mapper(
-                  scheduler_addtask(s, task_type_pair, t->subtype, 1, 0,
-                                    ci->progeny[2], cj->progeny[4], 1),
-                  s);
-              scheduler_splittasks_mapper(
-                  scheduler_addtask(s, task_type_pair, t->subtype, 2, 0,
-                                    ci->progeny[3], cj->progeny[4], 1),
-                  s);
-              scheduler_splittasks_mapper(
-                  scheduler_addtask(s, task_type_pair, t->subtype, 10, 0,
-                                    ci->progeny[6], cj->progeny[4], 1),
-                  s);
-              scheduler_splittasks_mapper(
-                  scheduler_addtask(s, task_type_pair, t->subtype, 11, 0,
-                                    ci->progeny[7], cj->progeny[4], 1),
-                  s);
-              scheduler_splittasks_mapper(
-                  scheduler_addtask(s, task_type_pair, t->subtype, 0, 0,
-                                    ci->progeny[2], cj->progeny[5], 1),
-                  s);
-              scheduler_splittasks_mapper(
-                  scheduler_addtask(s, task_type_pair, t->subtype, 1, 0,
-                                    ci->progeny[3], cj->progeny[5], 1),
-                  s);
-              scheduler_splittasks_mapper(
-                  scheduler_addtask(s, task_type_pair, t->subtype, 9, 0,
-                                    ci->progeny[6], cj->progeny[5], 1),
-                  s);
-              scheduler_splittasks_mapper(
-                  scheduler_addtask(s, task_type_pair, t->subtype, 10, 0,
-                                    ci->progeny[7], cj->progeny[5], 1),
-                  s);
-              break;
-
-            case 11: /* (  0 ,  1 , -1 ) */
-              t->ci = ci->progeny[2];
-              t->cj = cj->progeny[1];
-              t->flags = 11;
-              t->tight = 1;
-              scheduler_splittasks_mapper(
-                  scheduler_addtask(s, task_type_pair, t->subtype, 11, 0,
-                                    ci->progeny[6], cj->progeny[5], 1),
-                  s);
-              scheduler_splittasks_mapper(
-                  scheduler_addtask(s, task_type_pair, t->subtype, 2, 0,
-                                    ci->progeny[2], cj->progeny[5], 1),
-                  s);
-              scheduler_splittasks_mapper(
-                  scheduler_addtask(s, task_type_pair, t->subtype, 6, 0,
-                                    ci->progeny[6], cj->progeny[1], 1),
-                  s);
-              break;
-
-            case 12: /* (  0 ,  0 ,  1 ) */
-              t->ci = ci->progeny[1];
-              t->cj = cj->progeny[0];
-              t->flags = 12;
-              t->tight = 1;
-              scheduler_splittasks_mapper(
-                  scheduler_addtask(s, task_type_pair, t->subtype, 11, 0,
-                                    ci->progeny[3], cj->progeny[0], 1),
-                  s);
-              scheduler_splittasks_mapper(
-                  scheduler_addtask(s, task_type_pair, t->subtype, 5, 0,
-                                    ci->progeny[5], cj->progeny[0], 1),
-                  s);
-              scheduler_splittasks_mapper(
-                  scheduler_addtask(s, task_type_pair, t->subtype, 2, 0,
-                                    ci->progeny[7], cj->progeny[0], 1),
-                  s);
-              scheduler_splittasks_mapper(
-                  scheduler_addtask(s, task_type_pair, t->subtype, 9, 0,
-                                    ci->progeny[1], cj->progeny[2], 1),
-                  s);
-              scheduler_splittasks_mapper(
-                  scheduler_addtask(s, task_type_pair, t->subtype, 12, 0,
-                                    ci->progeny[3], cj->progeny[2], 1),
-                  s);
-              scheduler_splittasks_mapper(
-                  scheduler_addtask(s, task_type_pair, t->subtype, 8, 0,
-                                    ci->progeny[5], cj->progeny[2], 1),
-                  s);
-              scheduler_splittasks_mapper(
-                  scheduler_addtask(s, task_type_pair, t->subtype, 5, 0,
-                                    ci->progeny[7], cj->progeny[2], 1),
-                  s);
-              scheduler_splittasks_mapper(
-                  scheduler_addtask(s, task_type_pair, t->subtype, 3, 0,
-                                    ci->progeny[1], cj->progeny[4], 1),
-                  s);
-              scheduler_splittasks_mapper(
-                  scheduler_addtask(s, task_type_pair, t->subtype, 6, 0,
-                                    ci->progeny[3], cj->progeny[4], 1),
-                  s);
-              scheduler_splittasks_mapper(
-                  scheduler_addtask(s, task_type_pair, t->subtype, 12, 0,
-                                    ci->progeny[5], cj->progeny[4], 1),
-                  s);
-              scheduler_splittasks_mapper(
-                  scheduler_addtask(s, task_type_pair, t->subtype, 11, 0,
-                                    ci->progeny[7], cj->progeny[4], 1),
-                  s);
-              scheduler_splittasks_mapper(
-                  scheduler_addtask(s, task_type_pair, t->subtype, 0, 0,
-                                    ci->progeny[1], cj->progeny[6], 1),
-                  s);
-              scheduler_splittasks_mapper(
-                  scheduler_addtask(s, task_type_pair, t->subtype, 3, 0,
-                                    ci->progeny[3], cj->progeny[6], 1),
-                  s);
-              scheduler_splittasks_mapper(
-                  scheduler_addtask(s, task_type_pair, t->subtype, 9, 0,
-                                    ci->progeny[5], cj->progeny[6], 1),
-                  s);
-              scheduler_splittasks_mapper(
-                  scheduler_addtask(s, task_type_pair, t->subtype, 12, 0,
-                                    ci->progeny[7], cj->progeny[6], 1),
-                  s);
-              break;
-          } /* switch(sid) */
+        /* Foreign task? */
+        if (ci->nodeID != s->nodeID && cj->nodeID != s->nodeID) {
+          t->skip = 1;
+          break;
         }
 
-        /* Otherwise, break it up if it is too large? */
-      } else if (scheduler_doforcesplit && ci->split && cj->split &&
-                 (ci->count > space_maxsize / cj->count)) {
+        /* Get the sort ID, use space_getsid and not t->flags
+           to make sure we get ci and cj swapped if needed. */
+        double shift[3];
+        int sid = space_getsid(s->space, &ci, &cj, shift);
 
-        // message( "force splitting pair with %i and %i parts." , ci->count ,
-        // cj->count );
+        /* Should this task be split-up? */
+        if (ci->split && cj->split &&
+            ci->h_max * kernel_gamma * space_stretch < hi / 2 &&
+            cj->h_max * kernel_gamma * space_stretch < hj / 2) {
 
-        /* Replace the current task. */
-        t->type = task_type_none;
+          /* Replace by a single sub-task? */
+          if (scheduler_dosub &&
+              ci->count * sid_scale[sid] < space_subsize / cj->count &&
+              sid != 0 && sid != 2 && sid != 6 && sid != 8) {
+
+            /* Make this task a sub task. */
+            t->type = task_type_sub;
+
+            /* Otherwise, split it. */
+          } else {
+
+            /* Take a step back (we're going to recycle the current task)... */
+            redo = 1;
 
-        for (int j = 0; j < 8; j++)
-          if (ci->progeny[j] != NULL)
-            for (int k = 0; k < 8; k++)
-              if (cj->progeny[k] != NULL) {
+            /* For each different sorting type... */
+            switch (sid) {
+
+              case 0: /* (  1 ,  1 ,  1 ) */
+                t->ci = ci->progeny[7];
+                t->cj = cj->progeny[0];
+                t->flags = 0;
+                break;
+
+              case 1: /* (  1 ,  1 ,  0 ) */
+                t->ci = ci->progeny[6];
+                t->cj = cj->progeny[0];
+                t->flags = 1;
+                t->tight = 1;
+                scheduler_splittasks_mapper(
+                    scheduler_addtask(s, task_type_pair, t->subtype, 1, 0,
+                                      ci->progeny[7], cj->progeny[1], 1),
+                    1, s);
                 scheduler_splittasks_mapper(
                     scheduler_addtask(s, task_type_pair, t->subtype, 0, 0,
-                                      ci->progeny[j], cj->progeny[k], 0),
-                    s);
-                t->flags = space_getsid(s->space, &t->ci, &t->cj, shift);
-              }
-
-        /* Otherwise, if not spilt, stitch-up the sorting. */
-      } else {
-
-        /* Create the sort for ci. */
-        lock_lock(&ci->lock);
-        if (ci->sorts == NULL)
-          ci->sorts =
-              scheduler_addtask(s, task_type_sort, 0, 1 << sid, 0, ci, NULL, 0);
-        else
-          ci->sorts->flags |= (1 << sid);
-        lock_unlock_blind(&ci->lock);
-        scheduler_addunlock(s, ci->sorts, t);
-
-        /* Create the sort for cj. */
-        lock_lock(&cj->lock);
-        if (cj->sorts == NULL)
-          cj->sorts =
-              scheduler_addtask(s, task_type_sort, 0, 1 << sid, 0, cj, NULL, 0);
-        else
-          cj->sorts->flags |= (1 << sid);
-        lock_unlock_blind(&cj->lock);
-        scheduler_addunlock(s, cj->sorts, t);
-      }
+                                      ci->progeny[6], cj->progeny[1], 1),
+                    1, s);
+                scheduler_splittasks_mapper(
+                    scheduler_addtask(s, task_type_pair, t->subtype, 2, 0,
+                                      ci->progeny[7], cj->progeny[0], 1),
+                    1, s);
+                break;
+
+              case 2: /* (  1 ,  1 , -1 ) */
+                t->ci = ci->progeny[6];
+                t->cj = cj->progeny[1];
+                t->flags = 2;
+                t->tight = 1;
+                break;
+
+              case 3: /* (  1 ,  0 ,  1 ) */
+                t->ci = ci->progeny[5];
+                t->cj = cj->progeny[0];
+                t->flags = 3;
+                t->tight = 1;
+                scheduler_splittasks_mapper(
+                    scheduler_addtask(s, task_type_pair, t->subtype, 3, 0,
+                                      ci->progeny[7], cj->progeny[2], 1),
+                    1, s);
+                scheduler_splittasks_mapper(
+                    scheduler_addtask(s, task_type_pair, t->subtype, 0, 0,
+                                      ci->progeny[5], cj->progeny[2], 1),
+                    1, s);
+                scheduler_splittasks_mapper(
+                    scheduler_addtask(s, task_type_pair, t->subtype, 6, 0,
+                                      ci->progeny[7], cj->progeny[0], 1),
+                    1, s);
+                break;
+
+              case 4: /* (  1 ,  0 ,  0 ) */
+                t->ci = ci->progeny[4];
+                t->cj = cj->progeny[0];
+                t->flags = 4;
+                t->tight = 1;
+                scheduler_splittasks_mapper(
+                    scheduler_addtask(s, task_type_pair, t->subtype, 5, 0,
+                                      ci->progeny[5], cj->progeny[0], 1),
+                    1, s);
+                scheduler_splittasks_mapper(
+                    scheduler_addtask(s, task_type_pair, t->subtype, 7, 0,
+                                      ci->progeny[6], cj->progeny[0], 1),
+                    1, s);
+                scheduler_splittasks_mapper(
+                    scheduler_addtask(s, task_type_pair, t->subtype, 8, 0,
+                                      ci->progeny[7], cj->progeny[0], 1),
+                    1, s);
+                scheduler_splittasks_mapper(
+                    scheduler_addtask(s, task_type_pair, t->subtype, 3, 0,
+                                      ci->progeny[4], cj->progeny[1], 1),
+                    1, s);
+                scheduler_splittasks_mapper(
+                    scheduler_addtask(s, task_type_pair, t->subtype, 4, 0,
+                                      ci->progeny[5], cj->progeny[1], 1),
+                    1, s);
+                scheduler_splittasks_mapper(
+                    scheduler_addtask(s, task_type_pair, t->subtype, 6, 0,
+                                      ci->progeny[6], cj->progeny[1], 1),
+                    1, s);
+                scheduler_splittasks_mapper(
+                    scheduler_addtask(s, task_type_pair, t->subtype, 7, 0,
+                                      ci->progeny[7], cj->progeny[1], 1),
+                    1, s);
+                scheduler_splittasks_mapper(
+                    scheduler_addtask(s, task_type_pair, t->subtype, 1, 0,
+                                      ci->progeny[4], cj->progeny[2], 1),
+                    1, s);
+                scheduler_splittasks_mapper(
+                    scheduler_addtask(s, task_type_pair, t->subtype, 2, 0,
+                                      ci->progeny[5], cj->progeny[2], 1),
+                    1, s);
+                scheduler_splittasks_mapper(
+                    scheduler_addtask(s, task_type_pair, t->subtype, 4, 0,
+                                      ci->progeny[6], cj->progeny[2], 1),
+                    1, s);
+                scheduler_splittasks_mapper(
+                    scheduler_addtask(s, task_type_pair, t->subtype, 5, 0,
+                                      ci->progeny[7], cj->progeny[2], 1),
+                    1, s);
+                scheduler_splittasks_mapper(
+                    scheduler_addtask(s, task_type_pair, t->subtype, 0, 0,
+                                      ci->progeny[4], cj->progeny[3], 1),
+                    1, s);
+                scheduler_splittasks_mapper(
+                    scheduler_addtask(s, task_type_pair, t->subtype, 1, 0,
+                                      ci->progeny[5], cj->progeny[3], 1),
+                    1, s);
+                scheduler_splittasks_mapper(
+                    scheduler_addtask(s, task_type_pair, t->subtype, 3, 0,
+                                      ci->progeny[6], cj->progeny[3], 1),
+                    1, s);
+                scheduler_splittasks_mapper(
+                    scheduler_addtask(s, task_type_pair, t->subtype, 4, 0,
+                                      ci->progeny[7], cj->progeny[3], 1),
+                    1, s);
+                break;
+
+              case 5: /* (  1 ,  0 , -1 ) */
+                t->ci = ci->progeny[4];
+                t->cj = cj->progeny[1];
+                t->flags = 5;
+                t->tight = 1;
+                scheduler_splittasks_mapper(
+                    scheduler_addtask(s, task_type_pair, t->subtype, 5, 0,
+                                      ci->progeny[6], cj->progeny[3], 1),
+                    1, s);
+                scheduler_splittasks_mapper(
+                    scheduler_addtask(s, task_type_pair, t->subtype, 2, 0,
+                                      ci->progeny[4], cj->progeny[3], 1),
+                    1, s);
+                scheduler_splittasks_mapper(
+                    scheduler_addtask(s, task_type_pair, t->subtype, 8, 0,
+                                      ci->progeny[6], cj->progeny[1], 1),
+                    1, s);
+                break;
+
+              case 6: /* (  1 , -1 ,  1 ) */
+                t->ci = ci->progeny[5];
+                t->cj = cj->progeny[2];
+                t->flags = 6;
+                t->tight = 1;
+                break;
+
+              case 7: /* (  1 , -1 ,  0 ) */
+                t->ci = ci->progeny[4];
+                t->cj = cj->progeny[3];
+                t->flags = 6;
+                t->tight = 1;
+                scheduler_splittasks_mapper(
+                    scheduler_addtask(s, task_type_pair, t->subtype, 8, 0,
+                                      ci->progeny[5], cj->progeny[2], 1),
+                    1, s);
+                scheduler_splittasks_mapper(
+                    scheduler_addtask(s, task_type_pair, t->subtype, 7, 0,
+                                      ci->progeny[4], cj->progeny[2], 1),
+                    1, s);
+                scheduler_splittasks_mapper(
+                    scheduler_addtask(s, task_type_pair, t->subtype, 7, 0,
+                                      ci->progeny[5], cj->progeny[3], 1),
+                    1, s);
+                break;
+
+              case 8: /* (  1 , -1 , -1 ) */
+                t->ci = ci->progeny[4];
+                t->cj = cj->progeny[3];
+                t->flags = 8;
+                t->tight = 1;
+                break;
+
+              case 9: /* (  0 ,  1 ,  1 ) */
+                t->ci = ci->progeny[3];
+                t->cj = cj->progeny[0];
+                t->flags = 9;
+                t->tight = 1;
+                scheduler_splittasks_mapper(
+                    scheduler_addtask(s, task_type_pair, t->subtype, 9, 0,
+                                      ci->progeny[7], cj->progeny[4], 1),
+                    1, s);
+                scheduler_splittasks_mapper(
+                    scheduler_addtask(s, task_type_pair, t->subtype, 0, 0,
+                                      ci->progeny[3], cj->progeny[4], 1),
+                    1, s);
+                scheduler_splittasks_mapper(
+                    scheduler_addtask(s, task_type_pair, t->subtype, 8, 0,
+                                      ci->progeny[7], cj->progeny[0], 1),
+                    1, s);
+                break;
+
+              case 10: /* (  0 ,  1 ,  0 ) */
+                t->ci = ci->progeny[2];
+                t->cj = cj->progeny[0];
+                t->flags = 10;
+                t->tight = 1;
+                scheduler_splittasks_mapper(
+                    scheduler_addtask(s, task_type_pair, t->subtype, 11, 0,
+                                      ci->progeny[3], cj->progeny[0], 1),
+                    1, s);
+                scheduler_splittasks_mapper(
+                    scheduler_addtask(s, task_type_pair, t->subtype, 7, 0,
+                                      ci->progeny[6], cj->progeny[0], 1),
+                    1, s);
+                scheduler_splittasks_mapper(
+                    scheduler_addtask(s, task_type_pair, t->subtype, 6, 0,
+                                      ci->progeny[7], cj->progeny[0], 1),
+                    1, s);
+                scheduler_splittasks_mapper(
+                    scheduler_addtask(s, task_type_pair, t->subtype, 9, 0,
+                                      ci->progeny[2], cj->progeny[1], 1),
+                    1, s);
+                scheduler_splittasks_mapper(
+                    scheduler_addtask(s, task_type_pair, t->subtype, 10, 0,
+                                      ci->progeny[3], cj->progeny[1], 1),
+                    1, s);
+                scheduler_splittasks_mapper(
+                    scheduler_addtask(s, task_type_pair, t->subtype, 8, 0,
+                                      ci->progeny[6], cj->progeny[1], 1),
+                    1, s);
+                scheduler_splittasks_mapper(
+                    scheduler_addtask(s, task_type_pair, t->subtype, 7, 0,
+                                      ci->progeny[7], cj->progeny[1], 1),
+                    1, s);
+                scheduler_splittasks_mapper(
+                    scheduler_addtask(s, task_type_pair, t->subtype, 1, 0,
+                                      ci->progeny[2], cj->progeny[4], 1),
+                    1, s);
+                scheduler_splittasks_mapper(
+                    scheduler_addtask(s, task_type_pair, t->subtype, 2, 0,
+                                      ci->progeny[3], cj->progeny[4], 1),
+                    1, s);
+                scheduler_splittasks_mapper(
+                    scheduler_addtask(s, task_type_pair, t->subtype, 10, 0,
+                                      ci->progeny[6], cj->progeny[4], 1),
+                    1, s);
+                scheduler_splittasks_mapper(
+                    scheduler_addtask(s, task_type_pair, t->subtype, 11, 0,
+                                      ci->progeny[7], cj->progeny[4], 1),
+                    1, s);
+                scheduler_splittasks_mapper(
+                    scheduler_addtask(s, task_type_pair, t->subtype, 0, 0,
+                                      ci->progeny[2], cj->progeny[5], 1),
+                    1, s);
+                scheduler_splittasks_mapper(
+                    scheduler_addtask(s, task_type_pair, t->subtype, 1, 0,
+                                      ci->progeny[3], cj->progeny[5], 1),
+                    1, s);
+                scheduler_splittasks_mapper(
+                    scheduler_addtask(s, task_type_pair, t->subtype, 9, 0,
+                                      ci->progeny[6], cj->progeny[5], 1),
+                    1, s);
+                scheduler_splittasks_mapper(
+                    scheduler_addtask(s, task_type_pair, t->subtype, 10, 0,
+                                      ci->progeny[7], cj->progeny[5], 1),
+                    1, s);
+                break;
+
+              case 11: /* (  0 ,  1 , -1 ) */
+                t->ci = ci->progeny[2];
+                t->cj = cj->progeny[1];
+                t->flags = 11;
+                t->tight = 1;
+                scheduler_splittasks_mapper(
+                    scheduler_addtask(s, task_type_pair, t->subtype, 11, 0,
+                                      ci->progeny[6], cj->progeny[5], 1),
+                    1, s);
+                scheduler_splittasks_mapper(
+                    scheduler_addtask(s, task_type_pair, t->subtype, 2, 0,
+                                      ci->progeny[2], cj->progeny[5], 1),
+                    1, s);
+                scheduler_splittasks_mapper(
+                    scheduler_addtask(s, task_type_pair, t->subtype, 6, 0,
+                                      ci->progeny[6], cj->progeny[1], 1),
+                    1, s);
+                break;
+
+              case 12: /* (  0 ,  0 ,  1 ) */
+                t->ci = ci->progeny[1];
+                t->cj = cj->progeny[0];
+                t->flags = 12;
+                t->tight = 1;
+                scheduler_splittasks_mapper(
+                    scheduler_addtask(s, task_type_pair, t->subtype, 11, 0,
+                                      ci->progeny[3], cj->progeny[0], 1),
+                    1, s);
+                scheduler_splittasks_mapper(
+                    scheduler_addtask(s, task_type_pair, t->subtype, 5, 0,
+                                      ci->progeny[5], cj->progeny[0], 1),
+                    1, s);
+                scheduler_splittasks_mapper(
+                    scheduler_addtask(s, task_type_pair, t->subtype, 2, 0,
+                                      ci->progeny[7], cj->progeny[0], 1),
+                    1, s);
+                scheduler_splittasks_mapper(
+                    scheduler_addtask(s, task_type_pair, t->subtype, 9, 0,
+                                      ci->progeny[1], cj->progeny[2], 1),
+                    1, s);
+                scheduler_splittasks_mapper(
+                    scheduler_addtask(s, task_type_pair, t->subtype, 12, 0,
+                                      ci->progeny[3], cj->progeny[2], 1),
+                    1, s);
+                scheduler_splittasks_mapper(
+                    scheduler_addtask(s, task_type_pair, t->subtype, 8, 0,
+                                      ci->progeny[5], cj->progeny[2], 1),
+                    1, s);
+                scheduler_splittasks_mapper(
+                    scheduler_addtask(s, task_type_pair, t->subtype, 5, 0,
+                                      ci->progeny[7], cj->progeny[2], 1),
+                    1, s);
+                scheduler_splittasks_mapper(
+                    scheduler_addtask(s, task_type_pair, t->subtype, 3, 0,
+                                      ci->progeny[1], cj->progeny[4], 1),
+                    1, s);
+                scheduler_splittasks_mapper(
+                    scheduler_addtask(s, task_type_pair, t->subtype, 6, 0,
+                                      ci->progeny[3], cj->progeny[4], 1),
+                    1, s);
+                scheduler_splittasks_mapper(
+                    scheduler_addtask(s, task_type_pair, t->subtype, 12, 0,
+                                      ci->progeny[5], cj->progeny[4], 1),
+                    1, s);
+                scheduler_splittasks_mapper(
+                    scheduler_addtask(s, task_type_pair, t->subtype, 11, 0,
+                                      ci->progeny[7], cj->progeny[4], 1),
+                    1, s);
+                scheduler_splittasks_mapper(
+                    scheduler_addtask(s, task_type_pair, t->subtype, 0, 0,
+                                      ci->progeny[1], cj->progeny[6], 1),
+                    1, s);
+                scheduler_splittasks_mapper(
+                    scheduler_addtask(s, task_type_pair, t->subtype, 3, 0,
+                                      ci->progeny[3], cj->progeny[6], 1),
+                    1, s);
+                scheduler_splittasks_mapper(
+                    scheduler_addtask(s, task_type_pair, t->subtype, 9, 0,
+                                      ci->progeny[5], cj->progeny[6], 1),
+                    1, s);
+                scheduler_splittasks_mapper(
+                    scheduler_addtask(s, task_type_pair, t->subtype, 12, 0,
+                                      ci->progeny[7], cj->progeny[6], 1),
+                    1, s);
+                break;
+            } /* switch(sid) */
+          }
+
+          /* Otherwise, break it up if it is too large? */
+        } else if (scheduler_doforcesplit && ci->split && cj->split &&
+                   (ci->count > space_maxsize / cj->count)) {
+
+          // message( "force splitting pair with %i and %i parts." , ci->count ,
+          // cj->count );
+
+          /* Replace the current task. */
+          t->type = task_type_none;
+
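+          /* Make a new pair task for every combination of non-empty progeny
+             cells and recurse on each of them. */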
+          for (int j = 0; j < 8; j++)
+            if (ci->progeny[j] != NULL)
+              for (int k = 0; k < 8; k++)
+                if (cj->progeny[k] != NULL) {
+                  struct task *tl =
+                      scheduler_addtask(s, task_type_pair, t->subtype, 0, 0,
+                                        ci->progeny[j], cj->progeny[k], 0);
+                  scheduler_splittasks_mapper(tl, 1, s);
+                  tl->flags = space_getsid(s->space, &tl->ci, &tl->cj, shift);
+                }
 
-    } /* pair interaction? */
+          /* Otherwise, if not split, stitch up the sorting. */
+        } else {
 
-  } /* iterate over the current task. */
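+          /* Several pair tasks involving the same cell may be processed
+             concurrently by the threadpool, so take the cell lock before
+             creating or extending its sort task. */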
+          /* Create the sort for ci. */
+          lock_lock(&ci->lock);
+          if (ci->sorts == NULL)
+            ci->sorts = scheduler_addtask(s, task_type_sort, 0, 1 << sid, 0, ci,
+                                          NULL, 0);
+          else
+            ci->sorts->flags |= (1 << sid);
+          lock_unlock_blind(&ci->lock);
+          scheduler_addunlock(s, ci->sorts, t);
+
+          /* Create the sort for cj. */
+          lock_lock(&cj->lock);
+          if (cj->sorts == NULL)
+            cj->sorts = scheduler_addtask(s, task_type_sort, 0, 1 << sid, 0, cj,
+                                          NULL, 0);
+          else
+            cj->sorts->flags |= (1 << sid);
+          lock_unlock_blind(&cj->lock);
+          scheduler_addunlock(s, cj->sorts, t);
+        }
+
+      } /* pair interaction? */
+
+    } /* iterate over the current task. */
+  }
 }
 
 void scheduler_splittasks(struct scheduler *s) {
 
   /* Call the mapper on each current task. */
   threadpool_map(s->threadpool, scheduler_splittasks_mapper, s->tasks,
-                 s->nr_tasks, sizeof(struct task), s);
+                 s->nr_tasks, sizeof(struct task), 1, s);
 }
 
 /**
@@ -758,16 +763,21 @@ void scheduler_set_unlocks(struct scheduler *s) {
  *        graph and re-computes the task wait counters.
  */
 
-void scheduler_simple_rewait_mapper(void *map_data, void *extra_data) {
+void scheduler_simple_rewait_mapper(void *map_data, int num_elements,
+                                    void *extra_data) {
 
-  struct task *t = (struct task *)map_data;
+  struct task *tasks = (struct task *)map_data;
+  for (int ind = 0; ind < num_elements; ind++) {
+    struct task *t = &tasks[ind];
 
-  /* Increment the waits of the dependances */
-  for (int k = 0; k < t->nr_unlock_tasks; k++) {
-    struct task *u = t->unlock_tasks[k];
-    atomic_inc(&u->wait);
+    /* Increment the waits of the dependencies */
+    for (int k = 0; k < t->nr_unlock_tasks; k++) {
+      struct task *u = t->unlock_tasks[k];
+      atomic_inc(&u->wait);
+    }
   }
 }
+
 /**
  * @brief Sort the tasks in topological order over all queues.
  *
@@ -782,7 +792,7 @@ void scheduler_ranktasks(struct scheduler *s) {
 
   /* Run through the tasks and get all the waits right. */
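+  /* A large chunk keeps the per-chunk threadpool overhead low, since each
+     task is cheap to process here. */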
   threadpool_map(s->threadpool, scheduler_simple_rewait_mapper, tasks, nr_tasks,
-                 sizeof(struct task), NULL);
+                 sizeof(struct task), 1000, NULL);
 
   /* Load the tids of tasks with no waits. */
   int left = 0;
@@ -960,33 +970,43 @@ void scheduler_reweight(struct scheduler *s) {
  *        graph and re-computes the task wait counters.
  */
 
-void scheduler_rewait_mapper(void *map_data, void *extra_data) {
+void scheduler_rewait_mapper(void *map_data, int num_elements,
+                             void *extra_data) {
 
   struct scheduler *s = (struct scheduler *)extra_data;
-  struct task *t = (struct task *)map_data;
+  struct task *tasks = (struct task *)map_data;
 
-  if (t->skip) return;
+  for (int ind = 0; ind < num_elements; ind++) {
+    struct task *t = &tasks[ind];
 
-  /* Skip tasks not in the mask */
-  if (!((1 << t->type) & s->mask) || !((1 << t->subtype) & s->submask)) return;
+    if (t->skip || !((1 << t->type) & s->mask) ||
+        !((1 << t->subtype) & s->submask))
+      continue;
 
-  /* Skip sort tasks that have already been performed */
-  if (t->type == task_type_sort && t->flags == 0) return;
+    /* Empty sort tasks should no longer occur at this point. */
+    if (t->type == task_type_sort && t->flags == 0) {
+      error("empty sort task, bad!");
+    }
 
-  /* Sets the waits of the dependances */
-  for (int k = 0; k < t->nr_unlock_tasks; k++) {
-    struct task *u = t->unlock_tasks[k];
-    atomic_inc(&u->wait);
+    /* Set the waits of the dependencies */
+    for (int k = 0; k < t->nr_unlock_tasks; k++) {
+      struct task *u = t->unlock_tasks[k];
+      atomic_inc(&u->wait);
+    }
   }
 }
 
-void scheduler_enqueue_mapper(void *map_data, void *extra_data) {
+void scheduler_enqueue_mapper(void *map_data, int num_elements,
+                              void *extra_data) {
   struct scheduler *s = (struct scheduler *)extra_data;
-  struct task *t = (struct task *)map_data;
-  if (atomic_dec(&t->wait) == 1 && !t->skip && ((1 << t->type) & s->mask) &&
-      ((1 << t->subtype) & s->submask)) {
-    scheduler_enqueue(s, t);
-    pthread_cond_signal(&s->sleep_cond);
+  struct task *tasks = (struct task *)map_data;
+  for (int ind = 0; ind < num_elements; ind++) {
+    struct task *t = &tasks[ind];
+    if (atomic_dec(&t->wait) == 1 && !t->skip && ((1 << t->type) & s->mask) &&
+        ((1 << t->subtype) & s->submask)) {
+      scheduler_enqueue(s, t);
+      pthread_cond_signal(&s->sleep_cond);
+    }
   }
 }
 
@@ -1015,11 +1035,11 @@ void scheduler_start(struct scheduler *s, unsigned int mask,
 
   /* Re-wait the tasks. */
   threadpool_map(s->threadpool, scheduler_rewait_mapper, s->tasks, s->nr_tasks,
-                 sizeof(struct task), s);
+                 sizeof(struct task), 1000, s);
 
   /* Loop over the tasks and enqueue whoever is ready. */
   threadpool_map(s->threadpool, scheduler_enqueue_mapper, s->tasks, s->nr_tasks,
-                 sizeof(struct task), s);
+                 sizeof(struct task), 1000, s);
 
   /* To be safe, fire of one last sleep_cond in a safe way. */
   pthread_mutex_lock(&s->sleep_mutex);
diff --git a/src/space.c b/src/space.c
index 5f6e9aa4fb03ab72318578d5ec0180e0f52b5ca2..4f6a3362534b1454b5864bfde25a2a497ce72697 100644
--- a/src/space.c
+++ b/src/space.c
@@ -654,7 +654,7 @@ void space_split(struct space *s, struct cell *cells, int verbose) {
   const ticks tic = getticks();
 
   threadpool_map(&s->e->threadpool, space_split_mapper, cells, s->nr_cells,
-                 sizeof(struct cell), s);
+                 sizeof(struct cell), 1, s);
 
   if (verbose)
     message("took %.3f %s.", clocks_from_ticks(getticks() - tic),
@@ -684,8 +684,8 @@ void space_parts_sort(struct space *s, int *ind, size_t N, int min, int max,
   sort_struct.xparts = s->xparts;
   sort_struct.ind = ind;
   sort_struct.stack_size = 2 * (max - min + 1) + 10 + s->e->nr_threads;
-  if ((sort_struct.stack = malloc(sizeof(struct qstack) *
-                                        sort_struct.stack_size)) == NULL)
+  if ((sort_struct.stack =
+           malloc(sizeof(struct qstack) * sort_struct.stack_size)) == NULL)
     error("Failed to allocate sorting stack.");
   for (int i = 0; i < sort_struct.stack_size; i++)
     sort_struct.stack[i].ready = 0;
@@ -702,8 +702,8 @@ void space_parts_sort(struct space *s, int *ind, size_t N, int min, int max,
 
   /* Launch the sorting tasks with a stride of zero such that the same
      map data is passed to each thread. */
-  threadpool_map(&s->e->threadpool, space_parts_sort_mapper,
-    &sort_struct, s->e->threadpool.num_threads, 0, NULL);
+  threadpool_map(&s->e->threadpool, space_parts_sort_mapper, &sort_struct,
+                 s->e->threadpool.num_threads, 0, 1, NULL);
 
   /* Verify sort_struct. */
   /* for (int i = 1; i < N; i++)
@@ -721,7 +721,7 @@ void space_parts_sort(struct space *s, int *ind, size_t N, int min, int max,
             clocks_getunit());
 }
 
-void space_parts_sort_mapper(void *map_data, void *extra_data) {
+void space_parts_sort_mapper(void *map_data, int num_elements, void *extra_data) {
 
   /* Unpack the mapping data. */
   struct parallel_sort *sort_struct = (struct parallel_sort *)map_data;
@@ -735,8 +735,7 @@ void space_parts_sort_mapper(void *map_data, void *extra_data) {
   while (sort_struct->waiting) {
 
     /* Grab an interval off the queue. */
-    int qid =
-        atomic_inc(&sort_struct->first) % sort_struct->stack_size;
+    int qid = atomic_inc(&sort_struct->first) % sort_struct->stack_size;
 
     /* Wait for the entry to be ready, or for the sorting do be done. */
     while (!sort_struct->stack[qid].ready)
@@ -795,16 +794,14 @@ void space_parts_sort_mapper(void *map_data, void *extra_data) {
 
         /* Recurse on the left? */
         if (jj > i && pivot > min) {
-          qid = atomic_inc(&sort_struct->last) %
-                sort_struct->stack_size;
+          qid = atomic_inc(&sort_struct->last) % sort_struct->stack_size;
           while (sort_struct->stack[qid].ready)
             ;
           sort_struct->stack[qid].i = i;
           sort_struct->stack[qid].j = jj;
           sort_struct->stack[qid].min = min;
           sort_struct->stack[qid].max = pivot;
-          if (atomic_inc(&sort_struct->waiting) >=
-              sort_struct->stack_size)
+          if (atomic_inc(&sort_struct->waiting) >= sort_struct->stack_size)
             error("Qstack overflow.");
           sort_struct->stack[qid].ready = 1;
         }
@@ -820,16 +817,14 @@ void space_parts_sort_mapper(void *map_data, void *extra_data) {
 
         /* Recurse on the right? */
         if (pivot + 1 < max) {
-          qid = atomic_inc(&sort_struct->last) %
-                sort_struct->stack_size;
+          qid = atomic_inc(&sort_struct->last) % sort_struct->stack_size;
           while (sort_struct->stack[qid].ready)
             ;
           sort_struct->stack[qid].i = jj + 1;
           sort_struct->stack[qid].j = j;
           sort_struct->stack[qid].min = pivot + 1;
           sort_struct->stack[qid].max = max;
-          if (atomic_inc(&sort_struct->waiting) >=
-              sort_struct->stack_size)
+          if (atomic_inc(&sort_struct->waiting) >= sort_struct->stack_size)
             error("Qstack overflow.");
           sort_struct->stack[qid].ready = 1;
         }
@@ -870,8 +865,8 @@ void space_gparts_sort(struct space *s, int *ind, size_t N, int min, int max,
   sort_struct.gparts = s->gparts;
   sort_struct.ind = ind;
   sort_struct.stack_size = 2 * (max - min + 1) + 10 + s->e->nr_threads;
-  if ((sort_struct.stack = malloc(sizeof(struct qstack) *
-                                        sort_struct.stack_size)) == NULL)
+  if ((sort_struct.stack =
+           malloc(sizeof(struct qstack) * sort_struct.stack_size)) == NULL)
     error("Failed to allocate sorting stack.");
   for (int i = 0; i < sort_struct.stack_size; i++)
     sort_struct.stack[i].ready = 0;
@@ -888,8 +883,8 @@ void space_gparts_sort(struct space *s, int *ind, size_t N, int min, int max,
 
   /* Launch the sorting tasks with a stride of zero such that the same
      map data is passed to each thread. */
-  threadpool_map(&s->e->threadpool, space_gparts_sort_mapper,
-    &sort_struct, s->e->threadpool.num_threads, 0, NULL);
+  threadpool_map(&s->e->threadpool, space_gparts_sort_mapper, &sort_struct,
+                 s->e->threadpool.num_threads, 0, 1, NULL);
 
   /* Verify sort_struct. */
   /* for (int i = 1; i < N; i++)
@@ -907,7 +902,7 @@ void space_gparts_sort(struct space *s, int *ind, size_t N, int min, int max,
             clocks_getunit());
 }
 
-void space_gparts_sort_mapper(void *map_data, void *extra_data) {
+void space_gparts_sort_mapper(void *map_data, int num_elements, void *extra_data) {
 
   /* Unpack the mapping data. */
   struct parallel_sort *sort_struct = (struct parallel_sort *)map_data;
@@ -920,8 +915,7 @@ void space_gparts_sort_mapper(void *map_data, void *extra_data) {
   while (sort_struct->waiting) {
 
     /* Grab an interval off the queue. */
-    int qid =
-        atomic_inc(&sort_struct->first) % sort_struct->stack_size;
+    int qid = atomic_inc(&sort_struct->first) % sort_struct->stack_size;
 
     /* Wait for the entry to be ready, or for the sorting do be done. */
     while (!sort_struct->stack[qid].ready)
@@ -977,16 +971,14 @@ void space_gparts_sort_mapper(void *map_data, void *extra_data) {
 
         /* Recurse on the left? */
         if (jj > i && pivot > min) {
-          qid = atomic_inc(&sort_struct->last) %
-                sort_struct->stack_size;
+          qid = atomic_inc(&sort_struct->last) % sort_struct->stack_size;
           while (sort_struct->stack[qid].ready)
             ;
           sort_struct->stack[qid].i = i;
           sort_struct->stack[qid].j = jj;
           sort_struct->stack[qid].min = min;
           sort_struct->stack[qid].max = pivot;
-          if (atomic_inc(&sort_struct->waiting) >=
-              sort_struct->stack_size)
+          if (atomic_inc(&sort_struct->waiting) >= sort_struct->stack_size)
             error("Qstack overflow.");
           sort_struct->stack[qid].ready = 1;
         }
@@ -1002,16 +994,14 @@ void space_gparts_sort_mapper(void *map_data, void *extra_data) {
 
         /* Recurse on the right? */
         if (pivot + 1 < max) {
-          qid = atomic_inc(&sort_struct->last) %
-                sort_struct->stack_size;
+          qid = atomic_inc(&sort_struct->last) % sort_struct->stack_size;
           while (sort_struct->stack[qid].ready)
             ;
           sort_struct->stack[qid].i = jj + 1;
           sort_struct->stack[qid].j = j;
           sort_struct->stack[qid].min = pivot + 1;
           sort_struct->stack[qid].max = max;
-          if (atomic_inc(&sort_struct->waiting) >=
-              sort_struct->stack_size)
+          if (atomic_inc(&sort_struct->waiting) >= sort_struct->stack_size)
             error("Qstack overflow.");
           sort_struct->stack[qid].ready = 1;
         }
@@ -1211,124 +1201,129 @@ void space_map_cells_pre(struct space *s, int full,
  *        too many particles.
  */
 
-void space_split_mapper(void *map_data, void *extra_data) {
+void space_split_mapper(void *map_data, int num_elements, void *extra_data) {
 
   /* Unpack the inputs. */
   struct space *s = (struct space *)extra_data;
-  struct cell *c = (struct cell *)map_data;
-
-  const int count = c->count;
-  const int gcount = c->gcount;
-  int maxdepth = 0;
-  float h_max = 0.0f;
-  int ti_end_min = max_nr_timesteps, ti_end_max = 0;
-  struct cell *temp;
-  struct part *parts = c->parts;
-  struct gpart *gparts = c->gparts;
-  struct xpart *xparts = c->xparts;
-
-  /* Check the depth. */
-  if (c->depth > s->maxdepth) s->maxdepth = c->depth;
-
-  /* Split or let it be? */
-  if (count > space_splitsize || gcount > space_splitsize) {
-
-    /* No longer just a leaf. */
-    c->split = 1;
-
-    /* Create the cell's progeny. */
-    for (int k = 0; k < 8; k++) {
-      temp = space_getcell(s);
-      temp->count = 0;
-      temp->gcount = 0;
-      temp->loc[0] = c->loc[0];
-      temp->loc[1] = c->loc[1];
-      temp->loc[2] = c->loc[2];
-      temp->h[0] = c->h[0] / 2;
-      temp->h[1] = c->h[1] / 2;
-      temp->h[2] = c->h[2] / 2;
-      temp->dmin = c->dmin / 2;
-      if (k & 4) temp->loc[0] += temp->h[0];
-      if (k & 2) temp->loc[1] += temp->h[1];
-      if (k & 1) temp->loc[2] += temp->h[2];
-      temp->depth = c->depth + 1;
-      temp->split = 0;
-      temp->h_max = 0.0;
-      temp->dx_max = 0.f;
-      temp->nodeID = c->nodeID;
-      temp->parent = c;
-      c->progeny[k] = temp;
-    }
-
-    /* Split the cell data. */
-    cell_split(c);
-
-    /* Remove any progeny with zero parts. */
-    for (int k = 0; k < 8; k++)
-      if (c->progeny[k]->count == 0 && c->progeny[k]->gcount == 0) {
-        space_recycle(s, c->progeny[k]);
-        c->progeny[k] = NULL;
-      } else {
-        space_split_mapper(c->progeny[k], s);
-        h_max = fmaxf(h_max, c->progeny[k]->h_max);
-        ti_end_min = min(ti_end_min, c->progeny[k]->ti_end_min);
-        ti_end_max = max(ti_end_max, c->progeny[k]->ti_end_max);
-        if (c->progeny[k]->maxdepth > maxdepth)
-          maxdepth = c->progeny[k]->maxdepth;
+  struct cell *cells = (struct cell *)map_data;
+
+  for (int ind = 0; ind < num_elements; ind++) {
+
+    struct cell *c = &cells[ind];
+
+    const int count = c->count;
+    const int gcount = c->gcount;
+    int maxdepth = 0;
+    float h_max = 0.0f;
+    int ti_end_min = max_nr_timesteps, ti_end_max = 0;
+    struct cell *temp;
+    struct part *parts = c->parts;
+    struct gpart *gparts = c->gparts;
+    struct xpart *xparts = c->xparts;
+
+    /* Check the depth. */
+    if (c->depth > s->maxdepth) s->maxdepth = c->depth;
+
+    /* Split or let it be? */
+    if (count > space_splitsize || gcount > space_splitsize) {
+
+      /* No longer just a leaf. */
+      c->split = 1;
+
+      /* Create the cell's progeny. */
+      for (int k = 0; k < 8; k++) {
+        temp = space_getcell(s);
+        temp->count = 0;
+        temp->gcount = 0;
+        temp->loc[0] = c->loc[0];
+        temp->loc[1] = c->loc[1];
+        temp->loc[2] = c->loc[2];
+        temp->h[0] = c->h[0] / 2;
+        temp->h[1] = c->h[1] / 2;
+        temp->h[2] = c->h[2] / 2;
+        temp->dmin = c->dmin / 2;
+        if (k & 4) temp->loc[0] += temp->h[0];
+        if (k & 2) temp->loc[1] += temp->h[1];
+        if (k & 1) temp->loc[2] += temp->h[2];
+        temp->depth = c->depth + 1;
+        temp->split = 0;
+        temp->h_max = 0.0;
+        temp->dx_max = 0.f;
+        temp->nodeID = c->nodeID;
+        temp->parent = c;
+        c->progeny[k] = temp;
       }
 
-    /* Set the values for this cell. */
-    c->h_max = h_max;
-    c->ti_end_min = ti_end_min;
-    c->ti_end_max = ti_end_max;
-    c->maxdepth = maxdepth;
-
-  }
+      /* Split the cell data. */
+      cell_split(c);
+
+      /* Remove any progeny with zero parts. */
+      for (int k = 0; k < 8; k++)
+        if (c->progeny[k]->count == 0 && c->progeny[k]->gcount == 0) {
+          space_recycle(s, c->progeny[k]);
+          c->progeny[k] = NULL;
+        } else {
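+          /* Recurse on this progeny cell, one element at a time. */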
+          space_split_mapper(c->progeny[k], 1, s);
+          h_max = fmaxf(h_max, c->progeny[k]->h_max);
+          ti_end_min = min(ti_end_min, c->progeny[k]->ti_end_min);
+          ti_end_max = max(ti_end_max, c->progeny[k]->ti_end_max);
+          if (c->progeny[k]->maxdepth > maxdepth)
+            maxdepth = c->progeny[k]->maxdepth;
+        }
 
-  /* Otherwise, collect the data for this cell. */
-  else {
+      /* Set the values for this cell. */
+      c->h_max = h_max;
+      c->ti_end_min = ti_end_min;
+      c->ti_end_max = ti_end_max;
+      c->maxdepth = maxdepth;
 
-    /* Clear the progeny. */
-    bzero(c->progeny, sizeof(struct cell *) * 8);
-    c->split = 0;
-    c->maxdepth = c->depth;
-
-    /* Get dt_min/dt_max. */
-    for (int k = 0; k < count; k++) {
-      struct part *p = &parts[k];
-      struct xpart *xp = &xparts[k];
-      const float h = p->h;
-      const int ti_end = p->ti_end;
-      xp->x_diff[0] = 0.f;
-      xp->x_diff[1] = 0.f;
-      xp->x_diff[2] = 0.f;
-      if (h > h_max) h_max = h;
-      if (ti_end < ti_end_min) ti_end_min = ti_end;
-      if (ti_end > ti_end_max) ti_end_max = ti_end;
     }
-    for (int k = 0; k < gcount; k++) {
-      struct gpart *gp = &gparts[k];
-      const int ti_end = gp->ti_end;
-      gp->x_diff[0] = 0.f;
-      gp->x_diff[1] = 0.f;
-      gp->x_diff[2] = 0.f;
-      if (ti_end < ti_end_min) ti_end_min = ti_end;
-      if (ti_end > ti_end_max) ti_end_max = ti_end;
+
+    /* Otherwise, collect the data for this cell. */
+    else {
+
+      /* Clear the progeny. */
+      bzero(c->progeny, sizeof(struct cell *) * 8);
+      c->split = 0;
+      c->maxdepth = c->depth;
+
+      /* Get dt_min/dt_max. */
+      for (int k = 0; k < count; k++) {
+        struct part *p = &parts[k];
+        struct xpart *xp = &xparts[k];
+        const float h = p->h;
+        const int ti_end = p->ti_end;
+        xp->x_diff[0] = 0.f;
+        xp->x_diff[1] = 0.f;
+        xp->x_diff[2] = 0.f;
+        if (h > h_max) h_max = h;
+        if (ti_end < ti_end_min) ti_end_min = ti_end;
+        if (ti_end > ti_end_max) ti_end_max = ti_end;
+      }
+      for (int k = 0; k < gcount; k++) {
+        struct gpart *gp = &gparts[k];
+        const int ti_end = gp->ti_end;
+        gp->x_diff[0] = 0.f;
+        gp->x_diff[1] = 0.f;
+        gp->x_diff[2] = 0.f;
+        if (ti_end < ti_end_min) ti_end_min = ti_end;
+        if (ti_end > ti_end_max) ti_end_max = ti_end;
+      }
+      c->h_max = h_max;
+      c->ti_end_min = ti_end_min;
+      c->ti_end_max = ti_end_max;
     }
-    c->h_max = h_max;
-    c->ti_end_min = ti_end_min;
-    c->ti_end_max = ti_end_max;
-  }
 
-  /* Set ownership according to the start of the parts array. */
-  if (s->nr_parts > 0)
-    c->owner =
-        ((c->parts - s->parts) % s->nr_parts) * s->nr_queues / s->nr_parts;
-  else if (s->nr_gparts > 0)
-    c->owner =
-        ((c->gparts - s->gparts) % s->nr_gparts) * s->nr_queues / s->nr_gparts;
-  else
-    c->owner = 0; /* Ok, there is really nothing on this rank... */
+    /* Set ownership according to the start of the parts array. */
+    if (s->nr_parts > 0)
+      c->owner =
+          ((c->parts - s->parts) % s->nr_parts) * s->nr_queues / s->nr_parts;
+    else if (s->nr_gparts > 0)
+      c->owner = ((c->gparts - s->gparts) % s->nr_gparts) * s->nr_queues /
+                 s->nr_gparts;
+    else
+      c->owner = 0; /* Ok, there is really nothing on this rank... */
+  }
 }
 
 /**
diff --git a/src/space.h b/src/space.h
index d448cd1279611c26139672608798089f7c6a7afb..d6802db02fccc398f15ad5637d24c5ab7140d147 100644
--- a/src/space.h
+++ b/src/space.h
@@ -150,12 +150,12 @@ void space_map_parts_xparts(struct space *s,
                                         struct cell *c));
 void space_map_cells_post(struct space *s, int full,
                           void (*fun)(struct cell *c, void *data), void *data);
-void space_parts_sort_mapper(void *map_data, void *extra_data);
-void space_gparts_sort_mapper(void *map_data, void *extra_data);
+void space_parts_sort_mapper(void *map_data, int num_elements, void *extra_data);
+void space_gparts_sort_mapper(void *map_data, int num_elements, void *extra_data);
 void space_rebuild(struct space *s, double h_max, int verbose);
 void space_recycle(struct space *s, struct cell *c);
 void space_split(struct space *s, struct cell *cells, int verbose);
-void space_split_mapper(void *map_data, void *extra_data);
+void space_split_mapper(void *map_data, int num_elements, void *extra_data);
 void space_do_parts_sort();
 void space_do_gparts_sort();
 void space_link_cleanup(struct space *s);
diff --git a/src/threadpool.c b/src/threadpool.c
index 09dd642e1c1bf061f9113440429abdb104c930c6..8605e04bac08bdf61295446bd5074de0182a573b 100644
--- a/src/threadpool.c
+++ b/src/threadpool.c
@@ -48,17 +48,25 @@ void *threadpool_runner(void *data) {
     if (tp->num_threads_waiting == tp->num_threads) {
       pthread_cond_signal(&tp->control_cond);
     }
-    
+
     /* Wait for the controller. */
     pthread_cond_wait(&tp->thread_cond, &tp->thread_mutex);
     tp->num_threads_waiting -= 1;
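+    /* Mark this thread as running; once every thread has picked up the new
+       map data, wake the controller so it can start waiting for them to
+       finish. */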
+    tp->num_threads_running += 1;
+    if (tp->num_threads_running == tp->num_threads) {
+      pthread_cond_signal(&tp->control_cond);
+    }
     pthread_mutex_unlock(&tp->thread_mutex);
 
     /* The index of the mapping task we will work on next. */
     size_t task_ind;
-    while ((task_ind = atomic_inc(&tp->map_data_count)) < tp->map_data_size) {
+    while ((task_ind = atomic_add(&tp->map_data_count, tp->map_data_chunk)) <
+           tp->map_data_size) {
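+      /* The last chunk may be smaller than map_data_chunk, so clamp it to
+         the end of the map data. */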
+      const int num_elements = task_ind + tp->map_data_chunk > tp->map_data_size
+                                   ? tp->map_data_size - task_ind
+                                   : tp->map_data_chunk;
       tp->map_function(tp->map_data + tp->map_data_stride * task_ind,
-                       tp->map_extra_data);
+                       num_elements, tp->map_extra_data);
     }
   }
 }
@@ -80,6 +88,7 @@ void threadpool_init(struct threadpool *tp, int num_threads) {
   tp->map_data_size = 0;
   tp->map_data_count = 0;
   tp->map_data_stride = 0;
+  tp->map_data_chunk = 0;
   tp->map_function = NULL;
 
   /* Allocate the threads. */
@@ -89,13 +98,13 @@ void threadpool_init(struct threadpool *tp, int num_threads) {
   }
 
   /* Create and start the threads. */
+  pthread_mutex_lock(&tp->thread_mutex);
   for (int k = 0; k < num_threads; k++) {
     if (pthread_create(&tp->threads[k], NULL, &threadpool_runner, tp) != 0)
       error("Failed to create threadpool runner thread.");
   }
 
   /* Wait for all the threads to be up and running. */
-  pthread_mutex_lock(&tp->thread_mutex);
   while (tp->num_threads_waiting < tp->num_threads) {
     pthread_cond_wait(&tp->control_cond, &tp->thread_mutex);
   }
@@ -113,24 +122,35 @@ void threadpool_init(struct threadpool *tp, int num_threads) {
  * @param map_data The data on which the mapping function will be called.
  * @param N Number of elements in @c map_data.
  * @param stride Size, in bytes, of each element of @c map_data.
+ * @param chunk Number of map data elements to pass to the function at a time.
 * @param extra_data Additional pointer that will be passed to the mapping
  *        function, may contain additional data.
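+ *
+ * A minimal usage sketch (the mapper and variable names below are
+ * illustrative only, not taken from the codebase):
+ *
+ *   void double_mapper(void *map_data, int num_elements, void *extra_data) {
+ *     int *values = (int *)map_data;
+ *     for (int i = 0; i < num_elements; i++) values[i] *= 2;
+ *   }
+ *
+ *   threadpool_map(&tp, double_mapper, values, N, sizeof(int), 16, NULL);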
  */
 
 void threadpool_map(struct threadpool *tp, threadpool_map_function map_function,
-                    void *map_data, size_t N, int stride, void *extra_data) {
+                    void *map_data, size_t N, int stride, int chunk,
+                    void *extra_data) {
 
   /* Set the map data and signal the threads. */
   pthread_mutex_lock(&tp->thread_mutex);
   tp->map_data_stride = stride;
   tp->map_data_size = N;
   tp->map_data_count = 0;
+  tp->map_data_chunk = chunk;
   tp->map_function = map_function;
   tp->map_data = map_data;
   tp->map_extra_data = extra_data;
+  tp->num_threads_running = 0;
   pthread_cond_broadcast(&tp->thread_cond);
 
-  /* Wait for the threads to come home. */
-  pthread_cond_wait(&tp->control_cond, &tp->thread_mutex);
+  /* Wait for all the threads to be up and running. */
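+  /* (Waiting only on num_threads_waiting would not work here: it still
+     counts the threads parked since the previous call, so the controller
+     could return before any work had started.) */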
+  while (tp->num_threads_running < tp->num_threads) {
+    pthread_cond_wait(&tp->control_cond, &tp->thread_mutex);
+  }
+
+  /* Wait for all threads to be done. */
+  while (tp->num_threads_waiting < tp->num_threads) {
+    pthread_cond_wait(&tp->control_cond, &tp->thread_mutex);
+  }
   pthread_mutex_unlock(&tp->thread_mutex);
 }
diff --git a/src/threadpool.h b/src/threadpool.h
index 8ebe34319b47fff35998355a77b8c2b6de30a856..66dc6752d29a654dbaacfd1d0267f1cedf4dd981 100644
--- a/src/threadpool.h
+++ b/src/threadpool.h
@@ -26,7 +26,8 @@
 #include <pthread.h>
 
 /* Function type for mappings. */
-typedef void (*threadpool_map_function)(void *map_data, void *extra_data);
+typedef void (*threadpool_map_function)(void *map_data, int num_elements,
+                                        void *extra_data);
 
 /* Data of a threadpool. */
 struct threadpool {
@@ -43,7 +44,8 @@ struct threadpool {
 
   /* Current map data and count. */
   void *map_data, *map_extra_data;
-  volatile size_t map_data_count, map_data_size, map_data_stride;
+  volatile size_t map_data_count, map_data_size, map_data_stride,
+      map_data_chunk;
   volatile threadpool_map_function map_function;
 
+  /* Number of threads currently running the map function. */
+  volatile int num_threads_running;
+
   /* Counter for the number of threads that are done. */
@@ -53,6 +55,6 @@ struct threadpool {
 /* Function prototypes. */
 void threadpool_init(struct threadpool *tp, int num_threads);
 void threadpool_map(struct threadpool *tp, threadpool_map_function map_function,
-                    void *map_data, size_t N, int stride, void *extra_data);
+                    void *map_data, size_t N, int stride, int chunk, void *extra_data);
 
 #endif /* SWIFT_THREADPOOL_H */
diff --git a/tests/threadpool_test.c b/tests/threadpool_test.c
index 9a7d6b1db4962e431ba15f324eb22e5792f98dfa..5207881c27b51ed4090c2c8c015f3b4f97e422fb 100644
--- a/tests/threadpool_test.c
+++ b/tests/threadpool_test.c
@@ -26,18 +26,24 @@
 #include "../src/threadpool.h"
 #include "../src/atomic.h"
 
-void map_function_first(void *map_data, void *extra_data) {
-  const int input = *(int *)map_data;
-  usleep(rand() % 1000000);
-  printf("map_function_first: got input %i.\n", input);
-  fflush(stdout);
+void map_function_first(void *map_data, int num_elements, void *extra_data) {
+  const int *inputs = (int *)map_data;
+  for (int ind = 0; ind < num_elements; ind++) {
+    int input = inputs[ind];
+    usleep(rand() % 1000000);
+    printf("map_function_first: got input %i.\n", input);
+    fflush(stdout);
+  }
 }
 
-void map_function_second(void *map_data, void *extra_data) {
-  const int input = *(int *)map_data;
-  usleep(rand() % 1000000);
-  printf("map_function_second: got input %i.\n", input);
-  fflush(stdout);
+void map_function_second(void *map_data, int num_elements, void *extra_data) {
+  const int *inputs = (int *)map_data;
+  for (int ind = 0; ind < num_elements; ind++) {
+    int input = inputs[ind];
+    usleep(rand() % 1000000);
+    printf("map_function_second: got input %i.\n", input);
+    fflush(stdout);
+  }
 }
 
 int main(int argc, char *argv[]) {
@@ -59,12 +65,16 @@ int main(int argc, char *argv[]) {
     for (int k = 0; k < N; k++) data[k] = k;
     printf("processing integers from 0..%i.\n", N);
     fflush(stdout);
-    threadpool_map(&tp, map_function_first, data, N, sizeof(int), NULL);
+    threadpool_map(&tp, map_function_first, data, N, sizeof(int), 1, NULL);
 
-    // Do the same thing again, with less jobs than threads.
+    // Do the same thing again, with fewer jobs.
-    printf("processing integers from 0..%i.\n", num_threads / 2);
+    printf("processing integers from 0..%i.\n", N / 2);
+    fflush(stdout);
+    threadpool_map(&tp, map_function_second, data, N / 2, sizeof(int), 1, NULL);
+
+    // Do the same thing again, with a chunk size of two.
+    printf("processing integers from 0..%i.\n", N);
     fflush(stdout);
-    threadpool_map(&tp, map_function_second, data, num_threads / 2, sizeof(int),
-                   NULL);
+    threadpool_map(&tp, map_function_first, data, N, sizeof(int), 2, NULL);
   }
 }