diff --git a/src/engine.c b/src/engine.c
index c4f5dccad5c829efd3b1b50211c4f6a06a28e198..80c122b0a911582ee4fef393c549519d8e048066 100644
--- a/src/engine.c
+++ b/src/engine.c
@@ -172,7 +172,7 @@ void engine_redistribute(struct engine *e) {
     dest[k] = cells[cid].nodeID;
     counts[nodeID * nr_nodes + dest[k]] += 1;
   }
-  parts_sort(s->parts, s->xparts, dest, s->nr_parts, 0, nr_nodes - 1);
+  space_parts_sort(s, dest, s->nr_parts, 0, nr_nodes - 1);
 
   /* Get all the counts from all the nodes. */
   if (MPI_Allreduce(MPI_IN_PLACE, counts, nr_nodes * nr_nodes, MPI_INT, MPI_SUM,
@@ -990,6 +990,11 @@ void engine_maketasks(struct engine *e) {
   /* Re-set the scheduler. */
   scheduler_reset(sched, s->tot_cells * engine_maxtaskspercell);
 
+  /* Add the space sorting tasks. */
+  for (i = 0; i < e->nr_threads; i++)
+    scheduler_addtask(sched, task_type_psort, task_subtype_none, i, 0, NULL,
+                      NULL, 0);
+
   /* Run through the highest level of cells and add pairs. */
   for (i = 0; i < cdim[0]; i++)
     for (j = 0; j < cdim[1]; j++)
@@ -1706,15 +1711,6 @@ void engine_launch(struct engine *e, int nr_runners, unsigned int mask) {
       error("Error while waiting for barrier.");
 }
 
-void hassorted(struct cell *c) {
-
-  if (c->sorted) error("Suprious sorted flags.");
-
-  if (c->split)
-    for (int k = 0; k < 8; k++)
-      if (c->progeny[k] != NULL) hassorted(c->progeny[k]);
-}
-
 /**
  * @brief Let the #engine loose to compute the forces.
  *
@@ -2168,13 +2164,18 @@ void engine_init(struct engine *e, struct space *s, float dt, int nr_threads,
   s->nr_queues = nr_queues;
 
   /* Append a kick1 task to each cell. */
-  scheduler_reset(&e->sched, s->tot_cells);
+  scheduler_reset(&e->sched, s->tot_cells + e->nr_threads);
   for (k = 0; k < s->nr_cells; k++)
     s->cells[k].kick1 =
         scheduler_addtask(&e->sched, task_type_kick1, task_subtype_none, 0, 0,
                           &s->cells[k], NULL, 0);
   scheduler_ranktasks(&e->sched);
 
+  /* Create the sorting tasks. */
+  for (i = 0; i < e->nr_threads; i++)
+    scheduler_addtask(&e->sched, task_type_psort, task_subtype_none, i, 0, NULL,
+                      NULL, 0);
+
   /* Allocate and init the threads. */
   if ((e->runners =
            (struct runner *)malloc(sizeof(struct runner) * nr_threads)) == NULL)
diff --git a/src/engine.h b/src/engine.h
index 65aa028401344f953d7a23a5c83c030278bbdcb7..dcf0d5cc0912aca42782d09df01fae6edfa45d81 100644
--- a/src/engine.h
+++ b/src/engine.h
@@ -129,6 +129,7 @@ struct engine {
 void engine_barrier(struct engine *e, int tid);
 void engine_init(struct engine *e, struct space *s, float dt, int nr_threads,
                  int nr_queues, int nr_nodes, int nodeID, int policy);
+void engine_launch(struct engine *e, int nr_runners, unsigned int mask);
 void engine_prepare(struct engine *e);
 void engine_step(struct engine *e);
 void engine_maketasks(struct engine *e);
diff --git a/src/runner.c b/src/runner.c
index e1956ebe9a8125c95e1109dfa9ee0211fbecdea4..b8bef2100b540e1b81370fdc73d44441777fece8 100644
--- a/src/runner.c
+++ b/src/runner.c
@@ -1214,12 +1214,12 @@ void *runner_main(void *data) {
       t->rid = r->cpuid;
 
       /* Set super to the first cell that I own. */
-      if (ci->super != NULL && ci->super->owner == r->qid)
+      if (ci != NULL && ci->super != NULL && ci->super->owner == r->qid)
         super = ci->super;
       else if (cj != NULL && cj->super != NULL && cj->super->owner == r->qid)
         super = cj->super;
-      /* else
-          super = NULL; */
+      else
+        super = NULL;
 
       /* Different types of tasks... */
       switch (t->type) {
@@ -1287,6 +1287,9 @@ void *runner_main(void *data) {
         case task_type_grav_down:
           runner_dograv_down(r, t->ci);
           break;
+        case task_type_psort:
+          space_do_parts_sort();
+          break;
         default:
           error("Unknown task type.");
       }
diff --git a/src/space.c b/src/space.c
index f4d9cda9d367332ea179a5b144fbc9d169c0dd99..f53a94bdeb44c3d08fcaf4efb2cdbcbada47ca54 100644
--- a/src/space.c
+++ b/src/space.c
@@ -43,6 +43,9 @@
 #include "lock.h"
 #include "runner.h"
 
+/* Shared sort structure. */
+struct parallel_sort space_sort_struct;
+
 /* Split size. */
 int space_splitsize = space_splitsize_default;
 int space_subsize = space_subsize_default;
@@ -389,7 +392,7 @@ void space_rebuild(struct space *s, double cell_max, int verbose) {
 
   /* Sort the parts according to their cells. */
   // tic = getticks();
-  parts_sort(s->parts, s->xparts, ind, nr_parts, 0, s->nr_cells - 1);
+  space_parts_sort(s, ind, nr_parts, 0, s->nr_cells - 1);
   // message( "parts_sort took %.3f ms." , (double)(getticks() - tic) / CPU_TPS
   // * 1000 );
 
@@ -397,7 +400,7 @@ void space_rebuild(struct space *s, double cell_max, int verbose) {
   for (k = 0; k < nr_parts; k++)
     if (s->parts[k].gpart != NULL) s->parts[k].gpart->part = &s->parts[k];
 
-  /* Verify sort. */
+  /* Verify space_sort_struct. */
   /* for ( k = 1 ; k < nr_parts ; k++ ) {
       if ( ind[k-1] > ind[k] ) {
           error( "Sort failed!" );
@@ -473,88 +476,97 @@ void space_rebuild(struct space *s, double cell_max, int verbose) {
  * @brief Sort the particles and condensed particles according to the given
  *indices.
  *
- * @param parts The list of #part
- * @param xparts The list of reduced particles
  * @param ind The indices with respect to which the parts are sorted.
  * @param N The number of parts
  * @param min Lowest index.
  * @param max highest index.
  */
 
-void parts_sort(struct part *parts, struct xpart *xparts, int *ind, int N,
-                int min, int max) {
-
-  struct qstack {
-    volatile int i, j, min, max;
-    volatile int ready;
-  };
-  struct qstack *qstack;
-  unsigned int qstack_size = 2 * (max - min) + 10;
-  volatile unsigned int first, last, waiting;
+void space_parts_sort(struct space *s, int *ind, int N, int min, int max) {
+  // Populate a parallel_sort structure with the input data.
+  struct parallel_sort sort;
+  space_sort_struct.parts = s->parts;
+  space_sort_struct.xparts = s->xparts;
+  space_sort_struct.ind = ind;
+  space_sort_struct.stack_size = 2 * (max - min) + 10;
+  if ((space_sort_struct.stack = malloc(sizeof(struct qstack) *
+                                        space_sort_struct.stack_size)) == NULL)
+    error("Failed to allocate sorting stack.");
+  for (int i = 0; i < space_sort_struct.stack_size; i++)
+    space_sort_struct.stack[i].ready = 0;
+
+  // Add the first interval.
+  space_sort_struct.stack[0].i = 0;
+  space_sort_struct.stack[0].j = N - 1;
+  space_sort_struct.stack[0].min = min;
+  space_sort_struct.stack[0].max = max;
+  space_sort_struct.stack[0].ready = 1;
+  space_sort_struct.first = 0;
+  space_sort_struct.last = 1;
+  space_sort_struct.waiting = 1;
+
+  // Launch the sorting tasks.
+  engine_launch(s->e, s->e->nr_threads, (1 << task_type_psort));
+
+  /* Verify space_sort_struct. */
+  /* for (int i = 1; i < N; i++)
+    if (ind[i - 1] > ind[i])
+      error("Sorting failed (ind[%i]=%i,ind[%i]=%i).", i - 1, ind[i - 1], i,
+            ind[i]); */
+
+  // Clean up.
+  free(space_sort_struct.stack);
+}
 
-  int pivot;
-  int i, ii, j, jj, temp_i, qid;
-  struct part temp_p;
-  struct xpart temp_xp;
+void space_do_parts_sort() {
 
-  /* for ( int k = 0 ; k < N ; k++ )
-      if ( ind[k] > max || ind[k] < min )
-          error( "ind[%i]=%i is not in [%i,%i]." , k , ind[k] , min , max ); */
-
-  /* Allocate the stack. */
-  if ((qstack = malloc(sizeof(struct qstack) * qstack_size)) == NULL)
-    error("Failed to allocate qstack.");
-
-  /* Init the interval stack. */
-  qstack[0].i = 0;
-  qstack[0].j = N - 1;
-  qstack[0].min = min;
-  qstack[0].max = max;
-  qstack[0].ready = 1;
-  for (i = 1; i < qstack_size; i++) qstack[i].ready = 0;
-  first = 0;
-  last = 1;
-  waiting = 1;
+  /* Pointers to the sorting data. */
+  int *ind = space_sort_struct.ind;
+  struct part *parts = space_sort_struct.parts;
+  struct xpart *xparts = space_sort_struct.xparts;
 
   /* Main loop. */
-  while (waiting > 0) {
+  while (space_sort_struct.waiting > 0) {
 
     /* Grab an interval off the queue. */
-    qid = (first++) % qstack_size;
+    int qid =
+        atomic_inc(&space_sort_struct.first) % space_sort_struct.stack_size;
 
     /* Get the stack entry. */
-    i = qstack[qid].i;
-    j = qstack[qid].j;
-    min = qstack[qid].min;
-    max = qstack[qid].max;
-    qstack[qid].ready = 0;
+    while (!space_sort_struct.stack[qid].ready)
+      if (!space_sort_struct.waiting) return;
+    int i = space_sort_struct.stack[qid].i;
+    int j = space_sort_struct.stack[qid].j;
+    int min = space_sort_struct.stack[qid].min;
+    int max = space_sort_struct.stack[qid].max;
+    space_sort_struct.stack[qid].ready = 0;
 
     /* Loop over sub-intervals. */
     while (1) {
 
       /* Bring beer. */
-      pivot = (min + max) / 2;
+      const int pivot = (min + max) / 2;
 
       /* One pass of QuickSort's partitioning. */
-      ii = i;
-      jj = j;
+      int ii = i;
+      int jj = j;
       while (ii < jj) {
         while (ii <= j && ind[ii] <= pivot) ii++;
         while (jj >= i && ind[jj] > pivot) jj--;
         if (ii < jj) {
-          temp_i = ind[ii];
+          int temp_i = ind[ii];
           ind[ii] = ind[jj];
           ind[jj] = temp_i;
-          temp_p = parts[ii];
+          struct part temp_p = parts[ii];
           parts[ii] = parts[jj];
           parts[jj] = temp_p;
-          temp_xp = xparts[ii];
+          struct xpart temp_xp = xparts[ii];
           xparts[ii] = xparts[jj];
           xparts[jj] = temp_xp;
         }
       }
 
-      /* Verify sort. */
+      /* Verify space_sort_struct. */
       /* for ( int k = i ; k <= jj ; k++ )
          if ( ind[k] > pivot ) {
          message( "sorting failed at k=%i, ind[k]=%i, pivot=%i, i=%i, j=%i,
@@ -573,13 +585,16 @@ void parts_sort(struct part *parts, struct xpart *xparts, int *ind, int N,
 
         /* Recurse on the left? */
         if (jj > i && pivot > min) {
-          qid = (last++) % qstack_size;
-          qstack[qid].i = i;
-          qstack[qid].j = jj;
-          qstack[qid].min = min;
-          qstack[qid].max = pivot;
-          qstack[qid].ready = 1;
-          if (waiting++ >= qstack_size) error("Qstack overflow.");
+          qid = atomic_inc(&space_sort_struct.last) %
+                space_sort_struct.stack_size;
+          space_sort_struct.stack[qid].i = i;
+          space_sort_struct.stack[qid].j = jj;
+          space_sort_struct.stack[qid].min = min;
+          space_sort_struct.stack[qid].max = pivot;
+          space_sort_struct.stack[qid].ready = 1;
+          if (atomic_inc(&space_sort_struct.waiting) >=
+              space_sort_struct.stack_size)
+            error("Qstack overflow.");
         }
 
         /* Recurse on the right? */
@@ -593,13 +608,16 @@ void parts_sort(struct part *parts, struct xpart *xparts, int *ind, int N,
 
         /* Recurse on the right? */
         if (jj + 1 < j && pivot + 1 < max) {
-          qid = (last++) % qstack_size;
-          qstack[qid].i = jj + 1;
-          qstack[qid].j = j;
-          qstack[qid].min = pivot + 1;
-          qstack[qid].max = max;
-          qstack[qid].ready = 1;
-          if ((waiting++) >= qstack_size) error("Qstack overflow.");
+          qid = atomic_inc(&space_sort_struct.last) %
+                space_sort_struct.stack_size;
+          space_sort_struct.stack[qid].i = jj + 1;
+          space_sort_struct.stack[qid].j = j;
+          space_sort_struct.stack[qid].min = pivot + 1;
+          space_sort_struct.stack[qid].max = max;
+          space_sort_struct.stack[qid].ready = 1;
+          if (atomic_inc(&space_sort_struct.waiting) >=
+              space_sort_struct.stack_size)
+            error("Qstack overflow.");
         }
 
         /* Recurse on the left? */
@@ -612,18 +630,9 @@ void parts_sort(struct part *parts, struct xpart *xparts, int *ind, int N,
 
     } /* loop over sub-intervals. */
 
-    waiting--;
+    atomic_dec(&space_sort_struct.waiting);
 
   } /* main loop. */
-
-  /* Verify sort. */
-  /* for ( i = 1 ; i < N ; i++ )
-      if ( ind[i-1] > ind[i] )
-          error( "Sorting failed (ind[%i]=%i,ind[%i]=%i)." , i-1 , ind[i-1] , i
-     , ind[i] ); */
-
-  /* Clean up. */
-  free(qstack);
 }
 
 void gparts_sort(struct gpart *gparts, int *ind, int N, int min, int max) {
@@ -694,7 +703,7 @@ void gparts_sort(struct gpart *gparts, int *ind, int N, int min, int max) {
         }
       }
 
-      /* Verify sort. */
+      /* Verify space_sort_struct. */
       /* for ( int k = i ; k <= jj ; k++ )
          if ( ind[k] > pivot ) {
          message( "sorting failed at k=%i, ind[k]=%i, pivot=%i, i=%i, j=%i,
@@ -756,7 +765,7 @@ void gparts_sort(struct gpart *gparts, int *ind, int N, int min, int max) {
 
   } /* main loop. */
 
-  /* Verify sort. */
+  /* Verify space_sort_struct. */
   /* for ( i = 1 ; i < N ; i++ )
       if ( ind[i-1] > ind[i] )
           error( "Sorting failed (ind[%i]=%i,ind[%i]=%i)." , i-1 , ind[i-1] , i
diff --git a/src/space.h b/src/space.h
index c12ec46be968d713618c41db5ab1385ed147d33e..4f37275b716d37644d7c6bd648a58cc70a3d76e2 100644
--- a/src/space.h
+++ b/src/space.h
@@ -111,9 +111,23 @@ struct space {
   int nr_parts_foreign, size_parts_foreign;
 };
 
+/* Interval stack necessary for parallel particle sorting. */
+struct qstack {
+  volatile int i, j, min, max;
+  volatile int ready;
+};
+struct parallel_sort {
+  struct part *parts;
+  struct xpart *xparts;
+  int *ind;
+  struct qstack *stack;
+  unsigned int stack_size;
+  volatile unsigned int first, last, waiting;
+};
+extern struct parallel_sort space_sort_struct;
+
 /* function prototypes. */
-void parts_sort(struct part *parts, struct xpart *xparts, int *ind, int N,
-                int min, int max);
+void space_parts_sort(struct space *s, int *ind, int N, int min, int max);
 void gparts_sort(struct gpart *gparts, int *ind, int N, int min, int max);
 struct cell *space_getcell(struct space *s);
 int space_getsid(struct space *s, struct cell **ci, struct cell **cj,
@@ -130,5 +144,6 @@ void space_map_cells_post(struct space *s, int full,
 void space_rebuild(struct space *s, double h_max, int verbose);
 void space_recycle(struct space *s, struct cell *c);
 void space_split(struct space *s, struct cell *c);
+void space_do_parts_sort();
 
 #endif /* SWIFT_SPACE_H */
diff --git a/src/task.h b/src/task.h
index 0d3a68e1e8a892d554f8fb83f8f16d7030d5a54c..474fcc3592c7e4e85ff0f6fab71f99041c07bb65 100644
--- a/src/task.h
+++ b/src/task.h
@@ -44,6 +44,7 @@ enum task_types {
   task_type_grav_mm,
   task_type_grav_up,
   task_type_grav_down,
+  task_type_psort,
   task_type_count
 };