diff --git a/src/engine.c b/src/engine.c
index 1ce6aeec95a57e93416a9b9796954fc2ece2d5de..e4d77df941a70a592ff7e68d50780bf341910939 100644
--- a/src/engine.c
+++ b/src/engine.c
@@ -754,9 +754,12 @@ void engine_maketasks(struct engine *e) {
   scheduler_reset(sched, s->tot_cells * engine_maxtaskspercell);
 
   /* Add the space sorting tasks. */
-  for (int i = 0; i < e->nr_threads; i++)
+  for (int i = 0; i < e->nr_threads; i++) {
     scheduler_addtask(sched, task_type_part_sort, task_subtype_none, i, 0, NULL,
                       NULL, 0);
+    scheduler_addtask(sched, task_type_gpart_sort, task_subtype_none, i, 0,
+                      NULL, NULL, 0);
+  }
 
   /* Run through the highest level of cells and add pairs. */
   for (int i = 0; i < cdim[0]; i++)
@@ -2049,9 +2052,13 @@ void engine_init(struct engine *e, struct space *s, float dt, int nr_threads,
   s->nr_queues = nr_queues;
 
   /* Create the sorting tasks. */
-  for (int i = 0; i < e->nr_threads; i++)
-    scheduler_addtask(&e->sched, task_type_part_sort, task_subtype_none, i, 0, NULL,
-                      NULL, 0);
+  for (int i = 0; i < e->nr_threads; i++) {
+    scheduler_addtask(&e->sched, task_type_part_sort, task_subtype_none, i, 0,
+                      NULL, NULL, 0);
+
+    scheduler_addtask(&e->sched, task_type_gpart_sort, task_subtype_none, i, 0,
+                      NULL, NULL, 0);
+  }
 
   scheduler_ranktasks(&e->sched);
 
diff --git a/src/runner.c b/src/runner.c
index 7591091b2b54ae5ce7fbea5be4e1462f783e24ef..5a7f84c040011cf669be3b81405c88b6057750f3 100644
--- a/src/runner.c
+++ b/src/runner.c
@@ -1060,6 +1060,9 @@ void *runner_main(void *data) {
         case task_type_part_sort:
           space_do_parts_sort();
           break;
+        case task_type_gpart_sort:
+          space_do_gparts_sort();
+          break;
         case task_type_split_cell:
           space_do_split(e->s, t->ci);
           break;
diff --git a/src/scheduler.c b/src/scheduler.c
index d8002e8da69e3dac0c1637cbb33b78190ba17398..58cfcb7aec7ffe994e396393e4d72b2196c8fff0 100644
--- a/src/scheduler.c
+++ b/src/scheduler.c
@@ -129,6 +129,8 @@ void scheduler_splittasks(struct scheduler *s) {
     /* Skip sorting tasks. */
     if (t->type == task_type_part_sort) continue;
 
+    if (t->type == task_type_gpart_sort) continue;
+
     /* Empty task? */
     if (t->ci == NULL || (t->type == task_type_pair && t->cj == NULL)) {
       t->type = task_type_none;
diff --git a/src/space.c b/src/space.c
index 941bbc8f48f520b809fd1ca7026e88e690c6f88a..9a255e38ca9afb9c93833d03bbb8c088bdbefb7b 100644
--- a/src/space.c
+++ b/src/space.c
@@ -487,7 +487,7 @@ void space_rebuild(struct space *s, double cell_max, int verbose) {
 #endif
 
   /* Sort the parts according to their cells. */
-  space_gparts_sort(s->gparts, gind, nr_gparts, 0, s->nr_cells - 1);
+  space_gparts_sort(s, gind, nr_gparts, 0, s->nr_cells - 1, verbose);
 
   /* Re-link the parts. */
   for (int k = 0; k < nr_gparts; k++)
@@ -554,7 +554,6 @@ void space_split(struct space *s, struct cell *cells, int verbose) {
  * @param max highest index.
  * @param verbose Are we talkative ?
  */
-
 void space_parts_sort(struct space *s, size_t *ind, size_t N, int min, int max,
                       int verbose) {
 
@@ -725,103 +724,140 @@ void space_do_parts_sort() {
   } /* main loop. */
 }
 
-void space_gparts_sort(struct gpart *gparts, size_t *ind, size_t N, int min,
-                       int max) {
-
-  struct qstack {
-    volatile size_t i, j;
-    volatile int min, max;
-    volatile int ready;
-  };
-  struct qstack *qstack;
-  int qstack_size = 2 * (max - min) + 10;
-  volatile unsigned int first, last, waiting;
-
-  int pivot;
-  ptrdiff_t i, ii, j, jj, temp_i;
-  int qid;
-  struct gpart temp_p;
-
-  /* for ( int k = 0 ; k < N ; k++ )
-      if ( ind[k] > max || ind[k] < min )
-          error( "ind[%i]=%i is not in [%i,%i]." , k , ind[k] , min , max ); */
-
-  /* Allocate the stack. */
-  if ((qstack = malloc(sizeof(struct qstack) * qstack_size)) == NULL)
-    error("Failed to allocate qstack.");
-
-  /* Init the interval stack. */
-  qstack[0].i = 0;
-  qstack[0].j = N - 1;
-  qstack[0].min = min;
-  qstack[0].max = max;
-  qstack[0].ready = 1;
-  for (i = 1; i < qstack_size; i++) qstack[i].ready = 0;
-  first = 0;
-  last = 1;
-  waiting = 1;
+/**
+ * @brief Sort the g-particles according to the given indices, using the
+ * #task_type_gpart_sort tasks launched through the #engine.
+ *
+ * @param s The #space; its gparts array is sorted in place.
+ * @param ind The indices with respect to which the gparts are sorted.
+ * @param N The number of gparts.
+ * @param min Lowest index.
+ * @param max Highest index.
+ * @param verbose Are we talkative ?
+ */
+void space_gparts_sort(struct space *s, size_t *ind, size_t N, int min, int max,
+                       int verbose) {
+
+  ticks tic = getticks();
+
+  /* Populate the global parallel_sort structure with the input data. */
+  space_sort_struct.gparts = s->gparts;
+  space_sort_struct.ind = ind;
+  space_sort_struct.stack_size = 2 * (max - min + 1) + 10 + s->e->nr_threads;
+  if ((space_sort_struct.stack = malloc(sizeof(struct qstack) *
+                                        space_sort_struct.stack_size)) == NULL)
+    error("Failed to allocate sorting stack.");
+  for (int i = 0; i < space_sort_struct.stack_size; i++)
+    space_sort_struct.stack[i].ready = 0;
+
+  /* Add the first interval: entries [0, N-1] with indices in [min, max]. */
+  space_sort_struct.stack[0].i = 0;
+  space_sort_struct.stack[0].j = N - 1;
+  space_sort_struct.stack[0].min = min;
+  space_sort_struct.stack[0].max = max;
+  space_sort_struct.stack[0].ready = 1;
+  space_sort_struct.first = 0;
+  space_sort_struct.last = 1;
+  space_sort_struct.waiting = 1;
+
+  /* Launch the gpart sorting tasks. */
+  engine_launch(s->e, s->e->nr_threads, (1 << task_type_gpart_sort), 0);
+
+  /* Verify the result (debugging check, disabled). */
+  /* for (int i = 1; i < N; i++)
+    if (ind[i - 1] > ind[i])
+      error("Sorting failed (ind[%i]=%i,ind[%i]=%i), min=%i, max=%i.", i - 1,
+  ind[i - 1], i,
+            ind[i], min, max);
+  message("Sorting succeeded."); */
+
+  /* Clean up the interval stack allocated above. */
+  free(space_sort_struct.stack);
+
+  if (verbose)
+    message("took %.3f %s.", clocks_from_ticks(getticks() - tic),
+            clocks_getunit());
+}
+
+void space_do_gparts_sort() {
+
+  /* Pointers to the sorting data. */
+  size_t *ind = space_sort_struct.ind;
+  struct gpart *gparts = space_sort_struct.gparts;
 
   /* Main loop. */
-  while (waiting > 0) {
+  while (space_sort_struct.waiting) {
 
     /* Grab an interval off the queue. */
-    qid = (first++) % qstack_size;
+    int qid =
+        atomic_inc(&space_sort_struct.first) % space_sort_struct.stack_size;
+
+    /* Wait for the entry to be ready, or for the sorting to be done. */
+    while (!space_sort_struct.stack[qid].ready)
+      if (!space_sort_struct.waiting) return;
 
     /* Get the stack entry. */
-    i = qstack[qid].i;
-    j = qstack[qid].j;
-    min = qstack[qid].min;
-    max = qstack[qid].max;
-    qstack[qid].ready = 0;
+    ptrdiff_t i = space_sort_struct.stack[qid].i;
+    ptrdiff_t j = space_sort_struct.stack[qid].j;
+    int min = space_sort_struct.stack[qid].min;
+    int max = space_sort_struct.stack[qid].max;
+    space_sort_struct.stack[qid].ready = 0;
 
     /* Loop over sub-intervals. */
     while (1) {
 
       /* Bring beer. */
-      pivot = (min + max) / 2;
+      const int pivot = (min + max) / 2;
+      /* message("Working on interval [%i,%i] with min=%i, max=%i, pivot=%i.",
+              i, j, min, max, pivot); */
 
       /* One pass of QuickSort's partitioning. */
-      ii = i;
-      jj = j;
+      ptrdiff_t ii = i;
+      ptrdiff_t jj = j;
       while (ii < jj) {
         while (ii <= j && ind[ii] <= pivot) ii++;
         while (jj >= i && ind[jj] > pivot) jj--;
         if (ii < jj) {
-          temp_i = ind[ii];
+          size_t temp_i = ind[ii];
           ind[ii] = ind[jj];
           ind[jj] = temp_i;
-          temp_p = gparts[ii];
+          struct gpart temp_p = gparts[ii];
           gparts[ii] = gparts[jj];
           gparts[jj] = temp_p;
         }
       }
 
       /* Verify space_sort_struct. */
-      /* for ( int k = i ; k <= jj ; k++ )
-         if ( ind[k] > pivot ) {
-         message( "sorting failed at k=%i, ind[k]=%i, pivot=%i, i=%i, j=%i,
-         N=%i." , k , ind[k] , pivot , i , j , N );
-         error( "Partition failed (<=pivot)." );
-         }
-         for ( int k = jj+1 ; k <= j ; k++ )
-         if ( ind[k] <= pivot ) {
-         message( "sorting failed at k=%i, ind[k]=%i, pivot=%i, i=%i, j=%i,
-         N=%i." , k , ind[k] , pivot , i , j , N );
-         error( "Partition failed (>pivot)." );
-         } */
+      /* for (int k = i; k <= jj; k++)
+        if (ind[k] > pivot) {
+          message("sorting failed at k=%i, ind[k]=%i, pivot=%i, i=%i, j=%i.", k,
+                  ind[k], pivot, i, j);
+          error("Partition failed (<=pivot).");
+        }
+      for (int k = jj + 1; k <= j; k++)
+        if (ind[k] <= pivot) {
+          message("sorting failed at k=%i, ind[k]=%i, pivot=%i, i=%i, j=%i.", k,
+                  ind[k], pivot, i, j);
+          error("Partition failed (>pivot).");
+        } */
 
       /* Split-off largest interval. */
       if (jj - i > j - jj + 1) {
 
         /* Recurse on the left? */
         if (jj > i && pivot > min) {
-          qid = (last++) % qstack_size;
-          qstack[qid].i = i;
-          qstack[qid].j = jj;
-          qstack[qid].min = min;
-          qstack[qid].max = pivot;
-          qstack[qid].ready = 1;
-          if ((waiting++) >= qstack_size) error("Qstack overflow.");
+          qid = atomic_inc(&space_sort_struct.last) %
+                space_sort_struct.stack_size;
+          while (space_sort_struct.stack[qid].ready)
+            ;
+          space_sort_struct.stack[qid].i = i;
+          space_sort_struct.stack[qid].j = jj;
+          space_sort_struct.stack[qid].min = min;
+          space_sort_struct.stack[qid].max = pivot;
+          if (atomic_inc(&space_sort_struct.waiting) >=
+              space_sort_struct.stack_size)
+            error("Qstack overflow.");
+          space_sort_struct.stack[qid].ready = 1;
         }
 
         /* Recurse on the right? */
@@ -835,13 +871,18 @@ void space_gparts_sort(struct gpart *gparts, size_t *ind, size_t N, int min,
 
         /* Recurse on the right? */
         if (pivot + 1 < max) {
-          qid = (last++) % qstack_size;
-          qstack[qid].i = jj + 1;
-          qstack[qid].j = j;
-          qstack[qid].min = pivot + 1;
-          qstack[qid].max = max;
-          qstack[qid].ready = 1;
-          if ((waiting++) >= qstack_size) error("Qstack overflow.");
+          qid = atomic_inc(&space_sort_struct.last) %
+                space_sort_struct.stack_size;
+          while (space_sort_struct.stack[qid].ready)
+            ;
+          space_sort_struct.stack[qid].i = jj + 1;
+          space_sort_struct.stack[qid].j = j;
+          space_sort_struct.stack[qid].min = pivot + 1;
+          space_sort_struct.stack[qid].max = max;
+          if (atomic_inc(&space_sort_struct.waiting) >=
+              space_sort_struct.stack_size)
+            error("Qstack overflow.");
+          space_sort_struct.stack[qid].ready = 1;
         }
 
         /* Recurse on the left? */
@@ -854,18 +895,9 @@ void space_gparts_sort(struct gpart *gparts, size_t *ind, size_t N, int min,
 
     } /* loop over sub-intervals. */
 
-    waiting--;
+    atomic_dec(&space_sort_struct.waiting);
 
   } /* main loop. */
-
-  /* Verify space_sort_struct. */
-  /* for ( i = 1 ; i < N ; i++ )
-      if ( ind[i-1] > ind[i] )
-          error( "Sorting failed (ind[%i]=%i,ind[%i]=%i)." , i-1 , ind[i-1] , i
-     , ind[i] ); */
-
-  /* Clean up. */
-  free(qstack);
 }
 
 /**
diff --git a/src/space.h b/src/space.h
index 91485ff7e2ebe9da8ab927748589ae9f71320803..db9463e03084fa52dc94ae58aae31e668faee547 100644
--- a/src/space.h
+++ b/src/space.h
@@ -116,6 +116,7 @@ struct qstack {
 };
 struct parallel_sort {
   struct part *parts;
+  struct gpart *gparts;
   struct xpart *xparts;
   size_t *ind;
   struct qstack *stack;
@@ -127,8 +128,8 @@ extern struct parallel_sort space_sort_struct;
 /* function prototypes. */
 void space_parts_sort(struct space *s, size_t *ind, size_t N, int min, int max,
                       int verbose);
-void space_gparts_sort(struct gpart *gparts, size_t *ind, size_t N, int min,
-                       int max);
+void space_gparts_sort(struct space *s, size_t *ind, size_t N, int min, int max,
+                       int verbose);
 struct cell *space_getcell(struct space *s);
 int space_getsid(struct space *s, struct cell **ci, struct cell **cj,
                  double *shift);
@@ -150,5 +151,6 @@ void space_recycle(struct space *s, struct cell *c);
 void space_split(struct space *s, struct cell *cells, int verbose);
 void space_do_split(struct space *s, struct cell *c);
 void space_do_parts_sort();
+void space_do_gparts_sort();
 void space_link_cleanup(struct space *s);
 #endif /* SWIFT_SPACE_H */
diff --git a/src/task.c b/src/task.c
index 6e9a715760c9a32ede0191bdb0595c39b995439d..91c202ad96b14bb9417f7b52f8e8d8b9c83496a8 100644
--- a/src/task.c
+++ b/src/task.c
@@ -43,10 +43,10 @@
 
 /* Task type names. */
 const char *taskID_names[task_type_count] = {
-    "none",    "sort",    "self",      "pair",  "sub",        "init",
-    "ghost",   "drift",   "kick",      "send",  "recv",       "grav_pp",
-    "grav_mm", "grav_up", "grav_down", "part_sort", "gpart_sort",
-    "split_cell", "rewait"};
+    "none",      "sort",       "self",       "pair",    "sub",
+    "init",      "ghost",      "drift",      "kick",    "send",
+    "recv",      "grav_pp",    "grav_mm",    "grav_up", "grav_down",
+    "part_sort", "gpart_sort", "split_cell", "rewait"};
 
 const char *subtaskID_names[task_type_count] = {"none",  "density",
                                                 "force", "grav"};
@@ -79,9 +79,10 @@ float task_overlap(const struct task *ta, const struct task *tb) {
   /* First check if any of the two tasks are of a type that don't
      use cells. */
   if (ta == NULL || tb == NULL || ta->type == task_type_none ||
-      ta->type == task_type_part_sort || ta->type == task_type_split_cell ||
-      ta->type == task_type_rewait || tb->type == task_type_none ||
-      tb->type == task_type_part_sort || tb->type == task_type_split_cell ||
+      ta->type == task_type_part_sort || ta->type == task_type_gpart_sort ||
+      ta->type == task_type_split_cell || ta->type == task_type_rewait ||
+      tb->type == task_type_none || tb->type == task_type_part_sort ||
+      tb->type == task_type_gpart_sort || tb->type == task_type_split_cell ||
       tb->type == task_type_rewait)
     return 0.0f;