diff --git a/src/debug.c b/src/debug.c
index 15354b7d419544a8456543b79c38235eb3b68b2c..d73bc86a92cf5ca28c202e7b567cf7c40ba6eccb 100644
--- a/src/debug.c
+++ b/src/debug.c
@@ -154,8 +154,9 @@ int checkSpacehmax(struct space *s) {
   /* Loop over local cells. */
   float cell_h_max = 0.0f;
   for (int k = 0; k < s->nr_cells; k++) {
-    if (s->cells[k].nodeID == s->e->nodeID && s->cells[k].h_max > cell_h_max) {
-      cell_h_max = s->cells[k].h_max;
+    if (s->cells_top[k].nodeID == s->e->nodeID &&
+        s->cells_top[k].h_max > cell_h_max) {
+      cell_h_max = s->cells_top[k].h_max;
     }
   }
 
@@ -172,9 +173,9 @@ int checkSpacehmax(struct space *s) {
 
   /* There is a problem. Hunt it down. */
   for (int k = 0; k < s->nr_cells; k++) {
-    if (s->cells[k].nodeID == s->e->nodeID) {
-      if (s->cells[k].h_max > part_h_max) {
-        message("cell %d is inconsistent (%f > %f)", k, s->cells[k].h_max,
+    if (s->cells_top[k].nodeID == s->e->nodeID) {
+      if (s->cells_top[k].h_max > part_h_max) {
+        message("cell %d is inconsistent (%f > %f)", k, s->cells_top[k].h_max,
                 part_h_max);
       }
     }
diff --git a/src/engine.c b/src/engine.c
index 8dde99b8eb6f6b4fe3364149635fc9aa7be89c70..64d08d64f952efc4997b62ed8b346d5a8552a259 100644
--- a/src/engine.c
+++ b/src/engine.c
@@ -256,7 +256,7 @@ void engine_redistribute(struct engine *e) {
   const int nr_nodes = e->nr_nodes;
   const int nodeID = e->nodeID;
   struct space *s = e->s;
-  struct cell *cells = s->cells;
+  struct cell *cells = s->cells_top;
   const int nr_cells = s->nr_cells;
   const int *cdim = s->cdim;
   const double iwidth[3] = {s->iwidth[0], s->iwidth[1], s->iwidth[2]};
@@ -535,12 +535,12 @@ void engine_redistribute(struct engine *e) {
 
 #ifdef SWIFT_DEBUG_CHECKS
   /* Verify that all parts are in the right place. */
-  for (int k = 0; k < nr_parts; k++) {
-    int cid = cell_getid(cdim, parts_new[k].x[0] * iwidth[0],
-                         parts_new[k].x[1] * iwidth[1],
-                         parts_new[k].x[2] * iwidth[2]);
+  for (size_t k = 0; k < nr_parts; k++) {
+    const int cid = cell_getid(cdim, parts_new[k].x[0] * iwidth[0],
+                               parts_new[k].x[1] * iwidth[1],
+                               parts_new[k].x[2] * iwidth[2]);
     if (cells[cid].nodeID != nodeID)
-      error("Received particle (%i) that does not belong here (nodeID=%i).", k,
+      error("Received particle (%zu) that does not belong here (nodeID=%i).", k,
             cells[cid].nodeID);
   }
 
@@ -561,7 +561,7 @@ void engine_redistribute(struct engine *e) {
   for (size_t k = 0; k < nr_parts; ++k) {
 
     if (parts_new[k].gpart != NULL &&
-        parts_new[k].gpart->id_or_neg_offset != -k) {
+        parts_new[k].gpart->id_or_neg_offset != -(ptrdiff_t)k) {
       error("Linking problem !");
     }
   }
@@ -852,7 +852,7 @@ void engine_exchange_cells(struct engine *e) {
 #ifdef WITH_MPI
 
   struct space *s = e->s;
-  struct cell *cells = s->cells;
+  struct cell *cells = s->cells_top;
   const int nr_cells = s->nr_cells;
   const int nr_proxies = e->nr_proxies;
   int offset[nr_cells];
@@ -1012,7 +1012,7 @@ void engine_exchange_strays(struct engine *e, size_t offset_parts,
   /* Put the parts and gparts into the corresponding proxies. */
   for (size_t k = 0; k < *Npart; k++) {
     /* Get the target node and proxy ID. */
-    const int node_id = e->s->cells[ind_part[k]].nodeID;
+    const int node_id = e->s->cells_top[ind_part[k]].nodeID;
     if (node_id < 0 || node_id >= e->nr_nodes)
       error("Bad node ID %i.", node_id);
     const int pid = e->proxy_ind[node_id];
@@ -1036,7 +1036,7 @@ void engine_exchange_strays(struct engine *e, size_t offset_parts,
                      &s->xparts[offset_parts + k], 1);
   }
   for (size_t k = 0; k < *Ngpart; k++) {
-    const int node_id = e->s->cells[ind_gpart[k]].nodeID;
+    const int node_id = e->s->cells_top[ind_gpart[k]].nodeID;
     if (node_id < 0 || node_id >= e->nr_nodes)
       error("Bad node ID %i.", node_id);
     const int pid = e->proxy_ind[node_id];
@@ -1241,7 +1241,7 @@ void engine_make_gravity_tasks(struct engine *e) {
   struct space *s = e->s;
   struct scheduler *sched = &e->sched;
   const int nodeID = e->nodeID;
-  struct cell *cells = s->cells;
+  struct cell *cells = s->cells_top;
   const int nr_cells = s->nr_cells;
 
   for (int cid = 0; cid < nr_cells; ++cid) {
@@ -1296,7 +1296,7 @@ void engine_make_hydroloop_tasks(struct engine *e) {
   struct scheduler *sched = &e->sched;
   const int nodeID = e->nodeID;
   const int *cdim = s->cdim;
-  struct cell *cells = s->cells;
+  struct cell *cells = s->cells_top;
 
   /* Run through the highest level of cells and add pairs. */
   for (int i = 0; i < cdim[0]; i++) {
@@ -1802,7 +1802,7 @@ void engine_make_gravityrecursive_tasks(struct engine *e) {
   struct scheduler *sched = &e->sched;
   const int nodeID = e->nodeID;
   const int nr_cells = s->nr_cells;
-  struct cell *cells = s->cells;
+  struct cell *cells = s->cells_top;
 
   for (int k = 0; k < nr_cells; k++) {
 
@@ -1835,7 +1835,7 @@ void engine_maketasks(struct engine *e) {
 
   struct space *s = e->s;
   struct scheduler *sched = &e->sched;
-  struct cell *cells = s->cells;
+  struct cell *cells = s->cells_top;
   const int nr_cells = s->nr_cells;
   const ticks tic = getticks();
 
@@ -2278,7 +2278,7 @@ void engine_prepare(struct engine *e) {
 
     /* First drift all particles to the current time */
     e->drift_all = 1;
-    threadpool_map(&e->threadpool, runner_do_drift_mapper, e->s->cells,
+    threadpool_map(&e->threadpool, runner_do_drift_mapper, e->s->cells_top,
                    e->s->nr_cells, sizeof(struct cell), 1, e);
 
     /* Restore the default drifting policy */
@@ -2396,8 +2396,8 @@ void engine_collect_timestep(struct engine *e) {
 
   /* Collect the cell data. */
   for (int k = 0; k < s->nr_cells; k++)
-    if (s->cells[k].nodeID == e->nodeID) {
-      struct cell *c = &s->cells[k];
+    if (s->cells_top[k].nodeID == e->nodeID) {
+      struct cell *c = &s->cells_top[k];
 
       /* Make the top-cells recurse */
       engine_collect_kick(c);
@@ -2450,8 +2450,8 @@ void engine_print_stats(struct engine *e) {
 
   /* Collect the cell data. */
   for (int k = 0; k < s->nr_cells; k++)
-    if (s->cells[k].nodeID == e->nodeID) {
-      struct cell *c = &s->cells[k];
+    if (s->cells_top[k].nodeID == e->nodeID) {
+      struct cell *c = &s->cells_top[k];
       mass += c->mass;
       e_kin += c->e_kin;
       e_int += c->e_int;
@@ -2666,7 +2666,7 @@ void engine_step(struct engine *e) {
 
     /* Drift everybody to the snapshot position */
     e->drift_all = 1;
-    threadpool_map(&e->threadpool, runner_do_drift_mapper, e->s->cells,
+    threadpool_map(&e->threadpool, runner_do_drift_mapper, e->s->cells_top,
                    e->s->nr_cells, sizeof(struct cell), 1, e);
 
     /* Restore the default drifting policy */
@@ -2706,7 +2706,7 @@ void engine_step(struct engine *e) {
   }
 
   /* Drift only the necessary particles */
-  threadpool_map(&e->threadpool, runner_do_drift_mapper, e->s->cells,
+  threadpool_map(&e->threadpool, runner_do_drift_mapper, e->s->cells_top,
                  e->s->nr_cells, sizeof(struct cell), 1, e);
 
   /* Re-distribute the particles amongst the nodes? */
@@ -2805,7 +2805,7 @@ void engine_makeproxies(struct engine *e) {
 #ifdef WITH_MPI
   const int *cdim = e->s->cdim;
   const struct space *s = e->s;
-  struct cell *cells = s->cells;
+  struct cell *cells = s->cells_top;
   struct proxy *proxies = e->proxies;
   ticks tic = getticks();
 
@@ -2972,7 +2972,8 @@ void engine_split(struct engine *e, struct partition *initial_partition) {
   }
   for (size_t k = 0; k < s->nr_parts; ++k) {
 
-    if (s->parts[k].gpart != NULL && s->parts[k].gpart->id_or_neg_offset != -k)
+    if (s->parts[k].gpart != NULL &&
+        s->parts[k].gpart->id_or_neg_offset != -(ptrdiff_t)k)
       error("Linking problem !");
   }
 
diff --git a/src/partition.c b/src/partition.c
index e216e12a5a23457b39b53070de3d84a2f257b927..8d17bedf0aaeadc64044b12ffe1bb8887b02d83e 100644
--- a/src/partition.c
+++ b/src/partition.c
@@ -143,7 +143,7 @@ static void split_vector(struct space *s, int nregions, int *samplecells) {
             select = l;
           }
         }
-        s->cells[n++].nodeID = select;
+        s->cells_top[n++].nodeID = select;
       }
     }
   }
@@ -274,7 +274,7 @@ static void accumulate_counts(struct space *s, int *counts) {
  */
 static void split_metis(struct space *s, int nregions, int *celllist) {
 
-  for (int i = 0; i < s->nr_cells; i++) s->cells[i].nodeID = celllist[i];
+  for (int i = 0; i < s->nr_cells; i++) s->cells_top[i].nodeID = celllist[i];
 }
 #endif
 
@@ -419,7 +419,7 @@ static void repart_edge_metis(int partweights, int bothweights, int nodeID,
   /* Create weight arrays using task ticks for vertices and edges (edges
    * assume the same graph structure as used in the part_ calls). */
   int nr_cells = s->nr_cells;
-  struct cell *cells = s->cells;
+  struct cell *cells = s->cells_top;
   float wscale = 1e-3, vscale = 1e-3, wscale_buff = 0.0;
   int wtot = 0;
   int wmax = 1e9 / nr_nodes;
@@ -795,7 +795,7 @@ void partition_initial_partition(struct partition *initial_partition,
     /* Run through the cells and set their nodeID. */
     // message("s->dim = [%e,%e,%e]", s->dim[0], s->dim[1], s->dim[2]);
     for (k = 0; k < s->nr_cells; k++) {
-      c = &s->cells[k];
+      c = &s->cells_top[k];
       for (j = 0; j < 3; j++)
         ind[j] = c->loc[j] / s->dim[j] * initial_partition->grid[j];
       c->nodeID = ind[0] +
@@ -1037,10 +1037,10 @@ static int check_complete(struct space *s, int verbose, int nregions) {
   int failed = 0;
   for (int i = 0; i < nregions; i++) present[i] = 0;
   for (int i = 0; i < s->nr_cells; i++) {
-    if (s->cells[i].nodeID <= nregions)
-      present[s->cells[i].nodeID]++;
+    if (s->cells_top[i].nodeID <= nregions)
+      present[s->cells_top[i].nodeID]++;
     else
-      message("Bad nodeID: %d", s->cells[i].nodeID);
+      message("Bad nodeID: %d", s->cells_top[i].nodeID);
   }
   for (int i = 0; i < nregions; i++) {
     if (!present[i]) {
@@ -1085,13 +1085,13 @@ int partition_space_to_space(double *oldh, double *oldcdim, int *oldnodeIDs,
       for (int k = 0; k < s->cdim[2]; k++) {
 
         /* Scale indices to old cell space. */
-        int ii = rint(i * s->iwidth[0] * oldh[0]);
-        int jj = rint(j * s->iwidth[1] * oldh[1]);
-        int kk = rint(k * s->iwidth[2] * oldh[2]);
+        const int ii = rint(i * s->iwidth[0] * oldh[0]);
+        const int jj = rint(j * s->iwidth[1] * oldh[1]);
+        const int kk = rint(k * s->iwidth[2] * oldh[2]);
 
-        int cid = cell_getid(s->cdim, i, j, k);
-        int oldcid = cell_getid(oldcdim, ii, jj, kk);
-        s->cells[cid].nodeID = oldnodeIDs[oldcid];
+        const int cid = cell_getid(s->cdim, i, j, k);
+        const int oldcid = cell_getid(oldcdim, ii, jj, kk);
+        s->cells_top[cid].nodeID = oldnodeIDs[oldcid];
 
         if (oldnodeIDs[oldcid] > nr_nodes) nr_nodes = oldnodeIDs[oldcid];
       }
diff --git a/src/runner.c b/src/runner.c
index deb52cfa133cc2fa1bc0c816cd4787810fa17a35..3cc42f59c21eea03802d8797081c8a0f6866ef58 100644
--- a/src/runner.c
+++ b/src/runner.c
@@ -57,6 +57,18 @@
 #include "timers.h"
 #include "timestep.h"
 
+/**
+ * @brief  Entry in a list of sorted indices.
+ */
+struct entry {
+
+  /*! Distance on the axis */
+  float d;
+
+  /*! Particle index */
+  int i;
+};
+
 /* Orientation of the cell pairs */
 const double runner_shift[13][3] = {
     {5.773502691896258e-01, 5.773502691896258e-01, 5.773502691896258e-01},
diff --git a/src/runner_doiact_grav.h b/src/runner_doiact_grav.h
index a220ad1794d23999ff16752e797a499071fa2e65..0fcd2d2e80a72b92588acd5b8275b9dafc68df45 100644
--- a/src/runner_doiact_grav.h
+++ b/src/runner_doiact_grav.h
@@ -488,7 +488,7 @@ static void runner_do_grav_mm(struct runner *r, struct cell *ci, int timer) {
 
   /* Recover the list of top-level cells */
   const struct engine *e = r->e;
-  struct cell *cells = e->s->cells;
+  struct cell *cells = e->s->cells_top;
   const int nr_cells = e->s->nr_cells;
   const int ti_current = e->ti_current;
   const double max_d =
diff --git a/src/space.c b/src/space.c
index cccd40ddd7ba5e1de75b1b609d77a919e0d07835..b308420f4b8a63e1a5d08e73441b121b2d416e1e 100644
--- a/src/space.c
+++ b/src/space.c
@@ -88,6 +88,28 @@ const int sortlistID[27] = {
     /* (  1 ,  1 ,  0 ) */ 1,
     /* (  1 ,  1 ,  1 ) */ 0};
 
+/**
+ * @brief Interval stack necessary for parallel particle sorting.
+ */
+struct qstack {
+  volatile ptrdiff_t i, j;
+  volatile int min, max;
+  volatile int ready;
+};
+
+/**
+ * @brief Parallel particle-sorting stack
+ */
+struct parallel_sort {
+  struct part *parts;
+  struct gpart *gparts;
+  struct xpart *xparts;
+  int *ind;
+  struct qstack *stack;
+  unsigned int stack_size;
+  volatile unsigned int first, last, waiting;
+};
+
 /**
  * @brief Get the shift-id of the given pair of cells, swapping them
  *      if need be.
@@ -99,7 +121,6 @@ const int sortlistID[27] = {
  *
  * @return The shift ID and set shift, may or may not swap ci and cj.
  */
-
 int space_getsid(struct space *s, struct cell **ci, struct cell **cj,
                  double *shift) {
 
@@ -138,8 +159,9 @@ int space_getsid(struct space *s, struct cell **ci, struct cell **cj,
 /**
  * @brief Recursively dismantle a cell tree.
  *
+ * @param s The #space.
+ * @param c The #cell to recycle.
  */
-
 void space_rebuild_recycle(struct space *s, struct cell *c) {
 
   if (c->split)
@@ -152,28 +174,27 @@ void space_rebuild_recycle(struct space *s, struct cell *c) {
 }
 
 /**
- * @brief Re-build the cell grid.
+ * @brief Re-build the top-level cell grid.
  *
  * @param s The #space.
  * @param cell_max Maximum cell edge length.
  * @param verbose Print messages to stdout or not.
  */
-
 void space_regrid(struct space *s, double cell_max, int verbose) {
 
   const size_t nr_parts = s->nr_parts;
-  struct cell *restrict c;
-  ticks tic = getticks();
+  const ticks tic = getticks();
   const int ti_current = (s->e != NULL) ? s->e->ti_current : 0;
 
   /* Run through the cells and get the current h_max. */
   // tic = getticks();
   float h_max = s->cell_min / kernel_gamma / space_stretch;
   if (nr_parts > 0) {
-    if (s->cells != NULL) {
+    if (s->cells_top != NULL) {
       for (int k = 0; k < s->nr_cells; k++) {
-        if (s->cells[k].nodeID == engine_rank && s->cells[k].h_max > h_max) {
-          h_max = s->cells[k].h_max;
+        if (s->cells_top[k].nodeID == engine_rank &&
+            s->cells_top[k].h_max > h_max) {
+          h_max = s->cells_top[k].h_max;
         }
       }
     } else {
@@ -197,10 +218,10 @@ void space_regrid(struct space *s, double cell_max, int verbose) {
   if (verbose) message("h_max is %.3e (cell_max=%.3e).", h_max, cell_max);
 
   /* Get the new putative cell dimensions. */
-  int cdim[3];
-  for (int k = 0; k < 3; k++)
-    cdim[k] =
-        floor(s->dim[k] / fmax(h_max * kernel_gamma * space_stretch, cell_max));
+  const int cdim[3] = {
+      floor(s->dim[0] / fmax(h_max * kernel_gamma * space_stretch, cell_max)),
+      floor(s->dim[1] / fmax(h_max * kernel_gamma * space_stretch, cell_max)),
+      floor(s->dim[2] / fmax(h_max * kernel_gamma * space_stretch, cell_max))};
 
   /* Check if we have enough cells for periodicity. */
   if (s->periodic && (cdim[0] < 3 || cdim[1] < 3 || cdim[2] < 3))
@@ -239,7 +260,7 @@ void space_regrid(struct space *s, double cell_max, int verbose) {
       for (int j = 0; j < s->cdim[1]; j++) {
         for (int k = 0; k < s->cdim[2]; k++) {
           cid = cell_getid(oldcdim, i, j, k);
-          oldnodeIDs[cid] = s->cells[cid].nodeID;
+          oldnodeIDs[cid] = s->cells_top[cid].nodeID;
         }
       }
     }
@@ -249,16 +270,16 @@ void space_regrid(struct space *s, double cell_max, int verbose) {
 
   /* Do we need to re-build the upper-level cells? */
   // tic = getticks();
-  if (s->cells == NULL || cdim[0] < s->cdim[0] || cdim[1] < s->cdim[1] ||
+  if (s->cells_top == NULL || cdim[0] < s->cdim[0] || cdim[1] < s->cdim[1] ||
       cdim[2] < s->cdim[2]) {
 
     /* Free the old cells, if they were allocated. */
-    if (s->cells != NULL) {
+    if (s->cells_top != NULL) {
       for (int k = 0; k < s->nr_cells; k++) {
-        space_rebuild_recycle(s, &s->cells[k]);
-        if (s->cells[k].sort != NULL) free(s->cells[k].sort);
+        space_rebuild_recycle(s, &s->cells_top[k]);
+        if (s->cells_top[k].sort != NULL) free(s->cells_top[k].sort);
       }
-      free(s->cells);
+      free(s->cells_top);
       s->maxdepth = 0;
     }
 
@@ -272,18 +293,19 @@ void space_regrid(struct space *s, double cell_max, int verbose) {
 
     /* Allocate the highest level of cells. */
     s->tot_cells = s->nr_cells = cdim[0] * cdim[1] * cdim[2];
-    if (posix_memalign((void *)&s->cells, cell_align,
+    if (posix_memalign((void *)&s->cells_top, cell_align,
                        s->nr_cells * sizeof(struct cell)) != 0)
       error("Failed to allocate cells.");
-    bzero(s->cells, s->nr_cells * sizeof(struct cell));
+    bzero(s->cells_top, s->nr_cells * sizeof(struct cell));
     for (int k = 0; k < s->nr_cells; k++)
-      if (lock_init(&s->cells[k].lock) != 0) error("Failed to init spinlock.");
+      if (lock_init(&s->cells_top[k].lock) != 0)
+        error("Failed to init spinlock.");
 
     /* Set the cell location and sizes. */
     for (int i = 0; i < cdim[0]; i++)
       for (int j = 0; j < cdim[1]; j++)
         for (int k = 0; k < cdim[2]; k++) {
-          c = &s->cells[cell_getid(cdim, i, j, k)];
+          struct cell *restrict c = &s->cells_top[cell_getid(cdim, i, j, k)];
           c->loc[0] = i * s->width[0];
           c->loc[1] = j * s->width[1];
           c->loc[2] = k * s->width[2];
@@ -340,34 +362,35 @@ void space_regrid(struct space *s, double cell_max, int verbose) {
       free(oldnodeIDs);
     }
 #endif
+
+    // message( "rebuilding upper-level cells took %.3f %s." ,
+    // clocks_from_ticks(double)(getticks() - tic), clocks_getunit());
+
   } /* re-build upper-level cells? */
-  // message( "rebuilding upper-level cells took %.3f %s." ,
-  // clocks_from_ticks(double)(getticks() - tic), clocks_getunit());
 
-  /* Otherwise, just clean up the cells. */
-  else {
+  else { /* Otherwise, just clean up the cells. */
 
     /* Free the old cells, if they were allocated. */
     for (int k = 0; k < s->nr_cells; k++) {
-      space_rebuild_recycle(s, &s->cells[k]);
-      s->cells[k].sorts = NULL;
-      s->cells[k].nr_tasks = 0;
-      s->cells[k].nr_density = 0;
-      s->cells[k].nr_gradient = 0;
-      s->cells[k].nr_force = 0;
-      s->cells[k].density = NULL;
-      s->cells[k].gradient = NULL;
-      s->cells[k].force = NULL;
-      s->cells[k].dx_max = 0.0f;
-      s->cells[k].sorted = 0;
-      s->cells[k].count = 0;
-      s->cells[k].gcount = 0;
-      s->cells[k].init = NULL;
-      s->cells[k].extra_ghost = NULL;
-      s->cells[k].ghost = NULL;
-      s->cells[k].kick = NULL;
-      s->cells[k].super = &s->cells[k];
-      s->cells[k].gsuper = &s->cells[k];
+      space_rebuild_recycle(s, &s->cells_top[k]);
+      s->cells_top[k].sorts = NULL;
+      s->cells_top[k].nr_tasks = 0;
+      s->cells_top[k].nr_density = 0;
+      s->cells_top[k].nr_gradient = 0;
+      s->cells_top[k].nr_force = 0;
+      s->cells_top[k].density = NULL;
+      s->cells_top[k].gradient = NULL;
+      s->cells_top[k].force = NULL;
+      s->cells_top[k].dx_max = 0.0f;
+      s->cells_top[k].sorted = 0;
+      s->cells_top[k].count = 0;
+      s->cells_top[k].gcount = 0;
+      s->cells_top[k].init = NULL;
+      s->cells_top[k].extra_ghost = NULL;
+      s->cells_top[k].ghost = NULL;
+      s->cells_top[k].kick = NULL;
+      s->cells_top[k].super = &s->cells_top[k];
+      s->cells_top[k].gsuper = &s->cells_top[k];
     }
     s->maxdepth = 0;
   }
@@ -385,7 +408,6 @@ void space_regrid(struct space *s, double cell_max, int verbose) {
  * @param verbose Print messages to stdout or not
  *
  */
-
 void space_rebuild(struct space *s, double cell_max, int verbose) {
 
   const ticks tic = getticks();
@@ -398,7 +420,7 @@ void space_rebuild(struct space *s, double cell_max, int verbose) {
 
   size_t nr_parts = s->nr_parts;
   size_t nr_gparts = s->nr_gparts;
-  struct cell *restrict cells = s->cells;
+  struct cell *restrict cells_top = s->cells_top;
   const int ti_current = (s->e != NULL) ? s->e->ti_current : 0;
 
   const double ih[3] = {s->iwidth[0], s->iwidth[1], s->iwidth[2]};
@@ -420,7 +442,7 @@ void space_rebuild(struct space *s, double cell_max, int verbose) {
         p->x[j] -= dim[j];
     ind[k] =
         cell_getid(cdim, p->x[0] * ih[0], p->x[1] * ih[1], p->x[2] * ih[2]);
-    cells[ind[k]].count++;
+    cells_top[ind[k]].count++;
   }
   // message( "getting particle indices took %.3f %s." ,
   // clocks_from_ticks(getticks() - tic), clocks_getunit()):
@@ -440,9 +462,9 @@ void space_rebuild(struct space *s, double cell_max, int verbose) {
         gp->x[j] -= dim[j];
     gind[k] =
         cell_getid(cdim, gp->x[0] * ih[0], gp->x[1] * ih[1], gp->x[2] * ih[2]);
-    cells[gind[k]].gcount++;
+    cells_top[gind[k]].gcount++;
   }
-// message( "getting particle indices took %.3f %s." ,
+// message( "getting g-particle indices took %.3f %s." ,
 // clocks_from_ticks(getticks() - tic), clocks_getunit());
 
 #ifdef WITH_MPI
@@ -450,8 +472,8 @@ void space_rebuild(struct space *s, double cell_max, int verbose) {
   /* Move non-local parts to the end of the list. */
   const int local_nodeID = s->e->nodeID;
   for (size_t k = 0; k < nr_parts;) {
-    if (cells[ind[k]].nodeID != local_nodeID) {
-      cells[ind[k]].count -= 1;
+    if (cells_top[ind[k]].nodeID != local_nodeID) {
+      cells_top[ind[k]].count -= 1;
       nr_parts -= 1;
       const struct part tp = s->parts[k];
       s->parts[k] = s->parts[nr_parts];
@@ -477,12 +499,12 @@ void space_rebuild(struct space *s, double cell_max, int verbose) {
 #ifdef SWIFT_DEBUG_CHECKS
   /* Check that all parts are in the correct places. */
   for (size_t k = 0; k < nr_parts; k++) {
-    if (cells[ind[k]].nodeID != local_nodeID) {
+    if (cells_top[ind[k]].nodeID != local_nodeID) {
       error("Failed to move all non-local parts to send list");
     }
   }
   for (size_t k = nr_parts; k < s->nr_parts; k++) {
-    if (cells[ind[k]].nodeID == local_nodeID) {
+    if (cells_top[ind[k]].nodeID == local_nodeID) {
       error("Failed to remove local parts from send list");
     }
   }
@@ -490,8 +512,8 @@ void space_rebuild(struct space *s, double cell_max, int verbose) {
 
   /* Move non-local gparts to the end of the list. */
   for (size_t k = 0; k < nr_gparts;) {
-    if (cells[gind[k]].nodeID != local_nodeID) {
-      cells[gind[k]].gcount -= 1;
+    if (cells_top[gind[k]].nodeID != local_nodeID) {
+      cells_top[gind[k]].gcount -= 1;
       nr_gparts -= 1;
       const struct gpart tp = s->gparts[k];
       s->gparts[k] = s->gparts[nr_gparts];
@@ -515,12 +537,12 @@ void space_rebuild(struct space *s, double cell_max, int verbose) {
 #ifdef SWIFT_DEBUG_CHECKS
   /* Check that all gparts are in the correct place (untested). */
   for (size_t k = 0; k < nr_gparts; k++) {
-    if (cells[gind[k]].nodeID != local_nodeID) {
+    if (cells_top[gind[k]].nodeID != local_nodeID) {
       error("Failed to move all non-local gparts to send list");
     }
   }
   for (size_t k = nr_gparts; k < s->nr_gparts; k++) {
-    if (cells[gind[k]].nodeID == local_nodeID) {
+    if (cells_top[gind[k]].nodeID == local_nodeID) {
       error("Failed to remove local gparts from send list");
     }
   }
@@ -552,11 +574,11 @@ void space_rebuild(struct space *s, double cell_max, int verbose) {
     const struct part *const p = &s->parts[k];
     ind[k] =
         cell_getid(cdim, p->x[0] * ih[0], p->x[1] * ih[1], p->x[2] * ih[2]);
-    cells[ind[k]].count += 1;
+    cells_top[ind[k]].count += 1;
 #ifdef SWIFT_DEBUG_CHECKS
-    if (cells[ind[k]].nodeID != local_nodeID)
+    if (cells_top[ind[k]].nodeID != local_nodeID)
       error("Received part that does not belong to me (nodeID=%i).",
-            cells[ind[k]].nodeID);
+            cells_top[ind[k]].nodeID);
 #endif
   }
   nr_parts = s->nr_parts;
@@ -602,16 +624,19 @@ void space_rebuild(struct space *s, double cell_max, int verbose) {
     const struct gpart *const p = &s->gparts[k];
     gind[k] =
         cell_getid(cdim, p->x[0] * ih[0], p->x[1] * ih[1], p->x[2] * ih[2]);
-    cells[gind[k]].gcount += 1;
-    /* if ( cells[ ind[k] ].nodeID != nodeID )
-        error( "Received part that does not belong to me (nodeID=%i)." , cells[
-       ind[k] ].nodeID ); */
+    cells_top[gind[k]].gcount += 1;
+
+#ifdef SWIFT_DEBUG_CHECKS
+    if (cells_top[ind[k]].nodeID != s->e->nodeID)
+      error("Received part that does not belong to me (nodeID=%i).",
+            cells_top[ind[k]].nodeID);
+#endif
   }
   nr_gparts = s->nr_gparts;
 
 #endif
 
-  /* Sort the parts according to their cells. */
+  /* Sort the gparts according to their cells. */
   space_gparts_sort(s, gind, nr_gparts, 0, s->nr_cells - 1, verbose);
 
   /* Re-link the parts. */
@@ -638,7 +663,7 @@ void space_rebuild(struct space *s, double cell_max, int verbose) {
   for (size_t k = 0; k < nr_parts; ++k) {
 
     if (s->parts[k].gpart != NULL &&
-        s->parts[k].gpart->id_or_neg_offset != -k) {
+        s->parts[k].gpart->id_or_neg_offset != -(ptrdiff_t)k) {
       error("Linking problem !");
     }
   }
@@ -650,7 +675,7 @@ void space_rebuild(struct space *s, double cell_max, int verbose) {
   struct xpart *xfinger = s->xparts;
   struct gpart *gfinger = s->gparts;
   for (int k = 0; k < s->nr_cells; k++) {
-    struct cell *restrict c = &cells[k];
+    struct cell *restrict c = &cells_top[k];
     c->ti_old = ti_current;
     c->parts = finger;
     c->xparts = xfinger;
@@ -664,7 +689,7 @@ void space_rebuild(struct space *s, double cell_max, int verbose) {
 
   /* At this point, we have the upper-level cells, old or new. Now make
      sure that the parts in each cell are ok. */
-  space_split(s, cells, verbose);
+  space_split(s, cells_top, verbose);
 
   if (verbose)
     message("took %.3f %s.", clocks_from_ticks(getticks() - tic),
@@ -674,6 +699,8 @@ void space_rebuild(struct space *s, double cell_max, int verbose) {
 /**
  * @brief Split particles between cells of a hierarchy
  *
+ * This is done in parallel using threads in the #threadpool.
+ *
  * @param s The #space.
  * @param cells The cell hierarchy
  * @param verbose Are we talkative ?
@@ -692,7 +719,7 @@ void space_split(struct space *s, struct cell *cells, int verbose) {
 
 /**
  * @brief Sort the particles and condensed particles according to the given
- *indices.
+ * indices.
  *
  * @param s The #space.
  * @param ind The indices with respect to which the parts are sorted.
@@ -701,7 +728,6 @@ void space_split(struct space *s, struct cell *cells, int verbose) {
  * @param max highest index.
  * @param verbose Are we talkative ?
  */
-
 void space_parts_sort(struct space *s, int *ind, size_t N, int min, int max,
                       int verbose) {
 
@@ -736,9 +762,9 @@ void space_parts_sort(struct space *s, int *ind, size_t N, int min, int max,
 
 #ifdef SWIFT_DEBUG_CHECKS
   /* Verify space_sort_struct. */
-  for (int i = 1; i < N; i++)
+  for (size_t i = 1; i < N; i++)
     if (ind[i - 1] > ind[i])
-      error("Sorting failed (ind[%i]=%i,ind[%i]=%i), min=%i, max=%i.", i - 1,
+      error("Sorting failed (ind[%zu]=%i,ind[%zu]=%i), min=%i, max=%i.", i - 1,
             ind[i - 1], i, ind[i], min, max);
   message("Sorting succeeded.");
 #endif
@@ -878,8 +904,7 @@ void space_parts_sort_mapper(void *map_data, int num_elements,
 }
 
 /**
- * @brief Sort the g-particles and condensed particles according to the given
- *indices.
+ * @brief Sort the g-particles according to the given indices.
  *
  * @param s The #space.
  * @param ind The indices with respect to which the gparts are sorted.
@@ -921,9 +946,9 @@ void space_gparts_sort(struct space *s, int *ind, size_t N, int min, int max,
 
 #ifdef SWIFT_DEBUG_CHECKS
   /* Verify space_sort_struct. */
-  for (int i = 1; i < N; i++)
+  for (size_t i = 1; i < N; i++)
     if (ind[i - 1] > ind[i])
-      error("Sorting failed (ind[%i]=%i,ind[%i]=%i), min=%i, max=%i.", i - 1,
+      error("Sorting failed (ind[%zu]=%i,ind[%zu]=%i), min=%i, max=%i.", i - 1,
             ind[i - 1], i, ind[i], min, max);
   message("Sorting succeeded.");
 #endif
@@ -1061,7 +1086,6 @@ void space_gparts_sort_mapper(void *map_data, int num_elements,
 /**
  * @brief Mapping function to free the sorted indices buffers.
  */
-
 void space_map_clearsort(struct cell *c, void *data) {
 
   if (c->sort != NULL) {
@@ -1077,21 +1101,17 @@ void space_map_clearsort(struct cell *c, void *data) {
  * @param fun Function pointer to apply on the cells.
  * @param data Data passed to the function fun.
  */
-
 static void rec_map_parts(struct cell *c,
                           void (*fun)(struct part *p, struct cell *c,
                                       void *data),
                           void *data) {
-
-  int k;
-
   /* No progeny? */
   if (!c->split)
-    for (k = 0; k < c->count; k++) fun(&c->parts[k], c, data);
+    for (int k = 0; k < c->count; k++) fun(&c->parts[k], c, data);
 
   /* Otherwise, recurse. */
   else
-    for (k = 0; k < 8; k++)
+    for (int k = 0; k < 8; k++)
       if (c->progeny[k] != NULL) rec_map_parts(c->progeny[k], fun, data);
 }
 
@@ -1102,16 +1122,13 @@ static void rec_map_parts(struct cell *c,
  * @param fun Function pointer to apply on the cells.
  * @param data Data passed to the function fun.
  */
-
 void space_map_parts(struct space *s,
                      void (*fun)(struct part *p, struct cell *c, void *data),
                      void *data) {
 
-  int cid = 0;
-
   /* Call the recursive function on all higher-level cells. */
-  for (cid = 0; cid < s->nr_cells; cid++)
-    rec_map_parts(&s->cells[cid], fun, data);
+  for (int cid = 0; cid < s->nr_cells; cid++)
+    rec_map_parts(&s->cells_top[cid], fun, data);
 }
 
 /**
@@ -1120,20 +1137,17 @@ void space_map_parts(struct space *s,
  * @param c The #cell we are working in.
  * @param fun Function pointer to apply on the cells.
  */
-
 static void rec_map_parts_xparts(struct cell *c,
                                  void (*fun)(struct part *p, struct xpart *xp,
                                              struct cell *c)) {
 
-  int k;
-
   /* No progeny? */
   if (!c->split)
-    for (k = 0; k < c->count; k++) fun(&c->parts[k], &c->xparts[k], c);
+    for (int k = 0; k < c->count; k++) fun(&c->parts[k], &c->xparts[k], c);
 
   /* Otherwise, recurse. */
   else
-    for (k = 0; k < 8; k++)
+    for (int k = 0; k < 8; k++)
       if (c->progeny[k] != NULL) rec_map_parts_xparts(c->progeny[k], fun);
 }
 
@@ -1143,16 +1157,13 @@ static void rec_map_parts_xparts(struct cell *c,
  * @param s The #space we are working in.
  * @param fun Function pointer to apply on the particles in the cells.
  */
-
 void space_map_parts_xparts(struct space *s,
                             void (*fun)(struct part *p, struct xpart *xp,
                                         struct cell *c)) {
 
-  int cid = 0;
-
   /* Call the recursive function on all higher-level cells. */
-  for (cid = 0; cid < s->nr_cells; cid++)
-    rec_map_parts_xparts(&s->cells[cid], fun);
+  for (int cid = 0; cid < s->nr_cells; cid++)
+    rec_map_parts_xparts(&s->cells_top[cid], fun);
 }
 
 /**
@@ -1163,16 +1174,12 @@ void space_map_parts_xparts(struct space *s,
  * @param fun Function pointer to apply on the cells.
  * @param data Data passed to the function fun.
  */
-
 static void rec_map_cells_post(struct cell *c, int full,
                                void (*fun)(struct cell *c, void *data),
                                void *data) {
-
-  int k;
-
   /* Recurse. */
   if (c->split)
-    for (k = 0; k < 8; k++)
+    for (int k = 0; k < 8; k++)
       if (c->progeny[k] != NULL)
         rec_map_cells_post(c->progeny[k], full, fun, data);
 
@@ -1188,29 +1195,24 @@ static void rec_map_cells_post(struct cell *c, int full,
  * @param fun Function pointer to apply on the cells.
  * @param data Data passed to the function fun.
  */
-
 void space_map_cells_post(struct space *s, int full,
                           void (*fun)(struct cell *c, void *data), void *data) {
 
-  int cid = 0;
-
   /* Call the recursive function on all higher-level cells. */
-  for (cid = 0; cid < s->nr_cells; cid++)
-    rec_map_cells_post(&s->cells[cid], full, fun, data);
+  for (int cid = 0; cid < s->nr_cells; cid++)
+    rec_map_cells_post(&s->cells_top[cid], full, fun, data);
 }
 
 static void rec_map_cells_pre(struct cell *c, int full,
                               void (*fun)(struct cell *c, void *data),
                               void *data) {
 
-  int k;
-
   /* No progeny? */
   if (full || !c->split) fun(c, data);
 
   /* Recurse. */
   if (c->split)
-    for (k = 0; k < 8; k++)
+    for (int k = 0; k < 8; k++)
       if (c->progeny[k] != NULL)
         rec_map_cells_pre(c->progeny[k], full, fun, data);
 }
@@ -1226,28 +1228,29 @@ static void rec_map_cells_pre(struct cell *c, int full,
 void space_map_cells_pre(struct space *s, int full,
                          void (*fun)(struct cell *c, void *data), void *data) {
 
-  int cid = 0;
-
   /* Call the recursive function on all higher-level cells. */
-  for (cid = 0; cid < s->nr_cells; cid++)
-    rec_map_cells_pre(&s->cells[cid], full, fun, data);
+  for (int cid = 0; cid < s->nr_cells; cid++)
+    rec_map_cells_pre(&s->cells_top[cid], full, fun, data);
 }
 
 /**
  * @brief #threadpool mapper function to split cells if they contain
  *        too many particles.
+ *
+ * @param map_data Pointer towards the top-cells.
+ * @param num_elements The number of cells to treat.
+ * @param extra_data Pointers to the #space.
  */
-
 void space_split_mapper(void *map_data, int num_elements, void *extra_data) {
 
   /* Unpack the inputs. */
   struct space *s = (struct space *)extra_data;
-  struct cell *cells = (struct cell *)map_data;
+  struct cell *restrict cells_top = (struct cell *)map_data;
   struct engine *e = s->e;
 
   for (int ind = 0; ind < num_elements; ind++) {
 
-    struct cell *c = &cells[ind];
+    struct cell *c = &cells_top[ind];
 
     const int count = c->count;
     const int gcount = c->gcount;
@@ -1367,12 +1370,11 @@ void space_split_mapper(void *map_data, int num_elements, void *extra_data) {
 }
 
 /**
- * @brief Return a used cell to the cell buffer.
+ * @brief Return a used cell to the buffer od unused sub-cells.
  *
  * @param s The #space.
  * @param c The #cell.
  */
-
 void space_recycle(struct space *s, struct cell *c) {
 
   /* Lock the space. */
@@ -1388,8 +1390,8 @@ void space_recycle(struct space *s, struct cell *c) {
   bzero(c, sizeof(struct cell));
 
   /* Hook this cell into the buffer. */
-  c->next = s->cells_new;
-  s->cells_new = c;
+  c->next = s->cells_sub;
+  s->cells_sub = c;
   s->tot_cells -= 1;
 
   /* Unlock the space. */
@@ -1397,39 +1399,42 @@ void space_recycle(struct space *s, struct cell *c) {
 }
 
 /**
- * @brief Get a new empty cell.
+ * @brief Get a new empty (sub-)#cell.
+ *
+ * If there are cells in the buffer, use the one at the end of the linked list.
+ * If we have no cells, allocate a new chunk of memory and pick one from there.
  *
  * @param s The #space.
  */
-
 struct cell *space_getcell(struct space *s) {
 
-  struct cell *c;
-  int k;
-
   /* Lock the space. */
   lock_lock(&s->lock);
 
   /* Is the buffer empty? */
-  if (s->cells_new == NULL) {
-    if (posix_memalign((void *)&s->cells_new, cell_align,
+  if (s->cells_sub == NULL) {
+    if (posix_memalign((void *)&s->cells_sub, cell_align,
                        space_cellallocchunk * sizeof(struct cell)) != 0)
       error("Failed to allocate more cells.");
-    bzero(s->cells_new, space_cellallocchunk * sizeof(struct cell));
-    for (k = 0; k < space_cellallocchunk - 1; k++)
-      s->cells_new[k].next = &s->cells_new[k + 1];
-    s->cells_new[space_cellallocchunk - 1].next = NULL;
+
+    /* Zero everything for good measure */
+    bzero(s->cells_sub, space_cellallocchunk * sizeof(struct cell));
+
+    /* Constructed a linked list */
+    for (int k = 0; k < space_cellallocchunk - 1; k++)
+      s->cells_sub[k].next = &s->cells_sub[k + 1];
+    s->cells_sub[space_cellallocchunk - 1].next = NULL;
   }
 
   /* Pick off the next cell. */
-  c = s->cells_new;
-  s->cells_new = c->next;
+  struct cell *c = s->cells_sub;
+  s->cells_sub = c->next;
   s->tot_cells += 1;
 
   /* Unlock the space. */
   lock_unlock_blind(&s->lock);
 
-  /* Init some things in the cell. */
+  /* Init some things in the cell we just got. */
   bzero(c, sizeof(struct cell));
   c->nodeID = -1;
   if (lock_init(&c->lock) != 0 || lock_init(&c->glock) != 0)
@@ -1511,7 +1516,6 @@ void space_init_gparts(struct space *s) {
  * parts with a cutoff below half the cell width are then split
  * recursively.
  */
-
 void space_init(struct space *s, const struct swift_params *params,
                 double dim[3], struct part *parts, struct gpart *gparts,
                 size_t Npart, size_t Ngpart, int periodic, int gravity,
@@ -1534,7 +1538,6 @@ void space_init(struct space *s, const struct swift_params *params,
   s->gparts = gparts;
   s->cell_min = parser_get_param_double(params, "SPH:max_smoothing_length");
   s->nr_queues = 1; /* Temporary value until engine construction */
-  s->size_parts_foreign = 0;
 
   /* Get the constants for the scheduler */
   space_maxsize = parser_get_opt_param_int(params, "Scheduler:cell_max_size",
@@ -1651,8 +1654,8 @@ void space_link_cleanup(struct space *s) {
  */
 void space_clean(struct space *s) {
 
-  for (int i = 0; i < s->nr_cells; ++i) cell_clean(&s->cells[i]);
-  free(s->cells);
+  for (int i = 0; i < s->nr_cells; ++i) cell_clean(&s->cells_top[i]);
+  free(s->cells_top);
   free(s->parts);
   free(s->xparts);
   free(s->gparts);
diff --git a/src/space.h b/src/space.h
index 90313be8dbe817d65fbd0e6a8c30c156747594b1..66d82c6f78a08447851a8dfdaea1231e5778693b 100644
--- a/src/space.h
+++ b/src/space.h
@@ -37,7 +37,6 @@
 #include "space.h"
 
 /* Some constants. */
-#define space_maxdepth 10
 #define space_cellallocchunk 1000
 #define space_splitsize_default 400
 #define space_maxsize_default 8000000
@@ -53,84 +52,85 @@ extern int space_subsize;
 /* Map shift vector to sortlist. */
 extern const int sortlistID[27];
 
-/* Entry in a list of sorted indices. */
-struct entry {
-  float d;
-  int i;
-};
-
-/* The space in which the cells reside. */
+/**
+ * @brief The space in which the cells and particles reside.
+ */
 struct space {
 
-  /* Spatial extent. */
+  /*! Spatial extent. */
   double dim[3];
 
-  /* Cell widths. */
-  double width[3], iwidth[3];
+  /*! Is the space periodic? */
+  int periodic;
+
+  /*! Are we doing gravity? */
+  int gravity;
+
+  /*! Width of the top-level cells. */
+  double width[3];
+
+  /*! Inverse of the top-level cell width */
+  double iwidth[3];
 
-  /* The minimum cell width. */
+  /*! The minimum top-level cell width allowed. */
   double cell_min;
 
-  /* Current maximum displacement for particles. */
+  /*! Current maximum displacement for particles. */
   float dx_max;
 
-  /* Number of cells. */
-  int nr_cells, tot_cells;
+  /*! Space dimensions in number of top-cells. */
+  int cdim[3];
 
-  /* Space dimensions in number of cells. */
-  int maxdepth, cdim[3];
+  /*! Maximal depth reached by the tree */
+  int maxdepth;
 
-  /* The (level 0) cells themselves. */
-  struct cell *cells;
+  /*! Number of top-level cells. */
+  int nr_cells;
 
-  /* Buffer of unused cells. */
-  struct cell *cells_new;
+  /*! Total number of cells (top- and sub-) */
+  int tot_cells;
 
-  /* The particle data (cells have pointers to this). */
-  struct part *parts;
-  struct xpart *xparts;
-  struct gpart *gparts;
+  /*! The (level 0) cells themselves. */
+  struct cell *cells_top;
 
-  /* The total number of parts in the space. */
+  /*! Buffer of unused cells for the sub-cells. */
+  struct cell *cells_sub;
+
+  /*! The total number of parts in the space. */
   size_t nr_parts, size_parts;
+
+  /*! The total number of g-parts in the space. */
   size_t nr_gparts, size_gparts;
 
-  /* Is the space periodic? */
-  int periodic;
+  /*! The particle data (cells have pointers to this). */
+  struct part *parts;
 
-  /* Are we doing gravity? */
-  int gravity;
+  /*! The extended particle data (cells have pointers to this). */
+  struct xpart *xparts;
 
-  /* General-purpose lock for this space. */
+  /*! The g-particle data (cells have pointers to this). */
+  struct gpart *gparts;
+
+  /*! General-purpose lock for this space. */
   swift_lock_type lock;
 
-  /* Number of queues in the system. */
+  /*! Number of queues in the system. */
   int nr_queues;
 
-  /* The associated engine. */
+  /*! The associated engine. */
   struct engine *e;
 
-  /* Buffers for parts that we will receive from foreign cells. */
+#ifdef WITH_MPI
+
+  /*! Buffers for parts that we will receive from foreign cells. */
   struct part *parts_foreign;
   size_t nr_parts_foreign, size_parts_foreign;
+
+  /*! Buffers for g-parts that we will receive from foreign cells. */
   struct gpart *gparts_foreign;
   size_t nr_gparts_foreign, size_gparts_foreign;
-};
 
-/* Interval stack necessary for parallel particle sorting. */
-struct qstack {
-  volatile ptrdiff_t i, j;
-  volatile int min, max;
-  volatile int ready;
-};
-struct parallel_sort {
-  struct part *parts;
-  struct gpart *gparts;
-  struct xpart *xparts;
-  int *ind;
-  struct qstack *stack;
-  unsigned int stack_size;
-  volatile unsigned int first, last, waiting;
+#endif
 };
 
 /* function prototypes. */