diff --git a/examples/plot_scaling_results.py b/examples/plot_scaling_results.py
index cf9c96d41fe8f0897acb6be1f9af2269abee6174..5a76e9870bd3ec55807c7b79c475c62b14119e5c 100755
--- a/examples/plot_scaling_results.py
+++ b/examples/plot_scaling_results.py
@@ -48,7 +48,7 @@ threadList = []
 hexcols = ['#332288', '#88CCEE', '#44AA99', '#117733', '#999933', '#DDCC77',
            '#CC6677', '#882255', '#AA4499', '#661100', '#6699CC', '#AA4466',
            '#4477AA']
-linestyle = (hexcols[0],hexcols[1],hexcols[3],hexcols[5],hexcols[6])
+linestyle = (hexcols[0],hexcols[1],hexcols[3],hexcols[5],hexcols[6],hexcols[8])
 #cmdLine = './swift_fixdt -s -t 16 cosmoVolume.yml'
 #platform = 'KNL'
 
@@ -68,6 +68,9 @@ elif len(sys.argv) == 5:
 elif len(sys.argv) == 6:
   inputFileNames = (sys.argv[1],sys.argv[2],sys.argv[3],sys.argv[4],sys.argv[5])
   numOfSeries = 5
+elif len(sys.argv) == 7:
+  inputFileNames = (sys.argv[1],sys.argv[2],sys.argv[3],sys.argv[4],sys.argv[5],sys.argv[6])
+  numOfSeries = 6
 
 # Get the names of the branch, Git revision, hydro scheme and hydro kernel
 def parse_header(inputFile):
@@ -76,7 +79,8 @@ def parse_header(inputFile):
     for line in f:
       if 'Branch:' in line:
         s = line.split()
-        branch.append(s[2])
+        line = s[2:]
+        branch.append(" ".join(line))
       elif 'Revision:' in line:
         s = line.split() 
         revision.append(s[2])
@@ -129,8 +133,8 @@ def parse_files():
     parse_header(file_list[0])
     
     version.append(branch[i] + " " + revision[i] + "\n" + hydro_scheme[i] + 
-                   "\n" + hydro_kernel[i] + r", $N_{ngb}$=" + hydro_neighbours[i] + 
-                   r", $\eta$=" + hydro_eta[i])                  
+                   "\n" + hydro_kernel[i] + r", $N_{ngb}=%d$"%float(hydro_neighbours[i]) + 
+                   r", $\eta=%.3f$"%float(hydro_eta[i]))
     times.append([])
     totalTime.append([])
     speedUp.append([])
@@ -176,7 +180,7 @@ def print_results(times,totalTime,parallelEff,version):
 
 def plot_results(times,totalTime,speedUp,parallelEff):
   
-  fig, axarr = plt.subplots(2, 2, figsize=(10,10), frameon=False)
+  fig, axarr = plt.subplots(2, 2, figsize=(10,10), frameon=True)
   speedUpPlot = axarr[0, 0]
   parallelEffPlot = axarr[0, 1]
   totalTimePlot = axarr[1, 0]
@@ -218,7 +222,7 @@ def plot_results(times,totalTime,speedUp,parallelEff):
   totalTimePlot.set_xlim([0.9, 10**(np.floor(np.log10(threadList[i][-1]))+0.5)])
   totalTimePlot.set_ylim([10**np.floor(np.log10(np.min(totalTime)*0.6)), 1.2*10**np.floor(np.log10(np.max(totalTime) * 1.5)+1)])
   
-  totalTimePlot.legend(bbox_to_anchor=(1.14, 1), loc=2, borderaxespad=0.,prop={'size':12})
+  totalTimePlot.legend(bbox_to_anchor=(1.14, 0.97), loc=2, borderaxespad=0.,prop={'size':12}, frameon=False)
   emptyPlot.axis('off')
   
   for i, txt in enumerate(threadList[0]):
diff --git a/src/Makefile.am b/src/Makefile.am
index 0a4b2c64e85b30d9d15b9216f6135517e4217cc5..84cfd823ad4f076d3984847eb846e18067c9a6ab 100644
--- a/src/Makefile.am
+++ b/src/Makefile.am
@@ -70,8 +70,8 @@ nobase_noinst_HEADERS = approx_math.h atomic.h cycle.h error.h inline.h kernel_h
                  hydro/Gadget2/hydro_debug.h hydro/Gadget2/hydro_part.h \
 		 hydro/Gizmo/hydro.h hydro/Gizmo/hydro_iact.h hydro/Gizmo/hydro_io.h \
                  hydro/Gizmo/hydro_debug.h hydro/Gizmo/hydro_part.h \
-	         riemann.h \
-		 riemann/riemann_hllc.h riemann/riemann_trrs.h riemann/riemann_exact.h \
+	         riemann.h riemann/riemann_hllc.h riemann/riemann_trrs.h \
+		 riemann/riemann_exact.h riemann/riemann_vacuum.h \
                  cooling/const_du/cooling.h cooling/const_lambda/cooling.h
 
 # Sources and flags for regular library
diff --git a/src/cell.c b/src/cell.c
index 6e0d13edcb92ee07334d151de064ed4fbec8c5a8..7ce6fb81a8fa6875884d3f5c840c36e5177cdf6b 100644
--- a/src/cell.c
+++ b/src/cell.c
@@ -580,7 +580,7 @@ void cell_split(struct cell *c, ptrdiff_t parts_offset) {
   }
 
   /* Re-link the gparts. */
-  part_relink_gparts(parts, count, parts_offset);
+  if (count > 0 && gcount > 0) part_relink_gparts(parts, count, parts_offset);
 
 #ifdef SWIFT_DEBUG_CHECKS
   /* Verify that _all_ the parts have been assigned to a cell. */
@@ -675,7 +675,8 @@ void cell_split(struct cell *c, ptrdiff_t parts_offset) {
   }
 
   /* Re-link the parts. */
-  part_relink_parts(gparts, gcount, parts - parts_offset);
+  if (count > 0 && gcount > 0)
+    part_relink_parts(gparts, gcount, parts - parts_offset);
 }
 
 /**
diff --git a/src/cell.h b/src/cell.h
index 50da67952490b85fc8b078d325066774a32c8c22..fd836206241c55cd6b9da2e29157c11a14c5a892 100644
--- a/src/cell.h
+++ b/src/cell.h
@@ -128,9 +128,14 @@ struct cell {
   /* Parent cell. */
   struct cell *parent;
 
-  /* Super cell, i.e. the highest-level supercell that has interactions. */
+  /* Super cell, i.e. the highest-level supercell that has hydro interactions.
+   */
   struct cell *super;
 
+  /* Super cell, i.e. the highest-level supercell that has gravity interactions.
+   */
+  struct cell *gsuper;
+
   /* The task computing this cell's sorts. */
   struct task *sorts;
   int sortsize;
diff --git a/src/debug.c b/src/debug.c
index 15354b7d419544a8456543b79c38235eb3b68b2c..d73bc86a92cf5ca28c202e7b567cf7c40ba6eccb 100644
--- a/src/debug.c
+++ b/src/debug.c
@@ -154,8 +154,9 @@ int checkSpacehmax(struct space *s) {
   /* Loop over local cells. */
   float cell_h_max = 0.0f;
   for (int k = 0; k < s->nr_cells; k++) {
-    if (s->cells[k].nodeID == s->e->nodeID && s->cells[k].h_max > cell_h_max) {
-      cell_h_max = s->cells[k].h_max;
+    if (s->cells_top[k].nodeID == s->e->nodeID &&
+        s->cells_top[k].h_max > cell_h_max) {
+      cell_h_max = s->cells_top[k].h_max;
     }
   }
 
@@ -172,9 +173,9 @@ int checkSpacehmax(struct space *s) {
 
   /* There is a problem. Hunt it down. */
   for (int k = 0; k < s->nr_cells; k++) {
-    if (s->cells[k].nodeID == s->e->nodeID) {
-      if (s->cells[k].h_max > part_h_max) {
-        message("cell %d is inconsistent (%f > %f)", k, s->cells[k].h_max,
+    if (s->cells_top[k].nodeID == s->e->nodeID) {
+      if (s->cells_top[k].h_max > part_h_max) {
+        message("cell %d is inconsistent (%f > %f)", k, s->cells_top[k].h_max,
                 part_h_max);
       }
     }
diff --git a/src/engine.c b/src/engine.c
index f88005b37d99e8faa4aea11f120629748fc42204..333d3e5852881159a97dc70f836c27b8933e6af2 100644
--- a/src/engine.c
+++ b/src/engine.c
@@ -118,10 +118,10 @@ void engine_addlink(struct engine *e, struct link **l, struct task *t) {
  *
  * @param e The #engine.
  * @param c The #cell.
- * @param super The super #cell.
+ * @param gsuper The gsuper #cell.
  */
 void engine_make_gravity_hierarchical_tasks(struct engine *e, struct cell *c,
-                                            struct cell *super) {
+                                            struct cell *gsuper) {
 
   struct scheduler *s = &e->sched;
   const int is_with_external_gravity =
@@ -130,10 +130,10 @@ void engine_make_gravity_hierarchical_tasks(struct engine *e, struct cell *c,
   const int is_fixdt = (e->policy & engine_policy_fixdt) == engine_policy_fixdt;
 
   /* Is this the super-cell? */
-  if (super == NULL && (c->grav != NULL || (!c->split && c->gcount > 0))) {
+  if (gsuper == NULL && (c->grav != NULL || (!c->split && c->gcount > 0))) {
 
     /* This is the super cell, i.e. the first with gravity tasks attached. */
-    super = c;
+    gsuper = c;
 
     /* Local tasks only... */
     if (c->nodeID == e->nodeID) {
@@ -161,13 +161,13 @@ void engine_make_gravity_hierarchical_tasks(struct engine *e, struct cell *c,
   }
 
   /* Set the super-cell. */
-  c->super = super;
+  c->gsuper = gsuper;
 
   /* Recurse. */
   if (c->split)
     for (int k = 0; k < 8; k++)
       if (c->progeny[k] != NULL)
-        engine_make_gravity_hierarchical_tasks(e, c->progeny[k], super);
+        engine_make_gravity_hierarchical_tasks(e, c->progeny[k], gsuper);
 }
 
 /**
@@ -262,7 +262,7 @@ void engine_redistribute(struct engine *e) {
   const int nr_nodes = e->nr_nodes;
   const int nodeID = e->nodeID;
   struct space *s = e->s;
-  struct cell *cells = s->cells;
+  struct cell *cells = s->cells_top;
   const int nr_cells = s->nr_cells;
   const int *cdim = s->cdim;
   const double iwidth[3] = {s->iwidth[0], s->iwidth[1], s->iwidth[2]};
@@ -541,12 +541,12 @@ void engine_redistribute(struct engine *e) {
 
 #ifdef SWIFT_DEBUG_CHECKS
   /* Verify that all parts are in the right place. */
-  for (int k = 0; k < nr_parts; k++) {
-    int cid = cell_getid(cdim, parts_new[k].x[0] * iwidth[0],
-                         parts_new[k].x[1] * iwidth[1],
-                         parts_new[k].x[2] * iwidth[2]);
+  for (size_t k = 0; k < nr_parts; k++) {
+    const int cid = cell_getid(cdim, parts_new[k].x[0] * iwidth[0],
+                               parts_new[k].x[1] * iwidth[1],
+                               parts_new[k].x[2] * iwidth[2]);
     if (cells[cid].nodeID != nodeID)
-      error("Received particle (%i) that does not belong here (nodeID=%i).", k,
+      error("Received particle (%zu) that does not belong here (nodeID=%i).", k,
             cells[cid].nodeID);
   }
 
@@ -567,7 +567,7 @@ void engine_redistribute(struct engine *e) {
   for (size_t k = 0; k < nr_parts; ++k) {
 
     if (parts_new[k].gpart != NULL &&
-        parts_new[k].gpart->id_or_neg_offset != -k) {
+        parts_new[k].gpart->id_or_neg_offset != -(ptrdiff_t)k) {
       error("Linking problem !");
     }
   }
@@ -857,7 +857,7 @@ void engine_exchange_cells(struct engine *e) {
 #ifdef WITH_MPI
 
   struct space *s = e->s;
-  struct cell *cells = s->cells;
+  struct cell *cells = s->cells_top;
   const int nr_cells = s->nr_cells;
   const int nr_proxies = e->nr_proxies;
   int offset[nr_cells];
@@ -1017,7 +1017,7 @@ void engine_exchange_strays(struct engine *e, size_t offset_parts,
   /* Put the parts and gparts into the corresponding proxies. */
   for (size_t k = 0; k < *Npart; k++) {
     /* Get the target node and proxy ID. */
-    const int node_id = e->s->cells[ind_part[k]].nodeID;
+    const int node_id = e->s->cells_top[ind_part[k]].nodeID;
     if (node_id < 0 || node_id >= e->nr_nodes)
       error("Bad node ID %i.", node_id);
     const int pid = e->proxy_ind[node_id];
@@ -1041,7 +1041,7 @@ void engine_exchange_strays(struct engine *e, size_t offset_parts,
                      &s->xparts[offset_parts + k], 1);
   }
   for (size_t k = 0; k < *Ngpart; k++) {
-    const int node_id = e->s->cells[ind_gpart[k]].nodeID;
+    const int node_id = e->s->cells_top[ind_gpart[k]].nodeID;
     if (node_id < 0 || node_id >= e->nr_nodes)
       error("Bad node ID %i.", node_id);
     const int pid = e->proxy_ind[node_id];
@@ -1246,7 +1246,7 @@ void engine_make_gravity_tasks(struct engine *e) {
   struct space *s = e->s;
   struct scheduler *sched = &e->sched;
   const int nodeID = e->nodeID;
-  struct cell *cells = s->cells;
+  struct cell *cells = s->cells_top;
   const int nr_cells = s->nr_cells;
 
   for (int cid = 0; cid < nr_cells; ++cid) {
@@ -1301,7 +1301,7 @@ void engine_make_hydroloop_tasks(struct engine *e) {
   struct scheduler *sched = &e->sched;
   const int nodeID = e->nodeID;
   const int *cdim = s->cdim;
-  struct cell *cells = s->cells;
+  struct cell *cells = s->cells_top;
 
   /* Run through the highest level of cells and add pairs. */
   for (int i = 0; i < cdim[0]; i++) {
@@ -1429,11 +1429,11 @@ static inline void engine_make_gravity_dependencies(struct scheduler *sched,
                                                     struct cell *c) {
 
   /* init --> gravity --> kick */
-  scheduler_addunlock(sched, c->super->init, gravity);
-  scheduler_addunlock(sched, gravity, c->super->kick);
+  scheduler_addunlock(sched, c->gsuper->init, gravity);
+  scheduler_addunlock(sched, gravity, c->gsuper->kick);
 
   /* grav_up --> gravity ( --> kick) */
-  scheduler_addunlock(sched, c->super->grav_up, gravity);
+  scheduler_addunlock(sched, c->gsuper->grav_up, gravity);
 }
 
 /**
@@ -1475,10 +1475,10 @@ void engine_link_gravity_tasks(struct engine *e) {
 
       /* Gather the multipoles --> mm interaction --> kick */
       scheduler_addunlock(sched, gather, t);
-      scheduler_addunlock(sched, t, t->ci->super->kick);
+      scheduler_addunlock(sched, t, t->ci->gsuper->kick);
 
       /* init --> mm interaction */
-      scheduler_addunlock(sched, t->ci->super->init, t);
+      scheduler_addunlock(sched, t->ci->gsuper->init, t);
     }
 
     /* Self-interaction? */
@@ -1496,7 +1496,7 @@ void engine_link_gravity_tasks(struct engine *e) {
         engine_make_gravity_dependencies(sched, t, t->ci);
       }
 
-      if (t->cj->nodeID == nodeID && t->ci->super != t->cj->super) {
+      if (t->cj->nodeID == nodeID && t->ci->gsuper != t->cj->gsuper) {
 
         engine_make_gravity_dependencies(sched, t, t->cj);
       }
@@ -1518,7 +1518,7 @@ void engine_link_gravity_tasks(struct engine *e) {
 
         engine_make_gravity_dependencies(sched, t, t->ci);
       }
-      if (t->cj->nodeID == nodeID && t->ci->super != t->cj->super) {
+      if (t->cj->nodeID == nodeID && t->ci->gsuper != t->cj->gsuper) {
 
         engine_make_gravity_dependencies(sched, t, t->cj);
       }
@@ -1814,7 +1814,7 @@ void engine_make_gravityrecursive_tasks(struct engine *e) {
   struct scheduler *sched = &e->sched;
   const int nodeID = e->nodeID;
   const int nr_cells = s->nr_cells;
-  struct cell *cells = s->cells;
+  struct cell *cells = s->cells_top;
 
   for (int k = 0; k < nr_cells; k++) {
 
@@ -1847,7 +1847,7 @@ void engine_maketasks(struct engine *e) {
 
   struct space *s = e->s;
   struct scheduler *sched = &e->sched;
-  struct cell *cells = s->cells;
+  struct cell *cells = s->cells_top;
   const int nr_cells = s->nr_cells;
   const ticks tic = getticks();
 
@@ -1971,7 +1971,7 @@ void engine_marktasks_fixdt_mapper(void *map_data, int num_elements,
 
       /* Too much particle movement? */
       if (t->tight &&
-          (fmaxf(ci->h_max, cj->h_max) + ci->dx_max + cj->dx_max > cj->dmin ||
+          (max(ci->h_max, cj->h_max) + ci->dx_max + cj->dx_max > cj->dmin ||
            ci->dx_max > space_maxreldx * ci->h_max ||
            cj->dx_max > space_maxreldx * cj->h_max))
         *rebuild_space = 1;
@@ -2043,7 +2043,7 @@ void engine_marktasks_mapper(void *map_data, int num_elements,
 
       /* Too much particle movement? */
       if (t->tight &&
-          (fmaxf(ci->h_max, cj->h_max) + ci->dx_max + cj->dx_max > cj->dmin ||
+          (max(ci->h_max, cj->h_max) + ci->dx_max + cj->dx_max > cj->dmin ||
            ci->dx_max > space_maxreldx * ci->h_max ||
            cj->dx_max > space_maxreldx * cj->h_max))
         *rebuild_space = 1;
@@ -2289,7 +2289,7 @@ void engine_prepare(struct engine *e) {
 
     /* First drift all particles to the current time */
     e->drift_all = 1;
-    threadpool_map(&e->threadpool, runner_do_drift_mapper, e->s->cells,
+    threadpool_map(&e->threadpool, runner_do_drift_mapper, e->s->cells_top,
                    e->s->nr_cells, sizeof(struct cell), 1, e);
 
     /* Restore the default drifting policy */
@@ -2407,8 +2407,8 @@ void engine_collect_timestep(struct engine *e) {
 
   /* Collect the cell data. */
   for (int k = 0; k < s->nr_cells; k++)
-    if (s->cells[k].nodeID == e->nodeID) {
-      struct cell *c = &s->cells[k];
+    if (s->cells_top[k].nodeID == e->nodeID) {
+      struct cell *c = &s->cells_top[k];
 
       /* Make the top-cells recurse */
       engine_collect_kick(c);
@@ -2461,8 +2461,8 @@ void engine_print_stats(struct engine *e) {
 
   /* Collect the cell data. */
   for (int k = 0; k < s->nr_cells; k++)
-    if (s->cells[k].nodeID == e->nodeID) {
-      struct cell *c = &s->cells[k];
+    if (s->cells_top[k].nodeID == e->nodeID) {
+      struct cell *c = &s->cells_top[k];
       mass += c->mass;
       e_kin += c->e_kin;
       e_int += c->e_int;
@@ -2677,7 +2677,7 @@ void engine_step(struct engine *e) {
 
     /* Drift everybody to the snapshot position */
     e->drift_all = 1;
-    threadpool_map(&e->threadpool, runner_do_drift_mapper, e->s->cells,
+    threadpool_map(&e->threadpool, runner_do_drift_mapper, e->s->cells_top,
                    e->s->nr_cells, sizeof(struct cell), 1, e);
 
     /* Restore the default drifting policy */
@@ -2717,7 +2717,7 @@ void engine_step(struct engine *e) {
   }
 
   /* Drift only the necessary particles */
-  threadpool_map(&e->threadpool, runner_do_drift_mapper, e->s->cells,
+  threadpool_map(&e->threadpool, runner_do_drift_mapper, e->s->cells_top,
                  e->s->nr_cells, sizeof(struct cell), 1, e);
 
   /* Re-distribute the particles amongst the nodes? */
@@ -2821,7 +2821,7 @@ void engine_makeproxies(struct engine *e) {
 #ifdef WITH_MPI
   const int *cdim = e->s->cdim;
   const struct space *s = e->s;
-  struct cell *cells = s->cells;
+  struct cell *cells = s->cells_top;
   struct proxy *proxies = e->proxies;
   ticks tic = getticks();
 
@@ -2952,7 +2952,8 @@ void engine_split(struct engine *e, struct partition *initial_partition) {
   s->xparts = xparts_new;
 
   /* Re-link the gparts. */
-  part_relink_gparts(s->parts, s->nr_parts, 0);
+  if (s->nr_parts > 0 && s->nr_gparts > 0)
+    part_relink_gparts(s->parts, s->nr_parts, 0);
 
   /* Re-allocate the local gparts. */
   if (e->verbose)
@@ -2968,7 +2969,8 @@ void engine_split(struct engine *e, struct partition *initial_partition) {
   s->gparts = gparts_new;
 
   /* Re-link the parts. */
-  part_relink_parts(s->gparts, s->nr_gparts, s->parts);
+  if (s->nr_parts > 0 && s->nr_gparts > 0)
+    part_relink_parts(s->gparts, s->nr_gparts, s->parts);
 
 #ifdef SWIFT_DEBUG_CHECKS
 
@@ -2988,7 +2990,8 @@ void engine_split(struct engine *e, struct partition *initial_partition) {
   }
   for (size_t k = 0; k < s->nr_parts; ++k) {
 
-    if (s->parts[k].gpart != NULL && s->parts[k].gpart->id_or_neg_offset != -k)
+    if (s->parts[k].gpart != NULL &&
+        s->parts[k].gpart->id_or_neg_offset != -(ptrdiff_t)k)
       error("Linking problem !");
   }
 
diff --git a/src/gravity/Default/gravity.h b/src/gravity/Default/gravity.h
index d4249c46a3150a357aaecfb02f9251901d97a157..2415e20ac5eb68f1b773b990bab232707166903c 100644
--- a/src/gravity/Default/gravity.h
+++ b/src/gravity/Default/gravity.h
@@ -21,6 +21,7 @@
 #define SWIFT_DEFAULT_GRAVITY_H
 
 #include <float.h>
+#include "minmax.h"
 #include "potentials.h"
 
 /**
@@ -41,16 +42,14 @@ gravity_compute_timestep_external(const struct external_potential* potential,
   float dt = FLT_MAX;
 
 #ifdef EXTERNAL_POTENTIAL_POINTMASS
-  dt =
-      fminf(dt, external_gravity_pointmass_timestep(potential, phys_const, gp));
+  dt = min(dt, external_gravity_pointmass_timestep(potential, phys_const, gp));
 #endif
 #ifdef EXTERNAL_POTENTIAL_ISOTHERMALPOTENTIAL
-  dt = fminf(dt, external_gravity_isothermalpotential_timestep(potential,
-                                                               phys_const, gp));
+  dt = min(dt, external_gravity_isothermalpotential_timestep(potential,
+                                                             phys_const, gp));
 #endif
 #ifdef EXTERNAL_POTENTIAL_DISK_PATCH
-  dt = fminf(dt,
-             external_gravity_disk_patch_timestep(potential, phys_const, gp));
+  dt = min(dt, external_gravity_disk_patch_timestep(potential, phys_const, gp));
 #endif
   return dt;
 }
diff --git a/src/hydro/Default/hydro.h b/src/hydro/Default/hydro.h
index f61bff55821809fe1f5da27c95d75afbecbc04cc..ccdd0cee32b9386eff54da655b75285b8e08a598 100644
--- a/src/hydro/Default/hydro.h
+++ b/src/hydro/Default/hydro.h
@@ -22,6 +22,7 @@
 #include "adiabatic_index.h"
 #include "approx_math.h"
 #include "equation_of_state.h"
+#include "minmax.h"
 
 #include <float.h>
 
@@ -148,7 +149,7 @@ __attribute__((always_inline)) INLINE static float hydro_compute_timestep(
       (p->force.u_dt != 0.0f) ? fabsf(const_max_u_change * p->u / p->force.u_dt)
                               : FLT_MAX;
 
-  return fminf(dt_cfl, dt_u_change);
+  return min(dt_cfl, dt_u_change);
 }
 
 /**
@@ -273,7 +274,7 @@ __attribute__((always_inline)) INLINE static void hydro_prepare_force(
   const float tau = h / (2.f * const_viscosity_length * p->force.soundspeed);
 
   /* Viscosity source term */
-  const float S = fmaxf(-normDiv_v, 0.f);
+  const float S = max(-normDiv_v, 0.f);
 
   /* Compute the particle's viscosity parameter time derivative */
   const float alpha_dot = (const_viscosity_alpha_min - p->alpha) / tau +
diff --git a/src/hydro/Default/hydro_iact.h b/src/hydro/Default/hydro_iact.h
index 51fa7d07229f86918ef2d7019a9708110cef02e3..7b1c8c3b91ce917af46efc28f6001a4d47747e2a 100644
--- a/src/hydro/Default/hydro_iact.h
+++ b/src/hydro/Default/hydro_iact.h
@@ -395,7 +395,7 @@ __attribute__((always_inline)) INLINE static void runner_iact_force(
 
   /* Compute the relative velocity. (This is 0 if the particles move away from
    * each other and negative otherwise) */
-  omega_ij = fminf(dvdr, 0.f);
+  omega_ij = min(dvdr, 0.f);
 
   /* Compute signal velocity */
   v_sig = pi->force.soundspeed + pj->force.soundspeed - 2.0f * omega_ij;
@@ -441,8 +441,8 @@ __attribute__((always_inline)) INLINE static void runner_iact_force(
   pj->force.h_dt -= mi * dvdr / rhoi * wj_dr;
 
   /* Update the signal velocity. */
-  pi->force.v_sig = fmaxf(pi->force.v_sig, v_sig);
-  pj->force.v_sig = fmaxf(pj->force.v_sig, v_sig);
+  pi->force.v_sig = max(pi->force.v_sig, v_sig);
+  pj->force.v_sig = max(pj->force.v_sig, v_sig);
 }
 
 /**
@@ -635,8 +635,8 @@ __attribute__((always_inline)) INLINE static void runner_iact_vec_force(
     pj[k]->force.u_dt += pju_dt.f[k];
     pi[k]->force.h_dt -= pih_dt.f[k];
     pj[k]->force.h_dt -= pjh_dt.f[k];
-    pi[k]->force.v_sig = fmaxf(pi[k]->force.v_sig, v_sig.f[k]);
-    pj[k]->force.v_sig = fmaxf(pj[k]->force.v_sig, v_sig.f[k]);
+    pi[k]->force.v_sig = max(pi[k]->force.v_sig, v_sig.f[k]);
+    pj[k]->force.v_sig = max(pj[k]->force.v_sig, v_sig.f[k]);
     for (j = 0; j < 3; j++) {
       pi[k]->a_hydro[j] -= pia[j].f[k];
       pj[k]->a_hydro[j] += pja[j].f[k];
@@ -696,7 +696,7 @@ __attribute__((always_inline)) INLINE static void runner_iact_nonsym_force(
 
   /* Compute the relative velocity. (This is 0 if the particles move away from
    * each other and negative otherwise) */
-  omega_ij = fminf(dvdr, 0.f);
+  omega_ij = min(dvdr, 0.f);
 
   /* Compute signal velocity */
   v_sig = pi->force.soundspeed + pj->force.soundspeed - 2.0f * omega_ij;
@@ -737,7 +737,7 @@ __attribute__((always_inline)) INLINE static void runner_iact_nonsym_force(
   pi->force.h_dt -= mj * dvdr / rhoj * wi_dr;
 
   /* Update the signal velocity. */
-  pi->force.v_sig = fmaxf(pi->force.v_sig, v_sig);
+  pi->force.v_sig = max(pi->force.v_sig, v_sig);
 }
 
 /**
@@ -920,7 +920,7 @@ __attribute__((always_inline)) INLINE static void runner_iact_nonsym_vec_force(
   for (k = 0; k < VEC_SIZE; k++) {
     pi[k]->force.u_dt += piu_dt.f[k];
     pi[k]->force.h_dt -= pih_dt.f[k];
-    pi[k]->force.v_sig = fmaxf(pi[k]->force.v_sig, v_sig.f[k]);
+    pi[k]->force.v_sig = max(pi[k]->force.v_sig, v_sig.f[k]);
     for (j = 0; j < 3; j++) pi[k]->a_hydro[j] -= pia[j].f[k];
   }
 
diff --git a/src/hydro/Gadget2/hydro_iact.h b/src/hydro/Gadget2/hydro_iact.h
index 0108e0663c0d84ff6b5698456f6be34d5ee08c14..8a4edfe62f59a3fae551fdb65f46987509f89251 100644
--- a/src/hydro/Gadget2/hydro_iact.h
+++ b/src/hydro/Gadget2/hydro_iact.h
@@ -20,6 +20,8 @@
 #ifndef SWIFT_GADGET2_HYDRO_IACT_H
 #define SWIFT_GADGET2_HYDRO_IACT_H
 
+#include "minmax.h"
+
 /**
  * @brief SPH interaction functions following the Gadget-2 version of SPH.
  *
@@ -641,8 +643,8 @@ __attribute__((always_inline)) INLINE static void runner_iact_vec_force(
     }
     pi[k]->force.h_dt -= pih_dt.f[k];
     pj[k]->force.h_dt -= pjh_dt.f[k];
-    pi[k]->force.v_sig = fmaxf(pi[k]->force.v_sig, v_sig.f[k]);
-    pj[k]->force.v_sig = fmaxf(pj[k]->force.v_sig, v_sig.f[k]);
+    pi[k]->force.v_sig = max(pi[k]->force.v_sig, v_sig.f[k]);
+    pj[k]->force.v_sig = max(pj[k]->force.v_sig, v_sig.f[k]);
     pi[k]->entropy_dt += entropy_dt.f[k] * mj.f[k];
     pj[k]->entropy_dt += entropy_dt.f[k] * mi.f[k];
   }
@@ -900,7 +902,7 @@ __attribute__((always_inline)) INLINE static void runner_iact_nonsym_vec_force(
   for (k = 0; k < VEC_SIZE; k++) {
     for (j = 0; j < 3; j++) pi[k]->a_hydro[j] -= pia[j].f[k];
     pi[k]->force.h_dt -= pih_dt.f[k];
-    pi[k]->force.v_sig = fmaxf(pi[k]->force.v_sig, v_sig.f[k]);
+    pi[k]->force.v_sig = max(pi[k]->force.v_sig, v_sig.f[k]);
     pi[k]->entropy_dt += entropy_dt.f[k];
   }
 
diff --git a/src/hydro/Gizmo/hydro.h b/src/hydro/Gizmo/hydro.h
index e24a44529dc1907c9ceadeedfbfc9e49c308bcda..9dab5d7fd96a833ba9ea56889139c04000634645 100644
--- a/src/hydro/Gizmo/hydro.h
+++ b/src/hydro/Gizmo/hydro.h
@@ -21,6 +21,7 @@
 #include "adiabatic_index.h"
 #include "approx_math.h"
 #include "hydro_gradients.h"
+#include "minmax.h"
 
 /**
  * @brief Computes the hydro time-step of a given particle
diff --git a/src/hydro/Gizmo/hydro_iact.h b/src/hydro/Gizmo/hydro_iact.h
index 79973364617bb04855115bff9bfbf3808f46d04f..cf2b9a223b49c3ce2fbd6874b83c523e8213a5ce 100644
--- a/src/hydro/Gizmo/hydro_iact.h
+++ b/src/hydro/Gizmo/hydro_iact.h
@@ -242,14 +242,14 @@ __attribute__((always_inline)) INLINE static void runner_iact_fluxes_common(
   if (dvdotdx > 0.) {
     vmax -= dvdotdx / r;
   }
-  pi->timestepvars.vmax = fmaxf(pi->timestepvars.vmax, vmax);
+  pi->timestepvars.vmax = max(pi->timestepvars.vmax, vmax);
   if (mode == 1) {
-    pj->timestepvars.vmax = fmaxf(pj->timestepvars.vmax, vmax);
+    pj->timestepvars.vmax = max(pj->timestepvars.vmax, vmax);
   }
 
   /* The flux will be exchanged using the smallest time step of the two
    * particles */
-  mindt = fminf(dti, dtj);
+  mindt = min(dti, dtj);
   dti = mindt;
   dtj = mindt;
 
diff --git a/src/hydro/Minimal/hydro_iact.h b/src/hydro/Minimal/hydro_iact.h
index a8a855d9db81f6927c1d8b45410a57d50a8366de..9e2028c978dc7cad03cfba17931f645bbfbfe1a0 100644
--- a/src/hydro/Minimal/hydro_iact.h
+++ b/src/hydro/Minimal/hydro_iact.h
@@ -34,6 +34,7 @@
  */
 
 #include "adiabatic_index.h"
+#include "minmax.h"
 
 /**
  * @brief Density loop
@@ -161,7 +162,7 @@ __attribute__((always_inline)) INLINE static void runner_iact_force(
                      (pi->v[2] - pj->v[2]) * dx[2];
 
   /* Are the particles moving towards each others ? */
-  const float omega_ij = fminf(dvdr, 0.f);
+  const float omega_ij = min(dvdr, 0.f);
   const float mu_ij = fac_mu * r_inv * omega_ij; /* This is 0 or negative */
 
   /* Compute sound speeds and signal velocity */
@@ -212,8 +213,8 @@ __attribute__((always_inline)) INLINE static void runner_iact_force(
   pj->force.h_dt -= mi * dvdr * r_inv / rhoi * wj_dr;
 
   /* Update the signal velocity. */
-  pi->force.v_sig = fmaxf(pi->force.v_sig, v_sig);
-  pj->force.v_sig = fmaxf(pj->force.v_sig, v_sig);
+  pi->force.v_sig = max(pi->force.v_sig, v_sig);
+  pj->force.v_sig = max(pj->force.v_sig, v_sig);
 }
 
 /**
@@ -272,7 +273,7 @@ __attribute__((always_inline)) INLINE static void runner_iact_nonsym_force(
                      (pi->v[2] - pj->v[2]) * dx[2];
 
   /* Are the particles moving towards each others ? */
-  const float omega_ij = fminf(dvdr, 0.f);
+  const float omega_ij = min(dvdr, 0.f);
   const float mu_ij = fac_mu * r_inv * omega_ij; /* This is 0 or negative */
 
   /* Compute sound speeds and signal velocity */
@@ -315,7 +316,7 @@ __attribute__((always_inline)) INLINE static void runner_iact_nonsym_force(
   pi->force.h_dt -= mj * dvdr * r_inv / rhoj * wi_dr;
 
   /* Update the signal velocity. */
-  pi->force.v_sig = fmaxf(pi->force.v_sig, v_sig);
+  pi->force.v_sig = max(pi->force.v_sig, v_sig);
 }
 
 /**
diff --git a/src/minmax.h b/src/minmax.h
index 8000df6edf93a30b964c578c6f25d324fed3f4cf..9d92cd71d849dba615fdb05bc342014e0593d989 100644
--- a/src/minmax.h
+++ b/src/minmax.h
@@ -24,11 +24,11 @@
  *
  * This macro evaluates its arguments exactly once.
  */
-#define min(a, b)           \
-  ({                        \
-    __typeof__(a) _a = (a); \
-    __typeof__(b) _b = (b); \
-    _a < _b ? _a : _b;      \
+#define min(a, b)                 \
+  ({                              \
+    const __typeof__(a) _a = (a); \
+    const __typeof__(b) _b = (b); \
+    _a < _b ? _a : _b;            \
   })
 
 /**
@@ -36,11 +36,11 @@
  *
  * This macro evaluates its arguments exactly once.
  */
-#define max(a, b)           \
-  ({                        \
-    __typeof__(a) _a = (a); \
-    __typeof__(b) _b = (b); \
-    _a > _b ? _a : _b;      \
+#define max(a, b)                 \
+  ({                              \
+    const __typeof__(a) _a = (a); \
+    const __typeof__(b) _b = (b); \
+    _a > _b ? _a : _b;            \
   })
 
 #endif /* SWIFT_MINMAX_H */
diff --git a/src/partition.c b/src/partition.c
index e216e12a5a23457b39b53070de3d84a2f257b927..8d17bedf0aaeadc64044b12ffe1bb8887b02d83e 100644
--- a/src/partition.c
+++ b/src/partition.c
@@ -143,7 +143,7 @@ static void split_vector(struct space *s, int nregions, int *samplecells) {
             select = l;
           }
         }
-        s->cells[n++].nodeID = select;
+        s->cells_top[n++].nodeID = select;
       }
     }
   }
@@ -274,7 +274,7 @@ static void accumulate_counts(struct space *s, int *counts) {
  */
 static void split_metis(struct space *s, int nregions, int *celllist) {
 
-  for (int i = 0; i < s->nr_cells; i++) s->cells[i].nodeID = celllist[i];
+  for (int i = 0; i < s->nr_cells; i++) s->cells_top[i].nodeID = celllist[i];
 }
 #endif
 
@@ -419,7 +419,7 @@ static void repart_edge_metis(int partweights, int bothweights, int nodeID,
   /* Create weight arrays using task ticks for vertices and edges (edges
    * assume the same graph structure as used in the part_ calls). */
   int nr_cells = s->nr_cells;
-  struct cell *cells = s->cells;
+  struct cell *cells = s->cells_top;
   float wscale = 1e-3, vscale = 1e-3, wscale_buff = 0.0;
   int wtot = 0;
   int wmax = 1e9 / nr_nodes;
@@ -795,7 +795,7 @@ void partition_initial_partition(struct partition *initial_partition,
     /* Run through the cells and set their nodeID. */
     // message("s->dim = [%e,%e,%e]", s->dim[0], s->dim[1], s->dim[2]);
     for (k = 0; k < s->nr_cells; k++) {
-      c = &s->cells[k];
+      c = &s->cells_top[k];
       for (j = 0; j < 3; j++)
         ind[j] = c->loc[j] / s->dim[j] * initial_partition->grid[j];
       c->nodeID = ind[0] +
@@ -1037,10 +1037,10 @@ static int check_complete(struct space *s, int verbose, int nregions) {
   int failed = 0;
   for (int i = 0; i < nregions; i++) present[i] = 0;
   for (int i = 0; i < s->nr_cells; i++) {
-    if (s->cells[i].nodeID <= nregions)
-      present[s->cells[i].nodeID]++;
+    if (s->cells_top[i].nodeID <= nregions)
+      present[s->cells_top[i].nodeID]++;
     else
-      message("Bad nodeID: %d", s->cells[i].nodeID);
+      message("Bad nodeID: %d", s->cells_top[i].nodeID);
   }
   for (int i = 0; i < nregions; i++) {
     if (!present[i]) {
@@ -1085,13 +1085,13 @@ int partition_space_to_space(double *oldh, double *oldcdim, int *oldnodeIDs,
       for (int k = 0; k < s->cdim[2]; k++) {
 
         /* Scale indices to old cell space. */
-        int ii = rint(i * s->iwidth[0] * oldh[0]);
-        int jj = rint(j * s->iwidth[1] * oldh[1]);
-        int kk = rint(k * s->iwidth[2] * oldh[2]);
+        const int ii = rint(i * s->iwidth[0] * oldh[0]);
+        const int jj = rint(j * s->iwidth[1] * oldh[1]);
+        const int kk = rint(k * s->iwidth[2] * oldh[2]);
 
-        int cid = cell_getid(s->cdim, i, j, k);
-        int oldcid = cell_getid(oldcdim, ii, jj, kk);
-        s->cells[cid].nodeID = oldnodeIDs[oldcid];
+        const int cid = cell_getid(s->cdim, i, j, k);
+        const int oldcid = cell_getid(oldcdim, ii, jj, kk);
+        s->cells_top[cid].nodeID = oldnodeIDs[oldcid];
 
         if (oldnodeIDs[oldcid] > nr_nodes) nr_nodes = oldnodeIDs[oldcid];
       }
diff --git a/src/queue.c b/src/queue.c
index 38f8620fdc75d744df31513792e96323dbf83647..af4dfa3c94470814d4f6e7f53687a2fcba69d6dd 100644
--- a/src/queue.c
+++ b/src/queue.c
@@ -188,7 +188,7 @@ struct task *queue_gettask(struct queue *q, const struct task *prev,
   /* Set some pointers we will use often. */
   int *qtid = q->tid;
   struct task *qtasks = q->tasks;
-  const int qcount = q->count;
+  const int old_qcount = q->count;
 
   /* Data for the sliding window in which to try the task with the
      best overlap with the previous task. */
@@ -201,7 +201,7 @@ struct task *queue_gettask(struct queue *q, const struct task *prev,
   int ind = -1;
 
   /* Loop over the queue entries. */
-  for (int k = 0; k < qcount; k++) {
+  for (int k = 0; k < old_qcount; k++) {
     if (k < queue_search_window) {
       window[window_count].ind = k;
       window[window_count].tid = qtid[k];
diff --git a/src/riemann/riemann_exact.h b/src/riemann/riemann_exact.h
index 9763d9f0d12da32d9142c24481105b9f139be588..10dfe56ef35a82e721a715bbb8c7a71979b8e6ce 100644
--- a/src/riemann/riemann_exact.h
+++ b/src/riemann/riemann_exact.h
@@ -31,6 +31,7 @@
 
 #include <float.h>
 #include "adiabatic_index.h"
+#include "minmax.h"
 #include "riemann_vacuum.h"
 
 /**
@@ -145,12 +146,12 @@ __attribute__((always_inline)) INLINE static float riemann_guess_p(
   float pguess, pmin, pmax, qmax;
   float ppv;
 
-  pmin = fminf(WL[4], WR[4]);
-  pmax = fmaxf(WL[4], WR[4]);
+  pmin = min(WL[4], WR[4]);
+  pmax = max(WL[4], WR[4]);
   qmax = pmax / pmin;
   ppv =
       0.5f * (WL[4] + WR[4]) - 0.125f * (vR - vL) * (WL[0] + WR[0]) * (aL + aR);
-  ppv = fmaxf(1.e-8f, ppv);
+  ppv = max(1.e-8f, ppv);
   if (qmax <= 2.0f && pmin <= ppv && ppv <= pmax) {
     pguess = ppv;
   } else {
@@ -171,7 +172,7 @@ __attribute__((always_inline)) INLINE static float riemann_guess_p(
      value for pressure (...).
      Thus in order to avoid negative guess values we introduce the small
      positive constant _tolerance" */
-  pguess = fmaxf(1.e-8f, pguess);
+  pguess = max(1.e-8f, pguess);
   return pguess;
 }
 
diff --git a/src/riemann/riemann_hllc.h b/src/riemann/riemann_hllc.h
index fdc22ce05b8d63bdba66e530d1a5a968801a9f10..b8b1239d7799221c98522c06631aba5cabe69183 100644
--- a/src/riemann/riemann_hllc.h
+++ b/src/riemann/riemann_hllc.h
@@ -21,6 +21,7 @@
 #define SWIFT_RIEMANN_HLLC_H
 
 #include "adiabatic_index.h"
+#include "minmax.h"
 #include "riemann_vacuum.h"
 
 __attribute__((always_inline)) INLINE static void riemann_solve_for_flux(
@@ -57,7 +58,7 @@ __attribute__((always_inline)) INLINE static void riemann_solve_for_flux(
   rhobar = 0.5 * (WL[0] + WR[0]);
   abar = 0.5 * (aL + aR);
   pPVRS = 0.5 * (WL[4] + WR[4]) - 0.5 * (uR - uL) * rhobar * abar;
-  pstar = fmaxf(0., pPVRS);
+  pstar = max(0., pPVRS);
 
   /* STEP 2: wave speed estimates
      all these speeds are along the interface normal, since uL and uR are */
diff --git a/src/runner.c b/src/runner.c
index d3fc7bc9e04596bc2fb848b14c1c1cde56cd3fe7..44ab5d5d3f31b6a9b6a1d1fdbb9bfea17419536a 100644
--- a/src/runner.c
+++ b/src/runner.c
@@ -58,6 +58,18 @@
 #include "timers.h"
 #include "timestep.h"
 
+/**
+ * @brief  Entry in a list of sorted indices.
+ */
+struct entry {
+
+  /*! Distance on the axis */
+  float d;
+
+  /*! Particle index */
+  int i;
+};
+
 /* Orientation of the cell pairs */
 const double runner_shift[13][3] = {
     {5.773502691896258e-01, 5.773502691896258e-01, 5.773502691896258e-01},
@@ -811,9 +823,8 @@ static void runner_do_drift(struct cell *c, struct engine *e) {
         /* Recurse. */
         runner_do_drift(cp, e);
 
-        /* Collect */
-        dx_max = fmaxf(dx_max, cp->dx_max);
-        h_max = fmaxf(h_max, cp->h_max);
+        dx_max = max(dx_max, cp->dx_max);
+        h_max = max(h_max, cp->h_max);
         mass += cp->mass;
         e_kin += cp->e_kin;
         e_int += cp->e_int;
@@ -1141,7 +1152,7 @@ void runner_do_recv_cell(struct runner *r, struct cell *c, int timer) {
       // if(ti_end < ti_current) error("Received invalid particle !");
       ti_end_min = min(ti_end_min, ti_end);
       ti_end_max = max(ti_end_max, ti_end);
-      h_max = fmaxf(h_max, parts[k].h);
+      h_max = max(h_max, parts[k].h);
     }
     for (size_t k = 0; k < nr_gparts; k++) {
       const int ti_end = gparts[k].ti_end;
@@ -1159,7 +1170,7 @@ void runner_do_recv_cell(struct runner *r, struct cell *c, int timer) {
         runner_do_recv_cell(r, c->progeny[k], 0);
         ti_end_min = min(ti_end_min, c->progeny[k]->ti_end_min);
         ti_end_max = max(ti_end_max, c->progeny[k]->ti_end_max);
-        h_max = fmaxf(h_max, c->progeny[k]->h_max);
+        h_max = max(h_max, c->progeny[k]->h_max);
       }
     }
   }
diff --git a/src/runner_doiact.h b/src/runner_doiact.h
index 376400926432a9e9b6b9c736260dc3e119c2c64d..3c968cbf7d955198ad6bb44ab70e93af17735e99 100644
--- a/src/runner_doiact.h
+++ b/src/runner_doiact.h
@@ -1739,7 +1739,7 @@ void DOSUB_PAIR1(struct runner *r, struct cell *ci, struct cell *cj, int sid,
   if (ci->ti_end_min > ti_current && cj->ti_end_min > ti_current) return;
 
   /* Get the cell dimensions. */
-  const float h = fminf(ci->width[0], fminf(ci->width[1], ci->width[2]));
+  const float h = min(ci->width[0], min(ci->width[1], ci->width[2]));
 
   /* Get the type of pair if not specified explicitly. */
   // if ( sid < 0 )
@@ -1748,7 +1748,7 @@ void DOSUB_PAIR1(struct runner *r, struct cell *ci, struct cell *cj, int sid,
 
   /* Recurse? */
   if (ci->split && cj->split &&
-      fmaxf(ci->h_max, cj->h_max) * kernel_gamma + ci->dx_max + cj->dx_max <
+      max(ci->h_max, cj->h_max) * kernel_gamma + ci->dx_max + cj->dx_max <
           h / 2) {
 
     /* Different types of flags. */
@@ -2023,7 +2023,7 @@ void DOSUB_PAIR2(struct runner *r, struct cell *ci, struct cell *cj, int sid,
   if (ci->ti_end_min > ti_current && cj->ti_end_min > ti_current) return;
 
   /* Get the cell dimensions. */
-  const float h = fminf(ci->width[0], fminf(ci->width[1], ci->width[2]));
+  const float h = min(ci->width[0], min(ci->width[1], ci->width[2]));
 
   /* Get the type of pair if not specified explicitly. */
   // if ( sid < 0 )
@@ -2032,7 +2032,7 @@ void DOSUB_PAIR2(struct runner *r, struct cell *ci, struct cell *cj, int sid,
 
   /* Recurse? */
   if (ci->split && cj->split &&
-      fmaxf(ci->h_max, cj->h_max) * kernel_gamma + ci->dx_max + cj->dx_max <
+      max(ci->h_max, cj->h_max) * kernel_gamma + ci->dx_max + cj->dx_max <
           h / 2) {
 
     /* Different types of flags. */
@@ -2336,11 +2336,11 @@ void DOSUB_SUBSET(struct runner *r, struct cell *ci, struct part *parts,
   else {
 
     /* Get the cell dimensions. */
-    const float h = fminf(ci->width[0], fminf(ci->width[1], ci->width[2]));
+    const float h = min(ci->width[0], min(ci->width[1], ci->width[2]));
 
     /* Recurse? */
     if (ci->split && cj->split &&
-        fmaxf(ci->h_max, cj->h_max) * kernel_gamma + ci->dx_max + cj->dx_max <
+        max(ci->h_max, cj->h_max) * kernel_gamma + ci->dx_max + cj->dx_max <
             h / 2) {
 
       /* Get the type of pair if not specified explicitly. */
diff --git a/src/runner_doiact_grav.h b/src/runner_doiact_grav.h
index a220ad1794d23999ff16752e797a499071fa2e65..0fcd2d2e80a72b92588acd5b8275b9dafc68df45 100644
--- a/src/runner_doiact_grav.h
+++ b/src/runner_doiact_grav.h
@@ -488,7 +488,7 @@ static void runner_do_grav_mm(struct runner *r, struct cell *ci, int timer) {
 
   /* Recover the list of top-level cells */
   const struct engine *e = r->e;
-  struct cell *cells = e->s->cells;
+  struct cell *cells = e->s->cells_top;
   const int nr_cells = e->s->nr_cells;
   const int ti_current = e->ti_current;
   const double max_d =
diff --git a/src/scheduler.c b/src/scheduler.c
index ef4d19107fb48684ca299f286436a155a6fe0151..fa065879f7daeb60cfeeb67e52c64eb2036cf3cb 100644
--- a/src/scheduler.c
+++ b/src/scheduler.c
@@ -152,8 +152,9 @@ static void scheduler_splittask(struct task *t, struct scheduler *s) {
       if (ci->split) {
 
         /* Make a sub? */
-        if (scheduler_dosub && (ci->count * ci->count < space_subsize ||
-                                ci->gcount * ci->gcount < space_subsize)) {
+        if (scheduler_dosub &&
+            ((ci->count > 0 && ci->count < space_subsize / ci->count) ||
+             (ci->gcount > 0 && ci->gcount < space_subsize / ci->gcount))) {
 
           /* convert to a self-subtask. */
           t->type = task_type_sub_self;
@@ -763,8 +764,9 @@ void scheduler_set_unlocks(struct scheduler *s) {
     t->unlock_tasks = &s->unlocks[offsets[k]];
   }
 
+#ifdef SWIFT_DEBUG_CHECKS
   /* Verify that there are no duplicate unlocks. */
-  /* for (int k = 0; k < s->nr_tasks; k++) {
+  for (int k = 0; k < s->nr_tasks; k++) {
     struct task *t = &s->tasks[k];
     for (int i = 0; i < t->nr_unlock_tasks; i++) {
       for (int j = i + 1; j < t->nr_unlock_tasks; j++) {
@@ -772,7 +774,8 @@ void scheduler_set_unlocks(struct scheduler *s) {
           error("duplicate unlock!");
       }
     }
-  } */
+  }
+#endif
 
   /* Clean up. */
   free(counts);
@@ -861,9 +864,11 @@ void scheduler_reset(struct scheduler *s, int size) {
     if (s->tasks_ind != NULL) free(s->tasks_ind);
 
     /* Allocate the new lists. */
-    if ((s->tasks = (struct task *)malloc(sizeof(struct task) * size)) ==
-            NULL ||
-        (s->tasks_ind = (int *)malloc(sizeof(int) * size)) == NULL)
+    if (posix_memalign((void *)&s->tasks, task_align,
+                       size * sizeof(struct task)) != 0)
+      error("Failed to allocate task array.");
+
+    if ((s->tasks_ind = (int *)malloc(sizeof(int) * size)) == NULL)
       error("Failed to allocate task lists.");
   }
 
@@ -917,7 +922,7 @@ void scheduler_reweight(struct scheduler *s) {
                        (sizeof(int) * 8 - intrinsics_clz(t->ci->count));
           break;
         case task_type_self:
-          t->weight += 1 * t->ci->count * t->ci->count;
+          t->weight += 1 * wscale * t->ci->count * t->ci->count;
           break;
         case task_type_pair:
           if (t->ci->nodeID != nodeID || t->cj->nodeID != nodeID)
@@ -962,7 +967,7 @@ void scheduler_reweight(struct scheduler *s) {
   // clocks_from_ticks( getticks() - tic ), clocks_getunit());
 
   /* int min = tasks[0].weight, max = tasks[0].weight;
-  for ( k = 1 ; k < nr_tasks ; k++ )
+  for ( int k = 1 ; k < nr_tasks ; k++ )
       if ( tasks[k].weight < min )
           min = tasks[k].weight;
       else if ( tasks[k].weight > max )
@@ -1104,10 +1109,20 @@ void scheduler_enqueue(struct scheduler *s, struct task *t) {
         break;
       case task_type_pair:
       case task_type_sub_pair:
-        qid = t->ci->super->owner;
-        if (qid < 0 ||
-            s->queues[qid].count > s->queues[t->cj->super->owner].count)
-          qid = t->cj->super->owner;
+        if (t->subtype == task_subtype_grav) {
+
+          qid = t->ci->gsuper->owner;
+          if (qid < 0 ||
+              s->queues[qid].count > s->queues[t->cj->gsuper->owner].count)
+            qid = t->cj->gsuper->owner;
+
+        } else {
+
+          qid = t->ci->super->owner;
+          if (qid < 0 ||
+              s->queues[qid].count > s->queues[t->cj->super->owner].count)
+            qid = t->cj->super->owner;
+        }
         break;
       case task_type_recv:
 #ifdef WITH_MPI
diff --git a/src/space.c b/src/space.c
index de6ac7226f01b815d39f3c689bdf23d7ad234c20..cdd5958cbc515003f4a86a41c9a7075fa3b4364f 100644
--- a/src/space.c
+++ b/src/space.c
@@ -88,6 +88,28 @@ const int sortlistID[27] = {
     /* (  1 ,  1 ,  0 ) */ 1,
     /* (  1 ,  1 ,  1 ) */ 0};
 
+/**
+ * @brief Interval stack necessary for parallel particle sorting.
+ */
+struct qstack {
+  volatile ptrdiff_t i, j;
+  volatile int min, max;
+  volatile int ready;
+};
+
+/**
+ * @brief Parallel particle-sorting stack
+ */
+struct parallel_sort {
+  struct part *parts;
+  struct gpart *gparts;
+  struct xpart *xparts;
+  int *ind;
+  struct qstack *stack;
+  unsigned int stack_size;
+  volatile unsigned int first, last, waiting;
+};
+
 /**
  * @brief Get the shift-id of the given pair of cells, swapping them
  *      if need be.
@@ -99,7 +121,6 @@ const int sortlistID[27] = {
  *
  * @return The shift ID and set shift, may or may not swap ci and cj.
  */
-
 int space_getsid(struct space *s, struct cell **ci, struct cell **cj,
                  double *shift) {
 
@@ -138,8 +159,9 @@ int space_getsid(struct space *s, struct cell **ci, struct cell **cj,
 /**
  * @brief Recursively dismantle a cell tree.
  *
+ * @param s The #space.
+ * @param c The #cell to recycle.
  */
-
 void space_rebuild_recycle(struct space *s, struct cell *c) {
 
   if (c->split)
@@ -152,28 +174,27 @@ void space_rebuild_recycle(struct space *s, struct cell *c) {
 }
 
 /**
- * @brief Re-build the cell grid.
+ * @brief Re-build the top-level cell grid.
  *
  * @param s The #space.
  * @param cell_max Maximum cell edge length.
  * @param verbose Print messages to stdout or not.
  */
-
 void space_regrid(struct space *s, double cell_max, int verbose) {
 
   const size_t nr_parts = s->nr_parts;
-  struct cell *restrict c;
-  ticks tic = getticks();
+  const ticks tic = getticks();
   const int ti_current = (s->e != NULL) ? s->e->ti_current : 0;
 
   /* Run through the cells and get the current h_max. */
   // tic = getticks();
   float h_max = s->cell_min / kernel_gamma / space_stretch;
   if (nr_parts > 0) {
-    if (s->cells != NULL) {
+    if (s->cells_top != NULL) {
       for (int k = 0; k < s->nr_cells; k++) {
-        if (s->cells[k].nodeID == engine_rank && s->cells[k].h_max > h_max) {
-          h_max = s->cells[k].h_max;
+        if (s->cells_top[k].nodeID == engine_rank &&
+            s->cells_top[k].h_max > h_max) {
+          h_max = s->cells_top[k].h_max;
         }
       }
     } else {
@@ -197,10 +218,10 @@ void space_regrid(struct space *s, double cell_max, int verbose) {
   if (verbose) message("h_max is %.3e (cell_max=%.3e).", h_max, cell_max);
 
   /* Get the new putative cell dimensions. */
-  int cdim[3];
-  for (int k = 0; k < 3; k++)
-    cdim[k] =
-        floor(s->dim[k] / fmax(h_max * kernel_gamma * space_stretch, cell_max));
+  const int cdim[3] = {
+      floor(s->dim[0] / fmax(h_max * kernel_gamma * space_stretch, cell_max)),
+      floor(s->dim[1] / fmax(h_max * kernel_gamma * space_stretch, cell_max)),
+      floor(s->dim[2] / fmax(h_max * kernel_gamma * space_stretch, cell_max))};
 
   /* Check if we have enough cells for periodicity. */
   if (s->periodic && (cdim[0] < 3 || cdim[1] < 3 || cdim[2] < 3))
@@ -239,7 +260,7 @@ void space_regrid(struct space *s, double cell_max, int verbose) {
       for (int j = 0; j < s->cdim[1]; j++) {
         for (int k = 0; k < s->cdim[2]; k++) {
           cid = cell_getid(oldcdim, i, j, k);
-          oldnodeIDs[cid] = s->cells[cid].nodeID;
+          oldnodeIDs[cid] = s->cells_top[cid].nodeID;
         }
       }
     }
@@ -249,16 +270,16 @@ void space_regrid(struct space *s, double cell_max, int verbose) {
 
   /* Do we need to re-build the upper-level cells? */
   // tic = getticks();
-  if (s->cells == NULL || cdim[0] < s->cdim[0] || cdim[1] < s->cdim[1] ||
+  if (s->cells_top == NULL || cdim[0] < s->cdim[0] || cdim[1] < s->cdim[1] ||
       cdim[2] < s->cdim[2]) {
 
     /* Free the old cells, if they were allocated. */
-    if (s->cells != NULL) {
+    if (s->cells_top != NULL) {
       for (int k = 0; k < s->nr_cells; k++) {
-        space_rebuild_recycle(s, &s->cells[k]);
-        if (s->cells[k].sort != NULL) free(s->cells[k].sort);
+        space_rebuild_recycle(s, &s->cells_top[k]);
+        if (s->cells_top[k].sort != NULL) free(s->cells_top[k].sort);
       }
-      free(s->cells);
+      free(s->cells_top);
       s->maxdepth = 0;
     }
 
@@ -268,22 +289,23 @@ void space_regrid(struct space *s, double cell_max, int verbose) {
       s->width[k] = s->dim[k] / cdim[k];
       s->iwidth[k] = 1.0 / s->width[k];
     }
-    const float dmin = fminf(s->width[0], fminf(s->width[1], s->width[2]));
+    const float dmin = min(s->width[0], min(s->width[1], s->width[2]));
 
     /* Allocate the highest level of cells. */
     s->tot_cells = s->nr_cells = cdim[0] * cdim[1] * cdim[2];
-    if (posix_memalign((void *)&s->cells, cell_align,
+    if (posix_memalign((void *)&s->cells_top, cell_align,
                        s->nr_cells * sizeof(struct cell)) != 0)
       error("Failed to allocate cells.");
-    bzero(s->cells, s->nr_cells * sizeof(struct cell));
+    bzero(s->cells_top, s->nr_cells * sizeof(struct cell));
     for (int k = 0; k < s->nr_cells; k++)
-      if (lock_init(&s->cells[k].lock) != 0) error("Failed to init spinlock.");
+      if (lock_init(&s->cells_top[k].lock) != 0)
+        error("Failed to init spinlock.");
 
     /* Set the cell location and sizes. */
     for (int i = 0; i < cdim[0]; i++)
       for (int j = 0; j < cdim[1]; j++)
         for (int k = 0; k < cdim[2]; k++) {
-          c = &s->cells[cell_getid(cdim, i, j, k)];
+          struct cell *restrict c = &s->cells_top[cell_getid(cdim, i, j, k)];
           c->loc[0] = i * s->width[0];
           c->loc[1] = j * s->width[1];
           c->loc[2] = k * s->width[2];
@@ -295,6 +317,7 @@ void space_regrid(struct space *s, double cell_max, int verbose) {
           c->count = 0;
           c->gcount = 0;
           c->super = c;
+          c->gsuper = c;
           c->ti_old = ti_current;
           lock_init(&c->lock);
         }
@@ -339,33 +362,35 @@ void space_regrid(struct space *s, double cell_max, int verbose) {
       free(oldnodeIDs);
     }
 #endif
+
+    // message( "rebuilding upper-level cells took %.3f %s." ,
+    // clocks_from_ticks(double)(getticks() - tic), clocks_getunit());
+
   } /* re-build upper-level cells? */
-  // message( "rebuilding upper-level cells took %.3f %s." ,
-  // clocks_from_ticks(double)(getticks() - tic), clocks_getunit());
 
-  /* Otherwise, just clean up the cells. */
-  else {
+  else { /* Otherwise, just clean up the cells. */
 
     /* Free the old cells, if they were allocated. */
     for (int k = 0; k < s->nr_cells; k++) {
-      space_rebuild_recycle(s, &s->cells[k]);
-      s->cells[k].sorts = NULL;
-      s->cells[k].nr_tasks = 0;
-      s->cells[k].nr_density = 0;
-      s->cells[k].nr_gradient = 0;
-      s->cells[k].nr_force = 0;
-      s->cells[k].density = NULL;
-      s->cells[k].gradient = NULL;
-      s->cells[k].force = NULL;
-      s->cells[k].dx_max = 0.0f;
-      s->cells[k].sorted = 0;
-      s->cells[k].count = 0;
-      s->cells[k].gcount = 0;
-      s->cells[k].init = NULL;
-      s->cells[k].extra_ghost = NULL;
-      s->cells[k].ghost = NULL;
-      s->cells[k].kick = NULL;
-      s->cells[k].super = &s->cells[k];
+      space_rebuild_recycle(s, &s->cells_top[k]);
+      s->cells_top[k].sorts = NULL;
+      s->cells_top[k].nr_tasks = 0;
+      s->cells_top[k].nr_density = 0;
+      s->cells_top[k].nr_gradient = 0;
+      s->cells_top[k].nr_force = 0;
+      s->cells_top[k].density = NULL;
+      s->cells_top[k].gradient = NULL;
+      s->cells_top[k].force = NULL;
+      s->cells_top[k].dx_max = 0.0f;
+      s->cells_top[k].sorted = 0;
+      s->cells_top[k].count = 0;
+      s->cells_top[k].gcount = 0;
+      s->cells_top[k].init = NULL;
+      s->cells_top[k].extra_ghost = NULL;
+      s->cells_top[k].ghost = NULL;
+      s->cells_top[k].kick = NULL;
+      s->cells_top[k].super = &s->cells_top[k];
+      s->cells_top[k].gsuper = &s->cells_top[k];
     }
     s->maxdepth = 0;
   }
@@ -383,7 +408,6 @@ void space_regrid(struct space *s, double cell_max, int verbose) {
  * @param verbose Print messages to stdout or not
  *
  */
-
 void space_rebuild(struct space *s, double cell_max, int verbose) {
 
   const ticks tic = getticks();
@@ -396,7 +420,7 @@ void space_rebuild(struct space *s, double cell_max, int verbose) {
 
   size_t nr_parts = s->nr_parts;
   size_t nr_gparts = s->nr_gparts;
-  struct cell *restrict cells = s->cells;
+  struct cell *restrict cells_top = s->cells_top;
   const int ti_current = (s->e != NULL) ? s->e->ti_current : 0;
 
   const double ih[3] = {s->iwidth[0], s->iwidth[1], s->iwidth[2]};
@@ -418,7 +442,7 @@ void space_rebuild(struct space *s, double cell_max, int verbose) {
         p->x[j] -= dim[j];
     ind[k] =
         cell_getid(cdim, p->x[0] * ih[0], p->x[1] * ih[1], p->x[2] * ih[2]);
-    cells[ind[k]].count++;
+    cells_top[ind[k]].count++;
   }
   // message( "getting particle indices took %.3f %s." ,
   // clocks_from_ticks(getticks() - tic), clocks_getunit()):
@@ -438,9 +462,9 @@ void space_rebuild(struct space *s, double cell_max, int verbose) {
         gp->x[j] -= dim[j];
     gind[k] =
         cell_getid(cdim, gp->x[0] * ih[0], gp->x[1] * ih[1], gp->x[2] * ih[2]);
-    cells[gind[k]].gcount++;
+    cells_top[gind[k]].gcount++;
   }
-// message( "getting particle indices took %.3f %s." ,
+// message( "getting g-particle indices took %.3f %s." ,
 // clocks_from_ticks(getticks() - tic), clocks_getunit());
 
 #ifdef WITH_MPI
@@ -448,8 +472,8 @@ void space_rebuild(struct space *s, double cell_max, int verbose) {
   /* Move non-local parts to the end of the list. */
   const int local_nodeID = s->e->nodeID;
   for (size_t k = 0; k < nr_parts;) {
-    if (cells[ind[k]].nodeID != local_nodeID) {
-      cells[ind[k]].count -= 1;
+    if (cells_top[ind[k]].nodeID != local_nodeID) {
+      cells_top[ind[k]].count -= 1;
       nr_parts -= 1;
       const struct part tp = s->parts[k];
       s->parts[k] = s->parts[nr_parts];
@@ -475,12 +499,12 @@ void space_rebuild(struct space *s, double cell_max, int verbose) {
 #ifdef SWIFT_DEBUG_CHECKS
   /* Check that all parts are in the correct places. */
   for (size_t k = 0; k < nr_parts; k++) {
-    if (cells[ind[k]].nodeID != local_nodeID) {
+    if (cells_top[ind[k]].nodeID != local_nodeID) {
       error("Failed to move all non-local parts to send list");
     }
   }
   for (size_t k = nr_parts; k < s->nr_parts; k++) {
-    if (cells[ind[k]].nodeID == local_nodeID) {
+    if (cells_top[ind[k]].nodeID == local_nodeID) {
       error("Failed to remove local parts from send list");
     }
   }
@@ -488,8 +512,8 @@ void space_rebuild(struct space *s, double cell_max, int verbose) {
 
   /* Move non-local gparts to the end of the list. */
   for (size_t k = 0; k < nr_gparts;) {
-    if (cells[gind[k]].nodeID != local_nodeID) {
-      cells[gind[k]].gcount -= 1;
+    if (cells_top[gind[k]].nodeID != local_nodeID) {
+      cells_top[gind[k]].gcount -= 1;
       nr_gparts -= 1;
       const struct gpart tp = s->gparts[k];
       s->gparts[k] = s->gparts[nr_gparts];
@@ -513,12 +537,12 @@ void space_rebuild(struct space *s, double cell_max, int verbose) {
 #ifdef SWIFT_DEBUG_CHECKS
   /* Check that all gparts are in the correct place (untested). */
   for (size_t k = 0; k < nr_gparts; k++) {
-    if (cells[gind[k]].nodeID != local_nodeID) {
+    if (cells_top[gind[k]].nodeID != local_nodeID) {
       error("Failed to move all non-local gparts to send list");
     }
   }
   for (size_t k = nr_gparts; k < s->nr_gparts; k++) {
-    if (cells[gind[k]].nodeID == local_nodeID) {
+    if (cells_top[gind[k]].nodeID == local_nodeID) {
       error("Failed to remove local gparts from send list");
     }
   }
@@ -550,11 +574,11 @@ void space_rebuild(struct space *s, double cell_max, int verbose) {
     const struct part *const p = &s->parts[k];
     ind[k] =
         cell_getid(cdim, p->x[0] * ih[0], p->x[1] * ih[1], p->x[2] * ih[2]);
-    cells[ind[k]].count += 1;
+    cells_top[ind[k]].count += 1;
 #ifdef SWIFT_DEBUG_CHECKS
-    if (cells[ind[k]].nodeID != local_nodeID)
+    if (cells_top[ind[k]].nodeID != local_nodeID)
       error("Received part that does not belong to me (nodeID=%i).",
-            cells[ind[k]].nodeID);
+            cells_top[ind[k]].nodeID);
 #endif
   }
   nr_parts = s->nr_parts;
@@ -565,7 +589,7 @@ void space_rebuild(struct space *s, double cell_max, int verbose) {
   space_parts_sort(s, ind, nr_parts, 0, s->nr_cells - 1, verbose);
 
   /* Re-link the gparts. */
-  part_relink_gparts(s->parts, nr_parts, 0);
+  if (nr_parts > 0 && nr_gparts > 0) part_relink_gparts(s->parts, nr_parts, 0);
 
 #ifdef SWIFT_DEBUG_CHECKS
   /* Verify space_sort_struct. */
@@ -600,20 +624,24 @@ void space_rebuild(struct space *s, double cell_max, int verbose) {
     const struct gpart *const p = &s->gparts[k];
     gind[k] =
         cell_getid(cdim, p->x[0] * ih[0], p->x[1] * ih[1], p->x[2] * ih[2]);
-    cells[gind[k]].gcount += 1;
-    /* if ( cells[ ind[k] ].nodeID != nodeID )
-        error( "Received part that does not belong to me (nodeID=%i)." , cells[
-       ind[k] ].nodeID ); */
+    cells_top[gind[k]].gcount += 1;
+
+#ifdef SWIFT_DEBUG_CHECKS
+    if (cells_top[ind[k]].nodeID != s->e->nodeID)
+      error("Received part that does not belong to me (nodeID=%i).",
+            cells_top[ind[k]].nodeID);
+#endif
   }
   nr_gparts = s->nr_gparts;
 
 #endif
 
-  /* Sort the parts according to their cells. */
+  /* Sort the gparts according to their cells. */
   space_gparts_sort(s, gind, nr_gparts, 0, s->nr_cells - 1, verbose);
 
   /* Re-link the parts. */
-  part_relink_parts(s->gparts, nr_gparts, s->parts);
+  if (nr_parts > 0 && nr_gparts > 0)
+    part_relink_parts(s->gparts, nr_gparts, s->parts);
 
   /* We no longer need the indices as of here. */
   free(gind);
@@ -636,7 +664,7 @@ void space_rebuild(struct space *s, double cell_max, int verbose) {
   for (size_t k = 0; k < nr_parts; ++k) {
 
     if (s->parts[k].gpart != NULL &&
-        s->parts[k].gpart->id_or_neg_offset != -k) {
+        s->parts[k].gpart->id_or_neg_offset != -(ptrdiff_t)k) {
       error("Linking problem !");
     }
   }
@@ -648,7 +676,7 @@ void space_rebuild(struct space *s, double cell_max, int verbose) {
   struct xpart *xfinger = s->xparts;
   struct gpart *gfinger = s->gparts;
   for (int k = 0; k < s->nr_cells; k++) {
-    struct cell *restrict c = &cells[k];
+    struct cell *restrict c = &cells_top[k];
     c->ti_old = ti_current;
     c->parts = finger;
     c->xparts = xfinger;
@@ -662,7 +690,7 @@ void space_rebuild(struct space *s, double cell_max, int verbose) {
 
   /* At this point, we have the upper-level cells, old or new. Now make
      sure that the parts in each cell are ok. */
-  space_split(s, cells, verbose);
+  space_split(s, cells_top, verbose);
 
   if (verbose)
     message("took %.3f %s.", clocks_from_ticks(getticks() - tic),
@@ -672,6 +700,8 @@ void space_rebuild(struct space *s, double cell_max, int verbose) {
 /**
  * @brief Split particles between cells of a hierarchy
  *
+ * This is done in parallel using threads in the #threadpool.
+ *
  * @param s The #space.
  * @param cells The cell hierarchy
  * @param verbose Are we talkative ?
@@ -690,7 +720,7 @@ void space_split(struct space *s, struct cell *cells, int verbose) {
 
 /**
  * @brief Sort the particles and condensed particles according to the given
- *indices.
+ * indices.
  *
  * @param s The #space.
  * @param ind The indices with respect to which the parts are sorted.
@@ -699,7 +729,6 @@ void space_split(struct space *s, struct cell *cells, int verbose) {
  * @param max highest index.
  * @param verbose Are we talkative ?
  */
-
 void space_parts_sort(struct space *s, int *ind, size_t N, int min, int max,
                       int verbose) {
 
@@ -734,9 +763,9 @@ void space_parts_sort(struct space *s, int *ind, size_t N, int min, int max,
 
 #ifdef SWIFT_DEBUG_CHECKS
   /* Verify space_sort_struct. */
-  for (int i = 1; i < N; i++)
+  for (size_t i = 1; i < N; i++)
     if (ind[i - 1] > ind[i])
-      error("Sorting failed (ind[%i]=%i,ind[%i]=%i), min=%i, max=%i.", i - 1,
+      error("Sorting failed (ind[%zu]=%i,ind[%zu]=%i), min=%i, max=%i.", i - 1,
             ind[i - 1], i, ind[i], min, max);
   message("Sorting succeeded.");
 #endif
@@ -876,8 +905,7 @@ void space_parts_sort_mapper(void *map_data, int num_elements,
 }
 
 /**
- * @brief Sort the g-particles and condensed particles according to the given
- *indices.
+ * @brief Sort the g-particles according to the given indices.
  *
  * @param s The #space.
  * @param ind The indices with respect to which the gparts are sorted.
@@ -919,9 +947,9 @@ void space_gparts_sort(struct space *s, int *ind, size_t N, int min, int max,
 
 #ifdef SWIFT_DEBUG_CHECKS
   /* Verify space_sort_struct. */
-  for (int i = 1; i < N; i++)
+  for (size_t i = 1; i < N; i++)
     if (ind[i - 1] > ind[i])
-      error("Sorting failed (ind[%i]=%i,ind[%i]=%i), min=%i, max=%i.", i - 1,
+      error("Sorting failed (ind[%zu]=%i,ind[%zu]=%i), min=%i, max=%i.", i - 1,
             ind[i - 1], i, ind[i], min, max);
   message("Sorting succeeded.");
 #endif
@@ -1059,7 +1087,6 @@ void space_gparts_sort_mapper(void *map_data, int num_elements,
 /**
  * @brief Mapping function to free the sorted indices buffers.
  */
-
 void space_map_clearsort(struct cell *c, void *data) {
 
   if (c->sort != NULL) {
@@ -1075,21 +1102,17 @@ void space_map_clearsort(struct cell *c, void *data) {
  * @param fun Function pointer to apply on the cells.
  * @param data Data passed to the function fun.
  */
-
 static void rec_map_parts(struct cell *c,
                           void (*fun)(struct part *p, struct cell *c,
                                       void *data),
                           void *data) {
-
-  int k;
-
   /* No progeny? */
   if (!c->split)
-    for (k = 0; k < c->count; k++) fun(&c->parts[k], c, data);
+    for (int k = 0; k < c->count; k++) fun(&c->parts[k], c, data);
 
   /* Otherwise, recurse. */
   else
-    for (k = 0; k < 8; k++)
+    for (int k = 0; k < 8; k++)
       if (c->progeny[k] != NULL) rec_map_parts(c->progeny[k], fun, data);
 }
 
@@ -1100,16 +1123,13 @@ static void rec_map_parts(struct cell *c,
  * @param fun Function pointer to apply on the cells.
  * @param data Data passed to the function fun.
  */
-
 void space_map_parts(struct space *s,
                      void (*fun)(struct part *p, struct cell *c, void *data),
                      void *data) {
 
-  int cid = 0;
-
   /* Call the recursive function on all higher-level cells. */
-  for (cid = 0; cid < s->nr_cells; cid++)
-    rec_map_parts(&s->cells[cid], fun, data);
+  for (int cid = 0; cid < s->nr_cells; cid++)
+    rec_map_parts(&s->cells_top[cid], fun, data);
 }
 
 /**
@@ -1118,20 +1138,17 @@ void space_map_parts(struct space *s,
  * @param c The #cell we are working in.
  * @param fun Function pointer to apply on the cells.
  */
-
 static void rec_map_parts_xparts(struct cell *c,
                                  void (*fun)(struct part *p, struct xpart *xp,
                                              struct cell *c)) {
 
-  int k;
-
   /* No progeny? */
   if (!c->split)
-    for (k = 0; k < c->count; k++) fun(&c->parts[k], &c->xparts[k], c);
+    for (int k = 0; k < c->count; k++) fun(&c->parts[k], &c->xparts[k], c);
 
   /* Otherwise, recurse. */
   else
-    for (k = 0; k < 8; k++)
+    for (int k = 0; k < 8; k++)
       if (c->progeny[k] != NULL) rec_map_parts_xparts(c->progeny[k], fun);
 }
 
@@ -1141,16 +1158,13 @@ static void rec_map_parts_xparts(struct cell *c,
  * @param s The #space we are working in.
  * @param fun Function pointer to apply on the particles in the cells.
  */
-
 void space_map_parts_xparts(struct space *s,
                             void (*fun)(struct part *p, struct xpart *xp,
                                         struct cell *c)) {
 
-  int cid = 0;
-
   /* Call the recursive function on all higher-level cells. */
-  for (cid = 0; cid < s->nr_cells; cid++)
-    rec_map_parts_xparts(&s->cells[cid], fun);
+  for (int cid = 0; cid < s->nr_cells; cid++)
+    rec_map_parts_xparts(&s->cells_top[cid], fun);
 }
 
 /**
@@ -1161,16 +1175,12 @@ void space_map_parts_xparts(struct space *s,
  * @param fun Function pointer to apply on the cells.
  * @param data Data passed to the function fun.
  */
-
 static void rec_map_cells_post(struct cell *c, int full,
                                void (*fun)(struct cell *c, void *data),
                                void *data) {
-
-  int k;
-
   /* Recurse. */
   if (c->split)
-    for (k = 0; k < 8; k++)
+    for (int k = 0; k < 8; k++)
       if (c->progeny[k] != NULL)
         rec_map_cells_post(c->progeny[k], full, fun, data);
 
@@ -1186,29 +1196,24 @@ static void rec_map_cells_post(struct cell *c, int full,
  * @param fun Function pointer to apply on the cells.
  * @param data Data passed to the function fun.
  */
-
 void space_map_cells_post(struct space *s, int full,
                           void (*fun)(struct cell *c, void *data), void *data) {
 
-  int cid = 0;
-
   /* Call the recursive function on all higher-level cells. */
-  for (cid = 0; cid < s->nr_cells; cid++)
-    rec_map_cells_post(&s->cells[cid], full, fun, data);
+  for (int cid = 0; cid < s->nr_cells; cid++)
+    rec_map_cells_post(&s->cells_top[cid], full, fun, data);
 }
 
 static void rec_map_cells_pre(struct cell *c, int full,
                               void (*fun)(struct cell *c, void *data),
                               void *data) {
 
-  int k;
-
   /* No progeny? */
   if (full || !c->split) fun(c, data);
 
   /* Recurse. */
   if (c->split)
-    for (k = 0; k < 8; k++)
+    for (int k = 0; k < 8; k++)
       if (c->progeny[k] != NULL)
         rec_map_cells_pre(c->progeny[k], full, fun, data);
 }
@@ -1224,28 +1229,29 @@ static void rec_map_cells_pre(struct cell *c, int full,
 void space_map_cells_pre(struct space *s, int full,
                          void (*fun)(struct cell *c, void *data), void *data) {
 
-  int cid = 0;
-
   /* Call the recursive function on all higher-level cells. */
-  for (cid = 0; cid < s->nr_cells; cid++)
-    rec_map_cells_pre(&s->cells[cid], full, fun, data);
+  for (int cid = 0; cid < s->nr_cells; cid++)
+    rec_map_cells_pre(&s->cells_top[cid], full, fun, data);
 }
 
 /**
  * @brief #threadpool mapper function to split cells if they contain
  *        too many particles.
+ *
+ * @param map_data Pointer towards the top-cells.
+ * @param num_elements The number of cells to treat.
+ * @param extra_data Pointers to the #space.
  */
-
 void space_split_mapper(void *map_data, int num_elements, void *extra_data) {
 
   /* Unpack the inputs. */
   struct space *s = (struct space *)extra_data;
-  struct cell *cells = (struct cell *)map_data;
+  struct cell *restrict cells_top = (struct cell *)map_data;
   struct engine *e = s->e;
 
   for (int ind = 0; ind < num_elements; ind++) {
 
-    struct cell *c = &cells[ind];
+    struct cell *c = &cells_top[ind];
 
     const int count = c->count;
     const int gcount = c->gcount;
@@ -1290,6 +1296,8 @@ void space_split_mapper(void *map_data, int num_elements, void *extra_data) {
         temp->dx_max = 0.f;
         temp->nodeID = c->nodeID;
         temp->parent = c;
+        temp->super = NULL;
+        temp->gsuper = NULL;
         c->progeny[k] = temp;
       }
 
@@ -1303,7 +1311,7 @@ void space_split_mapper(void *map_data, int num_elements, void *extra_data) {
           c->progeny[k] = NULL;
         } else {
           space_split_mapper(c->progeny[k], 1, s);
-          h_max = fmaxf(h_max, c->progeny[k]->h_max);
+          h_max = max(h_max, c->progeny[k]->h_max);
           ti_end_min = min(ti_end_min, c->progeny[k]->ti_end_min);
           ti_end_max = max(ti_end_max, c->progeny[k]->ti_end_max);
           if (c->progeny[k]->maxdepth > maxdepth)
@@ -1363,12 +1371,11 @@ void space_split_mapper(void *map_data, int num_elements, void *extra_data) {
 }
 
 /**
- * @brief Return a used cell to the cell buffer.
+ * @brief Return a used cell to the buffer od unused sub-cells.
  *
  * @param s The #space.
  * @param c The #cell.
  */
-
 void space_recycle(struct space *s, struct cell *c) {
 
   /* Lock the space. */
@@ -1384,8 +1391,8 @@ void space_recycle(struct space *s, struct cell *c) {
   bzero(c, sizeof(struct cell));
 
   /* Hook this cell into the buffer. */
-  c->next = s->cells_new;
-  s->cells_new = c;
+  c->next = s->cells_sub;
+  s->cells_sub = c;
   s->tot_cells -= 1;
 
   /* Unlock the space. */
@@ -1393,39 +1400,42 @@ void space_recycle(struct space *s, struct cell *c) {
 }
 
 /**
- * @brief Get a new empty cell.
+ * @brief Get a new empty (sub-)#cell.
+ *
+ * If there are cells in the buffer, use the one at the end of the linked list.
+ * If we have no cells, allocate a new chunk of memory and pick one from there.
  *
  * @param s The #space.
  */
-
 struct cell *space_getcell(struct space *s) {
 
-  struct cell *c;
-  int k;
-
   /* Lock the space. */
   lock_lock(&s->lock);
 
   /* Is the buffer empty? */
-  if (s->cells_new == NULL) {
-    if (posix_memalign((void *)&s->cells_new, cell_align,
+  if (s->cells_sub == NULL) {
+    if (posix_memalign((void *)&s->cells_sub, cell_align,
                        space_cellallocchunk * sizeof(struct cell)) != 0)
       error("Failed to allocate more cells.");
-    bzero(s->cells_new, space_cellallocchunk * sizeof(struct cell));
-    for (k = 0; k < space_cellallocchunk - 1; k++)
-      s->cells_new[k].next = &s->cells_new[k + 1];
-    s->cells_new[space_cellallocchunk - 1].next = NULL;
+
+    /* Zero everything for good measure */
+    bzero(s->cells_sub, space_cellallocchunk * sizeof(struct cell));
+
+    /* Constructed a linked list */
+    for (int k = 0; k < space_cellallocchunk - 1; k++)
+      s->cells_sub[k].next = &s->cells_sub[k + 1];
+    s->cells_sub[space_cellallocchunk - 1].next = NULL;
   }
 
   /* Pick off the next cell. */
-  c = s->cells_new;
-  s->cells_new = c->next;
+  struct cell *c = s->cells_sub;
+  s->cells_sub = c->next;
   s->tot_cells += 1;
 
   /* Unlock the space. */
   lock_unlock_blind(&s->lock);
 
-  /* Init some things in the cell. */
+  /* Init some things in the cell we just got. */
   bzero(c, sizeof(struct cell));
   c->nodeID = -1;
   if (lock_init(&c->lock) != 0 || lock_init(&c->glock) != 0)
@@ -1507,7 +1517,6 @@ void space_init_gparts(struct space *s) {
  * parts with a cutoff below half the cell width are then split
  * recursively.
  */
-
 void space_init(struct space *s, const struct swift_params *params,
                 double dim[3], struct part *parts, struct gpart *gparts,
                 size_t Npart, size_t Ngpart, int periodic, int gravity,
@@ -1530,7 +1539,6 @@ void space_init(struct space *s, const struct swift_params *params,
   s->gparts = gparts;
   s->cell_min = parser_get_param_double(params, "SPH:max_smoothing_length");
   s->nr_queues = 1; /* Temporary value until engine construction */
-  s->size_parts_foreign = 0;
 
   /* Get the constants for the scheduler */
   space_maxsize = parser_get_opt_param_int(params, "Scheduler:cell_max_size",
@@ -1647,8 +1655,8 @@ void space_link_cleanup(struct space *s) {
  */
 void space_clean(struct space *s) {
 
-  for (int i = 0; i < s->nr_cells; ++i) cell_clean(&s->cells[i]);
-  free(s->cells);
+  for (int i = 0; i < s->nr_cells; ++i) cell_clean(&s->cells_top[i]);
+  free(s->cells_top);
   free(s->parts);
   free(s->xparts);
   free(s->gparts);
diff --git a/src/space.h b/src/space.h
index 90313be8dbe817d65fbd0e6a8c30c156747594b1..66d82c6f78a08447851a8dfdaea1231e5778693b 100644
--- a/src/space.h
+++ b/src/space.h
@@ -37,7 +37,6 @@
 #include "space.h"
 
 /* Some constants. */
-#define space_maxdepth 10
 #define space_cellallocchunk 1000
 #define space_splitsize_default 400
 #define space_maxsize_default 8000000
@@ -53,84 +52,85 @@ extern int space_subsize;
 /* Map shift vector to sortlist. */
 extern const int sortlistID[27];
 
-/* Entry in a list of sorted indices. */
-struct entry {
-  float d;
-  int i;
-};
-
-/* The space in which the cells reside. */
+/**
+ * @brief The space in which the cells and particles reside.
+ */
 struct space {
 
-  /* Spatial extent. */
+  /*! Spatial extent. */
   double dim[3];
 
-  /* Cell widths. */
-  double width[3], iwidth[3];
+  /*! Is the space periodic? */
+  int periodic;
+
+  /*! Are we doing gravity? */
+  int gravity;
+
+  /*! Width of the top-level cells. */
+  double width[3];
+
+  /*! Inverse of the top-level cell width */
+  double iwidth[3];
 
-  /* The minimum cell width. */
+  /*! The minimum top-level cell width allowed. */
   double cell_min;
 
-  /* Current maximum displacement for particles. */
+  /*! Current maximum displacement for particles. */
   float dx_max;
 
-  /* Number of cells. */
-  int nr_cells, tot_cells;
+  /*! Space dimensions in number of top-cells. */
+  int cdim[3];
 
-  /* Space dimensions in number of cells. */
-  int maxdepth, cdim[3];
+  /*! Maximal depth reached by the tree */
+  int maxdepth;
 
-  /* The (level 0) cells themselves. */
-  struct cell *cells;
+  /*! Number of top-level cells. */
+  int nr_cells;
 
-  /* Buffer of unused cells. */
-  struct cell *cells_new;
+  /*! Total number of cells (top- and sub-) */
+  int tot_cells;
 
-  /* The particle data (cells have pointers to this). */
-  struct part *parts;
-  struct xpart *xparts;
-  struct gpart *gparts;
+  /*! The (level 0) cells themselves. */
+  struct cell *cells_top;
 
-  /* The total number of parts in the space. */
+  /*! Buffer of unused cells for the sub-cells. */
+  struct cell *cells_sub;
+
+  /*! The total number of parts in the space. */
   size_t nr_parts, size_parts;
+
+  /*! The total number of g-parts in the space. */
   size_t nr_gparts, size_gparts;
 
-  /* Is the space periodic? */
-  int periodic;
+  /*! The particle data (cells have pointers to this). */
+  struct part *parts;
 
-  /* Are we doing gravity? */
-  int gravity;
+  /*! The extended particle data (cells have pointers to this). */
+  struct xpart *xparts;
 
-  /* General-purpose lock for this space. */
+  /*! The g-particle data (cells have pointers to this). */
+  struct gpart *gparts;
+
+  /*! General-purpose lock for this space. */
   swift_lock_type lock;
 
-  /* Number of queues in the system. */
+  /*! Number of queues in the system. */
   int nr_queues;
 
-  /* The associated engine. */
+  /*! The associated engine. */
   struct engine *e;
 
-  /* Buffers for parts that we will receive from foreign cells. */
+#ifdef WITH_MPI
+
+  /*! Buffers for parts that we will receive from foreign cells. */
   struct part *parts_foreign;
   size_t nr_parts_foreign, size_parts_foreign;
+
+  /*! Buffers for g-parts that we will receive from foreign cells. */
   struct gpart *gparts_foreign;
   size_t nr_gparts_foreign, size_gparts_foreign;
-};
 
-/* Interval stack necessary for parallel particle sorting. */
-struct qstack {
-  volatile ptrdiff_t i, j;
-  volatile int min, max;
-  volatile int ready;
-};
-struct parallel_sort {
-  struct part *parts;
-  struct gpart *gparts;
-  struct xpart *xparts;
-  int *ind;
-  struct qstack *stack;
-  unsigned int stack_size;
-  volatile unsigned int first, last, waiting;
+#endif
 };
 
 /* function prototypes. */
diff --git a/src/task.h b/src/task.h
index 0b28dba5fd6fac6929ee492b29cdd64ed2073dcf..f070451fe4e79e0c16dc3dcca1ce145c08841c47 100644
--- a/src/task.h
+++ b/src/task.h
@@ -29,6 +29,8 @@
 #include "cell.h"
 #include "cycle.h"
 
+#define task_align 128
+
 /**
  * @brief The different task types.
  */
@@ -53,7 +55,7 @@ enum task_types {
   task_type_grav_external,
   task_type_cooling,
   task_type_count
-};
+} __attribute__((packed));
 
 /**
  * @brief The different task sub-types (for pairs, selfs and sub-tasks).
@@ -66,7 +68,7 @@ enum task_subtypes {
   task_subtype_grav,
   task_subtype_tend,
   task_subtype_count
-};
+} __attribute__((packed));
 
 /**
  * @brief The type of particles/objects this task acts upon in a given cell.
@@ -95,48 +97,48 @@ extern const char *subtaskID_names[];
  */
 struct task {
 
-  /*! Type of the task */
-  enum task_types type;
+  /*! Pointers to the cells this task acts upon */
+  struct cell *ci, *cj;
 
-  /*! Sub-type of the task (for the tasks that have one */
-  enum task_subtypes subtype;
+  /*! List of tasks unlocked by this one */
+  struct task **unlock_tasks;
+
+  /*! Start and end time of this task */
+  ticks tic, toc;
+
+#ifdef WITH_MPI
+
+  /*! Buffer for this task's communications */
+  void *buff;
+
+  /*! MPI request corresponding to this task */
+  MPI_Request req;
+
+#endif
 
   /*! Flags used to carry additional information (e.g. sort directions) */
   int flags;
 
-  /*! Number of unsatisfied dependencies */
-  int wait;
-
   /*! Rank of a task in the order */
   int rank;
 
   /*! Weight of the task */
   int weight;
 
-  /*! Pointers to the cells this task acts upon */
-  struct cell *ci, *cj;
-
   /*! ID of the queue or runner owning this task */
-  int rid;
+  short int rid;
 
   /*! Number of tasks unlocked by this one */
-  int nr_unlock_tasks;
-
-  /*! List of tasks unlocked by this one */
-  struct task **unlock_tasks;
-
-#ifdef WITH_MPI
-
-  /*! Buffer for this task's communications */
-  void *buff;
+  short int nr_unlock_tasks;
 
-  /*! MPI request corresponding to this task */
-  MPI_Request req;
+  /*! Number of unsatisfied dependencies */
+  short int wait;
 
-#endif
+  /*! Type of the task */
+  enum task_types type;
 
-  /*! Start and end time of this task */
-  ticks tic, toc;
+  /*! Sub-type of the task (for the tasks that have one */
+  enum task_subtypes subtype;
 
   /*! Should the scheduler skip this task ? */
   char skip;
@@ -146,7 +148,8 @@ struct task {
 
   /*! Is this task implicit (i.e. does not do anything) ? */
   char implicit;
-};
+
+} __attribute__((aligned(32)));
 
 /* Function prototypes. */
 void task_unlock(struct task *t);
diff --git a/tests/test125cells.c b/tests/test125cells.c
index c7e01693b45f76fa21ffd397289fc06ad36af03d..a385a7c890fe27ed11d3c5d87d6903fa6d254516 100644
--- a/tests/test125cells.c
+++ b/tests/test125cells.c
@@ -564,7 +564,7 @@ int main(int argc, char *argv[]) {
 
   /* Start the test */
   ticks time = 0;
-  for (size_t i = 0; i < runs; ++i) {
+  for (size_t n = 0; n < runs; ++n) {
 
     const ticks tic = getticks();
 
@@ -642,7 +642,7 @@ int main(int argc, char *argv[]) {
     time += toc - tic;
 
     /* Dump if necessary */
-    if (i == 0) {
+    if (n == 0) {
       sprintf(outputFileName, "swift_dopair_125_%s.dat",
               outputFileNameExtension);
       dump_particle_fields(outputFileName, main_cell, solution, 0);
diff --git a/tests/testInteractions.c b/tests/testInteractions.c
index 9d8dca79f3f84ea06e42d61390e68467a5f1b415..52ad0c54258848883a9025bbcd9d68133eddc4b9 100644
--- a/tests/testInteractions.c
+++ b/tests/testInteractions.c
@@ -173,7 +173,7 @@ void test_interactions(struct part *parts, int count,
 
   /* Dump state of particles before serial interaction. */
   dump_indv_particle_fields(serial_filename, &pi);
-  for (size_t i = 1; i < count; i++)
+  for (int i = 1; i < count; i++)
     dump_indv_particle_fields(serial_filename, &parts[i]);
 
   /* Make copy of pi to be used in vectorised version. */
@@ -206,7 +206,7 @@ void test_interactions(struct part *parts, int count,
 
   /* Dump result of serial interaction. */
   dump_indv_particle_fields(serial_filename, &pi);
-  for (size_t i = 1; i < count; i++)
+  for (int i = 1; i < count; i++)
     dump_indv_particle_fields(serial_filename, &parts[i]);
 
   /* Setup arrays for vector interaction. */