diff --git a/AUTHORS b/AUTHORS
index 4a841f813ede6727a53861037be7fbb8aed021b2..32597ffd146795656286b9528656ab013c519c30 100644
--- a/AUTHORS
+++ b/AUTHORS
@@ -1,2 +1,5 @@
-Pedro Gonnet: Main author.
-Matthieu Schaller: TBA.
+Pedro Gonnet            gonnet@google.com	
+Matthieu Schaller       matthieu.schaller@durham.ac.uk
+Aidan Chalk             aidan.chalk@durham.ac.uk
+Peter W. Draper         p.w.draper@durham.ac.uk
+Bert Vandenbroucke      bert.vandenbroucke@gmail.com
diff --git a/INSTALL.swift b/INSTALL.swift
index 758a5a517fb796cdbde4dd89afabef0403355cdc..33c14049b640bee7e1d7be4aaca435dc18b7fda0 100644
--- a/INSTALL.swift
+++ b/INSTALL.swift
@@ -58,10 +58,36 @@ for instance. GCC address sanitizer flags can be included using the
 
 option. Note this requires a GCC compiler version of at least 4.8.
 
-Dependencies: needs to be filled in...
+SWIFT currently requires a compiler with OpenMP support.
 
 
-    
+                                 Dependencies
+                                 ============
 
+SWIFT depends on a number of third-party libraries that should be available
+before you can build it.
 
 
+HDF5: a HDF5 library is required to read and write particle data. One of the
+commands "h5cc" or "h5pcc" should be available. If "h5pcc" is located then a
+parallel HDF5 built for the version of MPI located should be provided. If
+the command is not available then it can be located using the "--with-hdf5"
+configure option. The value should be the full path to the "h5cc" or "h5pcc"
+commands.
+
+
+MPI: an optional MPI library that fully supports MPI_THREAD_MULTIPLE.  
+Before running configure the "mpirun" command should be available in the
+shell. If your command isn't called "mpirun" then define the "MPIRUN"
+environment variable, either in the shell or when running configure.
+
+
+METIS: a build of the METIS library can be optionally used to optimize the
+load between MPI nodes (requires an MPI library). This should be found in the
+standard installation directories, or pointed at using the "--with-metis"
+configuration option.  In this case the top-level installation directory of
+the METIS build should be given. Note to use METIS you should at least supply
+"--with-metis".
+
+
+DOXYGEN: the doxygen library is required to create the SWIFT API documentation.
diff --git a/README b/README
index e69de29bb2d1d6434b8b29ae775ad8c2e48c5391..279ee92eab9774e8e6a10d24283b79dfceb65bdb 100644
--- a/README
+++ b/README
@@ -0,0 +1,3 @@
+This is SWIFT. See INSTALL.swift for instructions.
+
+www.swiftsim.com
diff --git a/configure.ac b/configure.ac
index 4cfda7aaac6609dcee6f154d0625b57bade744b1..8ff520fd42861b2b7267ed80973bd765a14fb13b 100644
--- a/configure.ac
+++ b/configure.ac
@@ -57,15 +57,21 @@ AC_ARG_ENABLE([mpi],
     [enable_mpi="$enableval"],
     [enable_mpi="yes"]
 )
+good_mpi="yes"
 if test "$enable_mpi" = "yes"; then
     AX_MPI([CC="$MPICC" AC_DEFINE(HAVE_MPI, 1, [Define if you have the MPI library.]) ])
 
     # Various MPI implementations require additional libraries when also using
     # threads. Use mpirun (on PATH) as that seems to be only command with
-    # version flag.
-    AC_PATH_PROG([MPIRUN],[mpirun],[notfound])
+    # version flag, allow MPIRUN to override for systems that insist on
+    # a non-standard name (PRACE).
+    : ${MPIRUN='mpirun'}
+    if test "$MPIRUN" = "mpirun"; then
+       AC_PATH_PROG([MPIRUN],[mpirun],[notfound])
+    fi
     if test "$MPIRUN" = "notfound"; then
        AC_MSG_WARN([Cannot find mpirun command on PATH, thread support may not be correct])
+       enable_mpi="no"
     else
        # Special options we know about.
        # Intel: -mt_mpi
@@ -86,6 +92,15 @@ if test "$enable_mpi" = "yes"; then
          *"Open MPI"*)
             MPI_THREAD_LIBS=""
             AC_MSG_RESULT([Open MPI])
+            #  OpenMPI should be 1.8.6 or later, if not complain.
+            #  Version is last word on first line of -version output.
+            revision=`$MPIRUN -version 2>&1 | grep "Open MPI" | awk '{print $NF}'`
+            AX_COMPARE_VERSION( $revision, [ge], [1.8.6],,[good_mpi="no"] )
+            if test "$good_mpi" = "no"; then
+                AC_MSG_WARN([
+    Open MPI version should be at least 1.8.6 (is $revision)])
+                enable_mpi="yes (but with warning)"
+            fi
          ;;
          *)
             MPI_THREAD_LIBS=""
@@ -95,7 +110,10 @@ if test "$enable_mpi" = "yes"; then
        AC_SUBST([MPI_THREAD_LIBS])
     fi
 fi
-AM_CONDITIONAL([HAVEMPI],[test -n "$MPICC"])
+AM_CONDITIONAL([HAVEMPI],[test "$enable_mpi" = "yes"])
+
+# Indicate that MPIRUN can be modified by an environment variable
+AC_ARG_VAR(MPIRUN, Path to the mpirun command if non-standard)
 
 # Need C99 and inline support.
 AC_PROG_CC_C99
@@ -170,39 +188,31 @@ AX_PTHREAD([LIBS="$PTHREAD_LIBS $LIBS" CFLAGS="$CFLAGS $PTHREAD_CFLAGS"
     or use CPPFLAGS and LDFLAGS if the library is installed in a
     non-standard location.]))
 
-# Check for OpenMP.
-AC_OPENMP
-AC_SUBST(OPENMP_CFLAGS)
-enable_openmp="no"
-if test -z "$OPENMP_CFLAGS"; then
-   echo $OPENMP_CFLAGS
-   AC_MSG_ERROR(Compiler does not support OpenMP, 1)
-else
-   CFLAGS="$CFLAGS $OPENMP_CFLAGS"
-   enable_openmp="yes"
-fi
 
 # Check for metis. Note AX_LIB_METIS exists, but cannot be configured
 # to be default off (i.e. given no option it tries to locate METIS), so we
 # don't use that.
 AC_ARG_WITH([metis],
     [AS_HELP_STRING([--with-metis=PATH],
-       [prefix where the metis library is installed @<:@default=yes@:>@]
+       [root directory where metis is installed @<:@default=yes@:>@]
     )],
     [],
     [with_metis="no"]
 )
 if test "x$with_metis" != "xno"; then
    if test "x$with_metis" != "xyes" -a "x$with_metis" != "x"; then
-      METIS_LIBS="-L$with_metis -lmetis"
+      METIS_LIBS="-L$with_metis/lib -lmetis"
+      METIS_INCS="-I$with_metis/include"
    else
       METIS_LIBS="-lmetis"
+      METIS_INCS=""
    fi
    AC_CHECK_LIB([metis],[METIS_PartGraphKway],
       AC_DEFINE([HAVE_METIS],1,[The metis library appears to be present.]),
       AC_MSG_ERROR(something is wrong with the metis library!),$METIS_LIBS)
 fi
 AC_SUBST([METIS_LIBS])
+AC_SUBST([METIS_INCS])
 AM_CONDITIONAL([HAVEMETIS],[test -n "$METIS_LIBS"])
 
 # Check for zlib.
@@ -219,7 +229,7 @@ AX_LIB_HDF5
 # The default is to use MPI support if it is available, i.e. this is
 # a parallel HDF5.
 # To do this need to ask the HDF5 compiler about its configuration,
-# -showconfig should have yes/no. 
+# -showconfig should have yes/no.
 have_parallel_hdf5="no"
 if test "$with_hdf5" = "yes"; then
     AC_ARG_ENABLE([parallel-hdf5],
@@ -284,7 +294,6 @@ AC_MSG_RESULT([
    MPI enabled: $enable_mpi
    HDF5 enabled: $with_hdf5
        parallel: $have_parallel_hdf5
-   OpenMP enabled: $enable_openmp
    Metis enabled: $with_metis
 ])
 
diff --git a/examples/Makefile.am b/examples/Makefile.am
index eff15afef3ed0597f95c38f5160178d9868abe48..0ae3cbba9ded84e94b4cbe583344064b391d1c4a 100644
--- a/examples/Makefile.am
+++ b/examples/Makefile.am
@@ -20,13 +20,13 @@
 MYFLAGS = -DTIMER
 
 # Add the source directory and debug to CFLAGS
-AM_CFLAGS = -Wall -Werror -I../src -DCPU_TPS=2.67e9
+AM_CFLAGS = -Wall -Werror -I../src -DCPU_TPS=2.67e9 $(HDF5_CPPFLAGS)
 
 AM_LDFLAGS =
 
-METIS_LIBS = @METIS_LIBS@
 MPI_THREAD_LIBS = @MPI_THREAD_LIBS@
 MPI_LIBS = $(METIS_LIBS) $(MPI_THREAD_LIBS)
+MPI_FLAGS = -DWITH_MPI $(METIS_INCS)
 
 # Set-up the library
 bin_PROGRAMS = test test_fixdt test_mindt test_single
@@ -53,17 +53,17 @@ test_mindt_LDADD =  ../src/.libs/libswiftsim.a $(HDF5_LDFLAGS) $(HDF5_LIBS)
 
 # Sources for test_mpi
 test_mpi_SOURCES = test.c
-test_mpi_CFLAGS = $(MYFLAGS) $(AM_CFLAGS) -DWITH_MPI -DENGINE_POLICY="engine_policy_multistep | engine_policy_keep"
+test_mpi_CFLAGS = $(MYFLAGS) $(AM_CFLAGS) $(MPI_FLAGS) -DENGINE_POLICY="engine_policy_multistep | engine_policy_keep"
 test_mpi_LDADD =  ../src/.libs/libswiftsim_mpi.a $(HDF5_LDFLAGS) $(HDF5_LIBS) $(MPI_LIBS)
 
 # Sources for test_fixdt_mpi
 test_fixdt_mpi_SOURCES = test.c
-test_fixdt_mpi_CFLAGS = $(MYFLAGS) $(AM_CFLAGS) -DWITH_MPI -DENGINE_POLICY="engine_policy_fixdt | engine_policy_keep"
+test_fixdt_mpi_CFLAGS = $(MYFLAGS) $(AM_CFLAGS) $(MPI_FLAGS) -DENGINE_POLICY="engine_policy_fixdt | engine_policy_keep"
 test_fixdt_mpi_LDADD =  ../src/.libs/libswiftsim_mpi.a $(HDF5_LDFLAGS) $(HDF5_LIBS) $(MPI_LIBS)
 
 # Sources for test_mindt_mpi
 test_mindt_mpi_SOURCES = test.c
-test_mindt_mpi_CFLAGS = $(MYFLAGS) $(AM_CFLAGS) -DWITH_MPI -DENGINE_POLICY="engine_policy_keep"
+test_mindt_mpi_CFLAGS = $(MYFLAGS) $(AM_CFLAGS) $(MPI_FLAGS) -DENGINE_POLICY="engine_policy_keep"
 test_mindt_mpi_LDADD =  ../src/.libs/libswiftsim_mpi.a $(HDF5_LDFLAGS) $(HDF5_LIBS) $(MPI_LIBS)
 
 # Sources for test_single
diff --git a/examples/test.c b/examples/test.c
index f6d5d42bbb4eb6aa3461392adec877867bb372d9..188b20c4118bf6aec607566ff9859803892b9d9a 100644
--- a/examples/test.c
+++ b/examples/test.c
@@ -1,7 +1,8 @@
 /*******************************************************************************
  * This file is part of SWIFT.
- * Coypright (c) 2012 Pedro Gonnet (pedro.gonnet@durham.ac.uk),
+ * Copyright (c) 2012 Pedro Gonnet (pedro.gonnet@durham.ac.uk),
  *                    Matthieu Schaller (matthieu.schaller@durham.ac.uk)
+ *               2015 Peter W. Draper (p.w.draper@durham.ac.uk)
  * 
  * This program is free software: you can redistribute it and/or modify
  * it under the terms of the GNU Lesser General Public License as published
@@ -31,7 +32,6 @@
 #include <float.h>
 #include <limits.h>
 #include <fenv.h>
-#include <omp.h>
 
 /* Conditional headers. */
 #ifdef HAVE_LIBZ
@@ -270,7 +270,6 @@ void pairs_n2 ( double *dim , struct part *__restrict__ parts , int N , int peri
     double rho_max = 0.0, rho_min = 100;
     
     /* Loop over all particle pairs. */
-    #pragma omp parallel for schedule(dynamic), default(none), private(k,i,dx,r2), shared(periodic,parts,dim,N,stdout)
     for ( j = 0 ; j < N ; j++ ) {
         if ( j % 1000 == 0 ) {
             printf( "pairs_n2: j=%i.\n" , j );
@@ -290,13 +289,11 @@ void pairs_n2 ( double *dim , struct part *__restrict__ parts , int N , int peri
             if ( r2 < parts[j].h*parts[j].h || r2 < parts[k].h*parts[k].h ) {
                 runner_iact_density( r2 , NULL , parts[j].h , parts[k].h , &parts[j] , &parts[k] );
                 /* if ( parts[j].h / parts[k].h > maxratio )
-                    #pragma omp critical
                     {
                     maxratio = parts[j].h / parts[k].h;
                     mj = j; mk = k;
                     }
                 else if ( parts[k].h / parts[j].h > maxratio )
-                    #pragma omp critical
                     {
                     maxratio = parts[k].h / parts[j].h;
                     mj = j; mk = k;
@@ -524,6 +521,23 @@ void density_dump ( int N ) {
 
     }
 
+/**
+ *  Factorize a given integer, attempts to keep larger pair of factors.
+ */
+void factor( int value, int *f1, int *f2 ) {
+    int j;
+    int i;
+
+    j = (int) sqrt( value );
+    for ( i = j; i > 0; i-- ) {
+        if ( ( value % i ) == 0 ) {
+            *f1 = i;
+            *f2 = value / i;
+            break;
+            }
+        }
+    }
+
 
 /**
  * @brief Main routine that loads a few particles and generates some output.
@@ -570,14 +584,21 @@ int main ( int argc , char *argv[] ) {
     if ( myrank == 0 )
         message( "MPI is up and running with %i nodes." , nr_nodes );
     fflush(stdout);
+
+    /* Set a default grid so that grid[0]*grid[1]*grid[2] == nr_nodes. */
+    factor( nr_nodes, &grid[0], &grid[1] );
+    factor( nr_nodes / grid[1], &grid[0], &grid[2] );
+    factor( grid[0] * grid[1], &grid[1], &grid[0] );
 #endif
 
     /* Greeting message */
-    message( "This is %s\n", package_description() );
+    if ( myrank == 0 )
+        message( "This is %s\n", package_description() );
     
     /* Init the space. */
     bzero( &s , sizeof(struct space) );
 
+
     /* Parse the options */
     while ( ( c = getopt( argc , argv  , "a:c:d:f:g:m:q:r:s:t:w:z:" ) ) != -1 )
       switch( c )
@@ -609,8 +630,6 @@ int main ( int argc , char *argv[] ) {
 	case 'g':
 	  if ( sscanf( optarg , "%i %i %i" , &grid[0] , &grid[1] , &grid[2] ) != 3 )
 	    error( "Error parsing grid." );
-	  if ( myrank == 0 )
-        message( "grid set to [ %i %i %i ]." , grid[0] , grid[1] , grid[2] ); fflush(stdout);
 	  break;
 	case 'm':
 	  if ( sscanf( optarg , "%lf" , &h_max ) != 1 )
@@ -635,7 +654,6 @@ int main ( int argc , char *argv[] ) {
 	case 't':
 	  if ( sscanf( optarg , "%d" , &nr_threads ) != 1 )
 	    error( "Error parsing number of threads." );
-	  omp_set_num_threads( nr_threads );
 	  break;
 	case 'w':
 	  if ( sscanf( optarg , "%d" , &space_subsize ) != 1 )
@@ -654,7 +672,11 @@ int main ( int argc , char *argv[] ) {
 	  break;
 
 	}
-    
+
+#if defined( WITH_MPI )
+    if ( myrank == 0 )
+        message( "grid set to [ %i %i %i ]." , grid[0] , grid[1] , grid[2] ); fflush(stdout);
+#endif
 
     /* How large are the parts? */
     if ( myrank == 0 ) {
diff --git a/examples/test_single.c b/examples/test_single.c
index 9387f826acf4e43ed8aebc6e9242a9ac8afbee15..36c219eebf214feaf775283834578c5a487d5e90 100644
--- a/examples/test_single.c
+++ b/examples/test_single.c
@@ -31,7 +31,6 @@
 #include <float.h>
 #include <limits.h>
 #include <fenv.h>
-#include <omp.h>
 
 /* Conditional headers. */
 #ifdef HAVE_LIBZ
@@ -63,6 +62,9 @@ int main ( int argc , char *argv[] ) {
     struct part p1, p2;
     float x, w, dwdx, r2, dx[3] = { 0.0f , 0.0f , 0.0f }, gradw[3];
     
+    /* Greeting message */
+    printf( "This is %s\n", package_description() );
+
     /* Init the particles. */
     for ( k = 0 ; k < 3 ; k++ ) {
         p1.a[k] = 0.0f; p1.v[k] = 0.0f; p1.x[k] = 0.0;
diff --git a/src/Makefile.am b/src/Makefile.am
index 99ebb246b99a620b0207784b132e0a8275a12d36..78f29fe5b87b0ef8ef6a0948039df2ca52e712aa 100644
--- a/src/Makefile.am
+++ b/src/Makefile.am
@@ -17,7 +17,7 @@
 # along with this program.  If not, see <http://www.gnu.org/licenses/>.
 
 # Add the debug flag to the whole thing
-AM_CFLAGS = -DTIMER -DCOUNTER -DCPU_TPS=2.30e9
+AM_CFLAGS = -DTIMER -DCOUNTER -DCPU_TPS=2.30e9 $(HDF5_CPPFLAGS)
 
 # Assign a "safe" version number
 AM_LDFLAGS = $(LAPACK_LIBS) $(BLAS_LIBS) $(HDF5_LDFLAGS) -version-info 0:0:0 # -fsanitize=address
@@ -52,17 +52,24 @@ libswiftsim_la_SOURCES = $(AM_SOURCES)
 
 # Sources and flags for MPI library
 libswiftsim_mpi_la_SOURCES = $(AM_SOURCES)
-libswiftsim_mpi_la_CFLAGS = $(AM_CFLAGS) -DWITH_MPI
+libswiftsim_mpi_la_CFLAGS = $(AM_CFLAGS) -DWITH_MPI $(METIS_INCS)
 libswiftsim_mpi_la_SHORTNAME = mpi
 
 
 # Versioning. If any sources change then update the version.h file with
 # the current git revision and package version.
+# May have a checkout without a version.h file and no git command (tar/zip
+# download), allow that, but make sure we know it.
 version.h: version.h.in $(AM_SOURCES) $(include_HEADERS) $(noinst_HEADERS)
 	if test "X$(GIT_CMD)" != "X"; then \
 	    GIT_REVISION=`git describe --abbrev=8  --always --tags --dirty`; \
 	    sed -e "s,@PACKAGE_VERSION\@,$(PACKAGE_VERSION)," \
 	        -e "s,@GIT_REVISION\@,$${GIT_REVISION}," version.h.in > version.h; \
+	else \
+	    if test ! -f version.h; then \
+	        sed -e "s,@PACKAGE_VERSION\@,$(PACKAGE_VERSION)," \
+	            -e "s,@GIT_REVISION\@,unknown," version.h.in > version.h; \
+	    fi; \
 	fi
 
 #  Make sure version.h is built first.
diff --git a/src/engine.c b/src/engine.c
index f730f0fe4784ecaf830f088886bf25c7d335c873..88d9547b83a10327a6a7183f13f5bb499da43948 100644
--- a/src/engine.c
+++ b/src/engine.c
@@ -23,8 +23,6 @@
 /* Some standard headers. */
 #include <float.h>
 #include <limits.h>
-#include <math.h>
-#include <omp.h>
 #include <sched.h>
 #include <stdio.h>
 #include <stdlib.h>
@@ -34,12 +32,11 @@
 /* MPI headers. */
 #ifdef WITH_MPI
 #include <mpi.h>
-#endif
-
-/* METIS headers. */
+/* METIS headers only used when MPI is also available. */
 #ifdef HAVE_METIS
 #include <metis.h>
 #endif
+#endif
 
 /* This object's header. */
 #include "engine.h"
diff --git a/src/proxy.c b/src/proxy.c
index 447a7a90be24d8dcb196d73ed62611464d5f3c2d..bafa185cdcaf2100992398657b650a954daceb91 100644
--- a/src/proxy.c
+++ b/src/proxy.c
@@ -23,9 +23,6 @@
 /* Some standard headers. */
 #include <float.h>
 #include <limits.h>
-#include <math.h>
-#include <omp.h>
-#include <pthread.h>
 #include <sched.h>
 #include <stdio.h>
 #include <stdlib.h>
diff --git a/src/runner.c b/src/runner.c
index 173215a6a53a0627408f3b5b61d0805fee597e2d..a3056f414bd352c2a9b857b980e7989dfc28130a 100644
--- a/src/runner.c
+++ b/src/runner.c
@@ -23,12 +23,6 @@
 /* Some standard headers. */
 #include <float.h>
 #include <limits.h>
-#include <math.h>
-#include <omp.h>
-#include <pthread.h>
-#include <stdio.h>
-#include <stdlib.h>
-#include <string.h>
 
 /* MPI headers. */
 #ifdef WITH_MPI
diff --git a/src/scheduler.c b/src/scheduler.c
index bbcac93be24066e05878446eba7c60a5d80f4bf8..02defc31710e3ab0de067cf359d4281b94c17d90 100644
--- a/src/scheduler.c
+++ b/src/scheduler.c
@@ -23,7 +23,6 @@
 /* Some standard headers. */
 #include <limits.h>
 #include <math.h>
-#include <omp.h>
 #include <pthread.h>
 #include <stdio.h>
 #include <stdlib.h>
@@ -803,7 +802,6 @@ void scheduler_reweight(struct scheduler *s) {
   /* Run throught the tasks backwards and set their waits and
      weights. */
   // tic = getticks();
-  // #pragma omp parallel for schedule(static) private(t,j)
   for (k = nr_tasks - 1; k >= 0; k--) {
     t = &tasks[tid[k]];
     t->weight = 0;
@@ -887,7 +885,6 @@ void scheduler_start(struct scheduler *s, unsigned int mask) {
 
   /* Run throught the tasks and set their waits. */
   // tic = getticks();
-  // #pragma omp parallel for schedule(static) private(t,j)
   for (k = nr_tasks - 1; k >= 0; k--) {
     t = &tasks[tid[k]];
     t->wait = 0;
diff --git a/src/space.c b/src/space.c
index f99f59329212a574ccacdb934133be1b3348ea61..fcdbfa24906153252f3b8a8855ca63703c648da6 100644
--- a/src/space.c
+++ b/src/space.c
@@ -24,10 +24,6 @@
 #include <float.h>
 #include <limits.h>
 #include <math.h>
-#include <omp.h>
-#include <pthread.h>
-#include <stdio.h>
-#include <stdlib.h>
 #include <string.h>
 
 /* MPI headers. */
@@ -297,12 +293,12 @@ void space_regrid(struct space *s, double cell_max) {
 
 void space_rebuild(struct space *s, double cell_max) {
 
-  int j, k, cdim[3], nr_gparts = s->nr_gparts;
+  int j, k, cdim[3], nr_parts = s->nr_parts, nr_gparts = s->nr_gparts;
   struct cell *restrict c, *restrict cells;
   struct part *restrict finger, *restrict p, *parts = s->parts;
   struct xpart *xfinger, *xparts = s->xparts;
   struct gpart *gp, *gparts = s->gparts, *gfinger;
-  int *cell_id;
+  int *ind;
   double ih[3], dim[3];
   // ticks tic;
 
@@ -315,8 +311,8 @@ void space_rebuild(struct space *s, double cell_max) {
 
   /* Run through the particles and get their cell index. */
   // tic = getticks();
-  const int cell_id_size = s->nr_parts;
-  if ((cell_id = (int *)malloc(sizeof(int) * cell_id_size)) == NULL)
+  const int ind_size = s->size_parts;
+  if ((ind = (int *)malloc(sizeof(int) * ind_size)) == NULL)
     error("Failed to allocate temporary particle indices.");
   ih[0] = s->ih[0];
   ih[1] = s->ih[1];
@@ -327,18 +323,16 @@ void space_rebuild(struct space *s, double cell_max) {
   cdim[0] = s->cdim[0];
   cdim[1] = s->cdim[1];
   cdim[2] = s->cdim[2];
-  for (k = 0; k < s->nr_parts; k++) {
+  for (k = 0; k < nr_parts; k++) {
     p = &parts[k];
     for (j = 0; j < 3; j++)
       if (p->x[j] < 0.0)
         p->x[j] += dim[j];
       else if (p->x[j] >= dim[j])
         p->x[j] -= dim[j];
-    cell_id[k] =
+    ind[k] =
         cell_getid(cdim, p->x[0] * ih[0], p->x[1] * ih[1], p->x[2] * ih[2]);
-    if (cell_id[k] < 0 || cell_id[k] >= s->nr_cells)
-      error("Bad cell id %i.", cell_id[k]);
-    atomic_inc(&cells[cell_id[k]].count);
+    cells[ind[k]].count++;
   }
 // message( "getting particle indices took %.3f ms." , (double)(getticks() -
 // tic) / CPU_TPS * 1000 );
@@ -346,80 +340,78 @@ void space_rebuild(struct space *s, double cell_max) {
 #ifdef WITH_MPI
   /* Move non-local parts to the end of the list. */
   int nodeID = s->e->nodeID;
-  int nr_local_parts = s->nr_parts;
-  for (k = 0; k < nr_local_parts; k++)
-    if (cells[cell_id[k]].nodeID != nodeID) {
-      cells[cell_id[k]].count -= 1;
-      nr_local_parts -= 1;
+  for (k = 0; k < nr_parts; k++)
+    if (cells[ind[k]].nodeID != nodeID) {
+      cells[ind[k]].count -= 1;
+      nr_parts -= 1;
       struct part tp = parts[k];
-      parts[k] = parts[nr_local_parts];
-      parts[nr_local_parts] = tp;
+      parts[k] = parts[nr_parts];
+      parts[nr_parts] = tp;
       struct xpart txp = xparts[k];
-      xparts[k] = xparts[nr_local_parts];
-      xparts[nr_local_parts] = txp;
-      int t = cell_id[k];
-      cell_id[k] = cell_id[nr_local_parts];
-      cell_id[nr_local_parts] = t;
+      xparts[k] = xparts[nr_parts];
+      xparts[nr_parts] = txp;
+      int t = ind[k];
+      ind[k] = ind[nr_parts];
+      ind[nr_parts] = t;
     }
 
   /* Exchange the strays, note that this potentially re-allocates
      the parts arrays. */
   s->nr_parts =
-      nr_local_parts + engine_exchange_strays(s->e, nr_local_parts,
-                                              &cell_id[nr_local_parts],
-                                              s->nr_parts - nr_local_parts);
+      nr_parts + engine_exchange_strays(s->e, nr_parts, &ind[nr_parts],
+                                        s->nr_parts - nr_parts);
   parts = s->parts;
   xparts = s->xparts;
 
   /* Re-allocate the index array if needed.. */
-  if (s->nr_parts > cell_id_size) {
-    int *cell_id_new;
-    if ((cell_id_new = (int *)malloc(sizeof(int) * s->nr_parts)) == NULL)
+  if (s->nr_parts > ind_size) {
+    int *ind_new;
+    if ((ind_new = (int *)malloc(sizeof(int) * s->nr_parts)) == NULL)
       error("Failed to allocate temporary particle indices.");
-    memcpy(cell_id_new, cell_id, sizeof(int) * nr_local_parts);
-    free(cell_id);
-    cell_id = cell_id_new;
+    memcpy(ind_new, ind, sizeof(int) * nr_parts);
+    free(ind);
+    ind = ind_new;
   }
 
   /* Assign each particle to its cell. */
-  for (k = nr_local_parts; k < s->nr_parts; k++) {
+  for (k = nr_parts; k < s->nr_parts; k++) {
     p = &parts[k];
-    cell_id[k] =
+    ind[k] =
         cell_getid(cdim, p->x[0] * ih[0], p->x[1] * ih[1], p->x[2] * ih[2]);
-    cells[cell_id[k]].count += 1;
-    if (cells[cell_id[k]].nodeID != nodeID)
-      error(
-          "Received part that does not belong to me (nodeID=%i, x=[%e,%e,%e]).",
-          cells[cell_id[k]].nodeID, p->x[0], p->x[1], p->x[2]);
+    cells[ind[k]].count += 1;
+    /* if ( cells[ ind[k] ].nodeID != nodeID )
+        error( "Received part that does not belong to me (nodeID=%i)." , cells[
+       ind[k] ].nodeID ); */
   }
+  nr_parts = s->nr_parts;
 #endif
 
   /* Sort the parts according to their cells. */
   // tic = getticks();
-  parts_sort(parts, xparts, cell_id, s->nr_parts, 0, s->nr_cells - 1);
+  parts_sort(parts, xparts, ind, nr_parts, 0, s->nr_cells - 1);
   // message( "parts_sort took %.3f ms." , (double)(getticks() - tic) / CPU_TPS
   // * 1000 );
 
   /* Re-link the gparts. */
-  for (k = 0; k < s->nr_parts; k++)
+  for (k = 0; k < nr_parts; k++)
     if (parts[k].gpart != NULL) parts[k].gpart->part = &parts[k];
 
   /* Verify sort. */
   /* for ( k = 1 ; k < nr_parts ; k++ ) {
-      if ( cell_id[k-1] > cell_id[k] ) {
+      if ( ind[k-1] > ind[k] ) {
           error( "Sort failed!" );
           }
-      else if ( cell_id[k] != cell_getid( cdim , parts[k].x[0]*ih[0] ,
+      else if ( ind[k] != cell_getid( cdim , parts[k].x[0]*ih[0] ,
      parts[k].x[1]*ih[1] , parts[k].x[2]*ih[2] ) )
           error( "Incorrect indices!" );
       } */
 
   /* We no longer need the indices as of here. */
-  free(cell_id);
+  free(ind);
 
   /* Run through the gravity particles and get their cell index. */
   // tic = getticks();
-  if ((cell_id = (int *)malloc(sizeof(int) * s->size_gparts)) == NULL)
+  if ((ind = (int *)malloc(sizeof(int) * s->size_gparts)) == NULL)
     error("Failed to allocate temporary particle indices.");
   for (k = 0; k < nr_gparts; k++) {
     gp = &gparts[k];
@@ -428,9 +420,9 @@ void space_rebuild(struct space *s, double cell_max) {
         gp->x[j] += dim[j];
       else if (gp->x[j] >= dim[j])
         gp->x[j] -= dim[j];
-    cell_id[k] =
+    ind[k] =
         cell_getid(cdim, gp->x[0] * ih[0], gp->x[1] * ih[1], gp->x[2] * ih[2]);
-    atomic_inc(&cells[cell_id[k]].gcount);
+    cells[ind[k]].gcount++;
   }
   // message( "getting particle indices took %.3f ms." , (double)(getticks() -
   // tic) / CPU_TPS * 1000 );
@@ -439,7 +431,7 @@ void space_rebuild(struct space *s, double cell_max) {
 
   /* Sort the parts according to their cells. */
   // tic = getticks();
-  gparts_sort(gparts, cell_id, nr_gparts, 0, s->nr_cells - 1);
+  gparts_sort(gparts, ind, nr_gparts, 0, s->nr_cells - 1);
   // message( "gparts_sort took %.3f ms." , (double)(getticks() - tic) / CPU_TPS
   // * 1000 );
 
@@ -448,7 +440,7 @@ void space_rebuild(struct space *s, double cell_max) {
     if (gparts[k].id > 0) gparts[k].part->gpart = &gparts[k];
 
   /* We no longer need the indices as of here. */
-  free(cell_id);
+  free(ind);
 
   /* Hook the cells up to the parts. */
   // tic = getticks();
@@ -470,17 +462,8 @@ void space_rebuild(struct space *s, double cell_max) {
   /* At this point, we have the upper-level cells, old or new. Now make
      sure that the parts in each cell are ok. */
   // tic = getticks();
-  k = 0;
-  {
-    if (omp_get_thread_num() < 8)
-      while (1) {
-        int myk = atomic_inc(&k);
-        if (myk < s->nr_cells)
-          space_split(s, &cells[myk]);
-        else
-          break;
-      }
-  }
+  for (k = 0; k < s->nr_cells; k++) space_split(s, &cells[k]);
+
   // message( "space_split took %.3f ms." , (double)(getticks() - tic) / CPU_TPS
   // * 1000 );
 }
@@ -532,130 +515,105 @@ void parts_sort(struct part *parts, struct xpart *xparts, int *ind, int N,
   last = 1;
   waiting = 1;
 
-/* Parallel bit. */
-#pragma omp parallel default(shared)                                           \
-    shared(N, first, last, waiting, qstack, parts, xparts, ind, qstack_size,   \
-           stderr, engine_rank) private(pivot, i, ii, j, jj, min, max, temp_i, \
-                                        qid, temp_xp, temp_p)
-  {
+  /* Main loop. */
+  while (waiting > 0) {
 
-    /* Main loop. */
-    if (omp_get_thread_num() < 8)
-      while (waiting > 0) {
-
-        /* Grab an interval off the queue. */
-        qid = atomic_inc(&first) % qstack_size;
-
-        /* Wait for the interval to be ready. */
-        while (waiting > 0 && atomic_cas(&qstack[qid].ready, 1, 1) != 1)
-          ;
-
-        /* Broke loop for all the wrong reasons? */
-        if (waiting == 0) break;
-
-        /* Get the stack entry. */
-        i = qstack[qid].i;
-        j = qstack[qid].j;
-        min = qstack[qid].min;
-        max = qstack[qid].max;
-        qstack[qid].ready = 0;
-        // message( "thread %i got interval [%i,%i] with values in [%i,%i]." ,
-        // omp_get_thread_num() , i , j , min , max );
-
-        /* Loop over sub-intervals. */
-        while (1) {
-
-          /* Bring beer. */
-          pivot = (min + max) / 2;
-
-          /* One pass of QuickSort's partitioning. */
-          ii = i;
-          jj = j;
-          while (ii < jj) {
-            while (ii <= j && ind[ii] <= pivot) ii++;
-            while (jj >= i && ind[jj] > pivot) jj--;
-            if (ii < jj) {
-              temp_i = ind[ii];
-              ind[ii] = ind[jj];
-              ind[jj] = temp_i;
-              temp_p = parts[ii];
-              parts[ii] = parts[jj];
-              parts[jj] = temp_p;
-              temp_xp = xparts[ii];
-              xparts[ii] = xparts[jj];
-              xparts[jj] = temp_xp;
-            }
-          }
+    /* Grab an interval off the queue. */
+    qid = (first++) % qstack_size;
 
-          /* Verify sort. */
-          /* for ( int k = i ; k <= jj ; k++ )
-              if ( ind[k] > pivot ) {
-                  message( "sorting failed at k=%i, ind[k]=%i, pivot=%i, i=%i,
-          j=%i, N=%i." , k , ind[k] , pivot , i , j , N );
-                  error( "Partition failed (<=pivot)." );
-                  }
-          for ( int k = jj+1 ; k <= j ; k++ )
-              if ( ind[k] <= pivot ) {
-                  message( "sorting failed at k=%i, ind[k]=%i, pivot=%i, i=%i,
-          j=%i, N=%i." , k , ind[k] , pivot , i , j , N );
-                  error( "Partition failed (>pivot)." );
-                  } */
-
-          /* Split-off largest interval. */
-          if (jj - i > j - jj + 1) {
-
-            /* Recurse on the left? */
-            if (jj > i && pivot > min) {
-              qid = atomic_inc(&last) % qstack_size;
-              while (atomic_cas(&qstack[qid].ready, 0, 0) != 0)
-                ;
-              qstack[qid].i = i;
-              qstack[qid].j = jj;
-              qstack[qid].min = min;
-              qstack[qid].max = pivot;
-              qstack[qid].ready = 1;
-              if (atomic_inc(&waiting) >= qstack_size)
-                error("Qstack overflow.");
-            }
-
-            /* Recurse on the right? */
-            if (jj + 1 < j && pivot + 1 < max) {
-              i = jj + 1;
-              min = pivot + 1;
-            } else
-              break;
-
-          } else {
-
-            /* Recurse on the right? */
-            if (jj + 1 < j && pivot + 1 < max) {
-              qid = atomic_inc(&last) % qstack_size;
-              while (atomic_cas(&qstack[qid].ready, 0, 0) != 0)
-                ;
-              qstack[qid].i = jj + 1;
-              qstack[qid].j = j;
-              qstack[qid].min = pivot + 1;
-              qstack[qid].max = max;
-              qstack[qid].ready = 1;
-              if (atomic_inc(&waiting) >= qstack_size)
-                error("Qstack overflow.");
-            }
-
-            /* Recurse on the left? */
-            if (jj > i && pivot > min) {
-              j = jj;
-              max = pivot;
-            } else
-              break;
-          }
+    /* Get the stack entry. */
+    i = qstack[qid].i;
+    j = qstack[qid].j;
+    min = qstack[qid].min;
+    max = qstack[qid].max;
+    qstack[qid].ready = 0;
 
-        } /* loop over sub-intervals. */
+    /* Loop over sub-intervals. */
+    while (1) {
 
-        atomic_dec(&waiting);
+      /* Bring beer. */
+      pivot = (min + max) / 2;
+
+      /* One pass of QuickSort's partitioning. */
+      ii = i;
+      jj = j;
+      while (ii < jj) {
+        while (ii <= j && ind[ii] <= pivot) ii++;
+        while (jj >= i && ind[jj] > pivot) jj--;
+        if (ii < jj) {
+          temp_i = ind[ii];
+          ind[ii] = ind[jj];
+          ind[jj] = temp_i;
+          temp_p = parts[ii];
+          parts[ii] = parts[jj];
+          parts[jj] = temp_p;
+          temp_xp = xparts[ii];
+          xparts[ii] = xparts[jj];
+          xparts[jj] = temp_xp;
+        }
+      }
 
-      } /* main loop. */
+      /* Verify sort. */
+      /* for ( int k = i ; k <= jj ; k++ )
+         if ( ind[k] > pivot ) {
+         message( "sorting failed at k=%i, ind[k]=%i, pivot=%i, i=%i, j=%i,
+         N=%i." , k , ind[k] , pivot , i , j , N );
+         error( "Partition failed (<=pivot)." );
+         }
+         for ( int k = jj+1 ; k <= j ; k++ )
+         if ( ind[k] <= pivot ) {
+         message( "sorting failed at k=%i, ind[k]=%i, pivot=%i, i=%i, j=%i,
+         N=%i." , k , ind[k] , pivot , i , j , N );
+         error( "Partition failed (>pivot)." );
+         } */
+
+      /* Split-off largest interval. */
+      if (jj - i > j - jj + 1) {
+
+        /* Recurse on the left? */
+        if (jj > i && pivot > min) {
+          qid = (last++) % qstack_size;
+          qstack[qid].i = i;
+          qstack[qid].j = jj;
+          qstack[qid].min = min;
+          qstack[qid].max = pivot;
+          qstack[qid].ready = 1;
+          if (waiting++ >= qstack_size) error("Qstack overflow.");
+        }
 
-  } /* parallel bit. */
+        /* Recurse on the right? */
+        if (jj + 1 < j && pivot + 1 < max) {
+          i = jj + 1;
+          min = pivot + 1;
+        } else
+          break;
+
+      } else {
+
+        /* Recurse on the right? */
+        if (jj + 1 < j && pivot + 1 < max) {
+          qid = (last++) % qstack_size;
+          qstack[qid].i = jj + 1;
+          qstack[qid].j = j;
+          qstack[qid].min = pivot + 1;
+          qstack[qid].max = max;
+          qstack[qid].ready = 1;
+          if ((waiting++) >= qstack_size) error("Qstack overflow.");
+        }
+
+        /* Recurse on the left? */
+        if (jj > i && pivot > min) {
+          j = jj;
+          max = pivot;
+        } else
+          break;
+      }
+
+    } /* loop over sub-intervals. */
+
+    waiting--;
+
+  } /* main loop. */
 
   /* Verify sort. */
   /* for ( i = 1 ; i < N ; i++ )
@@ -700,126 +658,102 @@ void gparts_sort(struct gpart *gparts, int *ind, int N, int min, int max) {
   last = 1;
   waiting = 1;
 
-/* Parallel bit. */
-#pragma omp parallel default(shared) shared(                           \
-    N, first, last, waiting, qstack, gparts, ind, qstack_size, stderr, \
-    engine_rank) private(pivot, i, ii, j, jj, min, max, temp_i, qid, temp_p)
-  {
+  /* Main loop. */
+  while (waiting > 0) {
 
-    /* Main loop. */
-    if (omp_get_thread_num() < 8)
-      while (waiting > 0) {
-
-        /* Grab an interval off the queue. */
-        qid = atomic_inc(&first) % qstack_size;
-
-        /* Wait for the interval to be ready. */
-        while (waiting > 0 && atomic_cas(&qstack[qid].ready, 1, 1) != 1)
-          ;
-
-        /* Broke loop for all the wrong reasons? */
-        if (waiting == 0) break;
-
-        /* Get the stack entry. */
-        i = qstack[qid].i;
-        j = qstack[qid].j;
-        min = qstack[qid].min;
-        max = qstack[qid].max;
-        qstack[qid].ready = 0;
-        // message( "thread %i got interval [%i,%i] with values in [%i,%i]." ,
-        // omp_get_thread_num() , i , j , min , max );
-
-        /* Loop over sub-intervals. */
-        while (1) {
-
-          /* Bring beer. */
-          pivot = (min + max) / 2;
-
-          /* One pass of QuickSort's partitioning. */
-          ii = i;
-          jj = j;
-          while (ii < jj) {
-            while (ii <= j && ind[ii] <= pivot) ii++;
-            while (jj >= i && ind[jj] > pivot) jj--;
-            if (ii < jj) {
-              temp_i = ind[ii];
-              ind[ii] = ind[jj];
-              ind[jj] = temp_i;
-              temp_p = gparts[ii];
-              gparts[ii] = gparts[jj];
-              gparts[jj] = temp_p;
-            }
-          }
+    /* Grab an interval off the queue. */
+    qid = (first++) % qstack_size;
 
-          /* Verify sort. */
-          /* for ( int k = i ; k <= jj ; k++ )
-              if ( ind[k] > pivot ) {
-                  message( "sorting failed at k=%i, ind[k]=%i, pivot=%i, i=%i,
-          j=%i, N=%i." , k , ind[k] , pivot , i , j , N );
-                  error( "Partition failed (<=pivot)." );
-                  }
-          for ( int k = jj+1 ; k <= j ; k++ )
-              if ( ind[k] <= pivot ) {
-                  message( "sorting failed at k=%i, ind[k]=%i, pivot=%i, i=%i,
-          j=%i, N=%i." , k , ind[k] , pivot , i , j , N );
-                  error( "Partition failed (>pivot)." );
-                  } */
-
-          /* Split-off largest interval. */
-          if (jj - i > j - jj + 1) {
-
-            /* Recurse on the left? */
-            if (jj > i && pivot > min) {
-              qid = atomic_inc(&last) % qstack_size;
-              while (atomic_cas(&qstack[qid].ready, 0, 0) != 0)
-                ;
-              qstack[qid].i = i;
-              qstack[qid].j = jj;
-              qstack[qid].min = min;
-              qstack[qid].max = pivot;
-              qstack[qid].ready = 1;
-              if (atomic_inc(&waiting) >= qstack_size)
-                error("Qstack overflow.");
-            }
-
-            /* Recurse on the right? */
-            if (jj + 1 < j && pivot + 1 < max) {
-              i = jj + 1;
-              min = pivot + 1;
-            } else
-              break;
-
-          } else {
-
-            /* Recurse on the right? */
-            if (jj + 1 < j && pivot + 1 < max) {
-              qid = atomic_inc(&last) % qstack_size;
-              while (atomic_cas(&qstack[qid].ready, 0, 0) != 0)
-                ;
-              qstack[qid].i = jj + 1;
-              qstack[qid].j = j;
-              qstack[qid].min = pivot + 1;
-              qstack[qid].max = max;
-              qstack[qid].ready = 1;
-              if (atomic_inc(&waiting) >= qstack_size)
-                error("Qstack overflow.");
-            }
-
-            /* Recurse on the left? */
-            if (jj > i && pivot > min) {
-              j = jj;
-              max = pivot;
-            } else
-              break;
-          }
+    /* Get the stack entry. */
+    i = qstack[qid].i;
+    j = qstack[qid].j;
+    min = qstack[qid].min;
+    max = qstack[qid].max;
+    qstack[qid].ready = 0;
+
+    /* Loop over sub-intervals. */
+    while (1) {
+
+      /* Bring beer. */
+      pivot = (min + max) / 2;
+
+      /* One pass of QuickSort's partitioning. */
+      ii = i;
+      jj = j;
+      while (ii < jj) {
+        while (ii <= j && ind[ii] <= pivot) ii++;
+        while (jj >= i && ind[jj] > pivot) jj--;
+        if (ii < jj) {
+          temp_i = ind[ii];
+          ind[ii] = ind[jj];
+          ind[jj] = temp_i;
+          temp_p = gparts[ii];
+          gparts[ii] = gparts[jj];
+          gparts[jj] = temp_p;
+        }
+      }
+
+      /* Verify sort. */
+      /* for ( int k = i ; k <= jj ; k++ )
+         if ( ind[k] > pivot ) {
+         message( "sorting failed at k=%i, ind[k]=%i, pivot=%i, i=%i, j=%i,
+         N=%i." , k , ind[k] , pivot , i , j , N );
+         error( "Partition failed (<=pivot)." );
+         }
+         for ( int k = jj+1 ; k <= j ; k++ )
+         if ( ind[k] <= pivot ) {
+         message( "sorting failed at k=%i, ind[k]=%i, pivot=%i, i=%i, j=%i,
+         N=%i." , k , ind[k] , pivot , i , j , N );
+         error( "Partition failed (>pivot)." );
+         } */
+
+      /* Split-off largest interval. */
+      if (jj - i > j - jj + 1) {
+
+        /* Recurse on the left? */
+        if (jj > i && pivot > min) {
+          qid = (last++) % qstack_size;
+          qstack[qid].i = i;
+          qstack[qid].j = jj;
+          qstack[qid].min = min;
+          qstack[qid].max = pivot;
+          qstack[qid].ready = 1;
+          if ((waiting++) >= qstack_size) error("Qstack overflow.");
+        }
+
+        /* Recurse on the right? */
+        if (jj + 1 < j && pivot + 1 < max) {
+          i = jj + 1;
+          min = pivot + 1;
+        } else
+          break;
+
+      } else {
 
-        } /* loop over sub-intervals. */
+        /* Recurse on the right? */
+        if (jj + 1 < j && pivot + 1 < max) {
+          qid = (last++) % qstack_size;
+          qstack[qid].i = jj + 1;
+          qstack[qid].j = j;
+          qstack[qid].min = pivot + 1;
+          qstack[qid].max = max;
+          qstack[qid].ready = 1;
+          if ((waiting++) >= qstack_size) error("Qstack overflow.");
+        }
+
+        /* Recurse on the left? */
+        if (jj > i && pivot > min) {
+          j = jj;
+          max = pivot;
+        } else
+          break;
+      }
 
-        atomic_dec(&waiting);
+    } /* loop over sub-intervals. */
 
-      } /* main loop. */
+    waiting--;
 
-  } /* parallel bit. */
+  } /* main loop. */
 
   /* Verify sort. */
   /* for ( i = 1 ; i < N ; i++ )
@@ -872,20 +806,11 @@ void space_map_parts(struct space *s,
   }
 
   /* Call the recursive function on all higher-level cells. */
-  {
-    int mycid;
-    while (1) {
-      mycid = cid++;
-      if (mycid < s->nr_cells)
-        rec_map(&s->cells[mycid]);
-      else
-        break;
-    }
-  }
+  for (cid = 0; cid < s->nr_cells; cid++) rec_map(&s->cells[cid]);
 }
 
 /**
- * @brief Map a function to all particles in a space.
+ * @brief Map a function to all particles in a space.
  *
  * @param s The #space we are working in.
  * @param full Map to all cells, including cells with sub-cells.
@@ -912,16 +837,7 @@ void space_map_cells_post(struct space *s, int full,
   }
 
   /* Call the recursive function on all higher-level cells. */
-  {
-    int mycid;
-    while (1) {
-      mycid = cid++;
-      if (mycid < s->nr_cells)
-        rec_map(&s->cells[mycid]);
-      else
-        break;
-    }
-  }
+  for (cid = 0; cid < s->nr_cells; cid++) rec_map(&s->cells[cid]);
 }
 
 void space_map_cells_pre(struct space *s, int full,
@@ -943,16 +859,7 @@ void space_map_cells_pre(struct space *s, int full,
   }
 
   /* Call the recursive function on all higher-level cells. */
-  {
-    int mycid;
-    while (1) {
-      mycid = cid++;
-      if (mycid < s->nr_cells)
-        rec_map(&s->cells[mycid]);
-      else
-        break;
-    }
-  }
+  for (cid = 0; cid < s->nr_cells; cid++) rec_map(&s->cells[cid]);
 }
 
 /**
@@ -965,7 +872,7 @@ void space_map_cells_pre(struct space *s, int full,
 void space_split(struct space *s, struct cell *c) {
 
   int k, count = c->count, gcount = c->gcount, maxdepth = 0;
-  float h, h_max = 0.0f, dt, dt_min = FLT_MAX, dt_max = FLT_MIN;
+  float h, h_max = 0.0f, dt, dt_min = c->parts[0].dt, dt_max = dt_min;
   struct cell *temp;
   struct part *p, *parts = c->parts;
   struct xpart *xp, *xparts = c->xparts;
diff --git a/src/task.c b/src/task.c
index 19b96799c7eb2e139457e8ed9202d005228b82e0..949caab56c4c4d8a0e3c73d05014ebc5ad68657a 100644
--- a/src/task.c
+++ b/src/task.c
@@ -23,9 +23,6 @@
 /* Some standard headers. */
 #include <float.h>
 #include <limits.h>
-#include <math.h>
-#include <omp.h>
-#include <pthread.h>
 #include <sched.h>
 #include <stdio.h>
 #include <stdlib.h>