diff --git a/configure.ac b/configure.ac
index 9fa9a1de591d63794dde5db6a8dd733cfcaada09..a02dcc57c720f1a9a792b160485caee728a91b98 100644
--- a/configure.ac
+++ b/configure.ac
@@ -351,7 +351,7 @@ AC_ARG_WITH([tcmalloc],
    [with_tcmalloc="no"]
 )
 if test "x$with_tcmalloc" != "xno"; then
-   if test "x$with_tcmalloc" != "xyes" && test "x$with_tcmalloc" != "x"; then
+   if test "x$with_tcmalloc" != "xyes" -a "x$with_tcmalloc" != "x"; then
       tclibs="-L$with_tcmalloc -ltcmalloc"
    else
       tclibs="-ltcmalloc"
@@ -361,7 +361,7 @@ if test "x$with_tcmalloc" != "xno"; then
 
    #  Could just have the minimal version.
    if test "$have_tcmalloc" = "no"; then
-      if test "x$with_tcmalloc" != "xyes" && test "x$with_tcmalloc" != "x"; then
+      if test "x$with_tcmalloc" != "xyes" -a "x$with_tcmalloc" != "x"; then
          tclibs="-L$with_tcmalloc -ltcmalloc_minimal"
       else
          tclibs="-ltcmalloc_minimal"
@@ -394,7 +394,7 @@ AC_ARG_WITH([profiler],
    [with_profiler="yes"]
 )
 if test "x$with_profiler" != "xno"; then
-   if test "x$with_profiler" != "xyes" && test "x$with_profiler" != "x"; then
+   if test "x$with_profiler" != "xyes" -a "x$with_profiler" != "x"; then
       proflibs="-L$with_profiler -lprofiler"
    else
       proflibs="-lprofiler"
@@ -411,6 +411,38 @@ fi
 AC_SUBST([PROFILER_LIBS])
 AM_CONDITIONAL([HAVEPROFILER],[test -n "$PROFILER_LIBS"])
 
+#  Check for jemalloc, another fast malloc that is good with contention.
+have_jemalloc="no"
+AC_ARG_WITH([jemalloc],
+   [AS_HELP_STRING([--with-jemalloc],
+      [use jemalloc library or specify the directory with lib @<:@yes/no@:>@]
+   )],
+   [with_jemalloc="$withval"],
+   [with_jemalloc="no"]
+)
+if test "x$with_jemalloc" != "xno"; then
+   if test "x$with_jemalloc" != "xyes" -a "x$with_jemalloc" != "x"; then
+      jelibs="-L$with_jemalloc -ljemalloc"
+   else
+      jelibs="-ljemalloc"
+   fi
+   AC_CHECK_LIB([jemalloc],[malloc_usable_size],[have_jemalloc="yes"],[have_jemalloc="no"],
+                $jelibs)
+
+   if test "$have_jemalloc" = "yes"; then
+      JEMALLOC_LIBS="$jelibs"
+   else
+      JEMALLOC_LIBS=""
+   fi
+fi
+AC_SUBST([JEMALLOC_LIBS])
+AM_CONDITIONAL([HAVEJEMALLOC],[test -n "$JEMALLOC_LIBS"])
+
+#  Don't allow both tcmalloc and jemalloc.
+if test "x$have_tcmalloc" != "xno" -a "x$have_jemalloc" != "xno"; then
+   AC_MSG_ERROR([Cannot use tcmalloc at same time as jemalloc])
+fi
+
 # Check for HDF5. This is required.
 AX_LIB_HDF5
 
@@ -781,6 +813,7 @@ AC_MSG_RESULT([
    FFTW3 enabled   : $have_fftw3
    libNUMA enabled : $have_numa
    Using tcmalloc  : $have_tcmalloc
+   Using jemalloc  : $have_jemalloc
    CPU profiler    : $have_profiler
 
    Hydro scheme       : $with_hydro
diff --git a/examples/Makefile.am b/examples/Makefile.am
index 4da84788a485dacd2103fe85ad3e729ade6b582a..28a4629bdb401c0736379a2fe14a3a5f19caf650 100644
--- a/examples/Makefile.am
+++ b/examples/Makefile.am
@@ -24,7 +24,7 @@ AM_CFLAGS = -I$(top_srcdir)/src $(HDF5_CPPFLAGS)
 AM_LDFLAGS = $(HDF5_LDFLAGS)
 
 # Extra libraries.
-EXTRA_LIBS = $(HDF5_LIBS) $(FFTW_LIBS) $(PROFILER_LIBS) $(TCMALLOC_LIBS)
+EXTRA_LIBS = $(HDF5_LIBS) $(FFTW_LIBS) $(PROFILER_LIBS) $(TCMALLOC_LIBS) $(JEMALLOC_LIBS)
 
 # MPI libraries.
 MPI_LIBS = $(METIS_LIBS) $(MPI_THREAD_LIBS)
diff --git a/examples/main.c b/examples/main.c
index dcc113ab6af6a06e7c20ac1aac7c2d3b715f7ef3..11163b42523fa5b1de1438ad8e67dde0fe9c88ef 100644
--- a/examples/main.c
+++ b/examples/main.c
@@ -45,6 +45,9 @@
 #define ENGINE_POLICY engine_policy_none
 #endif
 
+/* Global profiler. */
+struct profiler prof;
+
 /**
  * @brief Help messages for the command line parameters.
  */
diff --git a/src/Makefile.am b/src/Makefile.am
index 49223f5b9cd81f40fca159f32de181d412170748..88474d72c08a1e8fc6dc8ed273dbf168d3b134ba 100644
--- a/src/Makefile.am
+++ b/src/Makefile.am
@@ -25,7 +25,7 @@ AM_LDFLAGS = $(HDF5_LDFLAGS) $(FFTW_LIBS) -version-info 0:0:0
 GIT_CMD = @GIT_CMD@
 
 # Additional dependencies for shared libraries.
-EXTRA_LIBS = $(HDF5_LIBS) $(PROFILER_LIBS) $(TCMALLOC_LIBS)
+EXTRA_LIBS = $(HDF5_LIBS) $(PROFILER_LIBS) $(TCMALLOC_LIBS) $(JEMALLOC_LIBS)
 
 # MPI libraries.
 MPI_LIBS = $(METIS_LIBS) $(MPI_THREAD_LIBS)
@@ -44,7 +44,8 @@ include_HEADERS = space.h runner.h queue.h task.h lock.h cell.h part.h const.h \
     common_io.h single_io.h multipole.h map.h tools.h partition.h clocks.h parser.h \
     physical_constants.h physical_constants_cgs.h potential.h version.h \
     hydro_properties.h riemann.h threadpool.h cooling.h cooling_struct.h sourceterms.h \
-    sourceterms_struct.h statistics.h memswap.h cache.h runner_doiact_vec.h
+    sourceterms_struct.h statistics.h memswap.h cache.h runner_doiact_vec.h profiler.h \
+    dump.h
 
 # Common source files
 AM_SOURCES = space.c runner.c queue.c task.c cell.c engine.c \
@@ -53,7 +54,7 @@ AM_SOURCES = space.c runner.c queue.c task.c cell.c engine.c \
     kernel_hydro.c tools.c part.c partition.c clocks.c parser.c \
     physical_constants.c potential.c hydro_properties.c \
     runner_doiact_fft.c threadpool.c cooling.c sourceterms.c \
-    statistics.c runner_doiact_vec.c
+    statistics.c runner_doiact_vec.c profiler.c dump.c
 
 # Include files for distribution, not installation.
 nobase_noinst_HEADERS = align.h approx_math.h atomic.h cycle.h error.h inline.h kernel_hydro.h kernel_gravity.h \
diff --git a/src/dump.c b/src/dump.c
new file mode 100644
index 0000000000000000000000000000000000000000..2c0cf221ebd897bab0d047c196ce8a2aeddc6eae
--- /dev/null
+++ b/src/dump.c
@@ -0,0 +1,153 @@
+/*******************************************************************************
+ * This file is part of SWIFT.
+ * Copyright (c) 2016 Pedro Gonnet (pedro.gonnet@durham.ac.uk)
+ *
+ * This program is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU Lesser General Public License as published
+ * by the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public License
+ * along with this program.  If not, see <http://www.gnu.org/licenses/>.
+ *
+ ******************************************************************************/
+
+/* Config parameters. */
+#include "../config.h"
+
+/* Some standard headers. */
+#include <errno.h>
+#include <fcntl.h>
+#include <string.h>
+#include <sys/mman.h>
+#include <sys/stat.h>
+#include <sys/types.h>
+#include <unistd.h>
+
+/* This object's header. */
+#include "dump.h"
+
+/* Local headers. */
+#include "atomic.h"
+#include "error.h"
+
+/**
+ * @brief Reserve a chunk of bytes in the memory-mapped buffer of a dump.
+ *
+ * @param d The #dump to reserve from.
+ * @param count The number of bytes to reserve.
+ * @param offset (return) Position of the reserved chunk within the dump file.
+ * @return Address of the first reserved byte inside the mapped data.
+ */
+
+void *dump_get(struct dump *d, size_t count, size_t *offset) {
+  const size_t start = atomic_add(&d->count, count);
+  *offset = d->file_offset + start;
+  return &((char *)d->data)[start];
+}
+
+/**
+ * @brief Ensure that at least size bytes are available in the #dump.
+ * @param d The #dump; its mapping is moved further into the file if needed.
+ * @param size The number of bytes that must be available.
+ */
+void dump_ensure(struct dump *d, size_t size) {
+  /* Enough space already? Summing avoids unsigned wrap-around if over-full. */
+  if (d->count + size < d->size) return;
+
+  /* Unmap all of the current mapping, which is d->size bytes long. */
+  if (munmap(d->data, d->size) != 0) {
+    error("Failed to unmap %zu bytes of dump data (%s).", d->size,
+          strerror(errno));
+  }
+
+  /* Skip the whole pages already used; the rest of d->count is re-mapped. */
+  const size_t trunc_count = d->count & d->page_mask;
+  d->file_offset += trunc_count;
+  d->count -= trunc_count;
+  d->size = (d->count + size * dump_grow_ensure_factor + ~d->page_mask) &
+            d->page_mask;
+
+  /* Make sure the file is large enough to back the new mapping. */
+  if (posix_fallocate(d->fd, d->file_offset, d->size) != 0)
+    error("Failed to pre-allocate the dump file.");
+
+  /* Re-map the file, starting at the new offset. */
+  if ((d->data = mmap(NULL, d->size, PROT_WRITE, MAP_SHARED, d->fd,
+                      d->file_offset)) == MAP_FAILED) {
+    error("Failed to allocate map of size %zu bytes (%s).", d->size,
+          strerror(errno));
+  }
+}
+
+/**
+ * @brief Synchronously write the used part of the #dump's mapping to disk.
+ */
+
+void dump_sync(struct dump *d) {
+  const int rc = msync(d->data, d->count, MS_SYNC);
+  if (rc != 0) error("Failed to sync memory-mapped data.");
+}
+
+/**
+ * @brief Finalize the #dump: unmap it, trim the file and close it.
+ */
+
+void dump_close(struct dump *d) {
+  /* Unmap the whole mapping; a length of d->count fails when it is zero. */
+  if (munmap(d->data, d->size) != 0) {
+    error("Failed to unmap dump data (%s).", strerror(errno));
+  }
+
+  /* Truncate the file to the number of bytes actually dumped. */
+  if (ftruncate(d->fd, d->file_offset + d->count) != 0) {
+    error("Failed to truncate dump file (%s).", strerror(errno));
+  }
+
+  /* Close the memory-mapped file. */
+  if (close(d->fd) != 0) error("Failed to close memory-mapped file.");
+}
+
+/**
+ * @brief Initialize a file dump.
+ *
+ * @param d The #dump to initialize.
+ * @param filename The fully qualified name of the file in which to dump,
+ *                 note that it will be overwritten.
+ * @param size The initial buffer size for this #dump, rounded up to whole
+ *             pages (at least one).
+ */
+void dump_init(struct dump *d, const char *filename, size_t size) {
+  /* Create the output file. */
+  if ((d->fd = open(filename, O_CREAT | O_RDWR, 0660)) == -1) {
+    error("Failed to create dump file '%s' (%s).", filename, strerror(errno));
+  }
+
+  /* Round the size up to a whole number of pages, and at least one page. */
+  const size_t page_mask = ~(sysconf(_SC_PAGE_SIZE) - 1);
+  if (size == 0) size = 1;
+  size = (size + ~page_mask) & page_mask;
+
+  /* Pre-allocate the file size. */
+  if (posix_fallocate(d->fd, 0, size) != 0) {
+    error("Failed to pre-allocate the dump file.");
+  }
+
+  /* Map memory to the created file. */
+  if ((d->data = mmap(NULL, size, PROT_WRITE, MAP_SHARED, d->fd, 0)) ==
+      MAP_FAILED) {
+    error("Failed to allocate map of size %zu bytes (%s).", size,
+          strerror(errno));
+  }
+
+  /* Init some counters. */
+  d->size = size;
+  d->count = 0;
+  d->file_offset = 0;
+  d->page_mask = page_mask;
+}
diff --git a/src/dump.h b/src/dump.h
new file mode 100644
index 0000000000000000000000000000000000000000..a7e934218c271d2f82b99d39f278e5af3047be6e
--- /dev/null
+++ b/src/dump.h
@@ -0,0 +1,57 @@
+/*******************************************************************************
+ * This file is part of SWIFT.
+ * Copyright (c) 2016 Pedro Gonnet (pedro.gonnet@durham.ac.uk)
+ *
+ * This program is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU Lesser General Public License as published
+ * by the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public License
+ * along with this program.  If not, see <http://www.gnu.org/licenses/>.
+ *
+ ******************************************************************************/
+#ifndef SWIFT_DUMP_H
+#define SWIFT_DUMP_H
+
+/* Includes. */
+#include "lock.h"
+
+/* Some constants. */
+#define dump_grow_ensure_factor 10
+
+/** A memory-mapped window onto an output file. */
+struct dump {
+
+  /* Start of the current memory mapping of the file. */
+  void *data;
+
+  /* The length of the current mapping, in bytes. */
+  size_t size;
+
+  /* Bytes handed out by dump_get() from the current mapping. */
+  size_t count;
+
+  /* Offset within the file at which the current mapping starts. */
+  size_t file_offset;
+
+  /* Descriptor of the file with which this memory is associated. */
+  int fd;
+
+  /* Mask that rounds a byte count down to a multiple of the page size. */
+  size_t page_mask;
+};
+
+/* Function prototypes. */
+void dump_init(struct dump *d, const char *filename, size_t size);
+void dump_ensure(struct dump *d, size_t size);
+void dump_sync(struct dump *d);
+void dump_close(struct dump *d);
+void *dump_get(struct dump *d, size_t count, size_t *offset);
+
+#endif /* SWIFT_DUMP_H */
diff --git a/src/engine.c b/src/engine.c
index 27aa684b80a62fd743c940dd6edaff9a4a0609c8..e4dc4bb4f661144b3faef856a79eab451eed21f3 100644
--- a/src/engine.c
+++ b/src/engine.c
@@ -59,6 +59,7 @@
 #include "parallel_io.h"
 #include "part.h"
 #include "partition.h"
+#include "profiler.h"
 #include "proxy.h"
 #include "runner.h"
 #include "serial_io.h"
@@ -320,6 +321,23 @@ void engine_redistribute(struct engine *e) {
                     MPI_COMM_WORLD) != MPI_SUCCESS)
     error("Failed to allreduce particle transfer counts.");
 
+  /* Report how many particles will be moved; diagonal entries stay put. */
+  if (e->verbose) {
+    if (e->nodeID == 0) {
+      size_t total = 0;
+      size_t unmoved = 0;
+      for (int p = 0, r = 0; p < nr_nodes; p++) {
+        for (int s = 0; s < nr_nodes; s++) {
+          total += counts[r];
+          if (p == s) unmoved += counts[r];
+          r++;
+        }
+      }
+      message("%zu of %zu (%.2f%%) of particles moved", total - unmoved, total,
+              100.0 * (double)(total - unmoved) / (double)total);
+    }
+  }
+
   /* Get all the g_counts from all the nodes. */
   if (MPI_Allreduce(MPI_IN_PLACE, g_counts, nr_nodes * nr_nodes, MPI_INT,
                     MPI_SUM, MPI_COMM_WORLD) != MPI_SUCCESS)
@@ -3408,7 +3426,7 @@ void engine_print_policy(struct engine *e) {
 #else
   printf("%s engine_policy: engine policies are [ ",
          clocks_get_timesincestart());
-  for (int k = 1; k < 32; k++)
+  for (int k = 1; k < 31; k++)
     if (e->policy & (1 << k)) printf(" %s ", engine_policy_names[k + 1]);
   printf(" ]\n");
   fflush(stdout);
diff --git a/src/hydro/Minimal/hydro.h b/src/hydro/Minimal/hydro.h
index 3b3454f1bb348b178ac57899da4f7611802a69cd..beb6f98b8c0d781aa709fb6ee3ca564a52704db2 100644
--- a/src/hydro/Minimal/hydro.h
+++ b/src/hydro/Minimal/hydro.h
@@ -66,7 +66,9 @@ __attribute__((always_inline)) INLINE static float hydro_get_internal_energy(
 __attribute__((always_inline)) INLINE static float hydro_get_pressure(
     const struct part *restrict p, float dt) {
 
-  return p->force.pressure;
+  const float u = p->u + p->u_dt * dt;
+
+  return gas_pressure_from_internal_energy(p->rho, u);
 }
 
 /**
diff --git a/src/partition.c b/src/partition.c
index 89ba3f2835cb78e07ec2bc5cb04c3f8f71751563..85745d880e3ab0f6beaf918a5c226730b6b82a7c 100644
--- a/src/partition.c
+++ b/src/partition.c
@@ -278,6 +278,18 @@ static void split_metis(struct space *s, int nregions, int *celllist) {
 #endif
 
 #if defined(WITH_MPI) && defined(HAVE_METIS)
+
+/* An array index paired with a count; sorted by count using qsort. */
+struct indexval {
+  int index;
+  int count;
+};
+static int indexvalcmp(const void *p1, const void *p2) {
+  const struct indexval *lhs = p1, *rhs = p2;
+  /* Right minus left, so that larger counts sort first. */
+  return rhs->count - lhs->count;
+}
+
 /**
  * @brief Partition the given space into a number of connected regions.
  *
@@ -382,14 +394,70 @@ static void pick_metis(struct space *s, int nregions, int *vertexw, int *edgew,
     if (regionid[k] < 0 || regionid[k] >= nregions)
       error("Got bad nodeID %" PRIDX " for cell %i.", regionid[k], k);
 
+  /* We want a solution in which the current regions of the space are
+   * preserved when possible, to avoid unnecessary particle movement.
+   * So create a 2d-array of cells counts that are common to all pairs
+   * of old and new ranks. Each element of the array has a cell count and
+   * a unique index so we can sort into decreasing counts. */
+  int indmax = nregions * nregions;
+  struct indexval *ivs = calloc(indmax, sizeof(struct indexval));
+  if (ivs == NULL) error("Failed to allocate temporary indexval array.");
+  for (int k = 0; k < ncells; k++) {
+    int index = regionid[k] + nregions * s->cells_top[k].nodeID;
+    ivs[index].count++;
+    ivs[index].index = index;
+  }
+  qsort(ivs, indmax, sizeof(struct indexval), indexvalcmp);
+
+  /* Go through the ivs using the largest counts first, these are the
+   * regions with the most cells in common, old partition to new. */
+  int *oldmap = malloc(sizeof(int) * nregions);
+  int *newmap = malloc(sizeof(int) * nregions);
+  if (oldmap == NULL || newmap == NULL) error("Failed to allocate region maps.");
+  for (int k = 0; k < nregions; k++) {
+    oldmap[k] = -1;
+    newmap[k] = -1;
+  }
+  for (int k = 0; k < indmax; k++) {
+    /* Stop when all regions with common cells have been considered. */
+    if (ivs[k].count == 0) break;
+
+    /* Store old and new IDs, if not already used. */
+    int oldregion = ivs[k].index / nregions;
+    int newregion = ivs[k].index - oldregion * nregions;
+    if (newmap[newregion] == -1 && oldmap[oldregion] == -1) {
+      newmap[newregion] = oldregion;
+      oldmap[oldregion] = newregion;
+    }
+  }
+
+  /* Handle any regions that did not get selected by picking an unused rank
+   * from oldmap and assigning to newmap (and recording the pair in oldmap). */
+  int spare = 0;
+  for (int k = 0; k < nregions; k++) {
+    if (newmap[k] == -1) {
+      for (int j = spare; j < nregions; j++) {
+        if (oldmap[j] == -1) {
+          newmap[k] = j;
+          oldmap[j] = k;
+          spare = j;
+          break;
+        }
+      }
+    }
+  }
+
   /* Set the cell list to the region index. */
   for (int k = 0; k < ncells; k++) {
-    celllist[k] = regionid[k];
+    celllist[k] = newmap[regionid[k]];
   }
 
   /* Clean up. */
   if (weights_v != NULL) free(weights_v);
   if (weights_e != NULL) free(weights_e);
+  free(ivs);
+  free(oldmap);
+  free(newmap);
   free(xadj);
   free(adjncy);
   free(regionid);
diff --git a/src/profiler.c b/src/profiler.c
new file mode 100644
index 0000000000000000000000000000000000000000..ad8338eebfd130d4088f9fd9d4fcc9856c8cc731
--- /dev/null
+++ b/src/profiler.c
@@ -0,0 +1,234 @@
+/*******************************************************************************
+ * This file is part of SWIFT.
+ * Copyright (c) 2016 James S. Willis (james.s.willis@durham.ac.uk)
+ *
+ * This program is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU Lesser General Public License as published
+ * by the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public License
+ * along with this program.  If not, see <http://www.gnu.org/licenses/>.
+ *
+ ******************************************************************************/
+
+/* Config parameters. */
+#include "../config.h"
+
+/* Some standard headers. */
+#include <string.h>
+
+/* This object's header. */
+#include "profiler.h"
+
+/* Local includes */
+#include "clocks.h"
+#include "hydro.h"
+#include "version.h"
+
+/**
+ * @brief Zero every function timer held by the profiler.
+ *
+ * @param profiler #profiler object whose timers are cleared; its file
+ * pointers are left untouched.
+ */
+void profiler_reset_timers(struct profiler *profiler) {
+  /* Engine timers. */
+  profiler->collect_timesteps_time = 0;
+  profiler->drift_time = 0;
+  profiler->rebuild_time = 0;
+  profiler->stats_time = 0;
+  profiler->launch_time = 0;
+  profiler->engine_maketasks_time = profiler->engine_marktasks_time = 0;
+  /* Scheduler timers. */
+  profiler->reweight_time = 0;
+  profiler->clear_waits_time = 0;
+  profiler->re_wait_time = 0;
+  profiler->enqueue_time = 0;
+  /* Space timers. */
+  profiler->space_rebuild_time = 0;
+  profiler->space_regrid_time = 0;
+  profiler->space_parts_sort_time = 0;
+  profiler->space_split_time = 0;
+  profiler->space_parts_get_cell_id_time = profiler->space_count_parts_time = 0;
+}
+
+/**
+ * @brief Opens "<fileName>_<ranks * threads>.txt" and writes its header.
+ *
+ * @param e #engine object to get various properties.
+ * @param fileName stem of the name of the file to be written to.
+ * @param functionName name of function that is being timed.
+ * @param file (return) the opened stream, or NULL if the file cannot be opened.
+ */
+void profiler_write_timing_info_header(const struct engine *e, char *fileName,
+                                       char *functionName, FILE **file) {
+  /* Build the file name, truncating rather than overrunning the buffer. */
+  char fullFileName[200];
+  snprintf(fullFileName, sizeof(fullFileName), "%s_%d.txt", fileName,
+           e->nr_nodes * e->nr_threads);
+  /* Open the file; without it there is nothing to write the header to. */
+  if ((*file = fopen(fullFileName, "w")) == NULL) {
+    fprintf(stderr, "Failed to open profiler file '%s'.\n", fullFileName);
+    return;
+  }
+  fprintf(*file,
+          "# Host: %s\n# Function: %s\n# Revision: %s\n# Compiler: %s, "
+          "Version: %s \n# "
+          "Number of threads: %d\n# Number of MPI ranks: %d\n# Hydrodynamic "
+          "scheme: %s\n# Hydrodynamic kernel: %s\n# No. of neighbours: %.2f "
+          "+/- %.2f\n# Eta: %f\n"
+          "# %6s %14s %14s %10s %10s %16s [%s]\n",
+          hostname(), functionName, git_revision(), compiler_name(),
+          compiler_version(), e->nr_threads, e->nr_nodes, SPH_IMPLEMENTATION,
+          kernel_name, e->hydro_properties->target_neighbours,
+          e->hydro_properties->delta_neighbours,
+          e->hydro_properties->eta_neighbours, "Step", "Time", "Time-step",
+          "Updates", "g-Updates", "Wall-clock time", clocks_getunit());
+  fflush(*file);
+}
+
+/**
+ * @brief Opens the output file of every timed function and writes its header.
+ * Should be called once at the start of the simulation.
+ *
+ * @param e #engine object to get various properties.
+ * @param profiler #profiler object whose file pointers are set to the newly
+ * opened files.
+ */
+void profiler_write_all_timing_info_headers(const struct engine *e,
+                                            struct profiler *profiler) {
+  /* File stem, timed function name and destination stream of each output. */
+  const struct {
+    char *stem;
+    char *function;
+    FILE **stream;
+  } outputs[] = {
+      {"enginecollecttimesteps", "engine_collect_timesteps",
+       &profiler->file_engine_collect_timesteps},
+      {"enginedrift", "engine_drift", &profiler->file_engine_drift},
+      {"enginerebuild", "engine_rebuild", &profiler->file_engine_rebuild},
+      {"schedulerreweight", "scheduler_reweight",
+       &profiler->file_scheduler_reweight},
+      {"schedulerclearwaits", "scheduler_clear_waits",
+       &profiler->file_scheduler_clear_waits},
+      {"schedulerrewait", "scheduler_rewait",
+       &profiler->file_scheduler_re_wait},
+      {"schedulerenqueue", "scheduler_enqueue",
+       &profiler->file_scheduler_enqueue},
+      {"engineprintstats", "engine_print_stats",
+       &profiler->file_engine_stats},
+      {"enginelaunch", "engine_launch", &profiler->file_engine_launch},
+      {"spacerebuild", "space_rebuild", &profiler->file_space_rebuild},
+      {"enginemaketasks", "engine_maketasks",
+       &profiler->file_engine_maketasks},
+      {"enginemarktasks", "engine_marktasks",
+       &profiler->file_engine_marktasks},
+      {"spaceregrid", "space_regrid", &profiler->file_space_regrid},
+      {"spacepartssort", "space_parts_sort",
+       &profiler->file_space_parts_sort},
+      {"spacesplit", "space_split", &profiler->file_space_split},
+      {"spacegetcellid", "space_get_cell_id",
+       &profiler->file_space_parts_get_cell_id},
+      {"spacecountparts", "space_count_parts",
+       &profiler->file_space_count_parts},
+  };
+  for (size_t k = 0; k < sizeof(outputs) / sizeof(outputs[0]); k++)
+    profiler_write_timing_info_header(e, outputs[k].stem, outputs[k].function,
+                                      outputs[k].stream);
+}
+
+/**
+ * @brief Writes timing info to the output file.
+ *
+ * @param e #engine object to get various properties.
+ * @param time Time in ticks to be written to the output file.
+ * @param file output stream to write to; nothing is written if it is NULL.
+ */
+void profiler_write_timing_info(const struct engine *e, ticks time,
+                                FILE *file) {
+  if (file == NULL) return; /* No stream to write to: skip silently. */
+  fprintf(file, "  %6d %14e %14e %10zu %10zu %21.3f\n", e->step, e->time,
+          e->timeStep, e->updates, e->g_updates, clocks_from_ticks(time));
+  fflush(file);
+}
+
+/**
+ * @brief Writes one line of timing info to every output file and then resets
+ * all timers. Should be called at the end of every time step.
+ *
+ * @param e #engine object to get various properties.
+ * @param profiler #profiler object that holds file pointers and
+ * function timers; the timers are zeroed on return.
+ */
+void profiler_write_all_timing_info(const struct engine *e,
+                                    struct profiler *profiler) {
+  profiler_write_timing_info(e, profiler->collect_timesteps_time,
+                             profiler->file_engine_collect_timesteps);
+  profiler_write_timing_info(e, profiler->drift_time,
+                             profiler->file_engine_drift);
+  profiler_write_timing_info(e, profiler->rebuild_time,
+                             profiler->file_engine_rebuild);
+  profiler_write_timing_info(e, profiler->reweight_time,
+                             profiler->file_scheduler_reweight);
+  profiler_write_timing_info(e, profiler->clear_waits_time,
+                             profiler->file_scheduler_clear_waits);
+  profiler_write_timing_info(e, profiler->re_wait_time,
+                             profiler->file_scheduler_re_wait);
+  profiler_write_timing_info(e, profiler->enqueue_time,
+                             profiler->file_scheduler_enqueue);
+  profiler_write_timing_info(e, profiler->stats_time,
+                             profiler->file_engine_stats);
+  profiler_write_timing_info(e, profiler->launch_time,
+                             profiler->file_engine_launch);
+  profiler_write_timing_info(e, profiler->space_rebuild_time,
+                             profiler->file_space_rebuild);
+  profiler_write_timing_info(e, profiler->engine_maketasks_time,
+                             profiler->file_engine_maketasks);
+  profiler_write_timing_info(e, profiler->engine_marktasks_time,
+                             profiler->file_engine_marktasks);
+  profiler_write_timing_info(e, profiler->space_regrid_time,
+                             profiler->file_space_regrid);
+  profiler_write_timing_info(e, profiler->space_parts_sort_time,
+                             profiler->file_space_parts_sort);
+  profiler_write_timing_info(e, profiler->space_split_time,
+                             profiler->file_space_split);
+  profiler_write_timing_info(e, profiler->space_parts_get_cell_id_time,
+                             profiler->file_space_parts_get_cell_id);
+  profiler_write_timing_info(e, profiler->space_count_parts_time,
+                             profiler->file_space_count_parts);
+  /* Reset timers. */
+  profiler_reset_timers(profiler);
+}
+
+/**
+ * @brief Closes all output files, should be called at the end of the
+ * simulation.
+ *
+ * @param profiler #profiler object that holds file pointers and
+ * function timers; file pointers that are NULL are skipped.
+ */
+void profiler_close_files(struct profiler *profiler) {
+  /* Every stream opened by profiler_write_all_timing_info_headers(). */
+  FILE *const files[] = {
+      profiler->file_engine_collect_timesteps,
+      profiler->file_engine_drift, profiler->file_engine_rebuild,
+      profiler->file_scheduler_reweight,
+      profiler->file_scheduler_clear_waits,
+      profiler->file_scheduler_re_wait, profiler->file_scheduler_enqueue,
+      profiler->file_engine_stats, profiler->file_engine_launch,
+      profiler->file_space_rebuild,
+      profiler->file_engine_maketasks, profiler->file_engine_marktasks,
+      profiler->file_space_regrid, profiler->file_space_parts_sort,
+      profiler->file_space_split,
+      profiler->file_space_parts_get_cell_id,
+      profiler->file_space_count_parts,
+  };
+  for (size_t k = 0; k < sizeof(files) / sizeof(files[0]); k++)
+    if (files[k] != NULL) fclose(files[k]);
+}
diff --git a/src/profiler.h b/src/profiler.h
new file mode 100644
index 0000000000000000000000000000000000000000..b00bc986ece8b78282b11ce317a6746ecba5a50f
--- /dev/null
+++ b/src/profiler.h
@@ -0,0 +1,78 @@
+/*******************************************************************************
+ * This file is part of SWIFT.
+ * Copyright (c) 2016 James S. Willis (james.s.willis@durham.ac.uk)
+ *
+ * This program is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU Lesser General Public License as published
+ * by the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public License
+ * along with this program.  If not, see <http://www.gnu.org/licenses/>.
+ *
+ ******************************************************************************/
+#ifndef SWIFT_PROFILER_H
+#define SWIFT_PROFILER_H
+
+/* Config parameters. */
+#include "../config.h"
+
+/* Local includes */
+#include "engine.h"
+
+/* Profiler that holds one output file and one timer per timed function. */
+struct profiler {
+
+  /* Output files, opened by profiler_write_all_timing_info_headers(). */
+  FILE *file_engine_collect_timesteps;
+  FILE *file_engine_drift;
+  FILE *file_engine_rebuild;
+  FILE *file_scheduler_reweight;
+  FILE *file_scheduler_clear_waits;
+  FILE *file_scheduler_re_wait;
+  FILE *file_scheduler_enqueue;
+  FILE *file_engine_stats;
+  FILE *file_engine_launch;
+  FILE *file_space_rebuild;
+  FILE *file_engine_maketasks;
+  FILE *file_engine_marktasks;
+  FILE *file_space_regrid;
+  FILE *file_space_parts_sort;
+  FILE *file_space_split;
+  FILE *file_space_parts_get_cell_id;
+  FILE *file_space_count_parts;
+
+  /* Time taken in functions, in ticks; zeroed by profiler_reset_timers(). */
+  ticks collect_timesteps_time;
+  ticks drift_time;
+  ticks rebuild_time;
+  ticks reweight_time;
+  ticks clear_waits_time;
+  ticks re_wait_time;
+  ticks enqueue_time;
+  ticks stats_time;
+  ticks launch_time;
+  ticks space_rebuild_time;
+  ticks engine_maketasks_time;
+  ticks engine_marktasks_time;
+  ticks space_regrid_time;
+  ticks space_parts_sort_time;
+  ticks space_split_time;
+  ticks space_parts_get_cell_id_time;
+  ticks space_count_parts_time;
+};
+
+/* Function prototypes. */
+void profiler_reset_timers(struct profiler *profiler);
+void profiler_write_all_timing_info_headers(const struct engine *e,
+                                            struct profiler *profiler);
+void profiler_write_all_timing_info(const struct engine *e,
+                                    struct profiler *profiler);
+void profiler_close_files(struct profiler *profiler);
+
+#endif /* SWIFT_PROFILER_H */
diff --git a/src/runner_doiact.h b/src/runner_doiact.h
index 308764e755806a124f1cc234dfae253c57e0eda6..6bc8f2da808cc2d953482b90e9441b833384bc75 100644
--- a/src/runner_doiact.h
+++ b/src/runner_doiact.h
@@ -1989,6 +1989,10 @@ void DOSUB_SELF1(struct runner *r, struct cell *ci, int gettimer) {
   /* Should we even bother? */
   if (!cell_is_active(ci, r->e)) return;
 
+#ifdef SWIFT_DEBUG_CHECKS
+  cell_is_drifted(ci, r->e);
+#endif
+
   /* Recurse? */
   if (ci->split) {
 
diff --git a/src/space.c b/src/space.c
index 935677a9ebed97acfde8341ec1545ef4f33a56c0..44bafdd2bb2c7a930c1a6e6691b3ea0beca66683 100644
--- a/src/space.c
+++ b/src/space.c
@@ -173,18 +173,68 @@ int space_getsid(struct space *s, struct cell **ci, struct cell **cj,
  *
  * @param s The #space.
  * @param c The #cell to recycle.
+ * @param rec_begin Pointer to the start of the list of cells to recycle.
+ * @param rec_end Pointer to the end of the list of cells to recycle.
  */
-void space_rebuild_recycle(struct space *s, struct cell *c) {
-
+void space_rebuild_recycle_rec(struct space *s, struct cell *c,
+                               struct cell **rec_begin, struct cell **rec_end) {
   if (c->split)
     for (int k = 0; k < 8; k++)
       if (c->progeny[k] != NULL) {
-        space_rebuild_recycle(s, c->progeny[k]);
-        space_recycle(s, c->progeny[k]);
+        space_rebuild_recycle_rec(s, c->progeny[k], rec_begin, rec_end);
+        c->progeny[k]->next = *rec_begin; /* push the child on the list head */
+        *rec_begin = c->progeny[k];
+        if (*rec_end == NULL) *rec_end = *rec_begin; /* first push = tail */
         c->progeny[k] = NULL;
       }
 }
 
+void space_rebuild_recycle_mapper(void *map_data, int num_elements,
+                                  void *extra_data) {
+  /* Mapper: recycle the progeny of, then reset, num_elements cells. */
+  struct space *s = (struct space *)extra_data;
+  struct cell *cells = (struct cell *)map_data;
+
+  for (int k = 0; k < num_elements; k++) {
+    struct cell *c = &cells[k];
+    struct cell *rec_begin = NULL, *rec_end = NULL; /* per-cell recycle list */
+    space_rebuild_recycle_rec(s, c, &rec_begin, &rec_end);
+    if (rec_begin != NULL) space_recycle_list(s, rec_begin, rec_end);
+    c->sorts = NULL;
+    c->nr_tasks = 0;
+    c->density = NULL;
+    c->gradient = NULL;
+    c->force = NULL;
+    c->grav = NULL;
+    c->dx_max = 0.0f;
+    c->sorted = 0;
+    c->count = 0;
+    c->gcount = 0;
+    c->init = NULL;
+    c->extra_ghost = NULL;
+    c->ghost = NULL;
+    c->kick = NULL;
+    c->cooling = NULL;
+    c->sourceterms = NULL;
+    c->super = c;
+    if (c->sort != NULL) {
+      free(c->sort);
+      c->sort = NULL;
+    }
+#ifdef WITH_MPI
+    c->recv_xv = NULL;
+    c->recv_rho = NULL;
+    c->recv_gradient = NULL;
+    c->recv_ti = NULL;
+
+    c->send_xv = NULL;
+    c->send_rho = NULL;
+    c->send_gradient = NULL;
+    c->send_ti = NULL;
+#endif
+  }
+}
+
 /**
  * @brief Re-build the top-level cell grid.
  *
@@ -297,10 +347,8 @@ void space_regrid(struct space *s, int verbose) {
 
     /* Free the old cells, if they were allocated. */
     if (s->cells_top != NULL) {
-      for (int k = 0; k < s->nr_cells; k++) {
-        space_rebuild_recycle(s, &s->cells_top[k]);
-        if (s->cells_top[k].sort != NULL) free(s->cells_top[k].sort);
-      }
+      threadpool_map(&s->e->threadpool, space_rebuild_recycle_mapper,
+                     s->cells_top, s->nr_cells, sizeof(struct cell), 100, s);
       free(s->cells_top);
       s->maxdepth = 0;
     }
@@ -388,42 +436,12 @@ void space_regrid(struct space *s, int verbose) {
     // message( "rebuilding upper-level cells took %.3f %s." ,
     // clocks_from_ticks(double)(getticks() - tic), clocks_getunit());
 
-  } /* re-build upper-level cells? */
-
+  }      /* re-build upper-level cells? */
   else { /* Otherwise, just clean up the cells. */
 
     /* Free the old cells, if they were allocated. */
-    for (int k = 0; k < s->nr_cells; k++) {
-      space_rebuild_recycle(s, &s->cells_top[k]);
-      s->cells_top[k].sorts = NULL;
-      s->cells_top[k].nr_tasks = 0;
-      s->cells_top[k].density = NULL;
-      s->cells_top[k].gradient = NULL;
-      s->cells_top[k].force = NULL;
-      s->cells_top[k].grav = NULL;
-      s->cells_top[k].dx_max = 0.0f;
-      s->cells_top[k].sorted = 0;
-      s->cells_top[k].count = 0;
-      s->cells_top[k].gcount = 0;
-      s->cells_top[k].init = NULL;
-      s->cells_top[k].extra_ghost = NULL;
-      s->cells_top[k].ghost = NULL;
-      s->cells_top[k].kick = NULL;
-      s->cells_top[k].cooling = NULL;
-      s->cells_top[k].sourceterms = NULL;
-      s->cells_top[k].super = &s->cells_top[k];
-#if WITH_MPI
-      s->cells_top[k].recv_xv = NULL;
-      s->cells_top[k].recv_rho = NULL;
-      s->cells_top[k].recv_gradient = NULL;
-      s->cells_top[k].recv_ti = NULL;
-
-      s->cells_top[k].send_xv = NULL;
-      s->cells_top[k].send_rho = NULL;
-      s->cells_top[k].send_gradient = NULL;
-      s->cells_top[k].send_ti = NULL;
-#endif
-    }
+    threadpool_map(&s->e->threadpool, space_rebuild_recycle_mapper,
+                   s->cells_top, s->nr_cells, sizeof(struct cell), 100, s);
     s->maxdepth = 0;
   }
 
@@ -472,7 +490,6 @@ void space_rebuild(struct space *s, int verbose) {
     space_gparts_get_cell_index(s, gind, cells_top, verbose);
 
 #ifdef WITH_MPI
-
   /* Move non-local parts to the end of the list. */
   const int local_nodeID = s->e->nodeID;
   for (size_t k = 0; k < nr_parts;) {
@@ -1606,24 +1623,22 @@ void space_split_mapper(void *map_data, int num_cells, void *extra_data) {
 }
 
 /**
- * @brief Return a used cell to the buffer od unused sub-cells.
+ * @brief Return a used cell to the buffer of unused sub-cells.
  *
  * @param s The #space.
  * @param c The #cell.
  */
 void space_recycle(struct space *s, struct cell *c) {
 
-  /* Lock the space. */
-  lock_lock(&s->lock);
-
   /* Clear the cell. */
-  if (lock_destroy(&c->lock) != 0) error("Failed to destroy spinlock.");
+  if (lock_destroy(&c->lock) != 0 || lock_destroy(&c->glock) != 0)
+    error("Failed to destroy spinlock.");
 
   /* Clear this cell's sort arrays. */
   if (c->sort != NULL) free(c->sort);
 
-  /* Clear the cell data. */
-  bzero(c, sizeof(struct cell));
+  /* Lock the space. */
+  lock_lock(&s->lock);
 
   /* Hook this cell into the buffer. */
   c->next = s->cells_sub;
@@ -1633,6 +1648,47 @@ void space_recycle(struct space *s, struct cell *c) {
   /* Unlock the space. */
   lock_unlock_blind(&s->lock);
 }
+
+/**
+ * @brief Return a list of used cells to the buffer of unused sub-cells.
+ *
+ * @param s The #space.
+ * @param list_begin Pointer to the first #cell in the linked list of
+ *        cells joined by their @c next pointers. Must not be @c NULL.
+ * @param list_end Pointer to the last #cell in the linked list of
+ *        cells joined by their @c next pointers. It is assumed that this
+ *        cell's @c next pointer is @c NULL.
+ */
+void space_recycle_list(struct space *s, struct cell *list_begin,
+                        struct cell *list_end) {
+
+  int count = 0;
+
+  /* Clean up the list of cells (done before taking the space lock). */
+  for (struct cell *c = list_begin; c != NULL; c = c->next) {
+    /* Destroy both of the cell's locks. */
+    if (lock_destroy(&c->lock) != 0 || lock_destroy(&c->glock) != 0)
+      error("Failed to destroy spinlock.");
+
+    /* Free this cell's sort arrays; NULL the pointer so it cannot dangle. */
+    free(c->sort);
+    c->sort = NULL;
+    /* Count this cell. */
+    count += 1;
+  }
+
+  /* Lock the space. */
+  lock_lock(&s->lock);
+
+  /* Splice the whole list onto the front of the buffer. */
+  list_end->next = s->cells_sub;
+  s->cells_sub = list_begin;
+  s->tot_cells -= count;
+
+  /* Unlock the space. */
+  lock_unlock_blind(&s->lock);
+}
+
 /**
  * @brief Get a new empty (sub-)#cell.
  *
@@ -1652,9 +1708,6 @@ struct cell *space_getcell(struct space *s) {
                        space_cellallocchunk * sizeof(struct cell)) != 0)
       error("Failed to allocate more cells.");
 
-    /* Zero everything for good measure */
-    bzero(s->cells_sub, space_cellallocchunk * sizeof(struct cell));
-
     /* Constructed a linked list */
     for (int k = 0; k < space_cellallocchunk - 1; k++)
       s->cells_sub[k].next = &s->cells_sub[k + 1];
diff --git a/src/space.h b/src/space.h
index 53cf2d0c8fa548ae19aa7452abb38c3e3e028165..4aea2a07560865c8d8a474f069b370748e12e65e 100644
--- a/src/space.h
+++ b/src/space.h
@@ -171,6 +171,8 @@ void space_gparts_sort_mapper(void *map_data, int num_elements,
                               void *extra_data);
 void space_rebuild(struct space *s, int verbose);
 void space_recycle(struct space *s, struct cell *c);
+void space_recycle_list(struct space *s, struct cell *list_begin,
+                        struct cell *list_end);
 void space_split(struct space *s, struct cell *cells, int nr_cells,
                  int verbose);
 void space_split_mapper(void *map_data, int num_elements, void *extra_data);
diff --git a/src/swift.h b/src/swift.h
index 1e3e0f2cf88d7307d19f36d42c59f692f282b98c..6c2bcf1811c336b7d5dd2b838b4a4f518ba34ebb 100644
--- a/src/swift.h
+++ b/src/swift.h
@@ -45,6 +45,7 @@
 #include "partition.h"
 #include "physical_constants.h"
 #include "potential.h"
+#include "profiler.h"
 #include "queue.h"
 #include "runner.h"
 #include "scheduler.h"
diff --git a/src/threadpool.c b/src/threadpool.c
index 6bc887d96cb72804f0fbc8e2801a6522bf27f947..c11fd8121bb02f36fce1796d79a7eb55a38102c4 100644
--- a/src/threadpool.c
+++ b/src/threadpool.c
@@ -91,6 +91,10 @@ void threadpool_init(struct threadpool *tp, int num_threads) {
   tp->num_threads = num_threads;
   tp->num_threads_waiting = 0;
 
+  /* With a single thread there is nothing more to set up: the work is
+     done directly in the (blocked) calling thread. */
+  if (num_threads == 1) return;
+
   /* Init the threadpool mutexes. */
   if (pthread_mutex_init(&tp->thread_mutex, NULL) != 0)
     error("Failed to initialize mutexex.");
@@ -144,6 +148,12 @@ void threadpool_map(struct threadpool *tp, threadpool_map_function map_function,
                     void *map_data, size_t N, int stride, int chunk,
                     void *extra_data) {
 
+  /* If we just have a single thread, call the map function directly. */
+  if (tp->num_threads == 1) {
+    map_function(map_data, N, extra_data);
+    return;
+  }
+
   /* Set the map data and signal the threads. */
   pthread_mutex_lock(&tp->thread_mutex);
   tp->map_data_stride = stride;
diff --git a/tests/Makefile.am b/tests/Makefile.am
index 136b7ad231947574a5459298e7fb85902028a3f4..1250835853f7521c069f3978f920cabd8f03540b 100644
--- a/tests/Makefile.am
+++ b/tests/Makefile.am
@@ -25,7 +25,7 @@ TESTS = testGreetings testMaths testReading.sh testSingle testKernel testSymmetr
         testPair.sh testPairPerturbed.sh test27cells.sh test27cellsPerturbed.sh  \
         testParser.sh testSPHStep test125cells.sh testKernelGrav testFFT \
         testAdiabaticIndex testRiemannExact testRiemannTRRS testRiemannHLLC \
-        testMatrixInversion testThreadpool
+        testMatrixInversion testThreadpool testDump
 
 # List of test programs to compile
 check_PROGRAMS = testGreetings testReading testSingle testTimeIntegration \
@@ -33,7 +33,7 @@ check_PROGRAMS = testGreetings testReading testSingle testTimeIntegration \
                  testKernel testKernelGrav testFFT testInteractions testMaths \
                  testSymmetry testThreadpool \
                  testAdiabaticIndex testRiemannExact testRiemannTRRS \
-                 testRiemannHLLC testMatrixInversion
+                 testRiemannHLLC testMatrixInversion testDump
 
 # Sources for the individual programs
 testGreetings_SOURCES = testGreetings.c
@@ -78,6 +78,8 @@ testMatrixInversion_SOURCES = testMatrixInversion.c
 
 testThreadpool_SOURCES = testThreadpool.c
 
+testDump_SOURCES = testDump.c
+
 # Files necessary for distribution
 EXTRA_DIST = testReading.sh makeInput.py testPair.sh testPairPerturbed.sh \
 	     test27cells.sh test27cellsPerturbed.sh testParser.sh \
diff --git a/tests/testDump.c b/tests/testDump.c
new file mode 100644
index 0000000000000000000000000000000000000000..5f46d30a4eeb3d936563a3983b89c3d46ecd4a06
--- /dev/null
+++ b/tests/testDump.c
@@ -0,0 +1,84 @@
+/*******************************************************************************
+ * This file is part of SWIFT.
+ * Copyright (c) 2016 Pedro Gonnet (pedro.gonnet@durham.ac.uk)
+ *
+ * This program is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU Lesser General Public License as published
+ * by the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public License
+ * along with this program.  If not, see <http://www.gnu.org/licenses/>.
+ *
+ ******************************************************************************/
+
+/* Config parameters. */
+#include "../config.h"
+
+/* Some standard headers. */
+#include <errno.h>
+#include <fcntl.h>
+#include <stdio.h>
+#include <string.h>
+#include <sys/mman.h>
+#include <sys/stat.h>
+#include <sys/types.h>
+#include <unistd.h>
+
+/* This object's header. */
+#include "../src/dump.h"
+
+/* Local headers. */
+#include "../src/threadpool.h"
+
+void dump_mapper(void *map_data, int num_elements, void *extra_data) {
+  struct dump *d = (struct dump *)extra_data; /* other arguments are unused */
+  size_t offset;
+  char *out_string = dump_get(d, 7, &offset); /* ask for one 7-byte record */
+  char out_buff[8]; /* 6 digits + '\n' + the NUL that snprintf appends */
+  snprintf(out_buff, sizeof(out_buff), "%06zu\n", offset / 7);
+  memcpy(out_string, out_buff, 7); /* copy the record without the NUL */
+}
+
+int main(int argc, char *argv[]) {
+
+  /* Test parameters: pool size, output path, and the amount of data. */
+  const int num_threads = 4;
+  const char *filename = "/tmp/dump_test.out";
+  const int num_runs = 20;
+  const int chunk_size = 1000;
+
+  /* Prepare a threadpool to write to the dump. */
+  struct threadpool t;
+  threadpool_init(&t, num_threads);
+
+  /* Prepare a dump on filename; 1024 is presumably its initial size. */
+  struct dump d;
+  dump_init(&d, filename, 1024);
+
+  /* Dump numbers in chunks. */
+  for (int run = 0; run < num_runs; run++) {
+
+    /* Ensure capacity for chunk_size records of 7 bytes each. */
+    dump_ensure(&d, 7 * chunk_size);
+
+    /* Map dump_mapper over chunk_size elements (no map data, chunk of 1). */
+    printf("dumping %i chunks...\n", chunk_size);
+    fflush(stdout);
+    threadpool_map(&t, dump_mapper, NULL, chunk_size, 0, 1, &d);
+  }
+
+  /* Sync the file, not necessary before dump_close, but just to test this. */
+  dump_sync(&d);
+
+  /* Finalize the dump. */
+  dump_close(&d);
+  
+  /* Nothing is verified here; reaching this point counts as success. */
+  return 0;
+}