diff --git a/examples/Makefile.am b/examples/Makefile.am
index 54fab6edb5d1dc26def643cee04dccd8fd8f6429..6abaf5a542e58568d5575bc80eb054fe85d01d30 100644
--- a/examples/Makefile.am
+++ b/examples/Makefile.am
@@ -54,10 +54,10 @@ swift_fixdt_LDADD =  ../src/.libs/libswiftsim.a $(HDF5_LDFLAGS) $(HDF5_LIBS)
 
 # Sources for swift_mpi, do we need an affinity policy for MPI?
 swift_mpi_SOURCES = main.c
-swift_mpi_CFLAGS = $(MYFLAGS) $(AM_CFLAGS) $(MPI_FLAGS) -DENGINE_POLICY="engine_policy_keep"
+swift_mpi_CFLAGS = $(MYFLAGS) $(AM_CFLAGS) $(MPI_FLAGS) -DENGINE_POLICY="engine_policy_keep $(ENGINE_POLICY_SETAFFINITY)"
 swift_mpi_LDADD =  ../src/.libs/libswiftsim_mpi.a $(HDF5_LDFLAGS) $(HDF5_LIBS) $(MPI_LIBS)
 
 swift_fixdt_mpi_SOURCES = main.c
-swift_fixdt_mpi_CFLAGS = $(MYFLAGS) $(AM_CFLAGS) $(MPI_FLAGS) -DENGINE_POLICY="engine_policy_fixdt | engine_policy_keep"
+swift_fixdt_mpi_CFLAGS = $(MYFLAGS) $(AM_CFLAGS) $(MPI_FLAGS) -DENGINE_POLICY="engine_policy_fixdt | engine_policy_keep $(ENGINE_POLICY_SETAFFINITY)"
 swift_fixdt_mpi_LDADD =  ../src/.libs/libswiftsim_mpi.a $(HDF5_LDFLAGS) $(HDF5_LIBS) $(MPI_LIBS)
 
diff --git a/examples/main.c b/examples/main.c
index 1920448b4584f2ef611ab3c699d0d52cc0080efc..3328220a5e8e1e6c90234ad4a3fb4ef5b4e46d6d 100644
--- a/examples/main.c
+++ b/examples/main.c
@@ -119,17 +119,19 @@ int main(int argc, char *argv[]) {
   if (myrank == 0) greetings();
 
 #if defined(HAVE_SETAFFINITY) && defined(HAVE_LIBNUMA) && defined(_GNU_SOURCE)
-  /* Ensure the NUMA node on which we initialise (first touch) everything
-   * doesn't change before engine_init allocates NUMA-local workers. Otherwise,
-   * we may be scheduled elsewhere between the two times.
-   */
-  cpu_set_t affinity;
-  CPU_ZERO(&affinity);
-  CPU_SET(sched_getcpu(), &affinity);
-  if (sched_setaffinity(0, sizeof(cpu_set_t), &affinity) != 0) {
-    message("failed to set entry thread's affinity");
-  } else {
-    message("set entry thread's affinity");
+  if ((ENGINE_POLICY) & engine_policy_setaffinity) {
+    /* Ensure the NUMA node on which we initialise (first touch) everything
+     * doesn't change before engine_init allocates NUMA-local workers. Otherwise,
+     * we may be scheduled elsewhere between the two times.
+     */
+    cpu_set_t affinity;
+    CPU_ZERO(&affinity);
+    CPU_SET(sched_getcpu(), &affinity);
+    if (sched_setaffinity(0, sizeof(cpu_set_t), &affinity) != 0) {
+      message("failed to set entry thread's affinity");
+    } else {
+      message("set entry thread's affinity");
+    }
   }
 #endif
 
@@ -137,7 +139,7 @@ int main(int argc, char *argv[]) {
   bzero(&s, sizeof(struct space));
 
   /* Parse the options */
-  while ((c = getopt(argc, argv, "a:c:d:e:f:g:m:q:s:t:w:y:z:")) != -1)
+  while ((c = getopt(argc, argv, "a:c:d:e:f:g:m:oq:s:t:w:y:z:")) != -1)
     switch (c) {
       case 'a':
         if (sscanf(optarg, "%lf", &scaling) != 1)
diff --git a/src/Makefile.am b/src/Makefile.am
index cd100392ac7fd1712c04e64681edf6d0a2de517e..53741ca5c0b1658e96d8dfec0df423d3ff321146 100644
--- a/src/Makefile.am
+++ b/src/Makefile.am
@@ -46,7 +46,11 @@ AM_SOURCES = space.c runner.c queue.c task.c cell.c engine.c \
 # Include files for distribution, not installation.
 noinst_HEADERS = approx_math.h atomic.h cycle.h error.h inline.h kernel.h vector.h \
 		 runner_doiact.h runner_doiact_grav.h units.h intrinsics.h \
-		 hydro.h hydro_io.h gravity.h hydro/*/*.h gravity/*/*.h
+		 hydro.h hydro_io.h gravity.h \
+		 hydro/Default/hydro.h hydro/Default/hydro_iact.h hydro/Default/hydro_io.h \
+                 hydro/Default/hydro_debug.h hydro/Default/hydro_part.h \
+		 hydro/Gadget2/hydro.h hydro/Gadget2/hydro_iact.h hydro/Gadget2/hydro_io.h \
+                 hydro/Gadget2/hydro_debug.h hydro/Gadget2/hydro_part.h
 
 # Sources and flags for regular library
 libswiftsim_la_SOURCES = $(AM_SOURCES)
diff --git a/src/cell.c b/src/cell.c
index cdb41872fa5fbe41eeb5c7b376ed901620dda5e4..f7fe61ba30c7d04362003c495702c9f23a6efbd3 100644
--- a/src/cell.c
+++ b/src/cell.c
@@ -162,8 +162,8 @@ int cell_pack(struct cell *c, struct pcell *pc) {
 
   /* Start by packing the data of the current cell. */
   pc->h_max = c->h_max;
-  c->t_end_min = pc->t_end_min;
-  c->t_end_max = pc->t_end_max;
+  pc->t_end_min = c->t_end_min;
+  pc->t_end_max = c->t_end_max;
   pc->count = c->count;
   c->tag = pc->tag = atomic_inc(&cell_next_tag) % cell_max_tag;
 
diff --git a/src/debug.c b/src/debug.c
index 3184c958fe70e9152843d466ec71ac1f2d420227..3efca024d10030c3bf4a9c0f9861ec0dc2f07310 100644
--- a/src/debug.c
+++ b/src/debug.c
@@ -22,7 +22,7 @@
 
 #include <stdio.h>
 
-#include "../config.h"
+#include "config.h"
 #include "const.h"
 #include "part.h"
 #include "debug.h"
diff --git a/src/engine.c b/src/engine.c
index 9a47564c8fced8de48ee4d78d7e9bb4b12cd8dc8..fbadc99322c29241d80635353abf80972660c104 100644
--- a/src/engine.c
+++ b/src/engine.c
@@ -1706,15 +1706,16 @@ void engine_init_particles(struct engine *e) {
 
   struct space *s = e->s;
 
-  engine_prepare(e);
-
-  engine_marktasks(e);
+  message("Initialising particles");
 
   /* Make sure all particles are ready to go */
   /* i.e. clean-up any stupid state in the ICs */
-  message("Initialising particles");
   space_map_cells_pre(s, 1, cell_init_parts, NULL);
 
+  engine_prepare(e);
+
+  engine_marktasks(e);
+
   // printParticle(e->s->parts, 1000, e->s->nr_parts);
   // printParticle(e->s->parts, 515050, e->s->nr_parts);
 
@@ -1848,9 +1849,11 @@ if ( e->nodeID == 0 )
 
   TIMER_TOC2(timer_step);
 
-  printf("%d %f %f %d %.3f\n", e->step, e->time, e->timeStep, updates,
-         ((double)timers[timer_count - 1]) / CPU_TPS * 1000);
-  fflush(stdout);
+  if (e->nodeID == 0) {
+    printf("%d %f %f %d %.3f\n", e->step, e->time, e->timeStep, updates,
+           ((double)timers[timer_count - 1]) / CPU_TPS * 1000);
+    fflush(stdout);
+  }
 
   /* printParticle(e->s->parts, e->s->xparts,1000, e->s->nr_parts); */
   /* printParticle(e->s->parts, e->s->xparts,515050, e->s->nr_parts); */
diff --git a/src/serial_io.c b/src/serial_io.c
index 78b0ab58989a0ddf164156291f5c2c2c696c15dc..ebb7eee68882766c5f550cda7feee1d9b68ab5cc 100644
--- a/src/serial_io.c
+++ b/src/serial_io.c
@@ -241,9 +241,12 @@ void prepareArray(hid_t grp, char* fileName, FILE* xmfFile, char* name,
  *
  * Calls #error() if an error occurs.
  */
-void writeArrayBackEnd(hid_t grp, char* name, enum DATA_TYPE type, int N,
-                       int dim, long long N_total, long long offset,
-                       char* part_c) {
+void writeArrayBackEnd(hid_t grp, char* fileName, FILE* xmfFile,  char* name,
+		       enum DATA_TYPE type, int N, int dim, long long N_total,
+		       int mpi_rank, long long offset, char* part_c,
+		       struct UnitSystem* us,
+                       enum UnitConversionFactor convFactor) {
+
   hid_t h_data = 0, h_err = 0, h_memspace = 0, h_filespace = 0;
   hsize_t shape[2], offsets[2];
   void* temp = 0;
@@ -255,6 +258,12 @@ void writeArrayBackEnd(hid_t grp, char* name, enum DATA_TYPE type, int N,
 
   /* message("Writing '%s' array...", name); */
 
+  /* Prepare the arrays in the file */
+  if(mpi_rank == 0)
+    prepareArray(grp, fileName, xmfFile, name, type, N_total, dim,
+		 us, convFactor);
+
+  
   /* Allocate temporary buffer */
   temp = malloc(N * dim * sizeOfType(type));
   if (temp == NULL) error("Unable to allocate memory for temporary buffer");
@@ -350,8 +359,9 @@ void writeArrayBackEnd(hid_t grp, char* name, enum DATA_TYPE type, int N,
  */
 #define writeArray(grp, fileName, xmfFile, name, type, N, dim, part, N_total, \
                    mpi_rank, offset, field, us, convFactor)                   \
-  writeArrayBackEnd(grp, name, type, N, dim, N_total, offset,                 \
-                    (char*)(&(part[0]).field))
+  writeArrayBackEnd(grp, fileName, xmfFile, name, type, N, dim, N_total, \
+		    mpi_rank, offset, (char*)(&(part[0]).field),	 \
+		    us, convFactor)
 
 /* Import the right hydro definition */
 #include "hydro_io.h"
@@ -602,27 +612,6 @@ void write_output_serial(struct engine* e, struct UnitSystem* us, int mpi_rank,
     h_grp = H5Gcreate1(h_file, "/PartType0", 0);
     if (h_grp < 0) error("Error while creating particle group.\n");
 
-    /* Prepare the arrays in the file */
-    prepareArray(h_grp, fileName, xmfFile, "Coordinates", DOUBLE, N_total, 3,
-                 us, UNIT_CONV_LENGTH);
-    prepareArray(h_grp, fileName, xmfFile, "Velocities", FLOAT, N_total, 3, us,
-                 UNIT_CONV_SPEED);
-    prepareArray(h_grp, fileName, xmfFile, "Masses", FLOAT, N_total, 1, us,
-                 UNIT_CONV_MASS);
-    prepareArray(h_grp, fileName, xmfFile, "SmoothingLength", FLOAT, N_total, 1,
-                 us, UNIT_CONV_LENGTH);
-    prepareArray(h_grp, fileName, xmfFile, "InternalEnergy", FLOAT, N_total, 1,
-                 us, UNIT_CONV_ENERGY_PER_UNIT_MASS);
-    prepareArray(h_grp, fileName, xmfFile, "ParticleIDs", ULONGLONG, N_total, 1,
-                 us, UNIT_CONV_NO_UNITS);
-    /* prepareArray(h_grp, fileName, xmfFile, "TimeStep", FLOAT, N_total, 1, us,
-     */
-    /*              UNIT_CONV_TIME); */
-    prepareArray(h_grp, fileName, xmfFile, "Acceleration", FLOAT, N_total, 3,
-                 us, UNIT_CONV_ACCELERATION);
-    prepareArray(h_grp, fileName, xmfFile, "Density", FLOAT, N_total, 1, us,
-                 UNIT_CONV_DENSITY);
-
     /* Close particle group */
     H5Gclose(h_grp);
 
@@ -650,7 +639,7 @@ void write_output_serial(struct engine* e, struct UnitSystem* us, int mpi_rank,
         error("Error while opening particle group on rank %d.\n", mpi_rank);
 
       /* Write particle fields from the particle structure */
-      hydro_write_particles(h_grp, fileName, xmfFile, N, N_total, 0, offset,
+      hydro_write_particles(h_grp, fileName, xmfFile, N, N_total, mpi_rank, offset,
                             parts, us);
 
       /* Close particle group */