diff --git a/src/cache.h b/src/cache.h
index 3eb1e194dd4232319ac1d4a4323ca8099f044063..c939da28589c1421e0e4241dca124a8320d5c87b 100644
--- a/src/cache.h
+++ b/src/cache.h
@@ -349,10 +349,12 @@ __attribute__((always_inline)) INLINE void cache_read_two_partial_cells_sorted(
     y[i] = (float)(parts_i[idx].x[1] - total_ci_shift[1]);
     z[i] = (float)(parts_i[idx].x[2] - total_ci_shift[2]);
     h[i] = parts_i[idx].h;
-    m[i] = parts_i[idx].mass;
     vx[i] = parts_i[idx].v[0];
     vy[i] = parts_i[idx].v[1];
     vz[i] = parts_i[idx].v[2];
+#ifdef GADGET2_SPH
+    m[i] = parts_i[idx].mass;
+#endif
   }
 
 #ifdef SWIFT_DEBUG_CHECKS
@@ -431,10 +433,12 @@ __attribute__((always_inline)) INLINE void cache_read_two_partial_cells_sorted(
     yj[i] = (float)(parts_j[idx].x[1] - total_cj_shift[1]);
     zj[i] = (float)(parts_j[idx].x[2] - total_cj_shift[2]);
     hj[i] = parts_j[idx].h;
-    mj[i] = parts_j[idx].mass;
     vxj[i] = parts_j[idx].v[0];
     vyj[i] = parts_j[idx].v[1];
     vzj[i] = parts_j[idx].v[2];
+#ifdef GADGET2_SPH
+    mj[i] = parts_j[idx].mass;
+#endif
   }
 
 #ifdef SWIFT_DEBUG_CHECKS
@@ -572,15 +576,17 @@ cache_read_two_partial_cells_sorted_force(
     y[i] = (float)(parts_i[idx].x[1] - total_ci_shift[1]);
     z[i] = (float)(parts_i[idx].x[2] - total_ci_shift[2]);
     h[i] = parts_i[idx].h;
-    m[i] = parts_i[idx].mass;
     vx[i] = parts_i[idx].v[0];
     vy[i] = parts_i[idx].v[1];
     vz[i] = parts_i[idx].v[2];
+#ifdef GADGET2_SPH
+    m[i] = parts_i[idx].mass;
     rho[i] = parts_i[idx].rho;
     grad_h[i] = parts_i[idx].force.f;
     pOrho2[i] = parts_i[idx].force.P_over_rho2;
     balsara[i] = parts_i[idx].force.balsara;
     soundspeed[i] = parts_i[idx].force.soundspeed;
+#endif
   }
 
   /* Pad cache with fake particles that exist outside the cell so will not
@@ -635,15 +641,17 @@ cache_read_two_partial_cells_sorted_force(
     yj[i] = (float)(parts_j[idx].x[1] - total_cj_shift[1]);
     zj[i] = (float)(parts_j[idx].x[2] - total_cj_shift[2]);
     hj[i] = parts_j[idx].h;
-    mj[i] = parts_j[idx].mass;
     vxj[i] = parts_j[idx].v[0];
     vyj[i] = parts_j[idx].v[1];
     vzj[i] = parts_j[idx].v[2];
+#ifdef GADGET2_SPH
+    mj[i] = parts_j[idx].mass;
     rhoj[i] = parts_j[idx].rho;
     grad_hj[i] = parts_j[idx].force.f;
     pOrho2j[i] = parts_j[idx].force.P_over_rho2;
     balsaraj[i] = parts_j[idx].force.balsara;
     soundspeedj[i] = parts_j[idx].force.soundspeed;
+#endif
   }
 
   /* Pad cache with fake particles that exist outside the cell so will not
diff --git a/src/common_io.c b/src/common_io.c
index 917f2a63755f88c300ed7ff654bfe6e8cf3c464f..47e04b868b9d610983b977bd6455c1c78d70bb41 100644
--- a/src/common_io.c
+++ b/src/common_io.c
@@ -40,10 +40,13 @@
 #include "common_io.h"
 
 /* Local includes. */
+#include "engine.h"
 #include "error.h"
 #include "hydro.h"
+#include "io_properties.h"
 #include "kernel_hydro.h"
 #include "part.h"
+#include "threadpool.h"
 #include "units.h"
 #include "version.h"
 
@@ -266,7 +269,6 @@ void io_write_attribute_f(hid_t grp, const char* name, float data) {
  * @param name The name of the attribute
  * @param data The value to write
  */
-
 void io_write_attribute_i(hid_t grp, const char* name, int data) {
   io_write_attribute(grp, name, INT, &data, 1);
 }
@@ -295,16 +297,19 @@ void io_write_attribute_s(hid_t grp, const char* name, const char* str) {
  * @brief Reads the Unit System from an IC file.
  * @param h_file The (opened) HDF5 file from which to read.
  * @param us The unit_system to fill.
+ * @param mpi_rank The MPI rank we are on.
  *
  * If the 'Units' group does not exist in the ICs, cgs units will be assumed
  */
-void io_read_unit_system(hid_t h_file, struct unit_system* us) {
+void io_read_unit_system(hid_t h_file, struct unit_system* us, int mpi_rank) {
 
   /* First check if it exists as this is *not* required. */
   const htri_t exists = H5Lexists(h_file, "/Units", H5P_DEFAULT);
 
   if (exists == 0) {
-    message("'Units' group not found in ICs. Assuming CGS unit system.");
+
+    if (mpi_rank == 0)
+      message("'Units' group not found in ICs. Assuming CGS unit system.");
 
     /* Default to CGS */
     us->UnitMass_in_cgs = 1.;
@@ -319,7 +324,7 @@ void io_read_unit_system(hid_t h_file, struct unit_system* us) {
           exists);
   }
 
-  message("Reading IC units from ICs.");
+  if (mpi_rank == 0) message("Reading IC units from ICs.");
   hid_t h_grp = H5Gopen(h_file, "/Units", H5P_DEFAULT);
 
   /* Ok, Read the damn thing */
@@ -401,6 +406,208 @@ void io_write_code_description(hid_t h_file) {
   H5Gclose(h_grpcode);
 }
 
+/**
+ * @brief Mapper function to copy #part or #gpart fields into a buffer.
+ */
+void io_copy_mapper(void* restrict temp, int N, void* restrict extra_data) {
+
+  const struct io_props props = *((const struct io_props*)extra_data);
+  const size_t typeSize = io_sizeof_type(props.type);
+  const size_t copySize = typeSize * props.dimension;
+
+  /* How far are we with this chunk? */
+  char* restrict temp_c = temp;
+  const ptrdiff_t delta = (temp_c - props.start_temp_c) / copySize;
+
+  for (int k = 0; k < N; k++) {
+    memcpy(&temp_c[k * copySize], props.field + (delta + k) * props.partSize,
+           copySize);
+  }
+}
+
+/**
+ * @brief Mapper function to copy #part into a buffer of floats using a
+ * conversion function.
+ */
+void io_convert_part_f_mapper(void* restrict temp, int N,
+                              void* restrict extra_data) {
+
+  const struct io_props props = *((const struct io_props*)extra_data);
+  const struct part* restrict parts = props.parts;
+  const struct engine* e = props.e;
+  const size_t dim = props.dimension;
+
+  /* How far are we with this chunk? */
+  float* restrict temp_f = temp;
+  const ptrdiff_t delta = (temp_f - props.start_temp_f) / dim;
+
+  for (int i = 0; i < N; i++)
+    props.convert_part_f(e, parts + delta + i, &temp_f[i * dim]);
+}
+
+/**
+ * @brief Mapper function to copy #part into a buffer of doubles using a
+ * conversion function.
+ */
+void io_convert_part_d_mapper(void* restrict temp, int N,
+                              void* restrict extra_data) {
+
+  const struct io_props props = *((const struct io_props*)extra_data);
+  const struct part* restrict parts = props.parts;
+  const struct engine* e = props.e;
+  const size_t dim = props.dimension;
+
+  /* How far are we with this chunk? */
+  double* restrict temp_d = temp;
+  const ptrdiff_t delta = (temp_d - props.start_temp_d) / dim;
+
+  for (int i = 0; i < N; i++)
+    props.convert_part_d(e, parts + delta + i, &temp_d[i * dim]);
+}
+
+/**
+ * @brief Mapper function to copy #gpart into a buffer of floats using a
+ * conversion function.
+ */
+void io_convert_gpart_f_mapper(void* restrict temp, int N,
+                               void* restrict extra_data) {
+
+  const struct io_props props = *((const struct io_props*)extra_data);
+  const struct gpart* restrict gparts = props.gparts;
+  const struct engine* e = props.e;
+  const size_t dim = props.dimension;
+
+  /* How far are we with this chunk? */
+  float* restrict temp_f = temp;
+  const ptrdiff_t delta = (temp_f - props.start_temp_f) / dim;
+
+  for (int i = 0; i < N; i++)
+    props.convert_gpart_f(e, gparts + delta + i, &temp_f[i * dim]);
+}
+
+/**
+ * @brief Mapper function to copy #gpart into a buffer of doubles using a
+ * conversion function.
+ */
+void io_convert_gpart_d_mapper(void* restrict temp, int N,
+                               void* restrict extra_data) {
+
+  const struct io_props props = *((const struct io_props*)extra_data);
+  const struct gpart* restrict gparts = props.gparts;
+  const struct engine* e = props.e;
+  const size_t dim = props.dimension;
+
+  /* How far are we with this chunk? */
+  double* restrict temp_d = temp;
+  const ptrdiff_t delta = (temp_d - props.start_temp_d) / dim;
+
+  for (int i = 0; i < N; i++)
+    props.convert_gpart_d(e, gparts + delta + i, &temp_d[i * dim]);
+}
+
+/**
+ * @brief Copy the particle data into a temporary buffer ready for i/o.
+ *
+ * @param temp The buffer to be filled. Must be allocated and aligned properly.
+ * @param e The #engine.
+ * @param props The #io_props corresponding to the particle field we are
+ * copying.
+ * @param N The number of particles to copy
+ * @param internal_units The system of units used internally.
+ * @param snapshot_units The system of units used for the snapshots.
+ */
+void io_copy_temp_buffer(void* temp, const struct engine* e,
+                         struct io_props props, size_t N,
+                         const struct unit_system* internal_units,
+                         const struct unit_system* snapshot_units) {
+
+  const size_t typeSize = io_sizeof_type(props.type);
+  const size_t copySize = typeSize * props.dimension;
+  const size_t num_elements = N * props.dimension;
+
+  /* Copy particle data to temporary buffer */
+  if (props.conversion == 0) { /* No conversion */
+
+    /* Prepare some parameters */
+    char* temp_c = temp;
+    props.start_temp_c = temp_c;
+
+    /* Copy the whole thing into a buffer */
+    threadpool_map((struct threadpool*)&e->threadpool, io_copy_mapper, temp_c,
+                   N, copySize, 0, (void*)&props);
+
+  } else { /* Converting particle to data */
+
+    if (props.convert_part_f != NULL) {
+
+      /* Prepare some parameters */
+      float* temp_f = temp;
+      props.start_temp_f = temp;
+      props.e = e;
+
+      /* Copy the whole thing into a buffer */
+      threadpool_map((struct threadpool*)&e->threadpool,
+                     io_convert_part_f_mapper, temp_f, N, copySize, 0,
+                     (void*)&props);
+
+    } else if (props.convert_part_d != NULL) {
+
+      /* Prepare some parameters */
+      double* temp_d = temp;
+      props.start_temp_d = temp;
+      props.e = e;
+
+      /* Copy the whole thing into a buffer */
+      threadpool_map((struct threadpool*)&e->threadpool,
+                     io_convert_part_d_mapper, temp_d, N, copySize, 0,
+                     (void*)&props);
+
+    } else if (props.convert_gpart_f != NULL) {
+
+      /* Prepare some parameters */
+      float* temp_f = temp;
+      props.start_temp_f = temp;
+      props.e = e;
+
+      /* Copy the whole thing into a buffer */
+      threadpool_map((struct threadpool*)&e->threadpool,
+                     io_convert_gpart_f_mapper, temp_f, N, copySize, 0,
+                     (void*)&props);
+
+    } else if (props.convert_gpart_d != NULL) {
+
+      /* Prepare some parameters */
+      double* temp_d = temp;
+      props.start_temp_d = temp;
+      props.e = e;
+
+      /* Copy the whole thing into a buffer */
+      threadpool_map((struct threadpool*)&e->threadpool,
+                     io_convert_gpart_d_mapper, temp_d, N, copySize, 0,
+                     (void*)&props);
+
+    } else {
+      error("Missing conversion function");
+    }
+  }
+
+  /* Unit conversion if necessary */
+  const double factor =
+      units_conversion_factor(internal_units, snapshot_units, props.units);
+  if (factor != 1.) {
+
+    /* message("Converting ! factor=%e", factor); */
+
+    if (io_is_double_precision(props.type)) {
+      swift_declare_aligned_ptr(double, temp_d, temp, IO_BUFFER_ALIGNMENT);
+      for (size_t i = 0; i < num_elements; ++i) temp_d[i] *= factor;
+    } else {
+      swift_declare_aligned_ptr(float, temp_f, temp, IO_BUFFER_ALIGNMENT);
+      for (size_t i = 0; i < num_elements; ++i) temp_f[i] *= factor;
+    }
+  }
+}
+
 #endif /* HAVE_HDF5 */
 
 /* ------------------------------------------------------------------------------------------------
diff --git a/src/common_io.h b/src/common_io.h
index 577d1664db20196c8d0c48ef5b5e4e960b0e195e..2e6d4247a5e2d0f558f6bfa601fd6c57d80e05d9 100644
--- a/src/common_io.h
+++ b/src/common_io.h
@@ -30,6 +30,11 @@
 #define FIELD_BUFFER_SIZE 200
 #define PARTICLE_GROUP_BUFFER_SIZE 50
 #define FILENAME_BUFFER_SIZE 150
+#define IO_BUFFER_ALIGNMENT 1024
+
+/* Avoid cyclic inclusion problems */
+struct io_props;
+struct engine;
 
 #if defined(HAVE_HDF5)
 
@@ -68,10 +73,15 @@ void io_write_attribute_s(hid_t grp, const char* name, const char* str);
 
 void io_write_code_description(hid_t h_file);
 
-void io_read_unit_system(hid_t h_file, struct unit_system* us);
+void io_read_unit_system(hid_t h_file, struct unit_system* us, int mpi_rank);
 void io_write_unit_system(hid_t h_grp, const struct unit_system* us,
                           const char* groupName);
 
+void io_copy_temp_buffer(void* temp, const struct engine* e,
+                         const struct io_props props, size_t N,
+                         const struct unit_system* internal_units,
+                         const struct unit_system* snapshot_units);
+
 #endif /* defined HDF5 */
 
 void io_collect_dm_gparts(const struct gpart* const gparts, size_t Ntot,
diff --git a/src/engine.c b/src/engine.c
index 76a2381312b1e0db27f389ce2441bc57a8398aac..009779111703511ee2c1617013fb21b8e3f3553c 100644
--- a/src/engine.c
+++ b/src/engine.c
@@ -2624,9 +2624,8 @@ void engine_make_extra_hydroloop_tasks_mapper(void *map_data, int num_elements,
     /* Self-interaction? */
     else if (t->type == task_type_self && t->subtype == task_subtype_density) {
 
-      /* Make all density tasks depend on the drift and the sorts. */
+      /* Make the self-density tasks depend on the drift only. */
       scheduler_addunlock(sched, t->ci->super->drift_part, t);
-      scheduler_addunlock(sched, t->ci->super->sorts, t);
 
 #ifdef EXTRA_HYDRO_LOOP
       /* Start by constructing the task for the second  and third hydro loop. */
@@ -5004,7 +5003,7 @@ void engine_init(struct engine *e, struct space *s,
   if (with_aff) engine_unpin();
 #endif
 
-  if (with_aff) {
+  if (with_aff && nodeID == 0) {
 #ifdef HAVE_SETAFFINITY
 #ifdef WITH_MPI
     printf("[%04i] %s engine_init: cpu map is [ ", nodeID,
diff --git a/src/gravity/Default/gravity_io.h b/src/gravity/Default/gravity_io.h
index 26eed8a0a680e13130dd257d8b31e6cd00392f6d..671ccc24e95f322d71a734c0a39eface2a93bcec 100644
--- a/src/gravity/Default/gravity_io.h
+++ b/src/gravity/Default/gravity_io.h
@@ -21,6 +21,20 @@
 
 #include "io_properties.h"
 
+void convert_gpart_pos(const struct engine* e, const struct gpart* gp,
+                       double* ret) {
+
+  if (e->s->periodic) {
+    ret[0] = box_wrap(gp->x[0], 0.0, e->s->dim[0]);
+    ret[1] = box_wrap(gp->x[1], 0.0, e->s->dim[1]);
+    ret[2] = box_wrap(gp->x[2], 0.0, e->s->dim[2]);
+  } else {
+    ret[0] = gp->x[0];
+    ret[1] = gp->x[1];
+    ret[2] = gp->x[2];
+  }
+}
+
 /**
  * @brief Specifies which g-particle fields to read from a dataset
  *
@@ -52,15 +66,15 @@ void darkmatter_read_particles(struct gpart* gparts, struct io_props* list,
  * @param list The list of i/o properties to write.
  * @param num_fields The number of i/o fields to write.
  */
-void darkmatter_write_particles(struct gpart* gparts, struct io_props* list,
-                                int* num_fields) {
+void darkmatter_write_particles(const struct gpart* gparts,
+                                struct io_props* list, int* num_fields) {
 
   /* Say how much we want to read */
   *num_fields = 5;
 
   /* List what we want to read */
-  list[0] = io_make_output_field("Coordinates", DOUBLE, 3, UNIT_CONV_LENGTH,
-                                 gparts, x);
+  list[0] = io_make_output_field_convert_gpart(
+      "Coordinates", DOUBLE, 3, UNIT_CONV_LENGTH, gparts, convert_gpart_pos);
   list[1] = io_make_output_field("Velocities", FLOAT, 3, UNIT_CONV_SPEED,
                                  gparts, v_full);
   list[2] =
diff --git a/src/hydro/Default/hydro_io.h b/src/hydro/Default/hydro_io.h
index 62e94b05ffea259aac99d4b3714e0eea7e7c955f..8c12f015bd69dc917a80c564422676a85ffcfb3b 100644
--- a/src/hydro/Default/hydro_io.h
+++ b/src/hydro/Default/hydro_io.h
@@ -53,6 +53,20 @@ void hydro_read_particles(struct part* parts, struct io_props* list,
                                 UNIT_CONV_DENSITY, parts, rho);
 }
 
+void convert_part_pos(const struct engine* e, const struct part* p,
+                      double* ret) {
+
+  if (e->s->periodic) {
+    ret[0] = box_wrap(p->x[0], 0.0, e->s->dim[0]);
+    ret[1] = box_wrap(p->x[1], 0.0, e->s->dim[1]);
+    ret[2] = box_wrap(p->x[2], 0.0, e->s->dim[2]);
+  } else {
+    ret[0] = p->x[0];
+    ret[1] = p->x[1];
+    ret[2] = p->x[2];
+  }
+}
+
 /**
  * @brief Specifies which particle fields to write to a dataset
  *
@@ -66,8 +80,8 @@ void hydro_write_particles(struct part* parts, struct io_props* list,
   *num_fields = 8;
 
   /* List what we want to write */
-  list[0] = io_make_output_field("Coordinates", DOUBLE, 3, UNIT_CONV_LENGTH,
-                                 parts, x);
+  list[0] = io_make_output_field_convert_part(
+      "Coordinates", DOUBLE, 3, UNIT_CONV_LENGTH, parts, convert_part_pos);
   list[1] =
       io_make_output_field("Velocities", FLOAT, 3, UNIT_CONV_SPEED, parts, v);
   list[2] =
diff --git a/src/hydro/Default/hydro_part.h b/src/hydro/Default/hydro_part.h
index 8b14c45614e4c09c48056c9398ed4bfe3ed90e38..5d2f76ab298cdd6199d95abe1e4435321302689e 100644
--- a/src/hydro/Default/hydro_part.h
+++ b/src/hydro/Default/hydro_part.h
@@ -125,6 +125,16 @@ struct part {
   /* Particle time-bin */
   timebin_t time_bin;
 
+#ifdef SWIFT_DEBUG_CHECKS
+
+  /* Time of the last drift */
+  integertime_t ti_drift;
+
+  /* Time of the last kick */
+  integertime_t ti_kick;
+
+#endif
+
 } SWIFT_STRUCT_ALIGN;
 
 #endif /* SWIFT_DEFAULT_HYDRO_PART_H */
diff --git a/src/hydro/Gadget2/hydro_io.h b/src/hydro/Gadget2/hydro_io.h
index 3e46b2351eb6a3871946dd9c69c8d108b10da0df..d89fe5c277b0273e53304d77a241e14d2c71fc5d 100644
--- a/src/hydro/Gadget2/hydro_io.h
+++ b/src/hydro/Gadget2/hydro_io.h
@@ -55,14 +55,28 @@ void hydro_read_particles(struct part* parts, struct io_props* list,
                                 UNIT_CONV_DENSITY, parts, rho);
 }
 
-float convert_u(struct engine* e, struct part* p) {
+void convert_u(const struct engine* e, const struct part* p, float* ret) {
 
-  return hydro_get_internal_energy(p);
+  ret[0] = hydro_get_internal_energy(p);
 }
 
-float convert_P(struct engine* e, struct part* p) {
+void convert_P(const struct engine* e, const struct part* p, float* ret) {
 
-  return hydro_get_pressure(p);
+  ret[0] = hydro_get_pressure(p);
+}
+
+void convert_part_pos(const struct engine* e, const struct part* p,
+                      double* ret) {
+
+  if (e->s->periodic) {
+    ret[0] = box_wrap(p->x[0], 0.0, e->s->dim[0]);
+    ret[1] = box_wrap(p->x[1], 0.0, e->s->dim[1]);
+    ret[2] = box_wrap(p->x[2], 0.0, e->s->dim[2]);
+  } else {
+    ret[0] = p->x[0];
+    ret[1] = p->x[1];
+    ret[2] = p->x[2];
+  }
 }
 
 /**
@@ -72,14 +86,14 @@ float convert_P(struct engine* e, struct part* p) {
  * @param list The list of i/o properties to write.
  * @param num_fields The number of i/o fields to write.
  */
-void hydro_write_particles(struct part* parts, struct io_props* list,
+void hydro_write_particles(const struct part* parts, struct io_props* list,
                            int* num_fields) {
 
   *num_fields = 10;
 
   /* List what we want to write */
-  list[0] = io_make_output_field("Coordinates", DOUBLE, 3, UNIT_CONV_LENGTH,
-                                 parts, x);
+  list[0] = io_make_output_field_convert_part(
+      "Coordinates", DOUBLE, 3, UNIT_CONV_LENGTH, parts, convert_part_pos);
   list[1] =
       io_make_output_field("Velocities", FLOAT, 3, UNIT_CONV_SPEED, parts, v);
   list[2] =
@@ -96,9 +110,9 @@ void hydro_write_particles(struct part* parts, struct io_props* list,
       io_make_output_field("Density", FLOAT, 1, UNIT_CONV_DENSITY, parts, rho);
   list[8] = io_make_output_field_convert_part("InternalEnergy", FLOAT, 1,
                                               UNIT_CONV_ENERGY_PER_UNIT_MASS,
-                                              parts, rho, convert_u);
+                                              parts, convert_u);
   list[9] = io_make_output_field_convert_part(
-      "Pressure", FLOAT, 1, UNIT_CONV_PRESSURE, parts, rho, convert_P);
+      "Pressure", FLOAT, 1, UNIT_CONV_PRESSURE, parts, convert_P);
 }
 
 /**
diff --git a/src/hydro/Gizmo/hydro_io.h b/src/hydro/Gizmo/hydro_io.h
index ea0a083f37078859b236ca0799268066f6fa0385..458ea6151e4b00b9ccf85ad6eff4ccf58277ff7d 100644
--- a/src/hydro/Gizmo/hydro_io.h
+++ b/src/hydro/Gizmo/hydro_io.h
@@ -69,10 +69,11 @@ void hydro_read_particles(struct part* parts, struct io_props* list,
  *
  * @param e #engine.
  * @param p Particle.
- * @return Internal energy of the particle
+ * @param ret (return) Internal energy of the particle
  */
-float convert_u(struct engine* e, struct part* p) {
-  return hydro_get_internal_energy(p);
+void convert_u(const struct engine* e, const struct part* p, float* ret) {
+
+  ret[0] = hydro_get_internal_energy(p);
 }
 
 /**
@@ -80,10 +81,10 @@ float convert_u(struct engine* e, struct part* p) {
  *
  * @param e #engine.
  * @param p Particle.
- * @return Entropic function of the particle
+ * @param ret (return) Entropic function of the particle
  */
-float convert_A(struct engine* e, struct part* p) {
-  return hydro_get_entropy(p);
+void convert_A(const struct engine* e, const struct part* p, float* ret) {
+  ret[0] = hydro_get_entropy(p);
 }
 
 /**
@@ -93,9 +94,9 @@ float convert_A(struct engine* e, struct part* p) {
  * @param p Particle.
  * @return Total energy of the particle
  */
-float convert_Etot(struct engine* e, struct part* p) {
+void convert_Etot(const struct engine* e, const struct part* p, float* ret) {
 #ifdef GIZMO_TOTAL_ENERGY
-  return p->conserved.energy;
+  ret[0] = p->conserved.energy;
 #else
   float momentum2;
 
@@ -103,10 +104,24 @@ float convert_Etot(struct engine* e, struct part* p) {
               p->conserved.momentum[1] * p->conserved.momentum[1] +
               p->conserved.momentum[2] * p->conserved.momentum[2];
 
-  return p->conserved.energy + 0.5f * momentum2 / p->conserved.mass;
+  ret[0] = p->conserved.energy + 0.5f * momentum2 / p->conserved.mass;
 #endif
 }
 
+void convert_part_pos(const struct engine* e, const struct part* p,
+                      double* ret) {
+
+  if (e->s->periodic) {
+    ret[0] = box_wrap(p->x[0], 0.0, e->s->dim[0]);
+    ret[1] = box_wrap(p->x[1], 0.0, e->s->dim[1]);
+    ret[2] = box_wrap(p->x[2], 0.0, e->s->dim[2]);
+  } else {
+    ret[0] = p->x[0];
+    ret[1] = p->x[1];
+    ret[2] = p->x[2];
+  }
+}
+
 /**
  * @brief Specifies which particle fields to write to a dataset
  *
@@ -120,8 +135,8 @@ void hydro_write_particles(struct part* parts, struct io_props* list,
   *num_fields = 10;
 
   /* List what we want to write */
-  list[0] = io_make_output_field("Coordinates", DOUBLE, 3, UNIT_CONV_LENGTH,
-                                 parts, x);
+  list[0] = io_make_output_field_convert_part(
+      "Coordinates", DOUBLE, 3, UNIT_CONV_LENGTH, parts, convert_part_pos);
   list[1] = io_make_output_field("Velocities", FLOAT, 3, UNIT_CONV_SPEED, parts,
                                  primitives.v);
   list[2] = io_make_output_field("Masses", FLOAT, 1, UNIT_CONV_MASS, parts,
@@ -130,18 +145,17 @@ void hydro_write_particles(struct part* parts, struct io_props* list,
                                  parts, h);
   list[4] = io_make_output_field_convert_part("InternalEnergy", FLOAT, 1,
                                               UNIT_CONV_ENERGY_PER_UNIT_MASS,
-                                              parts, primitives.P, convert_u);
+                                              parts, convert_u);
   list[5] = io_make_output_field("ParticleIDs", ULONGLONG, 1,
                                  UNIT_CONV_NO_UNITS, parts, id);
   list[6] = io_make_output_field("Density", FLOAT, 1, UNIT_CONV_DENSITY, parts,
                                  primitives.rho);
   list[7] = io_make_output_field_convert_part(
-      "Entropy", FLOAT, 1, UNIT_CONV_ENTROPY, parts, primitives.P, convert_A);
+      "Entropy", FLOAT, 1, UNIT_CONV_ENTROPY, parts, convert_A);
   list[8] = io_make_output_field("Pressure", FLOAT, 1, UNIT_CONV_PRESSURE,
                                  parts, primitives.P);
-  list[9] =
-      io_make_output_field_convert_part("TotEnergy", FLOAT, 1, UNIT_CONV_ENERGY,
-                                        parts, conserved.energy, convert_Etot);
+  list[9] = io_make_output_field_convert_part(
+      "TotEnergy", FLOAT, 1, UNIT_CONV_ENERGY, parts, convert_Etot);
 }
 
 /**
diff --git a/src/hydro/Minimal/hydro.h b/src/hydro/Minimal/hydro.h
index 4d8ca5b05547467c973e17983774b64736060471..905412040cbb8ac9666e555d88cad9bc56de13ab 100644
--- a/src/hydro/Minimal/hydro.h
+++ b/src/hydro/Minimal/hydro.h
@@ -224,7 +224,7 @@ __attribute__((always_inline)) INLINE static void hydro_end_density(
   /* Finish the calculation by inserting the missing h-factors */
   p->rho *= h_inv_dim;
   p->density.rho_dh *= h_inv_dim_plus_one;
-  p->density.wcount *= kernel_norm;
+  p->density.wcount *= h_inv_dim;
   p->density.wcount_dh *= h_inv_dim_plus_one;
 }
 
diff --git a/src/hydro/Minimal/hydro_io.h b/src/hydro/Minimal/hydro_io.h
index 2ec0cb11d1e3ccaaa09d9822c75b396364912df8..771e4ac2d066ea098edb8b2f11d7afd550783347 100644
--- a/src/hydro/Minimal/hydro_io.h
+++ b/src/hydro/Minimal/hydro_io.h
@@ -69,14 +69,28 @@ void hydro_read_particles(struct part* parts, struct io_props* list,
                                 UNIT_CONV_DENSITY, parts, rho);
 }
 
-float convert_S(struct engine* e, struct part* p) {
+void convert_S(const struct engine* e, const struct part* p, float* ret) {
 
-  return hydro_get_entropy(p);
+  ret[0] = hydro_get_entropy(p);
 }
 
-float convert_P(struct engine* e, struct part* p) {
+void convert_P(const struct engine* e, const struct part* p, float* ret) {
 
-  return hydro_get_pressure(p);
+  ret[0] = hydro_get_pressure(p);
+}
+
+void convert_part_pos(const struct engine* e, const struct part* p,
+                      double* ret) {
+
+  if (e->s->periodic) {
+    ret[0] = box_wrap(p->x[0], 0.0, e->s->dim[0]);
+    ret[1] = box_wrap(p->x[1], 0.0, e->s->dim[1]);
+    ret[2] = box_wrap(p->x[2], 0.0, e->s->dim[2]);
+  } else {
+    ret[0] = p->x[0];
+    ret[1] = p->x[1];
+    ret[2] = p->x[2];
+  }
 }
 
 /**
@@ -92,8 +106,8 @@ void hydro_write_particles(struct part* parts, struct io_props* list,
   *num_fields = 10;
 
   /* List what we want to write */
-  list[0] = io_make_output_field("Coordinates", DOUBLE, 3, UNIT_CONV_LENGTH,
-                                 parts, x);
+  list[0] = io_make_output_field_convert_part(
+      "Coordinates", DOUBLE, 3, UNIT_CONV_LENGTH, parts, convert_part_pos);
   list[1] =
       io_make_output_field("Velocities", FLOAT, 3, UNIT_CONV_SPEED, parts, v);
   list[2] =
@@ -108,11 +122,10 @@ void hydro_write_particles(struct part* parts, struct io_props* list,
                                  UNIT_CONV_ACCELERATION, parts, a_hydro);
   list[7] =
       io_make_output_field("Density", FLOAT, 1, UNIT_CONV_DENSITY, parts, rho);
-  list[8] = io_make_output_field_convert_part("Entropy", FLOAT, 1,
-                                              UNIT_CONV_ENTROPY_PER_UNIT_MASS,
-                                              parts, rho, convert_S);
+  list[8] = io_make_output_field_convert_part(
+      "Entropy", FLOAT, 1, UNIT_CONV_ENTROPY_PER_UNIT_MASS, parts, convert_S);
   list[9] = io_make_output_field_convert_part(
-      "Pressure", FLOAT, 1, UNIT_CONV_PRESSURE, parts, rho, convert_P);
+      "Pressure", FLOAT, 1, UNIT_CONV_PRESSURE, parts, convert_P);
 }
 
 /**
diff --git a/src/hydro/PressureEntropy/hydro_io.h b/src/hydro/PressureEntropy/hydro_io.h
index 10243750c01d6bc64664f9834bc4cc245c786f49..d3780261ba587cfac92ec44a65b13f0a4344b3c3 100644
--- a/src/hydro/PressureEntropy/hydro_io.h
+++ b/src/hydro/PressureEntropy/hydro_io.h
@@ -67,14 +67,28 @@ void hydro_read_particles(struct part* parts, struct io_props* list,
                                 UNIT_CONV_DENSITY, parts, rho);
 }
 
-float convert_u(struct engine* e, struct part* p) {
+void convert_u(const struct engine* e, const struct part* p, float* ret) {
 
-  return hydro_get_internal_energy(p);
+  ret[0] = hydro_get_internal_energy(p);
 }
 
-float convert_P(struct engine* e, struct part* p) {
+void convert_P(const struct engine* e, const struct part* p, float* ret) {
 
-  return hydro_get_pressure(p);
+  ret[0] = hydro_get_pressure(p);
+}
+
+void convert_part_pos(const struct engine* e, const struct part* p,
+                      double* ret) {
+
+  if (e->s->periodic) {
+    ret[0] = box_wrap(p->x[0], 0.0, e->s->dim[0]);
+    ret[1] = box_wrap(p->x[1], 0.0, e->s->dim[1]);
+    ret[2] = box_wrap(p->x[2], 0.0, e->s->dim[2]);
+  } else {
+    ret[0] = p->x[0];
+    ret[1] = p->x[1];
+    ret[2] = p->x[2];
+  }
 }
 
 /**
@@ -90,27 +104,27 @@ void hydro_write_particles(struct part* parts, struct io_props* list,
   *num_fields = 11;
 
   /* List what we want to write */
-  list[0] = io_make_output_field("Coordinates", DOUBLE, 3, UNIT_CONV_LENGTH,
-                                 parts, x);
+  list[0] = io_make_output_field_convert_part(
+      "Coordinates", DOUBLE, 3, UNIT_CONV_LENGTH, parts, convert_part_pos);
   list[1] =
       io_make_output_field("Velocities", FLOAT, 3, UNIT_CONV_SPEED, parts, v);
   list[2] =
       io_make_output_field("Masses", FLOAT, 1, UNIT_CONV_MASS, parts, mass);
   list[3] = io_make_output_field("SmoothingLength", FLOAT, 1, UNIT_CONV_LENGTH,
                                  parts, h);
-  list[4] = io_make_output_field_convert_part("InternalEnergy", FLOAT, 1,
-                                              UNIT_CONV_ENERGY_PER_UNIT_MASS,
-                                              parts, entropy, convert_u);
+  list[4] = io_make_output_field(
+      "Entropy", FLOAT, 1, UNIT_CONV_ENTROPY_PER_UNIT_MASS, parts, entropy);
   list[5] = io_make_output_field("ParticleIDs", ULONGLONG, 1,
                                  UNIT_CONV_NO_UNITS, parts, id);
   list[6] = io_make_output_field("Acceleration", FLOAT, 3,
                                  UNIT_CONV_ACCELERATION, parts, a_hydro);
   list[7] =
       io_make_output_field("Density", FLOAT, 1, UNIT_CONV_DENSITY, parts, rho);
-  list[8] = io_make_output_field(
-      "Entropy", FLOAT, 1, UNIT_CONV_ENTROPY_PER_UNIT_MASS, parts, entropy);
+  list[8] = io_make_output_field_convert_part("InternalEnergy", FLOAT, 1,
+                                              UNIT_CONV_ENERGY_PER_UNIT_MASS,
+                                              parts, convert_u);
   list[9] = io_make_output_field_convert_part(
-      "Pressure", FLOAT, 1, UNIT_CONV_PRESSURE, parts, rho, convert_P);
+      "Pressure", FLOAT, 1, UNIT_CONV_PRESSURE, parts, convert_P);
   list[10] = io_make_output_field("WeightedDensity", FLOAT, 1,
                                   UNIT_CONV_DENSITY, parts, rho_bar);
 }
diff --git a/src/hydro/Shadowswift/hydro.h b/src/hydro/Shadowswift/hydro.h
index abbcdcd2f7879d8063a906e44ab2fe6a3e675828..eb9938a341f1e6273f8ea03264bdda6dc69832e6 100644
--- a/src/hydro/Shadowswift/hydro.h
+++ b/src/hydro/Shadowswift/hydro.h
@@ -22,6 +22,7 @@
 #include "approx_math.h"
 #include "equation_of_state.h"
 #include "hydro_gradients.h"
+#include "hydro_properties.h"
 #include "hydro_space.h"
 #include "voronoi_algorithm.h"
 
diff --git a/src/hydro/Shadowswift/hydro_io.h b/src/hydro/Shadowswift/hydro_io.h
index de45b5d68c78f96cee3030eadef4b4410e550c22..f6f6fcc6c070b0add8e2b97678e5ef1ada636bfd 100644
--- a/src/hydro/Shadowswift/hydro_io.h
+++ b/src/hydro/Shadowswift/hydro_io.h
@@ -19,6 +19,7 @@
 
 #include "adiabatic_index.h"
 #include "equation_of_state.h"
+#include "hydro.h"
 #include "hydro_gradients.h"
 #include "hydro_slope_limiters.h"
 #include "io_properties.h"
@@ -63,13 +64,8 @@ void hydro_read_particles(struct part* parts, struct io_props* list,
  * @param p Particle.
  * @return Internal energy of the particle
  */
-float convert_u(struct engine* e, struct part* p) {
-  if (p->primitives.rho > 0.) {
-    return gas_internal_energy_from_pressure(p->primitives.rho,
-                                             p->primitives.P);
-  } else {
-    return 0.;
-  }
+void convert_u(const struct engine* e, const struct part* p, float* ret) {
+  ret[0] = hydro_get_internal_energy(p);
 }
 
 /**
@@ -79,12 +75,8 @@ float convert_u(struct engine* e, struct part* p) {
  * @param p Particle.
  * @return Entropic function of the particle
  */
-float convert_A(struct engine* e, struct part* p) {
-  if (p->primitives.rho > 0.) {
-    return gas_entropy_from_pressure(p->primitives.rho, p->primitives.P);
-  } else {
-    return 0.;
-  }
+void convert_A(const struct engine* e, const struct part* p, float* ret) {
+  ret[0] = hydro_get_entropy(p);
 }
 
 /**
@@ -94,7 +86,7 @@ float convert_A(struct engine* e, struct part* p) {
  * @param p Particle.
  * @return Total energy of the particle
  */
-float convert_Etot(struct engine* e, struct part* p) {
+void convert_Etot(const struct engine* e, const struct part* p, float* ret) {
 #ifdef SHADOWFAX_TOTAL_ENERGY
   return p->conserved.energy;
 #else
@@ -105,13 +97,27 @@ float convert_Etot(struct engine* e, struct part* p) {
                 p->conserved.momentum[1] * p->conserved.momentum[1] +
                 p->conserved.momentum[2] * p->conserved.momentum[2];
 
-    return p->conserved.energy + 0.5f * momentum2 / p->conserved.mass;
+    ret[0] = p->conserved.energy + 0.5f * momentum2 / p->conserved.mass;
   } else {
-    return 0.;
+    ret[0] = 0.;
   }
 #endif
 }
 
+void convert_part_pos(const struct engine* e, const struct part* p,
+                      double* ret) {
+
+  if (e->s->periodic) {
+    ret[0] = box_wrap(p->x[0], 0.0, e->s->dim[0]);
+    ret[1] = box_wrap(p->x[1], 0.0, e->s->dim[1]);
+    ret[2] = box_wrap(p->x[2], 0.0, e->s->dim[2]);
+  } else {
+    ret[0] = p->x[0];
+    ret[1] = p->x[1];
+    ret[2] = p->x[2];
+  }
+}
+
 /**
  * @brief Specifies which particle fields to write to a dataset
  *
@@ -125,8 +131,8 @@ void hydro_write_particles(struct part* parts, struct io_props* list,
   *num_fields = 13;
 
   /* List what we want to write */
-  list[0] = io_make_output_field("Coordinates", DOUBLE, 3, UNIT_CONV_LENGTH,
-                                 parts, x);
+  list[0] = io_make_output_field_convert_part(
+      "Coordinates", DOUBLE, 3, UNIT_CONV_LENGTH, parts, convert_part_pos);
   list[1] = io_make_output_field("Velocities", FLOAT, 3, UNIT_CONV_SPEED, parts,
                                  primitives.v);
   list[2] = io_make_output_field("Masses", FLOAT, 1, UNIT_CONV_MASS, parts,
@@ -135,7 +141,7 @@ void hydro_write_particles(struct part* parts, struct io_props* list,
                                  parts, h);
   list[4] = io_make_output_field_convert_part("InternalEnergy", FLOAT, 1,
                                               UNIT_CONV_ENERGY_PER_UNIT_MASS,
-                                              parts, primitives.P, convert_u);
+                                              parts, convert_u);
   list[5] = io_make_output_field("ParticleIDs", ULONGLONG, 1,
                                  UNIT_CONV_NO_UNITS, parts, id);
   list[6] = io_make_output_field("Acceleration", FLOAT, 3,
@@ -147,12 +153,11 @@ void hydro_write_particles(struct part* parts, struct io_props* list,
   list[9] = io_make_output_field("GradDensity", FLOAT, 3, UNIT_CONV_DENSITY,
                                  parts, primitives.gradients.rho);
   list[10] = io_make_output_field_convert_part(
-      "Entropy", FLOAT, 1, UNIT_CONV_ENTROPY, parts, primitives.P, convert_A);
+      "Entropy", FLOAT, 1, UNIT_CONV_ENTROPY, parts, convert_A);
   list[11] = io_make_output_field("Pressure", FLOAT, 1, UNIT_CONV_PRESSURE,
                                   parts, primitives.P);
-  list[12] =
-      io_make_output_field_convert_part("TotEnergy", FLOAT, 1, UNIT_CONV_ENERGY,
-                                        parts, conserved.energy, convert_Etot);
+  list[12] = io_make_output_field_convert_part(
+      "TotEnergy", FLOAT, 1, UNIT_CONV_ENERGY, parts, convert_Etot);
 }
 
 /**
diff --git a/src/io_properties.h b/src/io_properties.h
index 9fcf1a1ac67cae6afab6870369e51d06c752fc11..2142d8d555fb52258d3443f20d14c72cf7568045 100644
--- a/src/io_properties.h
+++ b/src/io_properties.h
@@ -16,19 +16,31 @@
  * along with this program.  If not, see <http://www.gnu.org/licenses/>.
  *
  ******************************************************************************/
-
 #ifndef SWIFT_IO_PROPERTIES_H
 #define SWIFT_IO_PROPERTIES_H
 
 /* Config parameters. */
 #include "../config.h"
 
+/* Local includes. */
+#include "inline.h"
+
 /**
  * @brief The two sorts of data present in the GADGET IC files: compulsory to
  * start a run or optional.
  */
 enum DATA_IMPORTANCE { COMPULSORY = 1, OPTIONAL = 0, UNUSED = -1 };
 
+/* Helper typedefs */
+typedef void (*conversion_func_part_float)(const struct engine*,
+                                           const struct part*, float*);
+typedef void (*conversion_func_part_double)(const struct engine*,
+                                            const struct part*, double*);
+typedef void (*conversion_func_gpart_float)(const struct engine*,
+                                            const struct gpart*, float*);
+typedef void (*conversion_func_gpart_double)(const struct engine*,
+                                             const struct gpart*, double*);
+
 /**
  * @brief The properties of a given dataset for i/o
  */
@@ -52,18 +64,31 @@ struct io_props {
   /* Pointer to the field of the first particle in the array */
   char* field;
 
+  /* Pointer to the start of the temporary buffer used in i/o */
+  char* start_temp_c;
+  float* start_temp_f;
+  double* start_temp_d;
+
+  /* Pointer to the engine */
+  const struct engine* e;
+
   /* The size of the particles */
   size_t partSize;
 
   /* The particle arrays */
-  struct part* parts;
-  struct gpart* gparts;
+  const struct part* parts;
+  const struct gpart* gparts;
+
+  /* Are we converting? */
+  int conversion;
 
   /* Conversion function for part */
-  float (*convert_part)(struct engine*, struct part*);
+  conversion_func_part_float convert_part_f;
+  conversion_func_part_double convert_part_d;
 
   /* Conversion function for gpart */
-  float (*convert_gpart)(struct engine*, struct gpart*);
+  conversion_func_gpart_float convert_gpart_f;
+  conversion_func_gpart_double convert_gpart_d;
 };
 
 /**
@@ -86,11 +111,10 @@ struct io_props {
  *
  * Do not call this function directly. Use the macro defined above.
  */
-struct io_props io_make_input_field_(char name[FIELD_BUFFER_SIZE],
-                                     enum IO_DATA_TYPE type, int dimension,
-                                     enum DATA_IMPORTANCE importance,
-                                     enum unit_conversion_factor units,
-                                     char* field, size_t partSize) {
+INLINE static struct io_props io_make_input_field_(
+    char name[FIELD_BUFFER_SIZE], enum IO_DATA_TYPE type, int dimension,
+    enum DATA_IMPORTANCE importance, enum unit_conversion_factor units,
+    char* field, size_t partSize) {
   struct io_props r;
   strcpy(r.name, name);
   r.type = type;
@@ -101,8 +125,11 @@ struct io_props io_make_input_field_(char name[FIELD_BUFFER_SIZE],
   r.partSize = partSize;
   r.parts = NULL;
   r.gparts = NULL;
-  r.convert_part = NULL;
-  r.convert_gpart = NULL;
+  r.conversion = 0;
+  r.convert_part_f = NULL;
+  r.convert_part_d = NULL;
+  r.convert_gpart_f = NULL;
+  r.convert_gpart_d = NULL;
 
   return r;
 }
@@ -126,10 +153,9 @@ struct io_props io_make_input_field_(char name[FIELD_BUFFER_SIZE],
  *
  * Do not call this function directly. Use the macro defined above.
  */
-struct io_props io_make_output_field_(char name[FIELD_BUFFER_SIZE],
-                                      enum IO_DATA_TYPE type, int dimension,
-                                      enum unit_conversion_factor units,
-                                      char* field, size_t partSize) {
+INLINE static struct io_props io_make_output_field_(
+    char name[FIELD_BUFFER_SIZE], enum IO_DATA_TYPE type, int dimension,
+    enum unit_conversion_factor units, char* field, size_t partSize) {
   struct io_props r;
   strcpy(r.name, name);
   r.type = type;
@@ -140,8 +166,11 @@ struct io_props io_make_output_field_(char name[FIELD_BUFFER_SIZE],
   r.partSize = partSize;
   r.parts = NULL;
   r.gparts = NULL;
-  r.convert_part = NULL;
-  r.convert_gpart = NULL;
+  r.conversion = 0;
+  r.convert_part_f = NULL;
+  r.convert_part_d = NULL;
+  r.convert_gpart_f = NULL;
+  r.convert_gpart_d = NULL;
 
   return r;
 }
@@ -149,11 +178,10 @@ struct io_props io_make_output_field_(char name[FIELD_BUFFER_SIZE],
 /**
  * @brief Constructs an #io_props (with conversion) from its parameters
  */
-#define io_make_output_field_convert_part(name, type, dim, units, part, field, \
-                                          convert)                             \
-  io_make_output_field_convert_part_(name, type, dim, units,                   \
-                                     (char*)(&(part[0]).field),                \
-                                     sizeof(part[0]), part, convert)
+#define io_make_output_field_convert_part(name, type, dim, units, part, \
+                                          convert)                      \
+  io_make_output_field_convert_part_##type(name, type, dim, units,      \
+                                           sizeof(part[0]), part, convert)
 
 /**
  * @brief Construct an #io_props from its parameters
@@ -162,17 +190,16 @@ struct io_props io_make_output_field_(char name[FIELD_BUFFER_SIZE],
  * @param type The type of the data
  * @param dimension Dataset dimension (1D, 3D, ...)
  * @param units The units of the dataset
- * @param field Pointer to the field of the first particle
  * @param partSize The size in byte of the particle
  * @param parts The particle array
  * @param functionPtr The function used to convert a particle to a float
  *
  * Do not call this function directly. Use the macro defined above.
  */
-struct io_props io_make_output_field_convert_part_(
+INLINE static struct io_props io_make_output_field_convert_part_FLOAT(
     char name[FIELD_BUFFER_SIZE], enum IO_DATA_TYPE type, int dimension,
-    enum unit_conversion_factor units, char* field, size_t partSize,
-    struct part* parts, float (*functionPtr)(struct engine*, struct part*)) {
+    enum unit_conversion_factor units, size_t partSize,
+    const struct part* parts, conversion_func_part_float functionPtr) {
 
   struct io_props r;
   strcpy(r.name, name);
@@ -180,12 +207,52 @@ struct io_props io_make_output_field_convert_part_(
   r.dimension = dimension;
   r.importance = UNUSED;
   r.units = units;
-  r.field = field;
+  r.field = NULL;
+  r.partSize = partSize;
+  r.parts = parts;
+  r.gparts = NULL;
+  r.conversion = 1;
+  r.convert_part_f = functionPtr;
+  r.convert_part_d = NULL;
+  r.convert_gpart_f = NULL;
+  r.convert_gpart_d = NULL;
+
+  return r;
+}
+
+/**
+ * @brief Construct an #io_props from its parameters
+ *
+ * @param name Name of the field to read
+ * @param type The type of the data
+ * @param dimension Dataset dimension (1D, 3D, ...)
+ * @param units The units of the dataset
+ * @param partSize The size in byte of the particle
+ * @param parts The particle array
+ * @param functionPtr The function used to convert a particle to a float
+ *
+ * Do not call this function directly. Use the macro defined above.
+ */
+INLINE static struct io_props io_make_output_field_convert_part_DOUBLE(
+    char name[FIELD_BUFFER_SIZE], enum IO_DATA_TYPE type, int dimension,
+    enum unit_conversion_factor units, size_t partSize,
+    const struct part* parts, conversion_func_part_double functionPtr) {
+
+  struct io_props r;
+  strcpy(r.name, name);
+  r.type = type;
+  r.dimension = dimension;
+  r.importance = UNUSED;
+  r.units = units;
+  r.field = NULL;
   r.partSize = partSize;
   r.parts = parts;
   r.gparts = NULL;
-  r.convert_part = functionPtr;
-  r.convert_gpart = NULL;
+  r.conversion = 1;
+  r.convert_part_f = NULL;
+  r.convert_part_d = functionPtr;
+  r.convert_gpart_f = NULL;
+  r.convert_gpart_d = NULL;
 
   return r;
 }
@@ -193,11 +260,10 @@ struct io_props io_make_output_field_convert_part_(
 /**
  * @brief Constructs an #io_props (with conversion) from its parameters
  */
-#define io_make_output_field_convert_gpart(name, type, dim, units, part, \
-                                           field, convert)               \
-  io_make_output_field_convert_gpart_(name, type, dim, units,            \
-                                      (char*)(&(part[0]).field),         \
-                                      sizeof(part[0]), gpart, convert)
+#define io_make_output_field_convert_gpart(name, type, dim, units, gpart, \
+                                           convert)                       \
+  io_make_output_field_convert_gpart_##type(name, type, dim, units,       \
+                                            sizeof(gpart[0]), gpart, convert)
 
 /**
  * @brief Construct an #io_props from its parameters
@@ -206,17 +272,16 @@ struct io_props io_make_output_field_convert_part_(
  * @param type The type of the data
  * @param dimension Dataset dimension (1D, 3D, ...)
  * @param units The units of the dataset
- * @param field Pointer to the field of the first particle
- * @param partSize The size in byte of the particle
+ * @param gpartSize The size in byte of the particle
  * @param gparts The particle array
  * @param functionPtr The function used to convert a g-particle to a float
  *
  * Do not call this function directly. Use the macro defined above.
  */
-struct io_props io_make_output_field_convert_gpart_(
+INLINE static struct io_props io_make_output_field_convert_gpart_FLOAT(
     char name[FIELD_BUFFER_SIZE], enum IO_DATA_TYPE type, int dimension,
-    enum unit_conversion_factor units, char* field, size_t partSize,
-    struct gpart* gparts, float (*functionPtr)(struct engine*, struct gpart*)) {
+    enum unit_conversion_factor units, size_t gpartSize,
+    const struct gpart* gparts, conversion_func_gpart_float functionPtr) {
 
   struct io_props r;
   strcpy(r.name, name);
@@ -224,12 +289,52 @@ struct io_props io_make_output_field_convert_gpart_(
   r.dimension = dimension;
   r.importance = UNUSED;
   r.units = units;
-  r.field = field;
-  r.partSize = partSize;
+  r.field = NULL;
+  r.partSize = gpartSize;
+  r.parts = NULL;
+  r.gparts = gparts;
+  r.conversion = 1;
+  r.convert_part_f = NULL;
+  r.convert_part_d = NULL;
+  r.convert_gpart_f = functionPtr;
+  r.convert_gpart_d = NULL;
+
+  return r;
+}
+
+/**
+ * @brief Construct an #io_props from its parameters
+ *
+ * @param name Name of the field to read
+ * @param type The type of the data
+ * @param dimension Dataset dimension (1D, 3D, ...)
+ * @param units The units of the dataset
+ * @param gpartSize The size in byte of the particle
+ * @param gparts The particle array
+ * @param functionPtr The function used to convert a g-particle to a float
+ *
+ * Do not call this function directly. Use the macro defined above.
+ */
+INLINE static struct io_props io_make_output_field_convert_gpart_DOUBLE(
+    char name[FIELD_BUFFER_SIZE], enum IO_DATA_TYPE type, int dimension,
+    enum unit_conversion_factor units, size_t gpartSize,
+    const struct gpart* gparts, conversion_func_gpart_double functionPtr) {
+
+  struct io_props r;
+  strcpy(r.name, name);
+  r.type = type;
+  r.dimension = dimension;
+  r.importance = UNUSED;
+  r.units = units;
+  r.field = NULL;
+  r.partSize = gpartSize;
   r.parts = NULL;
   r.gparts = gparts;
-  r.convert_part = NULL;
-  r.convert_gpart = functionPtr;
+  r.conversion = 1;
+  r.convert_part_f = NULL;
+  r.convert_part_d = NULL;
+  r.convert_gpart_f = NULL;
+  r.convert_gpart_d = functionPtr;
 
   return r;
 }
diff --git a/src/parallel_io.c b/src/parallel_io.c
index b31d3b6351b00af4a042e4b28bf492258b17b073..e11c43c79badcc40bb76b7eee09348571e1841cc 100644
--- a/src/parallel_io.c
+++ b/src/parallel_io.c
@@ -51,6 +51,9 @@
 #include "units.h"
 #include "xmf.h"
 
+/* Are we timing the i/o? */
+//#define IO_SPEED_MEASUREMENT
+
 /* The current limit of ROMIO (the underlying MPI-IO layer) is 2GB */
 #define HDF5_PARALLEL_IO_MAX_BYTES 2000000000LL
 
@@ -252,7 +255,6 @@ void writeArray_chunk(struct engine* e, hid_t h_data, hid_t h_plist_id,
                       const struct unit_system* snapshot_units) {
 
   const size_t typeSize = io_sizeof_type(props.type);
-  const size_t copySize = typeSize * props.dimension;
   const size_t num_elements = N * props.dimension;
 
   /* Can't handle writes of more than 2GB */
@@ -263,44 +265,24 @@ void writeArray_chunk(struct engine* e, hid_t h_data, hid_t h_plist_id,
 
   /* Allocate temporary buffer */
   void* temp = malloc(num_elements * typeSize);
-  if (temp == NULL) error("Unable to allocate memory for temporary buffer");
-
-  /* Copy particle data to temporary buffer */
-  if (props.convert_part == NULL &&
-      props.convert_gpart == NULL) { /* No conversion */
-
-    char* temp_c = temp;
-    for (size_t i = 0; i < N; ++i)
-      memcpy(&temp_c[i * copySize], props.field + i * props.partSize, copySize);
-
-  } else if (props.convert_part != NULL) { /* conversion (for parts)*/
-
-    float* temp_f = temp;
-    for (size_t i = 0; i < N; ++i)
-      temp_f[i] = props.convert_part(e, &props.parts[i]);
+  if (posix_memalign((void**)&temp, IO_BUFFER_ALIGNMENT,
+                     num_elements * typeSize) != 0)
+    error("Unable to allocate temporary i/o buffer");
 
-  } else if (props.convert_gpart != NULL) { /* conversion (for gparts)*/
+#ifdef IO_SPEED_MEASUREMENT
+  MPI_Barrier(MPI_COMM_WORLD);
+  ticks tic = getticks();
+#endif
 
-    float* temp_f = temp;
-    for (size_t i = 0; i < N; ++i)
-      temp_f[i] = props.convert_gpart(e, &props.gparts[i]);
-  }
+  /* Copy the particle data to the temporary buffer */
+  io_copy_temp_buffer(temp, e, props, N, internal_units, snapshot_units);
 
-  /* Unit conversion if necessary */
-  const double factor =
-      units_conversion_factor(internal_units, snapshot_units, props.units);
-  if (factor != 1.) {
-
-    /* message("Converting ! factor=%e", factor); */
-
-    if (io_is_double_precision(props.type)) {
-      double* temp_d = temp;
-      for (size_t i = 0; i < num_elements; ++i) temp_d[i] *= factor;
-    } else {
-      float* temp_f = temp;
-      for (size_t i = 0; i < num_elements; ++i) temp_f[i] *= factor;
-    }
-  }
+#ifdef IO_SPEED_MEASUREMENT
+  MPI_Barrier(MPI_COMM_WORLD);
+  if(engine_rank == 0)
+    message("Copying for '%s' took %.3f %s." , props.name,
+	    clocks_from_ticks(getticks() - tic), clocks_getunit());
+#endif
 
   /* Create data space */
   const hid_t h_memspace = H5Screate(H5S_SIMPLE);
@@ -343,10 +325,14 @@ void writeArray_chunk(struct engine* e, hid_t h_data, hid_t h_plist_id,
   }
 
   /* message("Writing %lld '%s', %zd elements = %zd bytes (int=%d) at offset
-   * %zd", */
-  /* 	  N, props.name, N * props.dimension, N * props.dimension * typeSize, */
+   * %zd", N, props.name, N * props.dimension, N * props.dimension * typeSize, */
   /* 	  (int)(N * props.dimension * typeSize), offset); */
 
+#ifdef IO_SPEED_MEASUREMENT
+  MPI_Barrier(MPI_COMM_WORLD);
+  tic = getticks();
+#endif
+
   /* Write temporary buffer to HDF5 dataspace */
   h_err = H5Dwrite(h_data, io_hdf5_type(props.type), h_memspace, h_filespace,
                    h_plist_id, temp);
@@ -354,6 +340,18 @@ void writeArray_chunk(struct engine* e, hid_t h_data, hid_t h_plist_id,
     error("Error while writing data array '%s'.", props.name);
   }
 
+#ifdef IO_SPEED_MEASUREMENT
+  MPI_Barrier(MPI_COMM_WORLD);
+  ticks toc = getticks();
+  float ms = clocks_from_ticks(toc - tic);
+  int megaBytes = N * props.dimension * typeSize / (1024 * 1024);
+  int total = 0;
+  MPI_Reduce(&megaBytes, &total, 1, MPI_INT, MPI_SUM, 0, MPI_COMM_WORLD);
+  if (engine_rank == 0)
+    message("H5Dwrite for '%s' (%d MB) took %.3f %s (speed = %f MB/s).",
+            props.name, total, ms, clocks_getunit(), total / (ms / 1000.));
+#endif
+
   /* Free and close everything */
   free(temp);
   H5Sclose(h_memspace);
@@ -385,6 +383,10 @@ void writeArray(struct engine* e, hid_t grp, char* fileName, FILE* xmfFile,
 
   const size_t typeSize = io_sizeof_type(props.type);
 
+#ifdef IO_SPEED_MEASUREMENT
+  const ticks tic = getticks();
+#endif
+
   /* Work out properties of the array in the file */
   int rank;
   hsize_t shape_total[2];
@@ -497,6 +499,13 @@ void writeArray(struct engine* e, hid_t grp, char* fileName, FILE* xmfFile,
   H5Pclose(h_prop);
   H5Dclose(h_data);
   H5Pclose(h_plist_id);
+
+#ifdef IO_SPEED_MEASUREMENT
+  MPI_Barrier(MPI_COMM_WORLD);
+  if(engine_rank == 0)
+    message("'%s' took %.3f %s." , props.name,
+	    clocks_from_ticks(getticks() - tic), clocks_getunit());
+#endif
 }
 
 /**
@@ -617,7 +626,7 @@ void read_ic_parallel(char* fileName, const struct unit_system* internal_units,
   /* Read the unit system used in the ICs */
   struct unit_system* ic_units = malloc(sizeof(struct unit_system));
   if (ic_units == NULL) error("Unable to allocate memory for IC unit system");
-  io_read_unit_system(h_file, ic_units);
+  io_read_unit_system(h_file, ic_units, mpi_rank);
 
   /* Tell the user if a conversion will be needed */
   if (mpi_rank == 0) {
@@ -735,7 +744,9 @@ void read_ic_parallel(char* fileName, const struct unit_system* internal_units,
         break;
 
       default:
-        message("Particle Type %d not yet supported. Particles ignored", ptype);
+        if (mpi_rank == 0)
+          message("Particle Type %d not yet supported. Particles ignored",
+                  ptype);
     }
 
     /* Read everything */
@@ -825,16 +836,42 @@ void write_output_parallel(struct engine* e, const char* baseName,
   /* Prepare the XMF file for the new entry */
   if (mpi_rank == 0) xmfFile = xmf_prepare_file(baseName);
 
-  /* Open HDF5 file */
+  /* Prepare some file-access properties */
   hid_t plist_id = H5Pcreate(H5P_FILE_ACCESS);
-  H5Pset_fapl_mpio(plist_id, comm, info);
+
+  /* Set some MPI-IO parameters */
+  // MPI_Info_set(info, "IBM_largeblock_io", "true");
+  MPI_Info_set(info, "romio_cb_write", "enable");
+  MPI_Info_set(info, "romio_ds_write", "disable");
+
+  /* Activate parallel i/o */
+  hid_t h_err = H5Pset_fapl_mpio(plist_id, comm, info);
+  if (h_err < 0) error("Error setting parallel i/o");
+  
+  /* Align on 4k pages. */
+  h_err = H5Pset_alignment(plist_id, 1024, 4096);
+  if (h_err < 0) error("Error setting Hdf5 alignment");
+
+  /* Disable meta-data cache eviction */
+  H5AC_cache_config_t mdc_config;
+  mdc_config.version = H5AC__CURR_CACHE_CONFIG_VERSION;
+  h_err = H5Pget_mdc_config(plist_id, &mdc_config);
+  if (h_err < 0) error("Error getting the MDC config");
+
+  mdc_config.evictions_enabled = 0; /* false */
+  mdc_config.incr_mode = H5C_incr__off;
+  mdc_config.decr_mode = H5C_decr__off;
+  mdc_config.flash_incr_mode = H5C_flash_incr__off;
+  h_err = H5Pset_mdc_config(plist_id, &mdc_config);
+  if (h_err < 0) error("Error setting the MDC config");
+
+  /* Open HDF5 file with the chosen parameters */
   h_file = H5Fcreate(fileName, H5F_ACC_TRUNC, H5P_DEFAULT, plist_id);
   if (h_file < 0) {
     error("Error while opening file '%s'.", fileName);
   }
 
-  /* Compute offset in the file and total number of
-   * particles */
+  /* Compute offset in the file and total number of particles */
   size_t N[swift_type_count] = {Ngas, Ndm, 0, 0, Nstars, 0};
   long long N_total[swift_type_count] = {0};
   long long offset[swift_type_count] = {0};
@@ -847,8 +884,7 @@ void write_output_parallel(struct engine* e, const char* baseName,
   MPI_Bcast(&N_total, 6, MPI_LONG_LONG_INT, mpi_size - 1, comm);
 
   /* Now everybody konws its offset and the total number of
-   * particles of each
-   * type */
+   * particles of each type */
 
   /* Write the part of the XMF file corresponding to this
    * specific output */
diff --git a/src/runner_doiact_vec.c b/src/runner_doiact_vec.c
index 5c29f1d6feb4c2bc542397ef2f0d1b9ae2b49a65..ce175878fa4641ab0becd7132acbe856eb3fcd20 100644
--- a/src/runner_doiact_vec.c
+++ b/src/runner_doiact_vec.c
@@ -26,7 +26,8 @@
 /* Local headers. */
 #include "active.h"
 
-#ifdef WITH_VECTORIZATION
+#if defined(WITH_VECTORIZATION) && defined(GADGET2_SPH)
+
 static const vector kernel_gamma2_vec = FILL_VEC(kernel_gamma2);
 
 /**
@@ -515,7 +516,7 @@ populate_max_index_no_cache_force(const struct cell *ci, const struct cell *cj,
   *init_pj = last_pj;
 }
 
-#endif /* WITH_VECTORIZATION */
+#endif /* WITH_VECTORIZATION && GADGET2_SPH */
 
 /**
  * @brief Compute the cell self-interaction (non-symmetric) using vector
@@ -527,7 +528,8 @@ populate_max_index_no_cache_force(const struct cell *ci, const struct cell *cj,
 __attribute__((always_inline)) INLINE void runner_doself1_density_vec(
     struct runner *r, struct cell *restrict c) {
 
-#ifdef WITH_VECTORIZATION
+#if defined(WITH_VECTORIZATION) && defined(GADGET2_SPH)
+
   /* Get some local variables */
   const struct engine *e = r->e;
   const timebin_t max_active_bin = e->max_active_bin;
@@ -723,6 +725,11 @@ __attribute__((always_inline)) INLINE void runner_doself1_density_vec(
   } /* loop over all particles. */
 
   TIMER_TOC(timer_doself_density);
+
+#else
+
+  error("Incorrectly calling vectorized Gadget-2 functions!");
+
 #endif /* WITH_VECTORIZATION */
 }
 
@@ -740,10 +747,11 @@ __attribute__((always_inline)) INLINE void runner_doself_subset_density_vec(
     struct runner *r, struct cell *restrict c, struct part *restrict parts,
     int *restrict ind, int pi_count) {
 
-#ifdef WITH_VECTORIZATION
+#if defined(WITH_VECTORIZATION) && defined(GADGET2_SPH)
+
   const int count = c->count;
 
-  TIMER_TIC
+  TIMER_TIC;
 
   /* Get the particle cache from the runner and re-allocate
    * the cache if it is not big enough for the cell. */
@@ -925,6 +933,11 @@ __attribute__((always_inline)) INLINE void runner_doself_subset_density_vec(
   } /* loop over all particles. */
 
   TIMER_TOC(timer_doself_subset);
+
+#else
+
+  error("Incorrectly calling vectorized Gadget-2 functions!");
+
 #endif /* WITH_VECTORIZATION */
 }
 
@@ -938,7 +951,8 @@ __attribute__((always_inline)) INLINE void runner_doself_subset_density_vec(
 __attribute__((always_inline)) INLINE void runner_doself2_force_vec(
     struct runner *r, struct cell *restrict c) {
 
-#ifdef WITH_VECTORIZATION
+#if defined(WITH_VECTORIZATION) && defined(GADGET2_SPH)
+
   const struct engine *e = r->e;
   const timebin_t max_active_bin = e->max_active_bin;
   struct part *restrict parts = c->parts;
@@ -1097,6 +1111,11 @@ __attribute__((always_inline)) INLINE void runner_doself2_force_vec(
   } /* loop over all particles. */
 
   TIMER_TOC(timer_doself_force);
+
+#else
+
+  error("Incorrectly calling vectorized Gadget-2 functions!");
+
 #endif /* WITH_VECTORIZATION */
 }
 
@@ -1114,7 +1133,8 @@ void runner_dopair1_density_vec(struct runner *r, struct cell *ci,
                                 struct cell *cj, const int sid,
                                 const double *shift) {
 
-#ifdef WITH_VECTORIZATION
+#if defined(WITH_VECTORIZATION) && defined(GADGET2_SPH)
+
   const struct engine *restrict e = r->e;
   const timebin_t max_active_bin = e->max_active_bin;
 
@@ -1442,6 +1462,10 @@ void runner_dopair1_density_vec(struct runner *r, struct cell *ci,
 
   TIMER_TOC(timer_dopair_density);
 
+#else
+
+  error("Incorrectly calling vectorized Gadget-2 functions!");
+
 #endif /* WITH_VECTORIZATION */
 }
 
@@ -1459,7 +1483,8 @@ void runner_dopair2_force_vec(struct runner *r, struct cell *ci,
                               struct cell *cj, const int sid,
                               const double *shift) {
 
-#ifdef WITH_VECTORIZATION
+#if defined(WITH_VECTORIZATION) && defined(GADGET2_SPH)
+
   const struct engine *restrict e = r->e;
   const timebin_t max_active_bin = e->max_active_bin;
 
@@ -1816,5 +1841,9 @@ void runner_dopair2_force_vec(struct runner *r, struct cell *ci,
     TIMER_TOC(timer_dopair_density);
   }
 
+#else
+
+  error("Incorrectly calling vectorized Gadget-2 functions!");
+
 #endif /* WITH_VECTORIZATION */
 }
diff --git a/src/serial_io.c b/src/serial_io.c
index eb1e0e23fb34fd8d6a21230d9e38cfe82c47df1d..f8e3927c4a09a8a94afff5287a42f2b10afac9b4 100644
--- a/src/serial_io.c
+++ b/src/serial_io.c
@@ -176,9 +176,9 @@ void readArray(hid_t grp, const struct io_props props, size_t N,
  * Routines writing an output file
  *-----------------------------------------------------------------------------*/
 
-void prepareArray(struct engine* e, hid_t grp, char* fileName, FILE* xmfFile,
-                  char* partTypeGroupName, const struct io_props props,
-                  unsigned long long N_total,
+void prepareArray(const struct engine* e, hid_t grp, char* fileName,
+                  FILE* xmfFile, char* partTypeGroupName,
+                  const struct io_props props, unsigned long long N_total,
                   const struct unit_system* internal_units,
                   const struct unit_system* snapshot_units) {
 
@@ -282,14 +282,14 @@ void prepareArray(struct engine* e, hid_t grp, char* fileName, FILE* xmfFile,
  * @todo A better version using HDF5 hyper-slabs to write the file directly from
  * the part array will be written once the structures have been stabilized.
  */
-void writeArray(struct engine* e, hid_t grp, char* fileName, FILE* xmfFile,
-                char* partTypeGroupName, const struct io_props props, size_t N,
-                long long N_total, int mpi_rank, long long offset,
+void writeArray(const struct engine* e, hid_t grp, char* fileName,
+                FILE* xmfFile, char* partTypeGroupName,
+                const struct io_props props, size_t N, long long N_total,
+                int mpi_rank, long long offset,
                 const struct unit_system* internal_units,
                 const struct unit_system* snapshot_units) {
 
   const size_t typeSize = io_sizeof_type(props.type);
-  const size_t copySize = typeSize * props.dimension;
   const size_t num_elements = N * props.dimension;
 
   /* message("Writing '%s' array...", props.name); */
@@ -300,45 +300,13 @@ void writeArray(struct engine* e, hid_t grp, char* fileName, FILE* xmfFile,
                  internal_units, snapshot_units);
 
   /* Allocate temporary buffer */
-  void* temp = malloc(num_elements * io_sizeof_type(props.type));
-  if (temp == NULL) error("Unable to allocate memory for temporary buffer");
-
-  /* Copy particle data to temporary buffer */
-  if (props.convert_part == NULL &&
-      props.convert_gpart == NULL) { /* No conversion */
-
-    char* temp_c = temp;
-    for (size_t i = 0; i < N; ++i)
-      memcpy(&temp_c[i * copySize], props.field + i * props.partSize, copySize);
-
-  } else if (props.convert_part != NULL) { /* conversion (for parts)*/
-
-    float* temp_f = temp;
-    for (size_t i = 0; i < N; ++i)
-      temp_f[i] = props.convert_part(e, &props.parts[i]);
-
-  } else if (props.convert_gpart != NULL) { /* conversion (for gparts)*/
-
-    float* temp_f = temp;
-    for (size_t i = 0; i < N; ++i)
-      temp_f[i] = props.convert_gpart(e, &props.gparts[i]);
-  }
-
-  /* Unit conversion if necessary */
-  const double factor =
-      units_conversion_factor(internal_units, snapshot_units, props.units);
-  if (factor != 1.) {
-
-    /* message("Converting ! factor=%e", factor); */
+  void* temp = NULL;
+  if (posix_memalign((void**)&temp, IO_BUFFER_ALIGNMENT,
+                     num_elements * typeSize) != 0)
+    error("Unable to allocate temporary i/o buffer");
 
-    if (io_is_double_precision(props.type)) {
-      double* temp_d = temp;
-      for (size_t i = 0; i < num_elements; ++i) temp_d[i] *= factor;
-    } else {
-      float* temp_f = temp;
-      for (size_t i = 0; i < num_elements; ++i) temp_f[i] *= factor;
-    }
-  }
+  /* Copy the particle data to the temporary buffer */
+  io_copy_temp_buffer(temp, e, props, N, internal_units, snapshot_units);
 
   /* Construct information for the hyper-slab */
   int rank;
@@ -512,7 +480,7 @@ void read_ic_serial(char* fileName, const struct unit_system* internal_units,
 
     /* Read the unit system used in the ICs */
     if (ic_units == NULL) error("Unable to allocate memory for IC unit system");
-    io_read_unit_system(h_file, ic_units);
+    io_read_unit_system(h_file, ic_units, mpi_rank);
 
     if (units_are_equal(ic_units, internal_units)) {
 
@@ -656,8 +624,9 @@ void read_ic_serial(char* fileName, const struct unit_system* internal_units,
             break;
 
           default:
-            message("Particle Type %d not yet supported. Particles ignored",
-                    ptype);
+            if (mpi_rank == 0)
+              message("Particle Type %d not yet supported. Particles ignored",
+                      ptype);
         }
 
         /* Read everything */
diff --git a/src/single_io.c b/src/single_io.c
index 194563352dff5570b8703f828fac95bccbf7409f..a8199af6a84d3f1bdffd746e5a1a49e6e2518dca 100644
--- a/src/single_io.c
+++ b/src/single_io.c
@@ -169,57 +169,25 @@ void readArray(hid_t h_grp, const struct io_props prop, size_t N,
  * @todo A better version using HDF5 hyper-slabs to write the file directly from
  * the part array will be written once the structures have been stabilized.
  */
-void writeArray(struct engine* e, hid_t grp, char* fileName, FILE* xmfFile,
-                char* partTypeGroupName, const struct io_props props, size_t N,
+void writeArray(const struct engine* e, hid_t grp, char* fileName,
+                FILE* xmfFile, char* partTypeGroupName,
+                const struct io_props props, size_t N,
                 const struct unit_system* internal_units,
                 const struct unit_system* snapshot_units) {
 
   const size_t typeSize = io_sizeof_type(props.type);
-  const size_t copySize = typeSize * props.dimension;
   const size_t num_elements = N * props.dimension;
 
   /* message("Writing '%s' array...", props.name); */
 
   /* Allocate temporary buffer */
-  void* temp = malloc(num_elements * io_sizeof_type(props.type));
-  if (temp == NULL) error("Unable to allocate memory for temporary buffer");
-
-  /* Copy particle data to temporary buffer */
-  if (props.convert_part == NULL &&
-      props.convert_gpart == NULL) { /* No conversion */
-
-    char* temp_c = temp;
-    for (size_t i = 0; i < N; ++i)
-      memcpy(&temp_c[i * copySize], props.field + i * props.partSize, copySize);
-
-  } else if (props.convert_part != NULL) { /* conversion (for parts)*/
-
-    float* temp_f = temp;
-    for (size_t i = 0; i < N; ++i)
-      temp_f[i] = props.convert_part(e, &props.parts[i]);
-
-  } else if (props.convert_gpart != NULL) { /* conversion (for gparts)*/
-
-    float* temp_f = temp;
-    for (size_t i = 0; i < N; ++i)
-      temp_f[i] = props.convert_gpart(e, &props.gparts[i]);
-  }
-
-  /* Unit conversion if necessary */
-  const double factor =
-      units_conversion_factor(internal_units, snapshot_units, props.units);
-  if (factor != 1.) {
-
-    /* message("Converting ! factor=%e", factor); */
+  void* temp = NULL;
+  if (posix_memalign((void**)&temp, IO_BUFFER_ALIGNMENT,
+                     num_elements * typeSize) != 0)
+    error("Unable to allocate temporary i/o buffer");
 
-    if (io_is_double_precision(props.type)) {
-      double* temp_d = temp;
-      for (size_t i = 0; i < num_elements; ++i) temp_d[i] *= factor;
-    } else {
-      float* temp_f = temp;
-      for (size_t i = 0; i < num_elements; ++i) temp_f[i] *= factor;
-    }
-  }
+  /* Copy the particle data to the temporary buffer */
+  io_copy_temp_buffer(temp, e, props, N, internal_units, snapshot_units);
 
   /* Create data space */
   const hid_t h_space = H5Screate(H5S_SIMPLE);
@@ -419,7 +387,7 @@ void read_ic_single(char* fileName, const struct unit_system* internal_units,
   /* Read the unit system used in the ICs */
   struct unit_system* ic_units = malloc(sizeof(struct unit_system));
   if (ic_units == NULL) error("Unable to allocate memory for IC unit system");
-  io_read_unit_system(h_file, ic_units);
+  io_read_unit_system(h_file, ic_units, 0);
 
   /* Tell the user if a conversion will be needed */
   if (units_are_equal(ic_units, internal_units)) {
diff --git a/src/space.c b/src/space.c
index 6051320e26c9c463cb85598ac8c106abd769d32e..5c9c6cc2f74e9803df4929ca7612dfc7b6f6f3cc 100644
--- a/src/space.c
+++ b/src/space.c
@@ -2933,14 +2933,15 @@ void space_init(struct space *s, const struct swift_params *params,
   hydro_space_init(&s->hs, s);
 
   ticks tic = getticks();
-  message("first init...");
+  if (verbose) message("first init...");
   /* Set the particles in a state where they are ready for a run */
   space_first_init_parts(s);
   space_first_init_xparts(s);
   space_first_init_gparts(s);
   space_first_init_sparts(s);
-  message("took %.3f %s.", clocks_from_ticks(getticks() - tic),
-          clocks_getunit());
+  if (verbose)
+    message("took %.3f %s.", clocks_from_ticks(getticks() - tic),
+            clocks_getunit());
 
   /* Init the space lock. */
   if (lock_init(&s->lock) != 0) error("Failed to create space spin-lock.");
@@ -2962,7 +2963,8 @@ void space_replicate(struct space *s, int replicate, int verbose) {
 
   if (replicate < 1) error("Invalid replicate value: %d", replicate);
 
-  message("Replicating space %d times along each axis.", replicate);
+  if (verbose)
+    message("Replicating space %d times along each axis.", replicate);
 
   const int factor = replicate * replicate * replicate;