diff --git a/m4/ax_cc_maxopt.m4 b/m4/ax_cc_maxopt.m4
index d2a0937232e0e8bf9c3e7e79c20267bfa1d75880..d5b87c903c5af46b767d7b49e40963345c8128af 100644
--- a/m4/ax_cc_maxopt.m4
+++ b/m4/ax_cc_maxopt.m4
@@ -128,6 +128,8 @@ if test "$ac_test_CFLAGS" != "set"; then
 		    *3?6[[cf]]?:*:*:*|*4?6[[56]]?:*:*:*|*4?6[[ef]]?:*:*:*) icc_flags="-xCORE-AVX2 -xCORE-AVX-I -xAVX -SSE4.2 -xS -xT -xB -xK" ;;
 		    *000?f[[346]]?:*:*:*|?f[[346]]?:*:*:*|f[[346]]?:*:*:*) icc_flags="-xSSE3 -xP -xO -xN -xW -xK" ;;
 		    *00??f??:*:*:*|??f??:*:*:*|?f??:*:*:*|f??:*:*:*) icc_flags="-xSSE2 -xN -xW -xK" ;;
+		    *5?6E?:*:*:*) icc_flags="-xCORE-AVX512" ;;
+		    *5?67?:*:*:*) icc_flags="-xMIC-AVX512" ;;
                   esac ;;
               esac ;;
           esac
diff --git a/src/Makefile.am b/src/Makefile.am
index 5859c59d9ddbc7e701145c40f8f0a35b92a075c3..cd0e2553601399e0bd07d4a0fd5fcc2b9858dc12 100644
--- a/src/Makefile.am
+++ b/src/Makefile.am
@@ -44,7 +44,7 @@ include_HEADERS = space.h runner.h queue.h task.h lock.h cell.h part.h const.h \
     common_io.h single_io.h multipole.h map.h tools.h partition.h clocks.h parser.h \
     physical_constants.h physical_constants_cgs.h potential.h version.h \
     hydro_properties.h riemann.h threadpool.h cooling.h cooling_struct.h sourceterms.h \
-    sourceterms_struct.h
+    sourceterms_struct.h statistics.h
 
 
 # Common source files
@@ -53,7 +53,8 @@ AM_SOURCES = space.c runner.c queue.c task.c cell.c engine.c \
     units.c common_io.c single_io.c multipole.c version.c map.c \
     kernel_hydro.c tools.c part.c partition.c clocks.c parser.c \
     physical_constants.c potential.c hydro_properties.c \
-    runner_doiact_fft.c threadpool.c cooling.c sourceterms.c
+    runner_doiact_fft.c threadpool.c cooling.c sourceterms.c \
+    statistics.c
 
 # Include files for distribution, not installation.
 nobase_noinst_HEADERS = align.h approx_math.h atomic.h cycle.h error.h inline.h kernel_hydro.h kernel_gravity.h \
diff --git a/src/cell.c b/src/cell.c
index 3e3a8e6cc537f948a809c1a3ebf3cb18194fedd1..a54906d667dd312fd35a7584047113a45183b46c 100644
--- a/src/cell.c
+++ b/src/cell.c
@@ -827,6 +827,7 @@ void cell_check_multipole(struct cell *c, void *data) {
         error("Multipole CoM are different (%12.15e vs. %12.15e", ma.CoM[k],
               mb.CoM[k]);
 
+#if const_gravity_multipole_order >= 2
     if (fabsf(ma.I_xx - mb.I_xx) / fabsf(ma.I_xx + mb.I_xx) > 1e-5 &&
         ma.I_xx > 1e-9)
       error("Multipole I_xx are different (%12.15e vs. %12.15e)", ma.I_xx,
@@ -851,6 +852,7 @@ void cell_check_multipole(struct cell *c, void *data) {
         ma.I_yz > 1e-9)
       error("Multipole I_yz are different (%12.15e vs. %12.15e)", ma.I_yz,
             mb.I_yz);
+#endif
   }
 }
 
diff --git a/src/cell.h b/src/cell.h
index f1b5359e92251d73733a49eccd8029af993aa3f9..ae22de0cb268bb5240c480b2eed9435ec218adf8 100644
--- a/src/cell.h
+++ b/src/cell.h
@@ -176,12 +176,6 @@ struct cell {
   /* ID of the previous owner, e.g. runner. */
   int owner;
 
-  /* Momentum of particles in cell. */
-  double mom[3], ang_mom[3];
-
-  /* Mass, potential, internal  and kinetic energy of particles in this cell. */
-  double mass, e_pot, e_int, e_kin, e_rad, entropy;
-
   /* Number of particles updated in this cell. */
   int updated, g_updated;
 
diff --git a/src/const.h b/src/const.h
index ef26525b7865bac4c0164f7678398ca307fc1d67..11ee65bfdfabbf43962c721b8599928c25fef7cc 100644
--- a/src/const.h
+++ b/src/const.h
@@ -85,7 +85,7 @@
 #define SLOPE_LIMITER_CELL_WIDE
 
 /* Self gravity stuff. */
-#define const_gravity_multipole_order 2
+#define const_gravity_multipole_order 1
 #define const_gravity_a_smooth 1.25f
 #define const_gravity_r_cut 4.5f
 #define const_gravity_eta 0.025f
diff --git a/src/engine.c b/src/engine.c
index 3bcfbf539311044d6210e31d0f7e2bc1b21cf112..6f360907a35d4d8c37e0f9ee33bbefa7b3a1978f 100644
--- a/src/engine.c
+++ b/src/engine.c
@@ -63,6 +63,7 @@
 #include "runner.h"
 #include "serial_io.h"
 #include "single_io.h"
+#include "statistics.h"
 #include "timers.h"
 #include "tools.h"
 #include "units.h"
@@ -1248,7 +1249,7 @@ void engine_make_external_gravity_tasks(struct engine *e) {
     /* Is that neighbour local ? */
     if (ci->nodeID != nodeID) continue;
 
-    /* If the cell is local build a self-interaction */
+    /* If the cell is local, build a self-interaction */
     scheduler_addtask(sched, task_type_self, task_subtype_external_grav, 0, 0,
                       ci, NULL, 0);
   }
@@ -2506,76 +2507,28 @@ void engine_collect_timestep(struct engine *e) {
 void engine_print_stats(struct engine *e) {
 
   const ticks tic = getticks();
-  const struct space *s = e->s;
 
-  double e_kin = 0.0, e_int = 0.0, e_pot = 0.0, e_rad = 0.0;
-  double entropy = 0.0, mass = 0.0;
-  double mom[3] = {0.0, 0.0, 0.0}, ang_mom[3] = {0.0, 0.0, 0.0};
+  struct statistics stats;
+  stats_init(&stats);
 
-  /* Collect the cell data. */
-  for (int k = 0; k < s->nr_cells; k++)
-    if (s->cells_top[k].nodeID == e->nodeID) {
-      struct cell *c = &s->cells_top[k];
-      mass += c->mass;
-      e_kin += c->e_kin;
-      e_int += c->e_int;
-      e_pot += c->e_pot;
-      e_rad += c->e_rad;
-      entropy += c->entropy;
-      mom[0] += c->mom[0];
-      mom[1] += c->mom[1];
-      mom[2] += c->mom[2];
-      ang_mom[0] += c->ang_mom[0];
-      ang_mom[1] += c->ang_mom[1];
-      ang_mom[2] += c->ang_mom[2];
-    }
+  /* Collect the stats on this node */
+  stats_collect(e->s, &stats);
 
 /* Aggregate the data from the different nodes. */
 #ifdef WITH_MPI
-  {
-    double in[12] = {0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.};
-    double out[12];
-    out[0] = e_kin;
-    out[1] = e_int;
-    out[2] = e_pot;
-    out[3] = e_rad;
-    out[4] = mom[0];
-    out[5] = mom[1];
-    out[6] = mom[2];
-    out[7] = ang_mom[0];
-    out[8] = ang_mom[1];
-    out[9] = ang_mom[2];
-    out[10] = mass;
-    out[11] = entropy;
-    if (MPI_Reduce(out, in, 11, MPI_DOUBLE, MPI_SUM, 0, MPI_COMM_WORLD) !=
-        MPI_SUCCESS)
-      error("Failed to aggregate stats.");
-    e_kin = out[0];
-    e_int = out[1];
-    e_pot = out[2];
-    e_rad = out[3];
-    mom[0] = out[4];
-    mom[1] = out[5];
-    mom[2] = out[6];
-    ang_mom[0] = out[7];
-    ang_mom[1] = out[8];
-    ang_mom[2] = out[9];
-    mass = out[10];
-    entropy = out[11];
-  }
-#endif
+  struct statistics global_stats;
+  stats_init(&global_stats);
 
-  const double e_tot = e_kin + e_int + e_pot;
+  if (MPI_Reduce(&stats, &global_stats, 1, statistics_mpi_type,
+                 statistics_mpi_reduce_op, 0, MPI_COMM_WORLD) != MPI_SUCCESS)
+    error("Failed to aggregate stats.");
+#else
+  struct statistics global_stats = stats;
+#endif
 
   /* Print info */
-  if (e->nodeID == 0) {
-    fprintf(e->file_stats,
-            " %14e %14e %14e %14e %14e %14e %14e %14e %14e %14e %14e %14e %14e "
-            "%14e\n",
-            e->time, mass, e_tot, e_kin, e_int, e_pot, e_rad, entropy, mom[0],
-            mom[1], mom[2], ang_mom[0], ang_mom[1], ang_mom[2]);
-    fflush(e->file_stats);
-  }
+  if (e->nodeID == 0)
+    stats_print_to_file(e->file_stats, &global_stats, e->time);
 
   if (e->verbose)
     message("took %.3f %s.", clocks_from_ticks(getticks() - tic),
@@ -2786,12 +2739,6 @@ void engine_step(struct engine *e) {
     fflush(e->file_timesteps);
   }
 
-  /* Save some statistics */
-  if (e->time - e->timeLastStatistics >= e->deltaTimeStatistics) {
-    engine_print_stats(e);
-    e->timeLastStatistics += e->deltaTimeStatistics;
-  }
-
   /* Drift only the necessary particles, that means all particles
    * if we are about to repartition. */
   int repart = (e->forcerepart != REPART_NONE);
@@ -2888,6 +2835,12 @@ void engine_step(struct engine *e) {
   engine_launch(e, e->nr_threads, mask, submask);
   TIMER_TOC(timer_runners);
 
+  /* Save some statistics */
+  if (e->time - e->timeLastStatistics >= e->deltaTimeStatistics) {
+    engine_print_stats(e);
+    e->timeLastStatistics += e->deltaTimeStatistics;
+  }
+
   TIMER_TOC2(timer_step);
 
   clocks_gettime(&time2);
@@ -2915,8 +2868,8 @@ void engine_drift(struct engine *e) {
                  e->s->nr_cells, sizeof(struct cell), 1, e);
 
   if (e->verbose)
-    message("took %.3f %s (including task unskipping).", clocks_from_ticks(getticks() - tic),
-            clocks_getunit());
+    message("took %.3f %s (including task unskipping).",
+            clocks_from_ticks(getticks() - tic), clocks_getunit());
 }
 
 /**
@@ -3544,6 +3497,7 @@ void engine_init(struct engine *e, struct space *s,
 /* Construct types for MPI communications */
 #ifdef WITH_MPI
   part_create_mpi_types();
+  stats_create_MPI_type();
 #endif
 
   /* Initialize the threadpool. */
diff --git a/src/runner.c b/src/runner.c
index 415af5cb1b4d7f78ece423b40ed31d327ee734ab..f5efc99d492be837509e50bd2674ab6923404446 100644
--- a/src/runner.c
+++ b/src/runner.c
@@ -790,12 +790,7 @@ static void runner_do_drift(struct cell *c, struct engine *e, int drift) {
 
   /* Drift from the last time the cell was drifted to the current time */
   const double dt = (ti_current - ti_old) * timeBase;
-
   float dx_max = 0.f, dx2_max = 0.f, h_max = 0.f;
-  double e_kin = 0.0, e_int = 0.0, e_pot = 0.0, e_rad = 0.0;
-  double entropy = 0.0, mass = 0.0;
-  double mom[3] = {0.0, 0.0, 0.0};
-  double ang_mom[3] = {0.0, 0.0, 0.0};
 
   /* No children? */
   if (!c->split) {
@@ -820,7 +815,7 @@ static void runner_do_drift(struct cell *c, struct engine *e, int drift) {
         dx2_max = (dx2_max > dx2) ? dx2_max : dx2;
       }
 
-      /* Loop over all the particles in the cell (more work for these !) */
+      /* Loop over all the particles in the cell */
       const size_t nr_parts = c->count;
       for (size_t k = 0; k < nr_parts; k++) {
 
@@ -839,39 +834,6 @@ static void runner_do_drift(struct cell *c, struct engine *e, int drift) {
 
         /* Maximal smoothing length */
         h_max = (h_max > p->h) ? h_max : p->h;
-
-        /* Now collect quantities for statistics */
-
-        const float half_dt =
-            (ti_current - (p->ti_begin + p->ti_end) / 2) * timeBase;
-        const double x[3] = {p->x[0], p->x[1], p->x[2]};
-        const float v[3] = {xp->v_full[0] + p->a_hydro[0] * half_dt,
-                            xp->v_full[1] + p->a_hydro[1] * half_dt,
-                            xp->v_full[2] + p->a_hydro[2] * half_dt};
-
-        const float m = hydro_get_mass(p);
-
-        /* Collect mass */
-        mass += m;
-
-        /* Collect momentum */
-        mom[0] += m * v[0];
-        mom[1] += m * v[1];
-        mom[2] += m * v[2];
-
-        /* Collect angular momentum */
-        ang_mom[0] += m * (x[1] * v[2] - x[2] * v[1]);
-        ang_mom[1] += m * (x[2] * v[0] - x[0] * v[2]);
-        ang_mom[2] += m * (x[0] * v[1] - x[1] * v[0]);
-
-        /* Collect energies. */
-        e_kin += 0.5 * m * (v[0] * v[0] + v[1] * v[1] + v[2] * v[2]);
-        e_pot += 0.;
-        e_int += m * hydro_get_internal_energy(p, half_dt);
-        e_rad += cooling_get_radiated_energy(xp);
-
-        /* Collect entropy */
-        entropy += m * hydro_get_entropy(p, half_dt);
       }
 
       /* Now, get the maximal particle motion from its square */
@@ -892,36 +854,12 @@ static void runner_do_drift(struct cell *c, struct engine *e, int drift) {
         runner_do_drift(cp, e, drift);
         dx_max = max(dx_max, cp->dx_max);
         h_max = max(h_max, cp->h_max);
-        mass += cp->mass;
-        e_kin += cp->e_kin;
-        e_int += cp->e_int;
-        e_pot += cp->e_pot;
-        e_rad += cp->e_rad;
-        entropy += cp->entropy;
-        mom[0] += cp->mom[0];
-        mom[1] += cp->mom[1];
-        mom[2] += cp->mom[2];
-        ang_mom[0] += cp->ang_mom[0];
-        ang_mom[1] += cp->ang_mom[1];
-        ang_mom[2] += cp->ang_mom[2];
       }
   }
 
   /* Store the values */
   c->h_max = h_max;
   c->dx_max = dx_max;
-  c->mass = mass;
-  c->e_kin = e_kin;
-  c->e_int = e_int;
-  c->e_pot = e_pot;
-  c->e_rad = e_rad;
-  c->entropy = entropy;
-  c->mom[0] = mom[0];
-  c->mom[1] = mom[1];
-  c->mom[2] = mom[2];
-  c->ang_mom[0] = ang_mom[0];
-  c->ang_mom[1] = ang_mom[1];
-  c->ang_mom[2] = ang_mom[2];
 
   /* Update the time of the last drift */
   c->ti_old = ti_current;
diff --git a/src/statistics.c b/src/statistics.c
new file mode 100644
index 0000000000000000000000000000000000000000..0afca84f517ca201fc184de2abff40b9ce9d5711
--- /dev/null
+++ b/src/statistics.c
@@ -0,0 +1,316 @@
+/*******************************************************************************
+ * This file is part of SWIFT.
+ * Copyright (c) 2016 Matthieu Schaller (matthieu.schaller@durham.ac.uk)
+ *
+ * This program is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU Lesser General Public License as published
+ * by the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public License
+ * along with this program.  If not, see <http://www.gnu.org/licenses/>.
+ *
+ ******************************************************************************/
+
+/* Config parameters. */
+#include "../config.h"
+
+/* Some standard headers. */
+#include <string.h>
+
+/* MPI headers. */
+#ifdef WITH_MPI
+#include <mpi.h>
+#endif
+
+/* This object's header. */
+#include "statistics.h"
+
+/* Local headers. */
+#include "cooling.h"
+#include "engine.h"
+#include "error.h"
+#include "hydro.h"
+#include "threadpool.h"
+
+/**
+ * @brief Information required to compute the statistics in the mapper
+ */
+struct index_data {
+  /*! The space we play with */
+  const struct space *s;
+
+  /*! The #statistics aggregator to fill */
+  struct statistics *stats;
+};
+
+/**
+ * @brief Adds the content of one #statistics aggregator to another one.
+ *
+ * Performs a += b;
+ *
+ * @param a The #statistics structure to update.
+ * @param b The #statistics structure to add to a.
+ */
+void stats_add(struct statistics *a, const struct statistics *b) {
+
+  /* Add everything */
+  a->E_kin += b->E_kin;
+  a->E_int += b->E_int;
+  a->E_pot += b->E_pot;
+  a->E_rad += b->E_rad;
+  a->entropy += b->entropy;
+  a->mass += b->mass;
+  a->mom[0] += b->mom[0];
+  a->mom[1] += b->mom[1];
+  a->mom[2] += b->mom[2];
+  a->ang_mom[0] += b->ang_mom[0];
+  a->ang_mom[1] += b->ang_mom[1];
+  a->ang_mom[2] += b->ang_mom[2];
+}
+
+/**
+ * @brief Initialises a statistics aggregator to a valid state.
+ *
+ * @param s The #statistics aggregator to initialise
+ */
+void stats_init(struct statistics *s) {
+
+  /* Zero everything */
+  bzero(s, sizeof(struct statistics));
+
+  /* Set the lock */
+  lock_init(&s->lock);
+}
+
+/**
+ * @brief The #threadpool mapper function used to collect statistics for #part.
+ *
+ * @param map_data Pointer to the particles.
+ * @param nr_parts The number of particles in this chunk
+ * @param extra_data The #statistics aggregator.
+ */
+void stats_collect_part_mapper(void *map_data, int nr_parts, void *extra_data) {
+
+  /* Unpack the data */
+  const struct index_data *data = (struct index_data *)extra_data;
+  const struct space *s = data->s;
+  const struct part *restrict parts = (struct part *)map_data;
+  const struct xpart *restrict xparts =
+      s->xparts + (ptrdiff_t)(parts - s->parts);
+  const int ti_current = s->e->ti_current;
+  const double timeBase = s->e->timeBase;
+  struct statistics *const global_stats = data->stats;
+
+  /* Local accumulator */
+  struct statistics stats;
+  stats_init(&stats);
+
+  /* Loop over particles */
+  for (int k = 0; k < nr_parts; k++) {
+
+    /* Get the particle */
+    const struct part *p = &parts[k];
+    const struct xpart *xp = &xparts[k];
+
+    /* Get useful variables */
+    const float dt = (ti_current - (p->ti_begin + p->ti_end) / 2) * timeBase;
+    const double x[3] = {p->x[0], p->x[1], p->x[2]};
+    float a_tot[3] = {p->a_hydro[0], p->a_hydro[1], p->a_hydro[2]};
+    if (p->gpart != NULL) {
+      a_tot[0] += p->gpart->a_grav[0];
+      a_tot[1] += p->gpart->a_grav[1];
+      a_tot[2] += p->gpart->a_grav[2];
+    }
+    const float v[3] = {xp->v_full[0] + a_tot[0] * dt,
+                        xp->v_full[1] + a_tot[1] * dt,
+                        xp->v_full[2] + a_tot[2] * dt};
+
+    const float m = hydro_get_mass(p);
+
+    /* Collect mass */
+    stats.mass += m;
+
+    /* Collect momentum */
+    stats.mom[0] += m * v[0];
+    stats.mom[1] += m * v[1];
+    stats.mom[2] += m * v[2];
+
+    /* Collect angular momentum */
+    stats.ang_mom[0] += m * (x[1] * v[2] - x[2] * v[1]);
+    stats.ang_mom[1] += m * (x[2] * v[0] - x[0] * v[2]);
+    stats.ang_mom[2] += m * (x[0] * v[1] - x[1] * v[0]);
+
+    /* Collect energies. */
+    stats.E_kin += 0.5f * m * (v[0] * v[0] + v[1] * v[1] + v[2] * v[2]);
+    stats.E_pot += 0.;
+    stats.E_int += m * hydro_get_internal_energy(p, dt);
+    stats.E_rad += cooling_get_radiated_energy(xp);
+
+    /* Collect entropy */
+    stats.entropy += m * hydro_get_entropy(p, dt);
+  }
+
+  /* Now write back to memory */
+  if (lock_lock(&global_stats->lock) == 0) stats_add(global_stats, &stats);
+  if (lock_unlock(&global_stats->lock) != 0) error("Failed to unlock stats.");
+}
+
+/**
+ * @brief The #threadpool mapper function used to collect statistics for #gpart.
+ *
+ * @param map_data Pointer to the g-particles.
+ * @param nr_gparts The number of g-particles in this chunk
+ * @param extra_data The #statistics aggregator.
+ */
+void stats_collect_gpart_mapper(void *map_data, int nr_gparts,
+                                void *extra_data) {
+
+  /* Unpack the data */
+  const struct index_data *data = (struct index_data *)extra_data;
+  const struct space *s = data->s;
+  const struct gpart *restrict gparts = (struct gpart *)map_data;
+  const int ti_current = s->e->ti_current;
+  const double timeBase = s->e->timeBase;
+  struct statistics *const global_stats = data->stats;
+
+  /* Local accumulator */
+  struct statistics stats;
+  stats_init(&stats);
+
+  /* Loop over particles */
+  for (int k = 0; k < nr_gparts; k++) {
+
+    /* Get the particle */
+    const struct gpart *gp = &gparts[k];
+
+    /* If the g-particle has a counterpart, ignore it */
+    if (gp->id_or_neg_offset < 0) continue;
+
+    /* Get useful variables */
+    const float dt = (ti_current - (gp->ti_begin + gp->ti_end) / 2) * timeBase;
+    const double x[3] = {gp->x[0], gp->x[1], gp->x[2]};
+    const float v[3] = {gp->v_full[0] + gp->a_grav[0] * dt,
+                        gp->v_full[1] + gp->a_grav[1] * dt,
+                        gp->v_full[2] + gp->a_grav[2] * dt};
+
+    const float m = gp->mass;
+
+    /* Collect mass */
+    stats.mass += m;
+
+    /* Collect momentum */
+    stats.mom[0] += m * v[0];
+    stats.mom[1] += m * v[1];
+    stats.mom[2] += m * v[2];
+
+    /* Collect angular momentum */
+    stats.ang_mom[0] += m * (x[1] * v[2] - x[2] * v[1]);
+    stats.ang_mom[1] += m * (x[2] * v[0] - x[0] * v[2]);
+    stats.ang_mom[2] += m * (x[0] * v[1] - x[1] * v[0]);
+
+    /* Collect energies. */
+    stats.E_kin += 0.5f * m * (v[0] * v[0] + v[1] * v[1] + v[2] * v[2]);
+    stats.E_pot += 0.;
+  }
+
+  /* Now write back to memory */
+  if (lock_lock(&global_stats->lock) == 0) stats_add(global_stats, &stats);
+  if (lock_unlock(&global_stats->lock) != 0) error("Failed to unlock stats.");
+}
+
+/**
+ * @brief Collect physical statistics over all particles in a #space.
+ *
+ * @param s The #space to collect from.
+ * @param stats The #statistics aggregator to fill.
+ */
+void stats_collect(const struct space *s, struct statistics *stats) {
+
+  /* Prepare the data */
+  struct index_data extra_data;
+  extra_data.s = s;
+  extra_data.stats = stats;
+
+  /* Run parallel collection of statistics for parts */
+  if (s->nr_parts > 0)
+    threadpool_map(&s->e->threadpool, stats_collect_part_mapper, s->parts,
+                   s->nr_parts, sizeof(struct part), 10000, &extra_data);
+
+  /* Run parallel collection of statistics for gparts */
+  if (s->nr_gparts > 0)
+    threadpool_map(&s->e->threadpool, stats_collect_gpart_mapper, s->gparts,
+                   s->nr_gparts, sizeof(struct gpart), 10000, &extra_data);
+}
+
+/**
+ * @brief Prints the content of a #statistics aggregator to a file
+ *
+ * @param file File to write to.
+ * @param stats The #statistics object to write to the file
+ * @param time The current physical time.
+ */
+void stats_print_to_file(FILE *file, const struct statistics *stats,
+                         double time) {
+
+  const double E_tot = stats->E_kin + stats->E_int + stats->E_pot;
+
+  fprintf(file,
+          " %14e %14e %14e %14e %14e %14e %14e %14e %14e %14e %14e %14e %14e "
+          "%14e\n",
+          time, stats->mass, E_tot, stats->E_kin, stats->E_int, stats->E_pot,
+          stats->E_rad, stats->entropy, stats->mom[0], stats->mom[1],
+          stats->mom[2], stats->ang_mom[0], stats->ang_mom[1],
+          stats->ang_mom[2]);
+  fflush(file);
+}
+
+/* Extra stuff in MPI-land */
+#ifdef WITH_MPI
+
+/**
+ * @brief MPI datatype corresponding to the #statistics structure.
+ */
+MPI_Datatype statistics_mpi_type;
+
+/**
+ * @brief MPI operator used for the reduction of #statistics structure.
+ */
+MPI_Op statistics_mpi_reduce_op;
+
+/**
+ * @brief MPI reduce operator for #statistics structures.
+ */
+void stats_add_MPI(void *in, void *inout, int *len, MPI_Datatype *datatype) {
+
+  for (int i = 0; i < *len; ++i)
+    stats_add(&((struct statistics *)inout)[0],
+              &((const struct statistics *)in)[i]);
+}
+
+/**
+ * @brief Registers MPI #statistics type and reduction function.
+ */
+void stats_create_MPI_type() {
+
+  /* This is not the recommended way of doing this.
+     One should define the structure field by field
+     But as long as we don't do serialization via MPI-IO
+     we don't really care.
+     Also we would have to modify this function everytime something
+     is added to the statistics structure. */
+  if (MPI_Type_contiguous(sizeof(struct statistics) / sizeof(unsigned char),
+                          MPI_BYTE, &statistics_mpi_type) != MPI_SUCCESS ||
+      MPI_Type_commit(&statistics_mpi_type) != MPI_SUCCESS) {
+    error("Failed to create MPI type for statistics.");
+  }
+
+  /* Create the reduction operation */
+  MPI_Op_create(stats_add_MPI, 1, &statistics_mpi_reduce_op);
+}
+#endif
diff --git a/src/statistics.h b/src/statistics.h
new file mode 100644
index 0000000000000000000000000000000000000000..ece1f813bb8aff2b5402753bafc031696cfa562d
--- /dev/null
+++ b/src/statistics.h
@@ -0,0 +1,76 @@
+/*******************************************************************************
+ * This file is part of SWIFT.
+ * Copyright (c) 2016 Matthieu Schaller (matthieu.schaller@durham.ac.uk)
+ *
+ * This program is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU Lesser General Public License as published
+ * by the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public License
+ * along with this program.  If not, see <http://www.gnu.org/licenses/>.
+ *
+ ******************************************************************************/
+#ifndef SWIFT_STATISTICS_H
+#define SWIFT_STATISTICS_H
+
+/* Config parameters. */
+#include "../config.h"
+
+/* Local headers. */
+#include "lock.h"
+#include "space.h"
+
+/**
+ * @brief Quantities collected for physics statistics
+ */
+struct statistics {
+
+  /*! Kinetic energy */
+  double E_kin;
+
+  /*! Internal energy */
+  double E_int;
+
+  /*! Potential energy */
+  double E_pot;
+
+  /*! Radiative energy */
+  double E_rad;
+
+  /*! Entropy */
+  double entropy;
+
+  /*! Mass */
+  double mass;
+
+  /*! Momentum */
+  double mom[3];
+
+  /*! Angular momentum */
+  double ang_mom[3];
+
+  /*! Lock for threaded access */
+  swift_lock_type lock;
+};
+
+void stats_collect(const struct space* s, struct statistics* stats);
+void stats_add(struct statistics* a, const struct statistics* b);
+void stats_print_to_file(FILE* file, const struct statistics* stats,
+                         double time);
+void stats_init(struct statistics* s);
+
+#ifdef WITH_MPI
+extern MPI_Datatype statistics_mpi_type;
+extern MPI_Op statistics_mpi_reduce_op;
+
+void stats_add_MPI(void* in, void* out, int* len, MPI_Datatype* datatype);
+void stats_create_MPI_type();
+#endif
+
+#endif /* SWIFT_STATISTICS_H */