diff --git a/examples/test.c b/examples/test.c
index 55ae12f851f16ec1b40959d9a6bd915d3ec7b39a..5a5c3a76a0969f873a17d7a6665ae8f440f2cbf7 100644
--- a/examples/test.c
+++ b/examples/test.c
@@ -3,20 +3,20 @@
  * Copyright (c) 2012 Pedro Gonnet (pedro.gonnet@durham.ac.uk),
  *                    Matthieu Schaller (matthieu.schaller@durham.ac.uk)
  *               2015 Peter W. Draper (p.w.draper@durham.ac.uk)
- * 
+ *
  * This program is free software: you can redistribute it and/or modify
  * it under the terms of the GNU Lesser General Public License as published
  * by the Free Software Foundation, either version 3 of the License, or
  * (at your option) any later version.
- * 
+ *
  * This program is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  * GNU General Public License for more details.
- * 
+ *
  * You should have received a copy of the GNU Lesser General Public License
  * along with this program.  If not, see <http://www.gnu.org/licenses/>.
- * 
+ *
  ******************************************************************************/
 
 /* Config parameters. */
@@ -35,12 +35,12 @@
 
 /* Conditional headers. */
 #ifdef HAVE_LIBZ
-    #include <zlib.h>
+#include <zlib.h>
 #endif
 
 /* MPI headers. */
 #ifdef WITH_MPI
-    #include <mpi.h>
+#include <mpi.h>
 #endif
 
 /* Local headers. */
@@ -48,71 +48,67 @@
 
 /* Ticks per second on this machine. */
 #ifndef CPU_TPS
-    #define CPU_TPS 2.40e9
+#define CPU_TPS 2.40e9
 #endif
 
 /* Engine policy flags. */
 #ifndef ENGINE_POLICY
-    #define ENGINE_POLICY engine_policy_none
+#define ENGINE_POLICY engine_policy_none
 #endif
 
-
 /**
  * @brief Mapping function to draw a specific cell (gnuplot).
  */
 
-void map_cells_plot ( struct cell *c , void *data ) {
-
-    int depth = *(int *)data;
-    double *l = c->loc, *h = c->h;
-
-    if ( c->depth <= depth ) {
-    
-        printf( "%.16e %.16e %.16e\n" , l[0] , l[1] , l[2] );
-        printf( "%.16e %.16e %.16e\n" , l[0]+h[0] , l[1] , l[2] );
-        printf( "%.16e %.16e %.16e\n" , l[0]+h[0] , l[1]+h[1] , l[2] );
-        printf( "%.16e %.16e %.16e\n\n\n" , l[0] , l[1]+h[1] , l[2] );
-    
-        printf( "%.16e %.16e %.16e\n" , l[0] , l[1] , l[2]+h[2] );
-        printf( "%.16e %.16e %.16e\n" , l[0]+h[0] , l[1] , l[2]+h[2] );
-        printf( "%.16e %.16e %.16e\n" , l[0]+h[0] , l[1]+h[1] , l[2]+h[2] );
-        printf( "%.16e %.16e %.16e\n\n\n" , l[0] , l[1]+h[1] , l[2]+h[2] );
-    
-        printf( "%.16e %.16e %.16e\n" , l[0] , l[1] , l[2] );
-        printf( "%.16e %.16e %.16e\n" , l[0] , l[1]+h[1] , l[2] );
-        printf( "%.16e %.16e %.16e\n" , l[0] , l[1]+h[1] , l[2]+h[2] );
-        printf( "%.16e %.16e %.16e\n\n\n" , l[0] , l[1] , l[2]+h[2] );
-    
-        printf( "%.16e %.16e %.16e\n" , l[0]+h[0] , l[1] , l[2] );
-        printf( "%.16e %.16e %.16e\n" , l[0]+h[0] , l[1]+h[1] , l[2] );
-        printf( "%.16e %.16e %.16e\n" , l[0]+h[0] , l[1]+h[1] , l[2]+h[2] );
-        printf( "%.16e %.16e %.16e\n\n\n" , l[0]+h[0] , l[1] , l[2] +h[2]);
-    
-        printf( "%.16e %.16e %.16e\n" , l[0] , l[1] , l[2] );
-        printf( "%.16e %.16e %.16e\n" , l[0] , l[1] , l[2]+h[2] );
-        printf( "%.16e %.16e %.16e\n" , l[0]+h[0] , l[1] , l[2]+h[2] );
-        printf( "%.16e %.16e %.16e\n\n\n" , l[0]+h[0] , l[1] , l[2] );
-    
-        printf( "%.16e %.16e %.16e\n" , l[0] , l[1]+h[1] , l[2] );
-        printf( "%.16e %.16e %.16e\n" , l[0] , l[1]+h[1] , l[2]+h[2] );
-        printf( "%.16e %.16e %.16e\n" , l[0]+h[0] , l[1]+h[1] , l[2]+h[2] );
-        printf( "%.16e %.16e %.16e\n\n\n" , l[0]+h[0] , l[1]+h[1] , l[2] );
-        
-        if ( !c->split ) {
-            for ( int k = 0 ; k < c->count ; k++ )
-                printf( "0 0 0 %.16e %.16e %.16e\n" ,
-                    c->parts[k].x[0] , c->parts[k].x[1] , c->parts[k].x[2] );
-            printf( "\n\n" );
-            }
-        /* else
-            for ( int k = 0 ; k < 8 ; k++ )
-                if ( c->progeny[k] != NULL )
-                    map_cells_plot( c->progeny[k] , data ); */
-    
-        }
+void map_cells_plot(struct cell *c, void *data) {
 
-    }
+  int depth = *(int *)data;
+  double *l = c->loc, *h = c->h;
+
+  if (c->depth <= depth) {
 
+    printf("%.16e %.16e %.16e\n", l[0], l[1], l[2]);
+    printf("%.16e %.16e %.16e\n", l[0] + h[0], l[1], l[2]);
+    printf("%.16e %.16e %.16e\n", l[0] + h[0], l[1] + h[1], l[2]);
+    printf("%.16e %.16e %.16e\n\n\n", l[0], l[1] + h[1], l[2]);
+
+    printf("%.16e %.16e %.16e\n", l[0], l[1], l[2] + h[2]);
+    printf("%.16e %.16e %.16e\n", l[0] + h[0], l[1], l[2] + h[2]);
+    printf("%.16e %.16e %.16e\n", l[0] + h[0], l[1] + h[1], l[2] + h[2]);
+    printf("%.16e %.16e %.16e\n\n\n", l[0], l[1] + h[1], l[2] + h[2]);
+
+    printf("%.16e %.16e %.16e\n", l[0], l[1], l[2]);
+    printf("%.16e %.16e %.16e\n", l[0], l[1] + h[1], l[2]);
+    printf("%.16e %.16e %.16e\n", l[0], l[1] + h[1], l[2] + h[2]);
+    printf("%.16e %.16e %.16e\n\n\n", l[0], l[1], l[2] + h[2]);
+
+    printf("%.16e %.16e %.16e\n", l[0] + h[0], l[1], l[2]);
+    printf("%.16e %.16e %.16e\n", l[0] + h[0], l[1] + h[1], l[2]);
+    printf("%.16e %.16e %.16e\n", l[0] + h[0], l[1] + h[1], l[2] + h[2]);
+    printf("%.16e %.16e %.16e\n\n\n", l[0] + h[0], l[1], l[2] + h[2]);
+
+    printf("%.16e %.16e %.16e\n", l[0], l[1], l[2]);
+    printf("%.16e %.16e %.16e\n", l[0], l[1], l[2] + h[2]);
+    printf("%.16e %.16e %.16e\n", l[0] + h[0], l[1], l[2] + h[2]);
+    printf("%.16e %.16e %.16e\n\n\n", l[0] + h[0], l[1], l[2]);
+
+    printf("%.16e %.16e %.16e\n", l[0], l[1] + h[1], l[2]);
+    printf("%.16e %.16e %.16e\n", l[0], l[1] + h[1], l[2] + h[2]);
+    printf("%.16e %.16e %.16e\n", l[0] + h[0], l[1] + h[1], l[2] + h[2]);
+    printf("%.16e %.16e %.16e\n\n\n", l[0] + h[0], l[1] + h[1], l[2]);
+
+    if (!c->split) {
+      for (int k = 0; k < c->count; k++)
+        printf("0 0 0 %.16e %.16e %.16e\n", c->parts[k].x[0], c->parts[k].x[1],
+               c->parts[k].x[2]);
+      printf("\n\n");
+    }
+    /* else
+        for ( int k = 0 ; k < 8 ; k++ )
+            if ( c->progeny[k] != NULL )
+                map_cells_plot( c->progeny[k] , data ); */
+  }
+}
 
 /**
  * @brief Mapping function for checking if each part is in its box.
@@ -127,129 +123,112 @@ void map_cells_plot ( struct cell *c , void *data ) {
 
     } */
 
-
 /**
  * @brief Mapping function for neighbour count.
  */
 
-void map_cellcheck ( struct cell *c , void *data ) {
-
-    int k, *count = (int *)data;
-    struct part *p;
-    
-    __sync_fetch_and_add( count , c->count );
-    
-    /* Loop over all parts and check if they are in the cell. */
-    for ( k = 0 ; k < c->count ; k++ ) {
-        p = &c->parts[k];
-        if ( p->x[0] < c->loc[0] || p->x[1] < c->loc[1] || p->x[2] < c->loc[2] ||
-             p->x[0] > c->loc[0] + c->h[0] || p->x[1] > c->loc[1] + c->h[1] || p->x[2] > c->loc[2] + c->h[2] ) {
-            printf( "map_cellcheck: particle at [ %.16e %.16e %.16e ] outside of cell [ %.16e %.16e %.16e ] - [ %.16e %.16e %.16e ].\n" ,
-                p->x[0] , p->x[1] , p->x[2] ,
-                c->loc[0] , c->loc[1] , c->loc[2] ,
-                c->loc[0] + c->h[0] , c->loc[1] + c->h[1] , c->loc[2] + c->h[2] );
-            error( "particle out of bounds!" );
-            }
-        }
-
+void map_cellcheck(struct cell *c, void *data) {
+
+  int k, *count = (int *)data;
+  struct part *p;
+
+  __sync_fetch_and_add(count, c->count);
+
+  /* Loop over all parts and check if they are in the cell. */
+  for (k = 0; k < c->count; k++) {
+    p = &c->parts[k];
+    if (p->x[0] < c->loc[0] || p->x[1] < c->loc[1] || p->x[2] < c->loc[2] ||
+        p->x[0] > c->loc[0] + c->h[0] || p->x[1] > c->loc[1] + c->h[1] ||
+        p->x[2] > c->loc[2] + c->h[2]) {
+      printf(
+          "map_cellcheck: particle at [ %.16e %.16e %.16e ] outside of cell [ "
+          "%.16e %.16e %.16e ] - [ %.16e %.16e %.16e ].\n",
+          p->x[0], p->x[1], p->x[2], c->loc[0], c->loc[1], c->loc[2],
+          c->loc[0] + c->h[0], c->loc[1] + c->h[1], c->loc[2] + c->h[2]);
+      error("particle out of bounds!");
     }
-
+  }
+}
 
 /**
  * @brief Mapping function for maxdepth cell count.
  */
 
-void map_maxdepth ( struct cell *c , void *data ) {
+void map_maxdepth(struct cell *c, void *data) {
 
-    int maxdepth = ((int *)data)[0];
-    int *count = &((int *)data)[1];
-    
-    // printf( "%e\n" , p->count );
+  int maxdepth = ((int *)data)[0];
+  int *count = &((int *)data)[1];
 
-    if ( c->depth == maxdepth )
-        *count += 1;
-
-    }
+  // printf( "%e\n" , p->count );
 
+  if (c->depth == maxdepth) *count += 1;
+}
 
 /**
  * @brief Mapping function for neighbour count.
  */
 
-void map_count ( struct part *p , struct cell *c , void *data ) {
-
-    double *wcount = (double *)data;
-    
-    // printf( "%i %e %e\n" , p->id , p->count , p->count_dh );
+void map_count(struct part *p, struct cell *c, void *data) {
 
-    *wcount += p->density.wcount;
+  double *wcount = (double *)data;
 
-    }
+  // printf( "%i %e %e\n" , p->id , p->count , p->count_dh );
 
-void map_wcount_min ( struct part *p , struct cell *c , void *data ) {
+  *wcount += p->density.wcount;
+}
 
-    struct part **p2 = (struct part **)data;
-    
-    if ( p->density.wcount < (*p2)->density.wcount )
-        *p2 = p;
+void map_wcount_min(struct part *p, struct cell *c, void *data) {
 
-    }
+  struct part **p2 = (struct part **)data;
 
-void map_wcount_max ( struct part *p , struct cell *c , void *data ) {
+  if (p->density.wcount < (*p2)->density.wcount) *p2 = p;
+}
 
-    struct part **p2 = (struct part **)data;
-    
-    if ( p->density.wcount > (*p2)->density.wcount )
-        *p2 = p;
+void map_wcount_max(struct part *p, struct cell *c, void *data) {
 
-    }
+  struct part **p2 = (struct part **)data;
 
-void map_h_min ( struct part *p , struct cell *c , void *data ) {
+  if (p->density.wcount > (*p2)->density.wcount) *p2 = p;
+}
 
-    struct part **p2 = (struct part **)data;
-    
-    if ( p->h < (*p2)->h )
-        *p2 = p;
+void map_h_min(struct part *p, struct cell *c, void *data) {
 
-    }
+  struct part **p2 = (struct part **)data;
 
-void map_h_max ( struct part *p , struct cell *c , void *data ) {
+  if (p->h < (*p2)->h) *p2 = p;
+}
 
-    struct part **p2 = (struct part **)data;
-    
-    if ( p->h > (*p2)->h )
-        *p2 = p;
+void map_h_max(struct part *p, struct cell *c, void *data) {
 
-    }
+  struct part **p2 = (struct part **)data;
 
+  if (p->h > (*p2)->h) *p2 = p;
+}
 
 /**
  * @brief Mapping function for neighbour count.
  */
 
-void map_icount ( struct part *p , struct cell *c , void *data ) {
+void map_icount(struct part *p, struct cell *c, void *data) {
 
-    // int *count = (int *)data;
-    
-    // printf( "%i\n" , p->icount );
+  // int *count = (int *)data;
 
-    // *count += p->icount;
-
-    }
+  // printf( "%i\n" , p->icount );
 
+  // *count += p->icount;
+}
 
 /**
  * @brief Mapping function to print the particle position.
  */
 
-void map_dump ( struct part *p , struct cell *c , void *data ) {
-
-    double *shift = (double *)data;
+void map_dump(struct part *p, struct cell *c, void *data) {
 
-    printf( "%g\t%g\t%g\n" , p->x[0]-shift[0] , p->x[1]-shift[1] , p->x[2]-shift[2] );
-
-    }
+  double *shift = (double *)data;
 
+  printf("%g\t%g\t%g\n", p->x[0] - shift[0], p->x[1] - shift[1],
+         p->x[2] - shift[2]);
+}
 
 /**
  * @brief Compute the average number of pairs per particle using
@@ -261,713 +240,709 @@ void map_dump ( struct part *p , struct cell *c , void *data ) {
  * @param periodic Periodic boundary conditions flag.
  */
 
-void pairs_n2 ( double *dim , struct part *__restrict__ parts , int N , int periodic ) {
-
-    int i, j, k, count = 0;
-    // int mj, mk;
-    // double maxratio = 1.0;
-    double r2, dx[3], rho = 0.0;
-    double rho_max = 0.0, rho_min = 100;
-    
-    /* Loop over all particle pairs. */
-    for ( j = 0 ; j < N ; j++ ) {
-        if ( j % 1000 == 0 ) {
-            printf( "pairs_n2: j=%i.\n" , j );
-            fflush(stdout);
-            }
-        for ( k = j+1 ; k < N ; k++ ) {
-            for ( i = 0 ; i < 3 ; i++ ) {
-                dx[i] = parts[j].x[i] - parts[k].x[i];
-                if ( periodic ) {
-                    if ( dx[i] < -dim[i]/2 )
-                        dx[i] += dim[i];
-                    else if ( dx[i] > dim[i]/2 )
-                        dx[i] -= dim[i];
-                    }
-                }
-            r2 = dx[0]*dx[0] + dx[1]*dx[1] + dx[2]*dx[2];
-            if ( r2 < parts[j].h*parts[j].h || r2 < parts[k].h*parts[k].h ) {
-                runner_iact_density( r2 , NULL , parts[j].h , parts[k].h , &parts[j] , &parts[k] );
-                /* if ( parts[j].h / parts[k].h > maxratio )
-                    {
-                    maxratio = parts[j].h / parts[k].h;
-                    mj = j; mk = k;
-                    }
-                else if ( parts[k].h / parts[j].h > maxratio )
-                    {
-                    maxratio = parts[k].h / parts[j].h;
-                    mj = j; mk = k;
-                    } */
-                }
-            }
-        }
-        
-    /* Aggregate the results. */
-    for ( k = 0 ; k < N ; k++ ) {
-        // count += parts[k].icount;
-        rho += parts[k].density.wcount;
-        rho_min = fmin( parts[k].density.wcount , rho_min );
-        rho_min = fmax( parts[k].density.wcount , rho_max );
+void pairs_n2(double *dim, struct part *__restrict__ parts, int N,
+              int periodic) {
+
+  int i, j, k, count = 0;
+  // int mj, mk;
+  // double maxratio = 1.0;
+  double r2, dx[3], rho = 0.0;
+  double rho_max = 0.0, rho_min = 100;
+
+  /* Loop over all particle pairs. */
+  for (j = 0; j < N; j++) {
+    if (j % 1000 == 0) {
+      printf("pairs_n2: j=%i.\n", j);
+      fflush(stdout);
+    }
+    for (k = j + 1; k < N; k++) {
+      for (i = 0; i < 3; i++) {
+        dx[i] = parts[j].x[i] - parts[k].x[i];
+        if (periodic) {
+          if (dx[i] < -dim[i] / 2)
+            dx[i] += dim[i];
+          else if (dx[i] > dim[i] / 2)
+            dx[i] -= dim[i];
         }
-            
-    /* Dump the result. */
-    printf( "pairs_n2: avg. density per part is %.3f (nr. pairs %.3f).\n" , rho/N + 32.0/3 , ((double)count)/N );
-    printf( "pairs_n2: densities are in [ %e , %e ].\n" , rho_min/N + 32.0/3 , rho_max/N + 32.0/3 );
-    /* printf( "pairs_n2: maximum ratio between parts %i [%e,%e,%e] and %i [%e,%e,%e] is %.3f/%.3f\n" ,
-        mj , parts[mj].x[0] , parts[mj].x[1] , parts[mj].x[2] ,
-        mk , parts[mk].x[0] , parts[mk].x[1] , parts[mk].x[2] ,
-        parts[mj].h , parts[mk].h ); fflush(stdout); */
-    fflush(stdout);
-            
+      }
+      r2 = dx[0] * dx[0] + dx[1] * dx[1] + dx[2] * dx[2];
+      if (r2 < parts[j].h * parts[j].h || r2 < parts[k].h * parts[k].h) {
+        runner_iact_density(r2, NULL, parts[j].h, parts[k].h, &parts[j],
+                            &parts[k]);
+        /* if ( parts[j].h / parts[k].h > maxratio )
+            {
+            maxratio = parts[j].h / parts[k].h;
+            mj = j; mk = k;
+            }
+        else if ( parts[k].h / parts[j].h > maxratio )
+            {
+            maxratio = parts[k].h / parts[j].h;
+            mj = j; mk = k;
+            } */
+      }
     }
+  }
 
+  /* Aggregate the results. */
+  for (k = 0; k < N; k++) {
+    // count += parts[k].icount;
+    rho += parts[k].density.wcount;
+    rho_min = fmin(parts[k].density.wcount, rho_min);
+    rho_min = fmax(parts[k].density.wcount, rho_max);
+  }
 
-void pairs_single_density ( double *dim , long long int pid , struct part *__restrict__ parts , int N , int periodic ) {
-
-    int i, k;
-    // int mj, mk;
-    // double maxratio = 1.0;
-    double r2, dx[3];
-    float fdx[3];
-    struct part p;
-    // double ih = 12.0/6.25;
-    
-    /* Find "our" part. */
-    for ( k = 0 ; k < N && parts[k].id != pid ; k++ );
-    if ( k == N )
-        error( "Part not found." );
-    p = parts[k];
-    printf( "pairs_single: part[%i].id == %lli.\n" , k , pid );
-    
-    p.rho = 0.0;
-    p.density.wcount = 0.0;
-    // p.icount = 0;
-    p.rho_dh = 0.0;
-            
-    /* Loop over all particle pairs. */
-    for ( k = 0 ; k < N ; k++ ) {
-        if ( parts[k].id == p.id )
-            continue;
-        for ( i = 0 ; i < 3 ; i++ ) {
-            dx[i] = p.x[i] - parts[k].x[i];
-            if ( periodic ) {
-                if ( dx[i] < -dim[i]/2 )
-                    dx[i] += dim[i];
-                else if ( dx[i] > dim[i]/2 )
-                    dx[i] -= dim[i];
-                }
-            fdx[i] = dx[i];
-            }
-        r2 = fdx[0]*fdx[0] + fdx[1]*fdx[1] + fdx[2]*fdx[2];
-        if ( r2 < p.h*p.h ) {
-            runner_iact_nonsym_density( r2 , fdx , p.h , parts[k].h , &p , &parts[k] );
-            /* printf( "pairs_simple: interacting particles %lli [%i,%i,%i] and %lli [%i,%i,%i], r=%e.\n" ,
-                pid , (int)(p.x[0]*ih) , (int)(p.x[1]*ih) , (int)(p.x[2]*ih) ,
-                parts[k].id , (int)(parts[k].x[0]*ih) , (int)(parts[k].x[1]*ih) , (int)(parts[k].x[2]*ih) ,
-                sqrtf(r2) ); */
-            }
-        }
-        
-    /* Dump the result. */
-    printf( "pairs_single: wcount of part %lli (h=%e) is %f.\n" , p.id , p.h , p.density.wcount + 32.0/3 );
-    fflush(stdout);
-    
+  /* Dump the result. */
+  printf("pairs_n2: avg. density per part is %.3f (nr. pairs %.3f).\n",
+         rho / N + 32.0 / 3, ((double)count) / N);
+  printf("pairs_n2: densities are in [ %e , %e ].\n", rho_min / N + 32.0 / 3,
+         rho_max / N + 32.0 / 3);
+  /* printf( "pairs_n2: maximum ratio between parts %i [%e,%e,%e] and %i
+     [%e,%e,%e] is %.3f/%.3f\n" ,
+      mj , parts[mj].x[0] , parts[mj].x[1] , parts[mj].x[2] ,
+      mk , parts[mk].x[0] , parts[mk].x[1] , parts[mk].x[2] ,
+      parts[mj].h , parts[mk].h ); fflush(stdout); */
+  fflush(stdout);
+}
+
+void pairs_single_density(double *dim, long long int pid,
+                          struct part *__restrict__ parts, int N,
+                          int periodic) {
+
+  int i, k;
+  // int mj, mk;
+  // double maxratio = 1.0;
+  double r2, dx[3];
+  float fdx[3];
+  struct part p;
+  // double ih = 12.0/6.25;
+
+  /* Find "our" part. */
+  for (k = 0; k < N && parts[k].id != pid; k++)
+    ;
+  if (k == N) error("Part not found.");
+  p = parts[k];
+  printf("pairs_single: part[%i].id == %lli.\n", k, pid);
+
+  p.rho = 0.0;
+  p.density.wcount = 0.0;
+  // p.icount = 0;
+  p.rho_dh = 0.0;
+
+  /* Loop over all particle pairs. */
+  for (k = 0; k < N; k++) {
+    if (parts[k].id == p.id) continue;
+    for (i = 0; i < 3; i++) {
+      dx[i] = p.x[i] - parts[k].x[i];
+      if (periodic) {
+        if (dx[i] < -dim[i] / 2)
+          dx[i] += dim[i];
+        else if (dx[i] > dim[i] / 2)
+          dx[i] -= dim[i];
+      }
+      fdx[i] = dx[i];
     }
+    r2 = fdx[0] * fdx[0] + fdx[1] * fdx[1] + fdx[2] * fdx[2];
+    if (r2 < p.h * p.h) {
+      runner_iact_nonsym_density(r2, fdx, p.h, parts[k].h, &p, &parts[k]);
+      /* printf( "pairs_simple: interacting particles %lli [%i,%i,%i] and %lli
+         [%i,%i,%i], r=%e.\n" ,
+          pid , (int)(p.x[0]*ih) , (int)(p.x[1]*ih) , (int)(p.x[2]*ih) ,
+          parts[k].id , (int)(parts[k].x[0]*ih) , (int)(parts[k].x[1]*ih) ,
+         (int)(parts[k].x[2]*ih) ,
+          sqrtf(r2) ); */
+    }
+  }
 
+  /* Dump the result. */
+  printf("pairs_single: wcount of part %lli (h=%e) is %f.\n", p.id, p.h,
+         p.density.wcount + 32.0 / 3);
+  fflush(stdout);
+}
 
-void pairs_single_grav ( double *dim , long long int pid , struct gpart *__restrict__ parts , int N , int periodic ) {
-
-    int i, k;
-    // int mj, mk;
-    // double maxratio = 1.0;
-    double r2, dx[3];
-    float fdx[3], a[3] = { 0.0 , 0.0 , 0.0 }, aabs[3] = { 0.0 , 0.0 , 0.0 };
-    struct gpart pi, pj;
-    // double ih = 12.0/6.25;
-    
-    /* Find "our" part. */
-    for ( k = 0 ; k < N ; k++ )
-        if ( ( parts[k].id > 0 && parts[k].part->id == pid ) || parts[k].id == -pid )
-            break;
-    if ( k == N )
-        error( "Part not found." );
-    pi = parts[k];
-    pi.a[0] = 0.0f; pi.a[1] = 0.0f; pi.a[2] = 0.0f;
-    
-    /* Loop over all particle pairs. */
-    for ( k = 0 ; k < N ; k++ ) {
-        if ( parts[k].id == pi.id )
-            continue;
-        pj = parts[k];
-        for ( i = 0 ; i < 3 ; i++ ) {
-            dx[i] = pi.x[i] - pj.x[i];
-            if ( periodic ) {
-                if ( dx[i] < -dim[i]/2 )
-                    dx[i] += dim[i];
-                else if ( dx[i] > dim[i]/2 )
-                    dx[i] -= dim[i];
-                }
-            fdx[i] = dx[i];
-            }
-        r2 = fdx[0]*fdx[0] + fdx[1]*fdx[1] + fdx[2]*fdx[2];
-        runner_iact_grav( r2 , fdx , &pi , &pj );
-        a[0] += pi.a[0]; a[1] += pi.a[1]; a[2] += pi.a[2];
-        aabs[0] += fabsf( pi.a[0] ); aabs[1] += fabsf( pi.a[1] ); aabs[2] += fabsf( pi.a[2] );
-        pi.a[0] = 0.0f; pi.a[1] = 0.0f; pi.a[2] = 0.0f;
-        }
-        
-    /* Dump the result. */
-    message( "acceleration on gpart %lli is a=[ %e %e %e ], |a|=[ %.2e %.2e %.2e ].\n" , pi.part->id , a[0] , a[1] , a[2] , aabs[0] , aabs[1] , aabs[2] );
-    
+void pairs_single_grav(double *dim, long long int pid,
+                       struct gpart *__restrict__ parts, int N, int periodic) {
+
+  int i, k;
+  // int mj, mk;
+  // double maxratio = 1.0;
+  double r2, dx[3];
+  float fdx[3], a[3] = {0.0, 0.0, 0.0}, aabs[3] = {0.0, 0.0, 0.0};
+  struct gpart pi, pj;
+  // double ih = 12.0/6.25;
+
+  /* Find "our" part. */
+  for (k = 0; k < N; k++)
+    if ((parts[k].id > 0 && parts[k].part->id == pid) || parts[k].id == -pid)
+      break;
+  if (k == N) error("Part not found.");
+  pi = parts[k];
+  pi.a[0] = 0.0f;
+  pi.a[1] = 0.0f;
+  pi.a[2] = 0.0f;
+
+  /* Loop over all particle pairs. */
+  for (k = 0; k < N; k++) {
+    if (parts[k].id == pi.id) continue;
+    pj = parts[k];
+    for (i = 0; i < 3; i++) {
+      dx[i] = pi.x[i] - pj.x[i];
+      if (periodic) {
+        if (dx[i] < -dim[i] / 2)
+          dx[i] += dim[i];
+        else if (dx[i] > dim[i] / 2)
+          dx[i] -= dim[i];
+      }
+      fdx[i] = dx[i];
     }
+    r2 = fdx[0] * fdx[0] + fdx[1] * fdx[1] + fdx[2] * fdx[2];
+    runner_iact_grav(r2, fdx, &pi, &pj);
+    a[0] += pi.a[0];
+    a[1] += pi.a[1];
+    a[2] += pi.a[2];
+    aabs[0] += fabsf(pi.a[0]);
+    aabs[1] += fabsf(pi.a[1]);
+    aabs[2] += fabsf(pi.a[2]);
+    pi.a[0] = 0.0f;
+    pi.a[1] = 0.0f;
+    pi.a[2] = 0.0f;
+  }
 
+  /* Dump the result. */
+  message(
+      "acceleration on gpart %lli is a=[ %e %e %e ], |a|=[ %.2e %.2e %.2e ].\n",
+      pi.part->id, a[0], a[1], a[2], aabs[0], aabs[1], aabs[2]);
+}
 
 /**
  * @brief Test the kernel function by dumping it in the interval [0,1].
  *
  * @param N number of intervals in [0,1].
  */
- 
-void kernel_dump ( int N ) {
-
-    int k;
-    float x, w, dw_dx;
-    float x4[4] = {0.0f,0.0f,0.0f,0.0f};
-    float w4[4] = {0.0f,0.0f,0.0f,0.0f};
-    // float dw_dx4[4] __attribute__ ((aligned (16)));
-
-    for ( k = 0 ; k <= N ; k++ ) {
-        x = ((float)k) / N;
-        x4[3] = x4[2]; x4[2] = x4[1]; x4[1] = x4[0]; x4[0] = x;
-        kernel_deval( x , &w , &dw_dx );
-        // kernel_deval_vec( (vector *)x4 , (vector *)w4 , (vector *)dw_dx4 );
-        printf( " %e %e %e %e %e %e %e\n" , x , w , dw_dx , w4[0] , w4[1] , w4[2] , w4[3] );
-        }
-
-    }
 
+void kernel_dump(int N) {
+
+  int k;
+  float x, w, dw_dx;
+  float x4[4] = {0.0f, 0.0f, 0.0f, 0.0f};
+  float w4[4] = {0.0f, 0.0f, 0.0f, 0.0f};
+  // float dw_dx4[4] __attribute__ ((aligned (16)));
+
+  for (k = 0; k <= N; k++) {
+    x = ((float)k) / N;
+    x4[3] = x4[2];
+    x4[2] = x4[1];
+    x4[1] = x4[0];
+    x4[0] = x;
+    kernel_deval(x, &w, &dw_dx);
+    // kernel_deval_vec( (vector *)x4 , (vector *)w4 , (vector *)dw_dx4 );
+    printf(" %e %e %e %e %e %e %e\n", x, w, dw_dx, w4[0], w4[1], w4[2], w4[3]);
+  }
+}
 
-float gadget ( float r ) {
-  float fac, h_inv, u, r2 = r*r;
-  if ( r >= const_epsilon )
+float gadget(float r) {
+  float fac, h_inv, u, r2 = r * r;
+  if (r >= const_epsilon)
     fac = 1.0f / (r2 * r);
   else {
     h_inv = 1. / const_epsilon;
     u = r * h_inv;
-    if ( u < 0.5 )
+    if (u < 0.5)
       fac = const_iepsilon3 * (10.666666666667 + u * u * (32.0 * u - 38.4));
     else
-      fac = const_iepsilon3 * (21.333333333333 - 48.0 * u +
-			       38.4 * u * u - 10.666666666667 * u * u * u - 0.066666666667 / (u * u * u));
+      fac = const_iepsilon3 *
+            (21.333333333333 - 48.0 * u + 38.4 * u * u -
+             10.666666666667 * u * u * u - 0.066666666667 / (u * u * u));
   }
   return const_G * fac;
 }
 
-
-void gravity_dump ( float r_max , int N ) {
-
-    int k;
-    float x, w;
-    float x4[4] = {0.0f,0.0f,0.0f,0.0f};
-    float w4[4] = {0.0f,0.0f,0.0f,0.0f};
-    // float dw_dx4[4] __attribute__ ((aligned (16)));
-    
-    for ( k = 1 ; k <= N ; k++ ) {
-        x = (r_max * k) / N;
-        x4[3] = x4[2]; x4[2] = x4[1]; x4[1] = x4[0]; x4[0] = x;
-        kernel_grav_eval( x , &w );
-        w *= const_G / ( x*x*x );
-        // blender_deval_vec( (vector *)x4 , (vector *)w4 , (vector *)dw_dx4 );
-        printf( " %.16e %.16e %.16e %.16e %.16e %.16e %.16e\n" , x , w*x , w4[0] , w4[1] , w4[2] , w4[3] , gadget(x)*x );
-        }
-
-    }
-
+void gravity_dump(float r_max, int N) {
+
+  int k;
+  float x, w;
+  float x4[4] = {0.0f, 0.0f, 0.0f, 0.0f};
+  float w4[4] = {0.0f, 0.0f, 0.0f, 0.0f};
+  // float dw_dx4[4] __attribute__ ((aligned (16)));
+
+  for (k = 1; k <= N; k++) {
+    x = (r_max * k) / N;
+    x4[3] = x4[2];
+    x4[2] = x4[1];
+    x4[1] = x4[0];
+    x4[0] = x;
+    kernel_grav_eval(x, &w);
+    w *= const_G / (x * x * x);
+    // blender_deval_vec( (vector *)x4 , (vector *)w4 , (vector *)dw_dx4 );
+    printf(" %.16e %.16e %.16e %.16e %.16e %.16e %.16e\n", x, w * x, w4[0],
+           w4[1], w4[2], w4[3], gadget(x) * x);
+  }
+}
 
 /**
  * @brief Test the density function by dumping it for two random parts.
  *
  * @param N number of intervals in [0,1].
  */
- 
-void density_dump ( int N ) {
-
-    int k;
-    float r2[4] = {0.0f,0.0f,0.0f,0.0f}, hi[4], hj[4];
-    struct part *pi[4], *pj[4], Pi[4], Pj[4];
-    
-    /* Init the interaction parameters. */
-    for ( k = 0 ; k < 4 ; k++ ) {
-        Pi[k].mass = 1.0f; Pi[k].rho = 0.0f; Pi[k].density.wcount = 0.0f;
-        Pj[k].mass = 1.0f; Pj[k].rho = 0.0f; Pj[k].density.wcount = 0.0f;
-        hi[k] = 1.0;
-        hj[k] = 1.0;
-        pi[k] = &Pi[k];
-        pj[k] = &Pj[k];
-        }
 
-    for ( k = 0 ; k <= N ; k++ ) {
-        r2[3] = r2[2]; r2[2] = r2[1]; r2[1] = r2[0];
-        r2[0] = ((float)k) / N;
-        Pi[0].density.wcount = 0; Pj[0].density.wcount = 0;
-        runner_iact_density( r2[0] , NULL , hi[0] , hj[0] , &Pi[0] , &Pj[0] );
-        printf( " %e %e %e" , r2[0] , Pi[0].density.wcount , Pj[0].density.wcount );
-        Pi[0].density.wcount = 0; Pj[0].density.wcount = 0;
-        Pi[1].density.wcount = 0; Pj[1].density.wcount = 0;
-        Pi[2].density.wcount = 0; Pj[2].density.wcount = 0;
-        Pi[3].density.wcount = 0; Pj[3].density.wcount = 0;
-        runner_iact_vec_density( r2 , NULL , hi , hj , pi , pj );
-        printf( " %e %e %e %e\n" , Pi[0].density.wcount , Pi[1].density.wcount , Pi[2].density.wcount , Pi[3].density.wcount );
-        }
+void density_dump(int N) {
+
+  int k;
+  float r2[4] = {0.0f, 0.0f, 0.0f, 0.0f}, hi[4], hj[4];
+  struct part *pi[4], *pj[4], Pi[4], Pj[4];
+
+  /* Init the interaction parameters. */
+  for (k = 0; k < 4; k++) {
+    Pi[k].mass = 1.0f;
+    Pi[k].rho = 0.0f;
+    Pi[k].density.wcount = 0.0f;
+    Pj[k].mass = 1.0f;
+    Pj[k].rho = 0.0f;
+    Pj[k].density.wcount = 0.0f;
+    hi[k] = 1.0;
+    hj[k] = 1.0;
+    pi[k] = &Pi[k];
+    pj[k] = &Pj[k];
+  }
 
-    }
+  for (k = 0; k <= N; k++) {
+    r2[3] = r2[2];
+    r2[2] = r2[1];
+    r2[1] = r2[0];
+    r2[0] = ((float)k) / N;
+    Pi[0].density.wcount = 0;
+    Pj[0].density.wcount = 0;
+    runner_iact_density(r2[0], NULL, hi[0], hj[0], &Pi[0], &Pj[0]);
+    printf(" %e %e %e", r2[0], Pi[0].density.wcount, Pj[0].density.wcount);
+    Pi[0].density.wcount = 0;
+    Pj[0].density.wcount = 0;
+    Pi[1].density.wcount = 0;
+    Pj[1].density.wcount = 0;
+    Pi[2].density.wcount = 0;
+    Pj[2].density.wcount = 0;
+    Pi[3].density.wcount = 0;
+    Pj[3].density.wcount = 0;
+    runner_iact_vec_density(r2, NULL, hi, hj, pi, pj);
+    printf(" %e %e %e %e\n", Pi[0].density.wcount, Pi[1].density.wcount,
+           Pi[2].density.wcount, Pi[3].density.wcount);
+  }
+}
 
 /**
  *  Factorize a given integer, attempts to keep larger pair of factors.
  */
-void factor( int value, int *f1, int *f2 ) {
-    int j;
-    int i;
-
-    j = (int) sqrt( value );
-    for ( i = j; i > 0; i-- ) {
-        if ( ( value % i ) == 0 ) {
-            *f1 = i;
-            *f2 = value / i;
-            break;
-            }
-        }
+void factor(int value, int *f1, int *f2) {
+  int j;
+  int i;
+
+  j = (int)sqrt(value);
+  for (i = j; i > 0; i--) {
+    if ((value % i) == 0) {
+      *f1 = i;
+      *f2 = value / i;
+      break;
     }
-
+  }
+}
 
 /**
  * @brief Main routine that loads a few particles and generates some output.
  *
  */
- 
-int main ( int argc , char *argv[] ) {
-
-    int c, icount, j, k, N = -1, periodic = 1;
-    int nr_threads = 1, nr_queues = -1, runs = INT_MAX;
-    int data[2];
-    double dim[3] = { 1.0 , 1.0 , 1.0 }, shift[3] = { 0.0 , 0.0 , 0.0 };
-    double h_max = -1.0 , scaling = 1.0;
-    double clock = DBL_MAX;
-    struct part *parts = NULL;
-    struct space s;
-    struct engine e;
-    struct UnitSystem us;
-    char ICfileName[200] = "";
-    float dt_max = 0.0f;
-    ticks tic;
-    int nr_nodes = 1, myrank = 0, grid[3] = { 1 , 1 , 1 };
-    
-    /* Choke on FP-exceptions. */
-    // feenableexcept( FE_DIVBYZERO | FE_INVALID | FE_OVERFLOW );
-    
-    
+
+int main(int argc, char *argv[]) {
+
+  int c, icount, j, k, N = -1, periodic = 1;
+  int nr_threads = 1, nr_queues = -1, runs = INT_MAX;
+  int data[2];
+  double dim[3] = {1.0, 1.0, 1.0}, shift[3] = {0.0, 0.0, 0.0};
+  double h_max = -1.0, scaling = 1.0;
+  double clock = DBL_MAX;
+  struct part *parts = NULL;
+  struct space s;
+  struct engine e;
+  struct UnitSystem us;
+  char ICfileName[200] = "";
+  float dt_max = 0.0f;
+  ticks tic;
+  int nr_nodes = 1, myrank = 0, grid[3] = {1, 1, 1};
+
+/* Choke on FP-exceptions. */
+// feenableexcept( FE_DIVBYZERO | FE_INVALID | FE_OVERFLOW );
+
 #ifdef WITH_MPI
-    /* Start by initializing MPI. */
-    int res, prov;
-    if ( ( res = MPI_Init_thread( &argc , &argv , MPI_THREAD_MULTIPLE , &prov ) ) != MPI_SUCCESS )
-        error( "Call to MPI_Init failed with error %i." , res );
-    if ( prov != MPI_THREAD_MULTIPLE )
-        error( "MPI does not provide the level of threading required (MPI_THREAD_MULTIPLE)." );
-    if ( ( res = MPI_Comm_size( MPI_COMM_WORLD , &nr_nodes ) != MPI_SUCCESS ) )
-        error( "MPI_Comm_size failed with error %i." , res );
-    if ( ( res = MPI_Comm_rank( MPI_COMM_WORLD , &myrank ) ) != MPI_SUCCESS )
-        error( "Call to MPI_Comm_rank failed with error %i." , res );
-    if ( ( res = MPI_Comm_set_errhandler( MPI_COMM_WORLD , MPI_ERRORS_RETURN ) ) != MPI_SUCCESS )
-        error( "Call to MPI_Comm_set_errhandler failed with error %i." , res );
-    if ( myrank == 0 )
-        message( "MPI is up and running with %i node(s)." , nr_nodes );
-    fflush(stdout);
-
-    /* Set a default grid so that grid[0]*grid[1]*grid[2] == nr_nodes. */
-    factor( nr_nodes, &grid[0], &grid[1] );
-    factor( nr_nodes / grid[1], &grid[0], &grid[2] );
-    factor( grid[0] * grid[1], &grid[1], &grid[0] );
+  /* Start by initializing MPI. */
+  int res, prov;
+  if ((res = MPI_Init_thread(&argc, &argv, MPI_THREAD_MULTIPLE, &prov)) !=
+      MPI_SUCCESS)
+    error("Call to MPI_Init failed with error %i.", res);
+  if (prov != MPI_THREAD_MULTIPLE)
+    error(
+        "MPI does not provide the level of threading required "
+        "(MPI_THREAD_MULTIPLE).");
+  if ((res = MPI_Comm_size(MPI_COMM_WORLD, &nr_nodes) != MPI_SUCCESS))
+    error("MPI_Comm_size failed with error %i.", res);
+  if ((res = MPI_Comm_rank(MPI_COMM_WORLD, &myrank)) != MPI_SUCCESS)
+    error("Call to MPI_Comm_rank failed with error %i.", res);
+  if ((res = MPI_Comm_set_errhandler(MPI_COMM_WORLD, MPI_ERRORS_RETURN)) !=
+      MPI_SUCCESS)
+    error("Call to MPI_Comm_set_errhandler failed with error %i.", res);
+  if (myrank == 0) message("MPI is up and running with %i node(s).", nr_nodes);
+  fflush(stdout);
+
+  /* Set a default grid so that grid[0]*grid[1]*grid[2] == nr_nodes. */
+  factor(nr_nodes, &grid[0], &grid[1]);
+  factor(nr_nodes / grid[1], &grid[0], &grid[2]);
+  factor(grid[0] * grid[1], &grid[1], &grid[0]);
 #endif
 
-    /* Greeting message */
-    if ( myrank == 0 )
-        greetings( );
-        
-    
-    /* Init the space. */
-    bzero( &s , sizeof(struct space) );
-
-
-    /* Parse the options */
-    while ( ( c = getopt( argc , argv  , "a:c:d:f:g:m:q:r:s:t:w:z:" ) ) != -1 )
-      switch( c )
-	{
-	case 'a':
-	  if ( sscanf( optarg , "%lf" , &scaling ) != 1 )
-	    error( "Error parsing cutoff scaling." );
-	  if ( myrank == 0 )
-        message( "scaling cutoff by %.3f." , scaling ); fflush(stdout);
-	  break;
-	case 'c':
-	  if ( sscanf( optarg , "%lf" , &clock ) != 1 )
-	    error( "Error parsing clock." );
-	  if ( myrank == 0 )
-        message( "clock set to %.3e." , clock ); fflush(stdout);
-	  break;
-	case 'd':
-	  if ( sscanf( optarg , "%f" , &dt_max ) != 1 )
-	    error( "Error parsing timestep." );
-	  if ( myrank == 0 )
-        message( "dt set to %e." , dt_max ); fflush(stdout);
-	  break;
-	case 'f':
-	  if( !strcpy(ICfileName, optarg))
-	    error("Error parsing IC file name.");
-	  break;
-	case 'g':
-	  if ( sscanf( optarg , "%i %i %i" , &grid[0] , &grid[1] , &grid[2] ) != 3 )
-	    error( "Error parsing grid." );
-	  break;
-	case 'm':
-	  if ( sscanf( optarg , "%lf" , &h_max ) != 1 )
-	    error( "Error parsing h_max." );
-	  if ( myrank == 0 )
-        message( "maximum h set to %e." , h_max ); fflush(stdout);
-	  break;
-	case 'q':
-	  if ( sscanf( optarg , "%d" , &nr_queues ) != 1 )
-	    error( "Error parsing number of queues." );
-	  break;
-	case 'r':
-	  if ( sscanf( optarg , "%d" , &runs ) != 1 )
-	    error( "Error parsing number of runs." );
-	  break;
-	case 's':
-	  if ( sscanf( optarg , "%lf %lf %lf" , &shift[0] , &shift[1] , &shift[2] ) != 3 )
-	    error( "Error parsing shift." );
-	  if ( myrank == 0 )
-        message( "will shift parts by [ %.3f %.3f %.3f ]." , shift[0] , shift[1] , shift[2] );
-	  break;
-	case 't':
-	  if ( sscanf( optarg , "%d" , &nr_threads ) != 1 )
-	    error( "Error parsing number of threads." );
-	  break;
-	case 'w':
-	  if ( sscanf( optarg , "%d" , &space_subsize ) != 1 )
-	    error( "Error parsing sub size." );
-	  if ( myrank == 0 )
-        message( "sub size set to %i." , space_subsize );
-	  break;
-	case 'z':
-	  if ( sscanf( optarg , "%d" , &space_splitsize ) != 1 )
-	    error( "Error parsing split size." );
-	  if ( myrank == 0 )
-        message( "split size set to %i." , space_splitsize );
-	  break;
-	case '?':
-	  error( "Unknown option." );
-	  break;
-
-	}
-
-#if defined( WITH_MPI )
-    if ( myrank == 0 )
-      message( "Running with %i thread(s) per node.", nr_threads); 
+  /* Greeting message */
+  if (myrank == 0) greetings();
+
+  /* Init the space. */
+  bzero(&s, sizeof(struct space));
+
+  /* Parse the options */
+  while ((c = getopt(argc, argv, "a:c:d:f:g:m:q:r:s:t:w:z:")) != -1)
+    switch (c) {
+      case 'a':
+        if (sscanf(optarg, "%lf", &scaling) != 1)
+          error("Error parsing cutoff scaling.");
+        if (myrank == 0) message("scaling cutoff by %.3f.", scaling);
+        fflush(stdout);
+        break;
+      case 'c':
+        if (sscanf(optarg, "%lf", &clock) != 1) error("Error parsing clock.");
+        if (myrank == 0) message("clock set to %.3e.", clock);
+        fflush(stdout);
+        break;
+      case 'd':
+        if (sscanf(optarg, "%f", &dt_max) != 1)
+          error("Error parsing timestep.");
+        if (myrank == 0) message("dt set to %e.", dt_max);
+        fflush(stdout);
+        break;
+      case 'f':
+        if (!strcpy(ICfileName, optarg)) error("Error parsing IC file name.");
+        break;
+      case 'g':
+        if (sscanf(optarg, "%i %i %i", &grid[0], &grid[1], &grid[2]) != 3)
+          error("Error parsing grid.");
+        break;
+      case 'm':
+        if (sscanf(optarg, "%lf", &h_max) != 1) error("Error parsing h_max.");
+        if (myrank == 0) message("maximum h set to %e.", h_max);
+        fflush(stdout);
+        break;
+      case 'q':
+        if (sscanf(optarg, "%d", &nr_queues) != 1)
+          error("Error parsing number of queues.");
+        break;
+      case 'r':
+        if (sscanf(optarg, "%d", &runs) != 1)
+          error("Error parsing number of runs.");
+        break;
+      case 's':
+        if (sscanf(optarg, "%lf %lf %lf", &shift[0], &shift[1], &shift[2]) != 3)
+          error("Error parsing shift.");
+        if (myrank == 0)
+          message("will shift parts by [ %.3f %.3f %.3f ].", shift[0], shift[1],
+                  shift[2]);
+        break;
+      case 't':
+        if (sscanf(optarg, "%d", &nr_threads) != 1)
+          error("Error parsing number of threads.");
+        break;
+      case 'w':
+        if (sscanf(optarg, "%d", &space_subsize) != 1)
+          error("Error parsing sub size.");
+        if (myrank == 0) message("sub size set to %i.", space_subsize);
+        break;
+      case 'z':
+        if (sscanf(optarg, "%d", &space_splitsize) != 1)
+          error("Error parsing split size.");
+        if (myrank == 0) message("split size set to %i.", space_splitsize);
+        break;
+      case '?':
+        error("Unknown option.");
+        break;
+    }
+
+#if defined(WITH_MPI)
+  if (myrank == 0) message("Running with %i thread(s) per node.", nr_threads);
 #else
-    if ( myrank == 0 )
-      message( "Running with %i thread(s).", nr_threads);
+  if (myrank == 0) message("Running with %i thread(s).", nr_threads);
 #endif
 
-#if defined( WITH_MPI )
-    if ( myrank == 0 )
-        message( "grid set to [ %i %i %i ]." , grid[0] , grid[1] , grid[2] ); fflush(stdout);
+#if defined(WITH_MPI)
+  if (myrank == 0)
+    message("grid set to [ %i %i %i ].", grid[0], grid[1], grid[2]);
+  fflush(stdout);
 #endif
 
-    /* How large are the parts? */
-    if ( myrank == 0 ) {
-        message( "sizeof(struct part) is %li bytes." , (long int)sizeof( struct part ));
-        message( "sizeof(struct gpart) is %li bytes." , (long int)sizeof( struct gpart ));
-        }
+  /* How large are the parts? */
+  if (myrank == 0) {
+    message("sizeof(struct part) is %li bytes.", (long int)sizeof(struct part));
+    message("sizeof(struct gpart) is %li bytes.",
+            (long int)sizeof(struct gpart));
+  }
 
-    /* Initilaize unit system */
-    initUnitSystem(&us);
-    if ( myrank == 0 )
-      {
-	message( "Unit system: U_M = %e g.", us.UnitMass_in_cgs );
-	message( "Unit system: U_L = %e cm.", us.UnitLength_in_cgs );
-	message( "Unit system: U_t = %e s.", us.UnitTime_in_cgs );
-	message( "Unit system: U_I = %e A.", us.UnitCurrent_in_cgs );
-	message( "Unit system: U_T = %e K.", us.UnitTemperature_in_cgs );
-	message( "Density units: %e a^%f h^%f.", conversionFactor(&us, UNIT_CONV_DENSITY), aFactor(&us, UNIT_CONV_DENSITY), hFactor(&us, UNIT_CONV_DENSITY) );
-	message( "Entropy units: %e a^%f h^%f.", conversionFactor(&us, UNIT_CONV_ENTROPY), aFactor(&us, UNIT_CONV_ENTROPY), hFactor(&us, UNIT_CONV_ENTROPY) );
-      }
+  /* Initilaize unit system */
+  initUnitSystem(&us);
+  if (myrank == 0) {
+    message("Unit system: U_M = %e g.", us.UnitMass_in_cgs);
+    message("Unit system: U_L = %e cm.", us.UnitLength_in_cgs);
+    message("Unit system: U_t = %e s.", us.UnitTime_in_cgs);
+    message("Unit system: U_I = %e A.", us.UnitCurrent_in_cgs);
+    message("Unit system: U_T = %e K.", us.UnitTemperature_in_cgs);
+    message("Density units: %e a^%f h^%f.",
+            conversionFactor(&us, UNIT_CONV_DENSITY),
+            aFactor(&us, UNIT_CONV_DENSITY), hFactor(&us, UNIT_CONV_DENSITY));
+    message("Entropy units: %e a^%f h^%f.",
+            conversionFactor(&us, UNIT_CONV_ENTROPY),
+            aFactor(&us, UNIT_CONV_ENTROPY), hFactor(&us, UNIT_CONV_ENTROPY));
+  }
 
-    /* Check whether an IC file has been provided */
-    if ( strcmp( ICfileName, "" ) == 0 )
-      error( "An IC file name must be provided via the option -f" );
+  /* Check whether an IC file has been provided */
+  if (strcmp(ICfileName, "") == 0)
+    error("An IC file name must be provided via the option -f");
 
-    /* Read particles and space information from (GADGET) IC */
-    tic = getticks();
-#if defined( WITH_MPI ) 
-#if defined( HAVE_PARALLEL_HDF5 )
-    read_ic_parallel( ICfileName , dim , &parts , &N , &periodic, myrank, nr_nodes, MPI_COMM_WORLD, MPI_INFO_NULL );
+  /* Read particles and space information from (GADGET) IC */
+  tic = getticks();
+#if defined(WITH_MPI)
+#if defined(HAVE_PARALLEL_HDF5)
+  read_ic_parallel(ICfileName, dim, &parts, &N, &periodic, myrank, nr_nodes,
+                   MPI_COMM_WORLD, MPI_INFO_NULL);
 #else
-    read_ic_serial( ICfileName , dim , &parts , &N , &periodic, myrank, nr_nodes, MPI_COMM_WORLD, MPI_INFO_NULL );
+  read_ic_serial(ICfileName, dim, &parts, &N, &periodic, myrank, nr_nodes,
+                 MPI_COMM_WORLD, MPI_INFO_NULL);
 #endif
 #else
-    read_ic_single( ICfileName , dim , &parts , &N , &periodic );
+  read_ic_single(ICfileName, dim, &parts, &N, &periodic);
 #endif
 
-    if ( myrank == 0 )
-        message( "reading particle properties took %.3f ms." , ((double)(getticks() - tic)) / CPU_TPS * 1000 ); fflush(stdout);
-    
-    /* Apply h scaling */
-    if( scaling != 1.0 )
-      for ( k = 0 ; k < N ; k++ )
-	    parts[k].h *= scaling;
-
-    /* Apply shift */
-    if(shift[0] !=0 || shift[1] !=0 || shift[2] !=0 )
-      for ( k = 0 ; k < N ; k++ ) {
-	    parts[k].x[0] += shift[0];
-	    parts[k].x[1] += shift[1];
-	    parts[k].x[2] += shift[2];
-      }
+  if (myrank == 0)
+    message("reading particle properties took %.3f ms.",
+            ((double)(getticks() - tic)) / CPU_TPS * 1000);
+  fflush(stdout);
+
+  /* Apply h scaling */
+  if (scaling != 1.0)
+    for (k = 0; k < N; k++) parts[k].h *= scaling;
+
+  /* Apply shift */
+  if (shift[0] != 0 || shift[1] != 0 || shift[2] != 0)
+    for (k = 0; k < N; k++) {
+      parts[k].x[0] += shift[0];
+      parts[k].x[1] += shift[1];
+      parts[k].x[2] += shift[2];
+    }
 
-    /* printParticle( parts , 10312237508790 , N );
-    printParticle( parts , 10312286091950 , N ); */
-    /* for ( k = 0 ; k < N ; k++ )
-	if ( parts[k].id == 10312286091950 ||
-	     parts[k].id == 10286889371446  ||
-	     parts[k].id == 9536045071298 ||
-	     parts[k].id == 12726778692106  ||
-	     parts[k].id == 9479892852626  ||
-	     parts[k].id == 9535843125514  ||
-	     parts[k].id == 14151507889834  ||
-	     parts[k].id == 14144038209438  ||
-	     parts[k].id == 14121890205050  ||
-	     parts[k].id == 5868762382714  ||
-	     parts[k].id == 12840527117206 ||
-	     parts[k].id == 10292087642778  ||
-	     parts[k].id == 9465178320650  ||
-	     parts[k].id == 2834846537770  ||
-	     parts[k].id == 9483000048314  ||
-	     parts[k].id == 10247332828902  ||
-	     parts[k].id == 10223834653674  ||
-	     parts[k].id == 16719632108962  ||
-	     parts[k].id == 16759192850622  ||
-	     parts[k].id == 9483599082554  ||
-	     parts[k].id == 10247340329226 )
-            parts[k] = parts[--N]; */
-
-    /* Set default number of queues. */
-    if ( nr_queues < 0 )
-        nr_queues = nr_threads;
-            
-    /* Initialize the space with this data. */
-    tic = getticks();
-    space_init( &s , dim , parts , N , periodic , h_max );
-    if ( myrank == 0 )
-        message( "space_init took %.3f ms." , ((double)(getticks() - tic)) / CPU_TPS * 1000 ); fflush(stdout);
-
-    /* Set the default time step to 1.0f. */
-    if ( myrank == 0 )
-        message( "dt_max is %e." , dt_max );
-    
-    /* Say a few nice things about the space we just created. */
-    if ( myrank == 0 ) {
-        message( "space dimensions are [ %.3f %.3f %.3f ]." , s.dim[0] , s.dim[1] , s.dim[2] );
-        message( "space %s periodic." , s.periodic ? "is" : "isn't" );
-        message( "highest-level cell dimensions are [ %i %i %i ]." , s.cdim[0] , s.cdim[1] , s.cdim[2] );
-        message( "%i parts in %i cells." , s.nr_parts , s.tot_cells );
-        message( "maximum depth is %d." , s.maxdepth );
-        // message( "cutoffs in [ %g %g ]." , s.h_min , s.h_max ); fflush(stdout);
-        }
-    
-    /* Verify that each particle is in it's propper cell. */
-    if ( myrank == 0 ) {
-        icount = 0;
-        space_map_cells_pre( &s , 0 , &map_cellcheck , &icount );
-        message( "map_cellcheck picked up %i parts." , icount );
-        }
-    
-    if ( myrank == 0 ) {
-        data[0] = s.maxdepth; data[1] = 0;
-        space_map_cells_pre( &s , 0 , &map_maxdepth , data );
-        message( "nr of cells at depth %i is %i." , data[0] , data[1] );
-        }
-    
-    /* Dump the particle positions. */
-    // space_map_parts( &s , &map_dump , shift );
-    
-    
-    /* Initialize the engine with this space. */
-    tic = getticks();
-    message( "nr_nodes is %i." , nr_nodes );
-    engine_init( &e , &s , dt_max , nr_threads , nr_queues , nr_nodes , myrank , ENGINE_POLICY | engine_policy_steal );
-    if ( myrank == 0 )
-        message( "engine_init took %.3f ms." , ((double)(getticks() - tic)) / CPU_TPS * 1000 ); fflush(stdout);
+
+  /* Set default number of queues. */
+  if (nr_queues < 0) nr_queues = nr_threads;
+
+  /* Initialize the space with this data. */
+  tic = getticks();
+  space_init(&s, dim, parts, N, periodic, h_max);
+  if (myrank == 0)
+    message("space_init took %.3f ms.",
+            ((double)(getticks() - tic)) / CPU_TPS * 1000);
+  fflush(stdout);
+
+  /* Set the default time step to 1.0f. */
+  if (myrank == 0) message("dt_max is %e.", dt_max);
+
+  /* Say a few nice things about the space we just created. */
+  if (myrank == 0) {
+    message("space dimensions are [ %.3f %.3f %.3f ].", s.dim[0], s.dim[1],
+            s.dim[2]);
+    message("space %s periodic.", s.periodic ? "is" : "isn't");
+    message("highest-level cell dimensions are [ %i %i %i ].", s.cdim[0],
+            s.cdim[1], s.cdim[2]);
+    message("%i parts in %i cells.", s.nr_parts, s.tot_cells);
+    message("maximum depth is %d.", s.maxdepth);
+    // message( "cutoffs in [ %g %g ]." , s.h_min , s.h_max ); fflush(stdout);
+  }
+
+  /* Verify that each particle is in it's propper cell. */
+  if (myrank == 0) {
+    icount = 0;
+    space_map_cells_pre(&s, 0, &map_cellcheck, &icount);
+    message("map_cellcheck picked up %i parts.", icount);
+  }
+
+  if (myrank == 0) {
+    data[0] = s.maxdepth;
+    data[1] = 0;
+    space_map_cells_pre(&s, 0, &map_maxdepth, data);
+    message("nr of cells at depth %i is %i.", data[0], data[1]);
+  }
+
+  /* Dump the particle positions. */
+  // space_map_parts( &s , &map_dump , shift );
+
+  /* Initialize the engine with this space. */
+  tic = getticks();
+  message("nr_nodes is %i.", nr_nodes);
+  engine_init(&e, &s, dt_max, nr_threads, nr_queues, nr_nodes, myrank,
+              ENGINE_POLICY | engine_policy_steal);
+  if (myrank == 0)
+    message("engine_init took %.3f ms.",
+            ((double)(getticks() - tic)) / CPU_TPS * 1000);
+  fflush(stdout);
 
 #ifdef WITH_MPI
-    /* Split the space. */
-    engine_split( &e , grid );
-    engine_redistribute ( &e );
+  /* Split the space. */
+  engine_split(&e, grid);
+  engine_redistribute(&e);
 #endif
 
-    message("Before write !");
-    /* Write the state of the system as it is before starting time integration. */
-    tic = getticks();
-#if defined( WITH_MPI ) 
-#if defined( HAVE_PARALLEL_HDF5 )
-    write_output_parallel(&e, &us, myrank, nr_nodes, MPI_COMM_WORLD, MPI_INFO_NULL);
+  message("Before write !");
+  /* Write the state of the system as it is before starting time integration. */
+  tic = getticks();
+#if defined(WITH_MPI)
+#if defined(HAVE_PARALLEL_HDF5)
+  write_output_parallel(&e, &us, myrank, nr_nodes, MPI_COMM_WORLD,
+                        MPI_INFO_NULL);
 #else
-    write_output_serial(&e, &us, myrank, nr_nodes, MPI_COMM_WORLD, MPI_INFO_NULL);
+  write_output_serial(&e, &us, myrank, nr_nodes, MPI_COMM_WORLD, MPI_INFO_NULL);
 #endif
 #else
-    write_output_single(&e, &us);
+  write_output_single(&e, &us);
 #endif
-    message( "writing particle properties took %.3f ms." , ((double)(getticks() - tic)) / CPU_TPS * 1000 ); fflush(stdout);
-    
-    /* Init the runner history. */
-    #ifdef HIST
-    for ( k = 0 ; k < runner_hist_N ; k++ )
-        runner_hist_bins[k] = 0;
-    #endif
-    
-    /* Inauguration speech. */
-    if ( runs < INT_MAX )
-        message( "starting for %i steps with %i threads and %i queues..." , runs , e.nr_threads , e.sched.nr_queues );
-    else
-        message( "starting for t=%.3e with %i threads and %i queues..." , clock , e.nr_threads , e.sched.nr_queues );
-    fflush(stdout);
-    
-    /* Set a target particle. */
-    /* long long int pid[5];
-    unsigned int seed = 6178;
-    for ( k = 0 ; k < 5 ; k++ )
-        pid[k] = s.gparts[ rand_r( &seed ) % N ].part->id;
-    for ( k = 0 ; k < 5 ; k++ )
-        pairs_single_grav( dim , pid[k] , s.gparts , N , 0 ); */
-    
-    /* Legend. */
-    if ( myrank == 0 )
-        printf( "# step time e_tot e_kin e_temp dt dt_step count dt_min dt_max\n" );
-    
-    /* Let loose a runner on the space. */
-    for ( j = 0 ; j < runs && e.time < clock ; j++ ) {
-    
-        /* Repartition the space amongst the nodes? */
-        #if defined(WITH_MPI) && defined(HAVE_METIS)
-            if ( j % 100 == 2 )
-                e.forcerepart = 1;
-        #endif
-        
-        /* Force a rebuild for testing. */
-        /* if ( j % 4 == 3 )
-            e.forcerebuild = 1; */
-        
-        // message( "starting run %i/%i (t=%.3e) with %i threads and %i queues..." , j+1 , runs , e.time , e.nr_threads , e.nr_queues ); fflush(stdout);
-        timers_reset( timers_mask_all );
-        #ifdef COUNTER
-            for ( k = 0 ; k < runner_counter_count ; k++ )
-                runner_counter[k] = 0;
-        #endif
-        
-        /* Take a step. */
-        engine_step( &e );
-        
-        if ( j % 100 == 0 )
-	  {
-
-#if defined( WITH_MPI ) 
-#if defined( HAVE_PARALLEL_HDF5 )
-	    write_output_parallel(&e, &us, myrank, nr_nodes, MPI_COMM_WORLD, MPI_INFO_NULL);
+  message("writing particle properties took %.3f ms.",
+          ((double)(getticks() - tic)) / CPU_TPS * 1000);
+  fflush(stdout);
+
+/* Init the runner history. */
+#ifdef HIST
+  for (k = 0; k < runner_hist_N; k++) runner_hist_bins[k] = 0;
+#endif
+
+  /* Inauguration speech. */
+  if (runs < INT_MAX)
+    message("starting for %i steps with %i threads and %i queues...", runs,
+            e.nr_threads, e.sched.nr_queues);
+  else
+    message("starting for t=%.3e with %i threads and %i queues...", clock,
+            e.nr_threads, e.sched.nr_queues);
+  fflush(stdout);
+
+
+  /* Legend. */
+  if (myrank == 0)
+    printf("# step time e_tot e_kin e_temp dt dt_step count dt_min dt_max\n");
+
+  /* Let loose a runner on the space. */
+  for (j = 0; j < runs && e.time < clock; j++) {
+
+/* Repartition the space amongst the nodes? */
+#if defined(WITH_MPI) && defined(HAVE_METIS)
+    if (j % 100 == 2) e.forcerepart = 1;
+#endif
+
+    timers_reset(timers_mask_all);
+#ifdef COUNTER
+    for (k = 0; k < runner_counter_count; k++) runner_counter[k] = 0;
+#endif
+
+    /* Take a step. */
+    engine_step(&e);
+
+    if (j % 100 == 0) {
+
+#if defined(WITH_MPI)
+#if defined(HAVE_PARALLEL_HDF5)
+      write_output_parallel(&e, &us, myrank, nr_nodes, MPI_COMM_WORLD,
+                            MPI_INFO_NULL);
 #else
-	    write_output_serial(&e, &us, myrank, nr_nodes, MPI_COMM_WORLD, MPI_INFO_NULL);
+      write_output_serial(&e, &us, myrank, nr_nodes, MPI_COMM_WORLD,
+                          MPI_INFO_NULL);
 #endif
 #else
-	    write_output_single(&e, &us);
+      write_output_single(&e, &us);
 #endif
+    }
 
-          }
-                
-        /* Dump a line of agregate output. */
-        if ( myrank == 0 ) {
-            printf( "%i %e %.16e %.16e %.16e %.3e %.3e %i %.3e %.3e" ,
-                j , e.time ,
-                e.ekin+e.epot , e.ekin , e.epot ,
-                e.dt , e.dt_step , e.count_step ,
-                e.dt_min , e.dt_max );
-            for ( k = 0 ; k < timer_count ; k++ )
-                printf( " %.3f" , ((double)timers[k])/CPU_TPS*1000 );
-            printf( "\n" ); fflush(stdout);
-            }
-        /* for ( k = 0 ; k < 5 ; k++ )
-            printgParticle( s.gparts , pid[k] , N ); */
-        
+    /* Dump a line of agregate output. */
+    if (myrank == 0) {
+      printf("%i %e %.16e %.16e %.16e %.3e %.3e %i %.3e %.3e", j, e.time,
+             e.ekin + e.epot, e.ekin, e.epot, e.dt, e.dt_step, e.count_step,
+             e.dt_min, e.dt_max);
+      for (k = 0; k < timer_count; k++)
+        printf(" %.3f", ((double)timers[k]) / CPU_TPS * 1000);
+      printf("\n");
+      fflush(stdout);
     }
-        
-    /* Print the values of the runner histogram. */
+    /* for ( k = 0 ; k < 5 ; k++ )
+        printgParticle( s.gparts , pid[k] , N ); */
+  }
+
+/* Print the values of the runner histogram. */
 #ifdef HIST
-    printf( "main: runner histogram data:\n" );
-    for ( k = 0 ; k < runner_hist_N ; k++ )
-      printf( " %e %e %e\n" ,
-	      runner_hist_a + k * (runner_hist_b - runner_hist_a) / runner_hist_N ,
-	      runner_hist_a + (k + 1) * (runner_hist_b - runner_hist_a) / runner_hist_N ,
-	      (double)runner_hist_bins[k] );
+  printf("main: runner histogram data:\n");
+  for (k = 0; k < runner_hist_N; k++)
+    printf(" %e %e %e\n",
+           runner_hist_a + k * (runner_hist_b - runner_hist_a) / runner_hist_N,
+           runner_hist_a +
+               (k + 1) * (runner_hist_b - runner_hist_a) / runner_hist_N,
+           (double)runner_hist_bins[k]);
 #endif
 
-    /* Loop over the parts directly. */
-    // for ( k = 0 ; k < N ; k++ )
-    //     printf( " %i %e %e\n" , s.parts[k].id , s.parts[k].count , s.parts[k].count_dh );
-    
-    /* Dump the task data. */
-    /* #ifdef WITH_MPI
-    for ( j = 0 ; j < nr_nodes ; j++ ) {
-    MPI_Barrier( MPI_COMM_WORLD );
-    if ( j == myrank ) {
-    printf( " %03i 0 0 0 0 %lli 0 0 0 0\n" , myrank , e.tic_step );
-    for ( k = 0 ; k < e.sched.nr_tasks ; k++ )
-        if ( !e.sched.tasks[k].skip && !e.sched.tasks[k].implicit )
-            printf( " %03i %i %i %i %i %lli %lli %i %i %i\n" ,
-                myrank ,
-                e.sched.tasks[k].rid , e.sched.tasks[k].type , e.sched.tasks[k].subtype ,
-                (e.sched.tasks[k].cj == NULL) , e.sched.tasks[k].tic , e.sched.tasks[k].toc ,
-		e.sched.tasks[k].ci->count , (e.sched.tasks[k].cj!=NULL)?e.sched.tasks[k].cj->count:0 , e.sched.tasks[k].flags); 
-    fflush(stdout);
-    sleep(1);
-    }
-    }
-    #else
-    for ( k = 0 ; k < e.sched.nr_tasks ; k++ )
-        if ( !e.sched.tasks[k].skip && !e.sched.tasks[k].implicit )
-                printf( " %i %i %i %i %lli %lli %i %i\n" ,
-                e.sched.tasks[k].rid , e.sched.tasks[k].type , e.sched.tasks[k].subtype , 
-                (e.sched.tasks[k].cj == NULL) , e.sched.tasks[k].tic , e.sched.tasks[k].toc ,
-                e.sched.tasks[k].ci->count , 
-                (e.sched.tasks[k].cj==NULL)?0:e.sched.tasks[k].cj->count ); 
-    #endif */
-    
-    /* Write final output. */
-#if defined( WITH_MPI ) 
-#if defined( HAVE_PARALLEL_HDF5 )
-    write_output_parallel(&e, &us, myrank, nr_nodes, MPI_COMM_WORLD, MPI_INFO_NULL);
+
+/* Dump the task data. */
+/* #ifdef WITH_MPI
+for ( j = 0 ; j < nr_nodes ; j++ ) {
+MPI_Barrier( MPI_COMM_WORLD );
+if ( j == myrank ) {
+printf( " %03i 0 0 0 0 %lli 0 0 0 0\n" , myrank , e.tic_step );
+for ( k = 0 ; k < e.sched.nr_tasks ; k++ )
+    if ( !e.sched.tasks[k].skip && !e.sched.tasks[k].implicit )
+        printf( " %03i %i %i %i %i %lli %lli %i %i %i\n" ,
+            myrank ,
+            e.sched.tasks[k].rid , e.sched.tasks[k].type ,
+e.sched.tasks[k].subtype ,
+            (e.sched.tasks[k].cj == NULL) , e.sched.tasks[k].tic ,
+e.sched.tasks[k].toc ,
+            e.sched.tasks[k].ci->count ,
+(e.sched.tasks[k].cj!=NULL)?e.sched.tasks[k].cj->count:0 ,
+e.sched.tasks[k].flags);
+fflush(stdout);
+sleep(1);
+}
+}
 #else
-    write_output_serial(&e, &us, myrank, nr_nodes, MPI_COMM_WORLD, MPI_INFO_NULL);
+for ( k = 0 ; k < e.sched.nr_tasks ; k++ )
+    if ( !e.sched.tasks[k].skip && !e.sched.tasks[k].implicit )
+            printf( " %i %i %i %i %lli %lli %i %i\n" ,
+            e.sched.tasks[k].rid , e.sched.tasks[k].type ,
+e.sched.tasks[k].subtype ,
+            (e.sched.tasks[k].cj == NULL) , e.sched.tasks[k].tic ,
+e.sched.tasks[k].toc ,
+            e.sched.tasks[k].ci->count ,
+            (e.sched.tasks[k].cj==NULL)?0:e.sched.tasks[k].cj->count );
+#endif */
+
+/* Write final output. */
+#if defined(WITH_MPI)
+#if defined(HAVE_PARALLEL_HDF5)
+  write_output_parallel(&e, &us, myrank, nr_nodes, MPI_COMM_WORLD,
+                        MPI_INFO_NULL);
+#else
+  write_output_serial(&e, &us, myrank, nr_nodes, MPI_COMM_WORLD, MPI_INFO_NULL);
 #endif
 #else
-    write_output_single(&e, &us);
+  write_output_single(&e, &us);
 #endif
-        
+
 #ifdef WITH_MPI
-    if ( MPI_Finalize() != MPI_SUCCESS )
-      error( "call to MPI_Finalize failed with error %i." , res );
+  if (MPI_Finalize() != MPI_SUCCESS)
+    error("call to MPI_Finalize failed with error %i.", res);
 #endif
-    
-    /* Say goodbye. */
-    message( "done." );
-    
-    /* All is calm, all is bright. */
-    return 0;
-    
+
+  /* Say goodbye. */
+  message("done.");
+
+  /* All is calm, all is bright. */
+  return 0;
 }