diff --git a/src/debug.c b/src/debug.c
index 93f0136f0d1ffa60dfd69e6e3ddf9ef483b21958..716fa66934f988196b9ebfa8f037c4c8c28d92d2 100644
--- a/src/debug.c
+++ b/src/debug.c
@@ -58,7 +58,7 @@ void printParticle(struct part *parts, struct xpart *xparts, long long int id,
   /* Look for the particle. */
   for (size_t i = 0; i < N; i++)
     if (parts[i].id == id) {
-      printf("## Particle[%d]:\n id=%lld", i, parts[i].id);
+      printf("## Particle[%zd]:\n id=%lld", i, parts[i].id);
       hydro_debug_particle(&parts[i], &xparts[i]);
       found = 1;
       break;
@@ -75,7 +75,7 @@ void printgParticle(struct gpart *parts, long long int id, int N) {
   for (size_t i = 0; i < N; i++)
     if (parts[i].id == -id || (parts[i].id > 0 && parts[i].part->id == id)) {
       printf(
-          "## gParticle[%d]: id=%lld, x=[%.16e,%.16e,%.16e], "
+          "## gParticle[%zd]: id=%lld, x=[%.16e,%.16e,%.16e], "
           "v=[%.3e,%.3e,%.3e], a=[%.3e,%.3e,%.3e], m=%.3e, t_begin=%d, "
           "t_end=%d\n",
           i, parts[i].part->id, parts[i].x[0], parts[i].x[1], parts[i].x[2],
diff --git a/src/engine.c b/src/engine.c
index cc69711bf1021263f1f2b0dffb42a631ba6db787..d75cef107c8effe8a24df01e94c660863b6be8ce 100644
--- a/src/engine.c
+++ b/src/engine.c
@@ -295,344 +295,17 @@ void engine_repartition(struct engine *e) {
 
 #if defined(WITH_MPI) && defined(HAVE_METIS)
 
-  int res;
-  idx_t *inds, *nodeIDs;
-  idx_t *weights_v = NULL, *weights_e = NULL;
-  struct space *s = e->s;
-  int nr_cells = s->nr_cells, my_cells = 0;
-  struct cell *cells = s->cells;
-  int ind[3], *cdim = s->cdim;
-  struct task *tasks = e->sched.tasks;
-  int nr_nodes = e->nr_nodes, nodeID = e->nodeID;
-  float wscale = 1e-3, vscale = 1e-3, wscale_buff;
-  idx_t wtot = 0;
-  idx_t wmax = 1e9 / e->nr_nodes;
-  idx_t wmin;
-
   /* Clear the repartition flag. */
   enum repartition_type reparttype = e->forcerepart;
   e->forcerepart = REPART_NONE;
 
   /* Nothing to do if only using a single node. Also avoids METIS
    * bug that doesn't handle this case well. */
-  if (nr_nodes == 1) return;
-
-  /* Allocate the inds and weights. */
-  if ((inds = (idx_t *)malloc(sizeof(idx_t) * 26 *nr_cells)) == NULL ||
-      (weights_v = (idx_t *)malloc(sizeof(idx_t) *nr_cells)) == NULL ||
-      (weights_e = (idx_t *)malloc(sizeof(idx_t) * 26 *nr_cells)) == NULL ||
-      (nodeIDs = (idx_t *)malloc(sizeof(idx_t) * nr_cells)) == NULL)
-    error("Failed to allocate inds and weights arrays.");
-
-  /* Fill the inds array. */
-  for (int cid = 0; cid < nr_cells; cid++) {
-    ind[0] = cells[cid].loc[0] / s->cells[cid].h[0] + 0.5;
-    ind[1] = cells[cid].loc[1] / s->cells[cid].h[1] + 0.5;
-    ind[2] = cells[cid].loc[2] / s->cells[cid].h[2] + 0.5;
-    int l = 0;
-    for (int i = -1; i <= 1; i++) {
-      int ii = ind[0] + i;
-      if (ii < 0)
-        ii += cdim[0];
-      else if (ii >= cdim[0])
-        ii -= cdim[0];
-      for (int j = -1; j <= 1; j++) {
-        int jj = ind[1] + j;
-        if (jj < 0)
-          jj += cdim[1];
-        else if (jj >= cdim[1])
-          jj -= cdim[1];
-        for (int k = -1; k <= 1; k++) {
-          int kk = ind[2] + k;
-          if (kk < 0)
-            kk += cdim[2];
-          else if (kk >= cdim[2])
-            kk -= cdim[2];
-          if (i || j || k) {
-            inds[cid * 26 + l] = cell_getid(cdim, ii, jj, kk);
-            l += 1;
-          }
-        }
-      }
-    }
-  }
-
-  /* Init the weights arrays. */
-  bzero(weights_e, sizeof(idx_t) * 26 * nr_cells);
-  bzero(weights_v, sizeof(idx_t) * nr_cells);
-
-  /* Loop over the tasks... */
-  for (int j = 0; j < e->sched.nr_tasks; j++) {
-
-    /* Get a pointer to the kth task. */
-    const struct task *t = &tasks[j];
-
-    /* Skip un-interesting tasks. */
-    if (t->type != task_type_self && t->type != task_type_pair &&
-        t->type != task_type_sub && t->type != task_type_ghost &&
-        t->type != task_type_kick1 && t->type != task_type_kick2)
-      continue;
-
-    /* Get the task weight. */
-    idx_t w = (t->toc - t->tic) * wscale;
-    if (w < 0) error("Bad task weight (%" SCIDX ").", w);
-
-    /* Do we need to re-scale? */
-    wtot += w;
-    while (wtot > wmax) {
-      wscale /= 2;
-      wtot /= 2;
-      w /= 2;
-      for (int k = 0; k < 26 * nr_cells; k++) weights_e[k] *= 0.5;
-      for (int k = 0; k < nr_cells; k++) weights_v[k] *= 0.5;
-    }
-
-    /* Get the top-level cells involved. */
-    const struct cell *ci, *cj = NULL;
-    for (ci = t->ci; ci->parent != NULL; ci = ci->parent)
-      ;
-    if (t->cj != NULL)
-      for (cj = t->cj; cj->parent != NULL; cj = cj->parent)
-        ;
-
-    /* Get the cell IDs. */
-    const int cid = ci - cells;
-
-    /* Different weights for different tasks. */
-    if (t->type == task_type_ghost || t->type == task_type_kick1 ||
-        t->type == task_type_kick2) {
-
-      /* Particle updates add only to vertex weight. */
-      weights_v[cid] += w;
-
-    }
-
-    /* Self interaction? */
-    else if ((t->type == task_type_self && ci->nodeID == nodeID) ||
-             (t->type == task_type_sub && cj == NULL && ci->nodeID == nodeID)) {
-
-      /* Self interactions add only to vertex weight. */
-      weights_v[cid] += w;
-
-    }
-
-    /* Pair? */
-    else if (t->type == task_type_pair ||
-             (t->type == task_type_sub && cj != NULL)) {
-
-      /* In-cell pair? */
-      if (ci == cj) {
-
-        /* Add weight to vertex for ci. */
-        weights_v[cid] += w;
-
-      }
-
-      /* Distinct cells with local ci? */
-      else if (ci->nodeID == nodeID) {
-
-        /* Index of the jth cell. */
-        const int cjd = cj - cells;
-
-        /* Add half of weight to each cell. */
-        if (ci->nodeID == nodeID) weights_v[cid] += 0.5 * w;
-        if (cj->nodeID == nodeID) weights_v[cjd] += 0.5 * w;
-
-        /* Add Weight to edge. */
-        int k = 26 * cid;
-        while (inds[k] != cjd) k++;
-        weights_e[k] += w;
-        k = 26 * cjd;
-        while (inds[k] != cid) k++;
-        weights_e[k] += w;
-      }
-    }
-  }
+  if (e->nr_nodes == 1) return;
 
-  /* Get the minimum scaling and re-scale if necessary. */
-  if ((res = MPI_Allreduce(&wscale, &wscale_buff, 1, MPI_FLOAT, MPI_MIN,
-                           MPI_COMM_WORLD)) != MPI_SUCCESS) {
-    char buff[MPI_MAX_ERROR_STRING];
-    int len;
-    MPI_Error_string(res, buff, &len);
-    error("Failed to allreduce the weight scale (%s).", buff);
-  }
-  if (wscale_buff != wscale) {
-    float scale = wscale_buff / wscale;
-    for (int k = 0; k < 26 * nr_cells; k++) weights_e[k] *= scale;
-    for (int k = 0; k < nr_cells; k++) weights_v[k] *= scale;
-  }
-
-/* Merge the weights arrays across all nodes. */
-#if IDXTYPEWIDTH == 32
-  if ((res = MPI_Reduce((nodeID == 0) ? MPI_IN_PLACE : weights_v, weights_v,
-                        nr_cells, MPI_INT, MPI_SUM, 0, MPI_COMM_WORLD)) !=
-      MPI_SUCCESS) {
-#else
-  if ((res = MPI_Reduce((nodeID == 0) ? MPI_IN_PLACE : weights_v, weights_v,
-                        nr_cells, MPI_LONG_LONG_INT, MPI_SUM, 0,
-                        MPI_COMM_WORLD)) != MPI_SUCCESS) {
-#endif
-    char buff[MPI_MAX_ERROR_STRING];
-    int len;
-    MPI_Error_string(res, buff, &len);
-    error("Failed to allreduce vertex weights (%s).", buff);
-  }
-#if IDXTYPEWIDTH == 32
-  if (MPI_Reduce((nodeID == 0) ? MPI_IN_PLACE : weights_e, weights_e,
-                 26 * nr_cells, MPI_INT, MPI_SUM, 0,
-                 MPI_COMM_WORLD) != MPI_SUCCESS)
-#else
-  if (MPI_Reduce((nodeID == 0) ? MPI_IN_PLACE : weights_e, weights_e,
-                 26 * nr_cells, MPI_LONG_LONG_INT, MPI_SUM, 0,
-                 MPI_COMM_WORLD) != MPI_SUCCESS)
-#endif
-    error("Failed to allreduce edge weights.");
-
-  /* As of here, only one node needs to compute the partition. */
-  if (nodeID == 0) {
-
-    /* Final rescale of all weights to avoid a large range. Large ranges have
-     * been seen to cause an incomplete graph. */
-    wmin = wmax;
-    wmax = 0.0;
-    for (int k = 0; k < 26 * nr_cells; k++) {
-      wmax = weights_e[k] > wmax ? weights_e[k] : wmax;
-      wmin = weights_e[k] < wmin ? weights_e[k] : wmin;
-    }
-    if ((wmax - wmin) > engine_maxmetisweight) {
-      wscale = engine_maxmetisweight / (wmax - wmin);
-      for (int k = 0; k < 26 * nr_cells; k++) {
-        weights_e[k] = (weights_e[k] - wmin) * wscale + 1;
-      }
-      for (int k = 0; k < nr_cells; k++) {
-        weights_v[k] = (weights_v[k] - wmin) * wscale + 1;
-      }
-    }
-
-    /* Check that the edge weights are fully symmetric. */
-    /* for ( cid = 0 ; cid < nr_cells ; cid++ )
-        for ( k = 0 ; k < 26 ; k++ ) {
-            cjd = inds[ cid*26 + k ];
-            for ( j = 26*cjd ; inds[j] != cid ; j++ );
-            if ( weights_e[ cid*26+k ] != weights_e[ j ] )
-                error( "Unsymmetric edge weights detected (%i vs %i)." ,
-       weights_e[ cid*26+k ] , weights_e[ j ] );
-            } */
-    /* int w_min = weights_e[0], w_max = weights_e[0], w_tot = weights_e[0];
-    for ( k = 1 ; k < 26*nr_cells ; k++ ) {
-        w_tot += weights_e[k];
-        if ( weights_e[k] < w_min )
-            w_min = weights_e[k];
-        else if ( weights_e[k] > w_max )
-            w_max = weights_e[k];
-        }
-    message( "edge weights in [ %i , %i ], tot=%i." , w_min , w_max , w_tot );
-    w_min = weights_e[0], w_max = weights_e[0]; w_tot = weights_v[0];
-    for ( k = 1 ; k < nr_cells ; k++ ) {
-        w_tot += weights_v[k];
-        if ( weights_v[k] < w_min )
-            w_min = weights_v[k];
-        else if ( weights_v[k] > w_max )
-            w_max = weights_v[k];
-        }
-    message( "vertex weights in [ %i , %i ], tot=%i." , w_min , w_max , w_tot );
-    */
-
-    /* Make sure there are no zero weights. */
-    for (int k = 0; k < 26 * nr_cells; k++)
-      if (weights_e[k] == 0) weights_e[k] = 1;
-    for (int k = 0; k < nr_cells; k++)
-      if ((weights_v[k] *= vscale) == 0) weights_v[k] = 1;
-
-    /* Allocate and fill the connection array. */
-    idx_t *offsets;
-    if ((offsets = (idx_t *)malloc(sizeof(idx_t) * (nr_cells + 1))) == NULL)
-      error("Failed to allocate offsets buffer.");
-    offsets[0] = 0;
-    for (int k = 0; k < nr_cells; k++) offsets[k + 1] = offsets[k] + 26;
-
-    /* Set the METIS options. +1 to keep the GCC sanitizer happy. */
-    idx_t options[METIS_NOPTIONS + 1];
-    METIS_SetDefaultOptions(options);
-    options[METIS_OPTION_OBJTYPE] = METIS_OBJTYPE_CUT;
-    options[METIS_OPTION_NUMBERING] = 0;
-    options[METIS_OPTION_CONTIG] = 1;
-    options[METIS_OPTION_NCUTS] = 10;
-    options[METIS_OPTION_NITER] = 20;
-    // options[ METIS_OPTION_UFACTOR ] = 1;
-
-    /* Set the initial partition, although this is probably ignored. */
-    for (int k = 0; k < nr_cells; k++) nodeIDs[k] = cells[k].nodeID;
-
-    /* Call METIS. */
-    idx_t one = 1, idx_nr_cells = nr_cells, idx_nr_nodes = nr_nodes;
-    idx_t objval;
-
-    /* Dump graph in METIS format */
-    /*dumpMETISGraph("metis_graph", idx_nr_cells, one, offsets, inds,
-                   weights_v, NULL, weights_e);*/
-
-    if (METIS_PartGraphRecursive(&idx_nr_cells, &one, offsets, inds, weights_v,
-                                 NULL, weights_e, &idx_nr_nodes, NULL, NULL,
-                                 options, &objval, nodeIDs) != METIS_OK)
-      error("Call to METIS_PartGraphRecursive failed.");
-
-    /* Dump the 3d array of cell IDs. */
-    /* printf( "engine_repartition: nodeIDs = reshape( [" );
-    for ( i = 0 ; i < cdim[0]*cdim[1]*cdim[2] ; i++ )
-        printf( "%i " , (int)nodeIDs[ i ] );
-    printf("] ,%i,%i,%i);\n",cdim[0],cdim[1],cdim[2]); */
-
-    /* Check that the nodeIDs are ok. */
-    for (int k = 0; k < nr_cells; k++) {
-      if (nodeIDs[k] < 0 || nodeIDs[k] >= nr_nodes)
-        error("Got bad nodeID %" PRIDX " for cell %i.", nodeIDs[k], k);
-    }
-
-    /* Check that the partition is complete and all nodes have some work. */
-    int present[nr_nodes];
-    int failed = 0;
-    for (int i = 0; i < nr_nodes; i++) present[i] = 0;
-    for (int i = 0; i < nr_cells; i++) present[nodeIDs[i]]++;
-    for (int i = 0; i < nr_nodes; i++) {
-      if (!present[i]) {
-        failed = 1;
-        message("Node %d is not present after repartition", i);
-      }
-    }
-
-    /* If partition failed continue with the current one, but make this
-     * clear. */
-    if (failed) {
-      message(
-          "WARNING: METIS repartition has failed, continuing with "
-          "the current partition, load balance will not be optimal");
-      for (int k = 0; k < nr_cells; k++) nodeIDs[k] = cells[k].nodeID;
-    }
-  }
-
-/* Broadcast the result of the partition. */
-#if IDXTYPEWIDTH == 32
-  if (MPI_Bcast(nodeIDs, nr_cells, MPI_INT, 0, MPI_COMM_WORLD) != MPI_SUCCESS)
-    error("Failed to bcast the node IDs.");
-#else
-  if (MPI_Bcast(nodeIDs, nr_cells, MPI_LONG_LONG_INT, 0, MPI_COMM_WORLD) !=
-      MPI_SUCCESS)
-    error("Failed to bcast the node IDs.");
-#endif
-
-  /* Set the cell nodeIDs and clear any non-local parts. */
-  for (int k = 0; k < nr_cells; k++) {
-    cells[k].nodeID = nodeIDs[k];
-    if (nodeIDs[k] == nodeID) my_cells += 1;
-  }
-
-  /* Clean up. */
-  free(inds);
-  free(weights_v);
-  free(weights_e);
-  free(nodeIDs);
+  /* Do the repartitioning. */
+  partition_repartition(reparttype, e->nodeID, e->nr_nodes, e->s,
+                        e->sched.tasks, e->sched.nr_tasks);
 
   /* Now comes the tricky part: Exchange particles between all nodes.
      This is done in two steps, first allreducing a matrix of
@@ -687,6 +360,7 @@ void engine_addtasks_grav(struct engine *e, struct cell *c, struct task *up,
 
 void engine_addtasks_send(struct engine *e, struct cell *ci, struct cell *cj) {
 
+#ifdef WITH_MPI
   struct link *l = NULL;
   struct scheduler *s = &e->sched;
 
@@ -740,6 +414,7 @@ void engine_addtasks_send(struct engine *e, struct cell *ci, struct cell *cj) {
 void engine_addtasks_recv(struct engine *e, struct cell *c, struct task *t_xv,
                           struct task *t_rho) {
 
+#ifdef WITH_MPI
   struct scheduler *s = &e->sched;
 
   /* Do we need to construct a recv task? */
@@ -1321,7 +996,7 @@ int engine_marktasks(struct engine *e) {
   struct scheduler *s = &e->sched;
   const int nr_tasks = s->nr_tasks, *ind = s->tasks_ind;
   struct task *tasks = s->tasks;
-  const float dt_step = e->dt_step;
+  float ti_end = e->ti_current;
 
   /* Much less to do here if we're on a fixed time-step. */
   if ((e->policy & engine_policy_fixdt) == engine_policy_fixdt) {
@@ -1456,8 +1131,8 @@ void engine_print(struct engine *e) {
 
   /* Count and print the number of each task type. */
   int counts[task_type_count + 1];
-  for (k = 0; k <= task_type_count; k++) counts[k] = 0;
-  for (k = 0; k < sched->nr_tasks; k++)
+  for (int k = 0; k <= task_type_count; k++) counts[k] = 0;
+  for (int k = 0; k < sched->nr_tasks; k++)
     if (!sched->tasks[k].skip)
       counts[(int)sched->tasks[k].type] += 1;
     else
@@ -1468,7 +1143,7 @@ void engine_print(struct engine *e) {
 #else
   printf("engine_print: task counts are [ %s=%i", taskID_names[0], counts[0]);
 #endif
-  for (k = 1; k < task_type_count; k++)
+  for (int k = 1; k < task_type_count; k++)
     printf(" %s=%i", taskID_names[k], counts[k]);
   printf(" skipped=%i ]\n", counts[task_type_count]);
   fflush(stdout);