diff --git a/autom4te.cache/requests b/autom4te.cache/requests
index 5f6702525b223f35d76288fa655f62b87988382b..7675eba62587b513c5752f30ea1140a2981e22c3 100644
--- a/autom4te.cache/requests
+++ b/autom4te.cache/requests
@@ -58,9 +58,9 @@
                         '_LT_AC_LANG_CXX_CONFIG' => 1,
                         'AM_PROG_MKDIR_P' => 1,
                         'DX_IF_FEATURE' => 1,
+                        'DX_FEATURE_doc' => 1,
                         'AM_AUTOMAKE_VERSION' => 1,
                         'DX_CHM_FEATURE' => 1,
-                        'DX_FEATURE_doc' => 1,
                         'DX_PDF_FEATURE' => 1,
                         'AM_SUBST_NOTMAKE' => 1,
                         'AM_MISSING_PROG' => 1,
diff --git a/examples/test_bh.c b/examples/test_bh.c
index a7e2a9cb6912597369b633bcf85d6eed84ae6388..4c410a518c64d43e948fc4acbe6d6a3bfa95c062 100644
--- a/examples/test_bh.c
+++ b/examples/test_bh.c
@@ -1,22 +1,22 @@
 /*******************************************************************************
  * This file is part of QuickSched.
- * Coypright (c) 2013 Pedro Gonnet (pedro.gonnet@durham.ac.uk)
- * 
+ * Coypright (c) 2014 Pedro Gonnet (pedro.gonnet@durham.ac.uk),
+ *                    Matthieu Schaller (matthieu.schaller@durham.ac.uk)
+ *
  * This program is free software: you can redistribute it and/or modify
  * it under the terms of the GNU Lesser General Public License as published
  * by the Free Software Foundation, either version 3 of the License, or
  * (at your option) any later version.
- * 
+ *
  * This program is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  * GNU General Public License for more details.
- * 
+ *
  * You should have received a copy of the GNU Lesser General Public License
  * along with this program.  If not, see <http://www.gnu.org/licenses/>.
- * 
- ******************************************************************************/
-
+ *
+* *****************************************************************************/
 
 /* Config parameters. */
 #include "../config.h"
@@ -27,98 +27,178 @@
 #include <string.h>
 #include <unistd.h>
 #include <math.h>
+#include <float.h>
+#include <limits.h>
 #include <omp.h>
+#include <fenv.h>
 
 /* Local includes. */
 #include "quicksched.h"
 
-
 /* Some local constants. */
-#define cell_pool_grow 100
+#define cell_pool_grow 1000
 #define cell_maxparts 100
-#define task_limit 5000
-#define const_G 6.6738e-11
-#define dist_min 0.5
+#define task_limit 1e8
+#define const_G 1    // 6.6738e-8
+#define dist_min 0.5 /* Used for legacy walk only */
+#define dist_cutoff_ratio 1.5
 
+#define ICHECK -1
+#define NO_SANITY_CHECKS
+#define NO_COM_AS_TASK
+#define NO_COUNTERS
 
 /** Data structure for the particles. */
 struct part {
-    double x[3];
-    double a[3];
-    double mass;
-    int id;
-    };
-   
-    
+  double x[3];
+  // union {
+  float a[3];
+  float a_legacy[3];
+  float a_exact[3];
+  // };
+  float mass;
+  int id;
+};  // __attribute__((aligned(64)));
+
+struct multipole {
+  double com[3];
+  float mass;
+};
+
 /** Data structure for the BH tree cell. */
 struct cell {
-    double loc[3];
-    double h[3];
-    int split, count;
-    struct part *parts;
-    struct cell *progeny[8];
-    struct cell *parent;
-    double com[3];
-    double mass;
-    int res, com_tid;
-    };
-    
-    
+  double loc[3];
+  double h;
+
+  int count;
+  unsigned short int split, sorted;
+  struct part *parts;
+
+  struct cell *firstchild; /* Next node if opening */
+  struct cell *sibling;    /* Next node */
+
+  /* We keep both CoMs and masses to make sure the comp_com calculation is
+   * correct (use an anonymous union to keep variable names compact).  */
+  union {
+
+    /* Information for the legacy walk */
+    struct multipole legacy;
+
+    /* Information for the QuickShed walk */
+    struct multipole new;
+  };
+
+  int res, com_tid;
+  struct index *indices;
+
+} __attribute__((aligned(128)));
+
 /** Task types. */
 enum task_type {
-    task_type_self = 0,
-    task_type_pair,
-    task_type_pair_pc,
-    task_type_com,
-    task_type_count
-    };
-    
+  task_type_self = 0,
+  task_type_pair,
+  task_type_self_pc,
+  task_type_com,
+  task_type_count
+};
+
+#ifdef COUNTERS
+int count_direct_unsorted;
+int count_direct_sorted_pp;
+int count_direct_sorted_pm_i;
+int count_direct_sorted_pm_j;
+#endif
+
 /** Per-type timers. */
-ticks task_timers[ task_type_count ];
-    
-    
+ticks task_timers[task_type_count];
+
 /** Global variable for the pool of allocated cells. */
 struct cell *cell_pool = NULL;
 
-
 /**
  * @brief Get a #cell from the pool.
  */
- 
-struct cell *cell_get ( ) {
+struct cell *cell_get() {
 
-    struct cell *res;
-    int k;
+  struct cell *res;
+  int k;
 
-    /* Allocate a new batch? */
-    if ( cell_pool == NULL ) {
-    
-        /* Allocate the cell array. */
-        if ( ( cell_pool = (struct cell *)malloc( sizeof(struct cell) * cell_pool_grow ) ) == NULL )
-            error( "Failed to allocate fresh batch of cells." );
-            
-        /* Clear the cells. */
-        bzero( cell_pool , sizeof(struct cell) * cell_pool_grow );
-        
-        /* Link them up via their progeny pointers. */
-        for ( k = 1 ; k < 100 ; k++ )
-            cell_pool[k-1].progeny[0] = &cell_pool[k];
-    
-        }
-        
-    /* Pick a cell off the pool. */
-    res = cell_pool;
-    cell_pool = cell_pool->progeny[0];
-    
-    /* Clean up a few things. */
-    res->res = qsched_res_none;
-    
-    /* Return the cell. */
-    return res;
+  /* Allocate a new batch? */
+  if (cell_pool == NULL) {
+
+    /* Allocate the cell array. */
+    if ((cell_pool =
+             (struct cell *)calloc(cell_pool_grow, sizeof(struct cell))) ==
+        NULL)
+      error("Failed to allocate fresh batch of cells.");
+
+    /* Link them up via their progeny pointers. */
+    for (k = 1; k < cell_pool_grow; k++)
+      cell_pool[k - 1].firstchild = &cell_pool[k];
+  }
+
+  /* Pick a cell off the pool. */
+  res = cell_pool;
+  cell_pool = cell_pool->firstchild;
+
+  /* Clean up a few things. */
+  res->res = qsched_res_none;
+  res->firstchild = 0;
+
+  /* Return the cell. */
+  return res;
+}
 
+/**
+ * @brief Compute the center of mass of a given cell.
+ *
+ * @param c The #cell.
+ */
+void comp_com(struct cell *c) {
+
+  int k, count = c->count;
+  struct cell *cp;
+  struct part *parts = c->parts;
+  double com[3] = {0.0, 0.0, 0.0}, mass = 0.0;
+
+  if (c->split) {
+
+    /* Loop over the projenitors and collect the multipole information. */
+    for (cp = c->firstchild; cp != c->sibling; cp = cp->sibling) {
+      float cp_mass = cp->new.mass;
+      com[0] += cp->new.com[0] * cp_mass;
+      com[1] += cp->new.com[1] * cp_mass;
+      com[2] += cp->new.com[2] * cp_mass;
+      mass += cp_mass;
     }
-    
-    
+
+    /* Otherwise, collect the multipole from the particles. */
+  } else {
+
+    for (k = 0; k < count; k++) {
+      float p_mass = parts[k].mass;
+      com[0] += parts[k].x[0] * p_mass;
+      com[1] += parts[k].x[1] * p_mass;
+      com[2] += parts[k].x[2] * p_mass;
+      mass += p_mass;
+    }
+  }
+
+  /* Store the COM data, if any was collected. */
+  if (mass > 0.0) {
+    float imass = 1.0f / mass;
+    c->new.com[0] = com[0] * imass;
+    c->new.com[1] = com[1] * imass;
+    c->new.com[2] = com[2] * imass;
+    c->new.mass = mass;
+  } else {
+    c->new.com[0] = 0.0;
+    c->new.com[1] = 0.0;
+    c->new.com[2] = 0.0;
+    c->new.mass = 0.0;
+  }
+}
+
 /**
  * @brief Sort the parts into eight bins along the given pivots and
  *        fill the multipoles. Also adds the hierarchical resources
@@ -128,369 +208,614 @@ struct cell *cell_get ( ) {
  * @param N The total number of parts.
  * @param s The #sched to store the resources.
  */
- 
-void cell_split ( struct cell *c , struct qsched *s ) {
-
-    int i, j, k, count = c->count;
-    struct part temp, *parts = c->parts;
-    struct cell *cp;
-    int left[8], right[8];
-    double pivot[3];
-    static struct cell *root = NULL;
-    
-    /* Set the root cell. */
-    if ( root == NULL )
-        root = c;
-    
-    /* Add a resource for this cell if it doesn't have one yet. */
-    if ( c->res == qsched_res_none ) 
-        c->res = qsched_addres( s , qsched_owner_none , qsched_res_none );
-    
-    /* Attach a center-of-mass task to the cell. */
-    c->com_tid = qsched_addtask( s , task_type_com , task_flag_none , &c , sizeof(struct cell *) , 1 );
-    
-    /* Does this cell need to be split? */
-    if ( count > cell_maxparts ) {
-    
-        /* Mark this cell as split. */
-        c->split = 1;
-    
-        /* Create the progeny. */
-        for ( k = 0 ; k < 8 ; k++ ) {
-            c->progeny[k] = cp = cell_get();
-            cp->parent = c;
-            cp->loc[0] = c->loc[0];
-            cp->loc[1] = c->loc[1];
-            cp->loc[2] = c->loc[2];
-            cp->h[0] = c->h[0]/2;
-            cp->h[1] = c->h[1]/2;
-            cp->h[2] = c->h[2]/2;
-            cp->res = qsched_addres( s , qsched_owner_none , c->res );
-            if ( k & 4 )
-                cp->loc[0] += cp->h[0];
-            if ( k & 2 )
-                cp->loc[1] += cp->h[1];
-            if ( k & 1 )
-                cp->loc[2] += cp->h[2];
-            }
-    
-        /* Init the pivots. */
-        for ( k = 0 ; k < 3 ; k++ )
-            pivot[k] = c->loc[k] + c->h[k]/2;
-
-        /* Split along the x-axis. */
-        i = 0; j = count - 1;
-        while ( i <= j ) {
-            while ( i <= count-1 && parts[i].x[0] <= pivot[0] )
-                i += 1;
-            while ( j >= 0 && parts[j].x[0] > pivot[0] )
-                j -= 1;
-            if ( i < j ) {
-                temp = parts[i]; parts[i] = parts[j]; parts[j] = temp;
-                }
-            }
-        left[1] = i; right[1] = count - 1;
-        left[0] = 0; right[0] = j;
-
-        /* Split along the y axis, twice. */
-        for ( k = 1 ; k >= 0 ; k-- ) {
-            i = left[k]; j = right[k];
-            while ( i <= j ) {
-                while ( i <= right[k] && parts[i].x[1] <= pivot[1] )
-                    i += 1;
-                while ( j >= left[k] && parts[j].x[1] > pivot[1] )
-                    j -= 1;
-                if ( i < j ) {
-                    temp = parts[i]; parts[i] = parts[j]; parts[j] = temp;
-                    }
-                }
-            left[2*k+1] = i; right[2*k+1] = right[k];
-            left[2*k] = left[k]; right[2*k] = j;
-            }
-
-        /* Split along the z axis, four times. */
-        for ( k = 3 ; k >= 0 ; k-- ) {
-            i = left[k]; j = right[k];
-            while ( i <= j ) {
-                while ( i <= right[k] && parts[i].x[2] <= pivot[2] )
-                    i += 1;
-                while ( j >= left[k] && parts[j].x[2] > pivot[2] )
-                    j -= 1;
-                if ( i < j ) {
-                    temp = parts[i]; parts[i] = parts[j]; parts[j] = temp;
-                    }
-                }
-            left[2*k+1] = i; right[2*k+1] = right[k];
-            left[2*k] = left[k]; right[2*k] = j;
-            }
-
-        /* Store the counts and offsets. */
-        for ( k = 0 ; k < 8 ; k++ ) {
-            c->progeny[k]->count = right[k] - left[k] + 1;
-            c->progeny[k]->parts = &c->parts[ left[k] ];
-            }
-            
-        /* Recurse. */
-        for ( k = 0 ; k < 8 ; k++ )
-            cell_split( c->progeny[k] , s );
-            
-        /* Link the COM tasks. */
-        for ( k = 0 ; k < 8 ; k++ )
-            qsched_addunlock( s , c->progeny[k]->com_tid , c->com_tid );
-            
-        } /* does the cell need to be split? */
-        
-    /* Set this cell's resources ownership. */
-    qsched_res_own( s , c->res , s->nr_queues * ( c->parts - root->parts ) / root->count );
-        
+void cell_split(struct cell *c, struct qsched *s) {
+
+  int i, j, k, kk, count = c->count;
+  struct part temp, *parts = c->parts;
+  struct cell *cp;
+  int left[8], right[8];
+  double pivot[3];
+  static struct cell *root = NULL;
+  struct cell *progenitors[8];
+
+  /* Set the root cell. */
+  if (root == NULL) {
+    root = c;
+    c->sibling = 0;
+  }
+
+  /* Add a resource for this cell if it doesn't have one yet. */
+  if (c->res == qsched_res_none)
+    c->res = qsched_addres(s, qsched_owner_none, qsched_res_none);
+
+/* Attach a center-of-mass task to the cell. */
+#ifdef COM_AS_TASK
+  if (count > 0)
+    c->com_tid = qsched_addtask(s, task_type_com, task_flag_none, &c,
+                                sizeof(struct cell *), 1);
+#endif
+
+  /* Does this cell need to be split? */
+  if (count > cell_maxparts) {
+
+    /* Mark this cell as split. */
+    c->split = 1;
+
+    /* Create the progeny. */
+    for (k = 0; k < 8; k++) {
+      progenitors[k] = cp = cell_get();
+      cp->loc[0] = c->loc[0];
+      cp->loc[1] = c->loc[1];
+      cp->loc[2] = c->loc[2];
+      cp->h = c->h * 0.5;
+      cp->res = qsched_addres(s, qsched_owner_none, c->res);
+      if (k & 4) cp->loc[0] += cp->h;
+      if (k & 2) cp->loc[1] += cp->h;
+      if (k & 1) cp->loc[2] += cp->h;
     }
-    
-    
+
+    /* Init the pivots. */
+    for (k = 0; k < 3; k++) pivot[k] = c->loc[k] + c->h * 0.5;
+
+    /* Split along the x-axis. */
+    i = 0;
+    j = count - 1;
+    while (i <= j) {
+      while (i <= count - 1 && parts[i].x[0] < pivot[0]) i += 1;
+      while (j >= 0 && parts[j].x[0] >= pivot[0]) j -= 1;
+      if (i < j) {
+        temp = parts[i];
+        parts[i] = parts[j];
+        parts[j] = temp;
+      }
+    }
+    left[1] = i;
+    right[1] = count - 1;
+    left[0] = 0;
+    right[0] = j;
+
+    /* Split along the y axis, twice. */
+    for (k = 1; k >= 0; k--) {
+      i = left[k];
+      j = right[k];
+      while (i <= j) {
+        while (i <= right[k] && parts[i].x[1] < pivot[1]) i += 1;
+        while (j >= left[k] && parts[j].x[1] >= pivot[1]) j -= 1;
+        if (i < j) {
+          temp = parts[i];
+          parts[i] = parts[j];
+          parts[j] = temp;
+        }
+      }
+      left[2 * k + 1] = i;
+      right[2 * k + 1] = right[k];
+      left[2 * k] = left[k];
+      right[2 * k] = j;
+    }
+
+    /* Split along the z axis, four times. */
+    for (k = 3; k >= 0; k--) {
+      i = left[k];
+      j = right[k];
+      while (i <= j) {
+        while (i <= right[k] && parts[i].x[2] < pivot[2]) i += 1;
+        while (j >= left[k] && parts[j].x[2] >= pivot[2]) j -= 1;
+        if (i < j) {
+          temp = parts[i];
+          parts[i] = parts[j];
+          parts[j] = temp;
+        }
+      }
+      left[2 * k + 1] = i;
+      right[2 * k + 1] = right[k];
+      left[2 * k] = left[k];
+      right[2 * k] = j;
+    }
+
+    /* Store the counts and offsets. */
+    for (k = 0; k < 8; k++) {
+      progenitors[k]->count = right[k] - left[k] + 1;
+      progenitors[k]->parts = &c->parts[left[k]];
+    }
+
+    /* Find the first non-empty progenitor */
+    for (k = 0; k < 8; k++)
+      if (progenitors[k]->count > 0) {
+        c->firstchild = progenitors[k];
+        break;
+      }
+
+#ifdef SANITY_CHECKS
+    if (c->firstchild == NULL)
+      error("Cell has been split but all progenitors have 0 particles");
+#endif
+
+    /* Prepare the pointers. */
+    for (k = 0; k < 8; k++) {
+
+      /* Find the next non-empty sibling */
+      for (kk = k + 1; kk < 8; ++kk) {
+        if (progenitors[kk]->count > 0) {
+          progenitors[k]->sibling = progenitors[kk];
+          break;
+        }
+      }
+
+      /* No non-empty sibling ? Go back a level */
+      if (kk == 8) progenitors[k]->sibling = c->sibling;
+    }
+
+    /* Recurse. */
+    for (k = 0; k < 8; k++)
+      if (progenitors[k]->count > 0) cell_split(progenitors[k], s);
+
+/* Link the COM tasks. */
+#ifdef COM_AS_TASK
+    for (k = 0; k < 8; k++)
+      if (progenitors[k]->count > 0)
+        qsched_addunlock(s, progenitors[k]->com_tid, c->com_tid);
+#endif
+
+    /* Otherwise, we're at a leaf, so create the cell's particle-cell task. */
+  } else {
+    struct cell *data[2] = {root, c};
+    int tid = qsched_addtask(s, task_type_self_pc, task_flag_none, data,
+                             2 * sizeof(struct cell *), 1);
+    qsched_addlock(s, tid, c->res);
+#ifdef COM_AS_TASK
+    qsched_addunlock(s, root->com_tid, tid);
+#endif
+  } /* does the cell need to be split? */
+
+/* Compute the cell's center of mass. */
+#ifndef COM_AS_TASK
+  comp_com(c);
+#endif
+
+  /* Set this cell's resources ownership. */
+  qsched_res_own(s, c->res,
+                 s->nr_queues * (c->parts - root->parts) / root->count);
+}
+
+/* -------------------------------------------------------------------------- */
+/* New tree walk */
+/* -------------------------------------------------------------------------- */
+
 /**
- * @brief Compute the center of mass of a given cell.
+ * @brief Interacts all particles in ci with the monopole in cj
+ */
+static inline void make_interact_pc(struct cell *leaf, struct cell *cj) {
+
+  int j, k, count = leaf->count;
+  double com[3] = {0.0, 0.0, 0.0};
+  float mcom, dx[3] = {0.0, 0.0, 0.0}, r2, ir, w;
+  struct part *parts = leaf->parts;
+
+#ifdef SANITY_CHECKS
+
+  /* Sanity checks */
+  if (leaf->count == 0) error("Empty cell!");
+
+  /* Sanity check. */
+  if (cj->new.mass == 0.0) {
+    message("%e %e %e %d %p %d %p", cj->new.com[0], cj->new.com[1],
+            cj->new.com[2], cj->count, cj, cj->split, cj->sibling);
+
+    for (j = 0; j < cj->count; ++j)
+      message("part %d mass=%e id=%d", j, cj->parts[j].mass, cj->parts[j].id);
+
+    error("com does not seem to have been set.");
+  }
+
+#endif
+
+  /* Init the com's data. */
+  for (k = 0; k < 3; k++) com[k] = cj->new.com[k];
+  mcom = cj->new.mass;
+
+  /* Loop over every particle in leaf. */
+  for (j = 0; j < count; j++) {
+
+    /* Compute the pairwise distance. */
+    for (r2 = 0.0, k = 0; k < 3; k++) {
+      dx[k] = com[k] - parts[j].x[k];
+      r2 += dx[k] * dx[k];
+    }
+
+    /* Apply the gravitational acceleration. */
+    ir = 1.0f / sqrtf(r2);
+    w = mcom * const_G * ir * ir * ir;
+    for (k = 0; k < 3; k++) parts[j].a[k] += w * dx[k];
+
+#if ICHECK >= 0
+    if (leaf->parts[j].id == ICHECK)
+      printf("[DEBUG] cell [%f,%f,%f] interacting with cj->loc=[%f,%f,%f] "
+             "m=%f h=%f\n",
+             leaf->loc[0], leaf->loc[1], leaf->loc[2], cj->loc[0], cj->loc[1],
+             cj->loc[2], mcom, cj->h);
+
+    if (leaf->parts[j].id == ICHECK)
+      printf("[NEW] Interaction with monopole a=( %e %e %e ) h= %f Nj= %d m_j= "
+             "%f\n",
+             w * dx[0], w * dx[1], w * dx[2], cj->h, cj->count, mcom);
+#endif
+
+  } /* loop over every particle. */
+}
+
+/**
+ * @brief Checks whether the cell leaf is a subvolume of the cell c
+ */
+static inline int is_inside(struct cell *leaf, struct cell *c) {
+  return (leaf->parts >= c->parts) && (leaf->parts < c->parts + c->count);
+}
+
+/**
+ * @brief Checks whether the cells are direct neighbours ot not
+ */
+static inline int are_neighbours_different_size(struct cell *ci,
+                                                struct cell *cj) {
+
+  int k;
+  float dx[3];
+  double cih = ci->h, cjh = cj->h;
+
+  /* Maximum allowed distance */
+  float min_dist = 0.5 * (cih + cjh);
+
+  /* (Manhattan) Distance between the cells */
+  for (k = 0; k < 3; k++) {
+    float center_i = ci->loc[k] + 0.5 * cih;
+    float center_j = cj->loc[k] + 0.5 * cjh;
+    dx[k] = fabs(center_i - center_j);
+  }
+
+  return (dx[0] <= min_dist) && (dx[1] <= min_dist) && (dx[2] <= min_dist);
+}
+
+/**
+ * @brief Checks whether the cells are direct neighbours ot not. Both cells have
+ * to be of the same size
+ */
+static inline int are_neighbours(struct cell *ci, struct cell *cj) {
+
+  int k;
+  float dx[3];
+
+#ifdef SANITY_CHECKS
+  if (ci->h != cj->h)
+    error(" Cells of different size in distance calculation.");
+#endif
+
+  /* Maximum allowed distance */
+  float min_dist = ci->h;
+
+  /* (Manhattan) Distance between the cells */
+  for (k = 0; k < 3; k++) {
+    float center_i = ci->loc[k];
+    float center_j = cj->loc[k];
+    dx[k] = fabs(center_i - center_j);
+  }
+
+  return (dx[0] <= min_dist) && (dx[1] <= min_dist) && (dx[2] <= min_dist);
+}
+
+/**
+ * @brief Compute the interactions between all particles in a cell leaf
+ *        and the center of mass of all the cells in a part of the tree
+ * described by ci and cj
  *
- * @param c The #cell.
+ * @param ci The #cell containing the particle
+ * @param cj The #cell containing the center of mass.
  */
- 
-void comp_com ( struct cell *c ) {
+static inline void iact_pair_pc(struct cell *ci, struct cell *cj,
+                                struct cell *leaf) {
 
-    int k, count = c->count;
-    struct cell *cp;
-    struct part *p, *parts = c->parts;
+  struct cell *cp, *cps;
 
-    /* Is the cell split? */
-    if ( c->split ) {
-    
-        /* Collect the centre of gravity and mass from the progeny. */
-        for ( k = 0 ; k < 8 ; k++ ) {
-            cp = c->progeny[k];
-            c->com[0] += cp->com[0]*cp->mass;
-            c->com[1] += cp->com[1]*cp->mass;
-            c->com[2] += cp->com[2]*cp->mass;
-            c->mass += cp->mass;
-            }
-        c->com[0] /= c->mass; c->com[1] /= c->mass; c->com[2] /= c->mass;
-            
-        }
-        
-    /* Otherwise, collect the multipole from local data. */
-    else {
-    
-        for ( k = 0 ; k < count ; k++ ) {
-            p = &parts[k];
-            c->com[0] += p->x[0]*p->mass;
-            c->com[1] += p->x[1]*p->mass;
-            c->com[2] += p->x[2]*p->mass;
-            c->mass += p->mass;
-            }
-        c->com[0] /= c->mass; c->com[1] /= c->mass; c->com[2] /= c->mass;
-            
+#ifdef SANITY_CHECKS
+
+  /* Early abort? */
+  if (ci->count == 0 || cj->count == 0) error("Empty cell !");
+
+  /* Sanity check */
+  if (ci == cj)
+    error("The impossible has happened: pair interaction between a cell and "
+          "itself.");
+
+  /* Sanity check */
+  if (!is_inside(leaf, ci))
+    error("The impossible has happened: The leaf is not within ci");
+
+  /* Are the cells direct neighbours? */
+  if (!are_neighbours(ci, cj)) error("Cells are not neighours");
+
+  /* Are both cells split ? */
+  if (!ci->split || !cj->split) error("One of the cells is not split !");
+#endif
+
+  /* Let's find in which subcell of ci the leaf is */
+  for (cp = ci->firstchild; cp != ci->sibling; cp = cp->sibling) {
+
+    if (is_inside(leaf, cp)) break;
+  }
+
+  if (are_neighbours_different_size(cp, cj)) {
+
+    /* Now interact this subcell with all subcells of cj */
+    for (cps = cj->firstchild; cps != cj->sibling; cps = cps->sibling) {
+
+      /* Check whether we have to recurse or can directly jump to the multipole
+       * calculation */
+      if (are_neighbours(cp, cps)) {
+
+        /* We only recurse if the children are split */
+        if (cp->split && cps->split) {
+          iact_pair_pc(cp, cps, leaf);
         }
-        
+
+      } else {
+        make_interact_pc(leaf, cps);
+      }
     }
-    
-    
+  } else {
+
+    /* If cp is not a neoghbour of cj, we can directly interact with the
+     * multipoles */
+    for (cps = cj->firstchild; cps != cj->sibling; cps = cps->sibling) {
+
+      make_interact_pc(leaf, cps);
+    }
+  }
+}
+
+/**
+ * @brief Compute the interactions between all particles in a leaf and
+ *        and all the monopoles in the cell c
+ *
+ * @param c The #cell containing the monopoles
+ * @param leaf The #cell containing the particles
+ */
+static inline void iact_self_pc(struct cell *c, struct cell *leaf) {
+
+  struct cell *cp, *cps;
+
+#ifdef SANITY_CHECKS
+
+  /* Early abort? */
+  if (c->count == 0) error("Empty cell !");
+
+  if (!c->split) error("Cell is not split !");
+
+#endif
+
+  /* Find in which subcell of c the leaf is */
+  for (cp = c->firstchild; cp != c->sibling; cp = cp->sibling) {
+
+    /* Only recurse if the leaf is in this part of the tree */
+    if (is_inside(leaf, cp)) break;
+  }
+
+  if (cp->split) {
+
+    /* Recurse if the cell can be split */
+    iact_self_pc(cp, leaf);
+
+    /* Now, interact with every other subcell */
+    for (cps = c->firstchild; cps != c->sibling; cps = cps->sibling) {
+
+      /* Since cp and cps will be direct neighbours it is only worth recursing
+       */
+      /* if the cells can both be split */
+      if (cp != cps && cps->split) iact_pair_pc(cp, cps, leaf);
+    }
+  }
+}
+
 /**
  * @brief Compute the interactions between all particles in a cell
- *        and the center of mass of another cell.
+ *        and all particles in the other cell (N^2 algorithm)
  *
  * @param ci The #cell containing the particles.
- * @param cj The #cell containing the center of mass.
+ * @param cj The #cell containing the other particles
  */
- 
-void iact_pair_pc ( struct cell *ci , struct cell *cj ) {
+static inline void iact_pair_direct(struct cell *ci, struct cell *cj) {
 
-    int j, k, count = ci->count;
-    double com[3], mcom, dx[3], r2, ir, w;
-    struct part *parts = ci->parts;
-    
-    /* Early abort? */
-    if ( count == 0 )
-        return;
-    
-    /* Init the com's data. */
-    for ( k = 0 ; k < 3 ; k++ )
-        com[k] = cj->com[k];
-    mcom = cj->mass;
+  int i, j, k;
+  int count_i = ci->count, count_j = cj->count;
+  struct part *parts_i = ci->parts, *parts_j = cj->parts;
+  double xi[3];
+  float dx[3], ai[3], mi, mj, r2, w, ir;
 
-    /* Loop over every following particle. */
-    for ( j = 0 ; j < count ; j++ ) {
+#ifdef SANITY_CHECKS
 
-        /* Compute the pairwise distance. */
-        for ( r2 = 0.0 , k = 0 ; k < 3 ; k++ ) {
-            dx[k] = com[k] - parts[j].x[k];
-            r2 += dx[k]*dx[k];
-            }
+  /* Bad stuff will happen if cell sizes are different */
+  if (ci->h != cj->h)
+    error("Non matching cell sizes !! h_i=%f h_j=%f\n", ci->h, cj->h);
 
-        /* Apply the gravitational acceleration. */
-        ir = 1.0 / sqrt( r2 );
-        w = mcom * const_G * ir * ir * ir;
-        for ( k = 0 ; k < 3 ; k++ )
-            parts[j].a[k] += w * dx[k];
+#endif
 
-        } /* loop over every other particle. */
-                
+  /* Loop over all particles in ci... */
+  for (i = 0; i < count_i; i++) {
+
+    /* Init the ith particle's data. */
+    for (k = 0; k < 3; k++) {
+      xi[k] = parts_i[i].x[k];
+      ai[k] = 0.0f;
     }
-    
+    mi = parts_i[i].mass;
+
+    /* Loop over every particle in the other cell. */
+    for (j = 0; j < count_j; j++) {
+
+      /* Compute the pairwise distance. */
+      for (r2 = 0.0, k = 0; k < 3; k++) {
+        dx[k] = xi[k] - parts_j[j].x[k];
+        r2 += dx[k] * dx[k];
+      }
+
+      /* Apply the gravitational acceleration. */
+      ir = 1.0f / sqrtf(r2);
+      w = const_G * ir * ir * ir;
+      mj = parts_j[j].mass;
+      for (k = 0; k < 3; k++) {
+        float wdx = w * dx[k];
+        parts_j[j].a[k] += wdx * mi;
+        ai[k] -= wdx * mj;
+      }
+
+#ifdef COUNTERS
+      ++count_direct_unsorted;
+#endif
+
+#if ICHECK >= 0 && 0
+      if (parts_i[i].id == ICHECK)
+        printf(
+            "[NEW] Interaction with particle id= %d (pair i) a=( %e %e %e )\n",
+            parts_j[j].id, -mi * w * dx[0], -mi * w * dx[1], -mi * w * dx[2]);
+
+      if (parts_j[j].id == ICHECK)
+        printf(
+            "[NEW] Interaction with particle id= %d (pair j) a=( %e %e %e )\n",
+            parts_i[i].id, mj * w * dx[0], mj * w * dx[1], mj * w * dx[2]);
+#endif
+
+    } /* loop over every other particle. */
+
+    /* Store the accumulated acceleration on the ith part. */
+    for (k = 0; k < 3; k++) parts_i[i].a[k] += ai[k];
+
+  } /* loop over all particles. */
+}
 
 /**
- * @brief Compute the interactions between all particles in a cell.
+ * @brief Decides whether two cells use the direct summation interaction or the
+ * multipole interactions
  *
  * @param ci The #cell.
  * @param cj The other #cell.
  */
- 
-void iact_pair ( struct cell *ci , struct cell *cj ) {
+void iact_pair(struct cell *ci, struct cell *cj) {
 
-    int i, j, k;
-    int count_i = ci->count, count_j = cj->count;
-    double xi[3], ai[3], mi, mj, dx[3], r2, ir, w;
-    struct part *parts_i = ci->parts, *parts_j = cj->parts;
-    
-    /* Early abort? */
-    if ( count_i == 0 || count_j == 0 )
-        return;
-    
-    /* Recurse? */
-    if ( ci->split && cj->split )
-        for ( j = 0 ; j < 8 ; j++ )
-            for ( k = j+1 ; k < 8 ; k++ )
-                iact_pair( ci->progeny[j] , cj->progeny[k] );
-            
-    /* Get the minimum distance between both cells. */
-    for ( r2 = 0.0, k = 0 ; k < 3 ; k++ ) {
-        dx[k] = fabs( ci->loc[k] - cj->loc[k] );
-        if ( dx[k] > 0 )
-            dx[k] -= ci->h[k];
-        r2 += dx[k]*dx[k];
-        }
+  struct cell *cp, *cps;
 
-    /* Sufficiently well-separated? */
-    if ( r2/ci->h[0] > dist_min*dist_min ) {
+#ifdef SANITY_CHECKS
 
-        /* Compute the center of mass interactions. */
-        iact_pair_pc( ci , cj );
-        iact_pair_pc( cj , ci );
+  /* Early abort? */
+  if (ci->count == 0 || cj->count == 0) error("Empty cell !");
 
-        }
+  /* Bad stuff will happen if cell sizes are different */
+  if (ci->h != cj->h)
+    error("Non matching cell sizes !! h_i=%f h_j=%f\n", ci->h, cj->h);
+
+  /* Sanity check */
+  if (ci == cj)
+    error("The impossible has happened: pair interaction between a cell and "
+          "itself.");
+
+#endif
 
-    /* Recurse? */
-    else if ( ci->split && cj->split )
-        for ( j = 0 ; j < 8 ; j++ )
-            for ( k = j+1 ; k < 8 ; k++ )
-                iact_pair( ci->progeny[j] , cj->progeny[k] );
-            
-    /* Otherwise, do direct interactions. */
-    else {
-
-        /* Loop over all particles... */
-        for ( i = 0 ; i < count_i ; i++ ) {
-
-            /* Init the ith particle's data. */
-            for ( k = 0 ; k < 3 ; k++ ) {
-                xi[k] = parts_i[i].x[k];
-                ai[k] = 0.0;
-                }
-            mi = parts_i[i].mass;
-
-            /* Loop over every following particle. */
-            for ( j = 0 ; j < count_j ; j++ ) {
-
-                /* Compute the pairwise distance. */
-                for ( r2 = 0.0 , k = 0 ; k < 3 ; k++ ) {
-                    dx[k] = xi[k] - parts_j[j].x[k];
-                    r2 += dx[k]*dx[k];
-                    }
-
-                /* Apply the gravitational acceleration. */
-                ir = 1.0 / sqrt( r2 );
-                w = const_G * ir * ir * ir;
-                mj = parts_j[j].mass;
-                for ( k = 0 ; k < 3 ; k++ ) {
-                    parts_j[j].a[k] += w * dx[k] * mi;
-                    ai[k] -= w * dx[k] * mj;
-                    }
-
-                } /* loop over every other particle. */
-
-            /* Store the accumulated acceleration on the ith part. */
-            for ( k = 0 ; k < 3 ; k++ )
-                parts_i[i].a[k] += ai[k];
-
-            } /* loop over all particles. */
-
-        } /* otherwise, compute interactions directly. */
+  /* Are the cells direct neighbours? */
+  if (are_neighbours(ci, cj)) {
 
+    /* Are both cells split ? */
+    if (ci->split && cj->split) {
+
+      /* Let's split both cells and build all possible pairs */
+      for (cp = ci->firstchild; cp != ci->sibling; cp = cp->sibling) {
+        for (cps = cj->firstchild; cps != cj->sibling; cps = cps->sibling) {
+
+          /* If the cells are neighbours, recurse. */
+          if (are_neighbours(cp, cps)) {
+            iact_pair(cp, cps);
+          }
+        }
+      }
+    } else {/* Otherwise, compute the interactions at this level directly. */
+      iact_pair_direct(ci, cj);
     }
-    
+  }
+}
 
 /**
  * @brief Compute the interactions between all particles in a cell.
  *
  * @param c The #cell.
  */
- 
-void iact_self ( struct cell *c ) {
+void iact_self_direct(struct cell *c) {
+  int i, j, k, count = c->count;
+  double xi[3] = {0.0, 0.0, 0.0};
+  float ai[3] = {0.0, 0.0, 0.0}, mi, mj, dx[3] = {0.0, 0.0, 0.0}, r2, ir, w;
+  struct part *parts = c->parts;
+  struct cell *cp, *cps;
 
-    int i, j, k, count = c->count;
-    double xi[3], ai[3], mi, mj, dx[3], r2, ir, w;
-    struct part *parts = c->parts;
-    
-    /* Early abort? */
-    if ( count == 0 )
-        return;
-    
-    /* Recurse? */
-    if ( c->split )
-        for ( j = 0 ; j < 8 ; j++ ) {
-            iact_self( c->progeny[j] );
-            for ( k = j+1 ; k < 8 ; k++ )
-                iact_pair( c->progeny[j] , c->progeny[k] );
-            }
-            
-    /* Otherwise, compute interactions directly. */
-    else {
-    
-        /* Loop over all particles... */
-        for ( i = 0 ; i < count ; i++ ) {
-        
-            /* Init the ith particle's data. */
-            for ( k = 0 ; k < 3 ; k++ ) {
-                xi[k] = parts[i].x[k];
-                ai[k] = 0.0;
-                }
-            mi = parts[i].mass;
-                
-            /* Loop over every following particle. */
-            for ( j = i+1 ; j < count ; j++ ) {
-            
-                /* Compute the pairwise distance. */
-                for ( r2 = 0.0 , k = 0 ; k < 3 ; k++ ) {
-                    dx[k] = xi[k] - parts[j].x[k];
-                    r2 += dx[k]*dx[k];
-                    }
-                    
-                /* Apply the gravitational acceleration. */
-                ir = 1.0 / sqrt( r2 );
-                w = const_G * ir * ir * ir;
-                mj = parts[j].mass;
-                for ( k = 0 ; k < 3 ; k++ ) {
-                    parts[j].a[k] += w * dx[k] * mi;
-                    ai[k] -= w * dx[k] * mj;
-                    }
-            
-                } /* loop over every other particle. */
-                
-            /* Store the accumulated acceleration on the ith part. */
-            for ( k = 0 ; k < 3 ; k++ )
-                parts[i].a[k] += ai[k];
-        
-            } /* loop over all particles. */
-    
-        } /* otherwise, compute interactions directly. */
+#ifdef SANITY_CHECKS
+
+  /* Early abort? */
+  if (count == 0) error("Empty cell !");
 
+#endif
+
+  /* If the cell is split, interact each progeny with itself, and with
+     each of its siblings. */
+  if (c->split) {
+    for (cp = c->firstchild; cp != c->sibling; cp = cp->sibling) {
+      iact_self_direct(cp);
+      for (cps = cp->sibling; cps != c->sibling; cps = cps->sibling)
+        iact_pair(cp, cps);
     }
-    
+
+    /* Otherwise, compute the interactions directly. */
+  } else {
+
+    /* Loop over all particles... */
+    for (i = 0; i < count; i++) {
+
+      /* Init the ith particle's data. */
+      for (k = 0; k < 3; k++) {
+        xi[k] = parts[i].x[k];
+        ai[k] = 0.0;
+      }
+      mi = parts[i].mass;
+
+      /* Loop over every following particle. */
+      for (j = i + 1; j < count; j++) {
+
+        /* Compute the pairwise distance. */
+        for (r2 = 0.0, k = 0; k < 3; k++) {
+          dx[k] = xi[k] - parts[j].x[k];
+          r2 += dx[k] * dx[k];
+        }
+
+        /* Apply the gravitational acceleration. */
+        ir = 1.0f / sqrtf(r2);
+        w = const_G * ir * ir * ir;
+        mj = parts[j].mass;
+        for (k = 0; k < 3; k++) {
+          float wdx = w * dx[k];
+          parts[j].a[k] += wdx * mi;
+          ai[k] -= wdx * mj;
+        }
+
+#if ICHECK >= 0
+        if (parts[i].id == ICHECK)
+          message("[NEW] Interaction with particle id= %d (self i)",
+                  parts[j].id);
+
+        if (parts[j].id == ICHECK)
+          message("[NEW] Interaction with particle id= %d (self j)",
+                  parts[i].id);
+#endif
+
+      } /* loop over every other particle. */
+
+      /* Store the accumulated acceleration on the ith part. */
+      for (k = 0; k < 3; k++) parts[i].a[k] += ai[k];
+
+    } /* loop over all particles. */
+
+  } /* otherwise, compute interactions directly. */
+}
 
 /**
  * @brief Create the tasks for the cell pair/self.
@@ -499,298 +824,661 @@ void iact_self ( struct cell *c ) {
  * @param ci The first #cell.
  * @param cj The second #cell.
  */
- 
-void create_tasks ( struct qsched *s , struct cell *ci , struct cell *cj ) {
+void create_tasks(struct qsched *s, struct cell *ci, struct cell *cj) {
 
-    int j, k;
-    qsched_task_t tid;
-    struct cell *data[2];
-    double dx, r2;
-    
-    /* If either cell is empty, stop. */
-    if ( ci->count == 0 || ( cj != NULL && cj->count == 0 ) )
-        return;
+  qsched_task_t tid;
+  struct cell *data[2], *cp, *cps;
+
+#ifdef SANITY_CHECKS
+
+  /* If either cell is empty, stop. */
+  if (ci->count == 0 || (cj != NULL && cj->count == 0)) error("Empty cell !");
+
+#endif
+
+  /* Single cell? */
+  if (cj == NULL) {
+
+    /* Is this cell split and above the task limit ? */
+    if (ci->split && ci->count > task_limit / ci->count) {
+
+      /* Loop over each of this cell's progeny. */
+      for (cp = ci->firstchild; cp != ci->sibling; cp = cp->sibling) {
+
+        /* Make self-interaction task. */
+        create_tasks(s, cp, NULL);
+
+        /* Make all pair-interaction tasks. */
+        for (cps = cp->sibling; cps != ci->sibling; cps = cps->sibling)
+          create_tasks(s, cp, cps);
+      }
+
+      /* Otherwise, add a self-interaction task. */
+    } else {
+
+      /* Set the data. */
+      data[0] = ci;
+      data[1] = NULL;
+
+      /* Create the task. */
+      tid =
+          qsched_addtask(s, task_type_self, task_flag_none, data,
+                         sizeof(struct cell *) * 2, ci->count * ci->count / 2);
+
+      /* Add the resource (i.e. the cell) to the new task. */
+      qsched_addlock(s, tid, ci->res);
+
+/* If this call might recurse, add a dependency on the cell's COM
+   task. */
+#ifdef COM_AS_TASK
+      if (ci->split) qsched_addunlock(s, ci->com_tid, tid);
+#endif
+    }
 
-    /* Single cell? */
-    if ( cj == NULL ) {
-    
-        /* Is this cell split? */
-        if ( ci->split && ci->count > task_limit ) {
-        
-            /* Recurse over the progeny. */
-            for ( j = 0 ; j < 8 ; j++ ) {
-            
-                /* Make self-task. */
-                create_tasks( s , ci->progeny[j] , NULL );
-                
-                /* Make all pair tasks. */
-                for ( k = j+1 ; k < 8 ; k++ )
-                    create_tasks( s , ci->progeny[j] , ci->progeny[k] );
-                }
-        
-            }
-            
-        /* Otherwise, add a self-interaction task. */
-        else {
-            
-            /* Set the data. */
-            data[0] = ci; data[1] = NULL;
-            
-            /* Create the task. */
-            tid = qsched_addtask( s , task_type_self , task_flag_none , data , sizeof(struct cell *) * 2 , ci->count*ci->count/2 );
-            
-            /* Add the resource. */
-            qsched_addlock( s , tid , ci->res );
-        
-            }
-    
-        }
-        
     /* Otherwise, it's a pair. */
-    else {
-    
-        /* Get the minimum distance between both cells. */
-        for ( r2 = 0.0, k = 0 ; k < 3 ; k++ ) {
-            dx = fabs( ci->loc[k] - cj->loc[k] );
-            if ( dx > 0 )
-                dx -= ci->h[k];
-            r2 += dx*dx;
-            }
-            
-        /* Are the cells sufficiently well separated? */
-        if ( r2/ci->h[0] > dist_min*dist_min ) {
-
-            /* Interact ci's parts with cj as a cell. */
-            data[0] = ci; data[1] = cj;
-            tid = qsched_addtask( s , task_type_pair_pc , task_flag_none , data , sizeof(struct cell *) * 2 , ci->count );
-            qsched_addlock( s , tid , ci->res );
-            qsched_addunlock( s , cj->com_tid , tid );
-
-            /* Interact cj's parts with ci as a cell. */
-            data[0] = cj; data[1] = ci;
-            tid = qsched_addtask( s , task_type_pair_pc , task_flag_none , data , sizeof(struct cell *) * 2 , ci->count );
-            qsched_addlock( s , tid , cj->res );
-            qsched_addunlock( s , ci->com_tid , tid );
-
-            }
-            
-        /* Does this task need to be broken-down further? */
-        else if ( ci->split && cj->split &&
-                  ci->count > task_limit && cj->count > task_limit ) {
-             
-            /* Loop over all pairs between ci and cj's progeny. */
-            for ( j = 0 ; j < 8 ; j++ )
-                for ( k = 0 ; k < 8 ; k++ )
-                    create_tasks( s , ci->progeny[j] , cj->progeny[k] );
-        
-            }
-            
-        /* Otherwise, generate a part-part task. */
-        else {
-        
-            /* Set the data. */
-            data[0] = ci; data[1] = cj;
-
-            /* Create the task. */
-            tid = qsched_addtask( s , task_type_pair , task_flag_none , data , sizeof(struct cell *) * 2 , ci->count*cj->count );
-
-            /* Add the resources. */
-            qsched_addlock( s , tid , ci->res );
-            qsched_addlock( s , tid , cj->res );
-
-            /* Depend on the COMs in case this task recurses. */
-            if ( ci->split || cj->split ) {
-                qsched_addunlock( s , ci->com_tid , tid );
-                qsched_addunlock( s , cj->com_tid , tid );
-                }
-
-            }
-        
-        } /* otherwise, it's a pair. */
+  } else {
+
+    /* Are the cells NOT neighbours ? */
+    if (!are_neighbours(ci, cj)) {
+
+    } else {/* Cells are direct neighbours */
 
+      /* Are both cells split ? */
+      if (ci->split && cj->split && ci->count > task_limit / cj->count) {
+
+        /* Let's split both cells and build all possible pairs */
+        for (cp = ci->firstchild; cp != ci->sibling; cp = cp->sibling) {
+          for (cps = cj->firstchild; cps != cj->sibling; cps = cps->sibling) {
+            /* Recurse */
+            create_tasks(s, cp, cps);
+          }
+        }
+        /* Otherwise, at least one of the cells is not split, build a direct
+         * interaction. */
+      } else {
+
+        /* Set the data. */
+        data[0] = ci;
+        data[1] = cj;
+
+        /* Create the task. */
+        tid = qsched_addtask(s, task_type_pair, task_flag_none, data,
+                             sizeof(struct cell *) * 2, ci->count * cj->count);
+
+        /* Add the resources. */
+        qsched_addlock(s, tid, ci->res);
+        qsched_addlock(s, tid, cj->res);
+      }
+    } /* Cells are direct neighbours */
+  }   /* Otherwise it's a pair */
+}
+
+/* -------------------------------------------------------------------------- */
+/* Legacy tree walk */
+/* -------------------------------------------------------------------------- */
+
+/**
+ * @brief Compute the center of mass of a given cell recursively.
+ *
+ * @param c The #cell.
+ */
+void legacy_comp_com(struct cell *c, int *countCoMs) {
+
+  int k, count = c->count;
+  struct cell *cp;
+  struct part *p, *parts = c->parts;
+  double com[3] = {0.0, 0.0, 0.0}, mass = 0.0;
+
+  ++(*countCoMs);
+
+  /* Is the cell split? */
+  if (c->split) {
+
+    /* Loop over the progeny. */
+    for (cp = c->firstchild; cp != c->sibling; cp = cp->sibling) {
+      /* Recurse */
+      legacy_comp_com(cp, countCoMs);
+
+      /* Collect multipole information */
+      float cp_mass = cp->legacy.mass;
+      com[0] += cp->legacy.com[0] * cp_mass;
+      com[1] += cp->legacy.com[1] * cp_mass;
+      com[2] += cp->legacy.com[2] * cp_mass;
+      mass += cp_mass;
     }
-    
-    
+
+    /* Otherwise, collect the multipole from local data. */
+  } else {
+
+    for (k = 0; k < count; k++) {
+      p = &parts[k];
+      float p_mass = p->mass;
+      com[0] += p->x[0] * p_mass;
+      com[1] += p->x[1] * p_mass;
+      com[2] += p->x[2] * p_mass;
+      mass += p_mass;
+    }
+  }
+
+  /* Finish multipole calculation */
+  if (mass > 0.0) {
+    float imass = 1.0f / mass;
+    c->legacy.com[0] = com[0] * imass;
+    c->legacy.com[1] = com[1] * imass;
+    c->legacy.com[2] = com[2] * imass;
+    c->legacy.mass = mass;
+  } else {
+    c->legacy.com[0] = 0.0;
+    c->legacy.com[1] = 0.0;
+    c->legacy.com[2] = 0.0;
+    c->legacy.mass = 0.0;
+  }
+}
+
+/**
+ * @brief Interacts a particle with a cell recursively using the original B-H
+ * tree walk procedure
+ *
+ * @param parts The array of particles
+ * @param i The particle of interest
+ * @param root The root of the tree under which we will search.
+ * @param monitor If set to @c parts[i].id, will produce debug output when
+ *        ICHECK is set.
+ * @param cell The cell the particle interacts with
+ */
+void legacy_interact(struct part *parts, int i, struct cell *root, int monitor,
+                     int *countMultipoles, int *countPairs) {
+
+  int j, k;
+  float r2, dx[3], ir, w;
+  float a[3] = {0.0, 0.0, 0.0};
+  double pix[3] = {parts[i].x[0], parts[i].x[1], parts[i].x[2]};
+  int pid = parts[i].id;
+  struct cell *cell = root;
+
+  /* Traverse the cells of the tree. */
+  while (cell != NULL) {
+
+    /* Are we in a leaf ? */
+    if (!cell->split) {
+
+      /* Interact the particle with the particles in the leaf */
+      for (j = 0; j < cell->count; ++j) {
+        if (cell->parts[j].id == pid) continue;
+
+        /* Compute the pairwise distance. */
+        for (r2 = 0.0, k = 0; k < 3; k++) {
+          dx[k] = cell->parts[j].x[k] - pix[k];
+          r2 += dx[k] * dx[k];
+        }
+
+        /* Apply the gravitational acceleration. */
+        ir = 1.0f / sqrtf(r2);
+        w = cell->parts[j].mass * const_G * ir * ir * ir;
+        for (k = 0; k < 3; k++) a[k] += w * dx[k];
+
+        (*countPairs)++;
+
+#if ICHECK >= 0
+        if (pid == monitor)
+          message("[BH_] Interaction with particle id= %d a=( %e %e %e ) h= %f "
+                  "loc=( %e %e %e )\n",
+                  cell->parts[j].id, w * dx[0], w * dx[1], w * dx[2], cell->h,
+                  cell->loc[0], cell->loc[1], cell->loc[2]);
+#endif
+      }
+
+      cell = cell->sibling;
+    } else {
+
+      /* We are in a node */
+      for (r2 = 0.0, k = 0; k < 3; k++) {
+        dx[k] = cell->legacy.com[k] - pix[k];
+        r2 += dx[k] * dx[k];
+      }
+
+#if ICHECK >= 0
+      if (pid == monitor)
+        message("This is a node with %d particles h= %f. r= %f theta= %f",
+                cell->count, cell->h, sqrt(r2), dist_min);
+#endif
+
+      /* Is the cell far enough ? */
+      if (dist_min * dist_min * r2 < cell->h * cell->h) {
+
+#if ICHECK >= 0
+        if (pid == monitor) printf("Recursing...\n");
+#endif
+        cell = cell->firstchild;
+        continue;
+      }
+
+#if ICHECK >= 0
+      if (pid == monitor)
+        message("[BH_] Can interact with the monopole. x= %f %f %f m= %f h= %f",
+                cell->legacy.com[0], cell->legacy.com[1], cell->legacy.com[2],
+                cell->legacy.mass, cell->h);
+#endif
+
+      /* Apply the gravitational acceleration. */
+      ir = 1.0f / sqrtf(r2);
+      w = cell->legacy.mass * const_G * ir * ir * ir;
+      for (k = 0; k < 3; k++) a[k] += w * dx[k];
+
+      (*countMultipoles)++;
+
+      /* Move to the next node */
+      cell = cell->sibling;
+    }
+  }
+
+  /* Store the locally computed acceleration back to the particle. */
+  for (k = 0; k < 3; k++) parts[i].a_legacy[k] += a[k];
+}
+
+/**
+ * @brief Does a tree walk as in the B-H original work for all particles
+ *
+ * @param N The number of particles
+ * @param parts The array of particles
+ * @param root The root cell of the tree
+ * @param monitor ID of the particle to monitor and output interactions to
+ *        stdout
+ */
+void legacy_tree_walk(int N, struct part *parts, struct cell *root, int monitor,
+                      int *countMultipoles, int *countPairs, int *countCoMs) {
+
+  int i;
+
+  /* Compute multipoles (recursively) */
+  legacy_comp_com(root, countCoMs);
+
+  //#pragma omp parallel for
+  for (i = 0; i < N; ++i) {
+    if (parts[i].id == monitor)
+      message("tree walk for particle %d x= %f %f %f", parts[i].id,
+              parts[i].x[0], parts[i].x[1], parts[i].x[2]);
+
+    legacy_interact(parts, i, root, monitor, countMultipoles, countPairs);
+
+    if (parts[i].id == monitor)
+      message("[check] legacy acceleration for particle %d a= %.3e %.3e %.3e",
+              parts[i].id, parts[i].a_legacy[0], parts[i].a_legacy[1],
+              parts[i].a_legacy[2]);
+  }
+}
+
+/* -------------------------------------------------------------------------- */
+/* Exact interaction */
+/* -------------------------------------------------------------------------- */
+
+/**
+ * @brief Solve the particle interactions using the stupid N^2 algorithm
+ *
+ * @param N The number of particles
+ * @param parts The array of particles
+ */
+void interact_exact(int N, struct part *parts, int monitor) {
+
+  int i, j, k;
+  float ir, w, r2, dx[3];
+
+  /* Loop over all particles. */
+  for (i = 0; i < N; ++i) {
+
+    /* Some things to store locally. */
+    double pix[3] = {parts[i].x[0], parts[i].x[1], parts[i].x[2]};
+    float mi = parts[i].mass;
+
+    /* Loop over every other particle. */
+    for (j = i + 1; j < N; ++j) {
+
+      /* Compute the pairwise distance. */
+      for (r2 = 0.0, k = 0; k < 3; k++) {
+        dx[k] = parts[j].x[k] - pix[k];
+        r2 += dx[k] * dx[k];
+      }
+
+      /* Apply the gravitational acceleration. */
+      ir = 1.0f / sqrtf(r2);
+      w = const_G * ir * ir * ir;
+
+      for (k = 0; k < 3; k++) {
+        float wdx = w * dx[k];
+        parts[j].a_exact[k] -= wdx * mi;
+        parts[i].a_exact[k] += wdx * parts[j].mass;
+      }
+    }
+  }
+
+#if ICHECK >= 0
+  for (i = 0; i < N; ++i)
+    if (parts[i].id == monitor)
+      message("[check] exact acceleration for particle %d a= %.3e %.3e %.3e\n",
+              parts[i].id, parts[i].a_exact[0], parts[i].a_exact[1],
+              parts[i].a_exact[2]);
+#endif
+}
+
 /**
  * @brief Set up and run a task-based Barnes-Hutt N-body solver.
  *
  * @param N The number of random particles to use.
  * @param nr_threads Number of threads to use.
+ * @param runs Number of force evaluations to use as a benchmark.
+ * @param fileName Input file name. If @c NULL or an empty string, random
+ *        particle positions will be used.
  */
- 
-void test_bh ( int N , int nr_threads , int runs ) {
-
-    int k;
-    struct cell *root;
-    struct part *parts;
-    struct qsched s;
-    ticks tic, toc_run, tot_setup = 0, tot_run = 0;
-    
-    /* Runner function. */
-    void runner ( int type , void *data ) {
-    
-        ticks tic = getticks();
-    
-        /* Decode the data. */
-        struct cell **d = (struct cell **)data;
-    
-        /* Decode and execute the task. */
-        switch ( type ) {
-            case task_type_self:
-                iact_self( d[0] );
-                break;
-            case task_type_pair:
-                iact_pair( d[0] , d[1] );
-                break;
-            case task_type_pair_pc:
-                iact_pair_pc( d[0] , d[1] );
-                break;
-            case task_type_com:
-                comp_com( d[0] );
-                break;
-            default:
-                error( "Unknown task type." );
-            }
-            
-        atomic_add( &task_timers[type] , getticks() - tic );
-        
-        }
-        
-    /* Initialize the per-task type timers. */
-    for ( k = 0 ; k < task_type_count ; k++ )
-        task_timers[k] = 0;
-    
-    /* Initialize the scheduler. */
-    qsched_init( &s , nr_threads , qsched_flag_noreown );
-    
-    /* Init and fill the particle array. */
-    if ( ( parts = (struct part *)malloc( sizeof(struct part) * N ) ) == NULL )
-        error( "Failed to allocate particle buffer." );
-    for ( k = 0 ; k < N ; k++ ) {
-        parts[k].id = k;
-        parts[k].x[0] = ((double)rand())/RAND_MAX;
-        parts[k].x[1] = ((double)rand())/RAND_MAX;
-        parts[k].x[2] = ((double)rand())/RAND_MAX;
-        parts[k].mass = ((double)rand())/RAND_MAX;
-        parts[k].a[0] = 0.0;
-        parts[k].a[1] = 0.0;
-        parts[k].a[2] = 0.0;
-        }
-        
-    /* Init the cells. */
-    root = cell_get();
-    root->loc[0] = 0.0; root->loc[1] = 0.0; root->loc[2] = 0.0;
-    root->h[0] = 1.0; root->h[1] = 1.0; root->h[2] = 1.0;
-    root->count = N;
-    root->parts = parts;
-    cell_split( root , &s );
-
-    /* Create the tasks. */
+void test_bh(int N, int nr_threads, int runs, char *fileName) {
+
+  int i, k;
+  struct cell *root;
+  struct part *parts;
+  FILE *file;
+  struct qsched s;
+  ticks tic, toc_run, tot_setup = 0, tot_run = 0;
+  int countMultipoles = 0, countPairs = 0, countCoMs = 0;
+
+  /* Runner function. */
+  void runner(int type, void * data) {
+
+    ticks tic = getticks();
+
+    /* Decode the data. */
+    struct cell **d = (struct cell **)data;
+
+    /* Decode and execute the task. */
+    switch (type) {
+      case task_type_self:
+        iact_self_direct(d[0]);
+        break;
+      case task_type_pair:
+        iact_pair(d[0], d[1]);
+        break;
+      case task_type_self_pc:
+        iact_self_pc(d[0], d[1]);
+        break;
+      case task_type_com:
+        comp_com(d[0]);
+        break;
+      default:
+        error("Unknown task type.");
+    }
+
+    atomic_add(&task_timers[type], getticks() - tic);
+  }
+
+  /* Initialize the per-task type timers. */
+  for (k = 0; k < task_type_count; k++) task_timers[k] = 0;
+
+  /* Initialize the scheduler. */
+  qsched_init(&s, nr_threads, qsched_flag_noreown |
+                              qsched_flag_pthread);
+
+  /* Init and fill the particle array. */
+  if ((parts = (struct part *)malloc(sizeof(struct part) * N)) == NULL)
+    error("Failed to allocate particle buffer.");
+
+  /* If no input file was specified, generate random particle positions. */
+  if (fileName == NULL || fileName[0] == 0) {
+    for (k = 0; k < N; k++) {
+      parts[k].id = k;
+      parts[k].x[0] = ((double)rand()) / RAND_MAX;
+      parts[k].x[1] = ((double)rand()) / RAND_MAX;
+      parts[k].x[2] = ((double)rand()) / RAND_MAX;
+      parts[k].mass = ((double)rand()) / RAND_MAX;
+      parts[k].a_legacy[0] = 0.0;
+      parts[k].a_legacy[1] = 0.0;
+      parts[k].a_legacy[2] = 0.0;
+    }
+
+    /* Otherwise, read them from a file. */
+  } else {
+    file = fopen(fileName, "r");
+    if (file) {
+      for (k = 0; k < N; k++) {
+        if (fscanf(file, "%d", &parts[k].id) != 1)
+          error("Failed to read ID of part %i.", k);
+        if (fscanf(file, "%lf%lf%lf", &parts[k].x[0], &parts[k].x[1],
+                   &parts[k].x[2]) !=
+            3)
+          error("Failed to read position of part %i.", k);
+        if (fscanf(file, "%f", &parts[k].mass) != 1)
+          error("Failed to read mass of part %i.", k);
+      }
+      fclose(file);
+    }
+  }
+
+  /* Init the cells. */
+  root = cell_get();
+  root->loc[0] = 0.0;
+  root->loc[1] = 0.0;
+  root->loc[2] = 0.0;
+  root->h = 1.0;
+  root->count = N;
+  root->parts = parts;
+  cell_split(root, &s);
+
+  /* Iterate over the cells and get the average number of particles
+     per leaf. */
+  struct cell *c = root;
+  int nr_leaves = 0;
+  while (c != NULL) {
+    if (!c->split) {
+      nr_leaves++;
+      c = c->sibling;
+    } else {
+      c = c->firstchild;
+    }
+  }
+  message("Average number of parts per leaf is %f.", ((double)N) / nr_leaves);
+
+#if ICHECK > 0
+  printf("----------------------------------------------------------\n");
+
+  /* Do a N^2 interactions calculation */
+
+  ticks tic_exact = getticks();
+  interact_exact(N, parts, ICHECK);
+  ticks toc_exact = getticks();
+
+  printf("Exact calculation (1 thread) took %lli (= %e) ticks\n",
+         toc_exact - tic_exact, (float)(toc_exact - tic_exact));
+
+  printf("----------------------------------------------------------\n");
+#endif
+
+  /* Create the tasks. */
+  tic = getticks();
+  create_tasks(&s, root, NULL);
+  tot_setup += getticks() - tic;
+
+  /* Dump the number of tasks. */
+  message("total nr of tasks: %i.", s.count);
+  message("total nr of deps: %i.", s.count_deps);
+  message("total nr of res: %i.", s.count_res);
+  message("total nr of locks: %i.", s.count_locks);
+  message("total nr of uses: %i.", s.count_uses);
+  int counts[task_type_count];
+  for (k = 0; k < task_type_count; k++) counts[k] = 0;
+  for (k = 0; k < s.count; k++) counts[s.tasks[k].type] += 1;
+
+  // char buffer[200];
+  // sprintf(buffer, "timings_legacy_%d_%d.dat", cell_maxparts, nr_threads);
+  // FILE *fileTime = fopen(buffer, "w");
+
+  /* Loop over the number of runs. */
+  for (k = 0; k < 0 /* runs */; k++) {
+
+    countMultipoles = 0;
+    countPairs = 0;
+    countCoMs = 0;
+
+    /* Execute the legacy walk. */
     tic = getticks();
-    create_tasks( &s , root , NULL );
-    tot_setup += getticks() - tic;
-
-    /* Dump the number of tasks. */
-    message( "total nr of tasks: %i." , s.count );
-    message( "total nr of deps: %i." , s.count_deps );
-    message( "total nr of res: %i." , s.count_res );
-    message( "total nr of locks: %i." , s.count_locks );
-    message( "total nr of uses: %i." , s.count_uses );
-    int counts[ task_type_count ];
-    for ( k = 0 ; k < task_type_count ; k++ )
-        counts[k] = 0;
-    for ( k = 0 ; k < s.count ; k++ )
-        counts[ s.tasks[k].type ] += 1;
-    printf( "task counts: [ " );
-    for ( k = 0 ; k < task_type_count ; k++ )
-        printf( "%i " , counts[k] );
-    printf( "].\n" );
-        
-    /* Loop over the number of runs. */
-    for ( k = 0 ; k < runs ; k++ ) {
-    
-        /* Execute the tasks. */
-        tic = getticks();
-        qsched_run( &s , nr_threads , runner );
-	    toc_run = getticks();
-	    message( "%ith run took %lli ticks..." , k , toc_run - tic );
-        tot_run += toc_run - tic;
-        
-        }
-        
-    /* Dump the tasks. */
-    /* for ( k = 0 ; k < s.count ; k++ )
-        printf( " %i %i %lli %lli\n" , s.tasks[k].type , s.tasks[k].qid , s.tasks[k].tic , s.tasks[k].toc ); */
-        
-    /* Dump the costs. */
-    message( "costs: setup=%lli ticks, run=%lli ticks." ,
-        tot_setup , tot_run/runs );
-        
-    /* Dump the timers. */
-    for ( k = 0 ; k < qsched_timer_count ; k++ )
-        message( "timer %s is %lli ticks." , qsched_timer_names[k] , s.timers[k]/runs );
-    
-    /* Dump the per-task type timers. */
-    printf( "task timers: [ " );
-    for ( k = 0 ; k < task_type_count ; k++ )
-        printf( "%lli " , task_timers[k]/runs );
-    printf( "] ticks.\n" );
-    
-    /* Clean up. */
-    qsched_free( &s );
-    
+    legacy_tree_walk(N, parts, root, ICHECK, &countMultipoles, &countPairs,
+                     &countCoMs);
+    toc_run = getticks();
+
+    /* Dump some timings. */
+    message("%ith legacy run took %lli (= %e) ticks...", k, toc_run - tic,
+            (float)(toc_run - tic));
+    tot_run += toc_run - tic;
+    // fprintf(fileTime, "%lli %e\n", toc_run - tic, (float)(toc_run - tic));
+  }
+
+// fclose(fileTime);
+
+#if ICHECK >= 0
+  message("[check] accel of part %i is [%.3e,%.3e,%.3e]", ICHECK,
+          root->parts[ICHECK].a[0], root->parts[ICHECK].a[1],
+          root->parts[ICHECK].a[2]);
+#endif
+  printf("task counts: [ %8s %8s %8s %8s %8s ]\n", "self", "pair", "m-poles",
+         "direct", "CoMs");
+  printf("task counts: [ %8i %8i %8i %8i %8i ] (legacy).\n", 0, 0,
+         countMultipoles, countPairs, countCoMs);
+  printf("task counts: [ ");
+  for (k = 0; k < task_type_count; k++) printf("%8i ", counts[k]);
+  printf("] (new).\n");
+
+  /* Loop over the number of runs. */
+  for (k = 0; k < runs; k++) {
+
+    for (i = 0; i < N; ++i) {
+      parts[i].a[0] = 0.0;
+      parts[i].a[1] = 0.0;
+      parts[i].a[2] = 0.0;
     }
     
+    /* Execute the tasks. */
+    tic = getticks();
+    qsched_run(&s, nr_threads, runner);
+    toc_run = getticks();
+    message("%ith run took %lli (= %e) ticks...", k, toc_run - tic,
+            (float)(toc_run - tic));
+    tot_run += toc_run - tic;
+  }
+
+  message("[check] root mass= %f %f", root->legacy.mass, root->new.mass);
+  message("[check] root CoMx= %f %f", root->legacy.com[0], root->new.com[0]);
+  message("[check] root CoMy= %f %f", root->legacy.com[1], root->new.com[1]);
+  message("[check] root CoMz= %f %f", root->legacy.com[2], root->new.com[2]);
+#if ICHECK >= 0
+  for (i = 0; i < N; ++i)
+    if (root->parts[i].id == ICHECK)
+      message("[check] accel of part %i is [%.3e,%.3e,%.3e]", ICHECK,
+              root->parts[i].a[0], root->parts[i].a[1], root->parts[i].a[2]);
+#endif
+
+  /* Dump the tasks. */
+  /* for ( k = 0 ; k < s.count ; k++ )
+      printf( " %i %i %lli %lli\n" , s.tasks[k].type , s.tasks[k].qid ,
+     s.tasks[k].tic , s.tasks[k].toc ); */
+
+  /* Dump the costs. */
+  message("costs: setup=%lli ticks, run=%lli ticks.", tot_setup,
+          tot_run / runs);
+
+  /* Dump the timers. */
+  for (k = 0; k < qsched_timer_count; k++)
+    message("timer %s is %lli ticks.", qsched_timer_names[k],
+            s.timers[k] / runs);
+
+  /* Dump the per-task type timers. */
+  printf("task timers: [ ");
+  for (k = 0; k < task_type_count; k++) printf("%lli ", task_timers[k] / runs);
+  printf("] ticks.\n");
+
+  /* Dump the particles to a file */
+  file = fopen("particle_dump.dat", "w");
+  fprintf(file,
+          "# ID m x y z a_exact.x   a_exact.y    a_exact.z    a_legacy.x    "
+          "a_legacy.y    a_legacy.z    a_new.x     a_new.y    a_new.z\n");
+  for (k = 0; k < N; ++k)
+    fprintf(file, "%d %e %e %e %e %e %e %e %e %e %e %e %e %e\n", parts[k].id,
+            parts[k].mass, parts[k].x[0], parts[k].x[1], parts[k].x[2],
+            parts[k].a_exact[0], parts[k].a_exact[1], parts[k].a_exact[2],
+            parts[k].a_legacy[0], parts[k].a_legacy[1], parts[k].a_legacy[2],
+            parts[k].a[0], parts[k].a[1], parts[k].a[2]);
+  fclose(file);
+
+  /* Clean up. */
+  qsched_free(&s);
+  free(parts);
+}
 
 /**
  * @brief Main function.
  */
- 
-int main ( int argc , char *argv[] ) {
 
-    int c, nr_threads;
-    int N = 1000, runs = 1;
-    
-    /* Get the number of threads. */
-    #pragma omp parallel shared(nr_threads)
-    {
-        if ( omp_get_thread_num() == 0 )
-            nr_threads = omp_get_num_threads();
-    }
-    
-    /* Parse the options */
-    while ( ( c = getopt( argc , argv  , "n:r:t:" ) ) != -1 )
-        switch( c ) {
-	        case 'n':
-	            if ( sscanf( optarg , "%d" , &N ) != 1 )
-	                error( "Error parsing number of particles." );
-	            break;
-            case 'r':
-                if ( sscanf( optarg , "%d" , &runs ) != 1 )
-                    error( "Error parsing number of runs." );
-                break;
-	        case 't':
-	            if ( sscanf( optarg , "%d" , &nr_threads ) != 1 )
-	                error( "Error parsing number of threads." );
-	            omp_set_num_threads( nr_threads );
-	            break;
-	        case '?':
-                fprintf( stderr , "Usage: %s [-t nr_threads] [-n N] [-r runs]\n" , argv[0] );
-                fprintf( stderr , "Solves the N-body problem using a Barnes-Hutt\n"
-                                  "tree code with N random particles in [0,1]^3 using\n"
-                                  "nr_threads threads.\n" );
-	            exit( EXIT_FAILURE );
-	        }
-            
-    /* Dump arguments. */
-    message( "Computing the N-body problem over %i particles using %i threads (%i runs)." ,
-        N , nr_threads , runs );
-        
-    /* Run the test. */
-    test_bh( N , nr_threads, runs );
-    
-    return 0;
-    
+int main(int argc, char *argv[]) {
+
+  int c, nr_threads;
+  int N = 1000, runs = 1;
+  char fileName[100] = {0};
+
+  /* Die on FP-exceptions. */
+  feenableexcept(FE_DIVBYZERO | FE_INVALID | FE_OVERFLOW);
+
+/* Get the number of threads. */
+#pragma omp parallel shared(nr_threads)
+  {
+    if (omp_get_thread_num() == 0) nr_threads = omp_get_num_threads();
+  }
+
+  /* Parse the options */
+  while ((c = getopt(argc, argv, "n:r:t:f:c:i:")) != -1) switch (c) {
+      case 'n':
+        if (sscanf(optarg, "%d", &N) != 1)
+          error("Error parsing number of particles.");
+        break;
+      case 'r':
+        if (sscanf(optarg, "%d", &runs) != 1)
+          error("Error parsing number of runs.");
+        break;
+      case 't':
+        if (sscanf(optarg, "%d", &nr_threads) != 1)
+          error("Error parsing number of threads.");
+        omp_set_num_threads(nr_threads);
+        break;
+      case 'f':
+        if (sscanf(optarg, "%s", &fileName[0]) != 1)
+          error("Error parsing file name.");
+        break;
+      case '?':
+        fprintf(stderr, "Usage: %s [-t nr_threads] [-n N] [-r runs] [-f file] "
+                        "[-c Nparts] [-i Niterations] \n",
+                argv[0]);
+        fprintf(stderr, "Solves the N-body problem using a Barnes-Hut\n"
+                        "tree code with N random particles read from a file in "
+                        "[0,1]^3 using"
+                        "nr_threads threads.\n"
+                        "A test of the neighbouring cells interaction with "
+                        "Nparts per cell is also run Niterations times.\n");
+        exit(EXIT_FAILURE);
     }
-    
-    
+
+  /* Tree node information */
+  printf("Size of cell: %zu bytes.\n", sizeof(struct cell));
+
+  /* Part information */
+  printf("Size of part: %zu bytes.\n", sizeof(struct part));
+
+  /* Dump arguments. */
+  if (fileName[0] == 0) {
+    message("Computing the N-body problem over %i random particles using %i "
+            "threads (%i runs).",
+            N, nr_threads, runs);
+  } else {
+    message("Computing the N-body problem over %i particles read from '%s' "
+            "using %i threads (%i runs).",
+            N, fileName, nr_threads, runs);
+  }
+
+  /* Run the BH test. */
+  test_bh(N, nr_threads, runs, fileName);
+
+  return 0;
+}
diff --git a/examples/test_bh.cu b/examples/test_bh.cu
index f269cd47bf8de54327dbac53a1acef6fbef05821..cfc5af5e9802a1a85e1431b793c78eee7d4b5a73 100644
--- a/examples/test_bh.cu
+++ b/examples/test_bh.cu
@@ -128,12 +128,12 @@ __device__ struct part *parts;
 
 
 #ifdef TODO
+__device__ double *part_x0;
 __device__ double *part_x1;
 __device__ double *part_x2;
-__device__ double *part_x3;
+__device__ float  *part_a0;
 __device__ float  *part_a1;
 __device__ float  *part_a2;
-__device__ float  *part_a3;
 __device__ float *part_mass;
 #endif
 
@@ -243,10 +243,10 @@ __device__ void comp_com_CUDA(struct cell *c) {
   } else {
 
     for (k = threadIdx.x; k < count; k+= blockDim.x) {
-      float p_mass = parts[k].mass;
-      com[0] += parts[k].x[0] * p_mass;
-      com[1] += parts[k].x[1] * p_mass;
-      com[2] += parts[k].x[2] * p_mass;
+      float p_mass = part_mass[k];
+      com[0] += part_x0[k] * p_mass;
+      com[1] += part_x1[k] * p_mass;
+      com[2] += part_x2[k] * p_mass;
       mass += p_mass;
     }
     reduceSumDouble(com);
@@ -308,11 +308,12 @@ __device__ __forceinline__ int are_neighbours_CUDA(struct cell *ci, struct cell
  * @param ci The #cell containing the particles.
  * @param cj The #cell containing the other particles
  */
-__device__ __forceinline__ void iact_pair_direct_CUDA(struct cell *ci, struct cell *cj)
+__device__ __forceinline__ void iact_pair_direct_CUDA(struct cell_device *ci, struct cell_device *cj)
 {
     int i, j, k;
     int count_i = ci->count, count_j = cj->count;
-    struct part *parts_i = ci->parts, *parts_j = cj->parts;
+    //struct part *parts_i = ci->parts, *parts_j = cj->parts;
+    int parts_it = ci->part_loc, parts_jt = cj->part_loc;
     double xi[3];
     float dx[3], ai[3], r2, w, ir;
 
@@ -328,8 +329,10 @@ __device__ __forceinline__ void iact_pair_direct_CUDA(struct cell *ci, struct ce
     for(i=0; i < count_i; i+= blockDim.x)
     {
         /* Init the ith particle's data. */
+          xi[0] = part_x0[parts_it+i];
+          xi[1] = part_x1[parts_it+i];
+          xi[2] = part_x2[parts_it+i];
         for (k = 0; k < 3; k++) {
-          xi[k] = parts_i[i].x[k];
           ai[k] = 0.0f;
         }
      //   mi = parts_i[i].mass;
@@ -337,19 +340,24 @@ __device__ __forceinline__ void iact_pair_direct_CUDA(struct cell *ci, struct ce
         for(j = 0; j < count_j; j++)
         {
             /* Compute the pairwise distance. */
-            for (r2 = 0.0, k = 0; k < 3; k++) {
-            dx[k] = xi[k] - parts_j[j].x[k];
-            r2 += dx[k] * dx[k];
-            }
+//            for (r2 = 0.0, k = 0; k < 3; k++) {
+            dx[0] = xi[0] - part_x0[parts_jt+j];
+            dx[1] = xi[1] - part_x1[parts_jt+j];
+            dx[2] = xi[2] - part_x2[parts_jt+j];
+            r2 = dx[0] * dx[0];
+            r2 += dx[1] * dx[1];
+            r2 += dx[2] * dx[2];
+  //          }
             /* Apply the gravitational acceleration. */
             ir = 1.0f / sqrtf(r2);
             w = const_G * ir * ir * ir;
             for(k = 0; k < 3; k++) {
-                ai[k] = w * dx[k] * parts_j[j].mass;
+                ai[k] += w * dx[k] * part_mass[parts_jt+j];;
             }
         }
-
-        for (k = 0; k < 3; k++) atomicAdd( &parts[i].a[k] , ai[k]);
+        atomicAdd(&parts_a0[parts_it+i], ai[0]);
+        atomicAdd(&parts_a1[parts_it+i], ai[1]);
+        atomicAdd(&parts_a2[parts_it+i], ai[2]);
     }/* Loop over particles in cell i.*/
 }
 
@@ -403,12 +411,13 @@ __device__ void iact_pair_CUDA(struct cell *ci, struct cell *cj) {
   }
 }
 
-__device__ void iact_self_direct_CUDA(struct cell *c)
+__device__ void iact_self_direct_CUDA(struct cell_device *c)
 {
     int i, j, k, count = c->count;
     double xi[3] = {0.0, 0.0, 0.0};
-    float ai[3] = {0.0, 0.0, 0.0}, mj, dx[3] = {0.0, 0.0, 0.0}, r2, ir, w;
+   // float ai[3] = {0.0, 0.0, 0.0}, mj, dx[3] = {0.0, 0.0, 0.0}, r2, ir, w;
     struct part *parts = c->parts;
+    int partst = c->part_loc;
     struct cell *cp, *cps;
     
 #ifdef SANITY_CHECKS
@@ -442,7 +451,7 @@ __device__ void iact_self_direct_CUDA(struct cell *c)
             }
 //            mi = parts[i].mass;
 
-           /* Loop all particles - note this was an improvement in quicksched so we use it here. */
+           /* Loop all particles - note this was an improvement in mdcore so we use it here. */
             for(j = 0; j < count; j++)
             {
                 /* Compute the pairwise distance. */
@@ -1045,13 +1054,6 @@ void test_bh(int N, int nr_threads, int runs, char *fileName) {
       parts[k].a_legacy[0] = 0.0;
       parts[k].a_legacy[1] = 0.0;
       parts[k].a_legacy[2] = 0.0;
-      partsx0[k] = parts[k].x[0];
-      partsx1[k] = parts[k].x[1];
-      partsx2[k] = parts[k].x[2];
-      partsmass[k] = parts[k].mass;
-      partsa0[k] = 0.0f;
-      partsa1[k] = 0.0f;
-      partsa2[k] = 0.0f;
     }
 
     /* Otherwise, read them from a file. */
@@ -1104,6 +1106,17 @@ void test_bh(int N, int nr_threads, int runs, char *fileName) {
   }
   message("Average number of parts per leaf is %f.", ((double)N) / nr_leaves);
 
+  for(k = 0; k < N; k++)
+  {
+      partsx0[k] = parts[k].x[0];
+      partsx1[k] = parts[k].x[1];
+      partsx2[k] = parts[k].x[2];
+      partsmass[k] = parts[k].mass;
+      partsa0[k] = 0.0f;
+      partsa1[k] = 0.0f;
+      partsa2[k] = 0.0f;
+  }
+
 #if ICHECK > 0
   printf("----------------------------------------------------------\n");
 
@@ -1160,6 +1173,9 @@ void test_bh(int N, int nr_threads, int runs, char *fileName) {
       parts[i].a[0] = 0.0;
       parts[i].a[1] = 0.0;
       parts[i].a[2] = 0.0;
+      partsa0[k] = 0.0f;
+      partsa1[k] = 0.0f;
+      partsa2[k] = 0.0f;
     }
     
     /* Execute the tasks. */
diff --git a/examples/test_bh_2.cu b/examples/test_bh_2.cu
index 534b9b38fa3ff18c2b1e1f33617585622a28fbb1..be5dcd4ba73a655bd6d43cb27befa0cdc06aaaf8 100644
--- a/examples/test_bh_2.cu
+++ b/examples/test_bh_2.cu
@@ -479,5 +479,113 @@ void test_bh(int N, int runs, char *fileName) {
 
     //Create host particle arrays.
     if( cudaMallocHost(&parts_pos_xy_host, sizeof(double2) * N) != cudaSuccess)
-        error("Failed to allocated parts array");
+        error("Failed to allocate parts array");
+    if( cudaMallocHost(&parts_pos_z_host, sizeof(double) * N) != cudaSuccess)
+        error("Failed to allocate parts array");
+    if( cudaMallocHost( &parts_a_m_host, sizeof(float4) * N) != cudaSuccess )
+        error("Failed to allocate parts array");
+
+    if(fileName == NULL || fileName[0] == 0) {
+        for(k = 0; k < N; k++) {
+            parts_pos_xy_host.x = ((double)rand())/ RAND_MAX;
+            parts_pos_xy_host.y = ((double)rand())/ RAND_MAX;
+            parts_pos_z_host = ((double)rand())/ RAND_MAX;
+            parts_a_m_host.w = ((double)rand()) / RAND_MAX;
+        }
+    
+    } else {
+        file = fopen(fileName, "r");
+        if(file) {
+            for(k = 0; k < N; k++) {
+                if(fscanf(file, "%lf%lf%lf", &parts_pos_xy_host.x, &parts_pos_xy_host.y, &parts_pos_z_host) != 3)
+                    error("Failed to read position of part %i.", k);
+                if(fscanf(file, "%f", &parts[k].mass != 1)
+                    error("Failed to read mass of part %i.", k);
+            }
+            fclose(file);
+        }
+    }
+
+    /* Init the cells. */
+    root = cell_get();
+    root->loc_xy.x = 0.0;
+    root->loc_xy.y = 0.0;
+    root->loc_z = 0.0;
+    root->h = 1.0;
+    root->count = N;
+    root->parts = 0;
+    cell_split(root, &s);
+
+    int c = root - cell_pool;
+    int nr_leaves = 0;
+    while(c >= 0) {
+        if(!cell_pool[c].split) {
+            nr_leaves++;
+            c = c->sibling;
+        } else {
+            c = c->firstchild;
+        }
+    }
+
+    message("Average number of parts per leaf is %lf.", ((double)N) / ((double)nr_leaves));
+    
+}
+
+
+int main(int argc, char *argv[]) {
+    int c, nr_threads;
+    int N = 1000, runs = 1;
+    char fileName[100] = {0};
+
+    /* Parse the options */
+  while ((c = getopt(argc, argv, "n:r:t:f:c:i:")) != -1) switch (c) {
+      case 'n':
+        if (sscanf(optarg, "%d", &N) != 1)
+          error("Error parsing number of particles.");
+        break;
+      case 'r':
+        if (sscanf(optarg, "%d", &runs) != 1)
+          error("Error parsing number of runs.");
+        break;
+      case 't':
+        if (sscanf(optarg, "%d", &nr_threads) != 1)
+          error("Error parsing number of threads.");
+//        omp_set_num_threads(nr_threads);
+        break;
+      case 'f':
+        if (sscanf(optarg, "%s", &fileName[0]) != 1)
+          error("Error parsing file name.");
+        break;
+      case '?':
+        fprintf(stderr, "Usage: %s [-t nr_threads] [-n N] [-r runs] [-f file] "
+                        "[-c Nparts] [-i Niterations] \n",
+                argv[0]);
+        fprintf(stderr, "Solves the N-body problem using a Barnes-Hut\n"
+                        "tree code with N random particles read from a file in "
+                        "[0,1]^3 using"
+                        "nr_threads threads.\n"
+                        "A test of the neighbouring cells interaction with "
+                        "Nparts per cell is also run Niterations times.\n");
+        exit(EXIT_FAILURE);
+    }
+
+  /* Tree node information */
+  printf("Size of cell: %zu bytes.\n", sizeof(struct cell));
+
+  /* Part information */
+  printf("Size of part: %zu bytes.\n", sizeof(struct part));
+
+  /* Dump arguments. */
+  if (fileName[0] == 0) {
+    message("Computing the N-body problem over %i random particles using %i "
+            "threads (%i runs).",
+            N, nr_threads, runs);
+  } else {
+    message("Computing the N-body problem over %i particles read from '%s' "
+            "using %i threads (%i runs).",
+            N, fileName, nr_threads, runs);
+  }
+
+  /* Run the BH test. */
+  test_bh(N, nr_threads, runs, fileName);
 }
diff --git a/src/Makefile.in b/src/Makefile.in
deleted file mode 100644
index 4a3e029bb2bceef56d7fcdaf5167ff8d03c78f52..0000000000000000000000000000000000000000
--- a/src/Makefile.in
+++ /dev/null
@@ -1,659 +0,0 @@
-# Makefile.in generated by automake 1.11.1 from Makefile.am.
-# @configure_input@
-
-# Copyright (C) 1994, 1995, 1996, 1997, 1998, 1999, 2000, 2001, 2002,
-# 2003, 2004, 2005, 2006, 2007, 2008, 2009  Free Software Foundation,
-# Inc.
-# This Makefile.in is free software; the Free Software Foundation
-# gives unlimited permission to copy and/or distribute it,
-# with or without modifications, as long as this notice is preserved.
-
-# This program is distributed in the hope that it will be useful,
-# but WITHOUT ANY WARRANTY, to the extent permitted by law; without
-# even the implied warranty of MERCHANTABILITY or FITNESS FOR A
-# PARTICULAR PURPOSE.
-
-@SET_MAKE@
-
-# This file is part of Quickqsched.
-# Coypright (c) 2013 Pedro Gonnet (pedro.gonnet@durham.ac.uk),
-# 
-# This program is free software: you can redistribute it and/or modify
-# it under the terms of the GNU General Public License as published by
-# the Free Software Foundation, either version 3 of the License, or
-# (at your option) any later version.
-# 
-# This program is distributed in the hope that it will be useful,
-# but WITHOUT ANY WARRANTY; without even the implied warranty of
-# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
-# GNU General Public License for more details.
-# 
-# You should have received a copy of the GNU General Public License
-# along with this program.  If not, see <http://www.gnu.org/licenses/>.
-
-
-VPATH = @srcdir@
-pkgdatadir = $(datadir)/@PACKAGE@
-pkgincludedir = $(includedir)/@PACKAGE@
-pkglibdir = $(libdir)/@PACKAGE@
-pkglibexecdir = $(libexecdir)/@PACKAGE@
-am__cd = CDPATH="$${ZSH_VERSION+.}$(PATH_SEPARATOR)" && cd
-install_sh_DATA = $(install_sh) -c -m 644
-install_sh_PROGRAM = $(install_sh) -c
-install_sh_SCRIPT = $(install_sh) -c
-INSTALL_HEADER = $(INSTALL_DATA)
-transform = $(program_transform_name)
-NORMAL_INSTALL = :
-PRE_INSTALL = :
-POST_INSTALL = :
-NORMAL_UNINSTALL = :
-PRE_UNINSTALL = :
-POST_UNINSTALL = :
-build_triplet = @build@
-host_triplet = @host@
-
-# Build a CUDA-enabled version too?
-@HAVE_CUDA_TRUE@am__append_1 = libquicksched_cuda.la
-subdir = src
-DIST_COMMON = $(include_HEADERS) $(srcdir)/Makefile.am \
-	$(srcdir)/Makefile.in
-ACLOCAL_M4 = $(top_srcdir)/aclocal.m4
-am__aclocal_m4_deps = $(top_srcdir)/m4/acx_pthread.m4 \
-	$(top_srcdir)/m4/ax_check_compile_flag.m4 \
-	$(top_srcdir)/m4/ax_check_compiler_flags.m4 \
-	$(top_srcdir)/m4/ax_ext.m4 \
-	$(top_srcdir)/m4/ax_func_posix_memalign.m4 \
-	$(top_srcdir)/m4/ax_gcc_archflag.m4 \
-	$(top_srcdir)/m4/ax_gcc_x86_cpuid.m4 \
-	$(top_srcdir)/m4/ax_openmp.m4 \
-	$(top_srcdir)/m4/ax_prog_doxygen.m4 \
-	$(top_srcdir)/m4/libtool.m4 $(top_srcdir)/m4/ltoptions.m4 \
-	$(top_srcdir)/m4/ltsugar.m4 $(top_srcdir)/m4/ltversion.m4 \
-	$(top_srcdir)/m4/lt~obsolete.m4 $(top_srcdir)/configure.in
-am__configure_deps = $(am__aclocal_m4_deps) $(CONFIGURE_DEPENDENCIES) \
-	$(ACLOCAL_M4)
-mkinstalldirs = $(install_sh) -d
-CONFIG_HEADER = $(top_builddir)/config.h
-CONFIG_CLEAN_FILES =
-CONFIG_CLEAN_VPATH_FILES =
-am__vpath_adj_setup = srcdirstrip=`echo "$(srcdir)" | sed 's|.|.|g'`;
-am__vpath_adj = case $$p in \
-    $(srcdir)/*) f=`echo "$$p" | sed "s|^$$srcdirstrip/||"`;; \
-    *) f=$$p;; \
-  esac;
-am__strip_dir = f=`echo $$p | sed -e 's|^.*/||'`;
-am__install_max = 40
-am__nobase_strip_setup = \
-  srcdirstrip=`echo "$(srcdir)" | sed 's/[].[^$$\\*|]/\\\\&/g'`
-am__nobase_strip = \
-  for p in $$list; do echo "$$p"; done | sed -e "s|$$srcdirstrip/||"
-am__nobase_list = $(am__nobase_strip_setup); \
-  for p in $$list; do echo "$$p $$p"; done | \
-  sed "s| $$srcdirstrip/| |;"' / .*\//!s/ .*/ ./; s,\( .*\)/[^/]*$$,\1,' | \
-  $(AWK) 'BEGIN { files["."] = "" } { files[$$2] = files[$$2] " " $$1; \
-    if (++n[$$2] == $(am__install_max)) \
-      { print $$2, files[$$2]; n[$$2] = 0; files[$$2] = "" } } \
-    END { for (dir in files) print dir, files[dir] }'
-am__base_list = \
-  sed '$$!N;$$!N;$$!N;$$!N;$$!N;$$!N;$$!N;s/\n/ /g' | \
-  sed '$$!N;$$!N;$$!N;$$!N;s/\n/ /g'
-am__installdirs = "$(DESTDIR)$(libdir)" "$(DESTDIR)$(includedir)"
-LTLIBRARIES = $(lib_LTLIBRARIES)
-libquicksched_la_LIBADD =
-am_libquicksched_la_OBJECTS = qsched.lo queue.lo
-libquicksched_la_OBJECTS = $(am_libquicksched_la_OBJECTS)
-libquicksched_cuda_la_LIBADD =
-am__libquicksched_cuda_la_SOURCES_DIST = qsched.c queue.c \
-	cuda_queue.cu
-@HAVE_CUDA_TRUE@am__objects_1 = libquicksched_cuda_la-qsched.lo \
-@HAVE_CUDA_TRUE@	libquicksched_cuda_la-queue.lo cuda_queue.lo
-@HAVE_CUDA_TRUE@am_libquicksched_cuda_la_OBJECTS = $(am__objects_1)
-libquicksched_cuda_la_OBJECTS = $(am_libquicksched_cuda_la_OBJECTS)
-libquicksched_cuda_la_LINK = $(LIBTOOL) --tag=CC $(AM_LIBTOOLFLAGS) \
-	$(LIBTOOLFLAGS) --mode=link $(CCLD) \
-	$(libquicksched_cuda_la_CFLAGS) $(CFLAGS) $(AM_LDFLAGS) \
-	$(LDFLAGS) -o $@
-@HAVE_CUDA_TRUE@am_libquicksched_cuda_la_rpath = -rpath $(libdir)
-DEFAULT_INCLUDES = -I.@am__isrc@ -I$(top_builddir)
-depcomp = $(SHELL) $(top_srcdir)/depcomp
-am__depfiles_maybe = depfiles
-am__mv = mv -f
-COMPILE = $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) \
-	$(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS)
-LTCOMPILE = $(LIBTOOL) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) \
-	--mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) \
-	$(AM_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS)
-CCLD = $(CC)
-LINK = $(LIBTOOL) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) \
-	--mode=link $(CCLD) $(AM_CFLAGS) $(CFLAGS) $(AM_LDFLAGS) \
-	$(LDFLAGS) -o $@
-SOURCES = $(libquicksched_la_SOURCES) $(libquicksched_cuda_la_SOURCES)
-DIST_SOURCES = $(libquicksched_la_SOURCES) \
-	$(am__libquicksched_cuda_la_SOURCES_DIST)
-HEADERS = $(include_HEADERS)
-ETAGS = etags
-CTAGS = ctags
-DISTFILES = $(DIST_COMMON) $(DIST_SOURCES) $(TEXINFOS) $(EXTRA_DIST)
-ACLOCAL = @ACLOCAL@
-AMTAR = @AMTAR@
-AR = @AR@
-AUTOCONF = @AUTOCONF@
-AUTOHEADER = @AUTOHEADER@
-AUTOMAKE = @AUTOMAKE@
-AWK = @AWK@
-CC = @CC@
-CCDEPMODE = @CCDEPMODE@
-CFLAGS = @CFLAGS@
-CPP = @CPP@
-CPPFLAGS = @CPPFLAGS@
-CUDA_CFLAGS = @CUDA_CFLAGS@
-CUDA_LIBS = @CUDA_LIBS@
-CYGPATH_W = @CYGPATH_W@
-DEFS = @DEFS@
-DEPDIR = @DEPDIR@
-DOXYGEN_PAPER_SIZE = @DOXYGEN_PAPER_SIZE@
-DSYMUTIL = @DSYMUTIL@
-DUMPBIN = @DUMPBIN@
-DX_CONFIG = @DX_CONFIG@
-DX_DOCDIR = @DX_DOCDIR@
-DX_DOT = @DX_DOT@
-DX_DOXYGEN = @DX_DOXYGEN@
-DX_DVIPS = @DX_DVIPS@
-DX_EGREP = @DX_EGREP@
-DX_ENV = @DX_ENV@
-DX_FLAG_chi = @DX_FLAG_chi@
-DX_FLAG_chm = @DX_FLAG_chm@
-DX_FLAG_doc = @DX_FLAG_doc@
-DX_FLAG_dot = @DX_FLAG_dot@
-DX_FLAG_html = @DX_FLAG_html@
-DX_FLAG_man = @DX_FLAG_man@
-DX_FLAG_pdf = @DX_FLAG_pdf@
-DX_FLAG_ps = @DX_FLAG_ps@
-DX_FLAG_rtf = @DX_FLAG_rtf@
-DX_FLAG_xml = @DX_FLAG_xml@
-DX_HHC = @DX_HHC@
-DX_LATEX = @DX_LATEX@
-DX_MAKEINDEX = @DX_MAKEINDEX@
-DX_PDFLATEX = @DX_PDFLATEX@
-DX_PERL = @DX_PERL@
-DX_PROJECT = @DX_PROJECT@
-ECHO_C = @ECHO_C@
-ECHO_N = @ECHO_N@
-ECHO_T = @ECHO_T@
-EGREP = @EGREP@
-EXEEXT = @EXEEXT@
-FGREP = @FGREP@
-GREP = @GREP@
-INSTALL = @INSTALL@
-INSTALL_DATA = @INSTALL_DATA@
-INSTALL_PROGRAM = @INSTALL_PROGRAM@
-INSTALL_SCRIPT = @INSTALL_SCRIPT@
-INSTALL_STRIP_PROGRAM = @INSTALL_STRIP_PROGRAM@
-LD = @LD@
-LDFLAGS = @LDFLAGS@
-LIBOBJS = @LIBOBJS@
-LIBS = @LIBS@
-LIBTOOL = @LIBTOOL@
-LIPO = @LIPO@
-LN_S = @LN_S@
-LTLIBOBJS = @LTLIBOBJS@
-MAKEINFO = @MAKEINFO@
-MKDIR_P = @MKDIR_P@
-NM = @NM@
-NMEDIT = @NMEDIT@
-NVCC = @NVCC@
-OBJDUMP = @OBJDUMP@
-OBJEXT = @OBJEXT@
-OPENMP_CFLAGS = @OPENMP_CFLAGS@
-OTOOL = @OTOOL@
-OTOOL64 = @OTOOL64@
-PACKAGE = @PACKAGE@
-PACKAGE_BUGREPORT = @PACKAGE_BUGREPORT@
-PACKAGE_NAME = @PACKAGE_NAME@
-PACKAGE_STRING = @PACKAGE_STRING@
-PACKAGE_TARNAME = @PACKAGE_TARNAME@
-PACKAGE_VERSION = @PACKAGE_VERSION@
-PATH_SEPARATOR = @PATH_SEPARATOR@
-PRTDIAG = @PRTDIAG@
-PTHREAD_CC = @PTHREAD_CC@
-PTHREAD_CFLAGS = @PTHREAD_CFLAGS@
-PTHREAD_LIBS = @PTHREAD_LIBS@
-RANLIB = @RANLIB@
-SED = @SED@
-SET_MAKE = @SET_MAKE@
-SHELL = @SHELL@
-SIMD_FLAGS = @SIMD_FLAGS@
-STRIP = @STRIP@
-VERSION = @VERSION@
-abs_builddir = @abs_builddir@
-abs_srcdir = @abs_srcdir@
-abs_top_builddir = @abs_top_builddir@
-abs_top_srcdir = @abs_top_srcdir@
-ac_ct_CC = @ac_ct_CC@
-ac_ct_DUMPBIN = @ac_ct_DUMPBIN@
-acx_pthread_config = @acx_pthread_config@
-am__include = @am__include@
-am__leading_dot = @am__leading_dot@
-am__quote = @am__quote@
-am__tar = @am__tar@
-am__untar = @am__untar@
-bindir = @bindir@
-build = @build@
-build_alias = @build_alias@
-build_cpu = @build_cpu@
-build_os = @build_os@
-build_vendor = @build_vendor@
-builddir = @builddir@
-datadir = @datadir@
-datarootdir = @datarootdir@
-docdir = @docdir@
-dvidir = @dvidir@
-exec_prefix = @exec_prefix@
-host = @host@
-host_alias = @host_alias@
-host_cpu = @host_cpu@
-host_os = @host_os@
-host_vendor = @host_vendor@
-htmldir = @htmldir@
-includedir = @includedir@
-infodir = @infodir@
-install_sh = @install_sh@
-libdir = @libdir@
-libexecdir = @libexecdir@
-localedir = @localedir@
-localstatedir = @localstatedir@
-lt_ECHO = @lt_ECHO@
-mandir = @mandir@
-mkdir_p = @mkdir_p@
-oldincludedir = @oldincludedir@
-pdfdir = @pdfdir@
-prefix = @prefix@
-program_transform_name = @program_transform_name@
-psdir = @psdir@
-sbindir = @sbindir@
-sharedstatedir = @sharedstatedir@
-srcdir = @srcdir@
-sysconfdir = @sysconfdir@
-target_alias = @target_alias@
-top_build_prefix = @top_build_prefix@
-top_builddir = @top_builddir@
-top_srcdir = @top_srcdir@
-
-# Automake stuff
-AUTOMAKE_OPTIONS = gnu
-
-# Add the debug flag to the whole thing
-AM_CFLAGS = -g -O3 -Wall -Werror -ffast-math -fstrict-aliasing -ftree-vectorize \
-    -funroll-loops $(SIMD_FLAGS) $(OPENMP_CFLAGS) -DTIMERS \
-     #-fsanitize=address -fno-omit-frame-pointer
-
-
-# Assign a "safe" version number
-AM_LDFLAGS = -version-info 0:0:0
-
-# Build the libquicksched library
-lib_LTLIBRARIES = libquicksched.la $(am__append_1)
-libquicksched_la_SOURCES = qsched.c queue.c
-
-# List required headers
-include_HEADERS = atomic.h lock.h queue.h qsched.h task.h res.h error.h qsched.h
-@HAVE_CUDA_FALSE@SOURCES_CUDA = 
-
-# CUDA sources 
-@HAVE_CUDA_TRUE@SOURCES_CUDA = qsched.c queue.c cuda_queue.cu
-@HAVE_CUDA_TRUE@CUDA_MYFLAGS = -O3 -g -DCPU_TPS=3.1e9 -Xnvlink -rdc=true -lineinfo -src-in-ptx --maxrregcount=32 -Xptxas="-v" -Xptxas -dlcm=cg -gencode arch=compute_30,code=sm_30 -ftz=true -fmad=true -DFPTYPE_SINGLE -DWITH_CUDA #-fsanitize=address -fno-omit-frame-pointer
-@HAVE_CUDA_TRUE@libquicksched_cuda_la_SOURCES = $(SOURCES_CUDA)
-@HAVE_CUDA_TRUE@libquicksched_cuda_la_CFLAGS = -DFPTYPE_SINGLE $(AM_CFLAGS) -DWITH_CUDA $(CUDA_CFLAGS)
-all: all-am
-
-.SUFFIXES:
-.SUFFIXES: .c .cu .lo .o .obj
-$(srcdir)/Makefile.in:  $(srcdir)/Makefile.am  $(am__configure_deps)
-	@for dep in $?; do \
-	  case '$(am__configure_deps)' in \
-	    *$$dep*) \
-	      ( cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh ) \
-	        && { if test -f $@; then exit 0; else break; fi; }; \
-	      exit 1;; \
-	  esac; \
-	done; \
-	echo ' cd $(top_srcdir) && $(AUTOMAKE) --gnu src/Makefile'; \
-	$(am__cd) $(top_srcdir) && \
-	  $(AUTOMAKE) --gnu src/Makefile
-.PRECIOUS: Makefile
-Makefile: $(srcdir)/Makefile.in $(top_builddir)/config.status
-	@case '$?' in \
-	  *config.status*) \
-	    cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh;; \
-	  *) \
-	    echo ' cd $(top_builddir) && $(SHELL) ./config.status $(subdir)/$@ $(am__depfiles_maybe)'; \
-	    cd $(top_builddir) && $(SHELL) ./config.status $(subdir)/$@ $(am__depfiles_maybe);; \
-	esac;
-
-$(top_builddir)/config.status: $(top_srcdir)/configure $(CONFIG_STATUS_DEPENDENCIES)
-	cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh
-
-$(top_srcdir)/configure:  $(am__configure_deps)
-	cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh
-$(ACLOCAL_M4):  $(am__aclocal_m4_deps)
-	cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh
-$(am__aclocal_m4_deps):
-install-libLTLIBRARIES: $(lib_LTLIBRARIES)
-	@$(NORMAL_INSTALL)
-	test -z "$(libdir)" || $(MKDIR_P) "$(DESTDIR)$(libdir)"
-	@list='$(lib_LTLIBRARIES)'; test -n "$(libdir)" || list=; \
-	list2=; for p in $$list; do \
-	  if test -f $$p; then \
-	    list2="$$list2 $$p"; \
-	  else :; fi; \
-	done; \
-	test -z "$$list2" || { \
-	  echo " $(LIBTOOL) $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=install $(INSTALL) $(INSTALL_STRIP_FLAG) $$list2 '$(DESTDIR)$(libdir)'"; \
-	  $(LIBTOOL) $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=install $(INSTALL) $(INSTALL_STRIP_FLAG) $$list2 "$(DESTDIR)$(libdir)"; \
-	}
-
-uninstall-libLTLIBRARIES:
-	@$(NORMAL_UNINSTALL)
-	@list='$(lib_LTLIBRARIES)'; test -n "$(libdir)" || list=; \
-	for p in $$list; do \
-	  $(am__strip_dir) \
-	  echo " $(LIBTOOL) $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=uninstall rm -f '$(DESTDIR)$(libdir)/$$f'"; \
-	  $(LIBTOOL) $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=uninstall rm -f "$(DESTDIR)$(libdir)/$$f"; \
-	done
-
-clean-libLTLIBRARIES:
-	-test -z "$(lib_LTLIBRARIES)" || rm -f $(lib_LTLIBRARIES)
-	@list='$(lib_LTLIBRARIES)'; for p in $$list; do \
-	  dir="`echo $$p | sed -e 's|/[^/]*$$||'`"; \
-	  test "$$dir" != "$$p" || dir=.; \
-	  echo "rm -f \"$${dir}/so_locations\""; \
-	  rm -f "$${dir}/so_locations"; \
-	done
-libquicksched.la: $(libquicksched_la_OBJECTS) $(libquicksched_la_DEPENDENCIES) 
-	$(LINK) -rpath $(libdir) $(libquicksched_la_OBJECTS) $(libquicksched_la_LIBADD) $(LIBS)
-libquicksched_cuda.la: $(libquicksched_cuda_la_OBJECTS) $(libquicksched_cuda_la_DEPENDENCIES) 
-	$(libquicksched_cuda_la_LINK) $(am_libquicksched_cuda_la_rpath) $(libquicksched_cuda_la_OBJECTS) $(libquicksched_cuda_la_LIBADD) $(LIBS)
-
-mostlyclean-compile:
-	-rm -f *.$(OBJEXT)
-
-distclean-compile:
-	-rm -f *.tab.c
-
-@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/libquicksched_cuda_la-qsched.Plo@am__quote@
-@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/libquicksched_cuda_la-queue.Plo@am__quote@
-@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/qsched.Plo@am__quote@
-@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/queue.Plo@am__quote@
-
-.c.o:
-@am__fastdepCC_TRUE@	$(COMPILE) -MT $@ -MD -MP -MF $(DEPDIR)/$*.Tpo -c -o $@ $<
-@am__fastdepCC_TRUE@	$(am__mv) $(DEPDIR)/$*.Tpo $(DEPDIR)/$*.Po
-@AMDEP_TRUE@@am__fastdepCC_FALSE@	source='$<' object='$@' libtool=no @AMDEPBACKSLASH@
-@AMDEP_TRUE@@am__fastdepCC_FALSE@	DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
-@am__fastdepCC_FALSE@	$(COMPILE) -c $<
-
-.c.obj:
-@am__fastdepCC_TRUE@	$(COMPILE) -MT $@ -MD -MP -MF $(DEPDIR)/$*.Tpo -c -o $@ `$(CYGPATH_W) '$<'`
-@am__fastdepCC_TRUE@	$(am__mv) $(DEPDIR)/$*.Tpo $(DEPDIR)/$*.Po
-@AMDEP_TRUE@@am__fastdepCC_FALSE@	source='$<' object='$@' libtool=no @AMDEPBACKSLASH@
-@AMDEP_TRUE@@am__fastdepCC_FALSE@	DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
-@am__fastdepCC_FALSE@	$(COMPILE) -c `$(CYGPATH_W) '$<'`
-
-.c.lo:
-@am__fastdepCC_TRUE@	$(LTCOMPILE) -MT $@ -MD -MP -MF $(DEPDIR)/$*.Tpo -c -o $@ $<
-@am__fastdepCC_TRUE@	$(am__mv) $(DEPDIR)/$*.Tpo $(DEPDIR)/$*.Plo
-@AMDEP_TRUE@@am__fastdepCC_FALSE@	source='$<' object='$@' libtool=yes @AMDEPBACKSLASH@
-@AMDEP_TRUE@@am__fastdepCC_FALSE@	DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
-@am__fastdepCC_FALSE@	$(LTCOMPILE) -c -o $@ $<
-
-libquicksched_cuda_la-qsched.lo: qsched.c
-@am__fastdepCC_TRUE@	$(LIBTOOL)  --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(libquicksched_cuda_la_CFLAGS) $(CFLAGS) -MT libquicksched_cuda_la-qsched.lo -MD -MP -MF $(DEPDIR)/libquicksched_cuda_la-qsched.Tpo -c -o libquicksched_cuda_la-qsched.lo `test -f 'qsched.c' || echo '$(srcdir)/'`qsched.c
-@am__fastdepCC_TRUE@	$(am__mv) $(DEPDIR)/libquicksched_cuda_la-qsched.Tpo $(DEPDIR)/libquicksched_cuda_la-qsched.Plo
-@AMDEP_TRUE@@am__fastdepCC_FALSE@	source='qsched.c' object='libquicksched_cuda_la-qsched.lo' libtool=yes @AMDEPBACKSLASH@
-@AMDEP_TRUE@@am__fastdepCC_FALSE@	DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
-@am__fastdepCC_FALSE@	$(LIBTOOL)  --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(libquicksched_cuda_la_CFLAGS) $(CFLAGS) -c -o libquicksched_cuda_la-qsched.lo `test -f 'qsched.c' || echo '$(srcdir)/'`qsched.c
-
-libquicksched_cuda_la-queue.lo: queue.c
-@am__fastdepCC_TRUE@	$(LIBTOOL)  --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(libquicksched_cuda_la_CFLAGS) $(CFLAGS) -MT libquicksched_cuda_la-queue.lo -MD -MP -MF $(DEPDIR)/libquicksched_cuda_la-queue.Tpo -c -o libquicksched_cuda_la-queue.lo `test -f 'queue.c' || echo '$(srcdir)/'`queue.c
-@am__fastdepCC_TRUE@	$(am__mv) $(DEPDIR)/libquicksched_cuda_la-queue.Tpo $(DEPDIR)/libquicksched_cuda_la-queue.Plo
-@AMDEP_TRUE@@am__fastdepCC_FALSE@	source='queue.c' object='libquicksched_cuda_la-queue.lo' libtool=yes @AMDEPBACKSLASH@
-@AMDEP_TRUE@@am__fastdepCC_FALSE@	DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
-@am__fastdepCC_FALSE@	$(LIBTOOL)  --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(libquicksched_cuda_la_CFLAGS) $(CFLAGS) -c -o libquicksched_cuda_la-queue.lo `test -f 'queue.c' || echo '$(srcdir)/'`queue.c
-
-mostlyclean-libtool:
-	-rm -f *.lo
-
-clean-libtool:
-	-rm -rf .libs _libs
-install-includeHEADERS: $(include_HEADERS)
-	@$(NORMAL_INSTALL)
-	test -z "$(includedir)" || $(MKDIR_P) "$(DESTDIR)$(includedir)"
-	@list='$(include_HEADERS)'; test -n "$(includedir)" || list=; \
-	for p in $$list; do \
-	  if test -f "$$p"; then d=; else d="$(srcdir)/"; fi; \
-	  echo "$$d$$p"; \
-	done | $(am__base_list) | \
-	while read files; do \
-	  echo " $(INSTALL_HEADER) $$files '$(DESTDIR)$(includedir)'"; \
-	  $(INSTALL_HEADER) $$files "$(DESTDIR)$(includedir)" || exit $$?; \
-	done
-
-uninstall-includeHEADERS:
-	@$(NORMAL_UNINSTALL)
-	@list='$(include_HEADERS)'; test -n "$(includedir)" || list=; \
-	files=`for p in $$list; do echo $$p; done | sed -e 's|^.*/||'`; \
-	test -n "$$files" || exit 0; \
-	echo " ( cd '$(DESTDIR)$(includedir)' && rm -f" $$files ")"; \
-	cd "$(DESTDIR)$(includedir)" && rm -f $$files
-
-ID: $(HEADERS) $(SOURCES) $(LISP) $(TAGS_FILES)
-	list='$(SOURCES) $(HEADERS) $(LISP) $(TAGS_FILES)'; \
-	unique=`for i in $$list; do \
-	    if test -f "$$i"; then echo $$i; else echo $(srcdir)/$$i; fi; \
-	  done | \
-	  $(AWK) '{ files[$$0] = 1; nonempty = 1; } \
-	      END { if (nonempty) { for (i in files) print i; }; }'`; \
-	mkid -fID $$unique
-tags: TAGS
-
-TAGS:  $(HEADERS) $(SOURCES)  $(TAGS_DEPENDENCIES) \
-		$(TAGS_FILES) $(LISP)
-	set x; \
-	here=`pwd`; \
-	list='$(SOURCES) $(HEADERS)  $(LISP) $(TAGS_FILES)'; \
-	unique=`for i in $$list; do \
-	    if test -f "$$i"; then echo $$i; else echo $(srcdir)/$$i; fi; \
-	  done | \
-	  $(AWK) '{ files[$$0] = 1; nonempty = 1; } \
-	      END { if (nonempty) { for (i in files) print i; }; }'`; \
-	shift; \
-	if test -z "$(ETAGS_ARGS)$$*$$unique"; then :; else \
-	  test -n "$$unique" || unique=$$empty_fix; \
-	  if test $$# -gt 0; then \
-	    $(ETAGS) $(ETAGSFLAGS) $(AM_ETAGSFLAGS) $(ETAGS_ARGS) \
-	      "$$@" $$unique; \
-	  else \
-	    $(ETAGS) $(ETAGSFLAGS) $(AM_ETAGSFLAGS) $(ETAGS_ARGS) \
-	      $$unique; \
-	  fi; \
-	fi
-ctags: CTAGS
-CTAGS:  $(HEADERS) $(SOURCES)  $(TAGS_DEPENDENCIES) \
-		$(TAGS_FILES) $(LISP)
-	list='$(SOURCES) $(HEADERS)  $(LISP) $(TAGS_FILES)'; \
-	unique=`for i in $$list; do \
-	    if test -f "$$i"; then echo $$i; else echo $(srcdir)/$$i; fi; \
-	  done | \
-	  $(AWK) '{ files[$$0] = 1; nonempty = 1; } \
-	      END { if (nonempty) { for (i in files) print i; }; }'`; \
-	test -z "$(CTAGS_ARGS)$$unique" \
-	  || $(CTAGS) $(CTAGSFLAGS) $(AM_CTAGSFLAGS) $(CTAGS_ARGS) \
-	     $$unique
-
-GTAGS:
-	here=`$(am__cd) $(top_builddir) && pwd` \
-	  && $(am__cd) $(top_srcdir) \
-	  && gtags -i $(GTAGS_ARGS) "$$here"
-
-distclean-tags:
-	-rm -f TAGS ID GTAGS GRTAGS GSYMS GPATH tags
-
-distdir: $(DISTFILES)
-	@srcdirstrip=`echo "$(srcdir)" | sed 's/[].[^$$\\*]/\\\\&/g'`; \
-	topsrcdirstrip=`echo "$(top_srcdir)" | sed 's/[].[^$$\\*]/\\\\&/g'`; \
-	list='$(DISTFILES)'; \
-	  dist_files=`for file in $$list; do echo $$file; done | \
-	  sed -e "s|^$$srcdirstrip/||;t" \
-	      -e "s|^$$topsrcdirstrip/|$(top_builddir)/|;t"`; \
-	case $$dist_files in \
-	  */*) $(MKDIR_P) `echo "$$dist_files" | \
-			   sed '/\//!d;s|^|$(distdir)/|;s,/[^/]*$$,,' | \
-			   sort -u` ;; \
-	esac; \
-	for file in $$dist_files; do \
-	  if test -f $$file || test -d $$file; then d=.; else d=$(srcdir); fi; \
-	  if test -d $$d/$$file; then \
-	    dir=`echo "/$$file" | sed -e 's,/[^/]*$$,,'`; \
-	    if test -d "$(distdir)/$$file"; then \
-	      find "$(distdir)/$$file" -type d ! -perm -700 -exec chmod u+rwx {} \;; \
-	    fi; \
-	    if test -d $(srcdir)/$$file && test $$d != $(srcdir); then \
-	      cp -fpR $(srcdir)/$$file "$(distdir)$$dir" || exit 1; \
-	      find "$(distdir)/$$file" -type d ! -perm -700 -exec chmod u+rwx {} \;; \
-	    fi; \
-	    cp -fpR $$d/$$file "$(distdir)$$dir" || exit 1; \
-	  else \
-	    test -f "$(distdir)/$$file" \
-	    || cp -p $$d/$$file "$(distdir)/$$file" \
-	    || exit 1; \
-	  fi; \
-	done
-check-am: all-am
-check: check-am
-all-am: Makefile $(LTLIBRARIES) $(HEADERS)
-installdirs:
-	for dir in "$(DESTDIR)$(libdir)" "$(DESTDIR)$(includedir)"; do \
-	  test -z "$$dir" || $(MKDIR_P) "$$dir"; \
-	done
-install: install-am
-install-exec: install-exec-am
-install-data: install-data-am
-uninstall: uninstall-am
-
-install-am: all-am
-	@$(MAKE) $(AM_MAKEFLAGS) install-exec-am install-data-am
-
-installcheck: installcheck-am
-install-strip:
-	$(MAKE) $(AM_MAKEFLAGS) INSTALL_PROGRAM="$(INSTALL_STRIP_PROGRAM)" \
-	  install_sh_PROGRAM="$(INSTALL_STRIP_PROGRAM)" INSTALL_STRIP_FLAG=-s \
-	  `test -z '$(STRIP)' || \
-	    echo "INSTALL_PROGRAM_ENV=STRIPPROG='$(STRIP)'"` install
-mostlyclean-generic:
-
-clean-generic:
-
-distclean-generic:
-	-test -z "$(CONFIG_CLEAN_FILES)" || rm -f $(CONFIG_CLEAN_FILES)
-	-test . = "$(srcdir)" || test -z "$(CONFIG_CLEAN_VPATH_FILES)" || rm -f $(CONFIG_CLEAN_VPATH_FILES)
-
-maintainer-clean-generic:
-	@echo "This command is intended for maintainers to use"
-	@echo "it deletes files that may require special tools to rebuild."
-clean: clean-am
-
-clean-am: clean-generic clean-libLTLIBRARIES clean-libtool \
-	mostlyclean-am
-
-distclean: distclean-am
-	-rm -rf ./$(DEPDIR)
-	-rm -f Makefile
-distclean-am: clean-am distclean-compile distclean-generic \
-	distclean-tags
-
-dvi: dvi-am
-
-dvi-am:
-
-html: html-am
-
-html-am:
-
-info: info-am
-
-info-am:
-
-install-data-am: install-includeHEADERS
-
-install-dvi: install-dvi-am
-
-install-dvi-am:
-
-install-exec-am: install-libLTLIBRARIES
-
-install-html: install-html-am
-
-install-html-am:
-
-install-info: install-info-am
-
-install-info-am:
-
-install-man:
-
-install-pdf: install-pdf-am
-
-install-pdf-am:
-
-install-ps: install-ps-am
-
-install-ps-am:
-
-installcheck-am:
-
-maintainer-clean: maintainer-clean-am
-	-rm -rf ./$(DEPDIR)
-	-rm -f Makefile
-maintainer-clean-am: distclean-am maintainer-clean-generic
-
-mostlyclean: mostlyclean-am
-
-mostlyclean-am: mostlyclean-compile mostlyclean-generic \
-	mostlyclean-libtool
-
-pdf: pdf-am
-
-pdf-am:
-
-ps: ps-am
-
-ps-am:
-
-uninstall-am: uninstall-includeHEADERS uninstall-libLTLIBRARIES
-
-.MAKE: install-am install-strip
-
-.PHONY: CTAGS GTAGS all all-am check check-am clean clean-generic \
-	clean-libLTLIBRARIES clean-libtool ctags distclean \
-	distclean-compile distclean-generic distclean-libtool \
-	distclean-tags distdir dvi dvi-am html html-am info info-am \
-	install install-am install-data install-data-am install-dvi \
-	install-dvi-am install-exec install-exec-am install-html \
-	install-html-am install-includeHEADERS install-info \
-	install-info-am install-libLTLIBRARIES install-man install-pdf \
-	install-pdf-am install-ps install-ps-am install-strip \
-	installcheck installcheck-am installdirs maintainer-clean \
-	maintainer-clean-generic mostlyclean mostlyclean-compile \
-	mostlyclean-generic mostlyclean-libtool pdf pdf-am ps ps-am \
-	tags uninstall uninstall-am uninstall-includeHEADERS \
-	uninstall-libLTLIBRARIES
-
-@HAVE_CUDA_TRUE@.cu: qsched.c queue.c
-@HAVE_CUDA_TRUE@.cu.o:
-@HAVE_CUDA_TRUE@	$(NVCC) -c $(NVCCFLAGS) $(CUDA_CFLAGS) $(CUDA_MYFLAGS) $< -o $@
-@HAVE_CUDA_TRUE@.cu.lo:
-@HAVE_CUDA_TRUE@	$(top_srcdir)/cudalt.py $@ $(NVCC) -c $(NVCCFLAGS) $(CUDA_CFLAGS) $(CUDA_MYFLAGS) $<
-
-# Tell versions [3.59,3.63) of GNU make to not export all variables.
-# Otherwise a system limit (for SysV at least) may be exceeded.
-.NOEXPORT: