diff --git a/examples/test_bh_new.c b/examples/test_bh_new.c
index 0fabd158cbaaf8cd5e53a03e2967ab4d57103ef2..2533cff9a9994a75b4863ed119ad5702f7528482 100644
--- a/examples/test_bh_new.c
+++ b/examples/test_bh_new.c
@@ -38,13 +38,13 @@
 /* Some local constants. */
 #define cell_pool_grow 1000
 #define cell_maxparts 128
-#define task_limit 1
+#define task_limit 1e8
 #define const_G 1    // 6.6738e-8
 #define dist_min 0.5 /* Used for legacy walk only */
 #define dist_cutoff_ratio 1.5
 
 #define ICHECK -1
-#define SANITY_CHECKS
+#define NO_SANITY_CHECKS
 #define NO_COM_AS_TASK
 #define NO_COUNTERS
 #define QUADRUPOLES
@@ -70,7 +70,12 @@ struct part_local {
 struct multipole {
 
 #ifdef QUADRUPOLES
-  double I[3][3];
+  double I_xx;
+  double I_yy;
+  double I_zz;
+  double I_xy;
+  double I_xz;
+  double I_yz;
 #endif
 
   double com[3];
@@ -80,7 +85,12 @@ struct multipole {
 struct multipole_local {
 
 #ifdef QUADRUPOLES
-  float I[3][3];
+  float I_xx;
+  float I_yy;
+  float I_zz;
+  float I_xy;
+  float I_xz;
+  float I_yz;
 #endif
 
   float com[3];
@@ -114,7 +124,7 @@ struct cell {
   int res, com_tid;
   struct index *indices;
 
-};// __attribute__((aligned(256)));
+} __attribute__((aligned(64)));
 
 /** Task types. */
 enum task_type {
@@ -182,10 +192,15 @@ void comp_com(struct cell *c) {
   int k, count = c->count;
   struct cell *cp;
   struct part *parts = c->parts;
-  double com[3] = {0.0, 0.0, 0.0}, mass = 0.0;
+  double com[3] = {0.0, 0.0, 0.0}, mass = 0.0, imass = 0.0;
 
 #ifdef QUADRUPOLES
-  double I[3][3] = {{0.0, 0.0, 0.0} , {0.0, 0.0, 0.0} , {0.0, 0.0, 0.0} };
+  double I_xx = 0.;
+  double I_yy = 0.;
+  double I_zz = 0.;
+  double I_xy = 0.;
+  double I_xz = 0.;
+  double I_yz = 0.;
 #endif
 
 #ifdef SANITY_CHECKS
@@ -204,26 +219,16 @@ void comp_com(struct cell *c) {
       mass += cp_mass;
 
 #ifdef QUADRUPOLES
-      I[0][0] += cp->new.I[0][0];
-      I[0][1] += cp->new.I[0][1];
-      I[0][2] += cp->new.I[0][2];
-      I[1][0] += cp->new.I[1][0];
-      I[1][1] += cp->new.I[1][1];
-      I[1][2] += cp->new.I[1][2];
-      I[2][0] += cp->new.I[2][0];
-      I[2][1] += cp->new.I[2][1];
-      I[2][2] += cp->new.I[2][2];
+      I_xx += cp->new.I_xx;
+      I_yy += cp->new.I_yy;
+      I_zz += cp->new.I_zz;
+      I_xy += cp->new.I_xy;
+      I_xz += cp->new.I_xz;
+      I_yz += cp->new.I_yz;
 #endif 
 
     }
 
-    /* Final operation for the COM */
-    float imass = 1.0f / mass;
-    com[0] *= imass;
-    com[1] *= imass;
-    com[2] *= imass;
-
-
   /* Otherwise, collect the multipole from the particles. */
   } else {
 
@@ -235,43 +240,34 @@ void comp_com(struct cell *c) {
       mass += p_mass;
 
 #ifdef QUADRUPOLES
-      I[0][0] += parts[k].x[0] * parts[k].x[0] * p_mass;
-      I[0][1] += parts[k].x[0] * parts[k].x[1] * p_mass;
-      I[0][2] += parts[k].x[0] * parts[k].x[2] * p_mass;
-      I[1][0] += parts[k].x[1] * parts[k].x[0] * p_mass;
-      I[1][1] += parts[k].x[1] * parts[k].x[1] * p_mass;
-      I[1][2] += parts[k].x[1] * parts[k].x[2] * p_mass;
-      I[2][0] += parts[k].x[2] * parts[k].x[0] * p_mass;
-      I[2][1] += parts[k].x[2] * parts[k].x[1] * p_mass;
-      I[2][2] += parts[k].x[2] * parts[k].x[2] * p_mass;
+      I_xx += parts[k].x[0] * parts[k].x[0] * p_mass;
+      I_yy += parts[k].x[1] * parts[k].x[1] * p_mass;
+      I_zz += parts[k].x[2] * parts[k].x[2] * p_mass;
+      I_xy += parts[k].x[0] * parts[k].x[1] * p_mass;
+      I_xz += parts[k].x[0] * parts[k].x[2] * p_mass;
+      I_yz += parts[k].x[1] * parts[k].x[2] * p_mass;
 #endif 
 
     }
 
-    /* Final operation for the COM */
-    float imass = 1.0f / mass;
-    com[0] *= imass;
-    com[1] *= imass;
-    com[2] *= imass;
   }
 
   /* Store the multipole data */
+  imass = 1.0f / mass;
   c->new.mass = mass;
-  c->new.com[0] = com[0];
-  c->new.com[1] = com[1];
-  c->new.com[2] = com[2];
+  c->new.com[0] = com[0] * imass;
+  c->new.com[1] = com[1] * imass;
+  c->new.com[2] = com[2] * imass;
 
 
 #ifdef QUADRUPOLES
-  c->new.I[0][0] = I[0][0];
-  c->new.I[0][1] = I[0][1];
-  c->new.I[0][2] = I[0][2];
-  c->new.I[1][0] = I[1][0];
-  c->new.I[1][1] = I[1][1];
-  c->new.I[1][2] = I[1][2];
-  c->new.I[2][0] = I[2][0];
-  c->new.I[2][1] = I[2][1];
-  c->new.I[2][2] = I[2][2];
+  /* Store the quadrupole data */
+  c->new.I_xx = I_xx;
+  c->new.I_yy = I_yy;
+  c->new.I_zz = I_zz;
+  c->new.I_xy = I_xy;
+  c->new.I_xz = I_xz;
+  c->new.I_yz = I_yz;
 #endif
 
 }
@@ -524,12 +520,13 @@ static inline void make_interact_pc(struct part_local *parts, int part_count,
 
 #ifndef QUADRUPOLES
 
-	parts[i].a[0] += mrinv3 * dx[0];
-	parts[i].a[1] += mrinv3 * dx[1];
-	parts[i].a[2] += mrinv3 * dx[2];
+      parts[i].a[0] += mrinv3 * dx[0];
+      parts[i].a[1] += mrinv3 * dx[1];
+      parts[i].a[2] += mrinv3 * dx[2];
 
 #else
-      
+
+      /* Follows the notation in Bonsai */
       float mrinv5 = mrinv3 * ir * ir;
       float mrinv7 = mrinv5 * ir * ir;
 
@@ -537,10 +534,10 @@ static inline void make_interact_pc(struct part_local *parts, int part_count,
       float D2 =  3.f * mrinv5;
       float D3 = -15.f * mrinv7;
 
-      float q = mults[j].I[0][0] + mults[j].I[1][1] + mults[j].I[2][2];
-      float qRx = mults[j].I[0][0]*dx[0] + mults[j].I[0][1]*dx[1] + mults[j].I[0][2]*dx[2];
-      float qRy = mults[j].I[1][0]*dx[0] + mults[j].I[1][1]*dx[1] + mults[j].I[1][2]*dx[2];
-      float qRz = mults[j].I[2][0]*dx[0] + mults[j].I[2][1]*dx[1] + mults[j].I[2][2]*dx[2];
+      float q = mults[j].I_xx + mults[j].I_yy + mults[j].I_zz;
+      float qRx = mults[j].I_xx*dx[0] + mults[j].I_xy*dx[1] + mults[j].I_xz*dx[2];
+      float qRy = mults[j].I_xy*dx[0] + mults[j].I_yy*dx[1] + mults[j].I_yz*dx[2];
+      float qRz = mults[j].I_xz*dx[0] + mults[j].I_yz*dx[1] + mults[j].I_zz*dx[2];
       float qRR = qRx * dx[0] + qRy * dx[1] + qRz * dx[2];
       float C = D1 + 0.5f * D2 * q + 0.5f * D3 * qRR;
 
@@ -548,11 +545,10 @@ static inline void make_interact_pc(struct part_local *parts, int part_count,
       parts[i].a[1] -= C * dx[1] + D2 * qRy;
       parts[i].a[2] -= C * dx[2] + D2 * qRz;
 
-      /* message(" %f %f %f", C * dx[0] + D2 * qRx, C * dx[1] + D2 * qRy, C * dx[2] + D2 * qRz); */
 
 #endif
 
-    } /* loop over every multipoles. */
+    } /* loop over every multipole. */
   } /* loop over every particle. */
 }
 
@@ -615,15 +611,12 @@ static inline void iact_pair_pc(struct cell *ci, struct cell *cj) {
 
 	/* Final operation to get the quadrupoles */
 	double imass = 1. / cps->new.mass;
-	mult_local[mult_count].I[0][0] = cps->new.I[0][0]*imass - cps->new.com[0] * cps->new.com[0];
-	mult_local[mult_count].I[0][1] = cps->new.I[0][1]*imass - cps->new.com[0] * cps->new.com[1];
-	mult_local[mult_count].I[0][2] = cps->new.I[0][2]*imass - cps->new.com[0] * cps->new.com[2];
-	mult_local[mult_count].I[1][0] = cps->new.I[1][0]*imass - cps->new.com[1] * cps->new.com[0];
-	mult_local[mult_count].I[1][1] = cps->new.I[1][1]*imass - cps->new.com[1] * cps->new.com[1];
-	mult_local[mult_count].I[1][2] = cps->new.I[1][2]*imass - cps->new.com[1] * cps->new.com[2];
-	mult_local[mult_count].I[2][0] = cps->new.I[2][0]*imass - cps->new.com[2] * cps->new.com[0];
-	mult_local[mult_count].I[2][1] = cps->new.I[2][1]*imass - cps->new.com[2] * cps->new.com[1];
-	mult_local[mult_count].I[2][2] = cps->new.I[2][2]*imass - cps->new.com[2] * cps->new.com[2];
+	mult_local[mult_count].I_xx = cps->new.I_xx*imass - cps->new.com[0] * cps->new.com[0];
+	mult_local[mult_count].I_xy = cps->new.I_xy*imass - cps->new.com[0] * cps->new.com[1];
+	mult_local[mult_count].I_xz = cps->new.I_xz*imass - cps->new.com[0] * cps->new.com[2];
+	mult_local[mult_count].I_yy = cps->new.I_yy*imass - cps->new.com[1] * cps->new.com[1];
+	mult_local[mult_count].I_yz = cps->new.I_yz*imass - cps->new.com[1] * cps->new.com[2];
+	mult_local[mult_count].I_zz = cps->new.I_zz*imass - cps->new.com[2] * cps->new.com[2];
 #endif	
 
 	mult_count++;
@@ -1281,14 +1274,13 @@ void interact_exact(int N, struct part *parts, int monitor) {
  */
 void test_bh(int N, int nr_threads, int runs, char *fileName) {
 
-  int i,j, k;
+  int i, k;
   struct cell *root;
   struct part *parts;
   FILE *file;
   struct qsched s;
   ticks tic, toc_run, tot_setup = 0, tot_run = 0;
   int countMultipoles = 0, countPairs = 0, countCoMs = 0;
-  //  double I[3][3] = {{0., 0., 0.},{0., 0., 0.},{0., 0., 0.}};
 
   /* Runner function. */
   void runner(int type, void * data) {
@@ -1341,23 +1333,6 @@ void test_bh(int N, int nr_threads, int runs, char *fileName) {
       parts[k].a_legacy[1] = 0.0;
       parts[k].a_legacy[2] = 0.0;
     }
-    j++;
-    /* for (i = 0; i < 4; i++) { */
-    /*   for (j = 0; j < 4; j++) { */
-    /* 	for (k = 0; k < 4; k++) { */
-    /* 	  int l = 16*i + 4*j + k; */
-    /* 	  parts[l].id = l; */
-    /* 	  parts[l].x[0] = 0.125 + 0.25 * i;  */
-    /* 	  parts[l].x[1] = 0.125 + 0.25 * j; */
-    /* 	  parts[l].x[2] = 0.125 + 0.25 * k; */
-    /* 	  parts[l].mass = 1.; */
-    /* 	  parts[l].a_legacy[0] = 0.0; */
-    /* 	  parts[l].a_legacy[1] = 0.0; */
-    /* 	  parts[l].a_legacy[2] = 0.0; */
-
-    /* 	} */
-    /*   } */
-    /* } */
 
     /* Otherwise, read them from a file. */
   } else {
@@ -1401,7 +1376,7 @@ void test_bh(int N, int nr_threads, int runs, char *fileName) {
   }
   message("Average number of parts per leaf is %f.", ((double)N) / nr_leaves);
 
-#if 1 //ICHECK > 0
+#if ICHECK > 0
   printf("----------------------------------------------------------\n");
 
   /* Do a N^2 interactions calculation */
@@ -1482,42 +1457,6 @@ void test_bh(int N, int nr_threads, int runs, char *fileName) {
     tot_run += toc_run - tic;
   }
 
-  /* for (k = 0; k < N; ++k) { */
-  /*   I[0][0] += parts[k].mass * parts[k].x[0] * parts[k].x[0]; */
-  /*   I[0][1] += parts[k].mass * parts[k].x[0] * parts[k].x[1]; */
-  /*   I[0][2] += parts[k].mass * parts[k].x[0] * parts[k].x[2]; */
-  /*   I[1][0] += parts[k].mass * parts[k].x[1] * parts[k].x[0]; */
-  /*   I[1][1] += parts[k].mass * parts[k].x[1] * parts[k].x[1]; */
-  /*   I[1][2] += parts[k].mass * parts[k].x[1] * parts[k].x[2]; */
-  /*   I[2][0] += parts[k].mass * parts[k].x[2] * parts[k].x[0]; */
-  /*   I[2][1] += parts[k].mass * parts[k].x[2] * parts[k].x[1];       */
-  /*   I[2][2] += parts[k].mass * parts[k].x[2] * parts[k].x[2]; */
-  /* } */
-
-  /* for (k = 0; k < N; ++k) { */
-  /*   I[0][0] = I[0][0] / root->new.mass - root->new.com[0]*root->new.com[0]; */
-  /*   I[0][1] = I[0][1] / root->new.mass - root->new.com[0]*root->new.com[1]; */
-  /*   I[0][2] = I[0][2] / root->new.mass - root->new.com[0]*root->new.com[2]; */
-  /*   I[1][0] = I[1][0] / root->new.mass - root->new.com[1]*root->new.com[0]; */
-  /*   I[1][1] = I[1][1] / root->new.mass - root->new.com[1]*root->new.com[1]; */
-  /*   I[1][2] = I[1][2] / root->new.mass - root->new.com[1]*root->new.com[2]; */
-  /*   I[2][0] = I[2][0] / root->new.mass - root->new.com[2]*root->new.com[0]; */
-  /*   I[2][1] = I[2][1] / root->new.mass - root->new.com[2]*root->new.com[1]; */
-  /*   I[2][2] = I[2][2] / root->new.mass - root->new.com[2]*root->new.com[2]; */
-      
-  /* } */
-
-  /* message("[check] root mass= %f %f", root->legacy.mass, root->new.mass); */
-  /* message("[check] root CoMx= %f %f", root->legacy.com[0], root->new.com[0]); */
-  /* message("[check] root CoMy= %f %f", root->legacy.com[1], root->new.com[1]); */
-  /* message("[check] root CoMz= %f %f", root->legacy.com[2], root->new.com[2]); */
-  /* message("   | %f %f %f |", root->new.I[0][0], root->new.I[0][1], root->new.I[0][2]); */
-  /* message("In=| %f %f %f |", root->new.I[1][0], root->new.I[1][1], root->new.I[1][2]); */
-  /* message("   | %f %f %f |", root->new.I[2][0], root->new.I[2][1], root->new.I[2][2]); */
-  /* message("------------------------------------------------") */
-  /* message("   | %f %f %f |", I[0][0], I[0][1], I[0][2]); */
-  /* message("I =| %f %f %f |", I[1][0], I[1][1], I[1][2]); */
-  /* message("   | %f %f %f |", I[2][0], I[2][1], I[2][2]); */
 
 #if ICHECK >= 0
   for (i = 0; i < N; ++i)