Implemented the field tensor calculation for FMM.

ea3deb8e · Matthieu Schaller · 6e5380ba · ea3deb8e · ea3deb8e
Commit ea3deb8e authored 10 years ago by Matthieu Schaller
--- a/examples/multipoles.h
+++ b/examples/multipoles.h
+
+
+struct multipole {
+
+#ifdef QUADRUPOLES
+  /* Quadrupole */
+  float M_200;
+  float M_020;
+  float M_002;
+  float M_110;
+  float M_101;
+  float M_011;
+#endif
+  /* CoM */
+  double M_100;
+  double M_010;
+  double M_001;
+ 
+  /* Mass */
+  float M_000;
+};
+
+
+struct fieldTensor {
+#ifdef QUADRUPOLES
+  float F_200;
+  float F_020;
+  float F_002;
+  float F_110;
+  float F_101;
+  float F_011;
+#endif
+  float F_100;
+  float F_010;
+  float F_001;
+
+  float F_000;
+};
+
+
+static inline void initFieldTensor(struct fieldTensor f) {
+  f.F_000 = 0.f;
+  f.F_100 = f.F_010 = f.F_001 = 0.f;
+#ifdef QUADRUPOLES
+  f.F_200 = f.F_020 = f.F_002 = 0.f;
+  f.F_110 = f.F_101 = f.F_011 = 0.f;
+#endif
+}
+
+
+/******************************
+ * Derivatives of the potential
+ * 
+ * All assume inv_r = 1/sqrt(r_x^2 + r_y^2 + r_z^2)
+ ******************************/
+
+/* 0-th order */
+static inline double D_000(double r_x, double r_y, double r_z, double inv_r) {  return  inv_r; }   // phi(r)
+
+/* 1-st order */
+static inline double D_100(double r_x, double r_y, double r_z, double inv_r) {  return -r_x * inv_r * inv_r * inv_r; }  // d/dx phi(r)
+static inline double D_010(double r_x, double r_y, double r_z, double inv_r) {  return -r_y * inv_r * inv_r * inv_r; }  // d/dy phi(r)
+static inline double D_001(double r_x, double r_y, double r_z, double inv_r) {  return -r_z * inv_r * inv_r * inv_r; }  // d/dz phi(r)
+
+/* 2-nd order */
+static inline double D_200(double r_x, double r_y, double r_z, double inv_r) {  // d2/dxdx phi(r)
+  
+  double inv_r2 = inv_r * inv_r;
+  double inv_r3 = inv_r * inv_r2;
+  double inv_r5 = inv_r3 * inv_r2;
+  return 3. * r_x * r_x * inv_r5 - inv_r3;
+}
+static inline double D_020(double r_x, double r_y, double r_z, double inv_r) {  // d2/dydy phi(r)
+  
+  double inv_r2 = inv_r * inv_r;
+  double inv_r3 = inv_r * inv_r2;
+  double inv_r5 = inv_r3 * inv_r2;
+  return 3. * r_y * r_y * inv_r5 - inv_r3;
+}
+static inline double D_002(double r_x, double r_y, double r_z, double inv_r) {  // d2/dzdz phi(r)
+  
+  double inv_r2 = inv_r * inv_r;
+  double inv_r3 = inv_r * inv_r2;
+  double inv_r5 = inv_r3 * inv_r2;
+  return 3. * r_z * r_z * inv_r5 - inv_r3;
+}
+
+static inline double D_110(double r_x, double r_y, double r_z, double inv_r) {  // d2/dxdy phi(r)
+
+  double inv_r2 = inv_r * inv_r;
+  double inv_r5 = inv_r2 * inv_r2 * inv_r;
+  return 3. * r_x * r_y * inv_r5;
+}
+
+static inline double D_101(double r_x, double r_y, double r_z, double inv_r) {  // d2/dxdz phi(r)
+
+  double inv_r2 = inv_r * inv_r;
+  double inv_r5 = inv_r2 * inv_r2 * inv_r;
+  return 3. * r_x * r_z * inv_r5;
+}
+
+static inline double D_011(double r_x, double r_y, double r_z, double inv_r) {  // d2/dydz phi(r)
+
+  double inv_r2 = inv_r * inv_r;
+  double inv_r5 = inv_r2 * inv_r2 * inv_r;
+  return 3. * r_y * r_z * inv_r5;
+}
+
+
+
+/* 3-rd order */
+static inline double D_300(double r_x, double r_y, double r_z, double inv_r) {  // d3/dxdxdx phi(r)
+
+  double inv_r2 = inv_r * inv_r;
+  double inv_r5 = inv_r2 * inv_r2 * inv_r;
+  double inv_r7 = inv_r5 * inv_r2;
+  return -15. * r_x * r_x * r_x * inv_r7 + 9. * r_x * inv_r5;
+}
+
+static inline double D_030(double r_x, double r_y, double r_z, double inv_r) {  // d3/dydydy phi(r)
+
+  double inv_r2 = inv_r * inv_r;
+  double inv_r5 = inv_r2 * inv_r2 * inv_r;
+  double inv_r7 = inv_r5 * inv_r2;
+  return -15. * r_y * r_y * r_y * inv_r7 + 9. * r_y * inv_r5;
+}
+
+static inline double D_003(double r_x, double r_y, double r_z, double inv_r) {  // d3/dzdzdz phi(r)
+
+  double inv_r2 = inv_r * inv_r;
+  double inv_r5 = inv_r2 * inv_r2 * inv_r;
+  double inv_r7 = inv_r5 * inv_r2;
+  return -15. * r_z * r_z * r_z * inv_r7 + 9. * r_z * inv_r5;
+}
+
+static inline double D_210(double r_x, double r_y, double r_z, double inv_r) {  // d3/dxdxdy phi(r)
+
+  double inv_r2 = inv_r * inv_r;
+  double inv_r5 = inv_r2 * inv_r2 * inv_r;
+  double inv_r7 = inv_r5 * inv_r2;
+  return -15. * r_x * r_x * r_y * inv_r7 + 3. * r_y * inv_r5;
+}
+
+static inline double D_201(double r_x, double r_y, double r_z, double inv_r) {  // d3/dxdxdz phi(r)
+
+  double inv_r2 = inv_r * inv_r;
+  double inv_r5 = inv_r2 * inv_r2 * inv_r;
+  double inv_r7 = inv_r5 * inv_r2;
+  return -15. * r_x * r_x * r_z * inv_r7 + 3. * r_z * inv_r5;
+}
+
+static inline double D_120(double r_x, double r_y, double r_z, double inv_r) {  // d3/dxdydy phi(r)
+
+  double inv_r2 = inv_r * inv_r;
+  double inv_r5 = inv_r2 * inv_r2 * inv_r;
+  double inv_r7 = inv_r5 * inv_r2;
+  return -15. * r_x * r_y * r_y * inv_r7 + 3. * r_x * inv_r5;
+}
+
+static inline double D_021(double r_x, double r_y, double r_z, double inv_r) {  // d3/dydydz phi(r)
+
+  double inv_r2 = inv_r * inv_r;
+  double inv_r5 = inv_r2 * inv_r2 * inv_r;
+  double inv_r7 = inv_r5 * inv_r2;
+  return -15. * r_y * r_y * r_z * inv_r7 + 3. * r_z * inv_r5;
+}
+
+static inline double D_102(double r_x, double r_y, double r_z, double inv_r) {  // d3/dxdzdz phi(r)
+
+  double inv_r2 = inv_r * inv_r;
+  double inv_r5 = inv_r2 * inv_r2 * inv_r;
+  double inv_r7 = inv_r5 * inv_r2;
+  return -15. * r_x * r_z * r_z * inv_r7 + 3. * r_x * inv_r5;
+}
+
+static inline double D_012(double r_x, double r_y, double r_z, double inv_r) {  // d3/dydzdz phi(r)
+
+  double inv_r2 = inv_r * inv_r;
+  double inv_r5 = inv_r2 * inv_r2 * inv_r;
+  double inv_r7 = inv_r5 * inv_r2;
+  return -15. * r_y * r_z * r_z * inv_r7 + 3. * r_y * inv_r5;
+}
+
+
+static inline double D_111(double r_x, double r_y, double r_z, double inv_r) {  // d3/dxdydz phi(r)
+
+  double inv_r3 = inv_r * inv_r * inv_r;
+  double inv_r7 = inv_r3 * inv_r3 * inv_r;
+  return -15. * r_x * r_z * r_z * inv_r7;
+}
+
+
+/************************************************
+ * Field tensor calculation for the accelerations
+ ************************************************/
+
+
+static inline void computeFieldTensors_x(double r_x, double r_y, double r_z, double inv_r, struct multipole A, struct fieldTensor B)
+{
+
+  B.F_000 += A.M_000 * D_100(r_x, r_y, r_z, inv_r);
+
+#ifdef QUADRUPOLES
+  B.F_000 += A.M_200 * D_300(r_x, r_y, r_z, inv_r);
+  B.F_000 += A.M_020 * D_120(r_x, r_y, r_z, inv_r);
+  B.F_000 += A.M_002 * D_102(r_x, r_y, r_z, inv_r);
+  B.F_000 += A.M_110 * D_210(r_x, r_y, r_z, inv_r);
+  B.F_000 += A.M_011 * D_111(r_x, r_y, r_z, inv_r);
+  B.F_000 += A.M_101 * D_201(r_x, r_y, r_z, inv_r);
+#endif
+  
+  B.F_100 += A.M_000 * D_200(r_x, r_y, r_z, inv_r);
+  B.F_010 += A.M_000 * D_110(r_x, r_y, r_z, inv_r);
+  B.F_001 += A.M_000 * D_101(r_x, r_y, r_z, inv_r);
+  
+#ifdef QUADRUPOLES
+  B.F_200 += A.M_000 * D_300(r_x, r_y, r_z, inv_r);
+  B.F_020 += A.M_000 * D_120(r_x, r_y, r_z, inv_r);
+  B.F_002 += A.M_000 * D_102(r_x, r_y, r_z, inv_r);
+  B.F_110 += A.M_000 * D_210(r_x, r_y, r_z, inv_r);
+  B.F_101 += A.M_000 * D_201(r_x, r_y, r_z, inv_r);
+  B.F_011 += A.M_000 * D_111(r_x, r_y, r_z, inv_r);
+#endif
+
+}
+
+static inline void computeFieldTensors_y(double r_x, double r_y, double r_z, double inv_r, struct multipole A, struct fieldTensor B)
+{
+
+  B.F_000 += A.M_000 * D_010(r_x, r_y, r_z, inv_r);
+
+#ifdef QUADRUPOLES
+  B.F_000 += A.M_200 * D_210(r_x, r_y, r_z, inv_r);
+  B.F_000 += A.M_020 * D_030(r_x, r_y, r_z, inv_r);
+  B.F_000 += A.M_002 * D_012(r_x, r_y, r_z, inv_r);
+  B.F_000 += A.M_110 * D_120(r_x, r_y, r_z, inv_r);
+  B.F_000 += A.M_011 * D_021(r_x, r_y, r_z, inv_r);
+  B.F_000 += A.M_101 * D_111(r_x, r_y, r_z, inv_r);
+#endif
+  
+  B.F_100 += A.M_000 * D_110(r_x, r_y, r_z, inv_r);
+  B.F_010 += A.M_000 * D_020(r_x, r_y, r_z, inv_r);
+  B.F_001 += A.M_000 * D_011(r_x, r_y, r_z, inv_r);
+  
+#ifdef QUADRUPOLES
+  B.F_200 += A.M_000 * D_210(r_x, r_y, r_z, inv_r);
+  B.F_020 += A.M_000 * D_030(r_x, r_y, r_z, inv_r);
+  B.F_002 += A.M_000 * D_012(r_x, r_y, r_z, inv_r);
+  B.F_110 += A.M_000 * D_120(r_x, r_y, r_z, inv_r);
+  B.F_101 += A.M_000 * D_111(r_x, r_y, r_z, inv_r);
+  B.F_011 += A.M_000 * D_021(r_x, r_y, r_z, inv_r);
+#endif
+
+}
+
+static inline void computeFieldTensors_z(double r_x, double r_y, double r_z, double inv_r, struct multipole A, struct fieldTensor B)
+{
+
+  B.F_000 += A.M_000 * D_100(r_x, r_y, r_z, inv_r);
+
+#ifdef QUADRUPOLES
+  B.F_000 += A.M_200 * D_201(r_x, r_y, r_z, inv_r);
+  B.F_000 += A.M_020 * D_021(r_x, r_y, r_z, inv_r);
+  B.F_000 += A.M_002 * D_003(r_x, r_y, r_z, inv_r);
+  B.F_000 += A.M_110 * D_111(r_x, r_y, r_z, inv_r);
+  B.F_000 += A.M_011 * D_012(r_x, r_y, r_z, inv_r);
+  B.F_000 += A.M_101 * D_102(r_x, r_y, r_z, inv_r);
+#endif
+  
+  B.F_100 += A.M_000 * D_101(r_x, r_y, r_z, inv_r);
+  B.F_010 += A.M_000 * D_011(r_x, r_y, r_z, inv_r);
+  B.F_001 += A.M_000 * D_002(r_x, r_y, r_z, inv_r);
+  
+#ifdef QUADRUPOLES
+  B.F_200 += A.M_000 * D_201(r_x, r_y, r_z, inv_r);
+  B.F_020 += A.M_000 * D_021(r_x, r_y, r_z, inv_r);
+  B.F_002 += A.M_000 * D_003(r_x, r_y, r_z, inv_r);
+  B.F_110 += A.M_000 * D_111(r_x, r_y, r_z, inv_r);
+  B.F_101 += A.M_000 * D_102(r_x, r_y, r_z, inv_r);
+  B.F_011 += A.M_000 * D_012(r_x, r_y, r_z, inv_r);
+#endif
+
+}
--- a/examples/test_fmm.c
+++ b/examples/test_fmm.c
@@ -47,6 +47,10 @@
 #define SANITY_CHECKS
 #define COM_AS_TASK
 #define COUNTERS
+#define QUADRUPOLES
+
+#include "multipoles.h"
+

 /** Data structure for the particles. */
 struct part {
@@ -67,35 +71,10 @@ struct part_local {
  float mass;
 } __attribute__((aligned(32)));

-struct multipole {

-  /* Quadrupole */
-  double I_xx;
-  double I_yy;
-  double I_zz;
-  double I_xy;
-  double I_xz;
-  double I_yz;
-  
-  /* CoM */
-  double com[3];
+struct legacy_multipole {
  float mass;
-};
-
-
-struct fieldTensor {
-  double f_200;
-  double f_020;
-  double f_002;
-  double f_110;
-  double f_101;
-  double f_011;
-
-  double f_100;
-  double f_010;
-  double f_001;
-
-  float f_000;
+  double com[3];
 };

 /** Data structure for the BH tree cell. */
@@ -110,21 +89,15 @@ struct cell {
  struct cell *firstchild; /* Next node if opening */
  struct cell *sibling;    /* Next node */

-  /* We keep both CoMs and masses to make sure the comp_com calculation is
-   * correct (use an anonymous union to keep variable names compact).  */
-  union {
-
-    /* Information for the legacy walk */
-    struct multipole legacy;
-
-    /* Information for the QuickShed walk */
-    struct multipole new;
-  };
+  /* Information for the legacy walk */
+  struct legacy_multipole legacy;

-  struct fieldTensor field;
+  /* Information for the QuickShed walk */
+  struct multipole new;
+  struct fieldTensor field_x;
+  struct fieldTensor field_y;
+  struct fieldTensor field_z;
  
-  double a[3];
-
  int res, com_tid, down_tid;
  struct index *indices;

@@ -199,7 +172,9 @@ void comp_com(struct cell *c) {
  struct part *parts = c->parts;
  double com[3] = {0.0, 0.0, 0.0}, mass = 0.0;

-  double I_xx = 0., I_yy = 0., I_zz = 0.,  I_xy = 0., I_xz = 0., I_yz = 0.;
+#ifdef QUADRUPOLES
+  float M_200 = 0., M_020 = 0., M_002 = 0.,  M_110 = 0., M_101 = 0., M_011 = 0.;
+#endif

 #ifdef SANITY_CHECKS
  if ( count == 0 )
@@ -210,18 +185,19 @@ void comp_com(struct cell *c) {

    /* Loop over the projenitors and collect the multipole information. */
    for (cp = c->firstchild; cp != c->sibling; cp = cp->sibling) {
-      float cp_mass = cp->new.mass;
-      com[0] += cp->new.com[0] * cp_mass;
-      com[1] += cp->new.com[1] * cp_mass;
-      com[2] += cp->new.com[2] * cp_mass;
+      float cp_mass = cp->new.M_000;
+      com[0] += cp->new.M_100 * cp_mass;
+      com[1] += cp->new.M_010 * cp_mass;
+      com[2] += cp->new.M_001 * cp_mass;
      mass += cp_mass;
-
-      I_xx += cp->new.I_xx + cp_mass * cp->new.com[0] * cp->new.com[0];
-      I_yy += cp->new.I_yy + cp_mass * cp->new.com[1] * cp->new.com[1];
-      I_zz += cp->new.I_zz + cp_mass * cp->new.com[2] * cp->new.com[2];
-      I_xy += cp->new.I_xy + cp_mass * cp->new.com[0] * cp->new.com[1];
-      I_xz += cp->new.I_xz + cp_mass * cp->new.com[0] * cp->new.com[2];
-      I_yz += cp->new.I_yz + cp_mass * cp->new.com[1] * cp->new.com[2];
+#ifdef QUADRUPOLES
+      M_200 += cp->new.M_200 + 0.5f * cp_mass * cp->new.M_100 * cp->new.M_100;
+      M_020 += cp->new.M_020 + 0.5f * cp_mass * cp->new.M_010 * cp->new.M_010;
+      M_002 += cp->new.M_002 + 0.5f * cp_mass * cp->new.M_001 * cp->new.M_001;
+      M_110 += cp->new.M_110 + cp_mass * cp->new.M_100 * cp->new.M_010;
+      M_101 += cp->new.M_101 + cp_mass * cp->new.M_100 * cp->new.M_001;
+      M_011 += cp->new.M_011 + cp_mass * cp->new.M_010 * cp->new.M_001;
+#endif
    }

    /* Otherwise, collect the multipole from the particles. */
@@ -233,33 +209,36 @@ void comp_com(struct cell *c) {
      com[1] += parts[k].x[1] * p_mass;
      com[2] += parts[k].x[2] * p_mass;
      mass += p_mass;
-
-      I_xx += parts[k].x[0] * parts[k].x[0] * p_mass;
-      I_yy += parts[k].x[1] * parts[k].x[1] * p_mass;
-      I_zz += parts[k].x[2] * parts[k].x[2] * p_mass;
-      I_xy += parts[k].x[0] * parts[k].x[1] * p_mass;
-      I_xz += parts[k].x[0] * parts[k].x[2] * p_mass;
-      I_yz += parts[k].x[1] * parts[k].x[2] * p_mass;
+#ifdef QUADRUPOLES
+      M_200 += parts[k].x[0] * parts[k].x[0] * p_mass;
+      M_020 += parts[k].x[1] * parts[k].x[1] * p_mass;
+      M_002 += parts[k].x[2] * parts[k].x[2] * p_mass;
+      M_110 += parts[k].x[0] * parts[k].x[1] * p_mass;
+      M_101 += parts[k].x[0] * parts[k].x[2] * p_mass;
+      M_011 += parts[k].x[1] * parts[k].x[2] * p_mass;
+#endif
    }
  }

+
  /* Store the multipole data */
  float imass = 1.0f / mass;
-  c->new.mass = mass;
-  c->new.com[0] = com[0] * imass;
-  c->new.com[1] = com[1] * imass;
-  c->new.com[2] = com[2] * imass;
-  c->new.I_xx = I_xx - imass * com[0] * com[0];
-  c->new.I_yy = I_yy - imass * com[1] * com[1];
-  c->new.I_zz = I_zz - imass * com[2] * com[2];
-  c->new.I_xy = I_xy - imass * com[0] * com[1];
-  c->new.I_xz = I_xz - imass * com[0] * com[2];
-  c->new.I_yz = I_yz - imass * com[1] * com[2];
-
-  /* Reset COM acceleration */
-  c->a[0] = 0.;
-  c->a[1] = 0.;
-  c->a[2] = 0.;
+  c->new.M_000 = mass;
+  c->new.M_100 = com[0] * imass;
+  c->new.M_010 = com[1] * imass;
+  c->new.M_001 = com[2] * imass;
+#ifdef QUADRUPOLES
+  c->new.M_200 = 0.5f * M_200 - 0.5f * imass * com[0] * com[0];
+  c->new.M_020 = 0.5f * M_020 - 0.5f * imass * com[1] * com[1];
+  c->new.M_002 = 0.5f * M_002 - 0.5f * imass * com[2] * com[2];
+  c->new.M_110 = M_110 - imass * com[0] * com[1];
+  c->new.M_101 = M_101 - imass * com[0] * com[2];
+  c->new.M_011 = M_011 - imass * com[1] * com[2];
+#endif
+
+  initFieldTensor(c->field_x);
+  initFieldTensor(c->field_y);
+  initFieldTensor(c->field_z);
 }


@@ -273,7 +252,7 @@ void comp_down(struct cell *c) {

  int k, count = c->count;
  struct cell *cp;
-  struct part *parts = c->parts;
+  //struct part *parts = c->parts;

  //  message("h=%f a= %f %f %f", c->h, c->a[0], c->a[1], c->a[2]);

@@ -283,9 +262,9 @@ void comp_down(struct cell *c) {
    //message("first= %p, sibling= %p", c->firstchild, c->sibling);
    /* Loop over the siblings and propagate accelerations. */
    for (cp = c->firstchild; cp != c->sibling; cp = cp->sibling) {
-      cp->a[0] += c->a[0];
-      cp->a[1] += c->a[1];
-      cp->a[2] += c->a[2];
+      /* cp->a[0] += c->a[0]; */
+      /* cp->a[1] += c->a[1]; */
+      /* cp->a[2] += c->a[2]; */

      /* Recurse */
      //comp_down(cp);
@@ -295,9 +274,9 @@ void comp_down(struct cell *c) {

    /* Otherwise, propagate the accelerations to the siblings. */
    for (k = 0; k < count; k++) {
-      parts[k].a[0] += c->a[0];
-      parts[k].a[1] += c->a[1];
-      parts[k].a[2] += c->a[2];
+      /* parts[k].a[0] += c->a[0]; */
+      /* parts[k].a[1] += c->a[1]; */
+      /* parts[k].a[2] += c->a[2]; */
    }
  }

@@ -521,80 +500,6 @@ static inline int are_neighbours(struct cell *ci, struct cell *cj) {



-/* /\** */
-/*  * @brief Interacts all count particles in parts */
-/*  * with the monopole in cj */
-/*  * */
-/*  * @param parts An array of particles */
-/*  * @param count The number of particles in the array */
-/*  * @param cj The cell with which the particles will interact */
-/*  *\/ */
-/* static inline void make_interact_pc(struct part *parts, int count, struct cell *cj) { */
-
-/*   int j, k; */
-/*   float com[3] = {0.0, 0.0, 0.0}; */
-/*   float mcom, dx[3] = {0.0, 0.0, 0.0}, r2, ir, w; */
-
-/* #ifdef SANITY_CHECKS */
-
-/*   /\* Sanity checks *\/ */
-/*   if (count == 0) error("Empty cell!"); */
-
-/*   /\* Sanity check. *\/ */
-/*   if (cj->new.mass == 0.0) { */
-/*     message("%e %e %e %d %p %d %p", cj->new.com[0], cj->new.com[1], */
-/*             cj->new.com[2], cj->count, cj, cj->split, cj->sibling); */
-
-/*     for (j = 0; j < cj->count; ++j) */
-/*       message("part %d mass=%e id=%d", j, cj->parts[j].mass, cj->parts[j].id); */
-
-/*     error("com does not seem to have been set."); */
-/*   } */
-
-/* #endif */
-
-/*   /\* Init the com's data. *\/ */
-/*   for (k = 0; k < 3; k++) com[k] = cj->new.com[k]; */
-/*   mcom = cj->new.mass; */
-
-/*   /\* Loop over every particle in leaf. *\/ */
-/*   for (j = 0; j < count; j++) { */
-
-/*     /\* Compute the pairwise distance. *\/ */
-/*     for (r2 = 0.0, k = 0; k < 3; k++) { */
-/*       dx[k] = com[k] - parts[j].x[k]; */
-/*       r2 += dx[k] * dx[k]; */
-/*     } */
-
-/*     /\* Apply the gravitational acceleration. *\/ */
-/*     ir = 1.0f / sqrtf(r2); */
-/*     w = mcom * const_G * ir * ir * ir; */
-/*     for (k = 0; k < 3; k++) parts[j].a[k] += w * dx[k]; */
-
-/*   } /\* loop over every particle. *\/ */
-/* } */
-
-
-
-/** 
- * @brief Computes the different derivatives of the potential.
- * Assumes that inv_r = 1./sqrt(x^2 + y^2 + z^2)
- * 
- */
-static inline double D_000(double x, double y, double z, double inv_r) {  return const_G * inv_r; }
-
-static inline double D_100(double x, double y, double z, double inv_r) {  return const_G * x * inv_r * inv_r * inv_r; }
-static inline double D_010(double x, double y, double z, double inv_r) {  return const_G * y * inv_r * inv_r * inv_r; }
-static inline double D_001(double x, double y, double z, double inv_r) {  return const_G * z * inv_r * inv_r * inv_r; }
-
-static inline double D_200(double x, double y, double z, double inv_r) {  return 3. * const_G * x * x * inv_r * inv_r * inv_r * inv_r * inv_r - const_G * inv_r * inv_r * inv_r; }
-static inline double D_020(double x, double y, double z, double inv_r) {  return 3. * const_G * y * y * inv_r * inv_r * inv_r * inv_r * inv_r - const_G * inv_r * inv_r * inv_r; }
-static inline double D_002(double x, double y, double z, double inv_r) {  return 3. * const_G * z * z * inv_r * inv_r * inv_r * inv_r * inv_r - const_G * inv_r * inv_r * inv_r; }
-
-static inline double D_110(double x, double y, double z, double inv_r) {  return 3. * const_G * x * y * inv_r * inv_r * inv_r * inv_r * inv_r; }
-static inline double D_101(double x, double y, double z, double inv_r) {  return 3. * const_G * x * z * inv_r * inv_r * inv_r * inv_r * inv_r; }
-static inline double D_011(double x, double y, double z, double inv_r) {  return 3. * const_G * y * z * inv_r * inv_r * inv_r * inv_r * inv_r; }
-

 /**
 * @brief Loops all siblings of cells ci and cj and launches all
@@ -606,8 +511,7 @@ static inline double D_011(double x, double y, double z, double inv_r) {  return
 static inline void iact_pair_pc(struct cell *ci, struct cell *cj) {

  struct cell *cp, *cps;
-  int k;
-  float r2, ir, dx[3];
+  double r_x, r_y, r_z, r2, inv_r;
  
 #ifdef SANITY_CHECKS
  if (ci->h != cj->h)
@@ -623,18 +527,22 @@ static inline void iact_pair_pc(struct cell *ci, struct cell *cj) {

      if ( ! are_neighbours(cp, cps) ) {

+	/* Distance between cells */
+	r_x = ci->new.M_100 - cj->new.M_100;
+	r_y = ci->new.M_010 - cj->new.M_010;
+	r_z = ci->new.M_001 - cj->new.M_001;
+	
+	r2 = r_x * r_x + r_y * r_y + r_z * r_z;
+	inv_r = 1. / sqrt( r2 );

+	/* Compute the field tensors */
+	computeFieldTensors_x( r_x, r_y, r_z, inv_r, cj->new, ci->field_x );
+	computeFieldTensors_y( r_x, r_y, r_z, inv_r, cj->new, ci->field_y );
+	computeFieldTensors_z( r_x, r_y, r_z, inv_r, cj->new, ci->field_z );

-	/* Compute the pairwise distance. */
-	for (r2 = 0.0, k = 0; k < 3; k++) {
-	  dx[k] = cps->new.com[k] - cp->new.com[k];
-	  r2 += dx[k] * dx[k];
-	}
-	ir = 1.0f / sqrtf(r2);
-
-	/* Apply the gravitational acceleration. */
-	cps->field.f_000 += cp->new.mass  * D_000(dx[0], dx[1], dx[2], ir);
-	cp->field.f_000  += cps->new.mass * D_000(dx[0], dx[1], dx[2], ir);
+	computeFieldTensors_x( -r_x, -r_y, -r_z, inv_r, ci->new, cj->field_x );
+	computeFieldTensors_y( -r_x, -r_y, -r_z, inv_r, ci->new, cj->field_y );
+	computeFieldTensors_z( -r_x, -r_y, -r_z, inv_r, ci->new, cj->field_z );

      }
    }
@@ -1456,33 +1364,33 @@ void test_bh(int N, int nr_threads, int runs, char *fileName) {
    tot_run += toc_run - tic;
  }

-  message("[check] root mass= %f %f", root->legacy.mass, root->new.mass);
-  message("[check] root CoMx= %f %f", root->legacy.com[0], root->new.com[0]);
-  message("[check] root CoMy= %f %f", root->legacy.com[1], root->new.com[1]);
-  message("[check] root CoMz= %f %f", root->legacy.com[2], root->new.com[2]);
+  message("[check] root mass= %f %f", root->legacy.mass, root->new.M_000);
+  message("[check] root CoMx= %f %f", root->legacy.com[0], root->new.M_100);
+  message("[check] root CoMy= %f %f", root->legacy.com[1], root->new.M_010);
+  message("[check] root CoMz= %f %f", root->legacy.com[2], root->new.M_001);
  
-  /* message("[check]     | %f %f %f |", root->new.I_xx, root->new.I_xy, root->new.I_xz); */
-  /* message("[check] I = | %f %f %f |", root->new.I_xy, root->new.I_yy, root->new.I_yz); */
-  /* message("[check]     | %f %f %f |", root->new.I_xz, root->new.I_yz, root->new.I_zz); */
+  /* message("[check]     | %f %f %f |", root->new.M_200, root->new.M_110, root->new.M_101); */
+  /* message("[check] I = | %f %f %f |", root->new.M_110, root->new.M_020, root->new.M_011); */
+  /* message("[check]     | %f %f %f |", root->new.M_101, root->new.M_011, root->new.M_002); */

-  /* double I_xx = 0., I_yy = 0., I_zz = 0.,  I_xy = 0., I_xz = 0., I_yz = 0.; */
+  /* double M_200 = 0., M_020 = 0., M_002 = 0.,  M_110 = 0., M_101 = 0., M_011 = 0.; */
  /* for (i = 0; i < N; ++i) { */
-  /*   I_xx += root->parts[i].x[0] * root->parts[i].x[0] * root->parts[i].mass; */
-  /*   I_yy += root->parts[i].x[1] * root->parts[i].x[1] * root->parts[i].mass; */
-  /*   I_zz += root->parts[i].x[2] * root->parts[i].x[2] * root->parts[i].mass; */
-  /*   I_xy += root->parts[i].x[0] * root->parts[i].x[1] * root->parts[i].mass; */
-  /*   I_xz += root->parts[i].x[0] * root->parts[i].x[2] * root->parts[i].mass; */
-  /*   I_yz += root->parts[i].x[1] * root->parts[i].x[2] * root->parts[i].mass; */
+  /*   M_200 += root->parts[i].x[0] * root->parts[i].x[0] * root->parts[i].mass; */
+  /*   M_020 += root->parts[i].x[1] * root->parts[i].x[1] * root->parts[i].mass; */
+  /*   M_002 += root->parts[i].x[2] * root->parts[i].x[2] * root->parts[i].mass; */
+  /*   M_110 += root->parts[i].x[0] * root->parts[i].x[1] * root->parts[i].mass; */
+  /*   M_101 += root->parts[i].x[0] * root->parts[i].x[2] * root->parts[i].mass; */
+  /*   M_011 += root->parts[i].x[1] * root->parts[i].x[2] * root->parts[i].mass; */
  /* } */
-  /* I_xx = I_xx - root->new.mass * root->new.com[0] * root->new.com[0]; */
-  /* I_yy = I_yy - root->new.mass * root->new.com[1] * root->new.com[1]; */
-  /* I_zz = I_zz - root->new.mass * root->new.com[2] * root->new.com[2]; */
-  /* I_xy = I_xy - root->new.mass * root->new.com[0] * root->new.com[1]; */
-  /* I_xz = I_xz - root->new.mass * root->new.com[0] * root->new.com[2]; */
-  /* I_yz = I_yz - root->new.mass * root->new.com[1] * root->new.com[2]; */
-  /* message("[check]     | %f %f %f |", I_xx, I_xy, I_xz); */
-  /* message("[check] I = | %f %f %f |", I_xy, I_yy, I_yz); */
-  /* message("[check]     | %f %f %f |", I_xz, I_yz, I_zz); */
+  /* M_200 = M_200 - root->new.M_000 * root->new.M_100 * root->new.M_100; */
+  /* M_020 = M_020 - root->new.M_000 * root->new.M_010 * root->new.M_010; */
+  /* M_002 = M_002 - root->new.M_000 * root->new.M_001 * root->new.M_001; */
+  /* M_110 = M_110 - root->new.M_000 * root->new.M_100 * root->new.M_010; */
+  /* M_101 = M_101 - root->new.M_000 * root->new.M_100 * root->new.M_001; */
+  /* M_011 = M_011 - root->new.M_000 * root->new.M_010 * root->new.M_001; */
+  /* message("[check]     | %f %f %f |", M_200, M_110, M_101); */
+  /* message("[check] I = | %f %f %f |", M_110, M_020, M_011); */
+  /* message("[check]     | %f %f %f |", M_101, M_011, M_002); */

 #if ICHECK >= 0
  for (i = 0; i < N; ++i)