diff --git a/examples/multipoles.h b/examples/multipoles.h new file mode 100644 index 0000000000000000000000000000000000000000..0209ad6022c702a4b55b473b915f131c18e59826 --- /dev/null +++ b/examples/multipoles.h @@ -0,0 +1,283 @@ + + +struct multipole { + +#ifdef QUADRUPOLES + /* Quadrupole */ + float M_200; + float M_020; + float M_002; + float M_110; + float M_101; + float M_011; +#endif + /* CoM */ + double M_100; + double M_010; + double M_001; + + /* Mass */ + float M_000; +}; + + +struct fieldTensor { +#ifdef QUADRUPOLES + float F_200; + float F_020; + float F_002; + float F_110; + float F_101; + float F_011; +#endif + float F_100; + float F_010; + float F_001; + + float F_000; +}; + + +static inline void initFieldTensor(struct fieldTensor f) { + f.F_000 = 0.f; + f.F_100 = f.F_010 = f.F_001 = 0.f; +#ifdef QUADRUPOLES + f.F_200 = f.F_020 = f.F_002 = 0.f; + f.F_110 = f.F_101 = f.F_011 = 0.f; +#endif +} + + +/****************************** + * Derivatives of the potential + * + * All assume inv_r = 1/sqrt(r_x^2 + r_y^2 + r_z^2) + ******************************/ + +/* 0-th order */ +static inline double D_000(double r_x, double r_y, double r_z, double inv_r) { return inv_r; } // phi(r) + +/* 1-st order */ +static inline double D_100(double r_x, double r_y, double r_z, double inv_r) { return -r_x * inv_r * inv_r * inv_r; } // d/dx phi(r) +static inline double D_010(double r_x, double r_y, double r_z, double inv_r) { return -r_y * inv_r * inv_r * inv_r; } // d/dy phi(r) +static inline double D_001(double r_x, double r_y, double r_z, double inv_r) { return -r_z * inv_r * inv_r * inv_r; } // d/dz phi(r) + +/* 2-nd order */ +static inline double D_200(double r_x, double r_y, double r_z, double inv_r) { // d2/dxdx phi(r) + + double inv_r2 = inv_r * inv_r; + double inv_r3 = inv_r * inv_r2; + double inv_r5 = inv_r3 * inv_r2; + return 3. * r_x * r_x * inv_r5 - inv_r3; +} +static inline double D_020(double r_x, double r_y, double r_z, double inv_r) { // d2/dydy phi(r) + + double inv_r2 = inv_r * inv_r; + double inv_r3 = inv_r * inv_r2; + double inv_r5 = inv_r3 * inv_r2; + return 3. * r_y * r_y * inv_r5 - inv_r3; +} +static inline double D_002(double r_x, double r_y, double r_z, double inv_r) { // d2/dzdz phi(r) + + double inv_r2 = inv_r * inv_r; + double inv_r3 = inv_r * inv_r2; + double inv_r5 = inv_r3 * inv_r2; + return 3. * r_z * r_z * inv_r5 - inv_r3; +} + +static inline double D_110(double r_x, double r_y, double r_z, double inv_r) { // d2/dxdy phi(r) + + double inv_r2 = inv_r * inv_r; + double inv_r5 = inv_r2 * inv_r2 * inv_r; + return 3. * r_x * r_y * inv_r5; +} + +static inline double D_101(double r_x, double r_y, double r_z, double inv_r) { // d2/dxdz phi(r) + + double inv_r2 = inv_r * inv_r; + double inv_r5 = inv_r2 * inv_r2 * inv_r; + return 3. * r_x * r_z * inv_r5; +} + +static inline double D_011(double r_x, double r_y, double r_z, double inv_r) { // d2/dydz phi(r) + + double inv_r2 = inv_r * inv_r; + double inv_r5 = inv_r2 * inv_r2 * inv_r; + return 3. * r_y * r_z * inv_r5; +} + + + +/* 3-rd order */ +static inline double D_300(double r_x, double r_y, double r_z, double inv_r) { // d3/dxdxdx phi(r) + + double inv_r2 = inv_r * inv_r; + double inv_r5 = inv_r2 * inv_r2 * inv_r; + double inv_r7 = inv_r5 * inv_r2; + return -15. * r_x * r_x * r_x * inv_r7 + 9. * r_x * inv_r5; +} + +static inline double D_030(double r_x, double r_y, double r_z, double inv_r) { // d3/dydydy phi(r) + + double inv_r2 = inv_r * inv_r; + double inv_r5 = inv_r2 * inv_r2 * inv_r; + double inv_r7 = inv_r5 * inv_r2; + return -15. * r_y * r_y * r_y * inv_r7 + 9. * r_y * inv_r5; +} + +static inline double D_003(double r_x, double r_y, double r_z, double inv_r) { // d3/dzdzdz phi(r) + + double inv_r2 = inv_r * inv_r; + double inv_r5 = inv_r2 * inv_r2 * inv_r; + double inv_r7 = inv_r5 * inv_r2; + return -15. * r_z * r_z * r_z * inv_r7 + 9. * r_z * inv_r5; +} + +static inline double D_210(double r_x, double r_y, double r_z, double inv_r) { // d3/dxdxdy phi(r) + + double inv_r2 = inv_r * inv_r; + double inv_r5 = inv_r2 * inv_r2 * inv_r; + double inv_r7 = inv_r5 * inv_r2; + return -15. * r_x * r_x * r_y * inv_r7 + 3. * r_y * inv_r5; +} + +static inline double D_201(double r_x, double r_y, double r_z, double inv_r) { // d3/dxdxdz phi(r) + + double inv_r2 = inv_r * inv_r; + double inv_r5 = inv_r2 * inv_r2 * inv_r; + double inv_r7 = inv_r5 * inv_r2; + return -15. * r_x * r_x * r_z * inv_r7 + 3. * r_z * inv_r5; +} + +static inline double D_120(double r_x, double r_y, double r_z, double inv_r) { // d3/dxdydy phi(r) + + double inv_r2 = inv_r * inv_r; + double inv_r5 = inv_r2 * inv_r2 * inv_r; + double inv_r7 = inv_r5 * inv_r2; + return -15. * r_x * r_y * r_y * inv_r7 + 3. * r_x * inv_r5; +} + +static inline double D_021(double r_x, double r_y, double r_z, double inv_r) { // d3/dydydz phi(r) + + double inv_r2 = inv_r * inv_r; + double inv_r5 = inv_r2 * inv_r2 * inv_r; + double inv_r7 = inv_r5 * inv_r2; + return -15. * r_y * r_y * r_z * inv_r7 + 3. * r_z * inv_r5; +} + +static inline double D_102(double r_x, double r_y, double r_z, double inv_r) { // d3/dxdzdz phi(r) + + double inv_r2 = inv_r * inv_r; + double inv_r5 = inv_r2 * inv_r2 * inv_r; + double inv_r7 = inv_r5 * inv_r2; + return -15. * r_x * r_z * r_z * inv_r7 + 3. * r_x * inv_r5; +} + +static inline double D_012(double r_x, double r_y, double r_z, double inv_r) { // d3/dydzdz phi(r) + + double inv_r2 = inv_r * inv_r; + double inv_r5 = inv_r2 * inv_r2 * inv_r; + double inv_r7 = inv_r5 * inv_r2; + return -15. * r_y * r_z * r_z * inv_r7 + 3. * r_y * inv_r5; +} + + +static inline double D_111(double r_x, double r_y, double r_z, double inv_r) { // d3/dxdydz phi(r) + + double inv_r3 = inv_r * inv_r * inv_r; + double inv_r7 = inv_r3 * inv_r3 * inv_r; + return -15. * r_x * r_z * r_z * inv_r7; +} + + +/************************************************ + * Field tensor calculation for the accelerations + ************************************************/ + + +static inline void computeFieldTensors_x(double r_x, double r_y, double r_z, double inv_r, struct multipole A, struct fieldTensor B) +{ + + B.F_000 += A.M_000 * D_100(r_x, r_y, r_z, inv_r); + +#ifdef QUADRUPOLES + B.F_000 += A.M_200 * D_300(r_x, r_y, r_z, inv_r); + B.F_000 += A.M_020 * D_120(r_x, r_y, r_z, inv_r); + B.F_000 += A.M_002 * D_102(r_x, r_y, r_z, inv_r); + B.F_000 += A.M_110 * D_210(r_x, r_y, r_z, inv_r); + B.F_000 += A.M_011 * D_111(r_x, r_y, r_z, inv_r); + B.F_000 += A.M_101 * D_201(r_x, r_y, r_z, inv_r); +#endif + + B.F_100 += A.M_000 * D_200(r_x, r_y, r_z, inv_r); + B.F_010 += A.M_000 * D_110(r_x, r_y, r_z, inv_r); + B.F_001 += A.M_000 * D_101(r_x, r_y, r_z, inv_r); + +#ifdef QUADRUPOLES + B.F_200 += A.M_000 * D_300(r_x, r_y, r_z, inv_r); + B.F_020 += A.M_000 * D_120(r_x, r_y, r_z, inv_r); + B.F_002 += A.M_000 * D_102(r_x, r_y, r_z, inv_r); + B.F_110 += A.M_000 * D_210(r_x, r_y, r_z, inv_r); + B.F_101 += A.M_000 * D_201(r_x, r_y, r_z, inv_r); + B.F_011 += A.M_000 * D_111(r_x, r_y, r_z, inv_r); +#endif + +} + +static inline void computeFieldTensors_y(double r_x, double r_y, double r_z, double inv_r, struct multipole A, struct fieldTensor B) +{ + + B.F_000 += A.M_000 * D_010(r_x, r_y, r_z, inv_r); + +#ifdef QUADRUPOLES + B.F_000 += A.M_200 * D_210(r_x, r_y, r_z, inv_r); + B.F_000 += A.M_020 * D_030(r_x, r_y, r_z, inv_r); + B.F_000 += A.M_002 * D_012(r_x, r_y, r_z, inv_r); + B.F_000 += A.M_110 * D_120(r_x, r_y, r_z, inv_r); + B.F_000 += A.M_011 * D_021(r_x, r_y, r_z, inv_r); + B.F_000 += A.M_101 * D_111(r_x, r_y, r_z, inv_r); +#endif + + B.F_100 += A.M_000 * D_110(r_x, r_y, r_z, inv_r); + B.F_010 += A.M_000 * D_020(r_x, r_y, r_z, inv_r); + B.F_001 += A.M_000 * D_011(r_x, r_y, r_z, inv_r); + +#ifdef QUADRUPOLES + B.F_200 += A.M_000 * D_210(r_x, r_y, r_z, inv_r); + B.F_020 += A.M_000 * D_030(r_x, r_y, r_z, inv_r); + B.F_002 += A.M_000 * D_012(r_x, r_y, r_z, inv_r); + B.F_110 += A.M_000 * D_120(r_x, r_y, r_z, inv_r); + B.F_101 += A.M_000 * D_111(r_x, r_y, r_z, inv_r); + B.F_011 += A.M_000 * D_021(r_x, r_y, r_z, inv_r); +#endif + +} + +static inline void computeFieldTensors_z(double r_x, double r_y, double r_z, double inv_r, struct multipole A, struct fieldTensor B) +{ + + B.F_000 += A.M_000 * D_100(r_x, r_y, r_z, inv_r); + +#ifdef QUADRUPOLES + B.F_000 += A.M_200 * D_201(r_x, r_y, r_z, inv_r); + B.F_000 += A.M_020 * D_021(r_x, r_y, r_z, inv_r); + B.F_000 += A.M_002 * D_003(r_x, r_y, r_z, inv_r); + B.F_000 += A.M_110 * D_111(r_x, r_y, r_z, inv_r); + B.F_000 += A.M_011 * D_012(r_x, r_y, r_z, inv_r); + B.F_000 += A.M_101 * D_102(r_x, r_y, r_z, inv_r); +#endif + + B.F_100 += A.M_000 * D_101(r_x, r_y, r_z, inv_r); + B.F_010 += A.M_000 * D_011(r_x, r_y, r_z, inv_r); + B.F_001 += A.M_000 * D_002(r_x, r_y, r_z, inv_r); + +#ifdef QUADRUPOLES + B.F_200 += A.M_000 * D_201(r_x, r_y, r_z, inv_r); + B.F_020 += A.M_000 * D_021(r_x, r_y, r_z, inv_r); + B.F_002 += A.M_000 * D_003(r_x, r_y, r_z, inv_r); + B.F_110 += A.M_000 * D_111(r_x, r_y, r_z, inv_r); + B.F_101 += A.M_000 * D_102(r_x, r_y, r_z, inv_r); + B.F_011 += A.M_000 * D_012(r_x, r_y, r_z, inv_r); +#endif + +} diff --git a/examples/test_fmm.c b/examples/test_fmm.c index 346f1c2133fe46d679458a6500527b90461cccd3..3243c0be980323b2c5e2e0d45a148c5345774f79 100644 --- a/examples/test_fmm.c +++ b/examples/test_fmm.c @@ -47,6 +47,10 @@ #define SANITY_CHECKS #define COM_AS_TASK #define COUNTERS +#define QUADRUPOLES + +#include "multipoles.h" + /** Data structure for the particles. */ struct part { @@ -67,35 +71,10 @@ struct part_local { float mass; } __attribute__((aligned(32))); -struct multipole { - /* Quadrupole */ - double I_xx; - double I_yy; - double I_zz; - double I_xy; - double I_xz; - double I_yz; - - /* CoM */ - double com[3]; +struct legacy_multipole { float mass; -}; - - -struct fieldTensor { - double f_200; - double f_020; - double f_002; - double f_110; - double f_101; - double f_011; - - double f_100; - double f_010; - double f_001; - - float f_000; + double com[3]; }; /** Data structure for the BH tree cell. */ @@ -110,21 +89,15 @@ struct cell { struct cell *firstchild; /* Next node if opening */ struct cell *sibling; /* Next node */ - /* We keep both CoMs and masses to make sure the comp_com calculation is - * correct (use an anonymous union to keep variable names compact). */ - union { - - /* Information for the legacy walk */ - struct multipole legacy; - - /* Information for the QuickShed walk */ - struct multipole new; - }; + /* Information for the legacy walk */ + struct legacy_multipole legacy; - struct fieldTensor field; + /* Information for the QuickShed walk */ + struct multipole new; + struct fieldTensor field_x; + struct fieldTensor field_y; + struct fieldTensor field_z; - double a[3]; - int res, com_tid, down_tid; struct index *indices; @@ -199,7 +172,9 @@ void comp_com(struct cell *c) { struct part *parts = c->parts; double com[3] = {0.0, 0.0, 0.0}, mass = 0.0; - double I_xx = 0., I_yy = 0., I_zz = 0., I_xy = 0., I_xz = 0., I_yz = 0.; +#ifdef QUADRUPOLES + float M_200 = 0., M_020 = 0., M_002 = 0., M_110 = 0., M_101 = 0., M_011 = 0.; +#endif #ifdef SANITY_CHECKS if ( count == 0 ) @@ -210,18 +185,19 @@ void comp_com(struct cell *c) { /* Loop over the projenitors and collect the multipole information. */ for (cp = c->firstchild; cp != c->sibling; cp = cp->sibling) { - float cp_mass = cp->new.mass; - com[0] += cp->new.com[0] * cp_mass; - com[1] += cp->new.com[1] * cp_mass; - com[2] += cp->new.com[2] * cp_mass; + float cp_mass = cp->new.M_000; + com[0] += cp->new.M_100 * cp_mass; + com[1] += cp->new.M_010 * cp_mass; + com[2] += cp->new.M_001 * cp_mass; mass += cp_mass; - - I_xx += cp->new.I_xx + cp_mass * cp->new.com[0] * cp->new.com[0]; - I_yy += cp->new.I_yy + cp_mass * cp->new.com[1] * cp->new.com[1]; - I_zz += cp->new.I_zz + cp_mass * cp->new.com[2] * cp->new.com[2]; - I_xy += cp->new.I_xy + cp_mass * cp->new.com[0] * cp->new.com[1]; - I_xz += cp->new.I_xz + cp_mass * cp->new.com[0] * cp->new.com[2]; - I_yz += cp->new.I_yz + cp_mass * cp->new.com[1] * cp->new.com[2]; +#ifdef QUADRUPOLES + M_200 += cp->new.M_200 + 0.5f * cp_mass * cp->new.M_100 * cp->new.M_100; + M_020 += cp->new.M_020 + 0.5f * cp_mass * cp->new.M_010 * cp->new.M_010; + M_002 += cp->new.M_002 + 0.5f * cp_mass * cp->new.M_001 * cp->new.M_001; + M_110 += cp->new.M_110 + cp_mass * cp->new.M_100 * cp->new.M_010; + M_101 += cp->new.M_101 + cp_mass * cp->new.M_100 * cp->new.M_001; + M_011 += cp->new.M_011 + cp_mass * cp->new.M_010 * cp->new.M_001; +#endif } /* Otherwise, collect the multipole from the particles. */ @@ -233,33 +209,36 @@ void comp_com(struct cell *c) { com[1] += parts[k].x[1] * p_mass; com[2] += parts[k].x[2] * p_mass; mass += p_mass; - - I_xx += parts[k].x[0] * parts[k].x[0] * p_mass; - I_yy += parts[k].x[1] * parts[k].x[1] * p_mass; - I_zz += parts[k].x[2] * parts[k].x[2] * p_mass; - I_xy += parts[k].x[0] * parts[k].x[1] * p_mass; - I_xz += parts[k].x[0] * parts[k].x[2] * p_mass; - I_yz += parts[k].x[1] * parts[k].x[2] * p_mass; +#ifdef QUADRUPOLES + M_200 += parts[k].x[0] * parts[k].x[0] * p_mass; + M_020 += parts[k].x[1] * parts[k].x[1] * p_mass; + M_002 += parts[k].x[2] * parts[k].x[2] * p_mass; + M_110 += parts[k].x[0] * parts[k].x[1] * p_mass; + M_101 += parts[k].x[0] * parts[k].x[2] * p_mass; + M_011 += parts[k].x[1] * parts[k].x[2] * p_mass; +#endif } } + /* Store the multipole data */ float imass = 1.0f / mass; - c->new.mass = mass; - c->new.com[0] = com[0] * imass; - c->new.com[1] = com[1] * imass; - c->new.com[2] = com[2] * imass; - c->new.I_xx = I_xx - imass * com[0] * com[0]; - c->new.I_yy = I_yy - imass * com[1] * com[1]; - c->new.I_zz = I_zz - imass * com[2] * com[2]; - c->new.I_xy = I_xy - imass * com[0] * com[1]; - c->new.I_xz = I_xz - imass * com[0] * com[2]; - c->new.I_yz = I_yz - imass * com[1] * com[2]; - - /* Reset COM acceleration */ - c->a[0] = 0.; - c->a[1] = 0.; - c->a[2] = 0.; + c->new.M_000 = mass; + c->new.M_100 = com[0] * imass; + c->new.M_010 = com[1] * imass; + c->new.M_001 = com[2] * imass; +#ifdef QUADRUPOLES + c->new.M_200 = 0.5f * M_200 - 0.5f * imass * com[0] * com[0]; + c->new.M_020 = 0.5f * M_020 - 0.5f * imass * com[1] * com[1]; + c->new.M_002 = 0.5f * M_002 - 0.5f * imass * com[2] * com[2]; + c->new.M_110 = M_110 - imass * com[0] * com[1]; + c->new.M_101 = M_101 - imass * com[0] * com[2]; + c->new.M_011 = M_011 - imass * com[1] * com[2]; +#endif + + initFieldTensor(c->field_x); + initFieldTensor(c->field_y); + initFieldTensor(c->field_z); } @@ -273,7 +252,7 @@ void comp_down(struct cell *c) { int k, count = c->count; struct cell *cp; - struct part *parts = c->parts; + //struct part *parts = c->parts; // message("h=%f a= %f %f %f", c->h, c->a[0], c->a[1], c->a[2]); @@ -283,9 +262,9 @@ void comp_down(struct cell *c) { //message("first= %p, sibling= %p", c->firstchild, c->sibling); /* Loop over the siblings and propagate accelerations. */ for (cp = c->firstchild; cp != c->sibling; cp = cp->sibling) { - cp->a[0] += c->a[0]; - cp->a[1] += c->a[1]; - cp->a[2] += c->a[2]; + /* cp->a[0] += c->a[0]; */ + /* cp->a[1] += c->a[1]; */ + /* cp->a[2] += c->a[2]; */ /* Recurse */ //comp_down(cp); @@ -295,9 +274,9 @@ void comp_down(struct cell *c) { /* Otherwise, propagate the accelerations to the siblings. */ for (k = 0; k < count; k++) { - parts[k].a[0] += c->a[0]; - parts[k].a[1] += c->a[1]; - parts[k].a[2] += c->a[2]; + /* parts[k].a[0] += c->a[0]; */ + /* parts[k].a[1] += c->a[1]; */ + /* parts[k].a[2] += c->a[2]; */ } } @@ -521,80 +500,6 @@ static inline int are_neighbours(struct cell *ci, struct cell *cj) { -/* /\** */ -/* * @brief Interacts all count particles in parts */ -/* * with the monopole in cj */ -/* * */ -/* * @param parts An array of particles */ -/* * @param count The number of particles in the array */ -/* * @param cj The cell with which the particles will interact */ -/* *\/ */ -/* static inline void make_interact_pc(struct part *parts, int count, struct cell *cj) { */ - -/* int j, k; */ -/* float com[3] = {0.0, 0.0, 0.0}; */ -/* float mcom, dx[3] = {0.0, 0.0, 0.0}, r2, ir, w; */ - -/* #ifdef SANITY_CHECKS */ - -/* /\* Sanity checks *\/ */ -/* if (count == 0) error("Empty cell!"); */ - -/* /\* Sanity check. *\/ */ -/* if (cj->new.mass == 0.0) { */ -/* message("%e %e %e %d %p %d %p", cj->new.com[0], cj->new.com[1], */ -/* cj->new.com[2], cj->count, cj, cj->split, cj->sibling); */ - -/* for (j = 0; j < cj->count; ++j) */ -/* message("part %d mass=%e id=%d", j, cj->parts[j].mass, cj->parts[j].id); */ - -/* error("com does not seem to have been set."); */ -/* } */ - -/* #endif */ - -/* /\* Init the com's data. *\/ */ -/* for (k = 0; k < 3; k++) com[k] = cj->new.com[k]; */ -/* mcom = cj->new.mass; */ - -/* /\* Loop over every particle in leaf. *\/ */ -/* for (j = 0; j < count; j++) { */ - -/* /\* Compute the pairwise distance. *\/ */ -/* for (r2 = 0.0, k = 0; k < 3; k++) { */ -/* dx[k] = com[k] - parts[j].x[k]; */ -/* r2 += dx[k] * dx[k]; */ -/* } */ - -/* /\* Apply the gravitational acceleration. *\/ */ -/* ir = 1.0f / sqrtf(r2); */ -/* w = mcom * const_G * ir * ir * ir; */ -/* for (k = 0; k < 3; k++) parts[j].a[k] += w * dx[k]; */ - -/* } /\* loop over every particle. *\/ */ -/* } */ - - - -/** - * @brief Computes the different derivatives of the potential. - * Assumes that inv_r = 1./sqrt(x^2 + y^2 + z^2) - * - */ -static inline double D_000(double x, double y, double z, double inv_r) { return const_G * inv_r; } - -static inline double D_100(double x, double y, double z, double inv_r) { return const_G * x * inv_r * inv_r * inv_r; } -static inline double D_010(double x, double y, double z, double inv_r) { return const_G * y * inv_r * inv_r * inv_r; } -static inline double D_001(double x, double y, double z, double inv_r) { return const_G * z * inv_r * inv_r * inv_r; } - -static inline double D_200(double x, double y, double z, double inv_r) { return 3. * const_G * x * x * inv_r * inv_r * inv_r * inv_r * inv_r - const_G * inv_r * inv_r * inv_r; } -static inline double D_020(double x, double y, double z, double inv_r) { return 3. * const_G * y * y * inv_r * inv_r * inv_r * inv_r * inv_r - const_G * inv_r * inv_r * inv_r; } -static inline double D_002(double x, double y, double z, double inv_r) { return 3. * const_G * z * z * inv_r * inv_r * inv_r * inv_r * inv_r - const_G * inv_r * inv_r * inv_r; } - -static inline double D_110(double x, double y, double z, double inv_r) { return 3. * const_G * x * y * inv_r * inv_r * inv_r * inv_r * inv_r; } -static inline double D_101(double x, double y, double z, double inv_r) { return 3. * const_G * x * z * inv_r * inv_r * inv_r * inv_r * inv_r; } -static inline double D_011(double x, double y, double z, double inv_r) { return 3. * const_G * y * z * inv_r * inv_r * inv_r * inv_r * inv_r; } - /** * @brief Loops all siblings of cells ci and cj and launches all @@ -606,8 +511,7 @@ static inline double D_011(double x, double y, double z, double inv_r) { return static inline void iact_pair_pc(struct cell *ci, struct cell *cj) { struct cell *cp, *cps; - int k; - float r2, ir, dx[3]; + double r_x, r_y, r_z, r2, inv_r; #ifdef SANITY_CHECKS if (ci->h != cj->h) @@ -623,18 +527,22 @@ static inline void iact_pair_pc(struct cell *ci, struct cell *cj) { if ( ! are_neighbours(cp, cps) ) { + /* Distance between cells */ + r_x = ci->new.M_100 - cj->new.M_100; + r_y = ci->new.M_010 - cj->new.M_010; + r_z = ci->new.M_001 - cj->new.M_001; + + r2 = r_x * r_x + r_y * r_y + r_z * r_z; + inv_r = 1. / sqrt( r2 ); + /* Compute the field tensors */ + computeFieldTensors_x( r_x, r_y, r_z, inv_r, cj->new, ci->field_x ); + computeFieldTensors_y( r_x, r_y, r_z, inv_r, cj->new, ci->field_y ); + computeFieldTensors_z( r_x, r_y, r_z, inv_r, cj->new, ci->field_z ); - /* Compute the pairwise distance. */ - for (r2 = 0.0, k = 0; k < 3; k++) { - dx[k] = cps->new.com[k] - cp->new.com[k]; - r2 += dx[k] * dx[k]; - } - ir = 1.0f / sqrtf(r2); - - /* Apply the gravitational acceleration. */ - cps->field.f_000 += cp->new.mass * D_000(dx[0], dx[1], dx[2], ir); - cp->field.f_000 += cps->new.mass * D_000(dx[0], dx[1], dx[2], ir); + computeFieldTensors_x( -r_x, -r_y, -r_z, inv_r, ci->new, cj->field_x ); + computeFieldTensors_y( -r_x, -r_y, -r_z, inv_r, ci->new, cj->field_y ); + computeFieldTensors_z( -r_x, -r_y, -r_z, inv_r, ci->new, cj->field_z ); } } @@ -1456,33 +1364,33 @@ void test_bh(int N, int nr_threads, int runs, char *fileName) { tot_run += toc_run - tic; } - message("[check] root mass= %f %f", root->legacy.mass, root->new.mass); - message("[check] root CoMx= %f %f", root->legacy.com[0], root->new.com[0]); - message("[check] root CoMy= %f %f", root->legacy.com[1], root->new.com[1]); - message("[check] root CoMz= %f %f", root->legacy.com[2], root->new.com[2]); + message("[check] root mass= %f %f", root->legacy.mass, root->new.M_000); + message("[check] root CoMx= %f %f", root->legacy.com[0], root->new.M_100); + message("[check] root CoMy= %f %f", root->legacy.com[1], root->new.M_010); + message("[check] root CoMz= %f %f", root->legacy.com[2], root->new.M_001); - /* message("[check] | %f %f %f |", root->new.I_xx, root->new.I_xy, root->new.I_xz); */ - /* message("[check] I = | %f %f %f |", root->new.I_xy, root->new.I_yy, root->new.I_yz); */ - /* message("[check] | %f %f %f |", root->new.I_xz, root->new.I_yz, root->new.I_zz); */ + /* message("[check] | %f %f %f |", root->new.M_200, root->new.M_110, root->new.M_101); */ + /* message("[check] I = | %f %f %f |", root->new.M_110, root->new.M_020, root->new.M_011); */ + /* message("[check] | %f %f %f |", root->new.M_101, root->new.M_011, root->new.M_002); */ - /* double I_xx = 0., I_yy = 0., I_zz = 0., I_xy = 0., I_xz = 0., I_yz = 0.; */ + /* double M_200 = 0., M_020 = 0., M_002 = 0., M_110 = 0., M_101 = 0., M_011 = 0.; */ /* for (i = 0; i < N; ++i) { */ - /* I_xx += root->parts[i].x[0] * root->parts[i].x[0] * root->parts[i].mass; */ - /* I_yy += root->parts[i].x[1] * root->parts[i].x[1] * root->parts[i].mass; */ - /* I_zz += root->parts[i].x[2] * root->parts[i].x[2] * root->parts[i].mass; */ - /* I_xy += root->parts[i].x[0] * root->parts[i].x[1] * root->parts[i].mass; */ - /* I_xz += root->parts[i].x[0] * root->parts[i].x[2] * root->parts[i].mass; */ - /* I_yz += root->parts[i].x[1] * root->parts[i].x[2] * root->parts[i].mass; */ + /* M_200 += root->parts[i].x[0] * root->parts[i].x[0] * root->parts[i].mass; */ + /* M_020 += root->parts[i].x[1] * root->parts[i].x[1] * root->parts[i].mass; */ + /* M_002 += root->parts[i].x[2] * root->parts[i].x[2] * root->parts[i].mass; */ + /* M_110 += root->parts[i].x[0] * root->parts[i].x[1] * root->parts[i].mass; */ + /* M_101 += root->parts[i].x[0] * root->parts[i].x[2] * root->parts[i].mass; */ + /* M_011 += root->parts[i].x[1] * root->parts[i].x[2] * root->parts[i].mass; */ /* } */ - /* I_xx = I_xx - root->new.mass * root->new.com[0] * root->new.com[0]; */ - /* I_yy = I_yy - root->new.mass * root->new.com[1] * root->new.com[1]; */ - /* I_zz = I_zz - root->new.mass * root->new.com[2] * root->new.com[2]; */ - /* I_xy = I_xy - root->new.mass * root->new.com[0] * root->new.com[1]; */ - /* I_xz = I_xz - root->new.mass * root->new.com[0] * root->new.com[2]; */ - /* I_yz = I_yz - root->new.mass * root->new.com[1] * root->new.com[2]; */ - /* message("[check] | %f %f %f |", I_xx, I_xy, I_xz); */ - /* message("[check] I = | %f %f %f |", I_xy, I_yy, I_yz); */ - /* message("[check] | %f %f %f |", I_xz, I_yz, I_zz); */ + /* M_200 = M_200 - root->new.M_000 * root->new.M_100 * root->new.M_100; */ + /* M_020 = M_020 - root->new.M_000 * root->new.M_010 * root->new.M_010; */ + /* M_002 = M_002 - root->new.M_000 * root->new.M_001 * root->new.M_001; */ + /* M_110 = M_110 - root->new.M_000 * root->new.M_100 * root->new.M_010; */ + /* M_101 = M_101 - root->new.M_000 * root->new.M_100 * root->new.M_001; */ + /* M_011 = M_011 - root->new.M_000 * root->new.M_010 * root->new.M_001; */ + /* message("[check] | %f %f %f |", M_200, M_110, M_101); */ + /* message("[check] I = | %f %f %f |", M_110, M_020, M_011); */ + /* message("[check] | %f %f %f |", M_101, M_011, M_002); */ #if ICHECK >= 0 for (i = 0; i < N; ++i)