Commit d97468ea authored by Matthieu Schaller

Initial comprehensive FMM implementation. Still some incorrect terms hidden...

parent ea3deb8e
@@ -22,6 +22,7 @@ struct multipole {
struct fieldTensor {
#ifdef QUADRUPOLES
float F_200;
float F_020;
@@ -39,12 +40,16 @@ struct fieldTensor {
static inline void initFieldTensor(struct fieldTensor *f) {
f->F_000 = 0.f;
f->F_100 = f->F_010 = f->F_001 = 0.f;
#ifdef QUADRUPOLES
f->F_200 = f->F_020 = f->F_002 = 0.f;
f->F_110 = f->F_101 = f->F_011 = 0.f;
#endif
}
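For reference, a minimal usage sketch (not part of the commit; the local variables are hypothetical) of resetting a cell's three field tensors before any M2L contributions are accumulated into them:

/* Hypothetical sketch: zero the three local expansions of a cell. */
struct fieldTensor field_x, field_y, field_z;
initFieldTensor(&field_x);
initFieldTensor(&field_y);
initFieldTensor(&field_z);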
@@ -55,12 +60,26 @@ static inline void initFieldTensor(struct fieldTensor f) {
******************************/
/* 0-th order */
-static inline double D_000(double r_x, double r_y, double r_z, double inv_r) { return inv_r; } // phi(r)
+static inline double D_000(double r_x, double r_y, double r_z, double inv_r) { // phi(r)
+return inv_r;
+}
/* 1-st order */
-static inline double D_100(double r_x, double r_y, double r_z, double inv_r) { return -r_x * inv_r * inv_r * inv_r; } // d/dx phi(r)
-static inline double D_010(double r_x, double r_y, double r_z, double inv_r) { return -r_y * inv_r * inv_r * inv_r; } // d/dy phi(r)
-static inline double D_001(double r_x, double r_y, double r_z, double inv_r) { return -r_z * inv_r * inv_r * inv_r; } // d/dz phi(r)
+static inline double D_100(double r_x, double r_y, double r_z, double inv_r) { // d/dx phi(r)
+return -r_x * inv_r * inv_r * inv_r;
+}
+static inline double D_010(double r_x, double r_y, double r_z, double inv_r) { // d/dy phi(r)
+return -r_y * inv_r * inv_r * inv_r;
+}
+static inline double D_001(double r_x, double r_y, double r_z, double inv_r) { // d/dz phi(r)
+return -r_z * inv_r * inv_r * inv_r;
+}
/* 2-nd order */
static inline double D_200(double r_x, double r_y, double r_z, double inv_r) { // d2/dxdx phi(r)
@@ -70,6 +89,7 @@ static inline double D_200(double r_x, double r_y, double r_z, double inv_r) {
double inv_r5 = inv_r3 * inv_r2;
return 3. * r_x * r_x * inv_r5 - inv_r3;
}
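Each D_lmn above is the Cartesian derivative \(\partial_x^l \partial_y^m \partial_z^n\) of the Green's function \(\phi(\mathbf{r}) = 1/|\mathbf{r}|\), evaluated at the separation vector. A quick sanity-check sketch (not part of the commit, assuming this header compiles as multipoles.h) compares the closed form of D_200 against a central finite difference of phi along x:

#include <math.h>
#include <stdio.h>
#include "multipoles.h"

int main(void) {
  const double x = 0.3, y = 0.5, z = 0.7, h = 1e-4;
  const double r = sqrt(x * x + y * y + z * z);
  /* Closed form: d2/dx2 (1/r) = 3 x^2 / r^5 - 1 / r^3. */
  const double analytic = D_200(x, y, z, 1. / r);
  /* Central second difference of phi(r) = 1/|r| along x. */
  const double phi_p = 1. / sqrt((x + h) * (x + h) + y * y + z * z);
  const double phi_m = 1. / sqrt((x - h) * (x - h) + y * y + z * z);
  const double numeric = (phi_p - 2. / r + phi_m) / (h * h);
  printf("D_200: analytic=%.8g numeric=%.8g\n", analytic, numeric);
  return 0;
}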
static inline double D_020(double r_x, double r_y, double r_z, double inv_r) { // d2/dydy phi(r)
double inv_r2 = inv_r * inv_r;
@@ -77,6 +97,7 @@ static inline double D_020(double r_x, double r_y, double r_z, double inv_r) {
double inv_r5 = inv_r3 * inv_r2;
return 3. * r_y * r_y * inv_r5 - inv_r3;
}
static inline double D_002(double r_x, double r_y, double r_z, double inv_r) { // d2/dzdz phi(r)
double inv_r2 = inv_r * inv_r;
@@ -195,89 +216,151 @@ static inline double D_111(double r_x, double r_y, double r_z, double inv_r) {
************************************************/
-static inline void computeFieldTensors_x(double r_x, double r_y, double r_z, double inv_r, struct multipole A, struct fieldTensor B)
+static inline void computeFieldTensors_x(double r_x, double r_y, double r_z, double inv_r, struct multipole A, struct fieldTensor* B)
{
-B.F_000 += A.M_000 * D_100(r_x, r_y, r_z, inv_r);
+B->F_000 += A.M_000 * D_100(r_x, r_y, r_z, inv_r);
#ifdef QUADRUPOLES
-B.F_000 += A.M_200 * D_300(r_x, r_y, r_z, inv_r);
-B.F_000 += A.M_020 * D_120(r_x, r_y, r_z, inv_r);
-B.F_000 += A.M_002 * D_102(r_x, r_y, r_z, inv_r);
-B.F_000 += A.M_110 * D_210(r_x, r_y, r_z, inv_r);
-B.F_000 += A.M_011 * D_111(r_x, r_y, r_z, inv_r);
-B.F_000 += A.M_101 * D_201(r_x, r_y, r_z, inv_r);
+B->F_000 += A.M_200 * D_300(r_x, r_y, r_z, inv_r);
+B->F_000 += A.M_020 * D_120(r_x, r_y, r_z, inv_r);
+B->F_000 += A.M_002 * D_102(r_x, r_y, r_z, inv_r);
+B->F_000 += A.M_110 * D_210(r_x, r_y, r_z, inv_r);
+B->F_000 += A.M_011 * D_111(r_x, r_y, r_z, inv_r);
+B->F_000 += A.M_101 * D_201(r_x, r_y, r_z, inv_r);
#endif
-B.F_100 += A.M_000 * D_200(r_x, r_y, r_z, inv_r);
-B.F_010 += A.M_000 * D_110(r_x, r_y, r_z, inv_r);
-B.F_001 += A.M_000 * D_101(r_x, r_y, r_z, inv_r);
+B->F_100 += A.M_000 * D_200(r_x, r_y, r_z, inv_r);
+B->F_010 += A.M_000 * D_110(r_x, r_y, r_z, inv_r);
+B->F_001 += A.M_000 * D_101(r_x, r_y, r_z, inv_r);
#ifdef QUADRUPOLES
-B.F_200 += A.M_000 * D_300(r_x, r_y, r_z, inv_r);
-B.F_020 += A.M_000 * D_120(r_x, r_y, r_z, inv_r);
-B.F_002 += A.M_000 * D_102(r_x, r_y, r_z, inv_r);
-B.F_110 += A.M_000 * D_210(r_x, r_y, r_z, inv_r);
-B.F_101 += A.M_000 * D_201(r_x, r_y, r_z, inv_r);
-B.F_011 += A.M_000 * D_111(r_x, r_y, r_z, inv_r);
+B->F_200 += A.M_000 * D_300(r_x, r_y, r_z, inv_r);
+B->F_020 += A.M_000 * D_120(r_x, r_y, r_z, inv_r);
+B->F_002 += A.M_000 * D_102(r_x, r_y, r_z, inv_r);
+B->F_110 += A.M_000 * D_210(r_x, r_y, r_z, inv_r);
+B->F_101 += A.M_000 * D_201(r_x, r_y, r_z, inv_r);
+B->F_011 += A.M_000 * D_111(r_x, r_y, r_z, inv_r);
#endif
}
-static inline void computeFieldTensors_y(double r_x, double r_y, double r_z, double inv_r, struct multipole A, struct fieldTensor B)
+static inline void computeFieldTensors_y(double r_x, double r_y, double r_z, double inv_r, struct multipole A, struct fieldTensor* B)
{
-B.F_000 += A.M_000 * D_010(r_x, r_y, r_z, inv_r);
+B->F_000 += A.M_000 * D_010(r_x, r_y, r_z, inv_r);
#ifdef QUADRUPOLES
-B.F_000 += A.M_200 * D_210(r_x, r_y, r_z, inv_r);
-B.F_000 += A.M_020 * D_030(r_x, r_y, r_z, inv_r);
-B.F_000 += A.M_002 * D_012(r_x, r_y, r_z, inv_r);
-B.F_000 += A.M_110 * D_120(r_x, r_y, r_z, inv_r);
-B.F_000 += A.M_011 * D_021(r_x, r_y, r_z, inv_r);
-B.F_000 += A.M_101 * D_111(r_x, r_y, r_z, inv_r);
+B->F_000 += A.M_200 * D_210(r_x, r_y, r_z, inv_r);
+B->F_000 += A.M_020 * D_030(r_x, r_y, r_z, inv_r);
+B->F_000 += A.M_002 * D_012(r_x, r_y, r_z, inv_r);
+B->F_000 += A.M_110 * D_120(r_x, r_y, r_z, inv_r);
+B->F_000 += A.M_011 * D_021(r_x, r_y, r_z, inv_r);
+B->F_000 += A.M_101 * D_111(r_x, r_y, r_z, inv_r);
#endif
-B.F_100 += A.M_000 * D_110(r_x, r_y, r_z, inv_r);
-B.F_010 += A.M_000 * D_020(r_x, r_y, r_z, inv_r);
-B.F_001 += A.M_000 * D_011(r_x, r_y, r_z, inv_r);
+B->F_100 += A.M_000 * D_110(r_x, r_y, r_z, inv_r);
+B->F_010 += A.M_000 * D_020(r_x, r_y, r_z, inv_r);
+B->F_001 += A.M_000 * D_011(r_x, r_y, r_z, inv_r);
#ifdef QUADRUPOLES
-B.F_200 += A.M_000 * D_210(r_x, r_y, r_z, inv_r);
-B.F_020 += A.M_000 * D_030(r_x, r_y, r_z, inv_r);
-B.F_002 += A.M_000 * D_012(r_x, r_y, r_z, inv_r);
-B.F_110 += A.M_000 * D_120(r_x, r_y, r_z, inv_r);
-B.F_101 += A.M_000 * D_111(r_x, r_y, r_z, inv_r);
-B.F_011 += A.M_000 * D_021(r_x, r_y, r_z, inv_r);
+B->F_200 += A.M_000 * D_210(r_x, r_y, r_z, inv_r);
+B->F_020 += A.M_000 * D_030(r_x, r_y, r_z, inv_r);
+B->F_002 += A.M_000 * D_012(r_x, r_y, r_z, inv_r);
+B->F_110 += A.M_000 * D_120(r_x, r_y, r_z, inv_r);
+B->F_101 += A.M_000 * D_111(r_x, r_y, r_z, inv_r);
+B->F_011 += A.M_000 * D_021(r_x, r_y, r_z, inv_r);
#endif
}
-static inline void computeFieldTensors_z(double r_x, double r_y, double r_z, double inv_r, struct multipole A, struct fieldTensor B)
+static inline void computeFieldTensors_z(double r_x, double r_y, double r_z, double inv_r, struct multipole A, struct fieldTensor* B)
{
-B.F_000 += A.M_000 * D_100(r_x, r_y, r_z, inv_r);
+B->F_000 += A.M_000 * D_001(r_x, r_y, r_z, inv_r);
#ifdef QUADRUPOLES
-B.F_000 += A.M_200 * D_201(r_x, r_y, r_z, inv_r);
-B.F_000 += A.M_020 * D_021(r_x, r_y, r_z, inv_r);
-B.F_000 += A.M_002 * D_003(r_x, r_y, r_z, inv_r);
-B.F_000 += A.M_110 * D_111(r_x, r_y, r_z, inv_r);
-B.F_000 += A.M_011 * D_012(r_x, r_y, r_z, inv_r);
-B.F_000 += A.M_101 * D_102(r_x, r_y, r_z, inv_r);
+B->F_000 += A.M_200 * D_201(r_x, r_y, r_z, inv_r);
+B->F_000 += A.M_020 * D_021(r_x, r_y, r_z, inv_r);
+B->F_000 += A.M_002 * D_003(r_x, r_y, r_z, inv_r);
+B->F_000 += A.M_110 * D_111(r_x, r_y, r_z, inv_r);
+B->F_000 += A.M_011 * D_012(r_x, r_y, r_z, inv_r);
+B->F_000 += A.M_101 * D_102(r_x, r_y, r_z, inv_r);
#endif
-B.F_100 += A.M_000 * D_101(r_x, r_y, r_z, inv_r);
-B.F_010 += A.M_000 * D_011(r_x, r_y, r_z, inv_r);
-B.F_001 += A.M_000 * D_002(r_x, r_y, r_z, inv_r);
+B->F_100 += A.M_000 * D_101(r_x, r_y, r_z, inv_r);
+B->F_010 += A.M_000 * D_011(r_x, r_y, r_z, inv_r);
+B->F_001 += A.M_000 * D_002(r_x, r_y, r_z, inv_r);
#ifdef QUADRUPOLES
-B.F_200 += A.M_000 * D_201(r_x, r_y, r_z, inv_r);
-B.F_020 += A.M_000 * D_021(r_x, r_y, r_z, inv_r);
-B.F_002 += A.M_000 * D_003(r_x, r_y, r_z, inv_r);
-B.F_110 += A.M_000 * D_111(r_x, r_y, r_z, inv_r);
-B.F_101 += A.M_000 * D_102(r_x, r_y, r_z, inv_r);
-B.F_011 += A.M_000 * D_012(r_x, r_y, r_z, inv_r);
+B->F_200 += A.M_000 * D_201(r_x, r_y, r_z, inv_r);
+B->F_020 += A.M_000 * D_021(r_x, r_y, r_z, inv_r);
+B->F_002 += A.M_000 * D_003(r_x, r_y, r_z, inv_r);
+B->F_110 += A.M_000 * D_111(r_x, r_y, r_z, inv_r);
+B->F_101 += A.M_000 * D_102(r_x, r_y, r_z, inv_r);
+B->F_011 += A.M_000 * D_012(r_x, r_y, r_z, inv_r);
#endif
}
+static inline void shiftAndAddTensor(struct fieldTensor A, struct fieldTensor* B, double dx, double dy, double dz)
+{
+B->F_000 += A.F_000;
+B->F_000 += dx * A.F_100;
+B->F_000 += dy * A.F_010;
+B->F_000 += dz * A.F_001;
+#ifdef QUADRUPOLES
+B->F_000 += 0.5f * dx * dx * A.F_200;
+B->F_000 += 0.5f * dy * dy * A.F_020;
+B->F_000 += 0.5f * dz * dz * A.F_002;
+B->F_000 += dx * dy * A.F_110;
+B->F_000 += dx * dz * A.F_101;
+B->F_000 += dy * dz * A.F_011;
+#endif
+B->F_100 += A.F_100;
+B->F_010 += A.F_010;
+B->F_001 += A.F_001;
+#ifdef QUADRUPOLES
+B->F_100 += dx * A.F_200 + dy * A.F_110 + dz * A.F_101;
+B->F_010 += dx * A.F_110 + dy * A.F_020 + dz * A.F_011;
+B->F_001 += dx * A.F_101 + dy * A.F_011 + dz * A.F_002;
+B->F_200 += A.F_200;
+B->F_020 += A.F_020;
+B->F_002 += A.F_002;
+B->F_110 += A.F_110;
+B->F_101 += A.F_101;
+B->F_011 += A.F_011;
+#endif
+}
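shiftAndAddTensor is the L2L operation: it re-expands a local (Taylor) expansion about a centre shifted by \(\mathbf{d} = (d_x, d_y, d_z)\) and accumulates it into the child's tensor. The scalar term it implements is

\[
F'_{000} = F_{000} + d_x F_{100} + d_y F_{010} + d_z F_{001}
 + \tfrac{1}{2} d_x^2 F_{200} + \tfrac{1}{2} d_y^2 F_{020} + \tfrac{1}{2} d_z^2 F_{002}
 + d_x d_y F_{110} + d_x d_z F_{101} + d_y d_z F_{011},
\]

with the first-order components shifted as \(F'_{100} = F_{100} + d_x F_{200} + d_y F_{110} + d_z F_{101}\) (and cyclic analogues) and the second-order components copied unchanged, since the expansion is truncated at second order. Note that the mixed terms carry a coefficient of 1, not 1/2: the \(1/2!\) of the Taylor series is cancelled by the multiplicity of the off-diagonal derivatives.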
+static inline float applyFieldAcceleration(struct fieldTensor B, double dx, double dy, double dz)
+{
+float a = 0.f;
+a += B.F_000;
+//a += dx * B.F_100;
+//a += dy * B.F_010;
+//a += dz * B.F_001;
+#ifdef QUADRUPOLES
+a += 0.5f * dx * dx * B.F_200;
+a += 0.5f * dy * dy * B.F_020;
+a += 0.5f * dz * dz * B.F_002;
+a += dx * dy * B.F_110;
+a += dx * dz * B.F_101;
+a += dy * dz * B.F_011;
+#endif
+return a;
+}
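Taken together, these routines form the far-field pipeline: computeFieldTensors_* is the M2L step (a remote multipole becomes a local field tensor, one tensor per acceleration component), shiftAndAddTensor is the L2L step that carries the tensors down the tree, and applyFieldAcceleration is the L2P step at the particles. A condensed sketch (not part of the commit; the cells a, b, child and the particle p are hypothetical but laid out as in this code):

/* Hypothetical sketch of one far-field interaction. */
void far_field_sketch(struct cell *a, struct cell *b, struct cell *child,
                      struct part *p) {
  /* Separation between the two cells' centres of mass. */
  double r_x = a->new.M_100 - b->new.M_100;
  double r_y = a->new.M_010 - b->new.M_010;
  double r_z = a->new.M_001 - b->new.M_001;
  double inv_r = 1. / sqrt(r_x * r_x + r_y * r_y + r_z * r_z);

  /* M2L: accumulate b's multipole into a's field tensors. */
  computeFieldTensors_x(r_x, r_y, r_z, inv_r, b->new, &a->field_x);
  computeFieldTensors_y(r_x, r_y, r_z, inv_r, b->new, &a->field_y);
  computeFieldTensors_z(r_x, r_y, r_z, inv_r, b->new, &a->field_z);

  /* L2L: shift a's tensors to the child's expansion centre. */
  double dx = child->new.M_100 - a->new.M_100;
  double dy = child->new.M_010 - a->new.M_010;
  double dz = child->new.M_001 - a->new.M_001;
  shiftAndAddTensor(a->field_x, &child->field_x, dx, dy, dz);
  shiftAndAddTensor(a->field_y, &child->field_y, dx, dy, dz);
  shiftAndAddTensor(a->field_z, &child->field_z, dx, dy, dz);

  /* L2P: evaluate each component at the particle's offset. */
  double px = p->x[0] - child->new.M_100;
  double py = p->x[1] - child->new.M_010;
  double pz = p->x[2] - child->new.M_001;
  p->a[0] += const_G * applyFieldAcceleration(child->field_x, px, py, pz);
  p->a[1] += const_G * applyFieldAcceleration(child->field_y, px, py, pz);
  p->a[2] += const_G * applyFieldAcceleration(child->field_z, px, py, pz);
}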
@@ -37,7 +37,7 @@
/* Some local constants. */
#define cell_pool_grow 1000
-#define cell_maxparts 100
+#define cell_maxparts 1
#define task_limit 1
#define const_G 1 // 6.6738e-8
#define dist_min 0.5 /* Used for legacy walk only */
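Dropping cell_maxparts from 100 to 1 puts every particle in its own leaf, so the tree is as deep as it can get and the new down pass is exercised for each particle individually; together with task_limit 1, this reads as a debugging configuration rather than a production setting.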
@@ -47,7 +47,7 @@
#define SANITY_CHECKS
#define COM_AS_TASK
#define COUNTERS
-#define QUADRUPOLES
+#define NO_QUADRUPOLES
#include "multipoles.h"
@@ -55,11 +55,11 @@
/** Data structure for the particles. */
struct part {
double x[3];
-// union {
+//union {
float a[3];
float a_legacy[3];
float a_exact[3];
-// };
+//};
float mass;
int id;
};// __attribute__((aligned(32)));
@@ -252,31 +252,55 @@ void comp_down(struct cell *c) {
int k, count = c->count;
struct cell *cp;
-//struct part *parts = c->parts;
+double dx=0.f, dy=0.f, dz=0.f;
+struct part *parts = c->parts;
// message("h=%f a= %f %f %f", c->h, c->a[0], c->a[1], c->a[2]);
if (c->split) {
//message("first= %p, sibling= %p", c->firstchild, c->sibling);
//message("Downpass: %f", c->field_x.F_000);
/* Loop over the siblings and propagate accelerations. */
for (cp = c->firstchild; cp != c->sibling; cp = cp->sibling) {
/* cp->a[0] += c->a[0]; */
/* cp->a[1] += c->a[1]; */
/* cp->a[2] += c->a[2]; */
+/* Distance over which to shift */
+dx = cp->new.M_100 - c->new.M_100;
+dy = cp->new.M_010 - c->new.M_010;
+dz = cp->new.M_001 - c->new.M_001;
+/* Propagate the tensors downwards */
+shiftAndAddTensor(c->field_x, &cp->field_x, dx, dy, dz);
+shiftAndAddTensor(c->field_y, &cp->field_y, dx, dy, dz);
+shiftAndAddTensor(c->field_z, &cp->field_z, dx, dy, dz);
/* Recurse */
-//comp_down(cp);
+comp_down(cp);
}
} else {
/* Otherwise, propagate the accelerations to the siblings. */
for (k = 0; k < count; k++) {
/* parts[k].a[0] += c->a[0]; */
/* parts[k].a[1] += c->a[1]; */
/* parts[k].a[2] += c->a[2]; */
+dx = parts[k].x[0] - c->new.M_100;
+dy = parts[k].x[1] - c->new.M_010;
+dz = parts[k].x[2] - c->new.M_001;
+float ax, ay, az;
+ax = const_G * applyFieldAcceleration(c->field_x, dx, dy, dz);
+ay = const_G * applyFieldAcceleration(c->field_y, dx, dy, dz);
+az = const_G * applyFieldAcceleration(c->field_z, dx, dy, dz);
+//message("Apply to particle: %f %f", ax, c->field_x.F_000);
+parts[k].a[0] += ax;
+parts[k].a[1] += ay;
+parts[k].a[2] += az;
}
}
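The down pass thus has two regimes: a split cell translates its three field tensors to each child's centre of mass (the first-order moments M_100/M_010/M_001 serve as both multipole centre and local-expansion centre) and recurses, while a leaf evaluates the truncated Taylor series at each particle's offset from that centre and scales by const_G to obtain the acceleration. Note that the first-order terms are still commented out in applyFieldAcceleration above, presumably among the "incorrect terms hidden" that the commit message mentions.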
@@ -320,9 +344,9 @@ void cell_split(struct cell *c, struct qsched *s) {
sizeof(struct cell *), 1);
#endif
-c->down_tid = qsched_addtask(s, task_type_down, task_flag_none, &c,
-                             sizeof(struct cell *), 1);
-qsched_addlock(s, c->down_tid, c->res);
+//c->down_tid = qsched_addtask(s, task_type_down, task_flag_none, &c,
+//                             sizeof(struct cell *), 1);
+//qsched_addlock(s, c->down_tid, c->res);
}
/* Does this cell need to be split? */
@@ -447,11 +471,11 @@ void cell_split(struct cell *c, struct qsched *s) {
#endif
/* Link the downward tasks. */
-#ifdef COM_AS_TASK
-for (k = 0; k < 8; k++)
-  if (progenitors[k]->count > 0)
-    qsched_addunlock(s, c->down_tid, progenitors[k]->down_tid);
-#endif
+//#ifdef COM_AS_TASK
+//for (k = 0; k < 8; k++)
+//  if (progenitors[k]->count > 0)
+//    qsched_addunlock(s, c->down_tid, progenitors[k]->down_tid);
+//#endif
} /* does the cell need to be split? */
@@ -528,21 +552,28 @@ static inline void iact_pair_pc(struct cell *ci, struct cell *cj) {
if ( ! are_neighbours(cp, cps) ) {
/* Distance between cells */
-r_x = ci->new.M_100 - cj->new.M_100;
-r_y = ci->new.M_010 - cj->new.M_010;
-r_z = ci->new.M_001 - cj->new.M_001;
+r_x = cp->new.M_100 - cps->new.M_100;
+r_y = cp->new.M_010 - cps->new.M_010;
+r_z = cp->new.M_001 - cps->new.M_001;
r2 = r_x * r_x + r_y * r_y + r_z * r_z;
inv_r = 1. / sqrt( r2 );
/* Compute the field tensors */
-computeFieldTensors_x( r_x, r_y, r_z, inv_r, cj->new, ci->field_x );
-computeFieldTensors_y( r_x, r_y, r_z, inv_r, cj->new, ci->field_y );
-computeFieldTensors_z( r_x, r_y, r_z, inv_r, cj->new, ci->field_z );
-computeFieldTensors_x( -r_x, -r_y, -r_z, inv_r, ci->new, cj->field_x );
-computeFieldTensors_y( -r_x, -r_y, -r_z, inv_r, ci->new, cj->field_y );
-computeFieldTensors_z( -r_x, -r_y, -r_z, inv_r, ci->new, cj->field_z );
+computeFieldTensors_x( r_x, r_y, r_z, inv_r, cps->new, &cp->field_x );
+computeFieldTensors_y( r_x, r_y, r_z, inv_r, cps->new, &cp->field_y );
+computeFieldTensors_z( r_x, r_y, r_z, inv_r, cps->new, &cp->field_z );
+computeFieldTensors_x( -r_x, -r_y, -r_z, inv_r, cp->new, &cps->field_x );
+computeFieldTensors_y( -r_x, -r_y, -r_z, inv_r, cp->new, &cps->field_y );
+computeFieldTensors_z( -r_x, -r_y, -r_z, inv_r, cp->new, &cps->field_z );
/* message("r= %f %f %f %f M_000= %f %f F_000= %f", r_x, r_y, r_z, sqrt(r2), */
/* cps->new.M_000, */
/* cps->new.M_000 * D_100( r_x, r_y, r_z, inv_r), */
/* cp->field_x.F_000); */
}
}
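A single separation vector serves both directions of the pair because each derivative of \(1/r\) has definite parity,

\[
D_{lmn}(-\mathbf{r}) = (-1)^{\,l+m+n}\, D_{lmn}(\mathbf{r}),
\]

so the cps-on-cp contribution and the cp-on-cps contribution above use the same r_x, r_y, r_z with flipped signs.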
@@ -1229,17 +1260,37 @@ void test_bh(int N, int nr_threads, int runs, char *fileName) {
/* If no input file was specified, generate random particle positions. */
if (fileName == NULL || fileName[0] == 0) {
-for (k = 0; k < N; k++) {
-  parts[k].id = k;
-  parts[k].x[0] = ((double)rand()) / RAND_MAX;
-  parts[k].x[1] = ((double)rand()) / RAND_MAX;
-  parts[k].x[2] = ((double)rand()) / RAND_MAX;
-  parts[k].mass = ((double)rand()) / RAND_MAX;
-  parts[k].a_legacy[0] = 0.0;
-  parts[k].a_legacy[1] = 0.0;
-  parts[k].a_legacy[2] = 0.0;
-}
+/* for (k = 0; k < N; k++) { */
+/*   parts[k].id = k; */
+/*   parts[k].x[0] = ((double)rand()) / RAND_MAX; */
+/*   parts[k].x[1] = ((double)rand()) / RAND_MAX; */
+/*   parts[k].x[2] = ((double)rand()) / RAND_MAX; */
+/*   parts[k].mass = 1.f;//((double)rand()) / RAND_MAX; */
+/*   parts[k].a_legacy[0] = 0.0; */
+/*   parts[k].a_legacy[1] = 0.0; */
+/*   parts[k].a_legacy[2] = 0.0; */
+/* } */
+int ii, jj, kk;
+for (ii = 0; ii < 8; ++ii) {
+  for (jj = 0; jj < 8; ++jj) {
+    for (kk = 0; kk < 8; ++kk) {
+      int index = 64*ii + 8*jj + kk;
+      parts[index].id = index;
+      parts[index].x[0] = ii*0.125 + 0.0625;
+      parts[index].x[1] = jj*0.125 + 0.0625;
+      parts[index].x[2] = kk*0.125 + 0.0625;
+      parts[index].mass = 1.;
+      parts[index].a_legacy[0] = 0.f;
+      parts[index].a_legacy[1] = 0.f;
+      parts[index].a_legacy[2] = 0.f;
+    }
+  }
+}
/* Otherwise, read them from a file. */
} else {
file = fopen(fileName, "r");