diff --git a/examples/multipoles.h b/examples/multipoles.h index 0209ad6022c702a4b55b473b915f131c18e59826..fa3594352b072ffe3052e6879acd6753262df0ba 100644 --- a/examples/multipoles.h +++ b/examples/multipoles.h @@ -22,6 +22,7 @@ struct multipole { struct fieldTensor { + #ifdef QUADRUPOLES float F_200; float F_020; @@ -39,12 +40,16 @@ struct fieldTensor { static inline void initFieldTensor(struct fieldTensor f) { + f.F_000 = 0.f; + f.F_100 = f.F_010 = f.F_001 = 0.f; + #ifdef QUADRUPOLES f.F_200 = f.F_020 = f.F_002 = 0.f; f.F_110 = f.F_101 = f.F_011 = 0.f; #endif + } @@ -55,12 +60,26 @@ static inline void initFieldTensor(struct fieldTensor f) { ******************************/ /* 0-th order */ -static inline double D_000(double r_x, double r_y, double r_z, double inv_r) { return inv_r; } // phi(r) +static inline double D_000(double r_x, double r_y, double r_z, double inv_r) { // phi(r) + + return inv_r; +} /* 1-st order */ -static inline double D_100(double r_x, double r_y, double r_z, double inv_r) { return -r_x * inv_r * inv_r * inv_r; } // d/dx phi(r) -static inline double D_010(double r_x, double r_y, double r_z, double inv_r) { return -r_y * inv_r * inv_r * inv_r; } // d/dy phi(r) -static inline double D_001(double r_x, double r_y, double r_z, double inv_r) { return -r_z * inv_r * inv_r * inv_r; } // d/dz phi(r) +static inline double D_100(double r_x, double r_y, double r_z, double inv_r) { // d/dx phi(r) + + return -r_x * inv_r * inv_r * inv_r; +} + +static inline double D_010(double r_x, double r_y, double r_z, double inv_r) { // d/dy phi(r) + + return -r_y * inv_r * inv_r * inv_r; +} + +static inline double D_001(double r_x, double r_y, double r_z, double inv_r) { // d/dz phi(r) + + return -r_z * inv_r * inv_r * inv_r; +} /* 2-nd order */ static inline double D_200(double r_x, double r_y, double r_z, double inv_r) { // d2/dxdx phi(r) @@ -70,6 +89,7 @@ static inline double D_200(double r_x, double r_y, double r_z, double inv_r) { double inv_r5 = inv_r3 * 
inv_r2; return 3. * r_x * r_x * inv_r5 - inv_r3; } + static inline double D_020(double r_x, double r_y, double r_z, double inv_r) { // d2/dydy phi(r) double inv_r2 = inv_r * inv_r; @@ -77,6 +97,7 @@ static inline double D_020(double r_x, double r_y, double r_z, double inv_r) { double inv_r5 = inv_r3 * inv_r2; return 3. * r_y * r_y * inv_r5 - inv_r3; } + static inline double D_002(double r_x, double r_y, double r_z, double inv_r) { // d2/dzdz phi(r) double inv_r2 = inv_r * inv_r; @@ -195,89 +216,151 @@ static inline double D_111(double r_x, double r_y, double r_z, double inv_r) { ************************************************/ -static inline void computeFieldTensors_x(double r_x, double r_y, double r_z, double inv_r, struct multipole A, struct fieldTensor B) +static inline void computeFieldTensors_x(double r_x, double r_y, double r_z, double inv_r, struct multipole A, struct fieldTensor* B) { - B.F_000 += A.M_000 * D_100(r_x, r_y, r_z, inv_r); + B->F_000 += A.M_000 * D_100(r_x, r_y, r_z, inv_r); #ifdef QUADRUPOLES - B.F_000 += A.M_200 * D_300(r_x, r_y, r_z, inv_r); - B.F_000 += A.M_020 * D_120(r_x, r_y, r_z, inv_r); - B.F_000 += A.M_002 * D_102(r_x, r_y, r_z, inv_r); - B.F_000 += A.M_110 * D_210(r_x, r_y, r_z, inv_r); - B.F_000 += A.M_011 * D_111(r_x, r_y, r_z, inv_r); - B.F_000 += A.M_101 * D_201(r_x, r_y, r_z, inv_r); + B->F_000 += A.M_200 * D_300(r_x, r_y, r_z, inv_r); + B->F_000 += A.M_020 * D_120(r_x, r_y, r_z, inv_r); + B->F_000 += A.M_002 * D_102(r_x, r_y, r_z, inv_r); + B->F_000 += A.M_110 * D_210(r_x, r_y, r_z, inv_r); + B->F_000 += A.M_011 * D_111(r_x, r_y, r_z, inv_r); + B->F_000 += A.M_101 * D_201(r_x, r_y, r_z, inv_r); #endif - B.F_100 += A.M_000 * D_200(r_x, r_y, r_z, inv_r); - B.F_010 += A.M_000 * D_110(r_x, r_y, r_z, inv_r); - B.F_001 += A.M_000 * D_101(r_x, r_y, r_z, inv_r); + B->F_100 += A.M_000 * D_200(r_x, r_y, r_z, inv_r); + B->F_010 += A.M_000 * D_110(r_x, r_y, r_z, inv_r); + B->F_001 += A.M_000 * D_101(r_x, r_y, r_z, inv_r); #ifdef 
QUADRUPOLES - B.F_200 += A.M_000 * D_300(r_x, r_y, r_z, inv_r); - B.F_020 += A.M_000 * D_120(r_x, r_y, r_z, inv_r); - B.F_002 += A.M_000 * D_102(r_x, r_y, r_z, inv_r); - B.F_110 += A.M_000 * D_210(r_x, r_y, r_z, inv_r); - B.F_101 += A.M_000 * D_201(r_x, r_y, r_z, inv_r); - B.F_011 += A.M_000 * D_111(r_x, r_y, r_z, inv_r); + B->F_200 += A.M_000 * D_300(r_x, r_y, r_z, inv_r); + B->F_020 += A.M_000 * D_120(r_x, r_y, r_z, inv_r); + B->F_002 += A.M_000 * D_102(r_x, r_y, r_z, inv_r); + B->F_110 += A.M_000 * D_210(r_x, r_y, r_z, inv_r); + B->F_101 += A.M_000 * D_201(r_x, r_y, r_z, inv_r); + B->F_011 += A.M_000 * D_111(r_x, r_y, r_z, inv_r); #endif } -static inline void computeFieldTensors_y(double r_x, double r_y, double r_z, double inv_r, struct multipole A, struct fieldTensor B) +static inline void computeFieldTensors_y(double r_x, double r_y, double r_z, double inv_r, struct multipole A, struct fieldTensor* B) { - B.F_000 += A.M_000 * D_010(r_x, r_y, r_z, inv_r); + B->F_000 += A.M_000 * D_010(r_x, r_y, r_z, inv_r); #ifdef QUADRUPOLES - B.F_000 += A.M_200 * D_210(r_x, r_y, r_z, inv_r); - B.F_000 += A.M_020 * D_030(r_x, r_y, r_z, inv_r); - B.F_000 += A.M_002 * D_012(r_x, r_y, r_z, inv_r); - B.F_000 += A.M_110 * D_120(r_x, r_y, r_z, inv_r); - B.F_000 += A.M_011 * D_021(r_x, r_y, r_z, inv_r); - B.F_000 += A.M_101 * D_111(r_x, r_y, r_z, inv_r); + B->F_000 += A.M_200 * D_210(r_x, r_y, r_z, inv_r); + B->F_000 += A.M_020 * D_030(r_x, r_y, r_z, inv_r); + B->F_000 += A.M_002 * D_012(r_x, r_y, r_z, inv_r); + B->F_000 += A.M_110 * D_120(r_x, r_y, r_z, inv_r); + B->F_000 += A.M_011 * D_021(r_x, r_y, r_z, inv_r); + B->F_000 += A.M_101 * D_111(r_x, r_y, r_z, inv_r); #endif - B.F_100 += A.M_000 * D_110(r_x, r_y, r_z, inv_r); - B.F_010 += A.M_000 * D_020(r_x, r_y, r_z, inv_r); - B.F_001 += A.M_000 * D_011(r_x, r_y, r_z, inv_r); + B->F_100 += A.M_000 * D_110(r_x, r_y, r_z, inv_r); + B->F_010 += A.M_000 * D_020(r_x, r_y, r_z, inv_r); + B->F_001 += A.M_000 * D_011(r_x, r_y, r_z, inv_r); 
#ifdef QUADRUPOLES - B.F_200 += A.M_000 * D_210(r_x, r_y, r_z, inv_r); - B.F_020 += A.M_000 * D_030(r_x, r_y, r_z, inv_r); - B.F_002 += A.M_000 * D_012(r_x, r_y, r_z, inv_r); - B.F_110 += A.M_000 * D_120(r_x, r_y, r_z, inv_r); - B.F_101 += A.M_000 * D_111(r_x, r_y, r_z, inv_r); - B.F_011 += A.M_000 * D_021(r_x, r_y, r_z, inv_r); + B->F_200 += A.M_000 * D_210(r_x, r_y, r_z, inv_r); + B->F_020 += A.M_000 * D_030(r_x, r_y, r_z, inv_r); + B->F_002 += A.M_000 * D_012(r_x, r_y, r_z, inv_r); + B->F_110 += A.M_000 * D_120(r_x, r_y, r_z, inv_r); + B->F_101 += A.M_000 * D_111(r_x, r_y, r_z, inv_r); + B->F_011 += A.M_000 * D_021(r_x, r_y, r_z, inv_r); #endif } -static inline void computeFieldTensors_z(double r_x, double r_y, double r_z, double inv_r, struct multipole A, struct fieldTensor B) +static inline void computeFieldTensors_z(double r_x, double r_y, double r_z, double inv_r, struct multipole A, struct fieldTensor* B) { - B.F_000 += A.M_000 * D_100(r_x, r_y, r_z, inv_r); + B->F_000 += A.M_000 * D_001(r_x, r_y, r_z, inv_r); #ifdef QUADRUPOLES - B.F_000 += A.M_200 * D_201(r_x, r_y, r_z, inv_r); - B.F_000 += A.M_020 * D_021(r_x, r_y, r_z, inv_r); - B.F_000 += A.M_002 * D_003(r_x, r_y, r_z, inv_r); - B.F_000 += A.M_110 * D_111(r_x, r_y, r_z, inv_r); - B.F_000 += A.M_011 * D_012(r_x, r_y, r_z, inv_r); - B.F_000 += A.M_101 * D_102(r_x, r_y, r_z, inv_r); + B->F_000 += A.M_200 * D_201(r_x, r_y, r_z, inv_r); + B->F_000 += A.M_020 * D_021(r_x, r_y, r_z, inv_r); + B->F_000 += A.M_002 * D_003(r_x, r_y, r_z, inv_r); + B->F_000 += A.M_110 * D_111(r_x, r_y, r_z, inv_r); + B->F_000 += A.M_011 * D_012(r_x, r_y, r_z, inv_r); + B->F_000 += A.M_101 * D_102(r_x, r_y, r_z, inv_r); #endif - B.F_100 += A.M_000 * D_101(r_x, r_y, r_z, inv_r); - B.F_010 += A.M_000 * D_011(r_x, r_y, r_z, inv_r); - B.F_001 += A.M_000 * D_002(r_x, r_y, r_z, inv_r); + B->F_100 += A.M_000 * D_101(r_x, r_y, r_z, inv_r); + B->F_010 += A.M_000 * D_011(r_x, r_y, r_z, inv_r); + B->F_001 += A.M_000 * D_002(r_x, r_y, r_z, 
inv_r); + +#ifdef QUADRUPOLES + B->F_200 += A.M_000 * D_201(r_x, r_y, r_z, inv_r); + B->F_020 += A.M_000 * D_021(r_x, r_y, r_z, inv_r); + B->F_002 += A.M_000 * D_003(r_x, r_y, r_z, inv_r); + B->F_110 += A.M_000 * D_111(r_x, r_y, r_z, inv_r); + B->F_101 += A.M_000 * D_102(r_x, r_y, r_z, inv_r); + B->F_011 += A.M_000 * D_012(r_x, r_y, r_z, inv_r); +#endif + +} + + + +static inline void shiftAndAddTensor(struct fieldTensor A, struct fieldTensor* B, double dx, double dy, double dz) +{ + B->F_000 += A.F_000; + B->F_000 += dx * A.F_100; + B->F_000 += dy * A.F_010; + B->F_000 += dz * A.F_001; + +#ifdef QUADRUPOLES + B->F_000 += 0.5f * dx * dx * A.F_200; + B->F_000 += 0.5f * dy * dy * A.F_020; + B->F_000 += 0.5f * dz * dz * A.F_002; + B->F_000 += 0.5f * dx * dy * A.F_110; + B->F_000 += 0.5f * dx * dz * A.F_101; + B->F_000 += 0.5f * dy * dz * A.F_011; +#endif + + B->F_100 += A.F_100; + B->F_010 += A.F_010; + B->F_001 += A.F_001; + +#ifdef QUADRUPOLES + B->F_100 += dx * A.F_200 + dy * A.F_110 + dz * A.F_101; + B->F_010 += dx * A.F_110 + dy * A.F_020 + dz * A.F_011; + B->F_001 += dx * A.F_101 + dy * A.F_011 + dz * A.F_002; + + B->F_200 += A.F_200; + B->F_020 += A.F_020; + B->F_002 += A.F_002; + B->F_110 += A.F_110; + B->F_101 += A.F_101; + B->F_011 += A.F_011; +#endif +} + + + + +static inline float applyFieldAcceleration(struct fieldTensor B, double dx, double dy, double dz) +{ + float a = 0.f; + + a += B.F_000; + + //a += dx * B.F_100; + //a += dy * B.F_010; + //a += dz * B.F_001; + #ifdef QUADRUPOLES - B.F_200 += A.M_000 * D_201(r_x, r_y, r_z, inv_r); - B.F_020 += A.M_000 * D_021(r_x, r_y, r_z, inv_r); - B.F_002 += A.M_000 * D_003(r_x, r_y, r_z, inv_r); - B.F_110 += A.M_000 * D_111(r_x, r_y, r_z, inv_r); - B.F_101 += A.M_000 * D_102(r_x, r_y, r_z, inv_r); - B.F_011 += A.M_000 * D_012(r_x, r_y, r_z, inv_r); + a += 0.5f * dx * dx * B.F_200; + a += 0.5f * dy * dy * B.F_020; + a += 0.5f * dz * dz * B.F_002; + a += 0.5f * dx * dy * B.F_110; + a += 0.5f * dx * dz * B.F_101; + a 
+= 0.5f * dy * dz * B.F_011; #endif + return a; } diff --git a/examples/test_fmm.c b/examples/test_fmm.c index 3243c0be980323b2c5e2e0d45a148c5345774f79..5a0fb2cda47d17dfaf254b7fcac11ac56c4fc258 100644 --- a/examples/test_fmm.c +++ b/examples/test_fmm.c @@ -37,7 +37,7 @@ /* Some local constants. */ #define cell_pool_grow 1000 -#define cell_maxparts 100 +#define cell_maxparts 1 #define task_limit 1 #define const_G 1 // 6.6738e-8 #define dist_min 0.5 /* Used for legacy walk only */ @@ -47,7 +47,7 @@ #define SANITY_CHECKS #define COM_AS_TASK #define COUNTERS -#define QUADRUPOLES +#define NO_QUADRUPOLES #include "multipoles.h" @@ -55,11 +55,11 @@ /** Data structure for the particles. */ struct part { double x[3]; - // union { + //union { float a[3]; float a_legacy[3]; float a_exact[3]; - // }; + //}; float mass; int id; };// __attribute__((aligned(32))); @@ -252,31 +252,55 @@ void comp_down(struct cell *c) { int k, count = c->count; struct cell *cp; - //struct part *parts = c->parts; + double dx=0.f, dy=0.f, dz=0.f; + struct part *parts = c->parts; - // message("h=%f a= %f %f %f", c->h, c->a[0], c->a[1], c->a[2]); if (c->split) { - //message("first= %p, sibling= %p", c->firstchild, c->sibling); + //message("Downpass: %f", c->field_x.F_000); + /* Loop over the siblings and propagate accelerations. */ for (cp = c->firstchild; cp != c->sibling; cp = cp->sibling) { - /* cp->a[0] += c->a[0]; */ - /* cp->a[1] += c->a[1]; */ - /* cp->a[2] += c->a[2]; */ + + /* Distance over which to shift */ + dx = cp->new.M_100 - c->new.M_100; + dy = cp->new.M_010 - c->new.M_010; + dz = cp->new.M_001 - c->new.M_001; + + /* Propagate the tensors downwards */ + shiftAndAddTensor(c->field_x, &cp->field_x, dx, dy, dz); + shiftAndAddTensor(c->field_y, &cp->field_y, dx, dy, dz); + shiftAndAddTensor(c->field_z, &cp->field_z, dx, dy, dz); + /* Recurse */ - //comp_down(cp); + comp_down(cp); } } else { /* Otherwise, propagate the accelerations to the siblings. 
*/ for (k = 0; k < count; k++) { - /* parts[k].a[0] += c->a[0]; */ - /* parts[k].a[1] += c->a[1]; */ - /* parts[k].a[2] += c->a[2]; */ + + dx = parts[k].x[0] - c->new.M_100; + dy = parts[k].x[1] - c->new.M_010; + dz = parts[k].x[2] - c->new.M_001; + + float ax, ay, az; + + ax = const_G * applyFieldAcceleration(c->field_x, dx, dy, dz); + ay = const_G * applyFieldAcceleration(c->field_y, dx, dy, dz); + az = const_G * applyFieldAcceleration(c->field_z, dx, dy, dz); + + + + //message("Apply to particle: %f %f", ax, c->field_x.F_000); + + parts[k].a[0] += ax; + parts[k].a[1] += ay; + parts[k].a[2] += az; } } @@ -320,9 +344,9 @@ void cell_split(struct cell *c, struct qsched *s) { sizeof(struct cell *), 1); #endif - c->down_tid = qsched_addtask(s, task_type_down, task_flag_none, &c, - sizeof(struct cell *), 1); - qsched_addlock(s, c->down_tid, c->res); + //c->down_tid = qsched_addtask(s, task_type_down, task_flag_none, &c, + // sizeof(struct cell *), 1); + //qsched_addlock(s, c->down_tid, c->res); } /* Does this cell need to be split? */ @@ -447,11 +471,11 @@ void cell_split(struct cell *c, struct qsched *s) { #endif /* Link the downward tasks. */ -#ifdef COM_AS_TASK - for (k = 0; k < 8; k++) - if (progenitors[k]->count > 0) - qsched_addunlock(s, c->down_tid, progenitors[k]->down_tid); -#endif + //#ifdef COM_AS_TASK + //for (k = 0; k < 8; k++) + // if (progenitors[k]->count > 0) + // qsched_addunlock(s, c->down_tid, progenitors[k]->down_tid); + //#endif } /* does the cell need to be split? */ @@ -528,21 +552,28 @@ static inline void iact_pair_pc(struct cell *ci, struct cell *cj) { if ( ! are_neighbours(cp, cps) ) { /* Distance between cells */ - r_x = ci->new.M_100 - cj->new.M_100; - r_y = ci->new.M_010 - cj->new.M_010; - r_z = ci->new.M_001 - cj->new.M_001; + r_x = cp->new.M_100 - cps->new.M_100; + r_y = cp->new.M_010 - cps->new.M_010; + r_z = cp->new.M_001 - cps->new.M_001; r2 = r_x * r_x + r_y * r_y + r_z * r_z; inv_r = 1. 
/ sqrt( r2 ); + + /* Compute the field tensors */ - computeFieldTensors_x( r_x, r_y, r_z, inv_r, cj->new, ci->field_x ); - computeFieldTensors_y( r_x, r_y, r_z, inv_r, cj->new, ci->field_y ); - computeFieldTensors_z( r_x, r_y, r_z, inv_r, cj->new, ci->field_z ); + computeFieldTensors_x( r_x, r_y, r_z, inv_r, cps->new, &cp->field_x ); + computeFieldTensors_y( r_x, r_y, r_z, inv_r, cps->new, &cp->field_y ); + computeFieldTensors_z( r_x, r_y, r_z, inv_r, cps->new, &cp->field_z ); + + computeFieldTensors_x( -r_x, -r_y, -r_z, inv_r, cp->new, &cps->field_x ); + computeFieldTensors_y( -r_x, -r_y, -r_z, inv_r, cp->new, &cps->field_y ); + computeFieldTensors_z( -r_x, -r_y, -r_z, inv_r, cp->new, &cps->field_z ); - computeFieldTensors_x( -r_x, -r_y, -r_z, inv_r, ci->new, cj->field_x ); - computeFieldTensors_y( -r_x, -r_y, -r_z, inv_r, ci->new, cj->field_y ); - computeFieldTensors_z( -r_x, -r_y, -r_z, inv_r, ci->new, cj->field_z ); + /* message("r= %f %f %f %f M_000= %f %f F_000= %f", r_x, r_y, r_z, sqrt(r2), */ + /* cps->new.M_000, */ + /* cps->new.M_000 * D_100( r_x, r_y, r_z, inv_r), */ + /* cp->field_x.F_000); */ } } @@ -1229,17 +1260,37 @@ void test_bh(int N, int nr_threads, int runs, char *fileName) { /* If no input file was specified, generate random particle positions. 
*/ if (fileName == NULL || fileName[0] == 0) { - for (k = 0; k < N; k++) { - parts[k].id = k; - parts[k].x[0] = ((double)rand()) / RAND_MAX; - parts[k].x[1] = ((double)rand()) / RAND_MAX; - parts[k].x[2] = ((double)rand()) / RAND_MAX; - parts[k].mass = ((double)rand()) / RAND_MAX; - parts[k].a_legacy[0] = 0.0; - parts[k].a_legacy[1] = 0.0; - parts[k].a_legacy[2] = 0.0; - } + /* for (k = 0; k < N; k++) { */ + /* parts[k].id = k; */ + /* parts[k].x[0] = ((double)rand()) / RAND_MAX; */ + /* parts[k].x[1] = ((double)rand()) / RAND_MAX; */ + /* parts[k].x[2] = ((double)rand()) / RAND_MAX; */ + /* parts[k].mass = 1.f;//((double)rand()) / RAND_MAX; */ + /* parts[k].a_legacy[0] = 0.0; */ + /* parts[k].a_legacy[1] = 0.0; */ + /* parts[k].a_legacy[2] = 0.0; */ + /* } */ + + int ii, jj, kk; + for (ii = 0; ii < 8; ++ii) { + for (jj = 0; jj < 8; ++jj) { + for (kk = 0; kk < 8; ++kk) { + + int index = 64*ii + 8*jj + kk; + + parts[index].id = index; + parts[index].x[0] = ii*0.125 + 0.0625; + parts[index].x[1] = jj*0.125 + 0.0625; + parts[index].x[2] = kk*0.125 + 0.0625; + parts[index].mass = 1.; + parts[index].a_legacy[0] = 0.f; + parts[index].a_legacy[1] = 0.f; + parts[index].a_legacy[2] = 0.f; + } + } + } + /* Otherwise, read them from a file. */ } else { file = fopen(fileName, "r");