diff --git a/examples/test_fmm.c b/examples/test_fmm.c index 03ea2c5f93bb24a23915cac365bfa642cb870154..a897a8cd8c11e82579e4b73254702fff34c15cb6 100644 --- a/examples/test_fmm.c +++ b/examples/test_fmm.c @@ -58,7 +58,8 @@ struct part { // }; float mass; int id; -}; // __attribute__((aligned(32))); +};// __attribute__((aligned(32))); + struct part_local { float x[3]; @@ -94,10 +95,12 @@ struct cell { struct multipole new; }; - int res, com_tid; + double a[3]; + + int res, com_tid, down_tid; struct index *indices; -} __attribute__((aligned(128))); +} __attribute__((aligned(32))); /** Task types. */ enum task_type { @@ -105,6 +108,7 @@ enum task_type { task_type_pair, task_type_pair_pc, task_type_com, + task_type_down, task_type_count }; @@ -203,8 +207,55 @@ void comp_com(struct cell *c) { c->new.com[2] = 0.0; c->new.mass = 0.0; } + + /* Reset COM acceleration */ + c->a[0] = 0.; + c->a[1] = 0.; + c->a[2] = 0.; } + + +/** + * @brief Add a cell's acceleration to all its siblings + * + * @param c The #cell. + */ +void comp_down(struct cell *c) { + + int k, count = c->count; + struct cell *cp; + struct part *parts = c->parts; + + // message("h=%f a= %f %f %f", c->h, c->a[0], c->a[1], c->a[2]); + + + if (c->split) { + + //message("first= %p, sibling= %p", c->firstchild, c->sibling); + /* Loop over the siblings and propagate accelerations. */ + for (cp = c->firstchild; cp != c->sibling; cp = cp->sibling) { + cp->a[0] += c->a[0]; + cp->a[1] += c->a[1]; + cp->a[2] += c->a[2]; + + /* Recurse */ + //comp_down(cp); + } + + } else { + + /* Otherwise, propagate the accelerations to the siblings. */ + for (k = 0; k < count; k++) { + parts[k].a[0] += c->a[0]; + parts[k].a[1] += c->a[1]; + parts[k].a[2] += c->a[2]; + } + } + +} + + /** * @brief Sort the parts into eight bins along the given pivots and * fill the multipoles. Also adds the hierarchical resources @@ -234,13 +285,19 @@ void cell_split(struct cell *c, struct qsched *s) { if (c->res == qsched_res_none) c->res = qsched_addres(s, qsched_owner_none, qsched_res_none); -/* Attach a center-of-mass task to the cell. */ +/* Attach a center-of-mass and downward pass task to the cell. */ + if (count > 0) { + #ifdef COM_AS_TASK - if (count > 0) c->com_tid = qsched_addtask(s, task_type_com, task_flag_none, &c, sizeof(struct cell *), 1); #endif + c->down_tid = qsched_addtask(s, task_type_down, task_flag_none, &c, + sizeof(struct cell *), 1); + qsched_addlock(s, c->down_tid, c->res); + } + /* Does this cell need to be split? */ if (count > cell_maxparts) { @@ -355,13 +412,21 @@ void cell_split(struct cell *c, struct qsched *s) { for (k = 0; k < 8; k++) if (progenitors[k]->count > 0) cell_split(progenitors[k], s); -/* Link the COM tasks. */ + /* Link the COM tasks. */ #ifdef COM_AS_TASK for (k = 0; k < 8; k++) if (progenitors[k]->count > 0) qsched_addunlock(s, progenitors[k]->com_tid, c->com_tid); #endif + /* Link the downward tasks. */ +#ifdef COM_AS_TASK + for (k = 0; k < 8; k++) + if (progenitors[k]->count > 0) + qsched_addunlock(s, c->down_tid, progenitors[k]->down_tid); +#endif + + } /* does the cell need to be split? */ @@ -408,58 +473,58 @@ static inline int are_neighbours(struct cell *ci, struct cell *cj) { -/** - * @brief Interacts all count particles in parts - * with the monopole in cj - * - * @param parts An array of particles - * @param count The number of particles in the array - * @param cj The cell with which the particles will interact - */ -static inline void make_interact_pc(struct part *parts, int count, struct cell *cj) { +/* /\** */ +/* * @brief Interacts all count particles in parts */ +/* * with the monopole in cj */ +/* * */ +/* * @param parts An array of particles */ +/* * @param count The number of particles in the array */ +/* * @param cj The cell with which the particles will interact */ +/* *\/ */ +/* static inline void make_interact_pc(struct part *parts, int count, struct cell *cj) { */ - int j, k; - float com[3] = {0.0, 0.0, 0.0}; - float mcom, dx[3] = {0.0, 0.0, 0.0}, r2, ir, w; +/* int j, k; */ +/* float com[3] = {0.0, 0.0, 0.0}; */ +/* float mcom, dx[3] = {0.0, 0.0, 0.0}, r2, ir, w; */ -#ifdef SANITY_CHECKS +/* #ifdef SANITY_CHECKS */ - /* Sanity checks */ - if (count == 0) error("Empty cell!"); +/* /\* Sanity checks *\/ */ +/* if (count == 0) error("Empty cell!"); */ - /* Sanity check. */ - if (cj->new.mass == 0.0) { - message("%e %e %e %d %p %d %p", cj->new.com[0], cj->new.com[1], - cj->new.com[2], cj->count, cj, cj->split, cj->sibling); +/* /\* Sanity check. *\/ */ +/* if (cj->new.mass == 0.0) { */ +/* message("%e %e %e %d %p %d %p", cj->new.com[0], cj->new.com[1], */ +/* cj->new.com[2], cj->count, cj, cj->split, cj->sibling); */ - for (j = 0; j < cj->count; ++j) - message("part %d mass=%e id=%d", j, cj->parts[j].mass, cj->parts[j].id); +/* for (j = 0; j < cj->count; ++j) */ +/* message("part %d mass=%e id=%d", j, cj->parts[j].mass, cj->parts[j].id); */ - error("com does not seem to have been set."); - } +/* error("com does not seem to have been set."); */ +/* } */ -#endif +/* #endif */ - /* Init the com's data. */ - for (k = 0; k < 3; k++) com[k] = cj->new.com[k]; - mcom = cj->new.mass; +/* /\* Init the com's data. *\/ */ +/* for (k = 0; k < 3; k++) com[k] = cj->new.com[k]; */ +/* mcom = cj->new.mass; */ - /* Loop over every particle in leaf. */ - for (j = 0; j < count; j++) { +/* /\* Loop over every particle in leaf. *\/ */ +/* for (j = 0; j < count; j++) { */ - /* Compute the pairwise distance. */ - for (r2 = 0.0, k = 0; k < 3; k++) { - dx[k] = com[k] - parts[j].x[k]; - r2 += dx[k] * dx[k]; - } +/* /\* Compute the pairwise distance. *\/ */ +/* for (r2 = 0.0, k = 0; k < 3; k++) { */ +/* dx[k] = com[k] - parts[j].x[k]; */ +/* r2 += dx[k] * dx[k]; */ +/* } */ - /* Apply the gravitational acceleration. */ - ir = 1.0f / sqrtf(r2); - w = mcom * const_G * ir * ir * ir; - for (k = 0; k < 3; k++) parts[j].a[k] += w * dx[k]; +/* /\* Apply the gravitational acceleration. *\/ */ +/* ir = 1.0f / sqrtf(r2); */ +/* w = mcom * const_G * ir * ir * ir; */ +/* for (k = 0; k < 3; k++) parts[j].a[k] += w * dx[k]; */ - } /* loop over every particle. */ -} +/* } /\* loop over every particle. *\/ */ +/* } */ @@ -474,6 +539,8 @@ static inline void make_interact_pc(struct part *parts, int count, struct cell * static inline void iact_pair_pc(struct cell *ci, struct cell *cj) { struct cell *cp, *cps; + int k; + float r2, w, ir, dx[3]; #ifdef SANITY_CHECKS if (ci->h != cj->h) @@ -489,8 +556,21 @@ static inline void iact_pair_pc(struct cell *ci, struct cell *cj) { if ( ! are_neighbours(cp, cps) ) { - make_interact_pc(cp->parts, cp->count, cps); - make_interact_pc(cps->parts, cps->count, cp ); + + + /* Compute the pairwise distance. */ + for (r2 = 0.0, k = 0; k < 3; k++) { + dx[k] = cps->new.com[k] - cp->new.com[k]; + r2 += dx[k] * dx[k]; + } + ir = 1.0f / sqrtf(r2); + + /* Apply the gravitational acceleration. */ + w = cps->new.mass * const_G * ir * ir * ir; + for (k = 0; k < 3; k++) cp->a[k] += w * dx[k]; + + w = cp->new.mass * const_G * ir * ir * ir; + for (k = 0; k < 3; k++) cps->a[k] -= w * dx[k]; } } @@ -1155,6 +1235,9 @@ void test_bh(int N, int nr_threads, int runs, char *fileName) { case task_type_com: comp_com(d[0]); break; + case task_type_down: + comp_down(d[0]); + break; default: error("Unknown task type."); } @@ -1227,7 +1310,7 @@ void test_bh(int N, int nr_threads, int runs, char *fileName) { } message("Average number of parts per leaf is %f.", ((double)N) / nr_leaves); -#if 1//ICHECK > 0 +#if 1 //ICHECK > 0 printf("----------------------------------------------------------\n"); /* Do a N^2 interactions calculation */ @@ -1283,12 +1366,12 @@ void test_bh(int N, int nr_threads, int runs, char *fileName) { // fclose(fileTime); - printf("task counts: [ %10s %10s %10s %10s %10s ]\n", "self", "pair", "m-poles", - "direct", "CoMs"); - printf("task counts: [ %10i %10i %10i %10i %10i ] (legacy).\n", 0, 0, - countMultipoles, countPairs, countCoMs); - printf("task counts: [ %10i %10i %10i %10i %10i ] (legacy).\n", counts[0], - counts[1], counts[2], 0, counts[3]); + printf("task counts: [ %10s %10s %10s %10s %10s %10s ]\n", "self", "pair", "m-poles", + "direct", "CoMs", "Downpass"); + printf("task counts: [ %10i %10i %10i %10i %10i %10i ] (legacy).\n", 0, 0, + countMultipoles, countPairs, countCoMs, 0); + printf("task counts: [ %10i %10i %10i %10i %10i %10i ] (new).\n", counts[0], + counts[1], counts[2], 0, counts[3], counts[4]); /* Loop over the number of runs. */ for (k = 0; k < runs; k++) { @@ -1302,6 +1385,7 @@ void test_bh(int N, int nr_threads, int runs, char *fileName) { /* Execute the tasks. */ tic = getticks(); qsched_run(&s, nr_threads, runner); + comp_down(root); toc_run = getticks(); message("%ith run took %lli (= %e) ticks...", k, toc_run - tic, (float)(toc_run - tic));