diff --git a/examples/test_bh_new.c b/examples/test_bh_new.c index 76be2f978e548a9b6246b1d182529fd32b5e276e..0256e9bb9ecb5c2b802261e84260e3cd4a0cfe16 100644 --- a/examples/test_bh_new.c +++ b/examples/test_bh_new.c @@ -38,14 +38,14 @@ /* Some local constants. */ #define cell_pool_grow 1000 #define cell_maxparts 100 -#define task_limit 1 +#define task_limit 1e8 #define const_G 1 // 6.6738e-8 #define dist_min 0.5 /* Used for legacy walk only */ #define dist_cutoff_ratio 1.5 #define ICHECK -1 #define NO_SANITY_CHECKS -#define COM_AS_TASK +#define NO_COM_AS_TASK #define NO_COUNTERS /** Data structure for the particles. */ @@ -71,6 +71,12 @@ struct multipole { float mass; }; +struct multipole_local { + float com[3]; + float mass; +}; + + /** Data structure for the BH tree cell. */ struct cell { double loc[3]; @@ -355,8 +361,8 @@ void cell_split(struct cell *c, struct qsched *s) { for (k = 0; k < 8; k++) if (progenitors[k]->count > 0) cell_split(progenitors[k], s); -/* Link the COM tasks. */ #ifdef COM_AS_TASK + /* Link the COM tasks. */ for (k = 0; k < 8; k++) if (progenitors[k]->count > 0) qsched_addunlock(s, progenitors[k]->com_tid, c->com_tid); @@ -416,49 +422,42 @@ static inline int are_neighbours(struct cell *ci, struct cell *cj) { * @param count The number of particles in the array * @param cj The cell with which the particles will interact */ -static inline void make_interact_pc(struct part_local *parts, int count, - double *loc, struct cell *cj) { +static inline void make_interact_pc(struct part_local *parts, int part_count, + struct multipole_local mults[7], int mult_count) { - int j, k; - float com[3] = {0.0, 0.0, 0.0}; - float mcom, dx[3] = {0.0, 0.0, 0.0}, r2, ir, w; + int i, j, k; + float dx[3] = {0.0, 0.0, 0.0}, r2, ir, w; #ifdef SANITY_CHECKS /* Sanity checks */ - if (count == 0) error("Empty cell!"); + if (part_count == 0) error("Empty cell!"); /* Sanity check. */ - if (cj->new.mass == 0.0) { - message("%e %e %e %d %p %d %p", cj->new.com[0], cj->new.com[1], - cj->new.com[2], cj->count, cj, cj->split, cj->sibling); - - for (j = 0; j < cj->count; ++j) - message("part %d mass=%e id=%d", j, cj->parts[j].mass, cj->parts[j].id); - - error("com does not seem to have been set."); - } + for (j = 0; j < mult_count; j++) + if (mults[j].mass == 0.0) + error("com %d does not seem to have been set.", j); #endif - /* Init the com's data. */ - for (k = 0; k < 3; k++) com[k] = cj->new.com[k] - loc[k]; - mcom = cj->new.mass; - /* Loop over every particle in leaf. */ - for (j = 0; j < count; j++) { + for (i = 0; i < part_count; i++) { - /* Compute the pairwise distance. */ - for (r2 = 0.0, k = 0; k < 3; k++) { - dx[k] = com[k] - parts[j].x[k]; - r2 += dx[k] * dx[k]; - } + /* Loop over the multipoles */ + for (j = 0; j < mult_count; j++) { + + /* Compute the pairwise distance. */ + for (r2 = 0.0, k = 0; k < 3; k++) { + dx[k] = mults[j].com[k] - parts[i].x[k]; + r2 += dx[k] * dx[k]; + } - /* Apply the gravitational acceleration. */ - ir = 1.0f / sqrtf(r2); - w = mcom * const_G * ir * ir * ir; - for (k = 0; k < 3; k++) parts[j].a[k] += w * dx[k]; + /* Apply the gravitational acceleration. */ + ir = 1.0f / sqrtf(r2); + w = mults[j].mass * const_G * ir * ir * ir; + for (k = 0; k < 3; k++) parts[i].a[k] += w * dx[k]; + } /* loop over every multipoles. */ } /* loop over every particle. */ } @@ -475,8 +474,9 @@ static inline void make_interact_pc(struct part_local *parts, int count, static inline void iact_pair_pc(struct cell *ci, struct cell *cj) { struct part_local *parts = NULL; + struct multipole_local mult_local[8]; struct cell *cp, *cps; - int count; + int part_count, mult_count; #ifdef SANITY_CHECKS if (ci->h != cj->h) @@ -489,13 +489,13 @@ static inline void iact_pair_pc(struct cell *ci, struct cell *cj) { /* Let's loop over all siblings of ci */ for (cp = ci->firstchild; cp != ci->sibling; cp = cp->sibling) { - count = cp->count; + part_count = cp->count; /* Get local copies of the particle data in cp. */ - if ((parts = (struct part_local *) malloc(sizeof(struct part_local) * count)) == NULL) + if ((parts = (struct part_local *) malloc(sizeof(struct part_local) * part_count)) == NULL) error("Failed to allocate local parts."); - for (int k = 0; k < count; k++) { + for (int k = 0; k < part_count; k++) { for (int j = 0; j < 3; j++) { parts[k].x[j] = cp->parts[k].x[j] - cp->loc[j]; parts[k].a[j] = 0.0f; @@ -503,21 +503,28 @@ static inline void iact_pair_pc(struct cell *ci, struct cell *cj) { parts[k].mass = cp->parts[k].mass; } - + mult_count = 0; + /* Now loop over all the other cells */ for (cps = cj->firstchild; cps != cj->sibling; cps = cps->sibling) { - /* And interact with the ones that are not neighbours */ + /* And build the list of multipoles to interact with */ if ( ! are_neighbours(cp, cps) ) { - make_interact_pc(parts, cp->count, cp->loc, cps); + mult_local[mult_count].com[0] = cps->new.com[0] - cp->loc[0]; + mult_local[mult_count].com[1] = cps->new.com[1] - cp->loc[1]; + mult_local[mult_count].com[2] = cps->new.com[2] - cp->loc[2]; + mult_local[mult_count].mass = cps->new.mass; + mult_count++; } } + /* Perform the interaction between the particles and the list of multipoles */ + make_interact_pc(parts, part_count, mult_local, mult_count); /* Clean up local parts? */ - for (int k = 0; k < count; k++) { + for (int k = 0; k < part_count; k++) { for (int j = 0; j < 3; j++) cp->parts[k].a[j] += parts[k].a[j]; } free(parts); @@ -1266,7 +1273,7 @@ void test_bh(int N, int nr_threads, int runs, char *fileName) { } message("Average number of parts per leaf is %f.", ((double)N) / nr_leaves); -#if 1//ICHECK > 0 +#if ICHECK > 0 printf("----------------------------------------------------------\n"); /* Do a N^2 interactions calculation */ @@ -1326,7 +1333,7 @@ void test_bh(int N, int nr_threads, int runs, char *fileName) { "direct", "CoMs"); printf("task counts: [ %10i %10i %10i %10i %10i ] (legacy).\n", 0, 0, countMultipoles, countPairs, countCoMs); - printf("task counts: [ %10i %10i %10i %10i %10i ] (legacy).\n", counts[0], + printf("task counts: [ %10i %10i %10i %10i %10i ] (new).\n", counts[0], counts[1], counts[2], 0, counts[3]); /* Loop over the number of runs. */ @@ -1359,9 +1366,9 @@ void test_bh(int N, int nr_threads, int runs, char *fileName) { #endif /* Dump the tasks. */ - /* for ( k = 0 ; k < s.count ; k++ ) - printf( " %i %i %lli %lli\n" , s.tasks[k].type , s.tasks[k].qid , - s.tasks[k].tic , s.tasks[k].toc ); */ + /* for ( k = 0 ; k < s.count ; k++ ) */ + /* printf( " %i %i %lli %lli\n" , s.tasks[k].type , s.tasks[k].qid , */ + /* s.tasks[k].tic , s.tasks[k].toc ); */ /* Dump the costs. */ message("costs: setup=%lli ticks, run=%lli ticks.", tot_setup, @@ -1369,8 +1376,8 @@ void test_bh(int N, int nr_threads, int runs, char *fileName) { /* Dump the timers. */ for (k = 0; k < qsched_timer_count; k++) - message("timer %s is %lli ticks.", qsched_timer_names[k], - s.timers[k] / runs); + message("timer %s is %lli ticks.", qsched_timer_names[k], s.timers[k] / runs); + /* Dump the per-task type timers. */ printf("task timers: [ ");