diff --git a/src/cell.h b/src/cell.h index 1bdf67c5d8a25b490b9e4bb7d530a6fd7f093e05..58a7b31d3d8d882b8233f3935c206d93bfb57fcb 100644 --- a/src/cell.h +++ b/src/cell.h @@ -117,7 +117,7 @@ struct cell { struct link *density, *force, *grav; int nr_density, nr_force, nr_grav; - /* The ghost task to link density to interactions. */ + /* The hierarchical tasks. */ struct task *ghost, *init, *drift, *kick; /* Task receiving data. */ diff --git a/src/engine.c b/src/engine.c index aad07e21b87352b1f2c11624a024f4f010e07144..dd9f6cc5910a6049f2296e58b4917f7caecfdbd0 100644 --- a/src/engine.c +++ b/src/engine.c @@ -95,7 +95,8 @@ struct link *engine_addlink(struct engine *e, struct link *l, struct task *t) { } /** - * @brief Generate the ghosts all the O(Npart) tasks for a hierarchy of cells. + * @brief Generate the hierarchical tasks for a hierarchy of cells - i.e. all + * the O(Npart) tasks. * * Tasks are only created here. The dependencies will be added later on. * @@ -104,8 +105,8 @@ struct link *engine_addlink(struct engine *e, struct link *l, struct task *t) { * @param super The super #cell. */ -void engine_make_ghost_tasks(struct engine *e, struct cell *c, - struct cell *super) { +void engine_make_hierarchical_tasks(struct engine *e, struct cell *c, + struct cell *super) { struct scheduler *s = &e->sched; const int is_with_external_gravity = @@ -157,7 +158,7 @@ void engine_make_ghost_tasks(struct engine *e, struct cell *c, if (c->split) for (int k = 0; k < 8; k++) if (c->progeny[k] != NULL) - engine_make_ghost_tasks(e, c->progeny[k], super); + engine_make_hierarchical_tasks(e, c->progeny[k], super); } /** @@ -1458,10 +1459,9 @@ void engine_maketasks(struct engine *e) { depend on the sorts of its progeny. */ engine_count_and_link_tasks(e); - /* Append a ghost task to each cell, and add kick tasks to the - super cells. */ + /* Append hierarchical tasks to each cells */ for (int k = 0; k < nr_cells; k++) - engine_make_ghost_tasks(e, &cells[k], NULL); + engine_make_hierarchical_tasks(e, &cells[k], NULL); /* Run through the tasks and make force tasks for each density task. Each force task depends on the cell ghosts and unlocks the kick task diff --git a/src/runner.c b/src/runner.c index 9e920eec68f66e40e7b9cd8b10084898240cdbaf..c59c80ffb048eb787a14e8096e6ace76377f5981 100644 --- a/src/runner.c +++ b/src/runner.c @@ -92,7 +92,7 @@ const char runner_flip[27] = {1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, * @param c cell * @param timer 1 if the time is to be recorded. */ -void runner_dograv_external(struct runner *r, struct cell *c, int timer) { +void runner_do_grav_external(struct runner *r, struct cell *c, int timer) { struct gpart *g, *gparts = c->gparts; int i, k, gcount = c->gcount; @@ -105,7 +105,7 @@ void runner_dograv_external(struct runner *r, struct cell *c, int timer) { /* Recurse? */ if (c->split) { for (k = 0; k < 8; k++) - if (c->progeny[k] != NULL) runner_dograv_external(r, c->progeny[k], 0); + if (c->progeny[k] != NULL) runner_do_grav_external(r, c->progeny[k], 0); return; } @@ -123,6 +123,7 @@ void runner_dograv_external(struct runner *r, struct cell *c, int timer) { if (g->ti_end <= ti_current) { external_gravity(potential, constants, g); + } } if (timer) TIMER_TOC(timer_dograv_external); @@ -135,7 +136,7 @@ void runner_dograv_external(struct runner *r, struct cell *c, int timer) { * @param N The number of entries. */ -void runner_dosort_ascending(struct entry *sort, int N) { +void runner_do_sort_ascending(struct entry *sort, int N) { struct { short int lo, hi; @@ -217,7 +218,7 @@ void runner_dosort_ascending(struct entry *sort, int N) { * for recursive calls. */ -void runner_dosort(struct runner *r, struct cell *c, int flags, int clock) { +void runner_do_sort(struct runner *r, struct cell *c, int flags, int clock) { struct entry *finger; struct entry *fingers[8]; @@ -251,7 +252,7 @@ void runner_dosort(struct runner *r, struct cell *c, int flags, int clock) { for (k = 0; k < 8; k++) { if (c->progeny[k] == NULL) continue; missing = flags & ~c->progeny[k]->sorted; - if (missing) runner_dosort(r, c->progeny[k], missing, 0); + if (missing) runner_do_sort(r, c->progeny[k], missing, 0); } /* Loop over the 13 different sort arrays. */ @@ -341,7 +342,7 @@ void runner_dosort(struct runner *r, struct cell *c, int flags, int clock) { if (flags & (1 << j)) { sort[j * (count + 1) + count].d = FLT_MAX; sort[j * (count + 1) + count].i = 0; - runner_dosort_ascending(&sort[j * (count + 1)], count); + runner_do_sort_ascending(&sort[j * (count + 1)], count); c->sorted |= (1 << j); } } @@ -369,7 +370,7 @@ void runner_dosort(struct runner *r, struct cell *c, int flags, int clock) { * @param c The cell. * @param timer 1 if the time is to be recorded. */ -void runner_doinit(struct runner *r, struct cell *c, int timer) { +void runner_do_init(struct runner *r, struct cell *c, int timer) { struct part *const parts = c->parts; struct gpart *const gparts = c->gparts; @@ -382,7 +383,7 @@ void runner_doinit(struct runner *r, struct cell *c, int timer) { /* Recurse? */ if (c->split) { for (int k = 0; k < 8; k++) - if (c->progeny[k] != NULL) runner_doinit(r, c->progeny[k], 0); + if (c->progeny[k] != NULL) runner_do_init(r, c->progeny[k], 0); return; } else { @@ -423,7 +424,7 @@ void runner_doinit(struct runner *r, struct cell *c, int timer) { * @param c The cell. */ -void runner_doghost(struct runner *r, struct cell *c) { +void runner_do_ghost(struct runner *r, struct cell *c) { struct part *p, *parts = c->parts; struct xpart *xp, *xparts = c->xparts; @@ -446,7 +447,7 @@ void runner_doghost(struct runner *r, struct cell *c) { /* Recurse? */ if (c->split) { for (int k = 0; k < 8; k++) - if (c->progeny[k] != NULL) runner_doghost(r, c->progeny[k]); + if (c->progeny[k] != NULL) runner_do_ghost(r, c->progeny[k]); return; } @@ -568,7 +569,7 @@ void runner_doghost(struct runner *r, struct cell *c) { if (count) message("Smoothing length failed to converge on %i particles.", count); - TIMER_TOC(timer_doghost); + TIMER_TOC(timer_do_ghost); } /** @@ -578,7 +579,7 @@ void runner_doghost(struct runner *r, struct cell *c) { * @param c The cell. * @param timer Are we timing this ? */ -void runner_dodrift(struct runner *r, struct cell *c, int timer) { +void runner_do_drift(struct runner *r, struct cell *c, int timer) { const double timeBase = r->e->timeBase; const double dt = (r->e->ti_current - r->e->ti_old) * timeBase; @@ -685,7 +686,7 @@ void runner_dodrift(struct runner *r, struct cell *c, int timer) { for (int k = 0; k < 8; k++) if (c->progeny[k] != NULL) { struct cell *cp = c->progeny[k]; - runner_dodrift(r, cp, 0); + runner_do_drift(r, cp, 0); dx_max = fmaxf(dx_max, cp->dx_max); h_max = fmaxf(h_max, cp->h_max); @@ -706,7 +707,7 @@ void runner_dodrift(struct runner *r, struct cell *c, int timer) { * @param c The cell. * @param timer Are we timing this ? */ -void runner_dokick(struct runner *r, struct cell *c, int timer) { +void runner_do_kick(struct runner *r, struct cell *c, int timer) { const float global_dt_min = r->e->dt_min; const float global_dt_max = r->e->dt_max; @@ -998,7 +999,7 @@ void runner_dokick(struct runner *r, struct cell *c, int timer) { struct cell *const cp = c->progeny[k]; /* Recurse */ - runner_dokick(r, cp, 0); + runner_do_kick(r, cp, 0); /* And aggregate */ updated += cp->updated; @@ -1044,7 +1045,7 @@ void runner_dokick(struct runner *r, struct cell *c, int timer) { * @param c The cell. * @param timer Are we timing this ? */ -void runner_dorecv_cell(struct runner *r, struct cell *c, int timer) { +void runner_do_recv_cell(struct runner *r, struct cell *c, int timer) { const struct part *const parts = c->parts; const struct gpart *const gparts = c->gparts; @@ -1141,7 +1142,7 @@ void *runner_main(void *data) { error("Unknown task subtype."); break; case task_type_sort: - runner_dosort(r, ci, t->flags, 1); + runner_do_sort(r, ci, t->flags, 1); break; case task_type_sub: if (t->subtype == task_subtype_density) @@ -1154,21 +1155,21 @@ void *runner_main(void *data) { error("Unknown task subtype."); break; case task_type_init: - runner_doinit(r, ci, 1); + runner_do_init(r, ci, 1); break; case task_type_ghost: - runner_doghost(r, ci); + runner_do_ghost(r, ci); break; case task_type_drift: - runner_dodrift(r, ci, 1); + runner_do_drift(r, ci, 1); break; case task_type_kick: - runner_dokick(r, ci, 1); + runner_do_kick(r, ci, 1); break; case task_type_send: break; case task_type_recv: - runner_dorecv_cell(r, ci, 1); + runner_do_recv_cell(r, ci, 1); break; /* case task_type_grav_pp: */ /* if (t->cj == NULL) */ @@ -1182,11 +1183,11 @@ void *runner_main(void *data) { case task_type_grav_up: runner_dograv_up(r, t->ci); break; - /* case task_type_grav_down: */ - /* runner_dograv_down(r, t->ci); */ - /* break; */ + case task_type_grav_down: + runner_dograv_down(r, t->ci); + break; case task_type_grav_external: - runner_dograv_external(r, t->ci, 1); + runner_do_grav_external(r, t->ci, 1); break; case task_type_part_sort: space_do_parts_sort(); diff --git a/src/runner.h b/src/runner.h index 7953b33361ca59e51e6e5ea07dde59db016239b0..f53822cdbf9608a473fc72e6a2d049cfd6d3c5a2 100644 --- a/src/runner.h +++ b/src/runner.h @@ -47,12 +47,12 @@ struct runner { }; /* Function prototypes. */ -void runner_doghost(struct runner *r, struct cell *c); -void runner_dosort(struct runner *r, struct cell *c, int flag, int clock); -void runner_dogsort(struct runner *r, struct cell *c, int flag, int clock); -void runner_dokick(struct runner *r, struct cell *c, int timer); -void runner_dodrift(struct runner *r, struct cell *c, int timer); -void runner_doinit(struct runner *r, struct cell *c, int timer); +void runner_do_ghost(struct runner *r, struct cell *c); +void runner_do_sort(struct runner *r, struct cell *c, int flag, int clock); +void runner_do_gsort(struct runner *r, struct cell *c, int flag, int clock); +void runner_do_kick(struct runner *r, struct cell *c, int timer); +void runner_do_drift(struct runner *r, struct cell *c, int timer); +void runner_do_init(struct runner *r, struct cell *c, int timer); void *runner_main(void *data); #endif /* SWIFT_RUNNER_H */ diff --git a/src/runner_doiact.h b/src/runner_doiact.h index de339db6133fcc829bdc6ee0ce9e537b68982422..97e22138b6c09750c1737741f58e61744a891c98 100644 --- a/src/runner_doiact.h +++ b/src/runner_doiact.h @@ -1982,8 +1982,8 @@ void DOSUB1(struct runner *r, struct cell *ci, struct cell *cj, int sid, else if (ci->ti_end_min <= ti_current || cj->ti_end_min <= ti_current) { /* Do any of the cells need to be sorted first? */ - if (!(ci->sorted & (1 << sid))) runner_dosort(r, ci, (1 << sid), 1); - if (!(cj->sorted & (1 << sid))) runner_dosort(r, cj, (1 << sid), 1); + if (!(ci->sorted & (1 << sid))) runner_do_sort(r, ci, (1 << sid), 1); + if (!(cj->sorted & (1 << sid))) runner_do_sort(r, cj, (1 << sid), 1); /* Compute the interactions. */ DOPAIR1(r, ci, cj); @@ -2251,8 +2251,8 @@ void DOSUB2(struct runner *r, struct cell *ci, struct cell *cj, int sid, else if (ci->ti_end_min <= ti_current || cj->ti_end_min <= ti_current) { /* Do any of the cells need to be sorted first? */ - if (!(ci->sorted & (1 << sid))) runner_dosort(r, ci, (1 << sid), 1); - if (!(cj->sorted & (1 << sid))) runner_dosort(r, cj, (1 << sid), 1); + if (!(ci->sorted & (1 << sid))) runner_do_sort(r, ci, (1 << sid), 1); + if (!(cj->sorted & (1 << sid))) runner_do_sort(r, cj, (1 << sid), 1); /* Compute the interactions. */ DOPAIR2(r, ci, cj); @@ -2850,7 +2850,7 @@ void DOSUB_SUBSET(struct runner *r, struct cell *ci, struct part *parts, sid = sortlistID[sid]; /* Do any of the cells need to be sorted first? */ - if (!(cj->sorted & (1 << sid))) runner_dosort(r, cj, (1 << sid), 1); + if (!(cj->sorted & (1 << sid))) runner_do_sort(r, cj, (1 << sid), 1); /* Compute the interactions. */ DOPAIR_SUBSET(r, ci, parts, ind, count, cj); diff --git a/src/runner_doiact_grav.h b/src/runner_doiact_grav.h index 1007b09ed901095b7c2f96b9b41934e512de126d..a4d34e802461753997c23108bfb5cefbac73f50f 100644 --- a/src/runner_doiact_grav.h +++ b/src/runner_doiact_grav.h @@ -25,6 +25,178 @@ #include "clocks.h" #include "part.h" +/** + * @brief Compute the sorted gravity interactions between a cell pair. + * + * @param r The #runner. + * @param ci The first #cell. + * @param cj The second #cell. + */ + +void runner_dopair_grav_new(struct runner *r, struct cell *ci, + struct cell *cj) { + + struct engine *restrict e = r->e; + int pid, pjd, k, sid; + double rshift, shift[3] = {0.0, 0.0, 0.0}, nshift[3]; + struct entry *restrict sort_i, *restrict sort_j; + struct gpart *restrict pi, *restrict pj, *restrict parts_i, *restrict parts_j; + double pix[3]; + float dx[3], r2, h_max, di, dj; + int count_i, count_j, cnj, cnj_new; + const int ti_current = e->ti_current; + struct multipole m; +#ifdef VECTORIZE + int icount = 0; + float r2q[VEC_SIZE] __attribute__((aligned(16))); + float dxq[3 * VEC_SIZE] __attribute__((aligned(16))); + struct gpart *piq[VEC_SIZE], *pjq[VEC_SIZE]; +#endif + TIMER_TIC + + /* Anything to do here? */ + if (ci->ti_end_min > ti_current && cj->ti_end_min > ti_current) return; + + /* Get the sort ID. */ + sid = space_getsid(e->s, &ci, &cj, shift); + + /* Make sure the cells are sorted. */ + runner_dogsort(r, ci, (1 << sid), 0); + runner_dogsort(r, cj, (1 << sid), 0); + + /* Have the cells been sorted? */ + if (!(ci->gsorted & (1 << sid)) || !(cj->gsorted & (1 << sid))) + error("Trying to interact unsorted cells."); + + /* Get the cutoff shift. */ + for (rshift = 0.0, k = 0; k < 3; k++) + rshift += shift[k] * runner_shift[3 * sid + k]; + + /* Pick-out the sorted lists. */ + sort_i = &ci->gsort[sid * (ci->count + 1)]; + sort_j = &cj->gsort[sid * (cj->count + 1)]; + + /* Get some other useful values. */ + h_max = + sqrtf(ci->h[0] * ci->h[0] + ci->h[1] * ci->h[1] + ci->h[2] * ci->h[2]) * + const_theta_max; + count_i = ci->gcount; + count_j = cj->gcount; + parts_i = ci->gparts; + parts_j = cj->gparts; + cnj = count_j; + multipole_reset(&m); + nshift[0] = -shift[0]; + nshift[1] = -shift[1]; + nshift[2] = -shift[2]; + + /* Loop over the parts in ci. */ + for (pid = count_i - 1; pid >= 0; pid--) { + + /* Get a hold of the ith part in ci. */ + pi = &parts_i[sort_i[pid].i]; + if (pi->ti_end > ti_current) continue; + di = sort_i[pid].d + h_max - rshift; + + for (k = 0; k < 3; k++) pix[k] = pi->x[k] - shift[k]; + + /* Loop over the parts in cj. */ + for (pjd = 0; pjd < cnj && sort_j[pjd].d < di; pjd++) { + + /* Get a pointer to the jth particle. */ + pj = &parts_j[sort_j[pjd].i]; + + /* Compute the pairwise distance. */ + r2 = 0.0f; + for (k = 0; k < 3; k++) { + dx[k] = pix[k] - pj->x[k]; + r2 += dx[k] * dx[k]; + } + +#ifndef VECTORIZE + + // if ( pi->part->id == 3473472412525 || pj->part->id == 3473472412525 ) + // message( "interacting particles pi=%lli and pj=%lli with r=%.3e in + // cells %lli/%lli." , pi->part->id , pj->part->id , sqrtf(r2) , ((long + // long int)ci) / sizeof(struct cell) , ((long long int)cj) / + // sizeof(struct cell) ); + + runner_iact_grav(r2, dx, pi, pj); + +#else + + /* Add this interaction to the queue. */ + r2q[icount] = r2; + dxq[3 * icount + 0] = dx[0]; + dxq[3 * icount + 1] = dx[1]; + dxq[3 * icount + 2] = dx[2]; + piq[icount] = pi; + pjq[icount] = pj; + icount += 1; + + /* Flush? */ + if (icount == VEC_SIZE) { + runner_iact_vec_grav(r2q, dxq, piq, pjq); + icount = 0; + } + +#endif + + } /* loop over the parts in cj. */ + + /* Set the new limit. */ + cnj_new = pjd; + + /* Add trailing parts to the multipole. */ + for (pjd = cnj_new; pjd < cnj; pjd++) { + + /* Add the part to the multipole. */ + multipole_addpart(&m, &parts_j[sort_j[pjd].i]); + + } /* add trailing parts to the multipole. */ + + /* Set the new cnj. */ + cnj = cnj_new; + + /* Interact the ith particle with the multipole. */ + multipole_iact_mp(&m, pi, nshift); + + } /* loop over the parts in ci. */ + +#ifdef VECTORIZE + /* Pick up any leftovers. */ + if (icount > 0) + for (k = 0; k < icount; k++) + runner_iact_grav(r2q[k], &dxq[3 * k], piq[k], pjq[k]); +#endif + + /* Re-set the multipole. */ + multipole_reset(&m); + + /* Loop over the parts in cj and interact with the multipole in ci. */ + for (pid = count_i - 1, pjd = 0; pjd < count_j; pjd++) { + + /* Get the position of pj along the axis. */ + dj = sort_j[pjd].d - h_max + rshift; + + /* Add any left-over parts in cell_i to the multipole. */ + while (pid >= 0 && sort_i[pid].d < dj) { + + /* Add this particle to the multipole. */ + multipole_addpart(&m, &parts_i[sort_i[pid].i]); + + /* Decrease pid. */ + pid -= 1; + } + + /* Interact pj with the multipole. */ + multipole_iact_mp(&m, &parts_j[sort_j[pjd].i], shift); + + } /* loop over the parts in cj and interact with the multipole. */ + + TIMER_TOC(TIMER_DOPAIR); +} + /** * @brief Compute the recursive upward sweep, i.e. construct the * multipoles in a cell hierarchy. diff --git a/src/timers.h b/src/timers.h index 81e5a674eddc3662e8db567ff7fc12302320320f..92b685ebe9b11d49c4703e5837d35cffdca81c4d 100644 --- a/src/timers.h +++ b/src/timers.h @@ -45,7 +45,7 @@ enum { timer_dosub_force, timer_dosub_grav, timer_dopair_subset, - timer_doghost, + timer_do_ghost, timer_dorecv_cell, timer_gettask, timer_qget, diff --git a/tests/test27cells.c b/tests/test27cells.c index bcd964620c5c729094a74c79275cdb66180f8dd3..504c07b4db391a2bc6fa60b7ec354367008aeec1 100644 --- a/tests/test27cells.c +++ b/tests/test27cells.c @@ -126,7 +126,7 @@ struct cell *make_cell(size_t n, double *offset, double size, double h, cell->sorted = 0; cell->sort = NULL; cell->sortsize = 0; - runner_dosort(NULL, cell, 0x1FFF, 0); + runner_do_sort(NULL, cell, 0x1FFF, 0); return cell; } diff --git a/tests/testPair.c b/tests/testPair.c index 07e576332f81b0471f35f09dfcda21ad535adc8b..a70cb381fd9a6dec061a100eeba17bfd54b0e973 100644 --- a/tests/testPair.c +++ b/tests/testPair.c @@ -91,7 +91,7 @@ struct cell *make_cell(size_t n, double *offset, double size, double h, cell->sorted = 0; cell->sort = NULL; cell->sortsize = 0; - runner_dosort(NULL, cell, 0x1FFF, 0); + runner_do_sort(NULL, cell, 0x1FFF, 0); return cell; } diff --git a/tests/testSPHStep.c b/tests/testSPHStep.c index 223078ecb637e64d94e37cdf8c0f60a86bdd5ff7..3af0c6ad1afdeab749a378153fd1a8e016f29659 100644 --- a/tests/testSPHStep.c +++ b/tests/testSPHStep.c @@ -141,7 +141,7 @@ int main() { /* Compute density */ runner_doself1_density(&r, ci); - runner_doghost(&r, ci); + runner_do_ghost(&r, ci); message("h=%f rho=%f N_ngb=%f", p->h, p->rho, p->density.wcount); message("c=%f", p->force.c); diff --git a/tests/testTimeIntegration.c b/tests/testTimeIntegration.c index f3802888bccc40a424d659cde2605d12c9268e47..03893daf3530df040e5a5630bc6dc1d930ddcd1b 100644 --- a/tests/testTimeIntegration.c +++ b/tests/testTimeIntegration.c @@ -115,10 +115,10 @@ int main() { c.parts[0].a_hydro[1] = -(G * M_sun * c.parts[0].x[1] / r * r * r); /* Kick... */ - runner_dokick(&run, &c, 0); + runner_do_kick(&run, &c, 0); /* Drift... */ - runner_dodrift(&run, &c, 0); + runner_do_drift(&run, &c, 0); } /* Clean-up */