diff --git a/src/cell.c b/src/cell.c index b566584807f6928d906207eb1c4f78181aa423ea..0102caf4a511df0293c32511d56ee3000ed736aa 100644 --- a/src/cell.c +++ b/src/cell.c @@ -908,24 +908,29 @@ int cell_is_drift_needed(struct cell *c, const struct engine *e) { */ int cell_unskip_tasks(struct cell *c, struct scheduler *s) { - /* Reset the sort flags if the particles have moved too much. */ - if (c->dx_max_sort > space_maxreldx * c->h_max) c->sorted = 0; - /* Un-skip the density tasks involved with this cell. */ for (struct link *l = c->density; l != NULL; l = l->next) { struct task *t = l->t; - const struct cell *ci = t->ci; - const struct cell *cj = t->cj; + struct cell *ci = t->ci; + struct cell *cj = t->cj; scheduler_activate(s, t); /* Set the correct sorting flags */ if (t->type == task_type_pair) { + if (1 || ci->dx_max_sort > space_maxreldx * ci->dmin) ci->sorted = 0; + if (1 || cj->dx_max_sort > space_maxreldx * cj->dmin) cj->sorted = 0; if (!(ci->sorted & (1 << t->flags))) { - atomic_or(&ci->sorts->flags, (1 << t->flags)); +#ifdef SWIFT_DEBUG_CHECKS + if (!(ci->sorts->flags & (1 << t->flags))) + error("bad flags in sort task."); +#endif scheduler_activate(s, ci->sorts); } if (!(cj->sorted & (1 << t->flags))) { - atomic_or(&cj->sorts->flags, (1 << t->flags)); +#ifdef SWIFT_DEBUG_CHECKS + if (!(cj->sorts->flags & (1 << t->flags))) + error("bad flags in sort task."); +#endif scheduler_activate(s, cj->sorts); } } diff --git a/src/engine.c b/src/engine.c index dba17a4d6aea49a6cd66c0f0f91cde506c4db417..0306c8a7838a04388a9f985b6953733184434944 100644 --- a/src/engine.c +++ b/src/engine.c @@ -162,6 +162,7 @@ void engine_make_hierarchical_tasks(struct engine *e, struct cell *c) { scheduler_addunlock(s, c->kick1, c->drift); scheduler_addunlock(s, c->drift, c->init); + scheduler_addunlock(s, c->drift, c->sorts); /* Generate the ghost task. */ if (is_hydro) diff --git a/src/runner.c b/src/runner.c index 167ef3969e531c80075537b89bf69998214c6beb..f286e6a3ccdf457cc2bab7c9fa4175a6e5857ba3 100644 --- a/src/runner.c +++ b/src/runner.c @@ -337,13 +337,13 @@ void runner_do_sort(struct runner *r, struct cell *c, int flags, int clock) { struct xpart *xparts = c->xparts; struct entry *sort; const int count = c->count; - int off[8], inds[8], temp_i, missing; + int off[8], inds[8], temp_i; float buff[8]; TIMER_TIC /* Clean-up the flags, i.e. filter out what's already been sorted. */ - flags &= ~c->sorted; + // flags &= ~c->sorted; if (flags == 0) return; /* start by allocating the entry arrays. */ @@ -361,9 +361,8 @@ void runner_do_sort(struct runner *r, struct cell *c, int flags, int clock) { /* Fill in the gaps within the progeny. */ for (int k = 0; k < 8; k++) { - if (c->progeny[k] == NULL) continue; - missing = flags & ~c->progeny[k]->sorted; - if (missing) runner_do_sort(r, c->progeny[k], missing, 0); + if (c->progeny[k] != NULL) + runner_do_sort(r, c->progeny[k], flags, 0); } /* Loop over the 13 different sort arrays. */ @@ -458,6 +457,9 @@ void runner_do_sort(struct runner *r, struct cell *c, int flags, int clock) { c->sorted |= (1 << j); } } + + /* Finally, clear the dx_max_sort field of this cell. */ + c->dx_max_sort = 0.f; #ifdef SWIFT_DEBUG_CHECKS /* Verify the sorting. */ diff --git a/src/runner_doiact.h b/src/runner_doiact.h index 8ab099f268c85d05db70b2af11086aac02bdaf7a..c56e509342238bc7ef7e34cb28c7e950d385aad6 100644 --- a/src/runner_doiact.h +++ b/src/runner_doiact.h @@ -38,8 +38,11 @@ #define _DOPAIR_SUBSET_NAIVE(f) PASTE(runner_dopair_subset_naive, f) #define DOPAIR_SUBSET_NAIVE _DOPAIR_SUBSET_NAIVE(FUNCTION) -#define _DOPAIR_NAIVE(f) PASTE(runner_dopair_naive, f) -#define DOPAIR_NAIVE _DOPAIR_NAIVE(FUNCTION) +#define _DOPAIR1_NAIVE(f) PASTE(runner_dopair1_naive, f) +#define DOPAIR1_NAIVE _DOPAIR1_NAIVE(FUNCTION) + +#define _DOPAIR2_NAIVE(f) PASTE(runner_dopair2_naive, f) +#define DOPAIR2_NAIVE _DOPAIR2_NAIVE(FUNCTION) #define _DOSELF_NAIVE(f) PASTE(runner_doself_naive, f) #define DOSELF_NAIVE _DOSELF_NAIVE(FUNCTION) @@ -99,18 +102,152 @@ #define TIMER_DOPAIR_SUBSET _TIMER_DOPAIR_SUBSET(FUNCTION) /** - * @brief Compute the interactions between a cell pair. + * @brief Compute the interactions between a cell pair (non-symmetric). * * @param r The #runner. * @param ci The first #cell. * @param cj The second #cell. */ -void DOPAIR_NAIVE(struct runner *r, struct cell *restrict ci, +void DOPAIR1_NAIVE(struct runner *r, struct cell *restrict ci, struct cell *restrict cj) { const struct engine *e = r->e; - error("Don't use in actual runs ! Slow code !"); +#ifndef SWIFT_DEBUG_CHECKS + // error("Don't use in actual runs ! Slow code !"); +#endif + +#ifdef WITH_VECTORIZATION + int icount = 0; + float r2q[VEC_SIZE] __attribute__((aligned(16))); + float hiq[VEC_SIZE] __attribute__((aligned(16))); + float hjq[VEC_SIZE] __attribute__((aligned(16))); + float dxq[3 * VEC_SIZE] __attribute__((aligned(16))); + struct part *piq[VEC_SIZE], *pjq[VEC_SIZE]; +#endif + TIMER_TIC; + + /* Anything to do here? */ + if (!cell_is_active(ci, e) && !cell_is_active(cj, e)) return; + + const int count_i = ci->count; + const int count_j = cj->count; + struct part *restrict parts_i = ci->parts; + struct part *restrict parts_j = cj->parts; + + /* Get the relative distance between the pairs, wrapping. */ + double shift[3] = {0.0, 0.0, 0.0}; + for (int k = 0; k < 3; k++) { + if (cj->loc[k] - ci->loc[k] < -e->s->dim[k] / 2) + shift[k] = e->s->dim[k]; + else if (cj->loc[k] - ci->loc[k] > e->s->dim[k] / 2) + shift[k] = -e->s->dim[k]; + } + + /* Loop over the parts in ci. */ + for (int pid = 0; pid < count_i; pid++) { + + /* Get a hold of the ith part in ci. */ + struct part *restrict pi = &parts_i[pid]; + const float hi = pi->h; + + double pix[3]; + for (int k = 0; k < 3; k++) pix[k] = pi->x[k] - shift[k]; + const float hig2 = hi * hi * kernel_gamma2; + + /* Loop over the parts in cj. */ + for (int pjd = 0; pjd < count_j; pjd++) { + + /* Get a pointer to the jth particle. */ + struct part *restrict pj = &parts_j[pjd]; + + /* Compute the pairwise distance. */ + float r2 = 0.0f; + float dx[3]; + for (int k = 0; k < 3; k++) { + dx[k] = pix[k] - pj->x[k]; + r2 += dx[k] * dx[k]; + } + + /* Hit or miss? */ + if (r2 < hig2) { + +#ifndef WITH_VECTORIZATION + + IACT_NONSYM(r2, dx, hi, pj->h, pi, pj); + +#else + + /* Add this interaction to the queue. */ + r2q[icount] = r2; + dxq[3 * icount + 0] = dx[0]; + dxq[3 * icount + 1] = dx[1]; + dxq[3 * icount + 2] = dx[2]; + hiq[icount] = hi; + hjq[icount] = pj->h; + piq[icount] = pi; + pjq[icount] = pj; + icount += 1; + + /* Flush? */ + if (icount == VEC_SIZE) { + IACT_NONSYM_VEC(r2q, dxq, hiq, hjq, piq, pjq); + icount = 0; + } + +#endif + } + if (r2 < pj->h * pj->h * kernel_gamma2) { + +#ifndef WITH_VECTORIZATION + + for (int k = 0; k < 3; k++) dx[k] = -dx[k]; + IACT_NONSYM(r2, dx, pj->h, hi, pj, pi); + +#else + + /* Add this interaction to the queue. */ + r2q[icount] = r2; + dxq[3 * icount + 0] = -dx[0]; + dxq[3 * icount + 1] = -dx[1]; + dxq[3 * icount + 2] = -dx[2]; + hiq[icount] = pj->h; + hjq[icount] = hi; + piq[icount] = pj; + pjq[icount] = pi; + icount += 1; + + /* Flush? */ + if (icount == VEC_SIZE) { + IACT_NONSYM_VEC(r2q, dxq, hiq, hjq, piq, pjq); + icount = 0; + } + +#endif + } + + } /* loop over the parts in cj. */ + + } /* loop over the parts in ci. */ + +#ifdef WITH_VECTORIZATION + /* Pick up any leftovers. */ + if (icount > 0) + for (int k = 0; k < icount; k++) + IACT_NONSYM(r2q[k], &dxq[3 * k], hiq[k], hjq[k], piq[k], pjq[k]); +#endif + + TIMER_TOC(TIMER_DOPAIR); +} + +void DOPAIR2_NAIVE(struct runner *r, struct cell *restrict ci, + struct cell *restrict cj) { + + const struct engine *e = r->e; + +#ifndef SWIFT_DEBUG_CHECKS + // error("Don't use in actual runs ! Slow code !"); +#endif #ifdef WITH_VECTORIZATION int icount = 0; @@ -211,7 +348,9 @@ void DOSELF_NAIVE(struct runner *r, struct cell *restrict c) { const struct engine *e = r->e; - error("Don't use in actual runs ! Slow code !"); +#ifndef SWIFT_DEBUG_CHECKS + // error("Don't use in actual runs ! Slow code !"); +#endif #ifdef WITH_VECTORIZATION int icount = 0; diff --git a/src/scheduler.h b/src/scheduler.h index f2225f5f5b8d0a5db54eb8506e02d78b14f4bb88..224ed4fd494bf83587b29ee9d402229b379bbee9 100644 --- a/src/scheduler.h +++ b/src/scheduler.h @@ -42,7 +42,7 @@ /* Some constants. */ #define scheduler_maxwait 3 #define scheduler_init_nr_unlocks 10000 -#define scheduler_dosub 1 +#define scheduler_dosub 0 #define scheduler_maxsteal 10 #define scheduler_maxtries 2 #define scheduler_doforcesplit \ diff --git a/src/space.h b/src/space.h index e17b827e6f800a387d78a772d21c44093d0af1c9..67ff51cad34b0a9d1f4fd5492ab0e9d80ef064f2 100644 --- a/src/space.h +++ b/src/space.h @@ -44,7 +44,7 @@ #define space_maxcount_default 10000 #define space_max_top_level_cells_default 12 #define space_stretch 1.10f -#define space_maxreldx 0.25f +#define space_maxreldx 0.1f /* Maximum allowed depth of cell splits. */ #define space_cell_maxdepth 52