diff --git a/src/cell.c b/src/cell.c index 0102caf4a511df0293c32511d56ee3000ed736aa..1684111d5b03ce712df11f4dabffeb2fdbf33dc3 100644 --- a/src/cell.c +++ b/src/cell.c @@ -917,14 +917,21 @@ int cell_unskip_tasks(struct cell *c, struct scheduler *s) { /* Set the correct sorting flags */ if (t->type == task_type_pair) { - if (1 || ci->dx_max_sort > space_maxreldx * ci->dmin) ci->sorted = 0; - if (1 || cj->dx_max_sort > space_maxreldx * cj->dmin) cj->sorted = 0; + if (ci->dx_max_sort > space_maxreldx * ci->dmin) { + for (struct cell *finger = ci; finger != NULL; finger = finger->parent) + finger->sorted = 0; + } + if (cj->dx_max_sort > space_maxreldx * cj->dmin) { + for (struct cell *finger = cj; finger != NULL; finger = finger->parent) + finger->sorted = 0; + } if (!(ci->sorted & (1 << t->flags))) { #ifdef SWIFT_DEBUG_CHECKS if (!(ci->sorts->flags & (1 << t->flags))) error("bad flags in sort task."); #endif scheduler_activate(s, ci->sorts); + scheduler_activate(s, ci->super->drift); } if (!(cj->sorted & (1 << t->flags))) { #ifdef SWIFT_DEBUG_CHECKS @@ -932,6 +939,7 @@ int cell_unskip_tasks(struct cell *c, struct scheduler *s) { error("bad flags in sort task."); #endif scheduler_activate(s, cj->sorts); + scheduler_activate(s, cj->super->drift); } } @@ -1009,7 +1017,13 @@ int cell_unskip_tasks(struct cell *c, struct scheduler *s) { ; if (l == NULL) error("Missing link to send_ti task."); scheduler_activate(s, l->t); + } else { + scheduler_activate(s, ci->super->drift); + scheduler_activate(s, cj->super->drift); } +#else + scheduler_activate(s, ci->super->drift); + scheduler_activate(s, cj->super->drift); #endif } } @@ -1072,7 +1086,7 @@ void cell_drift(struct cell *c, const struct engine *e) { /* Drift from the last time the cell was drifted to the current time */ const double dt = (ti_current - ti_old) * timeBase; float dx_max = 0.f, dx2_max = 0.f; - float dx_max_sort = 0.f, dx2_max_sort = 0.f; + float dx_max_sort = c->dx_max_sort, dx2_max_sort = 0.f; float h_max = 0.f; /* Check that we are actually going to move forward. */ @@ -1107,7 +1121,7 @@ void cell_drift(struct cell *c, const struct engine *e) { const float dx2 = gp->x_diff[0] * gp->x_diff[0] + gp->x_diff[1] * gp->x_diff[1] + gp->x_diff[2] * gp->x_diff[2]; - dx2_max = (dx2_max > dx2) ? dx2_max : dx2; + dx2_max = max(dx2_max, dx2); } /* Loop over all the particles in the cell */ @@ -1125,19 +1139,19 @@ void cell_drift(struct cell *c, const struct engine *e) { const float dx2 = xp->x_diff[0] * xp->x_diff[0] + xp->x_diff[1] * xp->x_diff[1] + xp->x_diff[2] * xp->x_diff[2]; - dx2_max = (dx2_max > dx2) ? dx2_max : dx2; + dx2_max = max(dx2_max, dx2); const float dx2_sort = xp->x_diff_sort[0] * xp->x_diff_sort[0] + xp->x_diff_sort[1] * xp->x_diff_sort[1] + xp->x_diff_sort[2] * xp->x_diff_sort[2]; - dx2_max_sort = (dx2_max_sort > dx2_sort) ? dx2_max_sort : dx2_sort; + dx2_max_sort = max(dx2_max_sort, dx2_sort); /* Maximal smoothing length */ - h_max = (h_max > p->h) ? h_max : p->h; + h_max = max(h_max, p->h); } /* Now, get the maximal particle motion from its square */ dx_max = sqrtf(dx2_max); - dx_max_sort = sqrt(dx2_max_sort); + dx_max_sort = max(dx_max_sort, sqrtf(dx2_max_sort)); } else { diff --git a/src/drift.h b/src/drift.h index c87d6b1c0ab3a982203af5e175fed4ee19a147cf..86ea4bfdc81157863615a4ca329dfb2705a26cee 100644 --- a/src/drift.h +++ b/src/drift.h @@ -89,7 +89,7 @@ __attribute__((always_inline)) INLINE static void drift_part( /* Predict the values of the extra fields */ hydro_predict_extra(p, xp, dt); - /* Compute offset since last cell construction */ + /* Compute offsets since last cell construction */ for (int k = 0; k < 3; k++) { const float dx = xp->v_full[k] * dt; xp->x_diff[k] -= dx; diff --git a/src/engine.c b/src/engine.c index 0306c8a7838a04388a9f985b6953733184434944..285e68dc94bdb2214ba8228d4ffa2df0b10c0813 100644 --- a/src/engine.c +++ b/src/engine.c @@ -162,7 +162,6 @@ void engine_make_hierarchical_tasks(struct engine *e, struct cell *c) { scheduler_addunlock(s, c->kick1, c->drift); scheduler_addunlock(s, c->drift, c->init); - scheduler_addunlock(s, c->drift, c->sorts); /* Generate the ghost task. */ if (is_hydro) @@ -1686,8 +1685,16 @@ void engine_make_extra_hydroloop_tasks(struct engine *e) { for (int ind = 0; ind < nr_tasks; ind++) { struct task *t = &sched->tasks[ind]; + /* Sort tasks depend on the drift of the super-cell. */ + if (t->type == task_type_sort) { + scheduler_addunlock(sched, t->ci->super->drift, t); + } + /* Self-interaction? */ - if (t->type == task_type_self && t->subtype == task_subtype_density) { + else if (t->type == task_type_self && t->subtype == task_subtype_density) { + + /* Make all density tasks depend on the drift. */ + scheduler_addunlock(sched, t->ci->super->drift, t); #ifdef EXTRA_HYDRO_LOOP /* Start by constructing the task for the second and third hydro loop */ @@ -1721,6 +1728,11 @@ void engine_make_extra_hydroloop_tasks(struct engine *e) { /* Otherwise, pair interaction? */ else if (t->type == task_type_pair && t->subtype == task_subtype_density) { + /* Make all density tasks depend on the drift. */ + scheduler_addunlock(sched, t->ci->super->drift, t); + if (t->ci->super != t->cj->super) + scheduler_addunlock(sched, t->cj->super->drift, t); + #ifdef EXTRA_HYDRO_LOOP /* Start by constructing the task for the second and third hydro loop */ struct task *t2 = scheduler_addtask( @@ -1772,6 +1784,9 @@ void engine_make_extra_hydroloop_tasks(struct engine *e) { else if (t->type == task_type_sub_self && t->subtype == task_subtype_density) { + /* Make all density tasks depend on the drift. */ + scheduler_addunlock(sched, t->ci->super->drift, t); + #ifdef EXTRA_HYDRO_LOOP /* Start by constructing the task for the second and third hydro loop */ @@ -1814,6 +1829,11 @@ void engine_make_extra_hydroloop_tasks(struct engine *e) { else if (t->type == task_type_sub_pair && t->subtype == task_subtype_density) { + /* Make all density tasks depend on the drift. */ + scheduler_addunlock(sched, t->ci->super->drift, t); + if (t->ci->super != t->cj->super) + scheduler_addunlock(sched, t->cj->super->drift, t); + #ifdef EXTRA_HYDRO_LOOP /* Start by constructing the task for the second and third hydro loop */ @@ -2049,8 +2069,8 @@ void engine_marktasks_mapper(void *map_data, int num_elements, else if (t->type == task_type_pair || t->type == task_type_sub_pair) { /* Local pointers. */ - const struct cell *ci = t->ci; - const struct cell *cj = t->cj; + struct cell *ci = t->ci; + struct cell *cj = t->cj; /* Too much particle movement? */ if (t->tight && @@ -2070,6 +2090,8 @@ void engine_marktasks_mapper(void *map_data, int num_elements, /* Set the sort flags. */ if (t->type == task_type_pair) { + if (ci->dx_max_sort > space_maxreldx * ci->dmin) ci->sorted = 0; + if (cj->dx_max_sort > space_maxreldx * cj->dmin) cj->sorted = 0; if (!(ci->sorted & (1 << t->flags))) { atomic_or(&ci->sorts->flags, (1 << t->flags)); scheduler_activate(s, ci->sorts); @@ -2146,8 +2168,13 @@ void engine_marktasks_mapper(void *map_data, int num_elements, ; if (l == NULL) error("Missing link to send_ti task."); scheduler_activate(s, l->t); + } else { + scheduler_activate(s, ci->super->drift); + scheduler_activate(s, cj->super->drift); } - +#else + scheduler_activate(s, ci->super->drift); + scheduler_activate(s, cj->super->drift); #endif } diff --git a/src/runner.c b/src/runner.c index f286e6a3ccdf457cc2bab7c9fa4175a6e5857ba3..3bb5151fde8d0da34c557c5981a60d707d9af794 100644 --- a/src/runner.c +++ b/src/runner.c @@ -342,10 +342,21 @@ void runner_do_sort(struct runner *r, struct cell *c, int flags, int clock) { TIMER_TIC - /* Clean-up the flags, i.e. filter out what's already been sorted. */ - // flags &= ~c->sorted; + /* Clean-up the flags, i.e. filter out what's already been sorted, but + only if the sorts are recent. */ + if (c->dx_max_sort == 0.0f) { + /* Ignore dimensions that have been sorted. */ + flags &= ~c->sorted; + } else { + /* Re-do all the sorts. */ + flags |= c->sorted; + c->sorted = 0; + } if (flags == 0) return; + /* Sorting an un-drifted cell? */ + if (!cell_is_drifted(c, r->e)) error("Sorting undrifted cell."); + /* start by allocating the entry arrays. */ if (c->sort == NULL || c->sortsize < count) { if (c->sort != NULL) free(c->sort); @@ -361,8 +372,11 @@ void runner_do_sort(struct runner *r, struct cell *c, int flags, int clock) { /* Fill in the gaps within the progeny. */ for (int k = 0; k < 8; k++) { - if (c->progeny[k] != NULL) - runner_do_sort(r, c->progeny[k], flags, 0); + if (c->progeny[k] != NULL) { + if (c->progeny[k]->dx_max_sort > c->dmin * space_maxreldx) + runner_do_sort(r, c->progeny[k], flags, 0); + c->dx_max_sort = max(c->dx_max_sort, c->progeny[k]->dx_max_sort); + } } /* Loop over the 13 different sort arrays. */ @@ -436,9 +450,10 @@ void runner_do_sort(struct runner *r, struct cell *c, int flags, int clock) { /* Fill the sort array. */ for (int k = 0; k < count; k++) { - xparts[k].x_diff_sort[0] = xparts[k].x_diff_sort[1] = - xparts[k].x_diff_sort[2] = 0.0f; - const double px[3] = {parts[k].x[0], parts[k].x[1], parts[k].x[2]}; + xparts[k].x_diff_sort[0] = 0.0f; + xparts[k].x_diff_sort[1] = 0.0f; + xparts[k].x_diff_sort[2] = 0.0f; + const float px[3] = {parts[k].x[0], parts[k].x[1], parts[k].x[2]}; for (int j = 0; j < 13; j++) if (flags & (1 << j)) { sort[j * (count + 1) + k].i = k; @@ -456,10 +471,10 @@ void runner_do_sort(struct runner *r, struct cell *c, int flags, int clock) { runner_do_sort_ascending(&sort[j * (count + 1)], count); c->sorted |= (1 << j); } + + /* Finally, clear the dx_max_sort field of this cell. */ + c->dx_max_sort = 0.f; } - - /* Finally, clear the dx_max_sort field of this cell. */ - c->dx_max_sort = 0.f; #ifdef SWIFT_DEBUG_CHECKS /* Verify the sorting. */ @@ -739,9 +754,7 @@ void runner_do_ghost(struct runner *r, struct cell *c, int timer) { #ifdef SWIFT_DEBUG_CHECKS if (count) { - message("Smoothing length failed to converge on %i particles.", count); - - error("Aborting...."); + error("Smoothing length failed to converge on %i particles.", count); } #else if (count) @@ -763,12 +776,6 @@ void runner_do_ghost(struct runner *r, struct cell *c, int timer) { */ static void runner_do_unskip(struct cell *c, struct engine *e) { - /* Unskip any active tasks. */ - if (cell_is_active(c, e)) { - const int forcerebuild = cell_unskip_tasks(c, &e->sched); - if (forcerebuild) atomic_inc(&e->forcerebuild); - } - /* Recurse */ if (c->split) { for (int k = 0; k < 8; k++) { @@ -778,6 +785,12 @@ static void runner_do_unskip(struct cell *c, struct engine *e) { } } } + + /* Unskip any active tasks. */ + if (cell_is_active(c, e)) { + const int forcerebuild = cell_unskip_tasks(c, &e->sched); + if (forcerebuild) atomic_inc(&e->forcerebuild); + } } /** @@ -1310,6 +1323,9 @@ void *runner_main(void *data) { #ifdef SWIFT_DEBUG_TASKS t->rid = r->cpuid; #endif +#ifdef SWIFT_DEBUG_CHECKS + t->ti_run = e->ti_current; +#endif /* Check that we haven't scheduled an inactive task */ #ifdef SWIFT_DEBUG_CHECKS @@ -1321,13 +1337,13 @@ void *runner_main(void *data) { } else if (cj == NULL) { /* self */ - if (!cell_is_active(ci, e) && t->type != task_type_sort && + /* if (!cell_is_active(ci, e) && t->type != task_type_sort && t->type != task_type_send && t->type != task_type_recv) error( "Task (type='%s/%s') should have been skipped ti_current=%lld " "c->ti_end_min=%lld", taskID_names[t->type], subtaskID_names[t->subtype], e->ti_current, - ci->ti_end_min); + ci->ti_end_min); */ /* Special case for sorts */ if (!cell_is_active(ci, e) && t->type == task_type_sort && diff --git a/src/runner_doiact.h b/src/runner_doiact.h index c56e509342238bc7ef7e34cb28c7e950d385aad6..7e6165c5be37cfa8da1343e11a98ca5d33c28347 100644 --- a/src/runner_doiact.h +++ b/src/runner_doiact.h @@ -109,12 +109,12 @@ * @param cj The second #cell. */ void DOPAIR1_NAIVE(struct runner *r, struct cell *restrict ci, - struct cell *restrict cj) { + struct cell *restrict cj) { const struct engine *e = r->e; #ifndef SWIFT_DEBUG_CHECKS - // error("Don't use in actual runs ! Slow code !"); +// error("Don't use in actual runs ! Slow code !"); #endif #ifdef WITH_VECTORIZATION @@ -200,7 +200,7 @@ void DOPAIR1_NAIVE(struct runner *r, struct cell *restrict ci, if (r2 < pj->h * pj->h * kernel_gamma2) { #ifndef WITH_VECTORIZATION - + for (int k = 0; k < 3; k++) dx[k] = -dx[k]; IACT_NONSYM(r2, dx, pj->h, hi, pj, pi); @@ -241,12 +241,12 @@ void DOPAIR1_NAIVE(struct runner *r, struct cell *restrict ci, } void DOPAIR2_NAIVE(struct runner *r, struct cell *restrict ci, - struct cell *restrict cj) { + struct cell *restrict cj) { const struct engine *e = r->e; #ifndef SWIFT_DEBUG_CHECKS - // error("Don't use in actual runs ! Slow code !"); +// error("Don't use in actual runs ! Slow code !"); #endif #ifdef WITH_VECTORIZATION @@ -349,7 +349,7 @@ void DOSELF_NAIVE(struct runner *r, struct cell *restrict c) { const struct engine *e = r->e; #ifndef SWIFT_DEBUG_CHECKS - // error("Don't use in actual runs ! Slow code !"); +// error("Don't use in actual runs ! Slow code !"); #endif #ifdef WITH_VECTORIZATION @@ -860,8 +860,8 @@ void DOPAIR1(struct runner *r, struct cell *ci, struct cell *cj) { /* Anything to do here? */ if (!cell_is_active(ci, e) && !cell_is_active(cj, e)) return; - if (!cell_is_drifted(ci, e)) cell_drift(ci, e); - if (!cell_is_drifted(cj, e)) cell_drift(cj, e); + if (!cell_is_drifted(ci, e) || !cell_is_drifted(cj, e)) + error("Interacting undrifted cells."); /* Get the sort ID. */ double shift[3] = {0.0, 0.0, 0.0}; @@ -879,6 +879,27 @@ void DOPAIR1(struct runner *r, struct cell *ci, struct cell *cj) { const struct entry *restrict sort_i = &ci->sort[sid * (ci->count + 1)]; const struct entry *restrict sort_j = &cj->sort[sid * (cj->count + 1)]; +#ifdef SWIFT_DEBUG_CHECKS + /* Check that the dx_max_sort values in the cell are indeed an upper + bound on particle movement. */ + for (int pid = 0; pid < ci->count; pid++) { + const struct part *p = &ci->parts[sort_i[pid].i]; + const float d = p->x[0] * runner_shift[sid][0] + + p->x[1] * runner_shift[sid][1] + + p->x[2] * runner_shift[sid][2]; + if (fabsf(d - sort_i[pid].d) - ci->dx_max_sort > 1.0e-6) + error("particle shift diff exceeds dx_max_sort."); + } + for (int pjd = 0; pjd < cj->count; pjd++) { + const struct part *p = &cj->parts[sort_j[pjd].i]; + const float d = p->x[0] * runner_shift[sid][0] + + p->x[1] * runner_shift[sid][1] + + p->x[2] * runner_shift[sid][2]; + if (fabsf(d - sort_j[pjd].d) - cj->dx_max_sort > 1.0e-6) + error("particle shift diff exceeds dx_max_sort."); + } +#endif /* SWIFT_DEBUG_CHECKS */ + /* Get some other useful values. */ const double hi_max = ci->h_max * kernel_gamma - rshift; const double hj_max = cj->h_max * kernel_gamma; @@ -1071,8 +1092,8 @@ void DOPAIR2(struct runner *r, struct cell *ci, struct cell *cj) { /* Anything to do here? */ if (!cell_is_active(ci, e) && !cell_is_active(cj, e)) return; - if (!cell_is_drifted(ci, e)) error("Cell ci not drifted"); - if (!cell_is_drifted(cj, e)) error("Cell cj not drifted"); + if (!cell_is_drifted(ci, e) || !cell_is_drifted(cj, e)) + error("Interacting undrifted cells."); /* Get the shift ID. */ double shift[3] = {0.0, 0.0, 0.0}; @@ -1496,7 +1517,7 @@ void DOSELF1(struct runner *r, struct cell *restrict c) { if (!cell_is_active(c, e)) return; - if (!cell_is_drifted(c, e)) cell_drift(c, e); + if (!cell_is_drifted(c, e)) error("Interacting undrifted cell."); struct part *restrict parts = c->parts; const int count = c->count; @@ -2202,7 +2223,7 @@ void DOSUB_SELF1(struct runner *r, struct cell *ci, int gettimer) { if (!cell_is_active(ci, r->e)) return; #ifdef SWIFT_DEBUG_CHECKS - cell_is_drifted(ci, r->e); + if (!cell_is_drifted(ci, r->e)) error("Interacting undrifted cell."); #endif /* Recurse? */ diff --git a/src/task.h b/src/task.h index 04aba60a1c631211e2191b0acc428f68c18568a7..2294129ea6f8921f45e5488d2ff77b24c4c5c882 100644 --- a/src/task.h +++ b/src/task.h @@ -160,6 +160,11 @@ struct task { ticks tic, toc; #endif +#ifdef SWIFT_DEBUG_CHECKS + /* When was this task last run? */ + integertime_t ti_run; +#endif /* SWIFT_DEBUG_CHECKS */ + } SWIFT_STRUCT_ALIGN; /* Function prototypes. */