diff --git a/examples/EAGLE_12/eagle_12.yml b/examples/EAGLE_12/eagle_12.yml index 6afffed0f9d39b34588b89569a85ab56223fc548..7d07b2cef22f2a23b7d66af79b3ef1306df2de01 100644 --- a/examples/EAGLE_12/eagle_12.yml +++ b/examples/EAGLE_12/eagle_12.yml @@ -14,7 +14,7 @@ TimeIntegration: dt_max: 1e-4 # The maximal time-step size of the simulation (in internal units). Scheduler: - cell_split_size: 50 + cell_split_size: 400 # Parameters governing the snapshots Snapshots: diff --git a/examples/parameter_example.yml b/examples/parameter_example.yml index 97e7948317387356d4eed5dbc3b9acf673486fba..d825cc7f97996beed4660e23c799a6eb264e04f9 100644 --- a/examples/parameter_example.yml +++ b/examples/parameter_example.yml @@ -43,11 +43,11 @@ Statistics: # Parameters for the hydrodynamics scheme SPH: resolution_eta: 1.2348 # Target smoothing length in units of the mean inter-particle separation (1.2348 == 48Ngbs with the cubic spline kernel). - delta_neighbours: 0.1 # The tolerance for the targetted number of neighbours. CFL_condition: 0.1 # Courant-Friedrich-Levy condition for time integration. - max_ghost_iterations: 30 # (Optional) Maximal number of iterations allowed to converge towards the smoothing length. - max_volume_change: 2. # (Optional) Maximal allowed change of kernel volume over one time-step + h_tolerance: 1e-4 # (Optional) Relative accuracy of the Netwon-Raphson scheme for the smoothing lengths. h_max: 10. # (Optional) Maximal allowed smoothing length in internal units. Defaults to FLT_MAX if unspecified. + max_volume_change: 1.4 # (Optional) Maximal allowed change of kernel volume over one time-step. + max_ghost_iterations: 30 # (Optional) Maximal number of iterations allowed to converge towards the smoothing length. # Parameters for the self-gravity scheme Gravity: diff --git a/src/cell.c b/src/cell.c index 15ea31bef8e9219f47d4a060474deec55f54f0a1..ae76162fcfc36439a634788687c29a78fddc540f 100644 --- a/src/cell.c +++ b/src/cell.c @@ -1315,6 +1315,355 @@ int cell_is_drift_needed(struct cell *c, const struct engine *e) { return 0; } +/** + * @brief Clear the drift flags on the given cell. + */ +void cell_clear_drift_flags(struct cell *c, void *data) { + c->do_drift = 0; + c->do_sub_drift = 0; +} + +/** + * @brief Activate the drifts on the given cell. + */ +void cell_activate_drift_part(struct cell *c, struct scheduler *s) { + + /* If this cell is already marked for drift, quit early. */ + if (c->do_drift) return; + + /* Mark this cell for drifting. */ + c->do_drift = 1; + + /* Set the do_sub_drifts all the way up and activate the super drift + if this has not yet been done. */ + if (c == c->super) { + scheduler_activate(s, c->drift_part); + } else { + for (struct cell *parent = c->parent; + parent != NULL && !parent->do_sub_drift; parent = parent->parent) { + parent->do_sub_drift = 1; + if (parent == c->super) { + scheduler_activate(s, parent->drift_part); + break; + } + } + } +} + +/** + * @brief Activate the sorts up a cell hierarchy. + */ + +void cell_activate_sorts_up(struct cell *c, struct scheduler *s) { + if (c == c->super) { + scheduler_activate(s, c->sorts); + if (c->nodeID == engine_rank) cell_activate_drift_part(c, s); + } else { + for (struct cell *parent = c->parent; + parent != NULL && !parent->do_sub_sort; parent = parent->parent) { + parent->do_sub_sort = 1; + if (parent == c->super) { + scheduler_activate(s, parent->sorts); + if (parent->nodeID == engine_rank) cell_activate_drift_part(parent, s); + break; + } + } + } +} + +/** + * @brief Activate the sorts on a given cell, if needed. + */ +void cell_activate_sorts(struct cell *c, int sid, struct scheduler *s) { + + /* Do we need to re-sort? */ + if (c->dx_max_sort > space_maxreldx * c->dmin) { + + /* Climb up the tree to active the sorts in that direction */ + for (struct cell *finger = c; finger != NULL; finger = finger->parent) { + if (finger->requires_sorts) { + atomic_or(&finger->do_sort, finger->requires_sorts); + cell_activate_sorts_up(finger, s); + } + finger->sorted = 0; + } + } + + /* Has this cell been sorted at all for the given sid? */ + if (!(c->sorted & (1 << sid)) || c->nodeID != engine_rank) { + atomic_or(&c->do_sort, (1 << sid)); + cell_activate_sorts_up(c, s); + } +} + +/** + * @brief Traverse a sub-cell task and activate the sort tasks along the way. + */ +void cell_activate_subcell_tasks(struct cell *ci, struct cell *cj, + struct scheduler *s) { + const struct engine *e = s->space->e; + + /* Store the current dx_max and h_max values. */ + ci->dx_max_old = ci->dx_max_part; + ci->h_max_old = ci->h_max; + if (cj != NULL) { + cj->dx_max_old = cj->dx_max_part; + cj->h_max_old = cj->h_max; + } + + /* Self interaction? */ + if (cj == NULL) { + /* Do anything? */ + if (!cell_is_active(ci, e)) return; + + /* Recurse? */ + if (cell_can_recurse_in_self_task(ci)) { + + /* Loop over all progenies and pairs of progenies */ + for (int j = 0; j < 8; j++) { + if (ci->progeny[j] != NULL) { + cell_activate_subcell_tasks(ci->progeny[j], NULL, s); + for (int k = j + 1; k < 8; k++) + if (ci->progeny[k] != NULL) + cell_activate_subcell_tasks(ci->progeny[j], ci->progeny[k], s); + } + } + } else { + + /* We have reached the bottom of the tree: activate drift */ + cell_activate_drift_part(ci, s); + } + } + + /* Otherwise, pair interation, recurse? */ + else if (cell_can_recurse_in_pair_task(ci) && + cell_can_recurse_in_pair_task(cj)) { + + /* Get the type of pair if not specified explicitly. */ + double shift[3]; + int sid = space_getsid(s->space, &ci, &cj, shift); + + /* Different types of flags. */ + switch (sid) { + + /* Regular sub-cell interactions of a single cell. */ + case 0: /* ( 1 , 1 , 1 ) */ + if (ci->progeny[7] != NULL && cj->progeny[0] != NULL) + cell_activate_subcell_tasks(ci->progeny[7], cj->progeny[0], s); + break; + + case 1: /* ( 1 , 1 , 0 ) */ + if (ci->progeny[6] != NULL && cj->progeny[0] != NULL) + cell_activate_subcell_tasks(ci->progeny[6], cj->progeny[0], s); + if (ci->progeny[6] != NULL && cj->progeny[1] != NULL) + cell_activate_subcell_tasks(ci->progeny[6], cj->progeny[1], s); + if (ci->progeny[7] != NULL && cj->progeny[0] != NULL) + cell_activate_subcell_tasks(ci->progeny[7], cj->progeny[0], s); + if (ci->progeny[7] != NULL && cj->progeny[1] != NULL) + cell_activate_subcell_tasks(ci->progeny[7], cj->progeny[1], s); + break; + + case 2: /* ( 1 , 1 , -1 ) */ + if (ci->progeny[6] != NULL && cj->progeny[1] != NULL) + cell_activate_subcell_tasks(ci->progeny[6], cj->progeny[1], s); + break; + + case 3: /* ( 1 , 0 , 1 ) */ + if (ci->progeny[5] != NULL && cj->progeny[0] != NULL) + cell_activate_subcell_tasks(ci->progeny[5], cj->progeny[0], s); + if (ci->progeny[5] != NULL && cj->progeny[2] != NULL) + cell_activate_subcell_tasks(ci->progeny[5], cj->progeny[2], s); + if (ci->progeny[7] != NULL && cj->progeny[0] != NULL) + cell_activate_subcell_tasks(ci->progeny[7], cj->progeny[0], s); + if (ci->progeny[7] != NULL && cj->progeny[2] != NULL) + cell_activate_subcell_tasks(ci->progeny[7], cj->progeny[2], s); + break; + + case 4: /* ( 1 , 0 , 0 ) */ + if (ci->progeny[4] != NULL && cj->progeny[0] != NULL) + cell_activate_subcell_tasks(ci->progeny[4], cj->progeny[0], s); + if (ci->progeny[4] != NULL && cj->progeny[1] != NULL) + cell_activate_subcell_tasks(ci->progeny[4], cj->progeny[1], s); + if (ci->progeny[4] != NULL && cj->progeny[2] != NULL) + cell_activate_subcell_tasks(ci->progeny[4], cj->progeny[2], s); + if (ci->progeny[4] != NULL && cj->progeny[3] != NULL) + cell_activate_subcell_tasks(ci->progeny[4], cj->progeny[3], s); + if (ci->progeny[5] != NULL && cj->progeny[0] != NULL) + cell_activate_subcell_tasks(ci->progeny[5], cj->progeny[0], s); + if (ci->progeny[5] != NULL && cj->progeny[1] != NULL) + cell_activate_subcell_tasks(ci->progeny[5], cj->progeny[1], s); + if (ci->progeny[5] != NULL && cj->progeny[2] != NULL) + cell_activate_subcell_tasks(ci->progeny[5], cj->progeny[2], s); + if (ci->progeny[5] != NULL && cj->progeny[3] != NULL) + cell_activate_subcell_tasks(ci->progeny[5], cj->progeny[3], s); + if (ci->progeny[6] != NULL && cj->progeny[0] != NULL) + cell_activate_subcell_tasks(ci->progeny[6], cj->progeny[0], s); + if (ci->progeny[6] != NULL && cj->progeny[1] != NULL) + cell_activate_subcell_tasks(ci->progeny[6], cj->progeny[1], s); + if (ci->progeny[6] != NULL && cj->progeny[2] != NULL) + cell_activate_subcell_tasks(ci->progeny[6], cj->progeny[2], s); + if (ci->progeny[6] != NULL && cj->progeny[3] != NULL) + cell_activate_subcell_tasks(ci->progeny[6], cj->progeny[3], s); + if (ci->progeny[7] != NULL && cj->progeny[0] != NULL) + cell_activate_subcell_tasks(ci->progeny[7], cj->progeny[0], s); + if (ci->progeny[7] != NULL && cj->progeny[1] != NULL) + cell_activate_subcell_tasks(ci->progeny[7], cj->progeny[1], s); + if (ci->progeny[7] != NULL && cj->progeny[2] != NULL) + cell_activate_subcell_tasks(ci->progeny[7], cj->progeny[2], s); + if (ci->progeny[7] != NULL && cj->progeny[3] != NULL) + cell_activate_subcell_tasks(ci->progeny[7], cj->progeny[3], s); + break; + + case 5: /* ( 1 , 0 , -1 ) */ + if (ci->progeny[4] != NULL && cj->progeny[1] != NULL) + cell_activate_subcell_tasks(ci->progeny[4], cj->progeny[1], s); + if (ci->progeny[4] != NULL && cj->progeny[3] != NULL) + cell_activate_subcell_tasks(ci->progeny[4], cj->progeny[3], s); + if (ci->progeny[6] != NULL && cj->progeny[1] != NULL) + cell_activate_subcell_tasks(ci->progeny[6], cj->progeny[1], s); + if (ci->progeny[6] != NULL && cj->progeny[3] != NULL) + cell_activate_subcell_tasks(ci->progeny[6], cj->progeny[3], s); + break; + + case 6: /* ( 1 , -1 , 1 ) */ + if (ci->progeny[5] != NULL && cj->progeny[2] != NULL) + cell_activate_subcell_tasks(ci->progeny[5], cj->progeny[2], s); + break; + + case 7: /* ( 1 , -1 , 0 ) */ + if (ci->progeny[4] != NULL && cj->progeny[2] != NULL) + cell_activate_subcell_tasks(ci->progeny[4], cj->progeny[2], s); + if (ci->progeny[4] != NULL && cj->progeny[3] != NULL) + cell_activate_subcell_tasks(ci->progeny[4], cj->progeny[3], s); + if (ci->progeny[5] != NULL && cj->progeny[2] != NULL) + cell_activate_subcell_tasks(ci->progeny[5], cj->progeny[2], s); + if (ci->progeny[5] != NULL && cj->progeny[3] != NULL) + cell_activate_subcell_tasks(ci->progeny[5], cj->progeny[3], s); + break; + + case 8: /* ( 1 , -1 , -1 ) */ + if (ci->progeny[4] != NULL && cj->progeny[3] != NULL) + cell_activate_subcell_tasks(ci->progeny[4], cj->progeny[3], s); + break; + + case 9: /* ( 0 , 1 , 1 ) */ + if (ci->progeny[3] != NULL && cj->progeny[0] != NULL) + cell_activate_subcell_tasks(ci->progeny[3], cj->progeny[0], s); + if (ci->progeny[3] != NULL && cj->progeny[4] != NULL) + cell_activate_subcell_tasks(ci->progeny[3], cj->progeny[4], s); + if (ci->progeny[7] != NULL && cj->progeny[0] != NULL) + cell_activate_subcell_tasks(ci->progeny[7], cj->progeny[0], s); + if (ci->progeny[7] != NULL && cj->progeny[4] != NULL) + cell_activate_subcell_tasks(ci->progeny[7], cj->progeny[4], s); + break; + + case 10: /* ( 0 , 1 , 0 ) */ + if (ci->progeny[2] != NULL && cj->progeny[0] != NULL) + cell_activate_subcell_tasks(ci->progeny[2], cj->progeny[0], s); + if (ci->progeny[2] != NULL && cj->progeny[1] != NULL) + cell_activate_subcell_tasks(ci->progeny[2], cj->progeny[1], s); + if (ci->progeny[2] != NULL && cj->progeny[4] != NULL) + cell_activate_subcell_tasks(ci->progeny[2], cj->progeny[4], s); + if (ci->progeny[2] != NULL && cj->progeny[5] != NULL) + cell_activate_subcell_tasks(ci->progeny[2], cj->progeny[5], s); + if (ci->progeny[3] != NULL && cj->progeny[0] != NULL) + cell_activate_subcell_tasks(ci->progeny[3], cj->progeny[0], s); + if (ci->progeny[3] != NULL && cj->progeny[1] != NULL) + cell_activate_subcell_tasks(ci->progeny[3], cj->progeny[1], s); + if (ci->progeny[3] != NULL && cj->progeny[4] != NULL) + cell_activate_subcell_tasks(ci->progeny[3], cj->progeny[4], s); + if (ci->progeny[3] != NULL && cj->progeny[5] != NULL) + cell_activate_subcell_tasks(ci->progeny[3], cj->progeny[5], s); + if (ci->progeny[6] != NULL && cj->progeny[0] != NULL) + cell_activate_subcell_tasks(ci->progeny[6], cj->progeny[0], s); + if (ci->progeny[6] != NULL && cj->progeny[1] != NULL) + cell_activate_subcell_tasks(ci->progeny[6], cj->progeny[1], s); + if (ci->progeny[6] != NULL && cj->progeny[4] != NULL) + cell_activate_subcell_tasks(ci->progeny[6], cj->progeny[4], s); + if (ci->progeny[6] != NULL && cj->progeny[5] != NULL) + cell_activate_subcell_tasks(ci->progeny[6], cj->progeny[5], s); + if (ci->progeny[7] != NULL && cj->progeny[0] != NULL) + cell_activate_subcell_tasks(ci->progeny[7], cj->progeny[0], s); + if (ci->progeny[7] != NULL && cj->progeny[1] != NULL) + cell_activate_subcell_tasks(ci->progeny[7], cj->progeny[1], s); + if (ci->progeny[7] != NULL && cj->progeny[4] != NULL) + cell_activate_subcell_tasks(ci->progeny[7], cj->progeny[4], s); + if (ci->progeny[7] != NULL && cj->progeny[5] != NULL) + cell_activate_subcell_tasks(ci->progeny[7], cj->progeny[5], s); + break; + + case 11: /* ( 0 , 1 , -1 ) */ + if (ci->progeny[2] != NULL && cj->progeny[1] != NULL) + cell_activate_subcell_tasks(ci->progeny[2], cj->progeny[1], s); + if (ci->progeny[2] != NULL && cj->progeny[5] != NULL) + cell_activate_subcell_tasks(ci->progeny[2], cj->progeny[5], s); + if (ci->progeny[6] != NULL && cj->progeny[1] != NULL) + cell_activate_subcell_tasks(ci->progeny[6], cj->progeny[1], s); + if (ci->progeny[6] != NULL && cj->progeny[5] != NULL) + cell_activate_subcell_tasks(ci->progeny[6], cj->progeny[5], s); + break; + + case 12: /* ( 0 , 0 , 1 ) */ + if (ci->progeny[1] != NULL && cj->progeny[0] != NULL) + cell_activate_subcell_tasks(ci->progeny[1], cj->progeny[0], s); + if (ci->progeny[1] != NULL && cj->progeny[2] != NULL) + cell_activate_subcell_tasks(ci->progeny[1], cj->progeny[2], s); + if (ci->progeny[1] != NULL && cj->progeny[4] != NULL) + cell_activate_subcell_tasks(ci->progeny[1], cj->progeny[4], s); + if (ci->progeny[1] != NULL && cj->progeny[6] != NULL) + cell_activate_subcell_tasks(ci->progeny[1], cj->progeny[6], s); + if (ci->progeny[3] != NULL && cj->progeny[0] != NULL) + cell_activate_subcell_tasks(ci->progeny[3], cj->progeny[0], s); + if (ci->progeny[3] != NULL && cj->progeny[2] != NULL) + cell_activate_subcell_tasks(ci->progeny[3], cj->progeny[2], s); + if (ci->progeny[3] != NULL && cj->progeny[4] != NULL) + cell_activate_subcell_tasks(ci->progeny[3], cj->progeny[4], s); + if (ci->progeny[3] != NULL && cj->progeny[6] != NULL) + cell_activate_subcell_tasks(ci->progeny[3], cj->progeny[6], s); + if (ci->progeny[5] != NULL && cj->progeny[0] != NULL) + cell_activate_subcell_tasks(ci->progeny[5], cj->progeny[0], s); + if (ci->progeny[5] != NULL && cj->progeny[2] != NULL) + cell_activate_subcell_tasks(ci->progeny[5], cj->progeny[2], s); + if (ci->progeny[5] != NULL && cj->progeny[4] != NULL) + cell_activate_subcell_tasks(ci->progeny[5], cj->progeny[4], s); + if (ci->progeny[5] != NULL && cj->progeny[6] != NULL) + cell_activate_subcell_tasks(ci->progeny[5], cj->progeny[6], s); + if (ci->progeny[7] != NULL && cj->progeny[0] != NULL) + cell_activate_subcell_tasks(ci->progeny[7], cj->progeny[0], s); + if (ci->progeny[7] != NULL && cj->progeny[2] != NULL) + cell_activate_subcell_tasks(ci->progeny[7], cj->progeny[2], s); + if (ci->progeny[7] != NULL && cj->progeny[4] != NULL) + cell_activate_subcell_tasks(ci->progeny[7], cj->progeny[4], s); + if (ci->progeny[7] != NULL && cj->progeny[6] != NULL) + cell_activate_subcell_tasks(ci->progeny[7], cj->progeny[6], s); + break; + } + + } + + /* Otherwise, activate the sorts and drifts. */ + else if (cell_is_active(ci, e) || cell_is_active(cj, e)) { + + /* Get the type of pair if not specified explicitly. */ + double shift[3]; + int sid = space_getsid(s->space, &ci, &cj, shift); + + /* We are going to interact this pair, so store some values. */ + atomic_or(&ci->requires_sorts, 1 << sid); + atomic_or(&cj->requires_sorts, 1 << sid); + ci->dx_max_sort_old = ci->dx_max_sort; + cj->dx_max_sort_old = cj->dx_max_sort; + + /* Activate the drifts if the cells are local. */ + if (ci->nodeID == engine_rank) cell_activate_drift_part(ci, s); + if (cj->nodeID == engine_rank) cell_activate_drift_part(cj, s); + + /* Do we need to sort the cells? */ + cell_activate_sorts(ci, sid, s); + cell_activate_sorts(cj, sid, s); + } +} + /** * @brief Un-skips all the tasks associated with a given cell and checks * if the space needs to be rebuilt. @@ -1341,30 +1690,23 @@ int cell_unskip_tasks(struct cell *c, struct scheduler *s) { /* Set the correct sorting flags */ if (t->type == task_type_pair) { - if (ci->dx_max_sort > space_maxreldx * ci->dmin) { - for (struct cell *finger = ci; finger != NULL; finger = finger->parent) - finger->sorted = 0; - } - if (cj->dx_max_sort > space_maxreldx * cj->dmin) { - for (struct cell *finger = cj; finger != NULL; finger = finger->parent) - finger->sorted = 0; - } - if (!(ci->sorted & (1 << t->flags))) { -#ifdef SWIFT_DEBUG_CHECKS - if (!(ci->sorts->flags & (1 << t->flags))) - error("bad flags in sort task."); -#endif - scheduler_activate(s, ci->sorts); - if (ci->nodeID == engine_rank) scheduler_activate(s, ci->drift_part); - } - if (!(cj->sorted & (1 << t->flags))) { -#ifdef SWIFT_DEBUG_CHECKS - if (!(cj->sorts->flags & (1 << t->flags))) - error("bad flags in sort task."); -#endif - scheduler_activate(s, cj->sorts); - if (cj->nodeID == engine_rank) scheduler_activate(s, cj->drift_part); - } + /* Store some values. */ + atomic_or(&ci->requires_sorts, 1 << t->flags); + atomic_or(&cj->requires_sorts, 1 << t->flags); + ci->dx_max_sort_old = ci->dx_max_sort; + cj->dx_max_sort_old = cj->dx_max_sort; + + /* Activate the drift tasks. */ + if (ci->nodeID == engine_rank) cell_activate_drift_part(ci, s); + if (cj->nodeID == engine_rank) cell_activate_drift_part(cj, s); + + /* Check the sorts and activate them if needed. */ + cell_activate_sorts(ci, t->flags, s); + cell_activate_sorts(cj, t->flags, s); + } + /* Store current values of dx_max and h_max. */ + else if (t->type == task_type_sub_pair || t->type == task_type_sub_self) { + cell_activate_subcell_tasks(t->ci, t->cj, s); } /* Only interested in pair interactions as of here. */ @@ -1372,12 +1714,10 @@ int cell_unskip_tasks(struct cell *c, struct scheduler *s) { /* Check whether there was too much particle motion, i.e. the cell neighbour conditions were violated. */ - if (max(ci->h_max, cj->h_max) + ci->dx_max_part + cj->dx_max_part > - cj->dmin) - rebuild = 1; + if (cell_need_rebuild_for_pair(ci, cj)) rebuild = 1; #ifdef WITH_MPI - /* Activate the send/recv flags. */ + /* Activate the send/recv tasks. */ if (ci->nodeID != engine_rank) { /* Activate the tasks to recv foreign cell ci's data. */ @@ -1398,12 +1738,9 @@ int cell_unskip_tasks(struct cell *c, struct scheduler *s) { if (l == NULL) error("Missing link to send_xv task."); scheduler_activate(s, l->t); - /* Drift both cells, the foreign one at the level which it is sent. */ - if (l->t->ci->drift_part) - scheduler_activate(s, l->t->ci->drift_part); - else - error("Drift task missing !"); - if (t->type == task_type_pair) scheduler_activate(s, cj->drift_part); + /* Drift the cell which will be sent at the level at which it is sent, + i.e. drift the cell specified in the send task (l->t) itself. */ + cell_activate_drift_part(l->t->ci, s); if (cell_is_active(cj, e)) { @@ -1448,12 +1785,9 @@ int cell_unskip_tasks(struct cell *c, struct scheduler *s) { if (l == NULL) error("Missing link to send_xv task."); scheduler_activate(s, l->t); - /* Drift both cells, the foreign one at the level which it is sent. */ - if (l->t->ci->drift_part) - scheduler_activate(s, l->t->ci->drift_part); - else - error("Drift task missing !"); - if (t->type == task_type_pair) scheduler_activate(s, ci->drift_part); + /* Drift the cell which will be sent at the level at which it is sent, + i.e. drift the cell specified in the send task (l->t) itself. */ + cell_activate_drift_part(l->t->ci, s); if (cell_is_active(ci, e)) { @@ -1477,14 +1811,6 @@ int cell_unskip_tasks(struct cell *c, struct scheduler *s) { if (l == NULL) error("Missing link to send_ti task."); scheduler_activate(s, l->t); } - } else if (t->type == task_type_pair) { - scheduler_activate(s, ci->drift_part); - scheduler_activate(s, cj->drift_part); - } -#else - if (t->type == task_type_pair) { - scheduler_activate(s, ci->drift_part); - scheduler_activate(s, cj->drift_part); } #endif } @@ -1498,9 +1824,10 @@ int cell_unskip_tasks(struct cell *c, struct scheduler *s) { for (struct link *l = c->grav; l != NULL; l = l->next) scheduler_activate(s, l->t); if (c->extra_ghost != NULL) scheduler_activate(s, c->extra_ghost); + if (c->ghost_in != NULL) scheduler_activate(s, c->ghost_in); + if (c->ghost_out != NULL) scheduler_activate(s, c->ghost_out); if (c->ghost != NULL) scheduler_activate(s, c->ghost); if (c->init_grav != NULL) scheduler_activate(s, c->init_grav); - if (c->drift_part != NULL) scheduler_activate(s, c->drift_part); if (c->drift_gpart != NULL) scheduler_activate(s, c->drift_gpart); if (c->kick1 != NULL) scheduler_activate(s, c->kick1); if (c->kick2 != NULL) scheduler_activate(s, c->kick2); @@ -1540,8 +1867,9 @@ void cell_set_super(struct cell *c, struct cell *super) { * * @param c The #cell. * @param e The #engine (to get ti_current). + * @param force Drift the particles irrespective of the #cell flags. */ -void cell_drift_part(struct cell *c, const struct engine *e) { +void cell_drift_part(struct cell *c, const struct engine *e, int force) { const float hydro_h_max = e->hydro_properties->h_max; const double timeBase = e->timeBase; @@ -1556,11 +1884,19 @@ void cell_drift_part(struct cell *c, const struct engine *e) { float dx_max_sort = 0.0f, dx2_max_sort = 0.f; float cell_h_max = 0.f; + /* Drift irrespective of cell flags? */ + force |= c->do_drift; + +#ifdef SWIFT_DEBUG_CHECKS + /* Check that we only drift local cells. */ + if (c->nodeID != engine_rank) error("Drifting a foreign cell is nope."); + /* Check that we are actually going to move forward. */ if (ti_current < ti_old_part) error("Attempt to drift to the past"); +#endif // SWIFT_DEBUG_CHECKS /* Are we not in a leaf ? */ - if (c->split) { + if (c->split && (force || c->do_sub_drift)) { /* Loop over the progeny and collect their data. */ for (int k = 0; k < 8; k++) @@ -1568,7 +1904,7 @@ void cell_drift_part(struct cell *c, const struct engine *e) { struct cell *cp = c->progeny[k]; /* Collect */ - cell_drift_part(cp, e); + cell_drift_part(cp, e, force); /* Update */ dx_max = max(dx_max, cp->dx_max_part); @@ -1576,7 +1912,15 @@ void cell_drift_part(struct cell *c, const struct engine *e) { cell_h_max = max(cell_h_max, cp->h_max); } - } else if (ti_current > ti_old_part) { + /* Store the values */ + c->h_max = cell_h_max; + c->dx_max_part = dx_max; + c->dx_max_sort = dx_max_sort; + + /* Update the time of the last drift */ + c->ti_old_part = ti_current; + + } else if (!c->split && force && ti_current > ti_old_part) { /* Loop over all the gas particles in the cell */ const size_t nr_parts = c->count; @@ -1615,20 +1959,18 @@ void cell_drift_part(struct cell *c, const struct engine *e) { dx_max = sqrtf(dx2_max); dx_max_sort = sqrtf(dx2_max_sort); - } else { + /* Store the values */ + c->h_max = cell_h_max; + c->dx_max_part = dx_max; + c->dx_max_sort = dx_max_sort; - cell_h_max = c->h_max; - dx_max = c->dx_max_part; - dx_max_sort = c->dx_max_sort; + /* Update the time of the last drift */ + c->ti_old_part = ti_current; } - /* Store the values */ - c->h_max = cell_h_max; - c->dx_max_part = dx_max; - c->dx_max_sort = dx_max_sort; - - /* Update the time of the last drift */ - c->ti_old_part = ti_current; + /* Clear the drift flags. */ + c->do_drift = 0; + c->do_sub_drift = 0; } /** diff --git a/src/cell.h b/src/cell.h index 178f77592543ed1a617d76d3d9fae91079395353..93a59545bbed36fdd639bd7305443e4f6770e465 100644 --- a/src/cell.h +++ b/src/cell.h @@ -31,15 +31,16 @@ /* Local includes. */ #include "align.h" +#include "kernel_hydro.h" #include "lock.h" #include "multipole.h" #include "part.h" +#include "space.h" #include "task.h" #include "timeline.h" /* Avoid cyclic inclusions */ struct engine; -struct space; struct scheduler; /* Max tag size set to 2^29 to take into account some MPI implementations @@ -151,7 +152,9 @@ struct cell { /*! The multipole initialistation task */ struct task *init_grav; - /*! The ghost task */ + /*! The ghost tasks */ + struct task *ghost_in; + struct task *ghost_out; struct task *ghost; /*! The extra ghost task for complex hydro schemes */ @@ -269,9 +272,6 @@ struct cell { /*! Nr of #spart in this cell. */ int scount; - /*! The size of the sort array */ - int sortsize; - /*! Bit-mask indicating the sorted directions */ unsigned int sorted; @@ -326,6 +326,26 @@ struct cell { /*! The maximal depth of this cell and its progenies */ char maxdepth; + /*! Values of dx_max and h_max before the drifts, used for sub-cell tasks. */ + float dx_max_old; + float h_max_old; + float dx_max_sort_old; + + /* Bit mask of sort directions that will be needed in the next timestep. */ + unsigned int requires_sorts; + + /*! Does this cell need to be drifted? */ + char do_drift; + + /*! Do any of this cell's sub-cells need to be drifted? */ + char do_sub_drift; + + /*! Bit mask of sorts that need to be computed for this cell. */ + unsigned int do_sort; + + /*! Do any of this cell's sub-cells need to be sorted? */ + char do_sub_sort; + #ifdef SWIFT_DEBUG_CHECKS /*! The list of tasks that have been executed on this cell */ char tasks_executed[64]; @@ -373,10 +393,102 @@ void cell_reset_task_counters(struct cell *c); int cell_is_drift_needed(struct cell *c, const struct engine *e); int cell_unskip_tasks(struct cell *c, struct scheduler *s); void cell_set_super(struct cell *c, struct cell *super); -void cell_drift_part(struct cell *c, const struct engine *e); +void cell_drift_part(struct cell *c, const struct engine *e, int force); void cell_drift_gpart(struct cell *c, const struct engine *e); void cell_drift_multipole(struct cell *c, const struct engine *e); void cell_drift_all_multipoles(struct cell *c, const struct engine *e); void cell_check_timesteps(struct cell *c); +void cell_store_pre_drift_values(struct cell *c); +void cell_activate_subcell_tasks(struct cell *ci, struct cell *cj, + struct scheduler *s); +void cell_activate_drift_part(struct cell *c, struct scheduler *s); +void cell_activate_sorts(struct cell *c, int sid, struct scheduler *s); +void cell_clear_drift_flags(struct cell *c, void *data); + +/* Inlined functions (for speed). */ + +/** + * @brief Can a sub-pair hydro task recurse to a lower level based + * on the status of the particles in the cell. + * + * @param c The #cell. + */ +__attribute__((always_inline)) INLINE static int cell_can_recurse_in_pair_task( + const struct cell *c) { + + /* Is the cell split ? */ + /* If so, is the cut-off radius plus the max distance the parts have moved */ + /* smaller than the sub-cell sizes ? */ + /* Note: We use the _old values as these might have been updated by a drift */ + return c->split && + ((kernel_gamma * c->h_max_old + c->dx_max_old) < 0.5f * c->dmin); +} + +/** + * @brief Can a sub-self hydro task recurse to a lower level based + * on the status of the particles in the cell. + * + * @param c The #cell. + */ +__attribute__((always_inline)) INLINE static int cell_can_recurse_in_self_task( + const struct cell *c) { + + /* Is the cell split ? */ + /* Note: No need for more checks here as all the sub-pairs and sub-self */ + /* operations will be executed. So no need for the particle to be at exactly + */ + /* the right place. */ + return c->split; +} + +/** + * @brief Can a pair task associated with a cell be split into smaller + * sub-tasks. + * + * @param c The #cell. + */ +__attribute__((always_inline)) INLINE static int cell_can_split_pair_task( + const struct cell *c) { + + /* Is the cell split ? */ + /* If so, is the cut-off radius with some leeway smaller than */ + /* the sub-cell sizes ? */ + /* Note that since tasks are create after a rebuild no need to take */ + /* into account any part motion (i.e. dx_max == 0 here) */ + return c->split && (space_stretch * kernel_gamma * c->h_max < 0.5f * c->dmin); +} + +/** + * @brief Can a self task associated with a cell be split into smaller + * sub-tasks. + * + * @param c The #cell. + */ +__attribute__((always_inline)) INLINE static int cell_can_split_self_task( + const struct cell *c) { + + /* Is the cell split ? */ + /* Note: No need for more checks here as all the sub-pairs and sub-self */ + /* tasks will be created. So no need to check for h_max */ + return c->split && (space_stretch * kernel_gamma * c->h_max < 0.5f * c->dmin); +} + +/** + * @brief Have particles in a pair of cells moved too much and require a rebuild + * ? + * + * @param ci The first #cell. + * @param cj The second #cell. + */ +__attribute__((always_inline)) INLINE static int cell_need_rebuild_for_pair( + const struct cell *ci, const struct cell *cj) { + + /* Is the cut-off radius plus the max distance the parts in both cells have */ + /* moved larger than the cell size ? */ + /* Note ci->dmin == cj->dmin */ + return (kernel_gamma * max(ci->h_max, cj->h_max) + ci->dx_max_part + + cj->dx_max_part > + cj->dmin); +} #endif /* SWIFT_CELL_H */ diff --git a/src/dimension.h b/src/dimension.h index 7c58b5b846490e8f5227a232eaaeb3df5995f795..4ee9377b4f9608b2549f3dfee2a683b61a71c76a 100644 --- a/src/dimension.h +++ b/src/dimension.h @@ -118,6 +118,34 @@ __attribute__((always_inline)) INLINE static float pow_dimension_plus_one( #endif } +/** + * @brief Returns the argument to the power given by the dimension minus one + * + * Computes \f$x^{d-1}\f$. + */ +__attribute__((always_inline)) INLINE static float pow_dimension_minus_one( + float x) { + +#if defined(HYDRO_DIMENSION_3D) + + return x * x; + +#elif defined(HYDRO_DIMENSION_2D) + + return x; + +#elif defined(HYDRO_DIMENSION_1D) + + return 1.f; + +#else + + error("The dimension is not defined !"); + return 0.f; + +#endif +} + /** * @brief Inverts the given dimension by dimension matrix (in place) * diff --git a/src/engine.c b/src/engine.c index f038969280fcfa16e6872a199e03d00ca1fa8133..2680080ee60ae0b7061143eb98706397b9d31ec5 100644 --- a/src/engine.c +++ b/src/engine.c @@ -120,6 +120,24 @@ void engine_addlink(struct engine *e, struct link **l, struct task *t) { res->next = atomic_swap(l, res); } +/** + * @brief Recursively add non-implicit ghost tasks to a cell hierarchy. + */ +void engine_add_ghosts(struct engine *e, struct cell *c, struct task *ghost_in, + struct task *ghost_out) { + if (!c->split || c->count < engine_max_parts_per_ghost) { + struct scheduler *s = &e->sched; + c->ghost = + scheduler_addtask(s, task_type_ghost, task_subtype_none, 0, 0, c, NULL); + scheduler_addunlock(s, ghost_in, c->ghost); + scheduler_addunlock(s, c->ghost, ghost_out); + } else { + for (int k = 0; k < 8; k++) + if (c->progeny[k] != NULL) + engine_add_ghosts(e, c->progeny[k], ghost_in, ghost_out); + } +} + /** * @brief Generate the hydro hierarchical tasks for a hierarchy of cells - * i.e. all the O(Npart) tasks. @@ -143,9 +161,17 @@ void engine_make_hierarchical_tasks(struct engine *e, struct cell *c) { /* Are we in a super-cell ? */ if (c->super == c) { + /* Add the sort task. */ + c->sorts = + scheduler_addtask(s, task_type_sort, task_subtype_none, 0, 0, c, NULL); + /* Local tasks only... */ if (c->nodeID == e->nodeID) { + /* Add the drift task. */ + c->drift_part = scheduler_addtask(s, task_type_drift_part, + task_subtype_none, 0, 0, c, NULL); + /* Add the two half kicks */ c->kick1 = scheduler_addtask(s, task_type_kick1, task_subtype_none, 0, 0, c, NULL); @@ -180,10 +206,16 @@ void engine_make_hierarchical_tasks(struct engine *e, struct cell *c) { scheduler_addunlock(s, c->grav_down, c->kick2); } - /* Generate the ghost task. */ - if (is_hydro) - c->ghost = scheduler_addtask(s, task_type_ghost, task_subtype_none, 0, - 0, c, NULL); + /* Generate the ghost tasks. */ + if (is_hydro) { + c->ghost_in = + scheduler_addtask(s, task_type_ghost, task_subtype_none, 0, + /* implicit = */ 1, c, NULL); + c->ghost_out = + scheduler_addtask(s, task_type_ghost, task_subtype_none, 0, + /* implicit = */ 1, c, NULL); + engine_add_ghosts(e, c, c->ghost_in, c->ghost_out); + } #ifdef EXTRA_HYDRO_LOOP /* Generate the extra ghost task. */ @@ -1042,18 +1074,15 @@ void engine_addtasks_send(struct engine *e, struct cell *ci, struct cell *cj, scheduler_addunlock(s, t_rho, ci->super->kick2); /* The send_rho task depends on the cell's ghost task. */ - scheduler_addunlock(s, ci->super->ghost, t_rho); + scheduler_addunlock(s, ci->super->ghost_out, t_rho); /* The send_xv task should unlock the super-cell's ghost task. */ - scheduler_addunlock(s, t_xv, ci->super->ghost); + scheduler_addunlock(s, t_xv, ci->super->ghost_in); #endif /* Drift before you send */ - if (ci->drift_part == NULL) - ci->drift_part = scheduler_addtask(s, task_type_drift_part, - task_subtype_none, 0, 0, ci, NULL); - scheduler_addunlock(s, ci->drift_part, t_xv); + scheduler_addunlock(s, ci->super->drift_part, t_xv); /* The super-cell's timestep task should unlock the send_ti task. */ scheduler_addunlock(s, ci->super->timestep, t_ti); @@ -1862,27 +1891,11 @@ void engine_count_and_link_tasks(struct engine *e) { struct cell *const ci = t->ci; struct cell *const cj = t->cj; - /* Link sort tasks to the next-higher sort task. */ + /* Link sort tasks to all the higher sort task. */ if (t->type == task_type_sort) { - struct cell *finger = t->ci->parent; - while (finger != NULL && finger->sorts == NULL) finger = finger->parent; - if (finger != NULL) scheduler_addunlock(sched, t, finger->sorts); - } - - /* Link drift tasks to the next-higher drift task. */ - else if (t->type == task_type_drift_part) { - struct cell *finger = ci->parent; - while (finger != NULL && finger->drift_part == NULL) - finger = finger->parent; - if (finger != NULL) scheduler_addunlock(sched, t, finger->drift_part); - } - - /* Link drift tasks to the next-higher drift task. */ - else if (t->type == task_type_drift_gpart) { - struct cell *finger = ci->parent; - while (finger != NULL && finger->drift_gpart == NULL) - finger = finger->parent; - if (finger != NULL) scheduler_addunlock(sched, t, finger->drift_gpart); + for (struct cell *finger = t->ci->parent; finger != NULL; + finger = finger->parent) + if (finger->sorts != NULL) scheduler_addunlock(sched, t, finger->sorts); } /* Link self tasks to cells. */ @@ -2108,8 +2121,8 @@ static inline void engine_make_hydro_loops_dependencies(struct scheduler *sched, struct cell *c, int with_cooling) { /* density loop --> ghost --> force loop */ - scheduler_addunlock(sched, density, c->super->ghost); - scheduler_addunlock(sched, c->super->ghost, force); + scheduler_addunlock(sched, density, c->super->ghost_in); + scheduler_addunlock(sched, c->super->ghost_out, force); if (with_cooling) { /* force loop --> cooling (--> kick2) */ @@ -2145,14 +2158,15 @@ void engine_make_extra_hydroloop_tasks(struct engine *e) { /* Sort tasks depend on the drift of the cell. */ if (t->type == task_type_sort && t->ci->nodeID == engine_rank) { - scheduler_addunlock(sched, t->ci->drift_part, t); + scheduler_addunlock(sched, t->ci->super->drift_part, t); } /* Self-interaction? */ else if (t->type == task_type_self && t->subtype == task_subtype_density) { - /* Make all density tasks depend on the drift. */ - scheduler_addunlock(sched, t->ci->drift_part, t); + /* Make all density tasks depend on the drift and the sorts. */ + scheduler_addunlock(sched, t->ci->super->drift_part, t); + scheduler_addunlock(sched, t->ci->super->sorts, t); #ifdef EXTRA_HYDRO_LOOP /* Start by constructing the task for the second and third hydro loop */ @@ -2186,11 +2200,15 @@ void engine_make_extra_hydroloop_tasks(struct engine *e) { /* Otherwise, pair interaction? */ else if (t->type == task_type_pair && t->subtype == task_subtype_density) { - /* Make all density tasks depend on the drift. */ + /* Make all density tasks depend on the drift and the sorts. */ if (t->ci->nodeID == engine_rank) - scheduler_addunlock(sched, t->ci->drift_part, t); - if (t->cj->nodeID == engine_rank) - scheduler_addunlock(sched, t->cj->drift_part, t); + scheduler_addunlock(sched, t->ci->super->drift_part, t); + scheduler_addunlock(sched, t->ci->super->sorts, t); + if (t->ci->super != t->cj->super) { + if (t->cj->nodeID == engine_rank) + scheduler_addunlock(sched, t->cj->super->drift_part, t); + scheduler_addunlock(sched, t->cj->super->sorts, t); + } #ifdef EXTRA_HYDRO_LOOP /* Start by constructing the task for the second and third hydro loop */ @@ -2243,6 +2261,10 @@ void engine_make_extra_hydroloop_tasks(struct engine *e) { else if (t->type == task_type_sub_self && t->subtype == task_subtype_density) { + /* Make all density tasks depend on the drift and sorts. */ + scheduler_addunlock(sched, t->ci->super->drift_part, t); + scheduler_addunlock(sched, t->ci->super->sorts, t); + #ifdef EXTRA_HYDRO_LOOP /* Start by constructing the task for the second and third hydro loop */ @@ -2285,6 +2307,16 @@ void engine_make_extra_hydroloop_tasks(struct engine *e) { else if (t->type == task_type_sub_pair && t->subtype == task_subtype_density) { + /* Make all density tasks depend on the drift. */ + if (t->ci->nodeID == engine_rank) + scheduler_addunlock(sched, t->ci->super->drift_part, t); + scheduler_addunlock(sched, t->ci->super->sorts, t); + if (t->ci->super != t->cj->super) { + if (t->cj->nodeID == engine_rank) + scheduler_addunlock(sched, t->cj->super->drift_part, t); + scheduler_addunlock(sched, t->cj->super->sorts, t); + } + #ifdef EXTRA_HYDRO_LOOP /* Start by constructing the task for the second and third hydro loop */ @@ -2542,6 +2574,11 @@ void engine_marktasks_mapper(void *map_data, int num_elements, /* Set this task's skip. */ if (cell_is_active(t->ci, e)) scheduler_activate(s, t); + + /* Store current values of dx_max and h_max. */ + if (t->type == task_type_sub_self && t->subtype == task_subtype_density) { + cell_activate_subcell_tasks(t->ci, NULL, s); + } } /* Pair? */ @@ -2561,38 +2598,27 @@ void engine_marktasks_mapper(void *map_data, int num_elements, if (t->subtype != task_subtype_density) continue; /* Too much particle movement? */ - if (max(ci->h_max, cj->h_max) + ci->dx_max_part + cj->dx_max_part > - cj->dmin) - *rebuild_space = 1; + if (cell_need_rebuild_for_pair(ci, cj)) *rebuild_space = 1; /* Set the correct sorting flags */ if (t->type == task_type_pair) { - if (ci->dx_max_sort > space_maxreldx * ci->dmin) { - for (struct cell *finger = ci; finger != NULL; - finger = finger->parent) - finger->sorted = 0; - } - if (cj->dx_max_sort > space_maxreldx * cj->dmin) { - for (struct cell *finger = cj; finger != NULL; - finger = finger->parent) - finger->sorted = 0; - } - if (!(ci->sorted & (1 << t->flags))) { -#ifdef SWIFT_DEBUG_CHECKS - if (!(ci->sorts->flags & (1 << t->flags))) - error("bad flags in sort task."); -#endif - scheduler_activate(s, ci->sorts); - if (ci->nodeID == engine_rank) scheduler_activate(s, ci->drift_part); - } - if (!(cj->sorted & (1 << t->flags))) { -#ifdef SWIFT_DEBUG_CHECKS - if (!(cj->sorts->flags & (1 << t->flags))) - error("bad flags in sort task."); -#endif - scheduler_activate(s, cj->sorts); - if (cj->nodeID == engine_rank) scheduler_activate(s, cj->drift_part); - } + /* Store some values. */ + atomic_or(&ci->requires_sorts, 1 << t->flags); + atomic_or(&cj->requires_sorts, 1 << t->flags); + ci->dx_max_sort_old = ci->dx_max_sort; + cj->dx_max_sort_old = cj->dx_max_sort; + + /* Activate the drift tasks. */ + if (ci->nodeID == engine_rank) cell_activate_drift_part(ci, s); + if (cj->nodeID == engine_rank) cell_activate_drift_part(cj, s); + + /* Activate the sorts where needed. */ + cell_activate_sorts(ci, t->flags, s); + cell_activate_sorts(cj, t->flags, s); + } + /* Store current values of dx_max and h_max. */ + else if (t->type == task_type_sub_pair) { + cell_activate_subcell_tasks(t->ci, t->cj, s); } #ifdef WITH_MPI @@ -2617,15 +2643,11 @@ void engine_marktasks_mapper(void *map_data, int num_elements, if (l == NULL) error("Missing link to send_xv task."); scheduler_activate(s, l->t); - /* Drift both cells, the foreign one at the level which it is sent. */ - if (l->t->ci->drift_part) - scheduler_activate(s, l->t->ci->drift_part); - else - error("Drift task missing !"); - if (t->type == task_type_pair) scheduler_activate(s, cj->drift_part); + /* Drift the cell which will be sent at the level at which it is sent, + i.e. drift the cell specified in the send task (l->t) itself. */ + cell_activate_drift_part(l->t->ci, s); if (cell_is_active(cj, e)) { - for (l = cj->send_rho; l != NULL && l->t->cj->nodeID != ci->nodeID; l = l->next) ; @@ -2667,12 +2689,9 @@ void engine_marktasks_mapper(void *map_data, int num_elements, if (l == NULL) error("Missing link to send_xv task."); scheduler_activate(s, l->t); - /* Drift both cells, the foreign one at the level which it is sent. */ - if (l->t->ci->drift_part) - scheduler_activate(s, l->t->ci->drift_part); - else - error("Drift task missing !"); - if (t->type == task_type_pair) scheduler_activate(s, ci->drift_part); + /* Drift the cell which will be sent at the level at which it is sent, + i.e. drift the cell specified in the send task (l->t) itself. */ + cell_activate_drift_part(l->t->ci, s); if (cell_is_active(ci, e)) { for (l = ci->send_rho; l != NULL && l->t->cj->nodeID != cj->nodeID; @@ -2695,24 +2714,14 @@ void engine_marktasks_mapper(void *map_data, int num_elements, if (l == NULL) error("Missing link to send_ti task."); scheduler_activate(s, l->t); } - - } else if (t->type == task_type_pair) { - scheduler_activate(s, ci->drift_part); - scheduler_activate(s, cj->drift_part); - } -#else - if (t->type == task_type_pair) { - scheduler_activate(s, ci->drift_part); - scheduler_activate(s, cj->drift_part); } #endif } /* Kick/Drift/init ? */ - else if (t->type == task_type_kick1 || t->type == task_type_kick2 || - t->type == task_type_drift_part || - t->type == task_type_drift_gpart || - t->type == task_type_init_grav) { + if (t->type == task_type_kick1 || t->type == task_type_kick2 || + t->type == task_type_drift_part || t->type == task_type_drift_gpart || + t->type == task_type_init_grav) { if (cell_is_active(t->ci, e)) scheduler_activate(s, t); } @@ -3166,6 +3175,9 @@ void engine_skip_force_and_kick(struct engine *e) { t->type == task_type_cooling || t->type == task_type_sourceterms) t->skip = 1; } + + /* Run through the cells and clear some flags. */ + space_map_cells_pre(e->s, 1, cell_clear_drift_flags, NULL); } /** @@ -3182,10 +3194,12 @@ void engine_skip_drift(struct engine *e) { struct task *t = &tasks[i]; - /* Skip everything that moves the particles */ - if (t->type == task_type_drift_part || t->type == task_type_drift_gpart) - t->skip = 1; + /* Skip everything that updates the particles */ + if (t->type == task_type_drift_part) t->skip = 1; } + + /* Run through the cells and clear some flags. */ + space_map_cells_pre(e->s, 1, cell_clear_drift_flags, NULL); } /** @@ -3561,7 +3575,7 @@ void engine_do_drift_all_mapper(void *map_data, int num_elements, struct cell *c = &cells[ind]; if (c != NULL && c->nodeID == e->nodeID) { /* Drift all the particles */ - cell_drift_part(c, e); + cell_drift_part(c, e, 1); /* Drift all the g-particles */ cell_drift_gpart(c, e); @@ -4252,7 +4266,7 @@ void engine_init(struct engine *e, struct space *s, "Version: %s \n# " "Number of threads: %d\n# Number of MPI ranks: %d\n# Hydrodynamic " "scheme: %s\n# Hydrodynamic kernel: %s\n# No. of neighbours: %.2f " - "+/- %.2f\n# Eta: %f\n", + "+/- %.4f\n# Eta: %f\n", hostname(), git_branch(), git_revision(), compiler_name(), compiler_version(), e->nr_threads, e->nr_nodes, SPH_IMPLEMENTATION, kernel_name, e->hydro_properties->target_neighbours, diff --git a/src/engine.h b/src/engine.h index b995a20494c8e4324db9df5ef63d0346b3f9be6c..07769be463dd7110cfb6a86debee33d69964c8da 100644 --- a/src/engine.h +++ b/src/engine.h @@ -82,6 +82,7 @@ extern const char *engine_policy_names[]; #define engine_redistribute_alloc_margin 1.2 #define engine_default_energy_file_name "energy" #define engine_default_timesteps_file_name "timesteps" +#define engine_max_parts_per_ghost 1000 /* The rank of the engine as a global variable (for messages). */ extern int engine_rank; diff --git a/src/hydro/Gadget2/hydro.h b/src/hydro/Gadget2/hydro.h index 24cf766d1c39bd11fe7588f887302d3d5e5ffded..66a475f32ec06eb40ff2bc890bc156f76e3b7b9f 100644 --- a/src/hydro/Gadget2/hydro.h +++ b/src/hydro/Gadget2/hydro.h @@ -206,12 +206,13 @@ __attribute__((always_inline)) INLINE static void hydro_end_density( p->rho += p->mass * kernel_root; p->density.rho_dh -= hydro_dimension * p->mass * kernel_root; p->density.wcount += kernel_root; + p->density.wcount_dh -= hydro_dimension * kernel_root; /* Finish the calculation by inserting the missing h-factors */ p->rho *= h_inv_dim; p->density.rho_dh *= h_inv_dim_plus_one; - p->density.wcount *= kernel_norm; - p->density.wcount_dh *= h_inv * kernel_gamma * kernel_norm; + p->density.wcount *= h_inv_dim; + p->density.wcount_dh *= h_inv_dim_plus_one; const float rho_inv = 1.f / p->rho; @@ -264,6 +265,9 @@ __attribute__((always_inline)) INLINE static void hydro_prepare_force( const float fac_mu = 1.f; /* Will change with cosmological integration */ + /* Inverse of the physical density */ + const float rho_inv = 1.f / p->rho; + /* Compute the norm of the curl */ const float curl_v = sqrtf(p->density.rot_v[0] * p->density.rot_v[0] + p->density.rot_v[1] * p->density.rot_v[1] + @@ -279,7 +283,6 @@ __attribute__((always_inline)) INLINE static void hydro_prepare_force( const float soundspeed = gas_soundspeed_from_pressure(p->rho, pressure); /* Divide the pressure by the density squared to get the SPH term */ - const float rho_inv = 1.f / p->rho; const float P_over_rho2 = pressure * rho_inv * rho_inv; /* Compute the Balsara switch */ @@ -287,11 +290,11 @@ __attribute__((always_inline)) INLINE static void hydro_prepare_force( abs_div_v / (abs_div_v + curl_v + 0.0001f * soundspeed / fac_mu / p->h); /* Compute the "grad h" term */ - const float grad_h_term = + const float omega_inv = 1.f / (1.f + hydro_dimension_inv * p->h * p->density.rho_dh * rho_inv); /* Update variables. */ - p->force.f = grad_h_term; + p->force.f = omega_inv; p->force.P_over_rho2 = P_over_rho2; p->force.soundspeed = soundspeed; p->force.balsara = balsara; diff --git a/src/hydro/Gadget2/hydro_iact.h b/src/hydro/Gadget2/hydro_iact.h index ae4ee27f5ed83350aaf1a0bfd0069d7b85701854..25eb1e4b76d088d7dcd3d4ba57fe5901188be000 100644 --- a/src/hydro/Gadget2/hydro_iact.h +++ b/src/hydro/Gadget2/hydro_iact.h @@ -64,7 +64,7 @@ __attribute__((always_inline)) INLINE static void runner_iact_density( /* Compute contribution to the number of neighbours */ pi->density.wcount += wi; - pi->density.wcount_dh -= ui * wi_dx; + pi->density.wcount_dh -= (hydro_dimension * wi + ui * wi_dx); /* Compute the kernel function for pj */ const float hj_inv = 1.f / hj; @@ -77,7 +77,7 @@ __attribute__((always_inline)) INLINE static void runner_iact_density( /* Compute contribution to the number of neighbours */ pj->density.wcount += wj; - pj->density.wcount_dh -= uj * wj_dx; + pj->density.wcount_dh -= (hydro_dimension * wj + uj * wj_dx); const float faci = mj * wi_dx * r_inv; const float facj = mi * wj_dx * r_inv; @@ -112,9 +112,9 @@ __attribute__((always_inline)) INLINE static void runner_iact_vec_density( float *R2, float *Dx, float *Hi, float *Hj, struct part **pi, struct part **pj) { -#ifdef WITH_VECTORIZATION +#ifdef WITH_OLD_VECTORIZATION - vector r, ri, r2, xi, xj, hi, hj, hi_inv, hj_inv, wi, wj, wi_dx, wj_dx; + vector r, ri, r2, ui, uj, hi, hj, hi_inv, hj_inv, wi, wj, wi_dx, wj_dx; vector rhoi, rhoj, rhoi_dh, rhoj_dh, wcounti, wcountj, wcounti_dh, wcountj_dh; vector mi, mj; vector dx[3], dv[3]; @@ -161,15 +161,15 @@ __attribute__((always_inline)) INLINE static void runner_iact_vec_density( hi.v = vec_load(Hi); hi_inv = vec_reciprocal(hi); - xi.v = r.v * hi_inv.v; + ui.v = r.v * hi_inv.v; hj.v = vec_load(Hj); hj_inv = vec_reciprocal(hj); - xj.v = r.v * hj_inv.v; + uj.v = r.v * hj_inv.v; /* Compute the kernel function. */ - kernel_deval_vec(&xi, &wi, &wi_dx); - kernel_deval_vec(&xj, &wj, &wj_dx); + kernel_deval_vec(&ui, &wi, &wi_dx); + kernel_deval_vec(&uj, &wj, &wj_dx); /* Compute dv. */ dv[0].v = vi[0].v - vj[0].v; @@ -188,17 +188,17 @@ __attribute__((always_inline)) INLINE static void runner_iact_vec_density( /* Compute density of pi. */ rhoi.v = mj.v * wi.v; - rhoi_dh.v = mj.v * (vec_set1(hydro_dimension) * wi.v + xi.v * wi_dx.v); + rhoi_dh.v = mj.v * (vec_set1(hydro_dimension) * wi.v + ui.v * wi_dx.v); wcounti.v = wi.v; - wcounti_dh.v = xi.v * wi_dx.v; + wcounti_dh.v = (vec_set1(hydro_dimension) * wi.v + ui.v * wi_dx.v); div_vi.v = mj.v * dvdr.v * wi_dx.v; for (k = 0; k < 3; k++) curl_vi[k].v = mj.v * curlvr[k].v * wi_dx.v; /* Compute density of pj. */ rhoj.v = mi.v * wj.v; - rhoj_dh.v = mi.v * (vec_set1(hydro_dimension) * wj.v + xj.v * wj_dx.v); + rhoj_dh.v = mi.v * (vec_set1(hydro_dimension) * wj.v + uj.v * wj_dx.v); wcountj.v = wj.v; - wcountj_dh.v = xj.v * wj_dx.v; + wcountj_dh.v = (vec_set1(hydro_dimension) * wj.v + uj.v * wj_dx.v); div_vj.v = mi.v * dvdr.v * wj_dx.v; for (k = 0; k < 3; k++) curl_vj[k].v = mi.v * curlvr[k].v * wj_dx.v; @@ -241,7 +241,7 @@ __attribute__((always_inline)) INLINE static void runner_iact_nonsym_density( /* Get r and r inverse. */ const float r = sqrtf(r2); - const float ri = 1.0f / r; + const float r_inv = 1.0f / r; /* Compute the kernel function */ const float hi_inv = 1.0f / hi; @@ -254,9 +254,9 @@ __attribute__((always_inline)) INLINE static void runner_iact_nonsym_density( /* Compute contribution to the number of neighbours */ pi->density.wcount += wi; - pi->density.wcount_dh -= ui * wi_dx; + pi->density.wcount_dh -= (hydro_dimension * wi + ui * wi_dx); - const float fac = mj * wi_dx * ri; + const float fac = mj * wi_dx * r_inv; /* Compute dv dot r */ dv[0] = pi->v[0] - pj->v[0]; @@ -282,9 +282,9 @@ __attribute__((always_inline)) INLINE static void runner_iact_nonsym_vec_density(float *R2, float *Dx, float *Hi, float *Hj, struct part **pi, struct part **pj) { -#ifdef WITH_VECTORIZATION +#ifdef WITH_OLD_VECTORIZATION - vector r, ri, r2, xi, hi, hi_inv, wi, wi_dx; + vector r, ri, r2, ui, hi, hi_inv, wi, wi_dx; vector rhoi, rhoi_dh, wcounti, wcounti_dh, div_vi; vector mj; vector dx[3], dv[3]; @@ -328,9 +328,9 @@ runner_iact_nonsym_vec_density(float *R2, float *Dx, float *Hi, float *Hj, hi.v = vec_load(Hi); hi_inv = vec_reciprocal(hi); - xi.v = r.v * hi_inv.v; + ui.v = r.v * hi_inv.v; - kernel_deval_vec(&xi, &wi, &wi_dx); + kernel_deval_vec(&ui, &wi, &wi_dx); /* Compute dv. */ dv[0].v = vi[0].v - vj[0].v; @@ -349,9 +349,9 @@ runner_iact_nonsym_vec_density(float *R2, float *Dx, float *Hi, float *Hj, /* Compute density of pi. */ rhoi.v = mj.v * wi.v; - rhoi_dh.v = mj.v * (vec_set1(hydro_dimension) * wi.v + xi.v * wi_dx.v); + rhoi_dh.v = mj.v * (vec_set1(hydro_dimension) * wi.v + ui.v * wi_dx.v); wcounti.v = wi.v; - wcounti_dh.v = xi.v * wi_dx.v; + wcounti_dh.v = (vec_set1(hydro_dimension) * wi.v + ui.v * wi_dx.v); div_vi.v = mj.v * dvdr.v * wi_dx.v; for (k = 0; k < 3; k++) curl_vi[k].v = mj.v * curlvr[k].v * wi_dx.v; @@ -390,7 +390,7 @@ runner_iact_nonsym_1_vec_density(vector *r2, vector *dx, vector *dy, vector *dz, vector *curlvySum, vector *curlvzSum, vector mask, int knlMask) { - vector r, ri, xi, wi, wi_dx; + vector r, ri, ui, wi, wi_dx; vector mj; vector dvx, dvy, dvz; vector vjx, vjy, vjz; @@ -407,10 +407,10 @@ runner_iact_nonsym_1_vec_density(vector *r2, vector *dx, vector *dy, vector *dz, ri = vec_reciprocal_sqrt(*r2); r.v = vec_mul(r2->v, ri.v); - xi.v = vec_mul(r.v, hi_inv.v); + ui.v = vec_mul(r.v, hi_inv.v); /* Calculate the kernel for two particles. */ - kernel_deval_1_vec(&xi, &wi, &wi_dx); + kernel_deval_1_vec(&ui, &wi, &wi_dx); /* Compute dv. */ dvx.v = vec_sub(vix.v, vjx.v); @@ -440,12 +440,13 @@ runner_iact_nonsym_1_vec_density(vector *r2, vector *dx, vector *dy, vector *dz, rho_dhSum->v = _mm512_mask_sub_ps(rho_dhSum->v, knlMask, rho_dhSum->v, vec_mul(mj.v, vec_fma(vec_set1(hydro_dimension), wi.v, - vec_mul(xi.v, wi_dx.v)))); + vec_mul(ui.v, wi_dx.v)))); wcountSum->v = _mm512_mask_add_ps(wcountSum->v, knlMask, wi.v, wcountSum->v); - wcount_dhSum->v = _mm512_mask_sub_ps(wcount_dhSum->v, knlMask, - wcount_dhSum->v, vec_mul(xi.v, wi_dx.v)); + wcount_dhSum->v = _mm512_mask_sub_ps( + rho_dhSum->v, knlMask, rho_dhSum->v, + vec_fma(vec_set1(hydro_dimension), wi.v, vec_mul(ui.v, wi_dx.v))); div_vSum->v = _mm512_mask_sub_ps(div_vSum->v, knlMask, div_vSum->v, vec_mul(mj.v, vec_mul(dvdr.v, wi_dx.v))); @@ -464,10 +465,11 @@ runner_iact_nonsym_1_vec_density(vector *r2, vector *dx, vector *dy, vector *dz, #else rhoSum->v += vec_and(vec_mul(mj.v, wi.v), mask.v); rho_dhSum->v -= vec_and(vec_mul(mj.v, vec_fma(vec_set1(hydro_dimension), wi.v, - vec_mul(xi.v, wi_dx.v))), + vec_mul(ui.v, wi_dx.v))), mask.v); wcountSum->v += vec_and(wi.v, mask.v); - wcount_dhSum->v -= vec_and(vec_mul(xi.v, wi_dx.v), mask.v); + wcount_dhSum->v -= vec_and( + vec_fma(vec_set1(hydro_dimension), wi.v, vec_mul(ui.v, wi_dx.v)), mask.v); div_vSum->v -= vec_and(vec_mul(mj.v, vec_mul(dvdr.v, wi_dx.v)), mask.v); curlvxSum->v += vec_and(vec_mul(mj.v, vec_mul(curlvrx.v, wi_dx.v)), mask.v); curlvySum->v += vec_and(vec_mul(mj.v, vec_mul(curlvry.v, wi_dx.v)), mask.v); @@ -487,13 +489,13 @@ runner_iact_nonsym_2_vec_density( vector *div_vSum, vector *curlvxSum, vector *curlvySum, vector *curlvzSum, vector mask, vector mask2, int knlMask, int knlMask2) { - vector r, ri, r2, xi, wi, wi_dx; + vector r, ri, r2, ui, wi, wi_dx; vector mj; vector dx, dy, dz, dvx, dvy, dvz; vector vjx, vjy, vjz; vector dvdr; vector curlvrx, curlvry, curlvrz; - vector r_2, ri2, r2_2, xi2, wi2, wi_dx2; + vector r_2, ri2, r2_2, ui2, wi2, wi_dx2; vector mj2; vector dx2, dy2, dz2, dvx2, dvy2, dvz2; vector vjx2, vjy2, vjz2; @@ -524,11 +526,11 @@ runner_iact_nonsym_2_vec_density( r.v = vec_mul(r2.v, ri.v); r_2.v = vec_mul(r2_2.v, ri2.v); - xi.v = vec_mul(r.v, hi_inv.v); - xi2.v = vec_mul(r_2.v, hi_inv.v); + ui.v = vec_mul(r.v, hi_inv.v); + ui2.v = vec_mul(r_2.v, hi_inv.v); /* Calculate the kernel for two particles. */ - kernel_deval_2_vec(&xi, &wi, &wi_dx, &xi2, &wi2, &wi_dx2); + kernel_deval_2_vec(&ui, &wi, &wi_dx, &ui2, &wi2, &wi_dx2); /* Compute dv. */ dvx.v = vec_sub(vix.v, vjx.v); @@ -575,20 +577,23 @@ runner_iact_nonsym_2_vec_density( rho_dhSum->v = _mm512_mask_sub_ps(rho_dhSum->v, knlMask, rho_dhSum->v, vec_mul(mj.v, vec_fma(vec_set1(hydro_dimension), wi.v, - vec_mul(xi.v, wi_dx.v)))); - rho_dhSum->v = _mm512_mask_sub_ps( - rho_dhSum->v, knlMask2, rho_dhSum->v, - vec_mul(mj2.v, vec_fma(vec_set1(hydro_dimension), wi2.v, - vec_mul(xi2.v, wi_dx2.v)))); + vec_mul(ui.v, wi_dx.v)))); + rho_dhSum->v = + _mm512_mask_sub_ps(rho_dhSum->v, knlMask2, rho_dhSum->v, + vec_mul(mj.v, vec_fma(vec_set1(hydro_dimension), wi2.v, + vec_mul(ui2.v, wi_dx2.v)))); wcountSum->v = _mm512_mask_add_ps(wcountSum->v, knlMask, wi.v, wcountSum->v); wcountSum->v = _mm512_mask_add_ps(wcountSum->v, knlMask2, wi2.v, wcountSum->v); - wcount_dhSum->v = _mm512_mask_sub_ps(wcount_dhSum->v, knlMask, - wcount_dhSum->v, vec_mul(xi.v, wi_dx.v)); wcount_dhSum->v = _mm512_mask_sub_ps( - wcount_dhSum->v, knlMask2, wcount_dhSum->v, vec_mul(xi2.v, wi_dx2.v)); + rho_dhSum->v, knlMask, rho_dhSum->v, + vec_fma(vec_set1(hydro_dimension), wi.v, vec_mul(ui.v, wi_dx.v))); + + wcount_dhSum->v = _mm512_mask_sub_ps( + rho_dhSum->v, knlMask2, rho_dhSum->v, + vec_fma(vec_set1(hydro_dimension), wi2.v, vec_mul(ui2.v, wi_dx2.v))); div_vSum->v = _mm512_mask_sub_ps(div_vSum->v, knlMask, div_vSum->v, vec_mul(mj.v, vec_mul(dvdr.v, wi_dx.v))); @@ -619,16 +624,19 @@ runner_iact_nonsym_2_vec_density( rhoSum->v += vec_and(vec_mul(mj.v, wi.v), mask.v); rhoSum->v += vec_and(vec_mul(mj2.v, wi2.v), mask2.v); rho_dhSum->v -= vec_and(vec_mul(mj.v, vec_fma(vec_set1(hydro_dimension), wi.v, - vec_mul(xi.v, wi_dx.v))), + vec_mul(ui.v, wi_dx.v))), mask.v); rho_dhSum->v -= vec_and(vec_mul(mj2.v, vec_fma(vec_set1(hydro_dimension), wi2.v, - vec_mul(xi2.v, wi_dx2.v))), + vec_mul(ui2.v, wi_dx2.v))), mask2.v); wcountSum->v += vec_and(wi.v, mask.v); wcountSum->v += vec_and(wi2.v, mask2.v); - wcount_dhSum->v -= vec_and(vec_mul(xi.v, wi_dx.v), mask.v); - wcount_dhSum->v -= vec_and(vec_mul(xi2.v, wi_dx2.v), mask2.v); + wcount_dhSum->v -= vec_and( + vec_fma(vec_set1(hydro_dimension), wi.v, vec_mul(ui.v, wi_dx.v)), mask.v); + wcount_dhSum->v -= vec_and( + vec_fma(vec_set1(hydro_dimension), wi2.v, vec_mul(ui2.v, wi_dx2.v)), + mask2.v); div_vSum->v -= vec_and(vec_mul(mj.v, vec_mul(dvdr.v, wi_dx.v)), mask.v); div_vSum->v -= vec_and(vec_mul(mj2.v, vec_mul(dvdr2.v, wi_dx2.v)), mask2.v); curlvxSum->v += vec_and(vec_mul(mj.v, vec_mul(curlvrx.v, wi_dx.v)), mask.v); diff --git a/src/hydro/Gizmo/hydro.h b/src/hydro/Gizmo/hydro.h index 5b38637c8c7d441cd1eea5c08b621b35ba9abdbf..ee9335f8e95bfea689cd7b1b20c7ec7a0388a196 100644 --- a/src/hydro/Gizmo/hydro.h +++ b/src/hydro/Gizmo/hydro.h @@ -225,14 +225,15 @@ __attribute__((always_inline)) INLINE static void hydro_end_density( /* Some smoothing length multiples. */ const float h = p->h; const float ih = 1.0f / h; + const float ihdim = pow_dimension(ih); + const float ihdim_plus_one = ihdim * ih; /* Final operation on the density. */ p->density.wcount += kernel_root; - p->density.wcount *= kernel_norm; - - p->density.wcount_dh *= ih * kernel_gamma * kernel_norm; + p->density.wcount *= ihdim; - const float ihdim = pow_dimension(ih); + p->density.wcount_dh -= hydro_dimension * kernel_root; + p->density.wcount_dh *= ihdim_plus_one; /* Final operation on the geometry. */ /* we multiply with the smoothing kernel normalization ih3 and calculate the diff --git a/src/hydro/Gizmo/hydro_iact.h b/src/hydro/Gizmo/hydro_iact.h index 8798dc859a790a83ab7a3b6f1709b1302f574581..c7e86c22da5a8beeece0d7a1fcc9cff560105e6e 100644 --- a/src/hydro/Gizmo/hydro_iact.h +++ b/src/hydro/Gizmo/hydro_iact.h @@ -57,7 +57,7 @@ __attribute__((always_inline)) INLINE static void runner_iact_density( kernel_deval(xi, &wi, &wi_dx); pi->density.wcount += wi; - pi->density.wcount_dh -= xi * wi_dx; + pi->density.wcount_dh -= (hydro_dimension * wi + xi * wi_dx); /* these are eqns. (1) and (2) in the summary */ pi->geometry.volume += wi; @@ -74,7 +74,7 @@ __attribute__((always_inline)) INLINE static void runner_iact_density( kernel_deval(xj, &wj, &wj_dx); pj->density.wcount += wj; - pj->density.wcount_dh -= xj * wj_dx; + pj->density.wcount_dh -= (hydro_dimension * wj + xj * wj_dx); /* these are eqns. (1) and (2) in the summary */ pj->geometry.volume += wj; @@ -121,7 +121,7 @@ __attribute__((always_inline)) INLINE static void runner_iact_nonsym_density( kernel_deval(xi, &wi, &wi_dx); pi->density.wcount += wi; - pi->density.wcount_dh -= xi * wi_dx; + pi->density.wcount_dh -= (hydro_dimension * wi + xi * wi_dx); /* these are eqns. (1) and (2) in the summary */ pi->geometry.volume += wi; diff --git a/src/hydro/Minimal/hydro.h b/src/hydro/Minimal/hydro.h index 9fffc6d888d575f197481216419d3fef0b0aed5d..c33d31fedd01e7b8aa706571adfa11fce07893b3 100644 --- a/src/hydro/Minimal/hydro.h +++ b/src/hydro/Minimal/hydro.h @@ -219,12 +219,34 @@ __attribute__((always_inline)) INLINE static void hydro_end_density( p->rho += p->mass * kernel_root; p->density.rho_dh -= hydro_dimension * p->mass * kernel_root; p->density.wcount += kernel_root; + p->density.wcount_dh -= hydro_dimension * kernel_root; /* Finish the calculation by inserting the missing h-factors */ p->rho *= h_inv_dim; p->density.rho_dh *= h_inv_dim_plus_one; p->density.wcount *= kernel_norm; - p->density.wcount_dh *= h_inv * kernel_gamma * kernel_norm; + p->density.wcount_dh *= h_inv_dim_plus_one; +} + +/** + * @brief Sets all particle fields to sensible values when the #part has 0 ngbs. + * + * @param p The particle to act upon + * @param xp The extended particle data to act upon + */ +__attribute__((always_inline)) INLINE static void hydro_part_has_no_neighbours( + struct part *restrict p, struct xpart *restrict xp) { + + /* Some smoothing length multiples. */ + const float h = p->h; + const float h_inv = 1.0f / h; /* 1/h */ + const float h_inv_dim = pow_dimension(h_inv); /* 1/h^d */ + + /* Re-set problematic values */ + p->rho = p->mass * kernel_root * h_inv_dim; + p->density.wcount = kernel_root * kernel_norm * h_inv_dim; + p->density.rho_dh = 0.f; + p->density.wcount_dh = 0.f; } /** diff --git a/src/hydro/Minimal/hydro_iact.h b/src/hydro/Minimal/hydro_iact.h index 169947b99e92d9bd1b0870d502a49e311820ff81..621177a3363e651e12dd728ad96ddadce3812f0e 100644 --- a/src/hydro/Minimal/hydro_iact.h +++ b/src/hydro/Minimal/hydro_iact.h @@ -51,23 +51,23 @@ __attribute__((always_inline)) INLINE static void runner_iact_density( /* Compute density of pi. */ const float hi_inv = 1.f / hi; - const float xi = r * hi_inv; - kernel_deval(xi, &wi, &wi_dx); + const float ui = r * hi_inv; + kernel_deval(ui, &wi, &wi_dx); pi->rho += mj * wi; - pi->density.rho_dh -= mj * (hydro_dimension * wi + xi * wi_dx); + pi->density.rho_dh -= mj * (hydro_dimension * wi + ui * wi_dx); pi->density.wcount += wi; - pi->density.wcount_dh -= xi * wi_dx; + pi->density.wcount_dh -= (hydro_dimension * wi + ui * wi_dx); /* Compute density of pj. */ const float hj_inv = 1.f / hj; - const float xj = r * hj_inv; - kernel_deval(xj, &wj, &wj_dx); + const float uj = r * hj_inv; + kernel_deval(uj, &wj, &wj_dx); pj->rho += mi * wj; - pj->density.rho_dh -= mi * (hydro_dimension * wj + xj * wj_dx); + pj->density.rho_dh -= mi * (hydro_dimension * wj + uj * wj_dx); pj->density.wcount += wj; - pj->density.wcount_dh -= xj * wj_dx; + pj->density.wcount_dh -= (hydro_dimension * wj + uj * wj_dx); } /** @@ -96,13 +96,13 @@ __attribute__((always_inline)) INLINE static void runner_iact_nonsym_density( const float r = sqrtf(r2); const float h_inv = 1.f / hi; - const float xi = r * h_inv; - kernel_deval(xi, &wi, &wi_dx); + const float ui = r * h_inv; + kernel_deval(ui, &wi, &wi_dx); pi->rho += mj * wi; - pi->density.rho_dh -= mj * (hydro_dimension * wi + xi * wi_dx); + pi->density.rho_dh -= mj * (hydro_dimension * wi + ui * wi_dx); pi->density.wcount += wi; - pi->density.wcount_dh -= xi * wi_dx; + pi->density.wcount_dh -= (hydro_dimension * wi + ui * wi_dx); } /** diff --git a/src/hydro/PressureEntropy/hydro.h b/src/hydro/PressureEntropy/hydro.h index d3de13b8a36607764188d214af2737d7abffe21d..6d8d06abb126a574fd82c71a809235eb833494c6 100644 --- a/src/hydro/PressureEntropy/hydro.h +++ b/src/hydro/PressureEntropy/hydro.h @@ -252,11 +252,11 @@ __attribute__((always_inline)) INLINE static void hydro_part_has_no_neighbours( /* Re-set problematic values */ p->rho = p->mass * kernel_root * h_inv_dim; - p->rho_bar = p->mass * p->entropy_one_over_gamma * kernel_root * h_inv_dim; + p->rho_bar = p->mass * kernel_root * h_inv_dim; p->density.wcount = kernel_root * kernel_norm * h_inv_dim; p->density.rho_dh = 0.f; p->density.wcount_dh = 0.f; - p->density.pressure_dh = 0.f; // MATTHIEU: to be checked + p->density.pressure_dh = 0.f; p->density.div_v = 0.f; p->density.rot_v[0] = 0.f; p->density.rot_v[1] = 0.f; diff --git a/src/hydro_properties.c b/src/hydro_properties.c index 818c1b6349192ed73b28cd4c3ae771f89a3754cd..276d6d837bcc164bbc006906b2e632041e033cae 100644 --- a/src/hydro_properties.c +++ b/src/hydro_properties.c @@ -33,16 +33,26 @@ #include "kernel_hydro.h" #define hydro_props_default_max_iterations 30 -#define hydro_props_default_volume_change 2.0f +#define hydro_props_default_volume_change 1.4f #define hydro_props_default_h_max FLT_MAX +#define hydro_props_default_h_tolerance 1e-4 void hydro_props_init(struct hydro_props *p, const struct swift_params *params) { /* Kernel properties */ p->eta_neighbours = parser_get_param_float(params, "SPH:resolution_eta"); + + /* Tolerance for the smoothing length Newton-Raphson scheme */ + p->h_tolerance = parser_get_opt_param_float(params, "SPH:h_tolerance", + hydro_props_default_h_tolerance); + + /* Get derived properties */ p->target_neighbours = pow_dimension(p->eta_neighbours) * kernel_norm; - p->delta_neighbours = parser_get_param_float(params, "SPH:delta_neighbours"); + const float delta_eta = p->eta_neighbours * (1.f + p->h_tolerance); + p->delta_neighbours = + (pow_dimension(delta_eta) - pow_dimension(p->eta_neighbours)) * + kernel_norm; #ifdef SHADOWFAX_SPH /* change the meaning of target_neighbours and delta_neighbours */ @@ -81,9 +91,11 @@ void hydro_props_print(const struct hydro_props *p) { message("Hydrodynamic scheme: %s in %dD.", SPH_IMPLEMENTATION, (int)hydro_dimension); - message("Hydrodynamic kernel: %s with %.2f +/- %.2f neighbours (eta=%f).", - kernel_name, p->target_neighbours, p->delta_neighbours, - p->eta_neighbours); + message("Hydrodynamic kernel: %s with eta=%f (%.2f neighbours).", kernel_name, + p->eta_neighbours, p->target_neighbours); + + message("Hydrodynamic tolerance in h: %.5f (+/- %.4f neighbours).", + p->h_tolerance, p->delta_neighbours); message("Hydrodynamic integration: CFL parameter: %.4f.", p->CFL_condition); @@ -110,6 +122,7 @@ void hydro_props_print_snapshot(hid_t h_grpsph, const struct hydro_props *p) { io_write_attribute_f(h_grpsph, "Kernel target N_ngb", p->target_neighbours); io_write_attribute_f(h_grpsph, "Kernel delta N_ngb", p->delta_neighbours); io_write_attribute_f(h_grpsph, "Kernel eta", p->eta_neighbours); + io_write_attribute_f(h_grpsph, "Smoothing length tolerance", p->h_tolerance); io_write_attribute_f(h_grpsph, "Maximal smoothing length", p->h_max); io_write_attribute_f(h_grpsph, "CFL parameter", p->CFL_condition); io_write_attribute_f(h_grpsph, "Volume log(max(delta h))", diff --git a/src/hydro_properties.h b/src/hydro_properties.h index 716c4c060c21eb95d05f9d50e13d4681a958a6fd..a887ccb6df13b649cd1ef1009059c6f08908669c 100644 --- a/src/hydro_properties.h +++ b/src/hydro_properties.h @@ -16,10 +16,14 @@ * along with this program. If not, see <http://www.gnu.org/licenses/>. * ******************************************************************************/ - #ifndef SWIFT_HYDRO_PROPERTIES #define SWIFT_HYDRO_PROPERTIES +/** + * @file hydro_properties.h + * @brief Contains all the constants and parameters of the hydro scheme + */ + /* Config parameters. */ #include "../config.h" @@ -35,19 +39,28 @@ */ struct hydro_props { - /* Kernel properties */ + /*! Resolution parameter */ float eta_neighbours; + + /*! Target weightd number of neighbours (for info only)*/ float target_neighbours; + + /*! Smoothing length tolerance */ + float h_tolerance; + + /*! Tolerance on neighbour number (for info only)*/ float delta_neighbours; - /* Maximal smoothing length */ + /*! Maximal smoothing length */ float h_max; - /* Number of iterations to converge h */ + /*! Maximal number of iterations to converge h */ int max_smoothing_iterations; - /* Time integration properties */ + /*! Time integration properties */ float CFL_condition; + + /*! Maximal change of h over one time-step */ float log_max_h_change; }; diff --git a/src/runner.c b/src/runner.c index 735865bad088f09e01756a4e9c6ee4c2f81ab4aa..c6aeb6db895b0545ba33a5fd0d940839e784c466 100644 --- a/src/runner.c +++ b/src/runner.c @@ -316,23 +316,35 @@ void runner_check_sorts(struct cell *c, int flags) { * @param r The #runner. * @param c The #cell. * @param flags Cell flag. + * @param cleanup If true, re-build the sorts for the selected flags instead + * of just adding them. * @param clock Flag indicating whether to record the timing or not, needed * for recursive calls. */ -void runner_do_sort(struct runner *r, struct cell *c, int flags, int clock) { +void runner_do_sort(struct runner *r, struct cell *c, int flags, int cleanup, + int clock) { struct entry *finger; struct entry *fingers[8]; struct part *parts = c->parts; struct xpart *xparts = c->xparts; - struct entry *sort; const int count = c->count; float buff[8]; TIMER_TIC; + /* We need to do the local sorts plus whatever was requested further up. */ + flags |= c->do_sort; + if (cleanup) { + c->sorted = 0; + } else { + flags &= ~c->sorted; + } + if (flags == 0 && !c->do_sub_sort) return; + /* Check that the particles have been moved to the current time */ - if (!cell_are_part_drifted(c, r->e)) error("Sorting un-drifted cell"); + if (flags && !cell_are_part_drifted(c, r->e)) + error("Sorting un-drifted cell"); #ifdef SWIFT_DEBUG_CHECKS /* Make sure the sort flags are consistent (downward). */ @@ -345,42 +357,37 @@ void runner_do_sort(struct runner *r, struct cell *c, int flags, int clock) { } #endif - /* Clean-up the flags, i.e. filter out what's already been sorted, but - only if the sorts are recent. */ - if (c->ti_sort == r->e->ti_current) { - /* Ignore dimensions that have been sorted in this timestep. */ - // flags &= ~c->sorted; - } else { - /* Clean old (stale) sorts. */ - flags |= c->sorted; - c->sorted = 0; - } - if (flags == 0) return; + /* Update the sort timer which represents the last time the sorts + were re-set. */ + if (c->sorted == 0) c->ti_sort = r->e->ti_current; /* start by allocating the entry arrays. */ - if (c->sort == NULL || c->sortsize < count) { - if (c->sort != NULL) free(c->sort); - c->sortsize = count * 1.1; - if ((c->sort = (struct entry *)malloc(sizeof(struct entry) * - (c->sortsize + 1) * 13)) == NULL) + if (c->sort == NULL) { + if ((c->sort = (struct entry *)malloc(sizeof(struct entry) * (count + 1) * + 13)) == NULL) error("Failed to allocate sort memory."); } - sort = c->sort; + struct entry *sort = c->sort; /* Does this cell have any progeny? */ if (c->split) { /* Fill in the gaps within the progeny. */ float dx_max_sort = 0.0f; + float dx_max_sort_old = 0.0f; for (int k = 0; k < 8; k++) { if (c->progeny[k] != NULL) { - if (flags & ~c->progeny[k]->sorted || - c->progeny[k]->dx_max_sort > c->dmin * space_maxreldx) - runner_do_sort(r, c->progeny[k], flags, 0); + /* Only propagate cleanup if the progeny is stale. */ + runner_do_sort(r, c->progeny[k], flags, + cleanup && (c->progeny[k]->dx_max_sort > + space_maxreldx * c->progeny[k]->dmin), + 0); dx_max_sort = max(dx_max_sort, c->progeny[k]->dx_max_sort); + dx_max_sort_old = max(dx_max_sort_old, c->progeny[k]->dx_max_sort_old); } } c->dx_max_sort = dx_max_sort; + c->dx_max_sort_old = dx_max_sort_old; /* Loop over the 13 different sort arrays. */ for (int j = 0; j < 13; j++) { @@ -444,7 +451,7 @@ void runner_do_sort(struct runner *r, struct cell *c, int flags, int clock) { sort[j * (count + 1) + count].i = 0; /* Mark as sorted. */ - c->sorted |= (1 << j); + atomic_or(&c->sorted, 1 << j); } /* loop over sort arrays. */ @@ -453,13 +460,23 @@ void runner_do_sort(struct runner *r, struct cell *c, int flags, int clock) { /* Otherwise, just sort. */ else { - /* Reset the sort distance if we are in a local cell */ - if (xparts != NULL) { - for (int k = 0; k < count; k++) { - xparts[k].x_diff_sort[0] = 0.0f; - xparts[k].x_diff_sort[1] = 0.0f; - xparts[k].x_diff_sort[2] = 0.0f; + /* Reset the sort distance */ + if (c->sorted == 0) { +#ifdef SWIFT_DEBUG_CHECKS + if (xparts != NULL && c->nodeID != engine_rank) + error("Have non-NULL xparts in foreign cell"); +#endif + + /* And the individual sort distances if we are a local cell */ + if (xparts != NULL) { + for (int k = 0; k < count; k++) { + xparts[k].x_diff_sort[0] = 0.0f; + xparts[k].x_diff_sort[1] = 0.0f; + xparts[k].x_diff_sort[2] = 0.0f; + } } + c->dx_max_sort_old = 0.f; + c->dx_max_sort = 0.f; } /* Fill the sort array. */ @@ -480,22 +497,10 @@ void runner_do_sort(struct runner *r, struct cell *c, int flags, int clock) { sort[j * (count + 1) + count].d = FLT_MAX; sort[j * (count + 1) + count].i = 0; runner_do_sort_ascending(&sort[j * (count + 1)], count); - c->sorted |= (1 << j); + atomic_or(&c->sorted, 1 << j); } - - /* Finally, clear the dx_max_sort field of this cell. */ - c->dx_max_sort = 0.f; - - /* If this was not just an update, invalidate the sorts above this one. */ - if (c->ti_sort < r->e->ti_current) - for (struct cell *finger = c->parent; finger != NULL; - finger = finger->parent) - finger->sorted = 0; } - /* Update the sort timer. */ - c->ti_sort = r->e->ti_current; - #ifdef SWIFT_DEBUG_CHECKS /* Verify the sorting. */ for (int j = 0; j < 13; j++) { @@ -518,6 +523,11 @@ void runner_do_sort(struct runner *r, struct cell *c, int flags, int clock) { } #endif + /* Clear the cell's sort flags. */ + c->do_sort = 0; + c->do_sub_sort = 0; + c->requires_sorts = 0; + if (clock) TIMER_TOC(timer_dosort); } @@ -621,11 +631,9 @@ void runner_do_ghost(struct runner *r, struct cell *c, int timer) { const struct engine *e = r->e; const struct space *s = e->s; const float hydro_h_max = e->hydro_properties->h_max; - const float target_wcount = e->hydro_properties->target_neighbours; - const float max_wcount = - target_wcount + e->hydro_properties->delta_neighbours; - const float min_wcount = - target_wcount - e->hydro_properties->delta_neighbours; + const float eps = e->hydro_properties->h_tolerance; + const float hydro_eta_dim = + pow_dimension(e->hydro_properties->eta_neighbours); const int max_smoothing_iter = e->hydro_properties->max_smoothing_iterations; int redo = 0, count = 0; @@ -669,28 +677,47 @@ void runner_do_ghost(struct runner *r, struct cell *c, int timer) { if (!part_is_active(p, e)) error("Ghost applied to inactive particle"); #endif - /* Finish the density calculation */ - hydro_end_density(p); + /* Get some useful values */ + const float h_old = p->h; + const float h_old_dim = pow_dimension(h_old); + const float h_old_dim_minus_one = pow_dimension_minus_one(h_old); + float h_new; - /* Did we get the right number of neighbours? */ - if (p->density.wcount > max_wcount || p->density.wcount < min_wcount) { + if (p->density.wcount == 0.f) { /* No neighbours case */ - float h_corr = 0.f; + /* Double h and try again */ + h_new = 2.f * h_old; + } else { - /* If no derivative, double the smoothing length. */ - if (p->density.wcount_dh == 0.0f) h_corr = p->h; + /* Finish the density calculation */ + hydro_end_density(p); - /* Otherwise, compute the smoothing length update (Newton step). */ - else { - h_corr = (target_wcount - p->density.wcount) / p->density.wcount_dh; + /* Compute one step of the Newton-Raphson scheme */ + const float n_sum = p->density.wcount * h_old_dim; + const float n_target = hydro_eta_dim; + const float f = n_sum - n_target; + const float f_prime = + p->density.wcount_dh * h_old_dim + + hydro_dimension * p->density.wcount * h_old_dim_minus_one; - /* Truncate to the range [ -p->h/2 , p->h ]. */ - h_corr = (h_corr < p->h) ? h_corr : p->h; - h_corr = (h_corr > -0.5f * p->h) ? h_corr : -0.5f * p->h; - } + h_new = h_old - f / f_prime; + +#ifdef SWIFT_DEBUG_CHECKS + if ((f > 0.f && h_new > h_old) || (f < 0.f && h_new < h_old)) + error( + "Smoothing length correction not going in the right direction"); +#endif + + /* Safety check: truncate to the range [ h_old/2 , 2h_old ]. */ + h_new = min(h_new, 2.f * h_old); + h_new = max(h_new, 0.5f * h_old); + } + + /* Check whether the particle has an inappropriate smoothing length */ + if (fabsf(h_new - h_old) > eps * h_old) { /* Ok, correct then */ - p->h += h_corr; + p->h = h_new; /* If below the absolute maximum, try again */ if (p->h < hydro_h_max) { @@ -742,6 +769,11 @@ void runner_do_ghost(struct runner *r, struct cell *c, int timer) { /* Run through this cell's density interactions. */ for (struct link *l = finger->density; l != NULL; l = l->next) { +#ifdef SWIFT_DEBUG_CHECKS + if (l->t->ti_run < r->e->ti_current) + error("Density task should have been run."); +#endif + /* Self-interaction? */ if (l->t->type == task_type_self) runner_doself_subset_density(r, finger, parts, pid, count); @@ -786,7 +818,7 @@ void runner_do_ghost(struct runner *r, struct cell *c, int timer) { } #else if (count) - message("Smoothing length failed to converge on %i particles.", count); + error("Smoothing length failed to converge on %i particles.", count); #endif /* Be clean */ @@ -854,7 +886,7 @@ void runner_do_drift_part(struct runner *r, struct cell *c, int timer) { TIMER_TIC; - cell_drift_part(c, r->e); + cell_drift_part(c, r->e, 0); if (timer) TIMER_TOC(timer_drift_part); } @@ -1496,6 +1528,10 @@ void runner_do_recv_part(struct runner *r, struct cell *c, int clear_sorts, timebin_t time_bin_max = 0; float h_max = 0.f; +#ifdef SWIFT_DEBUG_CHECKS + if (c->nodeID == engine_rank) error("Updating a local cell!"); +#endif + /* Clear this cell's sorted mask. */ if (clear_sorts) c->sorted = 0; @@ -1575,6 +1611,10 @@ void runner_do_recv_gpart(struct runner *r, struct cell *c, int timer) { timebin_t time_bin_min = num_time_bins; timebin_t time_bin_max = 0; +#ifdef SWIFT_DEBUG_CHECKS + if (c->nodeID == engine_rank) error("Updating a local cell!"); +#endif + /* If this cell is a leaf, collect the particle data. */ if (!c->split) { @@ -1648,6 +1688,10 @@ void runner_do_recv_spart(struct runner *r, struct cell *c, int timer) { timebin_t time_bin_min = num_time_bins; timebin_t time_bin_max = 0; +#ifdef SWIFT_DEBUG_CHECKS + if (c->nodeID == engine_rank) error("Updating a local cell!"); +#endif + /* If this cell is a leaf, collect the particle data. */ if (!c->split) { @@ -1778,7 +1822,7 @@ void *runner_main(void *data) { /* Special case for sorts */ if (!cell_is_active(ci, e) && t->type == task_type_sort && - t->flags == 0) + !(ci->do_sort || ci->do_sub_sort)) error( "Task (type='%s/%s') should have been skipped ti_current=%lld " "c->ti_end_min=%lld t->flags=%d", @@ -1879,7 +1923,11 @@ void *runner_main(void *data) { break; case task_type_sort: - runner_do_sort(r, ci, t->flags, 1); + /* Cleanup only if any of the indices went stale. */ + runner_do_sort(r, ci, t->flags, + ci->dx_max_sort_old > space_maxreldx * ci->dmin, 1); + /* Reset the sort flags as our work here is done. */ + t->flags = 0; break; case task_type_init_grav: runner_do_init_grav(r, ci, 1); @@ -1922,9 +1970,9 @@ void *runner_main(void *data) { } else if (t->subtype == task_subtype_xv) { runner_do_recv_part(r, ci, 1, 1); } else if (t->subtype == task_subtype_rho) { - runner_do_recv_part(r, ci, 1, 1); + runner_do_recv_part(r, ci, 0, 1); } else if (t->subtype == task_subtype_gradient) { - runner_do_recv_part(r, ci, 1, 1); + runner_do_recv_part(r, ci, 0, 1); } else if (t->subtype == task_subtype_gpart) { runner_do_recv_gpart(r, ci, 1); } else if (t->subtype == task_subtype_spart) { diff --git a/src/runner.h b/src/runner.h index 0c6edc3c0c1406855ac79c96617bbdaa310bb46d..facadf1608fb7e06af952eedbf1151fa68530bef 100644 --- a/src/runner.h +++ b/src/runner.h @@ -61,7 +61,8 @@ struct runner { /* Function prototypes. */ void runner_do_ghost(struct runner *r, struct cell *c, int timer); void runner_do_extra_ghost(struct runner *r, struct cell *c, int timer); -void runner_do_sort(struct runner *r, struct cell *c, int flag, int clock); +void runner_do_sort(struct runner *r, struct cell *c, int flag, int cleanup, + int clock); void runner_do_drift_part(struct runner *r, struct cell *c, int timer); void runner_do_drift_gpart(struct runner *r, struct cell *c, int timer); void runner_do_kick1(struct runner *r, struct cell *c, int timer); diff --git a/src/runner_doiact.h b/src/runner_doiact.h index a2abd9458f8b490497fe0b9b33c2179b283d639d..af38ac6041cd392578a93440bc052097be53724d 100644 --- a/src/runner_doiact.h +++ b/src/runner_doiact.h @@ -634,12 +634,10 @@ void DOPAIR_SUBSET(struct runner *r, struct cell *restrict ci, const int flipped = runner_flip[sid]; sid = sortlistID[sid]; - /* Have the cells been sorted? */ + /* Has the cell cj been sorted? */ if (!(cj->sorted & (1 << sid)) || - cj->dx_max_sort > space_maxreldx * cj->dmin) { - DOPAIR_SUBSET_NAIVE(r, ci, parts_i, ind, count, cj); - return; - } + cj->dx_max_sort_old > space_maxreldx * cj->dmin) + error("Interacting unsorted cells."); /* Pick-out the sorted lists. */ const struct entry *restrict sort_j = &cj->sort[sid * (cj->count + 1)]; @@ -920,8 +918,13 @@ void DOPAIR1(struct runner *r, struct cell *ci, struct cell *cj, const int sid, p->x[1] * runner_shift[sid][1] + p->x[2] * runner_shift[sid][2]; if (fabsf(d - sort_i[pid].d) - ci->dx_max_sort > - 1.0e-6 * max(fabsf(d), ci->dx_max_sort)) - error("particle shift diff exceeds dx_max_sort."); + 1.0e-4 * max(fabsf(d), ci->dx_max_sort_old)) + error( + "particle shift diff exceeds dx_max_sort in cell ci. ci->nodeID=%d " + "cj->nodeID=%d d=%e sort_i[pid].d=%e ci->dx_max_sort=%e " + "ci->dx_max_sort_old=%e", + ci->nodeID, cj->nodeID, d, sort_i[pid].d, ci->dx_max_sort, + ci->dx_max_sort_old); } for (int pjd = 0; pjd < cj->count; pjd++) { const struct part *p = &cj->parts[sort_j[pjd].i]; @@ -929,8 +932,13 @@ void DOPAIR1(struct runner *r, struct cell *ci, struct cell *cj, const int sid, p->x[1] * runner_shift[sid][1] + p->x[2] * runner_shift[sid][2]; if (fabsf(d - sort_j[pjd].d) - cj->dx_max_sort > - 1.0e-6 * max(fabsf(d), cj->dx_max_sort)) - error("particle shift diff exceeds dx_max_sort."); + 1.0e-4 * max(fabsf(d), cj->dx_max_sort_old)) + error( + "particle shift diff exceeds dx_max_sort in cell cj. cj->nodeID=%d " + "ci->nodeID=%d d=%e sort_j[pjd].d=%e cj->dx_max_sort=%e " + "cj->dx_max_sort_old=%e", + cj->nodeID, ci->nodeID, d, sort_j[pjd].d, cj->dx_max_sort, + cj->dx_max_sort_old); } #endif /* SWIFT_DEBUG_CHECKS */ @@ -1128,14 +1136,12 @@ void DOPAIR1_BRANCH(struct runner *r, struct cell *ci, struct cell *cj) { const int sid = space_getsid(e->s, &ci, &cj, shift); /* Have the cells been sorted? */ - if (!(ci->sorted & (1 << sid)) || ci->dx_max_sort > space_maxreldx * ci->dmin) - runner_do_sort(r, ci, (1 << sid), 1); - if (!(cj->sorted & (1 << sid)) || cj->dx_max_sort > space_maxreldx * cj->dmin) - runner_do_sort(r, cj, (1 << sid), 1); - - /* Have the cells been sorted? */ - if (!(ci->sorted & (1 << sid)) || !(cj->sorted & (1 << sid))) - error("Trying to interact unsorted cells."); + if (!(ci->sorted & (1 << sid)) || + ci->dx_max_sort_old > space_maxreldx * ci->dmin) + error("Interacting unsorted cells."); + if (!(cj->sorted & (1 << sid)) || + cj->dx_max_sort_old > space_maxreldx * cj->dmin) + error("Interacting unsorted cells."); #if defined(WITH_VECTORIZATION) && defined(GADGET2_SPH) && \ (DOPAIR1_BRANCH == runner_dopair1_density_branch) @@ -1187,10 +1193,12 @@ void DOPAIR2(struct runner *r, struct cell *ci, struct cell *cj) { const int sid = space_getsid(e->s, &ci, &cj, shift); /* Have the cells been sorted? */ - if (!(ci->sorted & (1 << sid)) || ci->dx_max_sort > space_maxreldx * ci->dmin) - runner_do_sort(r, ci, (1 << sid), 1); - if (!(cj->sorted & (1 << sid)) || cj->dx_max_sort > space_maxreldx * cj->dmin) - runner_do_sort(r, cj, (1 << sid), 1); + if (!(ci->sorted & (1 << sid)) || + ci->dx_max_sort_old > space_maxreldx * ci->dmin) + error("Interacting unsorted cells."); + if (!(cj->sorted & (1 << sid)) || + cj->dx_max_sort_old > space_maxreldx * cj->dmin) + error("Interacting unsorted cells."); /* Get the cutoff shift. */ double rshift = 0.0; @@ -1209,8 +1217,13 @@ void DOPAIR2(struct runner *r, struct cell *ci, struct cell *cj) { p->x[1] * runner_shift[sid][1] + p->x[2] * runner_shift[sid][2]; if (fabsf(d - sort_i[pid].d) - ci->dx_max_sort > - 1.0e-6 * max(fabsf(d), ci->dx_max_sort)) - error("particle shift diff exceeds dx_max_sort."); + 1.0e-4 * max(fabsf(d), ci->dx_max_sort_old)) + error( + "particle shift diff exceeds dx_max_sort in cell ci. ci->nodeID=%d " + "cj->nodeID=%d d=%e sort_i[pid].d=%e ci->dx_max_sort=%e " + "ci->dx_max_sort_old=%e", + ci->nodeID, cj->nodeID, d, sort_i[pid].d, ci->dx_max_sort, + ci->dx_max_sort_old); } for (int pjd = 0; pjd < cj->count; pjd++) { const struct part *p = &cj->parts[sort_j[pjd].i]; @@ -1218,8 +1231,13 @@ void DOPAIR2(struct runner *r, struct cell *ci, struct cell *cj) { p->x[1] * runner_shift[sid][1] + p->x[2] * runner_shift[sid][2]; if (fabsf(d - sort_j[pjd].d) - cj->dx_max_sort > - 1.0e-6 * max(fabsf(d), cj->dx_max_sort)) - error("particle shift diff exceeds dx_max_sort."); + 1.0e-4 * max(fabsf(d), cj->dx_max_sort_old)) + error( + "particle shift diff exceeds dx_max_sort in cell cj. cj->nodeID=%d " + "ci->nodeID=%d d=%e sort_j[pjd].d=%e cj->dx_max_sort=%e " + "cj->dx_max_sort_old=%e", + cj->nodeID, ci->nodeID, d, sort_j[pjd].d, cj->dx_max_sort, + cj->dx_max_sort_old); } #endif /* SWIFT_DEBUG_CHECKS */ @@ -2095,19 +2113,12 @@ void DOSUB_PAIR1(struct runner *r, struct cell *ci, struct cell *cj, int sid, if (!cell_is_active(ci, e) && !cell_is_active(cj, e)) return; if (ci->count == 0 || cj->count == 0) return; - /* Get the cell dimensions. */ - const float h = min(ci->width[0], min(ci->width[1], ci->width[2])); - /* Get the type of pair if not specified explicitly. */ - // if ( sid < 0 ) double shift[3]; sid = space_getsid(s, &ci, &cj, shift); /* Recurse? */ - if (ci->split && cj->split && - max(ci->h_max, cj->h_max) * kernel_gamma + ci->dx_max_sort + - cj->dx_max_sort < - h / 2) { + if (cell_can_recurse_in_pair_task(ci) && cell_can_recurse_in_pair_task(cj)) { /* Different types of flags. */ switch (sid) { @@ -2311,16 +2322,16 @@ void DOSUB_PAIR1(struct runner *r, struct cell *ci, struct cell *cj, int sid, else if (cell_is_active(ci, e) || cell_is_active(cj, e)) { /* Make sure both cells are drifted to the current timestep. */ - if (!cell_are_part_drifted(ci, e)) cell_drift_part(ci, e); - if (!cell_are_part_drifted(cj, e)) cell_drift_part(cj, e); + if (!cell_are_part_drifted(ci, e) || !cell_are_part_drifted(cj, e)) + error("Interacting undrifted cells."); /* Do any of the cells need to be sorted first? */ if (!(ci->sorted & (1 << sid)) || - ci->dx_max_sort > ci->dmin * space_maxreldx) - runner_do_sort(r, ci, (1 << sid), 1); + ci->dx_max_sort_old > ci->dmin * space_maxreldx) + error("Interacting unsorted cell."); if (!(cj->sorted & (1 << sid)) || - cj->dx_max_sort > cj->dmin * space_maxreldx) - runner_do_sort(r, cj, (1 << sid), 1); + cj->dx_max_sort_old > cj->dmin * space_maxreldx) + error("Interacting unsorted cell."); /* Compute the interactions. */ DOPAIR1_BRANCH(r, ci, cj); @@ -2344,7 +2355,7 @@ void DOSUB_SELF1(struct runner *r, struct cell *ci, int gettimer) { if (ci->count == 0 || !cell_is_active(ci, r->e)) return; /* Recurse? */ - if (ci->split) { + if (cell_can_recurse_in_self_task(ci)) { /* Loop over all progeny. */ for (int k = 0; k < 8; k++) @@ -2360,7 +2371,7 @@ void DOSUB_SELF1(struct runner *r, struct cell *ci, int gettimer) { else { /* Drift the cell to the current timestep if needed. */ - if (!cell_are_part_drifted(ci, r->e)) cell_drift_part(ci, r->e); + if (!cell_are_part_drifted(ci, r->e)) error("Interacting undrifted cell."); #if (DOSELF1 == runner_doself1_density) && defined(WITH_VECTORIZATION) && \ defined(GADGET2_SPH) @@ -2397,19 +2408,12 @@ void DOSUB_PAIR2(struct runner *r, struct cell *ci, struct cell *cj, int sid, if (!cell_is_active(ci, e) && !cell_is_active(cj, e)) return; if (ci->count == 0 || cj->count == 0) return; - /* Get the cell dimensions. */ - const float h = min(ci->width[0], min(ci->width[1], ci->width[2])); - /* Get the type of pair if not specified explicitly. */ - // if ( sid < 0 ) double shift[3]; sid = space_getsid(s, &ci, &cj, shift); /* Recurse? */ - if (ci->split && cj->split && - max(ci->h_max, cj->h_max) * kernel_gamma + ci->dx_max_sort + - cj->dx_max_sort < - h / 2) { + if (cell_can_recurse_in_pair_task(ci) && cell_can_recurse_in_pair_task(cj)) { /* Different types of flags. */ switch (sid) { @@ -2613,16 +2617,16 @@ void DOSUB_PAIR2(struct runner *r, struct cell *ci, struct cell *cj, int sid, else if (cell_is_active(ci, e) || cell_is_active(cj, e)) { /* Make sure both cells are drifted to the current timestep. */ - if (!cell_are_part_drifted(ci, e)) cell_drift_part(ci, e); - if (!cell_are_part_drifted(cj, e)) cell_drift_part(cj, e); + if (!cell_are_part_drifted(ci, e) || !cell_are_part_drifted(cj, e)) + error("Interacting undrifted cells."); /* Do any of the cells need to be sorted first? */ if (!(ci->sorted & (1 << sid)) || - ci->dx_max_sort > ci->dmin * space_maxreldx) - runner_do_sort(r, ci, (1 << sid), 1); + ci->dx_max_sort_old > ci->dmin * space_maxreldx) + error("Interacting unsorted cells."); if (!(cj->sorted & (1 << sid)) || - cj->dx_max_sort > cj->dmin * space_maxreldx) - runner_do_sort(r, cj, (1 << sid), 1); + cj->dx_max_sort_old > cj->dmin * space_maxreldx) + error("Interacting unsorted cells."); /* Compute the interactions. */ DOPAIR2(r, ci, cj); @@ -2646,7 +2650,7 @@ void DOSUB_SELF2(struct runner *r, struct cell *ci, int gettimer) { if (ci->count == 0 || !cell_is_active(ci, r->e)) return; /* Recurse? */ - if (ci->split) { + if (cell_can_recurse_in_self_task(ci)) { /* Loop over all progeny. */ for (int k = 0; k < 8; k++) @@ -2674,22 +2678,29 @@ void DOSUB_SUBSET(struct runner *r, struct cell *ci, struct part *parts, TIMER_TIC; + /* Should we even bother? */ + if (!cell_is_active(ci, e) && (cj == NULL || !cell_is_active(cj, e))) return; + if (ci->count == 0 || (cj != NULL && cj->count == 0)) return; + /* Find out in which sub-cell of ci the parts are. */ struct cell *sub = NULL; - for (int k = 0; k < 8; k++) - if (ci->progeny[k] != NULL) { - if (&parts[ind[0]] >= &ci->progeny[k]->parts[0] && - &parts[ind[0]] < &ci->progeny[k]->parts[ci->progeny[k]->count]) { - sub = ci->progeny[k]; - break; + if (ci->split) { + for (int k = 0; k < 8; k++) { + if (ci->progeny[k] != NULL) { + if (&parts[ind[0]] >= &ci->progeny[k]->parts[0] && + &parts[ind[0]] < &ci->progeny[k]->parts[ci->progeny[k]->count]) { + sub = ci->progeny[k]; + break; + } } } + } /* Is this a single cell? */ if (cj == NULL) { /* Recurse? */ - if (ci->split) { + if (cell_can_recurse_in_self_task(ci)) { /* Loop over all progeny. */ DOSUB_SUBSET(r, sub, parts, ind, count, NULL, -1, 0); @@ -2708,14 +2719,9 @@ void DOSUB_SUBSET(struct runner *r, struct cell *ci, struct part *parts, /* Otherwise, it's a pair interaction. */ else { - /* Get the cell dimensions. */ - const float h = min(ci->width[0], min(ci->width[1], ci->width[2])); - /* Recurse? */ - if (ci->split && cj->split && - max(ci->h_max, cj->h_max) * kernel_gamma + ci->dx_max_sort + - cj->dx_max_sort < - h / 2) { + if (cell_can_recurse_in_pair_task(ci) && + cell_can_recurse_in_pair_task(cj)) { /* Get the type of pair if not specified explicitly. */ double shift[3] = {0.0, 0.0, 0.0}; @@ -3226,26 +3232,8 @@ void DOSUB_SUBSET(struct runner *r, struct cell *ci, struct part *parts, /* Otherwise, compute the pair directly. */ else if (cell_is_active(ci, e) || cell_is_active(cj, e)) { - /* Get the relative distance between the pairs, wrapping. */ - double shift[3] = {0.0, 0.0, 0.0}; - for (int k = 0; k < 3; k++) { - if (cj->loc[k] - ci->loc[k] < -s->dim[k] / 2) - shift[k] = s->dim[k]; - else if (cj->loc[k] - ci->loc[k] > s->dim[k] / 2) - shift[k] = -s->dim[k]; - } - - /* Get the sorting index. */ - int new_sid = 0; - for (int k = 0; k < 3; k++) - new_sid = 3 * new_sid + - ((cj->loc[k] - ci->loc[k] + shift[k] < 0) - ? 0 - : (cj->loc[k] - ci->loc[k] + shift[k] > 0) ? 2 : 1); - new_sid = sortlistID[new_sid]; - /* Do any of the cells need to be drifted first? */ - if (!cell_are_part_drifted(cj, e)) cell_drift_part(cj, e); + if (!cell_are_part_drifted(cj, e)) error("Cell should be drifted!"); DOPAIR_SUBSET(r, ci, parts, ind, count, cj); } diff --git a/src/runner_doiact_vec.c b/src/runner_doiact_vec.c index b05a150047d7b15de97e0b1f5ea9308bfb7f60f7..1f68163a3080a19bbcca722cf872cc99cd91fd38 100644 --- a/src/runner_doiact_vec.c +++ b/src/runner_doiact_vec.c @@ -642,8 +642,13 @@ void runner_dopair1_density_vec(struct runner *r, struct cell *ci, p->x[1] * runner_shift[sid][1] + p->x[2] * runner_shift[sid][2]; if (fabsf(d - sort_i[pid].d) - ci->dx_max_sort > - 1.0e-6 * max(fabsf(d), ci->dx_max_sort)) - error("particle shift diff exceeds dx_max_sort."); + 1.0e-4 * max(fabsf(d), ci->dx_max_sort_old)) + error( + "particle shift diff exceeds dx_max_sort in cell ci. ci->nodeID=%d " + "cj->nodeID=%d d=%e sort_i[pid].d=%e ci->dx_max_sort=%e " + "ci->dx_max_sort_old=%e", + ci->nodeID, cj->nodeID, d, sort_i[pid].d, ci->dx_max_sort, + ci->dx_max_sort_old); } for (int pjd = 0; pjd < cj->count; pjd++) { const struct part *p = &cj->parts[sort_j[pjd].i]; @@ -651,8 +656,13 @@ void runner_dopair1_density_vec(struct runner *r, struct cell *ci, p->x[1] * runner_shift[sid][1] + p->x[2] * runner_shift[sid][2]; if (fabsf(d - sort_j[pjd].d) - cj->dx_max_sort > - 1.0e-6 * max(fabsf(d), cj->dx_max_sort)) - error("particle shift diff exceeds dx_max_sort."); + 1.0e-4 * max(fabsf(d), cj->dx_max_sort_old)) + error( + "particle shift diff exceeds dx_max_sort in cell cj. cj->nodeID=%d " + "ci->nodeID=%d d=%e sort_j[pjd].d=%e cj->dx_max_sort=%e " + "cj->dx_max_sort_old=%e", + cj->nodeID, ci->nodeID, d, sort_j[pjd].d, cj->dx_max_sort, + cj->dx_max_sort_old); } #endif /* SWIFT_DEBUG_CHECKS */ diff --git a/src/scheduler.c b/src/scheduler.c index b07c403e4ecd960b22b51f24372ca0a3420a453f..cb232db0a74f9a25bb8b7f42eb62fb1e8cf4fceb 100644 --- a/src/scheduler.c +++ b/src/scheduler.c @@ -140,7 +140,6 @@ static void scheduler_splittask_hydro(struct task *t, struct scheduler *s) { /* Get a handle on the cell involved. */ struct cell *ci = t->ci; - const double width = ci->dmin; /* Foreign task? */ if (ci->nodeID != s->nodeID) { @@ -149,7 +148,7 @@ static void scheduler_splittask_hydro(struct task *t, struct scheduler *s) { } /* Is this cell even split and the task does not violate h ? */ - if (ci->split && 2.f * kernel_gamma * ci->h_max * space_stretch < width) { + if (cell_can_split_self_task(ci)) { /* Make a sub? */ if (scheduler_dosub && /* Note division here to avoid overflow */ @@ -158,9 +157,6 @@ static void scheduler_splittask_hydro(struct task *t, struct scheduler *s) { /* convert to a self-subtask. */ t->type = task_type_sub_self; - /* Depend on local sorts on this cell. */ - if (ci->sorts != NULL) scheduler_addunlock(s, ci->sorts, t); - /* Otherwise, make tasks explicitly. */ } else { @@ -191,16 +187,6 @@ static void scheduler_splittask_hydro(struct task *t, struct scheduler *s) { } } /* Cell is split */ - /* Otherwise, make sure the self task has a drift task */ - else { - - lock_lock(&ci->lock); - - if (ci->drift_part == NULL) - ci->drift_part = scheduler_addtask(s, task_type_drift_part, - task_subtype_none, 0, 0, ci, NULL); - lock_unlock_blind(&ci->lock); - } } /* Self interaction */ /* Pair interaction? */ @@ -221,13 +207,8 @@ static void scheduler_splittask_hydro(struct task *t, struct scheduler *s) { double shift[3]; const int sid = space_getsid(s->space, &ci, &cj, shift); - const double width_i = ci->dmin; - const double width_j = cj->dmin; - /* Should this task be split-up? */ - if (ci->split && cj->split && - 2.f * kernel_gamma * space_stretch * ci->h_max < width_i && - 2.f * kernel_gamma * space_stretch * cj->h_max < width_j) { + if (cell_can_split_pair_task(ci) && cell_can_split_pair_task(cj)) { /* Replace by a single sub-task? */ if (scheduler_dosub && @@ -237,10 +218,6 @@ static void scheduler_splittask_hydro(struct task *t, struct scheduler *s) { /* Make this task a sub task. */ t->type = task_type_sub_pair; - /* Depend on the sort tasks of both cells. */ - if (ci->sorts != NULL) scheduler_addunlock(s, ci->sorts, t); - if (cj->sorts != NULL) scheduler_addunlock(s, cj->sorts, t); - /* Otherwise, split it. */ } else { @@ -602,35 +579,6 @@ static void scheduler_splittask_hydro(struct task *t, struct scheduler *s) { scheduler_splittask_hydro(tl, s); tl->flags = space_getsid(s->space, &t->ci, &t->cj, shift); } - - /* Otherwise, if not spilt, stitch-up the sorting. */ - } else { - - /* Create the drift and sort for ci. */ - lock_lock(&ci->lock); - if (ci->drift_part == NULL && ci->nodeID == engine_rank) - ci->drift_part = scheduler_addtask(s, task_type_drift_part, - task_subtype_none, 0, 0, ci, NULL); - if (ci->sorts == NULL) - ci->sorts = scheduler_addtask(s, task_type_sort, task_subtype_none, - 1 << sid, 0, ci, NULL); - else - ci->sorts->flags |= (1 << sid); - lock_unlock_blind(&ci->lock); - scheduler_addunlock(s, ci->sorts, t); - - /* Create the drift and sort for cj. */ - lock_lock(&cj->lock); - if (cj->drift_part == NULL && cj->nodeID == engine_rank) - cj->drift_part = scheduler_addtask(s, task_type_drift_part, - task_subtype_none, 0, 0, cj, NULL); - if (cj->sorts == NULL) - cj->sorts = scheduler_addtask(s, task_type_sort, task_subtype_none, - 1 << sid, 0, cj, NULL); - else - cj->sorts->flags |= (1 << sid); - lock_unlock_blind(&cj->lock); - scheduler_addunlock(s, cj->sorts, t); } } /* pair interaction? */ } /* iterate over the current task. */ @@ -823,13 +771,14 @@ void scheduler_splittasks(struct scheduler *s) { * @param type The type of the task. * @param subtype The sub-type of the task. * @param flags The flags of the task. - * @param wait The number of unsatisfied dependencies of this task. + * @param implicit If true, only use this task to unlock dependencies, i.e. + * this task is never enqueued. * @param ci The first cell to interact. * @param cj The second cell to interact. */ struct task *scheduler_addtask(struct scheduler *s, enum task_types type, - enum task_subtypes subtype, int flags, int wait, - struct cell *ci, struct cell *cj) { + enum task_subtypes subtype, int flags, + int implicit, struct cell *ci, struct cell *cj) { #ifdef SWIFT_DEBUG_CHECKS if (ci == NULL && cj != NULL) @@ -850,11 +799,11 @@ struct task *scheduler_addtask(struct scheduler *s, enum task_types type, t->type = type; t->subtype = subtype; t->flags = flags; - t->wait = wait; + t->wait = 0; t->ci = ci; t->cj = cj; t->skip = 1; /* Mark tasks as skip by default. */ - t->implicit = 0; + t->implicit = implicit; t->weight = 0; t->rank = 0; t->nr_unlock_tasks = 0; @@ -1184,11 +1133,6 @@ void scheduler_rewait_mapper(void *map_data, int num_elements, if (t->wait < 0) error("Task unlocked by more than %d tasks!", (1 << (8 * sizeof(t->wait) - 1)) - 1); - - /* Skip sort tasks that have already been performed */ - if (t->type == task_type_sort && t->flags == 0) { - error("Empty sort task encountered."); - } #endif /* Sets the waits of the dependances */ @@ -1277,14 +1221,14 @@ void scheduler_start(struct scheduler *s) { ci->ti_end_min); /* Special treatment for sort tasks */ - if (ci->ti_end_min == ti_current && t->skip && + /* if (ci->ti_end_min == ti_current && t->skip && t->type == task_type_sort && t->flags == 0) error( "Task (type='%s/%s') should not have been skipped " "ti_current=%lld " "c->ti_end_min=%lld t->flags=%d", taskID_names[t->type], subtaskID_names[t->subtype], ti_current, - ci->ti_end_min, t->flags); + ci->ti_end_min, t->flags); */ } else { /* pair */ @@ -1338,6 +1282,10 @@ void scheduler_enqueue(struct scheduler *s, struct task *t) { /* If this is an implicit task, just pretend it's done. */ if (t->implicit) { +#ifdef SWIFT_DEBUG_CHECKS + t->ti_run = s->space->e->ti_current; +#endif + t->skip = 1; for (int j = 0; j < t->nr_unlock_tasks; j++) { struct task *t2 = t->unlock_tasks[j]; if (atomic_dec(&t2->wait) == 1) scheduler_enqueue(s, t2); diff --git a/src/scheduler.h b/src/scheduler.h index 7bf9a40e7cec89eb25dfa6ce7a56912bf3a9e639..f38e5fb4d849842217756b7b93713a5e1375c9c5 100644 --- a/src/scheduler.h +++ b/src/scheduler.h @@ -133,8 +133,8 @@ void scheduler_reset(struct scheduler *s, int nr_tasks); void scheduler_ranktasks(struct scheduler *s); void scheduler_reweight(struct scheduler *s, int verbose); struct task *scheduler_addtask(struct scheduler *s, enum task_types type, - enum task_subtypes subtype, int flags, int wait, - struct cell *ci, struct cell *cj); + enum task_subtypes subtype, int flags, + int implicit, struct cell *ci, struct cell *cj); void scheduler_splittasks(struct scheduler *s); struct task *scheduler_done(struct scheduler *s, struct task *t); struct task *scheduler_unlock(struct scheduler *s, struct task *t); diff --git a/src/space.c b/src/space.c index 5d7e306ebe331d449daf607a3405712b4f42f4b1..d7aa9a32fd9c8e663a23b3287c26af81559652ce 100644 --- a/src/space.c +++ b/src/space.c @@ -214,6 +214,8 @@ void space_rebuild_recycle_mapper(void *map_data, int num_elements, c->scount = 0; c->init_grav = NULL; c->extra_ghost = NULL; + c->ghost_in = NULL; + c->ghost_out = NULL; c->ghost = NULL; c->kick1 = NULL; c->kick2 = NULL; @@ -227,6 +229,10 @@ void space_rebuild_recycle_mapper(void *map_data, int num_elements, c->grav_long_range = NULL; c->grav_down = NULL; c->super = c; + c->parts = NULL; + c->xparts = NULL; + c->gparts = NULL; + c->sparts = NULL; if (c->sort != NULL) { free(c->sort); c->sort = NULL; @@ -357,7 +363,7 @@ void space_regrid(struct space *s, int verbose) { /* Be verbose about this. */ #ifdef SWIFT_DEBUG_CHECKS - message("re)griding space cdim=(%d %d %d)", cdim[0], cdim[1], cdim[2]); + message("(re)griding space cdim=(%d %d %d)", cdim[0], cdim[1], cdim[2]); fflush(stdout); #endif @@ -499,7 +505,7 @@ void space_rebuild(struct space *s, int verbose) { /* Be verbose about this. */ #ifdef SWIFT_DEBUG_CHECKS - if (s->e->nodeID == 0 || verbose) message("re)building space"); + if (s->e->nodeID == 0 || verbose) message("(re)building space"); fflush(stdout); #endif @@ -910,14 +916,16 @@ void space_rebuild(struct space *s, int verbose) { c->ti_old_part = ti_old; c->ti_old_gpart = ti_old; c->ti_old_multipole = ti_old; - c->parts = finger; - c->xparts = xfinger; - c->gparts = gfinger; - c->sparts = sfinger; - finger = &finger[c->count]; - xfinger = &xfinger[c->count]; - gfinger = &gfinger[c->gcount]; - sfinger = &sfinger[c->scount]; + if (c->nodeID == engine_rank) { + c->parts = finger; + c->xparts = xfinger; + c->gparts = gfinger; + c->sparts = sfinger; + finger = &finger[c->count]; + xfinger = &xfinger[c->count]; + gfinger = &gfinger[c->gcount]; + sfinger = &sfinger[c->scount]; + } } // message( "hooking up cells took %.3f %s." , // clocks_from_ticks(getticks() - tic), clocks_getunit()); diff --git a/src/space.h b/src/space.h index 5c26f359e15b757b66519d958c595f670c5d6990..5d548b53b10ebfb75e6371ff78a163e8e71f4c15 100644 --- a/src/space.h +++ b/src/space.h @@ -30,13 +30,15 @@ #include <stddef.h> /* Includes. */ -#include "cell.h" #include "hydro_space.h" #include "lock.h" #include "parser.h" #include "part.h" #include "space.h" +/* Avoid cyclic inclusions */ +struct cell; + /* Some constants. */ #define space_cellallocchunk 1000 #define space_splitsize_default 400 diff --git a/tests/test125cells.c b/tests/test125cells.c index 0f3939c834cd3c552cea7882211321b62cc7d966..f76fb3f348f9b9a59994020305c215fd0a25faca 100644 --- a/tests/test125cells.c +++ b/tests/test125cells.c @@ -342,7 +342,6 @@ struct cell *make_cell(size_t n, const double offset[3], double size, double h, cell->sorted = 0; cell->sort = NULL; - cell->sortsize = 0; return cell; } @@ -550,8 +549,7 @@ int main(int argc, char *argv[]) { prog_const.const_newton_G = 1.f; struct hydro_props hp; - hp.target_neighbours = pow_dimension(h) * kernel_norm; - hp.delta_neighbours = 4.; + hp.h_tolerance = 1e0; hp.h_max = FLT_MAX; hp.max_smoothing_iterations = 1; hp.CFL_condition = 0.1; @@ -618,7 +616,8 @@ int main(int argc, char *argv[]) { } /* First, sort stuff */ - for (int j = 0; j < 125; ++j) runner_do_sort(&runner, cells[j], 0x1FFF, 0); + for (int j = 0; j < 125; ++j) + runner_do_sort(&runner, cells[j], 0x1FFF, 0, 0); /* Do the density calculation */ #if !(defined(MINIMAL_SPH) && defined(WITH_VECTORIZATION)) diff --git a/tests/test27cells.c b/tests/test27cells.c index fd9d62756ad0e8461f60b2e452c6ca86135a8e89..dbaa2a02a7b6b456f3295c00e450bc02003a25ec 100644 --- a/tests/test27cells.c +++ b/tests/test27cells.c @@ -180,7 +180,6 @@ struct cell *make_cell(size_t n, double *offset, double size, double h, cell->sorted = 0; cell->sort = NULL; - cell->sortsize = 0; return cell; } @@ -423,7 +422,7 @@ int main(int argc, char *argv[]) { runner_do_drift_part(&runner, cells[i * 9 + j * 3 + k], 0); - runner_do_sort(&runner, cells[i * 9 + j * 3 + k], 0x1FFF, 0); + runner_do_sort(&runner, cells[i * 9 + j * 3 + k], 0x1FFF, 0, 0); } } } diff --git a/tests/testPair.c b/tests/testPair.c index 92068e286fce1570010cf83862a6cdc9a5ad4ffc..539f04425afd61c18de64702831504a2888cfe3d 100644 --- a/tests/testPair.c +++ b/tests/testPair.c @@ -122,7 +122,6 @@ struct cell *make_cell(size_t n, double *offset, double size, double h, cell->sorted = 0; cell->sort = NULL; - cell->sortsize = 0; return cell; } @@ -290,8 +289,8 @@ int main(int argc, char *argv[]) { for (size_t i = 0; i < type + 1; ++i) offset[i] = 1.; cj = make_cell(particles, offset, size, h, rho, &partId, perturbation); - runner_do_sort(&runner, ci, 0x1FFF, 0); - runner_do_sort(&runner, cj, 0x1FFF, 0); + runner_do_sort(&runner, ci, 0x1FFF, 0, 0); + runner_do_sort(&runner, cj, 0x1FFF, 0, 0); time = 0; for (size_t i = 0; i < runs; ++i) { diff --git a/tests/testPeriodicBC.c b/tests/testPeriodicBC.c index 19faf252745e18624f9ed5d2a3cbfc42098ef49e..6dbe27a64908e232cb9478dfd2e2cf41d908d463 100644 --- a/tests/testPeriodicBC.c +++ b/tests/testPeriodicBC.c @@ -179,7 +179,6 @@ struct cell *make_cell(size_t n, double *offset, double size, double h, cell->sorted = 0; cell->sort = NULL; - cell->sortsize = 0; return cell; } @@ -500,7 +499,7 @@ int main(int argc, char *argv[]) { runner_do_drift_part(&runner, cells[i * (dim * dim) + j * dim + k], 0); - runner_do_sort(&runner, cells[i * (dim * dim) + j * dim + k], 0x1FFF, + runner_do_sort(&runner, cells[i * (dim * dim) + j * dim + k], 0x1FFF, 0, 0); } } diff --git a/tests/testSPHStep.c b/tests/testSPHStep.c index 014dacd1eb62040b03e6038b2c23183a24ec4850..c3df2ca6da5ee5043d46f0266ca7a5b580f230ae 100644 --- a/tests/testSPHStep.c +++ b/tests/testSPHStep.c @@ -82,7 +82,6 @@ struct cell *make_cell(size_t N, float cellSize, int offset[3], int id_offset) { cell->sorted = 0; cell->sort = NULL; - cell->sortsize = 0; return cell; } diff --git a/tests/tolerance_27_normal.dat b/tests/tolerance_27_normal.dat index 3ec58173c9c5a0465382de9094b5d15b81b88a2a..2664972b165444249a05e5a3381995d37780f713 100644 --- a/tests/tolerance_27_normal.dat +++ b/tests/tolerance_27_normal.dat @@ -1,4 +1,4 @@ # ID pos_x pos_y pos_z v_x v_y v_z rho rho_dh wcount wcount_dh div_v curl_vx curl_vy curl_vz - 0 1e-6 1e-6 1e-6 1e-6 1e-6 1e-6 2e-6 4e-5 2e-4 2e-3 1e-5 6e-6 6e-6 6e-6 + 0 1e-6 1e-6 1e-6 1e-6 1e-6 1e-6 2e-6 4e-5 3e-4 1e-2 1e-5 6e-6 6e-6 6e-6 0 1e-6 1e-6 1e-6 1e-6 1e-6 1e-6 1e-6 1.2e-4 1e-4 1e-4 2e-4 1e-4 1e-4 1e-4 0 1e-6 1e-6 1e-6 1e-6 1e-6 1e-6 1e-6 1e-6 1e-6 1e-6 1e-6 1e-6 1e-6 1e-6 diff --git a/tests/tolerance_27_perturbed.dat b/tests/tolerance_27_perturbed.dat index ed7df2ca9f55dd6211ed6321ffc40faa37d98092..2bc26dc8ecf590ffa674f3df1535c548e4ecbfa9 100644 --- a/tests/tolerance_27_perturbed.dat +++ b/tests/tolerance_27_perturbed.dat @@ -1,4 +1,4 @@ # ID pos_x pos_y pos_z v_x v_y v_z rho rho_dh wcount wcount_dh div_v curl_vx curl_vy curl_vz - 0 1e-6 1e-6 1e-6 1e-6 1e-6 1e-6 2e-6 1e-4 2e-4 3e-3 1e-5 3e-6 3e-6 7e-6 - 0 1e-6 1e-6 1e-6 1e-6 1e-6 1e-6 1e-6 2e-3 1e-5 1e-4 6e-5 2e-3 2e-3 2e-3 - 0 1e-6 1e-6 1e-6 1e-6 1e-6 1e-6 1e-6 4e-4 1e-6 1e-6 1e-6 2e-6 2e-6 2e-6 + 0 1e-6 1e-6 1e-6 1e-6 1e-6 1e-6 2e-6 1e-4 2e-4 1e-2 1e-5 3e-6 3e-6 7e-6 + 0 1e-6 1e-6 1e-6 1e-6 1e-6 1e-6 1e-6 2e-3 1e-5 1e-3 6e-5 2e-3 2e-3 2e-3 + 0 1e-6 1e-6 1e-6 1e-6 1e-6 1e-6 1e-6 4e-4 1e-6 1e0 1e-6 2e-6 2e-6 2e-6 diff --git a/tests/tolerance_27_perturbed_h.dat b/tests/tolerance_27_perturbed_h.dat index 8a83dcb8c460a1bf673e1918e082de446248ec69..4b9ce9562d77ffceee38e217d965c252338e767e 100644 --- a/tests/tolerance_27_perturbed_h.dat +++ b/tests/tolerance_27_perturbed_h.dat @@ -1,4 +1,4 @@ # ID pos_x pos_y pos_z v_x v_y v_z rho rho_dh wcount wcount_dh div_v curl_vx curl_vy curl_vz - 0 1e-6 1e-6 1e-6 1e-6 1e-6 1e-6 2e-6 1e-4 2e-4 4e-3 1e-5 3e-6 3e-6 7e-6 - 0 1e-6 1e-6 1e-6 1e-6 1e-6 1e-6 1e-6 1.4e-2 1e-5 1e-4 2.5e-4 3e-3 3e-3 3e-3 - 0 1e-6 1e-6 1e-6 1e-6 1e-6 1e-6 1e-6 1e-6 1e-6 1e-6 1e-6 4e-6 4e-6 4e-6 + 0 1e-6 1e-6 1e-6 1e-6 1e-6 1e-6 2e-6 1e-4 4e-4 1.2e-2 1e-5 3e-6 3e-6 7e-6 + 0 1e-6 1e-6 1e-6 1e-6 1e-6 1e-6 1e-6 1.4e-2 1e-5 1e-3 2.5e-4 3e-3 3e-3 3e-3 + 0 1e-6 1e-6 1e-6 1e-6 1e-6 1e-6 1e-6 1e-6 1e-6 1e0 1e-6 4e-6 4e-6 4e-6 diff --git a/tests/tolerance_periodic_BC_normal.dat b/tests/tolerance_periodic_BC_normal.dat index ce24743d0668a049ac13c53ba5c921e7a46ed099..823e4af488b343f57e3c90e89ee2d4f13d3ca94b 100644 --- a/tests/tolerance_periodic_BC_normal.dat +++ b/tests/tolerance_periodic_BC_normal.dat @@ -1,4 +1,4 @@ # ID pos_x pos_y pos_z v_x v_y v_z rho rho_dh wcount wcount_dh div_v curl_vx curl_vy curl_vz - 0 1e-6 1e-6 1e-6 1e-6 1e-6 1e-6 4e-6 4e-5 3e-4 3e-3 2e-4 2e-4 2e-4 2e-4 - 0 1e-6 1e-6 1e-6 1e-6 1e-6 1e-6 2e-6 2e-4 1e-4 1e-4 6e-4 2e-3 2e-3 2e-3 + 0 1e-6 1e-6 1e-6 1e-6 1e-6 1e-6 4e-6 4e-5 1e-3 1e-2 2e-4 2e-4 2e-4 2e-4 + 0 1e-6 1e-6 1e-6 1e-6 1e-6 1e-6 2e-6 2e-4 1e-4 2e-4 6e-4 2e-3 2e-3 2e-3 0 1e-6 1e-6 1e-6 1e-6 1e-6 1e-6 1e-6 1e-4 1e-6 1e-4 5e-4 2e-4 2e-4 2e-4 diff --git a/tests/tolerance_periodic_BC_perturbed.dat b/tests/tolerance_periodic_BC_perturbed.dat index 78e573ee01811bfdc5c72f59b20f17fe00a54348..df5ee6458ba05eed08006586514467fcdb715990 100644 --- a/tests/tolerance_periodic_BC_perturbed.dat +++ b/tests/tolerance_periodic_BC_perturbed.dat @@ -1,4 +1,4 @@ # ID pos_x pos_y pos_z v_x v_y v_z rho rho_dh wcount wcount_dh div_v curl_vx curl_vy curl_vz - 0 1e-6 1e-6 1e-6 1e-6 1e-6 1e-6 3e-6 4e-5 2e-4 3e-3 2e-4 1e-4 1e-4 1e-4 - 0 1e-6 1e-6 1e-6 1e-6 1e-6 1e-6 2e-6 6e-3 1e-4 1e-4 1e-2 6e-3 6e-3 6e-3 - 0 1e-6 1e-6 1e-6 1e-6 1e-6 1e-6 1e-6 2e-3 1e-6 1e-4 5e-4 3e-3 3e-3 3e-3 + 0 1e-6 1e-6 1e-6 1e-6 1e-6 1e-6 3e-6 4e-5 1e-3 1e-2 2e-4 1e-4 1e-4 1e-4 + 0 1e-6 1e-6 1e-6 1e-6 1e-6 1e-6 2e-6 6e-3 1e-4 3e-3 1e-2 6e-3 6e-3 6e-3 + 0 1e-6 1e-6 1e-6 1e-6 1e-6 1e-6 1e-6 2e-3 1e-6 1e0 5e-4 3e-3 3e-3 3e-3