diff --git a/examples/DwarfGalaxy/run.sh b/examples/DwarfGalaxy/run.sh index 4cc87880215a37c9eac59b19e584f4887cba2c38..ced1959de34882937a5aa37df4facc294db26050 100755 --- a/examples/DwarfGalaxy/run.sh +++ b/examples/DwarfGalaxy/run.sh @@ -7,5 +7,5 @@ then ./getIC.sh fi -../swift -b -G -s -S -t 8 dwarf_galaxy.yml 2>&1 | tee output.log +../swift -b -G -s -S -t 8 $@ dwarf_galaxy.yml 2>&1 | tee output.log diff --git a/src/active.h b/src/active.h index a3d2d5ad90c93b06bbd4853a2633d087304ee570..7aa8a145526e7edf65e40b1e489f034cf6eb6479 100644 --- a/src/active.h +++ b/src/active.h @@ -74,6 +74,20 @@ __attribute__((always_inline)) INLINE static int cell_are_gpart_drifted( return (c->grav.ti_old_part == e->ti_current); } +/** + * @brief Check that the #spart in a #cell have been drifted to the current + * time. + * + * @param c The #cell. + * @param e The #engine containing information about the current time. + * @return 1 if the #cell has been drifted to the current time, 0 otherwise. + */ +__attribute__((always_inline)) INLINE static int cell_are_spart_drifted( + const struct cell *c, const struct engine *e) { + + return cell_are_gpart_drifted(c, e); +} + /* Are cells / particles active for regular tasks ? */ /** @@ -185,9 +199,16 @@ __attribute__((always_inline)) INLINE static int cell_is_all_active_gravity( __attribute__((always_inline)) INLINE static int cell_is_active_stars( const struct cell *c, const struct engine *e) { - // LOIC: Need star-specific implementation +#ifdef SWIFT_DEBUG_CHECKS + if (c->stars.ti_end_min < e->ti_current) + error( + "cell in an impossible time-zone! c->ti_end_min=%lld (t=%e) and " + "e->ti_current=%lld (t=%e, a=%e)", + c->stars.ti_end_min, c->stars.ti_end_min * e->time_base, e->ti_current, + e->ti_current * e->time_base, e->cosmology->a); +#endif - return cell_is_active_gravity(c, e); + return (c->stars.ti_end_min == e->ti_current); } /** diff --git a/src/cell.c b/src/cell.c index 645dfbd3eaa94c4266243f19309c205824ce79f8..5515838ed8d8f31dcb80535ace6238b1beccf3f8 100644 --- a/src/cell.c +++ b/src/cell.c @@ -342,6 +342,7 @@ int cell_unpack(struct pcell *restrict pc, struct cell *restrict c, temp->hydro.dx_max_part = 0.f; temp->hydro.dx_max_sort = 0.f; temp->stars.dx_max_part = 0.f; + temp->stars.dx_max_sort = 0.f; temp->nodeID = c->nodeID; temp->parent = c; c->progeny[k] = temp; @@ -1561,12 +1562,20 @@ void cell_check_multipole(struct cell *c) { */ void cell_clean(struct cell *c) { + /* Hydro */ for (int i = 0; i < 13; i++) if (c->hydro.sort[i] != NULL) { free(c->hydro.sort[i]); c->hydro.sort[i] = NULL; } + /* Stars */ + for (int i = 0; i < 13; i++) + if (c->stars.sort[i] != NULL) { + free(c->stars.sort[i]); + c->stars.sort[i] = NULL; + } + /* Recurse */ for (int k = 0; k < 8; k++) if (c->progeny[k]) cell_clean(c->progeny[k]); @@ -1724,6 +1733,68 @@ void cell_activate_sorts(struct cell *c, int sid, struct scheduler *s) { } } +/** + * @brief Activate the sorts up a cell hierarchy. + */ +void cell_activate_stars_sorts_up(struct cell *c, struct scheduler *s) { + + if (c == c->super) { +#ifdef SWIFT_DEBUG_CHECKS + if (c->stars.sorts == NULL) + error("Trying to activate un-existing c->stars.sorts"); +#endif + scheduler_activate(s, c->stars.sorts); + if (c->nodeID == engine_rank) { + cell_activate_drift_part(c, s); + cell_activate_drift_spart(c, s); + } + } else { + + for (struct cell *parent = c->parent; + parent != NULL && !parent->stars.do_sub_sort; + parent = parent->parent) { + parent->stars.do_sub_sort = 1; + if (parent == c->super) { +#ifdef SWIFT_DEBUG_CHECKS + if (parent->stars.sorts == NULL) + error("Trying to activate un-existing parents->hydro.sorts"); +#endif + scheduler_activate(s, parent->stars.sorts); + if (parent->nodeID == engine_rank) { + cell_activate_drift_part(parent, s); + cell_activate_drift_spart(parent, s); + } + break; + } + } + } +} + +/** + * @brief Activate the sorts on a given cell, if needed. + */ +void cell_activate_stars_sorts(struct cell *c, int sid, struct scheduler *s) { + + /* Do we need to re-sort? */ + if (c->stars.dx_max_sort > space_maxreldx * c->dmin) { + + /* Climb up the tree to active the sorts in that direction */ + for (struct cell *finger = c; finger != NULL; finger = finger->parent) { + if (finger->stars.requires_sorts) { + atomic_or(&finger->stars.do_sort, finger->stars.requires_sorts); + cell_activate_stars_sorts_up(finger, s); + } + finger->stars.sorted = 0; + } + } + + /* Has this cell been sorted at all for the given sid? */ + if (!(c->stars.sorted & (1 << sid)) || c->nodeID != engine_rank) { + atomic_or(&c->stars.do_sort, (1 << sid)); + cell_activate_stars_sorts_up(c, s); + } +} + /** * @brief Traverse a sub-cell task and activate the hydro drift tasks that are * required by a hydro task @@ -2105,7 +2176,9 @@ void cell_activate_subcell_stars_tasks(struct cell *ci, struct cell *cj, if (cj == NULL) { /* Do anything? */ - if (ci->stars.count == 0 || !cell_is_active_stars(ci, e)) return; + if (!cell_is_active_stars(ci, e) || ci->hydro.count == 0 || + ci->stars.count == 0) + return; /* Recurse? */ if (cell_can_recurse_in_self_stars_task(ci)) { @@ -2133,7 +2206,10 @@ void cell_activate_subcell_stars_tasks(struct cell *ci, struct cell *cj, /* Should we even bother? */ if (!cell_is_active_stars(ci, e) && !cell_is_active_stars(cj, e)) return; - if (ci->stars.count == 0 || cj->stars.count == 0) return; + + int should_do = ci->stars.count != 0 && cj->hydro.count != 0; + should_do |= cj->stars.count != 0 && ci->hydro.count != 0; + if (!should_do) return; /* Get the orientation of the pair. */ double shift[3]; @@ -2418,23 +2494,43 @@ void cell_activate_subcell_stars_tasks(struct cell *ci, struct cell *cj, } /* Otherwise, activate the sorts and drifts. */ - else if (cell_is_active_stars(ci, e) || cell_is_active_stars(cj, e)) { + else { - /* We are going to interact this pair, so store some values. */ - atomic_or(&ci->hydro.requires_sorts, 1 << sid); - atomic_or(&cj->hydro.requires_sorts, 1 << sid); - ci->hydro.dx_max_sort_old = ci->hydro.dx_max_sort; - cj->hydro.dx_max_sort_old = cj->hydro.dx_max_sort; + if (cell_is_active_stars(ci, e) && cj->hydro.count != 0 && + ci->stars.count != 0) { + /* We are going to interact this pair, so store some values. */ + atomic_or(&cj->hydro.requires_sorts, 1 << sid); + atomic_or(&ci->stars.requires_sorts, 1 << sid); - /* Activate the drifts if the cells are local. */ - if (ci->nodeID == engine_rank) cell_activate_drift_part(ci, s); - if (ci->nodeID == engine_rank) cell_activate_drift_spart(ci, s); - if (cj->nodeID == engine_rank) cell_activate_drift_part(cj, s); - if (cj->nodeID == engine_rank) cell_activate_drift_spart(cj, s); + cj->hydro.dx_max_sort_old = cj->hydro.dx_max_sort; + ci->stars.dx_max_sort_old = ci->stars.dx_max_sort; - /* Do we need to sort the cells? */ - cell_activate_sorts(ci, sid, s); - cell_activate_sorts(cj, sid, s); + /* Activate the drifts if the cells are local. */ + if (ci->nodeID == engine_rank) cell_activate_drift_spart(ci, s); + if (cj->nodeID == engine_rank) cell_activate_drift_part(cj, s); + + /* Do we need to sort the cells? */ + cell_activate_sorts(cj, sid, s); + cell_activate_stars_sorts(ci, sid, s); + } + + if (cell_is_active_stars(cj, e) && ci->hydro.count != 0 && + cj->stars.count != 0) { + /* We are going to interact this pair, so store some values. */ + atomic_or(&cj->stars.requires_sorts, 1 << sid); + atomic_or(&ci->hydro.requires_sorts, 1 << sid); + + ci->hydro.dx_max_sort_old = ci->hydro.dx_max_sort; + cj->stars.dx_max_sort_old = cj->stars.dx_max_sort; + + /* Activate the drifts if the cells are local. */ + if (ci->nodeID == engine_rank) cell_activate_drift_part(ci, s); + if (cj->nodeID == engine_rank) cell_activate_drift_spart(cj, s); + + /* Do we need to sort the cells? */ + cell_activate_sorts(ci, sid, s); + cell_activate_stars_sorts(cj, sid, s); + } } } /* Otherwise, pair interation */ } @@ -2965,25 +3061,48 @@ int cell_unskip_stars_tasks(struct cell *c, struct scheduler *s) { /* Activate drifts */ if (t->type == task_type_self) { - if (ci->nodeID == nodeID) cell_activate_drift_part(ci, s); - if (ci->nodeID == nodeID) cell_activate_drift_gpart(ci, s); + if (ci->nodeID == nodeID) { + cell_activate_drift_part(ci, s); + cell_activate_drift_spart(ci, s); + } } /* Set the correct sorting flags and activate hydro drifts */ else if (t->type == task_type_pair) { - /* Store some values. */ - atomic_or(&ci->hydro.requires_sorts, 1 << t->flags); + /* Do ci */ + /* stars for ci */ + atomic_or(&ci->stars.requires_sorts, 1 << t->flags); + ci->stars.dx_max_sort_old = ci->stars.dx_max_sort; + + /* hydro for cj */ atomic_or(&cj->hydro.requires_sorts, 1 << t->flags); - ci->hydro.dx_max_sort_old = ci->hydro.dx_max_sort; cj->hydro.dx_max_sort_old = cj->hydro.dx_max_sort; /* Activate the drift tasks. */ - if (ci->nodeID == nodeID) cell_activate_drift_part(ci, s); + if (ci->nodeID == nodeID) cell_activate_drift_spart(ci, s); if (cj->nodeID == nodeID) cell_activate_drift_part(cj, s); /* Check the sorts and activate them if needed. */ - cell_activate_sorts(ci, t->flags, s); + cell_activate_stars_sorts(ci, t->flags, s); cell_activate_sorts(cj, t->flags, s); + + /* Do cj */ + /* hydro for ci */ + atomic_or(&ci->hydro.requires_sorts, 1 << t->flags); + ci->hydro.dx_max_sort_old = ci->hydro.dx_max_sort; + + /* stars for cj */ + atomic_or(&cj->stars.requires_sorts, 1 << t->flags); + cj->stars.dx_max_sort_old = cj->stars.dx_max_sort; + + /* Activate the drift tasks. */ + if (cj->nodeID == nodeID) cell_activate_drift_spart(cj, s); + if (ci->nodeID == nodeID) cell_activate_drift_part(ci, s); + + /* Check the sorts and activate them if needed. */ + cell_activate_sorts(ci, t->flags, s); + cell_activate_stars_sorts(cj, t->flags, s); + } /* Store current values of dx_max and h_max. */ else if (t->type == task_type_sub_pair || t->type == task_type_sub_self) { @@ -3402,6 +3521,7 @@ void cell_drift_gpart(struct cell *c, const struct engine *e, int force) { struct spart *const sparts = c->stars.parts; float dx_max = 0.f, dx2_max = 0.f; + float dx_max_sort = 0.0f, dx2_max_sort = 0.f; float cell_h_max = 0.f; /* Drift irrespective of cell flags? */ @@ -3428,6 +3548,7 @@ void cell_drift_gpart(struct cell *c, const struct engine *e, int force) { /* Update */ dx_max = max(dx_max, cp->stars.dx_max_part); + dx_max_sort = max(dx_max_sort, cp->stars.dx_max_sort); cell_h_max = max(cell_h_max, cp->stars.h_max); } } @@ -3435,6 +3556,7 @@ void cell_drift_gpart(struct cell *c, const struct engine *e, int force) { /* Store the values */ c->stars.h_max = cell_h_max; c->stars.dx_max_part = dx_max; + c->stars.dx_max_sort = dx_max_sort; /* Update the time of the last drift */ c->grav.ti_old_part = ti_current; @@ -3528,6 +3650,12 @@ void cell_drift_gpart(struct cell *c, const struct engine *e, int force) { sp->x_diff[2] * sp->x_diff[2]; dx2_max = max(dx2_max, dx2); + const float dx2_sort = sp->x_diff_sort[0] * sp->x_diff_sort[0] + + sp->x_diff_sort[1] * sp->x_diff_sort[1] + + sp->x_diff_sort[2] * sp->x_diff_sort[2]; + + dx2_max_sort = max(dx2_max_sort, dx2_sort); + /* Maximal smoothing length */ cell_h_max = max(cell_h_max, sp->h); @@ -3535,10 +3663,12 @@ void cell_drift_gpart(struct cell *c, const struct engine *e, int force) { /* Now, get the maximal particle motion from its square */ dx_max = sqrtf(dx2_max); + dx_max_sort = sqrtf(dx2_max_sort); /* Store the values */ c->stars.h_max = cell_h_max; c->stars.dx_max_part = dx_max; + c->stars.dx_max_sort = dx_max_sort; /* Update the time of the last drift */ c->grav.ti_old_part = ti_current; @@ -3627,7 +3757,8 @@ void cell_drift_multipole(struct cell *c, const struct engine *e) { void cell_check_timesteps(struct cell *c) { #ifdef SWIFT_DEBUG_CHECKS - if (c->hydro.ti_end_min == 0 && c->grav.ti_end_min == 0 && c->nr_tasks > 0) + if (c->hydro.ti_end_min == 0 && c->grav.ti_end_min == 0 && + c->stars.ti_end_min == 0 && c->nr_tasks > 0) error("Cell without assigned time-step"); if (c->split) { diff --git a/src/cell.h b/src/cell.h index bc3a21dbbdbb1e03b6d51a4363e0153a750340f8..b364364a65ffd9d7c83ab1fc52a1a9a0b1b10200 100644 --- a/src/cell.h +++ b/src/cell.h @@ -460,6 +460,30 @@ struct cell { /*! Values of dx_max before the drifts, used for sub-cell tasks. */ float dx_max_part_old; + /*! Maximum particle movement in this cell since the last sort. */ + float dx_max_sort; + + /*! Values of dx_max_sort before the drifts, used for sub-cell tasks. */ + float dx_max_sort_old; + + /*! Bit mask of sort directions that will be needed in the next timestep. */ + unsigned int requires_sorts; + + /*! Pointer for the sorted indices. */ + struct entry *sort[13]; + + /*! Bit-mask indicating the sorted directions */ + unsigned int sorted; + + /*! Bit mask of sorts that need to be computed for this cell. */ + unsigned int do_sort; + + /*! Do any of this cell's sub-cells need to be sorted? */ + char do_sub_sort; + + /*! Maximum end of (integer) time step in this cell for gravity tasks. */ + integertime_t ti_end_min; + /*! Dependency implicit task for the star ghost (in->ghost->out)*/ struct task *ghost_in; @@ -469,6 +493,9 @@ struct cell { /*! The star ghost task itself */ struct task *ghost; + /*! The task computing this cell's sorts. */ + struct task *sorts; + /*! Linked list of the tasks computing this cell's star density. */ struct link *density; @@ -484,6 +511,11 @@ struct cell { /*! Spin lock for various uses (#spart case). */ swift_lock_type lock; +#ifdef SWIFT_DEBUG_CHECKS + /*! Last (integer) time the cell's sort arrays were updated. */ + integertime_t ti_sort; +#endif + } stars; #ifdef WITH_MPI @@ -652,7 +684,9 @@ void cell_activate_subcell_external_grav_tasks(struct cell *ci, struct scheduler *s); void cell_activate_drift_part(struct cell *c, struct scheduler *s); void cell_activate_drift_gpart(struct cell *c, struct scheduler *s); +void cell_activate_drift_spart(struct cell *c, struct scheduler *s); void cell_activate_sorts(struct cell *c, int sid, struct scheduler *s); +void cell_activate_stars_sorts(struct cell *c, int sid, struct scheduler *s); void cell_clear_drift_flags(struct cell *c, void *data); void cell_set_super_mapper(void *map_data, int num_elements, void *extra_data); int cell_has_tasks(struct cell *c); diff --git a/src/drift.h b/src/drift.h index 351b15381dde9a95af908003b1c8df01b56610fa..a4bdf9be74aade4fe0f1349544cf472363c81c99 100644 --- a/src/drift.h +++ b/src/drift.h @@ -142,6 +142,7 @@ __attribute__((always_inline)) INLINE static void drift_spart( for (int k = 0; k < 3; k++) { const float dx = sp->v[k] * dt_drift; sp->x_diff[k] -= dx; + sp->x_diff_sort[k] -= dx; } } diff --git a/src/engine.c b/src/engine.c index 123c692edfbfbe6f6d8cef027998b43c0fa79c9d..cd1aed30abcc245c501f942e078dda46b80dd71d 100644 --- a/src/engine.c +++ b/src/engine.c @@ -128,6 +128,7 @@ struct end_of_step_data { size_t inhibited, g_inhibited, s_inhibited; integertime_t ti_hydro_end_min, ti_hydro_end_max, ti_hydro_beg_max; integertime_t ti_gravity_end_min, ti_gravity_end_max, ti_gravity_beg_max; + integertime_t ti_stars_end_min; struct engine *e; }; @@ -1924,7 +1925,8 @@ int engine_estimate_nr_tasks(struct engine *e) { n1 += 2; } if (e->policy & engine_policy_stars) { - n1 += 2; + /* 1 self, 1 sort, 26/2 pairs */ + n1 += 15; } #if defined(WITH_LOGGER) n1 += 1; @@ -2214,6 +2216,7 @@ void engine_collect_end_of_step_recurse(struct cell *c, ti_hydro_beg_max = 0; integertime_t ti_gravity_end_min = max_nr_timesteps, ti_gravity_end_max = 0, ti_gravity_beg_max = 0; + integertime_t ti_stars_end_min = max_nr_timesteps; /* Collect the values from the progeny. */ for (int k = 0; k < 8; k++) { @@ -2233,6 +2236,8 @@ void engine_collect_end_of_step_recurse(struct cell *c, ti_gravity_end_max = max(ti_gravity_end_max, cp->grav.ti_end_max); ti_gravity_beg_max = max(ti_gravity_beg_max, cp->grav.ti_beg_max); + ti_stars_end_min = min(ti_stars_end_min, cp->stars.ti_end_min); + updated += cp->hydro.updated; g_updated += cp->grav.updated; s_updated += cp->stars.updated; @@ -2255,6 +2260,7 @@ void engine_collect_end_of_step_recurse(struct cell *c, c->grav.ti_end_min = ti_gravity_end_min; c->grav.ti_end_max = ti_gravity_end_max; c->grav.ti_beg_max = ti_gravity_beg_max; + c->stars.ti_end_min = ti_stars_end_min; c->hydro.updated = updated; c->grav.updated = g_updated; c->stars.updated = s_updated; @@ -2289,6 +2295,7 @@ void engine_collect_end_of_step_mapper(void *map_data, int num_elements, ti_hydro_beg_max = 0; integertime_t ti_gravity_end_min = max_nr_timesteps, ti_gravity_end_max = 0, ti_gravity_beg_max = 0; + integertime_t ti_stars_end_min = max_nr_timesteps; for (int ind = 0; ind < num_elements; ind++) { struct cell *c = &s->cells_top[local_cells[ind]]; @@ -2309,6 +2316,9 @@ void engine_collect_end_of_step_mapper(void *map_data, int num_elements, ti_gravity_end_max = max(ti_gravity_end_max, c->grav.ti_end_max); ti_gravity_beg_max = max(ti_gravity_beg_max, c->grav.ti_beg_max); + if (c->stars.ti_end_min > e->ti_current) + ti_stars_end_min = min(ti_stars_end_min, c->stars.ti_end_min); + updated += c->hydro.updated; g_updated += c->grav.updated; s_updated += c->stars.updated; @@ -2347,6 +2357,9 @@ void engine_collect_end_of_step_mapper(void *map_data, int num_elements, max(ti_gravity_end_max, data->ti_gravity_end_max); data->ti_gravity_beg_max = max(ti_gravity_beg_max, data->ti_gravity_beg_max); + + if (ti_stars_end_min > e->ti_current) + data->ti_stars_end_min = min(ti_stars_end_min, data->ti_stars_end_min); } if (lock_unlock(&s->lock) != 0) error("Failed to unlock the space"); diff --git a/src/engine_maketasks.c b/src/engine_maketasks.c index 54a73513935cbd73db7362711bd992e669cbe7ca..5e26f65b0c7e777bdc2a2bdad38ea45eb97f4306 100644 --- a/src/engine_maketasks.c +++ b/src/engine_maketasks.c @@ -733,6 +733,10 @@ void engine_make_hierarchical_tasks_stars(struct engine *e, struct cell *c) { /* Are we in a super-cell ? */ if (c->super == c) { + /* Add the sort task. */ + c->stars.sorts = scheduler_addtask(s, task_type_stars_sort, + task_subtype_none, 0, 0, c, NULL); + /* Local tasks only... */ if (c->nodeID == e->nodeID) { @@ -1010,6 +1014,14 @@ void engine_count_and_link_tasks_mapper(void *map_data, int num_elements, scheduler_addunlock(sched, t, finger->hydro.sorts); } + /* Link stars sort tasks to all the higher sort task. */ + if (t_type == task_type_stars_sort) { + for (struct cell *finger = t->ci->parent; finger != NULL; + finger = finger->parent) + if (finger->stars.sorts != NULL) + scheduler_addunlock(sched, t, finger->stars.sorts); + } + /* Link self tasks to cells. */ else if (t_type == task_type_self) { atomic_inc(&ci->nr_tasks); @@ -1576,6 +1588,11 @@ void engine_link_stars_tasks_mapper(void *map_data, int num_elements, for (int ind = 0; ind < num_elements; ind++) { struct task *t = &((struct task *)map_data)[ind]; + /* Sort tasks depend on the drift of the cell. */ + if (t->type == task_type_stars_sort && t->ci->nodeID == engine_rank) { + scheduler_addunlock(sched, t->ci->super->grav.drift, t); + } + /* Self-interaction? */ if (t->type == task_type_self && t->subtype == task_subtype_stars_density) { @@ -1596,13 +1613,18 @@ void engine_link_stars_tasks_mapper(void *map_data, int num_elements, t->subtype == task_subtype_stars_density) { /* Make all density tasks depend on the drift and the sorts. */ - if (t->ci->nodeID == engine_rank) - scheduler_addunlock(sched, t->ci->super->hydro.drift, t); - scheduler_addunlock(sched, t->ci->super->hydro.sorts, t); + if (t->cj->nodeID == engine_rank) + scheduler_addunlock(sched, t->cj->super->hydro.drift, t); + scheduler_addunlock(sched, t->cj->super->hydro.sorts, t); + + scheduler_addunlock(sched, t->ci->super->stars.sorts, t); + if (t->ci->super != t->cj->super) { - if (t->cj->nodeID == engine_rank) - scheduler_addunlock(sched, t->cj->super->hydro.drift, t); - scheduler_addunlock(sched, t->cj->super->hydro.sorts, t); + if (t->ci->nodeID == engine_rank) + scheduler_addunlock(sched, t->ci->super->hydro.drift, t); + scheduler_addunlock(sched, t->ci->super->hydro.sorts, t); + + scheduler_addunlock(sched, t->cj->super->stars.sorts, t); } /* Now, build all the dependencies for the stars for the cells */ @@ -1624,6 +1646,7 @@ void engine_link_stars_tasks_mapper(void *map_data, int num_elements, /* Make all density tasks depend on the drift and sorts. */ scheduler_addunlock(sched, t->ci->super->hydro.drift, t); scheduler_addunlock(sched, t->ci->super->hydro.sorts, t); + scheduler_addunlock(sched, t->ci->super->stars.sorts, t); /* Now, build all the dependencies for the stars for the cells */ /* that are local and are not descendant of the same super-cells */ @@ -1638,13 +1661,18 @@ void engine_link_stars_tasks_mapper(void *map_data, int num_elements, t->subtype == task_subtype_stars_density) { /* Make all density tasks depend on the drift. */ - if (t->ci->nodeID == engine_rank) - scheduler_addunlock(sched, t->ci->super->hydro.drift, t); - scheduler_addunlock(sched, t->ci->super->hydro.sorts, t); + if (t->cj->nodeID == engine_rank) + scheduler_addunlock(sched, t->cj->super->hydro.drift, t); + scheduler_addunlock(sched, t->cj->super->hydro.sorts, t); + + scheduler_addunlock(sched, t->ci->super->stars.sorts, t); + if (t->ci->super != t->cj->super) { - if (t->cj->nodeID == engine_rank) - scheduler_addunlock(sched, t->cj->super->hydro.drift, t); - scheduler_addunlock(sched, t->cj->super->hydro.sorts, t); + if (t->ci->nodeID == engine_rank) + scheduler_addunlock(sched, t->ci->super->hydro.drift, t); + scheduler_addunlock(sched, t->ci->super->hydro.sorts, t); + + scheduler_addunlock(sched, t->cj->super->stars.sorts, t); } /* Now, build all the dependencies for the stars for the cells */ @@ -1697,8 +1725,8 @@ void engine_make_starsloop_tasks_mapper(void *map_data, int num_elements, /* Get the cell */ struct cell *ci = &cells[cid]; - /* Skip cells without star particles */ - if (ci->stars.count == 0) continue; + /* Skip cells without particles */ + if (ci->stars.count == 0 && ci->hydro.count == 0) continue; /* If the cells is local build a self-interaction */ if (ci->nodeID == nodeID) @@ -1724,7 +1752,7 @@ void engine_make_starsloop_tasks_mapper(void *map_data, int num_elements, struct cell *cj = &cells[cjd]; /* Is that neighbour local and does it have particles ? */ - if (cid >= cjd || cj->hydro.count == 0 || + if (cid >= cjd || (cj->stars.count == 0 && cj->hydro.count == 0) || (ci->nodeID != nodeID && cj->nodeID != nodeID)) continue; @@ -1851,6 +1879,12 @@ void engine_maketasks(struct engine *e) { s->nr_cells, 1, 0, e); } + if (e->verbose) + message("Making stars tasks took %.3f %s.", + clocks_from_ticks(getticks() - tic2), clocks_getunit()); + + tic2 = getticks(); + /* Add the self gravity tasks. */ if (e->policy & engine_policy_self_gravity) engine_make_self_gravity_tasks(e); @@ -1881,7 +1915,7 @@ void engine_maketasks(struct engine *e) { #endif const size_t self_grav_tasks_per_cell = 125; const size_t ext_grav_tasks_per_cell = 1; - const size_t stars_tasks_per_cell = 15; + const size_t stars_tasks_per_cell = 27; if (e->policy & engine_policy_hydro) e->size_links += s->tot_cells * hydro_tasks_per_cell; diff --git a/src/engine_marktasks.c b/src/engine_marktasks.c index 3d607bcf85fde93fc58581132e8b443a5b257fe4..11ec4b4d86f6330c5367a7a5a409c640d868b0d8 100644 --- a/src/engine_marktasks.c +++ b/src/engine_marktasks.c @@ -128,7 +128,7 @@ void engine_marktasks_mapper(void *map_data, int num_elements, if (cell_is_active_stars(ci, e)) { scheduler_activate(s, t); cell_activate_drift_part(ci, s); - cell_activate_drift_gpart(ci, s); + cell_activate_drift_spart(ci, s); } } @@ -195,9 +195,7 @@ void engine_marktasks_mapper(void *map_data, int num_elements, scheduler_activate(s, t); /* Set the correct sorting flags */ - if (t_type == task_type_pair && - (t_subtype == task_subtype_density || - t_subtype == task_subtype_stars_density)) { + if (t_type == task_type_pair && t_subtype == task_subtype_density) { /* Store some values. */ atomic_or(&ci->hydro.requires_sorts, 1 << t->flags); @@ -217,8 +215,7 @@ void engine_marktasks_mapper(void *map_data, int num_elements, /* Store current values of dx_max and h_max. */ else if (t_type == task_type_sub_pair && - (t_subtype == task_subtype_density || - t_subtype == task_subtype_stars_density)) { + t_subtype == task_subtype_density) { cell_activate_subcell_hydro_tasks(t->ci, t->cj, s); } } @@ -233,26 +230,40 @@ void engine_marktasks_mapper(void *map_data, int num_elements, /* Set the correct sorting flags */ if (t_type == task_type_pair) { + /* Do ci */ /* Store some values. */ - atomic_or(&ci->hydro.requires_sorts, 1 << t->flags); atomic_or(&cj->hydro.requires_sorts, 1 << t->flags); - ci->hydro.dx_max_sort_old = ci->hydro.dx_max_sort; + atomic_or(&ci->stars.requires_sorts, 1 << t->flags); + cj->hydro.dx_max_sort_old = cj->hydro.dx_max_sort; + ci->stars.dx_max_sort_old = ci->stars.dx_max_sort; /* Activate the hydro drift tasks. */ - if (ci_nodeID == nodeID) { - cell_activate_drift_part(ci, s); - cell_activate_drift_gpart(ci, s); - } - if (cj_nodeID == nodeID) { - cell_activate_drift_part(cj, s); - cell_activate_drift_gpart(cj, s); - } + if (ci_nodeID == nodeID) cell_activate_drift_spart(ci, s); + + if (cj_nodeID == nodeID) cell_activate_drift_part(cj, s); /* Check the sorts and activate them if needed. */ - cell_activate_sorts(ci, t->flags, s); cell_activate_sorts(cj, t->flags, s); + cell_activate_stars_sorts(ci, t->flags, s); + + /* Do cj */ + /* Store some values. */ + atomic_or(&ci->hydro.requires_sorts, 1 << t->flags); + atomic_or(&cj->stars.requires_sorts, 1 << t->flags); + + ci->hydro.dx_max_sort_old = ci->hydro.dx_max_sort; + cj->stars.dx_max_sort_old = cj->stars.dx_max_sort; + + /* Activate the hydro drift tasks. */ + if (ci_nodeID == nodeID) cell_activate_drift_part(ci, s); + + if (cj_nodeID == nodeID) cell_activate_drift_spart(cj, s); + + /* Check the sorts and activate them if needed. */ + cell_activate_sorts(ci, t->flags, s); + cell_activate_stars_sorts(cj, t->flags, s); } /* Store current values of dx_max and h_max. */ diff --git a/src/runner.c b/src/runner.c index 0026d833b98ec9f07bdec7f13d3d5bccf6a4b1e8..1b4a31dbd6771870020910817651cfb506e334c5 100644 --- a/src/runner.c +++ b/src/runner.c @@ -619,18 +619,25 @@ void runner_do_sort_ascending(struct entry *sort, int N) { * @param c The #cell to check. * @param flags The sorting flags to check. */ -void runner_check_sorts(struct cell *c, int flags) { - #ifdef SWIFT_DEBUG_CHECKS - if (flags & ~c->hydro.sorted) error("Inconsistent sort flags (downward)!"); - if (c->split) - for (int k = 0; k < 8; k++) - if (c->progeny[k] != NULL && c->progeny[k]->hydro.count > 0) - runner_check_sorts(c->progeny[k], c->hydro.sorted); +#define RUNNER_CHECK_SORTS(TYPE) \ + void runner_check_sorts_##TYPE(struct cell *c, int flags) { \ + \ + if (flags & ~c->TYPE.sorted) error("Inconsistent sort flags (downward)!"); \ + if (c->split) \ + for (int k = 0; k < 8; k++) \ + if (c->progeny[k] != NULL && c->progeny[k]->TYPE.count > 0) \ + runner_check_sorts_##TYPE(c->progeny[k], c->TYPE.sorted); \ + } #else - error("Calling debugging code without debugging flag activated."); +#define RUNNER_CHECK_SORTS(TYPE) \ + void runner_check_sorts_##TYPE(struct cell *c, int flags) { \ + error("Calling debugging code without debugging flag activated."); \ + } #endif -} + +RUNNER_CHECK_SORTS(hydro) +RUNNER_CHECK_SORTS(stars) /** * @brief Sort the particles in the given cell along all cardinal directions. @@ -643,8 +650,8 @@ void runner_check_sorts(struct cell *c, int flags) { * @param clock Flag indicating whether to record the timing or not, needed * for recursive calls. */ -void runner_do_sort(struct runner *r, struct cell *c, int flags, int cleanup, - int clock) { +void runner_do_hydro_sort(struct runner *r, struct cell *c, int flags, + int cleanup, int clock) { struct entry *fingers[8]; const int count = c->hydro.count; @@ -669,7 +676,7 @@ void runner_do_sort(struct runner *r, struct cell *c, int flags, int cleanup, #ifdef SWIFT_DEBUG_CHECKS /* Make sure the sort flags are consistent (downward). */ - runner_check_sorts(c, c->hydro.sorted); + runner_check_sorts_hydro(c, c->hydro.sorted); /* Make sure the sort flags are consistent (upard). */ for (struct cell *finger = c->parent; finger != NULL; @@ -701,10 +708,10 @@ void runner_do_sort(struct runner *r, struct cell *c, int flags, int cleanup, for (int k = 0; k < 8; k++) { if (c->progeny[k] != NULL && c->progeny[k]->hydro.count > 0) { /* Only propagate cleanup if the progeny is stale. */ - runner_do_sort(r, c->progeny[k], flags, - cleanup && (c->progeny[k]->hydro.dx_max_sort > - space_maxreldx * c->progeny[k]->dmin), - 0); + runner_do_hydro_sort(r, c->progeny[k], flags, + cleanup && (c->progeny[k]->hydro.dx_max_sort > + space_maxreldx * c->progeny[k]->dmin), + 0); dx_max_sort = max(dx_max_sort, c->progeny[k]->hydro.dx_max_sort); dx_max_sort_old = max(dx_max_sort_old, c->progeny[k]->hydro.dx_max_sort_old); @@ -838,7 +845,7 @@ void runner_do_sort(struct runner *r, struct cell *c, int flags, int cleanup, } /* Make sure the sort flags are consistent (downward). */ - runner_check_sorts(c, flags); + runner_check_sorts_hydro(c, flags); /* Make sure the sort flags are consistent (upward). */ for (struct cell *finger = c->parent; finger != NULL; @@ -856,6 +863,224 @@ void runner_do_sort(struct runner *r, struct cell *c, int flags, int cleanup, if (clock) TIMER_TOC(timer_dosort); } +/** + * @brief Sort the stars particles in the given cell along all cardinal + * directions. + * + * @param r The #runner. + * @param c The #cell. + * @param flags Cell flag. + * @param cleanup If true, re-build the sorts for the selected flags instead + * of just adding them. + * @param clock Flag indicating whether to record the timing or not, needed + * for recursive calls. + */ +void runner_do_stars_sort(struct runner *r, struct cell *c, int flags, + int cleanup, int clock) { + + struct entry *fingers[8]; + const int count = c->stars.count; + struct spart *sparts = c->stars.parts; + float buff[8]; + + TIMER_TIC; + + /* We need to do the local sorts plus whatever was requested further up. */ + flags |= c->stars.do_sort; + if (cleanup) { + c->stars.sorted = 0; + } else { + flags &= ~c->stars.sorted; + } + if (flags == 0 && !c->stars.do_sub_sort) return; + + /* Check that the particles have been moved to the current time */ + if (flags && !cell_are_spart_drifted(c, r->e)) + error("Sorting un-drifted cell c->nodeID=%d", c->nodeID); + +#ifdef SWIFT_DEBUG_CHECKS + /* Make sure the sort flags are consistent (downward). */ + runner_check_sorts_stars(c, c->stars.sorted); + + /* Make sure the sort flags are consistent (upward). */ + for (struct cell *finger = c->parent; finger != NULL; + finger = finger->parent) { + if (finger->stars.sorted & ~c->stars.sorted) + error("Inconsistent sort flags (upward)."); + } + + /* Update the sort timer which represents the last time the sorts + were re-set. */ + if (c->stars.sorted == 0) c->stars.ti_sort = r->e->ti_current; +#endif + + /* start by allocating the entry arrays in the requested dimensions. */ + for (int j = 0; j < 13; j++) { + if ((flags & (1 << j)) && c->stars.sort[j] == NULL) { + if ((c->stars.sort[j] = (struct entry *)malloc(sizeof(struct entry) * + (count + 1))) == NULL) + error("Failed to allocate sort memory."); + } + } + + /* Does this cell have any progeny? */ + if (c->split) { + + /* Fill in the gaps within the progeny. */ + float dx_max_sort = 0.0f; + float dx_max_sort_old = 0.0f; + for (int k = 0; k < 8; k++) { + if (c->progeny[k] != NULL && c->progeny[k]->stars.count > 0) { + /* Only propagate cleanup if the progeny is stale. */ + runner_do_stars_sort(r, c->progeny[k], flags, + cleanup && (c->progeny[k]->stars.dx_max_sort > + space_maxreldx * c->progeny[k]->dmin), + 0); + dx_max_sort = max(dx_max_sort, c->progeny[k]->stars.dx_max_sort); + dx_max_sort_old = + max(dx_max_sort_old, c->progeny[k]->stars.dx_max_sort_old); + } + } + c->stars.dx_max_sort = dx_max_sort; + c->stars.dx_max_sort_old = dx_max_sort_old; + + /* Loop over the 13 different sort arrays. */ + for (int j = 0; j < 13; j++) { + + /* Has this sort array been flagged? */ + if (!(flags & (1 << j))) continue; + + /* Init the particle index offsets. */ + int off[8]; + off[0] = 0; + for (int k = 1; k < 8; k++) + if (c->progeny[k - 1] != NULL) + off[k] = off[k - 1] + c->progeny[k - 1]->stars.count; + else + off[k] = off[k - 1]; + + /* Init the entries and indices. */ + int inds[8]; + for (int k = 0; k < 8; k++) { + inds[k] = k; + if (c->progeny[k] != NULL && c->progeny[k]->stars.count > 0) { + fingers[k] = c->progeny[k]->stars.sort[j]; + buff[k] = fingers[k]->d; + off[k] = off[k]; + } else + buff[k] = FLT_MAX; + } + + /* Sort the buffer. */ + for (int i = 0; i < 7; i++) + for (int k = i + 1; k < 8; k++) + if (buff[inds[k]] < buff[inds[i]]) { + int temp_i = inds[i]; + inds[i] = inds[k]; + inds[k] = temp_i; + } + + /* For each entry in the new sort list. */ + struct entry *finger = c->stars.sort[j]; + for (int ind = 0; ind < count; ind++) { + + /* Copy the minimum into the new sort array. */ + finger[ind].d = buff[inds[0]]; + finger[ind].i = fingers[inds[0]]->i + off[inds[0]]; + + /* Update the buffer. */ + fingers[inds[0]] += 1; + buff[inds[0]] = fingers[inds[0]]->d; + + /* Find the smallest entry. */ + for (int k = 1; k < 8 && buff[inds[k]] < buff[inds[k - 1]]; k++) { + int temp_i = inds[k - 1]; + inds[k - 1] = inds[k]; + inds[k] = temp_i; + } + + } /* Merge. */ + + /* Add a sentinel. */ + c->stars.sort[j][count].d = FLT_MAX; + c->stars.sort[j][count].i = 0; + + /* Mark as sorted. */ + atomic_or(&c->stars.sorted, 1 << j); + + } /* loop over sort arrays. */ + + } /* progeny? */ + + /* Otherwise, just sort. */ + else { + + /* Reset the sort distance */ + if (c->stars.sorted == 0) { + + /* And the individual sort distances if we are a local cell */ + for (int k = 0; k < count; k++) { + sparts[k].x_diff_sort[0] = 0.0f; + sparts[k].x_diff_sort[1] = 0.0f; + sparts[k].x_diff_sort[2] = 0.0f; + } + c->stars.dx_max_sort_old = 0.f; + c->stars.dx_max_sort = 0.f; + } + + /* Fill the sort array. */ + for (int k = 0; k < count; k++) { + const double px[3] = {sparts[k].x[0], sparts[k].x[1], sparts[k].x[2]}; + for (int j = 0; j < 13; j++) + if (flags & (1 << j)) { + c->stars.sort[j][k].i = k; + c->stars.sort[j][k].d = px[0] * runner_shift[j][0] + + px[1] * runner_shift[j][1] + + px[2] * runner_shift[j][2]; + } + } + + /* Add the sentinel and sort. */ + for (int j = 0; j < 13; j++) + if (flags & (1 << j)) { + c->stars.sort[j][count].d = FLT_MAX; + c->stars.sort[j][count].i = 0; + runner_do_sort_ascending(c->stars.sort[j], count); + atomic_or(&c->stars.sorted, 1 << j); + } + } + +#ifdef SWIFT_DEBUG_CHECKS + /* Verify the sorting. */ + for (int j = 0; j < 13; j++) { + if (!(flags & (1 << j))) continue; + struct entry *finger = c->stars.sort[j]; + for (int k = 1; k < count; k++) { + if (finger[k].d < finger[k - 1].d) + error("Sorting failed, ascending array."); + if (finger[k].i >= count) error("Sorting failed, indices borked."); + } + } + + /* Make sure the sort flags are consistent (downward). */ + runner_check_sorts_stars(c, flags); + + /* Make sure the sort flags are consistent (upward). */ + for (struct cell *finger = c->parent; finger != NULL; + finger = finger->parent) { + if (finger->stars.sorted & ~c->stars.sorted) + error("Inconsistent sort flags."); + } +#endif + + /* Clear the cell's sort flags. */ + c->stars.do_sort = 0; + c->stars.do_sub_sort = 0; + c->stars.requires_sorts = 0; + + if (clock) TIMER_TOC(timer_do_stars_sort); +} + /** * @brief Initialize the multipoles before the gravity calculation. * @@ -1814,6 +2039,7 @@ void runner_do_timestep(struct runner *r, struct cell *c, int timer) { ti_hydro_beg_max = 0; integertime_t ti_gravity_end_min = max_nr_timesteps, ti_gravity_end_max = 0, ti_gravity_beg_max = 0; + integertime_t ti_stars_end_min = max_nr_timesteps; /* No children? */ if (!c->split) { @@ -1990,6 +2216,8 @@ void runner_do_timestep(struct runner *r, struct cell *c, int timer) { ti_gravity_end_min = min(ti_current + ti_new_step, ti_gravity_end_min); ti_gravity_end_max = max(ti_current + ti_new_step, ti_gravity_end_max); + ti_stars_end_min = min(ti_current + ti_new_step, ti_stars_end_min); + /* What is the next starting point for this cell ? */ ti_gravity_beg_max = max(ti_current, ti_gravity_beg_max); @@ -2006,6 +2234,8 @@ void runner_do_timestep(struct runner *r, struct cell *c, int timer) { ti_gravity_end_min = min(ti_end, ti_gravity_end_min); ti_gravity_end_max = max(ti_end, ti_gravity_end_max); + ti_stars_end_min = min(ti_end, ti_stars_end_min); + const integertime_t ti_beg = get_integer_time_begin(ti_current + 1, sp->time_bin); @@ -2036,6 +2266,7 @@ void runner_do_timestep(struct runner *r, struct cell *c, int timer) { ti_gravity_end_min = min(cp->grav.ti_end_min, ti_gravity_end_min); ti_gravity_end_max = max(cp->grav.ti_end_max, ti_gravity_end_max); ti_gravity_beg_max = max(cp->grav.ti_beg_max, ti_gravity_beg_max); + ti_stars_end_min = min(cp->stars.ti_end_min, ti_stars_end_min); } } @@ -2052,6 +2283,7 @@ void runner_do_timestep(struct runner *r, struct cell *c, int timer) { c->grav.ti_end_min = ti_gravity_end_min; c->grav.ti_end_max = ti_gravity_end_max; c->grav.ti_beg_max = ti_gravity_beg_max; + c->stars.ti_end_min = ti_stars_end_min; #ifdef SWIFT_DEBUG_CHECKS if (c->hydro.ti_end_min == e->ti_current && @@ -2060,6 +2292,9 @@ void runner_do_timestep(struct runner *r, struct cell *c, int timer) { if (c->grav.ti_end_min == e->ti_current && c->grav.ti_end_min < max_nr_timesteps) error("End of next gravity step is current time!"); + if (c->stars.ti_end_min == e->ti_current && + c->stars.ti_end_min < max_nr_timesteps) + error("End of next stars step is current time!"); #endif if (timer) TIMER_TOC(timer_timestep); @@ -2576,12 +2811,18 @@ void *runner_main(void *data) { case task_type_sort: /* Cleanup only if any of the indices went stale. */ - runner_do_sort(r, ci, t->flags, - ci->hydro.dx_max_sort_old > space_maxreldx * ci->dmin, - 1); + runner_do_hydro_sort( + r, ci, t->flags, + ci->hydro.dx_max_sort_old > space_maxreldx * ci->dmin, 1); /* Reset the sort flags as our work here is done. */ t->flags = 0; break; + case task_type_stars_sort: + /* Cleanup only if any of the indices went stale. */ + runner_do_stars_sort( + r, ci, t->flags, + ci->stars.dx_max_sort_old > space_maxreldx * ci->dmin, 1); + break; case task_type_init_grav: runner_do_init_grav(r, ci, 1); break; diff --git a/src/runner.h b/src/runner.h index 29c42da405255c217d43cc905e87dde9b69276f9..6af0cd227374afd616b3329a8dbd527634902922 100644 --- a/src/runner.h +++ b/src/runner.h @@ -69,8 +69,10 @@ struct runner { /* Function prototypes. */ void runner_do_ghost(struct runner *r, struct cell *c, int timer); void runner_do_extra_ghost(struct runner *r, struct cell *c, int timer); -void runner_do_sort(struct runner *r, struct cell *c, int flag, int cleanup, - int clock); +void runner_do_hydro_sort(struct runner *r, struct cell *c, int flag, + int cleanup, int clock); +void runner_do_stars_sort(struct runner *r, struct cell *c, int flag, + int cleanup, int clock); void runner_do_drift_part(struct runner *r, struct cell *c, int timer); void runner_do_drift_gpart(struct runner *r, struct cell *c, int timer); void runner_do_kick1(struct runner *r, struct cell *c, int timer); diff --git a/src/runner_doiact_stars.h b/src/runner_doiact_stars.h index e696e4fd10853536008d9d9fafc90e6475fd291a..b89c1a50409055b6cd1be4b8e560448bd30fc69c 100644 --- a/src/runner_doiact_stars.h +++ b/src/runner_doiact_stars.h @@ -17,6 +17,8 @@ * along with this program. If not, see <http://www.gnu.org/licenses/>. * ******************************************************************************/ +#ifndef SWIFT_RUNNER_DOIACT_STARS_H +#define SWIFT_RUNNER_DOIACT_STARS_H #include "swift.h" @@ -35,6 +37,7 @@ void runner_doself_stars_density(struct runner *r, struct cell *c, int timer) { /* Anything to do here? */ if (!cell_is_active_stars(c, e)) return; + if (c->hydro.count == 0 && c->stars.count == 0) return; /* Cosmological terms */ const float a = cosmo->a; @@ -99,7 +102,7 @@ void runner_dosubpair_stars_density(struct runner *r, struct cell *restrict ci, const struct cosmology *cosmo = e->cosmology; /* Anything to do here? */ - if (!cell_is_active_stars(ci, e) && !cell_is_active_stars(cj, e)) return; + if (!cell_is_active_stars(ci, e)) return; const int scount_i = ci->stars.count; const int count_j = cj->hydro.count; @@ -162,8 +165,10 @@ void runner_dopair_stars_density(struct runner *r, struct cell *restrict ci, TIMER_TIC; - runner_dosubpair_stars_density(r, ci, cj); - runner_dosubpair_stars_density(r, cj, ci); + if (ci->stars.count != 0 && cj->hydro.count != 0) + runner_dosubpair_stars_density(r, ci, cj); + if (cj->stars.count != 0 && ci->hydro.count != 0) + runner_dosubpair_stars_density(r, cj, ci); if (timer) TIMER_TOC(timer_dopair_stars_density); } @@ -382,7 +387,6 @@ void runner_dosub_subset_stars_density(struct runner *r, struct cell *ci, if (!cell_is_active_stars(ci, e) && (cj == NULL || !cell_is_active_stars(cj, e))) return; - if (ci->stars.count == 0 || (cj != NULL && cj->stars.count == 0)) return; /* Find out in which sub-cell of ci the parts are. */ struct cell *sub = NULL; @@ -969,6 +973,34 @@ void runner_doself_branch_stars_density(struct runner *r, struct cell *c) { runner_doself_stars_density(r, c, 1); } +#define RUNNER_STARS_CHECK_SORT(TYPE, PART) \ + void runner_stars_check_sort_##TYPE(struct cell *cj, struct cell *ci, \ + const int sid) { \ + const struct entry *restrict sort_j = cj->TYPE.sort[sid]; \ + \ + for (int pjd = 0; pjd < cj->TYPE.count; pjd++) { \ + const struct PART *p = &cj->TYPE.parts[sort_j[pjd].i]; \ + const float d = p->x[0] * runner_shift[sid][0] + \ + p->x[1] * runner_shift[sid][1] + \ + p->x[2] * runner_shift[sid][2]; \ + if ((fabsf(d - sort_j[pjd].d) - cj->TYPE.dx_max_sort) > \ + 1.0e-4 * max(fabsf(d), cj->TYPE.dx_max_sort_old) && \ + (fabsf(d - sort_j[pjd].d) - cj->TYPE.dx_max_sort) > \ + cj->width[0] * 1.0e-10) \ + error( \ + "particle shift diff exceeds dx_max_sort in cell cj. " \ + "cj->nodeID=%d " \ + "ci->nodeID=%d d=%e sort_j[pjd].d=%e cj->" #TYPE \ + ".dx_max_sort=%e " \ + "cj->" #TYPE ".dx_max_sort_old=%e", \ + cj->nodeID, ci->nodeID, d, sort_j[pjd].d, cj->TYPE.dx_max_sort, \ + cj->TYPE.dx_max_sort_old); \ + } \ + } + +RUNNER_STARS_CHECK_SORT(hydro, part) +RUNNER_STARS_CHECK_SORT(stars, spart) + /** * @brief Determine which version of DOPAIR1 needs to be called depending on the * orientation of the cells or whether DOPAIR1 needs to be called at all. @@ -982,64 +1014,54 @@ void runner_dopair_branch_stars_density(struct runner *r, struct cell *ci, struct cell *cj) { const struct engine *restrict e = r->e; + const int ci_active = cell_is_active_stars(ci, e); + const int cj_active = cell_is_active_stars(cj, e); + const int do_ci = (ci->stars.count != 0 && cj->hydro.count != 0 && ci_active); + const int do_cj = (cj->stars.count != 0 && ci->hydro.count != 0 && cj_active); /* Anything to do here? */ - if (!cell_is_active_stars(ci, e) && !cell_is_active_stars(cj, e)) return; - - /* Check that cells are drifted. */ - if (!cell_are_part_drifted(ci, e) || !cell_are_part_drifted(cj, e)) - error("Interacting undrifted cells."); + if (!do_ci && !do_cj) return; /* Get the sort ID. */ double shift[3] = {0.0, 0.0, 0.0}; const int sid = space_getsid(e->s, &ci, &cj, shift); + /* Check that cells are drifted. */ + if (do_ci && + (!cell_are_spart_drifted(ci, e) || !cell_are_part_drifted(cj, e))) + error("Interacting undrifted cells."); + /* Have the cells been sorted? */ - if (!(ci->hydro.sorted & (1 << sid)) || - ci->hydro.dx_max_sort_old > space_maxreldx * ci->dmin) + if (do_ci && (!(ci->stars.sorted & (1 << sid)) || + ci->stars.dx_max_sort_old > space_maxreldx * ci->dmin)) error("Interacting unsorted cells."); - if (!(cj->hydro.sorted & (1 << sid)) || - cj->hydro.dx_max_sort_old > space_maxreldx * cj->dmin) + + if (do_ci && (!(cj->hydro.sorted & (1 << sid)) || + cj->hydro.dx_max_sort_old > space_maxreldx * cj->dmin)) + error("Interacting unsorted cells."); + + if (do_cj && + (!cell_are_part_drifted(ci, e) || !cell_are_spart_drifted(cj, e))) + error("Interacting undrifted cells."); + + /* Have the cells been sorted? */ + if (do_cj && (!(ci->hydro.sorted & (1 << sid)) || + ci->hydro.dx_max_sort_old > space_maxreldx * ci->dmin)) + error("Interacting unsorted cells."); + + if (do_cj && (!(cj->stars.sorted & (1 << sid)) || + cj->stars.dx_max_sort_old > space_maxreldx * cj->dmin)) error("Interacting unsorted cells."); #ifdef SWIFT_DEBUG_CHECKS - /* Pick-out the sorted lists. */ - const struct entry *restrict sort_i = ci->hydro.sort[sid]; - const struct entry *restrict sort_j = cj->hydro.sort[sid]; - - /* Check that the dx_max_sort values in the cell are indeed an upper - bound on particle movement. */ - for (int pid = 0; pid < ci->hydro.count; pid++) { - const struct part *p = &ci->hydro.parts[sort_i[pid].i]; - const float d = p->x[0] * runner_shift[sid][0] + - p->x[1] * runner_shift[sid][1] + - p->x[2] * runner_shift[sid][2]; - if (fabsf(d - sort_i[pid].d) - ci->hydro.dx_max_sort > - 1.0e-4 * max(fabsf(d), ci->hydro.dx_max_sort_old) && - fabsf(d - sort_i[pid].d) - ci->hydro.dx_max_sort > - ci->width[0] * 1.0e-10) - error( - "particle shift diff exceeds dx_max_sort in cell ci. ci->nodeID=%d " - "cj->nodeID=%d d=%e sort_i[pid].d=%e ci->hydro.dx_max_sort=%e " - "ci->hydro.dx_max_sort_old=%e", - ci->nodeID, cj->nodeID, d, sort_i[pid].d, ci->hydro.dx_max_sort, - ci->hydro.dx_max_sort_old); + if (do_ci) { + runner_stars_check_sort_hydro(cj, ci, sid); + runner_stars_check_sort_stars(ci, cj, sid); } - for (int pjd = 0; pjd < cj->hydro.count; pjd++) { - const struct part *p = &cj->hydro.parts[sort_j[pjd].i]; - const float d = p->x[0] * runner_shift[sid][0] + - p->x[1] * runner_shift[sid][1] + - p->x[2] * runner_shift[sid][2]; - if ((fabsf(d - sort_j[pjd].d) - cj->hydro.dx_max_sort) > - 1.0e-4 * max(fabsf(d), cj->hydro.dx_max_sort_old) && - (fabsf(d - sort_j[pjd].d) - cj->hydro.dx_max_sort) > - cj->width[0] * 1.0e-10) - error( - "particle shift diff exceeds dx_max_sort in cell cj. cj->nodeID=%d " - "ci->nodeID=%d d=%e sort_j[pjd].d=%e cj->hydro.dx_max_sort=%e " - "cj->hydro.dx_max_sort_old=%e", - cj->nodeID, ci->nodeID, d, sort_j[pjd].d, cj->hydro.dx_max_sort, - cj->hydro.dx_max_sort_old); + + if (do_cj) { + runner_stars_check_sort_hydro(ci, cj, sid); + runner_stars_check_sort_stars(cj, ci, sid); } #endif /* SWIFT_DEBUG_CHECKS */ @@ -1067,8 +1089,11 @@ void runner_dosub_pair_stars_density(struct runner *r, struct cell *ci, TIMER_TIC; /* Should we even bother? */ - if (!cell_is_active_stars(ci, e) && !cell_is_active_stars(cj, e)) return; - if (ci->stars.count == 0 || cj->stars.count == 0) return; + int should_do = ci->stars.count != 0 && cj->hydro.count != 0 && + cell_is_active_stars(ci, e); + should_do |= cj->stars.count != 0 && ci->hydro.count != 0 && + cell_is_active_stars(cj, e); + if (!should_do) return; /* Get the type of pair if not specified explicitly. */ double shift[3]; @@ -1353,22 +1378,52 @@ void runner_dosub_pair_stars_density(struct runner *r, struct cell *ci, } /* Otherwise, compute the pair directly. */ - else if (cell_is_active_stars(ci, e) || cell_is_active_stars(cj, e)) { - - /* Make sure both cells are drifted to the current timestep. */ - if (!cell_are_part_drifted(ci, e) || !cell_are_part_drifted(cj, e)) - error("Interacting undrifted cells."); - - /* Do any of the cells need to be sorted first? */ - if (!(ci->hydro.sorted & (1 << sid)) || - ci->hydro.dx_max_sort_old > ci->dmin * space_maxreldx) - error("Interacting unsorted cell."); - if (!(cj->hydro.sorted & (1 << sid)) || - cj->hydro.dx_max_sort_old > cj->dmin * space_maxreldx) - error("Interacting unsorted cell."); - - /* Compute the interactions. */ - runner_dopair_branch_stars_density(r, ci, cj); + else { + + const int do_ci = ci->stars.count != 0 && cj->hydro.count != 0 && + cell_is_active_stars(ci, e); + const int do_cj = cj->stars.count != 0 && ci->hydro.count != 0 && + cell_is_active_stars(cj, e); + + if (do_ci) { + + /* Make sure both cells are drifted to the current timestep. */ + if (!cell_are_spart_drifted(ci, e)) + error("Interacting undrifted cells (sparts)."); + + if (!cell_are_part_drifted(cj, e)) + error("Interacting undrifted cells (parts)."); + + /* Do any of the cells need to be sorted first? */ + if (!(ci->stars.sorted & (1 << sid)) || + ci->stars.dx_max_sort_old > ci->dmin * space_maxreldx) + error("Interacting unsorted cell (sparts)."); + + if (!(cj->hydro.sorted & (1 << sid)) || + cj->hydro.dx_max_sort_old > cj->dmin * space_maxreldx) + error("Interacting unsorted cell (parts)."); + } + + if (do_cj) { + + /* Make sure both cells are drifted to the current timestep. */ + if (!cell_are_part_drifted(ci, e)) + error("Interacting undrifted cells (parts)."); + + if (!cell_are_spart_drifted(cj, e)) + error("Interacting undrifted cells (sparts)."); + + /* Do any of the cells need to be sorted first? */ + if (!(ci->hydro.sorted & (1 << sid)) || + ci->hydro.dx_max_sort_old > ci->dmin * space_maxreldx) + error("Interacting unsorted cell (parts)."); + + if (!(cj->stars.sorted & (1 << sid)) || + cj->stars.dx_max_sort_old > cj->dmin * space_maxreldx) + error("Interacting unsorted cell (sparts)."); + } + + if (do_ci || do_cj) runner_dopair_branch_stars_density(r, ci, cj); } if (gettimer) TIMER_TOC(TIMER_DOSUB_PAIR); @@ -1387,7 +1442,9 @@ void runner_dosub_self_stars_density(struct runner *r, struct cell *ci, TIMER_TIC; /* Should we even bother? */ - if (ci->stars.count == 0 || !cell_is_active_stars(ci, r->e)) return; + if (ci->hydro.count == 0 || ci->stars.count == 0 || + !cell_is_active_stars(ci, r->e)) + return; /* Recurse? */ if (cell_can_recurse_in_self_stars_task(ci)) { @@ -1406,8 +1463,13 @@ void runner_dosub_self_stars_density(struct runner *r, struct cell *ci, /* Otherwise, compute self-interaction. */ else { + /* Drift the cell to the current timestep if needed. */ + if (!cell_are_spart_drifted(ci, r->e)) error("Interacting undrifted cell."); + runner_doself_branch_stars_density(r, ci); } if (gettimer) TIMER_TOC(timer_dosub_self_stars_density); } + +#endif // SWIFT_RUNNER_DOIACT_STARS_H diff --git a/src/scheduler.c b/src/scheduler.c index 28fb4146983ee40144ff30693453f7e3b27eac3b..f0f6ac6cb34ccbaf31069112b4a7f1955ba2c5ae 100644 --- a/src/scheduler.c +++ b/src/scheduler.c @@ -408,7 +408,7 @@ static void scheduler_splittask_hydro(struct task *t, struct scheduler *s) { /* Reset the redo flag. */ redo = 0; - /* Non-splittable task? */ + /* Empty task? */ if ((t->ci == NULL) || (t->type == task_type_pair && t->cj == NULL) || t->ci->hydro.count == 0 || (t->cj != NULL && t->cj->hydro.count == 0)) { t->type = task_type_none; @@ -883,7 +883,7 @@ static void scheduler_splittask_stars(struct task *t, struct scheduler *s) { /* Reset the redo flag. */ redo = 0; - /* Non-splittable task? */ + /* Empty task? */ if ((t->ci == NULL) || (t->type == task_type_pair && t->cj == NULL) || t->ci->stars.count == 0 || (t->cj != NULL && t->cj->stars.count == 0)) { t->type = task_type_none; @@ -1835,6 +1835,11 @@ void scheduler_reweight(struct scheduler *s, int verbose) { (sizeof(int) * 8 - intrinsics_clz(t->ci->hydro.count)); break; + case task_type_stars_sort: + cost = wscale * intrinsics_popcount(t->flags) * scount_i * + (sizeof(int) * 8 - intrinsics_clz(t->ci->stars.count)); + break; + case task_type_self: if (t->subtype == task_subtype_grav) cost = 1.f * (wscale * gcount_i) * gcount_i; @@ -2124,6 +2129,7 @@ void scheduler_enqueue(struct scheduler *s, struct task *t) { case task_type_kick2: case task_type_stars_ghost: case task_type_logger: + case task_type_stars_sort: case task_type_timestep: qid = t->ci->super->owner; break; diff --git a/src/space.c b/src/space.c index 983e1d2a933f717ca446300ae04b94e53c092262..e2bb6249bdcaa48110ab127b0705237588751c29 100644 --- a/src/space.c +++ b/src/space.c @@ -167,6 +167,7 @@ void space_rebuild_recycle_mapper(void *map_data, int num_elements, space_recycle_list(s, cell_rec_begin, cell_rec_end, multipole_rec_begin, multipole_rec_end); c->hydro.sorts = NULL; + c->stars.sorts = NULL; c->nr_tasks = 0; c->grav.nr_mm_tasks = 0; c->hydro.density = NULL; @@ -177,7 +178,9 @@ void space_rebuild_recycle_mapper(void *map_data, int num_elements, c->hydro.dx_max_part = 0.0f; c->hydro.dx_max_sort = 0.0f; c->stars.dx_max_part = 0.f; + c->stars.dx_max_sort = 0.f; c->hydro.sorted = 0; + c->stars.sorted = 0; c->hydro.count = 0; c->hydro.updated = 0; c->hydro.inhibited = 0; @@ -217,21 +220,28 @@ void space_rebuild_recycle_mapper(void *map_data, int num_elements, c->grav.parts = NULL; c->stars.parts = NULL; c->hydro.do_sub_sort = 0; + c->stars.do_sub_sort = 0; c->grav.do_sub_drift = 0; c->hydro.do_sub_drift = 0; c->hydro.ti_end_min = -1; c->hydro.ti_end_max = -1; c->grav.ti_end_min = -1; c->grav.ti_end_max = -1; + c->stars.ti_end_min = -1; #ifdef SWIFT_DEBUG_CHECKS c->cellID = 0; #endif if (s->gravity) bzero(c->grav.multipole, sizeof(struct gravity_tensors)); - for (int i = 0; i < 13; i++) + for (int i = 0; i < 13; i++) { if (c->hydro.sort[i] != NULL) { free(c->hydro.sort[i]); c->hydro.sort[i] = NULL; } + if (c->stars.sort[i] != NULL) { + free(c->stars.sort[i]); + c->stars.sort[i] = NULL; + } + } #if WITH_MPI c->mpi.tag = -1; @@ -1836,11 +1846,16 @@ void space_gparts_sort(struct gpart *gparts, struct part *parts, */ void space_map_clearsort(struct cell *c, void *data) { - for (int i = 0; i < 13; i++) + for (int i = 0; i < 13; i++) { if (c->hydro.sort[i] != NULL) { free(c->hydro.sort[i]); c->hydro.sort[i] = NULL; } + if (c->stars.sort[i] != NULL) { + free(c->stars.sort[i]); + c->stars.sort[i] = NULL; + } + } } /** @@ -2011,6 +2026,7 @@ void space_split_recursive(struct space *s, struct cell *c, ti_hydro_beg_max = 0; integertime_t ti_gravity_end_min = max_nr_timesteps, ti_gravity_end_max = 0, ti_gravity_beg_max = 0; + integertime_t ti_stars_end_min = max_nr_timesteps; struct part *parts = c->hydro.parts; struct gpart *gparts = c->grav.parts; struct spart *sparts = c->stars.parts; @@ -2111,12 +2127,14 @@ void space_split_recursive(struct space *s, struct cell *c, cp->hydro.dx_max_sort = 0.f; cp->stars.h_max = 0.f; cp->stars.dx_max_part = 0.f; + cp->stars.dx_max_sort = 0.f; cp->nodeID = c->nodeID; cp->parent = c; cp->super = NULL; cp->hydro.super = NULL; cp->grav.super = NULL; cp->hydro.do_sub_sort = 0; + cp->stars.do_sub_sort = 0; cp->grav.do_sub_drift = 0; cp->hydro.do_sub_drift = 0; #ifdef WITH_MPI @@ -2166,6 +2184,7 @@ void space_split_recursive(struct space *s, struct cell *c, ti_gravity_end_min = min(ti_gravity_end_min, cp->grav.ti_end_min); ti_gravity_end_max = max(ti_gravity_end_max, cp->grav.ti_end_max); ti_gravity_beg_max = max(ti_gravity_beg_max, cp->grav.ti_beg_max); + ti_stars_end_min = min(ti_stars_end_min, cp->stars.ti_end_min); /* Increase the depth */ if (cp->maxdepth > maxdepth) maxdepth = cp->maxdepth; @@ -2292,6 +2311,7 @@ void space_split_recursive(struct space *s, struct cell *c, timebin_t hydro_time_bin_min = num_time_bins, hydro_time_bin_max = 0; timebin_t gravity_time_bin_min = num_time_bins, gravity_time_bin_max = 0; + timebin_t stars_time_bin_min = num_time_bins; /* parts: Get dt_min/dt_max and h_max. */ for (int k = 0; k < count; k++) { @@ -2329,6 +2349,8 @@ void space_split_recursive(struct space *s, struct cell *c, #endif gravity_time_bin_min = min(gravity_time_bin_min, sparts[k].time_bin); gravity_time_bin_max = max(gravity_time_bin_max, sparts[k].time_bin); + stars_time_bin_min = min(stars_time_bin_min, sparts[k].time_bin); + stars_h_max = max(stars_h_max, sparts[k].h); /* Reset x_diff */ @@ -2346,6 +2368,7 @@ void space_split_recursive(struct space *s, struct cell *c, ti_gravity_end_max = get_integer_time_end(ti_current, gravity_time_bin_max); ti_gravity_beg_max = get_integer_time_begin(ti_current + 1, gravity_time_bin_max); + ti_stars_end_min = get_integer_time_end(ti_current, stars_time_bin_min); /* Construct the multipole and the centre of mass*/ if (s->gravity) { @@ -2383,6 +2406,7 @@ void space_split_recursive(struct space *s, struct cell *c, c->grav.ti_end_min = ti_gravity_end_min; c->grav.ti_end_max = ti_gravity_end_max; c->grav.ti_beg_max = ti_gravity_beg_max; + c->stars.ti_end_min = ti_stars_end_min; c->stars.h_max = stars_h_max; c->maxdepth = maxdepth; @@ -2586,8 +2610,10 @@ void space_getcells(struct space *s, int nr_cells, struct cell **cells) { /* Init some things in the cell we just got. */ for (int j = 0; j < nr_cells; j++) { - for (int k = 0; k < 13; k++) + for (int k = 0; k < 13; k++) { if (cells[j]->hydro.sort[k] != NULL) free(cells[j]->hydro.sort[k]); + if (cells[j]->stars.sort[k] != NULL) free(cells[j]->stars.sort[k]); + } struct gravity_tensors *temp = cells[j]->grav.multipole; bzero(cells[j], sizeof(struct cell)); cells[j]->grav.multipole = temp; @@ -2608,11 +2634,16 @@ void space_getcells(struct space *s, int nr_cells, struct cell **cells) { void space_free_buff_sort_indices(struct space *s) { for (struct cell *finger = s->cells_sub; finger != NULL; finger = finger->next) { - for (int k = 0; k < 13; k++) + for (int k = 0; k < 13; k++) { if (finger->hydro.sort[k] != NULL) { free(finger->hydro.sort[k]); finger->hydro.sort[k] = NULL; } + if (finger->stars.sort[k] != NULL) { + free(finger->stars.sort[k]); + finger->stars.sort[k] = NULL; + } + } } } diff --git a/src/stars/Default/stars_part.h b/src/stars/Default/stars_part.h index 1922cc0e260a3c0f2052649a87252019514e637f..32006a15b67b9e97be7aac1a86ec45d369bcc1a0 100644 --- a/src/stars/Default/stars_part.h +++ b/src/stars/Default/stars_part.h @@ -41,6 +41,9 @@ struct spart { /* Offset between current position and position at last tree rebuild. */ float x_diff[3]; + /* Offset between current position and position at last tree rebuild. */ + float x_diff_sort[3]; + /*! Particle velocity. */ float v[3]; diff --git a/src/task.c b/src/task.c index 9d4be3aaa5aaa074b19f7a22bddab99629ef9085..0916cba850f7b558c9ae0ac1684a8a5e052f7719 100644 --- a/src/task.c +++ b/src/task.c @@ -79,7 +79,8 @@ const char *taskID_names[task_type_count] = {"none", "logger", "stars_ghost_in", "stars_ghost", - "stars_ghost_out"}; + "stars_ghost_out", + "stars_sort"}; /* Sub-task type names. */ const char *subtaskID_names[task_subtype_count] = { @@ -148,6 +149,7 @@ __attribute__((always_inline)) INLINE static enum task_actions task_acts_on( return task_action_all; case task_type_stars_ghost: + case task_type_stars_sort: return task_action_spart; break; @@ -343,11 +345,17 @@ void task_unlock(struct task *t) { cell_gunlocktree(ci); break; + case task_type_stars_sort: + cell_sunlocktree(ci); + break; + case task_type_self: case task_type_sub_self: if (subtype == task_subtype_grav) { cell_gunlocktree(ci); cell_munlocktree(ci); + } else if (subtype == task_subtype_stars_density) { + cell_sunlocktree(ci); } else { cell_unlocktree(ci); } @@ -360,6 +368,9 @@ void task_unlock(struct task *t) { cell_gunlocktree(cj); cell_munlocktree(ci); cell_munlocktree(cj); + } else if (subtype == task_subtype_stars_density) { + cell_sunlocktree(ci); + cell_sunlocktree(cj); } else { cell_unlocktree(ci); cell_unlocktree(cj); @@ -441,6 +452,11 @@ int task_lock(struct task *t) { if (cell_locktree(ci) != 0) return 0; break; + case task_type_stars_sort: + if (ci->stars.hold) return 0; + if (cell_slocktree(ci) != 0) return 0; + break; + case task_type_drift_gpart: case task_type_grav_mesh: if (ci->grav.phold) return 0; @@ -458,7 +474,12 @@ int task_lock(struct task *t) { cell_gunlocktree(ci); return 0; } + + } else if (subtype == task_subtype_stars_density) { + if (ci->stars.hold) return 0; + if (cell_slocktree(ci) != 0) return 0; } else { + if (ci->hydro.hold) return 0; if (cell_locktree(ci) != 0) return 0; } break; @@ -482,6 +503,13 @@ int task_lock(struct task *t) { cell_munlocktree(ci); return 0; } + } else if (subtype == task_subtype_stars_density) { + if (ci->stars.hold || cj->stars.hold) return 0; + if (cell_slocktree(ci) != 0) return 0; + if (cell_slocktree(cj) != 0) { + cell_sunlocktree(ci); + return 0; + } } else { /* Lock the parts in both cells */ if (ci->hydro.hold || cj->hydro.hold) return 0; diff --git a/src/task.h b/src/task.h index 64b611534ed10e3c34b628e2c733a508e5d42a1c..994b2b14c05965b71e877feac5cb9827a1d1b4bb 100644 --- a/src/task.h +++ b/src/task.h @@ -72,6 +72,7 @@ enum task_types { task_type_stars_ghost_in, task_type_stars_ghost, task_type_stars_ghost_out, + task_type_stars_sort, task_type_count } __attribute__((packed)); diff --git a/src/timers.c b/src/timers.c index 898c833c3b6764f05ed9205efa0db6220e911a7e..2a02bbbacfedecfaa327f71cdb0b6b1145647082 100644 --- a/src/timers.c +++ b/src/timers.c @@ -35,67 +35,66 @@ ticks timers[timer_count]; /* Timer names. */ -const char* timers_names[timer_count] = { - "none", - "prepare", - "init", - "init_grav", - "drift_part", - "drift_gpart", - "kick1", - "kick2", - "timestep", - "endforce", - "dosort", - "doself_density", - "doself_gradient", - "doself_force", - "doself_grav_pp", - "dopair_density", - "dopair_gradient", - "dopair_force", - "dopair_grav_mm", - "dopair_grav_pp", - "dograv_external", - "dograv_down", - "dograv_mesh", - "dograv_top_level", - "dograv_long_range", - "dosource", - "dosub_self_density", - "dosub_self_gradient", - "dosub_self_force", - "dosub_self_grav", - "dosub_pair_density", - "dosub_pair_gradient", - "dosub_pair_force", - "dosub_pair_grav", - "doself_subset", - "dopair_subset", - "dopair_subset_naive", - "dosub_subset", - "do_ghost", - "do_extra_ghost", - "dorecv_part", - "dorecv_gpart", - "dorecv_spart", - "do_cooling", - "do_star_formation", - "gettask", - "qget", - "qsteal", - "locktree", - "runners", - "step", - "doself_stars_density", - "dopair_stars_density", - "do_stars_ghost", - "doself_subset_stars_density", - "dopair_subset_stars_density", - "dosubpair_stars_density", - "dosub_self_stars_density", - "logger", -}; +const char* timers_names[timer_count] = {"none", + "prepare", + "init", + "init_grav", + "drift_part", + "drift_gpart", + "kick1", + "kick2", + "timestep", + "endforce", + "dosort", + "doself_density", + "doself_gradient", + "doself_force", + "doself_grav_pp", + "dopair_density", + "dopair_gradient", + "dopair_force", + "dopair_grav_mm", + "dopair_grav_pp", + "dograv_external", + "dograv_down", + "dograv_mesh", + "dograv_top_level", + "dograv_long_range", + "dosource", + "dosub_self_density", + "dosub_self_gradient", + "dosub_self_force", + "dosub_self_grav", + "dosub_pair_density", + "dosub_pair_gradient", + "dosub_pair_force", + "dosub_pair_grav", + "doself_subset", + "dopair_subset", + "dopair_subset_naive", + "dosub_subset", + "do_ghost", + "do_extra_ghost", + "dorecv_part", + "dorecv_gpart", + "dorecv_spart", + "do_cooling", + "do_star_formation", + "gettask", + "qget", + "qsteal", + "locktree", + "runners", + "step", + "doself_stars_density", + "dopair_stars_density", + "do_stars_ghost", + "doself_subset_stars_density", + "dopair_subset_stars_density", + "dosubpair_stars_density", + "dosub_self_stars_density", + "logger", + "do_stars_sort"}; /* File to store the timers */ static FILE* timers_file; diff --git a/src/timers.h b/src/timers.h index c43f0154d2aaaa9d1c5aed3ad51b912e4fc5d751..1b11d6e0884b988958e7c491878b317887c745ad 100644 --- a/src/timers.h +++ b/src/timers.h @@ -96,6 +96,7 @@ enum { timer_dosubpair_stars_density, timer_dosub_self_stars_density, timer_logger, + timer_do_stars_sort, timer_count, }; diff --git a/tests/test125cells.c b/tests/test125cells.c index cfe7e18b07b30c309531642a769241841de3a593..5a9c4ea9511b5d75a3098f7997b83607cdcbd715 100644 --- a/tests/test125cells.c +++ b/tests/test125cells.c @@ -663,7 +663,7 @@ int main(int argc, char *argv[]) { /* First, sort stuff */ for (int j = 0; j < 125; ++j) - runner_do_sort(&runner, cells[j], 0x1FFF, 0, 0); + runner_do_hydro_sort(&runner, cells[j], 0x1FFF, 0, 0); /* Do the density calculation */ diff --git a/tests/test27cells.c b/tests/test27cells.c index 6930638efe28aafab1c614e8ce71b353eb0c5b8f..d100e2e30f0bb1452b3366ddde51dbae0575d67a 100644 --- a/tests/test27cells.c +++ b/tests/test27cells.c @@ -492,7 +492,7 @@ int main(int argc, char *argv[]) { runner_do_drift_part(&runner, cells[i * 9 + j * 3 + k], 0); - runner_do_sort(&runner, cells[i * 9 + j * 3 + k], 0x1FFF, 0, 0); + runner_do_hydro_sort(&runner, cells[i * 9 + j * 3 + k], 0x1FFF, 0, 0); } } } diff --git a/tests/test27cellsStars.c b/tests/test27cellsStars.c index 3875bf75b1bb315bf48acae13b9553c689416a18..0377fc49edfedc8b1d9ce0630821622117187c9b 100644 --- a/tests/test27cellsStars.c +++ b/tests/test27cellsStars.c @@ -164,6 +164,7 @@ struct cell *make_cell(size_t n, size_t n_stars, double *offset, double size, cell->stars.count = scount; cell->hydro.dx_max_part = 0.; cell->hydro.dx_max_sort = 0.; + cell->stars.dx_max_sort = 0.; cell->width[0] = size; cell->width[1] = size; cell->width[2] = size; @@ -174,8 +175,10 @@ struct cell *make_cell(size_t n, size_t n_stars, double *offset, double size, cell->hydro.ti_old_part = 8; cell->hydro.ti_end_min = 8; cell->hydro.ti_end_max = 8; + cell->grav.ti_old_part = 8; cell->grav.ti_end_min = 8; cell->grav.ti_end_max = 8; + cell->stars.ti_end_min = 8; cell->nodeID = NODE_ID; shuffle_particles(cell->hydro.parts, cell->hydro.count); @@ -414,7 +417,8 @@ int main(int argc, char *argv[]) { runner_do_drift_part(&runner, cells[i * 9 + j * 3 + k], 0); - runner_do_sort(&runner, cells[i * 9 + j * 3 + k], 0x1FFF, 0, 0); + runner_do_hydro_sort(&runner, cells[i * 9 + j * 3 + k], 0x1FFF, 0, 0); + runner_do_stars_sort(&runner, cells[i * 9 + j * 3 + k], 0x1FFF, 0, 0); } } } diff --git a/tests/testActivePair.c b/tests/testActivePair.c index a4eb04330fba7c984333fc7c705235223810ec4d..402a9a7ac416a0b2651f628eb32988d8ad62a14f 100644 --- a/tests/testActivePair.c +++ b/tests/testActivePair.c @@ -298,8 +298,8 @@ void test_pair_interactions(struct runner *runner, struct cell **ci, interaction_func vec_interaction, init_func init, finalise_func finalise) { - runner_do_sort(runner, *ci, 0x1FFF, 0, 0); - runner_do_sort(runner, *cj, 0x1FFF, 0, 0); + runner_do_hydro_sort(runner, *ci, 0x1FFF, 0, 0); + runner_do_hydro_sort(runner, *cj, 0x1FFF, 0, 0); /* Zero the fields */ init(*ci, runner->e->cosmology, runner->e->hydro_properties); diff --git a/tests/testPeriodicBC.c b/tests/testPeriodicBC.c index e4a5a85845f92bfbecae85d9dc9453b38b9b7646..f49a59c650b46f4de902b050b5248b01e971bfbb 100644 --- a/tests/testPeriodicBC.c +++ b/tests/testPeriodicBC.c @@ -505,8 +505,8 @@ int main(int argc, char *argv[]) { runner_do_drift_part(&runner, cells[i * (dim * dim) + j * dim + k], 0); - runner_do_sort(&runner, cells[i * (dim * dim) + j * dim + k], 0x1FFF, 0, - 0); + runner_do_hydro_sort(&runner, cells[i * (dim * dim) + j * dim + k], + 0x1FFF, 0, 0); } } }