diff --git a/src/cell.h b/src/cell.h index b282a094da7cd53de931638be18acc3e31d565b1..42fbd8c41ffbffa55974157940fa2bc8c6c63c2e 100644 --- a/src/cell.h +++ b/src/cell.h @@ -291,6 +291,9 @@ struct cell { /*! The star ghost task itself */ struct task *star_ghost; + /*! Linked list of the tasks computing this cell's star density. */ + struct link *star_density; + /*! Task for cooling */ struct task *cooling; diff --git a/src/engine.c b/src/engine.c index c6629f0abeb5add982e6ea287de7a8790389dcce..fb626fac9cc49b6c62a6858b39aa09a2b000b600 100644 --- a/src/engine.c +++ b/src/engine.c @@ -2657,6 +2657,85 @@ void engine_make_hydroloop_tasks_mapper(void *map_data, int num_elements, } } +/** + * @brief Constructs the top-level pair tasks for the star loop over + * neighbours + * + * Here we construct all the tasks for all possible neighbouring non-empty + * local cells in the hierarchy. No dependencies are being added thus far. + * Additional loop over neighbours can later be added by simply duplicating + * all the tasks created by this function. + * + * @param map_data Offset of first two indices disguised as a pointer. + * @param num_elements Number of cells to traverse. + * @param extra_data The #engine. + */ +void engine_make_starloop_tasks_mapper(void *map_data, int num_elements, + void *extra_data) { + + /* Extract the engine pointer. */ + struct engine *e = (struct engine *)extra_data; + + struct space *s = e->s; + struct scheduler *sched = &e->sched; + const int nodeID = e->nodeID; + const int *cdim = s->cdim; + struct cell *cells = s->cells_top; + + /* Loop through the elements, which are just byte offsets from NULL. */ + for (int ind = 0; ind < num_elements; ind++) { + + /* Get the cell index. */ + const int cid = (size_t)(map_data) + ind; + const int i = cid / (cdim[1] * cdim[2]); + const int j = (cid / cdim[2]) % cdim[1]; + const int k = cid % cdim[2]; + + /* Get the cell */ + struct cell *ci = &cells[cid]; + + /* Skip cells without star particles */ + if (ci->scount == 0) continue; + + /* If the cells is local build a self-interaction */ + if (ci->nodeID == nodeID) + scheduler_addtask(sched, task_type_self, task_subtype_star_density, 0, 0, ci, + NULL); + + /* Now loop over all the neighbours of this cell */ + for (int ii = -1; ii < 2; ii++) { + int iii = i + ii; + if (!s->periodic && (iii < 0 || iii >= cdim[0])) continue; + iii = (iii + cdim[0]) % cdim[0]; + for (int jj = -1; jj < 2; jj++) { + int jjj = j + jj; + if (!s->periodic && (jjj < 0 || jjj >= cdim[1])) continue; + jjj = (jjj + cdim[1]) % cdim[1]; + for (int kk = -1; kk < 2; kk++) { + int kkk = k + kk; + if (!s->periodic && (kkk < 0 || kkk >= cdim[2])) continue; + kkk = (kkk + cdim[2]) % cdim[2]; + + /* Get the neighbouring cell */ + const int cjd = cell_getid(cdim, iii, jjj, kkk); + struct cell *cj = &cells[cjd]; + + /* Is that neighbour local and does it have particles ? */ + if (cid >= cjd || cj->scount == 0 || + (ci->nodeID != nodeID && cj->nodeID != nodeID)) + continue; + + /* Construct the pair task */ + const int sid = sortlistID[(kk + 1) + 3 * ((jj + 1) + 3 * (ii + 1))]; + scheduler_addtask(sched, task_type_pair, task_subtype_star_density, sid, 0, + ci, cj); + } + } + } + } +} + + /** * @brief Counts the tasks associated with one cell and constructs the links * @@ -2696,6 +2775,9 @@ void engine_count_and_link_tasks_mapper(void *map_data, int num_elements, } else if (t_subtype == task_subtype_external_grav) { engine_addlink(e, &ci->grav, t); } + if (t->subtype == task_subtype_star_density) { + engine_addlink(e, &ci->star_density, t); + } /* Link pair tasks to cells. */ } else if (t_type == task_type_pair) { @@ -2714,6 +2796,10 @@ void engine_count_and_link_tasks_mapper(void *map_data, int num_elements, error("Found a pair/external-gravity task..."); } #endif + if (t->subtype == task_subtype_star_density) { + engine_addlink(e, &ci->star_density, t); + engine_addlink(e, &cj->star_density, t); + } /* Link sub-self tasks to cells. */ } else if (t_type == task_type_sub_self) { @@ -2726,6 +2812,9 @@ void engine_count_and_link_tasks_mapper(void *map_data, int num_elements, } else if (t_subtype == task_subtype_external_grav) { engine_addlink(e, &ci->grav, t); } + if (t->subtype == task_subtype_star_density) { + engine_addlink(e, &ci->star_density, t); + } /* Link sub-pair tasks to cells. */ } else if (t_type == task_type_sub_pair) { @@ -2739,8 +2828,16 @@ void engine_count_and_link_tasks_mapper(void *map_data, int num_elements, engine_addlink(e, &ci->grav, t); engine_addlink(e, &cj->grav, t); } +<<<<<<< HEAD #ifdef SWIFT_DEBUG_CHECKS else if (t_subtype == task_subtype_external_grav) { +======= + if (t->subtype == task_subtype_star_density) { + engine_addlink(e, &ci->star_density, t); + engine_addlink(e, &cj->star_density, t); + } + if (t->subtype == task_subtype_external_grav) { +>>>>>>> star smoothing length close to be implemented error("Found a sub-pair/external-gravity task..."); } #endif @@ -2954,7 +3051,7 @@ static inline void engine_make_hydro_loops_dependencies(struct scheduler *sched, * @param density The density task to link. * @param c The cell. */ -static inline void engine_make_stars_loops_dependencies(struct scheduler *sched, +static inline void engine_make_star_loops_dependencies(struct scheduler *sched, struct task *density, struct cell *c) { /* density loop --> ghost */ @@ -3213,6 +3310,111 @@ void engine_make_extra_hydroloop_tasks_mapper(void *map_data, int num_elements, } } +/** + * @brief Duplicates the first hydro loop and construct all the + * dependencies for the hydro part + * + * This is done by looping over all the previously constructed tasks + * and adding another task involving the same cells but this time + * corresponding to the second hydro loop over neighbours. + * With all the relevant tasks for a given cell available, we construct + * all the dependencies for that cell. + */ +void engine_link_star_tasks_mapper(void *map_data, int num_elements, + void *extra_data) { + + struct engine *e = (struct engine *)extra_data; + struct scheduler *sched = &e->sched; + const int nodeID = e->nodeID; + + for (int ind = 0; ind < num_elements; ind++) { + struct task *t = &((struct task *)map_data)[ind]; + + /* Self-interaction? */ + if (t->type == task_type_self && t->subtype == task_subtype_star_density) { + + /* Make the self-density tasks depend on the drift only. */ + scheduler_addunlock(sched, t->ci->super->drift_part, t); + + /* Now, build all the dependencies for the hydro */ + engine_make_star_loops_dependencies(sched, t, t->ci); + scheduler_addunlock(sched, t->ci->star_ghost_out, t->ci->super->end_force); + } + + /* Otherwise, pair interaction? */ + else if (t->type == task_type_pair && t->subtype == task_subtype_star_density) { + + /* Make all density tasks depend on the drift and the sorts. */ + if (t->ci->nodeID == engine_rank) + scheduler_addunlock(sched, t->ci->super->drift_part, t); + scheduler_addunlock(sched, t->ci->super->sorts, t); + if (t->ci->super != t->cj->super) { + if (t->cj->nodeID == engine_rank) + scheduler_addunlock(sched, t->cj->super->drift_part, t); + scheduler_addunlock(sched, t->cj->super->sorts, t); + } + + /* Now, build all the dependencies for the hydro for the cells */ + /* that are local and are not descendant of the same super_hydro-cells */ + if (t->ci->nodeID == nodeID) { + engine_make_star_loops_dependencies(sched, t, t->ci); + } + if (t->cj->nodeID == nodeID) { + if (t->ci->super_hydro != t->cj->super) + engine_make_star_loops_dependencies(sched, t, t->cj); + if (t->ci->super != t->cj->super) + scheduler_addunlock(sched, t, t->cj->super->end_force); + } + + } + + /* Otherwise, sub-self interaction? */ + else if (t->type == task_type_sub_self && + t->subtype == task_subtype_star_density) { + + /* Make all density tasks depend on the drift and sorts. */ + scheduler_addunlock(sched, t->ci->super->drift_part, t); + scheduler_addunlock(sched, t->ci->super->sorts, t); + + /* Now, build all the dependencies for the hydro for the cells */ + /* that are local and are not descendant of the same super_hydro-cells */ + if (t->ci->nodeID == nodeID) { + engine_make_star_loops_dependencies(sched, t, t->ci); + scheduler_addunlock(sched, t, t->ci->super->end_force); + } else + error("oo"); + } + + /* Otherwise, sub-pair interaction? */ + else if (t->type == task_type_sub_pair && + t->subtype == task_subtype_star_density) { + + /* Make all density tasks depend on the drift. */ + if (t->ci->nodeID == engine_rank) + scheduler_addunlock(sched, t->ci->super->drift_part, t); + scheduler_addunlock(sched, t->ci->super->sorts, t); + if (t->ci->super != t->cj->super) { + if (t->cj->nodeID == engine_rank) + scheduler_addunlock(sched, t->cj->super->drift_part, t); + scheduler_addunlock(sched, t->cj->super->sorts, t); + } + + /* Now, build all the dependencies for the hydro for the cells */ + /* that are local and are not descendant of the same super_hydro-cells */ + if (t->ci->nodeID == nodeID) { + engine_make_star_loops_dependencies(sched, t, t->ci); + scheduler_addunlock(sched, t, t->ci->super->end_force); + } + if (t->cj->nodeID == nodeID) { + if (t->ci->super != t->cj->super) + engine_make_star_loops_dependencies(sched, t, t->cj); + if (t->ci->super != t->cj->super) + scheduler_addunlock(sched, t, t->cj->super->end_force); + } + } + } +} + /** * @brief Fill the #space's task list. * @@ -3225,14 +3427,19 @@ void engine_maketasks(struct engine *e) { struct cell *cells = s->cells_top; const int nr_cells = s->nr_cells; const ticks tic = getticks(); - - /* Re-set the scheduler. */ + + /* Re-set the scheduler. */ scheduler_reset(sched, engine_estimate_nr_tasks(e)); +<<<<<<< HEAD ticks tic2 = getticks(); /* Construct the firt hydro loop over neighbours */ if (e->policy & engine_policy_hydro) +======= + /* Construct the first hydro loop over neighbours */ + if (e->policy & engine_policy_hydro) { +>>>>>>> star smoothing length close to be implemented threadpool_map(&e->threadpool, engine_make_hydroloop_tasks_mapper, NULL, s->nr_cells, 1, 0, e); @@ -3242,6 +3449,12 @@ void engine_maketasks(struct engine *e) { tic2 = getticks(); + /* Construct the star hydro loop over neighbours */ + if (e->policy & engine_policy_stars) { + threadpool_map(&e->threadpool, engine_make_starloop_tasks_mapper, NULL, + s->nr_cells, 1, 0, e); + } + /* Add the self gravity tasks. */ if (e->policy & engine_policy_self_gravity) engine_make_self_gravity_tasks(e); @@ -3358,11 +3571,19 @@ void engine_maketasks(struct engine *e) { if (e->policy & (engine_policy_self_gravity | engine_policy_external_gravity)) engine_link_gravity_tasks(e); +<<<<<<< HEAD if (e->verbose) message("Linking gravity tasks took %.3f %s.", clocks_from_ticks(getticks() - tic2), clocks_getunit()); +======= + if (e->policy & engine_policy_stars) + threadpool_map(&e->threadpool, engine_link_star_tasks_mapper, sched->tasks, + sched->nr_tasks, sizeof(struct task), 0, e); +>>>>>>> star smoothing length close to be implemented #ifdef WITH_MPI + if (e->policy & engine_policy_stars) + error("Cannot run stars with MPI"); /* Add the communication tasks if MPI is being used. */ if (e->policy & engine_policy_mpi) { @@ -3482,7 +3703,7 @@ void engine_marktasks_mapper(void *map_data, int num_elements, if (ci->nodeID != engine_rank) error("Non-local self task found"); - /* Activate the hydro drift */ + /* Activate the hydro drift */ if (t->type == task_type_self && t->subtype == task_subtype_density) { if (cell_is_active_hydro(ci, e)) { scheduler_activate(s, t); @@ -3520,6 +3741,24 @@ void engine_marktasks_mapper(void *map_data, int num_elements, } #endif + /* Activate the star density */ + else if (t->type == task_type_self && + t->subtype == task_subtype_star_density) { + if (cell_is_active_star(ci, e)) { + scheduler_activate(s, t); + cell_activate_drift_part(ci, s); + } + } + + /* Store current values of dx_max and h_max. */ + else if (t->type == task_type_sub_self && + t->subtype == task_subtype_star_density) { + if (cell_is_active_star(ci, e)) { + scheduler_activate(s, t); + cell_activate_subcell_hydro_tasks(ci, NULL, s); + } + } + /* Activate the gravity drift */ else if (t->type == task_type_self && t->subtype == task_subtype_grav) { if (cell_is_active_gravity(ci, e)) { @@ -3558,14 +3797,16 @@ void engine_marktasks_mapper(void *map_data, int num_elements, /* Only activate tasks that involve a local active cell. */ if ((t->subtype == task_subtype_density || t->subtype == task_subtype_gradient || - t->subtype == task_subtype_force) && + t->subtype == task_subtype_force || + t->subtype == task_subtype_star_density) && ((ci_active_hydro && ci->nodeID == engine_rank) || (cj_active_hydro && cj->nodeID == engine_rank))) { scheduler_activate(s, t); /* Set the correct sorting flags */ - if (t->type == task_type_pair && t->subtype == task_subtype_density) { + if (t->type == task_type_pair && (t->subtype == task_subtype_density || + t->subtype == task_subtype_star_density)) { /* Store some values. */ atomic_or(&ci->requires_sorts, 1 << t->flags); @@ -3585,7 +3826,8 @@ void engine_marktasks_mapper(void *map_data, int num_elements, /* Store current values of dx_max and h_max. */ else if (t->type == task_type_sub_pair && - t->subtype == task_subtype_density) { + (t->subtype == task_subtype_density || + t->subtype == task_subtype_star_density)) { cell_activate_subcell_hydro_tasks(t->ci, t->cj, s); } } @@ -3703,6 +3945,90 @@ void engine_marktasks_mapper(void *map_data, int num_elements, #endif } + /* Only interested in star_density tasks as of here. */ + if (t->subtype == task_subtype_star_density) { + + /* Too much particle movement? */ + if (cell_need_rebuild_for_pair(ci, cj)) *rebuild_space = 1; + +#ifdef WITH_MPI + error("MPI with stars not implemented"); + /* /\* Activate the send/recv tasks. *\/ */ + /* if (ci->nodeID != engine_rank) { */ + + /* /\* If the local cell is active, receive data from the foreign cell. *\/ */ + /* if (cj_active_hydro) { */ + /* scheduler_activate(s, ci->recv_xv); */ + /* if (ci_active_hydro) { */ + /* scheduler_activate(s, ci->recv_rho); */ + /* } */ + /* } */ + + /* /\* If the foreign cell is active, we want its ti_end values. *\/ */ + /* if (ci_active_hydro) scheduler_activate(s, ci->recv_ti); */ + + /* /\* Is the foreign cell active and will need stuff from us? *\/ */ + /* if (ci_active_hydro) { */ + + /* struct link *l = */ + /* scheduler_activate_send(s, cj->send_xv, ci->nodeID); */ + + /* /\* Drift the cell which will be sent at the level at which it is */ + /* sent, i.e. drift the cell specified in the send task (l->t) */ + /* itself. *\/ */ + /* cell_activate_drift_part(l->t->ci, s); */ + + /* /\* If the local cell is also active, more stuff will be needed. *\/ */ + /* if (cj_active_hydro) { */ + /* scheduler_activate_send(s, cj->send_rho, ci->nodeID); */ + + /* } */ + /* } */ + + /* /\* If the local cell is active, send its ti_end values. *\/ */ + /* if (cj_active_hydro) */ + /* scheduler_activate_send(s, cj->send_ti, ci->nodeID); */ + + /* } else if (cj->nodeID != engine_rank) { */ + + /* /\* If the local cell is active, receive data from the foreign cell. *\/ */ + /* if (ci_active_hydro) { */ + /* scheduler_activate(s, cj->recv_xv); */ + /* if (cj_active_hydro) { */ + /* scheduler_activate(s, cj->recv_rho); */ + /* } */ + /* } */ + + /* /\* If the foreign cell is active, we want its ti_end values. *\/ */ + /* if (cj_active_hydro) scheduler_activate(s, cj->recv_ti); */ + + /* /\* Is the foreign cell active and will need stuff from us? *\/ */ + /* if (cj_active_hydro) { */ + + /* struct link *l = */ + /* scheduler_activate_send(s, ci->send_xv, cj->nodeID); */ + + /* /\* Drift the cell which will be sent at the level at which it is */ + /* sent, i.e. drift the cell specified in the send task (l->t) */ + /* itself. *\/ */ + /* cell_activate_drift_part(l->t->ci, s); */ + + /* /\* If the local cell is also active, more stuff will be needed. *\/ */ + /* if (ci_active_hydro) { */ + + /* scheduler_activate_send(s, ci->send_rho, cj->nodeID); */ + + /* } */ + /* } */ + + /* /\* If the local cell is active, send its ti_end values. *\/ */ + /* if (ci_active_hydro) */ + /* scheduler_activate_send(s, ci->send_ti, cj->nodeID); */ + /* } */ +#endif + } + + /* Only interested in gravity tasks as of here. */ if (t->subtype == task_subtype_grav) { diff --git a/src/runner.c b/src/runner.c index ef3ced2044da1a249feb6a753822e7a6618158de..43b2e3275aceba4fd1807fa399a9cfbd15bde6d5 100644 --- a/src/runner.c +++ b/src/runner.c @@ -2393,6 +2393,8 @@ void *runner_main(void *data) { #endif else if (t->subtype == task_subtype_force) runner_dosub_self2_force(r, ci, 1); + else if (t->subtype == task_subtype_star_density) + runner_dosub_self_star_density(r, ci, 1); else error("Unknown/invalid task subtype (%d).", t->subtype); break; @@ -2406,6 +2408,8 @@ void *runner_main(void *data) { #endif else if (t->subtype == task_subtype_force) runner_dosub_pair2_force(r, ci, cj, t->flags, 1); + else if (t->subtype == task_subtype_star_density) + runner_dosub_pair_star_density(r, ci, cj, t->flags, 1); else error("Unknown/invalid task subtype (%d).", t->subtype); break; diff --git a/src/runner_doiact_star.h b/src/runner_doiact_star.h index 1891e292a54c748da13a0bab32bd68b4c03b08f0..12675fcf239631986cd10bcf12e02cee8a51757e 100644 --- a/src/runner_doiact_star.h +++ b/src/runner_doiact_star.h @@ -72,8 +72,8 @@ void runner_doself_star_density(struct runner *r, struct cell *c, int timer) { #ifdef SWIFT_DEBUG_CHECKS /* Check that particles have been drifted to the current time */ - if (si->ti_drift != e->ti_current) - error("Particle si not drifted to current time"); + /* if (si->ti_drift != e->ti_current) */ + /* error("Particle si not drifted to current time"); */ if (pj->ti_drift != e->ti_current) error("Particle pj not drifted to current time"); #endif @@ -148,8 +148,8 @@ void runner_dosubpair_star_density(struct runner *r, struct cell *restrict ci, #ifdef SWIFT_DEBUG_CHECKS /* Check that particles have been drifted to the current time */ - if (si->ti_drift != e->ti_current) - error("Particle si not drifted to current time"); + /* if (si->ti_drift != e->ti_current) */ + /* error("Particle si not drifted to current time"); */ if (pj->ti_drift != e->ti_current) error("Particle pj not drifted to current time"); #endif @@ -237,8 +237,8 @@ void runner_dopair_subset_star_density(struct runner *r, struct cell *restrict c #ifdef SWIFT_DEBUG_CHECKS /* Check that particles have been drifted to the current time */ - if (spi->ti_drift != e->ti_current) - error("Particle pi not drifted to current time"); + /* if (spi->ti_drift != e->ti_current) */ + /* error("Particle pi not drifted to current time"); */ if (pj->ti_drift != e->ti_current) error("Particle pj not drifted to current time"); #endif @@ -309,8 +309,8 @@ void runner_doself_subset_star_density(struct runner *r, struct cell *restrict c #ifdef SWIFT_DEBUG_CHECKS /* Check that particles have been drifted to the current time */ - if (spi->ti_drift != e->ti_current) - error("Particle pi not drifted to current time"); + /* if (spi->ti_drift != e->ti_current) */ + /* error("Particle pi not drifted to current time"); */ if (pj->ti_drift != e->ti_current) error("Particle pj not drifted to current time"); #endif @@ -946,3 +946,395 @@ void runner_dosub_subset_star_density(struct runner *r, struct cell *ci, struct if (gettimer) TIMER_TOC(timer_dosub_subset); } + + +/** + * @brief Determine which version of runner_doself needs to be called depending on the + * optimisation level. + * + * @param r #runner + * @param c #cell c + * + */ +void runner_doself_branch_star_density(struct runner *r, struct cell *c) { + + const struct engine *restrict e = r->e; + + /* Anything to do here? */ + if (!cell_is_active_star(c, e)) return; + + /* Did we mess up the recursion? */ + if (c->h_max_old * kernel_gamma > c->dmin) + error("Cell smaller than smoothing length"); + + /* /\* Check that cells are drifted. *\/ */ + /* if (!cell_are_part_drifted(c, e)) error("Interacting undrifted cell."); */ + + runner_doself_star_density(r, c, 1); +} + + +/** + * @brief Determine which version of DOPAIR1 needs to be called depending on the + * orientation of the cells or whether DOPAIR1 needs to be called at all. + * + * @param r #runner + * @param ci #cell ci + * @param cj #cell cj + * + */ +void runner_dopair_branch_star_density(struct runner *r, struct cell *ci, struct cell *cj) { + + const struct engine *restrict e = r->e; + + /* Anything to do here? */ + if (!cell_is_active_star(ci, e) && !cell_is_active_star(cj, e)) return; + + /* /\* Check that cells are drifted. *\/ */ + /* if (!cell_are_part_drifted(ci, e) || !cell_are_part_drifted(cj, e)) */ + /* error("Interacting undrifted cells."); */ + + /* Get the sort ID. */ + double shift[3] = {0.0, 0.0, 0.0}; + const int sid = space_getsid(e->s, &ci, &cj, shift); + + /* Have the cells been sorted? */ + if (!(ci->sorted & (1 << sid)) || + ci->dx_max_sort_old > space_maxreldx * ci->dmin) + error("Interacting unsorted cells."); + if (!(cj->sorted & (1 << sid)) || + cj->dx_max_sort_old > space_maxreldx * cj->dmin) + error("Interacting unsorted cells."); + +#ifdef SWIFT_DEBUG_CHECKS + /* Pick-out the sorted lists. */ + const struct entry *restrict sort_i = ci->sort[sid]; + const struct entry *restrict sort_j = cj->sort[sid]; + + /* Check that the dx_max_sort values in the cell are indeed an upper + bound on particle movement. */ + for (int pid = 0; pid < ci->count; pid++) { + const struct part *p = &ci->parts[sort_i[pid].i]; + const float d = p->x[0] * runner_shift[sid][0] + + p->x[1] * runner_shift[sid][1] + + p->x[2] * runner_shift[sid][2]; + if (fabsf(d - sort_i[pid].d) - ci->dx_max_sort > + 1.0e-4 * max(fabsf(d), ci->dx_max_sort_old) && + fabsf(d - sort_i[pid].d) - ci->dx_max_sort > ci->width[0] * 1.0e-10) + error( + "particle shift diff exceeds dx_max_sort in cell ci. ci->nodeID=%d " + "cj->nodeID=%d d=%e sort_i[pid].d=%e ci->dx_max_sort=%e " + "ci->dx_max_sort_old=%e", + ci->nodeID, cj->nodeID, d, sort_i[pid].d, ci->dx_max_sort, + ci->dx_max_sort_old); + } + for (int pjd = 0; pjd < cj->count; pjd++) { + const struct part *p = &cj->parts[sort_j[pjd].i]; + const float d = p->x[0] * runner_shift[sid][0] + + p->x[1] * runner_shift[sid][1] + + p->x[2] * runner_shift[sid][2]; + if ((fabsf(d - sort_j[pjd].d) - cj->dx_max_sort) > + 1.0e-4 * max(fabsf(d), cj->dx_max_sort_old) && + (fabsf(d - sort_j[pjd].d) - cj->dx_max_sort) > cj->width[0] * 1.0e-10) + error( + "particle shift diff exceeds dx_max_sort in cell cj. cj->nodeID=%d " + "ci->nodeID=%d d=%e sort_j[pjd].d=%e cj->dx_max_sort=%e " + "cj->dx_max_sort_old=%e", + cj->nodeID, ci->nodeID, d, sort_j[pjd].d, cj->dx_max_sort, + cj->dx_max_sort_old); + } +#endif /* SWIFT_DEBUG_CHECKS */ + + runner_dopair_star_density(r, ci, cj, 1); +} + +/** + * @brief Compute grouped sub-cell interactions for pairs + * + * @param r The #runner. + * @param ci The first #cell. + * @param cj The second #cell. + * @param sid The direction linking the cells + * @param gettimer Do we have a timer ? + * + * @todo Hard-code the sid on the recursive calls to avoid the + * redundant computations to find the sid on-the-fly. + */ +void runner_dosub_pair_star_density(struct runner *r, struct cell *ci, struct cell *cj, int sid, + int gettimer) { + + struct space *s = r->e->s; + const struct engine *e = r->e; + + TIMER_TIC; + + /* Should we even bother? */ + if (!cell_is_active_star(ci, e) && !cell_is_active_star(cj, e)) return; + if (ci->scount == 0 || cj->scount == 0) return; + + /* Get the type of pair if not specified explicitly. */ + double shift[3]; + sid = space_getsid(s, &ci, &cj, shift); + + /* Recurse? */ + if (cell_can_recurse_in_pair_task(ci) && cell_can_recurse_in_pair_task(cj)) { + + /* Different types of flags. */ + switch (sid) { + + /* Regular sub-cell interactions of a single cell. */ + case 0: /* ( 1 , 1 , 1 ) */ + if (ci->progeny[7] != NULL && cj->progeny[0] != NULL) + runner_dosub_pair_star_density(r, ci->progeny[7], cj->progeny[0], -1, 0); + break; + + case 1: /* ( 1 , 1 , 0 ) */ + if (ci->progeny[6] != NULL && cj->progeny[0] != NULL) + runner_dosub_pair_star_density(r, ci->progeny[6], cj->progeny[0], -1, 0); + if (ci->progeny[6] != NULL && cj->progeny[1] != NULL) + runner_dosub_pair_star_density(r, ci->progeny[6], cj->progeny[1], -1, 0); + if (ci->progeny[7] != NULL && cj->progeny[0] != NULL) + runner_dosub_pair_star_density(r, ci->progeny[7], cj->progeny[0], -1, 0); + if (ci->progeny[7] != NULL && cj->progeny[1] != NULL) + runner_dosub_pair_star_density(r, ci->progeny[7], cj->progeny[1], -1, 0); + break; + + case 2: /* ( 1 , 1 , -1 ) */ + if (ci->progeny[6] != NULL && cj->progeny[1] != NULL) + runner_dosub_pair_star_density(r, ci->progeny[6], cj->progeny[1], -1, 0); + break; + + case 3: /* ( 1 , 0 , 1 ) */ + if (ci->progeny[5] != NULL && cj->progeny[0] != NULL) + runner_dosub_pair_star_density(r, ci->progeny[5], cj->progeny[0], -1, 0); + if (ci->progeny[5] != NULL && cj->progeny[2] != NULL) + runner_dosub_pair_star_density(r, ci->progeny[5], cj->progeny[2], -1, 0); + if (ci->progeny[7] != NULL && cj->progeny[0] != NULL) + runner_dosub_pair_star_density(r, ci->progeny[7], cj->progeny[0], -1, 0); + if (ci->progeny[7] != NULL && cj->progeny[2] != NULL) + runner_dosub_pair_star_density(r, ci->progeny[7], cj->progeny[2], -1, 0); + break; + + case 4: /* ( 1 , 0 , 0 ) */ + if (ci->progeny[4] != NULL && cj->progeny[0] != NULL) + runner_dosub_pair_star_density(r, ci->progeny[4], cj->progeny[0], -1, 0); + if (ci->progeny[4] != NULL && cj->progeny[1] != NULL) + runner_dosub_pair_star_density(r, ci->progeny[4], cj->progeny[1], -1, 0); + if (ci->progeny[4] != NULL && cj->progeny[2] != NULL) + runner_dosub_pair_star_density(r, ci->progeny[4], cj->progeny[2], -1, 0); + if (ci->progeny[4] != NULL && cj->progeny[3] != NULL) + runner_dosub_pair_star_density(r, ci->progeny[4], cj->progeny[3], -1, 0); + if (ci->progeny[5] != NULL && cj->progeny[0] != NULL) + runner_dosub_pair_star_density(r, ci->progeny[5], cj->progeny[0], -1, 0); + if (ci->progeny[5] != NULL && cj->progeny[1] != NULL) + runner_dosub_pair_star_density(r, ci->progeny[5], cj->progeny[1], -1, 0); + if (ci->progeny[5] != NULL && cj->progeny[2] != NULL) + runner_dosub_pair_star_density(r, ci->progeny[5], cj->progeny[2], -1, 0); + if (ci->progeny[5] != NULL && cj->progeny[3] != NULL) + runner_dosub_pair_star_density(r, ci->progeny[5], cj->progeny[3], -1, 0); + if (ci->progeny[6] != NULL && cj->progeny[0] != NULL) + runner_dosub_pair_star_density(r, ci->progeny[6], cj->progeny[0], -1, 0); + if (ci->progeny[6] != NULL && cj->progeny[1] != NULL) + runner_dosub_pair_star_density(r, ci->progeny[6], cj->progeny[1], -1, 0); + if (ci->progeny[6] != NULL && cj->progeny[2] != NULL) + runner_dosub_pair_star_density(r, ci->progeny[6], cj->progeny[2], -1, 0); + if (ci->progeny[6] != NULL && cj->progeny[3] != NULL) + runner_dosub_pair_star_density(r, ci->progeny[6], cj->progeny[3], -1, 0); + if (ci->progeny[7] != NULL && cj->progeny[0] != NULL) + runner_dosub_pair_star_density(r, ci->progeny[7], cj->progeny[0], -1, 0); + if (ci->progeny[7] != NULL && cj->progeny[1] != NULL) + runner_dosub_pair_star_density(r, ci->progeny[7], cj->progeny[1], -1, 0); + if (ci->progeny[7] != NULL && cj->progeny[2] != NULL) + runner_dosub_pair_star_density(r, ci->progeny[7], cj->progeny[2], -1, 0); + if (ci->progeny[7] != NULL && cj->progeny[3] != NULL) + runner_dosub_pair_star_density(r, ci->progeny[7], cj->progeny[3], -1, 0); + break; + + case 5: /* ( 1 , 0 , -1 ) */ + if (ci->progeny[4] != NULL && cj->progeny[1] != NULL) + runner_dosub_pair_star_density(r, ci->progeny[4], cj->progeny[1], -1, 0); + if (ci->progeny[4] != NULL && cj->progeny[3] != NULL) + runner_dosub_pair_star_density(r, ci->progeny[4], cj->progeny[3], -1, 0); + if (ci->progeny[6] != NULL && cj->progeny[1] != NULL) + runner_dosub_pair_star_density(r, ci->progeny[6], cj->progeny[1], -1, 0); + if (ci->progeny[6] != NULL && cj->progeny[3] != NULL) + runner_dosub_pair_star_density(r, ci->progeny[6], cj->progeny[3], -1, 0); + break; + + case 6: /* ( 1 , -1 , 1 ) */ + if (ci->progeny[5] != NULL && cj->progeny[2] != NULL) + runner_dosub_pair_star_density(r, ci->progeny[5], cj->progeny[2], -1, 0); + break; + + case 7: /* ( 1 , -1 , 0 ) */ + if (ci->progeny[4] != NULL && cj->progeny[2] != NULL) + runner_dosub_pair_star_density(r, ci->progeny[4], cj->progeny[2], -1, 0); + if (ci->progeny[4] != NULL && cj->progeny[3] != NULL) + runner_dosub_pair_star_density(r, ci->progeny[4], cj->progeny[3], -1, 0); + if (ci->progeny[5] != NULL && cj->progeny[2] != NULL) + runner_dosub_pair_star_density(r, ci->progeny[5], cj->progeny[2], -1, 0); + if (ci->progeny[5] != NULL && cj->progeny[3] != NULL) + runner_dosub_pair_star_density(r, ci->progeny[5], cj->progeny[3], -1, 0); + break; + + case 8: /* ( 1 , -1 , -1 ) */ + if (ci->progeny[4] != NULL && cj->progeny[3] != NULL) + runner_dosub_pair_star_density(r, ci->progeny[4], cj->progeny[3], -1, 0); + break; + + case 9: /* ( 0 , 1 , 1 ) */ + if (ci->progeny[3] != NULL && cj->progeny[0] != NULL) + runner_dosub_pair_star_density(r, ci->progeny[3], cj->progeny[0], -1, 0); + if (ci->progeny[3] != NULL && cj->progeny[4] != NULL) + runner_dosub_pair_star_density(r, ci->progeny[3], cj->progeny[4], -1, 0); + if (ci->progeny[7] != NULL && cj->progeny[0] != NULL) + runner_dosub_pair_star_density(r, ci->progeny[7], cj->progeny[0], -1, 0); + if (ci->progeny[7] != NULL && cj->progeny[4] != NULL) + runner_dosub_pair_star_density(r, ci->progeny[7], cj->progeny[4], -1, 0); + break; + + case 10: /* ( 0 , 1 , 0 ) */ + if (ci->progeny[2] != NULL && cj->progeny[0] != NULL) + runner_dosub_pair_star_density(r, ci->progeny[2], cj->progeny[0], -1, 0); + if (ci->progeny[2] != NULL && cj->progeny[1] != NULL) + runner_dosub_pair_star_density(r, ci->progeny[2], cj->progeny[1], -1, 0); + if (ci->progeny[2] != NULL && cj->progeny[4] != NULL) + runner_dosub_pair_star_density(r, ci->progeny[2], cj->progeny[4], -1, 0); + if (ci->progeny[2] != NULL && cj->progeny[5] != NULL) + runner_dosub_pair_star_density(r, ci->progeny[2], cj->progeny[5], -1, 0); + if (ci->progeny[3] != NULL && cj->progeny[0] != NULL) + runner_dosub_pair_star_density(r, ci->progeny[3], cj->progeny[0], -1, 0); + if (ci->progeny[3] != NULL && cj->progeny[1] != NULL) + runner_dosub_pair_star_density(r, ci->progeny[3], cj->progeny[1], -1, 0); + if (ci->progeny[3] != NULL && cj->progeny[4] != NULL) + runner_dosub_pair_star_density(r, ci->progeny[3], cj->progeny[4], -1, 0); + if (ci->progeny[3] != NULL && cj->progeny[5] != NULL) + runner_dosub_pair_star_density(r, ci->progeny[3], cj->progeny[5], -1, 0); + if (ci->progeny[6] != NULL && cj->progeny[0] != NULL) + runner_dosub_pair_star_density(r, ci->progeny[6], cj->progeny[0], -1, 0); + if (ci->progeny[6] != NULL && cj->progeny[1] != NULL) + runner_dosub_pair_star_density(r, ci->progeny[6], cj->progeny[1], -1, 0); + if (ci->progeny[6] != NULL && cj->progeny[4] != NULL) + runner_dosub_pair_star_density(r, ci->progeny[6], cj->progeny[4], -1, 0); + if (ci->progeny[6] != NULL && cj->progeny[5] != NULL) + runner_dosub_pair_star_density(r, ci->progeny[6], cj->progeny[5], -1, 0); + if (ci->progeny[7] != NULL && cj->progeny[0] != NULL) + runner_dosub_pair_star_density(r, ci->progeny[7], cj->progeny[0], -1, 0); + if (ci->progeny[7] != NULL && cj->progeny[1] != NULL) + runner_dosub_pair_star_density(r, ci->progeny[7], cj->progeny[1], -1, 0); + if (ci->progeny[7] != NULL && cj->progeny[4] != NULL) + runner_dosub_pair_star_density(r, ci->progeny[7], cj->progeny[4], -1, 0); + if (ci->progeny[7] != NULL && cj->progeny[5] != NULL) + runner_dosub_pair_star_density(r, ci->progeny[7], cj->progeny[5], -1, 0); + break; + + case 11: /* ( 0 , 1 , -1 ) */ + if (ci->progeny[2] != NULL && cj->progeny[1] != NULL) + runner_dosub_pair_star_density(r, ci->progeny[2], cj->progeny[1], -1, 0); + if (ci->progeny[2] != NULL && cj->progeny[5] != NULL) + runner_dosub_pair_star_density(r, ci->progeny[2], cj->progeny[5], -1, 0); + if (ci->progeny[6] != NULL && cj->progeny[1] != NULL) + runner_dosub_pair_star_density(r, ci->progeny[6], cj->progeny[1], -1, 0); + if (ci->progeny[6] != NULL && cj->progeny[5] != NULL) + runner_dosub_pair_star_density(r, ci->progeny[6], cj->progeny[5], -1, 0); + break; + + case 12: /* ( 0 , 0 , 1 ) */ + if (ci->progeny[1] != NULL && cj->progeny[0] != NULL) + runner_dosub_pair_star_density(r, ci->progeny[1], cj->progeny[0], -1, 0); + if (ci->progeny[1] != NULL && cj->progeny[2] != NULL) + runner_dosub_pair_star_density(r, ci->progeny[1], cj->progeny[2], -1, 0); + if (ci->progeny[1] != NULL && cj->progeny[4] != NULL) + runner_dosub_pair_star_density(r, ci->progeny[1], cj->progeny[4], -1, 0); + if (ci->progeny[1] != NULL && cj->progeny[6] != NULL) + runner_dosub_pair_star_density(r, ci->progeny[1], cj->progeny[6], -1, 0); + if (ci->progeny[3] != NULL && cj->progeny[0] != NULL) + runner_dosub_pair_star_density(r, ci->progeny[3], cj->progeny[0], -1, 0); + if (ci->progeny[3] != NULL && cj->progeny[2] != NULL) + runner_dosub_pair_star_density(r, ci->progeny[3], cj->progeny[2], -1, 0); + if (ci->progeny[3] != NULL && cj->progeny[4] != NULL) + runner_dosub_pair_star_density(r, ci->progeny[3], cj->progeny[4], -1, 0); + if (ci->progeny[3] != NULL && cj->progeny[6] != NULL) + runner_dosub_pair_star_density(r, ci->progeny[3], cj->progeny[6], -1, 0); + if (ci->progeny[5] != NULL && cj->progeny[0] != NULL) + runner_dosub_pair_star_density(r, ci->progeny[5], cj->progeny[0], -1, 0); + if (ci->progeny[5] != NULL && cj->progeny[2] != NULL) + runner_dosub_pair_star_density(r, ci->progeny[5], cj->progeny[2], -1, 0); + if (ci->progeny[5] != NULL && cj->progeny[4] != NULL) + runner_dosub_pair_star_density(r, ci->progeny[5], cj->progeny[4], -1, 0); + if (ci->progeny[5] != NULL && cj->progeny[6] != NULL) + runner_dosub_pair_star_density(r, ci->progeny[5], cj->progeny[6], -1, 0); + if (ci->progeny[7] != NULL && cj->progeny[0] != NULL) + runner_dosub_pair_star_density(r, ci->progeny[7], cj->progeny[0], -1, 0); + if (ci->progeny[7] != NULL && cj->progeny[2] != NULL) + runner_dosub_pair_star_density(r, ci->progeny[7], cj->progeny[2], -1, 0); + if (ci->progeny[7] != NULL && cj->progeny[4] != NULL) + runner_dosub_pair_star_density(r, ci->progeny[7], cj->progeny[4], -1, 0); + if (ci->progeny[7] != NULL && cj->progeny[6] != NULL) + runner_dosub_pair_star_density(r, ci->progeny[7], cj->progeny[6], -1, 0); + break; + } + + } + + /* Otherwise, compute the pair directly. */ + else if (cell_is_active_star(ci, e) || cell_is_active_star(cj, e)) { + + /* /\* Make sure both cells are drifted to the current timestep. *\/ */ + /* if (!cell_are_part_drifted(ci, e) || !cell_are_part_drifted(cj, e)) */ + /* error("Interacting undrifted cells."); */ + + /* Do any of the cells need to be sorted first? */ + if (!(ci->sorted & (1 << sid)) || + ci->dx_max_sort_old > ci->dmin * space_maxreldx) + error("Interacting unsorted cell."); + if (!(cj->sorted & (1 << sid)) || + cj->dx_max_sort_old > cj->dmin * space_maxreldx) + error("Interacting unsorted cell."); + + /* Compute the interactions. */ + runner_dopair_branch_star_density(r, ci, cj); + } + + if (gettimer) TIMER_TOC(TIMER_DOSUB_PAIR); +} + + +/** + * @brief Compute grouped sub-cell interactions for self tasks + * + * @param r The #runner. + * @param ci The first #cell. + * @param gettimer Do we have a timer ? + */ +void runner_dosub_self_star_density(struct runner *r, struct cell *ci, int gettimer) { + + TIMER_TIC; + + /* Should we even bother? */ + if (ci->scount == 0 || !cell_is_active_star(ci, r->e)) return; + + /* Recurse? */ + if (cell_can_recurse_in_self_task(ci)) { + + /* Loop over all progeny. */ + for (int k = 0; k < 8; k++) + if (ci->progeny[k] != NULL) { + runner_dosub_self_star_density(r, ci->progeny[k], 0); + for (int j = k + 1; j < 8; j++) + if (ci->progeny[j] != NULL) + runner_dosub_pair_star_density(r, ci->progeny[k], ci->progeny[j], -1, 0); + } + } + + /* Otherwise, compute self-interaction. */ + else { + + /* /\* Check that cells are drifted. *\/ */ + /* if (!cell_are_part_drifted(c, e)) error("Interacting undrifted cell."); */ + + runner_doself_branch_star_density(r, ci); + } + + if (gettimer) TIMER_TOC(timer_dosub_self_star_density); +} diff --git a/src/scheduler.c b/src/scheduler.c index 1c20899e41cbbdce729bea986f99f99682262b51..72dc03caa194ad5b78f7a05f730c94a7f1c1f65a 100644 --- a/src/scheduler.c +++ b/src/scheduler.c @@ -241,7 +241,7 @@ void scheduler_write_dependencies(struct scheduler *s, int verbose) { int density_cluster[4] = {0}; int gradient_cluster[4] = {0}; int force_cluster[4] = {0}; - int gravity_cluster[5] = {0}; + int star_density_cluster[4] = {0}; /* Check whether we need to construct a group of tasks */ for (int type = 0; type < task_type_count; ++type) { @@ -262,6 +262,8 @@ void scheduler_write_dependencies(struct scheduler *s, int verbose) { force_cluster[k] = 1; if (type == task_type_self + k && subtype == task_subtype_grav) gravity_cluster[k] = 1; + if (type == task_type_self + k && subtype == task_subtype_star_density) + star_density_cluster[k] = 1; } if (type == task_type_grav_mesh) gravity_cluster[2] = 1; if (type == task_type_grav_long_range) gravity_cluster[3] = 1; @@ -312,6 +314,15 @@ void scheduler_write_dependencies(struct scheduler *s, int verbose) { fprintf(f, "\t\t %s;\n", taskID_names[task_type_grav_mm]); fprintf(f, "\t};\n"); + /* Make a cluster for the density tasks */ + fprintf(f, "\t subgraph cluster0{\n"); + fprintf(f, "\t\t label=\"\";\n"); + for (int k = 0; k < 4; ++k) + if (star_density_cluster[k]) + fprintf(f, "\t\t \"%s %s\";\n", taskID_names[task_type_self + k], + subtaskID_names[task_subtype_star_density]); + fprintf(f, "\t};\n"); + /* Be clean */ fprintf(f, "}"); fclose(f); @@ -977,6 +988,8 @@ void scheduler_splittasks_mapper(void *map_data, int num_elements, scheduler_splittask_gravity(t, s); } else if (t->type == task_type_grav_mesh) { /* For future use */ + } else if (t->subtype == task_subtype_star_density) { + /* For future use */ } else { #ifdef SWIFT_DEBUG_CHECKS error("Unexpected task sub-type"); diff --git a/src/task.c b/src/task.c index 12b1bb7a2cd2dcc12264b9da7ffab9f9427e3782..f3ab4ec7c0fa4d9b2621985eaccfac02ee8dca88 100644 --- a/src/task.c +++ b/src/task.c @@ -62,7 +62,8 @@ const char *taskID_names[task_type_count] = { /* Sub-task type names. */ const char *subtaskID_names[task_subtype_count] = { "none", "density", "gradient", "force", "grav", "external_grav", - "tend", "xv", "rho", "gpart", "multipole", "spart"}; + "tend", "xv", "rho", "gpart", "multipole", "spart", + "star_density"}; #ifdef WITH_MPI /* MPI communicators for the subtypes. */ @@ -137,6 +138,10 @@ __attribute__((always_inline)) INLINE static enum task_actions task_acts_on( return task_action_part; break; + case task_subtype_star_density: + return task_action_all; + break; + case task_subtype_grav: case task_subtype_external_grav: return task_action_gpart; diff --git a/src/task.h b/src/task.h index a430ff9aeafc83d865406df412ebe3ac5267e651..fd7bb985b0894793973b7b4814f8c96c62663eb1 100644 --- a/src/task.h +++ b/src/task.h @@ -82,13 +82,13 @@ enum task_subtypes { task_subtype_force, task_subtype_grav, task_subtype_external_grav, - task_subtype_star_density, task_subtype_tend, task_subtype_xv, task_subtype_rho, task_subtype_gpart, task_subtype_multipole, task_subtype_spart, + task_subtype_star_density, task_subtype_count } __attribute__((packed)); diff --git a/src/timers.c b/src/timers.c index 642595ec7dfa6e6f58ac755a0194f06944b9d568..b28eccf4d0685195f39d528212a7e0b2eda246c9 100644 --- a/src/timers.c +++ b/src/timers.c @@ -92,6 +92,7 @@ const char* timers_names[timer_count] = { "doself_subset_star_density", "dopair_subset_star_density", "dosubpair_star_density", + "dosub_self_star_density", }; /* File to store the timers */ diff --git a/src/timers.h b/src/timers.h index 6228328d67b98d89aded19a1ed19fa06667899de..04e2d441c4f8864402af849ab2bc2d917031f312 100644 --- a/src/timers.h +++ b/src/timers.h @@ -93,6 +93,7 @@ enum { timer_doself_subset_star_density, timer_dopair_subset_star_density, timer_dosubpair_star_density, + timer_dosub_self_star_density, timer_count, };