From fdbe16b3542b79bfc6331c700d5d364aecf99a2f Mon Sep 17 00:00:00 2001 From: loikki <loic.hausammann@protonmail.ch> Date: Sun, 2 Dec 2018 18:33:34 +0100 Subject: [PATCH] implement the stars density over mpi --- examples/main.c | 1 - src/cell.c | 187 +++++++++++----------- src/cell.h | 17 ++ src/debug.c | 80 +++++++++- src/engine_maketasks.c | 282 +++++++++++++++++++++++++++++---- src/engine_marktasks.c | 150 +++++++++++++++--- src/runner.c | 40 ++++- src/runner_doiact_stars.h | 53 ++++++- src/scheduler.c | 42 +++-- src/stars/Default/stars.h | 2 - src/stars/Default/stars_iact.h | 2 - src/stars/Default/stars_io.h | 13 +- src/stars/Default/stars_part.h | 3 - 13 files changed, 697 insertions(+), 175 deletions(-) diff --git a/examples/main.c b/examples/main.c index 1d46ee954b..fe0e4bc353 100644 --- a/examples/main.c +++ b/examples/main.c @@ -460,7 +460,6 @@ int main(int argc, char *argv[]) { #ifdef WITH_MPI if (with_mpole_reconstruction && nr_nodes > 1) error("Cannot reconstruct m-poles every step over MPI (yet)."); - if (with_feedback) error("Can't run with feedback over MPI (yet)."); if (with_star_formation) error("Can't run with star formation over MPI (yet)"); if (with_limiter) error("Can't run with time-step limiter over MPI (yet)"); diff --git a/src/cell.c b/src/cell.c index bd1022f1fa..c96aa4dafa 100644 --- a/src/cell.c +++ b/src/cell.c @@ -381,6 +381,7 @@ int cell_pack(struct cell *restrict c, struct pcell *restrict pc, /* Start by packing the data of the current cell. */ pc->hydro.h_max = c->hydro.h_max; + pc->stars.h_max = c->stars.h_max; pc->hydro.ti_end_min = c->hydro.ti_end_min; pc->hydro.ti_end_max = c->hydro.ti_end_max; pc->grav.ti_end_min = c->grav.ti_end_min; @@ -485,6 +486,7 @@ int cell_unpack(struct pcell *restrict pc, struct cell *restrict c, /* Unpack the current pcell. */ c->hydro.h_max = pc->hydro.h_max; + c->stars.h_max = pc->stars.h_max; c->hydro.ti_end_min = pc->hydro.ti_end_min; c->hydro.ti_end_max = pc->hydro.ti_end_max; c->grav.ti_end_min = pc->grav.ti_end_min; @@ -2038,6 +2040,9 @@ void cell_activate_stars_sorts_up(struct cell *c, struct scheduler *s) { */ void cell_activate_stars_sorts(struct cell *c, int sid, struct scheduler *s) { + // TODO Alexei, remove this + if (c->nodeID != engine_rank) return; + /* Do we need to re-sort? */ if (c->stars.dx_max_sort > space_maxreldx * c->dmin) { @@ -3344,6 +3349,10 @@ int cell_unskip_stars_tasks(struct cell *c, struct scheduler *s) { struct cell *cj = t->cj; const int ci_active = cell_is_active_stars(ci, e); const int cj_active = (cj != NULL) ? cell_is_active_stars(cj, e) : 0; +#ifdef WITH_MPI + const int ci_nodeID = ci->nodeID; + const int cj_nodeID = (cj != NULL) ? cj->nodeID : -1; +#endif /* Only activate tasks that involve a local active cell. */ if ((ci_active && ci->nodeID == nodeID) || @@ -3361,38 +3370,42 @@ int cell_unskip_stars_tasks(struct cell *c, struct scheduler *s) { /* Set the correct sorting flags and activate hydro drifts */ else if (t->type == task_type_pair) { /* Do ci */ - /* stars for ci */ - atomic_or(&ci->stars.requires_sorts, 1 << t->flags); - ci->stars.dx_max_sort_old = ci->stars.dx_max_sort; - - /* hydro for cj */ - atomic_or(&cj->hydro.requires_sorts, 1 << t->flags); - cj->hydro.dx_max_sort_old = cj->hydro.dx_max_sort; - - /* Activate the drift tasks. */ - if (ci->nodeID == nodeID) cell_activate_drift_spart(ci, s); - if (cj->nodeID == nodeID) cell_activate_drift_part(cj, s); - - /* Check the sorts and activate them if needed. 
*/ - cell_activate_stars_sorts(ci, t->flags, s); - cell_activate_hydro_sorts(cj, t->flags, s); + if (ci_active && cj->hydro.count != 0 && ci->stars.count != 0) { + /* stars for ci */ + atomic_or(&ci->stars.requires_sorts, 1 << t->flags); + ci->stars.dx_max_sort_old = ci->stars.dx_max_sort; + + /* hydro for cj */ + atomic_or(&cj->hydro.requires_sorts, 1 << t->flags); + cj->hydro.dx_max_sort_old = cj->hydro.dx_max_sort; + + /* Activate the drift tasks. */ + if (ci->nodeID == nodeID) cell_activate_drift_spart(ci, s); + if (cj->nodeID == nodeID) cell_activate_drift_part(cj, s); + + /* Check the sorts and activate them if needed. */ + cell_activate_stars_sorts(ci, t->flags, s); + cell_activate_hydro_sorts(cj, t->flags, s); + } /* Do cj */ - /* hydro for ci */ - atomic_or(&ci->hydro.requires_sorts, 1 << t->flags); - ci->hydro.dx_max_sort_old = ci->hydro.dx_max_sort; - - /* stars for cj */ - atomic_or(&cj->stars.requires_sorts, 1 << t->flags); - cj->stars.dx_max_sort_old = cj->stars.dx_max_sort; - - /* Activate the drift tasks. */ - if (cj->nodeID == nodeID) cell_activate_drift_spart(cj, s); - if (ci->nodeID == nodeID) cell_activate_drift_part(ci, s); - - /* Check the sorts and activate them if needed. */ - cell_activate_hydro_sorts(ci, t->flags, s); - cell_activate_stars_sorts(cj, t->flags, s); + if (cj_active && ci->hydro.count != 0 && cj->stars.count != 0) { + /* hydro for ci */ + atomic_or(&ci->hydro.requires_sorts, 1 << t->flags); + ci->hydro.dx_max_sort_old = ci->hydro.dx_max_sort; + + /* stars for cj */ + atomic_or(&cj->stars.requires_sorts, 1 << t->flags); + cj->stars.dx_max_sort_old = cj->stars.dx_max_sort; + + /* Activate the drift tasks. */ + if (cj->nodeID == nodeID) cell_activate_drift_spart(cj, s); + if (ci->nodeID == nodeID) cell_activate_drift_part(ci, s); + + /* Check the sorts and activate them if needed. */ + cell_activate_hydro_sorts(ci, t->flags, s); + cell_activate_stars_sorts(cj, t->flags, s); + } } /* Store current values of dx_max and h_max. */ @@ -3407,85 +3420,81 @@ int cell_unskip_stars_tasks(struct cell *c, struct scheduler *s) { /* Check whether there was too much particle motion, i.e. the cell neighbour conditions were violated. */ if (cell_need_rebuild_for_stars_pair(ci, cj)) rebuild = 1; + if (cell_need_rebuild_for_hydro_pair(ci, cj)) rebuild = 1; #ifdef WITH_MPI - error("MPI with stars not implemented"); - /* /\* Activate the send/recv tasks. *\/ */ - /* if (ci->nodeID != nodeID) { */ - - /* /\* If the local cell is active, receive data from the foreign cell. - * *\/ */ - /* if (cj_active) { */ - /* scheduler_activate(s, ci->hydro.recv_xv); */ - /* if (ci_active) { */ - /* scheduler_activate(s, ci->hydro.recv_rho); */ - - /* } */ - /* } */ + /* Activate the send/recv tasks. */ + if (ci_nodeID != nodeID) { - /* /\* If the foreign cell is active, we want its ti_end values. *\/ */ - /* if (ci_active) scheduler_activate(s, ci->mpi.recv_ti); */ + // TODO Alexei: In this section, you will find some comments that + // are from the hydro code. It should look the same for the feedback. + /* If the local cell is active, receive data from the foreign cell. */ + if (cj_active) { + scheduler_activate(s, ci->mpi.hydro.recv_xv); + /* if (ci_active) { */ + /* scheduler_activate(s, ci->mpi.hydro.recv_rho); */ + /* } */ + } - /* /\* Is the foreign cell active and will need stuff from us? *\/ */ - /* if (ci_active) { */ + /* /\* If the foreign cell is active, we want its ti_end values. 
*\/ */ + /* if (ci_active) scheduler_activate(s, ci->mpi.recv_ti); */ - /* scheduler_activate_send(s, cj->hydro.send_xv, ci->nodeID); */ + /* Is the foreign cell active and will need stuff from us? */ + if (ci_active) { - /* /\* Drift the cell which will be sent; note that not all sent */ - /* particles will be drifted, only those that are needed. *\/ */ - /* cell_activate_drift_part(cj, s); */ + scheduler_activate_send(s, cj->mpi.hydro.send_xv, ci_nodeID); - /* /\* If the local cell is also active, more stuff will be needed. - * *\/ */ - /* if (cj_active) { */ - /* scheduler_activate_send(s, cj->hydro.send_rho, ci->nodeID); */ + /* Drift the cell which will be sent; note that not all sent + particles will be drifted, only those that are needed. */ + cell_activate_drift_part(cj, s); - /* } */ - /* } */ + /* /\* If the local cell is also active, more stuff will be needed. + * *\/ */ + /* if (cj_active) { */ + /* scheduler_activate_send(s, cj->mpi.hydro.send_rho, ci_nodeID); */ - /* /\* If the local cell is active, send its ti_end values. *\/ */ - /* if (cj_active) scheduler_activate_send(s, cj->mpi.send_ti, - * ci->nodeID); - */ + /* } */ + } - /* } else if (cj->nodeID != nodeID) { */ + /* /\* If the local cell is active, send its ti_end values. *\/ */ + /* if (cj_active) scheduler_activate_send(s, cj->mpi.send_ti, + * ci_nodeID); */ - /* /\* If the local cell is active, receive data from the foreign cell. - * *\/ */ - /* if (ci_active) { */ - /* scheduler_activate(s, cj->hydro.recv_xv); */ - /* if (cj_active) { */ - /* scheduler_activate(s, cj->hydro.recv_rho); */ + } else if (cj_nodeID != nodeID) { - /* } */ - /* } */ + /* If the local cell is active, receive data from the foreign cell. */ + if (ci_active) { + scheduler_activate(s, cj->mpi.hydro.recv_xv); + /* if (cj_active) { */ + /* scheduler_activate(s, cj->mpi.hydro.recv_rho); */ + /* } */ + } - /* /\* If the foreign cell is active, we want its ti_end values. *\/ */ - /* if (cj_active) scheduler_activate(s, cj->mpi.recv_ti); */ + /* /\* If the foreign cell is active, we want its ti_end values. *\/ */ + /* if (cj_active) scheduler_activate(s, cj->mpi.recv_ti); */ - /* /\* Is the foreign cell active and will need stuff from us? *\/ */ - /* if (cj_active) { */ + /* Is the foreign cell active and will need stuff from us? */ + if (cj_active) { - /* scheduler_activate_send(s, ci->hydro.send_xv, cj->nodeID); */ + scheduler_activate_send(s, ci->mpi.hydro.send_xv, cj_nodeID); - /* /\* Drift the cell which will be sent; note that not all sent */ - /* particles will be drifted, only those that are needed. *\/ */ - /* cell_activate_drift_part(ci, s); */ + /* Drift the cell which will be sent; note that not all sent + particles will be drifted, only those that are needed. */ + cell_activate_drift_part(ci, s); - /* /\* If the local cell is also active, more stuff will be needed. - * *\/ */ - /* if (ci_active) { */ + /* /\* If the local cell is also active, more stuff will be needed. + * *\/ */ + /* if (ci_active) { */ - /* scheduler_activate_send(s, ci->hydro.send_rho, cj->nodeID); */ + /* scheduler_activate_send(s, ci->mpi.hydro.send_rho, cj_nodeID); */ - /* } */ - /* } */ + /* } */ + } - /* /\* If the local cell is active, send its ti_end values. *\/ */ - /* if (ci_active) scheduler_activate_send(s, ci->mpi.send_ti, - * cj->nodeID); - */ - /* } */ + /* /\* If the local cell is active, send its ti_end values. 
*\/ */ + /* if (ci_active) scheduler_activate_send(s, ci->mpi.send_ti, + * cj_nodeID); */ + } #endif } } diff --git a/src/cell.h b/src/cell.h index c5fbc9b8c0..3fca9ce21c 100644 --- a/src/cell.h +++ b/src/cell.h @@ -996,6 +996,23 @@ cell_need_rebuild_for_hydro_pair(const struct cell *ci, const struct cell *cj) { ci->hydro.dx_max_part + cj->hydro.dx_max_part > cj->dmin); } +/** + * @brief Have particles in a pair of cells moved too much and require a rebuild + * ? + * + * @param ci The first #cell. + * @param cj The second #cell. + */ +__attribute__((always_inline)) INLINE static int +cell_need_rebuild_for_stars_pair(const struct cell *ci, const struct cell *cj) { + + /* Is the cut-off radius plus the max distance the parts in both cells have */ + /* moved larger than the cell size ? */ + /* Note ci->dmin == cj->dmin */ + return (kernel_gamma * max(ci->stars.h_max, cj->stars.h_max) + + ci->stars.dx_max_part + cj->stars.dx_max_part > + cj->dmin); +} /** * @brief Have star particles in a pair of cells moved too much and require a diff --git a/src/debug.c b/src/debug.c index 2c1d81b676..d2aff378a1 100644 --- a/src/debug.c +++ b/src/debug.c @@ -181,6 +181,14 @@ int checkSpacehmax(struct space *s) { } } + float cell_stars_h_max = 0.0f; + for (int k = 0; k < s->nr_cells; k++) { + if (s->cells_top[k].nodeID == s->e->nodeID && + s->cells_top[k].stars.h_max > cell_stars_h_max) { + cell_stars_h_max = s->cells_top[k].stars.h_max; + } + } + /* Now all particles. */ float part_h_max = 0.0f; for (size_t k = 0; k < s->nr_parts; k++) { @@ -189,10 +197,21 @@ int checkSpacehmax(struct space *s) { } } + /* Now all the sparticles. */ + float spart_h_max = 0.0f; + for (size_t k = 0; k < s->nr_sparts; k++) { + if (s->sparts[k].h > spart_h_max) { + spart_h_max = s->sparts[k].h; + } + } + /* If within some epsilon we are OK. */ - if (fabsf(cell_h_max - part_h_max) <= FLT_EPSILON) return 1; + if (fabsf(cell_h_max - part_h_max) <= FLT_EPSILON && + fabsf(cell_stars_h_max - spart_h_max) <= FLT_EPSILON) + return 1; /* There is a problem. Hunt it down. 
*/ + /* part */ for (int k = 0; k < s->nr_cells; k++) { if (s->cells_top[k].nodeID == s->e->nodeID) { if (s->cells_top[k].hydro.h_max > part_h_max) { @@ -209,6 +228,23 @@ int checkSpacehmax(struct space *s) { } } + /* spart */ + for (int k = 0; k < s->nr_cells; k++) { + if (s->cells_top[k].nodeID == s->e->nodeID) { + if (s->cells_top[k].stars.h_max > spart_h_max) { + message("cell %d is inconsistent (%f > %f)", k, + s->cells_top[k].stars.h_max, spart_h_max); + } + } + } + + for (size_t k = 0; k < s->nr_sparts; k++) { + if (s->sparts[k].h > cell_stars_h_max) { + message("spart %lld is inconsistent (%f > %f)", s->sparts[k].id, + s->sparts[k].h, cell_stars_h_max); + } + } + return 0; } @@ -227,6 +263,8 @@ int checkCellhdxmax(const struct cell *c, int *depth) { float h_max = 0.0f; float dx_max = 0.0f; + float stars_h_max = 0.0f; + float stars_dx_max = 0.0f; int result = 1; const double loc_min[3] = {c->loc[0], c->loc[1], c->loc[2]}; @@ -262,6 +300,33 @@ int checkCellhdxmax(const struct cell *c, int *depth) { dx_max = max(dx_max, sqrt(dx2)); } + const size_t nr_sparts = c->stars.count; + struct spart *sparts = c->stars.parts; + for (size_t k = 0; k < nr_sparts; k++) { + + struct spart *const sp = &sparts[k]; + + if (sp->x[0] < loc_min[0] || sp->x[0] >= loc_max[0] || + sp->x[1] < loc_min[1] || sp->x[1] >= loc_max[1] || + sp->x[2] < loc_min[2] || sp->x[2] >= loc_max[2]) { + + message( + "Inconsistent part position p->x=[%e %e %e], c->loc=[%e %e %e] " + "c->width=[%e %e %e]", + sp->x[0], sp->x[1], sp->x[2], c->loc[0], c->loc[1], c->loc[2], + c->width[0], c->width[1], c->width[2]); + + result = 0; + } + + const float dx2 = sp->x_diff[0] * sp->x_diff[0] + + sp->x_diff[1] * sp->x_diff[1] + + sp->x_diff[2] * sp->x_diff[2]; + + stars_h_max = max(stars_h_max, sp->h); + stars_dx_max = max(stars_dx_max, sqrt(dx2)); + } + if (c->split) { for (int k = 0; k < 8; k++) { if (c->progeny[k] != NULL) { @@ -285,6 +350,19 @@ int checkCellhdxmax(const struct cell *c, int *depth) { result = 0; } + if (c->stars.h_max != stars_h_max) { + message("%d Inconsistent stars_h_max: cell %f != parts %f", *depth, + c->stars.h_max, stars_h_max); + message("location: %f %f %f", c->loc[0], c->loc[1], c->loc[2]); + result = 0; + } + if (c->stars.dx_max_part != stars_dx_max) { + message("%d Inconsistent stars_dx_max: %f != %f", *depth, + c->stars.dx_max_part, stars_dx_max); + message("location: %f %f %f", c->loc[0], c->loc[1], c->loc[2]); + result = 0; + } + return result; } diff --git a/src/engine_maketasks.c b/src/engine_maketasks.c index d1d3be6c12..ebc6b17429 100644 --- a/src/engine_maketasks.c +++ b/src/engine_maketasks.c @@ -203,6 +203,86 @@ void engine_addtasks_send_hydro(struct engine *e, struct cell *ci, #endif } +/** + * @brief Add send tasks for the stars pairs to a hierarchy of cells. + * + * @param e The #engine. + * @param ci The sending #cell. + * @param cj Dummy cell containing the nodeID of the receiving node. + * @param t_xv The send_xv #task, if it has already been created. + * @param t_rho The send_rho #task, if it has already been created. + */ +void engine_addtasks_send_stars(struct engine *e, struct cell *ci, + struct cell *cj, struct task *t_xv, + struct task *t_rho) { + +#ifdef WITH_MPI + struct link *l = NULL; + struct scheduler *s = &e->sched; + const int nodeID = cj->nodeID; + + /* Check if any of the density tasks are for the target node. 
*/ + for (l = ci->stars.density; l != NULL; l = l->next) + if (l->t->ci->nodeID == nodeID || + (l->t->cj != NULL && l->t->cj->nodeID == nodeID)) + break; + + /* If so, attach send tasks. */ + if (l != NULL) { + /* Get the task if created in hydro part */ + struct link *hydro = NULL; + for (hydro = ci->mpi.hydro.send_xv; hydro != NULL; hydro = hydro->next) { + if (hydro->t->ci->nodeID == nodeID || + (hydro->t->cj != NULL && hydro->t->cj->nodeID == nodeID)) { + break; + } + } + + // TODO Alexei: I guess that you can assume that if the send_xv exists, + // send_rho exists too + + if (t_xv == NULL) { + + /* Already exists, just need to get it */ + if (hydro != NULL) { + // TODO Alexei: set t_feedback + t_xv = hydro->t; + + /* This task does not exists, need to create it */ + } else { + + // TODO Alexei: create task and do correct unlocks + + /* Make sure this cell is tagged. */ + cell_ensure_tagged(ci); + + /* Create the tasks and their dependencies? */ + t_xv = scheduler_addtask(s, task_type_send, task_subtype_xv, + ci->mpi.tag, 0, ci, cj); + + /* Drift before you send */ + scheduler_addunlock(s, ci->hydro.super->hydro.drift, t_xv); + } + } + + if (hydro == NULL) { + engine_addlink(e, &ci->mpi.hydro.send_xv, t_xv); + // TODO Alexei: addlink + /* engine_addlink(e, &ci->mpi.hydro.send_rho, t_rho); */ + } + } + + /* Recurse? */ + if (ci->split) + for (int k = 0; k < 8; k++) + if (ci->progeny[k] != NULL) + engine_addtasks_send_stars(e, ci->progeny[k], cj, t_xv, t_rho); + +#else + error("SWIFT was not compiled with MPI support."); +#endif +} + /** * @brief Add send tasks for the time-step to a hierarchy of cells. * @@ -348,6 +428,74 @@ void engine_addtasks_recv_hydro(struct engine *e, struct cell *c, #endif } +/** + * @brief Add recv tasks for stars pairs to a hierarchy of cells. + * + * @param e The #engine. + * @param c The foreign #cell. + * @param t_xv The recv_xv #task, if it has already been created. + * @param t_rho The recv_rho #task, if it has already been created. + */ +void engine_addtasks_recv_stars(struct engine *e, struct cell *c, + struct task *t_xv, struct task *t_rho) { + +#ifdef WITH_MPI + struct scheduler *s = &e->sched; + int new_task = 0; + + /* Have we reached a level where there are any stars (or hydro) tasks ? */ + if (t_xv == NULL && (c->stars.density != NULL || c->hydro.density != NULL)) { + +#ifdef SWIFT_DEBUG_CHECKS + /* Make sure this cell has a valid tag. */ + if (c->mpi.tag < 0) error("Trying to receive from untagged cell."); +#endif // SWIFT_DEBUG_CHECKS + + /* Create the tasks. */ + if (c->mpi.hydro.recv_xv == NULL) { + new_task = 1; + t_xv = scheduler_addtask(s, task_type_recv, task_subtype_xv, c->mpi.tag, + 0, c, NULL); + // TODO Alexei: create t_feedback task + /* t_rho = scheduler_addtask(s, task_type_recv, task_subtype_rho, + * c->mpi.tag, */ + /* 0, c, NULL); */ + } else { + // TODO Alexei: set t_feedback + t_xv = c->mpi.hydro.recv_xv; + } + } + + // TODO Alexei: set pointer + c->mpi.hydro.recv_xv = t_xv; + /* c->mpi.hydro.recv_rho = t_rho; */ + + /* Add dependencies. 
*/ + if (c->hydro.sorts != NULL && new_task) { + scheduler_addunlock(s, t_xv, c->hydro.sorts); + } + + // TODO Alexei: You will need to sort the particles after receiving the spart + + for (struct link *l = c->stars.density; l != NULL; l = l->next) { + scheduler_addunlock(s, t_xv, l->t); + // TODO Alexei: I guess that you will need to unlock the recv here + /* scheduler_addunlock(s, l->t, t_rho); */ + } + // TODO Alexei: unlock feedback task + /* for (struct link *l = c->hydro.force; l != NULL; l = l->next) */ + /* scheduler_addunlock(s, t_rho, l->t); */ + /* Recurse? */ + if (c->split) + for (int k = 0; k < 8; k++) + if (c->progeny[k] != NULL) + engine_addtasks_recv_stars(e, c->progeny[k], t_xv, t_rho); + +#else + error("SWIFT was not compiled with MPI support."); +#endif +} + /** * @brief Add recv tasks for gravity pairs to a hierarchy of cells. * @@ -784,12 +932,12 @@ void engine_make_hierarchical_tasks_stars(struct engine *e, struct cell *c) { /* Are we in a super-cell ? */ if (c->super == c) { - /* Add the sort task. */ - c->stars.sorts = scheduler_addtask(s, task_type_stars_sort, - task_subtype_none, 0, 0, c, NULL); - /* Local tasks only... */ if (c->nodeID == e->nodeID) { + // TODO Alexei: do not need to be only on local node with feedback + /* Add the sort task. */ + c->stars.sorts = scheduler_addtask(s, task_type_stars_sort, + task_subtype_none, 0, 0, c, NULL); /* Generate the ghost tasks. */ c->stars.ghost_in = @@ -1820,6 +1968,9 @@ void engine_make_extra_starsloop_tasks_mapper(void *map_data, int num_elements, else if (t->type == task_type_self && t->subtype == task_subtype_stars_density) { + /* Make the self-density tasks depend on the drifts. */ + scheduler_addunlock(sched, t->ci->hydro.super->hydro.drift, t); + /* Make the self-density tasks depend on the drift and gravity drift. */ scheduler_addunlock(sched, t->ci->hydro.super->hydro.drift, t); scheduler_addunlock(sched, t->ci->super->grav.drift, t); @@ -1846,19 +1997,28 @@ void engine_make_extra_starsloop_tasks_mapper(void *map_data, int num_elements, /* Make all stars density tasks depend on the hydro drift and sorts, * gravity drift and star sorts. 
*/ if (t->ci->nodeID == engine_rank) - scheduler_addunlock(sched, t->ci->super->hydro.drift, t); - scheduler_addunlock(sched, t->ci->super->hydro.sorts, t); - if (t->cj->nodeID == engine_rank) - scheduler_addunlock(sched, t->cj->super->grav.drift, t); - scheduler_addunlock(sched, t->ci->super->stars.sorts, t); + scheduler_addunlock(sched, t->ci->hydro.super->hydro.drift, t); + scheduler_addunlock(sched, t->ci->hydro.super->hydro.sorts, t); - if (t->ci->super != t->cj->super) { + if (t->ci->nodeID == engine_rank) { + scheduler_addunlock(sched, t->ci->super->grav.drift, t); + // TODO Alexei: the stars in foreign cells need to be sorted before + // the feedback loop and after the ghosts + scheduler_addunlock(sched, t->ci->super->stars.sorts, t); + } + + if (t->ci->hydro.super != t->cj->hydro.super) { if (t->cj->nodeID == engine_rank) - scheduler_addunlock(sched, t->cj->super->hydro.drift, t); - scheduler_addunlock(sched, t->cj->super->hydro.sorts, t); - if (t->ci->nodeID == engine_rank) - scheduler_addunlock(sched, t->ci->super->grav.drift, t); - scheduler_addunlock(sched, t->cj->super->stars.sorts, t); + scheduler_addunlock(sched, t->cj->hydro.super->hydro.drift, t); + scheduler_addunlock(sched, t->cj->hydro.super->hydro.sorts, t); + } + + if (t->ci->super != t->cj->super) { + if (t->cj->nodeID == engine_rank) { + scheduler_addunlock(sched, t->cj->super->grav.drift, t); + // TODO Alexei: same here, sort before feedback + scheduler_addunlock(sched, t->cj->super->stars.sorts, t); + } } /* Start by constructing the task for the second stars loop */ @@ -1889,8 +2049,8 @@ void engine_make_extra_starsloop_tasks_mapper(void *map_data, int num_elements, /* Make all stars density tasks depend on the hydro drift and sorts, * gravity drift and star sorts. */ - scheduler_addunlock(sched, t->ci->super->hydro.drift, t); - scheduler_addunlock(sched, t->ci->super->hydro.sorts, t); + scheduler_addunlock(sched, t->ci->hydro.super->hydro.drift, t); + scheduler_addunlock(sched, t->ci->hydro.super->hydro.sorts, t); scheduler_addunlock(sched, t->ci->super->grav.drift, t); scheduler_addunlock(sched, t->ci->super->stars.sorts, t); @@ -1916,19 +2076,27 @@ void engine_make_extra_starsloop_tasks_mapper(void *map_data, int num_elements, /* Make all stars density tasks depend on the hydro drift and sorts, * gravity drift and star sorts. 
*/ if (t->cj->nodeID == engine_rank) - scheduler_addunlock(sched, t->cj->super->hydro.drift, t); - scheduler_addunlock(sched, t->cj->super->hydro.sorts, t); - if (t->cj->nodeID == engine_rank) + scheduler_addunlock(sched, t->cj->hydro.super->hydro.drift, t); + scheduler_addunlock(sched, t->cj->hydro.super->hydro.sorts, t); + + if (t->cj->nodeID == engine_rank) { scheduler_addunlock(sched, t->cj->super->grav.drift, t); - scheduler_addunlock(sched, t->ci->super->stars.sorts, t); + // TODO Alexei: Still the same + scheduler_addunlock(sched, t->cj->super->stars.sorts, t); + } - if (t->ci->super != t->cj->super) { - if (t->ci->nodeID == engine_rank) - scheduler_addunlock(sched, t->ci->super->hydro.drift, t); - scheduler_addunlock(sched, t->ci->super->hydro.sorts, t); + if (t->ci->hydro.super != t->cj->hydro.super) { if (t->ci->nodeID == engine_rank) + scheduler_addunlock(sched, t->ci->hydro.super->hydro.drift, t); + scheduler_addunlock(sched, t->ci->hydro.super->hydro.sorts, t); + } + + if (t->ci->super != t->cj->super) { + if (t->ci->nodeID == engine_rank) { scheduler_addunlock(sched, t->ci->super->grav.drift, t); - scheduler_addunlock(sched, t->cj->super->stars.sorts, t); + // TODO Alexei: still the same + scheduler_addunlock(sched, t->ci->super->stars.sorts, t); + } } /* Start by constructing the task for the second stars loop */ @@ -2021,7 +2189,9 @@ void engine_make_starsloop_tasks_mapper(void *map_data, int num_elements, struct cell *cj = &cells[cjd]; /* Is that neighbour local and does it have particles ? */ - if (cid >= cjd || (cj->stars.count == 0 && cj->hydro.count == 0) || + if (cid >= cjd || + ((cj->stars.count == 0 || ci->hydro.count == 0) && + (cj->hydro.count == 0 || ci->stars.count == 0)) || (ci->nodeID != nodeID && cj->nodeID != nodeID)) continue; @@ -2029,6 +2199,50 @@ void engine_make_starsloop_tasks_mapper(void *map_data, int num_elements, const int sid = sortlistID[(kk + 1) + 3 * ((jj + 1) + 3 * (ii + 1))]; scheduler_addtask(sched, task_type_pair, task_subtype_stars_density, sid, 0, ci, cj); + +#ifdef SWIFT_DEBUG_CHECKS +#ifdef WITH_MPI + + /* Let's cross-check that we had a proxy for that cell */ + if (ci->nodeID == nodeID && cj->nodeID != engine_rank) { + + /* Find the proxy for this node */ + const int proxy_id = e->proxy_ind[cj->nodeID]; + if (proxy_id < 0) + error("No proxy exists for that foreign node %d!", cj->nodeID); + + const struct proxy *p = &e->proxies[proxy_id]; + + /* Check whether the cell exists in the proxy */ + int n = 0; + for (n = 0; n < p->nr_cells_in; n++) + if (p->cells_in[n] == cj) break; + if (n == p->nr_cells_in) + error( + "Cell %d not found in the proxy but trying to construct " + "stars task!", + cjd); + } else if (cj->nodeID == nodeID && ci->nodeID != engine_rank) { + + /* Find the proxy for this node */ + const int proxy_id = e->proxy_ind[ci->nodeID]; + if (proxy_id < 0) + error("No proxy exists for that foreign node %d!", ci->nodeID); + + const struct proxy *p = &e->proxies[proxy_id]; + + /* Check whether the cell exists in the proxy */ + int n = 0; + for (n = 0; n < p->nr_cells_in; n++) + if (p->cells_in[n] == ci) break; + if (n == p->nr_cells_in) + error( + "Cell %d not found in the proxy but trying to construct " + "stars task!", + cid); + } +#endif /* WITH_MPI */ +#endif /* SWIFT_DEBUG_CHECKS */ } } } @@ -2186,6 +2400,12 @@ void engine_addtasks_send_mapper(void *map_data, int num_elements, engine_addtasks_send_hydro(e, ci, cj, /*t_xv=*/NULL, /*t_rho=*/NULL, /*t_gradient=*/NULL); + /* Add the send tasks for the cells in the proxy that 
have a stars + * connection. */ + if ((e->policy & engine_policy_feedback) && (type & proxy_cell_type_hydro)) + engine_addtasks_send_stars(e, ci, cj, /*t_xv=*/NULL, + /*t_rho=*/NULL); + /* Add the send tasks for the cells in the proxy that have a gravity * connection. */ if ((e->policy & engine_policy_self_gravity) && @@ -2212,6 +2432,11 @@ void engine_addtasks_recv_mapper(void *map_data, int num_elements, if ((e->policy & engine_policy_hydro) && (type & proxy_cell_type_hydro)) engine_addtasks_recv_hydro(e, ci, NULL, NULL, NULL); + /* Add the recv tasks for the cells in the proxy that have a stars + * connection. */ + if ((e->policy & engine_policy_feedback) && (type & proxy_cell_type_hydro)) + engine_addtasks_recv_stars(e, ci, NULL, NULL); + /* Add the recv tasks for the cells in the proxy that have a gravity * connection. */ if ((e->policy & engine_policy_self_gravity) && @@ -2381,9 +2606,6 @@ void engine_maketasks(struct engine *e) { tic2 = getticks(); #ifdef WITH_MPI - if (e->policy & engine_policy_feedback) - error("Cannot run stellar feedback with MPI (yet)."); - /* Add the communication tasks if MPI is being used. */ if (e->policy & engine_policy_mpi) { diff --git a/src/engine_marktasks.c b/src/engine_marktasks.c index 3a26dbb2f4..e45e3c11f0 100644 --- a/src/engine_marktasks.c +++ b/src/engine_marktasks.c @@ -53,6 +53,101 @@ #include "proxy.h" #include "timers.h" +#ifdef WITH_MPI +/** + * @brief Activate the MPI for the stars + */ +void engine_activate_stars_mpi(struct engine *e, struct scheduler *s, + struct cell *ci, struct cell *cj) { + + const int nodeID = e->nodeID; + const int ci_nodeID = ci->nodeID; + const int cj_nodeID = cj->nodeID; + const int ci_active_stars = cell_is_active_stars(ci, e) && + ci->stars.count != 0 && cj->hydro.count != 0; + const int cj_active_stars = cell_is_active_stars(cj, e) && + cj->stars.count != 0 && ci->hydro.count != 0; + + /* Activate the send/recv tasks. */ + if (ci_nodeID != nodeID) { + + // TODO Alexei: here I think you will just need to uncomment the code + // and modify it from hydro to stars (this is almost just a copy from the + // hydro) + /* If the local cell is active, receive data from the foreign cell. */ + if (cj_active_stars) { + scheduler_activate(s, ci->mpi.hydro.recv_xv); + /* if (ci_active_hydro) { */ + /* scheduler_activate(s, ci->mpi.hydro.recv_rho); */ + /* } */ + } + + /* If the foreign cell is active, we want its ti_end values. */ + /* if (ci_active_stars) scheduler_activate(s, ci->mpi.recv_ti); */ + + /* Is the foreign cell active and will need stuff from us? */ + if (ci_active_stars) { + + struct link *l = + scheduler_activate_send(s, cj->mpi.hydro.send_xv, ci_nodeID); + + /* Drift the cell which will be sent at the level at which it is + sent, i.e. drift the cell specified in the send task (l->t) + itself. */ + cell_activate_drift_part(l->t->ci, s); + + /* If the local cell is also active, more stuff will be needed. */ + /* if (cj_active_hydro) { */ + /* scheduler_activate_send(s, cj->mpi.hydro.send_rho, ci_nodeID); + */ + + /* } */ + } + + /* If the local cell is active, send its ti_end values. */ + /* if (cj_active_hydro) */ + /* scheduler_activate_send(s, cj->mpi.send_ti, ci_nodeID); */ + + } else if (cj_nodeID != nodeID) { + /* If the local cell is active, receive data from the foreign cell. 
*/ + if (ci_active_stars) { + + scheduler_activate(s, cj->mpi.hydro.recv_xv); + /* if (cj_active_hydro) { */ + /* scheduler_activate(s, cj->mpi.hydro.recv_rho); */ + /* } */ + } + + /* If the foreign cell is active, we want its ti_end values. */ + /* if (cj_active_hydro) scheduler_activate(s, cj->mpi.recv_ti); */ + + /* Is the foreign cell active and will need stuff from us? */ + if (cj_active_stars) { + + struct link *l = + scheduler_activate_send(s, ci->mpi.hydro.send_xv, cj_nodeID); + + /* Drift the cell which will be sent at the level at which it is + sent, i.e. drift the cell specified in the send task (l->t) + itself. */ + cell_activate_drift_part(l->t->ci, s); + + /* If the local cell is also active, more stuff will be needed. */ + /* if (ci_active_hydro) { */ + + /* scheduler_activate_send(s, ci->mpi.hydro.send_rho, cj_nodeID); + */ + + /* } */ + } + + /* If the local cell is active, send its ti_end values. */ + /* if (ci_active_hydro) */ + /* scheduler_activate_send(s, ci->mpi.send_ti, cj_nodeID); */ + } +} +#endif + /** * @brief Mark tasks to be un-skipped and set the sort flags accordingly. * Threadpool mapper function. @@ -273,39 +368,43 @@ void engine_marktasks_mapper(void *map_data, int num_elements, if (t_type == task_type_pair) { /* Do ci */ - /* Store some values. */ - atomic_or(&cj->hydro.requires_sorts, 1 << t->flags); - atomic_or(&ci->stars.requires_sorts, 1 << t->flags); + if (ci_active_stars && ci->nodeID == engine_rank) { + /* Store some values. */ + atomic_or(&cj->hydro.requires_sorts, 1 << t->flags); + atomic_or(&ci->stars.requires_sorts, 1 << t->flags); - cj->hydro.dx_max_sort_old = cj->hydro.dx_max_sort; - ci->stars.dx_max_sort_old = ci->stars.dx_max_sort; + cj->hydro.dx_max_sort_old = cj->hydro.dx_max_sort; + ci->stars.dx_max_sort_old = ci->stars.dx_max_sort; - /* Activate the hydro drift tasks. */ - if (ci_nodeID == nodeID) cell_activate_drift_spart(ci, s); + /* Activate the hydro drift tasks. */ + if (ci_nodeID == nodeID) cell_activate_drift_spart(ci, s); - if (cj_nodeID == nodeID) cell_activate_drift_part(cj, s); + if (cj_nodeID == nodeID) cell_activate_drift_part(cj, s); - /* Check the sorts and activate them if needed. */ - cell_activate_hydro_sorts(cj, t->flags, s); + /* Check the sorts and activate them if needed. */ + cell_activate_hydro_sorts(cj, t->flags, s); - cell_activate_stars_sorts(ci, t->flags, s); + cell_activate_stars_sorts(ci, t->flags, s); + } /* Do cj */ - /* Store some values. */ - atomic_or(&ci->hydro.requires_sorts, 1 << t->flags); - atomic_or(&cj->stars.requires_sorts, 1 << t->flags); + if (ci_active_stars && ci->nodeID == engine_rank) { + /* Store some values. */ + atomic_or(&ci->hydro.requires_sorts, 1 << t->flags); + atomic_or(&cj->stars.requires_sorts, 1 << t->flags); - ci->hydro.dx_max_sort_old = ci->hydro.dx_max_sort; - cj->stars.dx_max_sort_old = cj->stars.dx_max_sort; + ci->hydro.dx_max_sort_old = ci->hydro.dx_max_sort; + cj->stars.dx_max_sort_old = cj->stars.dx_max_sort; - /* Activate the hydro drift tasks. */ - if (ci_nodeID == nodeID) cell_activate_drift_part(ci, s); + /* Activate the hydro drift tasks. */ + if (ci_nodeID == nodeID) cell_activate_drift_part(ci, s); - if (cj_nodeID == nodeID) cell_activate_drift_spart(cj, s); + if (cj_nodeID == nodeID) cell_activate_drift_spart(cj, s); - /* Check the sorts and activate them if needed. */ - cell_activate_hydro_sorts(ci, t->flags, s); - cell_activate_stars_sorts(cj, t->flags, s); + /* Check the sorts and activate them if needed. 
*/ + cell_activate_hydro_sorts(ci, t->flags, s); + cell_activate_stars_sorts(cj, t->flags, s); + } } /* Store current values of dx_max and h_max. */ @@ -368,7 +467,6 @@ void engine_marktasks_mapper(void *map_data, int num_elements, /* Is the foreign cell active and will need stuff from us? */ if (ci_active_hydro) { - struct link *l = scheduler_activate_send(s, cj->mpi.hydro.send_xv, ci_nodeID); @@ -396,6 +494,7 @@ void engine_marktasks_mapper(void *map_data, int num_elements, /* If the local cell is active, receive data from the foreign cell. */ if (ci_active_hydro) { + scheduler_activate(s, cj->mpi.hydro.recv_xv); if (cj_active_hydro) { scheduler_activate(s, cj->mpi.hydro.recv_rho); @@ -443,8 +542,11 @@ void engine_marktasks_mapper(void *map_data, int num_elements, /* Too much particle movement? */ if (cell_need_rebuild_for_stars_pair(ci, cj)) *rebuild_space = 1; + if (cell_need_rebuild_for_hydro_pair(ci, cj)) *rebuild_space = 1; - // LOIC: Need implementing MPI case +#ifdef WITH_MPI + engine_activate_stars_mpi(e, s, ci, cj); +#endif } /* Only interested in gravity tasks as of here. */ diff --git a/src/runner.c b/src/runner.c index df8a52e2ca..e25a62914f 100644 --- a/src/runner.c +++ b/src/runner.c @@ -109,7 +109,9 @@ /* Import the stars density loop functions. */ #define FUNCTION density +#define ONLY_LOCAL 1 #include "runner_doiact_stars.h" +#undef ONLY_LOCAL #undef FUNCTION /* Import the stars feedback loop functions. */ @@ -204,6 +206,14 @@ void runner_do_stars_ghost(struct runner *r, struct cell *c, int timer) { sp->density.wcount_dh * h_old_dim + hydro_dimension * sp->density.wcount * h_old_dim_minus_one; + /* Skip if h is already h_max and we don't have enough neighbours */ + if ((sp->h >= stars_h_max) && (f < 0.f)) { + + /* Ok, we are done with this particle */ + continue; + } + + /* Normal case: Use Newton-Raphson to get a better value of h */ /* Avoid floating point exception from f_prime = 0 */ h_new = h_old - f / (f_prime + FLT_MIN); #ifdef SWIFT_DEBUG_CHECKS @@ -915,8 +925,9 @@ void runner_do_stars_sort(struct runner *r, struct cell *c, int flags, if (flags == 0 && !c->stars.do_sub_sort) return; /* Check that the particles have been moved to the current time */ - if (flags && !cell_are_spart_drifted(c, r->e)) + if (flags && !cell_are_spart_drifted(c, r->e)) { error("Sorting un-drifted cell c->nodeID=%d", c->nodeID); + } #ifdef SWIFT_DEBUG_CHECKS /* Make sure the sort flags are consistent (downward). */ @@ -2861,9 +2872,11 @@ void runner_do_recv_gpart(struct runner *r, struct cell *c, int timer) { * * @param r The runner thread. * @param c The cell. + * @param clear_sorts Should we clear the sort flag and hence trigger a sort ? * @param timer Are we timing this ? */ -void runner_do_recv_spart(struct runner *r, struct cell *c, int timer) { +void runner_do_recv_spart(struct runner *r, struct cell *c, int clear_sorts, + int timer) { #ifdef WITH_MPI @@ -2871,19 +2884,22 @@ void runner_do_recv_spart(struct runner *r, struct cell *c, int timer) { const size_t nr_sparts = c->stars.count; const integertime_t ti_current = r->e->ti_current; - error("Need to add h_max computation"); - TIMER_TIC; integertime_t ti_gravity_end_min = max_nr_timesteps; integertime_t ti_gravity_end_max = 0; + integertime_t ti_stars_end_min = max_nr_timesteps; timebin_t time_bin_min = num_time_bins; timebin_t time_bin_max = 0; + float h_max = 0.f; #ifdef SWIFT_DEBUG_CHECKS if (c->nodeID == engine_rank) error("Updating a local cell!"); #endif + /* Clear this cell's sorted mask. 
*/ + if (clear_sorts) c->stars.sorted = 0; + /* If this cell is a leaf, collect the particle data. */ if (!c->split) { @@ -2892,22 +2908,27 @@ void runner_do_recv_spart(struct runner *r, struct cell *c, int timer) { if (sparts[k].time_bin == time_bin_inhibited) continue; time_bin_min = min(time_bin_min, sparts[k].time_bin); time_bin_max = max(time_bin_max, sparts[k].time_bin); + h_max = max(h_max, sparts[k].h); } /* Convert into a time */ ti_gravity_end_min = get_integer_time_end(ti_current, time_bin_min); ti_gravity_end_max = get_integer_time_end(ti_current, time_bin_max); + ti_stars_end_min = get_integer_time_end(ti_current, time_bin_min); } /* Otherwise, recurse and collect. */ else { for (int k = 0; k < 8; k++) { if (c->progeny[k] != NULL && c->progeny[k]->stars.count > 0) { - runner_do_recv_spart(r, c->progeny[k], 0); + runner_do_recv_spart(r, c->progeny[k], clear_sorts, 0); ti_gravity_end_min = min(ti_gravity_end_min, c->progeny[k]->grav.ti_end_min); ti_gravity_end_max = max(ti_gravity_end_max, c->progeny[k]->grav.ti_end_max); + ti_stars_end_min = + min(ti_stars_end_min, c->progeny[k]->stars.ti_end_min); + h_max = max(h_max, c->progeny[k]->stars.h_max); } } } @@ -2918,12 +2939,19 @@ void runner_do_recv_spart(struct runner *r, struct cell *c, int timer) { "Received a cell at an incorrect time c->ti_end_min=%lld, " "e->ti_current=%lld.", ti_gravity_end_min, ti_current); + + if (ti_stars_end_min < ti_current) + error( + "Received a cell at an incorrect time c->ti_end_min=%lld, " + "e->ti_current=%lld.", + ti_stars_end_min, ti_current); #endif /* ... and store. */ // c->grav.ti_end_min = ti_gravity_end_min; // c->grav.ti_end_max = ti_gravity_end_max; c->grav.ti_old_part = ti_current; + c->stars.h_max = h_max; if (timer) TIMER_TOC(timer_dorecv_spart); @@ -3152,7 +3180,7 @@ void *runner_main(void *data) { } else if (t->subtype == task_subtype_gpart) { runner_do_recv_gpart(r, ci, 1); } else if (t->subtype == task_subtype_spart) { - runner_do_recv_spart(r, ci, 1); + runner_do_recv_spart(r, ci, 0, 1); } else if (t->subtype == task_subtype_multipole) { cell_unpack_multipoles(ci, (struct gravity_tensors *)t->buff); free(t->buff); diff --git a/src/runner_doiact_stars.h b/src/runner_doiact_stars.h index e816d80399..40bea6cfc0 100644 --- a/src/runner_doiact_stars.h +++ b/src/runner_doiact_stars.h @@ -74,6 +74,11 @@ * @param timer 1 if the time is to be recorded. 
*/ void DOSELF1_STARS(struct runner *r, struct cell *c, int timer) { + +#ifdef SWIFT_DEBUG_CHECKS + if (c->nodeID != engine_rank) error("Should be run on a different node"); +#endif + const struct engine *e = r->e; const struct cosmology *cosmo = e->cosmology; @@ -138,6 +143,12 @@ void DOSELF1_STARS(struct runner *r, struct cell *c, int timer) { void DO_NONSYM_PAIR1_STARS(struct runner *r, struct cell *restrict ci, struct cell *restrict cj) { +#ifdef SWIFT_DEBUG_CHECKS +#ifdef ONLY_LOCAL + if (ci->nodeID != engine_rank) error("Should be run on a different node"); +#endif +#endif + const struct engine *e = r->e; const struct cosmology *cosmo = e->cosmology; @@ -202,9 +213,16 @@ void DO_NONSYM_PAIR1_STARS(struct runner *r, struct cell *restrict ci, void DOPAIR1_STARS(struct runner *r, struct cell *restrict ci, struct cell *restrict cj, int timer) { - if (ci->stars.count != 0 && cj->hydro.count != 0) +#ifdef ONLY_LOCAL + const int ci_local = ci->nodeID == engine_rank; + const int cj_local = cj->nodeID == engine_rank; +#else + const int ci_local = 1; + const int cj_local = 1; +#endif + if (ci_local && ci->stars.count != 0 && cj->hydro.count != 0) DO_NONSYM_PAIR1_STARS(r, ci, cj); - if (cj->stars.count != 0 && ci->hydro.count != 0) + if (cj_local && cj->stars.count != 0 && ci->hydro.count != 0) DO_NONSYM_PAIR1_STARS(r, cj, ci); } @@ -227,6 +245,9 @@ void DOPAIR1_SUBSET_STARS(struct runner *r, struct cell *restrict ci, int scount, struct cell *restrict cj, const double *shift) { +#ifdef SWIFT_DEBUG_CHECKS + if (ci->nodeID != engine_rank) error("Should be run on a different node"); +#endif const struct engine *e = r->e; const struct cosmology *cosmo = e->cosmology; @@ -293,6 +314,10 @@ void DOSELF1_SUBSET_STARS(struct runner *r, struct cell *restrict ci, struct spart *restrict sparts, int *restrict ind, int scount) { +#ifdef SWIFT_DEBUG_CHECKS + if (ci->nodeID != engine_rank) error("Should be run on a different node"); +#endif + const struct engine *e = r->e; const struct cosmology *cosmo = e->cosmology; @@ -1028,8 +1053,17 @@ void DOPAIR1_BRANCH_STARS(struct runner *r, struct cell *ci, struct cell *cj) { const struct engine *restrict e = r->e; const int ci_active = cell_is_active_stars(ci, e); const int cj_active = cell_is_active_stars(cj, e); - const int do_ci = (ci->stars.count != 0 && cj->hydro.count != 0 && ci_active); - const int do_cj = (cj->stars.count != 0 && ci->hydro.count != 0 && cj_active); +#ifdef ONLY_LOCAL + const int ci_local = ci->nodeID == engine_rank; + const int cj_local = cj->nodeID == engine_rank; +#else + const int ci_local = 1; + const int cj_local = 1; +#endif + const int do_ci = + (ci->stars.count != 0 && cj->hydro.count != 0 && ci_active && ci_local); + const int do_cj = + (cj->stars.count != 0 && ci->hydro.count != 0 && cj_active && cj_local); /* Anything to do here? */ if (!do_ci && !do_cj) return; @@ -1314,10 +1348,17 @@ void DOSUB_PAIR1_STARS(struct runner *r, struct cell *ci, struct cell *cj, /* Otherwise, compute the pair directly. 
*/ else { +#ifdef ONLY_LOCAL + const int ci_local = ci->nodeID == engine_rank; + const int cj_local = cj->nodeID == engine_rank; +#else + const int ci_local = 1; + const int cj_local = 1; +#endif const int do_ci = ci->stars.count != 0 && cj->hydro.count != 0 && - cell_is_active_stars(ci, e); + cell_is_active_stars(ci, e) && ci_local; const int do_cj = cj->stars.count != 0 && ci->hydro.count != 0 && - cell_is_active_stars(cj, e); + cell_is_active_stars(cj, e) && cj_local; if (do_ci) { diff --git a/src/scheduler.c b/src/scheduler.c index 1083be1996..1c82e9dd2e 100644 --- a/src/scheduler.c +++ b/src/scheduler.c @@ -1030,8 +1030,17 @@ static void scheduler_splittask_stars(struct task *t, struct scheduler *s) { redo = 0; /* Empty task? */ + /* Need defines in order to evaluate after check for t->ci == NULL */ +#define pair_no_part \ + t->type == task_type_pair && \ + (t->ci->stars.count == 0 || t->cj->hydro.count == 0) && \ + (t->cj->stars.count == 0 || t->ci->hydro.count == 0) +#define self_no_part \ + t->type == task_type_self && \ + (t->ci->stars.count == 0 || t->ci->hydro.count == 0) + if ((t->ci == NULL) || (t->type == task_type_pair && t->cj == NULL) || - t->ci->stars.count == 0 || (t->cj != NULL && t->cj->stars.count == 0)) { + (self_no_part) || (pair_no_part)) { t->type = task_type_none; t->subtype = task_subtype_none; t->cj = NULL; @@ -1079,9 +1088,11 @@ static void scheduler_splittask_stars(struct task *t, struct scheduler *s) { /* Make a task for each pair of progeny */ for (int j = 0; j < 8; j++) - if (ci->progeny[j] != NULL && ci->progeny[j]->stars.count) + if (ci->progeny[j] != NULL && + (ci->progeny[j]->stars.count || ci->progeny[j]->hydro.count)) for (int k = j + 1; k < 8; k++) - if (ci->progeny[k] != NULL && ci->progeny[k]->stars.count) + if (ci->progeny[k] != NULL && (ci->progeny[k]->stars.count || + ci->progeny[k]->hydro.count)) scheduler_splittask_stars( scheduler_addtask(s, task_type_pair, t->subtype, sub_sid_flag[j][k], 0, ci->progeny[j], @@ -1110,14 +1121,25 @@ static void scheduler_splittask_stars(struct task *t, struct scheduler *s) { double shift[3]; const int sid = space_getsid(s->space, &ci, &cj, shift); +#ifdef SWIFT_DEBUG_CHECKS + if (sid != t->flags) + error("Got pair task with incorrect flags: sid=%d flags=%lld", sid, + t->flags); +#endif + + /* Compute number of interactions */ + const int ci_interaction = (ci->stars.count * cj->hydro.count); + const int cj_interaction = (cj->stars.count * ci->hydro.count); + + const int number_interactions = (ci_interaction + cj_interaction); + /* Should this task be split-up? */ if (cell_can_split_pair_stars_task(ci) && cell_can_split_pair_stars_task(cj)) { /* Replace by a single sub-task? */ - if (scheduler_dosub && /* Use division to avoid integer overflow. */ - ci->stars.count * sid_scale[sid] < - space_subsize_pair_stars / cj->stars.count && + if (scheduler_dosub && + number_interactions * sid_scale[sid] < space_subsize_pair_stars && !sort_is_corner(sid)) { /* Make this task a sub task. */ @@ -1466,15 +1488,17 @@ static void scheduler_splittask_stars(struct task *t, struct scheduler *s) { /* Otherwise, break it up if it is too large? */ } else if (scheduler_doforcesplit && ci->split && cj->split && - (ci->stars.count > space_maxsize / cj->stars.count)) { + (number_interactions > space_maxsize)) { /* Replace the current task. 
*/ t->type = task_type_none; for (int j = 0; j < 8; j++) - if (ci->progeny[j] != NULL && ci->progeny[j]->stars.count) + if (ci->progeny[j] != NULL && + (ci->progeny[j]->stars.count || ci->progeny[j]->hydro.count)) for (int k = 0; k < 8; k++) - if (cj->progeny[k] != NULL && cj->progeny[k]->stars.count) { + if (cj->progeny[k] != NULL && (cj->progeny[k]->stars.count || + cj->progeny[k]->hydro.count)) { struct task *tl = scheduler_addtask(s, task_type_pair, t->subtype, 0, 0, ci->progeny[j], cj->progeny[k]); diff --git a/src/stars/Default/stars.h b/src/stars/Default/stars.h index ab4c6c7013..688a52b7a7 100644 --- a/src/stars/Default/stars.h +++ b/src/stars/Default/stars.h @@ -56,8 +56,6 @@ __attribute__((always_inline)) INLINE static void stars_init_spart( struct spart* sp) { #ifdef DEBUG_INTERACTIONS_STARS - for (int i = 0; i < MAX_NUM_OF_NEIGHBOURS_STARS; ++i) - sp->ids_ngbs_density[i] = -1; sp->num_ngb_density = 0; #endif diff --git a/src/stars/Default/stars_iact.h b/src/stars/Default/stars_iact.h index 9e27f86028..c543054f5f 100644 --- a/src/stars/Default/stars_iact.h +++ b/src/stars/Default/stars_iact.h @@ -33,8 +33,6 @@ runner_iact_nonsym_stars_density(float r2, const float *dx, float hi, float hj, #ifdef DEBUG_INTERACTIONS_STARS /* Update ngb counters */ - if (si->num_ngb_density < MAX_NUM_OF_NEIGHBOURS_STARS) - si->ids_ngbs_density[si->num_ngb_density] = pj->id; ++si->num_ngb_density; #endif } diff --git a/src/stars/Default/stars_io.h b/src/stars/Default/stars_io.h index a6c2768f71..d5d692e403 100644 --- a/src/stars/Default/stars_io.h +++ b/src/stars/Default/stars_io.h @@ -60,10 +60,14 @@ INLINE static void stars_write_particles(const struct spart *sparts, struct io_props *list, int *num_fields) { - /* Say how much we want to read */ +/* Say how much we want to write */ +#ifdef DEBUG_INTERACTIONS_STARS + *num_fields = 6; +#else *num_fields = 5; +#endif - /* List what we want to read */ + /* List what we want to write */ list[0] = io_make_output_field("Coordinates", DOUBLE, 3, UNIT_CONV_LENGTH, sparts, x); list[1] = @@ -74,6 +78,11 @@ INLINE static void stars_write_particles(const struct spart *sparts, sparts, id); list[4] = io_make_output_field("SmoothingLength", FLOAT, 1, UNIT_CONV_LENGTH, sparts, h); + +#ifdef DEBUG_INTERACTIONS_STARS + list[5] = io_make_output_field("NumberNeighbors", INT, 1, UNIT_CONV_NO_UNITS, + sparts, num_ngb_density); +#endif } /** diff --git a/src/stars/Default/stars_part.h b/src/stars/Default/stars_part.h index eb253fd3c9..53b92da1f4 100644 --- a/src/stars/Default/stars_part.h +++ b/src/stars/Default/stars_part.h @@ -86,9 +86,6 @@ struct spart { #endif #ifdef DEBUG_INTERACTIONS_STARS - /*! List of interacting particles in the density SELF and PAIR */ - long long ids_ngbs_density[MAX_NUM_OF_NEIGHBOURS_STARS]; - /*! Number of interactions in the density SELF and PAIR */ int num_ngb_density; #endif -- GitLab
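
For readers skimming this patch, the one piece of new logic that can be read in isolation is the geometric rebuild test added to src/cell.h (cell_need_rebuild_for_stars_pair): a pair of cells needs a rebuild once the star interaction range plus the distance the star particles have drifted since the last sort no longer fits within the cell spacing. The sketch below is a minimal, self-contained illustration of that criterion only; the toy_cell struct, the TOY_KERNEL_GAMMA value and the field names are stand-ins assumed for the example, not SWIFT's actual cell layout or API.

/*
 * Standalone sketch of the pair-rebuild criterion used for star particles.
 * Illustrative only: the real implementation lives in cell.h and operates
 * on struct cell with kernel_gamma defined by the chosen SPH kernel.
 */
#include <stdio.h>

/* Stand-in for kernel_gamma (exact value depends on the SPH kernel). */
#define TOY_KERNEL_GAMMA 1.825742f

/* Minimal stand-in holding only the fields the criterion reads. */
struct toy_cell {
  float stars_h_max;       /* largest star smoothing length in the cell */
  float stars_dx_max_part; /* largest distance any spart moved since sorting */
  float dmin;              /* minimal cell dimension (same for both cells) */
};

/* Rebuild if the interaction range plus accumulated particle motion
 * no longer fits inside one cell width. */
static int toy_need_rebuild_for_stars_pair(const struct toy_cell *ci,
                                           const struct toy_cell *cj) {
  const float h_max =
      ci->stars_h_max > cj->stars_h_max ? ci->stars_h_max : cj->stars_h_max;
  return TOY_KERNEL_GAMMA * h_max + ci->stars_dx_max_part +
             cj->stars_dx_max_part >
         cj->dmin;
}

int main(void) {
  struct toy_cell ci = {0.10f, 0.01f, 0.50f};
  struct toy_cell cj = {0.12f, 0.02f, 0.50f};
  printf("rebuild needed: %d\n", toy_need_rebuild_for_stars_pair(&ci, &cj));
  return 0;
}

The same pattern mirrors the existing hydro check (cell_need_rebuild_for_hydro_pair); the patch activates both tests for star/hydro pair tasks so that either the gas or the star particles moving too far triggers a space rebuild.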