From d370894c72c1a688422673664a3d0def46fc04fe Mon Sep 17 00:00:00 2001 From: "Peter W. Draper" <p.w.draper@durham.ac.uk> Date: Thu, 29 Sep 2016 18:09:34 +0100 Subject: [PATCH] Need to un-skip non-local tasks when using MPI, so avoid doing this job at the same time as drifts All active cells must be drifted so do not skip them if rebuilding. If rebuilding no need to unskip tasks, that will be after the rebuild Make sure we visit all related sends of the tasks, not just of the cell --- src/cell.c | 61 ++++++++++++++++++++++++++++++++++++++-------------- src/runner.c | 18 ++++++---------- 2 files changed, 52 insertions(+), 27 deletions(-) diff --git a/src/cell.c b/src/cell.c index d246a0b0ce..cb78ee51c4 100644 --- a/src/cell.c +++ b/src/cell.c @@ -931,17 +931,53 @@ int cell_unskip_tasks(struct cell *c) { return 1; #ifdef WITH_MPI - /* Activate the recv tasks. */ + /* Activate the send/recv flags. */ if (ci->nodeID != engine_rank) { - if (ci->recv_xv != NULL) ci->recv_xv->skip = 0; - if (ci->recv_rho != NULL) ci->recv_rho->skip = 0; - if (ci->recv_gradient != NULL) ci->recv_gradient->skip = 0; - if (ci->recv_ti != NULL) ci->recv_ti->skip = 0; + + /* Activate the tasks to recv foreign cell ci's data. */ + ci->recv_xv->skip = 0; + ci->recv_rho->skip = 0; + ci->recv_ti->skip = 0; + + /* Look for the local cell cj's send tasks. */ + struct link *l = NULL; + for (l = cj->send_xv; l != NULL && l->t->cj->nodeID != ci->nodeID; + l = l->next); + if (l == NULL) error("Missing link to send_xv task."); + l->t->skip = 0; + + for (l = cj->send_rho; l != NULL && l->t->cj->nodeID != ci->nodeID; + l = l->next); + if (l == NULL) error("Missing link to send_rho task."); + l->t->skip = 0; + + for (l = cj->send_ti; l != NULL && l->t->cj->nodeID != ci->nodeID; + l = l->next); + if (l == NULL) error("Missing link to send_ti task."); + l->t->skip = 0; + } else if (cj->nodeID != engine_rank) { - if (cj->recv_xv != NULL) cj->recv_xv->skip = 0; - if (cj->recv_rho != NULL) cj->recv_rho->skip = 0; - if (cj->recv_gradient != NULL) cj->recv_gradient->skip = 0; - if (cj->recv_ti != NULL) cj->recv_ti->skip = 0; + + /* Activate the tasks to recv foreign cell cj's data. */ + cj->recv_xv->skip = 0; + cj->recv_rho->skip = 0; + cj->recv_ti->skip = 0; + /* Look for the local cell ci's send tasks. */ + struct link *l = NULL; + for (l = ci->send_xv; l != NULL && l->t->cj->nodeID != cj->nodeID; + l = l->next); + if (l == NULL) error("Missing link to send_xv task."); + l->t->skip = 0; + + for (l = ci->send_rho; l != NULL && l->t->cj->nodeID != cj->nodeID; + l = l->next); + if (l == NULL) error("Missing link to send_rho task."); + l->t->skip = 0; + + for (l = ci->send_ti; l != NULL && l->t->cj->nodeID != cj->nodeID; + l = l->next); + if (l == NULL) error("Missing link to send_ti task."); + l->t->skip = 0; } #endif } @@ -951,13 +987,6 @@ int cell_unskip_tasks(struct cell *c) { for (struct link *l = c->gradient; l != NULL; l = l->next) l->t->skip = 0; for (struct link *l = c->force; l != NULL; l = l->next) l->t->skip = 0; for (struct link *l = c->grav; l != NULL; l = l->next) l->t->skip = 0; -#ifdef WITH_MPI - for (struct link *l = c->send_xv; l != NULL; l = l->next) l->t->skip = 0; - for (struct link *l = c->send_rho; l != NULL; l = l->next) l->t->skip = 0; - for (struct link *l = c->send_gradient; l != NULL; l = l->next) - l->t->skip = 0; - for (struct link *l = c->send_ti; l != NULL; l = l->next) l->t->skip = 0; -#endif if (c->extra_ghost != NULL) c->extra_ghost->skip = 0; if (c->ghost != NULL) c->ghost->skip = 0; if (c->init != NULL) c->init->skip = 0; diff --git a/src/runner.c b/src/runner.c index 410458fadb..d303417b31 100644 --- a/src/runner.c +++ b/src/runner.c @@ -765,9 +765,6 @@ static void runner_do_drift(struct cell *c, struct engine *e) { c->updated = 0; c->g_updated = 0; - /* Should we abort as a rebuild has been triggered ? */ - if (e->forcerebuild) return; - /* Do we need to drift ? */ if (!e->drift_all && !cell_is_drift_needed(c, ti_current)) return; @@ -909,14 +906,6 @@ static void runner_do_drift(struct cell *c, struct engine *e) { /* Update the time of the last drift */ c->ti_old = ti_current; - - /* Now let's un-skip the tasks associated with this active cell */ - if (c->ti_end_min == ti_current) { - - const int forcerebuild = cell_unskip_tasks(c); - - if (forcerebuild) atomic_inc(&e->forcerebuild); - } } /** @@ -938,6 +927,13 @@ void runner_do_drift_mapper(void *map_data, int num_elements, /* Only drift local particles. */ if (c != NULL && c->nodeID == e->nodeID) runner_do_drift(c, e); + + /* Now let's un-skip the tasks associated with this cell if active + * and we're not rebuilding which will repeat this work. */ + if (!e->forcerebuild && c->ti_end_min == e->ti_current) { + const int forcerebuild = cell_unskip_tasks(c); + if (forcerebuild) atomic_inc(&e->forcerebuild); + } } } -- GitLab