diff --git a/examples/EAGLE_12/eagle_12.yml b/examples/EAGLE_12/eagle_12.yml index 8ebe29fb0216e16aeaafcdc086085d8c9879fc5f..77d3f358b9ba66ab9d40fe664d85ee2d511ab5de 100644 --- a/examples/EAGLE_12/eagle_12.yml +++ b/examples/EAGLE_12/eagle_12.yml @@ -22,9 +22,6 @@ TimeIntegration: dt_min: 1e-10 # The minimal time-step size of the simulation (in internal units). dt_max: 1e-3 # The maximal time-step size of the simulation (in internal units). -Scheduler: - max_top_level_cells: 8 - # Parameters governing the snapshots Snapshots: basename: eagle # Common part of the name of output files diff --git a/examples/main.c b/examples/main.c index cf90adc8c8733445d3ee36f8a764b7bcdfe53e5a..38847543bd25d6ce877b713b7e6e9ff614eb16bb 100644 --- a/examples/main.c +++ b/examples/main.c @@ -486,6 +486,17 @@ int main(int argc, char *argv[]) { MPI_Bcast(params, sizeof(struct swift_params), MPI_BYTE, 0, MPI_COMM_WORLD); #endif + /* Temporary early aborts for modes not supported over MPI. */ +#ifdef WITH_MPI + if (with_mpole_reconstruction && nr_nodes > 1) + error("Cannot reconstruct m-poles every step over MPI (yet)."); +#endif + +#if defined(WITH_MPI) && defined(HAVE_VELOCIRAPTOR) + if (with_structure_finding && nr_nodes > 1) + error("VEOCIraptor not yet enabled over MPI."); +#endif + /* Check that we can write the snapshots by testing if the output * directory exists and is searchable and writable. */ char basename[PARSER_MAX_LINE_SIZE]; @@ -729,15 +740,6 @@ int main(int argc, char *argv[]) { fflush(stdout); } -#ifdef WITH_MPI - if (periodic && with_self_gravity) - error("Periodic self-gravity over MPI temporarily disabled."); -#endif - -#if defined(WITH_MPI) && defined(HAVE_VELOCIRAPTOR) - if (with_structure_finding) error("VEOCIraptor not yet enabled over MPI."); -#endif - #ifdef SWIFT_DEBUG_CHECKS /* Check once and for all that we don't have unwanted links */ if (!with_stars && !dry_run) { @@ -769,8 +771,7 @@ int main(int argc, char *argv[]) { if (myrank == 0) message( "Read %lld gas particles, %lld star particles and %lld gparts from " - "the " - "ICs.", + "the ICs.", N_total[0], N_total[2], N_total[1]); /* Verify that the fields to dump actually exist */ @@ -905,11 +906,10 @@ int main(int argc, char *argv[]) { "particles (%lld gravity particles)", N_total[0], N_total[2], N_total[1] > 0 ? N_DM : 0, N_total[1]); message( - "from t=%.3e until t=%.3e with %d threads and %d queues " - "(dt_min=%.3e, " - "dt_max=%.3e)...", - e.time_begin, e.time_end, e.nr_threads, e.sched.nr_queues, e.dt_min, - e.dt_max); + "from t=%.3e until t=%.3e with %d ranks, %d threads / rank and %d " + "task queues / rank (dt_min=%.3e, dt_max=%.3e)...", + e.time_begin, e.time_end, nr_nodes, e.nr_threads, e.sched.nr_queues, + e.dt_min, e.dt_max); fflush(stdout); } } diff --git a/src/active.h b/src/active.h index 3fe52a86b373ff0b33b88eca0dac9b7c6b58a216..949babf1b1f8d27c89d0a5d9c3a93f1825512fd3 100644 --- a/src/active.h +++ b/src/active.h @@ -140,6 +140,19 @@ __attribute__((always_inline)) INLINE static int cell_is_active_gravity( return (c->ti_gravity_end_min == e->ti_current); } +/** + * @brief Does a cell contain any multipole requiring calculation ? + * + * @param c The #cell. + * @param e The #engine containing information about the current time. + * @return 1 if the #cell contains at least an active particle, 0 otherwise. + */ +__attribute__((always_inline)) INLINE static int cell_is_active_gravity_mm( + const struct cell *c, const struct engine *e) { + + return (c->ti_gravity_end_min == e->ti_current); +} + /** * @brief Are *all* g-particles in a cell finishing their time-step now ? * diff --git a/src/cell.c b/src/cell.c index f7a374e293e13e108b14c733ec066028f8a33c7b..6ad7e4d97e754e98a4ab0eeb76a188ed6bea7c73 100644 --- a/src/cell.c +++ b/src/cell.c @@ -167,10 +167,13 @@ int cell_link_sparts(struct cell *c, struct spart *sparts) { * @param c The #cell. * @param pc Pointer to an array of packed cells in which the * cells will be packed. + * @param with_gravity Are we running with gravity and hence need + * to exchange multipoles? * * @return The number of packed cells. */ -int cell_pack(struct cell *restrict c, struct pcell *restrict pc) { +int cell_pack(struct cell *restrict c, struct pcell *restrict pc, + const int with_gravity) { #ifdef WITH_MPI @@ -187,6 +190,21 @@ int cell_pack(struct cell *restrict c, struct pcell *restrict pc) { pc->gcount = c->gcount; pc->scount = c->scount; + /* Copy the Multipole related information */ + if (with_gravity) { + const struct gravity_tensors *mp = c->multipole; + + pc->m_pole = mp->m_pole; + pc->CoM[0] = mp->CoM[0]; + pc->CoM[1] = mp->CoM[1]; + pc->CoM[2] = mp->CoM[2]; + pc->CoM_rebuild[0] = mp->CoM_rebuild[0]; + pc->CoM_rebuild[1] = mp->CoM_rebuild[1]; + pc->CoM_rebuild[2] = mp->CoM_rebuild[2]; + pc->r_max = mp->r_max; + pc->r_max_rebuild = mp->r_max_rebuild; + } + #ifdef SWIFT_DEBUG_CHECKS pc->cellID = c->cellID; #endif @@ -196,7 +214,7 @@ int cell_pack(struct cell *restrict c, struct pcell *restrict pc) { for (int k = 0; k < 8; k++) if (c->progeny[k] != NULL) { pc->progeny[k] = count; - count += cell_pack(c->progeny[k], &pc[count]); + count += cell_pack(c->progeny[k], &pc[count], with_gravity); } else { pc->progeny[k] = -1; } @@ -251,11 +269,13 @@ int cell_pack_tags(const struct cell *c, int *tags) { * @param pc An array of packed #pcell. * @param c The #cell in which to unpack the #pcell. * @param s The #space in which the cells are created. + * @param with_gravity Are we running with gravity and hence need + * to exchange multipoles? * * @return The number of cells created. */ int cell_unpack(struct pcell *restrict pc, struct cell *restrict c, - struct space *restrict s) { + struct space *restrict s, const int with_gravity) { #ifdef WITH_MPI @@ -275,6 +295,22 @@ int cell_unpack(struct pcell *restrict pc, struct cell *restrict c, c->cellID = pc->cellID; #endif + /* Copy the Multipole related information */ + if (with_gravity) { + + struct gravity_tensors *mp = c->multipole; + + mp->m_pole = pc->m_pole; + mp->CoM[0] = pc->CoM[0]; + mp->CoM[1] = pc->CoM[1]; + mp->CoM[2] = pc->CoM[2]; + mp->CoM_rebuild[0] = pc->CoM_rebuild[0]; + mp->CoM_rebuild[1] = pc->CoM_rebuild[1]; + mp->CoM_rebuild[2] = pc->CoM_rebuild[2]; + mp->r_max = pc->r_max; + mp->r_max_rebuild = pc->r_max_rebuild; + } + /* Number of new cells created. */ int count = 1; @@ -304,7 +340,7 @@ int cell_unpack(struct pcell *restrict pc, struct cell *restrict c, temp->parent = c; c->progeny[k] = temp; c->split = 1; - count += cell_unpack(&pc[pc->progeny[k]], temp, s); + count += cell_unpack(&pc[pc->progeny[k]], temp, s, with_gravity); } /* Return the total number of unpacked cells. */ @@ -1167,6 +1203,7 @@ void cell_clean_links(struct cell *c, void *data) { c->gradient = NULL; c->force = NULL; c->grav = NULL; + c->grav_mm = NULL; } /** @@ -1948,7 +1985,7 @@ void cell_activate_grav_mm_task(struct cell *ci, struct cell *cj, const struct engine *e = s->space->e; /* Anything to do here? */ - if (!cell_is_active_gravity(ci, e) && !cell_is_active_gravity(cj, e)) + if (!cell_is_active_gravity_mm(ci, e) && !cell_is_active_gravity_mm(cj, e)) error("Inactive MM task being activated"); /* Atomically drift the multipole in ci */ @@ -2144,15 +2181,22 @@ int cell_unskip_hydro_tasks(struct cell *c, struct scheduler *s) { struct cell *cj = t->cj; const int ci_active = cell_is_active_hydro(ci, e); const int cj_active = (cj != NULL) ? cell_is_active_hydro(cj, e) : 0; +#ifdef WITH_MPI + const int ci_nodeID = ci->nodeID; + const int cj_nodeID = (cj != NULL) ? cj->nodeID : -1; +#else + const int ci_nodeID = nodeID; + const int cj_nodeID = nodeID; +#endif /* Only activate tasks that involve a local active cell. */ - if ((ci_active && ci->nodeID == nodeID) || - (cj_active && cj->nodeID == nodeID)) { + if ((ci_active && ci_nodeID == nodeID) || + (cj_active && cj_nodeID == nodeID)) { scheduler_activate(s, t); /* Activate hydro drift */ if (t->type == task_type_self) { - if (ci->nodeID == nodeID) cell_activate_drift_part(ci, s); + if (ci_nodeID == nodeID) cell_activate_drift_part(ci, s); } /* Set the correct sorting flags and activate hydro drifts */ @@ -2164,8 +2208,8 @@ int cell_unskip_hydro_tasks(struct cell *c, struct scheduler *s) { cj->dx_max_sort_old = cj->dx_max_sort; /* Activate the drift tasks. */ - if (ci->nodeID == nodeID) cell_activate_drift_part(ci, s); - if (cj->nodeID == nodeID) cell_activate_drift_part(cj, s); + if (ci_nodeID == nodeID) cell_activate_drift_part(ci, s); + if (cj_nodeID == nodeID) cell_activate_drift_part(cj, s); /* Check the sorts and activate them if needed. */ cell_activate_sorts(ci, t->flags, s); @@ -2186,7 +2230,7 @@ int cell_unskip_hydro_tasks(struct cell *c, struct scheduler *s) { #ifdef WITH_MPI /* Activate the send/recv tasks. */ - if (ci->nodeID != nodeID) { + if (ci_nodeID != nodeID) { /* If the local cell is active, receive data from the foreign cell. */ if (cj_active) { @@ -2206,7 +2250,7 @@ int cell_unskip_hydro_tasks(struct cell *c, struct scheduler *s) { /* Is the foreign cell active and will need stuff from us? */ if (ci_active) { - scheduler_activate_send(s, cj->send_xv, ci->nodeID); + scheduler_activate_send(s, cj->send_xv, ci_nodeID); /* Drift the cell which will be sent; note that not all sent particles will be drifted, only those that are needed. */ @@ -2214,18 +2258,18 @@ int cell_unskip_hydro_tasks(struct cell *c, struct scheduler *s) { /* If the local cell is also active, more stuff will be needed. */ if (cj_active) { - scheduler_activate_send(s, cj->send_rho, ci->nodeID); + scheduler_activate_send(s, cj->send_rho, ci_nodeID); #ifdef EXTRA_HYDRO_LOOP - scheduler_activate_send(s, cj->send_gradient, ci->nodeID); + scheduler_activate_send(s, cj->send_gradient, ci_nodeID); #endif } } /* If the local cell is active, send its ti_end values. */ - if (cj_active) scheduler_activate_send(s, cj->send_ti, ci->nodeID); + if (cj_active) scheduler_activate_send(s, cj->send_ti, ci_nodeID); - } else if (cj->nodeID != nodeID) { + } else if (cj_nodeID != nodeID) { /* If the local cell is active, receive data from the foreign cell. */ if (ci_active) { @@ -2245,7 +2289,7 @@ int cell_unskip_hydro_tasks(struct cell *c, struct scheduler *s) { /* Is the foreign cell active and will need stuff from us? */ if (cj_active) { - scheduler_activate_send(s, ci->send_xv, cj->nodeID); + scheduler_activate_send(s, ci->send_xv, cj_nodeID); /* Drift the cell which will be sent; note that not all sent particles will be drifted, only those that are needed. */ @@ -2254,16 +2298,16 @@ int cell_unskip_hydro_tasks(struct cell *c, struct scheduler *s) { /* If the local cell is also active, more stuff will be needed. */ if (ci_active) { - scheduler_activate_send(s, ci->send_rho, cj->nodeID); + scheduler_activate_send(s, ci->send_rho, cj_nodeID); #ifdef EXTRA_HYDRO_LOOP - scheduler_activate_send(s, ci->send_gradient, cj->nodeID); + scheduler_activate_send(s, ci->send_gradient, cj_nodeID); #endif } } /* If the local cell is active, send its ti_end values. */ - if (ci_active) scheduler_activate_send(s, ci->send_ti, cj->nodeID); + if (ci_active) scheduler_activate_send(s, ci->send_ti, cj_nodeID); } #endif } @@ -2312,10 +2356,15 @@ int cell_unskip_gravity_tasks(struct cell *c, struct scheduler *s) { struct task *t = l->t; struct cell *ci = t->ci; struct cell *cj = t->cj; - const int ci_nodeID = ci->nodeID; - const int cj_nodeID = (cj != NULL) ? cj->nodeID : -1; const int ci_active = cell_is_active_gravity(ci, e); const int cj_active = (cj != NULL) ? cell_is_active_gravity(cj, e) : 0; +#ifdef WITH_MPI + const int ci_nodeID = ci->nodeID; + const int cj_nodeID = (cj != NULL) ? cj->nodeID : -1; +#else + const int ci_nodeID = nodeID; + const int cj_nodeID = nodeID; +#endif /* Only activate tasks that involve a local active cell. */ if ((ci_active && ci_nodeID == nodeID) || @@ -2331,7 +2380,9 @@ int cell_unskip_gravity_tasks(struct cell *c, struct scheduler *s) { } else if (t->type == task_type_pair) { cell_activate_subcell_grav_tasks(ci, cj, s); } else if (t->type == task_type_grav_mm) { - cell_activate_grav_mm_task(ci, cj, s); +#ifdef SWIFT_DEBUG_CHECKS + error("Incorrectly linked M-M task!"); +#endif } } @@ -2342,9 +2393,7 @@ int cell_unskip_gravity_tasks(struct cell *c, struct scheduler *s) { if (ci_nodeID != nodeID) { /* If the local cell is active, receive data from the foreign cell. */ - if (cj_active) { - scheduler_activate(s, ci->recv_grav); - } + if (cj_active) scheduler_activate(s, ci->recv_grav); /* If the foreign cell is active, we want its ti_end values. */ if (ci_active) scheduler_activate(s, ci->recv_ti); @@ -2366,9 +2415,7 @@ int cell_unskip_gravity_tasks(struct cell *c, struct scheduler *s) { } else if (cj_nodeID != nodeID) { /* If the local cell is active, receive data from the foreign cell. */ - if (ci_active) { - scheduler_activate(s, cj->recv_grav); - } + if (ci_active) scheduler_activate(s, cj->recv_grav); /* If the foreign cell is active, we want its ti_end values. */ if (cj_active) scheduler_activate(s, cj->recv_ti); @@ -2391,8 +2438,36 @@ int cell_unskip_gravity_tasks(struct cell *c, struct scheduler *s) { } } + for (struct link *l = c->grav_mm; l != NULL; l = l->next) { + + struct task *t = l->t; + struct cell *ci = t->ci; + struct cell *cj = t->cj; + const int ci_active = cell_is_active_gravity_mm(ci, e); + const int cj_active = (cj != NULL) ? cell_is_active_gravity_mm(cj, e) : 0; +#ifdef WITH_MPI + const int ci_nodeID = ci->nodeID; + const int cj_nodeID = (cj != NULL) ? cj->nodeID : -1; +#else + const int ci_nodeID = nodeID; + const int cj_nodeID = nodeID; +#endif + +#ifdef SWIFT_DEBUG_CHECKS + if (t->type != task_type_grav_mm) error("Incorrectly linked gravity task!"); +#endif + + /* Only activate tasks that involve a local active cell. */ + if ((ci_active && ci_nodeID == nodeID) || + (cj_active && cj_nodeID == nodeID)) { + + scheduler_activate(s, t); + cell_activate_grav_mm_task(ci, cj, s); + } + } + /* Unskip all the other task types. */ - if (c->nodeID == nodeID && cell_is_active_gravity(c, e)) { + if (c->nodeID == nodeID && cell_is_active_gravity_mm(c, e)) { if (c->init_grav != NULL) scheduler_activate(s, c->init_grav); if (c->init_grav_out != NULL) scheduler_activate(s, c->init_grav_out); @@ -2418,7 +2493,7 @@ int cell_unskip_gravity_tasks(struct cell *c, struct scheduler *s) { void cell_set_super(struct cell *c, struct cell *super) { /* Are we in a cell with some kind of self/pair task ? */ - if (super == NULL && c->nr_tasks > 0) super = c; + if (super == NULL && (c->nr_tasks > 0 || c->nr_mm_tasks > 0)) super = c; /* Set the super-cell */ c->super = super; @@ -2461,7 +2536,8 @@ void cell_set_super_hydro(struct cell *c, struct cell *super_hydro) { void cell_set_super_gravity(struct cell *c, struct cell *super_gravity) { /* Are we in a cell with some kind of self/pair task ? */ - if (super_gravity == NULL && c->grav != NULL) super_gravity = c; + if (super_gravity == NULL && (c->grav != NULL || c->grav_mm != NULL)) + super_gravity = c; /* Set the super-cell */ c->super_gravity = super_gravity; @@ -2938,3 +3014,63 @@ int cell_can_use_pair_mm(const struct cell *ci, const struct cell *cj, return gravity_M2L_accept(multi_i->r_max, multi_j->r_max, theta_crit2, r2); } + +/** + * @brief Can we use the MM interactions fo a given pair of cells? + * + * @param ci The first #cell. + * @param cj The second #cell. + * @param e The #engine. + * @param s The #space. + */ +int cell_can_use_pair_mm_rebuild(const struct cell *ci, const struct cell *cj, + const struct engine *e, + const struct space *s) { + + const double theta_crit2 = e->gravity_properties->theta_crit2; + const int periodic = s->periodic; + const double dim[3] = {s->dim[0], s->dim[1], s->dim[2]}; + + /* Recover the multipole information */ + const struct gravity_tensors *const multi_i = ci->multipole; + const struct gravity_tensors *const multi_j = cj->multipole; + +#ifdef SWIFT_DEBUG_CHECKS + + if (multi_i->CoM[0] < ci->loc[0] || + multi_i->CoM[0] > ci->loc[0] + ci->width[0]) + error("Invalid multipole position ci"); + if (multi_i->CoM[1] < ci->loc[1] || + multi_i->CoM[1] > ci->loc[1] + ci->width[1]) + error("Invalid multipole position ci"); + if (multi_i->CoM[2] < ci->loc[2] || + multi_i->CoM[2] > ci->loc[2] + ci->width[2]) + error("Invalid multipole position ci"); + + if (multi_j->CoM[0] < cj->loc[0] || + multi_j->CoM[0] > cj->loc[0] + cj->width[0]) + error("Invalid multipole position cj"); + if (multi_j->CoM[1] < cj->loc[1] || + multi_j->CoM[1] > cj->loc[1] + cj->width[1]) + error("Invalid multipole position cj"); + if (multi_j->CoM[2] < cj->loc[2] || + multi_j->CoM[2] > cj->loc[2] + cj->width[2]) + error("Invalid multipole position cj"); + +#endif + + /* Get the distance between the CoMs */ + double dx = multi_i->CoM[0] - multi_j->CoM[0]; + double dy = multi_i->CoM[1] - multi_j->CoM[1]; + double dz = multi_i->CoM[2] - multi_j->CoM[2]; + + /* Apply BC */ + if (periodic) { + dx = nearest(dx, dim[0]); + dy = nearest(dy, dim[1]); + dz = nearest(dz, dim[2]); + } + const double r2 = dx * dx + dy * dy + dz * dz; + + return gravity_M2L_accept(multi_i->r_max, multi_j->r_max, theta_crit2, r2); +} diff --git a/src/cell.h b/src/cell.h index 9438804e12d599f7e36bf937b9bdc2958793feac..747b79f05a799b9401c67c23b72b722d385c5ced 100644 --- a/src/cell.h +++ b/src/cell.h @@ -77,6 +77,24 @@ struct link { */ struct pcell { + /*! This cell's gravity-related tensors */ + struct multipole m_pole; + + /*! Centre of mass. */ + double CoM[3]; + + /*! Centre of mass at rebuild time. */ + double CoM_rebuild[3]; + + /*! Upper limit of the CoM<->gpart distance. */ + double r_max; + + /*! Upper limit of the CoM<->gpart distance at last rebuild. */ + double r_max_rebuild; + + /*! Relative indices of the cell's progeny. */ + int progeny[8]; + /*! Maximal smoothing length. */ double h_max; @@ -116,9 +134,6 @@ struct pcell { /*! Number of #spart in this cell. */ int scount; - /*! Relative indices of the cell's progeny. */ - int progeny[8]; - #ifdef SWIFT_DEBUG_CHECKS /* Cell ID (for debugging) */ int cellID; @@ -213,6 +228,9 @@ struct cell { /*! Linked list of the tasks computing this cell's gravity forces. */ struct link *grav; + /*! Linked list of the tasks computing this cell's gravity M-M forces. */ + struct link *grav_mm; + /*! The task computing this cell's sorts. */ struct task *sorts; @@ -422,6 +440,9 @@ struct cell { /*! Number of tasks that are associated with this cell. */ short int nr_tasks; + /*! Number of M-M tasks that are associated with this cell. */ + short int nr_mm_tasks; + /*! The depth of this cell in the tree. */ char depth; @@ -479,8 +500,9 @@ int cell_mlocktree(struct cell *c); void cell_munlocktree(struct cell *c); int cell_slocktree(struct cell *c); void cell_sunlocktree(struct cell *c); -int cell_pack(struct cell *c, struct pcell *pc); -int cell_unpack(struct pcell *pc, struct cell *c, struct space *s); +int cell_pack(struct cell *c, struct pcell *pc, const int with_gravity); +int cell_unpack(struct pcell *pc, struct cell *c, struct space *s, + const int with_gravity); int cell_pack_tags(const struct cell *c, int *tags); int cell_unpack_tags(const int *tags, struct cell *c); int cell_pack_end_step(struct cell *c, struct pcell_step *pcell); @@ -522,6 +544,8 @@ void cell_set_super_mapper(void *map_data, int num_elements, void *extra_data); int cell_has_tasks(struct cell *c); int cell_can_use_pair_mm(const struct cell *ci, const struct cell *cj, const struct engine *e, const struct space *s); +int cell_can_use_pair_mm_rebuild(const struct cell *ci, const struct cell *cj, + const struct engine *e, const struct space *s); /* Inlined functions (for speed). */ diff --git a/src/cosmology.c b/src/cosmology.c index ff1116bd65837f6718903f7924c6076b5ff40c26..f209997dd3faae3ba884632ffa41cca5d6ae0710 100644 --- a/src/cosmology.c +++ b/src/cosmology.c @@ -535,7 +535,7 @@ void cosmology_init_no_cosmo(struct cosmology *c) { c->w_0 = 0.; c->w_a = 0.; c->h = 1.; - c->w = 0.; + c->w = -1.; c->a_begin = 1.; c->a_end = 1.; @@ -544,6 +544,7 @@ void cosmology_init_no_cosmo(struct cosmology *c) { c->H = 0.; c->a = 1.; + c->z = 0.; c->a_inv = 1.; c->a2_inv = 1.; c->a3_inv = 1.; diff --git a/src/engine.c b/src/engine.c index 73d74bed758834dadab2ef898f2b9733bc3f9c17..f039d619c769f6132acadc8b456fd839f3bb5ea7 100644 --- a/src/engine.c +++ b/src/engine.c @@ -1670,10 +1670,11 @@ void engine_exchange_cells(struct engine *e) { struct space *s = e->s; const int nr_proxies = e->nr_proxies; + const int with_gravity = e->policy & engine_policy_self_gravity; const ticks tic = getticks(); /* Exchange the cell structure with neighbouring ranks. */ - proxy_cells_exchange(e->proxies, e->nr_proxies, e->s); + proxy_cells_exchange(e->proxies, e->nr_proxies, e->s, with_gravity); /* Count the number of particles we need to import and re-allocate the buffer if needed. */ @@ -2175,21 +2176,21 @@ void engine_exchange_proxy_multipoles(struct engine *e) { /* And the actual number of things we are going to ship */ for (int k = 0; k < p->nr_cells_in; k++) - count_recv += p->cells_in[k]->pcell_size; + count_recv_cells += p->cells_in[k]->pcell_size; for (int k = 0; k < p->nr_cells_out; k++) - count_send += p->cells_out[k]->pcell_size; + count_send_cells += p->cells_out[k]->pcell_size; } /* Allocate the buffers for the packed data */ struct gravity_tensors *buffer_send = NULL; if (posix_memalign((void **)&buffer_send, SWIFT_CACHE_ALIGNMENT, - count_send * sizeof(struct gravity_tensors)) != 0) + count_send_cells * sizeof(struct gravity_tensors)) != 0) error("Unable to allocate memory for multipole transactions"); struct gravity_tensors *buffer_recv = NULL; if (posix_memalign((void **)&buffer_recv, SWIFT_CACHE_ALIGNMENT, - count_recv * sizeof(struct gravity_tensors)) != 0) + count_recv_cells * sizeof(struct gravity_tensors)) != 0) error("Unable to allocate memory for multipole transactions"); /* Also allocate the MPI requests */ @@ -2448,7 +2449,7 @@ void engine_make_self_gravity_tasks_mapper(void *map_data, int num_elements, if (periodic && min_radius > max_distance) continue; /* Are the cells too close for a MM interaction ? */ - if (!cell_can_use_pair_mm(ci, cj, e, s)) { + if (!cell_can_use_pair_mm_rebuild(ci, cj, e, s)) { /* Ok, we need to add a direct pair calculation */ scheduler_addtask(sched, task_type_pair, task_subtype_grav, 0, 0, @@ -3513,10 +3514,12 @@ void engine_marktasks_mapper(void *map_data, int num_elements, cell_activate_subcell_grav_tasks(t->ci, t->cj, s); } +#ifdef SWIFT_DEBUG_CHECKS else if (t->type == task_type_sub_pair && t->subtype == task_subtype_grav) { error("Invalid task sub-type encountered"); } +#endif } /* Only interested in density tasks as of here. */ @@ -3621,9 +3624,7 @@ void engine_marktasks_mapper(void *map_data, int num_elements, if (ci->nodeID != engine_rank) { /* If the local cell is active, receive data from the foreign cell. */ - if (cj_active_gravity) { - scheduler_activate(s, ci->recv_grav); - } + if (cj_active_gravity) scheduler_activate(s, ci->recv_grav); /* If the foreign cell is active, we want its ti_end values. */ if (ci_active_gravity) scheduler_activate(s, ci->recv_ti); @@ -3647,9 +3648,7 @@ void engine_marktasks_mapper(void *map_data, int num_elements, } else if (cj->nodeID != engine_rank) { /* If the local cell is active, receive data from the foreign cell. */ - if (ci_active_gravity) { - scheduler_activate(s, cj->recv_grav); - } + if (ci_active_gravity) scheduler_activate(s, cj->recv_grav); /* If the foreign cell is active, we want its ti_end values. */ if (cj_active_gravity) scheduler_activate(s, cj->recv_ti); @@ -3710,8 +3709,8 @@ void engine_marktasks_mapper(void *map_data, int num_elements, const struct cell *cj = t->cj; const int ci_nodeID = ci->nodeID; const int cj_nodeID = cj->nodeID; - const int ci_active_gravity = cell_is_active_gravity(ci, e); - const int cj_active_gravity = cell_is_active_gravity(cj, e); + const int ci_active_gravity = cell_is_active_gravity_mm(ci, e); + const int cj_active_gravity = cell_is_active_gravity_mm(cj, e); if ((ci_active_gravity && ci_nodeID == engine_rank) || (cj_active_gravity && cj_nodeID == engine_rank)) @@ -3950,19 +3949,14 @@ void engine_rebuild(struct engine *e, int clean_smoothing_length_values) { /* If in parallel, exchange the cell structure, top-level and neighbouring * multipoles. */ #ifdef WITH_MPI - engine_exchange_cells(e); - if (e->policy & engine_policy_self_gravity) engine_exchange_top_multipoles(e); + + engine_exchange_cells(e); #endif /* Re-build the tasks. */ engine_maketasks(e); -#ifdef WITH_MPI - if (e->policy & engine_policy_self_gravity) - engine_exchange_proxy_multipoles(e); -#endif - /* Make the list of top-level cells that have tasks */ space_list_cells_with_tasks(e->s); @@ -5289,19 +5283,30 @@ void engine_makeproxies(struct engine *e) { #ifdef WITH_MPI const int nodeID = e->nodeID; const struct space *s = e->s; - const int *cdim = s->cdim; + + /* Handle on the cells and proxies */ + struct cell *cells = s->cells_top; + struct proxy *proxies = e->proxies; + + /* Some info about the domain */ + const int cdim[3] = {s->cdim[0], s->cdim[1], s->cdim[2]}; + const double dim[3] = {s->dim[0], s->dim[1], s->dim[2]}; const int periodic = s->periodic; + const double cell_width[3] = {cells[0].width[0], cells[0].width[1], + cells[0].width[2]}; /* Get some info about the physics */ - const double *dim = s->dim; - const struct gravity_props *props = e->gravity_properties; - const double theta_crit2 = props->theta_crit2; const int with_hydro = (e->policy & engine_policy_hydro); const int with_gravity = (e->policy & engine_policy_self_gravity); + const double theta_crit_inv = e->gravity_properties->theta_crit_inv; + const double theta_crit2 = e->gravity_properties->theta_crit2; + const double max_distance = e->mesh->r_cut_max; - /* Handle on the cells and proxies */ - struct cell *cells = s->cells_top; - struct proxy *proxies = e->proxies; + /* Maximal distance between CoMs and any particle in the cell */ + const double r_max2 = cell_width[0] * cell_width[0] + + cell_width[1] * cell_width[1] + + cell_width[2] * cell_width[2]; + const double r_max = sqrt(r_max2); /* Let's time this */ const ticks tic = getticks(); @@ -5315,14 +5320,34 @@ void engine_makeproxies(struct engine *e) { /* Compute how many cells away we need to walk */ int delta = 1; /*hydro case */ + + /* Gravity needs to take the opening angle into account */ if (with_gravity) { - const double distance = 2.5 * cells[0].width[0] / props->theta_crit; - delta = (int)(distance / cells[0].width[0]) + 1; + const double distance = 2. * r_max * theta_crit_inv; + delta = (int)(distance / cells[0].dmin) + 1; + } + + /* Turn this into upper and lower bounds for loops */ + int delta_m = delta; + int delta_p = delta; + + /* Special case where every cell is in range of every other one */ + if (delta >= cdim[0] / 2) { + if (cdim[0] % 2 == 0) { + delta_m = cdim[0] / 2; + delta_p = cdim[0] / 2 - 1; + } else { + delta_m = cdim[0] / 2; + delta_p = cdim[0] / 2; + } } /* Let's be verbose about this choice */ if (e->verbose) - message("Looking for proxies up to %d top-level cells away", delta); + message( + "Looking for proxies up to %d top-level cells away (delta_m=%d " + "delta_p=%d)", + delta, delta_m, delta_p); /* Loop over each cell in the space. */ int ind[3]; @@ -5333,33 +5358,24 @@ void engine_makeproxies(struct engine *e) { /* Get the cell ID. */ const int cid = cell_getid(cdim, ind[0], ind[1], ind[2]); - double CoM_i[3] = {0., 0., 0.}; - double r_max_i = 0.; - - if (with_gravity) { - - /* Get ci's multipole */ - const struct gravity_tensors *multi_i = cells[cid].multipole; - CoM_i[0] = multi_i->CoM[0]; - CoM_i[1] = multi_i->CoM[1]; - CoM_i[2] = multi_i->CoM[2]; - r_max_i = multi_i->r_max; - } + /* and it's location */ + const double loc_i[3] = {cells[cid].loc[0], cells[cid].loc[1], + cells[cid].loc[2]}; /* Loop over all its neighbours (periodic). */ - for (int i = -delta; i <= delta; i++) { + for (int i = -delta_m; i <= delta_p; i++) { int ii = ind[0] + i; if (ii >= cdim[0]) ii -= cdim[0]; else if (ii < 0) ii += cdim[0]; - for (int j = -delta; j <= delta; j++) { + for (int j = -delta_m; j <= delta_p; j++) { int jj = ind[1] + j; if (jj >= cdim[1]) jj -= cdim[1]; else if (jj < 0) jj += cdim[1]; - for (int k = -delta; k <= delta; k++) { + for (int k = -delta_m; k <= delta_p; k++) { int kk = ind[2] + k; if (kk >= cdim[2]) kk -= cdim[2]; @@ -5382,9 +5398,12 @@ void engine_makeproxies(struct engine *e) { int proxy_type = 0; - /* In the hydro case, only care about neighbours */ + /* In the hydro case, only care about direct neighbours */ if (with_hydro) { + // MATTHIEU: to do: Write a better expression for the + // non-periodic case. + /* This is super-ugly but checks for direct neighbours */ /* with periodic BC */ if (((abs(ind[0] - ii) <= 1 || @@ -5402,16 +5421,20 @@ void engine_makeproxies(struct engine *e) { /* In the gravity case, check distances using the MAC. */ if (with_gravity) { - /* Get cj's multipole */ - const struct gravity_tensors *multi_j = cells[cjd].multipole; - const double CoM_j[3] = {multi_j->CoM[0], multi_j->CoM[1], - multi_j->CoM[2]}; - const double r_max_j = multi_j->r_max; + /* We don't have multipoles yet (or there CoMs) so we will have + to cook up something based on cell locations only. We hence + need an upper limit on the distance that the CoMs in those + cells could have. We then can decide whether we are too close + for an M2L interaction and hence require a proxy as this pair + of cells cannot rely on just an M2L calculation. */ + + const double loc_j[3] = {cells[cjd].loc[0], cells[cjd].loc[1], + cells[cjd].loc[2]}; - /* Let's compute the current distance between the cell pair*/ - double dx = CoM_i[0] - CoM_j[0]; - double dy = CoM_i[1] - CoM_j[1]; - double dz = CoM_i[2] - CoM_j[2]; + /* Start with the distance between the cell centres. */ + double dx = loc_i[0] - loc_j[0]; + double dy = loc_i[1] - loc_j[1]; + double dz = loc_i[2] - loc_j[2]; /* Apply BC */ if (periodic) { @@ -5419,11 +5442,32 @@ void engine_makeproxies(struct engine *e) { dy = nearest(dy, dim[1]); dz = nearest(dz, dim[2]); } + + /* Add to it for the case where the future CoMs are in the + * corners */ + dx += cell_width[0]; + dy += cell_width[1]; + dz += cell_width[2]; + + /* This is a crazy upper-bound but the best we can do */ const double r2 = dx * dx + dy * dy + dz * dz; - /* Are we too close for M2L? */ - if (!gravity_M2L_accept(r_max_i, r_max_j, theta_crit2, r2)) - proxy_type |= (int)proxy_cell_type_gravity; + /* Minimal distance between any pair of particles */ + const double min_radius = sqrt(r2) - 2. * r_max; + + /* Are we beyond the distance where the truncated forces are 0 + * but not too far such that M2L can be used? */ + if (periodic) { + + if ((min_radius < max_distance) && + (!gravity_M2L_accept(r_max, r_max, theta_crit2, r2))) + proxy_type |= (int)proxy_cell_type_gravity; + + } else { + + if (!gravity_M2L_accept(r_max, r_max, theta_crit2, r2)) + proxy_type |= (int)proxy_cell_type_gravity; + } } /* Abort if not in range at all */ diff --git a/src/mesh_gravity.c b/src/mesh_gravity.c index 2359b8a9cdf785bce719a1d0379d177d00328b9e..d49b850f98c3d011b4bc1f4a7462be31954e1e7f 100644 --- a/src/mesh_gravity.c +++ b/src/mesh_gravity.c @@ -314,7 +314,7 @@ void pm_mesh_compute_potential(struct pm_mesh* mesh, const struct space* s, fftw_plan inverse_plan = fftw_plan_dft_c2r_3d( N, N, N, frho, rho, FFTW_ESTIMATE | FFTW_DESTROY_INPUT); - const ticks tic = getticks(); + ticks tic = getticks(); /* Zero everything */ bzero(rho, N * N * N * sizeof(double)); @@ -324,9 +324,23 @@ void pm_mesh_compute_potential(struct pm_mesh* mesh, const struct space* s, gpart_to_mesh_CIC(&s->gparts[i], rho, N, cell_fac, dim); if (verbose) - message("gpart assignment took %.3f %s.", + message("Gpart assignment took %.3f %s.", clocks_from_ticks(getticks() - tic), clocks_getunit()); +#ifdef WITH_MPI + + MPI_Barrier(MPI_COMM_WORLD); + tic = getticks(); + + /* Merge everybody's share of the density mesh */ + MPI_Allreduce(MPI_IN_PLACE, rho, N * N * N, MPI_DOUBLE, MPI_SUM, + MPI_COMM_WORLD); + + if (verbose) + message("Mesh comunication took %.3f %s.", + clocks_from_ticks(getticks() - tic), clocks_getunit()); +#endif + /* message("\n\n\n DENSITY"); */ /* print_array(rho, N); */ diff --git a/src/multipole.h b/src/multipole.h index c05aa36890313ea22f725ee272746bdf63f597ea..2ada835e32565dc4075dd1352ba3959b2ba4766b 100644 --- a/src/multipole.h +++ b/src/multipole.h @@ -186,12 +186,12 @@ struct gravity_tensors { /*! The actual content */ struct { - /*! Multipole mass */ - struct multipole m_pole; - /*! Field tensor for the potential */ struct grav_tensor pot; + /*! Multipole mass */ + struct multipole m_pole; + /*! Centre of mass of the matter dsitribution */ double CoM[3]; diff --git a/src/proxy.c b/src/proxy.c index 8d34e18f250f3b70e74c0e4a30be2eebcb631666..55d1f7d082fe883ce6dba83791017391c48cf032 100644 --- a/src/proxy.c +++ b/src/proxy.c @@ -159,103 +159,6 @@ void proxy_tags_exchange(struct proxy *proxies, int num_proxies, #endif } -/** - * @brief Exchange the cell structures with all proxies. - * - * @param proxies The list of #proxy that will send/recv cells. - * @param num_proxies The number of proxies. - * @param s The space into which the particles will be unpacked. - */ -void proxy_cells_exchange(struct proxy *proxies, int num_proxies, - struct space *s) { - -#ifdef WITH_MPI - - MPI_Request *reqs; - if ((reqs = (MPI_Request *)malloc(sizeof(MPI_Request) * 2 * num_proxies)) == - NULL) - error("Failed to allocate request buffers."); - MPI_Request *reqs_in = reqs; - MPI_Request *reqs_out = &reqs[num_proxies]; - - /* Run through the cells and get the size of the ones that will be sent off. - */ - int count_out = 0; - int offset[s->nr_cells]; - for (int k = 0; k < s->nr_cells; k++) { - offset[k] = count_out; - if (s->cells_top[k].sendto) - count_out += - (s->cells_top[k].pcell_size = cell_getsize(&s->cells_top[k])); - } - - /* Allocate the pcells. */ - struct pcell *pcells = NULL; - if (posix_memalign((void **)&pcells, SWIFT_CACHE_ALIGNMENT, - sizeof(struct pcell) * count_out) != 0) - error("Failed to allocate pcell buffer."); - - /* Pack the cells. */ - for (int k = 0; k < s->nr_cells; k++) - if (s->cells_top[k].sendto) { - cell_pack(&s->cells_top[k], &pcells[offset[k]]); - s->cells_top[k].pcell = &pcells[offset[k]]; - } - - /* Launch the first part of the exchange. */ - for (int k = 0; k < num_proxies; k++) { - proxy_cells_exchange_first(&proxies[k]); - reqs_in[k] = proxies[k].req_cells_count_in; - reqs_out[k] = proxies[k].req_cells_count_out; - } - - /* Wait for each count to come in and start the recv. */ - for (int k = 0; k < num_proxies; k++) { - int pid = MPI_UNDEFINED; - MPI_Status status; - if (MPI_Waitany(num_proxies, reqs_in, &pid, &status) != MPI_SUCCESS || - pid == MPI_UNDEFINED) - error("MPI_Waitany failed."); - // message( "request from proxy %i has arrived." , pid ); - proxy_cells_exchange_second(&proxies[pid]); - } - - /* Wait for all the sends to have finished too. */ - if (MPI_Waitall(num_proxies, reqs_out, MPI_STATUSES_IGNORE) != MPI_SUCCESS) - error("MPI_Waitall on sends failed."); - - /* Set the requests for the cells. */ - for (int k = 0; k < num_proxies; k++) { - reqs_in[k] = proxies[k].req_cells_in; - reqs_out[k] = proxies[k].req_cells_out; - } - - /* Wait for each pcell array to come in from the proxies. */ - for (int k = 0; k < num_proxies; k++) { - int pid = MPI_UNDEFINED; - MPI_Status status; - if (MPI_Waitany(num_proxies, reqs_in, &pid, &status) != MPI_SUCCESS || - pid == MPI_UNDEFINED) - error("MPI_Waitany failed."); - // message( "cell data from proxy %i has arrived." , pid ); - for (int count = 0, j = 0; j < proxies[pid].nr_cells_in; j++) - count += cell_unpack(&proxies[pid].pcells_in[count], - proxies[pid].cells_in[j], s); - } - - /* Wait for all the sends to have finished too. */ - if (MPI_Waitall(num_proxies, reqs_out, MPI_STATUSES_IGNORE) != MPI_SUCCESS) - error("MPI_Waitall on sends failed."); - - /* Clean up. */ - free(reqs); - free(pcells); - -#else - error("SWIFT was not compiled with MPI support."); -#endif -} - /** * @brief Exchange cells with a remote node, first part. * @@ -350,6 +253,105 @@ void proxy_cells_exchange_second(struct proxy *p) { #endif } +/** + * @brief Exchange the cell structures with all proxies. + * + * @param proxies The list of #proxy that will send/recv cells. + * @param num_proxies The number of proxies. + * @param s The space into which the particles will be unpacked. + * @param with_gravity Are we running with gravity and hence need + * to exchange multipoles? + */ +void proxy_cells_exchange(struct proxy *proxies, int num_proxies, + struct space *s, const int with_gravity) { + +#ifdef WITH_MPI + + MPI_Request *reqs; + if ((reqs = (MPI_Request *)malloc(sizeof(MPI_Request) * 2 * num_proxies)) == + NULL) + error("Failed to allocate request buffers."); + MPI_Request *reqs_in = reqs; + MPI_Request *reqs_out = &reqs[num_proxies]; + + /* Run through the cells and get the size of the ones that will be sent off. + */ + int count_out = 0; + int offset[s->nr_cells]; + for (int k = 0; k < s->nr_cells; k++) { + offset[k] = count_out; + if (s->cells_top[k].sendto) + count_out += + (s->cells_top[k].pcell_size = cell_getsize(&s->cells_top[k])); + } + + /* Allocate the pcells. */ + struct pcell *pcells = NULL; + if (posix_memalign((void **)&pcells, SWIFT_CACHE_ALIGNMENT, + sizeof(struct pcell) * count_out) != 0) + error("Failed to allocate pcell buffer."); + + /* Pack the cells. */ + for (int k = 0; k < s->nr_cells; k++) + if (s->cells_top[k].sendto) { + cell_pack(&s->cells_top[k], &pcells[offset[k]], with_gravity); + s->cells_top[k].pcell = &pcells[offset[k]]; + } + + /* Launch the first part of the exchange. */ + for (int k = 0; k < num_proxies; k++) { + proxy_cells_exchange_first(&proxies[k]); + reqs_in[k] = proxies[k].req_cells_count_in; + reqs_out[k] = proxies[k].req_cells_count_out; + } + + /* Wait for each count to come in and start the recv. */ + for (int k = 0; k < num_proxies; k++) { + int pid = MPI_UNDEFINED; + MPI_Status status; + if (MPI_Waitany(num_proxies, reqs_in, &pid, &status) != MPI_SUCCESS || + pid == MPI_UNDEFINED) + error("MPI_Waitany failed."); + // message( "request from proxy %i has arrived." , pid ); + proxy_cells_exchange_second(&proxies[pid]); + } + + /* Wait for all the sends to have finished too. */ + if (MPI_Waitall(num_proxies, reqs_out, MPI_STATUSES_IGNORE) != MPI_SUCCESS) + error("MPI_Waitall on sends failed."); + + /* Set the requests for the cells. */ + for (int k = 0; k < num_proxies; k++) { + reqs_in[k] = proxies[k].req_cells_in; + reqs_out[k] = proxies[k].req_cells_out; + } + + /* Wait for each pcell array to come in from the proxies. */ + for (int k = 0; k < num_proxies; k++) { + int pid = MPI_UNDEFINED; + MPI_Status status; + if (MPI_Waitany(num_proxies, reqs_in, &pid, &status) != MPI_SUCCESS || + pid == MPI_UNDEFINED) + error("MPI_Waitany failed."); + // message( "cell data from proxy %i has arrived." , pid ); + for (int count = 0, j = 0; j < proxies[pid].nr_cells_in; j++) + count += cell_unpack(&proxies[pid].pcells_in[count], + proxies[pid].cells_in[j], s, with_gravity); + } + + /* Wait for all the sends to have finished too. */ + if (MPI_Waitall(num_proxies, reqs_out, MPI_STATUSES_IGNORE) != MPI_SUCCESS) + error("MPI_Waitall on sends failed."); + + /* Clean up. */ + free(reqs); + free(pcells); + +#else + error("SWIFT was not compiled with MPI support."); +#endif +} + /** * @brief Add a cell to the given proxy's input list. * diff --git a/src/proxy.h b/src/proxy.h index 63f51846d0bfb646e1f2f9209413437b4c966983..fbc3f1a163333146b26c937fae55a7fc4c1f12d7 100644 --- a/src/proxy.h +++ b/src/proxy.h @@ -102,9 +102,7 @@ void proxy_parts_exchange_second(struct proxy *p); void proxy_addcell_in(struct proxy *p, struct cell *c, int type); void proxy_addcell_out(struct proxy *p, struct cell *c, int type); void proxy_cells_exchange(struct proxy *proxies, int num_proxies, - struct space *s); -void proxy_cells_exchange_first(struct proxy *p); -void proxy_cells_exchange_second(struct proxy *p); + struct space *s, int with_gravity); void proxy_tags_exchange(struct proxy *proxies, int num_proxies, struct space *s); diff --git a/src/runner.c b/src/runner.c index 7e67c6e943f7493bf2e472da622291653cfe9001..94d36f0f9c0c450c20a961327808cf203f13bafb 100644 --- a/src/runner.c +++ b/src/runner.c @@ -1955,11 +1955,6 @@ void runner_do_recv_gpart(struct runner *r, struct cell *c, int timer) { if (gparts[k].time_bin == time_bin_inhibited) continue; time_bin_min = min(time_bin_min, gparts[k].time_bin); time_bin_max = max(time_bin_max, gparts[k].time_bin); - -#ifdef SWIFT_DEBUG_CHECKS - if (gparts[k].ti_drift != ti_current) - error("Received un-drifted g-particle !"); -#endif } /* Convert into a time */ @@ -2034,11 +2029,6 @@ void runner_do_recv_spart(struct runner *r, struct cell *c, int timer) { if (sparts[k].time_bin == time_bin_inhibited) continue; time_bin_min = min(time_bin_min, sparts[k].time_bin); time_bin_max = max(time_bin_max, sparts[k].time_bin); - -#ifdef SWIFT_DEBUG_CHECKS - if (sparts[k].ti_drift != ti_current) - error("Received un-drifted s-particle !"); -#endif } /* Convert into a time */ @@ -2068,8 +2058,8 @@ void runner_do_recv_spart(struct runner *r, struct cell *c, int timer) { #endif /* ... and store. */ - c->ti_gravity_end_min = ti_gravity_end_min; - c->ti_gravity_end_max = ti_gravity_end_max; + // c->ti_gravity_end_min = ti_gravity_end_min; + // c->ti_gravity_end_max = ti_gravity_end_max; c->ti_old_gpart = ti_current; if (timer) TIMER_TOC(timer_dorecv_spart); diff --git a/src/runner_doiact_grav.h b/src/runner_doiact_grav.h index 240f812984362fcd4130f2465ff1b176bb6fb067..34d3e690dd27f1d55511847b8a921b9c9ba84aac 100644 --- a/src/runner_doiact_grav.h +++ b/src/runner_doiact_grav.h @@ -1144,7 +1144,7 @@ static INLINE void runner_dopair_grav_mm(struct runner *r, TIMER_TIC; /* Anything to do here? */ - if (!cell_is_active_gravity(ci, e) || ci->nodeID != engine_rank) return; + if (!cell_is_active_gravity_mm(ci, e) || ci->nodeID != engine_rank) return; /* Short-cut to the multipole */ const struct multipole *multi_j = &cj->multipole->m_pole; @@ -1485,8 +1485,8 @@ static INLINE void runner_dopair_grav_mm_symmetric(struct runner *r, error("Running M-M task with two inactive cells."); #endif - if (cell_is_active_gravity(ci, e)) runner_dopair_grav_mm(r, ci, cj); - if (cell_is_active_gravity(cj, e)) runner_dopair_grav_mm(r, cj, ci); + if (cell_is_active_gravity_mm(ci, e)) runner_dopair_grav_mm(r, ci, cj); + if (cell_is_active_gravity_mm(cj, e)) runner_dopair_grav_mm(r, cj, ci); } /** diff --git a/src/scheduler.c b/src/scheduler.c index c751a75fb6ed55491202e55b823f8f0fca61da17..a90a3f1b0fa2401a6bc128069486c679df535435 100644 --- a/src/scheduler.c +++ b/src/scheduler.c @@ -804,11 +804,8 @@ static void scheduler_splittask_hydro(struct task *t, struct scheduler *s) { */ static void scheduler_splittask_gravity(struct task *t, struct scheduler *s) { -/* Temporarily prevent MPI here */ -#ifndef WITH_MPI const struct space *sp = s->space; struct engine *e = sp->e; -#endif /* Iterate on this task until we're done with it. */ int redo = 1; @@ -838,9 +835,6 @@ static void scheduler_splittask_gravity(struct task *t, struct scheduler *s) { break; } -/* Temporarily prevent MPI here */ -#ifndef WITH_MPI - /* Should we split this task? */ if (cell_can_split_self_gravity_task(ci)) { @@ -879,7 +873,6 @@ static void scheduler_splittask_gravity(struct task *t, struct scheduler *s) { } /* Self-gravity only */ } /* Make tasks explicitly */ } /* Cell is split */ -#endif /* WITH_MPI */ } /* Self interaction */ /* Pair interaction? */ @@ -895,20 +888,17 @@ static void scheduler_splittask_gravity(struct task *t, struct scheduler *s) { break; } -/* Temporarily prevent MPI here */ -#ifndef WITH_MPI - /* Should we replace it with an M-M task? */ - if (cell_can_use_pair_mm(ci, cj, e, sp)) { + if (cell_can_use_pair_mm_rebuild(ci, cj, e, sp)) { t->type = task_type_grav_mm; t->subtype = task_subtype_none; /* Since this task will not be split, we can already link it */ - atomic_inc(&ci->nr_tasks); - atomic_inc(&cj->nr_tasks); - engine_addlink(e, &ci->grav, t); - engine_addlink(e, &cj->grav, t); + atomic_inc(&ci->nr_mm_tasks); + atomic_inc(&cj->nr_mm_tasks); + engine_addlink(e, &ci->grav_mm, t); + engine_addlink(e, &cj->grav_mm, t); break; } @@ -916,9 +906,12 @@ static void scheduler_splittask_gravity(struct task *t, struct scheduler *s) { if (cell_can_split_pair_gravity_task(ci) && cell_can_split_pair_gravity_task(cj)) { + const long long gcount_i = ci->gcount; + const long long gcount_j = cj->gcount; + /* Replace by a single sub-task? */ if (scheduler_dosub && /* Use division to avoid integer overflow. */ - ci->gcount < space_subsize_pair_grav / cj->gcount) { + gcount_i * gcount_j < ((long long)space_subsize_pair_grav)) { /* Otherwise, split it. */ } else { @@ -954,9 +947,8 @@ static void scheduler_splittask_gravity(struct task *t, struct scheduler *s) { } } /* Split the pair */ } -#endif /* WITH_MPI */ - } /* pair interaction? */ - } /* iterate over the current task. */ + } /* pair interaction? */ + } /* iterate over the current task. */ } /** diff --git a/src/space.c b/src/space.c index 797757f71e36e808f652c14ece5d54de9ed3d12d..9a9dd05c237a69d034bf618490c834ade5ee33f7 100644 --- a/src/space.c +++ b/src/space.c @@ -164,10 +164,12 @@ void space_rebuild_recycle_mapper(void *map_data, int num_elements, multipole_rec_end); c->sorts = NULL; c->nr_tasks = 0; + c->nr_mm_tasks = 0; c->density = NULL; c->gradient = NULL; c->force = NULL; c->grav = NULL; + c->grav_mm = NULL; c->dx_max_part = 0.0f; c->dx_max_sort = 0.0f; c->sorted = 0; @@ -202,6 +204,13 @@ void space_rebuild_recycle_mapper(void *map_data, int num_elements, c->do_sub_sort = 0; c->do_grav_sub_drift = 0; c->do_sub_drift = 0; + c->ti_hydro_end_min = -1; + c->ti_hydro_end_max = -1; + c->ti_gravity_end_min = -1; + c->ti_gravity_end_max = -1; +#ifdef SWIFT_DEBUG_CHECKS + c->cellID = 0; +#endif if (s->gravity) bzero(c->multipole, sizeof(struct gravity_tensors)); for (int i = 0; i < 13; i++) if (c->sort[i] != NULL) { @@ -334,7 +343,7 @@ void space_regrid(struct space *s, int verbose) { /* Are we about to allocate new top level cells without a regrid? * Can happen when restarting the application. */ - int no_regrid = (s->cells_top == NULL && oldnodeIDs == NULL); + const int no_regrid = (s->cells_top == NULL && oldnodeIDs == NULL); #endif /* Do we need to re-build the upper-level cells? */ @@ -428,6 +437,10 @@ void space_regrid(struct space *s, int verbose) { c->tag = -1; #endif // WITH_MPI if (s->gravity) c->multipole = &s->multipoles_top[cid]; +#ifdef SWIFT_DEBUG_CHECKS + c->cellID = -last_cell_id; + last_cell_id++; +#endif } /* Be verbose about the change. */ @@ -919,6 +932,12 @@ void space_rebuild(struct space *s, int verbose) { c->ti_old_part = ti_current; c->ti_old_gpart = ti_current; c->ti_old_multipole = ti_current; + +#ifdef SWIFT_DEBUG_CHECKS + c->cellID = -last_cell_id; + last_cell_id++; +#endif + if (c->nodeID == engine_rank) { c->parts = finger; c->xparts = xfinger; @@ -2903,6 +2922,10 @@ void space_init(struct space *s, struct swift_params *params, /* Init the space lock. */ if (lock_init(&s->lock) != 0) error("Failed to create space spin-lock."); +#ifdef SWIFT_DEBUG_CHECKS + last_cell_id = 1; +#endif + /* Build the cells recursively. */ if (!dry_run) space_regrid(s, verbose); }