From 5dd4aef0f34adfd7ac17b0656f708ac8587552c7 Mon Sep 17 00:00:00 2001 From: Matthieu Schaller <matthieu.schaller@durham.ac.uk> Date: Sun, 26 Aug 2018 11:49:20 +0100 Subject: [PATCH] Exchange the multipoles at the same time as the cells when running with gravity. --- src/cell.c | 17 +++-- src/cell.h | 13 ++-- src/engine.c | 24 ++++--- src/proxy.c | 196 ++++++++++++++++++++++++++------------------------- src/proxy.h | 4 +- 5 files changed, 136 insertions(+), 118 deletions(-) diff --git a/src/cell.c b/src/cell.c index 78b9fdd2a2..2919839d50 100644 --- a/src/cell.c +++ b/src/cell.c @@ -167,14 +167,19 @@ int cell_link_sparts(struct cell *c, struct spart *sparts) { * @param c The #cell. * @param pc Pointer to an array of packed cells in which the * cells will be packed. + * @param with_gravity Are we running with gravity and hence need + * to exchange multipoles? * * @return The number of packed cells. */ -int cell_pack(struct cell *restrict c, struct pcell *restrict pc) { +int cell_pack(struct cell *restrict c, struct pcell *restrict pc, + const int with_gravity) { #ifdef WITH_MPI /* Start by packing the data of the current cell. */ + if(with_gravity) + pc->multipole = *(c->multipole); pc->h_max = c->h_max; pc->ti_hydro_end_min = c->ti_hydro_end_min; pc->ti_hydro_end_max = c->ti_hydro_end_max; @@ -196,7 +201,7 @@ int cell_pack(struct cell *restrict c, struct pcell *restrict pc) { for (int k = 0; k < 8; k++) if (c->progeny[k] != NULL) { pc->progeny[k] = count; - count += cell_pack(c->progeny[k], &pc[count]); + count += cell_pack(c->progeny[k], &pc[count], with_gravity); } else { pc->progeny[k] = -1; } @@ -251,15 +256,19 @@ int cell_pack_tags(const struct cell *c, int *tags) { * @param pc An array of packed #pcell. * @param c The #cell in which to unpack the #pcell. * @param s The #space in which the cells are created. + * @param with_gravity Are we running with gravity and hence need + * to exchange multipoles? * * @return The number of cells created. */ int cell_unpack(struct pcell *restrict pc, struct cell *restrict c, - struct space *restrict s) { + struct space *restrict s, const int with_gravity) { #ifdef WITH_MPI /* Unpack the current pcell. */ + if(with_gravity) + *(c->multipole) = pc->multipole; c->h_max = pc->h_max; c->ti_hydro_end_min = pc->ti_hydro_end_min; c->ti_hydro_end_max = pc->ti_hydro_end_max; @@ -304,7 +313,7 @@ int cell_unpack(struct pcell *restrict pc, struct cell *restrict c, temp->parent = c; c->progeny[k] = temp; c->split = 1; - count += cell_unpack(&pc[pc->progeny[k]], temp, s); + count += cell_unpack(&pc[pc->progeny[k]], temp, s, with_gravity); } /* Return the total number of unpacked cells. */ diff --git a/src/cell.h b/src/cell.h index 356e03773c..c209f8d99b 100644 --- a/src/cell.h +++ b/src/cell.h @@ -77,6 +77,12 @@ struct link { */ struct pcell { + /*! This cell's gravity-related tensors */ + struct gravity_tensors multipole; + + /*! Relative indices of the cell's progeny. */ + int progeny[8]; + /*! Maximal smoothing length. */ double h_max; @@ -116,9 +122,6 @@ struct pcell { /*! Number of #spart in this cell. */ int scount; - /*! Relative indices of the cell's progeny. */ - int progeny[8]; - #ifdef SWIFT_DEBUG_CHECKS /* Cell ID (for debugging) */ int cellID; @@ -485,8 +488,8 @@ int cell_mlocktree(struct cell *c); void cell_munlocktree(struct cell *c); int cell_slocktree(struct cell *c); void cell_sunlocktree(struct cell *c); -int cell_pack(struct cell *c, struct pcell *pc); -int cell_unpack(struct pcell *pc, struct cell *c, struct space *s); +int cell_pack(struct cell *c, struct pcell *pc, const int with_gravity); +int cell_unpack(struct pcell *pc, struct cell *c, struct space *s, const int with_gravity); int cell_pack_tags(const struct cell *c, int *tags); int cell_unpack_tags(const int *tags, struct cell *c); int cell_pack_end_step(struct cell *c, struct pcell_step *pcell); diff --git a/src/engine.c b/src/engine.c index ab8c4dd691..6059839788 100644 --- a/src/engine.c +++ b/src/engine.c @@ -1670,10 +1670,11 @@ void engine_exchange_cells(struct engine *e) { struct space *s = e->s; const int nr_proxies = e->nr_proxies; + const int with_gravity = e->policy & engine_policy_self_gravity; const ticks tic = getticks(); /* Exchange the cell structure with neighbouring ranks. */ - proxy_cells_exchange(e->proxies, e->nr_proxies, e->s); + proxy_cells_exchange(e->proxies, e->nr_proxies, e->s, with_gravity); /* Count the number of particles we need to import and re-allocate the buffer if needed. */ @@ -3948,18 +3949,14 @@ void engine_rebuild(struct engine *e, int clean_smoothing_length_values) { /* If in parallel, exchange the cell structure, top-level and neighbouring * multipoles. */ #ifdef WITH_MPI - engine_exchange_cells(e); - if (e->policy & engine_policy_self_gravity) engine_exchange_top_multipoles(e); + + engine_exchange_cells(e); #endif /* Re-build the tasks. */ engine_maketasks(e); -#ifdef WITH_MPI - if (e->policy & engine_policy_self_gravity) engine_exchange_proxy_multipoles(e); -#endif - /* Make the list of top-level cells that have tasks */ space_list_cells_with_tasks(e->s); @@ -5400,8 +5397,17 @@ void engine_makeproxies(struct engine *e) { if (with_gravity) { /* Are we too close for M2L? */ - if (!cell_can_use_pair_mm_rebuild(&cells[cid], &cells[cjd], e, - s)) + /* if (!cell_can_use_pair_mm_rebuild(&cells[cid], &cells[cjd], e, */ + /* s)) */ + if (((abs(ind[0] - ii) <= 5 || + abs(ind[0] - ii - cdim[0]) <= 5 || + abs(ind[0] - ii + cdim[0]) <= 5) && + (abs(ind[1] - jj) <= 5 || + abs(ind[1] - jj - cdim[1]) <= 5 || + abs(ind[1] - jj + cdim[1]) <= 5) && + (abs(ind[2] - kk) <= 5 || + abs(ind[2] - kk - cdim[2]) <= 5 || + abs(ind[2] - kk + cdim[2]) <= 5))) proxy_type |= (int)proxy_cell_type_gravity; } diff --git a/src/proxy.c b/src/proxy.c index 6156291378..a5d30848a3 100644 --- a/src/proxy.c +++ b/src/proxy.c @@ -159,103 +159,6 @@ void proxy_tags_exchange(struct proxy *proxies, int num_proxies, #endif } -/** - * @brief Exchange the cell structures with all proxies. - * - * @param proxies The list of #proxy that will send/recv cells. - * @param num_proxies The number of proxies. - * @param s The space into which the particles will be unpacked. - */ -void proxy_cells_exchange(struct proxy *proxies, int num_proxies, - struct space *s) { - -#ifdef WITH_MPI - - MPI_Request *reqs; - if ((reqs = (MPI_Request *)malloc(sizeof(MPI_Request) * 2 * num_proxies)) == - NULL) - error("Failed to allocate request buffers."); - MPI_Request *reqs_in = reqs; - MPI_Request *reqs_out = &reqs[num_proxies]; - - /* Run through the cells and get the size of the ones that will be sent off. - */ - int count_out = 0; - int offset[s->nr_cells]; - for (int k = 0; k < s->nr_cells; k++) { - offset[k] = count_out; - if (s->cells_top[k].sendto) - count_out += - (s->cells_top[k].pcell_size = cell_getsize(&s->cells_top[k])); - } - - /* Allocate the pcells. */ - struct pcell *pcells = NULL; - if (posix_memalign((void **)&pcells, SWIFT_CACHE_ALIGNMENT, - sizeof(struct pcell) * count_out) != 0) - error("Failed to allocate pcell buffer."); - - /* Pack the cells. */ - for (int k = 0; k < s->nr_cells; k++) - if (s->cells_top[k].sendto) { - cell_pack(&s->cells_top[k], &pcells[offset[k]]); - s->cells_top[k].pcell = &pcells[offset[k]]; - } - - /* Launch the first part of the exchange. */ - for (int k = 0; k < num_proxies; k++) { - proxy_cells_exchange_first(&proxies[k]); - reqs_in[k] = proxies[k].req_cells_count_in; - reqs_out[k] = proxies[k].req_cells_count_out; - } - - /* Wait for each count to come in and start the recv. */ - for (int k = 0; k < num_proxies; k++) { - int pid = MPI_UNDEFINED; - MPI_Status status; - if (MPI_Waitany(num_proxies, reqs_in, &pid, &status) != MPI_SUCCESS || - pid == MPI_UNDEFINED) - error("MPI_Waitany failed."); - // message( "request from proxy %i has arrived." , pid ); - proxy_cells_exchange_second(&proxies[pid]); - } - - /* Wait for all the sends to have finished too. */ - if (MPI_Waitall(num_proxies, reqs_out, MPI_STATUSES_IGNORE) != MPI_SUCCESS) - error("MPI_Waitall on sends failed."); - - /* Set the requests for the cells. */ - for (int k = 0; k < num_proxies; k++) { - reqs_in[k] = proxies[k].req_cells_in; - reqs_out[k] = proxies[k].req_cells_out; - } - - /* Wait for each pcell array to come in from the proxies. */ - for (int k = 0; k < num_proxies; k++) { - int pid = MPI_UNDEFINED; - MPI_Status status; - if (MPI_Waitany(num_proxies, reqs_in, &pid, &status) != MPI_SUCCESS || - pid == MPI_UNDEFINED) - error("MPI_Waitany failed."); - // message( "cell data from proxy %i has arrived." , pid ); - for (int count = 0, j = 0; j < proxies[pid].nr_cells_in; j++) - count += cell_unpack(&proxies[pid].pcells_in[count], - proxies[pid].cells_in[j], s); - } - - /* Wait for all the sends to have finished too. */ - if (MPI_Waitall(num_proxies, reqs_out, MPI_STATUSES_IGNORE) != MPI_SUCCESS) - error("MPI_Waitall on sends failed."); - - /* Clean up. */ - free(reqs); - free(pcells); - -#else - error("SWIFT was not compiled with MPI support."); -#endif -} - /** * @brief Exchange cells with a remote node, first part. * @@ -350,6 +253,105 @@ void proxy_cells_exchange_second(struct proxy *p) { #endif } +/** + * @brief Exchange the cell structures with all proxies. + * + * @param proxies The list of #proxy that will send/recv cells. + * @param num_proxies The number of proxies. + * @param s The space into which the particles will be unpacked. + * @param with_gravity Are we running with gravity and hence need + * to exchange multipoles? + */ +void proxy_cells_exchange(struct proxy *proxies, int num_proxies, + struct space *s, const int with_gravity) { + +#ifdef WITH_MPI + + MPI_Request *reqs; + if ((reqs = (MPI_Request *)malloc(sizeof(MPI_Request) * 2 * num_proxies)) == + NULL) + error("Failed to allocate request buffers."); + MPI_Request *reqs_in = reqs; + MPI_Request *reqs_out = &reqs[num_proxies]; + + /* Run through the cells and get the size of the ones that will be sent off. + */ + int count_out = 0; + int offset[s->nr_cells]; + for (int k = 0; k < s->nr_cells; k++) { + offset[k] = count_out; + if (s->cells_top[k].sendto) + count_out += + (s->cells_top[k].pcell_size = cell_getsize(&s->cells_top[k])); + } + + /* Allocate the pcells. */ + struct pcell *pcells = NULL; + if (posix_memalign((void **)&pcells, SWIFT_CACHE_ALIGNMENT, + sizeof(struct pcell) * count_out) != 0) + error("Failed to allocate pcell buffer."); + + /* Pack the cells. */ + for (int k = 0; k < s->nr_cells; k++) + if (s->cells_top[k].sendto) { + cell_pack(&s->cells_top[k], &pcells[offset[k]], with_gravity); + s->cells_top[k].pcell = &pcells[offset[k]]; + } + + /* Launch the first part of the exchange. */ + for (int k = 0; k < num_proxies; k++) { + proxy_cells_exchange_first(&proxies[k]); + reqs_in[k] = proxies[k].req_cells_count_in; + reqs_out[k] = proxies[k].req_cells_count_out; + } + + /* Wait for each count to come in and start the recv. */ + for (int k = 0; k < num_proxies; k++) { + int pid = MPI_UNDEFINED; + MPI_Status status; + if (MPI_Waitany(num_proxies, reqs_in, &pid, &status) != MPI_SUCCESS || + pid == MPI_UNDEFINED) + error("MPI_Waitany failed."); + // message( "request from proxy %i has arrived." , pid ); + proxy_cells_exchange_second(&proxies[pid]); + } + + /* Wait for all the sends to have finished too. */ + if (MPI_Waitall(num_proxies, reqs_out, MPI_STATUSES_IGNORE) != MPI_SUCCESS) + error("MPI_Waitall on sends failed."); + + /* Set the requests for the cells. */ + for (int k = 0; k < num_proxies; k++) { + reqs_in[k] = proxies[k].req_cells_in; + reqs_out[k] = proxies[k].req_cells_out; + } + + /* Wait for each pcell array to come in from the proxies. */ + for (int k = 0; k < num_proxies; k++) { + int pid = MPI_UNDEFINED; + MPI_Status status; + if (MPI_Waitany(num_proxies, reqs_in, &pid, &status) != MPI_SUCCESS || + pid == MPI_UNDEFINED) + error("MPI_Waitany failed."); + // message( "cell data from proxy %i has arrived." , pid ); + for (int count = 0, j = 0; j < proxies[pid].nr_cells_in; j++) + count += cell_unpack(&proxies[pid].pcells_in[count], + proxies[pid].cells_in[j], s, with_gravity); + } + + /* Wait for all the sends to have finished too. */ + if (MPI_Waitall(num_proxies, reqs_out, MPI_STATUSES_IGNORE) != MPI_SUCCESS) + error("MPI_Waitall on sends failed."); + + /* Clean up. */ + free(reqs); + free(pcells); + +#else + error("SWIFT was not compiled with MPI support."); +#endif +} + /** * @brief Add a cell to the given proxy's input list. * diff --git a/src/proxy.h b/src/proxy.h index 63f51846d0..fbc3f1a163 100644 --- a/src/proxy.h +++ b/src/proxy.h @@ -102,9 +102,7 @@ void proxy_parts_exchange_second(struct proxy *p); void proxy_addcell_in(struct proxy *p, struct cell *c, int type); void proxy_addcell_out(struct proxy *p, struct cell *c, int type); void proxy_cells_exchange(struct proxy *proxies, int num_proxies, - struct space *s); -void proxy_cells_exchange_first(struct proxy *p); -void proxy_cells_exchange_second(struct proxy *p); + struct space *s, int with_gravity); void proxy_tags_exchange(struct proxy *proxies, int num_proxies, struct space *s); -- GitLab