From 8b25dfc44abb4cf4749e74f6535507566ca36797 Mon Sep 17 00:00:00 2001 From: Matthieu Schaller <schaller@strw.leidenuniv.nl> Date: Mon, 24 Sep 2018 17:34:51 +0100 Subject: [PATCH] Added more detailed timing of engine_exchange_cells() --- src/engine.c | 12 ++++++++++++ src/proxy.c | 18 ++++++++++++++++++ 2 files changed, 30 insertions(+) diff --git a/src/engine.c b/src/engine.c index 954aaec272..3d83f739b8 100644 --- a/src/engine.c +++ b/src/engine.c @@ -1676,6 +1676,8 @@ void engine_exchange_cells(struct engine *e) { /* Exchange the cell structure with neighbouring ranks. */ proxy_cells_exchange(e->proxies, e->nr_proxies, e->s, with_gravity); + ticks tic2 = getticks(); + /* Count the number of particles we need to import and re-allocate the buffer if needed. */ size_t count_parts_in = 0, count_gparts_in = 0, count_sparts_in = 0; @@ -1709,6 +1711,12 @@ void engine_exchange_cells(struct engine *e) { error("Failed to allocate foreign spart data."); } + if (e->verbose) + message("Counting and allocating arrays took %.3f %s.", + clocks_from_ticks(getticks() - tic2), clocks_getunit()); + + tic2 = getticks(); + /* Unpack the cells and link to the particle data. */ struct part *parts = s->parts_foreign; struct gpart *gparts = s->gparts_foreign; @@ -1734,6 +1742,10 @@ void engine_exchange_cells(struct engine *e) { s->nr_gparts_foreign = gparts - s->gparts_foreign; s->nr_sparts_foreign = sparts - s->sparts_foreign; + if (e->verbose) + message("Recursively linking arrays took %.3f %s.", + clocks_from_ticks(getticks() - tic2), clocks_getunit()); + if (e->verbose) message("took %.3f %s.", clocks_from_ticks(getticks() - tic), clocks_getunit()); diff --git a/src/proxy.c b/src/proxy.c index 691db71a73..80b33a776c 100644 --- a/src/proxy.c +++ b/src/proxy.c @@ -290,6 +290,8 @@ void proxy_cells_exchange(struct proxy *proxies, int num_proxies, MPI_Request *reqs_in = reqs; MPI_Request *reqs_out = &reqs[num_proxies]; + ticks tic2 = getticks(); + /* Run through the cells and get the size of the ones that will be sent off. */ int count_out = 0; @@ -301,12 +303,18 @@ void proxy_cells_exchange(struct proxy *proxies, int num_proxies, (s->cells_top[k].pcell_size = cell_getsize(&s->cells_top[k])); } + if (s->e->verbose) + message("Counting cells to send took %.3f %s.", + clocks_from_ticks(getticks() - tic2), clocks_getunit()); + /* Allocate the pcells. */ struct pcell *pcells = NULL; if (posix_memalign((void **)&pcells, SWIFT_CACHE_ALIGNMENT, sizeof(struct pcell) * count_out) != 0) error("Failed to allocate pcell buffer."); + tic2 = getticks(); + /* Pack the cells. */ for (int k = 0; k < s->nr_cells; k++) if (s->cells_top[k].sendto) { @@ -314,6 +322,10 @@ void proxy_cells_exchange(struct proxy *proxies, int num_proxies, s->cells_top[k].pcell = &pcells[offset[k]]; } + if (s->e->verbose) + message("Packing cells took %.3f %s.", clocks_from_ticks(getticks() - tic2), + clocks_getunit()); + /* Launch the first part of the exchange. */ for (int k = 0; k < num_proxies; k++) { proxy_cells_exchange_first(&proxies[k]); @@ -342,6 +354,8 @@ void proxy_cells_exchange(struct proxy *proxies, int num_proxies, reqs_out[k] = proxies[k].req_cells_out; } + tic2 = getticks(); + /* Wait for each pcell array to come in from the proxies. */ for (int k = 0; k < num_proxies; k++) { int pid = MPI_UNDEFINED; @@ -355,6 +369,10 @@ void proxy_cells_exchange(struct proxy *proxies, int num_proxies, proxies[pid].cells_in[j], s, with_gravity); } + if (s->e->verbose) + message("Un-packing cells took %.3f %s.", + clocks_from_ticks(getticks() - tic2), clocks_getunit()); + /* Wait for all the sends to have finished too. */ if (MPI_Waitall(num_proxies, reqs_out, MPI_STATUSES_IGNORE) != MPI_SUCCESS) error("MPI_Waitall on sends failed."); -- GitLab