From 8b25dfc44abb4cf4749e74f6535507566ca36797 Mon Sep 17 00:00:00 2001
From: Matthieu Schaller <schaller@strw.leidenuniv.nl>
Date: Mon, 24 Sep 2018 17:34:51 +0100
Subject: [PATCH] Added more detailed timing of engine_exchange_cells()

---
 src/engine.c | 12 ++++++++++++
 src/proxy.c  | 18 ++++++++++++++++++
 2 files changed, 30 insertions(+)

diff --git a/src/engine.c b/src/engine.c
index 954aaec272..3d83f739b8 100644
--- a/src/engine.c
+++ b/src/engine.c
@@ -1676,6 +1676,8 @@ void engine_exchange_cells(struct engine *e) {
   /* Exchange the cell structure with neighbouring ranks. */
   proxy_cells_exchange(e->proxies, e->nr_proxies, e->s, with_gravity);
 
+  ticks tic2 = getticks();
+
   /* Count the number of particles we need to import and re-allocate
      the buffer if needed. */
   size_t count_parts_in = 0, count_gparts_in = 0, count_sparts_in = 0;
@@ -1709,6 +1711,12 @@ void engine_exchange_cells(struct engine *e) {
       error("Failed to allocate foreign spart data.");
   }
 
+  if (e->verbose)
+    message("Counting and allocating arrays took %.3f %s.",
+            clocks_from_ticks(getticks() - tic2), clocks_getunit());
+
+  tic2 = getticks();
+
   /* Unpack the cells and link to the particle data. */
   struct part *parts = s->parts_foreign;
   struct gpart *gparts = s->gparts_foreign;
@@ -1734,6 +1742,10 @@ void engine_exchange_cells(struct engine *e) {
   s->nr_gparts_foreign = gparts - s->gparts_foreign;
   s->nr_sparts_foreign = sparts - s->sparts_foreign;
 
+  if (e->verbose)
+    message("Recursively linking arrays took %.3f %s.",
+            clocks_from_ticks(getticks() - tic2), clocks_getunit());
+
   if (e->verbose)
     message("took %.3f %s.", clocks_from_ticks(getticks() - tic),
             clocks_getunit());
diff --git a/src/proxy.c b/src/proxy.c
index 691db71a73..80b33a776c 100644
--- a/src/proxy.c
+++ b/src/proxy.c
@@ -290,6 +290,8 @@ void proxy_cells_exchange(struct proxy *proxies, int num_proxies,
   MPI_Request *reqs_in = reqs;
   MPI_Request *reqs_out = &reqs[num_proxies];
 
+  ticks tic2 = getticks();
+
   /* Run through the cells and get the size of the ones that will be sent off.
    */
   int count_out = 0;
@@ -301,12 +303,18 @@ void proxy_cells_exchange(struct proxy *proxies, int num_proxies,
           (s->cells_top[k].pcell_size = cell_getsize(&s->cells_top[k]));
   }
 
+  if (s->e->verbose)
+    message("Counting cells to send took %.3f %s.",
+            clocks_from_ticks(getticks() - tic2), clocks_getunit());
+
   /* Allocate the pcells. */
   struct pcell *pcells = NULL;
   if (posix_memalign((void **)&pcells, SWIFT_CACHE_ALIGNMENT,
                      sizeof(struct pcell) * count_out) != 0)
     error("Failed to allocate pcell buffer.");
 
+  tic2 = getticks();
+
   /* Pack the cells. */
   for (int k = 0; k < s->nr_cells; k++)
     if (s->cells_top[k].sendto) {
@@ -314,6 +322,10 @@ void proxy_cells_exchange(struct proxy *proxies, int num_proxies,
       s->cells_top[k].pcell = &pcells[offset[k]];
     }
 
+  if (s->e->verbose)
+    message("Packing cells took %.3f %s.", clocks_from_ticks(getticks() - tic2),
+            clocks_getunit());
+
   /* Launch the first part of the exchange. */
   for (int k = 0; k < num_proxies; k++) {
     proxy_cells_exchange_first(&proxies[k]);
@@ -342,6 +354,8 @@ void proxy_cells_exchange(struct proxy *proxies, int num_proxies,
     reqs_out[k] = proxies[k].req_cells_out;
   }
 
+  tic2 = getticks();
+
   /* Wait for each pcell array to come in from the proxies. */
   for (int k = 0; k < num_proxies; k++) {
     int pid = MPI_UNDEFINED;
@@ -355,6 +369,10 @@ void proxy_cells_exchange(struct proxy *proxies, int num_proxies,
                            proxies[pid].cells_in[j], s, with_gravity);
   }
 
+  if (s->e->verbose)
+    message("Un-packing cells took %.3f %s.",
+            clocks_from_ticks(getticks() - tic2), clocks_getunit());
+
   /* Wait for all the sends to have finished too. */
   if (MPI_Waitall(num_proxies, reqs_out, MPI_STATUSES_IGNORE) != MPI_SUCCESS)
     error("MPI_Waitall on sends failed.");
-- 
GitLab