From 5dd4aef0f34adfd7ac17b0656f708ac8587552c7 Mon Sep 17 00:00:00 2001
From: Matthieu Schaller <matthieu.schaller@durham.ac.uk>
Date: Sun, 26 Aug 2018 11:49:20 +0100
Subject: [PATCH] Exchange the multipoles at the same time as the cells when
 running with gravity.

---
 src/cell.c   |  17 +++--
 src/cell.h   |  13 ++--
 src/engine.c |  24 ++++---
 src/proxy.c  | 196 ++++++++++++++++++++++++++-------------------------
 src/proxy.h  |   4 +-
 5 files changed, 136 insertions(+), 118 deletions(-)

diff --git a/src/cell.c b/src/cell.c
index 78b9fdd2a2..2919839d50 100644
--- a/src/cell.c
+++ b/src/cell.c
@@ -167,14 +167,19 @@ int cell_link_sparts(struct cell *c, struct spart *sparts) {
  * @param c The #cell.
  * @param pc Pointer to an array of packed cells in which the
  *      cells will be packed.
+ * @param with_gravity Are we running with gravity and hence need
+ *      to exchange multipoles?
  *
  * @return The number of packed cells.
  */
-int cell_pack(struct cell *restrict c, struct pcell *restrict pc) {
+int cell_pack(struct cell *restrict c, struct pcell *restrict pc,
+	      const int with_gravity) {
 
 #ifdef WITH_MPI
 
   /* Start by packing the data of the current cell. */
+  if(with_gravity)
+    pc->multipole = *(c->multipole);
   pc->h_max = c->h_max;
   pc->ti_hydro_end_min = c->ti_hydro_end_min;
   pc->ti_hydro_end_max = c->ti_hydro_end_max;
@@ -196,7 +201,7 @@ int cell_pack(struct cell *restrict c, struct pcell *restrict pc) {
   for (int k = 0; k < 8; k++)
     if (c->progeny[k] != NULL) {
       pc->progeny[k] = count;
-      count += cell_pack(c->progeny[k], &pc[count]);
+      count += cell_pack(c->progeny[k], &pc[count], with_gravity);
     } else {
       pc->progeny[k] = -1;
     }
@@ -251,15 +256,19 @@ int cell_pack_tags(const struct cell *c, int *tags) {
  * @param pc An array of packed #pcell.
  * @param c The #cell in which to unpack the #pcell.
  * @param s The #space in which the cells are created.
+ * @param with_gravity Are we running with gravity and hence need
+ *      to exchange multipoles?
  *
  * @return The number of cells created.
  */
 int cell_unpack(struct pcell *restrict pc, struct cell *restrict c,
-                struct space *restrict s) {
+                struct space *restrict s, const int with_gravity) {
 
 #ifdef WITH_MPI
 
   /* Unpack the current pcell. */
+  if(with_gravity)
+    *(c->multipole) = pc->multipole;
   c->h_max = pc->h_max;
   c->ti_hydro_end_min = pc->ti_hydro_end_min;
   c->ti_hydro_end_max = pc->ti_hydro_end_max;
@@ -304,7 +313,7 @@ int cell_unpack(struct pcell *restrict pc, struct cell *restrict c,
       temp->parent = c;
       c->progeny[k] = temp;
       c->split = 1;
-      count += cell_unpack(&pc[pc->progeny[k]], temp, s);
+      count += cell_unpack(&pc[pc->progeny[k]], temp, s, with_gravity);
     }
 
   /* Return the total number of unpacked cells. */
diff --git a/src/cell.h b/src/cell.h
index 356e03773c..c209f8d99b 100644
--- a/src/cell.h
+++ b/src/cell.h
@@ -77,6 +77,12 @@ struct link {
  */
 struct pcell {
 
+  /*! This cell's gravity-related tensors */
+  struct gravity_tensors multipole;
+
+  /*! Relative indices of the cell's progeny. */
+  int progeny[8];
+
   /*! Maximal smoothing length. */
   double h_max;
 
@@ -116,9 +122,6 @@ struct pcell {
   /*! Number of #spart in this cell. */
   int scount;
 
-  /*! Relative indices of the cell's progeny. */
-  int progeny[8];
-
 #ifdef SWIFT_DEBUG_CHECKS
   /* Cell ID (for debugging) */
   int cellID;
@@ -485,8 +488,8 @@ int cell_mlocktree(struct cell *c);
 void cell_munlocktree(struct cell *c);
 int cell_slocktree(struct cell *c);
 void cell_sunlocktree(struct cell *c);
-int cell_pack(struct cell *c, struct pcell *pc);
-int cell_unpack(struct pcell *pc, struct cell *c, struct space *s);
+int cell_pack(struct cell *c, struct pcell *pc, const int with_gravity);
+int cell_unpack(struct pcell *pc, struct cell *c, struct space *s, const int with_gravity);
 int cell_pack_tags(const struct cell *c, int *tags);
 int cell_unpack_tags(const int *tags, struct cell *c);
 int cell_pack_end_step(struct cell *c, struct pcell_step *pcell);
diff --git a/src/engine.c b/src/engine.c
index ab8c4dd691..6059839788 100644
--- a/src/engine.c
+++ b/src/engine.c
@@ -1670,10 +1670,11 @@ void engine_exchange_cells(struct engine *e) {
 
   struct space *s = e->s;
   const int nr_proxies = e->nr_proxies;
+  const int with_gravity = e->policy & engine_policy_self_gravity;
   const ticks tic = getticks();
 
   /* Exchange the cell structure with neighbouring ranks. */
-  proxy_cells_exchange(e->proxies, e->nr_proxies, e->s);
+  proxy_cells_exchange(e->proxies, e->nr_proxies, e->s, with_gravity);
 
   /* Count the number of particles we need to import and re-allocate
      the buffer if needed. */
@@ -3948,18 +3949,14 @@ void engine_rebuild(struct engine *e, int clean_smoothing_length_values) {
 /* If in parallel, exchange the cell structure, top-level and neighbouring
  * multipoles. */
 #ifdef WITH_MPI
-  engine_exchange_cells(e);
-
   if (e->policy & engine_policy_self_gravity) engine_exchange_top_multipoles(e);
+
+  engine_exchange_cells(e);
 #endif
 
   /* Re-build the tasks. */
   engine_maketasks(e);
 
-#ifdef WITH_MPI
-  if (e->policy & engine_policy_self_gravity) engine_exchange_proxy_multipoles(e);
-#endif
-
   /* Make the list of top-level cells that have tasks */
   space_list_cells_with_tasks(e->s);
 
@@ -5400,8 +5397,17 @@ void engine_makeproxies(struct engine *e) {
               if (with_gravity) {
 
                 /* Are we too close for M2L? */
-                if (!cell_can_use_pair_mm_rebuild(&cells[cid], &cells[cjd], e,
-                                                  s))
+                /* if (!cell_can_use_pair_mm_rebuild(&cells[cid], &cells[cjd], e, */
+                /*                                   s)) */
+                if (((abs(ind[0] - ii) <= 5 ||
+                      abs(ind[0] - ii - cdim[0]) <= 5 ||
+                      abs(ind[0] - ii + cdim[0]) <= 5) &&
+                     (abs(ind[1] - jj) <= 5 ||
+                      abs(ind[1] - jj - cdim[1]) <= 5 ||
+                      abs(ind[1] - jj + cdim[1]) <= 5) &&
+                     (abs(ind[2] - kk) <= 5 ||
+                      abs(ind[2] - kk - cdim[2]) <= 5 ||
+                      abs(ind[2] - kk + cdim[2]) <= 5)))
                   proxy_type |= (int)proxy_cell_type_gravity;
               }
 
diff --git a/src/proxy.c b/src/proxy.c
index 6156291378..a5d30848a3 100644
--- a/src/proxy.c
+++ b/src/proxy.c
@@ -159,103 +159,6 @@ void proxy_tags_exchange(struct proxy *proxies, int num_proxies,
 #endif
 }
 
-/**
- * @brief Exchange the cell structures with all proxies.
- *
- * @param proxies The list of #proxy that will send/recv cells.
- * @param num_proxies The number of proxies.
- * @param s The space into which the particles will be unpacked.
- */
-void proxy_cells_exchange(struct proxy *proxies, int num_proxies,
-                          struct space *s) {
-
-#ifdef WITH_MPI
-
-  MPI_Request *reqs;
-  if ((reqs = (MPI_Request *)malloc(sizeof(MPI_Request) * 2 * num_proxies)) ==
-      NULL)
-    error("Failed to allocate request buffers.");
-  MPI_Request *reqs_in = reqs;
-  MPI_Request *reqs_out = &reqs[num_proxies];
-
-  /* Run through the cells and get the size of the ones that will be sent off.
-   */
-  int count_out = 0;
-  int offset[s->nr_cells];
-  for (int k = 0; k < s->nr_cells; k++) {
-    offset[k] = count_out;
-    if (s->cells_top[k].sendto)
-      count_out +=
-          (s->cells_top[k].pcell_size = cell_getsize(&s->cells_top[k]));
-  }
-
-  /* Allocate the pcells. */
-  struct pcell *pcells = NULL;
-  if (posix_memalign((void **)&pcells, SWIFT_CACHE_ALIGNMENT,
-                     sizeof(struct pcell) * count_out) != 0)
-    error("Failed to allocate pcell buffer.");
-
-  /* Pack the cells. */
-  for (int k = 0; k < s->nr_cells; k++)
-    if (s->cells_top[k].sendto) {
-      cell_pack(&s->cells_top[k], &pcells[offset[k]]);
-      s->cells_top[k].pcell = &pcells[offset[k]];
-    }
-
-  /* Launch the first part of the exchange. */
-  for (int k = 0; k < num_proxies; k++) {
-    proxy_cells_exchange_first(&proxies[k]);
-    reqs_in[k] = proxies[k].req_cells_count_in;
-    reqs_out[k] = proxies[k].req_cells_count_out;
-  }
-
-  /* Wait for each count to come in and start the recv. */
-  for (int k = 0; k < num_proxies; k++) {
-    int pid = MPI_UNDEFINED;
-    MPI_Status status;
-    if (MPI_Waitany(num_proxies, reqs_in, &pid, &status) != MPI_SUCCESS ||
-        pid == MPI_UNDEFINED)
-      error("MPI_Waitany failed.");
-    // message( "request from proxy %i has arrived." , pid );
-    proxy_cells_exchange_second(&proxies[pid]);
-  }
-
-  /* Wait for all the sends to have finished too. */
-  if (MPI_Waitall(num_proxies, reqs_out, MPI_STATUSES_IGNORE) != MPI_SUCCESS)
-    error("MPI_Waitall on sends failed.");
-
-  /* Set the requests for the cells. */
-  for (int k = 0; k < num_proxies; k++) {
-    reqs_in[k] = proxies[k].req_cells_in;
-    reqs_out[k] = proxies[k].req_cells_out;
-  }
-
-  /* Wait for each pcell array to come in from the proxies. */
-  for (int k = 0; k < num_proxies; k++) {
-    int pid = MPI_UNDEFINED;
-    MPI_Status status;
-    if (MPI_Waitany(num_proxies, reqs_in, &pid, &status) != MPI_SUCCESS ||
-        pid == MPI_UNDEFINED)
-      error("MPI_Waitany failed.");
-    // message( "cell data from proxy %i has arrived." , pid );
-    for (int count = 0, j = 0; j < proxies[pid].nr_cells_in; j++)
-      count += cell_unpack(&proxies[pid].pcells_in[count],
-                           proxies[pid].cells_in[j], s);
-  }
-
-  /* Wait for all the sends to have finished too. */
-  if (MPI_Waitall(num_proxies, reqs_out, MPI_STATUSES_IGNORE) != MPI_SUCCESS)
-    error("MPI_Waitall on sends failed.");
-
-  /* Clean up. */
-  free(reqs);
-  free(pcells);
-
-#else
-  error("SWIFT was not compiled with MPI support.");
-#endif
-}
-
 /**
  * @brief Exchange cells with a remote node, first part.
  *
@@ -350,6 +253,105 @@ void proxy_cells_exchange_second(struct proxy *p) {
 #endif
 }
 
+/**
+ * @brief Exchange the cell structures with all proxies.
+ *
+ * @param proxies The list of #proxy that will send/recv cells.
+ * @param num_proxies The number of proxies.
+ * @param s The space into which the particles will be unpacked.
+ * @param with_gravity Are we running with gravity and hence need
+ *      to exchange multipoles?
+ */
+void proxy_cells_exchange(struct proxy *proxies, int num_proxies,
+                          struct space *s, const int with_gravity) {
+
+#ifdef WITH_MPI
+
+  MPI_Request *reqs;
+  if ((reqs = (MPI_Request *)malloc(sizeof(MPI_Request) * 2 * num_proxies)) ==
+      NULL)
+    error("Failed to allocate request buffers.");
+  MPI_Request *reqs_in = reqs;
+  MPI_Request *reqs_out = &reqs[num_proxies];
+
+  /* Run through the cells and get the size of the ones that will be sent off.
+   */
+  int count_out = 0;
+  int offset[s->nr_cells];
+  for (int k = 0; k < s->nr_cells; k++) {
+    offset[k] = count_out;
+    if (s->cells_top[k].sendto)
+      count_out +=
+          (s->cells_top[k].pcell_size = cell_getsize(&s->cells_top[k]));
+  }
+
+  /* Allocate the pcells. */
+  struct pcell *pcells = NULL;
+  if (posix_memalign((void **)&pcells, SWIFT_CACHE_ALIGNMENT,
+                     sizeof(struct pcell) * count_out) != 0)
+    error("Failed to allocate pcell buffer.");
+
+  /* Pack the cells. */
+  for (int k = 0; k < s->nr_cells; k++)
+    if (s->cells_top[k].sendto) {
+      cell_pack(&s->cells_top[k], &pcells[offset[k]], with_gravity);
+      s->cells_top[k].pcell = &pcells[offset[k]];
+    }
+
+  /* Launch the first part of the exchange. */
+  for (int k = 0; k < num_proxies; k++) {
+    proxy_cells_exchange_first(&proxies[k]);
+    reqs_in[k] = proxies[k].req_cells_count_in;
+    reqs_out[k] = proxies[k].req_cells_count_out;
+  }
+
+  /* Wait for each count to come in and start the recv. */
+  for (int k = 0; k < num_proxies; k++) {
+    int pid = MPI_UNDEFINED;
+    MPI_Status status;
+    if (MPI_Waitany(num_proxies, reqs_in, &pid, &status) != MPI_SUCCESS ||
+        pid == MPI_UNDEFINED)
+      error("MPI_Waitany failed.");
+    // message( "request from proxy %i has arrived." , pid );
+    proxy_cells_exchange_second(&proxies[pid]);
+  }
+
+  /* Wait for all the sends to have finished too. */
+  if (MPI_Waitall(num_proxies, reqs_out, MPI_STATUSES_IGNORE) != MPI_SUCCESS)
+    error("MPI_Waitall on sends failed.");
+
+  /* Set the requests for the cells. */
+  for (int k = 0; k < num_proxies; k++) {
+    reqs_in[k] = proxies[k].req_cells_in;
+    reqs_out[k] = proxies[k].req_cells_out;
+  }
+
+  /* Wait for each pcell array to come in from the proxies. */
+  for (int k = 0; k < num_proxies; k++) {
+    int pid = MPI_UNDEFINED;
+    MPI_Status status;
+    if (MPI_Waitany(num_proxies, reqs_in, &pid, &status) != MPI_SUCCESS ||
+        pid == MPI_UNDEFINED)
+      error("MPI_Waitany failed.");
+    // message( "cell data from proxy %i has arrived." , pid );
+    for (int count = 0, j = 0; j < proxies[pid].nr_cells_in; j++)
+      count += cell_unpack(&proxies[pid].pcells_in[count],
+                           proxies[pid].cells_in[j], s, with_gravity);
+  }
+
+  /* Wait for all the sends to have finished too. */
+  if (MPI_Waitall(num_proxies, reqs_out, MPI_STATUSES_IGNORE) != MPI_SUCCESS)
+    error("MPI_Waitall on sends failed.");
+
+  /* Clean up. */
+  free(reqs);
+  free(pcells);
+
+#else
+  error("SWIFT was not compiled with MPI support.");
+#endif
+}
+
 /**
  * @brief Add a cell to the given proxy's input list.
  *
diff --git a/src/proxy.h b/src/proxy.h
index 63f51846d0..fbc3f1a163 100644
--- a/src/proxy.h
+++ b/src/proxy.h
@@ -102,9 +102,7 @@ void proxy_parts_exchange_second(struct proxy *p);
 void proxy_addcell_in(struct proxy *p, struct cell *c, int type);
 void proxy_addcell_out(struct proxy *p, struct cell *c, int type);
 void proxy_cells_exchange(struct proxy *proxies, int num_proxies,
-                          struct space *s);
-void proxy_cells_exchange_first(struct proxy *p);
-void proxy_cells_exchange_second(struct proxy *p);
+                          struct space *s, int with_gravity);
 void proxy_tags_exchange(struct proxy *proxies, int num_proxies,
                          struct space *s);
 
-- 
GitLab