From cf71c7aac326cfc20a4593f53994f3497e7a2537 Mon Sep 17 00:00:00 2001
From: Matthieu Schaller <matthieu.schaller@durham.ac.uk>
Date: Thu, 28 Sep 2017 17:39:01 +0100
Subject: [PATCH] All-reduce all the top-level multipoles after a rebuild() to
 make sure we have all information in hands to construct the tasks.

---
 src/cell.c               |   3 --
 src/engine.c             | 101 +++++++++++++++++++++++++++++++++------
 src/engine.h             |   4 +-
 src/multipole.h          |  10 ++--
 src/runner.c             |   4 +-
 src/runner_doiact_grav.h |   2 +-
 src/space.c              |  10 ++--
 7 files changed, 102 insertions(+), 32 deletions(-)

diff --git a/src/cell.c b/src/cell.c
index cbc35a15c1..07d7f280f9 100644
--- a/src/cell.c
+++ b/src/cell.c
@@ -1100,9 +1100,6 @@ void cell_check_multipole_drift_point(struct cell *c, void *data) {
 
   const integertime_t ti_drift = *(integertime_t *)data;
 
-  /* Only check local cells */
-  if (c->nodeID != engine_rank) return;
-
   if (c->ti_old_multipole != ti_drift)
     error(
         "Cell multipole in an incorrect time-zone! c->ti_old_multipole=%lld "
diff --git a/src/engine.c b/src/engine.c
index 7de758c86e..a4b835fb68 100644
--- a/src/engine.c
+++ b/src/engine.c
@@ -1715,12 +1715,84 @@ void engine_exchange_strays(struct engine *e, size_t offset_parts,
 #endif
 }
 
+/**
+ * @brief Exchanges the top-level multipoles between all the nodes
+ * such that every node has a multipole for each top-level cell.
+ *
+ * @param e The #engine.
+ */
+void engine_exchange_top_multipoles(struct engine *e) {
+  
+#ifdef WITH_MPI
+
+#ifdef SWIFT_DEBUG_CHECKS
+  for(int i=0; i<e->s->nr_cells; ++i){
+    const struct gravity_tensors *m = &e->s->multipoles_top[i]; 
+    if(e->s->cells_top[i].nodeID == engine_rank) {
+      if(m->m_pole.M_000 > 0.) {
+	if(m->CoM[0] < 0. || m->CoM[0] > e->s->dim[0])
+	  error("Invalid multipole position in X");
+	if(m->CoM[1] < 0. || m->CoM[1] > e->s->dim[1])
+	  error("Invalid multipole position in Y");
+	if(m->CoM[2] < 0. || m->CoM[2] > e->s->dim[2])
+	  error("Invalid multipole position in Z");
+      }
+    } else {
+      if(m->m_pole.M_000 != 0.) error("Non-zero mass for foreign m-pole");
+      if(m->CoM[0] != 0.) error("Non-zero position in X for foreign m-pole");
+      if(m->CoM[1] != 0.) error("Non-zero position in Y for foreign m-pole");
+      if(m->CoM[2] != 0.) error("Non-zero position in Z for foreign m-pole");
+      if(m->m_pole.num_gpart != 0) error("Non-zero gpart count in foreign m-pole");
+    }
+  }
+#endif
+
+  /* Each node (space) has constructed its own top-level multipoles.  
+   * We now need to make sure every other node knows about them. 
+   *
+   * WARNING: Adult stuff ahead: don't do this at home!
+   *
+   * Since all nodes have their top-level multi-poles computed 
+   * and all foreign ones set to 0 (all bytes), we can gather all the m-poles
+   * by doing a bit-wise OR reduction across all the nodes directly in 
+   * place inside the multi-poles_top array. 
+   * This only works if the foreign m-poles on every nodes are zeroed and no 
+   * multi-pole is present on more than one node (two things guaranteed by the
+   * domain decomposition).
+   */
+  MPI_Allreduce(MPI_IN_PLACE, e->s->multipoles_top,
+  		e->s->nr_cells*sizeof(struct gravity_tensors), MPI_BYTE,
+  		MPI_BOR, MPI_COMM_WORLD);
+  
+#ifdef SWIFT_DEBUG_CHECKS
+  long long counter = 0;
+
+  /* Let's check that what we received makes sense */
+  for(int i=0; i<e->s->nr_cells; ++i){
+    const struct gravity_tensors *m = &e->s->multipoles_top[i]; 
+    counter += m->m_pole.num_gpart;
+    if(m->m_pole.M_000 > 0.) {
+      if(m->CoM[0] < 0. || m->CoM[0] > e->s->dim[0])
+	error("Invalid multipole position in X");
+      if(m->CoM[1] < 0. || m->CoM[1] > e->s->dim[1])
+	error("Invalid multipole position in Y");
+      if(m->CoM[2] < 0. || m->CoM[2] > e->s->dim[2])
+	error("Invalid multipole position in Z");
+    }
+  }
+  if(counter != e->total_nr_gparts)
+    error("Total particles in multipoles inconsistent with engine");
+#endif
+
+#else
+  error("SWIFT was not compiled with MPI support.");
+#endif
+}
+
 /**
  * @brief Constructs the top-level tasks for the short-range gravity
  * and long-range gravity interactions.
  *
- * - One FTT task per MPI rank.
- * - Multiple gravity ghosts for dependencies.
  * - All top-cells get a self task.
  * - All pairs within range according to the multipole acceptance
  *   criterion get a pair task.
@@ -1785,15 +1857,12 @@ void engine_make_self_gravity_tasks_mapper(void *map_data, int num_elements,
           const int cjd = cell_getid(cdim, ii, jj, kk);
           struct cell *cj = &cells[cjd];
 
-          /* Avoid duplicates */
-          if (cid <= cjd) continue;
+          /* Avoid duplicates that are purely local */
+          if (cid <= cjd && cj->nodeID == nodeID) continue;
 
           /* Skip cells without gravity particles */
           if (cj->gcount == 0) continue;
 
-          /* Is that neighbour local ? */
-          if (cj->nodeID != nodeID) continue;  // MATTHIEU
-
           /* Recover the multipole information */
           const struct gravity_tensors *const multi_j = cj->multipole;
 
@@ -1826,11 +1895,10 @@ void engine_make_self_gravity_tasks_mapper(void *map_data, int num_elements,
 
 /**
  * @brief Constructs the top-level tasks for the short-range gravity
- * interactions.
+ * interactions (master function).
  *
- * - All top-cells get a self task.
- * - All pairs within range according to the multipole acceptance
- *   criterion get a pair task.
+ * - Create the FFT task and the array of gravity ghosts. 
+ * - Call the mapper function to create the other tasks.
  *
  * @param e The #engine.
  */
@@ -3073,9 +3141,12 @@ void engine_rebuild(struct engine *e, int clean_h_values) {
   /* Initial cleaning up session ? */
   if (clean_h_values) space_sanitize(e->s);
 
-/* If in parallel, exchange the cell structure. */
+  /* If in parallel, exchange the cell structure and top-level multipoles. */
 #ifdef WITH_MPI
   engine_exchange_cells(e);
+
+  if(e->policy & engine_policy_self_gravity)
+    engine_exchange_top_multipoles(e);
 #endif
 
   /* Re-build the tasks. */
@@ -3526,11 +3597,11 @@ void engine_init_particles(struct engine *e, int flag_entropy_ICs,
   if (e->policy & engine_policy_self_gravity) {
     for (int i = 0; i < e->s->nr_cells; ++i)
       num_gpart_mpole += e->s->cells_top[i].multipole->m_pole.num_gpart;
-    if (num_gpart_mpole != e->s->nr_gparts)
+    if (num_gpart_mpole != e->total_nr_gparts)
       error(
           "Multipoles don't contain the total number of gpart s->nr_gpart=%zd, "
           "m_poles=%zd",
-          e->s->nr_gparts, num_gpart_mpole);
+          e->total_nr_gparts, num_gpart_mpole);
   }
 #endif
 
@@ -4294,7 +4365,7 @@ void engine_unpin() {
  */
 void engine_init(struct engine *e, struct space *s,
                  const struct swift_params *params, int nr_nodes, int nodeID,
-                 int nr_threads, int Ngas, int Ndm, int with_aff, int policy,
+                 int nr_threads, long long Ngas, long long Ndm, int with_aff, int policy,
                  int verbose, struct repartition *reparttype,
                  const struct unit_system *internal_units,
                  const struct phys_const *physical_constants,
diff --git a/src/engine.h b/src/engine.h
index 2b5a4eee7c..df1d047122 100644
--- a/src/engine.h
+++ b/src/engine.h
@@ -147,7 +147,7 @@ struct engine {
   size_t updates, g_updates, s_updates;
 
   /* Total numbers of particles in the system. */
-  size_t total_nr_parts, total_nr_gparts;
+  long long total_nr_parts, total_nr_gparts;
 
   /* The internal system of units */
   const struct unit_system *internal_units;
@@ -265,7 +265,7 @@ void engine_print_stats(struct engine *e);
 void engine_dump_snapshot(struct engine *e);
 void engine_init(struct engine *e, struct space *s,
                  const struct swift_params *params, int nr_nodes, int nodeID,
-                 int nr_threads, int Ngas, int Ndm, int with_aff, int policy,
+                 int nr_threads, long long Ngas, long long Ndm, int with_aff, int policy,
                  int verbose, struct repartition *reparttype,
                  const struct unit_system *internal_units,
                  const struct phys_const *physical_constants,
diff --git a/src/multipole.h b/src/multipole.h
index d842081814..9e157370fc 100644
--- a/src/multipole.h
+++ b/src/multipole.h
@@ -181,19 +181,19 @@ struct gravity_tensors {
 
       /*! Multipole mass */
       struct multipole m_pole;
-
+      
       /*! Field tensor for the potential */
       struct grav_tensor pot;
-
+      
       /*! Centre of mass of the matter dsitribution */
       double CoM[3];
-
+      
       /*! Centre of mass of the matter dsitribution at the last rebuild */
       double CoM_rebuild[3];
-
+      
       /*! Upper limit of the CoM<->gpart distance */
       double r_max;
-
+    
       /*! Upper limit of the CoM<->gpart distance at the last rebuild */
       double r_max_rebuild;
     };
diff --git a/src/runner.c b/src/runner.c
index 87e5170735..7d00045218 100644
--- a/src/runner.c
+++ b/src/runner.c
@@ -1476,14 +1476,14 @@ void runner_do_end_force(struct runner *r, struct cell *c, int timer) {
 
           /* Check that this gpart has interacted with all the other
            * particles (via direct or multipoles) in the box */
-          if (gp->num_interacted != (long long)e->s->nr_gparts)
+          if (gp->num_interacted != (long long)e->total_nr_gparts)
             error(
                 "g-particle (id=%lld, type=%s) did not interact "
                 "gravitationally "
                 "with all other gparts gp->num_interacted=%lld, "
                 "total_gparts=%zd",
                 gp->id_or_neg_offset, part_type_names[gp->type],
-                gp->num_interacted, e->s->nr_gparts);
+                gp->num_interacted, e->total_nr_gparts);
         }
 #endif
       }
diff --git a/src/runner_doiact_grav.h b/src/runner_doiact_grav.h
index 5d4de609eb..8ff90412cc 100644
--- a/src/runner_doiact_grav.h
+++ b/src/runner_doiact_grav.h
@@ -148,7 +148,7 @@ void runner_dopair_grav_mm(const struct runner *r, struct cell *restrict ci,
 #ifdef SWIFT_DEBUG_CHECKS
   if (ci == cj) error("Interacting a cell with itself using M2L");
 
-  if (multi_j->M_000 == 0.f) error("Multipole does not seem to have been set.");
+  if (multi_j->num_gpart == 0) error("Multipole does not seem to have been set.");
 
   if (ci->multipole->pot.ti_init != e->ti_current)
     error("ci->grav tensor not initialised.");
diff --git a/src/space.c b/src/space.c
index 739955ebaf..022d1ca5d9 100644
--- a/src/space.c
+++ b/src/space.c
@@ -2234,10 +2234,12 @@ void space_split_recursive(struct space *s, struct cell *c,
         c->multipole->r_max = sqrt(dx * dx + dy * dy + dz * dz);
       } else {
         gravity_multipole_init(&c->multipole->m_pole);
-        c->multipole->CoM[0] = c->loc[0] + c->width[0] / 2.;
-        c->multipole->CoM[1] = c->loc[1] + c->width[1] / 2.;
-        c->multipole->CoM[2] = c->loc[2] + c->width[2] / 2.;
-        c->multipole->r_max = 0.;
+	if(c->nodeID == engine_rank) {
+	  c->multipole->CoM[0] = c->loc[0] + c->width[0] / 2.;
+	  c->multipole->CoM[1] = c->loc[1] + c->width[1] / 2.;
+	  c->multipole->CoM[2] = c->loc[2] + c->width[2] / 2.;
+	  c->multipole->r_max = 0.;
+	}
       }
       c->multipole->r_max_rebuild = c->multipole->r_max;
       c->multipole->CoM_rebuild[0] = c->multipole->CoM[0];
-- 
GitLab