Commit fbd6aac8 authored by Matthieu Schaller

Merge branch 'gpart_send_recv' into 'new_timeline_mpi'

Add another communication sub-type for the gparts. Send the gparts when these tasks get enqueued.

This follows on from the addition of task sub-types for the communications: I have added one to ship the gparts between nodes.
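For reference, the `gpart_mpi_type` handle used by the new send/recv branches in the diff below has to be registered with MPI once at start-up. A minimal sketch of how such a type could be built, assuming the gparts are shipped as an opaque, contiguous block of bytes (the helper name and its argument below are hypothetical, not SWIFT's actual API):

```c
#include <mpi.h>
#include <stddef.h>

/* Handle used by MPI_Isend()/MPI_Irecv() when shipping arrays of gparts. */
MPI_Datatype gpart_mpi_type;

/* Hypothetical helper: treat one gravity particle as a contiguous run of
 * raw bytes. In SWIFT this would be called once at start-up, e.g. with
 * sizeof(struct gpart), before any send/recv task is enqueued. */
void gpart_create_mpi_type(size_t bytes_per_gpart) {
  MPI_Type_contiguous((int)bytes_per_gpart, MPI_BYTE, &gpart_mpi_type);
  MPI_Type_commit(&gpart_mpi_type);
}
```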

This also allows us to send/recv the gparts, rather than the parts, at enqueuing time. Note that the gpart communications are currently unused, since I never pushed the support for gravity over MPI to master.

It also allows us to loop only over the relevant particles we received, rather than over the whole particle zoo, when constructing the time information on the receiving side.
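The per-particle end-of-step time that `runner_do_recv_part()` / `runner_do_recv_gpart()` reduce into the cell's `ti_end_min`/`ti_end_max` comes from `get_integer_time_end()`, which is not part of this diff. Below is a minimal sketch of such a helper, assuming a time-bin `b` maps to a step of `2^b` internal time units; the typedefs and the function name are mine, not SWIFT's.

```c
#include <stdint.h>

typedef int64_t integertime_t; /* SWIFT's integer time type (assumption) */
typedef int8_t timebin_t;      /* SWIFT's time-bin type (assumption) */

/* Sketch: a particle in time-bin b is assumed to take steps of 2^b internal
 * time units, so its current step ends at the next multiple of 2^b at or
 * after ti_current. The real helper is get_integer_time_end(). */
static integertime_t integer_time_end_sketch(integertime_t ti_current,
                                             timebin_t bin) {
  if (bin <= 0) return 0;               /* empty or invalid bin */
  const integertime_t dti = 1LL << bin; /* step length for this bin */
  const integertime_t mod = ti_current % dti;
  return (mod == 0) ? ti_current : ti_current - mod + dti;
}
```

The debug check in the diff then asserts that the reduced `ti_end_min` is never in the past relative to `e->ti_current`.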

This merges into the `new_timeline_mpi` branch and not into `master`, as it is yet another MPI bug/feature fix.

See merge request !305
parents 7a3a5474 c1e5b26a
@@ -1208,20 +1208,18 @@ void runner_do_end_force(struct runner *r, struct cell *c, int timer) {
}
/**
* @brief Construct the cell properties from the received particles
* @brief Construct the cell properties from the received #part.
*
* @param r The runner thread.
* @param c The cell.
* @param timer Are we timing this ?
*/
void runner_do_recv_cell(struct runner *r, struct cell *c, int timer) {
void runner_do_recv_part(struct runner *r, struct cell *c, int timer) {
#ifdef WITH_MPI
const struct part *restrict parts = c->parts;
const struct gpart *restrict gparts = c->gparts;
const size_t nr_parts = c->count;
const size_t nr_gparts = c->gcount;
const integertime_t ti_current = r->e->ti_current;
TIMER_TIC;
@@ -1246,34 +1244,98 @@ void runner_do_recv_cell(struct runner *r, struct cell *c, int timer) {
error("Received un-drifted particle !");
#endif
}
}
/* Otherwise, recurse and collect. */
else {
for (int k = 0; k < 8; k++) {
if (c->progeny[k] != NULL) {
runner_do_recv_part(r, c->progeny[k], 0);
ti_end_min = min(ti_end_min, c->progeny[k]->ti_end_min);
ti_end_max = max(ti_end_max, c->progeny[k]->ti_end_max);
h_max = max(h_max, c->progeny[k]->h_max);
}
}
}
#ifdef SWIFT_DEBUG_CHECKS
if (ti_end_min < ti_current)
error(
"Received a cell at an incorrect time c->ti_end_min=%lld, "
"e->ti_current=%lld.",
ti_end_min, ti_current);
#endif
/* ... and store. */
c->ti_end_min = ti_end_min;
c->ti_end_max = ti_end_max;
c->ti_old = ti_current;
c->h_max = h_max;
if (timer) TIMER_TOC(timer_dorecv_part);
#else
error("SWIFT was not compiled with MPI support.");
#endif
}
/**
* @brief Construct the cell properties from the received #gpart.
*
* @param r The runner thread.
* @param c The cell.
* @param timer Are we timing this ?
*/
void runner_do_recv_gpart(struct runner *r, struct cell *c, int timer) {
#ifdef WITH_MPI
const struct gpart *restrict gparts = c->gparts;
const size_t nr_gparts = c->gcount;
const integertime_t ti_current = r->e->ti_current;
TIMER_TIC;
integertime_t ti_end_min = max_nr_timesteps;
integertime_t ti_end_max = 0;
/* If this cell is a leaf, collect the particle data. */
if (!c->split) {
/* Collect everything... */
for (size_t k = 0; k < nr_gparts; k++) {
const integertime_t ti_end =
get_integer_time_end(ti_current, gparts[k].time_bin);
ti_end_min = min(ti_end_min, ti_end);
ti_end_max = max(ti_end_max, ti_end);
}
}
/* Otherwise, recurse and collect. */
else {
for (int k = 0; k < 8; k++) {
if (c->progeny[k] != NULL) {
runner_do_recv_cell(r, c->progeny[k], 0);
runner_do_recv_gpart(r, c->progeny[k], 0);
ti_end_min = min(ti_end_min, c->progeny[k]->ti_end_min);
ti_end_max = max(ti_end_max, c->progeny[k]->ti_end_max);
h_max = max(h_max, c->progeny[k]->h_max);
}
}
}
#ifdef SWIFT_DEBUG_CHECKS
if (ti_end_min < ti_current)
error(
"Received a cell at an incorrect time c->ti_end_min=%lld, "
"e->ti_current=%lld.",
ti_end_min, ti_current);
#endif
/* ... and store. */
c->ti_end_min = ti_end_min;
c->ti_end_max = ti_end_max;
c->ti_old = ti_current;
c->h_max = h_max;
if (timer) TIMER_TOC(timer_dorecv_cell);
if (timer) TIMER_TOC(timer_dorecv_gpart);
#else
error("SWIFT was not compiled with MPI support.");
@@ -1473,8 +1535,11 @@ void *runner_main(void *data) {
if (t->subtype == task_subtype_tend) {
cell_unpack_ti_ends(ci, t->buff);
free(t->buff);
} else {
runner_do_recv_cell(r, ci, 1);
} else if (t->subtype == task_subtype_xv ||
t->subtype == task_subtype_rho) {
runner_do_recv_part(r, ci, 1);
} else if (t->subtype == task_subtype_gpart) {
runner_do_recv_gpart(r, ci, 1);
}
break;
#endif
@@ -1177,7 +1177,7 @@ void scheduler_enqueue(struct scheduler *s, struct task *t) {
/* Otherwise, look for a suitable queue. */
else {
#ifdef WITH_MPI
int err;
int err = MPI_SUCCESS;
#endif
/* Find the previous owner for each task type, and do
@@ -1208,16 +1208,22 @@ void scheduler_enqueue(struct scheduler *s, struct task *t) {
err = MPI_Irecv(t->buff, t->ci->pcell_size * sizeof(integertime_t),
MPI_BYTE, t->ci->nodeID, t->flags, MPI_COMM_WORLD,
&t->req);
} else {
} else if (t->subtype == task_subtype_xv ||
t->subtype == task_subtype_rho) {
err = MPI_Irecv(t->ci->parts, t->ci->count, part_mpi_type,
t->ci->nodeID, t->flags, MPI_COMM_WORLD, &t->req);
// message( "receiving %i parts with tag=%i from %i to %i." ,
// t->ci->count , t->flags , t->ci->nodeID , s->nodeID );
// fflush(stdout);
} else if (t->subtype == task_subtype_gpart) {
err = MPI_Irecv(t->ci->gparts, t->ci->gcount, gpart_mpi_type,
t->ci->nodeID, t->flags, MPI_COMM_WORLD, &t->req);
} else {
error("Unknown communication sub-type");
}
if (err != MPI_SUCCESS) {
mpi_error(err, "Failed to emit irecv for particle data.");
}
// message( "receiving %i parts with tag=%i from %i to %i." ,
// t->ci->count , t->flags , t->ci->nodeID , s->nodeID );
// fflush(stdout);
qid = 1 % s->nr_queues;
#else
error("SWIFT was not compiled with MPI support.");
@@ -1231,7 +1237,8 @@ void scheduler_enqueue(struct scheduler *s, struct task *t) {
err = MPI_Isend(t->buff, t->ci->pcell_size * sizeof(integertime_t),
MPI_BYTE, t->cj->nodeID, t->flags, MPI_COMM_WORLD,
&t->req);
} else {
} else if (t->subtype == task_subtype_xv ||
t->subtype == task_subtype_rho) {
#ifdef SWIFT_DEBUG_CHECKS
for (int k = 0; k < t->ci->count; k++)
if (t->ci->parts[k].ti_drift != s->space->e->ti_current)
@@ -1239,13 +1246,19 @@ void scheduler_enqueue(struct scheduler *s, struct task *t) {
#endif
err = MPI_Isend(t->ci->parts, t->ci->count, part_mpi_type,
t->cj->nodeID, t->flags, MPI_COMM_WORLD, &t->req);
// message( "sending %i parts with tag=%i from %i to %i." ,
// t->ci->count , t->flags , s->nodeID , t->cj->nodeID );
// fflush(stdout);
} else if (t->subtype == task_subtype_gpart) {
err = MPI_Isend(t->ci->gparts, t->ci->gcount, gpart_mpi_type,
t->cj->nodeID, t->flags, MPI_COMM_WORLD, &t->req);
} else {
error("Unknown communication sub-type");
}
if (err != MPI_SUCCESS) {
mpi_error(err, "Failed to emit isend for particle data.");
}
// message( "sending %i parts with tag=%i from %i to %i." ,
// t->ci->count , t->flags , s->nodeID , t->cj->nodeID );
// fflush(stdout);
qid = 0;
#else
error("SWIFT was not compiled with MPI support.");
@@ -56,7 +56,7 @@ const char *taskID_names[task_type_count] = {
const char *subtaskID_names[task_subtype_count] = {
"none", "density", "gradient", "force", "grav",
"external_grav", "tend", "xv", "rho"};
"external_grav", "tend", "xv", "rho", "gpart"};
/**
* @brief Computes the overlap between the parts array of two given cells.
@@ -73,6 +73,7 @@ enum task_subtypes {
task_subtype_tend,
task_subtype_xv,
task_subtype_rho,
task_subtype_gpart,
task_subtype_count
} __attribute__((packed));
@@ -60,7 +60,8 @@ enum {
timer_dopair_subset,
timer_do_ghost,
timer_do_extra_ghost,
timer_dorecv_cell,
timer_dorecv_part,
timer_dorecv_gpart,
timer_gettask,
timer_qget,
timer_qsteal,