Added the initial test_bh_mpi file. Fixed a bug in adding child resources...

Added the initial test_bh_mpi file. Fixed a bug in adding child resources where it used position based on integer spacing instead of character spacing (i.e. skipped 4*offset bytes as opposed to offset bytes)

Added the initial test_bh_mpi file. Fixed a bug in adding child resources...
d1648725 · d74ksy · 4e5eacc1 · d1648725 · d1648725 · d1648725
Commit d1648725 authored 9 years ago by d74ksy
--- a/examples/Makefile.am
+++ b/examples/Makefile.am
@@ -23,8 +23,13 @@ AM_CFLAGS = -I../src -g -O3 -Wall -Werror -ffast-math -fstrict-aliasing -ftree-v

 AM_LDFLAGS =

+
+METIS_LIBS = @METIS_LIBS@
+MPI_THREAD_LIBS = @MPI_THREAD_LIBS@
+MPI_LIBS = $(METIS_LIBS) $(MPI_THREAD_LIBS)
+
 # Set-up the library
-bin_PROGRAMS = test test_bh test_bh_sorted test_fmm_sorted
+bin_PROGRAMS = test test_bh test_bh_sorted test_fmm_sorted test_bh_mpi
 if HAVECBLAS
 bin_PROGRAMS += test_qr 
 endif
@@ -53,3 +58,8 @@ test_bh_sorted_LDADD =  ../src/.libs/libquicksched.a
 test_fmm_sorted_SOURCES = test_fmm_sorted.c
 test_fmm_sorted_CFLAGS = $(AM_CFLAGS)
 test_fmm_sorted_LDADD =  ../src/.libs/libquicksched.a
+
+test_bh_mpi_SOURCES = test_bh_mpi.c
+test_bh_mpi_CFLAGS = $(AM_CFLAGS) -DWITH_MPI
+test_bh_mpi_LDADD =  ../src/.libs/libquickschedMPI.a $(METIS_LIBS)
+test_bh_mpi_LDFLAGS = $(MPI_THREAD_LIBS)
--- a/examples/test_bh_mpi.c
+++ b/examples/test_bh_mpi.c
+/*******************************************************************************
+ * This file is part of QuickSched.
+ * Coypright (c) 2014 Pedro Gonnet (pedro.gonnet@durham.ac.uk),
+ *                    Matthieu Schaller (matthieu.schaller@durham.ac.uk)
+ *
+ * This program is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU Lesser General Public License as published
+ * by the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public License
+ * along with this program.  If not, see <http://www.gnu.org/licenses/>.
+ *
+* *****************************************************************************/
+
+/* Config parameters. */
+#include "../config.h"
+
+/* Standard includes. */
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <unistd.h>
+#include <math.h>
+#include <float.h>
+#include <limits.h>
+#include <omp.h>
+#include <fenv.h>
+#include <mpi.h>
+
+/* Local includes. */
+#include "quicksched.h"
+
+/* Some local constants. */
+#define cell_pool_grow 1000
+#define cell_maxparts 100
+#define task_limit 1e8
+#define const_G 1    // 6.6738e-8
+#define dist_min 0.5 /* Used for legacy walk only */
+#define dist_cutoff_ratio 1.5
+
+#define ICHECK -1
+#define NO_SANITY_CHECKS
+#define NO_COM_AS_TASK
+#define NO_COUNTERS
+
+
+
+/** Data structure for the particles. */
+struct part {
+  double x[3];
+  union {
+    float a[3];
+    float a_legacy[3];
+    float a_exact[3];
+  };
+  float mass;
+  int id;
+};  // __attribute__((aligned(32)));
+
+struct part_local {
+  float x[3];
+  float a[3];
+  float mass;
+} __attribute__((aligned(32)));
+
+struct multipole {
+  double com[3];
+  float mass;
+};
+
+/** Data structure for the BH tree cell. */
+struct cell {
+  double loc[3];
+  double h;
+
+  int count;
+  unsigned short int split, sorted;
+  struct part *parts;
+
+  qsched_res_t firstchild; /* Next node if opening */
+  qsched_res_t sibling;    /* Next node */
+
+  /* We keep both CoMs and masses to make sure the comp_com calculation is
+   * correct (use an anonymous union to keep variable names compact).  */
+  union {
+
+    /* Information for the legacy walk */
+    struct multipole legacy;
+
+    /* Information for the QuickShed walk */
+    struct multipole new;
+  };
+
+  long long int res,res_parts, com_tid;
+  struct index *indices;
+
+} __attribute__((aligned(128)));
+
+/** Global variable for the pool of allocated cells. */
+struct cell *cell_pool = NULL;
+
+
+struct cell *cell_get(struct qsched *s) {
+
+  struct cell *res;
+//  int k;
+
+  long long int resource = qsched_addres(s, qsched_owner_none, sizeof(struct cell), (void**) &res );
+  res->res = resource;
+  res->firstchild = -1;
+
+#ifdef THINK_CAN_BE_DELETED
+  /* Allocate a new batch? */
+  if (cell_pool == NULL) {
+
+    /* Allocate the cell array. */
+    if ((cell_pool =
+             (struct cell *)calloc(cell_pool_grow, sizeof(struct cell))) ==
+        NULL)
+      error("Failed to allocate fresh batch of cells.");
+
+    /* Link them up via their progeny pointers. */
+    for (k = 1; k < cell_pool_grow; k++)
+      cell_pool[k - 1].firstchild = cell_pool[k].res;
+  }
+
+  /* Pick a cell off the pool. */
+  res = cell_pool;
+  cell_pool = cell_pool->firstchild;
+
+  /* Clean up a few things. */
+  res->res = qsched_res_none;
+  res->firstchild = 0;
+#endif
+  /* Return the cell. */
+  return res;
+}
+
+
+
+/**
+ * @brief Sort the parts into eight bins along the given pivots and
+ *        fill the multipoles. Also adds the hierarchical resources
+ *        to the sched.
+ *
+ * @param c The #cell to be split.
+ * @param N The total number of parts.
+ * @param s The #sched to store the resources.
+ */
+
+void cell_split(struct cell *c, struct qsched *s) {
+
+  int i, j, k, kk, count = c->count;
+  struct part temp, *parts = qsched_getresdata( s, c->res_parts );
+  struct cell *cp;
+  int left[8], right[8];
+  double pivot[3];
+  static struct cell *root = NULL;
+  struct cell *progenitors[8];
+
+  /* Set the root cell. */
+  if (root == NULL) {
+    root = c;
+    c->sibling = 0;
+  }
+
+  /* Add a resource for this cell if it doesn't have one yet. */
+  //if (c->res == qsched_res_none)
+    //c->res = qsched_addres_local(s, qsched_owner_none, qsched_res_none);
+
+  /* Does this cell need to be split? */
+  if (count > cell_maxparts) {
+
+    /* Mark this cell as split. */
+    c->split = 1;
+
+    /* Create the progeny. */
+    for (k = 0; k < 8; k++) {
+      progenitors[k] = cp = cell_get(s);
+      cp->loc[0] = c->loc[0];
+      cp->loc[1] = c->loc[1];
+      cp->loc[2] = c->loc[2];
+      cp->h = c->h * 0.5;
+      //cp->res = qsched_addres_local(s, qsched_owner_none, c->res);
+      if (k & 4) cp->loc[0] += cp->h;
+      if (k & 2) cp->loc[1] += cp->h;
+      if (k & 1) cp->loc[2] += cp->h;
+    }
+
+    /* Init the pivots. */
+    for (k = 0; k < 3; k++) pivot[k] = c->loc[k] + c->h * 0.5;
+
+    /* Split along the x-axis. */
+    i = 0;
+    j = count - 1;
+    while (i <= j) {
+      while (i <= count - 1 && parts[i].x[0] < pivot[0]) i += 1;
+      while (j >= 0 && parts[j].x[0] >= pivot[0]) j -= 1;
+      if (i < j) {
+        temp = parts[i];
+        parts[i] = parts[j];
+        parts[j] = temp;
+      }
+    }
+    left[1] = i;
+    right[1] = count - 1;
+    left[0] = 0;
+    right[0] = j;
+
+    /* Split along the y axis, twice. */
+    for (k = 1; k >= 0; k--) {
+      i = left[k];
+      j = right[k];
+      while (i <= j) {
+        while (i <= right[k] && parts[i].x[1] < pivot[1]) i += 1;
+        while (j >= left[k] && parts[j].x[1] >= pivot[1]) j -= 1;
+        if (i < j) {
+          temp = parts[i];
+          parts[i] = parts[j];
+          parts[j] = temp;
+        }
+      }
+      left[2 * k + 1] = i;
+      right[2 * k + 1] = right[k];
+      left[2 * k] = left[k];
+      right[2 * k] = j;
+    }
+
+    /* Split along the z axis, four times. */
+    for (k = 3; k >= 0; k--) {
+      i = left[k];
+      j = right[k];
+      while (i <= j) {
+        while (i <= right[k] && parts[i].x[2] < pivot[2]) i += 1;
+        while (j >= left[k] && parts[j].x[2] >= pivot[2]) j -= 1;
+        if (i < j) {
+          temp = parts[i];
+          parts[i] = parts[j];
+          parts[j] = temp;
+        }
+      }
+      left[2 * k + 1] = i;
+      right[2 * k + 1] = right[k];
+      left[2 * k] = left[k];
+      right[2 * k] = j;
+    }
+
+    /* Store the counts and offsets. */
+    for (k = 0; k < 8; k++) {
+      progenitors[k]->count = right[k] - left[k] + 1;
+//      progenitors[k]->parts = &c->parts[left[k]];
+      progenitors[k]->res_parts = qsched_addchildres(s, c->res_parts, qsched_owner_none, (right[k] - left[k] + 1)*sizeof(struct part), left[k]*sizeof(struct part), (void**) &progenitors[k]->parts );
+    }
+
+    /* Find the first non-empty progenitor */
+    for (k = 0; k < 8; k++)
+      if (progenitors[k]->count > 0) {
+        c->firstchild = progenitors[k]->res;
+        break;
+      }
+
+    /* Prepare the pointers. */
+    for (k = 0; k < 8; k++) {
+
+      /* Find the next non-empty sibling */
+      for (kk = k + 1; kk < 8; ++kk) {
+        if (progenitors[kk]->count > 0) {
+          progenitors[k]->sibling = progenitors[kk]->res;
+          break;
+        }
+      }
+
+      /* No non-empty sibling ? Go back a level */
+      if (kk == 8) progenitors[k]->sibling = c->sibling;
+    }
+
+    /* Recurse. */
+    for (k = 0; k < 8; k++)
+      if (progenitors[k]->count > 0) cell_split(progenitors[k], s);
+
+
+    /* Otherwise, we're at a leaf, so create the cell's particle-cell task. */
+  } else {
+    //struct cell *data[2] = {root, c};
+    /*int tid = qsched_addtask_local(s, task_type_self_pc, task_flag_none, data,
+                             2 * sizeof(struct cell *), 1);
+    qsched_addlock(s, tid, c->res);*/
+  } /* does the cell need to be split? */
+
+  /* Set this cell's resources ownership. */
+  /*qsched_res_own(s, c->res,
+                 s->nr_queues * (c->parts - root->parts) / root->count);*/
+}
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+/**
+ * @brief Set up and run a task-based Barnes-Hutt N-body solver.
+ *
+ * @param N The number of random particles to use.
+ * @param nr_threads Number of threads to use.
+ * @param runs Number of force evaluations to use as a benchmark.
+ * @param fileName Input file name. If @c NULL or an empty string, random
+ *        particle positions will be used.
+ */
+void test_bh(int N, int nr_threads, int runs, char *fileName) {
+
+  int k;
+  struct cell *root;
+  struct part *parts;
+  qsched_res_t parts_res;
+  FILE *file;
+  struct qsched s;
+ // ticks tic, toc_run, tot_setup = 0, tot_run = 0;
+//  int countMultipoles = 0, countPairs = 0, countCoMs = 0;
+char processor_name[MPI_MAX_PROCESSOR_NAME];
+    int name_len;
+
+
+
+  /* Runner function. */
+  void runner(int type, void * data) {
+
+//    ticks tic = getticks();
+
+    /* Decode the data. */
+//    struct cell **d = (struct cell **)data;
+
+    /* Decode and execute the task. */
+   /* switch (type) {
+      case task_type_self:
+        iact_self_direct(d[0]);
+        break;
+      case task_type_pair:
+        iact_pair(d[0], d[1]);
+        break;
+      case task_type_self_pc:
+        iact_self_pc(d[0], d[1], NULL);
+        break;
+      case task_type_com:
+        comp_com(d[0]);
+        break;
+      default:
+        error("Unknown task type.");
+    }
+
+    atomic_add(&task_timers[type], getticks() - tic);*/
+  }
+
+    // Initialize the MPI environment
+int MpiThreadLevel;
+ MPI_Init_thread(NULL, NULL, MPI_THREAD_MULTIPLE, &MpiThreadLevel);
+
+if(MpiThreadLevel != MPI_THREAD_MULTIPLE)
+    error("We didn't get thread multiple!");
+
+    MPI_Get_processor_name(processor_name, &name_len);
+
+  /* Initialize the per-task type timers. */
+//  for (k = 0; k < task_type_count; k++) task_timers[k] = 0;
+
+  /* Initialize the scheduler. */
+    qsched_init(&s, nr_threads, qsched_flag_noreown, MPI_COMM_WORLD);
+
+  /* Init and fill the particle array. */
+//  if ((parts = (struct part *)malloc(sizeof(struct part) * N)) == NULL)
+  //  error("Failed to allocate particle buffer.");
+
+  parts_res = qsched_addres(&s, qsched_owner_none, sizeof(struct part) * N, (void**) &parts);
+
+  /* If no input file was specified, generate random particle positions. */
+  if (fileName == NULL || fileName[0] == 0) {
+    for (k = 0; k < N; k++) {
+      parts[k].id = k;
+      parts[k].x[0] = ((double)rand()) / RAND_MAX;
+      parts[k].x[1] = ((double)rand()) / RAND_MAX;
+      parts[k].x[2] = ((double)rand()) / RAND_MAX;
+      parts[k].mass = ((double)rand()) / RAND_MAX;
+      parts[k].a_legacy[0] = 0.0;
+      parts[k].a_legacy[1] = 0.0;
+      parts[k].a_legacy[2] = 0.0;
+    }
+
+    /* Otherwise, read them from a file. */
+  } else {
+    file = fopen(fileName, "r");
+    if (file) {
+      for (k = 0; k < N; k++) {
+        if (fscanf(file, "%d", &parts[k].id) != 1)
+          error("Failed to read ID of part %i.", k);
+        if (fscanf(file, "%lf%lf%lf", &parts[k].x[0], &parts[k].x[1],
+                   &parts[k].x[2]) !=
+            3)
+          error("Failed to read position of part %i.", k);
+        if (fscanf(file, "%f", &parts[k].mass) != 1)
+          error("Failed to read mass of part %i.", k);
+      }
+      fclose(file);
+    }
+  }
+
+  /* Init the cells. */
+  root = cell_get(&s);
+  root->loc[0] = 0.0;
+  root->loc[1] = 0.0;
+  root->loc[2] = 0.0;
+  root->h = 1.0;
+  root->count = N;
+  root->parts = parts;
+  root->res_parts = parts_res;
+  cell_split(root, &s);
+
+  /* Iterate over the cells and get the average number of particles
+     per leaf. */
+  struct cell *c = root;
+  int nr_leaves = 0;
+  while (c != NULL && (c->sibling > 0 || c->firstchild > 0)) {
+    if (!c->split) {
+      nr_leaves++;
+      c = (struct cell*) qsched_getresdata( &s, c->sibling );
+    } else {
+      c = (struct cell*) qsched_getresdata( &s, c->firstchild );
+    }
+  }
+  message("Average number of parts per leaf is %f.", ((double)N) / nr_leaves);
+
+
+
+  // Print off a hello world message
+    printf("Hello world from processor %s, rank = %i, count_ranks = %i\n",
+           processor_name, s.rank, s.count_ranks);
+
+    // Finalize the MPI environment.
+    MPI_Finalize();
+}
+
+
+/**
+ * @brief Main function.
+ */
+
+int main(int argc, char *argv[]) {
+
+  int c, nr_threads;
+  int N = 1000, runs = 1;
+  char fileName[100] = {0};
+
+  /* Die on FP-exceptions. */
+  feenableexcept(FE_DIVBYZERO | FE_INVALID | FE_OVERFLOW);
+
+/* Get the number of threads. */
+#pragma omp parallel shared(nr_threads)
+  {
+    if (omp_get_thread_num() == 0) nr_threads = omp_get_num_threads();
+  }
+
+  /* Parse the options */
+  while ((c = getopt(argc, argv, "n:r:t:f:c:i:")) != -1) switch (c) {
+      case 'n':
+        if (sscanf(optarg, "%d", &N) != 1)
+          error("Error parsing number of particles.");
+        break;
+      case 'r':
+        if (sscanf(optarg, "%d", &runs) != 1)
+          error("Error parsing number of runs.");
+        break;
+      case 't':
+        if (sscanf(optarg, "%d", &nr_threads) != 1)
+          error("Error parsing number of threads.");
+        omp_set_num_threads(nr_threads);
+        break;
+      case 'f':
+        if (sscanf(optarg, "%s", &fileName[0]) != 1)
+          error("Error parsing file name.");
+        break;
+      case '?':
+        fprintf(stderr, "Usage: %s [-t nr_threads] [-n N] [-r runs] [-f file] "
+                        "[-c Nparts] [-i Niterations] \n",
+                argv[0]);
+        fprintf(stderr, "Solves the N-body problem using a Barnes-Hut\n"
+                        "tree code with N random particles read from a file in "
+                        "[0,1]^3 using"
+                        "nr_threads threads.\n"
+                        "A test of the neighbouring cells interaction with "
+                        "Nparts per cell is also run Niterations times.\n");
+        exit(EXIT_FAILURE);
+    }
+
+  /* Tree node information */
+  printf("Size of cell: %zu bytes.\n", sizeof(struct cell));
+
+  /* Part information */
+  printf("Size of part: %zu bytes.\n", sizeof(struct part));
+
+  /* Dump arguments. */
+  if (fileName[0] == 0) {
+    message("Computing the N-body problem over %i random particles using %i "
+            "threads (%i runs).",
+            N, nr_threads, runs);
+  } else {
+    message("Computing the N-body problem over %i particles read from '%s' "
+            "using %i threads (%i runs).",
+            N, fileName, nr_threads, runs);
+  }
+
+  /* Run the BH test. */
+  test_bh(N, nr_threads, runs, fileName);
+
+  return 0;
+}
+
+
--- a/src/qsched.c
+++ b/src/qsched.c
@@ -3896,7 +3896,7 @@ int index = getindex(id,s);
    s->res[ index ].parent = parent;
    s->res[ index ].offset = position;
    s->res[ index ].node = s->rank;
-    int *temp = (int*) s->res[getindex(parent,s)].data;
+    char *temp = (char*) s->res[getindex(parent,s)].data;
    s->res[ index ].data = &temp[position];
    s->res[ index ].size = size;
    s->res[ index ].users = NULL;