Commit c8305199 authored by Aidan Chalk

Added a trivial matrix addition example to test the ability to cope with data sources from multiple nodes, and to test whether it scales acceptably for completely trivial problems.
parent fb531a76
@@ -29,7 +29,7 @@ MPI_THREAD_LIBS = @MPI_THREAD_LIBS@
MPI_LIBS = $(METIS_LIBS) $(MPI_THREAD_LIBS)
# Set-up the library
-bin_PROGRAMS = test test_bh test_bh_sorted test_fmm_sorted test_bh_mpi test_qr_mpi test_qr
+bin_PROGRAMS = test test_bh test_bh_sorted test_fmm_sorted test_bh_mpi test_qr_mpi test_qr test_matadd_mpi
if HAVECBLAS
bin_PROGRAMS += test_qr_mpi_cblas
endif
@@ -74,3 +74,8 @@ test_qr_mpi_cblas_SOURCES = test_qr_mpi.c
test_qr_mpi_cblas_CFLAGS = $(AM_CFLAGS) -DWITH_MPI -DWITH_CBLAS_LIB
test_qr_mpi_cblas_LDADD = ../src/.libs/libquickschedMPI.a -llapacke -lblas -lcblas $(METIS_LIBS)
test_qr_mpi_cblas_LDFLAGS = $(MPI_THREAD_LIBS)
+test_matadd_mpi_SOURCES = test_matadd_mpi.c
+test_matadd_mpi_CFLAGS = $(AM_CFLAGS) -DWITH_MPI
+test_matadd_mpi_LDADD = ../src/.libs/libquickschedMPI.a $(METIS_LIBS)
+test_matadd_mpi_LDFLAGS = $(MPI_THREAD_LIBS)
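
For reference, a typical invocation of the new example after an autotools build might look as follows (the launcher and its flags depend on the local MPI installation; "mpirun -np 2" is an assumption here, not part of this commit):

mpirun -np 2 ./test_matadd_mpi -m 8 -t 4

where -m sets the matrix dimension in tiles and -t the number of threads per rank, matching the option parsing in main() below.

test_matadd_mpi.c (new file):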
/*******************************************************************************
* This file is part of QuickSched.
* Copyright (c) 2018 Aidan Chalk (aidan.chalk@stfc.ac.uk)
*
* This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU Lesser General Public License as published
* by the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public License
* along with this program. If not, see <http://www.gnu.org/licenses/>.
*
* *****************************************************************************/
/* Config parameters. */
#include "../config.h"
/* Standard includes. */
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <unistd.h>
#include <math.h>
#include <float.h>
#include <limits.h>
#include <omp.h>
#include <fenv.h>
#include <mpi.h>
#define NO_TASK_TIMERS
#define NO_LOAD_BALANCE_EXACT
/* Local includes. */
#include "quicksched.h"
#include "res.h"
#define TILESIZE 64
/**
* @brief Computes the matrix addition of two tiles, C=A+B.
*
* @param A Pointer to the tile of A
* @param B Pointer to the tile of B
* @param C Pointer to the tile of the output matrix, C
*
*/
void addMatrixTile(double *A, double *B, double *C){
for(int i = 0; i < TILESIZE*TILESIZE; i++){
C[i] = A[i] + B[i]; /* Element-wise sum over the whole tile. */
}
}
//Matrix is matsize*matsize tiles of TILESIZE*TILESIZE
void test_matadd(int matsize, int nr_threads){
struct qsched s;
const int add_task = 206;
// Initialize the MPI environment
int MpiThreadLevel;
double *local_A, *local_B, *local_C;
MPI_Init_thread(NULL, NULL, MPI_THREAD_MULTIPLE, &MpiThreadLevel);
if(MpiThreadLevel != MPI_THREAD_MULTIPLE)
error("We didn't get thread multiple!");
qsched_init(&s, nr_threads, qsched_flag_yield | qsched_flag_pthread, MPI_COMM_WORLD);
srand(s.rank);
int startRow;
int numRows;
/* Split the block-rows of tiles evenly across the ranks, rounding up;
* trim the final rank so the rows sum to matsize. */
numRows = matsize / s.count_ranks;
if(numRows*s.count_ranks != matsize)
numRows = numRows + 1;
startRow = s.rank * numRows;
if(startRow + numRows > matsize){
numRows = matsize-startRow;
}
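/* Illustrative case (not from the original source): with matsize = 5 and
* 2 ranks, numRows rounds up to 3, so rank 0 owns block-rows 0-2 and
* rank 1 is trimmed to block-rows 3-4. */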
local_A = malloc(sizeof(double) * numRows * matsize * TILESIZE*TILESIZE);
local_B = malloc(sizeof(double) * numRows * matsize * TILESIZE*TILESIZE);
local_C = malloc(sizeof(double) * numRows * matsize * TILESIZE*TILESIZE);
if(local_A == NULL || local_B == NULL || local_C == NULL) error("Failed to allocate arrays");
for(int i = 0; i < numRows*matsize*TILESIZE*TILESIZE; i++){
local_A[i] = 2.0 * ((double)rand()) / RAND_MAX - 1.0;
local_B[i] = 2.0 * ((double)rand()) / RAND_MAX - 1.0;
}
bzero(local_C,sizeof(double)*numRows*matsize*TILESIZE*TILESIZE);
//Initialise resources.
qsched_res_t *local_A_rid = NULL, *local_B_rid = NULL, *local_C_rid = NULL;
double **local_A_tiles, **local_B_tiles, **local_C_tiles;
local_A_rid = malloc(sizeof(qsched_res_t) * numRows*matsize);
local_B_rid = malloc(sizeof(qsched_res_t) * numRows*matsize);
local_C_rid = malloc(sizeof(qsched_res_t) * numRows*matsize);
local_A_tiles = malloc(sizeof(double*) * numRows*matsize);
local_B_tiles = malloc(sizeof(double*) * numRows*matsize);
local_C_tiles = malloc(sizeof(double*) * numRows*matsize);
for(int i = 0; i < numRows*matsize; i++){
local_A_rid[i] = qsched_addres(&s, qsched_owner_none, sizeof(double) * TILESIZE * TILESIZE, (void**) &local_A_tiles[i]);
memcpy(local_A_tiles[i], &local_A[i*TILESIZE*TILESIZE], sizeof(double)*TILESIZE*TILESIZE);
local_B_rid[i] = qsched_addres(&s, qsched_owner_none, sizeof(double) * TILESIZE * TILESIZE, (void**) &local_B_tiles[i]);
memcpy(local_B_tiles[i], &local_B[i*TILESIZE*TILESIZE], sizeof(double)*TILESIZE*TILESIZE);
local_C_rid[i] = qsched_addres(&s, qsched_owner_none, sizeof(double) * TILESIZE * TILESIZE, (void**) &local_C_tiles[i]);
memcpy(local_C_tiles[i], &local_C[i*TILESIZE*TILESIZE], sizeof(double)*TILESIZE*TILESIZE);
}
message("Synchronizing resources.");
qsched_sync_resources(&s);
//Generate tasks
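//Each task locks its output tile (exclusive access to C) and merely uses
//its input tiles (shared, read-only access to A and B).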
qsched_task_t tid_new = -1;
long long int MPI_data[3];
for(int i = 0; i < numRows*matsize; i++){
MPI_data[0] = local_A_rid[i];
MPI_data[1] = local_B_rid[i];
MPI_data[2] = local_C_rid[i];
tid_new = qsched_addtask(&s, add_task, task_flag_none, MPI_data,
sizeof(long long int) * 3, 200);
qsched_addlock(&s, tid_new, local_C_rid[i]);
qsched_adduse(&s, tid_new, local_A_rid[i]);
qsched_adduse(&s, tid_new, local_B_rid[i]);
}
/* Runner function to pass to the scheduler. */
void runner(struct qsched *s, int type, void * data) {
/* Decode the task data. */
long long int* idata = (long long int*)data;
long long int a, b, c;
/* Decode the resource handles and execute the task. */
a = idata[0];
b = idata[1];
c = idata[2];
double *A = (double*)qsched_getresdata(s, a);
double *B = (double*)qsched_getresdata(s, b);
double *C = (double*)qsched_getresdata(s, c);
addMatrixTile(A,B,C);
}
qsched_run_MPI(&s, nr_threads, runner);
/* Report completion from each rank. */
printf("Hello world from processor rank = %i, count_ranks = %i\n",
s.rank, s.count_ranks);
/* Shut down MPI before returning. */
MPI_Finalize();
}
int main(int argc, char* argv[]) {
int c, nr_threads=1;
int M = 4, runs = 1;
/* Get the number of threads. */
//#pragma omp parallel shared(nr_threads)
//{
// if (omp_get_thread_num() == 0) nr_threads = omp_get_num_threads();
//}
/* Parse the options */
while ((c = getopt(argc, argv, "m:n:k:r:t:")) != -1) switch (c) {
case 'm':
if (sscanf(optarg, "%d", &M) != 1) error("Error parsing dimension.");
break;
case 'r':
if (sscanf(optarg, "%d", &runs) != 1)
error("Error parsing number of runs.");
break;
case 't':
if (sscanf(optarg, "%d", &nr_threads) != 1)
error("Error parsing number of threads.");
break;
case '?':
fprintf(stderr, "Usage: %s [-t nr_threads] [-m M]\n",
argv[0]);
fprintf(stderr, "Computes the addition of a pair of MxM tiled\n"
"matrix using nr_threads threads.\n");
exit(EXIT_FAILURE);
}
/* Dump arguments. */
message("Computing the sum of two %ix%i tiled matrices using %i threads.\n",M,M,nr_threads);
test_matadd(M,nr_threads);
}