From a5adf9bc94639260eca473ac30e981faf58343d3 Mon Sep 17 00:00:00 2001
From: Aidan <aidan.chalk@gmail.com>
Date: Wed, 18 Apr 2018 16:30:39 +0100
Subject: [PATCH] Fix test_matadd and the send/recv task visibility errors it
 found

Send/recv tasks were becoming visible to user code rather than being
contained entirely in the library. The fixed test also exposes what I
believe are severe load-balancing problems, and/or unnecessary movement of
data in cases where the initial layout would already be load balanced. A
python script is needed to generate task plots from this output (a sketch
follows the diffstat below).
---
 examples/Makefile.am       |  2 +-
 examples/test_matadd_mpi.c | 47 ++++++++++++++++++++++++++++++--------
 src/qsched.c               |  6 +++--
 3 files changed, 42 insertions(+), 13 deletions(-)
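
A minimal plotting sketch for the TASK_TIMERS output added in this patch.
Each line of task_timers.tks is written as "id type start finish node
thread" (see the fprintf in test_matadd_mpi.c below). The sketch assumes
matplotlib is available and treats task_start/task_finish as raw tick
counts, so only relative times are meaningful; the name task_plot.py is
just a suggestion:

    #!/usr/bin/env python
    # Sketch: plot a task timeline from the task_timers.tks file written by
    # test_matadd_mpi when TASK_TIMERS is defined. The column order is assumed
    # to match the fprintf in this patch:
    #   id type task_start task_finish node_executed thread_executed
    import matplotlib
    matplotlib.use("Agg")  # render to a file; no display required
    import matplotlib.pyplot as plt

    # Read one task per line, skipping anything that is not six integers.
    tasks = []
    with open("task_timers.tks") as f:
        for line in f:
            fields = line.split()
            if len(fields) != 6:
                continue
            tid, ttype, start, finish, node, thread = (int(x) for x in fields)
            tasks.append((tid, ttype, start, finish, node, thread))

    # Tick counts only make sense relatively; shift so the earliest start is zero.
    t0 = min(t[2] for t in tasks)

    # One horizontal lane per (node, thread) pair.
    lanes = sorted({(t[4], t[5]) for t in tasks})
    lane_of = {lane: i for i, lane in enumerate(lanes)}

    fig, ax = plt.subplots(figsize=(12, 0.4 * len(lanes) + 2))
    colors = {}
    for tid, ttype, start, finish, node, thread in tasks:
        # One colour per task type (e.g. send/recv vs. the add task).
        colors.setdefault(ttype, "C%d" % (len(colors) % 10))
        ax.barh(lane_of[(node, thread)], finish - start, left=start - t0,
                height=0.8, color=colors[ttype])

    ax.set_yticks(range(len(lanes)))
    ax.set_yticklabels(["node %d / thread %d" % lane for lane in lanes])
    ax.set_xlabel("ticks since first task started")
    fig.tight_layout()
    fig.savefig("task_plot.png")

Run as "python task_plot.py" next to task_timers.tks; it writes
task_plot.png with one lane per (node, thread) pair and one colour per
task type, which should make any load imbalance across ranks visible.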

diff --git a/examples/Makefile.am b/examples/Makefile.am
index be88cc2..5e968e4 100644
--- a/examples/Makefile.am
+++ b/examples/Makefile.am
@@ -77,5 +77,5 @@ test_qr_mpi_cblas_LDFLAGS = $(MPI_THREAD_LIBS)
 
 test_matadd_mpi_SOURCES = test_matadd_mpi.c
 test_matadd_mpi_CFLAGS = $(AM_CFLAGS) -DWITH_MPI
-test_madadd_mpi_LDADD = ../src/.libs/libquickschedMPI.a $(METIS_LIBS)
+test_matadd_mpi_LDADD = ../src/.libs/libquickschedMPI.a $(METIS_LIBS)
 test_matadd_mpi_LDFLAGS = $(MPI_THREAD_LIBS)
diff --git a/examples/test_matadd_mpi.c b/examples/test_matadd_mpi.c
index f7f8e70..0a90c10 100644
--- a/examples/test_matadd_mpi.c
+++ b/examples/test_matadd_mpi.c
@@ -32,7 +32,7 @@
 #include <fenv.h>
 #include <mpi.h>
 
-#define NO_TASK_TIMERS
+#define TASK_TIMERS
 #define NO_LOAD_BALANCE_EXACT
 
 /* Local includes. */
@@ -61,7 +61,7 @@ void addMatrixTile(double *A, double *B, double *C){
 
 
 //Matrix is matsize*matsize tiles of TILESIZE*TILESIZE
-void test_matadd(int matsize,int nr_threads`){
+void test_matadd(int matsize,int nr_threads){
 
 
   struct qsched s;
@@ -92,9 +92,9 @@ void test_matadd(int matsize,int nr_threads`){
   local_B = malloc(sizeof(double) * numRows * matsize * TILESIZE*TILESIZE);
   local_C = malloc(sizeof(double) * numRows * matsize * TILESIZE*TILESIZE);
   if(local_A == NULL || local_B == NULL || local_C == NULL) error("Failed to allocate arrays");
-  for(int i = 0; i < numRows*matsize*TILESIZE*TILESIZE){
-    local_A = 2.0 * ((double)rand()) / RAND_MAX - 1.0;
-    local_B = 2.0 * ((double)rand()) / RAND_MAX - 1.0;
+  for(int i = 0; i < numRows*matsize*TILESIZE*TILESIZE; i++){
+    local_A[i] = 2.0 * ((double)rand()) / RAND_MAX - 1.0;
+    local_B[i] = 2.0 * ((double)rand()) / RAND_MAX - 1.0;
   }
   bzero(local_C,sizeof(double)*numRows*matsize*TILESIZE*TILESIZE);
 
@@ -111,24 +111,24 @@ void test_matadd(int matsize,int nr_threads`){
 
   for(int i = 0; i < numRows*matsize; i++){
     local_A_rid[i] = qsched_addres(&s, qsched_owner_none, sizeof(double) * TILESIZE * TILESIZE, (void**) &local_A_tiles[i]);
-    memcpy(local_A_tiles[i], local_A[i*TILESIZE*TILESIZE],sizeof(double)*TILESIZE*TILESIZE);
+    memcpy(local_A_tiles[i], &local_A[i*TILESIZE*TILESIZE],sizeof(double)*TILESIZE*TILESIZE);
     local_B_rid[i] = qsched_addres(&s, qsched_owner_none, sizeof(double) * TILESIZE * TILESIZE, (void**) &local_B_tiles[i]);
-    memcpy(local_B_tiles[i], local_B[i*TILESIZE*TILESIZE],sizeof(double)*TILESIZE*TILESIZE);
+    memcpy(local_B_tiles[i], &local_B[i*TILESIZE*TILESIZE],sizeof(double)*TILESIZE*TILESIZE);
     local_C_rid[i] = qsched_addres(&s, qsched_owner_none, sizeof(double) * TILESIZE * TILESIZE, (void**) &local_C_tiles[i]);
-    memcpy(local_B_tiles[i], local_B[i*TILESIZE*TILESIZE],sizeof(double)*TILESIZE*TILESIZE);
+    memcpy(local_C_tiles[i], &local_C[i*TILESIZE*TILESIZE],sizeof(double)*TILESIZE*TILESIZE);
   }
   message("Synchronizing resources.");
   qsched_sync_resources(&s);
 
   //Generate tasks
   qsched_task_t /* *local_tid = NULL,*/ tid_new = -1;
-  long long int MPI_data[3];
+  long long int MPI_Data[3];
 //  local_tid = malloc(sizeof(qsched_task_t)*numRows*matsize);
   for(int i = 0; i < numRows*matsize; i++){
     MPI_Data[0] = local_A_rid[i];
     MPI_Data[1] = local_B_rid[i];
     MPI_Data[2] = local_C_rid[i];
-    tid_new = qsched_addtask(&s, add_task, task_flag_none, MPI_data,
+    tid_new = qsched_addtask(&s, add_task, task_flag_none, MPI_Data,
                              sizeof(long long int) * 3, 200);
     qsched_addlock(&s, tid_new, local_C_rid[i]);
     qsched_adduse(&s, tid_new, local_A_rid[i]);
@@ -156,6 +156,33 @@ void test_matadd(int matsize,int nr_threads`){
   // Print off a hello world message
     printf("Hello world from processor rank = %i, count_ranks = %i\n",
            s.rank, s.count_ranks);
+#ifdef TASK_TIMERS
+  // Rank 0 truncates the file, then each rank appends its tasks' timings in rank order.
+  int j;
+  FILE *file;
+  if(s.rank == 0)
+  {
+    file = fopen("task_timers.tks", "w");
+    fclose(file);
+  }
+  for(int i = 0; i < s.count_ranks; i++)
+  {
+    if(i == s.rank)
+    {
+      file = fopen("task_timers.tks", "a");
+      for(j = 0; j < s.task_ranks[s.count_ranks]; j++)
+      {
+        if(s.tasks[j].node_executed == s.rank)
+        {
+          struct task *t = &s.tasks[j];
+          fprintf(file, "%lli %i %llu %llu %i %i\n", t->id, t->type, t->task_start, t->task_finish, t->node_executed, t->thread_executed);
+        }
+      }
+      fclose(file);
+    }
+    MPI_Barrier(s.comm);
+  }
+#endif
 }
 
 
diff --git a/src/qsched.c b/src/qsched.c
index ffc74f5..6edfb7f 100644
--- a/src/qsched.c
+++ b/src/qsched.c
@@ -147,7 +147,7 @@ void* qsched_getresdata( struct qsched *s, qsched_res_t res_id )
         }
     }
     if(s->res[getindex(res_id, s)].data == NULL)
-        error("Data is NULL");
+        error("Data is NULL for resource %lli",res_id);
     return s->res[getindex(res_id, s)].data;
 }
 #endif
@@ -3150,6 +3150,7 @@ for(i = 0; i < node_count; i++)
             if( METIS_PartGraphKway(&node_count, &one, edgelist_pos, edgelist_new, nodelist, NULL, edgelist_vwgt, &temp_count_ranks, NULL, NULL,options, objval, nodeIDs) != METIS_OK)
                 error("Failed to partition\n");
             /*qsched_partition_random(s, nodelist, edgelist_new, node_count, nodeIDs );*/
+            /*qsched_partition_roundrobin(s, nodelist, edgelist_new, node_count, nodeIDs);*/
         }
            toc = getticks();
     message("METIS_PartGraphKway took %lli (= %.3fms) ticks\n", toc-tic, (float)(toc-tic)/2.67e6);
@@ -3490,7 +3491,6 @@ if(tid == NULL)
         }
         
     /* Sort the tasks topologically. */
-
     for ( j = 0 , k = 0 ; k < count ; k++ )
         if ( s->tasks[k].wait == 0 ) {
             tid[j] = s->tasks[k].id;
@@ -3702,7 +3702,10 @@ void *qsched_pthread_run ( void *in ) {
         #ifndef WITH_MPI
         s->fun( t->type , &s->data[ t->data ] );
         #else
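+        /* send/recv tasks are handled inside the library; do not expose
+           them to the user's task function. */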
+        if(t->type != task_type_send && t->type != task_type_recv){
         s->fun( s,  t->type , &s->data[ t->data ] );        
+        }
         #endif
         /* Mark that task as done. */
         qsched_done( s , t );
-- 
GitLab