From 9859244d2aea6a307ce34e7bb09afd70d974244b Mon Sep 17 00:00:00 2001
From: Aidan Chalk <d74ksy@cosma-e.cosma>
Date: Tue, 17 Nov 2015 17:05:58 +0000
Subject: [PATCH] Fixed a variety of bugs that prevented working on distributed
 memory machiens

---
 examples/test_qr_mpi.c | 16 ++++-----------
 src/Makefile.am        |  1 +
 src/qsched.c           | 28 +++++++++++++++++++-------
 src/qsched.h           |  4 ++--
 src/queue.c            | 45 ++++++++++++++++++++++++++++++++++++++++--
 5 files changed, 71 insertions(+), 23 deletions(-)

diff --git a/examples/test_qr_mpi.c b/examples/test_qr_mpi.c
index bf295a4..3e5b45c 100644
--- a/examples/test_qr_mpi.c
+++ b/examples/test_qr_mpi.c
@@ -41,7 +41,7 @@
 #include "quicksched.h"
 #include "res.h"
 
-#define TASK_TIMERS
+#define NO_TASK_TIMERS
 
 #ifdef WITH_CBLAS_LIB
 /**
@@ -626,11 +626,10 @@ if(MpiThreadLevel != MPI_THREAD_MULTIPLE)
     #endif
   }
 
-    qsched_init(&s, 2, qsched_flag_none, MPI_COMM_WORLD);
+    qsched_init(&s, nr_threads, qsched_flag_none, MPI_COMM_WORLD);
 
 #ifdef TASK_TIMERS
 task_start = (long long int*)calloc(sizeof(long long int), m*n*m*n);
-printf("Created task_start of size %i", m*n*m*n);
 task_finish = (long long int*)calloc(sizeof(long long int), m*n*m*n);
 task_tids = (int*)calloc(sizeof(int), m*n*m*n);
 task_types = (int*)calloc(sizeof(int), m*n*m*n);
@@ -704,18 +703,13 @@ if(s.rank == 0) {
     #ifdef TASK_TIMERS
     MPI_data[2] = k;
     tid_new = qsched_addtask(&s, task_DGEQRF, task_flag_none, MPI_data,
-                               sizeof(long long int) * 3, 300);
+                               sizeof(long long int) * 3, 200);
     #else
     tid_new = qsched_addtask(&s, task_DGEQRF, task_flag_none, MPI_data,
                              sizeof(long long int) * 2, 200);
     #endif
     qsched_addlock(&s, tid_new, rid[k * m + k]);
     qsched_addlock(&s, tid_new, tau_id[k*m+k]);
-    if(k ==2 )
-    {
-        message("Task (2,2,2) == %lli", tid_new);
-        message("Resource (2,2) == %lli", rid[k*m+k]);
-    }
     if(k == 0)
     {
         memcpy(rids[k*m+k], &A_orig[(data[1]*m+data[0])*K*K], sizeof(double)*K*K);
@@ -740,8 +734,6 @@ if(s.rank == 0) {
       tid_new = qsched_addtask(&s, task_DLARFT, task_flag_none, MPI_data,
                                sizeof(long long int) * 3, 300);
       #endif
-        if(k == 2 && j == 6)
-            message("Task (2,6,2) == %lli", tid_new);
         if(k == 0)
         {
            memcpy(rids[j*m+k], &A_orig[(data[1]*m+data[0])*K*K], sizeof(double)*K*K);
@@ -797,7 +789,7 @@ if(s.rank == 0) {
         MPI_data[5] = i;
         MPI_data[6] = j;
         tid_new = qsched_addtask(&s, task_DSSRFT, task_flag_none, MPI_data,
-                                 sizeof(long long int) * 7, 300);
+                                 sizeof(long long int) * 7, 500);
         #else
         tid_new = qsched_addtask(&s, task_DSSRFT, task_flag_none, MPI_data,
                                  sizeof(long long int) * 4, 500);
diff --git a/src/Makefile.am b/src/Makefile.am
index e6a55f4..b40775a 100644
--- a/src/Makefile.am
+++ b/src/Makefile.am
@@ -37,6 +37,7 @@ libquicksched_la_SOURCES = qsched.c queue.c
 libquickschedMPI_la_CFLAGS = -g -O3 -Wall -Werror -ffast-math -fstrict-aliasing -ftree-vectorize \
     -funroll-loops $(SIMD_FLAGS) $(OPENMP_CFLAGS) -DTIMERS -std=gnu99 \
     -DWITH_MPI
+#-DTASK_TIMERS
 libquickschedMPI_la_SOURCES = qsched.c queue.c
 
 # List required headers
diff --git a/src/qsched.c b/src/qsched.c
index e5ad34e..a127279 100644
--- a/src/qsched.c
+++ b/src/qsched.c
@@ -40,7 +40,7 @@
     #include <pthread.h>
 #endif
 
-#define TASK_TIMERS
+#define NO_TASK_TIMERS
 
 /* Local includes. */
 #include "cycle.h"
@@ -2766,7 +2766,7 @@ for(i = 0; i < count; i++)
 
             }/* If data is out of date */else{
             
-                /* We have up to date data, but we need to check all the parents of this deck are completed before this recv task can occur.*/
+                /* We have up to date data, but we need to check all the parents of this task are completed before this recv task can occur.*/
                 for(k = 0; k < num_current; k++)
                 {
                     if(! (s->tasks[tid[current_parents[k]]].flags & task_flag_skip))
@@ -3066,7 +3066,7 @@ for(i = 0; i < node_count; i++)
 //            toc = getticks();
 //    message("METIS_PartGraphKway took %lli (= %e) ticks\n", toc-tic, (float)(toc-tic));
         //TODO Check the costs.
-        message("node_count = %i", node_count);
+//        message("node_count = %i", node_count);
         int count_me = 0;
         for(i = 0; i < node_count; i++)
         {
@@ -3077,12 +3077,14 @@ for(i = 0; i < node_count; i++)
                 count_me+= nodelist[i];
             }
         }
-        printf("\n");
+//        printf("\n");
         message("My \"cost\" = %i", count_me);
 
 
 
 //    tic = getticks();
+if(s->count_ranks > 1)
+{
     MPI_Request *reqs;
     reqs = (MPI_Request*) calloc(sizeof(MPI_Request) , node_count * 2);
     for(i = 0; i < node_count * 2; i++)
@@ -3136,7 +3138,7 @@ for(i = 0; i < node_count; i++)
         }
 
         free(reqs);
-
+}
         //Update all the child resources. Relies on children appearing after their parents in the resources array but this is currently guaranteed.
         for(j = 0; j < s->res_ranks[s->count_ranks]; j++)
         {
@@ -3533,18 +3535,21 @@ printf("Rank[%i]: qsched_prepare_mpi took %lli (= %e) ticks\n", s->rank,
             
             #ifdef TASK_TIMERS
                 t->task_finish = getticks() - s->start;
+                t->node_executed = s->rank;
+                t->thread_executed = omp_get_thread_num();
             #endif
             } /* loop as long as there are tasks. */
             
         } /* parallel loop. */
 
-
+    MPI_Barrier(s->comm);
     toc = getticks();
 
         printf("Rank[%i]: Execution took %lli (= %e) ticks\n", s->rank,
          toc - tic, (float)(toc - tic));
+    //TIMINGS BASED ON COSMA4!
+    message("Execution took %.3f milliseconds.", (toc-tic)/2.67e6);
 
-    MPI_Barrier(s->comm);
 #else
     error( "QuickSched was not compiled with OpenMP support." );
 #endif
@@ -3842,6 +3847,10 @@ void qsched_enqueue ( struct qsched *s , struct task *t ) {
     int j, qid, scores[ s->nr_queues ], oid;
 
 
+   if(t->type < -150 || t->type == 0)
+    {
+        message("Enqueueing a wierd task with type %i", t->type);
+    }
     /* When we enqueue the send or recv tasks we emit the Isend/Irecv commands. */
     if(t->type == task_type_send ){
         #ifdef WITH_MPI
@@ -3856,6 +3865,7 @@ void qsched_enqueue ( struct qsched *s , struct task *t ) {
         struct res *resource = &s->res[getindex(resID, s)];
         int err;
 //        printf("Sending tag = %i\n", tag);
+       // message("s %i", tag);
         if(resource->data == NULL)
         {
             if(resource->parent != -1)
@@ -3874,6 +3884,7 @@ void qsched_enqueue ( struct qsched *s , struct task *t ) {
           message("%s", buff);
           error("Failed to emit isend for task (%lli).", t->id);
         }
+        //message("Sent data with tag %i", tag);
         #else
             error("QuickSched not compiled with MPI support.");
         #endif
@@ -3890,6 +3901,7 @@ void qsched_enqueue ( struct qsched *s , struct task *t ) {
         int tag = data[4];
         long long int parent_id = -1;
         struct res *resource = &s->res[getindex(resID, s)];
+       // message("r %i", tag);
         if(resource->parent >=0)
         {
             /* Recurse and check if any of the parents exist locally.*/
@@ -3942,6 +3954,7 @@ void qsched_enqueue ( struct qsched *s , struct task *t ) {
           message("%s", buff);
           error("Failed to emit irecv for particle data (%s).", buff);
         }
+        //message("Waiting for data with tag %i", tag);
         #else
             error("QuickSched not compiled with MPI support.");
         #endif
@@ -4165,6 +4178,7 @@ int qsched_locktask ( struct qsched *s , long long int tid ) {
 #endif
 
   }
+
     /* Try to lock all the task's locks. */
     for ( k = 0 ; k < t->nr_locks ; k++ )
         if ( qsched_lockres( s , t->locks[k] ) == 0 )
diff --git a/src/qsched.h b/src/qsched.h
index f488a89..8be1c17 100644
--- a/src/qsched.h
+++ b/src/qsched.h
@@ -189,9 +189,9 @@ struct qsched {
     int size_users;
     int count_users;
     MPI_Comm comm;
-    #ifdef TASK_TIMERS
+//    #ifdef TASK_TIMERS
     ticks start;
-    #endif
+//    #endif
     #endif
     
     };
diff --git a/src/queue.c b/src/queue.c
index 008cb99..10f1eb1 100644
--- a/src/queue.c
+++ b/src/queue.c
@@ -39,11 +39,17 @@
 #include "qsched.h"
 #include "queue.h"
 
-
+#ifndef WITH_MPI
 inline int gettaskindex(long long int id, struct qsched *s)
 {
     return id;
 }
+#else
+inline int gettaskindex(long long int id, struct qsched *s)
+{
+    return s->task_ranks[id>>48] + (id & 0xFFFFFFFFFFFFFF);
+}
+#endif
 
 /**
  * @brief Get a task index from the given #queue.
@@ -132,8 +138,43 @@ long long int queue_get ( struct queue *q , struct qsched *s , int insist ) {
         
     /* Otherwise, clear the task ID. */
     else
+    {
+//        message("q->count = %i", q->count);
+       /* for(k = 0; k < q->count; k++)
+        {
+            if(s->tasks[gettaskindex(q->inds[k],s)].type == task_type_recv)
+            {
+                struct task *t = &s->tasks[gettaskindex(q->inds[k], s)];
+                if(t == NULL)
+                    error("We got a null task?");
+               // int *data = (int*)(&s->data[t->data]);
+                //int *data = (int*)(&s->data[s->tasks[gettaskindex(q->inds[k],s)].data]);
+             //   int tag = data[4];
+             //   message("Failed to receive data (yet) with tag %i", tag);
+            }
+            else if(s->tasks[gettaskindex(q->inds[k],s)].type == task_type_send)
+            {
+                struct task *t = &s->tasks[gettaskindex(q->inds[k], s)];
+                if(t == NULL)
+                    error("We got a null task?");
+                int *data = (int*)(&s->data[t->data]);
+                int tag = data[4];
+            }
+            else if (s->tasks[gettaskindex(q->inds[k],s)].type < 0)
+            {
+                int i;
+                for(i = 0; i < q->count; i++)
+                {
+                    message("q[%i] = task %lli, index %i, type %i", i, q->inds[i],gettaskindex(q->inds[i],s ), s->tasks[gettaskindex(q->inds[i],s)].type);
+                }
+                #ifdef WITH_MPI
+                error("Got here somehow, type is %i, nr_locks = %i. k = %i, count = %i. s->count = %i", s->tasks[gettaskindex(q->inds[k],s)].type, s->tasks[gettaskindex(q->inds[k],s)].nr_locks, k, q->count, s->task_ranks[s->count_ranks]);
+#endif
+            }
+        } */  
         tid = qsched_task_none;
-        
+    }     
+       
     /* Unlock the queue. */
     lock_unlock_blind( &q->lock );
 
-- 
GitLab