From 36cf45a25534b28ec403f4ea788c8282f78d0096 Mon Sep 17 00:00:00 2001
From: d74ksy <aidan.chalk@durham.ac.uk>
Date: Thu, 5 Nov 2015 13:59:18 +0000
Subject: [PATCH] Maybe have reconstructed the matrix A to contain QR and the
 tau vector. Maybe

---
 examples/test_qr_mpi.c | 80 +++++++++++++++++++++++++++++++++++++++---
 1 file changed, 75 insertions(+), 5 deletions(-)

diff --git a/examples/test_qr_mpi.c b/examples/test_qr_mpi.c
index b24bc09..f5825ab 100644
--- a/examples/test_qr_mpi.c
+++ b/examples/test_qr_mpi.c
@@ -597,10 +597,6 @@ if(MpiThreadLevel != MPI_THREAD_MULTIPLE)
                 printf("Ran DSSRFT with k = %lli, j = %lli, i = %lli\n", kk, jj, ii);
         }*/
         toc_run = getticks();
-        if(type == task_DGEQRF){
-            if(idata[2] == 2)
-                message("Ran task (2,2,2)");
-        }
 //        message("tid = %lli", tid);
 //        message("sizeof arrays = %i", m*n*m*n);
         task_start[tid] = tic - start;
@@ -655,7 +651,8 @@ task_k = (int*)calloc(sizeof(int),  m*n*m*n);
     else
       A_orig[k] = matrix[k];
   }
-  memcpy(A, A_orig, sizeof(double) * m * n * K * K);
+//  memcpy(A, A_orig, sizeof(double) * m * n * K * K);
+  bzero(A, sizeof(double) * m * n * K * K);
   bzero(tau, sizeof(double) * m * n * K);
 if(s.rank == 0){
   /* Initialize the scheduler. */
@@ -870,6 +867,79 @@ FILE *file = NULL;
         fclose(file2);
      
     }
+if(s.rank != 0)
+{
+    rid = (qsched_res_t*)malloc(sizeof(qsched_res_t) * m * n);
+    if(rid == NULL)
+        error("Failed to allocate rid");
+}
+
+MPI_Bcast(rid, m * n, MPI_LONG_LONG_INT, 0, s.comm);
+
+/* Reconstruct A*/
+    for(i = 0; i < n; i++)
+    {
+        for(j = 0; j < m; j++)
+        {
+            long long int id = rid[j*m + i];
+            struct res *r = &s.res[s.res_ranks[(id>>48)] + (id & 0xFFFFFFFFFFFFFF)];
+            if(r->node == s.rank)
+            {
+                memcpy(&A[((j*m+i)*K*K)], qsched_getresdata(&s, r->ID), sizeof(double) *K *K );
+            }
+        }
+    }
+
+MPI_Allreduce(MPI_IN_PLACE, A, m*n*K*K, MPI_DOUBLE, MPI_SUM, s.comm);
+
+//TODO Synchronize Tau.
+
+if(s.rank != 0)
+{
+    tau_id = (qsched_res_t*)malloc(sizeof(qsched_res_t) * m*n);
+}
+if(tau_id == NULL)
+    error("tau_id not allocated");
+
+MPI_Bcast(tau_id, m*n, MPI_LONG_LONG_INT, 0, s.comm);
+
+for(i = 0; i < n; i++)
+{
+    for(j = 0; j < m; j++)
+    {
+        long long int id = tau_id[j*m+i];
+        struct res *r = &s.res[s.res_ranks[(id>>48)] + (id & 0xFFFFFFFFFFFFFF)];
+        if(r->node == s.rank)
+        {
+            memcpy(&tau[(j*m+i)*K], qsched_getresdata(&s, r->ID), sizeof(double)*K );
+        }
+    }
+}
+
+MPI_Allreduce(MPI_IN_PLACE, tau, m*n*K, MPI_DOUBLE, MPI_SUM, s.comm);
+
+
+#ifdef WITH_CBLAS
+    //This should check correctness.
+double *tempMatrix = tileToColumn(A, m*n*K*K, m, n, K);
+   double *Q = computeQ(tempMatrix, m*K, K, tau, m);
+   double *R = getR(tempMatrix, m*K);
+   cblas_dgemm(CblasColMajor, CblasNoTrans, CblasNoTrans, m*K, m*K, m*K, 1.0, Q,
+   m*K, R, m*K, 0.0, tempMatrix, m*K);
+       free(Q);
+       Q = tileToColumn(A_orig, m*n*K*K, m, n, K);
+   for(i = 0; i < m * n * K * K; i++)
+   {
+       if(Q[i] != 0 && (Q[i] / tempMatrix[i] > 1.005 || Q[i] / tempMatrix[i] <
+   0.995))
+           printf("Not correct at value %i %.3f %.3e %.3e\n", i, A[i], Q[i],
+   tempMatrix[i]);
+   }
+   free(tempMatrix);
+   free(Q);
+   free(R);
+#endif
+
  //TODO Clean up the resource data.
     qsched_free(&s);
     free(A);
-- 
GitLab