From 36cf45a25534b28ec403f4ea788c8282f78d0096 Mon Sep 17 00:00:00 2001 From: d74ksy <aidan.chalk@durham.ac.uk> Date: Thu, 5 Nov 2015 13:59:18 +0000 Subject: [PATCH] Maybe have reconstructed the matrix A to contain QR and the tau vector. Maybe --- examples/test_qr_mpi.c | 80 +++++++++++++++++++++++++++++++++++++++--- 1 file changed, 75 insertions(+), 5 deletions(-) diff --git a/examples/test_qr_mpi.c b/examples/test_qr_mpi.c index b24bc09..f5825ab 100644 --- a/examples/test_qr_mpi.c +++ b/examples/test_qr_mpi.c @@ -597,10 +597,6 @@ if(MpiThreadLevel != MPI_THREAD_MULTIPLE) printf("Ran DSSRFT with k = %lli, j = %lli, i = %lli\n", kk, jj, ii); }*/ toc_run = getticks(); - if(type == task_DGEQRF){ - if(idata[2] == 2) - message("Ran task (2,2,2)"); - } // message("tid = %lli", tid); // message("sizeof arrays = %i", m*n*m*n); task_start[tid] = tic - start; @@ -655,7 +651,8 @@ task_k = (int*)calloc(sizeof(int), m*n*m*n); else A_orig[k] = matrix[k]; } - memcpy(A, A_orig, sizeof(double) * m * n * K * K); +// memcpy(A, A_orig, sizeof(double) * m * n * K * K); + bzero(A, sizeof(double) * m * n * K * K); bzero(tau, sizeof(double) * m * n * K); if(s.rank == 0){ /* Initialize the scheduler. */ @@ -870,6 +867,79 @@ FILE *file = NULL; fclose(file2); } +if(s.rank != 0) +{ + rid = (qsched_res_t*)malloc(sizeof(qsched_res_t) * m * n); + if(rid == NULL) + error("Failed to allocate rid"); +} + +MPI_Bcast(rid, m * n, MPI_LONG_LONG_INT, 0, s.comm); + +/* Reconstruct A*/ + for(i = 0; i < n; i++) + { + for(j = 0; j < m; j++) + { + long long int id = rid[j*m + i]; + struct res *r = &s.res[s.res_ranks[(id>>48)] + (id & 0xFFFFFFFFFFFFFF)]; + if(r->node == s.rank) + { + memcpy(&A[((j*m+i)*K*K)], qsched_getresdata(&s, r->ID), sizeof(double) *K *K ); + } + } + } + +MPI_Allreduce(MPI_IN_PLACE, A, m*n*K*K, MPI_DOUBLE, MPI_SUM, s.comm); + +//TODO Synchronize Tau. + +if(s.rank != 0) +{ + tau_id = (qsched_res_t*)malloc(sizeof(qsched_res_t) * m*n); +} +if(tau_id == NULL) + error("tau_id not allocated"); + +MPI_Bcast(tau_id, m*n, MPI_LONG_LONG_INT, 0, s.comm); + +for(i = 0; i < n; i++) +{ + for(j = 0; j < m; j++) + { + long long int id = tau_id[j*m+i]; + struct res *r = &s.res[s.res_ranks[(id>>48)] + (id & 0xFFFFFFFFFFFFFF)]; + if(r->node == s.rank) + { + memcpy(&tau[(j*m+i)*K], qsched_getresdata(&s, r->ID), sizeof(double)*K ); + } + } +} + +MPI_Allreduce(MPI_IN_PLACE, tau, m*n*K, MPI_DOUBLE, MPI_SUM, s.comm); + + +#ifdef WITH_CBLAS + //This should check correctness. +double *tempMatrix = tileToColumn(A, m*n*K*K, m, n, K); + double *Q = computeQ(tempMatrix, m*K, K, tau, m); + double *R = getR(tempMatrix, m*K); + cblas_dgemm(CblasColMajor, CblasNoTrans, CblasNoTrans, m*K, m*K, m*K, 1.0, Q, + m*K, R, m*K, 0.0, tempMatrix, m*K); + free(Q); + Q = tileToColumn(A_orig, m*n*K*K, m, n, K); + for(i = 0; i < m * n * K * K; i++) + { + if(Q[i] != 0 && (Q[i] / tempMatrix[i] > 1.005 || Q[i] / tempMatrix[i] < + 0.995)) + printf("Not correct at value %i %.3f %.3e %.3e\n", i, A[i], Q[i], + tempMatrix[i]); + } + free(tempMatrix); + free(Q); + free(R); +#endif + //TODO Clean up the resource data. qsched_free(&s); free(A); -- GitLab