Skip to content
Snippets Groups Projects
Commit 36cf45a2 authored by d74ksy's avatar d74ksy
Browse files

Maybe have reconstructed the matrix A to contain QR and the tau vector. Maybe

parent b4531761
No related branches found
No related tags found
No related merge requests found
......@@ -597,10 +597,6 @@ if(MpiThreadLevel != MPI_THREAD_MULTIPLE)
printf("Ran DSSRFT with k = %lli, j = %lli, i = %lli\n", kk, jj, ii);
}*/
toc_run = getticks();
if(type == task_DGEQRF){
if(idata[2] == 2)
message("Ran task (2,2,2)");
}
// message("tid = %lli", tid);
// message("sizeof arrays = %i", m*n*m*n);
task_start[tid] = tic - start;
......@@ -655,7 +651,8 @@ task_k = (int*)calloc(sizeof(int), m*n*m*n);
else
A_orig[k] = matrix[k];
}
memcpy(A, A_orig, sizeof(double) * m * n * K * K);
// memcpy(A, A_orig, sizeof(double) * m * n * K * K);
bzero(A, sizeof(double) * m * n * K * K);
bzero(tau, sizeof(double) * m * n * K);
if(s.rank == 0){
/* Initialize the scheduler. */
......@@ -870,6 +867,79 @@ FILE *file = NULL;
fclose(file2);
}
if(s.rank != 0)
{
rid = (qsched_res_t*)malloc(sizeof(qsched_res_t) * m * n);
if(rid == NULL)
error("Failed to allocate rid");
}
MPI_Bcast(rid, m * n, MPI_LONG_LONG_INT, 0, s.comm);
/* Reconstruct A*/
for(i = 0; i < n; i++)
{
for(j = 0; j < m; j++)
{
long long int id = rid[j*m + i];
struct res *r = &s.res[s.res_ranks[(id>>48)] + (id & 0xFFFFFFFFFFFFFF)];
if(r->node == s.rank)
{
memcpy(&A[((j*m+i)*K*K)], qsched_getresdata(&s, r->ID), sizeof(double) *K *K );
}
}
}
MPI_Allreduce(MPI_IN_PLACE, A, m*n*K*K, MPI_DOUBLE, MPI_SUM, s.comm);
//TODO Synchronize Tau.
if(s.rank != 0)
{
tau_id = (qsched_res_t*)malloc(sizeof(qsched_res_t) * m*n);
}
if(tau_id == NULL)
error("tau_id not allocated");
MPI_Bcast(tau_id, m*n, MPI_LONG_LONG_INT, 0, s.comm);
for(i = 0; i < n; i++)
{
for(j = 0; j < m; j++)
{
long long int id = tau_id[j*m+i];
struct res *r = &s.res[s.res_ranks[(id>>48)] + (id & 0xFFFFFFFFFFFFFF)];
if(r->node == s.rank)
{
memcpy(&tau[(j*m+i)*K], qsched_getresdata(&s, r->ID), sizeof(double)*K );
}
}
}
MPI_Allreduce(MPI_IN_PLACE, tau, m*n*K, MPI_DOUBLE, MPI_SUM, s.comm);
#ifdef WITH_CBLAS
//This should check correctness.
double *tempMatrix = tileToColumn(A, m*n*K*K, m, n, K);
double *Q = computeQ(tempMatrix, m*K, K, tau, m);
double *R = getR(tempMatrix, m*K);
cblas_dgemm(CblasColMajor, CblasNoTrans, CblasNoTrans, m*K, m*K, m*K, 1.0, Q,
m*K, R, m*K, 0.0, tempMatrix, m*K);
free(Q);
Q = tileToColumn(A_orig, m*n*K*K, m, n, K);
for(i = 0; i < m * n * K * K; i++)
{
if(Q[i] != 0 && (Q[i] / tempMatrix[i] > 1.005 || Q[i] / tempMatrix[i] <
0.995))
printf("Not correct at value %i %.3f %.3e %.3e\n", i, A[i], Q[i],
tempMatrix[i]);
}
free(tempMatrix);
free(Q);
free(R);
#endif
//TODO Clean up the resource data.
qsched_free(&s);
free(A);
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Please register or to comment