Skip to content
Snippets Groups Projects
Commit dd1bfd01 authored by aidan's avatar aidan
Browse files

The QR decomposition with MPI can now compute a check, and it succeeds so it...

The QR decomposition with MPI can now compute a check, and it succeeds so it works! LeakSanitizer cries about a lot of stuff so I'll try to fix that next maybe
parent 9deb7ef1
No related branches found
No related tags found
No related merge requests found
......@@ -71,5 +71,5 @@ test_qr_mpi_LDFLAGS = $(MPI_THREAD_LIBS)
test_qr_mpi_cblas_SOURCES = test_qr_mpi.c
test_qr_mpi_cblas_CFLAGS = $(AM_CFLAGS) -DWITH_MPI -DWITH_CBLAS_LIB
test_qr_mpi_cblas_LDADD = ../src/.libs/libquickschedMPI.a $(METIS_LIBS) -llapacke -llapacke -lblas -lcblas
test_qr_mpi_cblas_LDADD = ../src/.libs/libquickschedMPI.a -llapacke -llapacke -lblas -lcblas $(METIS_LIBS)
test_qr_mpi_cblas_LDFLAGS = $(MPI_THREAD_LIBS)
......@@ -30,6 +30,7 @@
#include <pthread.h>
#include <cblas.h>
#include <lapacke.h>
/* Local includes. */
#include "quicksched.h"
......@@ -497,7 +498,7 @@ void test_qr(int m, int n, int K, int nr_threads, int runs, double* matrix) {
error("Unknown task type.");
}
}
srand(6);
/* Allocate and fill the original matrix. */
if ((A = (double*)malloc(sizeof(double)* m* n* K* K)) == NULL ||
(tau = (double*)malloc(sizeof(double)* m* n* K)) == NULL ||
......@@ -513,13 +514,13 @@ void test_qr(int m, int n, int K, int nr_threads, int runs, double* matrix) {
bzero(tau, sizeof(double) * m * n * K);
/* Dump A_orig. */
/* message( "A_orig = [" );
message( "A_orig = [" );
for ( k = 0 ; k < m*K ; k++ ) {
for ( j = 0 ; j < n*K ; j++ )
printf( "%.3f " , A_orig[ j*m*K + k ] );
printf( "\n" );
}
printf( "];\n" ); */
printf( "];\n" );
/* Initialize the scheduler. */
qsched_init(&s, nr_threads, qsched_flag_none);
......@@ -541,7 +542,7 @@ void test_qr(int m, int n, int K, int nr_threads, int runs, double* matrix) {
data[0] = k;
data[1] = k;
data[2] = k;
tid_new = qsched_addtask(&s, task_DGEQRF, task_flag_none, data,
tid_new = qsched_addtask_local(&s, task_DGEQRF, task_flag_none, data,
sizeof(int) * 3, 2);
qsched_addlock(&s, tid_new, rid[k * m + k]);
if (tid[k * m + k] != -1) qsched_addunlock(&s, tid[k * m + k], tid_new);
......@@ -552,7 +553,7 @@ void test_qr(int m, int n, int K, int nr_threads, int runs, double* matrix) {
data[0] = k;
data[1] = j;
data[2] = k;
tid_new = qsched_addtask(&s, task_DLARFT, task_flag_none, data,
tid_new = qsched_addtask_local(&s, task_DLARFT, task_flag_none, data,
sizeof(int) * 3, 3);
qsched_addlock(&s, tid_new, rid[j * m + k]);
qsched_adduse(&s, tid_new, rid[k * m + k]);
......@@ -568,7 +569,7 @@ void test_qr(int m, int n, int K, int nr_threads, int runs, double* matrix) {
data[0] = i;
data[1] = k;
data[2] = k;
tid_new = qsched_addtask(&s, task_DTSQRF, task_flag_none, data,
tid_new = qsched_addtask_local(&s, task_DTSQRF, task_flag_none, data,
sizeof(int) * 3, 3);
qsched_addlock(&s, tid_new, rid[k * m + i]);
qsched_adduse(&s, tid_new, rid[k * m + k]);
......@@ -581,12 +582,12 @@ void test_qr(int m, int n, int K, int nr_threads, int runs, double* matrix) {
data[0] = i;
data[1] = j;
data[2] = k;
tid_new = qsched_addtask(&s, task_DSSRFT, task_flag_none, data,
tid_new = qsched_addtask_local(&s, task_DSSRFT, task_flag_none, data,
sizeof(int) * 3, 5);
qsched_addlock(&s, tid_new, rid[j * m + i]);
qsched_adduse(&s, tid_new, rid[k * m + i]);
qsched_adduse(&s, tid_new, rid[j * m + k]);
// qsched_addunlock(&s, tid[k * m + i], tid_new);
qsched_addlock(&s, tid_new, rid[j * m + k]);
qsched_addunlock(&s, tid[k * m + i], tid_new);
qsched_addunlock(&s, tid[j * m + (i - 1)], tid_new);
if (tid[j * m + i] != -1) qsched_addunlock(&s, tid[j * m + i], tid_new);
......@@ -616,22 +617,22 @@ void test_qr(int m, int n, int K, int nr_threads, int runs, double* matrix) {
}
/* Dump A. */
/* message( "A = [" );
message( "A = [" );
for ( k = 0 ; k < m*K ; k++ ) {
for ( j = 0 ; j < n*K ; j++ )
printf( "%.3f " , A[ j*m*K + k ] );
printf( "\n" );
}
printf( "];\n" ); */
printf( "];\n" );
/* Dump tau. */
/* message( "tau = [" );
message( "tau = [" );
for ( k = 0 ; k < m*K ; k++ ) {
for ( j = 0 ; j < n ; j++ )
printf( "%.3f " , tau[ j*m*K + k ] );
printf( "\n" );
}
printf( "];\n" ); */
printf( "];\n" );
/* Dump the tasks. */
/* for ( k = 0 ; k < s.count ; k++ ) {
......@@ -654,7 +655,7 @@ void test_qr(int m, int n, int K, int nr_threads, int runs, double* matrix) {
}
/* Test if the decomposition was correct.*/
/*double *tempMatrix = tileToColumn(A, m*n*K*K, m, n, K);
double *tempMatrix = tileToColumn(A, m*n*K*K, m, n, K);
double *Q = computeQ(tempMatrix, m*K, K, tau, m);
double *R = getR(tempMatrix, m*K);
cblas_dgemm(CblasColMajor, CblasNoTrans, CblasNoTrans, m*K, m*K, m*K, 1.0, Q,
......@@ -670,7 +671,7 @@ void test_qr(int m, int n, int K, int nr_threads, int runs, double* matrix) {
}
free(tempMatrix);
free(Q);
free(R);*/
free(R);
/* Clean up. */
free(A);
......
......@@ -33,9 +33,8 @@
#include <mpi.h>
#ifdef WITH_CLBAS
#include <cblas.h>
#endif
/* Local includes. */
#include "quicksched.h"
......@@ -639,7 +638,7 @@ task_j = (int*)calloc(sizeof(int), m*n*m*n);
task_k = (int*)calloc(sizeof(int), m*n*m*n);
#endif
srand(6);
/* Allocate and fill the original matrix. */
if ((A = (double*)malloc(sizeof(double)* m* n* K* K)) == NULL ||
(tau = (double*)malloc(sizeof(double)* m* n* K)) == NULL ||
......@@ -681,6 +680,17 @@ if(s.rank == 0) {
for (k = 0; k < m * n; k++) {
tid[k] = qsched_task_none;
}
#ifdef WITH_CBLAS_LIB
/* Dump A_orig. */
message( "A_orig = [" );
for ( k = 0 ; k < m*K ; k++ ) {
for ( j = 0 ; j < n*K ; j++ )
printf( "%.3f " , A_orig[ j*m*K + k ] );
printf( "\n" );
}
printf( "];\n" );
#endif
/* Build the tasks. */
for (k = 0; k < m && k < n; k++) {
......@@ -907,20 +917,59 @@ for(i = 0; i < n; i++)
{
for(j = 0; j < m; j++)
{
long long int id = tau_id[j*m+i];
long long int id = tau_id[i*m+j];
struct res *r = &s.res[s.res_ranks[(id>>48)] + (id & 0xFFFFFFFFFFFFFF)];
if(r->node == s.rank)
{
memcpy(&tau[(j*m+i)*K], qsched_getresdata(&s, r->ID), sizeof(double)*K );
memcpy(&tau[(i*m+j)*K], qsched_getresdata(&s, r->ID), sizeof(double)*K );
}
}
}
MPI_Allreduce(MPI_IN_PLACE, tau, m*n*K, MPI_DOUBLE, MPI_SUM, s.comm);
double *tau_new = (double*)calloc(sizeof(double), m*n*K);
for(i = 0; i < n; i++)
{
double *column_old = &tau[i*m*K];
double *column_new = &tau_new[i*m*K];
for(j = 0; j < m; j++)
{
for(k = 0; k < K; k++)
{
column_new[k*m+j] = column_old[j*K+k];
}
}
}
free(tau);
tau = tau_new;
#ifdef WITH_CBLAS_LIB
//This should check correctness.
if(s.rank == 0){
message( "A = [" );
for ( k = 0 ; k < m*K ; k++ ) {
for ( j = 0 ; j < n*K ; j++ )
printf( "%.3f " , A[ j*m*K + k ] );
printf( "\n" );
}
printf( "];\n" );
/* Dump tau. */
message( "tau = [" );
for ( k = 0 ; k < m*K ; k++ ) {
for ( j = 0 ; j < n ; j++ )
printf( "%.3f " , tau[ j*m*K + k ] );
printf( "\n" );
}
printf( "];\n" );
double *tempMatrix = tileToColumn(A, m*n*K*K, m, n, K);
double *Q = computeQ(tempMatrix, m*K, K, tau, m);
double *R = getR(tempMatrix, m*K);
......@@ -938,6 +987,7 @@ double *tempMatrix = tileToColumn(A, m*n*K*K, m, n, K);
free(tempMatrix);
free(Q);
free(R);
}
#endif
//TODO Clean up the resource data.
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Please register or to comment