Skip to content
Snippets Groups Projects
Commit e9327ef3 authored by Aidan Chalk's avatar Aidan Chalk
Browse files

Fixed issues with multiple runs. Users can now pull the tasks back from the...

Fixed issues with multiple runs. Users can now pull the tasks back from the device to query tic/toc parameters which can be used for task plots
parent cdb7c197
No related branches found
No related tags found
No related merge requests found
......@@ -20,13 +20,13 @@
AUTOMAKE_OPTIONS=gnu
# Add the source directory and debug to CFLAGS
AM_CFLAGS = -g -Wall -Werror -I../src $(OPENMP_CFLAGS) -DCPU_TPS=2.67e9 -DTIMERS \
AM_CFLAGS = -g -Wall -Werror -I../src $(OPENMP_CFLAGS) -DCPU_TPS=3.1e9 -DTIMERS \
# -fsanitize=address -fno-omit-frame-pointer
AM_LDFLAGS = -lm # -fsanitize=address
AM_LDFLAGS = -lm # -fsanitize=address
# Set-up the library
bin_PROGRAMS = test test_qr test_bh
bin_PROGRAMS = test test_qr #test_bh
# Sources for test
test_SOURCES = test.c
......@@ -35,11 +35,19 @@ test_LDADD = ../src/.libs/libquicksched.a
# Sources for test_qr
test_qr_SOURCES = test_qr.c
test_qr_CFLAGS = $(AM_CFLAGS)
test_qr_LDADD = ../src/.libs/libquicksched.a -llapacke -llapack -lblas
test_qr_CFLAGS = $(AM_CFLAGS) -I/home/aidan/lapack-3.5.0/lapacke/include/
test_qr_LDFLAGS = -I/home/aidan/lapack-3.5.0/lapacke/include/
test_qr_LDADD = ../src/.libs/libquicksched.a /home/aidan/lapack-3.5.0/liblapacke.a /home/aidan/lapack-3.5.0/liblapack.a -lblas
# Sources for test_bh
test_bh_SOURCES = test_bh.c
test_bh_CFLAGS = $(AM_CFLAGS)
test_bh_LDADD = ../src/.libs/libquicksched.a
#test_bh_SOURCES = test_bh.c
#test_bh_CFLAGS = $(AM_CFLAGS)
#test_bh_LDADD = ../src/.libs/libquicksched.a
#if HAVE_CUDA
#test_cuda_SOURCES = test_gpu_simple.cu
#test_cuda_CFLAGS = -DWITH_CUDA $(AM_CFLAGS) $(CUDA_CFLAGS) $(CUDA_MYFLAGS)
#test_cuda_LINK = $(NVCC)
#test_cuda_LDADD = ../src/.libs/libquicksched_cuda.a $(CUDA_LIBS)
#endif
......@@ -118,7 +118,7 @@ void test2 ( int m , int n , int k , int nr_threads ) {
/* Build a task for each tile of the matrix c. */
for ( i = 0 ; i < m ; i++ )
for ( j = 0 ; j < n ; j++ ) {
rid = qsched_addres( &s , qsched_owner_none , qsched_res_none );
rid = qsched_addres( &s , qsched_owner_none , qsched_res_none, NULL , 0 );
data[0] = i; data[1] = j;
for ( kk = 0 ; kk < k ; kk++ ) {
data[2] = kk;
......@@ -222,7 +222,7 @@ void test1 ( int m , int n , int k , int nr_threads ) {
for ( i = 0 ; i < m ; i++ )
for ( j = 0 ; j < n ; j++ ) {
data[0] = i; data[1] = j;
rid = qsched_addres( &s , qsched_owner_none , qsched_res_none );
rid = qsched_addres( &s , qsched_owner_none , qsched_res_none , NULL , 0);
tid = qsched_addtask( &s , 1 , task_flag_none , data , 2*sizeof(int) , 1 );
qsched_addlock( &s , tid , rid );
}
......
......@@ -37,7 +37,6 @@
/* Local includes. */
#include "quicksched.h"
/*
* Sam's routines for the tiled QR decomposition.
*/
......@@ -47,9 +46,9 @@
Computes the 2-norm by computing the following: \f[\textrm{2-norm}=\sqrt{\sum_{i=0}^{l-1} x(i)^2}\f]
*/
double do2norm(double* x, int l)
float do2norm(float* x, int l)
{
double sum = 0, norm;
float sum = 0, norm;
int i;
for(i = 0; i < l; i++)
......@@ -74,14 +73,14 @@ double do2norm(double* x, int l)
*
* \returns void
*/
void calcvkDouble (double topDiag,
void calcvkfloat (float topDiag,
int ma,
double* xb,
float* xb,
int l,
double* vk)
float* vk)
{
int sign, i;
double norm, div;
float norm, div;
//same non-standard normalisation as for single blocks above, but organised without a temporary beta variable
sign = topDiag >= 0.0 ? 1 : -1;
......@@ -106,16 +105,16 @@ void calcvkDouble (double topDiag,
}
void updateDoubleQ_WY (double* blockA,
double* blockB,
double* blockTau,
void updatefloatQ_WY (float* blockA,
float* blockB,
float* blockTau,
int k, int ma, int mb, int n,
int ldm,
double* hhVector)//bottom, essential part.
float* hhVector)//bottom, essential part.
{
int i, j;
double tau = 1.0, beta;
float tau = 1.0, beta;
/* Compute tau = 2/v'v */
for(i = 0; i < mb; i ++)
......@@ -148,17 +147,17 @@ void updateDoubleQ_WY (double* blockA,
blockTau[k] = tau;
}
void DTSQRF (double* blockA,
double* blockB,
double* blockTau,
void DTSQRF (float* blockA,
float* blockB,
float* blockTau,
int ma,
int mb,
int n,
int ldm,
double* hhVector)
float* hhVector)
{
int k;
double* xVectA, *xVectB;
float* xVectA, *xVectB;
xVectA = blockA;
xVectB = blockB;
......@@ -167,11 +166,11 @@ void DTSQRF (double* blockA,
{
//vk = sign(x[1])||x||_2e1 + x
//vk = vk/vk[0]
calcvkDouble(xVectA[0], ma - k, xVectB, (ma + mb) - k, hhVector);//returns essential
calcvkfloat(xVectA[0], ma - k, xVectB, (ma + mb) - k, hhVector);//returns essential
//matA(k:ma,k:na) = matA(k:ma,k:na) - (2/(vk.T*vk))*vk*(vk.T*matA(k:ma,k:na)
//update both blocks, preserving the vectors already stored below the diagonal in the top block and treating them as if they were zeros.
updateDoubleQ_WY (blockA, blockB,
updatefloatQ_WY (blockA, blockB,
blockTau,
k, ma, mb, n,
ldm,
......@@ -182,14 +181,14 @@ void DTSQRF (double* blockA,
}
}
void DSSRFT (double* blockV,
double* blockA, double* blockB,
double* blockTau,
void DSSRFT (float* blockV,
float* blockA, float* blockB,
float* blockTau,
int b, int n, int ldm)
{
int i, j, k;
double tau, beta;
float tau, beta;
/* Compute b_j = b_j - tau*v*v'*b_j for each column j of blocks A & B,
and for each householder vector v of blockV */
......@@ -223,7 +222,89 @@ void DSSRFT (double* blockV,
}
}
}
/**
 * @brief Allocate a matrix of 32x32 tiles and fill it with pseudo-random values.
 *
 * The matrix has m*32 rows and n*32 columns and is stored column by column:
 * element (row, col) lives at index col*(m*32) + row. Each entry is taken
 * from rand() and mapped onto [-1, 1] in steps of 0.01, so the content is
 * reproducible for a given srand() seed.
 *
 * @param m Number of tile rows (each tile contributes 32 rows).
 * @param n Number of tile columns (each tile contributes 32 columns).
 *
 * @return Pointer to the malloc'ed matrix; it is not freed here.
 *
 * NOTE(review): the tile edge is fixed at 32 in this helper; callers that
 * use a different tile size should confirm the buffer is large enough.
 */
float* randomMatrix(int m, int n)
{
    const int rows = m * 32;
    const int cols = n * 32;
    float* Matrix = (float*) malloc( sizeof(float) * m*n*32*32);

    /* Allocation failure is reported through the project's error() macro. */
    if(Matrix == NULL)
        error("Failed to allocate Matrix");

    for(int col = 0; col < cols; col++)
    {
        for(int row = 0; row < rows; row++)
        {
            /* rand() % 201 lies in [0, 200]; shift and scale it to [-1, 1]. */
            const float draw = (float)(rand() % 201);
            Matrix[(rows*col)+row] = (draw - 100.0) / 100.0;
        }
    }

    return Matrix;
}
/**
 * @brief Allocate a matrix of 32x32 tiles with a deterministic fill pattern.
 *
 * The matrix has m*32 rows and n*32 columns, stored column by column
 * (element (row, col) lives at index col*(m*32) + row). Each entry is set
 * to the sum of its row and column indices.
 *
 * @param m Number of tile rows (each tile contributes 32 rows).
 * @param n Number of tile columns (each tile contributes 32 columns).
 *
 * @return Pointer to the malloc'ed matrix; it is not freed here.
 */
float* generateMatrix( int m, int n)
{
    const int rows = m * 32;
    const int cols = n * 32;
    float* Matrix = (float*) malloc( sizeof(float) * m*n*32*32);

    /* Allocation failure is reported through the project's error() macro. */
    if(Matrix == NULL)
        error("Failed to allocate Matrix");

    /* Start from an all-zero buffer before writing the pattern. */
    memset ( Matrix, 0, sizeof(float)*m*n*32*32 );

    for(int col = 0; col < cols; col++)
    {
        const int base = col * rows;
        for(int row = 0; row < rows; row++)
        {
            Matrix[base + row] = (float)(col + row);
        }
    }

    return Matrix;
}
/**
 * @brief Allocate a matrix of 32x32 tiles holding ones on the main diagonal.
 *
 * The matrix has m*32 rows and n*32 columns, stored column by column
 * (element (row, col) lives at index col*(m*32) + row). All entries are
 * zero except those with row == col, which are 1.0.
 *
 * The buffer comes zero-filled from calloc, so only the diagonal needs to
 * be written instead of sweeping every element.
 *
 * @param m Number of tile rows (each tile contributes 32 rows).
 * @param n Number of tile columns (each tile contributes 32 columns).
 *
 * @return Pointer to the allocated matrix; it is not freed here.
 */
float* createIdentity(int m, int n)
{
    const int rows = m * 32;
    const int cols = n * 32;
    float* Matrix = (float*) calloc( (size_t)m * n * 32 * 32, sizeof *Matrix );

    /* Allocation failure is reported through the project's error() macro. */
    if(Matrix == NULL)
        error("Failed to allocate Matrix");

    /* The diagonal ends at whichever dimension is shorter. */
    for(int d = 0; d < rows && d < cols; d++)
    {
        Matrix[d*rows + d] = 1.0;
    }

    return Matrix;
}
/**
 * @brief Print a matrix of 32x32 tiles to standard output, one row per line.
 *
 * The matrix is read as m*32 rows by n*32 columns stored column by column
 * (element (row, col) at index col*(m*32) + row). Each row is written as
 * "{ " followed by the entries with three decimals and a closing " }".
 *
 * @param Matrix Matrix data to print.
 * @param m      Number of tile rows (each tile contributes 32 rows).
 * @param n      Number of tile columns (each tile contributes 32 columns).
 */
void printMatrix(float* Matrix, int m, int n)
{
    const int rows = m * 32;
    const int cols = n * 32;
    int row = 0;

    while(row < rows)
    {
        int col;

        printf("{ ");
        for(col = 0; col < cols; col++)
        {
            /* Walk along the row: consecutive columns are `rows` elements apart. */
            const float value = Matrix[col*rows + row];
            printf(" %.3f ", value);
        }
        printf(" }");
        printf("\n");

        row++;
    }
}
/**
* @brief Computes a tiled QR factorization using QuickSched.
......@@ -236,7 +317,7 @@ void DSSRFT (double* blockV,
void test_qr ( int m , int n , int K , int nr_threads , int runs ) {
int k, j, i;
double *A, *A_orig, *tau;
float *A, *A_orig, *tau;
struct qsched s;
qsched_task_t *tid, tid_new;
qsched_res_t *rid;
......@@ -251,27 +332,28 @@ void test_qr ( int m , int n , int K , int nr_threads , int runs ) {
/* Decode the task data. */
int *idata = (int *)data;
int i = idata[0], j = idata[1], k = idata[2];
double buff[ 2*K*K ];
int i = idata[0], j = idata[1];//, k = idata[2];
float buff[ 2*K*K ];
/* Decode and execute the task. */
switch ( type ) {
case task_DGEQRF:
LAPACKE_dgeqrf_work( LAPACK_COL_MAJOR , K, K ,
LAPACKE_sgeqrf_work( LAPACK_COL_MAJOR , K, K ,
&A[ j*m*K*K + i*K ] , m*K , &tau[ j*m*K + i*K ] ,
buff , 2*K*K );
break;
case task_DLARFT:
LAPACKE_dlarft_work( LAPACK_COL_MAJOR , 'F' , 'C' ,
LAPACKE_slarft_work( LAPACK_COL_MAJOR , 'F' , 'C' ,
K , K , &A[ i*m*K*K + i*K ] ,
m*K , &tau[ i*m*K + i*K ] , &A[ j*m*K*K + i*K ] ,
m*K );
break;
case task_DTSQRF:
DTSQRF( &A[ j*m*K*K + j*K ] , &A[ j*m*K*K + i*K ] , &tau[ j*m*K + i*K ] , K , K , K , K*m , buff );
//DTSQRF( &A[ j*m*K*K + j*K ] , &A[ j*m*K*K + i*K ] , &tau[ j*m*K + i*K ] , K , K , K , K*m , buff );
break;
case task_DSSRFT:
DSSRFT( &A[ k*m*K + i*K ] , &A[ j*m*K*K + k*K ] , &A[ j*m*K*K + i*K ] , &tau[ k*m*K + i*K ] , K , K , K*m );
//DSSRFT( &A[ k*m*K + i*K ] , &A[ j*m*K*K + k*K ] , &A[ j*m*K*K + i*K ] , &tau[ k*m*K + i*K ] , K , K , K*m );
break;
default:
error( "Unknown task type." );
......@@ -281,15 +363,28 @@ void test_qr ( int m , int n , int K , int nr_threads , int runs ) {
/* Allocate and fill the original matrix. */
if ( ( A = (double *)malloc( sizeof(double) * m * n * K * K ) ) == NULL ||
( tau = (double *)malloc( sizeof(double) * m * n * K ) ) == NULL ||
( A_orig = (double *)malloc( sizeof(double) * m * n * K * K ) ) == NULL )
if ( ( A = (float *)malloc( sizeof(float) * m * n * K * K ) ) == NULL ||
( tau = (float *)malloc( sizeof(float) * m * n * K ) ) == NULL ||
( A_orig = (float *)malloc( sizeof(float) * m * n * K * K ) ) == NULL )
error( "Failed to allocate matrices." );
for ( k = 0 ; k < m * n * K * K ; k++ )
A_orig[k] = 2*((double)rand()) / RAND_MAX - 1.0;
memcpy( A , A_orig , sizeof(double) * m * n * K * K );
bzero( tau , sizeof(double) * m * n * K );
free(A_orig);
// for ( k = 0 ; k < m * n * K * K ; k++ )
// A_orig[k] = 2*((float)rand()) / RAND_MAX - 1.0;
// A_orig = generateMatrix(m, n);
srand(5);
A_orig = randomMatrix(m,n);
printMatrix(A_orig, m, n);
printf("\n\n\n");
memcpy( A , A_orig , sizeof(float) * m * n * K * K );
bzero( tau , sizeof(float) * m * n * K );
LAPACKE_sgeqrf( LAPACK_COL_MAJOR , K*m, K*n ,
A, m*K , tau);
printMatrix(A, m , n );
printf("\n\n\n");
memcpy( A , A_orig , sizeof(float) * m * n * K * K );
bzero( tau , sizeof(float) * m * n * K );
/* Dump A_orig. */
/* message( "A_orig = [" );
for ( k = 0 ; k < m*K ; k++ ) {
......@@ -309,7 +404,7 @@ void test_qr ( int m , int n , int K , int nr_threads , int runs ) {
error( "Failed to allocate tid/rid matrix." );
for ( k = 0 ; k < m * n ; k++ ) {
tid[k] = qsched_task_none;
rid[k] = qsched_addres( &s , qsched_owner_none , qsched_res_none );
rid[k] = qsched_addres( &s , qsched_owner_none , qsched_res_none , NULL, 0 );
}
/* Build the tasks. */
......@@ -386,7 +481,13 @@ void test_qr ( int m , int n , int K , int nr_threads , int runs ) {
tot_run += toc_run - tic;
}
printf("tau = ");
for(k = 0; k < m * n * K ; k++)
{
printf("%.3f ", tau[k]);
}
printf("\n");
printMatrix(A, m, n);
/* Dump A. */
/* message( "A = [" );
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Please register or to comment