diff --git a/examples/Makefile.am b/examples/Makefile.am index 4f5c31993a10137756292b725859228b15e90d95..cebcebfd7b134a03f6765986c7ee6f780afa0adc 100644 --- a/examples/Makefile.am +++ b/examples/Makefile.am @@ -36,7 +36,7 @@ test_LDADD = ../src/.libs/libquicksched.a # Sources for test_qr test_qr_SOURCES = test_qr.c test_qr_CFLAGS = $(AM_CFLAGS) -test_qr_LDADD = ../src/.libs/libquicksched.a -lblas +test_qr_LDADD = ../src/.libs/libquicksched.a -llapacke -lcblas # Sources for test_bh test_bh_SOURCES = test_bh.c diff --git a/examples/Makefile.in b/examples/Makefile.in index eaa5c64349c05503412236c587605ec06e491382..9da0b5092de51ac2a8c8c1202ea89eaf2f9f7613 100644 --- a/examples/Makefile.in +++ b/examples/Makefile.in @@ -295,7 +295,7 @@ test_LDADD = ../src/.libs/libquicksched.a # Sources for test_qr test_qr_SOURCES = test_qr.c test_qr_CFLAGS = $(AM_CFLAGS) -test_qr_LDADD = ../src/.libs/libquicksched.a -lblas +test_qr_LDADD = ../src/.libs/libquicksched.a -llapacke -lcblas # Sources for test_bh test_bh_SOURCES = test_bh.c diff --git a/examples/test_qr.c b/examples/test_qr.c index 32d1b2754e575dd363099924d3ce28f329b4409c..f025ce68cd94cb68bbb27fdb9f2821cdcb8c2ee2 100644 --- a/examples/test_qr.c +++ b/examples/test_qr.c @@ -29,12 +29,13 @@ #include <math.h> #include <omp.h> -/* Local includes. */ -#include "quicksched.h" +/* LAPACKE header. */ +#include <lapacke.h> +#include <cblas.h> -/* Prototypes for BLAS function. */ -void dtrmm_ ( char * , char * , char * , char * , int * , int * , double * , double * , int * , double * , int * ); +/* Local includes. */ +#include "quicksched.h" /* @@ -105,47 +106,6 @@ void calcvkDouble (double topDiag, } -/** - * \brief Computes a Householder reflector \f$v\f$ of a vector \f$x\f$ for a single block - * - * Computes: \f[v = \textrm{sign}(x_1)||x||_2e_1+x\f] - * Then does a non-standard normalisation \f$v\f$: \f[v = \frac{v}{v_1}\f] - * - * \param x Pointer to an array containing a column vector to compute the Householder reflector of - * \param l The number of elements in \f$x\f$ - * \param vk A pointer to an allocated array to store the resulting vector of size l - 1 - * due to the implied 1 as the first element - * - * \returns void - */ -void calcvkSingle (double* x, - int l, - double* vk) -{ - int sign, i; - double norm, div, beta; - - sign = x[0] >= 0.0 ? 1 : -1; - beta = x[0]; - //copy the values - for(i = 1; i < l; i++) - vk[i-1] = x[i]; - - //take the euclidian norm of the original vector - norm = do2norm(x, l); - //calculate the new normalisation - beta += norm * sign; - - if(norm != 0.0) - { - //normalise - div = 1/beta; - - for(i = 0; i < l-1; i++) - vk[i] *= div; - } -} - void updateDoubleQ_WY (double* blockA, double* blockB, double* blockTau, @@ -188,60 +148,6 @@ void updateDoubleQ_WY (double* blockA, blockTau[k] = tau; } -void updatekthSingleWY (double* blockV, - double* tauBlock, - double beta, - int k, - int m, int n, int ldm, - double* w) -{ - /* Insert beta on the diagonal of Tau */ - tauBlock[k] = beta; -} - -void updateSingleQ_WY (double* block, - double* tauBlock, - int k, - int m, int n, int ldm,//dims of block - double* workVector) -{ - /* Compute A = A - 2/v'v*vv'A */ - int i, j; - double beta = 1.0f, prod; - - for(i = k + 1; i < m; i ++) - { - beta += workVector[i - k - 1] * workVector[i - k - 1]; - } - /* Finish computation of 2/v'v */ - beta = (-2)/beta; - - for(j = k; j < 32; j ++) - { - /* Compute prod = v'A_j */ - prod = block[(j*ldm) + k];//(k,k) to (k,n) - - for(i = k + 1; i < m; i ++) - prod += block[(j*ldm) + i] * workVector[i - k - 1]; - - /* Compute A_j = A_j - beta*v*prod */ - block[(j*ldm) + k] += beta * prod; - - for(i = k + 1; i < m; i ++) - block[(j*ldm) + i] += beta * prod * workVector[i - k - 1]; - } - - /* Insert nonessential vector below diagonal. */ - for(i = k + 1; i < m; i ++) - block[(k*ldm) + i] = workVector[i - k - 1]; - - updatekthSingleWY (block, - tauBlock, - -beta, - k, m, n, ldm, - workVector); -} - void DTSQRF (double* blockA, double* blockB, double* blockTau, @@ -319,74 +225,6 @@ void DSSRFT (double* blockV, } -void DGEQRF (double* block, - double* tauBlock, - int m, int n, int ldm, - double* workVector) -{ - int k; - double* xVect; - - xVect = block; - - for(k = 0; k < n; k ++) - { - /* Get kth householder vector into position starting at workVector */ - calcvkSingle(xVect, m-k, workVector); - - /* Apply householder vector (with an implied 1 in first element to block, - generating WY matrices in the process. - Stores vector below the diagonal. */ - updateSingleQ_WY (block, tauBlock, - k, m, n, ldm, - workVector); - - /* Shift one along & one down */ - xVect += ldm + 1; - } -} - -void DLARFT (double* block, - double* blockV, - double* tauBlock, - int m, int n, int ldm) -{ - /* Perform the transformation block = block - blockV*(tauBlock*(blockV^T*block)) - Equivalent to B = B - V(T(V^TB)) - Noting that T is upper triangular, and V is unit lower triangular. */ - int i, j, k; - - double tau, beta; - - /* For each column of the block. */ - for(j = 0; j < n; j ++) - { - /* Apply successive reflectors with b_j - tau_k*v_k*v_k'b_j */ - for(k = 0; k < n; k ++) - { - /* tau_k is at blockV(k) */ - tau = tauBlock[k]; - - /* Compute v_k'*b_j, with v_k,k = 1 implied */ - beta = block[(j*ldm) + k];//*1.0 - - /* Rest of vector. */ - for(i = k+1; i < m; i ++) - beta += blockV[(k*ldm) + i] * block[(j*ldm) + i]; - - beta *= tau; - - /* Compute b_j = b_j - beta*v_k, again with an implied 1 at v_kk */ - block[(j*ldm) + k] -= beta;/* *1.0 */ - - /* Compute for rest of b_j */ - for(i = k+1; i < m; i ++) - block[(j*ldm) + i] -= beta * blockV[(k*ldm) + i]; - } - } -} - - /** * @brief Computed a tiled QR factorization using QuickSched. * @@ -418,10 +256,15 @@ void test_qr ( int m , int n , int nr_threads ) { /* Decode and execute the task. */ switch ( type ) { case task_DGEQRF: - DGEQRF( &A[ j*m*32*32 + i*32 ] , &tau[ j*m*32 + i*32 ] , 32 , 32 , 32*m , buff ); + LAPACKE_dgeqrf_work( LAPACK_COL_MAJOR , 32, 32 , + &A[ j*m*32*32 + i*32 ] , m*32 , &tau[ j*m*32 + i*32 ] , + buff , 2*32*32 ); break; case task_DLARFT: - DLARFT( &A[ j*m*32*32 + i*32 ] , &A[ i*m*32*32 + i*32 ] , &tau[ i*m*32 + i*32 ] , 32 , 32 , 32*m ); + LAPACKE_dlarft_work( LAPACK_COL_MAJOR , 'F' , 'C' , + 32 , 32 , &A[ i*m*32*32 + i*32 ] , + m*32 , &tau[ i*m*32 + i*32 ] , &A[ j*m*32*32 + i*32 ] , + m*32 ); break; case task_DTSQRF: DTSQRF( &A[ j*m*32*32 + j*32 ] , &A[ j*m*32*32 + i*32 ] , &tau[ j*m*32 + i*32 ] , 32 , 32 , 32 , 32*m , buff ); @@ -436,7 +279,6 @@ void test_qr ( int m , int n , int nr_threads ) { } - /* Allocate and fill the original matrix. */ if ( ( A = (double *)malloc( sizeof(double) * m * n * 32 * 32 ) ) == NULL || ( tau = (double *)malloc( sizeof(double) * m * n * 32 ) ) == NULL || @@ -464,8 +306,8 @@ void test_qr ( int m , int n , int nr_threads ) { ( rid = (qsched_res_t *)malloc( sizeof(qsched_res_t) * m * n ) ) == NULL ) error( "Failed to allocate tid/rid matrix." ); for ( k = 0 ; k < m * n ; k++ ) { - tid[k] = -1; - rid[k] = qsched_addres( &s , -1 ); + tid[k] = qsched_task_none; + rid[k] = qsched_addres( &s , qsched_res_none ); } /* Build the tasks. */ @@ -550,6 +392,9 @@ void test_qr ( int m , int n , int nr_threads ) { printf( " %i %i %i %i %lli %lli\n" , s.tasks[k].type , s.tasks[k].qid , d[0] , d[1] , s.tasks[k].tic , s.tasks[k].toc ); } + /* Clean up. */ + qsched_free( &s ); + }