diff --git a/examples/Makefile.am b/examples/Makefile.am
index d91f91d7fb3816dde5f6114c9d8e78677e60803d..2d20cbb1630044ced0bfd7b6d87d05e38696623b 100644
--- a/examples/Makefile.am
+++ b/examples/Makefile.am
@@ -20,13 +20,13 @@
 AUTOMAKE_OPTIONS=gnu
 
 # Add the source directory and debug to CFLAGS
-AM_CFLAGS = -g -Wall -Werror -I../src $(OPENMP_CFLAGS) -DCPU_TPS=2.67e9 -DTIMERS \
+AM_CFLAGS = -g -Wall -Werror -I../src $(OPENMP_CFLAGS) -DCPU_TPS=3.1e9 -DTIMERS \
     # -fsanitize=address -fno-omit-frame-pointer
 
-AM_LDFLAGS = -lm # -fsanitize=address
+    AM_LDFLAGS = -lm # -fsanitize=address
 
 # Set-up the library
-bin_PROGRAMS = test test_qr test_bh
+bin_PROGRAMS = test test_qr #test_bh 
 
 # Sources for test
 test_SOURCES = test.c
@@ -35,11 +35,19 @@ test_LDADD =  ../src/.libs/libquicksched.a
 
 # Sources for test_qr
 test_qr_SOURCES = test_qr.c
-test_qr_CFLAGS = $(AM_CFLAGS)
-test_qr_LDADD =  ../src/.libs/libquicksched.a -llapacke -llapack -lblas
+test_qr_CFLAGS = $(AM_CFLAGS) -I/home/aidan/lapack-3.5.0/lapacke/include/
+test_qr_LDFLAGS = $(AM_LDFLAGS) # per-target LDFLAGS replaces AM_LDFLAGS, so re-add it to keep -lm
+test_qr_LDADD =  ../src/.libs/libquicksched.a /home/aidan/lapack-3.5.0/liblapacke.a /home/aidan/lapack-3.5.0/liblapack.a -lblas 
 
 # Sources for test_bh
-test_bh_SOURCES = test_bh.c
-test_bh_CFLAGS = $(AM_CFLAGS)
-test_bh_LDADD =  ../src/.libs/libquicksched.a
+#test_bh_SOURCES = test_bh.c
+#test_bh_CFLAGS = $(AM_CFLAGS)
+#test_bh_LDADD =  ../src/.libs/libquicksched.a
 
+
+#if HAVE_CUDA
+#test_cuda_SOURCES = test_gpu_simple.cu
+#test_cuda_CFLAGS = -DWITH_CUDA $(AM_CFLAGS) $(CUDA_CFLAGS) $(CUDA_MYFLAGS)
+#test_cuda_LINK = $(NVCC)
+#test_cuda_LDADD = ../src/.libs/libquicksched_cuda.a $(CUDA_LIBS)
+#endif
diff --git a/examples/test.c b/examples/test.c
index 9d181aa1d4b00ff2e5900d039ca9ab658d07f676..66dfae6af5e7128dcad3f96f952da6d6bea1af82 100644
--- a/examples/test.c
+++ b/examples/test.c
@@ -118,7 +118,7 @@ void test2 ( int m , int n , int k , int nr_threads ) {
     /* Build a task for each tile of the matrix c. */
     for ( i = 0 ; i < m ; i++ )
         for ( j = 0 ; j < n ; j++ ) {
-            rid = qsched_addres( &s , qsched_owner_none , qsched_res_none );
+            rid = qsched_addres( &s , qsched_owner_none , qsched_res_none, NULL , 0 );
             data[0] = i; data[1] = j;
             for ( kk = 0 ; kk < k ; kk++ ) {
                 data[2] = kk;
@@ -222,7 +222,7 @@ void test1 ( int m , int n , int k , int nr_threads ) {
     for ( i = 0 ; i < m ; i++ )
         for ( j = 0 ; j < n ; j++ ) {
             data[0] = i; data[1] = j;
-            rid = qsched_addres( &s , qsched_owner_none , qsched_res_none );
+            rid = qsched_addres( &s , qsched_owner_none , qsched_res_none , NULL , 0);
             tid = qsched_addtask( &s , 1 , task_flag_none , data , 2*sizeof(int) , 1 );
             qsched_addlock( &s , tid , rid );
             }
diff --git a/examples/test_qr.c b/examples/test_qr.c
index 2a754c04d4976e138170aaaf261bfcd8afe77ba5..6f77203afbba2c5b3e1c0a953eb9b89323609187 100644
--- a/examples/test_qr.c
+++ b/examples/test_qr.c
@@ -37,7 +37,6 @@
 /* Local includes. */
 #include "quicksched.h"
 
-
 /*
  * Sam's routines for the tiled QR decomposition.
  */
@@ -47,9 +46,9 @@
   
   Computes the 2-norm by computing the following: \f[\textrm{2-norm}=\sqrt_0^lx(i)^2\f]
  */
-double do2norm(double* x, int l)
+float do2norm(float* x, int l)
 {
-	double sum = 0, norm;
+	float sum = 0, norm;
 	int i;
 
 	for(i = 0; i < l; i++)
@@ -74,14 +73,14 @@ double do2norm(double* x, int l)
  *
  * \returns void
  */
-void calcvkDouble	(double topDiag,
+void calcvkfloat	(float topDiag,
 			int ma,
-			double* xb,
+			float* xb,
 			int l,
-			double* vk)
+			float* vk)
 {
 	int sign, i;
-	double norm, div;
+	float norm, div;
 	//same non-standard normalisation as for single blocks above, but organised without a temporary beta veriable
 
 	sign = topDiag >= 0.0 ? 1 : -1;
@@ -106,16 +105,16 @@ void calcvkDouble	(double topDiag,
 }
 
 
-void updateDoubleQ_WY	(double* blockA,
-			double* blockB,
-			double* blockTau,
+void updatefloatQ_WY	(float* blockA,
+			float* blockB,
+			float* blockTau,
 			int k, int ma, int mb, int n,
 			int ldm,
-			double* hhVector)//bottom, essential part.
+			float* hhVector)//bottom, essential part.
 {
 	int i, j;
 
-	double tau = 1.0, beta;
+	float tau = 1.0, beta;
 
 	/* Compute tau = 2/v'v */
 	for(i = 0; i < mb; i ++)
@@ -148,17 +147,17 @@ void updateDoubleQ_WY	(double* blockA,
 	blockTau[k] = tau;
 }
 
-void DTSQRF	(double* blockA,
-		double* blockB,
-		double* blockTau,
+void DTSQRF	(float* blockA,
+		float* blockB,
+		float* blockTau,
 		int ma,
 		int mb,
 		int n,
 		int ldm,
-		double* hhVector)
+		float* hhVector)
 {
 	int k;
-	double* xVectA, *xVectB;
+	float* xVectA, *xVectB;
 	
 	xVectA = blockA;
 	xVectB = blockB;
@@ -167,11 +166,11 @@ void DTSQRF	(double* blockA,
 	{
 		//vk = sign(x[1])||x||_2e1 + x
 		//vk = vk/vk[0]
-		calcvkDouble(xVectA[0], ma - k, xVectB, (ma + mb) - k, hhVector);//returns essential
+		calcvkfloat(xVectA[0], ma - k, xVectB, (ma + mb) - k, hhVector);//returns essential
 
 		//matA(k:ma,k:na) = matA(k:ma,k:na) - (2/(vk.T*vk))*vk*(vk.T*matA(k:ma,k:na)
 		//update both blocks, preserving the vectors already stored below the diagonal in the top block and treating them as if they were zeros.
-		updateDoubleQ_WY	(blockA, blockB,
+		updatefloatQ_WY	(blockA, blockB,
 					blockTau,
 					k, ma, mb, n,
 					ldm,
@@ -182,14 +181,14 @@ void DTSQRF	(double* blockA,
 	}
 }
 
-void DSSRFT	(double* blockV,
-		double* blockA, double* blockB,
-		double* blockTau,
+void DSSRFT	(float* blockV,
+		float* blockA, float* blockB,
+		float* blockTau,
 		int b, int n, int ldm)
 {
 	int i, j, k;
 
-	double tau, beta;
+	float tau, beta;
 
 	/* Compute b_j = b_j - tau*v*v'*b_j for each column j of blocks A & B,
 	   and for each householder vector v of blockV */
@@ -223,7 +222,89 @@ void DSSRFT	(double* blockV,
 		}
 	}
 }
+
+/**
+ * @brief Allocate an (m*32) x (n*32) matrix, stored column by column, and
+ *        fill it with rand()-derived values in [-1, 1] in steps of 0.01.
+ * @param m Number of 32-row tiles.
+ * @param n Number of 32-column tiles.
+ * @return malloc'd buffer with leading dimension m*32; the caller frees it.
+ */
+float* randomMatrix(int m, int n)
+{
+    const int rows = m*32, cols = n*32;
+    int col, row;
+    float* Matrix = (float*) malloc( sizeof(float) * rows * cols );
+    if(Matrix == NULL)
+        error("Failed to allocate Matrix");
+    /* Columns outermost, so rand() is consumed in storage order. */
+    for(col = 0; col < cols; col++)
+        for(row = 0; row < rows; row++)
+            Matrix[(rows*col)+row] = ((float)(rand() % 201) - 100.0) / 100.0;
+    return Matrix;
+}
 			
+/**
+ * @brief Allocate an (m*32) x (n*32) matrix whose entry in column i, row j
+ *        is (float)(i+j); m and n count 32x32 tiles.
+ * @return malloc'd buffer with leading dimension m*32; the caller frees it.
+ */
+float* generateMatrix( int m, int n)
+{
+    const int rows = m*32, cols = n*32;
+    int i, j;
+    /* Every element is assigned below, so no zero-fill is needed first. */
+    float* Matrix = (float*) malloc( sizeof(float) * rows * cols );
+    if(Matrix == NULL)
+        error("Failed to allocate Matrix");
+    for(i = 0; i < cols; i++)
+        for(j = 0; j < rows; j++)
+            Matrix[i*rows + j] = (float)(i+j);
+    return Matrix;
+}
+
+/**
+ * @brief Allocate an (m*32) x (n*32) matrix with ones on the main diagonal
+ *        and zeros everywhere else.
+ *
+ * @param m Number of 32-row tiles.
+ * @param n Number of 32-column tiles.
+ *
+ * @return Zero-filled buffer with leading dimension m*32 and a unit
+ *         diagonal; the caller is responsible for freeing it.
+ */
+float* createIdentity(int m, int n)
+{
+    const int rows = m*32, cols = n*32;
+    const int diag = rows < cols ? rows : cols;
+    int d;
+    /* calloc returns zeroed storage (0.0f on IEEE-754 targets), so only the
+       diagonal has to be written instead of touching every element twice. */
+    float* Matrix = (float*) calloc( (size_t)rows * cols , sizeof(float) );
+    if(Matrix == NULL)
+        error("Failed to allocate Matrix");
+    for(d = 0; d < diag; d++)
+        Matrix[d*rows + d] = 1.0;
+    return Matrix;
+}
+
+/**
+ * @brief Print an (m*32) x (n*32) matrix to stdout, one matrix row per line.
+ *        Matrix is read with leading dimension m*32 (element = col*rows+row).
+ */
+void printMatrix(float* Matrix, int m, int n)
+{
+    const int rows = m*32, cols = n*32;
+    int row, col;
+    for(row = 0; row < rows; row++)
+    {
+        printf("{ ");
+        for(col = 0; col < cols; col++)
+            printf(" %.3f ", Matrix[col*rows + row]);
+        printf(" }");
+        printf("\n");
+    }
+}
 			
 /**
  * @brief Computed a tiled QR factorization using QuickSched.
@@ -236,7 +317,7 @@ void DSSRFT	(double* blockV,
 void test_qr ( int m , int n , int K , int nr_threads , int runs ) {
 
     int k, j, i;
-    double *A, *A_orig, *tau;
+    float *A, *A_orig, *tau;
     struct qsched s;
     qsched_task_t *tid, tid_new;
     qsched_res_t *rid;
@@ -251,27 +332,28 @@ void test_qr ( int m , int n , int K , int nr_threads , int runs ) {
     
         /* Decode the task data. */
         int *idata = (int *)data;
-        int i = idata[0], j = idata[1], k = idata[2];
-        double buff[ 2*K*K ];
+        int i = idata[0], j = idata[1];//, k = idata[2];
+        float buff[ 2*K*K ];
         
         /* Decode and execute the task. */
         switch ( type ) {
             case task_DGEQRF:
-                LAPACKE_dgeqrf_work( LAPACK_COL_MAJOR , K, K ,
+                LAPACKE_sgeqrf_work( LAPACK_COL_MAJOR , K, K ,
                                 &A[ j*m*K*K + i*K ] , m*K , &tau[ j*m*K + i*K ] ,
                                 buff , 2*K*K );
+                
                 break;
             case task_DLARFT:
-                LAPACKE_dlarft_work( LAPACK_COL_MAJOR , 'F' , 'C' ,
+                LAPACKE_slarft_work( LAPACK_COL_MAJOR , 'F' , 'C' ,
                                 K , K , &A[ i*m*K*K + i*K ] ,
                                 m*K , &tau[ i*m*K + i*K ] , &A[ j*m*K*K + i*K ] ,
                                 m*K );
                 break;
             case task_DTSQRF:
-                DTSQRF( &A[ j*m*K*K + j*K ] , &A[ j*m*K*K + i*K ] , &tau[ j*m*K + i*K ] , K , K , K , K*m , buff );
+                //DTSQRF( &A[ j*m*K*K + j*K ] , &A[ j*m*K*K + i*K ] , &tau[ j*m*K + i*K ] , K , K , K , K*m , buff );
                 break;
             case task_DSSRFT:
-                DSSRFT(	&A[ k*m*K + i*K ] , &A[ j*m*K*K + k*K ] , &A[ j*m*K*K + i*K ] , &tau[ k*m*K + i*K ] , K , K , K*m );
+                //DSSRFT(	&A[ k*m*K + i*K ] , &A[ j*m*K*K + k*K ] , &A[ j*m*K*K + i*K ] , &tau[ k*m*K + i*K ] , K , K , K*m );
                 break;
             default:
                 error( "Unknown task type." );
@@ -281,15 +363,28 @@ void test_qr ( int m , int n , int K , int nr_threads , int runs ) {
         
     
     /* Allocate and fill the original matrix. */
-    if ( ( A = (double *)malloc( sizeof(double) * m * n * K * K ) ) == NULL ||
-         ( tau = (double *)malloc( sizeof(double) * m * n * K ) ) == NULL ||
-         ( A_orig = (double *)malloc( sizeof(double) * m * n * K * K ) ) == NULL )
+    if ( ( A = (float *)malloc( sizeof(float) * m * n * K * K ) ) == NULL ||
+         ( tau = (float *)malloc( sizeof(float) * m * n * K ) ) == NULL ||
+         ( A_orig = (float *)malloc( sizeof(float) * m * n * K * K ) ) == NULL )
         error( "Failed to allocate matrices." );
-    for ( k = 0 ; k < m * n * K * K ; k++ )
-        A_orig[k] = 2*((double)rand()) / RAND_MAX - 1.0;
-    memcpy( A , A_orig , sizeof(double) * m * n * K * K );
-    bzero( tau , sizeof(double) * m * n * K );
+    free(A_orig);
+//    for ( k = 0 ; k < m * n * K * K ; k++ )
+  //      A_orig[k] = 2*((float)rand()) / RAND_MAX - 1.0;
+//    A_orig = generateMatrix(m, n);
+	srand(5);
+    A_orig = randomMatrix(m,n);
+    printMatrix(A_orig, m, n);
+    printf("\n\n\n");
+    memcpy( A , A_orig , sizeof(float) * m * n * K * K );
+    bzero( tau , sizeof(float) * m * n * K );
+
+    LAPACKE_sgeqrf( LAPACK_COL_MAJOR , K*m, K*n ,
+                                A, m*K , tau);
     
+    printMatrix(A, m , n );
+    printf("\n\n\n");
+    memcpy( A , A_orig , sizeof(float) * m * n * K * K );
+    bzero( tau , sizeof(float) * m * n * K );
     /* Dump A_orig. */
     /* message( "A_orig = [" );
     for ( k = 0 ; k < m*K ; k++ ) {
@@ -309,7 +404,7 @@ void test_qr ( int m , int n , int K , int nr_threads , int runs ) {
         error( "Failed to allocate tid/rid matrix." );
     for ( k = 0 ; k < m * n ; k++ ) {
         tid[k] = qsched_task_none;
-        rid[k] = qsched_addres( &s , qsched_owner_none , qsched_res_none );
+        rid[k] = qsched_addres( &s , qsched_owner_none , qsched_res_none , NULL, 0 );
         }
     
     /* Build the tasks. */
@@ -386,7 +481,13 @@ void test_qr ( int m , int n , int K , int nr_threads , int runs ) {
         tot_run += toc_run - tic;
         
         }
-    
+        printf("tau = ");
+        for(k = 0; k < m * n * K ; k++)
+        {
+            printf("%.3f ", tau[k]);
+        }
+        printf("\n");
+        printMatrix(A, m, n);
         
     /* Dump A. */
     /* message( "A = [" );