diff --git a/examples/Makefile.am b/examples/Makefile.am
index 75e1aa2245a836b2a10183f0ffcae94665fbada4..46d5cd60eb7d2162522416a6035dcdd4dc62e8cb 100644
--- a/examples/Makefile.am
+++ b/examples/Makefile.am
@@ -29,9 +29,9 @@ MPI_THREAD_LIBS = @MPI_THREAD_LIBS@
 MPI_LIBS = $(METIS_LIBS) $(MPI_THREAD_LIBS)
 
 # Set-up the library
-bin_PROGRAMS = test test_bh test_bh_sorted test_fmm_sorted test_bh_mpi
+bin_PROGRAMS = test test_bh test_bh_sorted test_fmm_sorted test_bh_mpi test_qr_mpi
 if HAVECBLAS
-bin_PROGRAMS += test_qr 
+bin_PROGRAMS += test_qr
 endif
 
 # Sources for test
@@ -63,3 +63,8 @@ test_bh_mpi_SOURCES = test_bh_mpi.c
 test_bh_mpi_CFLAGS = $(AM_CFLAGS) -DWITH_MPI
 test_bh_mpi_LDADD =  ../src/.libs/libquickschedMPI.a $(METIS_LIBS)
 test_bh_mpi_LDFLAGS = $(MPI_THREAD_LIBS)
+
+test_qr_mpi_SOURCES = test_qr_mpi.c
+test_qr_mpi_CFLAGS = $(AM_CFLAGS) -DWITH_MPI
+test_qr_mpi_LDADD =  ../src/.libs/libquickschedMPI.a $(METIS_LIBS)
+test_qr_mpi_LDFLAGS = $(MPI_THREAD_LIBS)
diff --git a/examples/test_bh_mpi.c b/examples/test_bh_mpi.c
index aff03025a545aa1b96759a5d04bebc2ab278a622..79ee40a04a0271b2640994986acd8ebcaebee83e 100644
--- a/examples/test_bh_mpi.c
+++ b/examples/test_bh_mpi.c
@@ -495,7 +495,7 @@ static inline void iact_pair_pc(struct qsched *s, struct cell *ci, struct cell *
     #ifdef SANITY_CHECKS
     if(cp->parts == NULL)
     {
-        error("cp->parts is NULL after we ttempted to get the data from the scheduler.");
+        error("cp->parts is NULL after we attempted to get the data from the scheduler.");
     }
     #endif
     /* Get local copies of the particle data in cp. */     
@@ -1014,7 +1014,7 @@ void interact_exact(int N, struct part *parts) {
 
   int i, j, k;
   float ir, w, r2, dx[3];
-
+    message("Starting exact interaction");
   /* Loop over all particles. */
   for (i = 0; i < N; ++i) {
 
@@ -1050,6 +1050,7 @@ void interact_exact(int N, struct part *parts) {
               parts[i].id, parts[i].a_exact[0], parts[i].a_exact[1],
               parts[i].a_exact[2]);*/
 #endif
+    message("Completed exact interaction");
 }
 
 void output_parts(struct qsched *s)
@@ -1286,6 +1287,9 @@ for(i = 0; i < s.count_ranks; i++)
 }
 
 //Need to clean up everything.
+    free(parts);
+    //TODO Clean up the cell-resource data.
+    qsched_free(&s);
 
     // Finalize the MPI environment.
     MPI_Finalize();
diff --git a/examples/test_qr_mpi.c b/examples/test_qr_mpi.c
new file mode 100644
index 0000000000000000000000000000000000000000..673f1ee5ce0512d0c160264011999d25c750b0b2
--- /dev/null
+++ b/examples/test_qr_mpi.c
@@ -0,0 +1,742 @@
+/*******************************************************************************
+ * This file is part of QuickSched.
+ * Coypright (c) 2013 Pedro Gonnet (pedro.gonnet@durham.ac.uk)
+ *
+ * This program is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU Lesser General Public License as published
+ * by the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public License
+ * along with this program.  If not, see <http://www.gnu.org/licenses/>.
+ *
+* *****************************************************************************/
+/* Config parameters. */
+#include "../config.h"
+
+/* Standard includes. */
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <unistd.h>
+#include <math.h>
+#include <float.h>
+#include <limits.h>
+#include <omp.h>
+#include <fenv.h>
+#include <mpi.h>
+
+
+#ifdef WITH_CLBAS
+#include <cblas.h>
+#endif
+
+/* Local includes. */
+#include "quicksched.h"
+#include "res.h"
+
+
+#ifdef WITH_CBLAS
+/**
+ * Takes a column major matrix, NOT tile major. size is length of a side of the
+ * matrix. Only works for square matrices.
+ * This function is simply for validation and is implemented naively as we know
+ * of no implementation to retrieve Q from the tiled QR.
+ */
+double* computeQ(double* HR, int size, int tilesize, double* tau, int tauNum) {
+  double* Q = malloc(sizeof(double) * size * size);
+  double* Qtemp = malloc(sizeof(double) * size * size);
+  double* w = malloc(sizeof(double) * size);
+  double* ww = malloc(sizeof(double) * size * size);
+  double* temp = malloc(sizeof(double) * size * size);
+  int i, k, l, j, n;
+  bzero(Q, sizeof(double) * size * size);
+  bzero(Qtemp, sizeof(double) * size * size);
+  bzero(ww, sizeof(double) * size * size);
+  for (i = 0; i < size; i++) {
+    Q[i * size + i] = 1.0;
+  }
+  int numcoltile = size / tilesize;
+  int numrowtile = size / tilesize;
+  for (k = 0; k < numrowtile; k++) {
+    for (l = 0; l < tilesize; l++) {
+      bzero(Qtemp, sizeof(double) * size * size);
+      for (i = 0; i < size; i++) {
+        Qtemp[i * size + i] = 1.0;
+      }
+
+      for (i = k; i < numcoltile; i++) {
+        bzero(w, sizeof(double) * size);
+
+        for (j = 0; j < tilesize; j++) {
+          w[i * tilesize + j] =
+              HR[(k * tilesize + l) * size + i * tilesize + j];
+        }
+        w[k * tilesize + l] = 1.0;
+        if (k * tilesize + l > i * tilesize) {
+          for (j = 0; j < k * tilesize + l; j++) w[j] = 0.0;
+        }
+
+        /* Compute (I - tau*w*w')' */
+        for (j = 0; j < size; j++) {
+          for (n = 0; n < size; n++) {
+            if (j != n)
+              ww[n * size + j] =
+                  -tau[(k * tilesize + l) * tauNum + i] * w[j] * w[n];
+            else
+              ww[n * size + j] =
+                  1.0 - tau[(k * tilesize + l) * tauNum + i] * w[j] * w[n];
+          }
+        }
+
+        /* Qtemp = Qtemp * (I-tau*w*w')' */
+        cblas_dgemm(CblasColMajor, CblasNoTrans, CblasNoTrans, size, size, size,
+                    1.0, Qtemp, size, ww, size, 0.0, temp, size);
+        double* b = Qtemp;
+        Qtemp = temp;
+        temp = b;
+      }
+      cblas_dgemm(CblasColMajor, CblasNoTrans, CblasNoTrans, size, size, size,
+                  1.0, Q, size, Qtemp, size, 0.0, temp, size);
+      double* b = Q;
+      Q = temp;
+      temp = b;
+    }
+  }
+
+  free(Qtemp);
+  free(w);
+  free(ww);
+  free(temp);
+  return Q;
+}
+
+#endif
+
+double* getR(double* HR, int size) {
+  double* R = malloc(sizeof(double) * size * size);
+  int i, j;
+  bzero(R, sizeof(double) * size * size);
+  for (i = 0; i < size; i++) {
+    for (j = 0; j <= i; j++) {
+      R[i * size + j] = HR[i * size + j];
+    }
+  }
+  return R;
+}
+
+void printMatrix(double* Matrix, int m, int n, int tilesize) {
+  int i, j;
+
+  for (i = 0; i < m * tilesize; i++) {
+    for (j = 0; j < n * tilesize; j++) {
+      printf(" %.3f ", Matrix[j * m * tilesize + i]);
+    }
+    printf("\n");
+  }
+}
+
+double* columnToTile(double* columnMatrix, int size, int m, int n,
+                     int tilesize) {
+  double* TileMatrix;
+  TileMatrix = malloc(sizeof(double) * size);
+  if (TileMatrix == NULL) error("failed to allocate TileMatrix");
+  int i, j, k, l;
+
+  for (i = 0; i < n; i++) {
+    for (j = 0; j < m; j++) {
+      double* tileStart =
+          &columnMatrix[i * m * tilesize * tilesize + j * tilesize];
+      double* tilePos =
+          &TileMatrix[i * m * tilesize * tilesize + j * tilesize * tilesize];
+      for (k = 0; k < tilesize; k++) {
+        tileStart = &columnMatrix[i * m * tilesize * tilesize +
+                                  k * m * tilesize + j * tilesize];
+        for (l = 0; l < tilesize; l++) {
+          tilePos[k * tilesize + l] = tileStart[l];
+        }
+      }
+    }
+  }
+
+  return TileMatrix;
+}
+
+double* tileToColumn(double* tileMatrix, int size, int m, int n, int tilesize) {
+  double* ColumnMatrix;
+  ColumnMatrix = (double*)malloc(sizeof(double) * size);
+  if (ColumnMatrix == NULL) error("failed to allocate ColumnMatrix");
+  int i, j, k, l;
+  for (i = 0; i < n; i++) {
+    for (j = 0; j < m; j++) {
+      /* Tile on ith column is at i*m*32*32.*/
+      /* Tile on jth is at j*32*32 */
+      double* tile =
+          &tileMatrix[i * m * tilesize * tilesize + j * tilesize * tilesize];
+      /* Column starts at same position as tile. */
+      /* Row j*32.*/
+      double* tilePos =
+          &ColumnMatrix[i * m * tilesize * tilesize + j * tilesize];
+      for (k = 0; k < tilesize; k++) {
+        for (l = 0; l < tilesize; l++) {
+          tilePos[l] = tile[l];
+        }
+        /* Next 32 elements are the position of the tile in the next column.*/
+        tile = &tile[tilesize];
+        /* Move to the j*32th position in the next column. */
+        tilePos = &tilePos[tilesize * m];
+      }
+    }
+  }
+  return ColumnMatrix;
+}
+
+/* Routines for the tiled QR decomposition.*/
+
+/**
+ *
+ * @brief Computes the QR decomposition of a tile.
+ *
+ * @param cornerTile A pointer to the tile for which the decomposition is
+* computed.
+ * @param tilesize The number of elements on a row/column of the tile.
+ * @param tauMatrix A pointer to the tau Matrix.
+ * @param k The value of k for the QR decomposition
+ * @param tauNum The number of tau values stored for each row of the matrix
+* (this is equal to the number of tiles on each column).
+ *
+ *
+ */
+void DGEQRF(double* restrict cornerTile, int tileSize,
+            double* restrict tauMatrix, int k, int tauNum) {
+  int i, j, n;
+  double norm = 0.0, sign, u1, tau, z;
+  double w[tileSize];
+
+  /*Find the householder vector for each row. */
+  for (i = 0; i < tileSize; i++) {
+    norm = 0.0;
+    /*Fill w with the vector.*/
+    for (j = i; j < tileSize; j++) {
+      /* ith row is i*tileSize, only want elements on diagonal or below.*/
+      w[j] = cornerTile[i * tileSize + j];
+      /*Find the norm as well */
+      norm = norm + w[j] * w[j];
+    }
+    if (w[i] >= 0.0)
+      sign = -1;
+    else
+      sign = 1;
+
+    norm = sqrt(norm);
+
+    u1 = w[i] - sign * norm;
+
+    if (u1 != 0.0) {
+      for (j = i + 1; j < tileSize; j++) w[j] = w[j] / u1;
+    } else {
+      for (j = i + 1; j < tileSize; j++) w[j] = 0.0;
+    }
+
+    if (norm != 0.0)
+      tau = -sign * u1 / norm;
+    else
+      tau = 0.0;
+
+    /*Store the below diagonal vector */
+
+    for (j = i + 1; j < tileSize; j++) {
+      cornerTile[i * tileSize + j] = w[j];
+    }
+    cornerTile[i * tileSize + i] = sign * norm;
+    w[i] = 1.0;
+    /* Apply the householder transformation to the rest of the tile, for
+     * everything to the right of the diagonal. */
+    for (j = i + 1; j < tileSize; j++) {
+      /*Compute w'*A_j*/
+      z = cornerTile[j * tileSize + i];
+      for (n = i + 1; n < tileSize; n++) {
+        z = z + cornerTile[j * tileSize + n] * w[n];
+      }
+      /* Tile(m,n) = Tile(m,n) - tau*w[n]* w'A_j */
+      for (n = i; n < tileSize; n++) {
+        cornerTile[j * tileSize + n] =
+            cornerTile[j * tileSize + n] - tau * w[n] * z;
+      }
+    }
+    /* Store tau. We're on k*tileSize+ith row. kth column.*/
+//    tauMatrix[(k * tileSize + i) * tauNum + k] = tau;
+      tauMatrix[i] = tau;
+  }
+}
+
+/**
+ *
+ * @brief Applies the householder factorisation of the corner to the row tile.
+ *
+ * @param cornerTile A pointer to the tile for which the householder is stored.
+ * @param rowTiles The tile to which the householder is applied.
+ * @param tilesize The number of elements on a row/column of the tile.
+ * @param tauMatrix A pointer to the tau Matrix.
+ * @param jj The value of j for the QR decomposition.
+ * @param kk The value of k for the QR decomposition.
+ * @param tauNum The number of tau values stored for each row of the matrix
+* (this is equal to the number of tiles on each column).
+ *
+ *
+ */
+void DLARFT(double* restrict cornerTile, double* restrict rowTile, int tileSize,
+            int jj, int kk, double* restrict tauMatrix, int tauNum) {
+  int i, j, n;
+  double z = 0.0;
+  double w[tileSize];
+
+  /* For each row in the corner Tile*/
+  for (i = 0; i < tileSize; i++) {
+    /*Get w for row i */
+    for (j = i; j < tileSize; j++) {
+      w[j] = cornerTile[i * tileSize + j];
+    }
+    w[i] = 1.0;
+
+    /* Apply to the row Tile */
+    for (j = 0; j < tileSize; j++) {
+      z = 0.0;
+      /* Below Diagonal!*/
+      /*Compute w'*A_j*/
+      for (n = i; n < tileSize; n++) {
+        z = z + w[n] * rowTile[j * tileSize + n];
+      }
+      for (n = i; n < tileSize; n++) {
+        rowTile[j * tileSize + n] =
+            rowTile[j * tileSize + n] -
+            //tauMatrix[(kk * tileSize + i) * tauNum + kk] * w[n] * z;
+            tauMatrix[i] * w[n] * z;
+      }
+    }
+  }
+}
+
+/**
+ *
+ * @brief Applies the householder factorisation of the corner to the row tile.
+ *
+ * @param cornerTile The corner tile for this value of k.
+ * @param columnTile The tile for which householders are computed.
+ * @param tilesize The number of elements on a row/column of the tile.
+ * @param tauMatrix A pointer to the tau Matrix.
+ * @param ii The value of i for the QR decomposition.
+ * @param kk The value of k for the QR decomposition.
+ * @param tauNum The number of tau values stored for each row of the matrix
+* (this is equal to the number of tiles on each column).
+ *
+ *
+ */
+void DTSQRF(double* restrict cornerTile, double* restrict columnTile,
+            int tilesize, int ii, int kk, double* restrict tauMatrix,
+            int tauNum) {
+  int i, j, n;
+  double norm = 0.0, sign, u1, tau, z;
+  double w[2*tilesize];
+
+  /* For each column compute the householder vector. */
+  for (i = 0; i < tilesize; i++) {
+    norm = 0.0;
+    w[i] = cornerTile[i * tilesize + i];
+    norm = norm + w[i] * w[i];
+    for (j = i + 1; j < tilesize; j++) {
+      w[j] = 0.0;
+    }
+    for (j = 0; j < tilesize; j++) {
+      w[tilesize + j] = columnTile[i * tilesize + j];
+      norm = norm + w[tilesize + j] * w[tilesize + j];
+    }
+
+    norm = sqrt(norm);
+    if (w[i] >= 0.0)
+      sign = -1;
+    else
+      sign = 1;
+
+    u1 = w[i] - sign * norm;
+    if (u1 != 0.0) {
+      for (j = i + 1; j < 2 * tilesize; j++) {
+        w[j] = w[j] / u1;
+      }
+    } else {
+      for (j = i + 1; j < 2 * tilesize; j++) w[j] = 0.0;
+    }
+
+    if (norm != 0)
+      tau = -sign * u1 / norm;
+    else
+      tau = 0.0;
+
+    /* Apply to each row to the right.*/
+    for (j = i; j < tilesize; j++) {
+      /* Find w'*A_j, w is 0s except for first value with upper tile.*/
+      z = 1.0 * cornerTile[j * tilesize + i];
+      for (n = 0; n < tilesize; n++) {
+        z = z + w[tilesize + n] * columnTile[j * tilesize + n];
+      }
+      /* Apply to upper tile.*/
+      cornerTile[j * tilesize + i] =
+          cornerTile[j * tilesize + i] - tau * 1.0 * z;
+      for (n = i + 1; n < tilesize; n++) {
+        cornerTile[j * tilesize + n] =
+            cornerTile[j * tilesize + n] - tau * w[n] * z;
+      }
+      /* Apply to lower tile.*/
+      for (n = 0; n < tilesize; n++) {
+        columnTile[j * tilesize + n] =
+            columnTile[j * tilesize + n] - tau * w[tilesize + n] * z;
+      }
+    }
+    /* Store w*/
+    for (j = 0; j < tilesize; j++) {
+      columnTile[i * tilesize + j] = w[tilesize + j];
+    }
+//    tauMatrix[(kk * tilesize + i) * tauNum + ii] = tau;
+      tauMatrix[i] = tau;
+  }
+}
+
+/**
+ *
+ * @brief Applies the householder factorisation of the corner to the row tile.
+ *
+ * @param cornerTile A pointer to the tile to have the householder applied.
+ * @param columnTile The tile in which the householders are stored.
+ * @param rowTile The upper tile to have the householders applied.
+ * @param tilesize The number of elements on a row/column of the tile.
+ * @param tauMatrix A pointer to the tau Matrix.
+ * @param ii The value of i for the QR decomposition.
+ * @param jj The value of j for the QR decomposition.
+ * @param kk The value of k for the QR decomposition.
+ * @param tauNum The number of tau values stored for each row of the matrix
+* (this is equal to the number of tiles on each column).
+ *
+ *
+ */
+void DSSRFT(double* restrict cornerTile, double* restrict columnTile,
+            double* restrict rowTile, int tilesize, int ii, int jj, int kk,
+            double* restrict tauMatrix, int tauNum) {
+  int i, j, n;
+  double z;
+  double w[2*tilesize];
+
+  for (i = 0; i < tilesize; i++) {
+    for (j = 0; j < i; j++) w[j] = 0.0;
+    w[i] = 1.0;
+    for (j = i + 1; j < tilesize; j++) w[j] = 0.0;
+    for (j = 0; j < tilesize; j++)
+      w[j + tilesize] = columnTile[i * tilesize + j];
+
+    /* Apply householder vector (w) to the tiles.*/
+    for (j = 0; j < tilesize; j++) {
+      z = 0.0;
+      /* Compute w' * A_j */
+      for (n = 0; n < tilesize; n++) {
+        z += w[n] * rowTile[j * tilesize + n];
+        z += w[n + tilesize] * cornerTile[j * tilesize + n];
+      }
+      for (n = 0; n < tilesize; n++) {
+        rowTile[j * tilesize + n] =
+            rowTile[j * tilesize + n] -
+            //tauMatrix[(kk * tilesize + i) * tauNum + ii] * w[n] * z;
+            tauMatrix[i] * w[n] * z;
+        cornerTile[j * tilesize + n] =
+            cornerTile[j * tilesize + n] -
+        //    tauMatrix[(kk * tilesize + i) * tauNum + ii] * w[tilesize + n] * z;
+            tauMatrix[i] * w[tilesize+ n] * z;
+      }
+    }
+  }
+}
+
+
+
+void test_qr(int m, int n, int K, int nr_threads, int runs, double* matrix) {
+
+  int k, j, i;
+  double* A, *A_orig= NULL, *tau;
+  struct qsched s;
+  qsched_task_t* tid = NULL, tid_new;
+  qsched_res_t *rid = NULL, *tau_id = NULL;
+  double **rids = NULL, **taus = NULL;
+  int data[3];
+  long long int MPI_data[6];
+//  ticks tic, toc_run, tot_setup, tot_run = 0;
+char processor_name[MPI_MAX_PROCESSOR_NAME];
+    int name_len;
+
+    // Initialize the MPI environment
+int MpiThreadLevel;
+ MPI_Init_thread(NULL, NULL, MPI_THREAD_MULTIPLE, &MpiThreadLevel);
+
+if(MpiThreadLevel != MPI_THREAD_MULTIPLE)
+    error("We didn't get thread multiple!");
+
+    MPI_Get_processor_name(processor_name, &name_len);
+    MPI_Comm_set_errhandler(MPI_COMM_WORLD, MPI_ERRORS_RETURN );
+
+  enum task_types {
+    task_DGEQRF,
+    task_DLARFT,
+    task_DTSQRF,
+    task_DSSRFT
+  };
+
+  /* Runner function to pass to the scheduler. */
+  void runner(struct qsched *s, int type, void * data) {
+
+    /* Decode the task data. */
+    long long int* idata = (long long int*)data;
+//    int i = idata[0], j = idata[1], k = idata[2];
+    long long int i, j , a , t;
+    /* Need to pull the resources.*/
+
+    /* Decode and execute the task. */
+    switch (type) {
+      case task_DGEQRF:
+        i = idata[0];
+        j = idata[1];
+        double *cornerTile = (double*)qsched_getresdata(s, i);
+        double *tau = (double*)qsched_getresdata(s, j);
+        DGEQRF(cornerTile, K, tau, k, m);
+        break;
+      case task_DLARFT:
+        t = idata[2];
+        i = idata[0];
+        j = idata[1];
+        double *lockedTile = (double*)qsched_getresdata(s, i);
+        cornerTile = (double*)qsched_getresdata(s, j);
+        tau = (double*)qsched_getresdata(s, t);
+        DLARFT(cornerTile, lockedTile, K, j, k, tau,
+               m);
+        break;
+      case task_DTSQRF:
+        i = idata[0];
+        j = idata[1];
+        t = idata[2];
+        lockedTile = (double*)qsched_getresdata(s, i);
+        cornerTile = (double*)qsched_getresdata(s, j);
+        tau = (double*)qsched_getresdata(s, t);
+        DTSQRF(cornerTile, lockedTile, K, i, k, tau,
+               m);
+        break;
+      case task_DSSRFT:
+        a = idata[2];
+        i = idata[0];
+        j = idata[1];
+        t = idata[3];
+        double *lockedTile1 = (double*)qsched_getresdata(s, i);
+        double *usedTile = (double*)qsched_getresdata(s, j);
+        double *lockedTile2 = (double*)qsched_getresdata(s, a);
+        tau = (double*)qsched_getresdata(s, t);
+        DSSRFT(lockedTile1, usedTile,
+               lockedTile2, K, i, j, k, tau, m);
+        break;
+//      default:
+  //      error("Unknown task type.");
+    }
+  }
+
+    qsched_init(&s, nr_threads, qsched_flag_noreown, MPI_COMM_WORLD);
+
+if(s.rank == 0){
+  /* Allocate and fill the original matrix. */
+  if ((A = (double*)malloc(sizeof(double)* m* n* K* K)) == NULL ||
+      (tau = (double*)malloc(sizeof(double)* m* n* K)) == NULL ||
+      (A_orig = (double*)malloc(sizeof(double) * m * n * K * K)) == NULL)
+    error("Failed to allocate matrices.");
+  for (k = 0; k < m * n * K * K; k++) {
+    if (matrix == NULL)
+      A_orig[k] = 2 * ((double)rand()) / RAND_MAX - 1.0;
+    else
+      A_orig[k] = matrix[k];
+  }
+  memcpy(A, A_orig, sizeof(double) * m * n * K * K);
+  bzero(tau, sizeof(double) * m * n * K);
+
+  /* Initialize the scheduler. */
+    /* Allocate and init the task ID and resource ID matrix. */
+//  tic = getticks();
+  if ((tid = (qsched_task_t*)malloc(sizeof(qsched_task_t)* m* n)) == NULL ||
+      (rid = (qsched_res_t*)malloc(sizeof(qsched_res_t) * m * n)) == NULL)
+    error("Failed to allocate tid/rid matrix.");
+  if( (rids = (double**)malloc(sizeof(double*)*m*n)) == NULL) 
+    error("Failed to allocate rids.");
+  if( (tau_id = (qsched_res_t*)malloc(sizeof(qsched_res_t) * m*n) ) == NULL)
+    error("Failed to allocate tau_id matrix.");
+  if( (taus = (double**)malloc(sizeof(double*)*m*n)) == NULL)
+    error("Failed to allocate taus.");
+  for (k = 0; k < m * n; k++) {
+    //tid[k] = qsched_task_none;
+    rid[k] = qsched_addres(&s, qsched_owner_none, sizeof(double) * K * K, (void**) &rids[k]);
+    tau_id[k] = qsched_addres(&s, qsched_owner_none, sizeof(double) * K, (void**) &taus[k]);
+    memcpy(taus[k], tau, sizeof(double) * K);
+  }
+}
+    message("Synchronizing resources.");
+    qsched_sync_resources(&s);
+
+if(s.rank == 0) {
+  for (k = 0; k < m * n; k++) {
+    tid[k] = qsched_task_none;
+  }
+/* Build the tasks. */
+  for (k = 0; k < m && k < n; k++) {
+
+    /* Add kth corner task. */
+    data[0] = k;
+    data[1] = k;
+    data[2] = k;
+    MPI_data[0] = rid[k*m+k];
+    MPI_data[1] = tau_id[k*m+k];
+    tid_new = qsched_addtask(&s, task_DGEQRF, task_flag_none, MPI_data,
+                             sizeof(long long int) * 2, 200);
+    qsched_addlock(&s, tid_new, rid[k * m + k]);
+    qsched_addlock(&s, tid_new, tau_id[k*m+k]);
+    if(k == 0)
+    {
+        memcpy(rids[k*m+k], &A_orig[(data[1]*m+data[0])*K*K], sizeof(double)*K*K);
+    }
+    if (tid[k * m + k] != -1) qsched_addunlock(&s, tid[k * m + k], tid_new);
+    tid[k * m + k] = tid_new;
+
+    /* Add column tasks on kth row. */
+    for (j = k + 1; j < n; j++) {
+      data[0] = k;
+      data[1] = j;
+      data[2] = k;
+      MPI_data[0] = rid[j*m+k];
+      MPI_data[1] = rid[k*m+k];
+      MPI_data[2] = tau_id[k*m+k];
+      tid_new = qsched_addtask(&s, task_DLARFT, task_flag_none, MPI_data,
+                               sizeof(long long int) * 3, 300);
+        if(k == 0)
+        {
+           memcpy(rids[j*m+k], &A_orig[(data[1]*m+data[0])*K*K], sizeof(double)*K*K);
+        }
+      qsched_addlock(&s, tid_new, rid[j * m + k]);
+      qsched_adduse(&s, tid_new, rid[k * m + k]);
+      qsched_adduse(&s, tid_new, tau_id[k*m+k]);
+      qsched_addunlock(&s, tid[k * m + k], tid_new);
+      if (tid[j * m + k] != -1) qsched_addunlock(&s, tid[j * m + k], tid_new);
+      tid[j * m + k] = tid_new;
+    }
+
+    /* For each following row... */
+    for (i = k + 1; i < m; i++) {
+
+      /* Add the row taks for the kth column. */
+      data[0] = i;
+      data[1] = k;
+      data[2] = k;
+      MPI_data[0] = rid[k*m+i];
+      MPI_data[1] = rid[k*m+k];
+      MPI_data[2] = tau_id[k*m+i];
+      tid_new = qsched_addtask(&s, task_DTSQRF, task_flag_none, MPI_data,
+                               sizeof(long long int) * 3, 300);
+        if(k == 0)
+        {
+            memcpy(rids[k*m+i], &A_orig[(data[1]*m+data[0])*K*K], sizeof(double)*K*K);
+        }
+      qsched_addlock(&s, tid_new, rid[k * m + i]);
+      qsched_adduse(&s, tid_new, rid[k * m + k]);
+      qsched_addlock(&s, tid_new, tau_id[k*m+i]);
+      qsched_addunlock(&s, tid[k * m + (i - 1)], tid_new);
+      if (tid[k * m + i] != -1) qsched_addunlock(&s, tid[k * m + i], tid_new);
+      tid[k * m + i] = tid_new;
+
+      /* Add the inner tasks. */
+      for (j = k + 1; j < n; j++) {
+        data[0] = i;
+        data[1] = j;
+        data[2] = k;
+        MPI_data[0] = rid[j*m+i];
+        MPI_data[1] = rid[k*m+i];
+        MPI_data[2] = rid[j*m+k];
+        MPI_data[3] = tau_id[k*m+i];
+        tid_new = qsched_addtask(&s, task_DSSRFT, task_flag_none, MPI_data,
+                                 sizeof(long long int) * 4, 500);
+        if(k == 0)
+        {
+            memcpy(rids[j*m+i], &A_orig[(data[1]*m+data[0])*K*K], sizeof(double)*K*K);
+        }
+        qsched_addlock(&s, tid_new, rid[j * m + i]);
+        qsched_adduse(&s, tid_new, rid[k * m + i]);
+        qsched_addlock(&s, tid_new, rid[j * m + k]);
+        qsched_adduse(&s, tid_new, tau_id[k*m+i]);
+        // qsched_addunlock(&s, tid[k * m + i], tid_new);
+        qsched_addunlock(&s, tid[j * m + i - 1], tid_new);
+        if (tid[j * m + i] != -1) qsched_addunlock(&s, tid[j * m + i], tid_new);
+
+        tid[j * m + i] = tid_new;
+      }
+    }
+
+  } /* build the tasks. */
+}
+qsched_run_MPI(&s, nr_threads, runner);
+  // Print off a hello world message
+    printf("Hello world from processor %s, rank = %i, count_ranks = %i\n",
+           processor_name, s.rank, s.count_ranks);
+
+ //TODO Clean up the resource data.
+    qsched_free(&s);
+
+    // Finalize the MPI environment.
+    MPI_Finalize();
+}
+
+/**
+ * @brief Main function.
+ */
+
+int main(int argc, char* argv[]) {
+
+  int c, nr_threads=1;
+  int M = 4, N = 4, runs = 1, K = 32;
+
+  /* Parse the options */
+  while ((c = getopt(argc, argv, "m:n:k:r:t:")) != -1) switch (c) {
+      case 'm':
+        if (sscanf(optarg, "%d", &M) != 1) error("Error parsing dimension M.");
+        break;
+      case 'n':
+        if (sscanf(optarg, "%d", &N) != 1) error("Error parsing dimension M.");
+        break;
+      case 'k':
+        if (sscanf(optarg, "%d", &K) != 1) error("Error parsing tile size.");
+        break;
+      case 'r':
+        if (sscanf(optarg, "%d", &runs) != 1)
+          error("Error parsing number of runs.");
+        break;
+      case 't':
+        if (sscanf(optarg, "%d", &nr_threads) != 1)
+          error("Error parsing number of threads.");
+        omp_set_num_threads(nr_threads);
+        break;
+      case '?':
+        fprintf(stderr, "Usage: %s [-t nr_threads] [-m M] [-n N] [-k K]\n",
+                argv[0]);
+        fprintf(stderr, "Computes the tiled QR decomposition of an MxN tiled\n"
+                        "matrix using nr_threads threads.\n");
+        exit(EXIT_FAILURE);
+    }
+
+  /* Dump arguments. */
+  message("Computing the tiled QR decomposition of a %ix%i matrix using %i "
+          "threads (%i runs).",
+          32 * M, 32 * N, nr_threads, runs);
+
+  test_qr(M, N, K, nr_threads, runs, NULL);
+}
diff --git a/src/qsched.c b/src/qsched.c
index 0b88d3f8480734ea262a7b0cb486cd931f577e19..a9ba6fb0ccc90f938291435580c1f12199dde38f 100644
--- a/src/qsched.c
+++ b/src/qsched.c
@@ -1621,8 +1621,6 @@ void *temp;
     
     t->node = ts->rank;
     i++;
-    if(i % 1000 == 0)
-        message("Added %i send/recv tasks", i/2);
     /* Return the task ID. */
     return id;
 
@@ -2999,6 +2997,8 @@ for(i = 0; i < node_count; i++)
         }
         message("My \"cost\" = %i", count_me);
 
+
+
     tic = getticks();
     MPI_Request *reqs;
     reqs = (MPI_Request*) calloc(sizeof(MPI_Request) , node_count * 2);
@@ -3183,8 +3183,13 @@ for(i = 0; i < node_count; i++)
     toc = getticks();
     message("qsched_partition_create_sends took %lli (= %e) ticks\n", toc-tic, (float)(toc-tic));
 
-
-
+    free(edge_lists);
+    free(edge_sizes);
+    free(edge_vwgts);
+    free(edgelist_pos);
+    free(edgelist_new);
+    free(edgelist_vwgt);
+    free(nodelist);