diff --git a/examples/test_bh_mpi.c b/examples/test_bh_mpi.c
index deef47386e0d7b284885d14ac928843694769935..d4e5fdafa94636c4a27e86b56d4c1a71096d5a5d 100644
--- a/examples/test_bh_mpi.c
+++ b/examples/test_bh_mpi.c
@@ -45,7 +45,8 @@
 #define dist_min 0.5 /* Used for legacy walk only */
 #define dist_cutoff_ratio 1.5
 
-#define ICHECK -1
+#define ICHECK 9501
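+/* ICHECK: id of a single particle whose interactions are traced below; -1 disables the trace. */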
 #define SANITY_CHECKS
 #define NO_COM_AS_TASK
 #define NO_COUNTERS
@@ -350,15 +350,16 @@ static inline void iact_pair_direct(struct qsched *s, struct cell *ci, struct ce
       ++count_direct_unsorted;
 #endif
 
-#if ICHECK >= 0 && 0
+#if ICHECK >= 0
       if (parts_i[i].id == ICHECK)
-        printf(
-            "[NEW] Interaction with particle id= %d (pair i) a=( %e %e %e )\n",
-            parts_j[j].id, -mi * w * dx[0], -mi * w * dx[1], -mi * w * dx[2]);
-
-      if (parts_j[j].id == ICHECK)
-        printf(
-            "[NEW] Interaction with particle id= %d (pair j) a=( %e %e %e )\n",
+        /* Terse trace: interacting particle id and this pairwise acceleration term. */
+        message(
+            "%d %e %e %e",
+            cj->parts[j].id, -mi * w * dx[0], -mi * w * dx[1], -mi * w * dx[2]);
+
+      if (cj->parts[j].id == ICHECK)
+        message(
+            "%d %e %e %e",
             parts_i[i].id, mj * w * dx[0], mj * w * dx[1], mj * w * dx[2]);
 #endif
 
@@ -440,12 +446,12 @@ void iact_self_direct(struct qsched *s, struct cell *c) {
 
 #if ICHECK >= 0
         if (c->parts[i].id == ICHECK)
-          message("[NEW] Interaction with particle id= %d (self i)",
-                  c->parts[j].id);
+          message("%d %e %e %e",
+                  c->parts[j].id,  -mi * w * dx[0], -mi * w * dx[1], -mi * w * dx[2]);
 
         if (c->parts[j].id == ICHECK)
-          message("[NEW] Interaction with particle id= %d (self j)",
-                  c->parts[i].id);
+          message("%d %e %e %e",
+                  c->parts[i].id,  -mj * w * dx[0], -mj * w * dx[1], -mj * w * dx[2]);
 #endif
 
       } /* loop over every other particle. */
@@ -1090,6 +1100,10 @@ void output_parts(struct qsched *s)
                     struct part cur = parts[j];
                     fprintf(file, "%i %e %e %e %e %e %e %e %e %e %e %e %e %e\n", cur.id, cur.mass, cur.x[0], cur.x[1], cur.x[2], 
                             cur.a_exact[0], cur.a_exact[1], cur.a_exact[2], cur.a_legacy[0], cur.a_legacy[1], cur.a_legacy[2], cur.a[0], cur.a[1], cur.a[2]);
+                #if ICHECK >= 0
+                    if(cur.id == ICHECK)
+                        message("Final accelerations = %e %e %e", cur.a[0], cur.a[1], cur.a[2]);
+                #endif
                 }
             }
         }
@@ -1269,9 +1283,9 @@ qsched_run_MPI(&s, nr_threads, runner);
 if(s.rank == 0)
 {
     file = fopen("particle_dump.out", "w");
-  fprintf(file,
+  /*fprintf(file,
           "ID m x y z a_exact.x   a_exact.y    a_exact.z    a_legacy.x    "
-          "a_legacy.y    a_legacy.z    a_new.x     a_new.y    a_new.z\n");
+          "a_legacy.y    a_legacy.z    a_new.x     a_new.y    a_new.z\n");*/
+//    fprintf(file, "");
     fclose(file);
 }
 
diff --git a/examples/test_qr_mpi.c b/examples/test_qr_mpi.c
index 0a75ef79e603975651de035699cdd84bbb823f5e..3dcddffae90479f500c32fc37cd1f12027d8589c 100644
--- a/examples/test_qr_mpi.c
+++ b/examples/test_qr_mpi.c
@@ -480,6 +480,8 @@ void test_qr(int m, int n, int K, int nr_threads, int runs, double* matrix) {
 //  ticks tic, toc_run, tot_setup, tot_run = 0;
 #ifdef TASK_TIMERS
   ticks tic, toc_run;
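+  /* Common time origin for the task timers, taken just before qsched_run_MPI(). */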
+  ticks start;
 #endif
 char processor_name[MPI_MAX_PROCESSOR_NAME];
     int name_len;
@@ -488,6 +489,10 @@ char processor_name[MPI_MAX_PROCESSOR_NAME];
   long long int *task_finish;
   int *task_tids;
   int *task_types;
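+  /* Tile indices (i, j, k) recorded per task alongside the timers. */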
+  int *task_i;
+  int *task_j;
+  int *task_k;
   #endif
 
     // Initialize the MPI environment
@@ -593,10 +597,29 @@ if(MpiThreadLevel != MPI_THREAD_MULTIPLE)
                 printf("Ran DSSRFT with k = %lli, j = %lli, i = %lli\n", kk, jj, ii);
         }*/
         toc_run = getticks();
-        task_start[tid] = tic;
-        task_finish[tid] = toc_run;
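+        /* Store the timers relative to the common origin taken before qsched_run_MPI(). */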
+        message("tid = %lli", tid);
+        message("sizeof arrays = %i", m*n*m*n);
+        task_start[tid] = tic - start;
+        task_finish[tid] = toc_run - start;
         task_tids[tid] = omp_get_thread_num();
         task_types[tid] = type;
+        /* Record the tile indices (i, j, k) this task worked on. */
+        if (type == task_DGEQRF) {
+            task_i[tid] = (int)idata[2];
+            task_j[tid] = (int)idata[2];
+            task_k[tid] = (int)idata[2];
+        } else if (type == task_DLARFT) {
+            task_i[tid] = (int)idata[3];
+            task_j[tid] = (int)idata[4];
+            task_k[tid] = (int)idata[3];
+        } else if (type == task_DTSQRF) {
+            task_i[tid] = (int)idata[4];
+            task_j[tid] = (int)idata[3];
+            task_k[tid] = (int)idata[3];
+        } else { /* task_DSSRFT */
+            task_i[tid] = (int)idata[5];
+            task_j[tid] = (int)idata[6];
+            task_k[tid] = (int)idata[4];
+        }
     }
     #endif
   }
@@ -604,11 +629,14 @@ if(MpiThreadLevel != MPI_THREAD_MULTIPLE)
     qsched_init(&s, 2, qsched_flag_none, MPI_COMM_WORLD);
 
 #ifdef TASK_TIMERS
-task_start = (long long int*)calloc(sizeof(long long int), m*n*K*K);
-printf("Created task_start of size %i", m*n*K*K);
-task_finish = (long long int*)calloc(sizeof(long long int), m*n*K*K);
-task_tids = (int*)calloc(sizeof(int), m*n*K*K);
-task_types = (int*)calloc(sizeof(int), m*n*K*K);
+task_start = (long long int*)calloc(m*n*m*n, sizeof(long long int));
+printf("Created task_start of size %i\n", m*n*m*n);
+task_finish = (long long int*)calloc(m*n*m*n, sizeof(long long int));
+task_tids = (int*)calloc(m*n*m*n, sizeof(int));
+task_types = (int*)calloc(m*n*m*n, sizeof(int));
+task_i = (int*)calloc(m*n*m*n, sizeof(int));
+task_j = (int*)calloc(m*n*m*n, sizeof(int));
+task_k = (int*)calloc(m*n*m*n, sizeof(int));
 #endif
 
 
@@ -671,6 +699,11 @@ if(s.rank == 0) {
     #endif
     qsched_addlock(&s, tid_new, rid[k * m + k]);
     qsched_addlock(&s, tid_new, tau_id[k*m+k]);
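+    /* Print the IDs of one fixed task and its resource so they can be picked out in the debug output. */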
+    if(k == 10){
+        message("Task ID of task (10, 10, 10) = %lli", tid_new);
+        message("Resource ID of cell (10,10) = %lli", rid[k*m+k]);
+    }
     if(k == 0)
     {
         memcpy(rids[k*m+k], &A_orig[(data[1]*m+data[0])*K*K], sizeof(double)*K*K);
@@ -695,6 +727,10 @@ if(s.rank == 0) {
       tid_new = qsched_addtask(&s, task_DLARFT, task_flag_none, MPI_data,
                                sizeof(long long int) * 3, 300);
       #endif
+        if(j == 11 && k == 10){
+            message("Task ID of task (10, 11, 10) = %lli", tid_new);
+            message("Adding a use to rid = %lli", rid[k*m+k]);
+        }
         if(k == 0)
         {
            memcpy(rids[j*m+k], &A_orig[(data[1]*m+data[0])*K*K], sizeof(double)*K*K);
@@ -774,6 +810,10 @@ if(s.rank == 0) {
 
   } /* build the tasks. */
 }
+#ifdef TASK_TIMERS
+    MPI_Barrier(s.comm); /* align the ranks so the task timers share an origin */
+    start = getticks();
+#endif
 qsched_run_MPI(&s, nr_threads, runner);
   // Print off a hello world message
     printf("Hello world from processor %s, rank = %i, count_ranks = %i\n",
@@ -794,7 +834,7 @@ FILE *file = NULL;
-    for(i = 0; i < m*n*K*K; i++)
+    for(i = 0; i < m*n*m*n; i++)
     {
         if(task_types[i] > 0)
-            fprintf(file, "%i %i %lli %lli\n", task_types[i], s.rank*nr_threads+task_tids[i],task_start[i], task_finish[i]  );
+            fprintf(file, "%i %i %lli %lli %i %i %i\n", task_types[i], s.rank*nr_threads+task_tids[i],task_start[i], task_finish[i] ,task_i[i], task_j[i], task_k[i] );
     }
 
 
diff --git a/src/qsched.c b/src/qsched.c
index 4d9751504daace55369f262e6372a1909eafa211..a7cffa08b72da34092a192b1f200a959bcbda241 100644
--- a/src/qsched.c
+++ b/src/qsched.c
@@ -1415,6 +1415,19 @@ void qsched_partition_topsort( struct qsched *s, long long int *tid)
 void qsched_partition_compute_waitinit(struct qsched *s, int *wait_init)
 {
     int i;
+    int k, j;
+    struct task *t;
+    int count = s->task_ranks[s->count_ranks];
+
+    /* Recompute every task's wait count from its parents' unlock lists. */
+    for ( k = 0 ; k < count ; k++ )
+        s->tasks[k].wait = 0;
+    for ( k = 0 ; k < count ; k++ ) {
+        t = &s->tasks[k];
+        /* if ( !( t->flags & task_flag_skip ) ) */
+        for ( j = 0 ; j < t->nr_unlocks ; j++ )
+            s->tasks[ gettaskindex(t->unlocks[j], s) ].wait += 1;
+    }
     for(i = 0; i < s->task_ranks[s->count_ranks]; i++)
     {
         wait_init[i] = s->tasks[i].wait;
@@ -2258,7 +2271,7 @@ if(first_recv == NULL)
     error("Failed to allocate first_send array.");
 for(i = 0; i < s->res_ranks[s->count_ranks]; i++)
 {
-    data_pos[i] = s->res[i].node;
+    data_pos[i] = -1;
     first_recv[i] = -1;
     data_task[i] = -1;
 }
@@ -2600,7 +2613,7 @@ for(i = 0; i < count; i++)
     }/* j over locks. */
 
 
-/* Loop over the locked resources */
+/* Loop over the used resources */
     for(j = 0; j < t->nr_uses; j++)
     {
         /* Find the parents that unlock this resource. */
@@ -2730,14 +2744,18 @@
                     }
                     /* All the parents of this task are completed before the send task can occur also!*/            
                     int found = 0;
+/*                    message("data_pos for this resources is %i",data_pos[getindex(t->uses[j], s)]);
+                    message("Owner of the data is %i", s->res[getindex(t->uses[j], s)].node);
+                    message("Trying to reach ts.tasks[%lli]", (send_task[getindex(t->uses[j], s)] & 0xFFFFFFFFF) - ts.id_count);
+                    message("send_task = %lli", send_task[getindex(t->uses[j], s)]);
+                    message("send_task & 0xFFFFFFFFF = %lli", send_task[getindex(t->uses[j], s)] & 0xFFFFFFFFF);
+                    message("j=%i, i = %i, k = %i", j, i, k);*/
                     struct task *temp = &ts.tasks[(send_task[getindex(t->uses[j], s)] & 0xFFFFFFFFF) - ts.id_count];
-                    for(l = 0; l < temp->nr_unlocks; l++)
+                    for(l = 0; l < ts.count_unlockers; l++)
                     {
-                        if(temp->unlocks[l] == tid[current_parents[k]] && ts.deps_key[l] == t->uses[j])
+
+                        /* ts.unlockers is not yet sorted per task at this
+                         * point, so scan the full list for an entry recording
+                         * that this parent unlocks the send task. */
+                        if(ts.unlockers_key[l] == temp->id && ts.unlockers[l] == tid[current_parents[k]])
                         {
                             found = 1;
                             break;
-                        }       
+                        }
                     }
                     if(!found)
                     {
@@ -2767,6 +2803,8 @@ for(i = 0; i < count; i++)
 free(parents);
 free(current_parents);
 
+    message("ts->count = %i", ts.count);
+    message("ts.count_unlockers = %i", ts.count_unlockers);
     /* All the nodes have created new tasks, we need to synchronize this across nodes now.*/
     tsched_synchronize( &ts, s);
 
@@ -2783,15 +2821,15 @@ void qsched_partition( struct qsched *s){
     int i, j;
     struct task *t;
     int errors;
-    ticks tic, toc;
+//    ticks tic, toc;
    // struct res *r;
 
     res_costs = (idx_t*) calloc(s->res_ranks[s->count_ranks], sizeof(idx_t));
     
-    tic = getticks();
+//    tic = getticks();
     qsched_partition_compute_costs( s, res_costs);
-    toc = getticks();
-    message("qsched_partition_compute_costs took %lli (= %e) ticks\n", toc-tic, (float)(toc-tic));
+//    toc = getticks();
+//    message("qsched_partition_compute_costs took %lli (= %e) ticks\n", toc-tic, (float)(toc-tic));
 
     //All reduce res_costs;
     #if IDXTYPEWIDTH == 32
@@ -2826,10 +2864,10 @@ void qsched_partition( struct qsched *s){
     for(i = 0; i < s->res_ranks[s->count_ranks]; i++)
         pos_in_nodelist[i] = -1;
 
-    tic = getticks();
+//    tic = getticks();
     qsched_partition_build_nodelist(s, nodelist, noderef, &node_count, res_costs, pos_in_nodelist);
-    toc = getticks();
-    message("qsched_partition_build_nodelist took %lli (= %e) ticks\n", toc-tic, (float)(toc-tic));
+//    toc = getticks();
+   // message("qsched_partition_build_nodelist took %lli (= %e) ticks\n", toc-tic, (float)(toc-tic));
 
 
 //Build an edgelist where edges are of weight += task->weight for each task that locks both. If noderef doesn't contain, recurse until we find the ones it does contain (yuck). Build a set of adjacency lists.
@@ -2859,17 +2897,15 @@ for(i = 0; i < node_count; i++)
     edge_sizes[i] = initial_size;
 }
 
-    tic = getticks();
+//    tic = getticks();
     qsched_partition_build_edgelist(s, edge_vwgts, edge_lists, edge_counts, edge_sizes, node_count, noderef, pos_in_nodelist);
-    toc = getticks();
-    message("qsched_partition_build_edgelist took %lli (= %e) ticks\n", toc-tic, (float)(toc-tic));
+//    toc = getticks();
+//    message("qsched_partition_build_edgelist took %lli (= %e) ticks\n", toc-tic, (float)(toc-tic));
 
 idx_t edgelist_size = 0;
 for(i = 0; i < node_count; i++)
     edgelist_size += edge_counts[i];
 
-printf("edgelist_size = %i\n", edgelist_size);
-printf("node_count = %i\n", node_count);
 /* Convert the edgelists to METIS version.*/
     idx_t *edgelist_new = malloc(sizeof(idx_t) * edgelist_size);
     if(edgelist_new == NULL)
@@ -2981,13 +3017,13 @@ for(i = 0; i < node_count; i++)
         if(nodeIDs == NULL)
             error("Failed to allocate nodeIDs");
         idx_t temp_count_ranks = s->count_ranks;
-        tic = getticks();
+//        tic = getticks();
         if(s->count_ranks > 1) {
             if( METIS_PartGraphKway(&node_count, &one, edgelist_pos, edgelist_new, nodelist, NULL, edgelist_vwgt, &temp_count_ranks, NULL, NULL,options, objval, nodeIDs) != METIS_OK)
                 error("Failed to partition\n");
         }
-            toc = getticks();
-    message("METIS_PartGraphKway took %lli (= %e) ticks\n", toc-tic, (float)(toc-tic));
+//            toc = getticks();
+//    message("METIS_PartGraphKway took %lli (= %e) ticks\n", toc-tic, (float)(toc-tic));
         //TODO Check the costs.
         int count_me = 0;
         for(i = 0; i < node_count; i++)
@@ -2999,7 +3035,7 @@ for(i = 0; i < node_count; i++)
 
 
 
-    tic = getticks();
+//    tic = getticks();
     MPI_Request *reqs;
     reqs = (MPI_Request*) calloc(sizeof(MPI_Request) , node_count * 2);
     for(i = 0; i < node_count * 2; i++)
@@ -3108,14 +3144,14 @@ for(i = 0; i < node_count; i++)
             if(temp->node == s->rank && temp->data == NULL)
                 error("Local resource has data set to NULL");
         }
-    toc = getticks();
-    message("qsched_partition synchronizing resources took %lli (= %e) ticks\n", toc-tic, (float)(toc-tic));
+//    toc = getticks();
+//    message("qsched_partition synchronizing resources took %lli (= %e) ticks\n", toc-tic, (float)(toc-tic));
         // Move the tasks time!
         // Tasks belong to the node of the resource they lock of highest size.
         // If they don't lock any resources, the resources they use of highest size.
         // Everyone does it for all tasks at the moment...
         // TODO ISSUE: Whoever is assigned bigger resources will run more tasks - not balanced well. Minimises communication, less relevant.
-    tic = getticks();
+//    tic = getticks();
         for(i = 0; i < s->task_ranks[s->count_ranks]; i++)
         {
             struct task *t = &s->tasks[i];
@@ -3149,8 +3185,8 @@ for(i = 0; i < node_count; i++)
         }
 
 
-    toc = getticks();
-    message("qsched_partition task \"movement\" took %lli (= %e) ticks\n", toc-tic, (float)(toc-tic));
+//    toc = getticks();
+//    message("qsched_partition task \"movement\" took %lli (= %e) ticks\n", toc-tic, (float)(toc-tic));
 
          //First we need to do a topological sort.
     int count = s->task_ranks[s->count_ranks];
@@ -3178,10 +3214,10 @@ for(i = 0; i < node_count; i++)
     }
 
     //Now we just need to create the send/recv tasks from the dependencies.
-    tic = getticks();
+//    tic = getticks();
     qsched_partition_create_sends( s, tid);
-    toc = getticks();
-    message("qsched_partition_create_sends took %lli (= %e) ticks\n", toc-tic, (float)(toc-tic));
+//    toc = getticks();
+//    message("qsched_partition_create_sends took %lli (= %e) ticks\n", toc-tic, (float)(toc-tic));
 
     free(edge_lists);
     free(edge_sizes);
@@ -3206,7 +3242,7 @@ void qsched_prepare_mpi( struct qsched *s){
     struct task *t, *tasks;
     
     TIMER_TIC
-    ticks tic, toc;
+//    ticks tic, toc;
 
     /* Lock the sched. */
     lock_lock( &s->lock );
@@ -3229,15 +3265,15 @@ void qsched_prepare_mpi( struct qsched *s){
     /* If the sched is dirty... */
     if ( s->flags & qsched_flag_dirty ) {
         
-        tic = getticks();
+//        tic = getticks();
         qsched_sync_schedulers( s );
-        toc = getticks();
-        message("qsched_sync_schedulers took %lli (= %e) ticks\n", toc-tic, (float)(toc-tic));
+//        toc = getticks();
+//        message("qsched_sync_schedulers took %lli (= %e) ticks\n", toc-tic, (float)(toc-tic));
         tasks = s->tasks;        
         count = s->count;
 //        count = s->res_ranks[s->count_ranks];
         /* Do the sorts in parallel, if possible. */
-        tic = getticks();
+//        tic = getticks();
         #pragma omp parallel
         {
     
@@ -3285,14 +3321,13 @@ void qsched_prepare_mpi( struct qsched *s){
         
         /* All cleaned-up now! */
         s->flags &= ~qsched_flag_dirty;
-        toc = getticks();
-        message("Cleaning up scheduler took %lli (= %e) ticks\n", toc-tic, (float)(toc-tic));
+//        toc = getticks();
+   //     message("Cleaning up scheduler took %lli (= %e) ticks\n", toc-tic, (float)(toc-tic));
         }
-    tic = getticks();
+//    tic = getticks();
     qsched_partition(s);
-    toc = getticks();
-    message("qsched_partition took %lli (= %e) ticks\n", toc-tic, (float)(toc-tic));
-    //return;    
+//    toc = getticks();
+ //   message("qsched_partition took %lli (= %e) ticks\n", toc-tic, (float)(toc-tic));
 
 long long int *tid = malloc(sizeof(long long int) * s->task_ranks[s->count_ranks]);
 if(tid == NULL)