diff --git a/examples/test_qr_mpi.c b/examples/test_qr_mpi.c
index 6711a95a92740eb069fabb386f73ac8b4db0c4ac..33d982a39d4f83fb20392f51a27f906603a9d68d 100644
--- a/examples/test_qr_mpi.c
+++ b/examples/test_qr_mpi.c
@@ -723,7 +723,7 @@ for(i = 0; i < s.count_ranks; i++)
             if(s.tasks[j].node_executed == s.rank)
             {
                 struct task *t = &s.tasks[j];
-                fprintf(file, "%lli %i %llu %llu %i %i\n", t->id, t->type, t->task_start, t->task_finish, t->node_executed, t->thread_executed);
+                fprintf(file, "%lli %i %llu %llu %i %i %lli %i\n", t->id, t->type, t->task_start, t->task_finish, t->node_executed, t->thread_executed, t->weight, t->MPI_fails);
             }   
         }
         fclose(file);
diff --git a/src/qsched.c b/src/qsched.c
index ce297716a4dd51f20a04f4d096596f28762cc320..475671771d93d063d7ced5ba9d889543d6e2e262 100644
--- a/src/qsched.c
+++ b/src/qsched.c
@@ -1635,6 +1635,8 @@ void *temp;
     t->nr_uses = 0;
     t->id = id;
     #ifdef TASK_TIMERS
+        t->task_start = 0;
+        t->task_finish = 0;
+        t->MPI_fails = -1;
         t->node_executed = -1;
         t->thread_executed = -1;
     #endif
@@ -2922,6 +2924,27 @@ free(current_parents);
     tsched_free( &ts) ;
 }
 
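+/* Baseline partitioner: deal the graph nodes out to the ranks in
+   round-robin order, ignoring vertex and edge weights. The vertices and
+   edges arguments are unused, but the signature lets this stand in for
+   the METIS call below. */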
+void qsched_partition_roundrobin(struct qsched *s, idx_t* vertices, idx_t* edges, idx_t count_vert, idx_t* result)
+{
+    idx_t i;
+    for(i = 0; i < count_vert; i++)
+        result[i] = i % s->count_ranks;
+}
+
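+/* Experimental partitioner: assign each node to a pseudo-random rank.
+   The fixed srand seed keeps the partition reproducible across runs. */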
+void qsched_partition_random(struct qsched *s, idx_t* vertices, idx_t* edges, idx_t count_vert, idx_t* result)
+{
+    idx_t i;
+    srand(6);
+    for(i = 0; i < count_vert; i++)
+        result[i] = rand() % s->count_ranks;
+}
+
 #endif
 void qsched_partition( struct qsched *s){
 #ifdef WITH_MPI
@@ -3108,11 +3131,12 @@ for(i = 0; i < node_count; i++)
         idx_t options[METIS_NOPTIONS];
         METIS_SetDefaultOptions( options );
         options[ METIS_OPTION_OBJTYPE ] = METIS_OBJTYPE_CUT;
+/*        options[ METIS_OPTION_OBJTYPE ] = METIS_OBJTYPE_VOL;*/
         options[ METIS_OPTION_NUMBERING ] = 0;
         options[ METIS_OPTION_CONTIG ] = 0; //TODO 1
         options[ METIS_OPTION_NCUTS ] = 10;
         options[ METIS_OPTION_NITER ] = 10;
-        options[ METIS_OPTION_UFACTOR ] = 30;
+        options[ METIS_OPTION_UFACTOR ] = 1;
         options[ METIS_OPTION_SEED ] = 359872345;
     
         idx_t one = 1;
@@ -3129,6 +3153,7 @@ for(i = 0; i < node_count; i++)
         if(s->count_ranks > 1) {
             if( METIS_PartGraphKway(&node_count, &one, edgelist_pos, edgelist_new, nodelist, NULL, edgelist_vwgt, &temp_count_ranks, NULL, NULL,options, objval, nodeIDs) != METIS_OK)
                 error("Failed to partition\n");
+            /*qsched_partition_random(s, nodelist, edgelist_new, node_count, nodeIDs );*/
         }
            toc = getticks();
     message("METIS_PartGraphKway took %lli (= %.3fms) ticks\n", toc-tic, (float)(toc-tic)/2.67e6);
@@ -3152,6 +3177,7 @@ for(i = 0; i < node_count; i++)
     tic = getticks();
 if(s->count_ranks > 1)
 {
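+    /* Make sure every rank reaches the resource exchange together, so the
+       Isend/Irecv timing below is not skewed by stragglers. */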
+    MPI_Barrier(s->comm);
     MPI_Request *reqs;
     reqs = (MPI_Request*) calloc(sizeof(MPI_Request) , node_count * 2);
     for(i = 0; i < node_count * 2; i++)
@@ -3186,7 +3212,7 @@ if(s->count_ranks > 1)
                 s->res[noderef[i]].node = nodeIDs[i];
             }
         }
-
+    message("Emitted all of the %i Ireqs after %.3fms", reqnr, (float)(getticks()-tic)/2.67e6);
     MPI_Status stats[node_count*2];
         if ( ( MPI_Waitall( node_count*2, reqs , stats ) ) != MPI_SUCCESS ) {
             error("Failed to send/recv stuff");
@@ -3487,6 +3513,17 @@ if(tid == NULL)
         
     /* Run through the topologically sorted tasks backwards and
        set their weights, re-setting the waits while we're at it. */
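+    /* First pass: bump the cost of any task that unlocks a send or recv
+       by 10%, nudging the communication path up the priority order. */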
+    for( k = 0; k < count; k++) {
+        t = &tasks[ gettaskindex(tid[k],s) ];
+        for ( j = 0; j < t->nr_unlocks; j++ ) {
+            int type = tasks[ gettaskindex(t->unlocks[j],s) ].type;
+            if( type == task_type_send || type == task_type_recv )
+                t->cost *= 1.1;
+        }
+    }
+
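+    /* Track the largest critical-path weight; sends are pushed above it below. */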
+    long long int max_weight = -1;
     for ( k = count-1 ; k >= 0 ; k-- ) {
         long long int maxweight = 0;
         t = &tasks[ gettaskindex(tid[k],s) ];
@@ -3496,13 +3533,23 @@ if(tid == NULL)
                 maxweight = tasks[ gettaskindex(t->unlocks[j],s) ].weight;
             }
         t->weight = t->cost + maxweight;
+        if(t->weight > max_weight)
+            max_weight = t->weight;
         }
 
-/* Reset the waits and now compute them for non-skipped tasks only! */
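+    /* Zero the waits, and bias the comm tasks: sends get the maximum
+       weight added so they sort ahead of all ordinary work, while recv
+       weights are halved so they defer to it. */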
     for( k = 0; k < count; k++) {
         s->tasks[k].wait = 0;
+        if(s->tasks[k].type == task_type_send)
+        {
+            s->tasks[k].weight += max_weight;
+        }else if(s->tasks[k].type == task_type_recv)
+        {
+            s->tasks[k].weight /= 2;
+        }
     }
 
+
+/* Reset the waits and now compute them for non-skipped tasks only! */
     for ( k = 0 ; k < count ; k++ ) {
         t = &s->tasks[k];
         if ( !( t->flags & task_flag_skip ) ){
@@ -4151,13 +4198,13 @@ void qsched_enqueue ( struct qsched *s , struct task *t ) {
         
         }
     /* If its a send or recv place it in queue 0. */
-/*    else if (t->type == task_type_send || t->type == task_type_recv){
+    else if (t->type == task_type_send || t->type == task_type_recv){
         #ifdef WITH_MPI
         queue_put(&s->queues[0], s, t->id);
         #else
         error("Task of type send/recv in non-MPI process - please don't use these reserved types.");
         #endif
-    }*/
+    }
     /* Otherwise, find a home (queue) for it. */
     else {
     
@@ -4213,7 +4260,7 @@ void qsched_done ( struct qsched *s , struct task *t ) {
         t->cost = t->toc - t->tic;
 
 #ifdef TASK_TIMERS
-    t->task_finish = getticks() - s->start;
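+    /* task_finish starts at 0, so += behaves like =; it also lets retry time
+       from comm polls accumulate if that is re-enabled in qsched_locktask. */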
+    t->task_finish += getticks() - s->start;
     t->node_executed = s->rank;
     #ifndef HAVE_PTHREAD
     t->thread_executed = omp_get_thread_num();
@@ -4356,29 +4403,71 @@ int qsched_locktask ( struct qsched *s , long long int tid ) {
 
         
 #ifdef TASK_TIMERS
-    t->task_start = getticks() - s->start;
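+    /* Raw tick count for now; rebased onto the scheduler clock (s->start) further down. */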
+    t->task_start = getticks();
 #endif
 /* Communication task? */
   if (t->type == task_type_recv || t->type == task_type_send) {
 
 #ifdef WITH_MPI
-    /* Check the status of the MPI request. */
-    int res, err;
-    MPI_Status stat;    
-    if ((err = MPI_Test(&t->req, &res, &stat)) != MPI_SUCCESS) {
-      char buff[MPI_MAX_ERROR_STRING];
-      int len;
-      MPI_Error_string(err, buff, &len);
-      error("Failed to test request on send/recv task (tag=%i, %s).", t->flags,
-            buff);
-    }
-    return res;
+/*    if(t->type == task_type_send){*/
+        /* Check the status of the MPI request. */
+        int res, err;
+        MPI_Status stat;    
+        if ((err = MPI_Test(&t->req, &res, &stat)) != MPI_SUCCESS) {
+          char buff[MPI_MAX_ERROR_STRING];
+          int len;
+          MPI_Error_string(err, buff, &len);
+          error("Failed to test request on send/recv task (tag=%i, %s).", t->flags,
+                buff);
+        }
+        #ifdef TASK_TIMERS
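+        /* Count polls where the request had not yet completed. */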
+        if(!res)
+        {
+            t->MPI_fails++;
+            //t->task_finish += getticks() - t->task_start;
+        }
+        t->task_start -= s->start;
+        #endif
+        return res;
+/*    }else{
+        int res, err;
+        MPI_Status stat;
+        int *data = (int*)(&s->data[t->data]);
+        if( (err = MPI_Iprobe(data[0], data[4], s->comm, &res, &stat)) != MPI_SUCCESS){
+          char buff[MPI_MAX_ERROR_STRING];
+          int len;
+          MPI_Error_string(err, buff, &len);
+          error("Failed to probe request on recv task (tag=%i, %s).", t->flags,
+                buff);
+        }
+        if(res)
+        {
+            if ((err = MPI_Test(&t->req, &res, &stat)) != MPI_SUCCESS) {
+              char buff[MPI_MAX_ERROR_STRING];
+              int len;
+              MPI_Error_string(err, buff, &len);
+              error("Failed to test request on recv task (tag=%i, %s).", t->flags,
+                    buff);
+            }
+            #ifdef TASK_TIMERS
+            if(!res)
+            {
+                t->MPI_fails++;
+                //t->task_finish += getticks() - t->task_start;
+            }
+            t->task_start -= s->start;
+        #endif
+        }
+        return res;
+    }*/
 #else
     error("QuickSched was not compiled with MPI support.");
 #endif
 
   }
-
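+    /* Ordinary task: rebase the start timestamp onto the scheduler clock. */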
+    #ifdef TASK_TIMERS
+    t->task_start -= s->start;
+    #endif
     /* Try to lock all the task's locks. */
     for ( k = 0 ; k < t->nr_locks ; k++ )
         if ( qsched_lockres( s , t->locks[k] ) == 0 )
@@ -5131,8 +5220,11 @@ void *temp;
     
     t->node = s->rank;
     #ifdef TASK_TIMERS
+        t->task_start = 0;
+        t->task_finish = 0;
         t->node_executed = -1;
         t->thread_executed = -1;
+        t->MPI_fails = -1;
     #endif
     /* The sched is now dirty. */
     s->flags |= qsched_flag_dirty;
diff --git a/src/task.h b/src/task.h
index bee97ca4408863b72417b5d8db506a2384cbb80a..5be2cd157e4584aff0dab8b25e5a1e4bc28d3762 100644
--- a/src/task.h
+++ b/src/task.h
@@ -74,6 +74,7 @@ struct task {
     ticks task_start, task_finish;
     int node_executed;
     int thread_executed;
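+    /* Number of MPI_Test polls that found the request still incomplete. */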
+    int MPI_fails;
     #endif
     #endif