diff --git a/examples/test_bh_2.cu b/examples/test_bh_2.cu
index 259a31f7d1a5fd8aa732f1d274f86aca523bec45..7981f6b9877e8445dd4f7c192672d00dd4b55a96 100644
--- a/examples/test_bh_2.cu
+++ b/examples/test_bh_2.cu
@@ -90,7 +90,9 @@ float4 *parts_a_m_host;
 double2 *com_xy_host;
 double *com_z_host;
 float *com_mass_host;
-
+double2 *parts_pos_xy_temp;
+double *parts_pos_z_temp;
+float4 *parts_a_m_temp;
 
 
 
@@ -113,9 +115,9 @@ __device__ __forceinline__ void iact_pair_direct(struct cell *ci, struct cell *c
 
     /* Loop over cell i.*/
     for(i = parts_i + threadIdx.x; i < parts_i + count_i; i+= blockDim.x) {
-        xi[0] = parts_xy[i].x;
-        xi[1] = parts_xy[i].y;
-        xi[2] = parts_z[i];     
+        xi[0] = parts_pos_xy[i].x;
+        xi[1] = parts_pos_xy[i].y;
+        xi[2] = parts_pos_z[i];     
         for(k = 0; k < 3; k++) {
             ai[k] = 0.0f;
         }
@@ -154,9 +156,9 @@ __device__ __forceinline__ void iact_pair_direct(struct cell *ci, struct cell *c
     }
  /*Loop over cell j. */
     for(i = parts_j + threadIdx.x; i < parts_j + count_j; i+= blockDim.x) {
-        xi[0] = parts_xy[i].x;
-        xi[1] = parts_xy[i].y;
-        xi[2] = parts_z[i];     
+        xi[0] = parts_pos_xy[i].x;
+        xi[1] = parts_pos_xy[i].y;
+        xi[2] = parts_pos_z[i];     
         for(k = 0; k < 3; k++) {
             ai[k] = 0.0f;
         }
@@ -290,23 +292,40 @@ __device__ void iact_pair_pc(struct cell *ci, struct cell *cj, struct cell *leaf
 
     struct cell *cp ,*cps;
 
+    if(leaf->split)
+    {
+        printf("Leaf split = 1, oh dear.");
+        asm("trap;");
+    }
+if(ci->split > 1)
+    {
+        printf("Cell %i had split > 1\n", ci - cells);
+        asm("trap;");
+    }
+    if(cj->split > 1)
+    {
+        printf("cell %i had split > 1\n", cj - cells);
+        asm("trap;");
+    }
+
     for(cp = &cells[ci->firstchild]; cp != &cells[ci->sibling]; cp = &cells[cp->sibling]) {
         if(is_inside(leaf, cp)) break;
     }
 
     if(are_neighbours_different_size(cp, cj)) {
-        for(cps = &cells[cj->firstchild]; cps != &cells[ci->sibling]; cps = &cells[cps->sibling]) {
+        for(cps = &cells[cj->firstchild]; cps != &cells[cj->sibling]; cps = &cells[cps->sibling]) {
             if(are_neighbours(cp, cps)) {
                 if(cp->split && cps->split) {
                     iact_pair_pc(cp, cps, leaf);
                 }
             } else {
                 make_interact_pc(leaf, cps);
+                __syncthreads();
             }
         }
     }
 
-
+    __syncthreads();
 }   
 
 /**
@@ -320,6 +339,17 @@ __device__ void iact_self_pc(struct cell *c, struct cell *leaf) {
 
     struct cell *cp, *cps;
 
+    if(leaf->split)
+    {
+        printf("Leaf split = 1, oh dear.");
+        asm("trap;");
+    }
+    if(c->split > 1)
+    {
+        printf("Cell had split > 1\n");
+        asm("trap;");
+    }
+
     /* Find the subcell of c the leaf is in.*/
     for( cp = &cells[c->firstchild]; cp != &cells[c->sibling]; cp = &cells[cp->sibling]) {
         if(is_inside(leaf, cp)) break;
@@ -606,23 +636,23 @@ void cell_split(int c, struct qsched *s) {
     {
         if( cudaHostGetDevicePointer(&temp_xy, &parts_pos_xy_host[cell_pool[c].parts], 0) != cudaSuccess )
             error("Failed to get host device pointer.");
-        printf("tempxy = %p\n", temp_xy);
+//        printf("tempxy = %p\n", temp_xy);
         cell_pool[c].res = qsched_addres(s, qsched_owner_none, qsched_res_none, temp_xy,
-                                        sizeof(double2) * cell_pool[c].count, parts_pos_xy + cell_pool[c].parts);
+                                        sizeof(double2) * cell_pool[c].count, parts_pos_xy_temp + cell_pool[c].parts);
     }
     if(cell_pool[c].resz == qsched_res_none)
     {
         if( cudaHostGetDevicePointer(&temp_z, &parts_pos_z_host[cell_pool[c].parts], 0) != cudaSuccess )
             error("Failed to get host device pointer.");
         cell_pool[c].resz = qsched_addres(s, qsched_owner_none, qsched_res_none, temp_z,
-                                        sizeof(double) * cell_pool[c].count, parts_pos_z + cell_pool[c].parts);
+                                        sizeof(double) * cell_pool[c].count, parts_pos_z_temp + cell_pool[c].parts);
     }
     if(cell_pool[c].resm == qsched_res_none)
     {
         if( cudaHostGetDevicePointer(&temp_a_m, &parts_a_m_host[cell_pool[c].parts], 0) != cudaSuccess )
             error("Failed to get host device pointer.");
         cell_pool[c].resm = qsched_addres(s, qsched_owner_none, qsched_res_none, temp_a_m,
-                                        sizeof(float4) * cell_pool[c].count, parts_a_m + cell_pool[c].parts);
+                                        sizeof(float4) * cell_pool[c].count, parts_a_m_temp + cell_pool[c].parts);
     }
        // error("Cell has no resource");*///TODO
 
@@ -731,15 +761,15 @@ void cell_split(int c, struct qsched *s) {
         if( cudaHostGetDevicePointer(&temp_xy, &parts_pos_xy_host[cell_pool[progenitors[k]].parts], 0) != cudaSuccess )
             error("Failed to get host device pointer.");
         cell_pool[progenitors[k]].res = qsched_addres(s, qsched_owner_none, cell->res, temp_xy,
-                                        sizeof(double2) * cell_pool[progenitors[k]].count, parts_pos_xy + cell_pool[progenitors[k]].parts);
+                                        sizeof(double2) * cell_pool[progenitors[k]].count, parts_pos_xy_temp + cell_pool[progenitors[k]].parts);
         if( cudaHostGetDevicePointer(&temp_z, &parts_pos_z_host[cell_pool[progenitors[k]].parts], 0) != cudaSuccess )
             error("Failed to get host device pointer.");
         cell_pool[progenitors[k]].resz = qsched_addres(s, qsched_owner_none, cell->resz, temp_z,
-                                        sizeof(double) * cell_pool[progenitors[k]].count, parts_pos_z + cell_pool[progenitors[k]].parts);
+                                        sizeof(double) * cell_pool[progenitors[k]].count, parts_pos_z_temp + cell_pool[progenitors[k]].parts);
         if( cudaHostGetDevicePointer(&temp_a_m, &parts_a_m_host[cell_pool[progenitors[k]].parts], 0) != cudaSuccess )
             error("Failed to get host device pointer.");
         cell_pool[progenitors[k]].resm = qsched_addres(s, qsched_owner_none, cell->resm, temp_a_m,
-                                        sizeof(float4) * cell_pool[progenitors[k]].count, parts_a_m + cell_pool[progenitors[k]].parts);
+                                        sizeof(float4) * cell_pool[progenitors[k]].count, parts_a_m_temp + cell_pool[progenitors[k]].parts);
         }
 
         /* Find the first non-empty progenitor */
@@ -784,6 +814,9 @@ void cell_split(int c, struct qsched *s) {
     int data[2] = {root, c};
         int tid = qsched_addtask(s, task_type_self_pc, task_flag_none, data,
                                  2 * sizeof(int), 1);
+        qsched_addlock(s, tid, cell_pool[root].res);
+        qsched_addlock(s, tid, cell_pool[root].resz);
+        qsched_addlock(s, tid, cell_pool[root].resm);
         qsched_addlock(s, tid, cell_pool[c].res);
         qsched_addlock(s, tid, cell_pool[c].resz);
         qsched_addlock(s, tid, cell_pool[c].resm);
@@ -913,9 +946,7 @@ void test_bh(int N, int runs, char *fileName) {
   struct qsched s;
   ticks tic, toc_run, tot_setup = 0, tot_run = 0;
   int countMultipoles = 0, countPairs = 0, countCoMs = 0;
-  double2 *parts_pos_xy_temp;
-  double *parts_pos_z_temp;
-  float4 *parts_a_m_temp;
+  struct cell *gpu_ptr_cells;
 
   cudaFree(0);
     if( cudaMemcpyFromSymbol( &func , function , sizeof(qsched_funtype) ) != cudaSuccess)
@@ -934,10 +965,10 @@ void test_bh(int N, int runs, char *fileName) {
 
     if( cudaMalloc(&parts_pos_xy_temp, sizeof(double2) * N) != cudaSuccess)
         error("Failed to allocate device parts array");
-    printf("parts_pos_xy_temp = %p\n", parts_pos_xy_temp);
+  //  printf("parts_pos_xy_temp = %p\n", parts_pos_xy_temp);
     if( cudaMemcpyToSymbol(parts_pos_xy, &parts_pos_xy_temp, sizeof(double2*), 0, cudaMemcpyHostToDevice) != cudaSuccess)
         error("Failed to set device symbol for parts array");
-    printf("parts_pos_xy = %p\n", parts_pos_xy);
+//    printf("parts_pos_xy = %p\n", parts_pos_xy);
     if( cudaMalloc(&parts_pos_z_temp, sizeof(double) * N) != cudaSuccess)
         error("Failed to allocate device parts array");
     if( cudaMemcpyToSymbol(parts_pos_z, &parts_pos_z_temp, sizeof(double*), 0, cudaMemcpyHostToDevice) != cudaSuccess)
@@ -1018,6 +1049,10 @@ void test_bh(int N, int runs, char *fileName) {
     message("Average number of parts per leaf is %lf.", ((double)N) / ((double)nr_leaves));
     message("Max number of parts in a leaf is %i, min number is %i", maxparts, minparts);
     
+    for(k = 0; k < num_cells; k++)
+        if(cell_pool[k].split > 1 ) 
+            printf("Split > 1\n");
+
     create_tasks(&s, root, NULL);    
 
     message("total number of tasks: %i.", s.count);
@@ -1033,6 +1068,56 @@ void test_bh(int N, int runs, char *fileName) {
             parts_a_m_host[i].z = 0.0;
         }
 
+
+    /* Copy the cells to the device. */
+    if( cudaMalloc( &gpu_ptr_cells , sizeof(struct cell) * used_cells) != cudaSuccess)
+        error("Failed to allocate cells on the GPU");
+    if( cudaMemcpy( gpu_ptr_cells, cell_pool, sizeof(struct cell) * used_cells, cudaMemcpyHostToDevice) != cudaSuccess)
+        error("Failed to copy cells to the GPU");
+    if( cudaMemcpyToSymbol(cells, &gpu_ptr_cells, sizeof(struct cell*), 0, cudaMemcpyHostToDevice) != cudaSuccess )
+        error("Failed to copy cell pointer to the GPU");
+
+double2 *com_temp;
+double *comz_temp;
+float *comm_temp;
+
+    if(cudaMalloc( &com_temp, sizeof(double2) * used_cells) != cudaSuccess)
+        error("Failed to allocate com on the GPU");
+    if( cudaMemcpy( com_temp, com_xy_host, sizeof(double2) * used_cells, cudaMemcpyHostToDevice) != cudaSuccess )
+        error("failed to copy com to the GPU");
+    if( cudaMemcpyToSymbol(com_xy, &com_temp, sizeof(double2 *), 0, cudaMemcpyHostToDevice) != cudaSuccess)
+        error("Failed to copy com pointer to the GPU");
+
+
+    if(cudaMalloc( &comz_temp, sizeof(double) * used_cells) != cudaSuccess)
+        error("Failed to allocate com on the GPU");
+    if( cudaMemcpy( comz_temp, com_z_host, sizeof(double) * used_cells, cudaMemcpyHostToDevice) != cudaSuccess )
+        error("failed to copy com to the GPU");
+    if( cudaMemcpyToSymbol(com_z, &comz_temp, sizeof(double *), 0, cudaMemcpyHostToDevice) != cudaSuccess)
+        error("Failed to copy com pointer to the GPU");
+
+    if(cudaMalloc( &comm_temp, sizeof(float) * used_cells) != cudaSuccess)
+        error("Failed to allocate com on the GPU");
+    if( cudaMemcpy( comm_temp, com_z_host, sizeof(float) * used_cells, cudaMemcpyHostToDevice) != cudaSuccess )
+        error("failed to copy com to the GPU");
+    if( cudaMemcpyToSymbol(com_mass, &comm_temp, sizeof(float *), 0, cudaMemcpyHostToDevice) != cudaSuccess)
+        error("Failed to copy com pointer to the GPU");
+
+   /* for(i = 0; i < s.count; i++){
+        int *idata = (int*)&s.data[s.tasks[i].data];
+        if(s.tasks[i].type == task_type_self)
+            printf("Self task with data[0] = %i\n", idata[0]);
+        if(s.tasks[i].type == task_type_pair) {
+            printf("Pair task with data[0] = %i and data[1] = %i\n", idata[0], idata[1]);
+        }
+        if(s.tasks[i].type == task_type_self_pc) {
+            printf("PC task with data[0] = %i and data[1] = %i\n", idata[0], idata[1]);
+        }
+    }*/
+        
+
+        
+
         //Run code.
         printf("gpu_data = %p\n", (int*)s.res[0].gpu_data);
         qsched_run_CUDA( &s , func );
diff --git a/src/cuda_queue.cu b/src/cuda_queue.cu
index 9beba72c9f2fe2189f12a7ccece603762d278888..853ed2b8afa1f24f2c7909bd27c916be6e380965 100644
--- a/src/cuda_queue.cu
+++ b/src/cuda_queue.cu
@@ -735,16 +735,16 @@ void qsched_create_loads(struct qsched *s, int ID, int size, int numChildren, in
         s->res[ID].task = task;
         utask = qsched_addtask( s , type_ghost, task_flag_none, NULL, 0 , 0 );
         qsched_adduse(s, task, ID);
-        s->res[ID].task = utask;
-        if(parent >= 0)
-        {
+        s->res[ID].utask = utask;
+      //  if(parent >= 0)
+//        {
             /* Create dependecy to parent. */ 
           //  qsched_addunlock(s, task, s->res[parent].task );
           ///  qsched_addunlock(s, s->res[parent].utask, utask );
-        }
+  //      }
         for(i = sorted[ID]; i < sorted[ID+1]; i++)
         {
-            qsched_create_loads(s, i, s->res[res[i]].size, sorted[i+1]-sorted[i], ID, res, sorted);
+            qsched_create_loads(s, res[i], s->res[res[i]].size, sorted[res[i]+1]-sorted[res[i]], ID, res, sorted);
         }
     }else{
         task = qsched_addtask( s , type_load , task_flag_none, &ID, sizeof(int), 0 );
@@ -757,11 +757,11 @@ void qsched_create_loads(struct qsched *s, int ID, int size, int numChildren, in
             s->res[res[j]].task = task;
         }
         /* If it has a parent then set the parents ghost task to be dependent on this.*/
-        if(parent >= 0)
-        {
+   //     if(parent >= 0)
+     //   {
           //  qsched_addunlock(s, task, s->res[parent].task );
           //  qsched_addunlock(s, s->res[parent].utask, utask);
-        }
+       // }
     }
 }
 
@@ -828,156 +828,15 @@ int transitive_use_locks(struct qsched *s, int tid, int res, int depth)
     return 0;
 }
 
-/*
-* Not used.
-*/
-void qsched_prepare_deps( struct qsched *s )
-{
-    int **is_loaded;
-    int **parents;
-    int i, k, j, use, usek, usem;
-    int *num_parents;
-    is_loaded = (int**)malloc(sizeof(int*) * s->count);
-    parents = (int**)malloc(sizeof(int*) * s->count);
-    num_parents = (int*)malloc(sizeof(int) * s->count);
-    bzero(num_parents, sizeof(int)*s->count);
-    k = (sizeof(int)*s->count_res)/32 +1;
-    for(i = 0; i < s->count; i++)
-    {
-        is_loaded[i] = (int*)malloc(k);
-        bzero(is_loaded[i], k);
-    }
-    /* Is loaded[i][k] gives the set of k*32 resources for task i*/
-
-    /* Reset the waits to 0... */
-   for( k = 0; k < s->count; k++ )
-    {
-        s->tasks[k].wait = 0;
-    }    
-
-    /* Run through the tasks and set the waits... */
-    for ( k = 0 ; k < s->count ; k++ ) {
-        struct task *t = &s->tasks[k];
-        if ( !( t->flags & task_flag_skip ) )
-            for ( j = 0 ; j < t->nr_unlocks ; j++ )
-                s->tasks[ t->unlocks[j] ].wait += 1;
-        }
-        
-    /* Sort the tasks topologically. */
-    int *tid = (int *)malloc( sizeof(int) * s->count );
-    for ( j = 0 , k = 0 ; k < s->count ; k++ )
-        if ( s->tasks[k].wait == 0 ) {
-            tid[j] = k;
-            j += 1;
-            }
-    for ( k = 0 ; k < j ; k++ ) {
-        struct task *t = &s->tasks[ tid[k] ];
-        for ( int kk = 0 ; kk < t->nr_unlocks ; kk++ )
-            if ( ( s->tasks[ t->unlocks[kk] ].wait -= 1 ) == 0 ) {
-                tid[j] = t->unlocks[kk];
-                j += 1;
-                }
-        }
-    if ( k < s->count )
-    {
-        error( "Circular dependencies detected." );
-     }
-    int max_parents = 0;
-    for(i = s->count-1; i >= 0; i--)
-    {
-        for(j = 0; j < s->tasks[i].nr_unlocks; j++)
-        {
-            num_parents[s->tasks[i].unlocks[j]]++;
-        }
-    }
-    for(i = 0; i < s->count; i++)
-    {
-        if(num_parents[i] > 0)
-            parents[i] = (int*)calloc(num_parents[i],sizeof(int));
-        else
-            parents[i] = NULL;
-
-        if(num_parents[i] > max_parents)
-        {
-            max_parents = num_parents[i];
-        }
-
-        num_parents[i] = 0;
-    }
-
-    for(i = 0; i < s->count; i++)
-    {
-        if(s->tasks[i].type == type_load || s->tasks[i].type == type_unload)
-            continue;
-        for(k = 0; k < s->tasks[i].nr_uses; k++)
-        {
-            use = s->tasks[i].uses[k];
-            usek = use >> 5; // use / 32;
-            usem = use & 31; // use % 32.
-
-            if((is_loaded[i][usek] & (1 << (31-usem))) == 0 )
-            {
-                qsched_addunlock(s, s->res[use].task , i ) ;
-                is_loaded[i][usek] |= (1 <<(31-usem));
-            }
-        }
-        for(k = 0; k < s->tasks[i].nr_unlocks; k++)
-        {
-            if(s->tasks[s->tasks[i].unlocks[k]].type == type_load ||
-                    s->tasks[s->tasks[i].unlocks[k]].type == type_unload )
-                continue;
-            for(j = 0; j < s->count_res/32 +1; j++)
-            {
-                    is_loaded[s->tasks[i].unlocks[k]][j] |= is_loaded[i][j];
-            }
-            parents[s->tasks[i].unlocks[k]][num_parents[s->tasks[i].unlocks[k]]] = i;
-            num_parents[s->tasks[i].unlocks[k]] = num_parents[s->tasks[i].unlocks[k]] + 1;
-        }
-    }
-    max_parents = 0;
-    for(i = 0; i < s->count; i++)
-    {
-        if(s->tasks[i].type == type_load || s->tasks[i].type == type_unload)
-            continue;
-        if(num_parents[i] > max_parents)
-        {
-            max_parents = num_parents[i];
-        }
-        bzero(is_loaded[i], k);
-    }
-    for(i = s->count-1; i >= 0; i--)
-    {
-        if(s->tasks[i].type == type_load || s->tasks[i].type == type_unload)
-            continue;
-        for(k = 0; k < s->tasks[i].nr_uses; k++)
-        {
-            use = s->tasks[i].uses[k];
-            usek = use >> 5; // use / 32;
-            usem = use & 31; // use % 32.
-            if((is_loaded[i][usek] & (1 << (31-usem))) == 0 )
-            {   
-                qsched_addunlock(s, i, s->res[use].utask );
-                is_loaded[i][usek] |= (1 << (31-usem));
-            }
-        }
-        for(k = 0; k < num_parents[i]; k++)
-        {
-            for(j = 0; j < s->count_res/32 +1; j++)
-            {   
-                is_loaded[parents[i][k]][j] |= is_loaded[i][j];
-            }
-        }
-    }
-
-}
-
 void qsched_prepare_loads ( struct qsched *s ) {
 
-int i, task, unload, j , k , x, unlocked = 0;
+int i, task, unload, j , k , unlocked = 0;
 struct task *t;
 int *sorted, lastindex;
 int *res, *res_data;
 
+//printf("tasks[63].wait_init = %i\n", s->tasks[63].wait_init);
+
 if(s->res[0].task != -1)
 {
     printf("Tasks already initialised, not redoing load/unload tasks");
@@ -991,19 +850,14 @@ ticks tic, toc_run, toc2 ;
 qsched_task_t *deps_new, *deps_new_key;
 s->size_deps *= qsched_stretch;
 
-
-
 j = 0;
-//printf("%i\n", s->tasks[94].unlocks);
-//        for(k = 0; k < s->tasks[94].nr_unlocks; k++)
-//            printf("%i ", s->tasks[94].unlocks[k]);
-//        printf("\n");
 
 /* Allocate a new dependency list. */
 if ( ( deps_new = (int*) malloc(s->size_deps * sizeof(int) ) ) == NULL ||
     ( deps_new_key = (int*) malloc(  s->size_deps * sizeof(int) ) ) == NULL )
         error( "Failed to allocate new deps lists." );
 tic = getticks();
+
 /* Copy the dependency list to the new list, leaving a space between each task to fit unload dependencies in.*/
 for(i = 0; i < s->count; i++)
 {
@@ -1011,7 +865,6 @@ for(i = 0; i < s->count; i++)
     /* Its possible we might not fit in weird cases so we need to make sure to expand if needed.*/
     if(j + t->nr_unlocks + t->nr_uses + t->nr_locks > s->size_deps)
     {
-    //    printf("\n");
         qsched_task_t *temp1, *temp2;
         s->size_deps *= qsched_stretch;
         /* Allocate a new task list. */
@@ -1033,34 +886,18 @@ for(i = 0; i < s->count; i++)
         free(deps_new_key);
         deps_new = temp1;
         deps_new_key = temp2;
-       // printf("Stretch at line 828. m = %i\n", m);
-    t = &s->tasks[i];
+        t = &s->tasks[i];
     }
     int start_j = j;
 
-    /*if(i <= 94)
-{
-        printf("%i\n", s->tasks[94].unlocks);
-        for(k = 0; k < s->tasks[94].nr_unlocks; k++)
-            printf("%i ", s->tasks[94].unlocks[k]);
-        printf(" i = %i\n", i);
-        printf("%i\n", t->unlocks);
-}*/
+    /* Sort the unlocks into position, and add enough room for any unlocks to load and unload tasks. */
     for(k = 0; k < t->nr_unlocks; k++)
     {
         deps_new[j] = t->unlocks[k];
-        //if(i == 48)
-          //  printf("%i %i %i %i\n", t->unlocks +k,s->deps_key, s->deps, s->count_deps);
         deps_new_key[j] = s->deps_key[&t->unlocks[k] - s->deps];
         j++;
     }
     t->unlocks = &deps_new[start_j];
-    /*if(i == 94)
-{
-        for(k = 0; k < t->nr_unlocks; k++)
-            printf("%i ", t->unlocks[k]);
-        printf("\n");
-}*/
     j+=t->nr_uses + t->nr_locks;
 }
 
@@ -1132,25 +969,6 @@ for(i = 1; i < s->count_res; i++)
 /* Sort super resources by memory address.*/
 qsched_sort(&res[sorted[s->count_res-1]], &res_data[sorted[s->count_res-1]], s->count_res - sorted[s->count_res-1], minVal(&res_data[sorted[s->count_res-1]], s->count_res - sorted[s->count_res-1]), maxVal(&res_data[sorted[s->count_res-1]], s->count_res - sorted[s->count_res-1]));
 
-
-/*
-for(i = 0; i < s->count_res; i++)
-{
-    printf("%i ", res_data[i]);
-}
-printf("\n");
-
-for(i = 0; i < s->count_res; i++)
-{
-printf("%i ", res[i]);
-}
-printf("\n");
-
-for(i = 0; i < s->count_res; i++)
-{
-    printf("%i ", s->res[res[i]].parent);   
-}
-printf("\n");*/
 /*res now contains an array of indices, first sorted by parent, then memory address of data. */
 int size=0;
 if(sorted[0] != 0)
@@ -1180,7 +998,6 @@ for( i = sorted[s->count_res-1]; i < s->count_res-1; i++ )
 {
     if(res_data[i] + (s->res[res[i]].size/sizeof(int)) > res_data[i+1])
     {
-        /*printf("i=%i, s->count_res=%i, res_data[i] = %i, size = %i, res_data[i]+size = %i, res_data[i+1] = %i\n",i, s->count_res, res_data[i], s->res[res[i]].size, res_data[i] + (s->res[res[i]].size/sizeof(int)), res_data[i+1]);*/
         error("Overlapping resources are not allowed.");
     }
 }
@@ -1192,7 +1009,6 @@ for(i = sorted[s->count_res]; i >= 0; i-- )
 }
 
 toc_run = getticks();
-//    message( "Sorting took %.3f ms" , ((double)(toc_run - tic)) * itpms );
 
 tic = getticks();
 /* If nothing overlaps create tasks.*/
@@ -1252,6 +1068,7 @@ for(i = 0; i < s->count_res; i++ )
     size_uses[i] = s->count_uses / s->count_res + 1;   
 }
 
+
 /* Add deps from tasks to unload tasks. */
 for(i = 0; i < s->count; i++)
 {
@@ -1295,9 +1112,22 @@ for(i = 0; i < s->count; i++)
     }
 }
 
-/* Loop through resources. */
+/* Loop through resources, count number dependencies each task needs to have added. */
+int *num_deps = (int*)calloc(sizeof(int) , s->count);
+for(i = 0; i < s->count_res; i++ )
+{
+    int ID = res[i];
+    struct res *resource = &s->res[res[i]];
+    num_deps[s->res[ID].utask] += sorted[ID+1] - sorted[ID];
+    if(resource->parent >= 0)
+        num_deps[resource->task]++;
+    num_deps[resource->task] += num_uses[ res[ i ] ];
+    
+}
+
+int *tasks_assigned = (int*)calloc(sizeof(int), s->count);
 
-// TODO Make sure to stretch if needed.
+/* Loop through resources. */
 for(i = 0; i < s->count_res; i++ )
 {
     int ID = res[i];
@@ -1306,16 +1136,16 @@ for(i = 0; i < s->count_res; i++ )
     int parent = s->res[ID].parent;
     struct res *resource = &s->res[ res[i] ];
 
-    printf("ID = %i, size = %i, numChild = %i, parent = %i\n", ID, size, numChildren, parent);
-    printf("task = %i, utask = %i\n", s->res[ID].task, s->res[ID].utask);
     if(s->res[ID].task == s->res[parent].task)
         continue;
     /* Loop through children if there are any. */    
     if(numChildren > 0)
     {
         /* Do unload task stuff first. */
-        s->tasks[resource->utask].unlocks = &deps_new[s->count_deps];
-        s->tasks[resource->utask].nr_unlocks = 0;
+        if(!tasks_assigned[resource->utask]) {
+            s->tasks[resource->utask].unlocks = &deps_new[s->count_deps];
+            s->tasks[resource->utask].nr_unlocks = 0;
+        }
         if(s->count_deps + numChildren > s->size_deps)
         {
             qsched_task_t *temp1, *temp2;
@@ -1338,7 +1168,6 @@ for(i = 0; i < s->count_res; i++ )
             free(deps_new_key);
             deps_new = temp1;
             deps_new_key = temp2;
-      //  printf("Stretch at line 1102, m = %i.\n", m);
         }
         for(j = 0; j < numChildren; j++)
         {
@@ -1350,15 +1179,19 @@ for(i = 0; i < s->count_res; i++ )
                 s->tasks[child->utask].wait_init += 1;
                 deps_new_key[s->count_deps] = resource->utask;
                 s->tasks[resource->utask].nr_unlocks += 1;
-                s->count_deps += 1;
             }
         }
+        if(!tasks_assigned[resource->utask]) {
+            s->count_deps += num_deps[resource->utask];
+            tasks_assigned[resource->utask] = 1;
+        }
     }
 
     /* Do load task stuff. */
-
-    s->tasks[resource->task].unlocks = &deps_new[s->count_deps];
-    s->tasks[resource->task].nr_unlocks = 0;
+    if(!tasks_assigned[resource->task]) {
+        s->tasks[resource->task].unlocks = &deps_new[s->count_deps];
+        s->tasks[resource->task].nr_unlocks = 0;
+    }
     if(numChildren > 0)
     {
         for(j = 0; j < numChildren; j++)
@@ -1388,12 +1221,13 @@ for(i = 0; i < s->count_res; i++ )
                     free(deps_new_key);
                     deps_new = temp1;
                     deps_new_key = temp2;
-        //            printf("Stretch at line 1151.\n");
                 }                
                 for(k = 0; k < num_uses[ res[ sorted[ ID ] + j ] ]; k++)
                 {
                     s->tasks[resource->task].unlocks[ s->tasks[resource->task].nr_unlocks ] = usage_list[ res[ sorted[ ID ] +j ] ][k];
                     s->tasks[ usage_list[ res[ sorted[ ID ] +j ] ][k] ].wait_init += 1;
+//                    if(usage_list[res[sorted[ID]+j]][k] == 63)
+//                        printf("Increased wait_init of task 63, wait_init now %i\n", s->tasks[63].wait_init);
                     deps_new_key[s->count_deps] = resource->task;
                     s->tasks[resource->task].nr_unlocks += 1;
                     s->count_deps += 1;
@@ -1423,14 +1257,17 @@ for(i = 0; i < s->count_res; i++ )
             free(deps_new_key);
             deps_new = temp1;
             deps_new_key = temp2;
-   //     printf("Stretch at line 1185.\n");
     }
     if( parent > 0 )
     {
         s->tasks[resource->task].unlocks[ s->tasks[resource->task].nr_unlocks ] = s->res[parent].task;
         deps_new_key[s->count_deps] = resource->task;
         s->tasks[s->res[parent].task].wait_init += 1;
+        //if(s->res[parent].task == 63)
+//            printf("task %i should unlock task 63, %i position.\n", resource->task, s->tasks[resource->task].nr_unlocks);
         s->tasks[resource->task].nr_unlocks += 1;
+//        if(s->res[parent].task == 63)
+//            printf("s->tasks[resource->task].nr_unlocks = %i\n", s->tasks[resource->task].nr_unlocks);
         s->count_deps += 1;
     }
 
@@ -1439,10 +1276,19 @@ for(i = 0; i < s->count_res; i++ )
         s->tasks[resource->task].unlocks[ s->tasks[resource->task].nr_unlocks ] = usage_list[ res[ i ] ][k];
         deps_new_key[s->count_deps] = resource->task;
         s->tasks[usage_list[ res[ i ] ][k]].wait_init += 1;
+//        if(usage_list[res[i]][k] == 63)
+//            printf("task %i should unlock task 63\n", resource->task);
         s->tasks[resource->task].nr_unlocks += 1;
-        s->count_deps += 1;
+//        s->count_deps += 1;
     }
-
+    if(!tasks_assigned[resource->task]) {
+        s->count_deps += num_deps[resource->task];
+        tasks_assigned[resource->task] = 1;
+    }
+  //r(k = 0; k < s->tasks[66].nr_unlocks; k++)
+//        printf("%i ", s->tasks[66].unlocks[k]);
+//    printf("\n");
+//    printf("s->tasks[66].unlocks - deps_new = %i\n", s->tasks[66].unlocks - deps_new);
 }
 //printf("s->deps = %i, deps_new = %i\n", s->deps, deps_new);
 free(s->deps);
@@ -1451,164 +1297,38 @@ s->deps = deps_new;
 s->deps_key = deps_new_key;
 s->flags &= ~qsched_flag_dirty;
 toc2 += getticks() - tic;
+ //   printf("s->tasks[50].unlocks - s->deps = %i\n", s->tasks[50].unlocks - s->deps);
 
-//printf("Number tasks = %i\n", s->count);
-//printf("Number dependencies = %i\n", s->count_deps);
-/* Set up dependencies with the rest of the system.*/
-
-
-
-/*New version*/
-    /* Reset the waits to 0... */
-/*    for( k = 0; k < s->count; k++ )
-    {
-        s->tasks[k].wait = 0;
-    }    */
-
-    /* Run through the tasks and set the waits... */
-/*    for ( k = 0 ; k < s->count ; k++ ) {
-        struct task *t = &s->tasks[k];
-        if ( !( t->flags & task_flag_skip ) )
-            for ( j = 0 ; j < t->nr_unlocks ; j++ )
-                s->tasks[ t->unlocks[j] ].wait += 1;
-        }
-        */
-    /* Sort the tasks topologically. */
-/*    int *tid = (int *)malloc( sizeof(int) * s->count );
-    for ( j = 0 , k = 0 ; k < s->count ; k++ )
-        if ( s->tasks[k].wait == 0 ) {
-            tid[j] = k;
-            j += 1;
-            }
-    for ( k = 0 ; k < j ; k++ ) {
-        struct task *t = &s->tasks[ tid[k] ];
-        for ( int kk = 0 ; kk < t->nr_unlocks ; kk++ )
-            if ( ( s->tasks[ t->unlocks[kk] ].wait -= 1 ) == 0 ) {
-                tid[j] = t->unlocks[kk];
-                j += 1;
-                }
-        }
-    if ( k < s->count )
-    {
-        //printf("k = %i, count = %i\n", k, count);
-        error( "Circular dependencies detected." );
-     }
-
-*/
-/*Do unlocks */
-/*for(i = 0; i < s->count; i++)
-{
-    struct task *t = &s->tasks[i];
-    int result = 0;
-    if(t->type == type_ghost || t->type == type_unload || t->type == type_load)
-        continue;
-    for(j = 0; j < t->nr_uses; j++)
-    {
-        result = 0;
-        for(k = 0; k < t->nr_unlocks && result == 0; k++)
-        {
-            result = transitive_use_unlocks(s, &s->tasks[t->unlocks[k]], t->uses[j],0);
-                
-
-        }
-        if( result == 0)
-            qsched_addunlock(s, i, s->res[t->uses[j]].utask);
-    }
-    for(j = 0; j < t->nr_locks; j++)
-    {
-        result = 0;
-        for(k = 0; k < t->nr_unlocks && result == 0; k++)
-        {
-            result =transitive_use_unlocks(s, &s->tasks[t->unlocks[k]], t->locks[j],0);
+/*printf("Dependencies leading from task 50's\n");
+for(i = 0; i < s->tasks[50].nr_unlocks + 5; i++)
+    printf("%i ", s->tasks[50].unlocks[i]);
+printf("\n");
 
-        }
-        if(result == 0)
-            qsched_addunlock(s, i, s->res[t->locks[j]].utask);
-    }
-}*/
+printf("s->tasks[50].nr_unlocks = %i, num_deps[50] = %i\n", s->tasks[50].nr_unlocks, num_deps[50]);
 
-/*Do locks */
-/*for(i = s->count-1; i >= 0; i--)
-{
-    struct task *t = &s->tasks[i];
-    struct task *new_t;
-    int result = 0;
-    if(t->type == type_ghost || t->type == type_unload || t->type == type_load)
-        continue;
-    for(j = 0; j < t->nr_uses; j++)
-    {
-        result = 0;
-        for(k = i-1; k >= 0; k--)
-        {
-            new_t = &s->tasks[k];
-            for(x = 0; x < new_t->nr_unlocks && result == 0; x++)
-            {
-                if(new_t->unlocks[x] == i)
-                {
-                    result = transitive_use_locks(s, k, t->uses[j],0);
-                }
-            }
-        }
-        if(result == 0)
-        {
-            qsched_addunlock(s, s->res[t->uses[j]].task, i);
-        } 
-    }
-    for(j = 0; j < t->nr_locks; j++)
+free(num_deps);
+printf("Printing dependencies here:\n");
+for(i = 0; i < s->count; i++)
+{*/
+   // printf("Task ID: %i, type = %i: ", i, s->tasks[i].type);
+    /*if(s->tasks[i].type < 100)
     {
-        result = 0;
-        for(k = i-1; k >= 0; k--)
+        for(k = 0; k < s->count_res; k++)
         {
-            new_t = &s->tasks[k];
-            for(x = 0; x < new_t->nr_unlocks && result == 0; x++)
+            if(s->res[k].task == i || s->res[k].utask == i )
             {
-                if(new_t->unlocks[x] == i)
-                {
-                    result = transitive_use_locks(s, k, t->locks[j],0);
-                }
+                printf(" resource ID = %i", k);
+                if(s->res[k].parent != -1)
+                    printf(" parent resource ID = %i", s->res[k].parent);
+                printf(": ");
             }
         }
-        if(result == 0)
-        {
-            qsched_addunlock(s, s->res[t->locks[j]].task, i);
-        } 
-    }
-   
-}*/
-
-/* Old version*/
-/*for(i = 0; i < s->count; i++)
-{
-    struct task *t = &s->tasks[i];
-    if(t->type == type_load || t->type == type_unload || t-> type == type_ghost)
-        continue;
-
-    for(k = 0; k < t->nr_uses; k++)
-    {
-        qsched_addunlock(s, s->res[t->uses[k]].task, i);
-        qsched_addunlock(s, i, s->res[t->uses[k]].utask);
-    }
-
-    for(k = 0; k < t->nr_locks; k++)
-    {
-        qsched_addunlock(s, s->res[t->locks[k]].task, i);
-        qsched_addunlock(s, i, s->res[t->locks[k]].utask);
-    }
-}*/
-//qsched_prepare_deps( s );
-//printf("Number dependencies = %i\n", s->count_deps);
-/*#ifdef PRIQ
-int PCI_res;
-PCI_res = qsched_addres(s , qsched_owner_none , qsched_res_none , NULL, 0 , NULL);
-s->res[PCI_res].lock = PCIEX;
-for(i = 0; i < s->count; i++)
-{
-    if(s->tasks[i].type == type_load)
-    {
-        qsched_addlock(s, i, PCI_res);
-    }
+    }*/
+   /* for(k = 0; k < s->tasks[i].nr_unlocks; k++)
+        printf("%i ", s->tasks[i].unlocks[k]);
+    printf("\n");
 }
-#endif*/
+    printf("----------------\n");*/
 toc_run = getticks();
     //message( "Setting up dependencies took %.3f ms" , toc2 * itpms );
 //error("Got to here");
@@ -1756,7 +1476,6 @@ toc_run = getticks();
 
     
 
-    
     /* Get a pointer to the tasks, set the count. */
     tasks = s->tasks;
     count = s->count;
@@ -1783,13 +1502,13 @@ toc_run = getticks();
       //  }
         
         /* Run throught the tasks and link the locks and unlocks. */
-        tasks[0].unlocks = s->deps;
+//        tasks[0].unlocks = s->deps;
         tasks[0].locks = s->locks;
-        tasks[0].uses = s->uses;
+ //       tasks[0].uses = s->uses;
         for ( k = 1 ; k < count ; k++ ) {
-            tasks[k].unlocks = &tasks[k-1].unlocks[ tasks[k-1].nr_unlocks ];
+      //      tasks[k].unlocks = &tasks[k-1].unlocks[ tasks[k-1].nr_unlocks ];
             tasks[k].locks = &tasks[k-1].locks[ tasks[k-1].nr_locks ];
-            tasks[k].uses = &tasks[k-1].uses[ tasks[k-1].nr_uses ];
+        //    tasks[k].uses = &tasks[k-1].uses[ tasks[k-1].nr_uses ];
             }
         
         /* All cleaned-up now! */
@@ -1825,7 +1544,7 @@ toc_run = getticks();
     //        printf("Task ID %i has wait %i and wait_init %i and type %i\n", k, t->wait, t->wait_init, t->type);
       //  }
     }
-        
+
     /* Sort the tasks topologically. */
     int *tid = (int *)malloc( sizeof(int) * count );
     for ( j = 0 , k = 0 ; k < count ; k++ )
@@ -1846,26 +1565,34 @@ toc_run = getticks();
         }
         }
 
+
         /* Print all dependencies */
-        for(i = 0; i < count; i++ )
-        {
-            printf("Task ID: %i, type=%i, ", i, tasks[i].type);
-            for(j = 0; j < tasks[i].nr_unlocks; j++)
-            {
-                printf("%i ", tasks[i].unlocks[j]);
-            }
-            printf("\n");
-        }
+       // for(i = 0; i < count; i++ )
+//        {
+//            printf("Task ID: %i, type=%i, wait= %i,  ", tid[i], tasks[tid[i]].type, tasks[tid[i]].wait);
+  //          printf("Task ID: %i, type=%i, wait= %i,  ", i, tasks[i].type, tasks[i].wait);
+//            for(j = 0; j < tasks[tid[i]].nr_unlocks; j++)
+//            {
+//                printf("%i ", tasks[tid[i]].unlocks[j]);
+//            }
+//            printf("\n");
+//        }
     if ( k < count )
     {
-        printf("k = %i, wait = %i\n", tid[k-1], tasks[tid[k-1]].wait);
+        for(i = 0; i < count; i++)
+            if(tasks[tid[i]].wait != 0)
+                printf("Task %i has nonzero wait, wait is %i, wait_init is %i\n", tid[i], tasks[tid[i]].wait, tasks[tid[i]].wait_init);
+        //printf("tasks[63].wait = %i, tasks[63].wait_init = %i\n", tasks[63].wait, tasks[63].wait_init);
+        printf("k = %i, count = %i\n", k, count);
+        printf("tid[k-1] = %i, tid[k-1].wait = %i, initial wait = %i\n", tid[k], tasks[tid[k]].wait, tasks[tid[k]].wait_init);
+
         for(i = 0; i < count; i++)
         {
-            t = &tasks[i];
+            t = &tasks[tid[i]];
             for(j = 0; j < t->nr_unlocks; j++)
             {
-                if(t->unlocks[j] == tid[k-1])
-                    printf("Task %i is unlocking task %i\n",j, tid[k-1]);
+                if(t->unlocks[j] == tid[k])
+                    printf("Task %i is unlocking task %i\n",tid[i], tid[k]);
             } 
         }
         error( "Circular dependencies detected." );
@@ -2078,13 +1805,14 @@ qsize = max(2*s->count, 512);
             data[queues[1].count++] = i;
             data2[queues[1].count-1] = -temp[i].weight;
         }
-        if(temp[i].type != type_load && temp[i].wait == 0)
+        if(temp[i].type != type_load && temp[i].type != type_ghost && temp[i].wait == 0)
         {
-            printf("%i %i\n", temp[i].type, i);
+            printf("Task of type %i with no wait, ID = %i\n", temp[i].type, i);
         }   
     
     }
-    qsched_sort(data, data2, queues[1].count, minVal(data2,queues[1].count), maxVal(data2, queues[1].count));
+    /* Opt to use quicksort as difference between max and min weights can be too large.*/
+    qsched_quicksort(data, data2, queues[1].count, minVal(data2,queues[1].count), maxVal(data2, queues[1].count));
     free(data2);
      for ( k = queues[1].count ; k < qsize ; k++ )
         data[k] = -1;  
diff --git a/src/cuda_queue.cu.outdated b/src/cuda_queue.cu.outdated
new file mode 100644
index 0000000000000000000000000000000000000000..9ef4acf8c426e8fe01d7cd8c1f52908096242491
--- /dev/null
+++ b/src/cuda_queue.cu.outdated
@@ -0,0 +1,150 @@
+/*
+* Not used.
+*/
+void qsched_prepare_deps( struct qsched *s )
+{
+    int **is_loaded;
+    int **parents;
+    int i, k, j, use, usek, usem;
+    int *num_parents;
+    is_loaded = (int**)malloc(sizeof(int*) * s->count);
+    parents = (int**)malloc(sizeof(int*) * s->count);
+    num_parents = (int*)malloc(sizeof(int) * s->count);
+    bzero(num_parents, sizeof(int)*s->count);
+    k = (sizeof(int)*s->count_res)/32 +1;
+    for(i = 0; i < s->count; i++)
+    {
+        is_loaded[i] = (int*)malloc(k);
+        bzero(is_loaded[i], k);
+    }
+    /* Is loaded[i][k] gives the set of k*32 resources for task i*/
+
+    /* Reset the waits to 0... */
+   for( k = 0; k < s->count; k++ )
+    {
+        s->tasks[k].wait = 0;
+    }    
+
+    /* Run through the tasks and set the waits... */
+    for ( k = 0 ; k < s->count ; k++ ) {
+        struct task *t = &s->tasks[k];
+        if ( !( t->flags & task_flag_skip ) )
+            for ( j = 0 ; j < t->nr_unlocks ; j++ )
+                s->tasks[ t->unlocks[j] ].wait += 1;
+        }
+        
+    /* Sort the tasks topologically. */
+    int *tid = (int *)malloc( sizeof(int) * s->count );
+    for ( j = 0 , k = 0 ; k < s->count ; k++ )
+        if ( s->tasks[k].wait == 0 ) {
+            tid[j] = k;
+            j += 1;
+            }
+    for ( k = 0 ; k < j ; k++ ) {
+        struct task *t = &s->tasks[ tid[k] ];
+        for ( int kk = 0 ; kk < t->nr_unlocks ; kk++ )
+            if ( ( s->tasks[ t->unlocks[kk] ].wait -= 1 ) == 0 ) {
+                tid[j] = t->unlocks[kk];
+                j += 1;
+                }
+        }
+    if ( k < s->count )
+    {
+        error( "Circular dependencies detected." );
+     }
+
+    /* Store the maximum number of parents of a task.*/
+    int max_parents = 0;
+
+    /* Compute the number of parents for each task */
+    for(i = s->count-1; i >= 0; i--)
+    {
+        for(j = 0; j < s->tasks[i].nr_unlocks; j++)
+        {
+            num_parents[s->tasks[i].unlocks[j]]++;
+        }
+    }
+    /* Allocate memory to store parents in.*/
+    for(i = 0; i < s->count; i++)
+    {
+        if(num_parents[i] > 0)
+            parents[i] = (int*)calloc(num_parents[i],sizeof(int));
+        else
+            parents[i] = NULL;
+
+        if(num_parents[i] > max_parents)
+        {
+            max_parents = num_parents[i];
+        }
+
+        num_parents[i] = 0;
+    }
+
+    /* This seems to be outdated and unused?*/
+    for(i = 0; i < s->count; i++)
+    {
+        if(s->tasks[i].type == type_load || s->tasks[i].type == type_unload)
+            continue;
+        
+        for(k = 0; k < s->tasks[i].nr_uses; k++)
+        {
+            use = s->tasks[i].uses[k];
+            usek = use >> 5; // use / 32;
+            usem = use & 31; // use % 32.
+
+            if((is_loaded[i][usek] & (1 << (31-usem))) == 0 )
+            {
+                qsched_addunlock(s, s->res[use].task , i ) ;
+                printf("We actually did this?!");
+                is_loaded[i][usek] |= (1 <<(31-usem));
+            }
+        }
+        for(k = 0; k < s->tasks[i].nr_unlocks; k++)
+        {
+            if(s->tasks[s->tasks[i].unlocks[k]].type == type_load ||
+                    s->tasks[s->tasks[i].unlocks[k]].type == type_unload )
+                continue;
+            for(j = 0; j < s->count_res/32 +1; j++)
+            {
+                    is_loaded[s->tasks[i].unlocks[k]][j] |= is_loaded[i][j];
+            }
+            parents[s->tasks[i].unlocks[k]][num_parents[s->tasks[i].unlocks[k]]] = i;
+            num_parents[s->tasks[i].unlocks[k]] = num_parents[s->tasks[i].unlocks[k]] + 1;
+        }
+    }
+    max_parents = 0;
+    for(i = 0; i < s->count; i++)
+    {
+        if(s->tasks[i].type == type_load || s->tasks[i].type == type_unload)
+            continue;
+        if(num_parents[i] > max_parents)
+        {
+            max_parents = num_parents[i];
+        }
+        bzero(is_loaded[i], k);
+    }
+    for(i = s->count-1; i >= 0; i--)
+    {
+        if(s->tasks[i].type == type_load || s->tasks[i].type == type_unload)
+            continue;
+        for(k = 0; k < s->tasks[i].nr_uses; k++)
+        {
+            use = s->tasks[i].uses[k];
+            usek = use >> 5; // use / 32;
+            usem = use & 31; // use % 32.
+            if((is_loaded[i][usek] & (1 << (31-usem))) == 0 )
+            {   
+                qsched_addunlock(s, i, s->res[use].utask );
+                is_loaded[i][usek] |= (1 << (31-usem));
+            }
+        }
+        for(k = 0; k < num_parents[i]; k++)
+        {
+            for(j = 0; j < s->count_res/32 +1; j++)
+            {   
+                is_loaded[parents[i][k]][j] |= is_loaded[i][j];
+            }
+        }
+    }
+
+}