diff --git a/src/cuda_queue.cu b/src/cuda_queue.cu
index 4477f61be656a653bb98c98b8d1dd67e68a5d15d..0b50bb1b715420f6384a6752e24a9b5620b9db78 100644
--- a/src/cuda_queue.cu
+++ b/src/cuda_queue.cu
@@ -16,11 +16,24 @@
  * along with this program.  If not, see <http://www.gnu.org/licenses/>.
  * 
  ******************************************************************************/
+/* Config parameters. */
+#include "../config.h"
+
+/* Standard includes. */
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
 
 #include "cuda_queue.h"
+#include "quicksched.h"
+#include "res.h"
 
-/*Define if conflicts are enabled or not.*/
+/*Define task types required for GPU */
+#define type_load (-100)
+#define type_unload (-101)
 
+
+/*Define if conflicts are enabled or not.*/
 //#define GPU_locks
 
 /*Declare variables required for the queue*/
@@ -29,9 +42,9 @@ __constant__ int cuda_nrqueues;
 __constant__ int cuda_queue_size;
 
 __device__ struct task *tasks_cuda;
-__device__ int *locks_cuda;
-__device__ int *uses_cuda;
-__device__ int *deps_cuda;
+__device__ qsched_res_t *locks_cuda;
+__device__ qsched_res_t *uses_cuda;
+__device__ qsched_task_t *deps_cuda;
 __device__ struct res *res_cuda;
 
 /**
@@ -149,30 +162,151 @@ __device__ int runner_cuda_gettask ( struct queue_cuda *q ) {
 
 
 
-int qsched_prepare_cuda ( struct qsched *s , ) {
+
+void qsched_prepare_loads ( struct qsched *s ) {
+
+int i, task, unload, tb, j, k, unlocked=0;
+struct task *t;
+/* Create a load task for each resource. */
+for(i = 0; i < s->count_res; i++)
+{   
+    
+    if(s->res[i].size == 0)
+        continue;
+
+
+
+
+    task = qsched_addtask( s , type_load , 0 , &i , sizeof(int) , 0 );
+    unload = qsched_addtask( s , type_unload, 0 , &i, sizeof(int), 0 );
+    /*Load task unlocks each task that uses or locks the specified resource */
+    for(j = 0; j < s->count; j++)
+    {
+          t = &s->tasks[j];
+        
+        for(k = 0; k < t->nr_uses; k++)
+        {
+            if(t->uses[k] == i){
+                qsched_addunlock( s , task, j );
+                qsched_addunlock( s , j , unload);
+                unlocked = 1;
+                break;
+            }
+        }
+        if(unlocked == 1)
+        {
+            unlocked = 0;
+            continue;
+        }
+        for(k = 0; k < t->nr_locks; k++)
+        {
+            if(t->locks[k] == i){
+                qsched_addunlock( s , task , j );
+                qsched_addunlock( s , j , unload);  
+                break;  
+            }
+
+        }
+    }
+    
+    
+}
+
+
+
+
+
+}
+
+
+
+
+int qsched_prepare_cuda ( struct qsched *s ) {
+
+int i;
+struct task *cuda_t;
+qsched_res_t *setup;
+qsched_task_t *setup_t;
+struct res *res_t;
+int *data;
+
+/*Setup Load and unload tasks*/
+
+
+
+
+
+
+
+
+
+
+
 
 /*Copy the qsched data to the device*/
-if( cudaMalloc( &tasks_cuda , sizeof(struct task) * s->count != cudaSuccess )
+if( cudaMalloc( &cuda_t , sizeof(struct task) * s->count ) != cudaSuccess )
     error("Failed to allocate task array on the device.");
 
-if( cudaMemcpy(  )
+if( cudaMemcpy( cuda_t, s->tasks, sizeof(struct task) * s->count , cudaMemcpyHostToDevice ) != cudaSuccess )
+    error("Failed to copy tasks to the device.");
+if( cudaMemcpyToSymbol ( tasks_cuda , &cuda_t , sizeof(struct task *) , 0 , cudaMemcpyHostToDevice ) != cudaSuccess )
+    error("Failed to copy task pointer to the device.");
 
-if( cudaMalloc( &locks_cuda , sizeof(int) * s->count_lock != cudaSuccess )
+if( cudaMalloc( &setup , sizeof(qsched_res_t) * s->count_locks ) != cudaSuccess )
     error("Failed to allocate locks array on the device.");
+if( cudaMemcpy( setup , s->locks , sizeof(qsched_res_t) * s->count_locks, cudaMemcpyHostToDevice ) != cudaSuccess )
+    error("Failed to copy locks to the device.");
+if( cudaMemcpyToSymbol ( locks_cuda , &setup , sizeof(qsched_res_t *) , 0 , cudaMemcpyHostToDevice ) != cudaSuccess )
+    error("Failed to copy locks pointer to the device.");
 
-if( cudaMalloc( &deps_cuda , sizeof(int) * s->count_deps != cudaSuccess )
+if( cudaMalloc( &setup_t , sizeof(qsched_task_t) * s->count_deps ) != cudaSuccess )
     error("Failed to allocate deps array on the device.");
+if( cudaMemcpy( setup_t , s->deps , sizeof(qsched_task_t) * s->count_deps, cudaMemcpyHostToDevice ) != cudaSuccess )
+    error("Failed to copy deps to the device.");
+if( cudaMemcpyToSymbol ( deps_cuda , &setup_t , sizeof(qsched_task_t *) , 0 , cudaMemcpyHostToDevice ) != cudaSuccess )
+    error("Failed to copy deps pointer to the device.");
 
-if( cudaMalloc( &uses_cuda , sizeof(int) * s->count_uses != cudaSuccess )
+if( cudaMalloc( &setup , sizeof(qsched_res_t) * s->count_uses ) != cudaSuccess )
    error("Failed to allocate use array on the device.");
+if( cudaMemcpy( setup , s->uses , sizeof(qsched_res_t) * s->count_uses, cudaMemcpyHostToDevice ) != cudaSuccess )
+    error("Failed to copy uses to the device.");
+if( cudaMemcpyToSymbol ( uses_cuda , &setup , sizeof(qsched_res_t *) , 0 , cudaMemcpyHostToDevice ) != cudaSuccess )
+    error("Failed to copy uses pointer to the device.");
 
-if( cudaMalloc( &res_cuda , sizeof( res) * s->count_res != cudaSuccess )
+if( cudaMalloc( &res_t , sizeof(struct res) * s->count_res ) != cudaSuccess )
     error("Failed to allocated on the device.");
 
+for(i = 0; i < s->count_res; i++)
+{   
+    if(s->res[i].size == 0)
+        continue;
+    if( cudaMalloc( &data, sizeof(int) * s->res[i].size) != cudaSuccess )
+        error("Failed to allocate data space on the device.");
+    s->res[i].gpu_data = data;
+}
+
+
+if( cudaMemcpy( res_t , s->res , sizeof(struct res) * s->count_res , cudaMemcpyHostToDevice) != cudaSuccess )
+    error("Failed to copy resources to the device.");
+if( cudaMemcpyToSymbol( res_cuda , &res_t , sizeof(struct res *) , 0 , cudaMemcpyHostToDevice) != cudaSuccess )
+    error("Failed to copy res pointer to the device.");
+
+
+
+
+
+
+
+
+
+
+
+
+
 /* Initialize the queues. */
 int nr_queues= 2, k, qsize;
-int *data;
-struct queue_cuda queues[ cuda_maxqueues ];
+//int *data;
+struct queue_cuda queues[ cuda_numqueues ];
 
 
 
diff --git a/src/cycle.h b/src/cycle.h
index 16f57e7e1ef942d2736f4328be9117b2deab6d6e..f92d1d973fa84aaf166e5154b0627a9fad955d32 100644
--- a/src/cycle.h
+++ b/src/cycle.h
@@ -78,6 +78,10 @@
 
 /***************************************************************************/
 
+#ifdef WITH_CUDA
+#define INLINE inline
+#endif
+
 #if TIME_WITH_SYS_TIME
 # include <sys/time.h>
 # include <time.h>
@@ -526,3 +530,6 @@ INLINE_ELAPSED(inline)
 #endif
 #endif /* HAVE_MIPS_ZBUS_TIMER */
 
+#ifdef WITH_CUDA
+#undef INLINE
+#endif
diff --git a/src/lock.h b/src/lock.h
index abdfc5ddf1df0ee03438ca3aa99385c9ccf6f3ad..f814a09e378b570fc606ff24535cc21adb60a5c3 100644
--- a/src/lock.h
+++ b/src/lock.h
@@ -35,16 +35,16 @@
     #define lock_type volatile int
     #define lock_init( l ) ( *l = 0 )
     #define lock_destroy( l ) 0
-    INLINE static int lock_lock ( volatile int *l ) {
-        while( atomicCAS( l , 0 , 1 ) != 0 );
+    __device__ inline static int lock_lock ( volatile int *l ) {
+        while( atomicCAS( (int *)l , 0 , 1 ) != 0 );
         return 0;
         }
-    INLINE static int lock_trylock ( volatile int *l ) {
-        int res = atomicCAS( l , 0 , 1 );
+    __device__ inline static int lock_trylock ( volatile int *l ) {
+        int res = atomicCAS( (int *)l , 0 , 1 );
         return res;
     }
-    INLINE static int lock_unlock ( volatile int *l ) {
-        int res = atomicCAS ( l , 1 , 0 ) != 1;
+    __device__ inline static int lock_unlock ( volatile int *l ) {
+        int res = atomicCAS ( (int *)l , 1 , 0 ) != 1;
         return res;
     }
     #define lock_unlock_blind( l ) atomicCAS( &l , 1 , 0 )
diff --git a/src/qsched.c b/src/qsched.c
index a183dd24611a3b9e8f6b7c8ac95e5b9a168bb148..5bcf1487d8b321ae308f5aa46254e8fdbb9902b6 100644
--- a/src/qsched.c
+++ b/src/qsched.c
@@ -884,6 +884,10 @@ void qsched_prepare ( struct qsched *s ) {
 
     /* Lock the sched. */
     lock_lock( &s->lock );
+
+    #ifdef WITH_CUDA
+    qsched_prepare_loads( s );
+    #endif
     
     /* Get a pointer to the tasks, set the count. */
     tasks = s->tasks;
diff --git a/src/qsched.h b/src/qsched.h
index ee08fe41f7a58d530e8af0277a499aafa2167b4e..a2d18d3818d074a5e74c9c640a85e0503f6fee7e 100644
--- a/src/qsched.h
+++ b/src/qsched.h
@@ -163,8 +163,8 @@ struct qsched {
 
 /* Function prototypes. */
 /* Internal functions. */
-void qsched_sort ( int *restrict data , int *restrict ind , int N , int min , int max );
-void qsched_sort_rec ( int *restrict data , int *restrict ind , int N , int min , int max );
+void qsched_sort ( int *data , int *ind , int N , int min , int max );
+void qsched_sort_rec ( int *data , int *ind , int N , int min , int max );
 struct task *qsched_gettask ( struct qsched *s , int qid );
 void qsched_done ( struct qsched *s , struct task *t );
 void *qsched_getdata( struct qsched *s , struct task *t );
diff --git a/src/task.h b/src/task.h
index c9c57d6436fe659352133ce38b79fc63c719da7e..dccc2dae7e078510db30c7d368ad1c9769668978 100644
--- a/src/task.h
+++ b/src/task.h
@@ -22,7 +22,6 @@
 #define task_flag_skip                  1
 #define task_flag_virtual               2
 
-
 /* The task data structure. */
 struct task {