Commit cdb7c197 authored by Aidan Chalk

Code now automatically generates load and unload tasks for the device

parent 90449ae1
@@ -16,11 +16,24 @@
* along with this program. If not, see <http://www.gnu.org/licenses/>.
*
******************************************************************************/
/* Config parameters. */
#include "../config.h"
/* Standard includes. */
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include "cuda_queue.h"
#include "quicksched.h"
#include "res.h"
-/*Define if conflicts are enabled or not.*/
+/*Define task types required for GPU */
+#define type_load -100
+#define type_unload -101
+/*Define if conflicts are enabled or not.*/
//#define GPU_locks
/*Declare variables required for the queue*/
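/* Editor's sketch, not part of the commit: the negative type codes above are
 * presumably chosen so the GPU-only load/unload tasks cannot collide with
 * user-defined task types, which the QuickSched examples keep non-negative.
 * A device-side runner would then dispatch on them roughly as follows
 * (runner_dispatch_sketch is a hypothetical name): */
__device__ void runner_dispatch_sketch ( struct task *t ) {
    switch ( t->type ) {
        case type_load:    /* stage the resource's data host -> device */
            break;
        case type_unload:  /* copy the resource's data back device -> host */
            break;
        default:           /* anything else is a user task */
            break;
    }
}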
@@ -29,9 +42,9 @@ __constant__ int cuda_nrqueues;
__constant__ int cuda_queue_size;
__device__ struct task *tasks_cuda;
-__device__ int *locks_cuda;
-__device__ int *uses_cuda;
-__device__ int *deps_cuda;
+__device__ qsched_res_t *locks_cuda;
+__device__ qsched_res_t *uses_cuda;
+__device__ qsched_task_t *deps_cuda;
__device__ struct res *res_cuda;
/**
@@ -149,30 +162,151 @@ __device__ int runner_cuda_gettask ( struct queue_cuda *q ) {
-int qsched_prepare_cuda ( struct qsched *s , ) {
+void qsched_prepare_loads ( struct qsched *s ) {
+    int i, task, unload, tb, j, k, unlocked=0;
+    struct task *t;
+
+    /* Create a load task for each resource. */
+    for(i = 0; i < s->count_res; i++)
+    {
+        if(s->res[i].size == 0)
+            continue;
+        task = qsched_addtask( s , type_load , 0 , &i , sizeof(int) , 0 );
+        unload = qsched_addtask( s , type_unload , 0 , &i , sizeof(int) , 0 );
+
+        /*Load task unlocks each task that uses or locks the specified resource */
+        for(j = 0; j < s->count; j++)
+        {
+            t = &s->tasks[j];
+            for(k = 0; k < t->nr_uses; k++)
+            {
+                if(t->uses[k] == i){
+                    qsched_addunlock( s , task , j );
+                    qsched_addunlock( s , j , unload );
+                    unlocked = 1;
+                    break;
+                }
+            }
+            if(unlocked == 1)
+            {
+                unlocked = 0;
+                continue;
+            }
+            for(k = 0; k < t->nr_locks; k++)
+            {
+                if(t->locks[k] == i){
+                    qsched_addunlock( s , task , j );
+                    qsched_addunlock( s , j , unload );
+                    break;
+                }
+            }
+        }
+    }
+}
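/* Usage note (editor's): for a resource i and any task j that uses or locks
 * it, the two qsched_addunlock() calls above produce the dependency chain
 *
 *     load(i)  ->  j  ->  unload(i)
 *
 * so j becomes runnable only after i's data has been staged on the device,
 * and the copy back to the host waits until j has completed. A resource
 * touched by several tasks shares a single load/unload pair between them. */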
+int qsched_prepare_cuda ( struct qsched *s ) {

     int i;
+    struct task *cuda_t;
+    qsched_res_t *setup;
+    qsched_task_t *setup_t;
+    struct res *res_t;
+    int *data;

+    /*Setup Load and unload tasks*/

     /*Copy the qsched data to the device*/
-    if( cudaMalloc( &tasks_cuda , sizeof(struct task) * s->count != cudaSuccess )
+    if( cudaMalloc( &cuda_t , sizeof(struct task) * s->count ) != cudaSuccess )
         error("Failed to allocate task array on the device.");
-    if( cudaMemcpy( )
+    if( cudaMemcpy( cuda_t , s->tasks , sizeof(struct task) * s->count , cudaMemcpyHostToDevice ) != cudaSuccess )
         error("Failed to copy tasks to the device.");
+    if( cudaMemcpyToSymbol ( cuda_t , tasks_cuda , sizeof(struct task *) , cudaMemcpyHostToDevice ) != cudaSuccess )
+        error("Failed to copy task pointer to the device.");

-    if( cudaMalloc( &locks_cuda , sizeof(int) * s->count_lock != cudaSuccess )
+    if( cudaMalloc( &setup , sizeof(qsched_res_t) * s->count_locks ) != cudaSuccess )
         error("Failed to allocate locks array on the device.");
+    if( cudaMemcpy( &setup , s->locks , sizeof(qsched_res_t) * s->count_locks , cudaMemcpyHostToDevice ) != cudaSuccess )
+        error("Failed to copy locks to the device.");
+    if( cudaMemcpyToSymbol ( setup , locks_cuda , sizeof(qsched_res_t *) , cudaMemcpyHostToDevice ) != cudaSuccess )
+        error("Failed to copy locks pointer to the device.");

-    if( cudaMalloc( &deps_cuda , sizeof(int) * s->count_deps != cudaSuccess )
+    if( cudaMalloc( &setup_t , sizeof(qsched_task_t) * s->count_deps ) != cudaSuccess )
         error("Failed to allocate deps array on the device.");
+    if( cudaMemcpy( &setup_t , s->deps , sizeof(qsched_task_t) * s->count_deps , cudaMemcpyHostToDevice ) != cudaSuccess )
+        error("Failed to copy deps to the device.");
+    if( cudaMemcpyToSymbol ( &setup_t , deps_cuda , sizeof(qsched_task_t *) , cudaMemcpyHostToDevice ) != cudaSuccess )
+        error("Failed to copy deps pointer to the device.");

-    if( cudaMalloc( &uses_cuda , sizeof(int) * s->count_uses != cudaSuccess )
+    if( cudaMalloc( &uses_cuda , sizeof(qsched_res_t) * s->count_uses ) != cudaSuccess )
         error("Failed to allocate use array on the device.");
+    if( cudaMemcpy( &setup , s->uses , sizeof(qsched_res_t) * s->count_uses , cudaMemcpyHostToDevice ) != cudaSuccess )
+        error("Failed to copy uses to the device.");
+    if( cudaMemcpyToSymbol ( setup , uses_cuda , sizeof(qsched_res_t *) , cudaMemcpyHostToDevice ) != cudaSuccess )
+        error("Failed to copy uses pointer to the device.");

-    if( cudaMalloc( &res_cuda , sizeof( res) * s->count_res != cudaSuccess )
+    if( cudaMalloc( &res_t , sizeof(struct res) * s->count_res ) != cudaSuccess )
         error("Failed to allocate the resource array on the device.");
+    for(i = 0; i < s->count_res; i++)
+    {
+        if(s->res[i].size == 0)
+            continue;
+        if( cudaMalloc( &data , sizeof(int) * s->res[i].size ) != cudaSuccess )
+            error("Failed to allocate data space on the device.");
+        s->res[i].gpu_data = data;
+    }
+    if( cudaMemcpy( &res_t , s->res , sizeof(struct res) * s->count_res , cudaMemcpyHostToDevice ) != cudaSuccess )
+        error("Failed to copy resources to the device.");
+    if( cudaMemcpyToSymbol( res_t , res_cuda , sizeof(struct res *) * s->count_res , cudaMemcpyHostToDevice ) != cudaSuccess )
+        error("Failed to copy res pointer to the device.");

     /* Initialize the queues. */
     int nr_queues= 2, k, qsize;
-    int *data;
-    struct queue_cuda queues[ cuda_maxqueues ];
+    //int *data;
+    struct queue_cuda queues[ cuda_numqueues ];
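/* Editor's sketch, not the commit's code: the CUDA runtime declares
 *
 *   cudaError_t cudaMemcpyToSymbol( const void *symbol , const void *src ,
 *                                   size_t count , size_t offset ,
 *                                   enum cudaMemcpyKind kind );
 *
 * with the device symbol first and the byte offset, not the copy direction,
 * as the fourth argument. The added calls above pass the host pointer first,
 * so they would likely fail with cudaErrorInvalidSymbol at runtime, and the
 * cudaMemcpy( &setup , ... ) calls copy into the address of the host pointer
 * itself rather than the buffer it points to. Publishing a freshly allocated
 * buffer through the __device__ pointer tasks_cuda would normally look like
 * this: */
struct task *buf;                      /* hypothetical local, mirrors cuda_t */
if ( cudaMalloc( &buf , sizeof(struct task) * s->count ) != cudaSuccess )
    error("Failed to allocate task array on the device.");
if ( cudaMemcpy( buf , s->tasks , sizeof(struct task) * s->count ,
                 cudaMemcpyHostToDevice ) != cudaSuccess )
    error("Failed to copy tasks to the device.");
if ( cudaMemcpyToSymbol( tasks_cuda , &buf , sizeof(struct task *) , 0 ,
                         cudaMemcpyHostToDevice ) != cudaSuccess )
    error("Failed to copy task pointer to the device.");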
@@ -78,6 +78,10 @@
/***************************************************************************/
+#ifdef WITH_CUDA
+#define INLINE inline
+#endif
#if TIME_WITH_SYS_TIME
# include <sys/time.h>
# include <time.h>
@@ -526,3 +530,6 @@ INLINE_ELAPSED(inline)
#endif
#endif /* HAVE_MIPS_ZBUS_TIMER */
+#ifdef WITH_CUDA
+#undef INLINE
+#endif
@@ -35,16 +35,16 @@
#define lock_type volatile int
#define lock_init( l ) ( *l = 0 )
#define lock_destroy( l ) 0
-INLINE static int lock_lock ( volatile int *l ) {
-    while( atomicCAS( l , 0 , 1 ) != 0 );
+__device__ inline static int lock_lock ( volatile int *l ) {
+    while( atomicCAS( (int *)l , 0 , 1 ) != 0 );
     return 0;
 }
-INLINE static int lock_trylock ( volatile int *l ) {
-    int res = atomicCAS( l , 0 , 1 );
+__device__ inline static int lock_trylock ( volatile int *l ) {
+    int res = atomicCAS( (int *)l , 0 , 1 );
     return res;
 }
-INLINE static int lock_unlock ( volatile int *l ) {
-    int res = atomicCAS ( l , 1 , 0 ) != 1;
+__device__ inline static int lock_unlock ( volatile int *l ) {
+    int res = atomicCAS ( (int *)l , 1 , 0 ) != 1;
     return res;
 }
#define lock_unlock_blind( l ) atomicCAS( &l , 1 , 0 )
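/* Usage sketch (editor's, not from the source): atomicCAS() only accepts a
 * plain int*, hence the (int *) casts added above, and it returns the value
 * it found at the address, so lock_lock() spins until it atomically swaps
 * 0 -> 1. Taking the lock from a single thread per block sidesteps the
 * intra-warp livelock that contended spinlocks can cause on pre-Volta GPUs: */
__global__ void locked_increment ( volatile int *l , int *counter ) {
    if ( threadIdx.x == 0 ) {
        lock_lock( l );      /* spin until acquired */
        *counter += 1;       /* critical section */
        lock_unlock( l );    /* CAS 1 -> 0 to release */
    }
}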
@@ -884,6 +884,10 @@ void qsched_prepare ( struct qsched *s ) {
/* Lock the sched. */
lock_lock( &s->lock );
+#ifdef WITH_CUDA
+    qsched_prepare_loads( s );
+#endif
/* Get a pointer to the tasks, set the count. */
tasks = s->tasks;
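/* Editor's note (inference from the context above): the call is made this
 * early because qsched_prepare_loads() itself calls qsched_addtask() and
 * qsched_addunlock(), so the task and dependency arrays must grow before the
 * rest of qsched_prepare() takes its pointer and count snapshot. */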
@@ -163,8 +163,8 @@ struct qsched {
/* Function prototypes. */
/* Internal functions. */
-void qsched_sort ( int *restrict data , int *restrict ind , int N , int min , int max );
-void qsched_sort_rec ( int *restrict data , int *restrict ind , int N , int min , int max );
+void qsched_sort ( int *data , int *ind , int N , int min , int max );
+void qsched_sort_rec ( int *data , int *ind , int N , int min , int max );
struct task *qsched_gettask ( struct qsched *s , int qid );
void qsched_done ( struct qsched *s , struct task *t );
void *qsched_getdata( struct qsched *s , struct task *t );
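/* Editor's sketch (QSCHED_RESTRICT is hypothetical, not in the source):
 * dropping `restrict` loses the no-aliasing hint for host builds. nvcc
 * compiles these headers as C++, where `restrict` is not a keyword, but the
 * `__restrict__` extension is accepted by both gcc and nvcc, so a shim could
 * keep the hint on both paths (__CUDACC__ is nvcc's predefined macro): */
#ifdef __CUDACC__
#define QSCHED_RESTRICT __restrict__
#else
#define QSCHED_RESTRICT restrict
#endif
void qsched_sort ( int *QSCHED_RESTRICT data , int *QSCHED_RESTRICT ind , int N , int min , int max );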
@@ -22,7 +22,6 @@
#define task_flag_skip 1
#define task_flag_virtual 2
/* The task data structure. */
struct task {
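/* Usage sketch (editor's assumption about the flag semantics): the flags are
 * powers of two combined in task->flags and tested as a bitmask, e.g. */
static int task_skipped ( struct task *t ) {
    return ( t->flags & task_flag_skip ) != 0;
}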