Commit cdb7c197 authored by Aidan Chalk

Code now automatically generates load and unload tasks for the device

parent 90449ae1
@@ -16,11 +16,24 @@
* along with this program. If not, see <http://www.gnu.org/licenses/>.
*
******************************************************************************/
/* Config parameters. */
#include "../config.h"
/* Standard includes. */
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include "cuda_queue.h"
#include "quicksched.h"
#include "res.h"
-/*Define if conflicts are enabled or not.*/
+/*Define task types required for GPU */
+#define type_load -100
+#define type_unload -101
+/*Define if conflicts are enabled or not.*/
//#define GPU_locks
/*Declare variables required for the queue*/
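/* Editor's sketch, not part of the commit: the negative type codes above are
 * presumably chosen so the GPU-only load/unload tasks cannot collide with
 * user-defined task types, which the QuickSched examples keep non-negative.
 * A device-side runner would then dispatch on them roughly as follows
 * (runner_dispatch_sketch is a hypothetical name): */
__device__ void runner_dispatch_sketch ( struct task *t ) {
    switch ( t->type ) {
        case type_load:    /* stage the resource's data host -> device */
            break;
        case type_unload:  /* copy the resource's data back device -> host */
            break;
        default:           /* anything else is a user task */
            break;
    }
}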
@@ -29,9 +42,9 @@ __constant__ int cuda_nrqueues;
__constant__ int cuda_queue_size;
__device__ struct task *tasks_cuda;
-__device__ int *locks_cuda;
-__device__ int *uses_cuda;
-__device__ int *deps_cuda;
+__device__ qsched_res_t *locks_cuda;
+__device__ qsched_res_t *uses_cuda;
+__device__ qsched_task_t *deps_cuda;
__device__ struct res *res_cuda;
/**
@@ -149,30 +162,151 @@ __device__ int runner_cuda_gettask ( struct queue_cuda *q ) {
-int qsched_prepare_cuda ( struct qsched *s , ) {
+void qsched_prepare_loads ( struct qsched *s ) {
+    int i, task, unload, tb, j, k, unlocked=0;
+    struct task *t;
+
+    /* Create a load task for each resource. */
+    for(i = 0; i < s->count_res; i++)
+    {
+        if(s->res[i].size == 0)
+            continue;
+        task = qsched_addtask( s , type_load , 0 , &i , sizeof(int) , 0 );
+        unload = qsched_addtask( s , type_unload , 0 , &i , sizeof(int) , 0 );
+
+        /*Load task unlocks each task that uses or locks the specified resource */
+        for(j = 0; j < s->count; j++)
+        {
+            t = &s->tasks[j];
+            for(k = 0; k < t->nr_uses; k++)
+            {
+                if(t->uses[k] == i){
+                    qsched_addunlock( s , task , j );
+                    qsched_addunlock( s , j , unload );
+                    unlocked = 1;
+                    break;
+                }
+            }
+            if(unlocked == 1)
+            {
+                unlocked = 0;
+                continue;
+            }
+            for(k = 0; k < t->nr_locks; k++)
+            {
+                if(t->locks[k] == i){
+                    qsched_addunlock( s , task , j );
+                    qsched_addunlock( s , j , unload );
+                    break;
+                }
+            }
+        }
+    }
+}
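/* Usage note (editor's): for a resource i and any task j that uses or locks
 * it, the two qsched_addunlock() calls above produce the dependency chain
 *
 *     load(i)  ->  j  ->  unload(i)
 *
 * so j becomes runnable only after i's data has been staged on the device,
 * and the copy back to the host waits until j has completed. A resource
 * touched by several tasks shares a single load/unload pair between them. */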
+int qsched_prepare_cuda ( struct qsched *s ) {

     int i;
+    struct task *cuda_t;
+    qsched_res_t *setup;
+    qsched_task_t *setup_t;
+    struct res *res_t;
+    int *data;

+    /*Setup Load and unload tasks*/

     /*Copy the qsched data to the device*/
-    if( cudaMalloc( &tasks_cuda , sizeof(struct task) * s->count != cudaSuccess )
+    if( cudaMalloc( &cuda_t , sizeof(struct task) * s->count ) != cudaSuccess )
         error("Failed to allocate task array on the device.");
-    if( cudaMemcpy( )
+    if( cudaMemcpy( cuda_t , s->tasks , sizeof(struct task) * s->count , cudaMemcpyHostToDevice ) != cudaSuccess )
         error("Failed to copy tasks to the device.");
+    if( cudaMemcpyToSymbol ( cuda_t , tasks_cuda , sizeof(struct task *) , cudaMemcpyHostToDevice ) != cudaSuccess )
+        error("Failed to copy task pointer to the device.");

-    if( cudaMalloc( &locks_cuda , sizeof(int) * s->count_lock != cudaSuccess )
+    if( cudaMalloc( &setup , sizeof(qsched_res_t) * s->count_locks ) != cudaSuccess )
         error("Failed to allocate locks array on the device.");
+    if( cudaMemcpy( &setup , s->locks , sizeof(qsched_res_t) * s->count_locks , cudaMemcpyHostToDevice ) != cudaSuccess )
+        error("Failed to copy locks to the device.");
+    if( cudaMemcpyToSymbol ( setup , locks_cuda , sizeof(qsched_res_t *) , cudaMemcpyHostToDevice ) != cudaSuccess )
+        error("Failed to copy locks pointer to the device.");

-    if( cudaMalloc( &deps_cuda , sizeof(int) * s->count_deps != cudaSuccess )
+    if( cudaMalloc( &setup_t , sizeof(qsched_task_t) * s->count_deps ) != cudaSuccess )
         error("Failed to allocate deps array on the device.");
+    if( cudaMemcpy( &setup_t , s->deps , sizeof(qsched_task_t) * s->count_deps , cudaMemcpyHostToDevice ) != cudaSuccess )
+        error("Failed to copy deps to the device.");
+    if( cudaMemcpyToSymbol ( &setup_t , deps_cuda , sizeof(qsched_task_t *) , cudaMemcpyHostToDevice ) != cudaSuccess )
+        error("Failed to copy deps pointer to the device.");

-    if( cudaMalloc( &uses_cuda , sizeof(int) * s->count_uses != cudaSuccess )
+    if( cudaMalloc( &uses_cuda , sizeof(qsched_res_t) * s->count_uses ) != cudaSuccess )
         error("Failed to allocate use array on the device.");
+    if( cudaMemcpy( &setup , s->uses , sizeof(qsched_res_t) * s->count_uses , cudaMemcpyHostToDevice ) != cudaSuccess )
+        error("Failed to copy uses to the device.");
+    if( cudaMemcpyToSymbol ( setup , uses_cuda , sizeof(qsched_res_t *) , cudaMemcpyHostToDevice ) != cudaSuccess )
+        error("Failed to copy uses pointer to the device.");

-    if( cudaMalloc( &res_cuda , sizeof( res) * s->count_res != cudaSuccess )
+    if( cudaMalloc( &res_t , sizeof(struct res) * s->count_res ) != cudaSuccess )
         error("Failed to allocate the resource array on the device.");
+    for(i = 0; i < s->count_res; i++)
+    {
+        if(s->res[i].size == 0)
+            continue;
+        if( cudaMalloc( &data , sizeof(int) * s->res[i].size ) != cudaSuccess )
+            error("Failed to allocate data space on the device.");
+        s->res[i].gpu_data = data;
+    }
+    if( cudaMemcpy( &res_t , s->res , sizeof(struct res) * s->count_res , cudaMemcpyHostToDevice ) != cudaSuccess )
+        error("Failed to copy resources to the device.");
+    if( cudaMemcpyToSymbol( res_t , res_cuda , sizeof(struct res *) * s->count_res , cudaMemcpyHostToDevice ) != cudaSuccess )
+        error("Failed to copy res pointer to the device.");

     /* Initialize the queues. */
     int nr_queues= 2, k, qsize;
-    int *data;
-    struct queue_cuda queues[ cuda_maxqueues ];
+    //int *data;
+    struct queue_cuda queues[ cuda_numqueues ];
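/* Editor's sketch, not the commit's code: the CUDA runtime declares
 *
 *   cudaError_t cudaMemcpyToSymbol( const void *symbol , const void *src ,
 *                                   size_t count , size_t offset ,
 *                                   enum cudaMemcpyKind kind );
 *
 * with the device symbol first and the byte offset, not the copy direction,
 * as the fourth argument. The added calls above pass the host pointer first,
 * so they would likely fail with cudaErrorInvalidSymbol at runtime, and the
 * cudaMemcpy( &setup , ... ) calls copy into the address of the host pointer
 * itself rather than the buffer it points to. Publishing a freshly allocated
 * buffer through the __device__ pointer tasks_cuda would normally look like
 * this: */
struct task *buf;                      /* hypothetical local, mirrors cuda_t */
if ( cudaMalloc( &buf , sizeof(struct task) * s->count ) != cudaSuccess )
    error("Failed to allocate task array on the device.");
if ( cudaMemcpy( buf , s->tasks , sizeof(struct task) * s->count ,
                 cudaMemcpyHostToDevice ) != cudaSuccess )
    error("Failed to copy tasks to the device.");
if ( cudaMemcpyToSymbol( tasks_cuda , &buf , sizeof(struct task *) , 0 ,
                         cudaMemcpyHostToDevice ) != cudaSuccess )
    error("Failed to copy task pointer to the device.");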
@@ -78,6 +78,10 @@
/***************************************************************************/
+#ifdef WITH_CUDA
+#define INLINE inline
+#endif
#if TIME_WITH_SYS_TIME
# include <sys/time.h>
# include <time.h>
@@ -526,3 +530,6 @@ INLINE_ELAPSED(inline)
#endif
#endif /* HAVE_MIPS_ZBUS_TIMER */
+#ifdef WITH_CUDA
+#undef INLINE
+#endif
@@ -35,16 +35,16 @@
#define lock_type volatile int
#define lock_init( l ) ( *l = 0 )
#define lock_destroy( l ) 0
-INLINE static int lock_lock ( volatile int *l ) {
-    while( atomicCAS( l , 0 , 1 ) != 0 );
+__device__ inline static int lock_lock ( volatile int *l ) {
+    while( atomicCAS( (int *)l , 0 , 1 ) != 0 );
     return 0;
 }
-INLINE static int lock_trylock ( volatile int *l ) {
-    int res = atomicCAS( l , 0 , 1 );
+__device__ inline static int lock_trylock ( volatile int *l ) {
+    int res = atomicCAS( (int *)l , 0 , 1 );
     return res;
 }
-INLINE static int lock_unlock ( volatile int *l ) {
-    int res = atomicCAS ( l , 1 , 0 ) != 1;
+__device__ inline static int lock_unlock ( volatile int *l ) {
+    int res = atomicCAS ( (int *)l , 1 , 0 ) != 1;
     return res;
 }
#define lock_unlock_blind( l ) atomicCAS( &l , 1 , 0 )
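/* Usage sketch (editor's, not from the source): atomicCAS() only accepts a
 * plain int*, hence the (int *) casts added above, and it returns the value
 * it found at the address, so lock_lock() spins until it atomically swaps
 * 0 -> 1. Taking the lock from a single thread per block sidesteps the
 * intra-warp livelock that contended spinlocks can cause on pre-Volta GPUs: */
__global__ void locked_increment ( volatile int *l , int *counter ) {
    if ( threadIdx.x == 0 ) {
        lock_lock( l );      /* spin until acquired */
        *counter += 1;       /* critical section */
        lock_unlock( l );    /* CAS 1 -> 0 to release */
    }
}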
@@ -884,6 +884,10 @@ void qsched_prepare ( struct qsched *s ) {
/* Lock the sched. */
lock_lock( &s->lock );
+#ifdef WITH_CUDA
+    qsched_prepare_loads( s );
+#endif
/* Get a pointer to the tasks, set the count. */
tasks = s->tasks;
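/* Editor's note (inference from the context above): the call is made this
 * early because qsched_prepare_loads() itself calls qsched_addtask() and
 * qsched_addunlock(), so the task and dependency arrays must grow before the
 * rest of qsched_prepare() takes its pointer and count snapshot. */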
@@ -163,8 +163,8 @@ struct qsched {
/* Function prototypes. */
/* Internal functions. */
-void qsched_sort ( int *restrict data , int *restrict ind , int N , int min , int max );
-void qsched_sort_rec ( int *restrict data , int *restrict ind , int N , int min , int max );
+void qsched_sort ( int *data , int *ind , int N , int min , int max );
+void qsched_sort_rec ( int *data , int *ind , int N , int min , int max );
struct task *qsched_gettask ( struct qsched *s , int qid );
void qsched_done ( struct qsched *s , struct task *t );
void *qsched_getdata( struct qsched *s , struct task *t );
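/* Editor's sketch (QSCHED_RESTRICT is hypothetical, not in the source):
 * dropping `restrict` loses the no-aliasing hint for host builds. nvcc
 * compiles these headers as C++, where `restrict` is not a keyword, but the
 * `__restrict__` extension is accepted by both gcc and nvcc, so a shim could
 * keep the hint on both paths (__CUDACC__ is nvcc's predefined macro): */
#ifdef __CUDACC__
#define QSCHED_RESTRICT __restrict__
#else
#define QSCHED_RESTRICT restrict
#endif
void qsched_sort ( int *QSCHED_RESTRICT data , int *QSCHED_RESTRICT ind , int N , int min , int max );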
@@ -22,7 +22,6 @@
#define task_flag_skip 1
#define task_flag_virtual 2
/* The task data structure. */
struct task {
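/* Usage sketch (editor's assumption about the flag semantics): the flags are
 * powers of two combined in task->flags and tested as a bitmask, e.g. */
static int task_skipped ( struct task *t ) {
    return ( t->flags & task_flag_skip ) != 0;
}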