From 97f9eded9232dfa92fe8afdb0fdb31141f00f7bd Mon Sep 17 00:00:00 2001 From: d74ksy <aidan.chalk@durham.ac.uk> Date: Thu, 21 Dec 2017 15:43:39 +0000 Subject: [PATCH] Implementation (currently not compiled) of reduction in load/unload tasks --- src/CUDA/queue_cuda.h | 2 ++ src/CUDA/runner_cuda_main.cu | 33 ++++++++++++++++++++++++++------- src/CUDA/task_cuda.h | 5 +++++ src/task.h | 2 ++ 4 files changed, 35 insertions(+), 7 deletions(-) diff --git a/src/CUDA/queue_cuda.h b/src/CUDA/queue_cuda.h index 99a7a0dd35..6e4a19a00f 100644 --- a/src/CUDA/queue_cuda.h +++ b/src/CUDA/queue_cuda.h @@ -34,6 +34,8 @@ extern "C" { #define type_unload task_type_unload #define type_implicit_load task_type_implicit_load #define type_implicit_unload task_type_implicit_unload +#define type_recv_load task_type_recv_load +#define type_send_unload task_type_send_unload const int num_gpu_types = 5; const int gpu_work_task_array[num_gpu_types] = {task_type_self, task_type_pair, task_type_sub_self, diff --git a/src/CUDA/runner_cuda_main.cu b/src/CUDA/runner_cuda_main.cu index 9c0095ae79..772f61d3d2 100644 --- a/src/CUDA/runner_cuda_main.cu +++ b/src/CUDA/runner_cuda_main.cu @@ -475,10 +475,10 @@ __device__ void doself_density(struct cell_cuda *ci) { /* Is the cell active? */ if (!cuda_cell_is_active(ci)) { if(threadIdx.x ==0) - printf( +/* printf( "Cell isn't active..., ti_end_min=%lli, ti_current=%lli, " "max_active_bin=%i, cell_id = %lli\n", - ci->ti_end_min, ti_current, max_active_bin, (ci-cells_cuda)); + ci->ti_end_min, ti_current, max_active_bin, (ci-cells_cuda));*/ return; } @@ -2229,6 +2229,9 @@ __host__ void create_transfer_tasks(struct cell *c, int *k, tasks_host[*k].skip = 0; tasks_host[*k].implicit = 0; tasks_host[*k].task = NULL; +#ifdef REDUCED_TRANSFER + tasks_host[*k].cell = c; +#endif /* The load implicit tasks unlocks the parent's task */ if (parent_load_task >= 0) { tasks_host[*k].unlocks[tasks_host[*k].nr_unlock_tasks++] = @@ -2254,7 +2257,9 @@ __host__ void create_transfer_tasks(struct cell *c, int *k, tasks_host[*k].skip = 0; tasks_host[*k].implicit = 0; tasks_host[*k].task = NULL; - +#ifdef REDUCED_TRANSFER + tasks_host[*k].cell = c; +#endif /* The unload implicit task is unlocked by the parent task */ if (parent_unload_task >= 0) { tasks_host[parent_unload_task] @@ -2284,6 +2289,9 @@ __host__ void create_transfer_tasks(struct cell *c, int *k, tasks_host[*k].skip = 0; tasks_host[*k].implicit = 0; tasks_host[*k].task = NULL; +#ifdef REDUCED_TRANSFER + tasks_host[*k].cell = c; +#endif /* This load task unlocks the parent's task. */ if (parent_load_task >= 0) { tasks_host[*k].unlocks[tasks_host[*k].nr_unlock_tasks++] = @@ -2305,6 +2313,9 @@ __host__ void create_transfer_tasks(struct cell *c, int *k, tasks_host[*k].skip = 0; tasks_host[*k].implicit = 0; tasks_host[*k].task = NULL; +#ifdef REDUCED_TRANSFER + tasks_host[*k].cell = c; +#endif /* The unload task is unlocked by the parent task */ if (parent_unload_task >= 0) { tasks_host[parent_unload_task] @@ -2488,7 +2499,7 @@ __host__ void update_tasks(struct engine *e) { /* Relies on assumption implicit unloads are always before unloads in host_tasks, which i believe to be true by conscruction. for(int i = 0; i < nr_gpu_tasks; i++){ - if(host_tasks[i].type == type_unload && host_tasks[i].type == type_implicit_unload){ + if(host_tasks[i].type == type_unload || host_tasks[i].type == type_implicit_unload){ if(host_tasks[i].wait==1){ host_tasks[i].skip = 1; task_count--; @@ -2496,16 +2507,24 @@ __host__ void update_tasks(struct engine *e) { int *unlocks = host_unlock_copy + (temp_t->unlocks-host_unlock_pointer); for(int ii = 0; ii < temp_t->nr_unlock_tasks; ii++){ if(!host_tasks[unlocks[ii].skip) - host_tasks[unlock[ii]].wait--; + host_tasks[unlocks[ii]].wait--; } *Find the corresponding load task* - Have to search the cells for this at the moment. - + struct task_cuda *l_task = &host_tasks[host_tasks[i].cell->load_task]; + l_tasks->skip = 1; + task_count--; + int *unlocks = host_unlock_copy + (l_task->unlocks-host_unlock_pointer); + for(int ii=0; ii < l_task->nr_unlock_tasks; ii++){ + if(!host_tasks[unlocks[ii].skip) + host_tasks[unlocks[ii]].wait--; + } } } }*/ + /* TODO Reset the waits again.*/ + cudaErrCheck(cudaMemcpyToSymbol(tot_num_tasks, &task_count, sizeof(int))); /* Reset the queue data.*/ int qsize; diff --git a/src/CUDA/task_cuda.h b/src/CUDA/task_cuda.h index d1583a344e..ae70941274 100644 --- a/src/CUDA/task_cuda.h +++ b/src/CUDA/task_cuda.h @@ -46,6 +46,11 @@ struct task_cuda { /* Size of unlock array during initialisation. */ int size_unlocks; +#ifdef REDUCED_TRANSFER + /* Pointer to the loaded/unloaded cell for load/unload tasks*/ + struct cell *cell; +#endif + #ifdef CUDA_TASK_TIMERS /* Executing block*/ int blockID; diff --git a/src/task.h b/src/task.h index 13fcc5cb36..c6a163bb9a 100644 --- a/src/task.h +++ b/src/task.h @@ -43,6 +43,8 @@ enum task_types { task_type_unload = -102, task_type_implicit_load = -103, task_type_implicit_unload = -104, + task_type_recv_load = -201, + task_type_send_unload = -202, task_type_none = 0, task_type_sort, task_type_self, -- GitLab