diff --git a/examples/test_bh_2.cu b/examples/test_bh_2.cu
new file mode 100644
index 0000000000000000000000000000000000000000..534b9b38fa3ff18c2b1e1f33617585622a28fbb1
--- /dev/null
+++ b/examples/test_bh_2.cu
@@ -0,0 +1,483 @@
+/*******************************************************************************
+ * This file is part of QuickSched.
+ * Copyright (c) 2014 Pedro Gonnet (pedro.gonnet@durham.ac.uk),
+ *                    Aidan Chalk (aidan.chalk@durham.ac.uk)
+ *
+ * This program is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU Lesser General Public License as published
+ * by the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public License
+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
+ *
+ ******************************************************************************/
+
+
+/* Config parameters. */
+#include "../config.h"
+
+/* Standard includes. */
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <unistd.h>
+#include <math.h>
+#include <float.h>
+#include <limits.h>
+#include <omp.h>
+#include <fenv.h>
+
+/* Local includes. The header is wrapped in extern "C" exactly as the other
+   CUDA examples do, so the C library symbols link from this C++ unit. */
+extern "C"{
+#include "quicksched.h"
+}
+
+
+/* One node of the octree. Cells live in cell_pool and refer to each other
+   by pool index, never by pointer. */
+struct cell{
+
+  /* Lower corner of the cell (x/y packed, z separate). */
+  double2 loc_xy;
+  double loc_z;
+
+  /* Edge length of the cell. */
+  double h;
+
+  /* Number of particles in the cell. */
+  int count;
+
+  /* split is set once the cell has children; sorted is not used in this file. */
+  unsigned short int split, sorted;
+
+  /* Index of the first particle, pool index of the first non-empty child and
+     pool index of the next cell in the traversal (-1 while unset). */
+  int parts, firstchild, sibling;
+
+  /* res is the scheduler resource locked by the cell's tasks; com_tid is not
+     referenced in this file. */
+  int res, com_tid;
+
+}__attribute__((aligned(64)));
+
+/* Required variables to obtain cells. */
+#define CELL_STRETCH 2
+#define INITIAL_CELLS 256
+struct cell *cell_pool = NULL;
+int used_cells = 0;
+int num_cells = INITIAL_CELLS;
+int cell_size = INITIAL_CELLS*sizeof(struct cell);
+
+/* Maximum number of particles in a cell before it is split.
+   NOTE(review): this name was used but never defined in this file; 100 is a
+   placeholder threshold, confirm the intended value. */
+#ifndef cell_maxparts
+#define cell_maxparts 100
+#endif
+
+/* Task types handed to qsched_addtask.
+   NOTE(review): these were used but never defined in this file; the numeric
+   values are arbitrary identifiers, confirm against the task runner. */
+enum task_type {
+  task_type_self = 0,
+  task_type_pair,
+  task_type_self_pc
+};
+
+/* Device locations for the particle values. */
+__device__ double2 *parts_pos_xy;
+__device__ double *parts_pos_z;
+__device__ float4 *parts_a_m;
+__device__ double2 *com_xy;
+__device__ double *com_z;
+__device__ float *com_mass;
+
+
+/* Host locations for the particle values. The com_* arrays hold one entry per
+   cell in cell_pool and are (re)allocated by cell_get(). */
+double2 *parts_pos_xy_host;
+double *parts_pos_z_host;
+float4 *parts_a_m_host;
+double2 *com_xy_host;
+double *com_z_host;
+float *com_mass_host;
+
+
+/**
+ * @brief Checks whether the cells are direct neighbours or not. Both cells have
+ * to be of the same size
+ *
+ * @param ci The first #cell.
+ * @param cj The second #cell.
+ *
+ * @return Non-zero if the cell locations differ by at most one cell edge
+ *         along every axis.
+ */
+static inline int are_neighbours(struct cell *ci, struct cell *cj) {
+
+  float dx[3];
+
+#ifdef SANITY_CHECKS
+  if (ci->h != cj->h)
+    error(" Cells of different size in distance calculation.");
+#endif
+
+  /* Maximum allowed distance */
+  float min_dist = ci->h;
+
+  /* Per-axis distance between the cells */
+  double2 loc1 = ci->loc_xy, loc2 = cj->loc_xy;
+  float center_i = loc1.x;
+  float center_j = loc2.x;
+  dx[0] = fabs(center_i - center_j);
+  center_i = loc1.y;
+  center_j = loc2.y;
+  dx[1] = fabs(center_i - center_j);
+  center_i = ci->loc_z;
+  center_j = cj->loc_z;
+  dx[2] = fabs(center_i - center_j);
+
+  return (dx[0] <= min_dist) && (dx[1] <= min_dist) && (dx[2] <= min_dist);
+}
+
+
+/**
+ * @brief Hand out the next free cell of the pool, growing the pool if needed.
+ *
+ * The pool and the per-cell COM arrays are allocated on first use and
+ * stretched by CELL_STRETCH whenever they are full. Growing moves the pool,
+ * so every pointer previously returned by this function becomes invalid;
+ * callers that keep cells across calls must store the pool index
+ * (cell - cell_pool) and re-derive the pointer afterwards.
+ *
+ * @return Pointer to a cell with sibling and firstchild set to -1 and no
+ *         resource attached.
+ */
+struct cell *cell_get()
+{
+  struct cell *res;
+
+  if(cell_pool == NULL || used_cells >= num_cells)
+  {
+    /* The first call allocates the initial capacity, later calls stretch it. */
+    int new_count = (cell_pool == NULL) ? num_cells : num_cells * CELL_STRETCH;
+
+    struct cell *new_pool = (struct cell *) calloc(new_count, sizeof(struct cell));
+    double2 *new_xy = (double2 *) calloc(new_count, sizeof(double2));
+    double *new_z = (double *) calloc(new_count, sizeof(double));
+    float *new_mass = (float *) calloc(new_count, sizeof(float));
+    if(new_pool == NULL || new_xy == NULL || new_z == NULL || new_mass == NULL)
+      error("Failed to allocate the cell pool.");
+
+    /* Carry over the cells handed out so far and their COM data. */
+    if(cell_pool != NULL)
+      memcpy(new_pool, cell_pool, used_cells*sizeof(struct cell));
+    if(com_xy_host != NULL)
+      memcpy(new_xy, com_xy_host, used_cells*sizeof(double2));
+    if(com_z_host != NULL)
+      memcpy(new_z, com_z_host, used_cells*sizeof(double));
+    if(com_mass_host != NULL)
+      memcpy(new_mass, com_mass_host, used_cells*sizeof(float));
+
+    free(cell_pool);
+    free(com_xy_host);
+    free(com_z_host);
+    free(com_mass_host);
+    cell_pool = new_pool;
+    com_xy_host = new_xy;
+    com_z_host = new_z;
+    com_mass_host = new_mass;
+
+    num_cells = new_count;
+    cell_size = new_count*sizeof(struct cell);
+  }
+
+  res = &cell_pool[used_cells];
+  used_cells++;
+  res->sibling = -1;
+  res->firstchild = -1;
+  res->res = qsched_res_none;
+  return res;
+}
+
+/**
+ * @brief Compute the centre of mass and total mass of a cell.
+ *
+ * Split cells accumulate the values already stored for their children,
+ * leaves accumulate their particles. The result is written to the entry of
+ * the com_*_host arrays that matches the cell's pool index.
+ *
+ * @param c The #cell, which must point into cell_pool.
+ */
+void comp_com(struct cell *c){
+
+  int k, count = c->count;
+  int cpi;
+  int parts = c->parts;
+  double com[3] = {0.0, 0.0, 0.0}, mass = 0.0;
+
+  if(c->split) {
+
+    /* Walk the children through their sibling links; the last child links to
+       the sibling of c itself, which ends the loop. */
+    for(cpi = c->firstchild; cpi != c->sibling; cpi = cell_pool[cpi].sibling) {
+      float cp_mass = com_mass_host[cpi];
+      com[0] += com_xy_host[cpi].x * cp_mass;
+      com[1] += com_xy_host[cpi].y * cp_mass;
+      com[2] += com_z_host[cpi] * cp_mass;
+      mass += cp_mass;
+    }
+
+  /* Otherwise collect the multipole from the particles */
+  } else {
+
+    for(k = parts; k < parts+count; k++)
+    {
+      float p_mass = parts_a_m_host[k].w;
+      com[0] += parts_pos_xy_host[k].x * p_mass;
+      com[1] += parts_pos_xy_host[k].y * p_mass;
+      com[2] += parts_pos_z_host[k] * p_mass;
+      mass += p_mass;
+    }
+  }
+
+
+  k = c - cell_pool;
+  /* Store the COM data, if it was collected. */
+  if(mass > 0.0) {
+    /* Kept in double so the double-precision positions are not rounded
+       through a float reciprocal. */
+    double imass = 1.0/mass;
+    com_xy_host[k].x = com[0] * imass;
+    com_xy_host[k].y = com[1] * imass;
+    com_z_host[k] = com[2] * imass;
+    com_mass_host[k] = mass;
+  }else
+  {
+    com_xy_host[k].x = 0.0;
+    com_xy_host[k].y = 0.0;
+    com_z_host[k] = 0.0;
+    com_mass_host[k] = 0.0f;
+  }
+
+}
+
+/**
+ * @brief Sort the parts into eight bins along the given pivots and
+ *        fill the multipoles. Also adds the hierarchical resources
+ *        to the sched (TODO).
+ *
+ * @param c The #cell to be split.
+ * @param s The #sched to store the resources.
+ */
+void cell_split(struct cell *c, struct qsched *s);
+
+/* Coordinate of particle idx along axis (0 = x, 1 = y, anything else = z). */
+static double part_coord(int idx, int axis) {
+  switch (axis) {
+    case 0: return parts_pos_xy_host[idx].x;
+    case 1: return parts_pos_xy_host[idx].y;
+    default: return parts_pos_z_host[idx];
+  }
+}
+
+/* Exchange particles a and b in all three host particle arrays. */
+static void part_swap(int a, int b) {
+  double2 xy = parts_pos_xy_host[a];
+  double z = parts_pos_z_host[a];
+  float4 am = parts_a_m_host[a];
+  parts_pos_xy_host[a] = parts_pos_xy_host[b];
+  parts_pos_z_host[a] = parts_pos_z_host[b];
+  parts_a_m_host[a] = parts_a_m_host[b];
+  parts_pos_xy_host[b] = xy;
+  parts_pos_z_host[b] = z;
+  parts_a_m_host[b] = am;
+}
+
+/* Reorder particles lo..hi (inclusive) so that those below pivot along axis
+   come first; returns the index of the first particle at or above the pivot
+   (hi + 1 if there is none). */
+static int parts_partition(int lo, int hi, int axis, double pivot) {
+  int i = lo, j = hi;
+  while (i <= j) {
+    while (i <= hi && part_coord(i, axis) < pivot) i += 1;
+    while (j >= lo && part_coord(j, axis) >= pivot) j -= 1;
+    if (i < j) part_swap(i, j);
+  }
+  return i;
+}
+
+/* Definition of cell_split.
+ *
+ * Cells are tracked by pool index inside this function because cell_get()
+ * may move cell_pool; for the same reason the pointer passed in by the caller
+ * may be stale once this function returns and should be re-derived from its
+ * index. The first cell ever passed in is remembered as the root and is
+ * given sibling 0, which the traversal loops use as their end marker.
+ */
+void cell_split(struct cell *c, struct qsched *s) {
+  int k, kk, axis, count = c->count;
+  int parts = c->parts;
+  int left[8], right[8];
+  double pivot[3];
+  static int root = -1;
+  int self = c - cell_pool;
+  int progenitors[8];
+
+  /* Set the root cell. */
+  if (root < 0) {
+    root = self;
+    c->sibling = 0;
+  }
+
+  /* NOTE(review): cell_get() hands out cells without a resource and the
+     resources for the progenitors are still a TODO below, so this check
+     fires as soon as the function recurses. */
+  if(c->res == qsched_res_none)
+    error("Cell has no resource");
+
+  if(count > cell_maxparts )
+  {
+    /* Get all eight progenitors first, then re-derive c: any of these calls
+       may have moved the pool. The index is computed in a separate statement
+       so that cell_pool is read after cell_get() has returned. */
+    for(k = 0; k < 8; k++)
+    {
+      struct cell *fresh = cell_get();
+      progenitors[k] = fresh - cell_pool;
+    }
+    c = &cell_pool[self];
+    c->split = 1;
+
+    for(k = 0; k < 8; k++)
+    {
+      struct cell *cp = &cell_pool[progenitors[k]];
+      cp->loc_xy = c->loc_xy;
+      cp->loc_z = c->loc_z;
+      cp->h = c->h*0.5;
+      if(k & 4) cp->loc_xy.x += cp->h;
+      if(k & 2) cp->loc_xy.y += cp->h;
+      if(k & 1) cp->loc_z += cp->h;
+    }
+
+    /* Init the pivots.*/
+    pivot[0] = c->loc_xy.x + c->h * 0.5;
+    pivot[1] = c->loc_xy.y + c->h * 0.5;
+    pivot[2] = c->loc_z + c->h * 0.5;
+
+    /* Split along x once, y twice and z four times. Bins are processed in
+       descending order so that bin k is read before bins 2k and 2k+1
+       overwrite it; right[2k+1] is written before right[2k] because for
+       k == 0 the latter is the slot being read. */
+    left[0] = parts;
+    right[0] = parts+count-1;
+    for(axis = 0; axis < 3; axis++)
+    {
+      for(k = (1 << axis) - 1; k >= 0; k--)
+      {
+        int mid = parts_partition(left[k], right[k], axis, pivot[axis]);
+        left[2*k+1] = mid;
+        right[2*k+1] = right[k];
+        left[2*k] = left[k];
+        right[2*k] = mid-1;
+      }
+    }
+
+    /* Store the counts and offsets. */
+    for(k = 0; k < 8; k++)
+    {
+      cell_pool[progenitors[k]].count = right[k]-left[k]+1;
+      cell_pool[progenitors[k]].parts = left[k];
+      //TODO ADD RESOURCES HERE.
+    }
+
+    /* Find the first non-empty progenitor */
+    for(k = 0; k < 8; k++)
+    {
+      if(cell_pool[progenitors[k]].count > 0)
+      {
+        c->firstchild = progenitors[k];
+        break;
+      }
+    }
+
+    #ifdef SANITY_CHECKS
+    if(c->firstchild == -1)
+      error("Cell has been split but all children have 0 parts");
+    #endif
+
+    /*Prepare the pointers*/
+    for(k = 0; k < 8; k++)
+    {
+      /* Find the next non-empty sibling */
+      for(kk = k+1; kk < 8; ++kk){
+        if(cell_pool[progenitors[kk]].count > 0){
+          cell_pool[progenitors[k]].sibling = progenitors[kk];
+          break;
+        }
+      }
+
+      /* No non-empty sibling, go back a level.*/
+      if(kk == 8) cell_pool[progenitors[k]].sibling = c->sibling;
+
+    }
+
+    /* Recurse */
+    for(k = 0; k < 8; k++)
+      if(cell_pool[progenitors[k]].count > 0)
+        cell_split(&cell_pool[progenitors[k]], s);
+
+    /* The recursion may have moved the pool. */
+    c = &cell_pool[self];
+
+  /* Otherwise we're at a leaf so we need to make the cell's particle-cell task. */
+  } else {
+
+    /* The payload carries pool indices, like the tasks built by
+       create_tasks(): pointers into the pool would go stale when it grows. */
+    int data[2] = {root, self};
+    qsched_task_t tid = qsched_addtask(s, task_type_self_pc, task_flag_none, data,
+                                       sizeof(data), 1);
+    qsched_addlock(s, tid, c->res);
+    //TODO Deal with multiple resources.
+  }
+
+#ifndef COM_AS_TASK
+  comp_com(c);
+#endif
+}
+
+/**
+ * @brief Create the tasks for the cell pair/self.
+ *
+ * Task payloads are two pool indices; a self task stores -1 as the second.
+ * Pairs of cells that are not neighbours get no task here.
+ *
+ * @param s The #sched in which to create the tasks.
+ * @param ci The first #cell.
+ * @param cj The second #cell, or @c NULL for a self interaction.
+ */
+void create_tasks(struct qsched *s, struct cell *ci, struct cell *cj){
+
+  qsched_task_t tid;
+  int data[2];
+  struct cell *cp, *cps;
+
+  if(cj == NULL)
+  {
+    if(ci->split)
+    {
+      for(cp = &cell_pool[ci->firstchild]; cp != &cell_pool[ci->sibling]; cp = &cell_pool[cp->sibling])
+      {
+        //Self Interaction.
+        create_tasks(s, cp, NULL);
+
+        /* Pair the child with every sibling that follows it. */
+        for(cps = &cell_pool[cp->sibling]; cps != &cell_pool[ci->sibling]; cps = &cell_pool[cps->sibling])
+          create_tasks(s, cp, cps);
+      }
+    }
+    /* Self task */
+    else{
+      data[0] = ci - cell_pool;
+      data[1] = -1;
+
+      tid = qsched_addtask(s, task_type_self, task_flag_none, data, sizeof(data), ci->count*ci->count/2);
+      qsched_addlock(s, tid, ci->res);
+    }
+  }
+  /* Else its a pair!*/
+  else if(are_neighbours(ci, cj)){
+    /*Are both split? */
+    if(ci->split && cj->split)
+    {
+      /* Recurse over both cells. */
+      for(cp = &cell_pool[ci->firstchild]; cp != &cell_pool[ci->sibling]; cp = &cell_pool[cp->sibling])
+        for(cps = &cell_pool[cj->firstchild]; cps != &cell_pool[cj->sibling]; cps = &cell_pool[cps->sibling])
+          create_tasks(s, cp, cps);
+
+    /* Otherwise, at least one of the cells is not split, build a direct
+     * interaction. */
+    }else{
+      data[0] = ci-cell_pool;
+      data[1] = cj-cell_pool;
+
+      /* Create the task. */
+      tid = qsched_addtask(s, task_type_pair, task_flag_none, data,
+                           sizeof(data), ci->count * cj->count);
+
+      /* Add the resources. */
+      qsched_addlock(s, tid, ci->res);
+      qsched_addlock(s, tid, cj->res);
+    }
+  }
+
+}
+
+/**
+ * @brief Set up and run a task-based Barnes-Hut N-body solver.
+ *
+ * NOTE(review): only the allocation of the host x/y position array is
+ * implemented so far; the remaining locals are not used yet.
+ *
+ * @param N The number of random particles to use.
+ * @param runs Number of force evaluations to use as a benchmark.
+ * @param fileName Input file name. If @c NULL or an empty string, random
+ * particle positions will be used.
+ */
+void test_bh(int N, int runs, char *fileName) {
+  int i, k;
+  struct cell *root;
+  struct part *parts;
+  FILE *file;
+  struct qsched s;
+  ticks tic, toc_run, tot_setup = 0, tot_run = 0;
+  int countMultipoles = 0, countPairs = 0, countCoMs = 0;
+
+
+  /* Initialize the scheduler. */
+  qsched_init(&s, 1, qsched_flag_none);
+
+  //Create host particle arrays.
+  if( cudaMallocHost(&parts_pos_xy_host, sizeof(double2) * N) != cudaSuccess)
+    error("Failed to allocated parts array");
+}
diff --git a/examples/test_gpu_simple.cu b/examples/test_gpu_simple.cu
new file mode 100644
index 0000000000000000000000000000000000000000..066b53bd670f98e5ce6708f60ff83901d970c67f
--- /dev/null
+++ b/examples/test_gpu_simple.cu
@@ -0,0 +1,234 @@
+/*******************************************************************************
+ * This file is part of QuickSched.
+ * Copyright (c) 2013 Aidan Chalk (aidan.chalk@durham.ac.uk)
+ *
+ * This program is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU Lesser General Public License as published
+ * by the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public License
+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
+ *
+ ******************************************************************************/
+/* Standard includes. */
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <unistd.h>
+#include <math.h>
+#include <omp.h>
+
+/* Local includes. 
*/
+extern "C"{
+#include "quicksched.h"
+}
+#include "cuda_queue.h"
+
+/* Task types understood by gpuTest(). */
+#define type_square 1
+#define type_double 2
+#define type_quarter 3
+
+/* Total number of floats, handled in NCHUNKS chunks of CHUNK elements. */
+#define M 100000
+#define CHUNK 1000
+#define NCHUNKS (M / CHUNK)
+
+#define TID threadIdx.x
+
+/* Device copy of the data, set from the host with cudaMemcpyToSymbol. */
+__device__ float *cuda_data;
+
+
+/* Square the CHUNK values at data, the threads of the block striding over them. */
+__device__ void square(float *data)
+{
+  int i;
+  for( i = TID; i < CHUNK; i += blockDim.x )
+    data[i] = data[i] * data[i];
+}
+
+/* Double the CHUNK values at data. */
+__device__ void doubles(float *data)
+{
+  int i;
+  for( i = TID; i < CHUNK; i += blockDim.x )
+    data[i] = 2.0f * data[i];
+}
+
+/* Multiply the CHUNK values at data by 0.25. */
+__device__ void quarter(float *data)
+{
+  int i;
+  for( i = TID; i < CHUNK; i += blockDim.x )
+    data[i] = data[i] * 0.25f;
+}
+
+/* Reference kernel: block b copies chunk b of src to cuda_data, applies the
+   three operations and copies the result back. It is launched with CHUNK
+   threads per block, so each thread copies and transforms one element. */
+__global__ void Manual(float *src)
+{
+  int idx = blockIdx.x*CHUNK + threadIdx.x;
+  float *chunk = &cuda_data[blockIdx.x*CHUNK];
+
+  cuda_data[idx] = src[idx];
+
+  square(chunk);
+  doubles(chunk);
+  quarter(chunk);
+
+  src[idx] = cuda_data[idx];
+}
+
+
+/* Task function handed to the scheduler: data points to the chunk index and
+   type selects the operation applied to that chunk of cuda_data. */
+__device__ __noinline__ void gpuTest(int type , void *data)
+{
+  int datas = *(int*)data;
+
+  if(type == type_square)
+  {
+    square(&cuda_data[datas*CHUNK]);
+  }else if( type == type_double )
+  {
+    doubles(&cuda_data[datas*CHUNK]);
+  }else if( type == type_quarter )
+  {
+    quarter(&cuda_data[datas*CHUNK]);
+  }
+}
+
+__device__ qsched_funtype function = gpuTest;
+
+/* Print the device address of the task function; a pointer needs %p. */
+__global__ void Setup()
+{
+  printf("%p\n", (void *)function);
+}
+
+/* Report every element that differs from i*i*0.5, the value expected after
+   square, double and quarter have been applied to array[i] = i. */
+static void check_results(const float *array)
+{
+  int i;
+  for(i = 0; i < M; i++ )
+  {
+    float expected = ((float)i)*((float)i)*0.5f;
+    if(array[i] != expected)
+      printf("%i wrong, %.3f != %.3f\n", i, array[i], expected );
+  }
+}
+
+
+/* Run the three-step pipeline on every chunk through the scheduler, check the
+   result, then repeat the same work with the hand-written Manual kernel. */
+int main ( int argc , char *argv[] ) {
+  float *array, *cuda_array, *device_array;
+  int i, k=0;
+  qsched_funtype func;
+  struct qsched s;
+  qsched_task_t *tid;
+  qsched_res_t *rid;
+  ticks tic, toc_run;
+  double itpms = 1000.0 / CPU_TPS;
+
+  /* Initialize the scheduler. */
+  qsched_init( &s , 1 , qsched_flag_none );
+  cudaDeviceReset();
+  cudaSetDevice(0);
+  Setup<<<1,1>>>();
+  if(cudaDeviceSynchronize() != cudaSuccess)
+    error("Setup Failed: %s", cudaGetErrorString(cudaPeekAtLastError()));
+
+  if( cudaMalloc(&device_array , M*sizeof(float) ) != cudaSuccess )
+    error("Failed to allocate device array: %s", cudaGetErrorString(cudaPeekAtLastError()));
+  if( cudaMemcpyToSymbol( cuda_data , &device_array,sizeof(float*), 0 , cudaMemcpyHostToDevice) != cudaSuccess )
+    error("Failed to copy array pointer to device: %s", cudaGetErrorString(cudaPeekAtLastError()));
+
+  /* Allocate and fill the original array*/
+  if( cudaHostAlloc( &array , M*sizeof(float) , cudaHostAllocMapped) != cudaSuccess)
+    error("Failed to allocate array");
+  for( i = 0; i < M ; i++ )
+    array[i] = (float)i;
+
+  /* Allocate and init the task ID and resource ID matrix. */
+  if( cudaMallocHost(&tid , sizeof(qsched_task_t) * 3 * NCHUNKS ) != cudaSuccess )
+    error("Failed to allocate tid");
+  if( cudaMallocHost(&rid , sizeof(qsched_res_t) * NCHUNKS ) != cudaSuccess)
+    error("Failed to allocate rid");
+
+  for ( k = 0 ; k < 3*NCHUNKS ; k++ )
+    tid[k] = qsched_task_none;
+
+  /* One resource per chunk, tying the host chunk to its device counterpart. */
+  for ( k = 0 ; k < NCHUNKS ; k++ ) {
+    if(cudaHostGetDevicePointer(&cuda_array , &array[k*CHUNK] , 0) != cudaSuccess)
+      error("Failed to get device pointer for data %i: %s",k, cudaGetErrorString(cudaPeekAtLastError()));
+    rid[k] = qsched_addres( &s , qsched_owner_none , qsched_res_none , cuda_array, sizeof(float) * CHUNK, &device_array[k*CHUNK]);
+  }
+
+  /*Build the tasks: square -> double -> quarter for every chunk. */
+  for( k = 0; k < NCHUNKS; k++){
+    tid[3*k] = qsched_addtask(&s , type_square , task_flag_none , &k , sizeof(int) , 0 );
+    qsched_adduse(&s , tid[3*k] , rid[k] );
+    tid[3*k+1] = qsched_addtask(&s , type_double , task_flag_none , &k , sizeof(int) , 0 );
+    qsched_adduse( &s , tid[3*k+1] , rid[k] );
+    qsched_addunlock( &s , tid[3*k] , tid[3*k+1] );
+    tid[3*k+2] = qsched_addtask(&s , type_quarter , task_flag_none , &k , sizeof(int) , 0 );
+    qsched_adduse( &s , tid[3*k+2] , rid[k] );
+    qsched_addunlock( &s , tid[3*k+1] , tid[3*k+2] );
+  }
+
+  if( cudaMemcpyFromSymbol( &func , function , sizeof(qsched_funtype) ) != cudaSuccess)
+    error("Failed to copy function pointer from device");
+
+  tic = getticks();
+  qsched_run_CUDA( &s , func );
+  toc_run = getticks();
+  message( "qsched_run_CUDA took %.3f ms..." , ((double)(toc_run - tic)) * itpms );
+
+  check_results(array);
+
+  printf("Starting second run\n");
+  qsched_run_CUDA( &s , func );
+  printf("Second run complete\n");
+
+  /* The returned timers are not inspected here. */
+  struct task *completed_tasks = qsched_get_timers( &s, s.count );
+  (void)completed_tasks;
+
+  /* Start from a clean device for the manual reference run. */
+  cudaDeviceReset();
+
+  if( cudaHostAlloc( &array , M*sizeof(float) , cudaHostAllocMapped) != cudaSuccess)
+    error("Failed to allocate array");
+  for( i = 0; i < M ; i++ )
+    array[i] = i;
+
+  if(cudaHostGetDevicePointer(&cuda_array , array , 0) != cudaSuccess)
+    error("Failed to get device pointer for data: %s",cudaGetErrorString(cudaPeekAtLastError()));
+  if( cudaMalloc(&device_array , M*sizeof(float) ) != cudaSuccess )
+    error("Failed to allocate device array: %s", cudaGetErrorString(cudaPeekAtLastError()));
+  if( cudaMemcpyToSymbol( cuda_data , &device_array,sizeof(float*), 0 , cudaMemcpyHostToDevice) != cudaSuccess )
+    error("Failed to copy array pointer to device: %s", cudaGetErrorString(cudaPeekAtLastError()));
+
+  tic = getticks();
+  Manual<<<NCHUNKS, CHUNK>>>(cuda_array);
+  if(cudaDeviceSynchronize() != cudaSuccess)
+    error("Manual kernel failed: %s", cudaGetErrorString(cudaPeekAtLastError()));
+  toc_run = getticks();
+  message( "Manual run took %.3f ms" , ((double)(toc_run - tic)) * itpms );
+
+  check_results(array);
+
+  return 0;
+}
diff --git a/examples/test_hierarchy.cu b/examples/test_hierarchy.cu
new file mode 100644
index 0000000000000000000000000000000000000000..bd911ec48f47904273506b743bb4c057c21e3c0b
--- /dev/null
+++ b/examples/test_hierarchy.cu
@@ -0,0 +1,79 @@
+/* Config parameters. */
+#include "../config.h"
+
+/* Standard includes. */
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <unistd.h>
+#include <math.h>
+
+/* Local includes. */
+extern "C"{
+#include "quicksched.h"
+}
+#include "cuda_queue.h"
+
+/* Number of ints backing the resources. */
+#define RES_SIZE 6
+
+__device__ int device_array[RES_SIZE];
+
+/* Task function: every task just prints a greeting. */
+__device__ void runner ( int type , void *data ) {
+
+  printf("Hi\n");
+
+}
+
+__device__ qsched_funtype function = runner;
+
+/* Print the device address of the task function; a pointer needs %p. */
+__global__ void Setup()
+{
+  printf("%p\n", (void *)function);
+}
+
+qsched_funtype func;
+
+/* Build two top-level resources with two child resources each, add one task
+   per top-level resource that locks the parent and both children, and run
+   the tasks on the GPU. */
+int main ( int argc , char *argv[] ) {
+  int *data;
+  int *res_data;
+  qsched_res_t res1, res2, res11, res12, res21, res22;
+  qsched_task_t task_id;
+  struct qsched s;
+
+  cudaDeviceReset();
+  cudaSetDevice(0);
+  Setup<<<1,1>>>();
+  if(cudaDeviceSynchronize() != cudaSuccess)
+    error("Setup Failed: %s", cudaGetErrorString(cudaPeekAtLastError()));
+
+  qsched_init( &s , 1 , qsched_flag_none );
+
+  /* Zero-initialised so the task payload passed below is well defined. */
+  data = (int*) calloc(1, sizeof(int));
+  res_data = (int*) calloc(RES_SIZE, sizeof(int));
+  if(data == NULL || res_data == NULL)
+    error("Failed to allocate host data");
+
+  res2 = qsched_addres(&s , qsched_owner_none , qsched_res_none , &res_data[3], sizeof(int)*3, NULL);
+  res1 = qsched_addres(&s , qsched_owner_none , qsched_res_none , &res_data[0], sizeof(int)*3, NULL);
+  res11 = qsched_addres( &s, qsched_owner_none, res1, &res_data[2], sizeof(int), NULL);
+  res21 = qsched_addres( &s, qsched_owner_none, res2, &res_data[4], sizeof(int), NULL);
+  res12 = qsched_addres( &s, qsched_owner_none, res1, &res_data[1], sizeof(int), NULL);
+  res22 = qsched_addres( &s, qsched_owner_none, res2, &res_data[5], sizeof(int), NULL);
+
+  task_id = qsched_addtask(&s, 1, task_flag_none, data, sizeof(int), 1);
+  qsched_addlock(&s, task_id, res1);
+  qsched_addlock(&s, task_id, res11);
+  qsched_addlock(&s, task_id, res12);
+  task_id = qsched_addtask(&s, 2, task_flag_none, data, sizeof(int), 1);
+  qsched_addlock(&s, task_id, res2);
+  qsched_addlock(&s, task_id, res21);
+  qsched_addlock(&s, task_id, res22);
+  if( cudaMemcpyFromSymbol( &func , function , sizeof(qsched_funtype) ) != cudaSuccess)
+    error("Failed to copy function pointer from device");
+
+  qsched_run_CUDA( &s , func );
+
+  free(data);
+  free(res_data);
+
+  return 0;
+}
diff --git a/src/CUDACompile.sh b/src/CUDACompile.sh
new file mode 100755
index 0000000000000000000000000000000000000000..df36d95939ff236fe45ef30ec5a0afa26bb8a7c2
--- /dev/null
+++ b/src/CUDACompile.sh
@@ -0,0 +1,38 @@
+#!/bin/bash
+# FLAGS2, DEBUG_FLAGS and CFLAGS are optional extras; none of the commands
+# below reference them.
+FLAGS2="-Xcompiler=-fsanitize=address -Xcompiler=-fno-omit-frame-pointer"
+DEBUG_FLAGS="-G -DDEBUG_GPU"
+FLAGS="-O3 -g -DCPU_TPS=3.1e9 -lineinfo -src-in-ptx -Xptxas -dlcm=cg --maxrregcount=32 -gencode arch=compute_30,code=sm_30 -ftz=true -fmad=true -DFPTYPE_SINGLE -lgomp -DWITH_CUDA -DTIMERS -ccbin=/usr/bin/gcc-4.8"
+# -DGPU_locks -Xptxas -dlcm=cg -Xptxas="-v""
+# -DNO_LOADS
+
+CFLAGS="-fsanitize=address -fno-omit-frame-pointer"
+
+gcc-4.8 -std=gnu99 -DHAVE_CONFIG_H -I. -I.. -g -O3 -Wall -Werror -ffast-math -fstrict-aliasing -ftree-vectorize -funroll-loops -mmmx -msse -msse2 -msse3 -mssse3 -msse4.1 -msse4.2 -mavx -fopenmp -DTIMERS -DWITH_CUDA -g -O2 -march=native -pthread -MT qsched.lo -MD -MP -MF .deps/qsched.Tpo -c qsched.c -o qsched.o -lprofiler
+gcc-4.8 -std=gnu99 -DHAVE_CONFIG_H -I. -I.. 
-g -O3 -Wall -Werror -ffast-math -fstrict-aliasing -ftree-vectorize -funroll-loops -mmmx -msse -msse2 -msse3 -mssse3 -msse4.1 -msse4.2 -mavx -fopenmp -DTIMERS -DWITH_CUDA -g -O2 -march=native -pthread -MT queue.lo -MD -MP -MF .deps/queue.Tpo -c queue.c -o queue.o -lprofiler
+
+# Location of the CUDA toolkit. Both can be overridden from the environment;
+# the defaults are the paths this script used before.
+CUDA_HOME="${CUDA_HOME:-/home/aidan/cuda_6.0}"
+NVCC="${NVCC:-$CUDA_HOME/bin/nvcc}"
+CUDA_LIBDIRS="-L$CUDA_HOME/lib -L$CUDA_HOME/lib64"
+
+# Library without GPU locks.
+"$NVCC" $FLAGS -dc cuda_queue.cu qsched.o queue.o -lprofiler
+
+"$NVCC" $FLAGS -lib cuda_queue.o qsched.o queue.o -o .libs/libquicksched_cuda.a -lprofiler
+
+# Library with GPU locks; this recompiles cuda_queue.o with -DGPU_locks.
+"$NVCC" $FLAGS -DGPU_locks -dc cuda_queue.cu qsched.o queue.o -lprofiler
+
+"$NVCC" $FLAGS -lib cuda_queue.o qsched.o queue.o -o .libs/libquicksched_cuda_locks.a -lprofiler
+
+# The examples are built from their own directory; stop if it is missing so
+# the commands below do not run in src/.
+cd ../examples || exit 1
+
+"$NVCC" $FLAGS -dc -m64 -I../src $CUDA_LIBDIRS -lcudart -lcuda test_gpu_simple.cu -lprofiler
+
+"$NVCC" $FLAGS -m64 -I../src $CUDA_LIBDIRS -Xnvlink -v test_gpu_simple.o ../src/.libs/libquicksched_cuda.a -o simple_cuda_test -lprofiler
+
+
+"$NVCC" $FLAGS -dc -m64 -I../src -lf77blas -lcblas -latlas -lm -L/home/aidan/ATLAS/ATLAS_linux/lib/ $CUDA_LIBDIRS -lcudart -lcuda /usr/lib64/atlas/libcblas.a /usr/lib64/atlas/libptcblas.a test_qr.cu -lprofiler
+
+"$NVCC" $FLAGS -m64 -I../src -lf77blas -lcblas -latlas -lm -L/home/aidan/ATLAS/ATLAS_linux/lib/ $CUDA_LIBDIRS -Xnvlink -v test_qr.o ../src/.libs/libquicksched_cuda.a /usr/lib64/atlas/libcblas.a /usr/lib64/atlas/libptcblas.a -o qr_cuda_test -lprofiler
+
+"$NVCC" $FLAGS -dc -m64 -I../src $CUDA_LIBDIRS -lcudart -lcuda test_hierarchy.cu -lprofiler
+
+"$NVCC" $FLAGS -m64 -I../src $CUDA_LIBDIRS -Xnvlink -v test_hierarchy.o ../src/.libs/libquicksched_cuda.a -o test_heirarchy -lprofiler
+
+"$NVCC" $FLAGS -dc -m64 -I../src $CUDA_LIBDIRS -lcudart -lcuda test_bh.cu -lprofiler
+
+# Written to its own binary; it used to overwrite test_heirarchy.
+"$NVCC" $FLAGS -m64 -I../src $CUDA_LIBDIRS -Xnvlink -v test_bh.o ../src/.libs/libquicksched_cuda.a -o test_bh -lprofiler
diff --git a/src/Makefile.in b/src/Makefile.in
new file mode 100644
index 0000000000000000000000000000000000000000..4a3e029bb2bceef56d7fcdaf5167ff8d03c78f52
--- /dev/null
+++ b/src/Makefile.in
@@ -0,0 +1,659 @@
+# Makefile.in generated by automake 1.11.1 from Makefile.am.
+# @configure_input@
+
+# Copyright (C) 1994, 1995, 1996, 1997, 1998, 1999, 2000, 2001, 2002,
+# 2003, 2004, 2005, 2006, 2007, 2008, 2009 Free Software Foundation,
+# Inc.
+# This Makefile.in is free software; the Free Software Foundation
+# gives unlimited permission to copy and/or distribute it,
+# with or without modifications, as long as this notice is preserved.
+
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY, to the extent permitted by law; without
+# even the implied warranty of MERCHANTABILITY or FITNESS FOR A
+# PARTICULAR PURPOSE.
+
+@SET_MAKE@
+
+# This file is part of Quickqsched.
+# Coypright (c) 2013 Pedro Gonnet (pedro.gonnet@durham.ac.uk),
+#
+# This program is free software: you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation, either version 3 of the License, or
+# (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+# +# You should have received a copy of the GNU General Public License +# along with this program. If not, see <http://www.gnu.org/licenses/>. + + +VPATH = @srcdir@ +pkgdatadir = $(datadir)/@PACKAGE@ +pkgincludedir = $(includedir)/@PACKAGE@ +pkglibdir = $(libdir)/@PACKAGE@ +pkglibexecdir = $(libexecdir)/@PACKAGE@ +am__cd = CDPATH="$${ZSH_VERSION+.}$(PATH_SEPARATOR)" && cd +install_sh_DATA = $(install_sh) -c -m 644 +install_sh_PROGRAM = $(install_sh) -c +install_sh_SCRIPT = $(install_sh) -c +INSTALL_HEADER = $(INSTALL_DATA) +transform = $(program_transform_name) +NORMAL_INSTALL = : +PRE_INSTALL = : +POST_INSTALL = : +NORMAL_UNINSTALL = : +PRE_UNINSTALL = : +POST_UNINSTALL = : +build_triplet = @build@ +host_triplet = @host@ + +# Build a CUDA-enabled version too? +@HAVE_CUDA_TRUE@am__append_1 = libquicksched_cuda.la +subdir = src +DIST_COMMON = $(include_HEADERS) $(srcdir)/Makefile.am \ + $(srcdir)/Makefile.in +ACLOCAL_M4 = $(top_srcdir)/aclocal.m4 +am__aclocal_m4_deps = $(top_srcdir)/m4/acx_pthread.m4 \ + $(top_srcdir)/m4/ax_check_compile_flag.m4 \ + $(top_srcdir)/m4/ax_check_compiler_flags.m4 \ + $(top_srcdir)/m4/ax_ext.m4 \ + $(top_srcdir)/m4/ax_func_posix_memalign.m4 \ + $(top_srcdir)/m4/ax_gcc_archflag.m4 \ + $(top_srcdir)/m4/ax_gcc_x86_cpuid.m4 \ + $(top_srcdir)/m4/ax_openmp.m4 \ + $(top_srcdir)/m4/ax_prog_doxygen.m4 \ + $(top_srcdir)/m4/libtool.m4 $(top_srcdir)/m4/ltoptions.m4 \ + $(top_srcdir)/m4/ltsugar.m4 $(top_srcdir)/m4/ltversion.m4 \ + $(top_srcdir)/m4/lt~obsolete.m4 $(top_srcdir)/configure.in +am__configure_deps = $(am__aclocal_m4_deps) $(CONFIGURE_DEPENDENCIES) \ + $(ACLOCAL_M4) +mkinstalldirs = $(install_sh) -d +CONFIG_HEADER = $(top_builddir)/config.h +CONFIG_CLEAN_FILES = +CONFIG_CLEAN_VPATH_FILES = +am__vpath_adj_setup = srcdirstrip=`echo "$(srcdir)" | sed 's|.|.|g'`; +am__vpath_adj = case $$p in \ + $(srcdir)/*) f=`echo "$$p" | sed "s|^$$srcdirstrip/||"`;; \ + *) f=$$p;; \ + esac; +am__strip_dir = f=`echo $$p | sed -e 's|^.*/||'`; +am__install_max 
= 40 +am__nobase_strip_setup = \ + srcdirstrip=`echo "$(srcdir)" | sed 's/[].[^$$\\*|]/\\\\&/g'` +am__nobase_strip = \ + for p in $$list; do echo "$$p"; done | sed -e "s|$$srcdirstrip/||" +am__nobase_list = $(am__nobase_strip_setup); \ + for p in $$list; do echo "$$p $$p"; done | \ + sed "s| $$srcdirstrip/| |;"' / .*\//!s/ .*/ ./; s,\( .*\)/[^/]*$$,\1,' | \ + $(AWK) 'BEGIN { files["."] = "" } { files[$$2] = files[$$2] " " $$1; \ + if (++n[$$2] == $(am__install_max)) \ + { print $$2, files[$$2]; n[$$2] = 0; files[$$2] = "" } } \ + END { for (dir in files) print dir, files[dir] }' +am__base_list = \ + sed '$$!N;$$!N;$$!N;$$!N;$$!N;$$!N;$$!N;s/\n/ /g' | \ + sed '$$!N;$$!N;$$!N;$$!N;s/\n/ /g' +am__installdirs = "$(DESTDIR)$(libdir)" "$(DESTDIR)$(includedir)" +LTLIBRARIES = $(lib_LTLIBRARIES) +libquicksched_la_LIBADD = +am_libquicksched_la_OBJECTS = qsched.lo queue.lo +libquicksched_la_OBJECTS = $(am_libquicksched_la_OBJECTS) +libquicksched_cuda_la_LIBADD = +am__libquicksched_cuda_la_SOURCES_DIST = qsched.c queue.c \ + cuda_queue.cu +@HAVE_CUDA_TRUE@am__objects_1 = libquicksched_cuda_la-qsched.lo \ +@HAVE_CUDA_TRUE@ libquicksched_cuda_la-queue.lo cuda_queue.lo +@HAVE_CUDA_TRUE@am_libquicksched_cuda_la_OBJECTS = $(am__objects_1) +libquicksched_cuda_la_OBJECTS = $(am_libquicksched_cuda_la_OBJECTS) +libquicksched_cuda_la_LINK = $(LIBTOOL) --tag=CC $(AM_LIBTOOLFLAGS) \ + $(LIBTOOLFLAGS) --mode=link $(CCLD) \ + $(libquicksched_cuda_la_CFLAGS) $(CFLAGS) $(AM_LDFLAGS) \ + $(LDFLAGS) -o $@ +@HAVE_CUDA_TRUE@am_libquicksched_cuda_la_rpath = -rpath $(libdir) +DEFAULT_INCLUDES = -I.@am__isrc@ -I$(top_builddir) +depcomp = $(SHELL) $(top_srcdir)/depcomp +am__depfiles_maybe = depfiles +am__mv = mv -f +COMPILE = $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) \ + $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) +LTCOMPILE = $(LIBTOOL) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) \ + --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) \ + $(AM_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) 
$(CFLAGS) +CCLD = $(CC) +LINK = $(LIBTOOL) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) \ + --mode=link $(CCLD) $(AM_CFLAGS) $(CFLAGS) $(AM_LDFLAGS) \ + $(LDFLAGS) -o $@ +SOURCES = $(libquicksched_la_SOURCES) $(libquicksched_cuda_la_SOURCES) +DIST_SOURCES = $(libquicksched_la_SOURCES) \ + $(am__libquicksched_cuda_la_SOURCES_DIST) +HEADERS = $(include_HEADERS) +ETAGS = etags +CTAGS = ctags +DISTFILES = $(DIST_COMMON) $(DIST_SOURCES) $(TEXINFOS) $(EXTRA_DIST) +ACLOCAL = @ACLOCAL@ +AMTAR = @AMTAR@ +AR = @AR@ +AUTOCONF = @AUTOCONF@ +AUTOHEADER = @AUTOHEADER@ +AUTOMAKE = @AUTOMAKE@ +AWK = @AWK@ +CC = @CC@ +CCDEPMODE = @CCDEPMODE@ +CFLAGS = @CFLAGS@ +CPP = @CPP@ +CPPFLAGS = @CPPFLAGS@ +CUDA_CFLAGS = @CUDA_CFLAGS@ +CUDA_LIBS = @CUDA_LIBS@ +CYGPATH_W = @CYGPATH_W@ +DEFS = @DEFS@ +DEPDIR = @DEPDIR@ +DOXYGEN_PAPER_SIZE = @DOXYGEN_PAPER_SIZE@ +DSYMUTIL = @DSYMUTIL@ +DUMPBIN = @DUMPBIN@ +DX_CONFIG = @DX_CONFIG@ +DX_DOCDIR = @DX_DOCDIR@ +DX_DOT = @DX_DOT@ +DX_DOXYGEN = @DX_DOXYGEN@ +DX_DVIPS = @DX_DVIPS@ +DX_EGREP = @DX_EGREP@ +DX_ENV = @DX_ENV@ +DX_FLAG_chi = @DX_FLAG_chi@ +DX_FLAG_chm = @DX_FLAG_chm@ +DX_FLAG_doc = @DX_FLAG_doc@ +DX_FLAG_dot = @DX_FLAG_dot@ +DX_FLAG_html = @DX_FLAG_html@ +DX_FLAG_man = @DX_FLAG_man@ +DX_FLAG_pdf = @DX_FLAG_pdf@ +DX_FLAG_ps = @DX_FLAG_ps@ +DX_FLAG_rtf = @DX_FLAG_rtf@ +DX_FLAG_xml = @DX_FLAG_xml@ +DX_HHC = @DX_HHC@ +DX_LATEX = @DX_LATEX@ +DX_MAKEINDEX = @DX_MAKEINDEX@ +DX_PDFLATEX = @DX_PDFLATEX@ +DX_PERL = @DX_PERL@ +DX_PROJECT = @DX_PROJECT@ +ECHO_C = @ECHO_C@ +ECHO_N = @ECHO_N@ +ECHO_T = @ECHO_T@ +EGREP = @EGREP@ +EXEEXT = @EXEEXT@ +FGREP = @FGREP@ +GREP = @GREP@ +INSTALL = @INSTALL@ +INSTALL_DATA = @INSTALL_DATA@ +INSTALL_PROGRAM = @INSTALL_PROGRAM@ +INSTALL_SCRIPT = @INSTALL_SCRIPT@ +INSTALL_STRIP_PROGRAM = @INSTALL_STRIP_PROGRAM@ +LD = @LD@ +LDFLAGS = @LDFLAGS@ +LIBOBJS = @LIBOBJS@ +LIBS = @LIBS@ +LIBTOOL = @LIBTOOL@ +LIPO = @LIPO@ +LN_S = @LN_S@ +LTLIBOBJS = @LTLIBOBJS@ +MAKEINFO = @MAKEINFO@ +MKDIR_P = @MKDIR_P@ +NM = @NM@ +NMEDIT = 
@NMEDIT@ +NVCC = @NVCC@ +OBJDUMP = @OBJDUMP@ +OBJEXT = @OBJEXT@ +OPENMP_CFLAGS = @OPENMP_CFLAGS@ +OTOOL = @OTOOL@ +OTOOL64 = @OTOOL64@ +PACKAGE = @PACKAGE@ +PACKAGE_BUGREPORT = @PACKAGE_BUGREPORT@ +PACKAGE_NAME = @PACKAGE_NAME@ +PACKAGE_STRING = @PACKAGE_STRING@ +PACKAGE_TARNAME = @PACKAGE_TARNAME@ +PACKAGE_VERSION = @PACKAGE_VERSION@ +PATH_SEPARATOR = @PATH_SEPARATOR@ +PRTDIAG = @PRTDIAG@ +PTHREAD_CC = @PTHREAD_CC@ +PTHREAD_CFLAGS = @PTHREAD_CFLAGS@ +PTHREAD_LIBS = @PTHREAD_LIBS@ +RANLIB = @RANLIB@ +SED = @SED@ +SET_MAKE = @SET_MAKE@ +SHELL = @SHELL@ +SIMD_FLAGS = @SIMD_FLAGS@ +STRIP = @STRIP@ +VERSION = @VERSION@ +abs_builddir = @abs_builddir@ +abs_srcdir = @abs_srcdir@ +abs_top_builddir = @abs_top_builddir@ +abs_top_srcdir = @abs_top_srcdir@ +ac_ct_CC = @ac_ct_CC@ +ac_ct_DUMPBIN = @ac_ct_DUMPBIN@ +acx_pthread_config = @acx_pthread_config@ +am__include = @am__include@ +am__leading_dot = @am__leading_dot@ +am__quote = @am__quote@ +am__tar = @am__tar@ +am__untar = @am__untar@ +bindir = @bindir@ +build = @build@ +build_alias = @build_alias@ +build_cpu = @build_cpu@ +build_os = @build_os@ +build_vendor = @build_vendor@ +builddir = @builddir@ +datadir = @datadir@ +datarootdir = @datarootdir@ +docdir = @docdir@ +dvidir = @dvidir@ +exec_prefix = @exec_prefix@ +host = @host@ +host_alias = @host_alias@ +host_cpu = @host_cpu@ +host_os = @host_os@ +host_vendor = @host_vendor@ +htmldir = @htmldir@ +includedir = @includedir@ +infodir = @infodir@ +install_sh = @install_sh@ +libdir = @libdir@ +libexecdir = @libexecdir@ +localedir = @localedir@ +localstatedir = @localstatedir@ +lt_ECHO = @lt_ECHO@ +mandir = @mandir@ +mkdir_p = @mkdir_p@ +oldincludedir = @oldincludedir@ +pdfdir = @pdfdir@ +prefix = @prefix@ +program_transform_name = @program_transform_name@ +psdir = @psdir@ +sbindir = @sbindir@ +sharedstatedir = @sharedstatedir@ +srcdir = @srcdir@ +sysconfdir = @sysconfdir@ +target_alias = @target_alias@ +top_build_prefix = @top_build_prefix@ +top_builddir = @top_builddir@ 
+top_srcdir = @top_srcdir@ + +# Automake stuff +AUTOMAKE_OPTIONS = gnu + +# Add the debug flag to the whole thing +AM_CFLAGS = -g -O3 -Wall -Werror -ffast-math -fstrict-aliasing -ftree-vectorize \ + -funroll-loops $(SIMD_FLAGS) $(OPENMP_CFLAGS) -DTIMERS \ + #-fsanitize=address -fno-omit-frame-pointer + + +# Assign a "safe" version number +AM_LDFLAGS = -version-info 0:0:0 + +# Build the libquicksched library +lib_LTLIBRARIES = libquicksched.la $(am__append_1) +libquicksched_la_SOURCES = qsched.c queue.c + +# List required headers +include_HEADERS = atomic.h lock.h queue.h qsched.h task.h res.h error.h qsched.h +@HAVE_CUDA_FALSE@SOURCES_CUDA = + +# CUDA sources +@HAVE_CUDA_TRUE@SOURCES_CUDA = qsched.c queue.c cuda_queue.cu +@HAVE_CUDA_TRUE@CUDA_MYFLAGS = -O3 -g -DCPU_TPS=3.1e9 -Xnvlink -rdc=true -lineinfo -src-in-ptx --maxrregcount=32 -Xptxas="-v" -Xptxas -dlcm=cg -gencode arch=compute_30,code=sm_30 -ftz=true -fmad=true -DFPTYPE_SINGLE -DWITH_CUDA #-fsanitize=address -fno-omit-frame-pointer +@HAVE_CUDA_TRUE@libquicksched_cuda_la_SOURCES = $(SOURCES_CUDA) +@HAVE_CUDA_TRUE@libquicksched_cuda_la_CFLAGS = -DFPTYPE_SINGLE $(AM_CFLAGS) -DWITH_CUDA $(CUDA_CFLAGS) +all: all-am + +.SUFFIXES: +.SUFFIXES: .c .cu .lo .o .obj +$(srcdir)/Makefile.in: $(srcdir)/Makefile.am $(am__configure_deps) + @for dep in $?; do \ + case '$(am__configure_deps)' in \ + *$$dep*) \ + ( cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh ) \ + && { if test -f $@; then exit 0; else break; fi; }; \ + exit 1;; \ + esac; \ + done; \ + echo ' cd $(top_srcdir) && $(AUTOMAKE) --gnu src/Makefile'; \ + $(am__cd) $(top_srcdir) && \ + $(AUTOMAKE) --gnu src/Makefile +.PRECIOUS: Makefile +Makefile: $(srcdir)/Makefile.in $(top_builddir)/config.status + @case '$?' 
in \ + *config.status*) \ + cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh;; \ + *) \ + echo ' cd $(top_builddir) && $(SHELL) ./config.status $(subdir)/$@ $(am__depfiles_maybe)'; \ + cd $(top_builddir) && $(SHELL) ./config.status $(subdir)/$@ $(am__depfiles_maybe);; \ + esac; + +$(top_builddir)/config.status: $(top_srcdir)/configure $(CONFIG_STATUS_DEPENDENCIES) + cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh + +$(top_srcdir)/configure: $(am__configure_deps) + cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh +$(ACLOCAL_M4): $(am__aclocal_m4_deps) + cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh +$(am__aclocal_m4_deps): +install-libLTLIBRARIES: $(lib_LTLIBRARIES) + @$(NORMAL_INSTALL) + test -z "$(libdir)" || $(MKDIR_P) "$(DESTDIR)$(libdir)" + @list='$(lib_LTLIBRARIES)'; test -n "$(libdir)" || list=; \ + list2=; for p in $$list; do \ + if test -f $$p; then \ + list2="$$list2 $$p"; \ + else :; fi; \ + done; \ + test -z "$$list2" || { \ + echo " $(LIBTOOL) $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=install $(INSTALL) $(INSTALL_STRIP_FLAG) $$list2 '$(DESTDIR)$(libdir)'"; \ + $(LIBTOOL) $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=install $(INSTALL) $(INSTALL_STRIP_FLAG) $$list2 "$(DESTDIR)$(libdir)"; \ + } + +uninstall-libLTLIBRARIES: + @$(NORMAL_UNINSTALL) + @list='$(lib_LTLIBRARIES)'; test -n "$(libdir)" || list=; \ + for p in $$list; do \ + $(am__strip_dir) \ + echo " $(LIBTOOL) $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=uninstall rm -f '$(DESTDIR)$(libdir)/$$f'"; \ + $(LIBTOOL) $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=uninstall rm -f "$(DESTDIR)$(libdir)/$$f"; \ + done + +clean-libLTLIBRARIES: + -test -z "$(lib_LTLIBRARIES)" || rm -f $(lib_LTLIBRARIES) + @list='$(lib_LTLIBRARIES)'; for p in $$list; do \ + dir="`echo $$p | sed -e 's|/[^/]*$$||'`"; \ + test "$$dir" != "$$p" || dir=.; \ + echo "rm -f \"$${dir}/so_locations\""; \ + rm -f "$${dir}/so_locations"; \ + done +libquicksched.la: $(libquicksched_la_OBJECTS) 
$(libquicksched_la_DEPENDENCIES) + $(LINK) -rpath $(libdir) $(libquicksched_la_OBJECTS) $(libquicksched_la_LIBADD) $(LIBS) +libquicksched_cuda.la: $(libquicksched_cuda_la_OBJECTS) $(libquicksched_cuda_la_DEPENDENCIES) + $(libquicksched_cuda_la_LINK) $(am_libquicksched_cuda_la_rpath) $(libquicksched_cuda_la_OBJECTS) $(libquicksched_cuda_la_LIBADD) $(LIBS) + +mostlyclean-compile: + -rm -f *.$(OBJEXT) + +distclean-compile: + -rm -f *.tab.c + +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/libquicksched_cuda_la-qsched.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/libquicksched_cuda_la-queue.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/qsched.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/queue.Plo@am__quote@ + +.c.o: +@am__fastdepCC_TRUE@ $(COMPILE) -MT $@ -MD -MP -MF $(DEPDIR)/$*.Tpo -c -o $@ $< +@am__fastdepCC_TRUE@ $(am__mv) $(DEPDIR)/$*.Tpo $(DEPDIR)/$*.Po +@AMDEP_TRUE@@am__fastdepCC_FALSE@ source='$<' object='$@' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(COMPILE) -c $< + +.c.obj: +@am__fastdepCC_TRUE@ $(COMPILE) -MT $@ -MD -MP -MF $(DEPDIR)/$*.Tpo -c -o $@ `$(CYGPATH_W) '$<'` +@am__fastdepCC_TRUE@ $(am__mv) $(DEPDIR)/$*.Tpo $(DEPDIR)/$*.Po +@AMDEP_TRUE@@am__fastdepCC_FALSE@ source='$<' object='$@' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(COMPILE) -c `$(CYGPATH_W) '$<'` + +.c.lo: +@am__fastdepCC_TRUE@ $(LTCOMPILE) -MT $@ -MD -MP -MF $(DEPDIR)/$*.Tpo -c -o $@ $< +@am__fastdepCC_TRUE@ $(am__mv) $(DEPDIR)/$*.Tpo $(DEPDIR)/$*.Plo +@AMDEP_TRUE@@am__fastdepCC_FALSE@ source='$<' object='$@' libtool=yes @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(LTCOMPILE) -c -o $@ $< + +libquicksched_cuda_la-qsched.lo: qsched.c 
+@am__fastdepCC_TRUE@ $(LIBTOOL) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(libquicksched_cuda_la_CFLAGS) $(CFLAGS) -MT libquicksched_cuda_la-qsched.lo -MD -MP -MF $(DEPDIR)/libquicksched_cuda_la-qsched.Tpo -c -o libquicksched_cuda_la-qsched.lo `test -f 'qsched.c' || echo '$(srcdir)/'`qsched.c +@am__fastdepCC_TRUE@ $(am__mv) $(DEPDIR)/libquicksched_cuda_la-qsched.Tpo $(DEPDIR)/libquicksched_cuda_la-qsched.Plo +@AMDEP_TRUE@@am__fastdepCC_FALSE@ source='qsched.c' object='libquicksched_cuda_la-qsched.lo' libtool=yes @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(LIBTOOL) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(libquicksched_cuda_la_CFLAGS) $(CFLAGS) -c -o libquicksched_cuda_la-qsched.lo `test -f 'qsched.c' || echo '$(srcdir)/'`qsched.c + +libquicksched_cuda_la-queue.lo: queue.c +@am__fastdepCC_TRUE@ $(LIBTOOL) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(libquicksched_cuda_la_CFLAGS) $(CFLAGS) -MT libquicksched_cuda_la-queue.lo -MD -MP -MF $(DEPDIR)/libquicksched_cuda_la-queue.Tpo -c -o libquicksched_cuda_la-queue.lo `test -f 'queue.c' || echo '$(srcdir)/'`queue.c +@am__fastdepCC_TRUE@ $(am__mv) $(DEPDIR)/libquicksched_cuda_la-queue.Tpo $(DEPDIR)/libquicksched_cuda_la-queue.Plo +@AMDEP_TRUE@@am__fastdepCC_FALSE@ source='queue.c' object='libquicksched_cuda_la-queue.lo' libtool=yes @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(LIBTOOL) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(libquicksched_cuda_la_CFLAGS) $(CFLAGS) -c -o 
libquicksched_cuda_la-queue.lo `test -f 'queue.c' || echo '$(srcdir)/'`queue.c + +mostlyclean-libtool: + -rm -f *.lo + +clean-libtool: + -rm -rf .libs _libs +install-includeHEADERS: $(include_HEADERS) + @$(NORMAL_INSTALL) + test -z "$(includedir)" || $(MKDIR_P) "$(DESTDIR)$(includedir)" + @list='$(include_HEADERS)'; test -n "$(includedir)" || list=; \ + for p in $$list; do \ + if test -f "$$p"; then d=; else d="$(srcdir)/"; fi; \ + echo "$$d$$p"; \ + done | $(am__base_list) | \ + while read files; do \ + echo " $(INSTALL_HEADER) $$files '$(DESTDIR)$(includedir)'"; \ + $(INSTALL_HEADER) $$files "$(DESTDIR)$(includedir)" || exit $$?; \ + done + +uninstall-includeHEADERS: + @$(NORMAL_UNINSTALL) + @list='$(include_HEADERS)'; test -n "$(includedir)" || list=; \ + files=`for p in $$list; do echo $$p; done | sed -e 's|^.*/||'`; \ + test -n "$$files" || exit 0; \ + echo " ( cd '$(DESTDIR)$(includedir)' && rm -f" $$files ")"; \ + cd "$(DESTDIR)$(includedir)" && rm -f $$files + +ID: $(HEADERS) $(SOURCES) $(LISP) $(TAGS_FILES) + list='$(SOURCES) $(HEADERS) $(LISP) $(TAGS_FILES)'; \ + unique=`for i in $$list; do \ + if test -f "$$i"; then echo $$i; else echo $(srcdir)/$$i; fi; \ + done | \ + $(AWK) '{ files[$$0] = 1; nonempty = 1; } \ + END { if (nonempty) { for (i in files) print i; }; }'`; \ + mkid -fID $$unique +tags: TAGS + +TAGS: $(HEADERS) $(SOURCES) $(TAGS_DEPENDENCIES) \ + $(TAGS_FILES) $(LISP) + set x; \ + here=`pwd`; \ + list='$(SOURCES) $(HEADERS) $(LISP) $(TAGS_FILES)'; \ + unique=`for i in $$list; do \ + if test -f "$$i"; then echo $$i; else echo $(srcdir)/$$i; fi; \ + done | \ + $(AWK) '{ files[$$0] = 1; nonempty = 1; } \ + END { if (nonempty) { for (i in files) print i; }; }'`; \ + shift; \ + if test -z "$(ETAGS_ARGS)$$*$$unique"; then :; else \ + test -n "$$unique" || unique=$$empty_fix; \ + if test $$# -gt 0; then \ + $(ETAGS) $(ETAGSFLAGS) $(AM_ETAGSFLAGS) $(ETAGS_ARGS) \ + "$$@" $$unique; \ + else \ + $(ETAGS) $(ETAGSFLAGS) $(AM_ETAGSFLAGS) $(ETAGS_ARGS) \ + 
$$unique; \ + fi; \ + fi +ctags: CTAGS +CTAGS: $(HEADERS) $(SOURCES) $(TAGS_DEPENDENCIES) \ + $(TAGS_FILES) $(LISP) + list='$(SOURCES) $(HEADERS) $(LISP) $(TAGS_FILES)'; \ + unique=`for i in $$list; do \ + if test -f "$$i"; then echo $$i; else echo $(srcdir)/$$i; fi; \ + done | \ + $(AWK) '{ files[$$0] = 1; nonempty = 1; } \ + END { if (nonempty) { for (i in files) print i; }; }'`; \ + test -z "$(CTAGS_ARGS)$$unique" \ + || $(CTAGS) $(CTAGSFLAGS) $(AM_CTAGSFLAGS) $(CTAGS_ARGS) \ + $$unique + +GTAGS: + here=`$(am__cd) $(top_builddir) && pwd` \ + && $(am__cd) $(top_srcdir) \ + && gtags -i $(GTAGS_ARGS) "$$here" + +distclean-tags: + -rm -f TAGS ID GTAGS GRTAGS GSYMS GPATH tags + +distdir: $(DISTFILES) + @srcdirstrip=`echo "$(srcdir)" | sed 's/[].[^$$\\*]/\\\\&/g'`; \ + topsrcdirstrip=`echo "$(top_srcdir)" | sed 's/[].[^$$\\*]/\\\\&/g'`; \ + list='$(DISTFILES)'; \ + dist_files=`for file in $$list; do echo $$file; done | \ + sed -e "s|^$$srcdirstrip/||;t" \ + -e "s|^$$topsrcdirstrip/|$(top_builddir)/|;t"`; \ + case $$dist_files in \ + */*) $(MKDIR_P) `echo "$$dist_files" | \ + sed '/\//!d;s|^|$(distdir)/|;s,/[^/]*$$,,' | \ + sort -u` ;; \ + esac; \ + for file in $$dist_files; do \ + if test -f $$file || test -d $$file; then d=.; else d=$(srcdir); fi; \ + if test -d $$d/$$file; then \ + dir=`echo "/$$file" | sed -e 's,/[^/]*$$,,'`; \ + if test -d "$(distdir)/$$file"; then \ + find "$(distdir)/$$file" -type d ! -perm -700 -exec chmod u+rwx {} \;; \ + fi; \ + if test -d $(srcdir)/$$file && test $$d != $(srcdir); then \ + cp -fpR $(srcdir)/$$file "$(distdir)$$dir" || exit 1; \ + find "$(distdir)/$$file" -type d ! 
-perm -700 -exec chmod u+rwx {} \;; \ + fi; \ + cp -fpR $$d/$$file "$(distdir)$$dir" || exit 1; \ + else \ + test -f "$(distdir)/$$file" \ + || cp -p $$d/$$file "$(distdir)/$$file" \ + || exit 1; \ + fi; \ + done +check-am: all-am +check: check-am +all-am: Makefile $(LTLIBRARIES) $(HEADERS) +installdirs: + for dir in "$(DESTDIR)$(libdir)" "$(DESTDIR)$(includedir)"; do \ + test -z "$$dir" || $(MKDIR_P) "$$dir"; \ + done +install: install-am +install-exec: install-exec-am +install-data: install-data-am +uninstall: uninstall-am + +install-am: all-am + @$(MAKE) $(AM_MAKEFLAGS) install-exec-am install-data-am + +installcheck: installcheck-am +install-strip: + $(MAKE) $(AM_MAKEFLAGS) INSTALL_PROGRAM="$(INSTALL_STRIP_PROGRAM)" \ + install_sh_PROGRAM="$(INSTALL_STRIP_PROGRAM)" INSTALL_STRIP_FLAG=-s \ + `test -z '$(STRIP)' || \ + echo "INSTALL_PROGRAM_ENV=STRIPPROG='$(STRIP)'"` install +mostlyclean-generic: + +clean-generic: + +distclean-generic: + -test -z "$(CONFIG_CLEAN_FILES)" || rm -f $(CONFIG_CLEAN_FILES) + -test . = "$(srcdir)" || test -z "$(CONFIG_CLEAN_VPATH_FILES)" || rm -f $(CONFIG_CLEAN_VPATH_FILES) + +maintainer-clean-generic: + @echo "This command is intended for maintainers to use" + @echo "it deletes files that may require special tools to rebuild." 
+clean: clean-am + +clean-am: clean-generic clean-libLTLIBRARIES clean-libtool \ + mostlyclean-am + +distclean: distclean-am + -rm -rf ./$(DEPDIR) + -rm -f Makefile +distclean-am: clean-am distclean-compile distclean-generic \ + distclean-tags + +dvi: dvi-am + +dvi-am: + +html: html-am + +html-am: + +info: info-am + +info-am: + +install-data-am: install-includeHEADERS + +install-dvi: install-dvi-am + +install-dvi-am: + +install-exec-am: install-libLTLIBRARIES + +install-html: install-html-am + +install-html-am: + +install-info: install-info-am + +install-info-am: + +install-man: + +install-pdf: install-pdf-am + +install-pdf-am: + +install-ps: install-ps-am + +install-ps-am: + +installcheck-am: + +maintainer-clean: maintainer-clean-am + -rm -rf ./$(DEPDIR) + -rm -f Makefile +maintainer-clean-am: distclean-am maintainer-clean-generic + +mostlyclean: mostlyclean-am + +mostlyclean-am: mostlyclean-compile mostlyclean-generic \ + mostlyclean-libtool + +pdf: pdf-am + +pdf-am: + +ps: ps-am + +ps-am: + +uninstall-am: uninstall-includeHEADERS uninstall-libLTLIBRARIES + +.MAKE: install-am install-strip + +.PHONY: CTAGS GTAGS all all-am check check-am clean clean-generic \ + clean-libLTLIBRARIES clean-libtool ctags distclean \ + distclean-compile distclean-generic distclean-libtool \ + distclean-tags distdir dvi dvi-am html html-am info info-am \ + install install-am install-data install-data-am install-dvi \ + install-dvi-am install-exec install-exec-am install-html \ + install-html-am install-includeHEADERS install-info \ + install-info-am install-libLTLIBRARIES install-man install-pdf \ + install-pdf-am install-ps install-ps-am install-strip \ + installcheck installcheck-am installdirs maintainer-clean \ + maintainer-clean-generic mostlyclean mostlyclean-compile \ + mostlyclean-generic mostlyclean-libtool pdf pdf-am ps ps-am \ + tags uninstall uninstall-am uninstall-includeHEADERS \ + uninstall-libLTLIBRARIES + +@HAVE_CUDA_TRUE@.cu: qsched.c queue.c +@HAVE_CUDA_TRUE@.cu.o: 
+@HAVE_CUDA_TRUE@ $(NVCC) -c $(NVCCFLAGS) $(CUDA_CFLAGS) $(CUDA_MYFLAGS) $< -o $@ +@HAVE_CUDA_TRUE@.cu.lo: +@HAVE_CUDA_TRUE@ $(top_srcdir)/cudalt.py $@ $(NVCC) -c $(NVCCFLAGS) $(CUDA_CFLAGS) $(CUDA_MYFLAGS) $< + +# Tell versions [3.59,3.63) of GNU make to not export all variables. +# Otherwise a system limit (for SysV at least) may be exceeded. +.NOEXPORT: