diff --git a/examples/test_bh_2.cu b/examples/test_bh_2.cu
new file mode 100644
index 0000000000000000000000000000000000000000..534b9b38fa3ff18c2b1e1f33617585622a28fbb1
--- /dev/null
+++ b/examples/test_bh_2.cu
@@ -0,0 +1,483 @@
+/*******************************************************************************
+ * This file is part of QuickSched.
+ * Copyright (c) 2014 Pedro Gonnet (pedro.gonnet@durham.ac.uk),
+ *                    Aidan Chalk (aidan.chalk@durham.ac.uk)
+ *
+ * This program is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU Lesser General Public License as published
+ * by the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public License
+ * along with this program.  If not, see <http://www.gnu.org/licenses/>.
+ *
+* *****************************************************************************/
+
+
+/* Config parameters. */
+#include "../config.h"
+
+/* Standard includes. */
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <unistd.h>
+#include <math.h>
+#include <float.h>
+#include <limits.h>
+#include <omp.h>
+#include <fenv.h>
+
+/* Local includes. */
+#include "quicksched.h"
+
+
+/* One node of the spatial tree. Instances live in the global cell_pool and
+ * refer to each other by index into that pool. */
+struct cell {
+
+    /* x/y and z components of the cell position. */
+    double2 loc_xy;
+    double loc_z;
+
+    /* Cell extent; cell_split gives every child half of its parent's h. */
+    double h;
+
+    /* Number of particles belonging to the cell. */
+    int count;
+
+    /* split is set to 1 by cell_split once the cell has children. */
+    unsigned short int split;
+    /* Not written by any code visible in this file. */
+    unsigned short int sorted;
+
+    /* Index of the cell's first particle in the host particle arrays. */
+    int parts;
+
+    /* cell_pool indices of the first non-empty child and of the cell that
+     * follows this one in the traversal order (-1 until assigned). */
+    int firstchild;
+    int sibling;
+
+    /* Scheduler resource handle (qsched_res_none until assigned). */
+    int res;
+    /* Presumably the id of the cell's centre-of-mass task - TODO confirm. */
+    int com_tid;
+
+} __attribute__((aligned(64)));
+
+/* Required variables to obtain cells. */
+
+/* Growth factor applied to the pool whenever it runs out of cells. */
+#define CELL_STRETCH 2
+/* Number of cells the pool holds before its first growth. */
+#define INITIAL_CELLS 256
+/* Backing storage for all cells; cells refer to each other by index into it. */
+struct cell *cell_pool = NULL;
+/* Number of cells handed out so far. */
+int used_cells = 0;
+/* Current capacity of the pool, in cells. */
+int num_cells = INITIAL_CELLS;
+/* Current capacity of the pool, in bytes. */
+int cell_size = INITIAL_CELLS * sizeof(struct cell);
+
+/* Device locations for the particle values. */
+/* Device-side counterparts of the *_host arrays declared below. */
+__device__ double2 *parts_pos_xy;
+__device__ double *parts_pos_z;
+/* On the host mirror, comp_com reads the .w component as the particle mass. */
+__device__ float4 *parts_a_m;
+__device__ double2 *com_xy;
+__device__ double *com_z;
+/* NOTE(review): this is a scalar while every sibling is a pointer, and the
+ * host mirror com_mass_host is indexed per cell - it presumably should be
+ * `float *com_mass`; confirm before device code starts using it. */
+__device__ float com_mass;
+
+
+/* Host locations for the particle values. */
+
+/* Particle x/y and z coordinates, indexed by particle. */
+double2 *parts_pos_xy_host;
+double *parts_pos_z_host;
+/* Per-particle float4 whose .w component is read as the mass by comp_com. */
+float4 *parts_a_m_host;
+/* Centre of mass (x/y and z) of every cell, indexed like cell_pool. */
+double2 *com_xy_host;
+double *com_z_host;
+/* Total mass of every cell, indexed like cell_pool. It has to be an array:
+ * cell_get resizes and frees it, and comp_com indexes it by cell. */
+float *com_mass_host;
+
+
+/**
+ * @brief Checks whether the cells are direct neighbours or not. Both cells
+ *        have to be of the same size.
+ *
+ * Two cells count as neighbours when their positions differ by at most one
+ * cell width (ci->h) along every axis. The comparison is done in single
+ * precision.
+ *
+ * @param ci The first #cell.
+ * @param cj The second #cell.
+ *
+ * @return Non-zero if the cells are neighbours, zero otherwise.
+ */
+static inline int are_neighbours(struct cell *ci, struct cell *cj) {
+
+  float dx[3];
+
+#ifdef SANITY_CHECKS
+  if (ci->h != cj->h)
+    error(" Cells of different size in distance calculation.");
+#endif
+
+  /* Maximum allowed distance */
+  float min_dist = ci->h;
+
+  /* Per-axis distance between the cells */
+  double2 loc1 = ci->loc_xy, loc2 = cj->loc_xy;
+  float center_i = loc1.x;
+  float center_j = loc2.x;
+  dx[0] = fabsf(center_i - center_j);
+  center_i = loc1.y;
+  center_j = loc2.y;
+  dx[1] = fabsf(center_i - center_j);
+  center_i = ci->loc_z;
+  center_j = cj->loc_z;
+  dx[2] = fabsf(center_i - center_j);
+
+  return (dx[0] <= min_dist) && (dx[1] <= min_dist) && (dx[2] <= min_dist);
+}
+
+
+/**
+ * @brief Hand out the next unused cell of the global pool.
+ *
+ * The pool (and the per-cell centre-of-mass arrays that are indexed like it)
+ * is allocated on first use and grown by a factor of CELL_STRETCH whenever it
+ * is full. Growing moves the pool, so any pointer obtained from an earlier
+ * call may be invalidated by a later one; callers that keep cells across
+ * calls should hold on to the index (cell - cell_pool) instead.
+ *
+ * @return Pointer to the new cell, with sibling and firstchild set to -1 and
+ *         res set to qsched_res_none.
+ */
+struct cell *cell_get()
+{
+    /* Allocate on first use, grow once every cell has been handed out. */
+    if(cell_pool == NULL || used_cells >= num_cells)
+    {
+        const int new_num = (cell_pool == NULL) ? num_cells : num_cells * CELL_STRETCH;
+
+        /* calloc so that fields nobody has written yet read as zero. */
+        struct cell *new_pool = (struct cell *) calloc(new_num, sizeof(struct cell));
+        double2 *new_com_xy = (double2 *) calloc(new_num, sizeof(double2));
+        double *new_com_z = (double *) calloc(new_num, sizeof(double));
+        float *new_com_mass = (float *) calloc(new_num, sizeof(float));
+
+        /* error() is expected not to return. */
+        if(new_pool == NULL || new_com_xy == NULL || new_com_z == NULL || new_com_mass == NULL)
+            error("Failed to allocate memory for the cell pool.");
+
+        /* Carry over whatever has been stored so far. */
+        if(used_cells > 0)
+        {
+            if(cell_pool != NULL)
+                memcpy(new_pool, cell_pool, used_cells*sizeof(struct cell));
+            if(com_xy_host != NULL)
+                memcpy(new_com_xy, com_xy_host, used_cells*sizeof(double2));
+            if(com_z_host != NULL)
+                memcpy(new_com_z, com_z_host, used_cells*sizeof(double));
+            if(com_mass_host != NULL)
+                memcpy(new_com_mass, com_mass_host, used_cells*sizeof(float));
+        }
+
+        /* Release the old storage (free(NULL) is a no-op) and swap in the new. */
+        free(cell_pool);
+        free(com_xy_host);
+        free(com_z_host);
+        free(com_mass_host);
+        cell_pool = new_pool;
+        com_xy_host = new_com_xy;
+        com_z_host = new_com_z;
+        com_mass_host = new_com_mass;
+
+        num_cells = new_num;
+        cell_size = new_num * sizeof(struct cell);
+    }
+
+    struct cell *res = &cell_pool[used_cells];
+    used_cells++;
+    res->sibling = -1;
+    res->firstchild = -1;
+    res->res = qsched_res_none;
+    return res;
+}
+
+/**
+ * @brief Compute the total mass and centre of mass of a cell and store them
+ *        in com_mass_host, com_xy_host and com_z_host at the cell's index.
+ *
+ * For a split cell the values are accumulated from its children (which must
+ * already have been computed); for a leaf they are accumulated from the
+ * cell's particles. A cell without mass gets a zero centre of mass.
+ *
+ * @param c The #cell to process; has to point into cell_pool.
+ */
+void comp_com(struct cell *c){
+
+    int k, count = c->count;
+    int cpi;
+    int parts = c->parts;
+    double com[3] = {0.0, 0.0, 0.0}, mass = 0.0;
+
+    if(c->split) {
+
+        /* Walk the chain of children; it ends at this cell's own sibling. */
+        for(cpi = c->firstchild; cpi != c->sibling; cpi = cell_pool[cpi].sibling) {
+            float cp_mass = com_mass_host[cpi];
+            com[0] += com_xy_host[cpi].x * cp_mass;
+            com[1] += com_xy_host[cpi].y * cp_mass;
+            com[2] += com_z_host[cpi] * cp_mass;
+            mass += cp_mass;
+        }
+
+    /* Otherwise collect the multipole from the particles */
+    } else {
+
+        for(k = parts; k < parts+count; k++)
+        {
+            float p_mass = parts_a_m_host[k].w;
+            com[0] += parts_pos_xy_host[k].x * p_mass;
+            com[1] += parts_pos_xy_host[k].y * p_mass;
+            com[2] += parts_pos_z_host[k] * p_mass;
+            mass += p_mass;
+        }
+    }
+
+    /* Index of this cell in the pool and thus in the COM arrays. */
+    k = c - cell_pool;
+
+    /* Store the COM data, if it was collected. */
+    if(mass > 0.0) {
+        /* Stay in double precision: the positions are stored as doubles. */
+        double imass = 1.0/mass;
+        com_xy_host[k].x = com[0] * imass;
+        com_xy_host[k].y = com[1] * imass;
+        com_z_host[k] = com[2] * imass;
+        com_mass_host[k] = (float)mass;
+    }else
+    {
+        com_xy_host[k].x = 0.0;
+        com_xy_host[k].y = 0.0;
+        com_z_host[k] = 0.0;
+        com_mass_host[k] = 0.0f;
+    }
+
+}
+
+/* Coordinate of host particle ind along axis (0 = x, 1 = y, anything else = z). */
+static inline double cell_part_coord(int ind, int axis) {
+    if(axis == 0) return parts_pos_xy_host[ind].x;
+    if(axis == 1) return parts_pos_xy_host[ind].y;
+    return parts_pos_z_host[ind];
+}
+
+/* Exchange host particles a and b in all three particle arrays. */
+static inline void cell_swap_parts(int a, int b) {
+    double2 temp = parts_pos_xy_host[a];
+    double temp1 = parts_pos_z_host[a];
+    float4 temp2 = parts_a_m_host[a];
+    parts_pos_xy_host[a] = parts_pos_xy_host[b];
+    parts_pos_z_host[a] = parts_pos_z_host[b];
+    parts_a_m_host[a] = parts_a_m_host[b];
+    parts_pos_xy_host[b] = temp;
+    parts_pos_z_host[b] = temp1;
+    parts_a_m_host[b] = temp2;
+}
+
+/* Reorder host particles lo..hi (inclusive, may be empty with hi == lo-1) so
+ * that those below the pivot along axis come first. Returns the index of the
+ * first particle of the upper part; the lower part ends one before it. */
+static int cell_partition(int lo, int hi, int axis, double pivot) {
+    int i = lo, j = hi;
+    while(i <= j)
+    {
+        while(i <= hi && cell_part_coord(i, axis) < pivot) i += 1;
+        while(j >= lo && cell_part_coord(j, axis) >= pivot) j -= 1;
+        if(i < j) cell_swap_parts(i, j);
+    }
+    return i;
+}
+
+/**
+ * @brief Sort the parts into eight bins along the given pivots and
+ *        fill the multipoles. Also adds the hierarchical resources
+ *        to the sched (TODO).
+ *
+ * The first cell this function is ever called with is remembered as the root
+ * of the tree. Cells holding more than cell_maxparts particles are split into
+ * eight children and the function recurses into the non-empty ones; all other
+ * cells are leaves and get a particle-cell task. Unless COM_AS_TASK is
+ * defined the cell's centre of mass is computed before returning.
+ *
+ * Cells are tracked by their index in cell_pool because cell_get may move the
+ * pool while children are being created. For the same reason the task payload
+ * holds the indices of the root and of the cell, as in create_tasks.
+ *
+ * @param c The #cell to be split; has to point into cell_pool.
+ * @param s The #sched to store the resources.
+ */
+void cell_split(struct cell *c, struct qsched *s) {
+    int i, k, kk, count = c->count;
+    int parts = c->parts;
+    struct cell *cp;
+    int left[8], right[8];
+    double pivot[3];
+    static int root_ind = -1;
+    int progenitors[8];
+    const int c_ind = (int)(c - cell_pool);
+
+    /* Set the root cell. */
+    if (root_ind < 0) {
+        root_ind = c_ind;
+        c->sibling = 0;
+    }
+
+    if(c->res == qsched_res_none)
+        error("Cell has no resource");
+
+    if(c->count > cell_maxparts )
+    {
+        c->split = 1;
+
+        for(k = 0; k < 8; k++)
+        {
+            cp = cell_get();
+            progenitors[k] = (int)(cp - cell_pool);
+
+            /* cell_get may have moved the pool, so look the parent up again. */
+            c = &cell_pool[c_ind];
+
+            cp->loc_xy = c->loc_xy;
+            cp->loc_z = c->loc_z;
+            cp->h = c->h*0.5;
+            if(k & 4) cp->loc_xy.x += cp->h;
+            if(k & 2) cp->loc_xy.y += cp->h;
+            if(k & 1) cp->loc_z += cp->h;
+        }
+
+        /* Init the pivots.*/
+        pivot[0] = c->loc_xy.x + c->h * 0.5;
+        pivot[1] = c->loc_xy.y + c->h * 0.5;
+        pivot[2] = c->loc_z + c->h * 0.5;
+
+        /* Split along the x axis. */
+        i = cell_partition(parts, parts+count-1, 0, pivot[0]);
+        left[1] = i;
+        right[1] = parts+count-1;
+        left[0] = parts;
+        right[0] = i-1;
+
+        /* Split along the y axis twice. Going downwards in k makes sure a
+         * range is read before its slot is overwritten. */
+        for (k = 1; k >= 0; k--) {
+            const int lo = left[k], hi = right[k];
+            i = cell_partition(lo, hi, 1, pivot[1]);
+            left[2*k+1] = i;
+            right[2*k+1] = hi;
+            left[2*k] = lo;
+            right[2*k] = i-1;
+        }
+
+        /* Split along the z axis four times.*/
+        for(k = 3; k >=0; k--)
+        {
+            const int lo = left[k], hi = right[k];
+            i = cell_partition(lo, hi, 2, pivot[2]);
+            left[2 * k + 1] = i;
+            right[2 * k + 1] = hi;
+            left[2 * k] = lo;
+            right[2 * k] = i-1;
+        }
+
+        /* Store the counts and offsets. */
+        for(k = 0; k < 8; k++)
+        {
+            cell_pool[progenitors[k]].count = right[k]-left[k]+1;
+            cell_pool[progenitors[k]].parts = left[k];
+            //TODO ADD RESOURCES HERE.
+            /* Until that is done the children still carry qsched_res_none
+             * and the recursion below stops at the resource check above. */
+        }
+
+        /* Find the first non-empty progenitor */
+        for(k = 0; k < 8; k++)
+        {
+            if(cell_pool[progenitors[k]].count > 0)
+            {
+                c->firstchild = progenitors[k];
+                break;
+            }
+        }
+
+        #ifdef SANITY_CHECKS
+            if(c->firstchild == -1)
+                error("Cell has been split but all children have 0 parts");
+        #endif
+
+        /*Prepare the pointers*/
+        for(k = 0; k < 8; k++)
+        {
+            /* Find the next non-empty sibling */
+            for(kk = k+1; kk < 8; ++kk){
+                if(cell_pool[progenitors[kk]].count > 0){
+                    cell_pool[progenitors[k]].sibling = progenitors[kk];
+                    break;
+                }
+            }
+
+            /* No non-empty sibling, go back a level.*/
+            if(kk == 8) cell_pool[progenitors[k]].sibling = c->sibling;
+
+        }
+
+        /* Recurse. Every call may move the pool, hence the lookup by index. */
+        for(k = 0; k < 8; k++)
+            if(cell_pool[progenitors[k]].count > 0)
+                cell_split(&cell_pool[progenitors[k]], s);
+
+    /* Otherwise we're at a leaf so we need to make the cell's particle-cell task. */
+    } else {
+
+        int data[2] = {root_ind, c_ind};
+        int tid = qsched_addtask(s, task_type_self_pc, task_flag_none, data,
+                                 2 * sizeof(int), 1);
+        qsched_addlock(s, tid, c->res);
+        //TODO Create task.
+        //TODO Deal with multiple resources.
+    }
+
+#ifndef COM_AS_TASK
+    comp_com(&cell_pool[c_ind]);
+#endif
+}
+
+/**
+ * @brief Create the tasks for the cell pair/self.
+ *
+ * With cj == NULL the self-interaction of ci is handled: a split cell
+ * recurses into each child and into every pair of children, a leaf gets a
+ * self task. With two cells, neighbouring cells that are both split recurse
+ * over all pairs of their children, any other neighbouring pair gets a pair
+ * task. No task is created here for cells that are not neighbours.
+ *
+ * The task payload is a pair of ints holding the cells' indices in
+ * cell_pool; the second entry of a self task is -1.
+ *
+ * @param s The #sched in which to create the tasks.
+ * @param ci The first #cell.
+ * @param cj The second #cell, or NULL for a self-interaction.
+ */
+void create_tasks(struct qsched *s, struct cell *ci, struct cell *cj){
+
+    qsched_task_t tid;
+    int data[2];
+    struct cell *cp, *cps;
+
+    if(cj == NULL)
+    {
+        if(ci->split)
+        {
+            /* The chain of children ends at the parent's own sibling. */
+            for(cp = &cell_pool[ci->firstchild]; cp != &cell_pool[ci->sibling]; cp = &cell_pool[cp->sibling])
+            {
+                //Self Interaction.
+                create_tasks(s, cp, NULL);
+
+                /* Pair this child with each of the children after it. */
+                for(cps = &cell_pool[cp->sibling]; cps != &cell_pool[ci->sibling]; cps = &cell_pool[cps->sibling])
+                    create_tasks(s, cp, cps);
+            }
+        }
+        /* Self task */
+        else{
+            data[0] = (int)(ci - cell_pool);
+            data[1] = -1;
+
+            tid = qsched_addtask(s, task_type_self, task_flag_none, data, sizeof(int)*2, ci->count*ci->count/2);
+            qsched_addlock(s, tid, ci->res);
+        }
+    }
+    /* Else its a pair!*/
+    else{
+        if(are_neighbours(ci,cj)){/* Cells are neighbours */
+            /*Are both split? */
+            if(ci->split && cj->split)
+            {
+                /* Recurse over both cells. */
+                for(cp = &cell_pool[ci->firstchild]; cp != &cell_pool[ci->sibling]; cp = &cell_pool[cp->sibling])
+                    for(cps = &cell_pool[cj->firstchild]; cps != &cell_pool[cj->sibling]; cps = &cell_pool[cps->sibling])
+                        create_tasks(s, cp, cps);
+
+            /* Otherwise, at least one of the cells is not split, build a direct
+             * interaction. */
+            }else{
+                data[0] = (int)(ci - cell_pool);
+                data[1] = (int)(cj - cell_pool);
+
+                /* Create the task. The size has to match the int payload. */
+                tid = qsched_addtask(s, task_type_pair, task_flag_none, data,
+                                     sizeof(int) * 2, ci->count * cj->count);
+
+                /* Add the resources. */
+                qsched_addlock(s, tid, ci->res);
+                qsched_addlock(s, tid, cj->res);
+            }
+        }
+
+    }
+
+}
+
+/**
+ * @brief Set up and run a task-based Barnes-Hut N-body solver.
+ *
+ * So far only the scheduler is initialised and the host array for the
+ * particle x/y positions is allocated.
+ *
+ * @param N The number of random particles to use.
+ * @param runs Number of force evaluations to use as a benchmark.
+ * @param fileName Input file name. If @c NULL or an empty string, random
+ *        particle positions will be used.
+ */
+void test_bh(int N, int runs, char *fileName) {
+  struct qsched s;
+  struct cell *root;
+  struct part *parts;
+  FILE *file;
+  int i, k;
+  int countMultipoles = 0, countPairs = 0, countCoMs = 0;
+  ticks tic, toc_run, tot_setup = 0, tot_run = 0;
+  cudaError_t alloc_status;
+
+  /* Bring up the scheduler with a single queue. */
+  qsched_init(&s, 1, qsched_flag_none);
+
+  /* Page-locked host storage for the particle x/y positions. */
+  alloc_status = cudaMallocHost(&parts_pos_xy_host, sizeof(double2) * N);
+  if (alloc_status != cudaSuccess)
+    error("Failed to allocated parts array");
+}
diff --git a/examples/test_gpu_simple.cu b/examples/test_gpu_simple.cu
new file mode 100644
index 0000000000000000000000000000000000000000..066b53bd670f98e5ce6708f60ff83901d970c67f
--- /dev/null
+++ b/examples/test_gpu_simple.cu
@@ -0,0 +1,234 @@
+/*******************************************************************************
+ * This file is part of QuickSched.
+ * Copyright (c) 2013 Aidan Chalk (aidan.chalk@durham.ac.uk)
+ * 
+ * This program is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU Lesser General Public License as published
+ * by the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ * 
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ * 
+ * You should have received a copy of the GNU Lesser General Public License
+ * along with this program.  If not, see <http://www.gnu.org/licenses/>.
+ * 
+ ******************************************************************************/
+/* Standard includes. */
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <unistd.h>
+#include <math.h>
+#include <omp.h>
+
+/* Local includes. */
+extern "C"{
+#include "quicksched.h"
+}
+#include "cuda_queue.h"
+
+/* Task types; gpuTest dispatches on these values. */
+#define type_square 1
+#define type_double 2
+#define type_quarter 3
+
+/* Total number of floats processed; handled in chunks of 1000. */
+#define M 100000
+
+/* Shorthand for the index of the calling thread within its block. */
+#define TID threadIdx.x
+
+/* Device-side pointer to the working array; main points it at a cudaMalloc'd
+ * buffer via cudaMemcpyToSymbol. */
+__device__ float *cuda_data;
+
+
+
+/* Squares the first 1000 floats of data in place. The threads of the calling
+ * block share the work, each starting at TID and stepping by blockDim.x. */
+__device__ void square(float *data)
+{
+    for( int idx = TID; idx < 1000; idx += blockDim.x )
+    {
+        const float value = data[idx];
+        data[idx] = value * value;
+    }
+}
+
+/* Doubles the first 1000 floats of data in place. The threads of the calling
+ * block share the work, each starting at TID and stepping by blockDim.x. */
+__device__ void doubles(float *data)
+{
+    for( int idx = TID; idx < 1000; idx += blockDim.x )
+    {
+        const float value = data[idx];
+        data[idx] = 2.0f * value;
+    }
+}
+
+
+/* Scales the first 1000 floats of data by 0.25 in place. The threads of the
+ * calling block share the work, each starting at TID and stepping by
+ * blockDim.x. */
+__device__ void quarter(float *data)
+{
+    for( int idx = TID; idx < 1000; idx += blockDim.x )
+    {
+        const float value = data[idx];
+        data[idx] = value * 0.25f;
+    }
+}
+
+/* Reference kernel: applies square, doubles and quarter to src without going
+ * through the scheduler.
+ *
+ * Launch layout: one block per chunk of 1000 floats, so gridDim.x has to be
+ * (number of floats) / 1000; any 1D block size works. src and cuda_data must
+ * both hold at least gridDim.x * 1000 floats.
+ *
+ * Every phase walks the chunk with the same start (threadIdx.x) and step
+ * (blockDim.x), so a thread only ever touches elements it wrote itself and
+ * no barrier is needed between the phases. */
+__global__ void Manual(float *src)
+{
+    const int base = blockIdx.x * 1000;
+    float *chunk = &cuda_data[base];
+    int i;
+
+    /* Stage the chunk in the device working array. */
+    for( i = threadIdx.x; i < 1000; i += blockDim.x )
+        chunk[i] = src[base+i];
+
+    square(chunk);
+    doubles(chunk);
+    quarter(chunk);
+
+    /* Hand the result back to the caller's buffer. */
+    for( i = threadIdx.x; i < 1000; i += blockDim.x )
+        src[base+i] = chunk[i];
+}
+
+
+/* Task function handed to the scheduler. data points at an int holding the
+ * number of the 1000-float chunk of cuda_data to work on; type selects the
+ * operation. Unknown types are ignored. */
+__device__ __noinline__ void gpuTest(int type , void *data)
+{
+    const int chunk = *(int*)data;
+
+    switch(type)
+    {
+        case type_square:
+            square(&cuda_data[chunk*1000]);
+            break;
+        case type_double:
+            doubles(&cuda_data[chunk*1000]);
+            break;
+        case type_quarter:
+            quarter(&cuda_data[chunk*1000]);
+            break;
+        default:
+            break;
+    }
+}
+
+/* Device-side copy of the task function's address; main fetches it with
+ * cudaMemcpyFromSymbol and passes it to qsched_run_CUDA. */
+__device__ qsched_funtype function = gpuTest;
+
+/* Prints the address stored in function from the device. A function pointer
+ * does not match %i, so it is printed as a pointer. */
+__global__ void Setup()
+{
+    printf("%p\n", (void *)function);
+}
+
+
+
+
+
+
+
+/* Runs the square/double/quarter chain over M floats twice through the
+ * scheduler, checks the result of the first run, then repeats the
+ * computation with the hand-written Manual kernel and checks it again. */
+int main ( int argc , char *argv[] ) {
+    float *array, *cuda_array, *device_array;
+    int i, k=0;
+    qsched_funtype func;
+    struct qsched s;
+    qsched_task_t *tid;
+    qsched_res_t *rid;
+    ticks tic, toc_run, tot_run = 0;
+    /* Milliseconds per tick. */
+    const double itpms = 1000.0 / CPU_TPS;
+
+    /* Initialize the scheduler. */
+    /* NOTE(review): this happens before cudaDeviceReset(); if qsched_init
+     * allocates device state the reset would discard it - confirm. */
+    qsched_init( &s , 1 , qsched_flag_none );
+    cudaDeviceReset();
+    cudaSetDevice(0);
+    Setup<<<1,1>>>();
+    if(cudaDeviceSynchronize() != cudaSuccess)
+        error("Setup Failed: %s", cudaGetErrorString(cudaPeekAtLastError()));
+
+    /* Device working array, published to the kernels through cuda_data. */
+    if( cudaMalloc(&device_array , M*sizeof(float) ) != cudaSuccess )
+        error("Failed to allocate device array: %s", cudaGetErrorString(cudaPeekAtLastError()));
+    if( cudaMemcpyToSymbol( cuda_data , &device_array,sizeof(float*), 0 ,  cudaMemcpyHostToDevice) != cudaSuccess )
+        error("Failed to copy array pointer to device: %s", cudaGetErrorString(cudaPeekAtLastError()));
+
+    /* Allocate and fill the original array*/
+    if( cudaHostAlloc( &array , M*sizeof(float) , cudaHostAllocMapped) != cudaSuccess)
+        error("Failed to allocate array");
+    for( i = 0; i < M ; i++ )
+        array[i] = (float)i;
+
+    /* Allocate and init the task ID and resource ID matrix. */
+    if( cudaMallocHost(&tid , sizeof(qsched_task_t) * 3 * M/1000 ) != cudaSuccess )
+        error("Failed to allocate tid");
+    if( cudaMallocHost(&rid , sizeof(qsched_res_t) * M/1000) != cudaSuccess)
+        error("Failed to allocate rid");
+
+    /* One resource per chunk of 1000 floats. */
+    for ( k = 0 ; k < M / 1000 ; k++ ) {
+        tid[k] = qsched_task_none;
+        if(cudaHostGetDevicePointer(&cuda_array , &array[k*1000] , 0) != cudaSuccess)
+            error("Failed to get device pointer for data %i: %s",k, cudaGetErrorString(cudaPeekAtLastError()));
+        rid[k] = qsched_addres( &s , qsched_owner_none , qsched_res_none , cuda_array, sizeof(float) * 1000, &device_array[k*1000]);
+    }
+    for ( k = M/1000 ; k < 3*M / 1000 ; k++ ){
+        tid[k] = qsched_task_none;
+    }
+
+    /*Build the tasks*/
+    /* Per chunk: square -> double -> quarter, chained with unlocks. */
+    for( k  = 0; k < M/1000; k++){
+        tid[3*k] = qsched_addtask(&s , type_square , task_flag_none , &k , sizeof(int) , 0 );
+        qsched_adduse(&s , tid[3*k] , rid[k] );
+        tid[3*k+1] = qsched_addtask(&s , type_double , task_flag_none , &k , sizeof(int) , 0 );
+        qsched_adduse( &s , tid[3*k+1] , rid[k] );
+        qsched_addunlock( &s , tid[3*k] , tid[3*k+1] );
+        tid[3*k+2] = qsched_addtask(&s , type_quarter , task_flag_none , &k , sizeof(int) , 0 );
+        qsched_adduse( &s , tid[3*k+2] , rid[k] );
+        qsched_addunlock( &s , tid[3*k+1] , tid[3*k+2] );
+    }
+
+    /* Fetch the device address of the task function. */
+    if( cudaMemcpyFromSymbol( &func , function , sizeof(qsched_funtype) ) != cudaSuccess)
+        error("Failed to copy function pointer from device");
+
+    tic = getticks();
+    qsched_run_CUDA( &s , func );
+    toc_run = getticks();
+    message( "qsched_run_CUDA took %.3f ms..." , ((double)(toc_run - tic)) * itpms );
+    tot_run += toc_run - tic;
+
+    /* x*x * 2 * 0.25 == x*x * 0.5; scaling by powers of two is exact. */
+    for(i = 0; i < M; i++ )
+        if(array[i] != ((float)i)*((float)i)*0.5f)
+            printf("%i wrong, %.3f != %.3f\n", i, array[i], ((float)i)*((float)i)*0.5f );
+
+    printf("Starting second run\n");
+    qsched_run_CUDA( &s , func );
+    printf("Second run complete\n");
+
+    struct task *completed_tasks = qsched_get_timers( &s, s.count );
+    (void)completed_tasks;
+
+    /* Start from a clean device for the manual comparison run. */
+    cudaDeviceReset();
+
+    if( cudaHostAlloc( &array , M*sizeof(float) , cudaHostAllocMapped) != cudaSuccess)
+        error("Failed to allocate array");
+    for( i = 0; i < M ; i++ )
+        array[i] = i;
+
+    if(cudaHostGetDevicePointer(&cuda_array , array , 0) != cudaSuccess)
+        error("Failed to get device pointer for data: %s",cudaGetErrorString(cudaPeekAtLastError()));
+    if( cudaMalloc(&device_array , M*sizeof(float) ) != cudaSuccess )
+        error("Failed to allocate device array: %s", cudaGetErrorString(cudaPeekAtLastError()));
+    if( cudaMemcpyToSymbol( cuda_data , &device_array,sizeof(float*), 0 ,  cudaMemcpyHostToDevice) != cudaSuccess )
+        error("Failed to copy array pointer to device: %s", cudaGetErrorString(cudaPeekAtLastError()));
+
+    /* One block per chunk of 1000 floats, one thread per float. */
+    tic = getticks();
+    Manual<<<M/1000, 1000>>>(cuda_array);
+    if(cudaDeviceSynchronize() != cudaSuccess)
+        error("Manual run failed: %s", cudaGetErrorString(cudaPeekAtLastError()));
+    toc_run = getticks();
+    message( "Manual run took %.3f ms" , ((double)(toc_run - tic)) * itpms );
+
+    for(i = 0; i < M; i++ )
+        if(array[i] != ((float)i)*((float)i)*0.5f)
+            printf("%i wrong, %.3f != %.3f\n", i, array[i], ((float)i)*((float)i)*0.5f );
+
+    return 0;
+}
diff --git a/examples/test_hierarchy.cu b/examples/test_hierarchy.cu
new file mode 100644
index 0000000000000000000000000000000000000000..bd911ec48f47904273506b743bb4c057c21e3c0b
--- /dev/null
+++ b/examples/test_hierarchy.cu
@@ -0,0 +1,79 @@
+/* Config parameters. */
+#include "../config.h"
+
+/* Standard includes. */
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <unistd.h>
+#include <math.h>
+
+/* Local includes. */
+extern "C"{
+#include "quicksched.h"
+}
+#include "cuda_queue.h"
+
+/* Number of ints in the resource data array allocated by main. */
+#define size 6
+
+/* Device array of the same length; not referenced by any code in this file. */
+__device__ int device_array[size];
+
+/* Task function for the hierarchy test: ignores both arguments and prints a
+ * greeting from the device. */
+__device__ void runner ( int type , void *data )
+{
+    printf("Hi\n");
+}
+
+/* Device-side copy of the task function's address; main fetches it with
+ * cudaMemcpyFromSymbol and passes it to qsched_run_CUDA. */
+__device__ qsched_funtype function = runner;
+
+/* Prints the address stored in function from the device. A function pointer
+ * does not match %i, so it is printed as a pointer. */
+__global__ void Setup()
+{
+    printf("%p\n", (void *)function);
+}
+
+/* Host copy of the device task function pointer. */
+qsched_funtype func;
+
+/* Builds two top-level resources with two child resources each, adds one task
+ * per top-level resource that locks the parent and both of its children, and
+ * runs the scheduler on the GPU. */
+int main ( int argc , char *argv[] ) {
+    int *data;
+    int *res_data;
+    int res1;
+    int res2;
+    int res11;
+    int res12;
+    int res21;
+    int res22;
+    int task_id;
+    qsched s;
+
+    cudaDeviceReset();
+    cudaSetDevice(0);
+    Setup<<<1,1>>>();
+    if(cudaDeviceSynchronize() != cudaSuccess)
+        error("Setup Failed: %s", cudaGetErrorString(cudaPeekAtLastError()));
+
+    qsched_init( &s , 1 , qsched_flag_none );
+
+    /* Zero-initialised so the task payload and resource data are defined. */
+    data = (int*) calloc(1, sizeof(int));
+    res_data = (int*) calloc(size, sizeof(int));
+    if(data == NULL || res_data == NULL)
+        error("Failed to allocate host data");
+
+    /* The resources are registered in this order on purpose: the handles
+     * returned depend on it. */
+    res2 = qsched_addres(&s , qsched_owner_none , qsched_res_none , &res_data[3], sizeof(int)*3, NULL);
+    res1 = qsched_addres(&s , qsched_owner_none , qsched_res_none , &res_data[0], sizeof(int)*3, NULL);
+    res11 = qsched_addres( &s, qsched_owner_none, res1, &res_data[2], sizeof(int), NULL);
+    res21 = qsched_addres( &s, qsched_owner_none, res2, &res_data[4], sizeof(int), NULL);
+    res12 = qsched_addres( &s, qsched_owner_none, res1, &res_data[1], sizeof(int), NULL);
+    res22 = qsched_addres( &s, qsched_owner_none, res2, &res_data[5], sizeof(int), NULL);
+
+    /* First task: parent resource 1 and both of its children. */
+    task_id = qsched_addtask(&s, 1, task_flag_none, data, sizeof(int), 1);
+    qsched_addlock(&s, task_id, res1);
+    qsched_addlock(&s, task_id, res11);
+    qsched_addlock(&s, task_id, res12);
+
+    /* Second task: parent resource 2 and both of its children. */
+    task_id = qsched_addtask(&s, 2, task_flag_none, data, sizeof(int), 1);
+    qsched_addlock(&s, task_id, res2);
+    qsched_addlock(&s, task_id, res21);
+    qsched_addlock(&s, task_id, res22);
+
+    if( cudaMemcpyFromSymbol( &func , function , sizeof(qsched_funtype) ) != cudaSuccess)
+        error("Failed to copy function pointer from device");
+
+    qsched_run_CUDA( &s , func );
+
+    free(data);
+    free(res_data);
+
+    return 0;
+}
diff --git a/src/CUDACompile.sh b/src/CUDACompile.sh
new file mode 100755
index 0000000000000000000000000000000000000000..df36d95939ff236fe45ef30ec5a0afa26bb8a7c2
--- /dev/null
+++ b/src/CUDACompile.sh
@@ -0,0 +1,38 @@
+#!/bin/bash
+# Builds the CUDA flavours of the QuickSched library and the CUDA examples.
+# Has to be run from the src/ directory; the examples are built in ../examples.
+#
+# The CUDA toolkit location defaults to the original developer's install and
+# can be overridden from the environment:
+#   CUDA_HOME  root of the CUDA toolkit
+#   NVCC       nvcc binary to use (default: $CUDA_HOME/bin/nvcc)
+CUDA_HOME="${CUDA_HOME:-/home/aidan/cuda_6.0}"
+NVCC="${NVCC:-$CUDA_HOME/bin/nvcc}"
+CUDA_LIBS="-L$CUDA_HOME/lib -L$CUDA_HOME/lib64"
+
+# Optional flag sets, not applied by default.
+FLAGS2="-Xcompiler=-fsanitize=address -Xcompiler=-fno-omit-frame-pointer"
+DEBUG_FLAGS="-G -DDEBUG_GPU"
+FLAGS="-O3 -g -DCPU_TPS=3.1e9 -lineinfo -src-in-ptx -Xptxas -dlcm=cg --maxrregcount=32 -gencode arch=compute_30,code=sm_30 -ftz=true -fmad=true -DFPTYPE_SINGLE -lgomp -DWITH_CUDA -DTIMERS -ccbin=/usr/bin/gcc-4.8"
+# -DGPU_locks -Xptxas -dlcm=cg  -Xptxas="-v"" 
+# -DNO_LOADS
+
+CFLAGS="-fsanitize=address -fno-omit-frame-pointer"
+
+# Flags shared by the host-side C objects.
+GCC_FLAGS="-std=gnu99 -DHAVE_CONFIG_H -I. -I.. -g -O3 -Wall -Werror -ffast-math -fstrict-aliasing -ftree-vectorize -funroll-loops -mmmx -msse -msse2 -msse3 -mssse3 -msse4.1 -msse4.2 -mavx -fopenmp -DTIMERS -DWITH_CUDA -g -O2 -march=native -pthread"
+
+# The dependency files and the libraries are written into these directories.
+mkdir -p .deps .libs
+
+gcc-4.8 $GCC_FLAGS -MT qsched.lo -MD -MP -MF .deps/qsched.Tpo -c qsched.c -o qsched.o -lprofiler
+# queue.c gets its own dependency file instead of overwriting qsched's.
+gcc-4.8 $GCC_FLAGS -MT queue.lo -MD -MP -MF .deps/queue.Tpo -c queue.c -o queue.o -lprofiler
+
+# Library without GPU locks.
+"$NVCC" $FLAGS -dc cuda_queue.cu qsched.o queue.o -lprofiler
+
+"$NVCC" $FLAGS  -lib cuda_queue.o qsched.o queue.o -o .libs/libquicksched_cuda.a -lprofiler
+
+# Library with GPU locks; cuda_queue.o is rebuilt with -DGPU_locks.
+"$NVCC" $FLAGS  -DGPU_locks -dc cuda_queue.cu qsched.o queue.o -lprofiler
+
+"$NVCC" $FLAGS  -lib cuda_queue.o qsched.o queue.o -o .libs/libquicksched_cuda_locks.a -lprofiler
+
+cd ../examples
+
+"$NVCC" $FLAGS  -dc -m64 -I../src -dc $CUDA_LIBS -lcudart -lcuda  test_gpu_simple.cu -lprofiler
+
+"$NVCC" $FLAGS  -m64 -I../src $CUDA_LIBS -Xnvlink -v test_gpu_simple.o ../src/.libs/libquicksched_cuda.a -o simple_cuda_test -lprofiler
+
+
+"$NVCC" $FLAGS  -dc -m64 -I../src -dc -lf77blas -lcblas -latlas -lm  -L/home/aidan/ATLAS/ATLAS_linux/lib/ $CUDA_LIBS -lcudart -lcuda /usr/lib64/atlas/libcblas.a /usr/lib64/atlas/libptcblas.a test_qr.cu -lprofiler
+
+"$NVCC" $FLAGS  -m64 -I../src -lf77blas -lcblas -latlas -lm  -L/home/aidan/ATLAS/ATLAS_linux/lib/ $CUDA_LIBS -Xnvlink -v test_qr.o ../src/.libs/libquicksched_cuda.a /usr/lib64/atlas/libcblas.a /usr/lib64/atlas/libptcblas.a -o qr_cuda_test -lprofiler
+
+"$NVCC" $FLAGS  -dc -m64 -I../src -dc $CUDA_LIBS -lcudart -lcuda  test_hierarchy.cu -lprofiler
+
+"$NVCC" $FLAGS  -m64 -I../src $CUDA_LIBS -Xnvlink -v test_hierarchy.o ../src/.libs/libquicksched_cuda.a -o test_heirarchy -lprofiler
+
+"$NVCC" $FLAGS  -dc -m64 -I../src -dc $CUDA_LIBS -lcudart -lcuda  test_bh.cu -lprofiler
+
+# Own output name, so the hierarchy test binary above is not overwritten.
+"$NVCC" $FLAGS  -m64 -I../src $CUDA_LIBS -Xnvlink -v test_bh.o ../src/.libs/libquicksched_cuda.a -o test_bh -lprofiler
diff --git a/src/Makefile.in b/src/Makefile.in
new file mode 100644
index 0000000000000000000000000000000000000000..4a3e029bb2bceef56d7fcdaf5167ff8d03c78f52
--- /dev/null
+++ b/src/Makefile.in
@@ -0,0 +1,659 @@
+# Makefile.in generated by automake 1.11.1 from Makefile.am.
+# @configure_input@
+
+# Copyright (C) 1994, 1995, 1996, 1997, 1998, 1999, 2000, 2001, 2002,
+# 2003, 2004, 2005, 2006, 2007, 2008, 2009  Free Software Foundation,
+# Inc.
+# This Makefile.in is free software; the Free Software Foundation
+# gives unlimited permission to copy and/or distribute it,
+# with or without modifications, as long as this notice is preserved.
+
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY, to the extent permitted by law; without
+# even the implied warranty of MERCHANTABILITY or FITNESS FOR A
+# PARTICULAR PURPOSE.
+
+@SET_MAKE@
+
+# This file is part of QuickSched.
+# Copyright (c) 2013 Pedro Gonnet (pedro.gonnet@durham.ac.uk),
+# 
+# This program is free software: you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation, either version 3 of the License, or
+# (at your option) any later version.
+# 
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+# GNU General Public License for more details.
+# 
+# You should have received a copy of the GNU General Public License
+# along with this program.  If not, see <http://www.gnu.org/licenses/>.
+
+
+VPATH = @srcdir@
+pkgdatadir = $(datadir)/@PACKAGE@
+pkgincludedir = $(includedir)/@PACKAGE@
+pkglibdir = $(libdir)/@PACKAGE@
+pkglibexecdir = $(libexecdir)/@PACKAGE@
+am__cd = CDPATH="$${ZSH_VERSION+.}$(PATH_SEPARATOR)" && cd
+install_sh_DATA = $(install_sh) -c -m 644
+install_sh_PROGRAM = $(install_sh) -c
+install_sh_SCRIPT = $(install_sh) -c
+INSTALL_HEADER = $(INSTALL_DATA)
+transform = $(program_transform_name)
+NORMAL_INSTALL = :
+PRE_INSTALL = :
+POST_INSTALL = :
+NORMAL_UNINSTALL = :
+PRE_UNINSTALL = :
+POST_UNINSTALL = :
+build_triplet = @build@
+host_triplet = @host@
+
+# Build a CUDA-enabled version too?
+@HAVE_CUDA_TRUE@am__append_1 = libquicksched_cuda.la
+subdir = src
+DIST_COMMON = $(include_HEADERS) $(srcdir)/Makefile.am \
+	$(srcdir)/Makefile.in
+ACLOCAL_M4 = $(top_srcdir)/aclocal.m4
+am__aclocal_m4_deps = $(top_srcdir)/m4/acx_pthread.m4 \
+	$(top_srcdir)/m4/ax_check_compile_flag.m4 \
+	$(top_srcdir)/m4/ax_check_compiler_flags.m4 \
+	$(top_srcdir)/m4/ax_ext.m4 \
+	$(top_srcdir)/m4/ax_func_posix_memalign.m4 \
+	$(top_srcdir)/m4/ax_gcc_archflag.m4 \
+	$(top_srcdir)/m4/ax_gcc_x86_cpuid.m4 \
+	$(top_srcdir)/m4/ax_openmp.m4 \
+	$(top_srcdir)/m4/ax_prog_doxygen.m4 \
+	$(top_srcdir)/m4/libtool.m4 $(top_srcdir)/m4/ltoptions.m4 \
+	$(top_srcdir)/m4/ltsugar.m4 $(top_srcdir)/m4/ltversion.m4 \
+	$(top_srcdir)/m4/lt~obsolete.m4 $(top_srcdir)/configure.in
+am__configure_deps = $(am__aclocal_m4_deps) $(CONFIGURE_DEPENDENCIES) \
+	$(ACLOCAL_M4)
+mkinstalldirs = $(install_sh) -d
+CONFIG_HEADER = $(top_builddir)/config.h
+CONFIG_CLEAN_FILES =
+CONFIG_CLEAN_VPATH_FILES =
+am__vpath_adj_setup = srcdirstrip=`echo "$(srcdir)" | sed 's|.|.|g'`;
+am__vpath_adj = case $$p in \
+    $(srcdir)/*) f=`echo "$$p" | sed "s|^$$srcdirstrip/||"`;; \
+    *) f=$$p;; \
+  esac;
+am__strip_dir = f=`echo $$p | sed -e 's|^.*/||'`;
+am__install_max = 40
+am__nobase_strip_setup = \
+  srcdirstrip=`echo "$(srcdir)" | sed 's/[].[^$$\\*|]/\\\\&/g'`
+am__nobase_strip = \
+  for p in $$list; do echo "$$p"; done | sed -e "s|$$srcdirstrip/||"
+am__nobase_list = $(am__nobase_strip_setup); \
+  for p in $$list; do echo "$$p $$p"; done | \
+  sed "s| $$srcdirstrip/| |;"' / .*\//!s/ .*/ ./; s,\( .*\)/[^/]*$$,\1,' | \
+  $(AWK) 'BEGIN { files["."] = "" } { files[$$2] = files[$$2] " " $$1; \
+    if (++n[$$2] == $(am__install_max)) \
+      { print $$2, files[$$2]; n[$$2] = 0; files[$$2] = "" } } \
+    END { for (dir in files) print dir, files[dir] }'
+am__base_list = \
+  sed '$$!N;$$!N;$$!N;$$!N;$$!N;$$!N;$$!N;s/\n/ /g' | \
+  sed '$$!N;$$!N;$$!N;$$!N;s/\n/ /g'
+am__installdirs = "$(DESTDIR)$(libdir)" "$(DESTDIR)$(includedir)"
+LTLIBRARIES = $(lib_LTLIBRARIES)
+libquicksched_la_LIBADD =
+am_libquicksched_la_OBJECTS = qsched.lo queue.lo
+libquicksched_la_OBJECTS = $(am_libquicksched_la_OBJECTS)
+libquicksched_cuda_la_LIBADD =
+am__libquicksched_cuda_la_SOURCES_DIST = qsched.c queue.c \
+	cuda_queue.cu
+@HAVE_CUDA_TRUE@am__objects_1 = libquicksched_cuda_la-qsched.lo \
+@HAVE_CUDA_TRUE@	libquicksched_cuda_la-queue.lo cuda_queue.lo
+@HAVE_CUDA_TRUE@am_libquicksched_cuda_la_OBJECTS = $(am__objects_1)
+libquicksched_cuda_la_OBJECTS = $(am_libquicksched_cuda_la_OBJECTS)
+libquicksched_cuda_la_LINK = $(LIBTOOL) --tag=CC $(AM_LIBTOOLFLAGS) \
+	$(LIBTOOLFLAGS) --mode=link $(CCLD) \
+	$(libquicksched_cuda_la_CFLAGS) $(CFLAGS) $(AM_LDFLAGS) \
+	$(LDFLAGS) -o $@
+@HAVE_CUDA_TRUE@am_libquicksched_cuda_la_rpath = -rpath $(libdir)
+DEFAULT_INCLUDES = -I.@am__isrc@ -I$(top_builddir)
+depcomp = $(SHELL) $(top_srcdir)/depcomp
+am__depfiles_maybe = depfiles
+am__mv = mv -f
+COMPILE = $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) \
+	$(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS)
+LTCOMPILE = $(LIBTOOL) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) \
+	--mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) \
+	$(AM_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS)
+CCLD = $(CC)
+LINK = $(LIBTOOL) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) \
+	--mode=link $(CCLD) $(AM_CFLAGS) $(CFLAGS) $(AM_LDFLAGS) \
+	$(LDFLAGS) -o $@
+SOURCES = $(libquicksched_la_SOURCES) $(libquicksched_cuda_la_SOURCES)
+DIST_SOURCES = $(libquicksched_la_SOURCES) \
+	$(am__libquicksched_cuda_la_SOURCES_DIST)
+HEADERS = $(include_HEADERS)
+ETAGS = etags
+CTAGS = ctags
+DISTFILES = $(DIST_COMMON) $(DIST_SOURCES) $(TEXINFOS) $(EXTRA_DIST)
+ACLOCAL = @ACLOCAL@
+AMTAR = @AMTAR@
+AR = @AR@
+AUTOCONF = @AUTOCONF@
+AUTOHEADER = @AUTOHEADER@
+AUTOMAKE = @AUTOMAKE@
+AWK = @AWK@
+CC = @CC@
+CCDEPMODE = @CCDEPMODE@
+CFLAGS = @CFLAGS@
+CPP = @CPP@
+CPPFLAGS = @CPPFLAGS@
+CUDA_CFLAGS = @CUDA_CFLAGS@
+CUDA_LIBS = @CUDA_LIBS@
+CYGPATH_W = @CYGPATH_W@
+DEFS = @DEFS@
+DEPDIR = @DEPDIR@
+DOXYGEN_PAPER_SIZE = @DOXYGEN_PAPER_SIZE@
+DSYMUTIL = @DSYMUTIL@
+DUMPBIN = @DUMPBIN@
+DX_CONFIG = @DX_CONFIG@
+DX_DOCDIR = @DX_DOCDIR@
+DX_DOT = @DX_DOT@
+DX_DOXYGEN = @DX_DOXYGEN@
+DX_DVIPS = @DX_DVIPS@
+DX_EGREP = @DX_EGREP@
+DX_ENV = @DX_ENV@
+DX_FLAG_chi = @DX_FLAG_chi@
+DX_FLAG_chm = @DX_FLAG_chm@
+DX_FLAG_doc = @DX_FLAG_doc@
+DX_FLAG_dot = @DX_FLAG_dot@
+DX_FLAG_html = @DX_FLAG_html@
+DX_FLAG_man = @DX_FLAG_man@
+DX_FLAG_pdf = @DX_FLAG_pdf@
+DX_FLAG_ps = @DX_FLAG_ps@
+DX_FLAG_rtf = @DX_FLAG_rtf@
+DX_FLAG_xml = @DX_FLAG_xml@
+DX_HHC = @DX_HHC@
+DX_LATEX = @DX_LATEX@
+DX_MAKEINDEX = @DX_MAKEINDEX@
+DX_PDFLATEX = @DX_PDFLATEX@
+DX_PERL = @DX_PERL@
+DX_PROJECT = @DX_PROJECT@
+ECHO_C = @ECHO_C@
+ECHO_N = @ECHO_N@
+ECHO_T = @ECHO_T@
+EGREP = @EGREP@
+EXEEXT = @EXEEXT@
+FGREP = @FGREP@
+GREP = @GREP@
+INSTALL = @INSTALL@
+INSTALL_DATA = @INSTALL_DATA@
+INSTALL_PROGRAM = @INSTALL_PROGRAM@
+INSTALL_SCRIPT = @INSTALL_SCRIPT@
+INSTALL_STRIP_PROGRAM = @INSTALL_STRIP_PROGRAM@
+LD = @LD@
+LDFLAGS = @LDFLAGS@
+LIBOBJS = @LIBOBJS@
+LIBS = @LIBS@
+LIBTOOL = @LIBTOOL@
+LIPO = @LIPO@
+LN_S = @LN_S@
+LTLIBOBJS = @LTLIBOBJS@
+MAKEINFO = @MAKEINFO@
+MKDIR_P = @MKDIR_P@
+NM = @NM@
+NMEDIT = @NMEDIT@
+NVCC = @NVCC@
+OBJDUMP = @OBJDUMP@
+OBJEXT = @OBJEXT@
+OPENMP_CFLAGS = @OPENMP_CFLAGS@
+OTOOL = @OTOOL@
+OTOOL64 = @OTOOL64@
+PACKAGE = @PACKAGE@
+PACKAGE_BUGREPORT = @PACKAGE_BUGREPORT@
+PACKAGE_NAME = @PACKAGE_NAME@
+PACKAGE_STRING = @PACKAGE_STRING@
+PACKAGE_TARNAME = @PACKAGE_TARNAME@
+PACKAGE_VERSION = @PACKAGE_VERSION@
+PATH_SEPARATOR = @PATH_SEPARATOR@
+PRTDIAG = @PRTDIAG@
+PTHREAD_CC = @PTHREAD_CC@
+PTHREAD_CFLAGS = @PTHREAD_CFLAGS@
+PTHREAD_LIBS = @PTHREAD_LIBS@
+RANLIB = @RANLIB@
+SED = @SED@
+SET_MAKE = @SET_MAKE@
+SHELL = @SHELL@
+SIMD_FLAGS = @SIMD_FLAGS@
+STRIP = @STRIP@
+VERSION = @VERSION@
+abs_builddir = @abs_builddir@
+abs_srcdir = @abs_srcdir@
+abs_top_builddir = @abs_top_builddir@
+abs_top_srcdir = @abs_top_srcdir@
+ac_ct_CC = @ac_ct_CC@
+ac_ct_DUMPBIN = @ac_ct_DUMPBIN@
+acx_pthread_config = @acx_pthread_config@
+am__include = @am__include@
+am__leading_dot = @am__leading_dot@
+am__quote = @am__quote@
+am__tar = @am__tar@
+am__untar = @am__untar@
+bindir = @bindir@
+build = @build@
+build_alias = @build_alias@
+build_cpu = @build_cpu@
+build_os = @build_os@
+build_vendor = @build_vendor@
+builddir = @builddir@
+datadir = @datadir@
+datarootdir = @datarootdir@
+docdir = @docdir@
+dvidir = @dvidir@
+exec_prefix = @exec_prefix@
+host = @host@
+host_alias = @host_alias@
+host_cpu = @host_cpu@
+host_os = @host_os@
+host_vendor = @host_vendor@
+htmldir = @htmldir@
+includedir = @includedir@
+infodir = @infodir@
+install_sh = @install_sh@
+libdir = @libdir@
+libexecdir = @libexecdir@
+localedir = @localedir@
+localstatedir = @localstatedir@
+lt_ECHO = @lt_ECHO@
+mandir = @mandir@
+mkdir_p = @mkdir_p@
+oldincludedir = @oldincludedir@
+pdfdir = @pdfdir@
+prefix = @prefix@
+program_transform_name = @program_transform_name@
+psdir = @psdir@
+sbindir = @sbindir@
+sharedstatedir = @sharedstatedir@
+srcdir = @srcdir@
+sysconfdir = @sysconfdir@
+target_alias = @target_alias@
+top_build_prefix = @top_build_prefix@
+top_builddir = @top_builddir@
+top_srcdir = @top_srcdir@
+
+# Automake stuff
+AUTOMAKE_OPTIONS = gnu
+
+# Add the debug flag to the whole thing
+AM_CFLAGS = -g -O3 -Wall -Werror -ffast-math -fstrict-aliasing -ftree-vectorize \
+    -funroll-loops $(SIMD_FLAGS) $(OPENMP_CFLAGS) -DTIMERS \
+     #-fsanitize=address -fno-omit-frame-pointer
+
+
+# Assign a "safe" version number
+AM_LDFLAGS = -version-info 0:0:0
+
+# Build the libquicksched library
+lib_LTLIBRARIES = libquicksched.la $(am__append_1)
+libquicksched_la_SOURCES = qsched.c queue.c
+
+# List required headers, each exactly once (mirror any change in Makefile.am). NOTE(review): the examples include "quicksched.h", which is not listed here -- confirm whether it should be installed too.
+include_HEADERS = atomic.h lock.h queue.h qsched.h task.h res.h error.h
+@HAVE_CUDA_FALSE@SOURCES_CUDA = 
+
+# CUDA sources 
+@HAVE_CUDA_TRUE@SOURCES_CUDA = qsched.c queue.c cuda_queue.cu
+@HAVE_CUDA_TRUE@CUDA_MYFLAGS = -O3 -g -DCPU_TPS=3.1e9 -Xnvlink -rdc=true -lineinfo -src-in-ptx --maxrregcount=32 -Xptxas="-v" -Xptxas -dlcm=cg -gencode arch=compute_30,code=sm_30 -ftz=true -fmad=true -DFPTYPE_SINGLE -DWITH_CUDA #-fsanitize=address -fno-omit-frame-pointer
+@HAVE_CUDA_TRUE@libquicksched_cuda_la_SOURCES = $(SOURCES_CUDA)
+@HAVE_CUDA_TRUE@libquicksched_cuda_la_CFLAGS = -DFPTYPE_SINGLE $(AM_CFLAGS) -DWITH_CUDA $(CUDA_CFLAGS)
+all: all-am
+
+.SUFFIXES:
+.SUFFIXES: .c .cu .lo .o .obj
+$(srcdir)/Makefile.in:  $(srcdir)/Makefile.am  $(am__configure_deps)
+	@for dep in $?; do \
+	  case '$(am__configure_deps)' in \
+	    *$$dep*) \
+	      ( cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh ) \
+	        && { if test -f $@; then exit 0; else break; fi; }; \
+	      exit 1;; \
+	  esac; \
+	done; \
+	echo ' cd $(top_srcdir) && $(AUTOMAKE) --gnu src/Makefile'; \
+	$(am__cd) $(top_srcdir) && \
+	  $(AUTOMAKE) --gnu src/Makefile
+.PRECIOUS: Makefile
+Makefile: $(srcdir)/Makefile.in $(top_builddir)/config.status
+	@case '$?' in \
+	  *config.status*) \
+	    cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh;; \
+	  *) \
+	    echo ' cd $(top_builddir) && $(SHELL) ./config.status $(subdir)/$@ $(am__depfiles_maybe)'; \
+	    cd $(top_builddir) && $(SHELL) ./config.status $(subdir)/$@ $(am__depfiles_maybe);; \
+	esac;
+
+$(top_builddir)/config.status: $(top_srcdir)/configure $(CONFIG_STATUS_DEPENDENCIES)
+	cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh
+
+$(top_srcdir)/configure:  $(am__configure_deps)
+	cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh
+$(ACLOCAL_M4):  $(am__aclocal_m4_deps)
+	cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh
+$(am__aclocal_m4_deps):
+install-libLTLIBRARIES: $(lib_LTLIBRARIES)
+	@$(NORMAL_INSTALL)
+	test -z "$(libdir)" || $(MKDIR_P) "$(DESTDIR)$(libdir)"
+	@list='$(lib_LTLIBRARIES)'; test -n "$(libdir)" || list=; \
+	list2=; for p in $$list; do \
+	  if test -f $$p; then \
+	    list2="$$list2 $$p"; \
+	  else :; fi; \
+	done; \
+	test -z "$$list2" || { \
+	  echo " $(LIBTOOL) $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=install $(INSTALL) $(INSTALL_STRIP_FLAG) $$list2 '$(DESTDIR)$(libdir)'"; \
+	  $(LIBTOOL) $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=install $(INSTALL) $(INSTALL_STRIP_FLAG) $$list2 "$(DESTDIR)$(libdir)"; \
+	}
+
+uninstall-libLTLIBRARIES:
+	@$(NORMAL_UNINSTALL)
+	@list='$(lib_LTLIBRARIES)'; test -n "$(libdir)" || list=; \
+	for p in $$list; do \
+	  $(am__strip_dir) \
+	  echo " $(LIBTOOL) $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=uninstall rm -f '$(DESTDIR)$(libdir)/$$f'"; \
+	  $(LIBTOOL) $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=uninstall rm -f "$(DESTDIR)$(libdir)/$$f"; \
+	done
+
+clean-libLTLIBRARIES:
+	-test -z "$(lib_LTLIBRARIES)" || rm -f $(lib_LTLIBRARIES)
+	@list='$(lib_LTLIBRARIES)'; for p in $$list; do \
+	  dir="`echo $$p | sed -e 's|/[^/]*$$||'`"; \
+	  test "$$dir" != "$$p" || dir=.; \
+	  echo "rm -f \"$${dir}/so_locations\""; \
+	  rm -f "$${dir}/so_locations"; \
+	done
+libquicksched.la: $(libquicksched_la_OBJECTS) $(libquicksched_la_DEPENDENCIES) 
+	$(LINK) -rpath $(libdir) $(libquicksched_la_OBJECTS) $(libquicksched_la_LIBADD) $(LIBS)
+libquicksched_cuda.la: $(libquicksched_cuda_la_OBJECTS) $(libquicksched_cuda_la_DEPENDENCIES) 
+	$(libquicksched_cuda_la_LINK) $(am_libquicksched_cuda_la_rpath) $(libquicksched_cuda_la_OBJECTS) $(libquicksched_cuda_la_LIBADD) $(LIBS)
+
+mostlyclean-compile:
+	-rm -f *.$(OBJEXT)
+
+distclean-compile:
+	-rm -f *.tab.c
+
+@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/libquicksched_cuda_la-qsched.Plo@am__quote@
+@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/libquicksched_cuda_la-queue.Plo@am__quote@
+@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/qsched.Plo@am__quote@
+@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/queue.Plo@am__quote@
+
+.c.o:
+@am__fastdepCC_TRUE@	$(COMPILE) -MT $@ -MD -MP -MF $(DEPDIR)/$*.Tpo -c -o $@ $<
+@am__fastdepCC_TRUE@	$(am__mv) $(DEPDIR)/$*.Tpo $(DEPDIR)/$*.Po
+@AMDEP_TRUE@@am__fastdepCC_FALSE@	source='$<' object='$@' libtool=no @AMDEPBACKSLASH@
+@AMDEP_TRUE@@am__fastdepCC_FALSE@	DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
+@am__fastdepCC_FALSE@	$(COMPILE) -c $<
+
+.c.obj:
+@am__fastdepCC_TRUE@	$(COMPILE) -MT $@ -MD -MP -MF $(DEPDIR)/$*.Tpo -c -o $@ `$(CYGPATH_W) '$<'`
+@am__fastdepCC_TRUE@	$(am__mv) $(DEPDIR)/$*.Tpo $(DEPDIR)/$*.Po
+@AMDEP_TRUE@@am__fastdepCC_FALSE@	source='$<' object='$@' libtool=no @AMDEPBACKSLASH@
+@AMDEP_TRUE@@am__fastdepCC_FALSE@	DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
+@am__fastdepCC_FALSE@	$(COMPILE) -c `$(CYGPATH_W) '$<'`
+
+.c.lo:
+@am__fastdepCC_TRUE@	$(LTCOMPILE) -MT $@ -MD -MP -MF $(DEPDIR)/$*.Tpo -c -o $@ $<
+@am__fastdepCC_TRUE@	$(am__mv) $(DEPDIR)/$*.Tpo $(DEPDIR)/$*.Plo
+@AMDEP_TRUE@@am__fastdepCC_FALSE@	source='$<' object='$@' libtool=yes @AMDEPBACKSLASH@
+@AMDEP_TRUE@@am__fastdepCC_FALSE@	DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
+@am__fastdepCC_FALSE@	$(LTCOMPILE) -c -o $@ $<
+
+libquicksched_cuda_la-qsched.lo: qsched.c
+@am__fastdepCC_TRUE@	$(LIBTOOL)  --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(libquicksched_cuda_la_CFLAGS) $(CFLAGS) -MT libquicksched_cuda_la-qsched.lo -MD -MP -MF $(DEPDIR)/libquicksched_cuda_la-qsched.Tpo -c -o libquicksched_cuda_la-qsched.lo `test -f 'qsched.c' || echo '$(srcdir)/'`qsched.c
+@am__fastdepCC_TRUE@	$(am__mv) $(DEPDIR)/libquicksched_cuda_la-qsched.Tpo $(DEPDIR)/libquicksched_cuda_la-qsched.Plo
+@AMDEP_TRUE@@am__fastdepCC_FALSE@	source='qsched.c' object='libquicksched_cuda_la-qsched.lo' libtool=yes @AMDEPBACKSLASH@
+@AMDEP_TRUE@@am__fastdepCC_FALSE@	DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
+@am__fastdepCC_FALSE@	$(LIBTOOL)  --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(libquicksched_cuda_la_CFLAGS) $(CFLAGS) -c -o libquicksched_cuda_la-qsched.lo `test -f 'qsched.c' || echo '$(srcdir)/'`qsched.c
+
+libquicksched_cuda_la-queue.lo: queue.c
+@am__fastdepCC_TRUE@	$(LIBTOOL)  --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(libquicksched_cuda_la_CFLAGS) $(CFLAGS) -MT libquicksched_cuda_la-queue.lo -MD -MP -MF $(DEPDIR)/libquicksched_cuda_la-queue.Tpo -c -o libquicksched_cuda_la-queue.lo `test -f 'queue.c' || echo '$(srcdir)/'`queue.c
+@am__fastdepCC_TRUE@	$(am__mv) $(DEPDIR)/libquicksched_cuda_la-queue.Tpo $(DEPDIR)/libquicksched_cuda_la-queue.Plo
+@AMDEP_TRUE@@am__fastdepCC_FALSE@	source='queue.c' object='libquicksched_cuda_la-queue.lo' libtool=yes @AMDEPBACKSLASH@
+@AMDEP_TRUE@@am__fastdepCC_FALSE@	DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
+@am__fastdepCC_FALSE@	$(LIBTOOL)  --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(libquicksched_cuda_la_CFLAGS) $(CFLAGS) -c -o libquicksched_cuda_la-queue.lo `test -f 'queue.c' || echo '$(srcdir)/'`queue.c
+
+mostlyclean-libtool:
+	-rm -f *.lo
+
+clean-libtool:
+	-rm -rf .libs _libs
+install-includeHEADERS: $(include_HEADERS)
+	@$(NORMAL_INSTALL)
+	test -z "$(includedir)" || $(MKDIR_P) "$(DESTDIR)$(includedir)"
+	@list='$(include_HEADERS)'; test -n "$(includedir)" || list=; \
+	for p in $$list; do \
+	  if test -f "$$p"; then d=; else d="$(srcdir)/"; fi; \
+	  echo "$$d$$p"; \
+	done | $(am__base_list) | \
+	while read files; do \
+	  echo " $(INSTALL_HEADER) $$files '$(DESTDIR)$(includedir)'"; \
+	  $(INSTALL_HEADER) $$files "$(DESTDIR)$(includedir)" || exit $$?; \
+	done
+
+uninstall-includeHEADERS:
+	@$(NORMAL_UNINSTALL)
+	@list='$(include_HEADERS)'; test -n "$(includedir)" || list=; \
+	files=`for p in $$list; do echo $$p; done | sed -e 's|^.*/||'`; \
+	test -n "$$files" || exit 0; \
+	echo " ( cd '$(DESTDIR)$(includedir)' && rm -f" $$files ")"; \
+	cd "$(DESTDIR)$(includedir)" && rm -f $$files
+
+ID: $(HEADERS) $(SOURCES) $(LISP) $(TAGS_FILES)
+	list='$(SOURCES) $(HEADERS) $(LISP) $(TAGS_FILES)'; \
+	unique=`for i in $$list; do \
+	    if test -f "$$i"; then echo $$i; else echo $(srcdir)/$$i; fi; \
+	  done | \
+	  $(AWK) '{ files[$$0] = 1; nonempty = 1; } \
+	      END { if (nonempty) { for (i in files) print i; }; }'`; \
+	mkid -fID $$unique
+tags: TAGS
+
+TAGS:  $(HEADERS) $(SOURCES)  $(TAGS_DEPENDENCIES) \
+		$(TAGS_FILES) $(LISP)
+	set x; \
+	here=`pwd`; \
+	list='$(SOURCES) $(HEADERS)  $(LISP) $(TAGS_FILES)'; \
+	unique=`for i in $$list; do \
+	    if test -f "$$i"; then echo $$i; else echo $(srcdir)/$$i; fi; \
+	  done | \
+	  $(AWK) '{ files[$$0] = 1; nonempty = 1; } \
+	      END { if (nonempty) { for (i in files) print i; }; }'`; \
+	shift; \
+	if test -z "$(ETAGS_ARGS)$$*$$unique"; then :; else \
+	  test -n "$$unique" || unique=$$empty_fix; \
+	  if test $$# -gt 0; then \
+	    $(ETAGS) $(ETAGSFLAGS) $(AM_ETAGSFLAGS) $(ETAGS_ARGS) \
+	      "$$@" $$unique; \
+	  else \
+	    $(ETAGS) $(ETAGSFLAGS) $(AM_ETAGSFLAGS) $(ETAGS_ARGS) \
+	      $$unique; \
+	  fi; \
+	fi
+ctags: CTAGS
+CTAGS:  $(HEADERS) $(SOURCES)  $(TAGS_DEPENDENCIES) \
+		$(TAGS_FILES) $(LISP)
+	list='$(SOURCES) $(HEADERS)  $(LISP) $(TAGS_FILES)'; \
+	unique=`for i in $$list; do \
+	    if test -f "$$i"; then echo $$i; else echo $(srcdir)/$$i; fi; \
+	  done | \
+	  $(AWK) '{ files[$$0] = 1; nonempty = 1; } \
+	      END { if (nonempty) { for (i in files) print i; }; }'`; \
+	test -z "$(CTAGS_ARGS)$$unique" \
+	  || $(CTAGS) $(CTAGSFLAGS) $(AM_CTAGSFLAGS) $(CTAGS_ARGS) \
+	     $$unique
+
+GTAGS:
+	here=`$(am__cd) $(top_builddir) && pwd` \
+	  && $(am__cd) $(top_srcdir) \
+	  && gtags -i $(GTAGS_ARGS) "$$here"
+
+distclean-tags:
+	-rm -f TAGS ID GTAGS GRTAGS GSYMS GPATH tags
+
+distdir: $(DISTFILES)
+	@srcdirstrip=`echo "$(srcdir)" | sed 's/[].[^$$\\*]/\\\\&/g'`; \
+	topsrcdirstrip=`echo "$(top_srcdir)" | sed 's/[].[^$$\\*]/\\\\&/g'`; \
+	list='$(DISTFILES)'; \
+	  dist_files=`for file in $$list; do echo $$file; done | \
+	  sed -e "s|^$$srcdirstrip/||;t" \
+	      -e "s|^$$topsrcdirstrip/|$(top_builddir)/|;t"`; \
+	case $$dist_files in \
+	  */*) $(MKDIR_P) `echo "$$dist_files" | \
+			   sed '/\//!d;s|^|$(distdir)/|;s,/[^/]*$$,,' | \
+			   sort -u` ;; \
+	esac; \
+	for file in $$dist_files; do \
+	  if test -f $$file || test -d $$file; then d=.; else d=$(srcdir); fi; \
+	  if test -d $$d/$$file; then \
+	    dir=`echo "/$$file" | sed -e 's,/[^/]*$$,,'`; \
+	    if test -d "$(distdir)/$$file"; then \
+	      find "$(distdir)/$$file" -type d ! -perm -700 -exec chmod u+rwx {} \;; \
+	    fi; \
+	    if test -d $(srcdir)/$$file && test $$d != $(srcdir); then \
+	      cp -fpR $(srcdir)/$$file "$(distdir)$$dir" || exit 1; \
+	      find "$(distdir)/$$file" -type d ! -perm -700 -exec chmod u+rwx {} \;; \
+	    fi; \
+	    cp -fpR $$d/$$file "$(distdir)$$dir" || exit 1; \
+	  else \
+	    test -f "$(distdir)/$$file" \
+	    || cp -p $$d/$$file "$(distdir)/$$file" \
+	    || exit 1; \
+	  fi; \
+	done
+check-am: all-am
+check: check-am
+all-am: Makefile $(LTLIBRARIES) $(HEADERS)
+installdirs:
+	for dir in "$(DESTDIR)$(libdir)" "$(DESTDIR)$(includedir)"; do \
+	  test -z "$$dir" || $(MKDIR_P) "$$dir"; \
+	done
+install: install-am
+install-exec: install-exec-am
+install-data: install-data-am
+uninstall: uninstall-am
+
+install-am: all-am
+	@$(MAKE) $(AM_MAKEFLAGS) install-exec-am install-data-am
+
+installcheck: installcheck-am
+install-strip:
+	$(MAKE) $(AM_MAKEFLAGS) INSTALL_PROGRAM="$(INSTALL_STRIP_PROGRAM)" \
+	  install_sh_PROGRAM="$(INSTALL_STRIP_PROGRAM)" INSTALL_STRIP_FLAG=-s \
+	  `test -z '$(STRIP)' || \
+	    echo "INSTALL_PROGRAM_ENV=STRIPPROG='$(STRIP)'"` install
+mostlyclean-generic:
+
+clean-generic:
+
+distclean-generic:
+	-test -z "$(CONFIG_CLEAN_FILES)" || rm -f $(CONFIG_CLEAN_FILES)
+	-test . = "$(srcdir)" || test -z "$(CONFIG_CLEAN_VPATH_FILES)" || rm -f $(CONFIG_CLEAN_VPATH_FILES)
+
+maintainer-clean-generic:
+	@echo "This command is intended for maintainers to use"
+	@echo "it deletes files that may require special tools to rebuild."
+clean: clean-am
+
+clean-am: clean-generic clean-libLTLIBRARIES clean-libtool \
+	mostlyclean-am
+
+distclean: distclean-am
+	-rm -rf ./$(DEPDIR)
+	-rm -f Makefile
+distclean-am: clean-am distclean-compile distclean-generic \
+	distclean-tags
+
+dvi: dvi-am
+
+dvi-am:
+
+html: html-am
+
+html-am:
+
+info: info-am
+
+info-am:
+
+install-data-am: install-includeHEADERS
+
+install-dvi: install-dvi-am
+
+install-dvi-am:
+
+install-exec-am: install-libLTLIBRARIES
+
+install-html: install-html-am
+
+install-html-am:
+
+install-info: install-info-am
+
+install-info-am:
+
+install-man:
+
+install-pdf: install-pdf-am
+
+install-pdf-am:
+
+install-ps: install-ps-am
+
+install-ps-am:
+
+installcheck-am:
+
+maintainer-clean: maintainer-clean-am
+	-rm -rf ./$(DEPDIR)
+	-rm -f Makefile
+maintainer-clean-am: distclean-am maintainer-clean-generic
+
+mostlyclean: mostlyclean-am
+
+mostlyclean-am: mostlyclean-compile mostlyclean-generic \
+	mostlyclean-libtool
+
+pdf: pdf-am
+
+pdf-am:
+
+ps: ps-am
+
+ps-am:
+
+uninstall-am: uninstall-includeHEADERS uninstall-libLTLIBRARIES
+
+.MAKE: install-am install-strip
+
+.PHONY: CTAGS GTAGS all all-am check check-am clean clean-generic \
+	clean-libLTLIBRARIES clean-libtool ctags distclean \
+	distclean-compile distclean-generic distclean-libtool \
+	distclean-tags distdir dvi dvi-am html html-am info info-am \
+	install install-am install-data install-data-am install-dvi \
+	install-dvi-am install-exec install-exec-am install-html \
+	install-html-am install-includeHEADERS install-info \
+	install-info-am install-libLTLIBRARIES install-man install-pdf \
+	install-pdf-am install-ps install-ps-am install-strip \
+	installcheck installcheck-am installdirs maintainer-clean \
+	maintainer-clean-generic mostlyclean mostlyclean-compile \
+	mostlyclean-generic mostlyclean-libtool pdf pdf-am ps ps-am \
+	tags uninstall uninstall-am uninstall-includeHEADERS \
+	uninstall-libLTLIBRARIES
+
+@HAVE_CUDA_TRUE@.cu: qsched.c queue.c
+@HAVE_CUDA_TRUE@.cu.o:
+@HAVE_CUDA_TRUE@	$(NVCC) -c $(NVCCFLAGS) $(CUDA_CFLAGS) $(CUDA_MYFLAGS) $< -o $@
+@HAVE_CUDA_TRUE@.cu.lo:
+@HAVE_CUDA_TRUE@	$(top_srcdir)/cudalt.py $@ $(NVCC) -c $(NVCCFLAGS) $(CUDA_CFLAGS) $(CUDA_MYFLAGS) $<
+
+# Tell versions [3.59,3.63) of GNU make to not export all variables.
+# Otherwise a system limit (for SysV at least) may be exceeded.
+.NOEXPORT: