diff --git a/Makefile.am b/Makefile.am new file mode 100644 index 0000000000000000000000000000000000000000..c81593b49d4867eae768d19e314bd6d9417ae63f --- /dev/null +++ b/Makefile.am @@ -0,0 +1,21 @@ +# This file is part of SWIFT. +# Copyright (c) 2012 pedro.gonnet@durham.ac.uk. +# +# This program is free software: you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation, either version 3 of the License, or +# (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with this program. If not, see <http://www.gnu.org/licenses/>. + +# Automake stuff +ACLOCAL_AMFLAGS = -I m4 + +# Show the way... +SUBDIRS = src examples doc diff --git a/src/CUDACompile.sh b/src/CUDACompile.sh index e70603d972b2bd93164b8ef4c232c579cd7c4bb0..b454302c727aa424aff64e9f18fbeedbf86951c4 100755 --- a/src/CUDACompile.sh +++ b/src/CUDACompile.sh @@ -1,49 +1,41 @@ #!/bin/bash FLAGS2="-Xcompiler=-fsanitize=address -Xcompiler=-fno-omit-frame-pointer" DEBUG_FLAGS="-G -DDEBUG_GPU" -FLAGS="-O3 -g -DCPU_TPS=3.1e9 -lineinfo -src-in-ptx -Xptxas -dlcm=cg --maxrregcount=32 -gencode arch=compute_30,code=sm_30 -ftz=true -fmad=true -DFPTYPE_SINGLE -lgomp -DWITH_CUDA -DTIMERS -ccbin=/usr/bin/gcc-4.8" +FLAGS="-O3 -g -DCPU_TPS=3.1e9 -lineinfo -src-in-ptx -Xptxas -dlcm=cg --maxrregcount=32 -gencode arch=compute_30,code=sm_30 -ftz=true -fmad=true -DFPTYPE_SINGLE -lgomp -DWITH_CUDA -DTIMERS -ccbin=gcc" # -DGPU_locks -Xptxas -dlcm=cg -Xptxas="-v"" # -DNO_LOADS #-DSIM -#-lprofiler +# CFLAGS="-fsanitize=address -fno-omit-frame-pointer" -gcc-4.8 -std=gnu99 -DHAVE_CONFIG_H -I. -I.. 
-g -O3 -Wall -Werror -ffast-math -fstrict-aliasing -ftree-vectorize -funroll-loops -mmmx -msse -msse2 -msse3 -mssse3 -msse4.1 -msse4.2 -mavx -fopenmp -DTIMERS -DWITH_CUDA -g -O2 -march=native -pthread -MT qsched.lo -MD -MP -MF .deps/qsched.Tpo -c qsched.c -o qsched.o -lprofiler #-fsanitize=address -fno-omit-frame-pointer -gcc-4.8 -std=gnu99 -DHAVE_CONFIG_H -I. -I.. -g -O3 -Wall -Werror -ffast-math -fstrict-aliasing -ftree-vectorize -funroll-loops -mmmx -msse -msse2 -msse3 -mssse3 -msse4.1 -msse4.2 -mavx -fopenmp -DTIMERS -DWITH_CUDA -g -O2 -march=native -pthread -MT qsched.lo -MD -MP -MF .deps/qsched.Tpo -c queue.c -o queue.o -lprofiler #-fsanitize=address -fno-omit-frame-pointer +gcc -std=gnu99 -DHAVE_CONFIG_H -I. -I.. -g -O3 -Wall -Werror -ffast-math -fstrict-aliasing -ftree-vectorize -funroll-loops -mmmx -msse -msse2 -msse3 -mssse3 -msse4.1 -msse4.2 -mavx -fopenmp -DTIMERS -DWITH_CUDA -g -O2 -march=native -pthread -MT qsched.lo -MD -MP -MF .deps/qsched.Tpo -c qsched.c -o qsched.o #-fsanitize=address -fno-omit-frame-pointer +gcc -std=gnu99 -DHAVE_CONFIG_H -I. -I.. 
-g -O3 -Wall -Werror -ffast-math -fstrict-aliasing -ftree-vectorize -funroll-loops -mmmx -msse -msse2 -msse3 -mssse3 -msse4.1 -msse4.2 -mavx -fopenmp -DTIMERS -DWITH_CUDA -g -O2 -march=native -pthread -MT queue.lo -MD -MP -MF .deps/queue.Tpo -c queue.c -o queue.o #-fsanitize=address -fno-omit-frame-pointer -/home/aidan/cuda_7.0/bin/nvcc $FLAGS -dc cuda_queue.cu qsched.o queue.o -lprofiler +nvcc $FLAGS -dc cuda_queue.cu qsched.o queue.o -lm -/home/aidan/cuda_7.0/bin/nvcc $FLAGS -lib cuda_queue.o qsched.o queue.o -o .libs/libquicksched_cuda.a -lprofiler +nvcc $FLAGS -lib cuda_queue.o qsched.o queue.o -o .libs/libquicksched_cuda.a -lm -/home/aidan/cuda_7.0/bin/nvcc $FLAGS -DGPU_locks -dc cuda_queue.cu qsched.o queue.o -lprofiler +nvcc $FLAGS -DGPU_locks -dc cuda_queue.cu qsched.o queue.o -lm -/home/aidan/cuda_7.0/bin/nvcc $FLAGS -lib cuda_queue.o qsched.o queue.o -o .libs/libquicksched_cuda_locks.a -lprofiler +nvcc $FLAGS -lib cuda_queue.o qsched.o queue.o -o .libs/libquicksched_cuda_locks.a -lm cd ../examples -/home/aidan/cuda_7.0/bin/nvcc $FLAGS -dc -m64 -I../src -dc -L/home/aidan/cuda_7.0/lib -L/home/aidan/cuda_7.0/lib64 -lcudart -lcuda test_gpu_simple.cu -lprofiler +nvcc $FLAGS -dc -m64 -I../src -dc -lcudart -lcuda test_gpu_simple.cu -lm -/home/aidan/cuda_7.0/bin/nvcc $FLAGS -m64 -I../src -L/home/aidan/cuda_7.0/lib -L/home/aidan/cuda_7.0/lib64 -Xnvlink -v test_gpu_simple.o ../src/.libs/libquicksched_cuda.a -o simple_cuda_test -lprofiler +nvcc $FLAGS -m64 -I../src -Xnvlink -v test_gpu_simple.o ../src/.libs/libquicksched_cuda.a -o simple_cuda_test -lm -/home/aidan/cuda_7.0/bin/nvcc $FLAGS -dc -m64 -I../src -dc -lf77blas -lcblas -latlas -lm -I$CULA_INC_PATH -L$CULA_LIB_PATH_64 -L/home/aidan/ATLAS/ATLAS_linux/lib/ -L/home/aidan/cuda_7.0/lib -L/home/aidan/cuda_7.0/lib64 -lcudart -lcula_lapack -lcuda /usr/lib64/atlas/libcblas.a /usr/lib64/atlas/libptcblas.a test_qr.cu -lprofiler +nvcc $FLAGS -dc -m64 -I../src -dc -lcudart -lcuda -lm test_qr.cu 
-/home/aidan/cuda_7.0/bin/nvcc $FLAGS -m64 -I../src -lf77blas -lcblas -latlas -lm -I$CULA_INC_PATH -L$CULA_LIB_PATH_64 -L/home/aidan/ATLAS/ATLAS_linux/lib/ -L/home/aidan/cuda_7.0/lib -L/home/aidan/cuda_7.0/lib64 -lcula_lapack -Xnvlink -v test_qr.o ../src/.libs/libquicksched_cuda.a /usr/lib64/atlas/libcblas.a /usr/lib64/atlas/libptcblas.a -o qr_cuda_test -lprofiler +nvcc $FLAGS -m64 -I../src -lm -Xnvlink -v test_qr.o ../src/.libs/libquicksched_cuda.a -o test_qr_gpu -/home/aidan/cuda_7.0/bin/nvcc $FLAGS -dc -m64 -I../src -dc -L/home/aidan/cuda_7.0/lib -L/home/aidan/cuda_7.0/lib64 -lcudart -lcuda test_hierarchy.cu -lprofiler +nvcc $FLAGS -dc -m64 -I../src -dc -lcudart -lcuda test_hierarchy.cu -lm -/home/aidan/cuda_7.0/bin/nvcc $FLAGS -m64 -I../src -L/home/aidan/cuda_7.0/lib -L/home/aidan/cuda_7.0/lib64 -Xnvlink -v test_hierarchy.o ../src/.libs/libquicksched_cuda.a -o test_heirarchy -lprofiler +nvcc $FLAGS -m64 -I../src -Xnvlink -v test_hierarchy.o ../src/.libs/libquicksched_cuda.a -o test_heirarchy -lm -/home/aidan/cuda_7.0/bin/nvcc $FLAGS -dc -m64 -I../src -dc -L/home/aidan/cuda_7.0/lib -L/home/aidan/cuda_7.0/lib64 -lcudart -lcuda test_bh_2.cu -lprofiler +nvcc $FLAGS -dc -m64 -I../src -dc -lcudart -lcuda test_bh.cu -lm -/home/aidan/cuda_7.0/bin/nvcc $FLAGS -m64 -I../src -L/home/aidan/cuda_7.0/lib -L/home/aidan/cuda_7.0/lib64 -Xnvlink -v test_bh_2.o ../src/.libs/libquicksched_cuda.a -o test_bh_2 -lprofiler - -/home/aidan/cuda_7.0/bin/nvcc $FLAGS -dc -m64 -I../src -dc -L/home/aidan/cuda_7.0/lib -L/home/aidan/cuda_7.0/lib64 -lcudart -lcuda test_bh_3.cu -lprofiler - -/home/aidan/cuda_7.0/bin/nvcc $FLAGS -m64 -I../src -L/home/aidan/cuda_7.0/lib -L/home/aidan/cuda_7.0/lib64 -Xnvlink -v test_bh_3.o ../src/.libs/libquicksched_cuda.a -o test_bh_3 -lprofiler - -/home/aidan/cuda_7.0/bin/nvcc $FLAGS -dc -m64 -I../src -dc -L/home/aidan/cuda_7.0/lib -L/home/aidan/cuda_7.0/lib64 -lcudart -lcuda test_bh_4.cu -lprofiler - -/home/aidan/cuda_7.0/bin/nvcc $FLAGS -m64 -I../src 
-L/home/aidan/cuda_7.0/lib -L/home/aidan/cuda_7.0/lib64 -Xnvlink -v test_bh_4.o ../src/.libs/libquicksched_cuda.a -o test_bh_4 -lprofiler +nvcc $FLAGS -m64 -I../src -Xnvlink -v test_bh.o ../src/.libs/libquicksched_cuda.a -o test_bh -lm diff --git a/src/Makefile.am b/src/Makefile.am new file mode 100644 index 0000000000000000000000000000000000000000..e53e78bf316c15183be720c10a191b4b4192a8bd --- /dev/null +++ b/src/Makefile.am @@ -0,0 +1,33 @@ +# This file is part of Quicksched. +# Copyright (c) 2013 Pedro Gonnet (pedro.gonnet@durham.ac.uk), +# +# This program is free software: you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation, either version 3 of the License, or +# (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with this program. If not, see <http://www.gnu.org/licenses/>. + +# Debug, optimisation and warning flags for every C object built here +AM_CFLAGS = -g -O3 -Wall -Werror -ffast-math -fstrict-aliasing -ftree-vectorize \ + -funroll-loops $(SIMD_FLAGS) $(OPENMP_CFLAGS) -DTIMERS +# Disabled AddressSanitizer flags: -fsanitize=address -fno-omit-frame-pointer + +# Assign a "safe" version number +AM_LDFLAGS = -version-info 0:0:0 + +# Build the libquicksched library +lib_LTLIBRARIES = libquicksched.la +libquicksched_la_SOURCES = qsched.c queue.c + +# List required headers +include_HEADERS = atomic.h lock.h queue.h qsched.h task.h res.h error.h + +# Private headers. +noinst_HEADERS = cycle.h quicksched.h