... | ... | @@ -66,4 +66,46 @@ To do that, run your code with the following: |
|
|
```
|
|
|
nvprof --metrics achieved_occupancy,executed_ipc -o metrics.prof --export-profile timeline.prof
|
|
|
```
|
|
|
and this will generate two files, ```metrics.prof``` and ```timeline.prof```. Copy these to your local machine and launch ```nvvp``` (which can be downloaded [here](https://developer.nvidia.com/cuda-downloads), around 1.4GB). More information on running the profiler can be found in [these manual pages](http://docs.nvidia.com/cuda/profiler-users-guide/index.html#collecting-remote-data) but to get you started you want to use File -> Import, choose nvprof, and select your two files. Then import them and get to work! |
|
|
\ No newline at end of file |
|
|
and this will generate two files, ```metrics.prof``` and ```timeline.prof```. Copy these to your local machine and launch ```nvvp``` (which can be downloaded [here](https://developer.nvidia.com/cuda-downloads); the download is around 1.4GB). More information on running the profiler can be found in [these manual pages](http://docs.nvidia.com/cuda/profiler-users-guide/index.html#collecting-remote-data), but to get started, use File -> Import, choose nvprof, and select your two files. Then import them and get to work!
|
|
|
|
|
|
To *actually* profile the code, we will need to use CUPTI. There are some examples on Piz Daint at
|
|
|
```/opt/nvidia/cudatoolkit8.0/8.0.54_2.2.8_ga620558-2.1/extras/CUPTI```. Try the `pc_sampling` example -- you will need to edit its Makefile so that it reads:
|
|
|
```
|
|
|
#
|
|
|
# Copyright 2014-2015 NVIDIA Corporation. All rights reserved
|
|
|
#
|
|
|
|
|
|
# Host detection: when OS carries no value, ask uname for the kernel name
# and the machine architecture. (The Windows_NT test below presumably
# relies on Windows presetting OS in the environment -- confirm.)
ifndef OS
OS := $(shell uname)
HOST_ARCH := $(shell uname -m)
endif

# Per-platform settings.
#   LIBS - link flags for the CUDA driver library and CUPTI
#          (not referenced by the rules visible in this listing).
#   OBJ  - object-file suffix used by the pc_sampling rules.
ifeq ($(OS),Windows_NT)
# Let commands started by make find the CUPTI libraries via PATH.
export PATH := $(PATH):../../libWin32:../../libx64
LIBS= -lcuda -L ../../libWin32 -L ../../libx64 -lcupti
OBJ = obj
else
ifeq ($(OS),Darwin)
# The macOS dynamic loader reads DYLD_LIBRARY_PATH; the former spelling
# DYCRAY_LD_LIBRARY_PATH is not a variable the loader consults.
export DYLD_LIBRARY_PATH := $(DYLD_LIBRARY_PATH):../../lib
LIBS= -Xlinker -framework -Xlinker cuda -L ../../lib -lcupti
else
# Library-path export left commented out in this version.
# NOTE(review): CRAY_LD_LIBRARY_PATH looks like the Cray counterpart of
# LD_LIBRARY_PATH -- confirm before re-enabling.
# export CRAY_LD_LIBRARY_PATH := $(CRAY_LD_LIBRARY_PATH):../../lib:../../lib64
LIBS= -lcuda -L ../../lib -L ../../lib64 -lcupti
endif
OBJ = o
endif
|
|
|
|
|
|
# Root of the CUDA toolkit installation. Override on the command line or
# in the environment when a different toolkit version is installed, e.g.
#   make CUDA_TOOLKIT_DIR=/path/to/cudatoolkit
CUDA_TOOLKIT_DIR ?= /opt/nvidia/cudatoolkit8.0/8.0.54_2.2.8_ga620558-2.1

# Link flags handed to nvcc: the toolkit libraries, CUPTI (shipped under
# extras/CUPTI) and the CUDA driver library. Being assigned here, this
# replaces any value of the same name inherited from the environment.
CRAY_CUDATOOLKIT_POST_LINK_OPTS = \
  -L$(CUDA_TOOLKIT_DIR)/lib64 \
  -L$(CUDA_TOOLKIT_DIR)/extras/CUPTI/lib64 -lcupti \
  -L/opt/cray/nvidia/default/lib64 -lcuda
|
|
|
|
|
|
# Link the pc_sampling executable from its object file, appending the
# CUPTI / CUDA driver link flags. The recipe line must start with a TAB.
pc_sampling: pc_sampling.$(OBJ)
	nvcc -o $@ $< $(CRAY_CUDATOOLKIT_POST_LINK_OPTS)
|
|
|
|
|
|
# Compile the CUDA source to an object file with line information (-lineinfo).
# CRAY_CUDATOOLKIT_INCLUDE_OPTS is not assigned in this listing; presumably
# it comes from the environment (e.g. a loaded cudatoolkit module) -- confirm.
# NOTE(review): -arch=sm_52 is hard-coded; confirm it suits the target GPU.
# -o $@ names the output explicitly so the recipe produces exactly its target.
pc_sampling.$(OBJ): pc_sampling.cu
	nvcc -arch=sm_52 -lineinfo -c $(CRAY_CUDATOOLKIT_INCLUDE_OPTS) -o $@ $<
|
|
|
|
|
|
# Build pc_sampling if necessary, then execute it from the current directory.
# Declared phony so a file named "run" cannot make the target look up to date.
.PHONY: run
run: pc_sampling
	./$<
|
|
|
|
|
|
# Delete the executable and the object file produced by this Makefile.
# Declared phony so a file named "clean" cannot disable the target.
.PHONY: clean
clean:
	rm -f pc_sampling pc_sampling.$(OBJ)
|
|
|
``` |
|
|
\ No newline at end of file |