29 #ifndef PROFILEHOOKS_H 30 #define PROFILEHOOKS_H 35 #define WKFUSEGETTID 1 38 #include <sys/types.h> 39 #include <sys/syscall.h> 42 #define gettid() syscall(SYS_gettid) 50 #include <cuda_runtime.h> 51 #include <cuda_profiler_api.h> 56 #if CUDART_VERSION >= 10000 57 #include <nvtx3/nvToolsExt.h> 59 #error NVTXv3 requires CUDA 10.0 or greater 60 #include <nvToolsExt.h> 67 const uint32_t WKF_nvtx_colors[] = {
80 const int WKF_nvtx_colors_len =
sizeof(WKF_nvtx_colors)/
sizeof(uint32_t);
82 #define PROFILE_GREEN 0 83 #define PROFILE_ORANGE 1 84 #define PROFILE_BLUE 2 85 #define PROFILE_YELLOW 3 86 #define PROFILE_PURPLE 4 87 #define PROFILE_TEAL 5 89 #define PROFILE_GRAY 7 90 #define PROFILE_WHITE 8 93 #define PROFILE_INITIALIZE() do { nvtxInitialize(NULL); } while(0) // terminate with semicolon 96 #define PROFILE_START() \ 98 cudaProfilerStart(); \ 99 } while (0) // terminate with semicolon 102 #define PROFILE_STOP() \ 104 cudaDeviceSynchronize(); \ 105 cudaProfilerStop(); \ 106 } while (0) // terminate with semicolon 117 #if defined(WKFUSEGETTID) 120 #define PROFILE_MAIN_THREAD() \ 123 nvtxNameOsThread(gettid(), "Main thread"); \ 124 } while (0) // terminate with semicolon 128 #define PROFILE_NAME_THREAD(name) \ 130 nvtxNameOsThread(gettid(), name); \ 131 } while (0) // terminate with semicolon 136 #define PROFILE_MAIN_THREAD() \ 140 pthread_threadid_np(pthread_self(), &tid);
141 nvtxNameOsThread(tid,
"Main thread"); \
// Assign a human-readable name to the calling host thread in the profiler
// timeline. This variant obtains the thread id via pthread_threadid_np()
// instead of gettid().
// NOTE(review): presumably the branch taken when WKFUSEGETTID is not defined
// (e.g. macOS) -- confirm against the surrounding #if/#else.
//
//   name: C string label passed to nvtxNameOsThread().
//
// The id passed to NVTX must be the one just fetched into 'tid'; calling
// gettid() here would defeat the purpose of this variant.
#define PROFILE_NAME_THREAD(name) \
  do { \
    uint64_t tid; \
    pthread_threadid_np(pthread_self(), &tid); \
    nvtxNameOsThread(tid, name); \
  } while (0) // terminate with semicolon
160 #define PROFILE_MARK(name,cid) \ 164 int color_id = cid; \ 165 color_id = color_id % WKF_nvtx_colors_len; \ 166 nvtxEventAttributes_t eventAttrib = {0}; \ 167 eventAttrib.version = NVTX_VERSION; \ 168 eventAttrib.size = NVTX_EVENT_ATTRIB_STRUCT_SIZE; \ 169 eventAttrib.colorType = NVTX_COLOR_ARGB; \ 170 eventAttrib.color = WKF_nvtx_colors[color_id]; \ 171 eventAttrib.messageType = NVTX_MESSAGE_TYPE_ASCII; \ 172 eventAttrib.message.ascii = name; \ 173 nvtxMarkEx(&eventAttrib); \ 174 } while(0) // terminate with semicolon 181 #define PROFILE_PUSH_RANGE(name,cid) \ 183 int color_id = cid; \ 184 color_id = color_id % WKF_nvtx_colors_len; \ 185 nvtxEventAttributes_t eventAttrib = {0}; \ 186 eventAttrib.version = NVTX_VERSION; \ 187 eventAttrib.size = NVTX_EVENT_ATTRIB_STRUCT_SIZE; \ 188 eventAttrib.colorType = NVTX_COLOR_ARGB; \ 189 eventAttrib.color = WKF_nvtx_colors[color_id]; \ 190 eventAttrib.messageType = NVTX_MESSAGE_TYPE_ASCII; \ 191 eventAttrib.message.ascii = name; \ 192 nvtxRangePushEx(&eventAttrib); \ 193 } while(0) // must terminate with semi-colon 198 #define PROFILE_POP_RANGE(empty) \ 201 } while(0) // terminate with semicolon 205 class WKF_NVTX_Tracer {
212 #define PROFILE_RANGE(name,cid) \ 216 WKF_NVTX_Tracer wkf_nvtx_tracer##cid(name,cid) 219 #if defined(WKFNVTX_SYNCPRETTY) 220 #define PROFILE_STREAM_SYNC_PRETTY(stream) \ 229 cudaStreamSynchronize(stream); \ 230 } while(0) // terminate with semicolon 233 #define PROFILE_STREAM_SYNC_PRETTY(stream) do { } while(0) // term w/ semicolon 250 #define PROFILE_INITIALIZE() do { } while(0) // terminate with semicolon 253 #define PROFILE_START() do { } while(0) // terminate with semicolon 256 #define PROFILE_STOP() do { } while(0) // terminate with semicolon 259 #define PROFILE_MAIN_THREAD() do { } while(0) // terminate with semicolon 263 #define PROFILE_NAME_THREAD(name) do { } while(0) // terminate with semicolon 269 #define PROFILE_MARK(name,cid) do { } while(0) // terminate with semicolon 275 #define PROFILE_PUSH_RANGE(name,cid) do { } while(0) // terminate with semicolon 279 #define PROFILE_POP_RANGE() do { } while(0) // terminate with semicolon 282 #define PROFILE_RANGE(name,cid) do { } while(0) // terminate with semicolon 289 #define PROFILE_STREAM_SYNC_PRETTY(stream) do { } while(0) // term w/ semicolon #define PROFILE_PUSH_RANGE(name, cid)
Pushes a time range annotation onto the profiler's trace stack, beginning at the time of submission...
#define PROFILE_POP_RANGE()
Pops the innermost time range off of the profiler's trace stack, at the time of execution.