Skip to content

Commit 5c37b81

Browse files
author
Carsten Griwodz
committed
Remove NVTX profiling from the develop branch.
This causes trouble for continuous integration and is apparently not supported on all platforms. Since it is only a debugging feature, it is just as well to remove it from the mainstream tree.
1 parent bdd37e5 commit 5c37b81

12 files changed

+1
-103
lines changed

CMakeLists.txt

-13
Original file line numberDiff line numberDiff line change
@@ -18,7 +18,6 @@ set(CMAKE_LIBRARY_OUTPUT_DIRECTORY "${PROJECT_BINARY_DIR}/${CMAKE_SYSTEM_NAME}-$
1818

1919
option(PopSift_BUILD_EXAMPLES "Build PopSift applications." ON)
2020
option(PopSift_BUILD_DOCS "Build PopSift documentation." OFF)
21-
option(PopSift_USE_NVTX_PROFILING "Use CUDA NVTX for profiling." OFF)
2221
option(PopSift_ERRCHK_AFTER_KERNEL "Synchronize and check CUDA error after every kernel." OFF)
2322
option(PopSift_USE_POSITION_INDEPENDENT_CODE "Generate position independent code." ON)
2423
option(PopSift_USE_GRID_FILTER "Switch off grid filtering to massively reduce compile time while debugging other things." ON)
@@ -90,10 +89,6 @@ find_package(CUDAToolkit)
9089
message(STATUS "CUDA Version is ${CUDAToolkit_VERSION}")
9190
set(CUDA_VERSION ${CUDAToolkit_VERSION})
9291

93-
if(PopSift_USE_NVTX_PROFILING)
94-
message(STATUS "PROFILING CPU CODE: NVTX is in use")
95-
endif()
96-
9792
if(PopSift_ERRCHK_AFTER_KERNEL)
9893
message(STATUS "Synchronizing and checking errors after every kernel call")
9994
list(APPEND CUDA_NVCC_FLAGS "-DERRCHK_AFTER_KERNEL")
@@ -150,13 +145,6 @@ else()
150145
set(DISABLE_GRID_FILTER 0)
151146
endif()
152147

153-
if(PopSift_USE_NVTX_PROFILING)
154-
# library required for NVTX profiling of the CPU
155-
set(PopSift_USE_NVTX 1)
156-
else()
157-
set(PopSift_USE_NVTX 0)
158-
endif()
159-
160148
add_subdirectory(src)
161149

162150
if(PopSift_BUILD_DOCS)
@@ -194,7 +182,6 @@ message(STATUS "Build Shared libs: " ${BUILD_SHARED_LIBS})
194182
message(STATUS "Build examples: " ${PopSift_BUILD_EXAMPLES})
195183
message(STATUS "Build documentation: " ${PopSift_BUILD_DOCS})
196184
message(STATUS "Generate position independent code: " ${CMAKE_POSITION_INDEPENDENT_CODE})
197-
message(STATUS "Use CUDA NVTX for profiling: " ${PopSift_USE_NVTX_PROFILING})
198185
message(STATUS "Synchronize and check CUDA error after every kernel: " ${PopSift_ERRCHK_AFTER_KERNEL})
199186
message(STATUS "Grid filtering: " ${PopSift_USE_GRID_FILTER})
200187
message(STATUS "Additional warning for CUDA nvcc: " ${PopSift_NVCC_WARNINGS})

appveyor.yml

+1-1
Original file line numberDiff line numberDiff line change
@@ -44,7 +44,7 @@ install:
4444
before_build:
4545
- md build
4646
- cd build
47-
- cmake -G "Visual Studio 17 2022" -A x64 -T v143,host=x64,cuda="%ProgramFiles%\NVIDIA GPU Computing Toolkit\CUDA\v12.5" -DBUILD_SHARED_LIBS:BOOL=ON -DPopSift_USE_NVTX_PROFILING:BOOL=OFF -DPopSift_USE_GRID_FILTER:BOOL=OFF -DPopSift_BUILD_DOCS:BOOL=OFF -DPopSift_USE_POSITION_INDEPENDENT_CODE:BOOL=ON -DPopSift_BUILD_EXAMPLES:BOOL=ON -DCMAKE_BUILD_TYPE=%configuration% -DCMAKE_TOOLCHAIN_FILE=c:/tools/vcpkg/scripts/buildsystems/vcpkg.cmake ..
47+
- cmake -G "Visual Studio 17 2022" -A x64 -T v143,host=x64,cuda="%ProgramFiles%\NVIDIA GPU Computing Toolkit\CUDA\v12.5" -DBUILD_SHARED_LIBS:BOOL=ON -DPopSift_USE_GRID_FILTER:BOOL=OFF -DPopSift_BUILD_DOCS:BOOL=OFF -DPopSift_USE_POSITION_INDEPENDENT_CODE:BOOL=ON -DPopSift_BUILD_EXAMPLES:BOOL=ON -DCMAKE_BUILD_TYPE=%configuration% -DCMAKE_TOOLCHAIN_FILE=c:/tools/vcpkg/scripts/buildsystems/vcpkg.cmake ..
4848
- ls -l
4949

5050
build:

cmake/sift_config.h.in

-1
Original file line numberDiff line numberDiff line change
@@ -14,5 +14,4 @@
1414
#define POPSIFT_HAVE_SHFL_DOWN_SYNC() @PopSift_HAVE_SHFL_DOWN_SYNC@
1515
#define POPSIFT_HAVE_NORMF() @PopSift_HAVE_NORMF@
1616
#define POPSIFT_DISABLE_GRID_FILTER() @DISABLE_GRID_FILTER@
17-
#define POPSIFT_USE_NVTX() @PopSift_USE_NVTX@
1817

cudaInstallAppveyor.cmd

-3
Original file line numberDiff line numberDiff line change
@@ -4,14 +4,12 @@ echo Downloading CUDA toolkit 12 for Windows 10
44

55
appveyor DownloadFile https://developer.download.nvidia.com/compute/cuda/redist/cuda_nvcc/windows-x86_64/cuda_nvcc-windows-x86_64-12.5.82-archive.zip -Filename cuda_nvcc.zip
66
appveyor DownloadFile https://developer.download.nvidia.com/compute/cuda/redist/cuda_cudart/windows-x86_64/cuda_cudart-windows-x86_64-12.5.82-archive.zip -Filename cuda_cudart.zip
7-
appveyor DownloadFile https://developer.download.nvidia.com/compute/cuda/redist/cuda_nvtx/windows-x86_64/cuda_nvtx-windows-x86_64-12.5.82-archive.zip -Filename cuda_nvtx.zip
87
appveyor DownloadFile https://developer.download.nvidia.com/compute/cuda/redist/visual_studio_integration/windows-x86_64/visual_studio_integration-windows-x86_64-12.5.82-archive.zip -Filename vs_integration.zip
98
dir
109

1110
echo Unzipping CUDA toolkit 12
1211
tar -xf cuda_nvcc.zip
1312
tar -xf cuda_cudart.zip
14-
tar -xf cuda_nvtx.zip
1513
tar -xf vs_integration.zip
1614
dir
1715

@@ -22,7 +20,6 @@ mkdir "%ProgramFiles%\NVIDIA GPU Computing Toolkit\CUDA\v12.5\extras"
2220
echo Copying toolkit files to install dir(s)
2321
xcopy cuda_cudart-windows-x86_64-12.5.82-archive "%ProgramFiles%\NVIDIA GPU Computing Toolkit\CUDA\v12.5" /s /e /i /y
2422
xcopy cuda_nvcc-windows-x86_64-12.5.82-archive "%ProgramFiles%\NVIDIA GPU Computing Toolkit\CUDA\v12.5" /s /e /i /y
25-
xcopy cuda_nvtx-windows-x86_64-12.5.82-archive "%ProgramFiles%\NVIDIA GPU Computing Toolkit\CUDA\v12.5" /s /e /i /y
2623
xcopy visual_studio_integration-windows-x86_64-12.5.82-archive "%ProgramFiles%\NVIDIA GPU Computing Toolkit\CUDA\v12.5\extras" /s /e /i /y
2724

2825

src/CMakeLists.txt

-6
Original file line numberDiff line numberDiff line change
@@ -45,12 +45,6 @@ target_link_libraries(popsift
4545
CUDA::cudart
4646
Threads::Threads)
4747

48-
if(PopSift_USE_NVTX_PROFILING)
49-
target_link_libraries(popsift
50-
PUBLIC
51-
CUDA::nvtx3)
52-
endif()
53-
5448
set_target_properties(popsift PROPERTIES VERSION ${PROJECT_VERSION})
5549
set_target_properties(popsift PROPERTIES DEBUG_POSTFIX "d")
5650
set_target_properties(popsift PROPERTIES CUDA_SEPARABLE_COMPILATION ON)

src/popsift/popsift.cu

-6
Original file line numberDiff line numberDiff line change
@@ -438,18 +438,12 @@ void SiftJob::setImg( popsift::ImageBase* img )
438438

439439
popsift::ImageBase* SiftJob::getImg()
440440
{
441-
#if POPSIFT_IS_DEFINED(POPSIFT_USE_NVTX)
442-
_nvtx_id = nvtxRangeStartA( "inserting image" );
443-
#endif
444441
return _img;
445442
}
446443

447444
void SiftJob::setFeatures( popsift::FeaturesBase* f )
448445
{
449446
_p.set_value( f );
450-
#if POPSIFT_IS_DEFINED(POPSIFT_USE_NVTX)
451-
nvtxRangeEnd( _nvtx_id );
452-
#endif
453447
}
454448

455449
popsift::FeaturesHost* SiftJob::get()

src/popsift/popsift.h

-10
Original file line numberDiff line numberDiff line change
@@ -23,13 +23,6 @@
2323
#include <thread>
2424
#include <vector>
2525

26-
#if POPSIFT_IS_DEFINED(POPSIFT_USE_NVTX)
27-
#include <nvtx3/nvToolsExtCuda.h>
28-
#else
29-
#define nvtxRangeStartA(a)
30-
#define nvtxRangeEnd(a)
31-
#endif
32-
3326
/* user parameters */
3427
namespace popsift
3528
{
@@ -50,9 +43,6 @@ class SiftJob
5043
unsigned char* _imageData;
5144
popsift::ImageBase* _img;
5245
std::exception_ptr _err;
53-
#if POPSIFT_IS_DEFINED(POPSIFT_USE_NVTX)
54-
nvtxRangeId_t _nvtx_id;
55-
#endif
5646

5747
public:
5848

src/popsift/s_filtergrid.cu

-9
Original file line numberDiff line numberDiff line change
@@ -9,13 +9,6 @@
99
#include "sift_extremum.h"
1010
#include "sift_pyramid.h"
1111

12-
#if POPSIFT_IS_DEFINED(POPSIFT_USE_NVTX)
13-
#include <nvtx3/nvToolsExtCuda.h>
14-
#else
15-
#define nvtxRangePushA(a)
16-
#define nvtxRangePop()
17-
#endif
18-
1912
#if ! POPSIFT_IS_DEFINED(POPSIFT_DISABLE_GRID_FILTER)
2013

2114
#include <thrust/copy.h>
@@ -317,9 +310,7 @@ int Pyramid::extrema_filter_grid( const Config& conf, int ext_total )
317310
}
318311
}
319312

320-
nvtxRangePushA( "writing back count" );
321313
writeDescCountersToDevice( );
322-
nvtxRangePop( );
323314

324315
return ret_ext_total;
325316
}

src/popsift/s_image.cu

-23
Original file line numberDiff line numberDiff line change
@@ -15,13 +15,6 @@
1515
#include <fstream>
1616
#include <iostream>
1717

18-
#if POPSIFT_IS_DEFINED(POPSIFT_USE_NVTX)
19-
#include <nvtx3/nvToolsExtCuda.h>
20-
#else
21-
#define nvtxRangePushA(a)
22-
#define nvtxRangePop()
23-
#endif
24-
2518
using namespace std;
2619

2720
namespace popsift {
@@ -98,8 +91,6 @@ void Image::resetDimensions( int w, int h )
9891
destroyTexture( );
9992
createTexture( );
10093
} else {
101-
nvtxRangePushA( "reallocating host-side image memory" );
102-
10394
_max_w = max( w, _max_w );
10495
_max_h = max( h, _max_h );
10596
_input_image_h.freeHost( popsift::CudaAllocated );
@@ -111,21 +102,15 @@ void Image::resetDimensions( int w, int h )
111102

112103
destroyTexture( );
113104
createTexture( );
114-
115-
nvtxRangePop(); // "reallocating host-side image memory"
116105
}
117106
}
118107

119108
void Image::allocate( int w, int h )
120109
{
121-
nvtxRangePushA( "allocating host-side image memory" );
122-
123110
_input_image_h.allocHost( w, h, popsift::CudaAllocated );
124111
_input_image_d.allocDev( w, h );
125112

126113
createTexture( );
127-
128-
nvtxRangePop(); // "allocating host-side image memory"
129114
}
130115

131116
void Image::destroyTexture( )
@@ -222,8 +207,6 @@ void ImageFloat::resetDimensions( int w, int h )
222207
destroyTexture( );
223208
createTexture( );
224209
} else {
225-
nvtxRangePushA( "reallocating host-side image memory" );
226-
227210
_max_w = max( w, _max_w );
228211
_max_h = max( h, _max_h );
229212
_input_image_h.freeHost( popsift::CudaAllocated );
@@ -235,21 +218,15 @@ void ImageFloat::resetDimensions( int w, int h )
235218

236219
destroyTexture( );
237220
createTexture( );
238-
239-
nvtxRangePop(); // "reallocating host-side image memory"
240221
}
241222
}
242223

243224
void ImageFloat::allocate( int w, int h )
244225
{
245-
nvtxRangePushA( "allocating host-side image memory" );
246-
247226
_input_image_h.allocHost( w, h, popsift::CudaAllocated );
248227
_input_image_d.allocDev( w, h );
249228

250229
createTexture( );
251-
252-
nvtxRangePop(); // "allocating host-side image memory"
253230
}
254231

255232
void ImageFloat::destroyTexture( )

src/popsift/s_orientation.cu

-7
Original file line numberDiff line numberDiff line change
@@ -18,13 +18,6 @@
1818
#include <cmath>
1919
#include <cstdio>
2020

21-
#if POPSIFT_IS_DEFINED(POPSIFT_USE_NVTX)
22-
#include <nvtx3/nvToolsExtCuda.h>
23-
#else
24-
#define nvtxRangePushA(a)
25-
#define nvtxRangePop()
26-
#endif
27-
2821
using namespace popsift;
2922
using namespace std;
3023

src/popsift/sift_desc.cu

-10
Original file line numberDiff line numberDiff line change
@@ -21,13 +21,6 @@
2121
#include <cstdio>
2222
#include <iostream>
2323

24-
#if POPSIFT_IS_DEFINED(POPSIFT_USE_NVTX)
25-
#include <nvtx3/nvToolsExtCuda.h>
26-
#else
27-
#define nvtxRangePushA(a)
28-
#define nvtxRangePop()
29-
#endif
30-
3124
using namespace popsift;
3225
using namespace std;
3326

@@ -55,11 +48,8 @@ using namespace std;
5548
__host__
5649
void Pyramid::descriptors( const Config& conf )
5750
{
58-
nvtxRangePushA("Reading orientation count");
59-
6051
readDescCountersFromDevice( _octaves[0].getStream() );
6152
cudaStreamSynchronize( _octaves[0].getStream() );
62-
nvtxRangePop( );
6353

6454
for( int octave=_num_octaves-1; octave>=0; octave-- )
6555
// for( int octave=0; octave<_num_octaves; octave++ )

src/popsift/sift_pyramid.cu

-14
Original file line numberDiff line numberDiff line change
@@ -25,13 +25,6 @@
2525
#define mkdir(path, perm) _mkdir(path)
2626
#endif
2727

28-
#if POPSIFT_IS_DEFINED(POPSIFT_USE_NVTX)
29-
#include <nvtx3/nvToolsExtCuda.h>
30-
#else
31-
#define nvtxRangePushA(a)
32-
#define nvtxRangePop()
33-
#endif
34-
3528
#define PYRAMID_PRINT_DEBUG 0
3629

3730
using namespace std;
@@ -285,22 +278,18 @@ FeaturesHost* Pyramid::get_descriptors( const Config& conf )
285278

286279
readDescCountersFromDevice();
287280

288-
nvtxRangePushA( "download descriptors" );
289281
FeaturesHost* features = new FeaturesHost( hct.ext_total, hct.ori_total );
290282

291283
if( hct.ext_total == 0 || hct.ori_total == 0 )
292284
{
293-
nvtxRangePop();
294285
return features;
295286
}
296287

297288
dim3 grid( grid_divide( hct.ext_total, 32 ) );
298289
prep_features<<<grid,32,0,_download_stream>>>( features->getDescriptors(), up_fac );
299290
POP_SYNC_CHK;
300291

301-
nvtxRangePushA( "register host memory" );
302292
features->pin( );
303-
nvtxRangePop();
304293
popcuda_memcpy_async( features->getFeatures(),
305294
dobuf_shadow.features,
306295
hct.ext_total * sizeof(Feature),
@@ -313,10 +302,7 @@ FeaturesHost* Pyramid::get_descriptors( const Config& conf )
313302
cudaMemcpyDeviceToHost,
314303
_download_stream );
315304
cudaStreamSynchronize( _download_stream );
316-
nvtxRangePushA( "unregister host memory" );
317305
features->unpin( );
318-
nvtxRangePop();
319-
nvtxRangePop();
320306

321307
return features;
322308
}

0 commit comments

Comments
 (0)