diff --git a/.gitmodules b/.gitmodules index 282746ed0b53e..4da46e56f4669 100644 --- a/.gitmodules +++ b/.gitmodules @@ -154,3 +154,6 @@ [submodule "third_party/cutlass"] path = third_party/cutlass url = https://github.com/NVIDIA/cutlass.git +[submodule "third_party/NVTX"] + path = third_party/NVTX + url = https://github.com/NVIDIA/NVTX.git diff --git a/caffe2/CMakeLists.txt b/caffe2/CMakeLists.txt index 4e5993f998e36..25e34249afce7 100644 --- a/caffe2/CMakeLists.txt +++ b/caffe2/CMakeLists.txt @@ -1475,7 +1475,8 @@ if(USE_CUDA) target_link_libraries(torch_cpu PRIVATE torch::cudart) endif() target_link_libraries(torch_cuda INTERFACE torch::cudart) - target_link_libraries(torch_cuda PUBLIC c10_cuda torch::nvtoolsext) + target_link_libraries(torch_cuda PUBLIC c10_cuda) + target_link_libraries(torch_cuda PRIVATE torch::nvtoolsext) target_include_directories( torch_cuda INTERFACE $) @@ -1530,7 +1531,7 @@ if(BUILD_SHARED_LIBS) # not find them, because they're usually in non-standard locations) if(USE_CUDA) target_link_libraries(torch_global_deps ${Caffe2_PUBLIC_CUDA_DEPENDENCY_LIBS}) - target_link_libraries(torch_global_deps torch::cudart torch::nvtoolsext) + target_link_libraries(torch_global_deps torch::cudart) endif() if(USE_TBB) target_link_libraries(torch_global_deps TBB::tbb) diff --git a/caffe2/python/__init__.py b/caffe2/python/__init__.py index 888d286458a3a..283f08cd1f546 100644 --- a/caffe2/python/__init__.py +++ b/caffe2/python/__init__.py @@ -26,13 +26,6 @@ th_root = os.path.join(os.path.dirname(os.path.dirname(os.path.dirname(__file__))), 'torch') th_dll_path = os.path.join(th_root, 'lib') - if not os.path.exists(os.path.join(th_dll_path, 'nvToolsExt64_1.dll')) and \ - not os.path.exists(os.path.join(py_dll_path, 'nvToolsExt64_1.dll')): - nvtoolsext_dll_path = os.path.join( - os.getenv('NVTOOLSEXT_PATH', 'C:\\Program Files\\NVIDIA Corporation\\NvToolsExt'), 'bin', 'x64') - else: - nvtoolsext_dll_path = '' - import importlib.util import glob spec = importlib.util.spec_from_file_location('torch_version', os.path.join(th_root, 'version.py')) @@ -50,7 +43,7 @@ import ctypes kernel32 = ctypes.WinDLL('kernel32.dll', use_last_error=True) - dll_paths = list(filter(os.path.exists, [th_dll_path, py_dll_path, nvtoolsext_dll_path, cuda_path])) + dll_paths = list(filter(os.path.exists, [th_dll_path, py_dll_path, cuda_path])) with_load_library_flags = hasattr(kernel32, 'AddDllDirectory') prev_error_mode = kernel32.SetErrorMode(0x0001) diff --git a/cmake/TorchConfig.cmake.in b/cmake/TorchConfig.cmake.in index 6fecc86ffed36..95fe4481f24d6 100644 --- a/cmake/TorchConfig.cmake.in +++ b/cmake/TorchConfig.cmake.in @@ -129,30 +129,18 @@ endif() if(@USE_CUDA@) if(MSVC) - if(NOT NVTOOLEXT_HOME) - set(NVTOOLEXT_HOME "C:/Program Files/NVIDIA Corporation/NvToolsExt") - endif() - if(DEFINED ENV{NVTOOLSEXT_PATH}) - set(NVTOOLEXT_HOME $ENV{NVTOOLSEXT_PATH}) - endif() - set(TORCH_CUDA_LIBRARIES - ${NVTOOLEXT_HOME}/lib/x64/nvToolsExt64_1.lib - ${CUDA_LIBRARIES}) - list(APPEND TORCH_INCLUDE_DIRS ${NVTOOLEXT_HOME}/include) + set(TORCH_CUDA_LIBRARIES ${CUDA_LIBRARIES}) find_library(CAFFE2_NVRTC_LIBRARY caffe2_nvrtc PATHS "${TORCH_INSTALL_PREFIX}/lib") list(APPEND TORCH_CUDA_LIBRARIES ${CAFFE2_NVRTC_LIBRARY}) elseif(APPLE) set(TORCH_CUDA_LIBRARIES ${CUDA_TOOLKIT_ROOT_DIR}/lib/libcudart.dylib ${CUDA_TOOLKIT_ROOT_DIR}/lib/libnvrtc.dylib - ${CUDA_TOOLKIT_ROOT_DIR}/lib/libnvToolsExt.dylib ${CUDA_LIBRARIES}) else() - find_library(LIBNVTOOLSEXT libnvToolsExt.so PATHS ${CUDA_TOOLKIT_ROOT_DIR}/lib64/) set(TORCH_CUDA_LIBRARIES ${CUDA_CUDA_LIB} ${CUDA_NVRTC_LIB} - ${LIBNVTOOLSEXT} ${CUDA_LIBRARIES}) endif() if(@BUILD_SHARED_LIBS@) diff --git a/cmake/public/cuda.cmake b/cmake/public/cuda.cmake index 68de16b5a0de6..416473f5d7f67 100644 --- a/cmake/public/cuda.cmake +++ b/cmake/public/cuda.cmake @@ -65,10 +65,6 @@ if(NOT CMAKE_CUDA_COMPILER_VERSION STREQUAL CUDAToolkit_VERSION OR "V${CUDAToolkit_VERSION} in '${CUDAToolkit_INCLUDE_DIR}'") endif() -if(NOT TARGET CUDA::nvToolsExt) - message(FATAL_ERROR "Failed to find nvToolsExt") -endif() - message(STATUS "Caffe2: CUDA detected: " ${CUDA_VERSION}) message(STATUS "Caffe2: CUDA nvcc is: " ${CUDA_NVCC_EXECUTABLE}) message(STATUS "Caffe2: CUDA toolkit directory: " ${CUDA_TOOLKIT_ROOT_DIR}) @@ -214,9 +210,19 @@ endif() # nvToolsExt add_library(torch::nvtoolsext INTERFACE IMPORTED) -set_property( - TARGET torch::nvtoolsext PROPERTY INTERFACE_LINK_LIBRARIES - CUDA::nvToolsExt) +find_path( + nvtx3_dir + NAMES nvtx3 + PATHS ${CUDA_INCLUDE_DIRS} + NO_DEFAULT_PATH) +find_package_handle_standard_args(nvtx3 DEFAULT_MSG nvtx3_dir) +if(NOT nvtx3_FOUND) + set(nvtx3_dir "${CMAKE_CURRENT_LIST_DIR}/../../third_party/NVTX/c/include") + message(WARNING "use NVTX library in ${nvtx3_dir}") + include_directories(SYSTEM "${nvtx3_dir}") + target_include_directories(torch::nvtoolsext INTERFACE "${nvtx3_dir}") +endif() + # cublas add_library(caffe2::cublas INTERFACE IMPORTED) diff --git a/setup.py b/setup.py index 9a258caba1eea..566e1e96457ce 100644 --- a/setup.py +++ b/setup.py @@ -178,9 +178,6 @@ # NVFUSER_SOURCE_DIR # specify nvfuser root directory # -# NVTOOLSEXT_PATH (Windows only) -# specify where nvtoolsext is installed -# # ACL_ROOT_DIR # specify where Compute Library is installed # diff --git a/third_party/NVTX b/third_party/NVTX new file mode 160000 index 0000000000000..e170594ac7cf1 --- /dev/null +++ b/third_party/NVTX @@ -0,0 +1 @@ +Subproject commit e170594ac7cf1dac584da473d4ca9301087090c1 diff --git a/third_party/nvfuser/csrc/instrumentation.h b/third_party/nvfuser/csrc/instrumentation.h index cd57825a248e1..5b27f0d98b9e3 100644 --- a/third_party/nvfuser/csrc/instrumentation.h +++ b/third_party/nvfuser/csrc/instrumentation.h @@ -2,7 +2,11 @@ #include +#ifndef FBCODE_CAFFE2 +#include +#else #include +#endif // NOLINTNEXTLINE(modernize-deprecated-headers) #include diff --git a/torch/CMakeLists.txt b/torch/CMakeLists.txt index c6c23f25a417f..d3b2a206365d4 100644 --- a/torch/CMakeLists.txt +++ b/torch/CMakeLists.txt @@ -129,8 +129,6 @@ if(USE_CUDA) list(APPEND TORCH_PYTHON_LINK_LIBRARIES torch::cudnn) list(APPEND TORCH_PYTHON_COMPILE_DEFINITIONS USE_CUDNN) endif() - - list(APPEND TORCH_PYTHON_LINK_LIBRARIES torch::nvtoolsext) endif() if(USE_ROCM) diff --git a/torch/__init__.py b/torch/__init__.py index e349c3a96a3b5..5189c054bd530 100644 --- a/torch/__init__.py +++ b/torch/__init__.py @@ -72,12 +72,6 @@ dll_paths = list(filter(os.path.exists, [th_dll_path, py_dll_path, base_py_dll_path])) - if all([not os.path.exists(os.path.join(p, 'nvToolsExt64_1.dll')) for p in dll_paths]): - nvtoolsext_dll_path = os.path.join( - os.getenv('NVTOOLSEXT_PATH', os.path.join(pfiles_path, 'NVIDIA Corporation', 'NvToolsExt')), 'bin', 'x64') - else: - nvtoolsext_dll_path = '' - from .version import cuda as cuda_version import glob if cuda_version and all([not glob.glob(os.path.join(p, 'cudart64*.dll')) for p in dll_paths]): @@ -88,7 +82,7 @@ else: cuda_path = '' - dll_paths.extend(filter(os.path.exists, [nvtoolsext_dll_path, cuda_path])) + dll_paths.extend(filter(os.path.exists, [cuda_path])) kernel32 = ctypes.WinDLL('kernel32.dll', use_last_error=True) with_load_library_flags = hasattr(kernel32, 'AddDllDirectory') diff --git a/torch/csrc/cuda/shared/nvtx.cpp b/torch/csrc/cuda/shared/nvtx.cpp index 882324cbe04a6..c4dffbb775b20 100644 --- a/torch/csrc/cuda/shared/nvtx.cpp +++ b/torch/csrc/cuda/shared/nvtx.cpp @@ -1,7 +1,11 @@ #ifdef _WIN32 #include // _wgetenv for nvtx #endif +#ifndef FBCODE_CAFFE2 +#include +#else #include +#endif #include namespace torch { diff --git a/torch/csrc/profiler/stubs/cuda.cpp b/torch/csrc/profiler/stubs/cuda.cpp index 6731d0f4d3b50..c9c6c66013505 100644 --- a/torch/csrc/profiler/stubs/cuda.cpp +++ b/torch/csrc/profiler/stubs/cuda.cpp @@ -1,6 +1,10 @@ #include +#ifndef FBCODE_CAFFE2 +#include +#else #include +#endif #include #include diff --git a/torch/utils/hipify/cuda_to_hip_mappings.py b/torch/utils/hipify/cuda_to_hip_mappings.py index 45e3cb69af8a2..f89b812b910eb 100644 --- a/torch/utils/hipify/cuda_to_hip_mappings.py +++ b/torch/utils/hipify/cuda_to_hip_mappings.py @@ -635,7 +635,7 @@ ("cub/device/device_radix_sort.cuh", ("hipcub/hipcub.hpp", CONV_INCLUDE, API_BLAS)), ("cub/device/device_reduce.cuh", ("hipcub/hipcub.hpp", CONV_INCLUDE, API_BLAS)), ("cub/device/device_scan.cuh", ("hipcub/hipcub.hpp", CONV_INCLUDE, API_BLAS)), - ("nvToolsExt.h", ("roctracer/roctx.h", CONV_INCLUDE, API_ROCTX)), + ("nvtx3/nvToolsExt.h", ("roctracer/roctx.h", CONV_INCLUDE, API_ROCTX)), ] )