From 39800d3d4aeff80de36d7872ed551243518e25a8 Mon Sep 17 00:00:00 2001 From: Vyas Ramasubramani Date: Tue, 24 Oct 2023 15:15:37 -0700 Subject: [PATCH] Add missing CUDA 12 dependencies and fix dlopen library names (#1366) The dropping of system CTK libraries from our CUDA 12 CI images revealed that we were missing the cuda-nvcc package required to provide nvvm for numba in the Python tests. It also revealed that the list of library names we try to dlopen is incomplete; for CUDA 11, the SONAME of the library incorrectly includes an extra `.0` version segment, and rmm was designed to search for that, but CUDA 12 correctly has just `libcudart.so.12` and that name needs to be added to the list of candidates. We were previously getting by on finding `libcudart.so`, but the linker name is only present in conda environments if `cuda-cudart-dev` is installed, and that package should not be a runtime requirement for rmm. Authors: - Vyas Ramasubramani (https://github.com/vyasr) Approvers: - Rong Ou (https://github.com/rongou) - Ray Douglass (https://github.com/raydouglass) URL: https://github.com/rapidsai/rmm/pull/1366 --- ci/test_python.sh | 2 ++ .../all_cuda-118_arch-x86_64.yaml | 1 + dependencies.yaml | 4 ++++ include/rmm/detail/dynamic_load_runtime.hpp | 23 +++++++++++++------ 4 files changed, 23 insertions(+), 7 deletions(-) diff --git a/ci/test_python.sh b/ci/test_python.sh index d8c1fdbce..d3b3bdfd8 100755 --- a/ci/test_python.sh +++ b/ci/test_python.sh @@ -11,7 +11,9 @@ rapids-dependency-file-generator \ --matrix "cuda=${RAPIDS_CUDA_VERSION%.*};arch=$(arch);py=${RAPIDS_PY_VERSION}" | tee env.yaml rapids-mamba-retry env create --force -f env.yaml -n test +set +u conda activate test +set -u rapids-print-env diff --git a/conda/environments/all_cuda-118_arch-x86_64.yaml b/conda/environments/all_cuda-118_arch-x86_64.yaml index 8571a1928..78ddf0503 100644 --- a/conda/environments/all_cuda-118_arch-x86_64.yaml +++ b/conda/environments/all_cuda-118_arch-x86_64.yaml @@ -8,6 +8,7 @@ 
dependencies: - clang-tools==16.0.6 - clang==16.0.6 - cmake>=3.26.4 +- cuda-nvcc - cuda-python>=11.7.1,<12.0a0 - cuda-version=11.8 - cudatoolkit diff --git a/dependencies.yaml b/dependencies.yaml index 7d9af48cf..95f275134 100644 --- a/dependencies.yaml +++ b/dependencies.yaml @@ -223,3 +223,7 @@ dependencies: packages: - pytest - pytest-cov + - output_types: conda + packages: + # Needed for numba in tests + - cuda-nvcc diff --git a/include/rmm/detail/dynamic_load_runtime.hpp b/include/rmm/detail/dynamic_load_runtime.hpp index b45dbae25..43626144a 100644 --- a/include/rmm/detail/dynamic_load_runtime.hpp +++ b/include/rmm/detail/dynamic_load_runtime.hpp @@ -38,13 +38,22 @@ struct dynamic_load_runtime { auto close_cudart = [](void* handle) { ::dlclose(handle); }; auto open_cudart = []() { ::dlerror(); - const int major = CUDART_VERSION / 1000; - const std::string libname_ver = "libcudart.so." + std::to_string(major) + ".0"; - const std::string libname = "libcudart.so"; - - auto ptr = ::dlopen(libname_ver.c_str(), RTLD_LAZY); - if (!ptr) { ptr = ::dlopen(libname.c_str(), RTLD_LAZY); } - if (ptr) { return ptr; } + const int major = CUDART_VERSION / 1000; + + // In CUDA 12 the SONAME is correctly defined as libcudart.so.12, but for + // CUDA<=11 it includes an extra 0 minor version, e.g. libcudart.so.11.0. + // As a last resort we also try the unversioned linker name, libcudart.so. + const std::string libname_ver_cuda_11 = "libcudart.so." + std::to_string(major) + ".0"; + const std::string libname_ver_cuda_12 = "libcudart.so." + std::to_string(major); + const std::string libname = "libcudart.so"; + + void* ptr = nullptr; + for (auto&& name : {libname_ver_cuda_12, libname_ver_cuda_11, libname}) { + ptr = dlopen(name.c_str(), RTLD_LAZY); + if (ptr != nullptr) break; + } + + if (ptr != nullptr) { return ptr; } RMM_FAIL("Unable to dlopen cudart"); };