From 39800d3d4aeff80de36d7872ed551243518e25a8 Mon Sep 17 00:00:00 2001
From: Vyas Ramasubramani <vyasr@nvidia.com>
Date: Tue, 24 Oct 2023 15:15:37 -0700
Subject: [PATCH] Add missing CUDA 12 dependencies and fix dlopen library names
 (#1366)

The dropping of system CTK libraries from our CUDA 12 CI images revealed that we were missing the cuda-nvcc package required to provide nvvm for numba in the Python tests. It also revealed that the list of libraries we searched to dlopen was incomplete: for CUDA 11, the SONAME of the library incorrectly includes an extra `.0` version segment, and rmm was designed to search for that, but CUDA 12 correctly has just `libcudart.so.12`, and that needs to be added to the list of names searched. We were previously getting by on finding `libcudart.so`, but the linker name is only present in conda environments if `cuda-cudart-dev` is installed, and that package should not be a runtime requirement for rmm.

Authors:
  - Vyas Ramasubramani (https://github.com/vyasr)

Approvers:
  - Rong Ou (https://github.com/rongou)
  - Ray Douglass (https://github.com/raydouglass)

URL: https://github.com/rapidsai/rmm/pull/1366
---
 ci/test_python.sh                             |  2 ++
 .../all_cuda-118_arch-x86_64.yaml             |  1 +
 dependencies.yaml                             |  4 ++++
 include/rmm/detail/dynamic_load_runtime.hpp   | 23 +++++++++++++------
 4 files changed, 23 insertions(+), 7 deletions(-)

diff --git a/ci/test_python.sh b/ci/test_python.sh
index d8c1fdbce..d3b3bdfd8 100755
--- a/ci/test_python.sh
+++ b/ci/test_python.sh
@@ -11,7 +11,9 @@ rapids-dependency-file-generator \
   --matrix "cuda=${RAPIDS_CUDA_VERSION%.*};arch=$(arch);py=${RAPIDS_PY_VERSION}" | tee env.yaml
 
 rapids-mamba-retry env create --force -f env.yaml -n test
+set +u
 conda activate test
+set -u
 
 rapids-print-env
 
diff --git a/conda/environments/all_cuda-118_arch-x86_64.yaml b/conda/environments/all_cuda-118_arch-x86_64.yaml
index 8571a1928..78ddf0503 100644
--- a/conda/environments/all_cuda-118_arch-x86_64.yaml
+++ b/conda/environments/all_cuda-118_arch-x86_64.yaml
@@ -8,6 +8,7 @@ dependencies:
 - clang-tools==16.0.6
 - clang==16.0.6
 - cmake>=3.26.4
+- cuda-nvcc
 - cuda-python>=11.7.1,<12.0a0
 - cuda-version=11.8
 - cudatoolkit
diff --git a/dependencies.yaml b/dependencies.yaml
index 7d9af48cf..95f275134 100644
--- a/dependencies.yaml
+++ b/dependencies.yaml
@@ -223,3 +223,7 @@ dependencies:
         packages:
           - pytest
           - pytest-cov
+      - output_types: conda
+        packages:
+          # Needed for numba in tests
+          - cuda-nvcc
diff --git a/include/rmm/detail/dynamic_load_runtime.hpp b/include/rmm/detail/dynamic_load_runtime.hpp
index b45dbae25..43626144a 100644
--- a/include/rmm/detail/dynamic_load_runtime.hpp
+++ b/include/rmm/detail/dynamic_load_runtime.hpp
@@ -38,13 +38,22 @@ struct dynamic_load_runtime {
     auto close_cudart = [](void* handle) { ::dlclose(handle); };
     auto open_cudart  = []() {
       ::dlerror();
-      const int major               = CUDART_VERSION / 1000;
-      const std::string libname_ver = "libcudart.so." + std::to_string(major) + ".0";
-      const std::string libname     = "libcudart.so";
-
-      auto ptr = ::dlopen(libname_ver.c_str(), RTLD_LAZY);
-      if (!ptr) { ptr = ::dlopen(libname.c_str(), RTLD_LAZY); }
-      if (ptr) { return ptr; }
+      const int major = CUDART_VERSION / 1000;
+
+      // In CUDA 12 the SONAME is correctly defined as libcudart.so.12, but for
+      // CUDA<=11 it includes an extra 0 minor version, e.g. libcudart.so.11.0.
+      // We also allow finding the linker name (libcudart.so).
+      const std::string libname_ver_cuda_11 = "libcudart.so." + std::to_string(major) + ".0";
+      const std::string libname_ver_cuda_12 = "libcudart.so." + std::to_string(major);
+      const std::string libname             = "libcudart.so";
+
+      void* ptr = nullptr;
+      for (auto&& name : {libname_ver_cuda_12, libname_ver_cuda_11, libname}) {
+        ptr = dlopen(name.c_str(), RTLD_LAZY);
+        if (ptr != nullptr) break;
+      }
+
+      if (ptr != nullptr) { return ptr; }
 
       RMM_FAIL("Unable to dlopen cudart");
     };