rapidsai · rapids-bot · Nov 15, 2023 · Oct 12, 2023 · Nov 2, 2023 · Nov 2, 2023
@@ -38,6 +38,28 @@ struct cuda_device_id {
   /// @briefreturn{The wrapped integer value}
   [[nodiscard]] constexpr value_type value() const noexcept { return id_; }
 
+  /**
+   * @brief Equality comparison operator
+   *
+   * @param other The other `cuda_device_id` to compare to
+   * @return true if the two `cuda_device_id`s wrap the same integer value, false otherwise
+   */
+  [[nodiscard]] constexpr bool operator==(cuda_device_id const& other) const noexcept
+  {
+    return value() == other.value();
+  }
+
+  /**
+   * @brief Inequality comparison operator
+   *
+   * @param other The other `cuda_device_id` to compare to
+   * @return true if the two `cuda_device_id`s wrap different integer values, false otherwise
+   */
+  [[nodiscard]] constexpr bool operator!=(cuda_device_id const& other) const noexcept
+  {
+    return value() != other.value();
+  }
+
  private:
   value_type id_;
 };
@@ -79,16 +101,17 @@ struct cuda_set_device_raii {
    * @param dev_id The device to set as the current CUDA device
    */
   explicit cuda_set_device_raii(cuda_device_id dev_id)
-    : old_device_{get_current_cuda_device()}, needs_reset_{old_device_.value() != dev_id.value()}
+    : old_device_{get_current_cuda_device()},
+      needs_reset_{dev_id.value() >= 0 && old_device_.value() != dev_id.value()}
   {
-    if (needs_reset_) RMM_ASSERT_CUDA_SUCCESS(cudaSetDevice(dev_id.value()));
+    if (needs_reset_) { RMM_ASSERT_CUDA_SUCCESS(cudaSetDevice(dev_id.value())); }
   }
   /**
    * @brief Reactivates the previous CUDA device
    */
   ~cuda_set_device_raii() noexcept
   {
-    if (needs_reset_) RMM_ASSERT_CUDA_SUCCESS(cudaSetDevice(old_device_.value()));
+    if (needs_reset_) { RMM_ASSERT_CUDA_SUCCESS(cudaSetDevice(old_device_.value())); }
   }
 
   cuda_set_device_raii(cuda_set_device_raii const&)            = delete;

@@ -15,6 +15,7 @@
  */
 #pragma once
 
+#include <rmm/cuda_device.hpp>
 #include <rmm/cuda_stream_view.hpp>
 #include <rmm/detail/error.hpp>
 #include <rmm/mr/device/device_memory_resource.hpp>
@@ -104,6 +105,7 @@ class device_buffer {
                          mr::device_memory_resource* mr = mr::get_current_device_resource())
     : _stream{stream}, _mr{mr}
   {
+    cuda_set_device_raii dev{_device};
     allocate_async(size);
   }
 
@@ -132,6 +134,7 @@ class device_buffer {
                 mr::device_memory_resource* mr = mr::get_current_device_resource())
     : _stream{stream}, _mr{mr}
   {
+    cuda_set_device_raii dev{_device};
     allocate_async(size);
     copy_async(source_data, size);
   }
@@ -188,6 +191,7 @@ class device_buffer {
     other._size     = 0;
     other._capacity = 0;
     other.set_stream(cuda_stream_view{});
+    other._device = cuda_device_id{-1};
   }
 
   /**
@@ -207,18 +211,21 @@ class device_buffer {
   device_buffer& operator=(device_buffer&& other) noexcept
   {
     if (&other != this) {
+      cuda_set_device_raii dev{_device};
       deallocate_async();
 
       _data     = other._data;
       _size     = other._size;
       _capacity = other._capacity;
       set_stream(other.stream());
-      _mr = other._mr;
+      _mr     = other._mr;
+      _device = other._device;
 
       other._data     = nullptr;
       other._size     = 0;
       other._capacity = 0;
       other.set_stream(cuda_stream_view{});
+      other._device = cuda_device_id{-1};
     }
     return *this;
   }
@@ -232,6 +239,7 @@ class device_buffer {
    */
   ~device_buffer() noexcept
   {
+    cuda_set_device_raii dev{_device};
     deallocate_async();
     _mr     = nullptr;
     _stream = cuda_stream_view{};
@@ -259,6 +267,7 @@ class device_buffer {
   {
     set_stream(stream);
     if (new_capacity > capacity()) {
+      cuda_set_device_raii dev{_device};
       auto tmp            = device_buffer{new_capacity, stream, _mr};
       auto const old_size = size();
       RMM_CUDA_TRY(cudaMemcpyAsync(tmp.data(), data(), size(), cudaMemcpyDefault, stream.value()));
@@ -300,6 +309,7 @@ class device_buffer {
     if (new_size <= capacity()) {
       _size = new_size;
     } else {
+      cuda_set_device_raii dev{_device};
       auto tmp = device_buffer{new_size, stream, _mr};
       RMM_CUDA_TRY(cudaMemcpyAsync(tmp.data(), data(), size(), cudaMemcpyDefault, stream.value()));
       *this = std::move(tmp);
@@ -323,6 +333,7 @@ class device_buffer {
   {
     set_stream(stream);
     if (size() != capacity()) {
+      cuda_set_device_raii dev{_device};
       // Invoke copy ctor on self which only copies `[0, size())` and swap it
       // with self. The temporary `device_buffer` will hold the old contents
       // which will then be destroyed
@@ -404,6 +415,7 @@ class device_buffer {
   mr::device_memory_resource* _mr{
     mr::get_current_device_resource()};  ///< The memory resource used to
                                          ///< allocate/deallocate device memory
+  cuda_device_id _device{get_current_cuda_device()};
 
   /**
    * @brief Allocates the specified amount of memory and updates the size/capacity accordingly.
@@ -454,6 +466,7 @@ class device_buffer {
   {
     if (bytes > 0) {
       RMM_EXPECTS(nullptr != source, "Invalid copy from nullptr.");
+      RMM_EXPECTS(nullptr != _data, "Invalid copy from nullptr.");
 
       RMM_CUDA_TRY(cudaMemcpyAsync(_data, source, bytes, cudaMemcpyDefault, stream().value()));
     }

@@ -186,4 +186,7 @@ ConfigureTest(BINNING_MR_TEST mr/device/binning_mr_tests.cpp)
 # callback memory resource tests
 ConfigureTest(CALLBACK_MR_TEST mr/device/callback_mr_tests.cpp)
 
+# container multidevice tests
+ConfigureTest(CONTAINER_MULTIDEVICE_TEST container_multidevice_tests.cu)
+
 rapids_test_install_relocatable(INSTALL_COMPONENT_SET testing DESTINATION bin/gtests/librmm)
@@ -0,0 +1,149 @@
+/*
+ * Copyright (c) 2023, NVIDIA CORPORATION.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "device_check_resource_adaptor.hpp"
+#include "rmm/mr/device/per_device_resource.hpp"
+
+#include <rmm/cuda_stream.hpp>
+#include <rmm/device_buffer.hpp>
+#include <rmm/device_scalar.hpp>
+#include <rmm/device_uvector.hpp>
+
+#include <gtest/gtest.h>
+
+#include <type_traits>
+
+template <typename ContainerType>
+struct ContainerMultiDeviceTest : public ::testing::Test {};
+
+using containers =
+  ::testing::Types<rmm::device_buffer, rmm::device_uvector<int>, rmm::device_scalar<int>>;
+
+TYPED_TEST_CASE(ContainerMultiDeviceTest, containers);
+
+TYPED_TEST(ContainerMultiDeviceTest, CreateDestroyDifferentActiveDevice)
+{
+  // Get the number of cuda devices
+  int num_devices = rmm::get_num_cuda_devices();
+
+  // only run on multidevice systems
+  if (num_devices >= 2) {
+    rmm::cuda_set_device_raii dev{rmm::cuda_device_id{0}};
+    auto* orig_mr = rmm::mr::get_current_device_resource();
+    auto check_mr = device_check_resource_adaptor{orig_mr};
+    rmm::mr::set_current_device_resource(&check_mr);
+
+    {
+      if constexpr (std::is_same_v<TypeParam, rmm::device_scalar<int>>) {
+        auto buf = TypeParam(rmm::cuda_stream_view{});
+        RMM_ASSERT_CUDA_SUCCESS(cudaSetDevice(1));  // force dtor with different active device
+      } else {
+        auto buf = TypeParam(128, rmm::cuda_stream_view{});
+        RMM_ASSERT_CUDA_SUCCESS(cudaSetDevice(1));  // force dtor with different active device
+      }
+    }
+
+    RMM_ASSERT_CUDA_SUCCESS(cudaSetDevice(0));
+    rmm::mr::set_current_device_resource(orig_mr);
+  }
+}
+
+TYPED_TEST(ContainerMultiDeviceTest, CreateMoveDestroyDifferentActiveDevice)
+{
+  // Get the number of cuda devices
+  int num_devices = rmm::get_num_cuda_devices();
+
+  // only run on multidevice systems
+  if (num_devices >= 2) {
+    rmm::cuda_set_device_raii dev{rmm::cuda_device_id{0}};
+    auto* orig_mr = rmm::mr::get_current_device_resource();
+    auto check_mr = device_check_resource_adaptor{orig_mr};
+    rmm::mr::set_current_device_resource(&check_mr);
+
+    {
+      auto buf_1 = []() {
+        if constexpr (std::is_same_v<TypeParam, rmm::device_scalar<int>>) {
+          return TypeParam(rmm::cuda_stream_view{});
+        } else {
+          return TypeParam(128, rmm::cuda_stream_view{});
+        }
+      }();
+
+      {
+        if constexpr (std::is_same_v<TypeParam, rmm::device_scalar<int>>) {
+          // device_vector does not have a constructor that takes a stream
+          auto buf_0 = TypeParam(rmm::cuda_stream_view{});
+          buf_1      = std::move(buf_0);
+        } else {
+          auto buf_0 = TypeParam(128, rmm::cuda_stream_view{});
+          buf_1      = std::move(buf_0);
+        }
+      }
+
+      RMM_ASSERT_CUDA_SUCCESS(cudaSetDevice(1));  // force dtor with different active device
+    }
+
+    RMM_ASSERT_CUDA_SUCCESS(cudaSetDevice(0));
+    rmm::mr::set_current_device_resource(orig_mr);
+  }
+}
+
+TYPED_TEST(ContainerMultiDeviceTest, ResizeDifferentActiveDevice)
+{
+  // Get the number of cuda devices
+  int num_devices = rmm::get_num_cuda_devices();
+
+  // only run on multidevice systems
+  if (num_devices >= 2) {
+    rmm::cuda_set_device_raii dev{rmm::cuda_device_id{0}};
+    auto* orig_mr = rmm::mr::get_current_device_resource();
+    auto check_mr = device_check_resource_adaptor{orig_mr};
+    rmm::mr::set_current_device_resource(&check_mr);
+
+    if constexpr (not std::is_same_v<TypeParam, rmm::device_scalar<int>>) {
+      auto buf = TypeParam(128, rmm::cuda_stream_view{});
+      RMM_ASSERT_CUDA_SUCCESS(cudaSetDevice(1));  // force resize with different active device
+      buf.resize(1024, rmm::cuda_stream_view{});
+    }
+
+    RMM_ASSERT_CUDA_SUCCESS(cudaSetDevice(0));
+    rmm::mr::set_current_device_resource(orig_mr);
+  }
+}
+
+TYPED_TEST(ContainerMultiDeviceTest, ShrinkDifferentActiveDevice)
+{
+  // Get the number of cuda devices
+  int num_devices = rmm::get_num_cuda_devices();
+
+  // only run on multidevice systems
+  if (num_devices >= 2) {
+    rmm::cuda_set_device_raii dev{rmm::cuda_device_id{0}};
+    auto* orig_mr = rmm::mr::get_current_device_resource();
+    auto check_mr = device_check_resource_adaptor{orig_mr};
+    rmm::mr::set_current_device_resource(&check_mr);
+
+    if constexpr (not std::is_same_v<TypeParam, rmm::device_scalar<int>>) {
+      auto buf = TypeParam(128, rmm::cuda_stream_view{});
+      RMM_ASSERT_CUDA_SUCCESS(cudaSetDevice(1));  // force resize with different active device
+      buf.resize(64, rmm::cuda_stream_view{});
+      buf.shrink_to_fit(rmm::cuda_stream_view{});
+    }
+
+    RMM_ASSERT_CUDA_SUCCESS(cudaSetDevice(0));
+    rmm::mr::set_current_device_resource(orig_mr);
+  }
+}
@@ -14,8 +14,6 @@
  * limitations under the License.
  */
 
-#include <gtest/gtest.h>
-
 #include <rmm/cuda_stream.hpp>
 #include <rmm/cuda_stream_view.hpp>
 #include <rmm/detail/error.hpp>
@@ -29,9 +27,12 @@
 
 #include <thrust/equal.h>
 #include <thrust/sequence.h>
+
+#include <gtest/gtest.h>
+
 namespace testing {
 namespace thrust = THRUST_NS_QUALIFIER;
-}
+}  // namespace testing
 using namespace testing;
 
 #include <cuda_runtime_api.h>