From fae33fa753177dd3fc7108eb751aca55b89185eb Mon Sep 17 00:00:00 2001
From: Mark Harris <783069+harrism@users.noreply.github.com>
Date: Tue, 28 Nov 2023 03:20:53 +0000
Subject: [PATCH] Add host_pinned_memory_resource and tests.

---
 .../rmm/mr/host_pinned_memory_resource.hpp    | 243 ++++++++++++++++++
 tests/mr/device/mr_test.hpp                   |  12 +-
 tests/mr/device/mr_tests.cpp                  |   2 +
 3 files changed, 256 insertions(+), 1 deletion(-)
 create mode 100644 include/rmm/mr/host_pinned_memory_resource.hpp
diff --git a/include/rmm/mr/host_pinned_memory_resource.hpp b/include/rmm/mr/host_pinned_memory_resource.hpp
new file mode 100644
index 000000000..d12f74f9e
--- /dev/null
+++ b/include/rmm/mr/host_pinned_memory_resource.hpp
@@ -0,0 +1,243 @@
+/*
+ * Copyright (c) 2023, NVIDIA CORPORATION.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+#pragma once
+
+#include <rmm/detail/error.hpp>
+
+#include <cuda/memory_resource>
+#include <cuda_runtime_api.h>
+
+#include <cstddef>
+#include <utility>
+
+namespace rmm::mr {
+
+/**
+ * @brief Memory resource class for allocating pinned host memory.
+ *
+ * This class uses CUDA's `cudaHostAlloc` to allocate pinned host memory. It implements the
+ * `cuda::mr::memory_resource` and `cuda::mr::device_memory_resource` concepts, and
+ * the `cuda::mr::host_accessible` and `cuda::mr::device_accessible` properties.
+ */
+class pinned_host_memory_resource {
+ public:
+  // Disable clang-tidy complaining about the easily swappable size and alignment parameters
+  // of allocate and deallocate
+  // NOLINTBEGIN(bugprone-easily-swappable-parameters)
+
+  /**
+   * @brief Allocates pinned host memory of size at least \p bytes bytes.
+   *
+   * @throws `rmm::out_of_memory` if the requested allocation could not be fulfilled due to a
+   * CUDA out of memory error.
+   * @throws `rmm::bad_alloc` if the requested allocation could not be fulfilled due to any other
+   * reason.
+   *
+   * @param bytes The size, in bytes, of the allocation.
+   * @return Pointer to the newly allocated memory.
+   */
+  static void* allocate(std::size_t bytes)
+  {
+    void* ptr{nullptr};
+    RMM_CUDA_TRY_ALLOC(cudaHostAlloc(&ptr, bytes, cudaHostAllocDefault));
+    return ptr;
+  }
+
+  /**
+   * @brief Allocates pinned host memory of size at least \p bytes bytes.
+   *
+   * @todo Alignment is not implemented yet; \p alignment is currently ignored.
+   *
+   * @throws `rmm::out_of_memory` if the requested allocation could not be fulfilled due to a
+   * CUDA out of memory error.
+   * @throws `rmm::bad_alloc` if the allocation could not be fulfilled for any other reason.
+   *
+   * @param bytes The size, in bytes, of the allocation.
+   * @param alignment Alignment in bytes (currently ignored).
+   * @return Pointer to the newly allocated memory.
+   */
+  static void* allocate(std::size_t bytes, [[maybe_unused]] std::size_t alignment)
+  {
+    return allocate(bytes);
+  }
+
+  /**
+   * @brief Deallocate memory pointed to by \p ptr of size \p bytes bytes.
+   *
+   * @throws Nothing.
+   *
+   * @param ptr Pointer to be deallocated.
+   * @param bytes Size of the allocation (unused).
+   */
+  static void deallocate(void* ptr, [[maybe_unused]] std::size_t bytes) noexcept
+  {
+    RMM_ASSERT_CUDA_SUCCESS(cudaFreeHost(ptr));
+  }
+
+  /**
+   * @brief Deallocate memory pointed to by \p ptr of size \p bytes bytes.
+   *
+   * @todo Alignment is not implemented yet; \p alignment is currently ignored.
+   * @throws Nothing.
+   *
+   * @param ptr Pointer to be deallocated.
+   * @param bytes Size of the allocation.
+   * @param alignment Alignment in bytes (currently ignored).
+   */
+  static void deallocate(void* ptr,
+                         std::size_t bytes,
+                         [[maybe_unused]] std::size_t alignment) noexcept
+  {
+    deallocate(ptr, bytes);
+  }
+
+  /**
+   * @brief Allocates pinned host memory of size at least \p bytes bytes.
+   *
+   * @note Stream argument is ignored and behavior is identical to allocate.
+   *
+   * @throws `rmm::out_of_memory` if the requested allocation could not be fulfilled due to a
+   * CUDA out of memory error.
+   * @throws `rmm::bad_alloc` if the requested allocation could not be fulfilled due to any other
+   * reason.
+   *
+   * @param bytes The size, in bytes, of the allocation.
+   * @param stream CUDA stream on which to perform the allocation (ignored).
+   * @return Pointer to the newly allocated memory.
+   */
+  static void* allocate_async(std::size_t bytes, [[maybe_unused]] cuda::stream_ref stream)
+  {
+    return allocate(bytes);
+  }
+
+  /**
+   * @brief Allocates pinned host memory of size at least \p bytes bytes and alignment \p alignment.
+   *
+   * @note Stream argument is ignored and behavior is identical to allocate.
+   *
+   * @todo Alignment is not implemented yet.
+   *
+   * @throws `rmm::out_of_memory` if the requested allocation could not be fulfilled due to a
+   * CUDA out of memory error.
+   * @throws `rmm::bad_alloc` if the requested allocation could not be fulfilled due to any other
+   * reason.
+   *
+   * @param bytes The size, in bytes, of the allocation.
+   * @param alignment Alignment in bytes (currently ignored).
+   * @param stream CUDA stream on which to perform the allocation (ignored).
+   * @return Pointer to the newly allocated memory.
+   */
+  static void* allocate_async(std::size_t bytes,
+                              std::size_t alignment,
+                              [[maybe_unused]] cuda::stream_ref stream)
+  {
+    return allocate(bytes, alignment);
+  }
+
+  /**
+   * @brief Deallocate memory pointed to by \p ptr of size \p bytes bytes.
+   *
+   * @note Stream argument is ignored and behavior is identical to deallocate.
+   *
+   * @throws Nothing.
+   *
+   * @param ptr Pointer to be deallocated.
+   * @param bytes Size of the allocation.
+   * @param stream CUDA stream on which to perform the deallocation (ignored).
+   */
+  static void deallocate_async(void* ptr,
+                               std::size_t bytes,
+                               [[maybe_unused]] cuda::stream_ref stream) noexcept
+  {
+    deallocate(ptr, bytes);
+  }
+
+  /**
+   * @brief Deallocate memory pointed to by \p ptr of size \p bytes bytes and alignment \p
+   * alignment bytes.
+   *
+   * @note Stream argument is ignored and behavior is identical to deallocate.
+   *
+   * @todo Alignment is not implemented yet.
+   *
+   * @throws Nothing.
+   *
+   * @param ptr Pointer to be deallocated.
+   * @param bytes Size of the allocation.
+   * @param alignment Alignment in bytes (currently ignored).
+   * @param stream CUDA stream on which to perform the deallocation (ignored).
+   */
+  static void deallocate_async(void* ptr,
+                               std::size_t bytes,
+                               std::size_t alignment,
+                               [[maybe_unused]] cuda::stream_ref stream) noexcept
+  {
+    deallocate(ptr, bytes, alignment);
+  }
+  // NOLINTEND(bugprone-easily-swappable-parameters)
+
+  /**
+   * @brief Compares this resource to another `pinned_host_memory_resource`.
+   * @return true always: the class has no state, so all instances are interchangeable.
+   */
+  bool operator==(pinned_host_memory_resource const&) const noexcept { return true; }
+
+  /**
+   * @brief Checks whether this resource differs from another `pinned_host_memory_resource`.
+   * @return false always, as the logical negation of `operator==`.
+   */
+  bool operator!=(pinned_host_memory_resource const&) const noexcept { return false; }
+
+  /**
+   * @brief Query whether the resource supports reporting free and available memory.
+   *
+   * @return false
+   */
+  [[nodiscard]] static bool supports_get_mem_info() noexcept { return false; }
+
+  /**
+   * @brief Query the total amount of memory and free memory available for allocation by this
+   * resource.
+   *
+   * @throws nothing
+   *
+   * @return std::pair containing 0 for both total and free memory.
+   */
+  [[nodiscard]] static std::pair<std::size_t, std::size_t> get_mem_info(cuda::stream_ref) noexcept
+  {
+    return {0, 0};
+  }
+
+  /**
+   * @brief Enables the `cuda::mr::device_accessible` property
+   *
+   * This property declares that a `pinned_host_memory_resource` provides device accessible memory
+   */
+  friend void get_property(pinned_host_memory_resource const&, cuda::mr::device_accessible) noexcept
+  {
+  }
+
+  /**
+   * @brief Enables the `cuda::mr::host_accessible` property
+   *
+   * This property declares that a `pinned_host_memory_resource` provides host accessible memory
+   */
+  friend void get_property(pinned_host_memory_resource const&, cuda::mr::host_accessible) noexcept
+  {
+  }
+};
+
+}  // namespace rmm::mr
diff --git a/tests/mr/device/mr_test.hpp b/tests/mr/device/mr_test.hpp
index 03f880e72..fb9b9dd67 100644
--- a/tests/mr/device/mr_test.hpp
+++ b/tests/mr/device/mr_test.hpp
@@ -31,6 +31,7 @@
 #include <rmm/mr/device/owning_wrapper.hpp>
 #include <rmm/mr/device/per_device_resource.hpp>
 #include <rmm/mr/device/pool_memory_resource.hpp>
+#include <rmm/mr/host_pinned_memory_resource.hpp>
 
 #include <gtest/gtest.h>
 
@@ -52,7 +53,8 @@ inline bool is_device_memory(void* ptr)
 {
   cudaPointerAttributes attributes{};
   if (cudaSuccess != cudaPointerGetAttributes(&attributes, ptr)) { return false; }
-  return (attributes.type == cudaMemoryTypeDevice) or (attributes.type == cudaMemoryTypeManaged);
+  return (attributes.type == cudaMemoryTypeDevice) or (attributes.type == cudaMemoryTypeManaged) or
+         ((attributes.type == cudaMemoryTypeHost) and (attributes.devicePointer != nullptr));
 }
 
 enum size_in_bytes : size_t {};
@@ -245,6 +247,8 @@ struct mr_allocation_test : public mr_test {};
 /// MR factory functions
 inline auto make_cuda() { return std::make_shared<rmm::mr::cuda_memory_resource>(); }
 
+inline auto make_host_pinned() { return std::make_shared<rmm::mr::pinned_host_memory_resource>(); }
+
 inline auto make_cuda_async()
 {
   if (rmm::detail::async_alloc::is_supported()) {
@@ -260,6 +264,12 @@ inline auto make_pool()
   return rmm::mr::make_owning_wrapper<rmm::mr::pool_memory_resource>(make_cuda());
 }
 
+inline auto make_host_pinned_pool()
+{
+  auto upstream = make_host_pinned();
+  return rmm::mr::make_owning_wrapper<rmm::mr::pool_memory_resource>(upstream, 2_GiB, 8_GiB);
+}
+
 inline auto make_arena()
 {
   return rmm::mr::make_owning_wrapper<rmm::mr::arena_memory_resource>(make_cuda());
diff --git a/tests/mr/device/mr_tests.cpp b/tests/mr/device/mr_tests.cpp
index f6141e90f..bf513adda 100644
--- a/tests/mr/device/mr_tests.cpp
+++ b/tests/mr/device/mr_tests.cpp
@@ -31,6 +31,7 @@ INSTANTIATE_TEST_SUITE_P(ResourceTests,
 #endif
                                            mr_factory{"Managed", &make_managed},
                                            mr_factory{"Pool", &make_pool},
+                                           mr_factory{"HostPinnedPool", &make_host_pinned_pool},
                                            mr_factory{"Arena", &make_arena},
                                            mr_factory{"Binning", &make_binning},
                                            mr_factory{"Fixed_Size", &make_fixed_size}),
@@ -45,6 +46,7 @@ INSTANTIATE_TEST_SUITE_P(ResourceAllocationTests,
 #endif
                                            mr_factory{"Managed", &make_managed},
                                            mr_factory{"Pool", &make_pool},
+                                           mr_factory{"HostPinnedPool", &make_host_pinned_pool},
                                            mr_factory{"Arena", &make_arena},
                                            mr_factory{"Binning", &make_binning}),
                          [](auto const& info) { return info.param.name; });