diff --git a/conda/environments/all_cuda-118_arch-x86_64.yaml b/conda/environments/all_cuda-118_arch-x86_64.yaml
index 78ddf0503..3a5cb90af 100644
--- a/conda/environments/all_cuda-118_arch-x86_64.yaml
+++ b/conda/environments/all_cuda-118_arch-x86_64.yaml
@@ -4,6 +4,7 @@ channels:
 - rapidsai
 - conda-forge
 dependencies:
+- breathe
 - c-compiler
 - clang-tools==16.0.6
 - clang==16.0.6
diff --git a/conda/environments/all_cuda-120_arch-x86_64.yaml b/conda/environments/all_cuda-120_arch-x86_64.yaml
index aec31b3db..f87564b74 100644
--- a/conda/environments/all_cuda-120_arch-x86_64.yaml
+++ b/conda/environments/all_cuda-120_arch-x86_64.yaml
@@ -4,6 +4,7 @@ channels:
 - rapidsai
 - conda-forge
 dependencies:
+- breathe
 - c-compiler
 - clang-tools==16.0.6
 - clang==16.0.6
diff --git a/dependencies.yaml b/dependencies.yaml
index 95f275134..af3f49ef0 100644
--- a/dependencies.yaml
+++ b/dependencies.yaml
@@ -174,6 +174,7 @@ dependencies:
     common:
       - output_types: conda
         packages:
+          - breathe
           - *doxygen
           - graphviz
           - ipython
diff --git a/include/rmm/cuda_stream_view.hpp b/include/rmm/cuda_stream_view.hpp
index fe07fa1b9..7809140fb 100644
--- a/include/rmm/cuda_stream_view.hpp
+++ b/include/rmm/cuda_stream_view.hpp
@@ -125,6 +125,9 @@ static const cuda_stream_view cuda_stream_per_thread{
   cudaStreamPerThread  // NOLINT(cppcoreguidelines-pro-type-cstyle-cast)
 };
 
+// Need to avoid putting is_per_thread_default and is_default into the group twice.
+/** @} */  // end of group
+
 [[nodiscard]] inline bool cuda_stream_view::is_per_thread_default() const noexcept
 {
 #ifdef CUDA_API_PER_THREAD_DEFAULT_STREAM
@@ -134,9 +137,6 @@ static const cuda_stream_view cuda_stream_per_thread{
 #endif
 }
 
-/**
- * @brief Return true if the wrapped stream is explicitly the CUDA legacy default stream.
- */
 [[nodiscard]] inline bool cuda_stream_view::is_default() const noexcept
 {
 #ifdef CUDA_API_PER_THREAD_DEFAULT_STREAM
@@ -146,6 +146,11 @@ static const cuda_stream_view cuda_stream_per_thread{
 #endif
 }
 
+/**
+ * @addtogroup cuda_streams
+ * @{
+ */
+
 /**
  * @brief Equality comparison operator for streams
  *
diff --git a/include/rmm/device_uvector.hpp b/include/rmm/device_uvector.hpp
index 930cda157..982d2095d 100644
--- a/include/rmm/device_uvector.hpp
+++ b/include/rmm/device_uvector.hpp
@@ -44,7 +44,7 @@ namespace rmm {
  * `thrust::uninitialized_fill`.
  *
  * Example:
- * @code{c++}
+ * @code{.cpp}
  * rmm::mr::device_memory_resource * mr = new my_custom_resource();
  * rmm::cuda_stream_view s{};
  *
diff --git a/include/rmm/logger.hpp b/include/rmm/logger.hpp
index ce0abc23b..6213221ab 100644
--- a/include/rmm/logger.hpp
+++ b/include/rmm/logger.hpp
@@ -98,7 +98,9 @@ struct bytes {
 
 /**
  * @brief Returns the global RMM logger
- * @addtogroup logging
+ *
+ * @ingroup logging
+ *
  * This is a spdlog logger. The easiest way to log messages is to use the `RMM_LOG_*` macros.
  *
  * @return spdlog::logger& The logger.
diff --git a/include/rmm/mr/device/cuda_async_memory_resource.hpp b/include/rmm/mr/device/cuda_async_memory_resource.hpp
index 527cd3875..de31c7dc4 100644
--- a/include/rmm/mr/device/cuda_async_memory_resource.hpp
+++ b/include/rmm/mr/device/cuda_async_memory_resource.hpp
@@ -75,7 +75,7 @@ class cuda_async_memory_resource final : public device_memory_resource {
    * If the pool size grows beyond the release threshold, unused memory held by the pool will be
    * released at the next synchronization event.
    *
-   * @throws rmm::runtime_error if the CUDA version does not support `cudaMallocAsync`
+   * @throws rmm::logic_error if the CUDA version does not support `cudaMallocAsync`
    *
    * @param initial_pool_size Optional initial size in bytes of the pool. If no value is provided,
    * initial pool size is half of the available GPU memory.
diff --git a/include/rmm/mr/device/device_memory_resource.hpp b/include/rmm/mr/device/device_memory_resource.hpp
index 355042092..bda52ac67 100644
--- a/include/rmm/mr/device/device_memory_resource.hpp
+++ b/include/rmm/mr/device/device_memory_resource.hpp
@@ -75,7 +75,7 @@ namespace rmm::mr {
  * pool_memory_resource objects for each device and sets them as the per-device resource for that
  * device.
  *
- * @code{c++}
+ * @code{.cpp}
  * std::vector<unique_ptr<pool_memory_resource>> per_device_pools;
  * for(int i = 0; i < N; ++i) {
  *   cudaSetDevice(i);
diff --git a/include/rmm/mr/device/failure_callback_resource_adaptor.hpp b/include/rmm/mr/device/failure_callback_resource_adaptor.hpp
index 73fd5f3b8..f8cbe8597 100644
--- a/include/rmm/mr/device/failure_callback_resource_adaptor.hpp
+++ b/include/rmm/mr/device/failure_callback_resource_adaptor.hpp
@@ -60,7 +60,7 @@ using failure_callback_t = std::function<bool(std::size_t, void*)>;
  * When implementing a callback function for allocation retry, care must be taken to avoid an
  * infinite loop. The following example makes sure to only retry the allocation once:
  *
- * @code{c++}
+ * @code{.cpp}
  * using failure_callback_adaptor =
  *   rmm::mr::failure_callback_resource_adaptor<rmm::mr::device_memory_resource>;
  *
diff --git a/include/rmm/mr/device/per_device_resource.hpp b/include/rmm/mr/device/per_device_resource.hpp
index aa7217758..139389f0c 100644
--- a/include/rmm/mr/device/per_device_resource.hpp
+++ b/include/rmm/mr/device/per_device_resource.hpp
@@ -61,7 +61,7 @@
  * pool_memory_resource objects for each device and sets them as the per-device resource for that
  * device.
  *
- * @code{c++}
+ * @code{.cpp}
  * std::vector<unique_ptr<pool_memory_resource>> per_device_pools;
  * for(int i = 0; i < N; ++i) {
  *   cudaSetDevice(i);
@@ -72,6 +72,10 @@
  */
 
 namespace rmm::mr {
+/**
+ * @addtogroup memory_resources
+ * @{
+ */
 
 namespace detail {
 
@@ -233,4 +237,5 @@ inline device_memory_resource* set_current_device_resource(device_memory_resourc
 {
   return set_per_device_resource(rmm::get_current_cuda_device(), new_mr);
 }
+/** @} */  // end of group
 }  // namespace rmm::mr
diff --git a/include/rmm/mr/device/polymorphic_allocator.hpp b/include/rmm/mr/device/polymorphic_allocator.hpp
index a52ec14d1..c3b8ac6ea 100644
--- a/include/rmm/mr/device/polymorphic_allocator.hpp
+++ b/include/rmm/mr/device/polymorphic_allocator.hpp
@@ -133,7 +133,7 @@ bool operator!=(polymorphic_allocator<T> const& lhs, polymorphic_allocator<T> co
  *`deallocate` functions.
  *
  * Example:
- *\code{c++}
+ *\code{.cpp}
  * my_stream_ordered_allocator<int> a{...};
  * cuda_stream_view s = // create stream;
  *
diff --git a/python/docs/conf.py b/python/docs/conf.py
index d8c7460dc..c55d998d6 100644
--- a/python/docs/conf.py
+++ b/python/docs/conf.py
@@ -11,6 +11,7 @@
 # documentation root, use os.path.abspath to make it absolute, like shown here.
 #
 import os
+import re
 
 # -- Project information -----------------------------------------------------
 
@@ -46,8 +47,12 @@
     "IPython.sphinxext.ipython_directive",
     "nbsphinx",
     "recommonmark",
+    "breathe",
 ]
 
+# Breathe Configuration
+breathe_projects = {"librmm": "../../doxygen/xml"}
+breathe_default_project = "librmm"
 
 copybutton_prompt_text = ">>> "
 
@@ -197,9 +202,72 @@
 ]
 
 
+def on_missing_reference(app, env, node, contnode):
+    if (refid := node.get("refid")) is not None and "hpp" in refid:
+        # We don't want to link to C++ header files directly from the
+        # Sphinx docs, those are pages that doxygen automatically
+        # generates. Adding those would clutter the Sphinx output.
+        return contnode
+
+    names_to_skip = [
+        # External names
+        "cudaStream_t",
+        "cudaStreamLegacy",
+        "cudaStreamPerThread",
+        "thrust",
+        "spdlog",
+        # Unknown types
+        "int64_t",
+        "int8_t",
+        # Internal objects
+        "detail",
+        "RMM_EXEC_CHECK_DISABLE",
+        "default_alignment_threshold",
+        "get_default_filename",
+        # Template types
+        "Base",
+    ]
+    if (
+        node["refdomain"] == "cpp"
+        and (reftarget := node.get("reftarget")) is not None
+    ):
+        if any(toskip in reftarget for toskip in names_to_skip):
+            return contnode
+
+        # Strip template parameters and just use the base type.
+        if match := re.search("(.*)<.*>", reftarget):
+            reftarget = match.group(1)
+
+        # Try to find the target prefixed with e.g. namespaces in case that's
+        # all that's missing. Include the empty prefix in case we're searching
+        # for a stripped template.
+        extra_prefixes = ["rmm::", "rmm::mr::", "mr::", ""]
+        for (name, dispname, type, docname, anchor, priority) in env.domains[
+            "cpp"
+        ].get_objects():
+
+            for prefix in extra_prefixes:
+                if (
+                    name == f"{prefix}{reftarget}"
+                    or f"{prefix}{name}" == reftarget
+                ):
+                    return env.domains["cpp"].resolve_xref(
+                        env,
+                        docname,
+                        app.builder,
+                        node["reftype"],
+                        name,
+                        node,
+                        contnode,
+                    )
+
+    return None
+
+
 def setup(app):
     app.add_js_file("copybutton_pydocs.js")
     app.add_css_file("https://docs.rapids.ai/assets/css/custom.css")
     app.add_js_file(
         "https://docs.rapids.ai/assets/js/custom.js", loading_method="defer"
     )
+    app.connect("missing-reference", on_missing_reference)
diff --git a/python/docs/cpp.rst b/python/docs/cpp.rst
new file mode 100644
index 000000000..e60f15129
--- /dev/null
+++ b/python/docs/cpp.rst
@@ -0,0 +1,8 @@
+Welcome to the rmm C++ documentation!
+========================================
+
+.. toctree::
+   :maxdepth: 2
+   :caption: Contents:
+
+   cpp_api.rst
diff --git a/python/docs/cpp_api.rst b/python/docs/cpp_api.rst
new file mode 100644
index 000000000..63e435190
--- /dev/null
+++ b/python/docs/cpp_api.rst
@@ -0,0 +1,8 @@
+API Reference
+=============
+
+.. toctree::
+   :maxdepth: 2
+   :caption: Contents:
+
+   librmm_docs/index
diff --git a/python/docs/basics.md b/python/docs/guide.md
similarity index 99%
rename from python/docs/basics.md
rename to python/docs/guide.md
index 997745f00..c06135ca8 100644
--- a/python/docs/basics.md
+++ b/python/docs/guide.md
@@ -1,4 +1,4 @@
-# RMM - the RAPIDS Memory Manager
+# User Guide
 
 Achieving optimal performance in GPU-centric workflows frequently requires customizing how GPU
 ("device") memory is allocated.
diff --git a/python/docs/index.rst b/python/docs/index.rst
index 8dffdeb02..524351253 100644
--- a/python/docs/index.rst
+++ b/python/docs/index.rst
@@ -10,8 +10,8 @@ Welcome to rmm's documentation!
    :maxdepth: 2
    :caption: Contents:
 
-   basics.md
-   api.rst
+   Python <python.rst>
+   C++ <cpp.rst>
 
 
 Indices and tables
diff --git a/python/docs/librmm_docs/cuda_device_management.rst b/python/docs/librmm_docs/cuda_device_management.rst
new file mode 100644
index 000000000..096509886
--- /dev/null
+++ b/python/docs/librmm_docs/cuda_device_management.rst
@@ -0,0 +1,5 @@
+CUDA Device Management
+======================
+
+.. doxygengroup:: cuda_device_management
+   :members:
diff --git a/python/docs/librmm_docs/cuda_streams.rst b/python/docs/librmm_docs/cuda_streams.rst
new file mode 100644
index 000000000..e3e6ed793
--- /dev/null
+++ b/python/docs/librmm_docs/cuda_streams.rst
@@ -0,0 +1,5 @@
+CUDA Streams
+============
+
+.. doxygengroup:: cuda_streams
+   :members:
diff --git a/python/docs/librmm_docs/data_containers.rst b/python/docs/librmm_docs/data_containers.rst
new file mode 100644
index 000000000..1cfccb7bd
--- /dev/null
+++ b/python/docs/librmm_docs/data_containers.rst
@@ -0,0 +1,5 @@
+Data Containers
+===============
+
+.. doxygengroup:: data_containers
+   :members:
diff --git a/python/docs/librmm_docs/errors.rst b/python/docs/librmm_docs/errors.rst
new file mode 100644
index 000000000..4de0ee40a
--- /dev/null
+++ b/python/docs/librmm_docs/errors.rst
@@ -0,0 +1,5 @@
+Errors
+======
+
+.. doxygengroup:: errors
+   :members:
diff --git a/python/docs/librmm_docs/index.rst b/python/docs/librmm_docs/index.rst
new file mode 100644
index 000000000..6afd94d2e
--- /dev/null
+++ b/python/docs/librmm_docs/index.rst
@@ -0,0 +1,29 @@
+.. rmm documentation master file, created by
+   sphinx-quickstart on Thu Nov 19 13:16:00 2020.
+   You can adapt this file completely to your liking, but it should at least
+   contain the root `toctree` directive.
+
+librmm Documentation
+====================
+
+.. toctree::
+   :maxdepth: 2
+   :caption: Contents:
+
+   memory_resources
+   data_containers
+   thrust_integrations
+   cuda_device_management
+   cuda_streams
+   errors
+   logging
+
+
+.. doxygennamespace:: rmm
+   :desc-only:
+
+Indices and tables
+==================
+
+* :ref:`genindex`
+* :ref:`search`
diff --git a/python/docs/librmm_docs/logging.rst b/python/docs/librmm_docs/logging.rst
new file mode 100644
index 000000000..97f961c0c
--- /dev/null
+++ b/python/docs/librmm_docs/logging.rst
@@ -0,0 +1,5 @@
+Logging
+=======
+
+.. doxygengroup:: logging
+   :members:
diff --git a/python/docs/librmm_docs/memory_resources.rst b/python/docs/librmm_docs/memory_resources.rst
new file mode 100644
index 000000000..68d33f003
--- /dev/null
+++ b/python/docs/librmm_docs/memory_resources.rst
@@ -0,0 +1,17 @@
+Memory Resources
+================
+
+.. doxygennamespace:: rmm::mr
+   :desc-only:
+
+.. doxygengroup:: memory_resources
+   :members:
+
+.. doxygengroup:: device_memory_resources
+   :members:
+
+.. doxygengroup:: host_memory_resources
+   :members:
+
+.. doxygengroup:: device_resource_adaptors
+   :members:
diff --git a/python/docs/librmm_docs/thrust_integrations.rst b/python/docs/librmm_docs/thrust_integrations.rst
new file mode 100644
index 000000000..8574f482a
--- /dev/null
+++ b/python/docs/librmm_docs/thrust_integrations.rst
@@ -0,0 +1,5 @@
+Thrust Integration
+==================
+
+.. doxygengroup:: thrust_integrations
+   :members:
diff --git a/python/docs/python.rst b/python/docs/python.rst
new file mode 100644
index 000000000..bff919627
--- /dev/null
+++ b/python/docs/python.rst
@@ -0,0 +1,9 @@
+Welcome to the rmm Python documentation!
+========================================
+
+.. toctree::
+   :maxdepth: 2
+   :caption: Contents:
+
+   guide.md
+   python_api.rst
diff --git a/python/docs/api.rst b/python/docs/python_api.rst
similarity index 100%
rename from python/docs/api.rst
rename to python/docs/python_api.rst