Merge branch 'branch-25.02' into 17775

rapidsai · Jan 24, 2025 · d08d8c2
2 parents 47ad3ca + 77efd9b
commit d08d8c2
Show file tree

Hide file tree

Showing 50 changed files with 682 additions and 740 deletions.
diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml
@@ -1,4 +1,13 @@
-# Copyright (c) 2019-2024, NVIDIA CORPORATION.
+# Copyright (c) 2019-2025, NVIDIA CORPORATION.
+
+ci:
+  autofix_commit_msg: "[pre-commit.ci] auto code formatting"
+  autofix_prs: false
+  autoupdate_branch: ""
+  autoupdate_commit_msg: "[pre-commit.ci] pre-commit autoupdate"
+  autoupdate_schedule: quarterly
+  skip: ["verify-alpha-spec"]
+  submodules: false
 
 repos:
   - repo: https://github.com/pre-commit/pre-commit-hooks

diff --git a/ci/run_cudf_memcheck_ctests.sh b/ci/run_cudf_memcheck_ctests.sh
@@ -1,5 +1,5 @@
 #!/bin/bash
-# Copyright (c) 2024, NVIDIA CORPORATION.
+# Copyright (c) 2024-2025, NVIDIA CORPORATION.
 
 set -uo pipefail
 
@@ -10,6 +10,7 @@ trap "EXITCODE=1" ERR
 cd "${INSTALL_PREFIX:-${CONDA_PREFIX:-/usr}}/bin/gtests/libcudf/";
 
 export GTEST_CUDF_RMM_MODE=cuda
+export GTEST_BRIEF=1
 # compute-sanitizer bug 4553815
 export LIBCUDF_MEMCHECK_ENABLED=1
 for gt in ./*_TEST ; do
@@ -18,6 +19,7 @@ for gt in ./*_TEST ; do
   echo "Running compute-sanitizer on $test_name"
   compute-sanitizer --tool memcheck ${gt} "$@"
 done
+unset GTEST_BRIEF
 unset GTEST_CUDF_RMM_MODE
 unset LIBCUDF_MEMCHECK_ENABLED
 

diff --git a/cpp/benchmarks/ast/transform.cpp b/cpp/benchmarks/ast/transform.cpp
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2020-2024, NVIDIA CORPORATION.
+ * Copyright (c) 2020-2025, NVIDIA CORPORATION.
  *
  * Licensed under the Apache License, Version 2.0 (the "License");
  * you may not use this file except in compliance with the License.
@@ -56,54 +56,46 @@ static void BM_ast_transform(nvbench::state& state)
   auto const tree_levels = static_cast<cudf::size_type>(state.get_int64("tree_levels"));
 
   // Create table data
-  auto const n_cols = reuse_columns ? 1 : tree_levels + 1;
+  auto const num_columns = reuse_columns ? 1 : tree_levels + 1;
   auto const source_table =
-    create_sequence_table(cycle_dtypes({cudf::type_to_id<key_type>()}, n_cols),
+    create_sequence_table(cycle_dtypes({cudf::type_to_id<key_type>()}, num_columns),
                           row_count{num_rows},
                           Nullable ? std::optional<double>{0.5} : std::nullopt);
   auto table = source_table->view();
 
+  cudf::ast::tree tree;
+
   // Create column references
-  auto column_refs = std::vector<cudf::ast::column_reference>();
-  std::transform(thrust::make_counting_iterator(0),
-                 thrust::make_counting_iterator(n_cols),
-                 std::back_inserter(column_refs),
-                 [](auto const& column_id) {
-                   return cudf::ast::column_reference(reuse_columns ? 0 : column_id);
-                 });
+  std::for_each(
+    thrust::make_counting_iterator(0),
+    thrust::make_counting_iterator(num_columns),
+    [&](int column_id) { tree.push(cudf::ast::column_reference(reuse_columns ? 0 : column_id)); });
 
   // Create expression trees
 
-  // Note that a std::list is required here because of its guarantees against reference invalidation
-  // when items are added or removed. References to items in a std::vector are not safe if the
-  // vector must re-allocate.
-  auto expressions = std::list<cudf::ast::operation>();
-
   // Construct tree that chains additions like (((a + b) + c) + d)
   auto const op = cudf::ast::ast_operator::ADD;
   if (reuse_columns) {
-    expressions.push_back(cudf::ast::operation(op, column_refs.at(0), column_refs.at(0)));
+    tree.push(cudf::ast::operation(op, tree.at(0), tree.at(0)));
     for (cudf::size_type i = 0; i < tree_levels - 1; i++) {
-      expressions.push_back(cudf::ast::operation(op, expressions.back(), column_refs.at(0)));
+      tree.push(cudf::ast::operation(op, tree.back(), tree.at(0)));
     }
   } else {
-    expressions.push_back(cudf::ast::operation(op, column_refs.at(0), column_refs.at(1)));
-    std::transform(std::next(column_refs.cbegin(), 2),
-                   column_refs.cend(),
-                   std::back_inserter(expressions),
-                   [&](auto const& column_ref) {
-                     return cudf::ast::operation(op, expressions.back(), column_ref);
-                   });
+    tree.push(cudf::ast::operation(op, tree.at(0), tree.at(1)));
+    std::for_each(
+      thrust::make_counting_iterator(2),
+      thrust::make_counting_iterator(num_columns),
+      [&](int col_id) { tree.push(cudf::ast::operation(op, tree.back(), tree.at(col_id))); });
   }
 
-  auto const& expression_tree_root = expressions.back();
+  auto const& root_expression = tree.back();
 
   // Use the number of bytes read from global memory
   state.add_global_memory_reads<key_type>(static_cast<size_t>(num_rows) * (tree_levels + 1));
   state.add_global_memory_writes<key_type>(num_rows);
 
   state.exec(nvbench::exec_tag::sync,
-             [&](nvbench::launch&) { cudf::compute_column(table, expression_tree_root); });
+             [&](nvbench::launch&) { cudf::compute_column(table, root_expression); });
 }
 
 template <cudf::ast::ast_operator cmp_op, cudf::ast::ast_operator reduce_op>
@@ -117,10 +109,10 @@ static void BM_string_compare_ast_transform(nvbench::state& state)
   CUDF_EXPECTS(tree_levels > 0, "benchmarks require 1 or more comparisons");
 
   // Create table data
-  auto const num_cols = tree_levels * 2;
+  auto const num_columns = tree_levels * 2;
   std::vector<std::unique_ptr<cudf::column>> columns;
   std::for_each(
-    thrust::make_counting_iterator(0), thrust::make_counting_iterator(num_cols), [&](size_t) {
+    thrust::make_counting_iterator(0), thrust::make_counting_iterator(num_columns), [&](size_t) {
       columns.emplace_back(create_string_column(num_rows, string_width, hit_rate));
     });
 
@@ -135,38 +127,36 @@ static void BM_string_compare_ast_transform(nvbench::state& state)
       return size + cudf::strings_column_view{column}.chars_size(cudf::get_default_stream());
     });
 
-  // Create column references
-  auto column_refs = std::vector<cudf::ast::column_reference>();
-  std::transform(thrust::make_counting_iterator(0),
-                 thrust::make_counting_iterator(num_cols),
-                 std::back_inserter(column_refs),
-                 [](auto const& column_id) { return cudf::ast::column_reference(column_id); });
+  // Create expression tree
+  cudf::ast::tree tree;
 
-  // Create expression trees
-  std::list<cudf::ast::operation> expressions;
+  // Create column references
+  std::for_each(thrust::make_counting_iterator(0),
+                thrust::make_counting_iterator(num_columns),
+                [&](int column_id) { tree.push(cudf::ast::column_reference{column_id}); });
 
   // Construct AST tree (a == b && c == d && e == f && ...)
 
-  expressions.emplace_back(cudf::ast::operation(cmp_op, column_refs[0], column_refs[1]));
+  tree.push(cudf::ast::operation(cmp_op, tree[0], tree[1]));
 
   std::for_each(thrust::make_counting_iterator(1),
                 thrust::make_counting_iterator(tree_levels),
                 [&](size_t idx) {
-                  auto const& lhs = expressions.back();
-                  auto const& rhs = expressions.emplace_back(
-                    cudf::ast::operation(cmp_op, column_refs[idx * 2], column_refs[idx * 2 + 1]));
-                  expressions.emplace_back(cudf::ast::operation(reduce_op, lhs, rhs));
+                  auto const& lhs = tree.back();
+                  auto const& rhs =
+                    tree.push(cudf::ast::operation(cmp_op, tree[idx * 2], tree[idx * 2 + 1]));
+                  tree.push(cudf::ast::operation(reduce_op, lhs, rhs));
                 });
 
-  auto const& expression_tree_root = expressions.back();
-
   // Use the number of bytes read from global memory
   state.add_element_count(chars_size, "chars_size");
   state.add_global_memory_reads<nvbench::uint8_t>(chars_size);
   state.add_global_memory_writes<nvbench::int32_t>(num_rows);
 
+  auto const& expression = tree.back();
+
   state.exec(nvbench::exec_tag::sync,
-             [&](nvbench::launch&) { cudf::compute_column(table, expression_tree_root); });
+             [&](nvbench::launch&) { cudf::compute_column(table, expression); });
 }
 
 #define AST_TRANSFORM_BENCHMARK_DEFINE(name, key_type, tree_type, reuse_columns, nullable) \

diff --git a/cpp/examples/basic/CMakeLists.txt b/cpp/examples/basic/CMakeLists.txt
@@ -1,12 +1,11 @@
-# Copyright (c) 2020-2024, NVIDIA CORPORATION.
+# Copyright (c) 2020-2025, NVIDIA CORPORATION.
 
 cmake_minimum_required(VERSION 3.26.4)
 
 include(../set_cuda_architecture.cmake)
 
 # initialize cuda architecture
 rapids_cuda_init_architectures(basic_example)
-rapids_cuda_set_architectures(RAPIDS)
 
 project(
   basic_example

diff --git a/cpp/examples/billion_rows/CMakeLists.txt b/cpp/examples/billion_rows/CMakeLists.txt
@@ -1,12 +1,11 @@
-# Copyright (c) 2024, NVIDIA CORPORATION.
+# Copyright (c) 2024-2025, NVIDIA CORPORATION.
 
 cmake_minimum_required(VERSION 3.26.4)
 
 include(../set_cuda_architecture.cmake)
 
 # initialize cuda architecture
 rapids_cuda_init_architectures(billion_rows)
-rapids_cuda_set_architectures(RAPIDS)
 
 project(
   billion_rows

diff --git a/cpp/examples/interop/CMakeLists.txt b/cpp/examples/interop/CMakeLists.txt
@@ -1,11 +1,10 @@
-# Copyright (c) 2024, NVIDIA CORPORATION.
+# Copyright (c) 2024-2025, NVIDIA CORPORATION.
 
 cmake_minimum_required(VERSION 3.26.4)
 
 include(../set_cuda_architecture.cmake)
 
 rapids_cuda_init_architectures(interop_example)
-rapids_cuda_set_architectures(RAPIDS)
 
 project(
   interop_example

diff --git a/cpp/examples/nested_types/CMakeLists.txt b/cpp/examples/nested_types/CMakeLists.txt
@@ -1,12 +1,11 @@
-# Copyright (c) 2023-2024, NVIDIA CORPORATION.
+# Copyright (c) 2023-2025, NVIDIA CORPORATION.
 
 cmake_minimum_required(VERSION 3.26.4)
 
 include(../set_cuda_architecture.cmake)
 
 # initialize cuda architecture
 rapids_cuda_init_architectures(nested_types)
-rapids_cuda_set_architectures(RAPIDS)
 
 project(
   nested_types

diff --git a/cpp/examples/parquet_io/CMakeLists.txt b/cpp/examples/parquet_io/CMakeLists.txt
@@ -1,12 +1,11 @@
-# Copyright (c) 2024, NVIDIA CORPORATION.
+# Copyright (c) 2024-2025, NVIDIA CORPORATION.
 
 cmake_minimum_required(VERSION 3.26.4)
 
 include(../set_cuda_architecture.cmake)
 
 # initialize cuda architecture
 rapids_cuda_init_architectures(parquet_io)
-rapids_cuda_set_architectures(RAPIDS)
 
 project(
   parquet_io

diff --git a/cpp/examples/strings/CMakeLists.txt b/cpp/examples/strings/CMakeLists.txt
@@ -1,12 +1,11 @@
-# Copyright (c) 2022-2024, NVIDIA CORPORATION.
+# Copyright (c) 2022-2025, NVIDIA CORPORATION.
 
 cmake_minimum_required(VERSION 3.26.4)
 
 include(../set_cuda_architecture.cmake)
 
 # initialize cuda architecture
 rapids_cuda_init_architectures(strings_examples)
-rapids_cuda_set_architectures(RAPIDS)
 
 project(
   strings_examples

diff --git a/cpp/include/cudf/ast/expressions.hpp b/cpp/include/cudf/ast/expressions.hpp
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2020-2024, NVIDIA CORPORATION.
+ * Copyright (c) 2020-2025, NVIDIA CORPORATION.
  *
  * Licensed under the Apache License, Version 2.0 (the "License");
  * you may not use this file except in compliance with the License.
@@ -556,7 +556,7 @@ class column_name_reference : public expression {
 
 /**
  * @brief An AST expression tree. It owns and contains multiple dependent expressions. All the
- * expressions are destroyed when the tree is destructed.
+ * expressions are destroyed when the tree is destroyed.
  */
 class tree {
  public:
@@ -588,12 +588,11 @@ class tree {
    * @returns a reference to the added expression
    */
   template <typename Expr, typename... Args>
-  Expr const& emplace(Args&&... args)
+  std::enable_if_t<std::is_base_of_v<expression, Expr>, Expr const&> emplace(Args&&... args)
   {
-    static_assert(std::is_base_of_v<expression, Expr>);
-    auto expr            = std::make_shared<Expr>(std::forward<Args>(args)...);
+    auto expr            = std::make_unique<Expr>(std::forward<Args>(args)...);
     Expr const& expr_ref = *expr;
-    expressions.emplace_back(std::static_pointer_cast<expression>(std::move(expr)));
+    expressions.emplace_back(std::move(expr));
     return expr_ref;
   }
 
@@ -603,7 +602,7 @@ class tree {
    * @returns a reference to the added expression
    */
   template <typename Expr>
-  Expr const& push(Expr expr)
+  decltype(auto) push(Expr expr)
   {
     return emplace<Expr>(std::move(expr));
   }
@@ -641,9 +640,9 @@ class tree {
   expression const& operator[](size_t index) const { return *expressions[index]; }
 
  private:
-  // TODO: use better ownership semantics, the shared_ptr here is redundant. Consider using a bump
+  // TODO: use better ownership semantics, the unique_ptr here is redundant. Consider using a bump
   // allocator with type-erased deleters.
-  std::vector<std::shared_ptr<expression>> expressions;
+  std::vector<std::unique_ptr<expression>> expressions;
 };
 
 /** @} */  // end of group