Skip to content

Commit

Permalink
Merge branch 'branch-25.02' into 17775
Browse files: browse the repository at this point in the history
  • Loading branch information
vyasr authored Jan 24, 2025
2 parents 47ad3ca + 77efd9b commit d08d8c2
Show file tree
Hide file tree
Showing 50 changed files with 682 additions and 740 deletions.
11 changes: 10 additions & 1 deletion .pre-commit-config.yaml
Original file line number Diff line number Diff line change
@@ -1,4 +1,13 @@
# Copyright (c) 2019-2024, NVIDIA CORPORATION.
# Copyright (c) 2019-2025, NVIDIA CORPORATION.

ci:
autofix_commit_msg: "[pre-commit.ci] auto code formatting"
autofix_prs: false
autoupdate_branch: ""
autoupdate_commit_msg: "[pre-commit.ci] pre-commit autoupdate"
autoupdate_schedule: quarterly
skip: ["verify-alpha-spec"]
submodules: false

repos:
- repo: https://github.com/pre-commit/pre-commit-hooks
Expand Down
4 changes: 3 additions & 1 deletion ci/run_cudf_memcheck_ctests.sh
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
#!/bin/bash
# Copyright (c) 2024, NVIDIA CORPORATION.
# Copyright (c) 2024-2025, NVIDIA CORPORATION.

set -uo pipefail

Expand All @@ -10,6 +10,7 @@ trap "EXITCODE=1" ERR
cd "${INSTALL_PREFIX:-${CONDA_PREFIX:-/usr}}/bin/gtests/libcudf/";

export GTEST_CUDF_RMM_MODE=cuda
export GTEST_BRIEF=1
# compute-sanitizer bug 4553815
export LIBCUDF_MEMCHECK_ENABLED=1
for gt in ./*_TEST ; do
Expand All @@ -18,6 +19,7 @@ for gt in ./*_TEST ; do
echo "Running compute-sanitizer on $test_name"
compute-sanitizer --tool memcheck ${gt} "$@"
done
unset GTEST_BRIEF
unset GTEST_CUDF_RMM_MODE
unset LIBCUDF_MEMCHECK_ENABLED

Expand Down
78 changes: 34 additions & 44 deletions cpp/benchmarks/ast/transform.cpp
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
/*
* Copyright (c) 2020-2024, NVIDIA CORPORATION.
* Copyright (c) 2020-2025, NVIDIA CORPORATION.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
Expand Down Expand Up @@ -56,54 +56,46 @@ static void BM_ast_transform(nvbench::state& state)
auto const tree_levels = static_cast<cudf::size_type>(state.get_int64("tree_levels"));

// Create table data
auto const n_cols = reuse_columns ? 1 : tree_levels + 1;
auto const num_columns = reuse_columns ? 1 : tree_levels + 1;
auto const source_table =
create_sequence_table(cycle_dtypes({cudf::type_to_id<key_type>()}, n_cols),
create_sequence_table(cycle_dtypes({cudf::type_to_id<key_type>()}, num_columns),
row_count{num_rows},
Nullable ? std::optional<double>{0.5} : std::nullopt);
auto table = source_table->view();

cudf::ast::tree tree;

// Create column references
auto column_refs = std::vector<cudf::ast::column_reference>();
std::transform(thrust::make_counting_iterator(0),
thrust::make_counting_iterator(n_cols),
std::back_inserter(column_refs),
[](auto const& column_id) {
return cudf::ast::column_reference(reuse_columns ? 0 : column_id);
});
std::for_each(
thrust::make_counting_iterator(0),
thrust::make_counting_iterator(num_columns),
[&](int column_id) { tree.push(cudf::ast::column_reference(reuse_columns ? 0 : column_id)); });

// Create expression trees

// Note that a std::list is required here because of its guarantees against reference invalidation
// when items are added or removed. References to items in a std::vector are not safe if the
// vector must re-allocate.
auto expressions = std::list<cudf::ast::operation>();

// Construct tree that chains additions like (((a + b) + c) + d)
auto const op = cudf::ast::ast_operator::ADD;
if (reuse_columns) {
expressions.push_back(cudf::ast::operation(op, column_refs.at(0), column_refs.at(0)));
tree.push(cudf::ast::operation(op, tree.at(0), tree.at(0)));
for (cudf::size_type i = 0; i < tree_levels - 1; i++) {
expressions.push_back(cudf::ast::operation(op, expressions.back(), column_refs.at(0)));
tree.push(cudf::ast::operation(op, tree.back(), tree.at(0)));
}
} else {
expressions.push_back(cudf::ast::operation(op, column_refs.at(0), column_refs.at(1)));
std::transform(std::next(column_refs.cbegin(), 2),
column_refs.cend(),
std::back_inserter(expressions),
[&](auto const& column_ref) {
return cudf::ast::operation(op, expressions.back(), column_ref);
});
tree.push(cudf::ast::operation(op, tree.at(0), tree.at(1)));
std::for_each(
thrust::make_counting_iterator(2),
thrust::make_counting_iterator(num_columns),
[&](int col_id) { tree.push(cudf::ast::operation(op, tree.back(), tree.at(col_id))); });
}

auto const& expression_tree_root = expressions.back();
auto const& root_expression = tree.back();

// Use the number of bytes read from global memory
state.add_global_memory_reads<key_type>(static_cast<size_t>(num_rows) * (tree_levels + 1));
state.add_global_memory_writes<key_type>(num_rows);

state.exec(nvbench::exec_tag::sync,
[&](nvbench::launch&) { cudf::compute_column(table, expression_tree_root); });
[&](nvbench::launch&) { cudf::compute_column(table, root_expression); });
}

template <cudf::ast::ast_operator cmp_op, cudf::ast::ast_operator reduce_op>
Expand All @@ -117,10 +109,10 @@ static void BM_string_compare_ast_transform(nvbench::state& state)
CUDF_EXPECTS(tree_levels > 0, "benchmarks require 1 or more comparisons");

// Create table data
auto const num_cols = tree_levels * 2;
auto const num_columns = tree_levels * 2;
std::vector<std::unique_ptr<cudf::column>> columns;
std::for_each(
thrust::make_counting_iterator(0), thrust::make_counting_iterator(num_cols), [&](size_t) {
thrust::make_counting_iterator(0), thrust::make_counting_iterator(num_columns), [&](size_t) {
columns.emplace_back(create_string_column(num_rows, string_width, hit_rate));
});

Expand All @@ -135,38 +127,36 @@ static void BM_string_compare_ast_transform(nvbench::state& state)
return size + cudf::strings_column_view{column}.chars_size(cudf::get_default_stream());
});

// Create column references
auto column_refs = std::vector<cudf::ast::column_reference>();
std::transform(thrust::make_counting_iterator(0),
thrust::make_counting_iterator(num_cols),
std::back_inserter(column_refs),
[](auto const& column_id) { return cudf::ast::column_reference(column_id); });
// Create expression tree
cudf::ast::tree tree;

// Create expression trees
std::list<cudf::ast::operation> expressions;
// Create column references
std::for_each(thrust::make_counting_iterator(0),
thrust::make_counting_iterator(num_columns),
[&](int column_id) { tree.push(cudf::ast::column_reference{column_id}); });

// Construct AST tree (a == b && c == d && e == f && ...)

expressions.emplace_back(cudf::ast::operation(cmp_op, column_refs[0], column_refs[1]));
tree.push(cudf::ast::operation(cmp_op, tree[0], tree[1]));

std::for_each(thrust::make_counting_iterator(1),
thrust::make_counting_iterator(tree_levels),
[&](size_t idx) {
auto const& lhs = expressions.back();
auto const& rhs = expressions.emplace_back(
cudf::ast::operation(cmp_op, column_refs[idx * 2], column_refs[idx * 2 + 1]));
expressions.emplace_back(cudf::ast::operation(reduce_op, lhs, rhs));
auto const& lhs = tree.back();
auto const& rhs =
tree.push(cudf::ast::operation(cmp_op, tree[idx * 2], tree[idx * 2 + 1]));
tree.push(cudf::ast::operation(reduce_op, lhs, rhs));
});

auto const& expression_tree_root = expressions.back();

// Use the number of bytes read from global memory
state.add_element_count(chars_size, "chars_size");
state.add_global_memory_reads<nvbench::uint8_t>(chars_size);
state.add_global_memory_writes<nvbench::int32_t>(num_rows);

auto const& expression = tree.back();

state.exec(nvbench::exec_tag::sync,
[&](nvbench::launch&) { cudf::compute_column(table, expression_tree_root); });
[&](nvbench::launch&) { cudf::compute_column(table, expression); });
}

#define AST_TRANSFORM_BENCHMARK_DEFINE(name, key_type, tree_type, reuse_columns, nullable) \
Expand Down
3 changes: 1 addition & 2 deletions cpp/examples/basic/CMakeLists.txt
Original file line number Diff line number Diff line change
@@ -1,12 +1,11 @@
# Copyright (c) 2020-2024, NVIDIA CORPORATION.
# Copyright (c) 2020-2025, NVIDIA CORPORATION.

cmake_minimum_required(VERSION 3.26.4)

include(../set_cuda_architecture.cmake)

# initialize cuda architecture
rapids_cuda_init_architectures(basic_example)
rapids_cuda_set_architectures(RAPIDS)

project(
basic_example
Expand Down
3 changes: 1 addition & 2 deletions cpp/examples/billion_rows/CMakeLists.txt
Original file line number Diff line number Diff line change
@@ -1,12 +1,11 @@
# Copyright (c) 2024, NVIDIA CORPORATION.
# Copyright (c) 2024-2025, NVIDIA CORPORATION.

cmake_minimum_required(VERSION 3.26.4)

include(../set_cuda_architecture.cmake)

# initialize cuda architecture
rapids_cuda_init_architectures(billion_rows)
rapids_cuda_set_architectures(RAPIDS)

project(
billion_rows
Expand Down
3 changes: 1 addition & 2 deletions cpp/examples/interop/CMakeLists.txt
Original file line number Diff line number Diff line change
@@ -1,11 +1,10 @@
# Copyright (c) 2024, NVIDIA CORPORATION.
# Copyright (c) 2024-2025, NVIDIA CORPORATION.

cmake_minimum_required(VERSION 3.26.4)

include(../set_cuda_architecture.cmake)

rapids_cuda_init_architectures(interop_example)
rapids_cuda_set_architectures(RAPIDS)

project(
interop_example
Expand Down
3 changes: 1 addition & 2 deletions cpp/examples/nested_types/CMakeLists.txt
Original file line number Diff line number Diff line change
@@ -1,12 +1,11 @@
# Copyright (c) 2023-2024, NVIDIA CORPORATION.
# Copyright (c) 2023-2025, NVIDIA CORPORATION.

cmake_minimum_required(VERSION 3.26.4)

include(../set_cuda_architecture.cmake)

# initialize cuda architecture
rapids_cuda_init_architectures(nested_types)
rapids_cuda_set_architectures(RAPIDS)

project(
nested_types
Expand Down
3 changes: 1 addition & 2 deletions cpp/examples/parquet_io/CMakeLists.txt
Original file line number Diff line number Diff line change
@@ -1,12 +1,11 @@
# Copyright (c) 2024, NVIDIA CORPORATION.
# Copyright (c) 2024-2025, NVIDIA CORPORATION.

cmake_minimum_required(VERSION 3.26.4)

include(../set_cuda_architecture.cmake)

# initialize cuda architecture
rapids_cuda_init_architectures(parquet_io)
rapids_cuda_set_architectures(RAPIDS)

project(
parquet_io
Expand Down
3 changes: 1 addition & 2 deletions cpp/examples/strings/CMakeLists.txt
Original file line number Diff line number Diff line change
@@ -1,12 +1,11 @@
# Copyright (c) 2022-2024, NVIDIA CORPORATION.
# Copyright (c) 2022-2025, NVIDIA CORPORATION.

cmake_minimum_required(VERSION 3.26.4)

include(../set_cuda_architecture.cmake)

# initialize cuda architecture
rapids_cuda_init_architectures(strings_examples)
rapids_cuda_set_architectures(RAPIDS)

project(
strings_examples
Expand Down
17 changes: 8 additions & 9 deletions cpp/include/cudf/ast/expressions.hpp
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
/*
* Copyright (c) 2020-2024, NVIDIA CORPORATION.
* Copyright (c) 2020-2025, NVIDIA CORPORATION.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
Expand Down Expand Up @@ -556,7 +556,7 @@ class column_name_reference : public expression {

/**
* @brief An AST expression tree. It owns and contains multiple dependent expressions. All the
* expressions are destroyed when the tree is destructed.
* expressions are destroyed when the tree is destroyed.
*/
class tree {
public:
Expand Down Expand Up @@ -588,12 +588,11 @@ class tree {
* @returns a reference to the added expression
*/
template <typename Expr, typename... Args>
Expr const& emplace(Args&&... args)
std::enable_if_t<std::is_base_of_v<expression, Expr>, Expr const&> emplace(Args&&... args)
{
static_assert(std::is_base_of_v<expression, Expr>);
auto expr = std::make_shared<Expr>(std::forward<Args>(args)...);
auto expr = std::make_unique<Expr>(std::forward<Args>(args)...);
Expr const& expr_ref = *expr;
expressions.emplace_back(std::static_pointer_cast<expression>(std::move(expr)));
expressions.emplace_back(std::move(expr));
return expr_ref;
}

Expand All @@ -603,7 +602,7 @@ class tree {
* @returns a reference to the added expression
*/
template <typename Expr>
Expr const& push(Expr expr)
decltype(auto) push(Expr expr)
{
return emplace<Expr>(std::move(expr));
}
Expand Down Expand Up @@ -641,9 +640,9 @@ class tree {
expression const& operator[](size_t index) const { return *expressions[index]; }

private:
// TODO: use better ownership semantics, the shared_ptr here is redundant. Consider using a bump
// TODO: use better ownership semantics, the unique_ptr here is redundant. Consider using a bump
// allocator with type-erased deleters.
std::vector<std::shared_ptr<expression>> expressions;
std::vector<std::unique_ptr<expression>> expressions;
};

/** @} */ // end of group
Expand Down
Loading

0 comments on commit d08d8c2

Please sign in to comment.