diff --git a/src/duckdb/src/common/vector_operations/comparison_operators.cpp b/src/duckdb/src/common/vector_operations/comparison_operators.cpp index 194f8462..8a56cdfc 100644 --- a/src/duckdb/src/common/vector_operations/comparison_operators.cpp +++ b/src/duckdb/src/common/vector_operations/comparison_operators.cpp @@ -167,6 +167,9 @@ static void NestedComparisonExecutor(Vector &left, Vector &right, Vector &result auto &result_validity = ConstantVector::Validity(result); SelectionVector true_sel(1); auto match_count = ComparisonSelector::Select(left, right, nullptr, 1, &true_sel, nullptr, result_validity); + // since we are dealing with nested types where the values are not NULL, the result is always valid (i.e true or + // false) + result_validity.SetAllValid(1); auto result_data = ConstantVector::GetData(result); result_data[0] = match_count > 0; return; diff --git a/src/duckdb/src/execution/index/art/plan_art.cpp b/src/duckdb/src/execution/index/art/plan_art.cpp new file mode 100644 index 00000000..2acc5699 --- /dev/null +++ b/src/duckdb/src/execution/index/art/plan_art.cpp @@ -0,0 +1,94 @@ + +#include "duckdb/execution/operator/order/physical_order.hpp" +#include "duckdb/execution/operator/projection/physical_projection.hpp" +#include "duckdb/execution/operator/filter/physical_filter.hpp" +#include "duckdb/execution/operator/schema/physical_create_art_index.hpp" + +#include "duckdb/planner/expression/bound_operator_expression.hpp" +#include "duckdb/planner/expression/bound_reference_expression.hpp" +#include "duckdb/planner/operator/logical_create_index.hpp" + +#include "duckdb/execution/index/art/art.hpp" + +namespace duckdb { + +unique_ptr ART::CreatePlan(PlanIndexInput &input) { + // generate a physical plan for the parallel index creation which consists of the following operators + // table scan - projection (for expression execution) - filter (NOT NULL) - order (if applicable) - create index + + auto &op = input.op; + auto &table_scan = input.table_scan; + + vector new_column_types; + vector> select_list; + for (idx_t i = 0; i < op.expressions.size(); i++) { + new_column_types.push_back(op.expressions[i]->return_type); + select_list.push_back(std::move(op.expressions[i])); + } + new_column_types.emplace_back(LogicalType::ROW_TYPE); + select_list.push_back(make_uniq(LogicalType::ROW_TYPE, op.info->scan_types.size() - 1)); + + auto projection = make_uniq(new_column_types, std::move(select_list), op.estimated_cardinality); + projection->children.push_back(std::move(table_scan)); + + // filter operator for IS_NOT_NULL on each key column + + vector filter_types; + vector> filter_select_list; + + for (idx_t i = 0; i < new_column_types.size() - 1; i++) { + filter_types.push_back(new_column_types[i]); + auto is_not_null_expr = + make_uniq(ExpressionType::OPERATOR_IS_NOT_NULL, LogicalType::BOOLEAN); + auto bound_ref = make_uniq(new_column_types[i], i); + is_not_null_expr->children.push_back(std::move(bound_ref)); + filter_select_list.push_back(std::move(is_not_null_expr)); + } + + auto null_filter = + make_uniq(std::move(filter_types), std::move(filter_select_list), op.estimated_cardinality); + null_filter->types.emplace_back(LogicalType::ROW_TYPE); + null_filter->children.push_back(std::move(projection)); + + // determine if we sort the data prior to index creation + // we don't sort, if either VARCHAR or compound key + auto perform_sorting = true; + if (op.unbound_expressions.size() > 1) { + perform_sorting = false; + } else if (op.unbound_expressions[0]->return_type.InternalType() == PhysicalType::VARCHAR) { + perform_sorting = false; + } + + // actual physical create index operator + + auto physical_create_index = + make_uniq(op, op.table, op.info->column_ids, std::move(op.info), + std::move(op.unbound_expressions), op.estimated_cardinality, perform_sorting); + + if (perform_sorting) { + + // optional order operator + vector orders; + vector projections; + for (idx_t i = 0; i < new_column_types.size() - 1; i++) { + auto col_expr = make_uniq_base(new_column_types[i], i); + orders.emplace_back(OrderType::ASCENDING, OrderByNullType::NULLS_FIRST, std::move(col_expr)); + projections.emplace_back(i); + } + projections.emplace_back(new_column_types.size() - 1); + + auto physical_order = make_uniq(new_column_types, std::move(orders), std::move(projections), + op.estimated_cardinality); + physical_order->children.push_back(std::move(null_filter)); + + physical_create_index->children.push_back(std::move(physical_order)); + } else { + + // no ordering + physical_create_index->children.push_back(std::move(null_filter)); + } + + return std::move(physical_create_index); +} + +} // namespace duckdb diff --git a/src/duckdb/src/execution/index/index_type_set.cpp b/src/duckdb/src/execution/index/index_type_set.cpp index 4e1dda7e..4fe7cda4 100644 --- a/src/duckdb/src/execution/index/index_type_set.cpp +++ b/src/duckdb/src/execution/index/index_type_set.cpp @@ -5,10 +5,13 @@ namespace duckdb { IndexTypeSet::IndexTypeSet() { - // Register the ART index type + + // Register the ART index type by default IndexType art_index_type; art_index_type.name = ART::TYPE_NAME; art_index_type.create_instance = ART::Create; + art_index_type.create_plan = ART::CreatePlan; + RegisterIndexType(art_index_type); } diff --git a/src/duckdb/src/execution/operator/persistent/physical_export.cpp b/src/duckdb/src/execution/operator/persistent/physical_export.cpp index 2e0f4bbf..733b01f7 100644 --- a/src/duckdb/src/execution/operator/persistent/physical_export.cpp +++ b/src/duckdb/src/execution/operator/persistent/physical_export.cpp @@ -127,6 +127,10 @@ void PhysicalExport::ExtractEntries(ClientContext &context, vector PhysicalPlanGenerator::CreatePlan(LogicalCreateIndex &op) { - // generate a physical plan for the parallel index creation which consists of the following operators - // table scan - projection (for expression execution) - filter (NOT NULL) - order (if applicable) - create index - - D_ASSERT(op.children.size() == 1); - auto table_scan = CreatePlan(*op.children[0]); // validate that all expressions contain valid scalar functions // e.g. get_current_timestamp(), random(), and sequence values are not allowed as index keys @@ -30,12 +22,14 @@ unique_ptr PhysicalPlanGenerator::CreatePlan(LogicalCreateInde } } - // if we get here and the index type is not ART, we throw an exception - // because we don't support any other index type yet. However, an operator extension could have - // replaced this part of the plan with a different index creation operator. - if (op.info->index_type != ART::TYPE_NAME) { + // Do we have a valid index type? + const auto index_type = context.db->config.GetIndexTypes().FindByName(op.info->index_type); + if (!index_type) { throw BinderException("Unknown index type: " + op.info->index_type); } + if (!index_type->create_plan) { + throw InternalException("Index type '%s' is missing a create_plan function", op.info->index_type); + } // table scan operator for index key columns and row IDs dependencies.AddDependency(op.table); @@ -43,78 +37,11 @@ unique_ptr PhysicalPlanGenerator::CreatePlan(LogicalCreateInde D_ASSERT(op.info->scan_types.size() - 1 <= op.info->names.size()); D_ASSERT(op.info->scan_types.size() - 1 <= op.info->column_ids.size()); - // projection to execute expressions on the key columns - - vector new_column_types; - vector> select_list; - for (idx_t i = 0; i < op.expressions.size(); i++) { - new_column_types.push_back(op.expressions[i]->return_type); - select_list.push_back(std::move(op.expressions[i])); - } - new_column_types.emplace_back(LogicalType::ROW_TYPE); - select_list.push_back(make_uniq(LogicalType::ROW_TYPE, op.info->scan_types.size() - 1)); - - auto projection = make_uniq(new_column_types, std::move(select_list), op.estimated_cardinality); - projection->children.push_back(std::move(table_scan)); - - // filter operator for IS_NOT_NULL on each key column - - vector filter_types; - vector> filter_select_list; - - for (idx_t i = 0; i < new_column_types.size() - 1; i++) { - filter_types.push_back(new_column_types[i]); - auto is_not_null_expr = - make_uniq(ExpressionType::OPERATOR_IS_NOT_NULL, LogicalType::BOOLEAN); - auto bound_ref = make_uniq(new_column_types[i], i); - is_not_null_expr->children.push_back(std::move(bound_ref)); - filter_select_list.push_back(std::move(is_not_null_expr)); - } - - auto null_filter = - make_uniq(std::move(filter_types), std::move(filter_select_list), op.estimated_cardinality); - null_filter->types.emplace_back(LogicalType::ROW_TYPE); - null_filter->children.push_back(std::move(projection)); - - // determine if we sort the data prior to index creation - // we don't sort, if either VARCHAR or compound key - auto perform_sorting = true; - if (op.unbound_expressions.size() > 1) { - perform_sorting = false; - } else if (op.unbound_expressions[0]->return_type.InternalType() == PhysicalType::VARCHAR) { - perform_sorting = false; - } - - // actual physical create index operator - - auto physical_create_index = - make_uniq(op, op.table, op.info->column_ids, std::move(op.info), - std::move(op.unbound_expressions), op.estimated_cardinality, perform_sorting); - - if (perform_sorting) { - - // optional order operator - vector orders; - vector projections; - for (idx_t i = 0; i < new_column_types.size() - 1; i++) { - auto col_expr = make_uniq_base(new_column_types[i], i); - orders.emplace_back(OrderType::ASCENDING, OrderByNullType::NULLS_FIRST, std::move(col_expr)); - projections.emplace_back(i); - } - projections.emplace_back(new_column_types.size() - 1); - - auto physical_order = make_uniq(new_column_types, std::move(orders), std::move(projections), - op.estimated_cardinality); - physical_order->children.push_back(std::move(null_filter)); - - physical_create_index->children.push_back(std::move(physical_order)); - } else { - - // no ordering - physical_create_index->children.push_back(std::move(null_filter)); - } + D_ASSERT(op.children.size() == 1); + auto table_scan = CreatePlan(*op.children[0]); - return std::move(physical_create_index); + PlanIndexInput input(context, op, table_scan); + return index_type->create_plan(input); } } // namespace duckdb diff --git a/src/duckdb/src/function/scalar/strftime_format.cpp b/src/duckdb/src/function/scalar/strftime_format.cpp index 3525519a..8ab46ace 100644 --- a/src/duckdb/src/function/scalar/strftime_format.cpp +++ b/src/duckdb/src/function/scalar/strftime_format.cpp @@ -1185,8 +1185,7 @@ bool StrpTimeFormat::Parse(const char *data, size_t size, ParseResult &result, b case StrTimeSpecifier::YEAR_WITHOUT_CENTURY_PADDED: case StrTimeSpecifier::YEAR_WITHOUT_CENTURY: case StrTimeSpecifier::YEAR_DECIMAL: - // Part of the offset - break; + // Switch to offset parsing case StrTimeSpecifier::WEEKDAY_DECIMAL: // First offset specifier offset_specifier = specifiers[i]; diff --git a/src/duckdb/src/function/table/version/pragma_version.cpp b/src/duckdb/src/function/table/version/pragma_version.cpp index 1049645a..4c49147f 100644 --- a/src/duckdb/src/function/table/version/pragma_version.cpp +++ b/src/duckdb/src/function/table/version/pragma_version.cpp @@ -1,5 +1,5 @@ #ifndef DUCKDB_PATCH_VERSION -#define DUCKDB_PATCH_VERSION "3-dev123" +#define DUCKDB_PATCH_VERSION "3-dev142" #endif #ifndef DUCKDB_MINOR_VERSION #define DUCKDB_MINOR_VERSION 1 @@ -8,10 +8,10 @@ #define DUCKDB_MAJOR_VERSION 1 #endif #ifndef DUCKDB_VERSION -#define DUCKDB_VERSION "v1.1.3-dev123" +#define DUCKDB_VERSION "v1.1.3-dev142" #endif #ifndef DUCKDB_SOURCE_ID -#define DUCKDB_SOURCE_ID "b653a8c2b7" +#define DUCKDB_SOURCE_ID "7f34190f3f" #endif #include "duckdb/function/table/system_functions.hpp" #include "duckdb/main/database.hpp" diff --git a/src/duckdb/src/include/duckdb/execution/index/art/art.hpp b/src/duckdb/src/include/duckdb/execution/index/art/art.hpp index 076a9356..00299952 100644 --- a/src/duckdb/src/include/duckdb/execution/index/art/art.hpp +++ b/src/duckdb/src/include/duckdb/execution/index/art/art.hpp @@ -55,6 +55,9 @@ class ART : public BoundIndex { return std::move(art); } + //! Plan index construction. + static unique_ptr CreatePlan(PlanIndexInput &input); + //! Root of the tree. Node tree = Node(); //! Fixed-size allocators holding the ART nodes. diff --git a/src/duckdb/src/include/duckdb/execution/index/index_type.hpp b/src/duckdb/src/include/duckdb/execution/index/index_type.hpp index 3417b15e..52d779f3 100644 --- a/src/duckdb/src/include/duckdb/execution/index/index_type.hpp +++ b/src/duckdb/src/include/duckdb/execution/index/index_type.hpp @@ -18,6 +18,8 @@ namespace duckdb { class BoundIndex; +class PhysicalOperator; +class LogicalCreateIndex; enum class IndexConstraintType : uint8_t; class Expression; class TableIOManager; @@ -43,7 +45,19 @@ struct CreateIndexInput { options(options) {}; }; +struct PlanIndexInput { + ClientContext &context; + LogicalCreateIndex &op; + unique_ptr &table_scan; + + PlanIndexInput(ClientContext &context_p, LogicalCreateIndex &op_p, unique_ptr &table_scan_p) + : context(context_p), op(op_p), table_scan(table_scan_p) { + } +}; + typedef unique_ptr (*index_create_function_t)(CreateIndexInput &input); +typedef unique_ptr (*index_plan_function_t)(PlanIndexInput &input); + //! A index "type" class IndexType { public: @@ -51,7 +65,8 @@ class IndexType { string name; // Callbacks - index_create_function_t create_instance; + index_plan_function_t create_plan = nullptr; + index_create_function_t create_instance = nullptr; }; } // namespace duckdb diff --git a/src/duckdb/src/include/duckdb/optimizer/unnest_rewriter.hpp b/src/duckdb/src/include/duckdb/optimizer/unnest_rewriter.hpp index 3798cbd4..842d5ef2 100644 --- a/src/duckdb/src/include/duckdb/optimizer/unnest_rewriter.hpp +++ b/src/duckdb/src/include/duckdb/optimizer/unnest_rewriter.hpp @@ -8,8 +8,8 @@ #pragma once -#include "duckdb/planner/logical_operator.hpp" #include "duckdb/common/pair.hpp" +#include "duckdb/planner/logical_operator.hpp" namespace duckdb { @@ -61,14 +61,14 @@ class UnnestRewriter { private: //! Find delim joins that contain an UNNEST - void FindCandidates(unique_ptr *op_ptr, vector *> &candidates); + void FindCandidates(unique_ptr &op, vector>> &candidates); //! Rewrite a delim join that contains an UNNEST - bool RewriteCandidate(unique_ptr *candidate); + bool RewriteCandidate(unique_ptr &candidate); //! Update the bindings of the RHS sequence of LOGICAL_PROJECTION(s) - void UpdateRHSBindings(unique_ptr *plan_ptr, unique_ptr *candidate, + void UpdateRHSBindings(unique_ptr &plan, unique_ptr &candidate, UnnestRewriterPlanUpdater &updater); //! Update the bindings of the BOUND_UNNEST expression of the LOGICAL_UNNEST - void UpdateBoundUnnestBindings(UnnestRewriterPlanUpdater &updater, unique_ptr *candidate); + void UpdateBoundUnnestBindings(UnnestRewriterPlanUpdater &updater, unique_ptr &candidate); //! Store all delim columns of the delim join void GetDelimColumns(LogicalOperator &op); diff --git a/src/duckdb/src/optimizer/pushdown/pushdown_left_join.cpp b/src/duckdb/src/optimizer/pushdown/pushdown_left_join.cpp index a20b8132..e142a1d7 100644 --- a/src/duckdb/src/optimizer/pushdown/pushdown_left_join.cpp +++ b/src/duckdb/src/optimizer/pushdown/pushdown_left_join.cpp @@ -31,7 +31,7 @@ static bool FilterRemovesNull(ClientContext &context, ExpressionRewriter &rewrit unordered_set &right_bindings) { // make a copy of the expression auto copy = expr->Copy(); - // replace all BoundColumnRef expressions frmo the RHS with NULL constants in the copied expression + // replace all BoundColumnRef expressions from the RHS with NULL constants in the copied expression copy = ReplaceColRefWithNull(std::move(copy), right_bindings); // attempt to flatten the expression by running the expression rewriter on it @@ -97,6 +97,9 @@ unique_ptr FilterPushdown::PushdownLeftJoin(unique_ptrfilter.get(), right_bindings)) { // the filter removes NULL values, turn it into an inner join join.join_type = JoinType::INNER; diff --git a/src/duckdb/src/optimizer/unnest_rewriter.cpp b/src/duckdb/src/optimizer/unnest_rewriter.cpp index f5d91061..796a9338 100644 --- a/src/duckdb/src/optimizer/unnest_rewriter.cpp +++ b/src/duckdb/src/optimizer/unnest_rewriter.cpp @@ -1,13 +1,13 @@ #include "duckdb/optimizer/unnest_rewriter.hpp" #include "duckdb/common/pair.hpp" -#include "duckdb/planner/operator/logical_delim_get.hpp" +#include "duckdb/planner/expression/bound_columnref_expression.hpp" +#include "duckdb/planner/expression/bound_unnest_expression.hpp" #include "duckdb/planner/operator/logical_comparison_join.hpp" -#include "duckdb/planner/operator/logical_unnest.hpp" +#include "duckdb/planner/operator/logical_delim_get.hpp" #include "duckdb/planner/operator/logical_projection.hpp" +#include "duckdb/planner/operator/logical_unnest.hpp" #include "duckdb/planner/operator/logical_window.hpp" -#include "duckdb/planner/expression/bound_unnest_expression.hpp" -#include "duckdb/planner/expression/bound_columnref_expression.hpp" namespace duckdb { @@ -35,8 +35,8 @@ void UnnestRewriterPlanUpdater::VisitExpression(unique_ptr *expressi unique_ptr UnnestRewriter::Optimize(unique_ptr op) { UnnestRewriterPlanUpdater updater; - vector *> candidates; - FindCandidates(&op, candidates); + vector>> candidates; + FindCandidates(op, candidates); // rewrite the plan and update the bindings for (auto &candidate : candidates) { @@ -47,7 +47,7 @@ unique_ptr UnnestRewriter::Optimize(unique_ptr // update the bindings of the BOUND_UNNEST expression UpdateBoundUnnestBindings(updater, candidate); // update the sequence of LOGICAL_PROJECTION(s) - UpdateRHSBindings(&op, candidate, updater); + UpdateRHSBindings(op, candidate, updater); // reset delim_columns.clear(); lhs_bindings.clear(); @@ -57,12 +57,11 @@ unique_ptr UnnestRewriter::Optimize(unique_ptr return op; } -void UnnestRewriter::FindCandidates(unique_ptr *op_ptr, - vector *> &candidates) { - auto op = op_ptr->get(); +void UnnestRewriter::FindCandidates(unique_ptr &op, + vector>> &candidates) { // search children before adding, so that we add candidates bottom-up for (auto &child : op->children) { - FindCandidates(&child, candidates); + FindCandidates(child, candidates); } // search for operator that has a LOGICAL_DELIM_JOIN as its child @@ -100,14 +99,15 @@ void UnnestRewriter::FindCandidates(unique_ptr *op_ptr, curr_op = &curr_op->get()->children[0]; } - if (curr_op->get()->type == LogicalOperatorType::LOGICAL_UNNEST) { - candidates.push_back(op_ptr); + if (curr_op->get()->type == LogicalOperatorType::LOGICAL_UNNEST && + curr_op->get()->children[0]->type == LogicalOperatorType::LOGICAL_DELIM_GET) { + candidates.push_back(op); } } -bool UnnestRewriter::RewriteCandidate(unique_ptr *candidate) { +bool UnnestRewriter::RewriteCandidate(unique_ptr &candidate) { - auto &topmost_op = (LogicalOperator &)**candidate; + auto &topmost_op = *candidate; if (topmost_op.type != LogicalOperatorType::LOGICAL_PROJECTION && topmost_op.type != LogicalOperatorType::LOGICAL_WINDOW && topmost_op.type != LogicalOperatorType::LOGICAL_FILTER && @@ -158,10 +158,10 @@ bool UnnestRewriter::RewriteCandidate(unique_ptr *candidate) { return true; } -void UnnestRewriter::UpdateRHSBindings(unique_ptr *plan_ptr, unique_ptr *candidate, +void UnnestRewriter::UpdateRHSBindings(unique_ptr &plan, unique_ptr &candidate, UnnestRewriterPlanUpdater &updater) { - auto &topmost_op = (LogicalOperator &)**candidate; + auto &topmost_op = *candidate; idx_t shift = lhs_bindings.size(); vector *> path_to_unnest; @@ -189,7 +189,7 @@ void UnnestRewriter::UpdateRHSBindings(unique_ptr *plan_ptr, un } // update all bindings by shifting them - updater.VisitOperator(*plan_ptr->get()); + updater.VisitOperator(*plan); updater.replace_bindings.clear(); // update all bindings coming from the LHS to RHS bindings @@ -212,7 +212,7 @@ void UnnestRewriter::UpdateRHSBindings(unique_ptr *plan_ptr, un unnest.expressions.clear(); unnest.children.clear(); // update the bindings of the plan - updater.VisitOperator(*plan_ptr->get()); + updater.VisitOperator(*plan); updater.replace_bindings.clear(); // add the children again for (auto &temp_bound_unnest : temp_bound_unnests) { @@ -253,9 +253,9 @@ void UnnestRewriter::UpdateRHSBindings(unique_ptr *plan_ptr, un } void UnnestRewriter::UpdateBoundUnnestBindings(UnnestRewriterPlanUpdater &updater, - unique_ptr *candidate) { + unique_ptr &candidate) { - auto &topmost_op = (LogicalOperator &)**candidate; + auto &topmost_op = *candidate; // traverse LOGICAL_PROJECTION(s) auto curr_op = &topmost_op.children[0]; diff --git a/src/duckdb/src/parser/transform/statement/transform_pivot_stmt.cpp b/src/duckdb/src/parser/transform/statement/transform_pivot_stmt.cpp index fee41aad..8b798a83 100644 --- a/src/duckdb/src/parser/transform/statement/transform_pivot_stmt.cpp +++ b/src/duckdb/src/parser/transform/statement/transform_pivot_stmt.cpp @@ -15,6 +15,7 @@ #include "duckdb/parser/expression/function_expression.hpp" #include "duckdb/parser/result_modifier.hpp" #include "duckdb/parser/tableref/subqueryref.hpp" +#include "duckdb/common/types/uuid.hpp" namespace duckdb { @@ -168,7 +169,6 @@ unique_ptr Transformer::TransformPivotStatement(duckdb_libpgquery::PG // generate CREATE TYPE statements for each of the columns that do not have an IN list bool is_pivot = !pivot->unpivots; auto columns = TransformPivotList(*pivot->columns, is_pivot); - auto pivot_idx = PivotEntryCount(); for (idx_t c = 0; c < columns.size(); c++) { auto &col = columns[c]; if (!col.pivot_enum.empty() || !col.entries.empty()) { @@ -177,7 +177,7 @@ unique_ptr Transformer::TransformPivotStatement(duckdb_libpgquery::PG if (col.pivot_expressions.size() != 1) { throw InternalException("PIVOT statement with multiple names in pivot entry!?"); } - auto enum_name = "__pivot_enum_" + std::to_string(pivot_idx) + "_" + std::to_string(c); + auto enum_name = "__pivot_enum_" + UUID::ToString(UUID::GenerateRandomUUID()); auto new_select = make_uniq(); ExtractCTEsRecursive(new_select->cte_map); diff --git a/src/duckdb/ub_src_execution_index_art.cpp b/src/duckdb/ub_src_execution_index_art.cpp index 503c9e38..2f700eb0 100644 --- a/src/duckdb/ub_src_execution_index_art.cpp +++ b/src/duckdb/ub_src_execution_index_art.cpp @@ -20,3 +20,5 @@ #include "src/execution/index/art/art.cpp" +#include "src/execution/index/art/plan_art.cpp" +