Skip to content

Commit

Permalink
Update vendored DuckDB sources to 941d1e6
Browse files Browse the repository at this point in the history
  • Loading branch information
duckdblabs-bot committed Oct 31, 2024
1 parent 941d1e6 commit 8934827
Show file tree
Hide file tree
Showing 14 changed files with 176 additions and 123 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -167,6 +167,9 @@ static void NestedComparisonExecutor(Vector &left, Vector &right, Vector &result
auto &result_validity = ConstantVector::Validity(result);
SelectionVector true_sel(1);
auto match_count = ComparisonSelector::Select<OP>(left, right, nullptr, 1, &true_sel, nullptr, result_validity);
// since we are dealing with nested types where the values are not NULL, the result is always valid (i.e true or
// false)
result_validity.SetAllValid(1);
auto result_data = ConstantVector::GetData<bool>(result);
result_data[0] = match_count > 0;
return;
Expand Down
94 changes: 94 additions & 0 deletions src/duckdb/src/execution/index/art/plan_art.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,94 @@

#include "duckdb/execution/operator/order/physical_order.hpp"
#include "duckdb/execution/operator/projection/physical_projection.hpp"
#include "duckdb/execution/operator/filter/physical_filter.hpp"
#include "duckdb/execution/operator/schema/physical_create_art_index.hpp"

#include "duckdb/planner/expression/bound_operator_expression.hpp"
#include "duckdb/planner/expression/bound_reference_expression.hpp"
#include "duckdb/planner/operator/logical_create_index.hpp"

#include "duckdb/execution/index/art/art.hpp"

namespace duckdb {

//! Builds the physical plan for the parallel creation of an ART index. The resulting pipeline is:
//! table scan -> projection (evaluates the key expressions) -> filter (keys IS NOT NULL) -> order (optional)
//! -> create index.
//! Moves input.table_scan into the plan, and moves the expressions, unbound expressions and info out of input.op.
unique_ptr<PhysicalOperator> ART::CreatePlan(PlanIndexInput &input) {
	auto &create_op = input.op;

	// projection: one output column per index expression, followed by the row ID column
	vector<LogicalType> projected_types;
	vector<unique_ptr<Expression>> projected_exprs;
	for (auto &key_expr : create_op.expressions) {
		projected_types.push_back(key_expr->return_type);
		projected_exprs.push_back(std::move(key_expr));
	}
	projected_types.emplace_back(LogicalType::ROW_TYPE);
	// the row ID is referenced at the last position of the scanned columns
	projected_exprs.push_back(
	    make_uniq<BoundReferenceExpression>(LogicalType::ROW_TYPE, create_op.info->scan_types.size() - 1));

	auto projection =
	    make_uniq<PhysicalProjection>(projected_types, std::move(projected_exprs), create_op.estimated_cardinality);
	projection->children.push_back(std::move(input.table_scan));

	// filter: an IS NOT NULL predicate for every key column (every projected column except the trailing row ID)
	const idx_t key_count = projected_types.size() - 1;
	vector<LogicalType> key_types;
	vector<unique_ptr<Expression>> not_null_predicates;
	for (idx_t key_idx = 0; key_idx < key_count; key_idx++) {
		key_types.push_back(projected_types[key_idx]);
		auto not_null =
		    make_uniq<BoundOperatorExpression>(ExpressionType::OPERATOR_IS_NOT_NULL, LogicalType::BOOLEAN);
		not_null->children.push_back(make_uniq<BoundReferenceExpression>(projected_types[key_idx], key_idx));
		not_null_predicates.push_back(std::move(not_null));
	}

	auto null_filter = make_uniq<PhysicalFilter>(std::move(key_types), std::move(not_null_predicates),
	                                             create_op.estimated_cardinality);
	// the filter also passes the row ID column through
	null_filter->types.emplace_back(LogicalType::ROW_TYPE);
	null_filter->children.push_back(std::move(projection));

	// the data is only sorted prior to index creation for a single key that is not a VARCHAR
	bool perform_sorting = create_op.unbound_expressions.size() <= 1 &&
	                       create_op.unbound_expressions[0]->return_type.InternalType() != PhysicalType::VARCHAR;

	// the operator that actually creates the index
	auto create_index = make_uniq<PhysicalCreateARTIndex>(
	    create_op, create_op.table, create_op.info->column_ids, std::move(create_op.info),
	    std::move(create_op.unbound_expressions), create_op.estimated_cardinality, perform_sorting);

	if (!perform_sorting) {
		// no ordering: the filter feeds the index creation directly
		create_index->children.push_back(std::move(null_filter));
		return std::move(create_index);
	}

	// order operator: sort ascending on every key column and keep all columns, including the row ID
	vector<BoundOrderByNode> sort_nodes;
	vector<idx_t> kept_columns;
	for (idx_t key_idx = 0; key_idx < key_count; key_idx++) {
		auto key_ref = make_uniq_base<Expression, BoundReferenceExpression>(projected_types[key_idx], key_idx);
		sort_nodes.emplace_back(OrderType::ASCENDING, OrderByNullType::NULLS_FIRST, std::move(key_ref));
		kept_columns.emplace_back(key_idx);
	}
	kept_columns.emplace_back(projected_types.size() - 1);

	auto order = make_uniq<PhysicalOrder>(projected_types, std::move(sort_nodes), std::move(kept_columns),
	                                      create_op.estimated_cardinality);
	order->children.push_back(std::move(null_filter));
	create_index->children.push_back(std::move(order));

	return std::move(create_index);
}

} // namespace duckdb
5 changes: 4 additions & 1 deletion src/duckdb/src/execution/index/index_type_set.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -5,10 +5,13 @@
namespace duckdb {

//! Constructs the set of index types with the ART index type registered in it
IndexTypeSet::IndexTypeSet() {
	// ART is the index type that is registered by default
	IndexType art;
	art.name = ART::TYPE_NAME;
	art.create_plan = ART::CreatePlan;
	art.create_instance = ART::Create;
	RegisterIndexType(art);
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -127,6 +127,10 @@ void PhysicalExport::ExtractEntries(ClientContext &context, vector<reference<Sch
ExportEntries &result) {
for (auto &schema_p : schema_list) {
auto &schema = schema_p.get();
auto &catalog = schema.ParentCatalog();
if (catalog.IsSystemCatalog() || catalog.IsTemporaryCatalog()) {
continue;
}
if (!schema.internal) {
result.schemas.push_back(schema);
}
Expand Down
101 changes: 14 additions & 87 deletions src/duckdb/src/execution/physical_plan/plan_create_index.cpp
Original file line number Diff line number Diff line change
@@ -1,24 +1,16 @@
#include "duckdb/catalog/catalog_entry/table_catalog_entry.hpp"
#include "duckdb/execution/operator/projection/physical_projection.hpp"
#include "duckdb/execution/operator/filter/physical_filter.hpp"
#include "duckdb/execution/operator/scan/physical_table_scan.hpp"
#include "duckdb/execution/operator/schema/physical_create_art_index.hpp"
#include "duckdb/execution/operator/order/physical_order.hpp"
#include "duckdb/execution/physical_plan_generator.hpp"
#include "duckdb/planner/operator/logical_create_index.hpp"
#include "duckdb/planner/operator/logical_get.hpp"
#include "duckdb/planner/expression/bound_operator_expression.hpp"
#include "duckdb/planner/expression/bound_reference_expression.hpp"
#include "duckdb/planner/table_filter.hpp"

#include "duckdb/main/database.hpp"
#include "duckdb/execution/index/index_type.hpp"
#include "duckdb/execution/index/bound_index.hpp"

namespace duckdb {

unique_ptr<PhysicalOperator> PhysicalPlanGenerator::CreatePlan(LogicalCreateIndex &op) {
// generate a physical plan for the parallel index creation which consists of the following operators
// table scan - projection (for expression execution) - filter (NOT NULL) - order (if applicable) - create index

D_ASSERT(op.children.size() == 1);
auto table_scan = CreatePlan(*op.children[0]);

// validate that all expressions contain valid scalar functions
// e.g. get_current_timestamp(), random(), and sequence values are not allowed as index keys
Expand All @@ -30,91 +22,26 @@ unique_ptr<PhysicalOperator> PhysicalPlanGenerator::CreatePlan(LogicalCreateInde
}
}

// if we get here and the index type is not ART, we throw an exception
// because we don't support any other index type yet. However, an operator extension could have
// replaced this part of the plan with a different index creation operator.
if (op.info->index_type != ART::TYPE_NAME) {
// Do we have a valid index type?
const auto index_type = context.db->config.GetIndexTypes().FindByName(op.info->index_type);
if (!index_type) {
throw BinderException("Unknown index type: " + op.info->index_type);
}
if (!index_type->create_plan) {
throw InternalException("Index type '%s' is missing a create_plan function", op.info->index_type);
}

// table scan operator for index key columns and row IDs
dependencies.AddDependency(op.table);

D_ASSERT(op.info->scan_types.size() - 1 <= op.info->names.size());
D_ASSERT(op.info->scan_types.size() - 1 <= op.info->column_ids.size());

// projection to execute expressions on the key columns

vector<LogicalType> new_column_types;
vector<unique_ptr<Expression>> select_list;
for (idx_t i = 0; i < op.expressions.size(); i++) {
new_column_types.push_back(op.expressions[i]->return_type);
select_list.push_back(std::move(op.expressions[i]));
}
new_column_types.emplace_back(LogicalType::ROW_TYPE);
select_list.push_back(make_uniq<BoundReferenceExpression>(LogicalType::ROW_TYPE, op.info->scan_types.size() - 1));

auto projection = make_uniq<PhysicalProjection>(new_column_types, std::move(select_list), op.estimated_cardinality);
projection->children.push_back(std::move(table_scan));

// filter operator for IS_NOT_NULL on each key column

vector<LogicalType> filter_types;
vector<unique_ptr<Expression>> filter_select_list;

for (idx_t i = 0; i < new_column_types.size() - 1; i++) {
filter_types.push_back(new_column_types[i]);
auto is_not_null_expr =
make_uniq<BoundOperatorExpression>(ExpressionType::OPERATOR_IS_NOT_NULL, LogicalType::BOOLEAN);
auto bound_ref = make_uniq<BoundReferenceExpression>(new_column_types[i], i);
is_not_null_expr->children.push_back(std::move(bound_ref));
filter_select_list.push_back(std::move(is_not_null_expr));
}

auto null_filter =
make_uniq<PhysicalFilter>(std::move(filter_types), std::move(filter_select_list), op.estimated_cardinality);
null_filter->types.emplace_back(LogicalType::ROW_TYPE);
null_filter->children.push_back(std::move(projection));

// determine if we sort the data prior to index creation
// we don't sort, if either VARCHAR or compound key
auto perform_sorting = true;
if (op.unbound_expressions.size() > 1) {
perform_sorting = false;
} else if (op.unbound_expressions[0]->return_type.InternalType() == PhysicalType::VARCHAR) {
perform_sorting = false;
}

// actual physical create index operator

auto physical_create_index =
make_uniq<PhysicalCreateARTIndex>(op, op.table, op.info->column_ids, std::move(op.info),
std::move(op.unbound_expressions), op.estimated_cardinality, perform_sorting);

if (perform_sorting) {

// optional order operator
vector<BoundOrderByNode> orders;
vector<idx_t> projections;
for (idx_t i = 0; i < new_column_types.size() - 1; i++) {
auto col_expr = make_uniq_base<Expression, BoundReferenceExpression>(new_column_types[i], i);
orders.emplace_back(OrderType::ASCENDING, OrderByNullType::NULLS_FIRST, std::move(col_expr));
projections.emplace_back(i);
}
projections.emplace_back(new_column_types.size() - 1);

auto physical_order = make_uniq<PhysicalOrder>(new_column_types, std::move(orders), std::move(projections),
op.estimated_cardinality);
physical_order->children.push_back(std::move(null_filter));

physical_create_index->children.push_back(std::move(physical_order));
} else {

// no ordering
physical_create_index->children.push_back(std::move(null_filter));
}
D_ASSERT(op.children.size() == 1);
auto table_scan = CreatePlan(*op.children[0]);

return std::move(physical_create_index);
PlanIndexInput input(context, op, table_scan);
return index_type->create_plan(input);
}

} // namespace duckdb
3 changes: 1 addition & 2 deletions src/duckdb/src/function/scalar/strftime_format.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1185,8 +1185,7 @@ bool StrpTimeFormat::Parse(const char *data, size_t size, ParseResult &result, b
case StrTimeSpecifier::YEAR_WITHOUT_CENTURY_PADDED:
case StrTimeSpecifier::YEAR_WITHOUT_CENTURY:
case StrTimeSpecifier::YEAR_DECIMAL:
// Part of the offset
break;
// Switch to offset parsing
case StrTimeSpecifier::WEEKDAY_DECIMAL:
// First offset specifier
offset_specifier = specifiers[i];
Expand Down
6 changes: 3 additions & 3 deletions src/duckdb/src/function/table/version/pragma_version.cpp
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
#ifndef DUCKDB_PATCH_VERSION
#define DUCKDB_PATCH_VERSION "3-dev123"
#define DUCKDB_PATCH_VERSION "3-dev142"
#endif
#ifndef DUCKDB_MINOR_VERSION
#define DUCKDB_MINOR_VERSION 1
Expand All @@ -8,10 +8,10 @@
#define DUCKDB_MAJOR_VERSION 1
#endif
#ifndef DUCKDB_VERSION
#define DUCKDB_VERSION "v1.1.3-dev123"
#define DUCKDB_VERSION "v1.1.3-dev142"
#endif
#ifndef DUCKDB_SOURCE_ID
#define DUCKDB_SOURCE_ID "b653a8c2b7"
#define DUCKDB_SOURCE_ID "7f34190f3f"
#endif
#include "duckdb/function/table/system_functions.hpp"
#include "duckdb/main/database.hpp"
Expand Down
3 changes: 3 additions & 0 deletions src/duckdb/src/include/duckdb/execution/index/art/art.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -55,6 +55,9 @@ class ART : public BoundIndex {
return std::move(art);
}

//! Plan index construction.
static unique_ptr<PhysicalOperator> CreatePlan(PlanIndexInput &input);

//! Root of the tree.
Node tree = Node();
//! Fixed-size allocators holding the ART nodes.
Expand Down
17 changes: 16 additions & 1 deletion src/duckdb/src/include/duckdb/execution/index/index_type.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,8 @@
namespace duckdb {

class BoundIndex;
class PhysicalOperator;
class LogicalCreateIndex;
enum class IndexConstraintType : uint8_t;
class Expression;
class TableIOManager;
Expand All @@ -43,15 +45,28 @@ struct CreateIndexInput {
options(options) {};
};

//! The arguments handed to an index type's plan function (index_plan_function_t)
struct PlanIndexInput {
	//! The client context
	ClientContext &context;
	//! The logical CREATE INDEX operator that is being planned
	LogicalCreateIndex &op;
	//! The physical plan of the table scan feeding the index creation.
	//! Held by reference to the owning pointer, so the plan function can move it into the plan it builds.
	unique_ptr<PhysicalOperator> &table_scan;

	PlanIndexInput(ClientContext &context_ref, LogicalCreateIndex &op_ref,
	               unique_ptr<PhysicalOperator> &table_scan_ref)
	    : context(context_ref), op(op_ref), table_scan(table_scan_ref) {
	}
};

typedef unique_ptr<BoundIndex> (*index_create_function_t)(CreateIndexInput &input);
typedef unique_ptr<PhysicalOperator> (*index_plan_function_t)(PlanIndexInput &input);

//! An index "type": the name an index implementation is looked up by, together with its callbacks
class IndexType {
public:
	//! The name of the index type
	string name;

	// Callbacks. Both default to nullptr, so a caller can detect a callback that was never set.
	//! Builds the physical plan that constructs the index (see PlanIndexInput)
	index_plan_function_t create_plan = nullptr;
	//! Creates the bound index instance (see CreateIndexInput)
	index_create_function_t create_instance = nullptr;
};

} // namespace duckdb
10 changes: 5 additions & 5 deletions src/duckdb/src/include/duckdb/optimizer/unnest_rewriter.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -8,8 +8,8 @@

#pragma once

#include "duckdb/planner/logical_operator.hpp"
#include "duckdb/common/pair.hpp"
#include "duckdb/planner/logical_operator.hpp"

namespace duckdb {

Expand Down Expand Up @@ -61,14 +61,14 @@ class UnnestRewriter {

private:
//! Find delim joins that contain an UNNEST
void FindCandidates(unique_ptr<LogicalOperator> *op_ptr, vector<unique_ptr<LogicalOperator> *> &candidates);
void FindCandidates(unique_ptr<LogicalOperator> &op, vector<reference<unique_ptr<LogicalOperator>>> &candidates);
//! Rewrite a delim join that contains an UNNEST
bool RewriteCandidate(unique_ptr<LogicalOperator> *candidate);
bool RewriteCandidate(unique_ptr<LogicalOperator> &candidate);
//! Update the bindings of the RHS sequence of LOGICAL_PROJECTION(s)
void UpdateRHSBindings(unique_ptr<LogicalOperator> *plan_ptr, unique_ptr<LogicalOperator> *candidate,
void UpdateRHSBindings(unique_ptr<LogicalOperator> &plan, unique_ptr<LogicalOperator> &candidate,
UnnestRewriterPlanUpdater &updater);
//! Update the bindings of the BOUND_UNNEST expression of the LOGICAL_UNNEST
void UpdateBoundUnnestBindings(UnnestRewriterPlanUpdater &updater, unique_ptr<LogicalOperator> *candidate);
void UpdateBoundUnnestBindings(UnnestRewriterPlanUpdater &updater, unique_ptr<LogicalOperator> &candidate);

//! Store all delim columns of the delim join
void GetDelimColumns(LogicalOperator &op);
Expand Down
5 changes: 4 additions & 1 deletion src/duckdb/src/optimizer/pushdown/pushdown_left_join.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -31,7 +31,7 @@ static bool FilterRemovesNull(ClientContext &context, ExpressionRewriter &rewrit
unordered_set<idx_t> &right_bindings) {
// make a copy of the expression
auto copy = expr->Copy();
// replace all BoundColumnRef expressions frmo the RHS with NULL constants in the copied expression
// replace all BoundColumnRef expressions from the RHS with NULL constants in the copied expression
copy = ReplaceColRefWithNull(std::move(copy), right_bindings);

// attempt to flatten the expression by running the expression rewriter on it
Expand Down Expand Up @@ -97,6 +97,9 @@ unique_ptr<LogicalOperator> FilterPushdown::PushdownLeftJoin(unique_ptr<LogicalO
// bindings match right side or both sides: we cannot directly push it into the right
// however, if the filter removes rows with null values from the RHS we can turn the left outer join
// in an inner join, and then push down as we would push down an inner join
// Edit: This is only possible if the bindings match BOTH sides, so the filter can be pushed down to both
// children. If the filter can only be applied to the right side, and the filter filters
// all tuples, then the inner join cannot be converted.
if (FilterRemovesNull(optimizer.context, optimizer.rewriter, filters[i]->filter.get(), right_bindings)) {
// the filter removes NULL values, turn it into an inner join
join.join_type = JoinType::INNER;
Expand Down
Loading

0 comments on commit 8934827

Please sign in to comment.