From 12cdd310232ee6631a8d271e54c61fa45e545163 Mon Sep 17 00:00:00 2001 From: DuckDB Labs GitHub Bot Date: Tue, 19 Nov 2024 00:35:16 +0000 Subject: [PATCH] Update vendored DuckDB sources to 4c582f38 --- src/duckdb/src/common/enum_util.cpp | 7 +- .../src/common/row_operations/row_gather.cpp | 57 ------- src/duckdb/src/common/types.cpp | 52 ++++++- .../types/column/column_data_collection.cpp | 7 +- .../join/perfect_hash_join_executor.cpp | 3 +- .../operator/join/physical_hash_join.cpp | 60 +++++++- .../physical_tableinout_function.cpp | 6 +- .../operator/scan/physical_table_scan.cpp | 6 +- src/duckdb/src/function/function_binder.cpp | 27 +--- .../src/function/scalar/list/list_zip.cpp | 4 +- src/duckdb/src/function/table/table_scan.cpp | 7 +- .../function/table/version/pragma_version.cpp | 6 +- src/duckdb/src/include/duckdb.h | 31 +++- .../src/include/duckdb/common/array_ptr.hpp | 8 + .../common/row_operations/row_operations.hpp | 2 - .../src/include/duckdb/common/types.hpp | 3 +- .../duckdb/execution/expression_executor.hpp | 1 - .../operator/join/join_filter_pushdown.hpp | 8 +- .../duckdb/function/table_function.hpp | 10 +- .../src/include/duckdb/main/appender.hpp | 53 ++++--- .../duckdb/main/capi/extension_api.hpp | 6 + .../src/include/duckdb/main/client_config.hpp | 7 +- .../include/duckdb/main/client_context.hpp | 5 +- .../src/include/duckdb/main/settings.hpp | 11 ++ .../duckdb/optimizer/filter_combiner.hpp | 4 + .../duckdb/planner/filter/in_filter.hpp | 35 +++++ .../duckdb/planner/filter/optional_filter.hpp | 2 +- .../include/duckdb/planner/table_filter.hpp | 13 +- .../src/include/duckdb/storage/data_table.hpp | 5 +- .../storage/statistics/numeric_stats.hpp | 3 +- .../storage/statistics/string_stats.hpp | 3 +- src/duckdb/src/include/duckdb_extension.h | 6 + src/duckdb/src/main/appender.cpp | 139 +++++++++++++----- src/duckdb/src/main/capi/appender-c.cpp | 5 +- src/duckdb/src/main/capi/duckdb_value-c.cpp | 48 ++++++ src/duckdb/src/main/client_context.cpp | 6 +- 
src/duckdb/src/main/config.cpp | 1 + .../main/settings/autogenerated_settings.cpp | 17 +++ src/duckdb/src/optimizer/filter_combiner.cpp | 90 +++++++----- .../src/optimizer/in_clause_rewriter.cpp | 8 +- .../expression/bind_macro_expression.cpp | 6 +- .../query_node/bind_table_macro_node.cpp | 4 +- .../planner/binder/statement/bind_copy.cpp | 2 +- .../src/planner/filter/constant_filter.cpp | 4 +- src/duckdb/src/planner/filter/in_filter.cpp | 80 ++++++++++ .../src/planner/filter/optional_filter.cpp | 3 +- .../src/planner/operator/logical_get.cpp | 6 +- src/duckdb/src/storage/data_table.cpp | 73 +++++++-- .../serialization/serialize_table_filter.cpp | 15 ++ .../src/storage/statistics/numeric_stats.cpp | 56 ++++--- .../src/storage/statistics/string_stats.cpp | 17 ++- src/duckdb/src/storage/table/row_group.cpp | 1 + src/duckdb/ub_src_planner_filter.cpp | 2 + 53 files changed, 772 insertions(+), 269 deletions(-) create mode 100644 src/duckdb/src/include/duckdb/planner/filter/in_filter.hpp create mode 100644 src/duckdb/src/planner/filter/in_filter.cpp diff --git a/src/duckdb/src/common/enum_util.cpp b/src/duckdb/src/common/enum_util.cpp index 6e7442dd..46f666c1 100644 --- a/src/duckdb/src/common/enum_util.cpp +++ b/src/duckdb/src/common/enum_util.cpp @@ -3581,19 +3581,20 @@ const StringUtil::EnumStringLiteral *GetTableFilterTypeValues() { { static_cast(TableFilterType::CONJUNCTION_OR), "CONJUNCTION_OR" }, { static_cast(TableFilterType::CONJUNCTION_AND), "CONJUNCTION_AND" }, { static_cast(TableFilterType::STRUCT_EXTRACT), "STRUCT_EXTRACT" }, - { static_cast(TableFilterType::OPTIONAL_FILTER), "OPTIONAL_FILTER" } + { static_cast(TableFilterType::OPTIONAL_FILTER), "OPTIONAL_FILTER" }, + { static_cast(TableFilterType::IN_FILTER), "IN_FILTER" } }; return values; } template<> const char* EnumUtil::ToChars(TableFilterType value) { - return StringUtil::EnumToString(GetTableFilterTypeValues(), 7, "TableFilterType", static_cast(value)); + return 
StringUtil::EnumToString(GetTableFilterTypeValues(), 8, "TableFilterType", static_cast(value)); } template<> TableFilterType EnumUtil::FromString(const char *value) { - return static_cast(StringUtil::StringToEnum(GetTableFilterTypeValues(), 7, "TableFilterType", value)); + return static_cast(StringUtil::StringToEnum(GetTableFilterTypeValues(), 8, "TableFilterType", value)); } const StringUtil::EnumStringLiteral *GetTablePartitionInfoValues() { diff --git a/src/duckdb/src/common/row_operations/row_gather.cpp b/src/duckdb/src/common/row_operations/row_gather.cpp index 251cbccb..40743279 100644 --- a/src/duckdb/src/common/row_operations/row_gather.cpp +++ b/src/duckdb/src/common/row_operations/row_gather.cpp @@ -178,61 +178,4 @@ void RowOperations::Gather(Vector &rows, const SelectionVector &row_sel, Vector } } -template -static void TemplatedFullScanLoop(Vector &rows, Vector &col, idx_t count, idx_t col_offset, idx_t col_no, - idx_t column_count) { - // Precompute mask indexes - idx_t entry_idx; - idx_t idx_in_entry; - ValidityBytes::GetEntryIndex(col_no, entry_idx, idx_in_entry); - - auto ptrs = FlatVector::GetData(rows); - auto data = FlatVector::GetData(col); - // auto &col_mask = FlatVector::Validity(col); - - for (idx_t i = 0; i < count; i++) { - auto row = ptrs[i]; - data[i] = Load(row + col_offset); - ValidityBytes row_mask(row, column_count); - if (!row_mask.RowIsValid(row_mask.GetValidityEntry(entry_idx), idx_in_entry)) { - throw InternalException("Null value comparisons not implemented for perfect hash table yet"); - // col_mask.SetInvalid(i); - } - } -} - -void RowOperations::FullScanColumn(const TupleDataLayout &layout, Vector &rows, Vector &col, idx_t count, - idx_t col_no) { - const auto col_offset = layout.GetOffsets()[col_no]; - col.SetVectorType(VectorType::FLAT_VECTOR); - switch (col.GetType().InternalType()) { - case PhysicalType::UINT8: - TemplatedFullScanLoop(rows, col, count, col_offset, col_no, layout.ColumnCount()); - break; - case 
PhysicalType::UINT16: - TemplatedFullScanLoop(rows, col, count, col_offset, col_no, layout.ColumnCount()); - break; - case PhysicalType::UINT32: - TemplatedFullScanLoop(rows, col, count, col_offset, col_no, layout.ColumnCount()); - break; - case PhysicalType::UINT64: - TemplatedFullScanLoop(rows, col, count, col_offset, col_no, layout.ColumnCount()); - break; - case PhysicalType::INT8: - TemplatedFullScanLoop(rows, col, count, col_offset, col_no, layout.ColumnCount()); - break; - case PhysicalType::INT16: - TemplatedFullScanLoop(rows, col, count, col_offset, col_no, layout.ColumnCount()); - break; - case PhysicalType::INT32: - TemplatedFullScanLoop(rows, col, count, col_offset, col_no, layout.ColumnCount()); - break; - case PhysicalType::INT64: - TemplatedFullScanLoop(rows, col, count, col_offset, col_no, layout.ColumnCount()); - break; - default: - throw NotImplementedException("Unimplemented type for RowOperations::FullScanColumn"); - } -} - } // namespace duckdb diff --git a/src/duckdb/src/common/types.cpp b/src/duckdb/src/common/types.cpp index 278be409..8e36b3d8 100644 --- a/src/duckdb/src/common/types.cpp +++ b/src/duckdb/src/common/types.cpp @@ -13,6 +13,7 @@ #include "duckdb/common/serializer/deserializer.hpp" #include "duckdb/common/serializer/serializer.hpp" #include "duckdb/common/string_util.hpp" +#include "duckdb/common/type_visitor.hpp" #include "duckdb/common/types/decimal.hpp" #include "duckdb/common/types/hash.hpp" #include "duckdb/common/types/string_type.hpp" @@ -24,11 +25,12 @@ #include "duckdb/main/attached_database.hpp" #include "duckdb/main/client_context.hpp" #include "duckdb/main/client_data.hpp" +#include "duckdb/main/config.hpp" #include "duckdb/main/database.hpp" #include "duckdb/main/database_manager.hpp" #include "duckdb/parser/keyword_helper.hpp" #include "duckdb/parser/parser.hpp" -#include "duckdb/main/config.hpp" + #include namespace duckdb { @@ -678,6 +680,54 @@ bool LogicalType::IsValid() const { return id() != 
LogicalTypeId::INVALID && id() != LogicalTypeId::UNKNOWN; } +bool LogicalType::IsComplete() const { + // Check if type does not contain incomplete types + return !TypeVisitor::Contains(*this, [](const LogicalType &type) { + switch (type.id()) { + case LogicalTypeId::INVALID: + case LogicalTypeId::UNKNOWN: + case LogicalTypeId::ANY: + return true; // These are incomplete by default + case LogicalTypeId::LIST: + case LogicalTypeId::MAP: + if (!type.AuxInfo() || type.AuxInfo()->type != ExtraTypeInfoType::LIST_TYPE_INFO) { + return true; // Missing or incorrect type info + } + break; + case LogicalTypeId::STRUCT: + case LogicalTypeId::UNION: + if (!type.AuxInfo() || type.AuxInfo()->type != ExtraTypeInfoType::STRUCT_TYPE_INFO) { + return true; // Missing or incorrect type info + } + break; + case LogicalTypeId::ARRAY: + if (!type.AuxInfo() || type.AuxInfo()->type != ExtraTypeInfoType::ARRAY_TYPE_INFO) { + return true; // Missing or incorrect type info + } + break; + case LogicalTypeId::DECIMAL: + if (!type.AuxInfo() || type.AuxInfo()->type != ExtraTypeInfoType::DECIMAL_TYPE_INFO) { + return true; // Missing or incorrect type info + } + break; + default: + return false; + } + + // Type has type info, check if it is complete + D_ASSERT(type.AuxInfo()); + switch (type.AuxInfo()->type) { + case ExtraTypeInfoType::STRUCT_TYPE_INFO: + return type.AuxInfo()->Cast().child_types.empty(); // Cannot be empty + case ExtraTypeInfoType::DECIMAL_TYPE_INFO: + return DecimalType::GetWidth(type) >= 1 && DecimalType::GetWidth(type) <= Decimal::MAX_WIDTH_DECIMAL && + DecimalType::GetScale(type) <= DecimalType::GetWidth(type); + default: + return false; // Nested types are checked by TypeVisitor recursion + } + }); +} + bool LogicalType::GetDecimalProperties(uint8_t &width, uint8_t &scale) const { switch (id_) { case LogicalTypeId::SQLNULL: diff --git a/src/duckdb/src/common/types/column/column_data_collection.cpp b/src/duckdb/src/common/types/column/column_data_collection.cpp index 
cc34c44d..d6e01e5a 100644 --- a/src/duckdb/src/common/types/column/column_data_collection.cpp +++ b/src/duckdb/src/common/types/column/column_data_collection.cpp @@ -1,14 +1,14 @@ #include "duckdb/common/types/column/column_data_collection.hpp" #include "duckdb/common/printer.hpp" +#include "duckdb/common/serializer/deserializer.hpp" +#include "duckdb/common/serializer/serializer.hpp" #include "duckdb/common/string_util.hpp" #include "duckdb/common/types/column/column_data_collection_segment.hpp" #include "duckdb/common/types/value_map.hpp" #include "duckdb/common/uhugeint.hpp" #include "duckdb/common/vector_operations/vector_operations.hpp" #include "duckdb/storage/buffer_manager.hpp" -#include "duckdb/common/serializer/serializer.hpp" -#include "duckdb/common/serializer/deserializer.hpp" namespace duckdb { @@ -779,7 +779,8 @@ ColumnDataCopyFunction ColumnDataCollection::GetCopyFunction(const LogicalType & break; } default: - throw InternalException("Unsupported type for ColumnDataCollection::GetCopyFunction"); + throw InternalException("Unsupported type %s for ColumnDataCollection::GetCopyFunction", + EnumUtil::ToString(type.InternalType())); } result.function = function; return result; diff --git a/src/duckdb/src/execution/operator/join/perfect_hash_join_executor.cpp b/src/duckdb/src/execution/operator/join/perfect_hash_join_executor.cpp index a7c037d5..fd623c15 100644 --- a/src/duckdb/src/execution/operator/join/perfect_hash_join_executor.cpp +++ b/src/duckdb/src/execution/operator/join/perfect_hash_join_executor.cpp @@ -50,7 +50,8 @@ bool PerfectHashJoinExecutor::FullScanHashTable(LogicalType &key_type) { // Scan the build keys in the hash table Vector build_vector(key_type, key_count); - RowOperations::FullScanColumn(ht.layout, tuples_addresses, build_vector, key_count, 0); + data_collection.Gather(tuples_addresses, *FlatVector::IncrementalSelectionVector(), key_count, 0, build_vector, + *FlatVector::IncrementalSelectionVector(), nullptr); // Now fill the 
selection vector using the build keys and create a sequential vector // TODO: add check for fast pass when probe is part of build domain diff --git a/src/duckdb/src/execution/operator/join/physical_hash_join.cpp b/src/duckdb/src/execution/operator/join/physical_hash_join.cpp index 2bcfc84d..11127e40 100644 --- a/src/duckdb/src/execution/operator/join/physical_hash_join.cpp +++ b/src/duckdb/src/execution/operator/join/physical_hash_join.cpp @@ -3,8 +3,8 @@ #include "duckdb/common/radix_partitioning.hpp" #include "duckdb/execution/expression_executor.hpp" #include "duckdb/execution/operator/aggregate/ungrouped_aggregate_state.hpp" -#include "duckdb/function/aggregate/distributive_functions.hpp" #include "duckdb/function/aggregate/distributive_function_utils.hpp" +#include "duckdb/function/aggregate/distributive_functions.hpp" #include "duckdb/function/function_binder.hpp" #include "duckdb/main/client_context.hpp" #include "duckdb/main/query_profiler.hpp" @@ -15,12 +15,17 @@ #include "duckdb/parallel/thread_context.hpp" #include "duckdb/planner/expression/bound_aggregate_expression.hpp" #include "duckdb/planner/expression/bound_reference_expression.hpp" +#include "duckdb/planner/filter/conjunction_filter.hpp" #include "duckdb/planner/filter/constant_filter.hpp" +#include "duckdb/planner/filter/in_filter.hpp" #include "duckdb/planner/filter/null_filter.hpp" +#include "duckdb/planner/filter/optional_filter.hpp" #include "duckdb/planner/table_filter.hpp" #include "duckdb/storage/buffer_manager.hpp" #include "duckdb/storage/storage_manager.hpp" #include "duckdb/storage/temporary_memory_manager.hpp" +#include "duckdb/common/types/value_map.hpp" +#include "duckdb/optimizer/filter_combiner.hpp" namespace duckdb { @@ -567,7 +572,50 @@ class HashJoinRepartitionEvent : public BasePipelineEvent { } }; -void JoinFilterPushdownInfo::PushFilters(JoinFilterGlobalState &gstate, const PhysicalOperator &op) const { +void JoinFilterPushdownInfo::PushInFilter(const 
JoinFilterPushdownFilter &info, JoinHashTable &ht, + const PhysicalOperator &op, idx_t filter_idx, idx_t filter_col_idx) const { + // generate a "OR" filter (i.e. x=1 OR x=535 OR x=997) + // first scan the entire vector at the probe side + // FIXME: this code is duplicated from PerfectHashJoinExecutor::FullScanHashTable + auto build_idx = join_condition[filter_idx]; + auto &data_collection = ht.GetDataCollection(); + + Vector tuples_addresses(LogicalType::POINTER, ht.Count()); // allocate space for all the tuples + + JoinHTScanState join_ht_state(data_collection, 0, data_collection.ChunkCount(), + TupleDataPinProperties::KEEP_EVERYTHING_PINNED); + + // Go through all the blocks and fill the keys addresses + idx_t key_count = ht.FillWithHTOffsets(join_ht_state, tuples_addresses); + + // Scan the build keys in the hash table + Vector build_vector(ht.layout.GetTypes()[build_idx], key_count); + data_collection.Gather(tuples_addresses, *FlatVector::IncrementalSelectionVector(), key_count, build_idx, + build_vector, *FlatVector::IncrementalSelectionVector(), nullptr); + + // generate the OR-clause - note that we only need to consider unique values here (so we use a seT) + value_set_t unique_ht_values; + for (idx_t k = 0; k < key_count; k++) { + unique_ht_values.insert(build_vector.GetValue(k)); + } + vector in_list(unique_ht_values.begin(), unique_ht_values.end()); + + // generating the OR filter only makes sense if the range is not dense + // i.e. 
if we have the values [0, 1, 2, 3, 4] - the min/max is fully equivalent to the OR filter + if (FilterCombiner::IsDenseRange(in_list)) { + return; + } + + // generate the OR filter + auto or_filter = make_uniq(std::move(in_list)); + // we push the OR filter as an OptionalFilter so that we can use it for zonemap pruning only + // the IN-list is expensive to execute otherwise + auto filter = make_uniq(std::move(or_filter)); + info.dynamic_filters->PushFilter(op, filter_col_idx, std::move(filter)); +} + +void JoinFilterPushdownInfo::PushFilters(ClientContext &context, JoinHashTable &ht, JoinFilterGlobalState &gstate, + const PhysicalOperator &op) const { // finalize the min/max aggregates vector min_max_types; for (auto &aggr_expr : min_max_aggregates) { @@ -578,6 +626,7 @@ void JoinFilterPushdownInfo::PushFilters(JoinFilterGlobalState &gstate, const Ph gstate.global_aggregate_state->Finalize(final_min_max); + auto dynamic_or_filter_threshold = ClientConfig::GetSetting(context); // create a filter for each of the aggregates for (idx_t filter_idx = 0; filter_idx < join_condition.size(); filter_idx++) { for (auto &info : probe_info) { @@ -593,6 +642,11 @@ void JoinFilterPushdownInfo::PushFilters(JoinFilterGlobalState &gstate, const Ph // hash table e.g. 
because they are part of a RIGHT join continue; } + // if the HT is small we can generate a complete "OR" filter + if (ht.Count() > 1 && ht.Count() <= dynamic_or_filter_threshold) { + PushInFilter(info, ht, op, filter_idx, filter_col_idx); + } + if (Value::NotDistinctFrom(min_val, max_val)) { // min = max - generate an equality filter auto constant_filter = make_uniq(ExpressionType::COMPARE_EQUAL, std::move(min_val)); @@ -655,7 +709,7 @@ SinkFinalizeType PhysicalHashJoin::Finalize(Pipeline &pipeline, Event &event, Cl ht.Unpartition(); if (filter_pushdown && ht.Count() > 0) { - filter_pushdown->PushFilters(*sink.global_filter_state, *this); + filter_pushdown->PushFilters(context, ht, *sink.global_filter_state, *this); } // check for possible perfect hash table diff --git a/src/duckdb/src/execution/operator/projection/physical_tableinout_function.cpp b/src/duckdb/src/execution/operator/projection/physical_tableinout_function.cpp index 677e84d9..fa150693 100644 --- a/src/duckdb/src/execution/operator/projection/physical_tableinout_function.cpp +++ b/src/duckdb/src/execution/operator/projection/physical_tableinout_function.cpp @@ -111,7 +111,11 @@ OperatorResultType PhysicalTableInOutFunction::Execute(ExecutionContext &context InsertionOrderPreservingMap PhysicalTableInOutFunction::ParamsToString() const { InsertionOrderPreservingMap result; if (function.to_string) { - result["__text__"] = function.to_string(bind_data.get()); + TableFunctionToStringInput input(function, bind_data.get()); + auto to_string_result = function.to_string(input); + for (const auto &it : to_string_result) { + result[it.first] = it.second; + } } else { result["Name"] = function.name; } diff --git a/src/duckdb/src/execution/operator/scan/physical_table_scan.cpp b/src/duckdb/src/execution/operator/scan/physical_table_scan.cpp index a96d88fe..00d2814e 100644 --- a/src/duckdb/src/execution/operator/scan/physical_table_scan.cpp +++ b/src/duckdb/src/execution/operator/scan/physical_table_scan.cpp @@ 
-164,7 +164,11 @@ void AddProjectionNames(const ColumnIndex &index, const string &name, const Logi InsertionOrderPreservingMap PhysicalTableScan::ParamsToString() const { InsertionOrderPreservingMap result; if (function.to_string) { - result["__text__"] = function.to_string(bind_data.get()); + TableFunctionToStringInput input(function, bind_data.get()); + auto to_string_result = function.to_string(input); + for (const auto &it : to_string_result) { + result[it.first] = it.second; + } } else { result["Function"] = StringUtil::Upper(function.name); } diff --git a/src/duckdb/src/function/function_binder.cpp b/src/duckdb/src/function/function_binder.cpp index 5a554401..67144182 100644 --- a/src/duckdb/src/function/function_binder.cpp +++ b/src/duckdb/src/function/function_binder.cpp @@ -6,13 +6,13 @@ #include "duckdb/execution/expression_executor.hpp" #include "duckdb/function/aggregate_function.hpp" #include "duckdb/function/cast_rules.hpp" +#include "duckdb/function/scalar/generic_functions.hpp" #include "duckdb/parser/parsed_data/create_secret_info.hpp" #include "duckdb/planner/expression/bound_aggregate_expression.hpp" #include "duckdb/planner/expression/bound_cast_expression.hpp" #include "duckdb/planner/expression/bound_constant_expression.hpp" #include "duckdb/planner/expression/bound_function_expression.hpp" #include "duckdb/planner/expression_binder.hpp" -#include "duckdb/function/scalar/generic_functions.hpp" namespace duckdb { @@ -320,24 +320,13 @@ unique_ptr FunctionBinder::BindScalarFunction(ScalarFunctionCatalogE // found a matching function! 
auto bound_function = func.functions.GetFunctionByOffset(best_function.GetIndex()); - // If any of the parameters are NULL, the function will just be replaced with a NULL constant - // But this NULL constant needs to have to correct type, because we use LogicalType::SQLNULL for binding macro's - // However, some functions may have an invalid return type, so we default to SQLNULL for those - LogicalType return_type_if_null; - switch (bound_function.return_type.id()) { - case LogicalTypeId::ANY: - case LogicalTypeId::DECIMAL: - case LogicalTypeId::STRUCT: - case LogicalTypeId::LIST: - case LogicalTypeId::MAP: - case LogicalTypeId::UNION: - case LogicalTypeId::ARRAY: - return_type_if_null = LogicalType::SQLNULL; - break; - default: - return_type_if_null = bound_function.return_type; - } - + // If any of the parameters are NULL, the function will just be replaced with a NULL constant. + // We try to give the NULL constant the correct type, but we have to do this without binding the function, + // because functions with DEFAULT_NULL_HANDLING should not have to deal with NULL inputs in their bind code. + // Some functions may have an invalid default return type, as they must be bound to infer the return type. + // In those cases, we default to SQLNULL. + const auto return_type_if_null = + bound_function.return_type.IsComplete() ? 
bound_function.return_type : LogicalType::SQLNULL; if (bound_function.null_handling == FunctionNullHandling::DEFAULT_NULL_HANDLING) { for (auto &child : children) { if (child->return_type == LogicalTypeId::SQLNULL) { diff --git a/src/duckdb/src/function/scalar/list/list_zip.cpp b/src/duckdb/src/function/scalar/list/list_zip.cpp index 9aa0ec39..106e72ff 100644 --- a/src/duckdb/src/function/scalar/list/list_zip.cpp +++ b/src/duckdb/src/function/scalar/list/list_zip.cpp @@ -112,7 +112,7 @@ static void ListZipFunction(DataChunk &args, ExpressionState &state, Vector &res offset += len; } for (idx_t child_idx = 0; child_idx < args_size; child_idx++) { - if (!(args.data[child_idx].GetType() == LogicalType::SQLNULL)) { + if (args.data[child_idx].GetType() != LogicalType::SQLNULL) { struct_entries[child_idx]->Slice(ListVector::GetEntry(args.data[child_idx]), selections[child_idx], result_size); } @@ -161,7 +161,7 @@ ScalarFunction ListZipFun::GetFunction() { auto fun = ScalarFunction({}, LogicalType::LIST(LogicalTypeId::STRUCT), ListZipFunction, ListZipBind); fun.varargs = LogicalType::ANY; - fun.null_handling = FunctionNullHandling::SPECIAL_HANDLING; // Special handling needed? 
+ fun.null_handling = FunctionNullHandling::SPECIAL_HANDLING; return fun; } diff --git a/src/duckdb/src/function/table/table_scan.cpp b/src/duckdb/src/function/table/table_scan.cpp index 23218352..a88be6e7 100644 --- a/src/duckdb/src/function/table/table_scan.cpp +++ b/src/duckdb/src/function/table/table_scan.cpp @@ -386,9 +386,10 @@ void TableScanPushdownComplexFilter(ClientContext &context, LogicalGet &get, Fun }); } -string TableScanToString(const FunctionData *bind_data_p) { - auto &bind_data = bind_data_p->Cast(); - string result = bind_data.table.name; +InsertionOrderPreservingMap TableScanToString(TableFunctionToStringInput &input) { + InsertionOrderPreservingMap result; + auto &bind_data = input.bind_data->Cast(); + result["Table"] = bind_data.table.name; return result; } diff --git a/src/duckdb/src/function/table/version/pragma_version.cpp b/src/duckdb/src/function/table/version/pragma_version.cpp index 2e0dd2f1..ac8b47be 100644 --- a/src/duckdb/src/function/table/version/pragma_version.cpp +++ b/src/duckdb/src/function/table/version/pragma_version.cpp @@ -1,5 +1,5 @@ #ifndef DUCKDB_PATCH_VERSION -#define DUCKDB_PATCH_VERSION "4-dev1919" +#define DUCKDB_PATCH_VERSION "4-dev2005" #endif #ifndef DUCKDB_MINOR_VERSION #define DUCKDB_MINOR_VERSION 1 @@ -8,10 +8,10 @@ #define DUCKDB_MAJOR_VERSION 1 #endif #ifndef DUCKDB_VERSION -#define DUCKDB_VERSION "v1.1.4-dev1919" +#define DUCKDB_VERSION "v1.1.4-dev2005" #endif #ifndef DUCKDB_SOURCE_ID -#define DUCKDB_SOURCE_ID "810cfa4568" +#define DUCKDB_SOURCE_ID "b470dea7ee" #endif #include "duckdb/function/table/system_functions.hpp" #include "duckdb/main/database.hpp" diff --git a/src/duckdb/src/include/duckdb.h b/src/duckdb/src/include/duckdb.h index b915ff56..a57fd2e6 100644 --- a/src/duckdb/src/include/duckdb.h +++ b/src/duckdb/src/include/duckdb.h @@ -2275,6 +2275,32 @@ Returns the LIST child at index as a duckdb_value. */ DUCKDB_API duckdb_value duckdb_get_list_child(duckdb_value value, idx_t index); +/*! 
+Creates an enum value from a type and a value. Must be destroyed with `duckdb_destroy_value`. + +* @param type The type of the enum +* @param value The value for the enum +* @return The enum value, or nullptr. +*/ +DUCKDB_API duckdb_value duckdb_create_enum_value(duckdb_logical_type type, uint64_t value); + +/*! +Returns the enum value of the given value. + +* @param value A duckdb_value containing an enum +* @return A uint64_t, or MinValue if the value cannot be converted +*/ +DUCKDB_API uint64_t duckdb_get_enum_value(duckdb_value value); + +/*! +Returns the STRUCT child at index as a duckdb_value. + +* @param value The STRUCT value. +* @param index The index of the child. +* @return The child as a duckdb_value. +*/ +DUCKDB_API duckdb_value duckdb_get_struct_child(duckdb_value value, idx_t index); + //===--------------------------------------------------------------------===// // Logical Type Interface //===--------------------------------------------------------------------===// @@ -3571,10 +3597,11 @@ DUCKDB_API duckdb_state duckdb_appender_create_ext(duckdb_connection connection, duckdb_appender *out_appender); /*! -Returns the number of columns in the table that belongs to the appender. +Returns the number of columns that belong to the appender. +If there is no custom column configuration, then this equals the table's physical columns. * @param appender The appender to get the column count from. -* @return The number of columns in the table. +* @return The number of columns in the data chunks. 
*/ DUCKDB_API idx_t duckdb_appender_column_count(duckdb_appender appender); diff --git a/src/duckdb/src/include/duckdb/common/array_ptr.hpp b/src/duckdb/src/include/duckdb/common/array_ptr.hpp index 5300a349..6c268945 100644 --- a/src/duckdb/src/include/duckdb/common/array_ptr.hpp +++ b/src/duckdb/src/include/duckdb/common/array_ptr.hpp @@ -1,3 +1,11 @@ +//===----------------------------------------------------------------------===// +// DuckDB +// +// duckdb/common/array_ptr.hpp +// +// +//===----------------------------------------------------------------------===// + #pragma once #include "duckdb/common/exception.hpp" diff --git a/src/duckdb/src/include/duckdb/common/row_operations/row_operations.hpp b/src/duckdb/src/include/duckdb/common/row_operations/row_operations.hpp index 2973c6b6..06ae00f4 100644 --- a/src/duckdb/src/include/duckdb/common/row_operations/row_operations.hpp +++ b/src/duckdb/src/include/duckdb/common/row_operations/row_operations.hpp @@ -82,8 +82,6 @@ struct RowOperations { static void Gather(Vector &rows, const SelectionVector &row_sel, Vector &col, const SelectionVector &col_sel, const idx_t count, const RowLayout &layout, const idx_t col_no, const idx_t build_size = 0, data_ptr_t heap_ptr = nullptr); - //! Full Scan an entire columns - static void FullScanColumn(const TupleDataLayout &layout, Vector &rows, Vector &col, idx_t count, idx_t col_idx); //===--------------------------------------------------------------------===// // Comparison Operators diff --git a/src/duckdb/src/include/duckdb/common/types.hpp b/src/duckdb/src/include/duckdb/common/types.hpp index 6e85f35d..72a91501 100644 --- a/src/duckdb/src/include/duckdb/common/types.hpp +++ b/src/duckdb/src/include/duckdb/common/types.hpp @@ -338,12 +338,13 @@ struct LogicalType { DUCKDB_API static LogicalType NormalizeType(const LogicalType &type); - //! Gets the decimal properties of a numeric type. Fails if the type is not numeric. + //! Gets the decimal properties of a numeric type. 
Fails if the type is not numeric. DUCKDB_API bool GetDecimalProperties(uint8_t &width, uint8_t &scale) const; DUCKDB_API void Verify() const; DUCKDB_API bool IsValid() const; + DUCKDB_API bool IsComplete() const; private: diff --git a/src/duckdb/src/include/duckdb/execution/expression_executor.hpp b/src/duckdb/src/include/duckdb/execution/expression_executor.hpp index 80c380d6..7aeb3c2d 100644 --- a/src/duckdb/src/include/duckdb/execution/expression_executor.hpp +++ b/src/duckdb/src/include/duckdb/execution/expression_executor.hpp @@ -21,7 +21,6 @@ class ExecutionContext; //! ExpressionExecutor is responsible for executing a set of expressions and storing the result in a data chunk class ExpressionExecutor { friend class BoundIndex; - friend class CreateIndexLocalSinkState; public: DUCKDB_API explicit ExpressionExecutor(ClientContext &context); diff --git a/src/duckdb/src/include/duckdb/execution/operator/join/join_filter_pushdown.hpp b/src/duckdb/src/include/duckdb/execution/operator/join/join_filter_pushdown.hpp index 2dbe1454..b0215318 100644 --- a/src/duckdb/src/include/duckdb/execution/operator/join/join_filter_pushdown.hpp +++ b/src/duckdb/src/include/duckdb/execution/operator/join/join_filter_pushdown.hpp @@ -17,6 +17,7 @@ class DataChunk; class DynamicTableFilterSet; struct GlobalUngroupedAggregateState; struct LocalUngroupedAggregateState; +class JoinHashTable; struct JoinFilterPushdownColumn { //! 
The probe column index to which this filter should be applied @@ -58,7 +59,12 @@ struct JoinFilterPushdownInfo { void Sink(DataChunk &chunk, JoinFilterLocalState &lstate) const; void Combine(JoinFilterGlobalState &gstate, JoinFilterLocalState &lstate) const; - void PushFilters(JoinFilterGlobalState &gstate, const PhysicalOperator &op) const; + void PushFilters(ClientContext &context, JoinHashTable &ht, JoinFilterGlobalState &gstate, + const PhysicalOperator &op) const; + +private: + void PushInFilter(const JoinFilterPushdownFilter &info, JoinHashTable &ht, const PhysicalOperator &op, + idx_t filter_idx, idx_t filter_col_idx) const; }; } // namespace duckdb diff --git a/src/duckdb/src/include/duckdb/function/table_function.hpp b/src/duckdb/src/include/duckdb/function/table_function.hpp index 81173e68..16a6b7d0 100644 --- a/src/duckdb/src/include/duckdb/function/table_function.hpp +++ b/src/duckdb/src/include/duckdb/function/table_function.hpp @@ -168,6 +168,14 @@ struct TableFunctionPartitionInput { const vector &partition_ids; }; +struct TableFunctionToStringInput { + TableFunctionToStringInput(const TableFunction &table_function_p, optional_ptr bind_data_p) + : table_function(table_function_p), bind_data(bind_data_p) { + } + const TableFunction &table_function; + optional_ptr bind_data; +}; + struct TableFunctionGetPartitionInput { public: TableFunctionGetPartitionInput(optional_ptr bind_data_p, @@ -267,7 +275,7 @@ typedef unique_ptr (*table_function_cardinality_t)(ClientContext typedef void (*table_function_pushdown_complex_filter_t)(ClientContext &context, LogicalGet &get, FunctionData *bind_data, vector> &filters); -typedef string (*table_function_to_string_t)(const FunctionData *bind_data); +typedef InsertionOrderPreservingMap (*table_function_to_string_t)(TableFunctionToStringInput &input); typedef void (*table_function_serialize_t)(Serializer &serializer, const optional_ptr bind_data, const TableFunction &function); diff --git 
a/src/duckdb/src/include/duckdb/main/appender.hpp b/src/duckdb/src/include/duckdb/main/appender.hpp index 47be7eaf..70dc881e 100644 --- a/src/duckdb/src/include/duckdb/main/appender.hpp +++ b/src/duckdb/src/include/duckdb/main/appender.hpp @@ -28,28 +28,31 @@ enum class AppenderType : uint8_t { //! The Appender class can be used to append elements to a table. class BaseAppender { public: - //! The amount of tuples that will be gathered in the column data collection before flushing + //! The amount of tuples that are gathered in the column data collection before flushing. static constexpr const idx_t DEFAULT_FLUSH_COUNT = STANDARD_VECTOR_SIZE * 100ULL; protected: + //! The allocator for the column data collection. Allocator &allocator; - //! The append types + //! The column types of the associated table. vector types; - //! The buffered data for the append + //! The active column types. + vector active_types; + //! The buffered to-be-appended data. unique_ptr collection; - //! Internal chunk used for appends + //! The active chunk for row-based appends. DataChunk chunk; - //! The current column to append to + //! The currently active column of row-based appends. idx_t column = 0; - //! The type of the appender + //! The type of the appender. AppenderType appender_type; - //! The amount of rows after which we flush the appender automatically + //! The amount of rows after which the appender flushes automatically. 
idx_t flush_count = DEFAULT_FLUSH_COUNT; protected: - DUCKDB_API BaseAppender(Allocator &allocator, AppenderType type); - DUCKDB_API BaseAppender(Allocator &allocator, vector types, AppenderType type, - idx_t flush_count = DEFAULT_FLUSH_COUNT); + DUCKDB_API BaseAppender(Allocator &allocator, const AppenderType type); + DUCKDB_API BaseAppender(Allocator &allocator, vector types, const AppenderType type, + const idx_t flush_count = DEFAULT_FLUSH_COUNT); public: DUCKDB_API virtual ~BaseAppender(); @@ -78,15 +81,21 @@ class BaseAppender { DUCKDB_API void Flush(); //! Flush the changes made by the appender and close it. The appender cannot be used after this point DUCKDB_API void Close(); + //! Returns the active types of the appender. + const vector &GetActiveTypes() const; - vector &GetTypes() { - return types; - } idx_t CurrentColumn() const { return column; } DUCKDB_API void AppendDataChunk(DataChunk &value); + //! Appends a column to the active column list. + //! Immediately flushes all previous data. + virtual void AddColumn(const string &name) = 0; + //! Removes all columns from the active column list. + //! Immediately flushes all previous data. + virtual void ClearColumns() = 0; + protected: void Destructor(); virtual void FlushInternal(ColumnDataCollection &collection) = 0; @@ -114,12 +123,16 @@ class BaseAppender { }; class Appender : public BaseAppender { - //! A reference to a database connection that created this appender + //! A shared pointer to the context of this appender. shared_ptr context; - //! The table description (including column names) + //! The table description including the column names. unique_ptr description; - //! The default expressions - unordered_map default_values; + //! All table default values. + unordered_map default_values; + + //! If not empty, then this holds all logical column IDs of columns provided by the appender. + //! Any other columns default to NULL, or their default values. 
+ vector column_ids; public: DUCKDB_API Appender(Connection &con, const string &database_name, const string &schema_name, @@ -130,6 +143,8 @@ class Appender : public BaseAppender { public: void AppendDefault(); + void AddColumn(const string &name) override; + void ClearColumns() override; protected: void FlushInternal(ColumnDataCollection &collection) override; @@ -143,11 +158,13 @@ class InternalAppender : public BaseAppender { public: DUCKDB_API InternalAppender(ClientContext &context, TableCatalogEntry &table, - idx_t flush_count = DEFAULT_FLUSH_COUNT); + const idx_t flush_count = DEFAULT_FLUSH_COUNT); DUCKDB_API ~InternalAppender() override; protected: void FlushInternal(ColumnDataCollection &collection) override; + void AddColumn(const string &name) override; + void ClearColumns() override; }; template <> diff --git a/src/duckdb/src/include/duckdb/main/capi/extension_api.hpp b/src/duckdb/src/include/duckdb/main/capi/extension_api.hpp index 99286416..67e9b2e9 100644 --- a/src/duckdb/src/include/duckdb/main/capi/extension_api.hpp +++ b/src/duckdb/src/include/duckdb/main/capi/extension_api.hpp @@ -436,6 +436,9 @@ typedef struct { duckdb_value (*duckdb_create_null_value)(); idx_t (*duckdb_get_list_size)(duckdb_value value); duckdb_value (*duckdb_get_list_child)(duckdb_value value, idx_t index); + duckdb_value (*duckdb_create_enum_value)(duckdb_logical_type type, uint64_t value); + uint64_t (*duckdb_get_enum_value)(duckdb_value value); + duckdb_value (*duckdb_get_struct_child)(duckdb_value value, idx_t index); } duckdb_ext_api_v0; //===--------------------------------------------------------------------===// @@ -822,6 +825,9 @@ inline duckdb_ext_api_v0 CreateAPIv0() { result.duckdb_create_null_value = duckdb_create_null_value; result.duckdb_get_list_size = duckdb_get_list_size; result.duckdb_get_list_child = duckdb_get_list_child; + result.duckdb_create_enum_value = duckdb_create_enum_value; + result.duckdb_get_enum_value = duckdb_get_enum_value; + 
result.duckdb_get_struct_child = duckdb_get_struct_child; return result; } diff --git a/src/duckdb/src/include/duckdb/main/client_config.hpp b/src/duckdb/src/include/duckdb/main/client_config.hpp index 029fbab3..8b919c82 100644 --- a/src/duckdb/src/include/duckdb/main/client_config.hpp +++ b/src/duckdb/src/include/duckdb/main/client_config.hpp @@ -117,6 +117,9 @@ struct ClientConfig { //! The threshold at which we switch from using filtered aggregates to LIST with a dedicated pivot operator idx_t pivot_filter_threshold = 20; + //! The maximum amount of OR filters we generate dynamically from a hash join + idx_t dynamic_or_filter_threshold = 50; + //! Whether or not the "/" division operator defaults to integer division or floating point division bool integer_division = false; //! When a scalar subquery returns multiple rows - return a random row instead of returning an error @@ -171,12 +174,12 @@ struct ClientConfig { } template - typename OP::RETURN_TYPE GetSetting(const ClientContext &context) { + static typename OP::RETURN_TYPE GetSetting(const ClientContext &context) { return OP::GetSetting(context).template GetValue(); } template - Value GetSettingValue(const ClientContext &context) { + static Value GetSettingValue(const ClientContext &context) { return OP::GetSetting(context); } diff --git a/src/duckdb/src/include/duckdb/main/client_context.hpp b/src/duckdb/src/include/duckdb/main/client_context.hpp index dc3171f4..6f5115cb 100644 --- a/src/duckdb/src/include/duckdb/main/client_context.hpp +++ b/src/duckdb/src/include/duckdb/main/client_context.hpp @@ -129,8 +129,9 @@ class ClientContext : public enable_shared_from_this { const string &table_name); //! Get the table info of a specific table, or nullptr if it cannot be found. Uses INVALID_CATALOG. DUCKDB_API unique_ptr TableInfo(const string &schema_name, const string &table_name); - //! Appends a DataChunk to the specified table. Returns whether or not the append was successful. 
- DUCKDB_API void Append(TableDescription &description, ColumnDataCollection &collection); + //! Appends a DataChunk and its default columns to the specified table. + DUCKDB_API void Append(TableDescription &description, ColumnDataCollection &collection, + optional_ptr> column_ids = nullptr); //! Try to bind a relation in the current client context; either throws an exception or fills the result_columns //! list with the set of returned columns diff --git a/src/duckdb/src/include/duckdb/main/settings.hpp b/src/duckdb/src/include/duckdb/main/settings.hpp index ab33eab1..e099df4d 100644 --- a/src/duckdb/src/include/duckdb/main/settings.hpp +++ b/src/duckdb/src/include/duckdb/main/settings.hpp @@ -449,6 +449,17 @@ struct DuckDBAPISetting { static Value GetSetting(const ClientContext &context); }; +struct DynamicOrFilterThresholdSetting { + using RETURN_TYPE = idx_t; + static constexpr const char *Name = "dynamic_or_filter_threshold"; + static constexpr const char *Description = + "The maximum amount of OR filters we generate dynamically from a hash join"; + static constexpr const char *InputType = "UBIGINT"; + static void SetLocal(ClientContext &context, const Value ¶meter); + static void ResetLocal(ClientContext &context); + static Value GetSetting(const ClientContext &context); +}; + struct EnableExternalAccessSetting { using RETURN_TYPE = bool; static constexpr const char *Name = "enable_external_access"; diff --git a/src/duckdb/src/include/duckdb/optimizer/filter_combiner.hpp b/src/duckdb/src/include/duckdb/optimizer/filter_combiner.hpp index 7906ecd9..7f3743f6 100644 --- a/src/duckdb/src/include/duckdb/optimizer/filter_combiner.hpp +++ b/src/duckdb/src/include/duckdb/optimizer/filter_combiner.hpp @@ -45,6 +45,10 @@ class FilterCombiner { FilterResult AddFilter(unique_ptr expr); + //! Returns whether or not a set of integral values is a dense range (i.e. 1, 2, 3, 4, 5) + //! 
If this returns true - this sorts "in_list" as a side-effect + static bool IsDenseRange(vector &in_list); + void GenerateFilters(const std::function filter)> &callback); bool HasFilters(); TableFilterSet GenerateTableScanFilters(const vector &column_ids); diff --git a/src/duckdb/src/include/duckdb/planner/filter/in_filter.hpp b/src/duckdb/src/include/duckdb/planner/filter/in_filter.hpp new file mode 100644 index 00000000..c9028ed5 --- /dev/null +++ b/src/duckdb/src/include/duckdb/planner/filter/in_filter.hpp @@ -0,0 +1,35 @@ +//===----------------------------------------------------------------------===// +// DuckDB +// +// duckdb/planner/filter/in_filter.hpp +// +// +//===----------------------------------------------------------------------===// + +#pragma once + +#include "duckdb/planner/table_filter.hpp" +#include "duckdb/common/types/value.hpp" + +namespace duckdb { + +class InFilter : public TableFilter { +public: + static constexpr const TableFilterType TYPE = TableFilterType::IN_FILTER; + +public: + explicit InFilter(vector values); + + vector values; + +public: + FilterPropagateResult CheckStatistics(BaseStatistics &stats) override; + string ToString(const string &column_name) override; + bool Equals(const TableFilter &other) const override; + unique_ptr Copy() const override; + unique_ptr ToExpression(const Expression &column) const override; + void Serialize(Serializer &serializer) const override; + static unique_ptr Deserialize(Deserializer &deserializer); +}; + +} // namespace duckdb diff --git a/src/duckdb/src/include/duckdb/planner/filter/optional_filter.hpp b/src/duckdb/src/include/duckdb/planner/filter/optional_filter.hpp index 9d18c87f..d694674d 100644 --- a/src/duckdb/src/include/duckdb/planner/filter/optional_filter.hpp +++ b/src/duckdb/src/include/duckdb/planner/filter/optional_filter.hpp @@ -18,7 +18,7 @@ class OptionalFilter : public TableFilter { static constexpr const TableFilterType TYPE = TableFilterType::OPTIONAL_FILTER; public: - 
OptionalFilter(); + explicit OptionalFilter(unique_ptr filter = nullptr); string ToString(const string &column_name) override; unique_ptr Copy() const override; diff --git a/src/duckdb/src/include/duckdb/planner/table_filter.hpp b/src/duckdb/src/include/duckdb/planner/table_filter.hpp index 881884a1..aa94e5ce 100644 --- a/src/duckdb/src/include/duckdb/planner/table_filter.hpp +++ b/src/duckdb/src/include/duckdb/planner/table_filter.hpp @@ -25,12 +25,13 @@ class PhysicalTableScan; enum class TableFilterType : uint8_t { CONSTANT_COMPARISON = 0, // constant comparison (e.g. =C, >C, >=C, > &bound_constraints); - //! Append a column data collection to the transaction-local storage of this table + //! Append a column data collection with default values to the transaction-local storage of this table. void LocalAppend(TableCatalogEntry &table, ClientContext &context, ColumnDataCollection &collection, - const vector> &bound_constraints); + const vector> &bound_constraints, + optional_ptr> column_ids); //! Merge a row group collection into the transaction-local storage void LocalMerge(ClientContext &context, RowGroupCollection &collection); //! Creates an optimistic writer for this table - used for optimistically writing parallel appends diff --git a/src/duckdb/src/include/duckdb/storage/statistics/numeric_stats.hpp b/src/duckdb/src/include/duckdb/storage/statistics/numeric_stats.hpp index 9515acbe..c5490958 100644 --- a/src/duckdb/src/include/duckdb/storage/statistics/numeric_stats.hpp +++ b/src/duckdb/src/include/duckdb/storage/statistics/numeric_stats.hpp @@ -13,6 +13,7 @@ #include "duckdb/common/operator/comparison_operators.hpp" #include "duckdb/common/types/value.hpp" #include "duckdb/storage/statistics/numeric_stats_union.hpp" +#include "duckdb/common/array_ptr.hpp" namespace duckdb { class BaseStatistics; @@ -55,7 +56,7 @@ struct NumericStats { //! 
Check whether or not a given comparison with a constant could possibly be satisfied by rows given the statistics DUCKDB_API static FilterPropagateResult CheckZonemap(const BaseStatistics &stats, ExpressionType comparison_type, - const Value &constant); + array_ptr constants); DUCKDB_API static void Merge(BaseStatistics &stats, const BaseStatistics &other_p); diff --git a/src/duckdb/src/include/duckdb/storage/statistics/string_stats.hpp b/src/duckdb/src/include/duckdb/storage/statistics/string_stats.hpp index f069ecc4..6ae410be 100644 --- a/src/duckdb/src/include/duckdb/storage/statistics/string_stats.hpp +++ b/src/duckdb/src/include/duckdb/storage/statistics/string_stats.hpp @@ -14,6 +14,7 @@ #include "duckdb/common/exception.hpp" #include "duckdb/common/operator/comparison_operators.hpp" #include "duckdb/common/types/hugeint.hpp" +#include "duckdb/common/array_ptr.hpp" namespace duckdb { class BaseStatistics; @@ -62,7 +63,7 @@ struct StringStats { DUCKDB_API static string ToString(const BaseStatistics &stats); DUCKDB_API static FilterPropagateResult CheckZonemap(const BaseStatistics &stats, ExpressionType comparison_type, - const string &value); + array_ptr constants); DUCKDB_API static FilterPropagateResult CheckZonemap(const_data_ptr_t min_data, idx_t min_len, const_data_ptr_t max_data, idx_t max_len, ExpressionType comparison_type, const string &value); diff --git a/src/duckdb/src/include/duckdb_extension.h b/src/duckdb/src/include/duckdb_extension.h index 7a8b33a7..0a269b46 100644 --- a/src/duckdb/src/include/duckdb_extension.h +++ b/src/duckdb/src/include/duckdb_extension.h @@ -500,6 +500,9 @@ typedef struct { duckdb_value (*duckdb_create_null_value)(); idx_t (*duckdb_get_list_size)(duckdb_value value); duckdb_value (*duckdb_get_list_child)(duckdb_value value, idx_t index); + duckdb_value (*duckdb_create_enum_value)(duckdb_logical_type type, uint64_t value); + uint64_t (*duckdb_get_enum_value)(duckdb_value value); + duckdb_value 
(*duckdb_get_struct_child)(duckdb_value value, idx_t index); #endif } duckdb_ext_api_v0; @@ -887,6 +890,9 @@ typedef struct { #define duckdb_create_null_value duckdb_ext_api.duckdb_create_null_value #define duckdb_get_list_size duckdb_ext_api.duckdb_get_list_size #define duckdb_get_list_child duckdb_ext_api.duckdb_get_list_child +#define duckdb_create_enum_value duckdb_ext_api.duckdb_create_enum_value +#define duckdb_get_enum_value duckdb_ext_api.duckdb_get_enum_value +#define duckdb_get_struct_child duckdb_ext_api.duckdb_get_struct_child #define duckdb_appender_create_ext duckdb_ext_api.duckdb_appender_create_ext #define duckdb_table_description_create_ext duckdb_ext_api.duckdb_table_description_create_ext #define duckdb_table_description_get_column_name duckdb_ext_api.duckdb_table_description_get_column_name diff --git a/src/duckdb/src/main/appender.cpp b/src/duckdb/src/main/appender.cpp index 4d222b35..dde56b65 100644 --- a/src/duckdb/src/main/appender.cpp +++ b/src/duckdb/src/main/appender.cpp @@ -18,12 +18,12 @@ namespace duckdb { -BaseAppender::BaseAppender(Allocator &allocator, AppenderType type_p) +BaseAppender::BaseAppender(Allocator &allocator, const AppenderType type_p) : allocator(allocator), column(0), appender_type(type_p) { } -BaseAppender::BaseAppender(Allocator &allocator_p, vector types_p, AppenderType type_p, - idx_t flush_count_p) +BaseAppender::BaseAppender(Allocator &allocator_p, vector types_p, const AppenderType type_p, + const idx_t flush_count_p) : allocator(allocator_p), types(std::move(types_p)), collection(make_uniq(allocator, types)), column(0), appender_type(type_p), flush_count(flush_count_p) { InitializeChunk(); @@ -36,15 +36,22 @@ void BaseAppender::Destructor() { if (Exception::UncaughtException()) { return; } - // flush any remaining chunks, but only if we are not cleaning up the appender as part of an exception stack unwind - // wrapped in a try/catch because Close() can throw if the table was dropped in the meantime + // Flush 
any remaining chunks, if we are not cleaning up as part of an exception stack unwind wrapped in a + // try/catch. Close() can throw if the table was dropped in the meantime. try { Close(); } catch (...) { // NOLINT } } -InternalAppender::InternalAppender(ClientContext &context_p, TableCatalogEntry &table_p, idx_t flush_count_p) +const vector &BaseAppender::GetActiveTypes() const { + if (active_types.empty()) { + return types; + } + return active_types; +} + +InternalAppender::InternalAppender(ClientContext &context_p, TableCatalogEntry &table_p, const idx_t flush_count_p) : BaseAppender(Allocator::DefaultAllocator(), table_p.GetTypes(), AppenderType::PHYSICAL, flush_count_p), context(context_p), table(table_p) { } @@ -92,20 +99,22 @@ Appender::Appender(Connection &con, const string &database_name, const string &s default_binder.target_type = type; auto bound_default = default_binder.Bind(default_copy); + if (!bound_default->IsFoldable()) { + // Not supported yet. + continue; + } + Value result_value; - if (bound_default->IsFoldable()) { - auto eval_success = ExpressionExecutor::TryEvaluateScalar(*context, *bound_default, result_value); - if (eval_success) { - // Insert the default Value. - default_values[i] = result_value; - } + auto eval_success = ExpressionExecutor::TryEvaluateScalar(*context, *bound_default, result_value); + // Insert the default Value. + if (eval_success) { + default_values[i] = result_value; } - // All other cases are not supported currently. 
} }); InitializeChunk(); - collection = make_uniq(allocator, types); + collection = make_uniq(allocator, GetActiveTypes()); } Appender::Appender(Connection &con, const string &schema_name, const string &table_name) @@ -121,14 +130,15 @@ Appender::~Appender() { } void BaseAppender::InitializeChunk() { - chunk.Initialize(allocator, types); + chunk.Destroy(); + chunk.Initialize(allocator, GetActiveTypes()); } void BaseAppender::BeginRow() { } void BaseAppender::EndRow() { - // check that all rows have been appended to + // Ensure that all columns have been appended to. if (column != chunk.ColumnCount()) { throw InvalidInputException("Call to EndRow before all columns have been appended to!"); } @@ -153,8 +163,8 @@ void BaseAppender::AppendDecimalValueInternal(Vector &col, SRC input) { auto width = DecimalType::GetWidth(type); auto scale = DecimalType::GetScale(type); CastParameters parameters; - TryCastToDecimal::Operation(input, FlatVector::GetData(col)[chunk.size()], parameters, width, - scale); + auto &result = FlatVector::GetData(col)[chunk.size()]; + TryCastToDecimal::Operation(input, result, parameters, width, scale); return; } case AppenderType::PHYSICAL: { @@ -168,7 +178,7 @@ void BaseAppender::AppendDecimalValueInternal(Vector &col, SRC input) { template void BaseAppender::AppendValueInternal(T input) { - if (column >= types.size()) { + if (column >= GetActiveTypes().size()) { throw InvalidInputException("Too many appends for chunk!"); } auto &col = chunk.data[column]; @@ -356,7 +366,7 @@ void BaseAppender::Append(interval_t value) { } template <> -void BaseAppender::Append(Value value) { // NOLINT: template shtuff +void BaseAppender::Append(Value value) { // NOLINT: template stuff if (column >= chunk.ColumnCount()) { throw InvalidInputException("Too many appends for chunk!"); } @@ -379,9 +389,10 @@ void BaseAppender::AppendValue(const Value &value) { void BaseAppender::AppendDataChunk(DataChunk &chunk_p) { auto chunk_types = chunk_p.GetTypes(); + auto 
&appender_types = GetActiveTypes(); // Early-out, if types match. - if (chunk_types == types) { + if (chunk_types == appender_types) { collection->Append(chunk_p); if (collection->Count() >= flush_count) { Flush(); @@ -390,19 +401,19 @@ void BaseAppender::AppendDataChunk(DataChunk &chunk_p) { } auto count = chunk_p.ColumnCount(); - if (count != types.size()) { - throw InvalidInputException("incorrect column count in AppendDataChunk, expected %d, got %d", types.size(), - count); + if (count != appender_types.size()) { + throw InvalidInputException("incorrect column count in AppendDataChunk, expected %d, got %d", + appender_types.size(), count); } // We try to cast the chunk. auto size = chunk_p.size(); DataChunk cast_chunk; - cast_chunk.Initialize(allocator, types); + cast_chunk.Initialize(allocator, appender_types); cast_chunk.SetCardinality(size); for (idx_t i = 0; i < count; i++) { - if (chunk_p.data[i].GetType() == types[i]) { + if (chunk_p.data[i].GetType() == appender_types[i]) { cast_chunk.data[i].Reference(chunk_p.data[i]); continue; } @@ -411,7 +422,7 @@ void BaseAppender::AppendDataChunk(DataChunk &chunk_p) { auto success = VectorOperations::DefaultTryCast(chunk_p.data[i], cast_chunk.data[i], size, &error_msg); if (!success) { throw InvalidInputException("type mismatch in AppendDataChunk, expected %s, got %s for column %d", - types[i].ToString(), chunk_p.data[i].GetType().ToString(), i); + appender_types[i].ToString(), chunk_p.data[i].GetType().ToString(), i); } } @@ -433,7 +444,7 @@ void BaseAppender::FlushChunk() { } void BaseAppender::Flush() { - // check that all vectors have the same length before appending + // Check that all vectors have the same length before appending. 
if (column != 0) { throw InvalidInputException("Failed to Flush appender: incomplete append to row!"); } @@ -442,36 +453,88 @@ void BaseAppender::Flush() { if (collection->Count() == 0) { return; } - FlushInternal(*collection); + FlushInternal(*collection); collection->Reset(); column = 0; } void Appender::FlushInternal(ColumnDataCollection &collection) { - context->Append(*description, collection); + context->Append(*description, collection, &column_ids); } void Appender::AppendDefault() { - auto it = default_values.find(column); - auto &column_def = description->columns[column]; + auto index = column_ids.empty() ? column : column_ids[column].index; + auto it = default_values.find(index); if (it == default_values.end()) { + auto &name = description->columns[index].Name(); throw NotImplementedException( - "AppendDefault is currently not supported for column \"%s\" because default expression is not foldable.", - column_def.Name()); + "AppendDefault is not supported for column \"%s\": not a foldable default expressions.", name); + } + auto &value = it->second; + Append(value); +} + +void Appender::AddColumn(const string &name) { + Flush(); + + auto exists = false; + for (idx_t col_idx = 0; col_idx < description->columns.size(); col_idx++) { + auto &col_def = description->columns[col_idx]; + if (col_def.Name() != name) { + continue; + } + + // Ensure that we are not adding a generated column. + if (col_def.Generated()) { + throw InvalidInputException("cannot add a generated column to the appender"); + } + + // Ensure that we haven't added this column before. 
+ for (const auto &column_id : column_ids) { + if (column_id == col_def.Logical()) { + throw InvalidInputException("cannot add the same column twice"); + } + } + + active_types.push_back(col_def.Type()); + column_ids.push_back(col_def.Logical()); + exists = true; + break; + } + if (!exists) { + throw InvalidInputException("the column must exist in the table"); } - auto &default_value = it->second; - Append(default_value); + + InitializeChunk(); + collection = make_uniq(allocator, GetActiveTypes()); +} + +void Appender::ClearColumns() { + Flush(); + column_ids.clear(); + active_types.clear(); + + InitializeChunk(); + collection = make_uniq(allocator, GetActiveTypes()); } void InternalAppender::FlushInternal(ColumnDataCollection &collection) { auto binder = Binder::CreateBinder(context); auto bound_constraints = binder->BindConstraints(table); - table.GetStorage().LocalAppend(table, context, collection, bound_constraints); + table.GetStorage().LocalAppend(table, context, collection, bound_constraints, nullptr); +} + +void InternalAppender::AddColumn(const string &name) { + throw InternalException("AddColumn not implemented for InternalAppender"); +} + +void InternalAppender::ClearColumns() { + throw InternalException("ClearColumns not implemented for InternalAppender"); } void BaseAppender::Close() { - if (column == 0 || column == types.size()) { + if (column == 0 || column == GetActiveTypes().size()) { Flush(); } } diff --git a/src/duckdb/src/main/capi/appender-c.cpp b/src/duckdb/src/main/capi/appender-c.cpp index 9f67d915..1582a509 100644 --- a/src/duckdb/src/main/capi/appender-c.cpp +++ b/src/duckdb/src/main/capi/appender-c.cpp @@ -250,7 +250,7 @@ idx_t duckdb_appender_column_count(duckdb_appender appender) { return 0; } - return wrapper->appender->GetTypes().size(); + return wrapper->appender->GetActiveTypes().size(); } duckdb_logical_type duckdb_appender_column_type(duckdb_appender appender, idx_t col_idx) { @@ -263,7 +263,8 @@ duckdb_logical_type 
duckdb_appender_column_type(duckdb_appender appender, idx_t return nullptr; } - return reinterpret_cast(new duckdb::LogicalType(wrapper->appender->GetTypes()[col_idx])); + auto &logical_type = wrapper->appender->GetActiveTypes()[col_idx]; + return reinterpret_cast(new duckdb::LogicalType(logical_type)); } duckdb_state duckdb_append_data_chunk(duckdb_appender appender, duckdb_data_chunk chunk) { diff --git a/src/duckdb/src/main/capi/duckdb_value-c.cpp b/src/duckdb/src/main/capi/duckdb_value-c.cpp index 9eac3957..9842c95d 100644 --- a/src/duckdb/src/main/capi/duckdb_value-c.cpp +++ b/src/duckdb/src/main/capi/duckdb_value-c.cpp @@ -369,3 +369,51 @@ duckdb_value duckdb_get_list_child(duckdb_value value, idx_t index) { return WrapValue(new duckdb::Value(children[index])); } + +duckdb_value duckdb_create_enum_value(duckdb_logical_type type, uint64_t value) { + if (!type) { + return nullptr; + } + + auto &logical_type = UnwrapType(type); + if (logical_type.id() != LogicalTypeId::ENUM) { + return nullptr; + } + + if (value >= duckdb::EnumType::GetSize(logical_type)) { + return nullptr; + } + + return WrapValue(new duckdb::Value(duckdb::Value::ENUM(value, logical_type))); +} + +uint64_t duckdb_get_enum_value(duckdb_value value) { + if (!value) { + return 0; + } + + auto val = UnwrapValue(value); + if (val.type().id() != LogicalTypeId::ENUM || val.IsNull()) { + return 0; + } + + return val.GetValue(); +} + +duckdb_value duckdb_get_struct_child(duckdb_value value, idx_t index) { + if (!value) { + return nullptr; + } + + auto val = UnwrapValue(value); + if (val.type().id() != LogicalTypeId::STRUCT || val.IsNull()) { + return nullptr; + } + + auto &children = duckdb::StructValue::GetChildren(val); + if (index >= children.size()) { + return nullptr; + } + + return WrapValue(new duckdb::Value(children[index])); +} diff --git a/src/duckdb/src/main/client_context.cpp b/src/duckdb/src/main/client_context.cpp index a4564406..4cb53a73 100644 --- 
a/src/duckdb/src/main/client_context.cpp +++ b/src/duckdb/src/main/client_context.cpp @@ -1165,7 +1165,9 @@ unique_ptr ClientContext::TableInfo(const string &schema_name, return TableInfo(INVALID_CATALOG, schema_name, table_name); } -void ClientContext::Append(TableDescription &description, ColumnDataCollection &collection) { +void ClientContext::Append(TableDescription &description, ColumnDataCollection &collection, + optional_ptr> column_ids) { + RunFunctionInTransaction([&]() { auto &table_entry = Catalog::GetEntry(*this, description.database, description.schema, description.table); @@ -1187,7 +1189,7 @@ void ClientContext::Append(TableDescription &description, ColumnDataCollection & auto binder = Binder::CreateBinder(*this); auto bound_constraints = binder->BindConstraints(table_entry); MetaTransaction::Get(*this).ModifyDatabase(table_entry.ParentCatalog().GetAttached()); - table_entry.GetStorage().LocalAppend(table_entry, *this, collection, bound_constraints); + table_entry.GetStorage().LocalAppend(table_entry, *this, collection, bound_constraints, column_ids); }); } diff --git a/src/duckdb/src/main/config.cpp b/src/duckdb/src/main/config.cpp index b72ec07c..f6ff1cb3 100644 --- a/src/duckdb/src/main/config.cpp +++ b/src/duckdb/src/main/config.cpp @@ -95,6 +95,7 @@ static const ConfigurationOption internal_options[] = { DUCKDB_GLOBAL(DisabledFilesystemsSetting), DUCKDB_GLOBAL(DisabledOptimizersSetting), DUCKDB_GLOBAL(DuckDBAPISetting), + DUCKDB_LOCAL(DynamicOrFilterThresholdSetting), DUCKDB_GLOBAL(EnableExternalAccessSetting), DUCKDB_GLOBAL(EnableFSSTVectorsSetting), DUCKDB_LOCAL(EnableHTTPLoggingSetting), diff --git a/src/duckdb/src/main/settings/autogenerated_settings.cpp b/src/duckdb/src/main/settings/autogenerated_settings.cpp index 1fac3ffe..69997b43 100644 --- a/src/duckdb/src/main/settings/autogenerated_settings.cpp +++ b/src/duckdb/src/main/settings/autogenerated_settings.cpp @@ -392,6 +392,23 @@ void DefaultOrderSetting::ResetGlobal(DatabaseInstance 
*db, DBConfig &config) { config.options.default_order_type = DBConfig().options.default_order_type; } +//===----------------------------------------------------------------------===// +// Dynamic Or Filter Threshold +//===----------------------------------------------------------------------===// +void DynamicOrFilterThresholdSetting::SetLocal(ClientContext &context, const Value &input) { + auto &config = ClientConfig::GetConfig(context); + config.dynamic_or_filter_threshold = input.GetValue(); +} + +void DynamicOrFilterThresholdSetting::ResetLocal(ClientContext &context) { + ClientConfig::GetConfig(context).dynamic_or_filter_threshold = ClientConfig().dynamic_or_filter_threshold; +} + +Value DynamicOrFilterThresholdSetting::GetSetting(const ClientContext &context) { + auto &config = ClientConfig::GetConfig(context); + return Value::UBIGINT(config.dynamic_or_filter_threshold); +} + //===----------------------------------------------------------------------===// // Enable External Access //===----------------------------------------------------------------------===// diff --git a/src/duckdb/src/optimizer/filter_combiner.cpp b/src/duckdb/src/optimizer/filter_combiner.cpp index e11d6c6e..178dfadd 100644 --- a/src/duckdb/src/optimizer/filter_combiner.cpp +++ b/src/duckdb/src/optimizer/filter_combiner.cpp @@ -12,10 +12,12 @@ #include "duckdb/planner/expression/bound_function_expression.hpp" #include "duckdb/planner/expression/bound_operator_expression.hpp" #include "duckdb/planner/filter/constant_filter.hpp" +#include "duckdb/planner/filter/in_filter.hpp" #include "duckdb/planner/filter/null_filter.hpp" #include "duckdb/planner/filter/optional_filter.hpp" #include "duckdb/planner/filter/struct_filter.hpp" #include "duckdb/planner/table_filter.hpp" +#include "duckdb/common/operator/subtract.hpp" namespace duckdb { @@ -436,6 +438,35 @@ static unique_ptr PushDownFilterIntoExpr(const Expression &expr, un return inner_filter; } +bool FilterCombiner::IsDenseRange(vector 
&in_list) { + if (in_list.empty()) { + return true; + } + if (!in_list[0].type().IsIntegral()) { + return false; + } + // sort the input list + sort(in_list.begin(), in_list.end()); + + // check if the gap between each value is exactly one + hugeint_t prev_value = in_list[0].GetValue(); + for (idx_t i = 1; i < in_list.size(); i++) { + hugeint_t current_value = in_list[i].GetValue(); + hugeint_t diff; + if (!TrySubtractOperator::Operation(current_value, prev_value, diff)) { + // if subtract would overflow then it's certainly not 1 + return false; + } + if (diff != 1) { + // gap is not 1 - this is not a dense range + return false; + } + prev_value = current_value; + } + // dense range + return true; +} + TableFilterSet FilterCombiner::GenerateTableScanFilters(const vector &column_ids) { TableFilterSet table_filters; //! First, we figure the filters that have constant expressions that we can push down to the table scan @@ -554,7 +585,6 @@ TableFilterSet FilterCombiner::GenerateTableScanFilters(const vectortype == ExpressionType::COMPARE_IN) { auto &func = remaining_filter->Cast(); - vector in_values; D_ASSERT(func.children.size() > 1); if (func.children[0]->expression_class != ExpressionClass::BOUND_COLUMN_REF) { continue; @@ -594,54 +624,34 @@ TableFilterSet FilterCombiner::GenerateTableScanFilters(const vector= <= (only for integers) - // e.g. 
if we have x IN (1, 2, 3, 4, 5) we transform this into x >= 1 AND x <= 5 - bool can_simplify_in_to_range = true; - if (type.IsIntegral()) { - for (idx_t i = 1; i < func.children.size(); i++) { - auto &const_value_expr = func.children[i]->Cast(); - D_ASSERT(!const_value_expr.value.IsNull()); - in_values.push_back(const_value_expr.value.GetValue()); - } - - if (in_values.empty()) { - continue; - } - - sort(in_values.begin(), in_values.end()); - - for (idx_t in_val_idx = 1; in_val_idx < in_values.size(); in_val_idx++) { - if (in_values[in_val_idx] - in_values[in_val_idx - 1] > 1) { - can_simplify_in_to_range = false; - break; - } - } - } if (!type.IsIntegral()) { continue; } - if (can_simplify_in_to_range) { - auto lower_bound = make_uniq(ExpressionType::COMPARE_GREATERTHANOREQUALTO, - Value::Numeric(type, in_values.front())); - auto upper_bound = make_uniq(ExpressionType::COMPARE_LESSTHANOREQUALTO, - Value::Numeric(type, in_values.back())); + //! Check if values are consecutive, if yes transform them to >= <= (only for integers) + // e.g. if we have x IN (1, 2, 3, 4, 5) we transform this into x >= 1 AND x <= 5 + vector in_list; + for (idx_t i = 1; i < func.children.size(); i++) { + auto &const_value_expr = func.children[i]->Cast(); + D_ASSERT(!const_value_expr.value.IsNull()); + in_list.push_back(const_value_expr.value); + } + if (IsDenseRange(in_list)) { + // dense range! 
turn this into x >= min AND x <= max + // IsDenseRange sorts in_list, so the front element is the min and the back element is the max + auto lower_bound = + make_uniq(ExpressionType::COMPARE_GREATERTHANOREQUALTO, std::move(in_list.front())); + auto upper_bound = + make_uniq(ExpressionType::COMPARE_LESSTHANOREQUALTO, std::move(in_list.back())); table_filters.PushFilter(column_index, std::move(lower_bound)); table_filters.PushFilter(column_index, std::move(upper_bound)); table_filters.PushFilter(column_index, make_uniq()); remaining_filters.erase_at(rem_fil_idx); - } - // if we are still Integral, then we can push a zonemap filter. - else if (type.IsIntegral()) { + } else { + // if this is not a dense range we can push a zone-map filter auto optional_filter = make_uniq(); - auto or_filter = make_uniq(); - for (idx_t in_val_idx = 1; in_val_idx < func.children.size(); in_val_idx++) { - D_ASSERT(func.children[in_val_idx]->type == ExpressionType::VALUE_CONSTANT); - auto &const_val = func.children[in_val_idx]->Cast(); - auto const_filter = make_uniq(ExpressionType::COMPARE_EQUAL, const_val.value); - or_filter->child_filters.push_back(std::move(const_filter)); - } - optional_filter->child_filter = std::move(or_filter); + auto in_filter = make_uniq(std::move(in_list)); + optional_filter->child_filter = std::move(in_filter); table_filters.PushFilter(column_index, std::move(optional_filter)); } } diff --git a/src/duckdb/src/optimizer/in_clause_rewriter.cpp b/src/duckdb/src/optimizer/in_clause_rewriter.cpp index 880964f5..c1726cfd 100644 --- a/src/duckdb/src/optimizer/in_clause_rewriter.cpp +++ b/src/duckdb/src/optimizer/in_clause_rewriter.cpp @@ -15,8 +15,12 @@ unique_ptr InClauseRewriter::Rewrite(unique_ptrchildren.size() == 1) { if (op->children[0]->type == LogicalOperatorType::LOGICAL_GET) { auto &get = op->children[0]->Cast(); - if (get.function.to_string && get.function.to_string(get.bind_data.get()) == "REMOTE") { - return op; + if (get.function.to_string) { + 
TableFunctionToStringInput input(get.function, get.bind_data.get()); + auto to_string_result = get.function.to_string(input); + if (to_string_result["__text__"] == "REMOTE") { + return op; + } } } root = std::move(op->children[0]); diff --git a/src/duckdb/src/planner/binder/expression/bind_macro_expression.cpp b/src/duckdb/src/planner/binder/expression/bind_macro_expression.cpp index 358dd5db..151eadf9 100644 --- a/src/duckdb/src/planner/binder/expression/bind_macro_expression.cpp +++ b/src/duckdb/src/planner/binder/expression/bind_macro_expression.cpp @@ -1,7 +1,5 @@ #include "duckdb/catalog/catalog_entry/scalar_macro_catalog_entry.hpp" #include "duckdb/common/enums/expression_type.hpp" -#include "duckdb/common/reference_map.hpp" -#include "duckdb/common/string_util.hpp" #include "duckdb/function/scalar_macro_function.hpp" #include "duckdb/parser/expression/function_expression.hpp" #include "duckdb/parser/expression/subquery_expression.hpp" @@ -112,13 +110,13 @@ void ExpressionBinder::UnfoldMacroExpression(FunctionExpression &function, Scala vector names; // positional parameters for (idx_t i = 0; i < macro_def.parameters.size(); i++) { - types.emplace_back(LogicalType::SQLNULL); + types.emplace_back(LogicalTypeId::UNKNOWN); auto ¶m = macro_def.parameters[i]->Cast(); names.push_back(param.GetColumnName()); } // default parameters for (auto it = macro_def.default_parameters.begin(); it != macro_def.default_parameters.end(); it++) { - types.emplace_back(LogicalType::SQLNULL); + types.emplace_back(LogicalTypeId::UNKNOWN); names.push_back(it->first); // now push the defaults into the positionals positionals.push_back(std::move(defaults[it->first])); diff --git a/src/duckdb/src/planner/binder/query_node/bind_table_macro_node.cpp b/src/duckdb/src/planner/binder/query_node/bind_table_macro_node.cpp index df896704..0f90c115 100644 --- a/src/duckdb/src/planner/binder/query_node/bind_table_macro_node.cpp +++ 
b/src/duckdb/src/planner/binder/query_node/bind_table_macro_node.cpp @@ -36,13 +36,13 @@ unique_ptr Binder::BindTableMacro(FunctionExpression &function, Table vector names; // positional parameters for (idx_t i = 0; i < macro_def.parameters.size(); i++) { - types.emplace_back(LogicalType::SQLNULL); + types.emplace_back(LogicalTypeId::UNKNOWN); auto ¶m = macro_def.parameters[i]->Cast(); names.push_back(param.GetColumnName()); } // default parameters for (auto it = macro_def.default_parameters.begin(); it != macro_def.default_parameters.end(); it++) { - types.emplace_back(LogicalType::SQLNULL); + types.emplace_back(LogicalTypeId::UNKNOWN); names.push_back(it->first); // now push the defaults into the positionals positionals.push_back(std::move(defaults[it->first])); diff --git a/src/duckdb/src/planner/binder/statement/bind_copy.cpp b/src/duckdb/src/planner/binder/statement/bind_copy.cpp index 916a21ff..ce2f93ad 100644 --- a/src/duckdb/src/planner/binder/statement/bind_copy.cpp +++ b/src/duckdb/src/planner/binder/statement/bind_copy.cpp @@ -122,7 +122,7 @@ BoundStatement Binder::BindCopyTo(CopyStatement &stmt, CopyToType copy_to_type) return_type = CopyFunctionReturnType::CHANGED_ROWS_AND_FILE_LIST; } } else if (loption == "write_partition_columns") { - write_partition_columns = true; + write_partition_columns = GetBooleanArg(context, option.second); } else { stmt.info->options[option.first] = option.second; } diff --git a/src/duckdb/src/planner/filter/constant_filter.cpp b/src/duckdb/src/planner/filter/constant_filter.cpp index 34e16100..ba72e003 100644 --- a/src/duckdb/src/planner/filter/constant_filter.cpp +++ b/src/duckdb/src/planner/filter/constant_filter.cpp @@ -28,9 +28,9 @@ FilterPropagateResult ConstantFilter::CheckStatistics(BaseStatistics &stats) { case PhysicalType::INT128: case PhysicalType::FLOAT: case PhysicalType::DOUBLE: - return NumericStats::CheckZonemap(stats, comparison_type, constant); + return NumericStats::CheckZonemap(stats, comparison_type, 
array_ptr(&constant, 1)); case PhysicalType::VARCHAR: - return StringStats::CheckZonemap(stats, comparison_type, StringValue::Get(constant)); + return StringStats::CheckZonemap(stats, comparison_type, array_ptr(&constant, 1)); default: return FilterPropagateResult::NO_PRUNING_POSSIBLE; } diff --git a/src/duckdb/src/planner/filter/in_filter.cpp b/src/duckdb/src/planner/filter/in_filter.cpp new file mode 100644 index 00000000..ed8cde82 --- /dev/null +++ b/src/duckdb/src/planner/filter/in_filter.cpp @@ -0,0 +1,80 @@ +#include "duckdb/planner/filter/in_filter.hpp" +#include "duckdb/storage/statistics/base_statistics.hpp" +#include "duckdb/planner/expression/bound_constant_expression.hpp" +#include "duckdb/planner/expression/bound_operator_expression.hpp" + +namespace duckdb { + +InFilter::InFilter(vector values_p) : TableFilter(TableFilterType::IN_FILTER), values(std::move(values_p)) { + for (auto &val : values) { + if (val.IsNull()) { + throw InternalException("InFilter constant cannot be NULL - use IsNullFilter instead"); + } + } + for (idx_t i = 1; i < values.size(); i++) { + if (values[0].type() != values[i].type()) { + throw InternalException("InFilter constants must all have the same type"); + } + } + if (values.empty()) { + throw InternalException("InFilter constants cannot be empty"); + } +} + +FilterPropagateResult InFilter::CheckStatistics(BaseStatistics &stats) { + switch (values[0].type().InternalType()) { + case PhysicalType::UINT8: + case PhysicalType::UINT16: + case PhysicalType::UINT32: + case PhysicalType::UINT64: + case PhysicalType::UINT128: + case PhysicalType::INT8: + case PhysicalType::INT16: + case PhysicalType::INT32: + case PhysicalType::INT64: + case PhysicalType::INT128: + case PhysicalType::FLOAT: + case PhysicalType::DOUBLE: + return NumericStats::CheckZonemap(stats, ExpressionType::COMPARE_EQUAL, + array_ptr(values.data(), values.size())); + case PhysicalType::VARCHAR: + return StringStats::CheckZonemap(stats, 
ExpressionType::COMPARE_EQUAL, + array_ptr(values.data(), values.size())); + default: + return FilterPropagateResult::NO_PRUNING_POSSIBLE; + } +} + +string InFilter::ToString(const string &column_name) { + string in_list; + for (auto &val : values) { + if (!in_list.empty()) { // separator goes between values, never before the first one + in_list += ", "; + } + in_list += val.ToSQLString(); + } + return column_name + " IN (" + in_list + ")"; +} + +unique_ptr InFilter::ToExpression(const Expression &column) const { + auto result = make_uniq(ExpressionType::COMPARE_IN, LogicalType::BOOLEAN); + result->children.push_back(column.Copy()); + for (auto &val : values) { + result->children.push_back(make_uniq(val)); + } + return std::move(result); +} + +bool InFilter::Equals(const TableFilter &other_p) const { + if (!TableFilter::Equals(other_p)) { + return false; + } + auto &other = other_p.Cast(); + return other.values == values; +} + +unique_ptr InFilter::Copy() const { + return make_uniq(values); +} + +} // namespace duckdb diff --git a/src/duckdb/src/planner/filter/optional_filter.cpp b/src/duckdb/src/planner/filter/optional_filter.cpp index 75bd9445..891e9014 100644 --- a/src/duckdb/src/planner/filter/optional_filter.cpp +++ b/src/duckdb/src/planner/filter/optional_filter.cpp @@ -4,7 +4,8 @@ namespace duckdb { -OptionalFilter::OptionalFilter() : TableFilter(TableFilterType::OPTIONAL_FILTER) { +OptionalFilter::OptionalFilter(unique_ptr filter) + : TableFilter(TableFilterType::OPTIONAL_FILTER), child_filter(std::move(filter)) { } FilterPropagateResult OptionalFilter::CheckStatistics(BaseStatistics &stats) { diff --git a/src/duckdb/src/planner/operator/logical_get.cpp b/src/duckdb/src/planner/operator/logical_get.cpp index fa986f18..97dbe3f6 100644 --- a/src/duckdb/src/planner/operator/logical_get.cpp +++ b/src/duckdb/src/planner/operator/logical_get.cpp @@ -61,7 +61,11 @@ InsertionOrderPreservingMap LogicalGet::ParamsToString() const { } if (function.to_string) { - result["__text__"] = function.to_string(bind_data.get()); + 
TableFunctionToStringInput input(function, bind_data.get()); + auto to_string_result = function.to_string(input); + for (const auto &it : to_string_result) { + result[it.first] = it.second; + } } SetParamsEstimatedCardinality(result); return result; diff --git a/src/duckdb/src/storage/data_table.cpp b/src/duckdb/src/storage/data_table.cpp index 348e3fbb..1fe64396 100644 --- a/src/duckdb/src/storage/data_table.cpp +++ b/src/duckdb/src/storage/data_table.cpp @@ -3,29 +3,32 @@ #include "duckdb/catalog/catalog_entry/table_catalog_entry.hpp" #include "duckdb/common/chrono.hpp" #include "duckdb/common/exception.hpp" +#include "duckdb/common/exception/transaction_exception.hpp" #include "duckdb/common/helper.hpp" +#include "duckdb/common/types/conflict_manager.hpp" +#include "duckdb/common/types/constraint_conflict_info.hpp" #include "duckdb/common/vector_operations/vector_operations.hpp" #include "duckdb/execution/expression_executor.hpp" +#include "duckdb/main/attached_database.hpp" #include "duckdb/main/client_context.hpp" #include "duckdb/parser/constraints/list.hpp" #include "duckdb/planner/constraints/list.hpp" +#include "duckdb/planner/expression/bound_constant_expression.hpp" +#include "duckdb/planner/expression/bound_reference_expression.hpp" #include "duckdb/planner/expression_binder/check_binder.hpp" +#include "duckdb/planner/expression_binder/constant_binder.hpp" #include "duckdb/planner/table_filter.hpp" #include "duckdb/storage/checkpoint/table_data_writer.hpp" #include "duckdb/storage/storage_manager.hpp" -#include "duckdb/storage/table_storage_info.hpp" -#include "duckdb/storage/table/persistent_table_data.hpp" -#include "duckdb/storage/table/row_group.hpp" -#include "duckdb/storage/table/standard_column_data.hpp" -#include "duckdb/transaction/duck_transaction.hpp" -#include "duckdb/main/attached_database.hpp" -#include "duckdb/common/types/conflict_manager.hpp" -#include "duckdb/common/types/constraint_conflict_info.hpp" #include 
"duckdb/storage/table/append_state.hpp" #include "duckdb/storage/table/delete_state.hpp" +#include "duckdb/storage/table/persistent_table_data.hpp" +#include "duckdb/storage/table/row_group.hpp" #include "duckdb/storage/table/scan_state.hpp" +#include "duckdb/storage/table/standard_column_data.hpp" #include "duckdb/storage/table/update_state.hpp" -#include "duckdb/common/exception/transaction_exception.hpp" +#include "duckdb/storage/table_storage_info.hpp" +#include "duckdb/transaction/duck_transaction.hpp" namespace duckdb { @@ -849,12 +852,60 @@ void DataTable::LocalAppend(TableCatalogEntry &table, ClientContext &context, Da } void DataTable::LocalAppend(TableCatalogEntry &table, ClientContext &context, ColumnDataCollection &collection, - const vector> &bound_constraints) { + const vector> &bound_constraints, + optional_ptr> column_ids) { + LocalAppendState append_state; auto &storage = table.GetStorage(); storage.InitializeLocalAppend(append_state, table, context, bound_constraints); + + if (!column_ids || column_ids->empty()) { + for (auto &chunk : collection.Chunks()) { + storage.LocalAppend(append_state, table, context, chunk); + } + storage.FinalizeLocalAppend(append_state); + return; + } + + auto &column_list = table.GetColumns(); + map> active_expressions; + for (idx_t i = 0; i < column_ids->size(); i++) { + auto &col = column_list.GetColumn((*column_ids)[i]); + auto expr = make_uniq(col.Name(), col.Type(), i); + active_expressions[col.Physical()] = std::move(expr); + } + + auto binder = Binder::CreateBinder(context); + ConstantBinder default_binder(*binder, context, "DEFAULT value"); + vector> expressions; + for (idx_t i = 0; i < column_list.PhysicalColumnCount(); i++) { + auto expr = active_expressions.find(PhysicalIndex(i)); + if (expr != active_expressions.end()) { + expressions.push_back(std::move(expr->second)); + continue; + } + + auto &col = column_list.GetColumn(PhysicalIndex(i)); + if (!col.HasDefaultValue()) { + auto null_expr = 
make_uniq(Value(col.Type())); + expressions.push_back(std::move(null_expr)); + continue; + } + + auto default_copy = col.DefaultValue().Copy(); + default_binder.target_type = col.Type(); + auto bound_default = default_binder.Bind(default_copy); + expressions.push_back(std::move(bound_default)); + } + + ExpressionExecutor expression_executor(context, expressions); + DataChunk result; + result.Initialize(context, table.GetTypes()); + for (auto &chunk : collection.Chunks()) { - storage.LocalAppend(append_state, table, context, chunk); + expression_executor.Execute(chunk, result); + storage.LocalAppend(append_state, table, context, result); + result.Reset(); } storage.FinalizeLocalAppend(append_state); } diff --git a/src/duckdb/src/storage/serialization/serialize_table_filter.cpp b/src/duckdb/src/storage/serialization/serialize_table_filter.cpp index b15c2d0b..219bf185 100644 --- a/src/duckdb/src/storage/serialization/serialize_table_filter.cpp +++ b/src/duckdb/src/storage/serialization/serialize_table_filter.cpp @@ -11,6 +11,7 @@ #include "duckdb/planner/filter/conjunction_filter.hpp" #include "duckdb/planner/filter/struct_filter.hpp" #include "duckdb/planner/filter/optional_filter.hpp" +#include "duckdb/planner/filter/in_filter.hpp" namespace duckdb { @@ -31,6 +32,9 @@ unique_ptr TableFilter::Deserialize(Deserializer &deserializer) { case TableFilterType::CONSTANT_COMPARISON: result = ConstantFilter::Deserialize(deserializer); break; + case TableFilterType::IN_FILTER: + result = InFilter::Deserialize(deserializer); + break; case TableFilterType::IS_NOT_NULL: result = IsNotNullFilter::Deserialize(deserializer); break; @@ -84,6 +88,17 @@ unique_ptr ConstantFilter::Deserialize(Deserializer &deserializer) return std::move(result); } +void InFilter::Serialize(Serializer &serializer) const { + TableFilter::Serialize(serializer); + serializer.WritePropertyWithDefault>(200, "values", values); +} + +unique_ptr InFilter::Deserialize(Deserializer &deserializer) { + auto values 
= deserializer.ReadPropertyWithDefault>(200, "values"); + auto result = duckdb::unique_ptr(new InFilter(std::move(values))); + return std::move(result); +} + void IsNotNullFilter::Serialize(Serializer &serializer) const { TableFilter::Serialize(serializer); } diff --git a/src/duckdb/src/storage/statistics/numeric_stats.cpp b/src/duckdb/src/storage/statistics/numeric_stats.cpp index 29d89a51..a9379812 100644 --- a/src/duckdb/src/storage/statistics/numeric_stats.cpp +++ b/src/duckdb/src/storage/statistics/numeric_stats.cpp @@ -143,11 +143,8 @@ bool ConstantValueInRange(T min, T max, T constant) { } template -FilterPropagateResult CheckZonemapTemplated(const BaseStatistics &stats, ExpressionType comparison_type, - const Value &constant_value) { - T min_value = NumericStats::GetMinUnsafe(stats); - T max_value = NumericStats::GetMaxUnsafe(stats); - T constant = constant_value.GetValueUnsafe(); +FilterPropagateResult CheckZonemapTemplated(const BaseStatistics &stats, ExpressionType comparison_type, T min_value, + T max_value, T constant) { switch (comparison_type) { case ExpressionType::COMPARE_EQUAL: if (ConstantExactRange(min_value, max_value, constant)) { @@ -214,40 +211,55 @@ FilterPropagateResult CheckZonemapTemplated(const BaseStatistics &stats, Express } } -FilterPropagateResult NumericStats::CheckZonemap(const BaseStatistics &stats, ExpressionType comparison_type, - const Value &constant) { - D_ASSERT(constant.type() == stats.GetType()); - if (constant.IsNull()) { - return FilterPropagateResult::FILTER_ALWAYS_FALSE; +template +FilterPropagateResult CheckZonemapTemplated(const BaseStatistics &stats, ExpressionType comparison_type, + array_ptr constants) { + T min_value = NumericStats::GetMinUnsafe(stats); + T max_value = NumericStats::GetMaxUnsafe(stats); + for (auto &constant_value : constants) { + D_ASSERT(constant_value.type() == stats.GetType()); + D_ASSERT(!constant_value.IsNull()); + T constant = constant_value.GetValueUnsafe(); + auto prune_result = 
CheckZonemapTemplated(stats, comparison_type, min_value, max_value, constant); + if (prune_result == FilterPropagateResult::NO_PRUNING_POSSIBLE) { + return FilterPropagateResult::NO_PRUNING_POSSIBLE; + } else if (prune_result == FilterPropagateResult::FILTER_ALWAYS_TRUE) { + return FilterPropagateResult::FILTER_ALWAYS_TRUE; + } } + return FilterPropagateResult::FILTER_ALWAYS_FALSE; +} + +FilterPropagateResult NumericStats::CheckZonemap(const BaseStatistics &stats, ExpressionType comparison_type, + array_ptr constants) { if (!NumericStats::HasMinMax(stats)) { return FilterPropagateResult::NO_PRUNING_POSSIBLE; } switch (stats.GetType().InternalType()) { case PhysicalType::INT8: - return CheckZonemapTemplated(stats, comparison_type, constant); + return CheckZonemapTemplated(stats, comparison_type, constants); case PhysicalType::INT16: - return CheckZonemapTemplated(stats, comparison_type, constant); + return CheckZonemapTemplated(stats, comparison_type, constants); case PhysicalType::INT32: - return CheckZonemapTemplated(stats, comparison_type, constant); + return CheckZonemapTemplated(stats, comparison_type, constants); case PhysicalType::INT64: - return CheckZonemapTemplated(stats, comparison_type, constant); + return CheckZonemapTemplated(stats, comparison_type, constants); case PhysicalType::UINT8: - return CheckZonemapTemplated(stats, comparison_type, constant); + return CheckZonemapTemplated(stats, comparison_type, constants); case PhysicalType::UINT16: - return CheckZonemapTemplated(stats, comparison_type, constant); + return CheckZonemapTemplated(stats, comparison_type, constants); case PhysicalType::UINT32: - return CheckZonemapTemplated(stats, comparison_type, constant); + return CheckZonemapTemplated(stats, comparison_type, constants); case PhysicalType::UINT64: - return CheckZonemapTemplated(stats, comparison_type, constant); + return CheckZonemapTemplated(stats, comparison_type, constants); case PhysicalType::INT128: - return CheckZonemapTemplated(stats, 
comparison_type, constant); + return CheckZonemapTemplated(stats, comparison_type, constants); case PhysicalType::UINT128: - return CheckZonemapTemplated(stats, comparison_type, constant); + return CheckZonemapTemplated(stats, comparison_type, constants); case PhysicalType::FLOAT: - return CheckZonemapTemplated(stats, comparison_type, constant); + return CheckZonemapTemplated(stats, comparison_type, constants); case PhysicalType::DOUBLE: - return CheckZonemapTemplated(stats, comparison_type, constant); + return CheckZonemapTemplated(stats, comparison_type, constants); default: throw InternalException("Unsupported type for NumericStats::CheckZonemap"); } diff --git a/src/duckdb/src/storage/statistics/string_stats.cpp b/src/duckdb/src/storage/statistics/string_stats.cpp index c3994b54..691bae09 100644 --- a/src/duckdb/src/storage/statistics/string_stats.cpp +++ b/src/duckdb/src/storage/statistics/string_stats.cpp @@ -184,10 +184,21 @@ void StringStats::Merge(BaseStatistics &stats, const BaseStatistics &other) { } FilterPropagateResult StringStats::CheckZonemap(const BaseStatistics &stats, ExpressionType comparison_type, - const string &constant) { + array_ptr constants) { auto &string_data = StringStats::GetDataUnsafe(stats); - return CheckZonemap(string_data.min, StringStatsData::MAX_STRING_MINMAX_SIZE, string_data.max, - StringStatsData::MAX_STRING_MINMAX_SIZE, comparison_type, constant); + for (auto &constant_value : constants) { + D_ASSERT(constant_value.type() == stats.GetType()); + D_ASSERT(!constant_value.IsNull()); + auto &constant = StringValue::Get(constant_value); + auto prune_result = CheckZonemap(string_data.min, StringStatsData::MAX_STRING_MINMAX_SIZE, string_data.max, + StringStatsData::MAX_STRING_MINMAX_SIZE, comparison_type, constant); + if (prune_result == FilterPropagateResult::NO_PRUNING_POSSIBLE) { + return FilterPropagateResult::NO_PRUNING_POSSIBLE; + } else if (prune_result == FilterPropagateResult::FILTER_ALWAYS_TRUE) { + return 
FilterPropagateResult::FILTER_ALWAYS_TRUE; + } + } + return FilterPropagateResult::FILTER_ALWAYS_FALSE; } FilterPropagateResult StringStats::CheckZonemap(const_data_ptr_t min_data, idx_t min_len, const_data_ptr_t max_data, diff --git a/src/duckdb/src/storage/table/row_group.cpp b/src/duckdb/src/storage/table/row_group.cpp index 2b97075e..2f9eff81 100644 --- a/src/duckdb/src/storage/table/row_group.cpp +++ b/src/duckdb/src/storage/table/row_group.cpp @@ -453,6 +453,7 @@ static idx_t GetFilterScanCount(ColumnScanState &state, TableFilter &filter) { case TableFilterType::IS_NULL: case TableFilterType::IS_NOT_NULL: case TableFilterType::CONSTANT_COMPARISON: + case TableFilterType::IN_FILTER: return state.current->start + state.current->count; default: { throw NotImplementedException("Unimplemented filter type for zonemap"); diff --git a/src/duckdb/ub_src_planner_filter.cpp b/src/duckdb/ub_src_planner_filter.cpp index 700825c3..b9b8f086 100644 --- a/src/duckdb/ub_src_planner_filter.cpp +++ b/src/duckdb/ub_src_planner_filter.cpp @@ -2,6 +2,8 @@ #include "src/planner/filter/constant_filter.cpp" +#include "src/planner/filter/in_filter.cpp" + #include "src/planner/filter/null_filter.cpp" #include "src/planner/filter/struct_filter.cpp"