Skip to content

Commit

Permalink
Update vendored DuckDB sources to 4c582f3
Browse files Browse the repository at this point in the history
  • Loading branch information
duckdblabs-bot committed Nov 19, 2024
1 parent 4c582f3 commit 12cdd31
Show file tree
Hide file tree
Showing 53 changed files with 772 additions and 269 deletions.
7 changes: 4 additions & 3 deletions src/duckdb/src/common/enum_util.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -3581,19 +3581,20 @@ const StringUtil::EnumStringLiteral *GetTableFilterTypeValues() {
{ static_cast<uint32_t>(TableFilterType::CONJUNCTION_OR), "CONJUNCTION_OR" },
{ static_cast<uint32_t>(TableFilterType::CONJUNCTION_AND), "CONJUNCTION_AND" },
{ static_cast<uint32_t>(TableFilterType::STRUCT_EXTRACT), "STRUCT_EXTRACT" },
{ static_cast<uint32_t>(TableFilterType::OPTIONAL_FILTER), "OPTIONAL_FILTER" }
{ static_cast<uint32_t>(TableFilterType::OPTIONAL_FILTER), "OPTIONAL_FILTER" },
{ static_cast<uint32_t>(TableFilterType::IN_FILTER), "IN_FILTER" }
};
return values;
}

// Specialization of EnumUtil::ToChars for TableFilterType: forwards the numeric enum value,
// together with the table returned by GetTableFilterTypeValues(), to StringUtil::EnumToString.
// NOTE(review): the integer literal is presumably the number of entries in that table and has
// to be kept in sync with it - confirm against StringUtil::EnumToString.
template<>
const char* EnumUtil::ToChars<TableFilterType>(TableFilterType value) {
return StringUtil::EnumToString(GetTableFilterTypeValues(), 7, "TableFilterType", static_cast<uint32_t>(value));
return StringUtil::EnumToString(GetTableFilterTypeValues(), 8, "TableFilterType", static_cast<uint32_t>(value));
}

// Specialization of EnumUtil::FromString for TableFilterType: passes the input string and the
// table returned by GetTableFilterTypeValues() to StringUtil::StringToEnum and casts the result
// back to TableFilterType.
// NOTE(review): the integer literal is presumably the number of entries in that table and has
// to be kept in sync with it - confirm against StringUtil::StringToEnum.
template<>
TableFilterType EnumUtil::FromString<TableFilterType>(const char *value) {
return static_cast<TableFilterType>(StringUtil::StringToEnum(GetTableFilterTypeValues(), 7, "TableFilterType", value));
return static_cast<TableFilterType>(StringUtil::StringToEnum(GetTableFilterTypeValues(), 8, "TableFilterType", value));
}

const StringUtil::EnumStringLiteral *GetTablePartitionInfoValues() {
Expand Down
57 changes: 0 additions & 57 deletions src/duckdb/src/common/row_operations/row_gather.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -178,61 +178,4 @@ void RowOperations::Gather(Vector &rows, const SelectionVector &row_sel, Vector
}
}

//! Loads the value stored at byte offset `col_offset` of each of the `count` row pointers in
//! `rows` and writes it to the same position of the flat vector `col`.
//! Throws an InternalException as soon as a row whose validity bit for column `col_no` is unset
//! is encountered; the value of that row has already been written to `col` at that point.
template <class T>
static void TemplatedFullScanLoop(Vector &rows, Vector &col, idx_t count, idx_t col_offset, idx_t col_no,
                                  idx_t column_count) {
	// The position of this column's validity bit is the same for every row: compute it once
	idx_t validity_entry_idx;
	idx_t bit_in_entry;
	ValidityBytes::GetEntryIndex(col_no, validity_entry_idx, bit_in_entry);

	const auto row_pointers = FlatVector::GetData<data_ptr_t>(rows);
	const auto target = FlatVector::GetData<T>(col);

	for (idx_t row_idx = 0; row_idx < count; row_idx++) {
		const auto row_ptr = row_pointers[row_idx];
		target[row_idx] = Load<T>(row_ptr + col_offset);

		ValidityBytes row_validity(row_ptr, column_count);
		const auto validity_entry = row_validity.GetValidityEntry(validity_entry_idx);
		if (row_validity.RowIsValid(validity_entry, bit_in_entry)) {
			continue;
		}
		// NULL keys are not supported by the caller
		throw InternalException("Null value comparisons not implemented for perfect hash table yet");
	}
}

//! Reads column `col_no` out of the `count` rows referenced by `rows` into `col`.
//! `col` is switched to a flat vector first; the copy itself is dispatched on the physical type
//! of `col`. Only the 8/16/32/64-bit signed and unsigned integer types are handled, every other
//! physical type raises a NotImplementedException.
void RowOperations::FullScanColumn(const TupleDataLayout &layout, Vector &rows, Vector &col, idx_t count,
                                   idx_t col_no) {
	const auto offset_in_row = layout.GetOffsets()[col_no];
	const auto total_columns = layout.ColumnCount();
	col.SetVectorType(VectorType::FLAT_VECTOR);

	const auto physical_type = col.GetType().InternalType();
	switch (physical_type) {
	// signed integers
	case PhysicalType::INT8:
		TemplatedFullScanLoop<int8_t>(rows, col, count, offset_in_row, col_no, total_columns);
		break;
	case PhysicalType::INT16:
		TemplatedFullScanLoop<int16_t>(rows, col, count, offset_in_row, col_no, total_columns);
		break;
	case PhysicalType::INT32:
		TemplatedFullScanLoop<int32_t>(rows, col, count, offset_in_row, col_no, total_columns);
		break;
	case PhysicalType::INT64:
		TemplatedFullScanLoop<int64_t>(rows, col, count, offset_in_row, col_no, total_columns);
		break;
	// unsigned integers
	case PhysicalType::UINT8:
		TemplatedFullScanLoop<uint8_t>(rows, col, count, offset_in_row, col_no, total_columns);
		break;
	case PhysicalType::UINT16:
		TemplatedFullScanLoop<uint16_t>(rows, col, count, offset_in_row, col_no, total_columns);
		break;
	case PhysicalType::UINT32:
		TemplatedFullScanLoop<uint32_t>(rows, col, count, offset_in_row, col_no, total_columns);
		break;
	case PhysicalType::UINT64:
		TemplatedFullScanLoop<uint64_t>(rows, col, count, offset_in_row, col_no, total_columns);
		break;
	default:
		throw NotImplementedException("Unimplemented type for RowOperations::FullScanColumn");
	}
}

} // namespace duckdb
52 changes: 51 additions & 1 deletion src/duckdb/src/common/types.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,7 @@
#include "duckdb/common/serializer/deserializer.hpp"
#include "duckdb/common/serializer/serializer.hpp"
#include "duckdb/common/string_util.hpp"
#include "duckdb/common/type_visitor.hpp"
#include "duckdb/common/types/decimal.hpp"
#include "duckdb/common/types/hash.hpp"
#include "duckdb/common/types/string_type.hpp"
Expand All @@ -24,11 +25,12 @@
#include "duckdb/main/attached_database.hpp"
#include "duckdb/main/client_context.hpp"
#include "duckdb/main/client_data.hpp"
#include "duckdb/main/config.hpp"
#include "duckdb/main/database.hpp"
#include "duckdb/main/database_manager.hpp"
#include "duckdb/parser/keyword_helper.hpp"
#include "duckdb/parser/parser.hpp"
#include "duckdb/main/config.hpp"

#include <cmath>

namespace duckdb {
Expand Down Expand Up @@ -678,6 +680,54 @@ bool LogicalType::IsValid() const {
return id() != LogicalTypeId::INVALID && id() != LogicalTypeId::UNKNOWN;
}

//! Returns true if this type - and, through the TypeVisitor recursion, every type nested inside
//! it - is fully specified.
//! A type is considered incomplete when it is INVALID, UNKNOWN or ANY, when a LIST/MAP, STRUCT/UNION,
//! ARRAY or DECIMAL lacks the matching extra type info, when a STRUCT/UNION has no child types, or
//! when a DECIMAL carries a width/scale combination outside of
//! 1 <= width <= Decimal::MAX_WIDTH_DECIMAL, scale <= width.
bool LogicalType::IsComplete() const {
	// The predicate below answers "is this type INCOMPLETE?" - the type as a whole is complete
	// only if the predicate does not fire for the type itself or for any nested type
	return !TypeVisitor::Contains(*this, [](const LogicalType &type) {
		switch (type.id()) {
		case LogicalTypeId::INVALID:
		case LogicalTypeId::UNKNOWN:
		case LogicalTypeId::ANY:
			return true; // These are incomplete by default
		case LogicalTypeId::LIST:
		case LogicalTypeId::MAP:
			if (!type.AuxInfo() || type.AuxInfo()->type != ExtraTypeInfoType::LIST_TYPE_INFO) {
				return true; // Missing or incorrect type info
			}
			break;
		case LogicalTypeId::STRUCT:
		case LogicalTypeId::UNION:
			if (!type.AuxInfo() || type.AuxInfo()->type != ExtraTypeInfoType::STRUCT_TYPE_INFO) {
				return true; // Missing or incorrect type info
			}
			break;
		case LogicalTypeId::ARRAY:
			if (!type.AuxInfo() || type.AuxInfo()->type != ExtraTypeInfoType::ARRAY_TYPE_INFO) {
				return true; // Missing or incorrect type info
			}
			break;
		case LogicalTypeId::DECIMAL:
			if (!type.AuxInfo() || type.AuxInfo()->type != ExtraTypeInfoType::DECIMAL_TYPE_INFO) {
				return true; // Missing or incorrect type info
			}
			break;
		default:
			return false;
		}

		// Type has type info, check if it is complete
		D_ASSERT(type.AuxInfo());
		switch (type.AuxInfo()->type) {
		case ExtraTypeInfoType::STRUCT_TYPE_INFO:
			return type.AuxInfo()->Cast<StructTypeInfo>().child_types.empty(); // Cannot be empty
		case ExtraTypeInfoType::DECIMAL_TYPE_INFO: {
			// The lambda reports INCOMPLETE types, so a decimal must be flagged when its
			// width/scale are NOT valid (returning the validity itself would mark every
			// well-formed DECIMAL as incomplete)
			const auto width = DecimalType::GetWidth(type);
			const auto scale = DecimalType::GetScale(type);
			const bool valid_decimal = width >= 1 && width <= Decimal::MAX_WIDTH_DECIMAL && scale <= width;
			return !valid_decimal;
		}
		default:
			return false; // Nested types are checked by TypeVisitor recursion
		}
	});
}

bool LogicalType::GetDecimalProperties(uint8_t &width, uint8_t &scale) const {
switch (id_) {
case LogicalTypeId::SQLNULL:
Expand Down
7 changes: 4 additions & 3 deletions src/duckdb/src/common/types/column/column_data_collection.cpp
Original file line number Diff line number Diff line change
@@ -1,14 +1,14 @@
#include "duckdb/common/types/column/column_data_collection.hpp"

#include "duckdb/common/printer.hpp"
#include "duckdb/common/serializer/deserializer.hpp"
#include "duckdb/common/serializer/serializer.hpp"
#include "duckdb/common/string_util.hpp"
#include "duckdb/common/types/column/column_data_collection_segment.hpp"
#include "duckdb/common/types/value_map.hpp"
#include "duckdb/common/uhugeint.hpp"
#include "duckdb/common/vector_operations/vector_operations.hpp"
#include "duckdb/storage/buffer_manager.hpp"
#include "duckdb/common/serializer/serializer.hpp"
#include "duckdb/common/serializer/deserializer.hpp"

namespace duckdb {

Expand Down Expand Up @@ -779,7 +779,8 @@ ColumnDataCopyFunction ColumnDataCollection::GetCopyFunction(const LogicalType &
break;
}
default:
throw InternalException("Unsupported type for ColumnDataCollection::GetCopyFunction");
throw InternalException("Unsupported type %s for ColumnDataCollection::GetCopyFunction",
EnumUtil::ToString(type.InternalType()));
}
result.function = function;
return result;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -50,7 +50,8 @@ bool PerfectHashJoinExecutor::FullScanHashTable(LogicalType &key_type) {

// Scan the build keys in the hash table
Vector build_vector(key_type, key_count);
RowOperations::FullScanColumn(ht.layout, tuples_addresses, build_vector, key_count, 0);
data_collection.Gather(tuples_addresses, *FlatVector::IncrementalSelectionVector(), key_count, 0, build_vector,
*FlatVector::IncrementalSelectionVector(), nullptr);

// Now fill the selection vector using the build keys and create a sequential vector
// TODO: add check for fast pass when probe is part of build domain
Expand Down
60 changes: 57 additions & 3 deletions src/duckdb/src/execution/operator/join/physical_hash_join.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -3,8 +3,8 @@
#include "duckdb/common/radix_partitioning.hpp"
#include "duckdb/execution/expression_executor.hpp"
#include "duckdb/execution/operator/aggregate/ungrouped_aggregate_state.hpp"
#include "duckdb/function/aggregate/distributive_functions.hpp"
#include "duckdb/function/aggregate/distributive_function_utils.hpp"
#include "duckdb/function/aggregate/distributive_functions.hpp"
#include "duckdb/function/function_binder.hpp"
#include "duckdb/main/client_context.hpp"
#include "duckdb/main/query_profiler.hpp"
Expand All @@ -15,12 +15,17 @@
#include "duckdb/parallel/thread_context.hpp"
#include "duckdb/planner/expression/bound_aggregate_expression.hpp"
#include "duckdb/planner/expression/bound_reference_expression.hpp"
#include "duckdb/planner/filter/conjunction_filter.hpp"
#include "duckdb/planner/filter/constant_filter.hpp"
#include "duckdb/planner/filter/in_filter.hpp"
#include "duckdb/planner/filter/null_filter.hpp"
#include "duckdb/planner/filter/optional_filter.hpp"
#include "duckdb/planner/table_filter.hpp"
#include "duckdb/storage/buffer_manager.hpp"
#include "duckdb/storage/storage_manager.hpp"
#include "duckdb/storage/temporary_memory_manager.hpp"
#include "duckdb/common/types/value_map.hpp"
#include "duckdb/optimizer/filter_combiner.hpp"

namespace duckdb {

Expand Down Expand Up @@ -567,7 +572,50 @@ class HashJoinRepartitionEvent : public BasePipelineEvent {
}
};

void JoinFilterPushdownInfo::PushFilters(JoinFilterGlobalState &gstate, const PhysicalOperator &op) const {
void JoinFilterPushdownInfo::PushInFilter(const JoinFilterPushdownFilter &info, JoinHashTable &ht,
const PhysicalOperator &op, idx_t filter_idx, idx_t filter_col_idx) const {
// generate a "OR" filter (i.e. x=1 OR x=535 OR x=997)
// first scan the entire vector at the probe side
// FIXME: this code is duplicated from PerfectHashJoinExecutor::FullScanHashTable
auto build_idx = join_condition[filter_idx];
auto &data_collection = ht.GetDataCollection();

Vector tuples_addresses(LogicalType::POINTER, ht.Count()); // allocate space for all the tuples

JoinHTScanState join_ht_state(data_collection, 0, data_collection.ChunkCount(),
TupleDataPinProperties::KEEP_EVERYTHING_PINNED);

// Go through all the blocks and fill the keys addresses
idx_t key_count = ht.FillWithHTOffsets(join_ht_state, tuples_addresses);

// Scan the build keys in the hash table
Vector build_vector(ht.layout.GetTypes()[build_idx], key_count);
data_collection.Gather(tuples_addresses, *FlatVector::IncrementalSelectionVector(), key_count, build_idx,
build_vector, *FlatVector::IncrementalSelectionVector(), nullptr);

// generate the OR-clause - note that we only need to consider unique values here (so we use a seT)
value_set_t unique_ht_values;
for (idx_t k = 0; k < key_count; k++) {
unique_ht_values.insert(build_vector.GetValue(k));
}
vector<Value> in_list(unique_ht_values.begin(), unique_ht_values.end());

// generating the OR filter only makes sense if the range is not dense
// i.e. if we have the values [0, 1, 2, 3, 4] - the min/max is fully equivalent to the OR filter
if (FilterCombiner::IsDenseRange(in_list)) {
return;
}

// generate the OR filter
auto or_filter = make_uniq<InFilter>(std::move(in_list));
// we push the OR filter as an OptionalFilter so that we can use it for zonemap pruning only
// the IN-list is expensive to execute otherwise
auto filter = make_uniq<OptionalFilter>(std::move(or_filter));
info.dynamic_filters->PushFilter(op, filter_col_idx, std::move(filter));
}

void JoinFilterPushdownInfo::PushFilters(ClientContext &context, JoinHashTable &ht, JoinFilterGlobalState &gstate,
const PhysicalOperator &op) const {
// finalize the min/max aggregates
vector<LogicalType> min_max_types;
for (auto &aggr_expr : min_max_aggregates) {
Expand All @@ -578,6 +626,7 @@ void JoinFilterPushdownInfo::PushFilters(JoinFilterGlobalState &gstate, const Ph

gstate.global_aggregate_state->Finalize(final_min_max);

auto dynamic_or_filter_threshold = ClientConfig::GetSetting<DynamicOrFilterThresholdSetting>(context);
// create a filter for each of the aggregates
for (idx_t filter_idx = 0; filter_idx < join_condition.size(); filter_idx++) {
for (auto &info : probe_info) {
Expand All @@ -593,6 +642,11 @@ void JoinFilterPushdownInfo::PushFilters(JoinFilterGlobalState &gstate, const Ph
// hash table e.g. because they are part of a RIGHT join
continue;
}
// if the HT is small we can generate a complete "OR" filter
if (ht.Count() > 1 && ht.Count() <= dynamic_or_filter_threshold) {
PushInFilter(info, ht, op, filter_idx, filter_col_idx);
}

if (Value::NotDistinctFrom(min_val, max_val)) {
// min = max - generate an equality filter
auto constant_filter = make_uniq<ConstantFilter>(ExpressionType::COMPARE_EQUAL, std::move(min_val));
Expand Down Expand Up @@ -655,7 +709,7 @@ SinkFinalizeType PhysicalHashJoin::Finalize(Pipeline &pipeline, Event &event, Cl
ht.Unpartition();

if (filter_pushdown && ht.Count() > 0) {
filter_pushdown->PushFilters(*sink.global_filter_state, *this);
filter_pushdown->PushFilters(context, ht, *sink.global_filter_state, *this);
}

// check for possible perfect hash table
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -111,7 +111,11 @@ OperatorResultType PhysicalTableInOutFunction::Execute(ExecutionContext &context
// Builds the key/value parameter map describing this operator.
// If the table function provides a to_string callback, the output of that callback (invoked with
// the function's bind data) fills the map; otherwise the map only holds the function name under
// the key "Name".
InsertionOrderPreservingMap<string> PhysicalTableInOutFunction::ParamsToString() const {
InsertionOrderPreservingMap<string> result;
if (function.to_string) {
result["__text__"] = function.to_string(bind_data.get());
TableFunctionToStringInput input(function, bind_data.get());
auto to_string_result = function.to_string(input);
// copy every entry produced by the callback into the result map
for (const auto &it : to_string_result) {
result[it.first] = it.second;
}
} else {
result["Name"] = function.name;
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -164,7 +164,11 @@ void AddProjectionNames(const ColumnIndex &index, const string &name, const Logi
// Builds the key/value parameter map describing this table scan.
// If the table function provides a to_string callback, the output of that callback (invoked with
// the function's bind data) fills the map; otherwise the upper-cased function name is stored
// under the key "Function".
InsertionOrderPreservingMap<string> PhysicalTableScan::ParamsToString() const {
InsertionOrderPreservingMap<string> result;
if (function.to_string) {
result["__text__"] = function.to_string(bind_data.get());
TableFunctionToStringInput input(function, bind_data.get());
auto to_string_result = function.to_string(input);
// copy every entry produced by the callback into the result map
for (const auto &it : to_string_result) {
result[it.first] = it.second;
}
} else {
result["Function"] = StringUtil::Upper(function.name);
}
Expand Down
27 changes: 8 additions & 19 deletions src/duckdb/src/function/function_binder.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -6,13 +6,13 @@
#include "duckdb/execution/expression_executor.hpp"
#include "duckdb/function/aggregate_function.hpp"
#include "duckdb/function/cast_rules.hpp"
#include "duckdb/function/scalar/generic_functions.hpp"
#include "duckdb/parser/parsed_data/create_secret_info.hpp"
#include "duckdb/planner/expression/bound_aggregate_expression.hpp"
#include "duckdb/planner/expression/bound_cast_expression.hpp"
#include "duckdb/planner/expression/bound_constant_expression.hpp"
#include "duckdb/planner/expression/bound_function_expression.hpp"
#include "duckdb/planner/expression_binder.hpp"
#include "duckdb/function/scalar/generic_functions.hpp"

namespace duckdb {

Expand Down Expand Up @@ -320,24 +320,13 @@ unique_ptr<Expression> FunctionBinder::BindScalarFunction(ScalarFunctionCatalogE
// found a matching function!
auto bound_function = func.functions.GetFunctionByOffset(best_function.GetIndex());

// If any of the parameters are NULL, the function will just be replaced with a NULL constant
// But this NULL constant needs to have to correct type, because we use LogicalType::SQLNULL for binding macro's
// However, some functions may have an invalid return type, so we default to SQLNULL for those
LogicalType return_type_if_null;
switch (bound_function.return_type.id()) {
case LogicalTypeId::ANY:
case LogicalTypeId::DECIMAL:
case LogicalTypeId::STRUCT:
case LogicalTypeId::LIST:
case LogicalTypeId::MAP:
case LogicalTypeId::UNION:
case LogicalTypeId::ARRAY:
return_type_if_null = LogicalType::SQLNULL;
break;
default:
return_type_if_null = bound_function.return_type;
}

// If any of the parameters are NULL, the function will just be replaced with a NULL constant.
// We try to give the NULL constant the correct type, but we have to do this without binding the function,
// because functions with DEFAULT_NULL_HANDLING should not have to deal with NULL inputs in their bind code.
// Some functions may have an invalid default return type, as they must be bound to infer the return type.
// In those cases, we default to SQLNULL.
const auto return_type_if_null =
bound_function.return_type.IsComplete() ? bound_function.return_type : LogicalType::SQLNULL;
if (bound_function.null_handling == FunctionNullHandling::DEFAULT_NULL_HANDLING) {
for (auto &child : children) {
if (child->return_type == LogicalTypeId::SQLNULL) {
Expand Down
4 changes: 2 additions & 2 deletions src/duckdb/src/function/scalar/list/list_zip.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -112,7 +112,7 @@ static void ListZipFunction(DataChunk &args, ExpressionState &state, Vector &res
offset += len;
}
for (idx_t child_idx = 0; child_idx < args_size; child_idx++) {
if (!(args.data[child_idx].GetType() == LogicalType::SQLNULL)) {
if (args.data[child_idx].GetType() != LogicalType::SQLNULL) {
struct_entries[child_idx]->Slice(ListVector::GetEntry(args.data[child_idx]), selections[child_idx],
result_size);
}
Expand Down Expand Up @@ -161,7 +161,7 @@ ScalarFunction ListZipFun::GetFunction() {

auto fun = ScalarFunction({}, LogicalType::LIST(LogicalTypeId::STRUCT), ListZipFunction, ListZipBind);
fun.varargs = LogicalType::ANY;
fun.null_handling = FunctionNullHandling::SPECIAL_HANDLING; // Special handling needed?
fun.null_handling = FunctionNullHandling::SPECIAL_HANDLING;
return fun;
}

Expand Down
Loading

0 comments on commit 12cdd31

Please sign in to comment.