From 7bad582e34e801df91125ff7321b462f4c488dd5 Mon Sep 17 00:00:00 2001 From: DuckDB Labs GitHub Bot Date: Sat, 9 Nov 2024 00:32:21 +0000 Subject: [PATCH] Update vendored DuckDB sources to 3ca8544e --- .../catalog_entry/duck_table_entry.cpp | 6 +- src/duckdb/src/common/enum_util.cpp | 8 +- src/duckdb/src/common/enums/metric_type.cpp | 20 +- src/duckdb/src/common/multi_file_list.cpp | 5 +- .../persistent/physical_batch_insert.cpp | 24 ++- .../persistent/physical_copy_to_file.cpp | 6 + .../operator/persistent/physical_delete.cpp | 2 +- .../operator/persistent/physical_insert.cpp | 11 +- .../physical_tableinout_function.cpp | 2 +- .../operator/scan/physical_table_scan.cpp | 9 +- .../physical_plan/plan_aggregate.cpp | 2 +- .../src/execution/physical_plan/plan_get.cpp | 24 +-- .../function/scalar/struct/struct_extract.cpp | 17 +- src/duckdb/src/function/table/table_scan.cpp | 48 +++-- .../function/table/version/pragma_version.cpp | 6 +- src/duckdb/src/include/duckdb.h | 17 ++ .../include/duckdb/common/column_index.hpp | 72 +++++++ .../duckdb/common/enums/metric_type.hpp | 8 +- .../include/duckdb/common/multi_file_list.hpp | 3 +- .../duckdb/common/types/validity_mask.hpp | 1 + .../operator/persistent/physical_insert.hpp | 2 +- .../physical_tableinout_function.hpp | 4 +- .../operator/scan/physical_table_scan.hpp | 5 +- .../duckdb/function/scalar/struct_utils.hpp | 33 ++++ .../duckdb/function/table_function.hpp | 22 ++- .../duckdb/main/capi/extension_api.hpp | 4 + .../duckdb/optimizer/filter_combiner.hpp | 2 +- .../optimizer/remove_unused_columns.hpp | 18 +- .../include/duckdb/planner/bind_context.hpp | 7 +- .../duckdb/planner/operator/logical_get.hpp | 8 +- .../include/duckdb/planner/table_binding.hpp | 7 +- .../include/duckdb/planner/table_filter.hpp | 3 +- .../src/include/duckdb/storage/data_table.hpp | 8 +- .../include/duckdb/storage/storage_index.hpp | 70 +++++++ .../duckdb/storage/table/delete_state.hpp | 2 +- .../duckdb/storage/table/row_group.hpp | 4 +- .../storage/table/row_group_collection.hpp | 12 +- .../duckdb/storage/table/scan_state.hpp | 17 +- .../duckdb/transaction/local_storage.hpp | 10 +- src/duckdb/src/include/duckdb_extension.h | 4 + src/duckdb/src/main/capi/duckdb_value-c.cpp | 38 +++- .../optimizer/build_probe_side_optimizer.cpp | 9 +- src/duckdb/src/optimizer/filter_combiner.cpp | 25 +-- .../join_order/relation_statistics_helper.cpp | 11 +- .../src/optimizer/remove_unused_columns.cpp | 178 +++++++++++++++--- .../statistics/operator/propagate_get.cpp | 6 +- src/duckdb/src/planner/bind_context.cpp | 6 +- src/duckdb/src/planner/binder.cpp | 4 +- .../planner/binder/query_node/plan_setop.cpp | 5 +- .../planner/binder/statement/bind_insert.cpp | 13 +- .../planner/binder/statement/bind_vacuum.cpp | 2 +- .../planner/binder/tableref/bind_showref.cpp | 2 +- .../expression_binder/index_binder.cpp | 7 +- .../src/planner/operator/logical_get.cpp | 55 ++++-- src/duckdb/src/planner/table_binding.cpp | 27 +-- src/duckdb/src/planner/table_filter.cpp | 11 +- src/duckdb/src/storage/data_table.cpp | 16 +- src/duckdb/src/storage/local_storage.cpp | 21 ++- .../storage/serialization/serialize_nodes.cpp | 13 ++ src/duckdb/src/storage/table/row_group.cpp | 61 ++++-- .../storage/table/row_group_collection.cpp | 34 ++-- src/duckdb/src/storage/table/scan_state.cpp | 13 +- .../src/storage/table/struct_column_data.cpp | 39 +++- src/duckdb/src/storage/table_index_list.cpp | 2 +- src/duckdb/src/storage/wal_replay.cpp | 13 +- 65 files changed, 854 insertions(+), 290 deletions(-) create mode 100644 src/duckdb/src/include/duckdb/common/column_index.hpp create mode 100644 src/duckdb/src/include/duckdb/function/scalar/struct_utils.hpp create mode 100644 src/duckdb/src/include/duckdb/storage/storage_index.hpp diff --git a/src/duckdb/src/catalog/catalog_entry/duck_table_entry.cpp b/src/duckdb/src/catalog/catalog_entry/duck_table_entry.cpp index 901a7120..e1d0c66a 100644 --- a/src/duckdb/src/catalog/catalog_entry/duck_table_entry.cpp +++ b/src/duckdb/src/catalog/catalog_entry/duck_table_entry.cpp @@ -671,12 +671,12 @@ unique_ptr DuckTableEntry::ChangeColumnType(ClientContext &context auto bound_create_info = binder->BindCreateTableInfo(std::move(create_info), schema); - vector storage_oids; + vector storage_oids; for (idx_t i = 0; i < bound_columns.size(); i++) { - storage_oids.push_back(columns.LogicalToPhysical(bound_columns[i]).index); + storage_oids.emplace_back(columns.LogicalToPhysical(bound_columns[i]).index); } if (storage_oids.empty()) { - storage_oids.push_back(COLUMN_IDENTIFIER_ROW_ID); + storage_oids.emplace_back(COLUMN_IDENTIFIER_ROW_ID); } auto new_storage = diff --git a/src/duckdb/src/common/enum_util.cpp b/src/duckdb/src/common/enum_util.cpp index ae0072ab..8d6598bb 100644 --- a/src/duckdb/src/common/enum_util.cpp +++ b/src/duckdb/src/common/enum_util.cpp @@ -2178,9 +2178,9 @@ const StringUtil::EnumStringLiteral *GetMetricsTypeValues() { { static_cast(MetricsType::CUMULATIVE_ROWS_SCANNED), "CUMULATIVE_ROWS_SCANNED" }, { static_cast(MetricsType::OPERATOR_ROWS_SCANNED), "OPERATOR_ROWS_SCANNED" }, { static_cast(MetricsType::OPERATOR_TIMING), "OPERATOR_TIMING" }, + { static_cast(MetricsType::RESULT_SET_SIZE), "RESULT_SET_SIZE" }, { static_cast(MetricsType::LATENCY), "LATENCY" }, { static_cast(MetricsType::ROWS_RETURNED), "ROWS_RETURNED" }, - { static_cast(MetricsType::RESULT_SET_SIZE), "RESULT_SET_SIZE" }, { static_cast(MetricsType::ALL_OPTIMIZERS), "ALL_OPTIMIZERS" }, { static_cast(MetricsType::CUMULATIVE_OPTIMIZER_TIMING), "CUMULATIVE_OPTIMIZER_TIMING" }, { static_cast(MetricsType::PLANNER), "PLANNER" }, @@ -2192,6 +2192,7 @@ const StringUtil::EnumStringLiteral *GetMetricsTypeValues() { { static_cast(MetricsType::OPTIMIZER_EXPRESSION_REWRITER), "OPTIMIZER_EXPRESSION_REWRITER" }, { static_cast(MetricsType::OPTIMIZER_FILTER_PULLUP), "OPTIMIZER_FILTER_PULLUP" }, { static_cast(MetricsType::OPTIMIZER_FILTER_PUSHDOWN), "OPTIMIZER_FILTER_PUSHDOWN" }, + { static_cast(MetricsType::OPTIMIZER_EMPTY_RESULT_PULLUP), "OPTIMIZER_EMPTY_RESULT_PULLUP" }, { static_cast(MetricsType::OPTIMIZER_CTE_FILTER_PUSHER), "OPTIMIZER_CTE_FILTER_PUSHER" }, { static_cast(MetricsType::OPTIMIZER_REGEX_RANGE), "OPTIMIZER_REGEX_RANGE" }, { static_cast(MetricsType::OPTIMIZER_IN_CLAUSE), "OPTIMIZER_IN_CLAUSE" }, @@ -2205,15 +2206,14 @@ const StringUtil::EnumStringLiteral *GetMetricsTypeValues() { { static_cast(MetricsType::OPTIMIZER_COLUMN_LIFETIME), "OPTIMIZER_COLUMN_LIFETIME" }, { static_cast(MetricsType::OPTIMIZER_BUILD_SIDE_PROBE_SIDE), "OPTIMIZER_BUILD_SIDE_PROBE_SIDE" }, { static_cast(MetricsType::OPTIMIZER_LIMIT_PUSHDOWN), "OPTIMIZER_LIMIT_PUSHDOWN" }, - { static_cast(MetricsType::OPTIMIZER_SAMPLING_PUSHDOWN), "OPTIMIZER_SAMPLING_PUSHDOWN" }, { static_cast(MetricsType::OPTIMIZER_TOP_N), "OPTIMIZER_TOP_N" }, { static_cast(MetricsType::OPTIMIZER_COMPRESSED_MATERIALIZATION), "OPTIMIZER_COMPRESSED_MATERIALIZATION" }, { static_cast(MetricsType::OPTIMIZER_DUPLICATE_GROUPS), "OPTIMIZER_DUPLICATE_GROUPS" }, { static_cast(MetricsType::OPTIMIZER_REORDER_FILTER), "OPTIMIZER_REORDER_FILTER" }, + { static_cast(MetricsType::OPTIMIZER_SAMPLING_PUSHDOWN), "OPTIMIZER_SAMPLING_PUSHDOWN" }, { static_cast(MetricsType::OPTIMIZER_JOIN_FILTER_PUSHDOWN), "OPTIMIZER_JOIN_FILTER_PUSHDOWN" }, { static_cast(MetricsType::OPTIMIZER_EXTENSION), "OPTIMIZER_EXTENSION" }, - { static_cast(MetricsType::OPTIMIZER_MATERIALIZED_CTE), "OPTIMIZER_MATERIALIZED_CTE" }, - { static_cast(MetricsType::OPTIMIZER_EMPTY_RESULT_PULLUP), "OPTIMIZER_EMPTY_RESULT_PULLUP" } + { static_cast(MetricsType::OPTIMIZER_MATERIALIZED_CTE), "OPTIMIZER_MATERIALIZED_CTE" } }; return values; } diff --git a/src/duckdb/src/common/enums/metric_type.cpp b/src/duckdb/src/common/enums/metric_type.cpp index cb684c95..8ccc6e30 100644 --- a/src/duckdb/src/common/enums/metric_type.cpp +++ b/src/duckdb/src/common/enums/metric_type.cpp @@ -16,6 +16,7 @@ profiler_settings_t MetricsUtils::GetOptimizerMetrics() { MetricsType::OPTIMIZER_EXPRESSION_REWRITER, MetricsType::OPTIMIZER_FILTER_PULLUP, MetricsType::OPTIMIZER_FILTER_PUSHDOWN, + MetricsType::OPTIMIZER_EMPTY_RESULT_PULLUP, MetricsType::OPTIMIZER_CTE_FILTER_PUSHER, MetricsType::OPTIMIZER_REGEX_RANGE, MetricsType::OPTIMIZER_IN_CLAUSE, @@ -33,6 +34,7 @@ profiler_settings_t MetricsUtils::GetOptimizerMetrics() { MetricsType::OPTIMIZER_COMPRESSED_MATERIALIZATION, MetricsType::OPTIMIZER_DUPLICATE_GROUPS, MetricsType::OPTIMIZER_REORDER_FILTER, + MetricsType::OPTIMIZER_SAMPLING_PUSHDOWN, MetricsType::OPTIMIZER_JOIN_FILTER_PUSHDOWN, MetricsType::OPTIMIZER_EXTENSION, MetricsType::OPTIMIZER_MATERIALIZED_CTE, @@ -60,6 +62,8 @@ MetricsType MetricsUtils::GetOptimizerMetricByType(OptimizerType type) { return MetricsType::OPTIMIZER_FILTER_PULLUP; case OptimizerType::FILTER_PUSHDOWN: return MetricsType::OPTIMIZER_FILTER_PUSHDOWN; + case OptimizerType::EMPTY_RESULT_PULLUP: + return MetricsType::OPTIMIZER_EMPTY_RESULT_PULLUP; case OptimizerType::CTE_FILTER_PUSHER: return MetricsType::OPTIMIZER_CTE_FILTER_PUSHER; case OptimizerType::REGEX_RANGE: @@ -86,8 +90,6 @@ MetricsType MetricsUtils::GetOptimizerMetricByType(OptimizerType type) { return MetricsType::OPTIMIZER_BUILD_SIDE_PROBE_SIDE; case OptimizerType::LIMIT_PUSHDOWN: return MetricsType::OPTIMIZER_LIMIT_PUSHDOWN; - case OptimizerType::SAMPLING_PUSHDOWN: - return MetricsType::OPTIMIZER_SAMPLING_PUSHDOWN; case OptimizerType::TOP_N: return MetricsType::OPTIMIZER_TOP_N; case OptimizerType::COMPRESSED_MATERIALIZATION: @@ -96,14 +98,14 @@ MetricsType MetricsUtils::GetOptimizerMetricByType(OptimizerType type) { return MetricsType::OPTIMIZER_DUPLICATE_GROUPS; case OptimizerType::REORDER_FILTER: return MetricsType::OPTIMIZER_REORDER_FILTER; + case OptimizerType::SAMPLING_PUSHDOWN: + return MetricsType::OPTIMIZER_SAMPLING_PUSHDOWN; case OptimizerType::JOIN_FILTER_PUSHDOWN: return MetricsType::OPTIMIZER_JOIN_FILTER_PUSHDOWN; case OptimizerType::EXTENSION: return MetricsType::OPTIMIZER_EXTENSION; case OptimizerType::MATERIALIZED_CTE: return MetricsType::OPTIMIZER_MATERIALIZED_CTE; - case OptimizerType::EMPTY_RESULT_PULLUP: - return MetricsType::OPTIMIZER_EMPTY_RESULT_PULLUP; default: throw InternalException("OptimizerType %s cannot be converted to a MetricsType", EnumUtil::ToString(type)); }; @@ -117,6 +119,8 @@ OptimizerType MetricsUtils::GetOptimizerTypeByMetric(MetricsType type) { return OptimizerType::FILTER_PULLUP; case MetricsType::OPTIMIZER_FILTER_PUSHDOWN: return OptimizerType::FILTER_PUSHDOWN; + case MetricsType::OPTIMIZER_EMPTY_RESULT_PULLUP: + return OptimizerType::EMPTY_RESULT_PULLUP; case MetricsType::OPTIMIZER_CTE_FILTER_PUSHER: return OptimizerType::CTE_FILTER_PUSHER; case MetricsType::OPTIMIZER_REGEX_RANGE: @@ -151,14 +155,14 @@ OptimizerType MetricsUtils::GetOptimizerTypeByMetric(MetricsType type) { return OptimizerType::DUPLICATE_GROUPS; case MetricsType::OPTIMIZER_REORDER_FILTER: return OptimizerType::REORDER_FILTER; + case MetricsType::OPTIMIZER_SAMPLING_PUSHDOWN: + return OptimizerType::SAMPLING_PUSHDOWN; case MetricsType::OPTIMIZER_JOIN_FILTER_PUSHDOWN: return OptimizerType::JOIN_FILTER_PUSHDOWN; case MetricsType::OPTIMIZER_EXTENSION: return OptimizerType::EXTENSION; case MetricsType::OPTIMIZER_MATERIALIZED_CTE: return OptimizerType::MATERIALIZED_CTE; - case MetricsType::OPTIMIZER_EMPTY_RESULT_PULLUP: - return OptimizerType::EMPTY_RESULT_PULLUP; default: return OptimizerType::INVALID; }; @@ -169,6 +173,7 @@ bool MetricsUtils::IsOptimizerMetric(MetricsType type) { case MetricsType::OPTIMIZER_EXPRESSION_REWRITER: case MetricsType::OPTIMIZER_FILTER_PULLUP: case MetricsType::OPTIMIZER_FILTER_PUSHDOWN: + case MetricsType::OPTIMIZER_EMPTY_RESULT_PULLUP: case MetricsType::OPTIMIZER_CTE_FILTER_PUSHER: case MetricsType::OPTIMIZER_REGEX_RANGE: case MetricsType::OPTIMIZER_IN_CLAUSE: @@ -182,15 +187,14 @@ bool MetricsUtils::IsOptimizerMetric(MetricsType type) { case MetricsType::OPTIMIZER_COLUMN_LIFETIME: case MetricsType::OPTIMIZER_BUILD_SIDE_PROBE_SIDE: case MetricsType::OPTIMIZER_LIMIT_PUSHDOWN: - case MetricsType::OPTIMIZER_SAMPLING_PUSHDOWN: case MetricsType::OPTIMIZER_TOP_N: case MetricsType::OPTIMIZER_COMPRESSED_MATERIALIZATION: case MetricsType::OPTIMIZER_DUPLICATE_GROUPS: case MetricsType::OPTIMIZER_REORDER_FILTER: + case MetricsType::OPTIMIZER_SAMPLING_PUSHDOWN: case MetricsType::OPTIMIZER_JOIN_FILTER_PUSHDOWN: case MetricsType::OPTIMIZER_EXTENSION: case MetricsType::OPTIMIZER_MATERIALIZED_CTE: - case MetricsType::OPTIMIZER_EMPTY_RESULT_PULLUP: return true; default: return false; diff --git a/src/duckdb/src/common/multi_file_list.cpp b/src/duckdb/src/common/multi_file_list.cpp index 9c57b5e8..668a5b36 100644 --- a/src/duckdb/src/common/multi_file_list.cpp +++ b/src/duckdb/src/common/multi_file_list.cpp @@ -14,8 +14,11 @@ namespace duckdb { MultiFilePushdownInfo::MultiFilePushdownInfo(LogicalGet &get) - : table_index(get.table_index), column_names(get.names), column_ids(get.GetColumnIds()), + : table_index(get.table_index), column_names(get.names), column_indexes(get.GetColumnIds()), extra_info(get.extra_info) { + for (auto &col_id : column_indexes) { + column_ids.push_back(col_id.GetPrimaryIndex()); + } } MultiFilePushdownInfo::MultiFilePushdownInfo(idx_t table_index, const vector &column_names, diff --git a/src/duckdb/src/execution/operator/persistent/physical_batch_insert.cpp b/src/duckdb/src/execution/operator/persistent/physical_batch_insert.cpp index eb696703..58c7a060 100644 --- a/src/duckdb/src/execution/operator/persistent/physical_batch_insert.cpp +++ b/src/duckdb/src/execution/operator/persistent/physical_batch_insert.cpp @@ -34,6 +34,8 @@ PhysicalBatchInsert::PhysicalBatchInsert(LogicalOperator &op, SchemaCatalogEntry //===--------------------------------------------------------------------===// // CollectionMerger //===--------------------------------------------------------------------===// +enum class RowGroupBatchType : uint8_t { FLUSHED, NOT_FLUSHED }; + class CollectionMerger { public: explicit CollectionMerger(ClientContext &context) : context(context) { @@ -41,10 +43,17 @@ class CollectionMerger { ClientContext &context; vector> current_collections; + RowGroupBatchType batch_type = RowGroupBatchType::NOT_FLUSHED; public: - void AddCollection(unique_ptr collection) { + void AddCollection(unique_ptr collection, RowGroupBatchType type) { current_collections.push_back(std::move(collection)); + if (type == RowGroupBatchType::FLUSHED) { + batch_type = RowGroupBatchType::FLUSHED; + if (current_collections.size() > 1) { + throw InternalException("Cannot merge flushed collections"); + } + } } bool Empty() { @@ -65,9 +74,9 @@ class CollectionMerger { DataChunk scan_chunk; scan_chunk.Initialize(context, types); - vector column_ids; + vector column_ids; for (idx_t i = 0; i < types.size(); i++) { - column_ids.push_back(i); + column_ids.emplace_back(i); } for (auto &collection : current_collections) { if (!collection) { @@ -91,13 +100,14 @@ class CollectionMerger { } new_collection->FinalizeAppend(TransactionData(0, 0), append_state); writer.WriteLastRowGroup(*new_collection); + } else if (batch_type == RowGroupBatchType::NOT_FLUSHED) { + writer.WriteLastRowGroup(*new_collection); } current_collections.clear(); return new_collection; } }; -enum class RowGroupBatchType : uint8_t { FLUSHED, NOT_FLUSHED }; struct RowGroupBatchEntry { RowGroupBatchEntry(idx_t batch_idx, unique_ptr collection_p, RowGroupBatchType type) : batch_idx(batch_idx), total_rows(collection_p->GetTotalRows()), unflushed_memory(0), @@ -332,7 +342,7 @@ unique_ptr BatchInsertGlobalState::MergeCollections(ClientCo CollectionMerger merger(context); idx_t written_data = 0; for (auto &entry : merge_collections) { - merger.AddCollection(std::move(entry.collection)); + merger.AddCollection(std::move(entry.collection), RowGroupBatchType::NOT_FLUSHED); written_data += entry.unflushed_memory; } optimistically_written = true; @@ -571,7 +581,7 @@ SinkFinalizeType PhysicalBatchInsert::Finalize(Pipeline &pipeline, Event &event, if (!current_merger) { current_merger = make_uniq(context); } - current_merger->AddCollection(std::move(entry.collection)); + current_merger->AddCollection(std::move(entry.collection), entry.type); memory_manager.ReduceUnflushedMemory(entry.unflushed_memory); } else { // this collection has been flushed: it does not need to be merged @@ -582,7 +592,7 @@ SinkFinalizeType PhysicalBatchInsert::Finalize(Pipeline &pipeline, Event &event, current_merger.reset(); } auto larger_merger = make_uniq(context); - larger_merger->AddCollection(std::move(entry.collection)); + larger_merger->AddCollection(std::move(entry.collection), entry.type); mergers.push_back(std::move(larger_merger)); } } diff --git a/src/duckdb/src/execution/operator/persistent/physical_copy_to_file.cpp b/src/duckdb/src/execution/operator/persistent/physical_copy_to_file.cpp index fece3ef0..fa85d670 100644 --- a/src/duckdb/src/execution/operator/persistent/physical_copy_to_file.cpp +++ b/src/duckdb/src/execution/operator/persistent/physical_copy_to_file.cpp @@ -510,6 +510,12 @@ SinkFinalizeType PhysicalCopyToFile::Finalize(Pipeline &pipeline, Event &event, } if (per_thread_output) { // already happened in combine + if (NumericCast(gstate.rows_copied.load()) == 0 && sink_state != nullptr) { + // no rows from source, write schema to file + auto global_lock = gstate.lock.GetExclusiveLock(); + gstate.global_state = CreateFileState(context, *sink_state, *global_lock); + function.copy_to_finalize(context, *bind_data, *gstate.global_state); + } return SinkFinalizeType::READY; } if (function.copy_to_finalize) { diff --git a/src/duckdb/src/execution/operator/persistent/physical_delete.cpp b/src/duckdb/src/execution/operator/persistent/physical_delete.cpp index ec832aa2..acbb8b9d 100644 --- a/src/duckdb/src/execution/operator/persistent/physical_delete.cpp +++ b/src/duckdb/src/execution/operator/persistent/physical_delete.cpp @@ -51,7 +51,7 @@ SinkResultType PhysicalDelete::Sink(ExecutionContext &context, DataChunk &chunk, auto &transaction = DuckTransaction::Get(context.client, table.db); auto &row_identifiers = chunk.data[row_id_index]; - vector column_ids; + vector column_ids; for (idx_t i = 0; i < table.ColumnCount(); i++) { column_ids.emplace_back(i); }; diff --git a/src/duckdb/src/execution/operator/persistent/physical_insert.cpp b/src/duckdb/src/execution/operator/persistent/physical_insert.cpp index 99992322..46ee7896 100644 --- a/src/duckdb/src/execution/operator/persistent/physical_insert.cpp +++ b/src/duckdb/src/execution/operator/persistent/physical_insert.cpp @@ -34,7 +34,7 @@ PhysicalInsert::PhysicalInsert( return_chunk(return_chunk), parallel(parallel), action_type(action_type), set_expressions(std::move(set_expressions)), set_columns(std::move(set_columns)), set_types(std::move(set_types)), on_conflict_condition(std::move(on_conflict_condition_p)), do_update_condition(std::move(do_update_condition_p)), - conflict_target(std::move(conflict_target_p)), columns_to_fetch(std::move(columns_to_fetch_p)) { + conflict_target(std::move(conflict_target_p)) { if (action_type == OnConflictAction::THROW) { return; @@ -44,11 +44,12 @@ PhysicalInsert::PhysicalInsert( // One or more columns are referenced from the existing table, // we use the 'insert_types' to figure out which types these columns have - types_to_fetch = vector(columns_to_fetch.size(), LogicalType::SQLNULL); - for (idx_t i = 0; i < columns_to_fetch.size(); i++) { - auto &id = columns_to_fetch[i]; + types_to_fetch = vector(columns_to_fetch_p.size(), LogicalType::SQLNULL); + for (idx_t i = 0; i < columns_to_fetch_p.size(); i++) { + auto &id = columns_to_fetch_p[i]; D_ASSERT(id < insert_types.size()); types_to_fetch[i] = insert_types[id]; + columns_to_fetch.emplace_back(id); } } @@ -526,6 +527,8 @@ SinkCombineResultType PhysicalInsert::Combine(ExecutionContext &context, Operato storage.FinalizeLocalAppend(gstate.append_state); } else { // we have written rows to disk optimistically - merge directly into the transaction-local storage + lstate.writer->WriteLastRowGroup(*lstate.local_collection); + gstate.table.GetStorage().LocalMerge(context.client, *lstate.local_collection); gstate.table.GetStorage().FinalizeOptimisticWriter(context.client, *lstate.writer); } diff --git a/src/duckdb/src/execution/operator/projection/physical_tableinout_function.cpp b/src/duckdb/src/execution/operator/projection/physical_tableinout_function.cpp index 9fa89e51..677e84d9 100644 --- a/src/duckdb/src/execution/operator/projection/physical_tableinout_function.cpp +++ b/src/duckdb/src/execution/operator/projection/physical_tableinout_function.cpp @@ -23,7 +23,7 @@ class TableInOutGlobalState : public GlobalOperatorState { PhysicalTableInOutFunction::PhysicalTableInOutFunction(vector types, TableFunction function_p, unique_ptr bind_data_p, - vector column_ids_p, idx_t estimated_cardinality, + vector column_ids_p, idx_t estimated_cardinality, vector project_input_p) : PhysicalOperator(PhysicalOperatorType::INOUT_FUNCTION, std::move(types), estimated_cardinality), function(std::move(function_p)), bind_data(std::move(bind_data_p)), column_ids(std::move(column_ids_p)), diff --git a/src/duckdb/src/execution/operator/scan/physical_table_scan.cpp b/src/duckdb/src/execution/operator/scan/physical_table_scan.cpp index 675ab7d2..237a0b0c 100644 --- a/src/duckdb/src/execution/operator/scan/physical_table_scan.cpp +++ b/src/duckdb/src/execution/operator/scan/physical_table_scan.cpp @@ -11,7 +11,7 @@ namespace duckdb { PhysicalTableScan::PhysicalTableScan(vector types, TableFunction function_p, unique_ptr bind_data_p, vector returned_types_p, - vector column_ids_p, vector projection_ids_p, + vector column_ids_p, vector projection_ids_p, vector names_p, unique_ptr table_filters_p, idx_t estimated_cardinality, ExtraOperatorInfo extra_info, vector parameters_p) @@ -156,7 +156,7 @@ InsertionOrderPreservingMap PhysicalTableScan::ParamsToString() const { if (function.filter_prune) { string projections; for (idx_t i = 0; i < projection_ids.size(); i++) { - const auto &column_id = column_ids[projection_ids[i]]; + auto column_id = column_ids[projection_ids[i]].GetPrimaryIndex(); if (column_id < names.size()) { if (i > 0) { projections += "\n"; @@ -168,7 +168,7 @@ InsertionOrderPreservingMap PhysicalTableScan::ParamsToString() const { } else { string projections; for (idx_t i = 0; i < column_ids.size(); i++) { - const auto &column_id = column_ids[i]; + auto column_id = column_ids[i].GetPrimaryIndex(); if (column_id < names.size()) { if (i > 0) { projections += "\n"; @@ -190,7 +190,8 @@ InsertionOrderPreservingMap PhysicalTableScan::ParamsToString() const { filters_info += "\n"; } first_item = false; - filters_info += filter->ToString(names[column_ids[column_index]]); + auto &col_name = names[column_ids[column_index].GetPrimaryIndex()]; + filters_info += filter->ToString(col_name); } } result["Filters"] = filters_info; diff --git a/src/duckdb/src/execution/physical_plan/plan_aggregate.cpp b/src/duckdb/src/execution/physical_plan/plan_aggregate.cpp index c25c4ef7..e0ce831c 100644 --- a/src/duckdb/src/execution/physical_plan/plan_aggregate.cpp +++ b/src/duckdb/src/execution/physical_plan/plan_aggregate.cpp @@ -98,7 +98,7 @@ static bool CanUsePartitionedAggregate(ClientContext &context, LogicalAggregate vector base_columns; for (const auto &partition_idx : partition_columns) { auto col_idx = partition_idx; - col_idx = table_scan.column_ids[col_idx]; + col_idx = table_scan.column_ids[col_idx].GetPrimaryIndex(); base_columns.push_back(col_idx); } // check if the source operator is partitioned by the grouping columns diff --git a/src/duckdb/src/execution/physical_plan/plan_get.cpp b/src/duckdb/src/execution/physical_plan/plan_get.cpp index 2f4c65d3..4aba3a44 100644 --- a/src/duckdb/src/execution/physical_plan/plan_get.cpp +++ b/src/duckdb/src/execution/physical_plan/plan_get.cpp @@ -14,22 +14,22 @@ namespace duckdb { -unique_ptr CreateTableFilterSet(TableFilterSet &table_filters, const vector &column_ids) { +unique_ptr CreateTableFilterSet(TableFilterSet &table_filters, const vector &column_ids) { // create the table filter map auto table_filter_set = make_uniq(); for (auto &table_filter : table_filters.filters) { // find the relative column index from the absolute column index into the table - idx_t column_index = DConstants::INVALID_INDEX; + optional_idx column_index; for (idx_t i = 0; i < column_ids.size(); i++) { - if (table_filter.first == column_ids[i]) { + if (table_filter.first == column_ids[i].GetPrimaryIndex()) { column_index = i; break; } } - if (column_index == DConstants::INVALID_INDEX) { + if (!column_index.IsValid()) { throw InternalException("Could not find column index for table filter"); } - table_filter_set->filters[column_index] = std::move(table_filter.second); + table_filter_set->filters[column_index.GetIndex()] = std::move(table_filter.second); } return table_filter_set; } @@ -95,7 +95,7 @@ unique_ptr PhysicalPlanGenerator::CreatePlan(LogicalGet &op) { unique_ptr unsupported_filter; unordered_set to_remove; for (auto &entry : table_filters->filters) { - auto column_id = column_ids[entry.first]; + auto column_id = column_ids[entry.first].GetPrimaryIndex(); auto &type = op.returned_types[column_id]; if (!op.function.supports_pushdown_type(type)) { idx_t column_id_filter = entry.first; @@ -123,7 +123,8 @@ unique_ptr PhysicalPlanGenerator::CreatePlan(LogicalGet &op) { if (!select_list.empty()) { vector filter_types; for (auto &c : projection_ids) { - filter_types.push_back(op.returned_types[column_ids[c]]); + auto column_id = column_ids[c].GetPrimaryIndex(); + filter_types.push_back(op.returned_types[column_id]); } filter = make_uniq(filter_types, std::move(select_list), op.estimated_cardinality); } @@ -139,7 +140,7 @@ unique_ptr PhysicalPlanGenerator::CreatePlan(LogicalGet &op) { if (column_ids.size() == op.returned_types.size()) { bool projection_necessary = false; for (idx_t i = 0; i < column_ids.size(); i++) { - if (column_ids[i] != i) { + if (column_ids[i].GetPrimaryIndex() != i) { projection_necessary = true; break; } @@ -158,13 +159,14 @@ unique_ptr PhysicalPlanGenerator::CreatePlan(LogicalGet &op) { vector types; vector> expressions; for (auto &column_id : column_ids) { - if (column_id == COLUMN_IDENTIFIER_ROW_ID) { + if (column_id.IsRowIdColumn()) { types.emplace_back(LogicalType::ROW_TYPE); expressions.push_back(make_uniq(Value::BIGINT(0))); } else { - auto type = op.returned_types[column_id]; + auto col_id = column_id.GetPrimaryIndex(); + auto type = op.returned_types[col_id]; types.push_back(type); - expressions.push_back(make_uniq(type, column_id)); + expressions.push_back(make_uniq(type, col_id)); } } unique_ptr projection = diff --git a/src/duckdb/src/function/scalar/struct/struct_extract.cpp b/src/duckdb/src/function/scalar/struct/struct_extract.cpp index adfd74a3..8434bed3 100644 --- a/src/duckdb/src/function/scalar/struct/struct_extract.cpp +++ b/src/duckdb/src/function/scalar/struct/struct_extract.cpp @@ -5,25 +5,10 @@ #include "duckdb/planner/expression/bound_function_expression.hpp" #include "duckdb/planner/expression/bound_parameter_expression.hpp" #include "duckdb/storage/statistics/struct_stats.hpp" +#include "duckdb/function/scalar/struct_utils.hpp" namespace duckdb { -struct StructExtractBindData : public FunctionData { - explicit StructExtractBindData(idx_t index) : index(index) { - } - - idx_t index; - -public: - unique_ptr Copy() const override { - return make_uniq(index); - } - bool Equals(const FunctionData &other_p) const override { - auto &other = other_p.Cast(); - return index == other.index; - } -}; - static void StructExtractFunction(DataChunk &args, ExpressionState &state, Vector &result) { auto &func_expr = state.expr.Cast(); auto &info = func_expr.bind_info->Cast(); diff --git a/src/duckdb/src/function/table/table_scan.cpp b/src/duckdb/src/function/table/table_scan.cpp index 228e57e0..23218352 100644 --- a/src/duckdb/src/function/table/table_scan.cpp +++ b/src/duckdb/src/function/table/table_scan.cpp @@ -17,6 +17,7 @@ #include "duckdb/storage/table/scan_state.hpp" #include "duckdb/transaction/duck_transaction.hpp" #include "duckdb/transaction/local_storage.hpp" +#include "duckdb/storage/storage_index.hpp" #include "duckdb/main/client_data.hpp" namespace duckdb { @@ -34,12 +35,25 @@ struct TableScanLocalState : public LocalTableFunctionState { DataChunk all_columns; }; -static storage_t GetStorageIndex(TableCatalogEntry &table, column_t column_id) { - if (column_id == DConstants::INVALID_INDEX) { - return column_id; +static StorageIndex TransformStorageIndex(const ColumnIndex &column_id) { + vector result; + for (auto &child_id : column_id.GetChildIndexes()) { + result.push_back(TransformStorageIndex(child_id)); } - auto &col = table.GetColumn(LogicalIndex(column_id)); - return col.StorageOid(); + return StorageIndex(column_id.GetPrimaryIndex(), std::move(result)); +} + +static StorageIndex GetStorageIndex(TableCatalogEntry &table, const ColumnIndex &column_id) { + if (column_id.IsRowIdColumn()) { + return StorageIndex(); + } + // the index of the base ColumnIndex is equal to the physical column index in the table + // for any child indices - the indices are already the physical indices + // (since only the top-level can have generated columns) + auto &col = table.GetColumn(column_id.ToLogical()); + auto result = TransformStorageIndex(column_id); + result.SetIndex(col.StorageOid()); + return result; } struct TableScanGlobalState : public GlobalTableFunctionState { @@ -68,12 +82,11 @@ static unique_ptr TableScanInitLocal(ExecutionContext & GlobalTableFunctionState *gstate) { auto result = make_uniq(); auto &bind_data = input.bind_data->Cast(); - vector column_ids = input.column_ids; - for (auto &col : column_ids) { - auto storage_idx = GetStorageIndex(bind_data.table, col); - col = storage_idx; + vector storage_ids; + for (auto &col : input.column_indexes) { + storage_ids.push_back(GetStorageIndex(bind_data.table, col)); } - result->scan_state.Initialize(std::move(column_ids), input.filters.get(), input.sample_options.get()); + result->scan_state.Initialize(std::move(storage_ids), input.filters.get(), input.sample_options.get()); TableScanParallelStateNext(context.client, input.bind_data.get(), result.get(), gstate); if (input.CanRemoveFilterColumns()) { auto &tsgs = gstate->Cast(); @@ -93,11 +106,11 @@ unique_ptr TableScanInitGlobal(ClientContext &context, if (input.CanRemoveFilterColumns()) { result->projection_ids = input.projection_ids; const auto &columns = bind_data.table.GetColumns(); - for (const auto &col_idx : input.column_ids) { - if (col_idx == COLUMN_IDENTIFIER_ROW_ID) { + for (const auto &col_idx : input.column_indexes) { + if (col_idx.IsRowIdColumn()) { result->scanned_types.emplace_back(LogicalType::ROW_TYPE); } else { - result->scanned_types.push_back(columns.GetColumn(LogicalIndex(col_idx)).Type()); + result->scanned_types.push_back(columns.GetColumn(col_idx.ToLogical()).Type()); } } } @@ -222,7 +235,7 @@ struct IndexScanGlobalState : public GlobalTableFunctionState { idx_t row_ids_offset; ColumnFetchState fetch_state; TableScanState local_storage_state; - vector column_ids; + vector column_ids; bool finished; }; @@ -239,8 +252,8 @@ static unique_ptr IndexScanInitGlobal(ClientContext &c result->local_storage_state.options.force_fetch_row = ClientConfig::GetConfig(context).force_fetch_row; result->column_ids.reserve(input.column_ids.size()); - for (auto &id : input.column_ids) { - result->column_ids.push_back(GetStorageIndex(bind_data.table, id)); + for (auto &col_id : input.column_indexes) { + result->column_ids.push_back(GetStorageIndex(bind_data.table, col_id)); } result->local_storage_state.Initialize(result->column_ids, input.filters.get()); @@ -284,7 +297,8 @@ static void RewriteIndexExpression(Index &index, LogicalGet &get, Expression &ex column_t referenced_column = column_ids[bound_colref.binding.column_index]; // search for the referenced column in the set of column_ids for (idx_t i = 0; i < get_column_ids.size(); i++) { - if (get_column_ids[i] == referenced_column) { + auto column_id = get_column_ids[i].GetPrimaryIndex(); + if (column_id == referenced_column) { bound_colref.binding.column_index = i; return; } diff --git a/src/duckdb/src/function/table/version/pragma_version.cpp b/src/duckdb/src/function/table/version/pragma_version.cpp index 374fd9c4..63c95ad7 100644 --- a/src/duckdb/src/function/table/version/pragma_version.cpp +++ b/src/duckdb/src/function/table/version/pragma_version.cpp @@ -1,5 +1,5 @@ #ifndef DUCKDB_PATCH_VERSION -#define DUCKDB_PATCH_VERSION "4-dev1619" +#define DUCKDB_PATCH_VERSION "4-dev1693" #endif #ifndef DUCKDB_MINOR_VERSION #define DUCKDB_MINOR_VERSION 1 @@ -8,10 +8,10 @@ #define DUCKDB_MAJOR_VERSION 1 #endif #ifndef DUCKDB_VERSION -#define DUCKDB_VERSION "v1.1.4-dev1619" +#define DUCKDB_VERSION "v1.1.4-dev1693" #endif #ifndef DUCKDB_SOURCE_ID -#define DUCKDB_SOURCE_ID "2d1b7d796d" +#define DUCKDB_SOURCE_ID "fd5de0607d" #endif #include "duckdb/function/table/system_functions.hpp" #include "duckdb/main/database.hpp" diff --git a/src/duckdb/src/include/duckdb.h b/src/duckdb/src/include/duckdb.h index 3b215104..b915ff56 100644 --- a/src/duckdb/src/include/duckdb.h +++ b/src/duckdb/src/include/duckdb.h @@ -2258,6 +2258,23 @@ Creates a value representing a NULL value. */ DUCKDB_API duckdb_value duckdb_create_null_value(); +/*! +Returns the number of elements in a LIST value. + +* @param value The LIST value. +* @return The number of elements in the list. +*/ +DUCKDB_API idx_t duckdb_get_list_size(duckdb_value value); + +/*! +Returns the LIST child at index as a duckdb_value. + +* @param value The LIST value. +* @param index The index of the child. +* @return The child as a duckdb_value. +*/ +DUCKDB_API duckdb_value duckdb_get_list_child(duckdb_value value, idx_t index); + //===--------------------------------------------------------------------===// // Logical Type Interface //===--------------------------------------------------------------------===// diff --git a/src/duckdb/src/include/duckdb/common/column_index.hpp b/src/duckdb/src/include/duckdb/common/column_index.hpp new file mode 100644 index 00000000..32d2e182 --- /dev/null +++ b/src/duckdb/src/include/duckdb/common/column_index.hpp @@ -0,0 +1,72 @@ +//===----------------------------------------------------------------------===// +// DuckDB +// +// duckdb/common/column_index.hpp +// +// +//===----------------------------------------------------------------------===// + +#pragma once + +#include "duckdb/common/constants.hpp" +#include "duckdb/common/vector.hpp" + +namespace duckdb { + +struct ColumnIndex { + ColumnIndex() : index(DConstants::INVALID_INDEX) { + } + explicit ColumnIndex(idx_t index) : index(index) { + } + ColumnIndex(idx_t index, vector child_indexes_p) + : index(index), child_indexes(std::move(child_indexes_p)) { + } + + inline bool operator==(const ColumnIndex &rhs) const { + return index == rhs.index; + } + inline bool operator!=(const ColumnIndex &rhs) const { + return index != rhs.index; + } + inline bool operator<(const ColumnIndex &rhs) const { + return index < rhs.index; + } + idx_t GetPrimaryIndex() const { + return index; + } + LogicalIndex ToLogical() const { + return LogicalIndex(index); + } + bool HasChildren() const { + return !child_indexes.empty(); + } + idx_t ChildIndexCount() const { + return child_indexes.size(); + } + const ColumnIndex &GetChildIndex(idx_t idx) const { + return child_indexes[idx]; + } + ColumnIndex &GetChildIndex(idx_t idx) { + return child_indexes[idx]; + } + const vector &GetChildIndexes() const { + return child_indexes; + } + vector &GetChildIndexesMutable() { + return child_indexes; + } + void AddChildIndex(ColumnIndex new_index) { + this->child_indexes.push_back(std::move(new_index)); + } + bool IsRowIdColumn() const { + return index == DConstants::INVALID_INDEX; + } + void Serialize(Serializer &serializer) const; + static ColumnIndex Deserialize(Deserializer &deserializer); + +private: + idx_t index; + vector child_indexes; +}; + +} // namespace duckdb diff --git a/src/duckdb/src/include/duckdb/common/enums/metric_type.hpp b/src/duckdb/src/include/duckdb/common/enums/metric_type.hpp index 74db7e86..e8f945fd 100644 --- a/src/duckdb/src/include/duckdb/common/enums/metric_type.hpp +++ b/src/duckdb/src/include/duckdb/common/enums/metric_type.hpp @@ -30,9 +30,9 @@ enum class MetricsType : uint8_t { CUMULATIVE_ROWS_SCANNED, OPERATOR_ROWS_SCANNED, OPERATOR_TIMING, - LATENCY, - ROWS_RETURNED, RESULT_SET_SIZE, + LATENCY, + ROWS_RETURNED, ALL_OPTIMIZERS, CUMULATIVE_OPTIMIZER_TIMING, PLANNER, @@ -44,6 +44,7 @@ enum class MetricsType : uint8_t { OPTIMIZER_EXPRESSION_REWRITER, OPTIMIZER_FILTER_PULLUP, OPTIMIZER_FILTER_PUSHDOWN, + OPTIMIZER_EMPTY_RESULT_PULLUP, OPTIMIZER_CTE_FILTER_PUSHER, OPTIMIZER_REGEX_RANGE, OPTIMIZER_IN_CLAUSE, @@ -57,15 +58,14 @@ enum class MetricsType : uint8_t { OPTIMIZER_COLUMN_LIFETIME, OPTIMIZER_BUILD_SIDE_PROBE_SIDE, OPTIMIZER_LIMIT_PUSHDOWN, - OPTIMIZER_SAMPLING_PUSHDOWN, OPTIMIZER_TOP_N, OPTIMIZER_COMPRESSED_MATERIALIZATION, OPTIMIZER_DUPLICATE_GROUPS, OPTIMIZER_REORDER_FILTER, + OPTIMIZER_SAMPLING_PUSHDOWN, OPTIMIZER_JOIN_FILTER_PUSHDOWN, OPTIMIZER_EXTENSION, OPTIMIZER_MATERIALIZED_CTE, - OPTIMIZER_EMPTY_RESULT_PULLUP }; struct MetricsTypeHashFunction { diff --git a/src/duckdb/src/include/duckdb/common/multi_file_list.hpp b/src/duckdb/src/include/duckdb/common/multi_file_list.hpp index 65ae8693..5adc321c 100644 --- a/src/duckdb/src/include/duckdb/common/multi_file_list.hpp +++ b/src/duckdb/src/include/duckdb/common/multi_file_list.hpp @@ -59,7 +59,8 @@ struct MultiFilePushdownInfo { idx_t table_index; const vector &column_names; - const vector &column_ids; + vector column_ids; + vector column_indexes; ExtraOperatorInfo &extra_info; }; diff --git a/src/duckdb/src/include/duckdb/common/types/validity_mask.hpp b/src/duckdb/src/include/duckdb/common/types/validity_mask.hpp index 897488e5..880bd9ff 100644 --- a/src/duckdb/src/include/duckdb/common/types/validity_mask.hpp +++ b/src/duckdb/src/include/duckdb/common/types/validity_mask.hpp @@ -155,6 +155,7 @@ struct TemplatedValidityMask { return validity_mask[entry_idx]; } static inline bool AllValid(V entry) { + // Check if all the tuples that are covered by this entry (usually 64) are valid return entry == ValidityBuffer::MAX_ENTRY; } static inline bool NoneValid(V entry) { diff --git a/src/duckdb/src/include/duckdb/execution/operator/persistent/physical_insert.hpp b/src/duckdb/src/include/duckdb/execution/operator/persistent/physical_insert.hpp index 9680b583..68e740e5 100644 --- a/src/duckdb/src/include/duckdb/execution/operator/persistent/physical_insert.hpp +++ b/src/duckdb/src/include/duckdb/execution/operator/persistent/physical_insert.hpp @@ -73,7 +73,7 @@ class PhysicalInsert : public PhysicalOperator { unordered_set conflict_target; // Column ids from the original table to fetch - vector columns_to_fetch; + vector columns_to_fetch; // Matching types to the column ids to fetch vector types_to_fetch; diff --git a/src/duckdb/src/include/duckdb/execution/operator/projection/physical_tableinout_function.hpp b/src/duckdb/src/include/duckdb/execution/operator/projection/physical_tableinout_function.hpp index 1fb4eebe..f505f416 100644 --- a/src/duckdb/src/include/duckdb/execution/operator/projection/physical_tableinout_function.hpp +++ b/src/duckdb/src/include/duckdb/execution/operator/projection/physical_tableinout_function.hpp @@ -20,7 +20,7 @@ class PhysicalTableInOutFunction : public PhysicalOperator { public: PhysicalTableInOutFunction(vector types, TableFunction function_p, - unique_ptr bind_data_p, vector column_ids_p, + unique_ptr bind_data_p, vector column_ids_p, idx_t estimated_cardinality, vector projected_input); public: @@ -47,7 +47,7 @@ class PhysicalTableInOutFunction : public PhysicalOperator { //! Bind data of the function unique_ptr bind_data; //! The set of column ids to fetch - vector column_ids; + vector column_ids; //! The set of input columns to project out vector projected_input; }; diff --git a/src/duckdb/src/include/duckdb/execution/operator/scan/physical_table_scan.hpp b/src/duckdb/src/include/duckdb/execution/operator/scan/physical_table_scan.hpp index 53059212..55b86195 100644 --- a/src/duckdb/src/include/duckdb/execution/operator/scan/physical_table_scan.hpp +++ b/src/duckdb/src/include/duckdb/execution/operator/scan/physical_table_scan.hpp @@ -13,6 +13,7 @@ #include "duckdb/planner/table_filter.hpp" #include "duckdb/storage/data_table.hpp" #include "duckdb/common/extra_operator_info.hpp" +#include "duckdb/common/column_index.hpp" namespace duckdb { @@ -24,7 +25,7 @@ class PhysicalTableScan : public PhysicalOperator { public: //! Table scan that immediately projects out filter columns that are unused in the remainder of the query plan PhysicalTableScan(vector types, TableFunction function, unique_ptr bind_data, - vector returned_types, vector column_ids, vector projection_ids, + vector returned_types, vector column_ids, vector projection_ids, vector names, unique_ptr table_filters, idx_t estimated_cardinality, ExtraOperatorInfo extra_info, vector parameters); @@ -35,7 +36,7 @@ class PhysicalTableScan : public PhysicalOperator { //! The types of ALL columns that can be returned by the table function vector returned_types; //! The column ids used within the table function - vector column_ids; + vector column_ids; //! The projected-out column ids vector projection_ids; //! The names of the columns diff --git a/src/duckdb/src/include/duckdb/function/scalar/struct_utils.hpp b/src/duckdb/src/include/duckdb/function/scalar/struct_utils.hpp new file mode 100644 index 00000000..d92acd59 --- /dev/null +++ b/src/duckdb/src/include/duckdb/function/scalar/struct_utils.hpp @@ -0,0 +1,33 @@ +//===----------------------------------------------------------------------===// +// DuckDB +// +// duckdb/function/scalar/struct_utils.hpp +// +// +//===----------------------------------------------------------------------===// + +#pragma once + +#include "duckdb/function/scalar_function.hpp" +#include "duckdb/function/function_set.hpp" +#include "duckdb/function/built_in_functions.hpp" + +namespace duckdb { + +struct StructExtractBindData : public FunctionData { + explicit StructExtractBindData(idx_t index) : index(index) { + } + + idx_t index; + +public: + unique_ptr Copy() const override { + return make_uniq(index); + } + bool Equals(const FunctionData &other_p) const override { + auto &other = other_p.Cast(); + return index == other.index; + } +}; + +} // namespace duckdb diff --git a/src/duckdb/src/include/duckdb/function/table_function.hpp b/src/duckdb/src/include/duckdb/function/table_function.hpp index 1f5ee44b..81173e68 100644 --- a/src/duckdb/src/include/duckdb/function/table_function.hpp +++ b/src/duckdb/src/include/duckdb/function/table_function.hpp @@ -15,6 +15,7 @@ #include "duckdb/planner/bind_context.hpp" #include "duckdb/planner/logical_operator.hpp" #include "duckdb/storage/statistics/node_statistics.hpp" +#include "duckdb/common/column_index.hpp" #include @@ -104,15 +105,28 @@ struct TableFunctionBindInput { }; struct TableFunctionInitInput { - TableFunctionInitInput(optional_ptr bind_data_p, const vector &column_ids_p, + TableFunctionInitInput(optional_ptr bind_data_p, vector column_ids_p, const vector &projection_ids_p, optional_ptr filters_p, optional_ptr sample_options_p = nullptr) - : bind_data(bind_data_p), column_ids(column_ids_p), projection_ids(projection_ids_p), filters(filters_p), - sample_options(sample_options_p) { + : bind_data(bind_data_p), column_ids(std::move(column_ids_p)), projection_ids(projection_ids_p), + filters(filters_p), sample_options(sample_options_p) { + for (auto &col_id : column_ids) { + column_indexes.emplace_back(col_id); + } + } + TableFunctionInitInput(optional_ptr bind_data_p, vector column_indexes_p, + const vector &projection_ids_p, optional_ptr filters_p, + optional_ptr sample_options_p = nullptr) + : bind_data(bind_data_p), column_indexes(std::move(column_indexes_p)), projection_ids(projection_ids_p), + filters(filters_p), sample_options(sample_options_p) { + for (auto &col_id : column_indexes) { + column_ids.emplace_back(col_id.GetPrimaryIndex()); + } } optional_ptr bind_data; - const vector &column_ids; + vector column_ids; + vector column_indexes; const vector projection_ids; optional_ptr filters; optional_ptr sample_options; diff --git a/src/duckdb/src/include/duckdb/main/capi/extension_api.hpp b/src/duckdb/src/include/duckdb/main/capi/extension_api.hpp index 17dd6910..99286416 100644 --- a/src/duckdb/src/include/duckdb/main/capi/extension_api.hpp +++ b/src/duckdb/src/include/duckdb/main/capi/extension_api.hpp @@ -434,6 +434,8 @@ typedef struct { duckdb_logical_type (*duckdb_param_logical_type)(duckdb_prepared_statement prepared_statement, idx_t param_idx); bool (*duckdb_is_null_value)(duckdb_value value); duckdb_value (*duckdb_create_null_value)(); + idx_t (*duckdb_get_list_size)(duckdb_value value); + duckdb_value (*duckdb_get_list_child)(duckdb_value value, idx_t index); } duckdb_ext_api_v0; //===--------------------------------------------------------------------===// @@ -818,6 +820,8 @@ inline duckdb_ext_api_v0 CreateAPIv0() { result.duckdb_param_logical_type = duckdb_param_logical_type; result.duckdb_is_null_value = duckdb_is_null_value; result.duckdb_create_null_value = duckdb_create_null_value; + result.duckdb_get_list_size = duckdb_get_list_size; + result.duckdb_get_list_child = duckdb_get_list_child; return result; } diff --git a/src/duckdb/src/include/duckdb/optimizer/filter_combiner.hpp b/src/duckdb/src/include/duckdb/optimizer/filter_combiner.hpp index c9449616..7906ecd9 100644 --- a/src/duckdb/src/include/duckdb/optimizer/filter_combiner.hpp +++ b/src/duckdb/src/include/duckdb/optimizer/filter_combiner.hpp @@ -47,7 +47,7 @@ class FilterCombiner { void GenerateFilters(const std::function filter)> &callback); bool HasFilters(); - TableFilterSet GenerateTableScanFilters(const vector &column_ids); + TableFilterSet GenerateTableScanFilters(const vector &column_ids); // vector> GenerateZonemapChecks(vector &column_ids, vector> // &pushed_filters); diff --git a/src/duckdb/src/include/duckdb/optimizer/remove_unused_columns.hpp b/src/duckdb/src/include/duckdb/optimizer/remove_unused_columns.hpp index 629efc5d..a05a1d72 100644 --- a/src/duckdb/src/include/duckdb/optimizer/remove_unused_columns.hpp +++ b/src/duckdb/src/include/duckdb/optimizer/remove_unused_columns.hpp @@ -11,12 +11,18 @@ #include "duckdb/planner/logical_operator_visitor.hpp" #include "duckdb/planner/column_binding_map.hpp" #include "duckdb/common/vector.hpp" +#include "duckdb/common/column_index.hpp" namespace duckdb { class Binder; class BoundColumnRefExpression; class ClientContext; +struct ReferencedColumn { + vector> bindings; + vector child_columns; +}; + //! The RemoveUnusedColumns optimizer traverses the logical operator tree and removes any columns that are not required class RemoveUnusedColumns : public LogicalOperatorVisitor { public: @@ -25,6 +31,7 @@ class RemoveUnusedColumns : public LogicalOperatorVisitor { } void VisitOperator(LogicalOperator &op) override; + void VisitExpression(unique_ptr *expression) override; protected: unique_ptr VisitReplace(BoundColumnRefExpression &expr, unique_ptr *expr_ptr) override; @@ -37,14 +44,23 @@ class RemoveUnusedColumns : public LogicalOperatorVisitor { //! output implicitly refers all the columns below it) bool everything_referenced; //! The map of column references - column_binding_map_t> column_references; + column_binding_map_t column_references; private: template void ClearUnusedExpressions(vector &list, idx_t table_idx, bool replace = true); + //! Add a reference to the column in its entirey + void AddBinding(BoundColumnRefExpression &col); + //! Add a reference to a sub-section of the column + void AddBinding(BoundColumnRefExpression &col, ColumnIndex child_column); //! Perform a replacement of the ColumnBinding, iterating over all the currently found column references and //! replacing the bindings void ReplaceBinding(ColumnBinding current_binding, ColumnBinding new_binding); + + bool HandleStructExtract(Expression &expr); + + bool HandleStructExtractRecursive(Expression &expr, optional_ptr &colref, + vector &indexes); }; } // namespace duckdb diff --git a/src/duckdb/src/include/duckdb/planner/bind_context.hpp b/src/duckdb/src/include/duckdb/planner/bind_context.hpp index dbad0139..8234805e 100644 --- a/src/duckdb/src/include/duckdb/planner/bind_context.hpp +++ b/src/duckdb/src/include/duckdb/planner/bind_context.hpp @@ -10,6 +10,7 @@ #include "duckdb/catalog/catalog.hpp" #include "duckdb/common/case_insensitive_map.hpp" +#include "duckdb/common/column_index.hpp" #include "duckdb/common/reference_map.hpp" #include "duckdb/common/exception/binder_exception.hpp" #include "duckdb/parser/expression/columnref_expression.hpp" @@ -93,12 +94,12 @@ class BindContext { //! Adds a base table with the given alias to the BindContext. void AddBaseTable(idx_t index, const string &alias, const vector &names, const vector &types, - vector &bound_column_ids, StandardEntry &entry, bool add_row_id = true); + vector &bound_column_ids, StandardEntry &entry, bool add_row_id = true); void AddBaseTable(idx_t index, const string &alias, const vector &names, const vector &types, - vector &bound_column_ids, const string &table_name); + vector &bound_column_ids, const string &table_name); //! Adds a call to a table function with the given alias to the BindContext. void AddTableFunction(idx_t index, const string &alias, const vector &names, - const vector &types, vector &bound_column_ids, + const vector &types, vector &bound_column_ids, optional_ptr entry); //! Adds a table view with a given alias to the BindContext. void AddView(idx_t index, const string &alias, SubqueryRef &ref, BoundQueryNode &subquery, ViewCatalogEntry &view); diff --git a/src/duckdb/src/include/duckdb/planner/operator/logical_get.hpp b/src/duckdb/src/include/duckdb/planner/operator/logical_get.hpp index 273a83d4..993863c1 100644 --- a/src/duckdb/src/include/duckdb/planner/operator/logical_get.hpp +++ b/src/duckdb/src/include/duckdb/planner/operator/logical_get.hpp @@ -62,11 +62,11 @@ class LogicalGet : public LogicalOperator { optional_ptr GetTable() const; public: - void SetColumnIds(vector &&column_ids); + void SetColumnIds(vector &&column_ids); void AddColumnId(column_t column_id); void ClearColumnIds(); - const vector &GetColumnIds() const; - vector &GetMutableColumnIds(); + const vector &GetColumnIds() const; + vector &GetMutableColumnIds(); vector GetColumnBindings() override; idx_t EstimateCardinality(ClientContext &context) override; @@ -87,6 +87,6 @@ class LogicalGet : public LogicalOperator { private: //! Bound column IDs - vector column_ids; + vector column_ids; }; } // namespace duckdb diff --git a/src/duckdb/src/include/duckdb/planner/table_binding.hpp b/src/duckdb/src/include/duckdb/planner/table_binding.hpp index 6bf75cf0..4b9030b4 100644 --- a/src/duckdb/src/include/duckdb/planner/table_binding.hpp +++ b/src/duckdb/src/include/duckdb/planner/table_binding.hpp @@ -15,6 +15,7 @@ #include "duckdb/planner/expression_binder.hpp" #include "duckdb/catalog/catalog_entry/table_column_type.hpp" #include "duckdb/planner/binding_alias.hpp" +#include "duckdb/common/column_index.hpp" namespace duckdb { class BindContext; @@ -99,11 +100,11 @@ struct TableBinding : public Binding { public: TableBinding(const string &alias, vector types, vector names, - vector &bound_column_ids, optional_ptr entry, idx_t index, + vector &bound_column_ids, optional_ptr entry, idx_t index, bool add_row_id = false); //! A reference to the set of bound column ids - vector &bound_column_ids; + vector &bound_column_ids; //! The underlying catalog entry (if any) optional_ptr entry; @@ -113,7 +114,7 @@ struct TableBinding : public Binding { optional_ptr GetStandardEntry() override; ErrorData ColumnNotFoundError(const string &column_name) const override; // These are columns that are present in the name_map, appearing in the order that they're bound - const vector &GetBoundColumnIds() const; + const vector &GetBoundColumnIds() const; protected: ColumnBinding GetColumnBinding(column_t column_index); diff --git a/src/duckdb/src/include/duckdb/planner/table_filter.hpp b/src/duckdb/src/include/duckdb/planner/table_filter.hpp index 079d22a2..881884a1 100644 --- a/src/duckdb/src/include/duckdb/planner/table_filter.hpp +++ b/src/duckdb/src/include/duckdb/planner/table_filter.hpp @@ -15,6 +15,7 @@ #include "duckdb/common/types.hpp" #include "duckdb/common/unordered_map.hpp" #include "duckdb/planner/column_binding.hpp" +#include "duckdb/common/column_index.hpp" namespace duckdb { class BaseStatistics; @@ -79,7 +80,7 @@ class TableFilterSet { unordered_map> filters; public: - void PushFilter(idx_t column_index, unique_ptr filter); + void PushFilter(const ColumnIndex &col_idx, unique_ptr filter); bool Equals(TableFilterSet &other) { if (filters.size() != other.filters.size()) { diff --git a/src/duckdb/src/include/duckdb/storage/data_table.hpp b/src/duckdb/src/include/duckdb/storage/data_table.hpp index b3b6515b..dacf0283 100644 --- a/src/duckdb/src/include/duckdb/storage/data_table.hpp +++ b/src/duckdb/src/include/duckdb/storage/data_table.hpp @@ -57,7 +57,7 @@ class DataTable { DataTable(ClientContext &context, DataTable &parent, idx_t removed_column); //! Constructs a DataTable as a delta on an existing data table but with one column changed type DataTable(ClientContext &context, DataTable &parent, idx_t changed_idx, const LogicalType &target_type, - const vector &bound_columns, Expression &cast_expr); + const vector &bound_columns, Expression &cast_expr); //! Constructs a DataTable as a delta on an existing data table but with one column added new constraint DataTable(ClientContext &context, DataTable &parent, BoundConstraint &constraint); @@ -74,7 +74,7 @@ class DataTable { vector GetTypes(); const vector &Columns() const; - void InitializeScan(DuckTransaction &transaction, TableScanState &state, const vector &column_ids, + void InitializeScan(DuckTransaction &transaction, TableScanState &state, const vector &column_ids, TableFilterSet *table_filters = nullptr); //! Returns the maximum amount of threads that should be assigned to scan this data table @@ -89,7 +89,7 @@ class DataTable { void Scan(DuckTransaction &transaction, DataChunk &result, TableScanState &state); //! Fetch data from the specific row identifiers from the base table - void Fetch(DuckTransaction &transaction, DataChunk &result, const vector &column_ids, + void Fetch(DuckTransaction &transaction, DataChunk &result, const vector &column_ids, const Vector &row_ids, idx_t fetch_count, ColumnFetchState &state); //! Initializes an append to transaction-local storage @@ -247,7 +247,7 @@ class DataTable { void VerifyDeleteConstraints(TableDeleteState &state, ClientContext &context, DataChunk &chunk); void InitializeScanWithOffset(DuckTransaction &transaction, TableScanState &state, - const vector &column_ids, idx_t start_row, idx_t end_row); + const vector &column_ids, idx_t start_row, idx_t end_row); void VerifyForeignKeyConstraint(const BoundForeignKeyConstraint &bfk, ClientContext &context, DataChunk &chunk, VerifyExistenceType verify_type); diff --git a/src/duckdb/src/include/duckdb/storage/storage_index.hpp b/src/duckdb/src/include/duckdb/storage/storage_index.hpp new file mode 100644 index 00000000..38ba9997 --- /dev/null +++ b/src/duckdb/src/include/duckdb/storage/storage_index.hpp @@ -0,0 +1,70 @@ +//===----------------------------------------------------------------------===// +// DuckDB +// +// duckdb/storage/storage_index.hpp +// +// +//===----------------------------------------------------------------------===// + +#pragma once + +#include "duckdb/common/constants.hpp" +#include "duckdb/common/vector.hpp" + +namespace duckdb { + +struct StorageIndex { + StorageIndex() : index(DConstants::INVALID_INDEX) { + } + explicit StorageIndex(idx_t index) : index(index) { + } + StorageIndex(idx_t index, vector child_indexes_p) + : index(index), child_indexes(std::move(child_indexes_p)) { + } + + inline bool operator==(const StorageIndex &rhs) const { + return index == rhs.index; + } + inline bool operator!=(const StorageIndex &rhs) const { + return index != rhs.index; + } + inline bool operator<(const StorageIndex &rhs) const { + return index < rhs.index; + } + idx_t GetPrimaryIndex() const { + return index; + } + PhysicalIndex ToPhysical() const { + return PhysicalIndex(index); + } + bool HasChildren() const { + return !child_indexes.empty(); + } + idx_t ChildIndexCount() const { + return child_indexes.size(); + } + const StorageIndex &GetChildIndex(idx_t idx) const { + return child_indexes[idx]; + } + StorageIndex &GetChildIndex(idx_t idx) { + return child_indexes[idx]; + } + const vector &GetChildIndexes() const { + return child_indexes; + } + void AddChildIndex(StorageIndex new_index) { + this->child_indexes.push_back(std::move(new_index)); + } + void SetIndex(idx_t new_index) { + index = new_index; + } + bool IsRowIdColumn() const { + return index == DConstants::INVALID_INDEX; + } + +private: + idx_t index; + vector child_indexes; +}; + +} // namespace duckdb diff --git a/src/duckdb/src/include/duckdb/storage/table/delete_state.hpp b/src/duckdb/src/include/duckdb/storage/table/delete_state.hpp index 6d25df4a..d3a05eee 100644 --- a/src/duckdb/src/include/duckdb/storage/table/delete_state.hpp +++ b/src/duckdb/src/include/duckdb/storage/table/delete_state.hpp @@ -17,7 +17,7 @@ struct TableDeleteState { unique_ptr constraint_state; bool has_delete_constraints = false; DataChunk verify_chunk; - vector col_ids; + vector col_ids; }; } // namespace duckdb diff --git a/src/duckdb/src/include/duckdb/storage/table/row_group.hpp b/src/duckdb/src/include/duckdb/storage/table/row_group.hpp index 1dd5aa5c..3c1584c7 100644 --- a/src/duckdb/src/include/duckdb/storage/table/row_group.hpp +++ b/src/duckdb/src/include/duckdb/storage/table/row_group.hpp @@ -18,6 +18,7 @@ #include "duckdb/storage/table/segment_base.hpp" #include "duckdb/storage/block.hpp" #include "duckdb/common/enums/checkpoint_type.hpp" +#include "duckdb/storage/storage_index.hpp" namespace duckdb { class AttachedDatabase; @@ -123,7 +124,7 @@ class RowGroup : public SegmentBase { //! For a specific row, returns true if it should be used for the transaction and false otherwise. bool Fetch(TransactionData transaction, idx_t row); //! Fetch a specific row from the row_group and insert it into the result at the specified index - void FetchRow(TransactionData transaction, ColumnFetchState &state, const vector &column_ids, + void FetchRow(TransactionData transaction, ColumnFetchState &state, const vector &column_ids, row_t row_id, DataChunk &result, idx_t result_idx); //! Append count rows to the version info @@ -187,6 +188,7 @@ class RowGroup : public SegmentBase { void SetVersionInfo(shared_ptr version); ColumnData &GetColumn(storage_t c); + ColumnData &GetColumn(const StorageIndex &c); idx_t GetColumnCount() const; vector> &GetColumns(); diff --git a/src/duckdb/src/include/duckdb/storage/table/row_group_collection.hpp b/src/duckdb/src/include/duckdb/storage/table/row_group_collection.hpp index 3f93b58f..5186d8d5 100644 --- a/src/duckdb/src/include/duckdb/storage/table/row_group_collection.hpp +++ b/src/duckdb/src/include/duckdb/storage/table/row_group_collection.hpp @@ -12,6 +12,7 @@ #include "duckdb/storage/table/segment_tree.hpp" #include "duckdb/storage/statistics/column_statistics.hpp" #include "duckdb/storage/table/table_statistics.hpp" +#include "duckdb/storage/storage_index.hpp" namespace duckdb { @@ -58,20 +59,21 @@ class RowGroupCollection { RowGroup *GetRowGroup(int64_t index); void Verify(); - void InitializeScan(CollectionScanState &state, const vector &column_ids, TableFilterSet *table_filters); + void InitializeScan(CollectionScanState &state, const vector &column_ids, + TableFilterSet *table_filters); void InitializeCreateIndexScan(CreateIndexScanState &state); - void InitializeScanWithOffset(CollectionScanState &state, const vector &column_ids, idx_t start_row, + void InitializeScanWithOffset(CollectionScanState &state, const vector &column_ids, idx_t start_row, idx_t end_row); static bool InitializeScanInRowGroup(CollectionScanState &state, RowGroupCollection &collection, RowGroup &row_group, idx_t vector_index, idx_t max_row); void InitializeParallelScan(ParallelCollectionScanState &state); bool NextParallelScan(ClientContext &context, ParallelCollectionScanState &state, CollectionScanState &scan_state); - bool Scan(DuckTransaction &transaction, const vector &column_ids, + bool Scan(DuckTransaction &transaction, const vector &column_ids, const std::function &fun); bool Scan(DuckTransaction &transaction, const std::function &fun); - void Fetch(TransactionData transaction, DataChunk &result, const vector &column_ids, + void Fetch(TransactionData transaction, DataChunk &result, const vector &column_ids, const Vector &row_identifiers, idx_t fetch_count, ColumnFetchState &state); //! Initialize an append of a variable number of rows. FinalizeAppend must be called after appending is done. @@ -116,7 +118,7 @@ class RowGroupCollection { ExpressionExecutor &default_executor); shared_ptr RemoveColumn(idx_t col_idx); shared_ptr AlterType(ClientContext &context, idx_t changed_idx, const LogicalType &target_type, - vector bound_columns, Expression &cast_expr); + vector bound_columns, Expression &cast_expr); void VerifyNewConstraint(DataTable &parent, const BoundConstraint &constraint); void CopyStats(TableStatistics &stats); diff --git a/src/duckdb/src/include/duckdb/storage/table/scan_state.hpp b/src/duckdb/src/include/duckdb/storage/table/scan_state.hpp index 2b8e0dfc..e94381a6 100644 --- a/src/duckdb/src/include/duckdb/storage/table/scan_state.hpp +++ b/src/duckdb/src/include/duckdb/storage/table/scan_state.hpp @@ -17,6 +17,7 @@ #include "duckdb/storage/table/segment_lock.hpp" #include "duckdb/common/types/data_chunk.hpp" #include "duckdb/parser/parsed_data/sample_options.hpp" +#include "duckdb/storage/storage_index.hpp" namespace duckdb { class AdaptiveFilter; @@ -96,10 +97,14 @@ struct ColumnScanState { vector> previous_states; //! The last read offset in the child state (used for LIST columns only) idx_t last_offset = 0; + //! Whether or not we should scan a specific child column + vector scan_child_column; //! Contains TableScan level config for scanning optional_ptr scan_options; public: + void Initialize(const LogicalType &type, const vector &children, + optional_ptr options); void Initialize(const LogicalType &type, optional_ptr options); //! Move the scan state forward by "count" rows (including all child states) void Next(idx_t count); @@ -117,7 +122,7 @@ struct ColumnFetchState { }; struct ScanFilter { - ScanFilter(idx_t index, const vector &column_ids, TableFilter &filter); + ScanFilter(idx_t index, const vector &column_ids, TableFilter &filter); idx_t scan_column_index; idx_t table_column_index; @@ -133,7 +138,7 @@ class ScanFilterInfo { public: ~ScanFilterInfo(); - void Initialize(TableFilterSet &filters, const vector &column_ids); + void Initialize(TableFilterSet &filters, const vector &column_ids); const vector &GetFilterList() const { return filter_list; @@ -195,7 +200,7 @@ class CollectionScanState { public: void Initialize(const vector &types); - const vector &GetColumnIds(); + const vector &GetColumnIds(); ScanFilterInfo &GetFilterInfo(); ScanSamplingInfo &GetSamplingInfo(); TableScanOptions &GetOptions(); @@ -247,10 +252,10 @@ class TableScanState { ScanSamplingInfo sampling_info; public: - void Initialize(vector column_ids, optional_ptr table_filters = nullptr, + void Initialize(vector column_ids, optional_ptr table_filters = nullptr, optional_ptr table_sampling = nullptr); - const vector &GetColumnIds(); + const vector &GetColumnIds(); ScanFilterInfo &GetFilterInfo(); @@ -258,7 +263,7 @@ class TableScanState { private: //! The column identifiers of the scan - vector column_ids; + vector column_ids; }; struct ParallelCollectionScanState { diff --git a/src/duckdb/src/include/duckdb/transaction/local_storage.hpp b/src/duckdb/src/include/duckdb/transaction/local_storage.hpp index 32715121..7ef26898 100644 --- a/src/duckdb/src/include/duckdb/transaction/local_storage.hpp +++ b/src/duckdb/src/include/duckdb/transaction/local_storage.hpp @@ -31,7 +31,7 @@ class LocalTableStorage : public enable_shared_from_this { explicit LocalTableStorage(ClientContext &context, DataTable &table); // Create a LocalTableStorage from an ALTER TYPE LocalTableStorage(ClientContext &context, DataTable &table, LocalTableStorage &parent, idx_t changed_idx, - const LogicalType &target_type, const vector &bound_columns, Expression &cast_expr); + const LogicalType &target_type, const vector &bound_columns, Expression &cast_expr); // Create a LocalTableStorage from a DROP COLUMN LocalTableStorage(DataTable &table, LocalTableStorage &parent, idx_t drop_idx); // Create a LocalTableStorage from an ADD COLUMN @@ -110,7 +110,7 @@ class LocalStorage { //! Initialize a scan of the local storage void InitializeScan(DataTable &table, CollectionScanState &state, optional_ptr table_filters); //! Scan - void Scan(CollectionScanState &state, const vector &column_ids, DataChunk &result); + void Scan(CollectionScanState &state, const vector &column_ids, DataChunk &result); void InitializeParallelScan(DataTable &table, ParallelCollectionScanState &state); bool NextParallelScan(ClientContext &context, DataTable &table, ParallelCollectionScanState &state, @@ -150,11 +150,11 @@ class LocalStorage { ExpressionExecutor &default_executor); void DropColumn(DataTable &old_dt, DataTable &new_dt, idx_t removed_column); void ChangeType(DataTable &old_dt, DataTable &new_dt, idx_t changed_idx, const LogicalType &target_type, - const vector &bound_columns, Expression &cast_expr); + const vector &bound_columns, Expression &cast_expr); void MoveStorage(DataTable &old_dt, DataTable &new_dt); - void FetchChunk(DataTable &table, Vector &row_ids, idx_t count, const vector &col_ids, DataChunk &chunk, - ColumnFetchState &fetch_state); + void FetchChunk(DataTable &table, Vector &row_ids, idx_t count, const vector &col_ids, + DataChunk &chunk, ColumnFetchState &fetch_state); TableIndexList &GetIndexes(DataTable &table); void VerifyNewConstraint(DataTable &parent, const BoundConstraint &constraint); diff --git a/src/duckdb/src/include/duckdb_extension.h b/src/duckdb/src/include/duckdb_extension.h index 671e4377..3aaf29b7 100644 --- a/src/duckdb/src/include/duckdb_extension.h +++ b/src/duckdb/src/include/duckdb_extension.h @@ -498,6 +498,8 @@ typedef struct { duckdb_logical_type (*duckdb_param_logical_type)(duckdb_prepared_statement prepared_statement, idx_t param_idx); bool (*duckdb_is_null_value)(duckdb_value value); duckdb_value (*duckdb_create_null_value)(); + idx_t (*duckdb_get_list_size)(duckdb_value value); + duckdb_value (*duckdb_get_list_child)(duckdb_value value, idx_t index); #endif } duckdb_ext_api_v0; @@ -883,6 +885,8 @@ typedef struct { #define duckdb_param_logical_type duckdb_ext_api.duckdb_param_logical_type #define duckdb_is_null_value duckdb_ext_api.duckdb_is_null_value #define duckdb_create_null_value duckdb_ext_api.duckdb_create_null_value +#define duckdb_get_list_size duckdb_ext_api.duckdb_get_list_size +#define duckdb_get_list_child duckdb_ext_api.duckdb_get_list_child #define duckdb_appender_create_ext duckdb_ext_api.duckdb_appender_create_ext #define duckdb_table_description_create_ext duckdb_ext_api.duckdb_table_description_create_ext #define duckdb_table_description_get_column_name duckdb_ext_api.duckdb_table_description_get_column_name diff --git a/src/duckdb/src/main/capi/duckdb_value-c.cpp b/src/duckdb/src/main/capi/duckdb_value-c.cpp index c3603d88..9eac3957 100644 --- a/src/duckdb/src/main/capi/duckdb_value-c.cpp +++ b/src/duckdb/src/main/capi/duckdb_value-c.cpp @@ -282,7 +282,7 @@ idx_t duckdb_get_map_size(duckdb_value value) { } auto val = UnwrapValue(value); - if (val.type().id() != LogicalTypeId::MAP) { + if (val.type().id() != LogicalTypeId::MAP || val.IsNull()) { return 0; } @@ -296,7 +296,7 @@ duckdb_value duckdb_get_map_key(duckdb_value value, idx_t index) { } auto val = UnwrapValue(value); - if (val.type().id() != LogicalTypeId::MAP) { + if (val.type().id() != LogicalTypeId::MAP || val.IsNull()) { return nullptr; } @@ -316,7 +316,7 @@ duckdb_value duckdb_get_map_value(duckdb_value value, idx_t index) { } auto val = UnwrapValue(value); - if (val.type().id() != LogicalTypeId::MAP) { + if (val.type().id() != LogicalTypeId::MAP || val.IsNull()) { return nullptr; } @@ -337,3 +337,35 @@ bool duckdb_is_null_value(duckdb_value value) { duckdb_value duckdb_create_null_value() { return WrapValue(new duckdb::Value()); } + +idx_t duckdb_get_list_size(duckdb_value value) { + if (!value) { + return 0; + } + + auto val = UnwrapValue(value); + if (val.type().id() != LogicalTypeId::LIST || val.IsNull()) { + return 0; + } + + auto &children = duckdb::ListValue::GetChildren(val); + return children.size(); +} + +duckdb_value duckdb_get_list_child(duckdb_value value, idx_t index) { + if (!value) { + return nullptr; + } + + auto val = UnwrapValue(value); + if (val.type().id() != LogicalTypeId::LIST || val.IsNull()) { + return nullptr; + } + + auto &children = duckdb::ListValue::GetChildren(val); + if (index >= children.size()) { + return nullptr; + } + + return WrapValue(new duckdb::Value(children[index])); +} diff --git a/src/duckdb/src/optimizer/build_probe_side_optimizer.cpp b/src/duckdb/src/optimizer/build_probe_side_optimizer.cpp index cebafb77..0ff8169d 100644 --- a/src/duckdb/src/optimizer/build_probe_side_optimizer.cpp +++ b/src/duckdb/src/optimizer/build_probe_side_optimizer.cpp @@ -16,7 +16,14 @@ static void GetRowidBindings(LogicalOperator &op, vector &binding auto &get = op.Cast(); auto get_bindings = get.GetColumnBindings(); auto &column_ids = get.GetColumnIds(); - if (std::find(column_ids.begin(), column_ids.end(), DConstants::INVALID_INDEX) != column_ids.end()) { + bool has_row_id = false; + for (auto &col_id : column_ids) { + if (col_id.IsRowIdColumn()) { + has_row_id = true; + break; + } + } + if (has_row_id) { for (auto &binding : get_bindings) { bindings.push_back(binding); } diff --git a/src/duckdb/src/optimizer/filter_combiner.cpp b/src/duckdb/src/optimizer/filter_combiner.cpp index fe238263..e11d6c6e 100644 --- a/src/duckdb/src/optimizer/filter_combiner.cpp +++ b/src/duckdb/src/optimizer/filter_combiner.cpp @@ -399,7 +399,7 @@ bool FilterCombiner::HasFilters() { // Try to extract a column index from a bound column ref expression, or a column ref recursively nested // inside of a struct_extract call. If the expression is not a column ref (or nested column ref), return false. -static bool TryGetBoundColumnIndex(const vector &column_ids, const Expression &expr, idx_t &result) { +static bool TryGetBoundColumnIndex(const vector &column_ids, const Expression &expr, ColumnIndex &result) { switch (expr.type) { case ExpressionType::BOUND_COLUMN_REF: { auto &ref = expr.Cast(); @@ -436,7 +436,7 @@ static unique_ptr PushDownFilterIntoExpr(const Expression &expr, un return inner_filter; } -TableFilterSet FilterCombiner::GenerateTableScanFilters(const vector &column_ids) { +TableFilterSet FilterCombiner::GenerateTableScanFilters(const vector &column_ids) { TableFilterSet table_filters; //! First, we figure the filters that have constant expressions that we can push down to the table scan for (auto &constant_value : constant_values) { @@ -462,11 +462,11 @@ TableFilterSet FilterCombiner::GenerateTableScanFilters(const vector &col // Try to get the column index, either from bound column ref, or a column ref nested inside of a // struct_extract call - idx_t column_index; + ColumnIndex column_index; if (!TryGetBoundColumnIndex(column_ids, expr, column_index)) { continue; } - if (column_index == COLUMN_IDENTIFIER_ROW_ID) { + if (column_index.IsRowIdColumn()) { break; } @@ -499,7 +499,7 @@ TableFilterSet FilterCombiner::GenerateTableScanFilters(const vector &col if (like_string.empty()) { continue; } - auto column_index = column_ids[column_ref.binding.column_index]; + auto &column_index = column_ids[column_ref.binding.column_index]; //! Here the like must be transformed to a BOUND COMPARISON geq le auto lower_bound = make_uniq(ExpressionType::COMPARE_GREATERTHANOREQUALTO, Value(like_string)); @@ -514,7 +514,7 @@ TableFilterSet FilterCombiner::GenerateTableScanFilters(const vector &col //! This is a like function. auto &column_ref = func.children[0]->Cast(); auto &constant_value_expr = func.children[1]->Cast(); - auto column_index = column_ids[column_ref.binding.column_index]; + auto &column_index = column_ids[column_ref.binding.column_index]; // constant value expr can sometimes be null. if so, push is not null filter, which will // make the filter unsatisfiable and return no results. if (constant_value_expr.value.IsNull()) { @@ -560,8 +560,8 @@ TableFilterSet FilterCombiner::GenerateTableScanFilters(const vector &col continue; } auto &column_ref = func.children[0]->Cast(); - auto column_index = column_ids[column_ref.binding.column_index]; - if (column_index == COLUMN_IDENTIFIER_ROW_ID) { + auto &column_index = column_ids[column_ref.binding.column_index]; + if (column_index.IsRowIdColumn()) { break; } //! check if all children are const expr @@ -678,11 +678,12 @@ TableFilterSet FilterCombiner::GenerateTableScanFilters(const vector &col } if (!column_id.IsValid()) { - if (IsRowIdColumnId(column_ids[column_ref->binding.column_index])) { + auto &col_id = column_ids[column_ref->binding.column_index]; + if (col_id.IsRowIdColumn()) { break; } - column_id = column_ids[column_ref->binding.column_index]; - } else if (column_id.GetIndex() != column_ids[column_ref->binding.column_index]) { + column_id = col_id.GetPrimaryIndex(); + } else if (column_id.GetIndex() != column_ids[column_ref->binding.column_index].GetPrimaryIndex()) { column_id.SetInvalid(); break; } @@ -697,7 +698,7 @@ TableFilterSet FilterCombiner::GenerateTableScanFilters(const vector &col } if (column_id.IsValid()) { optional_filter->child_filter = std::move(conj_filter); - table_filters.PushFilter(column_id.GetIndex(), std::move(optional_filter)); + table_filters.PushFilter(ColumnIndex(column_id.GetIndex()), std::move(optional_filter)); } } } diff --git a/src/duckdb/src/optimizer/join_order/relation_statistics_helper.cpp b/src/duckdb/src/optimizer/join_order/relation_statistics_helper.cpp index 32d2b432..aa20b033 100644 --- a/src/duckdb/src/optimizer/join_order/relation_statistics_helper.cpp +++ b/src/duckdb/src/optimizer/join_order/relation_statistics_helper.cpp @@ -79,14 +79,15 @@ RelationStats RelationStatisticsHelper::ExtractGetStats(LogicalGet &get, ClientC // first push back basic distinct counts for each column (if we have them). auto &column_ids = get.GetColumnIds(); for (idx_t i = 0; i < column_ids.size(); i++) { + auto column_id = column_ids[i].GetPrimaryIndex(); bool have_distinct_count_stats = false; if (get.function.statistics) { - column_statistics = get.function.statistics(context, get.bind_data.get(), column_ids[i]); + column_statistics = get.function.statistics(context, get.bind_data.get(), column_id); if (column_statistics && have_catalog_table_statistics) { - auto distinct_count = MaxValue((idx_t)1, column_statistics->GetDistinctCount()); + auto distinct_count = MaxValue(1, column_statistics->GetDistinctCount()); auto column_distinct_count = DistinctCount({distinct_count, true}); return_stats.column_distinct_count.push_back(column_distinct_count); - return_stats.column_names.push_back(name + "." + get.names.at(column_ids.at(i))); + return_stats.column_names.push_back(name + "." + get.names.at(column_id)); have_distinct_count_stats = true; } } @@ -97,8 +98,8 @@ RelationStats RelationStatisticsHelper::ExtractGetStats(LogicalGet &get, ClientC auto column_distinct_count = DistinctCount({cardinality_after_filters, false}); return_stats.column_distinct_count.push_back(column_distinct_count); auto column_name = string("column"); - if (column_ids.at(i) < get.names.size()) { - column_name = get.names.at(column_ids.at(i)); + if (column_id < get.names.size()) { + column_name = get.names.at(column_id); } return_stats.column_names.push_back(get.GetName() + "." + column_name); } diff --git a/src/duckdb/src/optimizer/remove_unused_columns.cpp b/src/duckdb/src/optimizer/remove_unused_columns.cpp index 8515e469..b56fd9f1 100644 --- a/src/duckdb/src/optimizer/remove_unused_columns.cpp +++ b/src/duckdb/src/optimizer/remove_unused_columns.cpp @@ -8,6 +8,7 @@ #include "duckdb/planner/expression/bound_aggregate_expression.hpp" #include "duckdb/planner/expression/bound_columnref_expression.hpp" #include "duckdb/planner/expression/bound_constant_expression.hpp" +#include "duckdb/planner/expression/bound_function_expression.hpp" #include "duckdb/planner/expression_iterator.hpp" #include "duckdb/planner/operator/logical_aggregate.hpp" #include "duckdb/planner/operator/logical_comparison_join.hpp" @@ -18,15 +19,17 @@ #include "duckdb/planner/operator/logical_projection.hpp" #include "duckdb/planner/operator/logical_set_operation.hpp" #include "duckdb/planner/operator/logical_simple.hpp" +#include "duckdb/function/scalar/struct_utils.hpp" namespace duckdb { void RemoveUnusedColumns::ReplaceBinding(ColumnBinding current_binding, ColumnBinding new_binding) { auto colrefs = column_references.find(current_binding); if (colrefs != column_references.end()) { - for (auto &colref : colrefs->second) { - D_ASSERT(colref->binding == current_binding); - colref->binding = new_binding; + for (auto &colref_p : colrefs->second.bindings) { + auto &colref = colref_p.get(); + D_ASSERT(colref.binding == current_binding); + colref.binding = new_binding; } } } @@ -95,9 +98,10 @@ void RemoveUnusedColumns::VisitOperator(LogicalOperator &op) { // if there are any columns that refer to the RHS, auto colrefs = column_references.find(rhs_col.binding); if (colrefs != column_references.end()) { - for (auto &entry : colrefs->second) { - entry->binding = lhs_col.binding; - column_references[lhs_col.binding].push_back(entry); + for (auto &entry : colrefs->second.bindings) { + auto &colref = entry.get(); + colref.binding = lhs_col.binding; + AddBinding(colref); } column_references.erase(rhs_col.binding); } @@ -209,7 +213,7 @@ void RemoveUnusedColumns::VisitOperator(LogicalOperator &op) { return; } - auto &final_column_ids = get.GetColumnIds(); + auto final_column_ids = get.GetColumnIds(); // Create "selection vector" of all column ids vector proj_sel; @@ -221,12 +225,13 @@ void RemoveUnusedColumns::VisitOperator(LogicalOperator &op) { // Clear unused ids, exclude filter columns that are projected out immediately ClearUnusedExpressions(proj_sel, get.table_index, false); + vector> filter_expressions; // for every table filter, push a column binding into the column references map to prevent the column from // being projected out for (auto &filter : get.table_filters.filters) { optional_idx index; for (idx_t i = 0; i < final_column_ids.size(); i++) { - if (final_column_ids[i] == filter.first) { + if (final_column_ids[i].GetPrimaryIndex() == filter.first) { index = i; break; } @@ -234,20 +239,36 @@ void RemoveUnusedColumns::VisitOperator(LogicalOperator &op) { if (!index.IsValid()) { throw InternalException("Could not find column index for table filter"); } + auto &column_type = get.returned_types[filter.first]; ColumnBinding filter_binding(get.table_index, index.GetIndex()); - if (column_references.find(filter_binding) == column_references.end()) { - column_references.insert(make_pair(filter_binding, vector())); - } + auto column_ref = make_uniq(column_type, filter_binding); + auto filter_expr = filter.second->ToExpression(*column_ref); + VisitExpression(&filter_expr); + filter_expressions.push_back(std::move(filter_expr)); } // Clear unused ids, include filter columns that are projected out immediately ClearUnusedExpressions(col_sel, get.table_index); // Now set the column ids in the LogicalGet using the "selection vector" - vector column_ids; + vector column_ids; column_ids.reserve(col_sel.size()); for (auto col_sel_idx : col_sel) { - column_ids.push_back(final_column_ids[col_sel_idx]); + auto entry = column_references.find(ColumnBinding(get.table_index, col_sel_idx)); + if (entry == column_references.end()) { + throw InternalException("RemoveUnusedColumns - could not find referenced column"); + } + if (final_column_ids[col_sel_idx].HasChildren()) { + throw InternalException("RemoveUnusedColumns - LogicalGet::column_ids already has children"); + } + ColumnIndex new_index(final_column_ids[col_sel_idx].GetPrimaryIndex(), entry->second.child_columns); + column_ids.emplace_back(new_index); + } + if (column_ids.empty()) { + // this generally means we are only interested in whether or not anything exists in the table (e.g. + // EXISTS(SELECT * FROM tbl)) in this case, we just scan the row identifier column as it means we do not + // need to read any of the columns + column_ids.emplace_back(COLUMN_IDENTIFIER_ROW_ID); } get.SetColumnIds(std::move(column_ids)); @@ -264,13 +285,6 @@ void RemoveUnusedColumns::VisitOperator(LogicalOperator &op) { } } } - - if (final_column_ids.empty()) { - // this generally means we are only interested in whether or not anything exists in the table (e.g. - // EXISTS(SELECT * FROM tbl)) in this case, we just scan the row identifier column as it means we do not - // need to read any of the columns - get.AddColumnId(COLUMN_IDENTIFIER_ROW_ID); - } } return; case LogicalOperatorType::LOGICAL_FILTER: { @@ -339,10 +353,130 @@ void RemoveUnusedColumns::VisitOperator(LogicalOperator &op) { } } +bool RemoveUnusedColumns::HandleStructExtractRecursive(Expression &expr, optional_ptr &colref, + vector &indexes) { + if (expr.GetExpressionClass() != ExpressionClass::BOUND_FUNCTION) { + return false; + } + auto &function = expr.Cast(); + if (function.function.name != "struct_extract" && function.function.name != "array_extract") { + return false; + } + if (!function.bind_info) { + return false; + } + if (function.children[0]->return_type.id() != LogicalTypeId::STRUCT) { + return false; + } + auto &bind_data = function.bind_info->Cast(); + indexes.push_back(bind_data.index); + // struct extract, check if left child is a bound column ref + if (function.children[0]->GetExpressionClass() == ExpressionClass::BOUND_COLUMN_REF) { + // column reference - check if it is a struct + auto &ref = function.children[0]->Cast(); + if (ref.return_type.id() != LogicalTypeId::STRUCT) { + return false; + } + colref = &ref; + return true; + } + // not a column reference - try to handle this recursively + if (!HandleStructExtractRecursive(*function.children[0], colref, indexes)) { + return false; + } + return true; +} + +bool RemoveUnusedColumns::HandleStructExtract(Expression &expr) { + optional_ptr colref; + vector indexes; + if (!HandleStructExtractRecursive(expr, colref, indexes)) { + return false; + } + D_ASSERT(!indexes.empty()); + // construct the ColumnIndex + ColumnIndex index = ColumnIndex(indexes[0]); + for (idx_t i = 1; i < indexes.size(); i++) { + ColumnIndex new_index(indexes[i]); + new_index.AddChildIndex(std::move(index)); + index = std::move(new_index); + } + AddBinding(*colref, std::move(index)); + return true; +} + +void MergeChildColumns(vector ¤t_child_columns, ColumnIndex &new_child_column) { + if (current_child_columns.empty()) { + // there's already a reference to the full column - we can't extract only a subfield + // skip struct projection pushdown + return; + } + // if we are already extract sub-fields, add it (if it is not there yet) + for (auto &binding : current_child_columns) { + if (binding.GetPrimaryIndex() != new_child_column.GetPrimaryIndex()) { + continue; + } + // found a match: sub-field is already projected + // check if we have child columns + auto &nested_child_columns = binding.GetChildIndexesMutable(); + if (!new_child_column.HasChildren()) { + // new child is a reference to a full column - clear any existing bindings (if any) + nested_child_columns.clear(); + } else { + // new child has a sub-reference - merge recursively + D_ASSERT(new_child_column.ChildIndexCount() == 1); + MergeChildColumns(nested_child_columns, new_child_column.GetChildIndex(0)); + } + return; + } + // this child column is not projected yet - add it in + current_child_columns.push_back(std::move(new_child_column)); +} + +void RemoveUnusedColumns::AddBinding(BoundColumnRefExpression &col, ColumnIndex child_column) { + auto entry = column_references.find(col.binding); + if (entry == column_references.end()) { + // column not referenced yet - add a binding to it entirely + ReferencedColumn column; + column.bindings.push_back(col); + column.child_columns.push_back(std::move(child_column)); + column_references.insert(make_pair(col.binding, std::move(column))); + } else { + // column reference already exists - check add the binding + auto &column = entry->second; + column.bindings.push_back(col); + + MergeChildColumns(column.child_columns, child_column); + } +} + +void RemoveUnusedColumns::AddBinding(BoundColumnRefExpression &col) { + auto entry = column_references.find(col.binding); + if (entry == column_references.end()) { + // column not referenced yet - add a binding to it entirely + column_references[col.binding].bindings.push_back(col); + } else { + // column reference already exists - add the binding and clear any sub-references + auto &column = entry->second; + column.bindings.push_back(col); + column.child_columns.clear(); + } +} + +void RemoveUnusedColumns::VisitExpression(unique_ptr *expression) { + auto &expr = **expression; + if (HandleStructExtract(expr)) { + // already handled + return; + } + // recurse + LogicalOperatorVisitor::VisitExpression(expression); +} + unique_ptr RemoveUnusedColumns::VisitReplace(BoundColumnRefExpression &expr, unique_ptr *expr_ptr) { - // add a column reference - column_references[expr.binding].push_back(&expr); + // add a reference to the entire column + AddBinding(expr); return nullptr; } diff --git a/src/duckdb/src/optimizer/statistics/operator/propagate_get.cpp b/src/duckdb/src/optimizer/statistics/operator/propagate_get.cpp index 962ad8f9..dc2390db 100644 --- a/src/duckdb/src/optimizer/statistics/operator/propagate_get.cpp +++ b/src/duckdb/src/optimizer/statistics/operator/propagate_get.cpp @@ -41,7 +41,7 @@ unique_ptr StatisticsPropagator::PropagateStatistics(LogicalGet } auto &column_ids = get.GetColumnIds(); for (idx_t i = 0; i < column_ids.size(); i++) { - auto stats = get.function.statistics(context, get.bind_data.get(), column_ids[i]); + auto stats = get.function.statistics(context, get.bind_data.get(), column_ids[i].GetPrimaryIndex()); if (stats) { ColumnBinding binding(get.table_index, i); statistics_map.insert(make_pair(binding, std::move(stats))); @@ -57,12 +57,12 @@ unique_ptr StatisticsPropagator::PropagateStatistics(LogicalGet for (auto &table_filter_column : column_indexes) { idx_t column_index; for (column_index = 0; column_index < column_ids.size(); column_index++) { - if (column_ids[column_index] == table_filter_column) { + if (column_ids[column_index].GetPrimaryIndex() == table_filter_column) { break; } } D_ASSERT(column_index < column_ids.size()); - D_ASSERT(column_ids[column_index] == table_filter_column); + D_ASSERT(column_ids[column_index].GetPrimaryIndex() == table_filter_column); // find the stats ColumnBinding stats_binding(get.table_index, column_index); diff --git a/src/duckdb/src/planner/bind_context.cpp b/src/duckdb/src/planner/bind_context.cpp index 9ad84ed8..80856246 100644 --- a/src/duckdb/src/planner/bind_context.cpp +++ b/src/duckdb/src/planner/bind_context.cpp @@ -599,20 +599,20 @@ void BindContext::AddBinding(unique_ptr binding) { } void BindContext::AddBaseTable(idx_t index, const string &alias, const vector &names, - const vector &types, vector &bound_column_ids, + const vector &types, vector &bound_column_ids, StandardEntry &entry, bool add_row_id) { AddBinding(make_uniq(alias, types, names, bound_column_ids, &entry, index, add_row_id)); } void BindContext::AddBaseTable(idx_t index, const string &alias, const vector &names, - const vector &types, vector &bound_column_ids, + const vector &types, vector &bound_column_ids, const string &table_name) { AddBinding(make_uniq(alias.empty() ? table_name : alias, types, names, bound_column_ids, nullptr, index, true)); } void BindContext::AddTableFunction(idx_t index, const string &alias, const vector &names, - const vector &types, vector &bound_column_ids, + const vector &types, vector &bound_column_ids, optional_ptr entry) { AddBinding(make_uniq(alias, types, names, bound_column_ids, entry, index)); } diff --git a/src/duckdb/src/planner/binder.cpp b/src/duckdb/src/planner/binder.cpp index ec479489..3f38386c 100644 --- a/src/duckdb/src/planner/binder.cpp +++ b/src/duckdb/src/planner/binder.cpp @@ -682,13 +682,13 @@ BoundStatement Binder::BindReturning(vector> return auto binder = Binder::CreateBinder(context); - vector bound_columns; + vector bound_columns; idx_t column_count = 0; for (auto &col : table.GetColumns().Logical()) { names.push_back(col.Name()); types.push_back(col.Type()); if (!col.Generated()) { - bound_columns.push_back(column_count); + bound_columns.emplace_back(column_count); } column_count++; } diff --git a/src/duckdb/src/planner/binder/query_node/plan_setop.cpp b/src/duckdb/src/planner/binder/query_node/plan_setop.cpp index 1afca342..41988bbe 100644 --- a/src/duckdb/src/planner/binder/query_node/plan_setop.cpp +++ b/src/duckdb/src/planner/binder/query_node/plan_setop.cpp @@ -35,12 +35,13 @@ unique_ptr Binder::CastLogicalOperatorToTypes(vectorexpressions.size(); i++) { if (op->expressions[i]->type == ExpressionType::BOUND_COLUMN_REF) { auto &col_ref = op->expressions[i]->Cast(); - if (new_column_types.find(column_ids[col_ref.binding.column_index]) != new_column_types.end()) { + auto column_id = column_ids[col_ref.binding.column_index].GetPrimaryIndex(); + if (new_column_types.find(column_id) != new_column_types.end()) { // Only one reference per column is accepted do_pushdown = false; break; } - new_column_types[column_ids[col_ref.binding.column_index]] = target_types[i]; + new_column_types[column_id] = target_types[i]; } else { do_pushdown = false; break; diff --git a/src/duckdb/src/planner/binder/statement/bind_insert.cpp b/src/duckdb/src/planner/binder/statement/bind_insert.cpp index f02be800..444dd2ad 100644 --- a/src/duckdb/src/planner/binder/statement/bind_insert.cpp +++ b/src/duckdb/src/planner/binder/statement/bind_insert.cpp @@ -251,6 +251,15 @@ unique_ptr CreateSetInfoForReplace(TableCatalogEntry &table, Inse return set_info; } +vector GetColumnsToFetch(const TableBinding &binding) { + auto &bound_columns = binding.GetBoundColumnIds(); + vector result; + for (auto &col : bound_columns) { + result.push_back(col.GetPrimaryIndex()); + } + return result; +} + void Binder::BindOnConflictClause(LogicalInsert &insert, TableCatalogEntry &table, InsertStatement &stmt) { if (!stmt.on_conflict_info) { insert.action_type = OnConflictAction::THROW; @@ -422,7 +431,7 @@ void Binder::BindOnConflictClause(LogicalInsert &insert, TableCatalogEntry &tabl // of the original table, to execute the expressions D_ASSERT(original_binding->binding_type == BindingType::TABLE); auto &table_binding = original_binding->Cast(); - insert.columns_to_fetch = table_binding.GetBoundColumnIds(); + insert.columns_to_fetch = GetColumnsToFetch(table_binding); return; } @@ -448,7 +457,7 @@ void Binder::BindOnConflictClause(LogicalInsert &insert, TableCatalogEntry &tabl // of the original table, to execute the expressions D_ASSERT(original_binding->binding_type == BindingType::TABLE); auto &table_binding = original_binding->Cast(); - insert.columns_to_fetch = table_binding.GetBoundColumnIds(); + insert.columns_to_fetch = GetColumnsToFetch(table_binding); // Replace the column bindings to refer to the child operator for (auto &expr : insert.expressions) { diff --git a/src/duckdb/src/planner/binder/statement/bind_vacuum.cpp b/src/duckdb/src/planner/binder/statement/bind_vacuum.cpp index 6ef9446e..34b0ec56 100644 --- a/src/duckdb/src/planner/binder/statement/bind_vacuum.cpp +++ b/src/duckdb/src/planner/binder/statement/bind_vacuum.cpp @@ -68,7 +68,7 @@ void Binder::BindVacuumTable(LogicalVacuum &vacuum, unique_ptr D_ASSERT(select_list.size() == column_ids.size()); D_ASSERT(info.columns.size() == column_ids.size()); for (idx_t i = 0; i < column_ids.size(); i++) { - vacuum.column_id_map[i] = table.GetColumns().LogicalToPhysical(LogicalIndex(column_ids[i])).index; + vacuum.column_id_map[i] = table.GetColumns().LogicalToPhysical(column_ids[i].ToLogical()).index; } auto projection = make_uniq(GenerateTableIndex(), std::move(select_list)); diff --git a/src/duckdb/src/planner/binder/tableref/bind_showref.cpp b/src/duckdb/src/planner/binder/tableref/bind_showref.cpp index 168b68e7..5a0ddc7f 100644 --- a/src/duckdb/src/planner/binder/tableref/bind_showref.cpp +++ b/src/duckdb/src/planner/binder/tableref/bind_showref.cpp @@ -34,7 +34,7 @@ BaseTableColumnInfo FindBaseTableColumn(LogicalOperator &op, ColumnBinding bindi } result.table = table; auto base_column_id = get.GetColumnIds()[binding.column_index]; - result.column = &table->GetColumn(LogicalIndex(base_column_id)); + result.column = &table->GetColumn(LogicalIndex(base_column_id.GetPrimaryIndex())); return result; } case LogicalOperatorType::LOGICAL_PROJECTION: { diff --git a/src/duckdb/src/planner/expression_binder/index_binder.cpp b/src/duckdb/src/planner/expression_binder/index_binder.cpp index 69b27cb6..a890ffd5 100644 --- a/src/duckdb/src/planner/expression_binder/index_binder.cpp +++ b/src/duckdb/src/planner/expression_binder/index_binder.cpp @@ -72,15 +72,16 @@ unique_ptr IndexBinder::BindCreateIndex(ClientContext &context, auto &get = plan->Cast(); auto &column_ids = get.GetColumnIds(); for (auto &column_id : column_ids) { - if (column_id == COLUMN_IDENTIFIER_ROW_ID) { + if (column_id.IsRowIdColumn()) { throw BinderException("cannot create an index on the rowid"); } - create_index_info->scan_types.push_back(get.returned_types[column_id]); + auto col_id = column_id.GetPrimaryIndex(); + create_index_info->column_ids.push_back(col_id); + create_index_info->scan_types.push_back(get.returned_types[col_id]); } create_index_info->scan_types.emplace_back(LogicalType::ROW_TYPE); create_index_info->names = get.names; - create_index_info->column_ids = column_ids; create_index_info->schema = table_entry.schema.name; auto &bind_data = get.bind_data->Cast(); diff --git a/src/duckdb/src/planner/operator/logical_get.cpp b/src/duckdb/src/planner/operator/logical_get.cpp index 9883b685..fa986f18 100644 --- a/src/duckdb/src/planner/operator/logical_get.cpp +++ b/src/duckdb/src/planner/operator/logical_get.cpp @@ -67,23 +67,23 @@ InsertionOrderPreservingMap LogicalGet::ParamsToString() const { return result; } -void LogicalGet::SetColumnIds(vector &&column_ids) { +void LogicalGet::SetColumnIds(vector &&column_ids) { this->column_ids = std::move(column_ids); } void LogicalGet::AddColumnId(column_t column_id) { - column_ids.push_back(column_id); + column_ids.emplace_back(column_id); } void LogicalGet::ClearColumnIds() { column_ids.clear(); } -const vector &LogicalGet::GetColumnIds() const { +const vector &LogicalGet::GetColumnIds() const { return column_ids; } -vector &LogicalGet::GetMutableColumnIds() { +vector &LogicalGet::GetMutableColumnIds() { return column_ids; } @@ -116,24 +116,24 @@ vector LogicalGet::GetColumnBindings() { void LogicalGet::ResolveTypes() { if (column_ids.empty()) { - column_ids.push_back(COLUMN_IDENTIFIER_ROW_ID); + column_ids.emplace_back(COLUMN_IDENTIFIER_ROW_ID); } types.clear(); if (projection_ids.empty()) { for (auto &index : column_ids) { - if (index == COLUMN_IDENTIFIER_ROW_ID) { + if (index.IsRowIdColumn()) { types.emplace_back(LogicalType::ROW_TYPE); } else { - types.push_back(returned_types[index]); + types.push_back(returned_types[index.GetPrimaryIndex()]); } } } else { for (auto &proj_index : projection_ids) { auto &index = column_ids[proj_index]; - if (index == COLUMN_IDENTIFIER_ROW_ID) { + if (index.IsRowIdColumn()) { types.emplace_back(LogicalType::ROW_TYPE); } else { - types.push_back(returned_types[index]); + types.push_back(returned_types[index.GetPrimaryIndex()]); } } } @@ -170,7 +170,7 @@ void LogicalGet::Serialize(Serializer &serializer) const { serializer.WriteProperty(200, "table_index", table_index); serializer.WriteProperty(201, "returned_types", returned_types); serializer.WriteProperty(202, "names", names); - serializer.WriteProperty(203, "column_ids", column_ids); + /* [Deleted] (vector) "column_ids" */ serializer.WriteProperty(204, "projection_ids", projection_ids); serializer.WriteProperty(205, "table_filters", table_filters); FunctionSerializer::Serialize(serializer, function, bind_data.get()); @@ -183,14 +183,17 @@ void LogicalGet::Serialize(Serializer &serializer) const { serializer.WriteProperty(209, "input_table_names", input_table_names); } serializer.WriteProperty(210, "projected_input", projected_input); + serializer.WritePropertyWithDefault(211, "column_indexes", column_ids); } unique_ptr LogicalGet::Deserialize(Deserializer &deserializer) { + vector legacy_column_ids; + auto result = unique_ptr(new LogicalGet()); deserializer.ReadProperty(200, "table_index", result->table_index); deserializer.ReadProperty(201, "returned_types", result->returned_types); deserializer.ReadProperty(202, "names", result->names); - deserializer.ReadProperty(203, "column_ids", result->column_ids); + deserializer.ReadPropertyWithDefault(203, "column_ids", legacy_column_ids); deserializer.ReadProperty(204, "projection_ids", result->projection_ids); deserializer.ReadProperty(205, "table_filters", result->table_filters); auto entry = FunctionSerializer::DeserializeBase( @@ -198,13 +201,27 @@ unique_ptr LogicalGet::Deserialize(Deserializer &deserializer) result->function = entry.first; auto &function = result->function; auto has_serialize = entry.second; - unique_ptr bind_data; if (!has_serialize) { deserializer.ReadProperty(206, "parameters", result->parameters); deserializer.ReadProperty(207, "named_parameters", result->named_parameters); deserializer.ReadProperty(208, "input_table_types", result->input_table_types); deserializer.ReadProperty(209, "input_table_names", result->input_table_names); + } else { + bind_data = FunctionSerializer::FunctionDeserialize(deserializer, function); + } + deserializer.ReadProperty(210, "projected_input", result->projected_input); + deserializer.ReadPropertyWithDefault(211, "column_indexes", result->column_ids); + if (!legacy_column_ids.empty()) { + if (!result->column_ids.empty()) { + throw SerializationException( + "LogicalGet::Deserialize - either column_ids or column_indexes should be set - not both"); + } + for (auto &col_id : legacy_column_ids) { + result->column_ids.emplace_back(col_id); + } + } + if (!has_serialize) { TableFunctionRef empty_ref; TableFunctionBindInput input(result->parameters, result->named_parameters, result->input_table_types, result->input_table_names, function.function_info.get(), nullptr, result->function, @@ -218,24 +235,22 @@ unique_ptr LogicalGet::Deserialize(Deserializer &deserializer) bind_data = function.bind(deserializer.Get(), input, bind_return_types, bind_names); for (auto &col_id : result->column_ids) { - if (IsRowIdColumnId(col_id)) { + if (col_id.IsRowIdColumn()) { // rowid continue; } - auto &ret_type = result->returned_types[col_id]; - auto &col_name = result->names[col_id]; - if (bind_return_types[col_id] != ret_type) { + auto idx = col_id.GetPrimaryIndex(); + auto &ret_type = result->returned_types[idx]; + auto &col_name = result->names[idx]; + if (bind_return_types[idx] != ret_type) { throw SerializationException("Table function deserialization failure in function \"%s\" - column with " "name %s was serialized with type %s, but now has type %s", - function.name, col_name, ret_type, bind_return_types[col_id]); + function.name, col_name, ret_type, bind_return_types[idx]); } } result->returned_types = std::move(bind_return_types); - } else { - bind_data = FunctionSerializer::FunctionDeserialize(deserializer, function); } result->bind_data = std::move(bind_data); - deserializer.ReadProperty(210, "projected_input", result->projected_input); return std::move(result); } diff --git a/src/duckdb/src/planner/table_binding.cpp b/src/duckdb/src/planner/table_binding.cpp index ab8c88bb..7f53cdfd 100644 --- a/src/duckdb/src/planner/table_binding.cpp +++ b/src/duckdb/src/planner/table_binding.cpp @@ -113,7 +113,7 @@ optional_ptr EntryBinding::GetStandardEntry() { } TableBinding::TableBinding(const string &alias, vector types_p, vector names_p, - vector &bound_column_ids, optional_ptr entry, idx_t index, + vector &bound_column_ids, optional_ptr entry, idx_t index, bool add_row_id) : Binding(BindingType::TABLE, GetAlias(alias, entry), std::move(types_p), std::move(names_p), index), bound_column_ids(bound_column_ids), entry(entry) { @@ -177,15 +177,16 @@ unique_ptr TableBinding::ExpandGeneratedColumn(const string &c return (expression); } -const vector &TableBinding::GetBoundColumnIds() const { +const vector &TableBinding::GetBoundColumnIds() const { #ifdef DEBUG - unordered_set column_ids; - for (auto &id : bound_column_ids) { + unordered_set column_ids; + for (auto &col_id : bound_column_ids) { + idx_t id = col_id.IsRowIdColumn() ? DConstants::INVALID_INDEX : col_id.GetPrimaryIndex(); auto result = column_ids.insert(id); // assert that all entries in the bound_column_ids are unique D_ASSERT(result.second); auto it = std::find_if(name_map.begin(), name_map.end(), - [&](const std::pair &it) { return it.second == id; }); + [&](const std::pair &it) { return it.second == id; }); // assert that every id appears in the name_map D_ASSERT(it != name_map.end()); // the order that they appear in is not guaranteed to be sequential @@ -199,13 +200,17 @@ ColumnBinding TableBinding::GetColumnBinding(column_t column_index) { ColumnBinding binding; // Locate the column_id that matches the 'column_index' - auto it = std::find_if(column_ids.begin(), column_ids.end(), - [&](const column_t &id) -> bool { return id == column_index; }); - // Get the index of it - binding.column_index = NumericCast(std::distance(column_ids.begin(), it)); + binding.column_index = column_ids.size(); + for (idx_t i = 0; i < column_ids.size(); ++i) { + auto &col_id = column_ids[i]; + if (col_id.GetPrimaryIndex() == column_index) { + binding.column_index = i; + break; + } + } // If it wasn't found, add it - if (it == column_ids.end()) { - column_ids.push_back(column_index); + if (binding.column_index == column_ids.size()) { + column_ids.emplace_back(column_index); } binding.table_index = index; diff --git a/src/duckdb/src/planner/table_filter.cpp b/src/duckdb/src/planner/table_filter.cpp index 636a7671..b9db37d8 100644 --- a/src/duckdb/src/planner/table_filter.cpp +++ b/src/duckdb/src/planner/table_filter.cpp @@ -7,7 +7,8 @@ namespace duckdb { -void TableFilterSet::PushFilter(idx_t column_index, unique_ptr filter) { +void TableFilterSet::PushFilter(const ColumnIndex &col_idx, unique_ptr filter) { + auto column_index = col_idx.GetPrimaryIndex(); auto entry = filters.find(column_index); if (entry == filters.end()) { // no filter yet: push the filter directly @@ -46,7 +47,7 @@ void DynamicTableFilterSet::PushFilter(const PhysicalOperator &op, idx_t column_ } else { filter_ptr = entry->second.get(); } - filter_ptr->PushFilter(column_index, std::move(filter)); + filter_ptr->PushFilter(ColumnIndex(column_index), std::move(filter)); } bool DynamicTableFilterSet::HasFilters() const { @@ -61,16 +62,16 @@ DynamicTableFilterSet::GetFinalTableFilters(const PhysicalTableScan &scan, auto result = make_uniq(); if (existing_filters) { for (auto &entry : existing_filters->filters) { - result->PushFilter(entry.first, entry.second->Copy()); + result->PushFilter(ColumnIndex(entry.first), entry.second->Copy()); } } for (auto &entry : filters) { for (auto &filter : entry.second->filters) { - if (IsRowIdColumnId(scan.column_ids[filter.first])) { + if (scan.column_ids[filter.first].IsRowIdColumn()) { // skip row id filters continue; } - result->PushFilter(filter.first, filter.second->Copy()); + result->PushFilter(ColumnIndex(filter.first), filter.second->Copy()); } } if (result->filters.empty()) { diff --git a/src/duckdb/src/storage/data_table.cpp b/src/duckdb/src/storage/data_table.cpp index bf1e1424..49bde202 100644 --- a/src/duckdb/src/storage/data_table.cpp +++ b/src/duckdb/src/storage/data_table.cpp @@ -158,7 +158,7 @@ DataTable::DataTable(ClientContext &context, DataTable &parent, BoundConstraint } DataTable::DataTable(ClientContext &context, DataTable &parent, idx_t changed_idx, const LogicalType &target_type, - const vector &bound_columns, Expression &cast_expr) + const vector &bound_columns, Expression &cast_expr) : db(parent.db), info(parent.info), is_root(true) { auto &local_storage = LocalStorage::Get(context, db); // prevent any tuples from being added to the parent @@ -225,8 +225,8 @@ TableIOManager &TableIOManager::Get(DataTable &table) { //===--------------------------------------------------------------------===// // Scan //===--------------------------------------------------------------------===// -void DataTable::InitializeScan(DuckTransaction &transaction, TableScanState &state, const vector &column_ids, - TableFilterSet *table_filters) { +void DataTable::InitializeScan(DuckTransaction &transaction, TableScanState &state, + const vector &column_ids, TableFilterSet *table_filters) { state.checkpoint_lock = transaction.SharedLockTable(*info); auto &local_storage = LocalStorage::Get(transaction); state.Initialize(column_ids, table_filters); @@ -235,7 +235,7 @@ void DataTable::InitializeScan(DuckTransaction &transaction, TableScanState &sta } void DataTable::InitializeScanWithOffset(DuckTransaction &transaction, TableScanState &state, - const vector &column_ids, idx_t start_row, idx_t end_row) { + const vector &column_ids, idx_t start_row, idx_t end_row) { state.checkpoint_lock = transaction.SharedLockTable(*info); state.Initialize(column_ids); row_groups->InitializeScanWithOffset(state.table_state, column_ids, start_row, end_row); @@ -377,7 +377,7 @@ TableStorageInfo DataTable::GetStorageInfo() { //===--------------------------------------------------------------------===// // Fetch //===--------------------------------------------------------------------===// -void DataTable::Fetch(DuckTransaction &transaction, DataChunk &result, const vector &column_ids, +void DataTable::Fetch(DuckTransaction &transaction, DataChunk &result, const vector &column_ids, const Vector &row_identifiers, idx_t fetch_count, ColumnFetchState &state) { auto lock = info->checkpoint_lock.GetSharedLock(); row_groups->Fetch(transaction, result, column_ids, row_identifiers, fetch_count, state); @@ -896,11 +896,11 @@ void DataTable::ScanTableSegment(DuckTransaction &transaction, idx_t row_start, } idx_t end = row_start + count; - vector column_ids; + vector column_ids; vector types; for (idx_t i = 0; i < this->column_definitions.size(); i++) { auto &col = this->column_definitions[i]; - column_ids.push_back(i); + column_ids.emplace_back(i); types.push_back(col.Type()); } DataChunk chunk; @@ -1161,7 +1161,7 @@ unique_ptr DataTable::InitializeDelete(TableCatalogEntry &tabl if (result->has_delete_constraints) { // initialize the chunk if there are any constraints to verify for (idx_t i = 0; i < column_definitions.size(); i++) { - result->col_ids.push_back(column_definitions[i].StorageOid()); + result->col_ids.emplace_back(column_definitions[i].StorageOid()); types.emplace_back(column_definitions[i].Type()); } result->verify_chunk.Initialize(Allocator::Get(context), types); diff --git a/src/duckdb/src/storage/local_storage.cpp b/src/duckdb/src/storage/local_storage.cpp index 9e1ad45e..a702af82 100644 --- a/src/duckdb/src/storage/local_storage.cpp +++ b/src/duckdb/src/storage/local_storage.cpp @@ -40,7 +40,7 @@ LocalTableStorage::LocalTableStorage(ClientContext &context, DataTable &table) LocalTableStorage::LocalTableStorage(ClientContext &context, DataTable &new_dt, LocalTableStorage &parent, idx_t changed_idx, const LogicalType &target_type, - const vector &bound_columns, Expression &cast_expr) + const vector &bound_columns, Expression &cast_expr) : table_ref(new_dt), allocator(Allocator::Get(new_dt.db)), deleted_rows(parent.deleted_rows), optimistic_writer(new_dt, parent.optimistic_writer), optimistic_writers(std::move(parent.optimistic_writers)), merged_storage(parent.merged_storage) { @@ -122,15 +122,20 @@ ErrorData LocalTableStorage::AppendToIndexes(DuckTransaction &transaction, RowGr row_t &start_row) { // only need to scan for index append // figure out which columns we need to scan for the set of indexes - auto columns = index_list.GetRequiredColumns(); + auto index_columns = index_list.GetRequiredColumns(); + vector required_columns; + for (auto &col : index_columns) { + required_columns.emplace_back(col); + } // create an empty mock chunk that contains all the correct types for the table DataChunk mock_chunk; mock_chunk.InitializeEmpty(table_types); ErrorData error; - source.Scan(transaction, columns, [&](DataChunk &chunk) -> bool { + source.Scan(transaction, required_columns, [&](DataChunk &chunk) -> bool { // construct the mock chunk by referencing the required columns - for (idx_t i = 0; i < columns.size(); i++) { - mock_chunk.data[columns[i]].Reference(chunk.data[i]); + for (idx_t i = 0; i < required_columns.size(); i++) { + auto col_id = required_columns[i].GetPrimaryIndex(); + mock_chunk.data[col_id].Reference(chunk.data[i]); } mock_chunk.SetCardinality(chunk); // append this chunk to the indexes of the table @@ -324,7 +329,7 @@ void LocalStorage::InitializeScan(DataTable &table, CollectionScanState &state, storage->InitializeScan(state, table_filters); } -void LocalStorage::Scan(CollectionScanState &state, const vector &column_ids, DataChunk &result) { +void LocalStorage::Scan(CollectionScanState &state, const vector &, DataChunk &result) { state.Scan(transaction, result); } @@ -557,7 +562,7 @@ void LocalStorage::DropColumn(DataTable &old_dt, DataTable &new_dt, idx_t remove } void LocalStorage::ChangeType(DataTable &old_dt, DataTable &new_dt, idx_t changed_idx, const LogicalType &target_type, - const vector &bound_columns, Expression &cast_expr) { + const vector &bound_columns, Expression &cast_expr) { // check if there are any pending appends for the old version of the table auto storage = table_manager.MoveEntry(old_dt); if (!storage) { @@ -568,7 +573,7 @@ void LocalStorage::ChangeType(DataTable &old_dt, DataTable &new_dt, idx_t change table_manager.InsertEntry(new_dt, std::move(new_storage)); } -void LocalStorage::FetchChunk(DataTable &table, Vector &row_ids, idx_t count, const vector &col_ids, +void LocalStorage::FetchChunk(DataTable &table, Vector &row_ids, idx_t count, const vector &col_ids, DataChunk &chunk, ColumnFetchState &fetch_state) { auto storage = table_manager.GetStorage(table); if (!storage) { diff --git a/src/duckdb/src/storage/serialization/serialize_nodes.cpp b/src/duckdb/src/storage/serialization/serialize_nodes.cpp index 808d5c1b..8dc9d987 100644 --- a/src/duckdb/src/storage/serialization/serialize_nodes.cpp +++ b/src/duckdb/src/storage/serialization/serialize_nodes.cpp @@ -34,6 +34,7 @@ #include "duckdb/common/types/interval.hpp" #include "duckdb/parser/qualified_name.hpp" #include "duckdb/parser/parsed_data/exported_table_data.hpp" +#include "duckdb/common/column_index.hpp" namespace duckdb { @@ -303,6 +304,18 @@ ColumnDefinition ColumnDefinition::Deserialize(Deserializer &deserializer) { return result; } +void ColumnIndex::Serialize(Serializer &serializer) const { + serializer.WritePropertyWithDefault(1, "index", index); + serializer.WritePropertyWithDefault>(2, "child_indexes", child_indexes); +} + +ColumnIndex ColumnIndex::Deserialize(Deserializer &deserializer) { + ColumnIndex result; + deserializer.ReadPropertyWithDefault(1, "index", result.index); + deserializer.ReadPropertyWithDefault>(2, "child_indexes", result.child_indexes); + return result; +} + void ColumnInfo::Serialize(Serializer &serializer) const { serializer.WritePropertyWithDefault>(100, "names", names); serializer.WritePropertyWithDefault>(101, "types", types); diff --git a/src/duckdb/src/storage/table/row_group.cpp b/src/duckdb/src/storage/table/row_group.cpp index 5ef82603..2b97075e 100644 --- a/src/duckdb/src/storage/table/row_group.cpp +++ b/src/duckdb/src/storage/table/row_group.cpp @@ -99,6 +99,10 @@ idx_t RowGroup::GetRowGroupSize() const { return collection.get().GetRowGroupSize(); } +ColumnData &RowGroup::GetColumn(const StorageIndex &c) { + return GetColumn(c.GetPrimaryIndex()); +} + ColumnData &RowGroup::GetColumn(storage_t c) { D_ASSERT(c < columns.size()); if (!is_loaded) { @@ -149,7 +153,8 @@ void RowGroup::InitializeEmpty(const vector &types) { } } -void ColumnScanState::Initialize(const LogicalType &type, optional_ptr options) { +void ColumnScanState::Initialize(const LogicalType &type, const vector &children, + optional_ptr options) { // Register the options in the state scan_options = options; @@ -161,8 +166,23 @@ void ColumnScanState::Initialize(const LogicalType &type, optional_ptr options) { + vector children; + Initialize(type, children, options); +} + void CollectionScanState::Initialize(const vector &types) { auto &column_ids = GetColumnIds(); column_scans = make_unsafe_uniq_array(column_ids.size()); for (idx_t i = 0; i < column_ids.size(); i++) { - if (column_ids[i] == COLUMN_IDENTIFIER_ROW_ID) { + if (column_ids[i].IsRowIdColumn()) { continue; } - column_scans[i].Initialize(types[column_ids[i]], &GetOptions()); + auto col_id = column_ids[i].GetPrimaryIndex(); + column_scans[i].Initialize(types[col_id], column_ids[i].GetChildIndexes(), &GetOptions()); } } @@ -212,7 +238,7 @@ bool RowGroup::InitializeScanWithOffset(CollectionScanState &state, idx_t vector D_ASSERT(state.column_scans); for (idx_t i = 0; i < column_ids.size(); i++) { const auto &column = column_ids[i]; - if (column != COLUMN_IDENTIFIER_ROW_ID) { + if (!column.IsRowIdColumn()) { auto &column_data = GetColumn(column); column_data.InitializeScanWithOffset(state.column_scans[i], row_number); state.column_scans[i].scan_options = &state.GetOptions(); @@ -239,7 +265,7 @@ bool RowGroup::InitializeScan(CollectionScanState &state) { D_ASSERT(state.column_scans); for (idx_t i = 0; i < column_ids.size(); i++) { auto column = column_ids[i]; - if (column != COLUMN_IDENTIFIER_ROW_ID) { + if (!column.IsRowIdColumn()) { auto &column_data = GetColumn(column); column_data.InitializeScan(state.column_scans[i]); state.column_scans[i].scan_options = &state.GetOptions(); @@ -368,10 +394,9 @@ void RowGroup::NextVector(CollectionScanState &state) { const auto &column_ids = state.GetColumnIds(); for (idx_t i = 0; i < column_ids.size(); i++) { const auto &column = column_ids[i]; - if (column == COLUMN_IDENTIFIER_ROW_ID) { + if (column.IsRowIdColumn()) { continue; } - D_ASSERT(column < columns.size()); GetColumn(column).Skip(state.column_scans[i]); } } @@ -536,7 +561,7 @@ void RowGroup::TemplatedScan(TransactionData transaction, CollectionScanState &s PrefetchState prefetch_state; for (idx_t i = 0; i < column_ids.size(); i++) { const auto &column = column_ids[i]; - if (column != COLUMN_IDENTIFIER_ROW_ID) { + if (!column.IsRowIdColumn()) { GetColumn(column).InitializePrefetch(prefetch_state, state.column_scans[i], max_count); } } @@ -549,7 +574,7 @@ void RowGroup::TemplatedScan(TransactionData transaction, CollectionScanState &s // scan all vectors completely: full scan without deletions or table filters for (idx_t i = 0; i < column_ids.size(); i++) { const auto &column = column_ids[i]; - if (column == COLUMN_IDENTIFIER_ROW_ID) { + if (column.IsRowIdColumn()) { // scan row id D_ASSERT(result.data[i].GetType().InternalType() == ROW_TYPE); result.data[i].Sequence(UnsafeNumericCast(this->start + current_row), 1, count); @@ -604,8 +629,8 @@ void RowGroup::TemplatedScan(TransactionData transaction, CollectionScanState &s result.Reset(); // skip this vector in all the scans that were not scanned yet for (idx_t i = 0; i < column_ids.size(); i++) { - auto col_idx = column_ids[i]; - if (col_idx == COLUMN_IDENTIFIER_ROW_ID) { + auto &col_idx = column_ids[i]; + if (col_idx.IsRowIdColumn()) { continue; } if (has_filters && filter_info.ColumnHasFilters(i)) { @@ -623,8 +648,8 @@ void RowGroup::TemplatedScan(TransactionData transaction, CollectionScanState &s // column has already been scanned as part of the filtering process continue; } - auto column = column_ids[i]; - if (column == COLUMN_IDENTIFIER_ROW_ID) { + auto &column = column_ids[i]; + if (column.IsRowIdColumn()) { D_ASSERT(result.data[i].GetType().InternalType() == PhysicalType::INT64); result.data[i].SetVectorType(VectorType::FLAT_VECTOR); auto result_data = FlatVector::GetData(result.data[i]); @@ -765,14 +790,14 @@ bool RowGroup::Fetch(TransactionData transaction, idx_t row) { return vinfo->Fetch(transaction, row); } -void RowGroup::FetchRow(TransactionData transaction, ColumnFetchState &state, const vector &column_ids, +void RowGroup::FetchRow(TransactionData transaction, ColumnFetchState &state, const vector &column_ids, row_t row_id, DataChunk &result, idx_t result_idx) { for (idx_t col_idx = 0; col_idx < column_ids.size(); col_idx++) { - auto column = column_ids[col_idx]; + auto &column = column_ids[col_idx]; auto &result_vector = result.data[col_idx]; D_ASSERT(result_vector.GetVectorType() == VectorType::FLAT_VECTOR); D_ASSERT(!FlatVector::IsNull(result_vector, result_idx)); - if (column == COLUMN_IDENTIFIER_ROW_ID) { + if (column.IsRowIdColumn()) { // row id column: fill in the row ids D_ASSERT(result_vector.GetType().InternalType() == PhysicalType::INT64); result_vector.SetVectorType(VectorType::FLAT_VECTOR); diff --git a/src/duckdb/src/storage/table/row_group_collection.cpp b/src/duckdb/src/storage/table/row_group_collection.cpp index 961a0959..8282097d 100644 --- a/src/duckdb/src/storage/table/row_group_collection.cpp +++ b/src/duckdb/src/storage/table/row_group_collection.cpp @@ -141,7 +141,7 @@ void RowGroupCollection::Verify() { //===--------------------------------------------------------------------===// // Scan //===--------------------------------------------------------------------===// -void RowGroupCollection::InitializeScan(CollectionScanState &state, const vector &column_ids, +void RowGroupCollection::InitializeScan(CollectionScanState &state, const vector &column_ids, TableFilterSet *table_filters) { auto row_group = row_groups->GetRootSegment(); D_ASSERT(row_group); @@ -157,7 +157,7 @@ void RowGroupCollection::InitializeCreateIndexScan(CreateIndexScanState &state) state.segment_lock = row_groups->Lock(); } -void RowGroupCollection::InitializeScanWithOffset(CollectionScanState &state, const vector &column_ids, +void RowGroupCollection::InitializeScanWithOffset(CollectionScanState &state, const vector &column_ids, idx_t start_row, idx_t end_row) { auto row_group = row_groups->GetSegment(start_row); D_ASSERT(row_group); @@ -242,11 +242,11 @@ bool RowGroupCollection::NextParallelScan(ClientContext &context, ParallelCollec return false; } -bool RowGroupCollection::Scan(DuckTransaction &transaction, const vector &column_ids, +bool RowGroupCollection::Scan(DuckTransaction &transaction, const vector &column_ids, const std::function &fun) { vector scan_types; for (idx_t i = 0; i < column_ids.size(); i++) { - scan_types.push_back(types[column_ids[i]]); + scan_types.push_back(types[column_ids[i].GetPrimaryIndex()]); } DataChunk chunk; chunk.Initialize(GetAllocator(), scan_types); @@ -269,10 +269,10 @@ bool RowGroupCollection::Scan(DuckTransaction &transaction, const vector &fun) { - vector column_ids; + vector column_ids; column_ids.reserve(types.size()); for (idx_t i = 0; i < types.size(); i++) { - column_ids.push_back(i); + column_ids.emplace_back(i); } return Scan(transaction, column_ids, fun); } @@ -280,7 +280,7 @@ bool RowGroupCollection::Scan(DuckTransaction &transaction, const std::function< //===--------------------------------------------------------------------===// // Fetch //===--------------------------------------------------------------------===// -void RowGroupCollection::Fetch(TransactionData transaction, DataChunk &result, const vector &column_ids, +void RowGroupCollection::Fetch(TransactionData transaction, DataChunk &result, const vector &column_ids, const Vector &row_identifiers, idx_t fetch_count, ColumnFetchState &state) { // figure out which row_group to fetch from auto row_ids = FlatVector::GetData(row_identifiers); @@ -627,10 +627,10 @@ void RowGroupCollection::RemoveFromIndexes(TableIndexList &indexes, Vector &row_ // initialize the fetch state // FIXME: we do not need to fetch all columns, only the columns required by the indices! TableScanState state; - vector column_ids; + vector column_ids; column_ids.reserve(types.size()); for (idx_t i = 0; i < types.size(); i++) { - column_ids.push_back(i); + column_ids.emplace_back(i); } state.Initialize(std::move(column_ids)); state.table_state.max_row = row_start + total_rows; @@ -791,9 +791,9 @@ class VacuumTask : public BaseCheckpointTask { DataChunk scan_chunk; scan_chunk.Initialize(Allocator::DefaultAllocator(), types); - vector column_ids; + vector column_ids; for (idx_t c = 0; c < types.size(); c++) { - column_ids.push_back(c); + column_ids.emplace_back(c); } idx_t current_append_idx = 0; @@ -1103,7 +1103,8 @@ shared_ptr RowGroupCollection::RemoveColumn(idx_t col_idx) { shared_ptr RowGroupCollection::AlterType(ClientContext &context, idx_t changed_idx, const LogicalType &target_type, - vector bound_columns, Expression &cast_expr) { + vector bound_columns, + Expression &cast_expr) { D_ASSERT(changed_idx < types.size()); auto new_types = types; new_types[changed_idx] = target_type; @@ -1114,10 +1115,10 @@ shared_ptr RowGroupCollection::AlterType(ClientContext &cont vector scan_types; for (idx_t i = 0; i < bound_columns.size(); i++) { - if (bound_columns[i] == COLUMN_IDENTIFIER_ROW_ID) { + if (bound_columns[i].IsRowIdColumn()) { scan_types.emplace_back(LogicalType::ROW_TYPE); } else { - scan_types.push_back(types[bound_columns[i]]); + scan_types.push_back(types[bound_columns[i].GetPrimaryIndex()]); } } DataChunk scan_chunk; @@ -1158,14 +1159,15 @@ void RowGroupCollection::VerifyNewConstraint(DataTable &parent, const BoundConst DataChunk scan_chunk; scan_chunk.Initialize(GetAllocator(), scan_types); - vector column_ids; - column_ids.push_back(physical_index); + vector column_ids; + column_ids.emplace_back(physical_index); // Use SCAN_COMMITTED to scan the latest data. CreateIndexScanState state; auto scan_type = TableScanType::TABLE_SCAN_COMMITTED_ROWS_OMIT_PERMANENTLY_DELETED; state.Initialize(column_ids, nullptr); InitializeScan(state.table_state, column_ids, nullptr); + InitializeCreateIndexScan(state); while (true) { diff --git a/src/duckdb/src/storage/table/scan_state.cpp b/src/duckdb/src/storage/table/scan_state.cpp index dc4213d0..adeccde9 100644 --- a/src/duckdb/src/storage/table/scan_state.cpp +++ b/src/duckdb/src/storage/table/scan_state.cpp @@ -16,7 +16,7 @@ TableScanState::TableScanState() : table_state(*this), local_state(*this) { TableScanState::~TableScanState() { } -void TableScanState::Initialize(vector column_ids_p, optional_ptr table_filters, +void TableScanState::Initialize(vector column_ids_p, optional_ptr table_filters, optional_ptr table_sampling) { this->column_ids = std::move(column_ids_p); if (table_filters) { @@ -28,7 +28,7 @@ void TableScanState::Initialize(vector column_ids_p, optional_ptr &TableScanState::GetColumnIds() { +const vector &TableScanState::GetColumnIds() { D_ASSERT(!column_ids.empty()); return column_ids; } @@ -44,11 +44,12 @@ ScanSamplingInfo &TableScanState::GetSamplingInfo() { return sampling_info; } -ScanFilter::ScanFilter(idx_t index, const vector &column_ids, TableFilter &filter) - : scan_column_index(index), table_column_index(column_ids[index]), filter(filter), always_true(false) { +ScanFilter::ScanFilter(idx_t index, const vector &column_ids, TableFilter &filter) + : scan_column_index(index), table_column_index(column_ids[index].GetPrimaryIndex()), filter(filter), + always_true(false) { } -void ScanFilterInfo::Initialize(TableFilterSet &filters, const vector &column_ids) { +void ScanFilterInfo::Initialize(TableFilterSet &filters, const vector &column_ids) { D_ASSERT(!filters.filters.empty()); table_filters = &filters; adaptive_filter = make_uniq(filters); @@ -142,7 +143,7 @@ void ColumnScanState::Next(idx_t count) { } } -const vector &CollectionScanState::GetColumnIds() { +const vector &CollectionScanState::GetColumnIds() { return parent.GetColumnIds(); } diff --git a/src/duckdb/src/storage/table/struct_column_data.cpp b/src/duckdb/src/storage/table/struct_column_data.cpp index d6e2ef8d..5d1506b5 100644 --- a/src/duckdb/src/storage/table/struct_column_data.cpp +++ b/src/duckdb/src/storage/table/struct_column_data.cpp @@ -43,6 +43,9 @@ idx_t StructColumnData::GetMaxEntry() { void StructColumnData::InitializePrefetch(PrefetchState &prefetch_state, ColumnScanState &scan_state, idx_t rows) { validity.InitializePrefetch(prefetch_state, scan_state.child_states[0], rows); for (idx_t i = 0; i < sub_columns.size(); i++) { + if (!scan_state.scan_child_column[i]) { + continue; + } sub_columns[i]->InitializePrefetch(prefetch_state, scan_state.child_states[i + 1], rows); } } @@ -57,6 +60,9 @@ void StructColumnData::InitializeScan(ColumnScanState &state) { // initialize the sub-columns for (idx_t i = 0; i < sub_columns.size(); i++) { + if (!state.scan_child_column[i]) { + continue; + } sub_columns[i]->InitializeScan(state.child_states[i + 1]); } } @@ -71,6 +77,9 @@ void StructColumnData::InitializeScanWithOffset(ColumnScanState &state, idx_t ro // initialize the sub-columns for (idx_t i = 0; i < sub_columns.size(); i++) { + if (!state.scan_child_column[i]) { + continue; + } sub_columns[i]->InitializeScanWithOffset(state.child_states[i + 1], row_idx); } } @@ -80,7 +89,14 @@ idx_t StructColumnData::Scan(TransactionData transaction, idx_t vector_index, Co auto scan_count = validity.Scan(transaction, vector_index, state.child_states[0], result, target_count); auto &child_entries = StructVector::GetEntries(result); for (idx_t i = 0; i < sub_columns.size(); i++) { - sub_columns[i]->Scan(transaction, vector_index, state.child_states[i + 1], *child_entries[i], target_count); + auto &target_vector = *child_entries[i]; + if (!state.scan_child_column[i]) { + // if we are not scanning this vector - set it to NULL + target_vector.SetVectorType(VectorType::CONSTANT_VECTOR); + ConstantVector::SetNull(target_vector, true); + continue; + } + sub_columns[i]->Scan(transaction, vector_index, state.child_states[i + 1], target_vector, target_count); } return scan_count; } @@ -90,7 +106,14 @@ idx_t StructColumnData::ScanCommitted(idx_t vector_index, ColumnScanState &state auto scan_count = validity.ScanCommitted(vector_index, state.child_states[0], result, allow_updates, target_count); auto &child_entries = StructVector::GetEntries(result); for (idx_t i = 0; i < sub_columns.size(); i++) { - sub_columns[i]->ScanCommitted(vector_index, state.child_states[i + 1], *child_entries[i], allow_updates, + auto &target_vector = *child_entries[i]; + if (!state.scan_child_column[i]) { + // if we are not scanning this vector - set it to NULL + target_vector.SetVectorType(VectorType::CONSTANT_VECTOR); + ConstantVector::SetNull(target_vector, true); + continue; + } + sub_columns[i]->ScanCommitted(vector_index, state.child_states[i + 1], target_vector, allow_updates, target_count); } return scan_count; @@ -100,7 +123,14 @@ idx_t StructColumnData::ScanCount(ColumnScanState &state, Vector &result, idx_t auto scan_count = validity.ScanCount(state.child_states[0], result, count); auto &child_entries = StructVector::GetEntries(result); for (idx_t i = 0; i < sub_columns.size(); i++) { - sub_columns[i]->ScanCount(state.child_states[i + 1], *child_entries[i], count); + auto &target_vector = *child_entries[i]; + if (!state.scan_child_column[i]) { + // if we are not scanning this vector - set it to NULL + target_vector.SetVectorType(VectorType::CONSTANT_VECTOR); + ConstantVector::SetNull(target_vector, true); + continue; + } + sub_columns[i]->ScanCount(state.child_states[i + 1], target_vector, count); } return scan_count; } @@ -110,6 +140,9 @@ void StructColumnData::Skip(ColumnScanState &state, idx_t count) { // skip inside the sub-columns for (idx_t child_idx = 0; child_idx < sub_columns.size(); child_idx++) { + if (!state.scan_child_column[child_idx]) { + continue; + } sub_columns[child_idx]->Skip(state.child_states[child_idx + 1], count); } } diff --git a/src/duckdb/src/storage/table_index_list.cpp b/src/duckdb/src/storage/table_index_list.cpp index 426568ae..821fcf9c 100644 --- a/src/duckdb/src/storage/table_index_list.cpp +++ b/src/duckdb/src/storage/table_index_list.cpp @@ -92,7 +92,7 @@ void TableIndexList::InitializeIndexes(ClientContext &context, DataTableInfo &ta // Add the table to the binder // We're not interested in the column_ids here, so just pass a dummy vector - vector dummy_column_ids; + vector dummy_column_ids; binder->bind_context.AddBaseTable(0, string(), column_names, column_types, dummy_column_ids, table); // Create an IndexBinder to bind the index diff --git a/src/duckdb/src/storage/wal_replay.cpp b/src/duckdb/src/storage/wal_replay.cpp index f70323de..2eca4581 100644 --- a/src/duckdb/src/storage/wal_replay.cpp +++ b/src/duckdb/src/storage/wal_replay.cpp @@ -440,8 +440,8 @@ void WriteAheadLogDeserializer::ReplayAlter() { } // Create a binder to bind the parsed expressions. - vector column_ids; - binder->bind_context.AddBaseTable(0, string(), column_names, column_types, column_ids, table); + vector column_indexes; + binder->bind_context.AddBaseTable(0, string(), column_names, column_types, column_indexes, table); IndexBinder idx_binder(*binder, context); // Bind the parsed expressions to create unbound expressions. @@ -453,6 +453,11 @@ void WriteAheadLogDeserializer::ReplayAlter() { unbound_expressions.push_back(idx_binder.Bind(parsed)); } + vector column_ids; + for (auto &column_index : column_indexes) { + column_ids.push_back(column_index.GetPrimaryIndex()); + } + auto &storage = table.GetStorage(); CreateIndexInput input(TableIOManager::Get(storage), storage.db, IndexConstraintType::PRIMARY, index_storage_info.name, column_ids, unbound_expressions, index_storage_info, @@ -656,8 +661,8 @@ void WriteAheadLogDeserializer::ReplayCreateIndex() { column_names.push_back(col.Name()); } - // Create a binder to bind the parsed expressions. - vector column_ids; + // create a binder to bind the parsed expressions + vector column_ids; binder->bind_context.AddBaseTable(0, string(), column_names, column_types, column_ids, table); IndexBinder idx_binder(*binder, context);