From 444080213e0ba1f87fecdf6896906c3c39ab0c11 Mon Sep 17 00:00:00 2001 From: DuckDB Labs GitHub Bot Date: Sat, 24 Aug 2024 00:31:00 +0000 Subject: [PATCH] Update vendored DuckDB sources to 51d53f85 --- .../extension/json/include/json_executors.hpp | 43 +- .../extension/json/include/json_functions.hpp | 2 + src/duckdb/extension/json/json_functions.cpp | 2 + .../json/json_functions/json_array_length.cpp | 2 +- .../json/json_functions/json_exists.cpp | 32 + .../json/json_functions/json_extract.cpp | 4 +- .../json/json_functions/json_keys.cpp | 2 +- .../json/json_functions/json_pretty.cpp | 2 +- .../json/json_functions/json_structure.cpp | 2 +- .../json/json_functions/json_type.cpp | 6 +- .../json/json_functions/json_value.cpp | 42 + .../catalog_entry/duck_schema_entry.cpp | 34 +- .../catalog_entry/duck_table_entry.cpp | 170 ++- .../src/common/compressed_file_system.cpp | 1 - src/duckdb/src/common/enum_util.cpp | 38 + .../core_functions/scalar/generic/error.cpp | 2 +- src/duckdb/src/execution/index/art/art.cpp | 1256 +++++++++-------- .../src/execution/index/art/art_key.cpp | 159 ++- .../src/execution/index/art/base_leaf.cpp | 168 +++ .../src/execution/index/art/base_node.cpp | 163 +++ .../src/execution/index/art/iterator.cpp | 226 ++- src/duckdb/src/execution/index/art/leaf.cpp | 422 +++--- src/duckdb/src/execution/index/art/node.cpp | 740 ++++++---- src/duckdb/src/execution/index/art/node16.cpp | 196 --- .../src/execution/index/art/node256.cpp | 122 +- .../src/execution/index/art/node256_leaf.cpp | 71 + src/duckdb/src/execution/index/art/node4.cpp | 189 --- src/duckdb/src/execution/index/art/node48.cpp | 218 +-- src/duckdb/src/execution/index/art/prefix.cpp | 668 +++++---- .../src/execution/index/bound_index.cpp | 8 +- .../execution/index/fixed_size_allocator.cpp | 28 +- .../src/execution/index/fixed_size_buffer.cpp | 81 +- src/duckdb/src/execution/join_hashtable.cpp | 34 +- .../table_function/global_csv_state.cpp | 38 +- .../operator/join/physical_hash_join.cpp | 3 
+- .../schema/physical_create_art_index.cpp | 59 +- .../function/scalar/operators/arithmetic.cpp | 26 +- src/duckdb/src/function/table/table_scan.cpp | 6 +- .../function/table/version/pragma_version.cpp | 6 +- src/duckdb/src/include/duckdb.h | 35 + .../src/include/duckdb/common/enum_util.hpp | 8 + .../include/duckdb/common/optional_ptr.hpp | 11 +- .../aggregate/quantile_sort_tree.hpp | 11 +- .../duckdb/execution/index/art/art.hpp | 190 ++- .../duckdb/execution/index/art/art_key.hpp | 68 +- .../duckdb/execution/index/art/base_leaf.hpp | 109 ++ .../duckdb/execution/index/art/base_node.hpp | 140 ++ .../duckdb/execution/index/art/iterator.hpp | 62 +- .../duckdb/execution/index/art/leaf.hpp | 93 +- .../duckdb/execution/index/art/node.hpp | 205 ++- .../duckdb/execution/index/art/node16.hpp | 65 - .../duckdb/execution/index/art/node256.hpp | 75 +- .../execution/index/art/node256_leaf.hpp | 53 + .../duckdb/execution/index/art/node4.hpp | 63 - .../duckdb/execution/index/art/node48.hpp | 87 +- .../duckdb/execution/index/art/prefix.hpp | 153 +- .../duckdb/execution/index/bound_index.hpp | 13 +- .../execution/index/fixed_size_allocator.hpp | 44 +- .../execution/index/fixed_size_buffer.hpp | 2 - .../duckdb/execution/index/index_pointer.hpp | 6 +- .../duckdb/execution/join_hashtable.hpp | 6 +- .../schema/physical_create_art_index.hpp | 4 +- .../duckdb/function/table/table_scan.hpp | 2 +- .../duckdb/main/capi/extension_api.hpp | 8 + src/duckdb/src/include/duckdb/main/config.hpp | 6 +- .../src/include/duckdb/main/database.hpp | 1 + .../include/duckdb/main/extension_entries.hpp | 2 + .../include/duckdb/main/extension_helper.hpp | 3 +- .../src/include/duckdb/main/relation.hpp | 5 +- .../src/include/duckdb/main/settings.hpp | 20 + .../duckdb/planner/logical_operator.hpp | 1 + .../include/duckdb/storage/block_manager.hpp | 2 + .../storage/in_memory_block_manager.hpp | 3 + .../duckdb/storage/index_storage_info.hpp | 24 +- .../storage/single_file_block_manager.hpp | 2 + 
.../duckdb/storage/storage_manager.hpp | 3 + .../duckdb/storage/table/table_index_list.hpp | 4 +- src/duckdb/src/include/duckdb_extension.h | 8 + src/duckdb/src/main/capi/duckdb_value-c.cpp | 60 +- src/duckdb/src/main/capi/profiling_info-c.cpp | 24 + src/duckdb/src/main/config.cpp | 8 + src/duckdb/src/main/database.cpp | 63 +- .../src/main/extension/extension_helper.cpp | 19 + src/duckdb/src/main/settings/settings.cpp | 32 + .../optimizer/compressed_materialization.cpp | 6 + src/duckdb/src/optimizer/cse_optimizer.cpp | 3 + src/duckdb/src/optimizer/filter_pushdown.cpp | 10 + .../join_order/join_order_optimizer.cpp | 7 + .../join_order/query_graph_manager.cpp | 8 - .../optimizer/join_order/relation_manager.cpp | 16 +- .../join_order/relation_statistics_helper.cpp | 1 + .../pushdown/pushdown_cross_product.cpp | 18 +- .../pushdown/pushdown_inner_join.cpp | 6 + .../src/optimizer/remove_unused_columns.cpp | 3 + src/duckdb/src/optimizer/topn_optimizer.cpp | 5 + .../binder/query_node/plan_subquery.cpp | 49 +- .../binder/statement/bind_summarize.cpp | 13 +- src/duckdb/src/planner/logical_operator.cpp | 5 + .../storage/checkpoint/table_data_writer.cpp | 13 +- src/duckdb/src/storage/checkpoint_manager.cpp | 14 +- .../serialization/serialize_storage.cpp | 2 + .../src/storage/single_file_block_manager.cpp | 4 + src/duckdb/src/storage/storage_manager.cpp | 5 + src/duckdb/src/storage/table_index_list.cpp | 15 +- src/duckdb/src/storage/write_ahead_log.cpp | 15 +- .../src/transaction/duck_transaction.cpp | 6 + .../ub_extension_json_json_functions.cpp | 4 + src/duckdb/ub_src_execution_index_art.cpp | 8 +- 108 files changed, 4305 insertions(+), 3005 deletions(-) create mode 100644 src/duckdb/extension/json/json_functions/json_exists.cpp create mode 100644 src/duckdb/extension/json/json_functions/json_value.cpp create mode 100644 src/duckdb/src/execution/index/art/base_leaf.cpp create mode 100644 src/duckdb/src/execution/index/art/base_node.cpp delete mode 100644 
src/duckdb/src/execution/index/art/node16.cpp create mode 100644 src/duckdb/src/execution/index/art/node256_leaf.cpp delete mode 100644 src/duckdb/src/execution/index/art/node4.cpp create mode 100644 src/duckdb/src/include/duckdb/execution/index/art/base_leaf.hpp create mode 100644 src/duckdb/src/include/duckdb/execution/index/art/base_node.hpp delete mode 100644 src/duckdb/src/include/duckdb/execution/index/art/node16.hpp create mode 100644 src/duckdb/src/include/duckdb/execution/index/art/node256_leaf.hpp delete mode 100644 src/duckdb/src/include/duckdb/execution/index/art/node4.hpp diff --git a/src/duckdb/extension/json/include/json_executors.hpp b/src/duckdb/extension/json/include/json_executors.hpp index 78da4526..0eeff5e4 100644 --- a/src/duckdb/extension/json/include/json_executors.hpp +++ b/src/duckdb/extension/json/include/json_executors.hpp @@ -13,26 +13,28 @@ namespace duckdb { +template +using json_function_t = std::function; + struct JSONExecutors { public: //! Single-argument JSON read function, i.e. json_type('[1, 2, 3]') template - static void UnaryExecute(DataChunk &args, ExpressionState &state, Vector &result, - std::function fun) { + static void UnaryExecute(DataChunk &args, ExpressionState &state, Vector &result, const json_function_t fun) { auto &lstate = JSONFunctionLocalState::ResetAndGet(state); auto alc = lstate.json_allocator.GetYYAlc(); auto &inputs = args.data[0]; - UnaryExecutor::Execute(inputs, result, args.size(), [&](string_t input) { - auto doc = JSONCommon::ReadDocument(input, JSONCommon::READ_FLAG, alc); - return fun(doc->root, alc, result); - }); + UnaryExecutor::ExecuteWithNulls( + inputs, result, args.size(), [&](string_t input, ValidityMask &mask, idx_t idx) { + auto doc = JSONCommon::ReadDocument(input, JSONCommon::READ_FLAG, alc); + return fun(doc->root, alc, result, mask, idx); + }); } //! Two-argument JSON read function (with path query), i.e. 
json_type('[1, 2, 3]', '$[0]') - template - static void BinaryExecute(DataChunk &args, ExpressionState &state, Vector &result, - std::function fun) { + template + static void BinaryExecute(DataChunk &args, ExpressionState &state, Vector &result, const json_function_t fun) { auto &func_expr = state.expr.Cast(); const auto &info = func_expr.bind_info->Cast(); auto &lstate = JSONFunctionLocalState::ResetAndGet(state); @@ -48,11 +50,11 @@ struct JSONExecutors { auto doc = JSONCommon::ReadDocument(input, JSONCommon::READ_FLAG, lstate.json_allocator.GetYYAlc()); auto val = JSONCommon::GetUnsafe(doc->root, ptr, len); - if (!val || (NULL_IF_NULL && unsafe_yyjson_is_null(val))) { + if (SET_NULL_IF_NOT_FOUND && !val) { mask.SetInvalid(idx); return T {}; } else { - return fun(val, alc, result); + return fun(val, alc, result, mask, idx); } }); } else { @@ -76,11 +78,7 @@ struct JSONExecutors { for (idx_t i = 0; i < vals.size(); i++) { auto &val = vals[i]; D_ASSERT(val != nullptr); // Wildcard extract shouldn't give back nullptrs - if (NULL_IF_NULL && unsafe_yyjson_is_null(val)) { - child_validity.SetInvalid(current_size + i); - } else { - child_vals[current_size + i] = fun(val, alc, result); - } + child_vals[current_size + i] = fun(val, alc, result, child_validity, current_size + i); } ListVector::SetListSize(result, new_size); @@ -95,11 +93,11 @@ struct JSONExecutors { inputs, paths, result, args.size(), [&](string_t input, string_t path, ValidityMask &mask, idx_t idx) { auto doc = JSONCommon::ReadDocument(input, JSONCommon::READ_FLAG, lstate.json_allocator.GetYYAlc()); auto val = JSONCommon::Get(doc->root, path); - if (!val || unsafe_yyjson_is_null(val)) { + if (SET_NULL_IF_NOT_FOUND && !val) { mask.SetInvalid(idx); return T {}; } else { - return fun(val, alc, result); + return fun(val, alc, result, mask, idx); } }); } @@ -109,9 +107,8 @@ struct JSONExecutors { } //! JSON read function with list of path queries, i.e. 
json_type('[1, 2, 3]', ['$[0]', '$[1]']) - template - static void ExecuteMany(DataChunk &args, ExpressionState &state, Vector &result, - std::function fun) { + template + static void ExecuteMany(DataChunk &args, ExpressionState &state, Vector &result, const json_function_t fun) { auto &func_expr = state.expr.Cast(); const auto &info = func_expr.bind_info->Cast(); auto &lstate = JSONFunctionLocalState::ResetAndGet(state); @@ -148,10 +145,10 @@ struct JSONExecutors { for (idx_t path_i = 0; path_i < num_paths; path_i++) { auto child_idx = offset + path_i; val = JSONCommon::GetUnsafe(doc->root, info.ptrs[path_i], info.lens[path_i]); - if (!val || (NULL_IF_NULL && unsafe_yyjson_is_null(val))) { + if (SET_NULL_IF_NOT_FOUND && !val) { child_validity.SetInvalid(child_idx); } else { - child_data[child_idx] = fun(val, alc, child); + child_data[child_idx] = fun(val, alc, child, child_validity, child_idx); } } diff --git a/src/duckdb/extension/json/include/json_functions.hpp b/src/duckdb/extension/json/include/json_functions.hpp index d8e2103c..cd19f373 100644 --- a/src/duckdb/extension/json/include/json_functions.hpp +++ b/src/duckdb/extension/json/include/json_functions.hpp @@ -96,9 +96,11 @@ class JSONFunctions { static ScalarFunctionSet GetArrayLengthFunction(); static ScalarFunctionSet GetContainsFunction(); + static ScalarFunctionSet GetExistsFunction(); static ScalarFunctionSet GetKeysFunction(); static ScalarFunctionSet GetTypeFunction(); static ScalarFunctionSet GetValidFunction(); + static ScalarFunctionSet GetValueFunction(); static ScalarFunctionSet GetSerializeSqlFunction(); static ScalarFunctionSet GetDeserializeSqlFunction(); static ScalarFunctionSet GetSerializePlanFunction(); diff --git a/src/duckdb/extension/json/json_functions.cpp b/src/duckdb/extension/json/json_functions.cpp index 76d63604..0ad68376 100644 --- a/src/duckdb/extension/json/json_functions.cpp +++ b/src/duckdb/extension/json/json_functions.cpp @@ -160,9 +160,11 @@ vector 
JSONFunctions::GetScalarFunctions() { // Other functions.push_back(GetArrayLengthFunction()); functions.push_back(GetContainsFunction()); + functions.push_back(GetExistsFunction()); functions.push_back(GetKeysFunction()); functions.push_back(GetTypeFunction()); functions.push_back(GetValidFunction()); + functions.push_back(GetValueFunction()); functions.push_back(GetSerializePlanFunction()); functions.push_back(GetSerializeSqlFunction()); functions.push_back(GetDeserializeSqlFunction()); diff --git a/src/duckdb/extension/json/json_functions/json_array_length.cpp b/src/duckdb/extension/json/json_functions/json_array_length.cpp index fc33f371..c487239b 100644 --- a/src/duckdb/extension/json/json_functions/json_array_length.cpp +++ b/src/duckdb/extension/json/json_functions/json_array_length.cpp @@ -2,7 +2,7 @@ namespace duckdb { -static inline uint64_t GetArrayLength(yyjson_val *val, yyjson_alc *alc, Vector &result) { +static inline uint64_t GetArrayLength(yyjson_val *val, yyjson_alc *, Vector &, ValidityMask &, idx_t) { return yyjson_arr_size(val); } diff --git a/src/duckdb/extension/json/json_functions/json_exists.cpp b/src/duckdb/extension/json/json_functions/json_exists.cpp new file mode 100644 index 00000000..f9d3548b --- /dev/null +++ b/src/duckdb/extension/json/json_functions/json_exists.cpp @@ -0,0 +1,32 @@ +#include "json_executors.hpp" + +namespace duckdb { + +static inline bool JSONExists(yyjson_val *val, yyjson_alc *, Vector &, ValidityMask &, idx_t) { + return val; +} + +static void BinaryExistsFunction(DataChunk &args, ExpressionState &state, Vector &result) { + JSONExecutors::BinaryExecute(args, state, result, JSONExists); +} + +static void ManyExistsFunction(DataChunk &args, ExpressionState &state, Vector &result) { + JSONExecutors::ExecuteMany(args, state, result, JSONExists); +} + +static void GetExistsFunctionsInternal(ScalarFunctionSet &set, const LogicalType &input_type) { + set.AddFunction(ScalarFunction({input_type, LogicalType::VARCHAR}, 
LogicalType::BOOLEAN, BinaryExistsFunction, + JSONReadFunctionData::Bind, nullptr, nullptr, JSONFunctionLocalState::Init)); + set.AddFunction(ScalarFunction({input_type, LogicalType::LIST(LogicalType::VARCHAR)}, + LogicalType::LIST(LogicalType::BOOLEAN), ManyExistsFunction, + JSONReadManyFunctionData::Bind, nullptr, nullptr, JSONFunctionLocalState::Init)); +} + +ScalarFunctionSet JSONFunctions::GetExistsFunction() { + ScalarFunctionSet set("json_exists"); + GetExistsFunctionsInternal(set, LogicalType::VARCHAR); + GetExistsFunctionsInternal(set, LogicalType::JSON()); + return set; +} + +} // namespace duckdb diff --git a/src/duckdb/extension/json/json_functions/json_extract.cpp b/src/duckdb/extension/json/json_functions/json_extract.cpp index 59daa49c..2fc32612 100644 --- a/src/duckdb/extension/json/json_functions/json_extract.cpp +++ b/src/duckdb/extension/json/json_functions/json_extract.cpp @@ -2,11 +2,11 @@ namespace duckdb { -static inline string_t ExtractFromVal(yyjson_val *val, yyjson_alc *alc, Vector &) { +static inline string_t ExtractFromVal(yyjson_val *val, yyjson_alc *alc, Vector &, ValidityMask &, idx_t) { return JSONCommon::WriteVal(val, alc); } -static inline string_t ExtractStringFromVal(yyjson_val *val, yyjson_alc *alc, Vector &) { +static inline string_t ExtractStringFromVal(yyjson_val *val, yyjson_alc *alc, Vector &, ValidityMask &, idx_t) { return yyjson_is_str(val) ? 
string_t(unsafe_yyjson_get_str(val), unsafe_yyjson_get_len(val)) : JSONCommon::WriteVal(val, alc); } diff --git a/src/duckdb/extension/json/json_functions/json_keys.cpp b/src/duckdb/extension/json/json_functions/json_keys.cpp index eb991883..0b672c08 100644 --- a/src/duckdb/extension/json/json_functions/json_keys.cpp +++ b/src/duckdb/extension/json/json_functions/json_keys.cpp @@ -2,7 +2,7 @@ namespace duckdb { -static inline list_entry_t GetJSONKeys(yyjson_val *val, yyjson_alc *alc, Vector &result) { +static inline list_entry_t GetJSONKeys(yyjson_val *val, yyjson_alc *, Vector &result, ValidityMask &, idx_t) { auto num_keys = yyjson_obj_size(val); auto current_size = ListVector::GetListSize(result); auto new_size = current_size + num_keys; diff --git a/src/duckdb/extension/json/json_functions/json_pretty.cpp b/src/duckdb/extension/json/json_functions/json_pretty.cpp index 16240917..1fb96081 100644 --- a/src/duckdb/extension/json/json_functions/json_pretty.cpp +++ b/src/duckdb/extension/json/json_functions/json_pretty.cpp @@ -3,7 +3,7 @@ namespace duckdb { //! 
Pretty Print a given JSON Document -string_t PrettyPrint(yyjson_val *val, yyjson_alc *alc, Vector &result) { +string_t PrettyPrint(yyjson_val *val, yyjson_alc *alc, Vector &, ValidityMask &, idx_t) { D_ASSERT(alc); idx_t len; auto data = diff --git a/src/duckdb/extension/json/json_functions/json_structure.cpp b/src/duckdb/extension/json/json_functions/json_structure.cpp index 9f4593e5..04800572 100644 --- a/src/duckdb/extension/json/json_functions/json_structure.cpp +++ b/src/duckdb/extension/json/json_functions/json_structure.cpp @@ -499,7 +499,7 @@ static yyjson_mut_val *ConvertStructure(const JSONStructureNode &node, yyjson_mu } } -static string_t JSONStructureFunction(yyjson_val *val, yyjson_alc *alc, Vector &) { +static string_t JSONStructureFunction(yyjson_val *val, yyjson_alc *alc, Vector &, ValidityMask &, idx_t) { return JSONCommon::WriteVal( ConvertStructure(ExtractStructureInternal(val, true), yyjson_mut_doc_new(alc)), alc); } diff --git a/src/duckdb/extension/json/json_functions/json_type.cpp b/src/duckdb/extension/json/json_functions/json_type.cpp index 8f3fb3ad..47aec2d6 100644 --- a/src/duckdb/extension/json/json_functions/json_type.cpp +++ b/src/duckdb/extension/json/json_functions/json_type.cpp @@ -2,7 +2,7 @@ namespace duckdb { -static inline string_t GetType(yyjson_val *val, yyjson_alc *alc, Vector &result) { +static inline string_t GetType(yyjson_val *val, yyjson_alc *, Vector &, ValidityMask &mask, idx_t idx) { return JSONCommon::ValTypeToStringT(val); } @@ -11,11 +11,11 @@ static void UnaryTypeFunction(DataChunk &args, ExpressionState &state, Vector &r } static void BinaryTypeFunction(DataChunk &args, ExpressionState &state, Vector &result) { - JSONExecutors::BinaryExecute(args, state, result, GetType); + JSONExecutors::BinaryExecute(args, state, result, GetType); } static void ManyTypeFunction(DataChunk &args, ExpressionState &state, Vector &result) { - JSONExecutors::ExecuteMany(args, state, result, GetType); + 
JSONExecutors::ExecuteMany(args, state, result, GetType); } static void GetTypeFunctionsInternal(ScalarFunctionSet &set, const LogicalType &input_type) { diff --git a/src/duckdb/extension/json/json_functions/json_value.cpp b/src/duckdb/extension/json/json_functions/json_value.cpp new file mode 100644 index 00000000..06afbd94 --- /dev/null +++ b/src/duckdb/extension/json/json_functions/json_value.cpp @@ -0,0 +1,42 @@ +#include "json_executors.hpp" + +namespace duckdb { + +static inline string_t ValueFromVal(yyjson_val *val, yyjson_alc *alc, Vector &, ValidityMask &mask, idx_t idx) { + switch (yyjson_get_tag(val)) { + case YYJSON_TYPE_ARR | YYJSON_SUBTYPE_NONE: + case YYJSON_TYPE_OBJ | YYJSON_SUBTYPE_NONE: + mask.SetInvalid(idx); + return string_t {}; + default: + return JSONCommon::WriteVal(val, alc); + } +} + +static void ValueFunction(DataChunk &args, ExpressionState &state, Vector &result) { + JSONExecutors::BinaryExecute(args, state, result, ValueFromVal); +} + +static void ValueManyFunction(DataChunk &args, ExpressionState &state, Vector &result) { + JSONExecutors::ExecuteMany(args, state, result, ValueFromVal); +} + +static void GetValueFunctionsInternal(ScalarFunctionSet &set, const LogicalType &input_type) { + set.AddFunction(ScalarFunction({input_type, LogicalType::BIGINT}, LogicalType::JSON(), ValueFunction, + JSONReadFunctionData::Bind, nullptr, nullptr, JSONFunctionLocalState::Init)); + set.AddFunction(ScalarFunction({input_type, LogicalType::VARCHAR}, LogicalType::JSON(), ValueFunction, + JSONReadFunctionData::Bind, nullptr, nullptr, JSONFunctionLocalState::Init)); + set.AddFunction(ScalarFunction({input_type, LogicalType::LIST(LogicalType::VARCHAR)}, + LogicalType::LIST(LogicalType::JSON()), ValueManyFunction, + JSONReadManyFunctionData::Bind, nullptr, nullptr, JSONFunctionLocalState::Init)); +} + +ScalarFunctionSet JSONFunctions::GetValueFunction() { + // The value function is just like the extract function but returns NULL if the JSON is not a scalar 
value + ScalarFunctionSet set("json_value"); + GetValueFunctionsInternal(set, LogicalType::VARCHAR); + GetValueFunctionsInternal(set, LogicalType::JSON()); + return set; +} + +} // namespace duckdb diff --git a/src/duckdb/src/catalog/catalog_entry/duck_schema_entry.cpp b/src/duckdb/src/catalog/catalog_entry/duck_schema_entry.cpp index 92e64a7c..42dea06f 100644 --- a/src/duckdb/src/catalog/catalog_entry/duck_schema_entry.cpp +++ b/src/duckdb/src/catalog/catalog_entry/duck_schema_entry.cpp @@ -1,42 +1,44 @@ #include "duckdb/catalog/catalog_entry/duck_schema_entry.hpp" -#include "duckdb/catalog/default/default_functions.hpp" -#include "duckdb/catalog/default/default_table_functions.hpp" -#include "duckdb/catalog/default/default_types.hpp" -#include "duckdb/catalog/default/default_views.hpp" + +#include "duckdb/catalog/catalog_entry/aggregate_function_catalog_entry.hpp" #include "duckdb/catalog/catalog_entry/collate_catalog_entry.hpp" #include "duckdb/catalog/catalog_entry/copy_function_catalog_entry.hpp" #include "duckdb/catalog/catalog_entry/duck_index_entry.hpp" +#include "duckdb/catalog/catalog_entry/duck_table_entry.hpp" #include "duckdb/catalog/catalog_entry/pragma_function_catalog_entry.hpp" +#include "duckdb/catalog/catalog_entry/scalar_function_catalog_entry.hpp" +#include "duckdb/catalog/catalog_entry/scalar_macro_catalog_entry.hpp" #include "duckdb/catalog/catalog_entry/sequence_catalog_entry.hpp" +#include "duckdb/catalog/catalog_entry/table_catalog_entry.hpp" #include "duckdb/catalog/catalog_entry/table_function_catalog_entry.hpp" +#include "duckdb/catalog/catalog_entry/table_macro_catalog_entry.hpp" #include "duckdb/catalog/catalog_entry/type_catalog_entry.hpp" #include "duckdb/catalog/catalog_entry/view_catalog_entry.hpp" -#include "duckdb/catalog/catalog_entry/aggregate_function_catalog_entry.hpp" -#include "duckdb/catalog/catalog_entry/scalar_function_catalog_entry.hpp" -#include "duckdb/catalog/catalog_entry/scalar_macro_catalog_entry.hpp" -#include 
"duckdb/catalog/catalog_entry/table_macro_catalog_entry.hpp" -#include "duckdb/catalog/catalog_entry/duck_table_entry.hpp" -#include "duckdb/catalog/catalog_entry/table_catalog_entry.hpp" +#include "duckdb/catalog/default/default_functions.hpp" +#include "duckdb/catalog/default/default_table_functions.hpp" +#include "duckdb/catalog/default/default_types.hpp" +#include "duckdb/catalog/default/default_views.hpp" #include "duckdb/catalog/dependency_list.hpp" -#include "duckdb/planner/constraints/bound_foreign_key_constraint.hpp" +#include "duckdb/main/attached_database.hpp" +#include "duckdb/main/database.hpp" #include "duckdb/parser/constraints/foreign_key_constraint.hpp" #include "duckdb/parser/parsed_data/alter_table_info.hpp" -#include "duckdb/parser/parsed_data/create_scalar_function_info.hpp" -#include "duckdb/storage/data_table.hpp" -#include "duckdb/planner/parsed_data/bound_create_table_info.hpp" #include "duckdb/parser/parsed_data/create_collation_info.hpp" #include "duckdb/parser/parsed_data/create_copy_function_info.hpp" #include "duckdb/parser/parsed_data/create_index_info.hpp" #include "duckdb/parser/parsed_data/create_pragma_function_info.hpp" +#include "duckdb/parser/parsed_data/create_scalar_function_info.hpp" #include "duckdb/parser/parsed_data/create_schema_info.hpp" #include "duckdb/parser/parsed_data/create_sequence_info.hpp" #include "duckdb/parser/parsed_data/create_table_function_info.hpp" #include "duckdb/parser/parsed_data/create_type_info.hpp" #include "duckdb/parser/parsed_data/create_view_info.hpp" #include "duckdb/parser/parsed_data/drop_info.hpp" -#include "duckdb/transaction/meta_transaction.hpp" -#include "duckdb/main/attached_database.hpp" +#include "duckdb/planner/constraints/bound_foreign_key_constraint.hpp" +#include "duckdb/planner/parsed_data/bound_create_table_info.hpp" +#include "duckdb/storage/data_table.hpp" #include "duckdb/transaction/duck_transaction.hpp" +#include "duckdb/transaction/meta_transaction.hpp" namespace duckdb 
{ diff --git a/src/duckdb/src/catalog/catalog_entry/duck_table_entry.cpp b/src/duckdb/src/catalog/catalog_entry/duck_table_entry.cpp index 144fe314..be1041f9 100644 --- a/src/duckdb/src/catalog/catalog_entry/duck_table_entry.cpp +++ b/src/duckdb/src/catalog/catalog_entry/duck_table_entry.cpp @@ -1,10 +1,13 @@ #include "duckdb/catalog/catalog_entry/duck_table_entry.hpp" #include "duckdb/common/enum_util.hpp" +#include "duckdb/common/exception/transaction_exception.hpp" #include "duckdb/common/index_map.hpp" #include "duckdb/execution/index/art/art.hpp" #include "duckdb/function/table/table_scan.hpp" +#include "duckdb/main/database.hpp" #include "duckdb/parser/constraints/list.hpp" +#include "duckdb/parser/parsed_data/comment_on_column_info.hpp" #include "duckdb/parser/parsed_expression_iterator.hpp" #include "duckdb/planner/binder.hpp" #include "duckdb/planner/constraints/bound_check_constraint.hpp" @@ -13,7 +16,6 @@ #include "duckdb/planner/constraints/bound_unique_constraint.hpp" #include "duckdb/planner/expression/bound_reference_expression.hpp" #include "duckdb/planner/expression_binder/alter_binder.hpp" -#include "duckdb/planner/filter/null_filter.hpp" #include "duckdb/planner/operator/logical_get.hpp" #include "duckdb/planner/operator/logical_projection.hpp" #include "duckdb/planner/operator/logical_update.hpp" @@ -21,13 +23,11 @@ #include "duckdb/planner/table_filter.hpp" #include "duckdb/storage/storage_manager.hpp" #include "duckdb/storage/table_storage_info.hpp" -#include "duckdb/common/exception/transaction_exception.hpp" -#include "duckdb/parser/parsed_data/comment_on_column_info.hpp" namespace duckdb { void AddDataTableIndex(DataTable &storage, const ColumnList &columns, const vector &keys, - IndexConstraintType constraint_type, const IndexStorageInfo &info = IndexStorageInfo()) { + IndexConstraintType constraint_type, const IndexStorageInfo &info) { // fetch types and create expressions for the index from the columns vector column_ids; @@ -54,7 +54,7 
@@ void AddDataTableIndex(DataTable &storage, const ColumnList &columns, const vect } void AddDataTableIndex(DataTable &storage, const ColumnList &columns, vector &keys, - IndexConstraintType constraint_type, const IndexStorageInfo &info = IndexStorageInfo()) { + IndexConstraintType constraint_type, const IndexStorageInfo &info) { vector new_keys; new_keys.reserve(keys.size()); for (auto &logical_key : keys) { @@ -63,12 +63,17 @@ void AddDataTableIndex(DataTable &storage, const ColumnList &columns, vector &create_info, - idx_t idx) { +IndexStorageInfo GetIndexInfo(const IndexConstraintType &constraint_type, const bool v1_0_0_storage, + unique_ptr &create_info, const idx_t identifier) { auto &create_table_info = create_info->Cast(); auto constraint_name = EnumUtil::ToString(constraint_type) + "_"; - return IndexStorageInfo(constraint_name + create_table_info.table + "_" + to_string(idx)); + auto name = constraint_name + create_table_info.table + "_" + to_string(identifier); + IndexStorageInfo info(name); + if (!v1_0_0_storage) { + info.options.emplace("v1_0_0_storage", v1_0_0_storage); + } + return info; } vector GetUniqueConstraintKeys(const ColumnList &columns, const UniqueConstraint &constraint) { @@ -88,66 +93,72 @@ DuckTableEntry::DuckTableEntry(Catalog &catalog, SchemaCatalogEntry &schema, Bou : TableCatalogEntry(catalog, schema, info.Base()), storage(std::move(inherited_storage)), column_dependency_manager(std::move(info.column_dependency_manager)) { - if (!storage) { - // create the physical storage - vector storage_columns; - for (auto &col_def : columns.Physical()) { - storage_columns.push_back(col_def.Copy()); + if (storage) { + if (!info.indexes.empty()) { + storage->SetIndexStorageInfo(std::move(info.indexes)); } - storage = - make_shared_ptr(catalog.GetAttached(), StorageManager::Get(catalog).GetTableIOManager(&info), - schema.name, name, std::move(storage_columns), std::move(info.data)); - - // create the unique indexes for the UNIQUE and PRIMARY KEY 
and FOREIGN KEY constraints - idx_t indexes_idx = 0; - for (idx_t i = 0; i < constraints.size(); i++) { - auto &constraint = constraints[i]; - if (constraint->type == ConstraintType::UNIQUE) { - - // unique constraint: create a unique index - auto &unique = constraint->Cast(); - IndexConstraintType constraint_type = IndexConstraintType::UNIQUE; - if (unique.is_primary_key) { - constraint_type = IndexConstraintType::PRIMARY; - } - auto unique_keys = GetUniqueConstraintKeys(columns, unique); + return; + } + + // create the physical storage + vector storage_columns; + for (auto &col_def : columns.Physical()) { + storage_columns.push_back(col_def.Copy()); + } + storage = make_shared_ptr(catalog.GetAttached(), StorageManager::Get(catalog).GetTableIOManager(&info), + schema.name, name, std::move(storage_columns), std::move(info.data)); + + // create the unique indexes for the UNIQUE and PRIMARY KEY and FOREIGN KEY constraints + idx_t indexes_idx = 0; + for (idx_t i = 0; i < constraints.size(); i++) { + auto &constraint = constraints[i]; + if (constraint->type == ConstraintType::UNIQUE) { + // unique constraint: create a unique index + auto &unique = constraint->Cast(); + IndexConstraintType constraint_type = IndexConstraintType::UNIQUE; + if (unique.is_primary_key) { + constraint_type = IndexConstraintType::PRIMARY; + } + auto unique_keys = GetUniqueConstraintKeys(columns, unique); + if (info.indexes.empty()) { + auto index_storage_info = GetIndexInfo(constraint_type, false, info.base, i); + AddDataTableIndex(*storage, columns, unique_keys, constraint_type, index_storage_info); + continue; + } + + // We read the index from an old storage version applying a dummy name. 
+ if (info.indexes[indexes_idx].name.empty()) { + auto name_info = GetIndexInfo(constraint_type, true, info.base, i); + info.indexes[indexes_idx].name = name_info.name; + } + + // now add the index + AddDataTableIndex(*storage, columns, unique_keys, constraint_type, info.indexes[indexes_idx++]); + continue; + } + + if (constraint->type == ConstraintType::FOREIGN_KEY) { + // foreign key constraint: create a foreign key index + auto &bfk = constraint->Cast(); + if (bfk.info.type == ForeignKeyType::FK_TYPE_FOREIGN_KEY_TABLE || + bfk.info.type == ForeignKeyType::FK_TYPE_SELF_REFERENCE_TABLE) { + if (info.indexes.empty()) { - AddDataTableIndex(*storage, columns, unique_keys, constraint_type, - GetIndexInfo(constraint_type, info.base, i)); - } else { - // we read the index from an old storage version, so we have to apply a dummy name - if (info.indexes[indexes_idx].name.empty()) { - auto name_info = GetIndexInfo(constraint_type, info.base, i); - info.indexes[indexes_idx].name = name_info.name; - } - - // now add the index - AddDataTableIndex(*storage, columns, unique_keys, constraint_type, info.indexes[indexes_idx++]); + auto constraint_type = IndexConstraintType::FOREIGN; + auto index_storage_info = GetIndexInfo(constraint_type, false, info.base, i); + AddDataTableIndex(*storage, columns, bfk.info.fk_keys, constraint_type, index_storage_info); + continue; } - } else if (constraint->type == ConstraintType::FOREIGN_KEY) { - // foreign key constraint: create a foreign key index - auto &bfk = constraint->Cast(); - if (bfk.info.type == ForeignKeyType::FK_TYPE_FOREIGN_KEY_TABLE || - bfk.info.type == ForeignKeyType::FK_TYPE_SELF_REFERENCE_TABLE) { - - if (info.indexes.empty()) { - auto constraint_type = IndexConstraintType::FOREIGN; - AddDataTableIndex(*storage, columns, bfk.info.fk_keys, constraint_type, - GetIndexInfo(constraint_type, info.base, i)); - - } else { - // we read the index from an old storage version, so we have to apply a dummy name - if 
(info.indexes[indexes_idx].name.empty()) { - auto name_info = GetIndexInfo(IndexConstraintType::FOREIGN, info.base, i); - info.indexes[indexes_idx].name = name_info.name; - } - - // now add the index - AddDataTableIndex(*storage, columns, bfk.info.fk_keys, IndexConstraintType::FOREIGN, - info.indexes[indexes_idx++]); - } + // We read the index from an old storage version applying a dummy name. + if (info.indexes[indexes_idx].name.empty()) { + auto name_info = GetIndexInfo(IndexConstraintType::FOREIGN, true, info.base, i); + info.indexes[indexes_idx].name = name_info.name; } + + // now add the index + AddDataTableIndex(*storage, columns, bfk.info.fk_keys, IndexConstraintType::FOREIGN, + info.indexes[indexes_idx++]); } } } @@ -169,20 +180,25 @@ unique_ptr DuckTableEntry::GetStatistics(ClientContext &context, } unique_ptr DuckTableEntry::AlterEntry(CatalogTransaction transaction, AlterInfo &info) { - if (transaction.context) { - return AlterEntry(*transaction.context, info); - } - if (info.type == AlterType::ALTER_TABLE) { - auto &table_info = info.Cast(); - if (table_info.alter_table_type == AlterTableType::FOREIGN_KEY_CONSTRAINT) { - auto &foreign_key_constraint_info = table_info.Cast(); - if (foreign_key_constraint_info.type == AlterForeignKeyType::AFT_ADD) { - // for checkpoint loading we support adding foreign key constraints without a client context - return AddForeignKeyConstraint(nullptr, foreign_key_constraint_info); - } - } + if (transaction.HasContext()) { + return AlterEntry(transaction.GetContext(), info); + } + if (info.type != AlterType::ALTER_TABLE) { + return CatalogEntry::AlterEntry(transaction, info); } - return CatalogEntry::AlterEntry(transaction, info); + + auto &table_info = info.Cast(); + if (table_info.alter_table_type != AlterTableType::FOREIGN_KEY_CONSTRAINT) { + return CatalogEntry::AlterEntry(transaction, info); + } + + auto &foreign_key_constraint_info = table_info.Cast(); + if (foreign_key_constraint_info.type != 
AlterForeignKeyType::AFT_ADD) { + return CatalogEntry::AlterEntry(transaction, info); + } + + // We add foreign key constraints without a client context during checkpoint loading. + return AddForeignKeyConstraint(nullptr, foreign_key_constraint_info); } unique_ptr DuckTableEntry::AlterEntry(ClientContext &context, AlterInfo &info) { @@ -758,7 +774,6 @@ unique_ptr DuckTableEntry::AddForeignKeyConstraint(optional_ptr(catalog, schema, *bound_create_info, storage); } @@ -783,7 +798,6 @@ unique_ptr DuckTableEntry::DropForeignKeyConstraint(ClientContext auto binder = Binder::CreateBinder(context); auto bound_create_info = binder->BindCreateTableInfo(std::move(create_info), schema); - return make_uniq(catalog, schema, *bound_create_info, storage); } diff --git a/src/duckdb/src/common/compressed_file_system.cpp b/src/duckdb/src/common/compressed_file_system.cpp index 9bc4a591..b34c6c21 100644 --- a/src/duckdb/src/common/compressed_file_system.cpp +++ b/src/duckdb/src/common/compressed_file_system.cpp @@ -8,7 +8,6 @@ StreamWrapper::~StreamWrapper() { CompressedFile::CompressedFile(CompressedFileSystem &fs, unique_ptr child_handle_p, const string &path) : FileHandle(fs, path), compressed_fs(fs), child_handle(std::move(child_handle_p)) { - D_ASSERT(child_handle->SeekPosition() == 0); } CompressedFile::~CompressedFile() { diff --git a/src/duckdb/src/common/enum_util.cpp b/src/duckdb/src/common/enum_util.cpp index 23f40e58..7fbece35 100644 --- a/src/duckdb/src/common/enum_util.cpp +++ b/src/duckdb/src/common/enum_util.cpp @@ -3241,6 +3241,29 @@ FunctionStability EnumUtil::FromString(const char *value) { throw NotImplementedException(StringUtil::Format("Enum value: '%s' not implemented", value)); } +template<> +const char* EnumUtil::ToChars(GateStatus value) { + switch(value) { + case GateStatus::GATE_NOT_SET: + return "GATE_NOT_SET"; + case GateStatus::GATE_SET: + return "GATE_SET"; + default: + throw NotImplementedException(StringUtil::Format("Enum value: '%d' not 
implemented", value)); + } +} + +template<> +GateStatus EnumUtil::FromString(const char *value) { + if (StringUtil::Equals(value, "GATE_NOT_SET")) { + return GateStatus::GATE_NOT_SET; + } + if (StringUtil::Equals(value, "GATE_SET")) { + return GateStatus::GATE_SET; + } + throw NotImplementedException(StringUtil::Format("Enum value: '%s' not implemented", value)); +} + template<> const char* EnumUtil::ToChars(HLLStorageType value) { switch(value) { @@ -4544,6 +4567,12 @@ const char* EnumUtil::ToChars(NType value) { return "NODE_256"; case NType::LEAF_INLINED: return "LEAF_INLINED"; + case NType::NODE_7_LEAF: + return "NODE_7_LEAF"; + case NType::NODE_15_LEAF: + return "NODE_15_LEAF"; + case NType::NODE_256_LEAF: + return "NODE_256_LEAF"; default: throw NotImplementedException(StringUtil::Format("Enum value: '%d' not implemented", value)); } @@ -4572,6 +4601,15 @@ NType EnumUtil::FromString(const char *value) { if (StringUtil::Equals(value, "LEAF_INLINED")) { return NType::LEAF_INLINED; } + if (StringUtil::Equals(value, "NODE_7_LEAF")) { + return NType::NODE_7_LEAF; + } + if (StringUtil::Equals(value, "NODE_15_LEAF")) { + return NType::NODE_15_LEAF; + } + if (StringUtil::Equals(value, "NODE_256_LEAF")) { + return NType::NODE_256_LEAF; + } throw NotImplementedException(StringUtil::Format("Enum value: '%s' not implemented", value)); } diff --git a/src/duckdb/src/core_functions/scalar/generic/error.cpp b/src/duckdb/src/core_functions/scalar/generic/error.cpp index 9c172e87..e9047378 100644 --- a/src/duckdb/src/core_functions/scalar/generic/error.cpp +++ b/src/duckdb/src/core_functions/scalar/generic/error.cpp @@ -11,7 +11,7 @@ struct ErrorOperator { }; ScalarFunction ErrorFun::GetFunction() { - auto fun = ScalarFunction({LogicalType::VARCHAR}, LogicalType::SQLNULL, + auto fun = ScalarFunction("error", {LogicalType::VARCHAR}, LogicalType::SQLNULL, ScalarFunction::UnaryFunction); // Set the function with side effects to avoid the optimization. 
fun.stability = FunctionStability::VOLATILE; diff --git a/src/duckdb/src/execution/index/art/art.cpp b/src/duckdb/src/execution/index/art/art.cpp index dba3e58f..e3e08243 100644 --- a/src/duckdb/src/execution/index/art/art.cpp +++ b/src/duckdb/src/execution/index/art/art.cpp @@ -1,37 +1,38 @@ #include "duckdb/execution/index/art/art.hpp" #include "duckdb/common/types/conflict_manager.hpp" +#include "duckdb/common/unordered_map.hpp" #include "duckdb/common/vector_operations/vector_operations.hpp" #include "duckdb/execution/expression_executor.hpp" #include "duckdb/execution/index/art/art_key.hpp" +#include "duckdb/execution/index/art/base_leaf.hpp" +#include "duckdb/execution/index/art/base_node.hpp" #include "duckdb/execution/index/art/iterator.hpp" #include "duckdb/execution/index/art/leaf.hpp" -#include "duckdb/execution/index/art/node16.hpp" #include "duckdb/execution/index/art/node256.hpp" -#include "duckdb/execution/index/art/node4.hpp" +#include "duckdb/execution/index/art/node256_leaf.hpp" #include "duckdb/execution/index/art/node48.hpp" #include "duckdb/execution/index/art/prefix.hpp" -#include "duckdb/storage/arena_allocator.hpp" -#include "duckdb/storage/metadata/metadata_reader.hpp" -#include "duckdb/storage/table/scan_state.hpp" -#include "duckdb/storage/table_io_manager.hpp" #include "duckdb/optimizer/matcher/expression_matcher.hpp" #include "duckdb/planner/expression/bound_between_expression.hpp" #include "duckdb/planner/expression/bound_comparison_expression.hpp" #include "duckdb/planner/expression/bound_constant_expression.hpp" +#include "duckdb/storage/arena_allocator.hpp" +#include "duckdb/storage/metadata/metadata_reader.hpp" +#include "duckdb/storage/table/scan_state.hpp" +#include "duckdb/storage/table_io_manager.hpp" namespace duckdb { struct ARTIndexScanState : public IndexScanState { - - //! Scan predicates (single predicate scan or range scan) + //! The predicates to scan. + //! 
A single predicate for point lookups, and two predicates for range scans. Value values[2]; - //! Expressions of the scan predicates + //! The expressions over the scan predicates. ExpressionType expressions[2]; bool checked = false; - //! All scanned row IDs - vector result_ids; - Iterator iterator; + //! All scanned row IDs. + unsafe_vector row_ids; }; //===--------------------------------------------------------------------===// @@ -40,40 +41,13 @@ struct ARTIndexScanState : public IndexScanState { ART::ART(const string &name, const IndexConstraintType index_constraint_type, const vector &column_ids, TableIOManager &table_io_manager, const vector> &unbound_expressions, - AttachedDatabase &db, const shared_ptr, ALLOCATOR_COUNT>> &allocators_ptr, + AttachedDatabase &db, + const shared_ptr, ALLOCATOR_COUNT>> &allocators_ptr, const IndexStorageInfo &info) : BoundIndex(name, ART::TYPE_NAME, index_constraint_type, column_ids, table_io_manager, unbound_expressions, db), allocators(allocators_ptr), owns_data(false) { - // initialize all allocators - if (!allocators) { - owns_data = true; - auto &block_manager = table_io_manager.GetIndexBlockManager(); - - array, ALLOCATOR_COUNT> allocator_array = { - make_uniq(sizeof(Prefix), block_manager), - make_uniq(sizeof(Leaf), block_manager), - make_uniq(sizeof(Node4), block_manager), - make_uniq(sizeof(Node16), block_manager), - make_uniq(sizeof(Node48), block_manager), - make_uniq(sizeof(Node256), block_manager)}; - allocators = - make_shared_ptr, ALLOCATOR_COUNT>>(std::move(allocator_array)); - } - - // deserialize lazily - if (info.IsValid()) { - - if (!info.root_block_ptr.IsValid()) { - InitAllocators(info); - - } else { - // old storage file - Deserialize(info.root_block_ptr); - } - } - - // validate the types of the key columns + // FIXME: Use the new byte representation function to support nested types. 
for (idx_t i = 0; i < types.size(); i++) { switch (types[i]) { case PhysicalType::BOOL: @@ -95,28 +69,61 @@ ART::ART(const string &name, const IndexConstraintType index_constraint_type, co throw InvalidTypeException(logical_types[i], "Invalid type for index key."); } } + + // Initialize the allocators. + SetPrefixCount(info); + if (!allocators) { + owns_data = true; + auto prefix_size = NumericCast(prefix_count) + NumericCast(Prefix::METADATA_SIZE); + auto &block_manager = table_io_manager.GetIndexBlockManager(); + + array, ALLOCATOR_COUNT> allocator_array = { + make_unsafe_uniq(prefix_size, block_manager), + make_unsafe_uniq(sizeof(Leaf), block_manager), + make_unsafe_uniq(sizeof(Node4), block_manager), + make_unsafe_uniq(sizeof(Node16), block_manager), + make_unsafe_uniq(sizeof(Node48), block_manager), + make_unsafe_uniq(sizeof(Node256), block_manager), + make_unsafe_uniq(sizeof(Node7Leaf), block_manager), + make_unsafe_uniq(sizeof(Node15Leaf), block_manager), + make_unsafe_uniq(sizeof(Node256Leaf), block_manager), + }; + allocators = + make_shared_ptr, ALLOCATOR_COUNT>>(std::move(allocator_array)); + } + + if (!info.IsValid()) { + // We create a new ART. + return; + } + + if (info.root_block_ptr.IsValid()) { + // Backwards compatibility. + Deserialize(info.root_block_ptr); + return; + } + + // Set the root node and initialize the allocators. + tree.Set(info.root); + InitAllocators(info); } //===--------------------------------------------------------------------===// -// Initialize Predicate Scans +// Initialize Scans //===--------------------------------------------------------------------===// -//! 
Initialize a single predicate scan on the index with the given expression and column IDs -static unique_ptr InitializeScanSinglePredicate(const Transaction &transaction, const Value &value, +static unique_ptr InitializeScanSinglePredicate(const Value &value, const ExpressionType expression_type) { - // initialize point lookup auto result = make_uniq(); result->values[0] = value; result->expressions[0] = expression_type; return std::move(result); } -//! Initialize a two predicate scan on the index with the given expression and column IDs -static unique_ptr InitializeScanTwoPredicates(const Transaction &transaction, const Value &low_value, +static unique_ptr InitializeScanTwoPredicates(const Value &low_value, const ExpressionType low_expression_type, const Value &high_value, const ExpressionType high_expression_type) { - // initialize range lookup auto result = make_uniq(); result->values[0] = low_value; result->expressions[0] = low_expression_type; @@ -125,64 +132,64 @@ static unique_ptr InitializeScanTwoPredicates(const Transaction return std::move(result); } -unique_ptr ART::TryInitializeScan(const Transaction &transaction, const Expression &index_expr, - const Expression &filter_expr) { - +unique_ptr ART::TryInitializeScan(const Expression &expr, const Expression &filter_expr) { Value low_value, high_value, equal_value; ExpressionType low_comparison_type = ExpressionType::INVALID, high_comparison_type = ExpressionType::INVALID; - // try to find a matching index for any of the filter expressions - // create a matcher for a comparison with a constant + // Try to find a matching index for any of the filter expressions. ComparisonExpressionMatcher matcher; - // match on a comparison type + // Match on a comparison type. matcher.expr_type = make_uniq(); - // match on a constant comparison with the indexed expression - matcher.matchers.push_back(make_uniq(index_expr)); + // Match on a constant comparison with the indexed expression. 
+ matcher.matchers.push_back(make_uniq(expr)); matcher.matchers.push_back(make_uniq()); - matcher.policy = SetMatcher::Policy::UNORDERED; vector> bindings; - if (matcher.Match(const_cast(filter_expr), bindings)) { // NOLINT: Match does not alter the expr - // range or equality comparison with constant value - // we can use our index here - // bindings[0] = the expression - // bindings[1] = the index expression - // bindings[2] = the constant + auto filter_match = + matcher.Match(const_cast(filter_expr), bindings); // NOLINT: Match does not alter the expr. + if (filter_match) { + // This is a range or equality comparison with a constant value, so we can use the index. + // bindings[0] = the expression + // bindings[1] = the index expression + // bindings[2] = the constant auto &comparison = bindings[0].get().Cast(); auto constant_value = bindings[2].get().Cast().value; auto comparison_type = comparison.type; + if (comparison.left->type == ExpressionType::VALUE_CONSTANT) { - // the expression is on the right side, we flip them around + // The expression is on the right side, we flip the comparison expression. comparison_type = FlipComparisonExpression(comparison_type); } + if (comparison_type == ExpressionType::COMPARE_EQUAL) { - // equality value - // equality overrides any other bounds so we just break here + // An equality value overrides any other bounds. equal_value = constant_value; } else if (comparison_type == ExpressionType::COMPARE_GREATERTHANOREQUALTO || comparison_type == ExpressionType::COMPARE_GREATERTHAN) { - // greater than means this is a lower bound + // This is a lower bound. low_value = constant_value; low_comparison_type = comparison_type; } else { - // smaller than means this is an upper bound + // This is an upper bound. 
high_value = constant_value; high_comparison_type = comparison_type; } + } else if (filter_expr.type == ExpressionType::COMPARE_BETWEEN) { - // BETWEEN expression auto &between = filter_expr.Cast(); - if (!between.input->Equals(index_expr)) { - // expression doesn't match the index expression + if (!between.input->Equals(expr)) { + // The expression does not match the index expression. return nullptr; } + if (between.lower->type != ExpressionType::VALUE_CONSTANT || between.upper->type != ExpressionType::VALUE_CONSTANT) { - // not a constant comparison + // Not a constant expression. return nullptr; } - low_value = (between.lower->Cast()).value; + + low_value = between.lower->Cast().value; low_comparison_type = between.lower_inclusive ? ExpressionType::COMPARE_GREATERTHANOREQUALTO : ExpressionType::COMPARE_GREATERTHAN; high_value = (between.upper->Cast()).value; @@ -190,45 +197,44 @@ unique_ptr ART::TryInitializeScan(const Transaction &transaction between.upper_inclusive ? ExpressionType::COMPARE_LESSTHANOREQUALTO : ExpressionType::COMPARE_LESSTHAN; } - if (!equal_value.IsNull() || !low_value.IsNull() || !high_value.IsNull()) { - // we can scan this index using this predicate: try a scan - unique_ptr index_state; - if (!equal_value.IsNull()) { - // equality predicate - index_state = InitializeScanSinglePredicate(transaction, equal_value, ExpressionType::COMPARE_EQUAL); - } else if (!low_value.IsNull() && !high_value.IsNull()) { - // two-sided predicate - index_state = InitializeScanTwoPredicates(transaction, low_value, low_comparison_type, high_value, - high_comparison_type); - } else if (!low_value.IsNull()) { - // less than predicate - index_state = InitializeScanSinglePredicate(transaction, low_value, low_comparison_type); - } else { - D_ASSERT(!high_value.IsNull()); - index_state = InitializeScanSinglePredicate(transaction, high_value, high_comparison_type); - } - return index_state; + // We cannot use an index scan. 
+ if (equal_value.IsNull() && low_value.IsNull() && high_value.IsNull()) { + return nullptr; } - return nullptr; + + // Initialize the index scan state and return it. + if (!equal_value.IsNull()) { + // Equality predicate. + return InitializeScanSinglePredicate(equal_value, ExpressionType::COMPARE_EQUAL); + } + if (!low_value.IsNull() && !high_value.IsNull()) { + // Two-sided predicate. + return InitializeScanTwoPredicates(low_value, low_comparison_type, high_value, high_comparison_type); + } + if (!low_value.IsNull()) { + // Less-than predicate. + return InitializeScanSinglePredicate(low_value, low_comparison_type); + } + // Greater-than predicate. + return InitializeScanSinglePredicate(high_value, high_comparison_type); } //===--------------------------------------------------------------------===// -// Keys +// ART Keys //===--------------------------------------------------------------------===// template -static void TemplatedGenerateKeys(ArenaAllocator &allocator, Vector &input, idx_t count, vector &keys) { - +static void TemplatedGenerateKeys(ArenaAllocator &allocator, Vector &input, idx_t count, unsafe_vector &keys) { D_ASSERT(keys.size() >= count); - UnifiedVectorFormat idata; - input.ToUnifiedFormat(count, idata); - auto input_data = UnifiedVectorFormat::GetData(idata); - for (idx_t i = 0; i < count; i++) { - auto idx = idata.sel->get_index(i); + UnifiedVectorFormat data; + input.ToUnifiedFormat(count, data); + auto input_data = UnifiedVectorFormat::GetData(data); - if (IS_NOT_NULL || idata.validity.RowIsValid(idx)) { - ARTKey::CreateARTKey(allocator, input.GetType(), keys[i], input_data[idx]); + for (idx_t i = 0; i < count; i++) { + auto idx = data.sel->get_index(i); + if (IS_NOT_NULL || data.validity.RowIsValid(idx)) { + ARTKey::CreateARTKey(allocator, keys[i], input_data[idx]); continue; } @@ -238,18 +244,17 @@ static void TemplatedGenerateKeys(ArenaAllocator &allocator, Vector &input, idx_ } template -static void ConcatenateKeys(ArenaAllocator 
&allocator, Vector &input, idx_t count, vector &keys) { - - UnifiedVectorFormat idata; - input.ToUnifiedFormat(count, idata); - auto input_data = UnifiedVectorFormat::GetData(idata); +static void ConcatenateKeys(ArenaAllocator &allocator, Vector &input, idx_t count, unsafe_vector &keys) { + UnifiedVectorFormat data; + input.ToUnifiedFormat(count, data); + auto input_data = UnifiedVectorFormat::GetData(data); for (idx_t i = 0; i < count; i++) { - auto idx = idata.sel->get_index(i); + auto idx = data.sel->get_index(i); if (IS_NOT_NULL) { - auto other_key = ARTKey::CreateARTKey(allocator, input.GetType(), input_data[idx]); - keys[i].ConcatenateARTKey(allocator, other_key); + auto other_key = ARTKey::CreateARTKey(allocator, input_data[idx]); + keys[i].Concat(allocator, other_key); continue; } @@ -259,19 +264,19 @@ static void ConcatenateKeys(ArenaAllocator &allocator, Vector &input, idx_t coun } // This column entry is NULL, so we set the whole key to NULL. - if (!idata.validity.RowIsValid(idx)) { + if (!data.validity.RowIsValid(idx)) { keys[i] = ARTKey(); continue; } // Concatenate the keys. 
- auto other_key = ARTKey::CreateARTKey(allocator, input.GetType(), input_data[idx]); - keys[i].ConcatenateARTKey(allocator, other_key); + auto other_key = ARTKey::CreateARTKey(allocator, input_data[idx]); + keys[i].Concat(allocator, other_key); } } template -void GenerateKeysInternal(ArenaAllocator &allocator, DataChunk &input, vector &keys) { +void GenerateKeysInternal(ArenaAllocator &allocator, DataChunk &input, unsafe_vector &keys) { switch (input.data[0].GetType().InternalType()) { case PhysicalType::BOOL: TemplatedGenerateKeys(allocator, input.data[0], input.size(), keys); @@ -371,203 +376,167 @@ void GenerateKeysInternal(ArenaAllocator &allocator, DataChunk &input, vector -void ART::GenerateKeys<>(ArenaAllocator &allocator, DataChunk &input, vector &keys) { +void ART::GenerateKeys<>(ArenaAllocator &allocator, DataChunk &input, unsafe_vector &keys) { GenerateKeysInternal(allocator, input, keys); } template <> -void ART::GenerateKeys(ArenaAllocator &allocator, DataChunk &input, vector &keys) { +void ART::GenerateKeys(ArenaAllocator &allocator, DataChunk &input, unsafe_vector &keys) { GenerateKeysInternal(allocator, input, keys); } -//===--------------------------------------------------------------------===// -// Construct from sorted data (only during CREATE (UNIQUE) INDEX statements) -//===--------------------------------------------------------------------===// - -struct KeySection { - KeySection(idx_t start_p, idx_t end_p, idx_t depth_p, data_t key_byte_p) - : start(start_p), end(end_p), depth(depth_p), key_byte(key_byte_p) {}; - KeySection(idx_t start_p, idx_t end_p, vector &keys, KeySection &key_section) - : start(start_p), end(end_p), depth(key_section.depth + 1), key_byte(keys[end_p].data[key_section.depth]) {}; - idx_t start; - idx_t end; - idx_t depth; - data_t key_byte; -}; - -void GetChildSections(vector &child_sections, vector &keys, KeySection &key_section) { +void ART::GenerateKeyVectors(ArenaAllocator &allocator, DataChunk &input, Vector 
&row_ids, unsafe_vector &keys, + unsafe_vector &row_id_keys) { + GenerateKeys<>(allocator, input, keys); - idx_t child_start_idx = key_section.start; - for (idx_t i = key_section.start + 1; i <= key_section.end; i++) { - if (keys[i - 1].data[key_section.depth] != keys[i].data[key_section.depth]) { - child_sections.emplace_back(child_start_idx, i - 1, keys, key_section); - child_start_idx = i; - } - } - child_sections.emplace_back(child_start_idx, key_section.end, keys, key_section); + DataChunk row_id_chunk; + row_id_chunk.Initialize(Allocator::DefaultAllocator(), vector {LogicalType::ROW_TYPE}, keys.size()); + row_id_chunk.data[0].Reference(row_ids); + row_id_chunk.SetCardinality(keys.size()); + GenerateKeys<>(allocator, row_id_chunk, row_id_keys); } -bool ConstructInternal(ART &art, vector &keys, const row_t *row_ids, Node &node, KeySection &key_section, - bool &has_constraint) { +//===--------------------------------------------------------------------===// +// Construct from sorted data. +//===--------------------------------------------------------------------===// - D_ASSERT(key_section.start < keys.size()); - D_ASSERT(key_section.end < keys.size()); - D_ASSERT(key_section.start <= key_section.end); +bool ART::ConstructInternal(const unsafe_vector &keys, const unsafe_vector &row_ids, Node &node, + ARTKeySection §ion) { + D_ASSERT(section.start < keys.size()); + D_ASSERT(section.end < keys.size()); + D_ASSERT(section.start <= section.end); - auto &start_key = keys[key_section.start]; - auto &end_key = keys[key_section.end]; + auto &start = keys[section.start]; + auto &end = keys[section.end]; + D_ASSERT(start.len != 0); - // increment the depth until we reach a leaf or find a mismatching byte - auto prefix_start = key_section.depth; - while (start_key.len != key_section.depth && - start_key.ByteMatches(end_key, UnsafeNumericCast(key_section.depth))) { - key_section.depth++; + // Increment the depth until we reach a leaf or find a mismatching byte. 
+ auto prefix_depth = section.depth; + while (start.len != section.depth && start.ByteMatches(end, section.depth)) { + section.depth++; } - // we reached a leaf, i.e. all the bytes of start_key and end_key match - if (start_key.len == key_section.depth) { - // end_idx is inclusive - auto num_row_ids = key_section.end - key_section.start + 1; - - // check for possible constraint violation - auto single_row_id = num_row_ids == 1; - if (has_constraint && !single_row_id) { + if (start.len == section.depth) { + // We reached a leaf. All the bytes of start_key and end_key match. + auto row_id_count = section.end - section.start + 1; + if (IsUnique() && row_id_count != 1) { return false; } - reference ref_node(node); - Prefix::New(art, ref_node, start_key, UnsafeNumericCast(prefix_start), - UnsafeNumericCast(start_key.len - prefix_start)); - if (single_row_id) { - Leaf::New(ref_node, row_ids[key_section.start]); + reference ref(node); + auto count = UnsafeNumericCast(start.len - prefix_depth); + Prefix::New(*this, ref, start, prefix_depth, count); + if (row_id_count == 1) { + Leaf::New(ref, row_ids[section.start].GetRowId()); } else { - Leaf::New(art, ref_node, row_ids + key_section.start, num_row_ids); + Leaf::New(*this, ref, row_ids, section.start, row_id_count); } return true; } - // create a new node and recurse - - // we will find at least two child entries of this node, otherwise we'd have reached a leaf - vector child_sections; - GetChildSections(child_sections, keys, key_section); - - // set the prefix - reference ref_node(node); - auto prefix_length = key_section.depth - prefix_start; - Prefix::New(art, ref_node, start_key, UnsafeNumericCast(prefix_start), - UnsafeNumericCast(prefix_length)); + // Create a new node and recurse. + unsafe_vector children; + section.GetChildSections(children, keys); - // set the node - auto node_type = Node::GetARTNodeTypeByCount(child_sections.size()); - Node::New(art, ref_node, node_type); + // Create the prefix. 
+ reference ref(node); + auto prefix_length = section.depth - prefix_depth; + Prefix::New(*this, ref, start, prefix_depth, prefix_length); - // recurse on each child section - for (auto &child_section : child_sections) { + // Create the node. + Node::New(*this, ref, Node::GetNodeType(children.size())); + for (auto &child : children) { Node new_child; - auto no_violation = ConstructInternal(art, keys, row_ids, new_child, child_section, has_constraint); - Node::InsertChild(art, ref_node, child_section.key_byte, new_child); - if (!no_violation) { + auto success = ConstructInternal(keys, row_ids, new_child, child); + Node::InsertChild(*this, ref, child.key_byte, new_child); + if (!success) { return false; } } return true; } -bool ART::ConstructFromSorted(idx_t count, vector &keys, Vector &row_identifiers) { - - UnifiedVectorFormat row_id_data; - row_identifiers.ToUnifiedFormat(count, row_id_data); - auto row_ids = UnifiedVectorFormat::GetData(row_id_data); - - auto key_section = KeySection(0, count - 1, 0, 0); - auto has_constraint = IsUnique(); - if (!ConstructInternal(*this, keys, row_ids, tree, key_section, has_constraint)) { +bool ART::Construct(unsafe_vector &keys, unsafe_vector &row_ids, const idx_t row_count) { + ARTKeySection section(0, row_count - 1, 0, 0); + if (!ConstructInternal(keys, row_ids, tree, section)) { return false; } #ifdef DEBUG - D_ASSERT(!VerifyAndToStringInternal(true).empty()); - for (idx_t i = 0; i < count; i++) { - D_ASSERT(!keys[i].Empty()); - auto leaf = Lookup(tree, keys[i], 0); - D_ASSERT(Leaf::ContainsRowId(*this, *leaf, row_ids[i])); - } + unsafe_vector row_ids_debug; + Iterator it(*this); + it.FindMinimum(tree); + ARTKey empty_key = ARTKey(); + it.Scan(empty_key, NumericLimits().Maximum(), row_ids_debug, false); + D_ASSERT(row_count == row_ids_debug.size()); #endif - return true; } //===--------------------------------------------------------------------===// -// Insert / Verification / Constraint Checking +// Insert and Constraint 
Checking //===--------------------------------------------------------------------===// -ErrorData ART::Insert(IndexLock &lock, DataChunk &input, Vector &row_identifiers) { - D_ASSERT(row_identifiers.GetType().InternalType() == ROW_TYPE); - D_ASSERT(logical_types[0] == input.data[0].GetType()); +ErrorData ART::Insert(IndexLock &lock, DataChunk &input, Vector &row_ids) { + D_ASSERT(row_ids.GetType().InternalType() == ROW_TYPE); + auto row_count = input.size(); - ArenaAllocator arena_allocator(BufferAllocator::Get(db)); - vector keys(input.size()); - GenerateKeys<>(arena_allocator, input, keys); + ArenaAllocator allocator(BufferAllocator::Get(db)); + unsafe_vector keys(row_count); + unsafe_vector row_id_keys(row_count); + GenerateKeyVectors(allocator, input, row_ids, keys, row_id_keys); - UnifiedVectorFormat row_id_data; - row_identifiers.ToUnifiedFormat(input.size(), row_id_data); - auto row_ids = UnifiedVectorFormat::GetData(row_id_data); - - // now insert the elements into the index + // Insert the entries into the index. idx_t failed_index = DConstants::INVALID_INDEX; - for (idx_t i = 0; i < input.size(); i++) { + auto was_empty = !tree.HasMetadata(); + for (idx_t i = 0; i < row_count; i++) { if (keys[i].Empty()) { continue; } - - auto row_id = row_ids[i]; - if (!Insert(tree, keys[i], 0, row_id)) { - // failed to insert because of constraint violation + if (!Insert(tree, keys[i], 0, row_id_keys[i], tree.GetGateStatus())) { + // Insertion failure due to a constraint violation. failed_index = i; break; } } - // failed to insert because of constraint violation: remove previously inserted entries + // Remove any previously inserted entries. if (failed_index != DConstants::INVALID_INDEX) { for (idx_t i = 0; i < failed_index; i++) { if (keys[i].Empty()) { continue; } - row_t row_id = row_ids[i]; - Erase(tree, keys[i], 0, row_id); + Erase(tree, keys[i], 0, row_id_keys[i], tree.GetGateStatus()); } } + if (was_empty) { + // All nodes are in-memory. 
+ VerifyAllocationsInternal(); + } + if (failed_index != DConstants::INVALID_INDEX) { - return ErrorData(ConstraintException("PRIMARY KEY or UNIQUE constraint violated: duplicate key \"%s\"", - AppendRowError(input, failed_index))); + auto msg = AppendRowError(input, failed_index); + return ErrorData(ConstraintException("PRIMARY KEY or UNIQUE constraint violated: duplicate key \"%s\"", msg)); } #ifdef DEBUG - for (idx_t i = 0; i < input.size(); i++) { + for (idx_t i = 0; i < row_count; i++) { if (keys[i].Empty()) { continue; } - - auto leaf = Lookup(tree, keys[i], 0); - D_ASSERT(Leaf::ContainsRowId(*this, *leaf, row_ids[i])); + D_ASSERT(Lookup(tree, keys[i], 0)); } #endif - return ErrorData(); } -ErrorData ART::Append(IndexLock &lock, DataChunk &appended_data, Vector &row_identifiers) { - DataChunk expression_result; - expression_result.Initialize(Allocator::DefaultAllocator(), logical_types); - - // first resolve the expressions for the index - ExecuteExpressions(appended_data, expression_result); - - // now insert into the index - return Insert(lock, expression_result, row_identifiers); +ErrorData ART::Append(IndexLock &lock, DataChunk &input, Vector &row_ids) { + // Execute all column expressions before inserting the data chunk. 
+ DataChunk expr_chunk; + expr_chunk.Initialize(Allocator::DefaultAllocator(), logical_types); + ExecuteExpressions(input, expr_chunk); + return Insert(lock, expr_chunk, row_ids); } void ART::VerifyAppend(DataChunk &chunk) { @@ -580,87 +549,102 @@ void ART::VerifyAppend(DataChunk &chunk, ConflictManager &conflict_manager) { CheckConstraintsForChunk(chunk, conflict_manager); } -bool ART::InsertToLeaf(Node &leaf, const row_t &row_id) { +void ART::InsertIntoEmpty(Node &node, const ARTKey &key, const idx_t depth, const ARTKey &row_id, + const GateStatus status) { + D_ASSERT(depth <= key.len); + D_ASSERT(!node.HasMetadata()); - if (IsUnique()) { - return false; + if (status == GateStatus::GATE_SET) { + Leaf::New(node, row_id.GetRowId()); + return; } - Leaf::Insert(*this, leaf, row_id); - return true; + reference ref(node); + auto count = key.len - depth; + + Prefix::New(*this, ref, key, depth, count); + Leaf::New(ref, row_id.GetRowId()); } -bool ART::Insert(Node &node, const ARTKey &key, idx_t depth, const row_t &row_id) { +bool ART::InsertIntoNode(Node &node, const ARTKey &key, const idx_t depth, const ARTKey &row_id, + const GateStatus status) { + D_ASSERT(depth < key.len); + auto child = node.GetChildMutable(*this, key[depth]); - // node is currently empty, create a leaf here with the key - if (!node.HasMetadata()) { - D_ASSERT(depth <= key.len); - reference ref_node(node); - Prefix::New(*this, ref_node, key, UnsafeNumericCast(depth), - UnsafeNumericCast(key.len - depth)); - Leaf::New(ref_node, row_id); - return true; + // Recurse, if a child exists at key[depth]. + if (child) { + D_ASSERT(child->HasMetadata()); + bool success = Insert(*child, key, depth + 1, row_id, status); + node.ReplaceChild(*this, key[depth], *child); + return success; } - auto node_type = node.GetType(); - - // insert the row ID into this leaf - if (node_type == NType::LEAF || node_type == NType::LEAF_INLINED) { - return InsertToLeaf(node, row_id); + // Create an inlined prefix at key[depth]. 
+ if (status == GateStatus::GATE_SET) { + Node remainder; + auto byte = key[depth]; + auto success = Insert(remainder, key, depth + 1, row_id, status); + Node::InsertChild(*this, node, byte, remainder); + return success; } - if (node_type != NType::PREFIX) { - D_ASSERT(depth < key.len); - auto child = node.GetChildMutable(*this, key[depth]); + // Insert an inlined leaf at key[depth]. + Node leaf; + reference ref(leaf); - // recurse, if a child exists at key[depth] - if (child) { - bool success = Insert(*child, key, depth + 1, row_id); - node.ReplaceChild(*this, key[depth], *child); - return success; - } - - // insert a new leaf node at key[depth] - Node leaf_node; - reference ref_node(leaf_node); - if (depth + 1 < key.len) { - Prefix::New(*this, ref_node, key, UnsafeNumericCast(depth + 1), - UnsafeNumericCast(key.len - depth - 1)); - } - Leaf::New(ref_node, row_id); - Node::InsertChild(*this, node, key[depth], leaf_node); - return true; + // Create the prefix. + if (depth + 1 < key.len) { + auto count = key.len - depth - 1; + Prefix::New(*this, ref, key, depth + 1, count); } - // this is a prefix node, traverse - reference next_node(node); - auto mismatch_position = Prefix::TraverseMutable(*this, next_node, key, depth); + // Create the inlined leaf. 
+ Leaf::New(ref, row_id.GetRowId()); + Node::InsertChild(*this, node, key[depth], leaf); + return true; +} - // prefix matches key - if (next_node.get().GetType() != NType::PREFIX) { - return Insert(next_node, key, depth, row_id); +bool ART::Insert(Node &node, const ARTKey &key, idx_t depth, const ARTKey &row_id, const GateStatus status) { + if (!node.HasMetadata()) { + InsertIntoEmpty(node, key, depth, row_id, status); + return true; } - // prefix does not match the key, we need to create a new Node4; this new Node4 has two children, - // the remaining part of the prefix, and the new leaf - Node remaining_prefix; - auto prefix_byte = Prefix::GetByte(*this, next_node, mismatch_position); - Prefix::Split(*this, next_node, remaining_prefix, mismatch_position); - Node4::New(*this, next_node); - - // insert remaining prefix - Node4::InsertChild(*this, next_node, prefix_byte, remaining_prefix); + // Enter a nested leaf. + if (status == GateStatus::GATE_NOT_SET && node.GetGateStatus() == GateStatus::GATE_SET) { + return Insert(node, row_id, 0, row_id, GateStatus::GATE_SET); + } - // insert new leaf - Node leaf_node; - reference ref_node(leaf_node); - if (depth + 1 < key.len) { - Prefix::New(*this, ref_node, key, UnsafeNumericCast(depth + 1), - UnsafeNumericCast(key.len - depth - 1)); + auto type = node.GetType(); + switch (type) { + case NType::LEAF_INLINED: { + if (IsUnique()) { + return false; + } + Leaf::InsertIntoInlined(*this, node, row_id, depth, status); + return true; + } + case NType::LEAF: { + Leaf::TransformToNested(*this, node); + return Insert(node, key, depth, row_id, status); + } + case NType::NODE_7_LEAF: + case NType::NODE_15_LEAF: + case NType::NODE_256_LEAF: { + auto byte = key[Prefix::ROW_ID_COUNT]; + Node::InsertChild(*this, node, byte); + return true; + } + case NType::NODE_4: + case NType::NODE_16: + case NType::NODE_48: + case NType::NODE_256: + return InsertIntoNode(node, key, depth, row_id, status); + case NType::PREFIX: + return 
Prefix::Insert(*this, node, key, depth, row_id, status); + default: + throw InternalException("Invalid node type for Insert."); } - Leaf::New(ref_node, row_id); - Node4::InsertChild(*this, next_node, key[depth], leaf_node); - return true; } //===--------------------------------------------------------------------===// @@ -674,339 +658,292 @@ void ART::CommitDrop(IndexLock &index_lock) { tree.Clear(); } -void ART::Delete(IndexLock &state, DataChunk &input, Vector &row_identifiers) { +void ART::Delete(IndexLock &state, DataChunk &input, Vector &row_ids) { + auto row_count = input.size(); - DataChunk expression; - expression.Initialize(Allocator::DefaultAllocator(), logical_types); - ExecuteExpressions(input, expression); - - ArenaAllocator arena_allocator(BufferAllocator::Get(db)); - vector keys(expression.size()); - GenerateKeys<>(arena_allocator, expression, keys); + DataChunk expr_chunk; + expr_chunk.Initialize(Allocator::DefaultAllocator(), logical_types); + ExecuteExpressions(input, expr_chunk); - UnifiedVectorFormat row_id_data; - row_identifiers.ToUnifiedFormat(input.size(), row_id_data); - auto row_ids = UnifiedVectorFormat::GetData(row_id_data); + ArenaAllocator allocator(BufferAllocator::Get(db)); + unsafe_vector keys(row_count); + unsafe_vector row_id_keys(row_count); + GenerateKeyVectors(allocator, expr_chunk, row_ids, keys, row_id_keys); - for (idx_t i = 0; i < input.size(); i++) { + for (idx_t i = 0; i < row_count; i++) { if (keys[i].Empty()) { continue; } - Erase(tree, keys[i], 0, row_ids[i]); + Erase(tree, keys[i], 0, row_id_keys[i], tree.GetGateStatus()); + } + + if (!tree.HasMetadata()) { + // No more allocations. 
+ VerifyAllocationsInternal(); } #ifdef DEBUG - // verify that we removed all row IDs - for (idx_t i = 0; i < input.size(); i++) { + for (idx_t i = 0; i < row_count; i++) { if (keys[i].Empty()) { continue; } - auto leaf = Lookup(tree, keys[i], 0); - if (leaf) { - D_ASSERT(!Leaf::ContainsRowId(*this, *leaf, row_ids[i])); + if (leaf && leaf->GetType() == NType::LEAF_INLINED) { + D_ASSERT(leaf->GetRowId() != row_id_keys[i].GetRowId()); } } #endif } -void ART::Erase(Node &node, const ARTKey &key, idx_t depth, const row_t &row_id) { - +void ART::Erase(Node &node, reference key, idx_t depth, reference row_id, + GateStatus status) { if (!node.HasMetadata()) { return; } - // handle prefix - reference next_node(node); - if (next_node.get().GetType() == NType::PREFIX) { - Prefix::TraverseMutable(*this, next_node, key, depth); - if (next_node.get().GetType() == NType::PREFIX) { + // Traverse the prefix. + reference next(node); + if (next.get().GetType() == NType::PREFIX) { + Prefix::TraverseMutable(*this, next, key, depth); + + // Prefixes don't match: nothing to erase. + if (next.get().GetType() == NType::PREFIX && next.get().GetGateStatus() == GateStatus::GATE_NOT_SET) { return; } } - // delete a row ID from a leaf (root is leaf with possible prefix nodes) - if (next_node.get().GetType() == NType::LEAF || next_node.get().GetType() == NType::LEAF_INLINED) { - if (Leaf::Remove(*this, next_node, row_id)) { + // Delete the row ID from the leaf. + // This is the root node, which can be a leaf with possible prefix nodes. + if (next.get().GetType() == NType::LEAF_INLINED) { + if (next.get().GetRowId() == row_id.get().GetRowId()) { Node::Free(*this, node); } return; } - D_ASSERT(depth < key.len); - auto child = next_node.get().GetChildMutable(*this, key[depth]); - if (child) { - D_ASSERT(child->HasMetadata()); + // Transform a deprecated leaf. 
+ if (next.get().GetType() == NType::LEAF) { + D_ASSERT(status == GateStatus::GATE_NOT_SET); + Leaf::TransformToNested(*this, next); + } - auto temp_depth = depth + 1; - reference child_node(*child); - if (child_node.get().GetType() == NType::PREFIX) { - Prefix::TraverseMutable(*this, child_node, key, temp_depth); - if (child_node.get().GetType() == NType::PREFIX) { - return; - } - } + // Enter a nested leaf. + if (status == GateStatus::GATE_NOT_SET && next.get().GetGateStatus() == GateStatus::GATE_SET) { + return Erase(next, row_id, 0, row_id, GateStatus::GATE_SET); + } - if (child_node.get().GetType() == NType::LEAF || child_node.get().GetType() == NType::LEAF_INLINED) { - // leaf found, remove entry - if (Leaf::Remove(*this, child_node, row_id)) { - Node::DeleteChild(*this, next_node, node, key[depth]); - } - return; + D_ASSERT(depth < key.get().len); + if (next.get().IsLeafNode()) { + auto byte = key.get()[depth]; + if (next.get().HasByte(*this, byte)) { + Node::DeleteChild(*this, next, node, key.get()[depth], status, key.get()); } + return; + } - // recurse - Erase(*child, key, depth + 1, row_id); - next_node.get().ReplaceChild(*this, key[depth], *child); + auto child = next.get().GetChildMutable(*this, key.get()[depth]); + if (!child) { + // No child at the byte: nothing to erase. + return; } -} -//===--------------------------------------------------------------------===// -// Point Query (Equal) -//===--------------------------------------------------------------------===// + // Transform a deprecated leaf. 
+ if (child->GetType() == NType::LEAF) { + D_ASSERT(status == GateStatus::GATE_NOT_SET); + Leaf::TransformToNested(*this, *child); + } -static ARTKey CreateKey(ArenaAllocator &allocator, PhysicalType type, Value &value) { - D_ASSERT(type == value.type().InternalType()); - switch (type) { - case PhysicalType::BOOL: - return ARTKey::CreateARTKey(allocator, value.type(), value); - case PhysicalType::INT8: - return ARTKey::CreateARTKey(allocator, value.type(), value); - case PhysicalType::INT16: - return ARTKey::CreateARTKey(allocator, value.type(), value); - case PhysicalType::INT32: - return ARTKey::CreateARTKey(allocator, value.type(), value); - case PhysicalType::INT64: - return ARTKey::CreateARTKey(allocator, value.type(), value); - case PhysicalType::UINT8: - return ARTKey::CreateARTKey(allocator, value.type(), value); - case PhysicalType::UINT16: - return ARTKey::CreateARTKey(allocator, value.type(), value); - case PhysicalType::UINT32: - return ARTKey::CreateARTKey(allocator, value.type(), value); - case PhysicalType::UINT64: - return ARTKey::CreateARTKey(allocator, value.type(), value); - case PhysicalType::INT128: - return ARTKey::CreateARTKey(allocator, value.type(), value); - case PhysicalType::UINT128: - return ARTKey::CreateARTKey(allocator, value.type(), value); - case PhysicalType::FLOAT: - return ARTKey::CreateARTKey(allocator, value.type(), value); - case PhysicalType::DOUBLE: - return ARTKey::CreateARTKey(allocator, value.type(), value); - case PhysicalType::VARCHAR: - return ARTKey::CreateARTKey(allocator, value.type(), value); - default: - throw InternalException("Invalid type for the ART key"); + // Enter a nested leaf. 
+ if (status == GateStatus::GATE_NOT_SET && child->GetGateStatus() == GateStatus::GATE_SET) { + Erase(*child, row_id, 0, row_id, GateStatus::GATE_SET); + if (!child->HasMetadata()) { + Node::DeleteChild(*this, next, node, key.get()[depth], status, key.get()); + } else { + next.get().ReplaceChild(*this, key.get()[depth], *child); + } + return; } -} -bool ART::SearchEqual(ARTKey &key, idx_t max_count, vector &result_ids) { + auto temp_depth = depth + 1; + reference ref(*child); - auto leaf = Lookup(tree, key, 0); - if (!leaf) { - return true; + if (ref.get().GetType() == NType::PREFIX) { + Prefix::TraverseMutable(*this, ref, key, temp_depth); + + // Prefixes don't match: nothing to erase. + if (ref.get().GetType() == NType::PREFIX && ref.get().GetGateStatus() == GateStatus::GATE_NOT_SET) { + return; + } + } + + if (ref.get().GetType() == NType::LEAF_INLINED) { + if (ref.get().GetRowId() == row_id.get().GetRowId()) { + Node::DeleteChild(*this, next, node, key.get()[depth], status, key.get()); + } + return; + } + + // Recurse. + Erase(*child, key, depth + 1, row_id, status); + if (!child->HasMetadata()) { + Node::DeleteChild(*this, next, node, key.get()[depth], status, key.get()); + } else { + next.get().ReplaceChild(*this, key.get()[depth], *child); } - return Leaf::GetRowIds(*this, *leaf, result_ids, max_count); } //===--------------------------------------------------------------------===// -// Lookup +// Point and range lookups //===--------------------------------------------------------------------===// -optional_ptr ART::Lookup(const Node &node, const ARTKey &key, idx_t depth) { +const unsafe_optional_ptr ART::Lookup(const Node &node, const ARTKey &key, idx_t depth) { + reference ref(node); + while (ref.get().HasMetadata()) { - reference node_ref(node); - while (node_ref.get().HasMetadata()) { + // Return the leaf. 
+ if (ref.get().IsAnyLeaf() || ref.get().GetGateStatus() == GateStatus::GATE_SET) { + return unsafe_optional_ptr(ref.get()); + } - // traverse prefix, if exists - reference next_node(node_ref.get()); - if (next_node.get().GetType() == NType::PREFIX) { - Prefix::Traverse(*this, next_node, key, depth); - if (next_node.get().GetType() == NType::PREFIX) { + // Traverse the prefix. + if (ref.get().GetType() == NType::PREFIX) { + Prefix::Traverse(*this, ref, key, depth); + if (ref.get().GetType() == NType::PREFIX && ref.get().GetGateStatus() == GateStatus::GATE_NOT_SET) { + // Prefix mismatch, return nullptr. return nullptr; } + continue; } - if (next_node.get().GetType() == NType::LEAF || next_node.get().GetType() == NType::LEAF_INLINED) { - return &next_node.get(); - } - + // Get the child node. D_ASSERT(depth < key.len); - auto child = next_node.get().GetChild(*this, key[depth]); + auto child = ref.get().GetChild(*this, key[depth]); + + // No child at the matching byte, return nullptr. if (!child) { - // prefix matches key, but no child at byte, ART/subtree does not contain key return nullptr; } - // lookup in child node - node_ref = *child; - D_ASSERT(node_ref.get().HasMetadata()); + // Continue in the child. 
+ ref = *child; + D_ASSERT(ref.get().HasMetadata()); depth++; } return nullptr; } -//===--------------------------------------------------------------------===// -// Greater Than and Less Than -//===--------------------------------------------------------------------===// +bool ART::SearchEqual(ARTKey &key, idx_t max_count, unsafe_vector &row_ids) { + auto leaf = Lookup(tree, key, 0); + if (!leaf) { + return true; + } -bool ART::SearchGreater(ARTIndexScanState &state, ARTKey &key, bool equal, idx_t max_count, vector &result_ids) { + Iterator it(*this); + it.FindMinimum(*leaf); + ARTKey empty_key = ARTKey(); + return it.Scan(empty_key, max_count, row_ids, false); +} +bool ART::SearchGreater(ARTKey &key, bool equal, idx_t max_count, unsafe_vector &row_ids) { if (!tree.HasMetadata()) { return true; } - Iterator &it = state.iterator; - // find the lowest value that satisfies the predicate - if (!it.art) { - it.art = this; - if (!it.LowerBound(tree, key, equal, 0)) { - // early-out, if the maximum value in the ART is lower than the lower bound - return true; - } + // Find the lowest value that satisfies the predicate. + Iterator it(*this); + + // Early-out, if the maximum value in the ART is lower than the lower bound. + if (!it.LowerBound(tree, key, equal, 0)) { + return true; } - // after that we continue the scan; we don't need to check the bounds as any value following this value is - // automatically bigger and hence satisfies our predicate - ARTKey empty_key = ARTKey(); - return it.Scan(empty_key, max_count, result_ids, false); + // We continue the scan. We do not check the bounds as any value following this value is + // greater and satisfies our predicate. 
+ return it.Scan(ARTKey(), max_count, row_ids, false); } -bool ART::SearchLess(ARTIndexScanState &state, ARTKey &upper_bound, bool equal, idx_t max_count, - vector &result_ids) { - +bool ART::SearchLess(ARTKey &upper_bound, bool equal, idx_t max_count, unsafe_vector &row_ids) { if (!tree.HasMetadata()) { return true; } - Iterator &it = state.iterator; - if (!it.art) { - it.art = this; - // find the minimum value in the ART: we start scanning from this value - it.FindMinimum(tree); - // early-out, if the minimum value is higher than the upper bound - if (it.current_key > upper_bound) { - return true; - } + // Find the minimum value in the ART: we start scanning from this value. + Iterator it(*this); + it.FindMinimum(tree); + + // Early-out, if the minimum value is higher than the upper bound. + if (it.current_key.GreaterThan(upper_bound, equal)) { + return true; } - // now continue the scan until we reach the upper bound - return it.Scan(upper_bound, max_count, result_ids, equal); + // Continue the scan until we reach the upper bound. + return it.Scan(upper_bound, max_count, row_ids, equal); } -//===--------------------------------------------------------------------===// -// Closed Range Query -//===--------------------------------------------------------------------===// +bool ART::SearchCloseRange(ARTKey &lower_bound, ARTKey &upper_bound, bool left_equal, bool right_equal, idx_t max_count, + unsafe_vector &row_ids) { + // Find the first node that satisfies the left predicate. 
+ Iterator it(*this); -bool ART::SearchCloseRange(ARTIndexScanState &state, ARTKey &lower_bound, ARTKey &upper_bound, bool left_equal, - bool right_equal, idx_t max_count, vector &result_ids) { - - Iterator &it = state.iterator; - - // find the first node that satisfies the left predicate - if (!it.art) { - it.art = this; - if (!it.LowerBound(tree, lower_bound, left_equal, 0)) { - // early-out, if the maximum value in the ART is lower than the lower bound - return true; - } + // Early-out, if the maximum value in the ART is lower than the lower bound. + if (!it.LowerBound(tree, lower_bound, left_equal, 0)) { + return true; } - // now continue the scan until we reach the upper bound - return it.Scan(upper_bound, max_count, result_ids, right_equal); + // Continue the scan until we reach the upper bound. + return it.Scan(upper_bound, max_count, row_ids, right_equal); } -bool ART::Scan(const Transaction &transaction, const DataTable &table, IndexScanState &state, const idx_t max_count, - vector &result_ids) { - +bool ART::Scan(IndexScanState &state, const idx_t max_count, unsafe_vector &row_ids) { auto &scan_state = state.Cast(); - vector row_ids; - bool success; - - // FIXME: the key directly owning the data for a single key might be more efficient D_ASSERT(scan_state.values[0].type().InternalType() == types[0]); ArenaAllocator arena_allocator(Allocator::Get(db)); - auto key = CreateKey(arena_allocator, types[0], scan_state.values[0]); + auto key = ARTKey::CreateKey(arena_allocator, types[0], scan_state.values[0]); if (scan_state.values[1].IsNull()) { - - // single predicate + // Single predicate. 
lock_guard l(lock); switch (scan_state.expressions[0]) { case ExpressionType::COMPARE_EQUAL: - success = SearchEqual(key, max_count, row_ids); - break; + return SearchEqual(key, max_count, row_ids); case ExpressionType::COMPARE_GREATERTHANOREQUALTO: - success = SearchGreater(scan_state, key, true, max_count, row_ids); - break; + return SearchGreater(key, true, max_count, row_ids); case ExpressionType::COMPARE_GREATERTHAN: - success = SearchGreater(scan_state, key, false, max_count, row_ids); - break; + return SearchGreater(key, false, max_count, row_ids); case ExpressionType::COMPARE_LESSTHANOREQUALTO: - success = SearchLess(scan_state, key, true, max_count, row_ids); - break; + return SearchLess(key, true, max_count, row_ids); case ExpressionType::COMPARE_LESSTHAN: - success = SearchLess(scan_state, key, false, max_count, row_ids); - break; + return SearchLess(key, false, max_count, row_ids); default: throw InternalException("Index scan type not implemented"); } - - } else { - - // two predicates - lock_guard l(lock); - - D_ASSERT(scan_state.values[1].type().InternalType() == types[0]); - auto upper_bound = CreateKey(arena_allocator, types[0], scan_state.values[1]); - - bool left_equal = scan_state.expressions[0] == ExpressionType ::COMPARE_GREATERTHANOREQUALTO; - bool right_equal = scan_state.expressions[1] == ExpressionType ::COMPARE_LESSTHANOREQUALTO; - success = SearchCloseRange(scan_state, key, upper_bound, left_equal, right_equal, max_count, row_ids); } - if (!success) { - return false; - } - if (row_ids.empty()) { - return true; - } - - // sort the row ids - sort(row_ids.begin(), row_ids.end()); - // duplicate eliminate the row ids and append them to the row ids of the state - result_ids.reserve(row_ids.size()); - - result_ids.push_back(row_ids[0]); - for (idx_t i = 1; i < row_ids.size(); i++) { - if (row_ids[i] != row_ids[i - 1]) { - result_ids.push_back(row_ids[i]); - } - } - return true; + // Two predicates. 
+ lock_guard l(lock); + D_ASSERT(scan_state.values[1].type().InternalType() == types[0]); + auto upper_bound = ARTKey::CreateKey(arena_allocator, types[0], scan_state.values[1]); + bool left_equal = scan_state.expressions[0] == ExpressionType ::COMPARE_GREATERTHANOREQUALTO; + bool right_equal = scan_state.expressions[1] == ExpressionType ::COMPARE_LESSTHANOREQUALTO; + return SearchCloseRange(key, upper_bound, left_equal, right_equal, max_count, row_ids); } //===--------------------------------------------------------------------===// -// More Verification / Constraint Checking +// More Constraint Checking //===--------------------------------------------------------------------===// -string ART::GenerateErrorKeyName(DataChunk &input, idx_t row) { - - // FIXME: why exactly can we not pass the expression_chunk as an argument to this - // FIXME: function instead of re-executing? - // re-executing the expressions is not very fast, but we're going to throw, so we don't care - DataChunk expression_chunk; - expression_chunk.Initialize(Allocator::DefaultAllocator(), logical_types); - ExecuteExpressions(input, expression_chunk); +string ART::GenerateErrorKeyName(DataChunk &input, idx_t row_idx) { + DataChunk expr_chunk; + expr_chunk.Initialize(Allocator::DefaultAllocator(), logical_types); + ExecuteExpressions(input, expr_chunk); string key_name; - for (idx_t k = 0; k < expression_chunk.ColumnCount(); k++) { + for (idx_t k = 0; k < expr_chunk.ColumnCount(); k++) { if (k > 0) { key_name += ", "; } - key_name += unbound_expressions[k]->GetName() + ": " + expression_chunk.data[k].GetValue(row).ToString(); + key_name += unbound_expressions[k]->GetName() + ": " + expr_chunk.data[k].GetValue(row_idx).ToString(); } return key_name; } @@ -1014,7 +951,7 @@ string ART::GenerateErrorKeyName(DataChunk &input, idx_t row) { string ART::GenerateConstraintErrorMessage(VerifyExistenceType verify_type, const string &key_name) { switch (verify_type) { case VerifyExistenceType::APPEND: { - // 
APPEND to PK/UNIQUE table, but node/key already exists in PK/UNIQUE table + // APPEND to PK/UNIQUE table, but node/key already exists in PK/UNIQUE table. string type = IsPrimary() ? "primary key" : "unique"; return StringUtil::Format("Duplicate key \"%s\" violates %s constraint. " "If this is an unexpected constraint violation please double " @@ -1023,12 +960,12 @@ string ART::GenerateConstraintErrorMessage(VerifyExistenceType verify_type, cons key_name, type); } case VerifyExistenceType::APPEND_FK: { - // APPEND_FK to FK table, node/key does not exist in PK/UNIQUE table + // APPEND_FK to FK table, node/key does not exist in PK/UNIQUE table. return StringUtil::Format( "Violates foreign key constraint because key \"%s\" does not exist in the referenced table", key_name); } case VerifyExistenceType::DELETE_FK: { - // DELETE_FK that still exists in a FK table, i.e., not a valid delete + // DELETE_FK that still exists in a FK table, i.e., not a valid delete. return StringUtil::Format("Violates foreign key constraint because key \"%s\" is still referenced by a foreign " "key in a different table", key_name); @@ -1039,22 +976,19 @@ string ART::GenerateConstraintErrorMessage(VerifyExistenceType verify_type, cons } void ART::CheckConstraintsForChunk(DataChunk &input, ConflictManager &conflict_manager) { - - // don't alter the index during constraint checking + // Lock the index during constraint checking. 
lock_guard l(lock); - // first resolve the expressions for the index - DataChunk expression_chunk; - expression_chunk.Initialize(Allocator::DefaultAllocator(), logical_types); - ExecuteExpressions(input, expression_chunk); + DataChunk expr_chunk; + expr_chunk.Initialize(Allocator::DefaultAllocator(), logical_types); + ExecuteExpressions(input, expr_chunk); ArenaAllocator arena_allocator(BufferAllocator::Get(db)); - vector keys(expression_chunk.size()); - GenerateKeys<>(arena_allocator, expression_chunk, keys); + unsafe_vector keys(expr_chunk.size()); + GenerateKeys<>(arena_allocator, expr_chunk, keys); - idx_t found_conflict = DConstants::INVALID_INDEX; + auto found_conflict = DConstants::INVALID_INDEX; for (idx_t i = 0; found_conflict == DConstants::INVALID_INDEX && i < input.size(); i++) { - if (keys[i].Empty()) { if (conflict_manager.AddNull(i)) { found_conflict = i; @@ -1070,8 +1004,8 @@ void ART::CheckConstraintsForChunk(DataChunk &input, ConflictManager &conflict_m continue; } - // when we find a node, we need to update the 'matches' and 'row_ids' - // NOTE: leaves can have more than one row_id, but for UNIQUE/PRIMARY KEY they will only have one + // If we find a node, we need to update the 'matches' and 'row_ids'. + // We only perform constraint checking on unique indexes, i.e., all leaves are inlined. 
D_ASSERT(leaf->GetType() == NType::LEAF_INLINED); if (conflict_manager.AddHit(i, leaf->GetRowId())) { found_conflict = i; @@ -1079,7 +1013,6 @@ void ART::CheckConstraintsForChunk(DataChunk &input, ConflictManager &conflict_m } conflict_manager.FinishLookup(); - if (found_conflict == DConstants::INVALID_INDEX) { return; } @@ -1089,93 +1022,178 @@ void ART::CheckConstraintsForChunk(DataChunk &input, ConflictManager &conflict_m throw ConstraintException(exception_msg); } +string ART::GetConstraintViolationMessage(VerifyExistenceType verify_type, idx_t failed_index, DataChunk &input) { + auto key_name = GenerateErrorKeyName(input, failed_index); + auto exception_msg = GenerateConstraintErrorMessage(verify_type, key_name); + return exception_msg; +} + //===--------------------------------------------------------------------===// -// Helper functions for (de)serialization +// Storage and Memory //===--------------------------------------------------------------------===// -IndexStorageInfo ART::GetStorageInfo(const bool get_buffers) { +void ART::TransformToDeprecated() { + auto idx = Node::GetAllocatorIdx(NType::PREFIX); + auto &block_manager = (*allocators)[idx]->block_manager; + unsafe_unique_ptr deprecated_allocator; + + if (prefix_count != Prefix::DEPRECATED_COUNT) { + auto prefix_size = NumericCast(Prefix::DEPRECATED_COUNT) + NumericCast(Prefix::METADATA_SIZE); + deprecated_allocator = make_unsafe_uniq(prefix_size, block_manager); + } + + // Transform all leaves, and possibly the prefixes. + if (tree.HasMetadata()) { + Node::TransformToDeprecated(*this, tree, deprecated_allocator); + } + + // Replace the prefix allocator with the deprecated allocator. 
+ if (deprecated_allocator) { + prefix_count = Prefix::DEPRECATED_COUNT; + + D_ASSERT((*allocators)[idx]->IsEmpty()); + (*allocators)[idx]->Reset(); + (*allocators)[idx] = std::move(deprecated_allocator); + } +} + +IndexStorageInfo ART::GetStorageInfo(const case_insensitive_map_t &options, const bool to_wal) { + // If the storage format uses deprecated leaf storage, + // then we need to transform all nested leaves before serialization. + auto v1_0_0_option = options.find("v1_0_0_storage"); + bool v1_0_0_storage = v1_0_0_option == options.end() || v1_0_0_option->second != Value(false); + if (v1_0_0_storage) { + TransformToDeprecated(); + } - // set the name and root node - IndexStorageInfo info; - info.name = name; + IndexStorageInfo info(name); info.root = tree.Get(); + info.options = options; - if (!get_buffers) { - // store the data on disk as partial blocks and set the block ids - WritePartialBlocks(); + for (auto &allocator : *allocators) { + allocator->RemoveEmptyBuffers(); + } + +#ifdef DEBUG + if (v1_0_0_storage) { + D_ASSERT((*allocators)[Node::GetAllocatorIdx(NType::NODE_7_LEAF)]->IsEmpty()); + D_ASSERT((*allocators)[Node::GetAllocatorIdx(NType::NODE_15_LEAF)]->IsEmpty()); + D_ASSERT((*allocators)[Node::GetAllocatorIdx(NType::NODE_256_LEAF)]->IsEmpty()); + D_ASSERT((*allocators)[Node::GetAllocatorIdx(NType::PREFIX)]->GetSegmentSize() == + Prefix::DEPRECATED_COUNT + Prefix::METADATA_SIZE); + } +#endif + + auto allocator_count = v1_0_0_storage ? DEPRECATED_ALLOCATOR_COUNT : ALLOCATOR_COUNT; + if (!to_wal) { + // Store the data on disk as partial blocks and set the block ids. + WritePartialBlocks(v1_0_0_storage); } else { - // set the correct allocation sizes and get the map containing all buffers - for (const auto &allocator : *allocators) { - info.buffers.push_back(allocator->InitSerializationToWAL()); + // Set the correct allocation sizes and get the map containing all buffers. 
+ for (idx_t i = 0; i < allocator_count; i++) { + info.buffers.push_back((*allocators)[i]->InitSerializationToWAL()); } } - for (const auto &allocator : *allocators) { - info.allocator_infos.push_back(allocator->GetInfo()); + for (idx_t i = 0; i < allocator_count; i++) { + info.allocator_infos.push_back((*allocators)[i]->GetInfo()); } - return info; } -void ART::WritePartialBlocks() { - - // use the partial block manager to serialize all allocator data +void ART::WritePartialBlocks(const bool v1_0_0_storage) { auto &block_manager = table_io_manager.GetIndexBlockManager(); PartialBlockManager partial_block_manager(block_manager, PartialBlockType::FULL_CHECKPOINT); - for (auto &allocator : *allocators) { - allocator->SerializeBuffers(partial_block_manager); + idx_t allocator_count = v1_0_0_storage ? DEPRECATED_ALLOCATOR_COUNT : ALLOCATOR_COUNT; + for (idx_t i = 0; i < allocator_count; i++) { + (*allocators)[i]->SerializeBuffers(partial_block_manager); } partial_block_manager.FlushPartialBlocks(); } void ART::InitAllocators(const IndexStorageInfo &info) { - - // set the root node - tree.Set(info.root); - - // initialize the allocators - D_ASSERT(info.allocator_infos.size() == ALLOCATOR_COUNT); for (idx_t i = 0; i < info.allocator_infos.size(); i++) { (*allocators)[i]->Init(info.allocator_infos[i]); } } void ART::Deserialize(const BlockPointer &pointer) { - D_ASSERT(pointer.IsValid()); + auto &metadata_manager = table_io_manager.GetMetadataManager(); MetadataReader reader(metadata_manager, pointer); tree = reader.Read(); - for (idx_t i = 0; i < ALLOCATOR_COUNT; i++) { + for (idx_t i = 0; i < DEPRECATED_ALLOCATOR_COUNT; i++) { (*allocators)[i]->Deserialize(metadata_manager, reader.Read()); } } -//===--------------------------------------------------------------------===// -// Vacuum -//===--------------------------------------------------------------------===// +void ART::SetPrefixCount(const IndexStorageInfo &info) { + auto numeric_max = NumericLimits().Maximum(); + 
auto max_aligned = AlignValueFloor(numeric_max - Prefix::METADATA_SIZE); -void ART::InitializeVacuum(ARTFlags &flags) { - flags.vacuum_flags.reserve(flags.vacuum_flags.size() + allocators->size()); + if (info.IsValid() && info.root_block_ptr.IsValid()) { + prefix_count = Prefix::DEPRECATED_COUNT; + return; + } + + if (info.IsValid()) { + auto serialized_count = info.allocator_infos[0].segment_size - Prefix::METADATA_SIZE; + prefix_count = NumericCast(serialized_count); + return; + } + + if (!IsUnique()) { + prefix_count = Prefix::ROW_ID_COUNT; + return; + } + + idx_t compound_size = 0; + for (const auto &type : types) { + compound_size += GetTypeIdSize(type); + } + + auto aligned = AlignValue(compound_size) - 1; + if (aligned > NumericCast(max_aligned)) { + prefix_count = max_aligned; + return; + } + + prefix_count = NumericCast(aligned); +} + +idx_t ART::GetInMemorySize(IndexLock &index_lock) { + D_ASSERT(owns_data); + + idx_t in_memory_size = 0; for (auto &allocator : *allocators) { - flags.vacuum_flags.push_back(allocator->InitializeVacuum()); + in_memory_size += allocator->GetInMemorySize(); } + return in_memory_size; } -void ART::FinalizeVacuum(const ARTFlags &flags) { +//===--------------------------------------------------------------------===// +// Vacuum +//===--------------------------------------------------------------------===// +void ART::InitializeVacuum(unordered_set &indexes) { for (idx_t i = 0; i < allocators->size(); i++) { - if (flags.vacuum_flags[i]) { - (*allocators)[i]->FinalizeVacuum(); + if ((*allocators)[i]->InitializeVacuum()) { + indexes.insert(NumericCast(i)); } } } -void ART::Vacuum(IndexLock &state) { +void ART::FinalizeVacuum(const unordered_set &indexes) { + for (const auto &idx : indexes) { + (*allocators)[idx]->FinalizeVacuum(); + } +} +void ART::Vacuum(IndexLock &state) { D_ASSERT(owns_data); if (!tree.HasMetadata()) { @@ -1185,60 +1203,34 @@ void ART::Vacuum(IndexLock &state) { return; } - // holds true, if an allocator needs a 
vacuum, and false otherwise - ARTFlags flags; - InitializeVacuum(flags); + // True, if an allocator needs a vacuum, false otherwise. + unordered_set indexes; + InitializeVacuum(indexes); - // skip vacuum if no allocators require it - auto perform_vacuum = false; - for (const auto &vacuum_flag : flags.vacuum_flags) { - if (vacuum_flag) { - perform_vacuum = true; - break; - } - } - if (!perform_vacuum) { + // Skip vacuum, if no allocators require it. + if (indexes.empty()) { return; } - // traverse the allocated memory of the tree to perform a vacuum - tree.Vacuum(*this, flags); + // Traverse the allocated memory of the tree to perform a vacuum. + tree.Vacuum(*this, indexes); - // finalize the vacuum operation - FinalizeVacuum(flags); -} - -//===--------------------------------------------------------------------===// -// Size -//===--------------------------------------------------------------------===// - -idx_t ART::GetInMemorySize(IndexLock &index_lock) { - - D_ASSERT(owns_data); - - idx_t in_memory_size = 0; - for (auto &allocator : *allocators) { - in_memory_size += allocator->GetInMemorySize(); - } - return in_memory_size; + // Finalize the vacuum operation. 
+ FinalizeVacuum(indexes); } //===--------------------------------------------------------------------===// // Merging //===--------------------------------------------------------------------===// -void ART::InitializeMerge(ARTFlags &flags) { - +void ART::InitializeMerge(unsafe_vector &upper_bounds) { D_ASSERT(owns_data); - - flags.merge_buffer_counts.reserve(allocators->size()); for (auto &allocator : *allocators) { - flags.merge_buffer_counts.emplace_back(allocator->GetUpperBoundBufferId()); + upper_bounds.emplace_back(allocator->GetUpperBoundBufferId()); } } bool ART::MergeIndexes(IndexLock &state, BoundIndex &other_index) { - auto &other_art = other_index.Cast(); if (!other_art.tree.HasMetadata()) { return true; @@ -1246,33 +1238,31 @@ bool ART::MergeIndexes(IndexLock &state, BoundIndex &other_index) { if (other_art.owns_data) { if (tree.HasMetadata()) { - // fully deserialize other_index, and traverse it to increment its buffer IDs - ARTFlags flags; - InitializeMerge(flags); - other_art.tree.InitializeMerge(other_art, flags); + // Fully deserialize other_index, and traverse it to increment its buffer IDs. + unsafe_vector upper_bounds; + InitializeMerge(upper_bounds); + other_art.tree.InitMerge(other_art, upper_bounds); } - // merge the node storage + // Merge the node storage. for (idx_t i = 0; i < allocators->size(); i++) { (*allocators)[i]->Merge(*(*other_art.allocators)[i]); } } - // merge the ARTs - if (!tree.Merge(*this, other_art.tree)) { + // Merge the ARTs. 
+ D_ASSERT(tree.GetGateStatus() == other_art.tree.GetGateStatus()); + if (!tree.Merge(*this, other_art.tree, tree.GetGateStatus())) { return false; } return true; } //===--------------------------------------------------------------------===// -// Utility +// Verification //===--------------------------------------------------------------------===// string ART::VerifyAndToString(IndexLock &state, const bool only_verify) { - // FIXME: this can be improved by counting the allocations of each node type, - // FIXME: and by asserting that each fixed-size allocator lists an equal number of - // FIXME: allocations of that type return VerifyAndToStringInternal(only_verify); } @@ -1283,10 +1273,26 @@ string ART::VerifyAndToStringInternal(const bool only_verify) { return "[empty]"; } -string ART::GetConstraintViolationMessage(VerifyExistenceType verify_type, idx_t failed_index, DataChunk &input) { - auto key_name = GenerateErrorKeyName(input, failed_index); - auto exception_msg = GenerateConstraintErrorMessage(verify_type, key_name); - return exception_msg; +void ART::VerifyAllocations(IndexLock &state) { + return VerifyAllocationsInternal(); +} + +void ART::VerifyAllocationsInternal() { +#ifdef DEBUG + unordered_map node_counts; + for (idx_t i = 0; i < allocators->size(); i++) { + node_counts[NumericCast(i)] = 0; + } + + if (tree.HasMetadata()) { + tree.VerifyAllocations(*this, node_counts); + } + + for (idx_t i = 0; i < allocators->size(); i++) { + auto segment_count = (*allocators)[i]->GetSegmentCount(); + D_ASSERT(segment_count == node_counts[NumericCast(i)]); + } +#endif } constexpr const char *ART::TYPE_NAME; diff --git a/src/duckdb/src/execution/index/art/art_key.cpp b/src/duckdb/src/execution/index/art/art_key.cpp index e9be0abb..d5769f0f 100644 --- a/src/duckdb/src/execution/index/art/art_key.cpp +++ b/src/duckdb/src/execution/index/art/art_key.cpp @@ -2,98 +2,181 @@ namespace duckdb { +//===--------------------------------------------------------------------===// 
+// ARTKey +//===--------------------------------------------------------------------===// + ARTKey::ARTKey() : len(0) { } -ARTKey::ARTKey(const data_ptr_t &data, const uint32_t &len) : len(len), data(data) { +ARTKey::ARTKey(const data_ptr_t data, idx_t len) : len(len), data(data) { } -ARTKey::ARTKey(ArenaAllocator &allocator, const uint32_t &len) : len(len) { +ARTKey::ARTKey(ArenaAllocator &allocator, idx_t len) : len(len) { data = allocator.Allocate(len); } template <> -ARTKey ARTKey::CreateARTKey(ArenaAllocator &allocator, const LogicalType &type, string_t value) { +ARTKey ARTKey::CreateARTKey(ArenaAllocator &allocator, string_t value) { auto string_data = const_data_ptr_cast(value.GetData()); auto string_len = value.GetSize(); - // we need to escape \00 and \01 + + // We escape \00 and \01. idx_t escape_count = 0; - for (idx_t r = 0; r < string_len; r++) { - if (string_data[r] <= 1) { + for (idx_t i = 0; i < string_len; i++) { + if (string_data[i] <= 1) { escape_count++; } } + idx_t len = string_len + escape_count + 1; auto data = allocator.Allocate(len); - // copy over the data and add in escapes + + // Copy over the data and add escapes. idx_t pos = 0; - for (idx_t r = 0; r < string_len; r++) { - if (string_data[r] <= 1) { - // escape + for (idx_t i = 0; i < string_len; i++) { + if (string_data[i] <= 1) { + // Add escape. data[pos++] = '\01'; } - data[pos++] = string_data[r]; + data[pos++] = string_data[i]; } - // end with a null-terminator + + // End with a null-terminator. 
data[pos] = '\0'; - return ARTKey(data, UnsafeNumericCast(len)); + return ARTKey(data, len); } template <> -ARTKey ARTKey::CreateARTKey(ArenaAllocator &allocator, const LogicalType &type, const char *value) { - return ARTKey::CreateARTKey(allocator, type, string_t(value, UnsafeNumericCast(strlen(value)))); +ARTKey ARTKey::CreateARTKey(ArenaAllocator &allocator, const char *value) { + return ARTKey::CreateARTKey(allocator, string_t(value, UnsafeNumericCast(strlen(value)))); } template <> -void ARTKey::CreateARTKey(ArenaAllocator &allocator, const LogicalType &type, ARTKey &key, string_t value) { - key = ARTKey::CreateARTKey(allocator, type, value); +void ARTKey::CreateARTKey(ArenaAllocator &allocator, ARTKey &key, string_t value) { + key = ARTKey::CreateARTKey(allocator, value); } template <> -void ARTKey::CreateARTKey(ArenaAllocator &allocator, const LogicalType &type, ARTKey &key, const char *value) { - ARTKey::CreateARTKey(allocator, type, key, string_t(value, UnsafeNumericCast(strlen(value)))); +void ARTKey::CreateARTKey(ArenaAllocator &allocator, ARTKey &key, const char *value) { + ARTKey::CreateARTKey(allocator, key, string_t(value, UnsafeNumericCast(strlen(value)))); } -bool ARTKey::operator>(const ARTKey &k) const { - for (uint32_t i = 0; i < MinValue(len, k.len); i++) { - if (data[i] > k.data[i]) { +ARTKey ARTKey::CreateKey(ArenaAllocator &allocator, PhysicalType type, Value &value) { + D_ASSERT(type == value.type().InternalType()); + switch (type) { + case PhysicalType::BOOL: + return ARTKey::CreateARTKey(allocator, value); + case PhysicalType::INT8: + return ARTKey::CreateARTKey(allocator, value); + case PhysicalType::INT16: + return ARTKey::CreateARTKey(allocator, value); + case PhysicalType::INT32: + return ARTKey::CreateARTKey(allocator, value); + case PhysicalType::INT64: + return ARTKey::CreateARTKey(allocator, value); + case PhysicalType::UINT8: + return ARTKey::CreateARTKey(allocator, value); + case PhysicalType::UINT16: + return 
ARTKey::CreateARTKey(allocator, value); + case PhysicalType::UINT32: + return ARTKey::CreateARTKey(allocator, value); + case PhysicalType::UINT64: + return ARTKey::CreateARTKey(allocator, value); + case PhysicalType::INT128: + return ARTKey::CreateARTKey(allocator, value); + case PhysicalType::UINT128: + return ARTKey::CreateARTKey(allocator, value); + case PhysicalType::FLOAT: + return ARTKey::CreateARTKey(allocator, value); + case PhysicalType::DOUBLE: + return ARTKey::CreateARTKey(allocator, value); + case PhysicalType::VARCHAR: + return ARTKey::CreateARTKey(allocator, value); + default: + throw InternalException("Invalid type for the ART key."); + } +} + +bool ARTKey::operator>(const ARTKey &key) const { + for (idx_t i = 0; i < MinValue(len, key.len); i++) { + if (data[i] > key.data[i]) { return true; - } else if (data[i] < k.data[i]) { + } else if (data[i] < key.data[i]) { return false; } } - return len > k.len; + return len > key.len; } -bool ARTKey::operator>=(const ARTKey &k) const { - for (uint32_t i = 0; i < MinValue(len, k.len); i++) { - if (data[i] > k.data[i]) { +bool ARTKey::operator>=(const ARTKey &key) const { + for (idx_t i = 0; i < MinValue(len, key.len); i++) { + if (data[i] > key.data[i]) { return true; - } else if (data[i] < k.data[i]) { + } else if (data[i] < key.data[i]) { return false; } } - return len >= k.len; + return len >= key.len; } -bool ARTKey::operator==(const ARTKey &k) const { - if (len != k.len) { +bool ARTKey::operator==(const ARTKey &key) const { + if (len != key.len) { return false; } - for (uint32_t i = 0; i < len; i++) { - if (data[i] != k.data[i]) { + for (idx_t i = 0; i < len; i++) { + if (data[i] != key.data[i]) { return false; } } return true; } -void ARTKey::ConcatenateARTKey(ArenaAllocator &allocator, ARTKey &other_key) { - - auto compound_data = allocator.Allocate(len + other_key.len); +void ARTKey::Concat(ArenaAllocator &allocator, const ARTKey &other) { + auto compound_data = allocator.Allocate(len + other.len); 
memcpy(compound_data, data, len); - memcpy(compound_data + len, other_key.data, other_key.len); - len += other_key.len; + memcpy(compound_data + len, other.data, other.len); + len += other.len; data = compound_data; } + +row_t ARTKey::GetRowId() const { + D_ASSERT(len == sizeof(row_t)); + return Radix::DecodeData(data); +} + +idx_t ARTKey::GetMismatchPos(const ARTKey &other, const idx_t start) const { + D_ASSERT(len <= other.len); + D_ASSERT(start <= len); + for (idx_t i = start; i < other.len; i++) { + if (data[i] != other.data[i]) { + return i; + } + } + return DConstants::INVALID_INDEX; +} + +//===--------------------------------------------------------------------===// +// ARTKeySection +//===--------------------------------------------------------------------===// + +ARTKeySection::ARTKeySection(idx_t start, idx_t end, idx_t depth, data_t byte) + : start(start), end(end), depth(depth), key_byte(byte) { +} + +ARTKeySection::ARTKeySection(idx_t start, idx_t end, const unsafe_vector &keys, const ARTKeySection §ion) + : start(start), end(end), depth(section.depth + 1), key_byte(keys[end].data[section.depth]) { +} + +void ARTKeySection::GetChildSections(unsafe_vector §ions, const unsafe_vector &keys) { + auto child_idx = start; + for (idx_t i = start + 1; i <= end; i++) { + if (keys[i - 1].data[depth] != keys[i].data[depth]) { + sections.emplace_back(child_idx, i - 1, keys, *this); + child_idx = i; + } + } + sections.emplace_back(child_idx, end, keys, *this); +} + } // namespace duckdb diff --git a/src/duckdb/src/execution/index/art/base_leaf.cpp b/src/duckdb/src/execution/index/art/base_leaf.cpp new file mode 100644 index 00000000..59492bb4 --- /dev/null +++ b/src/duckdb/src/execution/index/art/base_leaf.cpp @@ -0,0 +1,168 @@ +#include "duckdb/execution/index/art/base_leaf.hpp" + +#include "duckdb/execution/index/art/art_key.hpp" +#include "duckdb/execution/index/art/base_node.hpp" +#include "duckdb/execution/index/art/leaf.hpp" +#include 
"duckdb/execution/index/art/prefix.hpp" +#include "duckdb/execution/index/art/node256_leaf.hpp" + +namespace duckdb { + +//===--------------------------------------------------------------------===// +// BaseLeaf +//===--------------------------------------------------------------------===// + +template +void BaseLeaf::InsertByteInternal(BaseLeaf &n, const uint8_t byte) { + // Still space. Insert the child. + uint8_t child_pos = 0; + while (child_pos < n.count && n.key[child_pos] < byte) { + child_pos++; + } + + // Move children backwards to make space. + for (uint8_t i = n.count; i > child_pos; i--) { + n.key[i] = n.key[i - 1]; + } + + n.key[child_pos] = byte; + n.count++; +} + +template +BaseLeaf &BaseLeaf::DeleteByteInternal(ART &art, Node &node, const uint8_t byte) { + auto &n = Node::Ref(art, node, node.GetType()); + uint8_t child_pos = 0; + + for (; child_pos < n.count; child_pos++) { + if (n.key[child_pos] == byte) { + break; + } + } + n.count--; + + // Possibly move children backwards. + for (uint8_t i = child_pos; i < n.count; i++) { + n.key[i] = n.key[i + 1]; + } + return n; +} + +//===--------------------------------------------------------------------===// +// Node7Leaf +//===--------------------------------------------------------------------===// + +void Node7Leaf::InsertByte(ART &art, Node &node, const uint8_t byte) { + // The node is full. Grow to Node15. + auto &n7 = Node::Ref(art, node, NODE_7_LEAF); + if (n7.count == CAPACITY) { + auto node7 = node; + Node15Leaf::GrowNode7Leaf(art, node, node7); + Node15Leaf::InsertByte(art, node, byte); + return; + } + + // Still space. Insert the child. + uint8_t child_pos = 0; + while (child_pos < n7.count && n7.key[child_pos] < byte) { + child_pos++; + } + + InsertByteInternal(n7, byte); +} + +void Node7Leaf::DeleteByte(ART &art, Node &node, Node &prefix, const uint8_t byte, const ARTKey &row_id) { + auto &n7 = DeleteByteInternal(art, node, byte); + + // Compress one-way nodes. 
+ if (n7.count == 1) { + D_ASSERT(node.GetGateStatus() == GateStatus::GATE_NOT_SET); + + // Get the remaining row ID. + auto remainder = UnsafeNumericCast(row_id.GetRowId()) & AND_LAST_BYTE; + remainder |= UnsafeNumericCast(n7.key[0]); + + n7.count--; + Node::Free(art, node); + + if (prefix.GetType() == NType::PREFIX) { + Node::Free(art, prefix); + Leaf::New(prefix, UnsafeNumericCast(remainder)); + } else { + Leaf::New(node, UnsafeNumericCast(remainder)); + } + } +} + +void Node7Leaf::ShrinkNode15Leaf(ART &art, Node &node7_leaf, Node &node15_leaf) { + auto &n7 = New(art, node7_leaf); + auto &n15 = Node::Ref(art, node15_leaf, NType::NODE_15_LEAF); + node7_leaf.SetGateStatus(node15_leaf.GetGateStatus()); + + n7.count = n15.count; + for (uint8_t i = 0; i < n15.count; i++) { + n7.key[i] = n15.key[i]; + } + + n15.count = 0; + Node::Free(art, node15_leaf); +} + +//===--------------------------------------------------------------------===// +// Node15Leaf +//===--------------------------------------------------------------------===// + +void Node15Leaf::InsertByte(ART &art, Node &node, const uint8_t byte) { + // The node is full. Grow to Node256Leaf. + auto &n15 = Node::Ref(art, node, NODE_15_LEAF); + if (n15.count == CAPACITY) { + auto node15 = node; + Node256Leaf::GrowNode15Leaf(art, node, node15); + Node256Leaf::InsertByte(art, node, byte); + return; + } + + InsertByteInternal(n15, byte); +} + +void Node15Leaf::DeleteByte(ART &art, Node &node, const uint8_t byte) { + auto &n15 = DeleteByteInternal(art, node, byte); + + // Shrink node to Node7. 
+ if (n15.count < Node7Leaf::CAPACITY) { + auto node15 = node; + Node7Leaf::ShrinkNode15Leaf(art, node, node15); + } +} + +void Node15Leaf::GrowNode7Leaf(ART &art, Node &node15_leaf, Node &node7_leaf) { + auto &n7 = Node::Ref(art, node7_leaf, NType::NODE_7_LEAF); + auto &n15 = New(art, node15_leaf); + node15_leaf.SetGateStatus(node7_leaf.GetGateStatus()); + + n15.count = n7.count; + for (uint8_t i = 0; i < n7.count; i++) { + n15.key[i] = n7.key[i]; + } + + n7.count = 0; + Node::Free(art, node7_leaf); +} + +void Node15Leaf::ShrinkNode256Leaf(ART &art, Node &node15_leaf, Node &node256_leaf) { + auto &n15 = New(art, node15_leaf); + auto &n256 = Node::Ref(art, node256_leaf, NType::NODE_256_LEAF); + node15_leaf.SetGateStatus(node256_leaf.GetGateStatus()); + + ValidityMask mask(&n256.mask[0]); + for (uint16_t i = 0; i < Node256::CAPACITY; i++) { + if (mask.RowIsValid(i)) { + n15.key[n15.count] = UnsafeNumericCast(i); + n15.count++; + } + } + + Node::Free(art, node256_leaf); +} + +} // namespace duckdb diff --git a/src/duckdb/src/execution/index/art/base_node.cpp b/src/duckdb/src/execution/index/art/base_node.cpp new file mode 100644 index 00000000..228d12e8 --- /dev/null +++ b/src/duckdb/src/execution/index/art/base_node.cpp @@ -0,0 +1,163 @@ +#include "duckdb/execution/index/art/base_node.hpp" + +#include "duckdb/execution/index/art/leaf.hpp" +#include "duckdb/execution/index/art/node48.hpp" +#include "duckdb/execution/index/art/prefix.hpp" + +namespace duckdb { + +//===--------------------------------------------------------------------===// +// BaseNode +//===--------------------------------------------------------------------===// + +template +void BaseNode::InsertChildInternal(BaseNode &n, const uint8_t byte, const Node child) { + // Still space. Insert the child. + uint8_t child_pos = 0; + while (child_pos < n.count && n.key[child_pos] < byte) { + child_pos++; + } + + // Move children backwards to make space. 
+ for (uint8_t i = n.count; i > child_pos; i--) { + n.key[i] = n.key[i - 1]; + n.children[i] = n.children[i - 1]; + } + + n.key[child_pos] = byte; + n.children[child_pos] = child; + n.count++; +} + +template +BaseNode &BaseNode::DeleteChildInternal(ART &art, Node &node, const uint8_t byte) { + auto &n = Node::Ref(art, node, TYPE); + + uint8_t child_pos = 0; + for (; child_pos < n.count; child_pos++) { + if (n.key[child_pos] == byte) { + break; + } + } + + // Free the child and decrease the count. + Node::Free(art, n.children[child_pos]); + n.count--; + + // Possibly move children backwards. + for (uint8_t i = child_pos; i < n.count; i++) { + n.key[i] = n.key[i + 1]; + n.children[i] = n.children[i + 1]; + } + return n; +} + +//===--------------------------------------------------------------------===// +// Node4 +//===--------------------------------------------------------------------===// + +void Node4::InsertChild(ART &art, Node &node, const uint8_t byte, const Node child) { + // The node is full. Grow to Node16. + auto &n = Node::Ref(art, node, NODE_4); + if (n.count == CAPACITY) { + auto node4 = node; + Node16::GrowNode4(art, node, node4); + Node16::InsertChild(art, node, byte, child); + return; + } + + InsertChildInternal(n, byte, child); +} + +void Node4::DeleteChild(ART &art, Node &node, Node &prefix, const uint8_t byte, const GateStatus status) { + auto &n = DeleteChildInternal(art, node, byte); + + // Compress one-way nodes. 
+ if (n.count == 1) { + n.count--; + + auto child = n.children[0]; + auto remainder = n.key[0]; + auto old_status = node.GetGateStatus(); + + Node::Free(art, node); + Prefix::Concat(art, prefix, remainder, old_status, child, status); + } +} + +void Node4::ShrinkNode16(ART &art, Node &node4, Node &node16) { + auto &n4 = New(art, node4); + auto &n16 = Node::Ref(art, node16, NType::NODE_16); + node4.SetGateStatus(node16.GetGateStatus()); + + n4.count = n16.count; + for (uint8_t i = 0; i < n16.count; i++) { + n4.key[i] = n16.key[i]; + n4.children[i] = n16.children[i]; + } + + n16.count = 0; + Node::Free(art, node16); +} + +//===--------------------------------------------------------------------===// +// Node16 +//===--------------------------------------------------------------------===// + +void Node16::DeleteChild(ART &art, Node &node, const uint8_t byte) { + auto &n = DeleteChildInternal(art, node, byte); + + // Shrink node to Node4. + if (n.count < Node4::CAPACITY) { + auto node16 = node; + Node4::ShrinkNode16(art, node, node16); + } +} + +void Node16::InsertChild(ART &art, Node &node, const uint8_t byte, const Node child) { + // The node is full. Grow to Node48. 
+ auto &n16 = Node::Ref(art, node, NODE_16); + if (n16.count == CAPACITY) { + auto node16 = node; + Node48::GrowNode16(art, node, node16); + Node48::InsertChild(art, node, byte, child); + return; + } + + InsertChildInternal(n16, byte, child); +} + +void Node16::GrowNode4(ART &art, Node &node16, Node &node4) { + auto &n4 = Node::Ref(art, node4, NType::NODE_4); + auto &n16 = New(art, node16); + node16.SetGateStatus(node4.GetGateStatus()); + + n16.count = n4.count; + for (uint8_t i = 0; i < n4.count; i++) { + n16.key[i] = n4.key[i]; + n16.children[i] = n4.children[i]; + } + + n4.count = 0; + Node::Free(art, node4); +} + +void Node16::ShrinkNode48(ART &art, Node &node16, Node &node48) { + auto &n16 = New(art, node16); + auto &n48 = Node::Ref(art, node48, NType::NODE_48); + node16.SetGateStatus(node48.GetGateStatus()); + + n16.count = 0; + for (uint16_t i = 0; i < Node256::CAPACITY; i++) { + if (n48.child_index[i] != Node48::EMPTY_MARKER) { + n16.key[n16.count] = UnsafeNumericCast(i); + n16.children[n16.count] = n48.children[n48.child_index[i]]; + n16.count++; + } + } + + n48.count = 0; + Node::Free(art, node48); +} + +} // namespace duckdb diff --git a/src/duckdb/src/execution/index/art/iterator.cpp b/src/duckdb/src/execution/index/art/iterator.cpp index 0d0290eb..ca6f5c71 100644 --- a/src/duckdb/src/execution/index/art/iterator.cpp +++ b/src/duckdb/src/execution/index/art/iterator.cpp @@ -7,168 +7,216 @@ namespace duckdb { -bool IteratorKey::operator>(const ARTKey &key) const { - for (idx_t i = 0; i < MinValue(key_bytes.size(), key.len); i++) { - if (key_bytes[i] > key.data[i]) { - return true; - } else if (key_bytes[i] < key.data[i]) { +//===--------------------------------------------------------------------===// +// IteratorKey +//===--------------------------------------------------------------------===// + +bool IteratorKey::Contains(const ARTKey &key) const { + if (Size() < key.len) { + return false; + } + for (idx_t i = 0; i < key.len; i++) { + if (key_bytes[i] 
!= key.data[i]) { return false; } } - return key_bytes.size() > key.len; + return true; } -bool IteratorKey::operator>=(const ARTKey &key) const { - for (idx_t i = 0; i < MinValue(key_bytes.size(), key.len); i++) { +bool IteratorKey::GreaterThan(const ARTKey &key, const bool equal) const { + for (idx_t i = 0; i < MinValue(Size(), key.len); i++) { if (key_bytes[i] > key.data[i]) { return true; } else if (key_bytes[i] < key.data[i]) { return false; } } - return key_bytes.size() >= key.len; -} - -bool IteratorKey::operator==(const ARTKey &key) const { - // NOTE: we only use this for finding the LowerBound, in which case the length - // has to be equal - D_ASSERT(key_bytes.size() == key.len); - for (idx_t i = 0; i < key_bytes.size(); i++) { - if (key_bytes[i] != key.data[i]) { - return false; - } + if (equal) { + // Returns true, if current_key is greater than key. + return Size() > key.len; } - return true; + // Returns true, if current_key and key match or current_key is greater than key. + return Size() >= key.len; } -bool Iterator::Scan(const ARTKey &upper_bound, const idx_t max_count, vector &result_ids, const bool equal) { +//===--------------------------------------------------------------------===// +// Iterator +//===--------------------------------------------------------------------===// +bool Iterator::Scan(const ARTKey &upper_bound, const idx_t max_count, unsafe_vector &row_ids, const bool equal) { bool has_next; do { - if (!upper_bound.Empty()) { - // no more row IDs within the key bounds - if (equal) { - if (current_key > upper_bound) { - return true; + // An empty upper bound indicates that no upper bound exists. 
+ if (!upper_bound.Empty() && status == GateStatus::GATE_NOT_SET) { + if (current_key.GreaterThan(upper_bound, equal)) { + return true; + } + } + + switch (last_leaf.GetType()) { + case NType::LEAF_INLINED: + if (row_ids.size() + 1 > max_count) { + return false; + } + row_ids.push_back(last_leaf.GetRowId()); + break; + case NType::LEAF: + if (!Leaf::DeprecatedGetRowIds(art, last_leaf, row_ids, max_count)) { + return false; + } + break; + case NType::NODE_7_LEAF: + case NType::NODE_15_LEAF: + case NType::NODE_256_LEAF: { + uint8_t byte = 0; + while (last_leaf.GetNextByte(art, byte)) { + if (row_ids.size() + 1 > max_count) { + return false; } - } else { - if (current_key >= upper_bound) { - return true; + row_id[ROW_ID_SIZE - 1] = byte; + ARTKey key(&row_id[0], ROW_ID_SIZE); + row_ids.push_back(key.GetRowId()); + if (byte == NumericLimits::Maximum()) { + break; } + byte++; } + break; } - - // copy all row IDs of this leaf into the result IDs (if they don't exceed max_count) - if (!Leaf::GetRowIds(*art, last_leaf, result_ids, max_count)) { - return false; + default: + throw InternalException("Invalid leaf type for index scan."); } - // get the next leaf has_next = Next(); - } while (has_next); - return true; } void Iterator::FindMinimum(const Node &node) { - D_ASSERT(node.HasMetadata()); - // found the minimum - if (node.GetType() == NType::LEAF || node.GetType() == NType::LEAF_INLINED) { + // Found the minimum. + if (node.IsAnyLeaf()) { last_leaf = node; return; } - // traverse the prefix + // We are passing a gate node. + if (node.GetGateStatus() == GateStatus::GATE_SET) { + D_ASSERT(status == GateStatus::GATE_NOT_SET); + status = GateStatus::GATE_SET; + nested_depth = 0; + } + + // Traverse the prefix. 
if (node.GetType() == NType::PREFIX) { - auto &prefix = Node::Ref(*art, node, NType::PREFIX); - for (idx_t i = 0; i < prefix.data[Node::PREFIX_SIZE]; i++) { + Prefix prefix(art, node); + for (idx_t i = 0; i < prefix.data[Prefix::Count(art)]; i++) { current_key.Push(prefix.data[i]); + if (status == GateStatus::GATE_SET) { + row_id[nested_depth] = prefix.data[i]; + nested_depth++; + D_ASSERT(nested_depth < Prefix::ROW_ID_SIZE); + } } nodes.emplace(node, 0); - return FindMinimum(prefix.ptr); + return FindMinimum(*prefix.ptr); } - // go to the leftmost entry in the current node and recurse + // Go to the leftmost entry in the current node. uint8_t byte = 0; - auto next = node.GetNextChild(*art, byte); + auto next = node.GetNextChild(art, byte); D_ASSERT(next); + + // Recurse on the leftmost node. current_key.Push(byte); + if (status == GateStatus::GATE_SET) { + row_id[nested_depth] = byte; + nested_depth++; + D_ASSERT(nested_depth < Prefix::ROW_ID_SIZE); + } nodes.emplace(node, byte); FindMinimum(*next); } bool Iterator::LowerBound(const Node &node, const ARTKey &key, const bool equal, idx_t depth) { - if (!node.HasMetadata()) { return false; } - // we found the lower bound - if (node.GetType() == NType::LEAF || node.GetType() == NType::LEAF_INLINED) { - if (!equal && current_key == key) { + // We found any leaf node, or a gate. 
+ if (node.IsAnyLeaf() || node.GetGateStatus() == GateStatus::GATE_SET) { + D_ASSERT(status == GateStatus::GATE_NOT_SET); + D_ASSERT(current_key.Size() == key.len); + if (!equal && current_key.Contains(key)) { return Next(); } - last_leaf = node; + + if (node.GetGateStatus() == GateStatus::GATE_SET) { + FindMinimum(node); + } else { + last_leaf = node; + } return true; } + D_ASSERT(node.GetGateStatus() == GateStatus::GATE_NOT_SET); if (node.GetType() != NType::PREFIX) { auto next_byte = key[depth]; - auto child = node.GetNextChild(*art, next_byte); + auto child = node.GetNextChild(art, next_byte); + + // The key is greater than any key in this subtree. if (!child) { - // the key is greater than any key in this subtree return Next(); } current_key.Push(next_byte); nodes.emplace(node, next_byte); + // We return the minimum because all keys are greater than the lower bound. if (next_byte > key[depth]) { - // we only need to find the minimum from here - // because all keys will be greater than the lower bound FindMinimum(*child); return true; } - // recurse into the child + // We recurse into the child. return LowerBound(*child, key, equal, depth + 1); } - // resolve the prefix - auto &prefix = Node::Ref(*art, node, NType::PREFIX); - for (idx_t i = 0; i < prefix.data[Node::PREFIX_SIZE]; i++) { + // Push back all prefix bytes. + Prefix prefix(art, node); + for (idx_t i = 0; i < prefix.data[Prefix::Count(art)]; i++) { current_key.Push(prefix.data[i]); } nodes.emplace(node, 0); - for (idx_t i = 0; i < prefix.data[Node::PREFIX_SIZE]; i++) { - // the key down to this node is less than the lower bound, the next key will be - // greater than the lower bound + // We compare the prefix bytes with the key bytes. + for (idx_t i = 0; i < prefix.data[Prefix::Count(art)]; i++) { + // We found a prefix byte that is less than its corresponding key byte. + // I.e., the subsequent node is lesser than the key. Thus, the next node + // is the lower bound. 
if (prefix.data[i] < key[depth + i]) { return Next(); } - // we only need to find the minimum from here - // because all keys will be greater than the lower bound + + // We found a prefix byte that is greater than its corresponding key byte. + // I.e., the subsequent node is greater than the key. Thus, the minimum is + // the lower bound. if (prefix.data[i] > key[depth + i]) { - FindMinimum(prefix.ptr); + FindMinimum(*prefix.ptr); return true; } } - // recurse into the child - depth += prefix.data[Node::PREFIX_SIZE]; - return LowerBound(prefix.ptr, key, equal, depth); + // The prefix matches the key. We recurse into the child. + depth += prefix.data[Prefix::Count(art)]; + return LowerBound(*prefix.ptr, key, equal, depth); } bool Iterator::Next() { - while (!nodes.empty()) { - auto &top = nodes.top(); - D_ASSERT(top.node.GetType() != NType::LEAF && top.node.GetType() != NType::LEAF_INLINED); + D_ASSERT(!top.node.IsAnyLeaf()); if (top.node.GetType() == NType::PREFIX) { PopNode(); @@ -176,20 +224,26 @@ bool Iterator::Next() { } if (top.byte == NumericLimits::Maximum()) { - // no node found: move up the tree, pop key byte of current node + // No more children of this node. + // Move up the tree by popping the key byte of the current node. PopNode(); continue; } top.byte++; - auto next_node = top.node.GetNextChild(*art, top.byte); + auto next_node = top.node.GetNextChild(art, top.byte); if (!next_node) { + // No more children of this node. + // Move up the tree by popping the key byte of the current node. 
PopNode(); continue; } current_key.Pop(1); current_key.Push(top.byte); + if (status == GateStatus::GATE_SET) { + row_id[nested_depth - 1] = top.byte; + } FindMinimum(*next_node); return true; @@ -198,12 +252,30 @@ bool Iterator::Next() { } void Iterator::PopNode() { - if (nodes.top().node.GetType() == NType::PREFIX) { - auto &prefix = Node::Ref(*art, nodes.top().node, NType::PREFIX); - auto prefix_byte_count = prefix.data[Node::PREFIX_SIZE]; - current_key.Pop(prefix_byte_count); - } else { + // We are popping a gate node. + if (nodes.top().node.GetGateStatus() == GateStatus::GATE_SET) { + D_ASSERT(status == GateStatus::GATE_SET); + status = GateStatus::GATE_NOT_SET; + } + + // Pop the byte and the node. + if (nodes.top().node.GetType() != NType::PREFIX) { current_key.Pop(1); + if (status == GateStatus::GATE_SET) { + nested_depth--; + D_ASSERT(nested_depth < Prefix::ROW_ID_SIZE); + } + nodes.pop(); + return; + } + + // Pop all prefix bytes and the node. + Prefix prefix(art, nodes.top().node); + auto prefix_byte_count = prefix.data[Prefix::Count(art)]; + current_key.Pop(prefix_byte_count); + if (status == GateStatus::GATE_SET) { + nested_depth -= prefix_byte_count; + D_ASSERT(nested_depth < Prefix::ROW_ID_SIZE); } nodes.pop(); } diff --git a/src/duckdb/src/execution/index/art/leaf.cpp b/src/duckdb/src/execution/index/art/leaf.cpp index 0fb8804e..4a5b346c 100644 --- a/src/duckdb/src/execution/index/art/leaf.cpp +++ b/src/duckdb/src/execution/index/art/leaf.cpp @@ -1,347 +1,243 @@ #include "duckdb/execution/index/art/leaf.hpp" + +#include "duckdb/common/types.hpp" #include "duckdb/execution/index/art/art.hpp" +#include "duckdb/execution/index/art/art_key.hpp" +#include "duckdb/execution/index/art/base_leaf.hpp" +#include "duckdb/execution/index/art/base_node.hpp" +#include "duckdb/execution/index/art/iterator.hpp" #include "duckdb/execution/index/art/node.hpp" -#include "duckdb/common/numeric_utils.hpp" +#include "duckdb/execution/index/art/prefix.hpp" namespace duckdb 
{ void Leaf::New(Node &node, const row_t row_id) { - - // we directly inline this row ID into the node pointer D_ASSERT(row_id < MAX_ROW_ID_LOCAL); + + auto status = node.GetGateStatus(); node.Clear(); - node.SetMetadata(static_cast(NType::LEAF_INLINED)); + + node.SetMetadata(static_cast(INLINED)); node.SetRowId(row_id); + node.SetGateStatus(status); } -void Leaf::New(ART &art, reference &node, const row_t *row_ids, idx_t count) { - +void Leaf::New(ART &art, reference &node, const unsafe_vector &row_ids, const idx_t start, + const idx_t count) { D_ASSERT(count > 1); + D_ASSERT(!node.get().HasMetadata()); - idx_t copy_count = 0; - while (count) { - node.get() = Node::GetAllocator(art, NType::LEAF).New(); - node.get().SetMetadata(static_cast(NType::LEAF)); - - auto &leaf = Node::RefMutable(art, node, NType::LEAF); - - leaf.count = UnsafeNumericCast(MinValue((idx_t)Node::LEAF_SIZE, count)); - - for (idx_t i = 0; i < leaf.count; i++) { - leaf.row_ids[i] = row_ids[copy_count + i]; - } - - copy_count += leaf.count; - count -= leaf.count; - - node = leaf.ptr; - leaf.ptr.Clear(); + // We cannot recurse into the leaf during Construct(...) because row IDs are not sorted. 
+ for (idx_t i = 0; i < count; i++) { + idx_t offset = start + i; + art.Insert(node, row_ids[offset], 0, row_ids[offset], GateStatus::GATE_SET); } + node.get().SetGateStatus(GateStatus::GATE_SET); } -Leaf &Leaf::New(ART &art, Node &node) { - node = Node::GetAllocator(art, NType::LEAF).New(); - node.SetMetadata(static_cast(NType::LEAF)); - auto &leaf = Node::RefMutable(art, node, NType::LEAF); - - leaf.count = 0; - leaf.ptr.Clear(); - return leaf; -} - -void Leaf::Free(ART &art, Node &node) { +void Leaf::MergeInlined(ART &art, Node &l_node, Node &r_node) { + D_ASSERT(r_node.GetType() == INLINED); - Node current_node = node; - Node next_node; - while (current_node.HasMetadata()) { - next_node = Node::RefMutable(art, current_node, NType::LEAF).ptr; - Node::GetAllocator(art, NType::LEAF).Free(current_node); - current_node = next_node; - } - - node.Clear(); + ArenaAllocator arena_allocator(Allocator::Get(art.db)); + auto key = ARTKey::CreateARTKey(arena_allocator, r_node.GetRowId()); + art.Insert(l_node, key, 0, key, l_node.GetGateStatus()); + r_node.Clear(); } -void Leaf::InitializeMerge(ART &art, Node &node, const ARTFlags &flags) { - - auto merge_buffer_count = flags.merge_buffer_counts[static_cast(NType::LEAF) - 1]; +void Leaf::InsertIntoInlined(ART &art, Node &node, const ARTKey &row_id, idx_t depth, const GateStatus status) { + D_ASSERT(node.GetType() == INLINED); - Node next_node = node; - node.IncreaseBufferId(merge_buffer_count); + ArenaAllocator allocator(Allocator::Get(art.db)); + auto key = ARTKey::CreateARTKey(allocator, node.GetRowId()); - while (next_node.HasMetadata()) { - auto &leaf = Node::RefMutable(art, next_node, NType::LEAF); - next_node = leaf.ptr; - if (leaf.ptr.HasMetadata()) { - leaf.ptr.IncreaseBufferId(merge_buffer_count); - } + GateStatus new_status; + if (status == GateStatus::GATE_NOT_SET || node.GetGateStatus() == GateStatus::GATE_SET) { + new_status = GateStatus::GATE_SET; + } else { + new_status = GateStatus::GATE_NOT_SET; } -} -void 
Leaf::Merge(ART &art, Node &l_node, Node &r_node) { - - D_ASSERT(l_node.HasMetadata() && r_node.HasMetadata()); + if (new_status == GateStatus::GATE_SET) { + depth = 0; + } + node.Clear(); - // copy inlined row ID of r_node - if (r_node.GetType() == NType::LEAF_INLINED) { - Insert(art, l_node, r_node.GetRowId()); - r_node.Clear(); - return; + // Get the mismatching position. + D_ASSERT(row_id.len == key.len); + auto pos = row_id.GetMismatchPos(key, depth); + D_ASSERT(pos != DConstants::INVALID_INDEX); + D_ASSERT(pos >= depth); + auto byte = row_id.data[pos]; + + // Create the (optional) prefix and the node. + reference next(node); + auto count = pos - depth; + if (count != 0) { + Prefix::New(art, next, row_id, depth, count); + } + if (pos == Prefix::ROW_ID_COUNT) { + Node7Leaf::New(art, next); + } else { + Node4::New(art, next); } - // l_node has an inlined row ID, swap and insert - if (l_node.GetType() == NType::LEAF_INLINED) { - auto row_id = l_node.GetRowId(); - l_node = r_node; - Insert(art, l_node, row_id); - r_node.Clear(); - return; + // Create the children. + Node row_id_node; + Leaf::New(row_id_node, row_id.GetRowId()); + Node remainder; + if (pos != Prefix::ROW_ID_COUNT) { + Leaf::New(remainder, key.GetRowId()); } - D_ASSERT(l_node.GetType() != NType::LEAF_INLINED); - D_ASSERT(r_node.GetType() != NType::LEAF_INLINED); + Node::InsertChild(art, next, key[pos], remainder); + Node::InsertChild(art, next, byte, row_id_node); + node.SetGateStatus(new_status); +} - reference l_node_ref(l_node); - reference l_leaf = Node::RefMutable(art, l_node_ref, NType::LEAF); +void Leaf::TransformToNested(ART &art, Node &node) { + D_ASSERT(node.GetType() == LEAF); - // find a non-full node - while (l_leaf.get().count == Node::LEAF_SIZE) { - l_node_ref = l_leaf.get().ptr; + ArenaAllocator allocator(Allocator::Get(art.db)); + Node root = Node(); - // the last leaf is full - if (!l_leaf.get().ptr.HasMetadata()) { - break; + // Move all row IDs into the nested leaf. 
+ reference leaf_ref(node); + while (leaf_ref.get().HasMetadata()) { + auto &leaf = Node::Ref(art, leaf_ref, LEAF); + for (uint8_t i = 0; i < leaf.count; i++) { + auto row_id = ARTKey::CreateARTKey(allocator, leaf.row_ids[i]); + art.Insert(root, row_id, 0, row_id, GateStatus::GATE_SET); } - l_leaf = Node::RefMutable(art, l_node_ref, NType::LEAF); + leaf_ref = leaf.ptr; } - // store the last leaf and then append r_node - auto last_leaf_node = l_node_ref.get(); - l_node_ref.get() = r_node; - r_node.Clear(); - - // append the remaining row IDs of the last leaf node - if (last_leaf_node.HasMetadata()) { - // find the tail - l_leaf = Node::RefMutable(art, l_node_ref, NType::LEAF); - while (l_leaf.get().ptr.HasMetadata()) { - l_leaf = Node::RefMutable(art, l_leaf.get().ptr, NType::LEAF); - } - // append the row IDs - auto &last_leaf = Node::RefMutable(art, last_leaf_node, NType::LEAF); - for (idx_t i = 0; i < last_leaf.count; i++) { - l_leaf = l_leaf.get().Append(art, last_leaf.row_ids[i]); - } - Node::GetAllocator(art, NType::LEAF).Free(last_leaf_node); - } + root.SetGateStatus(GateStatus::GATE_SET); + Node::Free(art, node); + node = root; } -void Leaf::Insert(ART &art, Node &node, const row_t row_id) { +void Leaf::TransformToDeprecated(ART &art, Node &node) { + D_ASSERT(node.GetGateStatus() == GateStatus::GATE_SET || node.GetType() == LEAF); - D_ASSERT(node.HasMetadata()); - - if (node.GetType() == NType::LEAF_INLINED) { - MoveInlinedToLeaf(art, node); - Insert(art, node, row_id); + // Early-out, if we never transformed this leaf. 
+ if (node.GetGateStatus() == GateStatus::GATE_NOT_SET) { return; } - // append to the tail - reference leaf = Node::RefMutable(art, node, NType::LEAF); - while (leaf.get().ptr.HasMetadata()) { - leaf = Node::RefMutable(art, leaf.get().ptr, NType::LEAF); - } - leaf.get().Append(art, row_id); -} - -bool Leaf::Remove(ART &art, reference &node, const row_t row_id) { - - D_ASSERT(node.get().HasMetadata()); + // Collect all row IDs and free the nested leaf. + unsafe_vector row_ids; + Iterator it(art); + it.FindMinimum(node); + ARTKey empty_key = ARTKey(); + it.Scan(empty_key, NumericLimits().Maximum(), row_ids, false); + Node::Free(art, node); + D_ASSERT(row_ids.size() > 1); - if (node.get().GetType() == NType::LEAF_INLINED) { - if (node.get().GetRowId() == row_id) { - return true; - } - return false; - } + // Create the deprecated leaves. + idx_t remaining = row_ids.size(); + idx_t copy_count = 0; + reference ref(node); + while (remaining) { + ref.get() = Node::GetAllocator(art, LEAF).New(); + ref.get().SetMetadata(static_cast(LEAF)); - reference leaf = Node::RefMutable(art, node, NType::LEAF); + auto &leaf = Node::Ref(art, ref, LEAF); + auto min = MinValue(UnsafeNumericCast(LEAF_SIZE), remaining); + leaf.count = UnsafeNumericCast(min); - // inline the remaining row ID - if (leaf.get().count == 2) { - if (leaf.get().row_ids[0] == row_id || leaf.get().row_ids[1] == row_id) { - auto remaining_row_id = leaf.get().row_ids[0] == row_id ? 
leaf.get().row_ids[1] : leaf.get().row_ids[0]; - Node::Free(art, node); - New(node, remaining_row_id); + for (uint8_t i = 0; i < leaf.count; i++) { + leaf.row_ids[i] = row_ids[copy_count + i]; } - return false; - } - - // get the last row ID (the order within a leaf does not matter) - // because we want to overwrite the row ID to remove with that one - - // go to the tail and keep track of the previous leaf node - reference prev_leaf(leaf); - while (leaf.get().ptr.HasMetadata()) { - prev_leaf = leaf; - leaf = Node::RefMutable(art, leaf.get().ptr, NType::LEAF); - } - auto last_idx = leaf.get().count; - auto last_row_id = leaf.get().row_ids[last_idx - 1]; - - // only one row ID in this leaf segment, free it - if (leaf.get().count == 1) { - Node::Free(art, prev_leaf.get().ptr); - if (last_row_id == row_id) { - return false; - } - } else { - leaf.get().count--; - } + copy_count += leaf.count; + remaining -= leaf.count; - // find the row ID and copy the last row ID to that position - while (node.get().HasMetadata()) { - leaf = Node::RefMutable(art, node, NType::LEAF); - for (idx_t i = 0; i < leaf.get().count; i++) { - if (leaf.get().row_ids[i] == row_id) { - leaf.get().row_ids[i] = last_row_id; - return false; - } - } - node = leaf.get().ptr; + ref = leaf.ptr; + leaf.ptr.Clear(); } - return false; } -idx_t Leaf::TotalCount(ART &art, const Node &node) { +//===--------------------------------------------------------------------===// +// Deprecated code paths. 
+//===--------------------------------------------------------------------===// - D_ASSERT(node.HasMetadata()); - if (node.GetType() == NType::LEAF_INLINED) { - return 1; - } +void Leaf::DeprecatedFree(ART &art, Node &node) { + D_ASSERT(node.GetType() == LEAF); - idx_t count = 0; - reference node_ref(node); - while (node_ref.get().HasMetadata()) { - auto &leaf = Node::Ref(art, node_ref, NType::LEAF); - count += leaf.count; - node_ref = leaf.ptr; + Node next; + while (node.HasMetadata()) { + next = Node::Ref(art, node, LEAF).ptr; + Node::GetAllocator(art, LEAF).Free(node); + node = next; } - return count; + node.Clear(); } -bool Leaf::GetRowIds(ART &art, const Node &node, vector &result_ids, idx_t max_count) { +bool Leaf::DeprecatedGetRowIds(ART &art, const Node &node, unsafe_vector &row_ids, const idx_t max_count) { + D_ASSERT(node.GetType() == LEAF); - // adding more elements would exceed the maximum count - D_ASSERT(node.HasMetadata()); - if (result_ids.size() + TotalCount(art, node) > max_count) { - return false; - } - - if (node.GetType() == NType::LEAF_INLINED) { - // push back the inlined row ID of this leaf - result_ids.push_back(node.GetRowId()); + reference ref(node); + while (ref.get().HasMetadata()) { - } else { - // push back all the row IDs of this leaf - reference last_leaf_ref(node); - while (last_leaf_ref.get().HasMetadata()) { - auto &leaf = Node::Ref(art, last_leaf_ref, NType::LEAF); - for (idx_t i = 0; i < leaf.count; i++) { - result_ids.push_back(leaf.row_ids[i]); - } - last_leaf_ref = leaf.ptr; + auto &leaf = Node::Ref(art, ref, LEAF); + if (row_ids.size() + leaf.count > max_count) { + return false; + } + for (uint8_t i = 0; i < leaf.count; i++) { + row_ids.push_back(leaf.row_ids[i]); } + ref = leaf.ptr; } - return true; } -bool Leaf::ContainsRowId(ART &art, const Node &node, const row_t row_id) { - +void Leaf::DeprecatedVacuum(ART &art, Node &node) { D_ASSERT(node.HasMetadata()); - - if (node.GetType() == NType::LEAF_INLINED) { - return 
node.GetRowId() == row_id; - } - - reference ref_node(node); - while (ref_node.get().HasMetadata()) { - auto &leaf = Node::Ref(art, ref_node, NType::LEAF); - for (idx_t i = 0; i < leaf.count; i++) { - if (leaf.row_ids[i] == row_id) { - return true; - } + D_ASSERT(node.GetType() == LEAF); + + auto &allocator = Node::GetAllocator(art, LEAF); + reference ref(node); + while (ref.get().HasMetadata()) { + if (allocator.NeedsVacuum(ref)) { + ref.get() = allocator.VacuumPointer(ref); + ref.get().SetMetadata(static_cast(LEAF)); } - ref_node = leaf.ptr; + auto &leaf = Node::Ref(art, ref, LEAF); + ref = leaf.ptr; } - - return false; } -string Leaf::VerifyAndToString(ART &art, const Node &node, const bool only_verify) { - - if (node.GetType() == NType::LEAF_INLINED) { - return only_verify ? "" : "Leaf [count: 1, row ID: " + to_string(node.GetRowId()) + "]"; - } +string Leaf::DeprecatedVerifyAndToString(ART &art, const Node &node, const bool only_verify) { + D_ASSERT(node.GetType() == LEAF); string str = ""; + reference ref(node); - reference node_ref(node); - while (node_ref.get().HasMetadata()) { - - auto &leaf = Node::Ref(art, node_ref, NType::LEAF); - D_ASSERT(leaf.count <= Node::LEAF_SIZE); + while (ref.get().HasMetadata()) { + auto &leaf = Node::Ref(art, ref, LEAF); + D_ASSERT(leaf.count <= LEAF_SIZE); str += "Leaf [count: " + to_string(leaf.count) + ", row IDs: "; - for (idx_t i = 0; i < leaf.count; i++) { + for (uint8_t i = 0; i < leaf.count; i++) { str += to_string(leaf.row_ids[i]) + "-"; } str += "] "; - - node_ref = leaf.ptr; + ref = leaf.ptr; } - return only_verify ? 
"" : str; -} -void Leaf::Vacuum(ART &art, Node &node) { - - auto &allocator = Node::GetAllocator(art, NType::LEAF); - - reference node_ref(node); - while (node_ref.get().HasMetadata()) { - if (allocator.NeedsVacuum(node_ref)) { - node_ref.get() = allocator.VacuumPointer(node_ref); - node_ref.get().SetMetadata(static_cast(NType::LEAF)); - } - auto &leaf = Node::RefMutable(art, node_ref, NType::LEAF); - node_ref = leaf.ptr; - } -} - -void Leaf::MoveInlinedToLeaf(ART &art, Node &node) { - - D_ASSERT(node.GetType() == NType::LEAF_INLINED); - auto row_id = node.GetRowId(); - auto &leaf = New(art, node); - - leaf.count = 1; - leaf.row_ids[0] = row_id; + return only_verify ? "" : str; } -Leaf &Leaf::Append(ART &art, const row_t row_id) { +void Leaf::DeprecatedVerifyAllocations(ART &art, unordered_map &node_counts) const { + auto idx = Node::GetAllocatorIdx(LEAF); + node_counts[idx]++; - reference leaf(*this); - - // we need a new leaf node - if (leaf.get().count == Node::LEAF_SIZE) { - leaf = New(art, leaf.get().ptr); + reference ref(ptr); + while (ref.get().HasMetadata()) { + auto &leaf = Node::Ref(art, ref, LEAF); + node_counts[idx]++; + ref = leaf.ptr; } - - leaf.get().row_ids[leaf.get().count] = row_id; - leaf.get().count++; - return leaf.get(); } } // namespace duckdb diff --git a/src/duckdb/src/execution/index/art/node.cpp b/src/duckdb/src/execution/index/art/node.cpp index 5c82b748..8a39d832 100644 --- a/src/duckdb/src/execution/index/art/node.cpp +++ b/src/duckdb/src/execution/index/art/node.cpp @@ -3,25 +3,34 @@ #include "duckdb/common/limits.hpp" #include "duckdb/common/swap.hpp" #include "duckdb/execution/index/art/art.hpp" +#include "duckdb/execution/index/art/art_key.hpp" +#include "duckdb/execution/index/art/base_leaf.hpp" +#include "duckdb/execution/index/art/base_node.hpp" +#include "duckdb/execution/index/art/iterator.hpp" +#include "duckdb/execution/index/art/leaf.hpp" #include "duckdb/execution/index/art/node256.hpp" +#include 
"duckdb/execution/index/art/node256_leaf.hpp" #include "duckdb/execution/index/art/node48.hpp" -#include "duckdb/execution/index/art/node16.hpp" -#include "duckdb/execution/index/art/node4.hpp" -#include "duckdb/execution/index/art/leaf.hpp" #include "duckdb/execution/index/art/prefix.hpp" #include "duckdb/storage/table_io_manager.hpp" namespace duckdb { //===--------------------------------------------------------------------===// -// New / Free +// New and free //===--------------------------------------------------------------------===// -void Node::New(ART &art, Node &node, const NType type) { - - // NOTE: leaves and prefixes should not pass through this function - +void Node::New(ART &art, Node &node, NType type) { switch (type) { + case NType::NODE_7_LEAF: + Node7Leaf::New(art, node); + break; + case NType::NODE_15_LEAF: + Node15Leaf::New(art, node); + break; + case NType::NODE_256_LEAF: + Node256Leaf::New(art, node); + break; case NType::NODE_4: Node4::New(art, node); break; @@ -35,25 +44,22 @@ void Node::New(ART &art, Node &node, const NType type) { Node256::New(art, node); break; default: - throw InternalException("Invalid node type for New."); + throw InternalException("Invalid node type for New: %d.", static_cast(type)); } } void Node::Free(ART &art, Node &node) { - if (!node.HasMetadata()) { return node.Clear(); } - // free the children of the nodes + // Free the children. 
auto type = node.GetType(); switch (type) { case NType::PREFIX: - // iterative return Prefix::Free(art, node); case NType::LEAF: - // iterative - return Leaf::Free(art, node); + return Leaf::DeprecatedFree(art, node); case NType::NODE_4: Node4::Free(art, node); break; @@ -68,6 +74,10 @@ void Node::Free(ART &art, Node &node) { break; case NType::LEAF_INLINED: return node.Clear(); + case NType::NODE_7_LEAF: + case NType::NODE_15_LEAF: + case NType::NODE_256_LEAF: + break; } GetAllocator(art, type).Free(node); @@ -75,11 +85,36 @@ void Node::Free(ART &art, Node &node) { } //===--------------------------------------------------------------------===// -// Get Allocators +// Allocators //===--------------------------------------------------------------------===// FixedSizeAllocator &Node::GetAllocator(const ART &art, const NType type) { - return *(*art.allocators)[static_cast(type) - 1]; + return *(*art.allocators)[GetAllocatorIdx(type)]; +} + +uint8_t Node::GetAllocatorIdx(const NType type) { + switch (type) { + case NType::PREFIX: + return 0; + case NType::LEAF: + return 1; + case NType::NODE_4: + return 2; + case NType::NODE_16: + return 3; + case NType::NODE_48: + return 4; + case NType::NODE_256: + return 5; + case NType::NODE_7_LEAF: + return 6; + case NType::NODE_15_LEAF: + return 7; + case NType::NODE_256_LEAF: + return 8; + default: + throw InternalException("Invalid node type for GetAllocatorIdx: %d.", static_cast(type)); + } } //===--------------------------------------------------------------------===// @@ -87,24 +122,28 @@ FixedSizeAllocator &Node::GetAllocator(const ART &art, const NType type) { //===--------------------------------------------------------------------===// void Node::ReplaceChild(const ART &art, const uint8_t byte, const Node child) const { + D_ASSERT(HasMetadata()); - switch (GetType()) { + auto type = GetType(); + switch (type) { case NType::NODE_4: - return RefMutable(art, *this, NType::NODE_4).ReplaceChild(byte, child); + return 
Node4::ReplaceChild(Ref(art, *this, type), byte, child); case NType::NODE_16: - return RefMutable(art, *this, NType::NODE_16).ReplaceChild(byte, child); + return Node16::ReplaceChild(Ref(art, *this, type), byte, child); case NType::NODE_48: - return RefMutable(art, *this, NType::NODE_48).ReplaceChild(byte, child); + return Ref(art, *this, type).ReplaceChild(byte, child); case NType::NODE_256: - return RefMutable(art, *this, NType::NODE_256).ReplaceChild(byte, child); + return Ref(art, *this, type).ReplaceChild(byte, child); default: - throw InternalException("Invalid node type for ReplaceChild."); + throw InternalException("Invalid node type for ReplaceChild: %d.", static_cast(type)); } } void Node::InsertChild(ART &art, Node &node, const uint8_t byte, const Node child) { + D_ASSERT(node.HasMetadata()); - switch (node.GetType()) { + auto type = node.GetType(); + switch (type) { case NType::NODE_4: return Node4::InsertChild(art, node, byte, child); case NType::NODE_16: @@ -113,104 +152,134 @@ void Node::InsertChild(ART &art, Node &node, const uint8_t byte, const Node chil return Node48::InsertChild(art, node, byte, child); case NType::NODE_256: return Node256::InsertChild(art, node, byte, child); + case NType::NODE_7_LEAF: + return Node7Leaf::InsertByte(art, node, byte); + case NType::NODE_15_LEAF: + return Node15Leaf::InsertByte(art, node, byte); + case NType::NODE_256_LEAF: + return Node256Leaf::InsertByte(art, node, byte); default: - throw InternalException("Invalid node type for InsertChild."); + throw InternalException("Invalid node type for InsertChild: %d.", static_cast(type)); } } //===--------------------------------------------------------------------===// -// Deletes +// Delete //===--------------------------------------------------------------------===// -void Node::DeleteChild(ART &art, Node &node, Node &prefix, const uint8_t byte) { +void Node::DeleteChild(ART &art, Node &node, Node &prefix, const uint8_t byte, const GateStatus status, + const ARTKey 
&row_id) { + D_ASSERT(node.HasMetadata()); - switch (node.GetType()) { + auto type = node.GetType(); + switch (type) { case NType::NODE_4: - return Node4::DeleteChild(art, node, prefix, byte); + return Node4::DeleteChild(art, node, prefix, byte, status); case NType::NODE_16: return Node16::DeleteChild(art, node, byte); case NType::NODE_48: return Node48::DeleteChild(art, node, byte); case NType::NODE_256: return Node256::DeleteChild(art, node, byte); + case NType::NODE_7_LEAF: + return Node7Leaf::DeleteByte(art, node, prefix, byte, row_id); + case NType::NODE_15_LEAF: + return Node15Leaf::DeleteByte(art, node, byte); + case NType::NODE_256_LEAF: + return Node256Leaf::DeleteByte(art, node, byte); default: - throw InternalException("Invalid node type for DeleteChild."); + throw InternalException("Invalid node type for DeleteChild: %d.", static_cast(type)); } } //===--------------------------------------------------------------------===// -// Get functions +// Get child and byte. //===--------------------------------------------------------------------===// -optional_ptr Node::GetChild(ART &art, const uint8_t byte) const { - - D_ASSERT(HasMetadata()); +template +unsafe_optional_ptr GetChildInternal(ART &art, NODE &node, const uint8_t byte) { + D_ASSERT(node.HasMetadata()); - switch (GetType()) { + auto type = node.GetType(); + switch (type) { case NType::NODE_4: - return Ref(art, *this, NType::NODE_4).GetChild(byte); + return Node4::GetChild(Node::Ref(art, node, type), byte); case NType::NODE_16: - return Ref(art, *this, NType::NODE_16).GetChild(byte); + return Node16::GetChild(Node::Ref(art, node, type), byte); case NType::NODE_48: - return Ref(art, *this, NType::NODE_48).GetChild(byte); - case NType::NODE_256: - return Ref(art, *this, NType::NODE_256).GetChild(byte); + return Node48::GetChild(Node::Ref(art, node, type), byte); + case NType::NODE_256: { + return Node256::GetChild(Node::Ref(art, node, type), byte); + } default: - throw InternalException("Invalid node 
type for GetChild."); + throw InternalException("Invalid node type for GetChildInternal: %d.", static_cast(type)); } } -optional_ptr Node::GetChildMutable(ART &art, const uint8_t byte) const { +const unsafe_optional_ptr Node::GetChild(ART &art, const uint8_t byte) const { + return GetChildInternal(art, *this, byte); +} - D_ASSERT(HasMetadata()); +unsafe_optional_ptr Node::GetChildMutable(ART &art, const uint8_t byte) const { + return GetChildInternal(art, *this, byte); +} - switch (GetType()) { +template +unsafe_optional_ptr GetNextChildInternal(ART &art, NODE &node, uint8_t &byte) { + D_ASSERT(node.HasMetadata()); + + auto type = node.GetType(); + switch (type) { case NType::NODE_4: - return RefMutable(art, *this, NType::NODE_4).GetChildMutable(byte); + return Node4::GetNextChild(Node::Ref(art, node, type), byte); case NType::NODE_16: - return RefMutable(art, *this, NType::NODE_16).GetChildMutable(byte); + return Node16::GetNextChild(Node::Ref(art, node, type), byte); case NType::NODE_48: - return RefMutable(art, *this, NType::NODE_48).GetChildMutable(byte); + return Node48::GetNextChild(Node::Ref(art, node, type), byte); case NType::NODE_256: - return RefMutable(art, *this, NType::NODE_256).GetChildMutable(byte); + return Node256::GetNextChild(Node::Ref(art, node, type), byte); default: - throw InternalException("Invalid node type for GetChildMutable."); + throw InternalException("Invalid node type for GetNextChildInternal: %d.", static_cast(type)); } } -optional_ptr Node::GetNextChild(ART &art, uint8_t &byte) const { +const unsafe_optional_ptr Node::GetNextChild(ART &art, uint8_t &byte) const { + return GetNextChildInternal(art, *this, byte); +} + +unsafe_optional_ptr Node::GetNextChildMutable(ART &art, uint8_t &byte) const { + return GetNextChildInternal(art, *this, byte); +} +bool Node::HasByte(ART &art, uint8_t &byte) const { D_ASSERT(HasMetadata()); - switch (GetType()) { - case NType::NODE_4: - return Ref(art, *this, NType::NODE_4).GetNextChild(byte); - 
case NType::NODE_16: - return Ref(art, *this, NType::NODE_16).GetNextChild(byte); - case NType::NODE_48: - return Ref(art, *this, NType::NODE_48).GetNextChild(byte); - case NType::NODE_256: - return Ref(art, *this, NType::NODE_256).GetNextChild(byte); + auto type = GetType(); + switch (type) { + case NType::NODE_7_LEAF: + return Ref(art, *this, NType::NODE_7_LEAF).HasByte(byte); + case NType::NODE_15_LEAF: + return Ref(art, *this, NType::NODE_15_LEAF).HasByte(byte); + case NType::NODE_256_LEAF: + return Ref(art, *this, NType::NODE_256_LEAF).HasByte(byte); default: - throw InternalException("Invalid node type for GetNextChild."); + throw InternalException("Invalid node type for GetNextByte: %d.", static_cast(type)); } } -optional_ptr Node::GetNextChildMutable(ART &art, uint8_t &byte) const { - +bool Node::GetNextByte(ART &art, uint8_t &byte) const { D_ASSERT(HasMetadata()); - switch (GetType()) { - case NType::NODE_4: - return RefMutable(art, *this, NType::NODE_4).GetNextChildMutable(byte); - case NType::NODE_16: - return RefMutable(art, *this, NType::NODE_16).GetNextChildMutable(byte); - case NType::NODE_48: - return RefMutable(art, *this, NType::NODE_48).GetNextChildMutable(byte); - case NType::NODE_256: - return RefMutable(art, *this, NType::NODE_256).GetNextChildMutable(byte); + auto type = GetType(); + switch (type) { + case NType::NODE_7_LEAF: + return Ref(art, *this, NType::NODE_7_LEAF).GetNextByte(byte); + case NType::NODE_15_LEAF: + return Ref(art, *this, NType::NODE_15_LEAF).GetNextByte(byte); + case NType::NODE_256_LEAF: + return Ref(art, *this, NType::NODE_256_LEAF).GetNextByte(byte); default: - throw InternalException("Invalid node type for GetNextChildMutable."); + throw InternalException("Invalid node type for GetNextByte: %d.", static_cast(type)); } } @@ -218,301 +287,478 @@ optional_ptr Node::GetNextChildMutable(ART &art, uint8_t &byte) const { // Utility //===--------------------------------------------------------------------===// -string 
Node::VerifyAndToString(ART &art, const bool only_verify) const { - - D_ASSERT(HasMetadata()); - - if (GetType() == NType::LEAF || GetType() == NType::LEAF_INLINED) { - auto str = Leaf::VerifyAndToString(art, *this, only_verify); - return only_verify ? "" : "\n" + str; - } - if (GetType() == NType::PREFIX) { - auto str = Prefix::VerifyAndToString(art, *this, only_verify); - return only_verify ? "" : "\n" + str; +idx_t GetCapacity(NType type) { + switch (type) { + case NType::NODE_4: + return Node4::CAPACITY; + case NType::NODE_7_LEAF: + return Node7Leaf::CAPACITY; + case NType::NODE_15_LEAF: + return Node15Leaf::CAPACITY; + case NType::NODE_16: + return Node16::CAPACITY; + case NType::NODE_48: + return Node48::CAPACITY; + case NType::NODE_256_LEAF: + return Node256::CAPACITY; + case NType::NODE_256: + return Node256::CAPACITY; + default: + throw InternalException("Invalid node type for GetCapacity: %d.", static_cast(type)); } +} - string str = "Node" + to_string(GetCapacity()) + ": ["; - uint8_t byte = 0; - auto child = GetNextChild(art, byte); - - while (child) { - str += "(" + to_string(byte) + ", " + child->VerifyAndToString(art, only_verify) + ")"; - if (byte == NumericLimits::Maximum()) { - break; - } - - byte++; - child = GetNextChild(art, byte); +NType Node::GetNodeType(idx_t count) { + if (count <= Node4::CAPACITY) { + return NType::NODE_4; + } else if (count <= Node16::CAPACITY) { + return NType::NODE_16; + } else if (count <= Node48::CAPACITY) { + return NType::NODE_48; } - - return only_verify ? 
"" : "\n" + str + "]"; + return NType::NODE_256; } -idx_t Node::GetCapacity() const { - +bool Node::IsNode() const { switch (GetType()) { case NType::NODE_4: - return NODE_4_CAPACITY; case NType::NODE_16: - return NODE_16_CAPACITY; case NType::NODE_48: - return NODE_48_CAPACITY; case NType::NODE_256: - return NODE_256_CAPACITY; + return true; default: - throw InternalException("Invalid node type for GetCapacity."); + return false; } } -NType Node::GetARTNodeTypeByCount(const idx_t count) { +bool Node::IsLeafNode() const { + switch (GetType()) { + case NType::NODE_7_LEAF: + case NType::NODE_15_LEAF: + case NType::NODE_256_LEAF: + return true; + default: + return false; + } +} - if (count <= NODE_4_CAPACITY) { - return NType::NODE_4; - } else if (count <= NODE_16_CAPACITY) { - return NType::NODE_16; - } else if (count <= NODE_48_CAPACITY) { - return NType::NODE_48; +bool Node::IsAnyLeaf() const { + if (IsLeafNode()) { + return true; + } + + switch (GetType()) { + case NType::LEAF_INLINED: + case NType::LEAF: + return true; + default: + return false; } - return NType::NODE_256; } //===--------------------------------------------------------------------===// -// Merging +// Merge //===--------------------------------------------------------------------===// -void Node::InitializeMerge(ART &art, const ARTFlags &flags) { - +void Node::InitMerge(ART &art, const unsafe_vector &upper_bounds) { D_ASSERT(HasMetadata()); + auto type = GetType(); - switch (GetType()) { + switch (type) { case NType::PREFIX: - // iterative - return Prefix::InitializeMerge(art, *this, flags); + return Prefix::InitializeMerge(art, *this, upper_bounds); case NType::LEAF: - // iterative - return Leaf::InitializeMerge(art, *this, flags); + throw InternalException("Failed to initialize merge due to deprecated ART storage."); case NType::NODE_4: - RefMutable(art, *this, NType::NODE_4).InitializeMerge(art, flags); + InitMergeInternal(art, Ref(art, *this, type), upper_bounds); break; case NType::NODE_16: 
- RefMutable(art, *this, NType::NODE_16).InitializeMerge(art, flags); + InitMergeInternal(art, Ref(art, *this, type), upper_bounds); break; case NType::NODE_48: - RefMutable(art, *this, NType::NODE_48).InitializeMerge(art, flags); + InitMergeInternal(art, Ref(art, *this, type), upper_bounds); break; case NType::NODE_256: - RefMutable(art, *this, NType::NODE_256).InitializeMerge(art, flags); + InitMergeInternal(art, Ref(art, *this, type), upper_bounds); break; case NType::LEAF_INLINED: return; + case NType::NODE_7_LEAF: + case NType::NODE_15_LEAF: + case NType::NODE_256_LEAF: + break; } - IncreaseBufferId(flags.merge_buffer_counts[static_cast(GetType()) - 1]); + auto idx = GetAllocatorIdx(type); + IncreaseBufferId(upper_bounds[idx]); } -bool Node::Merge(ART &art, Node &other) { +bool Node::MergeNormalNodes(ART &art, Node &l_node, Node &r_node, uint8_t &byte, const GateStatus status) { + // Merge N4, N16, N48, N256 nodes. + D_ASSERT(l_node.IsNode() && r_node.IsNode()); + D_ASSERT(l_node.GetGateStatus() == r_node.GetGateStatus()); - if (!HasMetadata()) { - *this = other; - other = Node(); - return true; + auto r_child = r_node.GetNextChildMutable(art, byte); + while (r_child) { + auto l_child = l_node.GetChildMutable(art, byte); + if (!l_child) { + Node::InsertChild(art, l_node, byte, *r_child); + r_node.ReplaceChild(art, byte); + } else { + if (!l_child->MergeInternal(art, *r_child, status)) { + return false; + } + } + + if (byte == NumericLimits::Maximum()) { + break; + } + byte++; + r_child = r_node.GetNextChildMutable(art, byte); } - return ResolvePrefixes(art, other); + Node::Free(art, r_node); + return true; } -bool MergePrefixContainsOtherPrefix(ART &art, reference &l_node, reference &r_node, - idx_t &mismatch_position) { - - // r_node's prefix contains l_node's prefix - // l_node cannot be a leaf, otherwise the key represented by l_node would be a subset of another key - // which is not possible by our construction - D_ASSERT(l_node.get().GetType() != 
NType::LEAF && l_node.get().GetType() != NType::LEAF_INLINED); +void Node::MergeLeafNodes(ART &art, Node &l_node, Node &r_node, uint8_t &byte) { + // Merge N7, N15, N256 leaf nodes. + D_ASSERT(l_node.IsLeafNode() && r_node.IsLeafNode()); + D_ASSERT(l_node.GetGateStatus() == GateStatus::GATE_NOT_SET); + D_ASSERT(r_node.GetGateStatus() == GateStatus::GATE_NOT_SET); - // test if the next byte (mismatch_position) in r_node (prefix) exists in l_node - auto mismatch_byte = Prefix::GetByte(art, r_node, mismatch_position); - auto child_node = l_node.get().GetChildMutable(art, mismatch_byte); + auto has_next = r_node.GetNextByte(art, byte); + while (has_next) { + // Row IDs are always unique. + Node::InsertChild(art, l_node, byte); + if (byte == NumericLimits::Maximum()) { + break; + } + byte++; + has_next = r_node.GetNextByte(art, byte); + } - // update the prefix of r_node to only consist of the bytes after mismatch_position - Prefix::Reduce(art, r_node, mismatch_position); + Node::Free(art, r_node); +} - if (!child_node) { - // insert r_node as a child of l_node at the empty position - Node::InsertChild(art, l_node, mismatch_byte, r_node); - r_node.get().Clear(); - return true; +bool Node::MergeNodes(ART &art, Node &other, GateStatus status) { + // Merge the smaller node into the bigger node. 
+ if (GetType() < other.GetType()) { + swap(*this, other); } - // recurse - return child_node->ResolvePrefixes(art, r_node); + uint8_t byte = 0; + if (IsNode()) { + return MergeNormalNodes(art, *this, other, byte, status); + } + MergeLeafNodes(art, *this, other, byte); + return true; } -void MergePrefixesDiffer(ART &art, reference &l_node, reference &r_node, idx_t &mismatch_position) { +bool Node::Merge(ART &art, Node &other, const GateStatus status) { + if (HasMetadata()) { + return MergeInternal(art, other, status); + } - // create a new node and insert both nodes as children + *this = other; + other = Node(); + return true; +} - Node l_child; - auto l_byte = Prefix::GetByte(art, l_node, mismatch_position); - Prefix::Split(art, l_node, l_child, mismatch_position); - Node4::New(art, l_node); +bool Node::PrefixContainsOther(ART &art, Node &l_node, Node &r_node, const uint8_t pos, const GateStatus status) { + // r_node's prefix contains l_node's prefix. l_node must be a node with child nodes. + D_ASSERT(l_node.IsNode()); - // insert children - Node4::InsertChild(art, l_node, l_byte, l_child); - auto r_byte = Prefix::GetByte(art, r_node, mismatch_position); - Prefix::Reduce(art, r_node, mismatch_position); - Node4::InsertChild(art, l_node, r_byte, r_node); + // Check if the next byte (pos) in r_node exists in l_node. + auto byte = Prefix::GetByte(art, r_node, pos); + auto child = l_node.GetChildMutable(art, byte); - r_node.get().Clear(); + // Reduce r_node's prefix to the bytes after pos. 
+ Prefix::Reduce(art, r_node, pos); + if (child) { + return child->MergeInternal(art, r_node, status); + } + + Node::InsertChild(art, l_node, byte, r_node); + r_node.Clear(); + return true; } -bool Node::ResolvePrefixes(ART &art, Node &other) { +void Node::MergeIntoNode4(ART &art, Node &l_node, Node &r_node, const uint8_t pos) { + Node l_child; + auto l_byte = Prefix::GetByte(art, l_node, pos); - // NOTE: we always merge into the left ART + reference ref(l_node); + auto status = Prefix::Split(art, ref, l_child, pos); + Node4::New(art, ref); + ref.get().SetGateStatus(status); - D_ASSERT(HasMetadata() && other.HasMetadata()); + Node4::InsertChild(art, ref, l_byte, l_child); - // case 1: both nodes have no prefix - if (GetType() != NType::PREFIX && other.GetType() != NType::PREFIX) { - return MergeInternal(art, other); - } + auto r_byte = Prefix::GetByte(art, r_node, pos); + Prefix::Reduce(art, r_node, pos); + Node4::InsertChild(art, ref, r_byte, r_node); + r_node.Clear(); +} +bool Node::MergePrefixes(ART &art, Node &other, const GateStatus status) { reference l_node(*this); reference r_node(other); + auto pos = DConstants::INVALID_INDEX; - idx_t mismatch_position = DConstants::INVALID_INDEX; - - // traverse prefixes if (l_node.get().GetType() == NType::PREFIX && r_node.get().GetType() == NType::PREFIX) { - - if (!Prefix::Traverse(art, l_node, r_node, mismatch_position)) { + // Traverse prefixes. Possibly change the referenced nodes. + if (!Prefix::Traverse(art, l_node, r_node, pos, status)) { return false; } - // we already recurse because the prefixes matched (so far) - if (mismatch_position == DConstants::INVALID_INDEX) { + if (pos == DConstants::INVALID_INDEX) { return true; } } else { - - // l_prefix contains r_prefix + // l_prefix contains r_prefix. 
if (l_node.get().GetType() == NType::PREFIX) { swap(*this, other); } - mismatch_position = 0; + pos = 0; } - D_ASSERT(mismatch_position != DConstants::INVALID_INDEX); - // case 2: one prefix contains the other prefix + D_ASSERT(pos != DConstants::INVALID_INDEX); if (l_node.get().GetType() != NType::PREFIX && r_node.get().GetType() == NType::PREFIX) { - return MergePrefixContainsOtherPrefix(art, l_node, r_node, mismatch_position); + return PrefixContainsOther(art, l_node, r_node, UnsafeNumericCast(pos), status); } - // case 3: prefixes differ at a specific byte - MergePrefixesDiffer(art, l_node, r_node, mismatch_position); + // The prefixes differ. + MergeIntoNode4(art, l_node, r_node, UnsafeNumericCast(pos)); return true; } -bool Node::MergeInternal(ART &art, Node &other) { - - D_ASSERT(HasMetadata() && other.HasMetadata()); - D_ASSERT(GetType() != NType::PREFIX && other.GetType() != NType::PREFIX); +bool Node::MergeInternal(ART &art, Node &other, const GateStatus status) { + D_ASSERT(HasMetadata()); + D_ASSERT(other.HasMetadata()); - // always try to merge the smaller node into the bigger node - // because maybe there is enough free space in the bigger node to fit the smaller one - // without too much recursion - if (GetType() < other.GetType()) { + // Merge inlined leaves. 
+ if (GetType() == NType::LEAF_INLINED) { swap(*this, other); } - - Node empty_node; - auto &l_node = *this; - auto &r_node = other; - - if (r_node.GetType() == NType::LEAF || r_node.GetType() == NType::LEAF_INLINED) { - D_ASSERT(l_node.GetType() == NType::LEAF || l_node.GetType() == NType::LEAF_INLINED); + if (other.GetType() == NType::LEAF_INLINED) { + D_ASSERT(status == GateStatus::GATE_NOT_SET); + D_ASSERT(other.GetGateStatus() == GateStatus::GATE_SET || other.GetType() == NType::LEAF_INLINED); + D_ASSERT(GetType() == NType::LEAF_INLINED || GetGateStatus() == GateStatus::GATE_SET); if (art.IsUnique()) { return false; } + Leaf::MergeInlined(art, *this, other); + return true; + } - Leaf::Merge(art, l_node, r_node); + // Enter a gate. + if (GetGateStatus() == GateStatus::GATE_SET && status == GateStatus::GATE_NOT_SET) { + D_ASSERT(other.GetGateStatus() == GateStatus::GATE_SET); + D_ASSERT(GetType() != NType::LEAF_INLINED); + D_ASSERT(other.GetType() != NType::LEAF_INLINED); + + // Get all row IDs. + unsafe_vector row_ids; + Iterator it(art); + it.FindMinimum(other); + ARTKey empty_key = ARTKey(); + it.Scan(empty_key, NumericLimits().Maximum(), row_ids, false); + Node::Free(art, other); + D_ASSERT(row_ids.size() > 1); + + // Insert all row IDs. + ArenaAllocator allocator(Allocator::Get(art.db)); + for (idx_t i = 0; i < row_ids.size(); i++) { + auto row_id = ARTKey::CreateARTKey(allocator, row_ids[i]); + art.Insert(*this, row_id, 0, row_id, GateStatus::GATE_SET); + } return true; } - uint8_t byte = 0; - auto r_child = r_node.GetNextChildMutable(art, byte); + // Merge N4, N16, N48, N256 nodes. + if (IsNode() && other.IsNode()) { + return MergeNodes(art, other, status); + } + // Merge N7, N15, N256 leaf nodes. 
+ if (IsLeafNode() && other.IsLeafNode()) { + D_ASSERT(status == GateStatus::GATE_SET); + return MergeNodes(art, other, status); + } - // while r_node still has children to merge - while (r_child) { - auto l_child = l_node.GetChildMutable(art, byte); - if (!l_child) { - // insert child at empty byte - InsertChild(art, l_node, byte, *r_child); - r_node.ReplaceChild(art, byte, empty_node); + // Merge prefixes. + return MergePrefixes(art, other, status); +} - } else { - // recurse - if (!l_child->ResolvePrefixes(art, *r_child)) { - return false; - } - } +//===--------------------------------------------------------------------===// +// Vacuum +//===--------------------------------------------------------------------===// - if (byte == NumericLimits::Maximum()) { - break; +void Node::Vacuum(ART &art, const unordered_set &indexes) { + D_ASSERT(HasMetadata()); + + auto type = GetType(); + switch (type) { + case NType::LEAF_INLINED: + return; + case NType::PREFIX: + return Prefix::Vacuum(art, *this, indexes); + case NType::LEAF: + if (indexes.find(GetAllocatorIdx(type)) == indexes.end()) { + return; } - byte++; - r_child = r_node.GetNextChildMutable(art, byte); + return Leaf::DeprecatedVacuum(art, *this); + default: + break; } - Free(art, r_node); - return true; + auto idx = GetAllocatorIdx(type); + auto &allocator = GetAllocator(art, type); + auto needs_vacuum = indexes.find(idx) != indexes.end() && allocator.NeedsVacuum(*this); + if (needs_vacuum) { + auto status = GetGateStatus(); + *this = allocator.VacuumPointer(*this); + SetMetadata(static_cast(type)); + SetGateStatus(status); + } + + switch (type) { + case NType::NODE_4: + return VacuumInternal(art, Ref(art, *this, type), indexes); + case NType::NODE_16: + return VacuumInternal(art, Ref(art, *this, type), indexes); + case NType::NODE_48: + return VacuumInternal(art, Ref(art, *this, type), indexes); + case NType::NODE_256: + return VacuumInternal(art, Ref(art, *this, type), indexes); + case NType::NODE_7_LEAF: + 
case NType::NODE_15_LEAF: + case NType::NODE_256_LEAF: + return; + default: + throw InternalException("Invalid node type for Vacuum: %d.", static_cast(type)); + } } //===--------------------------------------------------------------------===// -// Vacuum +// TransformToDeprecated //===--------------------------------------------------------------------===// -void Node::Vacuum(ART &art, const ARTFlags &flags) { +void Node::TransformToDeprecated(ART &art, Node &node, unsafe_unique_ptr &allocator) { + D_ASSERT(node.HasMetadata()); - D_ASSERT(HasMetadata()); + if (node.GetGateStatus() == GateStatus::GATE_SET) { + return Leaf::TransformToDeprecated(art, node); + } + + auto type = node.GetType(); + switch (type) { + case NType::PREFIX: + return Prefix::TransformToDeprecated(art, node, allocator); + case NType::LEAF_INLINED: + return; + case NType::LEAF: + return; + case NType::NODE_4: + return TransformToDeprecatedInternal(art, InMemoryRef(art, node, type), allocator); + case NType::NODE_16: + return TransformToDeprecatedInternal(art, InMemoryRef(art, node, type), allocator); + case NType::NODE_48: + return TransformToDeprecatedInternal(art, InMemoryRef(art, node, type), allocator); + case NType::NODE_256: + return TransformToDeprecatedInternal(art, InMemoryRef(art, node, type), allocator); + default: + throw InternalException("Invalid node type for TransformToDeprecated: %d.", static_cast(type)); + } +} - auto node_type = GetType(); - auto node_type_idx = static_cast(node_type); +//===--------------------------------------------------------------------===// +// Verification +//===--------------------------------------------------------------------===// + +string Node::VerifyAndToString(ART &art, const bool only_verify) const { + D_ASSERT(HasMetadata()); - // iterative functions - if (node_type == NType::PREFIX) { - return Prefix::Vacuum(art, *this, flags); + auto type = GetType(); + switch (type) { + case NType::LEAF_INLINED: + return only_verify ? 
"" : "Inlined Leaf [row ID: " + to_string(GetRowId()) + "]"; + case NType::LEAF: + return Leaf::DeprecatedVerifyAndToString(art, *this, only_verify); + case NType::PREFIX: { + auto str = Prefix::VerifyAndToString(art, *this, only_verify); + if (GetGateStatus() == GateStatus::GATE_SET) { + str = "Gate [ " + str + " ]"; + } + return only_verify ? "" : "\n" + str; } - if (node_type == NType::LEAF_INLINED) { - return; + default: + break; } - if (node_type == NType::LEAF) { - if (flags.vacuum_flags[node_type_idx - 1]) { - Leaf::Vacuum(art, *this); + + string str = "Node" + to_string(GetCapacity(type)) + ": [ "; + uint8_t byte = 0; + + if (IsLeafNode()) { + str = "Leaf " + str; + auto has_byte = GetNextByte(art, byte); + while (has_byte) { + str += to_string(byte) + "-"; + if (byte == NumericLimits::Maximum()) { + break; + } + byte++; + has_byte = GetNextByte(art, byte); + } + } else { + auto child = GetNextChild(art, byte); + while (child) { + str += "(" + to_string(byte) + ", " + child->VerifyAndToString(art, only_verify) + ")"; + if (byte == NumericLimits::Maximum()) { + break; + } + byte++; + child = GetNextChild(art, byte); } - return; } - auto &allocator = GetAllocator(art, node_type); - auto needs_vacuum = flags.vacuum_flags[node_type_idx - 1] && allocator.NeedsVacuum(*this); - if (needs_vacuum) { - *this = allocator.VacuumPointer(*this); - SetMetadata(node_type_idx); + if (GetGateStatus() == GateStatus::GATE_SET) { + str = "Gate [ " + str + " ]"; } + return only_verify ? 
"" : "\n" + str + "]"; +} + +void Node::VerifyAllocations(ART &art, unordered_map &node_counts) const { + D_ASSERT(HasMetadata()); - // recursive functions - switch (node_type) { + auto type = GetType(); + switch (type) { + case NType::PREFIX: + return Prefix::VerifyAllocations(art, *this, node_counts); + case NType::LEAF: + return Ref(art, *this, type).DeprecatedVerifyAllocations(art, node_counts); + case NType::LEAF_INLINED: + return; case NType::NODE_4: - return RefMutable(art, *this, NType::NODE_4).Vacuum(art, flags); + VerifyAllocationsInternal(art, Ref(art, *this, type), node_counts); + break; case NType::NODE_16: - return RefMutable(art, *this, NType::NODE_16).Vacuum(art, flags); + VerifyAllocationsInternal(art, Ref(art, *this, type), node_counts); + break; case NType::NODE_48: - return RefMutable(art, *this, NType::NODE_48).Vacuum(art, flags); + VerifyAllocationsInternal(art, Ref(art, *this, type), node_counts); + break; case NType::NODE_256: - return RefMutable(art, *this, NType::NODE_256).Vacuum(art, flags); - default: - throw InternalException("Invalid node type for Vacuum."); + VerifyAllocationsInternal(art, Ref(art, *this, type), node_counts); + break; + case NType::NODE_7_LEAF: + case NType::NODE_15_LEAF: + case NType::NODE_256_LEAF: + break; } + + node_counts[GetAllocatorIdx(type)]++; } } // namespace duckdb diff --git a/src/duckdb/src/execution/index/art/node16.cpp b/src/duckdb/src/execution/index/art/node16.cpp deleted file mode 100644 index 9214a9b9..00000000 --- a/src/duckdb/src/execution/index/art/node16.cpp +++ /dev/null @@ -1,196 +0,0 @@ -#include "duckdb/execution/index/art/node16.hpp" -#include "duckdb/execution/index/art/node4.hpp" -#include "duckdb/execution/index/art/node48.hpp" -#include "duckdb/common/numeric_utils.hpp" - -namespace duckdb { - -Node16 &Node16::New(ART &art, Node &node) { - - node = Node::GetAllocator(art, NType::NODE_16).New(); - node.SetMetadata(static_cast(NType::NODE_16)); - auto &n16 = Node::RefMutable(art, node, 
NType::NODE_16); - - n16.count = 0; - return n16; -} - -void Node16::Free(ART &art, Node &node) { - - D_ASSERT(node.HasMetadata()); - auto &n16 = Node::RefMutable(art, node, NType::NODE_16); - - // free all children - for (idx_t i = 0; i < n16.count; i++) { - Node::Free(art, n16.children[i]); - } -} - -Node16 &Node16::GrowNode4(ART &art, Node &node16, Node &node4) { - - auto &n4 = Node::RefMutable(art, node4, NType::NODE_4); - auto &n16 = New(art, node16); - - n16.count = n4.count; - for (idx_t i = 0; i < n4.count; i++) { - n16.key[i] = n4.key[i]; - n16.children[i] = n4.children[i]; - } - - n4.count = 0; - Node::Free(art, node4); - return n16; -} - -Node16 &Node16::ShrinkNode48(ART &art, Node &node16, Node &node48) { - - auto &n16 = New(art, node16); - auto &n48 = Node::RefMutable(art, node48, NType::NODE_48); - - n16.count = 0; - for (idx_t i = 0; i < Node::NODE_256_CAPACITY; i++) { - D_ASSERT(n16.count <= Node::NODE_16_CAPACITY); - if (n48.child_index[i] != Node::EMPTY_MARKER) { - n16.key[n16.count] = UnsafeNumericCast(i); - n16.children[n16.count] = n48.children[n48.child_index[i]]; - n16.count++; - } - } - - n48.count = 0; - Node::Free(art, node48); - return n16; -} - -void Node16::InitializeMerge(ART &art, const ARTFlags &flags) { - - for (idx_t i = 0; i < count; i++) { - children[i].InitializeMerge(art, flags); - } -} - -void Node16::InsertChild(ART &art, Node &node, const uint8_t byte, const Node child) { - - D_ASSERT(node.HasMetadata()); - auto &n16 = Node::RefMutable(art, node, NType::NODE_16); - - // ensure that there is no other child at the same byte - for (idx_t i = 0; i < n16.count; i++) { - D_ASSERT(n16.key[i] != byte); - } - - // insert new child node into node - if (n16.count < Node::NODE_16_CAPACITY) { - // still space, just insert the child - idx_t child_pos = 0; - while (child_pos < n16.count && n16.key[child_pos] < byte) { - child_pos++; - } - // move children backwards to make space - for (idx_t i = n16.count; i > child_pos; i--) { - 
n16.key[i] = n16.key[i - 1]; - n16.children[i] = n16.children[i - 1]; - } - - n16.key[child_pos] = byte; - n16.children[child_pos] = child; - n16.count++; - - } else { - // node is full, grow to Node48 - auto node16 = node; - Node48::GrowNode16(art, node, node16); - Node48::InsertChild(art, node, byte, child); - } -} - -void Node16::DeleteChild(ART &art, Node &node, const uint8_t byte) { - - D_ASSERT(node.HasMetadata()); - auto &n16 = Node::RefMutable(art, node, NType::NODE_16); - - idx_t child_pos = 0; - for (; child_pos < n16.count; child_pos++) { - if (n16.key[child_pos] == byte) { - break; - } - } - - D_ASSERT(child_pos < n16.count); - - // free the child and decrease the count - Node::Free(art, n16.children[child_pos]); - n16.count--; - - // potentially move any children backwards - for (idx_t i = child_pos; i < n16.count; i++) { - n16.key[i] = n16.key[i + 1]; - n16.children[i] = n16.children[i + 1]; - } - - // shrink node to Node4 - if (n16.count < Node::NODE_4_CAPACITY) { - auto node16 = node; - Node4::ShrinkNode16(art, node, node16); - } -} - -void Node16::ReplaceChild(const uint8_t byte, const Node child) { - for (idx_t i = 0; i < count; i++) { - if (key[i] == byte) { - children[i] = child; - return; - } - } -} - -optional_ptr Node16::GetChild(const uint8_t byte) const { - for (idx_t i = 0; i < count; i++) { - if (key[i] == byte) { - D_ASSERT(children[i].HasMetadata()); - return &children[i]; - } - } - return nullptr; -} - -optional_ptr Node16::GetChildMutable(const uint8_t byte) { - for (idx_t i = 0; i < count; i++) { - if (key[i] == byte) { - D_ASSERT(children[i].HasMetadata()); - return &children[i]; - } - } - return nullptr; -} - -optional_ptr Node16::GetNextChild(uint8_t &byte) const { - for (idx_t i = 0; i < count; i++) { - if (key[i] >= byte) { - byte = key[i]; - D_ASSERT(children[i].HasMetadata()); - return &children[i]; - } - } - return nullptr; -} - -optional_ptr Node16::GetNextChildMutable(uint8_t &byte) { - for (idx_t i = 0; i < count; i++) { - 
if (key[i] >= byte) { - byte = key[i]; - D_ASSERT(children[i].HasMetadata()); - return &children[i]; - } - } - return nullptr; -} - -void Node16::Vacuum(ART &art, const ARTFlags &flags) { - - for (idx_t i = 0; i < count; i++) { - children[i].Vacuum(art, flags); - } -} - -} // namespace duckdb diff --git a/src/duckdb/src/execution/index/art/node256.cpp b/src/duckdb/src/execution/index/art/node256.cpp index 30182858..f08717e1 100644 --- a/src/duckdb/src/execution/index/art/node256.cpp +++ b/src/duckdb/src/execution/index/art/node256.cpp @@ -1,17 +1,16 @@ #include "duckdb/execution/index/art/node256.hpp" + #include "duckdb/execution/index/art/node48.hpp" -#include "duckdb/common/numeric_utils.hpp" namespace duckdb { Node256 &Node256::New(ART &art, Node &node) { - - node = Node::GetAllocator(art, NType::NODE_256).New(); - node.SetMetadata(static_cast(NType::NODE_256)); - auto &n256 = Node::RefMutable(art, node, NType::NODE_256); + node = Node::GetAllocator(art, NODE_256).New(); + node.SetMetadata(static_cast(NODE_256)); + auto &n256 = Node::Ref(art, node, NODE_256); n256.count = 0; - for (idx_t i = 0; i < Node::NODE_256_CAPACITY; i++) { + for (uint16_t i = 0; i < CAPACITY; i++) { n256.children[i].Clear(); } @@ -19,120 +18,61 @@ Node256 &Node256::New(ART &art, Node &node) { } void Node256::Free(ART &art, Node &node) { - - D_ASSERT(node.HasMetadata()); - auto &n256 = Node::RefMutable(art, node, NType::NODE_256); - + auto &n256 = Node::Ref(art, node, NODE_256); if (!n256.count) { return; } - // free all children - for (idx_t i = 0; i < Node::NODE_256_CAPACITY; i++) { - if (n256.children[i].HasMetadata()) { - Node::Free(art, n256.children[i]); - } - } -} - -Node256 &Node256::GrowNode48(ART &art, Node &node256, Node &node48) { - - auto &n48 = Node::RefMutable(art, node48, NType::NODE_48); - auto &n256 = New(art, node256); - - n256.count = n48.count; - for (idx_t i = 0; i < Node::NODE_256_CAPACITY; i++) { - if (n48.child_index[i] != Node::EMPTY_MARKER) { - n256.children[i] = 
n48.children[n48.child_index[i]]; - } else { - n256.children[i].Clear(); - } - } - - n48.count = 0; - Node::Free(art, node48); - return n256; -} - -void Node256::InitializeMerge(ART &art, const ARTFlags &flags) { - - for (idx_t i = 0; i < Node::NODE_256_CAPACITY; i++) { - if (children[i].HasMetadata()) { - children[i].InitializeMerge(art, flags); - } - } + Iterator(n256, [&](Node &child) { Node::Free(art, child); }); } void Node256::InsertChild(ART &art, Node &node, const uint8_t byte, const Node child) { - - D_ASSERT(node.HasMetadata()); - auto &n256 = Node::RefMutable(art, node, NType::NODE_256); - - // ensure that there is no other child at the same byte - D_ASSERT(!n256.children[byte].HasMetadata()); - + auto &n256 = Node::Ref(art, node, NODE_256); n256.count++; - D_ASSERT(n256.count <= Node::NODE_256_CAPACITY); n256.children[byte] = child; } void Node256::DeleteChild(ART &art, Node &node, const uint8_t byte) { + auto &n256 = Node::Ref(art, node, NODE_256); - D_ASSERT(node.HasMetadata()); - auto &n256 = Node::RefMutable(art, node, NType::NODE_256); - - // free the child and decrease the count + // Free the child and decrease the count. Node::Free(art, n256.children[byte]); n256.count--; - // shrink node to Node48 - if (n256.count <= Node::NODE_256_SHRINK_THRESHOLD) { + // Shrink to Node48. 
+ if (n256.count <= SHRINK_THRESHOLD) { auto node256 = node; Node48::ShrinkNode256(art, node, node256); } } -optional_ptr Node256::GetChild(const uint8_t byte) const { - if (children[byte].HasMetadata()) { - return &children[byte]; - } - return nullptr; -} +void Node256::ReplaceChild(const uint8_t byte, const Node child) { + D_ASSERT(count > SHRINK_THRESHOLD); -optional_ptr Node256::GetChildMutable(const uint8_t byte) { - if (children[byte].HasMetadata()) { - return &children[byte]; + auto status = children[byte].GetGateStatus(); + children[byte] = child; + if (status == GateStatus::GATE_SET && child.HasMetadata()) { + children[byte].SetGateStatus(status); } - return nullptr; } -optional_ptr Node256::GetNextChild(uint8_t &byte) const { - for (idx_t i = byte; i < Node::NODE_256_CAPACITY; i++) { - if (children[i].HasMetadata()) { - byte = UnsafeNumericCast(i); - return &children[i]; - } - } - return nullptr; -} +Node256 &Node256::GrowNode48(ART &art, Node &node256, Node &node48) { + auto &n48 = Node::Ref(art, node48, NType::NODE_48); + auto &n256 = New(art, node256); + node256.SetGateStatus(node48.GetGateStatus()); -optional_ptr Node256::GetNextChildMutable(uint8_t &byte) { - for (idx_t i = byte; i < Node::NODE_256_CAPACITY; i++) { - if (children[i].HasMetadata()) { - byte = UnsafeNumericCast(i); - return &children[i]; + n256.count = n48.count; + for (uint16_t i = 0; i < CAPACITY; i++) { + if (n48.child_index[i] != Node48::EMPTY_MARKER) { + n256.children[i] = n48.children[n48.child_index[i]]; + } else { + n256.children[i].Clear(); } } - return nullptr; -} -void Node256::Vacuum(ART &art, const ARTFlags &flags) { - - for (idx_t i = 0; i < Node::NODE_256_CAPACITY; i++) { - if (children[i].HasMetadata()) { - children[i].Vacuum(art, flags); - } - } + n48.count = 0; + Node::Free(art, node48); + return n256; } } // namespace duckdb diff --git a/src/duckdb/src/execution/index/art/node256_leaf.cpp b/src/duckdb/src/execution/index/art/node256_leaf.cpp new file mode 100644 
index 00000000..01067922 --- /dev/null +++ b/src/duckdb/src/execution/index/art/node256_leaf.cpp @@ -0,0 +1,71 @@ +#include "duckdb/execution/index/art/node256_leaf.hpp" + +#include "duckdb/execution/index/art/base_leaf.hpp" +#include "duckdb/execution/index/art/node48.hpp" + +namespace duckdb { + +Node256Leaf &Node256Leaf::New(ART &art, Node &node) { + node = Node::GetAllocator(art, NODE_256_LEAF).New(); + node.SetMetadata(static_cast(NODE_256_LEAF)); + auto &n256 = Node::Ref(art, node, NODE_256_LEAF); + + n256.count = 0; + ValidityMask mask(&n256.mask[0]); + mask.SetAllInvalid(CAPACITY); + return n256; +} + +void Node256Leaf::InsertByte(ART &art, Node &node, const uint8_t byte) { + auto &n256 = Node::Ref(art, node, NODE_256_LEAF); + n256.count++; + ValidityMask mask(&n256.mask[0]); + mask.SetValid(byte); +} + +void Node256Leaf::DeleteByte(ART &art, Node &node, const uint8_t byte) { + auto &n256 = Node::Ref(art, node, NODE_256_LEAF); + n256.count--; + ValidityMask mask(&n256.mask[0]); + mask.SetInvalid(byte); + + // Shrink node to Node15 + if (n256.count <= Node48::SHRINK_THRESHOLD) { + auto node256 = node; + Node15Leaf::ShrinkNode256Leaf(art, node, node256); + } +} + +bool Node256Leaf::HasByte(uint8_t &byte) { + ValidityMask v_mask(&mask[0]); + return v_mask.RowIsValid(byte); +} + +bool Node256Leaf::GetNextByte(uint8_t &byte) { + ValidityMask v_mask(&mask[0]); + for (uint16_t i = byte; i < CAPACITY; i++) { + if (v_mask.RowIsValid(i)) { + byte = UnsafeNumericCast(i); + return true; + } + } + return false; +} + +Node256Leaf &Node256Leaf::GrowNode15Leaf(ART &art, Node &node256_leaf, Node &node15_leaf) { + auto &n15 = Node::Ref(art, node15_leaf, NType::NODE_15_LEAF); + auto &n256 = New(art, node256_leaf); + node256_leaf.SetGateStatus(node15_leaf.GetGateStatus()); + + n256.count = n15.count; + ValidityMask mask(&n256.mask[0]); + for (uint8_t i = 0; i < n15.count; i++) { + mask.SetValid(n15.key[i]); + } + + n15.count = 0; + Node::Free(art, node15_leaf); + return n256; 
+} + +} // namespace duckdb diff --git a/src/duckdb/src/execution/index/art/node4.cpp b/src/duckdb/src/execution/index/art/node4.cpp deleted file mode 100644 index 6438b744..00000000 --- a/src/duckdb/src/execution/index/art/node4.cpp +++ /dev/null @@ -1,189 +0,0 @@ -#include "duckdb/execution/index/art/node4.hpp" - -#include "duckdb/execution/index/art/prefix.hpp" -#include "duckdb/execution/index/art/node16.hpp" - -namespace duckdb { - -Node4 &Node4::New(ART &art, Node &node) { - - node = Node::GetAllocator(art, NType::NODE_4).New(); - node.SetMetadata(static_cast(NType::NODE_4)); - auto &n4 = Node::RefMutable(art, node, NType::NODE_4); - - n4.count = 0; - return n4; -} - -void Node4::Free(ART &art, Node &node) { - - D_ASSERT(node.HasMetadata()); - auto &n4 = Node::RefMutable(art, node, NType::NODE_4); - - // free all children - for (idx_t i = 0; i < n4.count; i++) { - Node::Free(art, n4.children[i]); - } -} - -Node4 &Node4::ShrinkNode16(ART &art, Node &node4, Node &node16) { - - auto &n4 = New(art, node4); - auto &n16 = Node::RefMutable(art, node16, NType::NODE_16); - - D_ASSERT(n16.count <= Node::NODE_4_CAPACITY); - n4.count = n16.count; - for (idx_t i = 0; i < n16.count; i++) { - n4.key[i] = n16.key[i]; - n4.children[i] = n16.children[i]; - } - - n16.count = 0; - Node::Free(art, node16); - return n4; -} - -void Node4::InitializeMerge(ART &art, const ARTFlags &flags) { - - for (idx_t i = 0; i < count; i++) { - children[i].InitializeMerge(art, flags); - } -} - -void Node4::InsertChild(ART &art, Node &node, const uint8_t byte, const Node child) { - - D_ASSERT(node.HasMetadata()); - auto &n4 = Node::RefMutable(art, node, NType::NODE_4); - - // ensure that there is no other child at the same byte - for (idx_t i = 0; i < n4.count; i++) { - D_ASSERT(n4.key[i] != byte); - } - - // insert new child node into node - if (n4.count < Node::NODE_4_CAPACITY) { - // still space, just insert the child - idx_t child_pos = 0; - while (child_pos < n4.count && n4.key[child_pos] < 
byte) { - child_pos++; - } - // move children backwards to make space - for (idx_t i = n4.count; i > child_pos; i--) { - n4.key[i] = n4.key[i - 1]; - n4.children[i] = n4.children[i - 1]; - } - - n4.key[child_pos] = byte; - n4.children[child_pos] = child; - n4.count++; - - } else { - // node is full, grow to Node16 - auto node4 = node; - Node16::GrowNode4(art, node, node4); - Node16::InsertChild(art, node, byte, child); - } -} - -void Node4::DeleteChild(ART &art, Node &node, Node &prefix, const uint8_t byte) { - - D_ASSERT(node.HasMetadata()); - auto &n4 = Node::RefMutable(art, node, NType::NODE_4); - - idx_t child_pos = 0; - for (; child_pos < n4.count; child_pos++) { - if (n4.key[child_pos] == byte) { - break; - } - } - - D_ASSERT(child_pos < n4.count); - D_ASSERT(n4.count > 1); - - // free the child and decrease the count - Node::Free(art, n4.children[child_pos]); - n4.count--; - - // potentially move any children backwards - for (idx_t i = child_pos; i < n4.count; i++) { - n4.key[i] = n4.key[i + 1]; - n4.children[i] = n4.children[i + 1]; - } - - // this is a one way node, compress - if (n4.count == 1) { - - // we need to keep track of the old node pointer - // because Concatenate() might overwrite that pointer while appending bytes to - // the prefix (and by doing so overwriting the subsequent node with - // new prefix nodes) - auto old_n4_node = node; - - // get only child and concatenate prefixes - auto child = *n4.GetChildMutable(n4.key[0]); - Prefix::Concatenate(art, prefix, n4.key[0], child); - - n4.count--; - Node::Free(art, old_n4_node); - } -} - -void Node4::ReplaceChild(const uint8_t byte, const Node child) { - for (idx_t i = 0; i < count; i++) { - if (key[i] == byte) { - children[i] = child; - return; - } - } -} - -optional_ptr Node4::GetChild(const uint8_t byte) const { - for (idx_t i = 0; i < count; i++) { - if (key[i] == byte) { - D_ASSERT(children[i].HasMetadata()); - return &children[i]; - } - } - return nullptr; -} - -optional_ptr 
Node4::GetChildMutable(const uint8_t byte) { - for (idx_t i = 0; i < count; i++) { - if (key[i] == byte) { - D_ASSERT(children[i].HasMetadata()); - return &children[i]; - } - } - return nullptr; -} - -optional_ptr Node4::GetNextChild(uint8_t &byte) const { - for (idx_t i = 0; i < count; i++) { - if (key[i] >= byte) { - byte = key[i]; - D_ASSERT(children[i].HasMetadata()); - return &children[i]; - } - } - return nullptr; -} - -optional_ptr Node4::GetNextChildMutable(uint8_t &byte) { - for (idx_t i = 0; i < count; i++) { - if (key[i] >= byte) { - byte = key[i]; - D_ASSERT(children[i].HasMetadata()); - return &children[i]; - } - } - return nullptr; -} - -void Node4::Vacuum(ART &art, const ARTFlags &flags) { - - for (idx_t i = 0; i < count; i++) { - children[i].Vacuum(art, flags); - } -} - -} // namespace duckdb diff --git a/src/duckdb/src/execution/index/art/node48.cpp b/src/duckdb/src/execution/index/art/node48.cpp index 2b1ba22b..f9ad0460 100644 --- a/src/duckdb/src/execution/index/art/node48.cpp +++ b/src/duckdb/src/execution/index/art/node48.cpp @@ -1,21 +1,20 @@ #include "duckdb/execution/index/art/node48.hpp" -#include "duckdb/execution/index/art/node16.hpp" + +#include "duckdb/execution/index/art/base_node.hpp" #include "duckdb/execution/index/art/node256.hpp" -#include "duckdb/common/numeric_utils.hpp" namespace duckdb { Node48 &Node48::New(ART &art, Node &node) { - - node = Node::GetAllocator(art, NType::NODE_48).New(); - node.SetMetadata(static_cast(NType::NODE_48)); - auto &n48 = Node::RefMutable(art, node, NType::NODE_48); + node = Node::GetAllocator(art, NODE_48).New(); + node.SetMetadata(static_cast(NODE_48)); + auto &n48 = Node::Ref(art, node, NODE_48); n48.count = 0; - for (idx_t i = 0; i < Node::NODE_256_CAPACITY; i++) { - n48.child_index[i] = Node::EMPTY_MARKER; + for (uint16_t i = 0; i < Node256::CAPACITY; i++) { + n48.child_index[i] = EMPTY_MARKER; } - for (idx_t i = 0; i < Node::NODE_48_CAPACITY; i++) { + for (uint8_t i = 0; i < CAPACITY; i++) { 
n48.children[i].Clear(); } @@ -23,39 +22,79 @@ Node48 &Node48::New(ART &art, Node &node) { } void Node48::Free(ART &art, Node &node) { + auto &n48 = Node::Ref(art, node, NODE_48); + if (!n48.count) { + return; + } - D_ASSERT(node.HasMetadata()); - auto &n48 = Node::RefMutable(art, node, NType::NODE_48); + Iterator(n48, [&](Node &child) { Node::Free(art, child); }); +} - if (!n48.count) { +void Node48::InsertChild(ART &art, Node &node, const uint8_t byte, const Node child) { + auto &n48 = Node::Ref(art, node, NODE_48); + + // The node is full. Grow to Node256. + if (n48.count == CAPACITY) { + auto node48 = node; + Node256::GrowNode48(art, node, node48); + Node256::InsertChild(art, node, byte, child); return; } - // free all children - for (idx_t i = 0; i < Node::NODE_256_CAPACITY; i++) { - if (n48.child_index[i] != Node::EMPTY_MARKER) { - Node::Free(art, n48.children[n48.child_index[i]]); + // Still space. Insert the child. + uint8_t child_pos = n48.count; + if (n48.children[child_pos].HasMetadata()) { + // Find an empty position in the node list. + child_pos = 0; + while (n48.children[child_pos].HasMetadata()) { + child_pos++; } } + + n48.children[child_pos] = child; + n48.child_index[byte] = child_pos; + n48.count++; } -Node48 &Node48::GrowNode16(ART &art, Node &node48, Node &node16) { +void Node48::DeleteChild(ART &art, Node &node, const uint8_t byte) { + auto &n48 = Node::Ref(art, node, NODE_48); + + // Free the child and decrease the count. + Node::Free(art, n48.children[n48.child_index[byte]]); + n48.child_index[byte] = EMPTY_MARKER; + n48.count--; + + // Shrink to Node16. 
+ if (n48.count < SHRINK_THRESHOLD) { + auto node48 = node; + Node16::ShrinkNode48(art, node, node48); + } +} + +void Node48::ReplaceChild(const uint8_t byte, const Node child) { + D_ASSERT(count >= SHRINK_THRESHOLD); + + auto status = children[child_index[byte]].GetGateStatus(); + children[child_index[byte]] = child; + if (status == GateStatus::GATE_SET && child.HasMetadata()) { + children[child_index[byte]].SetGateStatus(status); + } +} - auto &n16 = Node::RefMutable(art, node16, NType::NODE_16); +Node48 &Node48::GrowNode16(ART &art, Node &node48, Node &node16) { + auto &n16 = Node::Ref(art, node16, NType::NODE_16); auto &n48 = New(art, node48); + node48.SetGateStatus(node16.GetGateStatus()); n48.count = n16.count; - for (idx_t i = 0; i < Node::NODE_256_CAPACITY; i++) { - n48.child_index[i] = Node::EMPTY_MARKER; + for (uint16_t i = 0; i < Node256::CAPACITY; i++) { + n48.child_index[i] = EMPTY_MARKER; } - - for (idx_t i = 0; i < n16.count; i++) { - n48.child_index[n16.key[i]] = UnsafeNumericCast(i); + for (uint8_t i = 0; i < n16.count; i++) { + n48.child_index[n16.key[i]] = i; n48.children[i] = n16.children[i]; } - - // necessary for faster child insertion/deletion - for (idx_t i = n16.count; i < Node::NODE_48_CAPACITY; i++) { + for (uint8_t i = n16.count; i < CAPACITY; i++) { n48.children[i].Clear(); } @@ -65,24 +104,21 @@ Node48 &Node48::GrowNode16(ART &art, Node &node48, Node &node16) { } Node48 &Node48::ShrinkNode256(ART &art, Node &node48, Node &node256) { - auto &n48 = New(art, node48); - auto &n256 = Node::RefMutable(art, node256, NType::NODE_256); + auto &n256 = Node::Ref(art, node256, NType::NODE_256); + node48.SetGateStatus(node256.GetGateStatus()); n48.count = 0; - for (idx_t i = 0; i < Node::NODE_256_CAPACITY; i++) { - D_ASSERT(n48.count <= Node::NODE_48_CAPACITY); - if (n256.children[i].HasMetadata()) { - n48.child_index[i] = n48.count; - n48.children[n48.count] = n256.children[i]; - n48.count++; - } else { - n48.child_index[i] = Node::EMPTY_MARKER; + 
for (uint16_t i = 0; i < Node256::CAPACITY; i++) { + if (!n256.children[i].HasMetadata()) { + n48.child_index[i] = EMPTY_MARKER; + continue; } + n48.child_index[i] = n48.count; + n48.children[n48.count] = n256.children[i]; + n48.count++; } - - // necessary for faster child insertion/deletion - for (idx_t i = n48.count; i < Node::NODE_48_CAPACITY; i++) { + for (uint8_t i = n48.count; i < CAPACITY; i++) { n48.children[i].Clear(); } @@ -91,108 +127,4 @@ Node48 &Node48::ShrinkNode256(ART &art, Node &node48, Node &node256) { return n48; } -void Node48::InitializeMerge(ART &art, const ARTFlags &flags) { - - for (idx_t i = 0; i < Node::NODE_256_CAPACITY; i++) { - if (child_index[i] != Node::EMPTY_MARKER) { - children[child_index[i]].InitializeMerge(art, flags); - } - } -} - -void Node48::InsertChild(ART &art, Node &node, const uint8_t byte, const Node child) { - - D_ASSERT(node.HasMetadata()); - auto &n48 = Node::RefMutable(art, node, NType::NODE_48); - - // ensure that there is no other child at the same byte - D_ASSERT(n48.child_index[byte] == Node::EMPTY_MARKER); - - // insert new child node into node - if (n48.count < Node::NODE_48_CAPACITY) { - // still space, just insert the child - idx_t child_pos = n48.count; - if (n48.children[child_pos].HasMetadata()) { - // find an empty position in the node list if the current position is occupied - child_pos = 0; - while (n48.children[child_pos].HasMetadata()) { - child_pos++; - } - } - n48.children[child_pos] = child; - n48.child_index[byte] = UnsafeNumericCast(child_pos); - n48.count++; - - } else { - // node is full, grow to Node256 - auto node48 = node; - Node256::GrowNode48(art, node, node48); - Node256::InsertChild(art, node, byte, child); - } -} - -void Node48::DeleteChild(ART &art, Node &node, const uint8_t byte) { - - D_ASSERT(node.HasMetadata()); - auto &n48 = Node::RefMutable(art, node, NType::NODE_48); - - // free the child and decrease the count - Node::Free(art, n48.children[n48.child_index[byte]]); - 
n48.child_index[byte] = Node::EMPTY_MARKER; - n48.count--; - - // shrink node to Node16 - if (n48.count < Node::NODE_48_SHRINK_THRESHOLD) { - auto node48 = node; - Node16::ShrinkNode48(art, node, node48); - } -} - -optional_ptr Node48::GetChild(const uint8_t byte) const { - if (child_index[byte] != Node::EMPTY_MARKER) { - D_ASSERT(children[child_index[byte]].HasMetadata()); - return &children[child_index[byte]]; - } - return nullptr; -} - -optional_ptr Node48::GetChildMutable(const uint8_t byte) { - if (child_index[byte] != Node::EMPTY_MARKER) { - D_ASSERT(children[child_index[byte]].HasMetadata()); - return &children[child_index[byte]]; - } - return nullptr; -} - -optional_ptr Node48::GetNextChild(uint8_t &byte) const { - for (idx_t i = byte; i < Node::NODE_256_CAPACITY; i++) { - if (child_index[i] != Node::EMPTY_MARKER) { - byte = UnsafeNumericCast(i); - D_ASSERT(children[child_index[i]].HasMetadata()); - return &children[child_index[i]]; - } - } - return nullptr; -} - -optional_ptr Node48::GetNextChildMutable(uint8_t &byte) { - for (idx_t i = byte; i < Node::NODE_256_CAPACITY; i++) { - if (child_index[i] != Node::EMPTY_MARKER) { - byte = UnsafeNumericCast(i); - D_ASSERT(children[child_index[i]].HasMetadata()); - return &children[child_index[i]]; - } - } - return nullptr; -} - -void Node48::Vacuum(ART &art, const ARTFlags &flags) { - - for (idx_t i = 0; i < Node::NODE_256_CAPACITY; i++) { - if (child_index[i] != Node::EMPTY_MARKER) { - children[child_index[i]].Vacuum(art, flags); - } - } -} - } // namespace duckdb diff --git a/src/duckdb/src/execution/index/art/prefix.cpp b/src/duckdb/src/execution/index/art/prefix.cpp index 7a485fd9..ea6cfd03 100644 --- a/src/duckdb/src/execution/index/art/prefix.cpp +++ b/src/duckdb/src/execution/index/art/prefix.cpp @@ -1,370 +1,550 @@ #include "duckdb/execution/index/art/prefix.hpp" +#include "duckdb/common/swap.hpp" #include "duckdb/execution/index/art/art.hpp" #include "duckdb/execution/index/art/art_key.hpp" +#include 
"duckdb/execution/index/art/base_leaf.hpp" +#include "duckdb/execution/index/art/base_node.hpp" +#include "duckdb/execution/index/art/leaf.hpp" #include "duckdb/execution/index/art/node.hpp" -#include "duckdb/common/swap.hpp" namespace duckdb { -Prefix &Prefix::New(ART &art, Node &node) { - - node = Node::GetAllocator(art, NType::PREFIX).New(); - node.SetMetadata(static_cast(NType::PREFIX)); +Prefix::Prefix(const ART &art, const Node ptr_p, const bool is_mutable, const bool set_in_memory) { + if (!set_in_memory) { + data = Node::GetAllocator(art, PREFIX).Get(ptr_p, is_mutable); + } else { + data = Node::GetAllocator(art, PREFIX).GetIfLoaded(ptr_p); + if (!data) { + ptr = nullptr; + in_memory = false; + return; + } + } + ptr = reinterpret_cast(data + Count(art) + 1); + in_memory = true; +} - auto &prefix = Node::RefMutable(art, node, NType::PREFIX); - prefix.data[Node::PREFIX_SIZE] = 0; - return prefix; +Prefix::Prefix(unsafe_unique_ptr &allocator, const Node ptr_p, const idx_t count) { + data = allocator->Get(ptr_p, true); + ptr = reinterpret_cast(data + count + 1); + in_memory = true; } -Prefix &Prefix::New(ART &art, Node &node, uint8_t byte, const Node &next) { +idx_t Prefix::GetMismatchWithOther(const Prefix &l_prefix, const Prefix &r_prefix, const idx_t max_count) { + for (idx_t i = 0; i < max_count; i++) { + if (l_prefix.data[i] != r_prefix.data[i]) { + return i; + } + } + return DConstants::INVALID_INDEX; +} - node = Node::GetAllocator(art, NType::PREFIX).New(); - node.SetMetadata(static_cast(NType::PREFIX)); +idx_t Prefix::GetMismatchWithKey(ART &art, const Node &node, const ARTKey &key, idx_t &depth) { + Prefix prefix(art, node); + for (idx_t i = 0; i < prefix.data[Prefix::Count(art)]; i++) { + if (prefix.data[i] != key[depth]) { + return i; + } + depth++; + } + return DConstants::INVALID_INDEX; +} - auto &prefix = Node::RefMutable(art, node, NType::PREFIX); - prefix.data[Node::PREFIX_SIZE] = 1; - prefix.data[0] = byte; - prefix.ptr = next; - return prefix; 
+uint8_t Prefix::GetByte(const ART &art, const Node &node, const uint8_t pos) { + D_ASSERT(node.GetType() == PREFIX); + Prefix prefix(art, node); + return prefix.data[pos]; } -void Prefix::New(ART &art, reference &node, const ARTKey &key, const uint32_t depth, uint32_t count) { +Prefix Prefix::NewInternal(ART &art, Node &node, const data_ptr_t data, const uint8_t count, const idx_t offset, + const NType type) { + node = Node::GetAllocator(art, type).New(); + node.SetMetadata(static_cast(type)); - if (count == 0) { - return; + Prefix prefix(art, node, true); + prefix.data[Count(art)] = count; + if (data) { + D_ASSERT(count); + memcpy(prefix.data, data + offset, count); } - idx_t copy_count = 0; + return prefix; +} - while (count) { - node.get() = Node::GetAllocator(art, NType::PREFIX).New(); - node.get().SetMetadata(static_cast(NType::PREFIX)); - auto &prefix = Node::RefMutable(art, node, NType::PREFIX); +void Prefix::New(ART &art, reference &ref, const ARTKey &key, const idx_t depth, idx_t count) { + idx_t offset = 0; - auto this_count = MinValue((uint32_t)Node::PREFIX_SIZE, count); - prefix.data[Node::PREFIX_SIZE] = (uint8_t)this_count; - memcpy(prefix.data, key.data + depth + copy_count, this_count); + while (count) { + auto min = MinValue(UnsafeNumericCast(Count(art)), count); + auto this_count = UnsafeNumericCast(min); + auto prefix = NewInternal(art, ref, key.data, this_count, offset + depth, PREFIX); - node = prefix.ptr; - copy_count += this_count; + ref = *prefix.ptr; + offset += this_count; count -= this_count; } } void Prefix::Free(ART &art, Node &node) { + Node next; - Node current_node = node; - Node next_node; - while (current_node.HasMetadata() && current_node.GetType() == NType::PREFIX) { - next_node = Node::RefMutable(art, current_node, NType::PREFIX).ptr; - Node::GetAllocator(art, NType::PREFIX).Free(current_node); - current_node = next_node; + while (node.HasMetadata() && node.GetType() == PREFIX) { + Prefix prefix(art, node, true); + next = 
*prefix.ptr; + Node::GetAllocator(art, PREFIX).Free(node); + node = next; } - Node::Free(art, current_node); + Node::Free(art, node); node.Clear(); } -void Prefix::InitializeMerge(ART &art, Node &node, const ARTFlags &flags) { - - auto merge_buffer_count = flags.merge_buffer_counts[static_cast(NType::PREFIX) - 1]; - - Node next_node = node; - reference prefix = Node::RefMutable(art, next_node, NType::PREFIX); +void Prefix::InitializeMerge(ART &art, Node &node, const unsafe_vector &upper_bounds) { + auto buffer_count = upper_bounds[Node::GetAllocatorIdx(PREFIX)]; + Node next = node; + Prefix prefix(art, next, true); - while (next_node.GetType() == NType::PREFIX) { - next_node = prefix.get().ptr; - if (prefix.get().ptr.GetType() == NType::PREFIX) { - prefix.get().ptr.IncreaseBufferId(merge_buffer_count); - prefix = Node::RefMutable(art, next_node, NType::PREFIX); + while (next.GetType() == PREFIX) { + next = *prefix.ptr; + if (prefix.ptr->GetType() == PREFIX) { + prefix.ptr->IncreaseBufferId(buffer_count); + prefix = Prefix(art, next, true); } } - node.IncreaseBufferId(merge_buffer_count); - prefix.get().ptr.InitializeMerge(art, flags); + node.IncreaseBufferId(buffer_count); + prefix.ptr->InitMerge(art, upper_bounds); } -void Prefix::Concatenate(ART &art, Node &prefix_node, const uint8_t byte, Node &child_prefix_node) { +void Prefix::Concat(ART &art, Node &parent, uint8_t byte, const GateStatus old_status, const Node &child, + const GateStatus status) { + D_ASSERT(!parent.IsAnyLeaf()); + D_ASSERT(child.HasMetadata()); - D_ASSERT(prefix_node.HasMetadata() && child_prefix_node.HasMetadata()); - - // append a byte and a child_prefix to prefix - if (prefix_node.GetType() == NType::PREFIX) { - - // get the tail - reference prefix = Node::RefMutable(art, prefix_node, NType::PREFIX); - D_ASSERT(prefix.get().ptr.HasMetadata()); + if (old_status == GateStatus::GATE_SET) { + // Concat Node4. 
+ D_ASSERT(status == GateStatus::GATE_SET); + return ConcatGate(art, parent, byte, child); + } + if (child.GetGateStatus() == GateStatus::GATE_SET) { + // Concat Node4. + D_ASSERT(status == GateStatus::GATE_NOT_SET); + return ConcatChildIsGate(art, parent, byte, child); + } - while (prefix.get().ptr.GetType() == NType::PREFIX) { - prefix = Node::RefMutable(art, prefix.get().ptr, NType::PREFIX); - D_ASSERT(prefix.get().ptr.HasMetadata()); + if (status == GateStatus::GATE_SET && child.GetType() == NType::LEAF_INLINED) { + auto row_id = child.GetRowId(); + if (parent.GetType() == PREFIX) { + auto parent_status = parent.GetGateStatus(); + Free(art, parent); + Leaf::New(parent, row_id); + parent.SetGateStatus(parent_status); + } else { + Leaf::New(parent, row_id); } + return; + } - // append the byte - prefix = prefix.get().Append(art, byte); - - if (child_prefix_node.GetType() == NType::PREFIX) { - // append the child prefix - prefix.get().Append(art, child_prefix_node); + if (parent.GetType() != PREFIX) { + auto prefix = NewInternal(art, parent, &byte, 1, 0, PREFIX); + if (child.GetType() == PREFIX) { + prefix.Append(art, child); } else { - // set child_prefix_node to succeed prefix - prefix.get().ptr = child_prefix_node; + *prefix.ptr = child; } return; } - // create a new prefix node containing the byte, then append the child_prefix to it - if (prefix_node.GetType() != NType::PREFIX && child_prefix_node.GetType() == NType::PREFIX) { + auto tail = GetTail(art, parent); + tail = tail.Append(art, byte); - auto child_prefix = child_prefix_node; - auto &prefix = New(art, prefix_node, byte); - prefix.Append(art, child_prefix); - return; + if (child.GetType() == PREFIX) { + tail.Append(art, child); + } else { + *tail.ptr = child; } - - // neither prefix nor child_prefix are prefix nodes - // create a new prefix containing the byte - New(art, prefix_node, byte, child_prefix_node); } -idx_t Prefix::Traverse(ART &art, reference &prefix_node, const ARTKey &key, idx_t &depth) { 
+template +idx_t TraverseInternal(ART &art, reference &node, const ARTKey &key, idx_t &depth, + const bool is_mutable = false) { + D_ASSERT(node.get().HasMetadata()); + D_ASSERT(node.get().GetType() == NType::PREFIX); - D_ASSERT(prefix_node.get().HasMetadata()); - D_ASSERT(prefix_node.get().GetType() == NType::PREFIX); + while (node.get().GetType() == NType::PREFIX) { + auto pos = Prefix::GetMismatchWithKey(art, node, key, depth); + if (pos != DConstants::INVALID_INDEX) { + return pos; + } - // compare prefix nodes to key bytes - while (prefix_node.get().GetType() == NType::PREFIX) { - auto &prefix = Node::Ref(art, prefix_node, NType::PREFIX); - for (idx_t i = 0; i < prefix.data[Node::PREFIX_SIZE]; i++) { - if (prefix.data[i] != key[depth]) { - return i; - } - depth++; + Prefix prefix(art, node, is_mutable); + node = *prefix.ptr; + if (node.get().GetGateStatus() == GateStatus::GATE_SET) { + break; } - prefix_node = prefix.ptr; - D_ASSERT(prefix_node.get().HasMetadata()); } - return DConstants::INVALID_INDEX; } -idx_t Prefix::TraverseMutable(ART &art, reference &prefix_node, const ARTKey &key, idx_t &depth) { - - D_ASSERT(prefix_node.get().HasMetadata()); - D_ASSERT(prefix_node.get().GetType() == NType::PREFIX); - - // compare prefix nodes to key bytes - while (prefix_node.get().GetType() == NType::PREFIX) { - auto &prefix = Node::RefMutable(art, prefix_node, NType::PREFIX); - for (idx_t i = 0; i < prefix.data[Node::PREFIX_SIZE]; i++) { - if (prefix.data[i] != key[depth]) { - return i; - } - depth++; - } - prefix_node = prefix.ptr; - D_ASSERT(prefix_node.get().HasMetadata()); - } +idx_t Prefix::Traverse(ART &art, reference &node, const ARTKey &key, idx_t &depth) { + return TraverseInternal(art, node, key, depth); +} - return DConstants::INVALID_INDEX; +idx_t Prefix::TraverseMutable(ART &art, reference &node, const ARTKey &key, idx_t &depth) { + return TraverseInternal(art, node, key, depth, true); } -bool Prefix::Traverse(ART &art, reference &l_node, reference 
&r_node, idx_t &mismatch_position) { +bool Prefix::Traverse(ART &art, reference &l_node, reference &r_node, idx_t &pos, const GateStatus status) { + D_ASSERT(l_node.get().HasMetadata()); + D_ASSERT(r_node.get().HasMetadata()); - auto &l_prefix = Node::RefMutable(art, l_node.get(), NType::PREFIX); - auto &r_prefix = Node::RefMutable(art, r_node.get(), NType::PREFIX); + Prefix l_prefix(art, l_node, true); + Prefix r_prefix(art, r_node, true); - // compare prefix bytes - idx_t max_count = MinValue(l_prefix.data[Node::PREFIX_SIZE], r_prefix.data[Node::PREFIX_SIZE]); - for (idx_t i = 0; i < max_count; i++) { - if (l_prefix.data[i] != r_prefix.data[i]) { - mismatch_position = i; - break; - } + idx_t max_count = MinValue(l_prefix.data[Count(art)], r_prefix.data[Count(art)]); + pos = GetMismatchWithOther(l_prefix, r_prefix, max_count); + if (pos != DConstants::INVALID_INDEX) { + return true; } - if (mismatch_position == DConstants::INVALID_INDEX) { - - // prefixes match (so far) - if (l_prefix.data[Node::PREFIX_SIZE] == r_prefix.data[Node::PREFIX_SIZE]) { - return l_prefix.ptr.ResolvePrefixes(art, r_prefix.ptr); - } - - mismatch_position = max_count; - - // l_prefix contains r_prefix - if (r_prefix.ptr.GetType() != NType::PREFIX && r_prefix.data[Node::PREFIX_SIZE] == max_count) { - swap(l_node.get(), r_node.get()); - l_node = r_prefix.ptr; - - } else { - // r_prefix contains l_prefix - l_node = l_prefix.ptr; - } + // Match. + if (l_prefix.data[Count(art)] == r_prefix.data[Count(art)]) { + auto r_child = *r_prefix.ptr; + r_prefix.ptr->Clear(); + Node::Free(art, r_node); + return l_prefix.ptr->MergeInternal(art, r_child, status); } + pos = max_count; + if (r_prefix.ptr->GetType() != PREFIX && r_prefix.data[Count(art)] == max_count) { + // l_prefix contains r_prefix. + swap(l_node.get(), r_node.get()); + l_node = *r_prefix.ptr; + return true; + } + // r_prefix contains l_prefix. 
+ l_node = *l_prefix.ptr; return true; } -void Prefix::Reduce(ART &art, Node &prefix_node, const idx_t n) { - - D_ASSERT(prefix_node.HasMetadata()); - D_ASSERT(n < Node::PREFIX_SIZE); - - reference prefix = Node::RefMutable(art, prefix_node, NType::PREFIX); +void Prefix::Reduce(ART &art, Node &node, const idx_t pos) { + D_ASSERT(node.HasMetadata()); + D_ASSERT(pos < Count(art)); - // free this prefix node - if (n == (idx_t)(prefix.get().data[Node::PREFIX_SIZE] - 1)) { - auto next_ptr = prefix.get().ptr; - D_ASSERT(next_ptr.HasMetadata()); - prefix.get().ptr.Clear(); - Node::Free(art, prefix_node); - prefix_node = next_ptr; + Prefix prefix(art, node); + if (pos == idx_t(prefix.data[Count(art)] - 1)) { + auto next = *prefix.ptr; + prefix.ptr->Clear(); + Node::Free(art, node); + node = next; return; } - // shift by n bytes in the current prefix - for (idx_t i = 0; i < Node::PREFIX_SIZE - n - 1; i++) { - prefix.get().data[i] = prefix.get().data[n + i + 1]; + for (idx_t i = 0; i < Count(art) - pos - 1; i++) { + prefix.data[i] = prefix.data[pos + i + 1]; } - D_ASSERT(n < (idx_t)(prefix.get().data[Node::PREFIX_SIZE] - 1)); - prefix.get().data[Node::PREFIX_SIZE] -= n + 1; - // append the remaining prefix bytes - prefix.get().Append(art, prefix.get().ptr); + prefix.data[Count(art)] -= pos + 1; + prefix.Append(art, *prefix.ptr); } -void Prefix::Split(ART &art, reference &prefix_node, Node &child_node, idx_t position) { +GateStatus Prefix::Split(ART &art, reference &node, Node &child, const uint8_t pos) { + D_ASSERT(node.get().HasMetadata()); - D_ASSERT(prefix_node.get().HasMetadata()); + Prefix prefix(art, node); - auto &prefix = Node::RefMutable(art, prefix_node, NType::PREFIX); - - // the split is at the last byte of this prefix, so the child_node contains all subsequent - // prefix nodes (prefix.ptr) (if any), and the count of this prefix decreases by one, - // then, we reference prefix.ptr, to overwrite it with a new node later - if (position + 1 == Node::PREFIX_SIZE) { 
- prefix.data[Node::PREFIX_SIZE]--; - prefix_node = prefix.ptr; - child_node = prefix.ptr; - return; + // The split is at the last prefix byte. Decrease the count and return. + if (pos + 1 == Count(art)) { + prefix.data[Count(art)]--; + node = *prefix.ptr; + child = *prefix.ptr; + return GateStatus::GATE_NOT_SET; } - // append the remaining bytes after the split - if (position + 1 < prefix.data[Node::PREFIX_SIZE]) { - reference child_prefix = New(art, child_node); - for (idx_t i = position + 1; i < prefix.data[Node::PREFIX_SIZE]; i++) { - child_prefix = child_prefix.get().Append(art, prefix.data[i]); - } - - D_ASSERT(prefix.ptr.HasMetadata()); + if (pos + 1 < prefix.data[Count(art)]) { + // Create a new prefix and + // 1. copy the remaining bytes of this prefix. + // 2. append remaining prefix nodes. + auto new_prefix = NewInternal(art, child, nullptr, 0, 0, PREFIX); + new_prefix.data[Count(art)] = prefix.data[Count(art)] - pos - 1; + memcpy(new_prefix.data, prefix.data + pos + 1, new_prefix.data[Count(art)]); - if (prefix.ptr.GetType() == NType::PREFIX) { - child_prefix.get().Append(art, prefix.ptr); + if (prefix.ptr->GetType() == PREFIX && prefix.ptr->GetGateStatus() == GateStatus::GATE_NOT_SET) { + new_prefix.Append(art, *prefix.ptr); } else { - // this is the last prefix node of the prefix - child_prefix.get().ptr = prefix.ptr; + *new_prefix.ptr = *prefix.ptr; } + + } else if (pos + 1 == prefix.data[Count(art)]) { + // No prefix bytes after the split. + child = *prefix.ptr; } - // this is the last prefix node of the prefix - if (position + 1 == prefix.data[Node::PREFIX_SIZE]) { - child_node = prefix.ptr; + // Set the new count of this node. + prefix.data[Count(art)] = pos; + + // No bytes left before the split, free this node. 
+ if (pos == 0) { + auto old_status = node.get().GetGateStatus(); + prefix.ptr->Clear(); + Node::Free(art, node); + return old_status; } - // set the new size of this node - prefix.data[Node::PREFIX_SIZE] = UnsafeNumericCast(position); + // There are bytes left before the split. + // The subsequent node replaces the split byte. + node = *prefix.ptr; + return GateStatus::GATE_NOT_SET; +} - // no bytes left before the split, free this node - if (position == 0) { - prefix.ptr.Clear(); - Node::Free(art, prefix_node.get()); - return; +bool Prefix::Insert(ART &art, Node &node, const ARTKey &key, idx_t depth, const ARTKey &row_id, + const GateStatus status) { + reference next(node); + auto pos = TraverseMutable(art, next, key, depth); + + // We recurse into the next node, if + // (1) the prefix matches the key. + // (2) we reach a gate. + if (pos == DConstants::INVALID_INDEX) { + if (next.get().GetType() != NType::PREFIX || next.get().GetGateStatus() == GateStatus::GATE_SET) { + return art.Insert(next, key, depth, row_id, status); + } + } + + Node remainder; + auto byte = GetByte(art, next, UnsafeNumericCast(pos)); + auto split_status = Split(art, next, remainder, UnsafeNumericCast(pos)); + Node4::New(art, next); + next.get().SetGateStatus(split_status); + + // Insert the remaining prefix into the new Node4. + Node4::InsertChild(art, next, byte, remainder); + + if (status == GateStatus::GATE_SET) { + D_ASSERT(pos != ROW_ID_COUNT); + Node new_row_id; + Leaf::New(new_row_id, key.GetRowId()); + Node::InsertChild(art, next, key[depth], new_row_id); + return true; } - // bytes left before the split, reference subsequent node - prefix_node = prefix.ptr; - return; + Node leaf; + reference ref(leaf); + if (depth + 1 < key.len) { + // Create the prefix. + auto count = key.len - depth - 1; + Prefix::New(art, ref, key, depth + 1, count); + } + // Create the inlined leaf. 
+ Leaf::New(ref, row_id.GetRowId()); + Node4::InsertChild(art, next, key[depth], leaf); + return true; } string Prefix::VerifyAndToString(ART &art, const Node &node, const bool only_verify) { - - // NOTE: we could do this recursively, but the function-call overhead can become kinda crazy string str = ""; + reference ref(node); - reference node_ref(node); - while (node_ref.get().GetType() == NType::PREFIX) { + Iterator(art, ref, true, false, [&](Prefix &prefix) { + D_ASSERT(prefix.data[Count(art)] != 0); + D_ASSERT(prefix.data[Count(art)] <= Count(art)); - auto &prefix = Node::Ref(art, node_ref, NType::PREFIX); - D_ASSERT(prefix.data[Node::PREFIX_SIZE] != 0); - D_ASSERT(prefix.data[Node::PREFIX_SIZE] <= Node::PREFIX_SIZE); - - str += " prefix_bytes:["; - for (idx_t i = 0; i < prefix.data[Node::PREFIX_SIZE]; i++) { + str += " Prefix :[ "; + for (idx_t i = 0; i < prefix.data[Count(art)]; i++) { str += to_string(prefix.data[i]) + "-"; } - str += "] "; + str += " ] "; + }); - node_ref = prefix.ptr; - } + auto child = ref.get().VerifyAndToString(art, only_verify); + return only_verify ? "" : str + child; +} - auto subtree = node_ref.get().VerifyAndToString(art, only_verify); - return only_verify ? 
"" : str + subtree; +void Prefix::VerifyAllocations(ART &art, const Node &node, unordered_map &node_counts) { + auto idx = Node::GetAllocatorIdx(PREFIX); + reference ref(node); + Iterator(art, ref, false, false, [&](Prefix &prefix) { node_counts[idx]++; }); + return ref.get().VerifyAllocations(art, node_counts); } -void Prefix::Vacuum(ART &art, Node &node, const ARTFlags &flags) { +void Prefix::Vacuum(ART &art, Node &node, const unordered_set &indexes) { + bool set = indexes.find(Node::GetAllocatorIdx(PREFIX)) != indexes.end(); + auto &allocator = Node::GetAllocator(art, PREFIX); + + reference ref(node); + while (ref.get().GetType() == PREFIX) { + if (set && allocator.NeedsVacuum(ref)) { + auto status = ref.get().GetGateStatus(); + ref.get() = allocator.VacuumPointer(ref); + ref.get().SetMetadata(static_cast(PREFIX)); + ref.get().SetGateStatus(status); + } + Prefix prefix(art, ref, true); + ref = *prefix.ptr; + } - bool flag_set = flags.vacuum_flags[static_cast(NType::PREFIX) - 1]; - auto &allocator = Node::GetAllocator(art, NType::PREFIX); + ref.get().Vacuum(art, indexes); +} - reference node_ref(node); - while (node_ref.get().GetType() == NType::PREFIX) { - if (flag_set && allocator.NeedsVacuum(node_ref)) { - node_ref.get() = allocator.VacuumPointer(node_ref); - node_ref.get().SetMetadata(static_cast(NType::PREFIX)); +void Prefix::TransformToDeprecated(ART &art, Node &node, unsafe_unique_ptr &allocator) { + // Early-out, if we do not need any transformations. + if (!allocator) { + reference ref(node); + while (ref.get().GetType() == PREFIX && ref.get().GetGateStatus() == GateStatus::GATE_NOT_SET) { + Prefix prefix(art, ref, true, true); + if (!prefix.in_memory) { + return; + } + ref = *prefix.ptr; } - auto &prefix = Node::RefMutable(art, node_ref, NType::PREFIX); - node_ref = prefix.ptr; + return Node::TransformToDeprecated(art, ref, allocator); } - node_ref.get().Vacuum(art, flags); -} + // Fast path. 
+ if (art.prefix_count <= DEPRECATED_COUNT) { + reference ref(node); + while (ref.get().GetType() == PREFIX && ref.get().GetGateStatus() == GateStatus::GATE_NOT_SET) { + Prefix prefix(art, ref, true, true); + if (!prefix.in_memory) { + return; + } + + Node new_node; + new_node = allocator->New(); + new_node.SetMetadata(static_cast(PREFIX)); -Prefix &Prefix::Append(ART &art, const uint8_t byte) { + Prefix new_prefix(allocator, new_node, DEPRECATED_COUNT); + new_prefix.data[DEPRECATED_COUNT] = prefix.data[Count(art)]; + memcpy(new_prefix.data, prefix.data, new_prefix.data[DEPRECATED_COUNT]); + *new_prefix.ptr = *prefix.ptr; - reference prefix(*this); + prefix.ptr->Clear(); + Node::Free(art, ref); + ref.get() = new_node; + ref = *new_prefix.ptr; + } + + return Node::TransformToDeprecated(art, ref, allocator); + } + + // Else, we need to create a new prefix chain. + Node new_node; + new_node = allocator->New(); + new_node.SetMetadata(static_cast(PREFIX)); + Prefix new_prefix(allocator, new_node, DEPRECATED_COUNT); + + reference ref(node); + while (ref.get().GetType() == PREFIX && ref.get().GetGateStatus() == GateStatus::GATE_NOT_SET) { + Prefix prefix(art, ref, true, true); + if (!prefix.in_memory) { + return; + } + + for (idx_t i = 0; i < prefix.data[Count(art)]; i++) { + new_prefix = new_prefix.TransformToDeprecatedAppend(art, allocator, prefix.data[i]); + } - // we need a new prefix node - if (prefix.get().data[Node::PREFIX_SIZE] == Node::PREFIX_SIZE) { - prefix = New(art, prefix.get().ptr); + *new_prefix.ptr = *prefix.ptr; + Node::GetAllocator(art, PREFIX).Free(ref); + ref = *new_prefix.ptr; } - prefix.get().data[prefix.get().data[Node::PREFIX_SIZE]] = byte; - prefix.get().data[Node::PREFIX_SIZE]++; - return prefix.get(); + return Node::TransformToDeprecated(art, ref, allocator); } -void Prefix::Append(ART &art, Node other_prefix) { +Prefix Prefix::Append(ART &art, const uint8_t byte) { + if (data[Count(art)] != Count(art)) { + data[data[Count(art)]] = byte; + 
data[Count(art)]++; + return *this; + } + + auto prefix = NewInternal(art, *ptr, nullptr, 0, 0, PREFIX); + return prefix.Append(art, byte); +} - D_ASSERT(other_prefix.HasMetadata()); +void Prefix::Append(ART &art, Node other) { + D_ASSERT(other.HasMetadata()); - reference prefix(*this); - while (other_prefix.GetType() == NType::PREFIX) { + Prefix prefix = *this; + while (other.GetType() == PREFIX) { + if (other.GetGateStatus() == GateStatus::GATE_SET) { + *prefix.ptr = other; + return; + } - // copy prefix bytes - auto &other = Node::RefMutable(art, other_prefix, NType::PREFIX); - for (idx_t i = 0; i < other.data[Node::PREFIX_SIZE]; i++) { - prefix = prefix.get().Append(art, other.data[i]); + Prefix other_prefix(art, other, true); + for (idx_t i = 0; i < other_prefix.data[Count(art)]; i++) { + prefix = prefix.Append(art, other_prefix.data[i]); } - D_ASSERT(other.ptr.HasMetadata()); + *prefix.ptr = *other_prefix.ptr; + Node::GetAllocator(art, PREFIX).Free(other); + other = *prefix.ptr; + } +} + +Prefix Prefix::GetTail(ART &art, const Node &node) { + Prefix prefix(art, node, true); + while (prefix.ptr->GetType() == PREFIX) { + prefix = Prefix(art, *prefix.ptr, true); + } + return prefix; +} + +void Prefix::ConcatGate(ART &art, Node &parent, uint8_t byte, const Node &child) { + D_ASSERT(child.HasMetadata()); + Node new_prefix = Node(); + + // Inside gates, inlined row IDs are not prefixed. + if (child.GetType() == NType::LEAF_INLINED) { + Leaf::New(new_prefix, child.GetRowId()); + + } else if (child.GetType() == PREFIX) { + // At least one more row ID in this gate. + auto prefix = NewInternal(art, new_prefix, &byte, 1, 0, PREFIX); + prefix.ptr->Clear(); + prefix.Append(art, child); + new_prefix.SetGateStatus(GateStatus::GATE_SET); + + } else { + // At least one more row ID in this gate. 
+ auto prefix = NewInternal(art, new_prefix, &byte, 1, 0, PREFIX); + *prefix.ptr = child; + new_prefix.SetGateStatus(GateStatus::GATE_SET); + } + + if (parent.GetType() != PREFIX) { + parent = new_prefix; + return; + } + *GetTail(art, parent).ptr = new_prefix; +} + +void Prefix::ConcatChildIsGate(ART &art, Node &parent, uint8_t byte, const Node &child) { + // Create a new prefix and point it to the gate. + if (parent.GetType() != PREFIX) { + auto prefix = NewInternal(art, parent, &byte, 1, 0, PREFIX); + *prefix.ptr = child; + return; + } + + auto tail = GetTail(art, parent); + tail = tail.Append(art, byte); + *tail.ptr = child; +} - prefix.get().ptr = other.ptr; - Node::GetAllocator(art, NType::PREFIX).Free(other_prefix); - other_prefix = prefix.get().ptr; +Prefix Prefix::TransformToDeprecatedAppend(ART &art, unsafe_unique_ptr &allocator, uint8_t byte) { + if (data[DEPRECATED_COUNT] != DEPRECATED_COUNT) { + data[data[DEPRECATED_COUNT]] = byte; + data[DEPRECATED_COUNT]++; + return *this; } - D_ASSERT(prefix.get().ptr.GetType() != NType::PREFIX); + *ptr = allocator->New(); + ptr->SetMetadata(static_cast(PREFIX)); + Prefix prefix(allocator, *ptr, DEPRECATED_COUNT); + return prefix.TransformToDeprecatedAppend(art, allocator, byte); } } // namespace duckdb diff --git a/src/duckdb/src/execution/index/bound_index.cpp b/src/duckdb/src/execution/index/bound_index.cpp index 49a02a05..017f7f5b 100644 --- a/src/duckdb/src/execution/index/bound_index.cpp +++ b/src/duckdb/src/execution/index/bound_index.cpp @@ -62,6 +62,12 @@ string BoundIndex::VerifyAndToString(const bool only_verify) { return VerifyAndToString(state, only_verify); } +void BoundIndex::VerifyAllocations() { + IndexLock state; + InitializeLock(state); + return VerifyAllocations(state); +} + void BoundIndex::Vacuum() { IndexLock state; InitializeLock(state); @@ -97,7 +103,7 @@ bool BoundIndex::IndexIsUpdated(const vector &column_ids_p) const return false; } -IndexStorageInfo BoundIndex::GetStorageInfo(const bool 
get_buffers) { +IndexStorageInfo BoundIndex::GetStorageInfo(const case_insensitive_map_t &options, const bool to_wal) { throw NotImplementedException("The implementation of this index serialization does not exist."); } diff --git a/src/duckdb/src/execution/index/fixed_size_allocator.cpp b/src/duckdb/src/execution/index/fixed_size_allocator.cpp index f40234e3..a6ad0f38 100644 --- a/src/duckdb/src/execution/index/fixed_size_allocator.cpp +++ b/src/duckdb/src/execution/index/fixed_size_allocator.cpp @@ -172,18 +172,7 @@ bool FixedSizeAllocator::InitializeVacuum() { Reset(); return false; } - - // remove all empty buffers - auto buffer_it = buffers.begin(); - while (buffer_it != buffers.end()) { - if (!buffer_it->second.segment_count) { - buffers_with_free_space.erase(buffer_it->first); - buffer_it->second.Destroy(); - buffer_it = buffers.erase(buffer_it); - } else { - buffer_it++; - } - } + RemoveEmptyBuffers(); // determine if a vacuum is necessary multimap temporary_vacuum_buffers; @@ -355,4 +344,19 @@ idx_t FixedSizeAllocator::GetAvailableBufferId() const { return buffer_id; } +void FixedSizeAllocator::RemoveEmptyBuffers() { + + auto buffer_it = buffers.begin(); + while (buffer_it != buffers.end()) { + if (buffer_it->second.segment_count != 0) { + buffer_it++; + continue; + } + + buffers_with_free_space.erase(buffer_it->first); + buffer_it->second.Destroy(); + buffer_it = buffers.erase(buffer_it); + } +} + } // namespace duckdb diff --git a/src/duckdb/src/execution/index/fixed_size_buffer.cpp b/src/duckdb/src/execution/index/fixed_size_buffer.cpp index aa95d2ac..78a3e4d0 100644 --- a/src/duckdb/src/execution/index/fixed_size_buffer.cpp +++ b/src/duckdb/src/execution/index/fixed_size_buffer.cpp @@ -68,7 +68,7 @@ void FixedSizeBuffer::Destroy() { void FixedSizeBuffer::Serialize(PartialBlockManager &partial_block_manager, const idx_t available_segments, const idx_t segment_size, const idx_t bitmask_offset) { - // we do not serialize a block that is already on disk and 
not in memory + // Early-out, if the block is already on disk and not in memory. if (!InMemory()) { if (!OnDisk() || dirty) { throw InternalException("invalid or missing buffer in FixedSizeAllocator"); @@ -76,12 +76,13 @@ void FixedSizeBuffer::Serialize(PartialBlockManager &partial_block_manager, cons return; } - // we do not serialize a block that is already on disk and not dirty + // Early-out, if the buffer is already on disk and not dirty. if (!dirty && OnDisk()) { return; } - // the allocation possibly changed + // Adjust the allocation size. + D_ASSERT(segment_count != 0); SetAllocationSize(available_segments, segment_size, bitmask_offset); // the buffer is in memory, so we copied it onto a new buffer when pinning @@ -195,75 +196,23 @@ uint32_t FixedSizeBuffer::GetOffset(const idx_t bitmask_count) { void FixedSizeBuffer::SetAllocationSize(const idx_t available_segments, const idx_t segment_size, const idx_t bitmask_offset) { - - if (dirty) { - auto max_offset = GetMaxOffset(available_segments); - allocation_size = max_offset * segment_size + bitmask_offset; - } -} - -uint32_t FixedSizeBuffer::GetMaxOffset(const idx_t available_segments) { - - // this function calls Get() on the buffer - D_ASSERT(InMemory()); - - // finds the maximum zero bit in a bitmask, and adds one to it, - // so that max_offset * segment_size = allocated_size of this bitmask's buffer - idx_t entry_size = sizeof(validity_t) * 8; - idx_t bitmask_count = available_segments / entry_size; - if (available_segments % entry_size != 0) { - bitmask_count++; + if (!dirty) { + return; } - auto max_offset = UnsafeNumericCast(bitmask_count * sizeof(validity_t) * 8); - auto bits_in_last_entry = available_segments % (sizeof(validity_t) * 8); - // get the bitmask data + // We traverse from the back. A binary search would be faster. + // However, buffers are often (almost) full, so the overhead is acceptable. 
auto bitmask_ptr = reinterpret_cast(Get()); - const ValidityMask mask(bitmask_ptr); - const auto data = mask.GetData(); - - D_ASSERT(bitmask_count > 0); - for (idx_t i = bitmask_count; i > 0; i--) { - - auto entry = data[i - 1]; - - // set all bits after bits_in_last_entry - if (i == bitmask_count) { - entry |= ~idx_t(0) << bits_in_last_entry; - } - - if (entry == ~idx_t(0)) { - max_offset -= sizeof(validity_t) * 8; - continue; - } - - // invert data[entry_idx] - auto entry_inv = ~entry; - idx_t first_valid_bit = 0; - - // then find the position of the LEFTMOST set bit - for (idx_t level = 0; level < 6; level++) { + ValidityMask mask(bitmask_ptr); - // set the right half of the bits of this level to zero and test if the entry is still not zero - if (entry_inv & ~BASE[level]) { - // first valid bit is in the leftmost s[level] bits - // shift by s[level] for the next iteration and add s[level] to the position of the leftmost set bit - entry_inv >>= SHIFT[level]; - first_valid_bit += SHIFT[level]; - } else { - // first valid bit is in the rightmost s[level] bits - // permanently set the left half of the bits to zero - entry_inv &= BASE[level]; - } + auto max_offset = available_segments; + for (idx_t i = available_segments; i > 0; i--) { + if (!mask.RowIsValid(i - 1)) { + max_offset = i; + break; } - D_ASSERT(entry_inv); - max_offset -= sizeof(validity_t) * 8 - first_valid_bit; - D_ASSERT(!mask.RowIsValid(max_offset)); - return max_offset + 1; } - - // there are no allocations in this buffer - throw InternalException("tried to serialize empty buffer"); + allocation_size = max_offset * segment_size + bitmask_offset; } void FixedSizeBuffer::SetUninitializedRegions(PartialBlockForIndex &p_block_for_index, const idx_t segment_size, diff --git a/src/duckdb/src/execution/join_hashtable.cpp b/src/duckdb/src/execution/join_hashtable.cpp index 3a98a26b..1f2412c0 100644 --- a/src/duckdb/src/execution/join_hashtable.cpp +++ b/src/duckdb/src/execution/join_hashtable.cpp @@ -29,12 
+29,12 @@ JoinHashTable::InsertState::InsertState(const JoinHashTable &ht) ht.data_collection->InitializeChunkState(chunk_state, ht.equality_predicate_columns); } -JoinHashTable::JoinHashTable(BufferManager &buffer_manager_p, const vector &conditions_p, +JoinHashTable::JoinHashTable(ClientContext &context, const vector &conditions_p, vector btypes, JoinType type_p, const vector &output_columns_p) - : buffer_manager(buffer_manager_p), conditions(conditions_p), build_types(std::move(btypes)), - output_columns(output_columns_p), entry_size(0), tuple_size(0), vfound(Value::BOOLEAN(false)), join_type(type_p), - finalized(false), has_null(false), radix_bits(INITIAL_RADIX_BITS), partition_start(0), partition_end(0) { - + : buffer_manager(BufferManager::GetBufferManager(context)), conditions(conditions_p), + build_types(std::move(btypes)), output_columns(output_columns_p), entry_size(0), tuple_size(0), + vfound(Value::BOOLEAN(false)), join_type(type_p), finalized(false), has_null(false), + radix_bits(INITIAL_RADIX_BITS), partition_start(0), partition_end(0) { for (idx_t i = 0; i < conditions.size(); ++i) { auto &condition = conditions[i]; D_ASSERT(condition.left->return_type == condition.right->return_type); @@ -103,6 +103,11 @@ JoinHashTable::JoinHashTable(BufferManager &buffer_manager_p, const vector(layout.GetRowWidth()); memset(dead_end.get(), 0, layout.GetRowWidth()); + + if (join_type == JoinType::SINGLE) { + auto &config = DBConfig::GetConfig(context); + single_join_error_on_multiple_rows = config.options.scalar_subquery_error_on_multiple_rows; + } } JoinHashTable::~JoinHashTable() { @@ -1173,6 +1178,7 @@ void ScanStructure::NextSingleJoin(DataChunk &keys, DataChunk &left, DataChunk & // this join is similar to the semi join except that // (1) we actually return data from the RHS and // (2) we return NULL for that data if there is no match + // (3) if single_join_error_on_multiple_rows is set, we need to keep looking for duplicates after fetching idx_t result_count 
= 0; SelectionVector result_sel(STANDARD_VECTOR_SIZE); @@ -1213,6 +1219,24 @@ void ScanStructure::NextSingleJoin(DataChunk &keys, DataChunk &left, DataChunk & // like the SEMI, ANTI and MARK join types, the SINGLE join only ever does one pass over the HT per input chunk finished = true; + + if (ht.single_join_error_on_multiple_rows && result_count > 0) { + // we need to throw an error if there are multiple rows per key + // advance pointers for those rows + AdvancePointers(result_sel, result_count); + + // now resolve the predicates + idx_t match_count = ResolvePredicates(keys, chain_match_sel_vector, nullptr); + if (match_count > 0) { + // we found at least one duplicate row - throw + throw InvalidInputException( + "More than one row returned by a subquery used as an expression - scalar subqueries can only " + "return a single row.\n\nUse \"SET scalar_subquery_error_on_multiple_rows=false\" to revert to " + "previous behavior of returning a random row."); + } + + this->count = 0; + } } void JoinHashTable::ScanFullOuter(JoinHTScanState &state, Vector &addresses, DataChunk &result) const { diff --git a/src/duckdb/src/execution/operator/csv_scanner/table_function/global_csv_state.cpp b/src/duckdb/src/execution/operator/csv_scanner/table_function/global_csv_state.cpp index aa7873c4..bc72fa27 100644 --- a/src/duckdb/src/execution/operator/csv_scanner/table_function/global_csv_state.cpp +++ b/src/duckdb/src/execution/operator/csv_scanner/table_function/global_csv_state.cpp @@ -72,25 +72,29 @@ double CSVGlobalState::GetProgress(const ReadCSVData &bind_data_p) const { unique_ptr CSVGlobalState::Next(optional_ptr previous_scanner) { if (single_threaded) { idx_t cur_idx; - { - lock_guard parallel_lock(main_mutex); - cur_idx = last_file_idx++; - if (cur_idx >= bind_data.files.size()) { - return nullptr; - } - if (cur_idx == 0) { - D_ASSERT(!previous_scanner); - auto current_file = file_scans.front(); - return make_uniq(scanner_idx++, current_file->buffer_manager, - 
current_file->state_machine, current_file->error_handler, - current_file, false, current_boundary); + shared_ptr current_file; + do { + { + lock_guard parallel_lock(main_mutex); + cur_idx = last_file_idx++; + if (cur_idx >= bind_data.files.size()) { + return nullptr; + } + if (cur_idx == 0) { + D_ASSERT(!previous_scanner); + current_file = file_scans.front(); + return make_uniq(scanner_idx++, current_file->buffer_manager, + current_file->state_machine, current_file->error_handler, + current_file, false, current_boundary); + } } - } - auto file_scan = make_shared_ptr(context, bind_data.files[cur_idx], bind_data.options, cur_idx, - bind_data, column_ids, file_schema, true); + auto file_scan = make_shared_ptr(context, bind_data.files[cur_idx], bind_data.options, cur_idx, + bind_data, column_ids, file_schema, true); + lock_guard parallel_lock(main_mutex); + file_scans.emplace_back(std::move(file_scan)); + current_file = file_scans.back(); + } while (current_file->file_size == 0); lock_guard parallel_lock(main_mutex); - file_scans.emplace_back(std::move(file_scan)); - auto current_file = file_scans.back(); current_boundary = current_file->start_iterator; current_boundary.SetCurrentBoundaryToPosition(single_threaded); current_buffer_in_use = diff --git a/src/duckdb/src/execution/operator/join/physical_hash_join.cpp b/src/duckdb/src/execution/operator/join/physical_hash_join.cpp index 115d41e1..c2f8a321 100644 --- a/src/duckdb/src/execution/operator/join/physical_hash_join.cpp +++ b/src/duckdb/src/execution/operator/join/physical_hash_join.cpp @@ -219,8 +219,7 @@ class HashJoinLocalSinkState : public LocalSinkState { }; unique_ptr PhysicalHashJoin::InitializeHashTable(ClientContext &context) const { - auto result = make_uniq(BufferManager::GetBufferManager(context), conditions, payload_types, - join_type, rhs_output_columns); + auto result = make_uniq(context, conditions, payload_types, join_type, rhs_output_columns); if (!delim_types.empty() && join_type == 
JoinType::MARK) { // correlated MARK join if (delim_types.size() + 1 == conditions.size()) { diff --git a/src/duckdb/src/execution/operator/schema/physical_create_art_index.cpp b/src/duckdb/src/execution/operator/schema/physical_create_art_index.cpp index 4bae412b..ea3def48 100644 --- a/src/duckdb/src/execution/operator/schema/physical_create_art_index.cpp +++ b/src/duckdb/src/execution/operator/schema/physical_create_art_index.cpp @@ -21,7 +21,7 @@ PhysicalCreateARTIndex::PhysicalCreateARTIndex(LogicalOperator &op, TableCatalog table(table_p.Cast()), info(std::move(info)), unbound_expressions(std::move(unbound_expressions)), sorted(sorted) { - // convert virtual column ids to storage column ids + // Convert the virtual column ids to physical column ids. for (auto &column_id : column_ids) { storage_ids.push_back(table.GetColumns().LogicalToPhysical(LogicalIndex(column_id)).index); } @@ -33,7 +33,6 @@ PhysicalCreateARTIndex::PhysicalCreateARTIndex(LogicalOperator &op, TableCatalog class CreateARTIndexGlobalSinkState : public GlobalSinkState { public: - //! Global index to be added to the table unique_ptr global_index; }; @@ -43,53 +42,51 @@ class CreateARTIndexLocalSinkState : public LocalSinkState { unique_ptr local_index; ArenaAllocator arena_allocator; - vector keys; + DataChunk key_chunk; + unsafe_vector keys; vector key_column_ids; + + DataChunk row_id_chunk; + unsafe_vector row_ids; }; unique_ptr PhysicalCreateARTIndex::GetGlobalSinkState(ClientContext &context) const { + // Create the global sink state and add the global index. auto state = make_uniq(); - - // create the global index auto &storage = table.GetStorage(); state->global_index = make_uniq(info->index_name, info->constraint_type, storage_ids, TableIOManager::Get(storage), unbound_expressions, storage.db); - return (std::move(state)); } unique_ptr PhysicalCreateARTIndex::GetLocalSinkState(ExecutionContext &context) const { + // Create the local sink state and add the local index. 
auto state = make_uniq(context.client); - - // create the local index - auto &storage = table.GetStorage(); state->local_index = make_uniq(info->index_name, info->constraint_type, storage_ids, TableIOManager::Get(storage), unbound_expressions, storage.db); - state->keys = vector(STANDARD_VECTOR_SIZE); + // Initialize the local sink state. + state->keys.resize(STANDARD_VECTOR_SIZE); + state->row_ids.resize(STANDARD_VECTOR_SIZE); state->key_chunk.Initialize(Allocator::Get(context.client), state->local_index->logical_types); - + state->row_id_chunk.Initialize(Allocator::Get(context.client), vector {LogicalType::ROW_TYPE}); for (idx_t i = 0; i < state->key_chunk.ColumnCount(); i++) { state->key_column_ids.push_back(i); } return std::move(state); } -SinkResultType PhysicalCreateARTIndex::SinkUnsorted(Vector &row_identifiers, OperatorSinkInput &input) const { +SinkResultType PhysicalCreateARTIndex::SinkUnsorted(OperatorSinkInput &input) const { auto &l_state = input.local_state.Cast(); - auto count = l_state.key_chunk.size(); - - UnifiedVectorFormat row_id_data; - row_identifiers.ToUnifiedFormat(count, row_id_data); - auto row_ids = UnifiedVectorFormat::GetData(row_id_data); + auto row_count = l_state.key_chunk.size(); - // insert the row IDs + // Insert each key and its corresponding row ID. 
auto &art = l_state.local_index->Cast(); - for (idx_t i = 0; i < count; i++) { - if (!art.Insert(art.tree, l_state.keys[i], 0, row_ids[i])) { + for (idx_t i = 0; i < row_count; i++) { + if (!art.Insert(art.tree, l_state.keys[i], 0, l_state.row_ids[i], art.tree.GetGateStatus())) { throw ConstraintException("Data contains duplicates on indexed column(s)"); } } @@ -97,21 +94,21 @@ SinkResultType PhysicalCreateARTIndex::SinkUnsorted(Vector &row_identifiers, Ope return SinkResultType::NEED_MORE_INPUT; } -SinkResultType PhysicalCreateARTIndex::SinkSorted(Vector &row_identifiers, OperatorSinkInput &input) const { +SinkResultType PhysicalCreateARTIndex::SinkSorted(OperatorSinkInput &input) const { auto &l_state = input.local_state.Cast(); auto &storage = table.GetStorage(); auto &l_index = l_state.local_index; - // create an ART from the chunk + // Construct an ART for this chunk. auto art = make_uniq(info->index_name, l_index->GetConstraintType(), l_index->GetColumnIds(), l_index->table_io_manager, l_index->unbound_expressions, storage.db, l_index->Cast().allocators); - if (!art->ConstructFromSorted(l_state.key_chunk.size(), l_state.keys, row_identifiers)) { + if (!art->Construct(l_state.keys, l_state.row_ids, l_state.key_chunk.size())) { throw ConstraintException("Data contains duplicates on indexed column(s)"); } - // merge into the local ART + // Merge the ART into the local ART. 
if (!l_index->MergeIndexes(*art)) { throw ConstraintException("Data contains duplicates on indexed column(s)"); } @@ -124,18 +121,15 @@ SinkResultType PhysicalCreateARTIndex::Sink(ExecutionContext &context, DataChunk D_ASSERT(chunk.ColumnCount() >= 2); auto &l_state = input.local_state.Cast(); - l_state.key_chunk.ReferenceColumns(chunk, l_state.key_column_ids); l_state.arena_allocator.Reset(); + l_state.key_chunk.ReferenceColumns(chunk, l_state.key_column_ids); + ART::GenerateKeyVectors(l_state.arena_allocator, l_state.key_chunk, chunk.data[chunk.ColumnCount() - 1], + l_state.keys, l_state.row_ids); - // Insert the keys and their corresponding row identifiers. - auto &row_identifiers = chunk.data[chunk.ColumnCount() - 1]; if (sorted) { - ART::GenerateKeys(l_state.arena_allocator, l_state.key_chunk, l_state.keys); - return SinkSorted(row_identifiers, input); + return SinkSorted(input); } - - ART::GenerateKeys(l_state.arena_allocator, l_state.key_chunk, l_state.keys); - return SinkUnsorted(row_identifiers, input); + return SinkUnsorted(input); } SinkCombineResultType PhysicalCreateARTIndex::Combine(ExecutionContext &context, @@ -161,6 +155,7 @@ SinkFinalizeType PhysicalCreateARTIndex::Finalize(Pipeline &pipeline, Event &eve // vacuum excess memory and verify state.global_index->Vacuum(); D_ASSERT(!state.global_index->VerifyAndToString(true).empty()); + state.global_index->VerifyAllocations(); auto &storage = table.GetStorage(); if (!storage.IsRoot()) { diff --git a/src/duckdb/src/function/scalar/operators/arithmetic.cpp b/src/duckdb/src/function/scalar/operators/arithmetic.cpp index 09c8ca15..75344f62 100644 --- a/src/duckdb/src/function/scalar/operators/arithmetic.cpp +++ b/src/duckdb/src/function/scalar/operators/arithmetic.cpp @@ -953,12 +953,24 @@ static scalar_function_t GetBinaryFunctionIgnoreZero(PhysicalType type) { } } +template +unique_ptr BindBinaryFloatingPoint(ClientContext &context, ScalarFunction &bound_function, + vector> &arguments) { + auto &config 
= DBConfig::GetConfig(context); + if (config.options.ieee_floating_point_ops) { + bound_function.function = GetScalarBinaryFunction(bound_function.return_type.InternalType()); + } else { + bound_function.function = GetBinaryFunctionIgnoreZero(bound_function.return_type.InternalType()); + } + return nullptr; +} + void DivideFun::RegisterFunction(BuiltinFunctions &set) { ScalarFunctionSet fp_divide("/"); - fp_divide.AddFunction(ScalarFunction({LogicalType::FLOAT, LogicalType::FLOAT}, LogicalType::FLOAT, - GetBinaryFunctionIgnoreZero(PhysicalType::FLOAT))); - fp_divide.AddFunction(ScalarFunction({LogicalType::DOUBLE, LogicalType::DOUBLE}, LogicalType::DOUBLE, - GetBinaryFunctionIgnoreZero(PhysicalType::DOUBLE))); + fp_divide.AddFunction(ScalarFunction({LogicalType::FLOAT, LogicalType::FLOAT}, LogicalType::FLOAT, nullptr, + BindBinaryFloatingPoint)); + fp_divide.AddFunction(ScalarFunction({LogicalType::DOUBLE, LogicalType::DOUBLE}, LogicalType::DOUBLE, nullptr, + BindBinaryFloatingPoint)); fp_divide.AddFunction( ScalarFunction({LogicalType::INTERVAL, LogicalType::BIGINT}, LogicalType::INTERVAL, BinaryScalarFunctionIgnoreZero)); @@ -1001,14 +1013,12 @@ unique_ptr BindDecimalModulo(ClientContext &context, ScalarFunctio template <> float ModuloOperator::Operation(float left, float right) { - D_ASSERT(right != 0); auto result = std::fmod(left, right); return result; } template <> double ModuloOperator::Operation(double left, double right) { - D_ASSERT(right != 0); auto result = std::fmod(left, right); return result; } @@ -1024,7 +1034,9 @@ hugeint_t ModuloOperator::Operation(hugeint_t left, hugeint_t right) { void ModFun::RegisterFunction(BuiltinFunctions &set) { ScalarFunctionSet functions("%"); for (auto &type : LogicalType::Numeric()) { - if (type.id() == LogicalTypeId::DECIMAL) { + if (type.id() == LogicalTypeId::FLOAT || type.id() == LogicalTypeId::DOUBLE) { + functions.AddFunction(ScalarFunction({type, type}, type, nullptr, BindBinaryFloatingPoint)); + } else if 
(type.id() == LogicalTypeId::DECIMAL) { functions.AddFunction(ScalarFunction({type, type}, type, nullptr, BindDecimalModulo)); } else { functions.AddFunction( diff --git a/src/duckdb/src/function/table/table_scan.cpp b/src/duckdb/src/function/table/table_scan.cpp index 9a8aa661..938b443d 100644 --- a/src/duckdb/src/function/table/table_scan.cpp +++ b/src/duckdb/src/function/table/table_scan.cpp @@ -324,7 +324,6 @@ void TableScanPushdownComplexFilter(ClientContext &context, LogicalGet &get, Fun auto checkpoint_lock = storage.GetSharedCheckpointLock(); auto &info = storage.GetDataTableInfo(); - auto &transaction = Transaction::Get(context, bind_data.table.catalog); // bind and scan any ART indexes info->GetIndexes().BindAndScan(context, *info, [&](ART &art_index) { @@ -344,7 +343,7 @@ void TableScanPushdownComplexFilter(ClientContext &context, LogicalGet &get, Fun // Try to find a matching index for any of the filter expressions. for (auto &filter : filters) { - auto index_state = art_index.TryInitializeScan(transaction, *index_expression, *filter); + auto index_state = art_index.TryInitializeScan(*index_expression, *filter); if (index_state != nullptr) { auto &db_config = DBConfig::GetConfig(context); @@ -356,12 +355,13 @@ void TableScanPushdownComplexFilter(ClientContext &context, LogicalGet &get, Fun auto max_count = MaxValue(index_scan_max_count, total_rows_from_percentage); // Check if we can use an index scan, and already retrieve the matching row ids. - if (art_index.Scan(transaction, storage, *index_state, max_count, bind_data.row_ids)) { + if (art_index.Scan(*index_state, max_count, bind_data.row_ids)) { bind_data.is_index_scan = true; get.function = TableScanFunction::GetIndexScanFunction(); return true; } + // Clear the row ids in case we exceeded the maximum count and stopped scanning. 
bind_data.row_ids.clear(); return true; } diff --git a/src/duckdb/src/function/table/version/pragma_version.cpp b/src/duckdb/src/function/table/version/pragma_version.cpp index 86f20f29..9f99ba78 100644 --- a/src/duckdb/src/function/table/version/pragma_version.cpp +++ b/src/duckdb/src/function/table/version/pragma_version.cpp @@ -1,5 +1,5 @@ #ifndef DUCKDB_PATCH_VERSION -#define DUCKDB_PATCH_VERSION "1-dev4509" +#define DUCKDB_PATCH_VERSION "1-dev4673" #endif #ifndef DUCKDB_MINOR_VERSION #define DUCKDB_MINOR_VERSION 0 @@ -8,10 +8,10 @@ #define DUCKDB_MAJOR_VERSION 1 #endif #ifndef DUCKDB_VERSION -#define DUCKDB_VERSION "v1.0.1-dev4509" +#define DUCKDB_VERSION "v1.0.1-dev4673" #endif #ifndef DUCKDB_SOURCE_ID -#define DUCKDB_SOURCE_ID "862852fa39" +#define DUCKDB_SOURCE_ID "f0dbafd48f" #endif #include "duckdb/function/table/system_functions.hpp" #include "duckdb/main/database.hpp" diff --git a/src/duckdb/src/include/duckdb.h b/src/duckdb/src/include/duckdb.h index 0eb5fb05..51878906 100644 --- a/src/duckdb/src/include/duckdb.h +++ b/src/duckdb/src/include/duckdb.h @@ -2196,6 +2196,32 @@ Must be destroyed with `duckdb_destroy_value`. */ DUCKDB_API duckdb_value duckdb_create_array_value(duckdb_logical_type type, duckdb_value *values, idx_t value_count); +/*! +Returns the number of elements in a MAP value. + +* @param value The MAP value. +* @return The number of elements in the map. +*/ +DUCKDB_API idx_t duckdb_get_map_size(duckdb_value value); + +/*! +Returns the MAP key at index as a duckdb_value. + +* @param value The MAP value. +* @param index The index of the key. +* @return The key as a duckdb_value. +*/ +DUCKDB_API duckdb_value duckdb_get_map_key(duckdb_value value, idx_t index); + +/*! +Returns the MAP value at index as a duckdb_value. + +* @param value The MAP value. +* @param index The index of the value. +* @return The value as a duckdb_value. 
+*/ +DUCKDB_API duckdb_value duckdb_get_map_value(duckdb_value value, idx_t index); + //===--------------------------------------------------------------------===// // Logical Type Interface //===--------------------------------------------------------------------===// @@ -3417,6 +3443,15 @@ Returns the value of the metric of the current profiling info node. Returns null */ DUCKDB_API duckdb_value duckdb_profiling_info_get_value(duckdb_profiling_info info, const char *key); +/*! +Returns the key-value metric map of this profiling node as a MAP duckdb_value. +The individual elements are accessible via the duckdb_value MAP functions. + +* @param info A profiling information object. +* @return The key-value metric map as a MAP duckdb_value. +*/ +DUCKDB_API duckdb_value duckdb_profiling_info_get_metrics(duckdb_profiling_info info); + /*! Returns the number of children in the current profiling info node. diff --git a/src/duckdb/src/include/duckdb/common/enum_util.hpp b/src/duckdb/src/include/duckdb/common/enum_util.hpp index f6cd19c8..83086019 100644 --- a/src/duckdb/src/include/duckdb/common/enum_util.hpp +++ b/src/duckdb/src/include/duckdb/common/enum_util.hpp @@ -168,6 +168,8 @@ enum class FunctionNullHandling : uint8_t; enum class FunctionStability : uint8_t; +enum class GateStatus : uint8_t; + enum class HLLStorageType : uint8_t; enum class IndexConstraintType : uint8_t; @@ -559,6 +561,9 @@ const char* EnumUtil::ToChars(FunctionNullHandling value); template<> const char* EnumUtil::ToChars(FunctionStability value); +template<> +const char* EnumUtil::ToChars(GateStatus value); + template<> const char* EnumUtil::ToChars(HLLStorageType value); @@ -1043,6 +1048,9 @@ FunctionNullHandling EnumUtil::FromString(const char *valu template<> FunctionStability EnumUtil::FromString(const char *value); +template<> +GateStatus EnumUtil::FromString(const char *value); + template<> HLLStorageType EnumUtil::FromString(const char *value); diff --git 
a/src/duckdb/src/include/duckdb/common/optional_ptr.hpp b/src/duckdb/src/include/duckdb/common/optional_ptr.hpp index b6abfc11..d2211e64 100644 --- a/src/duckdb/src/include/duckdb/common/optional_ptr.hpp +++ b/src/duckdb/src/include/duckdb/common/optional_ptr.hpp @@ -14,7 +14,7 @@ namespace duckdb { -template +template class optional_ptr { // NOLINT: mimic std casing public: optional_ptr() noexcept : ptr(nullptr) { @@ -29,8 +29,10 @@ class optional_ptr { // NOLINT: mimic std casing } void CheckValid() const { - if (!ptr) { - throw InternalException("Attempting to dereference an optional pointer that is not set"); + if (MemorySafety::ENABLED) { + if (!ptr) { + throw InternalException("Attempting to dereference an optional pointer that is not set"); + } } } @@ -79,4 +81,7 @@ class optional_ptr { // NOLINT: mimic std casing T *ptr; }; +template +using unsafe_optional_ptr = optional_ptr; + } // namespace duckdb diff --git a/src/duckdb/src/include/duckdb/core_functions/aggregate/quantile_sort_tree.hpp b/src/duckdb/src/include/duckdb/core_functions/aggregate/quantile_sort_tree.hpp index c3b01d52..7fb0d8fd 100644 --- a/src/duckdb/src/include/duckdb/core_functions/aggregate/quantile_sort_tree.hpp +++ b/src/duckdb/src/include/duckdb/core_functions/aggregate/quantile_sort_tree.hpp @@ -250,7 +250,6 @@ struct QuantileSortTree : public MergeSortTree { explicit QuantileSortTree(Elements &&lowest_level) { BaseTree::Allocate(lowest_level.size()); BaseTree::LowestLevel() = std::move(lowest_level); - BaseTree::Build(); } template @@ -289,9 +288,12 @@ struct QuantileSortTree : public MergeSortTree { template RESULT_TYPE WindowScalar(const INPUT_TYPE *data, const SubFrames &frames, const idx_t n, Vector &result, - const QuantileValue &q) const { + const QuantileValue &q) { D_ASSERT(n > 0); + // Thread safe and idempotent. 
+ BaseTree::Build(); + // Find the interpolated indicies within the frame Interpolator interp(q, n, false); const auto lo_data = SelectNth(frames, interp.FRN); @@ -308,9 +310,12 @@ struct QuantileSortTree : public MergeSortTree { template void WindowList(const INPUT_TYPE *data, const SubFrames &frames, const idx_t n, Vector &list, const idx_t lidx, - const QuantileBindData &bind_data) const { + const QuantileBindData &bind_data) { D_ASSERT(n > 0); + // Thread safe and idempotent. + BaseTree::Build(); + // Result is a constant LIST with a fixed length auto ldata = FlatVector::GetData(list); auto &lentry = ldata[lidx]; diff --git a/src/duckdb/src/include/duckdb/execution/index/art/art.hpp b/src/duckdb/src/include/duckdb/execution/index/art/art.hpp index 041e514a..076a9356 100644 --- a/src/duckdb/src/include/duckdb/execution/index/art/art.hpp +++ b/src/duckdb/src/include/duckdb/execution/index/art/art.hpp @@ -14,158 +14,146 @@ namespace duckdb { -// classes enum class VerifyExistenceType : uint8_t { - APPEND = 0, // appends to a table - APPEND_FK = 1, // appends to a table that has a foreign key - DELETE_FK = 2 // delete from a table that has a foreign key + // Appends to a table. + APPEND = 0, + // Appends to a table that has a foreign key. + APPEND_FK = 1, + // Delete from a table that has a foreign key. + DELETE_FK = 2 }; class ConflictManager; class ARTKey; +class ARTKeySection; class FixedSizeAllocator; -// structs struct ARTIndexScanState; -struct ARTFlags { - vector vacuum_flags; - vector merge_buffer_counts; -}; class ART : public BoundIndex { public: - // Index type name for the ART + friend class Leaf; + +public: + //! Index type name for the ART. static constexpr const char *TYPE_NAME = "ART"; - //! FixedSizeAllocator count of the ART - static constexpr uint8_t ALLOCATOR_COUNT = 6; + //! FixedSizeAllocator count of the ART. + static constexpr uint8_t ALLOCATOR_COUNT = 9; + //! FixedSizeAllocator count of deprecated ARTs. 
+ static constexpr uint8_t DEPRECATED_ALLOCATOR_COUNT = ALLOCATOR_COUNT - 3; public: - //! Constructs an ART ART(const string &name, const IndexConstraintType index_constraint_type, const vector &column_ids, TableIOManager &table_io_manager, const vector> &unbound_expressions, AttachedDatabase &db, - const shared_ptr, ALLOCATOR_COUNT>> &allocators_ptr = nullptr, + const shared_ptr, ALLOCATOR_COUNT>> &allocators_ptr = nullptr, const IndexStorageInfo &info = IndexStorageInfo()); - //! Root of the tree - Node tree = Node(); - //! Fixed-size allocators holding the ART nodes - shared_ptr, ALLOCATOR_COUNT>> allocators; - //! True, if the ART owns its data - bool owns_data; - - //! Try to initialize a scan on the index with the given expression and filter - unique_ptr TryInitializeScan(const Transaction &transaction, const Expression &index_expr, - const Expression &filter_expr); - - //! Performs a lookup on the index, fetching up to max_count result IDs. Returns true if all row IDs were fetched, - //! and false otherwise - bool Scan(const Transaction &transaction, const DataTable &table, IndexScanState &state, idx_t max_count, - vector &result_ids); - -public: - //! Create a index instance of this type + //! Create a index instance of this type. static unique_ptr Create(CreateIndexInput &input) { auto art = make_uniq(input.name, input.constraint_type, input.column_ids, input.table_io_manager, input.unbound_expressions, input.db, nullptr, input.storage_info); return std::move(art); } - //! Called when data is appended to the index. The lock obtained from InitializeLock must be held - ErrorData Append(IndexLock &lock, DataChunk &entries, Vector &row_identifiers) override; - //! Verify that data can be appended to the index without a constraint violation + //! Root of the tree. + Node tree = Node(); + //! Fixed-size allocators holding the ART nodes. + shared_ptr, ALLOCATOR_COUNT>> allocators; + //! True, if the ART owns its data. + bool owns_data; + //! 
The number of bytes fitting in the prefix. + uint8_t prefix_count; + +public: + //! Try to initialize a scan on the ART with the given expression and filter. + unique_ptr TryInitializeScan(const Expression &expr, const Expression &filter_expr); + //! Perform a lookup on the ART, fetching up to max_count row IDs. + //! If all row IDs were fetched, it returns true, else false. + bool Scan(IndexScanState &state, idx_t max_count, unsafe_vector &row_ids); + + //! Append a chunk by first executing the ART's expressions. + ErrorData Append(IndexLock &lock, DataChunk &input, Vector &row_ids) override; + //! Insert a chunk. + bool Insert(Node &node, const ARTKey &key, idx_t depth, const ARTKey &row_id, const GateStatus status); + ErrorData Insert(IndexLock &lock, DataChunk &data, Vector &row_ids) override; + + //! Constraint verification for a chunk. void VerifyAppend(DataChunk &chunk) override; - //! Verify that data can be appended to the index without a constraint violation using the conflict manager void VerifyAppend(DataChunk &chunk, ConflictManager &conflict_manager) override; - //! Deletes all data from the index. The lock obtained from InitializeLock must be held - void CommitDrop(IndexLock &index_lock) override; - //! Delete a chunk of entries from the index. The lock obtained from InitializeLock must be held - void Delete(IndexLock &lock, DataChunk &entries, Vector &row_identifiers) override; - //! Insert a chunk of entries into the index - ErrorData Insert(IndexLock &lock, DataChunk &data, Vector &row_identifiers) override; - - //! Construct an ART from a vector of sorted keys - bool ConstructFromSorted(idx_t count, vector &keys, Vector &row_identifiers); - //! Search equal values and fetches the row IDs - bool SearchEqual(ARTKey &key, idx_t max_count, vector &result_ids); + //! Delete a chunk from the ART. + void Delete(IndexLock &lock, DataChunk &entries, Vector &row_ids) override; + //! Drop the ART. + void CommitDrop(IndexLock &index_lock) override; - //! 
Returns all ART storage information for serialization - IndexStorageInfo GetStorageInfo(const bool get_buffers) override; + //! Construct an ART from a vector of sorted keys and their row IDs. + bool Construct(unsafe_vector &keys, unsafe_vector &row_ids, const idx_t row_count); - //! Merge another index into this index. The lock obtained from InitializeLock must be held, and the other - //! index must also be locked during the merge + //! Merge another ART into this ART. Both must be locked. bool MergeIndexes(IndexLock &state, BoundIndex &other_index) override; - //! Traverses an ART and vacuums the qualifying nodes. The lock obtained from InitializeLock must be held + //! Vacuums the ART storage. void Vacuum(IndexLock &state) override; - //! Returns the in-memory usage of the index. The lock obtained from InitializeLock must be held + //! Returns ART storage serialization information. + IndexStorageInfo GetStorageInfo(const case_insensitive_map_t &options, const bool to_wal) override; + //! Returns the in-memory usage of the ART. idx_t GetInMemorySize(IndexLock &index_lock) override; - //! Generate ART keys for an input chunk + //! ART key generation. template - static void GenerateKeys(ArenaAllocator &allocator, DataChunk &input, vector &keys); + static void GenerateKeys(ArenaAllocator &allocator, DataChunk &input, unsafe_vector &keys); + static void GenerateKeyVectors(ArenaAllocator &allocator, DataChunk &input, Vector &row_ids, + unsafe_vector &keys, unsafe_vector &row_id_keys); + + //! Verifies the nodes and optionally returns a string of the ART. + string VerifyAndToString(IndexLock &state, const bool only_verify) override; + //! Verifies that the node allocations match the node counts. 
+ void VerifyAllocations(IndexLock &state) override; + +private: + bool SearchEqual(ARTKey &key, idx_t max_count, unsafe_vector &row_ids); + bool SearchGreater(ARTKey &key, bool equal, idx_t max_count, unsafe_vector &row_ids); + bool SearchLess(ARTKey &upper_bound, bool equal, idx_t max_count, unsafe_vector &row_ids); + bool SearchCloseRange(ARTKey &lower_bound, ARTKey &upper_bound, bool left_equal, bool right_equal, idx_t max_count, + unsafe_vector &row_ids); + const unsafe_optional_ptr Lookup(const Node &node, const ARTKey &key, idx_t depth); + + void InsertIntoEmpty(Node &node, const ARTKey &key, const idx_t depth, const ARTKey &row_id, + const GateStatus status); + bool InsertIntoNode(Node &node, const ARTKey &key, const idx_t depth, const ARTKey &row_id, + const GateStatus status); - //! Generate a string containing all the expressions and their respective values that violate a constraint string GenerateErrorKeyName(DataChunk &input, idx_t row); - //! Generate the matching error message for a constraint violation string GenerateConstraintErrorMessage(VerifyExistenceType verify_type, const string &key_name); - //! Performs constraint checking for a chunk of input data void CheckConstraintsForChunk(DataChunk &input, ConflictManager &conflict_manager) override; + string GetConstraintViolationMessage(VerifyExistenceType verify_type, idx_t failed_index, + DataChunk &input) override; - //! Returns the string representation of the ART, or only traverses and verifies the index - string VerifyAndToString(IndexLock &state, const bool only_verify) override; + void Erase(Node &node, reference key, idx_t depth, reference row_id, GateStatus status); - //! Find the node with a matching key, or return nullptr if not found - optional_ptr Lookup(const Node &node, const ARTKey &key, idx_t depth); - //! 
Insert a key into the tree - bool Insert(Node &node, const ARTKey &key, idx_t depth, const row_t &row_id); + bool ConstructInternal(const unsafe_vector &keys, const unsafe_vector &row_ids, Node &node, + ARTKeySection §ion); -private: - //! Insert a row ID into a leaf - bool InsertToLeaf(Node &leaf, const row_t &row_id); - //! Erase a key from the tree (if a leaf has more than one value) or erase the leaf itself - void Erase(Node &node, const ARTKey &key, idx_t depth, const row_t &row_id); - - //! Returns all row IDs belonging to a key greater (or equal) than the search key - bool SearchGreater(ARTIndexScanState &state, ARTKey &key, bool equal, idx_t max_count, vector &result_ids); - //! Returns all row IDs belonging to a key less (or equal) than the upper_bound - bool SearchLess(ARTIndexScanState &state, ARTKey &upper_bound, bool equal, idx_t max_count, - vector &result_ids); - //! Returns all row IDs belonging to a key within the range of lower_bound and upper_bound - bool SearchCloseRange(ARTIndexScanState &state, ARTKey &lower_bound, ARTKey &upper_bound, bool left_equal, - bool right_equal, idx_t max_count, vector &result_ids); - - //! Initializes a merge operation by returning a set containing the buffer count of each fixed-size allocator - void InitializeMerge(ARTFlags &flags); - - //! Initializes a vacuum operation by calling the initialize operation of the respective - //! node allocator, and returns a vector containing either true, if the allocator at - //! the respective position qualifies, or false, if not - void InitializeVacuum(ARTFlags &flags); - //! Finalizes a vacuum operation by calling the finalize operation of all qualifying - //! fixed size allocators - void FinalizeVacuum(const ARTFlags &flags); - - //! Internal function to return the string representation of the ART, - //! 
or only traverses and verifies the index - string VerifyAndToStringInternal(const bool only_verify); + void InitializeMerge(unsafe_vector &upper_bounds); + + void InitializeVacuum(unordered_set &indexes); + void FinalizeVacuum(const unordered_set &indexes); - //! Initialize the allocators of the ART void InitAllocators(const IndexStorageInfo &info); - //! STABLE STORAGE NOTE: This is for old storage files, to deserialize the allocators of the ART + void TransformToDeprecated(); void Deserialize(const BlockPointer &pointer); - //! Initializes the serialization of the index by combining the allocator data onto partial blocks - void WritePartialBlocks(); + void WritePartialBlocks(const bool v1_0_0_storage); + void SetPrefixCount(const IndexStorageInfo &info); - string GetConstraintViolationMessage(VerifyExistenceType verify_type, idx_t failed_index, - DataChunk &input) override; + string VerifyAndToStringInternal(const bool only_verify); + void VerifyAllocationsInternal(); }; template <> -void ART::GenerateKeys<>(ArenaAllocator &allocator, DataChunk &input, vector &keys); +void ART::GenerateKeys<>(ArenaAllocator &allocator, DataChunk &input, unsafe_vector &keys); template <> -void ART::GenerateKeys(ArenaAllocator &allocator, DataChunk &input, vector &keys); +void ART::GenerateKeys(ArenaAllocator &allocator, DataChunk &input, unsafe_vector &keys); } // namespace duckdb diff --git a/src/duckdb/src/include/duckdb/execution/index/art/art_key.hpp b/src/duckdb/src/include/duckdb/execution/index/art/art_key.hpp index 0bb9c0f0..e19e3cbb 100644 --- a/src/duckdb/src/include/duckdb/execution/index/art/art_key.hpp +++ b/src/duckdb/src/include/duckdb/execution/index/art/art_key.hpp @@ -20,55 +20,59 @@ namespace duckdb { class ARTKey { public: ARTKey(); - ARTKey(const data_ptr_t &data, const uint32_t &len); - ARTKey(ArenaAllocator &allocator, const uint32_t &len); + ARTKey(data_ptr_t data, idx_t len); + ARTKey(ArenaAllocator &allocator, idx_t len); - uint32_t len; + idx_t len; 
data_ptr_t data; public: template - static inline ARTKey CreateARTKey(ArenaAllocator &allocator, const LogicalType &type, T element) { - auto data = ARTKey::CreateData(allocator, element); - return ARTKey(data, sizeof(element)); + static inline ARTKey CreateARTKey(ArenaAllocator &allocator, T value) { + auto data = ARTKey::CreateData(allocator, value); + return ARTKey(data, sizeof(value)); } template - static inline ARTKey CreateARTKey(ArenaAllocator &allocator, const LogicalType &type, const Value &element) { - return CreateARTKey(allocator, type, element.GetValueUnsafe()); + static inline ARTKey CreateARTKey(ArenaAllocator &allocator, Value &value) { + return CreateARTKey(allocator, value.GetValueUnsafe()); } template - static inline void CreateARTKey(ArenaAllocator &allocator, const LogicalType &type, ARTKey &key, T element) { - key.data = ARTKey::CreateData(allocator, element); - key.len = sizeof(element); + static inline void CreateARTKey(ArenaAllocator &allocator, ARTKey &key, T value) { + key.data = ARTKey::CreateData(allocator, value); + key.len = sizeof(value); } template - static inline void CreateARTKey(ArenaAllocator &allocator, const LogicalType &type, ARTKey &key, - const Value element) { - key.data = ARTKey::CreateData(allocator, element.GetValueUnsafe()); - key.len = sizeof(element); + static inline void CreateARTKey(ArenaAllocator &allocator, ARTKey &key, Value value) { + key.data = ARTKey::CreateData(allocator, value.GetValueUnsafe()); + key.len = sizeof(value); } + static ARTKey CreateKey(ArenaAllocator &allocator, PhysicalType type, Value &value); + public: - data_t &operator[](size_t i) { + data_t &operator[](idx_t i) { return data[i]; } - const data_t &operator[](size_t i) const { + const data_t &operator[](idx_t i) const { return data[i]; } - bool operator>(const ARTKey &k) const; - bool operator>=(const ARTKey &k) const; - bool operator==(const ARTKey &k) const; + bool operator>(const ARTKey &key) const; + bool operator>=(const ARTKey &key) 
const; + bool operator==(const ARTKey &key) const; - inline bool ByteMatches(const ARTKey &other, const uint32_t &depth) const { + inline bool ByteMatches(const ARTKey &other, idx_t depth) const { return data[depth] == other[depth]; } inline bool Empty() const { return len == 0; } - void ConcatenateARTKey(ArenaAllocator &allocator, ARTKey &concat_key); + + void Concat(ArenaAllocator &allocator, const ARTKey &other); + row_t GetRowId() const; + idx_t GetMismatchPos(const ARTKey &other, const idx_t start) const; private: template @@ -80,9 +84,23 @@ class ARTKey { }; template <> -ARTKey ARTKey::CreateARTKey(ArenaAllocator &allocator, const LogicalType &type, string_t value); +ARTKey ARTKey::CreateARTKey(ArenaAllocator &allocator, string_t value); template <> -ARTKey ARTKey::CreateARTKey(ArenaAllocator &allocator, const LogicalType &type, const char *value); +ARTKey ARTKey::CreateARTKey(ArenaAllocator &allocator, const char *value); template <> -void ARTKey::CreateARTKey(ArenaAllocator &allocator, const LogicalType &type, ARTKey &key, string_t value); +void ARTKey::CreateARTKey(ArenaAllocator &allocator, ARTKey &key, string_t value); + +class ARTKeySection { +public: + ARTKeySection(idx_t start, idx_t end, idx_t depth, data_t byte); + ARTKeySection(idx_t start, idx_t end, const unsafe_vector &keys, const ARTKeySection §ion); + + idx_t start; + idx_t end; + idx_t depth; + data_t key_byte; + +public: + void GetChildSections(unsafe_vector §ions, const unsafe_vector &keys); +}; } // namespace duckdb diff --git a/src/duckdb/src/include/duckdb/execution/index/art/base_leaf.hpp b/src/duckdb/src/include/duckdb/execution/index/art/base_leaf.hpp new file mode 100644 index 00000000..4b5c8125 --- /dev/null +++ b/src/duckdb/src/include/duckdb/execution/index/art/base_leaf.hpp @@ -0,0 +1,109 @@ +//===----------------------------------------------------------------------===// +// DuckDB +// +// duckdb/execution/index/art/base_leaf.hpp +// +// 
+//===----------------------------------------------------------------------===// + +#pragma once + +#include "duckdb/execution/index/fixed_size_allocator.hpp" +#include "duckdb/execution/index/art/art.hpp" +#include "duckdb/execution/index/art/node.hpp" + +namespace duckdb { + +template +class BaseLeaf { + friend class Node7Leaf; + friend class Node15Leaf; + friend class Node256Leaf; + +public: + BaseLeaf() = delete; + BaseLeaf(const BaseLeaf &) = delete; + BaseLeaf &operator=(const BaseLeaf &) = delete; + +private: + uint8_t count; + uint8_t key[CAPACITY]; + +public: + //! Get a new BaseLeaf and initialize it. + static BaseLeaf &New(ART &art, Node &node) { + node = Node::GetAllocator(art, TYPE).New(); + node.SetMetadata(static_cast(TYPE)); + + auto &n = Node::Ref(art, node, TYPE); + n.count = 0; + return n; + } + + //! Returns true, if the byte exists, else false. + bool HasByte(uint8_t &byte) const { + for (uint8_t i = 0; i < count; i++) { + if (key[i] == byte) { + return true; + } + } + return false; + } + + //! Get the first byte greater than or equal to the byte. + //! Returns true, if such a byte exists, else false. + bool GetNextByte(uint8_t &byte) const { + for (uint8_t i = 0; i < count; i++) { + if (key[i] >= byte) { + byte = key[i]; + return true; + } + } + return false; + } + +private: + static void InsertByteInternal(BaseLeaf &n, const uint8_t byte); + static BaseLeaf &DeleteByteInternal(ART &art, Node &node, const uint8_t byte); +}; + +//! Node7Leaf holds up to seven sorted bytes. +class Node7Leaf : public BaseLeaf<7, NType::NODE_7_LEAF> { + friend class Node15Leaf; + +public: + static constexpr NType NODE_7_LEAF = NType::NODE_7_LEAF; + static constexpr uint8_t CAPACITY = 7; + static constexpr idx_t AND_LAST_BYTE = 0xFFFFFFFFFFFFFF00; + +public: + //! Insert a byte. + static void InsertByte(ART &art, Node &node, const uint8_t byte); + //! Delete a byte. 
+ static void DeleteByte(ART &art, Node &node, Node &prefix, const uint8_t byte, const ARTKey &row_id); + +private: + static void ShrinkNode15Leaf(ART &art, Node &node7_leaf, Node &node15_leaf); +}; + +//! Node15Leaf holds up to 15 sorted bytes. +class Node15Leaf : public BaseLeaf<15, NType::NODE_15_LEAF> { + friend class Node7Leaf; + friend class Node256Leaf; + +public: + static constexpr NType NODE_15_LEAF = NType::NODE_15_LEAF; + static constexpr uint8_t CAPACITY = 15; + +public: + //! Insert a byte. + static void InsertByte(ART &art, Node &node, const uint8_t byte); + //! Delete a byte. + static void DeleteByte(ART &art, Node &node, const uint8_t byte); + +private: + static void GrowNode7Leaf(ART &art, Node &node15_leaf, Node &node7_leaf); + static void ShrinkNode256Leaf(ART &art, Node &node15_leaf, Node &node256_leaf); +}; + +} // namespace duckdb diff --git a/src/duckdb/src/include/duckdb/execution/index/art/base_node.hpp b/src/duckdb/src/include/duckdb/execution/index/art/base_node.hpp new file mode 100644 index 00000000..2baa0eb8 --- /dev/null +++ b/src/duckdb/src/include/duckdb/execution/index/art/base_node.hpp @@ -0,0 +1,140 @@ +//===----------------------------------------------------------------------===// +// DuckDB +// +// duckdb/execution/index/art/base_node.hpp +// +// +//===----------------------------------------------------------------------===// + +#pragma once + +#include "duckdb/execution/index/fixed_size_allocator.hpp" +#include "duckdb/execution/index/art/art.hpp" +#include "duckdb/execution/index/art/node.hpp" + +namespace duckdb { + +template +class BaseNode { + friend class Node4; + friend class Node16; + friend class Node48; + +public: + BaseNode() = delete; + BaseNode(const BaseNode &) = delete; + BaseNode &operator=(const BaseNode &) = delete; + +private: + uint8_t count; + uint8_t key[CAPACITY]; + Node children[CAPACITY]; + +public: + //! Get a new BaseNode and initialize it. 
+ static BaseNode &New(ART &art, Node &node) { + node = Node::GetAllocator(art, TYPE).New(); + node.SetMetadata(static_cast(TYPE)); + + auto &n = Node::Ref(art, node, TYPE); + n.count = 0; + return n; + } + + //! Free the node and its children. + static void Free(ART &art, Node &node) { + auto &n = Node::Ref(art, node, TYPE); + for (uint8_t i = 0; i < n.count; i++) { + Node::Free(art, n.children[i]); + } + } + + //! Replace the child at byte. + static void ReplaceChild(BaseNode &n, const uint8_t byte, const Node child) { + D_ASSERT(n.count != 0); + for (uint8_t i = 0; i < n.count; i++) { + if (n.key[i] == byte) { + auto status = n.children[i].GetGateStatus(); + n.children[i] = child; + + if (status == GateStatus::GATE_SET && child.HasMetadata()) { + n.children[i].SetGateStatus(status); + } + return; + } + } + } + + //! Get the child at byte. + static unsafe_optional_ptr GetChild(BaseNode &n, const uint8_t byte) { + for (uint8_t i = 0; i < n.count; i++) { + if (n.key[i] == byte) { + D_ASSERT(n.children[i].HasMetadata()); + return &n.children[i]; + } + } + return nullptr; + } + + //! Get the first child greater than or equal to the byte. + static unsafe_optional_ptr GetNextChild(BaseNode &n, uint8_t &byte) { + for (uint8_t i = 0; i < n.count; i++) { + if (n.key[i] >= byte) { + byte = n.key[i]; + return &n.children[i]; + } + } + return nullptr; + } + +public: + template + static void Iterator(BaseNode &n, F &&lambda) { + for (uint8_t i = 0; i < n.count; i++) { + lambda(n.children[i]); + } + } + +private: + static void InsertChildInternal(BaseNode &n, const uint8_t byte, const Node child); + static BaseNode &DeleteChildInternal(ART &art, Node &node, const uint8_t byte); +}; + +//! Node4 holds up to four children sorted by their key byte. +class Node4 : public BaseNode<4, NType::NODE_4> { + friend class Node16; + +public: + static constexpr NType NODE_4 = NType::NODE_4; + static constexpr uint8_t CAPACITY = 4; + +public: + //! Insert a child at byte. 
+ static void InsertChild(ART &art, Node &node, const uint8_t byte, const Node child); + //! Delete the child at byte. + static void DeleteChild(ART &art, Node &node, Node &prefix, const uint8_t byte, const GateStatus status); + +private: + static void ShrinkNode16(ART &art, Node &node4, Node &node16); +}; + +class Node16 : public BaseNode<16, NType::NODE_16> { + friend class Node4; + friend class Node48; + +public: + static constexpr NType NODE_16 = NType::NODE_16; + static constexpr uint8_t CAPACITY = 16; + +public: + //! Insert a child at byte. + static void InsertChild(ART &art, Node &node, const uint8_t byte, const Node child); + //! Delete the child at byte. + static void DeleteChild(ART &art, Node &node, const uint8_t byte); + +private: + static void GrowNode4(ART &art, Node &node16, Node &node4); + static void ShrinkNode48(ART &art, Node &node16, Node &node48); +}; + +} // namespace duckdb diff --git a/src/duckdb/src/include/duckdb/execution/index/art/iterator.hpp b/src/duckdb/src/include/duckdb/execution/index/art/iterator.hpp index 437a818b..161b65d0 100644 --- a/src/duckdb/src/include/duckdb/execution/index/art/iterator.hpp +++ b/src/duckdb/src/include/duckdb/execution/index/art/iterator.hpp @@ -14,7 +14,7 @@ namespace duckdb { -//! Keeps track of the byte leading to the currently active child of the node +//! Keeps track of the byte leading to the currently active child of the node. struct IteratorEntry { IteratorEntry(Node node, uint8_t byte) : node(node), byte(byte) { } @@ -23,60 +23,74 @@ struct IteratorEntry { uint8_t byte = 0; }; -//! Keeps track of the current key in the iterator leading down to the top node in the stack +//! Keeps track of the current key in the iterator leading down to the top node in the stack. class IteratorKey { public: - //! Pushes a byte into the current key + //! Pushes a byte into the current key. inline void Push(const uint8_t key_byte) { key_bytes.push_back(key_byte); } - //! Pops n bytes from the current key + //! 
Pops n bytes from the current key. inline void Pop(const idx_t n) { key_bytes.resize(key_bytes.size() - n); } - - //! Subscript operator + //! Returns the byte at idx. inline uint8_t &operator[](idx_t idx) { D_ASSERT(idx < key_bytes.size()); return key_bytes[idx]; } - //! Greater than operator - bool operator>(const ARTKey &key) const; - //! Greater than or equal to operator - bool operator>=(const ARTKey &key) const; - //! Equal to operator - bool operator==(const ARTKey &key) const; + // Returns the number of key bytes. + inline idx_t Size() const { + return key_bytes.size(); + } + + //! Returns true, if key_bytes contains all bytes of key. + bool Contains(const ARTKey &key) const; + //! Returns true, if key_bytes is greater than [or equal to] the key. + bool GreaterThan(const ARTKey &key, bool equal) const; private: - vector key_bytes; + unsafe_vector key_bytes; }; class Iterator { public: - //! Holds the current key leading down to the top node on the stack + static constexpr uint8_t ROW_ID_SIZE = sizeof(row_t); + +public: + explicit Iterator(ART &art) : art(art), status(GateStatus::GATE_NOT_SET) {}; + //! Holds the current key leading down to the top node on the stack. IteratorKey current_key; - //! Pointer to the ART - optional_ptr art = nullptr; +public: //! Scans the tree, starting at the current top node on the stack, and ending at upper_bound. - //! If upper_bound is the empty ARTKey, than there is no upper bound - bool Scan(const ARTKey &upper_bound, const idx_t max_count, vector &result_ids, const bool equal); - //! Finds the minimum (leaf) of the current subtree + //! If upper_bound is the empty ARTKey, then there is no upper bound. + bool Scan(const ARTKey &upper_bound, const idx_t max_count, unsafe_vector &row_ids, const bool equal); + //! Finds the minimum (leaf) of the current subtree. void FindMinimum(const Node &node); //! Finds the lower bound of the ART and adds the nodes to the stack. Returns false, if the lower - //! 
bound exceeds the maximum value of the ART + //! bound exceeds the maximum value of the ART. bool LowerBound(const Node &node, const ARTKey &key, const bool equal, idx_t depth); private: - //! Stack of nodes from the root to the currently active node + //! The ART. + ART &art; + //! Stack of nodes from the root to the currently active node. stack nodes; - //! Last visited leaf node + //! Last visited leaf node. Node last_leaf = Node(); + //! Holds the row ID of nested leaves. + uint8_t row_id[ROW_ID_SIZE]; + //! True, if we passed a gate. + GateStatus status; + //! Depth in a nested leaf. + uint8_t nested_depth = 0; +private: //! Goes to the next leaf in the ART and sets it as last_leaf, - //! returns false if there is no next leaf + //! returns false if there is no next leaf. bool Next(); - //! Pop the top node from the stack of iterator entries and adjust the current key + //! Pop the top node from the stack of iterator entries and adjust the current key. void PopNode(); }; } // namespace duckdb diff --git a/src/duckdb/src/include/duckdb/execution/index/art/leaf.hpp b/src/duckdb/src/include/duckdb/execution/index/art/leaf.hpp index a981b61b..6228e75e 100644 --- a/src/duckdb/src/include/duckdb/execution/index/art/leaf.hpp +++ b/src/duckdb/src/include/duckdb/execution/index/art/leaf.hpp @@ -14,70 +14,59 @@ namespace duckdb { -// classes -class MetadataWriter; -class MetadataReader; - -// structs -struct BlockPointer; - -//! The LEAF is a special node type that contains a count, up to LEAF_SIZE row IDs, -//! and a Node pointer. If this pointer is set, then it must point to another LEAF, -//! creating a chain of leaf nodes storing row IDs. -//! This class also contains functionality for nodes of type LEAF_INLINED, in which case we store the -//! row ID directly in the node pointer. +//! There are three types of leaves. +//! 1. LEAF_INLINED: Inlines a row ID in a Node pointer. +//! 2. LEAF: Deprecated. A list of Leaf nodes containing row IDs. +//! 3. 
Nested leaves indicated by gate nodes. If an ART key contains multiple row IDs, then we use the row IDs as keys +//! and create a nested ART behind the gate node. As row IDs are always unique, these nested ARTs never contain +//! duplicates themselves. class Leaf { public: - //! Delete copy constructors, as any Leaf can never own its memory + static constexpr NType LEAF = NType::LEAF; + static constexpr NType INLINED = NType::LEAF_INLINED; + static constexpr uint8_t LEAF_SIZE = 4; // Deprecated. + +public: + Leaf() = delete; Leaf(const Leaf &) = delete; Leaf &operator=(const Leaf &) = delete; - //! The number of row IDs in this leaf - uint8_t count; - //! Up to LEAF_SIZE row IDs - row_t row_ids[Node::LEAF_SIZE]; - //! A pointer to the next LEAF node - Node ptr; +private: + uint8_t count; // Deprecated. + row_t row_ids[LEAF_SIZE]; // Deprecated. + Node ptr; // Deprecated. public: - //! Inline a row ID into a node pointer + //! Inline a row ID into a node pointer. static void New(Node &node, const row_t row_id); - //! Get a new chain of leaf nodes, might cause new buffer allocations, - //! with the node parameter holding the tail of the chain - static void New(ART &art, reference &node, const row_t *row_ids, idx_t count); - //! Get a new leaf node without any data - static Leaf &New(ART &art, Node &node); - //! Free the leaf (chain) - static void Free(ART &art, Node &node); + //! Get a new non-inlined nested leaf node. + static void New(ART &art, reference &node, const unsafe_vector &row_ids, const idx_t start, + const idx_t count); - //! Initializes a merge by incrementing the buffer IDs of the leaf (chain) - static void InitializeMerge(ART &art, Node &node, const ARTFlags &flags); - //! Merge leaf (chains) and free all copied leaf nodes - static void Merge(ART &art, Node &l_node, Node &r_node); + //! Merge two leaves. r_node must be INLINED. + static void MergeInlined(ART &art, Node &l_node, Node &r_node); - //! 
Insert a row ID into a leaf - static void Insert(ART &art, Node &node, const row_t row_id); - //! Remove a row ID from a leaf. Returns true, if the leaf is empty after the removal - static bool Remove(ART &art, reference &node, const row_t row_id); + //! Insert a row ID into an inlined leaf. + static void InsertIntoInlined(ART &art, Node &node, const ARTKey &row_id, idx_t depth, const GateStatus status); - //! Get the total count of row IDs in the chain of leaves - static idx_t TotalCount(ART &art, const Node &node); - //! Fill the result_ids vector with the row IDs of this leaf chain, if the total count does not exceed max_count - static bool GetRowIds(ART &art, const Node &node, vector &result_ids, const idx_t max_count); - //! Returns whether the leaf contains the row ID - static bool ContainsRowId(ART &art, const Node &node, const row_t row_id); + //! Transforms a deprecated leaf to a nested leaf. + static void TransformToNested(ART &art, Node &node); + //! Transforms a nested leaf to a deprecated leaf. + static void TransformToDeprecated(ART &art, Node &node); - //! Returns the string representation of the leaf (chain), or only traverses and verifies the leaf (chain) - static string VerifyAndToString(ART &art, const Node &node, const bool only_verify); - - //! Vacuum the leaf (chain) - static void Vacuum(ART &art, Node &node); - -private: - //! Moves the inlined row ID onto a leaf - static void MoveInlinedToLeaf(ART &art, Node &node); - //! Appends the row ID to this leaf, or creates a subsequent leaf, if this node is full - Leaf &Append(ART &art, const row_t row_id); +public: + //! Frees the linked list of leaves. + static void DeprecatedFree(ART &art, Node &node); + //! Fills the row_ids vector with the row IDs of this linked list of leaves. + //! Never pushes more than max_count row IDs. + static bool DeprecatedGetRowIds(ART &art, const Node &node, unsafe_vector &row_ids, const idx_t max_count); + //! Vacuums the linked list of leaves. 
+ static void DeprecatedVacuum(ART &art, Node &node); + //! Returns the string representation of the linked list of leaves, if only_verify is false. + //! Else, it traverses and verifies the linked list of leaves. + static string DeprecatedVerifyAndToString(ART &art, const Node &node, const bool only_verify); + //! Count the number of leaves. + void DeprecatedVerifyAllocations(ART &art, unordered_map &node_counts) const; }; } // namespace duckdb diff --git a/src/duckdb/src/include/duckdb/execution/index/art/node.hpp b/src/duckdb/src/include/duckdb/execution/index/art/node.hpp index 1bbc223d..725cc2d3 100644 --- a/src/duckdb/src/include/duckdb/execution/index/art/node.hpp +++ b/src/duckdb/src/include/duckdb/execution/index/art/node.hpp @@ -9,16 +9,14 @@ #pragma once #include "duckdb/common/assert.hpp" +#include "duckdb/common/limits.hpp" #include "duckdb/common/optional_ptr.hpp" -#include "duckdb/common/to_string.hpp" #include "duckdb/common/typedefs.hpp" -#include "duckdb/common/limits.hpp" -#include "duckdb/execution/index/index_pointer.hpp" #include "duckdb/execution/index/fixed_size_allocator.hpp" +#include "duckdb/execution/index/index_pointer.hpp" namespace duckdb { -// classes enum class NType : uint8_t { PREFIX = 1, LEAF = 2, @@ -27,107 +25,168 @@ enum class NType : uint8_t { NODE_48 = 5, NODE_256 = 6, LEAF_INLINED = 7, + NODE_7_LEAF = 8, + NODE_15_LEAF = 9, + NODE_256_LEAF = 10, +}; + +enum class GateStatus : uint8_t { + GATE_NOT_SET = 0, + GATE_SET = 1, }; class ART; class Prefix; -class MetadataReader; -class MetadataWriter; - -// structs -struct BlockPointer; -struct ARTFlags; -struct MetaBlockPointer; +class ARTKey; //! The Node is the pointer class of the ART index. -//! It inherits from the IndexPointer, and adds ART-specific functionality +//! It inherits from the IndexPointer, and adds ART-specific functionality. class Node : public IndexPointer { + friend class Prefix; + public: - //! 
Node thresholds - static constexpr uint8_t NODE_48_SHRINK_THRESHOLD = 12; - static constexpr uint8_t NODE_256_SHRINK_THRESHOLD = 36; - //! Node sizes - static constexpr uint8_t NODE_4_CAPACITY = 4; - static constexpr uint8_t NODE_16_CAPACITY = 16; - static constexpr uint8_t NODE_48_CAPACITY = 48; - static constexpr uint16_t NODE_256_CAPACITY = 256; - //! Other constants - static constexpr uint8_t EMPTY_MARKER = 48; - static constexpr uint8_t LEAF_SIZE = 4; - static constexpr uint8_t PREFIX_SIZE = 15; + //! A gate sets the leftmost bit of the metadata, binary: 1000-0000. + static constexpr uint8_t AND_GATE = 0x80; static constexpr idx_t AND_ROW_ID = 0x00FFFFFFFFFFFFFF; public: - //! Get a new pointer to a node, might cause a new buffer allocation, and initialize it + //! Get a new pointer to a node and initialize it. static void New(ART &art, Node &node, const NType type); - //! Free the node (and its subtree) + //! Free the node and its children. static void Free(ART &art, Node &node); - //! Get references to the allocator + //! Get a reference to the allocator. static FixedSizeAllocator &GetAllocator(const ART &art, const NType type); - //! Get a (immutable) reference to the node. If dirty is false, then T should be a const class + //! Get the index of a node type's allocator. + static uint8_t GetAllocatorIdx(const NType type); + + //! Get a reference to a node. template - static inline const NODE &Ref(const ART &art, const Node ptr, const NType type) { - return *(GetAllocator(art, type).Get(ptr, false)); + static inline NODE &Ref(const ART &art, const Node ptr, const NType type) { + D_ASSERT(ptr.GetType() != NType::PREFIX); + return *(GetAllocator(art, type).Get(ptr, !std::is_const::value)); } - //! Get a (const) reference to the node. If dirty is false, then T should be a const class + //! Get a node pointer, if the node is in memory, else nullptr. 
template - static inline NODE &RefMutable(const ART &art, const Node ptr, const NType type) { - return *(GetAllocator(art, type).Get(ptr)); + static inline unsafe_optional_ptr InMemoryRef(const ART &art, const Node ptr, const NType type) { + D_ASSERT(ptr.GetType() != NType::PREFIX); + return GetAllocator(art, type).GetIfLoaded(ptr); } - //! Replace the child node at byte - void ReplaceChild(const ART &art, const uint8_t byte, const Node child) const; - //! Insert the child node at byte - static void InsertChild(ART &art, Node &node, const uint8_t byte, const Node child); - //! Delete the child node at byte - static void DeleteChild(ART &art, Node &node, Node &prefix, const uint8_t byte); - - //! Get the child (immutable) for the respective byte in the node - optional_ptr GetChild(ART &art, const uint8_t byte) const; - //! Get the child for the respective byte in the node - optional_ptr GetChildMutable(ART &art, const uint8_t byte) const; - //! Get the first child (immutable) that is greater or equal to the specific byte - optional_ptr GetNextChild(ART &art, uint8_t &byte) const; - //! Get the first child that is greater or equal to the specific byte - optional_ptr GetNextChildMutable(ART &art, uint8_t &byte) const; - - //! Returns the string representation of the node, or only traverses and verifies the node and its subtree + //! Replace the child at byte. + void ReplaceChild(const ART &art, const uint8_t byte, const Node child = Node()) const; + //! Insert the child at byte. + static void InsertChild(ART &art, Node &node, const uint8_t byte, const Node child = Node()); + //! Delete the child at byte. + static void DeleteChild(ART &art, Node &node, Node &prefix, const uint8_t byte, const GateStatus status, + const ARTKey &row_id); + + //! Get the immutable child at byte. + const unsafe_optional_ptr GetChild(ART &art, const uint8_t byte) const; + //! Get the child at byte. + unsafe_optional_ptr GetChildMutable(ART &art, const uint8_t byte) const; + //! 
Get the first immutable child greater than or equal to the byte. + const unsafe_optional_ptr GetNextChild(ART &art, uint8_t &byte) const; + //! Get the first child greater than or equal to the byte. + unsafe_optional_ptr GetNextChildMutable(ART &art, uint8_t &byte) const; + //! Returns true, if the byte exists, else false. + bool HasByte(ART &art, uint8_t &byte) const; + //! Get the first byte greater than or equal to the byte. + bool GetNextByte(ART &art, uint8_t &byte) const; + + //! Returns the string representation of the node, if only_verify is false. + //! Else, it traverses and verifies the node. string VerifyAndToString(ART &art, const bool only_verify) const; - //! Returns the capacity of the node - idx_t GetCapacity() const; - //! Returns the matching node type for a given count - static NType GetARTNodeTypeByCount(const idx_t count); - - //! Initializes a merge by incrementing the buffer IDs of a node and its subtree - void InitializeMerge(ART &art, const ARTFlags &flags); - //! Merge another node into this node - bool Merge(ART &art, Node &other); - //! Merge two nodes by first resolving their prefixes - bool ResolvePrefixes(ART &art, Node &other); - //! Merge two nodes that have no prefix or the same prefix - bool MergeInternal(ART &art, Node &other); - - //! Vacuum all nodes that exceed their respective vacuum thresholds - void Vacuum(ART &art, const ARTFlags &flags); - - //! Get the row ID (8th to 63rd bit) + //! Counts each node type. + void VerifyAllocations(ART &art, unordered_map &node_counts) const; + + //! Returns the node type for a count. + static NType GetNodeType(const idx_t count); + + //! Initialize a merge by incrementing the buffer IDs of a node and its children. + void InitMerge(ART &art, const unsafe_vector &upper_bounds); + //! Merge a node into this node. + bool Merge(ART &art, Node &other, const GateStatus status); + + //! Vacuum all nodes exceeding their vacuum threshold. 
+ void Vacuum(ART &art, const unordered_set &indexes); + + //! Transform the node storage to deprecated storage. + static void TransformToDeprecated(ART &art, Node &node, unsafe_unique_ptr &allocator); + + //! Returns the node type. + inline NType GetType() const { + return NType(GetMetadata() & ~AND_GATE); + } + + //! True, if the node is a Node4, Node16, Node48, or Node256. + bool IsNode() const; + //! True, if the node is a Node7Leaf, Node15Leaf, or Node256Leaf. + bool IsLeafNode() const; + //! True, if the node is any leaf. + bool IsAnyLeaf() const; + + //! Get the row ID (8th to 63rd bit). inline row_t GetRowId() const { return UnsafeNumericCast(Get() & AND_ROW_ID); } - //! Set the row ID (8th to 63rd bit) + //! Set the row ID (8th to 63rd bit). inline void SetRowId(const row_t row_id) { Set((Get() & AND_METADATA) | UnsafeNumericCast(row_id)); } - //! Returns the type of the node, which is held in the metadata - inline NType GetType() const { - return NType(GetMetadata()); + //! Returns the gate status of a node. + inline GateStatus GetGateStatus() const { + return (GetMetadata() & AND_GATE) == 0 ? GateStatus::GATE_NOT_SET : GateStatus::GATE_SET; + } + //! Sets the gate status of a node. + inline void SetGateStatus(const GateStatus status) { + switch (status) { + case GateStatus::GATE_SET: + SetMetadata(GetMetadata() | AND_GATE); + break; + case GateStatus::GATE_NOT_SET: + SetMetadata(GetMetadata() & ~AND_GATE); + break; + } } - //! Assign operator + //! Assign operator. 
inline void operator=(const IndexPointer &ptr) { Set(ptr.Get()); } + +private: + bool MergeNormalNodes(ART &art, Node &l_node, Node &r_node, uint8_t &byte, const GateStatus status); + void MergeLeafNodes(ART &art, Node &l_node, Node &r_node, uint8_t &byte); + bool MergeNodes(ART &art, Node &other, const GateStatus status); + bool PrefixContainsOther(ART &art, Node &l_node, Node &r_node, const uint8_t pos, const GateStatus status); + void MergeIntoNode4(ART &art, Node &l_node, Node &r_node, const uint8_t pos); + bool MergePrefixes(ART &art, Node &other, const GateStatus status); + bool MergeInternal(ART &art, Node &other, const GateStatus status); + +private: + template + static void InitMergeInternal(ART &art, NODE &n, const unsafe_vector &upper_bounds) { + NODE::Iterator(n, [&](Node &child) { child.InitMerge(art, upper_bounds); }); + } + + template + static void VacuumInternal(ART &art, NODE &n, const unordered_set &indexes) { + NODE::Iterator(n, [&](Node &child) { child.Vacuum(art, indexes); }); + } + + template + static void TransformToDeprecatedInternal(ART &art, unsafe_optional_ptr ptr, + unsafe_unique_ptr &allocator) { + if (ptr) { + NODE::Iterator(*ptr, [&](Node &child) { Node::TransformToDeprecated(art, child, allocator); }); + } + } + + template + static void VerifyAllocationsInternal(ART &art, NODE &n, unordered_map &node_counts) { + NODE::Iterator(n, [&](const Node &child) { child.VerifyAllocations(art, node_counts); }); + } }; } // namespace duckdb diff --git a/src/duckdb/src/include/duckdb/execution/index/art/node16.hpp b/src/duckdb/src/include/duckdb/execution/index/art/node16.hpp deleted file mode 100644 index 36d85e83..00000000 --- a/src/duckdb/src/include/duckdb/execution/index/art/node16.hpp +++ /dev/null @@ -1,65 +0,0 @@ -//===----------------------------------------------------------------------===// -// DuckDB -// -// duckdb/execution/index/art/node16.hpp -// -// -//===----------------------------------------------------------------------===// 
- -#pragma once - -#include "duckdb/execution/index/fixed_size_allocator.hpp" -#include "duckdb/execution/index/art/art.hpp" -#include "duckdb/execution/index/art/node.hpp" - -namespace duckdb { - -//! Node16 holds up to 16 Node children sorted by their key byte -class Node16 { -public: - //! Delete copy constructors, as any Node16 can never own its memory - Node16(const Node16 &) = delete; - Node16 &operator=(const Node16 &) = delete; - - //! Number of non-null children - uint8_t count; - //! Array containing all partial key bytes - uint8_t key[Node::NODE_16_CAPACITY]; - //! Node pointers to the child nodes - Node children[Node::NODE_16_CAPACITY]; - -public: - //! Get a new Node16, might cause a new buffer allocation, and initialize it - static Node16 &New(ART &art, Node &node); - //! Free the node (and its subtree) - static void Free(ART &art, Node &node); - - //! Initializes all the fields of the node while growing a Node4 to a Node16 - static Node16 &GrowNode4(ART &art, Node &node16, Node &node4); - //! Initializes all fields of the node while shrinking a Node48 to a Node16 - static Node16 &ShrinkNode48(ART &art, Node &node16, Node &node48); - - //! Initializes a merge by incrementing the buffer IDs of the node - void InitializeMerge(ART &art, const ARTFlags &flags); - - //! Insert a child node at byte - static void InsertChild(ART &art, Node &node, const uint8_t byte, const Node child); - //! Delete the child node at byte - static void DeleteChild(ART &art, Node &node, const uint8_t byte); - - //! Replace the child node at byte - void ReplaceChild(const uint8_t byte, const Node child); - - //! Get the (immutable) child for the respective byte in the node - optional_ptr GetChild(const uint8_t byte) const; - //! Get the child for the respective byte in the node - optional_ptr GetChildMutable(const uint8_t byte); - //! Get the first (immutable) child that is greater or equal to the specific byte - optional_ptr GetNextChild(uint8_t &byte) const; - //! 
Get the first child that is greater or equal to the specific byte - optional_ptr GetNextChildMutable(uint8_t &byte); - - //! Vacuum the children of the node - void Vacuum(ART &art, const ARTFlags &flags); -}; -} // namespace duckdb diff --git a/src/duckdb/src/include/duckdb/execution/index/art/node256.hpp b/src/duckdb/src/include/duckdb/execution/index/art/node256.hpp index bd6e9c3d..2b9f293c 100644 --- a/src/duckdb/src/include/duckdb/execution/index/art/node256.hpp +++ b/src/duckdb/src/include/duckdb/execution/index/art/node256.hpp @@ -14,50 +14,67 @@ namespace duckdb { -//! Node256 holds up to 256 Node children which can be directly indexed by the key byte +//! Node256 holds up to 256 children. They are indexed by their key byte. class Node256 { + friend class Node48; + +public: + static constexpr NType NODE_256 = NType::NODE_256; + static constexpr uint16_t CAPACITY = 256; + static constexpr uint8_t SHRINK_THRESHOLD = 36; + public: - //! Delete copy constructors, as any Node256 can never own its memory + Node256() = delete; Node256(const Node256 &) = delete; Node256 &operator=(const Node256 &) = delete; - //! Number of non-null children +private: uint16_t count; - //! Node pointers to the child nodes - Node children[Node::NODE_256_CAPACITY]; + Node children[CAPACITY]; public: - //! Get a new Node256, might cause a new buffer allocation, and initialize it + //! Get a new Node256 and initialize it. static Node256 &New(ART &art, Node &node); - //! Free the node (and its subtree) + //! Free the node and its children. static void Free(ART &art, Node &node); - //! Initializes all the fields of the node while growing a Node48 to a Node256 - static Node256 &GrowNode48(ART &art, Node &node256, Node &node48); - - //! Initializes a merge by incrementing the buffer IDs of the node - void InitializeMerge(ART &art, const ARTFlags &flags); - - //! Insert a child node at byte + //! Insert a child at byte. 
static void InsertChild(ART &art, Node &node, const uint8_t byte, const Node child); - //! Delete the child node at byte + //! Delete the child at byte. static void DeleteChild(ART &art, Node &node, const uint8_t byte); + //! Replace the child at byte. + void ReplaceChild(const uint8_t byte, const Node child); - //! Replace the child node at byte - inline void ReplaceChild(const uint8_t byte, const Node child) { - children[byte] = child; +public: + template + static void Iterator(NODE &n, F &&lambda) { + for (idx_t i = 0; i < CAPACITY; i++) { + if (n.children[i].HasMetadata()) { + lambda(n.children[i]); + } + } + } + + template + static unsafe_optional_ptr GetChild(NODE &n, const uint8_t byte) { + if (n.children[byte].HasMetadata()) { + return &n.children[byte]; + } + return nullptr; } - //! Get the (immutable) child for the respective byte in the node - optional_ptr GetChild(const uint8_t byte) const; - //! Get the child for the respective byte in the node - optional_ptr GetChildMutable(const uint8_t byte); - //! Get the first (immutable) child that is greater or equal to the specific byte - optional_ptr GetNextChild(uint8_t &byte) const; - //! Get the first child that is greater or equal to the specific byte - optional_ptr GetNextChildMutable(uint8_t &byte); - - //! 
Vacuum the children of the node - void Vacuum(ART &art, const ARTFlags &flags); + template + static unsafe_optional_ptr GetNextChild(NODE &n, uint8_t &byte) { + for (idx_t i = byte; i < CAPACITY; i++) { + if (n.children[i].HasMetadata()) { + byte = UnsafeNumericCast(i); + return &n.children[i]; + } + } + return nullptr; + } + +private: + static Node256 &GrowNode48(ART &art, Node &node256, Node &node48); }; } // namespace duckdb diff --git a/src/duckdb/src/include/duckdb/execution/index/art/node256_leaf.hpp b/src/duckdb/src/include/duckdb/execution/index/art/node256_leaf.hpp new file mode 100644 index 00000000..e3a708d3 --- /dev/null +++ b/src/duckdb/src/include/duckdb/execution/index/art/node256_leaf.hpp @@ -0,0 +1,53 @@ +//===----------------------------------------------------------------------===// +// DuckDB +// +// duckdb/execution/index/art/node256_leaf.hpp +// +// +//===----------------------------------------------------------------------===// + +#pragma once + +#include "duckdb/execution/index/art/art.hpp" +#include "duckdb/execution/index/art/node.hpp" +#include "duckdb/execution/index/art/node256.hpp" +#include "duckdb/execution/index/fixed_size_allocator.hpp" + +namespace duckdb { + +//! Node256Leaf is a bitmask containing 256 bits. +class Node256Leaf { + friend class Node15Leaf; + +public: + static constexpr NType NODE_256_LEAF = NType::NODE_256_LEAF; + static constexpr uint16_t CAPACITY = Node256::CAPACITY; + +public: + Node256Leaf() = delete; + Node256Leaf(const Node256Leaf &) = delete; + Node256Leaf &operator=(const Node256Leaf &) = delete; + +private: + uint16_t count; + validity_t mask[CAPACITY / sizeof(validity_t)]; + +public: + //! Get a new Node256Leaf and initialize it. + static Node256Leaf &New(ART &art, Node &node); + + //! Insert a byte. + static void InsertByte(ART &art, Node &node, const uint8_t byte); + //! Delete a byte. + static void DeleteByte(ART &art, Node &node, const uint8_t byte); + + //! 
Returns true, if the byte exists, else false. + bool HasByte(uint8_t &byte); + //! Get the first byte greater or equal to the byte. + bool GetNextByte(uint8_t &byte); + +private: + static Node256Leaf &GrowNode15Leaf(ART &art, Node &node256_leaf, Node &node15_leaf); +}; + +} // namespace duckdb diff --git a/src/duckdb/src/include/duckdb/execution/index/art/node4.hpp b/src/duckdb/src/include/duckdb/execution/index/art/node4.hpp deleted file mode 100644 index 86952b85..00000000 --- a/src/duckdb/src/include/duckdb/execution/index/art/node4.hpp +++ /dev/null @@ -1,63 +0,0 @@ -//===----------------------------------------------------------------------===// -// DuckDB -// -// duckdb/execution/index/art/node4.hpp -// -// -//===----------------------------------------------------------------------===// - -#pragma once - -#include "duckdb/execution/index/fixed_size_allocator.hpp" -#include "duckdb/execution/index/art/art.hpp" -#include "duckdb/execution/index/art/node.hpp" - -namespace duckdb { - -//! Node4 holds up to four Node children sorted by their key byte -class Node4 { -public: - //! Delete copy constructors, as any Node4 can never own its memory - Node4(const Node4 &) = delete; - Node4 &operator=(const Node4 &) = delete; - - //! Number of non-null children - uint8_t count; - //! Array containing all partial key bytes - uint8_t key[Node::NODE_4_CAPACITY]; - //! Node pointers to the child nodes - Node children[Node::NODE_4_CAPACITY]; - -public: - //! Get a new Node4, might cause a new buffer allocation, and initialize it - static Node4 &New(ART &art, Node &node); - //! Free the node (and its subtree) - static void Free(ART &art, Node &node); - - //! Initializes all fields of the node while shrinking a Node16 to a Node4 - static Node4 &ShrinkNode16(ART &art, Node &node4, Node &node16); - - //! Initializes a merge by incrementing the buffer IDs of the child nodes - void InitializeMerge(ART &art, const ARTFlags &flags); - - //! 
Insert a child node at byte - static void InsertChild(ART &art, Node &node, const uint8_t byte, const Node child); - //! Delete the child node at byte - static void DeleteChild(ART &art, Node &node, Node &prefix, const uint8_t byte); - - //! Replace the child node at byte - void ReplaceChild(const uint8_t byte, const Node child); - - //! Get the (immutable) child for the respective byte in the node - optional_ptr GetChild(const uint8_t byte) const; - //! Get the child for the respective byte in the node - optional_ptr GetChildMutable(const uint8_t byte); - //! Get the first (immutable) child that is greater or equal to the specific byte - optional_ptr GetNextChild(uint8_t &byte) const; - //! Get the first child that is greater or equal to the specific byte - optional_ptr GetNextChildMutable(uint8_t &byte); - - //! Vacuum the children of the node - void Vacuum(ART &art, const ARTFlags &flags); -}; -} // namespace duckdb diff --git a/src/duckdb/src/include/duckdb/execution/index/art/node48.hpp b/src/duckdb/src/include/duckdb/execution/index/art/node48.hpp index f57eea2f..b0c25fad 100644 --- a/src/duckdb/src/include/duckdb/execution/index/art/node48.hpp +++ b/src/duckdb/src/include/duckdb/execution/index/art/node48.hpp @@ -8,62 +8,79 @@ #pragma once -#include "duckdb/execution/index/fixed_size_allocator.hpp" #include "duckdb/execution/index/art/art.hpp" #include "duckdb/execution/index/art/node.hpp" +#include "duckdb/execution/index/art/node256.hpp" +#include "duckdb/execution/index/fixed_size_allocator.hpp" namespace duckdb { -//! Node48 holds up to 48 Node children. It contains a child_index array which can be directly indexed by the key -//! byte, and which contains the position of the child node in the children array +//! Node48 holds up to 48 children. The child_index array is indexed by the key byte. +//! It contains the position of the child node in the children array. 
class Node48 { + friend class Node16; + friend class Node256; + +public: + static constexpr NType NODE_48 = NType::NODE_48; + static constexpr uint8_t CAPACITY = 48; + static constexpr uint8_t EMPTY_MARKER = 48; + static constexpr uint8_t SHRINK_THRESHOLD = 12; + public: - //! Delete copy constructors, as any Node48 can never own its memory + Node48() = delete; Node48(const Node48 &) = delete; Node48 &operator=(const Node48 &) = delete; - //! Number of non-null children +private: uint8_t count; - //! Array containing all possible partial key bytes, those not set have an EMPTY_MARKER - uint8_t child_index[Node::NODE_256_CAPACITY]; - //! Node pointers to the child nodes - Node children[Node::NODE_48_CAPACITY]; + uint8_t child_index[Node256::CAPACITY]; + Node children[CAPACITY]; public: - //! Get a new Node48, might cause a new buffer allocation, and initialize it + //! Get a new Node48 and initialize it. static Node48 &New(ART &art, Node &node); - //! Free the node (and its subtree) + //! Free the node and its children. static void Free(ART &art, Node &node); - //! Initializes all the fields of the node while growing a Node16 to a Node48 - static Node48 &GrowNode16(ART &art, Node &node48, Node &node16); - //! Initializes all fields of the node while shrinking a Node256 to a Node48 - static Node48 &ShrinkNode256(ART &art, Node &node48, Node &node256); - - //! Initializes a merge by incrementing the buffer IDs of the node - void InitializeMerge(ART &art, const ARTFlags &flags); - - //! Insert a child node at byte + //! Insert a child at byte. static void InsertChild(ART &art, Node &node, const uint8_t byte, const Node child); - //! Delete the child node at byte + //! Delete the child at byte. static void DeleteChild(ART &art, Node &node, const uint8_t byte); + //! Replace the child at byte. + void ReplaceChild(const uint8_t byte, const Node child); - //! 
Replace the child node at byte - inline void ReplaceChild(const uint8_t byte, const Node child) { - D_ASSERT(child_index[byte] != Node::EMPTY_MARKER); - children[child_index[byte]] = child; +public: + template + static void Iterator(NODE &n, F &&lambda) { + for (idx_t i = 0; i < Node256::CAPACITY; i++) { + if (n.child_index[i] != EMPTY_MARKER) { + lambda(n.children[n.child_index[i]]); + } + } } - //! Get the (immutable) child for the respective byte in the node - optional_ptr GetChild(const uint8_t byte) const; - //! Get the child for the respective byte in the node - optional_ptr GetChildMutable(const uint8_t byte); - //! Get the first (immutable) child that is greater or equal to the specific byte - optional_ptr GetNextChild(uint8_t &byte) const; - //! Get the first child that is greater or equal to the specific byte - optional_ptr GetNextChildMutable(uint8_t &byte); + template + static unsafe_optional_ptr GetChild(NODE &n, const uint8_t byte) { + if (n.child_index[byte] != Node48::EMPTY_MARKER) { + return &n.children[n.child_index[byte]]; + } + return nullptr; + } - //! Vacuum the children of the node - void Vacuum(ART &art, const ARTFlags &flags); + template + static unsafe_optional_ptr GetNextChild(NODE &n, uint8_t &byte) { + for (idx_t i = byte; i < Node256::CAPACITY; i++) { + if (n.child_index[i] != EMPTY_MARKER) { + byte = UnsafeNumericCast(i); + return &n.children[n.child_index[i]]; + } + } + return nullptr; + } + +private: + static Node48 &GrowNode16(ART &art, Node &node48, Node &node16); + static Node48 &ShrinkNode256(ART &art, Node &node48, Node &node256); }; } // namespace duckdb diff --git a/src/duckdb/src/include/duckdb/execution/index/art/prefix.hpp b/src/duckdb/src/include/duckdb/execution/index/art/prefix.hpp index 7c3068fb..670ab32e 100644 --- a/src/duckdb/src/include/duckdb/execution/index/art/prefix.hpp +++ b/src/duckdb/src/include/duckdb/execution/index/art/prefix.hpp @@ -13,77 +13,116 @@ namespace duckdb { -// classes class ARTKey; -//! 
The Prefix is a special node type that contains up to PREFIX_SIZE bytes, and one byte for the count, -//! and a Node pointer. This pointer either points to a prefix node or another Node. +//! Prefix is a wrapper class to access a prefix. +//! The prefix contains up to the ART's prefix size bytes and an additional byte for the count. +//! It also contains a Node pointer to a child node. class Prefix { public: - //! Delete copy constructors, as any Prefix can never own its memory - Prefix(const Prefix &) = delete; - Prefix &operator=(const Prefix &) = delete; + static constexpr NType PREFIX = NType::PREFIX; - //! Up to PREFIX_SIZE bytes of prefix data and the count - uint8_t data[Node::PREFIX_SIZE + 1]; - //! A pointer to the next Node - Node ptr; + static constexpr uint8_t ROW_ID_SIZE = sizeof(row_t); + static constexpr uint8_t ROW_ID_COUNT = ROW_ID_SIZE - 1; + static constexpr uint8_t DEPRECATED_COUNT = 15; + static constexpr uint8_t METADATA_SIZE = sizeof(Node) + 1; public: - //! Get a new empty prefix node, might cause a new buffer allocation - static Prefix &New(ART &art, Node &node); - //! Create a new prefix node containing a single byte and a pointer to a next node - static Prefix &New(ART &art, Node &node, uint8_t byte, const Node &next = Node()); - //! Get a new chain of prefix nodes, might cause new buffer allocations, - //! with the node parameter holding the tail of the chain - static void New(ART &art, reference &node, const ARTKey &key, const uint32_t depth, uint32_t count); - //! Free the node (and its subtree) - static void Free(ART &art, Node &node); + Prefix() = delete; + Prefix(const ART &art, const Node ptr_p, const bool is_mutable = false, const bool set_in_memory = false); + Prefix(unsafe_unique_ptr &allocator, const Node ptr_p, const idx_t count); + + data_ptr_t data; + Node *ptr; + bool in_memory; - //! 
Initializes a merge by incrementing the buffer ID of the prefix and its child node(s) - static void InitializeMerge(ART &art, Node &node, const ARTFlags &flags); - - //! Appends a byte and a child_prefix to prefix. If there is no prefix, than it pushes the - //! byte on top of child_prefix. If there is no child_prefix, then it creates a new - //! prefix node containing that byte - static void Concatenate(ART &art, Node &prefix_node, const uint8_t byte, Node &child_prefix_node); - //! Traverse a prefix and a key until (1) encountering a non-prefix node, or (2) encountering - //! a mismatching byte, in which case depth indexes the mismatching byte in the key - static idx_t Traverse(ART &art, reference &prefix_node, const ARTKey &key, idx_t &depth); - //! Traverse a prefix and a key until (1) encountering a non-prefix node, or (2) encountering - //! a mismatching byte, in which case depth indexes the mismatching byte in the key - static idx_t TraverseMutable(ART &art, reference &prefix_node, const ARTKey &key, idx_t &depth); - //! Traverse two prefixes to find (1) that they match (so far), or (2) that they have a mismatching position, - //! or (3) that one prefix contains the other prefix. This function aids in merging Nodes, and, therefore, - //! the nodes are not const - static bool Traverse(ART &art, reference &l_node, reference &r_node, idx_t &mismatch_position); - //! Returns the byte at position - static inline uint8_t GetByte(const ART &art, const Node &prefix_node, const idx_t position) { - auto &prefix = Node::Ref(art, prefix_node, NType::PREFIX); - D_ASSERT(position < Node::PREFIX_SIZE); - D_ASSERT(position < prefix.data[Node::PREFIX_SIZE]); - return prefix.data[position]; +public: + static inline uint8_t Count(const ART &art) { + return art.prefix_count; } - //! Removes the first n bytes from the prefix and shifts all subsequent bytes in the - //! prefix node(s) by n. 
Frees empty prefix nodes - static void Reduce(ART &art, Node &prefix_node, const idx_t n); - //! Splits the prefix at position. prefix_node then references the ptr (if any bytes left before - //! the split), or stays unchanged (no bytes left before the split). child_node references - //! the node after the split, which is either a new prefix node, or ptr - static void Split(ART &art, reference &prefix_node, Node &child_node, idx_t position); + static idx_t GetMismatchWithOther(const Prefix &l_prefix, const Prefix &r_prefix, const idx_t max_count); + static idx_t GetMismatchWithKey(ART &art, const Node &node, const ARTKey &key, idx_t &depth); + static uint8_t GetByte(const ART &art, const Node &node, const uint8_t pos); + +public: + //! Get a new list of prefix nodes. The node reference holds the last prefix of the list. + static void New(ART &art, reference &ref, const ARTKey &key, const idx_t depth, idx_t count); + + //! Free the prefix and its child. + static void Free(ART &art, Node &node); + + //! Initializes a merge by incrementing the buffer ID of the prefix and its child. + static void InitializeMerge(ART &art, Node &node, const unsafe_vector &upper_bounds); + + //! Concatenates parent -> byte -> child. Special-handling, if + //! 1. the byte was in a gate node. + //! 2. the byte was in PREFIX_INLINED. + static void Concat(ART &art, Node &parent, uint8_t byte, const GateStatus old_status, const Node &child, + const GateStatus status); + + //! Traverse a prefix and a key until + //! 1. a non-prefix node. + //! 2. a mismatching byte. + //! Early-out, if the next prefix is a gate node. + static idx_t Traverse(ART &art, reference &node, const ARTKey &key, idx_t &depth); + static idx_t TraverseMutable(ART &art, reference &node, const ARTKey &key, idx_t &depth); + + //! Traverse two prefixes to find + //! 1. that they match. + //! 2. that they mismatch. + //! 3. that one prefix contains the other prefix. 
+ static bool Traverse(ART &art, reference &l_node, reference &r_node, idx_t &pos, + const GateStatus status); + + //! Removes up to pos bytes from the prefix. + //! Shifts all subsequent bytes by pos. Frees empty nodes. + static void Reduce(ART &art, Node &node, const idx_t pos); + //! Splits the prefix at pos. + //! prefix_node points to the node that replaces the split byte. + //! child_node points to the remaining node after the split. + //! Returns INSIDE, if a gate node was freed, else OUTSIDE. + static GateStatus Split(ART &art, reference &node, Node &child, const uint8_t pos); + + //! Insert a key into a prefix. + static bool Insert(ART &art, Node &node, const ARTKey &key, idx_t depth, const ARTKey &row_id, + const GateStatus status); //! Returns the string representation of the node, or only traverses and verifies the node and its subtree static string VerifyAndToString(ART &art, const Node &node, const bool only_verify); + //! Count the number of prefixes. + static void VerifyAllocations(ART &art, const Node &node, unordered_map &node_counts); + + //! Vacuum the child of the node. + static void Vacuum(ART &art, Node &node, const unordered_set &indexes); + + //! Transform the child of the node. + static void TransformToDeprecated(ART &art, Node &node, unsafe_unique_ptr &allocator); + +private: + static Prefix NewInternal(ART &art, Node &node, const data_ptr_t data, const uint8_t count, const idx_t offset, + const NType type); + + static Prefix GetTail(ART &art, const Node &node); + + static void ConcatGate(ART &art, Node &parent, uint8_t byte, const Node &child); + static void ConcatChildIsGate(ART &art, Node &parent, uint8_t byte, const Node &child); - //! Vacuum the child of the node - static void Vacuum(ART &art, Node &node, const ARTFlags &flags); + Prefix Append(ART &art, const uint8_t byte); + void Append(ART &art, Node other); + Prefix TransformToDeprecatedAppend(ART &art, unsafe_unique_ptr &allocator, uint8_t byte); private: - //! 
Appends the byte to this prefix node, or creates a subsequent prefix node, - //! if this node is full - Prefix &Append(ART &art, const uint8_t byte); - //! Appends the other_prefix and all its subsequent prefix nodes to this prefix node. - //! Also frees all copied/appended nodes - void Append(ART &art, Node other_prefix); + template + static void Iterator(ART &art, reference &ref, const bool exit_gate, const bool is_mutable, F &&lambda) { + while (ref.get().HasMetadata() && ref.get().GetType() == PREFIX) { + Prefix prefix(art, ref, is_mutable); + lambda(prefix); + + ref = *prefix.ptr; + if (exit_gate && ref.get().GetGateStatus() == GateStatus::GATE_SET) { + break; + } + } + } }; } // namespace duckdb diff --git a/src/duckdb/src/include/duckdb/execution/index/bound_index.hpp b/src/duckdb/src/include/duckdb/execution/index/bound_index.hpp index 20e26891..a73ede0f 100644 --- a/src/duckdb/src/include/duckdb/execution/index/bound_index.hpp +++ b/src/duckdb/src/include/duckdb/execution/index/bound_index.hpp @@ -106,16 +106,21 @@ class BoundIndex : public Index { //! Returns the in-memory usage of the index idx_t GetInMemorySize(); - //! Returns the string representation of an index, or only traverses and verifies the index + //! Returns the string representation of an index, or only traverses and verifies the index. virtual string VerifyAndToString(IndexLock &state, const bool only_verify) = 0; - //! Obtains a lock and calls VerifyAndToString while holding that lock + //! Obtains a lock and calls VerifyAndToString. string VerifyAndToString(const bool only_verify); + //! Ensures that the node allocation counts match the node counts. + virtual void VerifyAllocations(IndexLock &state) = 0; + //! Obtains a lock and calls VerifyAllocations. + void VerifyAllocations(); + //! Returns true if the index is affected by updates on the specified column IDs, and false otherwise bool IndexIsUpdated(const vector &column_ids) const; - //! 
Returns all index storage information for serialization - virtual IndexStorageInfo GetStorageInfo(const bool get_buffers); + //! Returns index storage serialization information. + virtual IndexStorageInfo GetStorageInfo(const case_insensitive_map_t &options, const bool to_wal); //! Execute the index expressions on an input chunk void ExecuteExpressions(DataChunk &input, DataChunk &result); diff --git a/src/duckdb/src/include/duckdb/execution/index/fixed_size_allocator.hpp b/src/duckdb/src/include/duckdb/execution/index/fixed_size_allocator.hpp index 317ef8dd..9942bb81 100644 --- a/src/duckdb/src/include/duckdb/execution/index/fixed_size_allocator.hpp +++ b/src/duckdb/src/include/duckdb/execution/index/fixed_size_allocator.hpp @@ -42,25 +42,57 @@ class FixedSizeAllocator { IndexPointer New(); //! Free the segment of the IndexPointer void Free(const IndexPointer ptr); - //! Returns a pointer of type T to a segment. If dirty is false, then T should be a const class + + //! Returns a pointer of type T to a segment. If dirty is false, then T must be a const class. template - inline T *Get(const IndexPointer ptr, const bool dirty = true) { + inline unsafe_optional_ptr Get(const IndexPointer ptr, const bool dirty = true) { return (T *)Get(ptr, dirty); } - //! Returns the data_ptr_t to a segment, and sets the dirty flag of the buffer containing that segment + + //! Returns the data_ptr_t to a segment, and sets the dirty flag of the buffer containing that segment. inline data_ptr_t Get(const IndexPointer ptr, const bool dirty = true) { D_ASSERT(ptr.GetOffset() < available_segments_per_buffer); D_ASSERT(buffers.find(ptr.GetBufferId()) != buffers.end()); + auto &buffer = buffers.find(ptr.GetBufferId())->second; auto buffer_ptr = buffer.Get(dirty); return buffer_ptr + ptr.GetOffset() * segment_size + bitmask_offset; } + //! Returns a pointer of type T to a segment, or nullptr, if the buffer is not in memory. 
+ template + inline unsafe_optional_ptr GetIfLoaded(const IndexPointer ptr) { + return (T *)GetIfLoaded(ptr); + } + + //! Returns the data_ptr_t to a segment, or nullptr, if the buffer is not in memory. + inline data_ptr_t GetIfLoaded(const IndexPointer ptr) { + D_ASSERT(ptr.GetOffset() < available_segments_per_buffer); + D_ASSERT(buffers.find(ptr.GetBufferId()) != buffers.end()); + + auto &buffer = buffers.find(ptr.GetBufferId())->second; + if (!buffer.InMemory()) { + return nullptr; + } + + auto buffer_ptr = buffer.Get(); + auto raw_ptr = buffer_ptr + ptr.GetOffset() * segment_size + bitmask_offset; + return raw_ptr; + } + //! Resets the allocator, e.g., during 'DELETE FROM table' void Reset(); //! Returns the in-memory size in bytes idx_t GetInMemorySize() const; + //! Returns the segment size. + inline idx_t GetSegmentSize() const { + return segment_size; + } + //! Returns the total segment count. + inline idx_t GetSegmentCount() const { + return total_segment_count; + } //! Returns the upper bound of the available buffer IDs, i.e., upper_bound > max_buffer_id idx_t GetUpperBoundBufferId() const; @@ -91,6 +123,12 @@ class FixedSizeAllocator { void Init(const FixedSizeAllocatorInfo &info); //! Deserializes all metadata of older storage files void Deserialize(MetadataManager &metadata_manager, const BlockPointer &block_pointer); + //! Removes empty buffers. + void RemoveEmptyBuffers(); + //! Returns true, if the allocator does not contain any segments. + inline bool IsEmpty() { + return total_segment_count == 0; + } private: //! 
Allocation size of one segment in a buffer diff --git a/src/duckdb/src/include/duckdb/execution/index/fixed_size_buffer.hpp b/src/duckdb/src/include/duckdb/execution/index/fixed_size_buffer.hpp index 67de51d2..b9c858a8 100644 --- a/src/duckdb/src/include/duckdb/execution/index/fixed_size_buffer.hpp +++ b/src/duckdb/src/include/duckdb/execution/index/fixed_size_buffer.hpp @@ -100,8 +100,6 @@ class FixedSizeBuffer { shared_ptr block_handle; private: - //! Returns the maximum non-free offset in a bitmask - uint32_t GetMaxOffset(const idx_t available_segments_per_buffer); //! Sets all uninitialized regions of a buffer in the respective partial block allocation void SetUninitializedRegions(PartialBlockForIndex &p_block_for_index, const idx_t segment_size, const idx_t offset, const idx_t bitmask_offset); diff --git a/src/duckdb/src/include/duckdb/execution/index/index_pointer.hpp b/src/duckdb/src/include/duckdb/execution/index/index_pointer.hpp index 3b7f0c75..0941adf3 100644 --- a/src/duckdb/src/include/duckdb/execution/index/index_pointer.hpp +++ b/src/duckdb/src/include/duckdb/execution/index/index_pointer.hpp @@ -8,6 +8,7 @@ #pragma once +#include "duckdb/common/numeric_utils.hpp" #include "duckdb/common/typedefs.hpp" namespace duckdb { @@ -27,7 +28,7 @@ class IndexPointer { IndexPointer() : data(0) {}; //! Constructs an in-memory IndexPointer with a buffer ID and an offset IndexPointer(const uint32_t buffer_id, const uint32_t offset) : data(0) { - auto shifted_offset = ((idx_t)offset) << SHIFT_OFFSET; + auto shifted_offset = UnsafeNumericCast(offset) << SHIFT_OFFSET; data += shifted_offset; data += buffer_id; }; @@ -52,7 +53,8 @@ class IndexPointer { } //! Set metadata (zero to 7th bit) inline void SetMetadata(const uint8_t metadata) { - data += (idx_t)metadata << SHIFT_METADATA; + data &= ~AND_METADATA; + data |= UnsafeNumericCast(metadata) << SHIFT_METADATA; } //! 
Get the offset (8th to 23rd bit) diff --git a/src/duckdb/src/include/duckdb/execution/join_hashtable.hpp b/src/duckdb/src/include/duckdb/execution/join_hashtable.hpp index 0b6dd9f2..add3a271 100644 --- a/src/duckdb/src/include/duckdb/execution/join_hashtable.hpp +++ b/src/duckdb/src/include/duckdb/execution/join_hashtable.hpp @@ -157,8 +157,8 @@ class JoinHashTable { TupleDataChunkState chunk_state; }; - JoinHashTable(BufferManager &buffer_manager, const vector &conditions, - vector build_types, JoinType type, const vector &output_columns); + JoinHashTable(ClientContext &context, const vector &conditions, vector build_types, + JoinType type, const vector &output_columns); ~JoinHashTable(); //! Add the given data to the HT @@ -255,6 +255,8 @@ class JoinHashTable { bool has_null; //! Bitmask for getting relevant bits from the hashes to determine the position uint64_t bitmask = DConstants::INVALID_INDEX; + //! Whether or not we error on multiple rows found per match in a SINGLE join + bool single_join_error_on_multiple_rows = true; struct { mutex mj_lock; diff --git a/src/duckdb/src/include/duckdb/execution/operator/schema/physical_create_art_index.hpp b/src/duckdb/src/include/duckdb/execution/operator/schema/physical_create_art_index.hpp index fb66fe39..edd949fb 100644 --- a/src/duckdb/src/include/duckdb/execution/operator/schema/physical_create_art_index.hpp +++ b/src/duckdb/src/include/duckdb/execution/operator/schema/physical_create_art_index.hpp @@ -55,9 +55,9 @@ class PhysicalCreateARTIndex : public PhysicalOperator { unique_ptr GetGlobalSinkState(ClientContext &context) const override; //! Sink for unsorted data: insert iteratively - SinkResultType SinkUnsorted(Vector &row_identifiers, OperatorSinkInput &input) const; + SinkResultType SinkUnsorted(OperatorSinkInput &input) const; //! 
Sink for sorted data: build + merge - SinkResultType SinkSorted(Vector &row_identifiers, OperatorSinkInput &input) const; + SinkResultType SinkSorted(OperatorSinkInput &input) const; SinkResultType Sink(ExecutionContext &context, DataChunk &chunk, OperatorSinkInput &input) const override; SinkCombineResultType Combine(ExecutionContext &context, OperatorSinkCombineInput &input) const override; diff --git a/src/duckdb/src/include/duckdb/function/table/table_scan.hpp b/src/duckdb/src/include/duckdb/function/table/table_scan.hpp index ab47502d..23015d33 100644 --- a/src/duckdb/src/include/duckdb/function/table/table_scan.hpp +++ b/src/duckdb/src/include/duckdb/function/table/table_scan.hpp @@ -28,7 +28,7 @@ struct TableScanBindData : public TableFunctionData { //! Whether or not the table scan is for index creation. bool is_create_index; //! The row ids to fetch in case of an index scan. - vector row_ids; + unsafe_vector row_ids; public: bool Equals(const FunctionData &other_p) const override { diff --git a/src/duckdb/src/include/duckdb/main/capi/extension_api.hpp b/src/duckdb/src/include/duckdb/main/capi/extension_api.hpp index 58c44ff3..5b325489 100644 --- a/src/duckdb/src/include/duckdb/main/capi/extension_api.hpp +++ b/src/duckdb/src/include/duckdb/main/capi/extension_api.hpp @@ -268,6 +268,7 @@ typedef struct { duckdb_value (*duckdb_profiling_info_get_value)(duckdb_profiling_info info, const char *key); idx_t (*duckdb_profiling_info_get_child_count)(duckdb_profiling_info info); duckdb_profiling_info (*duckdb_profiling_info_get_child)(duckdb_profiling_info info, idx_t index); + duckdb_value (*duckdb_profiling_info_get_metrics)(duckdb_profiling_info info); void (*duckdb_scalar_function_set_varargs)(duckdb_scalar_function scalar_function, duckdb_logical_type type); void (*duckdb_scalar_function_set_special_handling)(duckdb_scalar_function scalar_function); void (*duckdb_scalar_function_set_volatile)(duckdb_scalar_function scalar_function); @@ -326,6 +327,9 @@ typedef 
struct { duckdb_state (*duckdb_add_aggregate_function_to_set)(duckdb_aggregate_function_set set, duckdb_aggregate_function function); duckdb_state (*duckdb_register_aggregate_function_set)(duckdb_connection con, duckdb_aggregate_function_set set); + idx_t (*duckdb_get_map_size)(duckdb_value value); + duckdb_value (*duckdb_get_map_key)(duckdb_value value, idx_t index); + duckdb_value (*duckdb_get_map_value)(duckdb_value value, idx_t index); // dev // WARNING! the functions below are not (yet) stable @@ -611,6 +615,7 @@ inline duckdb_ext_api_v0 CreateAPIv0() { result.duckdb_profiling_info_get_value = duckdb_profiling_info_get_value; result.duckdb_profiling_info_get_child_count = duckdb_profiling_info_get_child_count; result.duckdb_profiling_info_get_child = duckdb_profiling_info_get_child; + result.duckdb_profiling_info_get_metrics = duckdb_profiling_info_get_metrics; result.duckdb_scalar_function_set_varargs = duckdb_scalar_function_set_varargs; result.duckdb_scalar_function_set_special_handling = duckdb_scalar_function_set_special_handling; result.duckdb_scalar_function_set_volatile = duckdb_scalar_function_set_volatile; @@ -667,6 +672,9 @@ inline duckdb_ext_api_v0 CreateAPIv0() { result.duckdb_destroy_aggregate_function_set = duckdb_destroy_aggregate_function_set; result.duckdb_add_aggregate_function_to_set = duckdb_add_aggregate_function_to_set; result.duckdb_register_aggregate_function_set = duckdb_register_aggregate_function_set; + result.duckdb_get_map_size = duckdb_get_map_size; + result.duckdb_get_map_key = duckdb_get_map_key; + result.duckdb_get_map_value = duckdb_get_map_value; result.duckdb_create_aggregate_function = duckdb_create_aggregate_function; result.duckdb_destroy_aggregate_function = duckdb_destroy_aggregate_function; result.duckdb_aggregate_function_set_name = duckdb_aggregate_function_set_name; diff --git a/src/duckdb/src/include/duckdb/main/config.hpp b/src/duckdb/src/include/duckdb/main/config.hpp index 15986fc4..965217cf 100644 --- 
a/src/duckdb/src/include/duckdb/main/config.hpp +++ b/src/duckdb/src/include/duckdb/main/config.hpp @@ -228,7 +228,7 @@ struct DBConfigOptions { //! The set of user-provided options case_insensitive_map_t user_options; //! The set of unrecognized (other) options - unordered_map unrecognized_options; + case_insensitive_map_t unrecognized_options; //! Whether or not the configuration settings can be altered bool lock_configuration = false; //! Whether to print bindings when printing the plan (debug mode only) @@ -263,6 +263,10 @@ struct DBConfigOptions { idx_t catalog_error_max_schemas = 100; //! Whether or not to always write to the WAL file, even if this is not required bool debug_skip_checkpoint_on_commit = false; + //! When a scalar subquery returns multiple rows - return a random row instead of returning an error + bool scalar_subquery_error_on_multiple_rows = true; + //! Use IEE754-compliant floating point operations (returning NAN instead of errors/NULL) + bool ieee_floating_point_ops = true; bool operator==(const DBConfigOptions &other) const; }; diff --git a/src/duckdb/src/include/duckdb/main/database.hpp b/src/duckdb/src/include/duckdb/main/database.hpp index b7d6be08..222a36c0 100644 --- a/src/duckdb/src/include/duckdb/main/database.hpp +++ b/src/duckdb/src/include/duckdb/main/database.hpp @@ -76,6 +76,7 @@ class DatabaseInstance : public enable_shared_from_this { private: void Initialize(const char *path, DBConfig *config); + void LoadExtensionSettings(); void CreateMainDatabase(); void Configure(DBConfig &config, const char *path); diff --git a/src/duckdb/src/include/duckdb/main/extension_entries.hpp b/src/duckdb/src/include/duckdb/main/extension_entries.hpp index c453cd6b..5988d96c 100644 --- a/src/duckdb/src/include/duckdb/main/extension_entries.hpp +++ b/src/duckdb/src/include/duckdb/main/extension_entries.hpp @@ -200,6 +200,7 @@ static constexpr ExtensionFunctionEntry EXTENSION_FUNCTIONS[] = { {"json_deserialize_sql", "json", 
CatalogType::SCALAR_FUNCTION_ENTRY}, {"json_execute_serialized_sql", "json", CatalogType::PRAGMA_FUNCTION_ENTRY}, {"json_execute_serialized_sql", "json", CatalogType::TABLE_FUNCTION_ENTRY}, + {"json_exists", "json", CatalogType::SCALAR_FUNCTION_ENTRY}, {"json_extract", "json", CatalogType::SCALAR_FUNCTION_ENTRY}, {"json_extract_path", "json", CatalogType::SCALAR_FUNCTION_ENTRY}, {"json_extract_path_text", "json", CatalogType::SCALAR_FUNCTION_ENTRY}, @@ -219,6 +220,7 @@ static constexpr ExtensionFunctionEntry EXTENSION_FUNCTIONS[] = { {"json_transform_strict", "json", CatalogType::SCALAR_FUNCTION_ENTRY}, {"json_type", "json", CatalogType::SCALAR_FUNCTION_ENTRY}, {"json_valid", "json", CatalogType::SCALAR_FUNCTION_ENTRY}, + {"json_value", "json", CatalogType::SCALAR_FUNCTION_ENTRY}, {"load_aws_credentials", "aws", CatalogType::TABLE_FUNCTION_ENTRY}, {"make_timestamptz", "icu", CatalogType::SCALAR_FUNCTION_ENTRY}, {"mysql_clear_cache", "mysql_scanner", CatalogType::TABLE_FUNCTION_ENTRY}, diff --git a/src/duckdb/src/include/duckdb/main/extension_helper.hpp b/src/duckdb/src/include/duckdb/main/extension_helper.hpp index d625f869..46583572 100644 --- a/src/duckdb/src/include/duckdb/main/extension_helper.hpp +++ b/src/duckdb/src/include/duckdb/main/extension_helper.hpp @@ -14,8 +14,6 @@ #include -#include - namespace duckdb { class DuckDB; @@ -104,6 +102,7 @@ class ExtensionHelper { static void AutoLoadExtension(DatabaseInstance &db, const string &extension_name); //! Autoload an extension (depending on config, potentially a nop. Returns false on failure) + DUCKDB_API static bool TryAutoLoadExtension(DatabaseInstance &db, const string &extension_name) noexcept; DUCKDB_API static bool TryAutoLoadExtension(ClientContext &context, const string &extension_name) noexcept; //! 
Update all extensions, return a vector of extension names that were updated; diff --git a/src/duckdb/src/include/duckdb/main/relation.hpp b/src/duckdb/src/include/duckdb/main/relation.hpp index 118d0aa6..18b125a3 100644 --- a/src/duckdb/src/include/duckdb/main/relation.hpp +++ b/src/duckdb/src/include/duckdb/main/relation.hpp @@ -136,9 +136,10 @@ class Relation : public enable_shared_from_this { //! Insert a row (i.e.,list of values) into a table DUCKDB_API void Insert(const vector> &values); //! Create a table and insert the data from this relation into that table - DUCKDB_API shared_ptr CreateRel(const string &schema_name, const string &table_name, bool temporary); + DUCKDB_API shared_ptr CreateRel(const string &schema_name, const string &table_name, + bool temporary = false); DUCKDB_API void Create(const string &table_name, bool temporary = false); - DUCKDB_API void Create(const string &schema_name, const string &table_name, bool temporary); + DUCKDB_API void Create(const string &schema_name, const string &table_name, bool temporary = false); //! 
Write a relation to a CSV file DUCKDB_API shared_ptr diff --git a/src/duckdb/src/include/duckdb/main/settings.hpp b/src/duckdb/src/include/duckdb/main/settings.hpp index 2530cf61..87215a5b 100644 --- a/src/duckdb/src/include/duckdb/main/settings.hpp +++ b/src/duckdb/src/include/duckdb/main/settings.hpp @@ -516,6 +516,16 @@ struct LockConfigurationSetting { static Value GetSetting(const ClientContext &context); }; +struct IEEEFloatingPointOpsSetting { + static constexpr const char *Name = "ieee_floating_point_ops"; + static constexpr const char *Description = + "Use IEE754-compliant floating point operations (returning NAN instead of errors/NULL)"; + static constexpr const LogicalTypeId InputType = LogicalTypeId::BOOLEAN; + static void SetGlobal(DatabaseInstance *db, DBConfig &config, const Value &parameter); + static void ResetGlobal(DatabaseInstance *db, DBConfig &config); + static Value GetSetting(const ClientContext &context); +}; + struct ImmediateTransactionModeSetting { static constexpr const char *Name = "immediate_transaction_mode"; static constexpr const char *Description = @@ -753,6 +763,16 @@ struct ProgressBarTimeSetting { static Value GetSetting(const ClientContext &context); }; +struct ScalarSubqueryErrorOnMultipleRows { + static constexpr const char *Name = "scalar_subquery_error_on_multiple_rows"; + static constexpr const char *Description = + "When a scalar subquery returns multiple rows - return a random row instead of returning an error"; + static constexpr const LogicalTypeId InputType = LogicalTypeId::BOOLEAN; + static void SetGlobal(DatabaseInstance *db, DBConfig &config, const Value &parameter); + static void ResetGlobal(DatabaseInstance *db, DBConfig &config); + static Value GetSetting(const ClientContext &context); +}; + struct SchemaSetting { static constexpr const char *Name = "schema"; static constexpr const char *Description = diff --git a/src/duckdb/src/include/duckdb/planner/logical_operator.hpp 
b/src/duckdb/src/include/duckdb/planner/logical_operator.hpp index fbde89f4..33488d1f 100644 --- a/src/duckdb/src/include/duckdb/planner/logical_operator.hpp +++ b/src/duckdb/src/include/duckdb/planner/logical_operator.hpp @@ -63,6 +63,7 @@ class LogicalOperator { void AddChild(unique_ptr child); virtual idx_t EstimateCardinality(ClientContext &context); + void SetEstimatedCardinality(idx_t _estimated_cardinality); virtual void Serialize(Serializer &serializer) const; static unique_ptr Deserialize(Deserializer &deserializer); diff --git a/src/duckdb/src/include/duckdb/storage/block_manager.hpp b/src/duckdb/src/include/duckdb/storage/block_manager.hpp index 1cbe4f16..01dd1f5b 100644 --- a/src/duckdb/src/include/duckdb/storage/block_manager.hpp +++ b/src/duckdb/src/include/duckdb/storage/block_manager.hpp @@ -78,6 +78,8 @@ class BlockManager { //! Whether or not the attached database is in-memory virtual bool InMemory() = 0; + //! Sync changes made to the block manager + virtual void FileSync() = 0; //! 
Truncate the underlying database file after a checkpoint virtual void Truncate(); diff --git a/src/duckdb/src/include/duckdb/storage/in_memory_block_manager.hpp b/src/duckdb/src/include/duckdb/storage/in_memory_block_manager.hpp index bd41b6ce..ef01d92e 100644 --- a/src/duckdb/src/include/duckdb/storage/in_memory_block_manager.hpp +++ b/src/duckdb/src/include/duckdb/storage/in_memory_block_manager.hpp @@ -65,6 +65,9 @@ class InMemoryBlockManager : public BlockManager { bool InMemory() override { return true; } + void FileSync() override { + throw InternalException("Cannot perform IO in in-memory database - FileSync!"); + } idx_t TotalBlocks() override { throw InternalException("Cannot perform IO in in-memory database - TotalBlocks!"); } diff --git a/src/duckdb/src/include/duckdb/storage/index_storage_info.hpp b/src/duckdb/src/include/duckdb/storage/index_storage_info.hpp index 08f4723f..d5e71ae4 100644 --- a/src/duckdb/src/include/duckdb/storage/index_storage_info.hpp +++ b/src/duckdb/src/include/duckdb/storage/index_storage_info.hpp @@ -8,10 +8,11 @@ #pragma once +#include "duckdb/common/case_insensitive_map.hpp" #include "duckdb/common/types/value.hpp" +#include "duckdb/common/unordered_set.hpp" #include "duckdb/storage/block.hpp" #include "duckdb/storage/storage_info.hpp" -#include "duckdb/common/unordered_set.hpp" namespace duckdb { @@ -38,26 +39,29 @@ struct IndexBufferInfo { idx_t allocation_size; }; -//! Information to serialize an index +//! Index (de)serialization information. struct IndexStorageInfo { IndexStorageInfo() {}; - explicit IndexStorageInfo(string name) : name(std::move(name)) {}; + explicit IndexStorageInfo(const string &name) : name(name) {}; - //! The name of the index + //! The name. string name; - //! The root of the index + //! The storage root. idx_t root; - //! Information to serialize the index memory held by the fixed-size allocators + //! Any index specialization can provide additional key-Value settings via this map. 
+ case_insensitive_map_t options; + //! Serialization information for fixed-size allocator memory. vector allocator_infos; - //! Contains all buffer pointers and their allocation size for serializing to the WAL - //! First dimension: all fixed-size allocators, second dimension: the buffers of each allocator + //! Contains all buffer pointers and their allocation size for serializing to the WAL. + //! First dimension: All fixed-size allocators. + //! Second dimension: The buffers of each fixed-size allocator. vector> buffers; - //! The root block pointer of the index, which is necessary to support older storage files + //! The root block pointer of the index. Necessary to support older storage files. BlockPointer root_block_ptr; - //! Returns true, if the struct contains index information + //! Returns true, if IndexStorageInfo holds information to deserialize an index. bool IsValid() const { return root_block_ptr.IsValid() || !allocator_infos.empty(); } diff --git a/src/duckdb/src/include/duckdb/storage/single_file_block_manager.hpp b/src/duckdb/src/include/duckdb/storage/single_file_block_manager.hpp index da48d36d..3118dd52 100644 --- a/src/duckdb/src/include/duckdb/storage/single_file_block_manager.hpp +++ b/src/duckdb/src/include/duckdb/storage/single_file_block_manager.hpp @@ -71,6 +71,8 @@ class SingleFileBlockManager : public BlockManager { void Write(FileBuffer &block, block_id_t block_id) override; //! Write the header to disk, this is the final step of the checkpointing process void WriteHeader(DatabaseHeader header) override; + //! Sync changes to the underlying file + void FileSync() override; //! 
Truncate the underlying database file after a checkpoint void Truncate() override; diff --git a/src/duckdb/src/include/duckdb/storage/storage_manager.hpp b/src/duckdb/src/include/duckdb/storage/storage_manager.hpp index 14eea421..1820805f 100644 --- a/src/duckdb/src/include/duckdb/storage/storage_manager.hpp +++ b/src/duckdb/src/include/duckdb/storage/storage_manager.hpp @@ -41,6 +41,9 @@ class StorageCommitState { unique_ptr row_group_data) = 0; virtual optional_ptr GetRowGroupData(DataTable &table, idx_t start_index, idx_t &count) = 0; + virtual bool HasRowGroupData() { + return false; + } }; struct CheckpointOptions { diff --git a/src/duckdb/src/include/duckdb/storage/table/table_index_list.hpp b/src/duckdb/src/include/duckdb/storage/table/table_index_list.hpp index 2520ba5c..4f1f681f 100644 --- a/src/duckdb/src/include/duckdb/storage/table/table_index_list.hpp +++ b/src/duckdb/src/include/duckdb/storage/table/table_index_list.hpp @@ -75,8 +75,8 @@ class TableIndexList { Index *FindForeignKeyIndex(const vector &fk_keys, ForeignKeyType fk_type); void VerifyForeignKey(const vector &fk_keys, DataChunk &chunk, ConflictManager &conflict_manager); - //! Serialize all indexes of this table - vector GetStorageInfos(); + //! Serialize all indexes of this table. 
+ vector GetStorageInfos(const case_insensitive_map_t &options); vector GetRequiredColumns(); diff --git a/src/duckdb/src/include/duckdb_extension.h b/src/duckdb/src/include/duckdb_extension.h index cebf2e47..43781b04 100644 --- a/src/duckdb/src/include/duckdb_extension.h +++ b/src/duckdb/src/include/duckdb_extension.h @@ -328,6 +328,7 @@ typedef struct { duckdb_value (*duckdb_profiling_info_get_value)(duckdb_profiling_info info, const char *key); idx_t (*duckdb_profiling_info_get_child_count)(duckdb_profiling_info info); duckdb_profiling_info (*duckdb_profiling_info_get_child)(duckdb_profiling_info info, idx_t index); + duckdb_value (*duckdb_profiling_info_get_metrics)(duckdb_profiling_info info); void (*duckdb_scalar_function_set_varargs)(duckdb_scalar_function scalar_function, duckdb_logical_type type); void (*duckdb_scalar_function_set_special_handling)(duckdb_scalar_function scalar_function); void (*duckdb_scalar_function_set_volatile)(duckdb_scalar_function scalar_function); @@ -386,6 +387,9 @@ typedef struct { duckdb_state (*duckdb_add_aggregate_function_to_set)(duckdb_aggregate_function_set set, duckdb_aggregate_function function); duckdb_state (*duckdb_register_aggregate_function_set)(duckdb_connection con, duckdb_aggregate_function_set set); + idx_t (*duckdb_get_map_size)(duckdb_value value); + duckdb_value (*duckdb_get_map_key)(duckdb_value value, idx_t index); + duckdb_value (*duckdb_get_map_value)(duckdb_value value, idx_t index); #endif #ifdef DUCKDB_EXTENSION_API_VERSION_DEV // dev @@ -741,6 +745,9 @@ typedef struct { #define duckdb_get_interval duckdb_ext_api.duckdb_get_interval #define duckdb_get_value_type duckdb_ext_api.duckdb_get_value_type #define duckdb_get_blob duckdb_ext_api.duckdb_get_blob +#define duckdb_get_map_size duckdb_ext_api.duckdb_get_map_size +#define duckdb_get_map_key duckdb_ext_api.duckdb_get_map_key +#define duckdb_get_map_value duckdb_ext_api.duckdb_get_map_value #define duckdb_scalar_function_set_varargs 
duckdb_ext_api.duckdb_scalar_function_set_varargs #define duckdb_scalar_function_set_special_handling duckdb_ext_api.duckdb_scalar_function_set_special_handling @@ -759,6 +766,7 @@ typedef struct { #define duckdb_get_profiling_info duckdb_ext_api.duckdb_get_profiling_info #define duckdb_profiling_info_get_value duckdb_ext_api.duckdb_profiling_info_get_value +#define duckdb_profiling_info_get_metrics duckdb_ext_api.duckdb_profiling_info_get_metrics #define duckdb_profiling_info_get_child_count duckdb_ext_api.duckdb_profiling_info_get_child_count #define duckdb_profiling_info_get_child duckdb_ext_api.duckdb_profiling_info_get_child diff --git a/src/duckdb/src/main/capi/duckdb_value-c.cpp b/src/duckdb/src/main/capi/duckdb_value-c.cpp index cd73d6f1..b231f919 100644 --- a/src/duckdb/src/main/capi/duckdb_value-c.cpp +++ b/src/duckdb/src/main/capi/duckdb_value-c.cpp @@ -1,13 +1,13 @@ #include "duckdb/common/type_visitor.hpp" #include "duckdb/common/types.hpp" +#include "duckdb/common/types/null_value.hpp" #include "duckdb/common/types/value.hpp" #include "duckdb/main/capi/capi_internal.hpp" -#include "duckdb/common/types/null_value.hpp" using duckdb::LogicalTypeId; -static duckdb_value WrapValue(duckdb::Value *list_value) { - return reinterpret_cast(list_value); +static duckdb_value WrapValue(duckdb::Value *value) { + return reinterpret_cast(value); } static duckdb::LogicalType &UnwrapType(duckdb_logical_type type) { @@ -275,3 +275,57 @@ duckdb_value duckdb_create_array_value(duckdb_logical_type type, duckdb_value *v } return WrapValue(array_value); } + +idx_t duckdb_get_map_size(duckdb_value value) { + if (!value) { + return 0; + } + + auto val = UnwrapValue(value); + if (val.type() != LogicalTypeId::MAP) { + return 0; + } + + auto &children = duckdb::MapValue::GetChildren(val); + return children.size(); +} + +duckdb_value duckdb_get_map_key(duckdb_value value, idx_t index) { + if (!value) { + return nullptr; + } + + auto val = UnwrapValue(value); + if (val.type() != 
LogicalTypeId::MAP) { + return nullptr; + } + + auto &children = duckdb::MapValue::GetChildren(val); + if (index >= children.size()) { + return nullptr; + } + + auto &child = children[index]; + auto &child_struct = duckdb::StructValue::GetChildren(child); + return WrapValue(new duckdb::Value(child_struct[0])); +} + +duckdb_value duckdb_get_map_value(duckdb_value value, idx_t index) { + if (!value) { + return nullptr; + } + + auto val = UnwrapValue(value); + if (val.type() != LogicalTypeId::MAP) { + return nullptr; + } + + auto &children = duckdb::MapValue::GetChildren(val); + if (index >= children.size()) { + return nullptr; + } + + auto &child = children[index]; + auto &child_struct = duckdb::StructValue::GetChildren(child); + return WrapValue(new duckdb::Value(child_struct[1])); +} diff --git a/src/duckdb/src/main/capi/profiling_info-c.cpp b/src/duckdb/src/main/capi/profiling_info-c.cpp index 327b7c3f..7e563dd0 100644 --- a/src/duckdb/src/main/capi/profiling_info-c.cpp +++ b/src/duckdb/src/main/capi/profiling_info-c.cpp @@ -38,6 +38,30 @@ duckdb_value duckdb_profiling_info_get_value(duckdb_profiling_info info, const c return duckdb_create_varchar_length(str.c_str(), strlen(str.c_str())); } +duckdb_value duckdb_profiling_info_get_metrics(duckdb_profiling_info info) { + if (!info) { + return nullptr; + } + + auto &node = *reinterpret_cast(info); + auto &profiling_info = node.GetProfilingInfo(); + + // FIXME: filter between operator metrics and query node metrics. 
+ duckdb::unordered_map metrics_map; + for (const auto &metric : profiling_info.metrics) { + auto key = EnumUtil::ToString(metric.first); + if (key == EnumUtil::ToString(MetricsType::OPERATOR_TYPE)) { + auto type = duckdb::PhysicalOperatorType(metric.second.GetValue()); + metrics_map[key] = EnumUtil::ToString(type); + } else { + metrics_map[key] = metric.second.ToString(); + } + } + + auto map = duckdb::Value::MAP(metrics_map); + return reinterpret_cast(new duckdb::Value(map)); +} + idx_t duckdb_profiling_info_get_child_count(duckdb_profiling_info info) { if (!info) { return 0; diff --git a/src/duckdb/src/main/config.cpp b/src/duckdb/src/main/config.cpp index 08d1a543..d4c22307 100644 --- a/src/duckdb/src/main/config.cpp +++ b/src/duckdb/src/main/config.cpp @@ -101,6 +101,7 @@ static const ConfigurationOption internal_options[] = { DUCKDB_GLOBAL(EnableMacrosDependencies), DUCKDB_GLOBAL(EnableViewDependencies), DUCKDB_GLOBAL(LockConfigurationSetting), + DUCKDB_GLOBAL(IEEEFloatingPointOpsSetting), DUCKDB_GLOBAL(ImmediateTransactionModeSetting), DUCKDB_LOCAL(IntegerDivisionSetting), DUCKDB_LOCAL(MaximumExpressionDepthSetting), @@ -126,6 +127,7 @@ static const ConfigurationOption internal_options[] = { DUCKDB_LOCAL(ProgressBarTimeSetting), DUCKDB_LOCAL(SchemaSetting), DUCKDB_LOCAL(SearchPathSetting), + DUCKDB_GLOBAL(ScalarSubqueryErrorOnMultipleRows), DUCKDB_GLOBAL(SecretDirectorySetting), DUCKDB_GLOBAL(DefaultSecretStorage), DUCKDB_GLOBAL(TempDirectorySetting), @@ -549,11 +551,17 @@ SerializationCompatibility SerializationCompatibility::Default() { auto res = FromString("latest"); res.manually_set = false; return res; +#else +#ifdef DUCKDB_LATEST_STORAGE + auto res = FromString("latest"); + res.manually_set = false; + return res; #else auto res = FromString("v0.10.2"); res.manually_set = false; return res; #endif +#endif } SerializationCompatibility SerializationCompatibility::Latest() { diff --git a/src/duckdb/src/main/database.cpp b/src/duckdb/src/main/database.cpp 
index 3f22d29e..0d13aece 100644 --- a/src/duckdb/src/main/database.cpp +++ b/src/duckdb/src/main/database.cpp @@ -202,13 +202,60 @@ void DatabaseInstance::CreateMainDatabase() { initial_database->Initialize(); } -void ThrowExtensionSetUnrecognizedOptions(const unordered_map &unrecognized_options) { - auto unrecognized_options_iter = unrecognized_options.begin(); - string unrecognized_option_keys = unrecognized_options_iter->first; - while (++unrecognized_options_iter != unrecognized_options.end()) { - unrecognized_option_keys = "," + unrecognized_options_iter->first; +static void ThrowExtensionSetUnrecognizedOptions(const case_insensitive_map_t &unrecognized_options) { + D_ASSERT(!unrecognized_options.empty()); + + vector options; + for (auto &kv : unrecognized_options) { + options.push_back(kv.first); + } + auto concatenated = StringUtil::Join(options, ", "); + throw InvalidInputException("The following options were not recognized: " + concatenated); +} + +void DatabaseInstance::LoadExtensionSettings() { + auto &unrecognized_options = config.options.unrecognized_options; + + if (config.options.autoload_known_extensions) { + if (unrecognized_options.empty()) { + // Nothing to do + return; + } + + Connection con(*this); + con.BeginTransaction(); + + vector extension_options; + for (auto &option : unrecognized_options) { + auto &name = option.first; + auto &value = option.second; + + auto extension_name = ExtensionHelper::FindExtensionInEntries(name, EXTENSION_SETTINGS); + if (extension_name.empty()) { + continue; + } + if (!ExtensionHelper::TryAutoLoadExtension(*this, extension_name)) { + throw InvalidInputException( + "To set the %s setting, the %s extension needs to be loaded. 
But it could not be autoloaded.", name, + extension_name); + } + auto it = config.extension_parameters.find(name); + if (it == config.extension_parameters.end()) { + throw InternalException("Extension %s did not provide the '%s' config setting", extension_name, name); + } + auto &context = *con.context; + PhysicalSet::SetExtensionVariable(context, it->second, name, SetScope::GLOBAL, value); + extension_options.push_back(name); + } + + for (auto &option : extension_options) { + unrecognized_options.erase(option); + } + con.Commit(); + } + if (!unrecognized_options.empty()) { + ThrowExtensionSetUnrecognizedOptions(unrecognized_options); } - throw InvalidInputException("Unrecognized configuration property \"%s\"", unrecognized_option_keys); } void DatabaseInstance::Initialize(const char *database_path, DBConfig *user_config) { @@ -254,9 +301,7 @@ void DatabaseInstance::Initialize(const char *database_path, DBConfig *user_conf ExtensionHelper::LoadExternalExtension(*this, *config.file_system, config.options.database_type); } - if (!config.options.unrecognized_options.empty()) { - ThrowExtensionSetUnrecognizedOptions(config.options.unrecognized_options); - } + LoadExtensionSettings(); if (!db_manager->HasDefaultDatabase()) { CreateMainDatabase(); diff --git a/src/duckdb/src/main/extension/extension_helper.cpp b/src/duckdb/src/main/extension/extension_helper.cpp index d844c41f..17b7ccda 100644 --- a/src/duckdb/src/main/extension/extension_helper.cpp +++ b/src/duckdb/src/main/extension/extension_helper.cpp @@ -220,6 +220,25 @@ bool ExtensionHelper::TryAutoLoadExtension(ClientContext &context, const string } } +bool ExtensionHelper::TryAutoLoadExtension(DatabaseInstance &instance, const string &extension_name) noexcept { + if (instance.ExtensionIsLoaded(extension_name)) { + return true; + } + auto &dbconfig = DBConfig::GetConfig(instance); + try { + auto &fs = FileSystem::GetFileSystem(instance); + if (dbconfig.options.autoinstall_known_extensions) { + auto 
autoinstall_repo = + ExtensionRepository::GetRepositoryByUrl(dbconfig.options.autoinstall_extension_repo); + ExtensionHelper::InstallExtension(dbconfig, fs, extension_name, false, autoinstall_repo, false); + } + ExtensionHelper::LoadExternalExtension(instance, fs, extension_name); + return true; + } catch (...) { + return false; + } +} + static ExtensionUpdateResult UpdateExtensionInternal(ClientContext &context, DatabaseInstance &db, FileSystem &fs, const string &full_extension_path, const string &extension_name) { ExtensionUpdateResult result; diff --git a/src/duckdb/src/main/settings/settings.cpp b/src/duckdb/src/main/settings/settings.cpp index aa2372e0..cb86347d 100644 --- a/src/duckdb/src/main/settings/settings.cpp +++ b/src/duckdb/src/main/settings/settings.cpp @@ -1166,6 +1166,22 @@ Value LockConfigurationSetting::GetSetting(const ClientContext &context) { return Value::BOOLEAN(config.options.lock_configuration); } +//===--------------------------------------------------------------------===// +// IEEE Floating Points +//===--------------------------------------------------------------------===// +void IEEEFloatingPointOpsSetting::SetGlobal(DatabaseInstance *db, DBConfig &config, const Value &input) { + config.options.ieee_floating_point_ops = BooleanValue::Get(input); +} + +void IEEEFloatingPointOpsSetting::ResetGlobal(DatabaseInstance *db, DBConfig &config) { + config.options.ieee_floating_point_ops = DBConfig().options.ieee_floating_point_ops; +} + +Value IEEEFloatingPointOpsSetting::GetSetting(const ClientContext &context) { + auto &config = DBConfig::GetConfig(context); + return Value::BOOLEAN(config.options.ieee_floating_point_ops); +} + //===--------------------------------------------------------------------===// // Immediate Transaction Mode //===--------------------------------------------------------------------===// @@ -1548,6 +1564,7 @@ Value ArrowOutputListView::GetSetting(const ClientContext &context) { return 
Value::BOOLEAN(arrow_output_list_view); } +//===--------------------------------------------------------------------===// // ProduceArrowStringView //===--------------------------------------------------------------------===// void ProduceArrowStringView::SetGlobal(DatabaseInstance *db, DBConfig &config, const Value &input) { @@ -1562,6 +1579,21 @@ Value ProduceArrowStringView::GetSetting(const ClientContext &context) { return Value::BOOLEAN(DBConfig::GetConfig(context).options.produce_arrow_string_views); } +//===--------------------------------------------------------------------===// +// ScalarSubqueryErrorOnMultipleRows +//===--------------------------------------------------------------------===// +void ScalarSubqueryErrorOnMultipleRows::SetGlobal(DatabaseInstance *db, DBConfig &config, const Value &input) { + config.options.scalar_subquery_error_on_multiple_rows = input.GetValue(); +} + +void ScalarSubqueryErrorOnMultipleRows::ResetGlobal(DatabaseInstance *db, DBConfig &config) { + config.options.scalar_subquery_error_on_multiple_rows = DBConfig().options.scalar_subquery_error_on_multiple_rows; +} + +Value ScalarSubqueryErrorOnMultipleRows::GetSetting(const ClientContext &context) { + return Value::BOOLEAN(DBConfig::GetConfig(context).options.scalar_subquery_error_on_multiple_rows); +} + //===--------------------------------------------------------------------===// // Profile Output //===--------------------------------------------------------------------===// diff --git a/src/duckdb/src/optimizer/compressed_materialization.cpp b/src/duckdb/src/optimizer/compressed_materialization.cpp index 3116ab03..3a819807 100644 --- a/src/duckdb/src/optimizer/compressed_materialization.cpp +++ b/src/duckdb/src/optimizer/compressed_materialization.cpp @@ -172,6 +172,9 @@ void CompressedMaterialization::CreateCompressProjection(unique_ptr(table_index, std::move(projections)); + if (child_op->has_estimated_cardinality) { + 
compress_projection->SetEstimatedCardinality(child_op->estimated_cardinality); + } compress_projection->ResolveOperatorTypes(); compress_projection->children.emplace_back(std::move(child_op)); @@ -258,6 +261,9 @@ void CompressedMaterialization::CreateDecompressProjection(unique_ptr(table_index, std::move(decompress_exprs)); + if (op->has_estimated_cardinality) { + decompress_projection->SetEstimatedCardinality(op->estimated_cardinality); + } decompress_projection->children.emplace_back(std::move(op)); op = std::move(decompress_projection); diff --git a/src/duckdb/src/optimizer/cse_optimizer.cpp b/src/duckdb/src/optimizer/cse_optimizer.cpp index d3f11bdd..12034f07 100644 --- a/src/duckdb/src/optimizer/cse_optimizer.cpp +++ b/src/duckdb/src/optimizer/cse_optimizer.cpp @@ -149,6 +149,9 @@ void CommonSubExpressionOptimizer::ExtractCommonSubExpresions(LogicalOperator &o D_ASSERT(state.expressions.size() > 0); // create a projection node as the child of this node auto projection = make_uniq(state.projection_index, std::move(state.expressions)); + if (op.children[0]->has_estimated_cardinality) { + projection->SetEstimatedCardinality(op.children[0]->estimated_cardinality); + } projection->children.push_back(std::move(op.children[0])); op.children[0] = std::move(projection); } diff --git a/src/duckdb/src/optimizer/filter_pushdown.cpp b/src/duckdb/src/optimizer/filter_pushdown.cpp index 57289c9f..0744c670 100644 --- a/src/duckdb/src/optimizer/filter_pushdown.cpp +++ b/src/duckdb/src/optimizer/filter_pushdown.cpp @@ -186,6 +186,16 @@ unique_ptr FilterPushdown::AddLogicalFilter(unique_ptr(); + if (op->has_estimated_cardinality) { + // set the filter's estimated cardinality as the child op's. + // if the filter is created during the filter pushdown optimization, the estimated cardinality will be later + // overridden during the join order optimization to a more accurate one. 
+ // if the filter is created during the statistics propagation, the estimated cardinality won't be set unless set + // here. assuming the filters introduced during the statistics propagation have little effect in reducing the + // cardinality, we adopt the the cardinality of the child. this could be improved by MinMax info from the + // statistics propagation + filter->SetEstimatedCardinality(op->estimated_cardinality); + } filter->expressions = std::move(expressions); filter->children.push_back(std::move(op)); return std::move(filter); diff --git a/src/duckdb/src/optimizer/join_order/join_order_optimizer.cpp b/src/duckdb/src/optimizer/join_order/join_order_optimizer.cpp index 3e6f331a..e49798ac 100644 --- a/src/duckdb/src/optimizer/join_order/join_order_optimizer.cpp +++ b/src/duckdb/src/optimizer/join_order/join_order_optimizer.cpp @@ -64,6 +64,13 @@ unique_ptr JoinOrderOptimizer::Optimize(unique_ptrEstimateCardinality(context); + } + + if (new_logical_plan->type == LogicalOperatorType::LOGICAL_EXPLAIN) { + new_logical_plan->SetEstimatedCardinality(3); } return new_logical_plan; diff --git a/src/duckdb/src/optimizer/join_order/query_graph_manager.cpp b/src/duckdb/src/optimizer/join_order/query_graph_manager.cpp index 879219bb..e76e2390 100644 --- a/src/duckdb/src/optimizer/join_order/query_graph_manager.cpp +++ b/src/duckdb/src/optimizer/join_order/query_graph_manager.cpp @@ -322,14 +322,6 @@ GenerateJoinRelation QueryGraphManager::GenerateJoins(vectorestimated_props = node.estimated_props->Copy(); result_operator->estimated_cardinality = node->cardinality; result_operator->has_estimated_cardinality = true; - if (result_operator->type == LogicalOperatorType::LOGICAL_FILTER && - result_operator->children[0]->type == LogicalOperatorType::LOGICAL_GET) { - // FILTER on top of GET, add estimated properties to both - // auto &filter_props = *result_operator->estimated_props; - auto &child_operator = *result_operator->children[0]; - child_operator.estimated_cardinality 
= node->cardinality; - child_operator.has_estimated_cardinality = true; - } // check if we should do a pushdown on this node // basically, any remaining filter that is a subset of the current relation will no longer be used in joins // hence we should push it here diff --git a/src/duckdb/src/optimizer/join_order/relation_manager.cpp b/src/duckdb/src/optimizer/join_order/relation_manager.cpp index 38272184..319c88e2 100644 --- a/src/duckdb/src/optimizer/join_order/relation_manager.cpp +++ b/src/duckdb/src/optimizer/join_order/relation_manager.cpp @@ -225,6 +225,7 @@ bool RelationManager::ExtractJoinRelations(JoinOrderOptimizer &optimizer, Logica } auto combined_stats = RelationStatisticsHelper::CombineStatsOfNonReorderableOperator(*op, children_stats); + op->SetEstimatedCardinality(combined_stats.cardinality); if (!datasource_filters.empty()) { combined_stats.cardinality = (idx_t)MaxValue( double(combined_stats.cardinality) * RelationStatisticsHelper::DEFAULT_SELECTIVITY, (double)1); @@ -241,6 +242,8 @@ bool RelationManager::ExtractJoinRelations(JoinOrderOptimizer &optimizer, Logica op->children[0] = child_optimizer.Optimize(std::move(op->children[0]), &child_stats); auto &aggr = op->Cast(); auto operator_stats = RelationStatisticsHelper::ExtractAggregationStats(aggr, child_stats); + // the extracted cardinality should be set for aggregate + aggr.SetEstimatedCardinality(operator_stats.cardinality); if (!datasource_filters.empty()) { operator_stats.cardinality = LossyNumericCast(static_cast(operator_stats.cardinality) * RelationStatisticsHelper::DEFAULT_SELECTIVITY); @@ -256,6 +259,8 @@ bool RelationManager::ExtractJoinRelations(JoinOrderOptimizer &optimizer, Logica op->children[0] = child_optimizer.Optimize(std::move(op->children[0]), &child_stats); auto &window = op->Cast(); auto operator_stats = RelationStatisticsHelper::ExtractWindowStats(window, child_stats); + // the extracted cardinality should be set for window + 
window.SetEstimatedCardinality(operator_stats.cardinality); if (!datasource_filters.empty()) { operator_stats.cardinality = LossyNumericCast(static_cast(operator_stats.cardinality) * RelationStatisticsHelper::DEFAULT_SELECTIVITY); @@ -322,6 +327,7 @@ bool RelationManager::ExtractJoinRelations(JoinOrderOptimizer &optimizer, Logica auto stats = RelationStatisticsHelper::ExtractGetStats(get, context); // if there is another logical filter that could not be pushed down into the // table scan, apply another selectivity. + get.SetEstimatedCardinality(stats.cardinality); if (!datasource_filters.empty()) { stats.cardinality = (idx_t)MaxValue(double(stats.cardinality) * RelationStatisticsHelper::DEFAULT_SELECTIVITY, (double)1); @@ -338,6 +344,7 @@ bool RelationManager::ExtractJoinRelations(JoinOrderOptimizer &optimizer, Logica auto &proj = op->Cast(); // Projection can create columns so we need to add them here auto proj_stats = RelationStatisticsHelper::ExtractProjectionStats(proj, child_stats); + proj.SetEstimatedCardinality(proj_stats.cardinality); ModifyStatsIfLimit(limit_op.get(), proj_stats); AddRelation(input_op, parent, proj_stats); return true; @@ -347,6 +354,7 @@ bool RelationManager::ExtractJoinRelations(JoinOrderOptimizer &optimizer, Logica auto &empty_result = op->Cast(); // Projection can create columns so we need to add them here auto stats = RelationStatisticsHelper::ExtractEmptyResultStats(empty_result); + empty_result.SetEstimatedCardinality(stats.cardinality); AddRelation(input_op, parent, stats); return true; } @@ -370,7 +378,9 @@ bool RelationManager::ExtractJoinRelations(JoinOrderOptimizer &optimizer, Logica if (cte_ref.materialized_cte != CTEMaterialize::CTE_MATERIALIZE_ALWAYS) { return false; } - AddRelation(input_op, parent, optimizer.GetMaterializedCTEStats(cte_ref.cte_index)); + auto cte_stats = optimizer.GetMaterializedCTEStats(cte_ref.cte_index); + cte_ref.SetEstimatedCardinality(cte_stats.cardinality); + AddRelation(input_op, parent, 
cte_stats); return true; } case LogicalOperatorType::LOGICAL_DELIM_JOIN: { @@ -401,7 +411,9 @@ bool RelationManager::ExtractJoinRelations(JoinOrderOptimizer &optimizer, Logica // Used to not be possible to reorder these. We added reordering (without stats) before, // but ran into terrible join orders (see internal issue #596), so we removed it again // We now have proper statistics for DelimGets, and get an even better query plan for #596 - AddAggregateOrWindowRelation(input_op, parent, optimizer.GetDelimScanStats(), op->type); + auto delim_scan_stats = optimizer.GetDelimScanStats(); + op->SetEstimatedCardinality(delim_scan_stats.cardinality); + AddAggregateOrWindowRelation(input_op, parent, delim_scan_stats, op->type); return true; } default: diff --git a/src/duckdb/src/optimizer/join_order/relation_statistics_helper.cpp b/src/duckdb/src/optimizer/join_order/relation_statistics_helper.cpp index 02d8636a..32d2b432 100644 --- a/src/duckdb/src/optimizer/join_order/relation_statistics_helper.cpp +++ b/src/duckdb/src/optimizer/join_order/relation_statistics_helper.cpp @@ -239,6 +239,7 @@ RelationStats RelationStatisticsHelper::CombineStatsOfNonReorderableOperator(Log case JoinType::ANTI: case JoinType::SEMI: case JoinType::SINGLE: + case JoinType::MARK: ret.cardinality = child_1_card; break; default: diff --git a/src/duckdb/src/optimizer/pushdown/pushdown_cross_product.cpp b/src/duckdb/src/optimizer/pushdown/pushdown_cross_product.cpp index 09be63ad..a05da2c2 100644 --- a/src/duckdb/src/optimizer/pushdown/pushdown_cross_product.cpp +++ b/src/duckdb/src/optimizer/pushdown/pushdown_cross_product.cpp @@ -62,9 +62,21 @@ unique_ptr FilterPushdown::PushdownCrossProduct(unique_ptrchildren[1], left_bindings, right_bindings, join_expressions, conditions, arbitrary_expressions); // create the join from the join conditions - return LogicalComparisonJoin::CreateJoin(GetContext(), join_type, join_ref_type, std::move(op->children[0]), - std::move(op->children[1]), 
std::move(conditions), - std::move(arbitrary_expressions)); + auto new_op = LogicalComparisonJoin::CreateJoin(GetContext(), join_type, join_ref_type, + std::move(op->children[0]), std::move(op->children[1]), + std::move(conditions), std::move(arbitrary_expressions)); + + // possible cases are: AnyJoin, ComparisonJoin, or Filter + ComparisonJoin + if (op->has_estimated_cardinality) { + // set the estimated cardinality of the new operator + new_op->SetEstimatedCardinality(op->estimated_cardinality); + if (new_op->type == LogicalOperatorType::LOGICAL_FILTER) { + // if the new operators are Filter + ComparisonJoin, also set the estimated cardinality for the join + D_ASSERT(new_op->children[0]->type == LogicalOperatorType::LOGICAL_COMPARISON_JOIN); + new_op->children[0]->SetEstimatedCardinality(op->estimated_cardinality); + } + } + return new_op; } else { // no join conditions found: keep as cross product D_ASSERT(op->type == LogicalOperatorType::LOGICAL_CROSS_PRODUCT); diff --git a/src/duckdb/src/optimizer/pushdown/pushdown_inner_join.cpp b/src/duckdb/src/optimizer/pushdown/pushdown_inner_join.cpp index 985110d4..791bbb9d 100644 --- a/src/duckdb/src/optimizer/pushdown/pushdown_inner_join.cpp +++ b/src/duckdb/src/optimizer/pushdown/pushdown_inner_join.cpp @@ -47,6 +47,12 @@ unique_ptr FilterPushdown::PushdownInnerJoin(unique_ptr(std::move(op->children[0]), std::move(op->children[1])); + + // preserve the estimated cardinality of the operator + if (op->has_estimated_cardinality) { + cross_product->SetEstimatedCardinality(op->estimated_cardinality); + } + // then push down cross product return PushdownCrossProduct(std::move(cross_product)); } diff --git a/src/duckdb/src/optimizer/remove_unused_columns.cpp b/src/duckdb/src/optimizer/remove_unused_columns.cpp index 56a93155..8425050b 100644 --- a/src/duckdb/src/optimizer/remove_unused_columns.cpp +++ b/src/duckdb/src/optimizer/remove_unused_columns.cpp @@ -144,6 +144,9 @@ void 
RemoveUnusedColumns::VisitOperator(LogicalOperator &op) { } auto new_projection = make_uniq(binder.GenerateTableIndex(), std::move(expressions)); + if (child->has_estimated_cardinality) { + new_projection->SetEstimatedCardinality(child->estimated_cardinality); + } new_projection->children.push_back(std::move(child)); op.children[child_idx] = std::move(new_projection); diff --git a/src/duckdb/src/optimizer/topn_optimizer.cpp b/src/duckdb/src/optimizer/topn_optimizer.cpp index 574e8c66..81b2aa09 100644 --- a/src/duckdb/src/optimizer/topn_optimizer.cpp +++ b/src/duckdb/src/optimizer/topn_optimizer.cpp @@ -62,6 +62,11 @@ unique_ptr TopN::Optimize(unique_ptr op) { } auto topn = make_uniq(std::move(order_by.orders), limit_val, offset_val); topn->AddChild(std::move(order_by.children[0])); + auto cardinality = limit_val; + if (topn->children[0]->has_estimated_cardinality && topn->children[0]->estimated_cardinality < limit_val) { + cardinality = topn->children[0]->estimated_cardinality; + } + topn->SetEstimatedCardinality(cardinality); op = std::move(topn); // reconstruct all projection nodes above limit operator diff --git a/src/duckdb/src/planner/binder/query_node/plan_subquery.cpp b/src/duckdb/src/planner/binder/query_node/plan_subquery.cpp index 860b1791..61276c57 100644 --- a/src/duckdb/src/planner/binder/query_node/plan_subquery.cpp +++ b/src/duckdb/src/planner/binder/query_node/plan_subquery.cpp @@ -2,6 +2,7 @@ #include "duckdb/main/client_config.hpp" #include "duckdb/planner/binder.hpp" #include "duckdb/planner/expression/bound_aggregate_expression.hpp" +#include "duckdb/planner/expression/bound_case_expression.hpp" #include "duckdb/planner/expression/bound_cast_expression.hpp" #include "duckdb/planner/expression/bound_columnref_expression.hpp" #include "duckdb/planner/expression/bound_comparison_expression.hpp" @@ -16,6 +17,7 @@ #include "duckdb/common/enums/logical_operator_type.hpp" #include "duckdb/planner/operator/logical_dependent_join.hpp" #include 
"duckdb/planner/subquery/recursive_dependent_join_planner.hpp" +#include "duckdb/core_functions/scalar/generic_functions.hpp" namespace duckdb { @@ -75,11 +77,8 @@ static unique_ptr PlanUncorrelatedSubquery(Binder &binder, BoundSubq D_ASSERT(bindings.size() == 1); idx_t table_idx = bindings[0].table_index; - // in the uncorrelated case we are only interested in the first result of the query - // hence we simply push a LIMIT 1 to get the first row of the subquery - auto limit = make_uniq(BoundLimitNode::ConstantValue(1), BoundLimitNode()); - limit->AddChild(std::move(plan)); - plan = std::move(limit); + auto &config = DBConfig::GetConfig(binder.context); + bool error_on_multiple_rows = config.options.scalar_subquery_error_on_multiple_rows; // we push an aggregate that returns the FIRST element vector> expressions; @@ -92,11 +91,51 @@ static unique_ptr PlanUncorrelatedSubquery(Binder &binder, BoundSubq FirstFun::GetFunction(expr.return_type), std::move(first_children), nullptr, AggregateType::NON_DISTINCT); expressions.push_back(std::move(first_agg)); + if (error_on_multiple_rows) { + vector> count_children; + auto count_agg = function_binder.BindAggregateFunction( + CountStarFun::GetFunction(), std::move(count_children), nullptr, AggregateType::NON_DISTINCT); + expressions.push_back(std::move(count_agg)); + } auto aggr_index = binder.GenerateTableIndex(); + auto aggr = make_uniq(binder.GenerateTableIndex(), aggr_index, std::move(expressions)); aggr->AddChild(std::move(plan)); plan = std::move(aggr); + if (error_on_multiple_rows) { + // CASE WHEN count > 1 THEN error('Scalar subquery can only return a single row') ELSE first_agg END + idx_t proj_index = binder.GenerateTableIndex(); + + auto first_ref = + make_uniq(plan->expressions[0]->return_type, ColumnBinding(aggr_index, 0)); + auto count_ref = + make_uniq(plan->expressions[1]->return_type, ColumnBinding(aggr_index, 1)); + + auto constant_one = make_uniq(Value::BIGINT(1)); + auto count_check = 
make_uniq(ExpressionType::COMPARE_GREATERTHAN, + std::move(count_ref), std::move(constant_one)); + + vector> error_children; + error_children.push_back(make_uniq( + Value("More than one row returned by a subquery used as an expression - scalar subqueries can only " + "return a single row.\n\nUse \"SET scalar_subquery_error_on_multiple_rows=false\" to revert to " + "previous behavior of returning a random row."))); + auto error_expr = function_binder.BindScalarFunction(ErrorFun::GetFunction(), std::move(error_children)); + error_expr->return_type = first_ref->return_type; + auto case_expr = + make_uniq(std::move(count_check), std::move(error_expr), std::move(first_ref)); + + vector> proj_expressions; + proj_expressions.push_back(std::move(case_expr)); + + auto proj = make_uniq(proj_index, std::move(proj_expressions)); + proj->AddChild(std::move(plan)); + plan = std::move(proj); + + aggr_index = proj_index; + } + // in the uncorrelated case, we add the value to the main query through a cross product // FIXME: should use something else besides cross product as we always add only one scalar constant and cross // product is not optimized for this. 
diff --git a/src/duckdb/src/planner/binder/statement/bind_summarize.cpp b/src/duckdb/src/planner/binder/statement/bind_summarize.cpp index 6fb6403d..dac43d8f 100644 --- a/src/duckdb/src/planner/binder/statement/bind_summarize.cpp +++ b/src/duckdb/src/planner/binder/statement/bind_summarize.cpp @@ -1,7 +1,9 @@ #include "duckdb/planner/binder.hpp" #include "duckdb/parser/query_node/select_node.hpp" #include "duckdb/parser/expression/function_expression.hpp" +#include "duckdb/parser/expression/comparison_expression.hpp" #include "duckdb/parser/expression/constant_expression.hpp" +#include "duckdb/parser/expression/case_expression.hpp" #include "duckdb/parser/expression/cast_expression.hpp" #include "duckdb/parser/tableref/subqueryref.hpp" #include "duckdb/parser/tableref/showref.hpp" @@ -64,7 +66,16 @@ static unique_ptr SummarizeCreateNullPercentage(string column_ auto percentage_x = SummarizeCreateBinaryFunction("*", std::move(negate_x), make_uniq(Value::DOUBLE(100))); - return make_uniq(LogicalType::DECIMAL(9, 2), std::move(percentage_x)); + auto comp_expr = make_uniq(ExpressionType::COMPARE_GREATERTHAN, SummarizeCreateCountStar(), + make_uniq(Value::BIGINT(0))); + auto case_expr = make_uniq(); + CaseCheck check; + check.when_expr = std::move(comp_expr); + check.then_expr = std::move(percentage_x); + case_expr->case_checks.push_back(std::move(check)); + case_expr->else_expr = make_uniq(Value()); + + return make_uniq(LogicalType::DECIMAL(9, 2), std::move(case_expr)); } unique_ptr Binder::BindSummarize(ShowRef &ref) { diff --git a/src/duckdb/src/planner/logical_operator.cpp b/src/duckdb/src/planner/logical_operator.cpp index ee6a8892..185e7887 100644 --- a/src/duckdb/src/planner/logical_operator.cpp +++ b/src/duckdb/src/planner/logical_operator.cpp @@ -191,6 +191,11 @@ idx_t LogicalOperator::EstimateCardinality(ClientContext &context) { return estimated_cardinality; } +void LogicalOperator::SetEstimatedCardinality(idx_t _estimated_cardinality) { + 
estimated_cardinality = _estimated_cardinality; + has_estimated_cardinality = true; +} + void LogicalOperator::Print() { Printer::Print(ToString()); } diff --git a/src/duckdb/src/storage/checkpoint/table_data_writer.cpp b/src/duckdb/src/storage/checkpoint/table_data_writer.cpp index dad7e96a..eff266f1 100644 --- a/src/duckdb/src/storage/checkpoint/table_data_writer.cpp +++ b/src/duckdb/src/storage/checkpoint/table_data_writer.cpp @@ -3,9 +3,10 @@ #include "duckdb/catalog/catalog_entry/duck_table_entry.hpp" #include "duckdb/catalog/catalog_entry/table_catalog_entry.hpp" #include "duckdb/common/serializer/binary_serializer.hpp" +#include "duckdb/main/database.hpp" +#include "duckdb/parallel/task_scheduler.hpp" #include "duckdb/storage/table/column_checkpoint_state.hpp" #include "duckdb/storage/table/table_statistics.hpp" -#include "duckdb/parallel/task_scheduler.hpp" namespace duckdb { @@ -49,6 +50,7 @@ CheckpointType SingleFileTableDataWriter::GetCheckpointType() const { void SingleFileTableDataWriter::FinalizeTable(const TableStatistics &global_stats, DataTableInfo *info, Serializer &serializer) { + // store the current position in the metadata writer // this is where the row groups for this table start auto pointer = table_data_writer.GetMetaBlockPointer(); @@ -80,7 +82,14 @@ void SingleFileTableDataWriter::FinalizeTable(const TableStatistics &global_stat serializer.WriteProperty(101, "table_pointer", pointer); serializer.WriteProperty(102, "total_rows", total_rows); - auto index_storage_infos = info->GetIndexes().GetStorageInfos(); + auto db_options = checkpoint_manager.db.GetDatabase().config.options; + auto v1_0_0_storage = db_options.serialization_compatibility.serialization_version < 3; + case_insensitive_map_t options; + if (!v1_0_0_storage) { + options.emplace("v1_0_0_storage", v1_0_0_storage); + } + auto index_storage_infos = info->GetIndexes().GetStorageInfos(options); + // write empty block pointers for forwards compatibility vector 
compat_block_pointers; serializer.WriteProperty(103, "index_pointers", compat_block_pointers); diff --git a/src/duckdb/src/storage/checkpoint_manager.cpp b/src/duckdb/src/storage/checkpoint_manager.cpp index fce8b2fc..9db9de0d 100644 --- a/src/duckdb/src/storage/checkpoint_manager.cpp +++ b/src/duckdb/src/storage/checkpoint_manager.cpp @@ -429,12 +429,12 @@ void CheckpointReader::ReadIndex(CatalogTransaction transaction, Deserializer &d IndexStorageInfo index_storage_info; if (root_block_pointer.IsValid()) { - // this code path is necessary to read older duckdb files + // Read older duckdb files. index_storage_info.name = index.name; index_storage_info.root_block_ptr = root_block_pointer; } else { - // get the matching index storage info + // Read the matching index storage info. for (auto const &elem : data_table.GetDataTableInfo()->GetIndexStorageInfo()) { if (elem.name == index.name) { index_storage_info = elem; @@ -445,10 +445,9 @@ void CheckpointReader::ReadIndex(CatalogTransaction transaction, Deserializer &d D_ASSERT(index_storage_info.IsValid() && !index_storage_info.name.empty()); - // Create an unbound index and add it to the table + // Create an unbound index and add it to the table. auto unbound_index = make_uniq(std::move(create_info), index_storage_info, TableIOManager::Get(data_table), data_table.db); - data_table.GetDataTableInfo()->GetIndexes().AddIndex(std::move(unbound_index)); } @@ -525,9 +524,9 @@ void CheckpointReader::ReadTableData(CatalogTransaction transaction, Deserialize auto table_pointer = deserializer.ReadProperty(101, "table_pointer"); auto total_rows = deserializer.ReadProperty(102, "total_rows"); - // old file read + // Cover reading old storage files. auto index_pointers = deserializer.ReadPropertyWithExplicitDefault>(103, "index_pointers", {}); - // new file read + // Cover reading new storage files. 
auto index_storage_infos = deserializer.ReadPropertyWithExplicitDefault>(104, "index_storage_infos", {}); @@ -535,8 +534,9 @@ void CheckpointReader::ReadTableData(CatalogTransaction transaction, Deserialize bound_info.indexes = index_storage_infos; } else { - // old duckdb file containing index pointers + // This is an old duckdb file containing index pointers and deprecated storage. for (idx_t i = 0; i < index_pointers.size(); i++) { + // Deprecated storage is always true for old duckdb files. IndexStorageInfo index_storage_info; index_storage_info.root_block_ptr = index_pointers[i]; bound_info.indexes.push_back(index_storage_info); diff --git a/src/duckdb/src/storage/serialization/serialize_storage.cpp b/src/duckdb/src/storage/serialization/serialize_storage.cpp index 96261903..29ac3038 100644 --- a/src/duckdb/src/storage/serialization/serialize_storage.cpp +++ b/src/duckdb/src/storage/serialization/serialize_storage.cpp @@ -88,6 +88,7 @@ void IndexStorageInfo::Serialize(Serializer &serializer) const { serializer.WritePropertyWithDefault(100, "name", name); serializer.WritePropertyWithDefault(101, "root", root); serializer.WritePropertyWithDefault>(102, "allocator_infos", allocator_infos); + serializer.WritePropertyWithDefault>(103, "options", options, case_insensitive_map_t()); } IndexStorageInfo IndexStorageInfo::Deserialize(Deserializer &deserializer) { @@ -95,6 +96,7 @@ IndexStorageInfo IndexStorageInfo::Deserialize(Deserializer &deserializer) { deserializer.ReadPropertyWithDefault(100, "name", result.name); deserializer.ReadPropertyWithDefault(101, "root", result.root); deserializer.ReadPropertyWithDefault>(102, "allocator_infos", result.allocator_infos); + deserializer.ReadPropertyWithExplicitDefault>(103, "options", result.options, case_insensitive_map_t()); return result; } diff --git a/src/duckdb/src/storage/single_file_block_manager.cpp b/src/duckdb/src/storage/single_file_block_manager.cpp index 27803a44..4e689c35 100644 --- 
a/src/duckdb/src/storage/single_file_block_manager.cpp +++ b/src/duckdb/src/storage/single_file_block_manager.cpp @@ -638,6 +638,10 @@ void SingleFileBlockManager::WriteHeader(DatabaseHeader header) { TrimFreeBlocks(); } +void SingleFileBlockManager::FileSync() { + handle->Sync(); +} + void SingleFileBlockManager::TrimFreeBlocks() { if (DBConfig::Get(db).options.trim_free_blocks) { for (auto itr = newly_freed_list.begin(); itr != newly_freed_list.end(); ++itr) { diff --git a/src/duckdb/src/storage/storage_manager.cpp b/src/duckdb/src/storage/storage_manager.cpp index 531e0020..0cd10f06 100644 --- a/src/duckdb/src/storage/storage_manager.cpp +++ b/src/duckdb/src/storage/storage_manager.cpp @@ -255,6 +255,7 @@ class SingleFileStorageCommitState : public StorageCommitState { void AddRowGroupData(DataTable &table, idx_t start_index, idx_t count, unique_ptr row_group_data) override; optional_ptr GetRowGroupData(DataTable &table, idx_t start_index, idx_t &count) override; + bool HasRowGroupData() override; private: idx_t initial_wal_size = 0; @@ -329,6 +330,10 @@ optional_ptr SingleFileStorageCommitState::GetRowGroup return start_entry->second.row_group_data.get(); } +bool SingleFileStorageCommitState::HasRowGroupData() { + return !optimistically_written_data.empty(); +} + unique_ptr SingleFileStorageManager::GenStorageCommitState(WriteAheadLog &wal) { return make_uniq(*this, wal); } diff --git a/src/duckdb/src/storage/table_index_list.cpp b/src/duckdb/src/storage/table_index_list.cpp index 8cc8487e..c505d0a0 100644 --- a/src/duckdb/src/storage/table_index_list.cpp +++ b/src/duckdb/src/storage/table_index_list.cpp @@ -167,21 +167,22 @@ vector TableIndexList::GetRequiredColumns() { return result; } -vector TableIndexList::GetStorageInfos() { +vector TableIndexList::GetStorageInfos(const case_insensitive_map_t &options) { vector index_storage_infos; for (auto &index : indexes) { if (index->IsBound()) { - auto index_storage_info = index->Cast().GetStorageInfo(false); - 
D_ASSERT(index_storage_info.IsValid() && !index_storage_info.name.empty()); - index_storage_infos.push_back(index_storage_info); - } else { - // TODO: Will/should this ever happen? - auto index_storage_info = index->Cast().GetStorageInfo(); + auto index_storage_info = index->Cast().GetStorageInfo(options, false); D_ASSERT(index_storage_info.IsValid() && !index_storage_info.name.empty()); index_storage_infos.push_back(index_storage_info); + continue; } + + auto index_storage_info = index->Cast().GetStorageInfo(); + D_ASSERT(index_storage_info.IsValid() && !index_storage_info.name.empty()); + index_storage_infos.push_back(index_storage_info); } + return index_storage_infos; } diff --git a/src/duckdb/src/storage/write_ahead_log.cpp b/src/duckdb/src/storage/write_ahead_log.cpp index 103be7ab..ba9db9ee 100644 --- a/src/duckdb/src/storage/write_ahead_log.cpp +++ b/src/duckdb/src/storage/write_ahead_log.cpp @@ -259,11 +259,12 @@ void WriteAheadLog::WriteDropTableMacro(const TableMacroCatalogEntry &entry) { // Indexes //===--------------------------------------------------------------------===// -void SerializeIndexToWAL(WriteAheadLogSerializer &serializer, Index &index) { +void SerializeIndexToWAL(WriteAheadLogSerializer &serializer, Index &index, + const case_insensitive_map_t &options) { // We will never write an index to the WAL that is not bound D_ASSERT(index.IsBound()); - const auto index_storage_info = index.Cast().GetStorageInfo(true); + const auto index_storage_info = index.Cast().GetStorageInfo(options, true); serializer.WriteProperty(102, "index_storage_info", index_storage_info); serializer.WriteList(103, "index_storage", index_storage_info.buffers.size(), [&](Serializer::List &list, idx_t i) { @@ -278,12 +279,20 @@ void WriteAheadLog::WriteCreateIndex(const IndexCatalogEntry &entry) { WriteAheadLogSerializer serializer(*this, WALType::CREATE_INDEX); serializer.WriteProperty(101, "index_catalog_entry", &entry); + auto db_options = 
database.GetDatabase().config.options; + auto v1_0_0_storage = db_options.serialization_compatibility.serialization_version < 3; + case_insensitive_map_t options; + if (!v1_0_0_storage) { + options.emplace("v1_0_0_storage", v1_0_0_storage); + } + // now serialize the index data to the persistent storage and write the index metadata auto &duck_index_entry = entry.Cast(); auto &table_idx_list = duck_index_entry.GetDataTableInfo().GetIndexes(); + table_idx_list.Scan([&](Index &index) { if (duck_index_entry.name == index.GetIndexName()) { - SerializeIndexToWAL(serializer, index); + SerializeIndexToWAL(serializer, index, options); return true; } return false; diff --git a/src/duckdb/src/transaction/duck_transaction.cpp b/src/duckdb/src/transaction/duck_transaction.cpp index 35a93e3d..94299d22 100644 --- a/src/duckdb/src/transaction/duck_transaction.cpp +++ b/src/duckdb/src/transaction/duck_transaction.cpp @@ -198,6 +198,12 @@ ErrorData DuckTransaction::WriteToWAL(AttachedDatabase &db, unique_ptrCommit(commit_state.get()); undo_buffer.WriteToWAL(*log, commit_state.get()); + if (commit_state->HasRowGroupData()) { + // if we have optimistically written any data AND we are writing to the WAL, we have written references to + // optimistically written blocks + // hence we need to ensure those optimistically written blocks are persisted + storage_manager.GetBlockManager().FileSync(); + } } catch (std::exception &ex) { if (commit_state) { commit_state->RevertCommit(); diff --git a/src/duckdb/ub_extension_json_json_functions.cpp b/src/duckdb/ub_extension_json_json_functions.cpp index 411b8f53..7a2a1af0 100644 --- a/src/duckdb/ub_extension_json_json_functions.cpp +++ b/src/duckdb/ub_extension_json_json_functions.cpp @@ -4,6 +4,8 @@ #include "extension/json/json_functions/json_contains.cpp" +#include "extension/json/json_functions/json_exists.cpp" + #include "extension/json/json_functions/json_extract.cpp" #include "extension/json/json_functions/json_keys.cpp" @@ -22,6 +24,8 @@ 
#include "extension/json/json_functions/json_valid.cpp" +#include "extension/json/json_functions/json_value.cpp" + #include "extension/json/json_functions/json_serialize_plan.cpp" #include "extension/json/json_functions/json_serialize_sql.cpp" diff --git a/src/duckdb/ub_src_execution_index_art.cpp b/src/duckdb/ub_src_execution_index_art.cpp index eb3d4030..503c9e38 100644 --- a/src/duckdb/ub_src_execution_index_art.cpp +++ b/src/duckdb/ub_src_execution_index_art.cpp @@ -6,9 +6,7 @@ #include "src/execution/index/art/leaf.cpp" -#include "src/execution/index/art/node4.cpp" - -#include "src/execution/index/art/node16.cpp" +#include "src/execution/index/art/base_node.cpp" #include "src/execution/index/art/node48.cpp" @@ -16,5 +14,9 @@ #include "src/execution/index/art/prefix.cpp" +#include "src/execution/index/art/base_leaf.cpp" + +#include "src/execution/index/art/node256_leaf.cpp" + #include "src/execution/index/art/art.cpp"