From 530feaf86909aea90fbd6e1e3428a2d14a72c2d5 Mon Sep 17 00:00:00 2001 From: Max Gabrielsson Date: Thu, 24 Oct 2024 10:35:36 +0200 Subject: [PATCH 1/2] add index plan callback --- src/hnsw/CMakeLists.txt | 3 +- src/hnsw/hnsw_index.cpp | 6 +- src/hnsw/hnsw_index_logical_create.cpp | 119 -------------- src/hnsw/hnsw_index_physical_create.cpp | 4 +- src/hnsw/hnsw_index_plan.cpp | 153 ++++++++++++++++++ src/hnsw/hnsw_plan_index_create.cpp | 150 ----------------- src/include/hnsw/hnsw_index.hpp | 2 + .../hnsw/hnsw_index_logical_create.hpp | 28 ---- .../hnsw/hnsw_index_physical_create.hpp | 2 +- .../hnsw_lateral_join_group_large.test | 0 10 files changed, 164 insertions(+), 303 deletions(-) delete mode 100644 src/hnsw/hnsw_index_logical_create.cpp create mode 100644 src/hnsw/hnsw_index_plan.cpp delete mode 100644 src/hnsw/hnsw_plan_index_create.cpp delete mode 100644 src/include/hnsw/hnsw_index_logical_create.hpp rename test/sql/{hnsw => slow}/hnsw_lateral_join_group_large.test (100%) diff --git a/src/hnsw/CMakeLists.txt b/src/hnsw/CMakeLists.txt index 6c0e1b0..577edb0 100644 --- a/src/hnsw/CMakeLists.txt +++ b/src/hnsw/CMakeLists.txt @@ -2,12 +2,11 @@ set(EXTENSION_SOURCES ${EXTENSION_SOURCES} ${CMAKE_CURRENT_SOURCE_DIR}/hnsw_index.cpp - ${CMAKE_CURRENT_SOURCE_DIR}/hnsw_index_logical_create.cpp ${CMAKE_CURRENT_SOURCE_DIR}/hnsw_index_macros.cpp ${CMAKE_CURRENT_SOURCE_DIR}/hnsw_index_physical_create.cpp ${CMAKE_CURRENT_SOURCE_DIR}/hnsw_index_pragmas.cpp ${CMAKE_CURRENT_SOURCE_DIR}/hnsw_index_scan.cpp - ${CMAKE_CURRENT_SOURCE_DIR}/hnsw_plan_index_create.cpp + ${CMAKE_CURRENT_SOURCE_DIR}/hnsw_index_plan.cpp ${CMAKE_CURRENT_SOURCE_DIR}/hnsw_topk_operator.cpp ${CMAKE_CURRENT_SOURCE_DIR}/hnsw_optimize_expr.cpp ${CMAKE_CURRENT_SOURCE_DIR}/hnsw_optimize_join.cpp diff --git a/src/hnsw/hnsw_index.cpp b/src/hnsw/hnsw_index.cpp index f869929..711e527 100644 --- a/src/hnsw/hnsw_index.cpp +++ b/src/hnsw/hnsw_index.cpp @@ -246,6 +246,7 @@ const case_insensitive_map_t HNSWIndex::METRIC_KIN const unordered_map HNSWIndex::SCALAR_KIND_MAP = { {static_cast(LogicalTypeId::FLOAT), unum::usearch::scalar_kind_t::f32_k}, + /* TODO: Add the rest of these later {static_cast(LogicalTypeId::DOUBLE), unum::usearch::scalar_kind_t::f64_k}, {static_cast(LogicalTypeId::TINYINT), unum::usearch::scalar_kind_t::i8_k}, {static_cast(LogicalTypeId::SMALLINT), unum::usearch::scalar_kind_t::i16_k}, @@ -254,7 +255,9 @@ const unordered_map HNSWIndex::SCALAR_KIN {static_cast(LogicalTypeId::UTINYINT), unum::usearch::scalar_kind_t::u8_k}, {static_cast(LogicalTypeId::USMALLINT), unum::usearch::scalar_kind_t::u16_k}, {static_cast(LogicalTypeId::UINTEGER), unum::usearch::scalar_kind_t::u32_k}, - {static_cast(LogicalTypeId::UBIGINT), unum::usearch::scalar_kind_t::u64_k}}; + {static_cast(LogicalTypeId::UBIGINT), unum::usearch::scalar_kind_t::u64_k} + */ +}; unique_ptr HNSWIndex::GetStats() { auto lock = rwlock.GetExclusiveLock(); @@ -671,6 +674,7 @@ void HNSWModule::RegisterIndex(DatabaseInstance &db) { input.unbound_expressions, input.db, input.options, input.storage_info); return std::move(res); }; + index_type.create_plan = HNSWIndex::CreatePlan; // Register scan option db.config.AddExtensionOption("hnsw_ef_search", diff --git a/src/hnsw/hnsw_index_logical_create.cpp b/src/hnsw/hnsw_index_logical_create.cpp deleted file mode 100644 index 775c984..0000000 --- a/src/hnsw/hnsw_index_logical_create.cpp +++ /dev/null @@ -1,119 +0,0 @@ -#include "hnsw/hnsw_index_logical_create.hpp" -#include "duckdb/catalog/catalog_entry/table_catalog_entry.hpp" -#include "duckdb/execution/column_binding_resolver.hpp" -#include "duckdb/execution/operator/filter/physical_filter.hpp" -#include "duckdb/execution/operator/projection/physical_projection.hpp" -#include "duckdb/planner/expression/bound_reference_expression.hpp" -#include "duckdb/planner/expression/bound_operator_expression.hpp" -#include "duckdb/planner/operator/logical_create_index.hpp" - -#include "hnsw/hnsw_index.hpp" -#include "hnsw/hnsw_index_physical_create.hpp" - -namespace duckdb { - -LogicalCreateHNSWIndex::LogicalCreateHNSWIndex(unique_ptr info_p, - vector> expressions_p, TableCatalogEntry &table_p) - : LogicalExtensionOperator(), info(std::move(info_p)), table(table_p) { - for (auto &expr : expressions_p) { - this->unbound_expressions.push_back(expr->Copy()); - } - this->expressions = std::move(expressions_p); -} - -void LogicalCreateHNSWIndex::ResolveTypes() { - types.emplace_back(LogicalType::BIGINT); -} - -void LogicalCreateHNSWIndex::ResolveColumnBindings(ColumnBindingResolver &res, vector &bindings) { - bindings = LogicalOperator::GenerateColumnBindings(0, table.GetColumns().LogicalColumnCount()); - - // Visit the operator's expressions - LogicalOperatorVisitor::EnumerateExpressions(*this, - [&](unique_ptr *child) { res.VisitExpression(child); }); -} - -string LogicalCreateHNSWIndex::GetExtensionName() const { - return "hnsw_create_index"; -} - -unique_ptr LogicalCreateHNSWIndex::CreatePlan(ClientContext &context, - PhysicalPlanGenerator &generator) { - - auto &op = *this; - - // generate a physical plan for the parallel index creation which consists of the following operators - // table scan - projection (for expression execution) - filter (NOT NULL) - order - create index - D_ASSERT(op.children.size() == 1); - auto table_scan = generator.CreatePlan(std::move(op.children[0])); - - // Validate that we only have one expression - if (op.unbound_expressions.size() != 1) { - throw BinderException("HNSW indexes can only be created over a single column of keys."); - } - - auto &expr = op.unbound_expressions[0]; - - // Validate that the expression does not have side effects - if (!expr->IsConsistent()) { - throw BinderException("HNSW index keys cannot contain expressions with side " - "effects."); - } - - // Validate that we have the right type of expression (float array) - auto &type = expr->return_type; - if (type.id() != LogicalTypeId::ARRAY || ArrayType::GetChildType(type).id() != LogicalTypeId::FLOAT) { - throw BinderException("HNSW index can only be created over FLOAT[N] keys."); - } - - // Assert that we got the right index type - D_ASSERT(op.info->index_type == HNSWIndex::TYPE_NAME); - - // table scan operator for index key columns and row IDs - generator.dependencies.AddDependency(op.table); - - D_ASSERT(op.info->scan_types.size() - 1 <= op.info->names.size()); - D_ASSERT(op.info->scan_types.size() - 1 <= op.info->column_ids.size()); - - // projection to execute expressions on the key columns - - vector new_column_types; - vector> select_list; - for (idx_t i = 0; i < op.expressions.size(); i++) { - new_column_types.push_back(op.expressions[i]->return_type); - select_list.push_back(std::move(op.expressions[i])); - } - new_column_types.emplace_back(LogicalType::ROW_TYPE); - select_list.push_back(make_uniq(LogicalType::ROW_TYPE, op.info->scan_types.size() - 1)); - - auto projection = make_uniq(new_column_types, std::move(select_list), op.estimated_cardinality); - projection->children.push_back(std::move(table_scan)); - - // filter operator for IS_NOT_NULL on each key column - vector filter_types; - vector> filter_select_list; - - for (idx_t i = 0; i < new_column_types.size() - 1; i++) { - filter_types.push_back(new_column_types[i]); - auto is_not_null_expr = - make_uniq(ExpressionType::OPERATOR_IS_NOT_NULL, LogicalType::BOOLEAN); - auto bound_ref = make_uniq(new_column_types[i], i); - is_not_null_expr->children.push_back(std::move(bound_ref)); - filter_select_list.push_back(std::move(is_not_null_expr)); - } - - auto null_filter = - make_uniq(std::move(filter_types), std::move(filter_select_list), op.estimated_cardinality); - null_filter->types.emplace_back(LogicalType::ROW_TYPE); - null_filter->children.push_back(std::move(projection)); - - auto physical_create_index = - make_uniq(op, op.table, op.info->column_ids, std::move(op.info), - std::move(op.unbound_expressions), op.estimated_cardinality); - - physical_create_index->children.push_back(std::move(null_filter)); - - return std::move(physical_create_index); -} - -} // namespace duckdb diff --git a/src/hnsw/hnsw_index_physical_create.cpp b/src/hnsw/hnsw_index_physical_create.cpp index 543b042..82ac33e 100644 --- a/src/hnsw/hnsw_index_physical_create.cpp +++ b/src/hnsw/hnsw_index_physical_create.cpp @@ -14,12 +14,12 @@ namespace duckdb { -PhysicalCreateHNSWIndex::PhysicalCreateHNSWIndex(LogicalOperator &op, TableCatalogEntry &table_p, +PhysicalCreateHNSWIndex::PhysicalCreateHNSWIndex(const vector &types_p, TableCatalogEntry &table_p, const vector &column_ids, unique_ptr info, vector> unbound_expressions, idx_t estimated_cardinality) // Declare this operators as a EXTENSION operator - : PhysicalOperator(PhysicalOperatorType::EXTENSION, op.types, estimated_cardinality), + : PhysicalOperator(PhysicalOperatorType::EXTENSION, types_p, estimated_cardinality), table(table_p.Cast()), info(std::move(info)), unbound_expressions(std::move(unbound_expressions)), sorted(false) { diff --git a/src/hnsw/hnsw_index_plan.cpp b/src/hnsw/hnsw_index_plan.cpp new file mode 100644 index 0000000..7160fd0 --- /dev/null +++ b/src/hnsw/hnsw_index_plan.cpp @@ -0,0 +1,153 @@ +#include "duckdb/planner/operator/logical_create_index.hpp" +#include "duckdb/planner/expression/bound_reference_expression.hpp" +#include "duckdb/planner/expression/bound_operator_expression.hpp" + +#include "duckdb/parser/parsed_data/create_index_info.hpp" +#include "duckdb/catalog/catalog_entry/table_catalog_entry.hpp" + +#include "duckdb/execution/operator/projection/physical_projection.hpp" +#include "duckdb/execution/operator/filter/physical_filter.hpp" + +#include "hnsw/hnsw.hpp" +#include "hnsw/hnsw_index.hpp" +#include "hnsw/hnsw_index_physical_create.hpp" + + +namespace duckdb { + +unique_ptr HNSWIndex::CreatePlan(PlanIndexInput &input) { + auto &create_index = input.op; + auto &context = input.context; + + Value enable_persistence; + context.TryGetCurrentSetting("hnsw_enable_experimental_persistence", enable_persistence); + + auto is_disk_db = !create_index.table.GetStorage().db.GetStorageManager().InMemory(); + auto is_persistence_disabled = !enable_persistence.GetValue(); + + if (is_disk_db && is_persistence_disabled) { + throw BinderException("HNSW indexes can only be created in in-memory databases, or when the configuration " + "option 'hnsw_enable_experimental_persistence' is set to true."); + } + + // Verify the options + for (auto &option : create_index.info->options) { + auto &k = option.first; + auto &v = option.second; + if (StringUtil::CIEquals(k, "metric")) { + if (v.type() != LogicalType::VARCHAR) { + throw BinderException("HNSW index 'metric' must be a string"); + } + auto metric = v.GetValue(); + if (HNSWIndex::METRIC_KIND_MAP.find(metric) == HNSWIndex::METRIC_KIND_MAP.end()) { + vector allowed_metrics; + for (auto &entry : HNSWIndex::METRIC_KIND_MAP) { + allowed_metrics.push_back(StringUtil::Format("'%s'", entry.first)); + } + throw BinderException("HNSW index 'metric' must be one of: %s", + StringUtil::Join(allowed_metrics, ", ")); + } + } else if (StringUtil::CIEquals(k, "ef_construction")) { + if (v.type() != LogicalType::INTEGER) { + throw BinderException("HNSW index 'ef_construction' must be an integer"); + } + if (v.GetValue() < 1) { + throw BinderException("HNSW index 'ef_construction' must be at least 1"); + } + } else if (StringUtil::CIEquals(k, "ef_search")) { + if (v.type() != LogicalType::INTEGER) { + throw BinderException("HNSW index 'ef_search' must be an integer"); + } + if (v.GetValue() < 1) { + throw BinderException("HNSW index 'ef_search' must be at least 1"); + } + } else if (StringUtil::CIEquals(k, "M")) { + if (v.type() != LogicalType::INTEGER) { + throw BinderException("HNSW index 'M' must be an integer"); + } + if (v.GetValue() < 2) { + throw BinderException("HNSW index 'M' must be at least 2"); + } + } else if (StringUtil::CIEquals(k, "M0")) { + if (v.type() != LogicalType::INTEGER) { + throw BinderException("HNSW index 'M0' must be an integer"); + } + if (v.GetValue() < 2) { + throw BinderException("HNSW index 'M0' must be at least 2"); + } + } else { + throw BinderException("Unknown option for HNSW index: '%s'", k); + } + } + + // Verify the expression type + if (create_index.expressions.size() != 1) { + throw BinderException("HNSW indexes can only be created over a single column of keys."); + } + auto &arr_type = create_index.expressions[0]->return_type; + if (arr_type.id() != LogicalTypeId::ARRAY) { + throw BinderException("HNSW index keys must be of type FLOAT[N]"); + } + auto &child_type = ArrayType::GetChildType(arr_type); + auto child_type_val = HNSWIndex::SCALAR_KIND_MAP.find(static_cast(child_type.id())); + if (child_type_val == HNSWIndex::SCALAR_KIND_MAP.end()) { + vector allowed_types; + for (auto &entry : HNSWIndex::SCALAR_KIND_MAP) { + auto id = static_cast(entry.first); + allowed_types.push_back(StringUtil::Format("'%s[N]'", LogicalType(id).ToString())); + } + throw BinderException("HNSW index key type must be one of: %s", StringUtil::Join(allowed_types, ", ")); + } + + // projection to execute expressions on the key columns + + vector new_column_types; + vector> select_list; + for (auto & expression : create_index.expressions) { + new_column_types.push_back(expression->return_type); + select_list.push_back(std::move(expression)); + } + new_column_types.emplace_back(LogicalType::ROW_TYPE); + select_list.push_back(make_uniq(LogicalType::ROW_TYPE, create_index.info->scan_types.size() - 1)); + + auto projection = make_uniq(new_column_types, std::move(select_list), create_index.estimated_cardinality); + projection->children.push_back(std::move(input.table_scan)); + + // filter operator for IS_NOT_NULL on each key column + vector filter_types; + vector> filter_select_list; + + for (idx_t i = 0; i < new_column_types.size() - 1; i++) { + filter_types.push_back(new_column_types[i]); + auto is_not_null_expr = + make_uniq(ExpressionType::OPERATOR_IS_NOT_NULL, LogicalType::BOOLEAN); + auto bound_ref = make_uniq(new_column_types[i], i); + is_not_null_expr->children.push_back(std::move(bound_ref)); + filter_select_list.push_back(std::move(is_not_null_expr)); + } + + auto null_filter = + make_uniq(std::move(filter_types), std::move(filter_select_list), create_index.estimated_cardinality); + null_filter->types.emplace_back(LogicalType::ROW_TYPE); + null_filter->children.push_back(std::move(projection)); + + auto physical_create_index = + make_uniq(create_index.types, create_index.table, create_index.info->column_ids, std::move(create_index.info), + std::move(create_index.unbound_expressions), create_index.estimated_cardinality); + + physical_create_index->children.push_back(std::move(null_filter)); + + return std::move(physical_create_index); +} + +//------------------------------------------------------------- +// Register +//------------------------------------------------------------- +void HNSWModule::RegisterPlanIndexCreate(DatabaseInstance &db) { + // Register the optimizer extension + db.config.AddExtensionOption("hnsw_enable_experimental_persistence", + "experimental: enable creating HNSW indexes in persistent databases", + LogicalType::BOOLEAN, Value::BOOLEAN(false)); +} + +} // namespace duckdb \ No newline at end of file diff --git a/src/hnsw/hnsw_plan_index_create.cpp b/src/hnsw/hnsw_plan_index_create.cpp deleted file mode 100644 index d83e4f6..0000000 --- a/src/hnsw/hnsw_plan_index_create.cpp +++ /dev/null @@ -1,150 +0,0 @@ -#include "duckdb/optimizer/optimizer_extension.hpp" -#include "duckdb/planner/operator/logical_create_index.hpp" -#include "duckdb/parser/parsed_data/create_index_info.hpp" -#include "duckdb/catalog/catalog_entry/table_catalog_entry.hpp" - -#include "hnsw/hnsw.hpp" -#include "hnsw/hnsw_index.hpp" -#include "hnsw/hnsw_index_logical_create.hpp" - -namespace duckdb { - -//----------------------------------------------------------------------------- -// Plan rewriter -//----------------------------------------------------------------------------- -class HNSWIndexInsertionRewriter : public OptimizerExtension { -public: - HNSWIndexInsertionRewriter() { - optimize_function = HNSWIndexInsertionRewriter::Optimize; - } - - static void TryOptimize(ClientContext &context, unique_ptr &plan) { - auto &op = *plan; - - // Look for a CREATE INDEX operator - if (op.type != LogicalOperatorType::LOGICAL_CREATE_INDEX) { - return; - } - auto &create_index = op.Cast(); - - if (create_index.info->index_type != HNSWIndex::TYPE_NAME) { - // Not the index type we are looking for - return; - } - - Value enable_persistence; - context.TryGetCurrentSetting("hnsw_enable_experimental_persistence", enable_persistence); - - auto is_disk_db = !create_index.table.GetStorage().db.GetStorageManager().InMemory(); - auto is_persistence_disabled = !enable_persistence.GetValue(); - - if (is_disk_db && is_persistence_disabled) { - throw BinderException("HNSW indexes can only be created in in-memory databases, or when the configuration " - "option 'hnsw_enable_experimental_persistence' is set to true."); - } - - // Verify the options - for (auto &option : create_index.info->options) { - auto &k = option.first; - auto &v = option.second; - if (StringUtil::CIEquals(k, "metric")) { - if (v.type() != LogicalType::VARCHAR) { - throw BinderException("HNSW index 'metric' must be a string"); - } - auto metric = v.GetValue(); - if (HNSWIndex::METRIC_KIND_MAP.find(metric) == HNSWIndex::METRIC_KIND_MAP.end()) { - vector allowed_metrics; - for (auto &entry : HNSWIndex::METRIC_KIND_MAP) { - allowed_metrics.push_back(StringUtil::Format("'%s'", entry.first)); - } - throw BinderException("HNSW index 'metric' must be one of: %s", - StringUtil::Join(allowed_metrics, ", ")); - } - } else if (StringUtil::CIEquals(k, "ef_construction")) { - if (v.type() != LogicalType::INTEGER) { - throw BinderException("HNSW index 'ef_construction' must be an integer"); - } - if (v.GetValue() < 1) { - throw BinderException("HNSW index 'ef_construction' must be at least 1"); - } - } else if (StringUtil::CIEquals(k, "ef_search")) { - if (v.type() != LogicalType::INTEGER) { - throw BinderException("HNSW index 'ef_search' must be an integer"); - } - if (v.GetValue() < 1) { - throw BinderException("HNSW index 'ef_search' must be at least 1"); - } - } else if (StringUtil::CIEquals(k, "M")) { - if (v.type() != LogicalType::INTEGER) { - throw BinderException("HNSW index 'M' must be an integer"); - } - if (v.GetValue() < 2) { - throw BinderException("HNSW index 'M' must be at least 2"); - } - } else if (StringUtil::CIEquals(k, "M0")) { - if (v.type() != LogicalType::INTEGER) { - throw BinderException("HNSW index 'M0' must be an integer"); - } - if (v.GetValue() < 2) { - throw BinderException("HNSW index 'M0' must be at least 2"); - } - } else { - throw BinderException("Unknown option for HNSW index: '%s'", k); - } - } - - // Verify the expression type - if (create_index.expressions.size() != 1) { - throw BinderException("HNSW indexes can only be created over a single column of keys."); - } - auto &arr_type = create_index.expressions[0]->return_type; - if (arr_type.id() != LogicalTypeId::ARRAY) { - throw BinderException("HNSW index keys must be of type FLOAT[N]"); - } - auto &child_type = ArrayType::GetChildType(arr_type); - auto child_type_val = HNSWIndex::SCALAR_KIND_MAP.find(static_cast(child_type.id())); - if (child_type_val == HNSWIndex::SCALAR_KIND_MAP.end()) { - vector allowed_types; - for (auto &entry : HNSWIndex::SCALAR_KIND_MAP) { - auto id = static_cast(entry.first); - allowed_types.push_back(StringUtil::Format("'%s[N]'", LogicalType(id).ToString())); - } - throw BinderException("HNSW index key type must be one of: %s", StringUtil::Join(allowed_types, ", ")); - } - - // We have a create index operator for our index - // We can replace this with a operator that creates the index - // The "LogicalCreateHNSWINdex" operator is a custom operator that we defined in the extension - auto physical_create_index = make_uniq( - std::move(create_index.info), std::move(create_index.expressions), create_index.table); - - // Move the children - physical_create_index->children = std::move(create_index.children); - - // Replace the operator - plan = std::move(physical_create_index); - } - - static void Optimize(OptimizerExtensionInput &input, unique_ptr &plan) { - - TryOptimize(input.context, plan); - - // Recursively traverse the children - for (auto &child : plan->children) { - Optimize(input, child); - } - }; -}; - -//------------------------------------------------------------- -// Register -//------------------------------------------------------------- -void HNSWModule::RegisterPlanIndexCreate(DatabaseInstance &db) { - // Register the optimizer extension - db.config.AddExtensionOption("hnsw_enable_experimental_persistence", - "experimental: enable creating HNSW indexes in persistent databases", - LogicalType::BOOLEAN, Value::BOOLEAN(false)); - db.config.optimizer_extensions.push_back(HNSWIndexInsertionRewriter()); -} - -} // namespace duckdb \ No newline at end of file diff --git a/src/include/hnsw/hnsw_index.hpp b/src/include/hnsw/hnsw_index.hpp index c3aad77..2b7d87a 100644 --- a/src/include/hnsw/hnsw_index.hpp +++ b/src/include/hnsw/hnsw_index.hpp @@ -33,6 +33,8 @@ class HNSWIndex : public BoundIndex { AttachedDatabase &db, const case_insensitive_map_t &options, const IndexStorageInfo &info = IndexStorageInfo(), idx_t estimated_cardinality = 0); + static unique_ptr CreatePlan(PlanIndexInput &input); + //! The actual usearch index USearchIndexType index; diff --git a/src/include/hnsw/hnsw_index_logical_create.hpp b/src/include/hnsw/hnsw_index_logical_create.hpp deleted file mode 100644 index 1f8ed7f..0000000 --- a/src/include/hnsw/hnsw_index_logical_create.hpp +++ /dev/null @@ -1,28 +0,0 @@ -#pragma once -#include "duckdb/planner/operator/logical_extension_operator.hpp" - -namespace duckdb { - -class LogicalCreateHNSWIndex : public LogicalExtensionOperator { -public: - // Info for index creation - unique_ptr info; - - //! The table to create the index for - TableCatalogEntry &table; - - //! Unbound expressions to be used in the optimizer - vector> unbound_expressions; - -public: - LogicalCreateHNSWIndex(unique_ptr info_p, vector> expressions_p, - TableCatalogEntry &table_p); - void ResolveTypes() override; - void ResolveColumnBindings(ColumnBindingResolver &res, vector &bindings) override; - string GetExtensionName() const override; - - // Actually create plan the index creation - unique_ptr CreatePlan(ClientContext &context, PhysicalPlanGenerator &generator) override; -}; - -} // namespace duckdb \ No newline at end of file diff --git a/src/include/hnsw/hnsw_index_physical_create.hpp b/src/include/hnsw/hnsw_index_physical_create.hpp index b3de5b8..b8b8156 100644 --- a/src/include/hnsw/hnsw_index_physical_create.hpp +++ b/src/include/hnsw/hnsw_index_physical_create.hpp @@ -11,7 +11,7 @@ class PhysicalCreateHNSWIndex : public PhysicalOperator { static constexpr const PhysicalOperatorType TYPE = PhysicalOperatorType::EXTENSION; public: - PhysicalCreateHNSWIndex(LogicalOperator &op, TableCatalogEntry &table, const vector &column_ids, + PhysicalCreateHNSWIndex(const vector &types_p, TableCatalogEntry &table, const vector &column_ids, unique_ptr info, vector> unbound_expressions, idx_t estimated_cardinality); diff --git a/test/sql/hnsw/hnsw_lateral_join_group_large.test b/test/sql/slow/hnsw_lateral_join_group_large.test similarity index 100% rename from test/sql/hnsw/hnsw_lateral_join_group_large.test rename to test/sql/slow/hnsw_lateral_join_group_large.test From f46c34cdf5c697d67b8b22c48128c9ac1da58c32 Mon Sep 17 00:00:00 2001 From: Max Gabrielsson Date: Fri, 1 Nov 2024 10:47:13 +0100 Subject: [PATCH 2/2] format, update duckdb --- duckdb | 2 +- src/hnsw/hnsw_index.cpp | 7 ++++- src/hnsw/hnsw_index_plan.cpp | 31 +++++++------------ src/include/hnsw/hnsw.hpp | 2 -- .../hnsw/hnsw_index_physical_create.hpp | 6 ++-- 5 files changed, 21 insertions(+), 27 deletions(-) diff --git a/duckdb b/duckdb index f680b7d..c3ca360 160000 --- a/duckdb +++ b/duckdb @@ -1 +1 @@ -Subproject commit f680b7d08f56183391b581077d4baf589e1cc8bd +Subproject commit c3ca3607c221d315f38227b8bf58e68746c59083 diff --git a/src/hnsw/hnsw_index.cpp b/src/hnsw/hnsw_index.cpp index 711e527..b8bfd1f 100644 --- a/src/hnsw/hnsw_index.cpp +++ b/src/hnsw/hnsw_index.cpp @@ -246,7 +246,7 @@ const case_insensitive_map_t HNSWIndex::METRIC_KIN const unordered_map HNSWIndex::SCALAR_KIND_MAP = { {static_cast(LogicalTypeId::FLOAT), unum::usearch::scalar_kind_t::f32_k}, - /* TODO: Add the rest of these later + /* TODO: Add the rest of these later {static_cast(LogicalTypeId::DOUBLE), unum::usearch::scalar_kind_t::f64_k}, {static_cast(LogicalTypeId::TINYINT), unum::usearch::scalar_kind_t::i8_k}, {static_cast(LogicalTypeId::SMALLINT), unum::usearch::scalar_kind_t::i16_k}, @@ -676,6 +676,11 @@ void HNSWModule::RegisterIndex(DatabaseInstance &db) { }; index_type.create_plan = HNSWIndex::CreatePlan; + // Register persistence option + db.config.AddExtensionOption("hnsw_enable_experimental_persistence", + "experimental: enable creating HNSW indexes in persistent databases", + LogicalType::BOOLEAN, Value::BOOLEAN(false)); + // Register scan option db.config.AddExtensionOption("hnsw_ef_search", "experimental: override the ef_search parameter when scanning HNSW indexes", diff --git a/src/hnsw/hnsw_index_plan.cpp b/src/hnsw/hnsw_index_plan.cpp index 7160fd0..2505a1f 100644 --- a/src/hnsw/hnsw_index_plan.cpp +++ b/src/hnsw/hnsw_index_plan.cpp @@ -12,7 +12,6 @@ #include "hnsw/hnsw_index.hpp" #include "hnsw/hnsw_index_physical_create.hpp" - namespace duckdb { unique_ptr HNSWIndex::CreatePlan(PlanIndexInput &input) { @@ -103,14 +102,16 @@ unique_ptr HNSWIndex::CreatePlan(PlanIndexInput &input) { vector new_column_types; vector> select_list; - for (auto & expression : create_index.expressions) { + for (auto &expression : create_index.expressions) { new_column_types.push_back(expression->return_type); select_list.push_back(std::move(expression)); } new_column_types.emplace_back(LogicalType::ROW_TYPE); - select_list.push_back(make_uniq(LogicalType::ROW_TYPE, create_index.info->scan_types.size() - 1)); + select_list.push_back( + make_uniq(LogicalType::ROW_TYPE, create_index.info->scan_types.size() - 1)); - auto projection = make_uniq(new_column_types, std::move(select_list), create_index.estimated_cardinality); + auto projection = + make_uniq(new_column_types, std::move(select_list), create_index.estimated_cardinality); projection->children.push_back(std::move(input.table_scan)); // filter operator for IS_NOT_NULL on each key column @@ -120,34 +121,24 @@ unique_ptr HNSWIndex::CreatePlan(PlanIndexInput &input) { for (idx_t i = 0; i < new_column_types.size() - 1; i++) { filter_types.push_back(new_column_types[i]); auto is_not_null_expr = - make_uniq(ExpressionType::OPERATOR_IS_NOT_NULL, LogicalType::BOOLEAN); + make_uniq(ExpressionType::OPERATOR_IS_NOT_NULL, LogicalType::BOOLEAN); auto bound_ref = make_uniq(new_column_types[i], i); is_not_null_expr->children.push_back(std::move(bound_ref)); filter_select_list.push_back(std::move(is_not_null_expr)); } - auto null_filter = - make_uniq(std::move(filter_types), std::move(filter_select_list), create_index.estimated_cardinality); + auto null_filter = make_uniq(std::move(filter_types), std::move(filter_select_list), + create_index.estimated_cardinality); null_filter->types.emplace_back(LogicalType::ROW_TYPE); null_filter->children.push_back(std::move(projection)); - auto physical_create_index = - make_uniq(create_index.types, create_index.table, create_index.info->column_ids, std::move(create_index.info), - std::move(create_index.unbound_expressions), create_index.estimated_cardinality); + auto physical_create_index = make_uniq( + create_index.types, create_index.table, create_index.info->column_ids, std::move(create_index.info), + std::move(create_index.unbound_expressions), create_index.estimated_cardinality); physical_create_index->children.push_back(std::move(null_filter)); return std::move(physical_create_index); } -//------------------------------------------------------------- -// Register -//------------------------------------------------------------- -void HNSWModule::RegisterPlanIndexCreate(DatabaseInstance &db) { - // Register the optimizer extension - db.config.AddExtensionOption("hnsw_enable_experimental_persistence", - "experimental: enable creating HNSW indexes in persistent databases", - LogicalType::BOOLEAN, Value::BOOLEAN(false)); -} - } // namespace duckdb \ No newline at end of file diff --git a/src/include/hnsw/hnsw.hpp b/src/include/hnsw/hnsw.hpp index 3d69562..5f0b55e 100644 --- a/src/include/hnsw/hnsw.hpp +++ b/src/include/hnsw/hnsw.hpp @@ -10,7 +10,6 @@ struct HNSWModule { RegisterIndex(db); RegisterIndexScan(db); RegisterIndexPragmas(db); - RegisterPlanIndexCreate(db); RegisterMacros(db); // Optimizers @@ -25,7 +24,6 @@ struct HNSWModule { static void RegisterIndexScan(DatabaseInstance &db); static void RegisterMultiScan(DatabaseInstance &db); static void RegisterIndexPragmas(DatabaseInstance &db); - static void RegisterPlanIndexCreate(DatabaseInstance &db); static void RegisterMacros(DatabaseInstance &db); static void RegisterTopKOptimizer(DatabaseInstance &db); diff --git a/src/include/hnsw/hnsw_index_physical_create.hpp b/src/include/hnsw/hnsw_index_physical_create.hpp index b8b8156..b620831 100644 --- a/src/include/hnsw/hnsw_index_physical_create.hpp +++ b/src/include/hnsw/hnsw_index_physical_create.hpp @@ -11,9 +11,9 @@ class PhysicalCreateHNSWIndex : public PhysicalOperator { static constexpr const PhysicalOperatorType TYPE = PhysicalOperatorType::EXTENSION; public: - PhysicalCreateHNSWIndex(const vector &types_p, TableCatalogEntry &table, const vector &column_ids, - unique_ptr info, vector> unbound_expressions, - idx_t estimated_cardinality); + PhysicalCreateHNSWIndex(const vector &types_p, TableCatalogEntry &table, + const vector &column_ids, unique_ptr info, + vector> unbound_expressions, idx_t estimated_cardinality); //! The table to create the index for DuckTableEntry &table;