From 491af18884809f86729be937098105bc7e68721c Mon Sep 17 00:00:00 2001 From: dentiny Date: Mon, 23 Dec 2024 12:01:45 +0000 Subject: [PATCH 1/3] fix undefined property --- src/core/functions/table/match.cpp | 150 +++++++++++++++++- .../duckpgq/core/functions/table/match.hpp | 16 ++ test/sql/create_pg/no_properties.test | 9 ++ test/sql/path_finding/complex_matching.test | 2 +- test/sql/path_finding/shortest_path.test | 2 +- 5 files changed, 173 insertions(+), 6 deletions(-) diff --git a/src/core/functions/table/match.cpp b/src/core/functions/table/match.cpp index fcbc0e8f..9f621c3e 100644 --- a/src/core/functions/table/match.cpp +++ b/src/core/functions/table/match.cpp @@ -1,6 +1,8 @@ #include #include "duckpgq/core/functions/table/match.hpp" +#include "duckdb/common/string_util.hpp" +#include "duckdb/common/case_insensitive_map.hpp" #include "duckdb/parser/tableref/matchref.hpp" #include "duckdb/parser/tableref/subqueryref.hpp" #include "duckdb/parser/tableref/joinref.hpp" @@ -33,6 +35,55 @@ namespace duckpgq { namespace core { +namespace { + +// Get fully-qualified column names for the property graph [tbl], and insert +// into set [col_names]. +void PopulateFullyQualifiedColName( + const vector> &tbls, + const case_insensitive_map_t> &tbl_name_to_aliases, + case_insensitive_set_t &col_names) { + for (const auto &cur_tbl : tbls) { + for (const auto &cur_col : cur_tbl->column_names) { + // It's legal to query by `` instead of `.`. + col_names.insert(cur_col); + + const string &tbl_name = cur_tbl->table_name; + auto iter = tbl_name_to_aliases.find(tbl_name); + // Prefer to use table alias specified in the statement, otherwise use + // table name. + if (iter == tbl_name_to_aliases.end()) { + col_names.insert(StringUtil::Format("%s.%s", tbl_name, cur_col)); + } else { + const auto &all_aliases = iter->second; + for (const auto &cur_alias : all_aliases) { + col_names.insert(StringUtil::Format("%s.%s", cur_alias, cur_col)); + } + } + } + } +} + +// Get fully-qualified column names from property graph. +case_insensitive_set_t GetFullyQualifiedColFromPg( + const CreatePropertyGraphInfo &pg, + const case_insensitive_map_t> &alias_map) { + case_insensitive_map_t> relation_name_to_aliases; + for (const auto &entry : alias_map) { + relation_name_to_aliases[entry.second->table_name].emplace_back( + entry.first); + } + + case_insensitive_set_t col_names; + PopulateFullyQualifiedColName(pg.vertex_tables, relation_name_to_aliases, + col_names); + PopulateFullyQualifiedColName(pg.edge_tables, relation_name_to_aliases, + col_names); + return col_names; +} + +} // namespace + shared_ptr PGQMatchFunction::FindGraphTable(const string &label, CreatePropertyGraphInfo &pg_table) { @@ -145,6 +196,18 @@ PathElement *PGQMatchFunction::GetPathElement( throw InternalException("Unknown path reference type detected"); } +SubPath * +PGQMatchFunction::GetSubPath(const unique_ptr &path_reference) { + if (path_reference->path_reference_type == + PGQPathReferenceType::PATH_ELEMENT) { + return nullptr; + } + if (path_reference->path_reference_type == PGQPathReferenceType::SUBPATH) { + return reinterpret_cast(path_reference.get()); + } + throw InternalException("Unknown path reference type detected"); +} + unique_ptr PGQMatchFunction::CreateCountCTESubquery() { //! BEGIN OF (SELECT count(cte1.temp) as temp * 0 from cte1) __x @@ -907,15 +970,84 @@ void PGQMatchFunction::ProcessPathList( } } +void PGQMatchFunction::PopulateGraphTableAliasMap( + const CreatePropertyGraphInfo &pg_table, + const unique_ptr &path_reference, + case_insensitive_map_t> + &alias_to_vertex_and_edge_tables) { + PathElement *path_elem = GetPathElement(path_reference); + + // Populate binding from PathElement. + if (path_elem != nullptr) { + auto iter = pg_table.label_map.find(path_elem->label); + if (iter == pg_table.label_map.end()) { + throw BinderException( + "The label %s is not registered in property graph %s", + path_elem->label, pg_table.property_graph_name); + } + alias_to_vertex_and_edge_tables[path_elem->variable_binding] = iter->second; + return; + } + + // Recursively populate binding from SubPath. + SubPath *sub_path = GetSubPath(path_reference); + D_ASSERT(sub_path != nullptr); + const auto &path_list = sub_path->path_list; + for (const auto &cur_path : path_list) { + PopulateGraphTableAliasMap(pg_table, cur_path, + alias_to_vertex_and_edge_tables); + } +} + +/*static*/ void +PGQMatchFunction::CheckColumnBinding(const CreatePropertyGraphInfo &pg_table, + const MatchExpression &ref) { + // Maps from table alias to table, including vertex and edge tables. + case_insensitive_map_t> + alias_to_vertex_and_edge_tables; + for (idx_t idx_i = 0; idx_i < ref.path_patterns.size(); idx_i++) { + const auto &path_list = ref.path_patterns[idx_i]->path_elements; + for (const auto &cur_path : path_list) { + PopulateGraphTableAliasMap(pg_table, cur_path, + alias_to_vertex_and_edge_tables); + } + } + + // All fully-qualified column names for vertex tables and edge tables. + const auto all_fq_col_names = + GetFullyQualifiedColFromPg(pg_table, alias_to_vertex_and_edge_tables); + + for (auto &expression : ref.column_list) { + auto *column_ref = dynamic_cast(expression.get()); + if (column_ref == nullptr) { + continue; + } + // 'shortest_path_cte' is a special table populated by pgq. + if (column_ref->column_names[0] == "shortest_path_cte") { + continue; + } + // 'rowid' is a column duckdb binds automatically. + if (column_ref->column_names.back() == "rowid") { + continue; + } + const auto cur_fq_col_name = + StringUtil::Join(column_ref->column_names, /*separator=*/"."); + if (all_fq_col_names.find(cur_fq_col_name) == all_fq_col_names.end()) { + throw BinderException("Property %s is never registered!", + cur_fq_col_name); + } + } +} + unique_ptr PGQMatchFunction::MatchBindReplace(ClientContext &context, TableFunctionBindInput &bind_input) { auto duckpgq_state = GetDuckPGQState(context); auto match_index = bind_input.inputs[0].GetValue(); - auto ref = dynamic_cast( + auto *ref = dynamic_cast( duckpgq_state->transform_expression[match_index].get()); - auto pg_table = duckpgq_state->GetPropertyGraph(ref->pg_name); + auto *pg_table = duckpgq_state->GetPropertyGraph(ref->pg_name); vector> conditions; @@ -948,11 +1080,16 @@ PGQMatchFunction::MatchBindReplace(ClientContext &context, if (ref->where_clause) { conditions.push_back(std::move(ref->where_clause)); } + + CheckColumnBinding(*pg_table, *ref); + std::vector> final_column_list; for (auto &expression : ref->column_list) { unordered_set named_subpaths; - auto column_ref = dynamic_cast(expression.get()); + + // Handle ColumnRefExpression. + auto *column_ref = dynamic_cast(expression.get()); if (column_ref != nullptr) { if (named_subpaths.count(column_ref->column_names[0]) && column_ref->column_names.size() == 1) { @@ -963,7 +1100,9 @@ PGQMatchFunction::MatchBindReplace(ClientContext &context, } continue; } - auto function_ref = dynamic_cast(expression.get()); + + // Handle FunctionExpression. + auto *function_ref = dynamic_cast(expression.get()); if (function_ref != nullptr) { if (function_ref->function_name == "path_length") { column_ref = dynamic_cast( @@ -995,6 +1134,9 @@ PGQMatchFunction::MatchBindReplace(ClientContext &context, continue; } + // TODO(hjiang): For star expression, only select columns in vertex or edge + // table, but not those unspecified in property graph. + // Issue reference: https://github.com/cwida/duckpgq-extension/issues/192 final_column_list.push_back(std::move(expression)); } diff --git a/src/include/duckpgq/core/functions/table/match.hpp b/src/include/duckpgq/core/functions/table/match.hpp index 0eda8f8c..9d83b5c2 100644 --- a/src/include/duckpgq/core/functions/table/match.hpp +++ b/src/include/duckpgq/core/functions/table/match.hpp @@ -53,9 +53,20 @@ struct PGQMatchFunction : public TableFunction { vector vertex_keys, vector edge_keys, const string &vertex_alias, const string &edge_alias); + // Populate all vertex and edge tables and their alias into + // [alias_to_vertex_and_edge_tables], from paths information from + // [path_reference]. + static void PopulateGraphTableAliasMap( + const CreatePropertyGraphInfo &pg_table, + const unique_ptr &path_reference, + case_insensitive_map_t> + &alias_to_vertex_and_edge_tables); + static PathElement * GetPathElement(const unique_ptr &path_reference); + static SubPath *GetSubPath(const unique_ptr &path_reference); + static unique_ptr GetJoinRef(const shared_ptr &edge_table, const string &edge_binding, const string &prev_binding, @@ -157,6 +168,11 @@ struct PGQMatchFunction : public TableFunction { CreatePropertyGraphInfo &pg_table, unique_ptr &final_select_node, vector> &conditions); + + // Check whether columns to query are valid against the property graph, throws + // BinderException if error. + static void CheckColumnBinding(const CreatePropertyGraphInfo &pg_table, + const MatchExpression &ref); }; } // namespace core diff --git a/test/sql/create_pg/no_properties.test b/test/sql/create_pg/no_properties.test index 8d715aad..802fdd0a 100644 --- a/test/sql/create_pg/no_properties.test +++ b/test/sql/create_pg/no_properties.test @@ -35,3 +35,12 @@ EDGE TABLES ( DESTINATION KEY ( dst ) REFERENCES Student ( id ) LABEL Knows ) + +statement ok +-CREATE PROPERTY GRAPH g VERTEX TABLES (Student PROPERTIES (id)); + +# Query on unspecified columns in the graph throws error. +statement error +-SELECT * FROM GRAPH_TABLE (g MATCH (s:Student) COLUMNS (s.id, s.name)); +---- +Binder Error: Property s.name is never registered! diff --git a/test/sql/path_finding/complex_matching.test b/test/sql/path_finding/complex_matching.test index 4f7b30b0..0136a098 100644 --- a/test/sql/path_finding/complex_matching.test +++ b/test/sql/path_finding/complex_matching.test @@ -92,7 +92,7 @@ statement error ) tmp limit 10; ---- -Binder Error: Referenced column "o" not found in FROM clause! +Binder Error: Property o is never registered! # https://github.com/cwida/duckpgq-extension/issues/68 diff --git a/test/sql/path_finding/shortest_path.test b/test/sql/path_finding/shortest_path.test index c265e2ae..fc7304dd 100644 --- a/test/sql/path_finding/shortest_path.test +++ b/test/sql/path_finding/shortest_path.test @@ -90,7 +90,7 @@ statement error COLUMNS (p, a.name as name, b.name as b_name) ) study; ---- -Binder Error: Referenced column "p" not found in FROM clause! +Binder Error: Property p is never registered query III From d4c0e40d5a81dcd3a61891cd69a2386ff60b94bb Mon Sep 17 00:00:00 2001 From: dentiny Date: Sun, 19 Jan 2025 09:20:50 +0000 Subject: [PATCH 2/3] Add TODO to handle not only column ref expression --- src/core/functions/table/match.cpp | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) diff --git a/src/core/functions/table/match.cpp b/src/core/functions/table/match.cpp index 9f621c3e..627b6d68 100644 --- a/src/core/functions/table/match.cpp +++ b/src/core/functions/table/match.cpp @@ -999,9 +999,8 @@ void PGQMatchFunction::PopulateGraphTableAliasMap( } } -/*static*/ void -PGQMatchFunction::CheckColumnBinding(const CreatePropertyGraphInfo &pg_table, - const MatchExpression &ref) { +void PGQMatchFunction::CheckColumnBinding( + const CreatePropertyGraphInfo &pg_table, const MatchExpression &ref) { // Maps from table alias to table, including vertex and edge tables. case_insensitive_map_t> alias_to_vertex_and_edge_tables; @@ -1018,6 +1017,11 @@ PGQMatchFunction::CheckColumnBinding(const CreatePropertyGraphInfo &pg_table, GetFullyQualifiedColFromPg(pg_table, alias_to_vertex_and_edge_tables); for (auto &expression : ref.column_list) { + // TODO(hjiang): `ColumnRefExpression` alone is not enough, we could have + // more complicated expression. + // + // See issue for reference: + // https://github.com/cwida/duckpgq-extension/issues/198 auto *column_ref = dynamic_cast(expression.get()); if (column_ref == nullptr) { continue; From 2b102335fc0dc85895a3f29e37011d2b533c842b Mon Sep 17 00:00:00 2001 From: dentiny Date: Fri, 24 Jan 2025 11:39:35 +0000 Subject: [PATCH 3/3] Add more failure sql tests with catalog and schema prefix --- test/sql/create_pg/attach_pg.test | 18 ++++++++++++++++++ test/sql/create_pg/no_properties.test | 6 ++++++ 2 files changed, 24 insertions(+) diff --git a/test/sql/create_pg/attach_pg.test b/test/sql/create_pg/attach_pg.test index f1ed4902..d4d4ef21 100644 --- a/test/sql/create_pg/attach_pg.test +++ b/test/sql/create_pg/attach_pg.test @@ -151,4 +151,22 @@ from pagerank(bluesky, bluesky.account, follows) limit 10; ---- Invalid Error: Label 'bluesky.account' not found. Did you mean the vertex label 'account'? +statement ok +-CREATE PROPERTY GRAPH pg VERTEX TABLES (bluesky.account PROPERTIES (displayName)); +# Query on unspecified columns in the graph throws error. +statement error +-SELECT * FROM GRAPH_TABLE (pg MATCH (acc:account) COLUMNS (acc.displayName, acc.handle)); +---- +Binder Error: Property acc.handle is never registered! + +# Columns to query is only allowed to be or
., which we cannot prefix catalog or schema. +statement error +-SELECT * FROM GRAPH_TABLE (pg MATCH (acc:account) COLUMNS (bluesky.main.acc.displayName)); +---- +Binder Error: Property bluesky.main.acc.displayName is never registered! + +statement error +-SELECT * FROM GRAPH_TABLE (pg MATCH (acc:account) COLUMNS (main.acc.displayName)); +---- +Binder Error: Property main.acc.displayName is never registered! diff --git a/test/sql/create_pg/no_properties.test b/test/sql/create_pg/no_properties.test index 802fdd0a..28b531c5 100644 --- a/test/sql/create_pg/no_properties.test +++ b/test/sql/create_pg/no_properties.test @@ -44,3 +44,9 @@ statement error -SELECT * FROM GRAPH_TABLE (g MATCH (s:Student) COLUMNS (s.id, s.name)); ---- Binder Error: Property s.name is never registered! + +# Columns to query is only allowed to be or
., which we cannot prefix catalog or schema. +statement error +-SELECT * FROM GRAPH_TABLE (g MATCH (s:Student) COLUMNS (main.s.id)); +---- +Binder Error: Property main.s.id is never registered!