From 1fc901aa8c34fd7840ee48827b40d75954a1761e Mon Sep 17 00:00:00 2001 From: dtenwolde Date: Mon, 2 Sep 2024 14:28:30 +0200 Subject: [PATCH 01/17] Add new functions --- src/core/functions/scalar/csr_creation.cpp | 16 +++++++++++++--- 1 file changed, 13 insertions(+), 3 deletions(-) diff --git a/src/core/functions/scalar/csr_creation.cpp b/src/core/functions/scalar/csr_creation.cpp index a5201b23..b67d84d8 100644 --- a/src/core/functions/scalar/csr_creation.cpp +++ b/src/core/functions/scalar/csr_creation.cpp @@ -196,16 +196,26 @@ ScalarFunctionSet GetCSRVertexFunction() { ScalarFunctionSet GetCSREdgeFunction() { ScalarFunctionSet set("create_csr_edge"); + /* 1. CSR ID + * 2. Vertex size + * 3. Sum of the edges (assuming all unique vertices) + * 4. Edge size (to ensure all vertices are unique this should equal point 3) + * 5. source rowid + * 6. destination rowid + * 7. edge rowid + * 8. edge weight (INT OR DOUBLE) + */ + //! No edge weight set.AddFunction(ScalarFunction({LogicalType::INTEGER, LogicalType::BIGINT, - LogicalType::BIGINT, LogicalType::BIGINT, + LogicalType::BIGINT, LogicalType::BIGINT, LogicalType::BIGINT, LogicalType::BIGINT, LogicalType::BIGINT}, LogicalType::INTEGER, CreateCsrEdgeFunction, CSRFunctionData::CSREdgeBind)); //! Integer for edge weight set.AddFunction(ScalarFunction({LogicalType::INTEGER, LogicalType::BIGINT, - LogicalType::BIGINT, LogicalType::BIGINT, + LogicalType::BIGINT, LogicalType::BIGINT, LogicalType::BIGINT, LogicalType::BIGINT, LogicalType::BIGINT, LogicalType::BIGINT}, LogicalType::INTEGER, CreateCsrEdgeFunction, @@ -213,7 +223,7 @@ ScalarFunctionSet GetCSREdgeFunction() { //! 
Double for edge weight set.AddFunction(ScalarFunction({LogicalType::INTEGER, LogicalType::BIGINT, - LogicalType::BIGINT, LogicalType::BIGINT, + LogicalType::BIGINT, LogicalType::BIGINT, LogicalType::BIGINT, LogicalType::BIGINT, LogicalType::BIGINT, LogicalType::DOUBLE}, LogicalType::INTEGER, CreateCsrEdgeFunction, From 3ac69889d01bd2d3b05e3612d2b04b445188c418 Mon Sep 17 00:00:00 2001 From: dtenwolde Date: Mon, 2 Sep 2024 14:44:52 +0200 Subject: [PATCH 02/17] Add test case and adapt udfs --- src/core/functions/scalar/csr_creation.cpp | 12 +++-- .../sql/path_finding/non-unique-vertices.test | 50 +++++++++++++++++++ 2 files changed, 58 insertions(+), 4 deletions(-) create mode 100644 test/sql/path_finding/non-unique-vertices.test diff --git a/src/core/functions/scalar/csr_creation.cpp b/src/core/functions/scalar/csr_creation.cpp index b67d84d8..e3afee58 100644 --- a/src/core/functions/scalar/csr_creation.cpp +++ b/src/core/functions/scalar/csr_creation.cpp @@ -136,6 +136,10 @@ static void CreateCsrEdgeFunction(DataChunk &args, ExpressionState &state, int64_t vertex_size = args.data[1].GetValue(0).GetValue(); int64_t edge_size = args.data[2].GetValue(0).GetValue(); + int64_t edge_size_count = args.data[3].GetValue(0).GetValue(); + if (edge_size != edge_size_count) { + throw ConstraintException("Non-unique vertices detected. 
Make sure all vertices are unique for path-finding queries."); + } auto csr_entry = duckpgq_state->csr_list.find(info.id); if (!csr_entry->second->initialized_e) { @@ -143,7 +147,7 @@ static void CreateCsrEdgeFunction(DataChunk &args, ExpressionState &state, } if (info.weight_type == LogicalType::SQLNULL) { TernaryExecutor::Execute( - args.data[3], args.data[4], args.data[5], result, args.size(), + args.data[4], args.data[5], args.data[6], result, args.size(), [&](int64_t src, int64_t dst, int64_t edge_id) { auto pos = ++csr_entry->second->v[src + 1]; csr_entry->second->e[(int64_t)pos - 1] = dst; @@ -152,13 +156,13 @@ static void CreateCsrEdgeFunction(DataChunk &args, ExpressionState &state, }); return; } - auto weight_type = args.data[6].GetType().InternalType(); + auto weight_type = args.data[7].GetType().InternalType(); if (!csr_entry->second->initialized_w) { CsrInitializeWeight(*duckpgq_state, info.id, edge_size, weight_type); } if (weight_type == PhysicalType::INT64) { QuaternaryExecutor::Execute( - args.data[3], args.data[4], args.data[5], args.data[6], result, + args.data[4], args.data[5], args.data[6], args.data[7], result, args.size(), [&](int64_t src, int64_t dst, int64_t edge_id, int64_t weight) { auto pos = ++csr_entry->second->v[src + 1]; @@ -171,7 +175,7 @@ static void CreateCsrEdgeFunction(DataChunk &args, ExpressionState &state, } QuaternaryExecutor::Execute( - args.data[3], args.data[4], args.data[5], args.data[6], result, + args.data[4], args.data[5], args.data[6], args.data[7], result, args.size(), [&](int64_t src, int64_t dst, int64_t edge_id, double_t weight) { auto pos = ++csr_entry->second->v[src + 1]; diff --git a/test/sql/path_finding/non-unique-vertices.test b/test/sql/path_finding/non-unique-vertices.test new file mode 100644 index 00000000..d54f4ec4 --- /dev/null +++ b/test/sql/path_finding/non-unique-vertices.test @@ -0,0 +1,50 @@ + + +require duckpgq + +statement ok +CREATE TABLE v (x VARCHAR);INSERT INTO v VALUES ('a'), ('b'), ('b'); 
+ +statement ok +CREATE TABLE e (x1 VARCHAR, x2 VARCHAR);INSERT INTO e VALUES ('a', 'b'); + +statement ok +-CREATE PROPERTY GRAPH g +VERTEX TABLES ( + v +) +EDGE TABLES ( + e + SOURCE KEY (x1) REFERENCES v (x) + DESTINATION KEY (x2) REFERENCES v (x) +); + +# v-[e]->(v) has no error: +# Output has duplicate `x` records with the value `b` returned as expected. They can be distinguished by rowid in vertices() +statement ok +-FROM GRAPH_TABLE(g + MATCH p =(v1:v)-[e:e]->(v2:v) + COLUMNS (vertices(p), v2.x) +); + +# ANY SHORTEST v-[e]->(v) has no error: +# Output again has duplicate `x` records returned as expected +statement ok +-FROM GRAPH_TABLE(g + MATCH p = ANY SHORTEST (v1:v)-[e:e]->(v2:v) + COLUMNS (path_length(p), vertices(p), v2.x) +); + +# ANY SHORTEST v-[e]-> +(v) fails with "INTERNAL Error: Attempted to access index 1 within vector of size 1" +statement ok +-FROM GRAPH_TABLE(g + MATCH p = ANY SHORTEST (v1:v)-[e:e]-> +(v2:v) + COLUMNS (path_length(p), vertices(p), v2.x) +); + +# ANY SHORTEST v-[e]->{1,2}(v) also fails with "INTERNAL Error: Attempted to access index 1 within vector of size 1" +statement ok +-FROM GRAPH_TABLE(g + MATCH p = ANY SHORTEST (v1:v)-[e:e]->{1,2}(v2:v) + COLUMNS (path_length(p), vertices(p), v2.x) +); \ No newline at end of file From b889d10cf55e72e4be8f5757bb808675a409aceb Mon Sep 17 00:00:00 2001 From: dtenwolde Date: Tue, 3 Sep 2024 09:21:57 +0200 Subject: [PATCH 03/17] Make GetCountTable more generic --- src/core/functions/table/match.cpp | 29 ++----------------- src/core/utils/compressed_sparse_row.cpp | 14 ++++----- .../core/utils/compressed_sparse_row.hpp | 2 +- 3 files changed, 10 insertions(+), 35 deletions(-) diff --git a/src/core/functions/table/match.cpp b/src/core/functions/table/match.cpp index 6506a383..d9830eb9 100644 --- a/src/core/functions/table/match.cpp +++ b/src/core/functions/table/match.cpp @@ -145,31 +145,6 @@ PathElement *PGQMatchFunction::GetPathElement( throw InternalException("Unknown path reference type 
detected"); } -unique_ptr PGQMatchFunction::GetCountTable( - const shared_ptr &edge_table, - const string &prev_binding) { - // SELECT count(s.id) FROM src s - auto select_count = make_uniq(); - auto select_inner = make_uniq(); - auto ref = make_uniq(); - - ref->table_name = edge_table->source_reference; - ref->alias = prev_binding; - select_inner->from_table = std::move(ref); - vector> children; - children.push_back( - make_uniq(edge_table->source_pk[0], prev_binding)); - - auto count_function = - make_uniq("count", std::move(children)); - select_inner->select_list.push_back(std::move(count_function)); - select_count->node = std::move(select_inner); - auto result = make_uniq(); - result->subquery = std::move(select_count); - result->subquery_type = SubqueryType::SCALAR; - return result; -} - unique_ptr PGQMatchFunction::CreateCountCTESubquery() { //! BEGIN OF (SELECT count(cte1.temp) as temp * 0 from cte1) __x @@ -388,7 +363,7 @@ unique_ptr PGQMatchFunction::GenerateShortestPathCTE( vector> pathfinding_children; pathfinding_children.push_back(std::move(csr_id)); pathfinding_children.push_back(std::move(GetCountTable( - edge_table, previous_vertex_element->variable_binding))); + edge_table->source_reference, previous_vertex_element->variable_binding, edge_table->source_pk[0]))); pathfinding_children.push_back(std::move(src_row_id)); pathfinding_children.push_back(std::move(dst_row_id)); @@ -628,7 +603,7 @@ unique_ptr PGQMatchFunction::AddPathQuantifierCondition( vector> pathfinding_children; pathfinding_children.push_back(std::move(csr_id)); pathfinding_children.push_back( - std::move(GetCountTable(edge_table, prev_binding))); + std::move(GetCountTable(edge_table->source_reference, prev_binding, edge_table->source_pk[0]))); pathfinding_children.push_back(std::move(src_row_id)); pathfinding_children.push_back(std::move(dst_row_id)); diff --git a/src/core/utils/compressed_sparse_row.cpp b/src/core/utils/compressed_sparse_row.cpp index c945afae..425df7e7 100644 --- 
a/src/core/utils/compressed_sparse_row.cpp +++ b/src/core/utils/compressed_sparse_row.cpp @@ -141,17 +141,17 @@ void SetupSelectNode(unique_ptr &select_node, const shared_ptr GetCountTable(const shared_ptr &edge_table, const string &prev_binding) { +unique_ptr GetCountTable(const string &table_name, const string &table_alias, const string &primary_key) { auto select_count = make_uniq(); auto select_inner = make_uniq(); auto ref = make_uniq(); - ref->table_name = edge_table->source_reference; - ref->alias = prev_binding; + ref->table_name = table_name; + ref->alias = table_alias; select_inner->from_table = std::move(ref); vector> children; - children.push_back(make_uniq(edge_table->source_pk[0], prev_binding)); + children.push_back(make_uniq(primary_key, table_alias)); auto count_function = make_uniq("count", std::move(children)); select_inner->select_list.push_back(std::move(count_function)); @@ -206,7 +206,7 @@ GetJoinRef(const shared_ptr &edge_table, } unique_ptr CreateDirectedCSRVertexSubquery(const shared_ptr &edge_table, const std::string &prev_binding) { - auto count_create_vertex_expr = GetCountTable(edge_table, prev_binding); + auto count_create_vertex_expr = GetCountTable(edge_table->source_reference, prev_binding, edge_table->source_pk[0]); vector> csr_vertex_children; csr_vertex_children.push_back(make_uniq(Value::INTEGER(0))); @@ -274,7 +274,7 @@ unique_ptr CreateDirectedCSRVertexSubquery(const shared_ptr< // Helper function to create CSR Vertex Subquery unique_ptr CreateUndirectedCSRVertexSubquery(const shared_ptr &edge_table, const std::string &binding) { - auto count_create_vertex_expr = GetCountTable(edge_table, binding); + auto count_create_vertex_expr = GetCountTable(edge_table->source_reference, binding, edge_table->source_pk[0]); vector> csr_vertex_children; csr_vertex_children.push_back(make_uniq(Value::INTEGER(0))); @@ -512,7 +512,7 @@ unique_ptr CreateUndirectedCSRCTE(const shared_ptr CreateDirectedCSRCTE(const shared_ptr &edge_table, const 
std::string &prev_binding, const std::string &edge_binding, const std::string &next_binding) { auto csr_edge_id_constant = make_uniq(Value::INTEGER(0)); - auto count_create_edge_select = GetCountTable(edge_table, prev_binding); + auto count_create_edge_select = GetCountTable(edge_table->source_reference, prev_binding, edge_table->source_pk[0]); auto cast_subquery_expr = CreateDirectedCSRVertexSubquery(edge_table, prev_binding); diff --git a/src/include/duckpgq/core/utils/compressed_sparse_row.hpp b/src/include/duckpgq/core/utils/compressed_sparse_row.hpp index b8be8100..a79812d5 100644 --- a/src/include/duckpgq/core/utils/compressed_sparse_row.hpp +++ b/src/include/duckpgq/core/utils/compressed_sparse_row.hpp @@ -77,7 +77,7 @@ unique_ptr CreateUndirectedCSRVertexSubquery(const shared_pt unique_ptr CreateOuterSelectEdgesNode(); unique_ptr CreateOuterSelectNode(unique_ptr create_csr_edge_function); unique_ptr GetJoinRef(const shared_ptr &edge_table,const string &edge_binding, const string &prev_binding, const string &next_binding); -unique_ptr GetCountTable(const shared_ptr &edge_table, const string &prev_binding); +unique_ptr GetCountTable(const string &table_name, const string &table_alias, const string &primary_key); unique_ptr CreateColumnRef(const std::string &column_name, const std::string &table_name, const std::string &alias); void SetupSelectNode(unique_ptr &select_node, const shared_ptr &edge_table, bool reverse = false); unique_ptr CreateCountCTESubquery(); From 8981e08ddf03be6e9990b905f5a3179e3e293e1a Mon Sep 17 00:00:00 2001 From: dtenwolde Date: Tue, 3 Sep 2024 09:22:27 +0200 Subject: [PATCH 04/17] Move count table to compressed_sparse_row.hpp. 
Update with new argument for csr --- src/core/utils/compressed_sparse_row.cpp | 82 +++++++++++++++---- .../duckpgq/core/functions/table/match.hpp | 4 - .../core/utils/compressed_sparse_row.hpp | 2 +- 3 files changed, 67 insertions(+), 21 deletions(-) diff --git a/src/core/utils/compressed_sparse_row.cpp b/src/core/utils/compressed_sparse_row.cpp index 425df7e7..8e6327cc 100644 --- a/src/core/utils/compressed_sparse_row.cpp +++ b/src/core/utils/compressed_sparse_row.cpp @@ -5,6 +5,7 @@ #include "duckdb/parser/expression/comparison_expression.hpp" #include "duckdb/parser/expression/constant_expression.hpp" #include "duckdb/parser/tableref/subqueryref.hpp" +#include "duckdb/parser/expression/star_expression.hpp" namespace duckpgq { @@ -75,14 +76,13 @@ CSRFunctionData::CSREdgeBind(ClientContext &context, throw InvalidInputException("Id must be constant."); } Value id = ExpressionExecutor::EvaluateScalar(context, *arguments[0]); - if (arguments.size() == 7) { + if (arguments.size() == 8) { return make_uniq(context, id.GetValue(), - arguments[6]->return_type); - } else { - auto logical_type = LogicalType::SQLNULL; - return make_uniq(context, id.GetValue(), - logical_type); + arguments[7]->return_type); } + auto logical_type = LogicalType::SQLNULL; + return make_uniq(context, id.GetValue(), + logical_type); } unique_ptr @@ -378,7 +378,7 @@ unique_ptr CreateOuterSelectNode(unique_ptr crea } // Function to create the CTE for the edges -unique_ptr MakeEdgesCTE(const shared_ptr &edge_pg_entry) { +unique_ptr MakeEdgesCTE(const shared_ptr &edge_table) { std::vector> select_expression; auto src_col_ref = make_uniq("rowid", "src_table"); src_col_ref->alias = "src"; @@ -389,7 +389,7 @@ unique_ptr MakeEdgesCTE(const shared_ptralias = "dst"; select_expression.emplace_back(std::move(dst_col_ref)); - auto edge_col_ref = make_uniq("rowid", edge_pg_entry->table_name); + auto edge_col_ref = make_uniq("rowid", edge_table->table_name); edge_col_ref->alias = "edges"; 
select_expression.emplace_back(std::move(edge_col_ref)); @@ -397,10 +397,10 @@ unique_ptr MakeEdgesCTE(const shared_ptrselect_list = std::move(select_expression); auto edge_table_ref = make_uniq(); - edge_table_ref->table_name = edge_pg_entry->table_name; + edge_table_ref->table_name = edge_table->table_name; auto src_table_ref = make_uniq(); - src_table_ref->table_name = edge_pg_entry->source_reference; + src_table_ref->table_name = edge_table->source_reference; src_table_ref->alias = "src_table"; auto join_ref = make_uniq(JoinRefType::REGULAR); @@ -410,12 +410,12 @@ unique_ptr MakeEdgesCTE(const shared_ptrleft = std::move(edge_table_ref); first_join_ref->right = std::move(src_table_ref); - auto edge_from_ref = make_uniq(edge_pg_entry->source_fk[0], edge_pg_entry->table_name); - auto src_cid_ref = make_uniq(edge_pg_entry->source_pk[0], "src_table"); + auto edge_from_ref = make_uniq(edge_table->source_fk[0], edge_table->table_name); + auto src_cid_ref = make_uniq(edge_table->source_pk[0], "src_table"); first_join_ref->condition = make_uniq(ExpressionType::COMPARE_EQUAL, std::move(edge_from_ref), std::move(src_cid_ref)); auto dst_table_ref = make_uniq(); - dst_table_ref->table_name = edge_pg_entry->destination_reference; + dst_table_ref->table_name = edge_table->destination_reference; dst_table_ref->alias = "dst_table"; auto second_join_ref = make_uniq(JoinRefType::REGULAR); @@ -423,8 +423,8 @@ unique_ptr MakeEdgesCTE(const shared_ptrleft = std::move(first_join_ref); second_join_ref->right = std::move(dst_table_ref); - auto edge_to_ref = make_uniq(edge_pg_entry->destination_fk[0], edge_pg_entry->table_name); - auto dst_cid_ref = make_uniq(edge_pg_entry->destination_pk[0], "dst_table"); + auto edge_to_ref = make_uniq(edge_table->destination_fk[0], edge_table->table_name); + auto dst_cid_ref = make_uniq(edge_table->destination_pk[0], "dst_table"); second_join_ref->condition = make_uniq(ExpressionType::COMPARE_EQUAL, std::move(edge_to_ref), std::move(dst_cid_ref)); 
select_node->from_table = std::move(second_join_ref); @@ -434,6 +434,7 @@ unique_ptr MakeEdgesCTE(const shared_ptr(); result->query = std::move(select_statement); + std::cout << result->query->ToString(); return result; } @@ -446,7 +447,20 @@ unique_ptr CreateUndirectedCSRCTE(const shared_ptr(Value::INTEGER(0)); - auto count_create_edge_select = GetCountTable(edge_table, edge_table->source_reference); + auto count_create_edge_select = GetCountTable(edge_table->source_reference, edge_table->source_reference, edge_table->source_pk[0]); + + auto count_edges_select_statement = make_uniq(); + auto count_edges_select_node = make_uniq(); + vector> count_children; + count_edges_select_node->select_list.emplace_back(make_uniq(INVALID_CATALOG, INVALID_SCHEMA, "count", std::move(count_children))); + auto edges_tableref = make_uniq(); + edges_tableref->table_name = "edges_cte"; + count_edges_select_node->from_table = std::move(edges_tableref); + count_edges_select_statement->node = std::move(count_edges_select_node); + auto count_edges_subquery = make_uniq(); + count_edges_subquery->subquery = std::move(count_edges_select_statement); + count_edges_subquery->subquery_type = SubqueryType::SCALAR; + auto cast_subquery_expr = CreateUndirectedCSRVertexSubquery(edge_table, edge_table->source_reference); @@ -460,6 +474,7 @@ unique_ptr CreateUndirectedCSRCTE(const shared_ptr CreateUndirectedCSRCTE(const shared_ptr GetCountEdgeTable(const shared_ptr &edge_table) { + auto result = make_uniq(); + auto outer_select_statement = make_uniq(); + auto outer_select_node = make_uniq(); + vector> count_children; + outer_select_node->select_list.push_back(make_uniq(INVALID_CATALOG, INVALID_SCHEMA, "count", std::move(count_children))); + auto inner_select_node = make_uniq(); + auto edge_tableref = make_uniq(); + edge_tableref->table_name = edge_table->table_name; + + auto src_tableref = make_uniq(); + src_tableref->table_name = edge_table->source_reference; + src_tableref->alias = "src"; + auto 
dst_tableref = make_uniq(); + dst_tableref->table_name = edge_table->destination_reference; + dst_tableref->alias = "dst"; + auto first_join = make_uniq(JoinRefType::REGULAR); + first_join->type = JoinType::INNER; + first_join->left = std::move(edge_tableref); + first_join->right = std::move(src_tableref); + first_join->condition = make_uniq(ExpressionType::COMPARE_EQUAL, make_uniq(edge_table->source_fk[0], edge_table->table_name), make_uniq(edge_table->source_pk[0], "src")); + auto second_join = make_uniq(JoinRefType::REGULAR); + second_join->type = JoinType::INNER; + second_join->left = std::move(first_join); + second_join->right = std::move(dst_tableref); + second_join->condition = make_uniq(ExpressionType::COMPARE_EQUAL, make_uniq(edge_table->destination_fk[0], edge_table->table_name), make_uniq(edge_table->destination_pk[0], "dst")); + outer_select_node->from_table = std::move(second_join); + outer_select_statement->node = std::move(outer_select_node); + result->subquery = std::move(outer_select_statement); + result->subquery_type = SubqueryType::SCALAR; + return result; +} + // Function to create the CTE for the Directed CSR unique_ptr CreateDirectedCSRCTE(const shared_ptr &edge_table, const std::string &prev_binding, const std::string &edge_binding, const std::string &next_binding) { @@ -515,6 +563,7 @@ unique_ptr CreateDirectedCSRCTE(const shared_ptrsource_reference, prev_binding, edge_table->source_pk[0]); auto cast_subquery_expr = CreateDirectedCSRVertexSubquery(edge_table, prev_binding); + auto count_edge_table = GetCountEdgeTable(edge_table); // Count the number of edges auto src_rowid_colref = make_uniq("rowid", prev_binding); auto dst_rowid_colref = make_uniq("rowid", next_binding); @@ -526,6 +575,7 @@ unique_ptr CreateDirectedCSRCTE(const shared_ptr &path_reference); - static unique_ptr - GetCountTable(const shared_ptr &edge_table, - const string &prev_binding); - static unique_ptr GetJoinRef(const shared_ptr &edge_table, const string &edge_binding, 
const string &prev_binding, diff --git a/src/include/duckpgq/core/utils/compressed_sparse_row.hpp b/src/include/duckpgq/core/utils/compressed_sparse_row.hpp index a79812d5..6a9cf26b 100644 --- a/src/include/duckpgq/core/utils/compressed_sparse_row.hpp +++ b/src/include/duckpgq/core/utils/compressed_sparse_row.hpp @@ -71,7 +71,7 @@ unique_ptr CreateUndirectedCSRCTE(const shared_ptr CreateDirectedCSRCTE(const shared_ptr &edge_table, const string &prev_binding, const string &edge_binding, const string &next_binding); // Helper functions -unique_ptr MakeEdgesCTE(const shared_ptr &edge_pg_entry); +unique_ptr MakeEdgesCTE(const shared_ptr &edge_table); unique_ptr CreateDirectedCSRVertexSubquery(const shared_ptr &edge_table, const std::string &binding); unique_ptr CreateUndirectedCSRVertexSubquery(const shared_ptr &edge_table, const std::string &binding); unique_ptr CreateOuterSelectEdgesNode(); From 0e75f47f6ecfb0c1120d6b8e3afdb600cbd6f9f8 Mon Sep 17 00:00:00 2001 From: dtenwolde Date: Tue, 3 Sep 2024 09:22:40 +0200 Subject: [PATCH 05/17] Update tests to align with new argument --- test/sql/get_csr_ptr.test | 1 + test/sql/path_finding/complex_matching.test | 1 + test/sql/path_finding/non-unique-vertices.test | 10 +++++++--- test/sql/path_finding/path-finding-cte.test | 1 + test/sql/path_finding/shortest_path.test | 1 + test/sql/scalar/delete_csr.test | 2 ++ test/sql/scalar/get_csr_w_type.test | 3 +++ test/sql/scalar/getpgschema.test | 2 ++ 8 files changed, 18 insertions(+), 3 deletions(-) diff --git a/test/sql/get_csr_ptr.test b/test/sql/get_csr_ptr.test index 966f7eac..14761c2b 100644 --- a/test/sql/get_csr_ptr.test +++ b/test/sql/get_csr_ptr.test @@ -48,6 +48,7 @@ SELECT CREATE_CSR_EDGE( GROUP BY a.rowid) sub ) AS BIGINT), + (select count() FROM Know k JOIN student a on a.id = k.src JOIN student c on c.id = k.dst), a.rowid, c.rowid, k.rowid) as temp diff --git a/test/sql/path_finding/complex_matching.test b/test/sql/path_finding/complex_matching.test index 
4fdbe8f7..a91bddbb 100644 --- a/test/sql/path_finding/complex_matching.test +++ b/test/sql/path_finding/complex_matching.test @@ -143,6 +143,7 @@ WITH CTE1 AS (SELECT CREATE_CSR_EDGE( GROUP BY a.rowid) sub ) AS BIGINT), + (select count(*) from Person_knows_Person k JOIN Person a on a.id = k.person1id JOIN Person b on b.id = k.person2id), a.rowid, b.rowid, k.rowid) as temp diff --git a/test/sql/path_finding/non-unique-vertices.test b/test/sql/path_finding/non-unique-vertices.test index d54f4ec4..72635d0f 100644 --- a/test/sql/path_finding/non-unique-vertices.test +++ b/test/sql/path_finding/non-unique-vertices.test @@ -36,15 +36,19 @@ statement ok ); # ANY SHORTEST v-[e]-> +(v) fails with "INTERNAL Error: Attempted to access index 1 within vector of size 1" -statement ok +statement error -FROM GRAPH_TABLE(g MATCH p = ANY SHORTEST (v1:v)-[e:e]-> +(v2:v) COLUMNS (path_length(p), vertices(p), v2.x) ); +---- +Constraint Error: Non-unique vertices detected. Make sure all vertices are unique for path-finding queries. # ANY SHORTEST v-[e]->{1,2}(v) also fails with "INTERNAL Error: Attempted to access index 1 within vector of size 1" -statement ok +statement error -FROM GRAPH_TABLE(g MATCH p = ANY SHORTEST (v1:v)-[e:e]->{1,2}(v2:v) COLUMNS (path_length(p), vertices(p), v2.x) -); \ No newline at end of file +); +---- +Constraint Error: Non-unique vertices detected. Make sure all vertices are unique for path-finding queries. 
diff --git a/test/sql/path_finding/path-finding-cte.test b/test/sql/path_finding/path-finding-cte.test index bb4f50b6..57133822 100644 --- a/test/sql/path_finding/path-finding-cte.test +++ b/test/sql/path_finding/path-finding-cte.test @@ -50,6 +50,7 @@ statement ok GROUP BY a.rowid) sub ) AS BIGINT), + (select count(*) from know k join student a on a.id = k.src join student c on c.id = k.dst), a.rowid, c.rowid, k.rowid) as temp diff --git a/test/sql/path_finding/shortest_path.test b/test/sql/path_finding/shortest_path.test index a515c96e..c265e2ae 100644 --- a/test/sql/path_finding/shortest_path.test +++ b/test/sql/path_finding/shortest_path.test @@ -112,6 +112,7 @@ WITH cte1 AS ( GROUP BY a.rowid) sub ) AS BIGINT), + (select count(*) from know k JOIN student a on a.id = k.src JOIN student c on c.id = k.dst), a.rowid, c.rowid, k.rowid) as temp diff --git a/test/sql/scalar/delete_csr.test b/test/sql/scalar/delete_csr.test index f1ad4219..9648f50d 100644 --- a/test/sql/scalar/delete_csr.test +++ b/test/sql/scalar/delete_csr.test @@ -48,6 +48,7 @@ SELECT CREATE_CSR_EDGE( GROUP BY a.rowid) sub ) AS BIGINT), + (select count() FROM Know k JOIN student a on a.id = k.src JOIN student c on c.id = k.dst), a.rowid, c.rowid, k.rowid) as temp @@ -73,6 +74,7 @@ SELECT CREATE_CSR_EDGE( GROUP BY a.rowid) sub ) AS BIGINT), + (select count() FROM Know k JOIN student a on a.id = k.src JOIN student c on c.id = k.dst), a.rowid, c.rowid, k.rowid) as temp diff --git a/test/sql/scalar/get_csr_w_type.test b/test/sql/scalar/get_csr_w_type.test index fb55ce2e..9136e48f 100644 --- a/test/sql/scalar/get_csr_w_type.test +++ b/test/sql/scalar/get_csr_w_type.test @@ -73,6 +73,7 @@ SELECT CREATE_CSR_EDGE( GROUP BY a.rowid) sub ) AS BIGINT), + (select count() FROM Know k JOIN student a on a.id = k.src JOIN student c on c.id = k.dst), a.rowid, c.rowid, k.rowid) as temp @@ -103,6 +104,7 @@ SELECT CREATE_CSR_EDGE( GROUP BY a.rowid) sub ) AS BIGINT), + (select count() FROM Know k JOIN student a on a.id 
= k.src JOIN student c on c.id = k.dst), a.rowid, c.rowid, k.rowid, 12) as temp @@ -133,6 +135,7 @@ SELECT CREATE_CSR_EDGE( GROUP BY a.rowid) sub ) AS BIGINT), + (select count() FROM Know k JOIN student a on a.id = k.src JOIN student c on c.id = k.dst), a.rowid, c.rowid, k.rowid, 1.2) as temp diff --git a/test/sql/scalar/getpgschema.test b/test/sql/scalar/getpgschema.test index 74b81483..f0f681de 100644 --- a/test/sql/scalar/getpgschema.test +++ b/test/sql/scalar/getpgschema.test @@ -48,6 +48,7 @@ SELECT CREATE_CSR_EDGE( GROUP BY a.rowid) sub ) AS BIGINT), + (select count() FROM Know k JOIN student a on a.id = k.src JOIN student c on c.id = k.dst), a.rowid, c.rowid, k.rowid) as temp @@ -73,6 +74,7 @@ SELECT CREATE_CSR_EDGE( GROUP BY a.rowid) sub ) AS BIGINT), + (select count() FROM Know k JOIN student a on a.id = k.src JOIN student c on c.id = k.dst), a.rowid, c.rowid, k.rowid) as temp From 3d29da42e321f788d04b975ca04f22398ede6d93 Mon Sep 17 00:00:00 2001 From: dtenwolde Date: Tue, 3 Sep 2024 10:41:20 +0200 Subject: [PATCH 06/17] Add helper functions to create basetableref and columnrefexpression --- src/core/utils/duckpgq_utils.cpp | 22 ++++++++++++++++++- .../duckpgq/core/utils/duckpgq_utils.hpp | 3 ++- 2 files changed, 23 insertions(+), 2 deletions(-) diff --git a/src/core/utils/duckpgq_utils.cpp b/src/core/utils/duckpgq_utils.cpp index 10beef41..af592cb9 100644 --- a/src/core/utils/duckpgq_utils.cpp +++ b/src/core/utils/duckpgq_utils.cpp @@ -7,7 +7,7 @@ #include "duckdb/parser/expression/constant_expression.hpp" #include "duckdb/parser/tableref/joinref.hpp" #include "duckdb/parser/tableref/basetableref.hpp" -#include "duckdb/parser/tableref/subqueryref.hpp" +#include "duckdb/parser/expression/columnref_expression.hpp" namespace duckpgq { @@ -90,6 +90,26 @@ unique_ptr CreateSelectNode(const shared_ptr &ed return select_node; } +unique_ptr CreateBaseTableRef(const string &table_name, const string &alias) { + auto base_table_ref = make_uniq(); + 
base_table_ref->table_name = table_name; + if (!alias.empty()) { + base_table_ref->alias = alias; + } + return base_table_ref; +} + +unique_ptr CreateColumnRefExpression(const string &column_name, const string &table_name, const string& alias) { + unique_ptr column_ref; + if (table_name.empty()) { + column_ref = make_uniq(column_name); + } else { + column_ref = make_uniq(column_name, table_name); + } + if (!alias.empty()) { + column_ref->alias = alias; + } + return column_ref; } // namespace core } // namespace duckpgq \ No newline at end of file diff --git a/src/include/duckpgq/core/utils/duckpgq_utils.hpp b/src/include/duckpgq/core/utils/duckpgq_utils.hpp index 0d0a3a5d..a328020c 100644 --- a/src/include/duckpgq/core/utils/duckpgq_utils.hpp +++ b/src/include/duckpgq/core/utils/duckpgq_utils.hpp @@ -14,7 +14,8 @@ std::string ToLowerCase(const std::string &input); CreatePropertyGraphInfo* GetPropertyGraphInfo(DuckPGQState *duckpgq_state, const std::string &pg_name); shared_ptr ValidateSourceNodeAndEdgeTable(CreatePropertyGraphInfo *pg_info, const std::string &node_table, const std::string &edge_table); unique_ptr CreateSelectNode(const shared_ptr &edge_pg_entry, const string& function_name, const string& function_alias); - +unique_ptr CreateBaseTableRef(const string &table_name, const string &alias = ""); +unique_ptr CreateColumnRefExpression(const string &column_name, const string &table_name = "", const string& alias = ""); } // namespace core } // namespace duckpgq From 2f76b8e3399347a5d8c5eb8aae93de250d5b284d Mon Sep 17 00:00:00 2001 From: dtenwolde Date: Tue, 3 Sep 2024 10:41:37 +0200 Subject: [PATCH 07/17] Use helper functions and remove old helper function --- src/core/utils/compressed_sparse_row.cpp | 87 ++++++++++--------- .../core/utils/compressed_sparse_row.hpp | 4 +- 2 files changed, 48 insertions(+), 43 deletions(-) diff --git a/src/core/utils/compressed_sparse_row.cpp b/src/core/utils/compressed_sparse_row.cpp index 8e6327cc..9d02b92b 100644 --- 
a/src/core/utils/compressed_sparse_row.cpp +++ b/src/core/utils/compressed_sparse_row.cpp @@ -1,11 +1,13 @@ #include "duckpgq/core/utils/compressed_sparse_row.hpp" #include "duckdb/common/string.hpp" #include "duckdb/execution/expression_executor.hpp" -#include "duckdb/parser/tableref/basetableref.hpp" #include "duckdb/parser/expression/comparison_expression.hpp" #include "duckdb/parser/expression/constant_expression.hpp" -#include "duckdb/parser/tableref/subqueryref.hpp" #include "duckdb/parser/expression/star_expression.hpp" +#include "duckdb/parser/tableref/basetableref.hpp" +#include "duckdb/parser/tableref/subqueryref.hpp" + +#include namespace duckpgq { @@ -97,11 +99,11 @@ CSRFunctionData::CSRBind(ClientContext &context, ScalarFunction &bound_function, } // Helper function to create a ColumnRefExpression with alias -unique_ptr CreateColumnRef(const std::string &column_name, const std::string &table_name, const std::string &alias) { - auto col_ref = make_uniq(column_name, table_name); - col_ref->alias = alias; - return col_ref; -} +// unique_ptr CreateColumnRef(const std::string &column_name, const std::string &table_name, const std::string &alias) { +// auto col_ref = make_uniq(column_name, table_name); +// col_ref->alias = alias; +// return col_ref; +// } // Helper function to create a JoinRef unique_ptr CreateJoin(const std::string &fk_column, const std::string &pk_column, const std::string &table_name, const std::string &source_reference) { @@ -126,15 +128,15 @@ unique_ptr CreateJoin(const std::string &fk_column, const std::string & void SetupSelectNode(unique_ptr &select_node, const shared_ptr &edge_table, bool reverse) { select_node = make_uniq(); - select_node->select_list.emplace_back(CreateColumnRef("rowid", edge_table->source_reference, "dense_id")); + select_node->select_list.emplace_back(CreateColumnRefExpression("rowid", edge_table->source_reference, "dense_id")); if (!reverse) { - 
select_node->select_list.emplace_back(CreateColumnRef(edge_table->source_fk[0], edge_table->table_name, "outgoing_edges")); - select_node->select_list.emplace_back(CreateColumnRef(edge_table->destination_fk[0], edge_table->table_name, "incoming_edges")); + select_node->select_list.emplace_back(CreateColumnRefExpression(edge_table->source_fk[0], edge_table->table_name, "outgoing_edges")); + select_node->select_list.emplace_back(CreateColumnRefExpression(edge_table->destination_fk[0], edge_table->table_name, "incoming_edges")); select_node->from_table = CreateJoin(edge_table->source_fk[0], edge_table->source_pk[0], edge_table->table_name, edge_table->source_reference); } else { - select_node->select_list.emplace_back(CreateColumnRef(edge_table->destination_fk[0], edge_table->table_name, "outgoing_edges")); - select_node->select_list.emplace_back(CreateColumnRef(edge_table->source_fk[0], edge_table->table_name, "incoming_edges")); + select_node->select_list.emplace_back(CreateColumnRefExpression(edge_table->destination_fk[0], edge_table->table_name, "outgoing_edges")); + select_node->select_list.emplace_back(CreateColumnRefExpression(edge_table->source_fk[0], edge_table->table_name, "incoming_edges")); select_node->from_table = CreateJoin(edge_table->destination_fk[0], edge_table->source_pk[0], edge_table->table_name, edge_table->source_reference); } } @@ -222,7 +224,7 @@ unique_ptr CreateDirectedCSRVertexSubquery(const shared_ptr< auto inner_select_statement = make_uniq(); auto inner_select_node = make_uniq(); - inner_select_node->select_list.emplace_back(CreateColumnRef("rowid", prev_binding, "dense_id")); + inner_select_node->select_list.emplace_back(CreateColumnRefExpression("rowid", prev_binding, "dense_id")); auto edge_src_colref = make_uniq(edge_table->source_fk[0], edge_table->table_name); vector> count_children; count_children.push_back(std::move(edge_src_colref)); @@ -449,17 +451,7 @@ unique_ptr CreateUndirectedCSRCTE(const shared_ptr(Value::INTEGER(0)); 
auto count_create_edge_select = GetCountTable(edge_table->source_reference, edge_table->source_reference, edge_table->source_pk[0]); - auto count_edges_select_statement = make_uniq(); - auto count_edges_select_node = make_uniq(); - vector> count_children; - count_edges_select_node->select_list.emplace_back(make_uniq(INVALID_CATALOG, INVALID_SCHEMA, "count", std::move(count_children))); - auto edges_tableref = make_uniq(); - edges_tableref->table_name = "edges_cte"; - count_edges_select_node->from_table = std::move(edges_tableref); - count_edges_select_statement->node = std::move(count_edges_select_node); - auto count_edges_subquery = make_uniq(); - count_edges_subquery->subquery = std::move(count_edges_select_statement); - count_edges_subquery->subquery_type = SubqueryType::SCALAR; + auto count_edges_subquery = GetCountUndirectedEdgeTable(); auto cast_subquery_expr = CreateUndirectedCSRVertexSubquery(edge_table, edge_table->source_reference); @@ -489,17 +481,14 @@ unique_ptr CreateUndirectedCSRCTE(const shared_ptrsetop_type = SetOperationType::UNION; auto src_dst_select_node = make_uniq(); - auto edges_cte_tableref = make_uniq(); - edges_cte_tableref->table_name = "edges_cte"; - src_dst_select_node->from_table = std::move(edges_cte_tableref); + + src_dst_select_node->from_table = std::move(CreateBaseTableRef("edges_cte")); src_dst_select_node->select_list.push_back(make_uniq("src")); src_dst_select_node->select_list.push_back(make_uniq("dst")); src_dst_select_node->select_list.push_back(make_uniq("edges")); auto dst_src_select_node = make_uniq(); - auto dst_edges_cte_tableref = make_uniq(); - dst_edges_cte_tableref->table_name = "edges_cte"; - dst_src_select_node->from_table = std::move(dst_edges_cte_tableref); + dst_src_select_node->from_table = std::move(CreateBaseTableRef("edges_cte")); dst_src_select_node->select_list.push_back(make_uniq("dst")); dst_src_select_node->select_list.push_back(make_uniq("src")); 
dst_src_select_node->select_list.push_back(make_uniq("edges")); @@ -523,6 +512,30 @@ unique_ptr CreateUndirectedCSRCTE(const shared_ptr GetCountUndirectedEdgeTable () { + auto count_edges_select_statement = make_uniq(); + auto count_edges_select_node = make_uniq(); + vector> count_children; + count_edges_select_node->select_list.emplace_back(make_uniq(INVALID_CATALOG, INVALID_SCHEMA, "count", std::move(count_children))); + + auto inner_from_subquery = make_uniq(); + auto inner_select_statement = make_uniq(); + auto inner_select_node = make_uniq(); + + auto src_dst_select_node = make_uniq(); + src_dst_select_node->select_list.emplace_back(CreateColumnRefExpression("src")); + src_dst_select_node->select_list.emplace_back(CreateColumnRefExpression("dst")); + + src_dst_select_node->from_table = std::move(CreateBaseTableRef("edges_cte")); + + auto dst_src_select_node = make_uniq(); + dst_src_select_node->select_list.emplace_back(CreateColumnRefExpression("dst", "", "src")); + dst_src_select_node->select_list.emplace_back(CreateColumnRefExpression("src", "", "dst")); + dst_src_select_node->from_table = CreateBaseTableRef("edges_cte"); + + +} + unique_ptr GetCountEdgeTable(const shared_ptr &edge_table) { auto result = make_uniq(); auto outer_select_statement = make_uniq(); @@ -530,24 +543,16 @@ unique_ptr GetCountEdgeTable(const shared_ptr> count_children; outer_select_node->select_list.push_back(make_uniq(INVALID_CATALOG, INVALID_SCHEMA, "count", std::move(count_children))); auto inner_select_node = make_uniq(); - auto edge_tableref = make_uniq(); - edge_tableref->table_name = edge_table->table_name; - auto src_tableref = make_uniq(); - src_tableref->table_name = edge_table->source_reference; - src_tableref->alias = "src"; - auto dst_tableref = make_uniq(); - dst_tableref->table_name = edge_table->destination_reference; - dst_tableref->alias = "dst"; auto first_join = make_uniq(JoinRefType::REGULAR); first_join->type = JoinType::INNER; - first_join->left = 
std::move(edge_tableref); - first_join->right = std::move(src_tableref); + first_join->left = CreateBaseTableRef(edge_table->table_name); + first_join->right = CreateBaseTableRef(edge_table->source_reference, "src"); first_join->condition = make_uniq(ExpressionType::COMPARE_EQUAL, make_uniq(edge_table->source_fk[0], edge_table->table_name), make_uniq(edge_table->source_pk[0], "src")); auto second_join = make_uniq(JoinRefType::REGULAR); second_join->type = JoinType::INNER; second_join->left = std::move(first_join); - second_join->right = std::move(dst_tableref); + second_join->right = CreateBaseTableRef(edge_table->destination_reference, "dst"); second_join->condition = make_uniq(ExpressionType::COMPARE_EQUAL, make_uniq(edge_table->destination_fk[0], edge_table->table_name), make_uniq(edge_table->destination_pk[0], "dst")); outer_select_node->from_table = std::move(second_join); outer_select_statement->node = std::move(outer_select_node); diff --git a/src/include/duckpgq/core/utils/compressed_sparse_row.hpp b/src/include/duckpgq/core/utils/compressed_sparse_row.hpp index 6a9cf26b..72953a17 100644 --- a/src/include/duckpgq/core/utils/compressed_sparse_row.hpp +++ b/src/include/duckpgq/core/utils/compressed_sparse_row.hpp @@ -78,10 +78,10 @@ unique_ptr CreateOuterSelectEdgesNode(); unique_ptr CreateOuterSelectNode(unique_ptr create_csr_edge_function); unique_ptr GetJoinRef(const shared_ptr &edge_table,const string &edge_binding, const string &prev_binding, const string &next_binding); unique_ptr GetCountTable(const string &table_name, const string &table_alias, const string &primary_key); -unique_ptr CreateColumnRef(const std::string &column_name, const std::string &table_name, const std::string &alias); void SetupSelectNode(unique_ptr &select_node, const shared_ptr &edge_table, bool reverse = false); unique_ptr CreateCountCTESubquery(); - +unique_ptr GetCountUndirectedEdgeTable (); +unique_ptr GetCountEdgeTable(const shared_ptr &edge_table); } // namespace core } // 
namespace duckpgq From c4043c6e03d06dfb7925502a2163d83375b32e9c Mon Sep 17 00:00:00 2001 From: dtenwolde Date: Tue, 3 Sep 2024 11:12:59 +0200 Subject: [PATCH 08/17] Update csr w type test --- test/sql/scalar/get_csr_w_type.test | 1 + 1 file changed, 1 insertion(+) diff --git a/test/sql/scalar/get_csr_w_type.test b/test/sql/scalar/get_csr_w_type.test index 9136e48f..7cf60f87 100644 --- a/test/sql/scalar/get_csr_w_type.test +++ b/test/sql/scalar/get_csr_w_type.test @@ -48,6 +48,7 @@ SELECT CREATE_CSR_EDGE( GROUP BY a.rowid) sub ) AS BIGINT), + (select count() FROM Know k JOIN student a on a.id = k.src JOIN student c on c.id = k.dst), a.rowid, c.rowid, k.rowid) as temp From 0b445b11e952e30ce25329798c9972b1c164452c Mon Sep 17 00:00:00 2001 From: dtenwolde Date: Tue, 3 Sep 2024 11:13:09 +0200 Subject: [PATCH 09/17] Fix missing bracket --- src/core/utils/duckpgq_utils.cpp | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/src/core/utils/duckpgq_utils.cpp b/src/core/utils/duckpgq_utils.cpp index af592cb9..0e7e31b0 100644 --- a/src/core/utils/duckpgq_utils.cpp +++ b/src/core/utils/duckpgq_utils.cpp @@ -8,6 +8,7 @@ #include "duckdb/parser/tableref/joinref.hpp" #include "duckdb/parser/tableref/basetableref.hpp" #include "duckdb/parser/expression/columnref_expression.hpp" +#include "duckdb/parser/tableref/subqueryref.hpp" namespace duckpgq { @@ -110,6 +111,6 @@ unique_ptr CreateColumnRefExpression(const string &column_n column_ref->alias = alias; } return column_ref; - +} } // namespace core } // namespace duckpgq \ No newline at end of file From 6c09b8b618c3f8990ffe726dc666f6be5c220b43 Mon Sep 17 00:00:00 2001 From: dtenwolde Date: Tue, 3 Sep 2024 11:13:23 +0200 Subject: [PATCH 10/17] Add multiply function to get correct edge count --- src/core/utils/compressed_sparse_row.cpp | 28 ++++++++++++++----- .../core/utils/compressed_sparse_row.hpp | 2 +- 2 files changed, 22 insertions(+), 8 deletions(-) diff --git a/src/core/utils/compressed_sparse_row.cpp 
b/src/core/utils/compressed_sparse_row.cpp index 9d02b92b..63148057 100644 --- a/src/core/utils/compressed_sparse_row.cpp +++ b/src/core/utils/compressed_sparse_row.cpp @@ -453,7 +453,6 @@ unique_ptr CreateUndirectedCSRCTE(const shared_ptrsource_reference); auto src_rowid_colref = make_uniq("src"); @@ -506,21 +505,24 @@ unique_ptr CreateUndirectedCSRCTE(const shared_ptr(); outer_select_statement->node = std::move(outer_select_node); - auto info = make_uniq(); info->query = std::move(outer_select_statement); return info; } -unique_ptr GetCountUndirectedEdgeTable () { +unique_ptr GetCountUndirectedEdgeTable() { auto count_edges_select_statement = make_uniq(); auto count_edges_select_node = make_uniq(); vector> count_children; - count_edges_select_node->select_list.emplace_back(make_uniq(INVALID_CATALOG, INVALID_SCHEMA, "count", std::move(count_children))); + auto count_function = make_uniq(INVALID_CATALOG, INVALID_SCHEMA, "count", std::move(count_children)); + vector> multiply_children; + auto constant_two = make_uniq(Value::BIGINT(2)); + multiply_children.push_back(std::move(constant_two)); + multiply_children.push_back(std::move(count_function)); + auto multiply_function = make_uniq(INVALID_CATALOG, INVALID_SCHEMA, "multiply", std::move(multiply_children)); + count_edges_select_node->select_list.emplace_back(std::move(multiply_function)); - auto inner_from_subquery = make_uniq(); auto inner_select_statement = make_uniq(); - auto inner_select_node = make_uniq(); auto src_dst_select_node = make_uniq(); src_dst_select_node->select_list.emplace_back(CreateColumnRefExpression("src")); @@ -533,7 +535,19 @@ unique_ptr GetCountUndirectedEdgeTable () { dst_src_select_node->select_list.emplace_back(CreateColumnRefExpression("src", "", "dst")); dst_src_select_node->from_table = CreateBaseTableRef("edges_cte"); - + auto union_by_name_node = make_uniq(); + union_by_name_node->setop_all = false; + union_by_name_node->setop_type = SetOperationType::UNION_BY_NAME; + 
union_by_name_node->left = std::move(src_dst_select_node); + union_by_name_node->right = std::move(dst_src_select_node); + inner_select_statement->node = std::move(union_by_name_node); + auto inner_from_subquery = make_uniq(std::move(inner_select_statement)); + count_edges_select_node->from_table = std::move(inner_from_subquery); + count_edges_select_statement->node = std::move(count_edges_select_node); + auto result = make_uniq(); + result->subquery = std::move(count_edges_select_statement); + result->subquery_type = SubqueryType::SCALAR; + return result; } unique_ptr GetCountEdgeTable(const shared_ptr &edge_table) { diff --git a/src/include/duckpgq/core/utils/compressed_sparse_row.hpp b/src/include/duckpgq/core/utils/compressed_sparse_row.hpp index 72953a17..194372cc 100644 --- a/src/include/duckpgq/core/utils/compressed_sparse_row.hpp +++ b/src/include/duckpgq/core/utils/compressed_sparse_row.hpp @@ -80,7 +80,7 @@ unique_ptr GetJoinRef(const shared_ptr &edge_table, unique_ptr GetCountTable(const string &table_name, const string &table_alias, const string &primary_key); void SetupSelectNode(unique_ptr &select_node, const shared_ptr &edge_table, bool reverse = false); unique_ptr CreateCountCTESubquery(); -unique_ptr GetCountUndirectedEdgeTable (); +unique_ptr GetCountUndirectedEdgeTable(); unique_ptr GetCountEdgeTable(const shared_ptr &edge_table); } // namespace core From b150079f218c4b77e4858b2bd95d3bd6492942d9 Mon Sep 17 00:00:00 2001 From: dtenwolde Date: Tue, 3 Sep 2024 11:13:42 +0200 Subject: [PATCH 11/17] Comment out tests with explicit queries --- .../scalar/local_clustering_coefficient.test | 676 +++++++++--------- 1 file changed, 338 insertions(+), 338 deletions(-) diff --git a/test/sql/scalar/local_clustering_coefficient.test b/test/sql/scalar/local_clustering_coefficient.test index 7642d4f7..27548ec5 100644 --- a/test/sql/scalar/local_clustering_coefficient.test +++ b/test/sql/scalar/local_clustering_coefficient.test @@ -70,55 +70,55 @@ EDGE TABLES ( - 
-query II -WITH cte1 AS ( - SELECT CREATE_CSR_EDGE( - 0, - (SELECT count(a.id) FROM Student a), - CAST ( - (SELECT sum(CREATE_CSR_VERTEX( - 0, - (SELECT count(a.id) FROM Student a), - sub.dense_id, - sub.cnt) - ) * 2 - FROM ( - SELECT dense_id, count(*) as cnt FROM ( - SELECT dense_id, outgoing_edge, incoming_edge - FROM ( - SELECT a.rowid AS dense_id, k.src AS outgoing_edge, k.dst AS incoming_edge - FROM Student a - JOIN Know k ON k.src = a.id - UNION ALL - SELECT a.rowid AS dense_id, k.dst AS outgoing_edge, k.src AS incoming_edge - FROM Student a - JOIN know k on k.dst = a.id) - GROUP BY dense_id, outgoing_edge, incoming_edge) - GROUP BY dense_id) sub - ) - AS BIGINT), - src, - dst, - edge) as temp FROM ( - select src, dst, any_value(edge) as edge FROM ( - select a.rowid as src, c.rowid as dst, k.rowid as edge FROM Know k - JOIN Student a on a.id = k.src - JOIN Student c on c.id = k.dst - UNION ALL - select a.rowid as src, c.rowid as dst, k.rowid as edge FROM Know k - JOIN Student a on a.id = k.dst - JOIN Student c on c.id = k.src) - GROUP BY src, dst) -) SELECT __x.temp + local_clustering_coefficient(0, a.rowid) as lcc, a.name - FROM (select count(cte1.temp) * 0 as temp from cte1) __x, Student a - ORDER BY lcc DESC; ----- -1.0 Daniel -1.0 Tavneet -1.0 Gabor -0.5 Peter -0.0 David +# +#query II +#WITH cte1 AS ( +# SELECT CREATE_CSR_EDGE( +# 0, +# (SELECT count(a.id) FROM Student a), +# CAST ( +# (SELECT sum(CREATE_CSR_VERTEX( +# 0, +# (SELECT count(a.id) FROM Student a), +# sub.dense_id, +# sub.cnt) +# ) * 2 +# FROM ( +# SELECT dense_id, count(*) as cnt FROM ( +# SELECT dense_id, outgoing_edge, incoming_edge +# FROM ( +# SELECT a.rowid AS dense_id, k.src AS outgoing_edge, k.dst AS incoming_edge +# FROM Student a +# JOIN Know k ON k.src = a.id +# UNION ALL +# SELECT a.rowid AS dense_id, k.dst AS outgoing_edge, k.src AS incoming_edge +# FROM Student a +# JOIN know k on k.dst = a.id) +# GROUP BY dense_id, outgoing_edge, incoming_edge) +# GROUP BY dense_id) sub +# ) 
+# AS BIGINT), +# src, +# dst, +# edge) as temp FROM ( +# select src, dst, any_value(edge) as edge FROM ( +# select a.rowid as src, c.rowid as dst, k.rowid as edge FROM Know k +# JOIN Student a on a.id = k.src +# JOIN Student c on c.id = k.dst +# UNION ALL +# select a.rowid as src, c.rowid as dst, k.rowid as edge FROM Know k +# JOIN Student a on a.id = k.dst +# JOIN Student c on c.id = k.src) +# GROUP BY src, dst) +#) SELECT __x.temp + local_clustering_coefficient(0, a.rowid) as lcc, a.name +# FROM (select count(cte1.temp) * 0 as temp from cte1) __x, Student a +# ORDER BY lcc DESC; +#---- +#1.0 Daniel +#1.0 Tavneet +#1.0 Gabor +#0.5 Peter +#0.0 David query II select id, local_clustering_coefficient from local_clustering_coefficient(pg, student, know); @@ -231,295 +231,295 @@ select id, local_clustering_coefficient from local_clustering_coefficient(snb, p 37383395344409 0.0 -query II -WITH cte1 AS ( - SELECT CREATE_CSR_EDGE( - 0, - (SELECT count(a.id) FROM Person a), - CAST ( - (SELECT sum(CREATE_CSR_VERTEX( - 0, - (SELECT count(a.id) FROM Person a), - sub.dense_id, - sub.cnt) - ) * 2 - FROM ( - SELECT dense_id, count(*) as cnt FROM ( - SELECT dense_id, outgoing_edge, incoming_edge - FROM ( - SELECT a.rowid AS dense_id, k.person1id AS outgoing_edge, k.person2id AS incoming_edge - FROM Person a - JOIN Person_knows_person k ON k.person1id = a.id - UNION ALL - SELECT a.rowid AS dense_id, k.person2id AS outgoing_edge, k.person1id AS incoming_edge - FROM Person a - JOIN Person_knows_person k on k.person2id = a.id) - GROUP BY dense_id, outgoing_edge, incoming_edge) - GROUP BY dense_id) sub - ) - AS BIGINT), - src, - dst, - edge) as temp FROM ( - select src, dst, any_value(edge) as edge FROM ( - select a.rowid as src, c.rowid as dst, k.rowid as edge FROM Person_knows_person k - JOIN Person a on a.id = k.person1id - JOIN Person c on c.id = k.person2id - UNION ALL - select a.rowid as src, c.rowid as dst, k.rowid as edge FROM Person_knows_person k - JOIN Person a on a.id = 
k.person2id - JOIN Person c on c.id = k.person1id) - GROUP BY src, dst) -) SELECT id, __x.temp + local_clustering_coefficient(0, a.rowid) as lcc - FROM (select count(cte1.temp) * 0 as temp from cte1) __x, Person a - ORDER BY id ASC; ----- -14 0.33333334 -16 0.5 -32 0.8333333 -2199023255557 0.6666667 -2199023255573 1.0 -2199023255594 0.1904762 -4398046511139 0.0 -6597069766702 0.0 -8796093022234 0.0 -8796093022237 0.6666667 -8796093022244 0.0 -8796093022249 0.4 -10995116277761 0.3 -10995116277782 0.23809524 -10995116277783 0.0 -10995116277808 0.0 -13194139533342 1.0 -13194139533352 0.31111112 -13194139533355 0.2 -15393162788877 0.0 -17592186044443 0.0 -17592186044461 1.0 -19791209299968 1.0 -19791209299987 0.0 -21990232555526 0.0 -21990232555527 0.0 -24189255811081 0.125 -24189255811109 1.0 -26388279066632 0.0 -26388279066641 0.8333333 -26388279066655 0.33333334 -26388279066658 0.21794872 -26388279066668 0.5 -28587302322180 0.16666667 -28587302322191 0.0 -28587302322196 0.8333333 -28587302322204 0.2857143 -28587302322209 0.0 -28587302322223 0.0 -30786325577731 0.0 -30786325577740 1.0 -32985348833291 0.0 -32985348833318 0.0 -32985348833329 0.0 -35184372088834 0.0 -35184372088850 0.6666667 -35184372088856 0.33333334 -35184372088871 0.0 -37383395344394 0.0 -37383395344409 0.0 - -query II -WITH edges_cte AS ( - SELECT a.rowid as src, c.rowid as dst, k.rowid as edges - FROM Person_knows_person k - JOIN Person a on a.id = k.person1id - JOIN Person c on c.id = k.person2id -), -cte1 AS ( - SELECT CREATE_CSR_EDGE( - 0, - (SELECT count(a.id) FROM Person a), - CAST ( - (SELECT sum(CREATE_CSR_VERTEX( - 0, - (SELECT count(a.id) FROM Person a), - sub.dense_id, - sub.cnt) - ) * 2 - FROM ( - SELECT dense_id, count(*) as cnt FROM ( - SELECT dense_id, outgoing_edge, incoming_edge - FROM ( - SELECT a.rowid AS dense_id, k.person1id AS outgoing_edge, k.person2id AS incoming_edge - FROM Person a - JOIN Person_knows_person k ON k.person1id = a.id - UNION ALL - SELECT a.rowid AS dense_id, 
k.person2id AS outgoing_edge, k.person1id AS incoming_edge - FROM Person a - JOIN Person_knows_person k on k.person2id = a.id) - GROUP BY dense_id, outgoing_edge, incoming_edge) - GROUP BY dense_id) sub - ) - AS BIGINT), - src, - dst, - edge) as temp FROM ( - select src, dst, any_value(edges) as edge FROM ( - select src, dst, edges from edges_cte UNION all select dst, src, edges from edges_cte) - GROUP BY src, dst) -) SELECT id, __x.temp + local_clustering_coefficient(0, a.rowid) as lcc - FROM (select count(cte1.temp) * 0 as temp from cte1) __x, Person a - ORDER BY id ASC; ----- -14 0.33333334 -16 0.5 -32 0.8333333 -2199023255557 0.6666667 -2199023255573 1.0 -2199023255594 0.1904762 -4398046511139 0.0 -6597069766702 0.0 -8796093022234 0.0 -8796093022237 0.6666667 -8796093022244 0.0 -8796093022249 0.4 -10995116277761 0.3 -10995116277782 0.23809524 -10995116277783 0.0 -10995116277808 0.0 -13194139533342 1.0 -13194139533352 0.31111112 -13194139533355 0.2 -15393162788877 0.0 -17592186044443 0.0 -17592186044461 1.0 -19791209299968 1.0 -19791209299987 0.0 -21990232555526 0.0 -21990232555527 0.0 -24189255811081 0.125 -24189255811109 1.0 -26388279066632 0.0 -26388279066641 0.8333333 -26388279066655 0.33333334 -26388279066658 0.21794872 -26388279066668 0.5 -28587302322180 0.16666667 -28587302322191 0.0 -28587302322196 0.8333333 -28587302322204 0.2857143 -28587302322209 0.0 -28587302322223 0.0 -30786325577731 0.0 -30786325577740 1.0 -32985348833291 0.0 -32985348833318 0.0 -32985348833329 0.0 -35184372088834 0.0 -35184372088850 0.6666667 -35184372088856 0.33333334 -35184372088871 0.0 -37383395344394 0.0 -37383395344409 0.0 - -query II -WITH edges_cte AS ( - SELECT a.rowid as src, c.rowid as dst, k.rowid as edges - FROM Person_knows_person k - JOIN Person a ON a.id = k.person1id - JOIN Person c ON c.id = k.person2id -), -cte1 AS ( - SELECT CREATE_CSR_EDGE( - 0, - (SELECT count(id) FROM Person), - CAST( - (SELECT sum(CREATE_CSR_VERTEX( - 0, - (SELECT count(id) FROM Person), - 
sub.dense_id, - sub.cnt) - ) * 2 - FROM ( - SELECT dense_id, count(*) as cnt - FROM ( - SELECT DISTINCT a.rowid AS dense_id, k.person1id AS outgoing_edge, k.person2id AS incoming_edge - FROM Person a - JOIN Person_knows_person k ON k.person1id = a.id - UNION ALL - SELECT DISTINCT a.rowid AS dense_id, k.person2id AS outgoing_edge, k.person1id AS incoming_edge - FROM Person a - JOIN Person_knows_person k ON k.person2id = a.id - ) unique_edges - GROUP BY dense_id -- group by dense_id to count the number of unique edges - ) sub - ) AS BIGINT), - src, - dst, - edge) as temp - FROM ( - SELECT src, dst, any_value(edges) as edge - FROM ( - SELECT src, dst, edges FROM edges_cte - UNION ALL - SELECT dst, src, edges FROM edges_cte - ) - GROUP BY src, dst - ) -) -SELECT id, __x.temp + local_clustering_coefficient(0, a.rowid) as lcc -FROM ( - SELECT count(cte1.temp) * 0 as temp FROM cte1 -) __x, Person a -ORDER BY id ASC; ----- -14 0.33333334 -16 0.5 -32 0.8333333 -2199023255557 0.6666667 -2199023255573 1.0 -2199023255594 0.1904762 -4398046511139 0.0 -6597069766702 0.0 -8796093022234 0.0 -8796093022237 0.6666667 -8796093022244 0.0 -8796093022249 0.4 -10995116277761 0.3 -10995116277782 0.23809524 -10995116277783 0.0 -10995116277808 0.0 -13194139533342 1.0 -13194139533352 0.31111112 -13194139533355 0.2 -15393162788877 0.0 -17592186044443 0.0 -17592186044461 1.0 -19791209299968 1.0 -19791209299987 0.0 -21990232555526 0.0 -21990232555527 0.0 -24189255811081 0.125 -24189255811109 1.0 -26388279066632 0.0 -26388279066641 0.8333333 -26388279066655 0.33333334 -26388279066658 0.21794872 -26388279066668 0.5 -28587302322180 0.16666667 -28587302322191 0.0 -28587302322196 0.8333333 -28587302322204 0.2857143 -28587302322209 0.0 -28587302322223 0.0 -30786325577731 0.0 -30786325577740 1.0 -32985348833291 0.0 -32985348833318 0.0 -32985348833329 0.0 -35184372088834 0.0 -35184372088850 0.6666667 -35184372088856 0.33333334 -35184372088871 0.0 -37383395344394 0.0 -37383395344409 0.0 +#query II 
+#WITH cte1 AS ( +# SELECT CREATE_CSR_EDGE( +# 0, +# (SELECT count(a.id) FROM Person a), +# CAST ( +# (SELECT sum(CREATE_CSR_VERTEX( +# 0, +# (SELECT count(a.id) FROM Person a), +# sub.dense_id, +# sub.cnt) +# ) * 2 +# FROM ( +# SELECT dense_id, count(*) as cnt FROM ( +# SELECT dense_id, outgoing_edge, incoming_edge +# FROM ( +# SELECT a.rowid AS dense_id, k.person1id AS outgoing_edge, k.person2id AS incoming_edge +# FROM Person a +# JOIN Person_knows_person k ON k.person1id = a.id +# UNION ALL +# SELECT a.rowid AS dense_id, k.person2id AS outgoing_edge, k.person1id AS incoming_edge +# FROM Person a +# JOIN Person_knows_person k on k.person2id = a.id) +# GROUP BY dense_id, outgoing_edge, incoming_edge) +# GROUP BY dense_id) sub +# ) +# AS BIGINT), +# src, +# dst, +# edge) as temp FROM ( +# select src, dst, any_value(edge) as edge FROM ( +# select a.rowid as src, c.rowid as dst, k.rowid as edge FROM Person_knows_person k +# JOIN Person a on a.id = k.person1id +# JOIN Person c on c.id = k.person2id +# UNION ALL +# select a.rowid as src, c.rowid as dst, k.rowid as edge FROM Person_knows_person k +# JOIN Person a on a.id = k.person2id +# JOIN Person c on c.id = k.person1id) +# GROUP BY src, dst) +#) SELECT id, __x.temp + local_clustering_coefficient(0, a.rowid) as lcc +# FROM (select count(cte1.temp) * 0 as temp from cte1) __x, Person a +# ORDER BY id ASC; +#---- +#14 0.33333334 +#16 0.5 +#32 0.8333333 +#2199023255557 0.6666667 +#2199023255573 1.0 +#2199023255594 0.1904762 +#4398046511139 0.0 +#6597069766702 0.0 +#8796093022234 0.0 +#8796093022237 0.6666667 +#8796093022244 0.0 +#8796093022249 0.4 +#10995116277761 0.3 +#10995116277782 0.23809524 +#10995116277783 0.0 +#10995116277808 0.0 +#13194139533342 1.0 +#13194139533352 0.31111112 +#13194139533355 0.2 +#15393162788877 0.0 +#17592186044443 0.0 +#17592186044461 1.0 +#19791209299968 1.0 +#19791209299987 0.0 +#21990232555526 0.0 +#21990232555527 0.0 +#24189255811081 0.125 +#24189255811109 1.0 +#26388279066632 0.0 
+#26388279066641 0.8333333 +#26388279066655 0.33333334 +#26388279066658 0.21794872 +#26388279066668 0.5 +#28587302322180 0.16666667 +#28587302322191 0.0 +#28587302322196 0.8333333 +#28587302322204 0.2857143 +#28587302322209 0.0 +#28587302322223 0.0 +#30786325577731 0.0 +#30786325577740 1.0 +#32985348833291 0.0 +#32985348833318 0.0 +#32985348833329 0.0 +#35184372088834 0.0 +#35184372088850 0.6666667 +#35184372088856 0.33333334 +#35184372088871 0.0 +#37383395344394 0.0 +#37383395344409 0.0 +# +#query II +#WITH edges_cte AS ( +# SELECT a.rowid as src, c.rowid as dst, k.rowid as edges +# FROM Person_knows_person k +# JOIN Person a on a.id = k.person1id +# JOIN Person c on c.id = k.person2id +#), +#cte1 AS ( +# SELECT CREATE_CSR_EDGE( +# 0, +# (SELECT count(a.id) FROM Person a), +# CAST ( +# (SELECT sum(CREATE_CSR_VERTEX( +# 0, +# (SELECT count(a.id) FROM Person a), +# sub.dense_id, +# sub.cnt) +# ) * 2 +# FROM ( +# SELECT dense_id, count(*) as cnt FROM ( +# SELECT dense_id, outgoing_edge, incoming_edge +# FROM ( +# SELECT a.rowid AS dense_id, k.person1id AS outgoing_edge, k.person2id AS incoming_edge +# FROM Person a +# JOIN Person_knows_person k ON k.person1id = a.id +# UNION ALL +# SELECT a.rowid AS dense_id, k.person2id AS outgoing_edge, k.person1id AS incoming_edge +# FROM Person a +# JOIN Person_knows_person k on k.person2id = a.id) +# GROUP BY dense_id, outgoing_edge, incoming_edge) +# GROUP BY dense_id) sub +# ) +# AS BIGINT), +# src, +# dst, +# edge) as temp FROM ( +# select src, dst, any_value(edges) as edge FROM ( +# select src, dst, edges from edges_cte UNION all select dst, src, edges from edges_cte) +# GROUP BY src, dst) +#) SELECT id, __x.temp + local_clustering_coefficient(0, a.rowid) as lcc +# FROM (select count(cte1.temp) * 0 as temp from cte1) __x, Person a +# ORDER BY id ASC; +#---- +#14 0.33333334 +#16 0.5 +#32 0.8333333 +#2199023255557 0.6666667 +#2199023255573 1.0 +#2199023255594 0.1904762 +#4398046511139 0.0 +#6597069766702 0.0 +#8796093022234 
0.0 +#8796093022237 0.6666667 +#8796093022244 0.0 +#8796093022249 0.4 +#10995116277761 0.3 +#10995116277782 0.23809524 +#10995116277783 0.0 +#10995116277808 0.0 +#13194139533342 1.0 +#13194139533352 0.31111112 +#13194139533355 0.2 +#15393162788877 0.0 +#17592186044443 0.0 +#17592186044461 1.0 +#19791209299968 1.0 +#19791209299987 0.0 +#21990232555526 0.0 +#21990232555527 0.0 +#24189255811081 0.125 +#24189255811109 1.0 +#26388279066632 0.0 +#26388279066641 0.8333333 +#26388279066655 0.33333334 +#26388279066658 0.21794872 +#26388279066668 0.5 +#28587302322180 0.16666667 +#28587302322191 0.0 +#28587302322196 0.8333333 +#28587302322204 0.2857143 +#28587302322209 0.0 +#28587302322223 0.0 +#30786325577731 0.0 +#30786325577740 1.0 +#32985348833291 0.0 +#32985348833318 0.0 +#32985348833329 0.0 +#35184372088834 0.0 +#35184372088850 0.6666667 +#35184372088856 0.33333334 +#35184372088871 0.0 +#37383395344394 0.0 +#37383395344409 0.0 +# +#query II +#WITH edges_cte AS ( +# SELECT a.rowid as src, c.rowid as dst, k.rowid as edges +# FROM Person_knows_person k +# JOIN Person a ON a.id = k.person1id +# JOIN Person c ON c.id = k.person2id +#), +#cte1 AS ( +# SELECT CREATE_CSR_EDGE( +# 0, +# (SELECT count(id) FROM Person), +# CAST( +# (SELECT sum(CREATE_CSR_VERTEX( +# 0, +# (SELECT count(id) FROM Person), +# sub.dense_id, +# sub.cnt) +# ) * 2 +# FROM ( +# SELECT dense_id, count(*) as cnt +# FROM ( +# SELECT DISTINCT a.rowid AS dense_id, k.person1id AS outgoing_edge, k.person2id AS incoming_edge +# FROM Person a +# JOIN Person_knows_person k ON k.person1id = a.id +# UNION ALL +# SELECT DISTINCT a.rowid AS dense_id, k.person2id AS outgoing_edge, k.person1id AS incoming_edge +# FROM Person a +# JOIN Person_knows_person k ON k.person2id = a.id +# ) unique_edges +# GROUP BY dense_id -- group by dense_id to count the number of unique edges +# ) sub +# ) AS BIGINT), +# src, +# dst, +# edge) as temp +# FROM ( +# SELECT src, dst, any_value(edges) as edge +# FROM ( +# SELECT src, dst, edges 
FROM edges_cte +# UNION ALL +# SELECT dst, src, edges FROM edges_cte +# ) +# GROUP BY src, dst +# ) +#) +#SELECT id, __x.temp + local_clustering_coefficient(0, a.rowid) as lcc +#FROM ( +# SELECT count(cte1.temp) * 0 as temp FROM cte1 +#) __x, Person a +#ORDER BY id ASC; +#---- +#14 0.33333334 +#16 0.5 +#32 0.8333333 +#2199023255557 0.6666667 +#2199023255573 1.0 +#2199023255594 0.1904762 +#4398046511139 0.0 +#6597069766702 0.0 +#8796093022234 0.0 +#8796093022237 0.6666667 +#8796093022244 0.0 +#8796093022249 0.4 +#10995116277761 0.3 +#10995116277782 0.23809524 +#10995116277783 0.0 +#10995116277808 0.0 +#13194139533342 1.0 +#13194139533352 0.31111112 +#13194139533355 0.2 +#15393162788877 0.0 +#17592186044443 0.0 +#17592186044461 1.0 +#19791209299968 1.0 +#19791209299987 0.0 +#21990232555526 0.0 +#21990232555527 0.0 +#24189255811081 0.125 +#24189255811109 1.0 +#26388279066632 0.0 +#26388279066641 0.8333333 +#26388279066655 0.33333334 +#26388279066658 0.21794872 +#26388279066668 0.5 +#28587302322180 0.16666667 +#28587302322191 0.0 +#28587302322196 0.8333333 +#28587302322204 0.2857143 +#28587302322209 0.0 +#28587302322223 0.0 +#30786325577731 0.0 +#30786325577740 1.0 +#32985348833291 0.0 +#32985348833318 0.0 +#32985348833329 0.0 +#35184372088834 0.0 +#35184372088850 0.6666667 +#35184372088856 0.33333334 +#35184372088871 0.0 +#37383395344394 0.0 +#37383395344409 0.0 From f6e663bce3dc79a057e2ca80e5e6e85bb4d3854f Mon Sep 17 00:00:00 2001 From: dtenwolde Date: Tue, 3 Sep 2024 11:28:09 +0200 Subject: [PATCH 12/17] Delete csr when exception is thrown --- src/core/functions/scalar/csr_creation.cpp | 3 +- .../sql/path_finding/non-unique-vertices.test | 28 ++++++++++++++++++- 2 files changed, 29 insertions(+), 2 deletions(-) diff --git a/src/core/functions/scalar/csr_creation.cpp b/src/core/functions/scalar/csr_creation.cpp index e3afee58..e470e3b5 100644 --- a/src/core/functions/scalar/csr_creation.cpp +++ b/src/core/functions/scalar/csr_creation.cpp @@ -138,7 +138,8 @@ static 
void CreateCsrEdgeFunction(DataChunk &args, ExpressionState &state, int64_t edge_size = args.data[2].GetValue(0).GetValue(); int64_t edge_size_count = args.data[3].GetValue(0).GetValue(); if (edge_size != edge_size_count) { - throw ConstraintException("Non-unique vertices detected. Make sure all vertices are unique for path-finding queries."); + duckpgq_state->csr_to_delete.insert(info.id); + throw ConstraintException("Non-unique vertices detected. Make sure all vertices are unique for path-finding queries."); } auto csr_entry = duckpgq_state->csr_list.find(info.id); diff --git a/test/sql/path_finding/non-unique-vertices.test b/test/sql/path_finding/non-unique-vertices.test index 72635d0f..ad8c357a 100644 --- a/test/sql/path_finding/non-unique-vertices.test +++ b/test/sql/path_finding/non-unique-vertices.test @@ -35,7 +35,7 @@ statement ok COLUMNS (path_length(p), vertices(p), v2.x) ); -# ANY SHORTEST v-[e]-> +(v) fails with "INTERNAL Error: Attempted to access index 1 within vector of size 1" +## ANY SHORTEST v-[e]-> +(v) fails with "INTERNAL Error: Attempted to access index 1 within vector of size 1" statement error -FROM GRAPH_TABLE(g MATCH p = ANY SHORTEST (v1:v)-[e:e]-> +(v2:v) @@ -52,3 +52,29 @@ statement error ); ---- Constraint Error: Non-unique vertices detected. Make sure all vertices are unique for path-finding queries. 
+ +statement ok +CREATE TABLE v2 (x VARCHAR);INSERT INTO v2 VALUES ('a'), ('b'), ('c'), ('c'), ('b'); + +statement ok +CREATE TABLE e2 (x1 VARCHAR, x2 VARCHAR);INSERT INTO e2 VALUES ('a', 'b'), ('b', 'c'); + +statement ok +-CREATE PROPERTY GRAPH g2 +VERTEX TABLES ( + v2 +) +EDGE TABLES ( + e2 + SOURCE KEY (x1) REFERENCES v2 (x) + DESTINATION KEY (x2) REFERENCES v2 (x) +); + +# ANY SHORTEST v-[e]->{1,2}(v) also fails with "INTERNAL Error: Attempted to access index 1 within vector of size 1" +statement error +-FROM GRAPH_TABLE(g2 + MATCH p = ANY SHORTEST (v1:v2)-[e:e2]->{1,2}(v2:v2) + COLUMNS (path_length(p), vertices(p), v2.x) +); +---- +Constraint Error: Non-unique vertices detected. Make sure all vertices are unique for path-finding queries. \ No newline at end of file From 58e9481df6b1bea82529a56f1adcf09c4fd2c462 Mon Sep 17 00:00:00 2001 From: dtenwolde Date: Tue, 3 Sep 2024 11:29:57 +0200 Subject: [PATCH 13/17] Add wcc test case --- test/sql/path_finding/non-unique-vertices.test | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/test/sql/path_finding/non-unique-vertices.test b/test/sql/path_finding/non-unique-vertices.test index ad8c357a..ca479d2a 100644 --- a/test/sql/path_finding/non-unique-vertices.test +++ b/test/sql/path_finding/non-unique-vertices.test @@ -77,4 +77,9 @@ statement error COLUMNS (path_length(p), vertices(p), v2.x) ); ---- +Constraint Error: Non-unique vertices detected. Make sure all vertices are unique for path-finding queries. + +statement error +from weakly_connected_component(g2, v2, e2); +---- Constraint Error: Non-unique vertices detected. Make sure all vertices are unique for path-finding queries. 
\ No newline at end of file From af99489cf82f9f043e5a22ba079b28c76a0cade7 Mon Sep 17 00:00:00 2001 From: dtenwolde Date: Tue, 3 Sep 2024 12:05:01 +0200 Subject: [PATCH 14/17] Remove colref --- src/core/utils/compressed_sparse_row.cpp | 6 ------ 1 file changed, 6 deletions(-) diff --git a/src/core/utils/compressed_sparse_row.cpp b/src/core/utils/compressed_sparse_row.cpp index 63148057..984af840 100644 --- a/src/core/utils/compressed_sparse_row.cpp +++ b/src/core/utils/compressed_sparse_row.cpp @@ -98,12 +98,6 @@ CSRFunctionData::CSRBind(ClientContext &context, ScalarFunction &bound_function, LogicalType::BOOLEAN); } -// Helper function to create a ColumnRefExpression with alias -// unique_ptr CreateColumnRef(const std::string &column_name, const std::string &table_name, const std::string &alias) { -// auto col_ref = make_uniq(column_name, table_name); -// col_ref->alias = alias; -// return col_ref; -// } // Helper function to create a JoinRef unique_ptr CreateJoin(const std::string &fk_column, const std::string &pk_column, const std::string &table_name, const std::string &source_reference) { From 93f73ae72e6a8176a080905ef6df376d059abf6d Mon Sep 17 00:00:00 2001 From: dtenwolde Date: Tue, 3 Sep 2024 13:17:04 +0200 Subject: [PATCH 15/17] Remove commented out tests --- .../scalar/local_clustering_coefficient.test | 293 ------------------ 1 file changed, 293 deletions(-) diff --git a/test/sql/scalar/local_clustering_coefficient.test b/test/sql/scalar/local_clustering_coefficient.test index 27548ec5..91498fc2 100644 --- a/test/sql/scalar/local_clustering_coefficient.test +++ b/test/sql/scalar/local_clustering_coefficient.test @@ -231,299 +231,6 @@ select id, local_clustering_coefficient from local_clustering_coefficient(snb, p 37383395344409 0.0 -#query II -#WITH cte1 AS ( -# SELECT CREATE_CSR_EDGE( -# 0, -# (SELECT count(a.id) FROM Person a), -# CAST ( -# (SELECT sum(CREATE_CSR_VERTEX( -# 0, -# (SELECT count(a.id) FROM Person a), -# sub.dense_id, -# sub.cnt) -# ) * 2 
-# FROM ( -# SELECT dense_id, count(*) as cnt FROM ( -# SELECT dense_id, outgoing_edge, incoming_edge -# FROM ( -# SELECT a.rowid AS dense_id, k.person1id AS outgoing_edge, k.person2id AS incoming_edge -# FROM Person a -# JOIN Person_knows_person k ON k.person1id = a.id -# UNION ALL -# SELECT a.rowid AS dense_id, k.person2id AS outgoing_edge, k.person1id AS incoming_edge -# FROM Person a -# JOIN Person_knows_person k on k.person2id = a.id) -# GROUP BY dense_id, outgoing_edge, incoming_edge) -# GROUP BY dense_id) sub -# ) -# AS BIGINT), -# src, -# dst, -# edge) as temp FROM ( -# select src, dst, any_value(edge) as edge FROM ( -# select a.rowid as src, c.rowid as dst, k.rowid as edge FROM Person_knows_person k -# JOIN Person a on a.id = k.person1id -# JOIN Person c on c.id = k.person2id -# UNION ALL -# select a.rowid as src, c.rowid as dst, k.rowid as edge FROM Person_knows_person k -# JOIN Person a on a.id = k.person2id -# JOIN Person c on c.id = k.person1id) -# GROUP BY src, dst) -#) SELECT id, __x.temp + local_clustering_coefficient(0, a.rowid) as lcc -# FROM (select count(cte1.temp) * 0 as temp from cte1) __x, Person a -# ORDER BY id ASC; -#---- -#14 0.33333334 -#16 0.5 -#32 0.8333333 -#2199023255557 0.6666667 -#2199023255573 1.0 -#2199023255594 0.1904762 -#4398046511139 0.0 -#6597069766702 0.0 -#8796093022234 0.0 -#8796093022237 0.6666667 -#8796093022244 0.0 -#8796093022249 0.4 -#10995116277761 0.3 -#10995116277782 0.23809524 -#10995116277783 0.0 -#10995116277808 0.0 -#13194139533342 1.0 -#13194139533352 0.31111112 -#13194139533355 0.2 -#15393162788877 0.0 -#17592186044443 0.0 -#17592186044461 1.0 -#19791209299968 1.0 -#19791209299987 0.0 -#21990232555526 0.0 -#21990232555527 0.0 -#24189255811081 0.125 -#24189255811109 1.0 -#26388279066632 0.0 -#26388279066641 0.8333333 -#26388279066655 0.33333334 -#26388279066658 0.21794872 -#26388279066668 0.5 -#28587302322180 0.16666667 -#28587302322191 0.0 -#28587302322196 0.8333333 -#28587302322204 0.2857143 
-#28587302322209 0.0 -#28587302322223 0.0 -#30786325577731 0.0 -#30786325577740 1.0 -#32985348833291 0.0 -#32985348833318 0.0 -#32985348833329 0.0 -#35184372088834 0.0 -#35184372088850 0.6666667 -#35184372088856 0.33333334 -#35184372088871 0.0 -#37383395344394 0.0 -#37383395344409 0.0 -# -#query II -#WITH edges_cte AS ( -# SELECT a.rowid as src, c.rowid as dst, k.rowid as edges -# FROM Person_knows_person k -# JOIN Person a on a.id = k.person1id -# JOIN Person c on c.id = k.person2id -#), -#cte1 AS ( -# SELECT CREATE_CSR_EDGE( -# 0, -# (SELECT count(a.id) FROM Person a), -# CAST ( -# (SELECT sum(CREATE_CSR_VERTEX( -# 0, -# (SELECT count(a.id) FROM Person a), -# sub.dense_id, -# sub.cnt) -# ) * 2 -# FROM ( -# SELECT dense_id, count(*) as cnt FROM ( -# SELECT dense_id, outgoing_edge, incoming_edge -# FROM ( -# SELECT a.rowid AS dense_id, k.person1id AS outgoing_edge, k.person2id AS incoming_edge -# FROM Person a -# JOIN Person_knows_person k ON k.person1id = a.id -# UNION ALL -# SELECT a.rowid AS dense_id, k.person2id AS outgoing_edge, k.person1id AS incoming_edge -# FROM Person a -# JOIN Person_knows_person k on k.person2id = a.id) -# GROUP BY dense_id, outgoing_edge, incoming_edge) -# GROUP BY dense_id) sub -# ) -# AS BIGINT), -# src, -# dst, -# edge) as temp FROM ( -# select src, dst, any_value(edges) as edge FROM ( -# select src, dst, edges from edges_cte UNION all select dst, src, edges from edges_cte) -# GROUP BY src, dst) -#) SELECT id, __x.temp + local_clustering_coefficient(0, a.rowid) as lcc -# FROM (select count(cte1.temp) * 0 as temp from cte1) __x, Person a -# ORDER BY id ASC; -#---- -#14 0.33333334 -#16 0.5 -#32 0.8333333 -#2199023255557 0.6666667 -#2199023255573 1.0 -#2199023255594 0.1904762 -#4398046511139 0.0 -#6597069766702 0.0 -#8796093022234 0.0 -#8796093022237 0.6666667 -#8796093022244 0.0 -#8796093022249 0.4 -#10995116277761 0.3 -#10995116277782 0.23809524 -#10995116277783 0.0 -#10995116277808 0.0 -#13194139533342 1.0 -#13194139533352 0.31111112 
-#13194139533355 0.2 -#15393162788877 0.0 -#17592186044443 0.0 -#17592186044461 1.0 -#19791209299968 1.0 -#19791209299987 0.0 -#21990232555526 0.0 -#21990232555527 0.0 -#24189255811081 0.125 -#24189255811109 1.0 -#26388279066632 0.0 -#26388279066641 0.8333333 -#26388279066655 0.33333334 -#26388279066658 0.21794872 -#26388279066668 0.5 -#28587302322180 0.16666667 -#28587302322191 0.0 -#28587302322196 0.8333333 -#28587302322204 0.2857143 -#28587302322209 0.0 -#28587302322223 0.0 -#30786325577731 0.0 -#30786325577740 1.0 -#32985348833291 0.0 -#32985348833318 0.0 -#32985348833329 0.0 -#35184372088834 0.0 -#35184372088850 0.6666667 -#35184372088856 0.33333334 -#35184372088871 0.0 -#37383395344394 0.0 -#37383395344409 0.0 -# -#query II -#WITH edges_cte AS ( -# SELECT a.rowid as src, c.rowid as dst, k.rowid as edges -# FROM Person_knows_person k -# JOIN Person a ON a.id = k.person1id -# JOIN Person c ON c.id = k.person2id -#), -#cte1 AS ( -# SELECT CREATE_CSR_EDGE( -# 0, -# (SELECT count(id) FROM Person), -# CAST( -# (SELECT sum(CREATE_CSR_VERTEX( -# 0, -# (SELECT count(id) FROM Person), -# sub.dense_id, -# sub.cnt) -# ) * 2 -# FROM ( -# SELECT dense_id, count(*) as cnt -# FROM ( -# SELECT DISTINCT a.rowid AS dense_id, k.person1id AS outgoing_edge, k.person2id AS incoming_edge -# FROM Person a -# JOIN Person_knows_person k ON k.person1id = a.id -# UNION ALL -# SELECT DISTINCT a.rowid AS dense_id, k.person2id AS outgoing_edge, k.person1id AS incoming_edge -# FROM Person a -# JOIN Person_knows_person k ON k.person2id = a.id -# ) unique_edges -# GROUP BY dense_id -- group by dense_id to count the number of unique edges -# ) sub -# ) AS BIGINT), -# src, -# dst, -# edge) as temp -# FROM ( -# SELECT src, dst, any_value(edges) as edge -# FROM ( -# SELECT src, dst, edges FROM edges_cte -# UNION ALL -# SELECT dst, src, edges FROM edges_cte -# ) -# GROUP BY src, dst -# ) -#) -#SELECT id, __x.temp + local_clustering_coefficient(0, a.rowid) as lcc -#FROM ( -# SELECT count(cte1.temp) 
* 0 as temp FROM cte1 -#) __x, Person a -#ORDER BY id ASC; -#---- -#14 0.33333334 -#16 0.5 -#32 0.8333333 -#2199023255557 0.6666667 -#2199023255573 1.0 -#2199023255594 0.1904762 -#4398046511139 0.0 -#6597069766702 0.0 -#8796093022234 0.0 -#8796093022237 0.6666667 -#8796093022244 0.0 -#8796093022249 0.4 -#10995116277761 0.3 -#10995116277782 0.23809524 -#10995116277783 0.0 -#10995116277808 0.0 -#13194139533342 1.0 -#13194139533352 0.31111112 -#13194139533355 0.2 -#15393162788877 0.0 -#17592186044443 0.0 -#17592186044461 1.0 -#19791209299968 1.0 -#19791209299987 0.0 -#21990232555526 0.0 -#21990232555527 0.0 -#24189255811081 0.125 -#24189255811109 1.0 -#26388279066632 0.0 -#26388279066641 0.8333333 -#26388279066655 0.33333334 -#26388279066658 0.21794872 -#26388279066668 0.5 -#28587302322180 0.16666667 -#28587302322191 0.0 -#28587302322196 0.8333333 -#28587302322204 0.2857143 -#28587302322209 0.0 -#28587302322223 0.0 -#30786325577731 0.0 -#30786325577740 1.0 -#32985348833291 0.0 -#32985348833318 0.0 -#32985348833329 0.0 -#35184372088834 0.0 -#35184372088850 0.6666667 -#35184372088856 0.33333334 -#35184372088871 0.0 -#37383395344394 0.0 -#37383395344409 0.0 - - - - # Test with a graph having self-loops statement ok CREATE TABLE SelfLoopStudent(id BIGINT, name VARCHAR);INSERT INTO SelfLoopStudent VALUES (0, 'Alice'), (1, 'Bob'), (2, 'Charlie'); From 56694bcfb9ec22856fc466797579bce4b52d5226 Mon Sep 17 00:00:00 2001 From: dtenwolde Date: Tue, 3 Sep 2024 13:48:11 +0200 Subject: [PATCH 16/17] Remove unused function --- .../scalar/weakly_connected_component.cpp | 22 ------------------- 1 file changed, 22 deletions(-) diff --git a/src/core/functions/scalar/weakly_connected_component.cpp b/src/core/functions/scalar/weakly_connected_component.cpp index af0e710a..b42fbf44 100644 --- a/src/core/functions/scalar/weakly_connected_component.cpp +++ b/src/core/functions/scalar/weakly_connected_component.cpp @@ -57,28 +57,6 @@ static void UpdateComponentId(int64_t node, int64_t 
component_id, } } -// Function to handle nodes that didn't finish properly -static void AssignUnfinishedLanesToComponent(Vector &result, - WeaklyConnectedComponentFunctionData &info, - UnifiedVectorFormat &vdata_src, - int64_t *src_data, - size_t v_size) { - auto result_data = FlatVector::GetData(result); - for (idx_t i = 0; i < v_size; i++) { - if (info.componentId[i] == -1) { - // Assign the node to its own component - info.componentId[i] = i; - } - } - - for (idx_t i = 0; i < v_size; i++) { - int64_t src_pos = vdata_src.sel->get_index(i); - if (vdata_src.validity.RowIsValid(src_pos)) { - result_data[i] = info.componentId[src_data[src_pos]]; - } - } -} - static void WeaklyConnectedComponentFunction(DataChunk &args, ExpressionState &state, Vector &result) { From 1216fb2d9ea7da354d8da90a4312c630a42b82ed Mon Sep 17 00:00:00 2001 From: dtenwolde Date: Tue, 3 Sep 2024 13:51:47 +0200 Subject: [PATCH 17/17] Remove commented code --- src/core/functions/scalar/iterativelength2.cpp | 3 --- src/core/utils/compressed_sparse_row.cpp | 1 - 2 files changed, 4 deletions(-) diff --git a/src/core/functions/scalar/iterativelength2.cpp b/src/core/functions/scalar/iterativelength2.cpp index 84950d6a..c61fa095 100644 --- a/src/core/functions/scalar/iterativelength2.cpp +++ b/src/core/functions/scalar/iterativelength2.cpp @@ -107,9 +107,6 @@ static void IterativeLength2Function(DataChunk &args, ExpressionState &state, // make passes while a lane is still active for (int64_t iter = 1; active; iter++) { - // std::cout << "Single direction iteration: " << iter << - // std::endl; - if (!IterativeLength2(v_size, v, e, seen, (iter & 1) ? visit1 : visit2, (iter & 1) ? 
visit2 : visit1)) { break; diff --git a/src/core/utils/compressed_sparse_row.cpp b/src/core/utils/compressed_sparse_row.cpp index 984af840..0d5436c1 100644 --- a/src/core/utils/compressed_sparse_row.cpp +++ b/src/core/utils/compressed_sparse_row.cpp @@ -430,7 +430,6 @@ unique_ptr MakeEdgesCTE(const shared_ptr(); result->query = std::move(select_statement); - std::cout << result->query->ToString(); return result; }