From 2c43691ad220618b7c9b4f9d35abe912323d4c5a Mon Sep 17 00:00:00 2001 From: dtenwolde Date: Mon, 5 Feb 2024 16:54:58 +0100 Subject: [PATCH 1/7] Update --- duckdb-pgq | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/duckdb-pgq b/duckdb-pgq index 82a05987..0817ac6b 160000 --- a/duckdb-pgq +++ b/duckdb-pgq @@ -1 +1 @@ -Subproject commit 82a059870aa81cd8c72f9d75123e69e1430bbcf5 +Subproject commit 0817ac6b8283dac6e6a78526132c9592627cc1ba From e32224a1e9fbf226e412f5173fcdbfe62b2cac3b Mon Sep 17 00:00:00 2001 From: dtenwolde Date: Tue, 6 Feb 2024 13:42:51 +0100 Subject: [PATCH 2/7] WITH statements and subqueries now seem to work --- duckpgq/src/duckpgq_extension.cpp | 41 ++++++++-------- test/sql/with_statement_duckpgq.test | 73 ++++++++++++++++++++++++++++ 2 files changed, 94 insertions(+), 20 deletions(-) create mode 100644 test/sql/with_statement_duckpgq.test diff --git a/duckpgq/src/duckpgq_extension.cpp b/duckpgq/src/duckpgq_extension.cpp index 9d419012..5f2a91f2 100644 --- a/duckpgq/src/duckpgq_extension.cpp +++ b/duckpgq/src/duckpgq_extension.cpp @@ -17,6 +17,7 @@ #include "duckdb/parser/query_node/select_node.hpp" #include "duckdb/parser/statement/copy_statement.hpp" #include "duckdb/parser/parsed_data/create_table_info.hpp" +#include "duckdb/parser/tableref/joinref.hpp" #include "duckdb/parser/statement/extension_statement.hpp" @@ -109,24 +110,32 @@ BoundStatement duckpgq_bind(ClientContext &context, Binder &binder, throw BinderException("Unable to find DuckPGQ Parse Data"); } -ParserExtensionPlanResult duckpgq_handle_statement(SQLStatement *statement, DuckPGQState &duckpgq_state) { - if (statement->type == StatementType::SELECT_STATEMENT) { - auto select_statement = dynamic_cast(statement); - auto select_node = dynamic_cast(select_statement->node.get()); - auto from_table_function = - dynamic_cast(select_node->from_table.get()); - auto function = - dynamic_cast(from_table_function->function.get()); +void duckpgq_find_match_function(TableRef* table_ref, DuckPGQState &duckpgq_state) { + if (auto table_function_ref = dynamic_cast(table_ref)) { + // Handle TableFunctionRef case + auto function = dynamic_cast(table_function_ref->function.get()); if (function->function_name == "duckpgq_match") { duckpgq_state.transform_expression = std::move(std::move(function->children[0])); function->children.pop_back(); } + } else if (auto join_ref = dynamic_cast(table_ref)) { + // Handle JoinRef case + duckpgq_find_match_function(join_ref->left.get(), duckpgq_state); + duckpgq_find_match_function(join_ref->right.get(), duckpgq_state); + } +} + +ParserExtensionPlanResult duckpgq_handle_statement(SQLStatement *statement, DuckPGQState &duckpgq_state) { + if (statement->type == StatementType::SELECT_STATEMENT) { + auto select_statement = dynamic_cast(statement); + auto select_node = dynamic_cast(select_statement->node.get()); + duckpgq_find_match_function(select_node->from_table.get(), duckpgq_state); throw Exception("use duckpgq_bind instead"); } if (statement->type == StatementType::CREATE_STATEMENT) { - auto &create_statement = statement->Cast(); - auto create_property_graph = dynamic_cast(create_statement.info.get()); + const auto &create_statement = statement->Cast(); + const auto create_property_graph = dynamic_cast(create_statement.info.get()); if (create_property_graph) { ParserExtensionPlanResult result; result.function = CreatePropertyGraphFunction(); @@ -134,7 +143,7 @@ ParserExtensionPlanResult duckpgq_handle_statement(SQLStatement *statement, Duck result.return_type = StatementReturnType::QUERY_RESULT; return result; } - auto create_table = reinterpret_cast(create_statement.info.get()); + const auto create_table = reinterpret_cast(create_statement.info.get()); duckpgq_handle_statement(create_table->query.get(), duckpgq_state); } if (statement->type == StatementType::DROP_STATEMENT) { @@ -152,15 +161,7 @@ ParserExtensionPlanResult duckpgq_handle_statement(SQLStatement *statement, Duck if (statement->type == StatementType::COPY_STATEMENT) { auto ©_statement = statement->Cast(); auto select_node = dynamic_cast(copy_statement.select_statement.get()); - auto from_table_function = - dynamic_cast(select_node->from_table.get()); - auto function = - dynamic_cast(from_table_function->function.get()); - if (function->function_name == "duckpgq_match") { - duckpgq_state.transform_expression = - std::move(std::move(function->children[0])); - function->children.pop_back(); - } + duckpgq_find_match_function(select_node->from_table.get(), duckpgq_state); throw Exception("use duckpgq_bind instead"); } if (statement->type == StatementType::INSERT_STATEMENT) { diff --git a/test/sql/with_statement_duckpgq.test b/test/sql/with_statement_duckpgq.test new file mode 100644 index 00000000..e122fa2d --- /dev/null +++ b/test/sql/with_statement_duckpgq.test @@ -0,0 +1,73 @@ +# name: test/sql/sqlpgq/snb.test +# group: [duckpgq] + +require duckpgq + +statement ok +import database 'duckdb-pgq/data/SNB0.003'; + +statement ok +-CREATE PROPERTY GRAPH snb_projected +VERTEX TABLES (Message); + +query IIIIIII +-WITH message_count AS ( + SELECT count(*) as m_count + FROM Message m + WHERE m.creationDate < '2010-05-27 11:16:36.013' +) +SELECT year, isComment, + CASE WHEN m_length < 40 THEN 0 + WHEN m_length < 80 THEN 1 + WHEN m_length < 160 THEN 2 + ELSE 3 END as lengthCategory, + count(*) as messageCount, + avg(m_length) as averageMessageLength, + sum(m_length) as sumMessageLength, + count(*) / mc.m_count as percentageOfMessages +FROM GRAPH_TABLE(snb_projected + MATCH (message:Message where message.creationDate < '2010-05-27 11:16:36.013') + COLUMNS (date_part('year', message.creationDate::TIMESTAMP) as year, message.ImageFile is NULL as isComment, message.length as m_length, message.id) + ) tmp, message_count mc +GROUP BY year, isComment, lengthCategory, m_count +ORDER BY year DESC, isComment ASC, lengthCategory ASC; +---- +2010 false 0 63 0.0 0 0.9692307692307692 +2010 true 2 2 109.0 218 0.03076923076923077 + + +query II +-FROM GRAPH_TABLE (snb_projected + MATCH (m:message) + COLUMNS (m.id) + ) tmp, (SELECT id from message limit 1) +LIMIT 10; +---- +618475290624 618475290624 +343597383683 618475290624 +343597383684 618475290624 +962072674309 618475290624 +962072674310 618475290624 +962072674311 618475290624 +962072674312 618475290624 +962072674313 618475290624 +962072674314 618475290624 +962072674315 618475290624 + +query II +-FROM (SELECT id from message limit 1), GRAPH_TABLE (snb_projected + MATCH (m:message) + COLUMNS (m.id) + ) tmp +LIMIT 10; +---- +618475290624 618475290624 +618475290624 343597383683 +618475290624 343597383684 +618475290624 962072674309 +618475290624 962072674310 +618475290624 962072674311 +618475290624 962072674312 +618475290624 962072674313 +618475290624 962072674314 +618475290624 962072674315 \ No newline at end of file From b09432dcdf2df72a04d925fa261b010e6e544846 Mon Sep 17 00:00:00 2001 From: dtenwolde Date: Tue, 6 Feb 2024 13:45:33 +0100 Subject: [PATCH 3/7] Add const where needed --- duckpgq/src/duckpgq_extension.cpp | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/duckpgq/src/duckpgq_extension.cpp b/duckpgq/src/duckpgq_extension.cpp index 5f2a91f2..b38d9420 100644 --- a/duckpgq/src/duckpgq_extension.cpp +++ b/duckpgq/src/duckpgq_extension.cpp @@ -128,8 +128,8 @@ void duckpgq_find_match_function(TableRef* table_ref, DuckPGQState &duckpgq_stat ParserExtensionPlanResult duckpgq_handle_statement(SQLStatement *statement, DuckPGQState &duckpgq_state) { if (statement->type == StatementType::SELECT_STATEMENT) { - auto select_statement = dynamic_cast(statement); - auto select_node = dynamic_cast(select_statement->node.get()); + const auto select_statement = dynamic_cast(statement); + const auto select_node = dynamic_cast(select_statement->node.get()); duckpgq_find_match_function(select_node->from_table.get(), duckpgq_state); throw Exception("use duckpgq_bind instead"); } @@ -159,13 +159,13 @@ ParserExtensionPlanResult duckpgq_handle_statement(SQLStatement *statement, Duck duckpgq_handle_statement(explain_statement.stmt.get(), duckpgq_state); } if (statement->type == StatementType::COPY_STATEMENT) { - auto ©_statement = statement->Cast(); - auto select_node = dynamic_cast(copy_statement.select_statement.get()); + const auto ©_statement = statement->Cast(); + const auto select_node = dynamic_cast(copy_statement.select_statement.get()); duckpgq_find_match_function(select_node->from_table.get(), duckpgq_state); throw Exception("use duckpgq_bind instead"); } if (statement->type == StatementType::INSERT_STATEMENT) { - auto &insert_statement = statement->Cast(); + const auto &insert_statement = statement->Cast(); duckpgq_handle_statement(insert_statement.select_statement.get(), duckpgq_state); } From 909dc955e3693ce740e34e3eb268bf6ecb8a6ab3 Mon Sep 17 00:00:00 2001 From: dtenwolde Date: Tue, 6 Feb 2024 13:46:19 +0100 Subject: [PATCH 4/7] Format fix --- duckpgq/include/duckpgq_extension.hpp | 4 +-- duckpgq/src/duckpgq_extension.cpp | 35 +++++++++++++++++---------- 2 files changed, 24 insertions(+), 15 deletions(-) diff --git a/duckpgq/include/duckpgq_extension.hpp b/duckpgq/include/duckpgq_extension.hpp index 97c6e2d2..5bbe091d 100644 --- a/duckpgq/include/duckpgq_extension.hpp +++ b/duckpgq/include/duckpgq_extension.hpp @@ -45,8 +45,8 @@ ParserExtensionPlanResult duckpgq_plan(ParserExtensionInfo *info, ClientContext &, unique_ptr); -ParserExtensionPlanResult duckpgq_handle_statement(unique_ptr &statement); - +ParserExtensionPlanResult +duckpgq_handle_statement(unique_ptr &statement); struct DuckPGQParserExtension : public ParserExtension { DuckPGQParserExtension() : ParserExtension() { diff --git a/duckpgq/src/duckpgq_extension.cpp b/duckpgq/src/duckpgq_extension.cpp index b38d9420..ebf4da15 100644 --- a/duckpgq/src/duckpgq_extension.cpp +++ b/duckpgq/src/duckpgq_extension.cpp @@ -110,10 +110,12 @@ BoundStatement duckpgq_bind(ClientContext &context, Binder &binder, throw BinderException("Unable to find DuckPGQ Parse Data"); } -void duckpgq_find_match_function(TableRef* table_ref, DuckPGQState &duckpgq_state) { +void duckpgq_find_match_function(TableRef *table_ref, + DuckPGQState &duckpgq_state) { if (auto table_function_ref = dynamic_cast(table_ref)) { // Handle TableFunctionRef case - auto function = dynamic_cast(table_function_ref->function.get()); + auto function = + dynamic_cast(table_function_ref->function.get()); if (function->function_name == "duckpgq_match") { duckpgq_state.transform_expression = std::move(std::move(function->children[0])); @@ -126,16 +128,19 @@ void duckpgq_find_match_function(TableRef* table_ref, DuckPGQState &duckpgq_stat } } -ParserExtensionPlanResult duckpgq_handle_statement(SQLStatement *statement, DuckPGQState &duckpgq_state) { +ParserExtensionPlanResult +duckpgq_handle_statement(SQLStatement *statement, DuckPGQState &duckpgq_state) { if (statement->type == StatementType::SELECT_STATEMENT) { const auto select_statement = dynamic_cast(statement); - const auto select_node = dynamic_cast(select_statement->node.get()); + const auto select_node = + dynamic_cast(select_statement->node.get()); duckpgq_find_match_function(select_node->from_table.get(), duckpgq_state); throw Exception("use duckpgq_bind instead"); } if (statement->type == StatementType::CREATE_STATEMENT) { const auto &create_statement = statement->Cast(); - const auto create_property_graph = dynamic_cast(create_statement.info.get()); + const auto create_property_graph = + dynamic_cast(create_statement.info.get()); if (create_property_graph) { ParserExtensionPlanResult result; result.function = CreatePropertyGraphFunction(); @@ -143,7 +148,8 @@ ParserExtensionPlanResult duckpgq_handle_statement(SQLStatement *statement, Duck result.return_type = StatementReturnType::QUERY_RESULT; return result; } - const auto create_table = reinterpret_cast(create_statement.info.get()); + const auto create_table = + reinterpret_cast(create_statement.info.get()); duckpgq_handle_statement(create_table->query.get(), duckpgq_state); } if (statement->type == StatementType::DROP_STATEMENT) { @@ -155,22 +161,27 @@ ParserExtensionPlanResult duckpgq_handle_statement(SQLStatement *statement, Duck } if (statement->type == StatementType::EXPLAIN_STATEMENT) { auto &explain_statement = statement->Cast(); - // auto select_statement = dynamic_cast(explain_statement.stmt.get()); + // auto select_statement = + // dynamic_cast(explain_statement.stmt.get()); duckpgq_handle_statement(explain_statement.stmt.get(), duckpgq_state); } if (statement->type == StatementType::COPY_STATEMENT) { const auto ©_statement = statement->Cast(); - const auto select_node = dynamic_cast(copy_statement.select_statement.get()); + const auto select_node = + dynamic_cast(copy_statement.select_statement.get()); duckpgq_find_match_function(select_node->from_table.get(), duckpgq_state); throw Exception("use duckpgq_bind instead"); } if (statement->type == StatementType::INSERT_STATEMENT) { const auto &insert_statement = statement->Cast(); - duckpgq_handle_statement(insert_statement.select_statement.get(), duckpgq_state); + duckpgq_handle_statement(insert_statement.select_statement.get(), + duckpgq_state); } - // Preferably throw NotImplementedExpection here, but only BinderExceptions are caught properly on MacOS right now - throw BinderException("%s has not been implemented yet for DuckPGQ queries", StatementTypeToString(statement->type)); + // Preferably throw NotImplementedExpection here, but only BinderExceptions + // are caught properly on MacOS right now + throw BinderException("%s has not been implemented yet for DuckPGQ queries", + StatementTypeToString(statement->type)); } ParserExtensionPlanResult @@ -195,8 +206,6 @@ duckpgq_plan(ParserExtensionInfo *, ClientContext &context, auto statement = duckpgq_parse_data->statement.get(); return duckpgq_handle_statement(statement, *duckpgq_state); - - } std::string DuckpgqExtension::Name() { return "duckpgq"; } From 3c8b832b144ce97c8c391859ca9ad626ffc6fb34 Mon Sep 17 00:00:00 2001 From: dtenwolde Date: Thu, 8 Feb 2024 15:02:29 +0100 Subject: [PATCH 5/7] Changing gitmodule to point to main duckdb-pgq branch --- .gitmodules | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.gitmodules b/.gitmodules index 2eae8b8d..69c0ab5a 100644 --- a/.gitmodules +++ b/.gitmodules @@ -1,7 +1,7 @@ [submodule "duckdb-pgq"] path = duckdb-pgq url = git@github.com:cwida/duckdb-pgq.git - branch = master + branch = main [submodule "duckdb"] path = duckdb url = git@github.com:duckdb/duckdb.git From 06bf42725a0c11240808444a5106f23cd26c4e56 Mon Sep 17 00:00:00 2001 From: dtenwolde Date: Thu, 8 Feb 2024 15:16:27 +0100 Subject: [PATCH 6/7] updating duckdb --- duckdb | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/duckdb b/duckdb index da64a739..3c695d7b 160000 --- a/duckdb +++ b/duckdb @@ -1 +1 @@ -Subproject commit da64a73967a339c079d1f1c4f7b46b6d15f0a20f +Subproject commit 3c695d7ba94d95d9facee48d395f46ed0bd72b46 From 624160754fd503b606d8b634fe82dce01e564ada Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Dani=C3=ABl=20ten=20Wolde?= Date: Thu, 8 Feb 2024 15:19:43 +0100 Subject: [PATCH 7/7] Update README.md --- README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README.md b/README.md index 37f57be5..661d5a6d 100644 --- a/README.md +++ b/README.md @@ -7,7 +7,7 @@ This repository is currently a research project and a work in progress. Feel fre ## Getting started To get started, first clone this repository: ```sh -git clone --recurse-submodules https://github.com/cwida/duckpgq-extension.git +git clone --recurse-submodules git@github.com:cwida/duckpgq-extension.git ``` Note that `--recurse-submodules` will ensure the correct version of DuckDB is pulled allowing you to get started right away.