Skip to content

Commit

Permalink
Merge pull request #110 from cwida/107-improve-slow-undirected-edge-p…
Browse files Browse the repository at this point in the history
…atterns

Improve performance on any directed edges
  • Loading branch information
Dtenwolde authored Mar 22, 2024
2 parents 4212f63 + 6abad54 commit 67c2cc1
Show file tree
Hide file tree
Showing 9 changed files with 156 additions and 51 deletions.
2 changes: 1 addition & 1 deletion duckdb-pgq
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,6 @@

#pragma once
#include "duckdb/function/table_function.hpp"
#include "duckdb/parser/statement/create_statement.hpp"
#include "duckdb/parser/parsed_data/create_property_graph_info.hpp"
#include "duckdb/catalog/catalog_entry/table_catalog_entry.hpp"

Expand Down Expand Up @@ -42,12 +41,12 @@ class CreatePropertyGraphFunction : public TableFunction {
CheckPropertyGraphTableColumns(const shared_ptr<PropertyGraphTable> &pg_table,
TableCatalogEntry &table);

static duckdb::unique_ptr<FunctionData>
static unique_ptr<FunctionData>
CreatePropertyGraphBind(ClientContext &context, TableFunctionBindInput &input,
vector<LogicalType> &return_types,
vector<string> &names);

static duckdb::unique_ptr<GlobalTableFunctionState>
static unique_ptr<GlobalTableFunctionState>
CreatePropertyGraphInit(ClientContext &context,
TableFunctionInitInput &input);

Expand Down
9 changes: 5 additions & 4 deletions duckpgq/include/duckpgq/functions/tablefunctions/match.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -67,7 +67,8 @@ struct PGQMatchFunction : public TableFunction {
const string &edge_binding,
const string &prev_binding,
const string &next_binding,
vector<unique_ptr<ParsedExpression>> &conditions);
vector<unique_ptr<ParsedExpression>> &conditions,
unique_ptr<TableRef> &from_clause);

static void EdgeTypeLeft(const shared_ptr<PropertyGraphTable> &edge_table,
const string &next_table_name,
Expand Down Expand Up @@ -120,15 +121,15 @@ struct PGQMatchFunction : public TableFunction {
const SubPath *subpath);

static void
AddEdgeJoins(const unique_ptr<SelectNode> &select_node,
const shared_ptr<PropertyGraphTable> &edge_table,
AddEdgeJoins(const shared_ptr<PropertyGraphTable> &edge_table,
const shared_ptr<PropertyGraphTable> &previous_vertex_table,
const shared_ptr<PropertyGraphTable> &next_vertex_table,
PGQMatchType edge_type, const string &edge_binding,
const string &prev_binding, const string &next_binding,
vector<unique_ptr<ParsedExpression>> &conditions,
unordered_map<string, string> &alias_map,
int32_t &extra_alias_counter);
int32_t &extra_alias_counter,
unique_ptr<TableRef> &from_clause);

static void ProcessPathList(
vector<unique_ptr<PathReference>> &path_pattern,
Expand Down
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
#include "duckpgq/functions/tablefunctions/create_property_graph.hpp"
#include "duckdb/parser/statement/create_statement.hpp"
#include <duckpgq_extension.hpp>

namespace duckdb {
Expand Down Expand Up @@ -66,7 +67,7 @@ CreatePropertyGraphFunction::CreatePropertyGraphBind(
throw Exception(ExceptionType::INVALID,
"Registered DuckPGQ state not found");
}
const auto duckpgq_state = (DuckPGQState *)lookup->second.get();
const auto duckpgq_state = dynamic_cast<DuckPGQState *>(lookup->second.get());
const auto duckpgq_parse_data =
dynamic_cast<DuckPGQParseData *>(duckpgq_state->parse_data.get());

Expand Down
114 changes: 75 additions & 39 deletions duckpgq/src/duckpgq/functions/tablefunctions/match.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,9 @@
#include "duckdb/parser/expression/constant_expression.hpp"
#include "duckdb/parser/expression/comparison_expression.hpp"
#include "duckdb/parser/expression/conjunction_expression.hpp"
#include "duckdb/parser/expression/star_expression.hpp"

#include "duckdb/parser/query_node/set_operation_node.hpp"

#include "duckdb/parser/query_node/select_node.hpp"

Expand All @@ -21,9 +24,8 @@

#include "duckdb/parser/property_graph_table.hpp"
#include "duckdb/parser/subpath_element.hpp"

#include <iostream>
#include <cmath>
#include <duckdb/common/enums/set_operation_type.hpp>

namespace duckdb {
shared_ptr<PropertyGraphTable>
Expand Down Expand Up @@ -364,7 +366,58 @@ void PGQMatchFunction::EdgeTypeAny(
const shared_ptr<PropertyGraphTable> &edge_table,
const string &edge_binding, const string &prev_binding,
const string &next_binding,
vector<unique_ptr<ParsedExpression>> &conditions) {
vector<unique_ptr<ParsedExpression>> &conditions,
unique_ptr<TableRef> &from_clause) {

// START SELECT src, dst, * from edge_table
auto src_dst_select_node = make_uniq<SelectNode>();

auto edge_left_ref = make_uniq<BaseTableRef>();
edge_left_ref->table_name = edge_table->table_name;
src_dst_select_node->from_table = std::move(edge_left_ref);
auto src_dst_children = vector<unique_ptr<ParsedExpression>>();
src_dst_children.push_back(make_uniq<ColumnRefExpression>(edge_table->source_fk[0], edge_table->table_name));
src_dst_children.push_back(make_uniq<ColumnRefExpression>(edge_table->destination_fk[0], edge_table->table_name));
src_dst_children.push_back(make_uniq<StarExpression>());

src_dst_select_node->select_list = std::move(src_dst_children);
// END SELECT src, dst, * from edge_table

// START SELECT dst, src, * from edge_table
auto dst_src_select_node = make_uniq<SelectNode>();

auto edge_right_ref = make_uniq<BaseTableRef>();
edge_right_ref->table_name = edge_table->table_name;
auto dst_src_children = vector<unique_ptr<ParsedExpression>>();
dst_src_select_node->from_table = std::move(edge_right_ref);

dst_src_children.push_back(make_uniq<ColumnRefExpression>(
edge_table->destination_fk[0], edge_table->table_name));
dst_src_children.push_back(make_uniq<ColumnRefExpression>(edge_table->source_fk[0],
edge_table->table_name));
dst_src_children.push_back(make_uniq<StarExpression>());

dst_src_select_node->select_list = std::move(dst_src_children);
// END SELECT dst, src, * from edge_table

auto union_node = make_uniq<SetOperationNode>();
union_node->setop_type = SetOperationType::UNION;
union_node->setop_all = true;
union_node->left = std::move(src_dst_select_node);
union_node->right = std::move(dst_src_select_node);
auto union_select = make_uniq<SelectStatement>();
union_select->node = std::move(union_node);
// (SELECT src, dst, * from edge_table UNION ALL SELECT dst, src, * from edge_table)
auto union_subquery = make_uniq<SubqueryRef>(std::move(union_select));
union_subquery->alias = edge_binding;
if (from_clause) {
auto from_join = make_uniq<JoinRef>(JoinRefType::CROSS);
from_join->left = std::move(from_clause);
from_join->right = std::move(union_subquery);
from_clause = std::move(from_join);
} else {
from_clause = std::move(union_subquery);
}
// (a) src.key = edge.src
auto src_left_expr = CreateMatchJoinExpression(
edge_table->source_pk, edge_table->source_fk, prev_binding, edge_binding);
Expand All @@ -376,23 +429,8 @@ void PGQMatchFunction::EdgeTypeAny(
auto combined_left_expr = make_uniq<ConjunctionExpression>(
ExpressionType::CONJUNCTION_AND, std::move(src_left_expr),
std::move(dst_left_expr));
// (c) src.key = edge.dst
auto src_right_expr = CreateMatchJoinExpression(edge_table->source_pk,
edge_table->destination_fk,
prev_binding, edge_binding);
// (d) dst.key = edge.src
auto dst_right_expr = CreateMatchJoinExpression(edge_table->destination_pk,
edge_table->source_fk,
next_binding, edge_binding);
// (c) AND (d)
auto combined_right_expr = make_uniq<ConjunctionExpression>(
ExpressionType::CONJUNCTION_AND, std::move(src_right_expr),
std::move(dst_right_expr));
// ((a) AND (b)) OR ((c) AND (d))
auto combined_expr = make_uniq<ConjunctionExpression>(
ExpressionType::CONJUNCTION_OR, std::move(combined_left_expr),
std::move(combined_right_expr));
conditions.push_back(std::move(combined_expr));

conditions.push_back(std::move(combined_left_expr));
}

void PGQMatchFunction::EdgeTypeLeft(
Expand Down Expand Up @@ -592,20 +630,21 @@ unique_ptr<ParsedExpression> PGQMatchFunction::CreatePathFindingFunction(
return final_list;
}

void PGQMatchFunction::AddEdgeJoins(
const unique_ptr<SelectNode> &select_node,
const shared_ptr<PropertyGraphTable> &edge_table,
void PGQMatchFunction::AddEdgeJoins(const shared_ptr<PropertyGraphTable> &edge_table,
const shared_ptr<PropertyGraphTable> &previous_vertex_table,
const shared_ptr<PropertyGraphTable> &next_vertex_table,
PGQMatchType edge_type, const string &edge_binding,
const string &prev_binding, const string &next_binding,
vector<unique_ptr<ParsedExpression>> &conditions,
unordered_map<string, string> &alias_map, int32_t &extra_alias_counter) {
unordered_map<string, string> &alias_map, int32_t &extra_alias_counter,
unique_ptr<TableRef> &from_clause) {
if (edge_type != PGQMatchType::MATCH_EDGE_ANY) {
alias_map[edge_binding] = edge_table->table_name;
}
switch (edge_type) {
case PGQMatchType::MATCH_EDGE_ANY: {
select_node->modifiers.push_back(make_uniq<DistinctModifier>());
EdgeTypeAny(edge_table, edge_binding, prev_binding, next_binding,
conditions);
conditions, from_clause);
break;
}
case PGQMatchType::MATCH_EDGE_LEFT:
Expand Down Expand Up @@ -865,33 +904,31 @@ void PGQMatchFunction::ProcessPathList(
} else {
alias_map[edge_element->variable_binding] =
edge_table->source_reference;
AddEdgeJoins(select_node, edge_table, previous_vertex_table,
AddEdgeJoins(edge_table, previous_vertex_table,
next_vertex_table, edge_element->match_type,
edge_element->variable_binding,
previous_vertex_element->variable_binding,
next_vertex_element->variable_binding, conditions,
alias_map, extra_alias_counter);
alias_map, extra_alias_counter, from_clause);
}
} else {
// The edge element is a path element without WHERE or path-finding.
auto edge_table = FindGraphTable(edge_element->label, pg_table);
CheckInheritance(edge_table, edge_element, conditions);
// check aliases
alias_map[edge_element->variable_binding] = edge_table->table_name;
AddEdgeJoins(select_node, edge_table, previous_vertex_table,
AddEdgeJoins(edge_table, previous_vertex_table,
next_vertex_table, edge_element->match_type,
edge_element->variable_binding,
previous_vertex_element->variable_binding,
next_vertex_element->variable_binding, conditions, alias_map,
extra_alias_counter);
next_vertex_element->variable_binding, conditions,
alias_map, extra_alias_counter, from_clause);
// Check the edge type
// If (a)-[b]->(c) -> b.src = a.id AND b.dst = c.id
// If (a)<-[b]-(c) -> b.dst = a.id AND b.src = c.id
// If (a)-[b]-(c) -> (b.src = a.id AND b.dst = c.id) OR
// (b.dst = a.id AND b.src
// = c.id) If (a)<-[b]->(c) -> (b.src = a.id AND b.dst = c.id) AND
// (b.dst = a.id AND b.src
//= c.id)
// If (a)-[b]-(c) -> (b.src = a.id AND b.dst = c.id)
// FROM (src, dst, * from b UNION ALL dst, src, * from b)
// If (a)<-[b]->(c) -> (b.src = a.id AND b.dst = c.id) AND
// (b.dst = a.id AND b.src = c.id)
}
previous_vertex_element = next_vertex_element;
previous_vertex_table = next_vertex_table;
Expand All @@ -900,7 +937,7 @@ void PGQMatchFunction::ProcessPathList(

unique_ptr<TableRef>
PGQMatchFunction::MatchBindReplace(ClientContext &context,
TableFunctionBindInput &) {
TableFunctionBindInput &bind_input) {
auto duckpgq_state_entry = context.registered_state.find("duckpgq");
auto duckpgq_state =
dynamic_cast<DuckPGQState *>(duckpgq_state_entry->second.get());
Expand Down Expand Up @@ -1005,7 +1042,6 @@ PGQMatchFunction::MatchBindReplace(ClientContext &context,
subquery->node = std::move(select_node);

auto result = make_uniq<SubqueryRef>(std::move(subquery), ref->alias);

return std::move(result);
}
} // namespace duckdb
3 changes: 2 additions & 1 deletion duckpgq/src/duckpgq_extension.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,7 @@
#include "duckdb/parser/parsed_data/create_table_function_info.hpp"
#include "duckdb/parser/query_node/select_node.hpp"
#include "duckdb/parser/statement/copy_statement.hpp"
#include "duckdb/parser/statement/create_statement.hpp"
#include "duckdb/parser/parsed_data/create_table_info.hpp"
#include "duckdb/parser/tableref/joinref.hpp"

Expand Down Expand Up @@ -101,7 +102,7 @@ BoundStatement duckpgq_bind(ClientContext &context, Binder &binder,
}

auto duckpgq_state = (DuckPGQState *)lookup->second.get();
auto duckpgq_binder = Binder::CreateBinder(context);
auto duckpgq_binder = Binder::CreateBinder(context, &binder);
auto duckpgq_parse_data =
dynamic_cast<DuckPGQParseData *>(duckpgq_state->parse_data.get());
if (duckpgq_parse_data) {
Expand Down
3 changes: 2 additions & 1 deletion test/sql/create_pg/optional_edge_table_clause.test
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
# name: test/sql/sqlpgq/snb.test
# name: test/sql/create_pg/optional_edge_table_clause.test
# group: [duckpgq]

require duckpgq
Expand All @@ -10,6 +10,7 @@ statement ok
-CREATE PROPERTY GRAPH snb
VERTEX TABLES (Message, person);


statement ok
-FROM GRAPH_TABLE (snb
MATCH (m:Message)
Expand Down
2 changes: 1 addition & 1 deletion test/sql/pattern-matching/basic_match.test
Original file line number Diff line number Diff line change
Expand Up @@ -132,7 +132,6 @@ Peter Tavneet
Peter Gabor
Peter David

# Disabled, see https://github.com/cwida/duckpgq-extension/issues/60
query II
-SELECT study.a_name, study.b_name
FROM GRAPH_TABLE (pg
Expand All @@ -144,6 +143,7 @@ FROM GRAPH_TABLE (pg
ORDER BY a_name, b_name;
----
Peter Daniel
Peter Daniel
Peter David
Peter Gabor
Peter Tavneet
Expand Down
66 changes: 66 additions & 0 deletions test/sql/pattern-matching/undirected_edges.test
Original file line number Diff line number Diff line change
@@ -0,0 +1,66 @@
# name: test/sql/sqlpgq/undirected_edges.test
# group: [duckpgq]

statement ok
pragma enable_verification

require duckpgq

statement ok
CREATE TABLE Student(id BIGINT, name VARCHAR);INSERT INTO Student VALUES (0, 'Daniel'), (1, 'Tavneet'), (2, 'Gabor'), (3, 'Peter'), (4, 'David');

statement ok
CREATE TABLE know(src BIGINT, dst BIGINT, createDate BIGINT);INSERT INTO know VALUES (0,1, 10), (0,2, 11), (0,3, 12), (3,0, 13), (1,2, 14), (1,3, 15), (2,3, 16), (4,3, 17), (4, 0, 18);

statement ok
-CREATE PROPERTY GRAPH pg
VERTEX TABLES (Student)
EDGE TABLES (
know SOURCE KEY ( src ) REFERENCES Student ( id )
DESTINATION KEY ( dst ) REFERENCES Student ( id )
);

query II
SELECT a.name AS person, b.name AS friend
FROM ((SELECT know.src, know.dst FROM know) UNION ALL (SELECT know.dst, know.src FROM know)) AS k , Student AS b , Student AS a
WHERE ((a.id = k.src) AND (b.id = k.dst) AND (a.name = 'Daniel'));
----
Daniel Tavneet
Daniel Gabor
Daniel Peter
Daniel David
Daniel Peter

# Daniel has 3 outgoing edges and 2 incoming edges, so there should be 5 tuples
query II
-SELECT person, friend
FROM GRAPH_TABLE (pg
MATCH
(a:Student)-[k:know]-(b:Student)
WHERE a.name = 'Daniel'
COLUMNS (a.name as person, b.name as friend)
)
ORDER BY person, friend;
----
Daniel David
Daniel Gabor
Daniel Peter
Daniel Peter
Daniel Tavneet

# Daniel has 3 outgoing edges and 2 incoming edges, so there should be 5 tuples
query III
-FROM GRAPH_TABLE (pg
MATCH
(a:Student)-[k:know]-(b:Student)
WHERE a.name = 'Daniel'
COLUMNS (a.name as person, b.name as friend, k.createDate as date)
)
ORDER BY person, friend, date;
----
Daniel David 18
Daniel Gabor 11
Daniel Peter 12
Daniel Peter 13
Daniel Tavneet 10

0 comments on commit 67c2cc1

Please sign in to comment.