generated from duckdb/extension-template
-
Notifications
You must be signed in to change notification settings - Fork 8
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
initial fix of issue 67, need to test further
- Loading branch information
1 parent
9b64863
commit cfe0649
Showing
3 changed files
with
267 additions
and
3 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,37 @@ | ||
import kuzu | ||
import pandas as pd | ||
|
||
# Generate reference shortest-path lengths with Kuzu for every
# (min_distance, max_distance) bound pair and write them to a CSV file.

# Open the Kuzu database stored under ./test and connect to it.
db = kuzu.Database('./test')
conn = kuzu.Connection(db)

# Drop the tables if they exist. The relationship table is dropped before the
# node table it is defined on (knows is FROM Person TO Person).
for table_name in ("knows", "Person"):
    try:
        conn.execute("DROP TABLE %s" % table_name)
    except Exception:
        # Best effort: the drop is expected to fail when the table does not
        # exist yet. Exception (not a bare except) keeps Ctrl-C and
        # SystemExit working.
        pass

# Define the schema:
conn.execute("CREATE NODE TABLE Person (creationDate TIMESTAMP, id INT64, firstName STRING, lastName STRING, gender STRING, birthday DATE, locationIP STRING, browserUsed STRING, LocationCityId INT64, speaks STRING, email STRING, PRIMARY KEY (id))")
conn.execute("CREATE REL TABLE knows (FROM Person TO Person)")

# Load the data:
conn.execute("Copy Person FROM './test/person.csv'")
conn.execute("Copy knows FROM './test/person_knows_person.csv'")

# Calculate the shortest path between two people with bounded distance:
MIN_DISTANCE = 0
MAX_DISTANCE = 30

# One frame per (low, high) pair; they are concatenated once at the end so the
# accumulated data is not re-copied on every iteration.
frames = []
for low in range(MIN_DISTANCE, MAX_DISTANCE + 1):
    for high in range(low, MAX_DISTANCE + 1):
        query = (
            "MATCH (a:Person)-[e:knows*%d..%d]->(b:Person) "
            "RETURN a.id, b.id, length(e) AS distance ORDER BY distance ASC"
            % (low, high)
        )
        frame = conn.execute(query).get_as_df()
        # Rows arrive ordered by ascending distance, so keeping the first row
        # per (a.id, b.id) keeps the smallest distance within the bounds.
        frame = frame.drop_duplicates(subset=['a.id', 'b.id'], keep='first')
        frame['min_distance'] = low
        frame['max_distance'] = high
        frames.append(frame)

results = pd.concat(frames, ignore_index=True)

results.to_csv('./test/shortest_length_kuzu.csv', index=False)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,199 @@ | ||
# name: test/sql/sqlpgq/shortest_path_bound.test | ||
# group: [sqlpgq] | ||
|
||
# Turn on the enable_verification pragma before any test query runs. | ||
statement ok | ||
pragma enable_verification | ||
|
||
# This test needs the duckpgq extension. | ||
require duckpgq | ||
|
||
# Graph to test regular shortest path bound | ||
# (0) -> (1) | ||
#  ↓      ↑ | ||
# (2) -> (3) | ||
# Directed edges inserted below: 0->1, 0->2, 2->3, 3->1. | ||
|
||
# Vertex table: four points with ids 0..3. | ||
statement ok | ||
CREATE TABLE Point(id BIGINT); INSERT INTO Point VALUES (0), (1), (2), (3); | ||
|
||
# Edge table: one (src, dst) row per directed edge. | ||
statement ok | ||
CREATE TABLE know(src BIGINT, dst BIGINT); INSERT INTO know VALUES (0, 1), (0, 2), (2, 3), (3, 1); | ||
|
||
# Declare property graph pg over Point (label Pnt) and know (label Knows). | ||
# NOTE(review): the leading '-' before CREATE is kept as-is; presumably required by the duckpgq test setup -- confirm. | ||
statement ok | ||
-CREATE PROPERTY GRAPH pg | ||
VERTEX TABLES ( | ||
Point PROPERTIES ( id ) LABEL Pnt | ||
) | ||
EDGE TABLES ( | ||
know SOURCE KEY ( src ) REFERENCES Point ( id ) | ||
DESTINATION KEY ( dst ) REFERENCES Point ( id ) | ||
LABEL Knows | ||
); | ||
|
||
# From source vertex 0 (a.id = 0), call iterativelength against every vertex of Point. | ||
# The trailing arguments (2, 3) are presumably the lower/upper hop bounds -- TODO confirm against the iterativelength signature. | ||
# cte1 calls CREATE_CSR_VERTEX / CREATE_CSR_EDGE over Point and Know (presumably building the CSR that iterativelength reads). | ||
# The __x cross join references cte1 multiplied by 0, presumably only to force cte1 to be evaluated -- confirm. | ||
# Expected rows: vertex 1 with length 3 (even though the direct edge 0->1 exists) and vertex 3 with length 2. | ||
# NOTE(review): the alias srd_id below looks like a typo for src_id. | ||
query III | ||
WITH cte1 AS ( | ||
SELECT CREATE_CSR_EDGE( | ||
0, | ||
(SELECT count(a.id) FROM Point a), | ||
CAST ( | ||
(SELECT sum(CREATE_CSR_VERTEX( | ||
0, | ||
(SELECT count(a.id) FROM Point a), | ||
sub.dense_id, | ||
sub.cnt) | ||
) | ||
FROM ( | ||
SELECT a.rowid as dense_id, count(k.src) as cnt | ||
FROM Point a | ||
LEFT JOIN Know k ON k.src = a.id | ||
GROUP BY a.rowid) sub | ||
) | ||
AS BIGINT), | ||
a.rowid, | ||
c.rowid, | ||
k.rowid) as temp | ||
FROM Know k | ||
JOIN Point a on a.id = k.src | ||
JOIN Point c on c.id = k.dst | ||
) SELECT a.id as srd_id, b.id as dst_id, iterativelength(0, (select count(*) from Point), a.rowid, b.rowid, 2, 3) as path_length | ||
FROM Point a, Point b, (select count(cte1.temp) * 0 as temp from cte1) __x | ||
WHERE a.id = 0 and __x.temp * 0 + iterativelength(0, (select count(*) from Point), a.rowid, b.rowid, 2, 3); | ||
---- | ||
0 1 3 | ||
0 3 2 | ||
|
||
# Same query shape on the same Point/Know tables, but the trailing iterativelength arguments are (1, 3) instead of (2, 3). | ||
# These are presumably the lower/upper hop bounds -- TODO confirm. | ||
# Expected rows from source 0: vertex 1 with length 1, vertex 2 with length 1, vertex 3 with length 2. | ||
query III | ||
WITH cte1 AS ( | ||
SELECT CREATE_CSR_EDGE( | ||
0, | ||
(SELECT count(a.id) FROM Point a), | ||
CAST ( | ||
(SELECT sum(CREATE_CSR_VERTEX( | ||
0, | ||
(SELECT count(a.id) FROM Point a), | ||
sub.dense_id, | ||
sub.cnt) | ||
) | ||
FROM ( | ||
SELECT a.rowid as dense_id, count(k.src) as cnt | ||
FROM Point a | ||
LEFT JOIN Know k ON k.src = a.id | ||
GROUP BY a.rowid) sub | ||
) | ||
AS BIGINT), | ||
a.rowid, | ||
c.rowid, | ||
k.rowid) as temp | ||
FROM Know k | ||
JOIN Point a on a.id = k.src | ||
JOIN Point c on c.id = k.dst | ||
) SELECT a.id, b.id, iterativelength(0, (select count(*) from Point), a.rowid, b.rowid, 1, 3) as path_length | ||
FROM Point a, Point b, (select count(cte1.temp) * 0 as temp from cte1) __x | ||
WHERE a.id = 0 and __x.temp * 0 + iterativelength(0, (select count(*) from Point), a.rowid, b.rowid, 1, 3); | ||
---- | ||
0 1 1 | ||
0 2 1 | ||
0 3 2 | ||
|
||
# Graph to test shortest path bound with a cycle | ||
# (0) --> (1) <-> (2) | ||
# 0 to 1 is 1 hop | ||
# Directed edges inserted below: 0->1, 1->2, 2->1 (vertices 1 and 2 form the cycle). | ||
|
||
# Vertex table: three points with ids 0..2. | ||
statement ok | ||
CREATE TABLE Point2(id BIGINT); INSERT INTO Point2 VALUES (0), (1), (2); | ||
|
||
# Edge table: one (src, dst) row per directed edge. | ||
statement ok | ||
CREATE TABLE know2(src BIGINT, dst BIGINT); INSERT INTO know2 VALUES (0, 1), (1, 2), (2, 1); | ||
|
||
# Declare property graph pg2 over Point2 (label Pnt2) and know2 (label Knows2). | ||
statement ok | ||
-CREATE PROPERTY GRAPH pg2 | ||
VERTEX TABLES ( | ||
Point2 PROPERTIES ( id ) LABEL Pnt2 | ||
) | ||
EDGE TABLES ( | ||
know2 SOURCE KEY ( src ) REFERENCES Point2 ( id ) | ||
DESTINATION KEY ( dst ) REFERENCES Point2 ( id ) | ||
LABEL Knows2 | ||
); | ||
|
||
# From source vertex 0, call iterativelength with trailing arguments (2, 3), presumably the lower/upper hop bounds -- TODO confirm. | ||
# Expected: only vertex 2 with length 2; vertex 1 (1 hop from 0) is not returned. | ||
query III | ||
WITH cte1 AS ( | ||
SELECT CREATE_CSR_EDGE( | ||
0, | ||
(SELECT count(a.id) FROM Point2 a), | ||
CAST ( | ||
(SELECT sum(CREATE_CSR_VERTEX( | ||
0, | ||
(SELECT count(a.id) FROM Point2 a), | ||
sub.dense_id, | ||
sub.cnt) | ||
) | ||
FROM ( | ||
SELECT a.rowid as dense_id, count(k.src) as cnt | ||
FROM Point2 a | ||
LEFT JOIN know2 k ON k.src = a.id | ||
GROUP BY a.rowid) sub | ||
) | ||
AS BIGINT), | ||
a.rowid, | ||
c.rowid, | ||
k.rowid) as temp | ||
FROM know2 k | ||
JOIN Point2 a on a.id = k.src | ||
JOIN Point2 c on c.id = k.dst | ||
) SELECT a.id, b.id, iterativelength(0, (select count(*) from Point2), a.rowid, b.rowid, 2, 3) as path_length | ||
FROM Point2 a, Point2 b, (select count(cte1.temp) * 0 as temp from cte1) __x | ||
WHERE a.id = 0 and __x.temp * 0 + iterativelength(0, (select count(*) from Point2), a.rowid, b.rowid, 2, 3); | ||
---- | ||
0 2 2 | ||
|
||
|
||
# Graph to test shortest path bound with a cycle that includes the source vertex | ||
# (1) <- (0) <-> (2) | ||
# 0 to 1 is 1 hop | ||
# Directed edges inserted below: 0->1, 0->2, 2->0 (vertices 0 and 2 form the cycle). | ||
|
||
# Vertex table: three points with ids 0..2. | ||
statement ok | ||
CREATE TABLE Point3(id BIGINT); INSERT INTO Point3 VALUES (0), (1), (2); | ||
|
||
# Edge table: one (src, dst) row per directed edge. | ||
statement ok | ||
CREATE TABLE know3(src BIGINT, dst BIGINT); INSERT INTO know3 VALUES (0, 1), (0, 2), (2, 0); | ||
|
||
# Declare property graph pg3 over Point3 and know3. | ||
# NOTE(review): labels Pnt / Knows are the same ones used by graph pg (pg2 uses Pnt2 / Knows2) -- confirm the reuse is intended. | ||
statement ok | ||
-CREATE PROPERTY GRAPH pg3 | ||
VERTEX TABLES ( | ||
Point3 PROPERTIES ( id ) LABEL Pnt | ||
) | ||
EDGE TABLES ( | ||
know3 SOURCE KEY ( src ) REFERENCES Point3 ( id ) | ||
DESTINATION KEY ( dst ) REFERENCES Point3 ( id ) | ||
LABEL Knows | ||
); | ||
|
||
# From source vertex 0, call iterativelength with trailing arguments (2, 3), presumably the lower/upper hop bounds -- TODO confirm. | ||
# Expected: no rows (the result section below is empty). | ||
query III | ||
WITH cte1 AS ( | ||
SELECT CREATE_CSR_EDGE( | ||
0, | ||
(SELECT count(a.id) FROM Point3 a), | ||
CAST ( | ||
(SELECT sum(CREATE_CSR_VERTEX( | ||
0, | ||
(SELECT count(a.id) FROM Point3 a), | ||
sub.dense_id, | ||
sub.cnt) | ||
) | ||
FROM ( | ||
SELECT a.rowid as dense_id, count(k.src) as cnt | ||
FROM Point3 a | ||
LEFT JOIN know3 k ON k.src = a.id | ||
GROUP BY a.rowid) sub | ||
) | ||
AS BIGINT), | ||
a.rowid, | ||
c.rowid, | ||
k.rowid) as temp | ||
FROM know3 k | ||
JOIN Point3 a on a.id = k.src | ||
JOIN Point3 c on c.id = k.dst | ||
) SELECT a.id, b.id, iterativelength(0, (select count(*) from Point3), a.rowid, b.rowid, 2, 3) as path_length | ||
FROM Point3 a, Point3 b, (select count(cte1.temp) * 0 as temp from cte1) __x | ||
WHERE a.id = 0 and __x.temp * 0 + iterativelength(0, (select count(*) from Point3), a.rowid, b.rowid, 2, 3); | ||
---- | ||
|