From 40530e346a323188e74c4a141667cf043ce4472e Mon Sep 17 00:00:00 2001 From: Pingan Ren Date: Tue, 27 Feb 2024 11:43:21 +0100 Subject: [PATCH] implement path reconstruction --- .../scalar/iterativelength_lowerbound.cpp | 20 ++++----- .../scalar/iterativelength_two_phase.cpp | 21 +++++----- .../scalar/shortest_path_two_phase.cpp | 41 ++++++++----------- .../sql/path-finding/shortest_path_bound.test | 40 ++++++++++++++---- 4 files changed, 71 insertions(+), 51 deletions(-) diff --git a/duckpgq/src/duckpgq/functions/scalar/iterativelength_lowerbound.cpp b/duckpgq/src/duckpgq/functions/scalar/iterativelength_lowerbound.cpp index f23dfeb7..76a421dc 100644 --- a/duckpgq/src/duckpgq/functions/scalar/iterativelength_lowerbound.cpp +++ b/duckpgq/src/duckpgq/functions/scalar/iterativelength_lowerbound.cpp @@ -185,15 +185,15 @@ static void IterativeLengthLowerBoundFunction(DataChunk &args, duckpgq_state->csr_to_delete.insert(info.csr_id); } -CreateScalarFunctionInfo -DuckPGQFunctions::GetIterativeLengthLowerBoundFunction() { - auto fun = ScalarFunction( - "iterativelength_lowerbound", - {LogicalType::INTEGER, LogicalType::BIGINT, LogicalType::BIGINT, - LogicalType::BIGINT, LogicalType::BIGINT, LogicalType::BIGINT}, - LogicalType::BIGINT, IterativeLengthLowerBoundFunction, - IterativeLengthFunctionData::IterativeLengthBind); - return CreateScalarFunctionInfo(fun); -} +// CreateScalarFunctionInfo +// DuckPGQFunctions::GetIterativeLengthLowerBoundFunction() { +// auto fun = ScalarFunction( +// "iterativelength_lowerbound", +// {LogicalType::INTEGER, LogicalType::BIGINT, LogicalType::BIGINT, +// LogicalType::BIGINT, LogicalType::BIGINT, LogicalType::BIGINT}, +// LogicalType::BIGINT, IterativeLengthLowerBoundFunction, +// IterativeLengthFunctionData::IterativeLengthBind); +// return CreateScalarFunctionInfo(fun); +// } } // namespace duckdb diff --git a/duckpgq/src/duckpgq/functions/scalar/iterativelength_two_phase.cpp b/duckpgq/src/duckpgq/functions/scalar/iterativelength_two_phase.cpp index e270f5d5..a697fbcc 100644 --- a/duckpgq/src/duckpgq/functions/scalar/iterativelength_two_phase.cpp +++ b/duckpgq/src/duckpgq/functions/scalar/iterativelength_two_phase.cpp @@ -163,7 +163,6 @@ static void IterativeLengthLowerBoundFunction(DataChunk &args, ExpressionState & auto result_data = FlatVector::GetData(result); // create temp SIMD arrays - vector> seen(v_size); vector> visit1(v_size); vector> visit2(v_size); @@ -178,7 +177,6 @@ static void IterativeLengthLowerBoundFunction(DataChunk &args, ExpressionState & // empty visit vectors for (auto i = 0; i < v_size; i++) { - seen[i] = 0; visit1[i] = 0; } @@ -236,14 +234,15 @@ static void IterativeLengthLowerBoundFunction(DataChunk &args, ExpressionState & duckpgq_state->csr_to_delete.insert(info.csr_id); } -// CreateScalarFunctionInfo DuckPGQFunctions::GetIterativeLengthLowerBoundFunction() { -// auto fun = ScalarFunction("iterativelength_lowerbound", -// {LogicalType::INTEGER, LogicalType::BIGINT, -// LogicalType::BIGINT, LogicalType::BIGINT, -// LogicalType::BIGINT, LogicalType::BIGINT}, -// LogicalType::BIGINT, IterativeLengthLowerBoundFunction, -// IterativeLengthFunctionData::IterativeLengthBind); -// return CreateScalarFunctionInfo(fun); -// } +CreateScalarFunctionInfo +DuckPGQFunctions::GetIterativeLengthLowerBoundFunction() { + auto fun = ScalarFunction( + "iterativelength_lowerbound", + {LogicalType::INTEGER, LogicalType::BIGINT, LogicalType::BIGINT, + LogicalType::BIGINT, LogicalType::BIGINT, LogicalType::BIGINT}, + LogicalType::BIGINT, IterativeLengthLowerBoundFunction, + IterativeLengthFunctionData::IterativeLengthBind); + return CreateScalarFunctionInfo(fun); +} } // namespace duckdb diff --git a/duckpgq/src/duckpgq/functions/scalar/shortest_path_two_phase.cpp b/duckpgq/src/duckpgq/functions/scalar/shortest_path_two_phase.cpp index 93adb3d7..8b515bcd 100644 --- a/duckpgq/src/duckpgq/functions/scalar/shortest_path_two_phase.cpp +++ b/duckpgq/src/duckpgq/functions/scalar/shortest_path_two_phase.cpp @@ -15,13 +15,11 @@ static bool IterativeLengthPhaseOne(int64_t v_size, int64_t *V, vector int64_t iter, vector &edge_ids, vector>> &paths_v, vector>> &paths_e, - vector> &seen, vector> &visit, vector> &next) { bool change = false; for (auto v = 0; v < v_size; v++) { next[v] = 0; - seen[v] = 0; } //! Keep track of edge id through which the node was reached for (auto v = 0; v < v_size; v++) { @@ -41,7 +39,6 @@ static bool IterativeLengthPhaseOne(int64_t v_size, int64_t *V, vector } for (auto v = 0; v < v_size; v++) { - seen[v] = seen[v] | next[v]; change |= next[v].any(); } return change; @@ -208,7 +205,6 @@ static void ShortestPathLowerBoundFunction(DataChunk &args, ExpressionState &sta ValidityMask &result_validity = FlatVector::Validity(result); // create temp SIMD arrays - vector> seen(v_size); vector> visit1(v_size); vector> visit2(v_size); @@ -230,7 +226,6 @@ static void ShortestPathLowerBoundFunction(DataChunk &args, ExpressionState &sta // empty visit vectors for (auto i = 0; i < v_size; i++) { - seen[i] = 0; visit1[i] = 0; } @@ -252,7 +247,7 @@ static void ShortestPathLowerBoundFunction(DataChunk &args, ExpressionState &sta int64_t iter = 1; for (; iter < lower_bound; iter++) { - IterativeLengthPhaseOne(v_size, v, e, iter, edge_ids, paths_v, paths_e, seen, + IterativeLengthPhaseOne(v_size, v, e, iter, edge_ids, paths_v, paths_e, (iter & 1) ? visit1 : visit2, (iter & 1) ? visit2 : visit1); } @@ -260,7 +255,6 @@ static void ShortestPathLowerBoundFunction(DataChunk &args, ExpressionState &sta for (int64_t lane = 0; lane < LANE_LIMIT; lane++) { auto search_num = lane_to_num[lane]; if (search_num >= 0) { - int64_t src_pos = vdata_src.sel->get_index(search_num); int64_t dst_pos = vdata_dst.sel->get_index(search_num); auto phase_two_result = ShortestPathInternal(lane, v_size, dst_data[dst_pos], upper_bound - lower_bound + 1, v, e, edge_ids, (iter & 1) ? visit1 : visit2); @@ -270,17 +264,17 @@ static void ShortestPathLowerBoundFunction(DataChunk &args, ExpressionState &sta vector output_vector; // construct the path of phase one if (paths_v[phase_two_src][lane].size() > 0) { - auto parent_vertex = paths_v[phase_two_src][lane][lower_bound - 1]; - auto parent_edge = paths_e[phase_two_src][lane][lower_bound - 1]; + auto iterations = lower_bound - 1; + auto parent_vertex = paths_v[phase_two_src][lane][iterations]; + auto parent_edge = paths_e[phase_two_src][lane][iterations]; - output_vector.push_back(parent_edge); - while (parent_vertex != src_data[src_pos]) { - output_vector.push_back(parent_vertex); - parent_edge = paths_e[parent_vertex][lane][lower_bound - 1]; - parent_vertex = paths_v[parent_vertex][lane][lower_bound - 1]; + while (iterations > 0) { output_vector.push_back(parent_edge); + output_vector.push_back(parent_vertex); + iterations--; + parent_edge = paths_e[parent_vertex][lane][iterations]; + parent_vertex = paths_v[parent_vertex][lane][iterations]; } - output_vector.push_back(src_data[src_pos]); std::reverse(output_vector.begin(), output_vector.end()); } @@ -320,14 +314,15 @@ static void ShortestPathLowerBoundFunction(DataChunk &args, ExpressionState &sta duckpgq_state->csr_to_delete.insert(info.csr_id); } -CreateScalarFunctionInfo DuckPGQFunctions::GetShortestPathLowerBoundFunction() { - auto fun = ScalarFunction("shortestpath_lowerbound", - {LogicalType::INTEGER, LogicalType::BIGINT, - LogicalType::BIGINT, LogicalType::BIGINT, - LogicalType::BIGINT, LogicalType::BIGINT}, - LogicalType::LIST(LogicalType::BIGINT), - ShortestPathLowerBoundFunction, - IterativeLengthFunctionData::IterativeLengthBind); +CreateScalarFunctionInfo +DuckPGQFunctions::GetShortestPathLowerBoundFunction() { + auto fun = ScalarFunction( + "shortestpath_lowerbound", + {LogicalType::INTEGER, LogicalType::BIGINT, LogicalType::BIGINT, + LogicalType::BIGINT, LogicalType::BIGINT, LogicalType::BIGINT}, + LogicalType::LIST(LogicalType::BIGINT), + ShortestPathLowerBoundFunction, + IterativeLengthFunctionData::IterativeLengthBind); return CreateScalarFunctionInfo(fun); } diff --git a/test/sql/path-finding/shortest_path_bound.test b/test/sql/path-finding/shortest_path_bound.test index 3d57a5c0..da12c592 100644 --- a/test/sql/path-finding/shortest_path_bound.test +++ b/test/sql/path-finding/shortest_path_bound.test @@ -206,7 +206,6 @@ WITH cte1 AS ( # Graph to test shortest path bound with a cycle # (1) <- (0) <-> (2) -# 0 to 1 is 1 hop statement ok CREATE TABLE Point4(id BIGINT); INSERT INTO Point4 VALUES (0), (1), (2); @@ -240,9 +239,37 @@ query III 2 1 [2, 2, 0, 0, 1] 2 2 [2, 2, 0, 1, 2] +query IIIII +-FROM GRAPH_TABLE (pg4 + MATCH + p = ANY SHORTEST (a:Point4)-[k:know4]->{1,5}(b:Point4) + COLUMNS (a.id as id1, b.id as id2, element_id(p) as elements, vertices(p) as vertices, path_length(p) as length) + ) tmp + order by tmp.id1, tmp.id2; +---- +0 0 [0, 1, 2, 2, 0] [0, 2, 0] 2 +0 1 [0, 0, 1] [0, 1] 1 +0 2 [0, 1, 2] [0, 2] 1 +2 0 [2, 2, 0] [2, 0] 1 +2 1 [2, 2, 0, 0, 1] [2, 0, 1] 2 +2 2 [2, 2, 0, 1, 2] [2, 0, 2] 2 + +query IIIII +-FROM GRAPH_TABLE (pg4 + MATCH + p = ANY SHORTEST (a:Point4)-[k:know4]->{10,15}(b:Point4) + COLUMNS (a.id as id1, b.id as id2, element_id(p) as elements, vertices(p) as vertices, path_length(p) as length) + ) tmp + order by tmp.id1, tmp.id2; +---- +0 0 [0, 1, 2, 2, 0, 1, 2, 2, 0, 1, 2, 2, 0, 1, 2, 2, 0, 1, 2, 2, 0] [0, 2, 0, 2, 0, 2, 0, 2, 0, 2, 0] 10 +0 1 [0, 1, 2, 2, 0, 1, 2, 2, 0, 1, 2, 2, 0, 1, 2, 2, 0, 1, 2, 2, 0, 0, 1] [0, 2, 0, 2, 0, 2, 0, 2, 0, 2, 0, 1] 11 +0 2 [0, 1, 2, 2, 0, 1, 2, 2, 0, 1, 2, 2, 0, 1, 2, 2, 0, 1, 2, 2, 0, 1, 2] [0, 2, 0, 2, 0, 2, 0, 2, 0, 2, 0, 2] 11 +2 0 [2, 2, 0, 1, 2, 2, 0, 1, 2, 2, 0, 1, 2, 2, 0, 1, 2, 2, 0, 1, 2, 2, 0] [2, 0, 2, 0, 2, 0, 2, 0, 2, 0, 2, 0] 11 +2 1 [2, 2, 0, 1, 2, 2, 0, 1, 2, 2, 0, 1, 2, 2, 0, 1, 2, 2, 0, 0, 1] [2, 0, 2, 0, 2, 0, 2, 0, 2, 0, 1] 10 +2 2 [2, 2, 0, 1, 2, 2, 0, 1, 2, 2, 0, 1, 2, 2, 0, 1, 2, 2, 0, 1, 2] [2, 0, 2, 0, 2, 0, 2, 0, 2, 0, 2] 10 # Description: Test algorithm's capability to ignore isolated nodes. -# Graph Structure: # (0) <-> (2), (1), (3) statement ok @@ -275,8 +302,7 @@ query III 2 2 [2, 1, 0, 0, 2] # Description: Test shortest paths in a graph with cycles. -# Graph Structure: -# (0) <-> (2) <-> (3) -> (1) (selfloop) +# (0) <-> (2) <-> (3) -> (1) ⮌ # ↑ | # └----------------------┘ @@ -309,8 +335,8 @@ query III 0 1 [0, 0, 2, 2, 3, 4, 1] 0 2 [0, 0, 2, 1, 0, 0, 2] 0 3 [0, 0, 2, 2, 3] -1 0 [1, 5, 0, 0, 2, 1, 0] -1 1 [1, 5, 0, 0, 2, 2, 3, 4, 1] +1 0 [1, 6, 1, 5, 0] +1 1 [1, 6, 1, 6, 1] 1 2 [1, 5, 0, 0, 2] 1 3 [1, 5, 0, 0, 2, 2, 3] 2 0 [2, 1, 0, 0, 2, 1, 0] @@ -318,7 +344,7 @@ query III 2 2 [2, 1, 0, 0, 2] 2 3 [2, 1, 0, 0, 2, 2, 3] 3 0 [3, 4, 1, 5, 0] -3 1 [3, 3, 2, 2, 3, 4, 1] +3 1 [3, 4, 1, 6, 1] 3 2 [3, 4, 1, 5, 0, 0, 2] 3 3 [3, 3, 2, 2, 3]