Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

67 shortest path bounded #98

Closed
wants to merge 44 commits into from
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
44 commits
Select commit Hold shift + click to select a range
cfe0649
initial fix of issue 67, need to test further
SiberiaWolfP Feb 8, 2024
1f21138
Merge branch 'main' into 67-shortest-path-bounded
SiberiaWolfP Feb 8, 2024
cb66073
Fix the bug of not being able to find longer paths
SiberiaWolfP Feb 13, 2024
49494a0
Previous commit causes unable to find shortest path
SiberiaWolfP Feb 13, 2024
c90456d
The path returns -1 internally if it does not exist, but should not e…
SiberiaWolfP Feb 13, 2024
d7a7a8a
Fix getting stuck in a dead loop in a cyclic graph
SiberiaWolfP Feb 13, 2024
f1076c0
Adding upper and lower bounds to the shortestpath function
SiberiaWolfP Feb 16, 2024
fd52d46
Merge branch 'main' into 67-shortest-path-bounded
SiberiaWolfP Feb 16, 2024
7708373
Fix bug: path always is [0]
SiberiaWolfP Feb 16, 2024
db60bdf
Code clean
SiberiaWolfP Feb 16, 2024
470ad31
Fix bug: stop too early
SiberiaWolfP Feb 18, 2024
9ef7574
clean match.cpp
SiberiaWolfP Feb 18, 2024
bf84fe3
Bug fix: src == dst search result is always 0
SiberiaWolfP Feb 20, 2024
fbc9303
Merge remote-tracking branch 'origin/main' into 67-shortest-path-bounded
SiberiaWolfP Feb 20, 2024
79b7495
If the lower bound is not greater than 1, the high performance algori…
SiberiaWolfP Feb 20, 2024
90c6085
Keep the original algorithm consistent
SiberiaWolfP Feb 20, 2024
0341634
Format fix
Dtenwolde Feb 21, 2024
04e8e97
Comment
Dtenwolde Feb 21, 2024
29a17bc
Adding extra test case
Dtenwolde Feb 21, 2024
eb7678e
Trigger lowerbound function when lowerbound > 0
Dtenwolde Feb 21, 2024
03558fe
comment
Dtenwolde Feb 21, 2024
c6f0edb
Revert "Trigger lowerbound function when lowerbound > 0"
Dtenwolde Feb 21, 2024
ed4f5ef
Trigger lowerbound function when lowerbound > 0
Dtenwolde Feb 21, 2024
9f913b4
Remove unused dst_pos
Dtenwolde Feb 21, 2024
d39776f
Remove test
Dtenwolde Feb 21, 2024
e17f417
Add column
Dtenwolde Feb 21, 2024
195ff27
Remove test file
Dtenwolde Feb 21, 2024
34559be
Add correct test results
Dtenwolde Feb 21, 2024
a05525d
Add results
Dtenwolde Feb 21, 2024
d61e335
Add correct result
Dtenwolde Feb 23, 2024
1ccf97c
Remove condition
Dtenwolde Feb 23, 2024
aa8dfa4
Remove condition
Dtenwolde Feb 23, 2024
81ca5f9
Implement new idea
SiberiaWolfP Feb 26, 2024
737ab85
Merge branch '67-shortest-path-bounded' of https://github.com/cwida/d…
SiberiaWolfP Feb 26, 2024
d3011f4
Merge remote-tracking branch 'origin/main' into 67-shortest-path-bounded
SiberiaWolfP Feb 26, 2024
40530e3
implement path reconstruction
SiberiaWolfP Feb 27, 2024
49c957f
Updating the path record of the old algorithm
SiberiaWolfP Feb 28, 2024
ded74bd
Register all shortest path functions for testing
SiberiaWolfP Feb 29, 2024
9741705
fix build fail
SiberiaWolfP Feb 29, 2024
ac54a7d
Prevents completed lanes from being added to the calculation
SiberiaWolfP Feb 29, 2024
a3fa824
Further optimization
SiberiaWolfP Mar 4, 2024
e6ab7f1
make function template
SiberiaWolfP Mar 4, 2024
a1c6833
Optimizing path logging with one-dimensional array
SiberiaWolfP Mar 6, 2024
e307c6c
fix bug: index out of range
SiberiaWolfP Mar 6, 2024
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -6,3 +6,4 @@ duckdb_unittest_tempdir/
testext
test/python/__pycache__/
.Rhistory
.vscode
8 changes: 8 additions & 0 deletions duckpgq/include/duckpgq/duckpgq_functions.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -27,8 +27,12 @@ class DuckPGQFunctions {
functions.push_back(GetCsrEdgeFunction());
functions.push_back(GetCheapestPathLengthFunction());
functions.push_back(GetShortestPathFunction());
functions.push_back(GetShortestPathLowerBoundFunction());
functions.push_back(GetShortestPathTwoPhaseFunction());
functions.push_back(GetReachabilityFunction());
functions.push_back(GetIterativeLengthFunction());
functions.push_back(GetIterativeLengthLowerBoundFunction());
functions.push_back(GetIterativeLengthTwoPhaseFunction());
functions.push_back(GetIterativeLengthBidirectionalFunction());
functions.push_back(GetIterativeLength2Function());
functions.push_back(GetDeleteCsrFunction());
Expand Down Expand Up @@ -57,8 +61,12 @@ class DuckPGQFunctions {
static CreateScalarFunctionInfo GetCsrEdgeFunction();
static CreateScalarFunctionInfo GetCheapestPathLengthFunction();
static CreateScalarFunctionInfo GetShortestPathFunction();
static CreateScalarFunctionInfo GetShortestPathLowerBoundFunction();
static CreateScalarFunctionInfo GetShortestPathTwoPhaseFunction();
static CreateScalarFunctionInfo GetReachabilityFunction();
static CreateScalarFunctionInfo GetIterativeLengthFunction();
static CreateScalarFunctionInfo GetIterativeLengthLowerBoundFunction();
static CreateScalarFunctionInfo GetIterativeLengthTwoPhaseFunction();
static CreateScalarFunctionInfo GetIterativeLengthBidirectionalFunction();
static CreateScalarFunctionInfo GetIterativeLength2Function();
static CreateScalarFunctionInfo GetDeleteCsrFunction();
Expand Down
4 changes: 4 additions & 0 deletions duckpgq/src/duckpgq/functions/scalar/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -5,10 +5,14 @@ set(EXTENSION_SOURCES
${CMAKE_CURRENT_SOURCE_DIR}/csr_deletion.cpp
${CMAKE_CURRENT_SOURCE_DIR}/csr_get_w_type.cpp
${CMAKE_CURRENT_SOURCE_DIR}/iterativelength.cpp
${CMAKE_CURRENT_SOURCE_DIR}/iterativelength_lowerbound.cpp
${CMAKE_CURRENT_SOURCE_DIR}/iterativelength_two_phase.cpp
${CMAKE_CURRENT_SOURCE_DIR}/iterativelength2.cpp
${CMAKE_CURRENT_SOURCE_DIR}/iterativelength_bidirectional.cpp
${CMAKE_CURRENT_SOURCE_DIR}/reachability.cpp
${CMAKE_CURRENT_SOURCE_DIR}/shortest_path.cpp
${CMAKE_CURRENT_SOURCE_DIR}/shortest_path_lowerbound.cpp
${CMAKE_CURRENT_SOURCE_DIR}/shortest_path_two_phase.cpp
${CMAKE_CURRENT_SOURCE_DIR}/csr_creation.cpp
PARENT_SCOPE
)
9 changes: 6 additions & 3 deletions duckpgq/src/duckpgq/functions/scalar/csr_creation.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -36,7 +36,8 @@ static void CsrInitializeVertex(DuckPGQState &context, int32_t id,
csr->initialized_v = true;
context.csr_list[id] = std::move(csr);
} catch (std::bad_alloc const &) {
throw Exception(ExceptionType::INTERNAL, "Unable to initialize vector of size for csr vertex table "
throw Exception(ExceptionType::INTERNAL,
"Unable to initialize vector of size for csr vertex table "
"representation");
}

Expand All @@ -55,7 +56,8 @@ static void CsrInitializeEdge(DuckPGQState &context, int32_t id, int64_t v_size,
csr_entry->second->e.resize(e_size, 0);
csr_entry->second->edge_ids.resize(e_size, 0);
} catch (std::bad_alloc const &) {
throw Exception(ExceptionType::INTERNAL, "Unable to initialize vector of size for csr edge table "
throw Exception(ExceptionType::INTERNAL,
"Unable to initialize vector of size for csr edge table "
"representation");
}
for (auto i = 1; i < v_size + 2; i++) {
Expand All @@ -82,7 +84,8 @@ static void CsrInitializeWeight(DuckPGQState &context, int32_t id,
throw NotImplementedException("Unrecognized weight type detected.");
}
} catch (std::bad_alloc const &) {
throw Exception(ExceptionType::INTERNAL, "Unable to initialize vector of size for csr weight table "
throw Exception(ExceptionType::INTERNAL,
"Unable to initialize vector of size for csr weight table "
"representation");
}

Expand Down
15 changes: 11 additions & 4 deletions duckpgq/src/duckpgq/functions/scalar/iterativelength.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -74,6 +74,12 @@ static void IterativeLengthFunction(DataChunk &args, ExpressionState &state,
auto src_data = (int64_t *)vdata_src.data;
auto dst_data = (int64_t *)vdata_dst.data;

// get lowerbound and upperbound
auto &upper = args.data[5];
UnifiedVectorFormat vdata_upper_bound;
upper.ToUnifiedFormat(args.size(), vdata_upper_bound);
auto upper_bound = ((int64_t *)vdata_upper_bound.data)[0];

ValidityMask &result_validity = FlatVector::Validity(result);

// create result vector
Expand Down Expand Up @@ -110,11 +116,11 @@ static void IterativeLengthFunction(DataChunk &args, ExpressionState &state,
int64_t dst_pos = vdata_dst.sel->get_index(search_num);
if (!vdata_src.validity.RowIsValid(src_pos)) {
result_validity.SetInvalid(search_num);
result_data[search_num] = (uint64_t)-1; /* no path */
result_data[search_num] = (int64_t)-1; /* no path */
} else if (src_data[src_pos] == dst_data[dst_pos]) {
result_data[search_num] =
(uint64_t)0; // path of length 0 does not require a search
result_data[search_num] = (int64_t)0; /* source == destination, length is 0 */
} else {
result_data[search_num] = (int64_t)-1; /* initialize to no path */
visit1[src_data[src_pos]][lane] = true;
lane_to_num[lane] = search_num; // active lane
active++;
Expand All @@ -124,7 +130,7 @@ static void IterativeLengthFunction(DataChunk &args, ExpressionState &state,
}

// make passes while a lane is still active
for (int64_t iter = 1; active; iter++) {
for (int64_t iter = 1; active && iter <= upper_bound; iter++) {
if (!IterativeLength(v_size, v, e, seen, (iter & 1) ? visit1 : visit2,
(iter & 1) ? visit2 : visit1)) {
break;
Expand Down Expand Up @@ -160,6 +166,7 @@ static void IterativeLengthFunction(DataChunk &args, ExpressionState &state,
CreateScalarFunctionInfo DuckPGQFunctions::GetIterativeLengthFunction() {
auto fun = ScalarFunction("iterativelength",
{LogicalType::INTEGER, LogicalType::BIGINT,
LogicalType::BIGINT, LogicalType::BIGINT,
LogicalType::BIGINT, LogicalType::BIGINT},
LogicalType::BIGINT, IterativeLengthFunction,
IterativeLengthFunctionData::IterativeLengthBind);
Expand Down
181 changes: 181 additions & 0 deletions duckpgq/src/duckpgq/functions/scalar/iterativelength_lowerbound.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,181 @@
#include <duckpgq_extension.hpp>
#include "duckdb/main/client_data.hpp"
#include "duckdb/parser/parsed_data/create_scalar_function_info.hpp"
#include "duckdb/planner/expression/bound_function_expression.hpp"
#include "duckpgq/common.hpp"
#include "duckpgq/duckpgq_functions.hpp"

namespace duckdb {

//! Performs one frontier expansion step for all lanes at once.
//!
//! Bit `lane` of `visit[i]` marks vertex `i` as part of that lane's current
//! frontier. The frontier is pushed along every outgoing edge of the CSR
//! (`v` holds the edge offsets per vertex, `e` the edge targets) into `next`.
//! Both `next` and `seen` are cleared first, so after the call `seen` holds
//! exactly the vertices reached in this step (it does not accumulate across
//! calls).
//!
//! Returns true when at least one vertex was reached by at least one lane.
static bool IterativeLengthLowerBound(int64_t v_size, int64_t *v, vector<int64_t> &e,
                                      vector<std::bitset<LANE_LIMIT>> &seen,
                                      vector<std::bitset<LANE_LIMIT>> &visit,
                                      vector<std::bitset<LANE_LIMIT>> &next) {
  // Start from a clean slate: nothing reached yet in this step.
  for (int64_t vertex = 0; vertex < v_size; ++vertex) {
    next[vertex].reset();
    seen[vertex].reset();
  }

  // Propagate every non-empty frontier entry to the neighbours of its vertex.
  for (int64_t vertex = 0; vertex < v_size; ++vertex) {
    const auto &frontier = visit[vertex];
    if (frontier.none()) {
      continue;
    }
    const int64_t edge_end = v[vertex + 1];
    for (int64_t edge = v[vertex]; edge < edge_end; ++edge) {
      next[e[edge]] |= frontier;
    }
  }

  // Publish the vertices reached in this step and report whether any exist.
  bool any_reached = false;
  for (int64_t vertex = 0; vertex < v_size; ++vertex) {
    seen[vertex] |= next[vertex];
    if (next[vertex].any()) {
      any_reached = true;
    }
  }
  return any_reached;
}

//! Scalar function body for `iterativelength_lowerbound`.
//!
//! For every input row it runs a batched breadth-first expansion (up to
//! LANE_LIMIT searches at a time, one bit lane per search) over the CSR
//! identified by the bind data and writes, per row, the first iteration count
//! `iter` with `lower_bound <= iter <= upper_bound` at which the destination
//! vertex is reached from the source vertex.
//!
//! Arguments read from `args`:
//!   data[1] - number of vertices (read from row 0)
//!   data[2] - source vertex per row
//!   data[3] - destination vertex per row
//!   data[4] - lower bound (read from row 0 only)
//!   data[5] - upper bound (read from row 0 only)
//!
//! Rows with a NULL source or destination, and rows for which no match is
//! found within the bounds, are marked invalid (NULL) with -1 stored in the
//! data slot.
//!
//! Throws MissingExtensionException when the "duckpgq" state is not
//! registered, and ConstraintException when the CSR id is out of range or the
//! CSR has not been initialized. On completion the CSR id is inserted into
//! `csr_to_delete`.
static void IterativeLengthLowerBoundFunction(DataChunk &args,
                                              ExpressionState &state,
                                              Vector &result) {
  auto &func_expr = (BoundFunctionExpression &)state.expr;
  auto &info = (IterativeLengthFunctionData &)*func_expr.bind_info;
  auto duckpgq_state_entry = info.context.registered_state.find("duckpgq");
  if (duckpgq_state_entry == info.context.registered_state.end()) {
    //! Wondering how you can get here if the extension wasn't loaded, but
    //! leaving this check in anyways
    throw MissingExtensionException(
        "The DuckPGQ extension has not been loaded");
  }
  auto duckpgq_state =
      reinterpret_cast<DuckPGQState *>(duckpgq_state_entry->second.get());

  if ((uint64_t)info.csr_id + 1 > duckpgq_state->csr_list.size()) {
    throw ConstraintException("Invalid ID");
  }
  auto csr_entry = duckpgq_state->csr_list.find((uint64_t)info.csr_id);
  if (csr_entry == duckpgq_state->csr_list.end()) {
    throw ConstraintException(
        "Need to initialize CSR before doing shortest path");
  }
  // Assert only after the lookup succeeded: indexing the list with
  // operator[] inside the assertion could create an empty entry for an
  // unknown id and mask the "not initialized" error above.
  D_ASSERT(csr_entry->second);

  if (!(csr_entry->second->initialized_v && csr_entry->second->initialized_e)) {
    throw ConstraintException(
        "Need to initialize CSR before doing shortest path");
  }
  int64_t v_size = args.data[1].GetValue(0).GetValue<int64_t>();
  int64_t *v = (int64_t *)csr_entry->second->v;
  vector<int64_t> &e = csr_entry->second->e;

  // get src and dst vectors for searches
  auto &src = args.data[2];
  auto &dst = args.data[3];
  UnifiedVectorFormat vdata_src;
  UnifiedVectorFormat vdata_dst;
  src.ToUnifiedFormat(args.size(), vdata_src);
  dst.ToUnifiedFormat(args.size(), vdata_dst);
  auto src_data = (int64_t *)vdata_src.data;
  auto dst_data = (int64_t *)vdata_dst.data;

  // get lowerbound and upperbound (taken from the first entry only)
  auto &lower = args.data[4];
  auto &upper = args.data[5];
  UnifiedVectorFormat vdata_lower_bound;
  UnifiedVectorFormat vdata_upper_bound;
  lower.ToUnifiedFormat(args.size(), vdata_lower_bound);
  upper.ToUnifiedFormat(args.size(), vdata_upper_bound);
  auto lower_bound = ((int64_t *)vdata_lower_bound.data)[0];
  auto upper_bound = ((int64_t *)vdata_upper_bound.data)[0];

  // create result vector
  result.SetVectorType(VectorType::FLAT_VECTOR);
  ValidityMask &result_validity = FlatVector::Validity(result);
  auto result_data = FlatVector::GetData<int64_t>(result);

  // create temp SIMD arrays
  vector<std::bitset<LANE_LIMIT>> seen(v_size);
  vector<std::bitset<LANE_LIMIT>> visit1(v_size);
  vector<std::bitset<LANE_LIMIT>> visit2(v_size);

  // maps lane to search number
  short lane_to_num[LANE_LIMIT];
  for (int64_t lane = 0; lane < LANE_LIMIT; lane++) {
    lane_to_num[lane] = -1; // inactive
  }

  idx_t started_searches = 0;
  while (started_searches < args.size()) {

    // empty visit vectors (visit2 is cleared by the first expansion step)
    for (int64_t i = 0; i < v_size; i++) {
      seen[i] = 0;
      visit1[i] = 0;
    }

    // add search jobs to free lanes
    idx_t active = 0;
    for (int64_t lane = 0; lane < LANE_LIMIT; lane++) {
      lane_to_num[lane] = -1;
      while (started_searches < args.size()) {
        int64_t search_num = started_searches++;
        int64_t src_pos = vdata_src.sel->get_index(search_num);
        int64_t dst_pos = vdata_dst.sel->get_index(search_num);
        result_data[search_num] = (int64_t)-1; /* initialize to no path */
        if (!vdata_src.validity.RowIsValid(src_pos) ||
            !vdata_dst.validity.RowIsValid(dst_pos)) {
          // NULL endpoint: the payload of a NULL row is unspecified, so it
          // must never be used as a vertex index. The result is NULL.
          result_validity.SetInvalid(search_num);
          continue;
        }
        visit1[src_data[src_pos]][lane] = true;
        lane_to_num[lane] = search_num; // active lane
        active++;
        break;
      }
    }

    // make passes while a lane is still active
    for (int64_t iter = 1; active > 0 && iter <= upper_bound; iter++) {
      if (!IterativeLengthLowerBound(v_size, v, e, seen,
                                     (iter & 1) ? visit1 : visit2,
                                     (iter & 1) ? visit2 : visit1)) {
        break;
      }
      if (iter < lower_bound) {
        // matches shorter than the lower bound do not count
        continue;
      }
      // detect lanes that finished
      for (int64_t lane = 0; lane < LANE_LIMIT; lane++) {
        int64_t search_num = lane_to_num[lane];
        if (search_num >= 0) { // active lane
          int64_t dst_pos = vdata_dst.sel->get_index(search_num);
          if (seen[dst_data[dst_pos]][lane]) {
            result_data[search_num] =
                iter; /* found at iter => iter = path length */
            lane_to_num[lane] = -1; // mark inactive
            active--;
          }
        }
      }
    }

    // no changes anymore: any still active searches have no path
    for (int64_t lane = 0; lane < LANE_LIMIT; lane++) {
      int64_t search_num = lane_to_num[lane];
      if (search_num >= 0) { // active lane
        result_validity.SetInvalid(search_num);
        result_data[search_num] = (int64_t)-1; /* no path */
        lane_to_num[lane] = -1;                // mark inactive
      }
    }
  }
  duckpgq_state->csr_to_delete.insert(info.csr_id);
}

//! Builds the catalog entry for the `iterativelength_lowerbound` scalar
//! function: one INTEGER argument followed by five BIGINT arguments,
//! returning BIGINT. The implementation reads arguments 1..5 as vertex count,
//! source, destination, lower bound and upper bound; argument 0 is presumably
//! the CSR id consumed by the bind function (confirm against
//! IterativeLengthBind).
CreateScalarFunctionInfo
DuckPGQFunctions::GetIterativeLengthLowerBoundFunction() {
  vector<LogicalType> argument_types {
      LogicalType::INTEGER, LogicalType::BIGINT, LogicalType::BIGINT,
      LogicalType::BIGINT,  LogicalType::BIGINT, LogicalType::BIGINT};

  ScalarFunction lower_bound_function(
      "iterativelength_lowerbound", argument_types, LogicalType::BIGINT,
      IterativeLengthLowerBoundFunction,
      IterativeLengthFunctionData::IterativeLengthBind);

  return CreateScalarFunctionInfo(lower_bound_function);
}

} // namespace duckdb
Loading
Loading