Compute shortest paths using Bellman-Ford algorithm. #4294

Draft · wants to merge 6 commits into base: branch-25.02
2 changes: 2 additions & 0 deletions cpp/CMakeLists.txt
@@ -256,6 +256,8 @@ set(CUGRAPH_SOURCES
src/traversal/bfs_sg.cu
src/traversal/bfs_mg.cu
src/traversal/sssp_sg.cu
src/traversal/bellman_ford_sg.cu
src/traversal/bellman_ford_mg.cu
src/traversal/od_shortest_distances_sg.cu
src/traversal/sssp_mg.cu
src/link_analysis/hits_sg.cu
36 changes: 36 additions & 0 deletions cpp/include/cugraph/algorithms.hpp
@@ -1170,6 +1170,42 @@ void sssp(raft::handle_t const& handle,
weight_t cutoff = std::numeric_limits<weight_t>::max(),
bool do_expensive_check = false);

/**
* @brief Run Bellman-Ford algorithm to compute the minimum distances (and predecessors) from
* the source vertex.
*
* This function computes the distances (minimum edge weight sums) from the source vertex. If @p
* predecessors is not `nullptr`, this function calculates the predecessor of each vertex in the
* shortest-path as well. The Bellman-Ford algorithm also works with negative edge weights. If the
* input graph contains negative cycle(s), the algorithm returns false.
*
* @throws cugraph::logic_error on erroneous input arguments.
*
* @tparam vertex_t Type of vertex identifiers. Needs to be an integral type.
* @tparam edge_t Type of edge identifiers. Needs to be an integral type.
* @tparam weight_t Type of edge weights. Needs to be a floating point type.
* @tparam multi_gpu Flag indicating whether template instantiation should target single-GPU (false)
* or multi-GPU (true).
* @param[in] handle RAFT handle object to encapsulate resources (e.g. CUDA stream, communicator,
* and handles to various CUDA libraries) to run graph algorithms.
* @param[in] graph_view Graph view object.
* @param[in] edge_weight_view View object holding edge weights for @p graph_view.
* @param[in] source_vertex Source vertex to start single-source shortest-path.
* In a multi-gpu context the source vertex should be local to this GPU.
* @param[out] distances Pointer to the output distance array.
* @param[out] predecessors Pointer to the output predecessor array or `nullptr`.
* @return True if there is no negative cycle in the input graph pointed to by @p graph_view, and in
* that case @p distances and @p predecessors contain valid results.
* False otherwise, and in that case @p distances and @p predecessors contain invalid values.
*/
template <typename vertex_t, typename edge_t, typename weight_t, bool multi_gpu>
bool bellman_ford(raft::handle_t const& handle,
graph_view_t<vertex_t, edge_t, false, multi_gpu> const& graph_view,
edge_property_view_t<edge_t, weight_t const*> edge_weight_view,
vertex_t source_vertex,
vertex_t* predecessors,
weight_t* distances);

/*
* @brief Compute the shortest distances from the given origins to all the given destinations.
*
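A caller-side sketch of how the new entry point might be invoked on a single-GPU weighted graph. The wrapper function, variable names, and type choices below are illustrative only and are not part of this PR; only the cugraph::bellman_ford signature above is.

#include <cugraph/algorithms.hpp>
#include <raft/core/handle.hpp>
#include <rmm/device_uvector.hpp>

void bellman_ford_example(raft::handle_t const& handle,
                          cugraph::graph_view_t<int32_t, int32_t, false, false> const& graph_view,
                          cugraph::edge_property_view_t<int32_t, float const*> edge_weight_view,
                          int32_t source)
{
  // one distance and one predecessor slot per (local) vertex
  rmm::device_uvector<float> distances(graph_view.local_vertex_partition_range_size(),
                                       handle.get_stream());
  rmm::device_uvector<int32_t> predecessors(graph_view.local_vertex_partition_range_size(),
                                            handle.get_stream());

  bool no_negative_cycle = cugraph::bellman_ford(
    handle, graph_view, edge_weight_view, source, predecessors.data(), distances.data());

  if (!no_negative_cycle) {
    // a negative cycle was found: distances/predecessors hold invalid values
  }
}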
234 changes: 234 additions & 0 deletions cpp/src/traversal/bellman_ford_impl.cuh
@@ -0,0 +1,234 @@
/*
* Copyright (c) 2024, NVIDIA CORPORATION.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#pragma once

#include "prims/fill_edge_property.cuh"
#include "prims/reduce_op.cuh"
#include "prims/transform_e.cuh"
#include "prims/transform_reduce_e_by_src_dst_key.cuh"
#include "prims/transform_reduce_v_frontier_outgoing_e_by_dst.cuh"
#include "prims/update_edge_src_dst_property.cuh"
#include "prims/update_v_frontier.cuh"
#include "prims/vertex_frontier.cuh"

#include <cugraph/algorithms.hpp>
#include <cugraph/detail/shuffle_wrappers.hpp>
#include <cugraph/detail/utility_wrappers.hpp>

#include <raft/core/handle.hpp>

#include <thrust/fill.h>

namespace cugraph {

namespace detail {

template <typename vertex_t, typename edge_t, typename weight_t, bool multi_gpu>
bool bellman_ford(raft::handle_t const& handle,
cugraph::graph_view_t<vertex_t, edge_t, false, multi_gpu> const& graph_view,
edge_property_view_t<edge_t, weight_t const*> edge_weight_view,
vertex_t source,
vertex_t* predecessors,
weight_t* distances)
{
using graph_view_t = cugraph::graph_view_t<vertex_t, edge_t, false, multi_gpu>;
graph_view_t current_graph_view(graph_view);

// 1. mask out self-loop edges via an edge mask
cugraph::edge_property_t<graph_view_t, bool> edge_masks_even(handle, current_graph_view);
cugraph::fill_edge_property(handle, current_graph_view, bool{false}, edge_masks_even);

cugraph::edge_property_t<graph_view_t, bool> edge_masks_odd(handle, current_graph_view);
cugraph::fill_edge_property(handle, current_graph_view, bool{false}, edge_masks_odd);

cugraph::transform_e(
handle,
current_graph_view,
edge_src_dummy_property_t{}.view(),
edge_dst_dummy_property_t{}.view(),
cugraph::edge_dummy_property_t{}.view(),
[] __device__(auto src, auto dst, thrust::nullopt_t, thrust::nullopt_t, thrust::nullopt_t) {
return !(src == dst); // mask out self-loop
},
edge_masks_even.mutable_view());

current_graph_view.attach_edge_mask(edge_masks_even.view());

// 2. initialize distances and predecessors

auto constexpr invalid_distance = std::numeric_limits<weight_t>::max();
auto constexpr invalid_vertex = invalid_vertex_id<vertex_t>::value;

auto val_first = thrust::make_zip_iterator(thrust::make_tuple(distances, predecessors));
thrust::transform(
handle.get_thrust_policy(),
thrust::make_counting_iterator(current_graph_view.local_vertex_partition_range_first()),
thrust::make_counting_iterator(current_graph_view.local_vertex_partition_range_last()),
val_first,
[source] __device__(auto v) {
auto distance = invalid_distance;
if (v == source) { distance = weight_t{0.0}; }
return thrust::make_tuple(distance, invalid_vertex);
});

auto src_distance_cache =
graph_view_t::is_multi_gpu
? edge_src_property_t<graph_view_t, weight_t>(handle, current_graph_view)
: edge_src_property_t<graph_view_t, weight_t>(handle);

rmm::device_uvector<vertex_t> local_vertices(
current_graph_view.local_vertex_partition_range_size(), handle.get_stream());

detail::sequence_fill(handle.get_stream(),
local_vertices.begin(),
local_vertices.size(),
current_graph_view.local_vertex_partition_range_first());

constexpr size_t bucket_idx_curr = 0;
constexpr size_t bucket_idx_next = 1;
constexpr size_t num_buckets = 2;

vertex_frontier_t<vertex_t, void, graph_view_t::is_multi_gpu, true> vertex_frontier(handle,
num_buckets);

if (current_graph_view.in_local_vertex_partition_range_nocheck(source)) {
vertex_frontier.bucket(bucket_idx_curr).insert(source);
}

rmm::device_uvector<vertex_t> enqueue_counter(
current_graph_view.local_vertex_partition_range_size(), handle.get_stream());

thrust::fill(
handle.get_thrust_policy(), enqueue_counter.begin(), enqueue_counter.end(), vertex_t{0});
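// enqueue_counter records how many times each local vertex has been (re)inserted into the
// frontier; in Bellman-Ford a vertex that is relaxed |V| or more times can only lie on a path
// through a negative cycle, which is what the nr_times_in_queue check below detects.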

vertex_t nr_times_in_queue = 0;
while (true) {
if constexpr (graph_view_t::is_multi_gpu) {
cugraph::update_edge_src_property(handle,
current_graph_view,
vertex_frontier.bucket(bucket_idx_curr).begin(),
vertex_frontier.bucket(bucket_idx_curr).end(),
distances,
src_distance_cache);
}
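// Relax the outgoing edges of the current frontier: for each edge (src, dst, wt) where
// distances[src] + wt improves distances[dst], emit (distances[src] + wt, src) keyed by dst
// and keep the per-destination minimum.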

auto [new_frontier_vertex_buffer, distance_predecessor_buffer] =
cugraph::transform_reduce_v_frontier_outgoing_e_by_dst(
handle,
current_graph_view,
vertex_frontier.bucket(bucket_idx_curr),
graph_view_t::is_multi_gpu
? src_distance_cache.view()
: detail::edge_major_property_view_t<vertex_t, weight_t const*>(distances),
edge_dst_dummy_property_t{}.view(),
edge_weight_view,
[distances,
v_first = current_graph_view.local_vertex_partition_range_first(),
v_last =
current_graph_view.local_vertex_partition_range_last()] __device__(auto src,
auto dst,
auto src_dist,
thrust::nullopt_t,
auto wt) {
assert((dst >= v_first) && (dst < v_last));  // dst must be in the local vertex partition range to index distances

auto dst_dist = distances[dst - v_first];
auto relax = (dst_dist > (src_dist + wt));

return relax ? thrust::optional<thrust::tuple<weight_t, vertex_t>>{thrust::make_tuple(
src_dist + wt, src)}
: thrust::nullopt;
},
reduce_op::minimum<thrust::tuple<weight_t, vertex_t>>(),
true);
size_t nr_of_updated_vertices = new_frontier_vertex_buffer.size();

if (graph_view_t::is_multi_gpu) {
nr_of_updated_vertices = host_scalar_allreduce(
handle.get_comms(), nr_of_updated_vertices, raft::comms::op_t::SUM, handle.get_stream());
}

if (nr_of_updated_vertices == 0) { break; }

thrust::for_each(handle.get_thrust_policy(),
new_frontier_vertex_buffer.begin(),
new_frontier_vertex_buffer.end(),
[v_first = current_graph_view.local_vertex_partition_range_first(),
v_last = current_graph_view.local_vertex_partition_range_last(),
enqueue_counter = enqueue_counter.begin()] __device__(vertex_t v) {
assert((v >= v_first) && (v < v_last));  // frontier vertices are local after the by-dst reduce
enqueue_counter[v - v_first] += 1;
});

nr_times_in_queue =
thrust::count_if(handle.get_thrust_policy(),
enqueue_counter.begin(),
enqueue_counter.end(),
[nr_vertices = current_graph_view.number_of_vertices()] __device__(
auto freq_v) { return freq_v >= nr_vertices; });

if (graph_view_t::is_multi_gpu) {
nr_times_in_queue = host_scalar_allreduce(
handle.get_comms(), nr_times_in_queue, raft::comms::op_t::SUM, handle.get_stream());
}

if (nr_times_in_queue > 0) { break; }
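// Fold the relaxed (distance, predecessor) pairs into the per-vertex values and move vertices
// whose distance actually improved into the next frontier bucket.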

update_v_frontier(handle,
current_graph_view,
std::move(new_frontier_vertex_buffer),
std::move(distance_predecessor_buffer),
vertex_frontier,
std::vector<size_t>{bucket_idx_next},
distances,
thrust::make_zip_iterator(thrust::make_tuple(distances, predecessors)),
[] __device__(auto v, auto v_val, auto pushed_val) {
auto new_dist = thrust::get<0>(pushed_val);
auto update = (new_dist < v_val);
return thrust::make_tuple(
update ? thrust::optional<size_t>{bucket_idx_next} : thrust::nullopt,
update ? thrust::optional<thrust::tuple<weight_t, vertex_t>>{pushed_val}
: thrust::nullopt);
});

vertex_frontier.bucket(bucket_idx_curr).clear();
vertex_frontier.bucket(bucket_idx_curr).shrink_to_fit();

if (vertex_frontier.bucket(bucket_idx_next).aggregate_size() > 0) {
vertex_frontier.swap_buckets(bucket_idx_curr, bucket_idx_next);
} else {
break;
}
}

if (nr_times_in_queue > 0) { return false; }
return true;
}
} // namespace detail

template <typename vertex_t, typename edge_t, typename weight_t, bool multi_gpu>
bool bellman_ford(raft::handle_t const& handle,
graph_view_t<vertex_t, edge_t, false, multi_gpu> const& graph_view,
edge_property_view_t<edge_t, weight_t const*> edge_weight_view,
vertex_t source,
vertex_t* predecessors,
weight_t* distances)
{
return detail::bellman_ford(
handle, graph_view, edge_weight_view, source, predecessors, distances);
}

} // namespace cugraph
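For reference, the enqueue-count based negative-cycle check in the loop above mirrors the classic queue-based (SPFA) formulation of Bellman-Ford. A minimal single-threaded sketch of the same logic on a plain adjacency list, using hypothetical names and types that are not part of cuGraph:

#include <cstdint>
#include <limits>
#include <queue>
#include <vector>

struct Edge {
  int32_t dst;
  float wt;
};

// Returns false if a negative cycle is reachable from source, true otherwise.
bool bellman_ford_reference(std::vector<std::vector<Edge>> const& adj,
                            int32_t source,
                            std::vector<float>& dist,
                            std::vector<int32_t>& pred)
{
  auto const n = static_cast<int32_t>(adj.size());
  dist.assign(n, std::numeric_limits<float>::max());
  pred.assign(n, -1);
  std::vector<int32_t> enqueue_count(n, 0);  // counterpart of enqueue_counter above
  std::vector<bool> in_queue(n, false);

  std::queue<int32_t> frontier;
  dist[source] = 0.0f;
  frontier.push(source);
  in_queue[source] = true;

  while (!frontier.empty()) {
    auto const u = frontier.front();
    frontier.pop();
    in_queue[u] = false;
    for (auto const& e : adj[u]) {
      if (dist[u] + e.wt < dist[e.dst]) {  // relax edge (u, e.dst)
        dist[e.dst] = dist[u] + e.wt;
        pred[e.dst] = u;
        if (!in_queue[e.dst]) {
          // a vertex re-enqueued |V| or more times must lie on a path through a negative cycle
          if (++enqueue_count[e.dst] >= n) { return false; }
          frontier.push(e.dst);
          in_queue[e.dst] = true;
        }
      }
    }
  }
  return true;
}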
62 changes: 62 additions & 0 deletions cpp/src/traversal/bellman_ford_mg.cu
@@ -0,0 +1,62 @@
/*
* Copyright (c) 2024, NVIDIA CORPORATION.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#include "bellman_ford_impl.cuh"

namespace cugraph {

template bool bellman_ford(raft::handle_t const& handle,
graph_view_t<int32_t, int32_t, false, true> const& graph_view,
edge_property_view_t<int32_t, float const*> edge_weight_view,
int32_t source,
int32_t* predecessors,
float* distances);

template bool bellman_ford(raft::handle_t const& handle,
graph_view_t<int32_t, int32_t, false, true> const& graph_view,
edge_property_view_t<int32_t, double const*> edge_weight_view,
int32_t source,
int32_t* predecessors,
double* distances);

template bool bellman_ford(raft::handle_t const& handle,
graph_view_t<int32_t, int64_t, false, true> const& graph_view,
edge_property_view_t<int64_t, float const*> edge_weight_view,
int32_t source,
int32_t* predecessors,
float* distances);

template bool bellman_ford(raft::handle_t const& handle,
graph_view_t<int64_t, int64_t, false, true> const& graph_view,
edge_property_view_t<int64_t, float const*> edge_weight_view,
int64_t source,
int64_t* predecessors,
float* distances);

template bool bellman_ford(raft::handle_t const& handle,
graph_view_t<int32_t, int64_t, false, true> const& graph_view,
edge_property_view_t<int64_t, double const*> edge_weight_view,
int32_t source,
int32_t* predecessors,
double* distances);

template bool bellman_ford(raft::handle_t const& handle,
graph_view_t<int64_t, int64_t, false, true> const& graph_view,
edge_property_view_t<int64_t, double const*> edge_weight_view,
int64_t source,
int64_t* predecessors,
double* distances);

} // namespace cugraph