From d07af7d7555e9b17064475f17337594681fbbd4f Mon Sep 17 00:00:00 2001 From: Charles Hastings Date: Mon, 6 Nov 2023 17:37:43 -0800 Subject: [PATCH 1/6] propose C API changes --- cpp/include/cugraph_c/graph.h | 160 ++++++++++++++++++++++++++++++++-- 1 file changed, 154 insertions(+), 6 deletions(-) diff --git a/cpp/include/cugraph_c/graph.h b/cpp/include/cugraph_c/graph.h index e910d8b1244..176db2d377e 100644 --- a/cpp/include/cugraph_c/graph.h +++ b/cpp/include/cugraph_c/graph.h @@ -35,10 +35,11 @@ typedef struct { bool_t is_multigraph; } cugraph_graph_properties_t; -// FIXME: Add support for specifying isolated vertices /** * @brief Construct an SG graph * + * @deprecated This API will be deleted, use cugraph_graph_create_sg instead + * * @param [in] handle Handle for accessing resources * @param [in] properties Properties of the constructed graph * @param [in] src Device array containing the source vertex ids. @@ -76,9 +77,55 @@ cugraph_error_code_t cugraph_sg_graph_create( cugraph_graph_t** graph, cugraph_error_t** error); +/** + * @brief Construct an SG graph + * + * @param [in] handle Handle for accessing resources + * @param [in] properties Properties of the constructed graph + * @param [in] vertices Optional device array containing a list of vertex ids + * (specify NULL if we should create vertex ids from the + * unique contents of @p src and @p dst) + * @param [in] src Device array containing the source vertex ids. + * @param [in] dst Device array containing the destination vertex ids + * @param [in] weights Device array containing the edge weights. Note that an unweighted + * graph can be created by passing weights == NULL. + * @param [in] edge_ids Device array containing the edge ids for each edge. Optional + argument that can be NULL if edge ids are not used. + * @param [in] edge_type_ids Device array containing the edge types for each edge. Optional + argument that can be NULL if edge types are not used. + * @param [in] store_transposed If true create the graph initially in transposed format + * @param [in] renumber If true, renumber vertices to make an efficient data structure. + * If false, do not renumber. Renumbering is required if the vertices are not sequential + * integer values from 0 to num_vertices. + * @param [in] do_expensive_check If true, do expensive checks to validate the input data + * is consistent with software assumptions. If false bypass these checks. + * @param [in] properties Properties of the graph + * @param [out] graph A pointer to the graph object + * @param [out] error Pointer to an error object storing details of any error. Will + * be populated if error code is not CUGRAPH_SUCCESS + * + * @return error code + */ +cugraph_error_code_t cugraph_graph_create_sg( + const cugraph_resource_handle_t* handle, + const cugraph_graph_properties_t* properties, + const cugraph_type_erased_device_array_view_t* vertices, + const cugraph_type_erased_device_array_view_t* src, + const cugraph_type_erased_device_array_view_t* dst, + const cugraph_type_erased_device_array_view_t* weights, + const cugraph_type_erased_device_array_view_t* edge_ids, + const cugraph_type_erased_device_array_view_t* edge_type_ids, + bool_t store_transposed, + bool_t renumber, + bool_t do_expensive_check, + cugraph_graph_t** graph, + cugraph_error_t** error); + /** * @brief Construct an SG graph from a CSR input * + * @deprecated This API will be deleted, use cugraph_graph_create_sg_from_csr instead + * * @param [in] handle Handle for accessing resources * @param [in] properties Properties of the constructed graph * @param [in] offsets Device array containing the CSR offsets array @@ -116,19 +163,67 @@ cugraph_error_code_t cugraph_sg_graph_create_from_csr( cugraph_graph_t** graph, cugraph_error_t** error); +/** + * @brief Construct an SG graph from a CSR input + * + * @param [in] handle Handle for accessing resources + * @param [in] properties Properties of the constructed graph + * @param [in] offsets Device array containing the CSR offsets array + * @param [in] indices Device array containing the destination vertex ids + * @param [in] weights Device array containing the edge weights. Note that an unweighted + * graph can be created by passing weights == NULL. + * @param [in] edge_ids Device array containing the edge ids for each edge. Optional + argument that can be NULL if edge ids are not used. + * @param [in] edge_type_ids Device array containing the edge types for each edge. Optional + argument that can be NULL if edge types are not used. + * @param [in] store_transposed If true create the graph initially in transposed format + * @param [in] renumber If true, renumber vertices to make an efficient data structure. + * If false, do not renumber. Renumbering is required if the vertices are not sequential + * integer values from 0 to num_vertices. + * @param [in] do_expensive_check If true, do expensive checks to validate the input data + * is consistent with software assumptions. If false bypass these checks. + * @param [in] properties Properties of the graph + * @param [out] graph A pointer to the graph object + * @param [out] error Pointer to an error object storing details of any error. Will + * be populated if error code is not CUGRAPH_SUCCESS + * + * @return error code + */ +cugraph_error_code_t cugraph_graph_create_sg_from_csr( + const cugraph_resource_handle_t* handle, + const cugraph_graph_properties_t* properties, + const cugraph_type_erased_device_array_view_t* offsets, + const cugraph_type_erased_device_array_view_t* indices, + const cugraph_type_erased_device_array_view_t* weights, + const cugraph_type_erased_device_array_view_t* edge_ids, + const cugraph_type_erased_device_array_view_t* edge_type_ids, + bool_t store_transposed, + bool_t renumber, + bool_t do_expensive_check, + cugraph_graph_t** graph, + cugraph_error_t** error); + +/** + * @brief Destroy an graph + * + * @param [in] graph A pointer to the graph object to destroy + */ +void cugraph_graph_free(cugraph_graph_t* graph); + /** * @brief Destroy an SG graph * + * @deprecated This API will be deleted, use cugraph_graph_free instead + * * @param [in] graph A pointer to the graph object to destroy */ -// FIXME: This should probably just be cugraph_graph_free -// but didn't want to confuse with original cugraph_free_graph void cugraph_sg_graph_free(cugraph_graph_t* graph); -// FIXME: Add support for specifying isolated vertices /** * @brief Construct an MG graph * + * @deprecated This API will be deleted, use cugraph_graph_create_mg instead + * * @param [in] handle Handle for accessing resources * @param [in] properties Properties of the constructed graph * @param [in] src Device array containing the source vertex ids @@ -165,13 +260,66 @@ cugraph_error_code_t cugraph_mg_graph_create( cugraph_graph_t** graph, cugraph_error_t** error); +/** + * @brief Construct an MG graph + * + * @param [in] handle Handle for accessing resources + * @param [in] properties Properties of the constructed graph + * @param [in] vertices List of device arrays containing the unique vertex ids. + * If NULL we will construct this internally using the unique + * entries specified in src and dst + * All entries in this list will be concatenated on this GPU + * into a single array. + * @param [in] src List of device array containing the source vertex ids + * All entries in this list will be concatenated on this GPU + * into a single array. + * @param [in] dst List of device array containing the destination vertex ids + * All entries in this list will be concatenated on this GPU + * into a single array. + * @param [in] weights List of device array containing the edge weights. Note that an + * unweighted graph can be created by passing weights == NULL. If a weighted graph is to be + * created, the weights device array should be created on each rank, but the pointer can be NULL and + * the size 0 if there are no inputs provided by this rank All entries in this list will be + * concatenated on this GPU into a single array. + * @param [in] edge_ids List of device array containing the edge ids for each edge. Optional + * argument that can be NULL if edge ids are not used. + * All entries in this list will be concatenated on this GPU + * into a single array. + * @param [in] edge_type_ids List of device array containing the edge types for each edge. + * Optional argument that can be NULL if edge types are not used. All entries in this list will be + * concatenated on this GPU into a single array. + * @param [in] store_transposed If true create the graph initially in transposed format + * @param [in] num_arrays The number of arrays specified in @p vertices, @p src, @p dst, @p + * weights, @p edge_ids and @p edge_type_ids + * @param [in] do_expensive_check If true, do expensive checks to validate the input data + * is consistent with software assumptions. If false bypass these checks. + * @param [out] graph A pointer to the graph object + * @param [out] error Pointer to an error object storing details of any error. Will + * be populated if error code is not CUGRAPH_SUCCESS + * @return error code + */ +cugraph_error_code_t cugraph_graph_create_mg( + const cugraph_resource_handle_t* handle, + const cugraph_graph_properties_t* properties, + const cugraph_type_erased_device_array_view_t** vertices, + const cugraph_type_erased_device_array_view_t** src, + const cugraph_type_erased_device_array_view_t** dst, + const cugraph_type_erased_device_array_view_t** weights, + const cugraph_type_erased_device_array_view_t** edge_ids, + const cugraph_type_erased_device_array_view_t** edge_type_ids, + bool_t store_transposed, + size_t num_arrays, + bool_t do_expensive_check, + cugraph_graph_t** graph, + cugraph_error_t** error); + /** * @brief Destroy an MG graph * + * @deprecated This API will be deleted, use cugraph_graph_free instead + * * @param [in] graph A pointer to the graph object to destroy */ -// FIXME: This should probably just be cugraph_graph_free -// but didn't want to confuse with original cugraph_free_graph void cugraph_mg_graph_free(cugraph_graph_t* graph); /** From c66a50e85b7a85a3af8a58a5be835bad36bcd464 Mon Sep 17 00:00:00 2001 From: Charles Hastings Date: Mon, 13 Nov 2023 15:10:27 -0800 Subject: [PATCH 2/6] new graph creation implementation in C API --- cpp/include/cugraph_c/graph.h | 4 - cpp/src/c_api/graph_mg.cpp | 448 ++++++++++++++----------- cpp/src/c_api/graph_sg.cpp | 106 ++++-- cpp/tests/c_api/create_graph_test.c | 317 ++++++++++++++++- cpp/tests/c_api/mg_create_graph_test.c | 12 +- 5 files changed, 661 insertions(+), 226 deletions(-) diff --git a/cpp/include/cugraph_c/graph.h b/cpp/include/cugraph_c/graph.h index 176db2d377e..e431d2d8c90 100644 --- a/cpp/include/cugraph_c/graph.h +++ b/cpp/include/cugraph_c/graph.h @@ -56,7 +56,6 @@ typedef struct { * integer values from 0 to num_vertices. * @param [in] do_expensive_check If true, do expensive checks to validate the input data * is consistent with software assumptions. If false bypass these checks. - * @param [in] properties Properties of the graph * @param [out] graph A pointer to the graph object * @param [out] error Pointer to an error object storing details of any error. Will * be populated if error code is not CUGRAPH_SUCCESS @@ -99,7 +98,6 @@ cugraph_error_code_t cugraph_sg_graph_create( * integer values from 0 to num_vertices. * @param [in] do_expensive_check If true, do expensive checks to validate the input data * is consistent with software assumptions. If false bypass these checks. - * @param [in] properties Properties of the graph * @param [out] graph A pointer to the graph object * @param [out] error Pointer to an error object storing details of any error. Will * be populated if error code is not CUGRAPH_SUCCESS @@ -142,7 +140,6 @@ cugraph_error_code_t cugraph_graph_create_sg( * integer values from 0 to num_vertices. * @param [in] do_expensive_check If true, do expensive checks to validate the input data * is consistent with software assumptions. If false bypass these checks. - * @param [in] properties Properties of the graph * @param [out] graph A pointer to the graph object * @param [out] error Pointer to an error object storing details of any error. Will * be populated if error code is not CUGRAPH_SUCCESS @@ -182,7 +179,6 @@ cugraph_error_code_t cugraph_sg_graph_create_from_csr( * integer values from 0 to num_vertices. * @param [in] do_expensive_check If true, do expensive checks to validate the input data * is consistent with software assumptions. If false bypass these checks. - * @param [in] properties Properties of the graph * @param [out] graph A pointer to the graph object * @param [out] error Pointer to an error object storing details of any error. Will * be populated if error code is not CUGRAPH_SUCCESS diff --git a/cpp/src/c_api/graph_mg.cpp b/cpp/src/c_api/graph_mg.cpp index f50c7c08fb6..56a3d611469 100644 --- a/cpp/src/c_api/graph_mg.cpp +++ b/cpp/src/c_api/graph_mg.cpp @@ -31,40 +31,79 @@ namespace { +template +rmm::device_uvector concatenate( + raft::handle_t const& handle, + cugraph::c_api::cugraph_type_erased_device_array_view_t const** values, + size_t num_arrays) +{ + size_t num_values = std::transform_reduce( + values, values + num_arrays, size_t{0}, std::plus{}, [](auto p) { return p->size_; }); + + rmm::device_uvector results(num_values, handle.get_stream()); + size_t concat_pos{0}; + + for (size_t i = 0; i < num_arrays; ++i) { + raft::copy(results.data() + concat_pos, + values[i]->as_type(), + values[i]->size_, + handle.get_stream()); + concat_pos += values[i]->size_; + } + + return results; +} + struct create_graph_functor : public cugraph::c_api::abstract_functor { raft::handle_t const& handle_; cugraph_graph_properties_t const* properties_; - cugraph::c_api::cugraph_type_erased_device_array_view_t const* src_; - cugraph::c_api::cugraph_type_erased_device_array_view_t const* dst_; - cugraph::c_api::cugraph_type_erased_device_array_view_t const* weights_; - cugraph::c_api::cugraph_type_erased_device_array_view_t const* edge_ids_; - cugraph::c_api::cugraph_type_erased_device_array_view_t const* edge_type_ids_; + cugraph_data_type_id_t vertex_type_; + cugraph_data_type_id_t edge_type_; + cugraph_data_type_id_t weight_type_; + cugraph_data_type_id_t edge_type_id_type_; + cugraph::c_api::cugraph_type_erased_device_array_view_t const** vertices_; + cugraph::c_api::cugraph_type_erased_device_array_view_t const** src_; + cugraph::c_api::cugraph_type_erased_device_array_view_t const** dst_; + cugraph::c_api::cugraph_type_erased_device_array_view_t const** weights_; + cugraph::c_api::cugraph_type_erased_device_array_view_t const** edge_ids_; + cugraph::c_api::cugraph_type_erased_device_array_view_t const** edge_type_ids_; + size_t num_arrays_; bool_t renumber_; bool_t check_; - cugraph_data_type_id_t edge_type_; cugraph::c_api::cugraph_graph_t* result_{}; - create_graph_functor(raft::handle_t const& handle, - cugraph_graph_properties_t const* properties, - cugraph::c_api::cugraph_type_erased_device_array_view_t const* src, - cugraph::c_api::cugraph_type_erased_device_array_view_t const* dst, - cugraph::c_api::cugraph_type_erased_device_array_view_t const* weights, - cugraph::c_api::cugraph_type_erased_device_array_view_t const* edge_ids, - cugraph::c_api::cugraph_type_erased_device_array_view_t const* edge_type_ids, - bool_t renumber, - bool_t check, - cugraph_data_type_id_t edge_type) + create_graph_functor( + raft::handle_t const& handle, + cugraph_graph_properties_t const* properties, + cugraph_data_type_id_t vertex_type, + cugraph_data_type_id_t edge_type, + cugraph_data_type_id_t weight_type, + cugraph_data_type_id_t edge_type_id_type, + cugraph::c_api::cugraph_type_erased_device_array_view_t const** vertices, + cugraph::c_api::cugraph_type_erased_device_array_view_t const** src, + cugraph::c_api::cugraph_type_erased_device_array_view_t const** dst, + cugraph::c_api::cugraph_type_erased_device_array_view_t const** weights, + cugraph::c_api::cugraph_type_erased_device_array_view_t const** edge_ids, + cugraph::c_api::cugraph_type_erased_device_array_view_t const** edge_type_ids, + size_t num_arrays, + bool_t renumber, + bool_t check) : abstract_functor(), properties_(properties), + vertex_type_(vertex_type), + edge_type_(edge_type), + weight_type_(weight_type), + edge_type_id_type_(edge_type_id_type), handle_(handle), + vertices_(vertices), src_(src), dst_(dst), weights_(weights), edge_ids_(edge_ids), edge_type_ids_(edge_type_ids), + num_arrays_(num_arrays), renumber_(renumber), - check_(check), - edge_type_(edge_type) + check_(check) { } @@ -96,49 +135,27 @@ struct create_graph_functor : public cugraph::c_api::abstract_functor { edge_type_id_t>> new_edge_types{std::nullopt}; - rmm::device_uvector edgelist_srcs(src_->size_, handle_.get_stream()); - rmm::device_uvector edgelist_dsts(dst_->size_, handle_.get_stream()); + std::optional> vertex_list = + vertices_ ? std::make_optional(concatenate(handle_, vertices_, num_arrays_)) + : std::nullopt; - raft::copy( - edgelist_srcs.data(), src_->as_type(), src_->size_, handle_.get_stream()); - raft::copy( - edgelist_dsts.data(), dst_->as_type(), dst_->size_, handle_.get_stream()); + rmm::device_uvector edgelist_srcs = + concatenate(handle_, src_, num_arrays_); + rmm::device_uvector edgelist_dsts = + concatenate(handle_, dst_, num_arrays_); std::optional> edgelist_weights = - weights_ - ? std::make_optional(rmm::device_uvector(weights_->size_, handle_.get_stream())) - : std::nullopt; - - if (edgelist_weights) { - raft::copy(edgelist_weights->data(), - weights_->as_type(), - weights_->size_, - handle_.get_stream()); - } + weights_ ? std::make_optional(concatenate(handle_, weights_, num_arrays_)) + : std::nullopt; std::optional> edgelist_edge_ids = - edge_ids_ - ? std::make_optional(rmm::device_uvector(edge_ids_->size_, handle_.get_stream())) - : std::nullopt; - - if (edgelist_edge_ids) { - raft::copy(edgelist_edge_ids->data(), - edge_ids_->as_type(), - edge_ids_->size_, - handle_.get_stream()); - } + edge_ids_ ? std::make_optional(concatenate(handle_, edge_ids_, num_arrays_)) + : std::nullopt; std::optional> edgelist_edge_types = - edge_type_ids_ ? std::make_optional(rmm::device_uvector( - edge_type_ids_->size_, handle_.get_stream())) - : std::nullopt; - - if (edgelist_edge_types) { - raft::copy(edgelist_edge_types->data(), - edge_type_ids_->as_type(), - edge_type_ids_->size_, - handle_.get_stream()); - } + edge_type_ids_ + ? std::make_optional(concatenate(handle_, edge_type_ids_, num_arrays_)) + : std::nullopt; std::tie(store_transposed ? edgelist_dsts : edgelist_srcs, store_transposed ? edgelist_srcs : edgelist_dsts, @@ -153,6 +170,11 @@ struct create_graph_functor : public cugraph::c_api::abstract_functor { std::move(edgelist_edge_ids), std::move(edgelist_edge_types)); + if (vertex_list) { + vertex_list = cugraph::detail::shuffle_ext_vertices_to_local_gpu_by_vertex_partitioning( + handle_, std::move(*vertex_list)); + } + auto graph = new cugraph::graph_t(handle_); rmm::device_uvector* number_map = @@ -179,7 +201,7 @@ struct create_graph_functor : public cugraph::c_api::abstract_functor { store_transposed, multi_gpu>( handle_, - std::nullopt, + std::move(vertex_list), std::move(edgelist_srcs), std::move(edgelist_dsts), std::move(edgelist_weights), @@ -204,89 +226,36 @@ struct create_graph_functor : public cugraph::c_api::abstract_functor { if (new_edge_types) { *edge_types = std::move(new_edge_types.value()); } // Set up return - auto result = new cugraph::c_api::cugraph_graph_t{ - src_->type_, - edge_type_, - weights_ ? weights_->type_ : cugraph_data_type_id_t::FLOAT32, - edge_type_ids_ ? edge_type_ids_->type_ : cugraph_data_type_id_t::INT32, - store_transposed, - multi_gpu, - graph, - number_map, - new_edge_weights ? edge_weights : nullptr, - new_edge_ids ? edge_ids : nullptr, - new_edge_types ? edge_types : nullptr}; + auto result = new cugraph::c_api::cugraph_graph_t{vertex_type_, + edge_type_, + weight_type_, + edge_type_id_type_, + store_transposed, + multi_gpu, + graph, + number_map, + new_edge_weights ? edge_weights : nullptr, + new_edge_ids ? edge_ids : nullptr, + new_edge_types ? edge_types : nullptr}; result_ = reinterpret_cast(result); } } }; -struct destroy_graph_functor : public cugraph::c_api::abstract_functor { - void* graph_; - void* number_map_; - void* edge_weights_; - void* edge_ids_; - void* edge_types_; - - destroy_graph_functor( - void* graph, void* number_map, void* edge_weights, void* edge_ids, void* edge_types) - : abstract_functor(), - graph_(graph), - number_map_(number_map), - edge_weights_(edge_weights), - edge_ids_(edge_ids), - edge_types_(edge_types) - { - } - - template - void operator()() - { - auto internal_graph_pointer = - reinterpret_cast*>(graph_); - - delete internal_graph_pointer; - - auto internal_number_map_pointer = - reinterpret_cast*>(number_map_); - - delete internal_number_map_pointer; - - auto internal_edge_weight_pointer = reinterpret_cast< - cugraph::edge_property_t, - weight_t>*>(edge_weights_); - if (internal_edge_weight_pointer) { delete internal_edge_weight_pointer; } - - auto internal_edge_id_pointer = reinterpret_cast< - cugraph::edge_property_t, - edge_t>*>(edge_ids_); - if (internal_edge_id_pointer) { delete internal_edge_id_pointer; } - - auto internal_edge_type_pointer = reinterpret_cast< - cugraph::edge_property_t, - edge_type_id_t>*>(edge_types_); - if (internal_edge_type_pointer) { delete internal_edge_type_pointer; } - } -}; - } // namespace -extern "C" cugraph_error_code_t cugraph_mg_graph_create( +extern "C" cugraph_error_code_t cugraph_graph_create_mg( const cugraph_resource_handle_t* handle, const cugraph_graph_properties_t* properties, - const cugraph_type_erased_device_array_view_t* src, - const cugraph_type_erased_device_array_view_t* dst, - const cugraph_type_erased_device_array_view_t* weights, - const cugraph_type_erased_device_array_view_t* edge_ids, - const cugraph_type_erased_device_array_view_t* edge_type_ids, + const cugraph_type_erased_device_array_view_t** vertices, + const cugraph_type_erased_device_array_view_t** src, + const cugraph_type_erased_device_array_view_t** dst, + const cugraph_type_erased_device_array_view_t** weights, + const cugraph_type_erased_device_array_view_t** edge_ids, + const cugraph_type_erased_device_array_view_t** edge_type_ids, bool_t store_transposed, - size_t num_edges, + size_t num_arrays, bool_t check, cugraph_graph_t** graph, cugraph_error_t** error) @@ -298,87 +267,173 @@ extern "C" cugraph_error_code_t cugraph_mg_graph_create( *error = nullptr; auto p_handle = reinterpret_cast(handle); + auto p_vertices = + reinterpret_cast(vertices); auto p_src = - reinterpret_cast(src); + reinterpret_cast(src); auto p_dst = - reinterpret_cast(dst); + reinterpret_cast(dst); auto p_weights = - reinterpret_cast(weights); + reinterpret_cast(weights); auto p_edge_ids = - reinterpret_cast(edge_ids); + reinterpret_cast(edge_ids); auto p_edge_type_ids = - reinterpret_cast(edge_type_ids); + reinterpret_cast( + edge_type_ids); - CAPI_EXPECTS(p_src->size_ == p_dst->size_, - CUGRAPH_INVALID_INPUT, - "Invalid input arguments: src size != dst size.", - *error); - CAPI_EXPECTS(p_src->type_ == p_dst->type_, - CUGRAPH_INVALID_INPUT, - "Invalid input arguments: src type != dst type.", - *error); + size_t local_num_edges{0}; - CAPI_EXPECTS((weights == nullptr) || (p_weights->size_ == p_src->size_), - CUGRAPH_INVALID_INPUT, - "Invalid input arguments: src size != weights size.", - *error); + cugraph_data_type_id_t vertex_type{cugraph_data_type_id_t::NTYPES}; + cugraph_data_type_id_t weight_type{cugraph_data_type_id_t::NTYPES}; + + for (size_t i = 0; i < num_arrays; ++i) { + CAPI_EXPECTS(p_src[i]->size_ == p_dst[i]->size_, + CUGRAPH_INVALID_INPUT, + "Invalid input arguments: src size != dst size.", + *error); + + CAPI_EXPECTS(p_src[i]->type_ == p_dst[i]->type_, + CUGRAPH_INVALID_INPUT, + "Invalid input arguments: src type != dst type.", + *error); + + CAPI_EXPECTS((p_vertices == nullptr) || (p_src[i]->type_ == p_vertices[i]->type_), + CUGRAPH_INVALID_INPUT, + "Invalid input arguments: src type != vertices type.", + *error); + + CAPI_EXPECTS((weights == nullptr) || (p_weights[i]->size_ == p_src[i]->size_), + CUGRAPH_INVALID_INPUT, + "Invalid input arguments: src size != weights size.", + *error); + + local_num_edges += p_src[i]->size_; + + if (vertex_type == cugraph_data_type_id_t::NTYPES) vertex_type = p_src[i]->type_; + + if (weights != nullptr) { + if (weight_type == cugraph_data_type_id_t::NTYPES) weight_type = p_weights[i]->type_; + } + + CAPI_EXPECTS(p_src[i]->type_ == vertex_type, + CUGRAPH_INVALID_INPUT, + "Invalid input arguments: all vertex types must match", + *error); + + CAPI_EXPECTS(p_weights[i]->type_ == weight_type, + CUGRAPH_INVALID_INPUT, + "Invalid input arguments: all weight types must match", + *error); + } + + size_t num_edges = cugraph::host_scalar_allreduce(p_handle->handle_->get_comms(), + local_num_edges, + raft::comms::op_t::SUM, + p_handle->handle_->get_stream()); + + // FIXME: Need to handle the case where a GPU gets a NULL pointer but other GPUs + // Get values. Override vertex_type/weight_type/edge_id_type but don't + // fail. + + auto vertex_types = cugraph::host_scalar_allgather( + p_handle->handle_->get_comms(), static_cast(vertex_type), p_handle->handle_->get_stream()); + + auto weight_types = cugraph::host_scalar_allgather( + p_handle->handle_->get_comms(), static_cast(weight_type), p_handle->handle_->get_stream()); + + CAPI_EXPECTS( + std::count_if(vertex_types.begin(), + vertex_types.end(), + [vertex_type](auto t) { return vertex_type != static_cast(t); }) == 0, + CUGRAPH_INVALID_INPUT, + "different vertex type used on different GPUs", + *error); + + CAPI_EXPECTS( + std::count_if(weight_types.begin(), + weight_types.end(), + [weight_type](auto t) { return weight_type != static_cast(t); }) == 0, + CUGRAPH_INVALID_INPUT, + "different weight type used on different GPUs", + *error); cugraph_data_type_id_t edge_type; - cugraph_data_type_id_t weight_type; if (num_edges < int32_threshold) { - edge_type = p_src->type_; + edge_type = static_cast(vertex_types[0]); } else { edge_type = cugraph_data_type_id_t::INT64; } - if (weights != nullptr) { - weight_type = p_weights->type_; - } else { + if (weight_type == cugraph_data_type_id_t::NTYPES) { weight_type = cugraph_data_type_id_t::FLOAT32; } - CAPI_EXPECTS((edge_ids == nullptr) || (p_edge_ids->type_ == edge_type), - CUGRAPH_INVALID_INPUT, - "Invalid input arguments: Edge id type must match edge type", - *error); + cugraph_data_type_id_t edge_type_id_type{cugraph_data_type_id_t::NTYPES}; - CAPI_EXPECTS((edge_ids == nullptr) || (p_edge_ids->size_ == p_src->size_), - CUGRAPH_INVALID_INPUT, - "Invalid input arguments: src size != edge id prop size", - *error); + for (size_t i = 0; i < num_arrays; ++i) { + CAPI_EXPECTS((edge_ids == nullptr) || (p_edge_ids[i]->type_ == edge_type), + CUGRAPH_INVALID_INPUT, + "Invalid input arguments: Edge id type must match edge type", + *error); + + CAPI_EXPECTS((edge_ids == nullptr) || (p_edge_ids[i]->size_ == p_src[i]->size_), + CUGRAPH_INVALID_INPUT, + "Invalid input arguments: src size != edge id prop size", + *error); + + if (edge_type_ids != nullptr) { + CAPI_EXPECTS(p_edge_type_ids[i]->size_ == p_src[i]->size_, + CUGRAPH_INVALID_INPUT, + "Invalid input arguments: src size != edge type prop size", + *error); - CAPI_EXPECTS((edge_type_ids == nullptr) || (p_edge_type_ids->size_ == p_src->size_), + if (edge_type_id_type == cugraph_data_type_id_t::NTYPES) + edge_type_id_type = p_edge_type_ids[i]->type_; + + CAPI_EXPECTS(p_edge_type_ids[i]->type_ == edge_type_id_type, + CUGRAPH_INVALID_INPUT, + "Invalid input arguments: src size != edge type prop size", + *error); + } + } + + auto edge_type_id_types = cugraph::host_scalar_allgather(p_handle->handle_->get_comms(), + static_cast(edge_type_id_type), + p_handle->handle_->get_stream()); + + CAPI_EXPECTS(std::count_if(edge_type_id_types.begin(), + edge_type_id_types.end(), + [edge_type_id_type](auto t) { + return edge_type_id_type != static_cast(t); + }) == 0, CUGRAPH_INVALID_INPUT, - "Invalid input arguments: src size != edge type prop size", + "different edge_type_id type used on different GPUs", *error); - cugraph_data_type_id_t edge_type_id_type; - if (edge_type_ids == nullptr) { + if (edge_type_id_type == cugraph_data_type_id_t::NTYPES) { edge_type_id_type = cugraph_data_type_id_t::INT32; - } else { - edge_type_id_type = p_edge_type_ids->type_; } create_graph_functor functor(*p_handle->handle_, properties, + vertex_type, + edge_type, + weight_type, + edge_type_id_type, + p_vertices, p_src, p_dst, p_weights, p_edge_ids, p_edge_type_ids, + num_arrays, bool_t::TRUE, - check, - edge_type); + check); try { - cugraph::c_api::vertex_dispatcher(p_src->type_, - edge_type, - weight_type, - edge_type_id_type, - store_transposed, - multi_gpu, - functor); + cugraph::c_api::vertex_dispatcher( + vertex_type, edge_type, weight_type, edge_type_id_type, store_transposed, multi_gpu, functor); if (functor.error_code_ != CUGRAPH_SUCCESS) { *error = reinterpret_cast(functor.error_.release()); @@ -394,25 +449,36 @@ extern "C" cugraph_error_code_t cugraph_mg_graph_create( return CUGRAPH_SUCCESS; } +extern "C" cugraph_error_code_t cugraph_mg_graph_create( + const cugraph_resource_handle_t* handle, + const cugraph_graph_properties_t* properties, + const cugraph_type_erased_device_array_view_t* src, + const cugraph_type_erased_device_array_view_t* dst, + const cugraph_type_erased_device_array_view_t* weights, + const cugraph_type_erased_device_array_view_t* edge_ids, + const cugraph_type_erased_device_array_view_t* edge_type_ids, + bool_t store_transposed, + size_t num_edges, + bool_t check, + cugraph_graph_t** graph, + cugraph_error_t** error) +{ + return cugraph_graph_create_mg(handle, + properties, + NULL, + &src, + &dst, + &weights, + &edge_ids, + &edge_type_ids, + store_transposed, + 1, + check, + graph, + error); +} + extern "C" void cugraph_mg_graph_free(cugraph_graph_t* ptr_graph) { - if (ptr_graph != NULL) { - auto internal_pointer = reinterpret_cast(ptr_graph); - - destroy_graph_functor functor(internal_pointer->graph_, - internal_pointer->number_map_, - internal_pointer->edge_weights_, - internal_pointer->edge_ids_, - internal_pointer->edge_types_); - - cugraph::c_api::vertex_dispatcher(internal_pointer->vertex_type_, - internal_pointer->edge_type_, - internal_pointer->weight_type_, - internal_pointer->edge_type_id_type_, - internal_pointer->store_transposed_, - internal_pointer->multi_gpu_, - functor); - - delete internal_pointer; - } + if (ptr_graph != NULL) { cugraph_graph_free(ptr_graph); } } diff --git a/cpp/src/c_api/graph_sg.cpp b/cpp/src/c_api/graph_sg.cpp index 9536869f123..f16be20807a 100644 --- a/cpp/src/c_api/graph_sg.cpp +++ b/cpp/src/c_api/graph_sg.cpp @@ -33,6 +33,7 @@ namespace { struct create_graph_functor : public cugraph::c_api::abstract_functor { raft::handle_t const& handle_; cugraph_graph_properties_t const* properties_; + cugraph::c_api::cugraph_type_erased_device_array_view_t const* vertices_; cugraph::c_api::cugraph_type_erased_device_array_view_t const* src_; cugraph::c_api::cugraph_type_erased_device_array_view_t const* dst_; cugraph::c_api::cugraph_type_erased_device_array_view_t const* weights_; @@ -45,6 +46,7 @@ struct create_graph_functor : public cugraph::c_api::abstract_functor { create_graph_functor(raft::handle_t const& handle, cugraph_graph_properties_t const* properties, + cugraph::c_api::cugraph_type_erased_device_array_view_t const* vertices, cugraph::c_api::cugraph_type_erased_device_array_view_t const* src, cugraph::c_api::cugraph_type_erased_device_array_view_t const* dst, cugraph::c_api::cugraph_type_erased_device_array_view_t const* weights, @@ -56,6 +58,7 @@ struct create_graph_functor : public cugraph::c_api::abstract_functor { : abstract_functor(), properties_(properties), handle_(handle), + vertices_(vertices), src_(src), dst_(dst), weights_(weights), @@ -99,6 +102,18 @@ struct create_graph_functor : public cugraph::c_api::abstract_functor { edge_type_id_t>> new_edge_types{std::nullopt}; + std::optional> vertex_list = + vertices_ ? std::make_optional( + rmm::device_uvector(vertices_->size_, handle_.get_stream())) + : std::nullopt; + + if (vertex_list) { + raft::copy(vertex_list->data(), + vertices_->as_type(), + vertices_->size_, + handle_.get_stream()); + } + rmm::device_uvector edgelist_srcs(src_->size_, handle_.get_stream()); rmm::device_uvector edgelist_dsts(dst_->size_, handle_.get_stream()); @@ -169,7 +184,7 @@ struct create_graph_functor : public cugraph::c_api::abstract_functor { store_transposed, multi_gpu>( handle_, - std::nullopt, + std::move(vertex_list), std::move(edgelist_srcs), std::move(edgelist_dsts), std::move(edgelist_weights), @@ -279,6 +294,12 @@ struct create_graph_csr_functor : public cugraph::c_api::abstract_functor { edge_type_id_t>> new_edge_types{std::nullopt}; + std::optional> vertex_list = std::make_optional( + rmm::device_uvector(offsets_->size_ - 1, handle_.get_stream())); + + cugraph::detail::sequence_fill( + handle_.get_stream(), vertex_list->data(), vertex_list->size(), vertex_t{0}); + rmm::device_uvector edgelist_srcs(0, handle_.get_stream()); rmm::device_uvector edgelist_dsts(indices_->size_, handle_.get_stream()); @@ -354,7 +375,7 @@ struct create_graph_csr_functor : public cugraph::c_api::abstract_functor { store_transposed, multi_gpu>( handle_, - std::nullopt, + std::move(vertex_list), std::move(edgelist_srcs), std::move(edgelist_dsts), std::move(edgelist_weights), @@ -452,9 +473,10 @@ struct destroy_graph_functor : public cugraph::c_api::abstract_functor { } // namespace -extern "C" cugraph_error_code_t cugraph_sg_graph_create( +extern "C" cugraph_error_code_t cugraph_graph_create_sg( const cugraph_resource_handle_t* handle, const cugraph_graph_properties_t* properties, + const cugraph_type_erased_device_array_view_t* vertices, const cugraph_type_erased_device_array_view_t* src, const cugraph_type_erased_device_array_view_t* dst, const cugraph_type_erased_device_array_view_t* weights, @@ -473,6 +495,8 @@ extern "C" cugraph_error_code_t cugraph_sg_graph_create( *error = nullptr; auto p_handle = reinterpret_cast(handle); + auto p_vertices = + reinterpret_cast(vertices); auto p_src = reinterpret_cast(src); auto p_dst = @@ -488,6 +512,12 @@ extern "C" cugraph_error_code_t cugraph_sg_graph_create( CUGRAPH_INVALID_INPUT, "Invalid input arguments: src size != dst size.", *error); + + CAPI_EXPECTS((p_vertices == nullptr) || (p_src->type_ == p_vertices->type_), + CUGRAPH_INVALID_INPUT, + "Invalid input arguments: src type != vertices type.", + *error); + CAPI_EXPECTS(p_src->type_ == p_dst->type_, CUGRAPH_INVALID_INPUT, "Invalid input arguments: src type != dst type.", @@ -533,6 +563,7 @@ extern "C" cugraph_error_code_t cugraph_sg_graph_create( ::create_graph_functor functor(*p_handle->handle_, properties, + p_vertices, p_src, p_dst, p_weights, @@ -565,6 +596,35 @@ extern "C" cugraph_error_code_t cugraph_sg_graph_create( return CUGRAPH_SUCCESS; } +extern "C" cugraph_error_code_t cugraph_sg_graph_create( + const cugraph_resource_handle_t* handle, + const cugraph_graph_properties_t* properties, + const cugraph_type_erased_device_array_view_t* src, + const cugraph_type_erased_device_array_view_t* dst, + const cugraph_type_erased_device_array_view_t* weights, + const cugraph_type_erased_device_array_view_t* edge_ids, + const cugraph_type_erased_device_array_view_t* edge_type_ids, + bool_t store_transposed, + bool_t renumber, + bool_t do_expensive_check, + cugraph_graph_t** graph, + cugraph_error_t** error) +{ + return cugraph_graph_create_sg(handle, + properties, + NULL, + src, + dst, + weights, + edge_ids, + edge_type_ids, + store_transposed, + renumber, + do_expensive_check, + graph, + error); +} + cugraph_error_code_t cugraph_sg_graph_create_from_csr( const cugraph_resource_handle_t* handle, const cugraph_graph_properties_t* properties, @@ -662,23 +722,27 @@ cugraph_error_code_t cugraph_sg_graph_create_from_csr( return CUGRAPH_SUCCESS; } -extern "C" void cugraph_sg_graph_free(cugraph_graph_t* ptr_graph) +extern "C" void cugraph_graph_free(cugraph_graph_t* ptr_graph) { - auto internal_pointer = reinterpret_cast(ptr_graph); - - destroy_graph_functor functor(internal_pointer->graph_, - internal_pointer->number_map_, - internal_pointer->edge_weights_, - internal_pointer->edge_ids_, - internal_pointer->edge_types_); - - cugraph::c_api::vertex_dispatcher(internal_pointer->vertex_type_, - internal_pointer->edge_type_, - internal_pointer->weight_type_, - internal_pointer->edge_type_id_type_, - internal_pointer->store_transposed_, - internal_pointer->multi_gpu_, - functor); - - delete internal_pointer; + if (ptr_graph != NULL) { + auto internal_pointer = reinterpret_cast(ptr_graph); + + destroy_graph_functor functor(internal_pointer->graph_, + internal_pointer->number_map_, + internal_pointer->edge_weights_, + internal_pointer->edge_ids_, + internal_pointer->edge_types_); + + cugraph::c_api::vertex_dispatcher(internal_pointer->vertex_type_, + internal_pointer->edge_type_, + internal_pointer->weight_type_, + internal_pointer->edge_type_id_type_, + internal_pointer->store_transposed_, + internal_pointer->multi_gpu_, + functor); + + delete internal_pointer; + } } + +extern "C" void cugraph_sg_graph_free(cugraph_graph_t* ptr_graph) { cugraph_graph_free(ptr_graph); } diff --git a/cpp/tests/c_api/create_graph_test.c b/cpp/tests/c_api/create_graph_test.c index 736db761ebd..276e790c2b6 100644 --- a/cpp/tests/c_api/create_graph_test.c +++ b/cpp/tests/c_api/create_graph_test.c @@ -91,8 +91,9 @@ int test_create_sg_graph_simple() handle, wgt_view, (byte_t*)h_wgt, &ret_error); TEST_ASSERT(test_ret_value, ret_code == CUGRAPH_SUCCESS, "wgt copy_from_host failed."); - ret_code = cugraph_sg_graph_create(handle, + ret_code = cugraph_graph_create_sg(handle, &properties, + NULL, src_view, dst_view, wgt_view, @@ -105,7 +106,7 @@ int test_create_sg_graph_simple() &ret_error); TEST_ASSERT(test_ret_value, ret_code == CUGRAPH_SUCCESS, "graph creation failed."); - cugraph_sg_graph_free(graph); + cugraph_graph_free(graph); cugraph_type_erased_device_array_view_free(wgt_view); cugraph_type_erased_device_array_view_free(dst_view); @@ -300,7 +301,7 @@ int test_create_sg_graph_csr() } cugraph_sample_result_free(result); - cugraph_sg_graph_free(graph); + cugraph_graph_free(graph); cugraph_type_erased_device_array_view_free(wgt_view); cugraph_type_erased_device_array_view_free(indices_view); cugraph_type_erased_device_array_view_free(offsets_view); @@ -382,8 +383,9 @@ int test_create_sg_graph_symmetric_error() handle, wgt_view, (byte_t*)h_wgt, &ret_error); TEST_ASSERT(test_ret_value, ret_code == CUGRAPH_SUCCESS, "wgt copy_from_host failed."); - ret_code = cugraph_sg_graph_create(handle, + ret_code = cugraph_graph_create_sg(handle, &properties, + NULL, src_view, dst_view, wgt_view, @@ -396,14 +398,317 @@ int test_create_sg_graph_symmetric_error() &ret_error); TEST_ASSERT(test_ret_value, ret_code != CUGRAPH_SUCCESS, "graph creation succeeded but should have failed."); - if (ret_code == CUGRAPH_SUCCESS) cugraph_sg_graph_free(graph); + if (ret_code == CUGRAPH_SUCCESS) cugraph_graph_free(graph); + + cugraph_type_erased_device_array_view_free(wgt_view); + cugraph_type_erased_device_array_view_free(dst_view); + cugraph_type_erased_device_array_view_free(src_view); + cugraph_type_erased_device_array_free(wgt); + cugraph_type_erased_device_array_free(dst); + cugraph_type_erased_device_array_free(src); + + cugraph_free_resource_handle(handle); + cugraph_error_free(ret_error); + + return test_ret_value; +} + +int test_create_sg_graph_with_isolated_vertices() +{ + int test_ret_value = 0; + + typedef int32_t vertex_t; + typedef int32_t edge_t; + typedef float weight_t; + + cugraph_error_code_t ret_code = CUGRAPH_SUCCESS; + cugraph_error_t* ret_error; + size_t num_edges = 8; + size_t num_vertices = 7; + double alpha = 0.95; + double epsilon = 0.0001; + size_t max_iterations = 20; + + vertex_t h_vertices[] = { 0, 1, 2, 3, 4, 5, 6 }; + vertex_t h_src[] = {0, 1, 1, 2, 2, 2, 3, 4}; + vertex_t h_dst[] = {1, 3, 4, 0, 1, 3, 5, 5}; + weight_t h_wgt[] = {0.1f, 2.1f, 1.1f, 5.1f, 3.1f, 4.1f, 7.2f, 3.2f}; + weight_t h_result[] = { 0.0859168, 0.158029, 0.0616337, 0.179675, 0.113239, 0.339873, 0.0616337 }; + + cugraph_resource_handle_t* handle = NULL; + cugraph_graph_t* graph = NULL; + cugraph_graph_properties_t properties; + + properties.is_symmetric = FALSE; + properties.is_multigraph = FALSE; + + data_type_id_t vertex_tid = INT32; + data_type_id_t edge_tid = INT32; + data_type_id_t weight_tid = FLOAT32; + + handle = cugraph_create_resource_handle(NULL); + TEST_ASSERT(test_ret_value, handle != NULL, "resource handle creation failed."); + + cugraph_type_erased_device_array_t* vertices; + cugraph_type_erased_device_array_t* src; + cugraph_type_erased_device_array_t* dst; + cugraph_type_erased_device_array_t* wgt; + cugraph_type_erased_device_array_view_t* vertices_view; + cugraph_type_erased_device_array_view_t* src_view; + cugraph_type_erased_device_array_view_t* dst_view; + cugraph_type_erased_device_array_view_t* wgt_view; + + ret_code = + cugraph_type_erased_device_array_create(handle, num_vertices, vertex_tid, &vertices, &ret_error); + TEST_ASSERT(test_ret_value, ret_code == CUGRAPH_SUCCESS, "vertices create failed."); + TEST_ASSERT(test_ret_value, ret_code == CUGRAPH_SUCCESS, cugraph_error_message(ret_error)); + + ret_code = + cugraph_type_erased_device_array_create(handle, num_edges, vertex_tid, &src, &ret_error); + TEST_ASSERT(test_ret_value, ret_code == CUGRAPH_SUCCESS, "src create failed."); + + ret_code = + cugraph_type_erased_device_array_create(handle, num_edges, vertex_tid, &dst, &ret_error); + TEST_ASSERT(test_ret_value, ret_code == CUGRAPH_SUCCESS, "dst create failed."); + + ret_code = + cugraph_type_erased_device_array_create(handle, num_edges, weight_tid, &wgt, &ret_error); + TEST_ASSERT(test_ret_value, ret_code == CUGRAPH_SUCCESS, "wgt create failed."); + + vertices_view = cugraph_type_erased_device_array_view(vertices); + src_view = cugraph_type_erased_device_array_view(src); + dst_view = cugraph_type_erased_device_array_view(dst); + wgt_view = cugraph_type_erased_device_array_view(wgt); + + ret_code = cugraph_type_erased_device_array_view_copy_from_host( + handle, vertices_view, (byte_t*)h_vertices, &ret_error); + TEST_ASSERT(test_ret_value, ret_code == CUGRAPH_SUCCESS, "vertices copy_from_host failed."); + + ret_code = cugraph_type_erased_device_array_view_copy_from_host( + handle, src_view, (byte_t*)h_src, &ret_error); + TEST_ASSERT(test_ret_value, ret_code == CUGRAPH_SUCCESS, "src copy_from_host failed."); + + ret_code = cugraph_type_erased_device_array_view_copy_from_host( + handle, dst_view, (byte_t*)h_dst, &ret_error); + TEST_ASSERT(test_ret_value, ret_code == CUGRAPH_SUCCESS, "dst copy_from_host failed."); + + ret_code = cugraph_type_erased_device_array_view_copy_from_host( + handle, wgt_view, (byte_t*)h_wgt, &ret_error); + TEST_ASSERT(test_ret_value, ret_code == CUGRAPH_SUCCESS, "wgt copy_from_host failed."); + + ret_code = cugraph_graph_create_sg(handle, + &properties, + vertices_view, + src_view, + dst_view, + wgt_view, + NULL, + NULL, + FALSE, + FALSE, + FALSE, + &graph, + &ret_error); + TEST_ASSERT(test_ret_value, ret_code == CUGRAPH_SUCCESS, "graph creation failed."); + + cugraph_centrality_result_t* result = NULL; + + // To verify we will call pagerank + ret_code = cugraph_pagerank(handle, + graph, + NULL, + NULL, + NULL, + NULL, + alpha, + epsilon, + max_iterations, + FALSE, + &result, + &ret_error); + TEST_ASSERT(test_ret_value, ret_code == CUGRAPH_SUCCESS, "cugraph_pagerank failed."); + TEST_ALWAYS_ASSERT(ret_code == CUGRAPH_SUCCESS, cugraph_error_message(ret_error)); + + cugraph_type_erased_device_array_view_t* result_vertices; + cugraph_type_erased_device_array_view_t* pageranks; + + result_vertices = cugraph_centrality_result_get_vertices(result); + pageranks = cugraph_centrality_result_get_values(result); + + vertex_t h_result_vertices[num_vertices]; + weight_t h_pageranks[num_vertices]; + + ret_code = cugraph_type_erased_device_array_view_copy_to_host( + handle, (byte_t*)h_result_vertices, result_vertices, &ret_error); + TEST_ASSERT(test_ret_value, ret_code == CUGRAPH_SUCCESS, "copy_to_host failed."); + + ret_code = cugraph_type_erased_device_array_view_copy_to_host( + handle, (byte_t*)h_pageranks, pageranks, &ret_error); + TEST_ASSERT(test_ret_value, ret_code == CUGRAPH_SUCCESS, "copy_to_host failed."); + + for (int i = 0; (i < num_vertices) && (test_ret_value == 0); ++i) { + TEST_ASSERT(test_ret_value, + nearlyEqual(h_result[h_result_vertices[i]], h_pageranks[i], 0.001), + "pagerank results don't match"); + } + + cugraph_centrality_result_free(result); + cugraph_graph_free(graph); cugraph_type_erased_device_array_view_free(wgt_view); cugraph_type_erased_device_array_view_free(dst_view); cugraph_type_erased_device_array_view_free(src_view); + cugraph_type_erased_device_array_view_free(vertices_view); cugraph_type_erased_device_array_free(wgt); cugraph_type_erased_device_array_free(dst); cugraph_type_erased_device_array_free(src); + cugraph_type_erased_device_array_free(vertices); + + cugraph_free_resource_handle(handle); + cugraph_error_free(ret_error); + + return test_ret_value; +} + +int test_create_sg_graph_csr_with_isolated() +{ + int test_ret_value = 0; + + typedef int32_t vertex_t; + typedef int32_t edge_t; + typedef float weight_t; + + cugraph_error_code_t ret_code = CUGRAPH_SUCCESS; + cugraph_error_t* ret_error; + size_t num_edges = 8; + size_t num_vertices = 7; + double alpha = 0.95; + double epsilon = 0.0001; + size_t max_iterations = 20; + + /* + vertex_t h_src[] = {0, 1, 1, 2, 2, 2, 3, 4}; + vertex_t h_dst[] = {1, 3, 4, 0, 1, 3, 5, 5}; + */ + edge_t h_offsets[] = {0, 1, 3, 6, 7, 8, 8, 8}; + vertex_t h_indices[] = {1, 3, 4, 0, 1, 3, 5, 5}; + vertex_t h_start[] = {0, 1, 2, 3, 4, 5}; + weight_t h_wgt[] = {0.1f, 2.1f, 1.1f, 5.1f, 3.1f, 4.1f, 7.2f, 3.2f}; + weight_t h_result[] = { 0.0859168, 0.158029, 0.0616337, 0.179675, 0.113239, 0.339873, 0.0616337 }; + + cugraph_resource_handle_t* handle = NULL; + cugraph_graph_t* graph = NULL; + cugraph_graph_properties_t properties; + + properties.is_symmetric = FALSE; + properties.is_multigraph = FALSE; + + data_type_id_t vertex_tid = INT32; + data_type_id_t edge_tid = INT32; + data_type_id_t weight_tid = FLOAT32; + + handle = cugraph_create_resource_handle(NULL); + TEST_ASSERT(test_ret_value, handle != NULL, "resource handle creation failed."); + + cugraph_type_erased_device_array_t* offsets; + cugraph_type_erased_device_array_t* indices; + cugraph_type_erased_device_array_t* wgt; + cugraph_type_erased_device_array_view_t* offsets_view; + cugraph_type_erased_device_array_view_t* indices_view; + cugraph_type_erased_device_array_view_t* wgt_view; + + ret_code = cugraph_type_erased_device_array_create( + handle, num_vertices + 1, vertex_tid, &offsets, &ret_error); + TEST_ASSERT(test_ret_value, ret_code == CUGRAPH_SUCCESS, "offsets create failed."); + TEST_ASSERT(test_ret_value, ret_code == CUGRAPH_SUCCESS, cugraph_error_message(ret_error)); + + ret_code = + cugraph_type_erased_device_array_create(handle, num_edges, vertex_tid, &indices, &ret_error); + TEST_ASSERT(test_ret_value, ret_code == CUGRAPH_SUCCESS, "indices create failed."); + + ret_code = + cugraph_type_erased_device_array_create(handle, num_edges, weight_tid, &wgt, &ret_error); + TEST_ASSERT(test_ret_value, ret_code == CUGRAPH_SUCCESS, "wgt create failed."); + + offsets_view = cugraph_type_erased_device_array_view(offsets); + indices_view = cugraph_type_erased_device_array_view(indices); + wgt_view = cugraph_type_erased_device_array_view(wgt); + + ret_code = cugraph_type_erased_device_array_view_copy_from_host( + handle, offsets_view, (byte_t*)h_offsets, &ret_error); + TEST_ASSERT(test_ret_value, ret_code == CUGRAPH_SUCCESS, "offsets copy_from_host failed."); + + ret_code = cugraph_type_erased_device_array_view_copy_from_host( + handle, indices_view, (byte_t*)h_indices, &ret_error); + TEST_ASSERT(test_ret_value, ret_code == CUGRAPH_SUCCESS, "indices copy_from_host failed."); + + ret_code = cugraph_type_erased_device_array_view_copy_from_host( + handle, wgt_view, (byte_t*)h_wgt, &ret_error); + TEST_ASSERT(test_ret_value, ret_code == CUGRAPH_SUCCESS, "wgt copy_from_host failed."); + + ret_code = cugraph_sg_graph_create_from_csr(handle, + &properties, + offsets_view, + indices_view, + wgt_view, + NULL, + NULL, + FALSE, + FALSE, + FALSE, + &graph, + &ret_error); + TEST_ASSERT(test_ret_value, ret_code == CUGRAPH_SUCCESS, "graph creation failed."); + + cugraph_centrality_result_t* result = NULL; + + // To verify we will call pagerank + ret_code = cugraph_pagerank(handle, + graph, + NULL, + NULL, + NULL, + NULL, + alpha, + epsilon, + max_iterations, + FALSE, + &result, + &ret_error); + TEST_ASSERT(test_ret_value, ret_code == CUGRAPH_SUCCESS, "cugraph_pagerank failed."); + TEST_ALWAYS_ASSERT(ret_code == CUGRAPH_SUCCESS, cugraph_error_message(ret_error)); + + cugraph_type_erased_device_array_view_t* result_vertices; + cugraph_type_erased_device_array_view_t* pageranks; + + result_vertices = cugraph_centrality_result_get_vertices(result); + pageranks = cugraph_centrality_result_get_values(result); + + vertex_t h_result_vertices[num_vertices]; + weight_t h_pageranks[num_vertices]; + + ret_code = cugraph_type_erased_device_array_view_copy_to_host( + handle, (byte_t*)h_result_vertices, result_vertices, &ret_error); + TEST_ASSERT(test_ret_value, ret_code == CUGRAPH_SUCCESS, "copy_to_host failed."); + + ret_code = cugraph_type_erased_device_array_view_copy_to_host( + handle, (byte_t*)h_pageranks, pageranks, &ret_error); + TEST_ASSERT(test_ret_value, ret_code == CUGRAPH_SUCCESS, "copy_to_host failed."); + + for (int i = 0; (i < num_vertices) && (test_ret_value == 0); ++i) { + TEST_ASSERT(test_ret_value, + nearlyEqual(h_result[h_result_vertices[i]], h_pageranks[i], 0.001), + "pagerank results don't match"); + } + + cugraph_centrality_result_free(result); + cugraph_graph_free(graph); + cugraph_type_erased_device_array_view_free(wgt_view); + cugraph_type_erased_device_array_view_free(indices_view); + cugraph_type_erased_device_array_view_free(offsets_view); + cugraph_type_erased_device_array_free(wgt); + cugraph_type_erased_device_array_free(indices); + cugraph_type_erased_device_array_free(offsets); cugraph_free_resource_handle(handle); cugraph_error_free(ret_error); @@ -419,5 +724,7 @@ int main(int argc, char** argv) result |= RUN_TEST(test_create_sg_graph_simple); result |= RUN_TEST(test_create_sg_graph_csr); result |= RUN_TEST(test_create_sg_graph_symmetric_error); + result |= RUN_TEST(test_create_sg_graph_with_isolated_vertices); + result |= RUN_TEST(test_create_sg_graph_csr_with_isolated); return result; } diff --git a/cpp/tests/c_api/mg_create_graph_test.c b/cpp/tests/c_api/mg_create_graph_test.c index 4c8f2f22982..e0244f2fde8 100644 --- a/cpp/tests/c_api/mg_create_graph_test.c +++ b/cpp/tests/c_api/mg_create_graph_test.c @@ -94,19 +94,21 @@ int test_create_mg_graph_simple(const cugraph_resource_handle_t* handle) handle, wgt_view, (byte_t*)h_wgt, &ret_error); TEST_ASSERT(test_ret_value, ret_code == CUGRAPH_SUCCESS, "wgt copy_from_host failed."); - ret_code = cugraph_mg_graph_create(handle, + ret_code = cugraph_graph_create_mg(handle, &properties, - src_view, - dst_view, - wgt_view, + NULL, + &src_view, + &dst_view, + &wgt_view, NULL, NULL, FALSE, - num_edges, + 1, TRUE, &p_graph, &ret_error); TEST_ASSERT(test_ret_value, ret_code == CUGRAPH_SUCCESS, "graph creation failed."); + TEST_ALWAYS_ASSERT(ret_code == CUGRAPH_SUCCESS, cugraph_error_message(ret_error)); cugraph_mg_graph_free(p_graph); From d470bc62a8240a37de9ab11ff336e4ab95843de5 Mon Sep 17 00:00:00 2001 From: Charles Hastings Date: Tue, 14 Nov 2023 15:00:34 -0800 Subject: [PATCH 3/6] add proper test for multiple input lists in MG --- cpp/include/cugraph_c/graph.h | 16 +- cpp/include/cugraph_c/resource_handle.h | 12 ++ cpp/src/c_api/graph_mg.cpp | 71 +++++---- cpp/src/c_api/resource_handle.cpp | 9 +- cpp/tests/c_api/mg_create_graph_test.c | 201 +++++++++++++++++++++++- 5 files changed, 260 insertions(+), 49 deletions(-) diff --git a/cpp/include/cugraph_c/graph.h b/cpp/include/cugraph_c/graph.h index e431d2d8c90..04f4d5429b2 100644 --- a/cpp/include/cugraph_c/graph.h +++ b/cpp/include/cugraph_c/graph.h @@ -295,14 +295,14 @@ cugraph_error_code_t cugraph_mg_graph_create( * @return error code */ cugraph_error_code_t cugraph_graph_create_mg( - const cugraph_resource_handle_t* handle, - const cugraph_graph_properties_t* properties, - const cugraph_type_erased_device_array_view_t** vertices, - const cugraph_type_erased_device_array_view_t** src, - const cugraph_type_erased_device_array_view_t** dst, - const cugraph_type_erased_device_array_view_t** weights, - const cugraph_type_erased_device_array_view_t** edge_ids, - const cugraph_type_erased_device_array_view_t** edge_type_ids, + cugraph_resource_handle_t const* handle, + cugraph_graph_properties_t const* properties, + cugraph_type_erased_device_array_view_t const* const* vertices, + cugraph_type_erased_device_array_view_t const* const* src, + cugraph_type_erased_device_array_view_t const* const* dst, + cugraph_type_erased_device_array_view_t const* const* weights, + cugraph_type_erased_device_array_view_t const* const* edge_ids, + cugraph_type_erased_device_array_view_t const* const* edge_type_ids, bool_t store_transposed, size_t num_arrays, bool_t do_expensive_check, diff --git a/cpp/include/cugraph_c/resource_handle.h b/cpp/include/cugraph_c/resource_handle.h index a239c24afe9..0e45102aae2 100644 --- a/cpp/include/cugraph_c/resource_handle.h +++ b/cpp/include/cugraph_c/resource_handle.h @@ -57,6 +57,18 @@ typedef struct cugraph_resource_handle_ { */ cugraph_resource_handle_t* cugraph_create_resource_handle(void* raft_handle); +/** + * @brief get comm_size from resource handle + * + * If the resource handle has been configured for multi-gpu, this will return + * the comm_size for this cluster. If the resource handle has not been configured for + * multi-gpu this will always return 1. + * + * @param [in] handle Handle for accessing resources + * @return comm_size + */ +int cugraph_resource_handle_get_comm_size(const cugraph_resource_handle_t* handle); + /** * @brief get rank from resource handle * diff --git a/cpp/src/c_api/graph_mg.cpp b/cpp/src/c_api/graph_mg.cpp index 56a3d611469..0ccdc54df93 100644 --- a/cpp/src/c_api/graph_mg.cpp +++ b/cpp/src/c_api/graph_mg.cpp @@ -34,7 +34,7 @@ namespace { template rmm::device_uvector concatenate( raft::handle_t const& handle, - cugraph::c_api::cugraph_type_erased_device_array_view_t const** values, + cugraph::c_api::cugraph_type_erased_device_array_view_t const* const* values, size_t num_arrays) { size_t num_values = std::transform_reduce( @@ -61,12 +61,12 @@ struct create_graph_functor : public cugraph::c_api::abstract_functor { cugraph_data_type_id_t edge_type_; cugraph_data_type_id_t weight_type_; cugraph_data_type_id_t edge_type_id_type_; - cugraph::c_api::cugraph_type_erased_device_array_view_t const** vertices_; - cugraph::c_api::cugraph_type_erased_device_array_view_t const** src_; - cugraph::c_api::cugraph_type_erased_device_array_view_t const** dst_; - cugraph::c_api::cugraph_type_erased_device_array_view_t const** weights_; - cugraph::c_api::cugraph_type_erased_device_array_view_t const** edge_ids_; - cugraph::c_api::cugraph_type_erased_device_array_view_t const** edge_type_ids_; + cugraph::c_api::cugraph_type_erased_device_array_view_t const* const* vertices_; + cugraph::c_api::cugraph_type_erased_device_array_view_t const* const* src_; + cugraph::c_api::cugraph_type_erased_device_array_view_t const* const* dst_; + cugraph::c_api::cugraph_type_erased_device_array_view_t const* const* weights_; + cugraph::c_api::cugraph_type_erased_device_array_view_t const* const* edge_ids_; + cugraph::c_api::cugraph_type_erased_device_array_view_t const* const* edge_type_ids_; size_t num_arrays_; bool_t renumber_; bool_t check_; @@ -79,12 +79,12 @@ struct create_graph_functor : public cugraph::c_api::abstract_functor { cugraph_data_type_id_t edge_type, cugraph_data_type_id_t weight_type, cugraph_data_type_id_t edge_type_id_type, - cugraph::c_api::cugraph_type_erased_device_array_view_t const** vertices, - cugraph::c_api::cugraph_type_erased_device_array_view_t const** src, - cugraph::c_api::cugraph_type_erased_device_array_view_t const** dst, - cugraph::c_api::cugraph_type_erased_device_array_view_t const** weights, - cugraph::c_api::cugraph_type_erased_device_array_view_t const** edge_ids, - cugraph::c_api::cugraph_type_erased_device_array_view_t const** edge_type_ids, + cugraph::c_api::cugraph_type_erased_device_array_view_t const* const* vertices, + cugraph::c_api::cugraph_type_erased_device_array_view_t const* const* src, + cugraph::c_api::cugraph_type_erased_device_array_view_t const* const* dst, + cugraph::c_api::cugraph_type_erased_device_array_view_t const* const* weights, + cugraph::c_api::cugraph_type_erased_device_array_view_t const* const* edge_ids, + cugraph::c_api::cugraph_type_erased_device_array_view_t const* const* edge_type_ids, size_t num_arrays, bool_t renumber, bool_t check) @@ -246,14 +246,14 @@ struct create_graph_functor : public cugraph::c_api::abstract_functor { } // namespace extern "C" cugraph_error_code_t cugraph_graph_create_mg( - const cugraph_resource_handle_t* handle, - const cugraph_graph_properties_t* properties, - const cugraph_type_erased_device_array_view_t** vertices, - const cugraph_type_erased_device_array_view_t** src, - const cugraph_type_erased_device_array_view_t** dst, - const cugraph_type_erased_device_array_view_t** weights, - const cugraph_type_erased_device_array_view_t** edge_ids, - const cugraph_type_erased_device_array_view_t** edge_type_ids, + cugraph_resource_handle_t const* handle, + cugraph_graph_properties_t const* properties, + cugraph_type_erased_device_array_view_t const* const* vertices, + cugraph_type_erased_device_array_view_t const* const* src, + cugraph_type_erased_device_array_view_t const* const* dst, + cugraph_type_erased_device_array_view_t const* const* weights, + cugraph_type_erased_device_array_view_t const* const* edge_ids, + cugraph_type_erased_device_array_view_t const* const* edge_type_ids, bool_t store_transposed, size_t num_arrays, bool_t check, @@ -268,17 +268,20 @@ extern "C" cugraph_error_code_t cugraph_graph_create_mg( auto p_handle = reinterpret_cast(handle); auto p_vertices = - reinterpret_cast(vertices); + reinterpret_cast( + vertices); auto p_src = - reinterpret_cast(src); + reinterpret_cast(src); auto p_dst = - reinterpret_cast(dst); + reinterpret_cast(dst); auto p_weights = - reinterpret_cast(weights); + reinterpret_cast( + weights); auto p_edge_ids = - reinterpret_cast(edge_ids); + reinterpret_cast( + edge_ids); auto p_edge_type_ids = - reinterpret_cast( + reinterpret_cast( edge_type_ids); size_t local_num_edges{0}; @@ -450,13 +453,13 @@ extern "C" cugraph_error_code_t cugraph_graph_create_mg( } extern "C" cugraph_error_code_t cugraph_mg_graph_create( - const cugraph_resource_handle_t* handle, - const cugraph_graph_properties_t* properties, - const cugraph_type_erased_device_array_view_t* src, - const cugraph_type_erased_device_array_view_t* dst, - const cugraph_type_erased_device_array_view_t* weights, - const cugraph_type_erased_device_array_view_t* edge_ids, - const cugraph_type_erased_device_array_view_t* edge_type_ids, + cugraph_resource_handle_t const* handle, + cugraph_graph_properties_t const* properties, + cugraph_type_erased_device_array_view_t const* src, + cugraph_type_erased_device_array_view_t const* dst, + cugraph_type_erased_device_array_view_t const* weights, + cugraph_type_erased_device_array_view_t const* edge_ids, + cugraph_type_erased_device_array_view_t const* edge_type_ids, bool_t store_transposed, size_t num_edges, bool_t check, diff --git a/cpp/src/c_api/resource_handle.cpp b/cpp/src/c_api/resource_handle.cpp index 767a6f0add6..75b9537ef49 100644 --- a/cpp/src/c_api/resource_handle.cpp +++ b/cpp/src/c_api/resource_handle.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2022, NVIDIA CORPORATION. + * Copyright (c) 2022-2023, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -41,3 +41,10 @@ extern "C" int cugraph_resource_handle_get_rank(const cugraph_resource_handle_t* auto& comm = internal->handle_->get_comms(); return static_cast(comm.get_rank()); } + +extern "C" int cugraph_resource_handle_get_comm_size(const cugraph_resource_handle_t* handle) +{ + auto internal = reinterpret_cast(handle); + auto& comm = internal->handle_->get_comms(); + return static_cast(comm.get_size()); +} diff --git a/cpp/tests/c_api/mg_create_graph_test.c b/cpp/tests/c_api/mg_create_graph_test.c index e0244f2fde8..f96edaca7e7 100644 --- a/cpp/tests/c_api/mg_create_graph_test.c +++ b/cpp/tests/c_api/mg_create_graph_test.c @@ -17,6 +17,8 @@ #include "c_test_utils.h" /* RUN_TEST */ #include "mg_test_utils.h" /* RUN_TEST */ +#include + #include #include #include @@ -41,7 +43,7 @@ int test_create_mg_graph_simple(const cugraph_resource_handle_t* handle) vertex_t h_dst[] = {1, 3, 4, 0, 1, 3, 5, 5}; weight_t h_wgt[] = {0.1f, 2.1f, 1.1f, 5.1f, 3.1f, 4.1f, 7.2f, 3.2f}; - cugraph_graph_t* p_graph = NULL; + cugraph_graph_t* graph = NULL; cugraph_graph_properties_t properties; properties.is_symmetric = FALSE; @@ -97,20 +99,20 @@ int test_create_mg_graph_simple(const cugraph_resource_handle_t* handle) ret_code = cugraph_graph_create_mg(handle, &properties, NULL, - &src_view, - &dst_view, - &wgt_view, + (cugraph_type_erased_device_array_view_t const* const*) &src_view, + (cugraph_type_erased_device_array_view_t const* const*) &dst_view, + (cugraph_type_erased_device_array_view_t const* const*) &wgt_view, NULL, NULL, FALSE, 1, TRUE, - &p_graph, + &graph, &ret_error); TEST_ASSERT(test_ret_value, ret_code == CUGRAPH_SUCCESS, "graph creation failed."); TEST_ALWAYS_ASSERT(ret_code == CUGRAPH_SUCCESS, cugraph_error_message(ret_error)); - cugraph_mg_graph_free(p_graph); + cugraph_graph_free(graph); cugraph_type_erased_device_array_view_free(wgt_view); cugraph_type_erased_device_array_view_free(dst_view); @@ -124,6 +126,192 @@ int test_create_mg_graph_simple(const cugraph_resource_handle_t* handle) return test_ret_value; } +int test_create_mg_graph_multiple_edge_lists(const cugraph_resource_handle_t* handle) +{ + int test_ret_value = 0; + + typedef int32_t vertex_t; + typedef int32_t edge_t; + typedef float weight_t; + + cugraph_error_code_t ret_code = CUGRAPH_SUCCESS; + cugraph_error_t* ret_error; + size_t num_edges = 8; + size_t num_vertices = 7; + + double alpha = 0.95; + double epsilon = 0.0001; + size_t max_iterations = 20; + + vertex_t h_vertices[] = { 0, 1, 2, 3, 4, 5, 6 }; + vertex_t h_src[] = {0, 1, 1, 2, 2, 2, 3, 4}; + vertex_t h_dst[] = {1, 3, 4, 0, 1, 3, 5, 5}; + weight_t h_wgt[] = {0.1f, 2.1f, 1.1f, 5.1f, 3.1f, 4.1f, 7.2f, 3.2f}; + weight_t h_result[] = { 0.0859168, 0.158029, 0.0616337, 0.179675, 0.113239, 0.339873, 0.0616337 }; + + cugraph_graph_t* graph = NULL; + cugraph_graph_properties_t properties; + + properties.is_symmetric = FALSE; + properties.is_multigraph = FALSE; + + data_type_id_t vertex_tid = INT32; + data_type_id_t edge_tid = INT32; + data_type_id_t weight_tid = FLOAT32; + + const size_t num_local_arrays = 2; + + cugraph_type_erased_device_array_t* vertices[num_local_arrays]; + cugraph_type_erased_device_array_t* src[num_local_arrays]; + cugraph_type_erased_device_array_t* dst[num_local_arrays]; + cugraph_type_erased_device_array_t* wgt[num_local_arrays]; + cugraph_type_erased_device_array_view_t* vertices_view[num_local_arrays]; + cugraph_type_erased_device_array_view_t* src_view[num_local_arrays]; + cugraph_type_erased_device_array_view_t* dst_view[num_local_arrays]; + cugraph_type_erased_device_array_view_t* wgt_view[num_local_arrays]; + + int my_rank = cugraph_resource_handle_get_rank(handle); + int comm_size = cugraph_resource_handle_get_comm_size(handle); + + size_t local_num_vertices = (num_vertices + comm_size - 1) / comm_size; + size_t local_start_vertex = my_rank * local_num_vertices; + size_t local_num_edges = (num_edges + comm_size - 1) / comm_size; + size_t local_start_edge = my_rank * local_num_edges; + + local_num_edges = (local_num_edges < (num_edges - local_start_edge)) ? local_num_edges : (num_edges - local_start_edge); + local_num_vertices = (local_num_vertices < (num_vertices - local_start_vertex)) ? local_num_vertices : (num_vertices - local_start_vertex); + + for (size_t i = 0 ; i < num_local_arrays ; ++i) { + size_t vertex_count = (local_num_vertices + num_local_arrays - 1) / num_local_arrays; + size_t vertex_start = i * vertex_count; + vertex_count = (vertex_count < (local_num_vertices - vertex_start)) ? vertex_count : (local_num_vertices - vertex_start); + + ret_code = + cugraph_type_erased_device_array_create(handle, vertex_count, vertex_tid, vertices + i, &ret_error); + TEST_ASSERT(test_ret_value, ret_code == CUGRAPH_SUCCESS, "vertices create failed."); + TEST_ASSERT(test_ret_value, ret_code == CUGRAPH_SUCCESS, cugraph_error_message(ret_error)); + + size_t edge_count = (local_num_edges + num_local_arrays - 1) / num_local_arrays; + size_t edge_start = i * edge_count; + edge_count = (edge_count < (local_num_edges - edge_start)) ? edge_count : (local_num_edges - edge_start); + + ret_code = + cugraph_type_erased_device_array_create(handle, edge_count, vertex_tid, src + i, &ret_error); + TEST_ASSERT(test_ret_value, ret_code == CUGRAPH_SUCCESS, "src create failed."); + + ret_code = + cugraph_type_erased_device_array_create(handle, edge_count, vertex_tid, dst + i, &ret_error); + TEST_ASSERT(test_ret_value, ret_code == CUGRAPH_SUCCESS, "dst create failed."); + + ret_code = + cugraph_type_erased_device_array_create(handle, edge_count, weight_tid, wgt + i, &ret_error); + TEST_ASSERT(test_ret_value, ret_code == CUGRAPH_SUCCESS, "wgt create failed."); + + vertices_view[i] = cugraph_type_erased_device_array_view(vertices[i]); + src_view[i] = cugraph_type_erased_device_array_view(src[i]); + dst_view[i] = cugraph_type_erased_device_array_view(dst[i]); + wgt_view[i] = cugraph_type_erased_device_array_view(wgt[i]); + + ret_code = cugraph_type_erased_device_array_view_copy_from_host( + handle, vertices_view[i], (byte_t*)(h_vertices + local_start_vertex + vertex_start), &ret_error); + TEST_ASSERT(test_ret_value, ret_code == CUGRAPH_SUCCESS, "src copy_from_host failed."); + + ret_code = cugraph_type_erased_device_array_view_copy_from_host( + handle, src_view[i], (byte_t*)(h_src + local_start_edge + edge_start), &ret_error); + TEST_ASSERT(test_ret_value, ret_code == CUGRAPH_SUCCESS, "src copy_from_host failed."); + + ret_code = cugraph_type_erased_device_array_view_copy_from_host( + handle, dst_view[i], (byte_t*)(h_dst + local_start_edge + edge_start), &ret_error); + TEST_ASSERT(test_ret_value, ret_code == CUGRAPH_SUCCESS, "dst copy_from_host failed."); + + ret_code = cugraph_type_erased_device_array_view_copy_from_host( + handle, wgt_view[i], (byte_t*)(h_wgt + local_start_edge + edge_start), &ret_error); + TEST_ASSERT(test_ret_value, ret_code == CUGRAPH_SUCCESS, "wgt copy_from_host failed."); + } + + ret_code = cugraph_graph_create_mg(handle, + &properties, + (cugraph_type_erased_device_array_view_t const* const*) vertices_view, + (cugraph_type_erased_device_array_view_t const* const*) src_view, + (cugraph_type_erased_device_array_view_t const* const*) dst_view, + (cugraph_type_erased_device_array_view_t const* const*) wgt_view, + NULL, + NULL, + FALSE, + num_local_arrays, + TRUE, + &graph, + &ret_error); + TEST_ASSERT(test_ret_value, ret_code == CUGRAPH_SUCCESS, "graph creation failed."); + TEST_ALWAYS_ASSERT(ret_code == CUGRAPH_SUCCESS, cugraph_error_message(ret_error)); + + // + // Now call pagerank and check results... + // + cugraph_centrality_result_t* result = NULL; + + ret_code = cugraph_pagerank(handle, + graph, + NULL, + NULL, + NULL, + NULL, + alpha, + epsilon, + max_iterations, + FALSE, + &result, + &ret_error); + TEST_ASSERT(test_ret_value, ret_code == CUGRAPH_SUCCESS, "cugraph_pagerank failed."); + + // NOTE: Because we get back vertex ids and pageranks, we can simply compare + // the returned values with the expected results for the entire + // graph. Each GPU will have a subset of the total vertices, so + // they will do a subset of the comparisons. + cugraph_type_erased_device_array_view_t* result_vertices; + cugraph_type_erased_device_array_view_t* pageranks; + + result_vertices = cugraph_centrality_result_get_vertices(result); + pageranks = cugraph_centrality_result_get_values(result); + + size_t num_local_vertices = cugraph_type_erased_device_array_view_size(result_vertices); + + vertex_t h_result_vertices[num_local_vertices]; + weight_t h_pageranks[num_local_vertices]; + + ret_code = cugraph_type_erased_device_array_view_copy_to_host( + handle, (byte_t*)h_result_vertices, result_vertices, &ret_error); + TEST_ASSERT(test_ret_value, ret_code == CUGRAPH_SUCCESS, "copy_to_host failed."); + + ret_code = cugraph_type_erased_device_array_view_copy_to_host( + handle, (byte_t*)h_pageranks, pageranks, &ret_error); + TEST_ASSERT(test_ret_value, ret_code == CUGRAPH_SUCCESS, "copy_to_host failed."); + + for (int i = 0; (i < num_local_vertices) && (test_ret_value == 0); ++i) { + TEST_ASSERT(test_ret_value, + nearlyEqual(h_result[h_result_vertices[i]], h_pageranks[i], 0.001), + "pagerank results don't match"); + } + + cugraph_centrality_result_free(result); + cugraph_graph_free(graph); + + for (size_t i = 0 ; i < num_local_arrays ; ++i) { + cugraph_type_erased_device_array_view_free(wgt_view[i]); + cugraph_type_erased_device_array_view_free(dst_view[i]); + cugraph_type_erased_device_array_view_free(src_view[i]); + cugraph_type_erased_device_array_view_free(vertices_view[i]); + cugraph_type_erased_device_array_free(wgt[i]); + cugraph_type_erased_device_array_free(dst[i]); + cugraph_type_erased_device_array_free(src[i]); + cugraph_type_erased_device_array_free(vertices[i]); + } + + cugraph_error_free(ret_error); + + return test_ret_value; +} + /******************************************************************************/ int main(int argc, char** argv) @@ -133,6 +321,7 @@ int main(int argc, char** argv) int result = 0; result |= RUN_MG_TEST(test_create_mg_graph_simple, handle); + result |= RUN_MG_TEST(test_create_mg_graph_multiple_edge_lists, handle); cugraph_free_resource_handle(handle); free_mg_raft_handle(raft_handle); From 52b31628a65e79d9b6a38914d4c5b2faa9331915 Mon Sep 17 00:00:00 2001 From: Charles Hastings Date: Wed, 15 Nov 2023 18:23:37 -0800 Subject: [PATCH 4/6] add support for dropping self loops and removing multi edges to C API for graph creation (SG and MG) --- cpp/CMakeLists.txt | 2 + cpp/include/cugraph/graph_functions.hpp | 65 ++++++ cpp/include/cugraph_c/graph.h | 67 +++--- cpp/src/c_api/graph_mg.cpp | 50 ++++- cpp/src/c_api/graph_sg.cpp | 79 ++++++- cpp/src/structure/remove_self_loops.cu | 92 ++++++++ cpp/src/structure/remove_self_loops_impl.cuh | 180 ++++++++++++++++ .../structure/sort_and_remove_multi_edges.cu | 92 ++++++++ .../sort_and_remove_multi_edges_impl.cuh | 196 ++++++++++++++++++ cpp/tests/c_api/create_graph_test.c | 166 +++++++++++++++ cpp/tests/c_api/mg_create_graph_test.c | 193 +++++++++++++++++ 11 files changed, 1149 insertions(+), 33 deletions(-) create mode 100644 cpp/src/structure/remove_self_loops.cu create mode 100644 cpp/src/structure/remove_self_loops_impl.cuh create mode 100644 cpp/src/structure/sort_and_remove_multi_edges.cu create mode 100644 cpp/src/structure/sort_and_remove_multi_edges_impl.cuh diff --git a/cpp/CMakeLists.txt b/cpp/CMakeLists.txt index 360165e688d..cecfd16d959 100644 --- a/cpp/CMakeLists.txt +++ b/cpp/CMakeLists.txt @@ -196,6 +196,8 @@ set(CUGRAPH_SOURCES src/community/detail/mis_mg.cu src/detail/utility_wrappers.cu src/structure/graph_view_mg.cu + src/structure/remove_self_loops.cu + src/structure/sort_and_remove_multi_edges.cu src/utilities/path_retrieval.cu src/structure/legacy/graph.cu src/linear_assignment/legacy/hungarian.cu diff --git a/cpp/include/cugraph/graph_functions.hpp b/cpp/include/cugraph/graph_functions.hpp index 5c1e9d5311f..8bc655c8a21 100644 --- a/cpp/include/cugraph/graph_functions.hpp +++ b/cpp/include/cugraph/graph_functions.hpp @@ -973,4 +973,69 @@ renumber_sampled_edgelist( label_offsets, bool do_expensive_check = false); +/** + * @brief Remove self loops from an edge list + * + * @tparam vertex_t Type of vertex identifiers. Needs to be an integral type. + * @tparam edge_t Type of edge identifiers. Needs to be an integral type. + * @tparam weight_t Type of edge weight. Currently float and double are supported. + * @tparam edge_type_t Type of edge type. Needs to be an integral type. + * + * @param handle RAFT handle object to encapsulate resources (e.g. CUDA stream, communicator, and + * handles to various CUDA libraries) to run graph algorithms. + * @param edgelist_srcs List of source vertex ids + * @param edgelist_dsts List of destination vertex ids + * @param edgelist_weights Optional list of edge weights + * @param edgelist_edge_ids Optional list of edge ids + * @param edgelist_edge_types Optional list of edge types + * @return Tuple of vectors storing edge sources, destinations, optional weights, + * optional edge ids, optional edge types. + */ +template +std::tuple, + rmm::device_uvector, + std::optional>, + std::optional>, + std::optional>> +remove_self_loops(raft::handle_t const& handle, + rmm::device_uvector&& edgelist_srcs, + rmm::device_uvector&& edgelist_dsts, + std::optional>&& edgelist_weights, + std::optional>&& edgelist_edge_ids, + std::optional>&& edgelist_edge_types); + +/** + * @brief Sort the edges and remove all but one edge when a multi-edge exists + * + * In an MG context it is assumed that edges have been shuffled to the proper GPU, + * in which case any multi-edges will be on the same GPU. + * + * @tparam vertex_t Type of vertex identifiers. Needs to be an integral type. + * @tparam edge_t Type of edge identifiers. Needs to be an integral type. + * @tparam weight_t Type of edge weight. Currently float and double are supported. + * @tparam edge_type_t Type of edge type. Needs to be an integral type. + * + * @param handle RAFT handle object to encapsulate resources (e.g. CUDA stream, communicator, and + * handles to various CUDA libraries) to run graph algorithms. + * @param edgelist_srcs List of source vertex ids + * @param edgelist_dsts List of destination vertex ids + * @param edgelist_weights Optional list of edge weights + * @param edgelist_edge_ids Optional list of edge ids + * @param edgelist_edge_types Optional list of edge types + * @return Tuple of vectors storing edge sources, destinations, optional weights, + * optional edge ids, optional edge types. + */ +template +std::tuple, + rmm::device_uvector, + std::optional>, + std::optional>, + std::optional>> +sort_and_remove_multi_edges(raft::handle_t const& handle, + rmm::device_uvector&& edgelist_srcs, + rmm::device_uvector&& edgelist_dsts, + std::optional>&& edgelist_weights, + std::optional>&& edgelist_edge_ids, + std::optional>&& edgelist_edge_types); + } // namespace cugraph diff --git a/cpp/include/cugraph_c/graph.h b/cpp/include/cugraph_c/graph.h index 04f4d5429b2..88176a9c1b6 100644 --- a/cpp/include/cugraph_c/graph.h +++ b/cpp/include/cugraph_c/graph.h @@ -52,8 +52,9 @@ typedef struct { argument that can be NULL if edge types are not used. * @param [in] store_transposed If true create the graph initially in transposed format * @param [in] renumber If true, renumber vertices to make an efficient data structure. - * If false, do not renumber. Renumbering is required if the vertices are not sequential - * integer values from 0 to num_vertices. + * If false, do not renumber. Renumbering enables some significant optimizations within + * the graph primitives library, so it is strongly encouraged. Renumbering is required if + * the vertices are not sequential integer values from 0 to num_vertices. * @param [in] do_expensive_check If true, do expensive checks to validate the input data * is consistent with software assumptions. If false bypass these checks. * @param [out] graph A pointer to the graph object @@ -94,8 +95,15 @@ cugraph_error_code_t cugraph_sg_graph_create( argument that can be NULL if edge types are not used. * @param [in] store_transposed If true create the graph initially in transposed format * @param [in] renumber If true, renumber vertices to make an efficient data structure. - * If false, do not renumber. Renumbering is required if the vertices are not sequential - * integer values from 0 to num_vertices. + * If false, do not renumber. Renumbering enables some significant optimizations within + * the graph primitives library, so it is strongly encouraged. Renumbering is required if + * the vertices are not sequential integer values from 0 to num_vertices. + * @param [in] drop_self_loops If true, drop any self loops that exist in the provided edge list. + * @param [in] drop_multi_edges If true, drop any multi edges that exist in the provided edge list. + * Note that setting this flag will arbitrarily select one instance of a multi edge to be the + * edge that survives. If the edges have properties that should be honored (e.g. sum the + weights, + * or take the maximum weight), the caller should do that on not rely on this flag. * @param [in] do_expensive_check If true, do expensive checks to validate the input data * is consistent with software assumptions. If false bypass these checks. * @param [out] graph A pointer to the graph object @@ -115,6 +123,8 @@ cugraph_error_code_t cugraph_graph_create_sg( const cugraph_type_erased_device_array_view_t* edge_type_ids, bool_t store_transposed, bool_t renumber, + bool_t drop_self_loops, + bool_t drop_multi_edges, bool_t do_expensive_check, cugraph_graph_t** graph, cugraph_error_t** error); @@ -136,8 +146,9 @@ cugraph_error_code_t cugraph_graph_create_sg( argument that can be NULL if edge types are not used. * @param [in] store_transposed If true create the graph initially in transposed format * @param [in] renumber If true, renumber vertices to make an efficient data structure. - * If false, do not renumber. Renumbering is required if the vertices are not sequential - * integer values from 0 to num_vertices. + * If false, do not renumber. Renumbering enables some significant optimizations within + * the graph primitives library, so it is strongly encouraged. Renumbering is required if + * the vertices are not sequential integer values from 0 to num_vertices. * @param [in] do_expensive_check If true, do expensive checks to validate the input data * is consistent with software assumptions. If false bypass these checks. * @param [out] graph A pointer to the graph object @@ -175,8 +186,9 @@ cugraph_error_code_t cugraph_sg_graph_create_from_csr( argument that can be NULL if edge types are not used. * @param [in] store_transposed If true create the graph initially in transposed format * @param [in] renumber If true, renumber vertices to make an efficient data structure. - * If false, do not renumber. Renumbering is required if the vertices are not sequential - * integer values from 0 to num_vertices. + * If false, do not renumber. Renumbering enables some significant optimizations within + * the graph primitives library, so it is strongly encouraged. Renumbering is required if + * the vertices are not sequential integer values from 0 to num_vertices. * @param [in] do_expensive_check If true, do expensive checks to validate the input data * is consistent with software assumptions. If false bypass these checks. * @param [out] graph A pointer to the graph object @@ -199,22 +211,6 @@ cugraph_error_code_t cugraph_graph_create_sg_from_csr( cugraph_graph_t** graph, cugraph_error_t** error); -/** - * @brief Destroy an graph - * - * @param [in] graph A pointer to the graph object to destroy - */ -void cugraph_graph_free(cugraph_graph_t* graph); - -/** - * @brief Destroy an SG graph - * - * @deprecated This API will be deleted, use cugraph_graph_free instead - * - * @param [in] graph A pointer to the graph object to destroy - */ -void cugraph_sg_graph_free(cugraph_graph_t* graph); - /** * @brief Construct an MG graph * @@ -287,6 +283,11 @@ cugraph_error_code_t cugraph_mg_graph_create( * @param [in] store_transposed If true create the graph initially in transposed format * @param [in] num_arrays The number of arrays specified in @p vertices, @p src, @p dst, @p * weights, @p edge_ids and @p edge_type_ids + * @param [in] drop_self_loops If true, drop any self loops that exist in the provided edge list. + * @param [in] drop_multi_edges If true, drop any multi edges that exist in the provided edge list. + * Note that setting this flag will arbitrarily select one instance of a multi edge to be the + * edge that survives. If the edges have properties that should be honored (e.g. sum the + * weights, or take the maximum weight), the caller should do that on not rely on this flag. * @param [in] do_expensive_check If true, do expensive checks to validate the input data * is consistent with software assumptions. If false bypass these checks. * @param [out] graph A pointer to the graph object @@ -305,10 +306,28 @@ cugraph_error_code_t cugraph_graph_create_mg( cugraph_type_erased_device_array_view_t const* const* edge_type_ids, bool_t store_transposed, size_t num_arrays, + bool_t drop_self_loops, + bool_t drop_multi_edges, bool_t do_expensive_check, cugraph_graph_t** graph, cugraph_error_t** error); +/** + * @brief Destroy an graph + * + * @param [in] graph A pointer to the graph object to destroy + */ +void cugraph_graph_free(cugraph_graph_t* graph); + +/** + * @brief Destroy an SG graph + * + * @deprecated This API will be deleted, use cugraph_graph_free instead + * + * @param [in] graph A pointer to the graph object to destroy + */ +void cugraph_sg_graph_free(cugraph_graph_t* graph); + /** * @brief Destroy an MG graph * diff --git a/cpp/src/c_api/graph_mg.cpp b/cpp/src/c_api/graph_mg.cpp index 0ccdc54df93..c38fe1a09d0 100644 --- a/cpp/src/c_api/graph_mg.cpp +++ b/cpp/src/c_api/graph_mg.cpp @@ -69,7 +69,9 @@ struct create_graph_functor : public cugraph::c_api::abstract_functor { cugraph::c_api::cugraph_type_erased_device_array_view_t const* const* edge_type_ids_; size_t num_arrays_; bool_t renumber_; - bool_t check_; + bool_t drop_self_loops_; + bool_t drop_multi_edges_; + bool_t do_expensive_check_; cugraph::c_api::cugraph_graph_t* result_{}; create_graph_functor( @@ -87,7 +89,9 @@ struct create_graph_functor : public cugraph::c_api::abstract_functor { cugraph::c_api::cugraph_type_erased_device_array_view_t const* const* edge_type_ids, size_t num_arrays, bool_t renumber, - bool_t check) + bool_t drop_self_loops, + bool_t drop_multi_edges, + bool_t do_expensive_check) : abstract_functor(), properties_(properties), vertex_type_(vertex_type), @@ -103,7 +107,9 @@ struct create_graph_functor : public cugraph::c_api::abstract_functor { edge_type_ids_(edge_type_ids), num_arrays_(num_arrays), renumber_(renumber), - check_(check) + drop_self_loops_(drop_self_loops), + drop_multi_edges_(drop_multi_edges), + do_expensive_check_(do_expensive_check) { } @@ -192,6 +198,28 @@ struct create_graph_functor : public cugraph::c_api::abstract_functor { cugraph::graph_view_t, edge_type_id_t>(handle_); + if (drop_multi_edges_) { + std::tie( + edgelist_srcs, edgelist_dsts, edgelist_weights, edgelist_edge_ids, edgelist_edge_types) = + cugraph::sort_and_remove_multi_edges(handle_, + std::move(edgelist_srcs), + std::move(edgelist_dsts), + std::move(edgelist_weights), + std::move(edgelist_edge_ids), + std::move(edgelist_edge_types)); + } + + if (drop_self_loops_) { + std::tie( + edgelist_srcs, edgelist_dsts, edgelist_weights, edgelist_edge_ids, edgelist_edge_types) = + cugraph::remove_self_loops(handle_, + std::move(edgelist_srcs), + std::move(edgelist_dsts), + std::move(edgelist_weights), + std::move(edgelist_edge_ids), + std::move(edgelist_edge_types)); + } + std::tie(*graph, new_edge_weights, new_edge_ids, new_edge_types, new_number_map) = cugraph::create_graph_from_edgelistis_symmetric, properties_->is_multigraph}, renumber_, - check_); + do_expensive_check_); if (renumber_) { *number_map = std::move(new_number_map.value()); @@ -256,7 +284,9 @@ extern "C" cugraph_error_code_t cugraph_graph_create_mg( cugraph_type_erased_device_array_view_t const* const* edge_type_ids, bool_t store_transposed, size_t num_arrays, - bool_t check, + bool_t drop_self_loops, + bool_t drop_multi_edges, + bool_t do_expensive_check, cugraph_graph_t** graph, cugraph_error_t** error) { @@ -432,7 +462,9 @@ extern "C" cugraph_error_code_t cugraph_graph_create_mg( p_edge_type_ids, num_arrays, bool_t::TRUE, - check); + drop_self_loops, + drop_multi_edges, + do_expensive_check); try { cugraph::c_api::vertex_dispatcher( @@ -462,7 +494,7 @@ extern "C" cugraph_error_code_t cugraph_mg_graph_create( cugraph_type_erased_device_array_view_t const* edge_type_ids, bool_t store_transposed, size_t num_edges, - bool_t check, + bool_t do_expensive_check, cugraph_graph_t** graph, cugraph_error_t** error) { @@ -476,7 +508,9 @@ extern "C" cugraph_error_code_t cugraph_mg_graph_create( &edge_type_ids, store_transposed, 1, - check, + FALSE, + FALSE, + do_expensive_check, graph, error); } diff --git a/cpp/src/c_api/graph_sg.cpp b/cpp/src/c_api/graph_sg.cpp index f16be20807a..4c7307e849c 100644 --- a/cpp/src/c_api/graph_sg.cpp +++ b/cpp/src/c_api/graph_sg.cpp @@ -40,6 +40,8 @@ struct create_graph_functor : public cugraph::c_api::abstract_functor { cugraph::c_api::cugraph_type_erased_device_array_view_t const* edge_ids_; cugraph::c_api::cugraph_type_erased_device_array_view_t const* edge_type_ids_; bool_t renumber_; + bool_t drop_self_loops_; + bool_t drop_multi_edges_; bool_t do_expensive_check_; cugraph_data_type_id_t edge_type_; cugraph::c_api::cugraph_graph_t* result_{}; @@ -53,6 +55,8 @@ struct create_graph_functor : public cugraph::c_api::abstract_functor { cugraph::c_api::cugraph_type_erased_device_array_view_t const* edge_ids, cugraph::c_api::cugraph_type_erased_device_array_view_t const* edge_type_ids, bool_t renumber, + bool_t drop_self_loops, + bool_t drop_multi_edges, bool_t do_expensive_check, cugraph_data_type_id_t edge_type) : abstract_functor(), @@ -65,6 +69,8 @@ struct create_graph_functor : public cugraph::c_api::abstract_functor { edge_ids_(edge_ids), edge_type_ids_(edge_type_ids), renumber_(renumber), + drop_self_loops_(drop_self_loops), + drop_multi_edges_(drop_multi_edges), do_expensive_check_(do_expensive_check), edge_type_(edge_type) { @@ -175,6 +181,40 @@ struct create_graph_functor : public cugraph::c_api::abstract_functor { cugraph::graph_view_t, edge_type_id_t>(handle_); + if (drop_multi_edges_) { + std::cout << "calling drop_multi_edges" << std::endl; + raft::print_device_vector( + " edgelist_srcs", edgelist_srcs.data(), edgelist_srcs.size(), std::cout); + raft::print_device_vector( + " edgelist_dsts", edgelist_dsts.data(), edgelist_dsts.size(), std::cout); + + std::tie( + edgelist_srcs, edgelist_dsts, edgelist_weights, edgelist_edge_ids, edgelist_edge_types) = + cugraph::sort_and_remove_multi_edges(handle_, + std::move(edgelist_srcs), + std::move(edgelist_dsts), + std::move(edgelist_weights), + std::move(edgelist_edge_ids), + std::move(edgelist_edge_types)); + + std::cout << "after drop_multi_edges" << std::endl; + raft::print_device_vector( + " edgelist_srcs", edgelist_srcs.data(), edgelist_srcs.size(), std::cout); + raft::print_device_vector( + " edgelist_dsts", edgelist_dsts.data(), edgelist_dsts.size(), std::cout); + } + + if (drop_self_loops_) { + std::tie( + edgelist_srcs, edgelist_dsts, edgelist_weights, edgelist_edge_ids, edgelist_edge_types) = + cugraph::remove_self_loops(handle_, + std::move(edgelist_srcs), + std::move(edgelist_dsts), + std::move(edgelist_weights), + std::move(edgelist_edge_ids), + std::move(edgelist_edge_types)); + } + std::tie(*graph, new_edge_weights, new_edge_ids, new_edge_types, new_number_map) = cugraph::create_graph_from_edgelisttype_; } + std::cout << " constructing create_graph_functor, drop_multi_edges = " + << (drop_multi_edges ? "TRUE" : "FALSE") << std::endl; + ::create_graph_functor functor(*p_handle->handle_, properties, p_vertices, @@ -570,6 +615,8 @@ extern "C" cugraph_error_code_t cugraph_graph_create_sg( p_edge_ids, p_edge_type_ids, renumber, + drop_self_loops, + drop_multi_edges, do_expensive_check, edge_type); @@ -620,12 +667,14 @@ extern "C" cugraph_error_code_t cugraph_sg_graph_create( edge_type_ids, store_transposed, renumber, + FALSE, + FALSE, do_expensive_check, graph, error); } -cugraph_error_code_t cugraph_sg_graph_create_from_csr( +cugraph_error_code_t cugraph_graph_create_sg_from_csr( const cugraph_resource_handle_t* handle, const cugraph_graph_properties_t* properties, const cugraph_type_erased_device_array_view_t* offsets, @@ -722,6 +771,34 @@ cugraph_error_code_t cugraph_sg_graph_create_from_csr( return CUGRAPH_SUCCESS; } +cugraph_error_code_t cugraph_sg_graph_create_from_csr( + const cugraph_resource_handle_t* handle, + const cugraph_graph_properties_t* properties, + const cugraph_type_erased_device_array_view_t* offsets, + const cugraph_type_erased_device_array_view_t* indices, + const cugraph_type_erased_device_array_view_t* weights, + const cugraph_type_erased_device_array_view_t* edge_ids, + const cugraph_type_erased_device_array_view_t* edge_type_ids, + bool_t store_transposed, + bool_t renumber, + bool_t do_expensive_check, + cugraph_graph_t** graph, + cugraph_error_t** error) +{ + return cugraph_graph_create_sg_from_csr(handle, + properties, + offsets, + indices, + weights, + edge_ids, + edge_type_ids, + store_transposed, + renumber, + do_expensive_check, + graph, + error); +} + extern "C" void cugraph_graph_free(cugraph_graph_t* ptr_graph) { if (ptr_graph != NULL) { diff --git a/cpp/src/structure/remove_self_loops.cu b/cpp/src/structure/remove_self_loops.cu new file mode 100644 index 00000000000..8a66c1e05e3 --- /dev/null +++ b/cpp/src/structure/remove_self_loops.cu @@ -0,0 +1,92 @@ +/* + * Copyright (c) 2023, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +#include + +namespace cugraph { + +template std::tuple, + rmm::device_uvector, + std::optional>, + std::optional>, + std::optional>> +remove_self_loops(raft::handle_t const& handle, + rmm::device_uvector&& edgelist_srcs, + rmm::device_uvector&& edgelist_dsts, + std::optional>&& edgelist_weights, + std::optional>&& edgelist_edge_ids, + std::optional>&& edgelist_edge_types); + +template std::tuple, + rmm::device_uvector, + std::optional>, + std::optional>, + std::optional>> +remove_self_loops(raft::handle_t const& handle, + rmm::device_uvector&& edgelist_srcs, + rmm::device_uvector&& edgelist_dsts, + std::optional>&& edgelist_weights, + std::optional>&& edgelist_edge_ids, + std::optional>&& edgelist_edge_types); + +template std::tuple, + rmm::device_uvector, + std::optional>, + std::optional>, + std::optional>> +remove_self_loops(raft::handle_t const& handle, + rmm::device_uvector&& edgelist_srcs, + rmm::device_uvector&& edgelist_dsts, + std::optional>&& edgelist_weights, + std::optional>&& edgelist_edge_ids, + std::optional>&& edgelist_edge_types); + +template std::tuple, + rmm::device_uvector, + std::optional>, + std::optional>, + std::optional>> +remove_self_loops(raft::handle_t const& handle, + rmm::device_uvector&& edgelist_srcs, + rmm::device_uvector&& edgelist_dsts, + std::optional>&& edgelist_weights, + std::optional>&& edgelist_edge_ids, + std::optional>&& edgelist_edge_types); + +template std::tuple, + rmm::device_uvector, + std::optional>, + std::optional>, + std::optional>> +remove_self_loops(raft::handle_t const& handle, + rmm::device_uvector&& edgelist_srcs, + rmm::device_uvector&& edgelist_dsts, + std::optional>&& edgelist_weights, + std::optional>&& edgelist_edge_ids, + std::optional>&& edgelist_edge_types); + +template std::tuple, + rmm::device_uvector, + std::optional>, + std::optional>, + std::optional>> +remove_self_loops(raft::handle_t const& handle, + rmm::device_uvector&& edgelist_srcs, + rmm::device_uvector&& edgelist_dsts, + std::optional>&& edgelist_weights, + std::optional>&& edgelist_edge_ids, + std::optional>&& edgelist_edge_types); + +} // namespace cugraph diff --git a/cpp/src/structure/remove_self_loops_impl.cuh b/cpp/src/structure/remove_self_loops_impl.cuh new file mode 100644 index 00000000000..bad150eeac8 --- /dev/null +++ b/cpp/src/structure/remove_self_loops_impl.cuh @@ -0,0 +1,180 @@ +/* + * Copyright (c) 2023, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +#pragma once + +#include +#include + +#include +#include +#include +#include + +#include +#include + +namespace cugraph { + +namespace detail { + +template +void remove_self_loops(raft::handle_t const& handle, + rmm::device_uvector& edgelist_srcs /* [INOUT] */, + rmm::device_uvector& edgelist_dsts /* [INOUT] */) +{ + auto edge_first = + thrust::make_zip_iterator(thrust::make_tuple(edgelist_srcs.begin(), edgelist_dsts.begin())); + edgelist_srcs.resize( + thrust::distance( + edge_first, + thrust::remove_if(handle.get_thrust_policy(), + edge_first, + edge_first + edgelist_srcs.size(), + [] __device__(auto e) { return thrust::get<0>(e) == thrust::get<1>(e); })), + handle.get_stream()); + edgelist_dsts.resize(edgelist_srcs.size(), handle.get_stream()); +} + +template +void remove_self_loops(raft::handle_t const& handle, + rmm::device_uvector& edgelist_srcs /* [INOUT] */, + rmm::device_uvector& edgelist_dsts /* [INOUT] */, + rmm::device_uvector& edgelist_a /* [INOUT] */) +{ + auto edge_first = thrust::make_zip_iterator( + thrust::make_tuple(edgelist_srcs.begin(), edgelist_dsts.begin(), edgelist_a.begin())); + edgelist_srcs.resize( + thrust::distance( + edge_first, + thrust::remove_if(handle.get_thrust_policy(), + edge_first, + edge_first + edgelist_srcs.size(), + [] __device__(auto e) { return thrust::get<0>(e) == thrust::get<1>(e); })), + handle.get_stream()); + edgelist_dsts.resize(edgelist_srcs.size(), handle.get_stream()); + edgelist_a.resize(edgelist_srcs.size(), handle.get_stream()); +} + +template +void remove_self_loops(raft::handle_t const& handle, + rmm::device_uvector& edgelist_srcs /* [INOUT] */, + rmm::device_uvector& edgelist_dsts /* [INOUT] */, + rmm::device_uvector& edgelist_a /* [INOUT] */, + rmm::device_uvector& edgelist_b /* [INOUT] */) +{ + auto edge_first = thrust::make_zip_iterator(thrust::make_tuple( + edgelist_srcs.begin(), edgelist_dsts.begin(), edgelist_a.begin(), edgelist_b.begin())); + edgelist_srcs.resize( + thrust::distance( + edge_first, + thrust::remove_if(handle.get_thrust_policy(), + edge_first, + edge_first + edgelist_srcs.size(), + [] __device__(auto e) { return thrust::get<0>(e) == thrust::get<1>(e); })), + handle.get_stream()); + edgelist_dsts.resize(edgelist_srcs.size(), handle.get_stream()); + edgelist_a.resize(edgelist_srcs.size(), handle.get_stream()); + edgelist_b.resize(edgelist_srcs.size(), handle.get_stream()); +} + +template +void remove_self_loops(raft::handle_t const& handle, + rmm::device_uvector& edgelist_srcs /* [INOUT] */, + rmm::device_uvector& edgelist_dsts /* [INOUT] */, + rmm::device_uvector& edgelist_a /* [INOUT] */, + rmm::device_uvector& edgelist_b /* [INOUT] */, + rmm::device_uvector& edgelist_c /* [INOUT] */) +{ + auto edge_first = thrust::make_zip_iterator(thrust::make_tuple(edgelist_srcs.begin(), + edgelist_dsts.begin(), + edgelist_a.begin(), + edgelist_b.begin(), + edgelist_c.begin())); + edgelist_srcs.resize( + thrust::distance( + edge_first, + thrust::remove_if(handle.get_thrust_policy(), + edge_first, + edge_first + edgelist_srcs.size(), + [] __device__(auto e) { return thrust::get<0>(e) == thrust::get<1>(e); })), + handle.get_stream()); + edgelist_dsts.resize(edgelist_srcs.size(), handle.get_stream()); + edgelist_a.resize(edgelist_srcs.size(), handle.get_stream()); + edgelist_b.resize(edgelist_srcs.size(), handle.get_stream()); + edgelist_c.resize(edgelist_srcs.size(), handle.get_stream()); +} + +} // namespace detail + +template +std::tuple, + rmm::device_uvector, + std::optional>, + std::optional>, + std::optional>> +remove_self_loops(raft::handle_t const& handle, + rmm::device_uvector&& edgelist_srcs, + rmm::device_uvector&& edgelist_dsts, + std::optional>&& edgelist_weights, + std::optional>&& edgelist_edge_ids, + std::optional>&& edgelist_edge_types) +{ + if (edgelist_weights) { + if (edgelist_edge_ids) { + if (edgelist_edge_types) { + detail::remove_self_loops(handle, + edgelist_srcs, + edgelist_dsts, + *edgelist_weights, + *edgelist_edge_ids, + *edgelist_edge_types); + } else { + detail::remove_self_loops( + handle, edgelist_srcs, edgelist_dsts, *edgelist_weights, *edgelist_edge_ids); + } + } else { + if (edgelist_edge_types) { + detail::remove_self_loops( + handle, edgelist_srcs, edgelist_dsts, *edgelist_weights, *edgelist_edge_types); + } else { + detail::remove_self_loops(handle, edgelist_srcs, edgelist_dsts, *edgelist_weights); + } + } + } else { + if (edgelist_edge_ids) { + if (edgelist_edge_types) { + detail::remove_self_loops( + handle, edgelist_srcs, edgelist_dsts, *edgelist_edge_ids, *edgelist_edge_types); + } else { + detail::remove_self_loops(handle, edgelist_srcs, edgelist_dsts, *edgelist_edge_ids); + } + } else { + if (edgelist_edge_types) { + detail::remove_self_loops(handle, edgelist_srcs, edgelist_dsts, *edgelist_edge_types); + } else { + detail::remove_self_loops(handle, edgelist_srcs, edgelist_dsts); + } + } + } + + return std::make_tuple(std::move(edgelist_srcs), + std::move(edgelist_dsts), + std::move(edgelist_weights), + std::move(edgelist_edge_ids), + std::move(edgelist_edge_types)); +} + +} // namespace cugraph diff --git a/cpp/src/structure/sort_and_remove_multi_edges.cu b/cpp/src/structure/sort_and_remove_multi_edges.cu new file mode 100644 index 00000000000..21ee9531f06 --- /dev/null +++ b/cpp/src/structure/sort_and_remove_multi_edges.cu @@ -0,0 +1,92 @@ +/* + * Copyright (c) 2023, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +#include + +namespace cugraph { + +template std::tuple, + rmm::device_uvector, + std::optional>, + std::optional>, + std::optional>> +sort_and_remove_multi_edges(raft::handle_t const& handle, + rmm::device_uvector&& edgelist_srcs, + rmm::device_uvector&& edgelist_dsts, + std::optional>&& edgelist_weights, + std::optional>&& edgelist_edge_ids, + std::optional>&& edgelist_edge_types); + +template std::tuple, + rmm::device_uvector, + std::optional>, + std::optional>, + std::optional>> +sort_and_remove_multi_edges(raft::handle_t const& handle, + rmm::device_uvector&& edgelist_srcs, + rmm::device_uvector&& edgelist_dsts, + std::optional>&& edgelist_weights, + std::optional>&& edgelist_edge_ids, + std::optional>&& edgelist_edge_types); + +template std::tuple, + rmm::device_uvector, + std::optional>, + std::optional>, + std::optional>> +sort_and_remove_multi_edges(raft::handle_t const& handle, + rmm::device_uvector&& edgelist_srcs, + rmm::device_uvector&& edgelist_dsts, + std::optional>&& edgelist_weights, + std::optional>&& edgelist_edge_ids, + std::optional>&& edgelist_edge_types); + +template std::tuple, + rmm::device_uvector, + std::optional>, + std::optional>, + std::optional>> +sort_and_remove_multi_edges(raft::handle_t const& handle, + rmm::device_uvector&& edgelist_srcs, + rmm::device_uvector&& edgelist_dsts, + std::optional>&& edgelist_weights, + std::optional>&& edgelist_edge_ids, + std::optional>&& edgelist_edge_types); + +template std::tuple, + rmm::device_uvector, + std::optional>, + std::optional>, + std::optional>> +sort_and_remove_multi_edges(raft::handle_t const& handle, + rmm::device_uvector&& edgelist_srcs, + rmm::device_uvector&& edgelist_dsts, + std::optional>&& edgelist_weights, + std::optional>&& edgelist_edge_ids, + std::optional>&& edgelist_edge_types); + +template std::tuple, + rmm::device_uvector, + std::optional>, + std::optional>, + std::optional>> +sort_and_remove_multi_edges(raft::handle_t const& handle, + rmm::device_uvector&& edgelist_srcs, + rmm::device_uvector&& edgelist_dsts, + std::optional>&& edgelist_weights, + std::optional>&& edgelist_edge_ids, + std::optional>&& edgelist_edge_types); + +} // namespace cugraph diff --git a/cpp/src/structure/sort_and_remove_multi_edges_impl.cuh b/cpp/src/structure/sort_and_remove_multi_edges_impl.cuh new file mode 100644 index 00000000000..2c8f265bfa7 --- /dev/null +++ b/cpp/src/structure/sort_and_remove_multi_edges_impl.cuh @@ -0,0 +1,196 @@ +/* + * Copyright (c) 2023, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +#pragma once + +#include +#include + +#include +#include +#include +#include +#include + +#include +#include + +namespace cugraph { + +namespace detail { + +template +void sort_and_remove_multi_edges(raft::handle_t const& handle, + rmm::device_uvector& edgelist_srcs /* [INOUT] */, + rmm::device_uvector& edgelist_dsts /* [INOUT] */) +{ + auto edge_first = + thrust::make_zip_iterator(thrust::make_tuple(edgelist_srcs.begin(), edgelist_dsts.begin())); + thrust::sort(handle.get_thrust_policy(), edge_first, edge_first + edgelist_srcs.size()); + edgelist_srcs.resize( + thrust::distance(edge_first, + thrust::unique(handle.get_thrust_policy(), + edge_first, + edge_first + edgelist_srcs.size(), + [] __device__(auto lhs, auto rhs) { + return (thrust::get<0>(lhs) == thrust::get<0>(rhs)) && + (thrust::get<1>(lhs) == thrust::get<1>(rhs)); + })), + handle.get_stream()); + edgelist_dsts.resize(edgelist_srcs.size(), handle.get_stream()); +} + +template +void sort_and_remove_multi_edges(raft::handle_t const& handle, + rmm::device_uvector& edgelist_srcs /* [INOUT] */, + rmm::device_uvector& edgelist_dsts /* [INOUT] */, + rmm::device_uvector& edgelist_a /* [INOUT] */) +{ + auto edge_first = thrust::make_zip_iterator( + thrust::make_tuple(edgelist_srcs.begin(), edgelist_dsts.begin(), edgelist_a.begin())); + thrust::sort(handle.get_thrust_policy(), edge_first, edge_first + edgelist_srcs.size()); + edgelist_srcs.resize( + thrust::distance(edge_first, + thrust::unique(handle.get_thrust_policy(), + edge_first, + edge_first + edgelist_srcs.size(), + [] __device__(auto lhs, auto rhs) { + return (thrust::get<0>(lhs) == thrust::get<0>(rhs)) && + (thrust::get<1>(lhs) == thrust::get<1>(rhs)); + })), + handle.get_stream()); + edgelist_dsts.resize(edgelist_srcs.size(), handle.get_stream()); + edgelist_a.resize(edgelist_srcs.size(), handle.get_stream()); +} + +template +void sort_and_remove_multi_edges(raft::handle_t const& handle, + rmm::device_uvector& edgelist_srcs /* [INOUT] */, + rmm::device_uvector& edgelist_dsts /* [INOUT] */, + rmm::device_uvector& edgelist_a /* [INOUT] */, + rmm::device_uvector& edgelist_b /* [INOUT] */) +{ + auto edge_first = thrust::make_zip_iterator(thrust::make_tuple( + edgelist_srcs.begin(), edgelist_dsts.begin(), edgelist_a.begin(), edgelist_b.begin())); + thrust::sort(handle.get_thrust_policy(), edge_first, edge_first + edgelist_srcs.size()); + edgelist_srcs.resize( + thrust::distance(edge_first, + thrust::unique(handle.get_thrust_policy(), + edge_first, + edge_first + edgelist_srcs.size(), + [] __device__(auto lhs, auto rhs) { + return (thrust::get<0>(lhs) == thrust::get<0>(rhs)) && + (thrust::get<1>(lhs) == thrust::get<1>(rhs)); + })), + handle.get_stream()); + edgelist_dsts.resize(edgelist_srcs.size(), handle.get_stream()); + edgelist_a.resize(edgelist_srcs.size(), handle.get_stream()); + edgelist_b.resize(edgelist_srcs.size(), handle.get_stream()); +} + +template +void sort_and_remove_multi_edges(raft::handle_t const& handle, + rmm::device_uvector& edgelist_srcs /* [INOUT] */, + rmm::device_uvector& edgelist_dsts /* [INOUT] */, + rmm::device_uvector& edgelist_a /* [INOUT] */, + rmm::device_uvector& edgelist_b /* [INOUT] */, + rmm::device_uvector& edgelist_c /* [INOUT] */) +{ + auto edge_first = thrust::make_zip_iterator(thrust::make_tuple(edgelist_srcs.begin(), + edgelist_dsts.begin(), + edgelist_a.begin(), + edgelist_b.begin(), + edgelist_c.begin())); + thrust::sort(handle.get_thrust_policy(), edge_first, edge_first + edgelist_srcs.size()); + edgelist_srcs.resize( + thrust::distance(edge_first, + thrust::unique(handle.get_thrust_policy(), + edge_first, + edge_first + edgelist_srcs.size(), + [] __device__(auto lhs, auto rhs) { + return (thrust::get<0>(lhs) == thrust::get<0>(rhs)) && + (thrust::get<1>(lhs) == thrust::get<1>(rhs)); + })), + handle.get_stream()); + edgelist_dsts.resize(edgelist_srcs.size(), handle.get_stream()); + edgelist_a.resize(edgelist_srcs.size(), handle.get_stream()); + edgelist_b.resize(edgelist_srcs.size(), handle.get_stream()); + edgelist_c.resize(edgelist_srcs.size(), handle.get_stream()); +} + +} // namespace detail + +template +std::tuple, + rmm::device_uvector, + std::optional>, + std::optional>, + std::optional>> +sort_and_remove_multi_edges(raft::handle_t const& handle, + rmm::device_uvector&& edgelist_srcs, + rmm::device_uvector&& edgelist_dsts, + std::optional>&& edgelist_weights, + std::optional>&& edgelist_edge_ids, + std::optional>&& edgelist_edge_types) +{ + if (edgelist_weights) { + if (edgelist_edge_ids) { + if (edgelist_edge_types) { + detail::sort_and_remove_multi_edges(handle, + edgelist_srcs, + edgelist_dsts, + *edgelist_weights, + *edgelist_edge_ids, + *edgelist_edge_types); + } else { + detail::sort_and_remove_multi_edges( + handle, edgelist_srcs, edgelist_dsts, *edgelist_weights, *edgelist_edge_ids); + } + } else { + if (edgelist_edge_types) { + detail::sort_and_remove_multi_edges( + handle, edgelist_srcs, edgelist_dsts, *edgelist_weights, *edgelist_edge_types); + } else { + detail::sort_and_remove_multi_edges( + handle, edgelist_srcs, edgelist_dsts, *edgelist_weights); + } + } + } else { + if (edgelist_edge_ids) { + if (edgelist_edge_types) { + detail::sort_and_remove_multi_edges( + handle, edgelist_srcs, edgelist_dsts, *edgelist_edge_ids, *edgelist_edge_types); + } else { + detail::sort_and_remove_multi_edges( + handle, edgelist_srcs, edgelist_dsts, *edgelist_edge_ids); + } + } else { + if (edgelist_edge_types) { + detail::sort_and_remove_multi_edges( + handle, edgelist_srcs, edgelist_dsts, *edgelist_edge_types); + } else { + detail::sort_and_remove_multi_edges(handle, edgelist_srcs, edgelist_dsts); + } + } + } + + return std::make_tuple(std::move(edgelist_srcs), + std::move(edgelist_dsts), + std::move(edgelist_weights), + std::move(edgelist_edge_ids), + std::move(edgelist_edge_types)); +} + +} // namespace cugraph diff --git a/cpp/tests/c_api/create_graph_test.c b/cpp/tests/c_api/create_graph_test.c index 276e790c2b6..ed9db237a29 100644 --- a/cpp/tests/c_api/create_graph_test.c +++ b/cpp/tests/c_api/create_graph_test.c @@ -102,6 +102,8 @@ int test_create_sg_graph_simple() FALSE, FALSE, FALSE, + FALSE, + FALSE, &graph, &ret_error); TEST_ASSERT(test_ret_value, ret_code == CUGRAPH_SUCCESS, "graph creation failed."); @@ -393,6 +395,8 @@ int test_create_sg_graph_symmetric_error() NULL, FALSE, FALSE, + FALSE, + FALSE, TRUE, &graph, &ret_error); @@ -507,6 +511,8 @@ int test_create_sg_graph_with_isolated_vertices() FALSE, FALSE, FALSE, + FALSE, + FALSE, &graph, &ret_error); TEST_ASSERT(test_ret_value, ret_code == CUGRAPH_SUCCESS, "graph creation failed."); @@ -716,6 +722,165 @@ int test_create_sg_graph_csr_with_isolated() return test_ret_value; } +int test_create_sg_graph_with_isolated_vertices_multi_input() +{ + int test_ret_value = 0; + + typedef int32_t vertex_t; + typedef int32_t edge_t; + typedef float weight_t; + + cugraph_error_code_t ret_code = CUGRAPH_SUCCESS; + cugraph_error_t* ret_error; + size_t num_edges = 11; + size_t num_vertices = 7; + double alpha = 0.95; + double epsilon = 0.0001; + size_t max_iterations = 20; + + vertex_t h_vertices[] = { 0, 1, 2, 3, 4, 5, 6 }; + vertex_t h_src[] = {0, 1, 1, 2, 2, 2, 3, 4, 4, 4, 5}; + vertex_t h_dst[] = {1, 3, 4, 0, 1, 3, 5, 5, 5, 5, 5}; + weight_t h_wgt[] = {0.1f, 2.1f, 1.1f, 5.1f, 3.1f, 4.1f, 7.2f, 3.2f, 3.2f, 3.2f, 1.7f}; + weight_t h_result[] = { 0.0859168, 0.158029, 0.0616337, 0.179675, 0.113239, 0.339873, 0.0616337 }; + + cugraph_resource_handle_t* handle = NULL; + cugraph_graph_t* graph = NULL; + cugraph_graph_properties_t properties; + + properties.is_symmetric = FALSE; + properties.is_multigraph = FALSE; + + data_type_id_t vertex_tid = INT32; + data_type_id_t edge_tid = INT32; + data_type_id_t weight_tid = FLOAT32; + + handle = cugraph_create_resource_handle(NULL); + TEST_ASSERT(test_ret_value, handle != NULL, "resource handle creation failed."); + + cugraph_type_erased_device_array_t* vertices; + cugraph_type_erased_device_array_t* src; + cugraph_type_erased_device_array_t* dst; + cugraph_type_erased_device_array_t* wgt; + cugraph_type_erased_device_array_view_t* vertices_view; + cugraph_type_erased_device_array_view_t* src_view; + cugraph_type_erased_device_array_view_t* dst_view; + cugraph_type_erased_device_array_view_t* wgt_view; + + ret_code = + cugraph_type_erased_device_array_create(handle, num_vertices, vertex_tid, &vertices, &ret_error); + TEST_ASSERT(test_ret_value, ret_code == CUGRAPH_SUCCESS, "vertices create failed."); + TEST_ASSERT(test_ret_value, ret_code == CUGRAPH_SUCCESS, cugraph_error_message(ret_error)); + + ret_code = + cugraph_type_erased_device_array_create(handle, num_edges, vertex_tid, &src, &ret_error); + TEST_ASSERT(test_ret_value, ret_code == CUGRAPH_SUCCESS, "src create failed."); + + ret_code = + cugraph_type_erased_device_array_create(handle, num_edges, vertex_tid, &dst, &ret_error); + TEST_ASSERT(test_ret_value, ret_code == CUGRAPH_SUCCESS, "dst create failed."); + + ret_code = + cugraph_type_erased_device_array_create(handle, num_edges, weight_tid, &wgt, &ret_error); + TEST_ASSERT(test_ret_value, ret_code == CUGRAPH_SUCCESS, "wgt create failed."); + + vertices_view = cugraph_type_erased_device_array_view(vertices); + src_view = cugraph_type_erased_device_array_view(src); + dst_view = cugraph_type_erased_device_array_view(dst); + wgt_view = cugraph_type_erased_device_array_view(wgt); + + ret_code = cugraph_type_erased_device_array_view_copy_from_host( + handle, vertices_view, (byte_t*)h_vertices, &ret_error); + TEST_ASSERT(test_ret_value, ret_code == CUGRAPH_SUCCESS, "vertices copy_from_host failed."); + + ret_code = cugraph_type_erased_device_array_view_copy_from_host( + handle, src_view, (byte_t*)h_src, &ret_error); + TEST_ASSERT(test_ret_value, ret_code == CUGRAPH_SUCCESS, "src copy_from_host failed."); + + ret_code = cugraph_type_erased_device_array_view_copy_from_host( + handle, dst_view, (byte_t*)h_dst, &ret_error); + TEST_ASSERT(test_ret_value, ret_code == CUGRAPH_SUCCESS, "dst copy_from_host failed."); + + ret_code = cugraph_type_erased_device_array_view_copy_from_host( + handle, wgt_view, (byte_t*)h_wgt, &ret_error); + TEST_ASSERT(test_ret_value, ret_code == CUGRAPH_SUCCESS, "wgt copy_from_host failed."); + + ret_code = cugraph_graph_create_sg(handle, + &properties, + vertices_view, + src_view, + dst_view, + wgt_view, + NULL, + NULL, + FALSE, + FALSE, + TRUE, + TRUE, + FALSE, + &graph, + &ret_error); + TEST_ASSERT(test_ret_value, ret_code == CUGRAPH_SUCCESS, "graph creation failed."); + + cugraph_centrality_result_t* result = NULL; + + // To verify we will call pagerank + ret_code = cugraph_pagerank(handle, + graph, + NULL, + NULL, + NULL, + NULL, + alpha, + epsilon, + max_iterations, + FALSE, + &result, + &ret_error); + TEST_ASSERT(test_ret_value, ret_code == CUGRAPH_SUCCESS, "cugraph_pagerank failed."); + TEST_ALWAYS_ASSERT(ret_code == CUGRAPH_SUCCESS, cugraph_error_message(ret_error)); + + cugraph_type_erased_device_array_view_t* result_vertices; + cugraph_type_erased_device_array_view_t* pageranks; + + result_vertices = cugraph_centrality_result_get_vertices(result); + pageranks = cugraph_centrality_result_get_values(result); + + vertex_t h_result_vertices[num_vertices]; + weight_t h_pageranks[num_vertices]; + + ret_code = cugraph_type_erased_device_array_view_copy_to_host( + handle, (byte_t*)h_result_vertices, result_vertices, &ret_error); + TEST_ASSERT(test_ret_value, ret_code == CUGRAPH_SUCCESS, "copy_to_host failed."); + + ret_code = cugraph_type_erased_device_array_view_copy_to_host( + handle, (byte_t*)h_pageranks, pageranks, &ret_error); + TEST_ASSERT(test_ret_value, ret_code == CUGRAPH_SUCCESS, "copy_to_host failed."); + + for (int i = 0; (i < num_vertices) && (test_ret_value == 0); ++i) { + TEST_ASSERT(test_ret_value, + nearlyEqual(h_result[h_result_vertices[i]], h_pageranks[i], 0.001), + "pagerank results don't match"); + } + + cugraph_centrality_result_free(result); + cugraph_graph_free(graph); + + cugraph_type_erased_device_array_view_free(wgt_view); + cugraph_type_erased_device_array_view_free(dst_view); + cugraph_type_erased_device_array_view_free(src_view); + cugraph_type_erased_device_array_view_free(vertices_view); + cugraph_type_erased_device_array_free(wgt); + cugraph_type_erased_device_array_free(dst); + cugraph_type_erased_device_array_free(src); + cugraph_type_erased_device_array_free(vertices); + + cugraph_free_resource_handle(handle); + cugraph_error_free(ret_error); + + return test_ret_value; +} + /******************************************************************************/ int main(int argc, char** argv) @@ -726,5 +891,6 @@ int main(int argc, char** argv) result |= RUN_TEST(test_create_sg_graph_symmetric_error); result |= RUN_TEST(test_create_sg_graph_with_isolated_vertices); result |= RUN_TEST(test_create_sg_graph_csr_with_isolated); + result |= RUN_TEST(test_create_sg_graph_with_isolated_vertices_multi_input); return result; } diff --git a/cpp/tests/c_api/mg_create_graph_test.c b/cpp/tests/c_api/mg_create_graph_test.c index f96edaca7e7..fec319d1881 100644 --- a/cpp/tests/c_api/mg_create_graph_test.c +++ b/cpp/tests/c_api/mg_create_graph_test.c @@ -106,6 +106,8 @@ int test_create_mg_graph_simple(const cugraph_resource_handle_t* handle) NULL, FALSE, 1, + FALSE, + FALSE, TRUE, &graph, &ret_error); @@ -239,6 +241,196 @@ int test_create_mg_graph_multiple_edge_lists(const cugraph_resource_handle_t* ha NULL, FALSE, num_local_arrays, + FALSE, + FALSE, + TRUE, + &graph, + &ret_error); + TEST_ASSERT(test_ret_value, ret_code == CUGRAPH_SUCCESS, "graph creation failed."); + TEST_ALWAYS_ASSERT(ret_code == CUGRAPH_SUCCESS, cugraph_error_message(ret_error)); + + // + // Now call pagerank and check results... + // + cugraph_centrality_result_t* result = NULL; + + ret_code = cugraph_pagerank(handle, + graph, + NULL, + NULL, + NULL, + NULL, + alpha, + epsilon, + max_iterations, + FALSE, + &result, + &ret_error); + TEST_ASSERT(test_ret_value, ret_code == CUGRAPH_SUCCESS, "cugraph_pagerank failed."); + + // NOTE: Because we get back vertex ids and pageranks, we can simply compare + // the returned values with the expected results for the entire + // graph. Each GPU will have a subset of the total vertices, so + // they will do a subset of the comparisons. + cugraph_type_erased_device_array_view_t* result_vertices; + cugraph_type_erased_device_array_view_t* pageranks; + + result_vertices = cugraph_centrality_result_get_vertices(result); + pageranks = cugraph_centrality_result_get_values(result); + + size_t num_local_vertices = cugraph_type_erased_device_array_view_size(result_vertices); + + vertex_t h_result_vertices[num_local_vertices]; + weight_t h_pageranks[num_local_vertices]; + + ret_code = cugraph_type_erased_device_array_view_copy_to_host( + handle, (byte_t*)h_result_vertices, result_vertices, &ret_error); + TEST_ASSERT(test_ret_value, ret_code == CUGRAPH_SUCCESS, "copy_to_host failed."); + + ret_code = cugraph_type_erased_device_array_view_copy_to_host( + handle, (byte_t*)h_pageranks, pageranks, &ret_error); + TEST_ASSERT(test_ret_value, ret_code == CUGRAPH_SUCCESS, "copy_to_host failed."); + + for (int i = 0; (i < num_local_vertices) && (test_ret_value == 0); ++i) { + TEST_ASSERT(test_ret_value, + nearlyEqual(h_result[h_result_vertices[i]], h_pageranks[i], 0.001), + "pagerank results don't match"); + } + + cugraph_centrality_result_free(result); + cugraph_graph_free(graph); + + for (size_t i = 0 ; i < num_local_arrays ; ++i) { + cugraph_type_erased_device_array_view_free(wgt_view[i]); + cugraph_type_erased_device_array_view_free(dst_view[i]); + cugraph_type_erased_device_array_view_free(src_view[i]); + cugraph_type_erased_device_array_view_free(vertices_view[i]); + cugraph_type_erased_device_array_free(wgt[i]); + cugraph_type_erased_device_array_free(dst[i]); + cugraph_type_erased_device_array_free(src[i]); + cugraph_type_erased_device_array_free(vertices[i]); + } + + cugraph_error_free(ret_error); + + return test_ret_value; +} + +int test_create_mg_graph_multiple_edge_lists_multi_edge(const cugraph_resource_handle_t* handle) +{ + int test_ret_value = 0; + + typedef int32_t vertex_t; + typedef int32_t edge_t; + typedef float weight_t; + + cugraph_error_code_t ret_code = CUGRAPH_SUCCESS; + cugraph_error_t* ret_error; + size_t num_edges = 11; + size_t num_vertices = 7; + + double alpha = 0.95; + double epsilon = 0.0001; + size_t max_iterations = 20; + + vertex_t h_vertices[] = { 0, 1, 2, 3, 4, 5, 6 }; + vertex_t h_src[] = {0, 1, 1, 2, 2, 2, 3, 4, 4, 4, 5}; + vertex_t h_dst[] = {1, 3, 4, 0, 1, 3, 5, 5, 5, 5, 5}; + weight_t h_wgt[] = {0.1f, 2.1f, 1.1f, 5.1f, 3.1f, 4.1f, 7.2f, 3.2f, 3.2f, 3.2f, 1.1f}; + weight_t h_result[] = { 0.0859168, 0.158029, 0.0616337, 0.179675, 0.113239, 0.339873, 0.0616337 }; + + cugraph_graph_t* graph = NULL; + cugraph_graph_properties_t properties; + + properties.is_symmetric = FALSE; + properties.is_multigraph = FALSE; + + data_type_id_t vertex_tid = INT32; + data_type_id_t edge_tid = INT32; + data_type_id_t weight_tid = FLOAT32; + + const size_t num_local_arrays = 2; + + cugraph_type_erased_device_array_t* vertices[num_local_arrays]; + cugraph_type_erased_device_array_t* src[num_local_arrays]; + cugraph_type_erased_device_array_t* dst[num_local_arrays]; + cugraph_type_erased_device_array_t* wgt[num_local_arrays]; + cugraph_type_erased_device_array_view_t* vertices_view[num_local_arrays]; + cugraph_type_erased_device_array_view_t* src_view[num_local_arrays]; + cugraph_type_erased_device_array_view_t* dst_view[num_local_arrays]; + cugraph_type_erased_device_array_view_t* wgt_view[num_local_arrays]; + + int my_rank = cugraph_resource_handle_get_rank(handle); + int comm_size = cugraph_resource_handle_get_comm_size(handle); + + size_t local_num_vertices = (num_vertices + comm_size - 1) / comm_size; + size_t local_start_vertex = my_rank * local_num_vertices; + size_t local_num_edges = (num_edges + comm_size - 1) / comm_size; + size_t local_start_edge = my_rank * local_num_edges; + + local_num_edges = (local_num_edges < (num_edges - local_start_edge)) ? local_num_edges : (num_edges - local_start_edge); + local_num_vertices = (local_num_vertices < (num_vertices - local_start_vertex)) ? local_num_vertices : (num_vertices - local_start_vertex); + + for (size_t i = 0 ; i < num_local_arrays ; ++i) { + size_t vertex_count = (local_num_vertices + num_local_arrays - 1) / num_local_arrays; + size_t vertex_start = i * vertex_count; + vertex_count = (vertex_count < (local_num_vertices - vertex_start)) ? vertex_count : (local_num_vertices - vertex_start); + + ret_code = + cugraph_type_erased_device_array_create(handle, vertex_count, vertex_tid, vertices + i, &ret_error); + TEST_ASSERT(test_ret_value, ret_code == CUGRAPH_SUCCESS, "vertices create failed."); + TEST_ASSERT(test_ret_value, ret_code == CUGRAPH_SUCCESS, cugraph_error_message(ret_error)); + + size_t edge_count = (local_num_edges + num_local_arrays - 1) / num_local_arrays; + size_t edge_start = i * edge_count; + edge_count = (edge_count < (local_num_edges - edge_start)) ? edge_count : (local_num_edges - edge_start); + + ret_code = + cugraph_type_erased_device_array_create(handle, edge_count, vertex_tid, src + i, &ret_error); + TEST_ASSERT(test_ret_value, ret_code == CUGRAPH_SUCCESS, "src create failed."); + + ret_code = + cugraph_type_erased_device_array_create(handle, edge_count, vertex_tid, dst + i, &ret_error); + TEST_ASSERT(test_ret_value, ret_code == CUGRAPH_SUCCESS, "dst create failed."); + + ret_code = + cugraph_type_erased_device_array_create(handle, edge_count, weight_tid, wgt + i, &ret_error); + TEST_ASSERT(test_ret_value, ret_code == CUGRAPH_SUCCESS, "wgt create failed."); + + vertices_view[i] = cugraph_type_erased_device_array_view(vertices[i]); + src_view[i] = cugraph_type_erased_device_array_view(src[i]); + dst_view[i] = cugraph_type_erased_device_array_view(dst[i]); + wgt_view[i] = cugraph_type_erased_device_array_view(wgt[i]); + + ret_code = cugraph_type_erased_device_array_view_copy_from_host( + handle, vertices_view[i], (byte_t*)(h_vertices + local_start_vertex + vertex_start), &ret_error); + TEST_ASSERT(test_ret_value, ret_code == CUGRAPH_SUCCESS, "src copy_from_host failed."); + + ret_code = cugraph_type_erased_device_array_view_copy_from_host( + handle, src_view[i], (byte_t*)(h_src + local_start_edge + edge_start), &ret_error); + TEST_ASSERT(test_ret_value, ret_code == CUGRAPH_SUCCESS, "src copy_from_host failed."); + + ret_code = cugraph_type_erased_device_array_view_copy_from_host( + handle, dst_view[i], (byte_t*)(h_dst + local_start_edge + edge_start), &ret_error); + TEST_ASSERT(test_ret_value, ret_code == CUGRAPH_SUCCESS, "dst copy_from_host failed."); + + ret_code = cugraph_type_erased_device_array_view_copy_from_host( + handle, wgt_view[i], (byte_t*)(h_wgt + local_start_edge + edge_start), &ret_error); + TEST_ASSERT(test_ret_value, ret_code == CUGRAPH_SUCCESS, "wgt copy_from_host failed."); + } + + ret_code = cugraph_graph_create_mg(handle, + &properties, + (cugraph_type_erased_device_array_view_t const* const*) vertices_view, + (cugraph_type_erased_device_array_view_t const* const*) src_view, + (cugraph_type_erased_device_array_view_t const* const*) dst_view, + (cugraph_type_erased_device_array_view_t const* const*) wgt_view, + NULL, + NULL, + FALSE, + num_local_arrays, + TRUE, + TRUE, TRUE, &graph, &ret_error); @@ -322,6 +514,7 @@ int main(int argc, char** argv) int result = 0; result |= RUN_MG_TEST(test_create_mg_graph_simple, handle); result |= RUN_MG_TEST(test_create_mg_graph_multiple_edge_lists, handle); + result |= RUN_MG_TEST(test_create_mg_graph_multiple_edge_lists_multi_edge, handle); cugraph_free_resource_handle(handle); free_mg_raft_handle(raft_handle); From d96ba625805541be408e31b1746a66ce583897b3 Mon Sep 17 00:00:00 2001 From: Charles Hastings Date: Thu, 16 Nov 2023 23:41:50 -0800 Subject: [PATCH 5/6] refactor remove_self_loops and sort_and_remove_multi_edges to reduce memory footprint when graphs are large relative to GPU memory --- cpp/src/c_api/graph_mg.cpp | 94 ++++-- cpp/src/c_api/graph_sg.cpp | 37 +- cpp/src/structure/detail/structure_utils.cuh | 58 ++++ cpp/src/structure/remove_self_loops_impl.cuh | 155 ++------- .../sort_and_remove_multi_edges_impl.cuh | 319 ++++++++++++------ cpp/tests/c_api/create_graph_test.c | 23 +- 6 files changed, 393 insertions(+), 293 deletions(-) diff --git a/cpp/src/c_api/graph_mg.cpp b/cpp/src/c_api/graph_mg.cpp index c38fe1a09d0..ca89e3091f4 100644 --- a/cpp/src/c_api/graph_mg.cpp +++ b/cpp/src/c_api/graph_mg.cpp @@ -198,17 +198,6 @@ struct create_graph_functor : public cugraph::c_api::abstract_functor { cugraph::graph_view_t, edge_type_id_t>(handle_); - if (drop_multi_edges_) { - std::tie( - edgelist_srcs, edgelist_dsts, edgelist_weights, edgelist_edge_ids, edgelist_edge_types) = - cugraph::sort_and_remove_multi_edges(handle_, - std::move(edgelist_srcs), - std::move(edgelist_dsts), - std::move(edgelist_weights), - std::move(edgelist_edge_ids), - std::move(edgelist_edge_types)); - } - if (drop_self_loops_) { std::tie( edgelist_srcs, edgelist_dsts, edgelist_weights, edgelist_edge_ids, edgelist_edge_types) = @@ -220,6 +209,17 @@ struct create_graph_functor : public cugraph::c_api::abstract_functor { std::move(edgelist_edge_types)); } + if (drop_multi_edges_) { + std::tie( + edgelist_srcs, edgelist_dsts, edgelist_weights, edgelist_edge_ids, edgelist_edge_types) = + cugraph::sort_and_remove_multi_edges(handle_, + std::move(edgelist_srcs), + std::move(edgelist_dsts), + std::move(edgelist_weights), + std::move(edgelist_edge_ids), + std::move(edgelist_edge_types)); + } + std::tie(*graph, new_edge_weights, new_edge_ids, new_edge_types, new_number_map) = cugraph::create_graph_from_edgelisthandle_->get_stream()); - // FIXME: Need to handle the case where a GPU gets a NULL pointer but other GPUs - // Get values. Override vertex_type/weight_type/edge_id_type but don't - // fail. - auto vertex_types = cugraph::host_scalar_allgather( p_handle->handle_->get_comms(), static_cast(vertex_type), p_handle->handle_->get_stream()); auto weight_types = cugraph::host_scalar_allgather( p_handle->handle_->get_comms(), static_cast(weight_type), p_handle->handle_->get_stream()); - CAPI_EXPECTS( - std::count_if(vertex_types.begin(), - vertex_types.end(), - [vertex_type](auto t) { return vertex_type != static_cast(t); }) == 0, - CUGRAPH_INVALID_INPUT, - "different vertex type used on different GPUs", - *error); + if (vertex_type == cugraph_data_type_id_t::NTYPES) { + // Only true if this GPU had no vertex arrays + vertex_type = static_cast( + *std::min_element(vertex_types.begin(), vertex_types.end())); + } - CAPI_EXPECTS( - std::count_if(weight_types.begin(), - weight_types.end(), - [weight_type](auto t) { return weight_type != static_cast(t); }) == 0, - CUGRAPH_INVALID_INPUT, - "different weight type used on different GPUs", - *error); + if (weight_type == cugraph_data_type_id_t::NTYPES) { + // Only true if this GPU had no weight arrays + weight_type = static_cast( + *std::min_element(weight_types.begin(), weight_types.end())); + } + + CAPI_EXPECTS(std::all_of(vertex_types.begin(), + vertex_types.end(), + [vertex_type](auto t) { return vertex_type == static_cast(t); }), + CUGRAPH_INVALID_INPUT, + "different vertex type used on different GPUs", + *error); + + CAPI_EXPECTS(std::all_of(weight_types.begin(), + weight_types.end(), + [weight_type](auto t) { return weight_type == static_cast(t); }), + CUGRAPH_INVALID_INPUT, + "different weight type used on different GPUs", + *error); cugraph_data_type_id_t edge_type; @@ -435,19 +447,27 @@ extern "C" cugraph_error_code_t cugraph_graph_create_mg( static_cast(edge_type_id_type), p_handle->handle_->get_stream()); - CAPI_EXPECTS(std::count_if(edge_type_id_types.begin(), - edge_type_id_types.end(), - [edge_type_id_type](auto t) { - return edge_type_id_type != static_cast(t); - }) == 0, - CUGRAPH_INVALID_INPUT, - "different edge_type_id type used on different GPUs", - *error); + if (edge_type_id_type == cugraph_data_type_id_t::NTYPES) { + // Only true if this GPU had no edge_type_id arrays + edge_type_id_type = static_cast( + *std::min_element(edge_type_id_types.begin(), edge_type_id_types.end())); + } + + CAPI_EXPECTS( + std::all_of(edge_type_id_types.begin(), + edge_type_id_types.end(), + [edge_type_id_type](auto t) { return edge_type_id_type == static_cast(t); }), + CUGRAPH_INVALID_INPUT, + "different edge_type_id type used on different GPUs", + *error); if (edge_type_id_type == cugraph_data_type_id_t::NTYPES) { edge_type_id_type = cugraph_data_type_id_t::INT32; } + // + // Now we know enough to create the graph + // create_graph_functor functor(*p_handle->handle_, properties, vertex_type, diff --git a/cpp/src/c_api/graph_sg.cpp b/cpp/src/c_api/graph_sg.cpp index 4c7307e849c..3cdeb0d86c2 100644 --- a/cpp/src/c_api/graph_sg.cpp +++ b/cpp/src/c_api/graph_sg.cpp @@ -181,29 +181,6 @@ struct create_graph_functor : public cugraph::c_api::abstract_functor { cugraph::graph_view_t, edge_type_id_t>(handle_); - if (drop_multi_edges_) { - std::cout << "calling drop_multi_edges" << std::endl; - raft::print_device_vector( - " edgelist_srcs", edgelist_srcs.data(), edgelist_srcs.size(), std::cout); - raft::print_device_vector( - " edgelist_dsts", edgelist_dsts.data(), edgelist_dsts.size(), std::cout); - - std::tie( - edgelist_srcs, edgelist_dsts, edgelist_weights, edgelist_edge_ids, edgelist_edge_types) = - cugraph::sort_and_remove_multi_edges(handle_, - std::move(edgelist_srcs), - std::move(edgelist_dsts), - std::move(edgelist_weights), - std::move(edgelist_edge_ids), - std::move(edgelist_edge_types)); - - std::cout << "after drop_multi_edges" << std::endl; - raft::print_device_vector( - " edgelist_srcs", edgelist_srcs.data(), edgelist_srcs.size(), std::cout); - raft::print_device_vector( - " edgelist_dsts", edgelist_dsts.data(), edgelist_dsts.size(), std::cout); - } - if (drop_self_loops_) { std::tie( edgelist_srcs, edgelist_dsts, edgelist_weights, edgelist_edge_ids, edgelist_edge_types) = @@ -215,6 +192,17 @@ struct create_graph_functor : public cugraph::c_api::abstract_functor { std::move(edgelist_edge_types)); } + if (drop_multi_edges_) { + std::tie( + edgelist_srcs, edgelist_dsts, edgelist_weights, edgelist_edge_ids, edgelist_edge_types) = + cugraph::sort_and_remove_multi_edges(handle_, + std::move(edgelist_srcs), + std::move(edgelist_dsts), + std::move(edgelist_weights), + std::move(edgelist_edge_ids), + std::move(edgelist_edge_types)); + } + std::tie(*graph, new_edge_weights, new_edge_ids, new_edge_types, new_number_map) = cugraph::create_graph_from_edgelisttype_; } - std::cout << " constructing create_graph_functor, drop_multi_edges = " - << (drop_multi_edges ? "TRUE" : "FALSE") << std::endl; - ::create_graph_functor functor(*p_handle->handle_, properties, p_vertices, diff --git a/cpp/src/structure/detail/structure_utils.cuh b/cpp/src/structure/detail/structure_utils.cuh index 01fbccaa53e..4674efe0c88 100644 --- a/cpp/src/structure/detail/structure_utils.cuh +++ b/cpp/src/structure/detail/structure_utils.cuh @@ -21,6 +21,7 @@ #include #include #include +#include #include #include @@ -33,6 +34,7 @@ #include #include #include +#include #include #include #include @@ -496,6 +498,62 @@ void sort_adjacency_list(raft::handle_t const& handle, } } +template +struct indirect_array_reference { + T const* array_; + + T operator() __host__ __device__(size_t index) { return array_[index]; } +}; + +template +std::tuple> mark_edges_for_removal( + raft::handle_t const& handle, + raft::device_span src, + raft::device_span dst, + comparison_t comparison) +{ + rmm::device_uvector remove_flags(packed_bool_size(src.size()), handle.get_stream()); + thrust::fill(handle.get_thrust_policy(), + remove_flags.begin(), + remove_flags.end(), + cugraph::packed_bool_empty_mask()); + + size_t remove_count = thrust::count_if( + handle.get_thrust_policy(), + thrust::make_counting_iterator(size_t{0}), + thrust::make_counting_iterator(src.size()), + [comparison, d_remove_flags = remove_flags.data()] __device__(size_t i) { + if (comparison(i)) { + atomicOr(d_remove_flags + cugraph::packed_bool_offset(i), cugraph::packed_bool_mask(i)); + return true; + } + return false; + }); + + return std::make_tuple(remove_count, std::move(remove_flags)); +} + +template +rmm::device_uvector remove_flagged_elements(raft::handle_t const& handle, + rmm::device_uvector&& vector, + rmm::device_uvector const& remove_flags, + size_t remove_count) +{ + rmm::device_uvector result(vector.size() - remove_count, handle.get_stream()); + + thrust::copy_if( + handle.get_thrust_policy(), + thrust::make_counting_iterator(size_t{0}), + thrust::make_counting_iterator(vector.size()), + thrust::make_transform_output_iterator(result.begin(), + indirect_array_reference{vector.data()}), + [d_remove_flags = remove_flags.data()] __device__(size_t i) { + return !(d_remove_flags[cugraph::packed_bool_offset(i)] & cugraph::packed_bool_mask(i)); + }); + + return result; +} + } // namespace detail } // namespace cugraph diff --git a/cpp/src/structure/remove_self_loops_impl.cuh b/cpp/src/structure/remove_self_loops_impl.cuh index bad150eeac8..1dcd6b1bc05 100644 --- a/cpp/src/structure/remove_self_loops_impl.cuh +++ b/cpp/src/structure/remove_self_loops_impl.cuh @@ -15,9 +15,12 @@ */ #pragma once +#include + #include #include +#include #include #include #include @@ -28,97 +31,6 @@ namespace cugraph { -namespace detail { - -template -void remove_self_loops(raft::handle_t const& handle, - rmm::device_uvector& edgelist_srcs /* [INOUT] */, - rmm::device_uvector& edgelist_dsts /* [INOUT] */) -{ - auto edge_first = - thrust::make_zip_iterator(thrust::make_tuple(edgelist_srcs.begin(), edgelist_dsts.begin())); - edgelist_srcs.resize( - thrust::distance( - edge_first, - thrust::remove_if(handle.get_thrust_policy(), - edge_first, - edge_first + edgelist_srcs.size(), - [] __device__(auto e) { return thrust::get<0>(e) == thrust::get<1>(e); })), - handle.get_stream()); - edgelist_dsts.resize(edgelist_srcs.size(), handle.get_stream()); -} - -template -void remove_self_loops(raft::handle_t const& handle, - rmm::device_uvector& edgelist_srcs /* [INOUT] */, - rmm::device_uvector& edgelist_dsts /* [INOUT] */, - rmm::device_uvector& edgelist_a /* [INOUT] */) -{ - auto edge_first = thrust::make_zip_iterator( - thrust::make_tuple(edgelist_srcs.begin(), edgelist_dsts.begin(), edgelist_a.begin())); - edgelist_srcs.resize( - thrust::distance( - edge_first, - thrust::remove_if(handle.get_thrust_policy(), - edge_first, - edge_first + edgelist_srcs.size(), - [] __device__(auto e) { return thrust::get<0>(e) == thrust::get<1>(e); })), - handle.get_stream()); - edgelist_dsts.resize(edgelist_srcs.size(), handle.get_stream()); - edgelist_a.resize(edgelist_srcs.size(), handle.get_stream()); -} - -template -void remove_self_loops(raft::handle_t const& handle, - rmm::device_uvector& edgelist_srcs /* [INOUT] */, - rmm::device_uvector& edgelist_dsts /* [INOUT] */, - rmm::device_uvector& edgelist_a /* [INOUT] */, - rmm::device_uvector& edgelist_b /* [INOUT] */) -{ - auto edge_first = thrust::make_zip_iterator(thrust::make_tuple( - edgelist_srcs.begin(), edgelist_dsts.begin(), edgelist_a.begin(), edgelist_b.begin())); - edgelist_srcs.resize( - thrust::distance( - edge_first, - thrust::remove_if(handle.get_thrust_policy(), - edge_first, - edge_first + edgelist_srcs.size(), - [] __device__(auto e) { return thrust::get<0>(e) == thrust::get<1>(e); })), - handle.get_stream()); - edgelist_dsts.resize(edgelist_srcs.size(), handle.get_stream()); - edgelist_a.resize(edgelist_srcs.size(), handle.get_stream()); - edgelist_b.resize(edgelist_srcs.size(), handle.get_stream()); -} - -template -void remove_self_loops(raft::handle_t const& handle, - rmm::device_uvector& edgelist_srcs /* [INOUT] */, - rmm::device_uvector& edgelist_dsts /* [INOUT] */, - rmm::device_uvector& edgelist_a /* [INOUT] */, - rmm::device_uvector& edgelist_b /* [INOUT] */, - rmm::device_uvector& edgelist_c /* [INOUT] */) -{ - auto edge_first = thrust::make_zip_iterator(thrust::make_tuple(edgelist_srcs.begin(), - edgelist_dsts.begin(), - edgelist_a.begin(), - edgelist_b.begin(), - edgelist_c.begin())); - edgelist_srcs.resize( - thrust::distance( - edge_first, - thrust::remove_if(handle.get_thrust_policy(), - edge_first, - edge_first + edgelist_srcs.size(), - [] __device__(auto e) { return thrust::get<0>(e) == thrust::get<1>(e); })), - handle.get_stream()); - edgelist_dsts.resize(edgelist_srcs.size(), handle.get_stream()); - edgelist_a.resize(edgelist_srcs.size(), handle.get_stream()); - edgelist_b.resize(edgelist_srcs.size(), handle.get_stream()); - edgelist_c.resize(edgelist_srcs.size(), handle.get_stream()); -} - -} // namespace detail - template std::tuple, rmm::device_uvector, @@ -132,42 +44,31 @@ remove_self_loops(raft::handle_t const& handle, std::optional>&& edgelist_edge_ids, std::optional>&& edgelist_edge_types) { - if (edgelist_weights) { - if (edgelist_edge_ids) { - if (edgelist_edge_types) { - detail::remove_self_loops(handle, - edgelist_srcs, - edgelist_dsts, - *edgelist_weights, - *edgelist_edge_ids, - *edgelist_edge_types); - } else { - detail::remove_self_loops( - handle, edgelist_srcs, edgelist_dsts, *edgelist_weights, *edgelist_edge_ids); - } - } else { - if (edgelist_edge_types) { - detail::remove_self_loops( - handle, edgelist_srcs, edgelist_dsts, *edgelist_weights, *edgelist_edge_types); - } else { - detail::remove_self_loops(handle, edgelist_srcs, edgelist_dsts, *edgelist_weights); - } - } - } else { - if (edgelist_edge_ids) { - if (edgelist_edge_types) { - detail::remove_self_loops( - handle, edgelist_srcs, edgelist_dsts, *edgelist_edge_ids, *edgelist_edge_types); - } else { - detail::remove_self_loops(handle, edgelist_srcs, edgelist_dsts, *edgelist_edge_ids); - } - } else { - if (edgelist_edge_types) { - detail::remove_self_loops(handle, edgelist_srcs, edgelist_dsts, *edgelist_edge_types); - } else { - detail::remove_self_loops(handle, edgelist_srcs, edgelist_dsts); - } - } + auto [remove_count, remove_flags] = detail::mark_edges_for_removal( + handle, + raft::device_span{edgelist_srcs.data(), edgelist_srcs.size()}, + raft::device_span{edgelist_dsts.data(), edgelist_dsts.size()}, + [d_srcs = edgelist_srcs.data(), d_dsts = edgelist_dsts.data()] __device__(size_t i) { + return d_srcs[i] == d_dsts[i]; + }); + + if (remove_count > 0) { + edgelist_srcs = + detail::remove_flagged_elements(handle, std::move(edgelist_srcs), remove_flags, remove_count); + edgelist_dsts = + detail::remove_flagged_elements(handle, std::move(edgelist_dsts), remove_flags, remove_count); + + if (edgelist_weights) + edgelist_weights = detail::remove_flagged_elements( + handle, std::move(*edgelist_weights), remove_flags, remove_count); + + if (edgelist_edge_ids) + edgelist_edge_ids = detail::remove_flagged_elements( + handle, std::move(*edgelist_edge_ids), remove_flags, remove_count); + + if (edgelist_edge_types) + edgelist_edge_types = detail::remove_flagged_elements( + handle, std::move(*edgelist_edge_types), remove_flags, remove_count); } return std::make_tuple(std::move(edgelist_srcs), diff --git a/cpp/src/structure/sort_and_remove_multi_edges_impl.cuh b/cpp/src/structure/sort_and_remove_multi_edges_impl.cuh index 2c8f265bfa7..232d825a0c8 100644 --- a/cpp/src/structure/sort_and_remove_multi_edges_impl.cuh +++ b/cpp/src/structure/sort_and_remove_multi_edges_impl.cuh @@ -15,9 +15,18 @@ */ #pragma once +#include +// FIXME: mem_frugal_partition should probably not be in shuffle_comm.hpp +// It's used here without any notion of shuffling +#include + +#include +#include #include +#include #include +#include #include #include #include @@ -31,103 +40,163 @@ namespace cugraph { namespace detail { -template -void sort_and_remove_multi_edges(raft::handle_t const& handle, - rmm::device_uvector& edgelist_srcs /* [INOUT] */, - rmm::device_uvector& edgelist_dsts /* [INOUT] */) +template +rmm::device_uvector compute_hash_sizes(InputIterator iter_first, + InputIterator iter_last, + size_t num_hash_buckets, + hash_op_t hash_op, + rmm::cuda_stream_view stream_view) { - auto edge_first = - thrust::make_zip_iterator(thrust::make_tuple(edgelist_srcs.begin(), edgelist_dsts.begin())); - thrust::sort(handle.get_thrust_policy(), edge_first, edge_first + edgelist_srcs.size()); - edgelist_srcs.resize( - thrust::distance(edge_first, - thrust::unique(handle.get_thrust_policy(), - edge_first, - edge_first + edgelist_srcs.size(), - [] __device__(auto lhs, auto rhs) { - return (thrust::get<0>(lhs) == thrust::get<0>(rhs)) && - (thrust::get<1>(lhs) == thrust::get<1>(rhs)); - })), - handle.get_stream()); - edgelist_dsts.resize(edgelist_srcs.size(), handle.get_stream()); -} + rmm::device_uvector hash_counts(num_hash_buckets, stream_view); + thrust::fill(rmm::exec_policy(stream_view), hash_counts.begin(), hash_counts.end(), size_t{0}); -template -void sort_and_remove_multi_edges(raft::handle_t const& handle, - rmm::device_uvector& edgelist_srcs /* [INOUT] */, - rmm::device_uvector& edgelist_dsts /* [INOUT] */, - rmm::device_uvector& edgelist_a /* [INOUT] */) -{ - auto edge_first = thrust::make_zip_iterator( - thrust::make_tuple(edgelist_srcs.begin(), edgelist_dsts.begin(), edgelist_a.begin())); - thrust::sort(handle.get_thrust_policy(), edge_first, edge_first + edgelist_srcs.size()); - edgelist_srcs.resize( - thrust::distance(edge_first, - thrust::unique(handle.get_thrust_policy(), - edge_first, - edge_first + edgelist_srcs.size(), - [] __device__(auto lhs, auto rhs) { - return (thrust::get<0>(lhs) == thrust::get<0>(rhs)) && - (thrust::get<1>(lhs) == thrust::get<1>(rhs)); - })), - handle.get_stream()); - edgelist_dsts.resize(edgelist_srcs.size(), handle.get_stream()); - edgelist_a.resize(edgelist_srcs.size(), handle.get_stream()); + auto hash_counts_view = raft::device_span(hash_counts.data(), hash_counts.size()); + thrust::for_each(rmm::exec_policy(stream_view), + iter_first, + iter_last, + [num_hash_buckets, hash_op, hash_counts_view] __device__(auto value) { + atomicAdd(&hash_counts_view[hash_op(value) % num_hash_buckets], size_t{1}); + }); + + thrust::exclusive_scan( + rmm::exec_policy(stream_view), hash_counts.begin(), hash_counts.end(), hash_counts.begin()); + + return hash_counts; } -template -void sort_and_remove_multi_edges(raft::handle_t const& handle, - rmm::device_uvector& edgelist_srcs /* [INOUT] */, - rmm::device_uvector& edgelist_dsts /* [INOUT] */, - rmm::device_uvector& edgelist_a /* [INOUT] */, - rmm::device_uvector& edgelist_b /* [INOUT] */) +template +struct hash_src_dst_pair { + int32_t __device__ operator()(thrust::tuple t) const + { + vertex_t pair[2]; + pair[0] = thrust::get<0>(t); + pair[1] = thrust::get<1>(t); + cuco::detail::MurmurHash3_32 hash_func{}; + return hash_func.compute_hash(reinterpret_cast(pair), 2 * sizeof(vertex_t)); + } +}; + +template +std::tuple, rmm::device_uvector> +sort_and_remove_multi_edges(raft::handle_t const& handle, + rmm::device_uvector&& edgelist_srcs, + rmm::device_uvector&& edgelist_dsts, + size_t mem_frugal_threshold) { - auto edge_first = thrust::make_zip_iterator(thrust::make_tuple( - edgelist_srcs.begin(), edgelist_dsts.begin(), edgelist_a.begin(), edgelist_b.begin())); - thrust::sort(handle.get_thrust_policy(), edge_first, edge_first + edgelist_srcs.size()); + auto pair_first = thrust::make_zip_iterator(edgelist_srcs.begin(), edgelist_dsts.begin()); + + if (edgelist_srcs.size() > mem_frugal_threshold) { + // Tuning parameter to address high frequency multi-edges + size_t num_hash_buckets{16}; + + auto hash_counts = compute_hash_sizes(pair_first, + pair_first + edgelist_srcs.size(), + num_hash_buckets, + hash_src_dst_pair{}, + handle.get_stream()); + + auto pivot = + static_cast(thrust::distance(hash_counts.begin(), + thrust::lower_bound(handle.get_thrust_policy(), + hash_counts.begin(), + hash_counts.end(), + edgelist_srcs.size() / 2))); + + auto second_first = detail::mem_frugal_partition(pair_first, + pair_first + edgelist_srcs.size(), + hash_src_dst_pair{}, + pivot, + handle.get_stream()); + thrust::sort(handle.get_thrust_policy(), pair_first, second_first); + thrust::sort(handle.get_thrust_policy(), second_first, pair_first + edgelist_srcs.size()); + } else { + thrust::sort(handle.get_thrust_policy(), pair_first, pair_first + edgelist_srcs.size()); + } + edgelist_srcs.resize( - thrust::distance(edge_first, + thrust::distance(pair_first, thrust::unique(handle.get_thrust_policy(), - edge_first, - edge_first + edgelist_srcs.size(), + pair_first, + pair_first + edgelist_srcs.size(), [] __device__(auto lhs, auto rhs) { return (thrust::get<0>(lhs) == thrust::get<0>(rhs)) && (thrust::get<1>(lhs) == thrust::get<1>(rhs)); })), handle.get_stream()); + edgelist_dsts.resize(edgelist_srcs.size(), handle.get_stream()); - edgelist_a.resize(edgelist_srcs.size(), handle.get_stream()); - edgelist_b.resize(edgelist_srcs.size(), handle.get_stream()); + + return std::make_tuple(std::move(edgelist_srcs), std::move(edgelist_dsts)); } -template -void sort_and_remove_multi_edges(raft::handle_t const& handle, - rmm::device_uvector& edgelist_srcs /* [INOUT] */, - rmm::device_uvector& edgelist_dsts /* [INOUT] */, - rmm::device_uvector& edgelist_a /* [INOUT] */, - rmm::device_uvector& edgelist_b /* [INOUT] */, - rmm::device_uvector& edgelist_c /* [INOUT] */) +template +std::tuple, + rmm::device_uvector, + decltype(allocate_dataframe_buffer(size_t{0}, rmm::cuda_stream_view{}))> +sort_and_remove_multi_edges( + raft::handle_t const& handle, + rmm::device_uvector&& edgelist_srcs, + rmm::device_uvector&& edgelist_dsts, + decltype(allocate_dataframe_buffer(0, rmm::cuda_stream_view{}))&& edgelist_values, + size_t mem_frugal_threshold) { - auto edge_first = thrust::make_zip_iterator(thrust::make_tuple(edgelist_srcs.begin(), - edgelist_dsts.begin(), - edgelist_a.begin(), - edgelist_b.begin(), - edgelist_c.begin())); - thrust::sort(handle.get_thrust_policy(), edge_first, edge_first + edgelist_srcs.size()); - edgelist_srcs.resize( - thrust::distance(edge_first, - thrust::unique(handle.get_thrust_policy(), - edge_first, - edge_first + edgelist_srcs.size(), - [] __device__(auto lhs, auto rhs) { - return (thrust::get<0>(lhs) == thrust::get<0>(rhs)) && - (thrust::get<1>(lhs) == thrust::get<1>(rhs)); - })), - handle.get_stream()); + auto pair_first = thrust::make_zip_iterator(edgelist_srcs.begin(), edgelist_dsts.begin()); + + if (edgelist_srcs.size() > mem_frugal_threshold) { + // Tuning parameter to address high frequency multi-edges + size_t num_hash_buckets{16}; + + auto hash_counts = compute_hash_sizes(pair_first, + pair_first + edgelist_srcs.size(), + num_hash_buckets, + hash_src_dst_pair{}, + handle.get_stream()); + + auto pivot = + static_cast(thrust::distance(hash_counts.begin(), + thrust::lower_bound(handle.get_thrust_policy(), + hash_counts.begin(), + hash_counts.end(), + edgelist_srcs.size() / 2))); + + auto second_first = detail::mem_frugal_partition(pair_first, + pair_first + edgelist_srcs.size(), + get_dataframe_buffer_begin(edgelist_values), + hash_src_dst_pair{}, + pivot, + handle.get_stream()); + thrust::sort_by_key(handle.get_thrust_policy(), + pair_first, + std::get<0>(second_first), + get_dataframe_buffer_begin(edgelist_values)); + thrust::sort_by_key(handle.get_thrust_policy(), + std::get<0>(second_first), + pair_first + edgelist_srcs.size(), + std::get<1>(second_first)); + } else { + thrust::sort_by_key(handle.get_thrust_policy(), + pair_first, + pair_first + edgelist_srcs.size(), + get_dataframe_buffer_begin(edgelist_values)); + } + + edgelist_srcs.resize(thrust::distance(pair_first, + thrust::get<0>(thrust::unique_by_key( + handle.get_thrust_policy(), + pair_first, + pair_first + edgelist_srcs.size(), + get_dataframe_buffer_begin(edgelist_values), + [] __device__(auto lhs, auto rhs) { + return (thrust::get<0>(lhs) == thrust::get<0>(rhs)) && + (thrust::get<1>(lhs) == thrust::get<1>(rhs)); + }))), + handle.get_stream()); + edgelist_dsts.resize(edgelist_srcs.size(), handle.get_stream()); - edgelist_a.resize(edgelist_srcs.size(), handle.get_stream()); - edgelist_b.resize(edgelist_srcs.size(), handle.get_stream()); - edgelist_c.resize(edgelist_srcs.size(), handle.get_stream()); + resize_dataframe_buffer(edgelist_values, edgelist_srcs.size(), handle.get_stream()); + + return std::make_tuple( + std::move(edgelist_srcs), std::move(edgelist_dsts), std::move(edgelist_values)); } } // namespace detail @@ -145,43 +214,95 @@ sort_and_remove_multi_edges(raft::handle_t const& handle, std::optional>&& edgelist_edge_ids, std::optional>&& edgelist_edge_types) { + auto total_global_mem = handle.get_device_properties().totalGlobalMem; + size_t element_size = sizeof(vertex_t) * 2; + if (edgelist_weights) { element_size += sizeof(weight_t); } + if (edgelist_edge_ids) { element_size += sizeof(edge_t); } + if (edgelist_edge_types) { element_size += sizeof(edge_type_t); } + + auto constexpr mem_frugal_ratio = + 0.25; // if the expected temporary buffer size exceeds the mem_frugal_ratio of the + // total_global_mem, switch to the memory frugal approach + auto mem_frugal_threshold = + static_cast(static_cast(total_global_mem / element_size) * mem_frugal_ratio); + if (edgelist_weights) { if (edgelist_edge_ids) { if (edgelist_edge_types) { - detail::sort_and_remove_multi_edges(handle, - edgelist_srcs, - edgelist_dsts, - *edgelist_weights, - *edgelist_edge_ids, - *edgelist_edge_types); + std::forward_as_tuple(edgelist_srcs, + edgelist_dsts, + std::tie(edgelist_weights, edgelist_edge_ids, edgelist_edge_types)) = + detail::sort_and_remove_multi_edges>( + handle, + std::move(edgelist_srcs), + std::move(edgelist_dsts), + std::make_tuple(std::move(*edgelist_weights), + std::move(*edgelist_edge_ids), + std::move(*edgelist_edge_types)), + mem_frugal_threshold); } else { - detail::sort_and_remove_multi_edges( - handle, edgelist_srcs, edgelist_dsts, *edgelist_weights, *edgelist_edge_ids); + std::forward_as_tuple( + edgelist_srcs, edgelist_dsts, std::tie(edgelist_weights, edgelist_edge_ids)) = + detail::sort_and_remove_multi_edges>( + handle, + std::move(edgelist_srcs), + std::move(edgelist_dsts), + std::make_tuple(std::move(*edgelist_weights), std::move(*edgelist_edge_ids)), + mem_frugal_threshold); } } else { if (edgelist_edge_types) { - detail::sort_and_remove_multi_edges( - handle, edgelist_srcs, edgelist_dsts, *edgelist_weights, *edgelist_edge_types); + std::forward_as_tuple( + edgelist_srcs, edgelist_dsts, std::tie(edgelist_weights, edgelist_edge_types)) = + detail::sort_and_remove_multi_edges>( + handle, + std::move(edgelist_srcs), + std::move(edgelist_dsts), + std::make_tuple(std::move(*edgelist_weights), std::move(*edgelist_edge_types)), + mem_frugal_threshold); } else { - detail::sort_and_remove_multi_edges( - handle, edgelist_srcs, edgelist_dsts, *edgelist_weights); + std::forward_as_tuple(edgelist_srcs, edgelist_dsts, std::tie(edgelist_weights)) = + detail::sort_and_remove_multi_edges>( + handle, + std::move(edgelist_srcs), + std::move(edgelist_dsts), + std::make_tuple(std::move(*edgelist_weights)), + mem_frugal_threshold); } } } else { if (edgelist_edge_ids) { if (edgelist_edge_types) { - detail::sort_and_remove_multi_edges( - handle, edgelist_srcs, edgelist_dsts, *edgelist_edge_ids, *edgelist_edge_types); + std::forward_as_tuple( + edgelist_srcs, edgelist_dsts, std::tie(edgelist_edge_ids, edgelist_edge_types)) = + detail::sort_and_remove_multi_edges>( + handle, + std::move(edgelist_srcs), + std::move(edgelist_dsts), + std::make_tuple(std::move(*edgelist_edge_ids), std::move(*edgelist_edge_types)), + mem_frugal_threshold); } else { - detail::sort_and_remove_multi_edges( - handle, edgelist_srcs, edgelist_dsts, *edgelist_edge_ids); + std::forward_as_tuple(edgelist_srcs, edgelist_dsts, std::tie(edgelist_edge_ids)) = + detail::sort_and_remove_multi_edges>( + handle, + std::move(edgelist_srcs), + std::move(edgelist_dsts), + std::make_tuple(std::move(*edgelist_edge_ids)), + mem_frugal_threshold); } } else { if (edgelist_edge_types) { - detail::sort_and_remove_multi_edges( - handle, edgelist_srcs, edgelist_dsts, *edgelist_edge_types); + std::forward_as_tuple(edgelist_srcs, edgelist_dsts, std::tie(edgelist_edge_types)) = + detail::sort_and_remove_multi_edges>( + handle, + std::move(edgelist_srcs), + std::move(edgelist_dsts), + std::make_tuple(std::move(*edgelist_edge_types)), + mem_frugal_threshold); } else { - detail::sort_and_remove_multi_edges(handle, edgelist_srcs, edgelist_dsts); + std::tie(edgelist_srcs, edgelist_dsts) = detail::sort_and_remove_multi_edges( + handle, std::move(edgelist_srcs), std::move(edgelist_dsts), mem_frugal_threshold); } } } diff --git a/cpp/tests/c_api/create_graph_test.c b/cpp/tests/c_api/create_graph_test.c index ed9db237a29..11da2eb8589 100644 --- a/cpp/tests/c_api/create_graph_test.c +++ b/cpp/tests/c_api/create_graph_test.c @@ -732,16 +732,31 @@ int test_create_sg_graph_with_isolated_vertices_multi_input() cugraph_error_code_t ret_code = CUGRAPH_SUCCESS; cugraph_error_t* ret_error; - size_t num_edges = 11; + size_t num_edges = 66; size_t num_vertices = 7; double alpha = 0.95; double epsilon = 0.0001; size_t max_iterations = 20; vertex_t h_vertices[] = { 0, 1, 2, 3, 4, 5, 6 }; - vertex_t h_src[] = {0, 1, 1, 2, 2, 2, 3, 4, 4, 4, 5}; - vertex_t h_dst[] = {1, 3, 4, 0, 1, 3, 5, 5, 5, 5, 5}; - weight_t h_wgt[] = {0.1f, 2.1f, 1.1f, 5.1f, 3.1f, 4.1f, 7.2f, 3.2f, 3.2f, 3.2f, 1.7f}; + vertex_t h_src[] = {0, 1, 1, 2, 2, 2, 3, 4, 4, 4, 5, + 0, 1, 1, 2, 2, 2, 3, 4, 4, 4, 5, + 0, 1, 1, 2, 2, 2, 3, 4, 4, 4, 5, + 0, 1, 1, 2, 2, 2, 3, 4, 4, 4, 5, + 0, 1, 1, 2, 2, 2, 3, 4, 4, 4, 5, + 0, 1, 1, 2, 2, 2, 3, 4, 4, 4, 5}; + vertex_t h_dst[] = {1, 3, 4, 0, 1, 3, 5, 5, 5, 5, 5, + 1, 3, 4, 0, 1, 3, 5, 5, 5, 5, 5, + 1, 3, 4, 0, 1, 3, 5, 5, 5, 5, 5, + 1, 3, 4, 0, 1, 3, 5, 5, 5, 5, 5, + 1, 3, 4, 0, 1, 3, 5, 5, 5, 5, 5, + 1, 3, 4, 0, 1, 3, 5, 5, 5, 5, 5}; + weight_t h_wgt[] = {0.1f, 2.1f, 1.1f, 5.1f, 3.1f, 4.1f, 7.2f, 3.2f, 3.2f, 3.2f, 1.7f, + 0.1f, 2.1f, 1.1f, 5.1f, 3.1f, 4.1f, 7.2f, 3.2f, 3.2f, 3.2f, 1.7f, + 0.1f, 2.1f, 1.1f, 5.1f, 3.1f, 4.1f, 7.2f, 3.2f, 3.2f, 3.2f, 1.7f, + 0.1f, 2.1f, 1.1f, 5.1f, 3.1f, 4.1f, 7.2f, 3.2f, 3.2f, 3.2f, 1.7f, + 0.1f, 2.1f, 1.1f, 5.1f, 3.1f, 4.1f, 7.2f, 3.2f, 3.2f, 3.2f, 1.7f, + 0.1f, 2.1f, 1.1f, 5.1f, 3.1f, 4.1f, 7.2f, 3.2f, 3.2f, 3.2f, 1.7f}; weight_t h_result[] = { 0.0859168, 0.158029, 0.0616337, 0.179675, 0.113239, 0.339873, 0.0616337 }; cugraph_resource_handle_t* handle = NULL; From a6dee7917645c1c304a1a41c15749ee7754283dc Mon Sep 17 00:00:00 2001 From: Charles Hastings Date: Mon, 20 Nov 2023 13:22:00 -0800 Subject: [PATCH 6/6] latest recommendations from PR review --- cpp/CMakeLists.txt | 2 +- cpp/include/cugraph/graph_functions.hpp | 16 +- cpp/src/c_api/graph_mg.cpp | 14 +- cpp/src/c_api/graph_sg.cpp | 12 +- cpp/src/structure/detail/structure_utils.cuh | 71 +++--- cpp/src/structure/remove_multi_edges.cu | 92 ++++++++ ...s_impl.cuh => remove_multi_edges_impl.cuh} | 219 +++++++++--------- cpp/src/structure/remove_self_loops_impl.cuh | 43 ++-- .../structure/sort_and_remove_multi_edges.cu | 92 -------- 9 files changed, 285 insertions(+), 276 deletions(-) create mode 100644 cpp/src/structure/remove_multi_edges.cu rename cpp/src/structure/{sort_and_remove_multi_edges_impl.cuh => remove_multi_edges_impl.cuh} (53%) delete mode 100644 cpp/src/structure/sort_and_remove_multi_edges.cu diff --git a/cpp/CMakeLists.txt b/cpp/CMakeLists.txt index cecfd16d959..1d5670fd62b 100644 --- a/cpp/CMakeLists.txt +++ b/cpp/CMakeLists.txt @@ -197,7 +197,7 @@ set(CUGRAPH_SOURCES src/detail/utility_wrappers.cu src/structure/graph_view_mg.cu src/structure/remove_self_loops.cu - src/structure/sort_and_remove_multi_edges.cu + src/structure/remove_multi_edges.cu src/utilities/path_retrieval.cu src/structure/legacy/graph.cu src/linear_assignment/legacy/hungarian.cu diff --git a/cpp/include/cugraph/graph_functions.hpp b/cpp/include/cugraph/graph_functions.hpp index 8bc655c8a21..6a75a420bf8 100644 --- a/cpp/include/cugraph/graph_functions.hpp +++ b/cpp/include/cugraph/graph_functions.hpp @@ -1005,7 +1005,9 @@ remove_self_loops(raft::handle_t const& handle, std::optional>&& edgelist_edge_types); /** - * @brief Sort the edges and remove all but one edge when a multi-edge exists + * @brief Remove all but one edge when a multi-edge exists. Note that this function does not use + * stable methods. When a multi-edge exists, one of the edges will remain, there is no + * guarantee on which one will remain. * * In an MG context it is assumed that edges have been shuffled to the proper GPU, * in which case any multi-edges will be on the same GPU. @@ -1031,11 +1033,11 @@ std::tuple, std::optional>, std::optional>, std::optional>> -sort_and_remove_multi_edges(raft::handle_t const& handle, - rmm::device_uvector&& edgelist_srcs, - rmm::device_uvector&& edgelist_dsts, - std::optional>&& edgelist_weights, - std::optional>&& edgelist_edge_ids, - std::optional>&& edgelist_edge_types); +remove_multi_edges(raft::handle_t const& handle, + rmm::device_uvector&& edgelist_srcs, + rmm::device_uvector&& edgelist_dsts, + std::optional>&& edgelist_weights, + std::optional>&& edgelist_edge_ids, + std::optional>&& edgelist_edge_types); } // namespace cugraph diff --git a/cpp/src/c_api/graph_mg.cpp b/cpp/src/c_api/graph_mg.cpp index ca89e3091f4..5413949e3a3 100644 --- a/cpp/src/c_api/graph_mg.cpp +++ b/cpp/src/c_api/graph_mg.cpp @@ -212,12 +212,12 @@ struct create_graph_functor : public cugraph::c_api::abstract_functor { if (drop_multi_edges_) { std::tie( edgelist_srcs, edgelist_dsts, edgelist_weights, edgelist_edge_ids, edgelist_edge_types) = - cugraph::sort_and_remove_multi_edges(handle_, - std::move(edgelist_srcs), - std::move(edgelist_dsts), - std::move(edgelist_weights), - std::move(edgelist_edge_ids), - std::move(edgelist_edge_types)); + cugraph::remove_multi_edges(handle_, + std::move(edgelist_srcs), + std::move(edgelist_dsts), + std::move(edgelist_weights), + std::move(edgelist_edge_ids), + std::move(edgelist_edge_types)); } std::tie(*graph, new_edge_weights, new_edge_ids, new_edge_types, new_number_map) = @@ -359,7 +359,7 @@ extern "C" cugraph_error_code_t cugraph_graph_create_mg( "Invalid input arguments: all vertex types must match", *error); - CAPI_EXPECTS(p_weights[i]->type_ == weight_type, + CAPI_EXPECTS((weights == nullptr) || (p_weights[i]->type_ == weight_type), CUGRAPH_INVALID_INPUT, "Invalid input arguments: all weight types must match", *error); diff --git a/cpp/src/c_api/graph_sg.cpp b/cpp/src/c_api/graph_sg.cpp index 3cdeb0d86c2..7793458b53a 100644 --- a/cpp/src/c_api/graph_sg.cpp +++ b/cpp/src/c_api/graph_sg.cpp @@ -195,12 +195,12 @@ struct create_graph_functor : public cugraph::c_api::abstract_functor { if (drop_multi_edges_) { std::tie( edgelist_srcs, edgelist_dsts, edgelist_weights, edgelist_edge_ids, edgelist_edge_types) = - cugraph::sort_and_remove_multi_edges(handle_, - std::move(edgelist_srcs), - std::move(edgelist_dsts), - std::move(edgelist_weights), - std::move(edgelist_edge_ids), - std::move(edgelist_edge_types)); + cugraph::remove_multi_edges(handle_, + std::move(edgelist_srcs), + std::move(edgelist_dsts), + std::move(edgelist_weights), + std::move(edgelist_edge_ids), + std::move(edgelist_edge_types)); } std::tie(*graph, new_edge_weights, new_edge_ids, new_edge_types, new_number_map) = diff --git a/cpp/src/structure/detail/structure_utils.cuh b/cpp/src/structure/detail/structure_utils.cuh index 4674efe0c88..c49b62e4543 100644 --- a/cpp/src/structure/detail/structure_utils.cuh +++ b/cpp/src/structure/detail/structure_utils.cuh @@ -498,45 +498,47 @@ void sort_adjacency_list(raft::handle_t const& handle, } } -template -struct indirect_array_reference { - T const* array_; - - T operator() __host__ __device__(size_t index) { return array_[index]; } -}; - -template -std::tuple> mark_edges_for_removal( - raft::handle_t const& handle, - raft::device_span src, - raft::device_span dst, - comparison_t comparison) +template +std::tuple> mark_entries(raft::handle_t const& handle, + size_t num_entries, + comparison_t comparison) { - rmm::device_uvector remove_flags(packed_bool_size(src.size()), handle.get_stream()); - thrust::fill(handle.get_thrust_policy(), - remove_flags.begin(), - remove_flags.end(), - cugraph::packed_bool_empty_mask()); - - size_t remove_count = thrust::count_if( + rmm::device_uvector marked_entries(cugraph::packed_bool_size(num_entries), + handle.get_stream()); + + thrust::tabulate(handle.get_thrust_policy(), + marked_entries.begin(), + marked_entries.end(), + [comparison, num_entries] __device__(size_t idx) { + auto word = cugraph::packed_bool_empty_mask(); + size_t start_index = idx * cugraph::packed_bools_per_word(); + size_t bits_in_this_word = + (start_index + cugraph::packed_bools_per_word() < num_entries) + ? cugraph::packed_bools_per_word() + : (num_entries - start_index); + + for (size_t bit = 0; bit < bits_in_this_word; ++bit) { + if (comparison(start_index + bit)) word |= cugraph::packed_bool_mask(bit); + } + + return word; + }); + + size_t bit_count = thrust::transform_reduce( handle.get_thrust_policy(), - thrust::make_counting_iterator(size_t{0}), - thrust::make_counting_iterator(src.size()), - [comparison, d_remove_flags = remove_flags.data()] __device__(size_t i) { - if (comparison(i)) { - atomicOr(d_remove_flags + cugraph::packed_bool_offset(i), cugraph::packed_bool_mask(i)); - return true; - } - return false; - }); + marked_entries.begin(), + marked_entries.end(), + [] __device__(auto word) { return __popc(word); }, + size_t{0}, + thrust::plus()); - return std::make_tuple(remove_count, std::move(remove_flags)); + return std::make_tuple(bit_count, std::move(marked_entries)); } template rmm::device_uvector remove_flagged_elements(raft::handle_t const& handle, rmm::device_uvector&& vector, - rmm::device_uvector const& remove_flags, + raft::device_span remove_flags, size_t remove_count) { rmm::device_uvector result(vector.size() - remove_count, handle.get_stream()); @@ -546,14 +548,13 @@ rmm::device_uvector remove_flagged_elements(raft::handle_t const& handle, thrust::make_counting_iterator(size_t{0}), thrust::make_counting_iterator(vector.size()), thrust::make_transform_output_iterator(result.begin(), - indirect_array_reference{vector.data()}), - [d_remove_flags = remove_flags.data()] __device__(size_t i) { - return !(d_remove_flags[cugraph::packed_bool_offset(i)] & cugraph::packed_bool_mask(i)); + indirection_t{vector.data()}), + [remove_flags] __device__(size_t i) { + return !(remove_flags[cugraph::packed_bool_offset(i)] & cugraph::packed_bool_mask(i)); }); return result; } } // namespace detail - } // namespace cugraph diff --git a/cpp/src/structure/remove_multi_edges.cu b/cpp/src/structure/remove_multi_edges.cu new file mode 100644 index 00000000000..ba07d068c0e --- /dev/null +++ b/cpp/src/structure/remove_multi_edges.cu @@ -0,0 +1,92 @@ +/* + * Copyright (c) 2023, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +#include + +namespace cugraph { + +template std::tuple, + rmm::device_uvector, + std::optional>, + std::optional>, + std::optional>> +remove_multi_edges(raft::handle_t const& handle, + rmm::device_uvector&& edgelist_srcs, + rmm::device_uvector&& edgelist_dsts, + std::optional>&& edgelist_weights, + std::optional>&& edgelist_edge_ids, + std::optional>&& edgelist_edge_types); + +template std::tuple, + rmm::device_uvector, + std::optional>, + std::optional>, + std::optional>> +remove_multi_edges(raft::handle_t const& handle, + rmm::device_uvector&& edgelist_srcs, + rmm::device_uvector&& edgelist_dsts, + std::optional>&& edgelist_weights, + std::optional>&& edgelist_edge_ids, + std::optional>&& edgelist_edge_types); + +template std::tuple, + rmm::device_uvector, + std::optional>, + std::optional>, + std::optional>> +remove_multi_edges(raft::handle_t const& handle, + rmm::device_uvector&& edgelist_srcs, + rmm::device_uvector&& edgelist_dsts, + std::optional>&& edgelist_weights, + std::optional>&& edgelist_edge_ids, + std::optional>&& edgelist_edge_types); + +template std::tuple, + rmm::device_uvector, + std::optional>, + std::optional>, + std::optional>> +remove_multi_edges(raft::handle_t const& handle, + rmm::device_uvector&& edgelist_srcs, + rmm::device_uvector&& edgelist_dsts, + std::optional>&& edgelist_weights, + std::optional>&& edgelist_edge_ids, + std::optional>&& edgelist_edge_types); + +template std::tuple, + rmm::device_uvector, + std::optional>, + std::optional>, + std::optional>> +remove_multi_edges(raft::handle_t const& handle, + rmm::device_uvector&& edgelist_srcs, + rmm::device_uvector&& edgelist_dsts, + std::optional>&& edgelist_weights, + std::optional>&& edgelist_edge_ids, + std::optional>&& edgelist_edge_types); + +template std::tuple, + rmm::device_uvector, + std::optional>, + std::optional>, + std::optional>> +remove_multi_edges(raft::handle_t const& handle, + rmm::device_uvector&& edgelist_srcs, + rmm::device_uvector&& edgelist_dsts, + std::optional>&& edgelist_weights, + std::optional>&& edgelist_edge_ids, + std::optional>&& edgelist_edge_types); + +} // namespace cugraph diff --git a/cpp/src/structure/sort_and_remove_multi_edges_impl.cuh b/cpp/src/structure/remove_multi_edges_impl.cuh similarity index 53% rename from cpp/src/structure/sort_and_remove_multi_edges_impl.cuh rename to cpp/src/structure/remove_multi_edges_impl.cuh index 232d825a0c8..ab6b1fba8eb 100644 --- a/cpp/src/structure/sort_and_remove_multi_edges_impl.cuh +++ b/cpp/src/structure/remove_multi_edges_impl.cuh @@ -15,6 +15,8 @@ */ #pragma once +#include + #include // FIXME: mem_frugal_partition should probably not be in shuffle_comm.hpp // It's used here without any notion of shuffling @@ -40,92 +42,56 @@ namespace cugraph { namespace detail { -template -rmm::device_uvector compute_hash_sizes(InputIterator iter_first, - InputIterator iter_last, - size_t num_hash_buckets, - hash_op_t hash_op, - rmm::cuda_stream_view stream_view) -{ - rmm::device_uvector hash_counts(num_hash_buckets, stream_view); - thrust::fill(rmm::exec_policy(stream_view), hash_counts.begin(), hash_counts.end(), size_t{0}); - - auto hash_counts_view = raft::device_span(hash_counts.data(), hash_counts.size()); - thrust::for_each(rmm::exec_policy(stream_view), - iter_first, - iter_last, - [num_hash_buckets, hash_op, hash_counts_view] __device__(auto value) { - atomicAdd(&hash_counts_view[hash_op(value) % num_hash_buckets], size_t{1}); - }); - - thrust::exclusive_scan( - rmm::exec_policy(stream_view), hash_counts.begin(), hash_counts.end(), hash_counts.begin()); - - return hash_counts; -} - template struct hash_src_dst_pair { + int32_t num_groups; + int32_t __device__ operator()(thrust::tuple t) const { vertex_t pair[2]; pair[0] = thrust::get<0>(t); pair[1] = thrust::get<1>(t); cuco::detail::MurmurHash3_32 hash_func{}; - return hash_func.compute_hash(reinterpret_cast(pair), 2 * sizeof(vertex_t)); + return hash_func.compute_hash(reinterpret_cast(pair), 2 * sizeof(vertex_t)) % + num_groups; } }; template -std::tuple, rmm::device_uvector> -sort_and_remove_multi_edges(raft::handle_t const& handle, - rmm::device_uvector&& edgelist_srcs, - rmm::device_uvector&& edgelist_dsts, - size_t mem_frugal_threshold) +std::tuple, rmm::device_uvector> group_multi_edges( + raft::handle_t const& handle, + rmm::device_uvector&& edgelist_srcs, + rmm::device_uvector&& edgelist_dsts, + size_t mem_frugal_threshold) { auto pair_first = thrust::make_zip_iterator(edgelist_srcs.begin(), edgelist_dsts.begin()); if (edgelist_srcs.size() > mem_frugal_threshold) { - // Tuning parameter to address high frequency multi-edges - size_t num_hash_buckets{16}; + // FIXME: Tuning parameter to address high frequency multi-edges + // Defaulting to 2 which makes the code easier. If + // num_groups > 2 we can evaluate whether to find a good + // midpoint to do 2 sorts, or if we should do more than 2 sorts. + const size_t num_groups{2}; - auto hash_counts = compute_hash_sizes(pair_first, + auto group_counts = groupby_and_count(pair_first, pair_first + edgelist_srcs.size(), - num_hash_buckets, hash_src_dst_pair{}, + num_groups, + mem_frugal_threshold, handle.get_stream()); - auto pivot = - static_cast(thrust::distance(hash_counts.begin(), - thrust::lower_bound(handle.get_thrust_policy(), - hash_counts.begin(), - hash_counts.end(), - edgelist_srcs.size() / 2))); - - auto second_first = detail::mem_frugal_partition(pair_first, - pair_first + edgelist_srcs.size(), - hash_src_dst_pair{}, - pivot, - handle.get_stream()); - thrust::sort(handle.get_thrust_policy(), pair_first, second_first); - thrust::sort(handle.get_thrust_policy(), second_first, pair_first + edgelist_srcs.size()); + std::vector h_group_counts(group_counts.size()); + raft::update_host( + h_group_counts.data(), group_counts.data(), group_counts.size(), handle.get_stream()); + + thrust::sort(handle.get_thrust_policy(), pair_first, pair_first + h_group_counts[0]); + thrust::sort(handle.get_thrust_policy(), + pair_first + h_group_counts[0], + pair_first + edgelist_srcs.size()); } else { thrust::sort(handle.get_thrust_policy(), pair_first, pair_first + edgelist_srcs.size()); } - edgelist_srcs.resize( - thrust::distance(pair_first, - thrust::unique(handle.get_thrust_policy(), - pair_first, - pair_first + edgelist_srcs.size(), - [] __device__(auto lhs, auto rhs) { - return (thrust::get<0>(lhs) == thrust::get<0>(rhs)) && - (thrust::get<1>(lhs) == thrust::get<1>(rhs)); - })), - handle.get_stream()); - - edgelist_dsts.resize(edgelist_srcs.size(), handle.get_stream()); - return std::make_tuple(std::move(edgelist_srcs), std::move(edgelist_dsts)); } @@ -133,46 +99,43 @@ template std::tuple, rmm::device_uvector, decltype(allocate_dataframe_buffer(size_t{0}, rmm::cuda_stream_view{}))> -sort_and_remove_multi_edges( +group_multi_edges( raft::handle_t const& handle, rmm::device_uvector&& edgelist_srcs, rmm::device_uvector&& edgelist_dsts, decltype(allocate_dataframe_buffer(0, rmm::cuda_stream_view{}))&& edgelist_values, size_t mem_frugal_threshold) { - auto pair_first = thrust::make_zip_iterator(edgelist_srcs.begin(), edgelist_dsts.begin()); + auto pair_first = thrust::make_zip_iterator(edgelist_srcs.begin(), edgelist_dsts.begin()); + auto value_first = get_dataframe_buffer_begin(edgelist_values); if (edgelist_srcs.size() > mem_frugal_threshold) { - // Tuning parameter to address high frequency multi-edges - size_t num_hash_buckets{16}; + // FIXME: Tuning parameter to address high frequency multi-edges + // Defaulting to 2 which makes the code easier. If + // num_groups > 2 we can evaluate whether to find a good + // midpoint to do 2 sorts, or if we should do more than 2 sorts. + const size_t num_groups{2}; - auto hash_counts = compute_hash_sizes(pair_first, + auto group_counts = groupby_and_count(pair_first, pair_first + edgelist_srcs.size(), - num_hash_buckets, + value_first, hash_src_dst_pair{}, + num_groups, + mem_frugal_threshold, handle.get_stream()); - auto pivot = - static_cast(thrust::distance(hash_counts.begin(), - thrust::lower_bound(handle.get_thrust_policy(), - hash_counts.begin(), - hash_counts.end(), - edgelist_srcs.size() / 2))); - - auto second_first = detail::mem_frugal_partition(pair_first, - pair_first + edgelist_srcs.size(), - get_dataframe_buffer_begin(edgelist_values), - hash_src_dst_pair{}, - pivot, - handle.get_stream()); + std::vector h_group_counts(group_counts.size()); + raft::update_host( + h_group_counts.data(), group_counts.data(), group_counts.size(), handle.get_stream()); + thrust::sort_by_key(handle.get_thrust_policy(), pair_first, - std::get<0>(second_first), + pair_first + h_group_counts[0], get_dataframe_buffer_begin(edgelist_values)); thrust::sort_by_key(handle.get_thrust_policy(), - std::get<0>(second_first), + pair_first + h_group_counts[0], pair_first + edgelist_srcs.size(), - std::get<1>(second_first)); + get_dataframe_buffer_begin(edgelist_values) + h_group_counts[0]); } else { thrust::sort_by_key(handle.get_thrust_policy(), pair_first, @@ -180,21 +143,6 @@ sort_and_remove_multi_edges( get_dataframe_buffer_begin(edgelist_values)); } - edgelist_srcs.resize(thrust::distance(pair_first, - thrust::get<0>(thrust::unique_by_key( - handle.get_thrust_policy(), - pair_first, - pair_first + edgelist_srcs.size(), - get_dataframe_buffer_begin(edgelist_values), - [] __device__(auto lhs, auto rhs) { - return (thrust::get<0>(lhs) == thrust::get<0>(rhs)) && - (thrust::get<1>(lhs) == thrust::get<1>(rhs)); - }))), - handle.get_stream()); - - edgelist_dsts.resize(edgelist_srcs.size(), handle.get_stream()); - resize_dataframe_buffer(edgelist_values, edgelist_srcs.size(), handle.get_stream()); - return std::make_tuple( std::move(edgelist_srcs), std::move(edgelist_dsts), std::move(edgelist_values)); } @@ -207,12 +155,12 @@ std::tuple, std::optional>, std::optional>, std::optional>> -sort_and_remove_multi_edges(raft::handle_t const& handle, - rmm::device_uvector&& edgelist_srcs, - rmm::device_uvector&& edgelist_dsts, - std::optional>&& edgelist_weights, - std::optional>&& edgelist_edge_ids, - std::optional>&& edgelist_edge_types) +remove_multi_edges(raft::handle_t const& handle, + rmm::device_uvector&& edgelist_srcs, + rmm::device_uvector&& edgelist_dsts, + std::optional>&& edgelist_weights, + std::optional>&& edgelist_edge_ids, + std::optional>&& edgelist_edge_types) { auto total_global_mem = handle.get_device_properties().totalGlobalMem; size_t element_size = sizeof(vertex_t) * 2; @@ -232,8 +180,7 @@ sort_and_remove_multi_edges(raft::handle_t const& handle, std::forward_as_tuple(edgelist_srcs, edgelist_dsts, std::tie(edgelist_weights, edgelist_edge_ids, edgelist_edge_types)) = - detail::sort_and_remove_multi_edges>( + detail::group_multi_edges>( handle, std::move(edgelist_srcs), std::move(edgelist_dsts), @@ -244,7 +191,7 @@ sort_and_remove_multi_edges(raft::handle_t const& handle, } else { std::forward_as_tuple( edgelist_srcs, edgelist_dsts, std::tie(edgelist_weights, edgelist_edge_ids)) = - detail::sort_and_remove_multi_edges>( + detail::group_multi_edges>( handle, std::move(edgelist_srcs), std::move(edgelist_dsts), @@ -255,7 +202,7 @@ sort_and_remove_multi_edges(raft::handle_t const& handle, if (edgelist_edge_types) { std::forward_as_tuple( edgelist_srcs, edgelist_dsts, std::tie(edgelist_weights, edgelist_edge_types)) = - detail::sort_and_remove_multi_edges>( + detail::group_multi_edges>( handle, std::move(edgelist_srcs), std::move(edgelist_dsts), @@ -263,7 +210,7 @@ sort_and_remove_multi_edges(raft::handle_t const& handle, mem_frugal_threshold); } else { std::forward_as_tuple(edgelist_srcs, edgelist_dsts, std::tie(edgelist_weights)) = - detail::sort_and_remove_multi_edges>( + detail::group_multi_edges>( handle, std::move(edgelist_srcs), std::move(edgelist_dsts), @@ -276,7 +223,7 @@ sort_and_remove_multi_edges(raft::handle_t const& handle, if (edgelist_edge_types) { std::forward_as_tuple( edgelist_srcs, edgelist_dsts, std::tie(edgelist_edge_ids, edgelist_edge_types)) = - detail::sort_and_remove_multi_edges>( + detail::group_multi_edges>( handle, std::move(edgelist_srcs), std::move(edgelist_dsts), @@ -284,7 +231,7 @@ sort_and_remove_multi_edges(raft::handle_t const& handle, mem_frugal_threshold); } else { std::forward_as_tuple(edgelist_srcs, edgelist_dsts, std::tie(edgelist_edge_ids)) = - detail::sort_and_remove_multi_edges>( + detail::group_multi_edges>( handle, std::move(edgelist_srcs), std::move(edgelist_dsts), @@ -294,19 +241,65 @@ sort_and_remove_multi_edges(raft::handle_t const& handle, } else { if (edgelist_edge_types) { std::forward_as_tuple(edgelist_srcs, edgelist_dsts, std::tie(edgelist_edge_types)) = - detail::sort_and_remove_multi_edges>( + detail::group_multi_edges>( handle, std::move(edgelist_srcs), std::move(edgelist_dsts), std::make_tuple(std::move(*edgelist_edge_types)), mem_frugal_threshold); } else { - std::tie(edgelist_srcs, edgelist_dsts) = detail::sort_and_remove_multi_edges( + std::tie(edgelist_srcs, edgelist_dsts) = detail::group_multi_edges( handle, std::move(edgelist_srcs), std::move(edgelist_dsts), mem_frugal_threshold); } } } + auto [multi_edge_count, multi_edges_to_delete] = + detail::mark_entries(handle, + edgelist_srcs.size(), + [d_edgelist_srcs = edgelist_srcs.data(), + d_edgelist_dsts = edgelist_dsts.data()] __device__(auto idx) { + return (idx > 0) && (d_edgelist_srcs[idx - 1] == d_edgelist_srcs[idx]) && + (d_edgelist_dsts[idx - 1] == d_edgelist_dsts[idx]); + }); + + if (multi_edge_count > 0) { + edgelist_srcs = detail::remove_flagged_elements( + handle, + std::move(edgelist_srcs), + raft::device_span{multi_edges_to_delete.data(), multi_edges_to_delete.size()}, + multi_edge_count); + edgelist_dsts = detail::remove_flagged_elements( + handle, + std::move(edgelist_dsts), + raft::device_span{multi_edges_to_delete.data(), multi_edges_to_delete.size()}, + multi_edge_count); + + if (edgelist_weights) + edgelist_weights = detail::remove_flagged_elements( + handle, + std::move(*edgelist_weights), + raft::device_span{multi_edges_to_delete.data(), + multi_edges_to_delete.size()}, + multi_edge_count); + + if (edgelist_edge_ids) + edgelist_edge_ids = detail::remove_flagged_elements( + handle, + std::move(*edgelist_edge_ids), + raft::device_span{multi_edges_to_delete.data(), + multi_edges_to_delete.size()}, + multi_edge_count); + + if (edgelist_edge_types) + edgelist_edge_types = detail::remove_flagged_elements( + handle, + std::move(*edgelist_edge_types), + raft::device_span{multi_edges_to_delete.data(), + multi_edges_to_delete.size()}, + multi_edge_count); + } + return std::make_tuple(std::move(edgelist_srcs), std::move(edgelist_dsts), std::move(edgelist_weights), diff --git a/cpp/src/structure/remove_self_loops_impl.cuh b/cpp/src/structure/remove_self_loops_impl.cuh index 1dcd6b1bc05..161ffeae28e 100644 --- a/cpp/src/structure/remove_self_loops_impl.cuh +++ b/cpp/src/structure/remove_self_loops_impl.cuh @@ -44,31 +44,44 @@ remove_self_loops(raft::handle_t const& handle, std::optional>&& edgelist_edge_ids, std::optional>&& edgelist_edge_types) { - auto [remove_count, remove_flags] = detail::mark_edges_for_removal( - handle, - raft::device_span{edgelist_srcs.data(), edgelist_srcs.size()}, - raft::device_span{edgelist_dsts.data(), edgelist_dsts.size()}, - [d_srcs = edgelist_srcs.data(), d_dsts = edgelist_dsts.data()] __device__(size_t i) { - return d_srcs[i] == d_dsts[i]; - }); + auto [self_loop_count, self_loops_to_delete] = + detail::mark_entries(handle, + edgelist_srcs.size(), + [d_srcs = edgelist_srcs.data(), d_dsts = edgelist_dsts.data()] __device__( + size_t i) { return d_srcs[i] == d_dsts[i]; }); - if (remove_count > 0) { - edgelist_srcs = - detail::remove_flagged_elements(handle, std::move(edgelist_srcs), remove_flags, remove_count); - edgelist_dsts = - detail::remove_flagged_elements(handle, std::move(edgelist_dsts), remove_flags, remove_count); + if (self_loop_count > 0) { + edgelist_srcs = detail::remove_flagged_elements( + handle, + std::move(edgelist_srcs), + raft::device_span{self_loops_to_delete.data(), self_loops_to_delete.size()}, + self_loop_count); + edgelist_dsts = detail::remove_flagged_elements( + handle, + std::move(edgelist_dsts), + raft::device_span{self_loops_to_delete.data(), self_loops_to_delete.size()}, + self_loop_count); if (edgelist_weights) edgelist_weights = detail::remove_flagged_elements( - handle, std::move(*edgelist_weights), remove_flags, remove_count); + handle, + std::move(*edgelist_weights), + raft::device_span{self_loops_to_delete.data(), self_loops_to_delete.size()}, + self_loop_count); if (edgelist_edge_ids) edgelist_edge_ids = detail::remove_flagged_elements( - handle, std::move(*edgelist_edge_ids), remove_flags, remove_count); + handle, + std::move(*edgelist_edge_ids), + raft::device_span{self_loops_to_delete.data(), self_loops_to_delete.size()}, + self_loop_count); if (edgelist_edge_types) edgelist_edge_types = detail::remove_flagged_elements( - handle, std::move(*edgelist_edge_types), remove_flags, remove_count); + handle, + std::move(*edgelist_edge_types), + raft::device_span{self_loops_to_delete.data(), self_loops_to_delete.size()}, + self_loop_count); } return std::make_tuple(std::move(edgelist_srcs), diff --git a/cpp/src/structure/sort_and_remove_multi_edges.cu b/cpp/src/structure/sort_and_remove_multi_edges.cu deleted file mode 100644 index 21ee9531f06..00000000000 --- a/cpp/src/structure/sort_and_remove_multi_edges.cu +++ /dev/null @@ -1,92 +0,0 @@ -/* - * Copyright (c) 2023, NVIDIA CORPORATION. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -#include - -namespace cugraph { - -template std::tuple, - rmm::device_uvector, - std::optional>, - std::optional>, - std::optional>> -sort_and_remove_multi_edges(raft::handle_t const& handle, - rmm::device_uvector&& edgelist_srcs, - rmm::device_uvector&& edgelist_dsts, - std::optional>&& edgelist_weights, - std::optional>&& edgelist_edge_ids, - std::optional>&& edgelist_edge_types); - -template std::tuple, - rmm::device_uvector, - std::optional>, - std::optional>, - std::optional>> -sort_and_remove_multi_edges(raft::handle_t const& handle, - rmm::device_uvector&& edgelist_srcs, - rmm::device_uvector&& edgelist_dsts, - std::optional>&& edgelist_weights, - std::optional>&& edgelist_edge_ids, - std::optional>&& edgelist_edge_types); - -template std::tuple, - rmm::device_uvector, - std::optional>, - std::optional>, - std::optional>> -sort_and_remove_multi_edges(raft::handle_t const& handle, - rmm::device_uvector&& edgelist_srcs, - rmm::device_uvector&& edgelist_dsts, - std::optional>&& edgelist_weights, - std::optional>&& edgelist_edge_ids, - std::optional>&& edgelist_edge_types); - -template std::tuple, - rmm::device_uvector, - std::optional>, - std::optional>, - std::optional>> -sort_and_remove_multi_edges(raft::handle_t const& handle, - rmm::device_uvector&& edgelist_srcs, - rmm::device_uvector&& edgelist_dsts, - std::optional>&& edgelist_weights, - std::optional>&& edgelist_edge_ids, - std::optional>&& edgelist_edge_types); - -template std::tuple, - rmm::device_uvector, - std::optional>, - std::optional>, - std::optional>> -sort_and_remove_multi_edges(raft::handle_t const& handle, - rmm::device_uvector&& edgelist_srcs, - rmm::device_uvector&& edgelist_dsts, - std::optional>&& edgelist_weights, - std::optional>&& edgelist_edge_ids, - std::optional>&& edgelist_edge_types); - -template std::tuple, - rmm::device_uvector, - std::optional>, - std::optional>, - std::optional>> -sort_and_remove_multi_edges(raft::handle_t const& handle, - rmm::device_uvector&& edgelist_srcs, - rmm::device_uvector&& edgelist_dsts, - std::optional>&& edgelist_weights, - std::optional>&& edgelist_edge_ids, - std::optional>&& edgelist_edge_types); - -} // namespace cugraph