diff --git a/python/tests/test_algorithms.py b/python/tests/test_algorithms.py
index 4baf3df7af..8c9880c8bf 100644
--- a/python/tests/test_algorithms.py
+++ b/python/tests/test_algorithms.py
@@ -31,6 +31,14 @@ def test_connected_components():
     assert actual.get("1") == 1
 
 
+def test_largest_connected_component():
+    g = gen_graph()
+    actual = g.largest_connected_component()
+    expected = ["1", "2", "3", "4", "5", "6", "7", "8"]
+    for node in expected:
+        assert actual.has_node(node)
+
+
 def test_in_components():
     g = gen_graph()
     actual = algorithms.in_components(g).get_all_with_names()
diff --git a/raphtory/src/algorithms/components/lcc.rs b/raphtory/src/algorithms/components/lcc.rs
new file mode 100644
index 0000000000..d31a902041
--- /dev/null
+++ b/raphtory/src/algorithms/components/lcc.rs
@@ -0,0 +1,148 @@
+use crate::{
+    algorithms::components::connected_components::weakly_connected_components,
+    db::{
+        api::view::{GraphViewOps, StaticGraphViewOps},
+        graph::views::node_subgraph::NodeSubgraph,
+    },
+    prelude::Graph,
+};
+
+/// Gives the largest connected component of a graph.
+///
+/// The largest connected component is the connected sub-graph of the network
+/// with the highest number of nodes.
+///
+/// # Example Usage:
+///
+/// g.largest_connected_component()
+///
+/// # Returns:
+///
+/// A raphtory graph, which essentially is a sub-graph of the graph `g`.
+/// When several components share the largest size, one of them is returned
+/// and a warning is printed to stderr.
+pub trait LargestConnectedComponent {
+    fn largest_connected_component(&self) -> NodeSubgraph<Self>
+    where
+        Self: StaticGraphViewOps;
+}
+
+impl LargestConnectedComponent for Graph {
+    fn largest_connected_component(&self) -> NodeSubgraph<Self>
+    where
+        Self: StaticGraphViewOps,
+    {
+        let mut connected_components_map =
+            weakly_connected_components(self, usize::MAX, None).group_by();
+        // Stays `None` until some component is selected, so no placeholder key is needed.
+        let mut lcc_key = None;
+        let mut key_length = 0;
+        let mut is_tie = false;
+
+        for (key, value) in &connected_components_map {
+            let length = value.len();
+            if length > key_length {
+                key_length = length;
+                lcc_key = Some(*key);
+                is_tie = false;
+            } else if length == key_length {
+                is_tie = true;
+            }
+        }
+        if is_tie {
+            // Diagnostics belong on stderr so they never mix with the program's output.
+            eprintln!("Warning: The graph has two or more connected components that are both the largest. \
+            The returned component has been picked arbitrarily.");
+        }
+        // Fall back to the sub-graph over all nodes when no component was selected.
+        match lcc_key.and_then(|key| connected_components_map.remove(&key)) {
+            Some(nodes) => self.subgraph(nodes),
+            None => self.subgraph(self.nodes()),
+        }
+    }
+}
+
+#[cfg(test)]
+mod largest_connected_component_test {
+    use super::*;
+    use crate::{
+        db::api::view::GraphViewOps,
+        prelude::{AdditionOps, Graph, NO_PROPS},
+    };
+
+    #[test]
+    fn test_empty_graph() {
+        let graph = Graph::new();
+        let subgraph = graph.largest_connected_component();
+        assert!(
+            subgraph.is_empty(),
+            "The subgraph of an empty graph should be empty"
+        );
+    }
+
+    #[test]
+    fn test_single_connected_component() {
+        let graph = Graph::new();
+        let edges = vec![(1, 1, 2), (2, 2, 1), (3, 3, 1)];
+        for (ts, src, dst) in edges {
+            graph.add_edge(ts, src, dst, NO_PROPS, None).unwrap();
+        }
+        let subgraph = graph.largest_connected_component();
+
+        let expected_nodes = vec![1, 2, 3];
+        for node in expected_nodes {
+            assert!(
+                subgraph.has_node(node),
+                "Node {} should be in the largest connected component.",
+                node
+            );
+        }
+        assert_eq!(subgraph.count_nodes(), 3);
+    }
+
+    #[test]
+    fn test_multiple_connected_components() {
+        let graph = Graph::new();
+        let edges = vec![
+            (1, 1, 2),
+            (2, 2, 1),
+            (3, 3, 1),
+            (1, 10, 11),
+            (2, 20, 21),
+            (3, 30, 31),
+        ];
+        for (ts, src, dst) in edges {
+            graph.add_edge(ts, src, dst, NO_PROPS, None).unwrap();
+        }
+        let subgraph = graph.largest_connected_component();
+        let expected_nodes = vec![1, 2, 3];
+        for node in expected_nodes {
+            assert!(
+                subgraph.has_node(node),
+                "Node {} should be in the largest connected component.",
+                node
+            );
+        }
+        assert_eq!(subgraph.count_nodes(), 3);
+    }
+
+    #[test]
+    fn test_same_size_connected_components() {
+        let graph = Graph::new();
+        let edges = vec![
+            (1, 1, 2),
+            (1, 2, 1),
+            (1, 3, 1),
+            (1, 5, 6),
+            (1, 11, 12),
+            (1, 12, 11),
+            (1, 13, 11),
+        ];
+        for (ts, src, dst) in edges {
+            graph.add_edge(ts, src, dst, NO_PROPS, None).unwrap();
+        }
+        let subgraph = graph.largest_connected_component();
+        // Both 3-node components tie for largest; either one is an acceptable result.
+        assert_eq!(subgraph.count_nodes(), 3);
+    }
+}
diff --git a/raphtory/src/algorithms/components/mod.rs b/raphtory/src/algorithms/components/mod.rs
index 51ab226209..fa0fb7abb8 100644
--- a/raphtory/src/algorithms/components/mod.rs
+++ b/raphtory/src/algorithms/components/mod.rs
@@ -1,9 +1,11 @@
 mod connected_components;
 mod in_components;
+mod lcc;
 mod out_components;
 mod scc;
 
 pub use connected_components::weakly_connected_components;
 pub use in_components::in_components;
+pub use lcc::LargestConnectedComponent;
 pub use out_components::out_components;
 pub use scc::strongly_connected_components;
diff --git a/raphtory/src/python/graph/graph.rs b/raphtory/src/python/graph/graph.rs
index 6060874cbc..23916854b4 100644
--- a/raphtory/src/python/graph/graph.rs
+++ b/raphtory/src/python/graph/graph.rs
@@ -5,10 +5,11 @@
 //! In Python, this class wraps around the rust graph.
 use super::utils;
 use crate::{
+    algorithms::components::LargestConnectedComponent,
     core::{entities::nodes::node_ref::NodeRef, utils::errors::GraphError, ArcStr},
     db::{
         api::view::internal::{CoreGraphOps, DynamicGraph, IntoDynamic, MaterializedGraph},
-        graph::{edge::EdgeView, node::NodeView},
+        graph::{edge::EdgeView, node::NodeView, views::node_subgraph::NodeSubgraph},
     },
     prelude::*,
     python::{
@@ -366,6 +367,18 @@ impl PyGraph {
         Ok(PyBytes::new(py, &bytes))
     }
 
+    /// Gives the largest connected component of a graph.
+    ///
+    /// # Example Usage:
+    /// g.largest_connected_component()
+    ///
+    /// # Returns:
+    /// A raphtory graph, which essentially is a sub-graph of the graph `g`
+    ///
+    pub fn largest_connected_component(&self) -> NodeSubgraph<Graph> {
+        self.graph.largest_connected_component()
+    }
+
     /// Get persistent graph
     pub fn persistent_graph<'py>(&'py self) -> PyResult<Py<PyPersistentGraph>> {
         PyPersistentGraph::py_from_db_graph(self.graph.persistent_graph())