Skip to content

Commit

Permalink
Algorithm Result object complete rewrite, Add between centrality, Ad…
Browse files Browse the repository at this point in the history
…d Zaks Karate Club (#1327)

* checkpoint, need to fix tests

* im so happy it works :D

* remove type_name param

* algorithm result tests complete, new fmt, fixed group_by, fixed all types

* renamed old algorithm result, changed base result type to a hashmap instead of vector due to potential issues with sparse vectors if the dataset is large, added missing G type for the graph, brought back results type

* fix bad 0 checking

* macro magic but partially implemented

* added the extra macros, added all missing functions, removed all pointers for clones (:vomit: pyo3)

* algorithm result object + all macros is complete in both rust and python

* fixed reciprocity, fixed centrality

* fixed pagerank, fixed graphql new type issues

* balance algorithm complete

* sssp fixed

* sssp forgot le lib

* temporal reachability fixed

* hits fixed

* connected components nearly fixed, last test

* removed bad test

* three node motif completed, algorithm result complete

* good bye bugs

* removed all warnings

* fix lotr bug

* i think i fixed hulong

* fix dusty benchmark

* rename macro rules, fix doctests

* implement debug for algo result

* fix pytests

* Zaks Karate Club Graph (#1326)

* betweenness centrality for a directed graph with and without normalisation

* cleanup

* swapped to algorithm result, added rust docs

* ported to python, added python docs, added python test

* fix formatting

* karate club graph, but its half working, adding 1 extra node and way too many edges

* fixed issue with rows

* betweenness has float calc differences but largely the same

* fix test

* fixes issue with python?

* port betweenness centrality to new algo object and fix over python tests

* Changed State to use Internal ID everywhere, fixed most of the algorithms, fixed the tests

* fixed all the algos

* fn name changes

* added very basic hashing to allow get_all to return a vertex object

* vertex view now returned by algo rest

* fix python algo result for vertexview

* reorder tests

* implemented custom debug fmt display for vertex view, implemented custom display fmt, fixed algorithm tests, fixed all rust issues

* extended richcmp for python objects, fixed all python tests

* connected_components.rs passes

* moved algos

* resolved comments

* changes due to comments

* windows works, layers dont

* added tests for windowed graphs

* complete!

* bad comments for ubuntu

* bad colon
  • Loading branch information
Haaroon authored Oct 19, 2023
1 parent 9df4985 commit b4eef2e
Show file tree
Hide file tree
Showing 35 changed files with 1,760 additions and 929 deletions.
6 changes: 2 additions & 4 deletions comparison-benchmark/rust/raphtory-rust-benchmark/src/main.rs
Original file line number Diff line number Diff line change
Expand Up @@ -197,14 +197,12 @@ fn main() {

// page rank with time
now = Instant::now();
let _page_rank: Vec<_> = unweighted_page_rank(&g, 1000, None, None, true)
.into_iter()
.collect();
let _page_rank = unweighted_page_rank(&g, 1000, None, None, true);
println!("Page rank: {} seconds", now.elapsed().as_secs_f64());

// connected community_detection with time
now = Instant::now();
let _cc: AlgorithmResult<String, u64> = weakly_connected_components(&g, usize::MAX, None);
let _cc = weakly_connected_components(&g, usize::MAX, None);
println!(
"Connected community_detection: {} seconds",
now.elapsed().as_secs_f64()
Expand Down
1 change: 1 addition & 0 deletions docs/source/reference/algorithms/centrality.rst
Original file line number Diff line number Diff line change
Expand Up @@ -7,3 +7,4 @@ Centrality

.. autofunction:: raphtory.algorithms.hits

.. autofunction:: raphtory.algorithms.betweenness_centrality
1 change: 1 addition & 0 deletions examples/rust/src/bin/hulongbay/main.rs
Original file line number Diff line number Diff line change
Expand Up @@ -125,6 +125,7 @@ fn try_main() -> Result<(), Box<dyn Error>> {
let components = weakly_connected_components(&graph, 5, Some(16));

components
.result
.into_iter()
.counts_by(|(_, cc)| cc)
.iter()
Expand Down
14 changes: 8 additions & 6 deletions examples/rust/src/bin/lotr/main.rs
Original file line number Diff line number Diff line change
@@ -1,4 +1,3 @@
use itertools::Itertools;
use raphtory::{
algorithms::pathing::temporal_reachability::temporally_reachable_nodes, core::utils::hashing,
graph_loader::source::csv_loader::CsvLoader, prelude::*,
Expand Down Expand Up @@ -104,9 +103,12 @@ fn main() {
assert!(graph.has_vertex(gandalf));
assert_eq!(graph.vertex(gandalf).unwrap().name(), "Gandalf");

let r = temporally_reachable_nodes(&graph, None, 20, 31930, vec!["Gandalf"], None);
assert_eq!(
r.result.keys().sorted().collect_vec(),
vec!["Gandalf", "Saruman", "Wormtongue"]
)
let r: Vec<String> = temporally_reachable_nodes(&graph, None, 20, 31930, vec!["Gandalf"], None)
.get_all_values()
.into_iter()
.flatten()
.map(|(_, s)| s)
.collect();

assert_eq!(r, vec!["Gandalf", "Saruman", "Wormtongue"])
}
2 changes: 2 additions & 0 deletions python/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -69,6 +69,7 @@ fn raphtory(py: Python<'_>, m: &PyModule) -> PyResult<()> {
algorithm_module,
dijkstra_single_source_shortest_paths,
global_reciprocity,
betweenness_centrality,
all_local_reciprocity,
triplet_count,
local_triangle_count,
Expand Down Expand Up @@ -107,6 +108,7 @@ fn raphtory(py: Python<'_>, m: &PyModule) -> PyResult<()> {
neo4j_movie_graph,
stable_coin_graph,
reddit_hyperlink_graph,
karate_club_graph,
);
m.add_submodule(graph_loader_module)?;

Expand Down
266 changes: 256 additions & 10 deletions python/tests/test_algorithms.py
Original file line number Diff line number Diff line change
@@ -1,20 +1,208 @@
import pytest
import pandas as pd
import pandas.core.frame
from raphtory import Graph, GraphWithDeletions, PyDirection
from raphtory import algorithms
from raphtory import graph_loader

def gen_graph():
g = Graph()
g.add_edge(10, 1, 3, {})
g.add_edge(11, 1, 2, {})
g.add_edge(12, 1, 2, {})
g.add_edge(9, 1, 2, {})
g.add_edge(12, 2, 4, {})
g.add_edge(13, 2, 5, {})
g.add_edge(14, 5, 5, {})
g.add_edge(14, 5, 4, {})
g.add_edge(5, 4, 6, {})
g.add_edge(15, 4, 7, {})
g.add_edge(10, 4, 7, {})
g.add_edge(10, 5, 8, {})
return g


def test_connected_components():
g = gen_graph()
actual = algorithms.weakly_connected_components(g, 20)
expected = {"1": 1, "2": 1, "3": 1, "4": 1, "5": 1, "6": 1, "7": 1, "8": 1}
assert actual.get_all_with_names() == expected
assert actual.get("1") == 1


def test_empty_algo():
g = Graph()
assert algorithms.weakly_connected_components(g, 20).get_all_with_names() == {}
assert algorithms.pagerank(g, 20).get_all_with_names() == {}


def test_algo_result_windowed_graph():
g = Graph()
g.add_edge(0, 1, 2, {})
g.add_edge(1, 1, 2, {})
g.add_edge(2, 3, 4, {})
g.add_edge(3, 5, 6, {})
g.add_edge(10, 10, 11, {})

res_full_graph = algorithms.weakly_connected_components(g, 20)
assert sorted(res_full_graph.get_all_with_names().items()) == [('1', 1), ('10', 10), ('11', 10), ('2', 1), ('3', 3), ('4', 3), ('5', 5), ('6', 5)]

g_window = g.window(0, 2)
res_window = algorithms.weakly_connected_components(g_window, 20)
assert sorted(res_window.get_all_with_names().items()) == [('1', 1), ('2', 1)]

g_window = g.window(2, 3)
res_window = algorithms.weakly_connected_components(g_window, 20)
assert sorted(res_window.get_all_with_names().items()) == [('3', 3), ('4', 3)]


def test_algo_result_layered_graph():
g = Graph()
g.add_edge(0, 1, 2, {}, layer="ZERO-TWO")
g.add_edge(1, 1, 3, {}, layer="ZERO-TWO")
g.add_edge(2, 4, 5, {}, layer="ZERO-TWO")
g.add_edge(3, 6, 7, {}, layer="THREE-FIVE")
g.add_edge(4, 8, 9, {}, layer="THREE-FIVE")

g_layer_zero_two = g.layer("ZERO-TWO")
g_layer_three_five = g.layer("THREE-FIVE")

res_zero_two = algorithms.weakly_connected_components(g_layer_zero_two, 20)
assert sorted(res_zero_two.get_all_with_names().items()) == [('1', 1), ('2', 1), ('3', 1), ('4', 4), ('5', 4), ('6', 6), ('7', 7), ('8', 8), ('9', 9)]

res_three_five = algorithms.weakly_connected_components(g_layer_three_five, 20)
assert sorted(res_three_five.get_all_with_names().items()) == [('1', 1), ('2', 2), ('3', 3), ('4', 4), ('5', 5), ('6', 6), ('7', 6), ('8', 8), ('9', 8)]


def test_algo_result_window_and_layered_graph():
g = Graph()
g.add_edge(0, 1, 2, {}, layer="ZERO-TWO")
g.add_edge(1, 1, 3, {}, layer="ZERO-TWO")
g.add_edge(2, 4, 5, {}, layer="ZERO-TWO")
g.add_edge(3, 6, 7, {}, layer="THREE-FIVE")
g.add_edge(4, 8, 9, {}, layer="THREE-FIVE")

g_layer_zero_two = g.window(0, 1).layer("ZERO-TWO")
g_layer_three_five = g.window(4, 5).layer("THREE-FIVE")

res_zero_two = algorithms.weakly_connected_components(g_layer_zero_two, 20)
assert sorted(res_zero_two.get_all_with_names().items()) == [('1', 1), ('2', 1)]

res_three_five = algorithms.weakly_connected_components(g_layer_three_five, 20)
assert sorted(res_three_five.get_all_with_names().items()) == [('8', 8), ('9', 8)]


def test_algo_result():
g = gen_graph()

actual = algorithms.weakly_connected_components(g, 20)
expected = {"1": 1, "2": 1, "3": 1, "4": 1, "5": 1, "6": 1, "7": 1, "8": 1}
assert actual.get_all_with_names() == expected
assert actual.get("1") == 1
assert actual.get("not a node") == None
expected_array = [
(g.vertex("1"), 1),
(g.vertex("2"), 1),
(g.vertex("3"), 1),
(g.vertex("4"), 1),
(g.vertex("5"), 1),
(g.vertex("6"), 1),
(g.vertex("7"), 1),
(g.vertex("8"), 1),
]
assert actual.sort_by_vertex_name(False) == expected_array
assert sorted(actual.top_k(8)) == expected_array
assert len(actual.group_by()[1]) == 8
assert type(actual.to_df()) == pandas.core.frame.DataFrame
df = actual.to_df()
expected_result = pd.DataFrame({"Key": [1], "Value": [1]})
row_with_one = df[df["Key"] == 1]
row_with_one.reset_index(inplace=True, drop=True)
print(row_with_one)
assert row_with_one.equals(expected_result)
# Algo Str u64
actual = algorithms.weakly_connected_components(g)
all_res = actual.get_all_with_names()
sorted_res = {k: all_res[k] for k in sorted(all_res)}
assert sorted_res == {
"1": 1,
"2": 1,
"3": 1,
"4": 1,
"5": 1,
"6": 1,
"7": 1,
"8": 1,
}
# algo str f64
actual = algorithms.pagerank(g)
expected_result = {
"3": 0.10274080842110422,
"2": 0.10274080842110422,
"4": 0.1615298183542792,
"6": 0.14074777909144864,
"1": 0.07209850165402759,
"5": 0.1615298183542792,
"7": 0.14074777909144864,
"8": 0.11786468661230831,
}
assert actual.get_all_with_names() == expected_result
assert actual.get("Not a node") == None
assert len(actual.to_df()) == 8
# algo str vector
actual = algorithms.temporally_reachable_nodes(g, 20, 11, [1, 2], [4, 5])
assert sorted(actual.get_all_with_names()) == ["1", "2", "3", "4", "5", "6", "7", "8"]


def test_page_rank():
g = gen_graph()
actual = algorithms.pagerank(g)
expected = {
"1": 0.07209850165402759,
"2": 0.10274080842110422,
"3": 0.10274080842110422,
"4": 0.1615298183542792,
"5": 0.1615298183542792,
"6": 0.14074777909144864,
"7": 0.14074777909144864,
"8": 0.11786468661230831,
}
assert actual.get_all_with_names() == expected


def test_temporal_reachability():
g = gen_graph()

actual = algorithms.temporally_reachable_nodes(g, 20, 11, [1, 2], [4, 5])
expected = {
"1": [(11, "start")],
"2": [(11, "start"), (12, "1"), (11, "1")],
"3": [],
"4": [(12, "2")],
"5": [(13, "2")],
"6": [],
"7": [],
"8": [],
}

assert actual.get_all_with_names() == expected


def test_degree_centrality():
from raphtory import Graph
from raphtory.algorithms import degree_centrality

g = Graph()
g.add_edge(0, 0, 1, {})
g.add_edge(0, 0, 2, {})
g.add_edge(0, 0, 3, {})
g.add_edge(0, 1, 2, {})
g.add_edge(0, 1, 3, {})
assert degree_centrality(g).get_all() == {
"0": 1.0,
g.add_edge(0, 1, 4, {})
g.add_edge(0, 2, 3, {})
g.add_edge(0, 2, 4, {})
assert degree_centrality(g).get_all_with_names() == {
"1": 1.0,
"2": 2 / 3,
"2": 1.0,
"3": 2 / 3,
"4": 2 / 3,
}


Expand Down Expand Up @@ -45,12 +233,12 @@ def test_single_source_shortest_path():
g.add_edge(0, 2, 4, {})
res_one = single_source_shortest_path(g, 1, 1)
res_two = single_source_shortest_path(g, 1, 2)
assert res_one.get_all() == {"1": ["1"], "2": ["1", "2"], "4": ["1", "4"]}
assert res_one.get_all_with_names() == {"1": ["1"], "2": ["1", "2"], "3": None, "4": ["1", "4"]}
assert (
res_two.get_all()
res_two.get_all_with_names()
== {"1": ["1"], "2": ["1", "2"], "3": ["1", "2", "3"], "4": ["1", "4"]}
) or (
res_two.get_all()
res_two.get_all_with_names()
== {"1": ["1"], "3": ["1", "4", "3"], "2": ["1", "2"], "4": ["1", "4"]}
)

Expand Down Expand Up @@ -84,4 +272,62 @@ def test_dijsktra_shortest_paths():
dijkstra_single_source_shortest_paths(g, "A", ["F"], weight="NO")
assert "Weight property not found on edges" in str(excinfo.value)



def test_betweenness_centrality():
from raphtory import Graph
from raphtory.algorithms import betweenness_centrality
g = Graph()
edges = [
(0, 1),
(0, 2),
(0, 3),
(1, 2),
(1, 3),
(1, 4),
(2, 3),
(2, 4),
(2, 5),
(3, 2),
(3, 1),
(3, 3)
]
for e in edges:
g.add_edge(0, e[0], e[1], {})

res = betweenness_centrality(g, normalized=False)
assert res.get_all_with_names() == { "0": 0.0, '1': 1.0, "2": 4.0, "3": 1.0, "4": 0.0, "5": 0.0 }

res = betweenness_centrality(g, normalized=True)
assert res.get_all_with_names() == { "0": 0.0, '1': 0.05, "2": 0.2, "3": 0.05, "4": 0.0, "5": 0.0}


def test_hits_algorithm():
g = graph_loader.lotr_graph()
assert algorithms.hits(g).get("Aldor") == (
0.0035840950440615416,
0.007476256228983402,
)


def test_balance_algorithm():
g = Graph()
edges_str = [
("1", "2", 10.0, 1),
("1", "4", 20.0, 2),
("2", "3", 5.0, 3),
("3", "2", 2.0, 4),
("3", "1", 1.0, 5),
("4", "3", 10.0, 6),
("4", "1", 5.0, 7),
("1", "5", 2.0, 8),
]
for src, dst, val, time in edges_str:
g.add_edge(time, src, dst, {"value_dec": val})
result = algorithms.balance(g, "value_dec", PyDirection("BOTH"), None).get_all_with_names()
assert result == {"1": -26.0, "2": 7.0, "3": 12.0, "4": 5.0, "5": 2.0}

result = algorithms.balance(g, "value_dec", PyDirection("IN"), None).get_all_with_names()
assert result == {"1": 6.0, "2": 12.0, "3": 15.0, "4": 20.0, "5": 2.0}

result = algorithms.balance(g, "value_dec", PyDirection("OUT"), None).get_all_with_names()
assert result == {"1": -32.0, "2": -5.0, "3": -3.0, "4": -15.0, "5": 0.0}
6 changes: 6 additions & 0 deletions python/tests/test_graph_gen.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
def test_karate_club():
from raphtory.graph_loader import karate_club_graph
g = karate_club_graph()
assert g.count_vertices() == 34
assert g.count_edges() == 155

Loading

0 comments on commit b4eef2e

Please sign in to comment.