Skip to content

Commit

Permalink
Algorithm Result object complete rewrite, Add between centrality, Ad…
Browse files Browse the repository at this point in the history
…d Zaks Karate Club (#1327)

* checkpoint, need to fix tests

* im so happy it works :D

* remove type_name param

* algorithm result tests complete, new fmt, fixed group_by, fixed all types

* renamed old algorithm result, changed base result type to a hashmap instead of vector due to potential issues with sparse vectors if the dataset is large, added missing G type for the graph, brought back results type

* fix bad 0 checking

* macro magic but partially implemented

* added the extra macros, added all missing functions, removed all pointers for clones (:vomit: pyo3)

* algorithm result object + all macros is complete in both rust and python

* fixed reciprocity, fixed centrality

* fixed pagerank, fixed graphql new type issues

* balance algorithm complete

* sssp fixed

* sssp forgot le lib

* temporal reachability fixed

* hits fixed

* connected components nearly fixed, last test

* removed bad test

* three node motif completed, algorithm result complete

* good bye bugs

* removed all warnings

* fix lotr bug

* i think i fixed hulong

* fix dusty benchmark

* rename macro rules, fix doctests

* implement debug for algo result

* fix pytests

* Zaks Karate Club Graph (#1326)

* betweenness centrality for a directed graph with and without normalisation

* cleanup

* swapped to algorithm result, added rust docs

* ported to python, added python docs, added python test

* fix formatting

* karate club graph, but its half working, adding 1 extra node and way too many edges

* fixed issue with rows

* betweenness has float calc differences but largely the same

* fix test

* fixes issue with python?

* port betweenness centrality to new algo object and fix over python tests

* Changed State to use Internal ID everywhere, fixed most of the algorithms, fixed the tests

* fixed all the algos

* fn name changes

* added very basic hashing to allow get_all to return a vertex object

* vertex view now returned by algo rest

* fix python algo result for vertexview

* reorder tests

* implemented custom debug fmt display for vertex view, implemented custom display fmt, fixed algorithm tests, fixed all rust issues

* extended richcmp for python objects, fixed all python tests

* connected_components.rs passes

* moved algos

* resolved comments

* changes due to comments

* windows works, layers dont

* added tests for windowed graphs

* complete!

* bad comments for ubuntu

* bad colon
  • Loading branch information
Haaroon authored Oct 19, 2023
1 parent 9df4985 commit b4eef2e
Show file tree
Hide file tree
Showing 35 changed files with 1,760 additions and 929 deletions.
6 changes: 2 additions & 4 deletions comparison-benchmark/rust/raphtory-rust-benchmark/src/main.rs
Original file line number Diff line number Diff line change
Expand Up @@ -197,14 +197,12 @@ fn main() {

// page rank with time
now = Instant::now();
let _page_rank: Vec<_> = unweighted_page_rank(&g, 1000, None, None, true)
.into_iter()
.collect();
let _page_rank = unweighted_page_rank(&g, 1000, None, None, true);
println!("Page rank: {} seconds", now.elapsed().as_secs_f64());

// connected community_detection with time
now = Instant::now();
let _cc: AlgorithmResult<String, u64> = weakly_connected_components(&g, usize::MAX, None);
let _cc = weakly_connected_components(&g, usize::MAX, None);
println!(
"Connected community_detection: {} seconds",
now.elapsed().as_secs_f64()
Expand Down
1 change: 1 addition & 0 deletions docs/source/reference/algorithms/centrality.rst
Original file line number Diff line number Diff line change
Expand Up @@ -7,3 +7,4 @@ Centrality

.. autofunction:: raphtory.algorithms.hits

.. autofunction:: raphtory.algorithms.betweenness_centrality
1 change: 1 addition & 0 deletions examples/rust/src/bin/hulongbay/main.rs
Original file line number Diff line number Diff line change
Expand Up @@ -125,6 +125,7 @@ fn try_main() -> Result<(), Box<dyn Error>> {
let components = weakly_connected_components(&graph, 5, Some(16));

components
.result
.into_iter()
.counts_by(|(_, cc)| cc)
.iter()
Expand Down
14 changes: 8 additions & 6 deletions examples/rust/src/bin/lotr/main.rs
Original file line number Diff line number Diff line change
@@ -1,4 +1,3 @@
use itertools::Itertools;
use raphtory::{
algorithms::pathing::temporal_reachability::temporally_reachable_nodes, core::utils::hashing,
graph_loader::source::csv_loader::CsvLoader, prelude::*,
Expand Down Expand Up @@ -104,9 +103,12 @@ fn main() {
assert!(graph.has_vertex(gandalf));
assert_eq!(graph.vertex(gandalf).unwrap().name(), "Gandalf");

let r = temporally_reachable_nodes(&graph, None, 20, 31930, vec!["Gandalf"], None);
assert_eq!(
r.result.keys().sorted().collect_vec(),
vec!["Gandalf", "Saruman", "Wormtongue"]
)
let r: Vec<String> = temporally_reachable_nodes(&graph, None, 20, 31930, vec!["Gandalf"], None)
.get_all_values()
.into_iter()
.flatten()
.map(|(_, s)| s)
.collect();

assert_eq!(r, vec!["Gandalf", "Saruman", "Wormtongue"])
}
2 changes: 2 additions & 0 deletions python/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -69,6 +69,7 @@ fn raphtory(py: Python<'_>, m: &PyModule) -> PyResult<()> {
algorithm_module,
dijkstra_single_source_shortest_paths,
global_reciprocity,
betweenness_centrality,
all_local_reciprocity,
triplet_count,
local_triangle_count,
Expand Down Expand Up @@ -107,6 +108,7 @@ fn raphtory(py: Python<'_>, m: &PyModule) -> PyResult<()> {
neo4j_movie_graph,
stable_coin_graph,
reddit_hyperlink_graph,
karate_club_graph,
);
m.add_submodule(graph_loader_module)?;

Expand Down
266 changes: 256 additions & 10 deletions python/tests/test_algorithms.py
Original file line number Diff line number Diff line change
@@ -1,20 +1,208 @@
import pytest
import pandas as pd
import pandas.core.frame
from raphtory import Graph, GraphWithDeletions, PyDirection
from raphtory import algorithms
from raphtory import graph_loader

def gen_graph():
g = Graph()
g.add_edge(10, 1, 3, {})
g.add_edge(11, 1, 2, {})
g.add_edge(12, 1, 2, {})
g.add_edge(9, 1, 2, {})
g.add_edge(12, 2, 4, {})
g.add_edge(13, 2, 5, {})
g.add_edge(14, 5, 5, {})
g.add_edge(14, 5, 4, {})
g.add_edge(5, 4, 6, {})
g.add_edge(15, 4, 7, {})
g.add_edge(10, 4, 7, {})
g.add_edge(10, 5, 8, {})
return g


def test_connected_components():
g = gen_graph()
actual = algorithms.weakly_connected_components(g, 20)
expected = {"1": 1, "2": 1, "3": 1, "4": 1, "5": 1, "6": 1, "7": 1, "8": 1}
assert actual.get_all_with_names() == expected
assert actual.get("1") == 1


def test_empty_algo():
g = Graph()
assert algorithms.weakly_connected_components(g, 20).get_all_with_names() == {}
assert algorithms.pagerank(g, 20).get_all_with_names() == {}


def test_algo_result_windowed_graph():
g = Graph()
g.add_edge(0, 1, 2, {})
g.add_edge(1, 1, 2, {})
g.add_edge(2, 3, 4, {})
g.add_edge(3, 5, 6, {})
g.add_edge(10, 10, 11, {})

res_full_graph = algorithms.weakly_connected_components(g, 20)
assert sorted(res_full_graph.get_all_with_names().items()) == [('1', 1), ('10', 10), ('11', 10), ('2', 1), ('3', 3), ('4', 3), ('5', 5), ('6', 5)]

g_window = g.window(0, 2)
res_window = algorithms.weakly_connected_components(g_window, 20)
assert sorted(res_window.get_all_with_names().items()) == [('1', 1), ('2', 1)]

g_window = g.window(2, 3)
res_window = algorithms.weakly_connected_components(g_window, 20)
assert sorted(res_window.get_all_with_names().items()) == [('3', 3), ('4', 3)]


def test_algo_result_layered_graph():
g = Graph()
g.add_edge(0, 1, 2, {}, layer="ZERO-TWO")
g.add_edge(1, 1, 3, {}, layer="ZERO-TWO")
g.add_edge(2, 4, 5, {}, layer="ZERO-TWO")
g.add_edge(3, 6, 7, {}, layer="THREE-FIVE")
g.add_edge(4, 8, 9, {}, layer="THREE-FIVE")

g_layer_zero_two = g.layer("ZERO-TWO")
g_layer_three_five = g.layer("THREE-FIVE")

res_zero_two = algorithms.weakly_connected_components(g_layer_zero_two, 20)
assert sorted(res_zero_two.get_all_with_names().items()) == [('1', 1), ('2', 1), ('3', 1), ('4', 4), ('5', 4), ('6', 6), ('7', 7), ('8', 8), ('9', 9)]

res_three_five = algorithms.weakly_connected_components(g_layer_three_five, 20)
assert sorted(res_three_five.get_all_with_names().items()) == [('1', 1), ('2', 2), ('3', 3), ('4', 4), ('5', 5), ('6', 6), ('7', 6), ('8', 8), ('9', 8)]


def test_algo_result_window_and_layered_graph():
g = Graph()
g.add_edge(0, 1, 2, {}, layer="ZERO-TWO")
g.add_edge(1, 1, 3, {}, layer="ZERO-TWO")
g.add_edge(2, 4, 5, {}, layer="ZERO-TWO")
g.add_edge(3, 6, 7, {}, layer="THREE-FIVE")
g.add_edge(4, 8, 9, {}, layer="THREE-FIVE")

g_layer_zero_two = g.window(0, 1).layer("ZERO-TWO")
g_layer_three_five = g.window(4, 5).layer("THREE-FIVE")

res_zero_two = algorithms.weakly_connected_components(g_layer_zero_two, 20)
assert sorted(res_zero_two.get_all_with_names().items()) == [('1', 1), ('2', 1)]

res_three_five = algorithms.weakly_connected_components(g_layer_three_five, 20)
assert sorted(res_three_five.get_all_with_names().items()) == [('8', 8), ('9', 8)]


def test_algo_result():
g = gen_graph()

actual = algorithms.weakly_connected_components(g, 20)
expected = {"1": 1, "2": 1, "3": 1, "4": 1, "5": 1, "6": 1, "7": 1, "8": 1}
assert actual.get_all_with_names() == expected
assert actual.get("1") == 1
assert actual.get("not a node") == None
expected_array = [
(g.vertex("1"), 1),
(g.vertex("2"), 1),
(g.vertex("3"), 1),
(g.vertex("4"), 1),
(g.vertex("5"), 1),
(g.vertex("6"), 1),
(g.vertex("7"), 1),
(g.vertex("8"), 1),
]
assert actual.sort_by_vertex_name(False) == expected_array
assert sorted(actual.top_k(8)) == expected_array
assert len(actual.group_by()[1]) == 8
assert type(actual.to_df()) == pandas.core.frame.DataFrame
df = actual.to_df()
expected_result = pd.DataFrame({"Key": [1], "Value": [1]})
row_with_one = df[df["Key"] == 1]
row_with_one.reset_index(inplace=True, drop=True)
print(row_with_one)
assert row_with_one.equals(expected_result)
# Algo Str u64
actual = algorithms.weakly_connected_components(g)
all_res = actual.get_all_with_names()
sorted_res = {k: all_res[k] for k in sorted(all_res)}
assert sorted_res == {
"1": 1,
"2": 1,
"3": 1,
"4": 1,
"5": 1,
"6": 1,
"7": 1,
"8": 1,
}
# algo str f64
actual = algorithms.pagerank(g)
expected_result = {
"3": 0.10274080842110422,
"2": 0.10274080842110422,
"4": 0.1615298183542792,
"6": 0.14074777909144864,
"1": 0.07209850165402759,
"5": 0.1615298183542792,
"7": 0.14074777909144864,
"8": 0.11786468661230831,
}
assert actual.get_all_with_names() == expected_result
assert actual.get("Not a node") == None
assert len(actual.to_df()) == 8
# algo str vector
actual = algorithms.temporally_reachable_nodes(g, 20, 11, [1, 2], [4, 5])
assert sorted(actual.get_all_with_names()) == ["1", "2", "3", "4", "5", "6", "7", "8"]


def test_page_rank():
g = gen_graph()
actual = algorithms.pagerank(g)
expected = {
"1": 0.07209850165402759,
"2": 0.10274080842110422,
"3": 0.10274080842110422,
"4": 0.1615298183542792,
"5": 0.1615298183542792,
"6": 0.14074777909144864,
"7": 0.14074777909144864,
"8": 0.11786468661230831,
}
assert actual.get_all_with_names() == expected


def test_temporal_reachability():
g = gen_graph()

actual = algorithms.temporally_reachable_nodes(g, 20, 11, [1, 2], [4, 5])
expected = {
"1": [(11, "start")],
"2": [(11, "start"), (12, "1"), (11, "1")],
"3": [],
"4": [(12, "2")],
"5": [(13, "2")],
"6": [],
"7": [],
"8": [],
}

assert actual.get_all_with_names() == expected


def test_degree_centrality():
from raphtory import Graph
from raphtory.algorithms import degree_centrality

g = Graph()
g.add_edge(0, 0, 1, {})
g.add_edge(0, 0, 2, {})
g.add_edge(0, 0, 3, {})
g.add_edge(0, 1, 2, {})
g.add_edge(0, 1, 3, {})
assert degree_centrality(g).get_all() == {
"0": 1.0,
g.add_edge(0, 1, 4, {})
g.add_edge(0, 2, 3, {})
g.add_edge(0, 2, 4, {})
assert degree_centrality(g).get_all_with_names() == {
"1": 1.0,
"2": 2 / 3,
"2": 1.0,
"3": 2 / 3,
"4": 2 / 3,
}


Expand Down Expand Up @@ -45,12 +233,12 @@ def test_single_source_shortest_path():
g.add_edge(0, 2, 4, {})
res_one = single_source_shortest_path(g, 1, 1)
res_two = single_source_shortest_path(g, 1, 2)
assert res_one.get_all() == {"1": ["1"], "2": ["1", "2"], "4": ["1", "4"]}
assert res_one.get_all_with_names() == {"1": ["1"], "2": ["1", "2"], "3": None, "4": ["1", "4"]}
assert (
res_two.get_all()
res_two.get_all_with_names()
== {"1": ["1"], "2": ["1", "2"], "3": ["1", "2", "3"], "4": ["1", "4"]}
) or (
res_two.get_all()
res_two.get_all_with_names()
== {"1": ["1"], "3": ["1", "4", "3"], "2": ["1", "2"], "4": ["1", "4"]}
)

Expand Down Expand Up @@ -84,4 +272,62 @@ def test_dijsktra_shortest_paths():
dijkstra_single_source_shortest_paths(g, "A", ["F"], weight="NO")
assert "Weight property not found on edges" in str(excinfo.value)



def test_betweenness_centrality():
from raphtory import Graph
from raphtory.algorithms import betweenness_centrality
g = Graph()
edges = [
(0, 1),
(0, 2),
(0, 3),
(1, 2),
(1, 3),
(1, 4),
(2, 3),
(2, 4),
(2, 5),
(3, 2),
(3, 1),
(3, 3)
]
for e in edges:
g.add_edge(0, e[0], e[1], {})

res = betweenness_centrality(g, normalized=False)
assert res.get_all_with_names() == { "0": 0.0, '1': 1.0, "2": 4.0, "3": 1.0, "4": 0.0, "5": 0.0 }

res = betweenness_centrality(g, normalized=True)
assert res.get_all_with_names() == { "0": 0.0, '1': 0.05, "2": 0.2, "3": 0.05, "4": 0.0, "5": 0.0}


def test_hits_algorithm():
g = graph_loader.lotr_graph()
assert algorithms.hits(g).get("Aldor") == (
0.0035840950440615416,
0.007476256228983402,
)


def test_balance_algorithm():
g = Graph()
edges_str = [
("1", "2", 10.0, 1),
("1", "4", 20.0, 2),
("2", "3", 5.0, 3),
("3", "2", 2.0, 4),
("3", "1", 1.0, 5),
("4", "3", 10.0, 6),
("4", "1", 5.0, 7),
("1", "5", 2.0, 8),
]
for src, dst, val, time in edges_str:
g.add_edge(time, src, dst, {"value_dec": val})
result = algorithms.balance(g, "value_dec", PyDirection("BOTH"), None).get_all_with_names()
assert result == {"1": -26.0, "2": 7.0, "3": 12.0, "4": 5.0, "5": 2.0}

result = algorithms.balance(g, "value_dec", PyDirection("IN"), None).get_all_with_names()
assert result == {"1": 6.0, "2": 12.0, "3": 15.0, "4": 20.0, "5": 2.0}

result = algorithms.balance(g, "value_dec", PyDirection("OUT"), None).get_all_with_names()
assert result == {"1": -32.0, "2": -5.0, "3": -3.0, "4": -15.0, "5": 0.0}
6 changes: 6 additions & 0 deletions python/tests/test_graph_gen.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
def test_karate_club():
from raphtory.graph_loader import karate_club_graph
g = karate_club_graph()
assert g.count_vertices() == 34
assert g.count_edges() == 155

Loading

0 comments on commit b4eef2e

Please sign in to comment.