Extra benches (#1651)

* added extra benchmarks * extend benchmark suite with persistent and layered graphs as well as exploded edges and materialize * change sx_superuser_graph to take optional number of layers * fixes as per PR review * fmt
Pometry · Jun 11, 2024 · 771d45c · 771d45c
1 parent 1c7a7d7
commit 771d45c
Show file tree

Hide file tree

Showing 5 changed files with 208 additions and 14 deletions.
diff --git a/pometry-storage-private b/pometry-storage-private
diff --git a/raphtory-benchmark/benches/common/mod.rs b/raphtory-benchmark/benches/common/mod.rs
@@ -1,8 +1,10 @@
 #![allow(dead_code)]
 
-use criterion::{measurement::WallTime, BatchSize, Bencher, BenchmarkGroup, BenchmarkId};
+use criterion::{
+    black_box, measurement::WallTime, BatchSize, Bencher, BenchmarkGroup, BenchmarkId,
+};
 use rand::{distributions::Uniform, seq::*, Rng};
-use raphtory::{db::api::view::StaticGraphViewOps, prelude::*};
+use raphtory::{core::entities::LayerIds, db::api::view::StaticGraphViewOps, prelude::*};
 use std::collections::HashSet;
 
 fn make_index_gen() -> Box<dyn Iterator<Item = u64>> {
@@ -264,17 +266,35 @@ pub fn run_analysis_benchmarks<F, G>(
     G: StaticGraphViewOps,
 {
     let graph = make_graph();
+    println!(
+        "Num layers {:?}, node count: {}, edge_count: {}",
+        graph.unique_layers().count(),
+        graph.count_nodes(),
+        graph.count_edges()
+    );
     let edges: HashSet<(u64, u64)> = graph
         .edges()
         .into_iter()
         .map(|e| (e.src().id(), e.dst().id()))
         .collect();
+
+    let edges_t = graph
+        .edges()
+        .explode()
+        .into_iter()
+        .map(|e| (e.src().id(), e.dst().id(), e.time().expect("need time")))
+        .collect::<Vec<_>>();
+
     let nodes: HashSet<u64> = graph.nodes().id().collect();
 
     bench(group, "num_edges", parameter, |b: &mut Bencher| {
         b.iter(|| graph.count_edges())
     });
 
+    bench(group, "num_edges_temporal", parameter, |b: &mut Bencher| {
+        b.iter(|| graph.count_temporal_edges())
+    });
+
     bench(group, "has_edge_existing", parameter, |b: &mut Bencher| {
         let mut rng = rand::thread_rng();
         let edge = edges.iter().choose(&mut rng).expect("non-empty graph");
@@ -300,6 +320,38 @@ pub fn run_analysis_benchmarks<F, G>(
         },
     );
 
+    bench(group, "active edge", parameter, |b: &mut Bencher| {
+        let mut rng = rand::thread_rng();
+        let (edge, active_t) = edges_t
+            .choose(&mut rng)
+            .and_then(|(src, dst, t)| graph.edge(src, dst).map(|e| (e, *t)))
+            .expect("active edge");
+        b.iter(|| {
+            edge.window(active_t.saturating_sub(5), active_t + 5)
+                .explode_layers()
+                .iter()
+                .for_each(|e| {
+                    black_box(e);
+                });
+        });
+    });
+
+    bench(group, "edge has layer", parameter, |b: &mut Bencher| {
+        let mut rng = rand::thread_rng();
+        let edge = edges
+            .iter()
+            .choose(&mut rng)
+            .and_then(|(src, dst)| graph.edge(src, dst))
+            .expect("active edge");
+
+        let layers = graph.unique_layers().collect::<Vec<_>>();
+        b.iter(|| {
+            for name in layers.iter() {
+                black_box(edge.has_layer(name));
+            }
+        });
+    });
+
     bench(group, "num_nodes", parameter, |b: &mut Bencher| {
         b.iter(|| graph.count_nodes())
     });
@@ -334,6 +386,44 @@ pub fn run_analysis_benchmarks<F, G>(
         b.iter(|| graph.nodes().degree().max())
     });
 
+    bench(group, "iterate nodes", parameter, |b: &mut Bencher| {
+        b.iter(|| {
+            for n in graph.nodes() {
+                black_box(n);
+            }
+        })
+    });
+
+    bench(group, "iterate edges", parameter, |b: &mut Bencher| {
+        b.iter(|| {
+            for e in graph.edges() {
+                black_box(e);
+            }
+        })
+    });
+
+    bench(
+        group,
+        "iterate_exploded_edges",
+        parameter,
+        |b: &mut Bencher| {
+            b.iter(|| {
+                for e in graph.edges() {
+                    for ee in e.explode() {
+                        black_box(ee);
+                    }
+                }
+            })
+        },
+    );
+
+    bench(group, "materialize", parameter, |b: &mut Bencher| {
+        b.iter(|| {
+            let mg = graph.materialize();
+            black_box(mg)
+        })
+    })
+
     // Too noisy due to degree variability and confuses criterion
     // bench(
     //     group,

diff --git a/raphtory-benchmark/benches/graph_ops.rs b/raphtory-benchmark/benches/graph_ops.rs
@@ -1,6 +1,15 @@
 use common::run_analysis_benchmarks;
 use criterion::{criterion_group, criterion_main, Criterion};
-use raphtory::{db::api::view::*, graph_loader::example::sx_superuser_graph::sx_superuser_graph};
+use rand::{seq::*, SeedableRng};
+use raphtory::{
+    core::utils::hashing::calculate_hash,
+    db::api::view::*,
+    graph_loader::{
+        example::sx_superuser_graph::{sx_superuser_file, sx_superuser_graph, TEdge},
+        source::csv_loader::CsvLoader,
+    },
+    prelude::*,
+};
 
 mod common;
 
@@ -17,6 +26,8 @@ pub fn graph(c: &mut Criterion) {
         None,
     );
     graph_window_group_100.finish();
+
+    // graph windowed
     let mut graph_window_group_10 = c.benchmark_group("analysis_graph_window_10");
     let latest = graph.latest_time().expect("non-empty graph");
     let earliest = graph.earliest_time().expect("non-empty graph");
@@ -28,6 +39,105 @@ pub fn graph(c: &mut Criterion) {
         None,
     );
     graph_window_group_10.finish();
+
+    // subgraph
+    let mut rng = rand::rngs::StdRng::seed_from_u64(73);
+    let nodes = graph
+        .nodes()
+        .into_iter()
+        .choose_multiple(&mut rng, graph.count_nodes() / 10)
+        .into_iter()
+        .map(|n| n.id())
+        .collect::<Vec<_>>();
+    let subgraph = graph.subgraph(nodes);
+    let mut subgraph_10 = c.benchmark_group("analysis_subgraph_10pc");
+    subgraph_10.sample_size(10);
+
+    run_analysis_benchmarks(&mut subgraph_10, || subgraph.clone(), None);
+    subgraph_10.finish();
+
+    // subgraph windowed
+    let mut subgraph_10_windowed = c.benchmark_group("analysis_subgraph_10pc_windowed");
+    subgraph_10_windowed.sample_size(10);
+
+    run_analysis_benchmarks(
+        &mut subgraph_10_windowed,
+        || subgraph.window(start, latest + 1),
+        None,
+    );
+    subgraph_10_windowed.finish();
+
+    // layered graph windowed
+    let graph = layered_sx_super_user_graph(Some(10)).unwrap();
+    let mut graph_window_layered_group_50 = c.benchmark_group("analysis_graph_window_50_layered");
+    let latest = graph.latest_time().expect("non-empty graph");
+    let earliest = graph.earliest_time().expect("non-empty graph");
+    let start = latest - (latest - earliest) / 2;
+    graph_window_layered_group_50.sample_size(10);
+    run_analysis_benchmarks(
+        &mut graph_window_layered_group_50,
+        || {
+            graph
+                .window(start, latest + 1)
+                .layers(["0", "1", "2", "3", "4"])
+                .unwrap()
+        },
+        None,
+    );
+    graph_window_layered_group_50.finish();
+
+    let graph = graph.persistent_graph();
+
+    let mut graph_window_layered_group_50 =
+        c.benchmark_group("persistent_analysis_graph_window_50_layered");
+    let latest = graph.latest_time().expect("non-empty graph");
+    let earliest = graph.earliest_time().expect("non-empty graph");
+    let start = latest - (latest - earliest) / 2;
+    graph_window_layered_group_50.sample_size(10);
+    run_analysis_benchmarks(
+        &mut graph_window_layered_group_50,
+        || {
+            graph
+                .window(start, latest + 1)
+                .layers(["0", "1", "2", "3", "4"])
+                .unwrap()
+        },
+        None,
+    );
+    graph_window_layered_group_50.finish();
+}
+
+/// Load the SX SuperUser dataset into a new graph, optionally spreading edges across layers.
+///
+/// With `Some(n)`, each edge goes to the layer named `calculate_hash(&(src_id, dst_id)) % n`;
+/// with `None`, edges are added without a layer. Panics if an edge cannot be added.
+/// Returns the graph, or the error from `sx_superuser_file` or from the CSV load.
+fn layered_sx_super_user_graph(
+    num_layers: Option<usize>,
+) -> Result<Graph, Box<dyn std::error::Error>> {
+    let graph = Graph::new();
+    CsvLoader::new(sx_superuser_file()?)
+        .set_delimiter(" ")
+        .load_into_graph(&graph, |edge: TEdge, g: &Graph| {
+            if let Some(layer) = num_layers
+                .map(|num_layers| calculate_hash(&(edge.src_id, edge.dst_id)) % num_layers as u64)
+                .map(|id| id.to_string())
+            {
+                g.add_edge(
+                    edge.time,
+                    edge.src_id,
+                    edge.dst_id,
+                    NO_PROPS,
+                    Some(layer.as_str()),
+                )
+                .expect("Error: Unable to add edge");
+            } else {
+                g.add_edge(edge.time, edge.src_id, edge.dst_id, NO_PROPS, None)
+                    .expect("Error: Unable to add edge");
+            }
+        })?;
+
+    Ok(graph)
 }
 
 criterion_group!(benches, graph);

diff --git a/raphtory/src/core/entities/edges/edge_store.rs b/raphtory/src/core/entities/edges/edge_store.rs
@@ -23,7 +23,6 @@ pub use raphtory_api::core::entities::edges::*;
 
 use itertools::{EitherOrBoth, Itertools};
 use ouroboros::self_referencing;
-use rayon::prelude::*;
 use serde::{Deserialize, Serialize};
 use std::{
     iter,

diff --git a/raphtory/src/graph_loader/example/sx_superuser_graph.rs b/raphtory/src/graph_loader/example/sx_superuser_graph.rs
@@ -46,6 +46,7 @@
 //! ```
 
 use crate::{
+    core::utils::hashing::calculate_hash,
     graph_loader::{fetch_file, source::csv_loader::CsvLoader},
     prelude::*,
 };
@@ -54,9 +55,9 @@ use std::path::PathBuf;
 
 #[derive(Deserialize, std::fmt::Debug)]
 pub struct TEdge {
-    src_id: u64,
-    dst_id: u64,
-    time: i64,
+    pub src_id: u64,
+    pub dst_id: u64,
+    pub time: i64,
 }
 
 /// Download the SX SuperUser dataset
@@ -72,13 +73,8 @@ pub fn sx_superuser_file() -> Result<PathBuf, Box<dyn std::error::Error>> {
         600,
     )
 }
-
 /// Load the SX SuperUser dataset into a graph and return it
 ///
-/// # Arguments
-///
-/// * `shards` - The number of shards to use for the graph
-///
 /// Returns:
 ///
 /// - A Result containing the graph or an error
@@ -90,7 +86,6 @@ pub fn sx_superuser_graph() -> Result<Graph, Box<dyn std::error::Error>> {
             g.add_edge(edge.time, edge.src_id, edge.dst_id, NO_PROPS, None)
                 .expect("Error: Unable to add edge");
         })?;
-
     Ok(graph)
 }