Skip to content

Commit

Permalink
Make the edge storage more columnar to reduce memory usage (#1670)
Browse files Browse the repository at this point in the history
* Refactor the graph storage to reduce memory usage

* delete some useless stuff

* only load props when they exist

* fix props Optional

* inline len

* 64 shards should be good enough

* fix the errors with the benchmarks

* actually inline the len calls

* increment the BINCODE_VERSION constant

* get pometry-storage-private up to master

* fixes as per review

* removed the comments and sorted out the imports
  • Loading branch information
fabianmurariu authored Jul 2, 2024
1 parent 876cdbb commit fd400bc
Show file tree
Hide file tree
Showing 35 changed files with 902 additions and 914 deletions.
1 change: 1 addition & 0 deletions Cargo.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

2 changes: 1 addition & 1 deletion pometry-storage-private
Binary file modified python/tests/test_graphdb/graph.bincode
Binary file not shown.
3 changes: 2 additions & 1 deletion raphtory-benchmark/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,8 @@ edition = "2021"

[dependencies]
criterion = { workspace = true }
raphtory = { path = "../raphtory", features = ["io"] }
raphtory = { path = "../raphtory", features = ["io"], version = "0.9.3" }
raphtory-api = { path = "../raphtory-api", version = "0.9.3" }
raphtory-graphql = { path = "../raphtory-graphql", version = "0.9.3" }
pometry-storage.workspace = true
sorted_vector_map = { workspace = true }
Expand Down
343 changes: 168 additions & 175 deletions raphtory-benchmark/benches/arrow_algobench.rs
Original file line number Diff line number Diff line change
@@ -1,187 +1,180 @@
use crate::common::bench;
use criterion::{black_box, criterion_group, criterion_main, BenchmarkId, Criterion, SamplingMode};
use raphtory::{
algorithms::{
centrality::pagerank::unweighted_page_rank,
components::weakly_connected_components,
metrics::{
clustering_coefficient::clustering_coefficient,
local_clustering_coefficient::local_clustering_coefficient,
},
motifs::local_triangle_count::local_triangle_count,
},
graphgen::random_attachment::random_attachment,
prelude::*,
};
use rayon::prelude::*;
use tempfile::TempDir;

mod common;

// TODO: swap this benchmark over to the new triangle count implementation
// pub fn global_triangle_count_analysis(c: &mut Criterion) {
// let mut group = c.benchmark_group("global_triangle_count");
// group.sample_size(10);
// bench(&mut group, "global_triangle_count", None, |b| {
// let g = raphtory_db::graph_loader::lotr_graph::lotr_graph(1);
// let windowed_graph = g.window(i64::MIN, i64::MAX);
// b.iter(|| {
// global_triangle_count(&windowed_graph).unwrap();
// });
// });
//
// group.finish();
// }

/// Benchmarks `local_triangle_count` over every node of the graph returned by
/// `lotr_graph()`.
///
/// The graph is persisted as a disk graph inside a temporary directory and
/// windowed over the full `i64` range before the timed section starts. Inside
/// the timed section the nodes are collected and processed in parallel.
pub fn local_triangle_count_analysis(c: &mut Criterion) {
    let mut triangle_group = c.benchmark_group("local_triangle_count");
    triangle_group.sample_size(10);

    bench(&mut triangle_group, "local_triangle_count", None, |bencher| {
        // Setup: build the in-memory graph, then persist it under a temp dir.
        let mem_graph = raphtory::graph_loader::lotr_graph::lotr_graph();
        let tmp_dir = TempDir::new().unwrap();
        let disk_graph = mem_graph.persist_as_disk_graph(tmp_dir.path()).unwrap();
        let full_window = disk_graph.window(i64::MIN, i64::MAX);

        bencher.iter(|| {
            let all_nodes = full_window.nodes().collect();

            // Panic on the first failing node rather than ignoring the error.
            all_nodes.into_par_iter().for_each(|node| {
                local_triangle_count(&full_window, node).unwrap();
            });
        })
    });

    triangle_group.finish();
}

/// Benchmarks `local_clustering_coefficient` for node `1`.
///
/// A graph with 23 hand-written timestamped edges is built in memory,
/// persisted as a disk graph in a temporary directory and windowed to
/// `(0, 5)`; only the coefficient computation itself is timed.
pub fn local_clustering_coefficient_analysis(c: &mut Criterion) {
    let mut coeff_group = c.benchmark_group("local_clustering_coefficient");

    bench(&mut coeff_group, "local_clustering_coefficient", None, |bencher| {
        let mem_graph: Graph = Graph::new();

        // Each entry is (source, destination, time).
        let edges = vec![
            (1, 2, 1),
            (1, 3, 2),
            (1, 4, 3),
            (3, 1, 4),
            (3, 4, 5),
            (3, 5, 6),
            (4, 5, 7),
            (5, 6, 8),
            (5, 8, 9),
            (7, 5, 10),
            (8, 5, 11),
            (1, 9, 12),
            (9, 1, 13),
            (6, 3, 14),
            (4, 8, 15),
            (8, 3, 16),
            (5, 10, 17),
            (10, 5, 18),
            (10, 8, 19),
            (1, 11, 20),
            (11, 1, 21),
            (9, 11, 22),
            (11, 9, 23),
        ];

        for &(src, dst, time) in &edges {
            mem_graph.add_edge(time, src, dst, NO_PROPS, None).unwrap();
        }

        let tmp_dir = TempDir::new().unwrap();
        let disk_graph = mem_graph.persist_as_disk_graph(tmp_dir.path()).unwrap();

        let window = disk_graph.window(0, 5);
        bencher.iter(|| local_clustering_coefficient(&window, 1))
    });

    coeff_group.finish();
}

pub fn graphgen_large_clustering_coeff(c: &mut Criterion) {
let mut group = c.benchmark_group("graphgen_large_clustering_coeff");
// generate graph
let graph = Graph::new();
let seed: [u8; 32] = [1; 32];
random_attachment(&graph, 500000, 4, Some(seed));

let test_dir = TempDir::new().unwrap();
let graph = graph.persist_as_disk_graph(test_dir.path()).unwrap();

group.sampling_mode(SamplingMode::Flat);
group.measurement_time(std::time::Duration::from_secs(60));
group.sample_size(10);
group.bench_with_input(
BenchmarkId::new("graphgen_large_clustering_coeff", &graph),
&graph,
|b, graph| {
b.iter(|| {
let result = clustering_coefficient(graph);
black_box(result);
});
#[cfg(feature = "storage")]
pub mod arrow_bench {

use crate::common::bench;
use criterion::{
black_box, criterion_group, criterion_main, BenchmarkId, Criterion, SamplingMode,
};
use raphtory::{
algorithms::{
centrality::pagerank::unweighted_page_rank,
components::weakly_connected_components,
metrics::{
clustering_coefficient::clustering_coefficient,
local_clustering_coefficient::local_clustering_coefficient,
},
motifs::local_triangle_count::local_triangle_count,
},
);
group.finish()
}
graphgen::random_attachment::random_attachment,
prelude::*,
};
use rayon::prelude::*;
use tempfile::TempDir;

pub fn local_triangle_count_analysis(c: &mut Criterion) {
let mut group = c.benchmark_group("local_triangle_count");
group.sample_size(10);
bench(&mut group, "local_triangle_count", None, |b| {
let g = raphtory::graph_loader::lotr_graph::lotr_graph();
let test_dir = TempDir::new().unwrap();
let g = g.persist_as_disk_graph(test_dir.path()).unwrap();
let windowed_graph = g.window(i64::MIN, i64::MAX);

pub fn graphgen_large_pagerank(c: &mut Criterion) {
let mut group = c.benchmark_group("graphgen_large_pagerank");
// generate graph
let graph = Graph::new();
let seed: [u8; 32] = [1; 32];
random_attachment(&graph, 500000, 4, Some(seed));

let test_dir = TempDir::new().unwrap();
let graph = graph.persist_as_disk_graph(test_dir.path()).unwrap();
group.sampling_mode(SamplingMode::Flat);
group.measurement_time(std::time::Duration::from_secs(20));
group.sample_size(10);
group.bench_with_input(
BenchmarkId::new("graphgen_large_pagerank", &graph),
&graph,
|b, graph| {
b.iter(|| {
let result = unweighted_page_rank(graph, Some(100), None, None, true, None);
black_box(result);
});
},
);
group.finish()
}
let node_ids = windowed_graph.nodes().collect();

node_ids.into_par_iter().for_each(|v| {
local_triangle_count(&windowed_graph, v).unwrap();
});
})
});

group.finish();
}

/// Measures `local_clustering_coefficient` on node `1` of a small disk graph.
///
/// The closure passed to `bench` first inserts a fixed list of
/// `(source, destination, time)` edges into a fresh `Graph`, persists that
/// graph under a temporary directory and takes the `(0, 5)` window of the
/// result. The timed section contains only the coefficient call.
pub fn local_clustering_coefficient_analysis(c: &mut Criterion) {
    let mut bench_group = c.benchmark_group("local_clustering_coefficient");

    bench(&mut bench_group, "local_clustering_coefficient", None, |bencher| {
        let source_graph: Graph = Graph::new();

        let timed_edges = vec![
            (1, 2, 1),
            (1, 3, 2),
            (1, 4, 3),
            (3, 1, 4),
            (3, 4, 5),
            (3, 5, 6),
            (4, 5, 7),
            (5, 6, 8),
            (5, 8, 9),
            (7, 5, 10),
            (8, 5, 11),
            (1, 9, 12),
            (9, 1, 13),
            (6, 3, 14),
            (4, 8, 15),
            (8, 3, 16),
            (5, 10, 17),
            (10, 5, 18),
            (10, 8, 19),
            (1, 11, 20),
            (11, 1, 21),
            (9, 11, 22),
            (11, 9, 23),
        ];

        // `add_edge` takes the timestamp first, then the two endpoints.
        for (from, to, at) in timed_edges.iter() {
            source_graph
                .add_edge(*at, *from, *to, NO_PROPS, None)
                .unwrap();
        }

        let scratch_dir = TempDir::new().unwrap();
        let persisted = source_graph
            .persist_as_disk_graph(scratch_dir.path())
            .unwrap();

        let view = persisted.window(0, 5);
        bencher.iter(|| local_clustering_coefficient(&view, 1))
    });

    bench_group.finish();
}

pub fn graphgen_large_clustering_coeff(c: &mut Criterion) {
let mut group = c.benchmark_group("graphgen_large_clustering_coeff");
// generate graph
let graph = Graph::new();
let seed: [u8; 32] = [1; 32];
random_attachment(&graph, 500000, 4, Some(seed));

pub fn graphgen_large_concomp(c: &mut Criterion) {
let mut group = c.benchmark_group("graphgen_large_concomp");
// generate graph
let graph = Graph::new();
let seed: [u8; 32] = [1; 32];
random_attachment(&graph, 500000, 4, Some(seed));
let test_dir = TempDir::new().unwrap();
let graph = graph.persist_as_disk_graph(test_dir.path()).unwrap();

group.sampling_mode(SamplingMode::Flat);
group.measurement_time(std::time::Duration::from_secs(60));
group.sample_size(10);
group.bench_with_input(
BenchmarkId::new("graphgen_large_concomp", &graph),
&graph,
|b, graph| {
b.iter(|| {
let result = weakly_connected_components(graph, 20, None);
black_box(result);
});
},
);
group.finish()
let test_dir = TempDir::new().unwrap();
let graph = graph.persist_as_disk_graph(test_dir.path()).unwrap();

group.sampling_mode(SamplingMode::Flat);
group.measurement_time(std::time::Duration::from_secs(60));
group.sample_size(10);
group.bench_with_input(
BenchmarkId::new("graphgen_large_clustering_coeff", &graph),
&graph,
|b, graph| {
b.iter(|| {
let result = clustering_coefficient(graph);
black_box(result);
});
},
);
group.finish()
}

/// Benchmarks `unweighted_page_rank` on a generated graph persisted to disk.
///
/// The graph is produced by `random_attachment(&graph, 500000, 4, ..)` with a
/// fixed seed, persisted as a disk graph under a temporary directory, and then
/// measured with flat sampling, a 20 second measurement time and 10 samples.
pub fn graphgen_large_pagerank(c: &mut Criterion) {
    let mut pagerank_group = c.benchmark_group("graphgen_large_pagerank");

    // Generate the input graph from a fixed seed.
    let mem_graph = Graph::new();
    let fixed_seed: [u8; 32] = [1; 32];
    random_attachment(&mem_graph, 500_000, 4, Some(fixed_seed));

    let tmp_dir = TempDir::new().unwrap();
    let disk_graph = mem_graph.persist_as_disk_graph(tmp_dir.path()).unwrap();

    pagerank_group.sampling_mode(SamplingMode::Flat);
    pagerank_group.measurement_time(std::time::Duration::from_secs(20));
    pagerank_group.sample_size(10);

    let bench_id = BenchmarkId::new("graphgen_large_pagerank", &disk_graph);
    pagerank_group.bench_with_input(bench_id, &disk_graph, |bencher, input| {
        bencher.iter(|| {
            // `black_box` keeps the result from being optimised away.
            black_box(unweighted_page_rank(
                input,
                Some(100),
                None,
                None,
                true,
                None,
            ));
        });
    });

    pagerank_group.finish()
}

/// Benchmarks `weakly_connected_components` on a generated graph persisted to disk.
///
/// The graph is produced by `random_attachment(&graph, 500000, 4, ..)` with a
/// fixed seed, persisted as a disk graph under a temporary directory, and then
/// measured with flat sampling, a 60 second measurement time and 10 samples.
pub fn graphgen_large_concomp(c: &mut Criterion) {
    let mut concomp_group = c.benchmark_group("graphgen_large_concomp");

    // Generate the input graph from a fixed seed.
    let mem_graph = Graph::new();
    let fixed_seed: [u8; 32] = [1; 32];
    random_attachment(&mem_graph, 500_000, 4, Some(fixed_seed));

    let tmp_dir = TempDir::new().unwrap();
    let disk_graph = mem_graph.persist_as_disk_graph(tmp_dir.path()).unwrap();

    concomp_group.sampling_mode(SamplingMode::Flat);
    concomp_group.measurement_time(std::time::Duration::from_secs(60));
    concomp_group.sample_size(10);

    let bench_id = BenchmarkId::new("graphgen_large_concomp", &disk_graph);
    concomp_group.bench_with_input(bench_id, &disk_graph, |bencher, input| {
        bencher.iter(|| {
            // `black_box` keeps the result from being optimised away.
            black_box(weakly_connected_components(input, 20, None));
        });
    });

    concomp_group.finish()
}
}

// Benchmark registration, compiled only when the `storage` feature is enabled.
// `criterion_group!` expects every target to be a function path, so the
// functions that live in the `arrow_bench` module are referenced with `::`
// (a `.` separator is not a valid path and cannot be parsed by the macro).
#[cfg(feature = "storage")]
criterion_group!(
    benches,
    local_triangle_count_analysis,
    local_clustering_coefficient_analysis,
    graphgen_large_clustering_coeff,
    graphgen_large_pagerank,
    graphgen_large_concomp,
    arrow_bench::local_triangle_count_analysis,
    arrow_bench::local_clustering_coefficient_analysis,
    arrow_bench::graphgen_large_clustering_coeff,
    arrow_bench::graphgen_large_pagerank,
    arrow_bench::graphgen_large_concomp,
);
#[cfg(feature = "storage")]
criterion_main!(benches);
2 changes: 1 addition & 1 deletion raphtory-benchmark/benches/edge_add.rs
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
use criterion::{criterion_group, criterion_main, Criterion};
use raphtory::{core::entities::nodes::input_node::InputNode, prelude::*};
use raphtory::prelude::*;

mod common;
use rand::{
Expand Down
Loading

0 comments on commit fd400bc

Please sign in to comment.