diff --git a/Cargo.lock b/Cargo.lock index ec0513b897..6aee3bb0d6 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -1934,7 +1934,7 @@ dependencies = [ [[package]] name = "examples" -version = "0.9.2" +version = "0.9.3" dependencies = [ "chrono", "itertools 0.12.1", @@ -2776,7 +2776,7 @@ dependencies = [ [[package]] name = "js-raphtory" -version = "0.9.2" +version = "0.9.3" dependencies = [ "chrono", "console_error_panic_hook", @@ -3209,7 +3209,7 @@ dependencies = [ [[package]] name = "netflow_algorithm" -version = "0.9.2" +version = "0.9.3" dependencies = [ "pyo3", "pyo3-build-config", @@ -4021,7 +4021,7 @@ dependencies = [ [[package]] name = "pometry-storage" -version = "0.9.2" +version = "0.9.3" [[package]] name = "portable-atomic" @@ -4371,7 +4371,7 @@ dependencies = [ [[package]] name = "raphtory" -version = "0.9.2" +version = "0.9.3" dependencies = [ "ahash", "async-openai", @@ -4435,15 +4435,24 @@ dependencies = [ [[package]] name = "raphtory-api" -version = "0.9.2" +version = "0.9.3" dependencies = [ "chrono", + "dashmap", + "lock_api", + "parking_lot", + "pyo3", + "quickcheck 1.0.3", + "quickcheck_macros", + "rand 0.8.5", + "rayon", + "rustc-hash", "serde", ] [[package]] name = "raphtory-benchmark" -version = "0.9.2" +version = "0.9.3" dependencies = [ "chrono", "clap", @@ -4463,7 +4472,7 @@ dependencies = [ [[package]] name = "raphtory-cypher" -version = "0.9.2" +version = "0.9.3" dependencies = [ "arrow", "arrow-array", @@ -4494,7 +4503,7 @@ dependencies = [ [[package]] name = "raphtory-graphql" -version = "0.9.2" +version = "0.9.3" dependencies = [ "async-graphql", "async-graphql-poem", @@ -4519,6 +4528,7 @@ dependencies = [ "poem", "poem-openapi", "raphtory", + "raphtory-api", "reqwest", "serde", "serde_json", @@ -4537,7 +4547,7 @@ dependencies = [ [[package]] name = "raphtory-pymodule" -version = "0.9.2" +version = "0.9.3" dependencies = [ "async-graphql", "crossbeam-channel", diff --git a/Cargo.toml b/Cargo.toml index b047777aff..3a44fb3bab 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -14,7 +14,7 @@ default-members = ["raphtory"] resolver = "2" [workspace.package] -version = "0.9.2" +version = "0.9.3" documentation = "https://raphtory.readthedocs.io/en/latest/" repository = "https://github.com/Raphtory/raphtory/" license = "GPL-3.0" diff --git a/pometry-storage-private b/pometry-storage-private index aa1ba92dea..00addb990e 160000 --- a/pometry-storage-private +++ b/pometry-storage-private @@ -1 +1 @@ -Subproject commit aa1ba92deae1a27d9c4dd60a3020e61ca8445c35 +Subproject commit 00addb990e0a98f91a3c1d3f0ee083a666c4bbe7 diff --git a/python/Cargo.toml b/python/Cargo.toml index 128370b445..f9e42de836 100644 --- a/python/Cargo.toml +++ b/python/Cargo.toml @@ -19,8 +19,8 @@ crate-type = ["cdylib"] [dependencies] pyo3 = { workspace = true } -raphtory_core = { path = "../raphtory", version = "0.9.2", features = ["python", "search", "vectors"], package = "raphtory" } -raphtory-graphql = { path = "../raphtory-graphql", version = "0.9.2" } +raphtory_core = { path = "../raphtory", version = "0.9.3", features = ["python", "search", "vectors"], package = "raphtory" } +raphtory-graphql = { path = "../raphtory-graphql", version = "0.9.3" } serde_json = { workspace = true } reqwest = { workspace = true } tokio = { workspace = true } diff --git a/python/tests/test_diskgraph.py b/python/tests/test_diskgraph.py index 7319f95d0e..8dbd6e7435 100644 --- a/python/tests/test_diskgraph.py +++ b/python/tests/test_diskgraph.py @@ -4,10 +4,9 @@ import tempfile import os - def test_disk_graph(): curr_dir = os.path.dirname(os.path.abspath(__file__)) - rsc_dir = os.path.join(curr_dir, "..","..", "pometry-storage-private", "resources") + rsc_dir = os.path.join(curr_dir, "..", "..", "pometry-storage-private", "resources") rsc_dir = os.path.normpath(rsc_dir) print("rsc_dir:", rsc_dir + "/netflowsorted/nft_sorted") @@ -66,6 +65,7 @@ def test_disk_graph(): read_chunk_size, concurrent_files, num_threads, + None, print_result=False, ) @@ -91,3 +91,65 @@ def test_disk_graph(): "Page Rank", algorithms.pagerank, g.layer("netflow"), 100, print_result=False ) assert len(list(actual.get_all_with_names())) == 1624 + +def test_disk_graph_type_filter(): + curr_dir = os.path.dirname(os.path.abspath(__file__)) + rsc_dir = os.path.join(curr_dir, "..", "..", "pometry-storage-private", "resources") + rsc_dir = os.path.normpath(rsc_dir) + print("rsc_dir:", rsc_dir + "/netflowsorted/nft_sorted") + + graph_dir = tempfile.TemporaryDirectory() + layer_parquet_cols = [ + { + "parquet_dir": rsc_dir + "/netflowsorted/nft_sorted", + "layer": "netflow", + "src_col": "src", + "dst_col": "dst", + "time_col": "epoch_time", + } + ] + + chunk_size = 268_435_456 + num_threads = 4 + t_props_chunk_size = int(chunk_size / 8) + read_chunk_size = 4_000_000 + concurrent_files = 1 + + g = DiskGraph.load_from_parquets( + graph_dir.name, + layer_parquet_cols, + rsc_dir + "/netflowsorted/props/props.parquet", + chunk_size, + t_props_chunk_size, + read_chunk_size, + concurrent_files, + num_threads, + "node_type" + ) + + assert g.count_nodes() == 1619 + assert g.layer("netflow").count_edges() == 2018 + assert g.earliest_time == 7257619 + assert g.latest_time == 7343970 + + assert len(g.nodes.type_filter(["A"]).name.collect()) == 785 + assert len(g.nodes.type_filter([""]).name.collect()) == 0 + assert len(g.nodes.type_filter(["A", "B"]).name.collect()) == 1619 + + neighbor_names = g.nodes.type_filter(["A"]).neighbours.name.collect() + total_length = sum(len(names) for names in neighbor_names) + assert total_length == 2056 + + assert g.nodes.type_filter([]).name.collect() == [] + + neighbor_names = g.nodes.type_filter(["A"]).neighbours.type_filter(["B"]).name.collect() + total_length = sum(len(names) for names in neighbor_names) + assert total_length == 1023 + + assert g.node("Comp175846").neighbours.type_filter(["A"]).name.collect() == ["Comp844043"] + assert g.node("Comp175846").neighbours.type_filter(["B"]).name.collect() == [] + assert g.node("Comp175846").neighbours.type_filter([]).name.collect() == [] + assert g.node("Comp175846").neighbours.type_filter(["A", "B"]).name.collect() == ["Comp844043"] + + neighbor_names = g.node("Comp175846").neighbours.neighbours.name.collect() + assert len(neighbor_names) == 193 diff --git a/raphtory-api/Cargo.toml b/raphtory-api/Cargo.toml index 4146a92d8a..05686db562 100644 --- a/raphtory-api/Cargo.toml +++ b/raphtory-api/Cargo.toml @@ -17,3 +17,19 @@ edition.workspace = true [dependencies] serde = { workspace = true, features = ["derive"] } chrono.workspace = true +dashmap = { workspace = true } +rustc-hash = { workspace = true } +lock_api = { workspace = true } +parking_lot = { workspace = true } +pyo3 = { workspace = true, optional = true } +rayon = { workspace = true } +rand = { workspace = true } +quickcheck = { workspace = true } +quickcheck_macros = { workspace = true } + +[features] +default = [] +# Enables generating the pyo3 python bindings +python = [ + "dep:pyo3", +] diff --git a/raphtory-api/src/core/storage/arc_str.rs b/raphtory-api/src/core/storage/arc_str.rs new file mode 100644 index 0000000000..d3589c21af --- /dev/null +++ b/raphtory-api/src/core/storage/arc_str.rs @@ -0,0 +1,121 @@ +use serde::{Deserialize, Serialize}; +use std::{ + borrow::Borrow, + cmp::Ordering, + fmt, + fmt::{Display, Formatter}, + ops::Deref, + sync::Arc, +}; + +#[derive(Clone, Debug, Eq, Ord, Hash, Serialize, Deserialize)] +pub struct ArcStr(pub Arc); + +impl Display for ArcStr { + fn fmt(&self, f: &mut Formatter<'_>) -> fmt::Result { + Display::fmt(&self.0, f) + } +} + +impl>> From for ArcStr { + fn from(value: T) -> Self { + ArcStr(value.into()) + } +} + +impl From for String { + fn from(value: ArcStr) -> Self { + value.to_string() + } +} + +impl From<&ArcStr> for String { + fn from(value: &ArcStr) -> Self { + value.clone().into() + } +} + +impl Deref for ArcStr { + type Target = Arc; + + #[inline] + fn deref(&self) -> &Self::Target { + &self.0 + } +} + +impl Borrow for ArcStr { + #[inline] + fn borrow(&self) -> &str { + self.0.borrow() + } +} + +impl AsRef for ArcStr +where + T: ?Sized, + ::Target: AsRef, +{ + fn as_ref(&self) -> &T { + self.deref().as_ref() + } +} + +impl + ?Sized> PartialEq for ArcStr { + fn eq(&self, other: &T) -> bool { + >::borrow(self).eq(other.borrow()) + } +} + +impl> PartialOrd for ArcStr { + fn partial_cmp(&self, other: &T) -> Option { + >::borrow(self).partial_cmp(other.borrow()) + } +} + +pub trait OptionAsStr<'a> { + fn as_str(self) -> Option<&'a str>; +} + +impl<'a, O: AsRef + 'a> OptionAsStr<'a> for &'a Option { + fn as_str(self) -> Option<&'a str> { + self.as_ref().map(|s| s.as_ref()) + } +} + +impl<'a, O: AsRef + 'a> OptionAsStr<'a> for Option<&'a O> { + fn as_str(self) -> Option<&'a str> { + self.map(|s| s.as_ref()) + } +} + +#[cfg(test)] +mod test_arc_str { + use crate::core::storage::arc_str::{ArcStr, OptionAsStr}; + use std::sync::Arc; + + #[test] + fn can_compare_with_str() { + let test: ArcStr = "test".into(); + assert_eq!(test, "test"); + assert_eq!(test, "test".to_string()); + assert_eq!(test, Arc::from("test")); + assert_eq!(&test, &"test".to_string()) + } + + #[test] + fn test_option_conv() { + let test: Option = Some("test".into()); + + let opt_str = test.as_str(); + assert_eq!(opt_str, Some("test")); + + let test_ref = test.as_ref(); + let opt_str = test_ref.as_str(); + assert_eq!(opt_str, Some("test")); + + let test = Some("test".to_string()); + let opt_str = test.as_str(); + assert_eq!(opt_str, Some("test")); + } +} diff --git a/raphtory-api/src/core/storage/dict_mapper.rs b/raphtory-api/src/core/storage/dict_mapper.rs new file mode 100644 index 0000000000..e447224834 --- /dev/null +++ b/raphtory-api/src/core/storage/dict_mapper.rs @@ -0,0 +1,147 @@ +use crate::core::storage::{arc_str::ArcStr, locked_vec::ArcReadLockedVec, FxDashMap}; +use parking_lot::RwLock; +use serde::{Deserialize, Serialize}; +use std::{borrow::Borrow, hash::Hash, sync::Arc}; + +#[derive(Serialize, Deserialize, Default, Debug)] +pub struct DictMapper { + map: FxDashMap, + reverse_map: Arc>>, //FIXME: a boxcar vector would be a great fit if it was serializable... +} + +impl DictMapper { + pub fn get_or_create_id(&self, name: &Q) -> usize + where + Q: Hash + Eq + ?Sized + ToOwned + Borrow, + T: Into, + { + if let Some(existing_id) = self.map.get(name.borrow()) { + return *existing_id; + } + + let name = name.to_owned().into(); + let new_id = self.map.entry(name.clone()).or_insert_with(|| { + let mut reverse = self.reverse_map.write(); + let id = reverse.len(); + reverse.push(name); + id + }); + *new_id + } + + pub fn get_id(&self, name: &str) -> Option { + self.map.get(name).map(|id| *id) + } + + pub fn has_name(&self, id: usize) -> bool { + let guard = self.reverse_map.read(); + guard.get(id).is_some() + } + + pub fn get_name(&self, id: usize) -> ArcStr { + let guard = self.reverse_map.read(); + guard + .get(id) + .cloned() + .expect("internal ids should always be mapped to a name") + } + + pub fn get_keys(&self) -> ArcReadLockedVec { + ArcReadLockedVec { + guard: self.reverse_map.read_arc(), + } + } + + pub fn get_values(&self) -> Vec { + self.map.iter().map(|entry| *entry.value()).collect() + } + + pub fn len(&self) -> usize { + self.reverse_map.read().len() + } + + pub fn is_empty(&self) -> bool { + self.reverse_map.read().is_empty() + } +} + +#[cfg(test)] +mod test { + use std::{collections::HashMap, sync::Arc, thread}; + + use crate::core::storage::dict_mapper::DictMapper; + use quickcheck_macros::quickcheck; + use rand::seq::SliceRandom; + use rayon::prelude::*; + + use super::*; + + #[test] + fn test_dict_mapper() { + let mapper = DictMapper::default(); + assert_eq!(mapper.get_or_create_id("test"), 0); + assert_eq!(mapper.get_or_create_id("test"), 0); + assert_eq!(mapper.get_or_create_id("test2"), 1); + assert_eq!(mapper.get_or_create_id("test2"), 1); + assert_eq!(mapper.get_or_create_id("test"), 0); + } + + #[quickcheck] + fn check_dict_mapper_concurrent_write(write: Vec) -> bool { + let n = 100; + let mapper: DictMapper = DictMapper::default(); + + // create n maps from strings to ids in parallel + let res: Vec> = (0..n) + .into_par_iter() + .map(|_| { + let mut ids: HashMap = Default::default(); + let mut rng = rand::thread_rng(); + let mut write_s = write.clone(); + write_s.shuffle(&mut rng); + for s in write_s { + let id = mapper.get_or_create_id(s.as_str()); + ids.insert(s, id); + } + ids + }) + .collect(); + + // check that all maps are the same and that all strings have been assigned an id + let res_0 = &res[0]; + res[1..n].iter().all(|v| res_0 == v) && write.iter().all(|v| mapper.get_id(v).is_some()) + } + + // map 5 strings to 5 ids from 4 threads concurrently 1000 times + #[test] + fn test_dict_mapper_concurrent() { + use std::{sync::Arc, thread}; + + let mapper = Arc::new(DictMapper::default()); + let mut threads = Vec::new(); + for _ in 0..4 { + let mapper = Arc::clone(&mapper); + threads.push(thread::spawn(move || { + for _ in 0..1000 { + mapper.get_or_create_id("test"); + mapper.get_or_create_id("test2"); + mapper.get_or_create_id("test3"); + mapper.get_or_create_id("test4"); + mapper.get_or_create_id("test5"); + } + })); + } + + for thread in threads { + thread.join().unwrap(); + } + + let mut actual = vec!["test", "test2", "test3", "test4", "test5"] + .into_iter() + .map(|name| mapper.get_or_create_id(name)) + .collect::>(); + actual.sort(); + + assert_eq!(actual, vec![0, 1, 2, 3, 4]); + } +} diff --git a/raphtory-api/src/core/storage/locked_vec.rs b/raphtory-api/src/core/storage/locked_vec.rs new file mode 100644 index 0000000000..a675b594b4 --- /dev/null +++ b/raphtory-api/src/core/storage/locked_vec.rs @@ -0,0 +1,48 @@ +use crate::core::storage::ArcRwLockReadGuard; +use std::ops::Deref; + +#[derive(Debug)] +pub struct ArcReadLockedVec { + pub(crate) guard: ArcRwLockReadGuard>, +} + +impl Deref for ArcReadLockedVec { + type Target = Vec; + + #[inline] + fn deref(&self) -> &Self::Target { + self.guard.deref() + } +} + +impl IntoIterator for ArcReadLockedVec { + type Item = T; + type IntoIter = LockedIter; + + fn into_iter(self) -> Self::IntoIter { + let guard = self.guard; + let len = guard.len(); + let pos = 0; + LockedIter { guard, pos, len } + } +} + +pub struct LockedIter { + guard: ArcRwLockReadGuard>, + pos: usize, + len: usize, +} + +impl Iterator for LockedIter { + type Item = T; + + fn next(&mut self) -> Option { + if self.pos < self.len { + let next_val = Some(self.guard[self.pos].clone()); + self.pos += 1; + next_val + } else { + None + } + } +} diff --git a/raphtory-api/src/core/storage/mod.rs b/raphtory-api/src/core/storage/mod.rs index 5309fd0959..c0145de7dd 100644 --- a/raphtory-api/src/core/storage/mod.rs +++ b/raphtory-api/src/core/storage/mod.rs @@ -1 +1,12 @@ +use dashmap::DashMap; +use rustc_hash::FxHasher; +use std::hash::BuildHasherDefault; + +pub mod arc_str; +pub mod dict_mapper; +pub mod locked_vec; pub mod timeindex; + +pub type FxDashMap = DashMap>; + +pub type ArcRwLockReadGuard = lock_api::ArcRwLockReadGuard; diff --git a/raphtory-api/src/lib.rs b/raphtory-api/src/lib.rs index 5a7ca06a4f..eeaed59341 100644 --- a/raphtory-api/src/lib.rs +++ b/raphtory-api/src/lib.rs @@ -1 +1,4 @@ pub mod core; + +#[cfg(feature = "python")] +pub mod python; diff --git a/raphtory/src/python/types/arcstr.rs b/raphtory-api/src/python/mod.rs similarity index 79% rename from raphtory/src/python/types/arcstr.rs rename to raphtory-api/src/python/mod.rs index 7622e2e1a4..ccbd7bc413 100644 --- a/raphtory/src/python/types/arcstr.rs +++ b/raphtory-api/src/python/mod.rs @@ -1,5 +1,5 @@ -use crate::core::ArcStr; -use pyo3::{FromPyObject, IntoPy, PyAny, PyObject, PyResult, Python, ToPyObject}; +use crate::core::storage::arc_str::ArcStr; +use pyo3::*; impl IntoPy for ArcStr { fn into_py(self, py: Python<'_>) -> PyObject { diff --git a/raphtory-benchmark/Cargo.toml b/raphtory-benchmark/Cargo.toml index d53dca9085..f9f773c1a4 100644 --- a/raphtory-benchmark/Cargo.toml +++ b/raphtory-benchmark/Cargo.toml @@ -8,7 +8,7 @@ edition = "2021" [dependencies] criterion = { workspace = true } raphtory = { path = "../raphtory", features = ["io"] } -raphtory-graphql = { path = "../raphtory-graphql", version = "0.9.2" } +raphtory-graphql = { path = "../raphtory-graphql", version = "0.9.3" } pometry-storage.workspace = true sorted_vector_map = { workspace = true } rand = { workspace = true } diff --git a/raphtory-benchmark/benches/base.rs b/raphtory-benchmark/benches/base.rs index fb178a534b..5968e9f44b 100644 --- a/raphtory-benchmark/benches/base.rs +++ b/raphtory-benchmark/benches/base.rs @@ -1,4 +1,5 @@ -use crate::common::{bootstrap_graph, run_analysis_benchmarks, run_large_ingestion_benchmarks}; +use crate::common::{bootstrap_graph, run_large_ingestion_benchmarks}; +use common::run_graph_ops_benches; use criterion::{criterion_group, criterion_main, Criterion, Throughput}; use raphtory::{graph_loader::example::lotr_graph::lotr_graph, prelude::*}; @@ -21,29 +22,28 @@ pub fn base(c: &mut Criterion) { // Make an option of None run_large_ingestion_benchmarks(&mut large_group, || bootstrap_graph(10000), None); large_group.finish(); - let mut graph_group = c.benchmark_group("lotr_graph"); + let graph = lotr_graph(); - run_analysis_benchmarks(&mut graph_group, || graph.clone(), None); - graph_group.finish(); - let mut graph_window_group_100 = c.benchmark_group("lotr_graph_window_100"); - graph_window_group_100.sample_size(10); - run_analysis_benchmarks( - &mut graph_window_group_100, - || graph.window(i64::MIN, i64::MAX), - None, - ); - graph_window_group_100.finish(); - let mut graph_window_group_10 = c.benchmark_group("lotr_graph_window_10"); - let latest = graph.latest_time().expect("non-empty graph"); - let earliest = graph.earliest_time().expect("non-empty graph"); - let start = latest - (latest - earliest) / 10; - graph_window_group_10.sample_size(10); - run_analysis_benchmarks( - &mut graph_window_group_10, - || graph.window(start, latest + 1), - None, - ); - graph_window_group_10.finish(); + + let layered_graph = Graph::new(); + + for layer in (0..10).map(|i| i.to_string()) { + for edge in graph.edges() { + for t in edge.history() { + layered_graph + .add_edge( + t, + edge.src().name().clone(), + edge.dst().name().clone(), + NO_PROPS, + Some(&layer), + ) + .expect("Error: Unable to add edge"); + } + } + } + + run_graph_ops_benches(c, "lotr", graph, layered_graph) } criterion_group!(benches, base); diff --git a/raphtory-benchmark/benches/common/mod.rs b/raphtory-benchmark/benches/common/mod.rs index cd31d00285..0c98d4045b 100644 --- a/raphtory-benchmark/benches/common/mod.rs +++ b/raphtory-benchmark/benches/common/mod.rs @@ -1,14 +1,10 @@ #![allow(dead_code)] use criterion::{ - black_box, measurement::WallTime, BatchSize, Bencher, BenchmarkGroup, BenchmarkId, -}; -use rand::{distributions::Uniform, seq::*, Rng}; -use raphtory::{ - core::entities::{LayerIds, VID}, - db::api::view::StaticGraphViewOps, - prelude::*, + black_box, measurement::WallTime, BatchSize, Bencher, BenchmarkGroup, BenchmarkId, Criterion, }; +use rand::{distributions::Uniform, seq::*, Rng, SeedableRng}; +use raphtory::{db::api::view::StaticGraphViewOps, prelude::*}; use std::collections::HashSet; fn make_index_gen() -> Box> { @@ -421,20 +417,144 @@ pub fn run_analysis_benchmarks( }, ); - bench(group, "materialize", parameter, |b: &mut Bencher| { - b.iter(|| { - let mg = graph.materialize(); - black_box(mg) - }) - }); - bench( group, "max_neighbour_degree", parameter, |b: &mut Bencher| { - let v = graph.node(VID(0)).expect("graph should not be empty"); + let v = graph + .nodes() + .into_iter() + .next() + .expect("graph should not be empty"); b.iter(|| v.neighbours().degree().max()) }, ); } + +pub fn run_materialize( + group: &mut BenchmarkGroup, + make_graph: F, + parameter: Option, +) where + F: Fn() -> G, + G: StaticGraphViewOps, +{ + let graph = make_graph(); + bench(group, "materialize", parameter, |b: &mut Bencher| { + b.iter(|| { + let mg = graph.materialize(); + black_box(mg) + }) + }); +} + +pub fn run_graph_ops_benches( + c: &mut Criterion, + graph_name: &str, + graph: Graph, + layered_graph: Graph, +) { + let mut graph_group = c.benchmark_group(graph_name); + let make_graph = || graph.clone(); + run_analysis_benchmarks(&mut graph_group, make_graph, None); + graph_group.finish(); + + bench_materialise(&format!("{graph_name}_materialise"), c, make_graph); + + let group_name = format!("{graph_name}_window_100"); + let make_graph = || graph.window(i64::MIN, i64::MAX); + let mut graph_window_group_100 = c.benchmark_group(group_name); + // graph_window_group_100.sample_size(10); + run_analysis_benchmarks(&mut graph_window_group_100, make_graph, None); + graph_window_group_100.finish(); + + bench_materialise(&format!("{graph_name}_materialise"), c, make_graph); + + // graph windowed + let group_name = format!("{graph_name}_graph_window_10"); + let mut graph_window_group_10 = c.benchmark_group(group_name); + let latest = graph.latest_time().expect("non-empty graph"); + let earliest = graph.earliest_time().expect("non-empty graph"); + let start = latest - (latest - earliest) / 10; + // graph_window_group_10.sample_size(10); + let make_graph = || graph.window(start, latest + 1); + run_analysis_benchmarks(&mut graph_window_group_10, make_graph, None); + graph_window_group_10.finish(); + bench_materialise(&format!("{graph_name}_materialise"), c, make_graph); + + // subgraph + let mut rng = rand::rngs::StdRng::seed_from_u64(73); + let nodes = graph + .nodes() + .into_iter() + .choose_multiple(&mut rng, graph.count_nodes() / 10) + .into_iter() + .map(|n| n.id()) + .collect::>(); + let subgraph = graph.subgraph(nodes); + let group_name = format!("{graph_name}_subgraph_10pc"); + let mut subgraph_10 = c.benchmark_group(group_name); + // subgraph_10.sample_size(10); + + let make_graph = || subgraph.clone(); + run_analysis_benchmarks(&mut subgraph_10, make_graph, None); + subgraph_10.finish(); + bench_materialise(&format!("{graph_name}_materialise"), c, make_graph); + + // subgraph windowed + let group_name = format!("{graph_name}_subgraph_10pc_windowed"); + let mut subgraph_10_windowed = c.benchmark_group(group_name); + + let make_graph = || subgraph.window(start, latest + 1); + run_analysis_benchmarks(&mut subgraph_10_windowed, make_graph, None); + subgraph_10_windowed.finish(); + bench_materialise(&format!("{graph_name}_materialise"), c, make_graph); + + // layered graph windowed + let graph = layered_graph; + let group_name = format!("{graph_name}_graph_window_50_layered"); + let mut graph_window_layered_group_50 = c.benchmark_group(group_name); + let latest = graph.latest_time().expect("non-empty graph"); + let earliest = graph.earliest_time().expect("non-empty graph"); + let start = latest - (latest - earliest) / 2; + graph_window_layered_group_50.sample_size(10); + let make_graph = || { + graph + .window(start, latest + 1) + .layers(["0", "1", "2", "3", "4"]) + .unwrap() + }; + run_analysis_benchmarks(&mut graph_window_layered_group_50, make_graph, None); + graph_window_layered_group_50.finish(); + bench_materialise(&format!("{graph_name}_materialise"), c, make_graph); + + let graph = graph.persistent_graph(); + + let group_name = format!("{graph_name}_persistent_window_50_layered"); + let mut graph_window_layered_group_50 = c.benchmark_group(group_name); + let latest = graph.latest_time().expect("non-empty graph"); + let earliest = graph.earliest_time().expect("non-empty graph"); + let start = latest - (latest - earliest) / 2; + graph_window_layered_group_50.sample_size(10); + let make_graph = || { + graph + .window(start, latest + 1) + .layers(["0", "1", "2", "3", "4"]) + .unwrap() + }; + run_analysis_benchmarks(&mut graph_window_layered_group_50, make_graph, None); + graph_window_layered_group_50.finish(); + bench_materialise(&format!("{graph_name}_materialise"), c, make_graph); +} + +fn bench_materialise(name: &str, c: &mut Criterion, make_graph: F) +where + F: Fn() -> G, + G: StaticGraphViewOps, +{ + let mut mat_graph_group = c.benchmark_group(name); + mat_graph_group.sample_size(10); + run_materialize(&mut mat_graph_group, make_graph, None); + mat_graph_group.finish(); +} diff --git a/raphtory-benchmark/benches/graph_ops.rs b/raphtory-benchmark/benches/graph_ops.rs index c881e880ff..9aee4f630c 100644 --- a/raphtory-benchmark/benches/graph_ops.rs +++ b/raphtory-benchmark/benches/graph_ops.rs @@ -1,9 +1,7 @@ -use common::run_analysis_benchmarks; +use common::run_graph_ops_benches; use criterion::{criterion_group, criterion_main, Criterion}; -use rand::{seq::*, SeedableRng}; use raphtory::{ core::utils::hashing::calculate_hash, - db::api::view::*, graph_loader::{ example::sx_superuser_graph::{sx_superuser_file, sx_superuser_graph, TEdge}, source::csv_loader::CsvLoader, @@ -14,97 +12,11 @@ use raphtory::{ mod common; pub fn graph(c: &mut Criterion) { - let mut graph_group = c.benchmark_group("analysis_graph"); + let group_name = "analysis_graph"; let graph = sx_superuser_graph().unwrap(); - run_analysis_benchmarks(&mut graph_group, || graph.clone(), None); - graph_group.finish(); - let mut graph_window_group_100 = c.benchmark_group("analysis_graph_window_100"); - graph_window_group_100.sample_size(10); - run_analysis_benchmarks( - &mut graph_window_group_100, - || graph.window(i64::MIN, i64::MAX), - None, - ); - graph_window_group_100.finish(); + let layered_graph = layered_sx_super_user_graph(Some(10)).unwrap(); - // graph windowed - let mut graph_window_group_10 = c.benchmark_group("analysis_graph_window_10"); - let latest = graph.latest_time().expect("non-empty graph"); - let earliest = graph.earliest_time().expect("non-empty graph"); - let start = latest - (latest - earliest) / 10; - graph_window_group_10.sample_size(10); - run_analysis_benchmarks( - &mut graph_window_group_10, - || graph.window(start, latest + 1), - None, - ); - graph_window_group_10.finish(); - - // subgraph - let mut rng = rand::rngs::StdRng::seed_from_u64(73); - let nodes = graph - .nodes() - .into_iter() - .choose_multiple(&mut rng, graph.count_nodes() / 10) - .into_iter() - .map(|n| n.id()) - .collect::>(); - let subgraph = graph.subgraph(nodes); - let mut subgraph_10 = c.benchmark_group("analysis_subgraph_10pc"); - subgraph_10.sample_size(10); - - run_analysis_benchmarks(&mut subgraph_10, || subgraph.clone(), None); - subgraph_10.finish(); - - // subgraph windowed - let mut subgraph_10_windowed = c.benchmark_group("analysis_subgraph_10pc_windowed"); - subgraph_10_windowed.sample_size(10); - - run_analysis_benchmarks( - &mut subgraph_10_windowed, - || subgraph.window(start, latest + 1), - None, - ); - subgraph_10_windowed.finish(); - - // layered graph windowed - let graph = layered_sx_super_user_graph(Some(10)).unwrap(); - let mut graph_window_layered_group_50 = c.benchmark_group("analysis_graph_window_50_layered"); - let latest = graph.latest_time().expect("non-empty graph"); - let earliest = graph.earliest_time().expect("non-empty graph"); - let start = latest - (latest - earliest) / 2; - graph_window_layered_group_50.sample_size(10); - run_analysis_benchmarks( - &mut graph_window_layered_group_50, - || { - graph - .window(start, latest + 1) - .layers(["0", "1", "2", "3", "4"]) - .unwrap() - }, - None, - ); - graph_window_layered_group_50.finish(); - - let graph = graph.persistent_graph(); - - let mut graph_window_layered_group_50 = - c.benchmark_group("persistent_analysis_graph_window_50_layered"); - let latest = graph.latest_time().expect("non-empty graph"); - let earliest = graph.earliest_time().expect("non-empty graph"); - let start = latest - (latest - earliest) / 2; - graph_window_layered_group_50.sample_size(10); - run_analysis_benchmarks( - &mut graph_window_layered_group_50, - || { - graph - .window(start, latest + 1) - .layers(["0", "1", "2", "3", "4"]) - .unwrap() - }, - None, - ); - graph_window_layered_group_50.finish(); + run_graph_ops_benches(c, group_name, graph, layered_graph); } /// Load the SX SuperUser dataset into a graph and return it diff --git a/raphtory-cypher/examples/raphtory_cypher.rs b/raphtory-cypher/examples/raphtory_cypher.rs index e16c304d21..a9084dba71 100644 --- a/raphtory-cypher/examples/raphtory_cypher.rs +++ b/raphtory-cypher/examples/raphtory_cypher.rs @@ -73,6 +73,10 @@ mod cypher { #[arg(short, long)] node_props: Option, + /// Node properties to load + #[arg(short, long)] + node_type_col: Option, + /// Edge list parquet files to load as layers #[arg(short='l', last = true, value_parser = parse_key_val::)] layers: Vec<(String, ArgLayer)>, @@ -192,6 +196,7 @@ mod cypher { args.read_chunk_size, args.concurrent_files, args.num_threads, + args.node_type_col.as_deref(), ) .expect("Failed to load graph"); } diff --git a/raphtory-cypher/src/executor/table_provider/node.rs b/raphtory-cypher/src/executor/table_provider/node.rs index fbca415c88..112afb7d84 100644 --- a/raphtory-cypher/src/executor/table_provider/node.rs +++ b/raphtory-cypher/src/executor/table_provider/node.rs @@ -1,5 +1,7 @@ -use std::{any::Any, fmt::Formatter, sync::Arc}; - +use crate::{ + arrow2::{self, array::to_data, datatypes::ArrowDataType}, + executor::ExecError, +}; use arrow::datatypes::UInt64Type; use arrow_array::{make_array, Array, PrimitiveArray}; use arrow_buffer::ScalarBuffer; @@ -19,17 +21,12 @@ use datafusion::{ }, }; use futures::Stream; -use pometry_storage::properties::Properties; - +use pometry_storage::properties::ConstProps; use raphtory::{ core::entities::VID, disk_graph::{graph_impl::DiskGraph, prelude::*}, }; - -use crate::{ - arrow2::{self, array::to_data, datatypes::ArrowDataType}, - executor::ExecError, -}; +use std::{any::Any, fmt::Formatter, sync::Arc}; pub struct NodeTableProvider { graph: DiskGraph, @@ -43,9 +40,11 @@ impl NodeTableProvider { let graph = g.as_ref(); let (num_partitions, chunk_size) = graph .node_properties() + .const_props + .as_ref() .map(|properties| { - let num_partitions = properties.const_props.props().num_chunks(); - let chunk_size = properties.const_props.props().chunk_size(); + let num_partitions = properties.props().num_chunks(); + let chunk_size = properties.props().chunk_size(); (num_partitions, chunk_size) }) .unwrap_or_else(|| { @@ -54,7 +53,10 @@ impl NodeTableProvider { }); let name_dt = graph.global_ordering().data_type(); - let schema = lift_arrow_schema(name_dt.clone(), graph.node_properties())?; + let schema = lift_arrow_schema( + name_dt.clone(), + graph.node_properties().const_props.as_ref(), + )?; Ok(Self { graph: g, @@ -67,7 +69,7 @@ impl NodeTableProvider { pub fn lift_arrow_schema( gid_dt: ArrowDataType, - properties: Option<&Properties>, + properties: Option<&ConstProps>, ) -> Result { let mut fields = vec![]; @@ -79,7 +81,7 @@ pub fn lift_arrow_schema( fields.push(arrow2::datatypes::Field::new("gid", gid_dt, false)); if let Some(properties) = properties { - fields.extend_from_slice(properties.const_props.prop_dtypes()); + fields.extend_from_slice(properties.prop_dtypes()); } let dt: DataType = ArrowDataType::Struct(fields).into(); @@ -141,9 +143,11 @@ async fn produce_record_batch( let graph = g.as_ref(); let properties = graph .node_properties() + .const_props + .as_ref() .ok_or_else(|| DataFusionError::Execution("Failed to find node properties".to_string()))?; - let const_props = properties.const_props.props(); + let const_props = properties.props(); let chunk = const_props.chunk(chunk_id); diff --git a/raphtory-cypher/src/lib.rs b/raphtory-cypher/src/lib.rs index 99d0dcb72b..0939e5972f 100644 --- a/raphtory-cypher/src/lib.rs +++ b/raphtory-cypher/src/lib.rs @@ -365,6 +365,7 @@ mod cypher { None, None, 1, + None, ) .unwrap(); diff --git a/raphtory-graphql/Cargo.toml b/raphtory-graphql/Cargo.toml index 07197220a9..c4a777e537 100644 --- a/raphtory-graphql/Cargo.toml +++ b/raphtory-graphql/Cargo.toml @@ -13,7 +13,8 @@ readme.workspace = true homepage.workspace = true [dependencies] -raphtory = { path = "../raphtory", version = "0.9.2", features = ['vectors', 'search', "io"] } +raphtory = { path = "../raphtory", version = "0.9.3", features = ['vectors', 'search', "io"] } +raphtory-api = { path = "../raphtory-api", version = "0.9.3" } bincode = { workspace = true } base64 = { workspace = true } thiserror = { workspace = true } diff --git a/raphtory-graphql/src/model/mod.rs b/raphtory-graphql/src/model/mod.rs index c6904efa90..23a34dca06 100644 --- a/raphtory-graphql/src/model/mod.rs +++ b/raphtory-graphql/src/model/mod.rs @@ -14,11 +14,12 @@ use dynamic_graphql::{ }; use itertools::Itertools; use raphtory::{ - core::{utils::errors::GraphError, ArcStr, Prop}, + core::{utils::errors::GraphError, Prop}, db::api::view::MaterializedGraph, prelude::{GraphViewOps, ImportOps, NodeViewOps, PropertyAdditionOps}, search::IndexedGraph, }; +use raphtory_api::core::storage::arc_str::ArcStr; use serde_json::Value; use std::{ collections::HashMap, diff --git a/raphtory-graphql/src/server.rs b/raphtory-graphql/src/server.rs index 73e9639b5e..6e6d14416a 100644 --- a/raphtory-graphql/src/server.rs +++ b/raphtory-graphql/src/server.rs @@ -247,7 +247,7 @@ impl RaphtoryServer { let (signal_sender, signal_receiver) = mpsc::channel(1); println!("Playground: http://localhost:{port}"); - let server_task = Server::new(TcpListener::bind(format!("127.0.0.1:{port}"))) + let server_task = Server::new(TcpListener::bind(format!("0.0.0.0:{port}"))) .run_with_graceful_shutdown(app, server_termination(signal_receiver), None); let server_result = tokio::spawn(server_task); diff --git a/raphtory/Cargo.toml b/raphtory/Cargo.toml index eb2607d685..3034f97cd5 100644 --- a/raphtory/Cargo.toml +++ b/raphtory/Cargo.toml @@ -15,7 +15,7 @@ homepage.workspace = true # See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html [dependencies] -raphtory-api = { path = "../raphtory-api", version = "0.9.2" } +raphtory-api = { path = "../raphtory-api", version = "0.9.3" } bincode = { workspace = true } chrono = { workspace = true } itertools = { workspace = true } @@ -109,6 +109,7 @@ python = [ "dep:display-error-chain", "dep:polars-arrow", "polars-arrow?/compute", + "raphtory-api/python", "dep:kdam", "dep:rpds", ] diff --git a/raphtory/src/algorithms/motifs/local_temporal_three_node_motifs.rs b/raphtory/src/algorithms/motifs/local_temporal_three_node_motifs.rs index 369ea215e1..a4f720fdd1 100644 --- a/raphtory/src/algorithms/motifs/local_temporal_three_node_motifs.rs +++ b/raphtory/src/algorithms/motifs/local_temporal_three_node_motifs.rs @@ -228,7 +228,7 @@ where .permutations(2) .map(|e| { u.graph() - .edge(*e.get(0).unwrap(), *e.get(1).unwrap()) + .edge(*e.first().unwrap(), *e.get(1).unwrap()) .iter() .flat_map(|edge| edge.explode()) .collect::>() diff --git a/raphtory/src/core/entities/edges/edge_store.rs b/raphtory/src/core/entities/edges/edge_store.rs index 1a9cef12ca..edb1fea95b 100644 --- a/raphtory/src/core/entities/edges/edge_store.rs +++ b/raphtory/src/core/entities/edges/edge_store.rs @@ -34,9 +34,14 @@ pub struct EdgeStore { pub(crate) eid: EID, pub(crate) src: VID, pub(crate) dst: VID, - pub(crate) layers: Vec, // each layer has its own set of properties - pub(crate) additions: Vec>, - pub(crate) deletions: Vec>, + pub(crate) data: Vec, +} + +#[derive(Serialize, Deserialize, Debug, Default, PartialEq)] +pub struct EdgeData { + pub(crate) layer: EdgeLayer, + pub(crate) additions: TimeIndex, + pub(crate) deletions: TimeIndex, } #[derive(Serialize, Deserialize, Debug, Default, PartialEq)] @@ -95,32 +100,28 @@ impl EdgeStore { } pub fn internal_num_layers(&self) -> usize { - self.layers - .len() - .max(self.additions.len()) - .max(self.deletions.len()) + self.data.len() } + fn get_or_allocate_layer(&mut self, layer_id: usize) -> &mut EdgeLayer { - if self.layers.len() <= layer_id { - self.layers.resize_with(layer_id + 1, Default::default); + if self.data.len() <= layer_id { + self.data.resize_with(layer_id + 1, Default::default); } - &mut self.layers[layer_id] + &mut self.data[layer_id].layer } pub fn has_layer_inner(&self, layer_id: usize) -> bool { - self.additions - .get(layer_id) + self.get_additions(layer_id) .filter(|t_index| !t_index.is_empty()) .is_some() || self - .deletions - .get(layer_id) + .get_deletions(layer_id) .filter(|t_index| !t_index.is_empty()) .is_some() } - pub fn layer_iter(&self) -> impl Iterator + '_ { - self.layers.iter() + pub fn layer_iter(&self) -> impl Iterator + '_ { + self.data.iter() } /// Iterate over (layer_id, additions, deletions) triplets for edge @@ -148,14 +149,14 @@ impl EdgeStore { .into_dyn_boxed(), LayerIds::One(id) => Box::new(iter::once(( *id, - self.additions.get(*id).unwrap_or(&TimeIndex::Empty), - self.deletions.get(*id).unwrap_or(&TimeIndex::Empty), + self.get_additions(*id).unwrap_or(&TimeIndex::Empty), + self.get_deletions(*id).unwrap_or(&TimeIndex::Empty), ))), LayerIds::Multiple(ids) => Box::new(ids.iter().map(|id| { ( *id, - self.additions.get(*id).unwrap_or(&TimeIndex::Empty), - self.deletions.get(*id).unwrap_or(&TimeIndex::Empty), + self.get_additions(*id).unwrap_or(&TimeIndex::Empty), + self.get_deletions(*id).unwrap_or(&TimeIndex::Empty), ) })), } @@ -167,11 +168,11 @@ impl EdgeStore { ) -> BoxedLIter<'a, &TimeIndex> { match layers { LayerIds::None => iter::empty().into_dyn_boxed(), - LayerIds::All => self.additions.iter().into_dyn_boxed(), - LayerIds::One(id) => self.additions.get(*id).into_iter().into_dyn_boxed(), + LayerIds::All => self.iter_additions().into_dyn_boxed(), + LayerIds::One(id) => self.get_additions(*id).into_iter().into_dyn_boxed(), LayerIds::Multiple(ids) => ids .iter() - .flat_map(|id| self.additions.get(*id)) + .flat_map(|id| self.get_additions(*id)) .into_dyn_boxed(), } } @@ -182,21 +183,20 @@ impl EdgeStore { ) -> BoxedLIter<'a, &TimeIndex> { match layers { LayerIds::None => iter::empty().into_dyn_boxed(), - LayerIds::All => self.deletions.iter().into_dyn_boxed(), - LayerIds::One(id) => self.deletions.get(*id).into_iter().into_dyn_boxed(), + LayerIds::All => self.iter_deletions().into_dyn_boxed(), + LayerIds::One(id) => self.get_deletions(*id).into_iter().into_dyn_boxed(), LayerIds::Multiple(ids) => ids .iter() - .flat_map(|id| self.deletions.get(*id)) + .flat_map(|id| self.get_deletions(*id)) .into_dyn_boxed(), } } pub fn layer_ids_window_iter(&self, w: Range) -> impl Iterator + '_ { let layer_ids = self - .additions - .iter() + .iter_additions() .enumerate() - .zip_longest(self.deletions.iter().enumerate()) + .zip_longest(self.iter_deletions().enumerate()) .flat_map(move |e| match e { EitherOrBoth::Both((i, t1), (_, t2)) => { if t1.contains(w.clone()) || t2.contains(w.clone()) { @@ -229,14 +229,12 @@ impl EdgeStore { eid: 0.into(), src, dst, - layers: Vec::with_capacity(1), - additions: Vec::with_capacity(1), - deletions: Vec::with_capacity(1), + data: Vec::with_capacity(1), } } pub fn layer(&self, layer_id: usize) -> Option<&EdgeLayer> { - self.layers.get(layer_id) + self.data.get(layer_id).map(|data| &data.layer) } /// an edge is active in a window if it has an addition event in any of the layers @@ -244,12 +242,10 @@ impl EdgeStore { match layer_ids { LayerIds::None => false, LayerIds::All => self - .additions - .iter() + .iter_additions() .any(|t_index| t_index.contains(w.clone())), LayerIds::One(l_id) => self - .additions - .get(*l_id) + .get_additions(*l_id) .map(|t_index| t_index.contains(w)) .unwrap_or(false), LayerIds::Multiple(layers) => layers @@ -261,11 +257,11 @@ impl EdgeStore { pub fn last_deletion(&self, layer_ids: &LayerIds) -> Option { match layer_ids { LayerIds::None => None, - LayerIds::All => self.deletions.iter().flat_map(|d| d.last()).max(), - LayerIds::One(id) => self.deletions.get(*id).and_then(|t| t.last()), + LayerIds::All => self.iter_deletions().flat_map(|d| d.last()).max(), + LayerIds::One(id) => self.get_deletions(*id).and_then(|t| t.last()), LayerIds::Multiple(ids) => ids .iter() - .flat_map(|id| self.deletions.get(*id).and_then(|t| t.last())) + .flat_map(|id| self.get_deletions(*id).and_then(|t| t.last())) .max(), } } @@ -273,19 +269,19 @@ impl EdgeStore { pub fn last_addition(&self, layer_ids: &LayerIds) -> Option { match layer_ids { LayerIds::None => None, - LayerIds::All => self.additions.iter().flat_map(|d| d.last()).max(), - LayerIds::One(id) => self.additions.get(*id).and_then(|t| t.last()), + LayerIds::All => self.iter_additions().flat_map(|d| d.last()).max(), + LayerIds::One(id) => self.get_additions(*id).and_then(|t| t.last()), LayerIds::Multiple(ids) => ids .iter() - .flat_map(|id| self.additions.get(*id).and_then(|t| t.last())) + .flat_map(|id| self.get_additions(*id).and_then(|t| t.last())) .max(), } } pub fn temporal_prop_layer_inner(&self, layer_id: usize, prop_id: usize) -> Option<&TProp> { - self.layers + self.data .get(layer_id) - .and_then(|layer| layer.temporal_property(prop_id)) + .and_then(|layer| layer.layer.temporal_property(prop_id)) } pub fn layer_mut(&mut self, layer_id: usize) -> impl DerefMut + '_ { @@ -293,17 +289,17 @@ impl EdgeStore { } pub fn deletions_mut(&mut self, layer_id: usize) -> &mut TimeIndex { - if self.deletions.len() <= layer_id { - self.deletions.resize_with(layer_id + 1, Default::default); + if self.data.len() <= layer_id { + self.data.resize_with(layer_id + 1, Default::default); } - &mut self.deletions[layer_id] + &mut self.data[layer_id].deletions } pub fn additions_mut(&mut self, layer_id: usize) -> &mut TimeIndex { - if self.additions.len() <= layer_id { - self.additions.resize_with(layer_id + 1, Default::default); + if self.data.len() <= layer_id { + self.data.resize_with(layer_id + 1, Default::default); } - &mut self.additions[layer_id] + &mut self.data[layer_id].additions } pub(crate) fn temp_prop_ids( @@ -311,22 +307,39 @@ impl EdgeStore { layer_id: Option, ) -> Box + '_> { if let Some(layer_id) = layer_id { - Box::new(self.layers.get(layer_id).into_iter().flat_map(|layer| { + Box::new(self.data.get(layer_id).into_iter().flat_map(|layer| { layer + .layer .props() .into_iter() .flat_map(|props| props.temporal_prop_ids()) })) } else { Box::new( - self.layers + self.data .iter() - .flat_map(|layer| layer.props().map(|prop| prop.temporal_prop_ids())) + .flat_map(|layer| layer.layer.props().map(|prop| prop.temporal_prop_ids())) .kmerge() .dedup(), ) } } + + pub fn get_additions(&self, layer_id: usize) -> Option<&TimeIndex> { + self.data.get(layer_id).map(|data| &data.additions) + } + + pub fn get_deletions(&self, layer_id: usize) -> Option<&TimeIndex> { + self.data.get(layer_id).map(|data| &data.deletions) + } + + pub fn iter_additions(&self) -> impl Iterator> + '_ { + self.data.iter().map(|data| &data.additions) + } + + pub fn iter_deletions(&self) -> impl Iterator> + '_ { + self.data.iter().map(|data| &data.deletions) + } } impl EdgeStorageIntoOps for ArcEntry { diff --git a/raphtory/src/core/entities/graph/tgraph.rs b/raphtory/src/core/entities/graph/tgraph.rs index dcb51de815..bd71ec16ba 100644 --- a/raphtory/src/core/entities/graph/tgraph.rs +++ b/raphtory/src/core/entities/graph/tgraph.rs @@ -7,11 +7,7 @@ use crate::{ timer::{MaxCounter, MinCounter, TimeCounterTrait}, }, nodes::{input_node::InputNode, node_ref::NodeRef, node_store::NodeStore}, - properties::{ - graph_meta::GraphMeta, - props::{ArcReadLockedVec, Meta}, - tprop::TProp, - }, + properties::{graph_meta::GraphMeta, props::Meta, tprop::TProp}, LayerIds, EID, VID, }, storage::{ @@ -20,7 +16,7 @@ use crate::{ Entry, EntryMut, }, utils::errors::GraphError, - ArcStr, Direction, Prop, + Direction, Prop, }, db::api::{ storage::locked::LockedGraph, @@ -29,6 +25,7 @@ use crate::{ prelude::DeletionOps, }; use dashmap::{DashMap, DashSet}; +use raphtory_api::core::storage::{arc_str::ArcStr, locked_vec::ArcReadLockedVec, FxDashMap}; use rustc_hash::FxHasher; use serde::{Deserialize, Serialize}; use std::{ @@ -38,7 +35,6 @@ use std::{ sync::{atomic::AtomicUsize, Arc}, }; -pub(crate) type FxDashMap = DashMap>; pub(crate) type FxDashSet = DashSet>; #[derive(Serialize, Deserialize, Debug, Clone)] diff --git a/raphtory/src/core/entities/nodes/input_node.rs b/raphtory/src/core/entities/nodes/input_node.rs index 17d92f1c38..10f99ce39e 100644 --- a/raphtory/src/core/entities/nodes/input_node.rs +++ b/raphtory/src/core/entities/nodes/input_node.rs @@ -28,10 +28,8 @@ pub fn parse_u64_strict(input: &str) -> Option { if !(byte_1..=MAX_U64_BYTES[0]).contains(&first) { return None; } - } else { - if !(byte_1..=byte_9).contains(&first) { - return None; - } + } else if !(byte_1..=byte_9).contains(&first) { + return None; } let mut result = (first - byte_0) as u64; @@ -41,14 +39,12 @@ pub fn parse_u64_strict(input: &str) -> Option { return None; } check_max = next_byte == max_byte; - } else { - if !(byte_0..=byte_9).contains(&next_byte) { - return None; - } + } else if !(byte_0..=byte_9).contains(&next_byte) { + return None; } result = result * 10 + (next_byte - byte_0) as u64; } - return Some(result); + Some(result) } pub trait InputNode: Clone { diff --git a/raphtory/src/core/entities/nodes/node_store.rs b/raphtory/src/core/entities/nodes/node_store.rs index b8b5a42614..7b9ae2e969 100644 --- a/raphtory/src/core/entities/nodes/node_store.rs +++ b/raphtory/src/core/entities/nodes/node_store.rs @@ -8,7 +8,7 @@ use crate::core::{ storage::{ lazy_vec::IllegalSet, timeindex::{AsTime, TimeIndex, TimeIndexEntry}, - ArcEntry, + ArcEntry, Entry, }, utils::errors::GraphError, Direction, Prop, @@ -383,6 +383,24 @@ impl ArcEntry { } } +impl<'a> Entry<'a, NodeStore> { + pub fn into_neighbours(self, layers: &LayerIds, dir: Direction) -> LockedRefNeighboursIter<'a> { + LockedRefNeighboursIterBuilder { + entry: self, + iter_builder: |node| node.neighbours(layers, dir), + } + .build() + } + + pub fn into_edges(self, layers: &LayerIds, dir: Direction) -> LockedRefEdgesIter<'a> { + LockedRefEdgesIterBuilder { + entry: self, + iter_builder: |node| node.edge_tuples(layers, dir), + } + .build() + } +} + #[self_referencing] pub struct LockedAdjIter { entry: ArcEntry, @@ -416,6 +434,38 @@ impl Iterator for LockedNeighboursIter { } } +#[self_referencing] +pub struct LockedRefNeighboursIter<'a> { + entry: Entry<'a, NodeStore>, + #[borrows(entry)] + #[covariant] + iter: Box + Send + 'this>, +} + +impl<'a> Iterator for LockedRefNeighboursIter<'a> { + type Item = VID; + + fn next(&mut self) -> Option { + self.with_iter_mut(|iter| iter.next()) + } +} + +#[self_referencing] +pub struct LockedRefEdgesIter<'a> { + entry: Entry<'a, NodeStore>, + #[borrows(entry)] + #[covariant] + iter: Box + Send + 'this>, +} + +impl<'a> Iterator for LockedRefEdgesIter<'a> { + type Item = EdgeRef; + + fn next(&mut self) -> Option { + self.with_iter_mut(|iter| iter.next()) + } +} + pub struct LockedLayers { entry: ArcEntry, pos: usize, diff --git a/raphtory/src/core/entities/properties/graph_meta.rs b/raphtory/src/core/entities/properties/graph_meta.rs index a2eb8d506c..a04ca614c0 100644 --- a/raphtory/src/core/entities/properties/graph_meta.rs +++ b/raphtory/src/core/entities/properties/graph_meta.rs @@ -1,14 +1,11 @@ use crate::core::{ - entities::{ - graph::tgraph::FxDashMap, - properties::{ - props::{ArcReadLockedVec, DictMapper}, - tprop::TProp, - }, - }, + entities::properties::tprop::TProp, storage::{locked_view::LockedView, timeindex::TimeIndexEntry}, utils::errors::{GraphError, MutateGraphError}, - ArcStr, Prop, PropType, + Prop, PropType, +}; +use raphtory_api::core::storage::{ + arc_str::ArcStr, dict_mapper::DictMapper, locked_vec::ArcReadLockedVec, FxDashMap, }; use serde::{Deserialize, Serialize}; use std::ops::DerefMut; diff --git a/raphtory/src/core/entities/properties/props.rs b/raphtory/src/core/entities/properties/props.rs index c5495b2974..bc47b7fa9b 100644 --- a/raphtory/src/core/entities/properties/props.rs +++ b/raphtory/src/core/entities/properties/props.rs @@ -1,22 +1,23 @@ use crate::{ core::{ - entities::{graph::tgraph::FxDashMap, properties::tprop::TProp}, + entities::properties::tprop::TProp, storage::{ lazy_vec::{IllegalSet, LazyVec}, timeindex::TimeIndexEntry, }, utils::errors::GraphError, - ArcStr, Prop, PropType, + Prop, PropType, }, db::api::storage::tprop_storage_ops::TPropOps, }; use lock_api; use parking_lot::RwLock; +use raphtory_api::core::storage::{ + arc_str::ArcStr, dict_mapper::DictMapper, locked_vec::ArcReadLockedVec, +}; use serde::{Deserialize, Serialize}; use std::{borrow::Borrow, fmt::Debug, hash::Hash, ops::Deref, sync::Arc}; -type ArcRwLockReadGuard = lock_api::ArcRwLockReadGuard; - #[derive(Serialize, Deserialize, Default, Debug, PartialEq)] pub struct Props { // properties @@ -189,12 +190,12 @@ impl Meta { #[inline] pub fn get_layer_id(&self, name: &str) -> Option { - self.meta_layer.map.get(name).as_deref().copied() + self.meta_layer.get_id(name) } #[inline] pub fn get_node_type_id(&self, node_type: &str) -> Option { - self.meta_node_type.map.get(node_type).as_deref().copied() + self.meta_node_type.get_id(node_type) } pub fn get_layer_name_by_id(&self, id: usize) -> ArcStr { @@ -210,19 +211,14 @@ impl Meta { } pub fn get_all_layers(&self) -> Vec { - self.meta_layer - .map - .iter() - .map(|entry| *entry.value()) - .collect() + self.meta_layer.get_values() } pub fn get_all_node_types(&self) -> Vec { self.meta_node_type - .map + .get_keys() .iter() - .filter_map(|entry| { - let key = entry.key(); + .filter_map(|key| { if key != "_default" { Some(key.clone()) } else { @@ -249,110 +245,6 @@ impl Meta { } } -#[derive(Serialize, Deserialize, Default, Debug)] -pub struct DictMapper { - map: FxDashMap, - reverse_map: Arc>>, //FIXME: a boxcar vector would be a great fit if it was serializable... -} - -#[derive(Debug)] -pub struct ArcReadLockedVec { - guard: ArcRwLockReadGuard>, -} - -impl Deref for ArcReadLockedVec { - type Target = Vec; - - #[inline] - fn deref(&self) -> &Self::Target { - self.guard.deref() - } -} - -impl IntoIterator for ArcReadLockedVec { - type Item = T; - type IntoIter = LockedIter; - - fn into_iter(self) -> Self::IntoIter { - let guard = self.guard; - let len = guard.len(); - let pos = 0; - LockedIter { guard, pos, len } - } -} - -pub struct LockedIter { - guard: ArcRwLockReadGuard>, - pos: usize, - len: usize, -} - -impl Iterator for LockedIter { - type Item = T; - - fn next(&mut self) -> Option { - if self.pos < self.len { - let next_val = Some(self.guard[self.pos].clone()); - self.pos += 1; - next_val - } else { - None - } - } -} - -impl DictMapper { - pub fn get_or_create_id(&self, name: &Q) -> usize - where - Q: Hash + Eq + ?Sized + ToOwned + Borrow, - T: Into, - { - if let Some(existing_id) = self.map.get(name.borrow()) { - return *existing_id; - } - - let name = name.to_owned().into(); - let new_id = self.map.entry(name.clone()).or_insert_with(|| { - let mut reverse = self.reverse_map.write(); - let id = reverse.len(); - reverse.push(name); - id - }); - *new_id - } - - pub fn get_id(&self, name: &str) -> Option { - self.map.get(name).map(|id| *id) - } - - pub fn has_name(&self, id: usize) -> bool { - let guard = self.reverse_map.read(); - guard.get(id).is_some() - } - - pub fn get_name(&self, id: usize) -> ArcStr { - let guard = self.reverse_map.read(); - guard - .get(id) - .cloned() - .expect("internal ids should always be mapped to a name") - } - - pub fn get_keys(&self) -> ArcReadLockedVec { - ArcReadLockedVec { - guard: self.reverse_map.read_arc(), - } - } - - pub fn len(&self) -> usize { - self.reverse_map.read().len() - } - - pub fn is_empty(&self) -> bool { - self.reverse_map.read().is_empty() - } -} - #[derive(Default, Debug, Serialize, Deserialize)] pub struct PropMapper { id_mapper: DictMapper, @@ -426,82 +318,9 @@ impl PropMapper { #[cfg(test)] mod test { - use std::{collections::HashMap, sync::Arc, thread}; - - use quickcheck_macros::quickcheck; - use rand::seq::SliceRandom; - use rayon::prelude::*; - use super::*; - - #[test] - fn test_dict_mapper() { - let mapper = DictMapper::default(); - assert_eq!(mapper.get_or_create_id("test"), 0); - assert_eq!(mapper.get_or_create_id("test"), 0); - assert_eq!(mapper.get_or_create_id("test2"), 1); - assert_eq!(mapper.get_or_create_id("test2"), 1); - assert_eq!(mapper.get_or_create_id("test"), 0); - } - - #[quickcheck] - fn check_dict_mapper_concurrent_write(write: Vec) -> bool { - let n = 100; - let mapper: DictMapper = DictMapper::default(); - - // create n maps from strings to ids in parallel - let res: Vec> = (0..n) - .into_par_iter() - .map(|_| { - let mut ids: HashMap = Default::default(); - let mut rng = rand::thread_rng(); - let mut write_s = write.clone(); - write_s.shuffle(&mut rng); - for s in write_s { - let id = mapper.get_or_create_id(s.as_str()); - ids.insert(s, id); - } - ids - }) - .collect(); - - // check that all maps are the same and that all strings have been assigned an id - let res_0 = &res[0]; - res[1..n].iter().all(|v| res_0 == v) && write.iter().all(|v| mapper.get_id(v).is_some()) - } - - // map 5 strings to 5 ids from 4 threads concurrently 1000 times - #[test] - fn test_dict_mapper_concurrent() { - use std::{sync::Arc, thread}; - - let mapper = Arc::new(DictMapper::default()); - let mut threads = Vec::new(); - for _ in 0..4 { - let mapper = Arc::clone(&mapper); - threads.push(thread::spawn(move || { - for _ in 0..1000 { - mapper.get_or_create_id("test"); - mapper.get_or_create_id("test2"); - mapper.get_or_create_id("test3"); - mapper.get_or_create_id("test4"); - mapper.get_or_create_id("test5"); - } - })); - } - - for thread in threads { - thread.join().unwrap(); - } - - let mut actual = vec!["test", "test2", "test3", "test4", "test5"] - .into_iter() - .map(|name| mapper.get_or_create_id(name)) - .collect::>(); - actual.sort(); - - assert_eq!(actual, vec![0, 1, 2, 3, 4]); - } + use rayon::prelude::*; + use std::{sync::Arc, thread}; #[test] fn test_prop_mapper_concurrent() { diff --git a/raphtory/src/core/entities/properties/tprop.rs b/raphtory/src/core/entities/properties/tprop.rs index a5576a3451..856eb71a0c 100644 --- a/raphtory/src/core/entities/properties/tprop.rs +++ b/raphtory/src/core/entities/properties/tprop.rs @@ -1,7 +1,7 @@ use crate::{ core::{ entities::properties::tcell::TCell, storage::timeindex::TimeIndexEntry, - utils::errors::GraphError, ArcStr, DocumentInput, Prop, PropType, + utils::errors::GraphError, DocumentInput, Prop, PropType, }, db::{ api::storage::tprop_storage_ops::TPropOps, @@ -9,6 +9,7 @@ use crate::{ }, }; use chrono::{DateTime, NaiveDateTime, Utc}; +use raphtory_api::core::storage::arc_str::ArcStr; use serde::{Deserialize, Serialize}; use std::{collections::HashMap, iter, ops::Range, sync::Arc}; @@ -313,7 +314,7 @@ impl TProp { } impl<'a> TPropOps<'a> for &'a TProp { - fn last_before(self, t: i64) -> Option<(TimeIndexEntry, Prop)> { + fn last_before(&self, t: i64) -> Option<(TimeIndexEntry, Prop)> { match self { TProp::Empty => None, TProp::Str(cell) => cell.last_before(t).map(|(t, v)| (t, Prop::Str(v.clone()))), diff --git a/raphtory/src/core/mod.rs b/raphtory/src/core/mod.rs index 2f84af6a20..0ecfb962dd 100644 --- a/raphtory/src/core/mod.rs +++ b/raphtory/src/core/mod.rs @@ -54,87 +54,8 @@ pub mod storage; pub mod utils; // this is here because Arc annoyingly doesn't implement all the expected comparisons -#[derive(Clone, Debug, Eq, Ord, Hash, Serialize, Deserialize)] -pub struct ArcStr(pub(crate) Arc); - -impl Display for ArcStr { - fn fmt(&self, f: &mut Formatter<'_>) -> fmt::Result { - Display::fmt(&self.0, f) - } -} - -impl>> From for ArcStr { - fn from(value: T) -> Self { - ArcStr(value.into()) - } -} - -impl From for String { - fn from(value: ArcStr) -> Self { - value.to_string() - } -} - -impl From<&ArcStr> for String { - fn from(value: &ArcStr) -> Self { - value.clone().into() - } -} - -impl Deref for ArcStr { - type Target = Arc; - - #[inline] - fn deref(&self) -> &Self::Target { - &self.0 - } -} - -impl Borrow for ArcStr { - #[inline] - fn borrow(&self) -> &str { - self.0.borrow() - } -} - -impl AsRef for ArcStr -where - T: ?Sized, - ::Target: AsRef, -{ - fn as_ref(&self) -> &T { - self.deref().as_ref() - } -} - -impl + ?Sized> PartialEq for ArcStr { - fn eq(&self, other: &T) -> bool { - >::borrow(self).eq(other.borrow()) - } -} - -impl> PartialOrd for ArcStr { - fn partial_cmp(&self, other: &T) -> Option { - >::borrow(self).partial_cmp(other.borrow()) - } -} - -pub trait OptionAsStr<'a> { - fn as_str(self) -> Option<&'a str>; -} - -impl<'a, O: AsRef + 'a> OptionAsStr<'a> for &'a Option { - fn as_str(self) -> Option<&'a str> { - self.as_ref().map(|s| s.as_ref()) - } -} - -impl<'a, O: AsRef + 'a> OptionAsStr<'a> for Option<&'a O> { - fn as_str(self) -> Option<&'a str> { - self.map(|s| s.as_ref()) - } -} +use raphtory_api::core::storage::arc_str::ArcStr; pub use raphtory_api::core::*; #[derive(Clone, Copy, Debug, Serialize, Deserialize, PartialEq, Hash)] @@ -965,43 +886,3 @@ mod serde_value_into_prop { } } } - -#[cfg(test)] -mod test_arc_str { - use crate::core::{ArcStr, OptionAsStr, Prop}; - use std::sync::Arc; - - #[test] - fn can_compare_with_str() { - let test: ArcStr = "test".into(); - assert_eq!(test, "test"); - assert_eq!(test, "test".to_string()); - assert_eq!(test, Arc::from("test")); - assert_eq!(&test, &"test".to_string()) - } - - #[test] - fn test_option_conv() { - let test: Option = Some("test".into()); - - let test_ref = test.as_ref(); - - let opt_str = test.as_str(); - let opt_str3 = test_ref.as_str(); - - let test2 = Some("test".to_string()); - let opt_str_2 = test2.as_str(); - - assert_eq!(opt_str, Some("test")); - assert_eq!(opt_str_2, Some("test")); - assert_eq!(opt_str3, Some("test")); - } - - #[test] - fn test_prop_min_max() { - let v1 = Prop::I64(4); - let v2 = Prop::I64(2); - assert_eq!(v1.clone().max(v2.clone()), Some(Prop::I64(4))); - assert_eq!((v1.min(v2)), Some(Prop::I64(2))); - } -} diff --git a/raphtory/src/core/storage/mod.rs b/raphtory/src/core/storage/mod.rs index fdd93da811..218454320d 100644 --- a/raphtory/src/core/storage/mod.rs +++ b/raphtory/src/core/storage/mod.rs @@ -9,6 +9,7 @@ pub mod timeindex; use self::iter::Iter; use lock_api; use locked_view::LockedView; +use ouroboros::self_referencing; use parking_lot::{RwLock, RwLockReadGuard}; use rayon::prelude::*; use serde::{Deserialize, Serialize}; diff --git a/raphtory/src/core/storage/timeindex.rs b/raphtory/src/core/storage/timeindex.rs index d2473b49ac..f673eb2611 100644 --- a/raphtory/src/core/storage/timeindex.rs +++ b/raphtory/src/core/storage/timeindex.rs @@ -240,6 +240,7 @@ impl<'a, T: AsTime, Ops: TimeIndexOps, V: AsRef> + Send = LayeredTimeIndexWindow<'b, Ops::RangeType<'b>> where Self: 'b; + #[inline(always)] fn active(&self, w: Range) -> bool { self.view.as_ref().iter().any(|t| t.active(w.clone())) } @@ -425,6 +426,7 @@ where type IndexType = T; type RangeType<'a> = TimeIndexWindow<'a, T> where Self: 'a; + #[inline(always)] fn active(&self, w: Range) -> bool { match self { TimeIndexWindow::Empty => false, @@ -509,6 +511,7 @@ impl<'a, Ops: TimeIndexOps + 'a> TimeIndexOps for LayeredTimeIndexWindow<'a, Ops type IndexType = Ops::IndexType; type RangeType<'b> = LayeredTimeIndexWindow<'b, Ops::RangeType<'b>> where Self: 'b; + #[inline(always)] fn active(&self, w: Range) -> bool { self.timeindex.iter().any(|t| t.active(w.clone())) } diff --git a/raphtory/src/core/utils/errors.rs b/raphtory/src/core/utils/errors.rs index ce5fe09555..78c3ff7744 100644 --- a/raphtory/src/core/utils/errors.rs +++ b/raphtory/src/core/utils/errors.rs @@ -1,4 +1,5 @@ -use crate::core::{utils::time::error::ParseTimeError, ArcStr, Prop, PropType}; +use crate::core::{utils::time::error::ParseTimeError, Prop, PropType}; +use raphtory_api::core::storage::arc_str::ArcStr; #[cfg(feature = "search")] use tantivy; #[cfg(feature = "search")] diff --git a/raphtory/src/db/api/mutation/import_ops.rs b/raphtory/src/db/api/mutation/import_ops.rs index 8ad52c0070..eb53cc25b4 100644 --- a/raphtory/src/db/api/mutation/import_ops.rs +++ b/raphtory/src/db/api/mutation/import_ops.rs @@ -5,7 +5,6 @@ use crate::{ GraphError, GraphError::{EdgeExistsError, NodeExistsError}, }, - OptionAsStr, }, db::{ api::{ @@ -19,6 +18,7 @@ use crate::{ }, prelude::{AdditionOps, EdgeViewOps, NodeViewOps}, }; +use raphtory_api::core::storage::arc_str::OptionAsStr; use super::time_from_input; @@ -127,8 +127,14 @@ impl< return Err(NodeExistsError(node.id())); } - let node_internal = - self.resolve_node(node.id(), node.graph.core_node_entry(node.node).name()); + let node_internal = self.resolve_node( + node.id(), + node.graph + .core_node_entry(node.node) + .name() + .as_ref() + .map(|x| x.as_ref()), + ); let node_internal_type_id = self .resolve_node_type(node_internal, node.node_type().as_str()) .unwrap_or(0usize); diff --git a/raphtory/src/db/api/properties/constant_props.rs b/raphtory/src/db/api/properties/constant_props.rs index b32f31ddc5..ad11c9c061 100644 --- a/raphtory/src/db/api/properties/constant_props.rs +++ b/raphtory/src/db/api/properties/constant_props.rs @@ -1,7 +1,5 @@ -use crate::{ - core::{ArcStr, Prop}, - db::api::properties::internal::ConstPropertiesOps, -}; +use crate::{core::Prop, db::api::properties::internal::ConstPropertiesOps}; +use raphtory_api::core::storage::arc_str::ArcStr; use std::{collections::HashMap, iter::Zip}; pub struct ConstProperties { diff --git a/raphtory/src/db/api/properties/internal.rs b/raphtory/src/db/api/properties/internal.rs index 109006bb07..51ad02c985 100644 --- a/raphtory/src/db/api/properties/internal.rs +++ b/raphtory/src/db/api/properties/internal.rs @@ -1,9 +1,10 @@ use crate::{ - core::{storage::timeindex::AsTime, ArcStr, Prop}, + core::{storage::timeindex::AsTime, Prop}, db::api::view::internal::Base, }; use chrono::{DateTime, Utc}; use enum_dispatch::enum_dispatch; +use raphtory_api::core::storage::arc_str::ArcStr; #[enum_dispatch] pub trait TemporalPropertyViewOps { diff --git a/raphtory/src/db/api/properties/props.rs b/raphtory/src/db/api/properties/props.rs index 8c1c469a06..db651b0981 100644 --- a/raphtory/src/db/api/properties/props.rs +++ b/raphtory/src/db/api/properties/props.rs @@ -1,9 +1,10 @@ use crate::{ - core::{ArcStr, Prop}, + core::Prop, db::api::properties::{ constant_props::ConstProperties, internal::*, temporal_props::TemporalProperties, }, }; +use raphtory_api::core::storage::arc_str::ArcStr; use std::collections::HashMap; /// View of the properties of an entity (graph|node|edge) diff --git a/raphtory/src/db/api/properties/temporal_props.rs b/raphtory/src/db/api/properties/temporal_props.rs index 4fe2ae219a..0d8700a48c 100644 --- a/raphtory/src/db/api/properties/temporal_props.rs +++ b/raphtory/src/db/api/properties/temporal_props.rs @@ -1,9 +1,10 @@ use crate::{ - core::{ArcStr, DocumentInput, Prop, PropUnwrap}, + core::{DocumentInput, Prop, PropUnwrap}, db::{api::properties::internal::PropertiesOps, graph::views::deletion_graph::PersistentGraph}, prelude::Graph, }; use chrono::{DateTime, NaiveDateTime, Utc}; +use raphtory_api::core::storage::arc_str::ArcStr; use std::{ collections::{HashMap, HashSet}, iter::Zip, diff --git a/raphtory/src/db/api/state/lazy_node_state.rs b/raphtory/src/db/api/state/lazy_node_state.rs index 3027314889..8661e09dd1 100644 --- a/raphtory/src/db/api/state/lazy_node_state.rs +++ b/raphtory/src/db/api/state/lazy_node_state.rs @@ -50,7 +50,7 @@ impl< } pub fn compute(&self) -> NodeState<'graph, V, G, GH> { - let cg = self.graph.core_graph(); + let cg = self.graph.core_graph().lock(); if self.graph.nodes_filtered() || self.node_types_filter.is_some() { let keys: Vec<_> = cg .nodes_par(&self.graph, self.node_types_filter.as_ref()) @@ -92,7 +92,7 @@ impl<'graph, G: GraphViewOps<'graph>, GH: GraphViewOps<'graph>, V: 'graph> IntoI type IntoIter = Box + Send + 'graph>; fn into_iter(self) -> Self::IntoIter { - let cg = self.graph.core_graph(); + let cg = self.graph.core_graph().lock(); let graph = self.graph; let op = self.op; cg.clone() @@ -126,7 +126,7 @@ impl< where 'graph: 'a, { - let cg = self.graph.core_graph(); + let cg = self.graph.core_graph().lock(); cg.clone() .into_nodes_iter(&self.graph, self.node_types_filter.clone()) .map(move |vid| self.apply(&cg, &self.graph, vid)) @@ -136,14 +136,14 @@ impl< where 'graph: 'a, { - let cg = self.graph.core_graph(); + let cg = self.graph.core_graph().lock(); cg.clone() .into_nodes_par(&self.graph, self.node_types_filter.clone()) .map(move |vid| self.apply(&cg, &self.graph, vid)) } fn into_values(self) -> impl Iterator + 'graph { - let cg = self.graph.core_graph(); + let cg = self.graph.core_graph().lock(); let graph = self.graph.clone(); let op = self.op; cg.clone() @@ -152,7 +152,7 @@ impl< } fn into_par_values(self) -> impl ParallelIterator + 'graph { - let cg = self.graph.core_graph(); + let cg = self.graph.core_graph().lock(); let graph = self.graph.clone(); let op = self.op; cg.clone() @@ -171,7 +171,7 @@ impl< where 'graph: 'a, { - let cg = self.graph.core_graph(); + let cg = self.graph.core_graph().lock(); cg.clone() .into_nodes_iter(self.graph.clone(), self.node_types_filter.clone()) .map(move |n| { @@ -193,7 +193,7 @@ impl< where 'graph: 'a, { - let cg = self.graph.core_graph(); + let cg = self.graph.core_graph().lock(); cg.clone() .into_nodes_par(self.graph.clone(), self.node_types_filter.clone()) .map(move |n| { @@ -235,7 +235,8 @@ impl< return None; } if let Some(type_filter) = self.node_types_filter.as_ref() { - if !type_filter[self.graph.core_node_entry(vid).node_type_id()] { + let core_node_entry = &self.graph.core_node_entry(vid); + if !type_filter[core_node_entry.node_type_id()] { return None; } } diff --git a/raphtory/src/db/api/storage/edges/edge_entry.rs b/raphtory/src/db/api/storage/edges/edge_entry.rs index ef8b730552..b32be169a4 100644 --- a/raphtory/src/db/api/storage/edges/edge_entry.rs +++ b/raphtory/src/db/api/storage/edges/edge_entry.rs @@ -21,15 +21,18 @@ use std::ops::Range; #[derive(Debug)] pub enum EdgeStorageEntry<'a> { - Mem(Entry<'a, EdgeStore>), + Mem(&'a EdgeStore), + Unlocked(Entry<'a, EdgeStore>), #[cfg(feature = "storage")] Disk(DiskEdge<'a>), } impl<'a> EdgeStorageEntry<'a> { + #[inline] pub fn as_ref(&self) -> EdgeStorageRef { match self { EdgeStorageEntry::Mem(edge) => EdgeStorageRef::Mem(edge), + EdgeStorageEntry::Unlocked(edge) => EdgeStorageRef::Mem(edge), #[cfg(feature = "storage")] EdgeStorageEntry::Disk(edge) => EdgeStorageRef::Disk(*edge), } diff --git a/raphtory/src/db/api/storage/edges/edge_storage_ops.rs b/raphtory/src/db/api/storage/edges/edge_storage_ops.rs index ca3b1701ca..40e96574f6 100644 --- a/raphtory/src/db/api/storage/edges/edge_storage_ops.rs +++ b/raphtory/src/db/api/storage/edges/edge_storage_ops.rs @@ -42,6 +42,7 @@ impl<'a> TimeIndexOps for TimeIndexRef<'a> { type IndexType = TimeIndexEntry; type RangeType<'b> = TimeIndexRef<'b> where Self: 'b; + #[inline(always)] fn active(&self, w: Range) -> bool { match self { TimeIndexRef::Ref(t) => t.active(w), @@ -301,11 +302,11 @@ impl<'a> EdgeStorageOps<'a> for &'a EdgeStore { } fn additions(self, layer_id: usize) -> TimeIndexRef<'a> { - TimeIndexRef::Ref(self.additions.get(layer_id).unwrap_or(&TimeIndex::Empty)) + TimeIndexRef::Ref(self.get_additions(layer_id).unwrap_or(&TimeIndex::Empty)) } fn deletions(self, layer_id: usize) -> TimeIndexRef<'a> { - TimeIndexRef::Ref(self.deletions.get(layer_id).unwrap_or(&TimeIndex::Empty)) + TimeIndexRef::Ref(self.get_deletions(layer_id).unwrap_or(&TimeIndex::Empty)) } fn temporal_prop_layer(self, layer_id: usize, prop_id: usize) -> impl TPropOps<'a> + 'a { diff --git a/raphtory/src/db/api/storage/edges/edges.rs b/raphtory/src/db/api/storage/edges/edges.rs index 2f3cd6ffe2..8117610716 100644 --- a/raphtory/src/db/api/storage/edges/edges.rs +++ b/raphtory/src/db/api/storage/edges/edges.rs @@ -3,12 +3,13 @@ use crate::{ entities::{edges::edge_store::EdgeStore, LayerIds, EID}, storage::ReadLockedStorage, }, - db::api::storage::edges::edge_ref::EdgeStorageRef, + db::api::storage::nodes::unlocked::UnlockedEdges, }; #[cfg(feature = "storage")] use crate::disk_graph::storage_interface::edges_ref::DiskEdgesRef; +use super::edge_entry::EdgeStorageEntry; use crate::db::api::storage::edges::edge_storage_ops::EdgeStorageOps; #[cfg(feature = "storage")] use crate::disk_graph::storage_interface::edges::DiskEdges; @@ -33,69 +34,93 @@ impl EdgesStorage { } } -#[derive(Copy, Clone, Debug)] +#[derive(Debug)] pub enum EdgesStorageRef<'a> { Mem(&'a ReadLockedStorage), + Unlocked(UnlockedEdges<'a>), #[cfg(feature = "storage")] Disk(DiskEdgesRef<'a>), } +#[cfg(feature = "storage")] +use crate::db::api::storage::variants::storage_variants3::StorageVariants; + impl<'a> EdgesStorageRef<'a> { #[cfg(feature = "storage")] - pub fn iter(self, layers: LayerIds) -> impl Iterator> { + pub fn iter(self, layers: LayerIds) -> impl Iterator> { match self { - EdgesStorageRef::Mem(storage) => Either::Left( + EdgesStorageRef::Mem(storage) => StorageVariants::Mem( storage .iter() .filter(move |e| e.has_layer(&layers)) - .map(EdgeStorageRef::Mem), + .map(EdgeStorageEntry::Mem), + ), + EdgesStorageRef::Unlocked(edges) => StorageVariants::Unlocked( + edges + .iter() + .filter(move |e| e.has_layer(&layers)) + .map(EdgeStorageEntry::Unlocked), ), EdgesStorageRef::Disk(storage) => { - Either::Right(storage.iter(layers).map(EdgeStorageRef::Disk)) + StorageVariants::Disk(storage.iter(layers).map(EdgeStorageEntry::Disk)) } } } #[cfg(not(feature = "storage"))] - pub fn iter(self, layers: LayerIds) -> impl Iterator> { + pub fn iter(self, layers: LayerIds) -> impl Iterator> { match self { - EdgesStorageRef::Mem(storage) => { - Either::<_, std::iter::Empty>>::Left( - storage - .iter() - .filter(move |e| e.has_layer(&layers)) - .map(EdgeStorageRef::Mem), - ) - } + EdgesStorageRef::Mem(storage) => Either::Left( + storage + .iter() + .filter(move |e| e.has_layer(&layers)) + .map(EdgeStorageEntry::Mem), + ), + EdgesStorageRef::Unlocked(edges) => Either::Right( + edges + .iter() + .filter(move |e| e.has_layer(&layers)) + .map(EdgeStorageEntry::Unlocked), + ), } } #[cfg(feature = "storage")] - pub fn par_iter(self, layers: LayerIds) -> impl ParallelIterator> { + pub fn par_iter(self, layers: LayerIds) -> impl ParallelIterator> { match self { - EdgesStorageRef::Mem(storage) => Either::Left( + EdgesStorageRef::Mem(storage) => StorageVariants::Mem( storage .par_iter() .filter(move |e| e.has_layer(&layers)) - .map(EdgeStorageRef::Mem), + .map(EdgeStorageEntry::Mem), + ), + EdgesStorageRef::Unlocked(edges) => StorageVariants::Unlocked( + edges + .par_iter() + .filter(move |e| e.has_layer(&layers)) + .map(EdgeStorageEntry::Unlocked), ), EdgesStorageRef::Disk(storage) => { - Either::Right(storage.par_iter(layers).map(EdgeStorageRef::Disk)) + StorageVariants::Disk(storage.par_iter(layers).map(EdgeStorageEntry::Disk)) } } } #[cfg(not(feature = "storage"))] - pub fn par_iter(self, layers: LayerIds) -> impl ParallelIterator> { + pub fn par_iter(self, layers: LayerIds) -> impl ParallelIterator> { match self { - EdgesStorageRef::Mem(storage) => { - Either::<_, rayon::iter::Empty>>::Left( - storage - .par_iter() - .filter(move |e| e.has_layer(&layers)) - .map(EdgeStorageRef::Mem), - ) - } + EdgesStorageRef::Mem(storage) => Either::Left( + storage + .par_iter() + .filter(move |e| e.has_layer(&layers)) + .map(EdgeStorageEntry::Mem), + ), + EdgesStorageRef::Unlocked(edges) => Either::Right( + edges + .par_iter() + .filter(move |e| e.has_layer(&layers)) + .map(EdgeStorageEntry::Unlocked), + ), } } @@ -107,6 +132,11 @@ impl<'a> EdgesStorageRef<'a> { LayerIds::All => storage.len(), _ => storage.par_iter().filter(|e| e.has_layer(layers)).count(), }, + EdgesStorageRef::Unlocked(edges) => match layers { + LayerIds::None => 0, + LayerIds::All => edges.len(), + _ => edges.par_iter().filter(|e| e.has_layer(layers)).count(), + }, #[cfg(feature = "storage")] EdgesStorageRef::Disk(storage) => storage.count(layers), } diff --git a/raphtory/src/db/api/storage/nodes/mod.rs b/raphtory/src/db/api/storage/nodes/mod.rs index 26081101c8..015bd0ee05 100644 --- a/raphtory/src/db/api/storage/nodes/mod.rs +++ b/raphtory/src/db/api/storage/nodes/mod.rs @@ -4,3 +4,4 @@ pub mod node_ref; pub mod node_storage_ops; pub mod nodes; pub mod nodes_ref; +pub mod unlocked; diff --git a/raphtory/src/db/api/storage/nodes/node_entry.rs b/raphtory/src/db/api/storage/nodes/node_entry.rs index 1ff55de43d..2ab6debb08 100644 --- a/raphtory/src/db/api/storage/nodes/node_entry.rs +++ b/raphtory/src/db/api/storage/nodes/node_entry.rs @@ -1,5 +1,5 @@ -#[cfg(feature = "storage")] -use crate::db::api::storage::variants::storage_variants::StorageVariants; +use std::borrow::Cow; + #[cfg(feature = "storage")] use crate::disk_graph::storage_interface::node::DiskNode; use crate::{ @@ -12,21 +12,44 @@ use crate::{ storage::{ nodes::{node_ref::NodeStorageRef, node_storage_ops::NodeStorageOps}, tprop_storage_ops::TPropOps, + variants::storage_variants3::StorageVariants, }, view::internal::NodeAdditions, }, }; pub enum NodeStorageEntry<'a> { - Mem(Entry<'a, NodeStore>), + Mem(&'a NodeStore), + Unlocked(Entry<'a, NodeStore>), #[cfg(feature = "storage")] Disk(DiskNode<'a>), } +impl<'a> From<&'a NodeStore> for NodeStorageEntry<'a> { + fn from(value: &'a NodeStore) -> Self { + NodeStorageEntry::Mem(value) + } +} + +impl<'a> From> for NodeStorageEntry<'a> { + fn from(value: Entry<'a, NodeStore>) -> Self { + NodeStorageEntry::Unlocked(value) + } +} + +#[cfg(feature = "storage")] +impl<'a> From> for NodeStorageEntry<'a> { + fn from(value: DiskNode<'a>) -> Self { + NodeStorageEntry::Disk(value) + } +} + +#[cfg(feature = "storage")] macro_rules! for_all { ($value:expr, $pattern:pat => $result:expr) => { match $value { NodeStorageEntry::Mem($pattern) => $result, + NodeStorageEntry::Unlocked($pattern) => $result, #[cfg(feature = "storage")] NodeStorageEntry::Disk($pattern) => $result, } @@ -38,24 +61,18 @@ macro_rules! for_all_iter { ($value:expr, $pattern:pat => $result:expr) => {{ match $value { NodeStorageEntry::Mem($pattern) => StorageVariants::Mem($result), + NodeStorageEntry::Unlocked($pattern) => StorageVariants::Unlocked($result), NodeStorageEntry::Disk($pattern) => StorageVariants::Disk($result), } }}; } -#[cfg(not(feature = "storage"))] -macro_rules! for_all_iter { - ($value:expr, $pattern:pat => $result:expr) => {{ - match $value { - NodeStorageEntry::Mem($pattern) => $result, - } - }}; -} - impl<'a> NodeStorageEntry<'a> { + #[inline] pub fn as_ref(&self) -> NodeStorageRef { match self { NodeStorageEntry::Mem(entry) => NodeStorageRef::Mem(entry), + NodeStorageEntry::Unlocked(entry) => NodeStorageRef::Mem(entry), #[cfg(feature = "storage")] NodeStorageEntry::Disk(node) => NodeStorageRef::Disk(*node), } @@ -68,17 +85,34 @@ impl<'a, 'b: 'a> From<&'a NodeStorageEntry<'b>> for NodeStorageRef<'a> { } } +impl<'b> NodeStorageEntry<'b> { + pub fn into_edges_iter( + self, + layers: &'b LayerIds, + dir: Direction, + ) -> impl Iterator + '_ { + match self { + NodeStorageEntry::Mem(entry) => StorageVariants::Mem(entry.edges_iter(layers, dir)), + NodeStorageEntry::Unlocked(entry) => { + StorageVariants::Unlocked(entry.into_edges_iter(layers, dir)) + } + #[cfg(feature = "storage")] + NodeStorageEntry::Disk(node) => StorageVariants::Disk(node.edges_iter(layers, dir)), + } + } +} + impl<'a, 'b: 'a> NodeStorageOps<'a> for &'a NodeStorageEntry<'b> { fn degree(self, layers: &LayerIds, dir: Direction) -> usize { - for_all!(self, node => node.degree(layers, dir)) + self.as_ref().degree(layers, dir) } fn additions(self) -> NodeAdditions<'a> { - for_all!(self, node => node.additions()) + self.as_ref().additions() } fn tprop(self, prop_id: usize) -> impl TPropOps<'a> { - for_all_iter!(self, node => node.tprop(prop_id)) + self.as_ref().tprop(prop_id) } fn edges_iter( @@ -86,26 +120,26 @@ impl<'a, 'b: 'a> NodeStorageOps<'a> for &'a NodeStorageEntry<'b> { layers: &'a LayerIds, dir: Direction, ) -> impl Iterator + 'a { - for_all_iter!(self, node => node.edges_iter(layers, dir)) + self.as_ref().edges_iter(layers, dir) } fn node_type_id(self) -> usize { - for_all!(self, node => node.node_type_id()) + self.as_ref().node_type_id() } fn vid(self) -> VID { - for_all!(self, node => node.vid()) + self.as_ref().vid() } fn id(self) -> u64 { - for_all!(self, node => node.id()) + self.as_ref().id() } - fn name(self) -> Option<&'a str> { - for_all!(self, node => node.name()) + fn name(self) -> Option> { + self.as_ref().name() } fn find_edge(self, dst: VID, layer_ids: &LayerIds) -> Option { - for_all!(self, node => node.find_edge(dst, layer_ids)) + self.as_ref().find_edge(dst, layer_ids) } } diff --git a/raphtory/src/db/api/storage/nodes/node_owned_entry.rs b/raphtory/src/db/api/storage/nodes/node_owned_entry.rs index 64a8bdd6b5..70b16b6b27 100644 --- a/raphtory/src/db/api/storage/nodes/node_owned_entry.rs +++ b/raphtory/src/db/api/storage/nodes/node_owned_entry.rs @@ -1,23 +1,16 @@ #[cfg(feature = "storage")] -use crate::db::api::storage::variants::storage_variants::StorageVariants; -#[cfg(feature = "storage")] use crate::disk_graph::storage_interface::node::DiskOwnedNode; + +#[cfg(feature = "storage")] +use either::Either; + use crate::{ core::{ - entities::{edges::edge_ref::EdgeRef, nodes::node_store::NodeStore, LayerIds, VID}, + entities::{edges::edge_ref::EdgeRef, nodes::node_store::NodeStore, LayerIds}, storage::ArcEntry, Direction, }, - db::api::{ - storage::{ - nodes::{ - node_ref::NodeStorageRef, - node_storage_ops::{NodeStorageIntoOps, NodeStorageOps}, - }, - tprop_storage_ops::TPropOps, - }, - view::internal::NodeAdditions, - }, + db::api::storage::nodes::node_storage_ops::NodeStorageIntoOps, }; pub enum NodeOwnedEntry { @@ -26,32 +19,12 @@ pub enum NodeOwnedEntry { Disk(DiskOwnedNode), } -impl NodeOwnedEntry { - pub fn as_ref(&self) -> NodeStorageRef { - match self { - NodeOwnedEntry::Mem(entry) => NodeStorageRef::Mem(entry), - #[cfg(feature = "storage")] - NodeOwnedEntry::Disk(entry) => NodeStorageRef::Disk(entry.as_ref()), - } - } -} - -macro_rules! for_all { - ($value:expr, $pattern:pat => $result:expr) => { - match $value { - NodeOwnedEntry::Mem($pattern) => $result, - #[cfg(feature = "storage")] - NodeOwnedEntry::Disk($pattern) => $result, - } - }; -} - #[cfg(feature = "storage")] macro_rules! for_all_iter { ($value:expr, $pattern:pat => $result:expr) => {{ match $value { - NodeOwnedEntry::Mem($pattern) => StorageVariants::Mem($result), - NodeOwnedEntry::Disk($pattern) => StorageVariants::Disk($result), + NodeOwnedEntry::Mem($pattern) => Either::Left($result), + NodeOwnedEntry::Disk($pattern) => Either::Right($result), } }}; } @@ -65,54 +38,8 @@ macro_rules! for_all_iter { }}; } -impl<'a> NodeStorageOps<'a> for &'a NodeOwnedEntry { - fn degree(self, layers: &LayerIds, dir: Direction) -> usize { - for_all!(self, node => node.degree(layers, dir)) - } - - fn additions(self) -> NodeAdditions<'a> { - for_all!(self, node => node.additions()) - } - - fn tprop(self, prop_id: usize) -> impl TPropOps<'a> { - for_all_iter!(self, node => node.tprop(prop_id)) - } - - fn edges_iter( - self, - layers: &'a LayerIds, - dir: Direction, - ) -> impl Iterator + 'a { - for_all_iter!(self, node => node.edges_iter(layers, dir)) - } - - fn node_type_id(self) -> usize { - for_all!(self, node => node.node_type_id()) - } - - fn vid(self) -> VID { - for_all!(self, node => node.vid()) - } - - fn id(self) -> u64 { - for_all!(self, node => node.id()) - } - - fn name(self) -> Option<&'a str> { - for_all!(self, node => node.name()) - } - - fn find_edge(self, dst: VID, layer_ids: &LayerIds) -> Option { - for_all!(self, node => node.find_edge(dst, layer_ids)) - } -} - impl NodeStorageIntoOps for NodeOwnedEntry { fn into_edges_iter(self, layers: LayerIds, dir: Direction) -> impl Iterator { for_all_iter!(self, node => node.into_edges_iter(layers, dir)) } - - fn into_neighbours_iter(self, layers: LayerIds, dir: Direction) -> impl Iterator { - for_all_iter!(self, node => node.into_neighbours_iter(layers, dir)) - } } diff --git a/raphtory/src/db/api/storage/nodes/node_ref.rs b/raphtory/src/db/api/storage/nodes/node_ref.rs index 410cc9028f..7326322c64 100644 --- a/raphtory/src/db/api/storage/nodes/node_ref.rs +++ b/raphtory/src/db/api/storage/nodes/node_ref.rs @@ -1,5 +1,8 @@ +use std::borrow::Cow; + #[cfg(feature = "storage")] use crate::db::api::storage::variants::storage_variants::StorageVariants; + #[cfg(feature = "storage")] use crate::disk_graph::storage_interface::node::DiskNode; use crate::{ @@ -95,7 +98,7 @@ impl<'a> NodeStorageOps<'a> for NodeStorageRef<'a> { for_all!(self, node => node.id()) } - fn name(self) -> Option<&'a str> { + fn name(self) -> Option> { for_all!(self, node => node.name()) } diff --git a/raphtory/src/db/api/storage/nodes/node_storage_ops.rs b/raphtory/src/db/api/storage/nodes/node_storage_ops.rs index 14bb843e67..305d64adab 100644 --- a/raphtory/src/db/api/storage/nodes/node_storage_ops.rs +++ b/raphtory/src/db/api/storage/nodes/node_storage_ops.rs @@ -1,3 +1,5 @@ +use std::borrow::Cow; + use crate::{ core::{ entities::{ @@ -5,11 +7,12 @@ use crate::{ LayerIds, VID, }, storage::ArcEntry, - Direction, OptionAsStr, + Direction, }, db::api::{storage::tprop_storage_ops::TPropOps, view::internal::NodeAdditions}, }; use itertools::Itertools; +use raphtory_api::core::storage::arc_str::OptionAsStr; pub trait NodeStorageOps<'a>: Sized { fn degree(self, layers: &LayerIds, dir: Direction) -> usize; @@ -27,7 +30,7 @@ pub trait NodeStorageOps<'a>: Sized { fn id(self) -> u64; - fn name(self) -> Option<&'a str>; + fn name(self) -> Option>; fn find_edge(self, dst: VID, layer_ids: &LayerIds) -> Option; } @@ -65,8 +68,8 @@ impl<'a> NodeStorageOps<'a> for &'a NodeStore { self.global_id } - fn name(self) -> Option<&'a str> { - self.name.as_str() + fn name(self) -> Option> { + self.name.as_str().map(Cow::from) } fn find_edge(self, dst: VID, layer_ids: &LayerIds) -> Option { diff --git a/raphtory/src/db/api/storage/nodes/nodes.rs b/raphtory/src/db/api/storage/nodes/nodes.rs index dacf9d8e1e..9699297401 100644 --- a/raphtory/src/db/api/storage/nodes/nodes.rs +++ b/raphtory/src/db/api/storage/nodes/nodes.rs @@ -5,10 +5,12 @@ use crate::{ entities::{nodes::node_store::NodeStore, VID}, storage::ReadLockedStorage, }, - db::api::storage::nodes::{node_ref::NodeStorageRef, nodes_ref::NodesStorageRef}, + db::api::storage::nodes::nodes_ref::NodesStorageEntry, }; use std::sync::Arc; +use super::node_ref::NodeStorageRef; + pub enum NodesStorage { Mem(Arc>), #[cfg(feature = "storage")] @@ -16,19 +18,23 @@ pub enum NodesStorage { } impl NodesStorage { - pub fn as_ref(&self) -> NodesStorageRef { + pub fn as_ref(&self) -> NodesStorageEntry { match self { - NodesStorage::Mem(storage) => NodesStorageRef::Mem(storage), + NodesStorage::Mem(storage) => NodesStorageEntry::Mem(storage), #[cfg(feature = "storage")] - NodesStorage::Disk(storage) => NodesStorageRef::Disk(storage.as_ref()), + NodesStorage::Disk(storage) => NodesStorageEntry::Disk(storage.as_ref()), } } - pub fn node_ref(&self, vid: VID) -> NodeStorageRef { + pub fn node_entry(&self, vid: VID) -> NodeStorageRef { match self { NodesStorage::Mem(storage) => NodeStorageRef::Mem(storage.get(vid)), #[cfg(feature = "storage")] NodesStorage::Disk(storage) => NodeStorageRef::Disk(storage.node(vid)), } } + + pub fn len(&self) -> usize { + self.as_ref().len() + } } diff --git a/raphtory/src/db/api/storage/nodes/nodes_ref.rs b/raphtory/src/db/api/storage/nodes/nodes_ref.rs index ca2ca2f8f6..47abbadf3f 100644 --- a/raphtory/src/db/api/storage/nodes/nodes_ref.rs +++ b/raphtory/src/db/api/storage/nodes/nodes_ref.rs @@ -1,19 +1,23 @@ #[cfg(feature = "storage")] -use crate::db::api::storage::variants::storage_variants::StorageVariants; +use crate::db::api::storage::variants::storage_variants3::StorageVariants; #[cfg(feature = "storage")] use crate::disk_graph::storage_interface::nodes_ref::DiskNodesRef; -use crate::{ - core::{ - entities::{nodes::node_store::NodeStore, VID}, - storage::ReadLockedStorage, - }, - db::api::storage::nodes::node_ref::NodeStorageRef, + +#[cfg(not(feature = "storage"))] +use either::Either; + +use crate::core::{ + entities::{nodes::node_store::NodeStore, VID}, + storage::ReadLockedStorage, }; use rayon::iter::ParallelIterator; -#[derive(Copy, Clone, Debug)] -pub enum NodesStorageRef<'a> { +use super::node_ref::NodeStorageRef; + +#[derive(Debug)] +pub enum NodesStorageEntry<'a> { Mem(&'a ReadLockedStorage), + Unlocked(ReadLockedStorage), #[cfg(feature = "storage")] Disk(DiskNodesRef<'a>), } @@ -22,8 +26,9 @@ pub enum NodesStorageRef<'a> { macro_rules! for_all_variants { ($value:expr, $pattern:pat => $result:expr) => { match $value { - NodesStorageRef::Mem($pattern) => StorageVariants::Mem($result), - NodesStorageRef::Disk($pattern) => StorageVariants::Disk($result), + NodesStorageEntry::Mem($pattern) => StorageVariants::Mem($result), + NodesStorageEntry::Unlocked($pattern) => StorageVariants::Unlocked($result), + NodesStorageEntry::Disk($pattern) => StorageVariants::Disk($result), } }; } @@ -32,32 +37,36 @@ macro_rules! for_all_variants { macro_rules! for_all_variants { ($value:expr, $pattern:pat => $result:expr) => { match $value { - NodesStorageRef::Mem($pattern) => $result, + NodesStorageEntry::Mem($pattern) => Either::Left($result), + NodesStorageEntry::Unlocked($pattern) => Either::Right($result), } }; } -impl<'a> NodesStorageRef<'a> { - pub fn node(self, vid: VID) -> NodeStorageRef<'a> { +impl<'a> NodesStorageEntry<'a> { + pub fn node(&self, vid: VID) -> NodeStorageRef<'_> { match self { - NodesStorageRef::Mem(store) => NodeStorageRef::Mem(store.get(vid)), + NodesStorageEntry::Mem(store) => NodeStorageRef::Mem(store.get(vid)), + NodesStorageEntry::Unlocked(store) => NodeStorageRef::Mem(store.get(vid)), #[cfg(feature = "storage")] - NodesStorageRef::Disk(store) => NodeStorageRef::Disk(store.node(vid)), + NodesStorageEntry::Disk(store) => NodeStorageRef::Disk(store.node(vid)), } } pub fn len(&self) -> usize { match self { - NodesStorageRef::Mem(store) => store.len(), + NodesStorageEntry::Mem(store) => store.len(), + NodesStorageEntry::Unlocked(store) => store.len(), #[cfg(feature = "storage")] - NodesStorageRef::Disk(store) => store.len(), + NodesStorageEntry::Disk(store) => store.len(), } } - pub fn par_iter(self) -> impl ParallelIterator> { + + pub fn par_iter(&self) -> impl ParallelIterator> { for_all_variants!(self, nodes => nodes.par_iter().map(|n| n.into())) } - pub fn iter(self) -> impl Iterator> { + pub fn iter(&self) -> impl Iterator> { for_all_variants!(self, nodes => nodes.iter().map(|n| n.into())) } } diff --git a/raphtory/src/db/api/storage/nodes/unlocked.rs b/raphtory/src/db/api/storage/nodes/unlocked.rs new file mode 100644 index 0000000000..2217dcce21 --- /dev/null +++ b/raphtory/src/db/api/storage/nodes/unlocked.rs @@ -0,0 +1,115 @@ +use crate::core::{ + entities::{ + edges::edge_store::EdgeStore, graph::tgraph::InternalGraph, nodes::node_store::NodeStore, + LayerIds, + }, + storage::{ArcEntry, Entry}, +}; +use ouroboros::self_referencing; +use raphtory_api::core::{ + entities::{edges::edge_ref::EdgeRef, EID, VID}, + Direction, +}; +use rayon::prelude::*; + +impl<'a> Entry<'a, NodeStore> { + pub fn into_edges_iter( + self, + layers: &'a LayerIds, + dir: Direction, + ) -> impl Iterator + 'a { + LockedEdgesRefIterBuilder { + entry: self, + iter_builder: |node| Box::new(node.edge_tuples(layers, dir)), + } + .build() + } +} + +#[self_referencing] +pub struct LockedEdgesRefIter<'a> { + entry: Entry<'a, NodeStore>, + #[borrows(entry)] + #[covariant] + iter: Box + Send + 'this>, +} + +impl<'a> Iterator for LockedEdgesRefIter<'a> { + type Item = EdgeRef; + + fn next(&mut self) -> Option { + self.with_iter_mut(|iter| iter.next()) + } +} + +#[derive(Clone, Copy, Debug)] +pub struct UnlockedNodes<'a>(pub &'a InternalGraph); + +impl<'a> UnlockedNodes<'a> { + pub fn len(self) -> usize { + self.0.inner().storage.nodes.len() + } + + pub fn node(&self, vid: VID) -> Entry<'a, NodeStore> { + self.0.inner().storage.nodes.entry(vid) + } + + pub fn iter(self) -> impl Iterator> + 'a { + let storage = &self.0.inner().storage.nodes; + (0..storage.len()).map(VID).map(|vid| storage.entry(vid)) + } + + pub fn par_iter(self) -> impl ParallelIterator> + 'a { + let storage = &self.0.inner().storage.nodes; + (0..storage.len()) + .into_par_iter() + .map(VID) + .map(|vid| storage.entry(vid)) + } +} + +#[derive(Debug, Clone)] +pub struct UnlockedOwnedNode { + g: InternalGraph, + vid: VID, +} + +impl UnlockedOwnedNode { + pub fn new(g: InternalGraph, vid: VID) -> Self { + Self { g, vid } + } + + pub fn arc_node(&self) -> ArcEntry { + self.g.inner().storage.nodes.entry_arc(self.vid) + } + + pub fn into_edges_iter( + self, + layers: LayerIds, + dir: Direction, + ) -> impl Iterator { + self.arc_node().into_edges(&layers, dir) + } +} + +#[derive(Copy, Clone, Debug)] +pub struct UnlockedEdges<'a>(pub &'a InternalGraph); + +impl<'a> UnlockedEdges<'a> { + pub fn iter(self) -> impl Iterator> + 'a { + let storage = &self.0.inner().storage.edges; + (0..storage.len()).map(EID).map(|eid| storage.entry(eid)) + } + + pub fn par_iter(self) -> impl ParallelIterator> + 'a { + let storage = &self.0.inner().storage.edges; + (0..storage.len()) + .into_par_iter() + .map(EID) + .map(|eid| storage.entry(eid)) + } + + pub fn len(self) -> usize { + self.0.inner().storage.edges.len() + } +} diff --git a/raphtory/src/db/api/storage/storage_ops.rs b/raphtory/src/db/api/storage/storage_ops.rs index d36019dee3..9001307010 100644 --- a/raphtory/src/db/api/storage/storage_ops.rs +++ b/raphtory/src/db/api/storage/storage_ops.rs @@ -1,6 +1,6 @@ use crate::{ core::{ - entities::{edges::edge_ref::EdgeRef, LayerIds, EID, VID}, + entities::{edges::edge_ref::EdgeRef, graph::tgraph::InternalGraph, LayerIds, EID, VID}, Direction, }, db::api::{ @@ -13,17 +13,13 @@ use crate::{ locked::LockedGraph, nodes::{ node_owned_entry::NodeOwnedEntry, - node_ref::NodeStorageRef, node_storage_ops::{NodeStorageIntoOps, NodeStorageOps}, nodes::NodesStorage, - nodes_ref::NodesStorageRef, + nodes_ref::NodesStorageEntry, }, variants::filter_variants::FilterVariants, }, - view::{ - internal::{FilterOps, FilterState, NodeList}, - IntoDynBoxed, - }, + view::internal::{FilterOps, FilterState, NodeList}, }, prelude::GraphViewOps, }; @@ -45,41 +41,65 @@ use crate::{ #[cfg(feature = "storage")] use pometry_storage::graph::TemporalGraph; +use super::{ + edges::edge_entry::EdgeStorageEntry, + nodes::{node_entry::NodeStorageEntry, unlocked::UnlockedEdges}, +}; + #[derive(Debug, Clone)] pub enum GraphStorage { Mem(LockedGraph), + Unlocked(InternalGraph), #[cfg(feature = "storage")] Disk(Arc), } impl GraphStorage { - pub fn nodes(&self) -> NodesStorageRef { + pub fn lock(self) -> Self { match self { - GraphStorage::Mem(storage) => NodesStorageRef::Mem(&storage.nodes), + GraphStorage::Unlocked(storage) => GraphStorage::Mem(storage.lock()), + _ => self, + } + } + + pub fn nodes(&self) -> NodesStorageEntry { + match self { + GraphStorage::Mem(storage) => NodesStorageEntry::Mem(&storage.nodes), + GraphStorage::Unlocked(storage) => { + NodesStorageEntry::Unlocked(storage.inner().storage.nodes.read_lock()) + } #[cfg(feature = "storage")] - GraphStorage::Disk(storage) => NodesStorageRef::Disk(DiskNodesRef::new(storage)), + GraphStorage::Disk(storage) => NodesStorageEntry::Disk(DiskNodesRef::new(storage)), } } pub fn owned_nodes(&self) -> NodesStorage { match self { GraphStorage::Mem(storage) => NodesStorage::Mem(storage.nodes.clone()), + GraphStorage::Unlocked(storage) => NodesStorage::Mem(storage.lock().nodes.clone()), #[cfg(feature = "storage")] GraphStorage::Disk(storage) => NodesStorage::Disk(DiskNodesOwned::new(storage.clone())), } } - pub fn node(&self, vid: VID) -> NodeStorageRef { + #[inline(always)] + pub fn node(&self, vid: VID) -> NodeStorageEntry { match self { - GraphStorage::Mem(storage) => NodeStorageRef::Mem(storage.nodes.get(vid)), + GraphStorage::Mem(storage) => NodeStorageEntry::Mem(storage.nodes.get(vid)), + GraphStorage::Unlocked(storage) => { + NodeStorageEntry::Unlocked(storage.inner().node_entry(vid)) + } #[cfg(feature = "storage")] - GraphStorage::Disk(storage) => NodeStorageRef::Disk(DiskNode::new(storage, vid)), + GraphStorage::Disk(storage) => NodeStorageEntry::Disk(DiskNode::new(storage, vid)), } } pub fn owned_node(&self, vid: VID) -> NodeOwnedEntry { match self { GraphStorage::Mem(storage) => NodeOwnedEntry::Mem(storage.nodes.arc_entry(vid)), + GraphStorage::Unlocked(storage) => { + NodeOwnedEntry::Mem(storage.inner().storage.nodes.entry_arc(vid)) + } #[cfg(feature = "storage")] GraphStorage::Disk(storage) => { NodeOwnedEntry::Disk(DiskOwnedNode::new(storage.clone(), vid)) @@ -90,6 +110,7 @@ impl GraphStorage { pub fn edges(&self) -> EdgesStorageRef { match self { GraphStorage::Mem(storage) => EdgesStorageRef::Mem(&storage.edges), + GraphStorage::Unlocked(storage) => EdgesStorageRef::Unlocked(UnlockedEdges(storage)), #[cfg(feature = "storage")] GraphStorage::Disk(storage) => EdgesStorageRef::Disk(DiskEdgesRef::new(storage)), } @@ -98,20 +119,24 @@ impl GraphStorage { pub fn owned_edges(&self) -> EdgesStorage { match self { GraphStorage::Mem(storage) => EdgesStorage::Mem(storage.edges.clone()), + GraphStorage::Unlocked(storage) => GraphStorage::Mem(storage.lock()).owned_edges(), #[cfg(feature = "storage")] GraphStorage::Disk(storage) => EdgesStorage::Disk(DiskEdges::new(storage)), } } - pub fn edge(&self, eid: EdgeRef) -> EdgeStorageRef { + pub fn edge(&self, eid: EdgeRef) -> EdgeStorageEntry { match self { - GraphStorage::Mem(storage) => EdgeStorageRef::Mem(storage.edges.get(eid.pid())), + GraphStorage::Mem(storage) => EdgeStorageEntry::Mem(storage.edges.get(eid.pid())), + GraphStorage::Unlocked(storage) => { + EdgeStorageEntry::Unlocked(storage.inner().edge_entry(eid.pid())) + } #[cfg(feature = "storage")] GraphStorage::Disk(storage) => { let layer = eid .layer() .expect("disk_graph EdgeRefs should always have layer set"); - EdgeStorageRef::Disk(storage.layers()[*layer].edge(eid.pid())) + EdgeStorageEntry::Disk(storage.layers()[*layer].edge(eid.pid())) } } } @@ -152,47 +177,6 @@ impl GraphStorage { } } - pub fn nodes_iter<'graph, G: GraphViewOps<'graph>>( - &'graph self, - view: &'graph G, - type_filter: Option<&'graph Arc<[bool]>>, - ) -> Box + Send + 'graph> { - if view.node_list_trusted() { - match type_filter { - Some(type_filter) => Box::new( - view.node_list() - .into_iter() - .filter(|&vid| type_filter[self.node(vid).node_type_id()]), - ), - None => view.node_list().into_iter(), - } - } else { - match view.node_list() { - NodeList::All { .. } => self - .nodes() - .iter() - .enumerate() - .filter(move |(_, node)| { - type_filter.map_or(true, |type_filter| type_filter[node.node_type_id()]) - && view.filter_node(*node, view.layer_ids()) - }) - .map(|(vid, _)| VID(vid)) - .into_dyn_boxed(), - nodes @ NodeList::List { .. } => { - let node_storage = self.nodes(); - nodes - .into_iter() - .filter(move |&vid| { - type_filter.map_or(true, |type_filter| { - type_filter[node_storage.node(vid).node_type_id()] - }) && view.filter_node(node_storage.node(vid), view.layer_ids()) - }) - .into_dyn_boxed() - } - } - } - } - pub fn into_nodes_iter<'graph, G: GraphViewOps<'graph>>( self, view: G, @@ -204,9 +188,9 @@ impl GraphStorage { if view.node_list_trusted() { iter } else { - Box::new( - iter.filter(move |&vid| view.filter_node(self.node(vid), view.layer_ids())), - ) + Box::new(iter.filter(move |&vid| { + view.filter_node(self.node(vid).as_ref(), view.layer_ids()) + })) } } Some(type_filter) => { @@ -215,7 +199,8 @@ impl GraphStorage { } else { Box::new(iter.filter(move |&vid| { let node = self.node(vid); - type_filter[node.node_type_id()] && view.filter_node(node, view.layer_ids()) + type_filter[node.node_type_id()] + && view.filter_node(node.as_ref(), view.layer_ids()) })) } } @@ -230,7 +215,7 @@ impl GraphStorage { view.node_list().into_par_iter().filter(move |&vid| { let node = self.node(vid); type_filter.map_or(true, |type_filter| type_filter[node.node_type_id()]) - && view.filter_node(node, view.layer_ids()) + && view.filter_node(node.as_ref(), view.layer_ids()) }) } @@ -241,10 +226,16 @@ impl GraphStorage { ) -> impl ParallelIterator + 'graph { view.node_list().into_par_iter().filter(move |&vid| { let node = self.node(vid); - type_filter + let n = node.name(); + let i = node.node_type_id(); + let r = type_filter .as_ref() - .map_or(true, |type_filter| type_filter[node.node_type_id()]) - && view.filter_node(self.node(vid), view.layer_ids()) + .map_or(true, |type_filter| type_filter[node.node_type_id()]); + let s = view.filter_node(self.node(vid).as_ref(), view.layer_ids()); + + println!("name = {:?}, id = {}, r = {}, s = {}", n, i, r, s); + + r && s }) } @@ -258,22 +249,22 @@ impl GraphStorage { FilterState::Neither => FilterVariants::Neither(iter), FilterState::Both => { let nodes = self.nodes(); - FilterVariants::Both(iter.filter(move |&e| { - view.filter_edge(e, view.layer_ids()) + FilterVariants::Both(iter.filter(move |e| { + view.filter_edge(e.as_ref(), view.layer_ids()) && view.filter_node(nodes.node(e.src()), view.layer_ids()) && view.filter_node(nodes.node(e.dst()), view.layer_ids()) })) } FilterState::Nodes => { let nodes = self.nodes(); - FilterVariants::Nodes(iter.filter(move |&e| { + FilterVariants::Nodes(iter.filter(move |e| { view.filter_node(nodes.node(e.src()), view.layer_ids()) && view.filter_node(nodes.node(e.dst()), view.layer_ids()) })) } - FilterState::Edges | FilterState::BothIndependent => { - FilterVariants::Edges(iter.filter(|&e| view.filter_edge(e, view.layer_ids()))) - } + FilterState::Edges | FilterState::BothIndependent => FilterVariants::Edges( + iter.filter(|e| view.filter_edge(e.as_ref(), view.layer_ids())), + ), }; filtered.map(|e| e.out_ref()) } @@ -295,14 +286,14 @@ impl GraphStorage { FilterState::Both => FilterVariants::Both(iter.filter_map(move |e| { let e = EdgeStorageRef::Mem(edges.get(e)); (view.filter_edge(e, view.layer_ids()) - && view.filter_node(nodes.node_ref(e.src()), view.layer_ids()) - && view.filter_node(nodes.node_ref(e.dst()), view.layer_ids())) + && view.filter_node(nodes.node_entry(e.src()), view.layer_ids()) + && view.filter_node(nodes.node_entry(e.dst()), view.layer_ids())) .then(|| e.out_ref()) })), FilterState::Nodes => FilterVariants::Nodes(iter.filter_map(move |e| { let e = EdgeStorageRef::Mem(edges.get(e)); - (view.filter_node(nodes.node_ref(e.src()), view.layer_ids()) - && view.filter_node(nodes.node_ref(e.dst()), view.layer_ids())) + (view.filter_node(nodes.node_entry(e.src()), view.layer_ids()) + && view.filter_node(nodes.node_entry(e.dst()), view.layer_ids())) .then(|| e.out_ref()) })), FilterState::Edges | FilterState::BothIndependent => { @@ -335,11 +326,11 @@ impl GraphStorage { if !view.filter_edge(e, view.layer_ids()) { return None; } - let src = nodes.node_ref(e.src()); + let src = nodes.node_entry(e.src()); if !view.filter_node(src, view.layer_ids()) { return None; } - let dst = nodes.node_ref(e.dst()); + let dst = nodes.node_entry(e.dst()); if !view.filter_node(dst, view.layer_ids()) { return None; } @@ -349,11 +340,11 @@ impl GraphStorage { FilterState::Nodes => { FilterVariants::Nodes(iter.filter_map(move |(eid, layer_id)| { let e = EdgeStorageRef::Disk(edges.get(eid, layer_id)); - let src = nodes.node_ref(e.src()); + let src = nodes.node_entry(e.src()); if !view.filter_node(src, view.layer_ids()) { return None; } - let dst = nodes.node_ref(e.dst()); + let dst = nodes.node_entry(e.dst()); if !view.filter_node(dst, view.layer_ids()) { return None; } @@ -381,24 +372,25 @@ impl GraphStorage { ) -> impl ParallelIterator + 'graph { self.edges() .par_iter(view.layer_ids().clone()) - .filter(|&edge| match view.filter_state() { + .filter(|edge| match view.filter_state() { FilterState::Neither => true, FilterState::Both => { let layer_ids = view.layer_ids(); let src = self.node(edge.src()); let dst = self.node(edge.dst()); - view.filter_edge(edge, view.layer_ids()) - && view.filter_node(src, layer_ids) - && view.filter_node(dst, layer_ids) + view.filter_edge(edge.as_ref(), view.layer_ids()) + && view.filter_node(src.as_ref(), layer_ids) + && view.filter_node(dst.as_ref(), layer_ids) } FilterState::Nodes => { let layer_ids = view.layer_ids(); let src = self.node(edge.src()); let dst = self.node(edge.dst()); - view.filter_node(src, layer_ids) && view.filter_node(dst, layer_ids) + view.filter_node(src.as_ref(), layer_ids) + && view.filter_node(dst.as_ref(), layer_ids) } FilterState::Edges | FilterState::BothIndependent => { - view.filter_edge(edge, view.layer_ids()) + view.filter_edge(edge.as_ref(), view.layer_ids()) } }) .map(|e| e.out_ref()) @@ -421,14 +413,14 @@ impl GraphStorage { FilterState::Both => FilterVariants::Both(iter.filter_map(move |e| { let e = EdgeStorageRef::Mem(edges.get(e)); (view.filter_edge(e, view.layer_ids()) - && view.filter_node(nodes.node_ref(e.src()), view.layer_ids()) - && view.filter_node(nodes.node_ref(e.dst()), view.layer_ids())) + && view.filter_node(nodes.node_entry(e.src()), view.layer_ids()) + && view.filter_node(nodes.node_entry(e.dst()), view.layer_ids())) .then(|| e.out_ref()) })), FilterState::Nodes => FilterVariants::Nodes(iter.filter_map(move |e| { let e = EdgeStorageRef::Mem(edges.get(e)); - (view.filter_node(nodes.node_ref(e.src()), view.layer_ids()) - && view.filter_node(nodes.node_ref(e.dst()), view.layer_ids())) + (view.filter_node(nodes.node_entry(e.src()), view.layer_ids()) + && view.filter_node(nodes.node_entry(e.dst()), view.layer_ids())) .then(|| e.out_ref()) })), FilterState::Edges | FilterState::BothIndependent => { @@ -463,11 +455,11 @@ impl GraphStorage { if !view.filter_edge(e, view.layer_ids()) { return None; } - let src = nodes.node_ref(e.src()); + let src = nodes.node_entry(e.src()); if !view.filter_node(src, view.layer_ids()) { return None; } - let dst = nodes.node_ref(e.dst()); + let dst = nodes.node_entry(e.dst()); if !view.filter_node(dst, view.layer_ids()) { return None; } @@ -477,11 +469,11 @@ impl GraphStorage { FilterState::Nodes => { FilterVariants::Nodes(iter.filter_map(move |(eid, layer_id)| { let e = EdgeStorageRef::Disk(edges.get(eid, layer_id)); - let src = nodes.node_ref(e.src()); + let src = nodes.node_entry(e.src()); if !view.filter_node(src, view.layer_ids()) { return None; } - let dst = nodes.node_ref(e.dst()); + let dst = nodes.node_entry(e.dst()); if !view.filter_node(dst, view.layer_ids()) { return None; } @@ -547,18 +539,18 @@ impl GraphStorage { ) -> impl Iterator + 'a { let source = self.node(node); let layers = view.layer_ids(); - let iter = source.edges_iter(layers, dir); + let iter = source.into_edges_iter(layers, dir); match view.filter_state() { FilterState::Neither => FilterVariants::Neither(iter), FilterState::Both => FilterVariants::Both(iter.filter(|&e| { - view.filter_edge(self.edge(e), view.layer_ids()) - && view.filter_node(self.node(e.remote()), view.layer_ids()) + view.filter_edge(self.edge(e).as_ref(), view.layer_ids()) + && view.filter_node(self.node(e.remote()).as_ref(), view.layer_ids()) })), FilterState::Nodes => FilterVariants::Nodes( - iter.filter(|e| view.filter_node(self.node(e.remote()), view.layer_ids())), + iter.filter(|e| view.filter_node(self.node(e.remote()).as_ref(), view.layer_ids())), ), FilterState::Edges | FilterState::BothIndependent => FilterVariants::Edges( - iter.filter(|&e| view.filter_edge(self.edge(e), view.layer_ids())), + iter.filter(|&e| view.filter_edge(self.edge(e).as_ref(), view.layer_ids())), ), } } @@ -576,14 +568,14 @@ impl GraphStorage { match view.filter_state() { FilterState::Neither => FilterVariants::Neither(iter), FilterState::Both => FilterVariants::Both(iter.filter(move |&e| { - view.filter_edge(self.edge(e), view.layer_ids()) - && view.filter_node(self.node(e.remote()), view.layer_ids()) + view.filter_edge(self.edge(e).as_ref(), view.layer_ids()) + && view.filter_node(self.node(e.remote()).as_ref(), view.layer_ids()) + })), + FilterState::Nodes => FilterVariants::Nodes(iter.filter(move |e| { + view.filter_node(self.node(e.remote()).as_ref(), view.layer_ids()) })), - FilterState::Nodes => FilterVariants::Nodes( - iter.filter(move |e| view.filter_node(self.node(e.remote()), view.layer_ids())), - ), FilterState::Edges | FilterState::BothIndependent => FilterVariants::Edges( - iter.filter(move |&e| view.filter_edge(self.edge(e), view.layer_ids())), + iter.filter(move |&e| view.filter_edge(self.edge(e).as_ref(), view.layer_ids())), ), } } diff --git a/raphtory/src/db/api/storage/tprop_storage_ops.rs b/raphtory/src/db/api/storage/tprop_storage_ops.rs index e29def68b6..3e98686ff2 100644 --- a/raphtory/src/db/api/storage/tprop_storage_ops.rs +++ b/raphtory/src/db/api/storage/tprop_storage_ops.rs @@ -42,11 +42,11 @@ macro_rules! for_all_variants { }; } -pub trait TPropOps<'a>: Sized + Copy + 'a + Send { +pub trait TPropOps<'a>: Sized + 'a + Send { fn active(self, w: Range) -> bool { self.iter_window_t(w).next().is_some() } - fn last_before(self, t: i64) -> Option<(TimeIndexEntry, Prop)>; + fn last_before(&self, t: i64) -> Option<(TimeIndexEntry, Prop)>; fn iter(self) -> impl Iterator + Send + 'a; fn iter_t(self) -> impl Iterator + Send + 'a { @@ -78,7 +78,7 @@ pub trait TPropOps<'a>: Sized + Copy + 'a + Send { } impl<'a> TPropOps<'a> for TPropRef<'a> { - fn last_before(self, t: i64) -> Option<(TimeIndexEntry, Prop)> { + fn last_before(&self, t: i64) -> Option<(TimeIndexEntry, Prop)> { for_all!(self, tprop => tprop.last_before(t)) } diff --git a/raphtory/src/db/api/storage/variants/mod.rs b/raphtory/src/db/api/storage/variants/mod.rs index 81eb0875a9..adc80a7d61 100644 --- a/raphtory/src/db/api/storage/variants/mod.rs +++ b/raphtory/src/db/api/storage/variants/mod.rs @@ -2,3 +2,4 @@ pub mod direction_variants; pub mod filter_variants; pub mod layer_variants; pub mod storage_variants; +pub mod storage_variants3; diff --git a/raphtory/src/db/api/storage/variants/storage_variants.rs b/raphtory/src/db/api/storage/variants/storage_variants.rs index 4d5edb4461..dfb41a855b 100644 --- a/raphtory/src/db/api/storage/variants/storage_variants.rs +++ b/raphtory/src/db/api/storage/variants/storage_variants.rs @@ -259,7 +259,7 @@ impl< impl<'a, Mem: TPropOps<'a> + 'a, #[cfg(feature = "storage")] Disk: TPropOps<'a> + 'a> TPropOps<'a> for SelfType!(Mem, Disk) { - fn last_before(self, t: i64) -> Option<(TimeIndexEntry, Prop)> { + fn last_before(&self, t: i64) -> Option<(TimeIndexEntry, Prop)> { for_all!(self, props => props.last_before(t)) } diff --git a/raphtory/src/db/api/storage/variants/storage_variants3.rs b/raphtory/src/db/api/storage/variants/storage_variants3.rs new file mode 100644 index 0000000000..b2bfcb92b1 --- /dev/null +++ b/raphtory/src/db/api/storage/variants/storage_variants3.rs @@ -0,0 +1,304 @@ +use crate::{core::Prop, db::api::storage::tprop_storage_ops::TPropOps}; +use raphtory_api::core::storage::timeindex::TimeIndexEntry; +use rayon::iter::{ + plumbing::{Consumer, ProducerCallback, UnindexedConsumer}, + IndexedParallelIterator, ParallelIterator, +}; +use std::{cmp::Ordering, ops::Range}; + +#[derive(Copy, Clone, Debug)] +pub enum StorageVariants { + Mem(Mem), + Unlocked(Unlocked), + #[cfg(feature = "storage")] + Disk(Disk), +} + +#[cfg(feature = "storage")] +macro_rules! SelfType { + ($Mem:ident, $Unlocked:ident, $Disk:ident) => { + StorageVariants<$Mem, $Unlocked, $Disk> + }; +} + +#[cfg(not(feature = "storage"))] +macro_rules! SelfType { + ($Mem:ident, $Unlocked:ident, $Disk:ident) => { + StorageVariants<$Mem, $Unlocked> + }; +} + +macro_rules! for_all { + ($value:expr, $pattern:pat => $result:expr) => { + match $value { + StorageVariants::Mem($pattern) => $result, + StorageVariants::Unlocked($pattern) => $result, + #[cfg(feature = "storage")] + StorageVariants::Disk($pattern) => $result, + } + }; +} + +#[cfg(feature = "storage")] +macro_rules! for_all_iter { + ($value:expr, $pattern:pat => $result:expr) => { + match $value { + StorageVariants::Mem($pattern) => StorageVariants::Mem($result), + StorageVariants::Unlocked($pattern) => StorageVariants::Unlocked($result), + StorageVariants::Disk($pattern) => StorageVariants::Disk($result), + } + }; +} + +#[cfg(not(feature = "storage"))] +macro_rules! for_all_iter { + ($value:expr, $pattern:pat => $result:expr) => { + match $value { + StorageVariants::Mem($pattern) => StorageVariants::Mem($result), + StorageVariants::Unlocked($pattern) => StorageVariants::Unlocked($result), + } + }; +} + +impl< + V, + Mem: Iterator, + Unlocked: Iterator, + #[cfg(feature = "storage")] Disk: Iterator, + > Iterator for SelfType!(Mem, Unlocked, Disk) +{ + type Item = V; + + #[inline] + fn next(&mut self) -> Option { + for_all!(self, iter => iter.next()) + } + + #[inline] + fn size_hint(&self) -> (usize, Option) { + for_all!(self, iter => iter.size_hint()) + } + + #[inline] + fn count(self) -> usize + where + Self: Sized, + { + for_all!(self, iter => iter.count()) + } + + #[inline] + fn last(self) -> Option + where + Self: Sized, + { + for_all!(self, iter => iter.last()) + } + + #[inline] + fn nth(&mut self, n: usize) -> Option { + for_all!(self, iter => iter.nth(n)) + } + + #[inline] + fn fold(self, init: B, f: F) -> B + where + Self: Sized, + F: FnMut(B, Self::Item) -> B, + { + for_all!(self, iter => iter.fold(init, f)) + } + + #[inline] + fn find

(&mut self, predicate: P) -> Option + where + Self: Sized, + P: FnMut(&Self::Item) -> bool, + { + for_all!(self, iter => iter.find(predicate)) + } + + fn find_map(&mut self, f: F) -> Option + where + Self: Sized, + F: FnMut(Self::Item) -> Option, + { + for_all!(self, iter => iter.find_map(f)) + } + + fn position

(&mut self, predicate: P) -> Option + where + Self: Sized, + P: FnMut(Self::Item) -> bool, + { + for_all!(self, iter => iter.position(predicate)) + } + + fn max(self) -> Option + where + Self: Sized, + Self::Item: Ord, + { + for_all!(self, iter => iter.max()) + } + + fn min(self) -> Option + where + Self: Sized, + Self::Item: Ord, + { + for_all!(self, iter => iter.min()) + } + + fn max_by_key(self, f: F) -> Option + where + Self: Sized, + F: FnMut(&Self::Item) -> B, + { + for_all!(self, iter => iter.max_by_key(f)) + } + + fn max_by(self, compare: F) -> Option + where + Self: Sized, + F: FnMut(&Self::Item, &Self::Item) -> Ordering, + { + for_all!(self, iter => iter.max_by(compare)) + } + + fn min_by_key(self, f: F) -> Option + where + Self: Sized, + F: FnMut(&Self::Item) -> B, + { + for_all!(self, iter => iter.min_by_key(f)) + } + + fn min_by(self, compare: F) -> Option + where + Self: Sized, + F: FnMut(&Self::Item, &Self::Item) -> Ordering, + { + for_all!(self, iter => iter.min_by(compare)) + } +} + +impl< + V, + Mem: DoubleEndedIterator, + Unlocked: DoubleEndedIterator, + #[cfg(feature = "storage")] Disk: DoubleEndedIterator, + > DoubleEndedIterator for SelfType!(Mem, Unlocked, Disk) +{ + fn next_back(&mut self) -> Option { + for_all!(self, iter => iter.next_back()) + } + + fn nth_back(&mut self, n: usize) -> Option { + for_all!(self, iter => iter.nth_back(n)) + } + + fn rfold(self, init: B, f: F) -> B + where + Self: Sized, + F: FnMut(B, Self::Item) -> B, + { + for_all!(self, iter => iter.rfold(init, f)) + } + + fn rfind

(&mut self, predicate: P) -> Option + where + Self: Sized, + P: FnMut(&Self::Item) -> bool, + { + for_all!(self, iter => iter.rfind(predicate)) + } +} + +impl< + V, + Mem: ExactSizeIterator, + Unlocked: ExactSizeIterator, + #[cfg(feature = "storage")] Disk: ExactSizeIterator, + > ExactSizeIterator for SelfType!(Mem, Unlocked, Disk) +{ + fn len(&self) -> usize { + for_all!(self, iter => iter.len()) + } +} + +impl< + V: Send, + Mem: ParallelIterator, + Unlocked: ParallelIterator, + #[cfg(feature = "storage")] Disk: ParallelIterator, + > ParallelIterator for SelfType!(Mem, Unlocked, Disk) +{ + type Item = V; + + fn drive_unindexed(self, consumer: C) -> C::Result + where + C: UnindexedConsumer, + { + for_all!(self, iter => iter.drive_unindexed(consumer)) + } + + fn opt_len(&self) -> Option { + for_all!(self, iter => iter.opt_len()) + } +} + +impl< + V: Send, + Mem: IndexedParallelIterator, + Unlocked: IndexedParallelIterator, + #[cfg(feature = "storage")] Disk: IndexedParallelIterator, + > IndexedParallelIterator for SelfType!(Mem, Unlocked, Disk) +{ + fn len(&self) -> usize { + for_all!(self, iter => iter.len()) + } + + fn drive>(self, consumer: C) -> C::Result { + for_all!(self, iter => iter.drive(consumer)) + } + + fn with_producer>(self, callback: CB) -> CB::Output { + for_all!(self, iter => iter.with_producer(callback)) + } +} + +impl< + 'a, + Mem: TPropOps<'a> + 'a, + Unlocked: TPropOps<'a> + 'a, + #[cfg(feature = "storage")] Disk: TPropOps<'a> + 'a, + > TPropOps<'a> for SelfType!(Mem, Unlocked, Disk) +{ + fn last_before(&self, t: i64) -> Option<(TimeIndexEntry, Prop)> { + for_all!(self, props => props.last_before(t)) + } + + fn iter(self) -> impl Iterator + Send + 'a { + for_all_iter!(self, props => props.iter()) + } + + fn iter_window( + self, + r: Range, + ) -> impl Iterator + Send + 'a { + for_all_iter!(self, props => props.iter_window(r)) + } + + fn at(self, ti: &TimeIndexEntry) -> Option { + for_all!(self, props => props.at(ti)) + } + + fn len(self) -> usize { + for_all!(self, props=> props.len()) + } + + fn is_empty(self) -> bool { + for_all!(self, props => props.is_empty()) + } +} diff --git a/raphtory/src/db/api/view/edge.rs b/raphtory/src/db/api/view/edge.rs index 10fd68a6d3..d7ce9e6eb1 100644 --- a/raphtory/src/db/api/view/edge.rs +++ b/raphtory/src/db/api/view/edge.rs @@ -1,13 +1,13 @@ use std::iter; use chrono::{DateTime, Utc}; +use raphtory_api::core::storage::arc_str::ArcStr; use crate::{ core::{ entities::{edges::edge_ref::EdgeRef, VID}, storage::timeindex::{AsTime, TimeIndexEntry}, utils::errors::GraphError, - ArcStr, }, db::api::{ properties::{internal::PropertiesOps, Properties}, diff --git a/raphtory/src/db/api/view/graph.rs b/raphtory/src/db/api/view/graph.rs index 04997b9ffd..a15f736631 100644 --- a/raphtory/src/db/api/view/graph.rs +++ b/raphtory/src/db/api/view/graph.rs @@ -3,7 +3,6 @@ use crate::{ entities::{graph::tgraph::InternalGraph, nodes::node_ref::AsNodeRef, LayerIds, VID}, storage::timeindex::AsTime, utils::errors::GraphError, - ArcStr, OptionAsStr, }, db::{ api::{ @@ -28,6 +27,7 @@ use crate::{ }; use chrono::{DateTime, Utc}; use itertools::Itertools; +use raphtory_api::core::storage::arc_str::{ArcStr, OptionAsStr}; use rayon::prelude::*; use rustc_hash::FxHashSet; use std::{borrow::Borrow, sync::Arc}; @@ -135,6 +135,11 @@ impl<'graph, G: BoxableGraphView + Sized + Clone + 'graph> GraphViewOps<'graph> fn materialize(&self) -> Result { let g = InternalGraph::default(); + let earliest = if let Some(earliest) = self.earliest_time() { + earliest + } else { + return Ok(self.new_base_graph(g)); + }; // make sure we preserve all layers even if they are empty // skip default layer @@ -187,9 +192,13 @@ impl<'graph, G: BoxableGraphView + Sized + Clone + 'graph> GraphViewOps<'graph> g.add_node(t, v.name(), [(name.clone(), prop)], v_type_str)?; } } - g.node(v.id()) - .expect("node added") - .add_constant_properties(v.properties().constant())?; + + let node = match g.node(v.id()) { + Some(node) => node, + None => g.add_node(earliest, v.name(), NO_PROPS, v_type_str)?, + }; + + node.add_constant_properties(v.properties().constant())?; } g.add_constant_properties(self.properties().constant())?; @@ -259,11 +268,11 @@ impl<'graph, G: BoxableGraphView + Sized + Clone + 'graph> GraphViewOps<'graph> NodeList::All { .. } => core_nodes .as_ref() .par_iter() - .filter(|&v| self.filter_node(v, layer_ids)) + .filter(|v| self.filter_node(*v, layer_ids)) .count(), NodeList::List { nodes } => nodes .par_iter() - .filter(|&&id| self.filter_node(core_nodes.node_ref(id), layer_ids)) + .filter(|&&id| self.filter_node(core_nodes.node_entry(id), layer_ids)) .count(), } } else { @@ -287,10 +296,10 @@ impl<'graph, G: BoxableGraphView + Sized + Clone + 'graph> GraphViewOps<'graph> edges .as_ref() .par_iter(self.layer_ids().clone()) - .filter(|&e| { - self.filter_edge(e, self.layer_ids()) - && self.filter_node(nodes.node_ref(e.src()), self.layer_ids()) - && self.filter_node(nodes.node_ref(e.dst()), self.layer_ids()) + .filter(|e| { + self.filter_edge(e.as_ref(), self.layer_ids()) + && self.filter_node(nodes.node_entry(e.src()), self.layer_ids()) + && self.filter_node(nodes.node_entry(e.dst()), self.layer_ids()) }) .count() } @@ -300,9 +309,9 @@ impl<'graph, G: BoxableGraphView + Sized + Clone + 'graph> GraphViewOps<'graph> edges .as_ref() .par_iter(self.layer_ids().clone()) - .filter(|&e| { - self.filter_node(nodes.node_ref(e.src()), self.layer_ids()) - && self.filter_node(nodes.node_ref(e.dst()), self.layer_ids()) + .filter(|e| { + self.filter_node(nodes.node_entry(e.src()), self.layer_ids()) + && self.filter_node(nodes.node_entry(e.dst()), self.layer_ids()) }) .count() } @@ -311,7 +320,7 @@ impl<'graph, G: BoxableGraphView + Sized + Clone + 'graph> GraphViewOps<'graph> edges .as_ref() .par_iter(self.layer_ids().clone()) - .filter(|&e| self.filter_edge(e, self.layer_ids())) + .filter(|e| self.filter_edge(e.as_ref(), self.layer_ids())) .count() } } @@ -324,19 +333,19 @@ impl<'graph, G: BoxableGraphView + Sized + Clone + 'graph> GraphViewOps<'graph> FilterState::Neither => core_edges .as_ref() .par_iter(layer_ids.clone()) - .map(|edge| self.edge_exploded_count(edge, layer_ids)) + .map(|edge| self.edge_exploded_count(edge.as_ref(), layer_ids)) .sum(), FilterState::Both => { let nodes = self.core_nodes(); core_edges .as_ref() .par_iter(layer_ids.clone()) - .filter(|&e| { - self.filter_edge(e, self.layer_ids()) - && self.filter_node(nodes.node_ref(e.src()), self.layer_ids()) - && self.filter_node(nodes.node_ref(e.dst()), self.layer_ids()) + .filter(|e| { + self.filter_edge(e.as_ref(), self.layer_ids()) + && self.filter_node(nodes.node_entry(e.src()), self.layer_ids()) + && self.filter_node(nodes.node_entry(e.dst()), self.layer_ids()) }) - .map(|e| self.edge_exploded_count(e, layer_ids)) + .map(|e| self.edge_exploded_count(e.as_ref(), layer_ids)) .sum() } FilterState::Nodes => { @@ -344,18 +353,18 @@ impl<'graph, G: BoxableGraphView + Sized + Clone + 'graph> GraphViewOps<'graph> core_edges .as_ref() .par_iter(layer_ids.clone()) - .filter(|&e| { - self.filter_node(nodes.node_ref(e.src()), self.layer_ids()) - && self.filter_node(nodes.node_ref(e.dst()), self.layer_ids()) + .filter(|e| { + self.filter_node(nodes.node_entry(e.src()), self.layer_ids()) + && self.filter_node(nodes.node_entry(e.dst()), self.layer_ids()) }) - .map(|e| self.edge_exploded_count(e, layer_ids)) + .map(|e| self.edge_exploded_count(e.as_ref(), layer_ids)) .sum() } FilterState::Edges | FilterState::BothIndependent => core_edges .as_ref() .par_iter(layer_ids.clone()) - .filter(|&e| self.filter_edge(e, self.layer_ids())) - .map(|e| self.edge_exploded_count(e, layer_ids)) + .filter(|e| self.filter_edge(e.as_ref(), self.layer_ids())) + .map(|e| self.edge_exploded_count(e.as_ref(), layer_ids)) .sum(), } } @@ -482,10 +491,8 @@ mod test_exploded_edges { #[cfg(test)] mod test_materialize { - use crate::{ - core::OptionAsStr, db::api::view::internal::CoreGraphOps, prelude::*, - test_utils::test_graph, - }; + use crate::{db::api::view::internal::CoreGraphOps, prelude::*, test_storage}; + use raphtory_api::core::storage::arc_str::OptionAsStr; #[test] fn test_materialize() { @@ -567,8 +574,7 @@ mod test_materialize { graph.add_node(0, "A", NO_PROPS, None).unwrap(); graph.add_node(1, "B", NO_PROPS, Some("H")).unwrap(); - // FIXME: Node types not yet supported (Issue #51) - test_graph(&graph, |graph| { + test_storage!(&graph, |graph| { let node_a = graph.node("A").unwrap(); let node_b = graph.node("B").unwrap(); let node_a_type = node_a.node_type(); diff --git a/raphtory/src/db/api/view/internal/core_ops.rs b/raphtory/src/db/api/view/internal/core_ops.rs index 5fb616571a..d949ba6e55 100644 --- a/raphtory/src/db/api/view/internal/core_ops.rs +++ b/raphtory/src/db/api/view/internal/core_ops.rs @@ -10,7 +10,7 @@ use crate::{ locked_view::LockedView, timeindex::{TimeIndex, TimeIndexOps, TimeIndexWindow}, }, - ArcStr, Prop, + Prop, }, db::api::{ storage::{ @@ -30,6 +30,7 @@ use std::ops::Range; #[cfg(feature = "storage")] use pometry_storage::timestamps::TimeStamps; +use raphtory_api::core::storage::arc_str::ArcStr; #[cfg(feature = "storage")] use rayon::prelude::*; @@ -372,6 +373,7 @@ impl CoreGraphOps for G { pub enum NodeAdditions<'a> { Mem(&'a TimeIndex), + Locked(LockedView<'a, TimeIndex>), Range(TimeIndexWindow<'a, i64>), #[cfg(feature = "storage")] Col(Vec>), @@ -381,9 +383,11 @@ impl<'b> TimeIndexOps for NodeAdditions<'b> { type IndexType = i64; type RangeType<'a> = NodeAdditions<'a> where Self: 'a; + #[inline] fn active(&self, w: Range) -> bool { match self { NodeAdditions::Mem(index) => index.active_t(w), + NodeAdditions::Locked(index) => index.active_t(w), #[cfg(feature = "storage")] NodeAdditions::Col(index) => index.par_iter().any(|index| index.active_t(w.clone())), NodeAdditions::Range(index) => index.active_t(w), @@ -393,6 +397,7 @@ impl<'b> TimeIndexOps for NodeAdditions<'b> { fn range(&self, w: Range) -> Self::RangeType<'_> { match self { NodeAdditions::Mem(index) => NodeAdditions::Range(index.range(w)), + NodeAdditions::Locked(index) => NodeAdditions::Range(index.range(w)), #[cfg(feature = "storage")] NodeAdditions::Col(index) => { let mut ranges = Vec::with_capacity(index.len()); @@ -409,6 +414,7 @@ impl<'b> TimeIndexOps for NodeAdditions<'b> { fn first(&self) -> Option { match self { NodeAdditions::Mem(index) => index.first(), + NodeAdditions::Locked(index) => index.first(), #[cfg(feature = "storage")] NodeAdditions::Col(index) => index.par_iter().flat_map(|index| index.first()).min(), NodeAdditions::Range(index) => index.first(), @@ -418,6 +424,7 @@ impl<'b> TimeIndexOps for NodeAdditions<'b> { fn last(&self) -> Option { match self { NodeAdditions::Mem(index) => index.last(), + NodeAdditions::Locked(index) => index.last(), #[cfg(feature = "storage")] NodeAdditions::Col(index) => index.par_iter().flat_map(|index| index.last()).max(), NodeAdditions::Range(index) => index.last(), @@ -427,6 +434,7 @@ impl<'b> TimeIndexOps for NodeAdditions<'b> { fn iter(&self) -> Box + Send + '_> { match self { NodeAdditions::Mem(index) => index.iter(), + NodeAdditions::Locked(index) => Box::new(index.iter()), #[cfg(feature = "storage")] NodeAdditions::Col(index) => Box::new(index.iter().flat_map(|index| index.iter())), NodeAdditions::Range(index) => index.iter(), @@ -436,6 +444,7 @@ impl<'b> TimeIndexOps for NodeAdditions<'b> { fn len(&self) -> usize { match self { NodeAdditions::Mem(index) => index.len(), + NodeAdditions::Locked(index) => index.len(), NodeAdditions::Range(range) => range.len(), #[cfg(feature = "storage")] NodeAdditions::Col(col) => col.len(), diff --git a/raphtory/src/db/api/view/internal/materialize.rs b/raphtory/src/db/api/view/internal/materialize.rs index d36e3686f8..d06f237001 100644 --- a/raphtory/src/db/api/view/internal/materialize.rs +++ b/raphtory/src/db/api/view/internal/materialize.rs @@ -9,7 +9,7 @@ use crate::{ }, storage::{locked_view::LockedView, timeindex::TimeIndexEntry}, utils::errors::GraphError, - ArcStr, PropType, + PropType, }, db::{ api::{ @@ -37,6 +37,7 @@ use crate::{ }; use chrono::{DateTime, Utc}; use enum_dispatch::enum_dispatch; +use raphtory_api::core::storage::arc_str::ArcStr; use serde::{de::Error, Deserialize, Deserializer, Serialize}; use std::path::Path; diff --git a/raphtory/src/db/api/view/layer.rs b/raphtory/src/db/api/view/layer.rs index 0f7b647d8b..87ccb94214 100644 --- a/raphtory/src/db/api/view/layer.rs +++ b/raphtory/src/db/api/view/layer.rs @@ -1,10 +1,11 @@ use crate::{ - core::{utils::errors::GraphError, ArcStr}, + core::utils::errors::GraphError, db::{ api::view::internal::{InternalLayerOps, OneHopFilter}, graph::views::layer_graph::LayeredGraph, }, }; +use raphtory_api::core::storage::arc_str::ArcStr; use std::sync::Arc; /// Trait defining layer operations diff --git a/raphtory/src/db/api/view/node.rs b/raphtory/src/db/api/view/node.rs index 8fe957987a..125622c507 100644 --- a/raphtory/src/db/api/view/node.rs +++ b/raphtory/src/db/api/view/node.rs @@ -2,7 +2,7 @@ use crate::{ core::{ entities::{edges::edge_ref::EdgeRef, VID}, storage::timeindex::AsTime, - ArcStr, Direction, + Direction, }, db::api::{ properties::{internal::PropertiesOps, Properties}, @@ -16,6 +16,7 @@ use crate::{ prelude::{EdgeViewOps, GraphViewOps, LayerOps}, }; use chrono::{DateTime, Utc}; +use raphtory_api::core::storage::arc_str::ArcStr; pub trait BaseNodeViewOps<'graph>: Clone + TimeOps<'graph> + LayerOps<'graph> { type BaseGraph: GraphViewOps<'graph>; diff --git a/raphtory/src/db/graph/edge.rs b/raphtory/src/db/graph/edge.rs index 4accc97dcd..901f02c7d1 100644 --- a/raphtory/src/db/graph/edge.rs +++ b/raphtory/src/db/graph/edge.rs @@ -12,7 +12,6 @@ use crate::{ entities::{edges::edge_ref::EdgeRef, LayerIds, VID}, storage::timeindex::AsTime, utils::{errors::GraphError, time::IntoTime}, - ArcStr, }, db::{ api::{ @@ -34,6 +33,7 @@ use crate::{ }, prelude::*, }; +use raphtory_api::core::storage::arc_str::ArcStr; use std::{ fmt::{Debug, Formatter}, sync::Arc, @@ -402,13 +402,9 @@ impl<'graph, G: GraphViewOps<'graph>, GH: GraphViewOps<'graph>> OneHopFilter<'gr #[cfg(test)] mod test_edge { - use crate::{ - core::{ArcStr, IntoPropMap}, - prelude::*, - test_storage, - test_utils::test_graph, - }; + use crate::{core::IntoPropMap, prelude::*, test_storage, test_utils::test_graph}; use itertools::Itertools; + use raphtory_api::core::storage::arc_str::ArcStr; use std::collections::HashMap; #[test] diff --git a/raphtory/src/db/graph/graph.rs b/raphtory/src/db/graph/graph.rs index 63fd86f29a..14aad5c3b7 100644 --- a/raphtory/src/db/graph/graph.rs +++ b/raphtory/src/db/graph/graph.rs @@ -164,6 +164,7 @@ impl Graph { pub fn new_with_shards(num_shards: usize) -> Self { Self(Arc::new(InternalGraph::new(num_shards))) } + pub(crate) fn from_internal_graph(internal_graph: Arc) -> Self { Self(internal_graph) } @@ -211,7 +212,7 @@ mod db_tests { algorithms::components::weakly_connected_components, core::{ utils::time::{error::ParseTimeError, TryIntoTime}, - ArcStr, OptionAsStr, Prop, + Prop, }, db::{ api::{ @@ -232,8 +233,13 @@ mod db_tests { use chrono::NaiveDateTime; use itertools::Itertools; use quickcheck_macros::quickcheck; + use raphtory_api::core::storage::arc_str::{ArcStr, OptionAsStr}; + use rayon::prelude::*; use serde_json::Value; - use std::collections::{HashMap, HashSet}; + use std::{ + collections::{HashMap, HashSet}, + path::PathBuf, + }; use tempfile::TempDir; #[test] diff --git a/raphtory/src/db/graph/mod.rs b/raphtory/src/db/graph/mod.rs index 664b8f1709..fceef2c402 100644 --- a/raphtory/src/db/graph/mod.rs +++ b/raphtory/src/db/graph/mod.rs @@ -1,4 +1,4 @@ -use crate::core::entities::properties::props::DictMapper; +use raphtory_api::core::storage::dict_mapper::DictMapper; use std::sync::Arc; pub mod edge; diff --git a/raphtory/src/db/graph/node.rs b/raphtory/src/db/graph/node.rs index 2cac91f5c4..0d1953db16 100644 --- a/raphtory/src/db/graph/node.rs +++ b/raphtory/src/db/graph/node.rs @@ -4,7 +4,6 @@ use crate::{ core::{ entities::{edges::edge_ref::EdgeRef, nodes::node_ref::NodeRef, VID}, utils::errors::GraphError, - ArcStr, }, db::{ api::{ @@ -34,6 +33,7 @@ use crate::{ }, }; use chrono::{DateTime, Utc}; +use raphtory_api::core::storage::arc_str::ArcStr; use std::{ fmt, hash::{Hash, Hasher}, @@ -384,7 +384,8 @@ impl #[cfg(test)] mod node_test { - use crate::{core::ArcStr, prelude::*, test_utils::test_graph}; + use crate::{prelude::*, test_utils::test_graph}; + use raphtory_api::core::storage::arc_str::ArcStr; use std::collections::HashMap; #[test] diff --git a/raphtory/src/db/graph/nodes.rs b/raphtory/src/db/graph/nodes.rs index 9f0ffcc4b8..32687f67c2 100644 --- a/raphtory/src/db/graph/nodes.rs +++ b/raphtory/src/db/graph/nodes.rs @@ -75,7 +75,7 @@ where #[inline] pub(crate) fn iter_refs(&self) -> impl Iterator + 'graph { - let g = self.graph.core_graph(); + let g = self.graph.core_graph().lock(); let node_types_filter = self.node_types_filter.clone(); g.into_nodes_iter(self.graph.clone(), node_types_filter) } @@ -89,14 +89,14 @@ where } pub fn par_iter(&self) -> impl ParallelIterator> + '_ { - let cg = self.graph.core_graph(); + let cg = self.graph.core_graph().lock(); let node_types_filter = self.node_types_filter.clone(); cg.into_nodes_par(&self.graph, node_types_filter) .map(|v| NodeView::new_one_hop_filtered(&self.base_graph, &self.graph, v)) } pub fn into_par_iter(self) -> impl ParallelIterator> + 'graph { - let cg = self.graph.core_graph(); + let cg = self.graph.core_graph().lock(); cg.into_nodes_par(self.graph.clone(), self.node_types_filter) .map(move |n| { NodeView::new_one_hop_filtered(self.base_graph.clone(), self.graph.clone(), n) @@ -127,6 +127,18 @@ where self.graph.node_meta().node_type_meta(), node_types, )); + println!( + "node_types_filter = {:?}", + node_types_filter.as_ref().unwrap() + ); + println!( + "node_types = {:?}", + self.graph.nodes().node_type().collect_vec() + ); + println!( + "node_type_ids = {:?}", + self.graph.nodes().node_type_id().collect_vec() + ); Nodes { base_graph: self.base_graph.clone(), graph: self.graph.clone(), diff --git a/raphtory/src/db/graph/views/deletion_graph.rs b/raphtory/src/db/graph/views/deletion_graph.rs index c0aa617d2e..7f43169dfe 100644 --- a/raphtory/src/db/graph/views/deletion_graph.rs +++ b/raphtory/src/db/graph/views/deletion_graph.rs @@ -240,17 +240,22 @@ impl TimeSemantics for PersistentGraph { } fn earliest_time_window(&self, start: i64, end: i64) -> Option { - self.0.earliest_time_window(start, end) + self.earliest_time_global() + .map(|t| t.max(start)) + .filter(|&t| t < end) } fn latest_time_window(&self, start: i64, end: i64) -> Option { - self.0.latest_time_window(start, end) + self.latest_time_global() + .map(|t| t.min(end.saturating_sub(1))) + .filter(|&t| t > start) } fn node_earliest_time_window(&self, v: VID, start: i64, end: i64) -> Option { let v = self.core_node_entry(v); - if v.additions().first_t()? <= start { - Some(v.additions().range_t(start..end).first_t().unwrap_or(start)) + let additions = v.additions(); + if additions.first_t()? <= start { + Some(additions.range_t(start..end).first_t().unwrap_or(start)) } else { None } @@ -684,7 +689,9 @@ mod test_deletions { db::{ api::view::time::internal::InternalTimeOps, graph::{ - edge::EdgeView, graph::assert_graph_equal, views::deletion_graph::PersistentGraph, + edge::EdgeView, + graph::assert_graph_equal, + views::deletion_graph::{PersistentGraph, TimeSemantics}, }, }, prelude::*, @@ -847,6 +854,44 @@ mod test_deletions { assert_graph_equal(&gm, &g.window(3, 5)) } + #[test] + fn test_materialize_window_earliest_time() { + let g = PersistentGraph::new(); + g.add_edge(0, 1, 2, NO_PROPS, None).unwrap(); + g.delete_edge(10, 1, 2, None).unwrap(); + + let ltg = g.latest_time_global(); + assert_eq!(ltg, Some(10)); + + let wg = g.window(3, 5); + + let e = wg.edge(1, 2).unwrap(); + assert_eq!(e.earliest_time(), Some(3)); + assert_eq!(e.latest_time(), Some(4)); + let n1 = wg.node(1).unwrap(); + assert_eq!(n1.earliest_time(), Some(3)); + assert_eq!(n1.latest_time(), Some(4)); + let n2 = wg.node(2).unwrap(); + assert_eq!(n2.earliest_time(), Some(3)); + assert_eq!(n2.latest_time(), Some(4)); + + let actual_lt = wg.latest_time(); + assert_eq!(actual_lt, Some(4)); + + let actual_et = wg.earliest_time(); + assert_eq!(actual_et, Some(3)); + + let gm = g + .window(3, 5) + .materialize() + .unwrap() + .into_persistent() + .unwrap(); + + let expected_et = gm.earliest_time(); + assert_eq!(actual_et, expected_et); + } + #[test] fn test_exploded_latest_time() { let g = PersistentGraph::new(); diff --git a/raphtory/src/db/graph/views/window_graph.rs b/raphtory/src/db/graph/views/window_graph.rs index b9d5aae011..3f45ac8e3c 100644 --- a/raphtory/src/db/graph/views/window_graph.rs +++ b/raphtory/src/db/graph/views/window_graph.rs @@ -41,7 +41,7 @@ use crate::{ core::{ entities::{edges::edge_ref::EdgeRef, LayerIds, VID}, storage::timeindex::AsTime, - ArcStr, Prop, + Prop, }, db::{ api::{ @@ -62,6 +62,7 @@ use crate::{ prelude::GraphViewOps, }; use chrono::{DateTime, Utc}; +use raphtory_api::core::storage::arc_str::ArcStr; use std::{ fmt::{Debug, Formatter}, ops::Range, diff --git a/raphtory/src/db/internal/core_ops.rs b/raphtory/src/db/internal/core_ops.rs index bbabc190e8..b8b50926d9 100644 --- a/raphtory/src/db/internal/core_ops.rs +++ b/raphtory/src/db/internal/core_ops.rs @@ -8,7 +8,6 @@ use crate::{ LayerIds, ELID, VID, }, storage::locked_view::LockedView, - ArcStr, }, db::api::{ storage::{ @@ -25,6 +24,7 @@ use crate::{ prelude::Prop, }; use itertools::Itertools; +use raphtory_api::core::storage::arc_str::ArcStr; use std::{collections::HashMap, iter, sync::Arc}; impl CoreGraphOps for InternalGraph { @@ -38,7 +38,7 @@ impl CoreGraphOps for InternalGraph { } fn core_graph(&self) -> GraphStorage { - GraphStorage::Mem(self.lock()) + GraphStorage::Unlocked(self.clone()) } #[inline] fn node_meta(&self) -> &Meta { @@ -163,13 +163,13 @@ impl CoreGraphOps for InternalGraph { entry .layer_iter() .next() - .and_then(|layer| layer.const_prop(prop_id).cloned()) + .and_then(|data| data.layer.const_prop(prop_id).cloned()) } else { let prop_map: HashMap<_, _> = entry .layer_iter() .enumerate() - .flat_map(|(id, layer)| { - layer + .flat_map(|(id, data)| { + data.layer .const_prop(prop_id) .map(|p| (self.inner().get_layer_name(id), p.clone())) }) @@ -214,7 +214,7 @@ impl CoreGraphOps for InternalGraph { LayerIds::None => vec![], LayerIds::All => entry .layer_iter() - .map(|l| l.const_prop_ids()) + .map(|data| data.layer.const_prop_ids()) .kmerge() .dedup() .collect(), @@ -266,12 +266,12 @@ impl CoreGraphOps for InternalGraph { #[inline] fn core_edge(&self, eid: ELID) -> EdgeStorageEntry { - EdgeStorageEntry::Mem(self.inner().storage.edges.entry(eid.pid())) + EdgeStorageEntry::Unlocked(self.inner().storage.edges.entry(eid.pid())) } #[inline] fn core_node_entry(&self, vid: VID) -> NodeStorageEntry { - NodeStorageEntry::Mem(self.inner().storage.nodes.entry(vid)) + NodeStorageEntry::Unlocked(self.inner().storage.nodes.entry(vid)) } fn core_node_arc(&self, vid: VID) -> NodeOwnedEntry { @@ -290,12 +290,10 @@ impl CoreGraphOps for InternalGraph { #[cfg(test)] mod test_edges { + use raphtory_api::core::storage::arc_str::ArcStr; use std::collections::HashMap; - use crate::{ - core::{ArcStr, IntoPropMap}, - prelude::*, - }; + use crate::{core::IntoPropMap, prelude::*}; #[test] fn test_edge_properties_for_layers() { diff --git a/raphtory/src/db/internal/static_properties.rs b/raphtory/src/db/internal/static_properties.rs index 0a390e4b0f..9b6e61d3b7 100644 --- a/raphtory/src/db/internal/static_properties.rs +++ b/raphtory/src/db/internal/static_properties.rs @@ -1,7 +1,8 @@ use crate::{ - core::{entities::graph::tgraph::InternalGraph, ArcStr, Prop}, + core::{entities::graph::tgraph::InternalGraph, Prop}, db::api::properties::internal::ConstPropertiesOps, }; +use raphtory_api::core::storage::arc_str::ArcStr; impl ConstPropertiesOps for InternalGraph { fn get_const_prop_id(&self, name: &str) -> Option { diff --git a/raphtory/src/db/internal/temporal_properties.rs b/raphtory/src/db/internal/temporal_properties.rs index e5249f0062..91972a00f2 100644 --- a/raphtory/src/db/internal/temporal_properties.rs +++ b/raphtory/src/db/internal/temporal_properties.rs @@ -1,11 +1,12 @@ use crate::{ - core::{entities::graph::tgraph::InternalGraph, storage::timeindex::AsTime, ArcStr, Prop}, + core::{entities::graph::tgraph::InternalGraph, storage::timeindex::AsTime, Prop}, db::api::{ properties::internal::{TemporalPropertiesOps, TemporalPropertyViewOps}, storage::tprop_storage_ops::TPropOps, }, }; use chrono::{DateTime, Utc}; +use raphtory_api::core::storage::arc_str::ArcStr; use std::ops::Deref; impl TemporalPropertyViewOps for InternalGraph { diff --git a/raphtory/src/db/internal/time_semantics.rs b/raphtory/src/db/internal/time_semantics.rs index 1bbbc5aed0..732a669cbe 100644 --- a/raphtory/src/db/internal/time_semantics.rs +++ b/raphtory/src/db/internal/time_semantics.rs @@ -109,12 +109,14 @@ impl TimeSemantics for InternalGraph { fn node_history(&self, v: VID) -> Vec { let node = self.core_node_entry(v); - node.additions().iter_t().collect() + let collect = node.additions().iter_t().collect(); + collect } fn node_history_window(&self, v: VID, w: Range) -> Vec { let node = self.core_node_entry(v); - node.additions().range_t(w).iter_t().collect() + let collect = node.additions().range_t(w).iter_t().collect(); + collect } fn edge_history(&self, e: EdgeRef, layer_ids: LayerIds) -> Vec { @@ -293,14 +295,15 @@ impl TimeSemantics for InternalGraph { self.inner() .graph_meta .get_temporal_prop(prop_id) - .filter(|p| p.iter_window_t(w).next().is_some()) + .map(|p| p.iter_window_t(w).next().is_some()) + .filter(|p| *p) .is_some() } fn temporal_prop_vec_window(&self, prop_id: usize, start: i64, end: i64) -> Vec<(i64, Prop)> { self.inner() .get_temporal_prop(prop_id) - .map(|prop| (&prop).iter_window_t(start..end).collect()) + .map(|prop| prop.iter_window_t(start..end).collect()) .unwrap_or_default() } diff --git a/raphtory/src/disk_graph/graph_impl/const_properties_ops.rs b/raphtory/src/disk_graph/graph_impl/const_properties_ops.rs index eda8658b28..be62971eed 100644 --- a/raphtory/src/disk_graph/graph_impl/const_properties_ops.rs +++ b/raphtory/src/disk_graph/graph_impl/const_properties_ops.rs @@ -1,4 +1,5 @@ -use crate::{core::ArcStr, db::api::properties::internal::ConstPropertiesOps, prelude::Prop}; +use crate::{db::api::properties::internal::ConstPropertiesOps, prelude::Prop}; +use raphtory_api::core::storage::arc_str::ArcStr; use super::DiskGraph; diff --git a/raphtory/src/disk_graph/graph_impl/core_ops.rs b/raphtory/src/disk_graph/graph_impl/core_ops.rs index daeb3b895d..43750008ea 100644 --- a/raphtory/src/disk_graph/graph_impl/core_ops.rs +++ b/raphtory/src/disk_graph/graph_impl/core_ops.rs @@ -7,7 +7,7 @@ use crate::{ LayerIds, ELID, VID, }, storage::locked_view::LockedView, - ArcStr, Prop, + Prop, }, db::api::{ storage::{ @@ -33,7 +33,8 @@ use crate::{ }; use itertools::Itertools; use polars_arrow::datatypes::ArrowDataType; -use pometry_storage::{properties::Properties, GidRef, GID}; +use pometry_storage::{properties::ConstProps, GidRef, GID}; +use raphtory_api::core::storage::arc_str::ArcStr; use rayon::prelude::*; impl CoreGraphOps for DiskGraph { @@ -93,7 +94,7 @@ impl CoreGraphOps for DiskGraph { } fn get_all_node_types(&self) -> Vec { - todo!("Node types are not supported on diskgraph yet") + self.node_meta.get_all_node_types() } fn node_id(&self, v: VID) -> u64 { @@ -112,8 +113,9 @@ impl CoreGraphOps for DiskGraph { } } - fn node_type(&self, _v: VID) -> Option { - None + fn node_type(&self, v: VID) -> Option { + let node_type_id = self.inner.node_type_id(v); + self.node_meta.get_node_type_name_by_id(node_type_id) } fn internalise_node(&self, v: NodeRef) -> Option { @@ -137,19 +139,18 @@ impl CoreGraphOps for DiskGraph { } fn constant_node_prop(&self, v: VID, id: usize) -> Option { - match &self.inner.node_properties() { + match &self.inner.node_properties().const_props { None => None, Some(props) => const_props(props, v, id), } } fn constant_node_prop_ids(&self, v: VID) -> Box + '_> { - match self.inner.node_properties() { + match &self.inner.node_properties().const_props { None => Box::new(std::iter::empty()), - Some(props) => Box::new( - (0..props.const_props.num_props()) - .filter(move |id| props.const_props.has_prop(v, *id)), - ), + Some(props) => { + Box::new((0..props.num_props()).filter(move |id| props.has_prop(v, *id))) + } } } @@ -224,41 +225,28 @@ impl CoreGraphOps for DiskGraph { .sum() } - fn node_type_id(&self, _v: VID) -> usize { - // self.graph().node_type_id(v) TODO: Impl node types for disk_graph graphs - 0 + fn node_type_id(&self, v: VID) -> usize { + self.inner.node_type_id(v) } } -pub fn const_props(props: &Properties, index: Index, id: usize) -> Option +pub fn const_props(props: &ConstProps, index: Index, id: usize) -> Option where usize: From, { - let dtype = props.const_props.prop_dtype(id); + let dtype = props.prop_dtype(id); match dtype.data_type() { - ArrowDataType::Int64 => props.const_props.prop_native(index, id).map(Prop::I64), - ArrowDataType::Int32 => props.const_props.prop_native(index, id).map(Prop::I32), - ArrowDataType::UInt64 => props.const_props.prop_native(index, id).map(Prop::U64), - ArrowDataType::UInt32 => props.const_props.prop_native(index, id).map(Prop::U32), - ArrowDataType::UInt16 => props.const_props.prop_native(index, id).map(Prop::U16), - ArrowDataType::UInt8 => props.const_props.prop_native(index, id).map(Prop::U8), - ArrowDataType::Float64 => props.const_props.prop_native(index, id).map(Prop::F64), - ArrowDataType::Float32 => props.const_props.prop_native(index, id).map(Prop::F32), - ArrowDataType::Utf8 => props - .const_props - .prop_str(index, id) - .map(Into::into) - .map(Prop::Str), - ArrowDataType::LargeUtf8 => props - .const_props - .prop_str(index, id) - .map(Into::into) - .map(Prop::Str), - ArrowDataType::Utf8View => props - .const_props - .prop_str(index, id) - .map(Into::into) - .map(Prop::Str), - _ => unimplemented!(), + ArrowDataType::Int64 => props.prop_native(index, id).map(Prop::I64), + ArrowDataType::Int32 => props.prop_native(index, id).map(Prop::I32), + ArrowDataType::UInt64 => props.prop_native(index, id).map(Prop::U64), + ArrowDataType::UInt32 => props.prop_native(index, id).map(Prop::U32), + ArrowDataType::UInt16 => props.prop_native(index, id).map(Prop::U16), + ArrowDataType::UInt8 => props.prop_native(index, id).map(Prop::U8), + ArrowDataType::Float64 => props.prop_native(index, id).map(Prop::F64), + ArrowDataType::Float32 => props.prop_native(index, id).map(Prop::F32), + ArrowDataType::Utf8 => props.prop_str(index, id).map(Into::into).map(Prop::Str), + ArrowDataType::LargeUtf8 => props.prop_str(index, id).map(Into::into).map(Prop::Str), + ArrowDataType::Utf8View => props.prop_str(index, id).map(Into::into).map(Prop::Str), + _ => unimplemented!("Data type not supported"), } } diff --git a/raphtory/src/disk_graph/graph_impl/interop.rs b/raphtory/src/disk_graph/graph_impl/interop.rs index 51283387a2..a0b1402185 100644 --- a/raphtory/src/disk_graph/graph_impl/interop.rs +++ b/raphtory/src/disk_graph/graph_impl/interop.rs @@ -31,6 +31,24 @@ impl GraphLike for Graph { self.nodes().name().into_iter() } + fn node_type_ids(&self) -> Option> { + if self.0.inner().node_meta.node_type_meta().len() <= 1 { + None + } else { + let core_nodes = self.core_nodes(); + Some((0..core_nodes.len()).map(move |i| core_nodes.node_entry(VID(i)).node_type_id())) + } + } + + fn node_types(&self) -> Option> { + let meta = self.0.inner().node_meta.node_type_meta(); + if meta.len() <= 1 { + None + } else { + Some(meta.get_keys().into_iter().map(|s| s.to_string())) + } + } + fn layer_names(&self) -> Vec { self.edge_meta() .layer_meta() diff --git a/raphtory/src/disk_graph/graph_impl/mod.rs b/raphtory/src/disk_graph/graph_impl/mod.rs index 80b397003b..e55615ef5e 100644 --- a/raphtory/src/disk_graph/graph_impl/mod.rs +++ b/raphtory/src/disk_graph/graph_impl/mod.rs @@ -182,6 +182,14 @@ impl DiskGraph { let mut edge_meta = Meta::new(); let graph_meta = GraphMeta::new(); + for node_type in inner_graph.node_types().into_iter().flatten() { + if let Some(node_type) = node_type { + node_meta.get_or_create_node_type_id(node_type); + } else { + panic!("Node types cannot be null"); + } + } + for layer in inner_graph.layers() { let edge_props_fields = layer.edges_data_type(); @@ -204,15 +212,17 @@ impl DiskGraph { edge_meta.layer_meta().get_or_create_id(l_name); } - if let Some(props) = inner_graph.node_properties().as_ref() { - let node_const_props_fields = props.const_props.prop_dtypes(); + if let Some(props) = &inner_graph.node_properties().const_props { + let node_const_props_fields = props.prop_dtypes(); for field in node_const_props_fields { node_meta .resolve_prop_id(&field.name, field.data_type().into(), true) .expect("Initial resolve should not fail"); } + } - let node_temporal_props_fields = props.temporal_props.prop_dtypes(); + if let Some(props) = &inner_graph.node_properties().temporal_props { + let node_temporal_props_fields = props.prop_dtypes(); for field in node_temporal_props_fields { node_meta .resolve_prop_id(&field.name, field.data_type().into(), false) @@ -273,6 +283,7 @@ impl DiskGraph { read_chunk_size: Option, concurrent_files: Option, num_threads: usize, + node_type_col: Option<&str>, ) -> Result { let layered_edge_list: Vec> = layer_parquet_cols .iter() @@ -290,7 +301,7 @@ impl DiskGraph { ) .collect::>(); - let t_graph = TemporalGraph::from_edge_lists( + let t_graph = TemporalGraph::from_parquets( num_threads, chunk_size, t_props_chunk_size, @@ -299,6 +310,7 @@ impl DiskGraph { graph_dir.as_ref(), layered_edge_list, node_properties.as_ref().map(|p| p.as_ref()), + node_type_col, )?; Ok(Self::new(t_graph, graph_dir.as_ref().to_path_buf())) } @@ -471,20 +483,26 @@ impl InternalPropertyAdditionOps for DiskGraph { #[cfg(test)] mod test { - use std::{cmp::Reverse, iter::once, path::Path}; + use std::{ + cmp::Reverse, + iter::once, + path::{Path, PathBuf}, + }; use itertools::{chain, Itertools}; - use pometry_storage::graph::TemporalGraph; + use pometry_storage::{graph::TemporalGraph, properties::Properties}; use proptest::{prelude::*, sample::size_range}; use rayon::prelude::*; use tempfile::TempDir; use crate::{ - algorithms::components::weakly_connected_components, db::api::view::StaticGraphViewOps, - disk_graph::Time, prelude::*, + algorithms::components::weakly_connected_components, + db::api::view::{internal::TimeSemantics, StaticGraphViewOps}, + disk_graph::Time, + prelude::*, }; - use super::DiskGraph; + use super::{DiskGraph, ParquetLayerCols}; fn make_simple_graph(graph_dir: impl AsRef, edges: &[(u64, u64, i64, f64)]) -> DiskGraph { DiskGraph::make_simple_graph(graph_dir, edges, 1000, 1000) @@ -770,7 +788,8 @@ mod test { mem_graph.add_edge(0, 0, 1, [("test", 0u64)], None).unwrap(); let test_dir = TempDir::new().unwrap(); let disk_graph = - TemporalGraph::from_graph(&mem_graph, test_dir.path(), || Ok(None)).unwrap(); + TemporalGraph::from_graph(&mem_graph, test_dir.path(), || Ok(Properties::default())) + .unwrap(); assert_eq!(disk_graph.num_nodes(), 2); assert_eq!(disk_graph.num_edges(0), 1); } @@ -840,4 +859,242 @@ mod test { "test" ); } + + #[test] + fn test_type_filter_disk_graph_loaded_from_parquets() { + let tmp_dir = tempfile::tempdir().unwrap(); + let graph_dir = tmp_dir.path(); + let chunk_size = 268_435_456; + let num_threads = 4; + let t_props_chunk_size = chunk_size / 8; + let read_chunk_size = 4_000_000; + let concurrent_files = 1; + + let netflow_layer_path = PathBuf::from(env!("CARGO_MANIFEST_DIR")) + .parent() + .map(|p| p.join("pometry-storage-private/resources/test/netflow.parquet")) + .unwrap(); + + let v1_layer_path = PathBuf::from(env!("CARGO_MANIFEST_DIR")) + .parent() + .map(|p| p.join("pometry-storage-private/resources/test/wls.parquet")) + .unwrap(); + + let node_properties = PathBuf::from(env!("CARGO_MANIFEST_DIR")) + .parent() + .map(|p| p.join("pometry-storage-private/resources/test/node_types.parquet")) + .unwrap(); + + let layer_parquet_cols = vec![ + ParquetLayerCols { + parquet_dir: netflow_layer_path.to_str().unwrap(), + layer: "netflow", + src_col: "source", + dst_col: "destination", + time_col: "time", + }, + ParquetLayerCols { + parquet_dir: v1_layer_path.to_str().unwrap(), + layer: "wls", + src_col: "src", + dst_col: "dst", + time_col: "Time", + }, + ]; + + let node_type_col = Some("node_type"); + + let g = DiskGraph::load_from_parquets( + graph_dir, + layer_parquet_cols, + Some(&node_properties), + chunk_size, + t_props_chunk_size, + Some(read_chunk_size as usize), + Some(concurrent_files), + num_threads, + node_type_col, + ) + .unwrap(); + + println!("node types = {:?}", g.nodes().node_type().collect_vec()); + + assert_eq!( + g.nodes().type_filter(&vec!["A"]).name().collect_vec(), + vec!["Comp710070", "Comp844043"] + ); + + assert_eq!( + g.nodes() + .type_filter(&Vec::::new()) + .name() + .collect_vec(), + Vec::::new() + ); + + assert_eq!( + g.nodes().type_filter(&vec![""]).name().collect_vec(), + Vec::::new() + ); + + assert_eq!( + g.nodes() + .type_filter(&vec!["A"]) + .neighbours() + .name() + .map(|n| { n.collect::>() }) + .collect_vec(), + vec![vec!["Comp844043"], vec!["Comp710070"]] + ); + + assert_eq!( + g.nodes() + .type_filter(&vec!["A", "B"]) + .neighbours() + .name() + .map(|n| { n.collect::>() }) + .collect_vec(), + vec![vec!["Comp244393"], vec!["Comp844043"], vec!["Comp710070"]] + ); + + assert_eq!( + g.nodes() + .type_filter(&vec!["C"]) + .neighbours() + .name() + .map(|n| { n.collect::>() }) + .collect_vec(), + Vec::>::new() + ); + + assert_eq!( + g.nodes() + .type_filter(&vec!["A"]) + .neighbours() + .type_filter(&vec!["A"]) + .name() + .map(|n| { n.collect::>() }) + .collect_vec(), + vec![vec!["Comp844043"], vec!["Comp710070"]] + ); + + assert_eq!( + g.nodes() + .type_filter(&vec!["A"]) + .neighbours() + .type_filter(&Vec::<&str>::new()) + .name() + .map(|n| { n.collect::>() }) + .collect_vec(), + vec![vec![], Vec::<&str>::new()] + ); + + let w = g.window(6415659, 7387801); + + assert_eq!( + w.nodes().type_filter(&vec!["A"]).name().collect_vec(), + vec!["Comp710070", "Comp844043"] + ); + + assert_eq!( + w.nodes() + .type_filter(&Vec::::new()) + .name() + .collect_vec(), + Vec::::new() + ); + + assert_eq!( + w.nodes().type_filter(&vec![""]).name().collect_vec(), + Vec::::new() + ); + + let l = g.layers(["netflow"]).unwrap(); + + assert_eq!( + l.nodes().type_filter(&vec!["A"]).name().collect_vec(), + vec!["Comp710070", "Comp844043"] + ); + + assert_eq!( + l.nodes() + .type_filter(&Vec::::new()) + .name() + .collect_vec(), + Vec::::new() + ); + + assert_eq!( + l.nodes().type_filter(&vec![""]).name().collect_vec(), + Vec::::new() + ); + } + + #[test] + fn test_type_filter_disk_graph_created_from_in_memory_graph() { + let g = Graph::new(); + g.add_node(1, 1, NO_PROPS, Some("a")).unwrap(); + g.add_node(1, 2, NO_PROPS, Some("b")).unwrap(); + g.add_node(1, 3, NO_PROPS, Some("b")).unwrap(); + g.add_node(1, 4, NO_PROPS, Some("a")).unwrap(); + g.add_node(1, 5, NO_PROPS, Some("c")).unwrap(); + g.add_node(1, 6, NO_PROPS, Some("e")).unwrap(); + g.add_node(1, 7, NO_PROPS, None).unwrap(); + g.add_node(1, 8, NO_PROPS, None).unwrap(); + g.add_node(1, 9, NO_PROPS, None).unwrap(); + g.add_edge(2, 1, 2, NO_PROPS, Some("a")).unwrap(); + g.add_edge(2, 3, 2, NO_PROPS, Some("a")).unwrap(); + g.add_edge(2, 2, 4, NO_PROPS, Some("a")).unwrap(); + g.add_edge(2, 4, 5, NO_PROPS, Some("a")).unwrap(); + g.add_edge(2, 4, 5, NO_PROPS, Some("a")).unwrap(); + g.add_edge(2, 5, 6, NO_PROPS, Some("a")).unwrap(); + g.add_edge(2, 3, 6, NO_PROPS, Some("a")).unwrap(); + + let tmp_dir = tempfile::tempdir().unwrap(); + let g = DiskGraph::from_graph(&g, tmp_dir.path()).unwrap(); + + assert_eq!( + g.nodes() + .type_filter(&vec!["a", "b", "c", "e"]) + .name() + .collect_vec(), + vec!["1", "2", "3", "4", "5", "6"] + ); + + assert_eq!( + g.nodes() + .type_filter(&Vec::::new()) + .name() + .collect_vec(), + Vec::::new() + ); + + assert_eq!( + g.nodes().type_filter(&vec![""]).name().collect_vec(), + vec!["7", "8", "9"] + ); + + let g = DiskGraph::load_from_dir(tmp_dir.path()).unwrap(); + + assert_eq!( + g.nodes() + .type_filter(&vec!["a", "b", "c", "e"]) + .name() + .collect_vec(), + vec!["1", "2", "3", "4", "5", "6"] + ); + + assert_eq!( + g.nodes() + .type_filter(&Vec::::new()) + .name() + .collect_vec(), + Vec::::new() + ); + + assert_eq!( + g.nodes().type_filter(&vec![""]).name().collect_vec(), + vec!["7", "8", "9"] + ); + } } diff --git a/raphtory/src/disk_graph/graph_impl/prop_conversion.rs b/raphtory/src/disk_graph/graph_impl/prop_conversion.rs index 40b02c0166..1195afa5a9 100644 --- a/raphtory/src/disk_graph/graph_impl/prop_conversion.rs +++ b/raphtory/src/disk_graph/graph_impl/prop_conversion.rs @@ -20,14 +20,14 @@ use std::path::Path; pub fn make_node_properties_from_graph( graph: &Graph, graph_dir: impl AsRef, -) -> Result>, RAError> { +) -> Result, RAError> { let graph_dir = graph_dir.as_ref(); let n = graph.unfiltered_num_nodes(); let temporal_meta = graph.node_meta().temporal_prop_meta(); let constant_meta = graph.node_meta().const_prop_meta(); if temporal_meta.is_empty() && constant_meta.is_empty() { - return Ok(None); + return Ok(Properties::default()); } let nodes = graph.0.inner().storage.nodes.read_lock(); @@ -91,7 +91,7 @@ pub fn make_node_properties_from_graph( (Field::new(prop_key, dtype, true), col) }) }); - let props = builder.build().map(Some)?; + let props = builder.build()?; Ok(props) } diff --git a/raphtory/src/disk_graph/graph_impl/temporal_properties_ops.rs b/raphtory/src/disk_graph/graph_impl/temporal_properties_ops.rs index 45c830b86e..30b0e85816 100644 --- a/raphtory/src/disk_graph/graph_impl/temporal_properties_ops.rs +++ b/raphtory/src/disk_graph/graph_impl/temporal_properties_ops.rs @@ -1,8 +1,8 @@ use crate::{ - core::ArcStr, db::api::properties::internal::{TemporalPropertiesOps, TemporalPropertyViewOps}, prelude::Prop, }; +use raphtory_api::core::storage::arc_str::ArcStr; use super::DiskGraph; diff --git a/raphtory/src/disk_graph/graph_impl/time_semantics.rs b/raphtory/src/disk_graph/graph_impl/time_semantics.rs index f93e6072eb..2424193917 100644 --- a/raphtory/src/disk_graph/graph_impl/time_semantics.rs +++ b/raphtory/src/disk_graph/graph_impl/time_semantics.rs @@ -118,10 +118,10 @@ impl TimeSemantics for DiskGraph { || self .inner .node_properties() + .temporal_props .as_ref() .map(|props| { props - .temporal_props .timestamps::(v.vid()) .active_t(w.clone()) }) @@ -451,26 +451,26 @@ impl TimeSemantics for DiskGraph { } fn has_temporal_node_prop(&self, v: VID, prop_id: usize) -> bool { - match &self.inner.node_properties() { + match &self.inner.node_properties().temporal_props { None => false, - Some(props) => props.temporal_props.has_prop(v, prop_id), + Some(props) => props.has_prop(v, prop_id), } } #[doc = " and the second element is the property value."] fn temporal_node_prop_vec(&self, v: VID, id: usize) -> Vec<(i64, Prop)> { - match &self.inner.node_properties() { + match &self.inner.node_properties().temporal_props { None => { vec![] } - Some(props) => props.temporal_props.prop(v, id).iter_t().collect(), + Some(props) => props.prop(v, id).iter_t().collect(), } } fn has_temporal_node_prop_window(&self, v: VID, prop_id: usize, w: Range) -> bool { - match &self.inner.node_properties() { + match &self.inner.node_properties().temporal_props { None => false, - Some(props) => props.temporal_props.has_prop_window(v, prop_id, w), + Some(props) => props.has_prop_window(v, prop_id, w), } } @@ -481,13 +481,9 @@ impl TimeSemantics for DiskGraph { start: i64, end: i64, ) -> Vec<(i64, Prop)> { - match &self.inner.node_properties() { + match &self.inner.node_properties().temporal_props { None => vec![], - Some(props) => props - .temporal_props - .prop(v, id) - .iter_window_t(start..end) - .collect(), + Some(props) => props.prop(v, id).iter_window_t(start..end).collect(), } } diff --git a/raphtory/src/disk_graph/graph_impl/tprops.rs b/raphtory/src/disk_graph/graph_impl/tprops.rs index c9477298fa..ab9d0dcb4f 100644 --- a/raphtory/src/disk_graph/graph_impl/tprops.rs +++ b/raphtory/src/disk_graph/graph_impl/tprops.rs @@ -21,7 +21,7 @@ use std::{iter, ops::Range}; impl<'a, T: NativeType + Into> TPropOps<'a> for TPropColumn<'a, ChunkedPrimitiveCol<'a, T>, TimeIndexEntry> { - fn last_before(self, t: i64) -> Option<(TimeIndexEntry, Prop)> { + fn last_before(&self, t: i64) -> Option<(TimeIndexEntry, Prop)> { let (props, timestamps) = self.into_inner(); let (t, t_index) = timestamps.last_before(t)?; let v = props.get(t_index)?; @@ -68,7 +68,7 @@ impl<'a, T: NativeType + Into> TPropOps<'a> } impl<'a, I: Offset> TPropOps<'a> for TPropColumn<'a, StringCol<'a, I>, TimeIndexEntry> { - fn last_before(self, t: i64) -> Option<(TimeIndexEntry, Prop)> { + fn last_before(&self, t: i64) -> Option<(TimeIndexEntry, Prop)> { let (props, timestamps) = self.into_inner(); let (t, t_index) = timestamps.last_before(t)?; let v = props.get(t_index)?; @@ -152,7 +152,7 @@ pub fn read_tprop_column(id: usize, field: Field, edge: Edge) -> Option TPropOps<'a> for EmptyTProp { - fn last_before(self, _t: i64) -> Option<(TimeIndexEntry, Prop)> { + fn last_before(&self, _t: i64) -> Option<(TimeIndexEntry, Prop)> { None } @@ -202,7 +202,7 @@ macro_rules! for_all { } impl<'a> TPropOps<'a> for DiskTProp<'a, TimeIndexEntry> { - fn last_before(self, t: i64) -> Option<(TimeIndexEntry, Prop)> { + fn last_before(&self, t: i64) -> Option<(TimeIndexEntry, Prop)> { for_all!(self, v => v.last_before(t)) } diff --git a/raphtory/src/disk_graph/storage_interface/node.rs b/raphtory/src/disk_graph/storage_interface/node.rs index 3afcda05d3..d083f2e73d 100644 --- a/raphtory/src/disk_graph/storage_interface/node.rs +++ b/raphtory/src/disk_graph/storage_interface/node.rs @@ -9,13 +9,13 @@ use crate::{ tprop_storage_ops::TPropOps, variants::{direction_variants::DirectionVariants, layer_variants::LayerVariants}, }, - view::internal::NodeAdditions, + view::internal::{CoreGraphOps, NodeAdditions}, }, }; use itertools::Itertools; use pometry_storage::{graph::TemporalGraph, timestamps::TimeStamps, GidRef}; use rayon::iter::{IndexedParallelIterator, IntoParallelRefIterator, ParallelIterator}; -use std::{iter, sync::Arc}; +use std::{borrow::Cow, iter, sync::Arc}; #[derive(Copy, Clone, Debug)] pub struct DiskNode<'a> { @@ -115,7 +115,7 @@ impl<'a> DiskNode<'a> { .merge_by(self.out_edges(layers), |e1, e2| e1.remote() <= e2.remote()) } - pub fn additions_for_layers(self, layer_ids: &LayerIds) -> NodeAdditions<'a> { + pub fn additions_for_layers(&self, layer_ids: &LayerIds) -> NodeAdditions<'a> { let mut additions = match layer_ids { LayerIds::None => Vec::with_capacity(1), LayerIds::All => { @@ -154,8 +154,9 @@ impl<'a> DiskNode<'a> { additions } }; - if let Some(props) = self.graph.node_properties() { - let timestamps = props.temporal_props.timestamps::(self.vid); + + if let Some(props) = &self.graph.node_properties().temporal_props { + let timestamps = props.timestamps::(self.vid); if timestamps.len() > 0 { let ts = timestamps.times(); additions.push(ts); @@ -218,8 +219,9 @@ impl<'a> NodeStorageOps<'a> for DiskNode<'a> { fn tprop(self, prop_id: usize) -> impl TPropOps<'a> { self.graph .node_properties() - .unwrap() .temporal_props + .as_ref() + .unwrap() .prop(self.vid, prop_id) } @@ -236,7 +238,7 @@ impl<'a> NodeStorageOps<'a> for DiskNode<'a> { } fn node_type_id(self) -> usize { - 0 + self.graph.node_type_id(self.vid) } fn vid(self) -> VID { @@ -251,11 +253,11 @@ impl<'a> NodeStorageOps<'a> for DiskNode<'a> { } } - fn name(self) -> Option<&'a str> { + fn name(self) -> Option> { match self.graph.node_gid(self.vid).unwrap() { GidRef::U64(_) => None, GidRef::I64(_) => None, - GidRef::Str(v) => Some(v), + GidRef::Str(v) => Some(Cow::from(v)), } } @@ -494,7 +496,7 @@ impl<'a> NodeStorageOps<'a> for &'a DiskOwnedNode { self.as_ref().id() } - fn name(self) -> Option<&'a str> { + fn name(self) -> Option> { self.as_ref().name() } diff --git a/raphtory/src/python/graph/disk_graph.rs b/raphtory/src/python/graph/disk_graph.rs index 826076477f..8ae7c94716 100644 --- a/raphtory/src/python/graph/disk_graph.rs +++ b/raphtory/src/python/graph/disk_graph.rs @@ -195,7 +195,7 @@ impl PyDiskGraph { } #[staticmethod] - #[pyo3(signature = (graph_dir, layer_parquet_cols, node_properties, chunk_size, t_props_chunk_size, read_chunk_size, concurrent_files, num_threads))] + #[pyo3(signature = (graph_dir, layer_parquet_cols, node_properties, chunk_size, t_props_chunk_size, read_chunk_size, concurrent_files, num_threads, node_type_col))] fn load_from_parquets( graph_dir: &str, layer_parquet_cols: ParquetLayerColsList, @@ -205,6 +205,7 @@ impl PyDiskGraph { read_chunk_size: Option, concurrent_files: Option, num_threads: usize, + node_type_col: Option<&str>, ) -> Result { let graph = Self::from_parquets( graph_dir, @@ -215,6 +216,7 @@ impl PyDiskGraph { read_chunk_size, concurrent_files, num_threads, + node_type_col, ); graph.map_err(|e| { GraphError::LoadFailure(format!("Failed to load graph {e:?} from parquet files")) @@ -293,6 +295,7 @@ impl PyDiskGraph { read_chunk_size: Option, concurrent_files: Option, num_threads: usize, + node_type_col: Option<&str>, ) -> Result { DiskGraph::load_from_parquets( graph_dir, @@ -303,6 +306,7 @@ impl PyDiskGraph { read_chunk_size, concurrent_files, num_threads, + node_type_col, ) .map_err(|err| GraphError::LoadFailure(format!("Failed to load graph {err:?}"))) } diff --git a/raphtory/src/python/graph/edge.rs b/raphtory/src/python/graph/edge.rs index 2cb20ff73a..e562bfedb2 100644 --- a/raphtory/src/python/graph/edge.rs +++ b/raphtory/src/python/graph/edge.rs @@ -5,7 +5,7 @@ //! edge as it existed at a particular point in time, or as it existed over a particular time range. //! use crate::{ - core::{utils::errors::GraphError, ArcStr, Direction}, + core::{utils::errors::GraphError, Direction}, db::{ api::{ properties::Properties, @@ -22,6 +22,7 @@ use crate::{ use chrono::{DateTime, Utc}; use itertools::Itertools; use pyo3::{prelude::*, pyclass::CompareOp}; +use raphtory_api::core::storage::arc_str::ArcStr; use std::{ collections::{hash_map::DefaultHasher, HashMap}, hash::{Hash, Hasher}, diff --git a/raphtory/src/python/graph/edges.rs b/raphtory/src/python/graph/edges.rs index 8dac0b686b..c3b9666f82 100644 --- a/raphtory/src/python/graph/edges.rs +++ b/raphtory/src/python/graph/edges.rs @@ -1,5 +1,5 @@ use crate::{ - core::{utils::errors::GraphError, ArcStr, Prop}, + core::{utils::errors::GraphError, Prop}, db::{ api::view::{ internal::CoreGraphOps, BoxedIter, DynamicGraph, IntoDynBoxed, IntoDynamic, @@ -34,6 +34,7 @@ use pyo3::{ prelude::PyModule, pyclass, pymethods, types::PyDict, IntoPy, PyObject, PyResult, Python, ToPyObject, }; +use raphtory_api::core::storage::arc_str::ArcStr; use rayon::{iter::IntoParallelIterator, prelude::*}; use std::collections::HashMap; diff --git a/raphtory/src/python/graph/graph.rs b/raphtory/src/python/graph/graph.rs index 4347b87ad8..5312fc9f10 100644 --- a/raphtory/src/python/graph/graph.rs +++ b/raphtory/src/python/graph/graph.rs @@ -6,7 +6,7 @@ use super::utils; use crate::{ algorithms::components::LargestConnectedComponent, - core::{entities::nodes::node_ref::NodeRef, utils::errors::GraphError, ArcStr}, + core::{entities::nodes::node_ref::NodeRef, utils::errors::GraphError}, db::{ api::view::internal::{CoreGraphOps, DynamicGraph, IntoDynamic, MaterializedGraph}, graph::{edge::EdgeView, node::NodeView, views::node_subgraph::NodeSubgraph}, @@ -21,6 +21,7 @@ use crate::{ }, }; use pyo3::{prelude::*, types::PyBytes}; +use raphtory_api::core::storage::arc_str::ArcStr; use std::{ collections::HashMap, fmt::{Debug, Formatter}, diff --git a/raphtory/src/python/graph/graph_with_deletions.rs b/raphtory/src/python/graph/graph_with_deletions.rs index a53c0cebee..2a565f7c21 100644 --- a/raphtory/src/python/graph/graph_with_deletions.rs +++ b/raphtory/src/python/graph/graph_with_deletions.rs @@ -5,8 +5,16 @@ //! create windows, and query the graph with a variety of algorithms. //! It is a wrapper around a set of shards, which are the actual graph data structures. //! In Python, this class wraps around the rust graph. +use super::{ + graph::PyGraph, + pandas::{ + dataframe::{process_pandas_py_df, GraphLoadException}, + loaders::load_edges_deletions_from_df, + }, + utils, +}; use crate::{ - core::{entities::nodes::node_ref::NodeRef, utils::errors::GraphError, ArcStr, Prop}, + core::{entities::nodes::node_ref::NodeRef, utils::errors::GraphError, Prop}, db::{ api::{ mutation::{AdditionOps, PropertyAdditionOps}, @@ -24,21 +32,13 @@ use pyo3::{ prelude::*, types::{IntoPyDict, PyBytes}, }; +use raphtory_api::core::storage::arc_str::ArcStr; use std::{ collections::HashMap, fmt::{Debug, Formatter}, path::{Path, PathBuf}, }; -use super::{ - graph::PyGraph, - pandas::{ - dataframe::{process_pandas_py_df, GraphLoadException}, - loaders::load_edges_deletions_from_df, - }, - utils, -}; - /// A temporal graph that allows edges and nodes to be deleted. #[derive(Clone)] #[pyclass(name = "PersistentGraph", extends = PyGraphView)] diff --git a/raphtory/src/python/graph/node.rs b/raphtory/src/python/graph/node.rs index 77eb85806e..91750179b5 100644 --- a/raphtory/src/python/graph/node.rs +++ b/raphtory/src/python/graph/node.rs @@ -5,7 +5,7 @@ use crate::{ core::{ entities::nodes::node_ref::{AsNodeRef, NodeRef}, utils::errors::GraphError, - ArcStr, Prop, + Prop, }, db::{ api::{ @@ -45,6 +45,7 @@ use python::{ types::repr::{iterator_repr, Repr}, utils::export::{create_row, extract_properties, get_column_names_from_props}, }; +use raphtory_api::core::storage::arc_str::ArcStr; use rayon::{iter::IntoParallelIterator, prelude::*}; use std::collections::HashMap; diff --git a/raphtory/src/python/graph/pandas/mod.rs b/raphtory/src/python/graph/pandas/mod.rs index a000426a75..41b0394b98 100644 --- a/raphtory/src/python/graph/pandas/mod.rs +++ b/raphtory/src/python/graph/pandas/mod.rs @@ -5,7 +5,6 @@ mod prop_handler; #[cfg(test)] mod test { use crate::{ - core::ArcStr, prelude::*, python::graph::pandas::{ dataframe::PretendDF, @@ -13,6 +12,7 @@ mod test { }, }; use polars_arrow::array::{PrimitiveArray, Utf8Array}; + use raphtory_api::core::storage::arc_str::ArcStr; #[test] fn load_edges_from_pretend_df() { diff --git a/raphtory/src/python/graph/properties/constant_props.rs b/raphtory/src/python/graph/properties/constant_props.rs index 3d6ca20484..02aa41df84 100644 --- a/raphtory/src/python/graph/properties/constant_props.rs +++ b/raphtory/src/python/graph/properties/constant_props.rs @@ -1,5 +1,5 @@ use crate::{ - core::{ArcStr, Prop}, + core::Prop, db::api::properties::{ dyn_props::DynConstProperties, internal::PropertiesOps, ConstProperties, }, @@ -17,6 +17,7 @@ use pyo3::{ exceptions::{PyKeyError, PyTypeError}, prelude::*, }; +use raphtory_api::core::storage::arc_str::ArcStr; use std::{collections::HashMap, sync::Arc}; impl IntoPy for ConstProperties

{ diff --git a/raphtory/src/python/graph/properties/props.rs b/raphtory/src/python/graph/properties/props.rs index d007a6d253..dfd40a8ee9 100644 --- a/raphtory/src/python/graph/properties/props.rs +++ b/raphtory/src/python/graph/properties/props.rs @@ -1,5 +1,5 @@ use crate::{ - core::{ArcStr, Prop}, + core::Prop, db::api::{ properties::{ dyn_props::{DynConstProperties, DynProperties, DynTemporalProperties}, @@ -25,6 +25,7 @@ use pyo3::{ exceptions::{PyKeyError, PyTypeError}, prelude::*, }; +use raphtory_api::core::storage::arc_str::ArcStr; use std::{collections::HashMap, ops::Deref, sync::Arc}; #[derive(PartialEq, Clone)] diff --git a/raphtory/src/python/graph/properties/temporal_props.rs b/raphtory/src/python/graph/properties/temporal_props.rs index 99247462d3..7f2c48bc10 100644 --- a/raphtory/src/python/graph/properties/temporal_props.rs +++ b/raphtory/src/python/graph/properties/temporal_props.rs @@ -1,5 +1,5 @@ use crate::{ - core::{utils::time::IntoTime, ArcStr, Prop}, + core::{utils::time::IntoTime, Prop}, db::api::{ properties::{ dyn_props::{DynTemporalProperties, DynTemporalProperty}, @@ -29,6 +29,7 @@ use pyo3::{ exceptions::{PyKeyError, PyTypeError}, prelude::*, }; +use raphtory_api::core::storage::arc_str::ArcStr; use std::{collections::HashMap, ops::Deref, sync::Arc}; impl> From

for PyTemporalProperties { diff --git a/raphtory/src/python/graph/views/graph_view.rs b/raphtory/src/python/graph/views/graph_view.rs index ca71101e7d..70d4e7f7bd 100644 --- a/raphtory/src/python/graph/views/graph_view.rs +++ b/raphtory/src/python/graph/views/graph_view.rs @@ -4,7 +4,7 @@ use rayon::prelude::*; use std::collections::HashMap; use crate::{ - core::{entities::nodes::node_ref::NodeRef, utils::errors::GraphError, ArcStr}, + core::{entities::nodes::node_ref::NodeRef, utils::errors::GraphError}, db::{ api::{ properties::Properties, @@ -34,6 +34,7 @@ use crate::{ }; use chrono::prelude::*; use pyo3::{prelude::*, types::PyBytes}; +use raphtory_api::core::storage::arc_str::ArcStr; impl IntoPy for MaterializedGraph { fn into_py(self, py: Python<'_>) -> PyObject { diff --git a/raphtory/src/python/graph/views/graph_view_modules/export.rs b/raphtory/src/python/graph/views/graph_view_modules/export.rs index 8b91ad7a9f..3da3d85842 100644 --- a/raphtory/src/python/graph/views/graph_view_modules/export.rs +++ b/raphtory/src/python/graph/views/graph_view_modules/export.rs @@ -1,5 +1,5 @@ use crate::{ - core::{ArcStr, Prop}, + core::Prop, prelude::{EdgeViewOps, GraphViewOps, NodeViewOps, PropUnwrap}, python::graph::views::graph_view::PyGraphView, }; @@ -9,6 +9,7 @@ use pyo3::{ types::{PyDict, PyList, PyTuple}, IntoPy, PyObject, PyResult, Python, ToPyObject, }; +use raphtory_api::core::storage::arc_str::ArcStr; use std::collections::HashMap; #[pymethods] diff --git a/raphtory/src/python/types/macros/trait_impl/node_state.rs b/raphtory/src/python/types/macros/trait_impl/node_state.rs index b0f4a7881a..5eb24d328f 100644 --- a/raphtory/src/python/types/macros/trait_impl/node_state.rs +++ b/raphtory/src/python/types/macros/trait_impl/node_state.rs @@ -1,6 +1,6 @@ #![allow(non_local_definitions)] use crate::{ - core::{entities::nodes::node_ref::NodeRef, ArcStr}, + core::entities::nodes::node_ref::NodeRef, db::{ api::{ state::{LazyNodeState, NodeState, NodeStateOps, OrderedNodeStateOps}, @@ -17,6 +17,7 @@ use pyo3::{ prelude::*, types::PyNotImplemented, }; +use raphtory_api::core::storage::arc_str::ArcStr; use std::sync::Arc; macro_rules! impl_node_state_ops { diff --git a/raphtory/src/python/types/mod.rs b/raphtory/src/python/types/mod.rs index 160503823a..713089c41a 100644 --- a/raphtory/src/python/types/mod.rs +++ b/raphtory/src/python/types/mod.rs @@ -1,7 +1,6 @@ #[macro_use] pub mod macros; -mod arcstr; pub mod iterable; pub mod repr; pub mod wrappers; diff --git a/raphtory/src/python/types/repr.rs b/raphtory/src/python/types/repr.rs index 1a731f4a71..9f2accbe58 100644 --- a/raphtory/src/python/types/repr.rs +++ b/raphtory/src/python/types/repr.rs @@ -1,10 +1,11 @@ use crate::{ - core::{storage::locked_view::LockedView, ArcStr}, + core::storage::locked_view::LockedView, db::api::state::{LazyNodeState, NodeState}, prelude::{GraphViewOps, NodeStateOps, NodeViewOps}, }; use chrono::{DateTime, NaiveDateTime, TimeZone}; use itertools::Itertools; +use raphtory_api::core::storage::arc_str::ArcStr; use std::{collections::HashMap, ops::Deref}; pub fn iterator_repr, V: Repr>(iter: I) -> String { diff --git a/raphtory/src/python/types/wrappers/iterables.rs b/raphtory/src/python/types/wrappers/iterables.rs index 4520155db1..7683c09946 100644 --- a/raphtory/src/python/types/wrappers/iterables.rs +++ b/raphtory/src/python/types/wrappers/iterables.rs @@ -1,7 +1,8 @@ -use crate::{core::ArcStr, db::api::view::BoxedIter, prelude::Prop, python::types::repr::Repr}; +use crate::{db::api::view::BoxedIter, prelude::Prop, python::types::repr::Repr}; use chrono::{DateTime, Utc}; use num::cast::AsPrimitive; use pyo3::prelude::*; +use raphtory_api::core::storage::arc_str::ArcStr; use std::{i64, iter::Sum}; pub(crate) trait MeanExt: Iterator diff --git a/raphtory/src/python/utils/export.rs b/raphtory/src/python/utils/export.rs index 3b99624303..02e74dc820 100644 --- a/raphtory/src/python/utils/export.rs +++ b/raphtory/src/python/utils/export.rs @@ -1,8 +1,9 @@ use crate::{ - core::{entities::properties::props::Meta, storage::timeindex::AsTime, ArcStr, Prop}, + core::{entities::properties::props::Meta, storage::timeindex::AsTime, Prop}, db::api::properties::{internal::PropertiesOps, Properties}, }; use itertools::Itertools; +use raphtory_api::core::storage::arc_str::ArcStr; use rayon::{iter::IntoParallelRefIterator, prelude::*}; use std::{ collections::{HashMap, HashSet}, diff --git a/raphtory/src/search/mod.rs b/raphtory/src/search/mod.rs index 562849ed12..c1160d84c8 100644 --- a/raphtory/src/search/mod.rs +++ b/raphtory/src/search/mod.rs @@ -4,6 +4,7 @@ pub mod into_indexed; use std::{collections::HashSet, ops::Deref, path::Path, sync::Arc}; +use raphtory_api::core::storage::arc_str::{ArcStr, OptionAsStr}; use rayon::{prelude::ParallelIterator, slice::ParallelSlice}; use tantivy::{ collector::TopDocs, @@ -16,7 +17,7 @@ use crate::{ entities::{nodes::node_ref::NodeRef, EID, ELID, VID}, storage::timeindex::{AsTime, TimeIndexEntry}, utils::errors::GraphError, - ArcStr, OptionAsStr, PropType, + PropType, }, db::{ api::{