From 4672baab5d29d50283f5439b9140c5d9c477a1f4 Mon Sep 17 00:00:00 2001 From: ljeub-pometry <97447091+ljeub-pometry@users.noreply.github.com> Date: Tue, 26 Nov 2024 10:25:36 +0100 Subject: [PATCH] improve subgraph count_nodes performance (#1869) * improve subgraph count_nodes performance * clean up warnings * try to optimise index for filtering --- raphtory-graphql/src/data.rs | 13 ++---- raphtory-graphql/src/model/graph/nodes.rs | 6 +-- raphtory/src/db/api/mutation/import_ops.rs | 17 +++----- raphtory/src/db/api/state/lazy_node_state.rs | 7 ++- raphtory/src/db/api/state/node_state.rs | 45 ++++++++------------ raphtory/src/db/api/state/node_state_ops.rs | 11 +++-- raphtory/src/db/api/view/graph.rs | 3 +- raphtory/src/db/graph/graph.rs | 29 +++++++------ raphtory/src/db/graph/views/node_subgraph.rs | 39 +++++++++++------ raphtory/src/db/graph/views/window_graph.rs | 2 +- 10 files changed, 86 insertions(+), 86 deletions(-) diff --git a/raphtory-graphql/src/data.rs b/raphtory-graphql/src/data.rs index 3295b38a8..b62fbc9d2 100644 --- a/raphtory-graphql/src/data.rs +++ b/raphtory-graphql/src/data.rs @@ -248,19 +248,14 @@ impl Data { #[cfg(test)] pub(crate) mod data_tests { + use super::ValidGraphFolder; use crate::{ config::app_config::{AppConfig, AppConfigBuilder}, data::Data, }; use itertools::Itertools; use raphtory::{core::utils::errors::GraphError, db::api::view::MaterializedGraph, prelude::*}; - use std::{ - collections::HashMap, - fs, - fs::File, - io, - path::{Path, PathBuf}, - }; + use std::{collections::HashMap, fs, fs::File, io, path::Path}; #[cfg(feature = "storage")] use raphtory::{ @@ -268,10 +263,10 @@ pub(crate) mod data_tests { disk_graph::DiskGraphStorage, }; #[cfg(feature = "storage")] + use std::path::PathBuf; + #[cfg(feature = "storage")] use std::{thread, time::Duration}; - use super::ValidGraphFolder; - #[cfg(feature = "storage")] fn copy_dir_recursive(source_dir: &Path, target_dir: &Path) -> Result<(), GraphError> { fs::create_dir_all(target_dir)?; diff --git a/raphtory-graphql/src/model/graph/nodes.rs b/raphtory-graphql/src/model/graph/nodes.rs index 3adcf29dc..4ee679f6d 100644 --- a/raphtory-graphql/src/model/graph/nodes.rs +++ b/raphtory-graphql/src/model/graph/nodes.rs @@ -1,9 +1,9 @@ -use crate::model::graph::{node::Node, property::GqlPropValue, FilterCondition, Operator}; -use dynamic_graphql::{Enum, InputObject, ResolvedObject, ResolvedObjectFields}; +use crate::model::graph::{node::Node, FilterCondition, Operator}; +use dynamic_graphql::{ResolvedObject, ResolvedObjectFields}; use raphtory::{ core::utils::errors::GraphError, db::{api::view::DynamicGraph, graph::nodes::Nodes}, - prelude::{GraphViewOps, *}, + prelude::*, }; #[derive(ResolvedObject)] diff --git a/raphtory/src/db/api/mutation/import_ops.rs b/raphtory/src/db/api/mutation/import_ops.rs index 5608e9710..59b3e249f 100644 --- a/raphtory/src/db/api/mutation/import_ops.rs +++ b/raphtory/src/db/api/mutation/import_ops.rs @@ -1,12 +1,7 @@ -use raphtory_api::core::entities::{GID, VID}; -use std::{borrow::Borrow, fmt::Debug}; - +use super::time_from_input; use crate::{ core::{ - entities::{ - nodes::node_ref::{AsNodeRef, NodeRef}, - LayerIds, - }, + entities::{nodes::node_ref::AsNodeRef, LayerIds}, utils::errors::{ GraphError, GraphError::{EdgeExistsError, NodeExistsError}, @@ -23,9 +18,11 @@ use crate::{ }, prelude::{AdditionOps, EdgeViewOps, GraphViewOps, NodeViewOps}, }; -use raphtory_api::core::storage::{arc_str::OptionAsStr, timeindex::AsTime}; - -use super::time_from_input; +use raphtory_api::core::{ + entities::GID, + storage::{arc_str::OptionAsStr, timeindex::AsTime}, +}; +use std::{borrow::Borrow, fmt::Debug}; pub trait ImportOps: StaticGraphViewOps diff --git a/raphtory/src/db/api/state/lazy_node_state.rs b/raphtory/src/db/api/state/lazy_node_state.rs index 65a1f0fdf..52d64d6f7 100644 --- a/raphtory/src/db/api/state/lazy_node_state.rs +++ b/raphtory/src/db/api/state/lazy_node_state.rs @@ -4,7 +4,7 @@ use crate::{ api::{ state::{ ops::{node::NodeOp, NodeOpFilter}, - NodeState, NodeStateOps, + Index, NodeState, NodeStateOps, }, view::{ internal::{NodeList, OneHopFilter}, @@ -97,7 +97,10 @@ impl<'graph, Op: NodeOp + 'graph, G: GraphViewOps<'graph>, GH: GraphViewOps<'gra self.nodes.base_graph.clone(), self.nodes.graph.clone(), values, - Some(keys.into()), + Some(Index::new( + keys, + self.nodes.base_graph.unfiltered_num_nodes(), + )), ) } else { let values = self.collect_vec(); diff --git a/raphtory/src/db/api/state/node_state.rs b/raphtory/src/db/api/state/node_state.rs index cb158adaf..e014c3fd6 100644 --- a/raphtory/src/db/api/state/node_state.rs +++ b/raphtory/src/db/api/state/node_state.rs @@ -7,32 +7,29 @@ use crate::{ prelude::GraphViewOps, }; use rayon::{iter::Either, prelude::*}; -use std::{ - borrow::Borrow, collections::HashMap, fmt::Debug, hash::Hash, marker::PhantomData, sync::Arc, -}; +use std::{fmt::Debug, hash::Hash, marker::PhantomData, sync::Arc}; #[derive(Clone, Debug)] pub struct Index { keys: Arc<[K]>, - map: Arc>, + map: Arc<[bool]>, } -impl From> for Index { - fn from(keys: Vec) -> Self { - let map = keys - .iter() - .copied() - .enumerate() - .map(|(i, k)| (k, i)) - .collect(); +impl> Index { + pub fn new(keys: impl Into>, n: usize) -> Self { + let keys = keys.into(); + let mut map = vec![false; n]; + for k in keys.iter().copied() { + map[k.into()] = true; + } Self { - keys: keys.into(), - map: Arc::new(map), + keys, + map: map.into(), } } } -impl Index { +impl + Send + Sync> Index { pub fn iter(&self) -> impl Iterator + '_ { self.keys.iter() } @@ -47,12 +44,8 @@ impl Index { (0..keys.len()).map(move |i| keys[i]) } - pub fn index(&self, key: &Q) -> Option - where - K: Borrow, - Q: Hash + Eq, - { - self.map.get(key).copied() + pub fn index(&self, key: &K) -> Option { + self.keys.binary_search(key).ok() } pub fn key(&self, index: usize) -> Option { @@ -63,12 +56,8 @@ impl Index { self.keys.len() } - pub fn contains(&self, key: &Q) -> bool - where - K: Borrow, - Q: Hash + Eq, - { - self.map.contains_key(key) + pub fn contains(&self, key: &K) -> bool { + self.map.get((*key).into()).copied().unwrap_or(false) } } @@ -246,7 +235,7 @@ impl< fn get_by_node(&self, node: N) -> Option> { let id = self.graph.internalise_node(node.as_node_ref())?; match &self.keys { - Some(index) => index.map.get(&id).map(|i| &self.values[*i]), + Some(index) => index.index(&id).map(|i| &self.values[i]), None => Some(&self.values[id.0]), } } diff --git a/raphtory/src/db/api/state/node_state_ops.rs b/raphtory/src/db/api/state/node_state_ops.rs index 732722beb..35e88da60 100644 --- a/raphtory/src/db/api/state/node_state_ops.rs +++ b/raphtory/src/db/api/state/node_state_ops.rs @@ -1,7 +1,10 @@ use crate::{ core::entities::nodes::node_ref::AsNodeRef, db::{ - api::state::{node_state::NodeState, node_state_ord_ops, Index}, + api::{ + state::{node_state::NodeState, node_state_ord_ops, Index}, + view::internal::CoreGraphOps, + }, graph::node::NodeView, }, prelude::{GraphViewOps, NodeViewOps}, @@ -110,7 +113,7 @@ pub trait NodeStateOps<'graph>: IntoIterator { self.base_graph().clone(), self.graph().clone(), values, - Some(Index::from(keys)), + Some(Index::new(keys, self.base_graph().unfiltered_num_nodes())), ) } } @@ -134,7 +137,7 @@ pub trait NodeStateOps<'graph>: IntoIterator { self.base_graph().clone(), self.graph().clone(), values, - Some(Index::from(keys)), + Some(Index::new(keys, self.base_graph().unfiltered_num_nodes())), ) } @@ -171,7 +174,7 @@ pub trait NodeStateOps<'graph>: IntoIterator { self.base_graph().clone(), self.graph().clone(), values, - Some(Index::from(keys)), + Some(Index::new(keys, self.base_graph().unfiltered_num_nodes())), ) } diff --git a/raphtory/src/db/api/view/graph.rs b/raphtory/src/db/api/view/graph.rs index 65dbc2d75..3c4028e56 100644 --- a/raphtory/src/db/api/view/graph.rs +++ b/raphtory/src/db/api/view/graph.rs @@ -369,8 +369,7 @@ impl<'graph, G: BoxableGraphView + Sized + Clone + 'graph> GraphViewOps<'graph> .nodes() .into_iter() .filter(|node| !nodes_to_exclude.contains(&node.node)) - .map(|node| node.node) - .collect(); + .map(|node| node.node); NodeSubgraph::new(self.clone(), nodes_to_include) } diff --git a/raphtory/src/db/graph/graph.rs b/raphtory/src/db/graph/graph.rs index d7f74952f..7dca25036 100644 --- a/raphtory/src/db/graph/graph.rs +++ b/raphtory/src/db/graph/graph.rs @@ -449,7 +449,6 @@ mod db_tests { storage::arc_str::{ArcStr, OptionAsStr}, utils::logging::global_info_logger, }; - use serde_json::Value; use std::collections::{HashMap, HashSet}; #[cfg(feature = "proto")] use tempfile::TempDir; @@ -681,7 +680,7 @@ mod db_tests { assert_eq!(gg.edges().len(), 2); let gg = Graph::new(); - let res = gg.add_edge(1, "C", "D", NO_PROPS, None); + gg.add_edge(1, "C", "D", NO_PROPS, None).unwrap(); let res = gg.import_edges(vec![&e_a_b, &e_c_d], false); match res { Err(GraphError::EdgesExistError(duplicates)) => { @@ -823,11 +822,12 @@ mod db_tests { #[test] fn import_edge_as() { let g = Graph::new(); - let g_a = g.add_node(0, "A", NO_PROPS, None).unwrap(); + g.add_node(0, "A", NO_PROPS, None).unwrap(); let g_b = g .add_node(1, "B", vec![("temp".to_string(), Prop::Bool(true))], None) .unwrap(); - let _ = g_b.add_constant_properties(vec![("con".to_string(), Prop::I64(11))]); + g_b.add_constant_properties(vec![("con".to_string(), Prop::I64(11))]) + .unwrap(); let e_a_b = g .add_edge( 2, @@ -848,8 +848,8 @@ mod db_tests { .unwrap(); let gg = Graph::new(); - let e = gg.add_edge(1, "X", "Y", NO_PROPS, None).unwrap(); - let res = gg.import_edge_as(&e_b_c, ("Y", "Z"), false); + gg.add_edge(1, "X", "Y", NO_PROPS, None).unwrap(); + gg.import_edge_as(&e_b_c, ("Y", "Z"), false).unwrap(); let res = gg.import_edge_as(&e_a_b, ("X", "Y"), false); match res { Err(EdgeExistsError(src_id, dst_id)) => { @@ -883,7 +883,7 @@ mod db_tests { #[test] fn import_edge_as_merge() { let g = Graph::new(); - let g_a = g.add_node(0, "A", NO_PROPS, None).unwrap(); + g.add_node(0, "A", NO_PROPS, None).unwrap(); let g_b = g .add_node(1, "B", vec![("temp".to_string(), Prop::Bool(true))], None) .unwrap(); @@ -920,12 +920,13 @@ mod db_tests { #[test] fn import_edges_as() { let g = Graph::new(); - let g_a = g.add_node(0, "A", NO_PROPS, None).unwrap(); + g.add_node(0, "A", NO_PROPS, None).unwrap(); let g_b = g .add_node(1, "B", vec![("temp".to_string(), Prop::Bool(true))], None) .unwrap(); - let _ = g_b.add_constant_properties(vec![("con".to_string(), Prop::I64(11))]); - let g_c = g.add_node(0, "C", NO_PROPS, None).unwrap(); + g_b.add_constant_properties(vec![("con".to_string(), Prop::I64(11))]) + .unwrap(); + g.add_node(0, "C", NO_PROPS, None).unwrap(); let e_a_b = g .add_edge( 2, @@ -938,7 +939,7 @@ mod db_tests { let e_b_c = g.add_edge(2, "B", "C", NO_PROPS, None).unwrap(); let gg = Graph::new(); - let e = gg.add_edge(1, "Y", "Z", NO_PROPS, None).unwrap(); + gg.add_edge(1, "Y", "Z", NO_PROPS, None).unwrap(); let res = gg.import_edges_as([&e_a_b, &e_b_c], [("X", "Y"), ("Y", "Z")], false); match res { Err(GraphError::EdgesExistError(duplicates)) => { @@ -980,7 +981,7 @@ mod db_tests { #[test] fn import_edges_as_merge() { let g = Graph::new(); - let g_a = g.add_node(0, "A", NO_PROPS, None).unwrap(); + g.add_node(0, "A", NO_PROPS, None).unwrap(); let g_b = g .add_node(1, "B", vec![("temp".to_string(), Prop::Bool(true))], None) .unwrap(); @@ -996,8 +997,8 @@ mod db_tests { .unwrap(); let gg = Graph::new(); - let _ = gg.add_edge(3, "X", "Y", NO_PROPS, None).unwrap(); - let res = gg.import_edges_as([&e_a_b], [("X", "Y")], true).unwrap(); + gg.add_edge(3, "X", "Y", NO_PROPS, None).unwrap(); + gg.import_edges_as([&e_a_b], [("X", "Y")], true).unwrap(); let e_x_y = gg.edge("X", "Y").unwrap(); assert_eq!( diff --git a/raphtory/src/db/graph/views/node_subgraph.rs b/raphtory/src/db/graph/views/node_subgraph.rs index 8b58dba7d..b362845cf 100644 --- a/raphtory/src/db/graph/views/node_subgraph.rs +++ b/raphtory/src/db/graph/views/node_subgraph.rs @@ -2,27 +2,24 @@ use crate::{ core::entities::{LayerIds, VID}, db::api::{ properties::internal::InheritPropertiesOps, + state::Index, storage::graph::{ edges::{edge_ref::EdgeStorageRef, edge_storage_ops::EdgeStorageOps}, nodes::{node_ref::NodeStorageRef, node_storage_ops::NodeStorageOps}, }, view::internal::{ - Base, EdgeFilterOps, Immutable, InheritCoreOps, InheritLayerOps, InheritListOps, - InheritMaterialize, InheritTimeSemantics, NodeFilterOps, Static, + Base, EdgeFilterOps, EdgeList, Immutable, InheritCoreOps, InheritLayerOps, + InheritMaterialize, InheritTimeSemantics, ListOps, NodeFilterOps, NodeList, Static, }, }, prelude::GraphViewOps, }; -use rustc_hash::FxHashSet; -use std::{ - fmt::{Debug, Formatter}, - sync::Arc, -}; +use std::fmt::{Debug, Formatter}; #[derive(Clone)] pub struct NodeSubgraph { pub(crate) graph: G, - pub(crate) nodes: Arc>, + pub(crate) nodes: Index, } impl Static for NodeSubgraph {} @@ -53,14 +50,18 @@ impl<'graph, G: GraphViewOps<'graph>> InheritMaterialize for NodeSubgraph {} impl<'graph, G: GraphViewOps<'graph>> InheritLayerOps for NodeSubgraph {} impl<'graph, G: GraphViewOps<'graph>> NodeSubgraph { - pub fn new(graph: G, nodes: FxHashSet) -> Self { - let nodes = Arc::new(nodes); + pub fn new(graph: G, nodes: impl IntoIterator) -> Self { + let mut nodes: Vec<_> = if graph.nodes_filtered() { + nodes.into_iter().filter(|n| graph.has_node(*n)).collect() + } else { + nodes.into_iter().collect() + }; + nodes.sort(); + let nodes = Index::new(nodes, graph.unfiltered_num_nodes()); Self { graph, nodes } } } -// FIXME: this should use the list version ideally -impl<'graph, G: GraphViewOps<'graph>> InheritListOps for NodeSubgraph {} impl<'graph, G: GraphViewOps<'graph>> EdgeFilterOps for NodeSubgraph { #[inline] fn edges_filtered(&self) -> bool { @@ -91,7 +92,7 @@ impl<'graph, G: GraphViewOps<'graph>> NodeFilterOps for NodeSubgraph { } // FIXME: should use list version and make this true fn node_list_trusted(&self) -> bool { - false + true } #[inline] @@ -100,6 +101,18 @@ impl<'graph, G: GraphViewOps<'graph>> NodeFilterOps for NodeSubgraph { } } +impl<'graph, G: GraphViewOps<'graph>> ListOps for NodeSubgraph { + fn node_list(&self) -> NodeList { + NodeList::List { + nodes: self.nodes.clone(), + } + } + + fn edge_list(&self) -> EdgeList { + self.graph.edge_list() + } +} + #[cfg(test)] mod subgraph_tests { use crate::{ diff --git a/raphtory/src/db/graph/views/window_graph.rs b/raphtory/src/db/graph/views/window_graph.rs index 617d0961d..201f40b09 100644 --- a/raphtory/src/db/graph/views/window_graph.rs +++ b/raphtory/src/db/graph/views/window_graph.rs @@ -137,7 +137,7 @@ impl<'graph, G: GraphViewOps<'graph>> ListOps for WindowedGraph { fn node_list(&self) -> NodeList { if self.window_is_empty() { NodeList::List { - nodes: Index::from(vec![]), + nodes: Index::new(vec![], self.graph.unfiltered_num_nodes()), } } else { self.graph.node_list()