From ed4cda240f75a1859f8cce374a98333d29bef022 Mon Sep 17 00:00:00 2001 From: ljeub-pometry <97447091+ljeub-pometry@users.noreply.github.com> Date: Wed, 26 Jun 2024 14:18:44 +0200 Subject: [PATCH] move input node and hashing code to raphtory-api (#1671) * move input node and hashing code to raphtory-api * fix imports --- Cargo.lock | 2 + examples/rust/src/bin/btc/main.rs | 8 +-- examples/rust/src/bin/lotr/main.rs | 4 +- pometry-storage-private | 2 +- raphtory-api/Cargo.toml | 4 ++ .../src/core/input}/input_node.rs | 5 +- raphtory-api/src/core/input/mod.rs | 1 + raphtory-api/src/core/mod.rs | 2 + raphtory-api/src/core/storage/dict_mapper.rs | 5 +- raphtory-api/src/core/utils/hashing.rs | 10 ++++ raphtory-api/src/core/utils/mod.rs | 1 + raphtory-graphql/src/lib.rs | 5 +- .../pathing/temporal_reachability.rs | 10 ++-- raphtory/src/core/entities/graph/tgraph.rs | 9 ++-- raphtory/src/core/entities/nodes/mod.rs | 1 - .../src/core/entities/properties/props.rs | 4 +- raphtory/src/core/mod.rs | 6 +-- raphtory/src/core/utils/hashing.rs | 32 ------------ raphtory/src/core/utils/mod.rs | 1 - raphtory/src/db/api/mutation/addition_ops.rs | 3 +- raphtory/src/db/api/mutation/deletion_ops.rs | 6 +-- raphtory/src/db/api/storage/edges/edges.rs | 29 +++++------ .../src/db/api/storage/nodes/node_entry.rs | 23 --------- raphtory/src/db/graph/graph.rs | 6 +-- .../src/disk_graph/graph_impl/core_ops.rs | 4 +- raphtory/src/disk_graph/graph_impl/mod.rs | 49 ++++++++----------- .../src/disk_graph/storage_interface/node.rs | 5 +- raphtory/src/graph_loader/mod.rs | 8 +-- .../src/graph_loader/source/csv_loader.rs | 24 +++------ raphtory/src/lib.rs | 1 + raphtory/src/python/utils/mod.rs | 3 +- 31 files changed, 102 insertions(+), 171 deletions(-) rename {raphtory/src/core/entities/nodes => raphtory-api/src/core/input}/input_node.rs (95%) create mode 100644 raphtory-api/src/core/input/mod.rs create mode 100644 raphtory-api/src/core/utils/hashing.rs create mode 100644 raphtory-api/src/core/utils/mod.rs delete mode 100644 raphtory/src/core/utils/hashing.rs diff --git a/Cargo.lock b/Cargo.lock index 6aee3bb0d6..4cac904ad5 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -4441,6 +4441,7 @@ dependencies = [ "dashmap", "lock_api", "parking_lot", + "proptest", "pyo3", "quickcheck 1.0.3", "quickcheck_macros", @@ -4448,6 +4449,7 @@ dependencies = [ "rayon", "rustc-hash", "serde", + "twox-hash", ] [[package]] diff --git a/examples/rust/src/bin/btc/main.rs b/examples/rust/src/bin/btc/main.rs index 0a7d2a27c4..8415c81bda 100644 --- a/examples/rust/src/bin/btc/main.rs +++ b/examples/rust/src/bin/btc/main.rs @@ -2,7 +2,7 @@ #![allow(dead_code)] use chrono::{DateTime, Utc}; -use raphtory::{core::utils::hashing, graph_loader::source::csv_loader::CsvLoader, prelude::*}; +use raphtory::{graph_loader::source::csv_loader::CsvLoader, prelude::*}; use regex::Regex; use serde::Deserialize; use std::{ @@ -54,7 +54,7 @@ fn main() { panic!("Missing data dir = {}", data_dir.to_str().unwrap()) } - let test_v = hashing::calculate_hash(&"139eeGkMGR6F9EuJQ3qYoXebfkBbNAsLtV:btc"); + let test_v = "139eeGkMGR6F9EuJQ3qYoXebfkBbNAsLtV:btc".id(); // If data_dir/graphdb.bincode exists, use bincode to load the graph from binary encoded data files // otherwise load the graph from csv data files @@ -82,8 +82,8 @@ fn main() { CsvLoader::new(data_dir) .with_filter(Regex::new(r".+(sent|received)").unwrap()) .load_into_graph(&g, |sent: Sent, g: &Graph| { - let src = hashing::calculate_hash(&sent.addr); - let dst = hashing::calculate_hash(&sent.txn); + let src = sent.addr.id(); + let dst = sent.txn.id(); let time = sent.time.timestamp(); if src == test_v || dst == test_v { diff --git a/examples/rust/src/bin/lotr/main.rs b/examples/rust/src/bin/lotr/main.rs index e06552363a..aa4611cb12 100644 --- a/examples/rust/src/bin/lotr/main.rs +++ b/examples/rust/src/bin/lotr/main.rs @@ -1,5 +1,5 @@ use raphtory::{ - algorithms::pathing::temporal_reachability::temporally_reachable_nodes, core::utils::hashing, + algorithms::pathing::temporal_reachability::temporally_reachable_nodes, graph_loader::source::csv_loader::CsvLoader, prelude::*, }; use serde::Deserialize; @@ -99,7 +99,7 @@ fn main() { assert_eq!(graph.count_nodes(), 139); assert_eq!(graph.count_edges(), 701); - let gandalf = hashing::calculate_hash(&"Gandalf"); + let gandalf = "Gandalf".id(); assert_eq!(gandalf, 2760374808085341115); assert!(graph.has_node(gandalf)); diff --git a/pometry-storage-private b/pometry-storage-private index 00addb990e..7a99bad373 160000 --- a/pometry-storage-private +++ b/pometry-storage-private @@ -1 +1 @@ -Subproject commit 00addb990e0a98f91a3c1d3f0ee083a666c4bbe7 +Subproject commit 7a99bad37344cbbf985b842d989771edea135387 diff --git a/raphtory-api/Cargo.toml b/raphtory-api/Cargo.toml index 05686db562..256e9cb601 100644 --- a/raphtory-api/Cargo.toml +++ b/raphtory-api/Cargo.toml @@ -26,6 +26,10 @@ rayon = { workspace = true } rand = { workspace = true } quickcheck = { workspace = true } quickcheck_macros = { workspace = true } +twox-hash.workspace = true + +[dev-dependencies] +proptest.workspace = true [features] default = [] diff --git a/raphtory/src/core/entities/nodes/input_node.rs b/raphtory-api/src/core/input/input_node.rs similarity index 95% rename from raphtory/src/core/entities/nodes/input_node.rs rename to raphtory-api/src/core/input/input_node.rs index 10f99ce39e..747e6cd959 100644 --- a/raphtory/src/core/entities/nodes/input_node.rs +++ b/raphtory-api/src/core/input/input_node.rs @@ -5,6 +5,7 @@ //! `u64`, `&str`, and `String`. use crate::core::utils::hashing; + const MAX_U64_BYTES: [u8; 20] = [ 49, 56, 52, 52, 54, 55, 52, 52, 48, 55, 51, 55, 48, 57, 53, 53, 49, 54, 49, 53, ]; @@ -85,7 +86,7 @@ impl InputNode for String { #[cfg(test)] mod test { - use crate::core::entities::nodes::input_node::{parse_u64_strict, InputNode}; + use crate::core::input::input_node::{parse_u64_strict, InputNode}; use proptest::prelude::*; #[test] @@ -109,6 +110,8 @@ mod test { let res = parse_u64_strict(&s); if let Some(n) = res { assert_eq!(n.to_string(), s) + } else { + assert_ne!(s.id().to_string(), s) } }); } diff --git a/raphtory-api/src/core/input/mod.rs b/raphtory-api/src/core/input/mod.rs new file mode 100644 index 0000000000..6da2c75cc0 --- /dev/null +++ b/raphtory-api/src/core/input/mod.rs @@ -0,0 +1 @@ +pub mod input_node; diff --git a/raphtory-api/src/core/mod.rs b/raphtory-api/src/core/mod.rs index c8a39e619a..a1f13607c0 100644 --- a/raphtory-api/src/core/mod.rs +++ b/raphtory-api/src/core/mod.rs @@ -1,5 +1,7 @@ pub mod entities; +pub mod input; pub mod storage; +pub mod utils; /// Denotes the direction of an edge. Can be incoming, outgoing or both. #[derive( diff --git a/raphtory-api/src/core/storage/dict_mapper.rs b/raphtory-api/src/core/storage/dict_mapper.rs index e447224834..3c91352ef7 100644 --- a/raphtory-api/src/core/storage/dict_mapper.rs +++ b/raphtory-api/src/core/storage/dict_mapper.rs @@ -67,14 +67,11 @@ impl DictMapper { #[cfg(test)] mod test { - use std::{collections::HashMap, sync::Arc, thread}; - use crate::core::storage::dict_mapper::DictMapper; use quickcheck_macros::quickcheck; use rand::seq::SliceRandom; use rayon::prelude::*; - - use super::*; + use std::collections::HashMap; #[test] fn test_dict_mapper() { diff --git a/raphtory-api/src/core/utils/hashing.rs b/raphtory-api/src/core/utils/hashing.rs new file mode 100644 index 0000000000..1a3bef2cbe --- /dev/null +++ b/raphtory-api/src/core/utils/hashing.rs @@ -0,0 +1,10 @@ +//! Utility functions used throughout the modules. + +use std::hash::{Hash, Hasher}; +use twox_hash::XxHash64; + +pub fn calculate_hash(t: &T) -> u64 { + let mut s = XxHash64::default(); + t.hash(&mut s); + s.finish() +} diff --git a/raphtory-api/src/core/utils/mod.rs b/raphtory-api/src/core/utils/mod.rs new file mode 100644 index 0000000000..62c68590b8 --- /dev/null +++ b/raphtory-api/src/core/utils/mod.rs @@ -0,0 +1 @@ +pub mod hashing; diff --git a/raphtory-graphql/src/lib.rs b/raphtory-graphql/src/lib.rs index 92e059a2d6..9ad43a5175 100644 --- a/raphtory-graphql/src/lib.rs +++ b/raphtory-graphql/src/lib.rs @@ -49,10 +49,7 @@ mod graphql_test { prelude::*, }; use serde_json::json; - use std::{ - collections::{HashMap, HashSet}, - path::Path, - }; + use std::collections::{HashMap, HashSet}; use tempfile::{tempdir, TempDir}; #[tokio::test] diff --git a/raphtory/src/algorithms/pathing/temporal_reachability.rs b/raphtory/src/algorithms/pathing/temporal_reachability.rs index 8db7379ed1..ba1bd14d1c 100644 --- a/raphtory/src/algorithms/pathing/temporal_reachability.rs +++ b/raphtory/src/algorithms/pathing/temporal_reachability.rs @@ -1,11 +1,8 @@ use crate::{ algorithms::algorithm_result::AlgorithmResult, - core::{ - entities::nodes::input_node::InputNode, - state::{ - accumulator_id::accumulators::{hash_set, min, or}, - compute_state::ComputeStateVec, - }, + core::state::{ + accumulator_id::accumulators::{hash_set, min, or}, + compute_state::ComputeStateVec, }, db::{ api::view::StaticGraphViewOps, @@ -20,6 +17,7 @@ use crate::{ }; use itertools::Itertools; use num_traits::Zero; +use raphtory_api::core::input::input_node::InputNode; use std::{collections::HashMap, ops::Add}; #[derive(Eq, Hash, PartialEq, Clone, Debug, Default)] diff --git a/raphtory/src/core/entities/graph/tgraph.rs b/raphtory/src/core/entities/graph/tgraph.rs index bd71ec16ba..a80db59ab3 100644 --- a/raphtory/src/core/entities/graph/tgraph.rs +++ b/raphtory/src/core/entities/graph/tgraph.rs @@ -6,7 +6,7 @@ use crate::{ tgraph_storage::GraphStorage, timer::{MaxCounter, MinCounter, TimeCounterTrait}, }, - nodes::{input_node::InputNode, node_ref::NodeRef, node_store::NodeStore}, + nodes::{node_ref::NodeRef, node_store::NodeStore}, properties::{graph_meta::GraphMeta, props::Meta, tprop::TProp}, LayerIds, EID, VID, }, @@ -24,8 +24,11 @@ use crate::{ }, prelude::DeletionOps, }; -use dashmap::{DashMap, DashSet}; -use raphtory_api::core::storage::{arc_str::ArcStr, locked_vec::ArcReadLockedVec, FxDashMap}; +use dashmap::DashSet; +use raphtory_api::core::{ + input::input_node::InputNode, + storage::{arc_str::ArcStr, locked_vec::ArcReadLockedVec, FxDashMap}, +}; use rustc_hash::FxHasher; use serde::{Deserialize, Serialize}; use std::{ diff --git a/raphtory/src/core/entities/nodes/mod.rs b/raphtory/src/core/entities/nodes/mod.rs index 50bcd8fd31..094e8f0f17 100644 --- a/raphtory/src/core/entities/nodes/mod.rs +++ b/raphtory/src/core/entities/nodes/mod.rs @@ -1,4 +1,3 @@ -pub mod input_node; pub mod node_ref; pub mod node_store; pub mod structure; diff --git a/raphtory/src/core/entities/properties/props.rs b/raphtory/src/core/entities/properties/props.rs index bc47b7fa9b..9da6ab3625 100644 --- a/raphtory/src/core/entities/properties/props.rs +++ b/raphtory/src/core/entities/properties/props.rs @@ -10,13 +10,12 @@ use crate::{ }, db::api::storage::tprop_storage_ops::TPropOps, }; -use lock_api; use parking_lot::RwLock; use raphtory_api::core::storage::{ arc_str::ArcStr, dict_mapper::DictMapper, locked_vec::ArcReadLockedVec, }; use serde::{Deserialize, Serialize}; -use std::{borrow::Borrow, fmt::Debug, hash::Hash, ops::Deref, sync::Arc}; +use std::{fmt::Debug, hash::Hash, ops::Deref, sync::Arc}; #[derive(Serialize, Deserialize, Default, Debug, PartialEq)] pub struct Props { @@ -319,7 +318,6 @@ impl PropMapper { #[cfg(test)] mod test { use super::*; - use rayon::prelude::*; use std::{sync::Arc, thread}; #[test] diff --git a/raphtory/src/core/mod.rs b/raphtory/src/core/mod.rs index 0ecfb962dd..7da0d312c2 100644 --- a/raphtory/src/core/mod.rs +++ b/raphtory/src/core/mod.rs @@ -29,16 +29,15 @@ use crate::{ prelude::GraphViewOps, }; use chrono::{DateTime, NaiveDateTime, Utc}; +use raphtory_api::core::storage::arc_str::ArcStr; use serde::{Deserialize, Serialize}; use serde_json::Value; use std::{ - borrow::Borrow, cmp::Ordering, collections::HashMap, fmt, fmt::{Display, Formatter}, hash::{Hash, Hasher}, - ops::Deref, sync::Arc, }; @@ -53,9 +52,6 @@ pub mod state; pub mod storage; pub mod utils; -// this is here because Arc annoyingly doesn't implement all the expected comparisons - -use raphtory_api::core::storage::arc_str::ArcStr; pub use raphtory_api::core::*; #[derive(Clone, Copy, Debug, Serialize, Deserialize, PartialEq, Hash)] diff --git a/raphtory/src/core/utils/hashing.rs b/raphtory/src/core/utils/hashing.rs deleted file mode 100644 index 006b10b429..0000000000 --- a/raphtory/src/core/utils/hashing.rs +++ /dev/null @@ -1,32 +0,0 @@ -#![allow(dead_code)] - -//! Utility functions used throughout the modules. - -use std::hash::{Hash, Hasher}; -use twox_hash::XxHash64; - -#[cfg(feature = "storage")] -use pometry_storage::GID; - -pub fn calculate_hash(t: &T) -> u64 { - let mut s = XxHash64::default(); - t.hash(&mut s); - s.finish() -} - -#[cfg(feature = "storage")] -pub fn calculate_hash_spark(gid: &GID) -> i64 { - let mut s = XxHash64::with_seed(42); - match gid { - GID::U64(x) => s.write_u64(*x), - GID::I64(x) => s.write_i64(*x), - GID::Str(t) => { - t.chars().for_each(|c| s.write_u8(c as u8)); - } - } - s.finish() as i64 -} - -pub fn get_shard_id_from_global_vid(v_id: u64, n_shards: usize) -> usize { - (v_id % n_shards as u64) as usize -} diff --git a/raphtory/src/core/utils/mod.rs b/raphtory/src/core/utils/mod.rs index 7c640621d9..f9a5a35317 100644 --- a/raphtory/src/core/utils/mod.rs +++ b/raphtory/src/core/utils/mod.rs @@ -1,3 +1,2 @@ pub mod errors; -pub mod hashing; pub mod time; diff --git a/raphtory/src/db/api/mutation/addition_ops.rs b/raphtory/src/db/api/mutation/addition_ops.rs index d1a50e619f..1bb5ae0a8f 100644 --- a/raphtory/src/db/api/mutation/addition_ops.rs +++ b/raphtory/src/db/api/mutation/addition_ops.rs @@ -1,6 +1,6 @@ use crate::{ core::{ - entities::{edges::edge_ref::EdgeRef, nodes::input_node::InputNode}, + entities::edges::edge_ref::EdgeRef, utils::{errors::GraphError, time::IntoTimeWithFormat}, Prop, }, @@ -12,6 +12,7 @@ use crate::{ graph::{edge::EdgeView, node::NodeView}, }, }; +use raphtory_api::core::input::input_node::InputNode; use super::time_from_input; diff --git a/raphtory/src/db/api/mutation/deletion_ops.rs b/raphtory/src/db/api/mutation/deletion_ops.rs index 9ab844abeb..888db66bd3 100644 --- a/raphtory/src/db/api/mutation/deletion_ops.rs +++ b/raphtory/src/db/api/mutation/deletion_ops.rs @@ -1,13 +1,11 @@ use crate::{ - core::{ - entities::nodes::input_node::InputNode, - utils::{errors::GraphError, time::IntoTimeWithFormat}, - }, + core::utils::{errors::GraphError, time::IntoTimeWithFormat}, db::api::mutation::{ internal::{InternalAdditionOps, InternalDeletionOps}, TryIntoInputTime, }, }; +use raphtory_api::core::input::input_node::InputNode; use super::time_from_input; diff --git a/raphtory/src/db/api/storage/edges/edges.rs b/raphtory/src/db/api/storage/edges/edges.rs index 8117610716..7ded8af78c 100644 --- a/raphtory/src/db/api/storage/edges/edges.rs +++ b/raphtory/src/db/api/storage/edges/edges.rs @@ -1,22 +1,20 @@ +use super::edge_entry::EdgeStorageEntry; use crate::{ core::{ entities::{edges::edge_store::EdgeStore, LayerIds, EID}, storage::ReadLockedStorage, }, - db::api::storage::nodes::unlocked::UnlockedEdges, + db::api::storage::{ + edges::edge_storage_ops::EdgeStorageOps, nodes::unlocked::UnlockedEdges, + variants::storage_variants3::StorageVariants, + }, }; - -#[cfg(feature = "storage")] -use crate::disk_graph::storage_interface::edges_ref::DiskEdgesRef; - -use super::edge_entry::EdgeStorageEntry; -use crate::db::api::storage::edges::edge_storage_ops::EdgeStorageOps; -#[cfg(feature = "storage")] -use crate::disk_graph::storage_interface::edges::DiskEdges; -use either::Either; use rayon::iter::ParallelIterator; use std::sync::Arc; +#[cfg(feature = "storage")] +use crate::disk_graph::storage_interface::{edges::DiskEdges, edges_ref::DiskEdgesRef}; + pub enum EdgesStorage { Mem(Arc>), #[cfg(feature = "storage")] @@ -42,9 +40,6 @@ pub enum EdgesStorageRef<'a> { Disk(DiskEdgesRef<'a>), } -#[cfg(feature = "storage")] -use crate::db::api::storage::variants::storage_variants3::StorageVariants; - impl<'a> EdgesStorageRef<'a> { #[cfg(feature = "storage")] pub fn iter(self, layers: LayerIds) -> impl Iterator> { @@ -70,13 +65,13 @@ impl<'a> EdgesStorageRef<'a> { #[cfg(not(feature = "storage"))] pub fn iter(self, layers: LayerIds) -> impl Iterator> { match self { - EdgesStorageRef::Mem(storage) => Either::Left( + EdgesStorageRef::Mem(storage) => StorageVariants::Mem( storage .iter() .filter(move |e| e.has_layer(&layers)) .map(EdgeStorageEntry::Mem), ), - EdgesStorageRef::Unlocked(edges) => Either::Right( + EdgesStorageRef::Unlocked(edges) => StorageVariants::Unlocked( edges .iter() .filter(move |e| e.has_layer(&layers)) @@ -109,13 +104,13 @@ impl<'a> EdgesStorageRef<'a> { #[cfg(not(feature = "storage"))] pub fn par_iter(self, layers: LayerIds) -> impl ParallelIterator> { match self { - EdgesStorageRef::Mem(storage) => Either::Left( + EdgesStorageRef::Mem(storage) => StorageVariants::Mem( storage .par_iter() .filter(move |e| e.has_layer(&layers)) .map(EdgeStorageEntry::Mem), ), - EdgesStorageRef::Unlocked(edges) => Either::Right( + EdgesStorageRef::Unlocked(edges) => StorageVariants::Unlocked( edges .par_iter() .filter(move |e| e.has_layer(&layers)) diff --git a/raphtory/src/db/api/storage/nodes/node_entry.rs b/raphtory/src/db/api/storage/nodes/node_entry.rs index 2ab6debb08..a9a07639d8 100644 --- a/raphtory/src/db/api/storage/nodes/node_entry.rs +++ b/raphtory/src/db/api/storage/nodes/node_entry.rs @@ -44,29 +44,6 @@ impl<'a> From> for NodeStorageEntry<'a> { } } -#[cfg(feature = "storage")] -macro_rules! for_all { - ($value:expr, $pattern:pat => $result:expr) => { - match $value { - NodeStorageEntry::Mem($pattern) => $result, - NodeStorageEntry::Unlocked($pattern) => $result, - #[cfg(feature = "storage")] - NodeStorageEntry::Disk($pattern) => $result, - } - }; -} - -#[cfg(feature = "storage")] -macro_rules! for_all_iter { - ($value:expr, $pattern:pat => $result:expr) => {{ - match $value { - NodeStorageEntry::Mem($pattern) => StorageVariants::Mem($result), - NodeStorageEntry::Unlocked($pattern) => StorageVariants::Unlocked($result), - NodeStorageEntry::Disk($pattern) => StorageVariants::Disk($result), - } - }}; -} - impl<'a> NodeStorageEntry<'a> { #[inline] pub fn as_ref(&self) -> NodeStorageRef { diff --git a/raphtory/src/db/graph/graph.rs b/raphtory/src/db/graph/graph.rs index 14aad5c3b7..32dd8a89cd 100644 --- a/raphtory/src/db/graph/graph.rs +++ b/raphtory/src/db/graph/graph.rs @@ -234,12 +234,8 @@ mod db_tests { use itertools::Itertools; use quickcheck_macros::quickcheck; use raphtory_api::core::storage::arc_str::{ArcStr, OptionAsStr}; - use rayon::prelude::*; use serde_json::Value; - use std::{ - collections::{HashMap, HashSet}, - path::PathBuf, - }; + use std::collections::{HashMap, HashSet}; use tempfile::TempDir; #[test] diff --git a/raphtory/src/disk_graph/graph_impl/core_ops.rs b/raphtory/src/disk_graph/graph_impl/core_ops.rs index 43750008ea..423f60d9ae 100644 --- a/raphtory/src/disk_graph/graph_impl/core_ops.rs +++ b/raphtory/src/disk_graph/graph_impl/core_ops.rs @@ -2,7 +2,7 @@ use crate::{ core::{ entities::{ edges::edge_ref::EdgeRef, - nodes::{input_node::InputNode, node_ref::NodeRef}, + nodes::node_ref::NodeRef, properties::{graph_meta::GraphMeta, props::Meta, tprop::TProp}, LayerIds, ELID, VID, }, @@ -34,7 +34,7 @@ use crate::{ use itertools::Itertools; use polars_arrow::datatypes::ArrowDataType; use pometry_storage::{properties::ConstProps, GidRef, GID}; -use raphtory_api::core::storage::arc_str::ArcStr; +use raphtory_api::core::{input::input_node::InputNode, storage::arc_str::ArcStr}; use rayon::prelude::*; impl CoreGraphOps for DiskGraph { diff --git a/raphtory/src/disk_graph/graph_impl/mod.rs b/raphtory/src/disk_graph/graph_impl/mod.rs index e55615ef5e..28809998c2 100644 --- a/raphtory/src/disk_graph/graph_impl/mod.rs +++ b/raphtory/src/disk_graph/graph_impl/mod.rs @@ -1,17 +1,3 @@ -use std::{ - fmt::{Display, Formatter}, - path::{Path, PathBuf}, - sync::Arc, -}; - -use pometry_storage::{ - disk_hmap::DiskHashMap, graph::TemporalGraph, graph_fragment::TempColGraphFragment, - load::ExternalEdgeList, RAError, -}; -use raphtory_api::core::storage::timeindex::TimeIndexEntry; -use rayon::prelude::*; -use serde::{Deserialize, Deserializer, Serialize, Serializer}; - use crate::{ arrow2::{ array::{PrimitiveArray, StructArray}, @@ -32,6 +18,18 @@ use crate::{ disk_graph::{graph_impl::prop_conversion::make_node_properties_from_graph, Error}, prelude::{Graph, GraphViewOps}, }; +use pometry_storage::{ + disk_hmap::DiskHashMap, graph::TemporalGraph, graph_fragment::TempColGraphFragment, + load::ExternalEdgeList, RAError, +}; +use raphtory_api::core::storage::timeindex::TimeIndexEntry; +use rayon::prelude::*; +use serde::{Deserialize, Deserializer, Serialize, Serializer}; +use std::{ + fmt::{Display, Formatter}, + path::{Path, PathBuf}, + sync::Arc, +}; pub mod const_properties_ops; pub mod core_ops; @@ -483,26 +481,21 @@ impl InternalPropertyAdditionOps for DiskGraph { #[cfg(test)] mod test { - use std::{ - cmp::Reverse, - iter::once, - path::{Path, PathBuf}, + use super::{DiskGraph, ParquetLayerCols}; + use crate::{ + algorithms::components::weakly_connected_components, db::api::view::StaticGraphViewOps, + disk_graph::Time, prelude::*, }; - use itertools::{chain, Itertools}; use pometry_storage::{graph::TemporalGraph, properties::Properties}; use proptest::{prelude::*, sample::size_range}; use rayon::prelude::*; - use tempfile::TempDir; - - use crate::{ - algorithms::components::weakly_connected_components, - db::api::view::{internal::TimeSemantics, StaticGraphViewOps}, - disk_graph::Time, - prelude::*, + use std::{ + cmp::Reverse, + iter::once, + path::{Path, PathBuf}, }; - - use super::{DiskGraph, ParquetLayerCols}; + use tempfile::TempDir; fn make_simple_graph(graph_dir: impl AsRef, edges: &[(u64, u64, i64, f64)]) -> DiskGraph { DiskGraph::make_simple_graph(graph_dir, edges, 1000, 1000) diff --git a/raphtory/src/disk_graph/storage_interface/node.rs b/raphtory/src/disk_graph/storage_interface/node.rs index d083f2e73d..df16fec7c2 100644 --- a/raphtory/src/disk_graph/storage_interface/node.rs +++ b/raphtory/src/disk_graph/storage_interface/node.rs @@ -1,6 +1,6 @@ use crate::{ core::{ - entities::{edges::edge_ref::EdgeRef, nodes::input_node::InputNode, LayerIds, EID, VID}, + entities::{edges::edge_ref::EdgeRef, LayerIds, EID, VID}, Direction, }, db::api::{ @@ -9,11 +9,12 @@ use crate::{ tprop_storage_ops::TPropOps, variants::{direction_variants::DirectionVariants, layer_variants::LayerVariants}, }, - view::internal::{CoreGraphOps, NodeAdditions}, + view::internal::NodeAdditions, }, }; use itertools::Itertools; use pometry_storage::{graph::TemporalGraph, timestamps::TimeStamps, GidRef}; +use raphtory_api::core::input::input_node::InputNode; use rayon::iter::{IndexedParallelIterator, IntoParallelRefIterator, ParallelIterator}; use std::{borrow::Cow, iter, sync::Arc}; diff --git a/raphtory/src/graph_loader/mod.rs b/raphtory/src/graph_loader/mod.rs index 9069fb46b7..76ec92996a 100644 --- a/raphtory/src/graph_loader/mod.rs +++ b/raphtory/src/graph_loader/mod.rs @@ -161,7 +161,7 @@ fn unzip_file(zip_file_path: &str, destination_path: &str) -> std::io::Result<() #[cfg(test)] mod graph_loader_test { - use crate::{core::utils::hashing, graph_loader::fetch_file, prelude::*}; + use crate::{graph_loader::fetch_file, prelude::*}; use csv::StringRecord; #[test] @@ -218,8 +218,8 @@ mod graph_loader_test { if let Ok(mut reader) = csv::Reader::from_path(data_dir) { for rec in reader.records().flatten() { if let Some((src, dst, t)) = parse_record(&rec) { - let src_id = hashing::calculate_hash(&src); - let dst_id = hashing::calculate_hash(&dst); + let src_id = src.id(); + let dst_id = dst.id(); g.add_node(t, src_id, [("name", Prop::str("Character"))], None) .unwrap(); @@ -237,7 +237,7 @@ mod graph_loader_test { } } - let gandalf = hashing::calculate_hash(&"Gandalf"); + let gandalf = "Gandalf".id(); assert!(g.has_node(gandalf)); assert!(g.has_node("Gandalf")) } diff --git a/raphtory/src/graph_loader/source/csv_loader.rs b/raphtory/src/graph_loader/source/csv_loader.rs index 0c0f4ee2b0..d6cc8f5129 100644 --- a/raphtory/src/graph_loader/source/csv_loader.rs +++ b/raphtory/src/graph_loader/source/csv_loader.rs @@ -4,7 +4,6 @@ //! ```no_run //! use std::path::{Path, PathBuf}; //! use regex::Regex; -//! use raphtory::core::utils::hashing::calculate_hash; //! use raphtory::graph_loader::source::csv_loader::CsvLoader; //! use raphtory::graph_loader::example::lotr_graph::Lotr; //! use raphtory::prelude::*; @@ -25,8 +24,8 @@ //! .set_delimiter(delimiter) //! .with_filter(r) //! .load_into_graph(&g, |lotr: Lotr, g: &Graph| { -//! let src_id = calculate_hash(&lotr.src_id); -//! let dst_id = calculate_hash(&lotr.dst_id); +//! let src_id = lotr.src_id.id(); +//! let dst_id = lotr.dst_id.id(); //! let time = lotr.time; //! //! g.add_node( @@ -474,10 +473,7 @@ impl CsvLoader { #[cfg(test)] mod csv_loader_test { - use crate::{ - core::utils::hashing::calculate_hash, graph_loader::source::csv_loader::CsvLoader, - prelude::*, - }; + use crate::{graph_loader::source::csv_loader::CsvLoader, prelude::*}; use csv::StringRecord; use regex::Regex; use serde::Deserialize; @@ -518,8 +514,8 @@ mod csv_loader_test { .set_delimiter(delimiter) .with_filter(r) .load_into_graph(&g, |lotr: Lotr, g: &Graph| { - let src_id = calculate_hash(&lotr.src_id); - let dst_id = calculate_hash(&lotr.dst_id); + let src_id = lotr.src_id.id(); + let dst_id = lotr.dst_id.id(); let time = lotr.time; g.add_node(time, src_id, [("name", Prop::str("Character"))], None) @@ -546,14 +542,8 @@ mod csv_loader_test { .set_delimiter(delimiter) .with_filter(r) .load_rec_into_graph(&g, |lotr: StringRecord, g: &Graph| { - let src_id = lotr - .get(0) - .map(|s| calculate_hash(&(s.to_owned()))) - .unwrap(); - let dst_id = lotr - .get(1) - .map(|s| calculate_hash(&(s.to_owned()))) - .unwrap(); + let src_id = lotr.get(0).map(|s| s.id()).unwrap(); + let dst_id = lotr.get(1).map(|s| s.id()).unwrap(); let time = lotr.get(2).map(|s| s.parse::().unwrap()).unwrap(); g.add_node(time, src_id, [("name", Prop::str("Character"))], None) diff --git a/raphtory/src/lib.rs b/raphtory/src/lib.rs index 1359bd29e4..a0c5fded07 100644 --- a/raphtory/src/lib.rs +++ b/raphtory/src/lib.rs @@ -119,6 +119,7 @@ pub mod prelude { graph::graph::Graph, }, }; + pub use raphtory_api::core::input::input_node::InputNode; } pub const BINCODE_VERSION: u32 = 1u32; diff --git a/raphtory/src/python/utils/mod.rs b/raphtory/src/python/utils/mod.rs index 40609a2b93..7da4755e6c 100644 --- a/raphtory/src/python/utils/mod.rs +++ b/raphtory/src/python/utils/mod.rs @@ -4,7 +4,7 @@ //! These functions are not part of the public API and are not exported to the Python module. use crate::{ core::{ - entities::nodes::{input_node::InputNode, node_ref::NodeRef}, + entities::nodes::node_ref::NodeRef, storage::timeindex::AsTime, utils::time::{error::ParseTimeError, Interval, IntoTime, TryIntoTime}, }, @@ -13,6 +13,7 @@ use crate::{ }; use chrono::{DateTime, Utc}; use pyo3::{exceptions::PyTypeError, prelude::*, types::PyDateTime}; +use raphtory_api::core::input::input_node::InputNode; use std::{future::Future, thread}; pub mod errors;