Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

feat: chunk ids as vec bytes #294

Merged
merged 5 commits into from
May 8, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
56 changes: 20 additions & 36 deletions grovedb/src/replication.rs
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
use std::{
collections::{BTreeMap, BTreeSet},
fmt,
str::Utf8Error,

Check warning on line 4 in grovedb/src/replication.rs

View workflow job for this annotation

GitHub Actions / clippy

unused import: `str::Utf8Error`

warning: unused import: `str::Utf8Error` --> grovedb/src/replication.rs:4:5 | 4 | str::Utf8Error, | ^^^^^^^^^^^^^^ | = note: `#[warn(unused_imports)]` on by default
};

use grovedb_merk::{
Expand Down Expand Up @@ -89,7 +89,7 @@
// Splits the given global chunk id into [SUBTREE_PREFIX:CHUNK_ID]
pub fn util_split_global_chunk_id(
global_chunk_id: &[u8],
) -> Result<(crate::SubtreePrefix, String), Error> {
) -> Result<(crate::SubtreePrefix, Vec<u8>), Error> {
let chunk_prefix_length: usize = 32;
if global_chunk_id.len() < chunk_prefix_length {
return Err(Error::CorruptedData(
Expand All @@ -101,13 +101,7 @@
let mut array = [0u8; 32];
array.copy_from_slice(chunk_prefix);
let chunk_prefix_key: crate::SubtreePrefix = array;
let str_chunk_id = String::from_utf8(chunk_id.to_vec());
match str_chunk_id {
Ok(s) => Ok((chunk_prefix_key, s)),
Err(_) => Err(Error::CorruptedData(
"unable to convert chunk id to string".to_string(),
)),
}
Ok((chunk_prefix_key, chunk_id.to_vec()))
}

#[cfg(feature = "full")]
Expand Down Expand Up @@ -244,20 +238,15 @@

let chunk_producer_res = ChunkProducer::new(&merk);
match chunk_producer_res {
Ok(mut chunk_producer) => match std::str::from_utf8(chunk_id) {
Ok(chunk_id_str) => {
let chunk_res = chunk_producer.chunk(chunk_id_str);
match chunk_res {
Ok((chunk, _)) => Ok(chunk),
Err(_) => Err(Error::CorruptedData(
"Unable to create to load chunk".to_string(),
)),
}
Ok(mut chunk_producer) => {
let chunk_res = chunk_producer.chunk(chunk_id);
match chunk_res {
Ok((chunk, _)) => Ok(chunk),
Err(_) => Err(Error::CorruptedData(
"Unable to create to load chunk".to_string(),
)),
}
Err(_) => Err(Error::CorruptedData(
"Unable to process chunk id".to_string(),
)),
},
}
Err(_) => Err(Error::CorruptedData(
"Unable to create Chunk producer".to_string(),
)),
Expand All @@ -265,7 +254,7 @@
}
Some(t) => {
let merk = self
.open_transactional_merk_at_path(path.into(), &t, None)

Check warning on line 257 in grovedb/src/replication.rs

View workflow job for this annotation

GitHub Actions / clippy

this expression creates a reference which is immediately dereferenced by the compiler

warning: this expression creates a reference which is immediately dereferenced by the compiler --> grovedb/src/replication.rs:257:75 | 257 | ... .open_transactional_merk_at_path(path.into(), &t, None) | ^^ help: change this to: `t` | = help: for further information visit https://rust-lang.github.io/rust-clippy/master/index.html#needless_borrow = note: `#[warn(clippy::needless_borrow)]` on by default
.value?;

if merk.is_empty_tree().unwrap() {
Expand All @@ -274,20 +263,15 @@

let chunk_producer_res = ChunkProducer::new(&merk);
match chunk_producer_res {
Ok(mut chunk_producer) => match std::str::from_utf8(chunk_id) {
Ok(chunk_id_str) => {
let chunk_res = chunk_producer.chunk(chunk_id_str);
match chunk_res {
Ok((chunk, _)) => Ok(chunk),
Err(_) => Err(Error::CorruptedData(
"Unable to create to load chunk".to_string(),
)),
}
Ok(mut chunk_producer) => {
let chunk_res = chunk_producer.chunk(chunk_id);
match chunk_res {
Ok((chunk, _)) => Ok(chunk),
Err(_) => Err(Error::CorruptedData(
"Unable to create to load chunk".to_string(),
)),
}
Err(_) => Err(Error::CorruptedData(
"Unable to process chunk id".to_string(),
)),
},
}
Err(_) => Err(Error::CorruptedData(
"Unable to create Chunk producer".to_string(),
)),
Expand Down Expand Up @@ -380,12 +364,12 @@
}
state_sync_info.pending_chunks.remove(global_chunk_id);
if !chunk_data.is_empty() {
match restorer.process_chunk(chunk_id.to_string(), chunk_data) {
match restorer.process_chunk(&chunk_id, chunk_data) {
Ok(next_chunk_ids) => {
state_sync_info.num_processed_chunks += 1;
for next_chunk_id in next_chunk_ids {
let mut next_global_chunk_id = chunk_prefix.to_vec();
next_global_chunk_id.extend(next_chunk_id.as_bytes().to_vec());
next_global_chunk_id.extend(next_chunk_id.to_vec());
state_sync_info
.pending_chunks
.insert(next_global_chunk_id.clone());
Expand Down
50 changes: 26 additions & 24 deletions merk/src/merk/chunks.rs
Original file line number Diff line number Diff line change
Expand Up @@ -38,10 +38,10 @@
chunk_op::ChunkOp,
error::ChunkError,
util::{
chunk_height, chunk_id_from_traversal_instruction,
chunk_id_from_traversal_instruction_with_recovery, generate_traversal_instruction,
generate_traversal_instruction_as_string, number_of_chunks,
string_as_traversal_instruction,
chunk_height, chunk_index_from_traversal_instruction,
chunk_index_from_traversal_instruction_with_recovery,
generate_traversal_instruction, generate_traversal_instruction_as_vec_bytes,
number_of_chunks, vec_bytes_as_traversal_instruction,
},
},
Node, Op,
Expand Down Expand Up @@ -72,14 +72,14 @@
#[derive(Debug)]
pub struct MultiChunk {
pub chunk: Vec<ChunkOp>,
pub next_index: Option<String>,
pub next_index: Option<Vec<u8>>,
pub remaining_limit: Option<usize>,
}

impl MultiChunk {
pub fn new(
chunk: Vec<ChunkOp>,
next_index: Option<String>,
next_index: Option<Vec<u8>>,
remaining_limit: Option<usize>,
) -> Self {
Self {
Expand Down Expand Up @@ -131,17 +131,17 @@
}

/// Returns the chunk at a given chunk id.
pub fn chunk(&mut self, chunk_id: &str) -> Result<(Vec<Op>, Option<String>), Error> {
let traversal_instructions = string_as_traversal_instruction(chunk_id)?;
let chunk_index = chunk_id_from_traversal_instruction_with_recovery(
pub fn chunk(&mut self, chunk_id: &[u8]) -> Result<(Vec<Op>, Option<Vec<u8>>), Error> {
let traversal_instructions = vec_bytes_as_traversal_instruction(chunk_id)?;
let chunk_index = chunk_index_from_traversal_instruction_with_recovery(
traversal_instructions.as_slice(),
self.height,
)?;
let (chunk, next_index) = self.chunk_internal(chunk_index, traversal_instructions)?;
let index_string = next_index
.map(|index| generate_traversal_instruction_as_string(self.height, index))
let next_chunk_id = next_index
.map(|index| generate_traversal_instruction_as_vec_bytes(self.height, index))
.transpose()?;
Ok((chunk, index_string))
Ok((chunk, next_chunk_id))
}

/// Returns the chunk at the given index
Expand Down Expand Up @@ -186,12 +186,12 @@
/// chunks or hit some optional limit
pub fn multi_chunk_with_limit(
&mut self,
chunk_id: &str,
chunk_id: &[u8],
limit: Option<usize>,
) -> Result<MultiChunk, Error> {
// we want to convert the chunk id to the index
let chunk_index = string_as_traversal_instruction(chunk_id).and_then(|instruction| {
chunk_id_from_traversal_instruction(instruction.as_slice(), self.height)
let chunk_index = vec_bytes_as_traversal_instruction(chunk_id).and_then(|instruction| {
chunk_index_from_traversal_instruction(instruction.as_slice(), self.height)
})?;
self.multi_chunk_with_limit_and_index(chunk_index, limit)
}
Expand Down Expand Up @@ -267,11 +267,11 @@
current_limit = subtree_multi_chunk.remaining_limit;
}

let index_string = current_index
.map(|index| generate_traversal_instruction_as_string(self.height, index))
let index_bytes = current_index
.map(|index| generate_traversal_instruction_as_vec_bytes(self.height, index))
.transpose()?;

Ok(MultiChunk::new(chunk, index_string, current_limit))
Ok(MultiChunk::new(chunk, index_bytes, current_limit))
}

/// Packs as many chunks as it can from a starting chunk index, into a
Expand Down Expand Up @@ -371,7 +371,7 @@
/// optimizing throughput compared to random access.
// TODO: this is not better than random access, as we are not keeping state
// that will make this more efficient, decide if this should be fixed or not
fn next_chunk(&mut self) -> Option<Result<(Vec<Op>, Option<String>), Error>> {
fn next_chunk(&mut self) -> Option<Result<(Vec<Op>, Option<Vec<u8>>), Error>> {

Check warning on line 374 in merk/src/merk/chunks.rs

View workflow job for this annotation

GitHub Actions / clippy

very complex type used. Consider factoring parts into `type` definitions

warning: very complex type used. Consider factoring parts into `type` definitions --> merk/src/merk/chunks.rs:374:33 | 374 | fn next_chunk(&mut self) -> Option<Result<(Vec<Op>, Option<Vec<u8>>), Error>> { | ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ | = help: for further information visit https://rust-lang.github.io/rust-clippy/master/index.html#type_complexity = note: `#[warn(clippy::type_complexity)]` on by default
let max_index = number_of_chunks(self.height);
if self.index > max_index {
return None;
Expand All @@ -383,7 +383,9 @@
self.chunk_with_index(self.index)
.and_then(|(chunk, chunk_index)| {
chunk_index
.map(|index| generate_traversal_instruction_as_string(self.height, index))
.map(|index| {
generate_traversal_instruction_as_vec_bytes(self.height, index)
})
.transpose()
.map(|v| (chunk, v))
}),
Expand All @@ -396,7 +398,7 @@
where
S: StorageContext<'db>,
{
type Item = Result<(Vec<Op>, Option<String>), Error>;
type Item = Result<(Vec<Op>, Option<Vec<u8>>), Error>;

fn next(&mut self) -> Option<Self::Item> {
self.next_chunk()
Expand Down Expand Up @@ -424,7 +426,7 @@
tests::{traverse_get_kv_feature_type, traverse_get_node_hash},
LEFT, RIGHT,
},
util::traversal_instruction_as_string,
util::traversal_instruction_as_vec_bytes,
},
tree::execute,
Tree,
Expand Down Expand Up @@ -1027,7 +1029,7 @@

// ensure that the remaining limit, next index and values given are correct
// if limit is smaller than first chunk, we should get an error
let chunk_result = chunk_producer.multi_chunk_with_limit("", Some(5));
let chunk_result = chunk_producer.multi_chunk_with_limit(vec![].as_slice(), Some(5));
assert!(matches!(
chunk_result,
Err(Error::ChunkingError(ChunkError::LimitTooSmall(..)))
Expand All @@ -1052,7 +1054,7 @@
.expect("should generate chunk");
assert_eq!(
chunk_result.next_index,
Some(traversal_instruction_as_string(
Some(traversal_instruction_as_vec_bytes(
&generate_traversal_instruction(4, 4).unwrap()
))
);
Expand Down
34 changes: 17 additions & 17 deletions merk/src/merk/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -63,7 +63,7 @@
proofs::{
chunk::{
chunk::{LEFT, RIGHT},
util::traversal_instruction_as_string,
util::traversal_instruction_as_vec_bytes,
},
query::query_item::QueryItem,
Query,
Expand Down Expand Up @@ -556,11 +556,11 @@
pub fn verify(
&self,
skip_sum_checks: bool,
) -> (BTreeMap<String, CryptoHash>, BTreeMap<String, Vec<u8>>) {
) -> (BTreeMap<Vec<u8>, CryptoHash>, BTreeMap<Vec<u8>, Vec<u8>>) {

Check warning on line 559 in merk/src/merk/mod.rs

View workflow job for this annotation

GitHub Actions / clippy

very complex type used. Consider factoring parts into `type` definitions

warning: very complex type used. Consider factoring parts into `type` definitions --> merk/src/merk/mod.rs:559:10 | 559 | ) -> (BTreeMap<Vec<u8>, CryptoHash>, BTreeMap<Vec<u8>, Vec<u8>>) { | ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ | = help: for further information visit https://rust-lang.github.io/rust-clippy/master/index.html#type_complexity
let tree = self.tree.take();

let mut bad_link_map: BTreeMap<String, CryptoHash> = BTreeMap::new();
let mut parent_keys: BTreeMap<String, Vec<u8>> = BTreeMap::new();
let mut bad_link_map: BTreeMap<Vec<u8>, CryptoHash> = BTreeMap::new();
let mut parent_keys: BTreeMap<Vec<u8>, Vec<u8>> = BTreeMap::new();
let mut root_traversal_instruction = vec![];

// TODO: remove clone
Expand All @@ -581,8 +581,8 @@
&self,
tree: &TreeNode,
traversal_instruction: &mut Vec<bool>,
bad_link_map: &mut BTreeMap<String, CryptoHash>,
parent_keys: &mut BTreeMap<String, Vec<u8>>,
bad_link_map: &mut BTreeMap<Vec<u8>, CryptoHash>,
parent_keys: &mut BTreeMap<Vec<u8>, Vec<u8>>,
skip_sum_checks: bool,
) {
if let Some(link) = tree.link(LEFT) {
Expand Down Expand Up @@ -617,8 +617,8 @@
link: &Link,
parent_key: &[u8],
traversal_instruction: &mut Vec<bool>,
bad_link_map: &mut BTreeMap<String, CryptoHash>,
parent_keys: &mut BTreeMap<String, Vec<u8>>,
bad_link_map: &mut BTreeMap<Vec<u8>, CryptoHash>,
parent_keys: &mut BTreeMap<Vec<u8>, Vec<u8>>,
skip_sum_checks: bool,
) {
let (hash, key, sum) = match link {
Expand All @@ -639,7 +639,7 @@
_ => todo!(),
};

let instruction_id = traversal_instruction_as_string(traversal_instruction);
let instruction_id = traversal_instruction_as_vec_bytes(traversal_instruction);
let node = TreeNode::get(
&self.storage,
key,
Expand All @@ -648,29 +648,29 @@
.unwrap();

if node.is_err() {
bad_link_map.insert(instruction_id.clone(), hash);
parent_keys.insert(instruction_id, parent_key.to_vec());
bad_link_map.insert(instruction_id.to_vec(), hash);
parent_keys.insert(instruction_id.to_vec(), parent_key.to_vec());
return;
}

let node = node.unwrap();
if node.is_none() {
bad_link_map.insert(instruction_id.clone(), hash);
parent_keys.insert(instruction_id, parent_key.to_vec());
bad_link_map.insert(instruction_id.to_vec(), hash);
parent_keys.insert(instruction_id.to_vec(), parent_key.to_vec());
return;
}

let node = node.unwrap();
if node.hash().unwrap() != hash {
bad_link_map.insert(instruction_id.clone(), hash);
parent_keys.insert(instruction_id, parent_key.to_vec());
bad_link_map.insert(instruction_id.to_vec(), hash);
parent_keys.insert(instruction_id.to_vec(), parent_key.to_vec());
return;
}

// Need to skip this when restoring a sum tree
if !skip_sum_checks && node.sum().unwrap() != sum {
bad_link_map.insert(instruction_id.clone(), hash);
parent_keys.insert(instruction_id, parent_key.to_vec());
bad_link_map.insert(instruction_id.to_vec(), hash);
parent_keys.insert(instruction_id.to_vec(), parent_key.to_vec());
return;
}

Expand Down
Loading
Loading