From 4f04bf8547928b9d53b1e654125e6a72a720569a Mon Sep 17 00:00:00 2001 From: Quake Wang Date: Mon, 18 Mar 2024 17:59:14 +0800 Subject: [PATCH] chore: refactor DataSource (#5) --- bindings/rust/src/lazy_reader.rs | 112 ++++++++++++-------------- examples/lazy-reader-tests/Cargo.lock | 12 +-- 2 files changed, 59 insertions(+), 65 deletions(-) diff --git a/bindings/rust/src/lazy_reader.rs b/bindings/rust/src/lazy_reader.rs index 252ce59..e76823f 100644 --- a/bindings/rust/src/lazy_reader.rs +++ b/bindings/rust/src/lazy_reader.rs @@ -32,11 +32,7 @@ impl From for Error { } pub trait Read { - /** - * try to read `buf.len()` bytes from data source with `offset`, then fill it in `buf`. - * the return size can be smaller than `buf.len()` which means the remaining data length is - * smaller than `buf.len()` - */ + // Pull some bytes from this source into the specified buffer with `offset`, returning how many bytes were read. fn read(&self, buf: &mut [u8], offset: usize) -> Result; } @@ -46,14 +42,59 @@ pub const NUMBER_SIZE: usize = 4; pub struct DataSource { reader: Box, - total_size: usize, cache_start_point: usize, - // cache size may be smaller than cache.len() - cache_size: usize, + // cache actual size may be smaller than cache.len() + cache_actual_size: usize, cache: Vec, } +impl DataSource { + pub fn new(total_size: usize, reader: Box) -> Self { + DataSource { + reader, + total_size, + cache_start_point: 0, + cache_actual_size: 0, + cache: vec![0u8; MAX_CACHE_SIZE], + } + } + + // Pull some bytes from this source into the specified buffer with `offset` and `read_len`, returning how many bytes were read. + // If the requested range is out of bound, an `Error::Read` will be returned. + pub fn read_at( + &mut self, + buf: &mut [u8], + offset: usize, + read_len: usize, + ) -> Result { + // Read directly if the requested length is larger than maximum cache size + if read_len > self.cache.len() { + return self.reader.read(buf, offset); + } + // Check if the requested data is in cache + if offset >= self.cache_start_point + && offset + read_len <= self.cache_start_point + self.cache_actual_size + { + let read_point = offset - self.cache_start_point; + buf.copy_from_slice(&self.cache[read_point..(read_point + read_len)]); + return Ok(read_len); + } + // Cache miss, read from reader and update cache + let read_actual_size = self.reader.read(&mut self.cache[..], offset)?; + self.cache_start_point = offset; + self.cache_actual_size = read_actual_size; + if read_actual_size < read_len { + return Err(Error::Read(format!( + "read_at: read_actual_size({}) < read_len({})", + read_actual_size, read_len + ))); + } + buf[..read_len].copy_from_slice(&self.cache[0..read_len]); + Ok(read_len) + } +} + #[derive(Clone)] pub struct Cursor { pub offset: usize, @@ -68,70 +109,23 @@ pub struct Union { impl Cursor { /** - cache_size: normally it can be set to MAX_CACHE_SIZE(2K) total_size: the size of cursor. If it's set a smaller value, `out of bound` will occur when `reader` try to read the data beyond that. reader: interface to read underlying data */ pub fn new(total_size: usize, reader: Box) -> Self { - let data_source = DataSource { - reader, - total_size, - cache_start_point: 0, - cache_size: 0, // when created, cache is not filled - cache: vec![0u8; MAX_CACHE_SIZE], - }; + let data_source = DataSource::new(total_size, reader); Cursor { offset: 0, size: total_size, data_source: Rc::new(RefCell::new(data_source)), } } + pub fn read_at(&self, buf: &mut [u8]) -> Result { let read_len = min(self.size, buf.len()); - let ds = &mut *self.data_source.borrow_mut(); - if read_len > ds.cache.len() { - return ds.reader.read(buf, self.offset); - } - if self.offset < ds.cache_start_point - || (self.offset + read_len) > (ds.cache_start_point + ds.cache_size) - { - let reader = &ds.reader; - let size = reader.read(&mut ds.cache[..], self.offset)?; - if size < read_len { - return Err(Error::Read(format!( - "read_at: `if size({}) < read_len({})`", - size, read_len - ))); - } - ds.cache_size = size; - ds.cache_start_point = self.offset; - - if ds.cache_size > ds.cache.len() { - return Err(Error::Read(format!( - "read_at: `if ds.cache_size({}) > ds.cache.len()({})`", - ds.cache_size, - ds.cache.len() - ))); - } - } - if self.offset < ds.cache_start_point - || (self.offset - ds.cache_start_point) > ds.cache.len() - { - return Err(Error::Read(format!( - "read_at: `if self.offset({}) < ds.cache_start_point({}) || ...`", - self.offset, ds.cache_start_point - ))); - } - let read_point = self.offset - ds.cache_start_point; - if read_point + read_len > ds.cache_size { - return Err(Error::Read(format!( - "read_at: `if read_point({}) + read_len({}) > ds.cache_size({})`", - read_point, read_len, ds.cache_size - ))); - } - buf.copy_from_slice(&ds.cache[read_point..(read_point + read_len)]); - Ok(read_len) + let mut data_source = self.data_source.borrow_mut(); + data_source.read_at(buf, self.offset, read_len) } pub fn add_offset(&mut self, offset: usize) -> Result<(), Error> { diff --git a/examples/lazy-reader-tests/Cargo.lock b/examples/lazy-reader-tests/Cargo.lock index 0d2be54..f9c152a 100644 --- a/examples/lazy-reader-tests/Cargo.lock +++ b/examples/lazy-reader-tests/Cargo.lock @@ -13,9 +13,9 @@ dependencies = [ [[package]] name = "bytes" -version = "1.4.0" +version = "1.5.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "89b2fd2a0dcf38d7971e2194b6b6eebab45ae01067456a7fd93d5547a61b70be" +checksum = "a2bd12c1caf447e69cd4528f47f94d203fd2582878ecb9e9465484c4148a8223" [[package]] name = "case" @@ -192,9 +192,9 @@ checksum = "5b40af805b3121feab8a3c29f04d8ad262fa8e0561883e7653e024ae4479e6de" [[package]] name = "proc-macro2" -version = "1.0.66" +version = "1.0.79" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "18fb31db3f9bddb2ea821cde30a9f70117e3f119938b5ee630b7403aa6e2ead9" +checksum = "e835ff2298f5721608eb1a980ecaee1aef2c132bf95ecc026a11b7bf3c01c02e" dependencies = [ "unicode-ident", ] @@ -212,9 +212,9 @@ dependencies = [ [[package]] name = "quote" -version = "1.0.32" +version = "1.0.35" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "50f3b39ccfb720540debaa0164757101c08ecb8d326b15358ce76a62c7e85965" +checksum = "291ec9ab5efd934aaf503a6466c5d5251535d108ee747472c3977cc5acc868ef" dependencies = [ "proc-macro2", ]