diff --git a/src/with_indices.rs b/src/with_indices.rs
index e491252..a602902 100644
--- a/src/with_indices.rs
+++ b/src/with_indices.rs
@@ -1,4 +1,4 @@
-use std::{cell::OnceCell, marker::PhantomData};
+use std::{cell::RefCell, marker::PhantomData};
 
 use crate::helpers::SourceText;
 
@@ -9,8 +9,9 @@ where
 {
   /// line is a string reference
   pub line: S,
-  /// the byte position of each `char` in `line` string slice .
-  pub indices_indexes: OnceCell<Vec<usize>>,
+  /// `(char index, byte index)` where the last `substring` call found its end;
+  /// a later call starting at or after it resumes scanning from there.
+  last_char_index_to_byte_index: RefCell<(u32, u32)>,
   data: PhantomData<&'a S>,
 }
 
@@ -20,32 +21,80 @@ where
 {
   pub fn new(line: S) -> Self {
     Self {
-      indices_indexes: OnceCell::new(),
       line,
+      last_char_index_to_byte_index: RefCell::new((0, 0)),
       data: PhantomData,
     }
   }
 
-  /// substring::SubString with cache
-  pub(crate) fn substring(&self, start_index: usize, end_index: usize) -> S {
-    if end_index <= start_index {
+  /// Returns the part of `line` between two char indices: `start_char_index`
+  /// inclusive, `end_char_index` exclusive.
+  ///
+  /// Yields `S::default()` when the range is empty or starts at or past the end
+  /// of `line`; an `end_char_index` past the end is clamped to the end of `line`.
+  /// When `end_char_index` is found, its position is cached so that a following
+  /// call starting at or after it resumes there instead of rescanning `line`.
+  pub(crate) fn substring(
+    &self,
+    start_char_index: usize,
+    end_char_index: usize,
+  ) -> S {
+    if end_char_index <= start_char_index {
       return S::default();
     }
 
-    let indices_indexes = self.indices_indexes.get_or_init(|| {
-      self.line.char_indices().map(|(i, _)| i).collect::<Vec<_>>()
-    });
+    let line_len = self.line.len();
 
-    let str_len = self.line.len();
-    let start = *indices_indexes.get(start_index).unwrap_or(&str_len);
-    let end = *indices_indexes.get(end_index).unwrap_or(&str_len);
+    let mut start_byte_index = None;
+    let mut end_byte_index = None;
+
+    let (last_char_index, last_byte_index) =
+      *self.last_char_index_to_byte_index.borrow();
+    let mut last_byte_index = last_byte_index as usize;
+    let mut char_index = last_char_index as usize;
+    if start_char_index < last_char_index as usize {
+      // the cached position is already past the requested start: rescan from 0
+      char_index = 0;
+      last_byte_index = 0;
+    }
+    for (byte_index, _) in self
+      .line
+      .byte_slice(last_byte_index..line_len)
+      .char_indices()
+    {
+      if char_index == start_char_index {
+        start_byte_index = Some(byte_index + last_byte_index);
+        if end_char_index == usize::MAX {
+          // stop scanning early; `end_byte_index` falls back to `line_len` below
+          break;
+        }
+      }
+      if char_index == end_char_index {
+        end_byte_index = Some(byte_index + last_byte_index);
+        // remember where this call stopped so that the next one can resume here
+        // NOTE(review): `as u32` truncates once an index exceeds `u32::MAX`
+        *self.last_char_index_to_byte_index.borrow_mut() =
+          (end_char_index as u32, (byte_index + last_byte_index) as u32);
+        break;
+      }
+      char_index += 1;
+    }
+
+    let start_byte_index = if let Some(start_byte_index) = start_byte_index {
+      start_byte_index
+    } else {
+      return S::default();
+    };
+    let end_byte_index = end_byte_index.unwrap_or(line_len);
 
     #[allow(unsafe_code)]
     unsafe {
-      // SAFETY: Since `indices` iterates over the `CharIndices` of `self`, we can guarantee
-      // that the indices obtained from it will always be within the bounds of `self` and they
-      // will always lie on UTF-8 sequence boundaries.
-      self.line.byte_slice_unchecked(start..end)
+      // SAFETY: `start_byte_index` and `end_byte_index` are either offsets yielded by
+      // `char_indices()` on a slice of `line` (rebased by `last_byte_index`) or `line_len`,
+      // so they are within the bounds of `line` and lie on UTF-8 sequence boundaries.
+      self
+        .line
+        .byte_slice_unchecked(start_byte_index..end_byte_index)
     }
   }
 }
@@ -90,4 +139,12 @@ mod tests {
       "øbα"
     );
   }
+
+  #[test]
+  fn test_last_char_index_to_byte_index() {
+    let rope_with_indices = WithIndices::new(Rope::from("foobar"));
+    assert_eq!(rope_with_indices.substring(0, 3), "foo");
+    assert_eq!(rope_with_indices.substring(3, 6), "bar");
+    assert_eq!(rope_with_indices.substring(0, usize::MAX), "foobar");
+  }
 }