Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

perf: WithIndices #150

Merged
merged 15 commits into from
Jan 6, 2025
55 changes: 24 additions & 31 deletions src/decoder.rs
Original file line number Diff line number Diff line change
Expand Up @@ -39,7 +39,6 @@ pub(crate) struct MappingsDecoder<'a> {
current_value: i64,
current_value_pos: usize,
generated_line: u32,
generated_column: i64,
}

impl<'a> MappingsDecoder<'a> {
Expand All @@ -52,7 +51,6 @@ impl<'a> MappingsDecoder<'a> {
current_value: 0,
current_value_pos: 0,
generated_line: 1,
generated_column: -1,
}
}
}
Expand All @@ -67,44 +65,39 @@ impl Iterator for MappingsDecoder<'_> {
continue;
}
if (value & COM) != 0 {
let mapping = match self.current_data_pos {
1 => Some(Mapping {
generated_line: self.generated_line,
generated_column: self.current_data[0],
original: None,
}),
4 => Some(Mapping {
generated_line: self.generated_line,
generated_column: self.current_data[0],
original: Some(OriginalLocation {
let mut mapping = Mapping {
generated_line: self.generated_line,
generated_column: self.current_data[0],
original: None,
};
let current_data_pos = self.current_data_pos;
self.current_data_pos = 0;
if value == SEM {
self.generated_line += 1;
self.current_data[0] = 0;
}
match current_data_pos {
1 => return Some(mapping),
4 => {
mapping.original = Some(OriginalLocation {
source_index: self.current_data[1],
original_line: self.current_data[2],
original_column: self.current_data[3],
name_index: None,
}),
}),
5 => Some(Mapping {
generated_line: self.generated_line,
generated_column: self.current_data[0],
original: Some(OriginalLocation {
});
return Some(mapping);
}
5 => {
mapping.original = Some(OriginalLocation {
source_index: self.current_data[1],
original_line: self.current_data[2],
original_column: self.current_data[3],
name_index: Some(self.current_data[4]),
}),
}),
_ => None,
});
return Some(mapping);
}
_ => (),
};
self.generated_column = self.current_data[0] as i64;
self.current_data_pos = 0;
if value == SEM {
self.generated_line += 1;
self.current_data[0] = 0;
self.generated_column = -1;
}
if mapping.is_some() {
return mapping;
}
} else if (value & CONTINUATION_BIT) == 0 {
// last sextet
self.current_value |= (value as i64) << self.current_value_pos;
Expand Down
6 changes: 3 additions & 3 deletions src/helpers.rs
Original file line number Diff line number Diff line change
Expand Up @@ -1252,7 +1252,7 @@ pub trait SourceText<'a>: Default + Clone + ToString {
fn ends_with(&self, value: &str) -> bool;

/// Returns an iterator over the char indices in the text.
fn char_indices(&self) -> impl Iterator<Item = (usize, char)>;
fn char_indices(&self) -> impl DoubleEndedIterator<Item = (usize, char)>;

/// Gets the byte at the specified index, if it exists.
fn get_byte(&self, byte_index: usize) -> Option<u8>;
Expand Down Expand Up @@ -1289,7 +1289,7 @@ impl<'a> SourceText<'a> for Rope<'a> {
(*self).ends_with(value)
}

fn char_indices(&self) -> impl Iterator<Item = (usize, char)> {
fn char_indices(&self) -> impl DoubleEndedIterator<Item = (usize, char)> {
self.char_indices()
}

Expand Down Expand Up @@ -1331,7 +1331,7 @@ impl<'a> SourceText<'a> for &'a str {
(*self).ends_with(value)
}

fn char_indices(&self) -> impl Iterator<Item = (usize, char)> {
fn char_indices(&self) -> impl DoubleEndedIterator<Item = (usize, char)> {
(*self).char_indices()
}

Expand Down
125 changes: 96 additions & 29 deletions src/rope.rs
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,6 @@

use std::{
borrow::Cow,
collections::VecDeque,
hash::Hash,
ops::{Bound, RangeBounds},
rc::Rc,
Expand Down Expand Up @@ -134,13 +133,24 @@ impl<'a> Rope<'a> {
iter: s.char_indices(),
},
},
Repr::Full(data) => CharIndices {
iter: CharIndicesEnum::Full {
chunks: data,
char_indices: VecDeque::new(),
chunk_index: 0,
},
},
Repr::Full(vec) => {
let right_byte_offset = vec.iter().map(|(s, _)| s.len() as u32).sum();

CharIndices {
iter: CharIndicesEnum::Full {
iters: vec
.iter()
.map(|(s, _)| s.char_indices())
.collect::<Vec<_>>(),
left_chunk_index: 0,
left_byte_offset: 0,
last_left_indice: None,
right_chunk_index: (vec.len() - 1) as u32,
right_byte_offset,
right_byte_offset_for: vec.len() as u32,
},
}
}
}
}

Expand Down Expand Up @@ -658,9 +668,13 @@ enum CharIndicesEnum<'a, 'b> {
iter: std::str::CharIndices<'b>,
},
Full {
chunks: &'a [(&'b str, usize)],
char_indices: VecDeque<(usize, char)>,
chunk_index: usize,
iters: Vec<std::str::CharIndices<'a>>,
left_chunk_index: u32,
left_byte_offset: u32,
last_left_indice: Option<(usize, char)>,
right_chunk_index: u32,
right_byte_offset: u32,
right_byte_offset_for: u32,
},
}

Expand All @@ -675,29 +689,59 @@ impl Iterator for CharIndices<'_, '_> {
match &mut self.iter {
CharIndicesEnum::Light { iter } => iter.next(),
CharIndicesEnum::Full {
chunks,
char_indices,
chunk_index,
iters,
left_chunk_index,
left_byte_offset,
last_left_indice,
..
} => {
if let Some(item) = char_indices.pop_front() {
return Some(item);
}

if *chunk_index >= chunks.len() {
if (*left_chunk_index as usize) >= iters.len() {
return None;
}

// skip empty chunks
while *chunk_index < chunks.len() && chunks[*chunk_index].0.is_empty() {
*chunk_index += 1;
if let Some((byte_index, char)) =
iters[*left_chunk_index as usize].next()
{
*last_left_indice = Some((byte_index, char));
Some((byte_index + (*left_byte_offset as usize), char))
} else {
*left_chunk_index += 1;
if let Some((byte_index, char)) = last_left_indice.take() {
*left_byte_offset =
*left_byte_offset + byte_index as u32 + char.len_utf8() as u32;
}
self.next()
}
}
}
}
}

let (chunk, start_pos) = chunks[*chunk_index];

char_indices
.extend(chunk.char_indices().map(|(i, c)| (start_pos + i, c)));
*chunk_index += 1;
char_indices.pop_front()
impl DoubleEndedIterator for CharIndices<'_, '_> {
/// Yields the next `(byte index, char)` pair from the back of the rope.
///
/// The `Light` variant simply delegates to the wrapped iterator. The `Full`
/// variant pulls from the per-chunk iterator at `right_chunk_index` and steps
/// to the previous chunk once that iterator is exhausted, returning `None`
/// after chunk 0 has been drained.
///
/// NOTE(review): `iters[*right_chunk_index as usize]` is indexed unchecked, so
/// this presumably relies on `Full` always holding at least one chunk —
/// TODO confirm against the code that builds `CharIndicesEnum::Full`.
fn next_back(&mut self) -> Option<Self::Item> {
match &mut self.iter {
CharIndicesEnum::Light { iter } => iter.next_back(),
CharIndicesEnum::Full {
iters,
right_chunk_index,
right_byte_offset,
right_byte_offset_for,
..
} => {
if let Some((byte_index, char)) =
iters[*right_chunk_index as usize].next_back()
{
// `right_byte_offset_for` records which chunk `right_byte_offset`
// was last adjusted for. On the first item taken from the back of
// a new chunk, subtract the end position of that item
// (`byte_index + len_utf8`) from the running offset; later items
// from the same chunk reuse the adjusted offset unchanged.
if *right_byte_offset_for != *right_chunk_index {
*right_byte_offset =
*right_byte_offset - byte_index as u32 - char.len_utf8() as u32;
*right_byte_offset_for = *right_chunk_index;
}
// Translate the chunk-local byte index into a rope-wide one.
Some((byte_index + (*right_byte_offset as usize), char))
} else if *right_chunk_index > 0 {
// Current chunk is exhausted: move one chunk to the left and retry.
*right_chunk_index -= 1;
self.next_back()
} else {
// Chunk 0 is exhausted as well; nothing left to yield.
None
}
}
}
}
Expand Down Expand Up @@ -1168,6 +1212,29 @@ mod tests {
);
}

#[test]
fn reverse_char_indices() {
    // Two ASCII chunks: reversed iteration must match the concatenated string.
    let mut ascii_rope = Rope::new();
    ascii_rope.add("abc");
    ascii_rope.add("def");
    let ascii_reversed = ascii_rope.char_indices().rev().collect::<Vec<_>>();
    let ascii_expected = "abcdef".char_indices().rev().collect::<Vec<_>>();
    assert_eq!(ascii_reversed, ascii_expected);

    // A single chunk made of multi-byte characters.
    let mut wide_rope = Rope::new();
    wide_rope.add("こんにちは");
    let single_reversed = wide_rope.char_indices().rev().collect::<Vec<_>>();
    let single_expected = "こんにちは".char_indices().rev().collect::<Vec<_>>();
    assert_eq!(single_reversed, single_expected);

    // Appending a second multi-byte chunk: byte indices must still line up
    // with those of the equivalent contiguous string.
    wide_rope.add("世界");
    let joined_reversed = wide_rope.char_indices().rev().collect::<Vec<_>>();
    let joined_expected = "こんにちは世界".char_indices().rev().collect::<Vec<_>>();
    assert_eq!(joined_reversed, joined_expected);
}

#[test]
fn lines1() {
let rope = Rope::from("abc");
Expand Down
Loading
Loading