Skip to content

Commit

Permalink
perf: WithIndices (#150)
Browse files Browse the repository at this point in the history
  • Loading branch information
SyMind authored Jan 6, 2025
1 parent 9436375 commit 10b6a4c
Show file tree
Hide file tree
Showing 4 changed files with 210 additions and 77 deletions.
55 changes: 24 additions & 31 deletions src/decoder.rs
Original file line number Diff line number Diff line change
Expand Up @@ -39,7 +39,6 @@ pub(crate) struct MappingsDecoder<'a> {
current_value: i64,
current_value_pos: usize,
generated_line: u32,
generated_column: i64,
}

impl<'a> MappingsDecoder<'a> {
Expand All @@ -52,7 +51,6 @@ impl<'a> MappingsDecoder<'a> {
current_value: 0,
current_value_pos: 0,
generated_line: 1,
generated_column: -1,
}
}
}
Expand All @@ -67,44 +65,39 @@ impl Iterator for MappingsDecoder<'_> {
continue;
}
if (value & COM) != 0 {
let mapping = match self.current_data_pos {
1 => Some(Mapping {
generated_line: self.generated_line,
generated_column: self.current_data[0],
original: None,
}),
4 => Some(Mapping {
generated_line: self.generated_line,
generated_column: self.current_data[0],
original: Some(OriginalLocation {
let mut mapping = Mapping {
generated_line: self.generated_line,
generated_column: self.current_data[0],
original: None,
};
let current_data_pos = self.current_data_pos;
self.current_data_pos = 0;
if value == SEM {
self.generated_line += 1;
self.current_data[0] = 0;
}
match current_data_pos {
1 => return Some(mapping),
4 => {
mapping.original = Some(OriginalLocation {
source_index: self.current_data[1],
original_line: self.current_data[2],
original_column: self.current_data[3],
name_index: None,
}),
}),
5 => Some(Mapping {
generated_line: self.generated_line,
generated_column: self.current_data[0],
original: Some(OriginalLocation {
});
return Some(mapping);
}
5 => {
mapping.original = Some(OriginalLocation {
source_index: self.current_data[1],
original_line: self.current_data[2],
original_column: self.current_data[3],
name_index: Some(self.current_data[4]),
}),
}),
_ => None,
});
return Some(mapping);
}
_ => (),
};
self.generated_column = self.current_data[0] as i64;
self.current_data_pos = 0;
if value == SEM {
self.generated_line += 1;
self.current_data[0] = 0;
self.generated_column = -1;
}
if mapping.is_some() {
return mapping;
}
} else if (value & CONTINUATION_BIT) == 0 {
// last sextet
self.current_value |= (value as i64) << self.current_value_pos;
Expand Down
6 changes: 3 additions & 3 deletions src/helpers.rs
Original file line number Diff line number Diff line change
Expand Up @@ -1252,7 +1252,7 @@ pub trait SourceText<'a>: Default + Clone + ToString {
fn ends_with(&self, value: &str) -> bool;

/// Returns an iterator over the char indices in the text.
fn char_indices(&self) -> impl Iterator<Item = (usize, char)>;
fn char_indices(&self) -> impl DoubleEndedIterator<Item = (usize, char)>;

/// Gets the byte at the specified index, if it exists.
fn get_byte(&self, byte_index: usize) -> Option<u8>;
Expand Down Expand Up @@ -1289,7 +1289,7 @@ impl<'a> SourceText<'a> for Rope<'a> {
(*self).ends_with(value)
}

fn char_indices(&self) -> impl Iterator<Item = (usize, char)> {
fn char_indices(&self) -> impl DoubleEndedIterator<Item = (usize, char)> {
self.char_indices()
}

Expand Down Expand Up @@ -1331,7 +1331,7 @@ impl<'a> SourceText<'a> for &'a str {
(*self).ends_with(value)
}

fn char_indices(&self) -> impl Iterator<Item = (usize, char)> {
fn char_indices(&self) -> impl DoubleEndedIterator<Item = (usize, char)> {
(*self).char_indices()
}

Expand Down
125 changes: 96 additions & 29 deletions src/rope.rs
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,6 @@

use std::{
borrow::Cow,
collections::VecDeque,
hash::Hash,
ops::{Bound, RangeBounds},
rc::Rc,
Expand Down Expand Up @@ -134,13 +133,24 @@ impl<'a> Rope<'a> {
iter: s.char_indices(),
},
},
Repr::Full(data) => CharIndices {
iter: CharIndicesEnum::Full {
chunks: data,
char_indices: VecDeque::new(),
chunk_index: 0,
},
},
Repr::Full(vec) => {
let right_byte_offset = vec.iter().map(|(s, _)| s.len() as u32).sum();

CharIndices {
iter: CharIndicesEnum::Full {
iters: vec
.iter()
.map(|(s, _)| s.char_indices())
.collect::<Vec<_>>(),
left_chunk_index: 0,
left_byte_offset: 0,
last_left_indice: None,
right_chunk_index: (vec.len() - 1) as u32,
right_byte_offset,
right_byte_offset_for: vec.len() as u32,
},
}
}
}
}

Expand Down Expand Up @@ -658,9 +668,13 @@ enum CharIndicesEnum<'a, 'b> {
iter: std::str::CharIndices<'b>,
},
Full {
chunks: &'a [(&'b str, usize)],
char_indices: VecDeque<(usize, char)>,
chunk_index: usize,
iters: Vec<std::str::CharIndices<'a>>,
left_chunk_index: u32,
left_byte_offset: u32,
last_left_indice: Option<(usize, char)>,
right_chunk_index: u32,
right_byte_offset: u32,
right_byte_offset_for: u32,
},
}

Expand All @@ -675,29 +689,59 @@ impl Iterator for CharIndices<'_, '_> {
match &mut self.iter {
CharIndicesEnum::Light { iter } => iter.next(),
CharIndicesEnum::Full {
chunks,
char_indices,
chunk_index,
iters,
left_chunk_index,
left_byte_offset,
last_left_indice,
..
} => {
if let Some(item) = char_indices.pop_front() {
return Some(item);
}

if *chunk_index >= chunks.len() {
if (*left_chunk_index as usize) >= iters.len() {
return None;
}

// skip empty chunks
while *chunk_index < chunks.len() && chunks[*chunk_index].0.is_empty() {
*chunk_index += 1;
if let Some((byte_index, char)) =
iters[*left_chunk_index as usize].next()
{
*last_left_indice = Some((byte_index, char));
Some((byte_index + (*left_byte_offset as usize), char))
} else {
*left_chunk_index += 1;
if let Some((byte_index, char)) = last_left_indice.take() {
*left_byte_offset =
*left_byte_offset + byte_index as u32 + char.len_utf8() as u32;
}
self.next()
}
}
}
}
}

let (chunk, start_pos) = chunks[*chunk_index];

char_indices
.extend(chunk.char_indices().map(|(i, c)| (start_pos + i, c)));
*chunk_index += 1;
char_indices.pop_front()
impl DoubleEndedIterator for CharIndices<'_, '_> {
/// Yields the next `(byte_index, char)` pair from the back of the rope.
///
/// The byte index is relative to the start of the whole rope, not to the
/// chunk the character lives in.
///
/// For the `Full` representation the chunks are walked from the last one
/// towards the first. `right_byte_offset` holds the rope-relative start of
/// the chunk identified by `right_byte_offset_for`; it is re-derived the
/// first time a character is produced from a new chunk.
fn next_back(&mut self) -> Option<Self::Item> {
    match &mut self.iter {
        CharIndicesEnum::Light { iter } => iter.next_back(),
        CharIndicesEnum::Full {
            iters,
            right_chunk_index,
            right_byte_offset,
            right_byte_offset_for,
            ..
        } => loop {
            // Iterate instead of recursing so that a long run of empty or
            // already exhausted chunks cannot grow the call stack. A checked
            // lookup also makes an empty chunk list end the iteration
            // instead of panicking on an out-of-bounds index.
            let Some(chunk_iter) = iters.get_mut(*right_chunk_index as usize) else {
                return None;
            };

            if let Some((byte_index, ch)) = chunk_iter.next_back() {
                if *right_byte_offset_for != *right_chunk_index {
                    // First character taken from the back of this chunk: it
                    // is the chunk's last character, so `byte_index` plus its
                    // UTF-8 width is the chunk length. Subtracting that from
                    // the previous offset (start of the following chunk, or
                    // the total length initially) gives this chunk's start.
                    *right_byte_offset =
                        *right_byte_offset - byte_index as u32 - ch.len_utf8() as u32;
                    *right_byte_offset_for = *right_chunk_index;
                }
                return Some((byte_index + (*right_byte_offset as usize), ch));
            }

            // Current chunk has nothing left; stop once the first chunk has
            // been drained, otherwise step to the previous chunk.
            if *right_chunk_index == 0 {
                return None;
            }
            *right_chunk_index -= 1;
        },
    }
}
Expand Down Expand Up @@ -1168,6 +1212,29 @@ mod tests {
);
}

#[test]
fn reverse_char_indices() {
    // Two ASCII chunks: the reversed indices must match those of the
    // equivalent contiguous string.
    let mut ascii = Rope::new();
    ascii.add("abc");
    ascii.add("def");
    let actual: Vec<_> = ascii.char_indices().rev().collect();
    let expected: Vec<_> = "abcdef".char_indices().rev().collect();
    assert_eq!(actual, expected);

    // A single chunk of multi-byte characters.
    let mut multibyte = Rope::new();
    multibyte.add("こんにちは");
    let actual: Vec<_> = multibyte.char_indices().rev().collect();
    let expected: Vec<_> = "こんにちは".char_indices().rev().collect();
    assert_eq!(actual, expected);

    // The same rope after a second multi-byte chunk has been appended.
    multibyte.add("世界");
    let actual: Vec<_> = multibyte.char_indices().rev().collect();
    let expected: Vec<_> = "こんにちは世界".char_indices().rev().collect();
    assert_eq!(actual, expected);
}

#[test]
fn lines1() {
let rope = Rope::from("abc");
Expand Down
Loading

0 comments on commit 10b6a4c

Please sign in to comment.