From d948bfd822c59362f0a2ec4caac1130832dc4f17 Mon Sep 17 00:00:00 2001
From: Cong-Cong <dacongsama@live.com>
Date: Sun, 22 Dec 2024 13:07:47 +0800
Subject: [PATCH 01/14] perf: WithIndices

---
 src/with_indices.rs | 60 ++++++++++++++++++++++++++++++++++-----------
 1 file changed, 46 insertions(+), 14 deletions(-)

diff --git a/src/with_indices.rs b/src/with_indices.rs
index e491252..fc5d6fd 100644
--- a/src/with_indices.rs
+++ b/src/with_indices.rs
@@ -1,4 +1,4 @@
-use std::{cell::OnceCell, marker::PhantomData};
+use std::marker::PhantomData;
 
 use crate::helpers::SourceText;
 
@@ -9,8 +9,7 @@ where
 {
   /// line is a string reference
   pub line: S,
-  /// the byte position of each `char` in `line` string slice .
-  pub indices_indexes: OnceCell<Vec<usize>>,
+  last_char_index_to_byte_index: (usize, usize),
   data: PhantomData<&'a S>,
 }
 
@@ -20,32 +19,65 @@ where
 {
   pub fn new(line: S) -> Self {
     Self {
-      indices_indexes: OnceCell::new(),
       line,
+      last_char_index_to_byte_index: (0, 0),
       data: PhantomData,
     }
   }
 
-  /// substring::SubString with cache
-  pub(crate) fn substring(&self, start_index: usize, end_index: usize) -> S {
-    if end_index <= start_index {
+  pub(crate) fn substring(
+    &self,
+    start_char_index: usize,
+    end_char_index: usize,
+  ) -> S {
+    if end_char_index <= start_char_index {
       return S::default();
     }
 
-    let indices_indexes = self.indices_indexes.get_or_init(|| {
-      self.line.char_indices().map(|(i, _)| i).collect::<Vec<_>>()
-    });
+    let mut start_byte_index = None;
+    let mut end_byte_index = None;
 
-    let str_len = self.line.len();
-    let start = *indices_indexes.get(start_index).unwrap_or(&str_len);
-    let end = *indices_indexes.get(end_index).unwrap_or(&str_len);
+    let (last_char_index, mut last_byte_index) =
+      self.last_char_index_to_byte_index;
+    let mut char_index = last_char_index;
+    if last_char_index < start_char_index {
+      char_index = 0;
+      last_byte_index = 0;
+    }
+    for (byte_index, _) in self
+      .line
+      .byte_slice(last_byte_index..self.line.len())
+      .char_indices()
+    {
+      if char_index == start_char_index {
+        start_byte_index = Some(byte_index);
+      }
+      if char_index == end_char_index {
+        end_byte_index = Some(byte_index);
+        break;
+      }
+      char_index += 1;
+    }
+
+    let start_byte_index = if let Some(start_byte_index) = start_byte_index {
+      start_byte_index
+    } else {
+      return S::default();
+    };
+
+    let end_byte_index = end_byte_index.unwrap_or(self.line.len());
+    if end_byte_index <= start_byte_index {
+      return S::default();
+    }
 
     #[allow(unsafe_code)]
     unsafe {
       // SAFETY: Since `indices` iterates over the `CharIndices` of `self`, we can guarantee
       // that the indices obtained from it will always be within the bounds of `self` and they
       // will always lie on UTF-8 sequence boundaries.
-      self.line.byte_slice_unchecked(start..end)
+      self
+        .line
+        .byte_slice_unchecked(start_byte_index..end_byte_index)
     }
   }
 }

From 18687ce0ea064af196aaad2e025c779003b264c4 Mon Sep 17 00:00:00 2001
From: Cong-Cong <dacongsama@live.com>
Date: Sun, 22 Dec 2024 13:22:53 +0800
Subject: [PATCH 02/14] fix: should record last_char_index_to_byte_index

---
 src/with_indices.rs | 12 ++++++++----
 1 file changed, 8 insertions(+), 4 deletions(-)

diff --git a/src/with_indices.rs b/src/with_indices.rs
index fc5d6fd..af53408 100644
--- a/src/with_indices.rs
+++ b/src/with_indices.rs
@@ -1,4 +1,4 @@
-use std::marker::PhantomData;
+use std::{cell::RefCell, marker::PhantomData};
 
 use crate::helpers::SourceText;
 
@@ -9,7 +9,7 @@ where
 {
   /// line is a string reference
   pub line: S,
-  last_char_index_to_byte_index: (usize, usize),
+  last_char_index_to_byte_index: RefCell<(usize, usize)>,
   data: PhantomData<&'a S>,
 }
 
@@ -20,7 +20,7 @@ where
   pub fn new(line: S) -> Self {
     Self {
       line,
-      last_char_index_to_byte_index: (0, 0),
+      last_char_index_to_byte_index: RefCell::new((0, 0)),
       data: PhantomData,
     }
   }
@@ -38,7 +38,7 @@ where
     let mut end_byte_index = None;
 
     let (last_char_index, mut last_byte_index) =
-      self.last_char_index_to_byte_index;
+      *self.last_char_index_to_byte_index.borrow();
     let mut char_index = last_char_index;
     if last_char_index < start_char_index {
       char_index = 0;
@@ -51,9 +51,13 @@ where
     {
       if char_index == start_char_index {
         start_byte_index = Some(byte_index);
+        if end_char_index == usize::MAX {
+          break;
+        }
       }
       if char_index == end_char_index {
         end_byte_index = Some(byte_index);
+        *self.last_char_index_to_byte_index.borrow_mut() = (end_char_index, byte_index);
         break;
       }
       char_index += 1;

From 30500f335b24870c8a3cce406b929e70d866a9d9 Mon Sep 17 00:00:00 2001
From: Cong-Cong <dacongsama@live.com>
Date: Sun, 22 Dec 2024 14:02:06 +0800
Subject: [PATCH 03/14] fix: test

---
 src/with_indices.rs | 36 ++++++++++++++++++++++++++----------
 1 file changed, 26 insertions(+), 10 deletions(-)

diff --git a/src/with_indices.rs b/src/with_indices.rs
index af53408..4813072 100644
--- a/src/with_indices.rs
+++ b/src/with_indices.rs
@@ -34,30 +34,33 @@ where
       return S::default();
     }
 
+    let line_len = self.line.len();
+
     let mut start_byte_index = None;
     let mut end_byte_index = None;
 
     let (last_char_index, mut last_byte_index) =
       *self.last_char_index_to_byte_index.borrow();
     let mut char_index = last_char_index;
-    if last_char_index < start_char_index {
+    if start_char_index < last_char_index {
       char_index = 0;
       last_byte_index = 0;
     }
     for (byte_index, _) in self
       .line
-      .byte_slice(last_byte_index..self.line.len())
+      .byte_slice(last_byte_index..line_len)
       .char_indices()
     {
       if char_index == start_char_index {
-        start_byte_index = Some(byte_index);
+        start_byte_index = Some(byte_index + last_byte_index);
         if end_char_index == usize::MAX {
           break;
         }
       }
       if char_index == end_char_index {
-        end_byte_index = Some(byte_index);
-        *self.last_char_index_to_byte_index.borrow_mut() = (end_char_index, byte_index);
+        end_byte_index = Some(byte_index + last_byte_index);
+        *self.last_char_index_to_byte_index.borrow_mut() =
+          (end_char_index, byte_index);
         break;
       }
       char_index += 1;
@@ -68,11 +71,7 @@ where
     } else {
       return S::default();
     };
-
-    let end_byte_index = end_byte_index.unwrap_or(self.line.len());
-    if end_byte_index <= start_byte_index {
-      return S::default();
-    }
+    let end_byte_index = end_byte_index.unwrap_or(line_len);
 
     #[allow(unsafe_code)]
     unsafe {
@@ -126,4 +125,21 @@ mod tests {
       "øbα"
     );
   }
+
+  #[test]
+  fn test_last_char_index_to_byte_index() {
+    let rope_with_indices = WithIndices::new(Rope::from("foobar"));
+    assert_eq!(
+      rope_with_indices.substring(0, 3),
+      "foo"
+    );
+    assert_eq!(
+      rope_with_indices.substring(3, 6),
+      "bar"
+    );
+    assert_eq!(
+      rope_with_indices.substring(0, usize::MAX),
+      "foobar"
+    );
+  }
 }

From dd81442b42b1533ea12edfb117b00bfbe5bc6e54 Mon Sep 17 00:00:00 2001
From: Cong-Cong <dacongsama@live.com>
Date: Sun, 22 Dec 2024 14:10:25 +0800
Subject: [PATCH 04/14] fix: cache absolute byte index in last_char_index_to_byte_index

---
 src/with_indices.rs | 17 ++++-------------
 1 file changed, 4 insertions(+), 13 deletions(-)

diff --git a/src/with_indices.rs b/src/with_indices.rs
index 4813072..aa8eed9 100644
--- a/src/with_indices.rs
+++ b/src/with_indices.rs
@@ -60,7 +60,7 @@ where
       if char_index == end_char_index {
         end_byte_index = Some(byte_index + last_byte_index);
         *self.last_char_index_to_byte_index.borrow_mut() =
-          (end_char_index, byte_index);
+          (end_char_index, byte_index + last_byte_index);
         break;
       }
       char_index += 1;
@@ -129,17 +129,8 @@ mod tests {
   #[test]
   fn test_last_char_index_to_byte_index() {
     let rope_with_indices = WithIndices::new(Rope::from("foobar"));
-    assert_eq!(
-      rope_with_indices.substring(0, 3),
-      "foo"
-    );
-    assert_eq!(
-      rope_with_indices.substring(3, 6),
-      "bar"
-    );
-    assert_eq!(
-      rope_with_indices.substring(0, usize::MAX),
-      "foobar"
-    );
+    assert_eq!(rope_with_indices.substring(0, 3), "foo");
+    assert_eq!(rope_with_indices.substring(3, 6), "bar");
+    assert_eq!(rope_with_indices.substring(0, usize::MAX), "foobar");
   }
 }

From 48477380051a2346eae396b3115df517888edd4a Mon Sep 17 00:00:00 2001
From: Cong-Cong <dacongsama@live.com>
Date: Sun, 22 Dec 2024 14:34:36 +0800
Subject: [PATCH 05/14] feat: reduce last_char_index_to_byte_index

---
 src/with_indices.rs | 11 ++++++-----
 1 file changed, 6 insertions(+), 5 deletions(-)

diff --git a/src/with_indices.rs b/src/with_indices.rs
index aa8eed9..a602902 100644
--- a/src/with_indices.rs
+++ b/src/with_indices.rs
@@ -9,7 +9,7 @@ where
 {
   /// line is a string reference
   pub line: S,
-  last_char_index_to_byte_index: RefCell<(usize, usize)>,
+  last_char_index_to_byte_index: RefCell<(u32, u32)>,
   data: PhantomData<&'a S>,
 }
 
@@ -39,10 +39,11 @@ where
     let mut start_byte_index = None;
     let mut end_byte_index = None;
 
-    let (last_char_index, mut last_byte_index) =
+    let (last_char_index, last_byte_index) =
       *self.last_char_index_to_byte_index.borrow();
-    let mut char_index = last_char_index;
-    if start_char_index < last_char_index {
+    let mut last_byte_index = last_byte_index as usize;
+    let mut char_index = last_char_index as usize;
+    if start_char_index < last_char_index as usize {
       char_index = 0;
       last_byte_index = 0;
     }
@@ -60,7 +61,7 @@ where
       if char_index == end_char_index {
         end_byte_index = Some(byte_index + last_byte_index);
         *self.last_char_index_to_byte_index.borrow_mut() =
-          (end_char_index, byte_index + last_byte_index);
+          (end_char_index as u32, (byte_index + last_byte_index) as u32);
         break;
       }
       char_index += 1;

From 7674774bb5f40fe5d2f5b76349fc503a12426046 Mon Sep 17 00:00:00 2001
From: Cong-Cong <dacongsama@live.com>
Date: Fri, 27 Dec 2024 23:03:16 +0800
Subject: [PATCH 06/14] feat: rope.chars

---
 src/rope.rs | 91 +++++++++++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 91 insertions(+)

diff --git a/src/rope.rs b/src/rope.rs
index b18d90c..bc75d98 100644
--- a/src/rope.rs
+++ b/src/rope.rs
@@ -6,6 +6,7 @@ use std::{
   hash::Hash,
   ops::{Bound, RangeBounds},
   rc::Rc,
+  str::Chars,
 };
 
 use crate::Error;
@@ -144,6 +145,22 @@ impl<'a> Rope<'a> {
     }
   }
 
+  /// Returns an iterator over the [`char`]s of a string slice.
+  pub fn chars(&self) -> RopeChars<'_> {
+    match &self.repr {
+      Repr::Light(s) => RopeChars {
+        iters: vec![s.chars()],
+        left: 0,
+        right: 0
+      },
+      Repr::Full(data) => {
+        let iters = data.iter().map(|(s, _)| s.chars()).collect::<Vec<_>>();
+        let len = iters.len();
+        RopeChars { iters, left: 0, right: (len - 1) as u32 }
+      }
+    }
+  }
+
   /// Returns whether the rope starts with the given string.
   #[inline]
   pub fn starts_with(&self, value: &str) -> bool {
@@ -939,6 +956,46 @@ fn end_bound_to_range_end(end: Bound<&usize>) -> Option<usize> {
   }
 }
 
+pub struct RopeChars<'a> {
+  iters: Vec<Chars<'a>>,
+  left: u32,
+  right: u32
+}
+
+impl<'a> Iterator for RopeChars<'a> {
+  type Item = char;
+
+  #[inline]
+  fn next(&mut self) -> Option<char> {
+    let left = self.left as usize;
+    if left >= self.iters.len() {
+      return None;
+    }
+    if let Some(char) = self.iters[left].next() {
+      return Some(char);
+    } else {
+      self.left += 1;
+      self.next()
+    }
+  }
+}
+
+impl<'a> DoubleEndedIterator for RopeChars<'a> {
+  #[inline]
+  fn next_back(&mut self) -> Option<Self::Item> {
+    let right = self.right as usize;
+    if right == 0 {
+      return self.iters[right].next_back();
+    }
+    if let Some(char) = self.iters[right].next_back() {
+      return Some(char);
+    } else {
+      self.right -= 1;
+      self.next_back()
+    }
+  }
+}
+
 #[cfg(test)]
 mod tests {
   use std::rc::Rc;
@@ -1244,4 +1301,38 @@ mod tests {
       .collect::<Vec<_>>();
     assert_eq!(lines, ["\n"]);
   }
+
+  #[test]
+  fn chars() {
+    let rope = Rope::from("abc");
+    let mut chars = rope.chars();
+    assert_eq!(chars.next(), Some('a'));
+    assert_eq!(chars.next(), Some('b'));
+    assert_eq!(chars.next(), Some('c'));
+    assert_eq!(chars.next(), None);
+
+    let rope = Rope::from_iter(["a", "b", "c"]);
+    let mut chars = rope.chars();
+    assert_eq!(chars.next(), Some('a'));
+    assert_eq!(chars.next(), Some('b'));
+    assert_eq!(chars.next(), Some('c'));
+    assert_eq!(chars.next(), None);
+  }
+
+  #[test]
+  fn reverse_chars() {
+    let rope = Rope::from("abc");
+    let mut chars = rope.chars().rev();
+    assert_eq!(chars.next(), Some('c'));
+    assert_eq!(chars.next(), Some('b'));
+    assert_eq!(chars.next(), Some('a'));
+    assert_eq!(chars.next(), None);
+
+    let rope = Rope::from_iter(["a", "b", "c"]);
+    let mut chars = rope.chars().rev();
+    assert_eq!(chars.next(), Some('c'));
+    assert_eq!(chars.next(), Some('b'));
+    assert_eq!(chars.next(), Some('a'));
+    assert_eq!(chars.next(), None);
+  }
 }

From 7a0856dbd179718d6a90c4a6453722e27488a001 Mon Sep 17 00:00:00 2001
From: Cong-Cong <dacongsama@live.com>
Date: Sat, 28 Dec 2024 08:59:01 +0800
Subject: [PATCH 07/14] refactor: add SourceText::chars and scan substring in both directions

---
 src/helpers.rs      | 11 +++++++
 src/rope.rs         | 18 +++++++-----
 src/with_indices.rs | 70 ++++++++++++++++++++++++++++-----------------
 3 files changed, 65 insertions(+), 34 deletions(-)

diff --git a/src/helpers.rs b/src/helpers.rs
index 2243fde..981da56 100644
--- a/src/helpers.rs
+++ b/src/helpers.rs
@@ -1254,6 +1254,9 @@ pub trait SourceText<'a>: Default + Clone + ToString {
   /// Returns an iterator over the char indices in the text.
   fn char_indices(&self) -> impl Iterator<Item = (usize, char)>;
 
+  /// Returns an iterator over the [`char`]s of a string slice.
+  fn chars(&self) -> impl DoubleEndedIterator<Item = char>;
+
   /// Gets the byte at the specified index, if it exists.
   fn get_byte(&self, byte_index: usize) -> Option<u8>;
 
@@ -1293,6 +1296,10 @@ impl<'a> SourceText<'a> for Rope<'a> {
     self.char_indices()
   }
 
+  fn chars(&self) -> impl DoubleEndedIterator<Item = char> {
+    (*self).chars()
+  }
+
   fn byte_slice(&self, range: Range<usize>) -> Self {
     self.byte_slice(range)
   }
@@ -1335,6 +1342,10 @@ impl<'a> SourceText<'a> for &'a str {
     (*self).char_indices()
   }
 
+  fn chars(&self) -> impl DoubleEndedIterator<Item = char> {
+    (*self).chars()
+  }
+
   fn byte_slice(&self, range: Range<usize>) -> Self {
     self.get(range).unwrap_or_default()
   }
diff --git a/src/rope.rs b/src/rope.rs
index bc75d98..f6e49f6 100644
--- a/src/rope.rs
+++ b/src/rope.rs
@@ -151,12 +151,16 @@ impl<'a> Rope<'a> {
       Repr::Light(s) => RopeChars {
         iters: vec![s.chars()],
         left: 0,
-        right: 0
+        right: 0,
       },
       Repr::Full(data) => {
         let iters = data.iter().map(|(s, _)| s.chars()).collect::<Vec<_>>();
         let len = iters.len();
-        RopeChars { iters, left: 0, right: (len - 1) as u32 }
+        RopeChars {
+          iters,
+          left: 0,
+          right: (len - 1) as u32,
+        }
       }
     }
   }
@@ -959,10 +963,10 @@ fn end_bound_to_range_end(end: Bound<&usize>) -> Option<usize> {
 pub struct RopeChars<'a> {
   iters: Vec<Chars<'a>>,
   left: u32,
-  right: u32
+  right: u32,
 }
 
-impl<'a> Iterator for RopeChars<'a> {
+impl Iterator for RopeChars<'_> {
   type Item = char;
 
   #[inline]
@@ -972,7 +976,7 @@ impl<'a> Iterator for RopeChars<'a> {
       return None;
     }
     if let Some(char) = self.iters[left].next() {
-      return Some(char);
+      Some(char)
     } else {
       self.left += 1;
       self.next()
@@ -980,7 +984,7 @@ impl<'a> Iterator for RopeChars<'a> {
   }
 }
 
-impl<'a> DoubleEndedIterator for RopeChars<'a> {
+impl DoubleEndedIterator for RopeChars<'_> {
   #[inline]
   fn next_back(&mut self) -> Option<Self::Item> {
     let right = self.right as usize;
@@ -988,7 +992,7 @@ impl<'a> DoubleEndedIterator for RopeChars<'a> {
       return self.iters[right].next_back();
     }
     if let Some(char) = self.iters[right].next_back() {
-      return Some(char);
+      Some(char)
     } else {
       self.right -= 1;
       self.next_back()
diff --git a/src/with_indices.rs b/src/with_indices.rs
index a602902..86778e2 100644
--- a/src/with_indices.rs
+++ b/src/with_indices.rs
@@ -36,42 +36,58 @@ where
 
     let line_len = self.line.len();
 
-    let mut start_byte_index = None;
-    let mut end_byte_index = None;
+    let mut start_byte_index =
+      if start_char_index == 0 { Some(0) } else { None };
+    let mut end_byte_index = if end_char_index == usize::MAX {
+      Some(line_len)
+    } else {
+      None
+    };
 
     let (last_char_index, last_byte_index) =
       *self.last_char_index_to_byte_index.borrow();
-    let mut last_byte_index = last_byte_index as usize;
+    let mut byte_index = last_byte_index as usize;
     let mut char_index = last_char_index as usize;
-    if start_char_index < last_char_index as usize {
-      char_index = 0;
-      last_byte_index = 0;
-    }
-    for (byte_index, _) in self
-      .line
-      .byte_slice(last_byte_index..line_len)
-      .char_indices()
+
+    if start_char_index >= last_char_index as usize
+      || end_char_index >= last_char_index as usize
     {
-      if char_index == start_char_index {
-        start_byte_index = Some(byte_index + last_byte_index);
-        if end_char_index == usize::MAX {
+      for char in self.line.byte_slice(byte_index..line_len).chars() {
+        if start_byte_index.is_some() && end_byte_index.is_some() {
           break;
         }
+        if char_index == start_char_index {
+          start_byte_index = Some(byte_index);
+          *self.last_char_index_to_byte_index.borrow_mut() =
+            (char_index as u32, byte_index as u32);
+        }
+        if char_index == end_char_index {
+          end_byte_index = Some(byte_index);
+          *self.last_char_index_to_byte_index.borrow_mut() =
+            (char_index as u32, byte_index as u32);
+        }
+        byte_index += char.len_utf8();
+        char_index += 1;
       }
-      if char_index == end_char_index {
-        end_byte_index = Some(byte_index + last_byte_index);
-        *self.last_char_index_to_byte_index.borrow_mut() =
-          (end_char_index as u32, (byte_index + last_byte_index) as u32);
-        break;
+    } else {
+      for char in self.line.byte_slice(0..byte_index).chars().rev() {
+        if start_byte_index.is_some() && end_byte_index.is_some() {
+          break;
+        }
+        byte_index -= char.len_utf8();
+        char_index -= 1;
+        if char_index == end_char_index {
+          end_byte_index = Some(byte_index);
+          *self.last_char_index_to_byte_index.borrow_mut() =
+            (char_index as u32, byte_index as u32);
+        }
+        if char_index == start_char_index {
+          start_byte_index = Some(byte_index);
+        }
       }
-      char_index += 1;
     }
 
-    let start_byte_index = if let Some(start_byte_index) = start_byte_index {
-      start_byte_index
-    } else {
-      return S::default();
-    };
+    let start_byte_index = start_byte_index.unwrap_or(line_len);
     let end_byte_index = end_byte_index.unwrap_or(line_len);
 
     #[allow(unsafe_code)]
@@ -131,7 +147,7 @@ mod tests {
   fn test_last_char_index_to_byte_index() {
     let rope_with_indices = WithIndices::new(Rope::from("foobar"));
     assert_eq!(rope_with_indices.substring(0, 3), "foo");
-    assert_eq!(rope_with_indices.substring(3, 6), "bar");
-    assert_eq!(rope_with_indices.substring(0, usize::MAX), "foobar");
+    assert_eq!(rope_with_indices.substring(3, 5), "ba");
+    assert_eq!(rope_with_indices.substring(0, 3), "foo");
   }
 }

From 11ea50a742688d1d821cca0d1a79a957f8d8220e Mon Sep 17 00:00:00 2001
From: Cong-Cong <dacongsama@live.com>
Date: Sat, 28 Dec 2024 09:20:33 +0800
Subject: [PATCH 08/14] test: cover multi-byte chars in substring cache; inline Rope::chars

---
 src/rope.rs         | 1 +
 src/with_indices.rs | 9 +++++----
 2 files changed, 6 insertions(+), 4 deletions(-)

diff --git a/src/rope.rs b/src/rope.rs
index f6e49f6..2a43ce3 100644
--- a/src/rope.rs
+++ b/src/rope.rs
@@ -146,6 +146,7 @@ impl<'a> Rope<'a> {
   }
 
   /// Returns an iterator over the [`char`]s of a string slice.
+  #[inline(always)]
   pub fn chars(&self) -> RopeChars<'_> {
     match &self.repr {
       Repr::Light(s) => RopeChars {
diff --git a/src/with_indices.rs b/src/with_indices.rs
index 86778e2..549b44a 100644
--- a/src/with_indices.rs
+++ b/src/with_indices.rs
@@ -145,9 +145,10 @@ mod tests {
 
   #[test]
   fn test_last_char_index_to_byte_index() {
-    let rope_with_indices = WithIndices::new(Rope::from("foobar"));
-    assert_eq!(rope_with_indices.substring(0, 3), "foo");
-    assert_eq!(rope_with_indices.substring(3, 5), "ba");
-    assert_eq!(rope_with_indices.substring(0, 3), "foo");
+    let rope_with_indices =
+      WithIndices::new(Rope::from("hello world 你好世界"));
+    assert_eq!(rope_with_indices.substring(10, 13), "d 你");
+    assert_eq!(rope_with_indices.substring(13, 15), "好世");
+    assert_eq!(rope_with_indices.substring(10, 13), "d 你");
   }
 }

From cedea6e89a84c4377f08a4e3ea04cba9fb228969 Mon Sep 17 00:00:00 2001
From: Cong-Cong <dacongsama@live.com>
Date: Sat, 28 Dec 2024 10:51:39 +0800
Subject: [PATCH 09/14] fix: try perf

---
 src/with_indices.rs | 42 +++++++++++++++++++++++++-----------------
 1 file changed, 25 insertions(+), 17 deletions(-)

diff --git a/src/with_indices.rs b/src/with_indices.rs
index 549b44a..f0639b3 100644
--- a/src/with_indices.rs
+++ b/src/with_indices.rs
@@ -1,4 +1,4 @@
-use std::{cell::RefCell, marker::PhantomData};
+use std::{cell::Cell, marker::PhantomData};
 
 use crate::helpers::SourceText;
 
@@ -9,7 +9,7 @@ where
 {
   /// line is a string reference
   pub line: S,
-  last_char_index_to_byte_index: RefCell<(u32, u32)>,
+  last_char_index_to_byte_index: Cell<(u32, u32)>,
   data: PhantomData<&'a S>,
 }
 
@@ -20,7 +20,7 @@ where
   pub fn new(line: S) -> Self {
     Self {
       line,
-      last_char_index_to_byte_index: RefCell::new((0, 0)),
+      last_char_index_to_byte_index: Cell::new((0, 0)),
       data: PhantomData,
     }
   }
@@ -35,7 +35,6 @@ where
     }
 
     let line_len = self.line.len();
-
     let mut start_byte_index =
       if start_char_index == 0 { Some(0) } else { None };
     let mut end_byte_index = if end_char_index == usize::MAX {
@@ -45,32 +44,44 @@ where
     };
 
     let (last_char_index, last_byte_index) =
-      *self.last_char_index_to_byte_index.borrow();
+      self.last_char_index_to_byte_index.get();
     let mut byte_index = last_byte_index as usize;
     let mut char_index = last_char_index as usize;
 
     if start_char_index >= last_char_index as usize
       || end_char_index >= last_char_index as usize
     {
-      for char in self.line.byte_slice(byte_index..line_len).chars() {
+      #[allow(unsafe_code)]
+      let slice = unsafe {
+        // SAFETY: Since `indices` iterates over the `CharIndices` of `self`, we can guarantee
+        // that the indices obtained from it will always be within the bounds of `self` and they
+        // will always lie on UTF-8 sequence boundaries.
+        self.line.byte_slice_unchecked(byte_index..line_len)
+      };
+      for char in slice.chars() {
         if start_byte_index.is_some() && end_byte_index.is_some() {
           break;
         }
         if char_index == start_char_index {
           start_byte_index = Some(byte_index);
-          *self.last_char_index_to_byte_index.borrow_mut() =
-            (char_index as u32, byte_index as u32);
-        }
-        if char_index == end_char_index {
+        } else if char_index == end_char_index {
           end_byte_index = Some(byte_index);
-          *self.last_char_index_to_byte_index.borrow_mut() =
-            (char_index as u32, byte_index as u32);
+          self
+            .last_char_index_to_byte_index
+            .set((char_index as u32, byte_index as u32));
         }
         byte_index += char.len_utf8();
         char_index += 1;
       }
     } else {
-      for char in self.line.byte_slice(0..byte_index).chars().rev() {
+      #[allow(unsafe_code)]
+      let slice = unsafe {
+        // SAFETY: Since `indices` iterates over the `CharIndices` of `self`, we can guarantee
+        // that the indices obtained from it will always be within the bounds of `self` and they
+        // will always lie on UTF-8 sequence boundaries.
+        self.line.byte_slice_unchecked(0..byte_index)
+      };
+      for char in slice.chars().rev() {
         if start_byte_index.is_some() && end_byte_index.is_some() {
           break;
         }
@@ -78,10 +89,7 @@ where
         char_index -= 1;
         if char_index == end_char_index {
           end_byte_index = Some(byte_index);
-          *self.last_char_index_to_byte_index.borrow_mut() =
-            (char_index as u32, byte_index as u32);
-        }
-        if char_index == start_char_index {
+        } else if char_index == start_char_index {
           start_byte_index = Some(byte_index);
         }
       }

From 43494d31803c7c65e60a8fb1efacaddfdf8f2125 Mon Sep 17 00:00:00 2001
From: Cong-Cong <dacongsama@live.com>
Date: Mon, 30 Dec 2024 12:31:14 +0800
Subject: [PATCH 10/14] perf: reduce condition

---
 src/with_indices.rs | 18 ++++++++++++------
 1 file changed, 12 insertions(+), 6 deletions(-)

diff --git a/src/with_indices.rs b/src/with_indices.rs
index f0639b3..92dc650 100644
--- a/src/with_indices.rs
+++ b/src/with_indices.rs
@@ -43,6 +43,10 @@ where
       None
     };
 
+    if start_byte_index.is_some() && end_byte_index.is_some() {
+      return self.line.clone();
+    }
+
     let (last_char_index, last_byte_index) =
       self.last_char_index_to_byte_index.get();
     let mut byte_index = last_byte_index as usize;
@@ -59,16 +63,17 @@ where
         self.line.byte_slice_unchecked(byte_index..line_len)
       };
       for char in slice.chars() {
-        if start_byte_index.is_some() && end_byte_index.is_some() {
-          break;
-        }
         if char_index == start_char_index {
           start_byte_index = Some(byte_index);
+          if end_byte_index.is_some() {
+            break;
+          }
         } else if char_index == end_char_index {
           end_byte_index = Some(byte_index);
           self
             .last_char_index_to_byte_index
             .set((char_index as u32, byte_index as u32));
+          break;
         }
         byte_index += char.len_utf8();
         char_index += 1;
@@ -82,15 +87,16 @@ where
         self.line.byte_slice_unchecked(0..byte_index)
       };
       for char in slice.chars().rev() {
-        if start_byte_index.is_some() && end_byte_index.is_some() {
-          break;
-        }
         byte_index -= char.len_utf8();
         char_index -= 1;
         if char_index == end_char_index {
           end_byte_index = Some(byte_index);
+          if start_byte_index.is_some() {
+            break;
+          }
         } else if char_index == start_char_index {
           start_byte_index = Some(byte_index);
+          break;
         }
       }
     }

From f706b2c00090b3c8ab467058ed4e992cea404ce6 Mon Sep 17 00:00:00 2001
From: Cong-Cong <dacongsama@live.com>
Date: Mon, 30 Dec 2024 12:36:23 +0800
Subject: [PATCH 11/14] perf: use char_indices

---
 src/with_indices.rs | 9 ++++-----
 1 file changed, 4 insertions(+), 5 deletions(-)

diff --git a/src/with_indices.rs b/src/with_indices.rs
index 92dc650..68c2be9 100644
--- a/src/with_indices.rs
+++ b/src/with_indices.rs
@@ -62,20 +62,19 @@ where
         // will always lie on UTF-8 sequence boundaries.
         self.line.byte_slice_unchecked(byte_index..line_len)
       };
-      for char in slice.chars() {
+      for (byte_offset, _) in slice.char_indices() {
         if char_index == start_char_index {
-          start_byte_index = Some(byte_index);
+          start_byte_index = Some(byte_index + byte_offset);
           if end_byte_index.is_some() {
             break;
           }
         } else if char_index == end_char_index {
-          end_byte_index = Some(byte_index);
+          end_byte_index = Some(byte_index + byte_offset);
           self
             .last_char_index_to_byte_index
-            .set((char_index as u32, byte_index as u32));
+            .set((char_index as u32, (byte_index + byte_offset) as u32));
           break;
         }
-        byte_index += char.len_utf8();
         char_index += 1;
       }
     } else {

From 982133116831bdce9b9a5a1aa028db18c6afe0fd Mon Sep 17 00:00:00 2001
From: Cong-Cong <dacongsama@live.com>
Date: Mon, 30 Dec 2024 14:58:08 +0800
Subject: [PATCH 12/14] fix: char_indices support rev

---
 src/helpers.rs      |  17 +---
 src/rope.rs         | 217 +++++++++++++++++++-------------------------
 src/with_indices.rs |  11 +--
 3 files changed, 102 insertions(+), 143 deletions(-)

diff --git a/src/helpers.rs b/src/helpers.rs
index 981da56..ff7e9f9 100644
--- a/src/helpers.rs
+++ b/src/helpers.rs
@@ -1252,10 +1252,7 @@ pub trait SourceText<'a>: Default + Clone + ToString {
   fn ends_with(&self, value: &str) -> bool;
 
   /// Returns an iterator over the char indices in the text.
-  fn char_indices(&self) -> impl Iterator<Item = (usize, char)>;
-
-  /// Returns an iterator over the [`char`]s of a string slice.
-  fn chars(&self) -> impl DoubleEndedIterator<Item = char>;
+  fn char_indices(&self) -> impl DoubleEndedIterator<Item = (usize, char)>;
 
   /// Gets the byte at the specified index, if it exists.
   fn get_byte(&self, byte_index: usize) -> Option<u8>;
@@ -1292,14 +1289,10 @@ impl<'a> SourceText<'a> for Rope<'a> {
     (*self).ends_with(value)
   }
 
-  fn char_indices(&self) -> impl Iterator<Item = (usize, char)> {
+  fn char_indices(&self) -> impl DoubleEndedIterator<Item = (usize, char)> {
     self.char_indices()
   }
 
-  fn chars(&self) -> impl DoubleEndedIterator<Item = char> {
-    (*self).chars()
-  }
-
   fn byte_slice(&self, range: Range<usize>) -> Self {
     self.byte_slice(range)
   }
@@ -1338,14 +1331,10 @@ impl<'a> SourceText<'a> for &'a str {
     (*self).ends_with(value)
   }
 
-  fn char_indices(&self) -> impl Iterator<Item = (usize, char)> {
+  fn char_indices(&self) -> impl DoubleEndedIterator<Item = (usize, char)> {
     (*self).char_indices()
   }
 
-  fn chars(&self) -> impl DoubleEndedIterator<Item = char> {
-    (*self).chars()
-  }
-
   fn byte_slice(&self, range: Range<usize>) -> Self {
     self.get(range).unwrap_or_default()
   }
diff --git a/src/rope.rs b/src/rope.rs
index 2a43ce3..17fdcdc 100644
--- a/src/rope.rs
+++ b/src/rope.rs
@@ -2,11 +2,9 @@
 
 use std::{
   borrow::Cow,
-  collections::VecDeque,
   hash::Hash,
   ops::{Bound, RangeBounds},
   rc::Rc,
-  str::Chars,
 };
 
 use crate::Error;
@@ -135,32 +133,22 @@ impl<'a> Rope<'a> {
           iter: s.char_indices(),
         },
       },
-      Repr::Full(data) => CharIndices {
-        iter: CharIndicesEnum::Full {
-          chunks: data,
-          char_indices: VecDeque::new(),
-          chunk_index: 0,
-        },
-      },
-    }
-  }
-
-  /// Returns an iterator over the [`char`]s of a string slice.
-  #[inline(always)]
-  pub fn chars(&self) -> RopeChars<'_> {
-    match &self.repr {
-      Repr::Light(s) => RopeChars {
-        iters: vec![s.chars()],
-        left: 0,
-        right: 0,
-      },
-      Repr::Full(data) => {
-        let iters = data.iter().map(|(s, _)| s.chars()).collect::<Vec<_>>();
-        let len = iters.len();
-        RopeChars {
-          iters,
-          left: 0,
-          right: (len - 1) as u32,
+      Repr::Full(vec) => {
+        let right_byte_offset = vec.iter().map(|(s, _)| s.len() as u32).sum();
+
+        CharIndices {
+          iter: CharIndicesEnum::Full {
+            iters: vec
+              .iter()
+              .map(|(s, _)| s.char_indices())
+              .collect::<Vec<_>>(),
+            left_chunk_index: 0,
+            left_byte_offset: 0,
+            last_left_indice: None,
+            right_chunk_index: (vec.len() - 1) as u32,
+            right_byte_offset,
+            right_byte_offset_for: vec.len() as u32,
+          },
         }
       }
     }
@@ -680,9 +668,13 @@ enum CharIndicesEnum<'a, 'b> {
     iter: std::str::CharIndices<'b>,
   },
   Full {
-    chunks: &'a [(&'b str, usize)],
-    char_indices: VecDeque<(usize, char)>,
-    chunk_index: usize,
+    iters: Vec<std::str::CharIndices<'a>>,
+    left_chunk_index: u32,
+    left_byte_offset: u32,
+    last_left_indice: Option<(usize, char)>,
+    right_chunk_index: u32,
+    right_byte_offset: u32,
+    right_byte_offset_for: u32,
   },
 }
 
@@ -697,29 +689,59 @@ impl Iterator for CharIndices<'_, '_> {
     match &mut self.iter {
       CharIndicesEnum::Light { iter } => iter.next(),
       CharIndicesEnum::Full {
-        chunks,
-        char_indices,
-        chunk_index,
+        iters,
+        left_chunk_index,
+        left_byte_offset,
+        last_left_indice,
+        ..
       } => {
-        if let Some(item) = char_indices.pop_front() {
-          return Some(item);
-        }
-
-        if *chunk_index >= chunks.len() {
+        if (*left_chunk_index as usize) >= iters.len() {
           return None;
         }
-
-        // skip empty chunks
-        while *chunk_index < chunks.len() && chunks[*chunk_index].0.is_empty() {
-          *chunk_index += 1;
+        if let Some((byte_index, char)) =
+          iters[*left_chunk_index as usize].next()
+        {
+          *last_left_indice = Some((byte_index, char));
+          Some((byte_index + (*left_byte_offset as usize), char))
+        } else {
+          *left_chunk_index += 1;
+          if let Some((byte_index, char)) = last_left_indice.take() {
+            *left_byte_offset =
+              *left_byte_offset + byte_index as u32 + char.len_utf8() as u32;
+          }
+          self.next()
         }
+      }
+    }
+  }
+}
 
-        let (chunk, start_pos) = chunks[*chunk_index];
-
-        char_indices
-          .extend(chunk.char_indices().map(|(i, c)| (start_pos + i, c)));
-        *chunk_index += 1;
-        char_indices.pop_front()
+impl DoubleEndedIterator for CharIndices<'_, '_> {
+  fn next_back(&mut self) -> Option<Self::Item> {
+    match &mut self.iter {
+      CharIndicesEnum::Light { iter } => iter.next_back(),
+      CharIndicesEnum::Full {
+        iters,
+        right_chunk_index,
+        right_byte_offset,
+        right_byte_offset_for,
+        ..
+      } => {
+        if let Some((byte_index, char)) =
+          iters[*right_chunk_index as usize].next_back()
+        {
+          if *right_byte_offset_for != *right_chunk_index {
+            *right_byte_offset =
+              *right_byte_offset - byte_index as u32 - char.len_utf8() as u32;
+            *right_byte_offset_for = *right_chunk_index;
+          }
+          Some((byte_index + (*right_byte_offset as usize), char))
+        } else if *right_chunk_index > 0 {
+          *right_chunk_index -= 1;
+          self.next_back()
+        } else {
+          None
+        }
       }
     }
   }
@@ -961,46 +983,6 @@ fn end_bound_to_range_end(end: Bound<&usize>) -> Option<usize> {
   }
 }
 
-pub struct RopeChars<'a> {
-  iters: Vec<Chars<'a>>,
-  left: u32,
-  right: u32,
-}
-
-impl Iterator for RopeChars<'_> {
-  type Item = char;
-
-  #[inline]
-  fn next(&mut self) -> Option<char> {
-    let left = self.left as usize;
-    if left >= self.iters.len() {
-      return None;
-    }
-    if let Some(char) = self.iters[left].next() {
-      Some(char)
-    } else {
-      self.left += 1;
-      self.next()
-    }
-  }
-}
-
-impl DoubleEndedIterator for RopeChars<'_> {
-  #[inline]
-  fn next_back(&mut self) -> Option<Self::Item> {
-    let right = self.right as usize;
-    if right == 0 {
-      return self.iters[right].next_back();
-    }
-    if let Some(char) = self.iters[right].next_back() {
-      Some(char)
-    } else {
-      self.right -= 1;
-      self.next_back()
-    }
-  }
-}
-
 #[cfg(test)]
 mod tests {
   use std::rc::Rc;
@@ -1230,6 +1212,29 @@ mod tests {
     );
   }
 
+  #[test]
+  fn reverse_char_indices() {
+    let mut a = Rope::new();
+    a.add("abc");
+    a.add("def");
+    assert_eq!(
+      a.char_indices().rev().collect::<Vec<_>>(),
+      "abcdef".char_indices().rev().collect::<Vec<_>>()
+    );
+
+    let mut a = Rope::new();
+    a.add("こんにちは");
+    assert_eq!(
+      a.char_indices().rev().collect::<Vec<_>>(),
+      "こんにちは".char_indices().rev().collect::<Vec<_>>()
+    );
+    a.add("世界");
+    assert_eq!(
+      a.char_indices().rev().collect::<Vec<_>>(),
+      "こんにちは世界".char_indices().rev().collect::<Vec<_>>()
+    );
+  }
+
   #[test]
   fn lines1() {
     let rope = Rope::from("abc");
@@ -1306,38 +1311,4 @@ mod tests {
       .collect::<Vec<_>>();
     assert_eq!(lines, ["\n"]);
   }
-
-  #[test]
-  fn chars() {
-    let rope = Rope::from("abc");
-    let mut chars = rope.chars();
-    assert_eq!(chars.next(), Some('a'));
-    assert_eq!(chars.next(), Some('b'));
-    assert_eq!(chars.next(), Some('c'));
-    assert_eq!(chars.next(), None);
-
-    let rope = Rope::from_iter(["a", "b", "c"]);
-    let mut chars = rope.chars();
-    assert_eq!(chars.next(), Some('a'));
-    assert_eq!(chars.next(), Some('b'));
-    assert_eq!(chars.next(), Some('c'));
-    assert_eq!(chars.next(), None);
-  }
-
-  #[test]
-  fn reverse_chars() {
-    let rope = Rope::from("abc");
-    let mut chars = rope.chars().rev();
-    assert_eq!(chars.next(), Some('c'));
-    assert_eq!(chars.next(), Some('b'));
-    assert_eq!(chars.next(), Some('a'));
-    assert_eq!(chars.next(), None);
-
-    let rope = Rope::from_iter(["a", "b", "c"]);
-    let mut chars = rope.chars().rev();
-    assert_eq!(chars.next(), Some('c'));
-    assert_eq!(chars.next(), Some('b'));
-    assert_eq!(chars.next(), Some('a'));
-    assert_eq!(chars.next(), None);
-  }
 }
diff --git a/src/with_indices.rs b/src/with_indices.rs
index 68c2be9..819d42a 100644
--- a/src/with_indices.rs
+++ b/src/with_indices.rs
@@ -49,7 +49,7 @@ where
 
     let (last_char_index, last_byte_index) =
       self.last_char_index_to_byte_index.get();
-    let mut byte_index = last_byte_index as usize;
+    let byte_index = last_byte_index as usize;
     let mut char_index = last_char_index as usize;
 
     if start_char_index >= last_char_index as usize
@@ -85,18 +85,17 @@ where
         // will always lie on UTF-8 sequence boundaries.
         self.line.byte_slice_unchecked(0..byte_index)
       };
-      for char in slice.chars().rev() {
-        byte_index -= char.len_utf8();
-        char_index -= 1;
+      for (byte_index, char) in slice.char_indices().rev() {
         if char_index == end_char_index {
-          end_byte_index = Some(byte_index);
+          end_byte_index = Some(byte_index + char.len_utf8());
           if start_byte_index.is_some() {
             break;
           }
         } else if char_index == start_char_index {
-          start_byte_index = Some(byte_index);
+          start_byte_index = Some(byte_index + char.len_utf8());
           break;
         }
+        char_index -= 1;
       }
     }
 

From 212bbc8a6de6dabf9cc00f941116c9a519f5a052 Mon Sep 17 00:00:00 2001
From: Cong-Cong <dacongsama@live.com>
Date: Thu, 2 Jan 2025 10:04:30 +0800
Subject: [PATCH 13/14] fix: remove generated_column

---
 src/decoder.rs | 4 ----
 1 file changed, 4 deletions(-)

diff --git a/src/decoder.rs b/src/decoder.rs
index 9cfd624..4622924 100644
--- a/src/decoder.rs
+++ b/src/decoder.rs
@@ -39,7 +39,6 @@ pub(crate) struct MappingsDecoder<'a> {
   current_value: i64,
   current_value_pos: usize,
   generated_line: u32,
-  generated_column: i64,
 }
 
 impl<'a> MappingsDecoder<'a> {
@@ -52,7 +51,6 @@ impl<'a> MappingsDecoder<'a> {
       current_value: 0,
       current_value_pos: 0,
       generated_line: 1,
-      generated_column: -1,
     }
   }
 }
@@ -95,12 +93,10 @@ impl Iterator for MappingsDecoder<'_> {
           }),
           _ => None,
         };
-        self.generated_column = self.current_data[0] as i64;
         self.current_data_pos = 0;
         if value == SEM {
           self.generated_line += 1;
           self.current_data[0] = 0;
-          self.generated_column = -1;
         }
         if mapping.is_some() {
           return mapping;

From b201deef5389d4cfe8bddb451921944a8135f9ee Mon Sep 17 00:00:00 2001
From: Cong-Cong <dacongsama@live.com>
Date: Thu, 2 Jan 2025 12:36:11 +0800
Subject: [PATCH 14/14] perf: build Mapping once and return early in MappingsDecoder

---
 src/decoder.rs | 51 ++++++++++++++++++++++++--------------------------
 1 file changed, 24 insertions(+), 27 deletions(-)

diff --git a/src/decoder.rs b/src/decoder.rs
index 4622924..38c64eb 100644
--- a/src/decoder.rs
+++ b/src/decoder.rs
@@ -65,42 +65,39 @@ impl Iterator for MappingsDecoder<'_> {
         continue;
       }
       if (value & COM) != 0 {
-        let mapping = match self.current_data_pos {
-          1 => Some(Mapping {
-            generated_line: self.generated_line,
-            generated_column: self.current_data[0],
-            original: None,
-          }),
-          4 => Some(Mapping {
-            generated_line: self.generated_line,
-            generated_column: self.current_data[0],
-            original: Some(OriginalLocation {
+        let mut mapping = Mapping {
+          generated_line: self.generated_line,
+          generated_column: self.current_data[0],
+          original: None,
+        };
+        let current_data_pos = self.current_data_pos;
+        self.current_data_pos = 0;
+        if value == SEM {
+          self.generated_line += 1;
+          self.current_data[0] = 0;
+        }
+        match current_data_pos {
+          1 => return Some(mapping),
+          4 => {
+            mapping.original = Some(OriginalLocation {
               source_index: self.current_data[1],
               original_line: self.current_data[2],
               original_column: self.current_data[3],
               name_index: None,
-            }),
-          }),
-          5 => Some(Mapping {
-            generated_line: self.generated_line,
-            generated_column: self.current_data[0],
-            original: Some(OriginalLocation {
+            });
+            return Some(mapping);
+          }
+          5 => {
+            mapping.original = Some(OriginalLocation {
               source_index: self.current_data[1],
               original_line: self.current_data[2],
               original_column: self.current_data[3],
               name_index: Some(self.current_data[4]),
-            }),
-          }),
-          _ => None,
+            });
+            return Some(mapping);
+          }
+          _ => (),
         };
-        self.current_data_pos = 0;
-        if value == SEM {
-          self.generated_line += 1;
-          self.current_data[0] = 0;
-        }
-        if mapping.is_some() {
-          return mapping;
-        }
       } else if (value & CONTINUATION_BIT) == 0 {
         // last sextet
         self.current_value |= (value as i64) << self.current_value_pos;