From 2b193316b6a4af96a05c7fb5cfef53d66def274c Mon Sep 17 00:00:00 2001 From: Hana Date: Thu, 12 Dec 2024 21:06:30 +0800 Subject: [PATCH] refactor: reduce memory consumption of `CachedSource` (#144) * refactor: init * perf: faster lines iterator for `Rope` (#145) * refactor: init * refactor: faster lines * refactor: try * chore: clippy * test: more * chore: more * perf * finish * finish --- Cargo.lock | 16 +- Cargo.toml | 14 +- src/cached_source.rs | 37 +- src/concat_source.rs | 20 +- src/error.rs | 3 + src/helpers.rs | 347 ++++++++--- src/lib.rs | 2 + src/original_source.rs | 30 +- src/raw_source.rs | 59 +- src/replace_source.rs | 167 +++--- src/rope.rs | 1199 ++++++++++++++++++++++++++++++++++++++ src/source.rs | 21 +- src/source_map_source.rs | 16 +- src/with_indices.rs | 59 +- tests/compat_source.rs | 11 +- 15 files changed, 1731 insertions(+), 270 deletions(-) create mode 100644 src/rope.rs diff --git a/Cargo.lock b/Cargo.lock index b80b4518..9ecd4f28 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -1,6 +1,6 @@ # This file is automatically @generated by Cargo. # It is not intended for manual editing. 
-version = 3 +version = 4 [[package]] name = "ahash" @@ -189,7 +189,7 @@ dependencies = [ "clap", "criterion-plot", "is-terminal", - "itertools", + "itertools 0.10.5", "num-traits", "once_cell", "oorandom", @@ -208,7 +208,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "6b50826342786a51a89e2da3a28f1c32b06e387201bc2d19791f622c673706b1" dependencies = [ "cast", - "itertools", + "itertools 0.10.5", ] [[package]] @@ -320,6 +320,15 @@ dependencies = [ "either", ] +[[package]] +name = "itertools" +version = "0.13.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "413ee7dfc52ee1a4949ceeb7dbc8a33f2d6c088194d9f922fb8318faf1f01186" +dependencies = [ + "either", +] + [[package]] name = "itoa" version = "1.0.9" @@ -539,6 +548,7 @@ dependencies = [ "criterion", "dashmap", "dyn-clone", + "itertools 0.13.0", "memchr", "regex", "rustc-hash", diff --git a/Cargo.toml b/Cargo.toml index f33750a3..0209cf08 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -14,18 +14,18 @@ readme = "README.md" include = ["/src/**/*.rs", "/*.toml", "/LICENSE", "/README.md"] [lints.rust] -unsafe_code = "warn" +unsafe_code = "warn" missing_docs = "warn" [lints.clippy] -dbg_macro = "warn" -todo = "warn" +dbg_macro = "warn" +todo = "warn" unimplemented = "warn" -print_stdout = "warn" -print_stderr = "warn" +print_stdout = "warn" +print_stderr = "warn" [[bench]] -name = "bench" +name = "bench" path = "benches/bench.rs" harness = false @@ -36,6 +36,8 @@ dyn-clone = "1" rustc-hash = "1" dashmap = "5" memchr = "2.6.4" +itertools = "0.13" + codspeed-criterion-compat = { version = "2.3.3", default-features = false, optional = true } static_assertions = "1.1.0" diff --git a/src/cached_source.rs b/src/cached_source.rs index 1356d792..a27af637 100644 --- a/src/cached_source.rs +++ b/src/cached_source.rs @@ -12,6 +12,7 @@ use crate::{ stream_and_get_source_and_map, stream_chunks_of_raw_source, stream_chunks_of_source_map, StreamChunks, }, + rope::Rope, 
MapOptions, Source, SourceMap, }; @@ -50,8 +51,6 @@ use crate::{ /// ``` pub struct CachedSource { inner: Arc, - cached_buffer: Arc>>, - cached_source: Arc>>, cached_hash: Arc>, cached_maps: Arc, BuildHasherDefault>>, @@ -62,8 +61,6 @@ impl CachedSource { pub fn new(inner: T) -> Self { Self { inner: Arc::new(inner), - cached_buffer: Default::default(), - cached_source: Default::default(), cached_hash: Default::default(), cached_maps: Default::default(), } @@ -77,17 +74,15 @@ impl CachedSource { impl Source for CachedSource { fn source(&self) -> Cow { - let cached = self - .cached_source - .get_or_init(|| self.inner.source().into()); - Cow::Borrowed(cached) + self.inner.source() + } + + fn rope(&self) -> Rope<'_> { + self.inner.rope() } fn buffer(&self) -> Cow<[u8]> { - let cached = self - .cached_buffer - .get_or_init(|| self.inner.buffer().to_vec()); - Cow::Borrowed(cached) + self.inner.buffer() } fn size(&self) -> usize { @@ -109,7 +104,7 @@ impl Source for CachedSource { } } -impl StreamChunks<'_> +impl StreamChunks for CachedSource { fn stream_chunks<'a>( @@ -122,9 +117,7 @@ impl StreamChunks<'_> let cached_map = self.cached_maps.entry(options.clone()); match cached_map { Entry::Occupied(entry) => { - let source = self - .cached_source - .get_or_init(|| self.inner.source().into()); + let source = self.rope(); if let Some(map) = entry.get() { #[allow(unsafe_code)] // SAFETY: We guarantee that once a `SourceMap` is stored in the cache, it will never be removed. 
@@ -162,8 +155,6 @@ impl Clone for CachedSource { fn clone(&self) -> Self { Self { inner: self.inner.clone(), - cached_buffer: self.cached_buffer.clone(), - cached_source: self.cached_source.clone(), cached_hash: self.cached_hash.clone(), cached_maps: self.cached_maps.clone(), } @@ -196,8 +187,7 @@ impl std::fmt::Debug for CachedSource { ) -> Result<(), std::fmt::Error> { f.debug_struct("CachedSource") .field("inner", self.inner.as_ref()) - .field("cached_buffer", &self.cached_buffer.get().is_some()) - .field("cached_source", &self.cached_source.get().is_some()) + .field("cached_hash", self.cached_hash.as_ref()) .field("cached_maps", &(!self.cached_maps.is_empty())) .finish() } @@ -205,8 +195,6 @@ impl std::fmt::Debug for CachedSource { #[cfg(test)] mod tests { - use std::borrow::Borrow; - use crate::{ ConcatSource, OriginalSource, RawSource, ReplaceSource, SourceExt, SourceMapSource, WithoutOriginalOptions, @@ -247,11 +235,6 @@ mod tests { source.size(); source.map(&map_options); - assert_eq!(clone.cached_source.get().unwrap().borrow(), source.source()); - assert_eq!( - *clone.cached_buffer.get().unwrap(), - source.buffer().to_vec() - ); assert_eq!( *clone.cached_maps.get(&map_options).unwrap().value(), source.map(&map_options) diff --git a/src/concat_source.rs b/src/concat_source.rs index 7435fcb6..98226fd9 100644 --- a/src/concat_source.rs +++ b/src/concat_source.rs @@ -10,7 +10,7 @@ use crate::{ helpers::{get_map, GeneratedInfo, OnChunk, OnName, OnSource, StreamChunks}, linear_map::LinearMap, source::{Mapping, OriginalLocation}, - BoxSource, MapOptions, Source, SourceExt, SourceMap, + BoxSource, MapOptions, Rope, Source, SourceExt, SourceMap, }; /// Concatenate multiple [Source]s to a single [Source]. 
@@ -109,6 +109,20 @@ impl Source for ConcatSource { } } + fn rope(&self) -> Rope<'_> { + let children = self.children(); + if children.len() == 1 { + children[0].rope() + } else { + let mut rope = Rope::new(); + for child in children { + let child_rope = child.rope(); + rope.append(child_rope); + } + rope + } + } + fn buffer(&self) -> Cow<[u8]> { let children = self.children(); if children.len() == 1 { @@ -155,8 +169,8 @@ impl PartialEq for ConcatSource { } impl Eq for ConcatSource {} -impl<'a> StreamChunks<'a> for ConcatSource { - fn stream_chunks( +impl StreamChunks for ConcatSource { + fn stream_chunks<'a>( &'a self, options: &MapOptions, on_chunk: OnChunk<'_, 'a>, diff --git a/src/error.rs b/src/error.rs index b1222d14..6ff7e2c9 100644 --- a/src/error.rs +++ b/src/error.rs @@ -8,12 +8,15 @@ pub type Result = result::Result; pub enum Error { /// a JSON parsing related failure BadJson(simd_json::Error), + /// rope related failure + Rope(&'static str), } impl fmt::Display for Error { fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { match self { Error::BadJson(err) => write!(f, "bad json: {err}"), + Error::Rope(err) => write!(f, "rope error: {err}"), } } } diff --git a/src/helpers.rs b/src/helpers.rs index 8e0788a8..73cf9310 100644 --- a/src/helpers.rs +++ b/src/helpers.rs @@ -1,6 +1,8 @@ use std::{ borrow::{BorrowMut, Cow}, cell::{OnceCell, RefCell}, + marker::PhantomData, + ops::Range, }; use rustc_hash::FxHashMap as HashMap; @@ -11,14 +13,14 @@ use crate::{ linear_map::LinearMap, source::{Mapping, OriginalLocation}, with_indices::WithIndices, - MapOptions, SourceMap, + MapOptions, Rope, SourceMap, }; // Adding this type because sourceContentLine not happy -type InnerSourceContentLine<'a> = - RefCell>>>>>; +type InnerSourceContentLine<'a, 'b> = + RefCell>>>>>>; -pub fn get_map<'a, S: StreamChunks<'a>>( +pub fn get_map<'a, S: StreamChunks>( stream: &'a S, options: &'a MapOptions, ) -> Option { @@ -65,9 +67,9 @@ pub fn get_map<'a, S: StreamChunks<'a>>( } 
/// [StreamChunks] abstraction, see [webpack-sources source.streamChunks](https://github.com/webpack/webpack-sources/blob/9f98066311d53a153fdc7c633422a1d086528027/lib/helpers/streamChunks.js#L13). -pub trait StreamChunks<'a> { +pub trait StreamChunks { /// [StreamChunks] abstraction - fn stream_chunks( + fn stream_chunks<'a>( &'a self, options: &MapOptions, on_chunk: OnChunk<'_, 'a>, @@ -77,24 +79,27 @@ pub trait StreamChunks<'a> { } /// [OnChunk] abstraction, see [webpack-sources onChunk](https://github.com/webpack/webpack-sources/blob/9f98066311d53a153fdc7c633422a1d086528027/lib/helpers/streamChunks.js#L13). -pub type OnChunk<'a, 'b> = &'a mut dyn FnMut(Option>, Mapping); +pub type OnChunk<'a, 'b> = &'a mut dyn FnMut(Option>, Mapping); /// [OnSource] abstraction, see [webpack-sources onSource](https://github.com/webpack/webpack-sources/blob/9f98066311d53a153fdc7c633422a1d086528027/lib/helpers/streamChunks.js#L13). pub type OnSource<'a, 'b> = - &'a mut dyn FnMut(u32, Cow<'b, str>, Option<&'b str>); + &'a mut dyn FnMut(u32, Cow<'b, str>, Option>); /// [OnName] abstraction, see [webpack-sources onName](https://github.com/webpack/webpack-sources/blob/9f98066311d53a153fdc7c633422a1d086528027/lib/helpers/streamChunks.js#L13). pub type OnName<'a, 'b> = &'a mut dyn FnMut(u32, Cow<'b, str>); /// Default stream chunks behavior impl, see [webpack-sources streamChunks](https://github.com/webpack/webpack-sources/blob/9f98066311d53a153fdc7c633422a1d086528027/lib/helpers/streamChunks.js#L15-L35). 
-pub fn stream_chunks_default<'a>( - source: &'a str, +pub fn stream_chunks_default<'a, S>( + source: S, source_map: Option<&'a SourceMap>, options: &MapOptions, on_chunk: OnChunk<'_, 'a>, on_source: OnSource<'_, 'a>, on_name: OnName<'_, 'a>, -) -> GeneratedInfo { +) -> GeneratedInfo +where + S: SourceText<'a> + 'a, +{ if let Some(map) = source_map { stream_chunks_of_source_map( source, map, on_chunk, on_source, on_name, options, @@ -127,25 +132,31 @@ pub fn encode_mappings(mappings: impl Iterator) -> String { encoder.drain() } -pub struct PotentialTokens<'a> { - bytes: &'a [u8], - source: &'a str, +pub struct PotentialTokens<'a, S> +where + S: SourceText<'a>, +{ + source: S, index: usize, + data: PhantomData<&'a S>, } -impl<'a> Iterator for PotentialTokens<'a> { - type Item = &'a str; +impl<'a, S> Iterator for PotentialTokens<'a, S> +where + S: SourceText<'a>, +{ + type Item = S; fn next(&mut self) -> Option { - if let Some(&c) = self.bytes.get(self.index) { + if let Some(c) = self.source.get_byte(self.index) { let start = self.index; let mut c = char::from(c); while c != '\n' && c != ';' && c != '{' && c != '}' { self.index += 1; - if let Some(&ch) = self.bytes.get(self.index) { + if let Some(ch) = self.source.get_byte(self.index) { c = char::from(ch); } else { - return Some(&self.source[start..self.index]); + return Some(self.source.byte_slice(start..self.index)); } } while c == ';' @@ -156,16 +167,16 @@ impl<'a> Iterator for PotentialTokens<'a> { || c == '\t' { self.index += 1; - if let Some(&ch) = self.bytes.get(self.index) { + if let Some(ch) = self.source.get_byte(self.index) { c = char::from(ch); } else { - return Some(&self.source[start..self.index]); + return Some(self.source.byte_slice(start..self.index)); } } if c == '\n' { self.index += 1; } - Some(&self.source[start..self.index]) + Some(self.source.byte_slice(start..self.index)) } else { None } @@ -173,18 +184,23 @@ impl<'a> Iterator for PotentialTokens<'a> { } // /[^\n;{}]+[;{} \r\t]*\n?|[;{} 
\r\t]+\n?|\n/g -pub fn split_into_potential_tokens(source: &str) -> PotentialTokens { +pub fn split_into_potential_tokens<'a, S>(source: S) -> PotentialTokens<'a, S> +where + S: SourceText<'a>, +{ PotentialTokens { - bytes: source.as_bytes(), source, index: 0, + data: PhantomData, } } +const EMPTY_ROPE: Rope = Rope::new(); + /// Split the string with a needle, each string will contain the needle. /// /// Copied and modified from https://github.com/rust-lang/cargo/blob/30efe860c0e4adc1a6d7057ad223dc6e47d34edf/src/cargo/sources/registry/index.rs#L1048-L1072 -fn split(haystack: &str, needle: u8) -> impl Iterator { +pub fn split(haystack: &str, needle: u8) -> impl Iterator { struct Split<'a> { haystack: &'a str, needle: u8, @@ -211,20 +227,30 @@ fn split(haystack: &str, needle: u8) -> impl Iterator { } // /[^\n]+\n?|\n/g -pub fn split_into_lines(source: &str) -> impl Iterator { - split(source, b'\n') +pub fn split_into_lines<'a, S>( + source: &S, +) -> impl Iterator + use<'_, 'a, S> +where + S: SourceText<'a>, +{ + source.split_into_lines() } -pub fn get_generated_source_info(source: &str) -> GeneratedInfo { - let (generated_line, generated_column) = if source.ends_with('\n') { - (split(source, b'\n').count() + 1, 0) +pub fn get_generated_source_info<'a, S>(source: S) -> GeneratedInfo +where + S: SourceText<'a>, +{ + let (generated_line, generated_column) = if source.ends_with("\n") { + (source.split_into_lines().count() + 1, 0) } else { let mut line_count = 0; - let mut last_line = ""; - for line in split(source, b'\n') { + let mut last_line = S::default(); + + for line in source.split_into_lines() { line_count += 1; last_line = line; } + (line_count.max(1), last_line.len()) }; GeneratedInfo { @@ -233,22 +259,25 @@ pub fn get_generated_source_info(source: &str) -> GeneratedInfo { } } -pub fn stream_chunks_of_raw_source<'a>( - source: &'a str, +pub fn stream_chunks_of_raw_source<'a, S>( + source: S, options: &MapOptions, on_chunk: OnChunk<'_, 'a>, _on_source: 
OnSource<'_, 'a>, _on_name: OnName<'_, 'a>, -) -> GeneratedInfo { +) -> GeneratedInfo +where + S: SourceText<'a>, +{ if options.final_source { return get_generated_source_info(source); } let mut line = 1; let mut last_line = None; - for l in split_into_lines(source) { + for l in split_into_lines(&source) { on_chunk( - Some(Cow::Borrowed(l)), + Some(l.clone().into_rope()), Mapping { generated_line: line, generated_column: 0, @@ -259,7 +288,7 @@ pub fn stream_chunks_of_raw_source<'a>( last_line = Some(l); } if let Some(last_line) = - last_line.filter(|last_line| !last_line.ends_with('\n')) + last_line.filter(|last_line| !last_line.ends_with("\n")) { GeneratedInfo { generated_line: line - 1, @@ -273,14 +302,17 @@ pub fn stream_chunks_of_raw_source<'a>( } } -pub fn stream_chunks_of_source_map<'a>( - source: &'a str, +pub fn stream_chunks_of_source_map<'a, S>( + source: S, source_map: &'a SourceMap, on_chunk: OnChunk<'_, 'a>, on_source: OnSource<'_, 'a>, on_name: OnName<'_, 'a>, options: &MapOptions, -) -> GeneratedInfo { +) -> GeneratedInfo +where + S: SourceText<'a> + 'a, +{ match options { MapOptions { columns: true, @@ -321,13 +353,16 @@ fn get_source<'a>(source_map: &SourceMap, source: &'a str) -> Cow<'a, str> { } } -fn stream_chunks_of_source_map_final<'a>( - source: &'a str, +fn stream_chunks_of_source_map_final<'a, S>( + source: S, source_map: &'a SourceMap, on_chunk: OnChunk, on_source: OnSource<'_, 'a>, on_name: OnName<'_, 'a>, -) -> GeneratedInfo { +) -> GeneratedInfo +where + S: SourceText<'a>, +{ let result = get_generated_source_info(source); if result.generated_line == 1 && result.generated_column == 0 { return result; @@ -336,7 +371,7 @@ fn stream_chunks_of_source_map_final<'a>( on_source( i as u32, get_source(source_map, source), - source_map.get_source_content(i), + source_map.get_source_content(i).map(Rope::from), ) } for (i, name) in source_map.names().iter().enumerate() { @@ -377,14 +412,17 @@ fn stream_chunks_of_source_map_final<'a>( result } -fn 
stream_chunks_of_source_map_full<'a>( - source: &'a str, +fn stream_chunks_of_source_map_full<'a, S>( + source: S, source_map: &'a SourceMap, on_chunk: OnChunk<'_, 'a>, on_source: OnSource<'_, 'a>, on_name: OnName<'_, 'a>, -) -> GeneratedInfo { - let lines = split_into_lines(source); +) -> GeneratedInfo +where + S: SourceText<'a> + 'a, +{ + let lines = split_into_lines(&source); let line_with_indices_list = lines.map(WithIndices::new).collect::>(); if line_with_indices_list.is_empty() { @@ -397,14 +435,15 @@ fn stream_chunks_of_source_map_full<'a>( on_source( i as u32, get_source(source_map, source), - source_map.get_source_content(i), + source_map.get_source_content(i).map(Rope::from), ) } for (i, name) in source_map.names().iter().enumerate() { on_name(i as u32, Cow::Borrowed(name)); } - let last_line = line_with_indices_list[line_with_indices_list.len() - 1].line; - let last_new_line = last_line.ends_with('\n'); + let last_line = + &line_with_indices_list[line_with_indices_list.len() - 1].line; + let last_new_line = last_line.ends_with("\n"); let final_line: u32 = if last_new_line { line_with_indices_list.len() + 1 } else { @@ -421,7 +460,7 @@ fn stream_chunks_of_source_map_full<'a>( if mapping_active && current_generated_line as usize <= line_with_indices_list.len() { - let chunk: &str; + let chunk: S; let mapping_line = current_generated_line; let mapping_column = current_generated_column; let line = &line_with_indices_list[(current_generated_line - 1) as usize]; @@ -438,7 +477,7 @@ fn stream_chunks_of_source_map_full<'a>( } if !chunk.is_empty() { on_chunk( - Some(Cow::Borrowed(chunk)), + Some(chunk.into_rope()), Mapping { generated_line: mapping_line, generated_column: mapping_column, @@ -452,11 +491,11 @@ fn stream_chunks_of_source_map_full<'a>( && current_generated_column > 0 { if current_generated_line as usize <= line_with_indices_list.len() { - let chunk = &line_with_indices_list + let chunk = line_with_indices_list [(current_generated_line - 1) as 
usize] .substring(current_generated_column as usize, usize::MAX); on_chunk( - Some(Cow::Borrowed(chunk)), + Some(chunk.into_rope()), Mapping { generated_line: current_generated_line, generated_column: current_generated_column, @@ -470,9 +509,9 @@ fn stream_chunks_of_source_map_full<'a>( while mapping.generated_line > current_generated_line { if current_generated_line as usize <= line_with_indices_list.len() { let chunk = - line_with_indices_list[(current_generated_line as usize) - 1].line; + &line_with_indices_list[(current_generated_line as usize) - 1].line; on_chunk( - Some(Cow::Borrowed(chunk)), + Some(chunk.clone().into_rope()), Mapping { generated_line: current_generated_line, generated_column: 0, @@ -491,7 +530,7 @@ fn stream_chunks_of_source_map_full<'a>( mapping.generated_column as usize, ); on_chunk( - Some(Cow::Owned(chunk.to_string())), + Some(chunk.into_rope()), Mapping { generated_line: current_generated_line, generated_column: current_generated_column, @@ -525,13 +564,16 @@ fn stream_chunks_of_source_map_full<'a>( } } -fn stream_chunks_of_source_map_lines_final<'a>( - source: &'a str, +fn stream_chunks_of_source_map_lines_final<'a, S>( + source: S, source_map: &'a SourceMap, on_chunk: OnChunk, on_source: OnSource<'_, 'a>, _on_name: OnName, -) -> GeneratedInfo { +) -> GeneratedInfo +where + S: SourceText<'a>, +{ let result = get_generated_source_info(source); if result.generated_line == 1 && result.generated_column == 0 { return GeneratedInfo { @@ -543,7 +585,7 @@ fn stream_chunks_of_source_map_lines_final<'a>( on_source( i as u32, get_source(source_map, source), - source_map.get_source_content(i), + source_map.get_source_content(i).map(Rope::from), ) } let final_line = if result.generated_column == 0 { @@ -570,14 +612,17 @@ fn stream_chunks_of_source_map_lines_final<'a>( result } -fn stream_chunks_of_source_map_lines_full<'a>( - source: &'a str, +fn stream_chunks_of_source_map_lines_full<'a, S>( + source: S, source_map: &'a SourceMap, on_chunk: 
OnChunk<'_, 'a>, on_source: OnSource<'_, 'a>, _on_name: OnName, -) -> GeneratedInfo { - let lines: Vec<&str> = split_into_lines(source).collect(); +) -> GeneratedInfo +where + S: SourceText<'a>, +{ + let lines: Vec = split_into_lines(&source).collect(); if lines.is_empty() { return GeneratedInfo { generated_line: 1, @@ -588,7 +633,7 @@ fn stream_chunks_of_source_map_lines_full<'a>( on_source( i as u32, get_source(source_map, source), - source_map.get_source_content(i), + source_map.get_source_content(i).map(Rope::from), ) } let mut current_generated_line = 1; @@ -601,9 +646,9 @@ fn stream_chunks_of_source_map_lines_full<'a>( } while mapping.generated_line > current_generated_line { if current_generated_line as usize <= lines.len() { - let chunk = lines[current_generated_line as usize - 1]; + let chunk = &lines[current_generated_line as usize - 1]; on_chunk( - Some(Cow::Borrowed(chunk)), + Some(chunk.clone().into_rope()), Mapping { generated_line: current_generated_line, generated_column: 0, @@ -618,10 +663,10 @@ fn stream_chunks_of_source_map_lines_full<'a>( .as_mut() .filter(|_| mapping.generated_line as usize <= lines.len()) { - let chunk = lines[current_generated_line as usize - 1]; + let chunk = &lines[current_generated_line as usize - 1]; mapping.generated_column = 0; original.name_index = None; - on_chunk(Some(Cow::Borrowed(chunk)), mapping); + on_chunk(Some(chunk.clone().into_rope()), mapping); current_generated_line += 1; } }; @@ -629,9 +674,9 @@ fn stream_chunks_of_source_map_lines_full<'a>( on_mapping(mapping); } while current_generated_line as usize <= lines.len() { - let chunk = lines[current_generated_line as usize - 1]; + let chunk = &lines[current_generated_line as usize - 1]; on_chunk( - Some(Cow::Borrowed(chunk)), + Some(chunk.clone().into_rope()), Mapping { generated_line: current_generated_line, generated_column: 0, @@ -640,8 +685,8 @@ fn stream_chunks_of_source_map_lines_full<'a>( ); current_generated_line += 1; } - let last_line = 
lines[lines.len() - 1]; - let last_new_line = last_line.ends_with('\n'); + let last_line = &lines[lines.len() - 1]; + let last_new_line = last_line.ends_with("\n"); let final_line = if last_new_line { lines.len() + 1 } else { @@ -657,27 +702,30 @@ fn stream_chunks_of_source_map_lines_full<'a>( #[derive(Debug)] struct SourceMapLineData<'a> { pub mappings_data: Vec, - pub chunks: Vec>, + pub chunks: Vec>, } type InnerSourceIndexValueMapping<'a> = - LinearMap<(Cow<'a, str>, Option<&'a str>)>; + LinearMap<(Cow<'a, str>, Option>)>; #[allow(clippy::too_many_arguments)] -pub fn stream_chunks_of_combined_source_map<'a>( - source: &'a str, +pub fn stream_chunks_of_combined_source_map<'a, S>( + source: S, source_map: &'a SourceMap, inner_source_name: &'a str, - inner_source: Option<&'a str>, + inner_source: Option>, inner_source_map: &'a SourceMap, remove_inner_source: bool, on_chunk: OnChunk<'_, 'a>, on_source: OnSource<'_, 'a>, on_name: OnName<'_, 'a>, options: &MapOptions, -) -> GeneratedInfo { +) -> GeneratedInfo +where + S: SourceText<'a> + 'a, +{ let on_source = RefCell::new(on_source); - let inner_source: RefCell> = RefCell::new(inner_source); + let inner_source: RefCell>> = RefCell::new(inner_source); let source_mapping: RefCell, u32>> = RefCell::new(HashMap::default()); let mut name_mapping: HashMap, u32> = HashMap::default(); @@ -692,7 +740,7 @@ pub fn stream_chunks_of_combined_source_map<'a>( RefCell::new(LinearMap::default()); let inner_source_index_value_mapping: RefCell = RefCell::new(LinearMap::default()); - let inner_source_contents: RefCell>> = + let inner_source_contents: RefCell>>> = RefCell::new(LinearMap::default()); let inner_source_content_lines: InnerSourceContentLine = RefCell::new(LinearMap::default()); @@ -727,7 +775,7 @@ pub fn stream_chunks_of_combined_source_map<'a>( }; stream_chunks_of_source_map( - source, + source.clone(), source_map, &mut |chunk, mapping| { let source_index = mapping @@ -800,7 +848,7 @@ pub fn 
stream_chunks_of_combined_source_map<'a>( }); if let Some(original_chunk) = original_chunk { if original_chunk.len() <= inner_chunk.len() - && inner_chunk.get(..original_chunk.len()) + && inner_chunk.get_byte_slice(..original_chunk.len()) == Some(original_chunk) { inner_original_column += location_in_chunk; @@ -897,12 +945,12 @@ pub fn stream_chunks_of_combined_source_map<'a>( name_index_value_mapping.get(&name_index).cloned().unwrap(); let original_name = original_source_lines .get(inner_original_line as usize - 1) - .map_or("", |i| { + .map_or(EMPTY_ROPE, |i| { let start = inner_original_column as usize; let end = start + name.len(); i.substring(start, end) }); - if name == original_name { + if Rope::from(&name) == original_name { let mut name_index_mapping = name_index_mapping.borrow_mut(); final_name_index = name_index_mapping.get(&name_index).copied().unwrap_or(-2); @@ -963,11 +1011,11 @@ pub fn stream_chunks_of_combined_source_map<'a>( source_mapping.get(inner_source_name).copied(); if global_index.is_none() { let len = source_mapping.len() as u32; - source_mapping.insert(source.into(), len); + source_mapping.insert(Cow::Owned(source.to_string()), len); on_source.borrow_mut()( len, Cow::Borrowed(inner_source_name), - *inner_source.borrow(), + inner_source.borrow().clone(), ); global_index = Some(len); } @@ -1041,9 +1089,9 @@ pub fn stream_chunks_of_combined_source_map<'a>( *inner_source_index.borrow_mut() = i as i64; let mut inner_source = inner_source.borrow_mut(); if let Some(inner_source) = inner_source.as_ref() { - source_content = Some(inner_source); + source_content = Some(inner_source.clone()); } else { - *inner_source = source_content; + *inner_source = source_content.clone(); } source_index_mapping.borrow_mut().insert(i, -2); stream_chunks_of_source_map( @@ -1101,7 +1149,9 @@ pub fn stream_chunks_of_combined_source_map<'a>( data.chunks.push(chunk.unwrap()); }, &mut |i, source, source_content| { - inner_source_contents.borrow_mut().insert(i, 
source_content); + inner_source_contents + .borrow_mut() + .insert(i, source_content.clone()); inner_source_content_lines .borrow_mut() .insert(i, Default::default()); @@ -1141,7 +1191,7 @@ pub fn stream_chunks_of_combined_source_map<'a>( ) } -pub fn stream_and_get_source_and_map<'a, S: StreamChunks<'a>>( +pub fn stream_and_get_source_and_map<'a, S: StreamChunks>( input_source: &'a S, options: &MapOptions, on_chunk: OnChunk<'_, 'a>, @@ -1165,7 +1215,7 @@ pub fn stream_and_get_source_and_map<'a, S: StreamChunks<'a>>( sources.push("".into()); } sources[source_index2] = source.to_string(); - if let Some(source_content) = source_content { + if let Some(ref source_content) = source_content { while sources_content.len() <= source_index2 { sources_content.push("".into()); } @@ -1191,3 +1241,110 @@ pub fn stream_and_get_source_and_map<'a, S: StreamChunks<'a>>( }; (generated_info, map) } + +/// Represents a text source that can be manipulated for source mapping purposes. +pub trait SourceText<'a>: Default + Clone + ToString { + /// Splits the text into lines, returning an iterator over each line. + /// Each line includes its line ending character if present. + fn split_into_lines(&self) -> impl Iterator; + + /// Checks if the text ends with the given string. + fn ends_with(&self, value: &str) -> bool; + + /// Returns an iterator over the char indices in the text. + fn char_indices(&self) -> impl Iterator; + + /// Gets the byte at the specified index, if it exists. + fn get_byte(&self, byte_index: usize) -> Option; + + /// Returns a slice of the text specified by the byte range. + fn byte_slice(&self, range: Range) -> Self; + + /// Returns true if the text is empty. + fn is_empty(&self) -> bool; + + /// Returns the length of the text in bytes. + fn len(&self) -> usize; + + /// Converts this text into a Rope. 
+ fn into_rope(self) -> Rope<'a> + where + Self: Sized; +} + +impl<'a> SourceText<'a> for Rope<'a> { + fn split_into_lines(&self) -> impl Iterator { + // Split the text into lines, including the line ending character. + // If the text ends with a newline, the last line will be ignored + // For example: "abc\nefg\n" => ["abc\n", "efg\n"] + self.lines_impl(false) + } + + #[inline] + fn ends_with(&self, value: &str) -> bool { + (*self).ends_with(value) + } + + fn char_indices(&self) -> impl Iterator { + self.char_indices() + } + + fn byte_slice(&self, range: Range) -> Self { + self.byte_slice(range) + } + + #[inline] + fn is_empty(&self) -> bool { + self.is_empty() + } + + #[inline] + fn len(&self) -> usize { + self.len() + } + + fn into_rope(self) -> Rope<'a> { + self + } + + fn get_byte(&self, byte_index: usize) -> Option { + self.get_byte(byte_index) + } +} + +impl<'a> SourceText<'a> for &'a str { + fn split_into_lines(&self) -> impl Iterator { + split(self, b'\n') + } + + #[inline] + fn ends_with(&self, value: &str) -> bool { + (*self).ends_with(value) + } + + fn char_indices(&self) -> impl Iterator { + (*self).char_indices() + } + + fn byte_slice(&self, range: Range) -> Self { + self.get(range).unwrap_or_default() + } + + #[inline] + fn is_empty(&self) -> bool { + (*self).is_empty() + } + + #[inline] + fn len(&self) -> usize { + (*self).len() + } + + fn into_rope(self) -> Rope<'a> { + Rope::from(self) + } + + fn get_byte(&self, byte_index: usize) -> Option { + self.as_bytes().get(byte_index).copied() + } +} diff --git a/src/lib.rs b/src/lib.rs index ba33fa5b..7f16b8fa 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -10,6 +10,7 @@ mod linear_map; mod original_source; mod raw_source; mod replace_source; +mod rope; mod source; mod source_map_source; mod with_indices; @@ -20,6 +21,7 @@ pub use error::{Error, Result}; pub use original_source::OriginalSource; pub use raw_source::{RawBufferSource, RawSource, RawStringSource}; pub use replace_source::{ReplaceSource, 
ReplacementEnforce}; +pub use rope::Rope; pub use source::{ BoxSource, MapOptions, Mapping, OriginalLocation, Source, SourceExt, SourceMap, diff --git a/src/original_source.rs b/src/original_source.rs index b73c275d..d8edda1a 100644 --- a/src/original_source.rs +++ b/src/original_source.rs @@ -7,10 +7,10 @@ use crate::{ helpers::{ get_generated_source_info, get_map, split_into_lines, split_into_potential_tokens, GeneratedInfo, OnChunk, OnName, OnSource, - StreamChunks, + SourceText, StreamChunks, }, source::{Mapping, OriginalLocation}, - MapOptions, Source, SourceMap, + MapOptions, Rope, Source, SourceMap, }; /// Represents source code, it will create source map for the source code, @@ -55,6 +55,10 @@ impl Source for OriginalSource { Cow::Borrowed(&self.value) } + fn rope(&self) -> Rope<'_> { + Rope::from(&self.value) + } + fn buffer(&self) -> Cow<[u8]> { Cow::Borrowed(self.value.as_bytes()) } @@ -98,25 +102,25 @@ impl std::fmt::Debug for OriginalSource { } } -impl<'a> StreamChunks<'a> for OriginalSource { - fn stream_chunks( +impl StreamChunks for OriginalSource { + fn stream_chunks<'a>( &'a self, options: &MapOptions, on_chunk: OnChunk<'_, 'a>, on_source: OnSource<'_, 'a>, _on_name: OnName, ) -> crate::helpers::GeneratedInfo { - on_source(0, Cow::Borrowed(&self.name), Some(&self.value)); + on_source(0, Cow::Borrowed(&self.name), Some(Rope::from(&self.value))); if options.columns { // With column info we need to read all lines and split them let mut line = 1; let mut column = 0; - for token in split_into_potential_tokens(&self.value) { - let is_end_of_line = token.ends_with('\n'); + for token in split_into_potential_tokens(&*self.value) { + let is_end_of_line = token.ends_with("\n"); if is_end_of_line && token.len() == 1 { if !options.final_source { on_chunk( - Some(Cow::Borrowed(token)), + Some(token.into_rope()), Mapping { generated_line: line, generated_column: column, @@ -126,7 +130,7 @@ impl<'a> StreamChunks<'a> for OriginalSource { } } else { on_chunk( - 
(!options.final_source).then_some(Cow::Borrowed(token)), + (!options.final_source).then_some(token.into_rope()), Mapping { generated_line: line, generated_column: column, @@ -153,7 +157,7 @@ impl<'a> StreamChunks<'a> for OriginalSource { } else if options.final_source { // Without column info and with final source we only // need meta info to generate mapping - let result = get_generated_source_info(&self.value); + let result = get_generated_source_info(&*self.value); if result.generated_column == 0 { for line in 1..result.generated_line { on_chunk( @@ -193,9 +197,9 @@ impl<'a> StreamChunks<'a> for OriginalSource { // we need to split source by lines let mut line = 1; let mut last_line = None; - for l in split_into_lines(&self.value) { + for l in split_into_lines(&self.value.as_str()) { on_chunk( - (!options.final_source).then_some(Cow::Borrowed(l)), + (!options.final_source).then_some(l.into_rope()), Mapping { generated_line: line, generated_column: 0, @@ -211,7 +215,7 @@ impl<'a> StreamChunks<'a> for OriginalSource { last_line = Some(l); } if let Some(last_line) = - last_line.filter(|last_line| !last_line.ends_with('\n')) + last_line.filter(|last_line| !last_line.ends_with("\n")) { GeneratedInfo { generated_line: line - 1, diff --git a/src/raw_source.rs b/src/raw_source.rs index 388ea69d..5ac2bd0b 100644 --- a/src/raw_source.rs +++ b/src/raw_source.rs @@ -9,7 +9,7 @@ use crate::{ get_generated_source_info, stream_chunks_of_raw_source, OnChunk, OnName, OnSource, StreamChunks, }, - MapOptions, Source, SourceMap, + MapOptions, Rope, Source, SourceMap, }; #[derive(Clone, PartialEq, Eq)] @@ -123,6 +123,17 @@ impl Source for RawSource { } } + fn rope(&self) -> Rope<'_> { + match &self.value { + RawValue::Buffer(v) => Rope::from( + self + .value_as_string + .get_or_init(|| String::from_utf8_lossy(v).to_string()), + ), + RawValue::String(s) => Rope::from(s), + } + } + fn buffer(&self) -> Cow<[u8]> { match &self.value { RawValue::String(v) => Cow::Borrowed(v.as_bytes()), 
@@ -187,8 +198,8 @@ impl std::fmt::Debug for RawSource { } } -impl<'a> StreamChunks<'a> for RawSource { - fn stream_chunks( +impl StreamChunks for RawSource { + fn stream_chunks<'a>( &'a self, options: &MapOptions, on_chunk: OnChunk<'_, 'a>, @@ -196,7 +207,15 @@ impl<'a> StreamChunks<'a> for RawSource { on_name: OnName<'_, 'a>, ) -> crate::helpers::GeneratedInfo { if options.final_source { - get_generated_source_info(&self.source()) + match &self.value { + RawValue::Buffer(buffer) => { + let source = self + .value_as_string + .get_or_init(|| String::from_utf8_lossy(buffer).to_string()); + get_generated_source_info(&**source) + } + RawValue::String(source) => get_generated_source_info(&**source), + } } else { match &self.value { RawValue::Buffer(buffer) => { @@ -204,11 +223,11 @@ impl<'a> StreamChunks<'a> for RawSource { .value_as_string .get_or_init(|| String::from_utf8_lossy(buffer).to_string()); stream_chunks_of_raw_source( - source, options, on_chunk, on_source, on_name, + &**source, options, on_chunk, on_source, on_name, ) } RawValue::String(source) => stream_chunks_of_raw_source( - source, options, on_chunk, on_source, on_name, + &**source, options, on_chunk, on_source, on_name, ), } } @@ -266,6 +285,10 @@ impl Source for RawStringSource { Cow::Borrowed(&self.0) } + fn rope(&self) -> Rope<'_> { + Rope::from(&self.0) + } + fn buffer(&self) -> Cow<[u8]> { Cow::Borrowed(self.0.as_bytes()) } @@ -301,8 +324,8 @@ impl Hash for RawStringSource { } } -impl<'a> StreamChunks<'a> for RawStringSource { - fn stream_chunks( +impl StreamChunks for RawStringSource { + fn stream_chunks<'a>( &'a self, options: &MapOptions, on_chunk: OnChunk<'_, 'a>, @@ -310,10 +333,10 @@ impl<'a> StreamChunks<'a> for RawStringSource { on_name: OnName<'_, 'a>, ) -> crate::helpers::GeneratedInfo { if options.final_source { - get_generated_source_info(&self.source()) + get_generated_source_info(&*self.0) } else { stream_chunks_of_raw_source( - &self.0, options, on_chunk, on_source, on_name, + 
&*self.0, options, on_chunk, on_source, on_name, ) } } @@ -365,6 +388,14 @@ impl Source for RawBufferSource { ) } + fn rope(&self) -> Rope<'_> { + Rope::from( + self + .value_as_string + .get_or_init(|| String::from_utf8_lossy(&self.value).to_string()), + ) + } + fn buffer(&self) -> Cow<[u8]> { Cow::Borrowed(&self.value) } @@ -400,8 +431,8 @@ impl Hash for RawBufferSource { } } -impl<'a> StreamChunks<'a> for RawBufferSource { - fn stream_chunks( +impl StreamChunks for RawBufferSource { + fn stream_chunks<'a>( &'a self, options: &MapOptions, on_chunk: OnChunk<'_, 'a>, @@ -409,10 +440,10 @@ impl<'a> StreamChunks<'a> for RawBufferSource { on_name: OnName<'_, 'a>, ) -> crate::helpers::GeneratedInfo { if options.final_source { - get_generated_source_info(&self.source()) + get_generated_source_info(&*self.source()) } else { stream_chunks_of_raw_source( - self + &**self .value_as_string .get_or_init(|| String::from_utf8_lossy(&self.value).to_string()), options, diff --git a/src/replace_source.rs b/src/replace_source.rs index 378cd841..29eaad34 100644 --- a/src/replace_source.rs +++ b/src/replace_source.rs @@ -4,15 +4,19 @@ use std::{ hash::{Hash, Hasher}, sync::{ atomic::{AtomicBool, Ordering}, - Arc, Mutex, MutexGuard, + Arc, Mutex, }, }; +use itertools::Itertools; use rustc_hash::FxHashMap as HashMap; use crate::{ - helpers::{get_map, split_into_lines, GeneratedInfo, StreamChunks}, + helpers::{ + get_map, split_into_lines, GeneratedInfo, SourceText, StreamChunks, + }, linear_map::LinearMap, + rope::Rope, MapOptions, Mapping, OriginalLocation, Source, SourceMap, }; @@ -37,7 +41,8 @@ use crate::{ /// ``` pub struct ReplaceSource { inner: Arc, - replacements: Mutex>, + replacements: Vec, + sorted_index: Mutex>, /// Whether `replacements` is sorted. 
is_sorted: AtomicBool, } @@ -86,7 +91,8 @@ impl ReplaceSource { pub fn new(source: T) -> Self { Self { inner: Arc::new(source), - replacements: Mutex::new(Vec::new()), + replacements: Vec::new(), + sorted_index: Mutex::new(Vec::new()), is_sorted: AtomicBool::new(true), } } @@ -96,19 +102,31 @@ impl ReplaceSource { &self.inner } - fn replacements(&self) -> MutexGuard> { - self.replacements.lock().unwrap() - } - fn sort_replacement(&self) { if self.is_sorted.load(Ordering::SeqCst) { return; } - self.replacements().sort_by(|a, b| { - (a.start, a.end, a.enforce).cmp(&(b.start, b.end, b.enforce)) - }); + let sorted_index = self + .replacements + .iter() + .enumerate() + .sorted_by(|(_, a), (_, b)| { + (a.start, a.end, a.enforce).cmp(&(b.start, b.end, b.enforce)) + }) + .map(|replacement| replacement.0) + .collect::>(); + *self.sorted_index.lock().unwrap() = sorted_index; self.is_sorted.store(true, Ordering::SeqCst) } + + fn sorted_replacement(&self) -> Vec<&Replacement> { + self.sort_replacement(); + let sorted_index = self.sorted_index.lock().unwrap(); + sorted_index + .iter() + .map(|idx| &self.replacements[*idx]) + .collect() + } } impl ReplaceSource { @@ -136,7 +154,7 @@ impl ReplaceSource { content: &str, name: Option<&str>, ) { - self.replacements().push(Replacement::new( + self.replacements.push(Replacement::new( start, end, content.into(), @@ -155,7 +173,7 @@ impl ReplaceSource { name: Option<&str>, enforce: ReplacementEnforce, ) { - self.replacements().push(Replacement::new( + self.replacements.push(Replacement::new( start, end, content.into(), @@ -168,13 +186,11 @@ impl ReplaceSource { impl Source for ReplaceSource { fn source(&self) -> Cow { - self.sort_replacement(); - let inner_source_code = self.inner.source(); // mut_string_push_str is faster that vec join // concatenate strings benchmark, see https://github.com/hoodie/concatenation_benchmarks-rs - let replacements = self.replacements.lock().unwrap(); + let replacements = self.sorted_replacement(); if 
replacements.is_empty() { return inner_source_code; } @@ -205,6 +221,38 @@ impl Source for ReplaceSource { source_code.into() } + fn rope(&self) -> Rope<'_> { + let inner_source_code = self.inner.rope(); + + // mut_string_push_str is faster that vec join + // concatenate strings benchmark, see https://github.com/hoodie/concatenation_benchmarks-rs + let replacements = self.sorted_replacement(); + if replacements.is_empty() { + return inner_source_code; + } + let mut source_code = Rope::new(); + let mut inner_pos = 0; + for replacement in replacements.iter() { + if inner_pos < replacement.start { + let end_pos = (replacement.start as usize).min(inner_source_code.len()); + let slice = inner_source_code.byte_slice(inner_pos as usize..end_pos); + source_code.append(slice); + } + source_code.add(&replacement.content); + #[allow(clippy::manual_clamp)] + { + inner_pos = inner_pos + .max(replacement.end) + .min(inner_source_code.len() as u32); + } + } + let slice = + inner_source_code.byte_slice(inner_pos as usize..inner_source_code.len()); + source_code.append(slice); + + source_code + } + fn buffer(&self) -> Cow<[u8]> { match self.source() { Cow::Borrowed(s) => Cow::Borrowed(s.as_bytes()), @@ -217,11 +265,10 @@ impl Source for ReplaceSource { } fn map(&self, options: &crate::MapOptions) -> Option { - let replacements = self.replacements.lock().unwrap(); + let replacements = &self.replacements; if replacements.is_empty() { return self.inner.map(options); } - drop(replacements); get_map(self, options) } @@ -239,7 +286,7 @@ impl std::fmt::Debug for ReplaceSource { .field("inner", self.inner.as_ref()) .field( "replacements", - &self.replacements.lock().iter().take(3).collect::>(), + &self.replacements.iter().take(3).collect::>(), ) .field("is_sorted", &self.is_sorted.load(Ordering::SeqCst)) .finish() @@ -247,15 +294,15 @@ impl std::fmt::Debug for ReplaceSource { } enum SourceContent<'a> { - Raw(&'a str), - Lines(Vec<&'a str>), + Raw(Rope<'a>), + Lines(Vec>), } fn 
check_content_at_position( - lines: &[&str], + lines: &[Rope], line: u32, column: u32, - expected: &str, + expected: Rope, // FIXME: memory ) -> bool { if let Some(line) = lines.get(line as usize - 1) { match line @@ -264,7 +311,8 @@ fn check_content_at_position( .map(|(byte_index, _)| byte_index) { Some(byte_index) => { - line.get(byte_index..byte_index + expected.len()) == Some(expected) + line.get_byte_slice(byte_index..byte_index + expected.len()) + == Some(expected) } None => false, } @@ -273,17 +321,16 @@ fn check_content_at_position( } } -impl<'a, T: Source> StreamChunks<'a> for ReplaceSource { - fn stream_chunks( +impl StreamChunks for ReplaceSource { + fn stream_chunks<'a>( &'a self, options: &crate::MapOptions, on_chunk: crate::helpers::OnChunk<'_, 'a>, on_source: crate::helpers::OnSource<'_, 'a>, on_name: crate::helpers::OnName<'_, 'a>, ) -> crate::helpers::GeneratedInfo { - self.sort_replacement(); let on_name = RefCell::new(on_name); - let repls = self.replacements(); + let repls = &self.sorted_replacement(); let mut pos: u32 = 0; let mut i: usize = 0; let mut replacement_end: Option = None; @@ -325,7 +372,7 @@ impl<'a, T: Source> StreamChunks<'a> for ReplaceSource { // In this case, we can't split this mapping. 
// webpack-sources also have this function, refer https://github.com/webpack/webpack-sources/blob/main/lib/ReplaceSource.js#L158 let check_original_content = - |source_index: u32, line: u32, column: u32, expected_chunk: &str| { + |source_index: u32, line: u32, column: u32, expected_chunk: Rope| { if let Some(Some(source_content)) = source_content_lines.borrow_mut().get_mut(&source_index) { @@ -363,7 +410,7 @@ impl<'a, T: Source> StreamChunks<'a> for ReplaceSource { // Skip over the whole chunk if replacement_end >= end_pos { let line = mapping.generated_line as i64 + generated_line_offset; - if chunk.ends_with('\n') { + if chunk.ends_with("\n") { generated_line_offset -= 1; if generated_column_offset_line == line { // undo exiting corrections form the current line @@ -385,7 +432,7 @@ impl<'a, T: Source> StreamChunks<'a> for ReplaceSource { original.source_index, original.original_line, original.original_column, - &chunk[0..chunk_pos as usize], + chunk.byte_slice(0..chunk_pos as usize), ) }) { original.original_column += chunk_pos; @@ -409,15 +456,8 @@ impl<'a, T: Source> StreamChunks<'a> for ReplaceSource { if next_replacement_pos > pos { // Emit chunk until replacement let offset = next_replacement_pos - pos; - let chunk_slice = match &chunk { - Cow::Borrowed(c) => Cow::Borrowed( - &c[chunk_pos as usize..(chunk_pos + offset) as usize], - ), - Cow::Owned(c) => Cow::Owned( - c[chunk_pos as usize..(chunk_pos + offset) as usize] - .to_string(), - ), - }; + let chunk_slice = chunk + .byte_slice(chunk_pos as usize..(chunk_pos + offset) as usize); on_chunk( Some(chunk_slice.clone()), Mapping { @@ -449,7 +489,7 @@ impl<'a, T: Source> StreamChunks<'a> for ReplaceSource { original.source_index, original.original_line, original.original_column, - &chunk_slice, + chunk_slice.clone(), ) }) { @@ -460,10 +500,11 @@ impl<'a, T: Source> StreamChunks<'a> for ReplaceSource { #[allow(unsafe_code)] // SAFETY: The safety of this operation relies on the fact that the `ReplaceSource` 
type will not delete the `replacements` during its entire lifetime. let repl = unsafe { - std::mem::transmute::<&Replacement, &'a Replacement>(&repls[i]) + std::mem::transmute::<&Replacement, &'a Replacement>(repls[i]) }; - let lines: Vec<&str> = split_into_lines(&repl.content).collect(); + let lines = + split_into_lines(&repl.content.as_str()).collect::>(); let mut replacement_name_index = mapping .original .as_ref() @@ -483,7 +524,7 @@ impl<'a, T: Source> StreamChunks<'a> for ReplaceSource { } for (m, content_line) in lines.iter().enumerate() { on_chunk( - Some(Cow::Borrowed(content_line)), + Some(content_line.into_rope()), Mapping { generated_line: line as u32, generated_column: ((mapping.generated_column as i64) @@ -505,7 +546,7 @@ impl<'a, T: Source> StreamChunks<'a> for ReplaceSource { // Only the first chunk has name assigned replacement_name_index = None; - if m == lines.len() - 1 && !content_line.ends_with('\n') { + if m == lines.len() - 1 && !content_line.ends_with("\n") { if generated_column_offset_line == line { generated_column_offset += content_line.len() as i64; } else { @@ -545,7 +586,7 @@ impl<'a, T: Source> StreamChunks<'a> for ReplaceSource { .is_some_and(|replacement_end| replacement_end >= end_pos) { let line = mapping.generated_line as i64 + generated_line_offset; - if chunk.ends_with('\n') { + if chunk.ends_with("\n") { generated_line_offset -= 1; if generated_column_offset_line == line { // undo exiting corrections form the current line @@ -570,8 +611,9 @@ impl<'a, T: Source> StreamChunks<'a> for ReplaceSource { original.source_index, original.original_line, original.original_column, - &chunk - [chunk_pos as usize..(chunk_pos + offset as u32) as usize], + chunk.byte_slice( + chunk_pos as usize..(chunk_pos + offset as u32) as usize, + ), ) }) { @@ -594,10 +636,7 @@ impl<'a, T: Source> StreamChunks<'a> for ReplaceSource { let chunk_slice = if chunk_pos == 0 { chunk } else { - match chunk { - Cow::Borrowed(c) => Cow::Borrowed(&c[chunk_pos as 
usize..]), - Cow::Owned(c) => Cow::Owned(c[chunk_pos as usize..].to_string()), - } + chunk.byte_slice(chunk_pos as usize..chunk.len()) }; let line = mapping.generated_line as i64 + generated_line_offset; on_chunk( @@ -627,7 +666,7 @@ impl<'a, T: Source> StreamChunks<'a> for ReplaceSource { }, &mut |source_index, source, source_content| { let mut source_content_lines = source_content_lines.borrow_mut(); - let lines = source_content.map(SourceContent::Raw); + let lines = source_content.clone().map(SourceContent::Raw); source_content_lines.insert(source_index, lines); on_source(source_index, source, source_content); }, @@ -647,22 +686,18 @@ impl<'a, T: Source> StreamChunks<'a> for ReplaceSource { ); // Handle remaining replacements - let mut len = 0; - for replacement in &repls[i..] { - len += replacement.content.len(); - } - let mut remainder = String::with_capacity(len); + let mut remainder = Rope::new(); while i < repls.len() { - remainder += &repls[i].content; + remainder.add(&repls[i].content); i += 1; } // Insert remaining replacements content split into chunks by lines let mut line = result.generated_line as i64 + generated_line_offset; - let matches: Vec<&str> = split_into_lines(&remainder).collect(); + let matches: Vec = split_into_lines(&remainder).collect(); for (m, content_line) in matches.iter().enumerate() { on_chunk( - Some(Cow::Owned(content_line.to_string())), + Some(content_line.clone()), Mapping { generated_line: line as u32, generated_column: ((result.generated_column as i64) @@ -675,7 +710,7 @@ impl<'a, T: Source> StreamChunks<'a> for ReplaceSource { }, ); - if m == matches.len() - 1 && !content_line.ends_with('\n') { + if m == matches.len() - 1 && !content_line.ends_with("\n") { if generated_column_offset_line == line { generated_column_offset += content_line.len() as i64; } else { @@ -706,7 +741,8 @@ impl Clone for ReplaceSource { fn clone(&self) -> Self { Self { inner: self.inner.clone(), - replacements: Mutex::new(self.replacements().clone()), 
+ replacements: self.replacements.clone(), + sorted_index: Mutex::new(self.sorted_index.lock().unwrap().clone()), is_sorted: AtomicBool::new(self.is_sorted.load(Ordering::SeqCst)), } } @@ -714,9 +750,8 @@ impl Clone for ReplaceSource { impl Hash for ReplaceSource { fn hash(&self, state: &mut H) { - self.sort_replacement(); "ReplaceSource".hash(state); - for repl in self.replacements().iter() { + for repl in self.sorted_replacement() { repl.hash(state); } self.inner.hash(state); @@ -725,7 +760,7 @@ impl Hash for ReplaceSource { impl PartialEq for ReplaceSource { fn eq(&self, other: &Self) -> bool { - self.inner == other.inner && *self.replacements() == *other.replacements() + self.inner == other.inner && self.replacements == other.replacements } } diff --git a/src/rope.rs b/src/rope.rs new file mode 100644 index 00000000..308abe46 --- /dev/null +++ b/src/rope.rs @@ -0,0 +1,1199 @@ +#![allow(unsafe_code)] + +use std::{ + borrow::Cow, + cell::RefCell, + collections::VecDeque, + hash::Hash, + ops::{Bound, RangeBounds}, + rc::Rc, +}; + +use crate::Error; + +#[derive(Clone, Debug)] +pub(crate) enum Repr<'a> { + Simple(&'a str), + Complex(Rc>), +} + +/// A rope data structure. +#[derive(Clone, Debug)] +pub struct Rope<'a> { + repr: Repr<'a>, +} + +impl<'a> Rope<'a> { + /// Creates a new empty rope. + pub const fn new() -> Self { + Self { + repr: Repr::Simple(""), + } + } + + /// Adds a string slice to the end of the rope. + /// + /// Converts from simple to complex representation on first add. + /// Empty strings are ignored. + pub fn add(&mut self, value: &'a str) { + if value.is_empty() { + return; + } + + match &mut self.repr { + Repr::Simple(s) => { + let vec = Vec::from_iter([(*s, 0), (value, s.len())]); + self.repr = Repr::Complex(Rc::new(vec)); + } + Repr::Complex(data) => { + let len = data + .last() + .map_or(0, |(chunk, start_pos)| *start_pos + chunk.len()); + Rc::make_mut(data).push((value, len)); + } + } + } + + /// Appends another rope to this rope. 
+ /// + /// Handles all combinations of simple and complex representations efficiently. + pub fn append(&mut self, value: Rope<'a>) { + match (&mut self.repr, value.repr) { + (Repr::Simple(s), Repr::Simple(other)) => { + let raw = Vec::from_iter([(*s, 0), (other, s.len())]); + self.repr = Repr::Complex(Rc::new(raw)); + } + (Repr::Complex(s), Repr::Complex(other)) => { + if !other.is_empty() { + let mut len = s + .last() + .map_or(0, |(chunk, start_pos)| *start_pos + chunk.len()); + + let cur = Rc::make_mut(s); + cur.reserve_exact(other.len()); + + for &(chunk, _) in other.iter() { + cur.push((chunk, len)); + len += chunk.len(); + } + } + } + (Repr::Complex(s), Repr::Simple(other)) => { + if !other.is_empty() { + let len = s + .last() + .map_or(0, |(chunk, start_pos)| *start_pos + chunk.len()); + Rc::make_mut(s).push((other, len)); + } + } + (Repr::Simple(s), Repr::Complex(other)) => { + let mut raw = Vec::with_capacity(other.len() + 1); + raw.push((*s, 0)); + let mut len = s.len(); + for &(chunk, _) in other.iter() { + raw.push((chunk, len)); + len += chunk.len(); + } + self.repr = Repr::Complex(Rc::new(raw)); + } + } + } + + /// Gets the byte at the given index. + /// + /// # Panics + /// When index is out of bounds. + pub fn byte(&self, byte_index: usize) -> u8 { + self.get_byte(byte_index).expect("byte out of bounds") + } + + /// Non-panicking version of [Rope::byte]. + /// + /// Gets the byte at the given index, returning None if out of bounds. 
+ pub fn get_byte(&self, byte_index: usize) -> Option { + if byte_index >= self.len() { + return None; + } + match &self.repr { + Repr::Simple(s) => Some(s.as_bytes()[byte_index]), + Repr::Complex(data) => { + let chunk_index = data + .binary_search_by(|(_, start_pos)| start_pos.cmp(&byte_index)) + .unwrap_or_else(|index| index.saturating_sub(1)); + let (s, start_pos) = &data.get(chunk_index)?; + let pos = byte_index - start_pos; + Some(s.as_bytes()[pos]) + } + } + } + + /// Returns an iterator over the characters and their byte positions. + pub fn char_indices(&self) -> CharIndices<'_, 'a> { + match &self.repr { + Repr::Simple(s) => CharIndices { + repr: CharIndicesRepr::Simple { + iter: s.char_indices(), + }, + }, + Repr::Complex(data) => CharIndices { + repr: CharIndicesRepr::Complex { + chunks: data, + char_indices: VecDeque::new(), + chunk_index: 0, + }, + }, + } + } + + /// Returns whether the rope starts with the given string. + #[inline] + pub fn starts_with(&self, value: &str) -> bool { + match &self.repr { + Repr::Simple(s) => s.starts_with(value), + Repr::Complex(data) => { + if let Some((first, _)) = data.first() { + first.starts_with(value) + } else { + false + } + } + } + } + + /// Returns whether the rope ends with the given string. + #[inline] + pub fn ends_with(&self, value: &str) -> bool { + match &self.repr { + Repr::Simple(s) => s.ends_with(value), + Repr::Complex(data) => { + if let Some((last, _)) = data.last() { + last.ends_with(value) + } else { + false + } + } + } + } + + /// Returns whether the rope is empty. + #[inline] + pub fn is_empty(&self) -> bool { + match &self.repr { + Repr::Simple(s) => s.is_empty(), + Repr::Complex(data) => data.iter().all(|(s, _)| s.is_empty()), + } + } + + /// Returns the length of the rope in bytes. 
+ #[inline] + pub fn len(&self) -> usize { + match &self.repr { + Repr::Simple(s) => s.len(), + Repr::Complex(data) => data + .last() + .map_or(0, |(chunk, start_pos)| start_pos + chunk.len()), + } + } + + /// Returns a slice of the rope in the given byte range. + /// + /// # Panics + /// - When start > end + /// - When end is out of bounds + /// - When indices are not on char boundaries + pub fn byte_slice(&self, range: R) -> Rope<'a> + where + R: RangeBounds, + { + self.get_byte_slice_impl(range).unwrap_or_else(|e| { + panic!("byte_slice: {}", e); + }) + } + + /// Non-panicking version of [Rope::byte_slice]. + pub fn get_byte_slice(&self, range: R) -> Option> + where + R: RangeBounds, + { + self.get_byte_slice_impl(range).ok() + } + + /// Implementation for byte_slice operations. + #[inline] + pub(crate) fn get_byte_slice_impl( + &self, + range: R, + ) -> Result, Error> + where + R: RangeBounds, + { + let start_range = start_bound_to_range_start(range.start_bound()); + let end_range = end_bound_to_range_end(range.end_bound()); + + match (start_range, end_range) { + (Some(start), Some(end)) => { + if start > end { + return Err(Error::Rope("start >= end")); + } else if end > self.len() { + return Err(Error::Rope("end out of bounds")); + } + } + (None, Some(end)) => { + if end > self.len() { + return Err(Error::Rope("end out of bounds")); + } + } + (Some(start), None) => { + if start > self.len() { + return Err(Error::Rope("start out of bounds")); + } + } + _ => {} + } + + let start_range = start_range.unwrap_or(0); + let end_range = end_range.unwrap_or_else(|| self.len()); + + match &self.repr { + Repr::Simple(s) => s + .get(start_range..end_range) + .map(Rope::from) + .ok_or(Error::Rope("invalid char boundary")), + Repr::Complex(data) => { + // [start_chunk + let start_chunk_index = data + .binary_search_by(|(_, start_pos)| start_pos.cmp(&start_range)) + .unwrap_or_else(|insert_pos| insert_pos.saturating_sub(1)); + + // end_chunk) + let end_chunk_index = data + 
.binary_search_by(|(chunk, start_pos)| { + let end_pos = start_pos + chunk.len(); // exclusive + end_pos.cmp(&end_range) + }) + .unwrap_or_else(|insert_pos| insert_pos); + + // same chunk + if start_chunk_index == end_chunk_index { + // SAFETY: start_chunk_index guarantees valid range + let (chunk, start_pos) = + unsafe { data.get_unchecked(start_chunk_index) }; + let start = start_range - start_pos; + let end = end_range - start_pos; + return chunk + .get(start..end) + .map(Rope::from) + .ok_or(Error::Rope("invalid char boundary")); + } + + if end_chunk_index < start_chunk_index { + return Ok(Rope::new()); + } + + let mut raw = + Vec::with_capacity(end_chunk_index - start_chunk_index + 1); + let mut len = 0; + + // different chunk + // [start_chunk, end_chunk] + (start_chunk_index..end_chunk_index + 1).try_for_each(|i| { + // SAFETY: [start_chunk_index, end_chunk_index] guarantees valid range + let (chunk, start_pos) = unsafe { data.get_unchecked(i) }; + + if start_chunk_index == i { + let start = start_range - start_pos; + if let Some(chunk) = chunk.get(start..) { + raw.push((chunk, len)); + len += chunk.len(); + } else { + return Err(Error::Rope("invalid char boundary")); + } + } else if end_chunk_index == i { + let end = end_range - start_pos; + if let Some(chunk) = chunk.get(..end) { + raw.push((chunk, len)); + len += chunk.len(); + } else { + return Err(Error::Rope("invalid char boundary")); + } + } else { + raw.push((chunk, len)); + len += chunk.len(); + } + + Ok(()) + })?; + + Ok(Rope { + repr: Repr::Complex(Rc::new(raw)), + }) + } + } + } + + /// Range-unchecked version of [Rope::byte_slice]. + /// + /// # Safety + /// + /// This is not safe, due to the following invariants that must be upheld: + /// + /// - Range must be within bounds. + /// - Range start must be less than or equal to the end. + /// - Both range start and end must be on char boundaries. 
+ pub unsafe fn byte_slice_unchecked(&self, range: R) -> Rope<'a> + where + R: RangeBounds, + { + let start_range = start_bound_to_range_start(range.start_bound()); + let end_range = end_bound_to_range_end(range.end_bound()); + + let start_range = start_range.unwrap_or(0); + let end_range = end_range.unwrap_or_else(|| self.len()); + + match &self.repr { + Repr::Simple(s) => { + // SAFETY: invariant guarantees valid range + Rope::from(unsafe { s.get_unchecked(start_range..end_range) }) + } + Repr::Complex(data) => { + // [start_chunk + let start_chunk_index = data + .binary_search_by(|(_, start_pos)| start_pos.cmp(&start_range)) + .unwrap_or_else(|insert_pos| insert_pos.saturating_sub(1)); + + // end_chunk) + let end_chunk_index = data + .binary_search_by(|(chunk, start_pos)| { + let end_pos = start_pos + chunk.len(); // exclusive + end_pos.cmp(&end_range) + }) + .unwrap_or_else(|insert_pos| insert_pos); + + // same chunk + if start_chunk_index == end_chunk_index { + // SAFETY: start_chunk_index guarantees valid range + let (chunk, start_pos) = + unsafe { data.get_unchecked(start_chunk_index) }; + let start = start_range - start_pos; + let end = end_range - start_pos; + // SAFETY: invariant guarantees valid range + return Rope::from(unsafe { chunk.get_unchecked(start..end) }); + } + + if end_chunk_index < start_chunk_index { + return Rope::new(); + } + + let mut raw = + Vec::with_capacity(end_chunk_index - start_chunk_index + 1); + let mut len = 0; + + // different chunk + // [start_chunk, end_chunk] + (start_chunk_index..end_chunk_index + 1).for_each(|i| { + // SAFETY: [start_chunk_index, end_chunk_index] guarantees valid range + let (chunk, start_pos) = unsafe { data.get_unchecked(i) }; + + if start_chunk_index == i { + let start = start_range - start_pos; + // SAFETY: invariant guarantees valid range + let chunk = unsafe { chunk.get_unchecked(start..) 
}; + raw.push((chunk, len)); + len += chunk.len(); + } else if end_chunk_index == i { + let end = end_range - start_pos; + // SAFETY: invariant guarantees valid range + let chunk = unsafe { chunk.get_unchecked(..end) }; + raw.push((chunk, len)); + len += chunk.len(); + } else { + raw.push((chunk, len)); + len += chunk.len(); + } + }); + + Rope { + repr: Repr::Complex(Rc::new(raw)), + } + } + } + } + + /// Returns an iterator over the lines of the rope. + pub fn lines(&self) -> Lines<'_, 'a> { + self.lines_impl(true) + } + + /// Returns an iterator over the lines of the rope. + /// + /// If `trailing_line_break_as_newline` is true, the end of the rope with ('\n') is treated as an empty newline + pub(crate) fn lines_impl( + &self, + trailing_line_break_as_newline: bool, + ) -> Lines<'_, 'a> { + Lines { + iter: match &self.repr { + Repr::Simple(s) => LinesEnum::Simple(s), + Repr::Complex(data) => LinesEnum::Complex { + iter: data, + in_chunk_byte_idx: 0, + chunk_idx: 0, + }, + }, + byte_idx: 0, + ended: false, + total_bytes: self.len(), + trailing_line_break_as_newline, + } + } + + /// Converts the rope to bytes. + /// + /// Returns borrowed bytes for simple ropes and owned bytes for complex ropes. + pub fn to_bytes(&self) -> Cow<'a, [u8]> { + match &self.repr { + Repr::Simple(s) => Cow::Borrowed(s.as_bytes()), + Repr::Complex(data) => { + let mut bytes = vec![]; + for (chunk, _) in data.iter() { + bytes.extend_from_slice(chunk.as_bytes()); + } + Cow::Owned(bytes) + } + } + } + + /// Returns the underlying &str if this is a simple rope. 
+ pub fn get_simple(&self) -> Option<&'a str> { + match &self.repr { + Repr::Simple(s) => Some(s), + _ => None, + } + } +} + +impl Hash for Rope<'_> { + fn hash(&self, state: &mut H) { + match &self.repr { + Repr::Simple(s) => s.hash(state), + Repr::Complex(data) => { + for (s, _) in data.iter() { + s.hash(state); + } + } + } + } +} + +enum LinesEnum<'a, 'b> { + Simple(&'b str), + Complex { + iter: &'a Vec<(&'b str, usize)>, + in_chunk_byte_idx: usize, + chunk_idx: usize, + }, +} + +pub struct Lines<'a, 'b> { + iter: LinesEnum<'a, 'b>, + byte_idx: usize, + ended: bool, + total_bytes: usize, + + /// Whether to treat the end of the rope with ('\n') as an empty newline. + trailing_line_break_as_newline: bool, +} + +impl<'a> Iterator for Lines<'_, 'a> { + type Item = Rope<'a>; + + fn next(&mut self) -> Option { + match *self { + Lines { + iter: LinesEnum::Simple(s), + ref mut byte_idx, + ref mut ended, + ref total_bytes, + trailing_line_break_as_newline, + .. + } => { + if *ended { + return None; + } else if byte_idx == total_bytes { + if trailing_line_break_as_newline { + *ended = true; + return Some(Rope::from("")); + } + return None; + } else if let Some(idx) = + memchr::memchr(b'\n', &s.as_bytes()[*byte_idx..]) + { + let end = *byte_idx + idx + 1; + let rope = Rope::from(&s[*byte_idx..end]); + *byte_idx = end; + return Some(rope); + } + *ended = true; + Some(Rope::from(&s[*byte_idx..])) + } + Lines { + iter: + LinesEnum::Complex { + iter: chunks, + ref mut in_chunk_byte_idx, + ref mut chunk_idx, + }, + ref mut byte_idx, + ref mut ended, + ref total_bytes, + trailing_line_break_as_newline, + } => { + if *ended { + return None; + } else if byte_idx == total_bytes { + if trailing_line_break_as_newline { + *ended = true; + return Some(Rope::from("")); + } + return None; + } else if chunks.is_empty() { + return None; + } + + debug_assert!(*chunk_idx < chunks.len()); + + let &(chunk, _) = &chunks[*chunk_idx]; + + // If the current chunk has ran out of bytes, move to the 
next chunk. + if *in_chunk_byte_idx == chunk.len() && *chunk_idx < chunks.len() - 1 { + *chunk_idx += 1; + *in_chunk_byte_idx = 0; + return self.next(); + } + + let start_chunk_idx = *chunk_idx; + let start_in_chunk_byte_idx = *in_chunk_byte_idx; + + let end_info = loop { + if *chunk_idx == chunks.len() { + break None; + } + let &(chunk, _) = &chunks[*chunk_idx]; + if let Some(idx) = + memchr::memchr(b'\n', &chunk.as_bytes()[*in_chunk_byte_idx..]) + { + *in_chunk_byte_idx += idx + 1; + break Some((*chunk_idx, *in_chunk_byte_idx)); + } else { + *in_chunk_byte_idx = 0; + *chunk_idx += 1; + } + }; + + // If we find a newline in the next few chunks, return the line. + if let Some((end_chunk_idx, end_in_chunk_byte_idx)) = end_info { + if start_chunk_idx == end_chunk_idx { + let &(chunk, _) = &chunks[start_chunk_idx]; + *byte_idx += end_in_chunk_byte_idx - start_in_chunk_byte_idx; + return Some(Rope::from( + &chunk[start_in_chunk_byte_idx..end_in_chunk_byte_idx], + )); + } + + // The line spans multiple chunks. + let mut raw = Vec::with_capacity(end_chunk_idx - start_chunk_idx + 1); + let mut len = 0; + (start_chunk_idx..end_chunk_idx + 1).for_each(|i| { + let &(chunk, _) = &chunks[i]; + + if start_chunk_idx == i { + let start = start_in_chunk_byte_idx; + raw.push((&chunk[start..], len)); + len += chunk.len() - start; + } else if end_chunk_idx == i { + let end = end_in_chunk_byte_idx; + raw.push((&chunk[..end], len)); + len += end; + } else { + raw.push((chunk, len)); + len += chunk.len(); + } + }); + // Advance the byte index to the end of the line. + *byte_idx += len; + Some(Rope { + repr: Repr::Complex(Rc::new(raw)), + }) + } else { + // If we did not find a newline in the next few chunks, + // return the remaining bytes. This is the end of the rope. + *ended = true; + + // If we only have one chunk left, return the remaining bytes. 
+ if chunks.len() - start_chunk_idx == 1 { + let &(chunk, _) = &chunks[start_chunk_idx]; + let start = start_in_chunk_byte_idx; + let end = chunk.len(); + *byte_idx += end - start; + return Some(Rope::from(&chunk[start..end])); + } + + let mut raw = Vec::with_capacity(chunks.len() - start_chunk_idx); + let mut len = 0; + (start_chunk_idx..chunks.len()).for_each(|i| { + let &(chunk, _) = &chunks[i]; + if start_chunk_idx == i { + let start = start_in_chunk_byte_idx; + raw.push((&chunk[start..], len)); + len += chunk.len() - start; + } else { + raw.push((chunk, len)); + len += chunk.len(); + } + }); + // Advance the byte index to the end of the rope. + *byte_idx += len; + Some(Rope { + repr: Repr::Complex(Rc::new(raw)), + }) + } + } + } + } +} + +enum CharIndicesRepr<'a, 'b> { + Simple { + iter: std::str::CharIndices<'b>, + }, + Complex { + chunks: &'a [(&'b str, usize)], + char_indices: VecDeque<(usize, char)>, + chunk_index: usize, + }, +} + +pub struct CharIndices<'a, 'b> { + repr: CharIndicesRepr<'a, 'b>, +} + +impl Iterator for CharIndices<'_, '_> { + type Item = (usize, char); + + fn next(&mut self) -> Option { + match &mut self.repr { + CharIndicesRepr::Simple { iter } => iter.next(), + CharIndicesRepr::Complex { + chunks, + char_indices, + chunk_index, + } => { + if let Some(item) = char_indices.pop_front() { + return Some(item); + } + + if *chunk_index >= chunks.len() { + return None; + } + + // skip empty chunks + while *chunk_index < chunks.len() && chunks[*chunk_index].0.is_empty() { + *chunk_index += 1; + } + + let (chunk, start_pos) = chunks[*chunk_index]; + + char_indices + .extend(chunk.char_indices().map(|(i, c)| (start_pos + i, c))); + *chunk_index += 1; + char_indices.pop_front() + } + } + } +} + +impl Default for Rope<'_> { + fn default() -> Self { + Self::new() + } +} + +// Implement `ToString` than `Display` to manually allocate the string with capacity. +// This is faster than using `Display` and `write!` for large ropes. 
+#[allow(clippy::to_string_trait_impl)]
+impl ToString for Rope<'_> {
+  fn to_string(&self) -> String {
+    match &self.repr {
+      Repr::Simple(s) => s.to_string(),
+      Repr::Complex(data) => {
+        // Reserve `self.len()` bytes up front, then append every chunk in order.
+        let mut s = String::with_capacity(self.len());
+        for (chunk, _) in data.iter() {
+          s.push_str(chunk);
+        }
+        s
+      }
+    }
+  }
+}
+
+/// Two ropes are equal when their bytes are equal, no matter how each rope is
+/// split into chunks.
+impl PartialEq<Rope<'_>> for Rope<'_> {
+  fn eq(&self, other: &Rope<'_>) -> bool {
+    if self.len() != other.len() {
+      return false;
+    }
+
+    // View both sides as a slice of `(chunk, start offset)` pairs.
+    let chunks = match &self.repr {
+      Repr::Simple(s) => &[(*s, 0)][..],
+      Repr::Complex(data) => &data[..],
+    };
+    let other_chunks = match &other.repr {
+      Repr::Simple(s) => &[(*s, 0)][..],
+      Repr::Complex(data) => &data[..],
+    };
+
+    let mut cur = 0;
+    let other_chunk_index = RefCell::new(0);
+    let mut other_chunk_byte_index = 0;
+    let other_chunk = || other_chunks[*other_chunk_index.borrow()].0.as_bytes();
+    for (chunk, start_pos) in chunks.iter() {
+      let chunk = chunk.as_bytes();
+      while (cur - start_pos) < chunk.len() {
+        // Advance past every exhausted chunk of `other`, including empty ones.
+        // A single `if` here would stop on an empty chunk and index it at 0.
+        // The lengths are equal and `self` still has a byte left, so a
+        // non-empty chunk of `other` is always found before the end.
+        while other_chunk_byte_index >= other_chunk().len() {
+          other_chunk_byte_index = 0;
+          *other_chunk_index.borrow_mut() += 1;
+        }
+        if chunk[cur - start_pos] == other_chunk()[other_chunk_byte_index] {
+          cur += 1;
+          other_chunk_byte_index += 1;
+        } else {
+          return false;
+        }
+      }
+    }
+
+    true
+  }
+}
+
+/// Compares the rope's bytes with a string slice, chunk by chunk.
+impl PartialEq<str> for Rope<'_> {
+  fn eq(&self, other: &str) -> bool {
+    if self.len() != other.len() {
+      return false;
+    }
+
+    let other = other.as_bytes();
+
+    match &self.repr {
+      Repr::Simple(s) => {
+        if s.as_bytes() != other {
+          return false;
+        }
+      }
+      Repr::Complex(data) => {
+        // `idx` is the byte position in `other` where the current chunk must
+        // start; the length check above keeps the slice in bounds.
+        let mut idx = 0;
+        for (chunk, _) in data.iter() {
+          let chunk = chunk.as_bytes();
+          if chunk != &other[idx..(idx + chunk.len())] {
+            return false;
+          }
+          idx += chunk.len();
+        }
+      }
+    }
+
+    true
+  }
+}
+
+/// Same comparison as `PartialEq<str>`; delegates instead of duplicating it.
+impl PartialEq<&str> for Rope<'_> {
+  fn eq(&self, other: &&str) -> bool {
+    <Self as PartialEq<str>>::eq(self, *other)
+  }
+}
+
+impl<'a> From<&'a str> for Rope<'a> {
+  fn from(value: &'a str) -> Self {
+    Rope {
+      repr: Repr::Simple(value),
+    }
+  }
+}
+
+impl<'a> From<&'a String> for Rope<'a> {
+  fn from(value: &'a String) -> Self {
+    Rope {
+      repr: Repr::Simple(value),
+    }
+  }
+}
+
+impl<'a> From<&'a Cow<'a, str>> for Rope<'a> {
+  fn from(value: &'a Cow<'a, str>) -> Self {
+    Rope {
+      repr: Repr::Simple(value),
+    }
+  }
+}
+
+/// Builds a chunked rope; each chunk is stored with the total byte length of
+/// the chunks that precede it.
+impl<'a> FromIterator<&'a str> for Rope<'a> {
+  fn from_iter<T: IntoIterator<Item = &'a str>>(iter: T) -> Self {
+    let mut len = 0;
+    let raw = iter
+      .into_iter()
+      .map(|chunk| {
+        let cur = (chunk, len);
+        len += chunk.len();
+        cur
+      })
+      .collect::<Vec<_>>();
+
+    Self {
+      repr: Repr::Complex(Rc::new(raw)),
+    }
+  }
+}
+
+/// Converts a start bound into an inclusive start index (`None` if unbounded).
+#[inline(always)]
+fn start_bound_to_range_start(start: Bound<&usize>) -> Option<usize> {
+  match start {
+    Bound::Included(&start) => Some(start),
+    Bound::Excluded(&start) => Some(start + 1),
+    Bound::Unbounded => None,
+  }
+}
+
+/// Converts an end bound into an exclusive end index (`None` if unbounded).
+#[inline(always)]
+fn end_bound_to_range_end(end: Bound<&usize>) -> Option<usize> {
+  match end {
+    Bound::Included(&end) => Some(end + 1),
+    Bound::Excluded(&end) => Some(end),
+    Bound::Unbounded => None,
+  }
+}
+
+#[cfg(test)]
+mod tests {
+  use std::rc::Rc;
+
+  use crate::rope::{Repr, Rope};
+
+  impl<'a> PartialEq for Repr<'a> {
+    fn eq(&self, other: &Self) -> bool {
+      match (self, other) {
+        (Repr::Simple(a), Repr::Simple(b)) => a == b,
+        (Repr::Complex(a), Repr::Complex(b)) => a == b,
+        _ => false,
+      }
+    }
+  }
+
+  impl<'a> Eq for Repr<'a> {}
+
+  #[test]
+  fn add() {
+    let mut simple = Rope::from("abc");
+    assert_eq!(simple.repr, Repr::Simple("abc"));
+    assert_eq!(simple.len(), 3);
+
+    simple.add("def");
+    assert_eq!(simple, "abcdef");
+    assert_eq!(
+      simple.repr,
+      Repr::Complex(Rc::new(Vec::from_iter([("abc", 0), ("def", 3)])))
+    );
+    assert_eq!(simple.len(), 6);
+
+    simple.add("ghi");
+    assert_eq!(simple, "abcdefghi");
+    assert_eq!(
+      simple.repr,
+      Repr::Complex(Rc::new(Vec::from_iter([
+        ("abc", 0),
+        ("def", 3),
+        ("ghi", 6),
+      ])))
+    );
+    assert_eq!(simple.len(), 9);
+  }
+
+  #[test]
+  fn append() {
+    let simple1 = Rope::from("abc");
+    let simple2 = Rope::from("def");
+
+    let complex1 = Rope::from_iter(["1", "2", "3"]);
+    let complex2 = Rope::from_iter(["4", "5", "6"]);
+
+    // simple - simple
+    let mut append1 = simple1.clone();
+    append1.append(simple2.clone());
+    assert_eq!(append1, "abcdef");
+    assert_eq!(
+      append1.repr,
+      Repr::Complex(Rc::new(Vec::from_iter([("abc", 0), ("def", 3),])))
+    );
+
+    // simple - complex
+    let mut append2 = simple1.clone();
+    append2.append(complex1.clone());
+    assert_eq!(append2, "abc123");
+    assert_eq!(
+      append2.repr,
+      Repr::Complex(Rc::new(Vec::from_iter([
+        ("abc", 0),
+        ("1", 3),
+        ("2", 4),
+        ("3", 5),
+      ])))
+    );
+
+    // complex - simple
+    let mut append3 = complex1.clone();
+    append3.append(simple1.clone());
+    assert_eq!(append3, "123abc");
+    assert_eq!(
+      append3.repr,
+      Repr::Complex(Rc::new(Vec::from_iter([
+        ("1", 0),
+        ("2", 1),
+        ("3", 2),
+        ("abc", 3),
+      ])))
+    );
+
+    // complex - complex
+    let mut append4 = complex1.clone();
+    append4.append(complex2.clone());
+    assert_eq!(append4, "123456");
+    assert_eq!(
+      append4.repr,
+      Repr::Complex(Rc::new(Vec::from_iter([
+        ("1", 0),
+        ("2", 1),
+        ("3", 2),
+        ("4", 3),
+        ("5", 4),
+        ("6", 5),
+      ])))
+    );
+  }
+
+  #[test]
+  fn slice() {
+    let mut a = Rope::new();
+    a.add("abc");
+    a.add("def");
+    a.add("ghi");
+
+    // same chunk start
+    let rope = a.byte_slice(0..1);
+    assert_eq!(rope.to_string(), "a".to_string());
+
+    // same chunk end
+    let rope = a.byte_slice(2..3);
+    assert_eq!(rope.to_string(), "c".to_string());
+
+    // cross chunks
+    let rope = a.byte_slice(2..5);
+    assert_eq!(rope.to_string(), "cde".to_string());
+
+    // empty slice
+    let rope = a.byte_slice(0..0);
+    assert_eq!(rope.to_string(), "".to_string());
+
+    // slice with len
+    let rope = Rope::from("abc");
+    let rope = rope.byte_slice(3..3);
+    assert_eq!(rope.to_string(), "".to_string())
+  }
+
+  #[test]
+  #[should_panic]
+  fn slice_panics_range_start_out_of_bounds() {
+    let mut a = Rope::new();
+    a.add("abc");
+    a.byte_slice(3..4);
+  }
+
+  #[test]
+  #[should_panic]
+  fn slice_panics_range_start_greater_than_end() {
+    let mut a = Rope::new();
+    a.add("abc");
+    a.byte_slice(1..0);
+  }
+
+  #[test]
+  #[should_panic]
+  fn slice_panics_range_end_out_of_bounds() {
+    let mut a = Rope::new();
+    a.add("abc");
+    a.byte_slice(0..4);
+  }
+
+  #[test]
+  fn eq() {
+    let mut a = Rope::new();
+    a.add("abc");
+    a.add("def");
+    a.add("ghi");
+    assert_eq!(&a, "abcdefghi");
+    assert_eq!(a, "abcdefghi");
+
+    let mut b = Rope::new();
+    b.add("abcde");
+    b.add("fghi");
+
+    assert_eq!(a, b);
+  }
+
+  // Regression test: an empty chunk on the right-hand side must be skipped
+  // during comparison instead of being indexed.
+  #[test]
+  fn eq_with_empty_chunks() {
+    let a = Rope::from("ab");
+    let b = Rope::from_iter(["a", "", "b"]);
+    assert_eq!(a, b);
+  }
+
+  #[test]
+  fn from() {
+    let _ = Rope::from("abc");
+    let _ = Rope::from("abc");
+    let rope = Rope::from_iter(["abc", "def"]);
+    assert_eq!(rope, "abcdef");
+    assert_eq!(
+      rope.repr,
+      Repr::Complex(Rc::new(Vec::from_iter([("abc", 0), ("def", 3)])))
+    );
+  }
+
+  #[test]
+  fn byte() {
+    let mut a = Rope::from("abc");
+    assert_eq!(a.byte(0), b'a');
+    a.add("d");
+    assert_eq!(a.byte(3), b'd');
+  }
+
+  #[test]
+  fn char_indices() {
+    let mut a = Rope::new();
+    a.add("abc");
+    a.add("def");
+    assert_eq!(
+      a.char_indices().collect::<Vec<_>>(),
+      "abcdef".char_indices().collect::<Vec<_>>()
+    );
+
+    let mut a = Rope::new();
+    a.add("こんにちは");
+    assert_eq!(
+      a.char_indices().collect::<Vec<_>>(),
+      "こんにちは".char_indices().collect::<Vec<_>>()
+    );
+    a.add("世界");
+    assert_eq!(
+      a.char_indices().collect::<Vec<_>>(),
+      "こんにちは世界".char_indices().collect::<Vec<_>>()
+    );
+  }
+
+  #[test]
+  fn lines1() {
+    let rope = Rope::from("abc");
+    let lines = rope.lines().collect::<Vec<_>>();
+    assert_eq!(lines, ["abc"]);
+
+    // empty line at the end if the line before ends with a newline ('\n')
+    let rope = Rope::from("abc\ndef\n");
+    let lines = rope.lines().collect::<Vec<_>>();
+    assert_eq!(lines, ["abc\n", "def\n", ""]);
+
+    // no empty line at the end if the line before does not end with a newline ('\n')
+    let rope = Rope::from("abc\ndef");
+    let lines = rope.lines().collect::<Vec<_>>();
+    assert_eq!(lines, ["abc\n", "def"]);
+
+    let rope = Rope::from("Test\nTest\nTest\n");
+    let lines = rope.lines().collect::<Vec<_>>();
+    assert_eq!(lines, ["Test\n", "Test\n", "Test\n", ""]);
+
+    let rope = Rope::from("\n");
+    let lines = rope.lines().collect::<Vec<_>>();
+    assert_eq!(lines, ["\n", ""]);
+
+    let rope = Rope::from("\n\n");
+    let lines = rope.lines().collect::<Vec<_>>();
+    assert_eq!(lines, ["\n", "\n", ""]);
+
+    let rope = Rope::from("abc");
+    let lines = rope.lines().collect::<Vec<_>>();
+    assert_eq!(lines, ["abc"]);
+  }
+
+  #[test]
+  fn lines2() {
+    let rope = Rope::from_iter(["abc\n", "def\n", "ghi\n"]);
+    let lines = rope.lines().collect::<Vec<_>>();
+    // empty line at the end if the line before ends with a newline ('\n')
+    assert_eq!(lines, ["abc\n", "def\n", "ghi\n", ""]);
+
+    let rope = Rope::from_iter(["abc\n", "def\n", "ghi"]);
+    let lines = rope.lines().collect::<Vec<_>>();
+    assert_eq!(lines, ["abc\n", "def\n", "ghi"]);
+
+    let rope = Rope::from_iter(["abc\ndef", "ghi\n", "jkl"]);
+    let lines = rope.lines().collect::<Vec<_>>();
+    assert_eq!(lines, ["abc\n", "defghi\n", "jkl"]);
+
+    let rope = Rope::from_iter(["a\nb", "c\n", "d\n"]);
+    let lines = rope.lines().collect::<Vec<_>>();
+    assert_eq!(lines, ["a\n", "bc\n", "d\n", ""]);
+
+    let rope = Rope::from_iter(["\n"]);
+    let lines = rope.lines().collect::<Vec<_>>();
+    assert_eq!(lines, ["\n", ""]);
+
+    let rope = Rope::from_iter(["a", "b", "c"]);
+    let lines = rope.lines().collect::<Vec<_>>();
+    assert_eq!(lines, ["abc"]);
+  }
+
+  #[test]
+  fn lines_with_trailing_line_break_as_newline() {
+    let trailing_line_break_as_newline = false;
+    let rope = Rope::from("abc\n");
+    let lines = rope
+      .lines_impl(trailing_line_break_as_newline)
+      .collect::<Vec<_>>();
+    assert_eq!(lines, ["abc\n"]);
+
+    let rope = Rope::from("\n");
+    let lines = rope
+      .lines_impl(trailing_line_break_as_newline)
+      .collect::<Vec<_>>();
+    assert_eq!(lines, ["\n"]);
+  }
+
+  // A chunked rope holding a single chunk without a newline yields one line.
+  #[test]
+  fn lines_single_chunk_without_newline() {
+    let a = Rope::from_iter(["a"]);
+    assert_eq!(a.lines().collect::<Vec<_>>(), ["a"]);
+  }
+}
diff --git a/src/source.rs b/src/source.rs index d0aea61a..da3f7b43 100644 --- a/src/source.rs +++ b/src/source.rs @@ -12,6 +12,7 @@ use serde::{Deserialize, Serialize}; use crate::{ helpers::{decode_mappings, StreamChunks}, + rope::Rope, Result, }; @@ -20,18 +21,14 @@ pub type BoxSource = Arc; /// [Source] abstraction, [webpack-sources docs](https://github.com/webpack/webpack-sources/#source). pub trait Source: - for<'a> StreamChunks<'a> - + DynHash - + AsAny - + DynEq - + DynClone - + fmt::Debug - + Sync - + Send + StreamChunks + DynHash + AsAny + DynEq + DynClone + fmt::Debug + Sync + Send { /// Get the source code. fn source(&self) -> Cow; + /// Get the source code as a [Rope]. + fn rope(&self) -> Rope<'_>; + /// Get the source buffer. fn buffer(&self) -> Cow<[u8]>; @@ -55,6 +52,10 @@ impl Source for BoxSource { self.as_ref().source() } + fn rope(&self) -> Rope<'_> { + self.as_ref().rope() + } + fn buffer(&self) -> Cow<[u8]> { self.as_ref().buffer() } @@ -74,8 +75,8 @@ impl Source for BoxSource { dyn_clone::clone_trait_object!(Source); -impl<'a> StreamChunks<'a> for BoxSource { - fn stream_chunks( +impl StreamChunks for BoxSource { + fn stream_chunks<'a>( &'a self, options: &MapOptions, on_chunk: crate::helpers::OnChunk<'_, 'a>, diff --git a/src/source_map_source.rs b/src/source_map_source.rs index 07b32a05..dcf7a760 100644 --- a/src/source_map_source.rs +++ b/src/source_map_source.rs @@ -8,7 +8,7 @@ use crate::{ get_map, stream_chunks_of_combined_source_map, stream_chunks_of_source_map, StreamChunks, }, - MapOptions, Source, SourceMap, + MapOptions, Rope, Source, SourceMap, }; /// Options for [SourceMapSource::new].
@@ -92,6 +92,10 @@ impl Source for SourceMapSource { Cow::Borrowed(&self.value) } + fn rope(&self) -> Rope<'_> { + Rope::from(&self.value) + } + fn buffer(&self) -> Cow<[u8]> { Cow::Borrowed(self.value.as_bytes()) } @@ -146,8 +150,8 @@ impl std::fmt::Debug for SourceMapSource { } } -impl<'a> StreamChunks<'a> for SourceMapSource { - fn stream_chunks( +impl StreamChunks for SourceMapSource { + fn stream_chunks<'a>( &'a self, options: &MapOptions, on_chunk: crate::helpers::OnChunk<'_, 'a>, @@ -156,10 +160,10 @@ impl<'a> StreamChunks<'a> for SourceMapSource { ) -> crate::helpers::GeneratedInfo { if let Some(inner_source_map) = &self.inner_source_map { stream_chunks_of_combined_source_map( - &self.value, + &*self.value, &self.source_map, &self.name, - self.original_source.as_deref(), + self.original_source.as_deref().map(Rope::from), inner_source_map, self.remove_original_source, on_chunk, @@ -169,7 +173,7 @@ impl<'a> StreamChunks<'a> for SourceMapSource { ) } else { stream_chunks_of_source_map( - &self.value, + self.value.as_str(), &self.source_map, on_chunk, on_source, diff --git a/src/with_indices.rs b/src/with_indices.rs index f9c07f80..17c8e1d6 100644 --- a/src/with_indices.rs +++ b/src/with_indices.rs @@ -1,30 +1,35 @@ -use std::cell::OnceCell; +use std::{cell::OnceCell, marker::PhantomData}; + +use crate::helpers::SourceText; #[derive(Debug, Clone)] -pub struct WithIndices<'a> { +pub struct WithIndices<'a, S> +where + S: SourceText<'a>, +{ /// line is a string reference - pub line: &'a str, + pub line: S, /// the byte position of each `char` in `line` string slice . 
pub indices_indexes: OnceCell>, + data: PhantomData<&'a S>, } -impl<'a> WithIndices<'a> { - pub fn new(line: &'a str) -> Self { +impl<'a, S> WithIndices<'a, S> +where + S: SourceText<'a>, +{ + pub fn new(line: S) -> Self { Self { indices_indexes: OnceCell::new(), line, + data: PhantomData, } } /// substring::SubString with cache - #[allow(unsafe_code)] - pub(crate) fn substring( - &self, - start_index: usize, - end_index: usize, - ) -> &'a str { + pub(crate) fn substring(&self, start_index: usize, end_index: usize) -> S { if end_index <= start_index { - return ""; + return S::default(); } let indices_indexes = self.indices_indexes.get_or_init(|| { @@ -34,42 +39,48 @@ impl<'a> WithIndices<'a> { let str_len = self.line.len(); let start = *indices_indexes.get(start_index).unwrap_or(&str_len); let end = *indices_indexes.get(end_index).unwrap_or(&str_len); - unsafe { - // SAFETY: Since `indices` iterates over the `CharIndices` of `self`, we can guarantee - // that the indices obtained from it will always be within the bounds of `self` and they - // will always lie on UTF-8 sequence boundaries. 
- self.line.get_unchecked(start..end) - } + self.line.byte_slice(start..end) } } /// tests are just copy from `substring` crate #[cfg(test)] mod tests { + use crate::Rope; + use super::WithIndices; #[test] fn test_substring() { - assert_eq!(WithIndices::new("foobar").substring(0, 3), "foo"); + assert_eq!( + WithIndices::new(Rope::from("foobar")).substring(0, 3), + "foo" + ); } #[test] fn test_out_of_bounds() { - assert_eq!(WithIndices::new("foobar").substring(0, 10), "foobar"); - assert_eq!(WithIndices::new("foobar").substring(6, 10), ""); + assert_eq!( + WithIndices::new(Rope::from("foobar")).substring(0, 10), + "foobar" + ); + assert_eq!(WithIndices::new(Rope::from("foobar")).substring(6, 10), ""); } #[test] fn test_start_less_than_end() { - assert_eq!(WithIndices::new("foobar").substring(3, 2), ""); + assert_eq!(WithIndices::new(Rope::from("foobar")).substring(3, 2), ""); } #[test] fn test_start_and_end_equal() { - assert_eq!(WithIndices::new("foobar").substring(3, 3), ""); + assert_eq!(WithIndices::new(Rope::from("foobar")).substring(3, 3), ""); } #[test] fn test_multiple_byte_characters() { - assert_eq!(WithIndices::new("fõøbα®").substring(2, 5), "øbα"); + assert_eq!( + WithIndices::new(Rope::from("fõøbα®")).substring(2, 5), + "øbα" + ); } } diff --git a/tests/compat_source.rs b/tests/compat_source.rs index 9079d26c..1a9347d9 100644 --- a/tests/compat_source.rs +++ b/tests/compat_source.rs @@ -1,3 +1,4 @@ +#![allow(missing_docs)] use std::borrow::Cow; use std::hash::Hash; @@ -5,7 +6,7 @@ use rspack_sources::stream_chunks::{ stream_chunks_default, GeneratedInfo, OnChunk, OnName, OnSource, StreamChunks, }; use rspack_sources::{ - ConcatSource, MapOptions, RawSource, Source, SourceExt, SourceMap, + ConcatSource, MapOptions, RawSource, Rope, Source, SourceExt, SourceMap, }; #[derive(Debug, Eq)] @@ -16,6 +17,10 @@ impl Source for CompatSource { Cow::Borrowed(self.0) } + fn rope(&self) -> Rope<'_> { + Rope::from(self.0) + } + fn buffer(&self) -> Cow<[u8]> { 
Cow::Borrowed(self.0.as_bytes()) } @@ -33,8 +38,8 @@ impl Source for CompatSource { } } -impl<'a> StreamChunks<'a> for CompatSource { - fn stream_chunks( +impl StreamChunks for CompatSource { + fn stream_chunks<'a>( &'a self, options: &MapOptions, on_chunk: OnChunk<'_, 'a>,